From 4cd0778094446975e61501a4b929095556c711e3 Mon Sep 17 00:00:00 2001 From: Roger M Date: Mon, 29 Apr 2024 19:44:53 +0100 Subject: [PATCH] Add Serper API as an alternative search provider option for faster speeds and no rate limitations from Google --- README.md | 4 +- sample.config.toml | 1 + src/agents/academicSearchAgent.ts | 4 +- src/agents/imageSearchAgent.ts | 4 +- src/agents/redditSearchAgent.ts | 4 +- src/agents/webSearchAgent.ts | 4 +- src/agents/wolframAlphaSearchAgent.ts | 4 +- src/agents/youtubeSearchAgent.ts | 4 +- src/config.ts | 3 ++ src/lib/search.ts | 40 ++++++++++++++ src/lib/searxng.ts | 21 ++------ src/lib/serper.ts | 76 +++++++++++++++++++++++++++ 12 files changed, 137 insertions(+), 32 deletions(-) create mode 100644 src/lib/search.ts create mode 100644 src/lib/serper.ts diff --git a/README.md b/README.md index 8a3f923..1d285a7 100644 --- a/README.md +++ b/README.md @@ -65,8 +65,8 @@ There are mainly 2 ways of installing Perplexica - With Docker, Without Docker. - `OPENAI`: Your OpenAI API key. **You only need to fill this if you wish to use OpenAI's models**. - `OLLAMA`: Your Ollama API URL. You should enter it as `http://host.docker.internal:PORT_NUMBER`. If you installed Ollama on port 11434, use `http://host.docker.internal:11434`. For other ports, adjust accordingly. **You need to fill this if you wish to use Ollama's models instead of OpenAI's**. - **Note**: You can change these and use different models after running Perplexica as well from the settings page. - + **Note**: You can change these and use different models after running Perplexica as well from the settings page. + - `SERPER`: The Serper API key, if you prefer not to run SearxNG and want faster search speeds, as well as avoiding rate limitations by Google. You can create a free key allowing up to 2,500 queries [here](https://serper.dev/). - `SIMILARITY_MEASURE`: The similarity measure to use (This is filled by default; you can leave it as is if you are unsure about it.) 5. 
Ensure you are in the directory containing the `docker-compose.yaml` file and execute: diff --git a/sample.config.toml b/sample.config.toml index 2d09b4b..fe43831 100644 --- a/sample.config.toml +++ b/sample.config.toml @@ -6,6 +6,7 @@ CHAT_MODEL = "gpt-3.5-turbo" # Name of the model to use [API_KEYS] OPENAI = "" # OpenAI API key - sk-1234567890abcdef1234567890abcdef +SERPER = "" # Serper API key - https://serper.dev [API_ENDPOINTS] SEARXNG = "http://localhost:32768" # SearxNG API URL diff --git a/src/agents/academicSearchAgent.ts b/src/agents/academicSearchAgent.ts index d08f282..e424ff1 100644 --- a/src/agents/academicSearchAgent.ts +++ b/src/agents/academicSearchAgent.ts @@ -11,7 +11,7 @@ import { } from '@langchain/core/runnables'; import { StringOutputParser } from '@langchain/core/output_parsers'; import { Document } from '@langchain/core/documents'; -import { searchSearxng } from '../lib/searxng'; +import { search } from '../lib/search'; import type { StreamEvent } from '@langchain/core/tracers/log_stream'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { Embeddings } from '@langchain/core/embeddings'; @@ -111,7 +111,7 @@ const createBasicAcademicSearchRetrieverChain = (llm: BaseChatModel) => { return { query: '', docs: [] }; } - const res = await searchSearxng(input, { + const res = await search(input, { language: 'en', engines: [ 'arxiv', diff --git a/src/agents/imageSearchAgent.ts b/src/agents/imageSearchAgent.ts index 167019f..62eeaa4 100644 --- a/src/agents/imageSearchAgent.ts +++ b/src/agents/imageSearchAgent.ts @@ -7,7 +7,7 @@ import { PromptTemplate } from '@langchain/core/prompts'; import formatChatHistoryAsString from '../utils/formatHistory'; import { BaseMessage } from '@langchain/core/messages'; import { StringOutputParser } from '@langchain/core/output_parsers'; -import { searchSearxng } from '../lib/searxng'; +import { search } from '../lib/search'; import type { BaseChatModel } from 
'@langchain/core/language_models/chat_models'; const imageSearchChainPrompt = ` @@ -52,7 +52,7 @@ const createImageSearchChain = (llm: BaseChatModel) => { llm, strParser, RunnableLambda.from(async (input: string) => { - const res = await searchSearxng(input, { + const res = await search(input, { engines: ['bing images', 'google images'], }); diff --git a/src/agents/redditSearchAgent.ts b/src/agents/redditSearchAgent.ts index 4fee20a..402dd51 100644 --- a/src/agents/redditSearchAgent.ts +++ b/src/agents/redditSearchAgent.ts @@ -11,7 +11,7 @@ import { } from '@langchain/core/runnables'; import { StringOutputParser } from '@langchain/core/output_parsers'; import { Document } from '@langchain/core/documents'; -import { searchSearxng } from '../lib/searxng'; +import { search } from '../lib/search'; import type { StreamEvent } from '@langchain/core/tracers/log_stream'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { Embeddings } from '@langchain/core/embeddings'; @@ -111,7 +111,7 @@ const createBasicRedditSearchRetrieverChain = (llm: BaseChatModel) => { return { query: '', docs: [] }; } - const res = await searchSearxng(input, { + const res = await search(input, { language: 'en', engines: ['reddit'], }); diff --git a/src/agents/webSearchAgent.ts b/src/agents/webSearchAgent.ts index 7b8e877..3748176 100644 --- a/src/agents/webSearchAgent.ts +++ b/src/agents/webSearchAgent.ts @@ -11,7 +11,7 @@ import { } from '@langchain/core/runnables'; import { StringOutputParser } from '@langchain/core/output_parsers'; import { Document } from '@langchain/core/documents'; -import { searchSearxng } from '../lib/searxng'; +import { search } from '../lib/search'; import type { StreamEvent } from '@langchain/core/tracers/log_stream'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { Embeddings } from '@langchain/core/embeddings'; @@ -111,7 +111,7 @@ const createBasicWebSearchRetrieverChain = (llm: 
BaseChatModel) => { return { query: '', docs: [] }; } - const res = await searchSearxng(input, { + const res = await search(input, { language: 'en', }); diff --git a/src/agents/wolframAlphaSearchAgent.ts b/src/agents/wolframAlphaSearchAgent.ts index cdcd222..92d6bbd 100644 --- a/src/agents/wolframAlphaSearchAgent.ts +++ b/src/agents/wolframAlphaSearchAgent.ts @@ -11,7 +11,7 @@ import { } from '@langchain/core/runnables'; import { StringOutputParser } from '@langchain/core/output_parsers'; import { Document } from '@langchain/core/documents'; -import { searchSearxng } from '../lib/searxng'; +import { search } from '../lib/search'; import type { StreamEvent } from '@langchain/core/tracers/log_stream'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { Embeddings } from '@langchain/core/embeddings'; @@ -110,7 +110,7 @@ const createBasicWolframAlphaSearchRetrieverChain = (llm: BaseChatModel) => { return { query: '', docs: [] }; } - const res = await searchSearxng(input, { + const res = await search(input, { language: 'en', engines: ['wolframalpha'], }); diff --git a/src/agents/youtubeSearchAgent.ts b/src/agents/youtubeSearchAgent.ts index 31ba660..de761ac 100644 --- a/src/agents/youtubeSearchAgent.ts +++ b/src/agents/youtubeSearchAgent.ts @@ -11,7 +11,7 @@ import { } from '@langchain/core/runnables'; import { StringOutputParser } from '@langchain/core/output_parsers'; import { Document } from '@langchain/core/documents'; -import { searchSearxng } from '../lib/searxng'; +import { search } from '../lib/search'; import type { StreamEvent } from '@langchain/core/tracers/log_stream'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { Embeddings } from '@langchain/core/embeddings'; @@ -111,7 +111,7 @@ const createBasicYoutubeSearchRetrieverChain = (llm: BaseChatModel) => { return { query: '', docs: [] }; } - const res = await searchSearxng(input, { + const res = await search(input, { 
language: 'en', engines: ['youtube'], }); diff --git a/src/config.ts b/src/config.ts index f373847..8331b62 100644 --- a/src/config.ts +++ b/src/config.ts @@ -13,6 +13,7 @@ interface Config { }; API_KEYS: { OPENAI: string; + SERPER: string; }; API_ENDPOINTS: { SEARXNG: string; @@ -41,6 +42,8 @@ export const getChatModel = () => loadConfig().GENERAL.CHAT_MODEL; export const getOpenaiApiKey = () => loadConfig().API_KEYS.OPENAI; +export const getSerperApiKey = () => loadConfig().API_KEYS.SERPER; + export const getSearxngApiEndpoint = () => loadConfig().API_ENDPOINTS.SEARXNG; export const getOllamaApiEndpoint = () => loadConfig().API_ENDPOINTS.OLLAMA; diff --git a/src/lib/search.ts b/src/lib/search.ts new file mode 100644 index 0000000..29f5e79 --- /dev/null +++ b/src/lib/search.ts @@ -0,0 +1,40 @@ +import {searchSerper} from "./serper"; +import {searchSearxng} from "./searxng"; +import {getSerperApiKey} from "../config"; + +export interface SearchOptions { + categories?: string[]; + engines?: string[]; + language?: string; + country?: string; + pageno?: number; +} + +export interface SearchResult { + title: string; + url: string; + img_src?: string; + thumbnail_src?: string; + content?: string; + author?: string; +} + +export const search = async ( + query: string, + opts?: SearchOptions, +) => { + + const hasSerperKey = !!getSerperApiKey(); + if (hasSerperKey) { + let engine = opts?.engines?.[0] ?? 
'search'; + if (engine.match(/search|videos|images|news|scholar/)) { + engine = engine.replaceAll(/google|bing/g, '').trim(); + return searchSerper(query, { + ...opts, + engines: [engine], + }); + } + } + + return searchSearxng(query, opts); +}; diff --git a/src/lib/searxng.ts b/src/lib/searxng.ts index 297e50f..2c5de45 100644 --- a/src/lib/searxng.ts +++ b/src/lib/searxng.ts @@ -1,25 +1,10 @@ import axios from 'axios'; import { getSearxngApiEndpoint } from '../config'; - -interface SearxngSearchOptions { - categories?: string[]; - engines?: string[]; - language?: string; - pageno?: number; -} - -interface SearxngSearchResult { - title: string; - url: string; - img_src?: string; - thumbnail_src?: string; - content?: string; - author?: string; -} +import {SearchOptions, SearchResult} from "./search"; export const searchSearxng = async ( query: string, - opts?: SearxngSearchOptions, + opts?: SearchOptions, ) => { const searxngURL = getSearxngApiEndpoint(); @@ -38,7 +23,7 @@ export const searchSearxng = async ( const res = await axios.get(url.toString()); - const results: SearxngSearchResult[] = res.data.results; + const results: SearchResult[] = res.data.results; const suggestions: string[] = res.data.suggestions; return { results, suggestions }; diff --git a/src/lib/serper.ts b/src/lib/serper.ts new file mode 100644 index 0000000..ed660af --- /dev/null +++ b/src/lib/serper.ts @@ -0,0 +1,76 @@ +import axios from 'axios'; +import {SearchOptions, SearchResult} from "./search"; +import {getSerperApiKey} from "../config"; + +interface SerperParams { + q: string; + gl?: string; + hl?: string; + num?: number; + page?: number; + type?: 'search' | 'images' | 'news' | 'videos' | 'scholar' +} + +export const searchSerper = async ( + query: string, + opts: SearchOptions = {}, +) => { + const serperURL = 'https://google.serper.dev' + + let type = opts.engines?.[0] ?? 
'search'; + const url = `${serperURL}/${type}`; + + const params: SerperParams = { + q: query, + hl: opts.language ?? 'en', + gl: opts.country ?? 'us', + page: opts.pageno ?? 1, + } + const res = await axios.post(url, params, { + headers: { + 'X-API-KEY': getSerperApiKey(), + } + }); + + const data = res.data; + let results: SearchResult[] = []; + const kg = data.knowledgeGraph; + if (kg) { + let content: string[] = []; + kg.type && content.push(kg.type); + kg.description && content.push(kg.description); + kg.attributes && Object.entries(kg.attributes).forEach(([k, v]) => content.push(`${k}: ${v}`)) + results.push({ + title: kg.title, + url: kg.descriptionLink || kg.website, + content: content.join('\n'), + img_src: kg.imageUrl, + }); + } + + const answerBox = data.answerBox; + if (answerBox) { + results.push({ + title: answerBox.title, + url: answerBox.link, + content: answerBox.answer || answerBox.snippet, + }); + } + + for (const key of ['organic', 'images', 'news', 'videos']) { + if (!data[key]) continue; + results.push(...data[key].map((r) => ({ + title: r.title, + url: r.link, + content: r.snippet, + img_src: r.imageUrl, + thumbnail_src: r.thumbnailUrl, + }))); + } + + results = results.filter(r=>!!r.url) + + const suggestions: string[] = res.data.relatedSearches?.map(s => s.query) || []; + + return {results, suggestions}; +};