From 4cd0778094446975e61501a4b929095556c711e3 Mon Sep 17 00:00:00 2001 From: Roger M Date: Mon, 29 Apr 2024 19:44:53 +0100 Subject: [PATCH] Add Serper API as an alternative search provider option for faster speeds and no rate limitations from Google --- README.md | 4 +- sample.config.toml | 1 + src/agents/academicSearchAgent.ts | 4 +- src/agents/imageSearchAgent.ts | 4 +- src/agents/redditSearchAgent.ts | 4 +- src/agents/webSearchAgent.ts | 4 +- src/agents/wolframAlphaSearchAgent.ts | 4 +- src/agents/youtubeSearchAgent.ts | 4 +- src/config.ts | 3 ++ src/lib/search.ts | 40 ++++++++++++++ src/lib/searxng.ts | 21 ++------ src/lib/serper.ts | 76 +++++++++++++++++++++++++++ 12 files changed, 137 insertions(+), 32 deletions(-) create mode 100644 src/lib/search.ts create mode 100644 src/lib/serper.ts diff --git a/README.md b/README.md index 8a3f923..1d285a7 100644 --- a/README.md +++ b/README.md @@ -65,8 +65,8 @@ There are mainly 2 ways of installing Perplexica - With Docker, Without Docker. - `OPENAI`: Your OpenAI API key. **You only need to fill this if you wish to use OpenAI's models**. - `OLLAMA`: Your Ollama API URL. You should enter it as `http://host.docker.internal:PORT_NUMBER`. If you installed Ollama on port 11434, use `http://host.docker.internal:11434`. For other ports, adjust accordingly. **You need to fill this if you wish to use Ollama's models instead of OpenAI's**. - **Note**: You can change these and use different models after running Perplexica as well from the settings page. - + **Note**: You can change these and use different models after running Perplexica as well from the settings page. + - `SERPER`: The Serper API key, if you prefer not to run SearxNG and want faster search speeds, as well as avoiding rate limitations by Google. You can create a free key allowing up to 2,500 queries [here](https://serper.dev/). - `SIMILARITY_MEASURE`: The similarity measure to use (This is filled by default; you can leave it as is if you are unsure about it.) 5. 
Ensure you are in the directory containing the `docker-compose.yaml` file and execute: diff --git a/sample.config.toml b/sample.config.toml index 2d09b4b..fe43831 100644 --- a/sample.config.toml +++ b/sample.config.toml @@ -6,6 +6,7 @@ CHAT_MODEL = "gpt-3.5-turbo" # Name of the model to use [API_KEYS] OPENAI = "" # OpenAI API key - sk-1234567890abcdef1234567890abcdef +SERPER = "" # Serper API key - https://serper.dev [API_ENDPOINTS] SEARXNG = "http://localhost:32768" # SearxNG API URL diff --git a/src/agents/academicSearchAgent.ts b/src/agents/academicSearchAgent.ts index d08f282..e424ff1 100644 --- a/src/agents/academicSearchAgent.ts +++ b/src/agents/academicSearchAgent.ts @@ -11,7 +11,7 @@ import { } from '@langchain/core/runnables'; import { StringOutputParser } from '@langchain/core/output_parsers'; import { Document } from '@langchain/core/documents'; -import { searchSearxng } from '../lib/searxng'; +import { search } from '../lib/search'; import type { StreamEvent } from '@langchain/core/tracers/log_stream'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { Embeddings } from '@langchain/core/embeddings'; @@ -111,7 +111,7 @@ const createBasicAcademicSearchRetrieverChain = (llm: BaseChatModel) => { return { query: '', docs: [] }; } - const res = await searchSearxng(input, { + const res = await search(input, { language: 'en', engines: [ 'arxiv', diff --git a/src/agents/imageSearchAgent.ts b/src/agents/imageSearchAgent.ts index 167019f..62eeaa4 100644 --- a/src/agents/imageSearchAgent.ts +++ b/src/agents/imageSearchAgent.ts @@ -7,7 +7,7 @@ import { PromptTemplate } from '@langchain/core/prompts'; import formatChatHistoryAsString from '../utils/formatHistory'; import { BaseMessage } from '@langchain/core/messages'; import { StringOutputParser } from '@langchain/core/output_parsers'; -import { searchSearxng } from '../lib/searxng'; +import { search } from '../lib/search'; import type { BaseChatModel } from 
'@langchain/core/language_models/chat_models'; const imageSearchChainPrompt = ` @@ -52,7 +52,7 @@ const createImageSearchChain = (llm: BaseChatModel) => { llm, strParser, RunnableLambda.from(async (input: string) => { - const res = await searchSearxng(input, { + const res = await search(input, { engines: ['bing images', 'google images'], }); diff --git a/src/agents/redditSearchAgent.ts b/src/agents/redditSearchAgent.ts index 4fee20a..402dd51 100644 --- a/src/agents/redditSearchAgent.ts +++ b/src/agents/redditSearchAgent.ts @@ -11,7 +11,7 @@ import { } from '@langchain/core/runnables'; import { StringOutputParser } from '@langchain/core/output_parsers'; import { Document } from '@langchain/core/documents'; -import { searchSearxng } from '../lib/searxng'; +import { search } from '../lib/search'; import type { StreamEvent } from '@langchain/core/tracers/log_stream'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { Embeddings } from '@langchain/core/embeddings'; @@ -111,7 +111,7 @@ const createBasicRedditSearchRetrieverChain = (llm: BaseChatModel) => { return { query: '', docs: [] }; } - const res = await searchSearxng(input, { + const res = await search(input, { language: 'en', engines: ['reddit'], }); diff --git a/src/agents/webSearchAgent.ts b/src/agents/webSearchAgent.ts index 7b8e877..3748176 100644 --- a/src/agents/webSearchAgent.ts +++ b/src/agents/webSearchAgent.ts @@ -11,7 +11,7 @@ import { } from '@langchain/core/runnables'; import { StringOutputParser } from '@langchain/core/output_parsers'; import { Document } from '@langchain/core/documents'; -import { searchSearxng } from '../lib/searxng'; +import { search } from '../lib/search'; import type { StreamEvent } from '@langchain/core/tracers/log_stream'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { Embeddings } from '@langchain/core/embeddings'; @@ -111,7 +111,7 @@ const createBasicWebSearchRetrieverChain = (llm: 
BaseChatModel) => { return { query: '', docs: [] }; } - const res = await searchSearxng(input, { + const res = await search(input, { language: 'en', }); diff --git a/src/agents/wolframAlphaSearchAgent.ts b/src/agents/wolframAlphaSearchAgent.ts index cdcd222..92d6bbd 100644 --- a/src/agents/wolframAlphaSearchAgent.ts +++ b/src/agents/wolframAlphaSearchAgent.ts @@ -11,7 +11,7 @@ import { } from '@langchain/core/runnables'; import { StringOutputParser } from '@langchain/core/output_parsers'; import { Document } from '@langchain/core/documents'; -import { searchSearxng } from '../lib/searxng'; +import { search } from '../lib/search'; import type { StreamEvent } from '@langchain/core/tracers/log_stream'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { Embeddings } from '@langchain/core/embeddings'; @@ -110,7 +110,7 @@ const createBasicWolframAlphaSearchRetrieverChain = (llm: BaseChatModel) => { return { query: '', docs: [] }; } - const res = await searchSearxng(input, { + const res = await search(input, { language: 'en', engines: ['wolframalpha'], }); diff --git a/src/agents/youtubeSearchAgent.ts b/src/agents/youtubeSearchAgent.ts index 31ba660..de761ac 100644 --- a/src/agents/youtubeSearchAgent.ts +++ b/src/agents/youtubeSearchAgent.ts @@ -11,7 +11,7 @@ import { } from '@langchain/core/runnables'; import { StringOutputParser } from '@langchain/core/output_parsers'; import { Document } from '@langchain/core/documents'; -import { searchSearxng } from '../lib/searxng'; +import { search } from '../lib/search'; import type { StreamEvent } from '@langchain/core/tracers/log_stream'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { Embeddings } from '@langchain/core/embeddings'; @@ -111,7 +111,7 @@ const createBasicYoutubeSearchRetrieverChain = (llm: BaseChatModel) => { return { query: '', docs: [] }; } - const res = await searchSearxng(input, { + const res = await search(input, { 
language: 'en', engines: ['youtube'], }); diff --git a/src/config.ts b/src/config.ts index f373847..8331b62 100644 --- a/src/config.ts +++ b/src/config.ts @@ -13,6 +13,7 @@ interface Config { }; API_KEYS: { OPENAI: string; + SERPER: string; }; API_ENDPOINTS: { SEARXNG: string; @@ -41,6 +42,8 @@ export const getChatModel = () => loadConfig().GENERAL.CHAT_MODEL; export const getOpenaiApiKey = () => loadConfig().API_KEYS.OPENAI; +export const getSerperApiKey = () => loadConfig().API_KEYS.SERPER; + export const getSearxngApiEndpoint = () => loadConfig().API_ENDPOINTS.SEARXNG; export const getOllamaApiEndpoint = () => loadConfig().API_ENDPOINTS.OLLAMA; diff --git a/src/lib/search.ts b/src/lib/search.ts new file mode 100644 index 0000000..29f5e79 --- /dev/null +++ b/src/lib/search.ts @@ -0,0 +1,40 @@ +import {searchSerper} from "./serper"; +import {searchSearxng} from "./searxng"; +import {getSerperApiKey} from "../config"; + +export interface SearchOptions { + categories?: string[]; + engines?: string[]; + language?: string; + country?: string; + pageno?: number; +} + +export interface SearchResult { + title: string; + url: string; + img_src?: string; + thumbnail_src?: string; + content?: string; + author?: string; +} + +export const search = async ( + query: string, + opts?: SearchOptions, +) => { + + const hasSerperKey = !!getSerperApiKey(); + if (hasSerperKey) { + let engine = opts?.engines?.[0] ?? 
'search'; + if (engine.match(/search|videos|images|news|scholar/)) { + engine = engine.replaceAll(/google|bing/g, '').trim(); + return searchSerper(query, { + ...opts, + engines: [engine], + }); + } + } + + return searchSearxng(query, opts); +}; diff --git a/src/lib/searxng.ts b/src/lib/searxng.ts index 297e50f..2c5de45 100644 --- a/src/lib/searxng.ts +++ b/src/lib/searxng.ts @@ -1,25 +1,10 @@ import axios from 'axios'; import { getSearxngApiEndpoint } from '../config'; - -interface SearxngSearchOptions { - categories?: string[]; - engines?: string[]; - language?: string; - pageno?: number; -} - -interface SearxngSearchResult { - title: string; - url: string; - img_src?: string; - thumbnail_src?: string; - content?: string; - author?: string; -} +import {SearchOptions, SearchResult} from "./search"; export const searchSearxng = async ( query: string, - opts?: SearxngSearchOptions, + opts?: SearchOptions, ) => { const searxngURL = getSearxngApiEndpoint(); @@ -38,7 +23,7 @@ export const searchSearxng = async ( const res = await axios.get(url.toString()); - const results: SearxngSearchResult[] = res.data.results; + const results: SearchResult[] = res.data.results; const suggestions: string[] = res.data.suggestions; return { results, suggestions }; diff --git a/src/lib/serper.ts b/src/lib/serper.ts new file mode 100644 index 0000000..ed660af --- /dev/null +++ b/src/lib/serper.ts @@ -0,0 +1,76 @@ +import axios from 'axios'; +import {SearchOptions, SearchResult} from "./search"; +import {getSerperApiKey} from "../config"; + +interface SerperParams { + q: string; + gl?: string; + hl?: string; + num?: number; + page?: number; + type?: 'search' | 'images' | 'news' | 'videos' | 'scholar' +} + +export const searchSerper = async ( + query: string, + opts: SearchOptions = {}, +) => { + const serperURL = 'https://google.serper.dev' + + let type = opts.engines?.[0] ?? 
'search'; + const url = `${serperURL}/${type}`; + + const params: SerperParams = { + q: query, + hl: opts.language ?? 'en', + gl: opts.country ?? 'us', + page: opts.pageno ?? 1, + } + const res = await axios.post(url, params, { + headers: { + 'X-API-KEY': getSerperApiKey(), + } + }); + + const data = res.data; + let results: SearchResult[] = []; + const kg = data.knowledgeGraph; + if (kg) { + let content: string[] = []; + kg.type && content.push(kg.type); + kg.description && content.push(kg.description); + kg.attributes && Object.entries(kg.attributes).forEach(([k, v]) => content.push(`${k}: ${v}`)) + results.push({ + title: kg.title, + url: kg.descriptionLink || kg.website, + content: content.join('\n'), + img_src: kg.imageUrl, + }); + } + + const answerBox = data.answerBox; + if (answerBox) { + results.push({ + title: answerBox.title, + url: answerBox.link, + content: answerBox.answer || answerBox.snippet, + }); + } + + for (const key of ['organic', 'images', 'news', 'videos']) { + if (!data[key]) continue; + results.push(...data[key].map((r) => ({ + title: r.title, + url: r.link, + content: r.snippet, + img_src: r.imageUrl, + thumbnail_src: r.thumbnailUrl, + }))); + } + + results = results.filter(r=>!!r.url) + + const suggestions: string[] = res.data.relatedSearches?.map(s => s.query) || []; + + return {results, suggestions}; +};