feat(config): Use toml instead of env
This commit is contained in:
parent
dd1ce4e324
commit
c6a5790d33
26 changed files with 799 additions and 596 deletions
|
@ -9,24 +9,16 @@ import {
|
|||
RunnableMap,
|
||||
RunnableLambda,
|
||||
} from '@langchain/core/runnables';
|
||||
import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai';
|
||||
import { StringOutputParser } from '@langchain/core/output_parsers';
|
||||
import { Document } from '@langchain/core/documents';
|
||||
import { searchSearxng } from '../core/searxng';
|
||||
import type { StreamEvent } from '@langchain/core/tracers/log_stream';
|
||||
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
import type { Embeddings } from '@langchain/core/embeddings';
|
||||
import formatChatHistoryAsString from '../utils/formatHistory';
|
||||
import eventEmitter from 'events';
|
||||
import computeSimilarity from '../utils/computeSimilarity';
|
||||
|
||||
const llm = new ChatOpenAI({
|
||||
modelName: process.env.MODEL_NAME,
|
||||
temperature: 0.7,
|
||||
});
|
||||
|
||||
const embeddings = new OpenAIEmbeddings({
|
||||
modelName: 'text-embedding-3-large',
|
||||
});
|
||||
|
||||
const basicAcademicSearchRetrieverPrompt = `
|
||||
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
|
||||
If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
|
||||
|
@ -49,7 +41,7 @@ Rephrased question:
|
|||
`;
|
||||
|
||||
const basicAcademicSearchResponsePrompt = `
|
||||
You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are set on focus mode 'Acadedemic', this means you will be searching for academic papers and articles on the web.
|
||||
You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are set on focus mode 'Academic', this means you will be searching for academic papers and articles on the web.
|
||||
|
||||
Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containg a brief description of the content of that page).
|
||||
You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text.
|
||||
|
@ -104,122 +96,140 @@ const handleStream = async (
|
|||
}
|
||||
};
|
||||
|
||||
const processDocs = async (docs: Document[]) => {
|
||||
return docs
|
||||
.map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
|
||||
.join('\n');
|
||||
};
|
||||
|
||||
const rerankDocs = async ({
|
||||
query,
|
||||
docs,
|
||||
}: {
|
||||
query: string;
|
||||
docs: Document[];
|
||||
}) => {
|
||||
if (docs.length === 0) {
|
||||
return docs;
|
||||
}
|
||||
|
||||
const docsWithContent = docs.filter(
|
||||
(doc) => doc.pageContent && doc.pageContent.length > 0,
|
||||
);
|
||||
|
||||
const docEmbeddings = await embeddings.embedDocuments(
|
||||
docsWithContent.map((doc) => doc.pageContent),
|
||||
);
|
||||
|
||||
const queryEmbedding = await embeddings.embedQuery(query);
|
||||
|
||||
const similarity = docEmbeddings.map((docEmbedding, i) => {
|
||||
const sim = computeSimilarity(queryEmbedding, docEmbedding);
|
||||
|
||||
return {
|
||||
index: i,
|
||||
similarity: sim,
|
||||
};
|
||||
});
|
||||
|
||||
const sortedDocs = similarity
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, 15)
|
||||
.map((sim) => docsWithContent[sim.index]);
|
||||
|
||||
return sortedDocs;
|
||||
};
|
||||
|
||||
type BasicChainInput = {
|
||||
chat_history: BaseMessage[];
|
||||
query: string;
|
||||
};
|
||||
|
||||
const basicAcademicSearchRetrieverChain = RunnableSequence.from([
|
||||
PromptTemplate.fromTemplate(basicAcademicSearchRetrieverPrompt),
|
||||
llm,
|
||||
strParser,
|
||||
RunnableLambda.from(async (input: string) => {
|
||||
if (input === 'not_needed') {
|
||||
return { query: '', docs: [] };
|
||||
const createBasicAcademicSearchRetrieverChain = (llm: BaseChatModel) => {
|
||||
return RunnableSequence.from([
|
||||
PromptTemplate.fromTemplate(basicAcademicSearchRetrieverPrompt),
|
||||
llm,
|
||||
strParser,
|
||||
RunnableLambda.from(async (input: string) => {
|
||||
if (input === 'not_needed') {
|
||||
return { query: '', docs: [] };
|
||||
}
|
||||
|
||||
const res = await searchSearxng(input, {
|
||||
language: 'en',
|
||||
engines: [
|
||||
'arxiv',
|
||||
'google_scholar',
|
||||
'internet_archive_scholar',
|
||||
'pubmed',
|
||||
],
|
||||
});
|
||||
|
||||
const documents = res.results.map(
|
||||
(result) =>
|
||||
new Document({
|
||||
pageContent: result.content,
|
||||
metadata: {
|
||||
title: result.title,
|
||||
url: result.url,
|
||||
...(result.img_src && { img_src: result.img_src }),
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
return { query: input, docs: documents };
|
||||
}),
|
||||
]);
|
||||
};
|
||||
|
||||
const createBasicAcademicSearchAnsweringChain = (
|
||||
llm: BaseChatModel,
|
||||
embeddings: Embeddings,
|
||||
) => {
|
||||
const basicAcademicSearchRetrieverChain =
|
||||
createBasicAcademicSearchRetrieverChain(llm);
|
||||
|
||||
const processDocs = async (docs: Document[]) => {
|
||||
return docs
|
||||
.map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
|
||||
.join('\n');
|
||||
};
|
||||
|
||||
const rerankDocs = async ({
|
||||
query,
|
||||
docs,
|
||||
}: {
|
||||
query: string;
|
||||
docs: Document[];
|
||||
}) => {
|
||||
if (docs.length === 0) {
|
||||
return docs;
|
||||
}
|
||||
|
||||
const res = await searchSearxng(input, {
|
||||
language: 'en',
|
||||
engines: [
|
||||
'arxiv',
|
||||
'google_scholar',
|
||||
'internet_archive_scholar',
|
||||
'pubmed',
|
||||
],
|
||||
});
|
||||
|
||||
const documents = res.results.map(
|
||||
(result) =>
|
||||
new Document({
|
||||
pageContent: result.content,
|
||||
metadata: {
|
||||
title: result.title,
|
||||
url: result.url,
|
||||
...(result.img_src && { img_src: result.img_src }),
|
||||
},
|
||||
}),
|
||||
const docsWithContent = docs.filter(
|
||||
(doc) => doc.pageContent && doc.pageContent.length > 0,
|
||||
);
|
||||
|
||||
return { query: input, docs: documents };
|
||||
}),
|
||||
]);
|
||||
const docEmbeddings = await embeddings.embedDocuments(
|
||||
docsWithContent.map((doc) => doc.pageContent),
|
||||
);
|
||||
|
||||
const basicAcademicSearchAnsweringChain = RunnableSequence.from([
|
||||
RunnableMap.from({
|
||||
query: (input: BasicChainInput) => input.query,
|
||||
chat_history: (input: BasicChainInput) => input.chat_history,
|
||||
context: RunnableSequence.from([
|
||||
(input) => ({
|
||||
query: input.query,
|
||||
chat_history: formatChatHistoryAsString(input.chat_history),
|
||||
}),
|
||||
basicAcademicSearchRetrieverChain
|
||||
.pipe(rerankDocs)
|
||||
.withConfig({
|
||||
runName: 'FinalSourceRetriever',
|
||||
})
|
||||
.pipe(processDocs),
|
||||
const queryEmbedding = await embeddings.embedQuery(query);
|
||||
|
||||
const similarity = docEmbeddings.map((docEmbedding, i) => {
|
||||
const sim = computeSimilarity(queryEmbedding, docEmbedding);
|
||||
|
||||
return {
|
||||
index: i,
|
||||
similarity: sim,
|
||||
};
|
||||
});
|
||||
|
||||
const sortedDocs = similarity
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, 15)
|
||||
.map((sim) => docsWithContent[sim.index]);
|
||||
|
||||
return sortedDocs;
|
||||
};
|
||||
|
||||
return RunnableSequence.from([
|
||||
RunnableMap.from({
|
||||
query: (input: BasicChainInput) => input.query,
|
||||
chat_history: (input: BasicChainInput) => input.chat_history,
|
||||
context: RunnableSequence.from([
|
||||
(input) => ({
|
||||
query: input.query,
|
||||
chat_history: formatChatHistoryAsString(input.chat_history),
|
||||
}),
|
||||
basicAcademicSearchRetrieverChain
|
||||
.pipe(rerankDocs)
|
||||
.withConfig({
|
||||
runName: 'FinalSourceRetriever',
|
||||
})
|
||||
.pipe(processDocs),
|
||||
]),
|
||||
}),
|
||||
ChatPromptTemplate.fromMessages([
|
||||
['system', basicAcademicSearchResponsePrompt],
|
||||
new MessagesPlaceholder('chat_history'),
|
||||
['user', '{query}'],
|
||||
]),
|
||||
}),
|
||||
ChatPromptTemplate.fromMessages([
|
||||
['system', basicAcademicSearchResponsePrompt],
|
||||
new MessagesPlaceholder('chat_history'),
|
||||
['user', '{query}'],
|
||||
]),
|
||||
llm,
|
||||
strParser,
|
||||
]).withConfig({
|
||||
runName: 'FinalResponseGenerator',
|
||||
});
|
||||
llm,
|
||||
strParser,
|
||||
]).withConfig({
|
||||
runName: 'FinalResponseGenerator',
|
||||
});
|
||||
};
|
||||
|
||||
const basicAcademicSearch = (query: string, history: BaseMessage[]) => {
|
||||
const basicAcademicSearch = (
|
||||
query: string,
|
||||
history: BaseMessage[],
|
||||
llm: BaseChatModel,
|
||||
embeddings: Embeddings,
|
||||
) => {
|
||||
const emitter = new eventEmitter();
|
||||
|
||||
try {
|
||||
const basicAcademicSearchAnsweringChain =
|
||||
createBasicAcademicSearchAnsweringChain(llm, embeddings);
|
||||
|
||||
const stream = basicAcademicSearchAnsweringChain.streamEvents(
|
||||
{
|
||||
chat_history: history,
|
||||
|
@ -242,8 +252,13 @@ const basicAcademicSearch = (query: string, history: BaseMessage[]) => {
|
|||
return emitter;
|
||||
};
|
||||
|
||||
const handleAcademicSearch = (message: string, history: BaseMessage[]) => {
|
||||
const emitter = basicAcademicSearch(message, history);
|
||||
const handleAcademicSearch = (
|
||||
message: string,
|
||||
history: BaseMessage[],
|
||||
llm: BaseChatModel,
|
||||
embeddings: Embeddings,
|
||||
) => {
|
||||
const emitter = basicAcademicSearch(message, history, llm, embeddings);
|
||||
return emitter;
|
||||
};
|
||||
|
||||
|
|
|
@ -4,16 +4,11 @@ import {
|
|||
RunnableLambda,
|
||||
} from '@langchain/core/runnables';
|
||||
import { PromptTemplate } from '@langchain/core/prompts';
|
||||
import { ChatOpenAI } from '@langchain/openai';
|
||||
import formatChatHistoryAsString from '../utils/formatHistory';
|
||||
import { BaseMessage } from '@langchain/core/messages';
|
||||
import { StringOutputParser } from '@langchain/core/output_parsers';
|
||||
import { searchSearxng } from '../core/searxng';
|
||||
|
||||
const llm = new ChatOpenAI({
|
||||
modelName: process.env.MODEL_NAME,
|
||||
temperature: 0.7,
|
||||
});
|
||||
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
|
||||
const imageSearchChainPrompt = `
|
||||
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search the web for images.
|
||||
|
@ -43,38 +38,48 @@ type ImageSearchChainInput = {
|
|||
|
||||
const strParser = new StringOutputParser();
|
||||
|
||||
const imageSearchChain = RunnableSequence.from([
|
||||
RunnableMap.from({
|
||||
chat_history: (input: ImageSearchChainInput) => {
|
||||
return formatChatHistoryAsString(input.chat_history);
|
||||
},
|
||||
query: (input: ImageSearchChainInput) => {
|
||||
return input.query;
|
||||
},
|
||||
}),
|
||||
PromptTemplate.fromTemplate(imageSearchChainPrompt),
|
||||
llm,
|
||||
strParser,
|
||||
RunnableLambda.from(async (input: string) => {
|
||||
const res = await searchSearxng(input, {
|
||||
categories: ['images'],
|
||||
engines: ['bing_images', 'google_images'],
|
||||
});
|
||||
const createImageSearchChain = (llm: BaseChatModel) => {
|
||||
return RunnableSequence.from([
|
||||
RunnableMap.from({
|
||||
chat_history: (input: ImageSearchChainInput) => {
|
||||
return formatChatHistoryAsString(input.chat_history);
|
||||
},
|
||||
query: (input: ImageSearchChainInput) => {
|
||||
return input.query;
|
||||
},
|
||||
}),
|
||||
PromptTemplate.fromTemplate(imageSearchChainPrompt),
|
||||
llm,
|
||||
strParser,
|
||||
RunnableLambda.from(async (input: string) => {
|
||||
const res = await searchSearxng(input, {
|
||||
categories: ['images'],
|
||||
engines: ['bing_images', 'google_images'],
|
||||
});
|
||||
|
||||
const images = [];
|
||||
const images = [];
|
||||
|
||||
res.results.forEach((result) => {
|
||||
if (result.img_src && result.url && result.title) {
|
||||
images.push({
|
||||
img_src: result.img_src,
|
||||
url: result.url,
|
||||
title: result.title,
|
||||
});
|
||||
}
|
||||
});
|
||||
res.results.forEach((result) => {
|
||||
if (result.img_src && result.url && result.title) {
|
||||
images.push({
|
||||
img_src: result.img_src,
|
||||
url: result.url,
|
||||
title: result.title,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
return images.slice(0, 10);
|
||||
}),
|
||||
]);
|
||||
return images.slice(0, 10);
|
||||
}),
|
||||
]);
|
||||
};
|
||||
|
||||
export default imageSearchChain;
|
||||
const handleImageSearch = (
|
||||
input: ImageSearchChainInput,
|
||||
llm: BaseChatModel,
|
||||
) => {
|
||||
const imageSearchChain = createImageSearchChain(llm);
|
||||
return imageSearchChain.invoke(input);
|
||||
};
|
||||
|
||||
export default handleImageSearch;
|
||||
|
|
|
@ -9,24 +9,16 @@ import {
|
|||
RunnableMap,
|
||||
RunnableLambda,
|
||||
} from '@langchain/core/runnables';
|
||||
import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai';
|
||||
import { StringOutputParser } from '@langchain/core/output_parsers';
|
||||
import { Document } from '@langchain/core/documents';
|
||||
import { searchSearxng } from '../core/searxng';
|
||||
import type { StreamEvent } from '@langchain/core/tracers/log_stream';
|
||||
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
import type { Embeddings } from '@langchain/core/embeddings';
|
||||
import formatChatHistoryAsString from '../utils/formatHistory';
|
||||
import eventEmitter from 'events';
|
||||
import computeSimilarity from '../utils/computeSimilarity';
|
||||
|
||||
const llm = new ChatOpenAI({
|
||||
modelName: process.env.MODEL_NAME,
|
||||
temperature: 0.7,
|
||||
});
|
||||
|
||||
const embeddings = new OpenAIEmbeddings({
|
||||
modelName: 'text-embedding-3-large',
|
||||
});
|
||||
|
||||
const basicRedditSearchRetrieverPrompt = `
|
||||
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
|
||||
If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
|
||||
|
@ -104,118 +96,135 @@ const handleStream = async (
|
|||
}
|
||||
};
|
||||
|
||||
const processDocs = async (docs: Document[]) => {
|
||||
return docs
|
||||
.map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
|
||||
.join('\n');
|
||||
};
|
||||
|
||||
const rerankDocs = async ({
|
||||
query,
|
||||
docs,
|
||||
}: {
|
||||
query: string;
|
||||
docs: Document[];
|
||||
}) => {
|
||||
if (docs.length === 0) {
|
||||
return docs;
|
||||
}
|
||||
|
||||
const docsWithContent = docs.filter(
|
||||
(doc) => doc.pageContent && doc.pageContent.length > 0,
|
||||
);
|
||||
|
||||
const docEmbeddings = await embeddings.embedDocuments(
|
||||
docsWithContent.map((doc) => doc.pageContent),
|
||||
);
|
||||
|
||||
const queryEmbedding = await embeddings.embedQuery(query);
|
||||
|
||||
const similarity = docEmbeddings.map((docEmbedding, i) => {
|
||||
const sim = computeSimilarity(queryEmbedding, docEmbedding);
|
||||
|
||||
return {
|
||||
index: i,
|
||||
similarity: sim,
|
||||
};
|
||||
});
|
||||
|
||||
const sortedDocs = similarity
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, 15)
|
||||
.filter((sim) => sim.similarity > 0.3)
|
||||
.map((sim) => docsWithContent[sim.index]);
|
||||
|
||||
return sortedDocs;
|
||||
};
|
||||
|
||||
type BasicChainInput = {
|
||||
chat_history: BaseMessage[];
|
||||
query: string;
|
||||
};
|
||||
|
||||
const basicRedditSearchRetrieverChain = RunnableSequence.from([
|
||||
PromptTemplate.fromTemplate(basicRedditSearchRetrieverPrompt),
|
||||
llm,
|
||||
strParser,
|
||||
RunnableLambda.from(async (input: string) => {
|
||||
if (input === 'not_needed') {
|
||||
return { query: '', docs: [] };
|
||||
const createBasicRedditSearchRetrieverChain = (llm: BaseChatModel) => {
|
||||
return RunnableSequence.from([
|
||||
PromptTemplate.fromTemplate(basicRedditSearchRetrieverPrompt),
|
||||
llm,
|
||||
strParser,
|
||||
RunnableLambda.from(async (input: string) => {
|
||||
if (input === 'not_needed') {
|
||||
return { query: '', docs: [] };
|
||||
}
|
||||
|
||||
const res = await searchSearxng(input, {
|
||||
language: 'en',
|
||||
engines: ['reddit'],
|
||||
});
|
||||
|
||||
const documents = res.results.map(
|
||||
(result) =>
|
||||
new Document({
|
||||
pageContent: result.content ? result.content : result.title,
|
||||
metadata: {
|
||||
title: result.title,
|
||||
url: result.url,
|
||||
...(result.img_src && { img_src: result.img_src }),
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
return { query: input, docs: documents };
|
||||
}),
|
||||
]);
|
||||
};
|
||||
|
||||
const createBasicRedditSearchAnsweringChain = (
|
||||
llm: BaseChatModel,
|
||||
embeddings: Embeddings,
|
||||
) => {
|
||||
const basicRedditSearchRetrieverChain =
|
||||
createBasicRedditSearchRetrieverChain(llm);
|
||||
|
||||
const processDocs = async (docs: Document[]) => {
|
||||
return docs
|
||||
.map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
|
||||
.join('\n');
|
||||
};
|
||||
|
||||
const rerankDocs = async ({
|
||||
query,
|
||||
docs,
|
||||
}: {
|
||||
query: string;
|
||||
docs: Document[];
|
||||
}) => {
|
||||
if (docs.length === 0) {
|
||||
return docs;
|
||||
}
|
||||
|
||||
const res = await searchSearxng(input, {
|
||||
language: 'en',
|
||||
engines: ['reddit'],
|
||||
});
|
||||
|
||||
const documents = res.results.map(
|
||||
(result) =>
|
||||
new Document({
|
||||
pageContent: result.content ? result.content : result.title,
|
||||
metadata: {
|
||||
title: result.title,
|
||||
url: result.url,
|
||||
...(result.img_src && { img_src: result.img_src }),
|
||||
},
|
||||
}),
|
||||
const docsWithContent = docs.filter(
|
||||
(doc) => doc.pageContent && doc.pageContent.length > 0,
|
||||
);
|
||||
|
||||
return { query: input, docs: documents };
|
||||
}),
|
||||
]);
|
||||
const docEmbeddings = await embeddings.embedDocuments(
|
||||
docsWithContent.map((doc) => doc.pageContent),
|
||||
);
|
||||
|
||||
const basicRedditSearchAnsweringChain = RunnableSequence.from([
|
||||
RunnableMap.from({
|
||||
query: (input: BasicChainInput) => input.query,
|
||||
chat_history: (input: BasicChainInput) => input.chat_history,
|
||||
context: RunnableSequence.from([
|
||||
(input) => ({
|
||||
query: input.query,
|
||||
chat_history: formatChatHistoryAsString(input.chat_history),
|
||||
}),
|
||||
basicRedditSearchRetrieverChain
|
||||
.pipe(rerankDocs)
|
||||
.withConfig({
|
||||
runName: 'FinalSourceRetriever',
|
||||
})
|
||||
.pipe(processDocs),
|
||||
const queryEmbedding = await embeddings.embedQuery(query);
|
||||
|
||||
const similarity = docEmbeddings.map((docEmbedding, i) => {
|
||||
const sim = computeSimilarity(queryEmbedding, docEmbedding);
|
||||
|
||||
return {
|
||||
index: i,
|
||||
similarity: sim,
|
||||
};
|
||||
});
|
||||
|
||||
const sortedDocs = similarity
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, 15)
|
||||
.filter((sim) => sim.similarity > 0.3)
|
||||
.map((sim) => docsWithContent[sim.index]);
|
||||
|
||||
return sortedDocs;
|
||||
};
|
||||
|
||||
return RunnableSequence.from([
|
||||
RunnableMap.from({
|
||||
query: (input: BasicChainInput) => input.query,
|
||||
chat_history: (input: BasicChainInput) => input.chat_history,
|
||||
context: RunnableSequence.from([
|
||||
(input) => ({
|
||||
query: input.query,
|
||||
chat_history: formatChatHistoryAsString(input.chat_history),
|
||||
}),
|
||||
basicRedditSearchRetrieverChain
|
||||
.pipe(rerankDocs)
|
||||
.withConfig({
|
||||
runName: 'FinalSourceRetriever',
|
||||
})
|
||||
.pipe(processDocs),
|
||||
]),
|
||||
}),
|
||||
ChatPromptTemplate.fromMessages([
|
||||
['system', basicRedditSearchResponsePrompt],
|
||||
new MessagesPlaceholder('chat_history'),
|
||||
['user', '{query}'],
|
||||
]),
|
||||
}),
|
||||
ChatPromptTemplate.fromMessages([
|
||||
['system', basicRedditSearchResponsePrompt],
|
||||
new MessagesPlaceholder('chat_history'),
|
||||
['user', '{query}'],
|
||||
]),
|
||||
llm,
|
||||
strParser,
|
||||
]).withConfig({
|
||||
runName: 'FinalResponseGenerator',
|
||||
});
|
||||
llm,
|
||||
strParser,
|
||||
]).withConfig({
|
||||
runName: 'FinalResponseGenerator',
|
||||
});
|
||||
};
|
||||
|
||||
const basicRedditSearch = (query: string, history: BaseMessage[]) => {
|
||||
const basicRedditSearch = (
|
||||
query: string,
|
||||
history: BaseMessage[],
|
||||
llm: BaseChatModel,
|
||||
embeddings: Embeddings,
|
||||
) => {
|
||||
const emitter = new eventEmitter();
|
||||
|
||||
try {
|
||||
const basicRedditSearchAnsweringChain =
|
||||
createBasicRedditSearchAnsweringChain(llm, embeddings);
|
||||
const stream = basicRedditSearchAnsweringChain.streamEvents(
|
||||
{
|
||||
chat_history: history,
|
||||
|
@ -238,8 +247,13 @@ const basicRedditSearch = (query: string, history: BaseMessage[]) => {
|
|||
return emitter;
|
||||
};
|
||||
|
||||
const handleRedditSearch = (message: string, history: BaseMessage[]) => {
|
||||
const emitter = basicRedditSearch(message, history);
|
||||
const handleRedditSearch = (
|
||||
message: string,
|
||||
history: BaseMessage[],
|
||||
llm: BaseChatModel,
|
||||
embeddings: Embeddings,
|
||||
) => {
|
||||
const emitter = basicRedditSearch(message, history, llm, embeddings);
|
||||
return emitter;
|
||||
};
|
||||
|
||||
|
|
|
@ -9,24 +9,16 @@ import {
|
|||
RunnableMap,
|
||||
RunnableLambda,
|
||||
} from '@langchain/core/runnables';
|
||||
import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai';
|
||||
import { StringOutputParser } from '@langchain/core/output_parsers';
|
||||
import { Document } from '@langchain/core/documents';
|
||||
import { searchSearxng } from '../core/searxng';
|
||||
import type { StreamEvent } from '@langchain/core/tracers/log_stream';
|
||||
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
import type { Embeddings } from '@langchain/core/embeddings';
|
||||
import formatChatHistoryAsString from '../utils/formatHistory';
|
||||
import eventEmitter from 'events';
|
||||
import computeSimilarity from '../utils/computeSimilarity';
|
||||
|
||||
const llm = new ChatOpenAI({
|
||||
modelName: process.env.MODEL_NAME,
|
||||
temperature: 0.7,
|
||||
});
|
||||
|
||||
const embeddings = new OpenAIEmbeddings({
|
||||
modelName: 'text-embedding-3-large',
|
||||
});
|
||||
|
||||
const basicSearchRetrieverPrompt = `
|
||||
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
|
||||
If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
|
||||
|
@ -104,117 +96,136 @@ const handleStream = async (
|
|||
}
|
||||
};
|
||||
|
||||
const processDocs = async (docs: Document[]) => {
|
||||
return docs
|
||||
.map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
|
||||
.join('\n');
|
||||
};
|
||||
|
||||
const rerankDocs = async ({
|
||||
query,
|
||||
docs,
|
||||
}: {
|
||||
query: string;
|
||||
docs: Document[];
|
||||
}) => {
|
||||
if (docs.length === 0) {
|
||||
return docs;
|
||||
}
|
||||
|
||||
const docsWithContent = docs.filter(
|
||||
(doc) => doc.pageContent && doc.pageContent.length > 0,
|
||||
);
|
||||
|
||||
const docEmbeddings = await embeddings.embedDocuments(
|
||||
docsWithContent.map((doc) => doc.pageContent),
|
||||
);
|
||||
|
||||
const queryEmbedding = await embeddings.embedQuery(query);
|
||||
|
||||
const similarity = docEmbeddings.map((docEmbedding, i) => {
|
||||
const sim = computeSimilarity(queryEmbedding, docEmbedding);
|
||||
|
||||
return {
|
||||
index: i,
|
||||
similarity: sim,
|
||||
};
|
||||
});
|
||||
|
||||
const sortedDocs = similarity
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.filter((sim) => sim.similarity > 0.5)
|
||||
.slice(0, 15)
|
||||
.map((sim) => docsWithContent[sim.index]);
|
||||
|
||||
return sortedDocs;
|
||||
};
|
||||
|
||||
type BasicChainInput = {
|
||||
chat_history: BaseMessage[];
|
||||
query: string;
|
||||
};
|
||||
|
||||
const basicWebSearchRetrieverChain = RunnableSequence.from([
|
||||
PromptTemplate.fromTemplate(basicSearchRetrieverPrompt),
|
||||
llm,
|
||||
strParser,
|
||||
RunnableLambda.from(async (input: string) => {
|
||||
if (input === 'not_needed') {
|
||||
return { query: '', docs: [] };
|
||||
const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => {
|
||||
return RunnableSequence.from([
|
||||
PromptTemplate.fromTemplate(basicSearchRetrieverPrompt),
|
||||
llm,
|
||||
strParser,
|
||||
RunnableLambda.from(async (input: string) => {
|
||||
if (input === 'not_needed') {
|
||||
return { query: '', docs: [] };
|
||||
}
|
||||
|
||||
const res = await searchSearxng(input, {
|
||||
language: 'en',
|
||||
});
|
||||
|
||||
const documents = res.results.map(
|
||||
(result) =>
|
||||
new Document({
|
||||
pageContent: result.content,
|
||||
metadata: {
|
||||
title: result.title,
|
||||
url: result.url,
|
||||
...(result.img_src && { img_src: result.img_src }),
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
return { query: input, docs: documents };
|
||||
}),
|
||||
]);
|
||||
};
|
||||
|
||||
const createBasicWebSearchAnsweringChain = (
|
||||
llm: BaseChatModel,
|
||||
embeddings: Embeddings,
|
||||
) => {
|
||||
const basicWebSearchRetrieverChain = createBasicWebSearchRetrieverChain(llm);
|
||||
|
||||
const processDocs = async (docs: Document[]) => {
|
||||
return docs
|
||||
.map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
|
||||
.join('\n');
|
||||
};
|
||||
|
||||
const rerankDocs = async ({
|
||||
query,
|
||||
docs,
|
||||
}: {
|
||||
query: string;
|
||||
docs: Document[];
|
||||
}) => {
|
||||
if (docs.length === 0) {
|
||||
return docs;
|
||||
}
|
||||
|
||||
const res = await searchSearxng(input, {
|
||||
language: 'en',
|
||||
});
|
||||
|
||||
const documents = res.results.map(
|
||||
(result) =>
|
||||
new Document({
|
||||
pageContent: result.content,
|
||||
metadata: {
|
||||
title: result.title,
|
||||
url: result.url,
|
||||
...(result.img_src && { img_src: result.img_src }),
|
||||
},
|
||||
}),
|
||||
const docsWithContent = docs.filter(
|
||||
(doc) => doc.pageContent && doc.pageContent.length > 0,
|
||||
);
|
||||
|
||||
return { query: input, docs: documents };
|
||||
}),
|
||||
]);
|
||||
const docEmbeddings = await embeddings.embedDocuments(
|
||||
docsWithContent.map((doc) => doc.pageContent),
|
||||
);
|
||||
|
||||
const basicWebSearchAnsweringChain = RunnableSequence.from([
|
||||
RunnableMap.from({
|
||||
query: (input: BasicChainInput) => input.query,
|
||||
chat_history: (input: BasicChainInput) => input.chat_history,
|
||||
context: RunnableSequence.from([
|
||||
(input) => ({
|
||||
query: input.query,
|
||||
chat_history: formatChatHistoryAsString(input.chat_history),
|
||||
}),
|
||||
basicWebSearchRetrieverChain
|
||||
.pipe(rerankDocs)
|
||||
.withConfig({
|
||||
runName: 'FinalSourceRetriever',
|
||||
})
|
||||
.pipe(processDocs),
|
||||
const queryEmbedding = await embeddings.embedQuery(query);
|
||||
|
||||
const similarity = docEmbeddings.map((docEmbedding, i) => {
|
||||
const sim = computeSimilarity(queryEmbedding, docEmbedding);
|
||||
|
||||
return {
|
||||
index: i,
|
||||
similarity: sim,
|
||||
};
|
||||
});
|
||||
|
||||
const sortedDocs = similarity
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.filter((sim) => sim.similarity > 0.5)
|
||||
.slice(0, 15)
|
||||
.map((sim) => docsWithContent[sim.index]);
|
||||
|
||||
return sortedDocs;
|
||||
};
|
||||
|
||||
return RunnableSequence.from([
|
||||
RunnableMap.from({
|
||||
query: (input: BasicChainInput) => input.query,
|
||||
chat_history: (input: BasicChainInput) => input.chat_history,
|
||||
context: RunnableSequence.from([
|
||||
(input) => ({
|
||||
query: input.query,
|
||||
chat_history: formatChatHistoryAsString(input.chat_history),
|
||||
}),
|
||||
basicWebSearchRetrieverChain
|
||||
.pipe(rerankDocs)
|
||||
.withConfig({
|
||||
runName: 'FinalSourceRetriever',
|
||||
})
|
||||
.pipe(processDocs),
|
||||
]),
|
||||
}),
|
||||
ChatPromptTemplate.fromMessages([
|
||||
['system', basicWebSearchResponsePrompt],
|
||||
new MessagesPlaceholder('chat_history'),
|
||||
['user', '{query}'],
|
||||
]),
|
||||
}),
|
||||
ChatPromptTemplate.fromMessages([
|
||||
['system', basicWebSearchResponsePrompt],
|
||||
new MessagesPlaceholder('chat_history'),
|
||||
['user', '{query}'],
|
||||
]),
|
||||
llm,
|
||||
strParser,
|
||||
]).withConfig({
|
||||
runName: 'FinalResponseGenerator',
|
||||
});
|
||||
llm,
|
||||
strParser,
|
||||
]).withConfig({
|
||||
runName: 'FinalResponseGenerator',
|
||||
});
|
||||
};
|
||||
|
||||
const basicWebSearch = (query: string, history: BaseMessage[]) => {
|
||||
const basicWebSearch = (
|
||||
query: string,
|
||||
history: BaseMessage[],
|
||||
llm: BaseChatModel,
|
||||
embeddings: Embeddings,
|
||||
) => {
|
||||
const emitter = new eventEmitter();
|
||||
|
||||
try {
|
||||
const basicWebSearchAnsweringChain = createBasicWebSearchAnsweringChain(
|
||||
llm,
|
||||
embeddings,
|
||||
);
|
||||
|
||||
const stream = basicWebSearchAnsweringChain.streamEvents(
|
||||
{
|
||||
chat_history: history,
|
||||
|
@ -237,8 +248,13 @@ const basicWebSearch = (query: string, history: BaseMessage[]) => {
|
|||
return emitter;
|
||||
};
|
||||
|
||||
const handleWebSearch = (message: string, history: BaseMessage[]) => {
|
||||
const emitter = basicWebSearch(message, history);
|
||||
const handleWebSearch = (
|
||||
message: string,
|
||||
history: BaseMessage[],
|
||||
llm: BaseChatModel,
|
||||
embeddings: Embeddings,
|
||||
) => {
|
||||
const emitter = basicWebSearch(message, history, llm, embeddings);
|
||||
return emitter;
|
||||
};
|
||||
|
||||
|
|
|
@ -9,19 +9,15 @@ import {
|
|||
RunnableMap,
|
||||
RunnableLambda,
|
||||
} from '@langchain/core/runnables';
|
||||
import { ChatOpenAI, OpenAI } from '@langchain/openai';
|
||||
import { StringOutputParser } from '@langchain/core/output_parsers';
|
||||
import { Document } from '@langchain/core/documents';
|
||||
import { searchSearxng } from '../core/searxng';
|
||||
import type { StreamEvent } from '@langchain/core/tracers/log_stream';
|
||||
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
import type { Embeddings } from '@langchain/core/embeddings';
|
||||
import formatChatHistoryAsString from '../utils/formatHistory';
|
||||
import eventEmitter from 'events';
|
||||
|
||||
const llm = new ChatOpenAI({
|
||||
modelName: process.env.MODEL_NAME,
|
||||
temperature: 0.7,
|
||||
});
|
||||
|
||||
const basicWolframAlphaSearchRetrieverPrompt = `
|
||||
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
|
||||
If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
|
||||
|
@ -99,81 +95,94 @@ const handleStream = async (
|
|||
}
|
||||
};
|
||||
|
||||
const processDocs = async (docs: Document[]) => {
|
||||
return docs
|
||||
.map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
|
||||
.join('\n');
|
||||
};
|
||||
|
||||
type BasicChainInput = {
|
||||
chat_history: BaseMessage[];
|
||||
query: string;
|
||||
};
|
||||
|
||||
const basicWolframAlphaSearchRetrieverChain = RunnableSequence.from([
|
||||
PromptTemplate.fromTemplate(basicWolframAlphaSearchRetrieverPrompt),
|
||||
llm,
|
||||
strParser,
|
||||
RunnableLambda.from(async (input: string) => {
|
||||
if (input === 'not_needed') {
|
||||
return { query: '', docs: [] };
|
||||
}
|
||||
const createBasicWolframAlphaSearchRetrieverChain = (llm: BaseChatModel) => {
|
||||
return RunnableSequence.from([
|
||||
PromptTemplate.fromTemplate(basicWolframAlphaSearchRetrieverPrompt),
|
||||
llm,
|
||||
strParser,
|
||||
RunnableLambda.from(async (input: string) => {
|
||||
if (input === 'not_needed') {
|
||||
return { query: '', docs: [] };
|
||||
}
|
||||
|
||||
const res = await searchSearxng(input, {
|
||||
language: 'en',
|
||||
engines: ['wolframalpha'],
|
||||
});
|
||||
const res = await searchSearxng(input, {
|
||||
language: 'en',
|
||||
engines: ['wolframalpha'],
|
||||
});
|
||||
|
||||
const documents = res.results.map(
|
||||
(result) =>
|
||||
new Document({
|
||||
pageContent: result.content,
|
||||
metadata: {
|
||||
title: result.title,
|
||||
url: result.url,
|
||||
...(result.img_src && { img_src: result.img_src }),
|
||||
},
|
||||
const documents = res.results.map(
|
||||
(result) =>
|
||||
new Document({
|
||||
pageContent: result.content,
|
||||
metadata: {
|
||||
title: result.title,
|
||||
url: result.url,
|
||||
...(result.img_src && { img_src: result.img_src }),
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
return { query: input, docs: documents };
|
||||
}),
|
||||
]);
|
||||
};
|
||||
|
||||
const createBasicWolframAlphaSearchAnsweringChain = (llm: BaseChatModel) => {
|
||||
const basicWolframAlphaSearchRetrieverChain =
|
||||
createBasicWolframAlphaSearchRetrieverChain(llm);
|
||||
|
||||
const processDocs = (docs: Document[]) => {
|
||||
return docs
|
||||
.map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
|
||||
.join('\n');
|
||||
};
|
||||
|
||||
return RunnableSequence.from([
|
||||
RunnableMap.from({
|
||||
query: (input: BasicChainInput) => input.query,
|
||||
chat_history: (input: BasicChainInput) => input.chat_history,
|
||||
context: RunnableSequence.from([
|
||||
(input) => ({
|
||||
query: input.query,
|
||||
chat_history: formatChatHistoryAsString(input.chat_history),
|
||||
}),
|
||||
);
|
||||
|
||||
return { query: input, docs: documents };
|
||||
}),
|
||||
]);
|
||||
|
||||
const basicWolframAlphaSearchAnsweringChain = RunnableSequence.from([
|
||||
RunnableMap.from({
|
||||
query: (input: BasicChainInput) => input.query,
|
||||
chat_history: (input: BasicChainInput) => input.chat_history,
|
||||
context: RunnableSequence.from([
|
||||
(input) => ({
|
||||
query: input.query,
|
||||
chat_history: formatChatHistoryAsString(input.chat_history),
|
||||
}),
|
||||
basicWolframAlphaSearchRetrieverChain
|
||||
.pipe(({ query, docs }) => {
|
||||
return docs;
|
||||
})
|
||||
.withConfig({
|
||||
runName: 'FinalSourceRetriever',
|
||||
})
|
||||
.pipe(processDocs),
|
||||
basicWolframAlphaSearchRetrieverChain
|
||||
.pipe(({ query, docs }) => {
|
||||
return docs;
|
||||
})
|
||||
.withConfig({
|
||||
runName: 'FinalSourceRetriever',
|
||||
})
|
||||
.pipe(processDocs),
|
||||
]),
|
||||
}),
|
||||
ChatPromptTemplate.fromMessages([
|
||||
['system', basicWolframAlphaSearchResponsePrompt],
|
||||
new MessagesPlaceholder('chat_history'),
|
||||
['user', '{query}'],
|
||||
]),
|
||||
}),
|
||||
ChatPromptTemplate.fromMessages([
|
||||
['system', basicWolframAlphaSearchResponsePrompt],
|
||||
new MessagesPlaceholder('chat_history'),
|
||||
['user', '{query}'],
|
||||
]),
|
||||
llm,
|
||||
strParser,
|
||||
]).withConfig({
|
||||
runName: 'FinalResponseGenerator',
|
||||
});
|
||||
llm,
|
||||
strParser,
|
||||
]).withConfig({
|
||||
runName: 'FinalResponseGenerator',
|
||||
});
|
||||
};
|
||||
|
||||
const basicWolframAlphaSearch = (query: string, history: BaseMessage[]) => {
|
||||
const basicWolframAlphaSearch = (
|
||||
query: string,
|
||||
history: BaseMessage[],
|
||||
llm: BaseChatModel,
|
||||
) => {
|
||||
const emitter = new eventEmitter();
|
||||
|
||||
try {
|
||||
const basicWolframAlphaSearchAnsweringChain =
|
||||
createBasicWolframAlphaSearchAnsweringChain(llm);
|
||||
const stream = basicWolframAlphaSearchAnsweringChain.streamEvents(
|
||||
{
|
||||
chat_history: history,
|
||||
|
@ -196,8 +205,13 @@ const basicWolframAlphaSearch = (query: string, history: BaseMessage[]) => {
|
|||
return emitter;
|
||||
};
|
||||
|
||||
const handleWolframAlphaSearch = (message: string, history: BaseMessage[]) => {
|
||||
const emitter = basicWolframAlphaSearch(message, history);
|
||||
const handleWolframAlphaSearch = (
|
||||
message: string,
|
||||
history: BaseMessage[],
|
||||
llm: BaseChatModel,
|
||||
embeddings: Embeddings,
|
||||
) => {
|
||||
const emitter = basicWolframAlphaSearch(message, history, llm);
|
||||
return emitter;
|
||||
};
|
||||
|
||||
|
|
|
@ -4,15 +4,11 @@ import {
|
|||
MessagesPlaceholder,
|
||||
} from '@langchain/core/prompts';
|
||||
import { RunnableSequence } from '@langchain/core/runnables';
|
||||
import { ChatOpenAI } from '@langchain/openai';
|
||||
import { StringOutputParser } from '@langchain/core/output_parsers';
|
||||
import type { StreamEvent } from '@langchain/core/tracers/log_stream';
|
||||
import eventEmitter from 'events';
|
||||
|
||||
const llm = new ChatOpenAI({
|
||||
modelName: process.env.MODEL_NAME,
|
||||
temperature: 0.7,
|
||||
});
|
||||
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
import type { Embeddings } from '@langchain/core/embeddings';
|
||||
|
||||
const writingAssistantPrompt = `
|
||||
You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are currently set on focus mode 'Writing Assistant', this means you will be helping the user write a response to a given query.
|
||||
|
@ -44,22 +40,30 @@ const handleStream = async (
|
|||
}
|
||||
};
|
||||
|
||||
const writingAssistantChain = RunnableSequence.from([
|
||||
ChatPromptTemplate.fromMessages([
|
||||
['system', writingAssistantPrompt],
|
||||
new MessagesPlaceholder('chat_history'),
|
||||
['user', '{query}'],
|
||||
]),
|
||||
llm,
|
||||
strParser,
|
||||
]).withConfig({
|
||||
runName: 'FinalResponseGenerator',
|
||||
});
|
||||
const createWritingAssistantChain = (llm: BaseChatModel) => {
|
||||
return RunnableSequence.from([
|
||||
ChatPromptTemplate.fromMessages([
|
||||
['system', writingAssistantPrompt],
|
||||
new MessagesPlaceholder('chat_history'),
|
||||
['user', '{query}'],
|
||||
]),
|
||||
llm,
|
||||
strParser,
|
||||
]).withConfig({
|
||||
runName: 'FinalResponseGenerator',
|
||||
});
|
||||
};
|
||||
|
||||
const handleWritingAssistant = (query: string, history: BaseMessage[]) => {
|
||||
const handleWritingAssistant = (
|
||||
query: string,
|
||||
history: BaseMessage[],
|
||||
llm: BaseChatModel,
|
||||
embeddings: Embeddings,
|
||||
) => {
|
||||
const emitter = new eventEmitter();
|
||||
|
||||
try {
|
||||
const writingAssistantChain = createWritingAssistantChain(llm);
|
||||
const stream = writingAssistantChain.streamEvents(
|
||||
{
|
||||
chat_history: history,
|
||||
|
|
|
@ -9,24 +9,16 @@ import {
|
|||
RunnableMap,
|
||||
RunnableLambda,
|
||||
} from '@langchain/core/runnables';
|
||||
import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai';
|
||||
import { StringOutputParser } from '@langchain/core/output_parsers';
|
||||
import { Document } from '@langchain/core/documents';
|
||||
import { searchSearxng } from '../core/searxng';
|
||||
import type { StreamEvent } from '@langchain/core/tracers/log_stream';
|
||||
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
import type { Embeddings } from '@langchain/core/embeddings';
|
||||
import formatChatHistoryAsString from '../utils/formatHistory';
|
||||
import eventEmitter from 'events';
|
||||
import computeSimilarity from '../utils/computeSimilarity';
|
||||
|
||||
const llm = new ChatOpenAI({
|
||||
modelName: process.env.MODEL_NAME,
|
||||
temperature: 0.7,
|
||||
});
|
||||
|
||||
const embeddings = new OpenAIEmbeddings({
|
||||
modelName: 'text-embedding-3-large',
|
||||
});
|
||||
|
||||
const basicYoutubeSearchRetrieverPrompt = `
|
||||
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
|
||||
If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
|
||||
|
@ -104,118 +96,136 @@ const handleStream = async (
|
|||
}
|
||||
};
|
||||
|
||||
const processDocs = async (docs: Document[]) => {
|
||||
return docs
|
||||
.map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
|
||||
.join('\n');
|
||||
};
|
||||
|
||||
const rerankDocs = async ({
|
||||
query,
|
||||
docs,
|
||||
}: {
|
||||
query: string;
|
||||
docs: Document[];
|
||||
}) => {
|
||||
if (docs.length === 0) {
|
||||
return docs;
|
||||
}
|
||||
|
||||
const docsWithContent = docs.filter(
|
||||
(doc) => doc.pageContent && doc.pageContent.length > 0,
|
||||
);
|
||||
|
||||
const docEmbeddings = await embeddings.embedDocuments(
|
||||
docsWithContent.map((doc) => doc.pageContent),
|
||||
);
|
||||
|
||||
const queryEmbedding = await embeddings.embedQuery(query);
|
||||
|
||||
const similarity = docEmbeddings.map((docEmbedding, i) => {
|
||||
const sim = computeSimilarity(queryEmbedding, docEmbedding);
|
||||
|
||||
return {
|
||||
index: i,
|
||||
similarity: sim,
|
||||
};
|
||||
});
|
||||
|
||||
const sortedDocs = similarity
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, 15)
|
||||
.filter((sim) => sim.similarity > 0.3)
|
||||
.map((sim) => docsWithContent[sim.index]);
|
||||
|
||||
return sortedDocs;
|
||||
};
|
||||
|
||||
type BasicChainInput = {
|
||||
chat_history: BaseMessage[];
|
||||
query: string;
|
||||
};
|
||||
|
||||
const basicYoutubeSearchRetrieverChain = RunnableSequence.from([
|
||||
PromptTemplate.fromTemplate(basicYoutubeSearchRetrieverPrompt),
|
||||
llm,
|
||||
strParser,
|
||||
RunnableLambda.from(async (input: string) => {
|
||||
if (input === 'not_needed') {
|
||||
return { query: '', docs: [] };
|
||||
const createBasicYoutubeSearchRetrieverChain = (llm: BaseChatModel) => {
|
||||
return RunnableSequence.from([
|
||||
PromptTemplate.fromTemplate(basicYoutubeSearchRetrieverPrompt),
|
||||
llm,
|
||||
strParser,
|
||||
RunnableLambda.from(async (input: string) => {
|
||||
if (input === 'not_needed') {
|
||||
return { query: '', docs: [] };
|
||||
}
|
||||
|
||||
const res = await searchSearxng(input, {
|
||||
language: 'en',
|
||||
engines: ['youtube'],
|
||||
});
|
||||
|
||||
const documents = res.results.map(
|
||||
(result) =>
|
||||
new Document({
|
||||
pageContent: result.content ? result.content : result.title,
|
||||
metadata: {
|
||||
title: result.title,
|
||||
url: result.url,
|
||||
...(result.img_src && { img_src: result.img_src }),
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
return { query: input, docs: documents };
|
||||
}),
|
||||
]);
|
||||
};
|
||||
|
||||
const createBasicYoutubeSearchAnsweringChain = (
|
||||
llm: BaseChatModel,
|
||||
embeddings: Embeddings,
|
||||
) => {
|
||||
const basicYoutubeSearchRetrieverChain =
|
||||
createBasicYoutubeSearchRetrieverChain(llm);
|
||||
|
||||
const processDocs = async (docs: Document[]) => {
|
||||
return docs
|
||||
.map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
|
||||
.join('\n');
|
||||
};
|
||||
|
||||
const rerankDocs = async ({
|
||||
query,
|
||||
docs,
|
||||
}: {
|
||||
query: string;
|
||||
docs: Document[];
|
||||
}) => {
|
||||
if (docs.length === 0) {
|
||||
return docs;
|
||||
}
|
||||
|
||||
const res = await searchSearxng(input, {
|
||||
language: 'en',
|
||||
engines: ['youtube'],
|
||||
});
|
||||
|
||||
const documents = res.results.map(
|
||||
(result) =>
|
||||
new Document({
|
||||
pageContent: result.content ? result.content : result.title,
|
||||
metadata: {
|
||||
title: result.title,
|
||||
url: result.url,
|
||||
...(result.img_src && { img_src: result.img_src }),
|
||||
},
|
||||
}),
|
||||
const docsWithContent = docs.filter(
|
||||
(doc) => doc.pageContent && doc.pageContent.length > 0,
|
||||
);
|
||||
|
||||
return { query: input, docs: documents };
|
||||
}),
|
||||
]);
|
||||
const docEmbeddings = await embeddings.embedDocuments(
|
||||
docsWithContent.map((doc) => doc.pageContent),
|
||||
);
|
||||
|
||||
const basicYoutubeSearchAnsweringChain = RunnableSequence.from([
|
||||
RunnableMap.from({
|
||||
query: (input: BasicChainInput) => input.query,
|
||||
chat_history: (input: BasicChainInput) => input.chat_history,
|
||||
context: RunnableSequence.from([
|
||||
(input) => ({
|
||||
query: input.query,
|
||||
chat_history: formatChatHistoryAsString(input.chat_history),
|
||||
}),
|
||||
basicYoutubeSearchRetrieverChain
|
||||
.pipe(rerankDocs)
|
||||
.withConfig({
|
||||
runName: 'FinalSourceRetriever',
|
||||
})
|
||||
.pipe(processDocs),
|
||||
const queryEmbedding = await embeddings.embedQuery(query);
|
||||
|
||||
const similarity = docEmbeddings.map((docEmbedding, i) => {
|
||||
const sim = computeSimilarity(queryEmbedding, docEmbedding);
|
||||
|
||||
return {
|
||||
index: i,
|
||||
similarity: sim,
|
||||
};
|
||||
});
|
||||
|
||||
const sortedDocs = similarity
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, 15)
|
||||
.filter((sim) => sim.similarity > 0.3)
|
||||
.map((sim) => docsWithContent[sim.index]);
|
||||
|
||||
return sortedDocs;
|
||||
};
|
||||
|
||||
return RunnableSequence.from([
|
||||
RunnableMap.from({
|
||||
query: (input: BasicChainInput) => input.query,
|
||||
chat_history: (input: BasicChainInput) => input.chat_history,
|
||||
context: RunnableSequence.from([
|
||||
(input) => ({
|
||||
query: input.query,
|
||||
chat_history: formatChatHistoryAsString(input.chat_history),
|
||||
}),
|
||||
basicYoutubeSearchRetrieverChain
|
||||
.pipe(rerankDocs)
|
||||
.withConfig({
|
||||
runName: 'FinalSourceRetriever',
|
||||
})
|
||||
.pipe(processDocs),
|
||||
]),
|
||||
}),
|
||||
ChatPromptTemplate.fromMessages([
|
||||
['system', basicYoutubeSearchResponsePrompt],
|
||||
new MessagesPlaceholder('chat_history'),
|
||||
['user', '{query}'],
|
||||
]),
|
||||
}),
|
||||
ChatPromptTemplate.fromMessages([
|
||||
['system', basicYoutubeSearchResponsePrompt],
|
||||
new MessagesPlaceholder('chat_history'),
|
||||
['user', '{query}'],
|
||||
]),
|
||||
llm,
|
||||
strParser,
|
||||
]).withConfig({
|
||||
runName: 'FinalResponseGenerator',
|
||||
});
|
||||
llm,
|
||||
strParser,
|
||||
]).withConfig({
|
||||
runName: 'FinalResponseGenerator',
|
||||
});
|
||||
};
|
||||
|
||||
const basicYoutubeSearch = (query: string, history: BaseMessage[]) => {
|
||||
const basicYoutubeSearch = (
|
||||
query: string,
|
||||
history: BaseMessage[],
|
||||
llm: BaseChatModel,
|
||||
embeddings: Embeddings,
|
||||
) => {
|
||||
const emitter = new eventEmitter();
|
||||
|
||||
try {
|
||||
const basicYoutubeSearchAnsweringChain =
|
||||
createBasicYoutubeSearchAnsweringChain(llm, embeddings);
|
||||
|
||||
const stream = basicYoutubeSearchAnsweringChain.streamEvents(
|
||||
{
|
||||
chat_history: history,
|
||||
|
@ -238,8 +248,13 @@ const basicYoutubeSearch = (query: string, history: BaseMessage[]) => {
|
|||
return emitter;
|
||||
};
|
||||
|
||||
const handleYoutubeSearch = (message: string, history: BaseMessage[]) => {
|
||||
const emitter = basicYoutubeSearch(message, history);
|
||||
const handleYoutubeSearch = (
|
||||
message: string,
|
||||
history: BaseMessage[],
|
||||
llm: BaseChatModel,
|
||||
embeddings: Embeddings,
|
||||
) => {
|
||||
const emitter = basicYoutubeSearch(message, history, llm, embeddings);
|
||||
return emitter;
|
||||
};
|
||||
|
||||
|
|
|
@ -3,6 +3,9 @@ import express from 'express';
|
|||
import cors from 'cors';
|
||||
import http from 'http';
|
||||
import routes from './routes';
|
||||
import { getPort } from './config';
|
||||
|
||||
const port = getPort();
|
||||
|
||||
const app = express();
|
||||
const server = http.createServer(app);
|
||||
|
@ -19,8 +22,8 @@ app.get('/api', (_, res) => {
|
|||
res.status(200).json({ status: 'ok' });
|
||||
});
|
||||
|
||||
server.listen(process.env.PORT!, () => {
|
||||
console.log(`API server started on port ${process.env.PORT}`);
|
||||
server.listen(port, () => {
|
||||
console.log(`API server started on port ${port}`);
|
||||
});
|
||||
|
||||
startWebSocketServer(server);
|
||||
|
|
32
src/config.ts
Normal file
32
src/config.ts
Normal file
|
@ -0,0 +1,32 @@
|
|||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import toml, { JsonMap } from '@iarna/toml';
|
||||
|
||||
const configFileName = 'config.toml';
|
||||
|
||||
interface Config {
|
||||
GENERAL: {
|
||||
PORT: number;
|
||||
SIMILARITY_MEASURE: string;
|
||||
};
|
||||
API_KEYS: {
|
||||
OPENAI: string;
|
||||
};
|
||||
API_ENDPOINTS: {
|
||||
SEARXNG: string;
|
||||
};
|
||||
}
|
||||
|
||||
const loadConfig = () =>
|
||||
toml.parse(
|
||||
fs.readFileSync(path.join(process.cwd(), `${configFileName}`), 'utf-8'),
|
||||
) as any as Config;
|
||||
|
||||
export const getPort = () => loadConfig().GENERAL.PORT;
|
||||
|
||||
export const getSimilarityMeasure = () =>
|
||||
loadConfig().GENERAL.SIMILARITY_MEASURE;
|
||||
|
||||
export const getOpenaiApiKey = () => loadConfig().API_KEYS.OPENAI;
|
||||
|
||||
export const getSearxngApiEndpoint = () => loadConfig().API_ENDPOINTS.SEARXNG;
|
|
@ -1,4 +1,5 @@
|
|||
import axios from 'axios';
|
||||
import { getSearxngApiEndpoint } from '../config';
|
||||
|
||||
interface SearxngSearchOptions {
|
||||
categories?: string[];
|
||||
|
@ -20,7 +21,9 @@ export const searchSearxng = async (
|
|||
query: string,
|
||||
opts?: SearxngSearchOptions,
|
||||
) => {
|
||||
const url = new URL(`${process.env.SEARXNG_API_URL}/search?format=json`);
|
||||
const searxngURL = getSearxngApiEndpoint();
|
||||
|
||||
const url = new URL(`${searxngURL}/search?format=json`);
|
||||
url.searchParams.append('q', query);
|
||||
|
||||
if (opts) {
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
import express from 'express';
|
||||
import imageSearchChain from '../agents/imageSearchAgent';
|
||||
import handleImageSearch from '../agents/imageSearchAgent';
|
||||
import { ChatOpenAI } from '@langchain/openai';
|
||||
import { getOpenaiApiKey } from '../config';
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
|
@ -7,11 +9,13 @@ router.post('/', async (req, res) => {
|
|||
try {
|
||||
const { query, chat_history } = req.body;
|
||||
|
||||
const images = await imageSearchChain.invoke({
|
||||
query,
|
||||
chat_history,
|
||||
const llm = new ChatOpenAI({
|
||||
temperature: 0.7,
|
||||
openAIApiKey: getOpenaiApiKey(),
|
||||
});
|
||||
|
||||
const images = await handleImageSearch({ query, chat_history }, llm);
|
||||
|
||||
res.status(200).json({ images });
|
||||
} catch (err) {
|
||||
res.status(500).json({ message: 'An error has occurred.' });
|
||||
|
|
|
@ -1,10 +1,13 @@
|
|||
import dot from 'compute-dot';
|
||||
import cosineSimilarity from 'compute-cosine-similarity';
|
||||
import { getSimilarityMeasure } from '../config';
|
||||
|
||||
const computeSimilarity = (x: number[], y: number[]): number => {
|
||||
if (process.env.SIMILARITY_MEASURE === 'cosine') {
|
||||
const similarityMeasure = getSimilarityMeasure();
|
||||
|
||||
if (similarityMeasure === 'cosine') {
|
||||
return cosineSimilarity(x, y);
|
||||
} else if (process.env.SIMILARITY_MEASURE === 'dot') {
|
||||
} else if (similarityMeasure === 'dot') {
|
||||
return dot(x, y);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,10 +1,23 @@
|
|||
import { WebSocket } from 'ws';
|
||||
import { handleMessage } from './messageHandler';
|
||||
import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai';
|
||||
import { getOpenaiApiKey } from '../config';
|
||||
|
||||
export const handleConnection = (ws: WebSocket) => {
|
||||
const llm = new ChatOpenAI({
|
||||
temperature: 0.7,
|
||||
openAIApiKey: getOpenaiApiKey(),
|
||||
});
|
||||
|
||||
const embeddings = new OpenAIEmbeddings({
|
||||
openAIApiKey: getOpenaiApiKey(),
|
||||
modelName: 'text-embedding-3-large',
|
||||
});
|
||||
|
||||
ws.on(
|
||||
'message',
|
||||
async (message) => await handleMessage(message.toString(), ws),
|
||||
async (message) =>
|
||||
await handleMessage(message.toString(), ws, llm, embeddings),
|
||||
);
|
||||
|
||||
ws.on('close', () => console.log('Connection closed'));
|
||||
|
|
|
@ -6,6 +6,8 @@ import handleWritingAssistant from '../agents/writingAssistant';
|
|||
import handleWolframAlphaSearch from '../agents/wolframAlphaSearchAgent';
|
||||
import handleYoutubeSearch from '../agents/youtubeSearchAgent';
|
||||
import handleRedditSearch from '../agents/redditSearchAgent';
|
||||
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
import type { Embeddings } from '@langchain/core/embeddings';
|
||||
|
||||
type Message = {
|
||||
type: string;
|
||||
|
@ -58,7 +60,12 @@ const handleEmitterEvents = (
|
|||
});
|
||||
};
|
||||
|
||||
export const handleMessage = async (message: string, ws: WebSocket) => {
|
||||
export const handleMessage = async (
|
||||
message: string,
|
||||
ws: WebSocket,
|
||||
llm: BaseChatModel,
|
||||
embeddings: Embeddings,
|
||||
) => {
|
||||
try {
|
||||
const parsedMessage = JSON.parse(message) as Message;
|
||||
const id = Math.random().toString(36).substring(7);
|
||||
|
@ -83,7 +90,12 @@ export const handleMessage = async (message: string, ws: WebSocket) => {
|
|||
if (parsedMessage.type === 'message') {
|
||||
const handler = searchHandlers[parsedMessage.focusMode];
|
||||
if (handler) {
|
||||
const emitter = handler(parsedMessage.content, history);
|
||||
const emitter = handler(
|
||||
parsedMessage.content,
|
||||
history,
|
||||
llm,
|
||||
embeddings,
|
||||
);
|
||||
handleEmitterEvents(emitter, ws, id);
|
||||
} else {
|
||||
ws.send(JSON.stringify({ type: 'error', data: 'Invalid focus mode' }));
|
||||
|
|
|
@ -1,15 +1,17 @@
|
|||
import { WebSocketServer } from 'ws';
|
||||
import { handleConnection } from './connectionManager';
|
||||
import http from 'http';
|
||||
import { getPort } from '../config';
|
||||
|
||||
export const initServer = (
|
||||
server: http.Server<typeof http.IncomingMessage, typeof http.ServerResponse>,
|
||||
) => {
|
||||
const port = getPort();
|
||||
const wss = new WebSocketServer({ server });
|
||||
|
||||
wss.on('connection', (ws) => {
|
||||
handleConnection(ws);
|
||||
});
|
||||
|
||||
console.log(`WebSocket server started on port ${process.env.PORT}`);
|
||||
console.log(`WebSocket server started on port ${port}`);
|
||||
};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue