feat(config): Use toml instead of env

Author: ItzCrazyKns
Date: 2024-04-20 09:32:19 +05:30
parent dd1ce4e324
commit c6a5790d33
GPG key ID: 8162927C7CCE3065 (no known key found for this signature in database)
26 changed files with 799 additions and 596 deletions
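The diff below drops the module-level ChatOpenAI/OpenAIEmbeddings instances that were built from process.env and instead has the caller inject a BaseChatModel and Embeddings. For the config side of the change, here is a minimal sketch of what reading model settings from a TOML file could look like; the file name, section names, and keys are illustrative assumptions, not the project's actual schema, and @iarna/toml is just one possible parser.

// Illustrative sketch only: settings come from a TOML file instead of process.env.
// File name, sections, and keys below are assumptions, not the real config schema.
import fs from 'fs';
import path from 'path';
import toml from '@iarna/toml';

interface Config {
  GENERAL: {
    CHAT_MODEL: string; // e.g. 'gpt-3.5-turbo'
    EMBEDDING_MODEL: string; // e.g. 'text-embedding-3-large'
  };
  API_KEYS: {
    OPENAI: string;
  };
}

// Parse config.toml from the working directory on each access.
const loadConfig = (): Config =>
  toml.parse(
    fs.readFileSync(path.join(process.cwd(), 'config.toml'), 'utf-8'),
  ) as unknown as Config;

export const getChatModel = () => loadConfig().GENERAL.CHAT_MODEL;
export const getOpenaiApiKey = () => loadConfig().API_KEYS.OPENAI;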


@@ -9,24 +9,16 @@ import {
   RunnableMap,
   RunnableLambda,
 } from '@langchain/core/runnables';
-import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai';
 import { StringOutputParser } from '@langchain/core/output_parsers';
 import { Document } from '@langchain/core/documents';
 import { searchSearxng } from '../core/searxng';
 import type { StreamEvent } from '@langchain/core/tracers/log_stream';
+import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
+import type { Embeddings } from '@langchain/core/embeddings';
 import formatChatHistoryAsString from '../utils/formatHistory';
 import eventEmitter from 'events';
 import computeSimilarity from '../utils/computeSimilarity';

-const llm = new ChatOpenAI({
-  modelName: process.env.MODEL_NAME,
-  temperature: 0.7,
-});
-
-const embeddings = new OpenAIEmbeddings({
-  modelName: 'text-embedding-3-large',
-});
-
 const basicRedditSearchRetrieverPrompt = `
 You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
 If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
@@ -104,118 +96,135 @@ const handleStream = async (
   }
 };

-const processDocs = async (docs: Document[]) => {
-  return docs
-    .map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
-    .join('\n');
-};
-
-const rerankDocs = async ({
-  query,
-  docs,
-}: {
-  query: string;
-  docs: Document[];
-}) => {
-  if (docs.length === 0) {
-    return docs;
-  }
-
-  const docsWithContent = docs.filter(
-    (doc) => doc.pageContent && doc.pageContent.length > 0,
-  );
-
-  const docEmbeddings = await embeddings.embedDocuments(
-    docsWithContent.map((doc) => doc.pageContent),
-  );
-
-  const queryEmbedding = await embeddings.embedQuery(query);
-
-  const similarity = docEmbeddings.map((docEmbedding, i) => {
-    const sim = computeSimilarity(queryEmbedding, docEmbedding);
-
-    return {
-      index: i,
-      similarity: sim,
-    };
-  });
-
-  const sortedDocs = similarity
-    .sort((a, b) => b.similarity - a.similarity)
-    .slice(0, 15)
-    .filter((sim) => sim.similarity > 0.3)
-    .map((sim) => docsWithContent[sim.index]);
-
-  return sortedDocs;
-};
-
 type BasicChainInput = {
   chat_history: BaseMessage[];
   query: string;
 };

-const basicRedditSearchRetrieverChain = RunnableSequence.from([
-  PromptTemplate.fromTemplate(basicRedditSearchRetrieverPrompt),
-  llm,
-  strParser,
-  RunnableLambda.from(async (input: string) => {
-    if (input === 'not_needed') {
-      return { query: '', docs: [] };
-    }
-
-    const res = await searchSearxng(input, {
-      language: 'en',
-      engines: ['reddit'],
-    });
-
-    const documents = res.results.map(
-      (result) =>
-        new Document({
-          pageContent: result.content ? result.content : result.title,
-          metadata: {
-            title: result.title,
-            url: result.url,
-            ...(result.img_src && { img_src: result.img_src }),
-          },
-        }),
-    );
-
-    return { query: input, docs: documents };
-  }),
-]);
-
-const basicRedditSearchAnsweringChain = RunnableSequence.from([
-  RunnableMap.from({
-    query: (input: BasicChainInput) => input.query,
-    chat_history: (input: BasicChainInput) => input.chat_history,
-    context: RunnableSequence.from([
-      (input) => ({
-        query: input.query,
-        chat_history: formatChatHistoryAsString(input.chat_history),
-      }),
-      basicRedditSearchRetrieverChain
-        .pipe(rerankDocs)
-        .withConfig({
-          runName: 'FinalSourceRetriever',
-        })
-        .pipe(processDocs),
-    ]),
-  }),
-  ChatPromptTemplate.fromMessages([
-    ['system', basicRedditSearchResponsePrompt],
-    new MessagesPlaceholder('chat_history'),
-    ['user', '{query}'],
-  ]),
-  llm,
-  strParser,
-]).withConfig({
-  runName: 'FinalResponseGenerator',
-});
+const createBasicRedditSearchRetrieverChain = (llm: BaseChatModel) => {
+  return RunnableSequence.from([
+    PromptTemplate.fromTemplate(basicRedditSearchRetrieverPrompt),
+    llm,
+    strParser,
+    RunnableLambda.from(async (input: string) => {
+      if (input === 'not_needed') {
+        return { query: '', docs: [] };
+      }
+
+      const res = await searchSearxng(input, {
+        language: 'en',
+        engines: ['reddit'],
+      });
+
+      const documents = res.results.map(
+        (result) =>
+          new Document({
+            pageContent: result.content ? result.content : result.title,
+            metadata: {
+              title: result.title,
+              url: result.url,
+              ...(result.img_src && { img_src: result.img_src }),
+            },
+          }),
+      );
+
+      return { query: input, docs: documents };
+    }),
+  ]);
+};
+
+const createBasicRedditSearchAnsweringChain = (
+  llm: BaseChatModel,
+  embeddings: Embeddings,
+) => {
+  const basicRedditSearchRetrieverChain =
+    createBasicRedditSearchRetrieverChain(llm);
+
+  const processDocs = async (docs: Document[]) => {
+    return docs
+      .map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
+      .join('\n');
+  };
+
+  const rerankDocs = async ({
+    query,
+    docs,
+  }: {
+    query: string;
+    docs: Document[];
+  }) => {
+    if (docs.length === 0) {
+      return docs;
+    }
+
+    const docsWithContent = docs.filter(
+      (doc) => doc.pageContent && doc.pageContent.length > 0,
+    );
+
+    const docEmbeddings = await embeddings.embedDocuments(
+      docsWithContent.map((doc) => doc.pageContent),
+    );
+
+    const queryEmbedding = await embeddings.embedQuery(query);
+
+    const similarity = docEmbeddings.map((docEmbedding, i) => {
+      const sim = computeSimilarity(queryEmbedding, docEmbedding);
+
+      return {
+        index: i,
+        similarity: sim,
+      };
+    });
+
+    const sortedDocs = similarity
+      .sort((a, b) => b.similarity - a.similarity)
+      .slice(0, 15)
+      .filter((sim) => sim.similarity > 0.3)
+      .map((sim) => docsWithContent[sim.index]);
+
+    return sortedDocs;
+  };
+
+  return RunnableSequence.from([
+    RunnableMap.from({
+      query: (input: BasicChainInput) => input.query,
+      chat_history: (input: BasicChainInput) => input.chat_history,
+      context: RunnableSequence.from([
+        (input) => ({
+          query: input.query,
+          chat_history: formatChatHistoryAsString(input.chat_history),
+        }),
+        basicRedditSearchRetrieverChain
+          .pipe(rerankDocs)
+          .withConfig({
+            runName: 'FinalSourceRetriever',
+          })
+          .pipe(processDocs),
+      ]),
+    }),
+    ChatPromptTemplate.fromMessages([
+      ['system', basicRedditSearchResponsePrompt],
+      new MessagesPlaceholder('chat_history'),
+      ['user', '{query}'],
+    ]),
+    llm,
+    strParser,
+  ]).withConfig({
+    runName: 'FinalResponseGenerator',
+  });
+};

-const basicRedditSearch = (query: string, history: BaseMessage[]) => {
+const basicRedditSearch = (
+  query: string,
+  history: BaseMessage[],
+  llm: BaseChatModel,
+  embeddings: Embeddings,
+) => {
   const emitter = new eventEmitter();

   try {
+    const basicRedditSearchAnsweringChain =
+      createBasicRedditSearchAnsweringChain(llm, embeddings);
     const stream = basicRedditSearchAnsweringChain.streamEvents(
       {
         chat_history: history,
@@ -238,8 +247,13 @@ const basicRedditSearch = (query: string, history: BaseMessage[]) => {
   return emitter;
 };

-const handleRedditSearch = (message: string, history: BaseMessage[]) => {
-  const emitter = basicRedditSearch(message, history);
+const handleRedditSearch = (
+  message: string,
+  history: BaseMessage[],
+  llm: BaseChatModel,
+  embeddings: Embeddings,
+) => {
+  const emitter = basicRedditSearch(message, history, llm, embeddings);
   return emitter;
 };
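
With the new signatures, the chat model and embeddings are constructed by the caller and threaded through handleRedditSearch into basicRedditSearch. A rough usage sketch follows; the import path, the default export, and the 'data'/'error'/'end' event names are assumptions not shown in this diff.

// Rough usage sketch; import path, default export, and event names are assumed.
import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai';
import { AIMessage, HumanMessage } from '@langchain/core/messages';
import handleRedditSearch from './agents/redditSearchAgent';

// The caller now owns model construction (e.g. from values read out of config.toml).
const llm = new ChatOpenAI({ modelName: 'gpt-3.5-turbo', temperature: 0.7 });
const embeddings = new OpenAIEmbeddings({ modelName: 'text-embedding-3-large' });

const emitter = handleRedditSearch(
  'What do people on Reddit recommend for budget mechanical keyboards?',
  [new HumanMessage('hi'), new AIMessage('Hello! How can I help you today?')],
  llm,
  embeddings,
);

emitter.on('data', (chunk) => process.stdout.write(String(chunk)));
emitter.on('error', (err) => console.error(err));
emitter.on('end', () => console.log('done'));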