feat(app): add file uploads

2024-11-23 15:04:19 +05:30 · 2024-11-23 15:04:19 +05:30 · 4b89008f3a
commit 4b89008f3a
parent c650d1c3d9
25 changed files with 1035 additions and 86 deletions
--- a/src/lib/linkDocument.ts
+++ b/src/lib/linkDocument.ts
@@ -1,99 +0,0 @@
-import axios from 'axios';
-import { htmlToText } from 'html-to-text';
-import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
-import { Document } from '@langchain/core/documents';
-import pdfParse from 'pdf-parse';
-import logger from '../utils/logger';
-
-export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
-  const splitter = new RecursiveCharacterTextSplitter();
-
-  let docs: Document[] = [];
-
-  await Promise.all(
-    links.map(async (link) => {
-      link =
-        link.startsWith('http://') || link.startsWith('https://')
-          ? link
-          : `https://${link}`;
-
-      try {
-        const res = await axios.get(link, {
-          responseType: 'arraybuffer',
-        });
-
-        const isPdf = res.headers['content-type'] === 'application/pdf';
-
-        if (isPdf) {
-          const pdfText = await pdfParse(res.data);
-          const parsedText = pdfText.text
-            .replace(/(\r\n|\n|\r)/gm, ' ')
-            .replace(/\s+/g, ' ')
-            .trim();
-
-          const splittedText = await splitter.splitText(parsedText);
-          const title = 'PDF Document';
-
-          const linkDocs = splittedText.map((text) => {
-            return new Document({
-              pageContent: text,
-              metadata: {
-                title: title,
-                url: link,
-              },
-            });
-          });
-
-          docs.push(...linkDocs);
-          return;
-        }
-
-        const parsedText = htmlToText(res.data.toString('utf8'), {
-          selectors: [
-            {
-              selector: 'a',
-              options: {
-                ignoreHref: true,
-              },
-            },
-          ],
-        })
-          .replace(/(\r\n|\n|\r)/gm, ' ')
-          .replace(/\s+/g, ' ')
-          .trim();
-
-        const splittedText = await splitter.splitText(parsedText);
-        const title = res.data
-          .toString('utf8')
-          .match(/<title>(.*?)<\/title>/)?.[1];
-
-        const linkDocs = splittedText.map((text) => {
-          return new Document({
-            pageContent: text,
-            metadata: {
-              title: title || link,
-              url: link,
-            },
-          });
-        });
-
-        docs.push(...linkDocs);
-      } catch (err) {
-        logger.error(
-          `Error at generating documents from links: ${err.message}`,
-        );
-        docs.push(
-          new Document({
-            pageContent: `Failed to retrieve content from the link: ${err.message}`,
-            metadata: {
-              title: 'Failed to retrieve content',
-              url: link,
-            },
-          }),
-        );
-      }
-    }),
-  );
-
-  return docs;
-};
--- a/src/lib/providers/ollama.ts
+++ b/src/lib/providers/ollama.ts
@@ -6,7 +6,7 @@ import { ChatOllama } from '@langchain/community/chat_models/ollama';
 export const loadOllamaChatModels = async () => {
  const ollamaEndpoint = getOllamaApiEndpoint();
  const keepAlive = getKeepAlive();
-  
+
  if (!ollamaEndpoint) return {};

  try {
@@ -25,7 +25,7 @@ export const loadOllamaChatModels = async () => {
          baseUrl: ollamaEndpoint,
          model: model.model,
          temperature: 0.7,
-          keepAlive: keepAlive
+          keepAlive: keepAlive,
        }),
      };