diff --git a/.assets/perplexica-screenshot.png b/.assets/perplexica-screenshot.png
index c47a544..fc7a697 100644
Binary files a/.assets/perplexica-screenshot.png and b/.assets/perplexica-screenshot.png differ
diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml
new file mode 100644
index 0000000..f658c29
--- /dev/null
+++ b/.github/workflows/docker-build.yaml
@@ -0,0 +1,73 @@
+name: Build & Push Docker Images
+
+on:
+ push:
+ branches:
+ - master
+ release:
+ types: [published]
+
+jobs:
+ build-and-push:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ service: [backend, app]
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v3
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v2
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v2
+ with:
+ install: true
+
+ - name: Log in to DockerHub
+ uses: docker/login-action@v2
+ with:
+ username: ${{ secrets.DOCKER_USERNAME }}
+ password: ${{ secrets.DOCKER_PASSWORD }}
+
+ - name: Extract version from release tag
+ if: github.event_name == 'release'
+ id: version
+ run: echo "RELEASE_VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
+
+ - name: Build and push Docker image for ${{ matrix.service }}
+ if: github.ref == 'refs/heads/master' && github.event_name == 'push'
+ run: |
+ docker buildx create --use
+ if [[ "${{ matrix.service }}" == "backend" ]]; then \
+ DOCKERFILE=backend.dockerfile; \
+ IMAGE_NAME=perplexica-backend; \
+ else \
+ DOCKERFILE=app.dockerfile; \
+ IMAGE_NAME=perplexica-frontend; \
+ fi
+ docker buildx build --platform linux/amd64,linux/arm64 \
+ --cache-from=type=registry,ref=itzcrazykns1337/${IMAGE_NAME}:main \
+ --cache-to=type=inline \
+ -f $DOCKERFILE \
+ -t itzcrazykns1337/${IMAGE_NAME}:main \
+ --push .
+
+ - name: Build and push release Docker image for ${{ matrix.service }}
+ if: github.event_name == 'release'
+ run: |
+ docker buildx create --use
+ if [[ "${{ matrix.service }}" == "backend" ]]; then \
+ DOCKERFILE=backend.dockerfile; \
+ IMAGE_NAME=perplexica-backend; \
+ else \
+ DOCKERFILE=app.dockerfile; \
+ IMAGE_NAME=perplexica-frontend; \
+ fi
+ docker buildx build --platform linux/amd64,linux/arm64 \
+ --cache-from=type=registry,ref=itzcrazykns1337/${IMAGE_NAME}:${{ env.RELEASE_VERSION }} \
+ --cache-to=type=inline \
+ -f $DOCKERFILE \
+ -t itzcrazykns1337/${IMAGE_NAME}:${{ env.RELEASE_VERSION }} \
+ --push .
diff --git a/.gitignore b/.gitignore
index d64d5cc..8391d19 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@ yarn-error.log
# Build output
/.next/
/out/
+/dist/
# IDE/Editor specific
.vscode/
@@ -31,4 +32,8 @@ logs/
# Miscellaneous
.DS_Store
-Thumbs.db
\ No newline at end of file
+Thumbs.db
+
+# Db
+db.sqlite
+/searxng
diff --git a/.prettierignore b/.prettierignore
index c184fdb..55d3c7c 100644
--- a/.prettierignore
+++ b/.prettierignore
@@ -35,4 +35,7 @@ coverage
*.swp
# Ignore all files with the .DS_Store extension (macOS specific)
-.DS_Store
\ No newline at end of file
+.DS_Store
+
+# Ignore all files in uploads directory
+uploads
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c779f91..b16eccf 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -8,6 +8,7 @@ Perplexica's design consists of two main domains:
- **Frontend (`ui` directory)**: This is a Next.js application holding all user interface components. It's a self-contained environment that manages everything the user interacts with.
- **Backend (root and `src` directory)**: The backend logic is situated in the `src` folder, but the root directory holds the main `package.json` for backend dependency management.
+ - All of the focus modes are created using the Meta Search Agent class present in `src/search/metaSearchAgent.ts`. The main logic behind Perplexica lies there.
## Setting Up Your Environment
@@ -18,7 +19,8 @@ Before diving into coding, setting up your local environment is key. Here's what
1. In the root directory, locate the `sample.config.toml` file.
2. Rename it to `config.toml` and fill in the necessary configuration fields specific to the backend.
3. Run `npm install` to install dependencies.
-4. Use `npm run dev` to start the backend in development mode.
+4. Run `npm run db:push` to set up the local SQLite database.
+5. Use `npm run dev` to start the backend in development mode.
### Frontend
diff --git a/README.md b/README.md
index 0cf197b..cf9e459 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,9 @@
# 🚀 Perplexica - An AI-powered search engine 🔎
-
+[Join our Discord](https://discord.gg/26aArMy8tT)
+
+
+
## Table of Contents
@@ -10,8 +13,10 @@
- [Installation](#installation)
- [Getting Started with Docker (Recommended)](#getting-started-with-docker-recommended)
- [Non-Docker Installation](#non-docker-installation)
- - [Ollama connection errors](#ollama-connection-errors)
+ - [Ollama Connection Errors](#ollama-connection-errors)
- [Using as a Search Engine](#using-as-a-search-engine)
+- [Using Perplexica's API](#using-perplexicas-api)
+- [Expose Perplexica to a network](#expose-perplexica-to-network)
- [One-Click Deployment](#one-click-deployment)
- [Upcoming Features](#upcoming-features)
- [Support Us](#support-us)
@@ -45,6 +50,7 @@ Want to know more about its architecture and how it works? You can read it [here
- **Wolfram Alpha Search Mode:** Answers queries that need calculations or data analysis using Wolfram Alpha.
- **Reddit Search Mode:** Searches Reddit for discussions and opinions related to the query.
- **Current Information:** Some search tools might give you outdated info because they use data from crawling bots and convert them into embeddings and store them in a index. Unlike them, Perplexica uses SearxNG, a metasearch engine to get the results and rerank and get the most relevant source out of it, ensuring you always get the latest information without the overhead of daily data updates.
+- **API**: Integrate Perplexica into your existing applications and make use of its capabilities.
It has many more features like image and video search. Some of the planned features are mentioned in [upcoming features](#upcoming-features).
@@ -67,7 +73,8 @@ There are mainly 2 ways of installing Perplexica - With Docker, Without Docker.
- `OPENAI`: Your OpenAI API key. **You only need to fill this if you wish to use OpenAI's models**.
- `OLLAMA`: Your Ollama API URL. You should enter it as `http://host.docker.internal:PORT_NUMBER`. If you installed Ollama on port 11434, use `http://host.docker.internal:11434`. For other ports, adjust accordingly. **You need to fill this if you wish to use Ollama's models instead of OpenAI's**.
- - `GROQ`: Your Groq API key. **You only need to fill this if you wish to use Groq's hosted models**
+ - `GROQ`: Your Groq API key. **You only need to fill this if you wish to use Groq's hosted models**.
+ - `ANTHROPIC`: Your Anthropic API key. **You only need to fill this if you wish to use Anthropic models**.
**Note**: You can change these after starting Perplexica from the settings dialog.
@@ -85,25 +92,35 @@ There are mainly 2 ways of installing Perplexica - With Docker, Without Docker.
### Non-Docker Installation
-1. Clone the repository and rename the `sample.config.toml` file to `config.toml` in the root directory. Ensure you complete all required fields in this file.
-2. Rename the `.env.example` file to `.env` in the `ui` folder and fill in all necessary fields.
-3. After populating the configuration and environment files, run `npm i` in both the `ui` folder and the root directory.
-4. Install the dependencies and then execute `npm run build` in both the `ui` folder and the root directory.
-5. Finally, start both the frontend and the backend by running `npm run start` in both the `ui` folder and the root directory.
+1. Install SearXNG and allow `JSON` format in the SearXNG settings.
+2. Clone the repository and rename the `sample.config.toml` file to `config.toml` in the root directory. Ensure you complete all required fields in this file.
+3. Rename the `.env.example` file to `.env` in the `ui` folder and fill in all necessary fields.
+4. After populating the configuration and environment files, run `npm i` in both the `ui` folder and the root directory.
+5. Install the dependencies and then execute `npm run build` in both the `ui` folder and the root directory.
+6. Finally, start both the frontend and the backend by running `npm run start` in both the `ui` folder and the root directory.
**Note**: Using Docker is recommended as it simplifies the setup process, especially for managing environment variables and dependencies.
See the [installation documentation](https://github.com/ItzCrazyKns/Perplexica/tree/master/docs/installation) for more information like exposing it your network, etc.
-### Ollama connection errors
+### Ollama Connection Errors
-If you're facing an Ollama connection error, it is often related to the backend not being able to connect to Ollama's API. How can you fix it? You can fix it by updating your Ollama API URL in the settings menu to the following:
+If you're encountering an Ollama connection error, it is likely due to the backend being unable to connect to Ollama's API. To fix this issue you can:
-On Windows: `http://host.docker.internal:11434`
-On Mac: `http://host.docker.internal:11434`
-On Linux: `http://private_ip_of_computer_hosting_ollama:11434`
+1. **Check your Ollama API URL:** Ensure that the API URL is correctly set in the settings menu.
+2. **Update API URL Based on OS:**
-You need to edit the ports accordingly.
+ - **Windows:** Use `http://host.docker.internal:11434`
+ - **Mac:** Use `http://host.docker.internal:11434`
+ - **Linux:** Use `http://<private_ip_of_host>:11434`
+
+ Adjust the port number if you're using a different one.
+
+3. **Linux Users - Expose Ollama to Network:**
+
+ - Inside `/etc/systemd/system/ollama.service`, you need to add `Environment="OLLAMA_HOST=0.0.0.0"`. Then restart Ollama with `systemctl restart ollama`. For more information, see the [Ollama docs](https://github.com/ollama/ollama/blob/main/docs/faq.md#setting-environment-variables-on-linux).
+
+ - Ensure that the port (default is 11434) is not blocked by your firewall.
## Using as a Search Engine
@@ -114,17 +131,29 @@ If you wish to use Perplexica as an alternative to traditional search engines li
3. Add a new site search with the following URL: `http://localhost:3000/?q=%s`. Replace `localhost` with your IP address or domain name, and `3000` with the port number if Perplexica is not hosted locally.
4. Click the add button. Now, you can use Perplexica directly from your browser's search bar.
+## Using Perplexica's API
+
+Perplexica also provides an API for developers looking to integrate its powerful search engine into their own applications. You can run searches, use multiple models and get answers to your queries.
+
+For more details, check out the full documentation [here](https://github.com/ItzCrazyKns/Perplexica/tree/master/docs/API/SEARCH.md).
+
+## Expose Perplexica to network
+
+You can access Perplexica over your home network by following our networking guide [here](https://github.com/ItzCrazyKns/Perplexica/blob/master/docs/installation/NETWORKING.md).
+
## One-Click Deployment
[](https://repocloud.io/details/?app_id=267)
## Upcoming Features
-- [ ] Finalizing Copilot Mode
- [x] Add settings page
- [x] Adding support for local LLMs
-- [ ] Adding Discover and History Saving features
+- [x] History Saving features
- [x] Introducing various Focus Modes
+- [x] Adding API support
+- [x] Adding Discover
+- [ ] Finalizing Copilot Mode
## Support Us
@@ -132,11 +161,11 @@ If you find Perplexica useful, consider giving us a star on GitHub. This helps m
### Donations
-We also accept donations to help sustain our project. If you would like to contribute, you can use the following button to make a donation in cryptocurrency. Thank you for your support!
+We also accept donations to help sustain our project. If you would like to contribute, you can use the following options to donate. Thank you for your support!
-
-
-
+| Ethereum |
+| ----------------------------------------------------- |
+| Address: `0xB025a84b2F269570Eb8D4b05DEdaA41D8525B6DD` |
## Contribution
diff --git a/app.dockerfile b/app.dockerfile
index 105cf86..488e64b 100644
--- a/app.dockerfile
+++ b/app.dockerfile
@@ -1,7 +1,7 @@
-FROM node:alpine
+FROM node:20.18.0-alpine
-ARG NEXT_PUBLIC_WS_URL
-ARG NEXT_PUBLIC_API_URL
+ARG NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001
+ARG NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api
ENV NEXT_PUBLIC_WS_URL=${NEXT_PUBLIC_WS_URL}
ENV NEXT_PUBLIC_API_URL=${NEXT_PUBLIC_API_URL}
@@ -9,7 +9,7 @@ WORKDIR /home/perplexica
COPY ui /home/perplexica/
-RUN yarn install
+RUN yarn install --frozen-lockfile
RUN yarn build
CMD ["yarn", "start"]
\ No newline at end of file
diff --git a/backend.dockerfile b/backend.dockerfile
index 47c5d81..b6ab95a 100644
--- a/backend.dockerfile
+++ b/backend.dockerfile
@@ -1,18 +1,17 @@
-FROM node:buster-slim
-
-ARG SEARXNG_API_URL
+FROM node:18-slim
WORKDIR /home/perplexica
COPY src /home/perplexica/src
COPY tsconfig.json /home/perplexica/
-COPY config.toml /home/perplexica/
+COPY drizzle.config.ts /home/perplexica/
COPY package.json /home/perplexica/
COPY yarn.lock /home/perplexica/
-RUN sed -i "s|SEARXNG = \".*\"|SEARXNG = \"${SEARXNG_API_URL}\"|g" /home/perplexica/config.toml
+RUN mkdir /home/perplexica/data
+RUN mkdir /home/perplexica/uploads
-RUN yarn install
+RUN yarn install --frozen-lockfile --network-timeout 600000
RUN yarn build
CMD ["yarn", "start"]
\ No newline at end of file
diff --git a/data/.gitignore b/data/.gitignore
new file mode 100644
index 0000000..d6b7ef3
--- /dev/null
+++ b/data/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 0b3d80e..a0e1d73 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -13,12 +13,19 @@ services:
build:
context: .
dockerfile: backend.dockerfile
- args:
- - SEARXNG_API_URL=http://searxng:8080
+ image: itzcrazykns1337/perplexica-backend:main
+ environment:
+ - SEARXNG_API_URL=http://searxng:8080
depends_on:
- searxng
ports:
- 3001:3001
+ volumes:
+ - backend-dbstore:/home/perplexica/data
+ - uploads:/home/perplexica/uploads
+ - ./config.toml:/home/perplexica/config.toml
+ extra_hosts:
+ - 'host.docker.internal:host-gateway'
networks:
- perplexica-network
restart: unless-stopped
@@ -30,6 +37,7 @@ services:
args:
- NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api
- NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001
+ image: itzcrazykns1337/perplexica-frontend:main
depends_on:
- perplexica-backend
ports:
@@ -40,3 +48,7 @@ services:
networks:
perplexica-network:
+
+volumes:
+ backend-dbstore:
+ uploads:
diff --git a/docs/API/SEARCH.md b/docs/API/SEARCH.md
new file mode 100644
index 0000000..9405bc5
--- /dev/null
+++ b/docs/API/SEARCH.md
@@ -0,0 +1,117 @@
+# Perplexica Search API Documentation
+
+## Overview
+
+Perplexica’s Search API makes it easy to use our AI-powered search engine. You can run different types of searches, pick the models you want to use, and get the most recent info. See the sections below to learn more about Perplexica's search API.
+
+## Endpoint
+
+### **POST** `http://localhost:3001/api/search`
+
+**Note**: Replace `3001` with any other port if you've changed the default port.
+
+### Request
+
+The API accepts a JSON object in the request body, where you define the focus mode, chat models, embedding models, and your query.
+
+#### Request Body Structure
+
+```json
+{
+ "chatModel": {
+ "provider": "openai",
+ "model": "gpt-4o-mini"
+ },
+ "embeddingModel": {
+ "provider": "openai",
+ "model": "text-embedding-3-large"
+ },
+ "optimizationMode": "speed",
+ "focusMode": "webSearch",
+ "query": "What is Perplexica",
+ "history": [
+ ["human", "Hi, how are you?"],
+ ["assistant", "I am doing well, how can I help you today?"]
+ ]
+}
+```
+
+### Request Parameters
+
+- **`chatModel`** (object, optional): Defines the chat model to be used for the query. For model details you can send a GET request at `http://localhost:3001/api/models`. Make sure to use the key value (For example "gpt-4o-mini" instead of the display name "GPT 4 omni mini").
+
+ - `provider`: Specifies the provider for the chat model (e.g., `openai`, `ollama`).
+ - `model`: The specific model from the chosen provider (e.g., `gpt-4o-mini`).
+ - Optional fields for custom OpenAI configuration:
+ - `customOpenAIBaseURL`: If you’re using a custom OpenAI instance, provide the base URL.
+ - `customOpenAIKey`: The API key for a custom OpenAI instance.
+
+- **`embeddingModel`** (object, optional): Defines the embedding model for similarity-based searching. For model details you can send a GET request at `http://localhost:3001/api/models`. Make sure to use the key value (For example "text-embedding-3-large" instead of the display name "Text Embedding 3 Large").
+
+ - `provider`: The provider for the embedding model (e.g., `openai`).
+ - `model`: The specific embedding model (e.g., `text-embedding-3-large`).
+
+- **`focusMode`** (string, required): Specifies which focus mode to use. Available modes:
+
+ - `webSearch`, `academicSearch`, `writingAssistant`, `wolframAlphaSearch`, `youtubeSearch`, `redditSearch`.
+
+- **`optimizationMode`** (string, optional): Specifies the optimization mode to control the balance between performance and quality. Available modes:
+
+ - `speed`: Prioritize speed and return the fastest answer.
+ - `balanced`: Provide a balanced answer with good speed and reasonable quality.
+
+- **`query`** (string, required): The search query or question.
+
+- **`history`** (array, optional): An array of message pairs representing the conversation history. Each pair consists of a role (either 'human' or 'assistant') and the message content. This allows the system to use the context of the conversation to refine results. Example:
+
+ ```json
+ [
+ ["human", "What is Perplexica?"],
+ ["assistant", "Perplexica is an AI-powered search engine..."]
+ ]
+ ```
+
+### Response
+
+The response from the API includes both the final message and the sources used to generate that message.
+
+#### Example Response
+
+```json
+{
+ "message": "Perplexica is an innovative, open-source AI-powered search engine designed to enhance the way users search for information online. Here are some key features and characteristics of Perplexica:\n\n- **AI-Powered Technology**: It utilizes advanced machine learning algorithms to not only retrieve information but also to understand the context and intent behind user queries, providing more relevant results [1][5].\n\n- **Open-Source**: Being open-source, Perplexica offers flexibility and transparency, allowing users to explore its functionalities without the constraints of proprietary software [3][10].",
+ "sources": [
+ {
+ "pageContent": "Perplexica is an innovative, open-source AI-powered search engine designed to enhance the way users search for information online.",
+ "metadata": {
+ "title": "What is Perplexica, and how does it function as an AI-powered search ...",
+ "url": "https://askai.glarity.app/search/What-is-Perplexica--and-how-does-it-function-as-an-AI-powered-search-engine"
+ }
+ },
+ {
+ "pageContent": "Perplexica is an open-source AI-powered search tool that dives deep into the internet to find precise answers.",
+ "metadata": {
+ "title": "Sahar Mor's Post",
+ "url": "https://www.linkedin.com/posts/sahar-mor_a-new-open-source-project-called-perplexica-activity-7204489745668694016-ncja"
+ }
+ }
+ ....
+ ]
+}
+```
+
+### Fields in the Response
+
+- **`message`** (string): The search result, generated based on the query and focus mode.
+- **`sources`** (array): A list of sources that were used to generate the search result. Each source includes:
+ - `pageContent`: A snippet of the relevant content from the source.
+ - `metadata`: Metadata about the source, including:
+ - `title`: The title of the webpage.
+ - `url`: The URL of the webpage.
+
+### Error Handling
+
+If an error occurs during the search process, the API will return an appropriate error message with an HTTP status code.
+
+- **400**: If the request is malformed or missing required fields (e.g., no focus mode or query).
+- **500**: If an internal server error occurs during the search.
diff --git a/docs/architecture/README.md b/docs/architecture/README.md
index b1fcfcb..5732471 100644
--- a/docs/architecture/README.md
+++ b/docs/architecture/README.md
@@ -1,4 +1,4 @@
-## Perplexica's Architecture
+# Perplexica's Architecture
Perplexica's architecture consists of the following key components:
diff --git a/docs/architecture/WORKING.md b/docs/architecture/WORKING.md
index e39de7a..75b20fd 100644
--- a/docs/architecture/WORKING.md
+++ b/docs/architecture/WORKING.md
@@ -1,4 +1,4 @@
-## How does Perplexica work?
+# How does Perplexica work?
Curious about how Perplexica works? Don't worry, we'll cover it here. Before we begin, make sure you've read about the architecture of Perplexica to ensure you understand what it's made up of. Haven't read it? You can read it [here](https://github.com/ItzCrazyKns/Perplexica/tree/master/docs/architecture/README.md).
@@ -10,10 +10,10 @@ We'll understand how Perplexica works by taking an example of a scenario where a
4. After the information is retrieved, it is based on keyword-based search. We then convert the information into embeddings and the query as well, then we perform a similarity search to find the most relevant sources to answer the query.
5. After all this is done, the sources are passed to the response generator. This chain takes all the chat history, the query, and the sources. It generates a response that is streamed to the UI.
-### How are the answers cited?
+## How are the answers cited?
The LLMs are prompted to do so. We've prompted them so well that they cite the answers themselves, and using some UI magic, we display it to the user.
-### Image and Video Search
+## Image and Video Search
Image and video searches are conducted in a similar manner. A query is always generated first, then we search the web for images and videos that match the query. These results are then returned to the user.
diff --git a/docs/installation/NETWORKING.md b/docs/installation/NETWORKING.md
index baad296..ae39e3f 100644
--- a/docs/installation/NETWORKING.md
+++ b/docs/installation/NETWORKING.md
@@ -10,27 +10,27 @@ This guide will show you how to make Perplexica available over a network. Follow
3. Stop and remove the existing Perplexica containers and images:
-```
-docker compose down --rmi all
-```
+ ```bash
+ docker compose down --rmi all
+ ```
4. Open the `docker-compose.yaml` file in a text editor like Notepad++
5. Replace `127.0.0.1` with the IP address of the server Perplexica is running on in these two lines:
-```
-args:
- - NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api
- - NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001
-```
+ ```bash
+ args:
+ - NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api
+ - NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001
+ ```
6. Save and close the `docker-compose.yaml` file
7. Rebuild and restart the Perplexica container:
-```
-docker compose up -d --build
-```
+ ```bash
+ docker compose up -d --build
+ ```
## macOS
@@ -38,37 +38,37 @@ docker compose up -d --build
2. Navigate to the directory with the `docker-compose.yaml` file:
-```
-cd /path/to/docker-compose.yaml
-```
+ ```bash
+ cd /path/to/docker-compose.yaml
+ ```
3. Stop and remove existing containers and images:
-```
-docker compose down --rmi all
-```
+ ```bash
+ docker compose down --rmi all
+ ```
4. Open `docker-compose.yaml` in a text editor like Sublime Text:
-```
-nano docker-compose.yaml
-```
+ ```bash
+ nano docker-compose.yaml
+ ```
5. Replace `127.0.0.1` with the server IP in these lines:
-```
-args:
- - NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api
- - NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001
-```
+ ```bash
+ args:
+ - NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api
+ - NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001
+ ```
6. Save and exit the editor
7. Rebuild and restart Perplexica:
-```
-docker compose up -d --build
-```
+ ```bash
+ docker compose up -d --build
+ ```
## Linux
@@ -76,34 +76,34 @@ docker compose up -d --build
2. Navigate to the `docker-compose.yaml` directory:
-```
-cd /path/to/docker-compose.yaml
-```
+ ```bash
+ cd /path/to/docker-compose.yaml
+ ```
3. Stop and remove containers and images:
-```
-docker compose down --rmi all
-```
+ ```bash
+ docker compose down --rmi all
+ ```
4. Edit `docker-compose.yaml`:
-```
-nano docker-compose.yaml
-```
+ ```bash
+ nano docker-compose.yaml
+ ```
5. Replace `127.0.0.1` with the server IP:
-```
-args:
- - NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api
- - NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001
-```
+ ```bash
+ args:
+ - NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api
+ - NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001
+ ```
6. Save and exit the editor
7. Rebuild and restart Perplexica:
-```
-docker compose up -d --build
-```
+ ```bash
+ docker compose up -d --build
+ ```
diff --git a/docs/installation/UPDATING.md b/docs/installation/UPDATING.md
new file mode 100644
index 0000000..b41b05a
--- /dev/null
+++ b/docs/installation/UPDATING.md
@@ -0,0 +1,40 @@
+# Update Perplexica to the latest version
+
+To update Perplexica to the latest version, follow these steps:
+
+## For Docker users
+
+1. Clone the latest version of Perplexica from GitHub:
+
+ ```bash
+ git clone https://github.com/ItzCrazyKns/Perplexica.git
+ ```
+
+2. Navigate to the Project Directory.
+
+3. Pull latest images from registry.
+
+ ```bash
+ docker compose pull
+ ```
+
+4. Update and Recreate containers.
+
+ ```bash
+ docker compose up -d
+ ```
+
+5. Once the command completes, go to http://localhost:3000 and verify the latest changes.
+
+## For non Docker users
+
+1. Clone the latest version of Perplexica from GitHub:
+
+ ```bash
+ git clone https://github.com/ItzCrazyKns/Perplexica.git
+ ```
+
+2. Navigate to the Project Directory
+3. Execute `npm i` in both the `ui` folder and the root directory.
+4. Once packages are updated, execute `npm run build` in both the `ui` folder and the root directory.
+5. Finally, start both the frontend and the backend by running `npm run start` in both the `ui` folder and the root directory.
diff --git a/drizzle.config.ts b/drizzle.config.ts
new file mode 100644
index 0000000..9ac3ec5
--- /dev/null
+++ b/drizzle.config.ts
@@ -0,0 +1,10 @@
+import { defineConfig } from 'drizzle-kit';
+
+export default defineConfig({
+ dialect: 'sqlite',
+ schema: './src/db/schema.ts',
+ out: './drizzle',
+ dbCredentials: {
+ url: './data/db.sqlite',
+ },
+});
diff --git a/package.json b/package.json
index 0308e93..3fce442 100644
--- a/package.json
+++ b/package.json
@@ -1,19 +1,26 @@
{
"name": "perplexica-backend",
- "version": "1.5.0",
+ "version": "1.10.0-rc2",
"license": "MIT",
"author": "ItzCrazyKns",
"scripts": {
- "start": "node dist/app.js",
+ "start": "npm run db:push && node dist/app.js",
"build": "tsc",
- "dev": "nodemon src/app.ts",
+ "dev": "nodemon --ignore uploads/ src/app.ts ",
+ "db:push": "drizzle-kit push sqlite",
"format": "prettier . --check",
"format:write": "prettier . --write"
},
"devDependencies": {
+ "@types/better-sqlite3": "^7.6.10",
"@types/cors": "^2.8.17",
"@types/express": "^4.17.21",
+ "@types/html-to-text": "^9.0.4",
+ "@types/multer": "^1.4.12",
+ "@types/pdf-parse": "^1.1.4",
"@types/readable-stream": "^4.0.11",
+ "@types/ws": "^8.5.12",
+ "drizzle-kit": "^0.22.7",
"nodemon": "^3.1.0",
"prettier": "^3.2.5",
"ts-node": "^10.9.2",
@@ -21,17 +28,26 @@
},
"dependencies": {
"@iarna/toml": "^2.2.5",
+ "@langchain/anthropic": "^0.2.3",
+ "@langchain/community": "^0.2.16",
"@langchain/openai": "^0.0.25",
+ "@langchain/google-genai": "^0.0.23",
"@xenova/transformers": "^2.17.1",
"axios": "^1.6.8",
+ "better-sqlite3": "^11.0.0",
"compute-cosine-similarity": "^1.1.0",
"compute-dot": "^1.1.0",
"cors": "^2.8.5",
"dotenv": "^16.4.5",
+ "drizzle-orm": "^0.31.2",
"express": "^4.19.2",
+ "html-to-text": "^9.0.5",
"langchain": "^0.1.30",
+ "mammoth": "^1.8.0",
+ "multer": "^1.4.5-lts.1",
+ "pdf-parse": "^1.1.1",
"winston": "^3.13.0",
- "ws": "^8.16.0",
+ "ws": "^8.17.1",
"zod": "^3.22.4"
}
}
diff --git a/sample.config.toml b/sample.config.toml
index 8d35666..50ba95d 100644
--- a/sample.config.toml
+++ b/sample.config.toml
@@ -1,10 +1,13 @@
[GENERAL]
PORT = 3001 # Port to run the server on
SIMILARITY_MEASURE = "cosine" # "cosine" or "dot"
+KEEP_ALIVE = "5m" # How long to keep Ollama models loaded into memory. (Instead of using -1 use "-1m")
[API_KEYS]
OPENAI = "" # OpenAI API key - sk-1234567890abcdef1234567890abcdef
GROQ = "" # Groq API key - gsk_1234567890abcdef1234567890abcdef
+ANTHROPIC = "" # Anthropic API key - sk-ant-1234567890abcdef1234567890abcdef
+GEMINI = "" # Gemini API key - sk-1234567890abcdef1234567890abcdef
[API_ENDPOINTS]
SEARXNG = "http://localhost:32768" # SearxNG API URL
diff --git a/searxng/settings.yml b/searxng/settings.yml
index da973c1..54d27c4 100644
--- a/searxng/settings.yml
+++ b/searxng/settings.yml
@@ -1,2356 +1,17 @@
-general:
- # Debug mode, only for development. Is overwritten by ${SEARXNG_DEBUG}
- debug: false
- # displayed name
- instance_name: 'searxng'
- # For example: https://example.com/privacy
- privacypolicy_url: false
- # use true to use your own donation page written in searx/info/en/donate.md
- # use false to disable the donation link
- donation_url: false
- # mailto:contact@example.com
- contact_url: false
- # record stats
- enable_metrics: true
+use_default_settings: true
-brand:
- new_issue_url: https://github.com/searxng/searxng/issues/new
- docs_url: https://docs.searxng.org/
- public_instances: https://searx.space
- wiki_url: https://github.com/searxng/searxng/wiki
- issue_url: https://github.com/searxng/searxng/issues
- # custom:
- # maintainer: "Jon Doe"
- # # Custom entries in the footer: [title]: [link]
- # links:
- # Uptime: https://uptime.searxng.org/history/darmarit-org
- # About: "https://searxng.org"
+general:
+ instance_name: 'searxng'
search:
- # Filter results. 0: None, 1: Moderate, 2: Strict
- safe_search: 0
- # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "yandex", "mwmbl",
- # "seznam", "startpage", "stract", "swisscows", "qwant", "wikipedia" - leave blank to turn it off
- # by default.
autocomplete: 'google'
- # minimun characters to type before autocompleter starts
- autocomplete_min: 4
- # Default search language - leave blank to detect from browser information or
- # use codes from 'languages.py'
- default_lang: 'auto'
- # max_page: 0 # if engine supports paging, 0 means unlimited numbers of pages
- # Available languages
- # languages:
- # - all
- # - en
- # - en-US
- # - de
- # - it-IT
- # - fr
- # - fr-BE
- # ban time in seconds after engine errors
- ban_time_on_fail: 5
- # max ban time in seconds after engine errors
- max_ban_time_on_fail: 120
- suspended_times:
- # Engine suspension time after error (in seconds; set to 0 to disable)
- # For error "Access denied" and "HTTP error [402, 403]"
- SearxEngineAccessDenied: 86400
- # For error "CAPTCHA"
- SearxEngineCaptcha: 86400
- # For error "Too many request" and "HTTP error 429"
- SearxEngineTooManyRequests: 3600
- # Cloudflare CAPTCHA
- cf_SearxEngineCaptcha: 1296000
- cf_SearxEngineAccessDenied: 86400
- # ReCAPTCHA
- recaptcha_SearxEngineCaptcha: 604800
-
- # remove format to deny access, use lower case.
- # formats: [html, csv, json, rss]
formats:
- html
- json
server:
- # Is overwritten by ${SEARXNG_PORT} and ${SEARXNG_BIND_ADDRESS}
- port: 8888
- bind_address: '127.0.0.1'
- # public URL of the instance, to ensure correct inbound links. Is overwritten
- # by ${SEARXNG_URL}.
- base_url: / # "http://example.com/location"
- limiter: false # rate limit the number of request on the instance, block some bots
- public_instance: false # enable features designed only for public instances
-
- # If your instance owns a /etc/searxng/settings.yml file, then set the following
- # values there.
-
secret_key: 'a2fb23f1b02e6ee83875b09826990de0f6bd908b6638e8c10277d415f6ab852b' # Is overwritten by ${SEARXNG_SECRET}
- # Proxying image results through searx
- image_proxy: false
- # 1.0 and 1.1 are supported
- http_protocol_version: '1.0'
- # POST queries are more secure as they don't show up in history but may cause
- # problems when using Firefox containers
- method: 'POST'
- default_http_headers:
- X-Content-Type-Options: nosniff
- X-Download-Options: noopen
- X-Robots-Tag: noindex, nofollow
- Referrer-Policy: no-referrer
-
-redis:
- # URL to connect redis database. Is overwritten by ${SEARXNG_REDIS_URL}.
- # https://docs.searxng.org/admin/settings/settings_redis.html#settings-redis
- url: false
-
-ui:
- # Custom static path - leave it blank if you didn't change
- static_path: ''
- static_use_hash: false
- # Custom templates path - leave it blank if you didn't change
- templates_path: ''
- # query_in_title: When true, the result page's titles contains the query
- # it decreases the privacy, since the browser can records the page titles.
- query_in_title: false
- # infinite_scroll: When true, automatically loads the next page when scrolling to bottom of the current page.
- infinite_scroll: false
- # ui theme
- default_theme: simple
- # center the results ?
- center_alignment: false
- # URL prefix of the internet archive, don't forget trailing slash (if needed).
- # cache_url: "https://webcache.googleusercontent.com/search?q=cache:"
- # Default interface locale - leave blank to detect from browser information or
- # use codes from the 'locales' config section
- default_locale: ''
- # Open result links in a new tab by default
- # results_on_new_tab: false
- theme_args:
- # style of simple theme: auto, light, dark
- simple_style: auto
- # Perform search immediately if a category selected.
- # Disable to select multiple categories at once and start the search manually.
- search_on_category_select: true
- # Hotkeys: default or vim
- hotkeys: default
-
-# Lock arbitrary settings on the preferences page. To find the ID of the user
-# setting you want to lock, check the ID of the form on the page "preferences".
-#
-# preferences:
-# lock:
-# - language
-# - autocomplete
-# - method
-# - query_in_title
-
-# searx supports result proxification using an external service:
-# https://github.com/asciimoo/morty uncomment below section if you have running
-# morty proxy the key is base64 encoded (keep the !!binary notation)
-# Note: since commit af77ec3, morty accepts a base64 encoded key.
-#
-# result_proxy:
-# url: http://127.0.0.1:3000/
-# # the key is a base64 encoded string, the YAML !!binary prefix is optional
-# key: !!binary "your_morty_proxy_key"
-# # [true|false] enable the "proxy" button next to each result
-# proxify_results: true
-
-# communication with search engines
-#
-outgoing:
- # default timeout in seconds, can be override by engine
- request_timeout: 3.0
- # the maximum timeout in seconds
- # max_request_timeout: 10.0
- # suffix of searx_useragent, could contain information like an email address
- # to the administrator
- useragent_suffix: ''
- # The maximum number of concurrent connections that may be established.
- pool_connections: 100
- # Allow the connection pool to maintain keep-alive connections below this
- # point.
- pool_maxsize: 20
- # See https://www.python-httpx.org/http2/
- enable_http2: true
- # uncomment below section if you want to use a custom server certificate
- # see https://www.python-httpx.org/advanced/#changing-the-verification-defaults
- # and https://www.python-httpx.org/compatibility/#ssl-configuration
- # verify: ~/.mitmproxy/mitmproxy-ca-cert.cer
- #
- # uncomment below section if you want to use a proxyq see: SOCKS proxies
- # https://2.python-requests.org/en/latest/user/advanced/#proxies
- # are also supported: see
- # https://2.python-requests.org/en/latest/user/advanced/#socks
- #
- # proxies:
- # all://:
- # - http://proxy1:8080
- # - http://proxy2:8080
- #
- # using_tor_proxy: true
- #
- # Extra seconds to add in order to account for the time taken by the proxy
- #
- # extra_proxy_timeout: 10.0
- #
- # uncomment below section only if you have more than one network interface
- # which can be the source of outgoing search requests
- #
- # source_ips:
- # - 1.1.1.1
- # - 1.1.1.2
- # - fe80::/126
-
-# External plugin configuration, for more details see
-# https://docs.searxng.org/dev/plugins.html
-#
-# plugins:
-# - plugin1
-# - plugin2
-# - ...
-
-# Comment or un-comment plugin to activate / deactivate by default.
-#
-# enabled_plugins:
-# # these plugins are enabled if nothing is configured ..
-# - 'Hash plugin'
-# - 'Self Information'
-# - 'Tracker URL remover'
-# - 'Ahmia blacklist' # activation depends on outgoing.using_tor_proxy
-# # these plugins are disabled if nothing is configured ..
-# - 'Hostname replace' # see hostname_replace configuration below
-# - 'Open Access DOI rewrite'
-# - 'Tor check plugin'
-# # Read the docs before activate: auto-detection of the language could be
-# # detrimental to users expectations / users can activate the plugin in the
-# # preferences if they want.
-# - 'Autodetect search language'
-
-# Configuration of the "Hostname replace" plugin:
-#
-# hostname_replace:
-# '(.*\.)?youtube\.com$': 'invidious.example.com'
-# '(.*\.)?youtu\.be$': 'invidious.example.com'
-# '(.*\.)?youtube-noocookie\.com$': 'yotter.example.com'
-# '(.*\.)?reddit\.com$': 'teddit.example.com'
-# '(.*\.)?redd\.it$': 'teddit.example.com'
-# '(www\.)?twitter\.com$': 'nitter.example.com'
-# # to remove matching host names from result list, set value to false
-# 'spam\.example\.com': false
-
-checker:
- # disable checker when in debug mode
- off_when_debug: true
-
- # use "scheduling: false" to disable scheduling
- # scheduling: interval or int
-
- # to activate the scheduler:
- # * uncomment "scheduling" section
- # * add "cache2 = name=searxngcache,items=2000,blocks=2000,blocksize=4096,bitmap=1"
- # to your uwsgi.ini
-
- # scheduling:
- # start_after: [300, 1800] # delay to start the first run of the checker
- # every: [86400, 90000] # how often the checker runs
-
- # additional tests: only for the YAML anchors (see the engines section)
- #
- additional_tests:
- rosebud: &test_rosebud
- matrix:
- query: rosebud
- lang: en
- result_container:
- - not_empty
- - ['one_title_contains', 'citizen kane']
- test:
- - unique_results
-
- android: &test_android
- matrix:
- query: ['android']
- lang: ['en', 'de', 'fr', 'zh-CN']
- result_container:
- - not_empty
- - ['one_title_contains', 'google']
- test:
- - unique_results
-
- # tests: only for the YAML anchors (see the engines section)
- tests:
- infobox: &tests_infobox
- infobox:
- matrix:
- query: ['linux', 'new york', 'bbc']
- result_container:
- - has_infobox
-
-categories_as_tabs:
- general:
- images:
- videos:
- news:
- map:
- music:
- it:
- science:
- files:
- social media:
engines:
- - name: 9gag
- engine: 9gag
- shortcut: 9g
- disabled: true
-
- - name: annas archive
- engine: annas_archive
- disabled: true
- shortcut: aa
-
- # - name: annas articles
- # engine: annas_archive
- # shortcut: aaa
- # # https://docs.searxng.org/dev/engines/online/annas_archive.html
- # aa_content: 'journal_article' # book_any .. magazine, standards_document
- # aa_ext: 'pdf' # pdf, epub, ..
- # aa_sort: 'newest' # newest, oldest, largest, smallest
-
- - name: apk mirror
- engine: apkmirror
- timeout: 4.0
- shortcut: apkm
- disabled: true
-
- - name: apple app store
- engine: apple_app_store
- shortcut: aps
- disabled: true
-
- # Requires Tor
- - name: ahmia
- engine: ahmia
- categories: onions
- enable_http: true
- shortcut: ah
-
- - name: anaconda
- engine: xpath
- paging: true
- first_page_num: 0
- search_url: https://anaconda.org/search?q={query}&page={pageno}
- results_xpath: //tbody/tr
- url_xpath: ./td/h5/a[last()]/@href
- title_xpath: ./td/h5
- content_xpath: ./td[h5]/text()
- categories: it
- timeout: 6.0
- shortcut: conda
- disabled: true
-
- - name: arch linux wiki
- engine: archlinux
- shortcut: al
-
- - name: artic
- engine: artic
- shortcut: arc
- timeout: 4.0
-
- - name: arxiv
- engine: arxiv
- shortcut: arx
- timeout: 4.0
-
- - name: ask
- engine: ask
- shortcut: ask
- disabled: true
-
- # tmp suspended: dh key too small
- # - name: base
- # engine: base
- # shortcut: bs
-
- - name: bandcamp
- engine: bandcamp
- shortcut: bc
- categories: music
-
- - name: wikipedia
- engine: wikipedia
- shortcut: wp
- # add "list" to the array to get results in the results list
- display_type: ['infobox']
- base_url: 'https://{language}.wikipedia.org/'
- categories: [general]
-
- - name: bilibili
- engine: bilibili
- shortcut: bil
- disabled: true
-
- - name: bing
- engine: bing
- shortcut: bi
- disabled: true
-
- - name: bing images
- engine: bing_images
- shortcut: bii
-
- - name: bing news
- engine: bing_news
- shortcut: bin
-
- - name: bing videos
- engine: bing_videos
- shortcut: biv
-
- - name: bitbucket
- engine: xpath
- paging: true
- search_url: https://bitbucket.org/repo/all/{pageno}?name={query}
- url_xpath: //article[@class="repo-summary"]//a[@class="repo-link"]/@href
- title_xpath: //article[@class="repo-summary"]//a[@class="repo-link"]
- content_xpath: //article[@class="repo-summary"]/p
- categories: [it, repos]
- timeout: 4.0
- disabled: true
- shortcut: bb
- about:
- website: https://bitbucket.org/
- wikidata_id: Q2493781
- official_api_documentation: https://developer.atlassian.com/bitbucket
- use_official_api: false
- require_api_key: false
- results: HTML
-
- - name: bpb
- engine: bpb
- shortcut: bpb
- disabled: true
-
- - name: btdigg
- engine: btdigg
- shortcut: bt
- disabled: true
-
- - name: ccc-tv
- engine: xpath
- paging: false
- search_url: https://media.ccc.de/search/?q={query}
- url_xpath: //div[@class="caption"]/h3/a/@href
- title_xpath: //div[@class="caption"]/h3/a/text()
- content_xpath: //div[@class="caption"]/h4/@title
- categories: videos
- disabled: true
- shortcut: c3tv
- about:
- website: https://media.ccc.de/
- wikidata_id: Q80729951
- official_api_documentation: https://github.com/voc/voctoweb
- use_official_api: false
- require_api_key: false
- results: HTML
- # We don't set language: de here because media.ccc.de is not just
- # for a German audience. It contains many English videos and many
- # German videos have English subtitles.
-
- - name: openverse
- engine: openverse
- categories: images
- shortcut: opv
-
- - name: chefkoch
- engine: chefkoch
- shortcut: chef
- # to show premium or plus results too:
- # skip_premium: false
-
- # - name: core.ac.uk
- # engine: core
- # categories: science
- # shortcut: cor
- # # get your API key from: https://core.ac.uk/api-keys/register/
- # api_key: 'unset'
-
- - name: crossref
- engine: crossref
- shortcut: cr
- timeout: 30
- disabled: true
-
- - name: crowdview
- engine: json_engine
- shortcut: cv
- categories: general
- paging: false
- search_url: https://crowdview-next-js.onrender.com/api/search-v3?query={query}
- results_query: results
- url_query: link
- title_query: title
- content_query: snippet
- disabled: true
- about:
- website: https://crowdview.ai/
-
- - name: yep
- engine: yep
- shortcut: yep
- categories: general
- search_type: web
- disabled: true
-
- - name: yep images
- engine: yep
- shortcut: yepi
- categories: images
- search_type: images
- disabled: true
-
- - name: yep news
- engine: yep
- shortcut: yepn
- categories: news
- search_type: news
- disabled: true
-
- - name: curlie
- engine: xpath
- shortcut: cl
- categories: general
- disabled: true
- paging: true
- lang_all: ''
- search_url: https://curlie.org/search?q={query}&lang={lang}&start={pageno}&stime=92452189
- page_size: 20
- results_xpath: //div[@id="site-list-content"]/div[@class="site-item"]
- url_xpath: ./div[@class="title-and-desc"]/a/@href
- title_xpath: ./div[@class="title-and-desc"]/a/div
- content_xpath: ./div[@class="title-and-desc"]/div[@class="site-descr"]
- about:
- website: https://curlie.org/
- wikidata_id: Q60715723
- use_official_api: false
- require_api_key: false
- results: HTML
-
- - name: currency
- engine: currency_convert
- categories: general
- shortcut: cc
-
- - name: bahnhof
- engine: json_engine
- search_url: https://www.bahnhof.de/api/stations/search/{query}
- url_prefix: https://www.bahnhof.de/
- url_query: slug
- title_query: name
- content_query: state
- shortcut: bf
- disabled: true
- about:
- website: https://www.bahn.de
- wikidata_id: Q22811603
- use_official_api: false
- require_api_key: false
- results: JSON
- language: de
-
- - name: deezer
- engine: deezer
- shortcut: dz
- disabled: true
-
- - name: destatis
- engine: destatis
- shortcut: destat
- disabled: true
-
- - name: deviantart
- engine: deviantart
- shortcut: da
- timeout: 3.0
-
- - name: ddg definitions
- engine: duckduckgo_definitions
- shortcut: ddd
- weight: 2
- disabled: true
- tests: *tests_infobox
-
- # cloudflare protected
- # - name: digbt
- # engine: digbt
- # shortcut: dbt
- # timeout: 6.0
- # disabled: true
-
- - name: docker hub
- engine: docker_hub
- shortcut: dh
- categories: [it, packages]
-
- - name: erowid
- engine: xpath
- paging: true
- first_page_num: 0
- page_size: 30
- search_url: https://www.erowid.org/search.php?q={query}&s={pageno}
- url_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/@href
- title_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/text()
- content_xpath: //dl[@class="results-list"]/dd[@class="result-details"]
- categories: []
- shortcut: ew
- disabled: true
- about:
- website: https://www.erowid.org/
- wikidata_id: Q1430691
- official_api_documentation:
- use_official_api: false
- require_api_key: false
- results: HTML
-
- # - name: elasticsearch
- # shortcut: es
- # engine: elasticsearch
- # base_url: http://localhost:9200
- # username: elastic
- # password: changeme
- # index: my-index
- # # available options: match, simple_query_string, term, terms, custom
- # query_type: match
- # # if query_type is set to custom, provide your query here
- # #custom_query_json: {"query":{"match_all": {}}}
- # #show_metadata: false
- # disabled: true
-
- - name: wikidata
- engine: wikidata
- shortcut: wd
- timeout: 3.0
- weight: 2
- # add "list" to the array to get results in the results list
- display_type: ['infobox']
- tests: *tests_infobox
- categories: [general]
-
- - name: duckduckgo
- engine: duckduckgo
- shortcut: ddg
-
- - name: duckduckgo images
- engine: duckduckgo_extra
- categories: [images, web]
- ddg_category: images
- shortcut: ddi
- disabled: true
-
- - name: duckduckgo videos
- engine: duckduckgo_extra
- categories: [videos, web]
- ddg_category: videos
- shortcut: ddv
- disabled: true
-
- - name: duckduckgo news
- engine: duckduckgo_extra
- categories: [news, web]
- ddg_category: news
- shortcut: ddn
- disabled: true
-
- - name: duckduckgo weather
- engine: duckduckgo_weather
- shortcut: ddw
- disabled: true
-
- - name: apple maps
- engine: apple_maps
- shortcut: apm
- disabled: true
- timeout: 5.0
-
- - name: emojipedia
- engine: emojipedia
- timeout: 4.0
- shortcut: em
- disabled: true
-
- - name: tineye
- engine: tineye
- shortcut: tin
- timeout: 9.0
- disabled: true
-
- - name: etymonline
- engine: xpath
- paging: true
- search_url: https://etymonline.com/search?page={pageno}&q={query}
- url_xpath: //a[contains(@class, "word__name--")]/@href
- title_xpath: //a[contains(@class, "word__name--")]
- content_xpath: //section[contains(@class, "word__defination")]
- first_page_num: 1
- shortcut: et
- categories: [dictionaries]
- about:
- website: https://www.etymonline.com/
- wikidata_id: Q1188617
- official_api_documentation:
- use_official_api: false
- require_api_key: false
- results: HTML
-
- # - name: ebay
- # engine: ebay
- # shortcut: eb
- # base_url: 'https://www.ebay.com'
- # disabled: true
- # timeout: 5
-
- - name: 1x
- engine: www1x
- shortcut: 1x
- timeout: 3.0
- disabled: true
-
- - name: fdroid
- engine: fdroid
- shortcut: fd
- disabled: true
-
- - name: flickr
- categories: images
- shortcut: fl
- # You can use the engine using the official stable API, but you need an API
- # key, see: https://www.flickr.com/services/apps/create/
- # engine: flickr
- # api_key: 'apikey' # required!
- # Or you can use the html non-stable engine, activated by default
- engine: flickr_noapi
-
- - name: free software directory
- engine: mediawiki
- shortcut: fsd
- categories: [it, software wikis]
- base_url: https://directory.fsf.org/
- search_type: title
- timeout: 5.0
- disabled: true
- about:
- website: https://directory.fsf.org/
- wikidata_id: Q2470288
-
- # - name: freesound
- # engine: freesound
- # shortcut: fnd
- # disabled: true
- # timeout: 15.0
- # API key required, see: https://freesound.org/docs/api/overview.html
- # api_key: MyAPIkey
-
- - name: frinkiac
- engine: frinkiac
- shortcut: frk
- disabled: true
-
- - name: fyyd
- engine: fyyd
- shortcut: fy
- timeout: 8.0
- disabled: true
-
- - name: genius
- engine: genius
- shortcut: gen
-
- - name: gentoo
- engine: gentoo
- shortcut: ge
- timeout: 10.0
-
- - name: gitlab
- engine: json_engine
- paging: true
- search_url: https://gitlab.com/api/v4/projects?search={query}&page={pageno}
- url_query: web_url
- title_query: name_with_namespace
- content_query: description
- page_size: 20
- categories: [it, repos]
- shortcut: gl
- timeout: 10.0
- disabled: true
- about:
- website: https://about.gitlab.com/
- wikidata_id: Q16639197
- official_api_documentation: https://docs.gitlab.com/ee/api/
- use_official_api: false
- require_api_key: false
- results: JSON
-
- - name: github
- engine: github
- shortcut: gh
-
- # This a Gitea service. If you would like to use a different instance,
- # change codeberg.org to URL of the desired Gitea host. Or you can create a
- # new engine by copying this and changing the name, shortcut and search_url.
-
- - name: codeberg
- engine: json_engine
- search_url: https://codeberg.org/api/v1/repos/search?q={query}&limit=10
- url_query: html_url
- title_query: name
- content_query: description
- categories: [it, repos]
- shortcut: cb
- disabled: true
- about:
- website: https://codeberg.org/
- wikidata_id:
- official_api_documentation: https://try.gitea.io/api/swagger
- use_official_api: false
- require_api_key: false
- results: JSON
-
- - name: goodreads
- engine: goodreads
- shortcut: good
- timeout: 4.0
- disabled: true
-
- - name: google
- engine: google
- shortcut: go
- # additional_tests:
- # android: *test_android
-
- - name: google images
- engine: google_images
- shortcut: goi
- # additional_tests:
- # android: *test_android
- # dali:
- # matrix:
- # query: ['Dali Christ']
- # lang: ['en', 'de', 'fr', 'zh-CN']
- # result_container:
- # - ['one_title_contains', 'Salvador']
-
- - name: google news
- engine: google_news
- shortcut: gon
- # additional_tests:
- # android: *test_android
-
- - name: google videos
- engine: google_videos
- shortcut: gov
- # additional_tests:
- # android: *test_android
-
- - name: google scholar
- engine: google_scholar
- shortcut: gos
-
- - name: google play apps
- engine: google_play
- categories: [files, apps]
- shortcut: gpa
- play_categ: apps
- disabled: true
-
- - name: google play movies
- engine: google_play
- categories: videos
- shortcut: gpm
- play_categ: movies
- disabled: true
-
- - name: material icons
- engine: material_icons
- categories: images
- shortcut: mi
- disabled: true
-
- - name: gpodder
- engine: json_engine
- shortcut: gpod
- timeout: 4.0
- paging: false
- search_url: https://gpodder.net/search.json?q={query}
- url_query: url
- title_query: title
- content_query: description
- page_size: 19
- categories: music
- disabled: true
- about:
- website: https://gpodder.net
- wikidata_id: Q3093354
- official_api_documentation: https://gpoddernet.readthedocs.io/en/latest/api/
- use_official_api: false
- requires_api_key: false
- results: JSON
-
- - name: habrahabr
- engine: xpath
- paging: true
- search_url: https://habr.com/en/search/page{pageno}/?q={query}
- results_xpath: //article[contains(@class, "tm-articles-list__item")]
- url_xpath: .//a[@class="tm-title__link"]/@href
- title_xpath: .//a[@class="tm-title__link"]
- content_xpath: .//div[contains(@class, "article-formatted-body")]
- categories: it
- timeout: 4.0
- disabled: true
- shortcut: habr
- about:
- website: https://habr.com/
- wikidata_id: Q4494434
- official_api_documentation: https://habr.com/en/docs/help/api/
- use_official_api: false
- require_api_key: false
- results: HTML
-
- - name: hackernews
- engine: hackernews
- shortcut: hn
- disabled: true
-
- - name: hoogle
- engine: xpath
- paging: true
- search_url: https://hoogle.haskell.org/?hoogle={query}&start={pageno}
- results_xpath: '//div[@class="result"]'
- title_xpath: './/div[@class="ans"]//a'
- url_xpath: './/div[@class="ans"]//a/@href'
- content_xpath: './/div[@class="from"]'
- page_size: 20
- categories: [it, packages]
- shortcut: ho
- about:
- website: https://hoogle.haskell.org/
- wikidata_id: Q34010
- official_api_documentation: https://hackage.haskell.org/api
- use_official_api: false
- require_api_key: false
- results: JSON
-
- - name: imdb
- engine: imdb
- shortcut: imdb
- timeout: 6.0
- disabled: true
-
- - name: imgur
- engine: imgur
- shortcut: img
- disabled: true
-
- - name: ina
- engine: ina
- shortcut: in
- timeout: 6.0
- disabled: true
-
- - name: invidious
- engine: invidious
- # Instanes will be selected randomly, see https://api.invidious.io/ for
- # instances that are stable (good uptime) and close to you.
- base_url:
- - https://invidious.io.lol
- - https://invidious.fdn.fr
- - https://yt.artemislena.eu
- - https://invidious.tiekoetter.com
- - https://invidious.flokinet.to
- - https://vid.puffyan.us
- - https://invidious.privacydev.net
- - https://inv.tux.pizza
- shortcut: iv
- timeout: 3.0
- disabled: true
-
- - name: jisho
- engine: jisho
- shortcut: js
- timeout: 3.0
- disabled: true
-
- - name: kickass
- engine: kickass
- base_url:
- - https://kickasstorrents.to
- - https://kickasstorrents.cr
- - https://kickasstorrent.cr
- - https://kickass.sx
- - https://kat.am
- shortcut: kc
- timeout: 4.0
-
- - name: lemmy communities
- engine: lemmy
- lemmy_type: Communities
- shortcut: leco
-
- - name: lemmy users
- engine: lemmy
- network: lemmy communities
- lemmy_type: Users
- shortcut: leus
-
- - name: lemmy posts
- engine: lemmy
- network: lemmy communities
- lemmy_type: Posts
- shortcut: lepo
-
- - name: lemmy comments
- engine: lemmy
- network: lemmy communities
- lemmy_type: Comments
- shortcut: lecom
-
- - name: library genesis
- engine: xpath
- # search_url: https://libgen.is/search.php?req={query}
- search_url: https://libgen.rs/search.php?req={query}
- url_xpath: //a[contains(@href,"book/index.php?md5")]/@href
- title_xpath: //a[contains(@href,"book/")]/text()[1]
- content_xpath: //td/a[1][contains(@href,"=author")]/text()
- categories: files
- timeout: 7.0
- disabled: true
- shortcut: lg
- about:
- website: https://libgen.fun/
- wikidata_id: Q22017206
- official_api_documentation:
- use_official_api: false
- require_api_key: false
- results: HTML
-
- - name: z-library
- engine: zlibrary
- shortcut: zlib
- categories: files
- timeout: 7.0
-
- - name: library of congress
- engine: loc
- shortcut: loc
- categories: images
-
- - name: lingva
- engine: lingva
- shortcut: lv
- # set lingva instance in url, by default it will use the official instance
- # url: https://lingva.thedaviddelta.com
-
- - name: lobste.rs
- engine: xpath
- search_url: https://lobste.rs/search?utf8=%E2%9C%93&q={query}&what=stories&order=relevance
- results_xpath: //li[contains(@class, "story")]
- url_xpath: .//a[@class="u-url"]/@href
- title_xpath: .//a[@class="u-url"]
- content_xpath: .//a[@class="domain"]
- categories: it
- shortcut: lo
- timeout: 5.0
- disabled: true
- about:
- website: https://lobste.rs/
- wikidata_id: Q60762874
- official_api_documentation:
- use_official_api: false
- require_api_key: false
- results: HTML
-
- - name: mastodon users
- engine: mastodon
- mastodon_type: accounts
- base_url: https://mastodon.social
- shortcut: mau
-
- - name: mastodon hashtags
- engine: mastodon
- mastodon_type: hashtags
- base_url: https://mastodon.social
- shortcut: mah
-
- # - name: matrixrooms
- # engine: mrs
- # # https://docs.searxng.org/dev/engines/online/mrs.html
- # # base_url: https://mrs-api-host
- # shortcut: mtrx
- # disabled: true
-
- - name: mdn
- shortcut: mdn
- engine: json_engine
- categories: [it]
- paging: true
- search_url: https://developer.mozilla.org/api/v1/search?q={query}&page={pageno}
- results_query: documents
- url_query: mdn_url
- url_prefix: https://developer.mozilla.org
- title_query: title
- content_query: summary
- about:
- website: https://developer.mozilla.org
- wikidata_id: Q3273508
- official_api_documentation: null
- use_official_api: false
- require_api_key: false
- results: JSON
-
- - name: metacpan
- engine: metacpan
- shortcut: cpan
- disabled: true
- number_of_results: 20
-
- # - name: meilisearch
- # engine: meilisearch
- # shortcut: mes
- # enable_http: true
- # base_url: http://localhost:7700
- # index: my-index
-
- - name: mixcloud
- engine: mixcloud
- shortcut: mc
-
- # MongoDB engine
- # Required dependency: pymongo
- # - name: mymongo
- # engine: mongodb
- # shortcut: md
- # exact_match_only: false
- # host: '127.0.0.1'
- # port: 27017
- # enable_http: true
- # results_per_page: 20
- # database: 'business'
- # collection: 'reviews' # name of the db collection
- # key: 'name' # key in the collection to search for
-
- - name: mozhi
- engine: mozhi
- base_url:
- - https://mozhi.aryak.me
- - https://translate.bus-hit.me
- - https://nyc1.mz.ggtyler.dev
- # mozhi_engine: google - see https://mozhi.aryak.me for supported engines
- timeout: 4.0
- shortcut: mz
- disabled: true
-
- - name: mwmbl
- engine: mwmbl
- # api_url: https://api.mwmbl.org
- shortcut: mwm
- disabled: true
-
- - name: npm
- engine: json_engine
- paging: true
- first_page_num: 0
- search_url: https://api.npms.io/v2/search?q={query}&size=25&from={pageno}
- results_query: results
- url_query: package/links/npm
- title_query: package/name
- content_query: package/description
- page_size: 25
- categories: [it, packages]
- disabled: true
- timeout: 5.0
- shortcut: npm
- about:
- website: https://npms.io/
- wikidata_id: Q7067518
- official_api_documentation: https://api-docs.npms.io/
- use_official_api: false
- require_api_key: false
- results: JSON
-
- - name: nyaa
- engine: nyaa
- shortcut: nt
- disabled: true
-
- - name: mankier
- engine: json_engine
- search_url: https://www.mankier.com/api/v2/mans/?q={query}
- results_query: results
- url_query: url
- title_query: name
- content_query: description
- categories: it
- shortcut: man
- about:
- website: https://www.mankier.com/
- official_api_documentation: https://www.mankier.com/api
- use_official_api: true
- require_api_key: false
- results: JSON
-
- - name: odysee
- engine: odysee
- shortcut: od
- disabled: true
-
- - name: openairedatasets
- engine: json_engine
- paging: true
- search_url: https://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query}
- results_query: response/results/result
- url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$
- title_query: metadata/oaf:entity/oaf:result/title/$
- content_query: metadata/oaf:entity/oaf:result/description/$
- content_html_to_text: true
- categories: 'science'
- shortcut: oad
- timeout: 5.0
- about:
- website: https://www.openaire.eu/
- wikidata_id: Q25106053
- official_api_documentation: https://api.openaire.eu/
- use_official_api: false
- require_api_key: false
- results: JSON
-
- - name: openairepublications
- engine: json_engine
- paging: true
- search_url: https://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query}
- results_query: response/results/result
- url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$
- title_query: metadata/oaf:entity/oaf:result/title/$
- content_query: metadata/oaf:entity/oaf:result/description/$
- content_html_to_text: true
- categories: science
- shortcut: oap
- timeout: 5.0
- about:
- website: https://www.openaire.eu/
- wikidata_id: Q25106053
- official_api_documentation: https://api.openaire.eu/
- use_official_api: false
- require_api_key: false
- results: JSON
-
- # - name: opensemanticsearch
- # engine: opensemantic
- # shortcut: oss
- # base_url: 'http://localhost:8983/solr/opensemanticsearch/'
-
- - name: openstreetmap
- engine: openstreetmap
- shortcut: osm
-
- - name: openrepos
- engine: xpath
- paging: true
- search_url: https://openrepos.net/search/node/{query}?page={pageno}
- url_xpath: //li[@class="search-result"]//h3[@class="title"]/a/@href
- title_xpath: //li[@class="search-result"]//h3[@class="title"]/a
- content_xpath: //li[@class="search-result"]//div[@class="search-snippet-info"]//p[@class="search-snippet"]
- categories: files
- timeout: 4.0
- disabled: true
- shortcut: or
- about:
- website: https://openrepos.net/
- wikidata_id:
- official_api_documentation:
- use_official_api: false
- require_api_key: false
- results: HTML
-
- - name: packagist
- engine: json_engine
- paging: true
- search_url: https://packagist.org/search.json?q={query}&page={pageno}
- results_query: results
- url_query: url
- title_query: name
- content_query: description
- categories: [it, packages]
- disabled: true
- timeout: 5.0
- shortcut: pack
- about:
- website: https://packagist.org
- wikidata_id: Q108311377
- official_api_documentation: https://packagist.org/apidoc
- use_official_api: true
- require_api_key: false
- results: JSON
-
- - name: pdbe
- engine: pdbe
- shortcut: pdb
- # Hide obsolete PDB entries. Default is not to hide obsolete structures
- # hide_obsolete: false
-
- - name: photon
- engine: photon
- shortcut: ph
-
- - name: pinterest
- engine: pinterest
- shortcut: pin
-
- - name: piped
- engine: piped
- shortcut: ppd
- categories: videos
- piped_filter: videos
- timeout: 3.0
-
- # URL to use as link and for embeds
- frontend_url: https://srv.piped.video
- # Instance will be selected randomly, for more see https://piped-instances.kavin.rocks/
- backend_url:
- - https://pipedapi.kavin.rocks
- - https://pipedapi-libre.kavin.rocks
- - https://pipedapi.adminforge.de
-
- - name: piped.music
- engine: piped
- network: piped
- shortcut: ppdm
- categories: music
- piped_filter: music_songs
- timeout: 3.0
-
- - name: piratebay
- engine: piratebay
- shortcut: tpb
- # You may need to change this URL to a proxy if piratebay is blocked in your
- # country
- url: https://thepiratebay.org/
- timeout: 3.0
-
- - name: podcastindex
- engine: podcastindex
- shortcut: podcast
-
- # Required dependency: psychopg2
- # - name: postgresql
- # engine: postgresql
- # database: postgres
- # username: postgres
- # password: postgres
- # limit: 10
- # query_str: 'SELECT * from my_table WHERE my_column = %(query)s'
- # shortcut : psql
-
- - name: presearch
- engine: presearch
- search_type: search
- categories: [general, web]
- shortcut: ps
- timeout: 4.0
- disabled: true
-
- - name: presearch images
- engine: presearch
- network: presearch
- search_type: images
- categories: [images, web]
- timeout: 4.0
- shortcut: psimg
- disabled: true
-
- - name: presearch videos
- engine: presearch
- network: presearch
- search_type: videos
- categories: [general, web]
- timeout: 4.0
- shortcut: psvid
- disabled: true
-
- - name: presearch news
- engine: presearch
- network: presearch
- search_type: news
- categories: [news, web]
- timeout: 4.0
- shortcut: psnews
- disabled: true
-
- - name: pub.dev
- engine: xpath
- shortcut: pd
- search_url: https://pub.dev/packages?q={query}&page={pageno}
- paging: true
- results_xpath: //div[contains(@class,"packages-item")]
- url_xpath: ./div/h3/a/@href
- title_xpath: ./div/h3/a
- content_xpath: ./div/div/div[contains(@class,"packages-description")]/span
- categories: [packages, it]
- timeout: 3.0
- disabled: true
- first_page_num: 1
- about:
- website: https://pub.dev/
- official_api_documentation: https://pub.dev/help/api
- use_official_api: false
- require_api_key: false
- results: HTML
-
- - name: pubmed
- engine: pubmed
- shortcut: pub
- timeout: 3.0
-
- - name: pypi
- shortcut: pypi
- engine: xpath
- paging: true
- search_url: https://pypi.org/search/?q={query}&page={pageno}
- results_xpath: /html/body/main/div/div/div/form/div/ul/li/a[@class="package-snippet"]
- url_xpath: ./@href
- title_xpath: ./h3/span[@class="package-snippet__name"]
- content_xpath: ./p
- suggestion_xpath: /html/body/main/div/div/div/form/div/div[@class="callout-block"]/p/span/a[@class="link"]
- first_page_num: 1
- categories: [it, packages]
- about:
- website: https://pypi.org
- wikidata_id: Q2984686
- official_api_documentation: https://warehouse.readthedocs.io/api-reference/index.html
- use_official_api: false
- require_api_key: false
- results: HTML
-
- - name: qwant
- qwant_categ: web
- engine: qwant
- shortcut: qw
- categories: [general, web]
- additional_tests:
- rosebud: *test_rosebud
-
- - name: qwant news
- qwant_categ: news
- engine: qwant
- shortcut: qwn
- categories: news
- network: qwant
-
- - name: qwant images
- qwant_categ: images
- engine: qwant
- shortcut: qwi
- categories: [images, web]
- network: qwant
-
- - name: qwant videos
- qwant_categ: videos
- engine: qwant
- shortcut: qwv
- categories: [videos, web]
- network: qwant
-
- # - name: library
- # engine: recoll
- # shortcut: lib
- # base_url: 'https://recoll.example.org/'
- # search_dir: ''
- # mount_prefix: /export
- # dl_prefix: 'https://download.example.org'
- # timeout: 30.0
- # categories: files
- # disabled: true
-
- # - name: recoll library reference
- # engine: recoll
- # base_url: 'https://recoll.example.org/'
- # search_dir: reference
- # mount_prefix: /export
- # dl_prefix: 'https://download.example.org'
- # shortcut: libr
- # timeout: 30.0
- # categories: files
- # disabled: true
-
- - name: radio browser
- engine: radio_browser
- shortcut: rb
-
- - name: reddit
- engine: reddit
- shortcut: re
- page_size: 25
-
- - name: rottentomatoes
- engine: rottentomatoes
- shortcut: rt
- disabled: true
-
- # Required dependency: redis
- # - name: myredis
- # shortcut : rds
- # engine: redis_server
- # exact_match_only: false
- # host: '127.0.0.1'
- # port: 6379
- # enable_http: true
- # password: ''
- # db: 0
-
- # tmp suspended: bad certificate
- # - name: scanr structures
- # shortcut: scs
- # engine: scanr_structures
- # disabled: true
-
- - name: sepiasearch
- engine: sepiasearch
- shortcut: sep
-
- - name: soundcloud
- engine: soundcloud
- shortcut: sc
-
- - name: stackoverflow
- engine: stackexchange
- shortcut: st
- api_site: 'stackoverflow'
- categories: [it, q&a]
-
- - name: askubuntu
- engine: stackexchange
- shortcut: ubuntu
- api_site: 'askubuntu'
- categories: [it, q&a]
-
- - name: internetarchivescholar
- engine: internet_archive_scholar
- shortcut: ias
- timeout: 5.0
-
- - name: superuser
- engine: stackexchange
- shortcut: su
- api_site: 'superuser'
- categories: [it, q&a]
-
- - name: searchcode code
- engine: searchcode_code
- shortcut: scc
- disabled: true
-
- # - name: searx
- # engine: searx_engine
- # shortcut: se
- # instance_urls :
- # - http://127.0.0.1:8888/
- # - ...
- # disabled: true
-
- - name: semantic scholar
- engine: semantic_scholar
- disabled: true
- shortcut: se
-
- # Spotify needs API credentials
- # - name: spotify
- # engine: spotify
- # shortcut: stf
- # api_client_id: *******
- # api_client_secret: *******
-
- # - name: solr
- # engine: solr
- # shortcut: slr
- # base_url: http://localhost:8983
- # collection: collection_name
- # sort: '' # sorting: asc or desc
- # field_list: '' # comma separated list of field names to display on the UI
- # default_fields: '' # default field to query
- # query_fields: '' # query fields
- # enable_http: true
-
- # - name: springer nature
- # engine: springer
- # # get your API key from: https://dev.springernature.com/signup
- # # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601"
- # api_key: 'unset'
- # shortcut: springer
- # timeout: 15.0
-
- - name: startpage
- engine: startpage
- shortcut: sp
- timeout: 6.0
- disabled: true
- additional_tests:
- rosebud: *test_rosebud
-
- - name: tokyotoshokan
- engine: tokyotoshokan
- shortcut: tt
- timeout: 6.0
- disabled: true
-
- - name: solidtorrents
- engine: solidtorrents
- shortcut: solid
- timeout: 4.0
- base_url:
- - https://solidtorrents.to
- - https://bitsearch.to
-
- # For this demo of the sqlite engine download:
- # https://liste.mediathekview.de/filmliste-v2.db.bz2
- # and unpack into searx/data/filmliste-v2.db
- # Query to test: "!demo concert"
- #
- # - name: demo
- # engine: sqlite
- # shortcut: demo
- # categories: general
- # result_template: default.html
- # database: searx/data/filmliste-v2.db
- # query_str: >-
- # SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title,
- # COALESCE( NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url,
- # description AS content
- # FROM film
- # WHERE title LIKE :wildcard OR description LIKE :wildcard
- # ORDER BY duration DESC
-
- - name: tagesschau
- engine: tagesschau
- # when set to false, display URLs from Tagesschau, and not the actual source
- # (e.g. NDR, WDR, SWR, HR, ...)
- use_source_url: true
- shortcut: ts
- disabled: true
-
- - name: tmdb
- engine: xpath
- paging: true
- categories: movies
- search_url: https://www.themoviedb.org/search?page={pageno}&query={query}
- results_xpath: //div[contains(@class,"movie") or contains(@class,"tv")]//div[contains(@class,"card")]
- url_xpath: .//div[contains(@class,"poster")]/a/@href
- thumbnail_xpath: .//img/@src
- title_xpath: .//div[contains(@class,"title")]//h2
- content_xpath: .//div[contains(@class,"overview")]
- shortcut: tm
- disabled: true
-
- # Requires Tor
- - name: torch
- engine: xpath
- paging: true
- search_url: http://xmh57jrknzkhv6y3ls3ubitzfqnkrwxhopf5aygthi7d6rplyvk3noyd.onion/cgi-bin/omega/omega?P={query}&DEFAULTOP=and
- results_xpath: //table//tr
- url_xpath: ./td[2]/a
- title_xpath: ./td[2]/b
- content_xpath: ./td[2]/small
- categories: onions
- enable_http: true
- shortcut: tch
-
- # torznab engine lets you query any torznab compatible indexer. Using this
- # engine in combination with Jackett opens the possibility to query a lot of
- # public and private indexers directly from SearXNG. More details at:
- # https://docs.searxng.org/dev/engines/online/torznab.html
- #
- # - name: Torznab EZTV
- # engine: torznab
- # shortcut: eztv
- # base_url: http://localhost:9117/api/v2.0/indexers/eztv/results/torznab
- # enable_http: true # if using localhost
- # api_key: xxxxxxxxxxxxxxx
- # show_magnet_links: true
- # show_torrent_files: false
- # # https://github.com/Jackett/Jackett/wiki/Jackett-Categories
- # torznab_categories: # optional
- # - 2000
- # - 5000
-
- # tmp suspended - too slow, too many errors
- # - name: urbandictionary
- # engine : xpath
- # search_url : https://www.urbandictionary.com/define.php?term={query}
- # url_xpath : //*[@class="word"]/@href
- # title_xpath : //*[@class="def-header"]
- # content_xpath: //*[@class="meaning"]
- # shortcut: ud
-
- - name: unsplash
- engine: unsplash
- shortcut: us
-
- - name: yandex music
- engine: yandex_music
- shortcut: ydm
- disabled: true
- # https://yandex.com/support/music/access.html
- inactive: true
-
- - name: yahoo
- engine: yahoo
- shortcut: yh
- disabled: true
-
- - name: yahoo news
- engine: yahoo_news
- shortcut: yhn
-
- - name: youtube
- shortcut: yt
- # You can use the engine using the official stable API, but you need an API
- # key See: https://console.developers.google.com/project
- #
- # engine: youtube_api
- # api_key: 'apikey' # required!
- #
- # Or you can use the html non-stable engine, activated by default
- engine: youtube_noapi
-
- - name: dailymotion
- engine: dailymotion
- shortcut: dm
-
- - name: vimeo
- engine: vimeo
- shortcut: vm
-
- - name: wiby
- engine: json_engine
- paging: true
- search_url: https://wiby.me/json/?q={query}&p={pageno}
- url_query: URL
- title_query: Title
- content_query: Snippet
- categories: [general, web]
- shortcut: wib
- disabled: true
- about:
- website: https://wiby.me/
-
- - name: alexandria
- engine: json_engine
- shortcut: alx
- categories: general
- paging: true
- search_url: https://api.alexandria.org/?a=1&q={query}&p={pageno}
- results_query: results
- title_query: title
- url_query: url
- content_query: snippet
- timeout: 1.5
- disabled: true
- about:
- website: https://alexandria.org/
- official_api_documentation: https://github.com/alexandria-org/alexandria-api/raw/master/README.md
- use_official_api: true
- require_api_key: false
- results: JSON
-
- - name: wikibooks
- engine: mediawiki
- weight: 0.5
- shortcut: wb
- categories: [general, wikimedia]
- base_url: 'https://{language}.wikibooks.org/'
- search_type: text
- disabled: true
- about:
- website: https://www.wikibooks.org/
- wikidata_id: Q367
-
- - name: wikinews
- engine: mediawiki
- shortcut: wn
- categories: [news, wikimedia]
- base_url: 'https://{language}.wikinews.org/'
- search_type: text
- srsort: create_timestamp_desc
- about:
- website: https://www.wikinews.org/
- wikidata_id: Q964
-
- - name: wikiquote
- engine: mediawiki
- weight: 0.5
- shortcut: wq
- categories: [general, wikimedia]
- base_url: 'https://{language}.wikiquote.org/'
- search_type: text
- disabled: true
- additional_tests:
- rosebud: *test_rosebud
- about:
- website: https://www.wikiquote.org/
- wikidata_id: Q369
-
- - name: wikisource
- engine: mediawiki
- weight: 0.5
- shortcut: ws
- categories: [general, wikimedia]
- base_url: 'https://{language}.wikisource.org/'
- search_type: text
- disabled: true
- about:
- website: https://www.wikisource.org/
- wikidata_id: Q263
-
- - name: wikispecies
- engine: mediawiki
- shortcut: wsp
- categories: [general, science, wikimedia]
- base_url: 'https://species.wikimedia.org/'
- search_type: text
- disabled: true
- about:
- website: https://species.wikimedia.org/
- wikidata_id: Q13679
-
- - name: wiktionary
- engine: mediawiki
- shortcut: wt
- categories: [dictionaries, wikimedia]
- base_url: 'https://{language}.wiktionary.org/'
- search_type: text
- about:
- website: https://www.wiktionary.org/
- wikidata_id: Q151
-
- - name: wikiversity
- engine: mediawiki
- weight: 0.5
- shortcut: wv
- categories: [general, wikimedia]
- base_url: 'https://{language}.wikiversity.org/'
- search_type: text
- disabled: true
- about:
- website: https://www.wikiversity.org/
- wikidata_id: Q370
-
- - name: wikivoyage
- engine: mediawiki
- weight: 0.5
- shortcut: wy
- categories: [general, wikimedia]
- base_url: 'https://{language}.wikivoyage.org/'
- search_type: text
- disabled: true
- about:
- website: https://www.wikivoyage.org/
- wikidata_id: Q373
-
- - name: wikicommons.images
- engine: wikicommons
- shortcut: wc
- categories: images
- number_of_results: 10
-
- name: wolframalpha
- shortcut: wa
- # You can use the engine using the official stable API, but you need an API
- # key. See: https://products.wolframalpha.com/api/
- #
- # engine: wolframalpha_api
- # api_key: ''
- #
- # Or you can use the html non-stable engine, activated by default
- engine: wolframalpha_noapi
- timeout: 6.0
- categories: general
disabled: false
-
- - name: dictzone
- engine: dictzone
- shortcut: dc
-
- - name: mymemory translated
- engine: translated
- shortcut: tl
- timeout: 5.0
- # You can use without an API key, but you are limited to 1000 words/day
- # See: https://mymemory.translated.net/doc/usagelimits.php
- # api_key: ''
-
- # Required dependency: mysql-connector-python
- # - name: mysql
- # engine: mysql_server
- # database: mydatabase
- # username: user
- # password: pass
- # limit: 10
- # query_str: 'SELECT * from mytable WHERE fieldname=%(query)s'
- # shortcut: mysql
-
- - name: 1337x
- engine: 1337x
- shortcut: 1337x
- disabled: true
-
- - name: duden
- engine: duden
- shortcut: du
- disabled: true
-
- - name: seznam
- shortcut: szn
- engine: seznam
- disabled: true
-
- # - name: deepl
- # engine: deepl
- # shortcut: dpl
- # # You can use the engine using the official stable API, but you need an API key
- # # See: https://www.deepl.com/pro-api?cta=header-pro-api
- # api_key: '' # required!
- # timeout: 5.0
- # disabled: true
-
- - name: mojeek
- shortcut: mjk
- engine: xpath
- paging: true
- categories: [general, web]
- search_url: https://www.mojeek.com/search?q={query}&s={pageno}&lang={lang}&lb={lang}
- results_xpath: //ul[@class="results-standard"]/li/a[@class="ob"]
- url_xpath: ./@href
- title_xpath: ../h2/a
- content_xpath: ..//p[@class="s"]
- suggestion_xpath: //div[@class="top-info"]/p[@class="top-info spell"]/em/a
- first_page_num: 0
- page_size: 10
- max_page: 100
- disabled: true
- about:
- website: https://www.mojeek.com/
- wikidata_id: Q60747299
- official_api_documentation: https://www.mojeek.com/services/api.html/
- use_official_api: false
- require_api_key: false
- results: HTML
-
- - name: moviepilot
- engine: moviepilot
- shortcut: mp
- disabled: true
-
- - name: naver
- shortcut: nvr
- categories: [general, web]
- engine: xpath
- paging: true
- search_url: https://search.naver.com/search.naver?where=webkr&sm=osp_hty&ie=UTF-8&query={query}&start={pageno}
- url_xpath: //a[@class="link_tit"]/@href
- title_xpath: //a[@class="link_tit"]
- content_xpath: //a[@class="total_dsc"]/div
- first_page_num: 1
- page_size: 10
- disabled: true
- about:
- website: https://www.naver.com/
- wikidata_id: Q485639
- official_api_documentation: https://developers.naver.com/docs/nmt/examples/
- use_official_api: false
- require_api_key: false
- results: HTML
- language: ko
-
- - name: rubygems
- shortcut: rbg
- engine: xpath
- paging: true
- search_url: https://rubygems.org/search?page={pageno}&query={query}
- results_xpath: /html/body/main/div/a[@class="gems__gem"]
- url_xpath: ./@href
- title_xpath: ./span/h2
- content_xpath: ./span/p
- suggestion_xpath: /html/body/main/div/div[@class="search__suggestions"]/p/a
- first_page_num: 1
- categories: [it, packages]
- disabled: true
- about:
- website: https://rubygems.org/
- wikidata_id: Q1853420
- official_api_documentation: https://guides.rubygems.org/rubygems-org-api/
- use_official_api: false
- require_api_key: false
- results: HTML
-
- - name: peertube
- engine: peertube
- shortcut: ptb
- paging: true
- # alternatives see: https://instances.joinpeertube.org/instances
- # base_url: https://tube.4aem.com
- categories: videos
- disabled: true
- timeout: 6.0
-
- - name: mediathekviewweb
- engine: mediathekviewweb
- shortcut: mvw
- disabled: true
-
- - name: yacy
- engine: yacy
- categories: general
- search_type: text
- base_url: https://yacy.searchlab.eu
- shortcut: ya
- disabled: true
- # required if you aren't using HTTPS for your local yacy instance
- # https://docs.searxng.org/dev/engines/online/yacy.html
- # enable_http: true
- # timeout: 3.0
- # search_mode: 'global'
-
- - name: yacy images
- engine: yacy
- categories: images
- search_type: image
- base_url: https://yacy.searchlab.eu
- shortcut: yai
- disabled: true
-
- - name: rumble
- engine: rumble
- shortcut: ru
- base_url: https://rumble.com/
- paging: true
- categories: videos
- disabled: true
-
- - name: livespace
- engine: livespace
- shortcut: ls
- categories: videos
- disabled: true
- timeout: 5.0
-
- - name: wordnik
- engine: wordnik
- shortcut: def
- base_url: https://www.wordnik.com/
- categories: [dictionaries]
- timeout: 5.0
-
- - name: woxikon.de synonyme
- engine: xpath
- shortcut: woxi
- categories: [dictionaries]
- timeout: 5.0
- disabled: true
- search_url: https://synonyme.woxikon.de/synonyme/{query}.php
- url_xpath: //div[@class="upper-synonyms"]/a/@href
- content_xpath: //div[@class="synonyms-list-group"]
- title_xpath: //div[@class="upper-synonyms"]/a
- no_result_for_http_status: [404]
- about:
- website: https://www.woxikon.de/
- wikidata_id: # No Wikidata ID
- use_official_api: false
- require_api_key: false
- results: HTML
- language: de
-
- - name: seekr news
- engine: seekr
- shortcut: senews
- categories: news
- seekr_category: news
- disabled: true
-
- - name: seekr images
- engine: seekr
- network: seekr news
- shortcut: seimg
- categories: images
- seekr_category: images
- disabled: true
-
- - name: seekr videos
- engine: seekr
- network: seekr news
- shortcut: sevid
- categories: videos
- seekr_category: videos
- disabled: true
-
- - name: sjp.pwn
- engine: sjp
- shortcut: sjp
- base_url: https://sjp.pwn.pl/
- timeout: 5.0
- disabled: true
-
- - name: stract
- engine: stract
- shortcut: str
- disabled: true
-
- - name: svgrepo
- engine: svgrepo
- shortcut: svg
- timeout: 10.0
- disabled: true
-
- - name: tootfinder
- engine: tootfinder
- shortcut: toot
-
- - name: wallhaven
- engine: wallhaven
- # api_key: abcdefghijklmnopqrstuvwxyz
- shortcut: wh
-
- # wikimini: online encyclopedia for children
- # The fulltext and title parameter is necessary for Wikimini because
- # sometimes it will not show the results and redirect instead
- - name: wikimini
- engine: xpath
- shortcut: wkmn
- search_url: https://fr.wikimini.org/w/index.php?search={query}&title=Sp%C3%A9cial%3ASearch&fulltext=Search
- url_xpath: //li/div[@class="mw-search-result-heading"]/a/@href
- title_xpath: //li//div[@class="mw-search-result-heading"]/a
- content_xpath: //li/div[@class="searchresult"]
- categories: general
- disabled: true
- about:
- website: https://wikimini.org/
- wikidata_id: Q3568032
- use_official_api: false
- require_api_key: false
- results: HTML
- language: fr
-
- - name: wttr.in
- engine: wttr
- shortcut: wttr
- timeout: 9.0
-
- - name: yummly
- engine: yummly
- shortcut: yum
- disabled: true
-
- - name: brave
- engine: brave
- shortcut: br
- time_range_support: true
- paging: true
- categories: [general, web]
- brave_category: search
- # brave_spellcheck: true
-
- - name: brave.images
- engine: brave
- network: brave
- shortcut: brimg
- categories: [images, web]
- brave_category: images
-
- - name: brave.videos
- engine: brave
- network: brave
- shortcut: brvid
- categories: [videos, web]
- brave_category: videos
-
- - name: brave.news
- engine: brave
- network: brave
- shortcut: brnews
- categories: news
- brave_category: news
-
- # - name: brave.goggles
- # engine: brave
- # network: brave
- # shortcut: brgog
- # time_range_support: true
- # paging: true
- # categories: [general, web]
- # brave_category: goggles
- # Goggles: # required! This should be a URL ending in .goggle
-
- - name: lib.rs
- shortcut: lrs
- engine: xpath
- search_url: https://lib.rs/search?q={query}
- results_xpath: /html/body/main/div/ol/li/a
- url_xpath: ./@href
- title_xpath: ./div[@class="h"]/h4
- content_xpath: ./div[@class="h"]/p
- categories: [it, packages]
- disabled: true
- about:
- website: https://lib.rs
- wikidata_id: Q113486010
- use_official_api: false
- require_api_key: false
- results: HTML
-
- - name: sourcehut
- shortcut: srht
- engine: xpath
- paging: true
- search_url: https://sr.ht/projects?page={pageno}&search={query}
- results_xpath: (//div[@class="event-list"])[1]/div[@class="event"]
- url_xpath: ./h4/a[2]/@href
- title_xpath: ./h4/a[2]
- content_xpath: ./p
- first_page_num: 1
- categories: [it, repos]
- disabled: true
- about:
- website: https://sr.ht
- wikidata_id: Q78514485
- official_api_documentation: https://man.sr.ht/
- use_official_api: false
- require_api_key: false
- results: HTML
-
- - name: goo
- shortcut: goo
- engine: xpath
- paging: true
- search_url: https://search.goo.ne.jp/web.jsp?MT={query}&FR={pageno}0
- url_xpath: //div[@class="result"]/p[@class='title fsL1']/a/@href
- title_xpath: //div[@class="result"]/p[@class='title fsL1']/a
- content_xpath: //p[contains(@class,'url fsM')]/following-sibling::p
- first_page_num: 0
- categories: [general, web]
- disabled: true
- timeout: 4.0
- about:
- website: https://search.goo.ne.jp
- wikidata_id: Q249044
- use_official_api: false
- require_api_key: false
- results: HTML
- language: ja
-
- - name: bt4g
- engine: bt4g
- shortcut: bt4g
-
- - name: pkg.go.dev
- engine: xpath
- shortcut: pgo
- search_url: https://pkg.go.dev/search?limit=100&m=package&q={query}
- results_xpath: /html/body/main/div[contains(@class,"SearchResults")]/div[not(@class)]/div[@class="SearchSnippet"]
- url_xpath: ./div[@class="SearchSnippet-headerContainer"]/h2/a/@href
- title_xpath: ./div[@class="SearchSnippet-headerContainer"]/h2/a
- content_xpath: ./p[@class="SearchSnippet-synopsis"]
- categories: [packages, it]
- timeout: 3.0
- disabled: true
- about:
- website: https://pkg.go.dev/
- use_official_api: false
- require_api_key: false
- results: HTML
-
-# Doku engine lets you access to any Doku wiki instance:
-# A public one or a privete/corporate one.
-# - name: ubuntuwiki
-# engine: doku
-# shortcut: uw
-# base_url: 'https://doc.ubuntu-fr.org'
-
-# Be careful when enabling this engine if you are
-# running a public instance. Do not expose any sensitive
-# information. You can restrict access by configuring a list
-# of access tokens under tokens.
-# - name: git grep
-# engine: command
-# command: ['git', 'grep', '{{QUERY}}']
-# shortcut: gg
-# tokens: []
-# disabled: true
-# delimiter:
-# chars: ':'
-# keys: ['filepath', 'code']
-
-# Be careful when enabling this engine if you are
-# running a public instance. Do not expose any sensitive
-# information. You can restrict access by configuring a list
-# of access tokens under tokens.
-# - name: locate
-# engine: command
-# command: ['locate', '{{QUERY}}']
-# shortcut: loc
-# tokens: []
-# disabled: true
-# delimiter:
-# chars: ' '
-# keys: ['line']
-
-# Be careful when enabling this engine if you are
-# running a public instance. Do not expose any sensitive
-# information. You can restrict access by configuring a list
-# of access tokens under tokens.
-# - name: find
-# engine: command
-# command: ['find', '.', '-name', '{{QUERY}}']
-# query_type: path
-# shortcut: fnd
-# tokens: []
-# disabled: true
-# delimiter:
-# chars: ' '
-# keys: ['line']
-
-# Be careful when enabling this engine if you are
-# running a public instance. Do not expose any sensitive
-# information. You can restrict access by configuring a list
-# of access tokens under tokens.
-# - name: pattern search in files
-# engine: command
-# command: ['fgrep', '{{QUERY}}']
-# shortcut: fgr
-# tokens: []
-# disabled: true
-# delimiter:
-# chars: ' '
-# keys: ['line']
-
-# Be careful when enabling this engine if you are
-# running a public instance. Do not expose any sensitive
-# information. You can restrict access by configuring a list
-# of access tokens under tokens.
-# - name: regex search in files
-# engine: command
-# command: ['grep', '{{QUERY}}']
-# shortcut: gr
-# tokens: []
-# disabled: true
-# delimiter:
-# chars: ' '
-# keys: ['line']
-
-doi_resolvers:
- oadoi.org: 'https://oadoi.org/'
- doi.org: 'https://doi.org/'
- doai.io: 'https://dissem.in/'
- sci-hub.se: 'https://sci-hub.se/'
- sci-hub.st: 'https://sci-hub.st/'
- sci-hub.ru: 'https://sci-hub.ru/'
-
-default_doi_resolver: 'oadoi.org'
diff --git a/src/agents/academicSearchAgent.ts b/src/agents/academicSearchAgent.ts
deleted file mode 100644
index 5c11307..0000000
--- a/src/agents/academicSearchAgent.ts
+++ /dev/null
@@ -1,265 +0,0 @@
-import { BaseMessage } from '@langchain/core/messages';
-import {
- PromptTemplate,
- ChatPromptTemplate,
- MessagesPlaceholder,
-} from '@langchain/core/prompts';
-import {
- RunnableSequence,
- RunnableMap,
- RunnableLambda,
-} from '@langchain/core/runnables';
-import { StringOutputParser } from '@langchain/core/output_parsers';
-import { Document } from '@langchain/core/documents';
-import { searchSearxng } from '../lib/searxng';
-import type { StreamEvent } from '@langchain/core/tracers/log_stream';
-import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
-import type { Embeddings } from '@langchain/core/embeddings';
-import formatChatHistoryAsString from '../utils/formatHistory';
-import eventEmitter from 'events';
-import computeSimilarity from '../utils/computeSimilarity';
-import logger from '../utils/logger';
-
-const basicAcademicSearchRetrieverPrompt = `
-You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
-If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
-
-Example:
-1. Follow up question: How does stable diffusion work?
-Rephrased: Stable diffusion working
-
-2. Follow up question: What is linear algebra?
-Rephrased: Linear algebra
-
-3. Follow up question: What is the third law of thermodynamics?
-Rephrased: Third law of thermodynamics
-
-Conversation:
-{chat_history}
-
-Follow up question: {query}
-Rephrased question:
-`;
-
-const basicAcademicSearchResponsePrompt = `
- You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are set on focus mode 'Academic', this means you will be searching for academic papers and articles on the web.
-
- Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containg a brief description of the content of that page).
- You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text.
- You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them.
- Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative.
- You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from.
- Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2].
- However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer.
-
- Aything inside the following \`context\` HTML block provided below is for your knowledge returned by the search engine and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to
- talk about the context in your response.
-
-
- {context}
-
-
- If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'.
- Anything between the \`context\` is retrieved from a search engine and is not a part of the conversation with the user. Today's date is ${new Date().toISOString()}
-`;
-
-const strParser = new StringOutputParser();
-
-const handleStream = async (
- stream: AsyncGenerator,
- emitter: eventEmitter,
-) => {
- for await (const event of stream) {
- if (
- event.event === 'on_chain_end' &&
- event.name === 'FinalSourceRetriever'
- ) {
- emitter.emit(
- 'data',
- JSON.stringify({ type: 'sources', data: event.data.output }),
- );
- }
- if (
- event.event === 'on_chain_stream' &&
- event.name === 'FinalResponseGenerator'
- ) {
- emitter.emit(
- 'data',
- JSON.stringify({ type: 'response', data: event.data.chunk }),
- );
- }
- if (
- event.event === 'on_chain_end' &&
- event.name === 'FinalResponseGenerator'
- ) {
- emitter.emit('end');
- }
- }
-};
-
-type BasicChainInput = {
- chat_history: BaseMessage[];
- query: string;
-};
-
-const createBasicAcademicSearchRetrieverChain = (llm: BaseChatModel) => {
- return RunnableSequence.from([
- PromptTemplate.fromTemplate(basicAcademicSearchRetrieverPrompt),
- llm,
- strParser,
- RunnableLambda.from(async (input: string) => {
- if (input === 'not_needed') {
- return { query: '', docs: [] };
- }
-
- const res = await searchSearxng(input, {
- language: 'en',
- engines: [
- 'arxiv',
- 'google scholar',
- 'internetarchivescholar',
- 'pubmed',
- ],
- });
-
- const documents = res.results.map(
- (result) =>
- new Document({
- pageContent: result.content,
- metadata: {
- title: result.title,
- url: result.url,
- ...(result.img_src && { img_src: result.img_src }),
- },
- }),
- );
-
- return { query: input, docs: documents };
- }),
- ]);
-};
-
-const createBasicAcademicSearchAnsweringChain = (
- llm: BaseChatModel,
- embeddings: Embeddings,
-) => {
- const basicAcademicSearchRetrieverChain =
- createBasicAcademicSearchRetrieverChain(llm);
-
- const processDocs = async (docs: Document[]) => {
- return docs
- .map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
- .join('\n');
- };
-
- const rerankDocs = async ({
- query,
- docs,
- }: {
- query: string;
- docs: Document[];
- }) => {
- if (docs.length === 0) {
- return docs;
- }
-
- const docsWithContent = docs.filter(
- (doc) => doc.pageContent && doc.pageContent.length > 0,
- );
-
- const [docEmbeddings, queryEmbedding] = await Promise.all([
- embeddings.embedDocuments(docsWithContent.map((doc) => doc.pageContent)),
- embeddings.embedQuery(query),
- ]);
-
- const similarity = docEmbeddings.map((docEmbedding, i) => {
- const sim = computeSimilarity(queryEmbedding, docEmbedding);
-
- return {
- index: i,
- similarity: sim,
- };
- });
-
- const sortedDocs = similarity
- .sort((a, b) => b.similarity - a.similarity)
- .slice(0, 15)
- .map((sim) => docsWithContent[sim.index]);
-
- return sortedDocs;
- };
-
- return RunnableSequence.from([
- RunnableMap.from({
- query: (input: BasicChainInput) => input.query,
- chat_history: (input: BasicChainInput) => input.chat_history,
- context: RunnableSequence.from([
- (input) => ({
- query: input.query,
- chat_history: formatChatHistoryAsString(input.chat_history),
- }),
- basicAcademicSearchRetrieverChain
- .pipe(rerankDocs)
- .withConfig({
- runName: 'FinalSourceRetriever',
- })
- .pipe(processDocs),
- ]),
- }),
- ChatPromptTemplate.fromMessages([
- ['system', basicAcademicSearchResponsePrompt],
- new MessagesPlaceholder('chat_history'),
- ['user', '{query}'],
- ]),
- llm,
- strParser,
- ]).withConfig({
- runName: 'FinalResponseGenerator',
- });
-};
-
-const basicAcademicSearch = (
- query: string,
- history: BaseMessage[],
- llm: BaseChatModel,
- embeddings: Embeddings,
-) => {
- const emitter = new eventEmitter();
-
- try {
- const basicAcademicSearchAnsweringChain =
- createBasicAcademicSearchAnsweringChain(llm, embeddings);
-
- const stream = basicAcademicSearchAnsweringChain.streamEvents(
- {
- chat_history: history,
- query: query,
- },
- {
- version: 'v1',
- },
- );
-
- handleStream(stream, emitter);
- } catch (err) {
- emitter.emit(
- 'error',
- JSON.stringify({ data: 'An error has occurred please try again later' }),
- );
- logger.error(`Error in academic search: ${err}`);
- }
-
- return emitter;
-};
-
-const handleAcademicSearch = (
- message: string,
- history: BaseMessage[],
- llm: BaseChatModel,
- embeddings: Embeddings,
-) => {
- const emitter = basicAcademicSearch(message, history, llm, embeddings);
- return emitter;
-};
-
-export default handleAcademicSearch;
diff --git a/src/agents/redditSearchAgent.ts b/src/agents/redditSearchAgent.ts
deleted file mode 100644
index 34e9ec2..0000000
--- a/src/agents/redditSearchAgent.ts
+++ /dev/null
@@ -1,260 +0,0 @@
-import { BaseMessage } from '@langchain/core/messages';
-import {
- PromptTemplate,
- ChatPromptTemplate,
- MessagesPlaceholder,
-} from '@langchain/core/prompts';
-import {
- RunnableSequence,
- RunnableMap,
- RunnableLambda,
-} from '@langchain/core/runnables';
-import { StringOutputParser } from '@langchain/core/output_parsers';
-import { Document } from '@langchain/core/documents';
-import { searchSearxng } from '../lib/searxng';
-import type { StreamEvent } from '@langchain/core/tracers/log_stream';
-import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
-import type { Embeddings } from '@langchain/core/embeddings';
-import formatChatHistoryAsString from '../utils/formatHistory';
-import eventEmitter from 'events';
-import computeSimilarity from '../utils/computeSimilarity';
-import logger from '../utils/logger';
-
-const basicRedditSearchRetrieverPrompt = `
-You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
-If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
-
-Example:
-1. Follow up question: Which company is most likely to create an AGI
-Rephrased: Which company is most likely to create an AGI
-
-2. Follow up question: Is Earth flat?
-Rephrased: Is Earth flat?
-
-3. Follow up question: Is there life on Mars?
-Rephrased: Is there life on Mars?
-
-Conversation:
-{chat_history}
-
-Follow up question: {query}
-Rephrased question:
-`;
-
-const basicRedditSearchResponsePrompt = `
- You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are set on focus mode 'Reddit', this means you will be searching for information, opinions and discussions on the web using Reddit.
-
- Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containg a brief description of the content of that page).
- You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text.
- You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them.
- Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative.
- You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from.
- Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2].
- However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer.
-
- Aything inside the following \`context\` HTML block provided below is for your knowledge returned by Reddit and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to
- talk about the context in your response.
-
-
- {context}
-
-
- If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'.
- Anything between the \`context\` is retrieved from Reddit and is not a part of the conversation with the user. Today's date is ${new Date().toISOString()}
-`;
-
-const strParser = new StringOutputParser();
-
-const handleStream = async (
- stream: AsyncGenerator,
- emitter: eventEmitter,
-) => {
- for await (const event of stream) {
- if (
- event.event === 'on_chain_end' &&
- event.name === 'FinalSourceRetriever'
- ) {
- emitter.emit(
- 'data',
- JSON.stringify({ type: 'sources', data: event.data.output }),
- );
- }
- if (
- event.event === 'on_chain_stream' &&
- event.name === 'FinalResponseGenerator'
- ) {
- emitter.emit(
- 'data',
- JSON.stringify({ type: 'response', data: event.data.chunk }),
- );
- }
- if (
- event.event === 'on_chain_end' &&
- event.name === 'FinalResponseGenerator'
- ) {
- emitter.emit('end');
- }
- }
-};
-
-type BasicChainInput = {
- chat_history: BaseMessage[];
- query: string;
-};
-
-const createBasicRedditSearchRetrieverChain = (llm: BaseChatModel) => {
- return RunnableSequence.from([
- PromptTemplate.fromTemplate(basicRedditSearchRetrieverPrompt),
- llm,
- strParser,
- RunnableLambda.from(async (input: string) => {
- if (input === 'not_needed') {
- return { query: '', docs: [] };
- }
-
- const res = await searchSearxng(input, {
- language: 'en',
- engines: ['reddit'],
- });
-
- const documents = res.results.map(
- (result) =>
- new Document({
- pageContent: result.content ? result.content : result.title,
- metadata: {
- title: result.title,
- url: result.url,
- ...(result.img_src && { img_src: result.img_src }),
- },
- }),
- );
-
- return { query: input, docs: documents };
- }),
- ]);
-};
-
-const createBasicRedditSearchAnsweringChain = (
- llm: BaseChatModel,
- embeddings: Embeddings,
-) => {
- const basicRedditSearchRetrieverChain =
- createBasicRedditSearchRetrieverChain(llm);
-
- const processDocs = async (docs: Document[]) => {
- return docs
- .map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
- .join('\n');
- };
-
- const rerankDocs = async ({
- query,
- docs,
- }: {
- query: string;
- docs: Document[];
- }) => {
- if (docs.length === 0) {
- return docs;
- }
-
- const docsWithContent = docs.filter(
- (doc) => doc.pageContent && doc.pageContent.length > 0,
- );
-
- const [docEmbeddings, queryEmbedding] = await Promise.all([
- embeddings.embedDocuments(docsWithContent.map((doc) => doc.pageContent)),
- embeddings.embedQuery(query),
- ]);
-
- const similarity = docEmbeddings.map((docEmbedding, i) => {
- const sim = computeSimilarity(queryEmbedding, docEmbedding);
-
- return {
- index: i,
- similarity: sim,
- };
- });
-
- const sortedDocs = similarity
- .sort((a, b) => b.similarity - a.similarity)
- .slice(0, 15)
- .filter((sim) => sim.similarity > 0.3)
- .map((sim) => docsWithContent[sim.index]);
-
- return sortedDocs;
- };
-
- return RunnableSequence.from([
- RunnableMap.from({
- query: (input: BasicChainInput) => input.query,
- chat_history: (input: BasicChainInput) => input.chat_history,
- context: RunnableSequence.from([
- (input) => ({
- query: input.query,
- chat_history: formatChatHistoryAsString(input.chat_history),
- }),
- basicRedditSearchRetrieverChain
- .pipe(rerankDocs)
- .withConfig({
- runName: 'FinalSourceRetriever',
- })
- .pipe(processDocs),
- ]),
- }),
- ChatPromptTemplate.fromMessages([
- ['system', basicRedditSearchResponsePrompt],
- new MessagesPlaceholder('chat_history'),
- ['user', '{query}'],
- ]),
- llm,
- strParser,
- ]).withConfig({
- runName: 'FinalResponseGenerator',
- });
-};
-
-const basicRedditSearch = (
- query: string,
- history: BaseMessage[],
- llm: BaseChatModel,
- embeddings: Embeddings,
-) => {
- const emitter = new eventEmitter();
-
- try {
- const basicRedditSearchAnsweringChain =
- createBasicRedditSearchAnsweringChain(llm, embeddings);
- const stream = basicRedditSearchAnsweringChain.streamEvents(
- {
- chat_history: history,
- query: query,
- },
- {
- version: 'v1',
- },
- );
-
- handleStream(stream, emitter);
- } catch (err) {
- emitter.emit(
- 'error',
- JSON.stringify({ data: 'An error has occurred please try again later' }),
- );
- logger.error(`Error in RedditSearch: ${err}`);
- }
-
- return emitter;
-};
-
-const handleRedditSearch = (
- message: string,
- history: BaseMessage[],
- llm: BaseChatModel,
- embeddings: Embeddings,
-) => {
- const emitter = basicRedditSearch(message, history, llm, embeddings);
- return emitter;
-};
-
-export default handleRedditSearch;
diff --git a/src/agents/webSearchAgent.ts b/src/agents/webSearchAgent.ts
deleted file mode 100644
index 1364742..0000000
--- a/src/agents/webSearchAgent.ts
+++ /dev/null
@@ -1,261 +0,0 @@
-import { BaseMessage } from '@langchain/core/messages';
-import {
- PromptTemplate,
- ChatPromptTemplate,
- MessagesPlaceholder,
-} from '@langchain/core/prompts';
-import {
- RunnableSequence,
- RunnableMap,
- RunnableLambda,
-} from '@langchain/core/runnables';
-import { StringOutputParser } from '@langchain/core/output_parsers';
-import { Document } from '@langchain/core/documents';
-import { searchSearxng } from '../lib/searxng';
-import type { StreamEvent } from '@langchain/core/tracers/log_stream';
-import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
-import type { Embeddings } from '@langchain/core/embeddings';
-import formatChatHistoryAsString from '../utils/formatHistory';
-import eventEmitter from 'events';
-import computeSimilarity from '../utils/computeSimilarity';
-import logger from '../utils/logger';
-
-const basicSearchRetrieverPrompt = `
-You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
-If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
-
-Example:
-1. Follow up question: What is the capital of France?
-Rephrased: Capital of france
-
-2. Follow up question: What is the population of New York City?
-Rephrased: Population of New York City
-
-3. Follow up question: What is Docker?
-Rephrased: What is Docker
-
-Conversation:
-{chat_history}
-
-Follow up question: {query}
-Rephrased question:
-`;
-
-const basicWebSearchResponsePrompt = `
- You are Perplexica, an AI model who is expert at searching the web and answering user's queries.
-
- Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containg a brief description of the content of that page).
- You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text.
- You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them.
- Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative.
- You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from.
- Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2].
- However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer.
-
- Aything inside the following \`context\` HTML block provided below is for your knowledge returned by the search engine and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to
- talk about the context in your response.
-
-
- {context}
-
-
- If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'.
- Anything between the \`context\` is retrieved from a search engine and is not a part of the conversation with the user. Today's date is ${new Date().toISOString()}
-`;
-
-const strParser = new StringOutputParser();
-
-const handleStream = async (
- stream: AsyncGenerator,
- emitter: eventEmitter,
-) => {
- for await (const event of stream) {
- if (
- event.event === 'on_chain_end' &&
- event.name === 'FinalSourceRetriever'
- ) {
- emitter.emit(
- 'data',
- JSON.stringify({ type: 'sources', data: event.data.output }),
- );
- }
- if (
- event.event === 'on_chain_stream' &&
- event.name === 'FinalResponseGenerator'
- ) {
- emitter.emit(
- 'data',
- JSON.stringify({ type: 'response', data: event.data.chunk }),
- );
- }
- if (
- event.event === 'on_chain_end' &&
- event.name === 'FinalResponseGenerator'
- ) {
- emitter.emit('end');
- }
- }
-};
-
-type BasicChainInput = {
- chat_history: BaseMessage[];
- query: string;
-};
-
-const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => {
- return RunnableSequence.from([
- PromptTemplate.fromTemplate(basicSearchRetrieverPrompt),
- llm,
- strParser,
- RunnableLambda.from(async (input: string) => {
- if (input === 'not_needed') {
- return { query: '', docs: [] };
- }
-
- const res = await searchSearxng(input, {
- language: 'en',
- });
-
- const documents = res.results.map(
- (result) =>
- new Document({
- pageContent: result.content,
- metadata: {
- title: result.title,
- url: result.url,
- ...(result.img_src && { img_src: result.img_src }),
- },
- }),
- );
-
- return { query: input, docs: documents };
- }),
- ]);
-};
-
-const createBasicWebSearchAnsweringChain = (
- llm: BaseChatModel,
- embeddings: Embeddings,
-) => {
- const basicWebSearchRetrieverChain = createBasicWebSearchRetrieverChain(llm);
-
- const processDocs = async (docs: Document[]) => {
- return docs
- .map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
- .join('\n');
- };
-
- const rerankDocs = async ({
- query,
- docs,
- }: {
- query: string;
- docs: Document[];
- }) => {
- if (docs.length === 0) {
- return docs;
- }
-
- const docsWithContent = docs.filter(
- (doc) => doc.pageContent && doc.pageContent.length > 0,
- );
-
- const [docEmbeddings, queryEmbedding] = await Promise.all([
- embeddings.embedDocuments(docsWithContent.map((doc) => doc.pageContent)),
- embeddings.embedQuery(query),
- ]);
-
- const similarity = docEmbeddings.map((docEmbedding, i) => {
- const sim = computeSimilarity(queryEmbedding, docEmbedding);
-
- return {
- index: i,
- similarity: sim,
- };
- });
-
- const sortedDocs = similarity
- .sort((a, b) => b.similarity - a.similarity)
- .filter((sim) => sim.similarity > 0.5)
- .slice(0, 15)
- .map((sim) => docsWithContent[sim.index]);
-
- return sortedDocs;
- };
-
- return RunnableSequence.from([
- RunnableMap.from({
- query: (input: BasicChainInput) => input.query,
- chat_history: (input: BasicChainInput) => input.chat_history,
- context: RunnableSequence.from([
- (input) => ({
- query: input.query,
- chat_history: formatChatHistoryAsString(input.chat_history),
- }),
- basicWebSearchRetrieverChain
- .pipe(rerankDocs)
- .withConfig({
- runName: 'FinalSourceRetriever',
- })
- .pipe(processDocs),
- ]),
- }),
- ChatPromptTemplate.fromMessages([
- ['system', basicWebSearchResponsePrompt],
- new MessagesPlaceholder('chat_history'),
- ['user', '{query}'],
- ]),
- llm,
- strParser,
- ]).withConfig({
- runName: 'FinalResponseGenerator',
- });
-};
-
-const basicWebSearch = (
- query: string,
- history: BaseMessage[],
- llm: BaseChatModel,
- embeddings: Embeddings,
-) => {
- const emitter = new eventEmitter();
-
- try {
- const basicWebSearchAnsweringChain = createBasicWebSearchAnsweringChain(
- llm,
- embeddings,
- );
-
- const stream = basicWebSearchAnsweringChain.streamEvents(
- {
- chat_history: history,
- query: query,
- },
- {
- version: 'v1',
- },
- );
-
- handleStream(stream, emitter);
- } catch (err) {
- emitter.emit(
- 'error',
- JSON.stringify({ data: 'An error has occurred please try again later' }),
- );
- logger.error(`Error in websearch: ${err}`);
- }
-
- return emitter;
-};
-
-const handleWebSearch = (
- message: string,
- history: BaseMessage[],
- llm: BaseChatModel,
- embeddings: Embeddings,
-) => {
- const emitter = basicWebSearch(message, history, llm, embeddings);
- return emitter;
-};
-
-export default handleWebSearch;
diff --git a/src/agents/wolframAlphaSearchAgent.ts b/src/agents/wolframAlphaSearchAgent.ts
deleted file mode 100644
index f810a1e..0000000
--- a/src/agents/wolframAlphaSearchAgent.ts
+++ /dev/null
@@ -1,219 +0,0 @@
-import { BaseMessage } from '@langchain/core/messages';
-import {
- PromptTemplate,
- ChatPromptTemplate,
- MessagesPlaceholder,
-} from '@langchain/core/prompts';
-import {
- RunnableSequence,
- RunnableMap,
- RunnableLambda,
-} from '@langchain/core/runnables';
-import { StringOutputParser } from '@langchain/core/output_parsers';
-import { Document } from '@langchain/core/documents';
-import { searchSearxng } from '../lib/searxng';
-import type { StreamEvent } from '@langchain/core/tracers/log_stream';
-import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
-import type { Embeddings } from '@langchain/core/embeddings';
-import formatChatHistoryAsString from '../utils/formatHistory';
-import eventEmitter from 'events';
-import logger from '../utils/logger';
-
-const basicWolframAlphaSearchRetrieverPrompt = `
-You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
-If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
-
-Example:
-1. Follow up question: What is the atomic radius of S?
-Rephrased: Atomic radius of S
-
-2. Follow up question: What is linear algebra?
-Rephrased: Linear algebra
-
-3. Follow up question: What is the third law of thermodynamics?
-Rephrased: Third law of thermodynamics
-
-Conversation:
-{chat_history}
-
-Follow up question: {query}
-Rephrased question:
-`;
-
-const basicWolframAlphaSearchResponsePrompt = `
- You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are set on focus mode 'Wolfram Alpha', this means you will be searching for information on the web using Wolfram Alpha. It is a computational knowledge engine that can answer factual queries and perform computations.
-
- Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containg a brief description of the content of that page).
- You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text.
- You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them.
- Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative.
- You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from.
- Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2].
- However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer.
-
- Aything inside the following \`context\` HTML block provided below is for your knowledge returned by Wolfram Alpha and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to
- talk about the context in your response.
-
-
- {context}
-
-
- If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'.
- Anything between the \`context\` is retrieved from Wolfram Alpha and is not a part of the conversation with the user. Today's date is ${new Date().toISOString()}
-`;
-
-const strParser = new StringOutputParser();
-
-const handleStream = async (
- stream: AsyncGenerator,
- emitter: eventEmitter,
-) => {
- for await (const event of stream) {
- if (
- event.event === 'on_chain_end' &&
- event.name === 'FinalSourceRetriever'
- ) {
- emitter.emit(
- 'data',
- JSON.stringify({ type: 'sources', data: event.data.output }),
- );
- }
- if (
- event.event === 'on_chain_stream' &&
- event.name === 'FinalResponseGenerator'
- ) {
- emitter.emit(
- 'data',
- JSON.stringify({ type: 'response', data: event.data.chunk }),
- );
- }
- if (
- event.event === 'on_chain_end' &&
- event.name === 'FinalResponseGenerator'
- ) {
- emitter.emit('end');
- }
- }
-};
-
-type BasicChainInput = {
- chat_history: BaseMessage[];
- query: string;
-};
-
-const createBasicWolframAlphaSearchRetrieverChain = (llm: BaseChatModel) => {
- return RunnableSequence.from([
- PromptTemplate.fromTemplate(basicWolframAlphaSearchRetrieverPrompt),
- llm,
- strParser,
- RunnableLambda.from(async (input: string) => {
- if (input === 'not_needed') {
- return { query: '', docs: [] };
- }
-
- const res = await searchSearxng(input, {
- language: 'en',
- engines: ['wolframalpha'],
- });
-
- const documents = res.results.map(
- (result) =>
- new Document({
- pageContent: result.content,
- metadata: {
- title: result.title,
- url: result.url,
- ...(result.img_src && { img_src: result.img_src }),
- },
- }),
- );
-
- return { query: input, docs: documents };
- }),
- ]);
-};
-
-const createBasicWolframAlphaSearchAnsweringChain = (llm: BaseChatModel) => {
- const basicWolframAlphaSearchRetrieverChain =
- createBasicWolframAlphaSearchRetrieverChain(llm);
-
- const processDocs = (docs: Document[]) => {
- return docs
- .map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
- .join('\n');
- };
-
- return RunnableSequence.from([
- RunnableMap.from({
- query: (input: BasicChainInput) => input.query,
- chat_history: (input: BasicChainInput) => input.chat_history,
- context: RunnableSequence.from([
- (input) => ({
- query: input.query,
- chat_history: formatChatHistoryAsString(input.chat_history),
- }),
- basicWolframAlphaSearchRetrieverChain
- .pipe(({ query, docs }) => {
- return docs;
- })
- .withConfig({
- runName: 'FinalSourceRetriever',
- })
- .pipe(processDocs),
- ]),
- }),
- ChatPromptTemplate.fromMessages([
- ['system', basicWolframAlphaSearchResponsePrompt],
- new MessagesPlaceholder('chat_history'),
- ['user', '{query}'],
- ]),
- llm,
- strParser,
- ]).withConfig({
- runName: 'FinalResponseGenerator',
- });
-};
-
-const basicWolframAlphaSearch = (
- query: string,
- history: BaseMessage[],
- llm: BaseChatModel,
-) => {
- const emitter = new eventEmitter();
-
- try {
- const basicWolframAlphaSearchAnsweringChain =
- createBasicWolframAlphaSearchAnsweringChain(llm);
- const stream = basicWolframAlphaSearchAnsweringChain.streamEvents(
- {
- chat_history: history,
- query: query,
- },
- {
- version: 'v1',
- },
- );
-
- handleStream(stream, emitter);
- } catch (err) {
- emitter.emit(
- 'error',
- JSON.stringify({ data: 'An error has occurred please try again later' }),
- );
- logger.error(`Error in WolframAlphaSearch: ${err}`);
- }
-
- return emitter;
-};
-
-const handleWolframAlphaSearch = (
- message: string,
- history: BaseMessage[],
- llm: BaseChatModel,
- embeddings: Embeddings,
-) => {
- const emitter = basicWolframAlphaSearch(message, history, llm);
- return emitter;
-};
-
-export default handleWolframAlphaSearch;
diff --git a/src/agents/writingAssistant.ts b/src/agents/writingAssistant.ts
deleted file mode 100644
index 7c2cb49..0000000
--- a/src/agents/writingAssistant.ts
+++ /dev/null
@@ -1,90 +0,0 @@
-import { BaseMessage } from '@langchain/core/messages';
-import {
- ChatPromptTemplate,
- MessagesPlaceholder,
-} from '@langchain/core/prompts';
-import { RunnableSequence } from '@langchain/core/runnables';
-import { StringOutputParser } from '@langchain/core/output_parsers';
-import type { StreamEvent } from '@langchain/core/tracers/log_stream';
-import eventEmitter from 'events';
-import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
-import type { Embeddings } from '@langchain/core/embeddings';
-import logger from '../utils/logger';
-
-const writingAssistantPrompt = `
-You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are currently set on focus mode 'Writing Assistant', this means you will be helping the user write a response to a given query.
-Since you are a writing assistant, you would not perform web searches. If you think you lack information to answer the query, you can ask the user for more information or suggest them to switch to a different focus mode.
-`;
-
-const strParser = new StringOutputParser();
-
-const handleStream = async (
- stream: AsyncGenerator,
- emitter: eventEmitter,
-) => {
- for await (const event of stream) {
- if (
- event.event === 'on_chain_stream' &&
- event.name === 'FinalResponseGenerator'
- ) {
- emitter.emit(
- 'data',
- JSON.stringify({ type: 'response', data: event.data.chunk }),
- );
- }
- if (
- event.event === 'on_chain_end' &&
- event.name === 'FinalResponseGenerator'
- ) {
- emitter.emit('end');
- }
- }
-};
-
-const createWritingAssistantChain = (llm: BaseChatModel) => {
- return RunnableSequence.from([
- ChatPromptTemplate.fromMessages([
- ['system', writingAssistantPrompt],
- new MessagesPlaceholder('chat_history'),
- ['user', '{query}'],
- ]),
- llm,
- strParser,
- ]).withConfig({
- runName: 'FinalResponseGenerator',
- });
-};
-
-const handleWritingAssistant = (
- query: string,
- history: BaseMessage[],
- llm: BaseChatModel,
- embeddings: Embeddings,
-) => {
- const emitter = new eventEmitter();
-
- try {
- const writingAssistantChain = createWritingAssistantChain(llm);
- const stream = writingAssistantChain.streamEvents(
- {
- chat_history: history,
- query: query,
- },
- {
- version: 'v1',
- },
- );
-
- handleStream(stream, emitter);
- } catch (err) {
- emitter.emit(
- 'error',
- JSON.stringify({ data: 'An error has occurred please try again later' }),
- );
- logger.error(`Error in writing assistant: ${err}`);
- }
-
- return emitter;
-};
-
-export default handleWritingAssistant;
diff --git a/src/agents/youtubeSearchAgent.ts b/src/agents/youtubeSearchAgent.ts
deleted file mode 100644
index 4e82cc7..0000000
--- a/src/agents/youtubeSearchAgent.ts
+++ /dev/null
@@ -1,261 +0,0 @@
-import { BaseMessage } from '@langchain/core/messages';
-import {
- PromptTemplate,
- ChatPromptTemplate,
- MessagesPlaceholder,
-} from '@langchain/core/prompts';
-import {
- RunnableSequence,
- RunnableMap,
- RunnableLambda,
-} from '@langchain/core/runnables';
-import { StringOutputParser } from '@langchain/core/output_parsers';
-import { Document } from '@langchain/core/documents';
-import { searchSearxng } from '../lib/searxng';
-import type { StreamEvent } from '@langchain/core/tracers/log_stream';
-import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
-import type { Embeddings } from '@langchain/core/embeddings';
-import formatChatHistoryAsString from '../utils/formatHistory';
-import eventEmitter from 'events';
-import computeSimilarity from '../utils/computeSimilarity';
-import logger from '../utils/logger';
-
-const basicYoutubeSearchRetrieverPrompt = `
-You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
-If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
-
-Example:
-1. Follow up question: How does an A.C work?
-Rephrased: A.C working
-
-2. Follow up question: Linear algebra explanation video
-Rephrased: What is linear algebra?
-
-3. Follow up question: What is theory of relativity?
-Rephrased: What is theory of relativity?
-
-Conversation:
-{chat_history}
-
-Follow up question: {query}
-Rephrased question:
-`;
-
-const basicYoutubeSearchResponsePrompt = `
- You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are set on focus mode 'Youtube', this means you will be searching for videos on the web using Youtube and providing information based on the video's transcript.
-
- Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containg a brief description of the content of that page).
- You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text.
- You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them.
- Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative.
- You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from.
- Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2].
- However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer.
-
- Aything inside the following \`context\` HTML block provided below is for your knowledge returned by Youtube and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to
- talk about the context in your response.
-
-
- {context}
-
-
- If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'.
- Anything between the \`context\` is retrieved from Youtube and is not a part of the conversation with the user. Today's date is ${new Date().toISOString()}
-`;
-
-const strParser = new StringOutputParser();
-
-const handleStream = async (
- stream: AsyncGenerator,
- emitter: eventEmitter,
-) => {
- for await (const event of stream) {
- if (
- event.event === 'on_chain_end' &&
- event.name === 'FinalSourceRetriever'
- ) {
- emitter.emit(
- 'data',
- JSON.stringify({ type: 'sources', data: event.data.output }),
- );
- }
- if (
- event.event === 'on_chain_stream' &&
- event.name === 'FinalResponseGenerator'
- ) {
- emitter.emit(
- 'data',
- JSON.stringify({ type: 'response', data: event.data.chunk }),
- );
- }
- if (
- event.event === 'on_chain_end' &&
- event.name === 'FinalResponseGenerator'
- ) {
- emitter.emit('end');
- }
- }
-};
-
-type BasicChainInput = {
- chat_history: BaseMessage[];
- query: string;
-};
-
-const createBasicYoutubeSearchRetrieverChain = (llm: BaseChatModel) => {
- return RunnableSequence.from([
- PromptTemplate.fromTemplate(basicYoutubeSearchRetrieverPrompt),
- llm,
- strParser,
- RunnableLambda.from(async (input: string) => {
- if (input === 'not_needed') {
- return { query: '', docs: [] };
- }
-
- const res = await searchSearxng(input, {
- language: 'en',
- engines: ['youtube'],
- });
-
- const documents = res.results.map(
- (result) =>
- new Document({
- pageContent: result.content ? result.content : result.title,
- metadata: {
- title: result.title,
- url: result.url,
- ...(result.img_src && { img_src: result.img_src }),
- },
- }),
- );
-
- return { query: input, docs: documents };
- }),
- ]);
-};
-
-const createBasicYoutubeSearchAnsweringChain = (
- llm: BaseChatModel,
- embeddings: Embeddings,
-) => {
- const basicYoutubeSearchRetrieverChain =
- createBasicYoutubeSearchRetrieverChain(llm);
-
- const processDocs = async (docs: Document[]) => {
- return docs
- .map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
- .join('\n');
- };
-
- const rerankDocs = async ({
- query,
- docs,
- }: {
- query: string;
- docs: Document[];
- }) => {
- if (docs.length === 0) {
- return docs;
- }
-
- const docsWithContent = docs.filter(
- (doc) => doc.pageContent && doc.pageContent.length > 0,
- );
-
- const [docEmbeddings, queryEmbedding] = await Promise.all([
- embeddings.embedDocuments(docsWithContent.map((doc) => doc.pageContent)),
- embeddings.embedQuery(query),
- ]);
-
- const similarity = docEmbeddings.map((docEmbedding, i) => {
- const sim = computeSimilarity(queryEmbedding, docEmbedding);
-
- return {
- index: i,
- similarity: sim,
- };
- });
-
- const sortedDocs = similarity
- .sort((a, b) => b.similarity - a.similarity)
- .slice(0, 15)
- .filter((sim) => sim.similarity > 0.3)
- .map((sim) => docsWithContent[sim.index]);
-
- return sortedDocs;
- };
-
- return RunnableSequence.from([
- RunnableMap.from({
- query: (input: BasicChainInput) => input.query,
- chat_history: (input: BasicChainInput) => input.chat_history,
- context: RunnableSequence.from([
- (input) => ({
- query: input.query,
- chat_history: formatChatHistoryAsString(input.chat_history),
- }),
- basicYoutubeSearchRetrieverChain
- .pipe(rerankDocs)
- .withConfig({
- runName: 'FinalSourceRetriever',
- })
- .pipe(processDocs),
- ]),
- }),
- ChatPromptTemplate.fromMessages([
- ['system', basicYoutubeSearchResponsePrompt],
- new MessagesPlaceholder('chat_history'),
- ['user', '{query}'],
- ]),
- llm,
- strParser,
- ]).withConfig({
- runName: 'FinalResponseGenerator',
- });
-};
-
-const basicYoutubeSearch = (
- query: string,
- history: BaseMessage[],
- llm: BaseChatModel,
- embeddings: Embeddings,
-) => {
- const emitter = new eventEmitter();
-
- try {
- const basicYoutubeSearchAnsweringChain =
- createBasicYoutubeSearchAnsweringChain(llm, embeddings);
-
- const stream = basicYoutubeSearchAnsweringChain.streamEvents(
- {
- chat_history: history,
- query: query,
- },
- {
- version: 'v1',
- },
- );
-
- handleStream(stream, emitter);
- } catch (err) {
- emitter.emit(
- 'error',
- JSON.stringify({ data: 'An error has occurred please try again later' }),
- );
- logger.error(`Error in youtube search: ${err}`);
- }
-
- return emitter;
-};
-
-const handleYoutubeSearch = (
- message: string,
- history: BaseMessage[],
- llm: BaseChatModel,
- embeddings: Embeddings,
-) => {
- const emitter = basicYoutubeSearch(message, history, llm, embeddings);
- return emitter;
-};
-
-export default handleYoutubeSearch;
diff --git a/src/app.ts b/src/app.ts
index b8c2371..96b3a0c 100644
--- a/src/app.ts
+++ b/src/app.ts
@@ -28,3 +28,21 @@ server.listen(port, () => {
 });
 startWebSocketServer(server);
+
+// Last-resort process-level handlers that report otherwise-unhandled failures
+// through the application logger.
+// NOTE(review): Node's documentation advises against resuming normal operation
+// after 'uncaughtException'; confirm that keeping the process alive is intended.
+process.on('uncaughtException', (err, origin) => {
+  // Interpolating an Error yields only "Name: message"; log the stack when present.
+  const detail = err instanceof Error ? (err.stack ?? err.message) : String(err);
+  logger.error(`Uncaught Exception at ${origin}: ${detail}`);
+});
+
+process.on('unhandledRejection', (reason) => {
+  // A Promise always interpolates as "[object Promise]", so only the rejection
+  // reason is logged (with its stack when it is an Error).
+  const detail =
+    reason instanceof Error ? (reason.stack ?? reason.message) : String(reason);
+  logger.error(`Unhandled Rejection, reason: ${detail}`);
+});
diff --git a/src/agents/imageSearchAgent.ts b/src/chains/imageSearchAgent.ts
similarity index 100%
rename from src/agents/imageSearchAgent.ts
rename to src/chains/imageSearchAgent.ts
diff --git a/src/agents/suggestionGeneratorAgent.ts b/src/chains/suggestionGeneratorAgent.ts
similarity index 97%
rename from src/agents/suggestionGeneratorAgent.ts
rename to src/chains/suggestionGeneratorAgent.ts
index 0efdfa9..6ba255d 100644
--- a/src/agents/suggestionGeneratorAgent.ts
+++ b/src/chains/suggestionGeneratorAgent.ts
@@ -47,7 +47,7 @@ const generateSuggestions = (
input: SuggestionGeneratorInput,
llm: BaseChatModel,
) => {
- (llm as ChatOpenAI).temperature = 0;
+ (llm as unknown as ChatOpenAI).temperature = 0;
const suggestionGeneratorChain = createSuggestionGeneratorChain(llm);
return suggestionGeneratorChain.invoke(input);
};
diff --git a/src/agents/videoSearchAgent.ts b/src/chains/videoSearchAgent.ts
similarity index 100%
rename from src/agents/videoSearchAgent.ts
rename to src/chains/videoSearchAgent.ts
diff --git a/src/config.ts b/src/config.ts
index 7c0c7f1..001c259 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -8,10 +8,13 @@ interface Config {
GENERAL: {
PORT: number;
SIMILARITY_MEASURE: string;
+ KEEP_ALIVE: string;
};
API_KEYS: {
OPENAI: string;
GROQ: string;
+ ANTHROPIC: string;
+ GEMINI: string;
};
API_ENDPOINTS: {
SEARXNG: string;
@@ -33,11 +36,18 @@ export const getPort = () => loadConfig().GENERAL.PORT;
export const getSimilarityMeasure = () =>
loadConfig().GENERAL.SIMILARITY_MEASURE;
+export const getKeepAlive = () => loadConfig().GENERAL.KEEP_ALIVE;
+
export const getOpenaiApiKey = () => loadConfig().API_KEYS.OPENAI;
export const getGroqApiKey = () => loadConfig().API_KEYS.GROQ;
-export const getSearxngApiEndpoint = () => loadConfig().API_ENDPOINTS.SEARXNG;
+export const getAnthropicApiKey = () => loadConfig().API_KEYS.ANTHROPIC;
+
+export const getGeminiApiKey = () => loadConfig().API_KEYS.GEMINI;
+
+export const getSearxngApiEndpoint = () =>
+ process.env.SEARXNG_API_URL || loadConfig().API_ENDPOINTS.SEARXNG;
export const getOllamaApiEndpoint = () => loadConfig().API_ENDPOINTS.OLLAMA;
diff --git a/src/db/index.ts b/src/db/index.ts
new file mode 100644
index 0000000..b431b47
--- /dev/null
+++ b/src/db/index.ts
@@ -0,0 +1,12 @@
+import { drizzle } from 'drizzle-orm/better-sqlite3';
+import Database from 'better-sqlite3';
+import * as schema from './schema';
+
+// SQLite database file backing the application.
+// NOTE(review): the path is relative — presumably resolved against the process
+// working directory; confirm the server is always started from the repo root.
+const sqlite = new Database('data/db.sqlite');
+
+const db = drizzle(sqlite, { schema });
+
+export default db;
diff --git a/src/db/schema.ts b/src/db/schema.ts
new file mode 100644
index 0000000..cee9660
--- /dev/null
+++ b/src/db/schema.ts
@@ -0,0 +1,35 @@
+import { sql } from 'drizzle-orm';
+import { text, integer, sqliteTable } from 'drizzle-orm/sqlite-core';
+
+/** `messages` table: one row per stored message. */
+export const messages = sqliteTable('messages', {
+  id: integer('id').primaryKey(),
+  content: text('content').notNull(),
+  chatId: text('chatId').notNull(),
+  messageId: text('messageId').notNull(),
+  // NOTE(review): the property is `role` but the column is named 'type';
+  // left unchanged because renaming a column requires a migration.
+  role: text('type', { enum: ['assistant', 'user'] }),
+  metadata: text('metadata', {
+    mode: 'json',
+  }),
+});
+
+/** Shape of one element of the JSON array stored in `chats.files`. */
+interface File {
+  name: string;
+  fileId: string;
+}
+
+/** `chats` table: one row per chat. */
+export const chats = sqliteTable('chats', {
+  id: text('id').primaryKey(),
+  title: text('title').notNull(),
+  createdAt: text('createdAt').notNull(),
+  focusMode: text('focusMode').notNull(),
+  // JSON column typed as File[] so reads/writes are checked against the
+  // interface above; the SQL default is the empty JSON array.
+  files: text('files', { mode: 'json' })
+    .$type<File[]>()
+    .default(sql`'[]'`),
+});
diff --git a/src/lib/outputParsers/lineOutputParser.ts b/src/lib/outputParsers/lineOutputParser.ts
new file mode 100644
index 0000000..08711aa
--- /dev/null
+++ b/src/lib/outputParsers/lineOutputParser.ts
@@ -0,0 +1,68 @@
+import { BaseOutputParser } from '@langchain/core/output_parsers';
+
+interface LineOutputParserArgs {
+  key?: string;
+}
+
+/**
+ * Extracts the text wrapped in `<key>...</key>` from a model response and
+ * returns it as one string with any leading list marker removed.
+ */
+class LineOutputParser extends BaseOutputParser<string> {
+  private key = 'questions';
+
+  /**
+   * @param args Optional settings; `key` is the tag name to look for and
+   *   defaults to `questions`.
+   */
+  constructor(args?: LineOutputParserArgs) {
+    super();
+    // `args` may be omitted entirely, so read `key` through optional chaining.
+    this.key = args?.key ?? this.key;
+  }
+
+  static lc_name() {
+    return 'LineOutputParser';
+  }
+
+  lc_namespace = ['langchain', 'output_parsers', 'line_output_parser'];
+
+  /**
+   * @param text Raw model output.
+   * @returns The trimmed content between the opening and closing tag, or an
+   *   empty string when either tag is missing.
+   */
+  async parse(text: string): Promise<string> {
+    text = text.trim();
+
+    // Matches leading list markers: "-", "*", "1. ", "2) " or a bullet (U+2022).
+    const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/;
+    const openTag = `<${this.key}>`;
+    // Must include "</": a bare `key>` would match inside the opening tag.
+    const closeTag = `</${this.key}>`;
+
+    const startKeyIndex = text.indexOf(openTag);
+    if (startKeyIndex === -1) {
+      return '';
+    }
+
+    // Search for the closing tag only after the opening one so the slice
+    // bounds can never be inverted.
+    const questionsStartIndex = startKeyIndex + openTag.length;
+    const questionsEndIndex = text.indexOf(closeTag, questionsStartIndex);
+    if (questionsEndIndex === -1) {
+      return '';
+    }
+
+    return text
+      .slice(questionsStartIndex, questionsEndIndex)
+      .trim()
+      .replace(regex, '');
+  }
+
+  getFormatInstructions(): string {
+    throw new Error('Not implemented.');
+  }
+}
+
+export default LineOutputParser;
diff --git a/src/lib/outputParsers/listLineOutputParser.ts b/src/lib/outputParsers/listLineOutputParser.ts
index 57a9bbc..f465ef1 100644
--- a/src/lib/outputParsers/listLineOutputParser.ts
+++ b/src/lib/outputParsers/listLineOutputParser.ts
@@ -19,9 +19,16 @@ class LineListOutputParser extends BaseOutputParser {
lc_namespace = ['langchain', 'output_parsers', 'line_list_output_parser'];
async parse(text: string): Promise {
+ text = text.trim() || '';
+
const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/;
const startKeyIndex = text.indexOf(`<${this.key}>`);
const endKeyIndex = text.indexOf(`${this.key}>`);
+
+ if (startKeyIndex === -1 || endKeyIndex === -1) {
+ return [];
+ }
+
const questionsStartIndex =
startKeyIndex === -1 ? 0 : startKeyIndex + `<${this.key}>`.length;
const questionsEndIndex = endKeyIndex === -1 ? text.length : endKeyIndex;
diff --git a/src/lib/providers.ts b/src/lib/providers.ts
deleted file mode 100644
index 3223193..0000000
--- a/src/lib/providers.ts
+++ /dev/null
@@ -1,187 +0,0 @@
-import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai';
-import { ChatOllama } from '@langchain/community/chat_models/ollama';
-import { OllamaEmbeddings } from '@langchain/community/embeddings/ollama';
-import { HuggingFaceTransformersEmbeddings } from './huggingfaceTransformer';
-import {
- getGroqApiKey,
- getOllamaApiEndpoint,
- getOpenaiApiKey,
-} from '../config';
-import logger from '../utils/logger';
-
-export const getAvailableChatModelProviders = async () => {
- const openAIApiKey = getOpenaiApiKey();
- const groqApiKey = getGroqApiKey();
- const ollamaEndpoint = getOllamaApiEndpoint();
-
- const models = {};
-
- if (openAIApiKey) {
- try {
- models['openai'] = {
- 'GPT-3.5 turbo': new ChatOpenAI({
- openAIApiKey,
- modelName: 'gpt-3.5-turbo',
- temperature: 0.7,
- }),
- 'GPT-4': new ChatOpenAI({
- openAIApiKey,
- modelName: 'gpt-4',
- temperature: 0.7,
- }),
- 'GPT-4 turbo': new ChatOpenAI({
- openAIApiKey,
- modelName: 'gpt-4-turbo',
- temperature: 0.7,
- }),
- 'GPT-4 omni': new ChatOpenAI({
- openAIApiKey,
- modelName: 'gpt-4o',
- temperature: 0.7,
- }),
- };
- } catch (err) {
- logger.error(`Error loading OpenAI models: ${err}`);
- }
- }
-
- if (groqApiKey) {
- try {
- models['groq'] = {
- 'LLaMA3 8b': new ChatOpenAI(
- {
- openAIApiKey: groqApiKey,
- modelName: 'llama3-8b-8192',
- temperature: 0.7,
- },
- {
- baseURL: 'https://api.groq.com/openai/v1',
- },
- ),
- 'LLaMA3 70b': new ChatOpenAI(
- {
- openAIApiKey: groqApiKey,
- modelName: 'llama3-70b-8192',
- temperature: 0.7,
- },
- {
- baseURL: 'https://api.groq.com/openai/v1',
- },
- ),
- 'Mixtral 8x7b': new ChatOpenAI(
- {
- openAIApiKey: groqApiKey,
- modelName: 'mixtral-8x7b-32768',
- temperature: 0.7,
- },
- {
- baseURL: 'https://api.groq.com/openai/v1',
- },
- ),
- 'Gemma 7b': new ChatOpenAI(
- {
- openAIApiKey: groqApiKey,
- modelName: 'gemma-7b-it',
- temperature: 0.7,
- },
- {
- baseURL: 'https://api.groq.com/openai/v1',
- },
- ),
- };
- } catch (err) {
- logger.error(`Error loading Groq models: ${err}`);
- }
- }
-
- if (ollamaEndpoint) {
- try {
- const response = await fetch(`${ollamaEndpoint}/api/tags`, {
- headers: {
- 'Content-Type': 'application/json',
- },
- });
-
- const { models: ollamaModels } = (await response.json()) as any;
-
- models['ollama'] = ollamaModels.reduce((acc, model) => {
- acc[model.model] = new ChatOllama({
- baseUrl: ollamaEndpoint,
- model: model.model,
- temperature: 0.7,
- });
- return acc;
- }, {});
- } catch (err) {
- logger.error(`Error loading Ollama models: ${err}`);
- }
- }
-
- models['custom_openai'] = {};
-
- return models;
-};
-
-export const getAvailableEmbeddingModelProviders = async () => {
- const openAIApiKey = getOpenaiApiKey();
- const ollamaEndpoint = getOllamaApiEndpoint();
-
- const models = {};
-
- if (openAIApiKey) {
- try {
- models['openai'] = {
- 'Text embedding 3 small': new OpenAIEmbeddings({
- openAIApiKey,
- modelName: 'text-embedding-3-small',
- }),
- 'Text embedding 3 large': new OpenAIEmbeddings({
- openAIApiKey,
- modelName: 'text-embedding-3-large',
- }),
- };
- } catch (err) {
- logger.error(`Error loading OpenAI embeddings: ${err}`);
- }
- }
-
- if (ollamaEndpoint) {
- try {
- const response = await fetch(`${ollamaEndpoint}/api/tags`, {
- headers: {
- 'Content-Type': 'application/json',
- },
- });
-
- const { models: ollamaModels } = (await response.json()) as any;
-
- models['ollama'] = ollamaModels.reduce((acc, model) => {
- acc[model.model] = new OllamaEmbeddings({
- baseUrl: ollamaEndpoint,
- model: model.model,
- });
- return acc;
- }, {});
- } catch (err) {
- logger.error(`Error loading Ollama embeddings: ${err}`);
- }
- }
-
- try {
- models['local'] = {
- 'BGE Small': new HuggingFaceTransformersEmbeddings({
- modelName: 'Xenova/bge-small-en-v1.5',
- }),
- 'GTE Small': new HuggingFaceTransformersEmbeddings({
- modelName: 'Xenova/gte-small',
- }),
- 'Bert Multilingual': new HuggingFaceTransformersEmbeddings({
- modelName: 'Xenova/bert-base-multilingual-uncased',
- }),
- };
- } catch (err) {
- logger.error(`Error loading local embeddings: ${err}`);
- }
-
- return models;
-};
diff --git a/src/lib/providers/anthropic.ts b/src/lib/providers/anthropic.ts
new file mode 100644
index 0000000..642a6cb
--- /dev/null
+++ b/src/lib/providers/anthropic.ts
@@ -0,0 +1,47 @@
+import { ChatAnthropic } from '@langchain/anthropic';
+import { getAnthropicApiKey } from '../../config';
+import logger from '../../utils/logger';
+
+// [model id, display name] pairs, listed in the order they are exposed.
+const anthropicChatModelList: [string, string][] = [
+  ['claude-3-5-sonnet-20241022', 'Claude 3.5 Sonnet'],
+  ['claude-3-5-haiku-20241022', 'Claude 3.5 Haiku'],
+  ['claude-3-opus-20240229', 'Claude 3 Opus'],
+  ['claude-3-sonnet-20240229', 'Claude 3 Sonnet'],
+  ['claude-3-haiku-20240307', 'Claude 3 Haiku'],
+];
+
+/**
+ * Builds the Anthropic chat models keyed by model id.
+ *
+ * @returns A map of model id to `{ displayName, model }`, or an empty object
+ *   when no Anthropic API key is configured or construction fails.
+ */
+export const loadAnthropicChatModels = async () => {
+  const anthropicApiKey = getAnthropicApiKey();
+
+  if (!anthropicApiKey) return {};
+
+  try {
+    const chatModels: Record<
+      string,
+      { displayName: string; model: ChatAnthropic }
+    > = {};
+
+    for (const [modelId, displayName] of anthropicChatModelList) {
+      chatModels[modelId] = {
+        displayName,
+        model: new ChatAnthropic({
+          temperature: 0.7,
+          anthropicApiKey,
+          model: modelId,
+        }),
+      };
+    }
+
+    return chatModels;
+  } catch (err) {
+    logger.error(`Error loading Anthropic models: ${err}`);
+    return {};
+  }
+};
diff --git a/src/lib/providers/gemini.ts b/src/lib/providers/gemini.ts
new file mode 100644
index 0000000..d20c9b8
--- /dev/null
+++ b/src/lib/providers/gemini.ts
@@ -0,0 +1,79 @@
+import {
+  ChatGoogleGenerativeAI,
+  GoogleGenerativeAIEmbeddings,
+} from '@langchain/google-genai';
+import { getGeminiApiKey } from '../../config';
+import logger from '../../utils/logger';
+
+// [model id, display name] pairs for chat models, in the order they are exposed.
+const geminiChatModelList: [string, string][] = [
+  ['gemini-1.5-flash', 'Gemini 1.5 Flash'],
+  ['gemini-1.5-flash-8b', 'Gemini 1.5 Flash 8B'],
+  ['gemini-1.5-pro', 'Gemini 1.5 Pro'],
+  ['gemini-2.0-flash-exp', 'Gemini 2.0 Flash Exp'],
+  [
+    'gemini-2.0-flash-thinking-exp-01-21',
+    'Gemini 2.0 Flash Thinking Exp 01-21',
+  ],
+];
+
+/**
+ * Builds the Gemini chat models keyed by model id.
+ *
+ * @returns A map of model id to `{ displayName, model }`, or an empty object
+ *   when no Gemini API key is configured or construction fails.
+ */
+export const loadGeminiChatModels = async () => {
+  const geminiApiKey = getGeminiApiKey();
+
+  if (!geminiApiKey) return {};
+
+  try {
+    const chatModels: Record<
+      string,
+      { displayName: string; model: ChatGoogleGenerativeAI }
+    > = {};
+
+    for (const [modelId, displayName] of geminiChatModelList) {
+      chatModels[modelId] = {
+        displayName,
+        model: new ChatGoogleGenerativeAI({
+          modelName: modelId,
+          temperature: 0.7,
+          apiKey: geminiApiKey,
+        }),
+      };
+    }
+
+    return chatModels;
+  } catch (err) {
+    logger.error(`Error loading Gemini models: ${err}`);
+    return {};
+  }
+};
+
+/**
+ * Builds the Gemini embedding models keyed by model id.
+ *
+ * @returns A map with the single `text-embedding-004` entry, or an empty
+ *   object when no Gemini API key is configured or construction fails.
+ */
+export const loadGeminiEmbeddingsModels = async () => {
+  const geminiApiKey = getGeminiApiKey();
+
+  if (!geminiApiKey) return {};
+
+  try {
+    const model = new GoogleGenerativeAIEmbeddings({
+      apiKey: geminiApiKey,
+      modelName: 'text-embedding-004',
+    });
+
+    return {
+      'text-embedding-004': { displayName: 'Text Embedding', model },
+    };
+  } catch (err) {
+    logger.error(`Error loading Gemini embeddings model: ${err}`);
+    return {};
+  }
+};
diff --git a/src/lib/providers/groq.ts b/src/lib/providers/groq.ts
new file mode 100644
index 0000000..41004ec
--- /dev/null
+++ b/src/lib/providers/groq.ts
@@ -0,0 +1,58 @@
+import { ChatOpenAI } from '@langchain/openai';
+import { getGroqApiKey } from '../../config';
+import logger from '../../utils/logger';
+
+// Base URL handed to every ChatOpenAI client so requests go to Groq.
+const GROQ_BASE_URL = 'https://api.groq.com/openai/v1';
+
+// [model id, display name] pairs, listed in the order they are exposed.
+const groqChatModelList: [string, string][] = [
+  ['llama-3.3-70b-versatile', 'Llama 3.3 70B'],
+  ['llama-3.2-3b-preview', 'Llama 3.2 3B'],
+  ['llama-3.2-11b-vision-preview', 'Llama 3.2 11B Vision'],
+  ['llama-3.2-90b-vision-preview', 'Llama 3.2 90B Vision'],
+  ['llama-3.1-8b-instant', 'Llama 3.1 8B'],
+  ['llama3-8b-8192', 'LLaMA3 8B'],
+  ['llama3-70b-8192', 'LLaMA3 70B'],
+  ['mixtral-8x7b-32768', 'Mixtral 8x7B'],
+  ['gemma2-9b-it', 'Gemma2 9B'],
+];
+
+/**
+ * Builds the Groq chat models keyed by model id. Each model is a `ChatOpenAI`
+ * client configured with the Groq API key and base URL.
+ *
+ * @returns A map of model id to `{ displayName, model }`, or an empty object
+ *   when no Groq API key is configured or construction fails.
+ */
+export const loadGroqChatModels = async () => {
+  const groqApiKey = getGroqApiKey();
+
+  if (!groqApiKey) return {};
+
+  try {
+    const chatModels: Record<
+      string,
+      { displayName: string; model: ChatOpenAI }
+    > = {};
+
+    for (const [modelId, displayName] of groqChatModelList) {
+      chatModels[modelId] = {
+        displayName,
+        model: new ChatOpenAI(
+          {
+            openAIApiKey: groqApiKey,
+            modelName: modelId,
+            temperature: 0.7,
+          },
+          { baseURL: GROQ_BASE_URL },
+        ),
+      };
+    }
+
+    return chatModels;
+  } catch (err) {
+    logger.error(`Error loading Groq models: ${err}`);
+    return {};
+  }
+};
diff --git a/src/lib/providers/index.ts b/src/lib/providers/index.ts
new file mode 100644
index 0000000..98846e7
--- /dev/null
+++ b/src/lib/providers/index.ts
@@ -0,0 +1,64 @@
+import { loadGroqChatModels } from './groq';
+import { loadOllamaChatModels, loadOllamaEmbeddingsModels } from './ollama';
+import { loadOpenAIChatModels, loadOpenAIEmbeddingsModels } from './openai';
+import { loadAnthropicChatModels } from './anthropic';
+import { loadTransformersEmbeddingsModels } from './transformers';
+import { loadGeminiChatModels, loadGeminiEmbeddingsModels } from './gemini';
+
+// Provider name -> loader for that provider's chat models.
+const chatModelProviders = {
+  openai: loadOpenAIChatModels,
+  groq: loadGroqChatModels,
+  ollama: loadOllamaChatModels,
+  anthropic: loadAnthropicChatModels,
+  gemini: loadGeminiChatModels,
+};
+
+// Provider name -> loader for that provider's embedding models.
+const embeddingModelProviders = {
+  openai: loadOpenAIEmbeddingsModels,
+  local: loadTransformersEmbeddingsModels,
+  ollama: loadOllamaEmbeddingsModels,
+  gemini: loadGeminiEmbeddingsModels,
+};
+
+/**
+ * Runs every loader in turn (awaiting each before starting the next) and
+ * keeps only the providers that returned at least one model.
+ */
+const loadProviders = async (
+  providers: Record<string, () => Promise<object>>,
+) => {
+  const models = {};
+
+  for (const name of Object.keys(providers)) {
+    const loaded = await providers[name]();
+    if (Object.keys(loaded).length === 0) continue;
+    models[name] = loaded;
+  }
+
+  return models;
+};
+
+/**
+ * Loads the chat models of every registered provider.
+ *
+ * @returns Provider name -> models; always includes an empty `custom_openai`
+ *   entry.
+ */
+export const getAvailableChatModelProviders = async () => {
+  const models = await loadProviders(chatModelProviders);
+
+  models['custom_openai'] = {};
+
+  return models;
+};
+
+/**
+ * Loads the embedding models of every registered provider.
+ *
+ * @returns Provider name -> models, omitting providers with no models.
+ */
+export const getAvailableEmbeddingModelProviders = async () => {
+  return loadProviders(embeddingModelProviders);
+};
diff --git a/src/lib/providers/ollama.ts b/src/lib/providers/ollama.ts
new file mode 100644
index 0000000..7277b27
--- /dev/null
+++ b/src/lib/providers/ollama.ts
@@ -0,0 +1,80 @@
+import { OllamaEmbeddings } from '@langchain/community/embeddings/ollama';
+import { getKeepAlive, getOllamaApiEndpoint } from '../../config';
+import logger from '../../utils/logger';
+import { ChatOllama } from '@langchain/community/chat_models/ollama';
+import axios from 'axios';
+
+/** GETs `<endpoint>/api/tags` and returns the `models` field of the response. */
+const fetchOllamaModels = async (ollamaEndpoint: string) => {
+  const response = await axios.get(`${ollamaEndpoint}/api/tags`, {
+    headers: {
+      'Content-Type': 'application/json',
+    },
+  });
+  const { models } = response.data;
+
+  return models;
+};
+
+/**
+ * Builds a `ChatOllama` model for every entry reported by the Ollama endpoint.
+ *
+ * @returns A map of model id to `{ displayName, model }`, or an empty object
+ *   when no endpoint is configured or the request/construction fails.
+ */
+export const loadOllamaChatModels = async () => {
+  const ollamaEndpoint = getOllamaApiEndpoint();
+  const keepAlive = getKeepAlive();
+
+  if (!ollamaEndpoint) return {};
+
+  try {
+    const ollamaModels = await fetchOllamaModels(ollamaEndpoint);
+
+    return ollamaModels.reduce((chatModels, { model, name }) => {
+      chatModels[model] = {
+        displayName: name,
+        model: new ChatOllama({
+          baseUrl: ollamaEndpoint,
+          model,
+          temperature: 0.7,
+          keepAlive,
+        }),
+      };
+
+      return chatModels;
+    }, {});
+  } catch (err) {
+    logger.error(`Error loading Ollama models: ${err}`);
+    return {};
+  }
+};
+
+/**
+ * Builds an `OllamaEmbeddings` model for every entry reported by the Ollama
+ * endpoint.
+ *
+ * @returns A map of model id to `{ displayName, model }`, or an empty object
+ *   when no endpoint is configured or the request/construction fails.
+ */
+export const loadOllamaEmbeddingsModels = async () => {
+  const ollamaEndpoint = getOllamaApiEndpoint();
+
+  if (!ollamaEndpoint) return {};
+
+  try {
+    const ollamaModels = await fetchOllamaModels(ollamaEndpoint);
+
+    return ollamaModels.reduce((embeddingsModels, { model, name }) => {
+      embeddingsModels[model] = {
+        displayName: name,
+        model: new OllamaEmbeddings({ baseUrl: ollamaEndpoint, model }),
+      };
+
+      return embeddingsModels;
+    }, {});
+  } catch (err) {
+    logger.error(`Error loading Ollama embeddings model: ${err}`);
+    return {};
+  }
+};
diff --git a/src/lib/providers/openai.ts b/src/lib/providers/openai.ts
new file mode 100644
index 0000000..3747e37
--- /dev/null
+++ b/src/lib/providers/openai.ts
@@ -0,0 +1,84 @@
+import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai';
+import { getOpenaiApiKey } from '../../config';
+import logger from '../../utils/logger';
+
+// [model id, display name] pairs for chat models, in the order they are exposed.
+const openAIChatModelList: [string, string][] = [
+  ['gpt-3.5-turbo', 'GPT-3.5 Turbo'],
+  ['gpt-4', 'GPT-4'],
+  ['gpt-4-turbo', 'GPT-4 turbo'],
+  ['gpt-4o', 'GPT-4 omni'],
+  ['gpt-4o-mini', 'GPT-4 omni mini'],
+];
+
+// [model id, display name] pairs for embedding models.
+const openAIEmbeddingModelList: [string, string][] = [
+  ['text-embedding-3-small', 'Text Embedding 3 Small'],
+  ['text-embedding-3-large', 'Text Embedding 3 Large'],
+];
+
+/**
+ * Builds the OpenAI chat models keyed by model id.
+ *
+ * @returns A map of model id to `{ displayName, model }`, or an empty object
+ *   when no OpenAI API key is configured or construction fails.
+ */
+export const loadOpenAIChatModels = async () => {
+  const openAIApiKey = getOpenaiApiKey();
+
+  if (!openAIApiKey) return {};
+
+  try {
+    const chatModels: Record<
+      string,
+      { displayName: string; model: ChatOpenAI }
+    > = {};
+
+    for (const [modelId, displayName] of openAIChatModelList) {
+      chatModels[modelId] = {
+        displayName,
+        model: new ChatOpenAI({
+          openAIApiKey,
+          modelName: modelId,
+          temperature: 0.7,
+        }),
+      };
+    }
+
+    return chatModels;
+  } catch (err) {
+    logger.error(`Error loading OpenAI models: ${err}`);
+    return {};
+  }
+};
+
+/**
+ * Builds the OpenAI embedding models keyed by model id.
+ *
+ * @returns A map of model id to `{ displayName, model }`, or an empty object
+ *   when no OpenAI API key is configured or construction fails.
+ */
+export const loadOpenAIEmbeddingsModels = async () => {
+  const openAIApiKey = getOpenaiApiKey();
+
+  if (!openAIApiKey) return {};
+
+  try {
+    const embeddingModels: Record<
+      string,
+      { displayName: string; model: OpenAIEmbeddings }
+    > = {};
+
+    for (const [modelId, displayName] of openAIEmbeddingModelList) {
+      embeddingModels[modelId] = {
+        displayName,
+        model: new OpenAIEmbeddings({ openAIApiKey, modelName: modelId }),
+      };
+    }
+
+    return embeddingModels;
+  } catch (err) {
+    logger.error(`Error loading OpenAI embeddings model: ${err}`);
+    return {};
+  }
+};
diff --git a/src/lib/providers/transformers.ts b/src/lib/providers/transformers.ts
new file mode 100644
index 0000000..8a3417d
--- /dev/null
+++ b/src/lib/providers/transformers.ts
@@ -0,0 +1,40 @@
+import logger from '../../utils/logger';
+import { HuggingFaceTransformersEmbeddings } from '../huggingfaceTransformer';
+
+// [key, display name, model name] triples, in the order they are exposed.
+const transformersEmbeddingModelList: [string, string, string][] = [
+  ['xenova-bge-small-en-v1.5', 'BGE Small', 'Xenova/bge-small-en-v1.5'],
+  ['xenova-gte-small', 'GTE Small', 'Xenova/gte-small'],
+  [
+    'xenova-bert-base-multilingual-uncased',
+    'Bert Multilingual',
+    'Xenova/bert-base-multilingual-uncased',
+  ],
+];
+
+/**
+ * Builds the `HuggingFaceTransformersEmbeddings` models keyed by model key.
+ *
+ * @returns A map of key to `{ displayName, model }`, or an empty object when
+ *   construction fails.
+ */
+export const loadTransformersEmbeddingsModels = async () => {
+  try {
+    const embeddingModels: Record<
+      string,
+      { displayName: string; model: HuggingFaceTransformersEmbeddings }
+    > = {};
+
+    for (const [key, displayName, modelName] of transformersEmbeddingModelList) {
+      embeddingModels[key] = {
+        displayName,
+        model: new HuggingFaceTransformersEmbeddings({ modelName }),
+      };
+    }
+
+    return embeddingModels;
+  } catch (err) {
+    logger.error(`Error loading Transformers embeddings model: ${err}`);
+    return {};
+  }
+};
diff --git a/src/prompts/academicSearch.ts b/src/prompts/academicSearch.ts
new file mode 100644
index 0000000..c2946ff
--- /dev/null
+++ b/src/prompts/academicSearch.ts
@@ -0,0 +1,65 @@
+export const academicSearchRetrieverPrompt = `
+You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
+If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
+
+Example:
+1. Follow up question: How does stable diffusion work?
+Rephrased: Stable diffusion working
+
+2. Follow up question: What is linear algebra?
+Rephrased: Linear algebra
+
+3. Follow up question: What is the third law of thermodynamics?
+Rephrased: Third law of thermodynamics
+
+Conversation:
+{chat_history}
+
+Follow up question: {query}
+Rephrased question:
+`;
+
+export const academicSearchResponsePrompt = `
+ You are Perplexica, an AI model skilled in web search and crafting detailed, engaging, and well-structured answers. You excel at summarizing web pages and extracting relevant information to create professional, blog-style responses.
+
+ Your task is to provide answers that are:
+ - **Informative and relevant**: Thoroughly address the user's query using the given context.
+ - **Well-structured**: Include clear headings and subheadings, and use a professional tone to present information concisely and logically.
+ - **Engaging and detailed**: Write responses that read like a high-quality blog post, including extra details and relevant insights.
+ - **Cited and credible**: Use inline citations with [number] notation to refer to the context source(s) for each fact or detail included.
+ - **Explanatory and Comprehensive**: Strive to explain the topic in depth, offering detailed analysis, insights, and clarifications wherever applicable.
+
+ ### Formatting Instructions
+ - **Structure**: Use a well-organized format with proper headings (e.g., "## Example heading 1" or "## Example heading 2"). Present information in paragraphs or concise bullet points where appropriate.
+ - **Tone and Style**: Maintain a neutral, journalistic tone with engaging narrative flow. Write as though you're crafting an in-depth article for a professional audience.
+ - **Markdown Usage**: Format your response with Markdown for clarity. Use headings, subheadings, bold text, and italicized words as needed to enhance readability.
+ - **Length and Depth**: Provide comprehensive coverage of the topic. Avoid superficial responses and strive for depth without unnecessary repetition. Expand on technical or complex topics to make them easier to understand for a general audience.
+ - **No main heading/title**: Start your response directly with the introduction unless asked to provide a specific title.
+ - **Conclusion or Summary**: Include a concluding paragraph that synthesizes the provided information or suggests potential next steps, where appropriate.
+
+ ### Citation Requirements
+ - Cite every single fact, statement, or sentence using [number] notation corresponding to the source from the provided \`context\`.
+ - Integrate citations naturally at the end of sentences or clauses as appropriate. For example, "The Eiffel Tower is one of the most visited landmarks in the world[1]."
+ - Ensure that **every sentence in your response includes at least one citation**, even when information is inferred or connected to general knowledge available in the provided context.
+ - Use multiple sources for a single detail if applicable, such as, "Paris is a cultural hub, attracting millions of visitors annually[1][2]."
+ - Always prioritize credibility and accuracy by linking all statements back to their respective context sources.
+ - Avoid citing unsupported assumptions or personal interpretations; if no source supports a statement, clearly indicate the limitation.
+
+ ### Special Instructions
+ - If the query involves technical, historical, or complex topics, provide detailed background and explanatory sections to ensure clarity.
+ - If the user provides vague input or if relevant information is missing, explain what additional details might help refine the search.
+ - If no relevant information is found, say: "Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?" Be transparent about limitations and suggest alternatives or ways to reframe the query.
+ - You are set on focus mode 'Academic', this means you will be searching for academic papers and articles on the web.
+
+ ### Example Output
+ - Begin with a brief introduction summarizing the event or query topic.
+ - Follow with detailed sections under clear headings, covering all aspects of the query if possible.
+ - Provide explanations or historical context as needed to enhance understanding.
+ - End with a conclusion or overall perspective if relevant.
+
+ <context>
+ {context}
+ </context>
+
+ Current date & time in ISO format (UTC timezone) is: {date}.
+`;
diff --git a/src/prompts/index.ts b/src/prompts/index.ts
new file mode 100644
index 0000000..f479185
--- /dev/null
+++ b/src/prompts/index.ts
@@ -0,0 +1,32 @@
+import {
+ academicSearchResponsePrompt,
+ academicSearchRetrieverPrompt,
+} from './academicSearch';
+import {
+ redditSearchResponsePrompt,
+ redditSearchRetrieverPrompt,
+} from './redditSearch';
+import { webSearchResponsePrompt, webSearchRetrieverPrompt } from './webSearch';
+import {
+ wolframAlphaSearchResponsePrompt,
+ wolframAlphaSearchRetrieverPrompt,
+} from './wolframAlpha';
+import { writingAssistantPrompt } from './writingAssistant';
+import {
+ youtubeSearchResponsePrompt,
+ youtubeSearchRetrieverPrompt,
+} from './youtubeSearch';
+// Single default export bundling every prompt imported above.
+export default {
+ webSearchResponsePrompt,
+ webSearchRetrieverPrompt,
+ academicSearchResponsePrompt,
+ academicSearchRetrieverPrompt,
+ redditSearchResponsePrompt,
+ redditSearchRetrieverPrompt,
+ wolframAlphaSearchResponsePrompt,
+ wolframAlphaSearchRetrieverPrompt,
+ writingAssistantPrompt,
+ youtubeSearchResponsePrompt,
+ youtubeSearchRetrieverPrompt,
+};
diff --git a/src/prompts/redditSearch.ts b/src/prompts/redditSearch.ts
new file mode 100644
index 0000000..fc71957
--- /dev/null
+++ b/src/prompts/redditSearch.ts
@@ -0,0 +1,65 @@
+export const redditSearchRetrieverPrompt = `
+You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
+If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
+
+Example:
+1. Follow up question: Which company is most likely to create an AGI
+Rephrased: Which company is most likely to create an AGI
+
+2. Follow up question: Is Earth flat?
+Rephrased: Is Earth flat?
+
+3. Follow up question: Is there life on Mars?
+Rephrased: Is there life on Mars?
+
+Conversation:
+{chat_history}
+
+Follow up question: {query}
+Rephrased question:
+`;
+
+export const redditSearchResponsePrompt = `
+ You are Perplexica, an AI model skilled in web search and crafting detailed, engaging, and well-structured answers. You excel at summarizing web pages and extracting relevant information to create professional, blog-style responses.
+
+ Your task is to provide answers that are:
+ - **Informative and relevant**: Thoroughly address the user's query using the given context.
+ - **Well-structured**: Include clear headings and subheadings, and use a professional tone to present information concisely and logically.
+ - **Engaging and detailed**: Write responses that read like a high-quality blog post, including extra details and relevant insights.
+ - **Cited and credible**: Use inline citations with [number] notation to refer to the context source(s) for each fact or detail included.
+ - **Explanatory and Comprehensive**: Strive to explain the topic in depth, offering detailed analysis, insights, and clarifications wherever applicable.
+
+ ### Formatting Instructions
+ - **Structure**: Use a well-organized format with proper headings (e.g., "## Example heading 1" or "## Example heading 2"). Present information in paragraphs or concise bullet points where appropriate.
+ - **Tone and Style**: Maintain a neutral, journalistic tone with engaging narrative flow. Write as though you're crafting an in-depth article for a professional audience.
+ - **Markdown Usage**: Format your response with Markdown for clarity. Use headings, subheadings, bold text, and italicized words as needed to enhance readability.
+ - **Length and Depth**: Provide comprehensive coverage of the topic. Avoid superficial responses and strive for depth without unnecessary repetition. Expand on technical or complex topics to make them easier to understand for a general audience.
+ - **No main heading/title**: Start your response directly with the introduction unless asked to provide a specific title.
+ - **Conclusion or Summary**: Include a concluding paragraph that synthesizes the provided information or suggests potential next steps, where appropriate.
+
+ ### Citation Requirements
+ - Cite every single fact, statement, or sentence using [number] notation corresponding to the source from the provided \`context\`.
+ - Integrate citations naturally at the end of sentences or clauses as appropriate. For example, "The Eiffel Tower is one of the most visited landmarks in the world[1]."
+ - Ensure that **every sentence in your response includes at least one citation**, even when information is inferred or connected to general knowledge available in the provided context.
+ - Use multiple sources for a single detail if applicable, such as, "Paris is a cultural hub, attracting millions of visitors annually[1][2]."
+ - Always prioritize credibility and accuracy by linking all statements back to their respective context sources.
+ - Avoid citing unsupported assumptions or personal interpretations; if no source supports a statement, clearly indicate the limitation.
+
+ ### Special Instructions
+ - If the query involves technical, historical, or complex topics, provide detailed background and explanatory sections to ensure clarity.
+ - If the user provides vague input or if relevant information is missing, explain what additional details might help refine the search.
+ - If no relevant information is found, say: "Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?" Be transparent about limitations and suggest alternatives or ways to reframe the query.
+ - You are set on focus mode 'Reddit', this means you will be searching for information, opinions and discussions on the web using Reddit.
+
+ ### Example Output
+ - Begin with a brief introduction summarizing the event or query topic.
+ - Follow with detailed sections under clear headings, covering all aspects of the query if possible.
+ - Provide explanations or historical context as needed to enhance understanding.
+ - End with a conclusion or overall perspective if relevant.
+
+ <context>
+ {context}
+ </context>
+
+ Current date & time in ISO format (UTC timezone) is: {date}.
+`;
diff --git a/src/prompts/webSearch.ts b/src/prompts/webSearch.ts
new file mode 100644
index 0000000..d8269c8
--- /dev/null
+++ b/src/prompts/webSearch.ts
@@ -0,0 +1,106 @@
+export const webSearchRetrieverPrompt = `
+You are an AI question rephraser. You will be given a conversation and a follow-up question, you will have to rephrase the follow up question so it is a standalone question and can be used by another LLM to search the web for information to answer it.
+If it is a simple writing task or a greeting (unless the greeting contains a question after it) like Hi, Hello, How are you, etc. rather than a question, then you need to return \`not_needed\` as the response (This is because the LLM won't need to search the web for finding information on this topic).
+If the user asks some question from some URL or wants you to summarize a PDF or a webpage (via URL) you need to return the links inside the \`links\` XML block and the question inside the \`question\` XML block. If the user wants you to summarize the webpage or the PDF you need to return \`summarize\` inside the \`question\` XML block in place of a question and the link to summarize in the \`links\` XML block.
+You must always return the rephrased question inside the \`question\` XML block, if there are no links in the follow-up question then don't insert a \`links\` XML block in your response.
+
+There are several examples attached for your reference inside the below \`examples\` XML block
+
+<examples>
+1. Follow up question: What is the capital of France
+Rephrased question: \`
+<question>
+Capital of france
+</question>
+\`
+
+2. Follow up question: Hi, how are you?
+Rephrased question: \`
+<question>
+not_needed
+</question>
+\`
+
+3. Follow up question: What is Docker?
+Rephrased question: \`
+<question>
+What is Docker
+</question>
+\`
+
+4. Follow up question: Can you tell me what is X from https://example.com
+Rephrased question: \`
+<question>
+Can you tell me what is X?
+</question>
+
+<links>
+https://example.com
+</links>
+\`
+
+5. Follow up question: Summarize the content from https://example.com
+Rephrased question: \`
+<question>
+summarize
+</question>
+
+<links>
+https://example.com
+</links>
+\`
+</examples>
+
+Anything below is the part of the actual conversation and you need to use conversation and the follow-up question to rephrase the follow-up question as a standalone question based on the guidelines shared above.
+
+<conversation>
+{chat_history}
+</conversation>
+
+Follow up question: {query}
+Rephrased question:
+`;
+
+export const webSearchResponsePrompt = `
+ You are Perplexica, an AI model skilled in web search and crafting detailed, engaging, and well-structured answers. You excel at summarizing web pages and extracting relevant information to create professional, blog-style responses.
+
+ Your task is to provide answers that are:
+ - **Informative and relevant**: Thoroughly address the user's query using the given context.
+ - **Well-structured**: Include clear headings and subheadings, and use a professional tone to present information concisely and logically.
+ - **Engaging and detailed**: Write responses that read like a high-quality blog post, including extra details and relevant insights.
+ - **Cited and credible**: Use inline citations with [number] notation to refer to the context source(s) for each fact or detail included.
+ - **Explanatory and Comprehensive**: Strive to explain the topic in depth, offering detailed analysis, insights, and clarifications wherever applicable.
+
+ ### Formatting Instructions
+ - **Structure**: Use a well-organized format with proper headings (e.g., "## Example heading 1" or "## Example heading 2"). Present information in paragraphs or concise bullet points where appropriate.
+ - **Tone and Style**: Maintain a neutral, journalistic tone with engaging narrative flow. Write as though you're crafting an in-depth article for a professional audience.
+ - **Markdown Usage**: Format your response with Markdown for clarity. Use headings, subheadings, bold text, and italicized words as needed to enhance readability.
+ - **Length and Depth**: Provide comprehensive coverage of the topic. Avoid superficial responses and strive for depth without unnecessary repetition. Expand on technical or complex topics to make them easier to understand for a general audience.
+ - **No main heading/title**: Start your response directly with the introduction unless asked to provide a specific title.
+ - **Conclusion or Summary**: Include a concluding paragraph that synthesizes the provided information or suggests potential next steps, where appropriate.
+
+ ### Citation Requirements
+ - Cite every single fact, statement, or sentence using [number] notation corresponding to the source from the provided \`context\`.
+ - Integrate citations naturally at the end of sentences or clauses as appropriate. For example, "The Eiffel Tower is one of the most visited landmarks in the world[1]."
+ - Ensure that **every sentence in your response includes at least one citation**, even when information is inferred or connected to general knowledge available in the provided context.
+ - Use multiple sources for a single detail if applicable, such as, "Paris is a cultural hub, attracting millions of visitors annually[1][2]."
+ - Always prioritize credibility and accuracy by linking all statements back to their respective context sources.
+ - Avoid citing unsupported assumptions or personal interpretations; if no source supports a statement, clearly indicate the limitation.
+
+ ### Special Instructions
+ - If the query involves technical, historical, or complex topics, provide detailed background and explanatory sections to ensure clarity.
+ - If the user provides vague input or if relevant information is missing, explain what additional details might help refine the search.
+ - If no relevant information is found, say: "Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?" Be transparent about limitations and suggest alternatives or ways to reframe the query.
+
+ ### Example Output
+ - Begin with a brief introduction summarizing the event or query topic.
+ - Follow with detailed sections under clear headings, covering all aspects of the query if possible.
+ - Provide explanations or historical context as needed to enhance understanding.
+ - End with a conclusion or overall perspective if relevant.
+
+ <context>
+ {context}
+ </context>
+
+ Current date & time in ISO format (UTC timezone) is: {date}.
+`;
diff --git a/src/prompts/wolframAlpha.ts b/src/prompts/wolframAlpha.ts
new file mode 100644
index 0000000..40410c1
--- /dev/null
+++ b/src/prompts/wolframAlpha.ts
@@ -0,0 +1,65 @@
+export const wolframAlphaSearchRetrieverPrompt = `
+You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
+If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
+
+Example:
+1. Follow up question: What is the atomic radius of S?
+Rephrased: Atomic radius of S
+
+2. Follow up question: What is linear algebra?
+Rephrased: Linear algebra
+
+3. Follow up question: What is the third law of thermodynamics?
+Rephrased: Third law of thermodynamics
+
+Conversation:
+{chat_history}
+
+Follow up question: {query}
+Rephrased question:
+`;
+
+export const wolframAlphaSearchResponsePrompt = `
+ You are Perplexica, an AI model skilled in web search and crafting detailed, engaging, and well-structured answers. You excel at summarizing web pages and extracting relevant information to create professional, blog-style responses.
+
+ Your task is to provide answers that are:
+ - **Informative and relevant**: Thoroughly address the user's query using the given context.
+ - **Well-structured**: Include clear headings and subheadings, and use a professional tone to present information concisely and logically.
+ - **Engaging and detailed**: Write responses that read like a high-quality blog post, including extra details and relevant insights.
+ - **Cited and credible**: Use inline citations with [number] notation to refer to the context source(s) for each fact or detail included.
+ - **Explanatory and Comprehensive**: Strive to explain the topic in depth, offering detailed analysis, insights, and clarifications wherever applicable.
+
+ ### Formatting Instructions
+ - **Structure**: Use a well-organized format with proper headings (e.g., "## Example heading 1" or "## Example heading 2"). Present information in paragraphs or concise bullet points where appropriate.
+ - **Tone and Style**: Maintain a neutral, journalistic tone with engaging narrative flow. Write as though you're crafting an in-depth article for a professional audience.
+ - **Markdown Usage**: Format your response with Markdown for clarity. Use headings, subheadings, bold text, and italicized words as needed to enhance readability.
+ - **Length and Depth**: Provide comprehensive coverage of the topic. Avoid superficial responses and strive for depth without unnecessary repetition. Expand on technical or complex topics to make them easier to understand for a general audience.
+ - **No main heading/title**: Start your response directly with the introduction unless asked to provide a specific title.
+ - **Conclusion or Summary**: Include a concluding paragraph that synthesizes the provided information or suggests potential next steps, where appropriate.
+
+ ### Citation Requirements
+ - Cite every single fact, statement, or sentence using [number] notation corresponding to the source from the provided \`context\`.
+ - Integrate citations naturally at the end of sentences or clauses as appropriate. For example, "The Eiffel Tower is one of the most visited landmarks in the world[1]."
+ - Ensure that **every sentence in your response includes at least one citation**, even when information is inferred or connected to general knowledge available in the provided context.
+ - Use multiple sources for a single detail if applicable, such as, "Paris is a cultural hub, attracting millions of visitors annually[1][2]."
+ - Always prioritize credibility and accuracy by linking all statements back to their respective context sources.
+ - Avoid citing unsupported assumptions or personal interpretations; if no source supports a statement, clearly indicate the limitation.
+
+ ### Special Instructions
+ - If the query involves technical, historical, or complex topics, provide detailed background and explanatory sections to ensure clarity.
+ - If the user provides vague input or if relevant information is missing, explain what additional details might help refine the search.
+ - If no relevant information is found, say: "Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?" Be transparent about limitations and suggest alternatives or ways to reframe the query.
+ - You are set on focus mode 'Wolfram Alpha', this means you will be searching for information on the web using Wolfram Alpha. It is a computational knowledge engine that can answer factual queries and perform computations.
+
+ ### Example Output
+ - Begin with a brief introduction summarizing the event or query topic.
+ - Follow with detailed sections under clear headings, covering all aspects of the query if possible.
+ - Provide explanations or historical context as needed to enhance understanding.
+ - End with a conclusion or overall perspective if relevant.
+
+ <context>
+ {context}
+ </context>
+
+ Current date & time in ISO format (UTC timezone) is: {date}.
+`;
diff --git a/src/prompts/writingAssistant.ts b/src/prompts/writingAssistant.ts
new file mode 100644
index 0000000..f56bf47
--- /dev/null
+++ b/src/prompts/writingAssistant.ts
@@ -0,0 +1,13 @@
+export const writingAssistantPrompt = `
+You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are currently set on focus mode 'Writing Assistant', this means you will be helping the user write a response to a given query.
+Since you are a writing assistant, you would not perform web searches. If you think you lack information to answer the query, you can ask the user for more information or suggest them to switch to a different focus mode.
+You will be shared a context that can contain information from files user has uploaded to get answers from. You will have to generate answers upon that.
+
+You have to cite the answer using [number] notation. You must cite the sentences with their relevant context number. You must cite each and every part of the answer so the user can know where the information is coming from.
+Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2].
+However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer.
+
+<context>
+{context}
+</context>
+`;
diff --git a/src/prompts/youtubeSearch.ts b/src/prompts/youtubeSearch.ts
new file mode 100644
index 0000000..5805b54
--- /dev/null
+++ b/src/prompts/youtubeSearch.ts
@@ -0,0 +1,65 @@
+export const youtubeSearchRetrieverPrompt = `
+You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
+If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
+
+Example:
+1. Follow up question: How does an A.C work?
+Rephrased: A.C working
+
+2. Follow up question: Linear algebra explanation video
+Rephrased: What is linear algebra?
+
+3. Follow up question: What is theory of relativity?
+Rephrased: What is theory of relativity?
+
+Conversation:
+{chat_history}
+
+Follow up question: {query}
+Rephrased question:
+`;
+
+export const youtubeSearchResponsePrompt = `
+ You are Perplexica, an AI model skilled in web search and crafting detailed, engaging, and well-structured answers. You excel at summarizing web pages and extracting relevant information to create professional, blog-style responses.
+
+ Your task is to provide answers that are:
+ - **Informative and relevant**: Thoroughly address the user's query using the given context.
+ - **Well-structured**: Include clear headings and subheadings, and use a professional tone to present information concisely and logically.
+ - **Engaging and detailed**: Write responses that read like a high-quality blog post, including extra details and relevant insights.
+ - **Cited and credible**: Use inline citations with [number] notation to refer to the context source(s) for each fact or detail included.
+ - **Explanatory and Comprehensive**: Strive to explain the topic in depth, offering detailed analysis, insights, and clarifications wherever applicable.
+
+ ### Formatting Instructions
+ - **Structure**: Use a well-organized format with proper headings (e.g., "## Example heading 1" or "## Example heading 2"). Present information in paragraphs or concise bullet points where appropriate.
+ - **Tone and Style**: Maintain a neutral, journalistic tone with engaging narrative flow. Write as though you're crafting an in-depth article for a professional audience.
+ - **Markdown Usage**: Format your response with Markdown for clarity. Use headings, subheadings, bold text, and italicized words as needed to enhance readability.
+ - **Length and Depth**: Provide comprehensive coverage of the topic. Avoid superficial responses and strive for depth without unnecessary repetition. Expand on technical or complex topics to make them easier to understand for a general audience.
+ - **No main heading/title**: Start your response directly with the introduction unless asked to provide a specific title.
+ - **Conclusion or Summary**: Include a concluding paragraph that synthesizes the provided information or suggests potential next steps, where appropriate.
+
+ ### Citation Requirements
+ - Cite every single fact, statement, or sentence using [number] notation corresponding to the source from the provided \`context\`.
+ - Integrate citations naturally at the end of sentences or clauses as appropriate. For example, "The Eiffel Tower is one of the most visited landmarks in the world[1]."
+ - Ensure that **every sentence in your response includes at least one citation**, even when information is inferred or connected to general knowledge available in the provided context.
+ - Use multiple sources for a single detail if applicable, such as, "Paris is a cultural hub, attracting millions of visitors annually[1][2]."
+ - Always prioritize credibility and accuracy by linking all statements back to their respective context sources.
+ - Avoid citing unsupported assumptions or personal interpretations; if no source supports a statement, clearly indicate the limitation.
+
+ ### Special Instructions
+ - If the query involves technical, historical, or complex topics, provide detailed background and explanatory sections to ensure clarity.
+ - If the user provides vague input or if relevant information is missing, explain what additional details might help refine the search.
+ - If no relevant information is found, say: "Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?" Be transparent about limitations and suggest alternatives or ways to reframe the query.
+ - You are set on focus mode 'Youtube', this means you will be searching for videos on the web using Youtube and providing information based on the video's transcript.
+
+ ### Example Output
+ - Begin with a brief introduction summarizing the event or query topic.
+ - Follow with detailed sections under clear headings, covering all aspects of the query if possible.
+ - Provide explanations or historical context as needed to enhance understanding.
+ - End with a conclusion or overall perspective if relevant.
+
+ <context>
+ {context}
+ </context>
+
+ Current date & time in ISO format (UTC timezone) is: {date}.
+`;
diff --git a/src/routes/chats.ts b/src/routes/chats.ts
new file mode 100644
index 0000000..afa74f9
--- /dev/null
+++ b/src/routes/chats.ts
@@ -0,0 +1,66 @@
+import express from 'express';
+import logger from '../utils/logger';
+import db from '../db/index';
+import { eq } from 'drizzle-orm';
+import { chats, messages } from '../db/schema';
+
+const router = express.Router();
+
+// Responds with every stored chat, reversed relative to the query's order.
+router.get('/', async (_, res) => {
+  try {
+    const storedChats = await db.query.chats.findMany();
+    return res.status(200).json({ chats: storedChats.reverse() });
+  } catch (err) {
+    res.status(500).json({ message: 'An error has occurred.' });
+    // A thrown value need not be an Error; narrow before reading `.message`.
+    const reason = err instanceof Error ? err.message : String(err);
+    logger.error(`Error in getting chats: ${reason}`);
+  }
+});
+
+// Responds with one chat and its messages, or 404 when the id is unknown.
+router.get('/:id', async (req, res) => {
+  try {
+    const chat = await db.query.chats.findFirst({
+      where: eq(chats.id, req.params.id),
+    });
+    if (!chat) {
+      return res.status(404).json({ message: 'Chat not found' });
+    }
+    const chatMessages = await db.query.messages.findMany({
+      where: eq(messages.chatId, req.params.id),
+    });
+    return res.status(200).json({ chat, messages: chatMessages });
+  } catch (err) {
+    res.status(500).json({ message: 'An error has occurred.' });
+    // A thrown value need not be an Error; narrow before reading `.message`.
+    const reason = err instanceof Error ? err.message : String(err);
+    logger.error(`Error in getting chat: ${reason}`);
+  }
+});
+
+// Deletes one chat and its messages, or responds 404 when the id is unknown.
+router.delete(`/:id`, async (req, res) => {
+  try {
+    const chat = await db.query.chats.findFirst({
+      where: eq(chats.id, req.params.id),
+    });
+    if (!chat) {
+      return res.status(404).json({ message: 'Chat not found' });
+    }
+    // Messages first, so a mid-way failure leaves a retryable chat, not orphans.
+    await db
+      .delete(messages)
+      .where(eq(messages.chatId, req.params.id))
+      .execute();
+    await db.delete(chats).where(eq(chats.id, req.params.id)).execute();
+    return res.status(200).json({ message: 'Chat deleted successfully' });
+  } catch (err) {
+    res.status(500).json({ message: 'An error has occurred.' });
+    const reason = err instanceof Error ? err.message : String(err);
+    logger.error(`Error in deleting chat: ${reason}`);
+  }
+});
+
+export default router;
diff --git a/src/routes/config.ts b/src/routes/config.ts
index bf13b63..6ff80c6 100644
--- a/src/routes/config.ts
+++ b/src/routes/config.ts
@@ -6,40 +6,60 @@ import {
import {
getGroqApiKey,
getOllamaApiEndpoint,
+ getAnthropicApiKey,
+ getGeminiApiKey,
getOpenaiApiKey,
updateConfig,
} from '../config';
+import logger from '../utils/logger';
const router = express.Router();
+// GET / — responds with the selectable chat/embedding models per provider plus
+// the values returned by the API-key and Ollama-endpoint getters.
router.get('/', async (_, res) => {
- const config = {};
+ try {
+ const config = {};
- const [chatModelProviders, embeddingModelProviders] = await Promise.all([
- getAvailableChatModelProviders(),
- getAvailableEmbeddingModelProviders(),
- ]);
+ const [chatModelProviders, embeddingModelProviders] = await Promise.all([
+ getAvailableChatModelProviders(),
+ getAvailableEmbeddingModelProviders(),
+ ]);
- config['chatModelProviders'] = {};
- config['embeddingModelProviders'] = {};
+ config['chatModelProviders'] = {};
+ config['embeddingModelProviders'] = {};
- for (const provider in chatModelProviders) {
- config['chatModelProviders'][provider] = Object.keys(
- chatModelProviders[provider],
- );
+ // Each provider maps to a list of { name, displayName } entries, one per model key.
+ for (const provider in chatModelProviders) {
+ config['chatModelProviders'][provider] = Object.keys(
+ chatModelProviders[provider],
+ ).map((model) => {
+ return {
+ name: model,
+ displayName: chatModelProviders[provider][model].displayName,
+ };
+ });
+ }
+
+ for (const provider in embeddingModelProviders) {
+ config['embeddingModelProviders'][provider] = Object.keys(
+ embeddingModelProviders[provider],
+ ).map((model) => {
+ return {
+ name: model,
+ displayName: embeddingModelProviders[provider][model].displayName,
+ };
+ });
+ }
+
+ // NOTE(review): whatever these getters return is sent to the client as-is.
+ config['openaiApiKey'] = getOpenaiApiKey();
+ config['ollamaApiUrl'] = getOllamaApiEndpoint();
+ config['anthropicApiKey'] = getAnthropicApiKey();
+ config['groqApiKey'] = getGroqApiKey();
+ config['geminiApiKey'] = getGeminiApiKey();
+
+ res.status(200).json(config);
+ } catch (err: any) {
+ res.status(500).json({ message: 'An error has occurred.' });
+ logger.error(`Error getting config: ${err.message}`);
}
-
- for (const provider in embeddingModelProviders) {
- config['embeddingModelProviders'][provider] = Object.keys(
- embeddingModelProviders[provider],
- );
- }
-
- config['openaiApiKey'] = getOpenaiApiKey();
- config['ollamaApiUrl'] = getOllamaApiEndpoint();
- config['groqApiKey'] = getGroqApiKey();
-
- res.status(200).json(config);
});
router.post('/', async (req, res) => {
@@ -49,6 +69,8 @@ router.post('/', async (req, res) => {
API_KEYS: {
OPENAI: config.openaiApiKey,
GROQ: config.groqApiKey,
+ ANTHROPIC: config.anthropicApiKey,
+ GEMINI: config.geminiApiKey,
},
API_ENDPOINTS: {
OLLAMA: config.ollamaApiUrl,
diff --git a/src/routes/discover.ts b/src/routes/discover.ts
new file mode 100644
index 0000000..b6f8ff9
--- /dev/null
+++ b/src/routes/discover.ts
@@ -0,0 +1,48 @@
+import express from 'express';
+import { searchSearxng } from '../lib/searxng';
+import logger from '../utils/logger';
+
+const router = express.Router();
+
+/**
+ * GET / — build the discover feed.
+ *
+ * Runs one 'bing news' SearXNG query per (topic, site) pair in parallel,
+ * merges all results into a single list, shuffles it and responds with
+ * `{ blogs }`. Any failure is logged and answered with a 500.
+ */
+router.get('/', async (req, res) => {
+  try {
+    const sites = ['businessinsider.com', 'www.exchangewire.com', 'yahoo.com'];
+    const topics = ['AI', 'tech'];
+
+    // Produces the same six queries as before: every site for 'AI', then
+    // every site for 'tech'.
+    const responses = await Promise.all(
+      topics.flatMap((topic) =>
+        sites.map((site) =>
+          searchSearxng(`site:${site} ${topic}`, {
+            engines: ['bing news'],
+            pageno: 1,
+          }),
+        ),
+      ),
+    );
+
+    const data = responses.flatMap((response) => response.results);
+
+    // Fisher–Yates shuffle. A random sort comparator is inconsistent, so the
+    // resulting order is engine-dependent and not uniformly distributed.
+    for (let i = data.length - 1; i > 0; i--) {
+      const j = Math.floor(Math.random() * (i + 1));
+      [data[i], data[j]] = [data[j], data[i]];
+    }
+
+    return res.json({ blogs: data });
+  } catch (err: any) {
+    logger.error(`Error in discover route: ${err.message}`);
+    return res.status(500).json({ message: 'An error has occurred' });
+  }
+});
+
+export default router;
diff --git a/src/routes/images.ts b/src/routes/images.ts
index 6bd43d3..efa095a 100644
--- a/src/routes/images.ts
+++ b/src/routes/images.ts
@@ -1,17 +1,31 @@
import express from 'express';
-import handleImageSearch from '../agents/imageSearchAgent';
+import handleImageSearch from '../chains/imageSearchAgent';
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { getAvailableChatModelProviders } from '../lib/providers';
import { HumanMessage, AIMessage } from '@langchain/core/messages';
import logger from '../utils/logger';
+import { ChatOpenAI } from '@langchain/openai';
const router = express.Router();
+// Chat model selection sent by the client.
+interface ChatModel {
+ provider: string;
+ model: string;
+ // Both are required by the handler when provider is 'custom_openai'.
+ customOpenAIBaseURL?: string;
+ customOpenAIKey?: string;
+}
+
+// JSON body read by the POST / handler of this router.
+interface ImageSearchBody {
+ query: string;
+ // Entries are read through msg.role and msg.content when converted to messages.
+ chatHistory: any[];
+ // When omitted, the handler falls back to the first available provider and model.
+ chatModel?: ChatModel;
+}
+
router.post('/', async (req, res) => {
try {
- let { query, chat_history, chat_model_provider, chat_model } = req.body;
+ let body: ImageSearchBody = req.body;
- chat_history = chat_history.map((msg: any) => {
+ const chatHistory = body.chatHistory.map((msg: any) => {
if (msg.role === 'user') {
return new HumanMessage(msg.content);
} else if (msg.role === 'assistant') {
@@ -19,22 +33,50 @@ router.post('/', async (req, res) => {
}
});
- const chatModels = await getAvailableChatModelProviders();
- const provider = chat_model_provider ?? Object.keys(chatModels)[0];
- const chatModel = chat_model ?? Object.keys(chatModels[provider])[0];
+ const chatModelProviders = await getAvailableChatModelProviders();
+
+ const chatModelProvider =
+ body.chatModel?.provider || Object.keys(chatModelProviders)[0];
+ const chatModel =
+ body.chatModel?.model ||
+ Object.keys(chatModelProviders[chatModelProvider])[0];
let llm: BaseChatModel | undefined;
- if (chatModels[provider] && chatModels[provider][chatModel]) {
- llm = chatModels[provider][chatModel] as BaseChatModel | undefined;
+ if (body.chatModel?.provider === 'custom_openai') {
+ if (
+ !body.chatModel?.customOpenAIBaseURL ||
+ !body.chatModel?.customOpenAIKey
+ ) {
+ return res
+ .status(400)
+ .json({ message: 'Missing custom OpenAI base URL or key' });
+ }
+
+ llm = new ChatOpenAI({
+ modelName: body.chatModel.model,
+ openAIApiKey: body.chatModel.customOpenAIKey,
+ temperature: 0.7,
+ configuration: {
+ baseURL: body.chatModel.customOpenAIBaseURL,
+ },
+ }) as unknown as BaseChatModel;
+ } else if (
+ chatModelProviders[chatModelProvider] &&
+ chatModelProviders[chatModelProvider][chatModel]
+ ) {
+ llm = chatModelProviders[chatModelProvider][chatModel]
+ .model as unknown as BaseChatModel | undefined;
}
if (!llm) {
- res.status(500).json({ message: 'Invalid LLM model selected' });
- return;
+ return res.status(400).json({ message: 'Invalid model selected' });
}
- const images = await handleImageSearch({ query, chat_history }, llm);
+ const images = await handleImageSearch(
+ { query: body.query, chat_history: chatHistory },
+ llm,
+ );
res.status(200).json({ images });
} catch (err) {
diff --git a/src/routes/index.ts b/src/routes/index.ts
index 257e677..cb2c915 100644
--- a/src/routes/index.ts
+++ b/src/routes/index.ts
@@ -4,6 +4,10 @@ import videosRouter from './videos';
import configRouter from './config';
import modelsRouter from './models';
import suggestionsRouter from './suggestions';
+import chatsRouter from './chats';
+import searchRouter from './search';
+import discoverRouter from './discover';
+import uploadsRouter from './uploads';
const router = express.Router();
@@ -12,5 +16,9 @@ router.use('/videos', videosRouter);
router.use('/config', configRouter);
router.use('/models', modelsRouter);
router.use('/suggestions', suggestionsRouter);
+router.use('/chats', chatsRouter);
+router.use('/search', searchRouter);
+router.use('/discover', discoverRouter);
+router.use('/uploads', uploadsRouter);
export default router;
diff --git a/src/routes/models.ts b/src/routes/models.ts
index 36df25a..b5fbe12 100644
--- a/src/routes/models.ts
+++ b/src/routes/models.ts
@@ -14,6 +14,18 @@ router.get('/', async (req, res) => {
getAvailableEmbeddingModelProviders(),
]);
+ Object.keys(chatModelProviders).forEach((provider) => {
+ Object.keys(chatModelProviders[provider]).forEach((model) => {
+ delete chatModelProviders[provider][model].model;
+ });
+ });
+
+ Object.keys(embeddingModelProviders).forEach((provider) => {
+ Object.keys(embeddingModelProviders[provider]).forEach((model) => {
+ delete embeddingModelProviders[provider][model].model;
+ });
+ });
+
res.status(200).json({ chatModelProviders, embeddingModelProviders });
} catch (err) {
res.status(500).json({ message: 'An error has occurred.' });
diff --git a/src/routes/search.ts b/src/routes/search.ts
new file mode 100644
index 0000000..e24b3f9
--- /dev/null
+++ b/src/routes/search.ts
@@ -0,0 +1,160 @@
+import express from 'express';
+import logger from '../utils/logger';
+import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
+import type { Embeddings } from '@langchain/core/embeddings';
+import { ChatOpenAI } from '@langchain/openai';
+import {
+ getAvailableChatModelProviders,
+ getAvailableEmbeddingModelProviders,
+} from '../lib/providers';
+import { searchHandlers } from '../websocket/messageHandler';
+import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages';
+import { MetaSearchAgentType } from '../search/metaSearchAgent';
+
+const router = express.Router();
+
+/** Chat model selection sent by the client. */
+interface ChatModel {
+  provider: string;
+  model: string;
+  /** Both custom fields are required by the handler when `provider` is 'custom_openai'. */
+  customOpenAIBaseURL?: string;
+  customOpenAIKey?: string;
+}
+
+/** Embedding model selection sent by the client. */
+interface EmbeddingModel {
+  provider: string;
+  model: string;
+}
+
+/** JSON body read by the POST / handler of this router. */
+interface ChatRequestBody {
+  /** The handler defaults this to 'balanced' when it is missing. */
+  optimizationMode: 'speed' | 'balanced';
+  /** Key used to look up the handler in `searchHandlers`. */
+  focusMode: string;
+  chatModel?: ChatModel;
+  embeddingModel?: EmbeddingModel;
+  query: string;
+  /** `[role, text]` pairs; role 'human' becomes a HumanMessage, anything else an AIMessage. */
+  history: Array<[string, string]>;
+}
+
+/**
+ * POST / — run one search-and-answer round and return the complete result.
+ *
+ * Resolves the chat and embedding models named in the body (falling back to
+ * the first available provider/model), starts the focus mode's search
+ * handler and collects everything its emitter produces.
+ *
+ * Responses:
+ * - 200 `{ message, sources }` once the emitter ends;
+ * - 400 for a missing query/focus mode, an incomplete custom OpenAI
+ *   configuration, an unknown model or an unknown focus mode;
+ * - 500 when the emitter reports an error or anything throws.
+ */
+router.post('/', async (req, res) => {
+  try {
+    const body: ChatRequestBody = req.body;
+
+    if (!body.focusMode || !body.query) {
+      return res.status(400).json({ message: 'Missing focus mode or query' });
+    }
+
+    body.history = body.history || [];
+    body.optimizationMode = body.optimizationMode || 'balanced';
+
+    const history: BaseMessage[] = body.history.map((msg) => {
+      if (msg[0] === 'human') {
+        return new HumanMessage({
+          content: msg[1],
+        });
+      } else {
+        return new AIMessage({
+          content: msg[1],
+        });
+      }
+    });
+
+    const [chatModelProviders, embeddingModelProviders] = await Promise.all([
+      getAvailableChatModelProviders(),
+      getAvailableEmbeddingModelProviders(),
+    ]);
+
+    // `?? {}` keeps an unknown provider from throwing in Object.keys; the
+    // request then falls through to the 400 'Invalid model selected' below.
+    const chatModelProvider =
+      body.chatModel?.provider || Object.keys(chatModelProviders)[0];
+    const chatModel =
+      body.chatModel?.model ||
+      Object.keys(chatModelProviders[chatModelProvider] ?? {})[0];
+
+    const embeddingModelProvider =
+      body.embeddingModel?.provider || Object.keys(embeddingModelProviders)[0];
+    const embeddingModel =
+      body.embeddingModel?.model ||
+      Object.keys(embeddingModelProviders[embeddingModelProvider] ?? {})[0];
+
+    let llm: BaseChatModel | undefined;
+    let embeddings: Embeddings | undefined;
+
+    if (body.chatModel?.provider === 'custom_openai') {
+      if (
+        !body.chatModel?.customOpenAIBaseURL ||
+        !body.chatModel?.customOpenAIKey
+      ) {
+        return res
+          .status(400)
+          .json({ message: 'Missing custom OpenAI base URL or key' });
+      }
+
+      llm = new ChatOpenAI({
+        modelName: body.chatModel.model,
+        openAIApiKey: body.chatModel.customOpenAIKey,
+        temperature: 0.7,
+        configuration: {
+          baseURL: body.chatModel.customOpenAIBaseURL,
+        },
+      }) as unknown as BaseChatModel;
+    } else if (
+      chatModelProviders[chatModelProvider] &&
+      chatModelProviders[chatModelProvider][chatModel]
+    ) {
+      llm = chatModelProviders[chatModelProvider][chatModel]
+        .model as unknown as BaseChatModel | undefined;
+    }
+
+    if (
+      embeddingModelProviders[embeddingModelProvider] &&
+      embeddingModelProviders[embeddingModelProvider][embeddingModel]
+    ) {
+      embeddings = embeddingModelProviders[embeddingModelProvider][
+        embeddingModel
+      ].model as Embeddings | undefined;
+    }
+
+    if (!llm || !embeddings) {
+      return res.status(400).json({ message: 'Invalid model selected' });
+    }
+
+    const searchHandler: MetaSearchAgentType = searchHandlers[body.focusMode];
+
+    if (!searchHandler) {
+      return res.status(400).json({ message: 'Invalid focus mode' });
+    }
+
+    const emitter = await searchHandler.searchAndAnswer(
+      body.query,
+      history,
+      llm,
+      embeddings,
+      body.optimizationMode,
+      [],
+    );
+
+    let message = '';
+    let sources = [];
+
+    // The emitter callbacks run after this function has returned, so the
+    // surrounding try/catch no longer protects them. `respond` guarantees a
+    // single response even when both 'error' and 'end' fire.
+    const respond = (status: number, payload: unknown) => {
+      if (res.headersSent) {
+        return;
+      }
+      res.status(status).json(payload);
+    };
+
+    emitter.on('data', (data) => {
+      try {
+        const parsedData = JSON.parse(data);
+        if (parsedData.type === 'response') {
+          message += parsedData.data;
+        } else if (parsedData.type === 'sources') {
+          sources = parsedData.data;
+        }
+      } catch (err: any) {
+        logger.error(`Error in parsing search data: ${err.message}`);
+        respond(500, { message: 'An error has occurred.' });
+      }
+    });
+
+    emitter.on('end', () => {
+      respond(200, { message, sources });
+    });
+
+    emitter.on('error', (data) => {
+      let errorMessage = 'An error has occurred.';
+      try {
+        errorMessage = JSON.parse(data).data;
+      } catch (err: any) {
+        logger.error(`Error in parsing search error: ${err.message}`);
+      }
+      respond(500, { message: errorMessage });
+    });
+  } catch (err: any) {
+    logger.error(`Error in getting search results: ${err.message}`);
+    res.status(500).json({ message: 'An error has occurred.' });
+  }
+});
+
+export default router;
diff --git a/src/routes/suggestions.ts b/src/routes/suggestions.ts
index b15ff5f..1d46e5b 100644
--- a/src/routes/suggestions.ts
+++ b/src/routes/suggestions.ts
@@ -1,17 +1,30 @@
import express from 'express';
-import generateSuggestions from '../agents/suggestionGeneratorAgent';
+import generateSuggestions from '../chains/suggestionGeneratorAgent';
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { getAvailableChatModelProviders } from '../lib/providers';
import { HumanMessage, AIMessage } from '@langchain/core/messages';
import logger from '../utils/logger';
+import { ChatOpenAI } from '@langchain/openai';
const router = express.Router();
+// Chat model selection sent by the client.
+interface ChatModel {
+ provider: string;
+ model: string;
+ // Both are required by the handler when provider is 'custom_openai'.
+ customOpenAIBaseURL?: string;
+ customOpenAIKey?: string;
+}
+
+// JSON body read by the POST / handler of this router.
+interface SuggestionsBody {
+ // Entries are read through msg.role and msg.content when converted to messages.
+ chatHistory: any[];
+ // When omitted, the handler falls back to the first available provider and model.
+ chatModel?: ChatModel;
+}
+
router.post('/', async (req, res) => {
try {
- let { chat_history, chat_model, chat_model_provider } = req.body;
+ let body: SuggestionsBody = req.body;
- chat_history = chat_history.map((msg: any) => {
+ const chatHistory = body.chatHistory.map((msg: any) => {
if (msg.role === 'user') {
return new HumanMessage(msg.content);
} else if (msg.role === 'assistant') {
@@ -19,22 +32,50 @@ router.post('/', async (req, res) => {
}
});
- const chatModels = await getAvailableChatModelProviders();
- const provider = chat_model_provider ?? Object.keys(chatModels)[0];
- const chatModel = chat_model ?? Object.keys(chatModels[provider])[0];
+ const chatModelProviders = await getAvailableChatModelProviders();
+
+ const chatModelProvider =
+ body.chatModel?.provider || Object.keys(chatModelProviders)[0];
+ const chatModel =
+ body.chatModel?.model ||
+ Object.keys(chatModelProviders[chatModelProvider])[0];
let llm: BaseChatModel | undefined;
- if (chatModels[provider] && chatModels[provider][chatModel]) {
- llm = chatModels[provider][chatModel] as BaseChatModel | undefined;
+ if (body.chatModel?.provider === 'custom_openai') {
+ if (
+ !body.chatModel?.customOpenAIBaseURL ||
+ !body.chatModel?.customOpenAIKey
+ ) {
+ return res
+ .status(400)
+ .json({ message: 'Missing custom OpenAI base URL or key' });
+ }
+
+ llm = new ChatOpenAI({
+ modelName: body.chatModel.model,
+ openAIApiKey: body.chatModel.customOpenAIKey,
+ temperature: 0.7,
+ configuration: {
+ baseURL: body.chatModel.customOpenAIBaseURL,
+ },
+ }) as unknown as BaseChatModel;
+ } else if (
+ chatModelProviders[chatModelProvider] &&
+ chatModelProviders[chatModelProvider][chatModel]
+ ) {
+ llm = chatModelProviders[chatModelProvider][chatModel]
+ .model as unknown as BaseChatModel | undefined;
}
if (!llm) {
- res.status(500).json({ message: 'Invalid LLM model selected' });
- return;
+ return res.status(400).json({ message: 'Invalid model selected' });
}
- const suggestions = await generateSuggestions({ chat_history }, llm);
+ const suggestions = await generateSuggestions(
+ { chat_history: chatHistory },
+ llm,
+ );
res.status(200).json({ suggestions: suggestions });
} catch (err) {
diff --git a/src/routes/uploads.ts b/src/routes/uploads.ts
new file mode 100644
index 0000000..7b063fc
--- /dev/null
+++ b/src/routes/uploads.ts
@@ -0,0 +1,151 @@
+import express from 'express';
+import logger from '../utils/logger';
+import multer from 'multer';
+import path from 'path';
+import crypto from 'crypto';
+import fs from 'fs';
+import { Embeddings } from '@langchain/core/embeddings';
+import { getAvailableEmbeddingModelProviders } from '../lib/providers';
+import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
+import { DocxLoader } from '@langchain/community/document_loaders/fs/docx';
+import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
+import { Document } from 'langchain/document';
+
+const router = express.Router();
+
+// Splitter shared by every upload request; configured with chunkSize 500 and
+// chunkOverlap 100 and applied to each uploaded document before embedding.
+const splitter = new RecursiveCharacterTextSplitter({
+ chunkSize: 500,
+ chunkOverlap: 100,
+});
+
+/**
+ * Disk storage for uploaded files.
+ *
+ * Files are written to `<cwd>/uploads` under a random 32-hex-character name
+ * that keeps the (lower-cased) extension of the original file name. Only
+ * `pdf`, `docx` and `txt` extensions are accepted; anything else is rejected
+ * with an error passed to multer's callback.
+ */
+const storage = multer.diskStorage({
+  destination: (req, file, cb) => {
+    const uploadDir = path.join(process.cwd(), './uploads');
+    try {
+      // multer does not create the directory when `destination` is given as
+      // a function, so make sure it exists before handing it over.
+      fs.mkdirSync(uploadDir, { recursive: true });
+      cb(null, uploadDir);
+    } catch (err: any) {
+      cb(err, uploadDir);
+    }
+  },
+  filename: (req, file, cb) => {
+    const splitedFileName = file.originalname.split('.');
+    // Lower-cased so that e.g. `Report.PDF` is accepted like `report.pdf`.
+    const fileExtension =
+      splitedFileName[splitedFileName.length - 1].toLowerCase();
+    if (!['pdf', 'docx', 'txt'].includes(fileExtension)) {
+      return cb(new Error('File type is not supported'), '');
+    }
+    cb(null, `${crypto.randomBytes(16).toString('hex')}.${fileExtension}`);
+  },
+});
+
+const upload = multer({ storage });
+
+/**
+ * POST / — store uploaded documents and pre-compute their embeddings.
+ *
+ * Expects multipart form data with one or more `files` plus the
+ * `embedding_model` and `embedding_model_provider` fields. For every file the
+ * text is extracted (PDF, DOCX or plain text, chosen by MIME type), split
+ * into chunks and embedded; two sidecar files are written next to the upload:
+ * `<id>-extracted.json` (title + chunk texts) and `<id>-embeddings.json`
+ * (title + one vector per chunk).
+ *
+ * Responses:
+ * - 200 `{ files: [{ fileName, fileExtension, fileId }] }`;
+ * - 400 for missing model fields, an unknown embedding model or no files;
+ * - 500 when extraction, embedding or writing fails.
+ */
+router.post(
+  '/',
+  upload.fields([
+    { name: 'files' },
+    { name: 'embedding_model', maxCount: 1 },
+    { name: 'embedding_model_provider', maxCount: 1 },
+  ]),
+  async (req, res) => {
+    try {
+      const { embedding_model, embedding_model_provider } = req.body;
+
+      if (!embedding_model || !embedding_model_provider) {
+        res
+          .status(400)
+          .json({ message: 'Missing embedding model or provider' });
+        return;
+      }
+
+      const embeddingModels = await getAvailableEmbeddingModelProviders();
+
+      // Both names are guaranteed non-empty by the check above, so no
+      // default is needed; optional chaining covers unknown names.
+      const embeddingsModel = embeddingModels[embedding_model_provider]?.[
+        embedding_model
+      ]?.model as Embeddings | undefined;
+
+      if (!embeddingsModel) {
+        res.status(400).json({ message: 'Invalid embedding model selected' });
+        return;
+      }
+
+      // `req.files` is undefined when the request carried no multipart files.
+      const files = req.files?.['files'] as Express.Multer.File[] | undefined;
+      if (!files || files.length === 0) {
+        res.status(400).json({ message: 'No files uploaded' });
+        return;
+      }
+
+      await Promise.all(
+        files.map(async (file) => {
+          let docs: Document[] = [];
+
+          // NOTE(review): the loader is chosen by MIME type, while the upload
+          // filter checks the extension; a file whose MIME type matches none
+          // of these branches yields empty sidecar files. Confirm intent.
+          if (file.mimetype === 'application/pdf') {
+            const loader = new PDFLoader(file.path);
+            docs = await loader.load();
+          } else if (
+            file.mimetype ===
+            'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
+          ) {
+            const loader = new DocxLoader(file.path);
+            docs = await loader.load();
+          } else if (file.mimetype === 'text/plain') {
+            const text = await fs.promises.readFile(file.path, 'utf-8');
+            docs = [
+              new Document({
+                pageContent: text,
+                metadata: {
+                  title: file.originalname,
+                },
+              }),
+            ];
+          }
+
+          const splitted = await splitter.splitDocuments(docs);
+          const contents = splitted.map((doc) => doc.pageContent);
+
+          const json = JSON.stringify({
+            title: file.originalname,
+            contents: contents,
+          });
+
+          const pathToSave = file.path.replace(/\.\w+$/, '-extracted.json');
+          await fs.promises.writeFile(pathToSave, json);
+
+          const embeddings = await embeddingsModel.embedDocuments(contents);
+
+          const embeddingsJSON = JSON.stringify({
+            title: file.originalname,
+            embeddings: embeddings,
+          });
+
+          const pathToSaveEmbeddings = file.path.replace(
+            /\.\w+$/,
+            '-embeddings.json',
+          );
+          await fs.promises.writeFile(pathToSaveEmbeddings, embeddingsJSON);
+        }),
+      );
+
+      res.status(200).json({
+        files: files.map((file) => {
+          return {
+            fileName: file.originalname,
+            fileExtension: file.filename.split('.').pop(),
+            fileId: file.filename.replace(/\.\w+$/, ''),
+          };
+        }),
+      });
+    } catch (err: any) {
+      logger.error(`Error in uploading file results: ${err.message}`);
+      res.status(500).json({ message: 'An error has occurred.' });
+    }
+  },
+);
+
+export default router;
diff --git a/src/routes/videos.ts b/src/routes/videos.ts
index 0ffdb2c..ad87460 100644
--- a/src/routes/videos.ts
+++ b/src/routes/videos.ts
@@ -3,15 +3,29 @@ import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { getAvailableChatModelProviders } from '../lib/providers';
import { HumanMessage, AIMessage } from '@langchain/core/messages';
import logger from '../utils/logger';
-import handleVideoSearch from '../agents/videoSearchAgent';
+import handleVideoSearch from '../chains/videoSearchAgent';
+import { ChatOpenAI } from '@langchain/openai';
const router = express.Router();
+// Chat model selection sent by the client.
+interface ChatModel {
+ provider: string;
+ model: string;
+ // Both are required by the handler when provider is 'custom_openai'.
+ customOpenAIBaseURL?: string;
+ customOpenAIKey?: string;
+}
+
+// JSON body read by the POST / handler of this router.
+interface VideoSearchBody {
+ query: string;
+ // Entries are read through msg.role and msg.content when converted to messages.
+ chatHistory: any[];
+ // When omitted, the handler falls back to the first available provider and model.
+ chatModel?: ChatModel;
+}
+
router.post('/', async (req, res) => {
try {
- let { query, chat_history, chat_model_provider, chat_model } = req.body;
+ let body: VideoSearchBody = req.body;
- chat_history = chat_history.map((msg: any) => {
+ const chatHistory = body.chatHistory.map((msg: any) => {
if (msg.role === 'user') {
return new HumanMessage(msg.content);
} else if (msg.role === 'assistant') {
@@ -19,22 +33,50 @@ router.post('/', async (req, res) => {
}
});
- const chatModels = await getAvailableChatModelProviders();
- const provider = chat_model_provider ?? Object.keys(chatModels)[0];
- const chatModel = chat_model ?? Object.keys(chatModels[provider])[0];
+ const chatModelProviders = await getAvailableChatModelProviders();
+
+ const chatModelProvider =
+ body.chatModel?.provider || Object.keys(chatModelProviders)[0];
+ const chatModel =
+ body.chatModel?.model ||
+ Object.keys(chatModelProviders[chatModelProvider])[0];
let llm: BaseChatModel | undefined;
- if (chatModels[provider] && chatModels[provider][chatModel]) {
- llm = chatModels[provider][chatModel] as BaseChatModel | undefined;
+ if (body.chatModel?.provider === 'custom_openai') {
+ if (
+ !body.chatModel?.customOpenAIBaseURL ||
+ !body.chatModel?.customOpenAIKey
+ ) {
+ return res
+ .status(400)
+ .json({ message: 'Missing custom OpenAI base URL or key' });
+ }
+
+ llm = new ChatOpenAI({
+ modelName: body.chatModel.model,
+ openAIApiKey: body.chatModel.customOpenAIKey,
+ temperature: 0.7,
+ configuration: {
+ baseURL: body.chatModel.customOpenAIBaseURL,
+ },
+ }) as unknown as BaseChatModel;
+ } else if (
+ chatModelProviders[chatModelProvider] &&
+ chatModelProviders[chatModelProvider][chatModel]
+ ) {
+ llm = chatModelProviders[chatModelProvider][chatModel]
+ .model as unknown as BaseChatModel | undefined;
}
if (!llm) {
- res.status(500).json({ message: 'Invalid LLM model selected' });
- return;
+ return res.status(400).json({ message: 'Invalid model selected' });
}
- const videos = await handleVideoSearch({ chat_history, query }, llm);
+ const videos = await handleVideoSearch(
+ { chat_history: chatHistory, query: body.query },
+ llm,
+ );
res.status(200).json({ videos });
} catch (err) {
diff --git a/src/search/metaSearchAgent.ts b/src/search/metaSearchAgent.ts
new file mode 100644
index 0000000..ee82c10
--- /dev/null
+++ b/src/search/metaSearchAgent.ts
@@ -0,0 +1,494 @@
+import { ChatOpenAI } from '@langchain/openai';
+import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
+import type { Embeddings } from '@langchain/core/embeddings';
+import {
+ ChatPromptTemplate,
+ MessagesPlaceholder,
+ PromptTemplate,
+} from '@langchain/core/prompts';
+import {
+ RunnableLambda,
+ RunnableMap,
+ RunnableSequence,
+} from '@langchain/core/runnables';
+import { BaseMessage } from '@langchain/core/messages';
+import { StringOutputParser } from '@langchain/core/output_parsers';
+import LineListOutputParser from '../lib/outputParsers/listLineOutputParser';
+import LineOutputParser from '../lib/outputParsers/lineOutputParser';
+import { getDocumentsFromLinks } from '../utils/documents';
+import { Document } from 'langchain/document';
+import { searchSearxng } from '../lib/searxng';
+import path from 'path';
+import fs from 'fs';
+import computeSimilarity from '../utils/computeSimilarity';
+import formatChatHistoryAsString from '../utils/formatHistory';
+import eventEmitter from 'events';
+import { StreamEvent } from '@langchain/core/tracers/log_stream';
+import { IterableReadableStream } from '@langchain/core/utils/stream';
+
+/**
+ * Contract implemented by every search agent.
+ *
+ * `searchAndAnswer` starts a search for `message` in the context of
+ * `history` and resolves to an event emitter; consumers subscribe to its
+ * 'data', 'end' and 'error' events to receive the answer.
+ */
+export interface MetaSearchAgentType {
+  searchAndAnswer: (
+    message: string,
+    history: BaseMessage[],
+    llm: BaseChatModel,
+    embeddings: Embeddings,
+    optimizationMode: 'speed' | 'balanced' | 'quality',
+    fileIds: string[],
+    // The type argument was missing (`Promise;`), which is not valid
+    // TypeScript; `eventEmitter` is this file's import from 'events'.
+  ) => Promise<eventEmitter>;
+}
+
+// Behaviour switches and prompts for one MetaSearchAgent instance.
+interface Config {
+ // When true, the answering chain runs the search retriever chain before reranking.
+ searchWeb: boolean;
+ // When false, rerankDocs takes the same branch as optimizationMode 'speed'.
+ rerank: boolean;
+ // When true, the search question is parsed from the `question` key of the
+ // model output; otherwise the raw model output is used as the question.
+ summarizer: boolean;
+ // Similarity a file chunk must exceed to be kept in the branch visible in
+ // rerankDocs (0.3 is used when this is null/undefined).
+ rerankThreshold: number;
+ // Template handed to PromptTemplate.fromTemplate for query generation.
+ queryGeneratorPrompt: string;
+ // System message of the final answer prompt.
+ responsePrompt: string;
+ // Engine names forwarded to searchSearxng.
+ activeEngines: string[];
+}
+
+// Input object consumed by the answering chain: the conversation so far and
+// the user's current query.
+type BasicChainInput = {
+ chat_history: BaseMessage[];
+ query: string;
+};
+
+class MetaSearchAgent implements MetaSearchAgentType {
+ private config: Config;
+ private strParser = new StringOutputParser();
+
+ // Stores the configuration that the chain-building methods read later.
+ constructor(config: Config) {
+ this.config = config;
+ }
+
+  /**
+   * Builds the chain that turns the user's message into retrieved documents.
+   *
+   * Pipeline: query-generator prompt -> `llm` -> string parser -> a lambda
+   * that interprets the model output and resolves to `{ query, docs }`:
+   * - output `not_needed`  -> `{ query: '', docs: [] }`;
+   * - output listing links -> the linked pages are fetched, merged per URL
+   *   and summarised by `llm`, yielding one document per group;
+   * - otherwise            -> a SearXNG search over `config.activeEngines`.
+   *
+   * Side effect: sets `temperature` to 0 on the passed-in `llm` instance.
+   *
+   * @param llm Chat model used for query generation and link summaries.
+   * @returns A runnable sequence resolving to `{ query, docs }`.
+   */
+  private async createSearchRetrieverChain(llm: BaseChatModel) {
+    // Mutates the caller's model object, not a copy.
+    (llm as unknown as ChatOpenAI).temperature = 0;
+
+    return RunnableSequence.from([
+      PromptTemplate.fromTemplate(this.config.queryGeneratorPrompt),
+      llm,
+      this.strParser,
+      RunnableLambda.from(async (input: string) => {
+        const linksOutputParser = new LineListOutputParser({
+          key: 'links',
+        });
+
+        const questionOutputParser = new LineOutputParser({
+          key: 'question',
+        });
+
+        const links = await linksOutputParser.parse(input);
+        let question = this.config.summarizer
+          ? await questionOutputParser.parse(input)
+          : input;
+
+        if (question === 'not_needed') {
+          return { query: '', docs: [] };
+        }
+
+        if (links.length > 0) {
+          if (question.length === 0) {
+            question = 'summarize';
+          }
+
+          const linkDocs = await getDocumentsFromLinks({ links });
+
+          // Chunks of the same page are merged into groups of at most 10
+          // chunks so that each group is summarised with a single LLM call.
+          const docGroups: Document[] = [];
+
+          for (const doc of linkDocs) {
+            const openGroup = docGroups.find(
+              (d) =>
+                d.metadata.url === doc.metadata.url &&
+                d.metadata.totalDocs < 10,
+            );
+
+            if (!openGroup) {
+              docGroups.push({
+                ...doc,
+                metadata: {
+                  ...doc.metadata,
+                  totalDocs: 1,
+                },
+              });
+            } else {
+              // Only append to a group that already existed. Previously a
+              // freshly created group was matched again right away and had
+              // its own first chunk appended to itself.
+              openGroup.pageContent =
+                openGroup.pageContent + `\n\n` + doc.pageContent;
+              openGroup.metadata.totalDocs += 1;
+            }
+          }
+
+          // Using the resolved values of Promise.all keeps the summaries in
+          // docGroups order instead of LLM completion order.
+          // NOTE(review): the <example>/<text>/<query> tags below were
+          // missing from the prompt although its wording refers to them;
+          // they were restored from that wording — confirm against the
+          // intended prompt.
+          const docs = await Promise.all(
+            docGroups.map(async (doc) => {
+              const res = await llm.invoke(`
+            You are a web search summarizer, tasked with summarizing a piece of text retrieved from a web search. Your job is to summarize the
+            text into a detailed, 2-4 paragraph explanation that captures the main ideas and provides a comprehensive answer to the query.
+            If the query is \"summarize\", you should provide a detailed summary of the text. If the query is a specific question, you should answer it in the summary.
+
+            - **Journalistic tone**: The summary should sound professional and journalistic, not too casual or vague.
+            - **Thorough and detailed**: Ensure that every key point from the text is captured and that the summary directly answers the query.
+            - **Not too lengthy, but detailed**: The summary should be informative but not excessively long. Focus on providing detailed information in a concise format.
+
+            The text will be shared inside the \`text\` XML tag, and the query inside the \`query\` XML tag.
+
+            <example>
+            1. \`<text>
+            Docker is a set of platform-as-a-service products that use OS-level virtualization to deliver software in packages called containers.
+            It was first released in 2013 and is developed by Docker, Inc. Docker is designed to make it easier to create, deploy, and run applications
+            by using containers.
+            </text>
+
+            <query>
+            What is Docker and how does it work?
+            </query>
+
+            Response:
+            Docker is a revolutionary platform-as-a-service product developed by Docker, Inc., that uses container technology to make application
+            deployment more efficient. It allows developers to package their software with all necessary dependencies, making it easier to run in
+            any environment. Released in 2013, Docker has transformed the way applications are built, deployed, and managed.
+            \`
+            2. \`<text>
+            The theory of relativity, or simply relativity, encompasses two interrelated theories of Albert Einstein: special relativity and general
+            relativity. However, the word "relativity" is sometimes used in reference to Galilean invariance. The term "theory of relativity" was based
+            on the expression "relative theory" used by Max Planck in 1906. The theory of relativity usually encompasses two interrelated theories by
+            Albert Einstein: special relativity and general relativity. Special relativity applies to all physical phenomena in the absence of gravity.
+            General relativity explains the law of gravitation and its relation to other forces of nature. It applies to the cosmological and astrophysical
+            realm, including astronomy.
+            </text>
+
+            <query>
+            summarize
+            </query>
+
+            Response:
+            The theory of relativity, developed by Albert Einstein, encompasses two main theories: special relativity and general relativity. Special
+            relativity applies to all physical phenomena in the absence of gravity, while general relativity explains the law of gravitation and its
+            relation to other forces of nature. The theory of relativity is based on the concept of "relative theory," as introduced by Max Planck in
+            1906. It is a fundamental theory in physics that has revolutionized our understanding of the universe.
+            \`
+            </example>
+
+            Everything below is the actual data you will be working with. Good luck!
+
+            <query>
+            ${question}
+            </query>
+
+            <text>
+            ${doc.pageContent}
+            </text>
+
+            Make sure to answer the query in the summary.
+          `);
+
+              return new Document({
+                pageContent: res.content as string,
+                metadata: {
+                  title: doc.metadata.title,
+                  url: doc.metadata.url,
+                },
+              });
+            }),
+          );
+
+          return { query: question, docs: docs };
+        } else {
+          const res = await searchSearxng(question, {
+            language: 'en',
+            engines: this.config.activeEngines,
+          });
+
+          const documents = res.results.map(
+            (result) =>
+              new Document({
+                pageContent:
+                  result.content ||
+                  (this.config.activeEngines.includes('youtube')
+                    ? result.title
+                    : '') /* Todo: Implement transcript grabbing using Youtubei (source: https://www.npmjs.com/package/youtubei) */,
+                metadata: {
+                  title: result.title,
+                  url: result.url,
+                  ...(result.img_src && { img_src: result.img_src }),
+                },
+              }),
+          );
+
+          return { query: question, docs: documents };
+        }
+      }),
+    ]);
+  }
+
+  /**
+   * Assembles the chain that produces the final answer.
+   *
+   * The first step maps the input to `query`, `chat_history`, the current
+   * ISO date and a `context` built from the retrieved sources (optionally
+   * searched on the web, then reranked and piped through `processDocs`).
+   * The mapped values feed the response prompt, the model and the string
+   * parser.
+   *
+   * @param llm Chat model that writes the answer (also used for retrieval).
+   * @param fileIds Ids of uploaded files passed on to `rerankDocs`.
+   * @param embeddings Embedding model passed on to `rerankDocs`.
+   * @param optimizationMode Mode passed on to `rerankDocs`.
+   * @returns The runnable sequence, tagged with run name 'FinalResponseGenerator'.
+   */
+  private async createAnsweringChain(
+    llm: BaseChatModel,
+    fileIds: string[],
+    embeddings: Embeddings,
+    optimizationMode: 'speed' | 'balanced' | 'quality',
+  ) {
+    // Resolves the documents that end up as the prompt's `context`.
+    const retrieveSources = async (input: BasicChainInput) => {
+      const historyText = formatChatHistoryAsString(input.chat_history);
+
+      let foundDocs: Document[] | null = null;
+      let searchQuery = input.query;
+
+      if (this.config.searchWeb) {
+        const retrieverChain = await this.createSearchRetrieverChain(llm);
+
+        const retrieved = await retrieverChain.invoke({
+          chat_history: historyText,
+          query: searchQuery,
+        });
+
+        searchQuery = retrieved.query;
+        foundDocs = retrieved.docs;
+      }
+
+      const rankedDocs = await this.rerankDocs(
+        searchQuery,
+        foundDocs ?? [],
+        fileIds,
+        embeddings,
+        optimizationMode,
+      );
+
+      return rankedDocs;
+    };
+
+    const contextStep = RunnableLambda.from(retrieveSources)
+      .withConfig({
+        runName: 'FinalSourceRetriever',
+      })
+      .pipe(this.processDocs);
+
+    const inputMap = RunnableMap.from({
+      query: (input: BasicChainInput) => input.query,
+      chat_history: (input: BasicChainInput) => input.chat_history,
+      date: () => new Date().toISOString(),
+      context: contextStep,
+    });
+
+    const answerPrompt = ChatPromptTemplate.fromMessages([
+      ['system', this.config.responsePrompt],
+      new MessagesPlaceholder('chat_history'),
+      ['user', '{query}'],
+    ]);
+
+    return RunnableSequence.from([
+      inputMap,
+      answerPrompt,
+      llm,
+      this.strParser,
+    ]).withConfig({
+      runName: 'FinalResponseGenerator',
+    });
+  }
+
+ /**
+  * Picks at most 15 documents to use as answer context for `query`.
+  *
+  * Candidates are the web results in `docs` plus the chunks of every uploaded
+  * file in `fileIds`. File chunks and their precomputed embeddings are read
+  * synchronously from `{cwd}/uploads/{fileId}-extracted.json` and
+  * `{cwd}/uploads/{fileId}-embeddings.json`.
+  *
+  * Strategy:
+  * - no docs and no files: `docs` is returned unchanged;
+  * - query is exactly "summarize" (case-insensitive): the first 15 `docs`,
+  *   unranked and without reading any file;
+  * - `optimizationMode === 'speed'` or `config.rerank === false`: web docs keep
+  *   their order and are not embedded; only file chunks are ranked against the
+  *   query (capped at 8 when web docs are present) and placed first;
+  * - otherwise ('balanced' and 'quality'): web docs are embedded, merged with
+  *   the file chunks and ranked together. 'quality' has no dedicated strategy
+  *   and previously fell through, returning `undefined`.
+  *
+  * Ranking keeps candidates whose similarity is strictly greater than
+  * `config.rerankThreshold` (0.3 when unset), best first.
+  *
+  * @param query Text the candidates are compared against.
+  * @param docs Web search results; entries with empty `pageContent` are ignored.
+  * @param fileIds Ids of uploaded files whose chunks take part in the ranking.
+  * @param embeddings Model used to embed the query (and web docs when ranking them).
+  * @param optimizationMode Selects the strategy described above.
+  * @returns The selected documents; always an array.
+  * @throws If an upload file cannot be read or parsed, or if embedding rejects.
+  */
+ private async rerankDocs(
+   query: string,
+   docs: Document[],
+   fileIds: string[],
+   embeddings: Embeddings,
+   optimizationMode: 'speed' | 'balanced' | 'quality',
+ ) {
+   type FileChunk = { fileName: string; content: string; embeddings: number[] };
+
+   const maxDocs = 15;
+   const maxFileDocsNextToWebDocs = 8;
+   const threshold = this.config.rerankThreshold ?? 0.3;
+
+   if (docs.length === 0 && fileIds.length === 0) {
+     return docs;
+   }
+
+   // Checked before touching the disk: this path never uses file contents.
+   if (query.toLocaleLowerCase() === 'summarize') {
+     return docs.slice(0, maxDocs);
+   }
+
+   const fileChunks: FileChunk[] = fileIds.flatMap((fileId) => {
+     const basePath = path.join(process.cwd(), 'uploads', fileId);
+
+     const extracted = JSON.parse(
+       fs.readFileSync(basePath + '-extracted.json', 'utf8'),
+     );
+     const stored = JSON.parse(
+       fs.readFileSync(basePath + '-embeddings.json', 'utf8'),
+     );
+
+     // Chunk i of the extracted text pairs with embedding i of the same file.
+     return extracted.contents.map((chunk: string, i: number) => ({
+       fileName: extracted.title,
+       content: chunk,
+       embeddings: stored.embeddings[i],
+     }));
+   });
+
+   const toFileDocument = (chunk: FileChunk) =>
+     new Document({
+       pageContent: chunk.content,
+       metadata: {
+         title: chunk.fileName,
+         url: `File`,
+       },
+     });
+
+   // Returns the candidates above the threshold, most similar first, capped at
+   // maxDocs. `vectors[i]` must be the embedding of `candidates[i]`.
+   const rank = (
+     candidates: Document[],
+     vectors: number[][],
+     queryVector: number[],
+   ) =>
+     vectors
+       .map((vector, index) => ({
+         index,
+         similarity: computeSimilarity(queryVector, vector),
+       }))
+       .filter((entry) => entry.similarity > threshold)
+       .sort((a, b) => b.similarity - a.similarity)
+       .slice(0, maxDocs)
+       .map((entry) => candidates[entry.index]);
+
+   const docsWithContent = docs.filter(
+     (doc) => doc.pageContent && doc.pageContent.length > 0,
+   );
+
+   if (optimizationMode === 'speed' || this.config.rerank === false) {
+     if (fileChunks.length === 0) {
+       return docsWithContent.slice(0, maxDocs);
+     }
+
+     const queryEmbedding = await embeddings.embedQuery(query);
+
+     const rankedFileDocs = rank(
+       fileChunks.map(toFileDocument),
+       fileChunks.map((chunk) => chunk.embeddings),
+       queryEmbedding,
+     );
+
+     // Leave room for web results when there are any.
+     const fileDocs =
+       docsWithContent.length > 0
+         ? rankedFileDocs.slice(0, maxFileDocsNextToWebDocs)
+         : rankedFileDocs;
+
+     return [
+       ...fileDocs,
+       ...docsWithContent.slice(0, maxDocs - fileDocs.length),
+     ];
+   }
+
+   const [docEmbeddings, queryEmbedding] = await Promise.all([
+     embeddings.embedDocuments(docsWithContent.map((doc) => doc.pageContent)),
+     embeddings.embedQuery(query),
+   ]);
+
+   return rank(
+     [...docsWithContent, ...fileChunks.map(toFileDocument)],
+     [...docEmbeddings, ...fileChunks.map((chunk) => chunk.embeddings)],
+     queryEmbedding,
+   );
+ }
+
+ /**
+  * Renders documents as a numbered list, one per line, in the form
+  * `{position}. {title} {pageContent}` with positions starting at 1.
+  *
+  * Must not rely on `this`: `createAnsweringChain` passes it to `.pipe()` as a
+  * bare function reference.
+  */
+ private processDocs(docs: Document[]) {
+   const numbered = docs.map((doc, position) => {
+     const ordinal = position + 1;
+     return `${ordinal}. ${doc.metadata.title} ${doc.pageContent}`;
+   });
+
+   return numbered.join('\n');
+ }
+
+ /**
+  * Translates chain stream events into events on `emitter`.
+  *
+  * - `on_chain_end` of `FinalSourceRetriever`: `data` with
+  *   `{ type: 'sources', data: output }`
+  * - `on_chain_stream` of `FinalResponseGenerator`: `data` with
+  *   `{ type: 'response', data: chunk }`
+  * - `on_chain_end` of `FinalResponseGenerator`: `end`
+  *
+  * Every payload is a JSON string. `searchAndAnswer` calls this method without
+  * awaiting it, so a failure while reading the stream is reported through the
+  * emitter's `error` event (also a JSON string) rather than as a rejected
+  * promise nobody observes.
+  *
+  * @param stream Event stream returned by `streamEvents` on the answering chain.
+  * @param emitter Emitter handed back to the caller of `searchAndAnswer`.
+  */
+ private async handleStream(
+   stream: IterableReadableStream,
+   emitter: eventEmitter,
+ ) {
+   try {
+     for await (const event of stream) {
+       if (
+         event.event === 'on_chain_end' &&
+         event.name === 'FinalSourceRetriever'
+       ) {
+         emitter.emit(
+           'data',
+           JSON.stringify({ type: 'sources', data: event.data.output }),
+         );
+       }
+       if (
+         event.event === 'on_chain_stream' &&
+         event.name === 'FinalResponseGenerator'
+       ) {
+         emitter.emit(
+           'data',
+           JSON.stringify({ type: 'response', data: event.data.chunk }),
+         );
+       }
+       if (
+         event.event === 'on_chain_end' &&
+         event.name === 'FinalResponseGenerator'
+       ) {
+         emitter.emit('end');
+       }
+     }
+   } catch (err) {
+     console.error(err);
+
+     // Emitting 'error' with no listener attached would itself throw.
+     if (emitter.listenerCount('error') > 0) {
+       emitter.emit(
+         'error',
+         JSON.stringify({
+           type: 'error',
+           data: 'An error has occurred please try again later',
+         }),
+       );
+     }
+   }
+ }
+
+ /**
+  * Starts answering `message` and returns an emitter that reports progress.
+  *
+  * The answering chain is built for this request and streamed with
+  * `streamEvents` (version `v1`); `handleStream` relays the events to the
+  * returned emitter. The emitter is returned as soon as streaming has been
+  * started, not when the answer is complete.
+  *
+  * @param message User query.
+  * @param history Earlier conversation messages, passed as `chat_history`.
+  * @param llm Chat model used by the chain.
+  * @param embeddings Embedding model used when ranking sources.
+  * @param optimizationMode Forwarded to source ranking.
+  * @param fileIds Ids of uploaded files to include as sources.
+  */
+ async searchAndAnswer(
+   message: string,
+   history: BaseMessage[],
+   llm: BaseChatModel,
+   embeddings: Embeddings,
+   optimizationMode: 'speed' | 'balanced' | 'quality',
+   fileIds: string[],
+ ) {
+   const chain = await this.createAnsweringChain(
+     llm,
+     fileIds,
+     embeddings,
+     optimizationMode,
+   );
+
+   const chainInput = {
+     chat_history: history,
+     query: message,
+   };
+   const events = chain.streamEvents(chainInput, {
+     version: 'v1',
+   });
+
+   const emitter = new eventEmitter();
+
+   // Not awaited on purpose: events are relayed while the caller already
+   // holds the emitter.
+   this.handleStream(events, emitter);
+
+   return emitter;
+ }
+}
+
+export default MetaSearchAgent;
diff --git a/src/utils/documents.ts b/src/utils/documents.ts
new file mode 100644
index 0000000..5cd0366
--- /dev/null
+++ b/src/utils/documents.ts
@@ -0,0 +1,99 @@
+import axios from 'axios';
+import { htmlToText } from 'html-to-text';
+import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
+import { Document } from '@langchain/core/documents';
+import pdfParse from 'pdf-parse';
+import logger from './logger';
+
+/**
+ * Downloads every link and turns its content into text chunks wrapped as
+ * `Document`s (`metadata.title`, `metadata.url`).
+ *
+ * - Links without an `http://` or `https://` prefix get `https://` prepended.
+ * - Responses whose content type starts with `application/pdf` (parameters
+ *   such as `; charset=...` are allowed) are parsed with `pdf-parse` and
+ *   titled "PDF Document".
+ * - Everything else is decoded as UTF-8 and converted with `html-to-text`;
+ *   the title is the content of the page's title element, or the link itself
+ *   when there is none.
+ * - Text is whitespace-normalised and split with the default
+ *   `RecursiveCharacterTextSplitter`.
+ * - A link that fails is logged and represented by a single placeholder
+ *   document describing the failure, so the call itself does not reject.
+ *
+ * Links are fetched concurrently; the returned documents follow the order of
+ * `links`.
+ *
+ * @param links URLs (with or without scheme) to fetch.
+ * @returns All chunk documents for all links.
+ */
+export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
+  const splitter = new RecursiveCharacterTextSplitter();
+
+  // Collapses line breaks and runs of whitespace into single spaces.
+  const collapseWhitespace = (text: string) =>
+    text
+      .replace(/(\r\n|\n|\r)/gm, ' ')
+      .replace(/\s+/g, ' ')
+      .trim();
+
+  // Splits text into chunks and wraps each chunk as a Document.
+  const toDocuments = async (text: string, title: string, url: string) => {
+    const chunks = await splitter.splitText(text);
+
+    return chunks.map(
+      (chunk) =>
+        new Document({
+          pageContent: chunk,
+          metadata: {
+            title: title,
+            url: url,
+          },
+        }),
+    );
+  };
+
+  const docsPerLink = await Promise.all(
+    links.map(async (rawLink) => {
+      const link =
+        rawLink.startsWith('http://') || rawLink.startsWith('https://')
+          ? rawLink
+          : `https://${rawLink}`;
+
+      try {
+        const res = await axios.get(link, {
+          responseType: 'arraybuffer',
+        });
+
+        // The header may carry parameters and arbitrary casing.
+        const contentType = String(
+          res.headers['content-type'] ?? '',
+        ).toLowerCase();
+
+        if (contentType.startsWith('application/pdf')) {
+          const pdf = await pdfParse(res.data);
+
+          // `return await` keeps rejections inside this try block.
+          return await toDocuments(
+            collapseWhitespace(pdf.text),
+            'PDF Document',
+            link,
+          );
+        }
+
+        const html: string = res.data.toString('utf8');
+
+        const text = collapseWhitespace(
+          htmlToText(html, {
+            selectors: [
+              {
+                selector: 'a',
+                options: {
+                  ignoreHref: true,
+                },
+              },
+            ],
+          }),
+        );
+
+        // Anchor on the opening tag so only the title text is captured.
+        const rawTitle = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i)?.[1];
+        const title = rawTitle ? collapseWhitespace(rawTitle) : '';
+
+        return await toDocuments(text, title || link, link);
+      } catch (err) {
+        const reason = err instanceof Error ? err.message : String(err);
+
+        logger.error(`Error at generating documents from links: ${reason}`);
+
+        return [
+          new Document({
+            pageContent: `Failed to retrieve content from the link: ${reason}`,
+            metadata: {
+              title: 'Failed to retrieve content',
+              url: link,
+            },
+          }),
+        ];
+      }
+    }),
+  );
+
+  return docsPerLink.flat();
+};
diff --git a/src/utils/files.ts b/src/utils/files.ts
new file mode 100644
index 0000000..e6e91df
--- /dev/null
+++ b/src/utils/files.ts
@@ -0,0 +1,17 @@
+import path from 'path';
+import fs from 'fs';
+
+/**
+ * Looks up the display name of an uploaded file.
+ *
+ * Synchronously reads `{cwd}/uploads/{fileId}-extracted.json` and returns its
+ * `title` together with the id.
+ *
+ * @param fileId Id of the uploaded file.
+ * @returns `{ name, fileId }` where `name` is the stored title.
+ * @throws If the file is missing or does not contain valid JSON.
+ */
+export const getFileDetails = (fileId: string) => {
+  const extractedPath = path.join(
+    process.cwd(),
+    './uploads',
+    `${fileId}-extracted.json`,
+  );
+  const rawJson = fs.readFileSync(extractedPath, 'utf8');
+  const { title } = JSON.parse(rawJson);
+
+  return { name: title, fileId };
+};
diff --git a/src/websocket/connectionManager.ts b/src/websocket/connectionManager.ts
index 5cb075b..d980500 100644
--- a/src/websocket/connectionManager.ts
+++ b/src/websocket/connectionManager.ts
@@ -45,9 +45,8 @@ export const handleConnection = async (
chatModelProviders[chatModelProvider][chatModel] &&
chatModelProvider != 'custom_openai'
) {
- llm = chatModelProviders[chatModelProvider][chatModel] as
- | BaseChatModel
- | undefined;
+ llm = chatModelProviders[chatModelProvider][chatModel]
+ .model as unknown as BaseChatModel | undefined;
} else if (chatModelProvider == 'custom_openai') {
llm = new ChatOpenAI({
modelName: chatModel,
@@ -56,7 +55,7 @@ export const handleConnection = async (
configuration: {
baseURL: searchParams.get('openAIBaseURL'),
},
- });
+ }) as unknown as BaseChatModel;
}
if (
@@ -65,7 +64,7 @@ export const handleConnection = async (
) {
embeddings = embeddingModelProviders[embeddingModelProvider][
embeddingModel
- ] as Embeddings | undefined;
+ ].model as Embeddings | undefined;
}
if (!llm || !embeddings) {
@@ -79,6 +78,18 @@ export const handleConnection = async (
ws.close();
}
+ const interval = setInterval(() => {
+ if (ws.readyState === ws.OPEN) {
+ ws.send(
+ JSON.stringify({
+ type: 'signal',
+ data: 'open',
+ }),
+ );
+ clearInterval(interval);
+ }
+ }, 5);
+
ws.on(
'message',
async (message) =>
diff --git a/src/websocket/messageHandler.ts b/src/websocket/messageHandler.ts
index 98f67c2..395c0de 100644
--- a/src/websocket/messageHandler.ts
+++ b/src/websocket/messageHandler.ts
@@ -1,37 +1,99 @@
import { EventEmitter, WebSocket } from 'ws';
import { BaseMessage, AIMessage, HumanMessage } from '@langchain/core/messages';
-import handleWebSearch from '../agents/webSearchAgent';
-import handleAcademicSearch from '../agents/academicSearchAgent';
-import handleWritingAssistant from '../agents/writingAssistant';
-import handleWolframAlphaSearch from '../agents/wolframAlphaSearchAgent';
-import handleYoutubeSearch from '../agents/youtubeSearchAgent';
-import handleRedditSearch from '../agents/redditSearchAgent';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import type { Embeddings } from '@langchain/core/embeddings';
import logger from '../utils/logger';
+import db from '../db';
+import { chats, messages as messagesSchema } from '../db/schema';
+import { eq, asc, gt, and } from 'drizzle-orm';
+import crypto from 'crypto';
+import { getFileDetails } from '../utils/files';
+import MetaSearchAgent, {
+ MetaSearchAgentType,
+} from '../search/metaSearchAgent';
+import prompts from '../prompts';
type Message = {
- type: string;
+ messageId: string;
+ chatId: string;
content: string;
- copilot: boolean;
- focusMode: string;
- history: Array<[string, string]>;
};
-const searchHandlers = {
- webSearch: handleWebSearch,
- academicSearch: handleAcademicSearch,
- writingAssistant: handleWritingAssistant,
- wolframAlphaSearch: handleWolframAlphaSearch,
- youtubeSearch: handleYoutubeSearch,
- redditSearch: handleRedditSearch,
+/**
+ * Envelope of a message received over the websocket, as read by
+ * `handleMessage`.
+ *
+ * - `message`: the user message itself (ids and content).
+ * - `optimizationMode`: forwarded to the agent's `searchAndAnswer`.
+ * - `type`: only `'message'` is dispatched to a search handler.
+ * - `focusMode`: key into `searchHandlers`; overridden with `'webSearch'`
+ *   when files are attached.
+ * - `history`: `[role, text]` pairs; role `'human'` marks user turns.
+ * - `files`: ids of uploaded files, passed on as `fileIds` and resolved with
+ *   `getFileDetails`.
+ */
+type WSMessage = {
+  message: Message;
+  optimizationMode: 'speed' | 'balanced' | 'quality';
+  type: string;
+  focusMode: string;
+  history: Array<[string, string]>;
+  files: string[];
+};
+
+/**
+ * Focus-mode registry: each key is a focus mode name and each value a
+ * `MetaSearchAgent` configured for it. `handleMessage` picks the agent with
+ * `searchHandlers[parsedWSMessage.focusMode]`.
+ *
+ * How the options are used by the agent code visible in this change:
+ * - `activeEngines` is passed as `engines` to the SearXNG search; an empty
+ *   list presumably leaves engine selection to SearXNG (confirm).
+ * - `searchWeb: false` skips the search retriever chain entirely.
+ * - `rerank: false` makes source ranking take the same path as the 'speed'
+ *   optimization mode.
+ * - `rerankThreshold` is the similarity a candidate must exceed to be kept;
+ *   with `0` any candidate with positive similarity passes.
+ * - `responsePrompt` is the system prompt of the answering chain.
+ * NOTE(review): `queryGeneratorPrompt` and `summarizer` are consumed outside
+ * the code shown here; verify their effect in `MetaSearchAgent`.
+ */
+export const searchHandlers = {
+ webSearch: new MetaSearchAgent({
+ activeEngines: [],
+ queryGeneratorPrompt: prompts.webSearchRetrieverPrompt,
+ responsePrompt: prompts.webSearchResponsePrompt,
+ rerank: true,
+ rerankThreshold: 0.3,
+ searchWeb: true,
+ summarizer: true,
+ }),
+ academicSearch: new MetaSearchAgent({
+ activeEngines: ['arxiv', 'google scholar', 'pubmed'],
+ queryGeneratorPrompt: prompts.academicSearchRetrieverPrompt,
+ responsePrompt: prompts.academicSearchResponsePrompt,
+ rerank: true,
+ rerankThreshold: 0,
+ searchWeb: true,
+ summarizer: false,
+ }),
+ writingAssistant: new MetaSearchAgent({
+ activeEngines: [],
+ queryGeneratorPrompt: '',
+ responsePrompt: prompts.writingAssistantPrompt,
+ rerank: true,
+ rerankThreshold: 0,
+ searchWeb: false,
+ summarizer: false,
+ }),
+ wolframAlphaSearch: new MetaSearchAgent({
+ activeEngines: ['wolframalpha'],
+ queryGeneratorPrompt: prompts.wolframAlphaSearchRetrieverPrompt,
+ responsePrompt: prompts.wolframAlphaSearchResponsePrompt,
+ rerank: false,
+ rerankThreshold: 0,
+ searchWeb: true,
+ summarizer: false,
+ }),
+ youtubeSearch: new MetaSearchAgent({
+ activeEngines: ['youtube'],
+ queryGeneratorPrompt: prompts.youtubeSearchRetrieverPrompt,
+ responsePrompt: prompts.youtubeSearchResponsePrompt,
+ rerank: true,
+ rerankThreshold: 0.3,
+ searchWeb: true,
+ summarizer: false,
+ }),
+ redditSearch: new MetaSearchAgent({
+ activeEngines: ['reddit'],
+ queryGeneratorPrompt: prompts.redditSearchRetrieverPrompt,
+ responsePrompt: prompts.redditSearchResponsePrompt,
+ rerank: true,
+ rerankThreshold: 0.3,
+ searchWeb: true,
+ summarizer: false,
+ }),
};
const handleEmitterEvents = (
emitter: EventEmitter,
ws: WebSocket,
- id: string,
+ messageId: string,
+ chatId: string,
) => {
+ let recievedMessage = '';
+ let sources = [];
+
emitter.on('data', (data) => {
const parsedData = JSON.parse(data);
if (parsedData.type === 'response') {
@@ -39,21 +101,36 @@ const handleEmitterEvents = (
JSON.stringify({
type: 'message',
data: parsedData.data,
- messageId: id,
+ messageId: messageId,
}),
);
+ recievedMessage += parsedData.data;
} else if (parsedData.type === 'sources') {
ws.send(
JSON.stringify({
type: 'sources',
data: parsedData.data,
- messageId: id,
+ messageId: messageId,
}),
);
+ sources = parsedData.data;
}
});
emitter.on('end', () => {
- ws.send(JSON.stringify({ type: 'messageEnd', messageId: id }));
+ ws.send(JSON.stringify({ type: 'messageEnd', messageId: messageId }));
+
+ db.insert(messagesSchema)
+ .values({
+ content: recievedMessage,
+ chatId: chatId,
+ messageId: messageId,
+ role: 'assistant',
+ metadata: JSON.stringify({
+ createdAt: new Date(),
+ ...(sources && sources.length > 0 && { sources }),
+ }),
+ })
+ .execute();
});
emitter.on('error', (data) => {
const parsedData = JSON.parse(data);
@@ -74,8 +151,17 @@ export const handleMessage = async (
embeddings: Embeddings,
) => {
try {
- const parsedMessage = JSON.parse(message) as Message;
- const id = Math.random().toString(36).substring(7);
+ const parsedWSMessage = JSON.parse(message) as WSMessage;
+ const parsedMessage = parsedWSMessage.message;
+
+ if (parsedWSMessage.files.length > 0) {
+ /* TODO: Implement uploads in other classes/single meta class system*/
+ parsedWSMessage.focusMode = 'webSearch';
+ }
+
+ const humanMessageId =
+ parsedMessage.messageId ?? crypto.randomBytes(7).toString('hex');
+ const aiMessageId = crypto.randomBytes(7).toString('hex');
if (!parsedMessage.content)
return ws.send(
@@ -86,7 +172,7 @@ export const handleMessage = async (
}),
);
- const history: BaseMessage[] = parsedMessage.history.map((msg) => {
+ const history: BaseMessage[] = parsedWSMessage.history.map((msg) => {
if (msg[0] === 'human') {
return new HumanMessage({
content: msg[1],
@@ -98,16 +184,71 @@ export const handleMessage = async (
}
});
- if (parsedMessage.type === 'message') {
- const handler = searchHandlers[parsedMessage.focusMode];
+ if (parsedWSMessage.type === 'message') {
+ const handler: MetaSearchAgentType =
+ searchHandlers[parsedWSMessage.focusMode];
+
if (handler) {
- const emitter = handler(
- parsedMessage.content,
- history,
- llm,
- embeddings,
- );
- handleEmitterEvents(emitter, ws, id);
+ try {
+ const emitter = await handler.searchAndAnswer(
+ parsedMessage.content,
+ history,
+ llm,
+ embeddings,
+ parsedWSMessage.optimizationMode,
+ parsedWSMessage.files,
+ );
+
+ handleEmitterEvents(emitter, ws, aiMessageId, parsedMessage.chatId);
+
+ const chat = await db.query.chats.findFirst({
+ where: eq(chats.id, parsedMessage.chatId),
+ });
+
+ if (!chat) {
+ await db
+ .insert(chats)
+ .values({
+ id: parsedMessage.chatId,
+ title: parsedMessage.content,
+ createdAt: new Date().toString(),
+ focusMode: parsedWSMessage.focusMode,
+ files: parsedWSMessage.files.map(getFileDetails),
+ })
+ .execute();
+ }
+
+ const messageExists = await db.query.messages.findFirst({
+ where: eq(messagesSchema.messageId, humanMessageId),
+ });
+
+ if (!messageExists) {
+ await db
+ .insert(messagesSchema)
+ .values({
+ content: parsedMessage.content,
+ chatId: parsedMessage.chatId,
+ messageId: humanMessageId,
+ role: 'user',
+ metadata: JSON.stringify({
+ createdAt: new Date(),
+ }),
+ })
+ .execute();
+ } else {
+ await db
+ .delete(messagesSchema)
+ .where(
+ and(
+ gt(messagesSchema.id, messageExists.id),
+ eq(messagesSchema.chatId, parsedMessage.chatId),
+ ),
+ )
+ .execute();
+ }
+ } catch (err) {
+ console.log(err);
+ }
} else {
ws.send(
JSON.stringify({
diff --git a/ui/app/c/[chatId]/page.tsx b/ui/app/c/[chatId]/page.tsx
new file mode 100644
index 0000000..dc3c92a
--- /dev/null
+++ b/ui/app/c/[chatId]/page.tsx
@@ -0,0 +1,7 @@
+import ChatWindow from '@/components/ChatWindow';
+
+// Page component of `ui/app/c/[chatId]/page.tsx`; receives the dynamic path
+// segment as `params.chatId`.
+// NOTE(review): the returned element is missing in this copy (`return ;`
+// yields undefined) and the imported `ChatWindow` is unused. Presumably
+// `ChatWindow` was rendered here with the chat id; confirm against the
+// original file before relying on this component.
+const Page = ({ params }: { params: { chatId: string } }) => {
+ return ;
+};
+
+export default Page;
diff --git a/ui/app/discover/page.tsx b/ui/app/discover/page.tsx
index a443a17..eb94040 100644
--- a/ui/app/discover/page.tsx
+++ b/ui/app/discover/page.tsx
@@ -1,5 +1,113 @@
+'use client';
+
+import { Search } from 'lucide-react';
+import { useEffect, useState } from 'react';
+import Link from 'next/link';
+import { toast } from 'sonner';
+
+interface Discover {
+ title: string;
+ content: string;
+ url: string;
+ thumbnail: string;
+}
+
const Page = () => {
- return