diff --git a/docker-compose.yaml b/docker-compose.yaml index a0e1d73..1a268e1 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -6,49 +6,68 @@ services: ports: - 4000:8080 networks: - - perplexica-network + - xme-network restart: unless-stopped - perplexica-backend: + chroma: + image: chromadb/chroma:latest + environment: + - ALLOW_RESET=true + - CHROMA_SERVER_CORS_ALLOW_ORIGINS=["*"] + ports: + - "8000:8000" + volumes: + - chroma_data:/chroma/chroma + networks: + - xme-network + restart: unless-stopped + + xme-backend: build: context: . dockerfile: backend.dockerfile - image: itzcrazykns1337/perplexica-backend:main + image: itzcrazykns1337/xme-backend:main environment: + - SUPABASE_URL=${SUPABASE_URL} # secrets are read from the untracked .env file / host environment, never committed + - SUPABASE_KEY=${SUPABASE_KEY} - SEARXNG_API_URL=http://searxng:8080 + - DATABASE_URL=${DATABASE_URL} depends_on: - searxng ports: - 3001:3001 volumes: - - backend-dbstore:/home/perplexica/data - - uploads:/home/perplexica/uploads + - backend-dbstore:/home/xme/data + - uploads:/home/xme/uploads - ./config.toml:/home/perplexica/config.toml extra_hosts: - - 'host.docker.internal:host-gateway' + - host.docker.internal:host-gateway networks: - - perplexica-network + - xme-network restart: unless-stopped - perplexica-frontend: + xme-frontend: build: context: . 
dockerfile: app.dockerfile args: - - NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api - - NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001 - image: itzcrazykns1337/perplexica-frontend:main + NEXT_PUBLIC_WS_URL: ws://localhost:3001 + NEXT_PUBLIC_API_URL: http://localhost:3001/api + NEXT_PUBLIC_SUPABASE_URL: ${NEXT_PUBLIC_SUPABASE_URL} + NEXT_PUBLIC_SUPABASE_ANON_KEY: ${NEXT_PUBLIC_SUPABASE_ANON_KEY} + image: itzcrazykns1337/xme-frontend:main depends_on: - - perplexica-backend + - xme-backend ports: - 3000:3000 networks: - - perplexica-network + - xme-network restart: unless-stopped networks: - perplexica-network: + xme-network: volumes: backend-dbstore: uploads: + chroma_data: diff --git a/package.json b/package.json index 3fce442..e4e7425 100644 --- a/package.json +++ b/package.json @@ -30,11 +30,13 @@ "@iarna/toml": "^2.2.5", "@langchain/anthropic": "^0.2.3", "@langchain/community": "^0.2.16", - "@langchain/openai": "^0.0.25", "@langchain/google-genai": "^0.0.23", + "@langchain/openai": "^0.0.25", + "@supabase/supabase-js": "^2.45.0", "@xenova/transformers": "^2.17.1", "axios": "^1.6.8", "better-sqlite3": "^11.0.0", + "chromadb": "^1.9.4", "compute-cosine-similarity": "^1.1.0", "compute-dot": "^1.1.0", "cors": "^2.8.5", diff --git a/project_structure.md b/project_structure.md new file mode 100644 index 0000000..c2501d8 --- /dev/null +++ b/project_structure.md @@ -0,0 +1,157 @@ +# Structure du Projet X-me + +``` +X-me/ +├── .assets/ +├── .dockerignore +├── .git/ +├── .github/ +├── .gitignore +├── .prettierignore +├── .prettierrc.js +├── CONTRIBUTING.md +├── LICENSE +├── README.md +├── app.dockerfile +├── backend.dockerfile +├── config.toml +├── data/ +├── docker-compose.yaml +├── docs/ +├── drizzle.config.ts +├── package.json +├── project_structure.md +├── searxng/ +│ ├── limiter.toml +│ ├── 
settings.yml +│ └── uwsgi.ini +├── src/ +│ ├── app.ts +│ ├── config.ts +│ ├── chains/ +│ │ ├── expertSearchAgent.ts +│ │ ├── imageSearchAgent.ts +│ │ ├── legalSearchAgent.ts +│ │ ├── suggestionGeneratorAgent.ts +│ │ └── videoSearchAgent.ts +│ ├── db/ +│ │ ├── index.ts +│ │ ├── schema.ts +│ │ └── supabase.ts +│ ├── lib/ +│ │ ├── huggingfaceTransformer.ts +│ │ ├── outputParsers/ +│ │ │ ├── lineOutputParser.ts +│ │ │ └── listLineOutputParser.ts +│ │ ├── providers/ +│ │ │ ├── anthropic.ts +│ │ │ ├── gemini.ts +│ │ │ ├── groq.ts +│ │ │ ├── index.ts +│ │ │ ├── ollama.ts +│ │ │ ├── openai.ts +│ │ │ └── transformers.ts +│ │ └── searxng.ts +│ ├── prompts/ +│ │ ├── academicSearch.ts +│ │ ├── index.ts +│ │ ├── redditSearch.ts +│ │ ├── webSearch.ts +│ │ ├── wolframAlpha.ts +│ │ ├── writingAssistant.ts +│ │ └── youtubeSearch.ts +│ ├── routes/ +│ │ ├── chats.ts +│ │ ├── config.ts +│ │ ├── discover.ts +│ │ ├── images.ts +│ │ ├── index.ts +│ │ ├── legal.ts +│ │ ├── models.ts +│ │ ├── search.ts +│ │ ├── suggestions.ts +│ │ ├── uploads.ts +│ │ └── videos.ts +│ ├── search/ +│ │ └── metaSearchAgent.ts +│ ├── utils/ +│ │ ├── computeSimilarity.ts +│ │ ├── documents.ts +│ │ ├── files.ts +│ │ ├── formatHistory.ts +│ │ └── logger.ts +│ └── websocket/ +│ ├── connectionManager.ts +│ ├── index.ts +│ ├── messageHandler.ts +│ └── websocketServer.ts +├── tsconfig.json +├── ui/ +│ ├── .env.example +│ ├── .eslintrc.json +│ ├── .gitignore +│ ├── .prettierrc.js +│ ├── app/ +│ │ ├── c/ +│ │ │ └── [chatId]/ +│ │ │ └── page.tsx +│ │ ├── chatroom/ +│ │ │ └── page.tsx +│ │ ├── discover/ +│ │ │ └── page.tsx +│ │ ├── favicon.ico +│ │ ├── globals.css +│ │ ├── layout.tsx +│ │ ├── library/ +│ │ │ ├── layout.tsx +│ │ │ └── page.tsx +│ │ └── page.tsx +│ ├── components/ +│ │ ├── ui/ +│ │ │ ├── button.tsx +│ │ │ └── input.tsx +│ │ ├── Chat.tsx +│ │ ├── ChatWindow.tsx +│ │ ├── DeleteChat.tsx +│ │ ├── EmptyChat.tsx +│ │ ├── EmptyChatMessageInput.tsx +│ │ ├── Layout.tsx +│ │ ├── LegalSearch.tsx +│ │ ├── 
MessageBox.tsx +│ │ ├── MessageBoxLoading.tsx +│ │ ├── MessageInput.tsx +│ │ ├── MessageSources.tsx +│ │ ├── MessageActions/ +│ │ │ ├── Copy.tsx +│ │ │ └── Rewrite.tsx +│ │ ├── MessageInputActions/ +│ │ │ ├── Attach.tsx +│ │ │ ├── AttachSmall.tsx +│ │ │ ├── Copilot.tsx +│ │ │ ├── Focus.tsx +│ │ │ └── Optimization.tsx +│ │ ├── Navbar.tsx +│ │ ├── SearchImages.tsx +│ │ ├── SearchVideos.tsx +│ │ ├── SettingsDialog.tsx +│ │ ├── Sidebar.tsx +│ │ └── theme/ +│ │ ├── Provider.tsx +│ │ └── Switcher.tsx +│ ├── lib/ +│ │ ├── actions.ts +│ │ ├── supabase.ts +│ │ └── utils.ts +│ ├── next.config.mjs +│ ├── package.json +│ ├── postcss.config.js +│ ├── public/ +│ │ ├── next.svg +│ │ └── vercel.svg +│ ├── tailwind.config.ts +│ ├── tsconfig.json +│ └── yarn.lock +├── uploads/ +└── yarn.lock +``` +Cette arborescence représente la structure complète du projet X-me, incluant tous les fichiers et dossiers. diff --git a/sample.config.toml b/sample.config.toml deleted file mode 100644 index 50ba95d..0000000 --- a/sample.config.toml +++ /dev/null @@ -1,14 +0,0 @@ -[GENERAL] -PORT = 3001 # Port to run the server on -SIMILARITY_MEASURE = "cosine" # "cosine" or "dot" -KEEP_ALIVE = "5m" # How long to keep Ollama models loaded into memory. 
(Instead of using -1 use "-1m") - -[API_KEYS] -OPENAI = "" # OpenAI API key - sk-1234567890abcdef1234567890abcdef -GROQ = "" # Groq API key - gsk_1234567890abcdef1234567890abcdef -ANTHROPIC = "" # Anthropic API key - sk-ant-1234567890abcdef1234567890abcdef -GEMINI = "" # Gemini API key - sk-1234567890abcdef1234567890abcdef - -[API_ENDPOINTS] -SEARXNG = "http://localhost:32768" # SearxNG API URL -OLLAMA = "" # Ollama API URL - http://host.docker.internal:11434 \ No newline at end of file diff --git a/searxng/settings.yml b/searxng/settings.yml index 54d27c4..7b4a323 100644 --- a/searxng/settings.yml +++ b/searxng/settings.yml @@ -4,14 +4,79 @@ general: instance_name: 'searxng' search: + # Sources de recherche spécialisées + engines: + - name: legifrance + enabled: true + weight: 3 + - name: service_public + enabled: true + weight: 3 + - name: journal_officiel + enabled: true + weight: 2 + - name: urssaf + enabled: true + weight: 2 + - name: cci + enabled: true + weight: 1 + - name: conseil_etat + enabled: true + weight: 1 + - name: google_images + enabled: true + weight: 2 + - name: bing_images + enabled: true + weight: 2 + - name: wolframalpha + enabled: true + weight: 1 + + # Paramètres de recherche autocomplete: 'google' + language: 'fr' # ou 'en' selon votre marché cible formats: - html - json + - csv + - pdf + + # Filtres spécialisés + filters: + - type: 'time_range' + default: 'year' # Garder car pertinent pour la législation récente + - type: 'legal_type' + options: + - 'loi' + - 'decret' + - 'arrete' + - 'circulaire' + - type: 'jurisdiction' + options: + - 'national' + - 'regional' + - 'european' + - type: 'source' + options: + - 'legifrance' + - 'service_public' + - 'urssaf' + - 'cci' + + # Paramètres de résultats + results: + max_pages: 10 + safe_search: 0 + categories: + - jurisprudence + - professional + - business + - legal + - entreprise + - sociéte + - images server: - secret_key: 'a2fb23f1b02e6ee83875b09826990de0f6bd908b6638e8c10277d415f6ab852b' # Is 
overwritten by ${SEARXNG_SECRET} - -engines: - - name: wolframalpha - disabled: false + secret_key: 'a2fb23f1b02e6ee83875b09826990de0f6bd908b6638e8c10277d415f6ab852b' # Is overwritten by ${SEARXNG_SECRET} \ No newline at end of file diff --git a/src/app.ts b/src/app.ts index 96b3a0c..3517253 100644 --- a/src/app.ts +++ b/src/app.ts @@ -5,6 +5,7 @@ import http from 'http'; import routes from './routes'; import { getPort } from './config'; import logger from './utils/logger'; +import imagesRouter from './routes/images'; const port = getPort(); @@ -23,6 +24,8 @@ app.get('/api', (_, res) => { res.status(200).json({ status: 'ok' }); }); +app.use('/api/images', imagesRouter); + server.listen(port, () => { logger.info(`Server is running on port ${port}`); }); diff --git a/src/chains/expertSearchAgent.ts b/src/chains/expertSearchAgent.ts new file mode 100644 index 0000000..034fc84 --- /dev/null +++ b/src/chains/expertSearchAgent.ts @@ -0,0 +1,235 @@ +import { ChatPromptTemplate, PromptTemplate } from '@langchain/core/prompts'; +import { BaseChatModel } from '@langchain/core/language_models/chat_models'; +import { + RunnableLambda, + RunnableMap, + RunnableSequence, +} from '@langchain/core/runnables'; +import { StringOutputParser } from '@langchain/core/output_parsers'; +import { BaseMessage } from '@langchain/core/messages'; +import { supabase } from '../db/supabase'; +import formatChatHistoryAsString from '../utils/formatHistory'; +import { Expert, ExpertSearchRequest, ExpertSearchResponse } from '../types/types'; + +type ExpertSearchChainInput = { + chat_history: BaseMessage[]; + query: string; +}; + +const ExpertSearchChainPrompt = ` +Vous êtes un agent spécialisé dans l'analyse et la recherche d'experts professionnels. Votre rôle est d'interpréter les demandes des utilisateurs et d'extraire les informations essentielles pour trouver l'expert le plus pertinent. + +OBJECTIF : +Analyser la requête pour identifier précisément : +1. Le domaine d'expertise recherché +2. 
La localisation souhaitée (si mentionnée) + +RÈGLES D'EXTRACTION : +- Pour l'EXPERTISE : + * Identifier le domaine principal (comptabilité, droit, marketing, etc.) + * Reconnaître les spécialisations (droit des affaires, marketing digital, etc.) + * Nettoyer les mots parasites (expert, spécialiste, professionnel, etc.) + +- Pour la VILLE : + * Si mentionnée + * Extraire la ville mentionnée + * Ignorer si non spécifiée + * Standardiser le format (tout en minuscules) + +FORMAT DE RÉPONSE STRICT : +Répondre en deux lignes exactement : +expertise: [domaine d'expertise] +ville: [ville si mentionnée] + +EXEMPLES D'ANALYSE : + +1. "Je cherche un expert comptable sur Paris" +expertise: comptabilité +ville: paris + +2. "Il me faudrait un avocat spécialisé en droit des affaires à Lyon" +expertise: droit des affaires +ville: lyon + +Conversation précédente : +{chat_history} + +Requête actuelle : {query} + +Principe de recherche d'expert : +- Pour toute recherche d'expert, extraire UNIQUEMENT : + * L'expertise demandée + * La ville (si mentionnée) + +- Mots déclencheurs à reconnaître : + * "cherche un expert/spécialiste/consultant" + * "besoin d'un professionnel" + * "recherche quelqu'un pour" + * "qui peut m'aider avec" + + +\` +Je cherche un expert comptable + +expertise: comptabilité +ville: +\` + +\` +J'ai besoin d'un spécialiste en droit des sociétés à Lyon + +expertise: droit des sociétés +ville: lyon +\` + +\` +Qui peut m'aider avec ma comptabilité sur Paris ? + +expertise: comptabilité +ville: paris +\` + +`; + +const ExpertAnalysisPrompt = ` +Vous devez générer une synthèse des experts trouvés en vous basant UNIQUEMENT sur les données fournies. 
+ +Contexte de la recherche : {query} + +Experts trouvés (à utiliser EXCLUSIVEMENT) : +{experts} + +Format de la synthèse : +🎯 Synthèse de la recherche +[Résumé bref de la demande] + +💫 Experts disponibles : +[Pour chaque expert trouvé dans les données :] +- [Prénom Nom] à [Ville] + Expertise : [expertises] + Tarif : [tarif]€ + [Point clé de la biographie] + +⚠️ IMPORTANT : N'inventez PAS d'experts. Utilisez UNIQUEMENT les données fournies. +`; + +const strParser = new StringOutputParser(); + +// Fonction pour convertir les données de l'expert +const convertToExpert = (data: any): Expert => { + return { + id: data.id, + id_expert: data.id_expert || '', + nom: data.nom, + prenom: data.prenom, + adresse: data.adresse || '', + pays: data.pays, + ville: data.ville, + expertises: data.expertises, + specialite: data.specialite || data.expertises?.[0] || '', + biographie: data.biographie, + tarif: data.tarif || 0, + services: data.services, + created_at: data.created_at, + image_url: data.image_url + }; +}; + +const createExpertSearchChain = (llm: BaseChatModel) => { + return RunnableSequence.from([ + RunnableMap.from({ + chat_history: (input: ExpertSearchChainInput) => { + return formatChatHistoryAsString(input.chat_history || []); + }, + query: (input: ExpertSearchChainInput) => { + return input.query || ''; + }, + }), + PromptTemplate.fromTemplate(ExpertSearchChainPrompt), + llm, + strParser, + RunnableLambda.from(async (response: string) => { + try { + // Extraire expertise et ville avec gestion des erreurs + const lines = response.split('\n').filter(line => line.trim() !== ''); + const expertise = lines[0]?.replace('expertise:', '')?.trim() || ''; + const ville = lines[1]?.replace('ville:', '')?.trim() || ''; + + if (!expertise) { + return { + experts: [], + synthese: "Je n'ai pas pu identifier l'expertise recherchée." 
+ } as ExpertSearchResponse; + } + + // Rechercher les experts + let query = supabase + .from('experts') + .select('*') + .ilike('expertises', `%${expertise}%`) + .limit(3); + + if (ville) { + query = query.ilike('ville', `%${ville}%`); + } + + const { data: experts, error } = await query; + + if (error) throw error; + + if (!experts || experts.length === 0) { + return { + experts: [], + synthese: "Désolé, je n'ai pas trouvé d'experts correspondant à vos critères." + } as ExpertSearchResponse; + } + + const synthesePrompt = PromptTemplate.fromTemplate(ExpertAnalysisPrompt); + const formattedPrompt = await synthesePrompt.format({ + query: response, + experts: JSON.stringify(experts, null, 2) + }); + + const syntheseResponse = await llm.invoke(formattedPrompt); + const syntheseString = typeof syntheseResponse.content === 'string' + ? syntheseResponse.content + : JSON.stringify(syntheseResponse.content); + + return { + experts: experts.map(convertToExpert), + synthese: syntheseString + } as ExpertSearchResponse; + + } catch (error) { + console.error('❌ Erreur:', error); + return { + experts: [], + synthese: "Une erreur est survenue lors de la recherche d'experts." + } as ExpertSearchResponse; + } + }), + ]); +}; + +const handleExpertSearch = async (input: ExpertSearchRequest, llm: BaseChatModel) => { + try { + // 1. Analyse de la requête via LLM pour extraire l'expertise et la ville + const expertSearchChain = createExpertSearchChain(llm); + const result = await expertSearchChain.invoke({ + query: input.query, + chat_history: input.chat_history || [] + }) as ExpertSearchResponse; // Le résultat est déjà une ExpertSearchResponse + + // Pas besoin de retraiter la réponse car createExpertSearchChain fait déjà tout le travail + return result; + + } catch (error) { + console.error('❌ Erreur dans handleExpertSearch:', error); + return { + experts: [], + synthese: "Une erreur est survenue." 
+ }; + } +}; + +export default handleExpertSearch; \ No newline at end of file diff --git a/src/chains/imageSearchAgent.ts b/src/chains/imageSearchAgent.ts index 167019f..8dabe20 100644 --- a/src/chains/imageSearchAgent.ts +++ b/src/chains/imageSearchAgent.ts @@ -11,25 +11,35 @@ import { searchSearxng } from '../lib/searxng'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; const imageSearchChainPrompt = ` -You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search the web for images. -You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation. +Vous êtes un expert en recherche d'images pour illustrer des contenus business. Votre objectif est de trouver des images élégantes et modernes qui illustrent le sujet de manière indirecte et esthétique. -Example: -1. Follow up question: What is a cat? -Rephrased: A cat +Principes à suivre : +- Privilégier des images lifestyle et esthétiques +- Éviter les schémas, graphiques et images trop techniques +- Favoriser des images avec des personnes dans des situations naturelles +- Choisir des images lumineuses et positives +- Préférer des compositions simples et épurées -2. Follow up question: What is a car? How does it works? -Rephrased: Car working +Format de la requête : +- 2-3 mots-clés maximum +- Ajouter "lifestyle" ou "modern" pour améliorer la qualité +- Toujours ajouter "professional" pour le contexte business -3. Follow up question: How does an AC work? -Rephrased: AC working +Exemples : +1. Question : "Comment créer une entreprise ?" +Requête : "entrepreneur lifestyle modern" -Conversation: +2. Question : "Qu'est-ce qu'un business plan ?" +Requête : "business meeting professional" + +3. Question : "Comment faire sa comptabilité ?" 
+Requête : "office work lifestyle" + +Conversation : {chat_history} -Follow up question: {query} -Rephrased question: -`; +Question : {query} +Requête de recherche d'image :`; type ImageSearchChainInput = { chat_history: BaseMessage[]; @@ -53,11 +63,12 @@ const createImageSearchChain = (llm: BaseChatModel) => { strParser, RunnableLambda.from(async (input: string) => { const res = await searchSearxng(input, { - engines: ['bing images', 'google images'], + engines: ['google_images', 'bing_images'], + language: 'fr', + categories: ['images'], }); - + const images = []; - res.results.forEach((result) => { if (result.img_src && result.url && result.title) { images.push({ @@ -67,7 +78,7 @@ const createImageSearchChain = (llm: BaseChatModel) => { }); } }); - + return images.slice(0, 10); }), ]); @@ -81,4 +92,4 @@ const handleImageSearch = ( return imageSearchChain.invoke(input); }; -export default handleImageSearch; +export default handleImageSearch; \ No newline at end of file diff --git a/src/chains/legalSearchAgent.ts b/src/chains/legalSearchAgent.ts new file mode 100644 index 0000000..c7a5a33 --- /dev/null +++ b/src/chains/legalSearchAgent.ts @@ -0,0 +1,113 @@ +import { + RunnableSequence, + RunnableMap, + RunnableLambda, + } from '@langchain/core/runnables'; + import { PromptTemplate } from '@langchain/core/prompts'; + import formatChatHistoryAsString from '../utils/formatHistory'; + import { BaseMessage } from '@langchain/core/messages'; + import { StringOutputParser } from '@langchain/core/output_parsers'; + import { searchSearxng } from '../lib/searxng'; + import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; + + const legalSearchChainPrompt = ` + Vous êtes un assistant juridique expert spécialisé dans la recherche documentaire légale française. Votre rôle est d'analyser la question de l'utilisateur et de générer une requête de recherche optimisée. 
+ + Contexte de la conversation : + {chat_history} + + Question actuelle : {query} + + Instructions détaillées : + 1. Analysez précisément : + - Le domaine juridique spécifique (droit du travail, droit des sociétés, etc.) + - Le type de document recherché (loi, décret, jurisprudence, etc.) + - Les points clés de la problématique + + 2. Construisez une requête qui inclut : + - Les termes juridiques exacts (articles de code, références légales) + - Les mots-clés techniques appropriés + - Les synonymes pertinents + - La période temporelle si pertinente (loi récente, modifications) + + 3. Priorisez les sources selon la hiérarchie : + - Codes et lois : Légifrance + - Information officielle : Service-public.fr + - Publications : Journal-officiel + - Informations pratiques : URSSAF, CCI + + Exemples de reformulation : + Question : "Comment créer une SARL ?" + → "Code commerce SARL constitution statuts gérance responsabilité associés capital social formalités légifrance service-public" + + Question : "Licenciement économique procédure" + → "Code travail licenciement économique procédure CSE PSE motif notification délais recours légifrance" + + Question : "Bail commercial résiliation" + → "Code commerce bail commercial résiliation article L145-4 congé indemnité éviction légifrance jurisprudence" + + Reformulez la question de manière précise et technique :`; + + type LegalSearchChainInput = { + chat_history: BaseMessage[]; + query: string; + }; + + const strParser = new StringOutputParser(); + + const createLegalSearchChain = (llm: BaseChatModel) => { + return RunnableSequence.from([ + RunnableMap.from({ + chat_history: (input: LegalSearchChainInput) => { + return formatChatHistoryAsString(input.chat_history); + }, + query: (input: LegalSearchChainInput) => { + return input.query; + }, + }), + PromptTemplate.fromTemplate(legalSearchChainPrompt), + llm, + strParser, + RunnableLambda.from(async (input: string) => { + const pdfQuery = `${input} filetype:pdf`; + + const res = 
await searchSearxng(pdfQuery, { + engines: [ + 'legifrance', + 'journal_officiel', + 'service_public', + 'urssaf', // engine names must match the lowercase names declared in searxng/settings.yml + 'cci' + ], + language: 'fr', + categories: ['general', 'files'] + }); + + const documents = []; + + res.results.forEach((result) => { + if (result.url && result.title) { + documents.push({ + url: result.url, + title: result.title, + snippet: result.content || '', + source: result.url.split('/')[2] || 'unknown', + type: 'pdf' + }); + } + }); + + return documents.slice(0, 10); + }), + ]); + }; + + const handleLegalSearch = ( + input: LegalSearchChainInput, + llm: BaseChatModel, + ) => { + const legalSearchChain = createLegalSearchChain(llm); + return legalSearchChain.invoke(input); + }; + + export default handleLegalSearch; \ No newline at end of file diff --git a/src/chains/rag_document_upload.ts b/src/chains/rag_document_upload.ts new file mode 100644 index 0000000..32b4681 --- /dev/null +++ b/src/chains/rag_document_upload.ts @@ -0,0 +1,292 @@ +import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'; +import { Document } from '@langchain/core/documents'; +import { Embeddings } from '@langchain/core/embeddings'; +import { Chroma } from '@langchain/community/vectorstores/chroma'; +import { BaseChatModel } from '@langchain/core/language_models/chat_models'; +import { RunnableSequence, RunnableMap } from '@langchain/core/runnables'; +import { PromptTemplate } from '@langchain/core/prompts'; +import { StringOutputParser } from '@langchain/core/output_parsers'; +import formatChatHistoryAsString from '../utils/formatHistory'; +import { BaseMessage } from '@langchain/core/messages'; + +// Type local pour la chaîne de recherche +type SearchInput = { + query: string; + chat_history: BaseMessage[]; + type?: string; +}; + +export class RAGDocumentChain { + private vectorStore: Chroma | null = null; + private textSplitter = new RecursiveCharacterTextSplitter({ + chunkSize: 1000, + chunkOverlap: 200, + separators: ["\n\n", "\n", ".", "!", "?", 
";", ":", " ", ""], + keepSeparator: true, + lengthFunction: (text) => text.length + }); + + // Add chunk preprocessing + private preprocessChunk(text: string): string { + return text + .replace(/\s+/g, ' ') + .replace(/\n+/g, ' ') + .trim(); + } + + // Add metadata enrichment + private enrichChunkMetadata(doc: Document): Document { + const metadata = { + ...doc.metadata, + chunk_type: 'text', + word_count: doc.pageContent.split(/\s+/).length, + processed_date: new Date().toISOString() + }; + return new Document({ + pageContent: this.preprocessChunk(doc.pageContent), + metadata + }); + } + + // Add chunk scoring + private scoreChunk(chunk: string): number { + const wordCount = chunk.split(/\s+/).length; + const sentenceCount = chunk.split(/[.!?]+/).length; + return wordCount > 10 && sentenceCount > 0 ? 1 : 0; + } + + public async initializeVectorStoreFromDocuments( + documents: Document[], + embeddings: Embeddings + ) { + try { + console.log("🔄 Préparation des documents..."); + + // Validate and preprocess documents + const validDocuments = documents + .filter(doc => doc.pageContent && doc.pageContent.trim().length > 50) + .map(doc => this.enrichChunkMetadata(doc)); + + // Split documents into chunks + const texts = await this.textSplitter.splitDocuments(validDocuments); + console.log(`📄 ${texts.length} chunks créés`); + + // Score and filter chunks + const scoredTexts = texts.filter(doc => this.scoreChunk(doc.pageContent) > 0); + console.log(`📄 ${scoredTexts.length} chunks valides après scoring`); + + // Deduplicate chunks + const uniqueTexts = this.deduplicateChunks(scoredTexts); + console.log(`📄 ${uniqueTexts.length} chunks uniques après déduplication`); + + // Initialize vector store with optimized settings + this.vectorStore = await Chroma.fromDocuments( + uniqueTexts, + embeddings, + { + collectionName: "uploaded_docs", + url: "http://chroma:8000", + collectionMetadata: { + "hnsw:space": "cosine", + "hnsw:construction_ef": 100, // Increased for better index 
quality + "hnsw:search_ef": 50, // Balanced for search performance + "hnsw:m": 16 // Number of connections per element + } + } + ); + + console.log("✅ VectorStore initialisé avec succès"); + return { + totalDocuments: documents.length, + validChunks: uniqueTexts.length, + averageChunkSize: this.calculateAverageChunkSize(uniqueTexts) + }; + } catch (error) { + console.error("❌ Erreur lors de l'initialisation:", error); + throw new Error(`Erreur d'initialisation du VectorStore: ${error instanceof Error ? error.message : String(error)}`); + } + } + + private calculateAverageChunkSize(chunks: Document[]): number { + if (chunks.length === 0) return 0; + const totalLength = chunks.reduce((sum, doc) => sum + doc.pageContent.length, 0); + return Math.round(totalLength / chunks.length); + } + + private deduplicateChunks(chunks: Document[]): Document[] { + const seen = new Set(); + return chunks.filter(chunk => { + const normalized = chunk.pageContent + .toLowerCase() + .replace(/\s+/g, ' ') + .trim(); + + if (seen.has(normalized)) { + return false; + } + seen.add(normalized); + return true; + }); + } + + public async searchSimilarDocuments(query: string, limit: number = 5) { + if (!this.vectorStore) { + console.warn("⚠️ VectorStore non initialisé"); + return []; + } + + try { + console.log("🔍 Recherche pour:", query); + + // Over-fetch 2x, then re-rank locally with calculateRelevanceScore. + // No third argument: LangChain's Chroma store passes it to Chroma as a + // `where` clause, and { filter, minScore } is not a valid one (Chroma has + // no $exists operator and similaritySearch has no minScore option). + const initialResults = await this.vectorStore.similaritySearch( + query, + limit * 2 + ); + + const scoredResults = initialResults + .filter(doc => doc.pageContent.trim().length > 50) + .map(doc => ({ + document: doc, + score: this.calculateRelevanceScore(query, doc.pageContent) + })) + .sort((a, b) => b.score - a.score) + .slice(0, limit) + .map(item => { + const doc = item.document; + const pageNumber = doc.metadata.page_number || doc.metadata.pageNumber || 1; + const title = doc.metadata.title || 'Document'; + const source = doc.metadata.source; + + // Prepare the text to highlight + const searchText = doc.pageContent 
.substring(0, 200) + .replace(/[\n\r]+/g, ' ') + .trim(); + + return new Document({ + pageContent: doc.pageContent, + metadata: { + title: title, + pageNumber: pageNumber, + source: source, + type: doc.metadata.type || 'uploaded', + searchText: searchText, + url: source ? + `/api/uploads/${source}/view?page=${pageNumber}&search=${encodeURIComponent(searchText)}` : + undefined + } + }); + }); + + const mergedResults = this.mergeRelatedChunks(scoredResults); + console.log(`📄 ${mergedResults.length} documents pertinents trouvés après reranking`); + return mergedResults; + } catch (error) { + console.error("❌ Erreur de recherche:", error); + return []; + } + } + + private calculateRelevanceScore(query: string, content: string): number { + const normalizedQuery = query.toLowerCase(); + const normalizedContent = content.toLowerCase(); + + // Basic relevance scoring based on multiple factors + let score = 0; + + // Term frequency (empty terms from leading/trailing whitespace are dropped) + const queryTerms = normalizedQuery.split(/\s+/).filter(Boolean); + queryTerms.forEach(term => { + const termCount = normalizedContent.split(term).length - 1; // literal occurrence count; new RegExp(term) threw on terms such as "c++" or "(" + score += termCount * 0.1; + }); + + // Exact phrase matching + if (normalizedContent.includes(normalizedQuery)) { + score += 1; + } + + // Content length penalty (prefer shorter, more focused chunks) + const lengthPenalty = Math.max(0, 1 - (content.length / 5000)); + score *= (1 + lengthPenalty); + + return score; + } + + private mergeRelatedChunks(documents: Document[]): Document[] { + const merged: { [key: string]: Document } = {}; + + documents.forEach(doc => { + const source = doc.metadata?.source || ''; + const page = doc.metadata?.pageNumber || 1; + const key = `${source}-${page}`; + + if (!merged[key]) { + merged[key] = doc; + } else { + const existingDoc = merged[key]; + merged[key] = new Document({ + pageContent: `${existingDoc.pageContent}\n\n${doc.pageContent}`, + metadata: { + ...existingDoc.metadata, + searchText: existingDoc.metadata.searchText + } + }); + } + }); + + 
return Object.values(merged); + } + + public createSearchChain(llm: BaseChatModel) { + return RunnableSequence.from([ + RunnableMap.from({ + query: (input: SearchInput) => input.query, + chat_history: (input: SearchInput) => formatChatHistoryAsString(input.chat_history), + context: async (input: SearchInput) => { + const docs = await this.searchSimilarDocuments(input.query); + return docs.map((doc, i) => { + const source = doc.metadata?.source || 'Document'; + const title = doc.metadata?.title || ''; + const pageNumber = doc.metadata?.pageNumber; + const url = doc.metadata?.url; + + let sourceInfo = `Source: ${title || source}`; + if (pageNumber) sourceInfo += ` (page ${pageNumber})`; + if (url) sourceInfo += `\nURL: ${url}`; + + return `[Source ${i + 1}] ${doc.pageContent}\n${sourceInfo}`; + }).join("\n\n"); + } + }), + PromptTemplate.fromTemplate(` + Tu es un assistant expert qui répond aux questions en se basant uniquement sur le contexte fourni. + Historique de la conversation: + {chat_history} + + Contexte disponible: + {context} + + Question: {query} + + Instructions: + 1. Réponds uniquement en te basant sur le contexte fourni + 2. Si la réponse n'est pas dans le contexte, dis-le clairement + 3. Cite les sources pertinentes en utilisant [Source X] + 4. 
Sois précis et concis + + Réponse: + `), + llm, + new StringOutputParser() + ]); + } + + public isInitialized(): boolean { + return this.vectorStore !== null; + } +} \ No newline at end of file diff --git a/src/config.ts b/src/config.ts index 001c259..3e73931 100644 --- a/src/config.ts +++ b/src/config.ts @@ -15,10 +15,12 @@ interface Config { GROQ: string; ANTHROPIC: string; GEMINI: string; + SUPABASE: string; }; API_ENDPOINTS: { SEARXNG: string; OLLAMA: string; + SUPABASE_URL: string; }; } @@ -46,9 +48,15 @@ export const getAnthropicApiKey = () => loadConfig().API_KEYS.ANTHROPIC; export const getGeminiApiKey = () => loadConfig().API_KEYS.GEMINI; +export const getSupabaseKey = () => + process.env.SUPABASE_KEY || loadConfig().API_KEYS.SUPABASE; + export const getSearxngApiEndpoint = () => process.env.SEARXNG_API_URL || loadConfig().API_ENDPOINTS.SEARXNG; +export const getSupabaseUrl = () => + process.env.SUPABASE_URL || loadConfig().API_ENDPOINTS.SUPABASE_URL; + export const getOllamaApiEndpoint = () => loadConfig().API_ENDPOINTS.OLLAMA; export const updateConfig = (config: RecursivePartial) => { diff --git a/src/db/supabase.ts b/src/db/supabase.ts new file mode 100644 index 0000000..255719d --- /dev/null +++ b/src/db/supabase.ts @@ -0,0 +1,29 @@ +// Dans supabase.ts +import { createClient } from '@supabase/supabase-js'; +import { getSupabaseUrl, getSupabaseKey } from '../config'; + +const supabaseUrl = getSupabaseUrl(); +const supabaseKey = getSupabaseKey(); + +if (!supabaseUrl || !supabaseKey) { + throw new Error('Missing Supabase credentials'); +} + +export const supabase = createClient(supabaseUrl, supabaseKey); + +// Fonction de test de connexion +export async function checkSupabaseConnection() { + try { + const { data, error } = await supabase + .from('experts') + .select('*') + .limit(1); + + if (error) throw error; + console.log('✅ Connexion Supabase établie avec succès'); + return true; + } catch (error) { + console.error('❌ Erreur de connexion Supabase:', 
error); + return false; + } +} \ No newline at end of file diff --git a/src/lib/outputParsers/imageOutputParser.ts b/src/lib/outputParsers/imageOutputParser.ts new file mode 100644 index 0000000..c328206 --- /dev/null +++ b/src/lib/outputParsers/imageOutputParser.ts @@ -0,0 +1,26 @@ +import { BaseOutputParser } from "@langchain/core/output_parsers"; + +export interface ImageSearchResult { + query: string; + context?: string; +} + +class ImageOutputParser extends BaseOutputParser { + lc_namespace = ['langchain', 'output_parsers', 'image_output_parser']; + + async parse(text: string): Promise { + const parts = text.split('IMAGE:'); + return { + query: parts[1]?.trim() || '', + context: parts[0].replace('RÉSUMÉ:', '').trim() + }; + } + + getFormatInstructions(): string { + return `Le format attendu est: +RÉSUMÉ: +IMAGE: `; + } +} + +export default ImageOutputParser; \ No newline at end of file diff --git a/src/lib/searxng.ts b/src/lib/searxng.ts index da62457..839396b 100644 --- a/src/lib/searxng.ts +++ b/src/lib/searxng.ts @@ -1,10 +1,11 @@ import axios from 'axios'; import { getSearxngApiEndpoint } from '../config'; -interface SearxngSearchOptions { - categories?: string[]; - engines?: string[]; +export interface SearxngSearchOptions { language?: string; + engines?: string[]; + categories?: string[]; + limit?: number; pageno?: number; } @@ -19,10 +20,10 @@ interface SearxngSearchResult { iframe_src?: string; } -export const searchSearxng = async ( +export async function searchSearxng( query: string, - opts?: SearxngSearchOptions, -) => { + opts: SearxngSearchOptions = {} +) { const searxngURL = getSearxngApiEndpoint(); const url = new URL(`${searxngURL}/search?format=json`); @@ -44,4 +45,4 @@ export const searchSearxng = async ( const suggestions: string[] = res.data.suggestions; return { results, suggestions }; -}; +} diff --git a/src/prompts/webSearch.ts b/src/prompts/webSearch.ts index d8269c8..813f6ab 100644 --- a/src/prompts/webSearch.ts +++ 
b/src/prompts/webSearch.ts @@ -1,46 +1,85 @@ export const webSearchRetrieverPrompt = ` -You are an AI question rephraser. You will be given a conversation and a follow-up question, you will have to rephrase the follow up question so it is a standalone question and can be used by another LLM to search the web for information to answer it. -If it is a smple writing task or a greeting (unless the greeting contains a question after it) like Hi, Hello, How are you, etc. than a question then you need to return \`not_needed\` as the response (This is because the LLM won't need to search the web for finding information on this topic). -If the user asks some question from some URL or wants you to summarize a PDF or a webpage (via URL) you need to return the links inside the \`links\` XML block and the question inside the \`question\` XML block. If the user wants to you to summarize the webpage or the PDF you need to return \`summarize\` inside the \`question\` XML block in place of a question and the link to summarize in the \`links\` XML block. -You must always return the rephrased question inside the \`question\` XML block, if there are no links in the follow-up question then don't insert a \`links\` XML block in your response. +Tu es X-me une IA analyste spécialisée dans l'entrepreneuriat et le développement des TPE/PME et artisans, avec une expertise particulière en droit des affaires. Votre rôle est de reformuler les questions pour cibler les textes juridiques et réglementaires pertinents. -There are several examples attached for your reference inside the below \`examples\` XML block +### Sources Juridiques Prioritaires +1. **Codes**: + - Code civil + - Code de commerce + - Code du travail + - Code de la consommation + - Code général des impôts + +2. **Textes Réglementaires**: + - Lois + - Décrets + - Arrêtés + - Circulaires + +3. **Jurisprudence**: + - Décisions de la Cour de cassation + - Arrêts du Conseil d'État + - Décisions des Cours d'appel + +4. 
**Sources Officielles**: + - Journal officiel + - Bulletins officiels + - Documentation administrative + +Pour chaque question, vous devez : +1. Identifier les textes juridiques applicables +2. Citer les articles précis des codes concernés +3. Rechercher la jurisprudence pertinente +4. Vérifier les dernières modifications législatives + +### Sources d'Information Prioritaires +1. **LegalAI**: Légifrance, CNIL, URSSAF pour les aspects juridiques +2. **FinanceAI**: BPI France, Impots.gouv.fr, INSEE pour la finance +3. **GrowthAI**: CREDOC, CMA France pour le développement commercial +4. **MatchAI**: Annuaires des Experts-Comptables, APEC pour l'expertise +5. **StrategyAI**: France Stratégie, Bpifrance Le Lab pour la stratégie +6. **PeopleAI**: DARES, Pôle emploi pour les RH +7. **ToolBoxAI**: CCI France, LegalPlace pour les outils pratiques +8. **TechAI**: INRIA, French Tech pour l'innovation +9. **StartAI**: Portail Auto-Entrepreneur, CCI pour la création +10. **MasterAI**: Data.gouv.fr, Eurostat pour les données centralisées + +Dans l'analyse des questions, privilégiez : +- Les aspects de création et développement d'entreprise +- Les exigences administratives et juridiques +- Les considérations financières et opérationnelles +- L'analyse de marché et la stratégie +- Le développement professionnel et la formation + +Si c'est une tâche simple d'écriture ou un salut (sauf si le salut contient une question après) comme Hi, Hello, How are you, etc. alors vous devez retourner \`not_needed\` comme réponse (C'est parce que le LLM ne devrait pas chercher des informations sur ce sujet). +Si l'utilisateur demande une question d'un certain URL ou veut que vous résumiez un PDF ou une page web (via URL) vous devez retourner les liens à l'intérieur du bloc \`links\` XML et la question à l'intérieur du bloc \`question\` XML. 
Si l'utilisateur veut que vous résumiez la page web ou le PDF vous devez retourner \`summarize\` à l'intérieur du bloc \`question\` XML en remplacement de la question et le lien à résumer dans le bloc \`links\` XML. +Vous devez toujours retourner la question reformulée à l'intérieur du bloc \`question\` XML, si il n'y a pas de liens dans la question de suivi alors ne pas insérer un bloc \`links\` XML dans votre réponse. + +Il y a plusieurs exemples attachés pour votre référence à l'intérieur du bloc \`examples\` XML -1. Follow up question: What is the capital of France -Rephrased question:\` +1. Question de suivi : Comment créer mon entreprise ? +Question reformulée :\` -Capital of france +Étapes et conditions pour créer une entreprise en France, procédures administratives et aides disponibles selon les sources StartAI (CCI, Auto-entrepreneur) et LegalAI (URSSAF) \` -2. Hi, how are you? -Rephrased question\` +2. Question de suivi : Quels financements sont disponibles ? +Question reformulée :\` + +Options de financement et aides financières disponibles pour les TPE/PME et artisans en France selon FinanceAI (BPI France) et MasterAI (Data.gouv.fr) + +\` + +3. Question de suivi : Bonjour, comment allez-vous ? +Question reformulée :\` not_needed \` -3. Follow up question: What is Docker? -Rephrased question: \` - -What is Docker - -\` - -4. Follow up question: Can you tell me what is X from https://example.com -Rephrased question: \` - -Can you tell me what is X? - - - -https://example.com - -\` - -5. Follow up question: Summarize the content from https://example.com -Rephrased question: \` +4. Question de suivi : Pouvez-vous analyser ce business plan sur https://example.com ? +Question reformulée :\` summarize @@ -51,27 +90,39 @@ https://example.com \` -Anything below is the part of the actual conversation and you need to use conversation and the follow-up question to rephrase the follow-up question as a standalone question based on the guidelines shared above. 
- {chat_history} -Follow up question: {query} -Rephrased question: +Question de suivi : {query} +Question reformulée : `; export const webSearchResponsePrompt = ` - You are Perplexica, an AI model skilled in web search and crafting detailed, engaging, and well-structured answers. You excel at summarizing web pages and extracting relevant information to create professional, blog-style responses. + Vous êtes X-me, une IA experte en conseil aux entreprises, spécialisée dans l'accompagnement des TPE, PME et artisans. Votre expertise couvre la création d'entreprise, le développement commercial, la gestion et le conseil stratégique. Vous excellez dans l'analyse des informations du marché et fournissez des conseils pratiques et applicables. - Your task is to provide answers that are: - - **Informative and relevant**: Thoroughly address the user's query using the given context. - - **Well-structured**: Include clear headings and subheadings, and use a professional tone to present information concisely and logically. - - **Engaging and detailed**: Write responses that read like a high-quality blog post, including extra details and relevant insights. - - **Cited and credible**: Use inline citations with [number] notation to refer to the context source(s) for each fact or detail included. - - **Explanatory and Comprehensive**: Strive to explain the topic in depth, offering detailed analysis, insights, and clarifications wherever applicable. + ### Sources d'Information Prioritaires + 1. 
**LegalAI (Administratif & Juridique)**: + - Légifrance, CNIL, URSSAF + - Journal officiel, Cours et tribunaux - ### Formatting Instructions + Vos réponses doivent être : + - **Orientées Business**: Prioriser les informations pertinentes pour les entrepreneurs, dirigeants de TPE/PME et artisans + - **Pratiques et Actionnables**: Fournir des conseils concrets et des solutions réalisables + - **Contextualisées**: Prendre en compte les défis et contraintes spécifiques des petites entreprises + - **Adaptées aux Ressources**: Proposer des solutions tenant compte des moyens limités des petites structures + - **Conformes à la Réglementation**: Inclure les aspects réglementaires et administratifs pertinents pour les entreprises françaises + + ### Domaines d'Expertise + - Création et Développement d'Entreprise + - Démarches Administratives et Juridiques + - Gestion Financière et Recherche de Financements + - Analyse de Marché et Stratégie + - Gestion Opérationnelle et des Ressources + - Transformation Numérique + - Formation Professionnelle et Développement des Compétences + + ### Instructions de Formatage - **Structure**: Use a well-organized format with proper headings (e.g., "## Example heading 1" or "## Example heading 2"). Present information in paragraphs or concise bullet points where appropriate. - **Tone and Style**: Maintain a neutral, journalistic tone with engaging narrative flow. Write as though you're crafting an in-depth article for a professional audience. - **Markdown Usage**: Format your response with Markdown for clarity. Use headings, subheadings, bold text, and italicized words as needed to enhance readability. @@ -79,7 +130,7 @@ export const webSearchResponsePrompt = ` - **No main heading/title**: Start your response directly with the introduction unless asked to provide a specific title. - **Conclusion or Summary**: Include a concluding paragraph that synthesizes the provided information or suggests potential next steps, where appropriate. 
- ### Citation Requirements + ### Citations Requises - Cite every single fact, statement, or sentence using [number] notation corresponding to the source from the provided \`context\`. - Integrate citations naturally at the end of sentences or clauses as appropriate. For example, "The Eiffel Tower is one of the most visited landmarks in the world[1]." - Ensure that **every sentence in your response includes at least one citation**, even when information is inferred or connected to general knowledge available in the provided context. @@ -87,20 +138,17 @@ export const webSearchResponsePrompt = ` - Always prioritize credibility and accuracy by linking all statements back to their respective context sources. - Avoid citing unsupported assumptions or personal interpretations; if no source supports a statement, clearly indicate the limitation. - ### Special Instructions - - If the query involves technical, historical, or complex topics, provide detailed background and explanatory sections to ensure clarity. - - If the user provides vague input or if relevant information is missing, explain what additional details might help refine the search. - - If no relevant information is found, say: "Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?" Be transparent about limitations and suggest alternatives or ways to reframe the query. - - ### Example Output - - Begin with a brief introduction summarizing the event or query topic. - - Follow with detailed sections under clear headings, covering all aspects of the query if possible. - - Provide explanations or historical context as needed to enhance understanding. - - End with a conclusion or overall perspective if relevant. 
+ ### Instructions Spéciales + - Pour les sujets techniques ou administratifs, fournir des guides étape par étape adaptés aux non-experts + - Pour les solutions ou outils, considérer les contraintes budgétaires des petites entreprises + - Inclure les informations sur les aides et dispositifs de soutien disponibles + - Pour la réglementation, préciser si elle s'applique spécifiquement aux artisans, TPE ou PME + - Mentionner les organisations professionnelles ou ressources pertinentes {context} - Current date & time in ISO format (UTC timezone) is: {date}. + Date et heure actuelles au format ISO (fuseau UTC) : {date}. `; + diff --git a/src/routes/discover.ts b/src/routes/discover.ts index b6f8ff9..4385364 100644 --- a/src/routes/discover.ts +++ b/src/routes/discover.ts @@ -1,48 +1,22 @@ -import express from 'express'; -import { searchSearxng } from '../lib/searxng'; -import logger from '../utils/logger'; +import { Router } from 'express'; +import { supabase } from '../db/supabase'; -const router = express.Router(); +const router = Router(); -router.get('/', async (req, res) => { +// Route pour récupérer les experts +router.get('/experts', async (req, res) => { try { - const data = ( - await Promise.all([ - searchSearxng('site:businessinsider.com AI', { - engines: ['bing news'], - pageno: 1, - }), - searchSearxng('site:www.exchangewire.com AI', { - engines: ['bing news'], - pageno: 1, - }), - searchSearxng('site:yahoo.com AI', { - engines: ['bing news'], - pageno: 1, - }), - searchSearxng('site:businessinsider.com tech', { - engines: ['bing news'], - pageno: 1, - }), - searchSearxng('site:www.exchangewire.com tech', { - engines: ['bing news'], - pageno: 1, - }), - searchSearxng('site:yahoo.com tech', { - engines: ['bing news'], - pageno: 1, - }), - ]) - ) - .map((result) => result.results) - .flat() - .sort(() => Math.random() - 0.5); - - return res.json({ blogs: data }); - } catch (err: any) { - logger.error(`Error in discover route: ${err.message}`); - return 
res.status(500).json({ message: 'An error has occurred' }); + const { data, error } = await supabase + .from('experts') + .select('*'); + + if (error) throw error; + + res.json(data); + } catch (error) { + console.error('Error fetching experts:', error); + res.status(500).json({ error: error.message }); } }); -export default router; +export default router; \ No newline at end of file diff --git a/src/routes/experts.ts b/src/routes/experts.ts new file mode 100644 index 0000000..eadd521 --- /dev/null +++ b/src/routes/experts.ts @@ -0,0 +1,114 @@ +import express from 'express'; +import handleExpertSearch from '../chains/expertSearchAgent'; +import { BaseChatModel } from '@langchain/core/language_models/chat_models'; +import { getAvailableChatModelProviders } from '../lib/providers'; +import { HumanMessage, AIMessage } from '@langchain/core/messages'; +import logger from '../utils/logger'; +import { ChatOpenAI } from '@langchain/openai'; +import { ExpertSearchRequest } from '../types/types'; +import crypto from 'crypto'; + +const router = express.Router(); + +interface ChatModel { + provider: string; + model: string; + customOpenAIBaseURL?: string; + customOpenAIKey?: string; +} + +interface ExpertSearchBody { + query: string; + chatHistory: any[]; + chatModel?: ChatModel; +} + +router.post('/', async (req, res) => { + try { + const body: ExpertSearchBody = req.body; + + // Conversion de l'historique du chat + const chatHistory = body.chatHistory.map((msg: any) => { + if (msg.role === 'user') { + return new HumanMessage(msg.content); + } else if (msg.role === 'assistant') { + return new AIMessage(msg.content); + } + }); + + // Configuration du modèle LLM + const chatModelProviders = await getAvailableChatModelProviders(); + + const chatModelProvider = + body.chatModel?.provider || Object.keys(chatModelProviders)[0]; + const chatModel = + body.chatModel?.model || + Object.keys(chatModelProviders[chatModelProvider])[0]; + + let llm: BaseChatModel | undefined; + + if 
(body.chatModel?.provider === 'custom_openai') { + if ( + !body.chatModel?.customOpenAIBaseURL || + !body.chatModel?.customOpenAIKey + ) { + return res + .status(400) + .json({ message: 'Missing custom OpenAI base URL or key' }); + } + + llm = new ChatOpenAI({ + modelName: body.chatModel.model, + openAIApiKey: body.chatModel.customOpenAIKey, + temperature: 0.7, + configuration: { + baseURL: body.chatModel.customOpenAIBaseURL, + }, + }) as unknown as BaseChatModel; + } else if ( + chatModelProviders[chatModelProvider] && + chatModelProviders[chatModelProvider][chatModel] + ) { + llm = chatModelProviders[chatModelProvider][chatModel] + .model as unknown as BaseChatModel | undefined; + } + + if (!llm) { + return res.status(400).json({ message: 'Invalid model selected' }); + } + + // Génération des IDs uniques + const messageId = crypto.randomBytes(7).toString('hex'); + const chatId = crypto.randomBytes(7).toString('hex'); + + // Préparation de la requête + const expertSearchRequest: ExpertSearchRequest = { + query: body.query, + chat_history: chatHistory, + messageId, + chatId + }; + + // Recherche d'experts + const expertResults = await handleExpertSearch(expertSearchRequest, llm); + console.log("🔍 Experts trouvés:", expertResults.experts.length); + + // Format unifié de la réponse + res.status(200).json({ + type: 'expert_results', + messageId, + data: { + experts: expertResults.experts, + synthese: expertResults.synthese, + query: body.query + } + }); + + } catch (err) { + console.error("🔍 Erreur dans la recherche d'experts:", err); + res.status(500).json({ message: 'Une erreur est survenue.' 
}); + logger.error(`Erreur dans la recherche d'experts: ${err.message}`); + } +}); + +export default router; \ No newline at end of file diff --git a/src/routes/images.ts b/src/routes/images.ts index efa095a..644a39f 100644 --- a/src/routes/images.ts +++ b/src/routes/images.ts @@ -24,6 +24,7 @@ interface ImageSearchBody { router.post('/', async (req, res) => { try { let body: ImageSearchBody = req.body; + console.log("📸 Requête de recherche d'images reçue:", body.query); const chatHistory = body.chatHistory.map((msg: any) => { if (msg.role === 'user') { @@ -73,6 +74,7 @@ router.post('/', async (req, res) => { return res.status(400).json({ message: 'Invalid model selected' }); } + const images = await handleImageSearch( { query: body.query, chat_history: chatHistory }, llm, diff --git a/src/routes/index.ts b/src/routes/index.ts index cb2c915..1139514 100644 --- a/src/routes/index.ts +++ b/src/routes/index.ts @@ -6,8 +6,10 @@ import modelsRouter from './models'; import suggestionsRouter from './suggestions'; import chatsRouter from './chats'; import searchRouter from './search'; -import discoverRouter from './discover'; +import newsRouter from './news'; import uploadsRouter from './uploads'; +import legalRouter from './legal'; +import discoverRouter from './discover'; const router = express.Router(); @@ -18,7 +20,9 @@ router.use('/models', modelsRouter); router.use('/suggestions', suggestionsRouter); router.use('/chats', chatsRouter); router.use('/search', searchRouter); -router.use('/discover', discoverRouter); +router.use('/news', newsRouter); router.use('/uploads', uploadsRouter); +router.use('/legal', legalRouter); +router.use('/discover', discoverRouter); export default router; diff --git a/src/routes/legal.ts b/src/routes/legal.ts new file mode 100644 index 0000000..850b843 --- /dev/null +++ b/src/routes/legal.ts @@ -0,0 +1,88 @@ +import express from 'express'; +import handleLegalSearch from '../chains/legalSearchAgent'; // Nouveau nom +import { BaseChatModel } 
from '@langchain/core/language_models/chat_models'; +import { getAvailableChatModelProviders } from '../lib/providers'; +import { HumanMessage, AIMessage } from '@langchain/core/messages'; +import logger from '../utils/logger'; +import { ChatOpenAI } from '@langchain/openai'; + +const router = express.Router(); + +interface ChatModel { + provider: string; + model: string; + customOpenAIBaseURL?: string; + customOpenAIKey?: string; +} + +interface LegalSearchBody { // Renommé + query: string; + chatHistory: any[]; + chatModel?: ChatModel; +} + +router.post('/', async (req, res) => { + try { + let body: LegalSearchBody = req.body; + + const chatHistory = body.chatHistory.map((msg: any) => { + if (msg.role === 'user') { + return new HumanMessage(msg.content); + } else if (msg.role === 'assistant') { + return new AIMessage(msg.content); + } + }); + + const chatModelProviders = await getAvailableChatModelProviders(); + + const chatModelProvider = + body.chatModel?.provider || Object.keys(chatModelProviders)[0]; + const chatModel = + body.chatModel?.model || + Object.keys(chatModelProviders[chatModelProvider])[0]; + + let llm: BaseChatModel | undefined; + + if (body.chatModel?.provider === 'custom_openai') { + if ( + !body.chatModel?.customOpenAIBaseURL || + !body.chatModel?.customOpenAIKey + ) { + return res + .status(400) + .json({ message: 'Missing custom OpenAI base URL or key' }); + } + + llm = new ChatOpenAI({ + modelName: body.chatModel.model, + openAIApiKey: body.chatModel.customOpenAIKey, + temperature: 0.7, + configuration: { + baseURL: body.chatModel.customOpenAIBaseURL, + }, + }) as unknown as BaseChatModel; + } else if ( + chatModelProviders[chatModelProvider] && + chatModelProviders[chatModelProvider][chatModel] + ) { + llm = chatModelProviders[chatModelProvider][chatModel] + .model as unknown as BaseChatModel | undefined; + } + + if (!llm) { + return res.status(400).json({ message: 'Invalid model selected' }); + } + + const legalDocuments = await 
handleLegalSearch( // Renommé + { query: body.query, chat_history: chatHistory }, + llm, + ); + + res.status(200).json({ documents: legalDocuments }); // Modifié la réponse + } catch (err) { + res.status(500).json({ message: 'An error has occurred.' }); + logger.error(`Error in legal search: ${err.message}`); // Mis à jour le message d'erreur + } +}); + +export default router; \ No newline at end of file diff --git a/src/routes/legifrance.ts.bak b/src/routes/legifrance.ts.bak new file mode 100644 index 0000000..0a41e7b --- /dev/null +++ b/src/routes/legifrance.ts.bak @@ -0,0 +1,152 @@ +import express from 'express'; +import { RAGDocumentChain, handleLegiFranceSearch } from '../chains/rag_document_upload'; +import { BaseChatModel } from '@langchain/core/language_models/chat_models'; +import { getAvailableChatModelProviders } from '../lib/providers'; +import { HumanMessage, AIMessage } from '@langchain/core/messages'; +import logger from '../utils/logger'; +import { ChatOpenAI } from '@langchain/openai'; +import crypto from 'crypto'; +import { Document } from '@langchain/core/schema/document'; +import { OpenAIEmbeddings } from '@langchain/openai'; + +const router = express.Router(); +const ragChain = new RAGDocumentChain(); + +interface ChatModel { + provider: string; + model: string; + customOpenAIBaseURL?: string; + customOpenAIKey?: string; +} + +interface LegiFranceSearchBody { + query: string; + chatHistory: any[]; + chatModel?: ChatModel; + urls?: string[]; +} + +interface LegiFranceRequest { + query: string; + // autres propriétés si nécessaires +} + +router.post('/initialize', async (req, res) => { + try { + const { urls } = req.body; + if (!Array.isArray(urls)) { + return res.status(400).json({ error: "URLs must be an array" }); + } + + // Créer des documents à partir des URLs + const docs = urls.map(url => new Document({ + pageContent: "", // À remplir avec le contenu réel + metadata: { source: url } + })); + + // Initialiser les embeddings (à ajuster selon 
votre configuration) + const embeddings = new OpenAIEmbeddings({ + openAIApiKey: process.env.OPENAI_API_KEY, + }); + + await ragChain.initializeVectorStore(docs, embeddings); + res.json({ success: true }); + } catch (err) { + logger.error("Error initializing LegiFrance search:", err); + res.status(500).json({ error: "Failed to initialize LegiFrance search" }); + } +}); + +router.post('/search', async (req, res) => { + try { + const body: LegiFranceSearchBody = req.body; + console.log("📚 [LegiFrance] Début de la recherche avec query:", body.query); + + // Configuration du modèle LLM + const chatModelProviders = await getAvailableChatModelProviders(); + const chatModelProvider = body.chatModel?.provider || Object.keys(chatModelProviders)[0]; + const chatModel = body.chatModel?.model || Object.keys(chatModelProviders[chatModelProvider])[0]; + console.log("🤖 [LegiFrance] Modèle sélectionné:", { provider: chatModelProvider, model: chatModel }); + + let llm: BaseChatModel | undefined; + + if (body.chatModel?.provider === 'custom_openai') { + if (!body.chatModel?.customOpenAIBaseURL || !body.chatModel?.customOpenAIKey) { + return res.status(400).json({ message: 'Missing custom OpenAI base URL or key' }); + } + + llm = new ChatOpenAI({ + modelName: body.chatModel.model, + openAIApiKey: body.chatModel.customOpenAIKey, + temperature: 0.7, + configuration: { + baseURL: body.chatModel.customOpenAIBaseURL, + }, + }) as unknown as BaseChatModel; + } else if (chatModelProviders[chatModelProvider] && + chatModelProviders[chatModelProvider][chatModel]) { + llm = chatModelProviders[chatModelProvider][chatModel].model as unknown as BaseChatModel; + } + + if (!llm) { + return res.status(400).json({ message: 'Invalid model selected' }); + } + + // Génération des IDs uniques + const messageId = crypto.randomBytes(7).toString('hex'); + const chatId = crypto.randomBytes(7).toString('hex'); + + // Conversion de l'historique du chat + const chatHistory = body.chatHistory.map((msg: any) => { 
+ if (msg.role === 'user') { + return new HumanMessage(msg.content); + } else if (msg.role === 'assistant') { + return new AIMessage(msg.content); + } + }); + console.log("💬 [LegiFrance] Historique du chat converti:", chatHistory); + + console.log("🔍 [LegiFrance] Début de handleLegiFranceSearch avec:", { + query: body.query, + llmType: llm?.constructor.name, + chainStatus: ragChain ? "initialisé" : "non initialisé" + }); + + // Ajouter la recherche avec handleLegiFranceSearch + const result = await handleLegiFranceSearch( + { + query: body.query, + chat_history: chatHistory + }, + llm, + ragChain + ); + + console.log("✅ [LegiFrance] Résultat obtenu:", { + textLength: result.text?.length, + sourcesCount: result.sources?.length + }); + + // Format unifié de la réponse + res.status(200).json({ + type: 'legifrance_results', + messageId, + data: { + text: result.text, + sources: result.sources, + query: body.query + } + }); + + } catch (err) { + console.error("❌ [LegiFrance] Erreur détaillée:", { + message: err.message, + stack: err.stack, + name: err.name + }); + res.status(500).json({ message: 'Une erreur est survenue.' 
}); + logger.error(`Erreur dans la recherche LegiFrance: ${err.message}`); + } +}); + +export default router; \ No newline at end of file diff --git a/src/routes/news.ts b/src/routes/news.ts new file mode 100644 index 0000000..09bcf6c --- /dev/null +++ b/src/routes/news.ts @@ -0,0 +1,48 @@ +import express from 'express'; +import { searchSearxng } from '../lib/searxng'; +import logger from '../utils/logger'; + +const router = express.Router(); + +router.get('/', async (req, res) => { + try { + const data = ( + await Promise.all([ + searchSearxng('site:businessinsider.com AI', { + engines: ['bing news'], + pageno: 1, + }), + searchSearxng('site:www.exchangewire.com AI', { + engines: ['bing news'], + pageno: 1, + }), + searchSearxng('site:yahoo.com AI', { + engines: ['bing news'], + pageno: 1, + }), + searchSearxng('site:businessinsider.com tech', { + engines: ['bing news'], + pageno: 1, + }), + searchSearxng('site:www.exchangewire.com tech', { + engines: ['bing news'], + pageno: 1, + }), + searchSearxng('site:yahoo.com tech', { + engines: ['bing news'], + pageno: 1, + }), + ]) + ) + .map((result) => result.results) + .flat() + .sort(() => Math.random() - 0.5); + + return res.json({ articles: data }); + } catch (err: any) { + logger.error(`Error in news route: ${err.message}`); + return res.status(500).json({ message: 'An error has occurred' }); + } +}); + +export default router; diff --git a/src/routes/uploads.ts b/src/routes/uploads.ts index 7b063fc..ef837c4 100644 --- a/src/routes/uploads.ts +++ b/src/routes/uploads.ts @@ -9,13 +9,18 @@ import { getAvailableEmbeddingModelProviders } from '../lib/providers'; import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf'; import { DocxLoader } from '@langchain/community/document_loaders/fs/docx'; import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; -import { Document } from 'langchain/document'; +import { Document } from '@langchain/core/documents'; +import { RAGDocumentChain } from 
'../chains/rag_document_upload'; +import { Chroma } from "langchain/vectorstores/chroma"; const router = express.Router(); const splitter = new RecursiveCharacterTextSplitter({ - chunkSize: 500, - chunkOverlap: 100, + chunkSize: 1000, + chunkOverlap: 200, + separators: ["\n\n", "\n", ".", "!", "?", ";", ":", " ", ""], + keepSeparator: true, + lengthFunction: (text) => text.length }); const storage = multer.diskStorage({ @@ -34,6 +39,29 @@ const storage = multer.diskStorage({ const upload = multer({ storage }); +const preprocessDocument = (doc: Document): Document => { + const cleanContent = doc.pageContent + .replace(/\s+/g, ' ') + .replace(/\n+/g, ' ') + .trim(); + + return new Document({ + pageContent: cleanContent, + metadata: { + ...doc.metadata, + chunk_type: 'text', + word_count: cleanContent.split(/\s+/).length, + processed_date: new Date().toISOString() + } + }); +}; + +const scoreDocument = (doc: Document): number => { + const wordCount = doc.pageContent.split(/\s+/).length; + const sentenceCount = doc.pageContent.split(/[.!?]+/).length; + return wordCount > 10 && sentenceCount > 0 ? 1 : 0; +}; + router.post( '/', upload.fields([ @@ -43,109 +71,220 @@ router.post( ]), async (req, res) => { try { + console.log("📥 [Uploads] Début du traitement avec body:", { + embedding_model: req.body.embedding_model, + embedding_model_provider: req.body.embedding_model_provider + }); + const { embedding_model, embedding_model_provider } = req.body; if (!embedding_model || !embedding_model_provider) { - res - .status(400) - .json({ message: 'Missing embedding model or provider' }); + console.warn("⚠️ [Uploads] Modèle ou provider manquant"); + res.status(400).json({ message: 'Missing embedding model or provider' }); return; } const embeddingModels = await getAvailableEmbeddingModelProviders(); - const provider = - embedding_model_provider ?? Object.keys(embeddingModels)[0]; - const embeddingModel: Embeddings = - embedding_model ?? 
Object.keys(embeddingModels[provider])[0]; + console.log("🔍 [Uploads] Modèles disponibles:", Object.keys(embeddingModels)); + + const provider = embedding_model_provider ?? Object.keys(embeddingModels)[0]; + const embeddingModel: Embeddings = embedding_model ?? Object.keys(embeddingModels[provider])[0]; + + console.log("🤖 [Uploads] Modèle sélectionné:", { provider, model: embeddingModel }); let embeddingsModel: Embeddings | undefined; - - if ( - embeddingModels[provider] && - embeddingModels[provider][embeddingModel] - ) { - embeddingsModel = embeddingModels[provider][embeddingModel].model as - | Embeddings - | undefined; + if (embeddingModels[provider] && embeddingModels[provider][embeddingModel]) { + embeddingsModel = embeddingModels[provider][embeddingModel].model as Embeddings | undefined; } if (!embeddingsModel) { + console.error("❌ [Uploads] Modèle invalide"); res.status(400).json({ message: 'Invalid LLM model selected' }); return; } const files = req.files['files'] as Express.Multer.File[]; + console.log("📁 [Uploads] Fichiers reçus:", files?.map(f => ({ + name: f.originalname, + path: f.path, + type: f.mimetype + }))); + if (!files || files.length === 0) { + console.warn("⚠️ [Uploads] Aucun fichier reçu"); res.status(400).json({ message: 'No files uploaded' }); return; } + const processedDocs: Document[] = []; + const ragChain = new RAGDocumentChain(); + let totalPages = 0; + await Promise.all( files.map(async (file) => { + console.log(`📄 [Uploads] Traitement du fichier: ${file.originalname}`); let docs: Document[] = []; if (file.mimetype === 'application/pdf') { - const loader = new PDFLoader(file.path); + console.log(`📚 [Uploads] Chargement du PDF: ${file.path}`); + const loader = new PDFLoader(file.path, { + splitPages: true + }); docs = await loader.load(); - } else if ( - file.mimetype === - 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' - ) { + totalPages += docs.length; + } else if (file.mimetype === 
'application/vnd.openxmlformats-officedocument.wordprocessingml.document') { + console.log(`📝 [Uploads] Chargement du DOCX: ${file.path}`); const loader = new DocxLoader(file.path); docs = await loader.load(); + totalPages += docs.length; } else if (file.mimetype === 'text/plain') { + console.log(`📄 [Uploads] Chargement du TXT: ${file.path}`); const text = fs.readFileSync(file.path, 'utf-8'); - docs = [ - new Document({ - pageContent: text, - metadata: { - title: file.originalname, - }, - }), - ]; + docs = [new Document({ + pageContent: text, + metadata: { + title: file.originalname, + source: file.path, + type: 'text' + } + })]; + totalPages += 1; } - const splitted = await splitter.splitDocuments(docs); + const preprocessedDocs = docs.map(preprocessDocument); + const scoredDocs = preprocessedDocs.filter(doc => scoreDocument(doc) > 0); + + console.log(`✂️ [Uploads] Splitting du document en ${scoredDocs.length} parties valides`); + const splitted = await splitter.splitDocuments(scoredDocs); - const json = JSON.stringify({ - title: file.originalname, - contents: splitted.map((doc) => doc.pageContent), + const enrichedDocs = splitted.map((doc, index) => { + const pageNumber = Math.floor(index / (splitted.length / docs.length)) + 1; + return new Document({ + pageContent: doc.pageContent, + metadata: { + ...doc.metadata, + source: file.path, + title: file.originalname, + page_number: pageNumber, + chunk_index: index, + total_chunks: splitted.length, + file_type: file.mimetype, + search_text: doc.pageContent.substring(0, 100).trim() + } + }); }); + processedDocs.push(...enrichedDocs); + const pathToSave = file.path.replace(/\.\w+$/, '-extracted.json'); - fs.writeFileSync(pathToSave, json); - - const embeddings = await embeddingsModel.embedDocuments( - splitted.map((doc) => doc.pageContent), - ); - - const embeddingsJSON = JSON.stringify({ + const contentToSave = { title: file.originalname, - embeddings: embeddings, - }); + contents: enrichedDocs.map((doc) => ({ + 
content: doc.pageContent, + metadata: doc.metadata + })), + pageCount: docs.length, + processingDate: new Date().toISOString() + }; - const pathToSaveEmbeddings = file.path.replace( - /\.\w+$/, - '-embeddings.json', + fs.writeFileSync(pathToSave, JSON.stringify(contentToSave, null, 2)); + + console.log(`🧮 [Uploads] Génération des embeddings pour ${enrichedDocs.length} chunks`); + const embeddings = await embeddingsModel.embedDocuments( + enrichedDocs.map((doc) => doc.pageContent) ); - fs.writeFileSync(pathToSaveEmbeddings, embeddingsJSON); - }), + + const pathToSaveEmbeddings = file.path.replace(/\.\w+$/, '-embeddings.json'); + const embeddingsToSave = { + title: file.originalname, + embeddings: embeddings.map((embedding, index) => ({ + vector: embedding, + metadata: enrichedDocs[index].metadata + })) + }; + + fs.writeFileSync(pathToSaveEmbeddings, JSON.stringify(embeddingsToSave)); + }) ); + console.log("🔄 [Uploads] Initialisation du vectorStore avec", processedDocs.length, "documents"); + const initResult = await ragChain.initializeVectorStoreFromDocuments( + processedDocs, + embeddingsModel + ); + + console.log("✅ [Uploads] VectorStore initialisé:", initResult); + res.status(200).json({ - files: files.map((file) => { - return { - fileName: file.originalname, - fileExtension: file.filename.split('.').pop(), - fileId: file.filename.replace(/\.\w+$/, ''), - }; - }), + files: files.map((file) => ({ + fileName: file.originalname, + fileExtension: file.filename.split('.').pop(), + fileId: file.filename.replace(/\.\w+$/, ''), + stats: { + chunks: processedDocs.filter(d => d.metadata.source === file.path).length, + pages: totalPages + } + })), }); } catch (err: any) { + console.error("❌ [Uploads] Erreur:", { + message: err.message, + stack: err.stack, + name: err.name + }); logger.error(`Error in uploading file results: ${err.message}`); res.status(500).json({ message: 'An error has occurred.' 
}); } }, ); +router.get('/:fileId/view', async (req, res) => { + try { + const { fileId } = req.params; + const search = req.query.search as string; + const page = req.query.page as string; + + // Chercher tous les fichiers qui commencent par fileId dans le dossier uploads + const uploadsDir = path.join(process.cwd(), 'uploads'); + const files = fs.readdirSync(uploadsDir); + const pdfFile = files.find(file => file.startsWith(fileId) && file.endsWith('.pdf')); + + if (!pdfFile) { + console.error(`❌ PDF non trouvé pour l'ID: ${fileId}`); + return res.status(404).json({ error: 'Document PDF non trouvé' }); + } + + const filePath = path.join(uploadsDir, pdfFile); + console.log("📄 Envoi du fichier:", filePath); + + // Définir les headers pour le PDF + res.setHeader('Content-Type', 'application/pdf'); + res.setHeader('Content-Disposition', `inline; filename="${pdfFile}"`); + + // Ajouter les paramètres de navigation et de surlignage + if (search) { + // Nettoyer le texte de recherche + const cleanSearch = search + .replace(/[\n\r]+/g, ' ') + .trim(); + + if (cleanSearch) { + res.setHeader('X-PDF-Search', cleanSearch); + res.setHeader('X-PDF-Highlight', 'true'); + res.setHeader('X-PDF-Highlight-Color', '#FFD700'); // Or + } + } + + if (page) { + res.setHeader('X-PDF-Page', page); + } + + // Envoyer le fichier + res.sendFile(filePath); + } catch (error) { + console.error('❌ Erreur lors de la visualisation du document:', error); + res.status(500).json({ error: 'Erreur lors de la visualisation du document' }); + } +}); + export default router; diff --git a/src/search/metaSearchAgent.ts b/src/search/metaSearchAgent.ts index b1d8114..eef8295 100644 --- a/src/search/metaSearchAgent.ts +++ b/src/search/metaSearchAgent.ts @@ -25,6 +25,14 @@ import formatChatHistoryAsString from '../utils/formatHistory'; import eventEmitter from 'events'; import { StreamEvent } from '@langchain/core/tracers/log_stream'; import { IterableReadableStream } from '@langchain/core/utils/stream'; +import 
handleImageSearch from '../chains/imageSearchAgent'; +import handleExpertSearch from '../chains/expertSearchAgent'; +import { Chroma } from '@langchain/community/vectorstores/chroma'; +import { RAGDocumentChain } from '../chains/rag_document_upload'; +import { SearxngSearchOptions } from '../lib/searxng'; +import { ChromaClient } from 'chromadb'; +import { OpenAIEmbeddings } from '@langchain/openai'; +import { EventEmitter } from 'events'; export interface MetaSearchAgentType { searchAndAnswer: ( @@ -38,13 +46,18 @@ export interface MetaSearchAgentType { } interface Config { - searchWeb: boolean; - rerank: boolean; - summarizer: boolean; - rerankThreshold: number; + activeEngines: string[]; queryGeneratorPrompt: string; responsePrompt: string; - activeEngines: string[]; + rerank: boolean; + rerankThreshold: number; + searchWeb: boolean; + summarizer: boolean; + searchDatabase: boolean; + provider?: string; + model?: string; + customOpenAIBaseURL?: string; + customOpenAIKey?: string; } type BasicChainInput = { @@ -52,12 +65,42 @@ type BasicChainInput = { query: string; }; -class MetaSearchAgent implements MetaSearchAgentType { +interface SearchResponse { + text: string; + sources: Array<{ + title: string; + content: string; + url?: string; + source?: string; + }>; + illustrationImage?: string; +} + +// Ajouter l'interface pour les métadonnées des documents +interface DocumentMetadata { + title?: string; + source?: string; + fileId?: string; + url?: string; // Ajout de l'url optionnelle +} + +interface SearchResult { + pageContent: string; + metadata: { + score?: number; + title?: string; + [key: string]: any; + }; +} + +export class MetaSearchAgent implements MetaSearchAgentType { private config: Config; private strParser = new StringOutputParser(); + private fileIds: string[]; constructor(config: Config) { this.config = config; + this.fileIds = []; } private async createSearchRetrieverChain(llm: BaseChatModel) { @@ -85,147 +128,140 @@ class MetaSearchAgent 
implements MetaSearchAgentType { return { query: '', docs: [] }; } - if (links.length > 0) { - if (question.length === 0) { - question = 'summarize'; - } + let documents: Document[] = []; - let docs = []; - - const linkDocs = await getDocumentsFromLinks({ links }); - - const docGroups: Document[] = []; - - linkDocs.map((doc) => { - const URLDocExists = docGroups.find( - (d) => - d.metadata.url === doc.metadata.url && - d.metadata.totalDocs < 10, - ); - - if (!URLDocExists) { - docGroups.push({ - ...doc, - metadata: { - ...doc.metadata, - totalDocs: 1, - }, - }); - } - - const docIndex = docGroups.findIndex( - (d) => - d.metadata.url === doc.metadata.url && - d.metadata.totalDocs < 10, - ); - - if (docIndex !== -1) { - docGroups[docIndex].pageContent = - docGroups[docIndex].pageContent + `\n\n` + doc.pageContent; - docGroups[docIndex].metadata.totalDocs += 1; - } - }); - - await Promise.all( - docGroups.map(async (doc) => { - const res = await llm.invoke(` - You are a web search summarizer, tasked with summarizing a piece of text retrieved from a web search. Your job is to summarize the - text into a detailed, 2-4 paragraph explanation that captures the main ideas and provides a comprehensive answer to the query. - If the query is \"summarize\", you should provide a detailed summary of the text. If the query is a specific question, you should answer it in the summary. - - - **Journalistic tone**: The summary should sound professional and journalistic, not too casual or vague. - - **Thorough and detailed**: Ensure that every key point from the text is captured and that the summary directly answers the query. - - **Not too lengthy, but detailed**: The summary should be informative but not excessively long. Focus on providing detailed information in a concise format. - - The text will be shared inside the \`text\` XML tag, and the query inside the \`query\` XML tag. - - - 1. 
\` - Docker is a set of platform-as-a-service products that use OS-level virtualization to deliver software in packages called containers. - It was first released in 2013 and is developed by Docker, Inc. Docker is designed to make it easier to create, deploy, and run applications - by using containers. - - - - What is Docker and how does it work? - - - Response: - Docker is a revolutionary platform-as-a-service product developed by Docker, Inc., that uses container technology to make application - deployment more efficient. It allows developers to package their software with all necessary dependencies, making it easier to run in - any environment. Released in 2013, Docker has transformed the way applications are built, deployed, and managed. - \` - 2. \` - The theory of relativity, or simply relativity, encompasses two interrelated theories of Albert Einstein: special relativity and general - relativity. However, the word "relativity" is sometimes used in reference to Galilean invariance. The term "theory of relativity" was based - on the expression "relative theory" used by Max Planck in 1906. The theory of relativity usually encompasses two interrelated theories by - Albert Einstein: special relativity and general relativity. Special relativity applies to all physical phenomena in the absence of gravity. - General relativity explains the law of gravitation and its relation to other forces of nature. It applies to the cosmological and astrophysical - realm, including astronomy. - - - - summarize - - - Response: - The theory of relativity, developed by Albert Einstein, encompasses two main theories: special relativity and general relativity. Special - relativity applies to all physical phenomena in the absence of gravity, while general relativity explains the law of gravitation and its - relation to other forces of nature. The theory of relativity is based on the concept of "relative theory," as introduced by Max Planck in - 1906. 
It is a fundamental theory in physics that has revolutionized our understanding of the universe. - \` - - - Everything below is the actual data you will be working with. Good luck! - - - ${question} - - - - ${doc.pageContent} - - - Make sure to answer the query in the summary. - `); - - const document = new Document({ - pageContent: res.content as string, - metadata: { - title: doc.metadata.title, - url: doc.metadata.url, - }, - }); - - docs.push(document); - }), - ); - - return { query: question, docs: docs }; - } else { + // Recherche web si activée + if (this.config.searchWeb) { const res = await searchSearxng(question, { - language: 'en', + language: 'fr', engines: this.config.activeEngines, }); - const documents = res.results.map( + documents = res.results.map( (result) => new Document({ pageContent: result.content, metadata: { title: result.title, url: result.url, + type: 'web', ...(result.img_src && { img_src: result.img_src }), }, }), ); - - return { query: question, docs: documents }; } + + // Recherche d'experts si activée + if (this.config.searchDatabase) { + try { + console.log("🔍 Recherche d'experts..."); + const expertResults = await handleExpertSearch( + { + query: question, + chat_history: [], + messageId: 'search_' + Date.now(), + chatId: 'chat_' + Date.now() + }, + llm + ); + + console.log("🔍 Experts trouvés:", expertResults.experts.length); + const expertDocs = expertResults.experts.map(expert => + new Document({ + pageContent: `Expert: ${expert.prenom} ${expert.nom} + Spécialité: ${expert.specialite} + Ville: ${expert.ville} + Tarif: ${expert.tarif}€ + Expertises: ${expert.expertises} + Services: ${JSON.stringify(expert.services)} + ${expert.biographie}`, + metadata: { + type: 'expert', + expert: true, + expertData: expert, + title: `${expert.specialite} - ${expert.ville}`, + url: `/expert/${expert.id_expert}`, + image_url: expert.image_url + } + }) + ); + + documents = [...expertDocs, ...documents]; + } catch (error) { + console.error("Erreur 
lors de la recherche d'experts:", error); + } + } + + // Trier pour mettre les experts en premier + documents.sort((a, b) => { + if (a.metadata?.type === 'expert' && b.metadata?.type !== 'expert') return -1; + if (a.metadata?.type !== 'expert' && b.metadata?.type === 'expert') return 1; + return 0; + }); + + return { query: question, docs: documents }; }), ]); } + private async loadUploadedDocuments(fileIds: string[]): Promise { + console.log("📂 Chargement des documents:", fileIds); + const docs: Document[] = []; + + for (const fileId of fileIds) { + try { + const filePath = path.join(process.cwd(), 'uploads', fileId); + const contentPath = `${filePath}-extracted.json`; + const embeddingsPath = `${filePath}-embeddings.json`; + + if (!fs.existsSync(contentPath)) { + throw new Error(`Fichier non trouvé: ${contentPath}`); + } + + // Charger le contenu et les embeddings pré-calculés + const content = JSON.parse(fs.readFileSync(contentPath, 'utf8')); + const embeddingsData = fs.existsSync(embeddingsPath) + ? JSON.parse(fs.readFileSync(embeddingsPath, 'utf8')) + : null; + + if (!content.contents || !Array.isArray(content.contents)) { + throw new Error(`Structure de contenu invalide pour ${fileId}`); + } + + // Calculer le nombre de chunks par page + const chunksPerPage = Math.ceil(content.contents.length / (content.pageCount || 10)); + + content.contents.forEach((chunk: any, index: number) => { + const pageNumber = Math.floor(index / chunksPerPage) + 1; + const doc = new Document({ + pageContent: typeof chunk === 'string' ? chunk : chunk.content, + metadata: { + ...(typeof chunk === 'object' ? chunk.metadata : {}), + source: fileId, + title: content.title || 'Document sans titre', + pageNumber: pageNumber, + chunkIndex: index, + totalChunks: content.contents.length, + type: 'uploaded', + embedding: embeddingsData?.embeddings[index]?.vector, + searchText: (typeof chunk === 'string' ? 
chunk : chunk.content) + .substring(0, 100) + .replace(/[\n\r]+/g, ' ') + .trim() + } + }); + docs.push(doc); + }); + + console.log(`📑 Documents chargés depuis ${fileId}:`, docs.length); + } catch (error) { + console.error(`❌ Erreur lors du chargement du fichier ${fileId}:`, error); + } + } + + return docs; + } + private async createAnsweringChain( llm: BaseChatModel, fileIds: string[], @@ -236,186 +272,169 @@ class MetaSearchAgent implements MetaSearchAgentType { RunnableMap.from({ query: (input: BasicChainInput) => input.query, chat_history: (input: BasicChainInput) => input.chat_history, - date: () => new Date().toISOString(), - context: RunnableLambda.from(async (input: BasicChainInput) => { - const processedHistory = formatChatHistoryAsString( - input.chat_history, - ); + docs: RunnableLambda.from(async (input: BasicChainInput) => { + console.log("Début de la recherche..."); + let docs: Document[] = []; - let docs: Document[] | null = null; - let query = input.query; + // 1. D'abord chercher dans les documents uploadés + if (fileIds.length > 0) { + try { + const uploadedDocs = await this.loadUploadedDocuments(fileIds); + console.log("📚 Documents uploadés chargés:", uploadedDocs.length); - if (this.config.searchWeb) { - const searchRetrieverChain = - await this.createSearchRetrieverChain(llm); + // Utiliser RAGDocumentChain pour la recherche dans les documents + const ragChain = new RAGDocumentChain(); + await ragChain.initializeVectorStoreFromDocuments(uploadedDocs, embeddings); + + // Utiliser le type 'specific' pour une recherche précise + const searchChain = ragChain.createSearchChain(llm); + const relevantDocs = await searchChain.invoke({ + query: input.query, + chat_history: input.chat_history, + type: 'specific' + }); - const searchRetrieverResult = await searchRetrieverChain.invoke({ - chat_history: processedHistory, - query, - }); + // Ajouter les documents pertinents avec un score élevé + docs = uploadedDocs.map(doc => ({ + ...doc, + metadata: { + 
...doc.metadata, + score: 0.8 // Score élevé pour les documents uploadés + } + })); - query = searchRetrieverResult.query; - docs = searchRetrieverResult.docs; + console.log("📄 Documents pertinents trouvés:", docs.length); + } catch (error) { + console.error("❌ Erreur lors de la recherche dans les documents:", error); + } } - const sortedDocs = await this.rerankDocs( - query, - docs ?? [], + // 2. Ensuite chercher les experts si pertinent + if (this.config.searchDatabase) { + try { + console.log("👥 Recherche d'experts..."); + const expertResults = await handleExpertSearch( + { + query: input.query, + chat_history: input.chat_history, + messageId: 'search_' + Date.now(), + chatId: 'chat_' + Date.now() + }, + llm + ); + + if (expertResults.experts.length > 0) { + const expertDocs = this.convertExpertsToDocuments(expertResults.experts); + docs = [...docs, ...expertDocs]; + } + } catch (error) { + console.error("❌ Erreur lors de la recherche d'experts:", error); + } + } + + // 3. Enfin, compléter avec la recherche web si nécessaire et si peu de résultats + if (this.config.searchWeb && docs.length < 3) { + try { + const webResults = await this.performWebSearch(input.query); + docs = [...docs, ...webResults]; + } catch (error) { + console.error("❌ Erreur lors de la recherche web:", error); + } + } + + console.log("🔍 DEBUG - Avant appel rerankDocs - Mode:", optimizationMode, "Query:", input.query); + return this.rerankDocs( + input.query, + docs, fileIds, embeddings, optimizationMode, + llm ); - - return sortedDocs; - }) - .withConfig({ - runName: 'FinalSourceRetriever', - }) - .pipe(this.processDocs), + }).withConfig({ runName: 'FinalSourceRetriever' }), }), + + RunnableMap.from({ + query: (input) => input.query, + chat_history: (input) => input.chat_history, + date: () => new Date().toISOString(), + context: (input) => { + console.log("Préparation du contexte..."); + return this.processDocs(input.docs); + }, + docs: (input) => input.docs, + }), + 
ChatPromptTemplate.fromMessages([ ['system', this.config.responsePrompt], new MessagesPlaceholder('chat_history'), - ['user', '{query}'], + ['user', '{context}\n\n{query}'], ]), llm, this.strParser, - ]).withConfig({ - runName: 'FinalResponseGenerator', - }); + ]).withConfig({ runName: 'FinalResponseGenerator' }); } - private async rerankDocs( - query: string, - docs: Document[], - fileIds: string[], - embeddings: Embeddings, - optimizationMode: 'speed' | 'balanced' | 'quality', - ) { - if (docs.length === 0 && fileIds.length === 0) { - return docs; - } - - const filesData = fileIds - .map((file) => { - const filePath = path.join(process.cwd(), 'uploads', file); - - const contentPath = filePath + '-extracted.json'; - const embeddingsPath = filePath + '-embeddings.json'; - - const content = JSON.parse(fs.readFileSync(contentPath, 'utf8')); - const embeddings = JSON.parse(fs.readFileSync(embeddingsPath, 'utf8')); - - const fileSimilaritySearchObject = content.contents.map( - (c: string, i) => { - return { - fileName: content.title, - content: c, - embeddings: embeddings.embeddings[i], - }; - }, - ); - - return fileSimilaritySearchObject; + private convertExpertsToDocuments(experts: any[]) { + return experts.map(expert => + new Document({ + pageContent: `Expert: ${expert.prenom} ${expert.nom} + Spécialité: ${expert.specialite} + Ville: ${expert.ville} + Tarif: ${expert.tarif}€ + Expertises: ${expert.expertises} + Services: ${JSON.stringify(expert.services)} + ${expert.biographie}`, + metadata: { + type: 'expert', + expert: true, + expertData: expert, + title: `${expert.specialite} - ${expert.ville}`, + url: `/expert/${expert.id_expert}`, + image_url: expert.image_url + } }) - .flat(); - - if (query.toLocaleLowerCase() === 'summarize') { - return docs.slice(0, 15); - } - - const docsWithContent = docs.filter( - (doc) => doc.pageContent && doc.pageContent.length > 0, ); + } - if (optimizationMode === 'speed' || this.config.rerank === false) { - if (filesData.length > 0) 
{ - const [queryEmbedding] = await Promise.all([ - embeddings.embedQuery(query), - ]); + private async performWebSearch(query: string) { + const res = await searchSearxng(query, { + language: 'fr', + engines: this.config.activeEngines, + }); - const fileDocs = filesData.map((fileData) => { - return new Document({ - pageContent: fileData.content, - metadata: { - title: fileData.fileName, - url: `File`, - }, - }); - }); - - const similarity = filesData.map((fileData, i) => { - const sim = computeSimilarity(queryEmbedding, fileData.embeddings); - - return { - index: i, - similarity: sim, - }; - }); - - let sortedDocs = similarity - .filter( - (sim) => sim.similarity > (this.config.rerankThreshold ?? 0.3), - ) - .sort((a, b) => b.similarity - a.similarity) - .slice(0, 15) - .map((sim) => fileDocs[sim.index]); - - sortedDocs = - docsWithContent.length > 0 ? sortedDocs.slice(0, 8) : sortedDocs; - - return [ - ...sortedDocs, - ...docsWithContent.slice(0, 15 - sortedDocs.length), - ]; - } else { - return docsWithContent.slice(0, 15); - } - } else if (optimizationMode === 'balanced') { - const [docEmbeddings, queryEmbedding] = await Promise.all([ - embeddings.embedDocuments( - docsWithContent.map((doc) => doc.pageContent), - ), - embeddings.embedQuery(query), - ]); - - docsWithContent.push( - ...filesData.map((fileData) => { - return new Document({ - pageContent: fileData.content, - metadata: { - title: fileData.fileName, - url: `File`, - }, - }); - }), - ); - - docEmbeddings.push(...filesData.map((fileData) => fileData.embeddings)); - - const similarity = docEmbeddings.map((docEmbedding, i) => { - const sim = computeSimilarity(queryEmbedding, docEmbedding); - - return { - index: i, - similarity: sim, - }; - }); - - const sortedDocs = similarity - .filter((sim) => sim.similarity > (this.config.rerankThreshold ?? 
0.3)) - .sort((a, b) => b.similarity - a.similarity) - .slice(0, 15) - .map((sim) => docsWithContent[sim.index]); - - return sortedDocs; - } + return res.results.map(result => + new Document({ + pageContent: result.content, + metadata: { + title: result.title, + url: result.url, + type: 'web', + ...(result.img_src && { img_src: result.img_src }), + }, + }) + ); } private processDocs(docs: Document[]) { - return docs - .map((_, index) => `${index + 1}. ${docs[index].pageContent}`) - .join('\n'); + // Trier les documents par score si disponible + const sortedDocs = docs.sort((a, b) => + (b.metadata?.score || 0) - (a.metadata?.score || 0) + ); + + // Limiter à 5 documents maximum + const limitedDocs = sortedDocs.slice(0, 5); + + // Limiter la taille de chaque document à 1000 caractères + return limitedDocs + .map((doc, index) => { + const content = doc.pageContent.length > 1000 + ? doc.pageContent.substring(0, 1000) + "..." + : doc.pageContent; + + return `${content} [${index + 1}]`; + }) + .join('\n\n'); } private async handleStream( @@ -427,10 +446,65 @@ class MetaSearchAgent implements MetaSearchAgentType { event.event === 'on_chain_end' && event.name === 'FinalSourceRetriever' ) { - ``; + const sources = event.data.output; + + // Normaliser les sources pour le frontend + const normalizedSources = sources?.map(source => { + const isUploadedDoc = source.metadata?.type === 'uploaded'; + const isExpert = source.metadata?.type === 'expert'; + const pageNumber = source.metadata?.pageNumber || 1; + const sourceId = source.metadata?.source; + + // Construire l'URL selon le type de source + let url; + if (isUploadedDoc && sourceId) { + url = `/api/uploads/${sourceId}/content?page=${pageNumber}`; + } else if (isExpert) { + url = source.metadata?.url; + } else if (source.metadata?.type === 'web') { + url = source.metadata?.url; + } + + // Construire un titre descriptif + let title = source.metadata?.title || ''; + if (isUploadedDoc && title) { + title = `${title} - Page 
${pageNumber}`; + } else if (isExpert) { + title = source.metadata?.displayTitle || title; + } + + // Limiter la taille du contenu pour éviter les erreurs de payload + const limitedContent = source.pageContent?.substring(0, 1000) || ''; + + return { + pageContent: limitedContent, + metadata: { + title: title, + type: source.metadata?.type || 'web', + url: url, + source: sourceId, + pageNumber: pageNumber, + searchText: source.metadata?.searchText?.substring(0, 200) || limitedContent.substring(0, 200), + expertData: source.metadata?.expertData, + illustrationImage: source.metadata?.illustrationImage, + imageTitle: source.metadata?.imageTitle, + favicon: source.metadata?.favicon, + linkText: source.metadata?.linkText, + expertName: source.metadata?.expertName + } + }; + }) || []; + + console.log("🔍 Sources normalisées:", normalizedSources.length); + emitter.emit( 'data', - JSON.stringify({ type: 'sources', data: event.data.output }), + JSON.stringify({ + type: 'sources', + data: normalizedSources, + illustrationImage: normalizedSources[0]?.metadata?.illustrationImage || null, + imageTitle: normalizedSources[0]?.metadata?.imageTitle || null + }) ); } if ( @@ -439,7 +513,7 @@ class MetaSearchAgent implements MetaSearchAgentType { ) { emitter.emit( 'data', - JSON.stringify({ type: 'response', data: event.data.chunk }), + JSON.stringify({ type: 'response', data: event.data.chunk }) ); } if ( @@ -451,6 +525,126 @@ class MetaSearchAgent implements MetaSearchAgentType { } } + private async searchExperts( + query: string, + embeddings: Embeddings, + llm: BaseChatModel + ): Promise { + try { + console.log("👥 Recherche d'experts pour:", query); + const expertResults = await handleExpertSearch( + { + query, + chat_history: [], + messageId: 'search_' + Date.now(), + chatId: 'chat_' + Date.now() + }, + llm + ); + + return expertResults.experts.map(expert => ({ + pageContent: `Expert: ${expert.prenom} ${expert.nom} + Spécialité: ${expert.specialite} + Ville: ${expert.ville} + 
Tarif: ${expert.tarif}€ + Expertises: ${expert.expertises} + Services: ${JSON.stringify(expert.services)} + ${expert.biographie}`, + metadata: { + type: 'expert', + expert: true, + expertData: expert, + title: `${expert.prenom} ${expert.nom} - ${expert.specialite}`, + url: `/expert/${expert.id_expert}`, + image_url: expert.image_url, + score: 0.6 // Score moyen pour les experts + } + })); + } catch (error) { + console.error("❌ Erreur lors de la recherche d'experts:", error); + return []; + } + } + + private async searchWeb(query: string): Promise { + try { + console.log("🌐 Recherche web pour:", query); + const res = await searchSearxng(query, { + language: 'fr', + engines: this.config.activeEngines, + }); + + return res.results.map(result => ({ + pageContent: result.content, + metadata: { + title: result.title, + url: result.url, + type: 'web', + score: 0.4, // Score plus faible pour les résultats web + ...(result.img_src && { img_src: result.img_src }), + } + })); + } catch (error) { + console.error("❌ Erreur lors de la recherche web:", error); + return []; + } + } + + private async rerankDocs( + query: string, + docs: Document[], + fileIds: string[], + embeddings: Embeddings, + optimizationMode: 'speed' | 'balanced' | 'quality', + llm: BaseChatModel + ) { + console.log("🔍 Mode d'optimisation:", optimizationMode); + console.log("🔍 Query pour la recherche d'image:", query); + + if (optimizationMode === 'balanced' || optimizationMode === 'quality') { + console.log("🔍 Démarrage de la recherche d'images..."); + try { + console.log("🔍 Appel de handleImageSearch avec la query:", query); + const images = await handleImageSearch( + { + query, + chat_history: [], + }, + llm + ); + console.log("🔍 Résultat brut de handleImageSearch:", JSON.stringify(images, null, 2)); + console.log("🔍 Images trouvées:", images?.length); + + if (images && images.length > 0) { + console.log("🔍 Première image trouvée:", { + src: images[0].img_src, + title: images[0].title, + url: images[0].url 
+ }); + return docs.slice(0, 15).map(doc => ({ + ...doc, + metadata: { + ...doc.metadata, + illustrationImage: images[0].img_src, + title: images[0].title + } + })); + } else { + console.log("⚠️ Aucune image trouvée dans le résultat"); + } + } catch (error) { + console.error("❌ Erreur détaillée lors de la recherche d'image:", { + message: error.message, + stack: error.stack + }); + } + } else { + console.log("🔍 Mode speed: pas de recherche d'images"); + } + + return docs.slice(0, 15); + } + async searchAndAnswer( message: string, history: BaseMessage[], @@ -459,29 +653,466 @@ class MetaSearchAgent implements MetaSearchAgentType { optimizationMode: 'speed' | 'balanced' | 'quality', fileIds: string[], ) { + const effectiveMode = 'balanced'; + const emitter = new eventEmitter(); - const answeringChain = await this.createAnsweringChain( - llm, - fileIds, - embeddings, - optimizationMode, - ); + try { + // Analyse sophistiquée de la requête avec LLM + const queryAnalysis = await llm.invoke(`En tant qu'expert en analyse de requêtes, examine cette demande et détermine la stratégie de recherche optimale. - const stream = answeringChain.streamEvents( - { - chat_history: history, - query: message, - }, - { - version: 'v1', - }, - ); +Question/Requête: "${message}" - this.handleStream(stream, emitter); +Documents disponibles: ${fileIds.length > 0 ? 
"Oui" : "Non"} + +Analyse et réponds au format JSON: +{ + "primaryIntent": "DOCUMENT_QUERY" | "WEB_SEARCH" | "EXPERT_ADVICE" | "HYBRID", + "requiresDocumentSearch": , + "requiresWebSearch": , + "requiresExpertSearch": , + "documentRelevance": <0.0 à 1.0>, + "reasoning": "" +} + +Critères d'analyse: +- DOCUMENT_QUERY: La question porte spécifiquement sur le contenu des documents +- WEB_SEARCH: Recherche d'informations générales ou actuelles +- EXPERT_ADVICE: Demande nécessitant une expertise spécifique +- HYBRID: Combinaison de plusieurs sources + +Prends en compte: +- La présence ou non de documents uploadés +- La spécificité de la question +- Le besoin d'expertise externe +- L'actualité du sujet`); + + const analysis = JSON.parse(String(queryAnalysis.content)); + console.log("🎯 Analyse de la requête:", analysis); + + // 1. Analyse des documents uploadés avec RAG + const uploadedDocs = await this.loadUploadedDocuments(fileIds); + console.log("📚 Documents uploadés chargés:", uploadedDocs.length); + + if (uploadedDocs.length > 0) { + // Création du vectorStore temporaire pour les documents + const vectorStore = await Chroma.fromDocuments(uploadedDocs, embeddings, { + collectionName: "temp_docs", + url: "http://chroma:8000", + numDimensions: 1536 + }); + + // Recherche sémantique sans filtre pour l'instant + const relevantDocs = await vectorStore.similaritySearch(message, 5); + + console.log("📄 Documents pertinents trouvés:", relevantDocs.length); + + // Extraction du contexte pour enrichir la recherche + const documentContext = relevantDocs + .map(doc => doc.pageContent) + .join("\n") + .substring(0, 500); + + const documentTitle = uploadedDocs[0]?.metadata?.title || ""; + const enrichedQuery = `${message} ${documentTitle} ${documentContext}`; + + // 2. Recherche d'experts en BDD + const expertResults = await this.searchExperts(message, embeddings, llm); + + // 3. 
Recherche web complémentaire avec le contexte enrichi + const webResults = await this.searchWeb(enrichedQuery); + + // Combinaison des résultats avec les scores appropriés + const combinedResults = [ + ...relevantDocs.map(doc => ({ + ...doc, + metadata: { + ...doc.metadata, + score: 0.8 // Score élevé pour les documents uploadés + } + })), + ...expertResults.map(expert => ({ + ...expert, + metadata: { + ...expert.metadata, + score: 0.6 // Score moyen pour les experts + } + })), + ...webResults.map(web => ({ + ...web, + metadata: { + ...web.metadata, + score: 0.4 // Score plus faible pour les résultats web + } + })) + ]; + + // Tri et sélection des meilleurs résultats + const finalResults = await this.rerankDocs( + message, + combinedResults, + fileIds, + embeddings, + effectiveMode, + llm + ); + + // Création de la chaîne de réponse + const answeringChain = await this.createAnsweringChain( + llm, + fileIds, + embeddings, + effectiveMode + ); + + const stream = answeringChain.streamEvents( + { + chat_history: history, + query: `${message}\n\nContexte pertinent:\n${finalResults.map(doc => doc.pageContent).join('\n\n')}` + }, + { + version: 'v1' + } + ); + + this.handleStream(stream, emitter); + } else { + // Fallback sans documents uploadés + const answeringChain = await this.createAnsweringChain( + llm, + fileIds, + embeddings, + effectiveMode + ); + + const stream = answeringChain.streamEvents( + { + chat_history: history, + query: message + }, + { + version: 'v1' + } + ); + + this.handleStream(stream, emitter); + } + } catch (error) { + console.error("❌ Erreur:", error); + // Fallback en mode standard + const answeringChain = await this.createAnsweringChain( + llm, + fileIds, + embeddings, + effectiveMode + ); + + const stream = answeringChain.streamEvents( + { + chat_history: history, + query: message + }, + { + version: 'v1' + } + ); + + this.handleStream(stream, emitter); + } return emitter; } } +export const searchHandlers: Record = { + // ... 
existing handlers ... + legal: { + searchAndAnswer: async ( + message, + history, + llm, + embeddings, + optimizationMode, + fileIds, + ) => { + const emitter = new eventEmitter(); + + try { + const chain = new RAGDocumentChain(); + await chain.initializeVectorStoreFromDocuments(fileIds.map(fileId => new Document({ + pageContent: '', + metadata: { source: fileId } + })), embeddings); + + const searchChain = chain.createSearchChain(llm); + const results = await searchChain.invoke({ + query: message, + chat_history: history, + type: 'legal' + }); + + // Convertir le résultat en objet SearchResponse + const response: SearchResponse = { + text: results, + sources: [] // Sources vides par défaut + }; + + // Émettre la réponse + emitter.emit( + 'data', + JSON.stringify({ + type: 'response', + data: response.text, + }) + ); + + emitter.emit('end'); + } catch (error) { + emitter.emit( + 'error', + JSON.stringify({ + type: 'error', + data: error.message, + }) + ); + } + + return emitter; + }, + }, + documents: { + searchAndAnswer: async ( + message, + history, + llm, + embeddings, + optimizationMode, + fileIds, + ) => { + const emitter = new eventEmitter(); + const ragChain = new RAGDocumentChain(); + + try { + const docs = fileIds.map(fileId => { + const filePath = path.join(process.cwd(), 'uploads', fileId); + const contentPath = filePath + '-extracted.json'; + const content = JSON.parse(fs.readFileSync(contentPath, 'utf8')); + return new Document({ + pageContent: content.contents.join('\n'), + metadata: { + title: content.title, + source: fileId, + } + }); + }); + + await ragChain.initializeVectorStoreFromDocuments(docs, embeddings); + const chain = ragChain.createSearchChain(llm); + const result = await chain.invoke({ + query: message, + chat_history: history, + type: 'document_search' + }); + + // Convertir le résultat en objet SearchResponse + const response: SearchResponse = { + text: result, + sources: docs.map(doc => ({ + title: doc.metadata?.title || '', + 
content: doc.pageContent, + source: doc.metadata?.source || 'uploaded_docs' + })) + }; + + emitter.emit('data', JSON.stringify({ + type: 'response', + data: response.text + })); + + emitter.emit('data', JSON.stringify({ + type: 'sources', + data: response.sources + })); + + emitter.emit('end'); + } catch (error) { + emitter.emit('error', JSON.stringify({ + type: 'error', + data: error.message + })); + } + + return emitter; + } + }, + uploads: { + searchAndAnswer: async ( + message, + history, + llm, + embeddings, + optimizationMode, + fileIds, + ) => { + const emitter = new eventEmitter(); + + try { + // Analyse du type de requête avec LLM pour plus de précision + const queryIntent = await llm.invoke(` + Analysez cette requête et déterminez son intention principale : + 1. SUMMARY (demande de résumé ou synthèse globale) + 2. ANALYSIS (demande d'analyse ou d'explication) + 3. SPECIFIC (question spécifique sur le contenu) + 4. COMPARE (demande de comparaison) + + Requête : "${message}" + + Répondez uniquement avec l'intention. 
+ `); + + const intent = String(queryIntent.content).trim(); + console.log("🎯 Intention détectée:", intent); + + // Chargement optimisé des documents + const docs = await Promise.all(fileIds.map(async fileId => { + const filePath = path.join(process.cwd(), 'uploads', fileId); + const contentPath = `${filePath}-extracted.json`; + + if (!fs.existsSync(contentPath)) { + throw new Error(`Fichier non trouvé: ${contentPath}`); + } + + const content = JSON.parse(fs.readFileSync(contentPath, 'utf8')); + + // Optimisation : Chunking plus efficace + const chunkSize = 1000; // Taille optimale pour le traitement + const overlap = 100; // Chevauchement pour maintenir le contexte + + const chunks = []; + let currentChunk = ''; + let currentSize = 0; + + content.contents.forEach((text: string) => { + currentChunk += text + ' '; + currentSize += text.length; + + if (currentSize >= chunkSize) { + chunks.push(currentChunk); + // Garder le chevauchement pour le prochain chunk + currentChunk = currentChunk.slice(-overlap); + currentSize = overlap; + } + }); + + if (currentChunk) { + chunks.push(currentChunk); + } + + return chunks.map((chunk, index) => { + const pageNumber = Math.floor(index / (chunks.length / (content.pageCount || 1))) + 1; + + return new Document({ + pageContent: chunk, + metadata: { + title: content.title || 'Document sans titre', + source: fileId, + type: 'uploaded', + url: `/api/uploads/${fileId}/view?page=${pageNumber}`, + pageNumber: pageNumber, + chunkIndex: index, + totalChunks: chunks.length, + searchText: chunk.substring(0, 100).replace(/[\n\r]+/g, ' ').trim() + } + }); + }); + })); + + const flatDocs = docs.flat(); + console.log("📚 Nombre total de chunks:", flatDocs.length); + + const ragChain = new RAGDocumentChain(); + await ragChain.initializeVectorStoreFromDocuments(flatDocs, embeddings); + const chain = ragChain.createSearchChain(llm); + + // Adaptation de la requête selon l'intention détectée par le LLM + let queryPrompt = message; + switch(intent) { 
+ case 'SUMMARY': + queryPrompt = "Fais un résumé complet et structuré de ce document en te concentrant sur les points clés"; + break; + case 'ANALYSIS': + queryPrompt = `Analyse en détail les aspects suivants du document concernant : ${message}. Fournis une analyse structurée avec des exemples du texte.`; + break; + case 'SPECIFIC': + // Garde la question originale mais ajoute du contexte + queryPrompt = `En te basant sur le contenu du document, réponds précisément à cette question : ${message}`; + break; + case 'COMPARE': + queryPrompt = `Compare et analyse en détail les différents aspects concernant : ${message}. Structure ta réponse par points de comparaison.`; + break; + } + + // Stream optimisé avec émission rapide des sources + const stream = await chain.streamEvents( + { + query: queryPrompt, + chat_history: history, + type: intent.toLowerCase() + }, + { version: 'v1' } + ); + + // Gestion optimisée du stream + let sourcesEmitted = false; + for await (const event of stream) { + if (event.event === 'on_chain_stream') { + emitter.emit( + 'data', + JSON.stringify({ + type: 'response', + data: event.data.chunk + }) + ); + } + + // Émettre les sources plus tôt dans le processus + if (!sourcesEmitted && event.event === 'on_chain_start') { + const sources = flatDocs.slice(0, 5).map(doc => ({ + title: doc.metadata?.title || '', + content: doc.metadata?.searchText || '', + url: doc.metadata?.url, + source: doc.metadata?.source, + type: 'uploaded', + pageNumber: doc.metadata?.pageNumber + })); + + emitter.emit( + 'data', + JSON.stringify({ + type: 'sources', + data: sources + }) + ); + sourcesEmitted = true; + } + + if (event.event === 'on_chain_end') { + emitter.emit('end'); + } + } + + } catch (error) { + console.error("Erreur lors de la recherche dans les documents:", error); + emitter.emit('error', JSON.stringify({ + type: 'error', + data: error.message + })); + } + + return emitter; + } + } +}; + export default MetaSearchAgent; + diff --git a/src/types/index.ts 
b/src/types/index.ts new file mode 100644 index 0000000..fcb073f --- /dev/null +++ b/src/types/index.ts @@ -0,0 +1 @@ +export * from './types'; diff --git a/src/types/types.ts b/src/types/types.ts new file mode 100644 index 0000000..e2f6e41 --- /dev/null +++ b/src/types/types.ts @@ -0,0 +1,73 @@ +import { BaseMessage } from '@langchain/core/messages'; + +export interface Expert { + id: number; + id_expert: string; + nom: string; + prenom: string; + adresse: string; + pays: string; + ville: string; + expertises: string; + specialite: string; + biographie: string; + tarif: number; + services: any; + created_at: string; + image_url: string; +} + +export interface ExpertSearchRequest { + query: string; + chat_history: BaseMessage[]; + messageId: string; + chatId: string; +} + +export interface ExpertSearchResponse { + experts: Expert[]; + synthese: string; +} + +export interface EnrichedResponse { + text: string; + sources: Source[]; + suggestions: string[]; + images: ImageResult[]; +} + +export interface Source { + title: string; + url: string; + snippet: string; +} + +export interface ImageResult { + url: string; + title: string; + source: string; +} + +export interface DocumentMetadata { + title?: string; + source?: string; + type?: string; + url?: string; + pageNumber?: number; + score?: number; + expertData?: any; + searchText?: string; + illustrationImage?: string; + imageTitle?: string; + [key: string]: any; +} + +export interface NormalizedSource { + pageContent: string; + metadata: DocumentMetadata; +} + +export interface SearchResult { + pageContent: string; + metadata: DocumentMetadata; +} \ No newline at end of file diff --git a/src/websocket/messageHandler.ts b/src/websocket/messageHandler.ts index 6cb3ddf..4f4565e 100644 --- a/src/websocket/messageHandler.ts +++ b/src/websocket/messageHandler.ts @@ -37,6 +37,7 @@ export const searchHandlers = { rerankThreshold: 0.3, searchWeb: true, summarizer: true, + searchDatabase: true, }), academicSearch: new 
MetaSearchAgent({ activeEngines: ['arxiv', 'google scholar', 'pubmed'], @@ -46,6 +47,7 @@ export const searchHandlers = { rerankThreshold: 0, searchWeb: true, summarizer: false, + searchDatabase: true, }), writingAssistant: new MetaSearchAgent({ activeEngines: [], @@ -55,6 +57,7 @@ export const searchHandlers = { rerankThreshold: 0, searchWeb: false, summarizer: false, + searchDatabase: true, }), wolframAlphaSearch: new MetaSearchAgent({ activeEngines: ['wolframalpha'], @@ -64,6 +67,7 @@ export const searchHandlers = { rerankThreshold: 0, searchWeb: true, summarizer: false, + searchDatabase: true, }), youtubeSearch: new MetaSearchAgent({ activeEngines: ['youtube'], @@ -73,6 +77,7 @@ export const searchHandlers = { rerankThreshold: 0.3, searchWeb: true, summarizer: false, + searchDatabase: true, }), redditSearch: new MetaSearchAgent({ activeEngines: ['reddit'], @@ -82,6 +87,7 @@ export const searchHandlers = { rerankThreshold: 0.3, searchWeb: true, summarizer: false, + searchDatabase: true, }), }; diff --git a/ui/.env.example b/ui/.env.example index 57a3ed9..d6120f4 100644 --- a/ui/.env.example +++ b/ui/.env.example @@ -1,2 +1,4 @@ NEXT_PUBLIC_WS_URL=ws://localhost:3001 -NEXT_PUBLIC_API_URL=http://localhost:3001/api \ No newline at end of file +NEXT_PUBLIC_API_URL=http://localhost:3001/api +NEXT_PUBLIC_SUPABASE_URL=https://qytbxgzxsywnfhlwcyqa.supabase.co +NEXT_PUBLIC_SUPABASE_ANON_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InF5dGJ4Z3p4c3l3bmZobHdjeXFhIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MzEwNTc3MTAsImV4cCI6MjA0NjYzMzcxMH0.XLRq-4CFL2MWxvCLzCv5ZdaF5VSi58cocx9FOyv37jU diff --git a/ui/app/chatroom/[expertId]/page.tsx b/ui/app/chatroom/[expertId]/page.tsx new file mode 100644 index 0000000..06e7d6f --- /dev/null +++ b/ui/app/chatroom/[expertId]/page.tsx @@ -0,0 +1,295 @@ +'use client'; + +import { useEffect, useState } from 'react'; +import { supabase } from '@/lib/supabase'; +import { Button } from "@/components/ui/button"; +import { Input } 
from "@/components/ui/input"; +import { useParams, useRouter } from 'next/navigation'; +import { toast } from 'sonner'; +import { formatTimeDifference } from '@/lib/utils'; +import Link from 'next/link'; +import { Expert, Message } from '@/types'; + +interface Conversation { + expert: Expert; + lastMessage?: Message; + unreadCount: number; +} + +export default function ChatRoom() { + const router = useRouter(); + const { expertId } = useParams(); + const [conversations, setConversations] = useState([]); + const [messages, setMessages] = useState([]); + const [newMessage, setNewMessage] = useState(''); + const [currentExpert, setCurrentExpert] = useState(null); + + // Charger les conversations + useEffect(() => { + const loadConversations = async () => { + const { data: messages, error } = await supabase + .from('messages') + .select('*, expert:experts(*)') + .or('sender_id.eq.user_id,receiver_id.eq.user_id') + .order('created_at', { ascending: false }); + + if (error) { + toast.error("Erreur lors du chargement des conversations"); + return; + } + + // Grouper les messages par expert + const conversationsMap = new Map(); + messages?.forEach(message => { + const expertId = message.sender_id === 'user_id' ? message.receiver_id : message.sender_id; + if (!conversationsMap.has(expertId)) { + conversationsMap.set(expertId, { + expert: message.expert, + lastMessage: message, + unreadCount: message.sender_id !== 'user_id' && !message.read ? 
1 : 0 + }); + } + }); + + setConversations(Array.from(conversationsMap.values())); + }; + + loadConversations(); + }, []); + + // Charger les messages de la conversation courante + useEffect(() => { + if (!expertId) return; + + const loadMessages = async () => { + const { data: expert, error: expertError } = await supabase + .from('experts') + .select('*') + .eq('id_expert', expertId) + .single(); + + if (expertError) { + toast.error("Erreur lors du chargement de l'expert"); + return; + } + + setCurrentExpert(expert); + + const { data: messages, error: messagesError } = await supabase + .from('messages') + .select('*') + .or(`sender_id.eq.${expertId},receiver_id.eq.${expertId}`) + .order('created_at', { ascending: true }); + + if (messagesError) { + toast.error("Erreur lors du chargement des messages"); + return; + } + + setMessages(messages || []); + }; + + loadMessages(); + + // Souscrire aux nouveaux messages + const channel = supabase.channel('public:messages') + .on( + 'postgres_changes', + { + event: 'INSERT', + schema: 'public', + table: 'messages', + }, + (payload) => { + setMessages(current => [...current, payload.new as Message]); + } + ) + .subscribe(); + + return () => { + channel.unsubscribe(); + }; + }, [expertId]); + + const sendMessage = async (e: React.FormEvent) => { + e.preventDefault(); + if (!newMessage.trim() || !expertId) return; + + const { error } = await supabase + .from('messages') + .insert({ + content: newMessage, + sender_id: 'user_id', + receiver_id: expertId, + }); + + if (error) { + toast.error("Erreur lors de l'envoi du message"); + return; + } + + setNewMessage(''); + }; + + const markAsRead = async (messageId: string) => { + const { error } = await supabase + .from('messages') + .update({ read: true }) + .eq('id', messageId); + + if (error) { + toast.error("Erreur lors de la mise à jour du message"); + } + }; + + // Utilisez markAsRead quand un message est affiché + useEffect(() => { + if (!messages.length) return; + + // Marquer 
les messages non lus comme lus + messages + .filter(msg => !msg.read && msg.sender_id !== 'user_id') + .forEach(msg => markAsRead(msg.id)); + }, [messages]); + + return ( +
+ {/* Liste des conversations - cachée sur mobile si conversation active */} +
+
+

Messages

+
+
+ {conversations.length > 0 ? ( + conversations.map((conversation) => ( + +
+ {(conversation.expert.avatar_url || conversation.expert.image_url) && ( + {`${conversation.expert.prenom} + )} +
+
+

+ {conversation.expert.prenom} {conversation.expert.nom} +

+ {conversation.lastMessage && ( +

+ {conversation.lastMessage.content} +

+ )} +
+ {conversation.unreadCount > 0 && ( +
+ {conversation.unreadCount} +
+ )} + + )) + ) : ( +
+ Aucune conversation +
+ )} +
+
+ + {/* Zone de chat - plein écran sur mobile si conversation active */} +
+ {expertId && currentExpert ? ( + <> + {/* En-tête avec bouton retour sur mobile */} +
+ +
+ {currentExpert.avatar_url && ( + // eslint-disable-next-line @next/next/no-img-element + + )} +
+

+ {currentExpert.prenom} {currentExpert.nom} +

+
+ + {/* Messages avec padding ajusté */} +
+ {messages.map((message) => ( +
+
+
{message.content}
+
+ {formatTimeDifference(new Date(message.created_at), new Date())} +
+
+
+ ))} +
+ + {/* Formulaire d'envoi fixé en bas sur mobile */} +
+ setNewMessage(e.target.value)} + placeholder="Écrivez votre message..." + className="flex-1" + /> + +
+ + ) : ( +
+
+

+ Bienvenue dans votre messagerie +

+

+ Sélectionnez une conversation ou commencez à discuter avec un expert +

+ +
+
+ )} +
+
+ ); +} \ No newline at end of file diff --git a/ui/app/chatroom/page.tsx b/ui/app/chatroom/page.tsx new file mode 100644 index 0000000..867dcd5 --- /dev/null +++ b/ui/app/chatroom/page.tsx @@ -0,0 +1,37 @@ +'use client'; + +import { useRouter } from 'next/navigation'; +import { Button } from "@/components/ui/button"; + +export default function ChatRoomHome() { + const router = useRouter(); + + return ( +
+ {/* Liste des conversations (même composant que dans [expertId]/page.tsx) */} +
+
+

Messages

+
+
+ {/* La liste des conversations sera chargée ici */} +
+
+ + {/* Zone de bienvenue (visible uniquement sur desktop) */} +
+
+

+ Bienvenue dans votre messagerie +

+

+ Sélectionnez une conversation ou commencez à discuter avec un expert +

+ +
+
+
+ ); +} \ No newline at end of file diff --git a/ui/app/discover/page.tsx b/ui/app/discover/page.tsx index eb94040..c81f360 100644 --- a/ui/app/discover/page.tsx +++ b/ui/app/discover/page.tsx @@ -1,50 +1,231 @@ 'use client'; -import { Search } from 'lucide-react'; -import { useEffect, useState } from 'react'; +import { Search, Filter, X } from 'lucide-react'; +import { useEffect, useState, useCallback } from 'react'; import Link from 'next/link'; import { toast } from 'sonner'; +import { supabase } from '@/lib/supabase'; +import { Button } from "@/components/ui/button"; +import { useRouter } from 'next/navigation'; +import Image from 'next/image'; +import { Dialog, DialogContent, DialogHeader, DialogTitle } from "@/components/ui/dialog"; +import { FilterModal } from "@/components/FilterModal"; -interface Discover { - title: string; - content: string; - url: string; - thumbnail: string; +interface Expert { + id: number; + id_expert: string; + nom: string; + prenom: string; + adresse: string; + pays: string; + ville: string; + expertises: string; + biographie: string; + tarif: number; + services: any; + created_at: string; + image_url: string; } +interface Location { + pays: string; + villes: string[]; +} + +interface Expertise { + id: string; + name: string; +} + +const ExpertCard = ({ expert }: { expert: Expert }) => { + const router = useRouter(); + + const handleContact = async (e: React.MouseEvent) => { + e.preventDefault(); // Empêche la navigation vers la page expert + + try { + // Vérifier si une conversation existe déjà + const { data: existingMessages } = await supabase + .from('messages') + .select('*') + .or(`sender_id.eq.user_id,receiver_id.eq.${expert.id_expert}`) + .limit(1); + + if (!existingMessages || existingMessages.length === 0) { + // Si pas de conversation existante, créer le premier message + const { error: messageError } = await supabase + .from('messages') + .insert({ + content: `Bonjour ${expert.prenom}, je souhaiterais échanger avec 
vous.`, + sender_id: 'user_id', // À remplacer par l'ID de l'utilisateur connecté + receiver_id: expert.id_expert, + read: false + }); + + if (messageError) { + throw messageError; + } + } + + // Rediriger vers la conversation + router.push(`/chatroom/${expert.id_expert}`); + toast.success(`Conversation ouverte avec ${expert.prenom} ${expert.nom}`); + } catch (error) { + console.error('Error starting conversation:', error); + toast.error("Erreur lors de l'ouverture de la conversation"); + } + }; + + return ( + +
+ {expert.image_url ? ( + {`${expert.prenom} { + // Fallback en cas d'erreur de chargement de l'image + const target = e.target as HTMLImageElement; + target.onerror = null; + target.src = '/placeholder-image.jpg'; + }} + /> + ) : ( +
+ Pas d'image +
+ )} +
+ +
+
+ {expert.prenom} {expert.nom} +
+
+

+ {expert.ville}, {expert.pays} +

+

+ {expert.expertises} +

+ {expert.tarif && ( +

+ {expert.tarif}€ /heure +

+ )} + +
+
+ + ); +}; + const Page = () => { - const [discover, setDiscover] = useState(null); + const [experts, setExperts] = useState(null); const [loading, setLoading] = useState(true); + const [selectedPays, setSelectedPays] = useState(''); + const [selectedVille, setSelectedVille] = useState(''); + const [locations, setLocations] = useState([]); + const [selectedExpertises, setSelectedExpertises] = useState([]); + const [open, setOpen] = useState(false); + + // Calcul du nombre de filtres actifs + const activeFiltersCount = [ + ...(selectedExpertises.length > 0 ? [1] : []), + selectedPays, + selectedVille + ].filter(Boolean).length; + + // Récupérer les experts avec filtres + const fetchExperts = useCallback(async () => { + try { + let query = supabase + .from('experts') + .select('*'); + + if (selectedExpertises.length > 0) { + // Adaptez cette partie selon la structure de votre base de données + query = query.contains('expertises', selectedExpertises); + } + + // Filtre par pays + if (selectedPays) { + query = query.eq('pays', selectedPays); + } + + // Filtre par ville + if (selectedVille) { + query = query.eq('ville', selectedVille); + } + + const { data, error } = await query; + + if (error) throw error; + setExperts(data); + } catch (err: any) { + console.error('Error fetching experts:', err.message); + toast.error('Erreur lors du chargement des experts'); + } finally { + setLoading(false); + } + }, [selectedPays, selectedVille, selectedExpertises]); + + // Récupérer la liste des pays et villes uniques + const fetchLocations = async () => { + try { + const { data, error } = await supabase + .from('experts') + .select('pays, ville'); + + if (error) throw error; + + // Créer un objet avec pays et villes uniques + const locationMap = new Map>(); + + data.forEach(expert => { + if (expert.pays) { + if (!locationMap.has(expert.pays)) { + locationMap.set(expert.pays, new Set()); + } + if (expert.ville) { + locationMap.get(expert.pays)?.add(expert.ville); + } + } + }); + + 
// Convertir en tableau trié + const sortedLocations = Array.from(locationMap).map(([pays, villes]) => ({ + pays, + villes: Array.from(villes).sort() + })).sort((a, b) => a.pays.localeCompare(b.pays)); + + setLocations(sortedLocations); + } catch (err: any) { + console.error('Error fetching locations:', err.message); + } + }; + + // Reset ville quand le pays change + useEffect(() => { + setSelectedVille(''); + }, [selectedPays]); useEffect(() => { - const fetchData = async () => { - try { - const res = await fetch(`${process.env.NEXT_PUBLIC_API_URL}/discover`, { - method: 'GET', - headers: { - 'Content-Type': 'application/json', - }, - }); - - const data = await res.json(); - - if (!res.ok) { - throw new Error(data.message); - } - - data.blogs = data.blogs.filter((blog: Discover) => blog.thumbnail); - - setDiscover(data.blogs); - } catch (err: any) { - console.error('Error fetching data:', err.message); - toast.error('Error fetching data'); - } finally { - setLoading(false); - } - }; - - fetchData(); - }, []); + fetchExperts(); + fetchLocations(); + }, [fetchExperts]); return loading ? (
@@ -66,47 +247,64 @@ const Page = () => {
) : ( - <> -
-
-
- -

Discover

+
+
+
+
+
+ +

Nos Experts

+
+
+ Plus de 300 experts à votre écoute +
-
-
- -
- {discover && - discover?.map((item, i) => ( - - {item.title} -
-
- {item.title.slice(0, 100)}... -
-

- {item.content.slice(0, 100)}... -

-
- - ))} + + {/* CTA Filtres unifié */} +
- + + {/* Modale de filtres */} + + +
+ +
+ {experts && experts.length > 0 ? ( + experts.map((expert) => ( + + )) + ) : ( +

+ Aucun expert trouvé +

+ )} +
+
); }; diff --git a/ui/app/layout.tsx b/ui/app/layout.tsx index 684a99c..efc351b 100644 --- a/ui/app/layout.tsx +++ b/ui/app/layout.tsx @@ -14,9 +14,9 @@ const montserrat = Montserrat({ }); export const metadata: Metadata = { - title: 'Perplexica - Chat with the internet', + title: 'X&me - Chat with the internet', description: - 'Perplexica is an AI powered chatbot that is connected to the internet.', + 'X&me is an AI powered chatbot that is connected to the internet.', }; export default function RootLayout({ diff --git a/ui/app/page.tsx b/ui/app/page.tsx index e18aca9..f0a27c0 100644 --- a/ui/app/page.tsx +++ b/ui/app/page.tsx @@ -3,8 +3,8 @@ import { Metadata } from 'next'; import { Suspense } from 'react'; export const metadata: Metadata = { - title: 'Chat - Perplexica', - description: 'Chat with the internet, chat with Perplexica.', + title: 'Chat - X-me', + description: 'Chat with the internet, chat with X-me.', }; const Home = () => { diff --git a/ui/components/EmptyChat.tsx b/ui/components/EmptyChat.tsx index c47c301..282dc92 100644 --- a/ui/components/EmptyChat.tsx +++ b/ui/components/EmptyChat.tsx @@ -38,8 +38,11 @@ const EmptyChat = ({

- Research begins here. + Ici c'est vous le patron.

+

+ Posez des questions, recherchez un expert pour répondre à vos besoins entrepreneuriaux +

setMessage(e.target.value)} minRows={2} className="bg-transparent placeholder:text-black/50 dark:placeholder:text-white/50 text-sm text-black dark:text-white resize-none focus:outline-none w-full max-h-24 lg:max-h-36 xl:max-h-48" - placeholder="Ask anything..." + placeholder="Posez votre question..." />
diff --git a/ui/components/FilterModal.tsx b/ui/components/FilterModal.tsx new file mode 100644 index 0000000..ab24b19 --- /dev/null +++ b/ui/components/FilterModal.tsx @@ -0,0 +1,158 @@ +import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogDescription } from "@/components/ui/dialog"; +import { Button } from "@/components/ui/button"; +import { Expert, Location } from "@/types"; // Ajustez le chemin selon votre structure +import { Dispatch, SetStateAction } from 'react'; // Ajout de l'import + +interface Expertise { + id: string; + name: string; +} + +interface FilterModalProps { + open: boolean; + setOpen: (open: boolean) => void; + selectedPays: string; + setSelectedPays: (pays: string) => void; + selectedVille: string; + setSelectedVille: (ville: string) => void; + selectedExpertises: string[]; + setSelectedExpertises: Dispatch>; // Correction du type + locations: Location[]; + experts: Expert[] | null; +} + +export const FilterModal = ({ + open, + setOpen, + selectedPays, + setSelectedPays, + selectedVille, + setSelectedVille, + selectedExpertises, + setSelectedExpertises, + locations, + experts, +}: FilterModalProps) => { + const activeFiltersCount = [ + ...(selectedExpertises.length > 0 ? [1] : []), + selectedPays, + selectedVille + ].filter(Boolean).length; + + const expertises: Expertise[] = [ + { id: 'immobilier', name: 'Immobilier' }, + { id: 'finance', name: 'Finance' }, + { id: 'droit', name: 'Droit' }, + { id: 'fiscalite', name: 'Fiscalité' }, + { id: 'assurance', name: 'Assurance' }, + { id: 'patrimoine', name: 'Patrimoine' }, + ]; + + return ( + + + + + Filtres + + {activeFiltersCount} filtre{activeFiltersCount > 1 ? 's' : ''} actif{activeFiltersCount > 1 ? 's' : ''} + + + + Filtrez les experts par expertise et localisation + + + +
+ {/* Section Expertises */} +
+

Expertises

+
+ {expertises.map((expertise) => ( + + ))} +
+
+ + {/* Section Pays */} +
+

Pays

+
+ {locations.map(({ pays }) => ( + + ))} +
+
+ + {/* Section Villes (conditionnelle) */} + {selectedPays && ( +
+

Villes {selectedPays && `(${selectedPays})`}

+
+ {locations + .find(loc => loc.pays === selectedPays) + ?.villes.map(ville => ( + + ))} +
+
+ )} +
+ +
+ + +
+
+
+ ); +}; \ No newline at end of file diff --git a/ui/components/LegalSearch.tsx b/ui/components/LegalSearch.tsx new file mode 100644 index 0000000..f51b1b5 --- /dev/null +++ b/ui/components/LegalSearch.tsx @@ -0,0 +1,200 @@ +/* eslint-disable @next/next/no-img-element */ +import { BookCopy, PlusIcon } from 'lucide-react'; +import { useState } from 'react'; +import { Message } from './ChatWindow'; +import Lightbox, { GenericSlide } from 'yet-another-react-lightbox'; +import 'yet-another-react-lightbox/styles.css'; + +type Document = { + url: string; + title: string; + snippet: string; + source: string; + type: string; + iframe_src: string; +}; + +declare module 'yet-another-react-lightbox' { + export interface PDFSlide extends GenericSlide { + type: 'pdf'; + url: string; + iframe_src: string; + } + + interface SlideTypes { + 'pdf': PDFSlide; + } +} + +const LegalSearch = ({ + query, + chatHistory, +}: { + query: string; + chatHistory: Message[]; +}) => { + const [documents, setDocuments] = useState(null); + const [loading, setLoading] = useState(false); + const [open, setOpen] = useState(false); + const [currentDoc, setCurrentDoc] = useState(null); + + const openDocument = (doc: Document) => { + setCurrentDoc(doc); + setOpen(true); + }; + + return ( + <> + {!loading && documents === null && ( + + )} + {loading && ( +
+ {[...Array(3)].map((_, i) => ( +
+ ))} +
+ )} + {documents !== null && documents.length > 0 && ( + <> +
+ {documents.length > 4 + ? documents.slice(0, 3).map((doc, i) => ( +
openDocument(doc)} + className="bg-light-100 dark:bg-dark-100 p-3 rounded-lg hover:bg-light-200 dark:hover:bg-dark-200 transition duration-200 cursor-pointer" + > +

+ {doc.title} +

+

+ {doc.snippet} +

+
+ + {doc.source} + + + {doc.type} + +
+
+ )) + : documents.map((doc, i) => ( +
openDocument(doc)} + className="bg-light-100 dark:bg-dark-100 p-3 rounded-lg hover:bg-light-200 dark:hover:bg-dark-200 transition duration-200 cursor-pointer" + > +

+ {doc.title} +

+

+ {doc.snippet} +

+
+ + {doc.source} + + + {doc.type} + +
+
+ ))} + {documents.length > 4 && ( + + )} +
+ setOpen(false)} + render={{ + slide: ({ slide }) => + slide.type === 'pdf' ? ( +
+
+

Le document ne peut pas être affiché directement.

+ + Ouvrir le document dans un nouvel onglet + +
+
+

{currentDoc?.title}

+

{currentDoc?.snippet}

+
+
+ ) : null, + }} + slides={[ + { + type: 'pdf', + url: currentDoc?.url || '', + iframe_src: currentDoc?.url || '', + } + ]} + /> + + )} + + ); +}; + +export default LegalSearch; \ No newline at end of file diff --git a/ui/components/MessageBox.tsx b/ui/components/MessageBox.tsx index f23127c..5d4980c 100644 --- a/ui/components/MessageBox.tsx +++ b/ui/components/MessageBox.tsx @@ -16,7 +16,7 @@ import Markdown from 'markdown-to-jsx'; import Copy from './MessageActions/Copy'; import Rewrite from './MessageActions/Rewrite'; import MessageSources from './MessageSources'; -import SearchImages from './SearchImages'; +import LegalSearch from './LegalSearch'; import SearchVideos from './SearchVideos'; import { useSpeech } from 'react-text-to-speech'; @@ -53,8 +53,12 @@ const MessageBox = ({ return setParsedMessage( message.content.replace( regex, - (_, number) => - `${number}`, + (_, number) => { + const url = message.sources?.[number - 1]?.metadata?.url || ''; + // Extraire le nom de domaine sans l'extension + const sourceName = url.replace(/^(?:https?:\/\/)?(?:www\.)?([^./]+).*$/, '$1'); + return `${sourceName}`; + } ), ); } @@ -63,15 +67,18 @@ const MessageBox = ({ setParsedMessage(message.content); }, [message.content, message.sources, message.role]); + useEffect(() => { + }, [message.sources]); + const { speechStatus, start, stop } = useSpeech({ text: speechMessage }); return (
{message.role === 'user' && (
-

+

{message.content} -

+
)} @@ -81,6 +88,24 @@ const MessageBox = ({ ref={dividerRef} className="flex flex-col space-y-6 w-full lg:w-9/12" > + {message.sources && message.sources[0]?.metadata?.illustrationImage && ( +
+
+ Illustration { + console.error("Erreur de chargement de l'image:", e); + (e.target as HTMLImageElement).style.display = 'none'; + }} + /> +
+

+ {message.sources[0].metadata.title || 'Illustration du sujet'} +

+
+ )} {message.sources && message.sources.length > 0 && (
@@ -102,7 +127,7 @@ const MessageBox = ({ size={20} />

- Answer + Question

{ + return

{children}

; + }, + }, + }} > {parsedMessage}
@@ -152,7 +184,7 @@ const MessageBox = ({
-

Related

+

Suggestions

{message.suggestions.map((suggestion, i) => ( @@ -184,7 +216,7 @@ const MessageBox = ({
- diff --git a/ui/components/MessageInputActions/Focus.tsx b/ui/components/MessageInputActions/Focus.tsx index 613078b..3bf6ac4 100644 --- a/ui/components/MessageInputActions/Focus.tsx +++ b/ui/components/MessageInputActions/Focus.tsx @@ -4,7 +4,7 @@ import { Globe, Pencil, ScanEye, - SwatchBook, + Eye, } from 'lucide-react'; import { cn } from '@/lib/utils'; import { @@ -19,52 +19,28 @@ import { Fragment } from 'react'; const focusModes = [ { key: 'webSearch', - title: 'All', - description: 'Searches across all of the internet', + title: 'Recherche internet', + description: 'Recherche sur internet directement', icon: , }, { key: 'academicSearch', - title: 'Academic', - description: 'Search in published academic papers', - icon: , + title: 'Experts', + description: 'Recherche un expert pour vous acccompagner', + icon: , }, { key: 'writingAssistant', - title: 'Writing', + title: 'Document', description: 'Chat without searching the web', icon: , }, { key: 'wolframAlphaSearch', - title: 'Wolfram Alpha', - description: 'Computational knowledge engine', + title: 'Business Plan', + description: 'Réaliser votre Business Plan', icon: , }, - { - key: 'youtubeSearch', - title: 'Youtube', - description: 'Search and watch videos', - icon: ( - - ), - }, - { - key: 'redditSearch', - title: 'Reddit', - description: 'Search for discussions and opinions', - icon: ( - - ), - }, ]; const Focus = ({ diff --git a/ui/components/SearchImages.tsx b/ui/components/SearchImages.tsx index b083af7..03c729d 100644 --- a/ui/components/SearchImages.tsx +++ b/ui/components/SearchImages.tsx @@ -23,54 +23,60 @@ const SearchImages = ({ const [open, setOpen] = useState(false); const [slides, setSlides] = useState([]); + const handleSearch = async () => { + setLoading(true); + try { + console.log("🖼️ Démarrage de la recherche d'images pour:", query); + + const chatModelProvider = localStorage.getItem('chatModelProvider'); + const chatModel = localStorage.getItem('chatModel'); + console.log("🖼️ Modèle 
configuré:", chatModelProvider, chatModel); + + const response = await fetch('/api/images', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + query: query, + chatHistory: chatHistory, + chatModel: { + provider: chatModelProvider, + model: chatModel, + }, + }), + }); + + if (!response.ok) { + throw new Error(`HTTP error! status: ${response.status}`); + } + + const data = await response.json(); + console.log('🖼️ Résultats de la recherche:', data); + + if (data.images && data.images.length > 0) { + setImages(data.images); + setSlides( + data.images.map((image: Image) => ({ + src: image.img_src, + })) + ); + console.log('🖼️ Images et slides mis à jour:', data.images.length); + } else { + console.log('🖼️ Aucune image trouvée'); + } + } catch (error) { + console.error('🖼️ Erreur lors de la recherche:', error); + } finally { + setLoading(false); + } + }; + return ( <> {!loading && images === null && (