From 9f4ae1baacf67b3379143cde9460feb6c15ae1b1 Mon Sep 17 00:00:00 2001 From: eligrinfeld Date: Mon, 6 Jan 2025 21:25:15 -0700 Subject: [PATCH] feat: update backend services and routes - Add business routes and middleware\n- Update search and database services\n- Improve health check implementation\n- Update CI workflow configuration --- .github/workflows/ci.yml | 134 +----- src/app.ts | 44 +- src/config/env.ts | 119 ++--- src/lib/services/databaseService.ts | 208 +++----- src/lib/services/deepseekService.ts | 717 +++++++++++----------------- src/lib/services/healthCheck.ts | 55 +-- src/lib/services/searchService.ts | 216 +++++---- src/lib/supabase.ts | 39 +- src/lib/types.ts | 16 +- src/middleware/auth.ts | 47 ++ src/routes/api.ts | 163 +++++-- src/routes/business.ts | 413 ++++++++++++++++ src/routes/search.ts | 410 +++++++++++----- src/tests/testDeepseek.ts | 221 ++------- src/tests/testOllama.ts | 47 ++ 15 files changed, 1501 insertions(+), 1348 deletions(-) create mode 100644 src/middleware/auth.ts create mode 100644 src/routes/business.ts create mode 100644 src/tests/testOllama.ts diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 73ae6b5..07a9b12 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,133 +1,29 @@ --- -name: CI/CD +name: CI on: push: - branches: [ main, develop ] + branches: [ main ] pull_request: - branches: [ main, develop ] + branches: [ main ] jobs: test: runs-on: ubuntu-latest - - services: - supabase: - image: supabase/postgres-meta:v0.68.0 - env: - POSTGRES_PASSWORD: postgres - POSTGRES_USER: postgres - POSTGRES_DB: postgres - ports: - - 5432:5432 - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 steps: - - uses: actions/checkout@v4 - - - name: Setup Node.js - uses: actions/setup-node@v4 - with: - node-version: '20' - cache: 'npm' - - - name: Install dependencies - run: npm ci - - - name: Check code formatting - run: npm run format - - - name: Run tests with coverage - run: npm run test:coverage - env: - SUPABASE_URL: http://localhost:54321 - SUPABASE_KEY: test-key - OLLAMA_URL: http://localhost:11434 - SEARXNG_URL: http://localhost:8080 - NODE_ENV: test - CACHE_DURATION_DAYS: 7 - - - name: Upload coverage reports - uses: codecov/codecov-action@v4 - with: - token: ${{ secrets.CODECOV_TOKEN }} - files: ./coverage/lcov.info - fail_ci_if_error: true - - build: - needs: test - runs-on: ubuntu-latest - if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/develop') + - uses: actions/checkout@v2 - steps: - - uses: actions/checkout@v4 + - name: Setup Node.js + uses: actions/setup-node@v2 + with: + node-version: '18' + + - name: Install dependencies + run: npm ci - - name: Setup Node.js - uses: actions/setup-node@v4 - with: - node-version: '20' - cache: 'npm' + - name: Run tests + run: npm test - - name: Install dependencies - run: npm ci - - - name: Build - run: npm run build - - - name: Upload build artifacts - uses: actions/upload-artifact@v4 - with: - name: dist - path: dist/ - - deploy-staging: - needs: build - runs-on: ubuntu-latest - if: github.event_name == 'push' && github.ref == 'refs/heads/develop' - environment: - name: staging - url: https://staging.example.com - - steps: - - uses: actions/checkout@v4 - - - name: Download build artifacts - uses: actions/download-artifact@v4 - with: - name: dist - path: dist/ - - - name: Deploy to staging - run: | - echo "Deploying to staging environment" - # Add your staging deployment 
commands here - env: - DEPLOY_KEY: ${{ secrets.DEPLOY_KEY }} - - deploy-production: - needs: build - runs-on: ubuntu-latest - if: github.event_name == 'push' && github.ref == 'refs/heads/main' - environment: - name: production - url: https://example.com - - steps: - - uses: actions/checkout@v4 - - - name: Download build artifacts - uses: actions/download-artifact@v4 - with: - name: dist - path: dist/ - - - name: Deploy to production - run: | - echo "Deploying to production environment" - # Add your production deployment commands here - env: - DEPLOY_KEY: ${{ secrets.DEPLOY_KEY }} \ No newline at end of file + - name: Run type check + run: npm run build \ No newline at end of file diff --git a/src/app.ts b/src/app.ts index d443242..4ba5cef 100644 --- a/src/app.ts +++ b/src/app.ts @@ -1,48 +1,16 @@ import express from 'express'; import cors from 'cors'; -import path from 'path'; -import './config/env'; // Load environment variables first -import apiRoutes from './routes/api'; -import { HealthCheckService } from './lib/services/healthCheck'; +import searchRoutes from './routes/search'; +import businessRoutes from './routes/business'; const app = express(); -const port = process.env.PORT || 3000; // Middleware app.use(cors()); app.use(express.json()); -// API routes first -app.use('/api', apiRoutes); +// Routes +app.use('/api/search', searchRoutes); +app.use('/api/business', businessRoutes); -// Then static files -app.use(express.static(path.join(__dirname, '../public'))); - -// Finally, catch-all route for SPA -app.get('*', (req, res) => { - res.sendFile(path.join(__dirname, '../public/index.html')); -}); - -// Start server with health checks -async function startServer() { - console.log('\n๐Ÿ” Checking required services...'); - - const ollamaStatus = await HealthCheckService.checkOllama(); - const searxngStatus = await HealthCheckService.checkSearxNG(); - const supabaseStatus = await HealthCheckService.checkSupabase(); - - console.log('\n๐Ÿ“Š Service Status:'); - console.log('- Ollama:', ollamaStatus ? 'โœ… Running' : 'โŒ Not Running'); - console.log('- SearxNG:', searxngStatus ? 'โœ… Running' : 'โŒ Not Running'); - console.log('- Supabase:', supabaseStatus ? 
'โœ… Connected' : 'โŒ Not Connected'); - - app.listen(port, () => { - console.log(`\n๐Ÿš€ Server running at http://localhost:${port}`); - console.log('-------------------------------------------'); - }); -} - -startServer().catch(error => { - console.error('Failed to start server:', error); - process.exit(1); -}); +export default app; diff --git a/src/config/env.ts b/src/config/env.ts index 3b4f652..11510ae 100644 --- a/src/config/env.ts +++ b/src/config/env.ts @@ -1,89 +1,40 @@ -import { config } from 'dotenv'; -import { z } from 'zod'; +import dotenv from 'dotenv'; -config(); +// Load environment variables +dotenv.config(); -// Define the environment schema -const envSchema = z.object({ - PORT: z.string().default('3000'), - NODE_ENV: z.string().default('development'), - SUPABASE_URL: z.string(), - SUPABASE_KEY: z.string(), - OLLAMA_URL: z.string().default('http://localhost:11434'), - OLLAMA_MODEL: z.string().default('llama2'), - SEARXNG_URL: z.string().default('http://localhost:4000'), - SEARXNG_INSTANCES: z.string().default('["http://localhost:4000"]'), - MAX_RESULTS_PER_QUERY: z.string().default('50'), - CACHE_DURATION_HOURS: z.string().default('24'), - CACHE_DURATION_DAYS: z.string().default('7'), - HUGGING_FACE_API_KEY: z.string({ - required_error: "HUGGING_FACE_API_KEY is required in .env" - }) -}); +// Environment configuration +const env = { + // Supabase Configuration + SUPABASE_URL: process.env.SUPABASE_URL || '', + SUPABASE_KEY: process.env.SUPABASE_KEY || '', -// Define the final environment type -export interface EnvConfig { - PORT: string; - NODE_ENV: string; - searxng: { - currentUrl: string; - instances: string[]; - }; - ollama: { - url: string; - model: string; - }; - supabase: { - url: string; - anonKey: string; - }; - cache: { - maxResultsPerQuery: number; - durationHours: number; - durationDays: number; - }; - ai: { - model: string; - temperature: number; - maxTokens: number; - batchSize: number; - }; - huggingface: { - apiKey: string; - }; + // Server Configuration + PORT: parseInt(process.env.PORT || '3001', 10), + NODE_ENV: process.env.NODE_ENV || 'development', + + // Search Configuration + MAX_RESULTS_PER_QUERY: parseInt(process.env.MAX_RESULTS_PER_QUERY || '50', 10), + CACHE_DURATION_HOURS: parseInt(process.env.CACHE_DURATION_HOURS || '24', 10), + CACHE_DURATION_DAYS: parseInt(process.env.CACHE_DURATION_DAYS || '7', 10), + + // SearxNG Configuration + SEARXNG_URL: process.env.SEARXNG_URL || 'http://localhost:4000', + + // Ollama Configuration + OLLAMA_URL: process.env.OLLAMA_URL || 'http://localhost:11434', + OLLAMA_MODEL: process.env.OLLAMA_MODEL || 'deepseek-coder:6.7b', + + // Hugging Face Configuration + HUGGING_FACE_API_KEY: process.env.HUGGING_FACE_API_KEY || '' +}; + +// Validate required environment variables +const requiredEnvVars = ['SUPABASE_URL', 'SUPABASE_KEY', 'SEARXNG_URL']; +for (const envVar of requiredEnvVars) { + if (!env[envVar as keyof typeof env]) { + throw new Error(`Missing required environment variable: ${envVar}`); + } } -// Parse and transform the environment variables -const rawEnv = envSchema.parse(process.env); - -// Create the final environment object with parsed configurations -export const env: EnvConfig = { - PORT: rawEnv.PORT, - NODE_ENV: rawEnv.NODE_ENV, - searxng: { - currentUrl: rawEnv.SEARXNG_URL, - instances: JSON.parse(rawEnv.SEARXNG_INSTANCES) - }, - ollama: { - url: rawEnv.OLLAMA_URL, - model: rawEnv.OLLAMA_MODEL - }, - supabase: { - url: rawEnv.SUPABASE_URL, - anonKey: rawEnv.SUPABASE_KEY - }, - cache: { - 
maxResultsPerQuery: parseInt(rawEnv.MAX_RESULTS_PER_QUERY), - durationHours: parseInt(rawEnv.CACHE_DURATION_HOURS), - durationDays: parseInt(rawEnv.CACHE_DURATION_DAYS) - }, - ai: { - model: 'deepseek-ai/deepseek-coder-6.7b-instruct', - temperature: 0.7, - maxTokens: 512, - batchSize: 3 - }, - huggingface: { - apiKey: rawEnv.HUGGING_FACE_API_KEY - } -}; \ No newline at end of file +export { env }; \ No newline at end of file diff --git a/src/lib/services/databaseService.ts b/src/lib/services/databaseService.ts index 8ffb3da..0b2a868 100644 --- a/src/lib/services/databaseService.ts +++ b/src/lib/services/databaseService.ts @@ -1,164 +1,80 @@ -import { createClient, SupabaseClient } from '@supabase/supabase-js'; -import { env } from '../../config/env'; -import { BusinessData } from '../types'; -import { generateBusinessId, extractPlaceIdFromUrl } from '../utils'; +import { createClient } from '@supabase/supabase-js'; +import { Business } from '../types'; +import env from '../../config/env'; + +interface PartialBusiness { + name: string; + address: string; + phone: string; + description: string; + website?: string; + rating?: number; + source?: string; + location?: { + lat: number; + lng: number; + }; +} export class DatabaseService { - private supabase: SupabaseClient; - + private supabase; + constructor() { - this.supabase = createClient( - env.supabase.url, - env.supabase.anonKey, - { - auth: { - autoRefreshToken: true, - persistSession: true - } - } - ); + this.supabase = createClient(env.SUPABASE_URL, env.SUPABASE_KEY); } - async searchBusinesses(query: string, location: string): Promise { - try { - const { data, error } = await this.supabase - .from('businesses') - .select('*') - .or( - `name.ilike.%${query}%,` + - `description.ilike.%${query}%` - ) - .ilike('address', `%${location}%`) - .order('search_count', { ascending: false }) - .limit(env.cache.maxResultsPerQuery); - - if (error) { - console.error('Error searching businesses:', error); - throw error; - } - - console.log(`Found ${data?.length || 0} businesses in database`); - return data || []; - } catch (error) { - console.error('Error searching businesses:', error); - return []; - } - } - - async saveBusiness(business: Partial): Promise { - const id = generateBusinessId({ - title: business.name || '', - url: business.website, - phone: business.phone, - address: business.address - }); - - const { error } = await this.supabase + async saveBusiness(business: PartialBusiness): Promise { + const { data, error } = await this.supabase .from('businesses') .upsert({ - id, name: business.name, - phone: business.phone, - email: business.email, address: business.address, - rating: business.rating, - website: business.website, - logo: business.logo, - source: business.source, + phone: business.phone, description: business.description, - latitude: business.location?.lat, - longitude: business.location?.lng, - place_id: business.website ? extractPlaceIdFromUrl(business.website) : null, - search_count: 1 - }, { - onConflict: 'id', - ignoreDuplicates: false - }); - - if (error) { - console.error('Error saving business:', error); - throw error; - } - } - - async incrementSearchCount(id: string): Promise { - const { error } = await this.supabase - .from('businesses') - .update({ - search_count: this.supabase.rpc('increment'), - last_updated: new Date().toISOString() + website: business.website, + source: business.source || 'deepseek', + rating: business.rating || 4.5, + location: business.location ? 
`(${business.location.lng},${business.location.lat})` : '(0,0)' }) - .eq('id', id); - - if (error) { - console.error('Error incrementing search count:', error); - throw error; - } - } - - async saveSearch(query: string, location: string, resultsCount: number): Promise { - const { error } = await this.supabase - .from('searches') - .insert([{ - query, - location, - results_count: resultsCount, - timestamp: new Date().toISOString() - }]); - - if (error) { - console.error('Error saving search:', error); - throw error; - } - } - - async getFromCache(key: string): Promise { - const { data, error } = await this.supabase - .from('cache') - .select('value') - .eq('key', key) - .gt('expires_at', new Date().toISOString()) + .select() .single(); if (error) { - if (error.code !== 'PGRST116') { // Not found error - console.error('Error getting from cache:', error); - } + console.error('Error saving business:', error); + throw new Error('Failed to save business'); + } + + return data; + } + + async findBusinessesByQuery(query: string, location: string): Promise { + const { data, error } = await this.supabase + .from('businesses') + .select('*') + .or(`name.ilike.%${query}%,description.ilike.%${query}%`) + .ilike('address', `%${location}%`) + .order('rating', { ascending: false }); + + if (error) { + console.error('Error finding businesses:', error); + throw new Error('Failed to find businesses'); + } + + return data || []; + } + + async getBusinessById(id: string): Promise { + const { data, error } = await this.supabase + .from('businesses') + .select('*') + .eq('id', id) + .single(); + + if (error) { + console.error('Error getting business:', error); return null; } - return data?.value; + return data; } - - async saveToCache(key: string, value: any, expiresIn: number): Promise { - const { error } = await this.supabase - .from('cache') - .upsert({ - key, - value, - expires_at: new Date(Date.now() + expiresIn).toISOString() - }); - - if (error) { - console.error('Error saving to cache:', error); - throw error; - } - } - - async clearCache(pattern?: string): Promise { - try { - const query = pattern ? - 'DELETE FROM cache WHERE key LIKE $1' : - 'DELETE FROM cache'; - - await this.supabase - .from('cache') - .delete() - .or(pattern ? `key LIKE $1` : ''); - } catch (error) { - console.error('Error clearing cache:', error); - } - } -} - -export const db = new DatabaseService(); \ No newline at end of file +} \ No newline at end of file diff --git a/src/lib/services/deepseekService.ts b/src/lib/services/deepseekService.ts index 4ffe711..019d972 100644 --- a/src/lib/services/deepseekService.ts +++ b/src/lib/services/deepseekService.ts @@ -1,460 +1,285 @@ import axios from 'axios'; -import { env } from '../../config/env'; +import EventEmitter from 'events'; import { Business } from '../types'; -export class DeepSeekService { - private static OLLAMA_URL = 'http://localhost:11434/api/generate'; - private static MODEL_NAME = 'qwen2:0.5b'; - private static MAX_ATTEMPTS = 3; // Prevent infinite loops - - private static async retryWithBackoff(fn: () => Promise, retries = 5) { - for (let i = 0; i < retries; i++) { - try { - return await fn(); - } catch (error) { - if (i === retries - 1) throw error; - - // Longer backoff for timeouts - const isTimeout = axios.isAxiosError(error) && error.code === 'ECONNABORTED'; - const delay = isTimeout ? 
- Math.pow(2, i) * 5000 : // 5s, 10s, 20s, 40s, 80s for timeouts - Math.pow(2, i) * 1000; // 1s, 2s, 4s, 8s, 16s for other errors - - console.log(`Retry ${i + 1}/${retries} after ${delay/1000}s...`); - await new Promise(resolve => setTimeout(resolve, delay)); - } - } - } - - private static cleanAddress(address: string): string { - // Remove marketing and extra info first - let cleaned = address - .replace(/[\u{1F300}-\u{1F9FF}]|[\u{2700}-\u{27BF}]|[\u{1F600}-\u{1F64F}]/gu, '') // Remove emojis - .replace(/(?:GET|ORDER|SCHEDULE|CONTACT|DIRECTIONS).*?[:!\n]/i, '') // Remove action words - .replace(/\([^)]*\)/g, '') // Remove parenthetical info - .replace(/(?:Next|Behind|Inside|Near).*$/im, '') // Remove location hints - .split(/[\n\r]+/) // Split into lines - .map(line => line.trim()) - .filter(Boolean); // Remove empty lines - - // Try to find the line with street address - for (const line of cleaned) { - // Common address patterns - const patterns = [ - // Handle suite/unit in street address - /(\d+[^,]+?(?:\s+(?:Suite|Ste|Unit|Apt|Building|Bldg|#)\s*[-A-Z0-9]+)?),\s*([^,]+?),\s*(?:CO|Colorado|COLORADO)[,\s]+(\d{5})/i, - - // Basic format - /(\d+[^,]+?),\s*([^,]+?),\s*(?:CO|Colorado|COLORADO)[,\s]+(\d{5})/i, - - // No commas - /(\d+[^,]+?)\s+([^,]+?)\s+(?:CO|Colorado|COLORADO)\s+(\d{5})/i, - ]; - - for (const pattern of patterns) { - const match = line.match(pattern); - if (match) { - const [_, street, city, zip] = match; - - // Clean and capitalize street address - const cleanedStreet = street - .replace(/\s+/g, ' ') - .replace(/(\d+)/, '$1 ') // Add space after number - .split(' ') - .map(word => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase()) - .join(' '); - - // Capitalize city - const cleanedCity = city.trim() - .split(' ') - .map(word => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase()) - .join(' '); - - return `${cleanedStreet}, ${cleanedCity}, CO ${zip}`; - } - } - } - - // If no match found, try to extract components - const streetLine = cleaned.find(line => /\d+/.test(line)); - if (streetLine) { - const streetMatch = streetLine.match(/(\d+[^,\n]+?)(?:\s+(?:Suite|Ste|Unit|Apt|Building|Bldg|#)\s*[-A-Z0-9]+)?/i); - const zipMatch = cleaned.join(' ').match(/\b(\d{5})\b/); - - if (streetMatch && zipMatch) { - const street = streetMatch[0].trim(); - const zip = zipMatch[1]; - - return `${street}, Denver, CO ${zip}`; - } - } - - return ''; - } - - private static manualClean(business: Partial): Partial { - const cleaned = { ...business }; - - // Clean address - if (cleaned.address) { - const cleanedAddress = this.cleanAddress(cleaned.address); - if (cleanedAddress) { - cleaned.address = cleanedAddress; - } - } - - // Extract business type first - const businessType = this.detectBusinessType(cleaned.name || ''); - - // Clean name while preserving core identity - if (cleaned.name) { - cleaned.name = cleaned.name - // Remove emojis and special characters - .replace(/[\u{1F300}-\u{1F9FF}]|[\u{2700}-\u{27BF}]|[\u{1F600}-\u{1F64F}]/gu, '') - // Remove bracketed content but preserve important terms - .replace(/\s*[\[\({](?!(?:BMW|Mercedes|Audi|specialist|certified)).*?[\]\)}]\s*/gi, ' ') - // Remove business suffixes - .replace(/\b(?:LLC|Inc|Corp|Ltd|DBA|Est\.|Since|P\.?C\.?)\b\.?\s*\d*/gi, '') - // Clean up and normalize - .replace(/[^\w\s&'-]/g, ' ') - .replace(/\s+/g, ' ') - .trim() - .replace(/^THE\s+/i, ''); // Remove leading "THE" - } - - // Clean phone - handle multiple numbers and formats - if (cleaned.phone) { - // Remove emojis and special characters first - 
const cleanPhone = cleaned.phone - .replace(/[\u{1F300}-\u{1F9FF}]|[\u{2700}-\u{27BF}]|[\u{1F600}-\u{1F64F}]/gu, '') - .replace(/[^\d]/g, ''); - - const phoneNumbers = cleanPhone.match(/\d{10,}/g); - if (phoneNumbers?.[0]) { - const mainNumber = phoneNumbers[0].slice(0, 10); // Ensure exactly 10 digits - cleaned.phone = `(${mainNumber.slice(0,3)}) ${mainNumber.slice(3,6)}-${mainNumber.slice(6,10)}`; - } - } - - // Clean email - handle multiple emails and formats - if (cleaned.email) { - const emailMatch = cleaned.email.match(/([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/); - if (emailMatch?.[1]) { - cleaned.email = emailMatch[1]; - } - } - - // Improved description cleaning - if (cleaned.description) { - const coreDescription = this.extractCoreDescription(cleaned.description, businessType); - cleaned.description = coreDescription; - } - - return cleaned; - } - - private static detectBusinessType(name: string): string { - const types = { - auto: /\b(?:auto|car|vehicle|BMW|Audi|Mercedes|mechanic|repair|service center)\b/i, - dental: /\b(?:dental|dentist|orthodontic|smile|tooth|teeth)\b/i, - coffee: /\b(?:coffee|cafe|espresso|roaster|brew)\b/i, - plumbing: /\b(?:plumb|plumbing|rooter|drain|pipe)\b/i, - restaurant: /\b(?:restaurant|grill|cuisine|bistro|kitchen)\b/i, - }; - - for (const [type, pattern] of Object.entries(types)) { - if (pattern.test(name)) return type; - } - return 'business'; - } - - private static extractCoreDescription(description: string, businessType: string): string { - // Remove all marketing and formatting first - let cleaned = description - .replace(/[\u{1F300}-\u{1F9FF}]|[\u{2700}-\u{27BF}]|[\u{1F600}-\u{1F64F}]/gu, '') - .replace(/\$+\s*[^\s]*\s*(off|special|offer|deal|save|discount|price|cost|free)/gi, '') - .replace(/\b(?:call|email|visit|contact|text|www\.|http|@|book|schedule|appointment)\b.*$/gi, '') - .replace(/#\w+/g, '') - .replace(/\s+/g, ' ') - .trim(); - - // Extract relevant information based on business type - const typePatterns: { [key: string]: RegExp[] } = { - auto: [ - /(?:specialist|specializing)\s+in\s+[^.]+/i, - /(?:certified|ASE)[^.]+mechanic[^.]+/i, - /(?:auto|car|vehicle)\s+(?:service|repair)[^.]+/i - ], - dental: [ - /(?:dental|orthodontic)\s+(?:care|services)[^.]+/i, - /(?:family|cosmetic|general)\s+dentistry[^.]+/i, - /state-of-the-art\s+facility[^.]+/i - ], - coffee: [ - /(?:coffee|espresso|pastry|cafe)[^.]+/i, - /(?:organic|fair-trade|fresh)[^.]+/i, - /(?:local|favorite|community)[^.]+coffee[^.]+/i - ], - plumbing: [ - /(?:plumbing|drain|pipe)\s+(?:service|repair)[^.]+/i, - /(?:professional|expert|master)\s+plumb[^.]+/i, - /(?:residential|commercial)\s+plumbing[^.]+/i - ] - }; - - const relevantPhrases = typePatterns[businessType]?.map(pattern => { - const match = cleaned.match(pattern); - return match ? match[0] : ''; - }).filter(Boolean) || []; - - if (relevantPhrases.length > 0) { - return relevantPhrases.join('. 
'); - } - - // Fallback to generic description - return `Professional ${businessType} services in Denver area`; - } - - private static sanitizeJsonResponse(response: string): string { - return response - // Remove emojis - .replace(/[\u{1F300}-\u{1F9FF}]|[\u{2700}-\u{27BF}]|[\u{1F600}-\u{1F64F}]/gu, '') - // Remove control characters - .replace(/[\u0000-\u001F\u007F-\u009F]/g, '') - // Clean up newlines and spaces - .replace(/\r?\n\s*/g, ' ') - .replace(/\s+/g, ' ') - .trim(); - } - - static async cleanBusinessData(business: Business, attempt = 0): Promise { - if (attempt >= this.MAX_ATTEMPTS) { - console.log('Max cleaning attempts reached, applying manual cleaning...'); - return { - ...business, - ...this.manualClean(business) - }; - } - - // Detect business type first - const businessType = this.detectBusinessType(business.name || ''); - - const requestId = Math.random().toString(36).substring(7); - const prompt = `<|im_start|>system -You are a data cleaning expert. Clean the business data while preserving its core identity and type. -Request ID: ${requestId} // Force uniqueness -IMPORTANT: Return ONLY plain text without emojis or special characters. -<|im_end|> -<|im_start|>user -Clean this ${businessType} business data by following these rules exactly: - -Input Business: -${JSON.stringify(business, null, 2)} - -Cleaning Rules: -1. NAME: Remove brackets/braces but preserve core business identity -2. ADDRESS: Format as "street, city, state zip" using state abbreviations -3. PHONE: Extract and format primary phone as "(XXX) XXX-XXXX" -4. EMAIL: Remove markdown/mailto formatting but keep actual email -5. DESCRIPTION: Keep core business info but remove: - - ALL emojis and special characters (return plain text only) - - Prices and special offers - - Contact information - - Marketing language - - Social media elements - -Return ONLY clean JSON with the original business identity preserved: -{ - "business_info": { - "name": "Keep original business name without formatting", - "address": "Keep original address, properly formatted", - "phone": "Keep original phone number, properly formatted", - "email": "Keep original email without formatting", - "description": "Keep original business description without marketing" - } +interface PartialBusiness { + name: string; + address: string; + phone: string; + description: string; + website?: string; + rating?: number; } -<|im_end|>`; - const response = await this.chat([{ - role: 'user', - content: prompt - }]); - - try { - const jsonMatch = response.match(/\{[\s\S]*?\}\s*$/); - if (!jsonMatch) { - throw new Error('No JSON found in response'); - } - - const sanitizedJson = this.sanitizeJsonResponse(jsonMatch[0]); - const parsed = JSON.parse(sanitizedJson); - const cleaned = { - ...business, - ...parsed.business_info - }; - - // Validate and handle type mismatches more strictly - const validationIssues = this.validateCleanedData(cleaned, business); - - if (validationIssues.length > 0) { - console.log(`Attempt ${attempt + 1}: Validation issues:`, validationIssues.join(', ')); - - // If there's a business type mismatch, go straight to manual cleaning - if (validationIssues.some(issue => issue.includes('Business type mismatch'))) { - console.log('Business type mismatch detected, applying manual cleaning...'); - return { - ...business, - ...this.manualClean(business) - }; - } - - // For other validation issues, try again - return this.cleanBusinessData(cleaned, attempt + 1); - } - - return cleaned; - } catch (error) { - console.error('Failed to parse 
response:', error); - console.log('Raw response:', response); - - // Try to sanitize and parse the whole response - try { - const sanitized = this.sanitizeJsonResponse(response); - const fallback = this.parseResponse(sanitized); - return this.cleanBusinessData({ ...business, ...fallback }, attempt + 1); - } catch (parseError) { - console.error('Failed to parse sanitized response:', parseError); - return this.cleanBusinessData({ ...business, ...this.manualClean(business) }, attempt + 1); - } - } - } - - private static validateCleanedData(business: Partial, originalBusiness: Business): string[] { - const issues: string[] = []; - - // Stricter business type validation - const originalType = this.detectBusinessType(originalBusiness.name || ''); - const cleanedType = this.detectBusinessType(business.name || ''); - - if (originalType !== 'business') { - if (cleanedType !== originalType) { - issues.push(`Business type mismatch: expected ${originalType}, got ${cleanedType}`); - } - - // Verify core identity is preserved - const originalKeywords = originalBusiness.name?.toLowerCase().split(/\W+/).filter(Boolean) || []; - const cleanedKeywords = business.name?.toLowerCase().split(/\W+/).filter(Boolean) || []; - - const significantKeywords = originalKeywords.filter(word => - !['the', 'and', 'llc', 'inc', 'corp', 'ltd', 'dba', 'est'].includes(word) - ); - - const missingKeywords = significantKeywords.filter(word => - !cleanedKeywords.some(cleaned => cleaned.includes(word)) - ); - - if (missingKeywords.length > 0) { - issues.push(`Core business identity lost: missing ${missingKeywords.join(', ')}`); - } - } - - if (business.name?.includes('[') || business.name?.includes(']')) { - issues.push('Name contains brackets'); - } - - if (!business.address?.match(/^\d+[^,]+,\s*[^,]+,\s*[A-Z]{2}\s+\d{5}$/)) { - const cleanedAddress = this.cleanAddress(business.address || ''); - if (cleanedAddress) { - business.address = cleanedAddress; - } else { - issues.push('Address format incorrect'); - } - } - - if (!business.phone?.match(/^\(\d{3}\) \d{3}-\d{4}$/)) { - issues.push('Phone format incorrect'); - } - - if (business.email?.includes('[') || business.email?.includes('mailto:')) { - issues.push('Email contains markdown/mailto'); - } - - if (business.description?.match(/\$|\b(?:call|email|visit|contact)\b/i)) { - issues.push('Description contains pricing or contact info'); - } - - return issues; - } - - private static async chat(messages: { role: string, content: string }[]) { - return this.retryWithBackoff(async () => { - try { - const response = await axios.post( - this.OLLAMA_URL, - { - model: this.MODEL_NAME, - prompt: messages[0].content, - stream: false, - options: { - temperature: 0.7, // Add some randomness - num_predict: 2048, - stop: ["<|im_end|>", "\n\n"], - top_k: 40, // Allow more variety - top_p: 0.9, // Allow more variety - seed: Date.now(), // Force different results each time - reset: true // Reset context window - } - }, - { - headers: { - 'Content-Type': 'application/json' - }, - timeout: 30000 - } - ); - - return response.data.response; - } catch (error) { - if (axios.isAxiosError(error)) { - if (error.code === 'ECONNREFUSED') { - throw new Error('Ollama server not running'); - } - if (error.response?.status === 404) { - throw new Error(`Model ${this.MODEL_NAME} not found. 
Run: ollama pull ${this.MODEL_NAME}`); - } - } - throw error; - } +export class DeepSeekService extends EventEmitter { + private readonly baseUrl: string; + private readonly model: string; + + constructor() { + super(); + this.baseUrl = process.env.OLLAMA_URL || 'http://localhost:11434'; + this.model = process.env.OLLAMA_MODEL || 'deepseek-coder:6.7b'; + console.log('DeepSeekService initialized with:', { + baseUrl: this.baseUrl, + model: this.model }); } - private static parseResponse(response: string) { - const lines = response.split('\n'); - const cleaned: Partial = {}; - - for (const line of lines) { - const [field, ...values] = line.split(':'); - const value = values.join(':').trim(); + async streamChat(messages: any[], onResult: (business: PartialBusiness) => Promise): Promise { + try { + console.log('\nStarting streaming chat request...'); - switch (field.toLowerCase().trim()) { - case 'name': - cleaned.name = value; - break; - case 'address': - cleaned.address = value; - break; - case 'phone': - cleaned.phone = value; - break; - case 'email': - cleaned.email = value; - break; - case 'description': - cleaned.description = value; - break; + // Enhanced system prompt with more explicit instructions + const enhancedMessages = [ + { + role: "system", + content: `You are a business search assistant powered by Deepseek Coder. Your task is to generate sample business listings in JSON format. + +When asked about businesses in a location, return business listings one at a time in this exact JSON format: + +\`\`\`json +{ + "name": "Example Plumbing Co", + "address": "123 Main St, Denver, CO 80202", + "phone": "(303) 555-0123", + "description": "Licensed plumbing contractor specializing in residential and commercial services", + "website": "https://exampleplumbing.com", + "rating": 4.8 +} +\`\`\` + +Important rules: +1. Return ONE business at a time in JSON format +2. Generate realistic but fictional business data +3. Use proper formatting for phone numbers and addresses +4. Include ratings from 1-5 stars (can use decimals) +5. When sorting by rating, return highest rated first +6. Make each business unique with different names, addresses, and phone numbers +7. Keep descriptions concise and professional +8. Use realistic website URLs based on business names +9. Return exactly the number of businesses requested` + }, + ...messages + ]; + + console.log('Sending streaming request to Ollama with messages:', JSON.stringify(enhancedMessages, null, 2)); + + const response = await axios.post(`${this.baseUrl}/api/chat`, { + model: this.model, + messages: enhancedMessages, + stream: true, + temperature: 0.7, + max_tokens: 1000, + system: "You are a business search assistant that returns one business at a time in JSON format." 
+ }, { + responseType: 'stream' + }); + + let currentJson = ''; + response.data.on('data', async (chunk: Buffer) => { + const text = chunk.toString(); + currentJson += text; + + // Try to find and process complete JSON objects + try { + const business = await this.extractNextBusiness(currentJson); + if (business) { + currentJson = ''; // Reset for next business + await onResult(business); + } + } catch (error) { + // Continue collecting more data if JSON is incomplete + console.debug('Collecting more data for complete JSON'); + } + }); + + return new Promise((resolve, reject) => { + response.data.on('end', () => resolve()); + response.data.on('error', (error: Error) => reject(error)); + }); + + } catch (error) { + console.error('\nDeepseek streaming chat error:', error); + if (error instanceof Error) { + console.error('Error stack:', error.stack); + throw new Error(`AI model streaming error: ${error.message}`); + } + throw new Error('Failed to get streaming response from AI model'); + } + } + + private async extractNextBusiness(text: string): Promise { + // Try to find a complete JSON object + const jsonMatch = text.match(/\{[^{]*\}/); + if (!jsonMatch) return null; + + try { + const jsonStr = jsonMatch[0]; + const business = JSON.parse(jsonStr); + + // Validate required fields + if (!business.name || !business.address || !business.phone || !business.description) { + return null; + } + + return business; + } catch (e) { + return null; + } + } + + async chat(messages: any[]): Promise { + try { + console.log('\nStarting chat request...'); + + // Enhanced system prompt with more explicit instructions + const enhancedMessages = [ + { + role: "system", + content: `You are a business search assistant powered by Deepseek Coder. Your task is to generate sample business listings in JSON format. + +When asked about businesses in a location, return business listings in this exact JSON format, with no additional text or comments: + +\`\`\`json +[ + { + "name": "Example Plumbing Co", + "address": "123 Main St, Denver, CO 80202", + "phone": "(303) 555-0123", + "description": "Licensed plumbing contractor specializing in residential and commercial services", + "website": "https://exampleplumbing.com", + "rating": 4.8 + } +] +\`\`\` + +Important rules: +1. Return ONLY the JSON array inside code blocks - no explanations or comments +2. Generate realistic but fictional business data +3. Use proper formatting for phone numbers (e.g., "(303) 555-XXXX") and addresses +4. Include ratings from 1-5 stars (can use decimals, e.g., 4.8) +5. When sorting by rating, sort from highest to lowest rating +6. When asked for a specific number of results, always return exactly that many +7. Make each business unique with different names, addresses, and phone numbers +8. Keep descriptions concise and professional +9. Use realistic website URLs based on business names` + }, + ...messages + ]; + + console.log('Sending request to Ollama with messages:', JSON.stringify(enhancedMessages, null, 2)); + + const response = await axios.post(`${this.baseUrl}/api/chat`, { + model: this.model, + messages: enhancedMessages, + stream: false, + temperature: 0.7, + max_tokens: 1000, + system: "You are a business search assistant that always responds with JSON data." 
+ }); + + if (!response.data) { + throw new Error('Empty response from AI model'); + } + + console.log('\nRaw response data:', JSON.stringify(response.data, null, 2)); + + if (!response.data.message?.content) { + throw new Error('No content in AI model response'); + } + + console.log('\nParsing AI response...'); + const results = await this.sanitizeJsonResponse(response.data.message.content); + console.log('Parsed results:', JSON.stringify(results, null, 2)); + + return results; + + } catch (error) { + console.error('\nDeepseek chat error:', error); + if (error instanceof Error) { + console.error('Error stack:', error.stack); + throw new Error(`AI model error: ${error.message}`); + } + throw new Error('Failed to get response from AI model'); + } + } + + private async sanitizeJsonResponse(text: string): Promise { + console.log('Attempting to parse response:', text); + + // First try to find JSON blocks + const jsonBlockMatch = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/); + if (jsonBlockMatch) { + try { + const jsonStr = jsonBlockMatch[1].trim(); + console.log('Found JSON block:', jsonStr); + const parsed = JSON.parse(jsonStr); + return Array.isArray(parsed) ? parsed : [parsed]; + } catch (e) { + console.error('Failed to parse JSON block:', e); } } - return cleaned; + // Then try to find any JSON-like structure + const jsonPatterns = [ + /\[\s*\{[\s\S]*\}\s*\]/, // Array of objects + /\{[\s\S]*\}/ // Single object + ]; + + for (const pattern of jsonPatterns) { + const match = text.match(pattern); + if (match) { + try { + const jsonStr = match[0].trim(); + console.log('Found JSON pattern:', jsonStr); + const parsed = JSON.parse(jsonStr); + return Array.isArray(parsed) ? parsed : [parsed]; + } catch (e) { + console.error('Failed to parse JSON pattern:', e); + continue; + } + } + } + + // If no valid JSON found, try to extract structured data + try { + const extractedData = this.extractBusinessData(text); + if (extractedData) { + console.log('Extracted business data:', extractedData); + return [extractedData]; + } + } catch (e) { + console.error('Failed to extract business data:', e); + } + + throw new Error('No valid JSON or business information found in response'); + } + + private extractBusinessData(text: string): PartialBusiness { + // Extract business information using regex patterns + const businessInfo: PartialBusiness = { + name: this.extractField(text, 'name', '[^"\\n]+') || 'Unknown Business', + address: this.extractField(text, 'address', '[^"\\n]+') || 'Address not available', + phone: this.extractField(text, 'phone', '[^"\\n]+') || 'Phone not available', + description: this.extractField(text, 'description', '[^"\\n]+') || 'No description available' + }; + + const website = this.extractField(text, 'website', '[^"\\n]+'); + if (website) { + businessInfo.website = website; + } + + const rating = this.extractField(text, 'rating', '[0-9.]+'); + if (rating) { + businessInfo.rating = parseFloat(rating); + } + + return businessInfo; + } + + private extractField(text: string, field: string, pattern: string): string { + const regex = new RegExp(`"?${field}"?\\s*[:=]\\s*"?(${pattern})"?`, 'i'); + const match = text.match(regex); + return match ? 
match[1].trim() : ''; } } \ No newline at end of file diff --git a/src/lib/services/healthCheck.ts b/src/lib/services/healthCheck.ts index f53f0d0..425c019 100644 --- a/src/lib/services/healthCheck.ts +++ b/src/lib/services/healthCheck.ts @@ -1,53 +1,40 @@ import axios from 'axios'; -import { env } from '../../config/env'; import { supabase } from '../supabase'; +import { env } from '../../config/env'; export class HealthCheckService { - static async checkOllama(): Promise { + private static async checkSupabase(): Promise { try { - const response = await axios.get(`${env.ollama.url}/api/tags`); - return response.status === 200; + const { data, error } = await supabase.from('searches').select('count'); + return !error; } catch (error) { - console.error('Ollama health check failed:', error); + console.error('Supabase health check failed:', error); return false; } } - static async checkSearxNG(): Promise { + private static async checkSearx(): Promise { try { - const response = await axios.get(`${env.searxng.currentUrl}/config`); + const response = await axios.get(env.SEARXNG_URL); return response.status === 200; } catch (error) { - try { - const response = await axios.get(`${env.searxng.instances[0]}/config`); - return response.status === 200; - } catch (fallbackError) { - console.error('SearxNG health check failed:', error); - return false; - } + console.error('SearxNG health check failed:', error); + return false; } } - static async checkSupabase(): Promise { - try { - console.log('Checking Supabase connection...'); - console.log('URL:', env.supabase.url); + public static async checkHealth(): Promise<{ + supabase: boolean; + searx: boolean; + }> { + const [supabaseHealth, searxHealth] = await Promise.all([ + this.checkSupabase(), + this.checkSearx() + ]); - // Just check if we can connect and query, don't care about results - const { error } = await supabase - .from('businesses') - .select('count', { count: 'planned', head: true }); - - if (error) { - console.error('Supabase query error:', error); - return false; - } - - console.log('Supabase connection successful'); - return true; - } catch (error) { - console.error('Supabase connection failed:', error); - return false; - } + return { + supabase: supabaseHealth, + searx: searxHealth + }; } } \ No newline at end of file diff --git a/src/lib/services/searchService.ts b/src/lib/services/searchService.ts index 5f65d07..ea4b4d3 100644 --- a/src/lib/services/searchService.ts +++ b/src/lib/services/searchService.ts @@ -1,97 +1,135 @@ +import EventEmitter from 'events'; import { DeepSeekService } from './deepseekService'; -import { createClient } from '@supabase/supabase-js'; +import { DatabaseService } from './databaseService'; import { Business } from '../types'; -export class SearchService { - private supabase; - private deepseek; - - constructor() { - this.supabase = createClient( - process.env.SUPABASE_URL!, - process.env.SUPABASE_KEY! 
- ); - this.deepseek = DeepSeekService; - } - - async search(query: string, location: string): Promise { - if (!query || !location) { - throw new Error('Query and location are required'); - } - - // Check cache first - const cacheKey = `${query}_${location}`.toLowerCase(); - const { data: cacheData } = await this.supabase - .from('cache') - .select() - .eq('key', cacheKey) - .single(); - - if (cacheData && cacheData.value) { - return cacheData.value as Business[]; - } - - try { - // Perform search - const searchResults = await this.performSearch(query, location); - - // Cache results - await this.cacheResults(cacheKey, searchResults); - - return searchResults; - } catch (error: any) { - if (error.response?.status === 429) { - throw new Error('Rate limit exceeded'); - } - throw error; - } - } - - async getBusinessById(id: string): Promise { - const { data, error } = await this.supabase - .from('businesses') - .select() - .eq('id', id) - .single(); - - if (error || !data) { - return null; - } - - return data as Business; - } - - private async performSearch(query: string, location: string): Promise { - // Implementation would use DeepSeek service to perform search - // This is a placeholder implementation - const mockBusiness: Business = { - id: 'test_1', - name: "Denver's Best Plumbing", - address: "1234 Main Street, Denver, CO 80202", - phone: "(720) 555-1234", - email: "support@denverplumbing.com", - description: "Professional plumbing services", - source: 'test', - website: 'https://example.com', - rating: 4.8, - location: { lat: 39.7392, lng: -104.9903 }, - openingHours: [] +interface PartialBusiness { + name: string; + address: string; + phone: string; + description: string; + website?: string; + rating?: number; + source?: string; + location?: { + lat: number; + lng: number; }; +} - return [mockBusiness]; - } +export class SearchService extends EventEmitter { + private deepseekService: DeepSeekService; + private databaseService: DatabaseService; - private async cacheResults(key: string, results: Business[]): Promise { - const expiresAt = new Date(); - expiresAt.setDate(expiresAt.getDate() + Number(process.env.CACHE_DURATION_DAYS || 7)); + constructor() { + super(); + this.deepseekService = new DeepSeekService(); + this.databaseService = new DatabaseService(); + + this.deepseekService.on('progress', (data) => { + this.emit('progress', data); + }); + } - await this.supabase - .from('cache') - .insert([{ - key, - value: results, - created_at: new Date().toISOString(), - expires_at: expiresAt.toISOString() - }]); - } + async streamSearch(query: string, location: string, limit: number = 10): Promise { + try { + // First, try to find cached results in database + const cachedResults = await this.databaseService.findBusinessesByQuery(query, location); + if (cachedResults.length > 0) { + // Emit cached results one by one + for (const result of this.sortByRating(cachedResults).slice(0, limit)) { + this.emit('result', result); + await new Promise(resolve => setTimeout(resolve, 100)); // Small delay between results + } + this.emit('complete'); + return; + } + + // If no cached results, use DeepSeek to generate new results + const aiResults = await this.deepseekService.streamChat([{ + role: "user", + content: `Find ${query} in ${location}. You must return exactly ${limit} results in valid JSON format, sorted by rating from highest to lowest. Each result must include a rating between 1-5 stars. 
Do not include any comments or explanations in the JSON.` + }], async (business: PartialBusiness) => { + try { + // Extract lat/lng from address using a geocoding service + const coords = await this.geocodeAddress(business.address); + + // Save to database and emit result + const savedBusiness = await this.databaseService.saveBusiness({ + ...business, + source: 'deepseek', + location: coords || { + lat: 39.7392, // Denver's default coordinates + lng: -104.9903 + } + }); + + this.emit('result', savedBusiness); + } catch (error) { + console.error('Error processing business:', error); + this.emit('error', error); + } + }); + + this.emit('complete'); + + } catch (error) { + console.error('Search error:', error); + this.emit('error', error); + throw error; + } + } + + async search(query: string, location: string, limit: number = 10): Promise { + try { + // First, try to find cached results in database + const cachedResults = await this.databaseService.findBusinessesByQuery(query, location); + if (cachedResults.length > 0) { + return this.sortByRating(cachedResults).slice(0, limit); + } + + // If no cached results, use DeepSeek to generate new results + const aiResults = await this.deepseekService.chat([{ + role: "user", + content: `Find ${query} in ${location}. You must return exactly ${limit} results in valid JSON format, sorted by rating from highest to lowest. Each result must include a rating between 1-5 stars. Do not include any comments or explanations in the JSON.` + }]); + + // Save the results to database + const savedResults = await Promise.all( + (aiResults as PartialBusiness[]).map(async (business: PartialBusiness) => { + // Extract lat/lng from address using a geocoding service + const coords = await this.geocodeAddress(business.address); + + return this.databaseService.saveBusiness({ + ...business, + source: 'deepseek', + location: coords || { + lat: 39.7392, // Denver's default coordinates + lng: -104.9903 + } + }); + }) + ); + + return this.sortByRating(savedResults); + + } catch (error) { + console.error('Search error:', error); + throw error; + } + } + + private sortByRating(businesses: Business[]): Business[] { + return businesses.sort((a, b) => b.rating - a.rating); + } + + private async geocodeAddress(address: string): Promise<{ lat: number; lng: number } | null> { + // TODO: Implement real geocoding service + // For now, return null to use default coordinates + return null; + } + + async getBusinessById(id: string): Promise { + return this.databaseService.getBusinessById(id); + } } \ No newline at end of file diff --git a/src/lib/supabase.ts b/src/lib/supabase.ts index da1f26f..9bfbb4b 100644 --- a/src/lib/supabase.ts +++ b/src/lib/supabase.ts @@ -2,41 +2,34 @@ import { createClient } from '@supabase/supabase-js'; import { env } from '../config/env'; // Validate Supabase configuration -if (!env.supabase.url || !env.supabase.anonKey) { +if (!env.SUPABASE_URL || !env.SUPABASE_KEY) { throw new Error('Missing Supabase configuration'); } // Create Supabase client export const supabase = createClient( - env.supabase.url, - env.supabase.anonKey, + env.SUPABASE_URL, + env.SUPABASE_KEY, { auth: { autoRefreshToken: true, - persistSession: true + persistSession: true, + detectSessionInUrl: true } } ); -// Test the connection on startup -async function testConnection() { +// Test connection function +export async function testConnection() { try { - console.log('Checking Supabase connection...'); - console.log('URL:', env.supabase.url); - - const { error } = await supabase - 
.from('businesses') - .select('count', { count: 'planned', head: true }); - - if (error) { - console.error('โŒ Supabase initialization error:', error); - } else { - console.log('โœ… Supabase connection initialized successfully'); - } + console.log('Testing Supabase connection...'); + console.log('URL:', env.SUPABASE_URL); + const { data, error } = await supabase.from('searches').select('count'); + if (error) throw error; + console.log('Supabase connection successful'); + return true; } catch (error) { - console.error('โŒ Failed to initialize Supabase:', error); + console.error('Supabase connection failed:', error); + return false; } -} - -// Run the test -testConnection().catch(console.error); \ No newline at end of file +} \ No newline at end of file diff --git a/src/lib/types.ts b/src/lib/types.ts index a8cf346..0406bea 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -1,22 +1,16 @@ export interface Business { id: string; name: string; - phone?: string; - email?: string; - address?: string; - rating?: number; + address: string; + phone: string; + description: string; website?: string; - logo?: string; source: string; - description?: string; - location?: { + rating: number; + location: { lat: number; lng: number; }; - openingHours?: string[]; - services?: string[]; - reviewCount?: number; - hours?: string[]; } export type BusinessData = Business; \ No newline at end of file diff --git a/src/middleware/auth.ts b/src/middleware/auth.ts new file mode 100644 index 0000000..a20023d --- /dev/null +++ b/src/middleware/auth.ts @@ -0,0 +1,47 @@ +import { Request, Response, NextFunction } from 'express'; +import { supabase } from '../lib/supabase'; + +// Extend Express Request type to include user +declare global { + namespace Express { + interface Request { + user?: { + id: string; + email: string; + role: string; + }; + } + } +} + +export async function authenticateUser( + req: Request, + res: Response, + next: NextFunction +) { + try { + const authHeader = req.headers.authorization; + if (!authHeader) { + return res.status(401).json({ error: 'No authorization header' }); + } + + const token = authHeader.replace('Bearer ', ''); + const { data: { user }, error } = await supabase.auth.getUser(token); + + if (error || !user) { + return res.status(401).json({ error: 'Invalid token' }); + } + + // Add user info to request + req.user = { + id: user.id, + email: user.email!, + role: (user.app_metadata?.role as string) || 'user' + }; + + next(); + } catch (error) { + console.error('Authentication error:', error); + res.status(401).json({ error: 'Authentication failed' }); + } +} \ No newline at end of file diff --git a/src/routes/api.ts b/src/routes/api.ts index 0987c6d..2a119d9 100644 --- a/src/routes/api.ts +++ b/src/routes/api.ts @@ -1,61 +1,148 @@ import express from 'express'; import { SearchService } from '../lib/services/searchService'; +import { Business } from '../lib/types'; const router = express.Router(); const searchService = new SearchService(); // Error handling middleware for JSON parsing errors -router.use((err: any, req: express.Request, res: express.Response, next: express.NextFunction) => { - if (err instanceof SyntaxError && 'body' in err) { - return res.status(400).json({ error: 'Invalid JSON' }); - } - next(); +router.use((err: Error, req: express.Request, res: express.Response, next: express.NextFunction) => { + if (err instanceof SyntaxError && 'body' in err) { + return res.status(400).json({ + success: false, + error: 'Invalid JSON' + }); + } + next(); }); -// Search 
endpoint -router.post('/search', async (req, res) => { - try { +// Business categories endpoint +router.get('/categories', (req, res) => { + const categories = [ + 'Restaurant', + 'Retail', + 'Service', + 'Healthcare', + 'Professional', + 'Entertainment', + 'Education', + 'Technology', + 'Manufacturing', + 'Construction', + 'Transportation', + 'Real Estate', + 'Financial', + 'Legal', + 'Other' + ]; + res.json(categories); +}); + +// Streaming search endpoint +router.post('/search/stream', (req, res) => { const { query, location } = req.body; if (!query || !location) { - return res.status(400).json({ - error: 'Query and location are required' - }); + return res.status(400).json({ + success: false, + error: 'Query and location are required' + }); } - const results = await searchService.search(query, location); - res.json({ results }); - } catch (error: any) { - if (error.response?.status === 429) { - return res.status(429).json({ - error: 'Rate limit exceeded' - }); - } + // Set headers for SSE + res.setHeader('Content-Type', 'text/event-stream'); + res.setHeader('Cache-Control', 'no-cache'); + res.setHeader('Connection', 'keep-alive'); - res.status(500).json({ - error: error.message || 'Internal server error' + // Send initial message + res.write('data: {"type":"start","message":"Starting search..."}\n\n'); + + // Create search service instance for this request + const search = new SearchService(); + + // Listen for individual results + search.on('result', (business: Business) => { + res.write(`data: {"type":"result","business":${JSON.stringify(business)}}\n\n`); + }); + + // Listen for progress updates + search.on('progress', (data: any) => { + res.write(`data: {"type":"progress","data":${JSON.stringify(data)}}\n\n`); + }); + + // Listen for completion + search.on('complete', () => { + res.write('data: {"type":"complete","message":"Search complete"}\n\n'); + res.end(); + }); + + // Listen for errors + search.on('error', (error: Error) => { + res.write(`data: {"type":"error","message":${JSON.stringify(error.message)}}\n\n`); + res.end(); + }); + + // Start the search + search.streamSearch(query, location).catch(error => { + console.error('Search error:', error); + res.write(`data: {"type":"error","message":${JSON.stringify(error.message)}}\n\n`); + res.end(); }); - } }); -// Get business details endpoint -router.get('/business/:id', async (req, res) => { - try { - const { id } = req.params; - const business = await searchService.getBusinessById(id); - - if (!business) { - return res.status(404).json({ - error: 'Business not found' - }); +// Regular search endpoint (non-streaming) +router.post('/search', async (req, res) => { + const { query, location } = req.body; + + if (!query || !location) { + return res.status(400).json({ + success: false, + error: 'Query and location are required' + }); } - res.json(business); - } catch (error: any) { - res.status(500).json({ - error: error.message || 'Internal server error' - }); - } + try { + const results = await searchService.search(query, location); + res.json({ + success: true, + results + }); + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : 'An error occurred during search'; + console.error('Search error:', error); + res.status(500).json({ + success: false, + error: errorMessage + }); + } +}); + +// Get business by ID +router.get('/business/:id', async (req, res) => { + const { id } = req.params; + + try { + const business = await searchService.getBusinessById(id); + + if (!business) { + return res.status(404).json({ + success: false, + error: 'Business not found' + }); + } + + res.json({ + success: true, + business + }); + } catch (error) { + const errorMessage = error instanceof Error ? error.message : 'Failed to fetch business details'; + console.error('Error fetching business:', error); + res.status(500).json({ + success: false, + error: errorMessage + }); + } }); export default router; \ No newline at end of file diff --git a/src/routes/business.ts b/src/routes/business.ts new file mode 100644 index 0000000..c4c78fc --- /dev/null +++ b/src/routes/business.ts @@ -0,0 +1,413 @@ +import { Router } from 'express'; +import { z } from 'zod'; +import { supabase } from '../lib/supabase'; +import { authenticateUser } from '../middleware/auth'; + +const router = Router(); + +// Initialize database tables +async function initializeTables() { + try { + // Create businesses table if it doesn't exist + const { error: businessError } = await supabase.from('businesses').select('id').limit(1); + + if (businessError?.code === 'PGRST204') { + const { error } = await supabase.rpc('execute_sql', { + sql_string: ` + CREATE TABLE IF NOT EXISTS public.businesses ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + phone TEXT, + email TEXT, + address TEXT, + rating NUMERIC, + website TEXT, + description TEXT, + source TEXT, + logo TEXT, + latitude NUMERIC, + longitude NUMERIC, + last_updated TIMESTAMP WITH TIME ZONE DEFAULT timezone('utc'::text, now()), + search_count INTEGER DEFAULT 1, + created_at TIMESTAMP WITH TIME ZONE DEFAULT timezone('utc'::text, now()), + place_id TEXT + ); + ` + }); + if (error) console.error('Error creating businesses table:', error); + } + + // Create business_profiles table if it doesn't exist + const { error: profileError } = await supabase.from('business_profiles').select('business_id').limit(1); + + if (profileError?.code === 'PGRST204') { + const { error } = await supabase.rpc('execute_sql', { + sql_string: ` + CREATE TABLE IF NOT EXISTS public.business_profiles ( + business_id TEXT PRIMARY KEY REFERENCES public.businesses(id), + claimed_by UUID REFERENCES auth.users(id), + claimed_at TIMESTAMP WITH TIME ZONE, + verification_status TEXT NOT NULL DEFAULT 'unverified', + social_links JSONB DEFAULT '{}', + hours_of_operation JSONB DEFAULT '{}', + additional_photos TEXT[] DEFAULT '{}', + tags TEXT[] DEFAULT '{}', + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + CONSTRAINT valid_verification_status CHECK (verification_status IN ('unverified', 'pending', 'verified', 'rejected')) + ); + + CREATE TABLE IF NOT EXISTS public.business_claims ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + business_id TEXT NOT NULL REFERENCES public.businesses(id), + user_id UUID NOT NULL REFERENCES auth.users(id), + status TEXT NOT NULL DEFAULT 'pending', + proof_documents TEXT[] DEFAULT '{}', + submitted_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + reviewed_at TIMESTAMP WITH TIME ZONE, + reviewed_by UUID REFERENCES auth.users(id), + notes TEXT, + CONSTRAINT valid_claim_status CHECK (status IN ('pending', 'approved', 'rejected')) + ); + + CREATE INDEX IF NOT EXISTS 
idx_business_profiles_claimed_by ON public.business_profiles(claimed_by); + CREATE INDEX IF NOT EXISTS idx_business_claims_business_id ON public.business_claims(business_id); + CREATE INDEX IF NOT EXISTS idx_business_claims_user_id ON public.business_claims(user_id); + CREATE INDEX IF NOT EXISTS idx_business_claims_status ON public.business_claims(status); + + ALTER TABLE public.business_profiles ENABLE ROW LEVEL SECURITY; + ALTER TABLE public.business_claims ENABLE ROW LEVEL SECURITY; + + DROP POLICY IF EXISTS "Public profiles are viewable by everyone" ON public.business_profiles; + CREATE POLICY "Public profiles are viewable by everyone" + ON public.business_profiles FOR SELECT + USING (true); + + DROP POLICY IF EXISTS "Profiles can be updated by verified owners" ON public.business_profiles; + CREATE POLICY "Profiles can be updated by verified owners" + ON public.business_profiles FOR UPDATE + USING (auth.uid() = claimed_by AND verification_status = 'verified'); + + DROP POLICY IF EXISTS "Users can view their own claims" ON public.business_claims; + CREATE POLICY "Users can view their own claims" + ON public.business_claims FOR SELECT + USING (auth.uid() = user_id); + + DROP POLICY IF EXISTS "Users can create claims" ON public.business_claims; + CREATE POLICY "Users can create claims" + ON public.business_claims FOR INSERT + WITH CHECK (auth.uid() = user_id); + + DROP POLICY IF EXISTS "Only admins can review claims" ON public.business_claims; + CREATE POLICY "Only admins can review claims" + ON public.business_claims FOR UPDATE + USING (EXISTS ( + SELECT 1 FROM auth.users + WHERE auth.uid() = id + AND raw_app_meta_data->>'role' = 'admin' + )); + ` + }); + if (error) console.error('Error creating profile tables:', error); + } + + // Insert test data + const { error: testDataError } = await supabase + .from('businesses') + .insert([ + { + id: 'test-business-1', + name: 'Test Coffee Shop', + phone: '303-555-0123', + email: 'contact@testcoffee.com', + address: '123 Test St, Denver, CO 80202', + rating: 4.5, + website: 'https://testcoffee.com', + description: 'A cozy coffee shop in downtown Denver serving artisanal coffee and pastries.', + source: 'manual' + } + ]) + .select() + .single(); + + if (testDataError) { + console.error('Error inserting test data:', testDataError); + } + + // Create test business profile + const { error: testProfileError } = await supabase + .from('business_profiles') + .insert([ + { + business_id: 'test-business-1', + verification_status: 'unverified', + social_links: { + facebook: 'https://facebook.com/testcoffee', + instagram: 'https://instagram.com/testcoffee' + }, + hours_of_operation: { + monday: ['7:00', '19:00'], + tuesday: ['7:00', '19:00'], + wednesday: ['7:00', '19:00'], + thursday: ['7:00', '19:00'], + friday: ['7:00', '20:00'], + saturday: ['8:00', '20:00'], + sunday: ['8:00', '18:00'] + }, + tags: ['coffee', 'pastries', 'breakfast', 'lunch'] + } + ]) + .select() + .single(); + + if (testProfileError) { + console.error('Error creating test profile:', testProfileError); + } + } catch (error) { + console.error('Error initializing tables:', error); + } +} + +// Call initialization on startup +initializeTables(); + +// Schema for business profile updates +const profileUpdateSchema = z.object({ + social_links: z.record(z.string()).optional(), + hours_of_operation: z.record(z.array(z.string())).optional(), + additional_photos: z.array(z.string()).optional(), + tags: z.array(z.string()).optional(), +}); + +// Schema for claim submissions +const 
claimSubmissionSchema = z.object({ + business_id: z.string(), + proof_documents: z.array(z.string()), + notes: z.string().optional(), +}); + +// Get business profile +router.get('/:businessId', async (req, res) => { + try { + const { businessId } = req.params; + + // Get business details and profile + const { data: business, error: businessError } = await supabase + .from('businesses') + .select(` + *, + business_profiles (*) + `) + .eq('id', businessId) + .single(); + + if (businessError) throw businessError; + if (!business) { + return res.status(404).json({ error: 'Business not found' }); + } + + res.json(business); + } catch (error) { + console.error('Error fetching business profile:', error); + res.status(500).json({ error: 'Failed to fetch business profile' }); + } +}); + +// Update business profile (requires authentication) +router.patch('/:businessId/profile', authenticateUser, async (req, res) => { + try { + const { businessId } = req.params; + if (!req.user) { + return res.status(401).json({ error: 'User not authenticated' }); + } + const userId = req.user.id; + const updates = profileUpdateSchema.parse(req.body); + + // Check if user owns this profile + const { data: profile } = await supabase + .from('business_profiles') + .select('claimed_by, verification_status') + .eq('business_id', businessId) + .single(); + + if (!profile || profile.claimed_by !== userId || profile.verification_status !== 'verified') { + return res.status(403).json({ error: 'Not authorized to update this profile' }); + } + + // Update profile + const { error: updateError } = await supabase + .from('business_profiles') + .update({ + ...updates, + updated_at: new Date().toISOString(), + }) + .eq('business_id', businessId); + + if (updateError) throw updateError; + + res.json({ message: 'Profile updated successfully' }); + } catch (error) { + console.error('Error updating business profile:', error); + res.status(500).json({ error: 'Failed to update profile' }); + } +}); + +// Submit a claim for a business +router.post('/claim', authenticateUser, async (req, res) => { + try { + if (!req.user) { + return res.status(401).json({ error: 'User not authenticated' }); + } + const userId = req.user.id; + const claim = claimSubmissionSchema.parse(req.body); + + // Check if business exists + const { data: business } = await supabase + .from('businesses') + .select('id') + .eq('id', claim.business_id) + .single(); + + if (!business) { + return res.status(404).json({ error: 'Business not found' }); + } + + // Check if business is already claimed + const { data: existingProfile } = await supabase + .from('business_profiles') + .select('claimed_by') + .eq('business_id', claim.business_id) + .single(); + + if (existingProfile?.claimed_by) { + return res.status(400).json({ error: 'Business is already claimed' }); + } + + // Check for existing pending claims + const { data: existingClaim } = await supabase + .from('business_claims') + .select('id') + .eq('business_id', claim.business_id) + .eq('status', 'pending') + .single(); + + if (existingClaim) { + return res.status(400).json({ error: 'A pending claim already exists for this business' }); + } + + // Create claim + const { error: claimError } = await supabase + .from('business_claims') + .insert({ + business_id: claim.business_id, + user_id: userId, + proof_documents: claim.proof_documents, + notes: claim.notes, + }); + + if (claimError) throw claimError; + + res.json({ message: 'Claim submitted successfully' }); + } catch (error) { + console.error('Error submitting 
business claim:', error); + res.status(500).json({ error: 'Failed to submit claim' }); + } +}); + +// Get claims for a business (admin only) +router.get('/:businessId/claims', authenticateUser, async (req, res) => { + try { + const { businessId } = req.params; + if (!req.user) { + return res.status(401).json({ error: 'User not authenticated' }); + } + const userId = req.user.id; + + // Check if user is admin + const { data: user } = await supabase + .from('users') + .select('raw_app_meta_data') + .eq('id', userId) + .single(); + + if (user?.raw_app_meta_data?.role !== 'admin') { + return res.status(403).json({ error: 'Not authorized' }); + } + + const { data: claims, error } = await supabase + .from('business_claims') + .select(` + *, + user:user_id ( + email + ) + `) + .eq('business_id', businessId) + .order('submitted_at', { ascending: false }); + + if (error) throw error; + + res.json(claims); + } catch (error) { + console.error('Error fetching business claims:', error); + res.status(500).json({ error: 'Failed to fetch claims' }); + } +}); + +// Review a claim (admin only) +router.post('/claims/:claimId/review', authenticateUser, async (req, res) => { + try { + const { claimId } = req.params; + if (!req.user) { + return res.status(401).json({ error: 'User not authenticated' }); + } + const userId = req.user.id; + const { status, notes } = z.object({ + status: z.enum(['approved', 'rejected']), + notes: z.string().optional(), + }).parse(req.body); + + // Check if user is admin + const { data: user } = await supabase + .from('users') + .select('raw_app_meta_data') + .eq('id', userId) + .single(); + + if (user?.raw_app_meta_data?.role !== 'admin') { + return res.status(403).json({ error: 'Not authorized' }); + } + + // Get claim details + const { data: claim } = await supabase + .from('business_claims') + .select('business_id, status') + .eq('id', claimId) + .single(); + + if (!claim) { + return res.status(404).json({ error: 'Claim not found' }); + } + + if (claim.status !== 'pending') { + return res.status(400).json({ error: 'Claim has already been reviewed' }); + } + + // Start a transaction + const { error: updateError } = await supabase.rpc('review_business_claim', { + p_claim_id: claimId, + p_business_id: claim.business_id, + p_user_id: userId, + p_status: status, + p_notes: notes + }); + + if (updateError) throw updateError; + + res.json({ message: 'Claim reviewed successfully' }); + } catch (error) { + console.error('Error reviewing business claim:', error); + res.status(500).json({ error: 'Failed to review claim' }); + } +}); + +export default router; \ No newline at end of file diff --git a/src/routes/search.ts b/src/routes/search.ts index e24b3f9..759d60a 100644 --- a/src/routes/search.ts +++ b/src/routes/search.ts @@ -1,160 +1,310 @@ -import express from 'express'; -import logger from '../utils/logger'; -import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import type { Embeddings } from '@langchain/core/embeddings'; -import { ChatOpenAI } from '@langchain/openai'; -import { - getAvailableChatModelProviders, - getAvailableEmbeddingModelProviders, -} from '../lib/providers'; -import { searchHandlers } from '../websocket/messageHandler'; -import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages'; -import { MetaSearchAgentType } from '../search/metaSearchAgent'; +import { Router, Response as ExpressResponse } from 'express'; +import { z } from 'zod'; +import fetch from 'node-fetch'; +import { Response as FetchResponse } from 
'node-fetch';
+import { supabase } from '../lib/supabase';
+import { env } from '../config/env';
 
-const router = express.Router();
+const router = Router();
 
-interface chatModel {
-  provider: string;
-  model: string;
-  customOpenAIBaseURL?: string;
-  customOpenAIKey?: string;
+const searchSchema = z.object({
+  query: z.string().min(1),
+});
+
+interface Business {
+  id: string;
+  name: string;
+  description: string;
+  website: string;
+  phone: string | null;
+  address: string | null;
 }
 
-interface embeddingModel {
-  provider: string;
-  model: string;
+interface SearxResult {
+  url: string;
+  title: string;
+  content: string;
+  engine: string;
+  score: number;
 }
 
-interface ChatRequestBody {
-  optimizationMode: 'speed' | 'balanced';
-  focusMode: string;
-  chatModel?: chatModel;
-  embeddingModel?: embeddingModel;
+interface SearxResponse {
   query: string;
-  history: Array<[string, string]>;
+  results: SearxResult[];
 }
 
-router.post('/', async (req, res) => {
+async function getCachedResults(query: string): Promise<Business[]> {
+  console.log('Fetching cached results for query:', query);
+  const normalizedQuery = query.toLowerCase()
+    .trim()
+    .replace(/,/g, '') // Remove commas
+    .replace(/\s+/g, ' '); // Normalize whitespace
+
+  const searchTerms = normalizedQuery.split(' ').filter(term => term.length > 0);
+  console.log('Normalized search terms:', searchTerms);
+
+  // First try exact match
+  const { data: exactMatch } = await supabase
+    .from('search_cache')
+    .select('*')
+    .eq('query', normalizedQuery)
+    .single();
+
+  if (exactMatch) {
+    console.log('Found exact match in cache');
+    return exactMatch.results as Business[];
+  }
+
+  // Then try fuzzy search
+  console.log('Trying fuzzy search with terms:', searchTerms);
+  const searchConditions = searchTerms.map(term => `query.ilike.%${term}%`);
+  const { data: cachedResults, error } = await supabase
+    .from('search_cache')
+    .select('*')
+    .or(searchConditions.join(','));
+
+  if (error) {
+    console.error('Error fetching cached results:', error);
+    return [];
+  }
+
+  if (!cachedResults || cachedResults.length === 0) {
+    console.log('No cached results found');
+    return [];
+  }
+
+  console.log(`Found ${cachedResults.length} cached searches`);
+
+  // Combine and deduplicate results from all matching searches
+  const allResults = cachedResults.flatMap(cache => cache.results as Business[]);
+  const uniqueResults = Array.from(new Map(allResults.map(item => [item.id, item])).values());
+
+  console.log(`Combined into ${uniqueResults.length} unique businesses`);
+
+  // Sort by relevance to search terms
+  const sortedResults = uniqueResults.sort((a, b) => {
+    const aScore = searchTerms.filter(term =>
+      a.name.toLowerCase().includes(term) ||
+      a.description.toLowerCase().includes(term)
+    ).length;
+    const bScore = searchTerms.filter(term =>
+      b.name.toLowerCase().includes(term) ||
+      b.description.toLowerCase().includes(term)
+    ).length;
+    return bScore - aScore;
+  });
+
+  return sortedResults;
+}
+
+async function searchSearxNG(query: string): Promise<Business[]> {
+  console.log('Starting SearxNG search for query:', query);
   try {
-    const body: ChatRequestBody = req.body;
+    const params = new URLSearchParams({
+      q: `${query} denver business`,
+      format: 'json',
+      language: 'en',
+      time_range: '',
+      safesearch: '1',
+      engines: 'google,bing,duckduckgo'
+    });
 
-    if (!body.focusMode || !body.query) {
-      return res.status(400).json({ message: 'Missing focus mode or query' });
-    }
+    const searchUrl = `${env.SEARXNG_URL}/search?${params.toString()}`;
+    console.log('Searching SearxNG at 
URL:', searchUrl); - body.history = body.history || []; - body.optimizationMode = body.optimizationMode || 'balanced'; - - const history: BaseMessage[] = body.history.map((msg) => { - if (msg[0] === 'human') { - return new HumanMessage({ - content: msg[1], - }); - } else { - return new AIMessage({ - content: msg[1], - }); + const response: FetchResponse = await fetch(searchUrl, { + method: 'GET', + headers: { + 'Accept': 'application/json', } }); - const [chatModelProviders, embeddingModelProviders] = await Promise.all([ - getAvailableChatModelProviders(), - getAvailableEmbeddingModelProviders(), - ]); - - const chatModelProvider = - body.chatModel?.provider || Object.keys(chatModelProviders)[0]; - const chatModel = - body.chatModel?.model || - Object.keys(chatModelProviders[chatModelProvider])[0]; - - const embeddingModelProvider = - body.embeddingModel?.provider || Object.keys(embeddingModelProviders)[0]; - const embeddingModel = - body.embeddingModel?.model || - Object.keys(embeddingModelProviders[embeddingModelProvider])[0]; - - let llm: BaseChatModel | undefined; - let embeddings: Embeddings | undefined; - - if (body.chatModel?.provider === 'custom_openai') { - if ( - !body.chatModel?.customOpenAIBaseURL || - !body.chatModel?.customOpenAIKey - ) { - return res - .status(400) - .json({ message: 'Missing custom OpenAI base URL or key' }); - } - - llm = new ChatOpenAI({ - modelName: body.chatModel.model, - openAIApiKey: body.chatModel.customOpenAIKey, - temperature: 0.7, - configuration: { - baseURL: body.chatModel.customOpenAIBaseURL, - }, - }) as unknown as BaseChatModel; - } else if ( - chatModelProviders[chatModelProvider] && - chatModelProviders[chatModelProvider][chatModel] - ) { - llm = chatModelProviders[chatModelProvider][chatModel] - .model as unknown as BaseChatModel | undefined; + if (!response.ok) { + throw new Error(`SearxNG search failed: ${response.statusText} (${response.status})`); } - if ( - embeddingModelProviders[embeddingModelProvider] && - embeddingModelProviders[embeddingModelProvider][embeddingModel] - ) { - embeddings = embeddingModelProviders[embeddingModelProvider][ - embeddingModel - ].model as Embeddings | undefined; + const data = await response.json() as SearxResponse; + console.log(`Got ${data.results?.length || 0} raw results from SearxNG`); + console.log('Sample result:', data.results?.[0]); + + if (!data.results || data.results.length === 0) { + return []; } - if (!llm || !embeddings) { - return res.status(400).json({ message: 'Invalid model selected' }); - } + const filteredResults = data.results + .filter(result => + result.title && + result.url && + !result.url.includes('yelp.com/search') && + !result.url.includes('google.com/search') && + !result.url.includes('bbb.org/search') && + !result.url.includes('thumbtack.com/search') && + !result.url.includes('angi.com/search') && + !result.url.includes('yellowpages.com/search') + ); - const searchHandler: MetaSearchAgentType = searchHandlers[body.focusMode]; + console.log(`Filtered to ${filteredResults.length} relevant results`); + console.log('Sample filtered result:', filteredResults[0]); - if (!searchHandler) { - return res.status(400).json({ message: 'Invalid focus mode' }); - } + const searchTerms = query.toLowerCase().split(' '); + const businesses = filteredResults + .map(result => { + const business = { + id: result.url, + name: cleanBusinessName(result.title), + description: result.content || '', + website: result.url, + phone: extractPhone(result.content || '') || extractPhone(result.title), + 
address: extractAddress(result.content || '') || extractAddress(result.title),
+          score: result.score || 0
+        };
+        console.log('Processed business:', business);
+        return business;
+      })
+      .filter(business => {
+        // Check if business name contains any of the search terms
+        const nameMatches = searchTerms.some(term =>
+          business.name.toLowerCase().includes(term)
+        );
+
+        // Check if description contains any of the search terms
+        const descriptionMatches = searchTerms.some(term =>
+          business.description.toLowerCase().includes(term)
+        );
+
+        return business.name.length > 2 && (nameMatches || descriptionMatches);
+      })
+      .sort((a, b) => {
+        // Score based on how many search terms match the name and description
+        const aScore = searchTerms.filter(term =>
+          a.name.toLowerCase().includes(term) ||
+          a.description.toLowerCase().includes(term)
+        ).length;
+        const bScore = searchTerms.filter(term =>
+          b.name.toLowerCase().includes(term) ||
+          b.description.toLowerCase().includes(term)
+        ).length;
+        return bScore - aScore;
+      })
+      .slice(0, 10);
 
-    const emitter = await searchHandler.searchAndAnswer(
-      body.query,
-      history,
-      llm,
-      embeddings,
-      body.optimizationMode,
-      [],
-    );
+    console.log(`Transformed into ${businesses.length} business entries`);
+    return businesses;
+  } catch (error) {
+    console.error('SearxNG search error:', error);
+    return [];
+  }
+}
 
-    let message = '';
-    let sources = [];
+async function cacheResults(query: string, results: Business[]): Promise<void> {
+  if (!results.length) return;
 
-    emitter.on('data', (data) => {
-      const parsedData = JSON.parse(data);
-      if (parsedData.type === 'response') {
-        message += parsedData.data;
-      } else if (parsedData.type === 'sources') {
-        sources = parsedData.data;
-      }
-    });
+  console.log(`Caching ${results.length} results for query:`, query);
+  const normalizedQuery = query.toLowerCase().trim();
+
+  const { data: existing } = await supabase
+    .from('search_cache')
+    .select('id, results')
+    .eq('query', normalizedQuery)
+    .single();
 
-    emitter.on('end', () => {
-      res.status(200).json({ message, sources });
-    });
+  if (existing) {
+    console.log('Updating existing cache entry');
+    // Merge new results with existing ones, removing duplicates
+    const allResults = [...existing.results, ...results];
+    const uniqueResults = Array.from(new Map(allResults.map(item => [item.id, item])).values());
 
-    emitter.on('error', (data) => {
-      const parsedData = JSON.parse(data);
-      res.status(500).json({ message: parsedData.data });
-    });
-  } catch (err: any) {
-    logger.error(`Error in getting search results: ${err.message}`);
-    res.status(500).json({ message: 'An error has occurred.' 
}); + await supabase + .from('search_cache') + .update({ + results: uniqueResults, + updated_at: new Date().toISOString() + }) + .eq('id', existing.id); + } else { + console.log('Creating new cache entry'); + await supabase + .from('search_cache') + .insert({ + query: normalizedQuery, + results, + location: 'denver', // Default location + category: 'business', // Default category + created_at: new Date().toISOString(), + updated_at: new Date().toISOString(), + expires_at: new Date(Date.now() + 7 * 24 * 60 * 60 * 1000).toISOString() // 7 days from now + }); + } +} + +function cleanBusinessName(title: string): string { + return title + .replace(/^(the\s+)?/i, '') + .replace(/\s*[-|]\s*.+$/i, '') + .replace(/\s*\|.*$/i, '') + .replace(/\s*in\s+denver.*$/i, '') + .replace(/\s*near\s+denver.*$/i, '') + .replace(/\s*-\s*.*denver.*$/i, '') + .trim(); +} + +function extractPhone(text: string): string | null { + const phoneRegex = /(\+?1?\s*\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4})/; + const match = text.match(phoneRegex); + return match ? match[1] : null; +} + +function extractAddress(text: string): string | null { + const addressRegex = /\d+\s+[A-Za-z0-9\s,]+(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Lane|Ln|Drive|Dr|Way|Court|Ct|Circle|Cir)[,\s]+(?:[A-Za-z\s]+,\s*)?(?:CO|Colorado)[,\s]+\d{5}(?:-\d{4})?/i; + const match = text.match(addressRegex); + return match ? match[0] : null; +} + +router.post('/search', async (req, res) => { + try { + console.log('Received search request:', req.body); + const { query } = searchSchema.parse(req.body); + await handleSearch(query, res); + } catch (error) { + console.error('Search error:', error); + res.status(400).json({ error: 'Search failed. Please try again.' }); } }); +// Also support GET requests for easier testing +router.get('/search', async (req, res) => { + try { + const query = req.query.q as string; + if (!query) { + return res.status(400).json({ error: 'Query parameter "q" is required' }); + } + console.log('Received search request:', { query }); + await handleSearch(query, res); + } catch (error) { + console.error('Search error:', error); + res.status(400).json({ error: 'Search failed. Please try again.' 
}); + } +}); + +// Helper function to handle search logic +async function handleSearch(query: string, res: ExpressResponse) { + // Get cached results immediately + const cachedResults = await getCachedResults(query); + console.log(`Returning ${cachedResults.length} cached results to client`); + + // Send cached results to client + res.json({ results: cachedResults }); + + // Search for new results in the background + console.log('Starting background search'); + searchSearxNG(query).then(async newResults => { + console.log(`Found ${newResults.length} new results from SearxNG`); + if (newResults.length > 0) { + await cacheResults(query, newResults); + } + }).catch(error => { + console.error('Background search error:', error); + }); +} + export default router; diff --git a/src/tests/testDeepseek.ts b/src/tests/testDeepseek.ts index ea4dcfe..888249f 100644 --- a/src/tests/testDeepseek.ts +++ b/src/tests/testDeepseek.ts @@ -1,202 +1,43 @@ import { DeepSeekService } from '../lib/services/deepseekService'; -import { Business } from '../lib/types'; -import axios from 'axios'; +import dotenv from 'dotenv'; -async function testOllamaConnection() { - console.log('๐Ÿ” Testing Ollama connection...\n'); +dotenv.config(); + +async function testDeepseekService() { + const service = new DeepSeekService(); try { - // Test simple connection - console.log('Testing Qwen model...'); - const response = await DeepSeekService['chat']([{ - role: 'user', - content: 'Say "Hello, testing Qwen model!"' - }]); + console.log('Starting DeepSeek test...'); + console.log('Base URL:', process.env.OLLAMA_URL || 'http://localhost:11434'); + + const testQuery = { + role: "user", + content: "Find plumbers in Denver, CO. You must return exactly 10 results in valid JSON format, sorted by rating from highest to lowest. Each result must include a rating between 1-5 stars. Do not include any comments or explanations in the JSON." + }; + + console.log('Sending test query:', testQuery); + + const response = await service.chat([testQuery]); + + console.log('\nTest successful!'); + console.log('Parsed response:', JSON.stringify(response, null, 2)); - console.log('โœ… Model Response:', response); - return true; } catch (error) { + console.error('\nTest failed!'); if (error instanceof Error) { - console.error('โŒ Connection test failed:', error.message); - if (axios.isAxiosError(error)) { - if (error.code === 'ECONNREFUSED') { - console.error('โŒ Make sure Ollama is running (ollama serve)'); - } else { - console.error('API Error details:', error.response?.data); - } - } + console.error('Error message:', error.message); + console.error('Stack trace:', error.stack); } else { - console.error('โŒ Connection test failed with unknown error'); - } - return false; - } -} - -async function testDataCleaning() { - console.log('\n๐Ÿงช Testing business data cleaning...'); - - const testCases: Business[] = [ - { - id: 'test_1', - name: "Denver's Best Plumbing & Repair [LLC] (A Family Business) {Est. 1995}", - address: "CONTACT US TODAY! Suite 200-B, 1234 Main Street, Denver, Colorado 80202 (Near Starbucks)", - phone: "โ˜Ž๏ธ Main: (720) 555-1234 | Emergency: 1-800-555-9999 | Text: 720.555.4321", - email: "[support@denverplumbing.com](mailto:support@denverplumbing.com) or info@denverplumbing.com", - description: `$$$ LIMITED TIME OFFER $$$ - ๐Ÿšฐ Professional plumbing services in Denver metro area - ๐Ÿ’ฐ 20% OFF all repairs over $500! 
- โญ๏ธ Family owned since 1995 - ๐Ÿ“ž Available 24/7 for emergencies - ๐ŸŒ Visit www.denverplumbing.com - ๐Ÿ“ง Email us at contact@denverplumbing.com - ๐Ÿ’ณ All major credit cards accepted - #DenverPlumbing #EmergencyService`, - source: 'test', - website: 'https://example.com', - rating: 4.8, - logo: 'logo.png', - location: { lat: 39.7392, lng: -104.9903 }, - openingHours: [] - }, - { - id: 'test_2', - name: "[MIKE'S AUTO] {{CERTIFIED}} [BMW & AUDI SPECIALIST]", - address: "GET DIRECTIONS: 5678 Auto Row Drive\nUnit C-123\nDenver, CO 80205\nBehind Home Depot", - phone: "Sales: 303-555-0000\nService: (303) 555-1111\nFax: 303.555.2222", - email: "appointments@mikesauto.com [Schedule Now](https://booking.mikesauto.com)", - description: `๐Ÿš— Denver's Premier Auto Service Center - ๐Ÿ’ฏ ASE Certified Mechanics - ๐Ÿ”ง Specializing in German Luxury Vehicles - ๐Ÿ’ฐ๐Ÿ’ฐ๐Ÿ’ฐ Spring Special: Free oil change with any service over $300 - โšก๏ธ Same-day service available - ๐ŸŽฏ Located in central Denver - ๐Ÿ“ฑ Text "REPAIR" to 80205 for $50 off - โญ๏ธโญ๏ธโญ๏ธโญ๏ธโญ๏ธ Over 500 5-star reviews!`, - source: 'test', - website: 'https://mikesauto.com', - rating: 4.9, - logo: 'logo.png', - location: { lat: 39.7599, lng: -104.9987 }, - openingHours: ['Mon-Fri 8-6', 'Sat 9-3'] - }, - { - id: 'test_3', - name: "๐ŸŒŸ SUNSHINE DENTAL & ORTHODONTICS, P.C. [Dr. Smith & Associates] (Voted #1)", - address: "SCHEDULE TODAY!\n๐Ÿฆท Building 3, Suite 300\n9876 Medical Plaza Way\nDENVER COLORADO, 80210\nNext to Target", - phone: "๐Ÿ“ž New Patients: 1 (720) 999-8888 | Existing: 720.999.7777 | After Hours: +1-720-999-6666", - email: "appointments@sunshinedentalco.com, info@sunshinedentalco.com, emergency@sunshinedentalco.com", - description: `โœจ Your Premier Dental Care Provider in Denver! โœจ - ๐Ÿฆท State-of-the-art facility - ๐Ÿ’Ž Cosmetic & General Dentistry - ๐Ÿ‘ถ Family-friendly environment - ๐Ÿ’ฐ NEW PATIENT SPECIAL: $99 Cleaning & Exam (Reg. $299) - ๐Ÿฅ Most insurance accepted - โญ๏ธ 1,000+ 5-star reviews on Google - ๐ŸŽ Refer a friend and get $50 credit - ๐Ÿ“ฑ Download our app: smile.sunshinedentalco.com`, - source: 'test', - website: 'https://sunshinedentalco.com', - rating: 5.0, - logo: 'logo.png', - location: { lat: 39.7120, lng: -104.9412 }, - openingHours: ['Mon-Thu 8-5', 'Fri 8-2', 'Sat By Appt'] - }, - { - id: 'test_4', - name: "THE COFFEE SPOT โ˜•๏ธ {{NOW OPEN}} [Under New Management!]", - address: "ORDER PICKUP:\nGround Floor\n4321 Downtown Street\nDenver, CO. 80203\nInside Union Station", - phone: "โ˜Ž๏ธ Store: 303โ€ข777โ€ข5555\n๐Ÿ’ฌ Text Orders: 303-777-4444", - email: " orders@thecoffeespot.co [Click Here](https://order.thecoffeespot.co)", - description: `โ˜•๏ธ Denver's Favorite Coffee Shop Since 2020! - ๐ŸŒฑ Organic, Fair-Trade Coffee - ๐Ÿฅ Fresh-Baked Pastries Daily - โšก๏ธ MORNING RUSH SPECIAL: $2 off any drink before 9am! 
- ๐ŸŽฏ Loyalty Program: Buy 9, Get 1 FREE - ๐Ÿ“ฑ Order ahead on our app - ๐ŸŽ Student Discount: 10% off with ID - #CoffeeLovers #DenverCoffee #MorningFuel - Follow us @thecoffeespot for daily specials!`, - source: 'test', - website: 'https://thecoffeespot.co', - rating: 4.7, - logo: 'logo.png', - location: { lat: 39.7508, lng: -104.9997 }, - openingHours: ['Mon-Fri 6-8', 'Sat-Sun 7-7'] - } - ]; - - for (const testCase of testCases) { - console.log('\nTesting case:', testCase.id); - console.log('Input data:', JSON.stringify(testCase, null, 2)); - - console.time('Cleaning Duration'); - const cleaned = await DeepSeekService.cleanBusinessData(testCase); - console.timeEnd('Cleaning Duration'); - - console.log('\nCleaned data:', JSON.stringify(cleaned, null, 2)); - - // Validate the results - const validationIssues = []; - - // Name validation - if (cleaned.name?.match(/[\[\]{}()]/)) { - validationIssues.push('Name contains brackets/braces/parentheses'); - } - - // Address validation - if (!cleaned.address?.match(/^\d+[^,]+,\s*[^,]+,\s*[A-Z]{2}\s+\d{5}$/)) { - validationIssues.push('Address format incorrect'); - } - - // Phone validation - if (!cleaned.phone?.match(/^\(\d{3}\) \d{3}-\d{4}$/)) { - validationIssues.push('Phone format incorrect'); - } - - // Email validation - if (cleaned.email?.match(/[\[\]<>()]|mailto:|click|schedule/i)) { - validationIssues.push('Email contains formatting/links'); - } - - // Description validation - const descriptionIssues = []; - if (cleaned.description?.match(/[\$\d]+%?\s*off|\$/i)) { - descriptionIssues.push('contains pricing'); - } - if (cleaned.description?.match(/\b(?:call|email|visit|contact|text|www\.|http|@)\b/i)) { - descriptionIssues.push('contains contact info'); - } - if (cleaned.description?.match(/[๐Ÿ“ž๐Ÿ“ง๐ŸŒ๐Ÿ’ณโ˜Ž๏ธ๐Ÿ“ฑ]/)) { - descriptionIssues.push('contains emojis'); - } - if (cleaned.description?.match(/#\w+/)) { - descriptionIssues.push('contains hashtags'); - } - if (descriptionIssues.length > 0) { - validationIssues.push(`Description ${descriptionIssues.join(', ')}`); - } - - if (validationIssues.length > 0) { - console.log('\nโš ๏ธ Validation issues:', validationIssues.join(', ')); - } else { - console.log('\nโœ… All fields cleaned successfully'); + console.error('Unknown error:', error); } } } -async function runTests() { - console.log('๐Ÿš€ Starting Qwen model tests...\n'); - - const connectionSuccess = await testOllamaConnection(); - if (!connectionSuccess) { - console.log('โŒ Stopping tests due to connection failure'); - return; - } - - await testDataCleaning(); -} - -// Run tests if this file is executed directly -if (require.main === module) { - runTests().catch(console.error); -} \ No newline at end of file +// Run the test +console.log('=== Starting DeepSeek Service Test ===\n'); +testDeepseekService().then(() => { + console.log('\n=== Test Complete ==='); +}).catch(error => { + console.error('\n=== Test Failed ==='); + console.error(error); +}); \ No newline at end of file diff --git a/src/tests/testOllama.ts b/src/tests/testOllama.ts new file mode 100644 index 0000000..096d0be --- /dev/null +++ b/src/tests/testOllama.ts @@ -0,0 +1,47 @@ +import axios from 'axios'; +import dotenv from 'dotenv'; + +dotenv.config(); + +async function testOllamaConnection() { + const baseUrl = process.env.OLLAMA_URL || 'http://localhost:11434'; + + console.log('Testing Ollama connection...'); + console.log('Base URL:', baseUrl); + + try { + // Simple test request + const response = await axios.post(`${baseUrl}/api/chat`, { + model: 
'deepseek-coder:6.7b', + messages: [{ + role: 'user', + content: 'Return a simple JSON array with one object: {"test": "success"}' + }], + stream: false + }); + + console.log('\nResponse received:'); + console.log('Status:', response.status); + console.log('Data:', JSON.stringify(response.data, null, 2)); + + } catch (error) { + console.error('Connection test failed:'); + if (axios.isAxiosError(error)) { + console.error('Network error:', error.message); + if (error.response) { + console.error('Response status:', error.response.status); + console.error('Response data:', error.response.data); + } + } else { + console.error('Error:', error); + } + } +} + +console.log('=== Starting Ollama Connection Test ===\n'); +testOllamaConnection().then(() => { + console.log('\n=== Test Complete ==='); +}).catch(error => { + console.error('\n=== Test Failed ==='); + console.error(error); +}); \ No newline at end of file
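
Note on the claim-review flow: routes/business.ts calls supabase.rpc('review_business_claim', ...), but this patch does not include a migration that defines that function. Below is a minimal sketch of what it could look like, expressed through the same execute_sql helper the patch already uses for table setup. The column names follow the business_claims and business_profiles tables created above; the function name and parameters mirror the RPC call in the route, but the body and the SECURITY DEFINER choice are assumptions, not the project's actual migration.

import { supabase } from '../lib/supabase';

// Hypothetical setup sketch: defines the review_business_claim RPC invoked by
// routes/business.ts. Assumes the execute_sql RPC used elsewhere in this patch
// exists; a real project would more likely ship this as a SQL migration.
async function createReviewClaimFunction(): Promise<void> {
  const { error } = await supabase.rpc('execute_sql', {
    sql_string: `
      CREATE OR REPLACE FUNCTION public.review_business_claim(
        p_claim_id UUID,
        p_business_id TEXT,
        p_user_id UUID,
        p_status TEXT,
        p_notes TEXT
      ) RETURNS void AS $$
      BEGIN
        -- Record the reviewer's decision on the claim
        UPDATE public.business_claims
           SET status = p_status,
               notes = p_notes,
               reviewed_at = CURRENT_TIMESTAMP,
               reviewed_by = p_user_id
         WHERE id = p_claim_id;

        -- On approval, hand the profile to the claimant and mark it verified
        IF p_status = 'approved' THEN
          INSERT INTO public.business_profiles (business_id, claimed_by, claimed_at, verification_status)
          SELECT business_id, user_id, CURRENT_TIMESTAMP, 'verified'
            FROM public.business_claims
           WHERE id = p_claim_id
          ON CONFLICT (business_id) DO UPDATE
            SET claimed_by = EXCLUDED.claimed_by,
                claimed_at = EXCLUDED.claimed_at,
                verification_status = 'verified',
                updated_at = CURRENT_TIMESTAMP;
        END IF;
      END;
      $$ LANGUAGE plpgsql SECURITY DEFINER;
    `
  });
  if (error) console.error('Error creating review_business_claim function:', error);
}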