test: add CI/CD workflow
This commit is contained in:
parent
66d44c0774
commit
ce97671da3
28 changed files with 11684 additions and 1199 deletions
133
.github/workflows/ci.yml
vendored
Normal file
133
.github/workflows/ci.yml
vendored
Normal file
|
@ -0,0 +1,133 @@
|
||||||
|
---
# CI/CD pipeline.
#   test              - runs on every push / pull request targeting main or develop
#   build             - runs on pushes to main or develop, after test succeeds
#   deploy-staging    - runs on pushes to develop, after build
#   deploy-production - runs on pushes to main, after build
name: CI/CD

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main, develop]

jobs:
  test:
    runs-on: ubuntu-latest

    services:
      supabase:
        # This must be an actual PostgreSQL server: the health check below
        # runs pg_isready inside the container and the POSTGRES_* variables
        # are only honoured by Postgres images. The previous image,
        # supabase/postgres-meta, is the HTTP management API, not a database.
        image: postgres:15
        env:
          POSTGRES_PASSWORD: postgres
          POSTGRES_USER: postgres
          POSTGRES_DB: postgres
        ports:
          - '5432:5432'
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5

    steps:
      - uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'npm'

      - name: Install dependencies
        run: npm ci

      - name: Check code formatting
        run: npm run format

      - name: Run tests with coverage
        run: npm run test:coverage
        # NOTE(review): no service in this job listens on 54321, 11434 or
        # 8080; presumably the test suite mocks these endpoints - confirm.
        env:
          SUPABASE_URL: http://localhost:54321
          SUPABASE_KEY: test-key
          OLLAMA_URL: http://localhost:11434
          SEARXNG_URL: http://localhost:8080
          NODE_ENV: test
          # Environment variables are strings; quote so the value is not
          # typed as an integer by the YAML parser.
          CACHE_DURATION_DAYS: '7'

      - name: Upload coverage reports
        uses: codecov/codecov-action@v4
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage/lcov.info
          fail_ci_if_error: true

  build:
    needs: test
    runs-on: ubuntu-latest
    if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/develop')

    steps:
      - uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'npm'

      - name: Install dependencies
        run: npm ci

      - name: Build
        run: npm run build

      - name: Upload build artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/

  deploy-staging:
    needs: build
    runs-on: ubuntu-latest
    if: github.event_name == 'push' && github.ref == 'refs/heads/develop'
    environment:
      name: staging
      url: https://staging.example.com

    steps:
      - uses: actions/checkout@v4

      - name: Download build artifacts
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/

      - name: Deploy to staging
        run: |
          echo "Deploying to staging environment"
          # Add your staging deployment commands here
        env:
          DEPLOY_KEY: ${{ secrets.DEPLOY_KEY }}

  deploy-production:
    needs: build
    runs-on: ubuntu-latest
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    environment:
      name: production
      url: https://example.com

    steps:
      - uses: actions/checkout@v4

      - name: Download build artifacts
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/

      - name: Deploy to production
        run: |
          echo "Deploying to production environment"
          # Add your production deployment commands here
        env:
          DEPLOY_KEY: ${{ secrets.DEPLOY_KEY }}
|
53
.gitignore
vendored
53
.gitignore
vendored
|
@ -1,39 +1,32 @@
|
||||||
# Node.js
|
|
||||||
node_modules/
|
|
||||||
npm-debug.log
|
|
||||||
yarn-error.log
|
|
||||||
|
|
||||||
# Build output
|
|
||||||
/.next/
|
|
||||||
/out/
|
|
||||||
/dist/
|
|
||||||
|
|
||||||
# IDE/Editor specific
|
|
||||||
.vscode/
|
|
||||||
.idea/
|
|
||||||
*.iml
|
|
||||||
|
|
||||||
# Environment variables
|
# Environment variables
|
||||||
.env
|
.env
|
||||||
.env.local
|
.env.*
|
||||||
.env.development.local
|
!.env.example
|
||||||
.env.test.local
|
|
||||||
.env.production.local
|
|
||||||
|
|
||||||
# Config files
|
# Dependencies
|
||||||
config.toml
|
node_modules/
|
||||||
|
yarn-error.log
|
||||||
|
npm-debug.log
|
||||||
|
|
||||||
# Log files
|
# Build outputs
|
||||||
logs/
|
dist/
|
||||||
*.log
|
build/
|
||||||
|
.next/
|
||||||
|
|
||||||
# Testing
|
# IDE/Editor
|
||||||
/coverage/
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
|
||||||
# Miscellaneous
|
# OS
|
||||||
.DS_Store
|
.DS_Store
|
||||||
Thumbs.db
|
Thumbs.db
|
||||||
|
|
||||||
# Db
|
# Logs
|
||||||
db.sqlite
|
logs/
|
||||||
/searxng
|
*.log
|
||||||
|
|
||||||
|
# Cache
|
||||||
|
.cache/
|
||||||
|
.npm/
|
||||||
|
|
14
config.toml
Normal file
14
config.toml
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
[GENERAL]
# Port to run the server on
PORT = 3001
# Similarity measure: "cosine" or "dot"
SIMILARITY_MEASURE = "cosine"
# How long to keep Ollama models loaded into memory.
# (Instead of using -1 use "-1m")
KEEP_ALIVE = "5m"

[API_KEYS]
# OpenAI API key - sk-1234567890abcdef1234567890abcdef
OPENAI = ""
# Groq API key - gsk_1234567890abcdef1234567890abcdef
GROQ = ""
# Anthropic API key - sk-ant-1234567890abcdef1234567890abcdef
ANTHROPIC = ""
# Gemini API key - sk-1234567890abcdef1234567890abcdef
GEMINI = ""

[API_ENDPOINTS]
# SearxNG API URL
SEARXNG = "http://localhost:32768"
# Ollama API URL - http://host.docker.internal:11434
OLLAMA = ""
|
17
jest.config.js
Normal file
17
jest.config.js
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
module.exports = {
|
||||||
|
preset: 'ts-jest',
|
||||||
|
testEnvironment: 'node',
|
||||||
|
roots: ['<rootDir>/src'],
|
||||||
|
testMatch: ['**/__tests__/**/*.ts', '**/?(*.)+(spec|test).ts'],
|
||||||
|
transform: {
|
||||||
|
'^.+\\.ts$': 'ts-jest',
|
||||||
|
},
|
||||||
|
moduleFileExtensions: ['ts', 'js', 'json', 'node'],
|
||||||
|
collectCoverageFrom: [
|
||||||
|
'src/**/*.{ts,js}',
|
||||||
|
'!src/tests/**',
|
||||||
|
'!**/node_modules/**',
|
||||||
|
],
|
||||||
|
coverageDirectory: 'coverage',
|
||||||
|
setupFilesAfterEnv: ['<rootDir>/src/tests/setup.ts'],
|
||||||
|
};
|
6015
package-lock.json
generated
6015
package-lock.json
generated
File diff suppressed because it is too large
Load diff
14
package.json
14
package.json
|
@ -11,24 +11,35 @@
|
||||||
"format": "prettier . --check",
|
"format": "prettier . --check",
|
||||||
"format:write": "prettier . --write",
|
"format:write": "prettier . --write",
|
||||||
"test:search": "ts-node src/tests/testSearch.ts",
|
"test:search": "ts-node src/tests/testSearch.ts",
|
||||||
"test:supabase": "ts-node src/tests/supabaseTest.ts"
|
"test:supabase": "ts-node src/tests/supabaseTest.ts",
|
||||||
|
"test:deepseek": "ts-node src/tests/testDeepseek.ts",
|
||||||
|
"test": "jest",
|
||||||
|
"test:watch": "jest --watch",
|
||||||
|
"test:coverage": "jest --coverage"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
"@testing-library/jest-dom": "^6.1.5",
|
||||||
"@types/better-sqlite3": "^7.6.10",
|
"@types/better-sqlite3": "^7.6.10",
|
||||||
"@types/cors": "^2.8.17",
|
"@types/cors": "^2.8.17",
|
||||||
"@types/express": "^4.17.21",
|
"@types/express": "^4.17.21",
|
||||||
"@types/html-to-text": "^9.0.4",
|
"@types/html-to-text": "^9.0.4",
|
||||||
|
"@types/jest": "^29.5.11",
|
||||||
"@types/multer": "^1.4.12",
|
"@types/multer": "^1.4.12",
|
||||||
"@types/pdf-parse": "^1.1.4",
|
"@types/pdf-parse": "^1.1.4",
|
||||||
"@types/readable-stream": "^4.0.11",
|
"@types/readable-stream": "^4.0.11",
|
||||||
|
"@types/supertest": "^6.0.2",
|
||||||
"@types/ws": "^8.5.12",
|
"@types/ws": "^8.5.12",
|
||||||
"drizzle-kit": "^0.22.7",
|
"drizzle-kit": "^0.22.7",
|
||||||
|
"jest": "^29.7.0",
|
||||||
"nodemon": "^3.1.0",
|
"nodemon": "^3.1.0",
|
||||||
"prettier": "^3.2.5",
|
"prettier": "^3.2.5",
|
||||||
|
"supertest": "^7.0.0",
|
||||||
|
"ts-jest": "^29.1.1",
|
||||||
"ts-node": "^10.9.2",
|
"ts-node": "^10.9.2",
|
||||||
"typescript": "^5.4.3"
|
"typescript": "^5.4.3"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"@huggingface/transformers": "latest",
|
||||||
"@iarna/toml": "^2.2.5",
|
"@iarna/toml": "^2.2.5",
|
||||||
"@langchain/anthropic": "^0.2.3",
|
"@langchain/anthropic": "^0.2.3",
|
||||||
"@langchain/community": "^0.2.16",
|
"@langchain/community": "^0.2.16",
|
||||||
|
@ -52,6 +63,7 @@
|
||||||
"pdf-parse": "^1.1.1",
|
"pdf-parse": "^1.1.1",
|
||||||
"robots-parser": "^3.0.1",
|
"robots-parser": "^3.0.1",
|
||||||
"tesseract.js": "^4.1.4",
|
"tesseract.js": "^4.1.4",
|
||||||
|
"torch": "latest",
|
||||||
"winston": "^3.13.0",
|
"winston": "^3.13.0",
|
||||||
"ws": "^8.17.1",
|
"ws": "^8.17.1",
|
||||||
"zod": "^3.22.4"
|
"zod": "^3.22.4"
|
||||||
|
|
|
@ -11,7 +11,49 @@ search:
|
||||||
|
|
||||||
# All server settings live in ONE mapping. A second top-level `server:` key
# further down the file would replace this one in last-wins parsers, silently
# dropping secret_key, port, bind_address and base_url.
server:
  # NOTE(review): this secret is committed to the repository; make sure
  # SEARXNG_SECRET is always set in deployed environments and consider
  # rotating this value.
  secret_key: 'a2fb23f1b02e6ee83875b09826990de0f6bd908b6638e8c10277d415f6ab852b'  # Is overwritten by ${SEARXNG_SECRET}
  port: 8080
  bind_address: "0.0.0.0"
  base_url: http://localhost:8080/
  limiter: false
  image_proxy: false
  http_protocol_version: "1.0"

engines:
  - name: wolframalpha
    disabled: false

  - name: google
    engine: google
    shortcut: g
    disabled: false

  - name: bing
    engine: bing
    shortcut: b
    disabled: false

  - name: duckduckgo
    engine: duckduckgo
    shortcut: d
    disabled: false

  - name: yelp
    engine: yelp
    shortcut: y
    disabled: false

ui:
  static_path: ""
  templates_path: ""
  default_theme: simple
  default_locale: en
  results_on_new_tab: false

outgoing:
  request_timeout: 6.0
  max_request_timeout: 10.0
  pool_connections: 100
  pool_maxsize: 10
  enable_http2: true
|
||||||
|
|
|
@ -15,7 +15,10 @@ const envSchema = z.object({
|
||||||
SEARXNG_INSTANCES: z.string().default('["http://localhost:4000"]'),
|
SEARXNG_INSTANCES: z.string().default('["http://localhost:4000"]'),
|
||||||
MAX_RESULTS_PER_QUERY: z.string().default('50'),
|
MAX_RESULTS_PER_QUERY: z.string().default('50'),
|
||||||
CACHE_DURATION_HOURS: z.string().default('24'),
|
CACHE_DURATION_HOURS: z.string().default('24'),
|
||||||
CACHE_DURATION_DAYS: z.string().default('7')
|
CACHE_DURATION_DAYS: z.string().default('7'),
|
||||||
|
HUGGING_FACE_API_KEY: z.string({
|
||||||
|
required_error: "HUGGING_FACE_API_KEY is required in .env"
|
||||||
|
})
|
||||||
});
|
});
|
||||||
|
|
||||||
// Define the final environment type
|
// Define the final environment type
|
||||||
|
@ -39,6 +42,15 @@ export interface EnvConfig {
|
||||||
durationHours: number;
|
durationHours: number;
|
||||||
durationDays: number;
|
durationDays: number;
|
||||||
};
|
};
|
||||||
|
ai: {
|
||||||
|
model: string;
|
||||||
|
temperature: number;
|
||||||
|
maxTokens: number;
|
||||||
|
batchSize: number;
|
||||||
|
};
|
||||||
|
huggingface: {
|
||||||
|
apiKey: string;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse and transform the environment variables
|
// Parse and transform the environment variables
|
||||||
|
@ -64,5 +76,14 @@ export const env: EnvConfig = {
|
||||||
maxResultsPerQuery: parseInt(rawEnv.MAX_RESULTS_PER_QUERY),
|
maxResultsPerQuery: parseInt(rawEnv.MAX_RESULTS_PER_QUERY),
|
||||||
durationHours: parseInt(rawEnv.CACHE_DURATION_HOURS),
|
durationHours: parseInt(rawEnv.CACHE_DURATION_HOURS),
|
||||||
durationDays: parseInt(rawEnv.CACHE_DURATION_DAYS)
|
durationDays: parseInt(rawEnv.CACHE_DURATION_DAYS)
|
||||||
|
},
|
||||||
|
ai: {
|
||||||
|
model: 'deepseek-ai/deepseek-coder-6.7b-instruct',
|
||||||
|
temperature: 0.7,
|
||||||
|
maxTokens: 512,
|
||||||
|
batchSize: 3
|
||||||
|
},
|
||||||
|
huggingface: {
|
||||||
|
apiKey: rawEnv.HUGGING_FACE_API_KEY
|
||||||
}
|
}
|
||||||
};
|
};
|
|
@ -1,4 +1,4 @@
|
||||||
import { OllamaService } from './ollamaService';
|
import { DeepSeekService } from './deepseekService';
|
||||||
import { Business } from '../types';
|
import { Business } from '../types';
|
||||||
import { db } from './databaseService';
|
import { db } from './databaseService';
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@ export class CleanupService {
|
||||||
setTimeout(() => reject(new Error('LLM timeout')), LLM_TIMEOUT);
|
setTimeout(() => reject(new Error('LLM timeout')), LLM_TIMEOUT);
|
||||||
});
|
});
|
||||||
|
|
||||||
const llmPromise = OllamaService.chat([{
|
const llmPromise = DeepSeekService.chat([{
|
||||||
role: 'user',
|
role: 'user',
|
||||||
content: prompt
|
content: prompt
|
||||||
}]);
|
}]);
|
||||||
|
@ -205,58 +205,17 @@ export class CleanupService {
|
||||||
return cached;
|
return cached;
|
||||||
}
|
}
|
||||||
|
|
||||||
const combinedPrompt = `
|
// Clean using DeepSeek
|
||||||
Clean and format the following business information. For each field, follow the format shown in the examples.
|
const cleaned = await DeepSeekService.cleanBusinessData(business);
|
||||||
The business type appears to be: ${business.name.toLowerCase().includes('restaurant') ? 'restaurant' :
|
const validated = this.validateAndClean({ ...business, ...cleaned });
|
||||||
business.name.toLowerCase().includes('plumb') ? 'plumber' :
|
|
||||||
business.name.toLowerCase().includes('electric') ? 'electrician' : 'business'}
|
|
||||||
|
|
||||||
Return each field on a new line with the field name followed by a colon.
|
|
||||||
Only return valid data - if something looks wrong or invalid, return an empty string.
|
|
||||||
|
|
||||||
Examples for address:
|
|
||||||
Input: "Sure! Here is the business address in Denver, CO:\\n\\n14100 W 7th Ave, Golden CO 80401"
|
|
||||||
Output: 14100 W 7th Ave, Golden, CO 80401
|
|
||||||
|
|
||||||
Examples for phone:
|
|
||||||
Input: "7203796281"
|
|
||||||
Output: (720) 379-6281
|
|
||||||
Input: "N/A" or "none"
|
|
||||||
Output:
|
|
||||||
|
|
||||||
Examples for email:
|
|
||||||
Input: "379-6281info@brutalpoodledenver.com"
|
|
||||||
Output: info@brutalpoodledenver.com
|
|
||||||
Input: "top-seo-img@2x.jpg" or "Union Office" or "[email]" or "None"
|
|
||||||
Output:
|
|
||||||
|
|
||||||
Examples for description:
|
|
||||||
Input: "The Brutal Noodle $14.00 Beef bone broth, smoked brisket, rice noodles, all the fixins. (GF) Vegan available with tofu & veggie broth $11"
|
|
||||||
Output: Asian fusion restaurant serving bone broth noodles with brisket and vegan options.
|
|
||||||
Input: "Our Denver-based expert plumbers can repair or install any fixture. Commercial services: We're ready to keep your plumbing system operating safely."
|
|
||||||
Output: Professional plumbing services for residential and commercial properties in Denver.
|
|
||||||
|
|
||||||
Business name for context: "${business.name}"
|
|
||||||
Website for context: "${business.website}"
|
|
||||||
|
|
||||||
Now clean these fields:
|
|
||||||
Address: "${business.address}"
|
|
||||||
Phone: "${business.phone}"
|
|
||||||
Email: "${business.email}"
|
|
||||||
Description: "${business.description}"
|
|
||||||
`;
|
|
||||||
|
|
||||||
const response = await this.cleanWithLLM(combinedPrompt, business);
|
|
||||||
const parsed = this.parseResponse(response);
|
|
||||||
const cleaned = this.validateAndClean({ ...business, ...parsed });
|
|
||||||
|
|
||||||
// Only cache if confidence score is high enough
|
// Only cache if confidence score is high enough
|
||||||
const confidence = this.calculateConfidenceScore(cleaned);
|
const confidence = this.calculateConfidenceScore(validated);
|
||||||
if (confidence >= MIN_CONFIDENCE_SCORE) {
|
if (confidence >= MIN_CONFIDENCE_SCORE) {
|
||||||
await db.saveToCache(cacheKey, cleaned, 24 * 60 * 60 * 1000);
|
await db.saveToCache(cacheKey, validated, 24 * 60 * 60 * 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
return cleaned;
|
return validated;
|
||||||
}
|
}
|
||||||
|
|
||||||
static async cleanBusinessRecords(businesses: Business[]): Promise<Business[]> {
|
static async cleanBusinessRecords(businesses: Business[]): Promise<Business[]> {
|
||||||
|
|
|
@ -20,11 +20,15 @@ export class DatabaseService {
|
||||||
}
|
}
|
||||||
|
|
||||||
async searchBusinesses(query: string, location: string): Promise<BusinessData[]> {
|
async searchBusinesses(query: string, location: string): Promise<BusinessData[]> {
|
||||||
|
try {
|
||||||
const { data, error } = await this.supabase
|
const { data, error } = await this.supabase
|
||||||
.from('businesses')
|
.from('businesses')
|
||||||
.select('*')
|
.select('*')
|
||||||
.textSearch('name', query)
|
.or(
|
||||||
.textSearch('address', location)
|
`name.ilike.%${query}%,` +
|
||||||
|
`description.ilike.%${query}%`
|
||||||
|
)
|
||||||
|
.ilike('address', `%${location}%`)
|
||||||
.order('search_count', { ascending: false })
|
.order('search_count', { ascending: false })
|
||||||
.limit(env.cache.maxResultsPerQuery);
|
.limit(env.cache.maxResultsPerQuery);
|
||||||
|
|
||||||
|
@ -33,7 +37,12 @@ export class DatabaseService {
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
console.log(`Found ${data?.length || 0} businesses in database`);
|
||||||
return data || [];
|
return data || [];
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error searching businesses:', error);
|
||||||
|
return [];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async saveBusiness(business: Partial<BusinessData>): Promise<void> {
|
async saveBusiness(business: Partial<BusinessData>): Promise<void> {
|
||||||
|
@ -135,6 +144,21 @@ export class DatabaseService {
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Deletes rows from the `cache` table.
 *
 * @param pattern Optional SQL LIKE pattern matched against the `key` column
 *                (e.g. `search:%`). When omitted, every row is removed.
 *
 * Failures are logged and swallowed (best effort), as before.
 */
async clearCache(pattern?: string): Promise<void> {
  try {
    const deletion = this.supabase.from('cache').delete();

    // Use the query builder's own filters: the previous `.or('key LIKE $1')`
    // was raw SQL with an unbound placeholder, which is not a valid filter
    // expression, and `.or('')` was sent when no pattern was given.
    // NOTE(review): the "delete everything" branch assumes `key` is never
    // NULL (presumably the primary key) - confirm against the schema.
    const { error } = await (pattern
      ? deletion.like('key', pattern)
      : deletion.not('key', 'is', null));

    // The client reports failures through `error` instead of throwing.
    if (error) {
      throw error;
    }
  } catch (error) {
    console.error('Error clearing cache:', error);
  }
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export const db = new DatabaseService();
|
export const db = new DatabaseService();
|
460
src/lib/services/deepseekService.ts
Normal file
460
src/lib/services/deepseekService.ts
Normal file
|
@ -0,0 +1,460 @@
|
||||||
|
import axios from 'axios';
|
||||||
|
import { env } from '../../config/env';
|
||||||
|
import { Business } from '../types';
|
||||||
|
|
||||||
|
export class DeepSeekService {
|
||||||
|
private static OLLAMA_URL = 'http://localhost:11434/api/generate';
|
||||||
|
private static MODEL_NAME = 'qwen2:0.5b';
|
||||||
|
private static MAX_ATTEMPTS = 3; // Prevent infinite loops
|
||||||
|
|
||||||
|
/**
 * Calls `fn` and returns its result, retrying with exponential backoff when
 * it rejects.
 *
 * @param fn      Operation to attempt.
 * @param retries Maximum number of attempts (default 5). The error from the
 *                final attempt is rethrown.
 *
 * Delay before retry k (0-based) is 2^k * 5s when the failure is an axios
 * timeout (`ECONNABORTED`), otherwise 2^k * 1s.
 */
private static async retryWithBackoff(fn: () => Promise<any>, retries = 5) {
  let attempt = 0;

  while (attempt < retries) {
    try {
      return await fn();
    } catch (error) {
      const isFinalAttempt = attempt === retries - 1;
      if (isFinalAttempt) throw error;

      // Timeouts get a longer base delay than other failures.
      const timedOut = axios.isAxiosError(error) && error.code === 'ECONNABORTED';
      const baseDelayMs = timedOut ? 5000 : 1000;
      const delay = Math.pow(2, attempt) * baseDelayMs;

      console.log(`Retry ${attempt + 1}/${retries} after ${delay/1000}s...`);
      await new Promise(resolve => setTimeout(resolve, delay));
    }

    attempt++;
  }
}
|
||||||
|
|
||||||
|
/**
 * Normalizes a free-form, possibly multi-line address to
 * "Street, City, CO 12345".
 *
 * Strategy:
 *  1. Strip emojis, call-to-action prefixes, parenthetical notes and
 *     location hints, then split into non-empty lines.
 *  2. Try three Colorado address patterns on each line.
 *  3. Otherwise combine the first street-looking line with a ZIP code found
 *     after the house number; the city then defaults to Denver.
 *
 * @param address Raw address text.
 * @returns The formatted address, or '' when no address can be extracted.
 */
private static cleanAddress(address: string): string {
  // `\b` keeps the keyword strips from firing inside other words
  // (e.g. "Linear St" contains "near", "Target" contains "get").
  const lines = address
    .replace(/[\u{1F300}-\u{1F9FF}]|[\u{2700}-\u{27BF}]|[\u{1F600}-\u{1F64F}]/gu, '') // Remove emojis
    .replace(/\b(?:GET|ORDER|SCHEDULE|CONTACT|DIRECTIONS).*?[:!\n]/i, '') // Remove action words
    .replace(/\([^)]*\)/g, '') // Remove parenthetical info
    .replace(/\b(?:Next|Behind|Inside|Near).*$/im, '') // Remove location hints
    .split(/[\n\r]+/) // Split into lines
    .map(line => line.trim())
    .filter(Boolean); // Remove empty lines

  // Capitalizes the first letter of every space-separated word.
  const titleCase = (text: string): string =>
    text
      .split(' ')
      .map(word => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase())
      .join(' ');

  // Common address patterns, most specific first (built once, not per line).
  const patterns = [
    // Handle suite/unit in street address
    /(\d+[^,]+?(?:\s+(?:Suite|Ste|Unit|Apt|Building|Bldg|#)\s*[-A-Z0-9]+)?),\s*([^,]+?),\s*(?:CO|Colorado|COLORADO)[,\s]+(\d{5})/i,

    // Basic format
    /(\d+[^,]+?),\s*([^,]+?),\s*(?:CO|Colorado|COLORADO)[,\s]+(\d{5})/i,

    // No commas
    /(\d+[^,]+?)\s+([^,]+?)\s+(?:CO|Colorado|COLORADO)\s+(\d{5})/i,
  ];

  for (const line of lines) {
    for (const pattern of patterns) {
      const match = line.match(pattern);
      if (!match) continue;

      const [, street, city, zip] = match;

      // Insert a space after the house number ("123Main" -> "123 Main"),
      // then collapse whitespace. Collapsing afterwards matters: doing it
      // first left a double space whenever the number was already followed
      // by a space.
      const cleanedStreet = titleCase(
        street
          .replace(/(\d+)/, '$1 ')
          .replace(/\s+/g, ' ')
          .trim()
      );
      const cleanedCity = titleCase(city.trim().replace(/\s+/g, ' '));

      return `${cleanedStreet}, ${cleanedCity}, CO ${zip}`;
    }
  }

  // If no match found, try to extract components
  const streetLine = lines.find(line => /\d+/.test(line));
  if (streetLine) {
    // Greedy up to the first comma. The previous lazy quantifier had nothing
    // after it to force expansion, so it captured only the house number.
    const streetMatch = streetLine.match(/\d+[^,\n]*/);

    // Look for the ZIP only AFTER the house number so a five-digit house
    // number is never reported as the ZIP code.
    const afterHouseNumber = lines.join(' ').replace(/^\D*\d+/, '');
    const zipMatch = afterHouseNumber.match(/\b(\d{5})\b/);

    if (streetMatch && zipMatch) {
      // Drop a trailing "CO 80202" / "80202" that the greedy capture picked up.
      const street = streetMatch[0]
        .replace(/\s+(?:CO|Colorado)?\s*\d{5}(?:-\d{4})?\s*$/i, '')
        .trim();
      const zip = zipMatch[1];

      if (street) {
        // City is not recoverable on this path; Denver is the hard-coded default.
        return `${street}, Denver, CO ${zip}`;
      }
    }
  }

  return '';
}
|
||||||
|
|
||||||
|
/**
 * Rule-based (non-LLM) cleanup of a business record.
 *
 * Works on a shallow copy and only touches fields that are present:
 *  - address: replaced by `cleanAddress` output when that yields a result
 *  - name: emojis, bracketed notes and company suffixes removed
 *  - phone: formatted as "(XXX) XXX-XXXX" when at least 10 digits are found
 *  - email: reduced to the first email-looking substring
 *  - description: replaced by `extractCoreDescription` output
 *
 * @param business Record to clean; it is not mutated.
 * @returns The cleaned copy.
 */
private static manualClean(business: Partial<Business>): Partial<Business> {
  const cleaned = { ...business };

  // Clean address
  if (cleaned.address) {
    const cleanedAddress = this.cleanAddress(cleaned.address);
    if (cleanedAddress) {
      cleaned.address = cleanedAddress;
    }
  }

  // Detect the business type from the ORIGINAL name, before the cleanup
  // below can strip words the detection relies on.
  const businessType = this.detectBusinessType(cleaned.name || '');

  // Clean name while preserving core identity
  if (cleaned.name) {
    cleaned.name = cleaned.name
      // Remove emojis and special characters
      .replace(/[\u{1F300}-\u{1F9FF}]|[\u{2700}-\u{27BF}]|[\u{1F600}-\u{1F64F}]/gu, '')
      // Remove bracketed content but preserve important terms
      .replace(/\s*[\[\({](?!(?:BMW|Mercedes|Audi|specialist|certified)).*?[\]\)}]\s*/gi, ' ')
      // Remove business suffixes
      .replace(/\b(?:LLC|Inc|Corp|Ltd|DBA|Est\.|Since|P\.?C\.?)\b\.?\s*\d*/gi, '')
      // Clean up and normalize
      .replace(/[^\w\s&'-]/g, ' ')
      .replace(/\s+/g, ' ')
      .trim()
      .replace(/^THE\s+/i, ''); // Remove leading "THE"
  }

  // Clean phone - handle multiple numbers and formats
  if (cleaned.phone) {
    // Keep digits only (emoji removal is kept for parity with other fields).
    const digits = cleaned.phone
      .replace(/[\u{1F300}-\u{1F9FF}]|[\u{2700}-\u{27BF}]|[\u{1F600}-\u{1F64F}]/gu, '')
      .replace(/[^\d]/g, '');

    // Drop a leading country code "1" from an 11-digit number. Without
    // this, "+1 (720) 379-6281" was formatted as "(172) 037-9628".
    const national =
      digits.length === 11 && digits.startsWith('1') ? digits.slice(1) : digits;

    if (national.length >= 10) {
      // When several numbers were concatenated, keep the first one.
      const mainNumber = national.slice(0, 10); // Ensure exactly 10 digits
      cleaned.phone = `(${mainNumber.slice(0,3)}) ${mainNumber.slice(3,6)}-${mainNumber.slice(6,10)}`;
    }
  }

  // Clean email - handle multiple emails and formats
  if (cleaned.email) {
    const emailMatch = cleaned.email.match(/([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/);
    if (emailMatch?.[1]) {
      cleaned.email = emailMatch[1];
    }
  }

  // Improved description cleaning
  if (cleaned.description) {
    cleaned.description = this.extractCoreDescription(cleaned.description, businessType);
  }

  return cleaned;
}
|
||||||
|
|
||||||
|
/**
 * Classifies a business by keywords in its name.
 *
 * Categories are tested in declaration order and the first match wins.
 *
 * @param name Business name (may be empty).
 * @returns 'auto', 'dental', 'coffee', 'plumbing' or 'restaurant';
 *          'business' when no keyword matches.
 */
private static detectBusinessType(name: string): string {
  const categories: Array<[string, RegExp]> = [
    ['auto', /\b(?:auto|car|vehicle|BMW|Audi|Mercedes|mechanic|repair|service center)\b/i],
    ['dental', /\b(?:dental|dentist|orthodontic|smile|tooth|teeth)\b/i],
    ['coffee', /\b(?:coffee|cafe|espresso|roaster|brew)\b/i],
    // `plumb\w*` matches the whole word family (plumb, plumbing, plumber,
    // plumbers). The previous `plumb|plumbing` alternatives sat between
    // word boundaries and therefore never matched "plumber(s)".
    ['plumbing', /\b(?:plumb\w*|rooter|drain|pipe)\b/i],
    ['restaurant', /\b(?:restaurant|grill|cuisine|bistro|kitchen)\b/i],
  ];

  for (const [type, pattern] of categories) {
    if (pattern.test(name)) return type;
  }
  return 'business';
}
|
||||||
|
|
||||||
|
/**
 * Reduces a marketing-heavy description to the phrases that describe what
 * the business does.
 *
 * The text is first stripped of emojis, price/offer snippets, everything
 * from the first contact keyword onward and hashtags. Phrases matching the
 * patterns registered for `businessType` are then joined with '. '.
 *
 * @param description  Raw description text.
 * @param businessType Category as returned by `detectBusinessType`.
 * @returns The joined phrases, or the generic sentence
 *          "Professional <type> services in Denver area" when nothing matches
 *          (including types that have no patterns registered).
 */
private static extractCoreDescription(description: string, businessType: string): string {
  // Phrase patterns per business type.
  const typePatterns: { [key: string]: RegExp[] } = {
    auto: [
      /(?:specialist|specializing)\s+in\s+[^.]+/i,
      /(?:certified|ASE)[^.]+mechanic[^.]+/i,
      /(?:auto|car|vehicle)\s+(?:service|repair)[^.]+/i
    ],
    dental: [
      /(?:dental|orthodontic)\s+(?:care|services)[^.]+/i,
      /(?:family|cosmetic|general)\s+dentistry[^.]+/i,
      /state-of-the-art\s+facility[^.]+/i
    ],
    coffee: [
      /(?:coffee|espresso|pastry|cafe)[^.]+/i,
      /(?:organic|fair-trade|fresh)[^.]+/i,
      /(?:local|favorite|community)[^.]+coffee[^.]+/i
    ],
    plumbing: [
      /(?:plumbing|drain|pipe)\s+(?:service|repair)[^.]+/i,
      /(?:professional|expert|master)\s+plumb[^.]+/i,
      /(?:residential|commercial)\s+plumbing[^.]+/i
    ]
  };

  // Strip marketing and formatting, one step at a time.
  const withoutEmojis = description.replace(/[\u{1F300}-\u{1F9FF}]|[\u{2700}-\u{27BF}]|[\u{1F600}-\u{1F64F}]/gu, '');
  const withoutOffers = withoutEmojis.replace(/\$+\s*[^\s]*\s*(off|special|offer|deal|save|discount|price|cost|free)/gi, '');
  const withoutContact = withoutOffers.replace(/\b(?:call|email|visit|contact|text|www\.|http|@|book|schedule|appointment)\b.*$/gi, '');
  const withoutHashtags = withoutContact.replace(/#\w+/g, '');
  const text = withoutHashtags.replace(/\s+/g, ' ').trim();

  // Collect the first match of every pattern registered for this type.
  const phrases: string[] = [];
  const patternsForType = typePatterns[businessType];
  if (patternsForType) {
    for (const pattern of patternsForType) {
      const found = text.match(pattern);
      if (found && found[0]) {
        phrases.push(found[0]);
      }
    }
  }

  if (phrases.length === 0) {
    // Fallback to generic description
    return `Professional ${businessType} services in Denver area`;
  }
  return phrases.join('. ');
}
|
||||||
|
|
||||||
|
/**
 * Prepares raw model output for parsing: removes emojis and control
 * characters and collapses all whitespace runs to single spaces.
 *
 * @param response Raw text returned by the model.
 * @returns Single-line, trimmed text.
 */
private static sanitizeJsonResponse(response: string): string {
  return response
    // Remove emojis
    .replace(/[\u{1F300}-\u{1F9FF}]|[\u{2700}-\u{27BF}]|[\u{1F600}-\u{1F64F}]/gu, '')
    // Turn line breaks and tabs into spaces BEFORE stripping control
    // characters. Newlines and tabs are themselves control characters, so
    // stripping first deleted them outright and fused the neighbouring
    // tokens ("Name: Joe\nPhone: 555" -> "Name: JoePhone: 555").
    .replace(/[\t\r\n]+\s*/g, ' ')
    // Remove remaining control characters
    .replace(/[\u0000-\u001F\u007F-\u009F]/g, '')
    // Collapse whitespace
    .replace(/\s+/g, ' ')
    .trim();
}
|
||||||
|
|
||||||
|
/**
 * Cleans a business record with the language model, falling back to
 * rule-based cleaning.
 *
 * Flow per attempt:
 *  1. Ask the model for a JSON object of the form `{ "business_info": {...} }`.
 *  2. Parse the trailing JSON of the reply and overlay its fields on `business`.
 *  3. Validate the result. A business-type mismatch returns the manually
 *     cleaned input immediately; any other issue retries with the cleaned
 *     record as the new input.
 *  4. If the reply cannot be parsed, retry with whatever `parseResponse`
 *     recovers, or with the manually cleaned record.
 * After `MAX_ATTEMPTS` attempts the manually cleaned record is returned.
 *
 * @param business Record to clean.
 * @param attempt  Zero-based attempt counter used by the internal retries;
 *                 callers normally omit it.
 * @returns The cleaned record.
 *
 * NOTE(review): `chat` and `parseResponse` are defined outside the visible
 * part of this file; errors thrown by `chat` propagate to the caller because
 * the call sits outside the try block.
 */
static async cleanBusinessData(business: Business, attempt = 0): Promise<Business> {
  if (attempt >= this.MAX_ATTEMPTS) {
    console.log('Max cleaning attempts reached, applying manual cleaning...');
    return {
      ...business,
      ...this.manualClean(business)
    };
  }

  // Detect business type first
  const businessType = this.detectBusinessType(business.name || '');

  // Random token that makes every prompt unique. The explanation lives here
  // rather than inside the template literal, where it was sent to the model
  // as part of the system message.
  const requestId = Math.random().toString(36).substring(7);
  const prompt = `<|im_start|>system
You are a data cleaning expert. Clean the business data while preserving its core identity and type.
Request ID: ${requestId}
IMPORTANT: Return ONLY plain text without emojis or special characters.
<|im_end|>
<|im_start|>user
Clean this ${businessType} business data by following these rules exactly:

Input Business:
${JSON.stringify(business, null, 2)}

Cleaning Rules:
1. NAME: Remove brackets/braces but preserve core business identity
2. ADDRESS: Format as "street, city, state zip" using state abbreviations
3. PHONE: Extract and format primary phone as "(XXX) XXX-XXXX"
4. EMAIL: Remove markdown/mailto formatting but keep actual email
5. DESCRIPTION: Keep core business info but remove:
- ALL emojis and special characters (return plain text only)
- Prices and special offers
- Contact information
- Marketing language
- Social media elements

Return ONLY clean JSON with the original business identity preserved:
{
"business_info": {
"name": "Keep original business name without formatting",
"address": "Keep original address, properly formatted",
"phone": "Keep original phone number, properly formatted",
"email": "Keep original email without formatting",
"description": "Keep original business description without marketing"
}
}
<|im_end|>`;

  const response = await this.chat([{
    role: 'user',
    content: prompt
  }]);

  try {
    // Take the JSON object that ends the reply.
    const jsonMatch = response.match(/\{[\s\S]*?\}\s*$/);
    if (!jsonMatch) {
      throw new Error('No JSON found in response');
    }

    const sanitizedJson = this.sanitizeJsonResponse(jsonMatch[0]);
    const parsed = JSON.parse(sanitizedJson);

    // Model output is untrusted: accept only the five fields the prompt asks
    // for, and only when they are strings. Spreading the whole object let
    // the model overwrite any other property of the record or inject
    // non-string values.
    const info = parsed?.business_info;
    const updates: Partial<Business> = {};
    if (info && typeof info === 'object') {
      for (const field of ['name', 'address', 'phone', 'email', 'description'] as const) {
        if (typeof info[field] === 'string') {
          updates[field] = info[field];
        }
      }
    }

    const cleaned = {
      ...business,
      ...updates
    };

    // Validate and handle type mismatches more strictly
    const validationIssues = this.validateCleanedData(cleaned, business);

    if (validationIssues.length > 0) {
      console.log(`Attempt ${attempt + 1}: Validation issues:`, validationIssues.join(', '));

      // If there's a business type mismatch, go straight to manual cleaning
      if (validationIssues.some(issue => issue.includes('Business type mismatch'))) {
        console.log('Business type mismatch detected, applying manual cleaning...');
        return {
          ...business,
          ...this.manualClean(business)
        };
      }

      // For other validation issues, try again
      return this.cleanBusinessData(cleaned, attempt + 1);
    }

    return cleaned;
  } catch (error) {
    console.error('Failed to parse response:', error);
    console.log('Raw response:', response);

    // Try to sanitize and parse the whole response
    try {
      const sanitized = this.sanitizeJsonResponse(response);
      const fallback = this.parseResponse(sanitized);
      return this.cleanBusinessData({ ...business, ...fallback }, attempt + 1);
    } catch (parseError) {
      console.error('Failed to parse sanitized response:', parseError);
      return this.cleanBusinessData({ ...business, ...this.manualClean(business) }, attempt + 1);
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Checks a cleaned business record against the original and collects every
 * rule it still breaks.
 *
 * Side effect: when the address does not match the expected
 * "street, city, ST 12345" shape but `this.cleanAddress` returns a non-empty
 * value, `business.address` is overwritten with that value and no issue is
 * reported for the address.
 *
 * @param business Cleaned candidate record (may be mutated, see above).
 * @param originalBusiness The record as it looked before cleaning.
 * @returns One message per detected problem; an empty array when none.
 */
private static validateCleanedData(business: Partial<Business>, originalBusiness: Business): string[] {
  const problems: string[] = [];
  const report = (message: string): void => {
    problems.push(message);
  };

  const expectedType = this.detectBusinessType(originalBusiness.name || '');
  const actualType = this.detectBusinessType(business.name || '');

  // Type and keyword checks only run when the original name was detected
  // as something other than plain 'business'.
  if (expectedType !== 'business') {
    if (actualType !== expectedType) {
      report(`Business type mismatch: expected ${expectedType}, got ${actualType}`);
    }

    // Lower-cased word tokens of a name; empty list when the name is missing.
    const tokenize = (name?: string): string[] =>
      name?.toLowerCase().split(/\W+/).filter(Boolean) || [];

    const originalTokens = tokenize(originalBusiness.name);
    const cleanedTokens = tokenize(business.name);
    const ignoredWords = ['the', 'and', 'llc', 'inc', 'corp', 'ltd', 'dba', 'est'];

    // A significant original word is "lost" when no cleaned token contains it.
    const lostWords = originalTokens
      .filter(token => !ignoredWords.includes(token))
      .filter(token => cleanedTokens.every(candidate => !candidate.includes(token)));

    if (lostWords.length > 0) {
      report(`Core business identity lost: missing ${lostWords.join(', ')}`);
    }
  }

  const hasBracket = business.name?.includes('[') || business.name?.includes(']');
  if (hasBracket) {
    report('Name contains brackets');
  }

  const addressLooksValid = business.address?.match(/^\d+[^,]+,\s*[^,]+,\s*[A-Z]{2}\s+\d{5}$/);
  if (!addressLooksValid) {
    const repaired = this.cleanAddress(business.address || '');
    if (repaired) {
      business.address = repaired;
    } else {
      report('Address format incorrect');
    }
  }

  const phoneLooksValid = business.phone?.match(/^\(\d{3}\) \d{3}-\d{4}$/);
  if (!phoneLooksValid) {
    report('Phone format incorrect');
  }

  const emailHasMarkup = business.email?.includes('[') || business.email?.includes('mailto:');
  if (emailHasMarkup) {
    report('Email contains markdown/mailto');
  }

  const descriptionLeak = business.description?.match(/\$|\b(?:call|email|visit|contact)\b/i);
  if (descriptionLeak) {
    report('Description contains pricing or contact info');
  }

  return problems;
}
|
||||||
|
|
||||||
|
/**
 * Sends the content of the first message as a single prompt to
 * `this.OLLAMA_URL` and resolves with the `response` field of the reply body.
 * The request is executed through `this.retryWithBackoff`.
 *
 * @param messages Chat messages; only `messages[0].content` is sent.
 * @returns The `response` field of the HTTP response body.
 * @throws Error('Ollama server not running') on ECONNREFUSED,
 *         Error('Model ... not found ...') on HTTP 404; any other error is rethrown as-is.
 */
private static async chat(messages: { role: string, content: string }[]) {
  return this.retryWithBackoff(async () => {
    try {
      // Built inside the callback so the seed is fresh on every invocation.
      const payload = {
        model: this.MODEL_NAME,
        prompt: messages[0].content,
        stream: false,
        options: {
          temperature: 0.7,
          num_predict: 2048,
          stop: ["<|im_end|>", "\n\n"],
          top_k: 40,
          top_p: 0.9,
          seed: Date.now(),
          reset: true
        }
      };
      const requestConfig = {
        headers: {
          'Content-Type': 'application/json'
        },
        timeout: 30000
      };

      const { data } = await axios.post(this.OLLAMA_URL, payload, requestConfig);
      return data.response;
    } catch (error) {
      // Non-axios failures carry no code/status to translate.
      if (!axios.isAxiosError(error)) {
        throw error;
      }
      if (error.code === 'ECONNREFUSED') {
        throw new Error('Ollama server not running');
      }
      if (error.response?.status === 404) {
        throw new Error(`Model ${this.MODEL_NAME} not found. Run: ollama pull ${this.MODEL_NAME}`);
      }
      throw error;
    }
  });
}
|
||||||
|
|
||||||
|
/**
 * Line-oriented fallback parser: reads "field: value" lines and keeps the
 * values of the recognised fields (name, address, phone, email, description).
 * Field names are matched case-insensitively after trimming; everything after
 * the first colon is the value. A later line overrides an earlier one for the
 * same field.
 *
 * @param response Raw text to scan.
 * @returns The fields that were found; other lines are ignored.
 */
private static parseResponse(response: string) {
  const recognised = ['name', 'address', 'phone', 'email', 'description'];
  const cleaned: Partial<Business> = {};

  response.split('\n').forEach((line) => {
    const colonAt = line.indexOf(':');
    const rawField = colonAt === -1 ? line : line.slice(0, colonAt);
    const value = colonAt === -1 ? '' : line.slice(colonAt + 1).trim();

    const field = rawField.toLowerCase().trim();
    if (recognised.includes(field)) {
      (cleaned as Record<string, string>)[field] = value;
    }
  });

  return cleaned;
}
|
||||||
|
}
|
63
src/lib/services/geocodingService.ts
Normal file
63
src/lib/services/geocodingService.ts
Normal file
|
@ -0,0 +1,63 @@
|
||||||
|
import axios from 'axios';
|
||||||
|
import { sleep } from '../utils/helpers';
|
||||||
|
|
||||||
|
/** Shape of one geocoding hit as produced and cached by GeocodingService. */
interface GeocodingResult {
  /** Latitude, parsed from the provider's `lat` field. */
  lat: number;
  /** Longitude, parsed from the provider's `lon` field. */
  lng: number;
  /** The provider's `display_name` for the match. */
  formattedAddress: string;
}
|
||||||
|
|
||||||
|
/**
 * Forward geocoding through the public Nominatim search endpoint, with an
 * in-memory result cache and a process-wide minimum spacing between requests.
 */
export class GeocodingService {
  // Successful lookups keyed by the exact address string that was queried.
  private static cache = new Map<string, GeocodingResult>();
  // Epoch-ms start time of the most recently scheduled request (0 = none yet).
  private static lastRequestTime = 0;
  private static RATE_LIMIT_MS = 1000; // 1 second between requests (Nominatim requirement)

  /**
   * Resolves an address to coordinates.
   *
   * @param address Free-form address text.
   * @returns The first match, or null when the address is blank, nothing
   *          matched, the coordinates were not numeric, or the request failed
   *          (failures are logged, never thrown).
   */
  static async geocode(address: string): Promise<GeocodingResult | null> {
    // A blank query cannot match anything; skip the network round trip.
    if (!address || !address.trim()) return null;

    // Check cache first
    const cached = this.cache.get(address);
    if (cached) return cached;

    try {
      // Reserve the next request slot synchronously, before awaiting.
      // Reading lastRequestTime, sleeping, and only then updating it would let
      // concurrent callers all compute the same wait and fire together.
      const now = Date.now();
      const slot = Math.max(now, this.lastRequestTime + this.RATE_LIMIT_MS);
      this.lastRequestTime = slot;
      if (slot > now) {
        await sleep(slot - now);
      }

      const response = await axios.get(
        'https://nominatim.openstreetmap.org/search',
        {
          params: {
            q: address,
            format: 'json',
            limit: 1,
            addressdetails: 1
          },
          headers: {
            'User-Agent': 'BusinessFinder/1.0'
          }
        }
      );

      if (response.data?.length > 0) {
        const result = response.data[0];
        const geocoded = {
          lat: parseFloat(result.lat),
          lng: parseFloat(result.lon),
          formattedAddress: result.display_name
        };

        // Never cache or return NaN coordinates from a malformed payload.
        if (!Number.isFinite(geocoded.lat) || !Number.isFinite(geocoded.lng)) {
          return null;
        }

        // Cache the result
        this.cache.set(address, geocoded);
        return geocoded;
      }

      return null;
    } catch (error) {
      console.error('Geocoding error:', error);
      return null;
    }
  }
}
|
|
@ -1,35 +1,44 @@
|
||||||
import axios from 'axios';
|
import axios from 'axios';
|
||||||
import { env } from '../../config/env';
|
import { env } from '../../config/env';
|
||||||
|
|
||||||
interface OllamaResponse {
|
|
||||||
response: string;
|
|
||||||
context?: number[];
|
|
||||||
}
|
|
||||||
|
|
||||||
export class OllamaService {
|
export class OllamaService {
|
||||||
private url: string;
|
private static readonly baseUrl = env.ollama.url;
|
||||||
private model: string;
|
private static readonly model = env.ollama.model;
|
||||||
|
|
||||||
constructor() {
|
static async complete(prompt: string): Promise<string> {
|
||||||
this.url = env.ollama.url;
|
|
||||||
this.model = env.ollama.model;
|
|
||||||
}
|
|
||||||
|
|
||||||
async complete(prompt: string): Promise<string> {
|
|
||||||
try {
|
try {
|
||||||
const response = await axios.post(`${this.url}/api/generate`, {
|
const response = await axios.post(`${this.baseUrl}/api/generate`, {
|
||||||
model: this.model,
|
model: this.model,
|
||||||
prompt: prompt,
|
prompt: prompt,
|
||||||
stream: false,
|
stream: false
|
||||||
options: {
|
|
||||||
temperature: 0.7,
|
|
||||||
top_p: 0.9
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (response.data?.response) {
|
||||||
return response.data.response;
|
return response.data.response;
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new Error('No response from Ollama');
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Ollama completion failed:', error);
|
console.error('Ollama error:', error);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static async chat(messages: { role: 'user' | 'assistant'; content: string }[]): Promise<string> {
|
||||||
|
try {
|
||||||
|
const response = await axios.post(`${this.baseUrl}/api/chat`, {
|
||||||
|
model: this.model,
|
||||||
|
messages: messages,
|
||||||
|
stream: false
|
||||||
|
});
|
||||||
|
|
||||||
|
if (response.data?.message?.content) {
|
||||||
|
return response.data.message.content;
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new Error('No response from Ollama chat');
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Ollama chat error:', error);
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
97
src/lib/services/searchService.ts
Normal file
97
src/lib/services/searchService.ts
Normal file
|
@ -0,0 +1,97 @@
|
||||||
|
import { DeepSeekService } from './deepseekService';
|
||||||
|
import { createClient } from '@supabase/supabase-js';
|
||||||
|
import { Business } from '../types';
|
||||||
|
|
||||||
|
/**
 * Business search backed by a Supabase `cache` table (query results) and a
 * `businesses` table (single-record lookup).
 */
export class SearchService {
  private supabase;
  private deepseek;

  constructor() {
    this.supabase = createClient(
      process.env.SUPABASE_URL!,
      process.env.SUPABASE_KEY!
    );
    this.deepseek = DeepSeekService;
  }

  /**
   * Returns businesses for a query/location pair, serving a cached result
   * when one exists and has not expired.
   *
   * @param query What to search for.
   * @param location Where to search.
   * @returns Matching businesses.
   * @throws Error('Query and location are required') when either is empty.
   * @throws Error('Rate limit exceeded') carrying `response.status === 429`
   *         when the underlying search was rate limited; other errors are rethrown.
   */
  async search(query: string, location: string): Promise<Business[]> {
    if (!query || !location) {
      throw new Error('Query and location are required');
    }

    // Check cache first
    const cacheKey = `${query}_${location}`.toLowerCase();
    const { data: cacheData } = await this.supabase
      .from('cache')
      .select()
      .eq('key', cacheKey)
      .single();

    // Rows are written with an expires_at timestamp; honour it on read so
    // stale results are not served forever.
    if (cacheData && cacheData.value && !SearchService.isExpired(cacheData.expires_at)) {
      return cacheData.value as Business[];
    }

    try {
      // Perform search
      const searchResults = await this.performSearch(query, location);

      // Cache results
      await this.cacheResults(cacheKey, searchResults);

      return searchResults;
    } catch (error: any) {
      if (error.response?.status === 429) {
        // Keep the status on the rethrown error: callers that inspect
        // error.response?.status would otherwise see a plain Error.
        throw Object.assign(new Error('Rate limit exceeded'), {
          response: { status: 429 }
        });
      }
      throw error;
    }
  }

  /**
   * Loads one business by id.
   *
   * @returns The record, or null when the query reported an error or no row.
   */
  async getBusinessById(id: string): Promise<Business | null> {
    const { data, error } = await this.supabase
      .from('businesses')
      .select()
      .eq('id', id)
      .single();

    if (error || !data) {
      return null;
    }

    return data as Business;
  }

  /** True only when `expiresAt` is a parseable timestamp that is not in the future. */
  private static isExpired(expiresAt: unknown): boolean {
    if (typeof expiresAt !== 'string' && !(expiresAt instanceof Date)) {
      return false; // no usable expiry recorded: treat as still valid
    }
    const expiry = new Date(expiresAt).getTime();
    return Number.isFinite(expiry) && expiry <= Date.now();
  }

  private async performSearch(query: string, location: string): Promise<Business[]> {
    // Implementation would use DeepSeek service to perform search
    // This is a placeholder implementation
    const mockBusiness: Business = {
      id: 'test_1',
      name: "Denver's Best Plumbing",
      address: "1234 Main Street, Denver, CO 80202",
      phone: "(720) 555-1234",
      email: "support@denverplumbing.com",
      description: "Professional plumbing services",
      source: 'test',
      website: 'https://example.com',
      rating: 4.8,
      location: { lat: 39.7392, lng: -104.9903 },
      openingHours: []
    };

    return [mockBusiness];
  }

  /**
   * Stores search results under `key` with an expiry of
   * CACHE_DURATION_DAYS days (default 7) from now.
   */
  private async cacheResults(key: string, results: Business[]): Promise<void> {
    // A non-numeric env value would yield an Invalid Date and make
    // toISOString() throw; fall back to the default instead.
    const configuredDays = Number(process.env.CACHE_DURATION_DAYS || 7);
    const days = Number.isFinite(configuredDays) ? configuredDays : 7;

    const expiresAt = new Date();
    expiresAt.setDate(expiresAt.getDate() + days);

    const outcome = await this.supabase
      .from('cache')
      .insert([{
        key,
        value: results,
        created_at: new Date().toISOString(),
        expires_at: expiresAt.toISOString()
      }]);

    // The client reports failures in the result rather than throwing;
    // surface them so a broken cache does not go unnoticed.
    if (outcome?.error) {
      console.warn('Failed to cache search results:', outcome.error);
    }
  }
}
|
|
@ -1,5 +1,5 @@
|
||||||
export interface BusinessData {
|
export interface Business {
|
||||||
id?: string;
|
id: string;
|
||||||
name: string;
|
name: string;
|
||||||
phone?: string;
|
phone?: string;
|
||||||
email?: string;
|
email?: string;
|
||||||
|
@ -7,22 +7,16 @@ export interface BusinessData {
|
||||||
rating?: number;
|
rating?: number;
|
||||||
website?: string;
|
website?: string;
|
||||||
logo?: string;
|
logo?: string;
|
||||||
source?: string;
|
source: string;
|
||||||
description?: string;
|
description?: string;
|
||||||
location?: {
|
location?: {
|
||||||
lat: number;
|
lat: number;
|
||||||
lng: number;
|
lng: number;
|
||||||
};
|
};
|
||||||
latitude?: number;
|
|
||||||
longitude?: number;
|
|
||||||
place_id?: string;
|
|
||||||
photos?: string[];
|
|
||||||
openingHours?: string[];
|
openingHours?: string[];
|
||||||
distance?: {
|
services?: string[];
|
||||||
value: number;
|
reviewCount?: number;
|
||||||
unit: string;
|
hours?: string[];
|
||||||
};
|
|
||||||
last_updated?: string;
|
|
||||||
search_count?: number;
|
|
||||||
created_at?: string;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export type BusinessData = Business;
|
|
@ -1,3 +1,5 @@
|
||||||
|
import { Business } from '../types';
|
||||||
|
|
||||||
export function normalizePhoneNumber(phone: string): string {
|
export function normalizePhoneNumber(phone: string): string {
|
||||||
return phone.replace(/[^\d]/g, '');
|
return phone.replace(/[^\d]/g, '');
|
||||||
}
|
}
|
||||||
|
@ -22,9 +24,44 @@ export function calculateReliabilityScore(business: Business): number {
|
||||||
if (business.phone) score += 2;
|
if (business.phone) score += 2;
|
||||||
if (business.website) score += 1;
|
if (business.website) score += 1;
|
||||||
if (business.email) score += 1;
|
if (business.email) score += 1;
|
||||||
if (business.hours) score += 2;
|
if (business.hours?.length) score += 2;
|
||||||
if (business.services.length > 0) score += 1;
|
if (business.services && business.services.length > 0) score += 1;
|
||||||
if (business.reviewCount > 10) score += 2;
|
if (business.reviewCount && business.reviewCount > 10) score += 2;
|
||||||
|
|
||||||
return score;
|
return score;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Strips one conversational lead-in ("Sure!", "Here is ", "The business
 * address is:" ...) from the start of an address, turns newlines into spaces
 * and trims the result.
 *
 * @param address Raw address text.
 * @returns The tidied address.
 */
export function cleanAddress(address: string): string {
  const leadIn = /^(Sure!|Here is |The business address( is| found in the text is)?:?\n?\s*)/i;
  const withoutLeadIn = address.replace(leadIn, '');
  const singleLine = withoutLeadIn.split('\n').join(' ');
  return singleLine.trim();
}
|
||||||
|
|
||||||
|
/**
 * Formats a phone number as "(XXX) XXX-XXXX".
 *
 * Accepts ten digits in any punctuation, optionally preceded by the country
 * code 1 (e.g. "+1 720-555-1234").
 *
 * @param phone Raw phone text.
 * @returns The formatted number, or the input unchanged when it does not
 *          contain exactly ten digits (after dropping a leading 1 from eleven).
 */
export function formatPhoneNumber(phone: string): string {
  // Remove all non-numeric characters
  let digits = phone.replace(/\D/g, '');

  // Eleven digits starting with 1 is a ten-digit number with its country code.
  if (digits.length === 11 && digits.startsWith('1')) {
    digits = digits.slice(1);
  }

  // Format as (XXX) XXX-XXXX
  if (digits.length === 10) {
    return `(${digits.slice(0, 3)}) ${digits.slice(3, 6)}-${digits.slice(6)}`;
  }

  // Return original if not 10 digits
  return phone;
}
|
||||||
|
|
||||||
|
/**
 * Removes phone numbers that were captured together with an email address.
 *
 * Full ten-digit numbers are removed first, with or without separators or
 * parentheses, so the area code is not left behind; then one bare
 * "XXX-XXXX" number is removed.
 *
 * @param email Raw email text, possibly with a phone number attached.
 * @returns The text with phone numbers removed and trimmed.
 */
export function cleanEmail(email: string): string {
  return email
    // Whole numbers such as "720-555-1234", "(720) 555-1234", "7205551234".
    .replace(/\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g, '')
    // A remaining seven-digit local number.
    .replace(/\d{3}-\d{4}/, '')
    .trim();
}
|
||||||
|
|
||||||
|
/**
 * Drops a leading "Description:", "About:" or "Info:" label (any case),
 * collapses every whitespace run to a single space and trims the result.
 *
 * @param description Raw description text.
 * @returns The normalised description.
 */
export function cleanDescription(description: string): string {
  const withoutLabel = description.replace(/^(Description:|About:|Info:)/i, '');
  const singleSpaced = withoutLabel.split(/\s+/).join(' ');
  return singleSpaced.trim();
}
|
18
src/lib/utils/helpers.ts
Normal file
18
src/lib/utils/helpers.ts
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
/**
 * Returns a promise that resolves once a `setTimeout` of `ms` milliseconds fires.
 *
 * @param ms Delay passed to `setTimeout`.
 */
export function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Normalises free text: removes every character that is not a word
 * character, whitespace, hyphen, period or comma, then collapses whitespace
 * runs to one space and trims.
 *
 * Symbols are stripped before whitespace is collapsed so that removing a
 * symbol surrounded by spaces ("Tom & Jerry") does not leave a double space.
 *
 * @param text Raw text.
 * @returns The normalised text.
 */
export function cleanText(text: string): string {
  return text
    .replace(/[^\w\s\-.,]/g, '')
    .replace(/\s+/g, ' ')
    .trim();
}
|
||||||
|
|
||||||
|
/**
 * Loose phone-number check.
 *
 * The text may start with "+" and must otherwise consist only of digits,
 * hyphens, periods, parentheses and whitespace (at least ten characters),
 * and must contain at least ten digits. The digit count rejects strings made
 * only of punctuation or spaces, which the character check alone lets through.
 *
 * @param phone Candidate phone text.
 * @returns true when the text passes both checks.
 */
export function isValidPhone(phone: string): boolean {
  const allowedShape = /^\+?[\d\-.()\s]{10,}$/;
  if (!allowedShape.test(phone)) {
    return false;
  }
  return phone.replace(/\D/g, '').length >= 10;
}
|
||||||
|
|
||||||
|
/**
 * Shape check for an email address: non-empty local part, "@", and a domain
 * containing a dot with text on both sides; no whitespace and no second "@".
 *
 * @param email Candidate email text.
 * @returns true when the whole string has that shape.
 */
export function isValidEmail(email: string): boolean {
  const emailShape = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
  const looksValid = emailShape.test(email);
  return looksValid;
}
|
168
src/lib/utils/scraper.ts
Normal file
168
src/lib/utils/scraper.ts
Normal file
|
@ -0,0 +1,168 @@
|
||||||
|
import axios from 'axios';
|
||||||
|
import * as cheerio from 'cheerio';
|
||||||
|
import { OllamaService } from '../services/ollamaService';
|
||||||
|
import { sleep } from './helpers';
|
||||||
|
|
||||||
|
const RATE_LIMIT_MS = 1000; // 1 second between requests
|
||||||
|
// Epoch-millisecond timestamp used by rateLimitedRequest to space out
// requests; stays 0 until the first request is made.
let lastRequestTime = 0;
|
||||||
|
|
||||||
|
/**
 * GETs `url` with crawler-style headers and a 5 s timeout, keeping at least
 * RATE_LIMIT_MS between the start of consecutive requests made through this
 * function.
 *
 * @param url Page to fetch.
 * @returns The axios response promise for the request.
 */
async function rateLimitedRequest(url: string) {
  // Claim the next slot synchronously, before awaiting. Updating
  // lastRequestTime only after the sleep would let concurrent callers all
  // compute the same wait and send their requests at the same moment.
  const now = Date.now();
  const slot = Math.max(now, lastRequestTime + RATE_LIMIT_MS);
  lastRequestTime = slot;

  if (slot > now) {
    await sleep(slot - now);
  }

  return axios.get(url, {
    timeout: 5000,
    headers: {
      'User-Agent': 'Mozilla/5.0 (compatible; BusinessFinder/1.0; +http://example.com/bot)',
      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
      'Accept-Language': 'en-US,en;q=0.5'
    }
  });
}
|
||||||
|
|
||||||
|
/**
 * Contact details scraped from a business web page.
 * Every field is optional: a field is absent when nothing was found for it.
 */
export interface ContactInfo {
  /** Phone number as found on the page. */
  phone?: string;
  /** Email address as found on the page. */
  email?: string;
  /** Postal address text. */
  address?: string;
  /** Short description of the business. */
  description?: string;
  /** Opening-hours entries, one string per entry. */
  openingHours?: string[];
}
|
||||||
|
|
||||||
|
/**
 * Downloads a page and extracts contact details from it.
 *
 * JSON-LD structured data of type LocalBusiness or Restaurant is preferred;
 * when none is present the page text and common selectors are scanned instead.
 *
 * @param url Page to scrape.
 * @returns Whatever contact details were found; an empty object when the
 *          page could not be fetched or parsed (the error is logged).
 */
export async function extractContactFromHtml(url: string): Promise<ContactInfo> {
  try {
    const response = await rateLimitedRequest(url);

    const $ = cheerio.load(response.data);

    // Extract structured data if available
    const structuredData: any[] = $('script[type="application/ld+json"]')
      .map((_, el) => {
        try {
          return JSON.parse($(el).html() || '');
        } catch {
          return null;
        }
      })
      .get()
      // Documents may wrap their nodes in an "@graph" array; unwrap it.
      .flatMap((entry: any) =>
        entry && Array.isArray(entry['@graph']) ? entry['@graph'] : [entry]
      )
      // Only objects can carry an "@type".
      .filter((entry: any) => entry !== null && typeof entry === 'object');

    // "@type" may be a single string or an array of strings.
    const isBusinessNode = (node: any): boolean => {
      const declared = node['@type'];
      const types = Array.isArray(declared) ? declared : [declared];
      return types.some((type: unknown) => type === 'LocalBusiness' || type === 'Restaurant');
    };

    // Look for LocalBusiness or Restaurant schema
    const businessData = structuredData.find(isBusinessNode);

    if (businessData) {
      // The address may be plain text or an object with a streetAddress.
      const rawAddress = businessData.address;
      const address = typeof rawAddress === 'string' ? rawAddress : rawAddress?.streetAddress;

      // openingHours may be one string or a list; ContactInfo expects a list.
      const rawHours = businessData.openingHours;
      const openingHours = Array.isArray(rawHours)
        ? rawHours
        : typeof rawHours === 'string'
          ? [rawHours]
          : undefined;

      return {
        phone: businessData.telephone,
        email: businessData.email,
        address,
        description: businessData.description,
        openingHours
      };
    }

    // Fallback to regular HTML parsing
    return {
      phone: findPhone($),
      email: findEmail($),
      address: findAddress($),
      description: $('meta[name="description"]').attr('content'),
      openingHours: findOpeningHours($)
    };
  } catch (error) {
    console.warn(`Error extracting contact info from ${url}:`, error);
    return {};
  }
}
|
||||||
|
|
||||||
|
/**
 * Asks the Ollama model to pull a business address out of free text.
 *
 * @param text Text that may contain an address.
 * @param location Area the business is expected to be in or near; included in the prompt.
 * @returns The model's trimmed reply, or an empty string when the call fails
 *          (the error is logged).
 */
export async function extractCleanAddress(text: string, location: string): Promise<string> {
  try {
    // OllamaService.complete is called statically below, so no instance is created.
    const prompt = `
Extract a business address from this text. The business should be in or near ${location}.
Only return the address, nothing else. If no valid address is found, return an empty string.

Text: ${text}
`;

    const response = await OllamaService.complete(prompt);
    return response.trim();
  } catch (error) {
    console.warn('Error extracting address:', error);
    return '';
  }
}
|
||||||
|
|
||||||
|
// Helper functions
|
||||||
|
/**
 * Finds the first phone number in the page text.
 *
 * Tries a ten-digit number first ("(720) 555-1234", "720-555-1234",
 * "7205551234"), then a number introduced by a "Phone:", "Tel:" or
 * "Contact:" label.
 *
 * @param $ Loaded cheerio document.
 * @returns The number as written on the page (without any label), or
 *          undefined when none was found.
 */
function findPhone($: cheerio.CheerioAPI): string | undefined {
  // Extract the text once; it does not change between patterns.
  const pageText = $.text();

  // A parenthesised area code is matched as a unit. A word boundary in front
  // of an optional "(" would skip the "(" and yield "720) 555-1234".
  const tenDigit = pageText.match(/(?:\(\d{3}\)[-. ]?|\b\d{3}[-. ]?)\d{3}[-. ]?\d{4}\b/);
  if (tenDigit) return tenDigit[0];

  // Return only the captured number, not the "Phone:" label in front of it.
  const labelled = pageText.match(/\b(?:Phone|Tel|Contact):\s*([0-9-().+ ]{10,})\b/i);
  if (labelled) return labelled[1].trim();

  return undefined;
}
|
||||||
|
|
||||||
|
/**
 * Finds the first email address in the page text.
 *
 * @param $ Loaded cheerio document.
 * @returns The address, or undefined when none was found.
 */
function findEmail($: cheerio.CheerioAPI): string | undefined {
  // The top-level domain is letters only. Inside a character class "|" is a
  // literal pipe, not alternation, so it must not appear there.
  const emailPattern = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/;
  const match = $.text().match(emailPattern);
  return match ? match[0] : undefined;
}
|
||||||
|
|
||||||
|
/**
 * Returns the trimmed text of the first element matched by the first
 * address-like selector that matches anything on the page.
 *
 * @param $ Loaded cheerio document.
 * @returns That text (possibly empty), or undefined when no selector matched.
 */
function findAddress($: cheerio.CheerioAPI): string | undefined {
  // Ordered from most to least specific.
  const candidateSelectors = [
    'address',
    '[itemtype="http://schema.org/PostalAddress"]',
    '.address',
    '#address',
    '[class*="address"]',
    '[id*="address"]'
  ];

  const firstHit = candidateSelectors.find((selector) => $(selector).length > 0);
  if (firstHit === undefined) {
    return undefined;
  }

  return $(firstHit).first().text().trim();
}
|
||||||
|
|
||||||
|
/**
 * Collects opening-hours text from hours-like containers on the page.
 *
 * For each selector, the first matching container contributes the trimmed
 * text of each of its descendant elements; a container that holds only text
 * (no child elements) contributes its own text. Empty strings and duplicates
 * are skipped.
 *
 * @param $ Loaded cheerio document.
 * @returns The collected entries, in discovery order; empty when none.
 */
function findOpeningHours($: cheerio.CheerioAPI): string[] {
  const hours: string[] = [];
  const hoursSelectors = [
    '[itemtype="http://schema.org/OpeningHoursSpecification"]',
    '.hours',
    '#hours',
    '[class*="hours"]',
    '[id*="hours"]'
  ];

  // Record a piece of text once, ignoring blanks.
  const collect = (text: string): void => {
    if (text && !hours.includes(text)) {
      hours.push(text);
    }
  };

  for (const selector of hoursSelectors) {
    const container = $(selector).first();
    if (!container.length) {
      continue;
    }

    const descendants = container.find('*');
    if (descendants.length === 0) {
      // e.g. <div class="hours">Mon-Fri 9-5</div>: there are no child
      // elements to walk, so take the container's own text.
      collect(container.text().trim());
      continue;
    }

    descendants.each((_, el) => {
      collect($(el).text().trim());
    });
  }

  return hours;
}
|
|
@ -1,87 +1,60 @@
|
||||||
import { Router } from 'express';
|
import express from 'express';
|
||||||
import { searchBusinesses } from '../lib/searxng';
|
import { SearchService } from '../lib/services/searchService';
|
||||||
import { categories } from '../lib/categories';
|
|
||||||
import { supabase } from '../lib/supabase';
|
|
||||||
import { BusinessData } from '../lib/types';
|
|
||||||
|
|
||||||
const router = Router();
|
const router = express.Router();
|
||||||
|
const searchService = new SearchService();
|
||||||
|
|
||||||
// Categories endpoint
|
// Error handling middleware for JSON parsing errors
|
||||||
router.get('/categories', (req, res) => {
|
router.use((err: any, req: express.Request, res: express.Response, next: express.NextFunction) => {
|
||||||
res.json(categories);
|
if (err instanceof SyntaxError && 'body' in err) {
|
||||||
|
return res.status(400).json({ error: 'Invalid JSON' });
|
||||||
|
}
|
||||||
|
next();
|
||||||
});
|
});
|
||||||
|
|
||||||
// Search endpoint
|
// Search endpoint
|
||||||
router.get('/search', async (req, res) => {
|
router.post('/search', async (req, res) => {
|
||||||
try {
|
try {
|
||||||
const query = req.query.q as string;
|
const { query, location } = req.body;
|
||||||
const [searchTerm, location] = query.split(' in ');
|
|
||||||
|
|
||||||
if (!query) {
|
if (!query || !location) {
|
||||||
return res.status(400).json({ error: 'Search query is required' });
|
return res.status(400).json({
|
||||||
}
|
error: 'Query and location are required'
|
||||||
|
|
||||||
// Set headers for streaming response
|
|
||||||
res.setHeader('Content-Type', 'application/json');
|
|
||||||
res.setHeader('Transfer-Encoding', 'chunked');
|
|
||||||
|
|
||||||
// First, search in Supabase
|
|
||||||
const { data: existingResults, error: dbError } = await supabase
|
|
||||||
.from('businesses')
|
|
||||||
.select('*')
|
|
||||||
.or(`name.ilike.%${searchTerm}%, description.ilike.%${searchTerm}%`)
|
|
||||||
.ilike('address', `%${location}%`);
|
|
||||||
|
|
||||||
if (dbError) {
|
|
||||||
console.error('Supabase search error:', dbError);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send existing results immediately if there are any
|
|
||||||
if (existingResults && existingResults.length > 0) {
|
|
||||||
const chunk = JSON.stringify({
|
|
||||||
source: 'database',
|
|
||||||
results: existingResults
|
|
||||||
}) + '\n';
|
|
||||||
res.write(chunk);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start background search
|
|
||||||
const searchPromise = searchBusinesses(query, {
|
|
||||||
onProgress: (status, progress) => {
|
|
||||||
const chunk = JSON.stringify({
|
|
||||||
source: 'search',
|
|
||||||
status,
|
|
||||||
progress,
|
|
||||||
}) + '\n';
|
|
||||||
res.write(chunk);
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
|
|
||||||
const results = await searchPromise;
|
|
||||||
|
|
||||||
// Send final results
|
|
||||||
const finalChunk = JSON.stringify({
|
|
||||||
source: 'search',
|
|
||||||
results,
|
|
||||||
complete: true
|
|
||||||
}) + '\n';
|
|
||||||
res.write(finalChunk);
|
|
||||||
res.end();
|
|
||||||
|
|
||||||
} catch (error: unknown) {
|
|
||||||
console.error('Search error:', error);
|
|
||||||
const errorResponse = {
|
|
||||||
error: 'An error occurred while searching',
|
|
||||||
details: error instanceof Error ? error.message : 'Unknown error'
|
|
||||||
};
|
|
||||||
|
|
||||||
// Only send error response if headers haven't been sent
|
|
||||||
if (!res.headersSent) {
|
|
||||||
res.status(500).json(errorResponse);
|
|
||||||
} else {
|
|
||||||
res.write(JSON.stringify(errorResponse));
|
|
||||||
res.end();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const results = await searchService.search(query, location);
|
||||||
|
res.json({ results });
|
||||||
|
} catch (error: any) {
|
||||||
|
if (error.response?.status === 429) {
|
||||||
|
return res.status(429).json({
|
||||||
|
error: 'Rate limit exceeded'
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
res.status(500).json({
|
||||||
|
error: error.message || 'Internal server error'
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Get business details endpoint
|
||||||
|
router.get('/business/:id', async (req, res) => {
|
||||||
|
try {
|
||||||
|
const { id } = req.params;
|
||||||
|
const business = await searchService.getBusinessById(id);
|
||||||
|
|
||||||
|
if (!business) {
|
||||||
|
return res.status(404).json({
|
||||||
|
error: 'Business not found'
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
res.json(business);
|
||||||
|
} catch (error: any) {
|
||||||
|
res.status(500).json({
|
||||||
|
error: error.message || 'Internal server error'
|
||||||
|
});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
139
src/tests/__tests__/database.test.ts
Normal file
139
src/tests/__tests__/database.test.ts
Normal file
|
@ -0,0 +1,139 @@
|
||||||
|
import { createClient } from '@supabase/supabase-js';
|
||||||
|
|
||||||
|
// Mock data type
|
||||||
|
// One canned row per mocked table, keyed by table name.
type MockData = {
  // Row returned for the 'businesses' table.
  businesses: {
    id: string;
    name: string;
  };
  // Row returned for the 'cache' table.
  cache: {
    key: string;
    value: { test: boolean };
  };
};
|
||||||
|
|
||||||
|
// Mock Supabase client
|
||||||
|
// Replace the Supabase client with an in-memory stub: every table exposes
// insert/select/update/delete chains that resolve with canned rows and no error.
// Everything lives inside the factory because jest hoists this call.
jest.mock('@supabase/supabase-js', () => {
  const cannedRows: MockData = {
    businesses: { id: 'test_1', name: 'Test Business' },
    cache: { key: 'test_key', value: { test: true } }
  };

  // Builds the query-builder stub returned by `from(table)`.
  const buildTableStub = (table: keyof MockData) => {
    const insert = jest.fn(() => ({
      select: jest.fn().mockResolvedValue({ data: [cannedRows[table]], error: null })
    }));

    const select = jest.fn(() => ({
      eq: jest.fn(() => ({
        single: jest.fn().mockResolvedValue({ data: cannedRows[table], error: null }),
        gt: jest.fn(() => ({
          single: jest.fn().mockResolvedValue({ data: null, error: null })
        }))
      }))
    }));

    const update = jest.fn(() => ({
      eq: jest.fn().mockResolvedValue({ error: null })
    }));

    const remove = jest.fn(() => ({
      eq: jest.fn().mockResolvedValue({ error: null })
    }));

    return { insert, select, update, delete: remove };
  };

  return {
    createClient: jest.fn(() => ({
      from: jest.fn((table: keyof MockData) => buildTableStub(table))
    }))
  };
});
|
||||||
|
|
||||||
|
// Exercises the insert/select/update call chains against the mocked client.
describe('Database Operations', () => {
  const supabase = createClient('test-url', 'test-key');

  // Sample business row used by the business tests.
  const testBusiness = {
    id: `test_${Date.now()}`,
    name: 'Test Business',
    phone: '(303) 555-1234',
    email: 'test@example.com',
    address: '123 Test St, Denver, CO 80202',
    rating: 5,
    website: 'https://test.com',
    source: 'test',
    description: 'Test description',
    location: { lat: 39.7392, lng: -104.9903 },
    search_count: 1,
    created_at: new Date().toISOString()
  };

  beforeEach(() => {
    jest.clearAllMocks();
  });

  describe('Business Operations', () => {
    it('should insert a business successfully', async () => {
      const inserted = await supabase
        .from('businesses')
        .insert([testBusiness])
        .select();

      expect(inserted.error).toBeNull();
      expect(inserted.data).toBeTruthy();
      expect(inserted.data![0].name).toBe('Test Business');
    });

    it('should retrieve a business by id', async () => {
      const fetched = await supabase
        .from('businesses')
        .select()
        .eq('id', testBusiness.id)
        .single();

      expect(fetched.error).toBeNull();
      expect(fetched.data).toBeTruthy();
      expect(fetched.data.name).toBe('Test Business');
    });

    it('should update a business', async () => {
      const updated = await supabase
        .from('businesses')
        .update({ name: 'Updated Test Business' })
        .eq('id', testBusiness.id);

      expect(updated.error).toBeNull();
    });
  });

  describe('Cache Operations', () => {
    // Sample cache row that expires one hour after creation.
    const testCache = {
      key: `test_key_${Date.now()}`,
      value: { test: true },
      created_at: new Date().toISOString(),
      expires_at: new Date(Date.now() + 3600000).toISOString()
    };

    it('should insert cache entry', async () => {
      const inserted = await supabase
        .from('cache')
        .insert([testCache])
        .select();

      expect(inserted.error).toBeNull();
      expect(inserted.data).toBeTruthy();
    });

    it('should retrieve cache entry', async () => {
      const fetched = await supabase
        .from('cache')
        .select()
        .eq('key', testCache.key)
        .single();

      expect(fetched.error).toBeNull();
      expect(fetched.data.value).toEqual({ test: true });
    });
  });
});
|
92
src/tests/__tests__/deepseek.test.ts
Normal file
92
src/tests/__tests__/deepseek.test.ts
Normal file
|
@ -0,0 +1,92 @@
|
||||||
|
import { DeepSeekService } from '../../lib/services/deepseekService';
|
||||||
|
import { Business } from '../../lib/types';
|
||||||
|
|
||||||
|
// Swap the DeepSeek service module for jest doubles that always hand back the
// same canned "cleaned" business record.
jest.mock('../../lib/services/deepseekService', () => {
  // Declared inside the factory so the hoisted jest.mock call stays
  // self-contained.
  const mockCleaned = {
    name: "Denver's Best Plumbing & Repair",
    address: '1234 Main Street, Denver, CO 80202',
    phone: '(720) 555-1234',
    email: 'support@denverplumbing.com',
    description: 'Professional plumbing services in Denver metro area'
  };

  // `chat` resolves to a JSON string wrapping the record under `business_info`.
  const chatReply = JSON.stringify({ business_info: mockCleaned });

  const DeepSeekService = {
    chat: jest.fn().mockResolvedValue(chatReply),
    detectBusinessType: jest.fn().mockReturnValue('service'),
    sanitizeJsonResponse: jest.fn().mockReturnValue(mockCleaned),
    manualClean: jest.fn().mockReturnValue(mockCleaned),
    cleanBusinessData: jest.fn().mockResolvedValue(mockCleaned)
  };

  return { DeepSeekService };
});
|
||||||
|
|
||||||
|
/**
 * Format checks on the record returned by DeepSeekService.cleanBusinessData
 * and basic resolve/reject behavior of DeepSeekService.chat.
 */
describe('DeepSeekService', () => {
  describe('cleanBusinessData', () => {
    // Deliberately noisy input: bracketed suffixes in the name, a suite prefix
    // and spelled-out state in the address.
    const testBusiness: Business = {
      id: 'test_1',
      name: "Denver's Best Plumbing & Repair [LLC] (A Family Business)",
      address: "Suite 200-B, 1234 Main Street, Denver, Colorado 80202",
      phone: "(720) 555-1234",
      email: "support@denverplumbing.com",
      description: "Professional plumbing services in Denver metro area",
      source: 'test',
      website: 'https://example.com',
      rating: 4.8,
      location: { lat: 39.7392, lng: -104.9903 },
      openingHours: []
    };

    beforeEach(() => {
      jest.clearAllMocks();
    });

    it('should clean business name correctly', async () => {
      const cleaned = await DeepSeekService.cleanBusinessData(testBusiness);
      expect(cleaned.name).not.toMatch(/[\[\]{}()]/);
      expect(cleaned.name).toBeTruthy();
    });

    it('should format phone number correctly', async () => {
      const cleaned = await DeepSeekService.cleanBusinessData(testBusiness);
      // Expected shape: "(NNN) NNN-NNNN".
      expect(cleaned.phone).toMatch(/^\(\d{3}\) \d{3}-\d{4}$/);
    });

    it('should clean email address', async () => {
      const cleaned = await DeepSeekService.cleanBusinessData(testBusiness);
      expect(cleaned.email).not.toMatch(/[\[\]<>()]|mailto:|click|schedule/i);
      expect(cleaned.email).toMatch(/^[^\s@]+@[^\s@]+\.[^\s@]+$/);
    });

    it('should clean description', async () => {
      const cleaned = await DeepSeekService.cleanBusinessData(testBusiness);
      // No pricing or discount wording.
      expect(cleaned.description).not.toMatch(/[\$\d]+%?\s*off|\$/i);
      // No contact calls-to-action or links.
      expect(cleaned.description).not.toMatch(/\b(?:call|email|visit|contact|text|www\.|http|@)\b/i);
      // No emoji. The `u` flag matters: without it, astral-plane emoji inside
      // a character class are split into lone surrogate code units, so the
      // class matches unrelated emoji and misses others.
      expect(cleaned.description).not.toMatch(/\p{Extended_Pictographic}/u);
      // No hashtags.
      expect(cleaned.description).not.toMatch(/#\w+/);
    });
  });

  describe('chat', () => {
    it('should return a response from the model', async () => {
      // Bracket access is used to reach `chat` on the service.
      const response = await DeepSeekService['chat']([{
        role: 'user',
        content: 'Test message'
      }]);
      expect(response).toBeTruthy();
      expect(typeof response).toBe('string');
    });

    it('should handle errors gracefully', async () => {
      // Only the next call rejects; later calls fall back to the default mock.
      (DeepSeekService['chat'] as jest.Mock).mockRejectedValueOnce(new Error('Test error'));

      await expect(DeepSeekService['chat']([{
        role: 'user',
        content: 'Test message'
      }])).rejects.toThrow('Test error');
    });
  });
});
|
145
src/tests/__tests__/integration/api.test.ts
Normal file
145
src/tests/__tests__/integration/api.test.ts
Normal file
|
@ -0,0 +1,145 @@
|
||||||
|
import express from 'express';
|
||||||
|
import request from 'supertest';
|
||||||
|
import { SearchService } from '../../../lib/services/searchService';
|
||||||
|
import { Business } from '../../../lib/types';
|
||||||
|
|
||||||
|
// Replace SearchService with jest's automatic mock so no real search runs.
jest.mock('../../../lib/services/searchService');

/**
 * Drives the `/api` router through supertest against an in-memory Express app
 * whose SearchService methods are jest mocks.
 */
describe('API Integration', () => {
  let app: express.Application;

  const mockBusiness: Business = {
    id: 'test_1',
    name: "Denver's Best Plumbing",
    address: "1234 Main Street, Denver, CO 80202",
    phone: "(720) 555-1234",
    email: "support@denverplumbing.com",
    description: "Professional plumbing services",
    source: 'test',
    website: 'https://example.com',
    rating: 4.8,
    location: { lat: 39.7392, lng: -104.9903 },
    openingHours: []
  };

  // Typed handles on the auto-mocked prototype methods.
  const searchMock = SearchService.prototype.search as jest.Mock;
  const getBusinessByIdMock = SearchService.prototype.getBusinessById as jest.Mock;

  // A complete, valid search request body.
  const fullSearchBody = {
    query: 'plumber in Denver',
    location: 'Denver, CO'
  };

  // POSTs `body` to the search endpoint of the app under test.
  const postSearch = (body: object) => request(app).post('/api/search').send(body);

  beforeAll(() => {
    app = express();
    app.use(express.json());

    // Default happy-path behavior for the mocked service.
    searchMock.mockResolvedValue([mockBusiness]);
    getBusinessByIdMock.mockResolvedValue(mockBusiness);

    // Turn body-parser syntax errors into a 400; pass everything else on.
    app.use((err: any, req: express.Request, res: express.Response, next: express.NextFunction) => {
      const isBodyParseError = err instanceof SyntaxError && 'body' in err;
      if (!isBodyParseError) {
        next(err);
        return;
      }
      return res.status(400).json({ error: 'Invalid JSON' });
    });

    // Mount the router under test.
    app.use('/api', require('../../../routes/api').default);
  });

  beforeEach(() => {
    jest.clearAllMocks();
  });

  describe('Search Endpoints', () => {
    it('should handle search requests', async () => {
      const { status, body } = await postSearch(fullSearchBody);

      expect(status).toBe(200);
      expect(body).toHaveProperty('results');
      expect(Array.isArray(body.results)).toBe(true);
      expect(body.results[0]).toEqual(mockBusiness);
    });

    it('should handle missing parameters', async () => {
      // `location` is intentionally omitted.
      const { status, body } = await postSearch({ query: 'plumber in Denver' });

      expect(status).toBe(400);
      expect(body).toHaveProperty('error');
    });

    it('should handle search errors', async () => {
      searchMock.mockRejectedValueOnce(new Error('Search failed'));

      const { status, body } = await postSearch(fullSearchBody);

      expect(status).toBe(500);
      expect(body).toHaveProperty('error');
    });
  });

  describe('Business Details Endpoint', () => {
    it('should retrieve business details', async () => {
      const { status, body } = await request(app).get('/api/business/test_1');

      expect(status).toBe(200);
      expect(body).toEqual(mockBusiness);
    });

    it('should handle non-existent business', async () => {
      // A null lookup result stands in for "not found".
      getBusinessByIdMock.mockResolvedValueOnce(null);

      const { status, body } = await request(app).get('/api/business/non_existent');

      expect(status).toBe(404);
      expect(body).toHaveProperty('error');
    });
  });

  describe('Error Handling', () => {
    it('should handle invalid JSON', async () => {
      const { status, body } = await request(app)
        .post('/api/search')
        .set('Content-Type', 'application/json')
        .send('{"invalid json"}');

      expect(status).toBe(400);
      expect(body).toHaveProperty('error');
      expect(body.error).toBe('Invalid JSON');
    });

    it('should handle rate limiting', async () => {
      // Rejection shaped like an HTTP client error carrying a 429 status.
      searchMock.mockRejectedValueOnce({ response: { status: 429 } });

      const { status, body } = await postSearch(fullSearchBody);

      expect(status).toBe(429);
      expect(body).toHaveProperty('error');
      expect(body.error).toBe('Rate limit exceeded');
    });
  });
});
|
162
src/tests/__tests__/integration/search.test.ts
Normal file
162
src/tests/__tests__/integration/search.test.ts
Normal file
|
@ -0,0 +1,162 @@
|
||||||
|
import { DeepSeekService } from '../../../lib/services/deepseekService';
|
||||||
|
import { createClient } from '@supabase/supabase-js';
|
||||||
|
import { SearchService } from '../../../lib/services/searchService';
|
||||||
|
import { Business } from '../../../lib/types';
|
||||||
|
|
||||||
|
// Mock external services so no database or model calls leave the process.
jest.mock('@supabase/supabase-js');
jest.mock('../../../lib/services/deepseekService');

/**
 * Exercises SearchService.search / getBusinessById against a hand-built
 * Supabase double, with the private `performSearch` step stubbed per test.
 */
describe('Search Integration', () => {
  const mockBusiness: Business = {
    id: 'test_1',
    name: "Denver's Best Plumbing",
    address: "1234 Main Street, Denver, CO 80202",
    phone: "(720) 555-1234",
    email: "support@denverplumbing.com",
    description: "Professional plumbing services",
    source: 'test',
    website: 'https://example.com',
    rating: 4.8,
    location: { lat: 39.7392, lng: -104.9903 },
    openingHours: []
  };

  // Default Supabase double: inserts echo the business back, lookups miss
  // (`data: null`). Individual tests override `from` once for a hit.
  const mockSupabase = {
    from: jest.fn().mockReturnValue({
      insert: jest.fn().mockReturnValue({
        select: jest.fn().mockResolvedValue({
          data: [mockBusiness],
          error: null
        })
      }),
      select: jest.fn().mockReturnValue({
        eq: jest.fn().mockReturnValue({
          single: jest.fn().mockResolvedValue({
            data: null,
            error: null
          })
        })
      })
    })
  };

  beforeEach(() => {
    jest.clearAllMocks();
    (createClient as jest.Mock).mockReturnValue(mockSupabase);
  });

  describe('Search and Store Flow', () => {
    it('should search, clean, and store business data', async () => {
      const searchService = new SearchService();
      const query = 'plumber in Denver';
      const location = 'Denver, CO';

      // Stub performSearch to return results.
      jest.spyOn(searchService as any, 'performSearch')
        .mockResolvedValue([mockBusiness]);

      // Perform search
      const results = await searchService.search(query, location);

      // Verify search results
      expect(results).toBeTruthy();
      expect(Array.isArray(results)).toBe(true);
      expect(results[0]).toEqual(mockBusiness);

      // Verify the cache table was touched (lookup and store both go through
      // `from('cache')`) and that something was inserted.
      expect(mockSupabase.from).toHaveBeenCalledWith('cache');
      expect(mockSupabase.from().insert).toHaveBeenCalled();
    });

    it('should handle search errors gracefully', async () => {
      const searchService = new SearchService();

      // Stub performSearch to throw.
      jest.spyOn(searchService as any, 'performSearch')
        .mockRejectedValue(new Error('Search failed'));

      await expect(searchService.search('invalid query', 'invalid location'))
        .rejects.toThrow('Search failed');
    });

    it('should use cache when available', async () => {
      const searchService = new SearchService();
      const query = 'plumber in Denver';
      const location = 'Denver, CO';

      // The spy must exist BEFORE search() runs; a spy created afterwards has
      // an empty call log and `not.toHaveBeenCalled()` would pass vacuously.
      // It is stubbed so a regression cannot trigger a real search.
      const performSearchSpy = jest.spyOn(searchService as any, 'performSearch')
        .mockResolvedValue([]);

      // Mock cache hit
      mockSupabase.from.mockReturnValueOnce({
        select: jest.fn().mockReturnValue({
          eq: jest.fn().mockReturnValue({
            single: jest.fn().mockResolvedValue({
              data: { value: [mockBusiness] },
              error: null
            })
          })
        })
      });

      const results = await searchService.search(query, location);

      // Verify cache was checked
      expect(mockSupabase.from).toHaveBeenCalledWith('cache');
      expect(results).toEqual([mockBusiness]);

      // Verify performSearch was not called
      expect(performSearchSpy).not.toHaveBeenCalled();
    });

    it('should handle rate limiting', async () => {
      const searchService = new SearchService();

      // Stub performSearch to reject with a 429-shaped error.
      jest.spyOn(searchService as any, 'performSearch')
        .mockRejectedValue({ response: { status: 429 } });

      const query = 'plumber in Denver';
      const location = 'Denver, CO';

      await expect(searchService.search(query, location))
        .rejects.toThrow('Rate limit exceeded');
    });
  });

  describe('Data Consistency', () => {
    it('should maintain data consistency between search and retrieval', async () => {
      const searchService = new SearchService();
      const query = 'plumber in Denver';
      const location = 'Denver, CO';

      // Stub performSearch to return results.
      jest.spyOn(searchService as any, 'performSearch')
        .mockResolvedValue([mockBusiness]);

      // Perform search
      const searchResults = await searchService.search(query, location);
      const firstResult = searchResults[0];

      // Mock database retrieval: the next `from` call yields the found row.
      mockSupabase.from.mockReturnValueOnce({
        select: jest.fn().mockReturnValue({
          eq: jest.fn().mockReturnValue({
            single: jest.fn().mockResolvedValue({
              data: firstResult,
              error: null
            })
          })
        })
      });

      // Retrieve the same business
      const retrieved = await searchService.getBusinessById(firstResult.id);

      // Verify data consistency
      expect(retrieved).toEqual(firstResult);
    });
  });
});
|
22
src/tests/setup.ts
Normal file
22
src/tests/setup.ts
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
import dotenv from 'dotenv';
|
||||||
|
|
||||||
|
// Read test-only environment variables from `.env.test`.
dotenv.config({ path: '.env.test' });

// Default per-test timeout: 10 seconds.
jest.setTimeout(10 * 1000);

// Suite-wide setup hook (currently a placeholder).
beforeAll(function () {
  // Add any global setup here
});

// Suite-wide teardown hook (currently a placeholder).
afterAll(function () {
  // Add any global cleanup here
});

// Wipe recorded mock calls after every test so call counts never leak
// between tests.
afterEach(function () {
  jest.clearAllMocks();
});
|
202
src/tests/testDeepseek.ts
Normal file
202
src/tests/testDeepseek.ts
Normal file
|
@ -0,0 +1,202 @@
|
||||||
|
import { DeepSeekService } from '../lib/services/deepseekService';
|
||||||
|
import { Business } from '../lib/types';
|
||||||
|
import axios from 'axios';
|
||||||
|
|
||||||
|
/**
 * Sends one trivial chat message through DeepSeekService to confirm the model
 * backend answers.
 *
 * @returns `true` when a response came back, `false` when the call threw
 *          (the failure is logged to the console rather than rethrown).
 */
async function testOllamaConnection() {
  console.log('🔍 Testing Ollama connection...\n');

  try {
    // Test simple connection
    console.log('Testing Qwen model...');
    const response = await DeepSeekService['chat']([{
      role: 'user',
      content: 'Say "Hello, testing Qwen model!"'
    }]);

    console.log('✅ Model Response:', response);
    return true;
  } catch (error) {
    // Anything that is not an Error instance gets a generic message.
    if (!(error instanceof Error)) {
      console.error('❌ Connection test failed with unknown error');
      return false;
    }

    console.error('❌ Connection test failed:', error.message);

    if (!axios.isAxiosError(error)) {
      return false;
    }

    // A refused connection gets a hint to start the server; other HTTP
    // failures dump the response payload.
    if (error.code === 'ECONNREFUSED') {
      console.error('❌ Make sure Ollama is running (ollama serve)');
    } else {
      console.error('API Error details:', error.response?.data);
    }
    return false;
  }
}
|
||||||
|
|
||||||
|
/**
 * Runs DeepSeekService.cleanBusinessData over four deliberately messy business
 * records and logs, per record, the cleaned output plus any field that still
 * violates the expected format.
 *
 * Validation failures are only logged; nothing is thrown for them. An error
 * thrown by cleanBusinessData itself propagates to the caller.
 */
async function testDataCleaning() {
  console.log('\n🧪 Testing business data cleaning...');

  const testCases: Business[] = [
    {
      id: 'test_1',
      name: "Denver's Best Plumbing & Repair [LLC] (A Family Business) {Est. 1995}",
      address: "CONTACT US TODAY! Suite 200-B, 1234 Main Street, Denver, Colorado 80202 (Near Starbucks)",
      phone: "☎️ Main: (720) 555-1234 | Emergency: 1-800-555-9999 | Text: 720.555.4321",
      email: "[support@denverplumbing.com](mailto:support@denverplumbing.com) or info@denverplumbing.com",
      description: `$$$ LIMITED TIME OFFER $$$
🚰 Professional plumbing services in Denver metro area
💰 20% OFF all repairs over $500!
⭐️ Family owned since 1995
📞 Available 24/7 for emergencies
🌐 Visit www.denverplumbing.com
📧 Email us at contact@denverplumbing.com
💳 All major credit cards accepted
#DenverPlumbing #EmergencyService`,
      source: 'test',
      website: 'https://example.com',
      rating: 4.8,
      logo: 'logo.png',
      location: { lat: 39.7392, lng: -104.9903 },
      openingHours: []
    },
    {
      id: 'test_2',
      name: "[MIKE'S AUTO] {{CERTIFIED}} [BMW & AUDI SPECIALIST]",
      address: "GET DIRECTIONS: 5678 Auto Row Drive\nUnit C-123\nDenver, CO 80205\nBehind Home Depot",
      phone: "Sales: 303-555-0000\nService: (303) 555-1111\nFax: 303.555.2222",
      email: "appointments@mikesauto.com <click to email> [Schedule Now](https://booking.mikesauto.com)",
      description: `🚗 Denver's Premier Auto Service Center
💯 ASE Certified Mechanics
🔧 Specializing in German Luxury Vehicles
💰💰💰 Spring Special: Free oil change with any service over $300
⚡️ Same-day service available
🎯 Located in central Denver
📱 Text "REPAIR" to 80205 for $50 off
⭐️⭐️⭐️⭐️⭐️ Over 500 5-star reviews!`,
      source: 'test',
      website: 'https://mikesauto.com',
      rating: 4.9,
      logo: 'logo.png',
      location: { lat: 39.7599, lng: -104.9987 },
      openingHours: ['Mon-Fri 8-6', 'Sat 9-3']
    },
    {
      id: 'test_3',
      name: "🌟 SUNSHINE DENTAL & ORTHODONTICS, P.C. [Dr. Smith & Associates] (Voted #1)",
      address: "SCHEDULE TODAY!\n🦷 Building 3, Suite 300\n9876 Medical Plaza Way\nDENVER COLORADO, 80210\nNext to Target",
      phone: "📞 New Patients: 1 (720) 999-8888 | Existing: 720.999.7777 | After Hours: +1-720-999-6666",
      email: "appointments@sunshinedentalco.com, info@sunshinedentalco.com, emergency@sunshinedentalco.com",
      description: `✨ Your Premier Dental Care Provider in Denver! ✨
🦷 State-of-the-art facility
💎 Cosmetic & General Dentistry
👶 Family-friendly environment
💰 NEW PATIENT SPECIAL: $99 Cleaning & Exam (Reg. $299)
🏥 Most insurance accepted
⭐️ 1,000+ 5-star reviews on Google
🎁 Refer a friend and get $50 credit
📱 Download our app: smile.sunshinedentalco.com`,
      source: 'test',
      website: 'https://sunshinedentalco.com',
      rating: 5.0,
      logo: 'logo.png',
      location: { lat: 39.7120, lng: -104.9412 },
      openingHours: ['Mon-Thu 8-5', 'Fri 8-2', 'Sat By Appt']
    },
    {
      id: 'test_4',
      name: "THE COFFEE SPOT ☕️ {{NOW OPEN}} [Under New Management!]",
      address: "ORDER PICKUP:\nGround Floor\n4321 Downtown Street\nDenver, CO. 80203\nInside Union Station",
      phone: "☎️ Store: 303•777•5555\n💬 Text Orders: 303-777-4444",
      email: "<Order Online> orders@thecoffeespot.co [Click Here](https://order.thecoffeespot.co)",
      description: `☕️ Denver's Favorite Coffee Shop Since 2020!
🌱 Organic, Fair-Trade Coffee
🥐 Fresh-Baked Pastries Daily
⚡️ MORNING RUSH SPECIAL: $2 off any drink before 9am!
🎯 Loyalty Program: Buy 9, Get 1 FREE
📱 Order ahead on our app
🎁 Student Discount: 10% off with ID
#CoffeeLovers #DenverCoffee #MorningFuel
Follow us @thecoffeespot for daily specials!`,
      source: 'test',
      website: 'https://thecoffeespot.co',
      rating: 4.7,
      logo: 'logo.png',
      location: { lat: 39.7508, lng: -104.9997 },
      openingHours: ['Mon-Fri 6-8', 'Sat-Sun 7-7']
    }
  ];

  for (const testCase of testCases) {
    console.log('\nTesting case:', testCase.id);
    console.log('Input data:', JSON.stringify(testCase, null, 2));

    console.time('Cleaning Duration');
    const cleaned = await DeepSeekService.cleanBusinessData(testCase);
    console.timeEnd('Cleaning Duration');

    console.log('\nCleaned data:', JSON.stringify(cleaned, null, 2));

    // Validate the results
    const validationIssues: string[] = [];

    // Name validation: no bracket, brace, or parenthesis characters.
    if (cleaned.name?.match(/[\[\]{}()]/)) {
      validationIssues.push('Name contains brackets/braces/parentheses');
    }

    // Address validation: "<number> <street>, <city>, <ST> <5-digit zip>".
    if (!cleaned.address?.match(/^\d+[^,]+,\s*[^,]+,\s*[A-Z]{2}\s+\d{5}$/)) {
      validationIssues.push('Address format incorrect');
    }

    // Phone validation: "(NNN) NNN-NNNN".
    if (!cleaned.phone?.match(/^\(\d{3}\) \d{3}-\d{4}$/)) {
      validationIssues.push('Phone format incorrect');
    }

    // Email validation: no markdown/link residue.
    if (cleaned.email?.match(/[\[\]<>()]|mailto:|click|schedule/i)) {
      validationIssues.push('Email contains formatting/links');
    }

    // Description validation
    const descriptionIssues: string[] = [];
    if (cleaned.description?.match(/[\$\d]+%?\s*off|\$/i)) {
      descriptionIssues.push('contains pricing');
    }
    if (cleaned.description?.match(/\b(?:call|email|visit|contact|text|www\.|http|@)\b/i)) {
      descriptionIssues.push('contains contact info');
    }
    // Emoji check uses a Unicode-aware pattern. A character class of emoji
    // without the `u` flag is split into lone surrogate code units, which
    // matches unrelated emoji and misses others (e.g. 🦷, 🥐).
    if (cleaned.description?.match(/\p{Extended_Pictographic}/u)) {
      descriptionIssues.push('contains emojis');
    }
    if (cleaned.description?.match(/#\w+/)) {
      descriptionIssues.push('contains hashtags');
    }
    if (descriptionIssues.length > 0) {
      validationIssues.push(`Description ${descriptionIssues.join(', ')}`);
    }

    if (validationIssues.length > 0) {
      console.log('\n⚠️ Validation issues:', validationIssues.join(', '));
    } else {
      console.log('\n✅ All fields cleaned successfully');
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Entry point for the manual test run: checks the model connection first and
 * only proceeds to the data-cleaning cases when that check succeeds.
 */
async function runTests() {
  console.log('🚀 Starting Qwen model tests...\n');

  const isConnected = await testOllamaConnection();
  if (isConnected) {
    await testDataCleaning();
    return;
  }

  // Without a working connection the cleaning cases cannot run.
  console.log('❌ Stopping tests due to connection failure');
}
|
||||||
|
|
||||||
|
// Kick off the run only when this file is the process entry point, not when
// it is imported by another module. Any rejection is printed to stderr.
if (require.main === module) {
  runTests().catch((failure) => console.error(failure));
}
|
15
ui/components/BusinessList.tsx
Normal file
15
ui/components/BusinessList.tsx
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
/**
 * Renders one BusinessCard per business in a responsive grid, or a short
 * placeholder when the list is empty.
 *
 * NOTE(review): `BusinessData` and `BusinessCard` are used here but no import
 * for either is visible in this file, and the component is not exported —
 * confirm how they are meant to be brought into scope.
 */
const BusinessList = ({ businesses }: { businesses: BusinessData[] }) => {
  console.log('Rendering BusinessList with:', businesses);

  const hasNoBusinesses = businesses.length === 0;
  if (hasNoBusinesses) {
    return <div>No businesses found</div>;
  }

  // One card per business, keyed by its id.
  const cards = businesses.map((entry) => (
    <BusinessCard key={entry.id} business={entry} />
  ));

  return (
    <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
      {cards}
    </div>
  );
};
|
148
ui/components/BusinessResults.tsx
Normal file
148
ui/components/BusinessResults.tsx
Normal file
|
@ -0,0 +1,148 @@
|
||||||
|
import React, { useState } from 'react';
|
||||||
|
import { Business } from '../../types/business';
|
||||||
|
|
||||||
|
/** Props accepted by the BusinessResults component. */
interface Props {
  // Called by the export buttons with the requested file format.
  onExport: (format: 'csv' | 'json') => void;

  // NOTE(review): declared as receiving a query string, but the component
  // invokes it with the `results` payload of a search response — confirm
  // which signature the parent actually expects.
  onSearch: (query: string) => void;

  // Rows shown in the table whenever the component holds no search results
  // of its own.
  businesses: Business[];
}
|
||||||
|
|
||||||
|
export const BusinessResults: React.FC<Props> = ({ businesses, onExport, onSearch }) => {
|
||||||
|
const [error, setError] = useState<string | null>(null);
|
||||||
|
const [loading, setLoading] = useState(false);
|
||||||
|
const [progress, setProgress] = useState({ status: '', percent: 0 });
|
||||||
|
const [searchResults, setSearchResults] = useState<Business[]>([]);
|
||||||
|
|
||||||
|
const handleSearchResponse = (data: any) => {
|
||||||
|
console.log('Received search response:', data);
|
||||||
|
|
||||||
|
if (data.type === 'error') {
|
||||||
|
setError(data.error);
|
||||||
|
setLoading(false);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (data.type === 'progress') {
|
||||||
|
setProgress({ status: data.status, percent: data.progress });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (data.type === 'results') {
|
||||||
|
console.log('Setting results:', data.results);
|
||||||
|
setSearchResults(data.results);
|
||||||
|
onSearch(data.results); // Pass results up to parent
|
||||||
|
setLoading(false);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Runs a search for `query` and consumes the streamed answer.
//
// The response body is read as newline-delimited JSON: every complete line is
// parsed and handed to handleSearchResponse. A failure of the request or of
// reading the stream is logged and reported via setError. The loading flag is
// always cleared once the stream has been fully consumed or has failed.
const handleSearch = async (query: string) => {
  setLoading(true);
  setError(null);
  setProgress({ status: 'Starting search...', percent: 0 });

  // Parses one line of the stream and dispatches it. Blank lines are skipped;
  // a line that fails to parse (or whose handling throws) is logged and does
  // not abort the rest of the stream.
  const processLine = (line: string) => {
    try {
      if (line.trim()) {
        const data = JSON.parse(line);
        handleSearchResponse(data);
      }
    } catch (e) {
      console.error('Error parsing JSON:', e, 'Line:', line);
    }
  };

  try {
    const response = await fetch(
      `http://localhost:3000/api/search?q=${encodeURIComponent(query)}`,
      {
        headers: {
          Accept: 'application/json',
          'Cache-Control': 'no-cache'
        }
      }
    );

    const reader = response.body?.getReader();
    if (!reader) throw new Error('No response body');

    const decoder = new TextDecoder();
    let buffer = '';

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      // The last element is an incomplete line (or '' right after a newline);
      // keep it so the next chunk can complete it.
      buffer = lines.pop() || '';

      for (const line of lines) {
        processLine(line);
      }
    }

    // Flush any bytes the decoder is still holding and handle a final message
    // that was not terminated by a newline; without this it would be dropped.
    buffer += decoder.decode();
    processLine(buffer);
  } catch (error) {
    console.error('Search error:', error);
    setError('Failed to fetch results');
  } finally {
    // Always leave the loading state, including when the stream ends without
    // ever delivering a 'results' or 'error' message.
    setLoading(false);
  }
};
|
||||||
|
|
||||||
|
// Renders the search box with its progress/error feedback, the export
// buttons, and the table of businesses. The table shows the local search
// results when there are any and falls back to the `businesses` list otherwise.
return (
  <div className="business-results">
    <div className="search-controls">
      <input
        type="text"
        placeholder="Search businesses..."
        onKeyDown={(e) => {
          // onKeyPress is deprecated in React; onKeyDown is the supported
          // replacement. Enter presses that only confirm an IME composition
          // are ignored so they do not start a search.
          if (e.key === 'Enter' && !e.nativeEvent.isComposing) {
            handleSearch(e.currentTarget.value);
          }
        }}
      />
      {loading && (
        <div className="progress">
          {progress.status} ({progress.percent}%)
        </div>
      )}
      {error && (
        <div className="error">
          {error}
        </div>
      )}
    </div>

    <div className="export-controls">
      <button onClick={() => onExport('csv')}>Export CSV</button>
      <button onClick={() => onExport('json')}>Export JSON</button>
    </div>

    <table className="business-table">
      <thead>
        <tr>
          <th>Business Name</th>
          <th>Contact</th>
          <th>Address</th>
          <th>Rating</th>
          <th>Website</th>
        </tr>
      </thead>
      <tbody>
        {(searchResults.length ? searchResults : businesses).map(business => (
          <tr key={business.id}>
            <td>{business.name}</td>
            <td>
              {business.phone}<br/>
              {business.email}
            </td>
            <td>{business.address}</td>
            <td>{business.rating}/5</td>
            <td>
              {business.website && (
                <a href={business.website} target="_blank" rel="noopener noreferrer">
                  Visit Website
                </a>
              )}
            </td>
          </tr>
        ))}
      </tbody>
    </table>
  </div>
);
|
||||||
|
};
|
Loading…
Add table
Reference in a new issue