test: add CI/CD workflow
This commit is contained in:
parent
66d44c0774
commit
ce97671da3
28 changed files with 11684 additions and 1199 deletions
|
@ -1,4 +1,4 @@
|
|||
import { OllamaService } from './ollamaService';
|
||||
import { DeepSeekService } from './deepseekService';
|
||||
import { Business } from '../types';
|
||||
import { db } from './databaseService';
|
||||
|
||||
|
@ -21,7 +21,7 @@ export class CleanupService {
|
|||
setTimeout(() => reject(new Error('LLM timeout')), LLM_TIMEOUT);
|
||||
});
|
||||
|
||||
const llmPromise = OllamaService.chat([{
|
||||
const llmPromise = DeepSeekService.chat([{
|
||||
role: 'user',
|
||||
content: prompt
|
||||
}]);
|
||||
|
@ -205,58 +205,17 @@ export class CleanupService {
|
|||
return cached;
|
||||
}
|
||||
|
||||
const combinedPrompt = `
|
||||
Clean and format the following business information. For each field, follow the format shown in the examples.
|
||||
The business type appears to be: ${business.name.toLowerCase().includes('restaurant') ? 'restaurant' :
|
||||
business.name.toLowerCase().includes('plumb') ? 'plumber' :
|
||||
business.name.toLowerCase().includes('electric') ? 'electrician' : 'business'}
|
||||
|
||||
Return each field on a new line with the field name followed by a colon.
|
||||
Only return valid data - if something looks wrong or invalid, return an empty string.
|
||||
|
||||
Examples for address:
|
||||
Input: "Sure! Here is the business address in Denver, CO:\\n\\n14100 W 7th Ave, Golden CO 80401"
|
||||
Output: 14100 W 7th Ave, Golden, CO 80401
|
||||
|
||||
Examples for phone:
|
||||
Input: "7203796281"
|
||||
Output: (720) 379-6281
|
||||
Input: "N/A" or "none"
|
||||
Output:
|
||||
|
||||
Examples for email:
|
||||
Input: "379-6281info@brutalpoodledenver.com"
|
||||
Output: info@brutalpoodledenver.com
|
||||
Input: "top-seo-img@2x.jpg" or "Union Office" or "[email]" or "None"
|
||||
Output:
|
||||
|
||||
Examples for description:
|
||||
Input: "The Brutal Noodle $14.00 Beef bone broth, smoked brisket, rice noodles, all the fixins. (GF) Vegan available with tofu & veggie broth $11"
|
||||
Output: Asian fusion restaurant serving bone broth noodles with brisket and vegan options.
|
||||
Input: "Our Denver-based expert plumbers can repair or install any fixture. Commercial services: We're ready to keep your plumbing system operating safely."
|
||||
Output: Professional plumbing services for residential and commercial properties in Denver.
|
||||
|
||||
Business name for context: "${business.name}"
|
||||
Website for context: "${business.website}"
|
||||
|
||||
Now clean these fields:
|
||||
Address: "${business.address}"
|
||||
Phone: "${business.phone}"
|
||||
Email: "${business.email}"
|
||||
Description: "${business.description}"
|
||||
`;
|
||||
|
||||
const response = await this.cleanWithLLM(combinedPrompt, business);
|
||||
const parsed = this.parseResponse(response);
|
||||
const cleaned = this.validateAndClean({ ...business, ...parsed });
|
||||
// Clean using DeepSeek
|
||||
const cleaned = await DeepSeekService.cleanBusinessData(business);
|
||||
const validated = this.validateAndClean({ ...business, ...cleaned });
|
||||
|
||||
// Only cache if confidence score is high enough
|
||||
const confidence = this.calculateConfidenceScore(cleaned);
|
||||
const confidence = this.calculateConfidenceScore(validated);
|
||||
if (confidence >= MIN_CONFIDENCE_SCORE) {
|
||||
await db.saveToCache(cacheKey, cleaned, 24 * 60 * 60 * 1000);
|
||||
await db.saveToCache(cacheKey, validated, 24 * 60 * 60 * 1000);
|
||||
}
|
||||
|
||||
return cleaned;
|
||||
return validated;
|
||||
}
|
||||
|
||||
static async cleanBusinessRecords(businesses: Business[]): Promise<Business[]> {
|
||||
|
|
|
@ -20,20 +20,29 @@ export class DatabaseService {
|
|||
}
|
||||
|
||||
async searchBusinesses(query: string, location: string): Promise<BusinessData[]> {
|
||||
const { data, error } = await this.supabase
|
||||
.from('businesses')
|
||||
.select('*')
|
||||
.textSearch('name', query)
|
||||
.textSearch('address', location)
|
||||
.order('search_count', { ascending: false })
|
||||
.limit(env.cache.maxResultsPerQuery);
|
||||
try {
|
||||
const { data, error } = await this.supabase
|
||||
.from('businesses')
|
||||
.select('*')
|
||||
.or(
|
||||
`name.ilike.%${query}%,` +
|
||||
`description.ilike.%${query}%`
|
||||
)
|
||||
.ilike('address', `%${location}%`)
|
||||
.order('search_count', { ascending: false })
|
||||
.limit(env.cache.maxResultsPerQuery);
|
||||
|
||||
if (error) {
|
||||
if (error) {
|
||||
console.error('Error searching businesses:', error);
|
||||
throw error;
|
||||
}
|
||||
|
||||
console.log(`Found ${data?.length || 0} businesses in database`);
|
||||
return data || [];
|
||||
} catch (error) {
|
||||
console.error('Error searching businesses:', error);
|
||||
throw error;
|
||||
return [];
|
||||
}
|
||||
|
||||
return data || [];
|
||||
}
|
||||
|
||||
async saveBusiness(business: Partial<BusinessData>): Promise<void> {
|
||||
|
@ -135,6 +144,21 @@ export class DatabaseService {
|
|||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Deletes rows from the `cache` table.
 *
 * Best-effort: failures are logged and never thrown.
 *
 * @param pattern Optional SQL LIKE pattern (for example `search_%`) matched
 *   against the `key` column. When omitted, every cache row is deleted.
 */
async clearCache(pattern?: string): Promise<void> {
  try {
    const deletion = this.supabase.from('cache').delete();

    // The previous code passed the literal text "key LIKE $1" to .or(), which is
    // not a valid filter and never bound the pattern. Use the typed filter API.
    // NOTE(review): a delete without any filter is expected to be rejected by
    // Supabase, so "delete everything" uses an always-true filter — confirm
    // `key` is NOT NULL in the schema.
    const { error } = await (pattern
      ? deletion.like('key', pattern)
      : deletion.not('key', 'is', null));

    // supabase-js reports failures in the result object rather than throwing.
    if (error) {
      console.error('Error clearing cache:', error);
    }
  } catch (error) {
    console.error('Error clearing cache:', error);
  }
}
|
||||
}
|
||||
|
||||
export const db = new DatabaseService();
|
460
src/lib/services/deepseekService.ts
Normal file
460
src/lib/services/deepseekService.ts
Normal file
|
@ -0,0 +1,460 @@
|
|||
import axios from 'axios';
|
||||
import { env } from '../../config/env';
|
||||
import { Business } from '../types';
|
||||
|
||||
/**
 * Cleans scraped business records by prompting a local Ollama model and
 * falling back to deterministic, regex-based cleaning when the model output
 * cannot be parsed or fails validation.
 */
export class DeepSeekService {
  private static readonly OLLAMA_URL = 'http://localhost:11434/api/generate';
  private static readonly MODEL_NAME = 'qwen2:0.5b';
  private static readonly MAX_ATTEMPTS = 3; // Prevent infinite loops

  /** Emoji / dingbat ranges stripped from every free-text field. */
  private static readonly EMOJI_PATTERN =
    /[\u{1F300}-\u{1F9FF}]|[\u{2700}-\u{27BF}]|[\u{1F600}-\u{1F64F}]/gu;

  /** The only fields the model is allowed to rewrite. */
  private static readonly CLEANABLE_FIELDS = ['name', 'address', 'phone', 'email', 'description'] as const;

  /** Address layouts recognised by cleanAddress, tried in order. */
  private static readonly ADDRESS_PATTERNS: readonly RegExp[] = [
    // "street[, unit], city[,] CO zip" — the comma before the state is optional
    /(\d+[^,]+?(?:,?\s+(?:Suite|Ste|Unit|Apt|Building|Bldg|#)\.?\s*[-A-Z0-9]+)?),\s*([^,]+?)[,\s]\s*(?:CO|Colorado)\b[,\s]+(\d{5})/i,
    // No commas at all (the street/city split is a heuristic)
    /(\d+[^,]+?)\s+([^,]+?)\s+(?:CO|Colorado)\s+(\d{5})/i,
  ];

  /**
   * Ordered keyword rules for detectBusinessType. The generic auto keywords
   * ("repair", "service center") come last so they cannot shadow a more
   * specific trade such as "Plumbing Repair".
   */
  private static readonly BUSINESS_TYPE_RULES: ReadonlyArray<readonly [string, RegExp]> = [
    ['auto', /\b(?:auto|car|vehicle|BMW|Audi|Mercedes|mechanic)\b/i],
    ['dental', /\b(?:dental|dentist|orthodontic|smile|tooth|teeth)\b/i],
    ['coffee', /\b(?:coffee|cafe|espresso|roaster|brew)\b/i],
    ['plumbing', /\b(?:plumb(?:ers?|ing)?|rooter|drain|pipe)\b/i],
    ['restaurant', /\b(?:restaurant|grill|cuisine|bistro|kitchen)\b/i],
    ['auto', /\b(?:repair|service center)\b/i],
  ];

  /**
   * Runs `fn`, retrying failures with exponential backoff.
   *
   * @param fn Operation to run.
   * @param retries Maximum number of attempts; values below 1 are treated as 1.
   * @param shouldRetry Returns false for errors that must not be retried.
   * @returns The first successful result of `fn`.
   * @throws The last error once attempts are exhausted or `shouldRetry` declines.
   */
  private static async retryWithBackoff<T>(
    fn: () => Promise<T>,
    retries = 5,
    shouldRetry: (error: unknown) => boolean = () => true
  ): Promise<T> {
    const attempts = Number.isFinite(retries) && retries >= 1 ? Math.floor(retries) : 1;

    for (let i = 0; ; i++) {
      try {
        return await fn();
      } catch (error) {
        if (i >= attempts - 1 || !shouldRetry(error)) throw error;

        // Longer backoff for timeouts: 5s, 10s, 20s, ... versus 1s, 2s, 4s, ...
        const isTimeout = axios.isAxiosError(error) && error.code === 'ECONNABORTED';
        const delay = Math.pow(2, i) * (isTimeout ? 5000 : 1000);

        console.log(`Retry ${i + 1}/${attempts} after ${delay/1000}s...`);
        await new Promise(resolve => setTimeout(resolve, delay));
      }
    }
  }

  /** Type guard for plain object-like values. */
  private static isRecord(value: unknown): value is Record<string, unknown> {
    return typeof value === 'object' && value !== null;
  }

  /** Upper-cases the first letter of each space-separated word and lower-cases the rest. */
  private static titleCase(text: string): string {
    return text
      .split(' ')
      .filter(Boolean)
      .map(word => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase())
      .join(' ');
  }

  /** Normalises a street fragment: single spaces, "123Main" -> "123 Main", title case. */
  private static formatStreet(street: string): string {
    const spaced = street
      .replace(/,/g, ' ')
      // Only split a house number glued to a word; leave ordinals such as "7th" alone
      .replace(/^(\d+)(?=[A-Za-z]{3})/, '$1 ')
      .replace(/\s+/g, ' ')
      .trim();
    return this.titleCase(spaced);
  }

  /**
   * Formats the first US phone number found in `raw` as "(XXX) XXX-XXXX".
   *
   * @returns The formatted number, or undefined when fewer than 10 digits remain.
   */
  private static formatPhone(raw: string): string | undefined {
    const digits = raw.replace(/\D/g, '');
    // NANP area codes never start with 1, so a leading 1 is the country code
    const national = digits.startsWith('1') ? digits.slice(1) : digits;
    if (national.length < 10) return undefined;
    return `(${national.slice(0, 3)}) ${national.slice(3, 6)}-${national.slice(6, 10)}`;
  }

  /**
   * Extracts a Colorado street address from noisy text and formats it as
   * "Street, City, CO 12345".
   *
   * @param address Raw address text, possibly multi-line with marketing copy.
   * @returns The formatted address, or '' when no address can be recognised.
   */
  private static cleanAddress(address: string): string {
    // Remove marketing and extra info first
    const lines = address
      .replace(this.EMOJI_PATTERN, '') // Remove emojis
      .replace(/\b(?:GET|ORDER|SCHEDULE|CONTACT|DIRECTIONS)\b.*?[:!\n]/i, '') // Remove action words
      .replace(/\([^)]*\)/g, '') // Remove parenthetical info
      .replace(/\b(?:Next|Behind|Inside|Near)\b.*$/im, '') // Remove location hints
      .split(/[\n\r]+/) // Split into lines
      .map(line => line.trim())
      .filter(Boolean); // Remove empty lines

    if (lines.length === 0) return '';

    // Try each line on its own, then the lines joined together so that
    // "street\ncity, CO zip" is recognised as one address.
    const joined = lines.join(', ');
    const candidates = lines.length > 1 ? [...lines, joined] : lines;

    for (const candidate of candidates) {
      for (const pattern of this.ADDRESS_PATTERNS) {
        const match = candidate.match(pattern);
        if (match) {
          const [, street, city, zip] = match;
          return `${this.formatStreet(street)}, ${this.titleCase(city.trim())}, CO ${zip}`;
        }
      }
    }

    // Fallback: a house number followed by text, plus a later 5-digit ZIP.
    const start = joined.search(/\b\d+[A-Za-z]?\s+[A-Za-z0-9]/);
    if (start === -1) return '';

    const tail = joined.slice(start);
    // Skip a match at index 0: that is the house number, not the ZIP
    const zips = [...tail.matchAll(/\b\d{5}\b/g)].filter(m => (m.index ?? 0) > 0);
    const zip = zips[zips.length - 1];
    if (!zip) return '';

    const street = this.formatStreet(tail.slice(0, zip.index ?? tail.length).split(',')[0]);
    // The city is unknown on this path; as before, Denver is assumed.
    return street ? `${street}, Denver, CO ${zip[0]}` : '';
  }

  /**
   * Deterministic cleaning that needs no model call. Fields that cannot be
   * improved are left as they were.
   *
   * @param business Record (or partial record) to clean; not mutated.
   * @returns A shallow copy with cleaned name, address, phone, email and description.
   */
  private static manualClean(business: Partial<Business>): Partial<Business> {
    const cleaned = { ...business };

    // Clean address
    if (cleaned.address) {
      const cleanedAddress = this.cleanAddress(cleaned.address);
      if (cleanedAddress) {
        cleaned.address = cleanedAddress;
      }
    }

    // Extract business type first (from the name before it is cleaned)
    const businessType = this.detectBusinessType(cleaned.name || '');

    // Clean name while preserving core identity
    if (cleaned.name) {
      cleaned.name = cleaned.name
        // Remove emojis and special characters
        .replace(this.EMOJI_PATTERN, '')
        // Remove bracketed content but preserve important terms
        .replace(/\s*[\[\({](?!(?:BMW|Mercedes|Audi|specialist|certified)).*?[\]\)}]\s*/gi, ' ')
        // Remove business suffixes
        .replace(/\b(?:LLC|Inc|Corp|Ltd|DBA|Est\.|Since|P\.?C\.?)\b\.?\s*\d*/gi, '')
        // Clean up and normalize
        .replace(/[^\w\s&'-]/g, ' ')
        .replace(/\s+/g, ' ')
        .trim()
        .replace(/^THE\s+/i, ''); // Remove leading "THE"
    }

    // Clean phone - keep the first number, dropping a leading country code
    if (cleaned.phone) {
      const formatted = this.formatPhone(cleaned.phone);
      if (formatted) {
        cleaned.phone = formatted;
      }
    }

    // Clean email - keep the first well-formed address
    if (cleaned.email) {
      const emailMatch = cleaned.email.match(/([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/);
      if (emailMatch?.[1]) {
        cleaned.email = emailMatch[1];
      }
    }

    // Improved description cleaning
    if (cleaned.description) {
      cleaned.description = this.extractCoreDescription(cleaned.description, businessType);
    }

    return cleaned;
  }

  /**
   * Guesses a coarse business category from keywords in the name.
   *
   * @returns 'auto', 'dental', 'coffee', 'plumbing', 'restaurant', or 'business' when nothing matches.
   */
  private static detectBusinessType(name: string): string {
    for (const [type, pattern] of this.BUSINESS_TYPE_RULES) {
      if (pattern.test(name)) return type;
    }
    return 'business';
  }

  /**
   * Reduces a marketing-heavy description to phrases relevant to the business type.
   *
   * @param description Raw description text.
   * @param businessType Category as returned by detectBusinessType.
   * @returns The matching phrases joined with '. ', or a generic sentence when none match.
   */
  private static extractCoreDescription(description: string, businessType: string): string {
    // Remove all marketing and formatting first
    const stripped = description
      .replace(this.EMOJI_PATTERN, '')
      .replace(/\$+\s*[^\s]*\s*(off|special|offer|deal|save|discount|price|cost|free)/gi, '')
      .replace(/\b(?:call|email|visit|contact|text|www\.|http|@|book|schedule|appointment)\b.*$/gi, '')
      .replace(/#\w+/g, '')
      .replace(/\s+/g, ' ')
      .trim();

    // Extract relevant information based on business type
    const typePatterns: { [key: string]: RegExp[] } = {
      auto: [
        /(?:specialist|specializing)\s+in\s+[^.]+/i,
        /(?:certified|ASE)[^.]+mechanic[^.]+/i,
        /(?:auto|car|vehicle)\s+(?:service|repair)[^.]+/i
      ],
      dental: [
        /(?:dental|orthodontic)\s+(?:care|services)[^.]+/i,
        /(?:family|cosmetic|general)\s+dentistry[^.]+/i,
        /state-of-the-art\s+facility[^.]+/i
      ],
      coffee: [
        /(?:coffee|espresso|pastry|cafe)[^.]+/i,
        /(?:organic|fair-trade|fresh)[^.]+/i,
        /(?:local|favorite|community)[^.]+coffee[^.]+/i
      ],
      plumbing: [
        /(?:plumbing|drain|pipe)\s+(?:service|repair)[^.]+/i,
        /(?:professional|expert|master)\s+plumb[^.]+/i,
        /(?:residential|commercial)\s+plumbing[^.]+/i
      ]
    };

    const relevantPhrases = (typePatterns[businessType] ?? [])
      .map(pattern => stripped.match(pattern)?.[0] ?? '')
      .filter(Boolean);

    if (relevantPhrases.length > 0) {
      return relevantPhrases.join('. ');
    }

    // Fallback to generic description
    return `Professional ${businessType} services in Denver area`;
  }

  /**
   * Prepares model output for JSON.parse: strips emojis, turns every run of
   * whitespace (including newlines and tabs) into one space, then removes any
   * remaining control characters.
   */
  private static sanitizeJsonResponse(response: string): string {
    return response
      // Remove emojis
      .replace(this.EMOJI_PATTERN, '')
      // Collapse whitespace BEFORE stripping control characters; otherwise the
      // newlines are deleted outright and adjacent words are glued together
      .replace(/\s+/g, ' ')
      // Remove remaining control characters
      .replace(/[\u0000-\u001F\u007F-\u009F]/g, '')
      .trim();
  }

  /** Keeps only the string-valued cleanable fields from a parsed `{ business_info: {...} }` payload. */
  private static pickCleanedFields(parsed: unknown): Partial<Business> {
    const picked: Partial<Business> = {};
    const info = this.isRecord(parsed) ? parsed.business_info : undefined;
    if (!this.isRecord(info)) return picked;

    for (const field of this.CLEANABLE_FIELDS) {
      const value = info[field];
      if (typeof value === 'string') {
        picked[field] = value.trim();
      }
    }
    return picked;
  }

  /**
   * Cleans one business record with the model, retrying on unusable output
   * and finishing with manualClean once attempts run out.
   *
   * @param business Record to clean; not mutated.
   * @param attempt Zero-based attempt counter used by the internal retries.
   * @returns The cleaned record.
   * @throws Whatever chat() throws when the model cannot be reached.
   */
  static async cleanBusinessData(business: Business, attempt = 0): Promise<Business> {
    if (attempt >= this.MAX_ATTEMPTS) {
      console.log('Max cleaning attempts reached, applying manual cleaning...');
      return {
        ...business,
        ...this.manualClean(business)
      };
    }

    // Detect business type first
    const businessType = this.detectBusinessType(business.name || '');

    const requestId = Math.random().toString(36).substring(7);
    const prompt = `<|im_start|>system
You are a data cleaning expert. Clean the business data while preserving its core identity and type.
Request ID: ${requestId} // Force uniqueness
IMPORTANT: Return ONLY plain text without emojis or special characters.
<|im_end|>
<|im_start|>user
Clean this ${businessType} business data by following these rules exactly:

Input Business:
${JSON.stringify(business, null, 2)}

Cleaning Rules:
1. NAME: Remove brackets/braces but preserve core business identity
2. ADDRESS: Format as "street, city, state zip" using state abbreviations
3. PHONE: Extract and format primary phone as "(XXX) XXX-XXXX"
4. EMAIL: Remove markdown/mailto formatting but keep actual email
5. DESCRIPTION: Keep core business info but remove:
- ALL emojis and special characters (return plain text only)
- Prices and special offers
- Contact information
- Marketing language
- Social media elements

Return ONLY clean JSON with the original business identity preserved:
{
"business_info": {
"name": "Keep original business name without formatting",
"address": "Keep original address, properly formatted",
"phone": "Keep original phone number, properly formatted",
"email": "Keep original email without formatting",
"description": "Keep original business description without marketing"
}
}
<|im_end|>`;

    const response = await this.chat([{
      role: 'user',
      content: prompt
    }]);

    let cleaned: Business;
    try {
      // Take everything from the first "{" to the last "}" so trailing chatter
      // after the JSON does not defeat the match
      const jsonMatch = response.match(/\{[\s\S]*\}/);
      if (!jsonMatch) {
        throw new Error('No JSON found in response');
      }

      const parsed: unknown = JSON.parse(this.sanitizeJsonResponse(jsonMatch[0]));
      // Only accept the five string fields we asked for; never let the model
      // overwrite ids, ratings or other data
      cleaned = { ...business, ...this.pickCleanedFields(parsed) };
    } catch (error) {
      console.error('Failed to parse response:', error);
      console.log('Raw response:', response);

      // Fall back to "field: value" lines. This must read the RAW response:
      // sanitizing first would collapse it onto a single line.
      const fallback = this.parseResponse(response);
      return this.cleanBusinessData({ ...business, ...fallback }, attempt + 1);
    }

    // Validate and handle type mismatches more strictly
    const validationIssues = this.validateCleanedData(cleaned, business);
    if (validationIssues.length === 0) {
      return cleaned;
    }

    console.log(`Attempt ${attempt + 1}: Validation issues:`, validationIssues.join(', '));

    // If there's a business type mismatch, go straight to manual cleaning
    if (validationIssues.some(issue => issue.includes('Business type mismatch'))) {
      console.log('Business type mismatch detected, applying manual cleaning...');
      return {
        ...business,
        ...this.manualClean(business)
      };
    }

    // For other validation issues, try again
    return this.cleanBusinessData(cleaned, attempt + 1);
  }

  /**
   * Checks a cleaned record against the expected formats.
   *
   * Side effect: an address or phone that is merely mis-formatted is repaired
   * in place on `business` instead of being reported.
   *
   * @param business Cleaned record; `address` and `phone` may be rewritten.
   * @param originalBusiness Record the cleaning started from.
   * @returns Human-readable issues; empty when the record is acceptable.
   */
  private static validateCleanedData(business: Partial<Business>, originalBusiness: Business): string[] {
    const issues: string[] = [];

    // Stricter business type validation
    const originalType = this.detectBusinessType(originalBusiness.name || '');
    const cleanedType = this.detectBusinessType(business.name || '');

    if (originalType !== 'business') {
      if (cleanedType !== originalType) {
        issues.push(`Business type mismatch: expected ${originalType}, got ${cleanedType}`);
      }

      // Verify core identity is preserved
      const originalKeywords = originalBusiness.name?.toLowerCase().split(/\W+/).filter(Boolean) || [];
      const cleanedKeywords = business.name?.toLowerCase().split(/\W+/).filter(Boolean) || [];

      const significantKeywords = originalKeywords.filter(word =>
        !['the', 'and', 'llc', 'inc', 'corp', 'ltd', 'dba', 'est'].includes(word)
      );

      const missingKeywords = significantKeywords.filter(word =>
        !cleanedKeywords.some(keyword => keyword.includes(word))
      );

      if (missingKeywords.length > 0) {
        issues.push(`Core business identity lost: missing ${missingKeywords.join(', ')}`);
      }
    }

    if (business.name?.includes('[') || business.name?.includes(']')) {
      issues.push('Name contains brackets');
    }

    // A record that never had an address/phone must not fail validation for
    // lacking one; previously that burned every retry on each such record.
    const addressFormat = /^\d+[^,]+,\s*[^,]+,\s*[A-Z]{2}\s+\d{5}$/;
    if ((business.address || originalBusiness.address) && !addressFormat.test(business.address ?? '')) {
      const cleanedAddress = this.cleanAddress(business.address || '');
      if (cleanedAddress) {
        business.address = cleanedAddress;
      } else {
        issues.push('Address format incorrect');
      }
    }

    const phoneFormat = /^\(\d{3}\) \d{3}-\d{4}$/;
    if ((business.phone || originalBusiness.phone) && !phoneFormat.test(business.phone ?? '')) {
      const formattedPhone = this.formatPhone(business.phone || '');
      if (formattedPhone) {
        business.phone = formattedPhone;
      } else {
        issues.push('Phone format incorrect');
      }
    }

    if (business.email?.includes('[') || business.email?.includes('mailto:')) {
      issues.push('Email contains markdown/mailto');
    }

    if (business.description?.match(/\$|\b(?:call|email|visit|contact)\b/i)) {
      issues.push('Description contains pricing or contact info');
    }

    return issues;
  }

  /**
   * Sends the message contents to Ollama's generate endpoint as one prompt.
   *
   * Public because CleanupService calls it directly.
   *
   * @param messages Chat-style messages; contents are joined with newlines. Roles are not sent.
   * @returns The model's text response.
   * @throws Error when no message is supplied, the server is unreachable, the
   *   model is missing, or the response carries no text.
   */
  static async chat(messages: { role: string, content: string }[]): Promise<string> {
    if (messages.length === 0) {
      throw new Error('chat() requires at least one message');
    }
    const prompt = messages.map(message => message.content).join('\n');

    try {
      return await this.retryWithBackoff(
        async () => {
          const response = await axios.post(
            this.OLLAMA_URL,
            {
              model: this.MODEL_NAME,
              prompt,
              stream: false,
              options: {
                temperature: 0.7, // Add some randomness
                num_predict: 2048,
                stop: ["<|im_end|>", "\n\n"],
                top_k: 40, // Allow more variety
                top_p: 0.9, // Allow more variety
                seed: Date.now(), // Force different results each time
                reset: true // Reset context window
              }
            },
            {
              headers: {
                'Content-Type': 'application/json'
              },
              timeout: 30000
            }
          );

          const text: unknown = response.data?.response;
          if (typeof text !== 'string') {
            throw new Error('No response from Ollama');
          }
          return text;
        },
        5,
        // A missing model will not appear by waiting, so fail fast on 404
        error => !(axios.isAxiosError(error) && error.response?.status === 404)
      );
    } catch (error) {
      if (axios.isAxiosError(error)) {
        if (error.code === 'ECONNREFUSED') {
          throw new Error('Ollama server not running');
        }
        if (error.response?.status === 404) {
          throw new Error(`Model ${this.MODEL_NAME} not found. Run: ollama pull ${this.MODEL_NAME}`);
        }
      }
      throw error;
    }
  }

  /**
   * Parses "field: value" lines into a partial record. Tolerates JSON-style
   * lines such as `"phone": "(720) 379-6281",` by ignoring quotes around the
   * field name and removing a trailing comma and surrounding quotes from the value.
   *
   * @returns Only the recognised fields that appeared in the text.
   */
  private static parseResponse(response: string) {
    const cleaned: Partial<Business> = {};
    const known: readonly string[] = this.CLEANABLE_FIELDS;

    for (const line of response.split('\n')) {
      const [rawField, ...rest] = line.split(':');
      const field = rawField.replace(/[^a-z_]/gi, '').toLowerCase();
      if (rest.length === 0 || !known.includes(field)) continue;

      const value = rest
        .join(':')
        .trim()
        .replace(/,$/, '')
        .trim()
        .replace(/^(["'])(.*)\1$/, '$2');

      switch (field) {
        case 'name':
          cleaned.name = value;
          break;
        case 'address':
          cleaned.address = value;
          break;
        case 'phone':
          cleaned.phone = value;
          break;
        case 'email':
          cleaned.email = value;
          break;
        case 'description':
          cleaned.description = value;
          break;
      }
    }

    return cleaned;
  }
}
|
63
src/lib/services/geocodingService.ts
Normal file
63
src/lib/services/geocodingService.ts
Normal file
|
@ -0,0 +1,63 @@
|
|||
import axios from 'axios';
|
||||
import { sleep } from '../utils/helpers';
|
||||
|
||||
/** Coordinates and display name for a geocoded address. */
interface GeocodingResult {
  lat: number;
  lng: number;
  formattedAddress: string;
}

/**
 * Geocodes addresses through the public Nominatim search endpoint, with an
 * in-memory result cache and a process-wide rate limit.
 */
export class GeocodingService {
  private static cache = new Map<string, GeocodingResult>();
  private static lastRequestTime = 0;
  private static RATE_LIMIT_MS = 1000; // 1 second between requests (Nominatim requirement)
  private static REQUEST_TIMEOUT_MS = 10000;

  /**
   * Looks up the coordinates of an address.
   *
   * @param address Free-form address text.
   * @returns The first match, or null when the address is blank, nothing is
   *   found, the coordinates are unusable, or the request fails (logged).
   */
  static async geocode(address: string): Promise<GeocodingResult | null> {
    const query = address.trim();
    if (!query) return null;

    // Check cache first (case and surrounding whitespace do not matter)
    const cacheKey = query.toLowerCase();
    const cached = this.cache.get(cacheKey);
    if (cached) return cached;

    try {
      // Rate limiting: reserve the next slot synchronously, before any await,
      // so concurrent callers queue up one interval apart instead of all
      // reading the same lastRequestTime and firing together.
      const now = Date.now();
      const slot = Math.max(now, this.lastRequestTime + this.RATE_LIMIT_MS);
      this.lastRequestTime = slot;
      if (slot > now) {
        await sleep(slot - now);
      }

      const response = await axios.get(
        'https://nominatim.openstreetmap.org/search',
        {
          params: {
            q: query,
            format: 'json',
            limit: 1,
            addressdetails: 1
          },
          headers: {
            'User-Agent': 'BusinessFinder/1.0'
          },
          // Without a timeout a stalled connection would block the caller indefinitely
          timeout: this.REQUEST_TIMEOUT_MS
        }
      );

      const result = Array.isArray(response.data) ? response.data[0] : undefined;
      if (!result) return null;

      const lat = parseFloat(result.lat);
      const lng = parseFloat(result.lon);
      // Never cache or return NaN coordinates
      if (!Number.isFinite(lat) || !Number.isFinite(lng)) return null;

      const geocoded: GeocodingResult = {
        lat,
        lng,
        formattedAddress: result.display_name
      };

      // Cache the result
      this.cache.set(cacheKey, geocoded);
      return geocoded;
    } catch (error) {
      console.error('Geocoding error:', error);
      return null;
    }
  }
}
|
|
@ -1,36 +1,45 @@
|
|||
import axios from 'axios';
|
||||
import { env } from '../../config/env';
|
||||
|
||||
interface OllamaResponse {
|
||||
response: string;
|
||||
context?: number[];
|
||||
}
|
||||
|
||||
export class OllamaService {
|
||||
private url: string;
|
||||
private model: string;
|
||||
private static readonly baseUrl = env.ollama.url;
|
||||
private static readonly model = env.ollama.model;
|
||||
|
||||
constructor() {
|
||||
this.url = env.ollama.url;
|
||||
this.model = env.ollama.model;
|
||||
}
|
||||
static async complete(prompt: string): Promise<string> {
|
||||
try {
|
||||
const response = await axios.post(`${this.baseUrl}/api/generate`, {
|
||||
model: this.model,
|
||||
prompt: prompt,
|
||||
stream: false
|
||||
});
|
||||
|
||||
async complete(prompt: string): Promise<string> {
|
||||
try {
|
||||
const response = await axios.post(`${this.url}/api/generate`, {
|
||||
model: this.model,
|
||||
prompt: prompt,
|
||||
stream: false,
|
||||
options: {
|
||||
temperature: 0.7,
|
||||
top_p: 0.9
|
||||
if (response.data?.response) {
|
||||
return response.data.response;
|
||||
}
|
||||
|
||||
throw new Error('No response from Ollama');
|
||||
} catch (error) {
|
||||
console.error('Ollama error:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async chat(messages: { role: 'user' | 'assistant'; content: string }[]): Promise<string> {
|
||||
try {
|
||||
const response = await axios.post(`${this.baseUrl}/api/chat`, {
|
||||
model: this.model,
|
||||
messages: messages,
|
||||
stream: false
|
||||
});
|
||||
|
||||
if (response.data?.message?.content) {
|
||||
return response.data.message.content;
|
||||
}
|
||||
|
||||
throw new Error('No response from Ollama chat');
|
||||
} catch (error) {
|
||||
console.error('Ollama chat error:', error);
|
||||
throw error;
|
||||
}
|
||||
});
|
||||
|
||||
return response.data.response;
|
||||
} catch (error) {
|
||||
console.error('Ollama completion failed:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
}
|
97
src/lib/services/searchService.ts
Normal file
97
src/lib/services/searchService.ts
Normal file
|
@ -0,0 +1,97 @@
|
|||
import { DeepSeekService } from './deepseekService';
|
||||
import { createClient } from '@supabase/supabase-js';
|
||||
import { Business } from '../types';
|
||||
|
||||
/**
 * Business search backed by a Supabase `cache` table.
 *
 * Results are cached per lower-cased "query_location" key with an expiry
 * timestamp; `performSearch` is currently a placeholder returning mock data.
 */
export class SearchService {
  private supabase;
  private deepseek;

  /**
   * @throws Error when SUPABASE_URL or SUPABASE_KEY is not set.
   */
  constructor() {
    const url = process.env.SUPABASE_URL;
    const key = process.env.SUPABASE_KEY;
    if (!url || !key) {
      throw new Error('SUPABASE_URL and SUPABASE_KEY must be set');
    }

    this.supabase = createClient(url, key);
    this.deepseek = DeepSeekService;
  }

  /**
   * Searches for businesses, serving unexpired cached results when available.
   *
   * @param query What to search for.
   * @param location Where to search.
   * @returns Matching businesses.
   * @throws Error('Query and location are required') on blank input,
   *   Error('Rate limit exceeded') on an HTTP 429, otherwise the underlying error.
   */
  async search(query: string, location: string): Promise<Business[]> {
    if (!query || !location) {
      throw new Error('Query and location are required');
    }

    // Check cache first. Expired rows are ignored (expires_at used to be
    // written but never checked), and maybeSingle() treats "no row" as a
    // plain miss instead of an error.
    const cacheKey = `${query}_${location}`.toLowerCase();
    const { data: cacheData } = await this.supabase
      .from('cache')
      .select()
      .eq('key', cacheKey)
      .gt('expires_at', new Date().toISOString())
      .maybeSingle();

    if (cacheData?.value) {
      return cacheData.value as Business[];
    }

    try {
      // Perform search
      const searchResults = await this.performSearch(query, location);

      // Cache results (best-effort; never fails the search)
      await this.cacheResults(cacheKey, searchResults);

      return searchResults;
    } catch (error: unknown) {
      if (SearchService.httpStatusOf(error) === 429) {
        throw new Error('Rate limit exceeded');
      }
      throw error;
    }
  }

  /**
   * Fetches one business by primary key.
   *
   * @returns The business, or null when it does not exist or the query fails.
   */
  async getBusinessById(id: string): Promise<Business | null> {
    const { data, error } = await this.supabase
      .from('businesses')
      .select()
      .eq('id', id)
      .single();

    if (error || !data) {
      return null;
    }

    return data as Business;
  }

  /** Reads `error.response.status` from an unknown thrown value, if present. */
  private static httpStatusOf(error: unknown): number | undefined {
    if (typeof error !== 'object' || error === null || !('response' in error)) {
      return undefined;
    }
    const response = (error as { response?: unknown }).response;
    if (typeof response !== 'object' || response === null || !('status' in response)) {
      return undefined;
    }
    const status = (response as { status?: unknown }).status;
    return typeof status === 'number' ? status : undefined;
  }

  private async performSearch(query: string, location: string): Promise<Business[]> {
    // Implementation would use DeepSeek service to perform search
    // This is a placeholder implementation
    const mockBusiness: Business = {
      id: 'test_1',
      name: "Denver's Best Plumbing",
      address: "1234 Main Street, Denver, CO 80202",
      phone: "(720) 555-1234",
      email: "support@denverplumbing.com",
      description: "Professional plumbing services",
      source: 'test',
      website: 'https://example.com',
      rating: 4.8,
      location: { lat: 39.7392, lng: -104.9903 },
      openingHours: []
    };

    return [mockBusiness];
  }

  /**
   * Stores search results under `key` with an expiry CACHE_DURATION_DAYS from
   * now (default 7). Failures are logged, never thrown.
   */
  private async cacheResults(key: string, results: Business[]): Promise<void> {
    try {
      // An unparsable CACHE_DURATION_DAYS would otherwise yield an Invalid Date
      // and make toISOString() throw.
      const rawDays = process.env.CACHE_DURATION_DAYS;
      const parsedDays = rawDays ? Number(rawDays) : NaN;
      const ttlDays = Number.isFinite(parsedDays) && parsedDays >= 0 ? parsedDays : 7;

      const expiresAt = new Date();
      expiresAt.setDate(expiresAt.getDate() + ttlDays);

      // Upsert so re-caching a key whose old row has expired replaces it
      // instead of failing on (or duplicating) the existing row.
      // NOTE(review): assumes a unique constraint on cache.key — confirm in the schema.
      const { error } = await this.supabase
        .from('cache')
        .upsert(
          [{
            key,
            value: results,
            created_at: new Date().toISOString(),
            expires_at: expiresAt.toISOString()
          }],
          { onConflict: 'key' }
        );

      if (error) {
        console.error('Error caching search results:', error);
      }
    } catch (error) {
      console.error('Error caching search results:', error);
    }
  }
}
|
Loading…
Add table
Add a link
Reference in a new issue