Refactor business search functionality:
- Add utility functions for business ID generation
- Improve database service with proper types
- Fix type safety issues in search implementation
- Add caching layer for search results
This commit is contained in:
parent
409c811a42
commit
372943801d
3 changed files with 357 additions and 36 deletions
|
@ -1,47 +1,189 @@
|
||||||
import axios from 'axios';
|
import axios from 'axios';
|
||||||
import { getSearxngApiEndpoint } from '../config';
|
import * as cheerio from 'cheerio';
|
||||||
|
import { createWorker } from 'tesseract.js';
|
||||||
|
import { env } from '../config/env';
|
||||||
|
import { OllamaService } from './services/ollamaService';
|
||||||
|
import { BusinessData } from './types';
|
||||||
|
import { db } from './services/databaseService';
|
||||||
|
import { generateBusinessId } from './utils';
|
||||||
|
|
||||||
interface SearxngSearchOptions {
|
// Define interfaces used only in this file
|
||||||
categories?: string[];
|
interface SearchResult {
|
||||||
engines?: string[];
|
|
||||||
language?: string;
|
|
||||||
pageno?: number;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface SearxngSearchResult {
|
|
||||||
title: string;
|
|
||||||
url: string;
|
url: string;
|
||||||
img_src?: string;
|
title: string;
|
||||||
thumbnail_src?: string;
|
content: string;
|
||||||
thumbnail?: string;
|
phone?: string;
|
||||||
content?: string;
|
email?: string;
|
||||||
author?: string;
|
address?: string;
|
||||||
iframe_src?: string;
|
website?: string;
|
||||||
|
coordinates?: {
|
||||||
|
lat: number;
|
||||||
|
lng: number;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
export const searchSearxng = async (
|
interface ContactInfo {
|
||||||
|
phone?: string;
|
||||||
|
email?: string;
|
||||||
|
address?: string;
|
||||||
|
description?: string;
|
||||||
|
openingHours?: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Export the main search function
|
||||||
|
/**
 * Searches for businesses using a query of the form "<search term> in <location>".
 *
 * Lookup order:
 *  1. the cache (`db.getFromCache`), keyed by search term and location;
 *  2. the businesses table (`db.searchBusinesses`) — on a hit the stored rows
 *     are returned immediately and a live search is started in the background;
 *  3. a live search (`performSearch`), whose results are then cached.
 *
 * @param query   Text containing " in " between the search term and the location.
 * @param options Optional `onProgress(status, progress)` callback.
 * @returns The matching businesses. Any failure (including a malformed query)
 *          is logged and reported as an empty array; this function does not throw.
 */
export async function searchBusinesses(
  query: string,
  options: { onProgress?: (status: string, progress: number) => void } = {}
): Promise<BusinessData[]> {
  try {
    // The greedy first group splits at the LAST " in " (case-insensitive), so
    // a search term that itself contains " in " no longer truncates the location.
    const parsed = /^([\s\S]*) in ([\s\S]*)$/i.exec(query);
    const searchTerm = parsed?.[1]?.trim();
    const location = parsed?.[2]?.trim();
    if (!searchTerm || !location) {
      throw new Error('Invalid search query format. Use: "search term in location"');
    }

    options.onProgress?.('Checking cache', 0);

    // Check cache first
    const cacheKey = `search:${searchTerm}:${location}`;
    const cachedResults = await db.getFromCache(cacheKey);
    // Only trust the cached value when it has the expected array shape.
    if (Array.isArray(cachedResults)) {
      console.log('Found cached results');
      options.onProgress?.('Retrieved from cache', 100);
      return cachedResults;
    }

    // Check database for existing businesses
    const existingBusinesses = await db.searchBusinesses(searchTerm, location);

    if (existingBusinesses.length > 0) {
      console.log(`Found ${existingBusinesses.length} existing businesses`);
      options.onProgress?.('Retrieved from database', 50);

      // Still perform search but in background. The helper handles its own
      // errors, so the promise is intentionally not awaited.
      void searchAndUpdateInBackground(searchTerm, location);

      return existingBusinesses;
    }

    options.onProgress?.('Starting search', 10);

    // Perform new search
    const results = await performSearch(searchTerm, location, options);

    // Caching is best-effort: a failed cache write must not discard results
    // that were already fetched successfully.
    try {
      await db.saveToCache(cacheKey, results, env.cache.durationHours * 60 * 60 * 1000);
    } catch (cacheError) {
      console.error('Failed to cache search results:', cacheError);
    }

    return results;
  } catch (error) {
    console.error('Search error:', error);
    return []; // Return empty array on error
  }
}
|
||||||
|
|
||||||
|
/**
 * Runs a live search against the configured SearXNG instance.
 *
 * Four phrasings of the query are requested one after another; results are
 * de-duplicated by URL across all of them, filtered with
 * `isValidBusinessResult`, converted with `processResults`, and each resulting
 * business is saved through `db.saveBusiness`.
 *
 * A failed request for one phrasing is logged and skipped; a failed save is
 * logged and does not affect the returned list.
 *
 * @param searchTerm What to look for.
 * @param location   Where to look for it.
 * @param options    Optional `onProgress(status, progress)` callback.
 * @returns The processed businesses.
 */
async function performSearch(
  searchTerm: string,
  location: string,
  options: { onProgress?: (status: string, progress: number) => void } = {}
): Promise<BusinessData[]> {
  const queries = [
    searchTerm + ' ' + location,
    searchTerm + ' business near ' + location,
    searchTerm + ' services ' + location,
    'local ' + searchTerm + ' ' + location
  ];

  options.onProgress?.('Searching multiple sources', 25);

  let allResults: SearchResult[] = [];
  const seenUrls = new Set<string>();

  for (const q of queries) {
    try {
      const response = await axios.get(`${env.searxng.currentUrl}/search`, {
        params: {
          q,
          format: 'json',
          engines: 'google,google_maps',
          language: 'en-US',
          time_range: '',
          safesearch: 1
        }
      });

      // The payload is external JSON: make sure it is really an array before
      // treating it as a list of results.
      const rawResults: unknown = response.data?.results;
      if (!Array.isArray(rawResults)) {
        if (rawResults != null) {
          console.warn(`Unexpected results payload for query "${q}"`);
        }
        continue;
      }

      // Deduplicate results
      const newResults = (rawResults as SearchResult[]).filter((result) => {
        if (seenUrls.has(result.url)) {
          return false;
        }
        seenUrls.add(result.url);
        return true;
      });

      console.log(`Found ${newResults.length} unique results from ${rawResults[0]?.engine}`);
      allResults = allResults.concat(newResults);
    } catch (error) {
      console.error(`Search failed for query "${q}":`, error);
    }
  }

  options.onProgress?.('Processing results', 50);

  const filteredResults = allResults.filter(isValidBusinessResult);
  const processedResults = await processResults(filteredResults, location);

  // Save results to database
  for (const result of processedResults) {
    await db.saveBusiness(result).catch(console.error);
  }

  options.onProgress?.('Search complete', 100);
  return processedResults;
}
|
||||||
|
|
||||||
|
// Add other necessary functions (isValidBusinessResult, processResults, etc.)
|
||||||
|
/**
 * Decides whether a raw search result is usable as a business entry.
 *
 * A result is accepted only when it carries a non-blank `title` and a
 * non-blank `url`. The input originates from external JSON, so the runtime
 * types are checked rather than assumed from the declared interface.
 *
 * @param result A raw search result.
 * @returns `true` when the result has both a title and a URL.
 */
function isValidBusinessResult(result: SearchResult): boolean {
  if (!result) {
    return false;
  }
  const hasTitle = typeof result.title === 'string' && result.title.trim().length > 0;
  const hasUrl = typeof result.url === 'string' && result.url.trim().length > 0;
  return hasTitle && hasUrl;
}
|
||||||
|
|
||||||
|
/**
 * Converts raw search results into `BusinessData` records.
 *
 * Missing contact fields become empty strings, `rating` starts at 0, `source`
 * is always 'web', and results without coordinates fall back to a shared
 * placeholder coordinate object. A result that fails to convert is logged and
 * skipped.
 *
 * @param results  Raw search results to convert.
 * @param location Search location (not used by the current implementation).
 * @returns One business per successfully converted result, in input order.
 */
async function processResults(results: SearchResult[], location: string): Promise<BusinessData[]> {
  const fallbackCoords = { lat: 0, lng: 0 }; // Replace with actual coordinates
  const businesses: BusinessData[] = [];

  results.forEach((entry) => {
    try {
      const converted: BusinessData = {
        id: generateBusinessId(entry),
        name: entry.title,
        phone: entry.phone || '',
        email: entry.email || '',
        address: entry.address || '',
        rating: 0,
        website: entry.website || entry.url || '',
        logo: '',
        source: 'web',
        description: entry.content || '',
        location: entry.coordinates || fallbackCoords
      };
      businesses.push(converted);
    } catch (error) {
      console.error(`Error processing result ${entry.title}:`, error);
    }
  });

  return businesses;
}
|
||||||
|
|
||||||
|
/**
 * Runs a live search without progress reporting and logs how many businesses
 * it returned. Any error is logged instead of being propagated, so the
 * returned promise does not reject because of a failed search.
 *
 * @param searchTerm What to look for.
 * @param location   Where to look for it.
 */
async function searchAndUpdateInBackground(searchTerm: string, location: string): Promise<void> {
  try {
    const refreshed = await performSearch(searchTerm, location, {});
    const refreshedCount = refreshed.length;
    console.log(`Updated ${refreshedCount} businesses in background`);
  } catch (failure) {
    console.error('Background search error:', failure);
  }
}
|
||||||
|
|
||||||
|
// ... rest of the file remains the same
|
||||||
|
|
140
src/lib/services/databaseService.ts
Normal file
140
src/lib/services/databaseService.ts
Normal file
|
@ -0,0 +1,140 @@
|
||||||
|
import { createClient, SupabaseClient } from '@supabase/supabase-js';
|
||||||
|
import { env } from '../../config/env';
|
||||||
|
import { BusinessData } from '../types';
|
||||||
|
import { generateBusinessId, extractPlaceIdFromUrl } from '../utils';
|
||||||
|
|
||||||
|
/**
 * Thin wrapper around a Supabase client for the `businesses`, `searches` and
 * `cache` tables. Every method logs a Supabase error before throwing it,
 * except `getFromCache`, which reports failures as a cache miss.
 */
export class DatabaseService {
  private supabase: SupabaseClient;

  /** Creates the Supabase client from `env.supabase.url` / `env.supabase.anonKey`. */
  constructor() {
    this.supabase = createClient(
      env.supabase.url,
      env.supabase.anonKey,
      {
        auth: {
          autoRefreshToken: true,
          persistSession: true
        }
      }
    );
  }

  /**
   * Full-text searches stored businesses by name and address.
   *
   * @param query    Text matched against the `name` column.
   * @param location Text matched against the `address` column.
   * @returns Up to `env.cache.maxResultsPerQuery` rows, most-searched first.
   * @throws The Supabase error when the query fails.
   */
  async searchBusinesses(query: string, location: string): Promise<BusinessData[]> {
    const { data, error } = await this.supabase
      .from('businesses')
      .select('*')
      // 'plain' treats the input as plain words. The default parser expects
      // tsquery operator syntax and rejects free text such as "coffee shop".
      .textSearch('name', query, { type: 'plain' })
      .textSearch('address', location, { type: 'plain' })
      .order('search_count', { ascending: false })
      .limit(env.cache.maxResultsPerQuery);

    if (error) {
      console.error('Error searching businesses:', error);
      throw error;
    }

    // saveBusiness() stores coordinates in `latitude` / `longitude` columns;
    // rebuild the `location` object so rows have the same shape as freshly
    // processed results. All original columns are kept as they are.
    return (data || []).map((row) => {
      const hasCoordinates =
        typeof row.latitude === 'number' && typeof row.longitude === 'number';
      if (row.location || !hasCoordinates) {
        return row;
      }
      return { ...row, location: { lat: row.latitude, lng: row.longitude } };
    });
  }

  /**
   * Inserts or updates a business row, keyed by `id`.
   *
   * The id already present on `business` is used when there is one, so the
   * stored row carries the same id that was handed to callers; otherwise an id
   * is derived from name, website, phone and address.
   *
   * @param business Fields of the business to store.
   * @throws The Supabase error when the upsert fails.
   */
  async saveBusiness(business: Partial<BusinessData>): Promise<void> {
    const id = business.id || generateBusinessId({
      title: business.name || '',
      url: business.website,
      phone: business.phone,
      address: business.address
    });

    const { error } = await this.supabase
      .from('businesses')
      .upsert({
        id,
        name: business.name,
        phone: business.phone,
        email: business.email,
        address: business.address,
        rating: business.rating,
        website: business.website,
        logo: business.logo,
        source: business.source,
        description: business.description,
        latitude: business.location?.lat,
        longitude: business.location?.lng,
        place_id: business.website ? extractPlaceIdFromUrl(business.website) : null,
        // NOTE(review): on conflict this resets an existing row's counter to 1.
        // Confirm whether that is intended before relying on search_count.
        search_count: 1
      }, {
        onConflict: 'id',
        ignoreDuplicates: false
      });

    if (error) {
      console.error('Error saving business:', error);
      throw error;
    }
  }

  /**
   * Adds one to a business's `search_count` and refreshes `last_updated`.
   *
   * Implemented as read-then-write, so two concurrent calls may lose an
   * increment. Does nothing when no row has the given id.
   *
   * @param id Id of the business row.
   * @throws The Supabase error when the read or the update fails.
   */
  async incrementSearchCount(id: string): Promise<void> {
    const { data, error: readError } = await this.supabase
      .from('businesses')
      .select('search_count')
      .eq('id', id)
      .single();

    if (readError) {
      if (readError.code === 'PGRST116') { // Not found error
        return;
      }
      console.error('Error incrementing search count:', readError);
      throw readError;
    }

    const current = typeof data?.search_count === 'number' ? data.search_count : 0;

    const { error } = await this.supabase
      .from('businesses')
      .update({
        search_count: current + 1,
        last_updated: new Date().toISOString()
      })
      .eq('id', id);

    if (error) {
      console.error('Error incrementing search count:', error);
      throw error;
    }
  }

  /**
   * Records one executed search in the `searches` table.
   *
   * @param query        The search term.
   * @param location     The search location.
   * @param resultsCount Number of results the search produced.
   * @throws The Supabase error when the insert fails.
   */
  async saveSearch(query: string, location: string, resultsCount: number): Promise<void> {
    const { error } = await this.supabase
      .from('searches')
      .insert([{
        query,
        location,
        results_count: resultsCount,
        timestamp: new Date().toISOString()
      }]);

    if (error) {
      console.error('Error saving search:', error);
      throw error;
    }
  }

  /**
   * Reads a non-expired cache entry.
   *
   * @param key Cache key.
   * @returns The stored value, or `null` on a miss or on any error. Errors
   *          other than "not found" are logged.
   */
  async getFromCache(key: string): Promise<any | null> {
    const { data, error } = await this.supabase
      .from('cache')
      .select('value')
      .eq('key', key)
      .gt('expires_at', new Date().toISOString())
      .single();

    if (error) {
      if (error.code !== 'PGRST116') { // Not found error
        console.error('Error getting from cache:', error);
      }
      return null;
    }

    return data?.value;
  }

  /**
   * Writes a cache entry with an expiry time.
   *
   * @param key       Cache key.
   * @param value     Value to store.
   * @param expiresIn Lifetime in milliseconds, counted from now.
   * @throws The Supabase error when the upsert fails.
   */
  async saveToCache(key: string, value: any, expiresIn: number): Promise<void> {
    const { error } = await this.supabase
      .from('cache')
      .upsert({
        key,
        value,
        expires_at: new Date(Date.now() + expiresIn).toISOString()
      });

    if (error) {
      console.error('Error saving to cache:', error);
      throw error;
    }
  }
}

/** Shared service instance used by the rest of the application. */
export const db = new DatabaseService();
|
39
src/lib/utils.ts
Normal file
39
src/lib/utils.ts
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
import crypto from 'crypto';
|
||||||
|
|
||||||
|
/** Fields from which a business id can be derived; every field is optional. */
interface BusinessIdentifier {
  title?: string;
  name?: string;
  phone?: string;
  address?: string;
  url?: string;
  website?: string;
}

/**
 * Derives a deterministic id for a business.
 *
 * The id is `hash_` followed by the hex MD5 digest of the non-empty
 * identifying fields joined with `|`, taken in this order: title (or name),
 * phone, address, url (or website). Empty or missing fields are dropped
 * before joining, so the position of a value is not preserved in the hash.
 *
 * @param business Identifying fields of the business.
 * @returns The id string.
 */
export function generateBusinessId(business: BusinessIdentifier): string {
  const candidates = [
    business.title || business.name,
    business.phone,
    business.address,
    business.url || business.website
  ];

  const parts: string[] = [];
  for (const candidate of candidates) {
    if (candidate) {
      parts.push(candidate);
    }
  }

  const hasher = crypto.createHash('md5');
  hasher.update(parts.join('|'));
  const digest = hasher.digest('hex');

  return `hash_${digest}`;
}
|
||||||
|
|
||||||
|
/**
 * Extracts the `0x…:0x…` identifier from a Google Maps style URL.
 *
 * Recognised forms:
 *  - `…/data=…!1s0x876c7ed0cb78d6d3:0x2cd0c4490736f7c!8m2!…`
 *  - `…?q=…&ftid=0x876c7ed0cb78d6d3:0x2cd0c4490736f7c` (the colon may also be
 *    percent-encoded as `%3A`)
 *  - a bare `16hex:16hex` pair directly after `!` or `/`, which is the only
 *    form the previous implementation matched; kept for compatibility.
 *
 * @param url The URL to inspect.
 * @returns The identifier with a literal `:` separator, or `null` when the URL
 *          contains none or cannot be inspected.
 */
export function extractPlaceIdFromUrl(url: string): string | null {
  try {
    // The identifier follows either the `!1s` data field or the `ftid` query
    // parameter, and each half carries its own `0x` prefix.
    const prefixed = /(?:!1s|[?&]ftid=)(0x[0-9a-f]+)(?::|%3A)(0x[0-9a-f]+)/i.exec(url);
    if (prefixed) {
      return `${prefixed[1]}:${prefixed[2]}`;
    }

    const legacy = url.match(/[!\/]([0-9a-f]{16}:[0-9a-f]{16})/i);
    return legacy?.[1] ?? null;
  } catch (error) {
    // Reached when `url` is not a string at runtime.
    console.warn('Error extracting place ID from URL:', error);
    return null;
  }
}
|
Loading…
Add table
Reference in a new issue