import fs from 'node:fs/promises';
import path from 'node:path';

/**
 * Reads a numeric setting from the environment.
 *
 * An unset or empty variable yields `fallback`. A value that is not a finite
 * number, or is below `min`, is ignored with a warning instead of silently
 * becoming NaN (a NaN target or concurrency would make the run produce nothing).
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unset or invalid.
 * @param {number} [min=0] - Smallest accepted value.
 * @returns {number} The parsed setting or `fallback`.
 */
function envNumber(name, fallback, min = 0) {
  const raw = process.env[name];
  if (raw === undefined || raw === '') return fallback;
  const parsed = Number(raw);
  if (Number.isFinite(parsed) && parsed >= min) return parsed;
  console.warn(`Ignoring invalid ${name}=${JSON.stringify(raw)}; using ${fallback}`);
  return fallback;
}

const OUTPUT_DIR = path.resolve(process.cwd(), 'output', 'outreach');
const TARGET_PER_NICHE = envNumber('LEADS_PER_NICHE', 200);
const CONCURRENCY = envNumber('LEAD_FETCH_CONCURRENCY', 8, 1);
const OVERPASS_DELAY_MS = envNumber('OVERPASS_DELAY_MS', 20000);
const OVERPASS_429_DELAY_MS = envNumber('OVERPASS_429_DELAY_MS', 90000);
const OVERPASS_MAX_ATTEMPTS = envNumber('OVERPASS_MAX_ATTEMPTS', 6);
const OVERPASS_REQUEST_TIMEOUT_MS = 90000;
const WEBSITE_REQUEST_TIMEOUT_MS = 10000;
const OVERPASS_URLS = [
  'https://overpass-api.de/api/interpreter',
];

// Each metro is [city, state, latitude, longitude].
const metros = [
  ['New York', 'NY', 40.7128, -74.006],
  ['Los Angeles', 'CA', 34.0522, -118.2437],
  ['Chicago', 'IL', 41.8781, -87.6298],
  ['Houston', 'TX', 29.7604, -95.3698],
  ['Phoenix', 'AZ', 33.4484, -112.074],
  ['Philadelphia', 'PA', 39.9526, -75.1652],
  ['San Antonio', 'TX', 29.4241, -98.4936],
  ['San Diego', 'CA', 32.7157, -117.1611],
  ['Dallas', 'TX', 32.7767, -96.797],
  ['San Jose', 'CA', 37.3382, -121.8863],
  ['Austin', 'TX', 30.2672, -97.7431],
  ['Jacksonville', 'FL', 30.3322, -81.6557],
  ['Fort Worth', 'TX', 32.7555, -97.3308],
  ['Columbus', 'OH', 39.9612, -82.9988],
  ['Charlotte', 'NC', 35.2271, -80.8431],
  ['San Francisco', 'CA', 37.7749, -122.4194],
  ['Seattle', 'WA', 47.6062, -122.3321],
  ['Denver', 'CO', 39.7392, -104.9903],
  ['Miami', 'FL', 25.7617, -80.1918],
  ['Nashville', 'TN', 36.1627, -86.7816],
];

// Each niche lists the OSM [key, value] tag pairs that identify its businesses.
const niches = [
  {
    id: 'photographers',
    label: 'Photographers',
    targetUseCase: 'portfolio, booking, print cards, event galleries',
    queries: [
      ['craft', 'photographer'],
      ['shop', 'photo_studio'],
      ['shop', 'photo'],
    ],
  },
  {
    id: 'restaurants',
    label: 'Restaurants',
    targetUseCase: 'menu QR codes, table tents, review QR codes, coupons',
    queries: [
      ['amenity', 'restaurant'],
      ['amenity', 'cafe'],
    ],
  },
  {
    id: 'real_estate',
    label: 'Real Estate',
    targetUseCase: 'yard signs, flyers, open houses, property sheets',
    queries: [
      ['office', 'estate_agent'],
    ],
  },
  {
    id: 'events_venues',
    label: 'Events & Venues',
    targetUseCase: 'tickets, schedules, check-in, feedback and post-event links',
    queries: [
      ['amenity', 'events_venue'],
      ['amenity', 'theatre'],
      ['amenity', 'conference_centre'],
      ['tourism', 'attraction'],
    ],
  },
  {
    id: 'wellness_beauty',
    label: 'Wellness & Beauty',
    targetUseCase: 'booking links, price lists, reviews, loyalty offers',
    queries: [
      ['shop', 'beauty'],
      ['shop', 'hairdresser'],
      ['leisure', 'fitness_centre'],
      ['amenity', 'spa'],
    ],
  },
];

/**
 * Resolves after `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Formats one value as a CSV field.
 *
 * `null`/`undefined` become the empty string. Fields containing a double
 * quote, comma, or line break are wrapped in quotes with inner quotes doubled.
 *
 * @param {unknown} value - Value to serialize.
 * @returns {string} The CSV-safe field text.
 */
function csvEscape(value) {
  const text = String(value ?? '');
  if (/[",\n\r]/.test(text)) {
    return `"${text.replaceAll('"', '""')}"`;
  }
  return text;
}

/**
 * Normalizes a raw website tag into an absolute http(s) URL string.
 *
 * Returns '' for empty input, `mailto:` values, bare e-mail addresses,
 * unparsable URLs, and hosts without a dot. A missing scheme defaults to
 * `https://`; the fragment and one trailing slash are removed.
 *
 * @param {unknown} raw - Raw tag value.
 * @returns {string} Normalized URL, or '' when unusable.
 */
function normalizeWebsite(raw) {
  if (!raw) return '';
  let value = String(raw).trim();
  if (!value) return '';
  const looksLikeEmail = value.includes('@') && !value.includes('/');
  if (value.startsWith('mailto:') || looksLikeEmail) return '';
  if (!/^https?:\/\//i.test(value)) value = `https://${value}`;
  try {
    const url = new URL(value);
    if (!url.hostname.includes('.')) return '';
    url.hash = '';
    return url.toString().replace(/\/$/, '');
  } catch {
    return '';
  }
}

/**
 * Returns the first truthy tag value among `names`, in order.
 *
 * @param {Record<string, string> | undefined | null} tags - OSM tag map.
 * @param {string[]} names - Tag keys in priority order.
 * @returns {string} The tag value, or '' when none is set.
 */
function getTag(tags, names) {
  for (const name of names) {
    if (tags?.[name]) return tags[name];
  }
  return '';
}

/**
 * Builds the Overpass QL query for one niche around one metro.
 *
 * Only elements that carry a website or e-mail tag are requested. The search
 * radius is 25 km plus 10 km per `offset`.
 *
 * @param {{queries: string[][]}} niche - Niche definition.
 * @param {Array<string|number>} metro - [city, state, lat, lon].
 * @param {number} offset - Pass number used to widen the radius.
 * @returns {string} Overpass QL source.
 */
function buildOverpassQuery(niche, metro, offset) {
  const [, , lat, lon] = metro;
  const radius = 25000 + offset * 10000;
  const contactTags = ['website', 'contact:website', 'email', 'contact:email'];
  const clauses = niche.queries
    .flatMap(([key, value]) =>
      contactTags.map(
        (contactTag) =>
          `nwr(around:${radius},${lat},${lon})["${key}"="${value}"]["${contactTag}"];`,
      ),
    )
    .join('\n');
  const limit = Math.min(TARGET_PER_NICHE * 2, 500);
  return `[out:json][timeout:45]; ( ${clauses} ); out tags center ${limit};`;
}

/**
 * Runs an Overpass query, retrying on HTTP errors, network errors, timeouts,
 * and unparsable response bodies.
 *
 * At most `OVERPASS_MAX_ATTEMPTS - attempt + 1` requests are made (always at
 * least one). Retries are done in a loop rather than by recursing inside the
 * `try`, so a failure of a later attempt cannot be caught and retried again by
 * an earlier one.
 *
 * @param {string} query - Overpass QL source.
 * @param {number} [attempt=0] - Attempt number to start from.
 * @returns {Promise<any>} Parsed JSON response.
 * @throws {Error} The last failure once all attempts are used.
 */
async function fetchOverpass(query, attempt = 0) {
  for (let current = attempt; ; current += 1) {
    const endpoint = OVERPASS_URLS[current % OVERPASS_URLS.length];
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), OVERPASS_REQUEST_TIMEOUT_MS);
    let rateLimited = false;
    let failure;
    try {
      const response = await fetch(endpoint, {
        method: 'POST',
        headers: { 'content-type': 'application/x-www-form-urlencoded;charset=UTF-8' },
        body: new URLSearchParams({ data: query }),
        signal: controller.signal,
      });
      if (response.ok) {
        // Await here so the abort timer also covers reading the body and a
        // malformed body goes through the retry path below.
        return await response.json();
      }
      rateLimited = response.status === 429;
      failure = new Error(`Overpass ${response.status} ${response.statusText}`);
    } catch (error) {
      failure = error;
    } finally {
      clearTimeout(timer);
    }

    if (current >= OVERPASS_MAX_ATTEMPTS) throw failure;

    if (rateLimited) {
      const waitMs = OVERPASS_429_DELAY_MS + current * 30000;
      console.warn(`Overpass rate limited; waiting ${Math.round(waitMs / 1000)}s before retry ${current + 1}/${OVERPASS_MAX_ATTEMPTS}`);
      await sleep(waitMs);
    } else {
      await sleep(5000 * (current + 1));
    }
  }
}

/**
 * Converts one Overpass element into a lead record.
 *
 * City and state fall back to the metro when the element has no address tags.
 * `lead_score` and `personalization_signal` are filled in later by `scoreLead`.
 *
 * @param {{type: string, id: number, tags?: Record<string, string>}} element - Overpass element.
 * @param {{id: string, label: string, targetUseCase: string}} niche - Niche definition.
 * @param {Array<string|number>} metro - [city, state, lat, lon].
 * @returns {object} Lead record.
 */
function elementToLead(element, niche, metro) {
  const tags = element.tags || {};
  const website = normalizeWebsite(getTag(tags, ['contact:website', 'website', 'url']));
  const email = getTag(tags, ['contact:email', 'email']);
  const phone = getTag(tags, ['contact:phone', 'phone']);
  const street = [tags['addr:housenumber'], tags['addr:street']].filter(Boolean).join(' ');
  const city = tags['addr:city'] || metro[0];
  const state = tags['addr:state'] || metro[1];
  return {
    niche: niche.id,
    niche_label: niche.label,
    company: tags.name || '',
    website,
    email,
    phone,
    city,
    state,
    country: 'US',
    street,
    source: 'OpenStreetMap Overpass',
    source_id: `${element.type}/${element.id}`,
    source_url: `https://www.openstreetmap.org/${element.type}/${element.id}`,
    personalization_signal: '',
    qr_use_case: niche.targetUseCase,
    lead_score: 0,
    email_source: email ? 'osm' : '',
    opt_out_required: 'yes',
  };
}

/**
 * Extracts e-mail addresses from page text.
 *
 * Simple obfuscations (`[at]`, `(at)`, ` at `, `[dot]`, `(dot)`, ` dot `) are
 * undone first. This is a heuristic: ordinary prose containing " at " can
 * produce false matches. Results are lower-cased and de-duplicated; image
 * asset names such as `icon@2x.png` and a few known non-contact domains are
 * dropped.
 *
 * @param {string} text - HTML or plain text.
 * @returns {string[]} Unique lower-cased addresses in order of appearance.
 */
function visibleTextEmails(text) {
  const normalized = text
    .replaceAll('[at]', '@')
    .replaceAll('(at)', '@')
    .replaceAll(' at ', '@')
    .replaceAll('[dot]', '.')
    .replaceAll('(dot)', '.')
    .replaceAll(' dot ', '.');
  const matches = normalized.match(/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g) || [];
  const assetSuffix = /\.(png|jpe?g|gif|webp|svg)$/;
  const ignoredDomains = ['example.com', 'wixpress.com', 'sentry.io'];
  return [...new Set(matches.map((email) => email.toLowerCase()))]
    .filter((email) => !assetSuffix.test(email))
    .filter((email) => !ignoredDomains.some((domain) => email.includes(domain)));
}

/**
 * Finds up to three same-host links that look like contact/about pages.
 *
 * @param {string} html - Page markup.
 * @param {string} baseUrl - URL the markup was loaded from.
 * @returns {string[]} Unique absolute URLs without fragments.
 */
function extractContactLinks(html, baseUrl) {
  let baseHost;
  try {
    baseHost = new URL(baseUrl).hostname;
  } catch {
    // Without a valid base no href can be resolved.
    return [];
  }
  const links = [];
  const regex = /href=["']([^"']+)["']/gi;
  let match;
  while ((match = regex.exec(html))) {
    const href = match[1];
    if (/^(mailto:|tel:)/i.test(href)) continue;
    if (!/(contact|about|team|booking|book|wedding|private-events|catering|visit|location)/i.test(href)) continue;
    try {
      const url = new URL(href, baseUrl);
      if (url.hostname === baseHost) {
        url.hash = '';
        links.push(url.toString());
      }
    } catch {
      // Ignore malformed links.
    }
  }
  return [...new Set(links)].slice(0, 3);
}

/**
 * Downloads a page as text on a best-effort basis.
 *
 * Any failure (network error, timeout, non-2xx status, non-HTML content type)
 * yields '' so one bad site never stops enrichment.
 *
 * @param {string} url - Page URL.
 * @returns {Promise<string>} Page markup, or ''.
 */
async function fetchText(url) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), WEBSITE_REQUEST_TIMEOUT_MS);
  try {
    const response = await fetch(url, {
      headers: {
        'user-agent': 'QR Master lead research bot (+https://qrmaster.net/contact)',
        accept: 'text/html,application/xhtml+xml',
      },
      signal: controller.signal,
      redirect: 'follow',
    });
    if (!response.ok) return '';
    const contentType = response.headers.get('content-type') || '';
    if (!contentType.includes('text/html')) return '';
    return await response.text();
  } catch {
    return '';
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Tries to find an e-mail for a lead on its website, then scores it.
 *
 * Leads without a website, or that already have an e-mail, are only scored.
 * Otherwise the homepage is scanned, then contact-like pages until an address
 * is found. The lead object is mutated in place.
 *
 * @param {object} lead - Lead record.
 * @returns {Promise<object>} The same lead, enriched and scored.
 */
async function enrichLead(lead) {
  if (!lead.website || lead.email) {
    return scoreLead(lead);
  }
  const homepage = await fetchText(lead.website);
  const emails = visibleTextEmails(homepage);
  const contactLinks = extractContactLinks(homepage, lead.website);
  for (const link of contactLinks) {
    if (emails.length > 0) break;
    const html = await fetchText(link);
    emails.push(...visibleTextEmails(html));
  }
  const uniqueEmails = [...new Set(emails)];
  if (uniqueEmails.length > 0) {
    lead.email = uniqueEmails[0];
    lead.email_source = 'website';
  }
  return scoreLead(lead);
}

/**
 * Computes `lead_score` (capped at 100) and the niche pitch sentence.
 *
 * Base 30; +20 website; +30 e-mail; +5 phone; +10 when the e-mail is not on a
 * common free-mail domain; +5 for the real-estate and restaurant niches.
 * The lead object is mutated in place.
 *
 * @param {object} lead - Lead record.
 * @returns {object} The same lead.
 */
function scoreLead(lead) {
  let score = 30;
  if (lead.website) score += 20;
  if (lead.email) score += 30;
  if (lead.phone) score += 5;
  if (lead.email && !/(gmail|yahoo|hotmail|outlook|icloud)\.com$/i.test(lead.email)) score += 10;
  if (lead.niche === 'real_estate' || lead.niche === 'restaurants') score += 5;
  const signalByNiche = {
    photographers: `${lead.company} can use dynamic QR codes on print cards, gallery cards, event handouts, and portfolio links.`,
    restaurants: `${lead.company} can use dynamic QR codes for menus, table tents, reviews, coupons, and seasonal specials.`,
    real_estate: `${lead.company} can use dynamic QR codes on yard signs, flyers, property sheets, and open house material.`,
    events_venues: `${lead.company} can use dynamic QR codes for schedules, ticketing, venue maps, check-in, and post-event feedback.`,
    wellness_beauty: `${lead.company} can use dynamic QR codes for booking pages, service menus, price lists, reviews, and loyalty offers.`,
  };
  lead.lead_score = Math.min(score, 100);
  lead.personalization_signal = signalByNiche[lead.niche] || '';
  return lead;
}

/**
 * Maps `items` through an async `mapper` with at most `limit` calls in flight.
 *
 * A `limit` that is not at least 1 (0, negative, NaN) is treated as 1 so items
 * are never silently skipped. Results keep the order of `items`.
 *
 * @template T, R
 * @param {T[]} items - Inputs.
 * @param {number} limit - Maximum concurrent mapper calls.
 * @param {(item: T, index: number) => Promise<R> | R} mapper - Per-item work.
 * @returns {Promise<R[]>} Mapped results.
 */
async function mapLimit(items, limit, mapper) {
  const results = [];
  const requested = limit >= 1 ? Math.floor(limit) : 1;
  const workerCount = Math.min(requested, items.length);
  let index = 0;
  async function worker() {
    while (index < items.length) {
      const current = index++;
      results[current] = await mapper(items[current], current);
    }
  }
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}

/**
 * Collects, enriches, and ranks leads for one niche.
 *
 * Queries every metro (a second pass widens the radius) until twice the
 * target is gathered, pausing `OVERPASS_DELAY_MS` after each request. A metro
 * whose query fails is logged and skipped.
 *
 * @param {object} niche - Niche definition.
 * @returns {Promise<object[]>} Up to `TARGET_PER_NICHE` leads, best score first.
 */
async function collectNiche(niche) {
  const rawCap = TARGET_PER_NICHE * 2;
  const leadsByKey = new Map();
  for (let pass = 0; pass < 2 && leadsByKey.size < rawCap; pass++) {
    for (const metro of metros) {
      if (leadsByKey.size >= rawCap) break;
      const query = buildOverpassQuery(niche, metro, pass);
      try {
        const data = await fetchOverpass(query);
        for (const element of data.elements || []) {
          const lead = elementToLead(element, niche, metro);
          if (!lead.company) continue;
          if (!lead.website && !lead.email) continue;
          const key = lead.website || `${lead.company}|${lead.city}|${lead.state}`.toLowerCase();
          if (!leadsByKey.has(key)) leadsByKey.set(key, lead);
        }
      } catch (error) {
        console.warn(`[${niche.id}] ${metro[0]} skipped: ${error.message}`);
      }
      await sleep(OVERPASS_DELAY_MS);
    }
  }
  const rawLeads = [...leadsByKey.values()].slice(0, rawCap);
  console.log(`[${niche.id}] collected ${rawLeads.length}; enriching...`);
  const enriched = await mapLimit(rawLeads, CONCURRENCY, enrichLead);
  return enriched
    .filter((lead) => lead.website || lead.email)
    .sort((a, b) => b.lead_score - a.lead_score)
    .slice(0, TARGET_PER_NICHE);
}

/**
 * Serializes leads as CSV text with a fixed header row.
 *
 * @param {object[]} leads - Lead records.
 * @returns {string} CSV document, rows joined by '\n'.
 */
function toCsv(leads) {
  const headers = [
    'niche',
    'niche_label',
    'company',
    'website',
    'email',
    'email_source',
    'phone',
    'city',
    'state',
    'country',
    'street',
    'lead_score',
    'qr_use_case',
    'personalization_signal',
    'source',
    'source_id',
    'source_url',
    'opt_out_required',
  ];
  return [
    headers.join(','),
    ...leads.map((lead) => headers.map((header) => csvEscape(lead[header])).join(',')),
  ].join('\n');
}

/**
 * Removes duplicate leads, keeping the first occurrence.
 *
 * Identity is the e-mail (trimmed, case-insensitive), else the website, else
 * the lower-cased company|city|state.
 *
 * @param {object[]} leads - Lead records.
 * @returns {object[]} Unique leads in first-seen order.
 */
function dedupeLeads(leads) {
  const byKey = new Map();
  for (const lead of leads) {
    const key = lead.email
      ? String(lead.email).trim().toLowerCase()
      : lead.website || `${lead.company}|${lead.city}|${lead.state}`.toLowerCase();
    if (!byKey.has(key)) byKey.set(key, lead);
  }
  return [...byKey.values()];
}

/**
 * Entry point: collects every niche, writes per-niche and combined CSV/JSON
 * files under `OUTPUT_DIR`, and prints a summary.
 *
 * @returns {Promise<void>}
 */
async function main() {
  await fs.mkdir(OUTPUT_DIR, { recursive: true });
  // Computed once so every file of a run shares one date stamp.
  const dated = new Date().toISOString().slice(0, 10);
  const allLeads = [];
  for (const niche of niches) {
    const leads = await collectNiche(niche);
    allLeads.push(...leads);
    await fs.writeFile(path.join(OUTPUT_DIR, `qrmaster-us-leads-${niche.id}-${dated}.csv`), toCsv(leads), 'utf8');
    await fs.writeFile(path.join(OUTPUT_DIR, `qrmaster-us-leads-${niche.id}-${dated}.json`), JSON.stringify(leads, null, 2), 'utf8');
    console.log(`[${niche.id}] kept ${leads.length}`);
  }

  const deduped = dedupeLeads(allLeads).sort((a, b) => b.lead_score - a.lead_score);
  const csvPath = path.join(OUTPUT_DIR, `qrmaster-us-leads-${dated}.csv`);
  const jsonPath = path.join(OUTPUT_DIR, `qrmaster-us-leads-${dated}.json`);
  await fs.writeFile(csvPath, toCsv(deduped), 'utf8');
  await fs.writeFile(jsonPath, JSON.stringify(deduped, null, 2), 'utf8');

  const summary = niches
    .map((niche) => {
      const leads = deduped.filter((lead) => lead.niche === niche.id);
      const withEmail = leads.filter((lead) => lead.email).length;
      return `${niche.label}: ${leads.length} leads, ${withEmail} emails`;
    })
    .join('\n');
  console.log(`\nWrote ${deduped.length} leads`);
  console.log(csvPath);
  console.log(jsonPath);
  console.log(summary);
}

main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});