Product Hunt launch
This commit is contained in:
326
scripts/validate-lead-emails.mjs
Normal file
326
scripts/validate-lead-emails.mjs
Normal file
@@ -0,0 +1,326 @@
|
||||
import { promises as dns } from "node:dns";
|
||||
import { readdir, readFile, mkdir, writeFile, stat } from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
|
||||
// Optional positional CLI arguments: [leadRoot] [excludeFile] [outputDir].
// Each one falls back to a default and is resolved against the working directory.
const cliArgs = process.argv.slice(2);
const root = process.cwd();
const leadRoot = path.resolve(root, cliArgs[0] || "Leads");
const excludeFile = path.resolve(root, cliArgs[1] || "Leads/lead_emails_1000_2026-05-25.csv");
const outputDir = path.resolve(root, cliArgs[2] || "Leads/validated");

// YYYY-MM-DD taken from the ISO (UTC) timestamp; embedded in every report file name.
const dateStamp = new Date().toISOString().substring(0, 10);

// Loose, global matcher used to pull candidate addresses out of arbitrary text.
const emailPattern = /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/gi;

// Anchored matcher applied to a single candidate to decide whether its syntax is acceptable.
const strictEmailPattern = /^[A-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?(?:\.[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?)+$/i;

// Only files with these (lower-cased) extensions are scanned for addresses.
const allowedExtensions = new Set([".csv", ".txt", ".md", ".json"]);
|
||||
// File-name prefixes of the reports this script writes. walkFiles() skips any
// file starting with one of these so that a previous run's output (which lives
// under the lead root by default) is never re-read as lead input.
// Every report prefix produced by the script must be listed here, including
// the high-confidence export.
const generatedPrefixes = [
  "lead_email_validation_all_",
  "lead_email_validation_valid_remaining_",
  "lead_email_validation_high_confidence_remaining_",
  "lead_email_validation_unknown_remaining_",
  "lead_email_validation_invalid_",
  "lead_email_validation_summary_",
];
|
||||
// Domains whose addresses are always rejected with reason "internal_or_generated_domain".
const blockedLeadDomains = new Set(["qrmaster.net"]);

// Domains that getConfidence() rates "high" once an address is DNS-valid.
const empiricalHighConfidenceDomains = new Set([
  "gmail.com",
  "googlemail.com",
  "accor.com",
  "hotelbb.com",
  "losteria.de",
  "breizhcafe.com",
]);

// Domains that getConfidence() rates "low" even when an address is DNS-valid.
const empiricalLowConfidenceDomains = new Set([
  "aon.at",
  "countryinn.com",
  "hilton.com",
  "hyatt.com",
  "motel-one.com",
  "novum-hotels.de",
  "riu.com",
]);
|
||||
|
||||
/**
 * Render one value as a CSV field.
 *
 * null/undefined become the empty string; everything else is stringified.
 * A field containing a double quote, comma, CR or LF is wrapped in double
 * quotes with embedded quotes doubled; any other field is returned as-is.
 *
 * @param {unknown} value - Cell value.
 * @returns {string} CSV-safe field text.
 */
function csvCell(value) {
  const raw = value == null ? "" : String(value);
  const needsQuoting = ['"', ",", "\r", "\n"].some((special) => raw.includes(special));
  if (!needsQuoting) {
    return raw;
  }
  const escaped = raw.split('"').join('""');
  return `"${escaped}"`;
}
|
||||
|
||||
/**
 * Serialise rows into CSV text with a header line.
 *
 * The header is the column names themselves; each row contributes one line
 * holding row[column] for every column, in order. Every line, including the
 * last, is terminated with CRLF.
 *
 * @param {Iterable<object>} rows - Records to write.
 * @param {string[]} columns - Property names to emit, in output order.
 * @returns {string} CSV document.
 */
function toCsv(rows, columns) {
  const header = columns.map(csvCell).join(",");
  const body = Array.from(rows, (row) =>
    columns.map((column) => csvCell(row[column])).join(","),
  );
  return [header, ...body].map((line) => `${line}\r\n`).join("");
}
|
||||
|
||||
/**
 * Resolve the input path to the list of files to scan.
 *
 * A regular file is returned as a one-element list (no extension filtering);
 * a directory is expanded via walkFiles(); anything else is an error.
 *
 * @param {string} inputPath - File or directory path.
 * @returns {Promise<string[]>} Files to scan.
 * @throws {Error} If the path cannot be stat'ed or is neither a file nor a directory.
 */
async function collectInputFiles(inputPath) {
  const info = await stat(inputPath);
  if (info.isDirectory()) {
    return walkFiles(inputPath);
  }
  if (info.isFile()) {
    return [inputPath];
  }
  throw new Error(`Input path is not a file or directory: ${inputPath}`);
}
|
||||
|
||||
/**
 * Recursively list the scannable files below a directory.
 *
 * Sub-directories are descended into one at a time. A regular file is kept
 * only when its lower-cased extension is in allowedExtensions and its name
 * does not start with one of generatedPrefixes. Other entry types are ignored.
 *
 * @param {string} dir - Directory to walk.
 * @returns {Promise<string[]>} Matching paths, sorted with localeCompare.
 */
async function walkFiles(dir) {
  const collected = [];
  for (const dirent of await readdir(dir, { withFileTypes: true })) {
    const { name } = dirent;
    const location = path.join(dir, name);
    if (dirent.isDirectory()) {
      const nested = await walkFiles(location);
      collected.push(...nested);
    } else if (dirent.isFile()) {
      const extension = path.extname(name).toLowerCase();
      const isGenerated = generatedPrefixes.some((prefix) => name.startsWith(prefix));
      if (allowedExtensions.has(extension) && !isGenerated) {
        collected.push(location);
      }
    }
  }
  collected.sort((left, right) => left.localeCompare(right));
  return collected;
}
|
||||
|
||||
/**
 * Read a file as UTF-8 and return every address matched by emailPattern,
 * lower-cased, in order of appearance (duplicates included).
 *
 * Reading stays best-effort: a file that cannot be read yields an empty list
 * so one bad file does not abort the run. The failure is reported on stderr
 * instead of being swallowed, because a silently skipped lead or exclusion
 * file changes the reports without any visible trace.
 *
 * @param {string} filePath - File to scan.
 * @returns {Promise<string[]>} Lower-cased addresses; empty if the file is unreadable.
 */
async function extractEmailsFromFile(filePath) {
  let content;
  try {
    content = await readFile(filePath, "utf8");
  } catch (error) {
    console.warn(`Skipping unreadable file ${filePath}: ${error.message}`);
    return [];
  }
  // A match always ends in a letter and contains no whitespace, so lower-casing
  // is the only normalisation that can have an effect.
  return Array.from(content.matchAll(emailPattern), (match) => match[0].toLowerCase());
}
|
||||
|
||||
/**
 * Build the set of addresses to treat as already uploaded.
 *
 * The argument is a ";"-separated list of file paths; blank entries are
 * dropped and paths that cannot be stat'ed are skipped without error.
 *
 * @param {string} filePathsArg - One or more paths joined with ";".
 * @returns {Promise<Set<string>>} Addresses found in the readable files.
 */
async function loadExcludedEmails(filePathsArg) {
  const candidates = String(filePathsArg || "")
    .split(";")
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0);

  const excluded = new Set();
  for (const candidate of candidates) {
    const exists = await stat(candidate).then(
      () => true,
      () => false,
    );
    if (!exists) {
      continue;
    }
    for (const email of await extractEmailsFromFile(candidate)) {
      excluded.add(email);
    }
  }
  return excluded;
}
|
||||
|
||||
/**
 * Settle with `promise`, or reject with Error("dns_timeout") if it has not
 * settled within `ms` milliseconds.
 *
 * The timer is cleared as soon as the race settles. Without that, every
 * lookup that finishes early would leave a live timer behind, keeping the
 * event loop alive and accumulating across thousands of lookups.
 *
 * @template T
 * @param {Promise<T>} promise - Operation to bound.
 * @param {number} ms - Timeout in milliseconds.
 * @returns {Promise<T>} Result of `promise`, or a "dns_timeout" rejection.
 */
function withTimeout(promise, ms) {
  let timer;
  const deadline = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error("dns_timeout")), ms);
  });
  return Promise.race([promise, deadline]).finally(() => clearTimeout(timer));
}
|
||||
|
||||
/**
 * Classify a domain's ability to receive mail from DNS alone.
 *
 * Lookup order:
 *   1. MX (2.5 s timeout). Usable MX hosts -> "mx", hosts listed by ascending
 *      priority and joined with ";".
 *   2. If the only MX records are "null MX" entries (RFC 7505: an empty or "."
 *      exchange, meaning the domain accepts no mail) -> "null_mx". These must
 *      not count as a usable MX, and no A fallback is attempted.
 *   3. Otherwise, including when the MX query fails or times out, an A lookup
 *      (2 s timeout). Any address -> "a_only".
 *   4. Nothing found -> "no_dns".
 *
 * @param {string} domain - Domain part of an address.
 * @returns {Promise<{dns_status: string, mx_hosts: string, reason: string}>}
 */
async function checkDomain(domain) {
  try {
    const mxRecords = await withTimeout(dns.resolveMx(domain), 2500);
    const usableRecords = mxRecords.filter(
      ({ exchange }) => exchange !== "" && exchange !== ".",
    );
    if (usableRecords.length > 0) {
      return {
        dns_status: "mx",
        mx_hosts: usableRecords
          .sort((a, b) => a.priority - b.priority)
          .map((record) => record.exchange)
          .join(";"),
        reason: "domain_has_mx",
      };
    }
    if (mxRecords.length > 0) {
      // Only null-MX records were published: the domain explicitly refuses mail.
      return {
        dns_status: "null_mx",
        mx_hosts: "",
        reason: "domain_has_null_mx",
      };
    }
  } catch {
    // Fall through to A lookup. Some domains can receive via address fallback.
  }

  try {
    const aRecords = await withTimeout(dns.resolve4(domain), 2000);
    if (aRecords.length > 0) {
      return {
        dns_status: "a_only",
        mx_hosts: "",
        reason: "domain_has_a_record_but_no_mx",
      };
    }
  } catch {
    // Classified below.
  }

  return {
    dns_status: "no_dns",
    mx_hosts: "",
    reason: "no_mx_or_a_record",
  };
}
|
||||
|
||||
/**
 * Run `worker` over `items` with at most `limit` calls in flight.
 *
 * Progress is logged every 100 *completed* items. Counting completions rather
 * than dequeues keeps the message from overstating progress by the number of
 * items still in flight.
 *
 * `limit` is clamped to at least 1, so a zero, negative or NaN limit still
 * processes every item instead of silently returning an empty map.
 *
 * @template T, R
 * @param {T[]} items - Work items; also used as the result keys.
 * @param {number} limit - Maximum concurrent worker calls.
 * @param {(item: T) => Promise<R>} worker - Async function applied to each item.
 * @returns {Promise<Map<T, R>>} Map from item to worker result.
 */
async function mapLimit(items, limit, worker) {
  const results = new Map();
  const workerCount = Math.min(Math.max(1, Math.floor(limit) || 1), items.length);
  let nextIndex = 0;
  let completed = 0;

  async function runWorker() {
    while (nextIndex < items.length) {
      const item = items[nextIndex++];
      results.set(item, await worker(item));
      completed += 1;
      if (completed % 100 === 0) {
        console.log(`DNS checked ${completed} / ${items.length} domains...`);
      }
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => runWorker()));
  return results;
}
|
||||
|
||||
/**
 * Rate how much an address can be trusted for upload.
 *
 * Anything whose status is not "valid" is rated "reject". For valid
 * addresses the low-confidence domain list is consulted before the
 * high-confidence one; domains on neither list are rated "medium".
 *
 * @param {string} status - Validation status of the address.
 * @param {string} domain - Domain part of the address.
 * @returns {{confidence: string, confidence_reason: string}}
 */
function getConfidence(status, domain) {
  const rating = (confidence, confidence_reason) => ({ confidence, confidence_reason });

  if (status !== "valid") {
    return rating("reject", "not_dns_valid");
  }
  if (empiricalLowConfidenceDomains.has(domain)) {
    return rating("low", "empirical_low_smartlead_valid_rate");
  }
  if (empiricalHighConfidenceDomains.has(domain)) {
    return rating("high", "empirical_high_smartlead_valid_rate");
  }
  return rating("medium", "dns_valid_unproven_domain");
}
|
||||
|
||||
// Stage 1: make sure the report directory exists, load the exclusion list and
// find the files to scan.
await mkdir(outputDir, { recursive: true });

const excludeEmails = await loadExcludedEmails(excludeFile);
const files = await collectInputFiles(leadRoot);

// Stage 2: map each address to the files it was found in (at most five kept).
// Each file is recorded once per address even if the address appears in it
// several times, so that source_count reflects distinct source files rather
// than repeated matches inside a single file.
const emailSources = new Map();

for (const file of files) {
  const emailsInFile = new Set(await extractEmailsFromFile(file));
  for (const email of emailsInFile) {
    if (!emailSources.has(email)) emailSources.set(email, []);
    const sources = emailSources.get(email);
    if (sources.length < 5) sources.push(file);
  }
}

// Stage 3: DNS is queried once per distinct domain, and only for addresses
// whose syntax passes the strict pattern.
const domains = [...new Set(
  [...emailSources.keys()]
    .filter((email) => strictEmailPattern.test(email))
    .map((email) => email.split("@")[1]),
)].sort((a, b) => a.localeCompare(b));

console.log(`Files scanned: ${files.length}`);
console.log(`Unique emails found: ${emailSources.size}`);
console.log(`Domains to check: ${domains.length}`);

// Up to 80 domains are checked concurrently.
const dnsResults = await mapLimit(domains, 80, checkDomain);
|
||||
|
||||
// Domains beginning with the label example, test, invalid or localhost are
// treated as placeholders rather than real mail hosts.
const reservedDomainPattern = /^(example|test|invalid|localhost)(\.|$)/i;

const sortedEmails = [...emailSources.keys()];
sortedEmails.sort((left, right) => left.localeCompare(right));

// One report row per unique address, in localeCompare order.
// Classification precedence for syntactically valid addresses:
// blocked domain, then reserved domain, then the DNS outcome.
// Addresses that fail the strict syntax check keep the "invalid_syntax" default.
const results = sortedEmails.map((email) => {
  const [, domain = ""] = email.split("@");
  const lookup = dnsResults.get(domain);
  const outcome = {
    status: "invalid",
    reason: "invalid_syntax",
    dns_status: "",
    mx_hosts: "",
  };

  if (strictEmailPattern.test(email)) {
    if (blockedLeadDomains.has(domain)) {
      outcome.reason = "internal_or_generated_domain";
    } else if (reservedDomainPattern.test(domain)) {
      outcome.reason = "reserved_or_test_domain";
    } else {
      switch (lookup?.dns_status) {
        case "mx":
          outcome.status = "valid";
          outcome.reason = lookup.reason;
          outcome.dns_status = lookup.dns_status;
          outcome.mx_hosts = lookup.mx_hosts;
          break;
        case "a_only":
          // An A record alone does not prove mail delivery, hence "unknown".
          outcome.status = "unknown";
          outcome.reason = lookup.reason;
          outcome.dns_status = lookup.dns_status;
          break;
        default:
          outcome.reason = lookup?.reason || "dns_not_checked";
          outcome.dns_status = lookup?.dns_status || "";
      }
    }
  }

  const sources = emailSources.get(email);
  const { confidence, confidence_reason } = getConfidence(outcome.status, domain);

  return {
    email,
    status: outcome.status,
    reason: outcome.reason,
    confidence,
    confidence_reason,
    domain,
    dns_status: outcome.dns_status,
    mx_hosts: outcome.mx_hosts,
    already_uploaded: excludeEmails.has(email) ? "true" : "false",
    source_count: sources.length,
    first_source: sources[0],
  };
});
|
||||
|
||||
// Report destinations, all inside outputDir and stamped with the run date.
const allOut = path.join(outputDir, `lead_email_validation_all_${dateStamp}.csv`);
const validOut = path.join(outputDir, `lead_email_validation_valid_remaining_${dateStamp}.csv`);
const highConfidenceOut = path.join(outputDir, `lead_email_validation_high_confidence_remaining_${dateStamp}.csv`);
const unknownOut = path.join(outputDir, `lead_email_validation_unknown_remaining_${dateStamp}.csv`);
const invalidOut = path.join(outputDir, `lead_email_validation_invalid_${dateStamp}.csv`);
const summaryOut = path.join(outputDir, `lead_email_validation_summary_${dateStamp}.txt`);

// Bucket the rows in a single pass. "Remaining" means not already uploaded.
// Invalid rows are listed regardless of upload state.
const validRemaining = [];
const highConfidenceRemaining = [];
const unknownRemaining = [];
const invalid = [];

for (const row of results) {
  if (row.status === "invalid") {
    invalid.push(row);
    continue;
  }
  if (row.already_uploaded === "true") {
    continue;
  }
  if (row.status === "valid") {
    validRemaining.push(row);
    if (row.confidence === "high") highConfidenceRemaining.push(row);
  } else if (row.status === "unknown") {
    unknownRemaining.push(row);
  }
}

const allColumns = ["email", "status", "reason", "confidence", "confidence_reason", "domain", "dns_status", "mx_hosts", "already_uploaded", "source_count", "first_source"];
const reviewColumns = ["email", "reason", "domain"];
const emailOnly = (rows) => rows.map(({ email }) => ({ email }));

// Reports are written one after another, in this order.
const csvReports = [
  [allOut, toCsv(results, allColumns)],
  [validOut, toCsv(emailOnly(validRemaining), ["email"])],
  [highConfidenceOut, toCsv(emailOnly(highConfidenceRemaining), ["email"])],
  [unknownOut, toCsv(unknownRemaining, reviewColumns)],
  [invalidOut, toCsv(invalid, reviewColumns)],
];

for (const [destination, contents] of csvReports) {
  await writeFile(destination, contents, "utf8");
}
|
||||
|
||||
// Plain-text run summary: written next to the CSV reports and echoed to stdout.
const countRows = (predicate) => results.filter(predicate).length;
const alreadyUploadedCount = countRows((row) => row.already_uploaded === "true");
const validTotalCount = countRows((row) => row.status === "valid");

const summaryLines = [
  `Lead email validation summary - ${dateStamp}`,
  `Lead root: ${leadRoot}`,
  `Files scanned: ${files.length}`,
  `Unique emails found: ${results.length}`,
  `Already uploaded/excluded: ${alreadyUploadedCount}`,
  `Valid total: ${validTotalCount}`,
  `Valid remaining: ${validRemaining.length}`,
  `High-confidence valid remaining: ${highConfidenceRemaining.length}`,
  `Unknown remaining: ${unknownRemaining.length}`,
  `Invalid total: ${invalid.length}`,
  `All report: ${allOut}`,
  `Valid remaining upload file: ${validOut}`,
  `High-confidence upload file: ${highConfidenceOut}`,
  `Unknown remaining review file: ${unknownOut}`,
  `Invalid report: ${invalidOut}`,
  "", // yields the trailing newline once joined
];
const summary = summaryLines.join("\n");

await writeFile(summaryOut, summary, "utf8");

console.log(summary);
|
||||
@@ -8,6 +8,11 @@ import { motion } from 'framer-motion';
|
||||
import { Globe, User, MapPin, Phone, FileText, Ticket, Smartphone, Star } from 'lucide-react';
|
||||
import { useState, useEffect } from 'react';
|
||||
|
||||
// Link target of the Product Hunt badge anchor.
const PRODUCT_HUNT_URL = 'https://www.producthunt.com/products/qr-master-2?launch=qr-master-3';

// Image source of the Product Hunt badge (SVG embed widget, neutral theme).
const PRODUCT_HUNT_BADGE_URL = 'https://api.producthunt.com/widgets/embed-image/v1/featured.svg?post_id=1155554&theme=neutral&t=1779882938098';
|
||||
|
||||
const FlippingCard = ({ front, back, delay }: { front: any, back: any, delay: number }) => {
|
||||
const [isFlipped, setIsFlipped] = useState(false);
|
||||
|
||||
@@ -161,6 +166,25 @@ export const Hero: React.FC<HeroProps> = ({ t, headingAs = 'h1' }) => {
|
||||
{t.hero.cta_secondary}
|
||||
</Link>
|
||||
</motion.div>
|
||||
|
||||
<motion.a
|
||||
href={PRODUCT_HUNT_URL}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
aria-label="View QR Master on Product Hunt"
|
||||
initial={{ opacity: 0, y: 16 }}
|
||||
animate={{ opacity: 1, y: 0 }}
|
||||
transition={{ delay: 0.65 }}
|
||||
className="inline-flex rounded-[10px] transition-transform duration-200 hover:-translate-y-0.5 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-4 focus-visible:outline-[#533afd]"
|
||||
>
|
||||
<img
|
||||
src={PRODUCT_HUNT_BADGE_URL}
|
||||
alt="QR Master - Dynamic QR codes with analytics and editable links | Product Hunt"
|
||||
width="250"
|
||||
height="54"
|
||||
className="h-[54px] w-[250px]"
|
||||
/>
|
||||
</motion.a>
|
||||
</div>
|
||||
|
||||
{/* Right Preview Widget */}
|
||||
|
||||
@@ -3,3 +3,9 @@ For lead scraping, do not rely only on pre-enrichment dedupe. Website crawling c
|
||||
|
||||
Lesson:
|
||||
Large API scraping runs should write incremental output or use smaller controlled batches. A long Overpass workflow can hang or rate-limit without producing files, making it hard to recover useful partial results.
|
||||
|
||||
Lesson:
|
||||
For bulk email pre-validation, avoid synchronous per-domain PowerShell DNS checks because slow domains can stall the whole run. Use a concurrent DNS checker with explicit per-query timeouts and write separate valid, unknown, and invalid reports.
|
||||
|
||||
Lesson:
|
||||
DNS/MX-valid is not enough for Smartlead-quality lead uploads. Calibrate high-confidence exports against Smartlead feedback by domain; in the first two batches, gmail.com-style domains were far more reliable than large hotel-chain domains even when both had valid MX records.
|
||||
|
||||
Reference in New Issue
Block a user