/**
 * Internal link and call-to-action (CTA) audit.
 *
 * Walks the configured source directories, extracts internal hrefs and
 * <Link>/<a> anchors with simple regex heuristics, checks them against the
 * static routes found under `src/app` and the files under `public`, prints a
 * JSON report to stdout, and sets a non-zero exit code when a broken internal
 * link or a broken CTA is found.
 */
const fs = require("fs");
const path = require("path");

// All paths are resolved relative to the directory the script is run from.
const root = process.cwd();
const scanDirs = ["src", "marketing", "articles", "blog-posts-improved"];
const sourceExtensions = new Set([".ts", ".tsx", ".js", ".jsx", ".md", ".mdx"]);
const publicDir = path.join(root, "public");
const appDir = path.join(root, "src", "app");

// Hrefs under these prefixes are never reported as broken.
const ignoredPrefixes = [
  "/api/",
  "/_next/",
  "/auth/",
  "/r/",
  "/qr/",
  "/scan/",
];

// Sections whose children come from dynamic routes; any href *below* one of
// these prefixes is accepted without a matching static route.
const knownDynamicPrefixes = [
  "/blog/",
  "/learn/",
  "/authors/",
  "/qr-code-for/",
  "/use-cases/",
];

// Anchor text matching any of these patterns is treated as a CTA.
// None of them carries the `g` flag, so `.test()` is stateless.
const ctaPatterns = [
  /get started/i,
  /start free/i,
  /try free/i,
  /create.*qr/i,
  /generate.*qr/i,
  /sign up/i,
  /pricing/i,
  /upgrade/i,
  /create.*free/i,
  /start tracking/i,
  /create.*editable/i,
];

// Marketing pages whose path contains one of these parts are not expected to
// carry a CTA and are left out of `pagesWithoutObviousCta`.
const nonConversionPageParts = [
  "/contact/",
  "/cookie-policy/",
  "/privacy/",
  "/press/",
  "/authors/",
  "/blog/",
  "/newsletter/",
];

// File names that define a route inside the app directory.
const ROUTE_FILE_PATTERN = /^(?:page|route)\.(?:tsx?|jsx?)$/;

// Entries with these names are skipped while walking the tree.
const SKIPPED_ENTRY_NAMES = new Set(["node_modules", ".next", ".git"]);

// Accumulators filled by check() and emitted in the report.
const findings = [];
const ctas = [];

/**
 * Recursively collects every file path below `dir`.
 *
 * @param {string} dir - Directory to walk; a missing directory yields no files.
 * @param {string[]} [files] - Accumulator that found paths are appended to.
 * @returns {string[]} The accumulator, containing all files found so far.
 */
function walk(dir, files = []) {
  if (!fs.existsSync(dir)) return files;
  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    if (SKIPPED_ENTRY_NAMES.has(entry.name)) continue;
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      walk(fullPath, files);
    } else {
      files.push(fullPath);
    }
  }
  return files;
}

/**
 * Converts a platform-specific path to forward-slash form.
 *
 * @param {string} value - Path using `path.sep` separators.
 * @returns {string} The same path with `/` separators.
 */
function toPosix(value) {
  return value.split(path.sep).join("/");
}

/**
 * Derives the static URL path served by a page/route file inside `appDir`.
 *
 * @param {string} file - Absolute path of a file below `appDir`.
 * @returns {string|null} The route (for example "/pricing"), or null when the
 *   file is not a page/route file, lies outside `appDir`, sits in a private
 *   `_folder`, or belongs to a dynamic `[segment]` route.
 */
function routeFromPageFile(file) {
  const rel = toPosix(path.relative(appDir, file));
  if (rel.startsWith("..")) return null;

  const parts = rel.split("/");
  const fileName = parts.pop();
  // Testing the file name alone also recognises the root-level `page.tsx`,
  // whose relative path has no leading directory.
  if (!ROUTE_FILE_PATTERN.test(fileName)) return null;

  // Private folders (`_name`) are opted out of routing entirely.
  if (parts.some((part) => part.startsWith("_"))) return null;

  const routeParts = parts.filter((part) => {
    if (!part) return false;
    // Route groups `(name)` do not contribute a URL segment.
    if (part.startsWith("(") && part.endsWith(")")) return false;
    // Parallel-route slots `@name` do not contribute a URL segment either.
    if (part.startsWith("@")) return false;
    return true;
  });

  // Dynamic segments cannot be matched against literal hrefs.
  if (routeParts.some((part) => part.startsWith("[") && part.endsWith("]"))) return null;
  return "/" + routeParts.join("/");
}

/**
 * Builds the set of URL paths known to exist: "/", every static app route,
 * and every file under `public` (addressed relative to the site root).
 *
 * @returns {Set<string>} Known paths, without trailing slashes (except "/").
 */
function collectRoutes() {
  const routes = new Set(["/"]);
  for (const file of walk(appDir)) {
    const route = routeFromPageFile(file);
    if (route) routes.add(route === "/" ? "/" : route.replace(/\/$/, ""));
  }
  for (const file of walk(publicDir)) {
    routes.add("/" + toPosix(path.relative(publicDir, file)));
  }
  return routes;
}

/**
 * Reduces a raw href to a comparable internal path.
 *
 * @param {string|null|undefined} rawHref - Href as written in the source.
 * @returns {string|null} The path without query, fragment or trailing slash,
 *   or null when the href is empty, fragment-only, external, or relative.
 */
function normalizeHref(rawHref) {
  if (!rawHref) return null;
  let href = rawHref.trim();
  if (!href || href.startsWith("#")) return null;
  if (/^(https?:|mailto:|tel:|sms:|javascript:|data:)/i.test(href)) return null;
  // Protocol-relative URLs ("//host/path") point to another origin even
  // though they start with "/", so they are not internal links.
  if (href.startsWith("//")) return null;
  if (!href.startsWith("/")) return null;
  href = href.split("#")[0].split("?")[0];
  if (href.length > 1) href = href.replace(/\/$/, "");
  return href || "/";
}

/**
 * Tells whether an href should be accepted without a matching static route.
 *
 * @param {string} href - Normalized internal path.
 * @returns {boolean} True for ignored prefixes, hrefs that contain template
 *   or dynamic-segment syntax, and children of a known dynamic section.
 */
function isAllowedDynamicHref(href) {
  if (ignoredPrefixes.some((prefix) => href.startsWith(prefix))) return true;
  if (href.includes("[") || href.includes("${") || href.includes("`")) return true;
  return knownDynamicPrefixes.some(
    (prefix) => href.startsWith(prefix) && href !== prefix.replace(/\/$/, ""),
  );
}

/**
 * Computes the 1-based line number of a character offset.
 *
 * @param {string} content - Full file content.
 * @param {number} index - Character offset into `content`.
 * @returns {number} Line number containing `index`.
 */
function lineNumber(content, index) {
  return content.slice(0, index).split(/\r?\n/).length;
}

/**
 * Finds string-literal hrefs: `href="..."`, `href={"..."}` and
 * `router.push("...")`.
 *
 * @param {string} content - File content to scan.
 * @returns {{href: string, index: number}[]} Matches grouped by pattern, each
 *   with the offset at which the match starts.
 */
function extractHrefMatches(content) {
  const matches = [];
  // Regex literals are created per call, so `lastIndex` never leaks between
  // invocations.
  const patterns = [
    /href\s*=\s*["']([^"']+)["']/g,
    /href\s*=\s*{\s*["']([^"']+)["']\s*}/g,
    /router\.push\(\s*["']([^"']+)["']\s*\)/g,
  ];
  for (const pattern of patterns) {
    let match;
    while ((match = pattern.exec(content)) !== null) {
      matches.push({ href: match[1], index: match.index });
    }
  }
  return matches;
}

/**
 * Builds the pattern that matches `<Tag ... href=...>inner</Tag>`.
 *
 * Capture groups: 1 = quoted href, 2 = href wrapped in braces, 3 = inner
 * markup. Self-closing tags are not matched, so they cannot swallow text up
 * to a later closing tag. This is a heuristic: an attribute value containing
 * ">" (for example an arrow function) ends the opening tag early.
 *
 * @param {string} tagName - Literal tag name ("Link" or "a").
 * @returns {RegExp} A fresh global regular expression.
 */
function anchorPatternFor(tagName) {
  return new RegExp(
    String.raw`<${tagName}(?=\s)[^>]*?\shref\s*=\s*(?:["']([^"']+)["']|\{\s*["']([^"']+)["']\s*\})(?:[^>/]|\/(?!>))*>([\s\S]*?)</${tagName}>`,
    "g",
  );
}

/**
 * Extracts `<Link>` and `<a>` elements that have a string-literal href,
 * together with their visible text.
 *
 * @param {string} content - File content to scan.
 * @returns {{href: string, text: string, index: number}[]} All `<Link>`
 *   matches followed by all `<a>` matches; `text` has nested tags and
 *   `{...}` expressions removed and whitespace collapsed.
 */
function extractAnchors(content) {
  const anchors = [];
  for (const pattern of [anchorPatternFor("Link"), anchorPatternFor("a")]) {
    let match;
    while ((match = pattern.exec(content)) !== null) {
      const href = match[1] || match[2];
      const text = match[3]
        .replace(/<[^>]*>/g, " ")
        .replace(/\{[^}]*\}/g, " ")
        .replace(/\s+/g, " ")
        .trim();
      anchors.push({ href, text, index: match.index });
    }
  }
  return anchors;
}

/**
 * Lists every file under the scan directories that has a source extension.
 *
 * @returns {string[]} Absolute file paths, in scan-directory order.
 */
function sourceFiles() {
  const files = [];
  for (const dir of scanDirs) {
    for (const file of walk(path.join(root, dir))) {
      if (sourceExtensions.has(path.extname(file))) files.push(file);
    }
  }
  return files;
}

/**
 * Tells whether a root-relative file path is a static marketing page that is
 * expected to contain a CTA.
 *
 * @param {string} rel - POSIX path relative to the project root.
 * @returns {boolean} True when the page should be checked for a CTA.
 */
function isConversionPage(rel) {
  if (!rel.includes("src/app/") || !rel.endsWith("/page.tsx")) return false;
  if (!rel.includes("(marketing)")) return false;
  if (rel.includes("[")) return false;
  return !nonConversionPageParts.some((part) => rel.includes(part));
}

/**
 * Runs the audit: records broken internal links and CTAs, lists marketing
 * pages without an obvious CTA, prints the JSON report to stdout, and sets
 * `process.exitCode` to 1 when a broken link or broken CTA was found.
 *
 * @returns {void}
 */
function check() {
  const routes = collectRoutes();
  // The tree is walked once and every file is read once.
  const files = sourceFiles();
  const isResolvable = (href) => routes.has(href) || isAllowedDynamicHref(href);
  const isCtaText = (text) => ctaPatterns.some((pattern) => pattern.test(text));
  const weakFiles = [];

  for (const file of files) {
    const content = fs.readFileSync(file, "utf8");
    const rel = toPosix(path.relative(root, file));

    for (const item of extractHrefMatches(content)) {
      const href = normalizeHref(item.href);
      if (!href || isResolvable(href)) continue;
      findings.push({
        type: "broken-internal-link",
        file: rel,
        line: lineNumber(content, item.index),
        href,
      });
    }

    let hasCta = false;
    for (const anchor of extractAnchors(content)) {
      const text = anchor.text || "";
      if (!isCtaText(text)) continue;
      hasCta = true;

      const href = normalizeHref(anchor.href);
      let status = "broken";
      if (!href) {
        status = "external-or-non-http";
      } else if (isResolvable(href)) {
        status = "ok";
      }
      ctas.push({
        file: rel,
        line: lineNumber(content, anchor.index),
        text: text.slice(0, 100),
        href: anchor.href,
        status,
      });
    }

    if (!hasCta && isConversionPage(rel)) weakFiles.push(rel);
  }

  const brokenCtas = ctas.filter((cta) => cta.status === "broken");

  const report = {
    checkedAt: new Date().toISOString(),
    routeCount: routes.size,
    filesChecked: files.length,
    brokenInternalLinks: findings,
    ctaSummary: {
      total: ctas.length,
      broken: brokenCtas.length,
      sample: ctas.slice(0, 50),
    },
    pagesWithoutObviousCta: weakFiles.slice(0, 100),
  };

  console.log(JSON.stringify(report, null, 2));

  if (findings.length > 0 || brokenCtas.length > 0) {
    process.exitCode = 1;
  }
}

check();