// Viewer metadata captured with this source (apparently not part of the script): 250 lines, 6.6 KiB, JavaScript.
const fs = require("fs");
|
|
const path = require("path");
|
|
|
|
// Project root: the directory the script is launched from.
const root = process.cwd();

// Directories (relative to the root) whose files are scanned for links.
const scanDirs = [
  "src",
  "marketing",
  "articles",
  "blog-posts-improved",
];

// Only files with one of these extensions are scanned.
const sourceExtensions = new Set([
  ".ts",
  ".tsx",
  ".js",
  ".jsx",
  ".md",
  ".mdx",
]);

// Every file under this directory is registered as a valid link target.
const publicDir = path.join(root, "public");

// Directory that is walked for page/route files to build the route list.
const appDir = path.join(root, "src", "app");
|
|
|
|
// Hrefs that start with one of these prefixes are always accepted and are
// never reported as broken links.
const ignoredPrefixes = ["/api/", "/_next/", "/auth/", "/r/", "/qr/", "/scan/"];
|
|
|
|
// Hrefs nested below one of these prefixes are accepted without a matching
// static route (presumably because they are served by dynamic segments, which
// the route collector cannot enumerate).
const knownDynamicPrefixes = ["/blog/", "/learn/", "/authors/", "/qr-code-for/", "/use-cases/"];
|
|
|
|
// An anchor whose visible text matches any of these (case-insensitive)
// patterns is treated as a call-to-action.
const ctaPatterns = [
  /get started/i, /start free/i, /try free/i,
  /create.*qr/i, /generate.*qr/i,
  /sign up/i, /pricing/i, /upgrade/i,
  /create.*free/i, /start tracking/i, /create.*editable/i,
];
|
|
|
|
// Pages whose relative path contains one of these fragments are left out of
// the "pages without an obvious CTA" list.
const nonConversionPageParts = ["/contact/", "/cookie-policy/", "/privacy/", "/press/", "/authors/", "/blog/", "/newsletter/"];
|
|
|
|
// Module-level accumulators filled in by check():
//   findings - one entry per broken internal link
//   ctas     - one entry per call-to-action anchor that was inspected
const findings = [];
const ctas = [];
|
|
|
|
/**
 * Recursively gathers the paths of all files below a directory.
 *
 * Entries named `node_modules`, `.next` or `.git` are skipped entirely.
 * A directory that does not exist contributes nothing.
 *
 * @param {string} dir - Directory to descend into.
 * @param {string[]} [files] - Accumulator; found paths are appended to it.
 * @returns {string[]} The same `files` array that was passed in (or created).
 */
function walk(dir, files = []) {
  if (!fs.existsSync(dir)) {
    return files;
  }

  const skippedNames = new Set(["node_modules", ".next", ".git"]);
  const entries = fs.readdirSync(dir, { withFileTypes: true });

  entries
    .filter((entry) => !skippedNames.has(entry.name))
    .forEach((entry) => {
      const target = path.join(dir, entry.name);
      if (!entry.isDirectory()) {
        files.push(target);
        return;
      }
      // Sub-directories share the accumulator, so results stay in visit order.
      walk(target, files);
    });

  return files;
}
|
|
|
|
/**
 * Rewrites a path that uses the platform separator so it uses "/" instead.
 *
 * @param {string} value - Path using `path.sep` as its separator.
 * @returns {string} The same path with forward slashes.
 */
function toPosix(value) {
  const segments = value.split(path.sep);
  return segments.join("/");
}
|
|
|
|
/**
 * Maps a file inside the app directory to the static URL path it serves.
 *
 * Recognised files are `page.{tsx,ts,jsx,js}` and `route.{ts,js}`; the match is
 * made on the file name itself, so a page sitting directly in the app
 * directory resolves to "/" as well. Route-group folders such as
 * `(marketing)` are dropped from the URL.
 *
 * @param {string} file - Path of a file located under `appDir`.
 * @returns {string|null} The route (e.g. "/pricing"), or `null` when the file
 *   is not a page/route file or the route contains a dynamic `[segment]`
 *   (no single concrete URL can be derived for those).
 */
function routeFromPageFile(file) {
  const rel = toPosix(path.relative(appDir, file));
  const parts = rel.split("/");
  const fileName = parts.pop();

  // Compare the base name, not a "/page.tsx" suffix: the root page's relative
  // path is just "page.tsx" and has no leading slash to match against.
  if (!/^(?:page\.(?:tsx|ts|jsx|js)|route\.(?:ts|js))$/.test(fileName)) {
    return null;
  }

  const isRouteGroup = (part) => part.startsWith("(") && part.endsWith(")");
  const isDynamic = (part) => part.startsWith("[") && part.endsWith("]");

  const routeParts = parts.filter((part) => part && !isRouteGroup(part));
  if (routeParts.some(isDynamic)) return null;

  return `/${routeParts.join("/")}`;
}
|
|
|
|
/**
 * Builds the set of URL paths that count as valid internal link targets.
 *
 * The set starts with "/", then receives every static route derived from the
 * files under `appDir`, followed by one entry per file under `publicDir`
 * (its path relative to that directory, prefixed with "/").
 *
 * @returns {Set<string>} Known routes and public asset paths.
 */
function collectRoutes() {
  const known = new Set(["/"]);

  walk(appDir)
    .map((file) => routeFromPageFile(file))
    .filter(Boolean)
    .forEach((route) => {
      // Store routes without a trailing slash; the bare root stays "/".
      known.add(route === "/" ? "/" : route.replace(/\/$/, ""));
    });

  walk(publicDir).forEach((asset) => {
    known.add("/" + toPosix(path.relative(publicDir, asset)));
  });

  return known;
}
|
|
|
|
/**
 * Reduces a raw href to the root-relative path used for route lookups.
 *
 * Only hrefs that start with a single "/" are considered internal. Everything
 * else yields `null`: empty values, pure fragments ("#top"), URLs with a
 * scheme ("https:", "mailto:", ...), relative paths, and protocol-relative
 * URLs ("//cdn.example.com/x"), which point at another host even though they
 * begin with a slash.
 *
 * For internal hrefs the query string and fragment are removed and one
 * trailing slash is stripped (the bare root stays "/").
 *
 * @param {string} rawHref - Href exactly as it appears in the source.
 * @returns {string|null} Normalized path, or `null` if the href is not an
 *   internal, root-relative link.
 */
function normalizeHref(rawHref) {
  if (typeof rawHref !== "string") return null;

  const trimmed = rawHref.trim();
  // The startsWith("/") test already rules out fragments and every "scheme:"
  // URL; "//" must be excluded explicitly because it is protocol-relative.
  if (!trimmed.startsWith("/") || trimmed.startsWith("//")) return null;

  // Keep only what precedes the first "?" or "#".
  const [pathname] = trimmed.split(/[?#]/, 1);

  return pathname.length > 1 ? pathname.replace(/\/$/, "") : pathname;
}
|
|
|
|
/**
 * Decides whether an href should be accepted even though it is not in the
 * collected route set.
 *
 * An href is accepted when it starts with an ignored prefix, when it contains
 * a "[", "${" or backtick, or when it lies below one of the known dynamic
 * prefixes.
 *
 * @param {string} href - Normalized href.
 * @returns {boolean} `true` if the href must not be reported as broken.
 */
function isAllowedDynamicHref(href) {
  const isIgnored = ignoredPrefixes.some((prefix) => href.startsWith(prefix));
  if (isIgnored) return true;

  const looksInterpolated = ["[", "${", "`"].some((marker) => href.includes(marker));
  if (looksInterpolated) return true;

  return knownDynamicPrefixes.some((prefix) => {
    // The prefix without its trailing slash (the section index) does not count.
    const sectionIndex = prefix.replace(/\/$/, "");
    return href !== sectionIndex && href.startsWith(prefix);
  });
}
|
|
|
|
/**
 * Converts a character offset into a 1-based line number.
 *
 * @param {string} content - Full text.
 * @param {number} index - Character offset within `content`.
 * @returns {number} Line on which the offset falls (first line is 1).
 */
function lineNumber(content, index) {
  const preceding = content.slice(0, index);
  // Each line break (LF or CRLF) contains exactly one "\n".
  const lineBreaks = preceding.match(/\n/g) || [];
  return lineBreaks.length + 1;
}
|
|
|
|
/**
 * Finds link targets written as string literals in a source text.
 *
 * Three forms are recognised: `href="..."` / `href='...'`, `href={"..."}`,
 * and `router.push("...")`. Results are grouped by form in that order, and
 * within each form they appear in document order.
 *
 * @param {string} content - Source text to scan.
 * @returns {{href: string, index: number}[]} The literal target and the offset
 *   at which the match starts.
 */
function extractHrefMatches(content) {
  const literalTargetForms = [
    /href\s*=\s*["']([^"']+)["']/g,
    /href\s*=\s*{\s*["']([^"']+)["']\s*}/g,
    /router\.push\(\s*["']([^"']+)["']\s*\)/g,
  ];

  return literalTargetForms.flatMap((form) =>
    Array.from(content.matchAll(form), (found) => ({
      href: found[1],
      index: found.index,
    })),
  );
}
|
|
|
|
/**
 * Extracts `<Link>` and `<a>` elements that carry a string-literal href,
 * together with their visible text.
 *
 * The text is the element's inner markup with nested tags and `{...}`
 * expressions replaced by spaces and whitespace collapsed. All `<Link>`
 * results come first, then all `<a>` results. Matching is regex-based rather
 * than a real JSX/HTML parse.
 *
 * @param {string} content - Source text to scan.
 * @returns {{href: string, text: string, index: number}[]} One entry per
 *   matched element; `index` is the offset of the opening tag.
 */
function extractAnchors(content) {
  const elementPatterns = [
    /<Link\b[\s\S]*?href\s*=\s*(?:["']([^"']+)["']|{\s*["']([^"']+)["']\s*})[\s\S]*?>([\s\S]*?)<\/Link>/g,
    /<a\b[\s\S]*?href\s*=\s*(?:["']([^"']+)["']|{\s*["']([^"']+)["']\s*})[\s\S]*?>([\s\S]*?)<\/a>/g,
  ];

  // Strip nested tags and JSX expressions, then squeeze whitespace.
  const toVisibleText = (markup) =>
    markup
      .replace(/<[^>]*>/g, " ")
      .replace(/\{[^}]*\}/g, " ")
      .replace(/\s+/g, " ")
      .trim();

  const collected = [];
  for (const elementPattern of elementPatterns) {
    for (const found of content.matchAll(elementPattern)) {
      const [, quotedHref, bracedHref, innerMarkup] = found;
      collected.push({
        href: quotedHref || bracedHref,
        text: toVisibleText(innerMarkup),
        index: found.index,
      });
    }
  }
  return collected;
}
|
|
|
|
/**
 * Lists every file to be scanned: all files beneath each directory in
 * `scanDirs` (resolved against `root`) whose extension is in
 * `sourceExtensions`.
 *
 * @returns {string[]} File paths, grouped by scan directory in `scanDirs` order.
 */
function sourceFiles() {
  const hasSourceExtension = (file) => sourceExtensions.has(path.extname(file));

  return scanDirs.flatMap((dir) => walk(path.join(root, dir)).filter(hasSourceExtension));
}
|
|
|
|
/**
 * Runs the audit and prints a JSON report to stdout.
 *
 * For every source file it:
 *   1. records each literal internal href that resolves to no known route
 *      (appended to the module-level `findings`);
 *   2. records each anchor whose text looks like a call-to-action, together
 *      with whether its target resolves (appended to the module-level `ctas`);
 *   3. notes static `(marketing)` pages that contain no CTA anchor at all.
 *
 * The file list is computed once and each file is read and parsed once, so
 * all report fields describe the same snapshot of the tree.
 *
 * Sets `process.exitCode` to 1 when any broken link or broken CTA is found.
 *
 * @returns {void}
 */
function check() {
  const routes = collectRoutes();
  const files = sourceFiles();

  const resolves = (href) => routes.has(href) || isAllowedDynamicHref(href);
  const isCtaText = (text) => ctaPatterns.some((pattern) => pattern.test(text));

  // A page is a CTA candidate when it is a static page.tsx inside the
  // (marketing) group and is not one of the non-conversion sections.
  const isConversionPage = (rel) =>
    rel.includes("src/app/") &&
    rel.endsWith("/page.tsx") &&
    rel.includes("(marketing)") &&
    !rel.includes("[") &&
    !nonConversionPageParts.some((part) => rel.includes(part));

  const pagesWithoutCta = [];

  for (const file of files) {
    const content = fs.readFileSync(file, "utf8");
    const rel = toPosix(path.relative(root, file));

    for (const item of extractHrefMatches(content)) {
      const href = normalizeHref(item.href);
      if (!href || resolves(href)) continue;

      findings.push({
        type: "broken-internal-link",
        file: rel,
        line: lineNumber(content, item.index),
        href,
      });
    }

    const ctaAnchors = extractAnchors(content).filter((anchor) => isCtaText(anchor.text || ""));

    for (const anchor of ctaAnchors) {
      const text = anchor.text || "";
      const href = normalizeHref(anchor.href);

      let status = "broken";
      if (!href) {
        // normalizeHref rejects anything that is not a root-relative path.
        status = "external-or-non-http";
      } else if (resolves(href)) {
        status = "ok";
      }

      ctas.push({
        file: rel,
        line: lineNumber(content, anchor.index),
        text: text.slice(0, 100),
        href: anchor.href,
        status,
      });
    }

    if (ctaAnchors.length === 0 && isConversionPage(rel)) {
      pagesWithoutCta.push(rel);
    }
  }

  const brokenCtas = ctas.filter((cta) => cta.status === "broken");

  const report = {
    checkedAt: new Date().toISOString(),
    routeCount: routes.size,
    filesChecked: files.length,
    brokenInternalLinks: findings,
    ctaSummary: {
      total: ctas.length,
      broken: brokenCtas.length,
      sample: ctas.slice(0, 50),
    },
    pagesWithoutObviousCta: pagesWithoutCta.slice(0, 100),
  };

  console.log(JSON.stringify(report, null, 2));

  if (findings.length > 0 || brokenCtas.length > 0) {
    process.exitCode = 1;
  }
}
|
|
|
|
// Run the audit as soon as the script is loaded.
check();
|