Launch
This commit is contained in:
@@ -1,314 +1,314 @@
|
||||
#!/usr/bin/env node
|
||||
/* eslint-disable no-console */
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const vm = require('vm');
|
||||
const ts = require('typescript');
|
||||
|
||||
// Repository root: the parent of the directory that contains this script.
const ROOT_DIR = path.resolve(__dirname, '..');

// All audit artifacts are written below <root>/audits/semantic-search; the
// per-category CSV sheets go into the nested `categories` directory.
const OUTPUT_DIR = path.join(ROOT_DIR, 'audits', 'semantic-search');
const CATEGORY_DIR = path.join(OUTPUT_DIR, 'categories');

// The per-plant export is additionally written to the repository root.
const ROOT_EXPORT_PATH = path.join(ROOT_DIR, 'all-plants-categories.csv');

// TypeScript lexicon sources that get transpiled and evaluated at run time.
const [BATCH_1_PATH, BATCH_2_PATH] = ['lexiconBatch1.ts', 'lexiconBatch2.ts']
  .map((fileName) => path.join(ROOT_DIR, 'constants', fileName));
|
||||
|
||||
// Suggested audit order. A category's 1-based position in this list becomes
// its `priority` in summary.json; categories not listed here get 999.
const AUDIT_PRIORITY = [
  'pet_friendly', 'air_purifier', 'medicinal',
  'low_light', 'bright_light', 'sun',
  'easy', 'high_humidity', 'hanging',
  'tree', 'large', 'patterned',
  'flowering', 'succulent',
];

// Categories for which buildRiskFlags always emits a
// `<category>_requires_external_evidence` flag.
const HIGH_CONFIDENCE_MANUAL_REVIEW_CATEGORIES = new Set(
  ['pet_friendly', 'air_purifier', 'medicinal'],
);

// Order used by sortCategories when rendering a plant's category list;
// unknown categories sort after these, alphabetically.
const CATEGORY_DISPLAY_ORDER = [
  'easy', 'pet_friendly', 'flowering', 'succulent',
  'patterned', 'tree', 'large', 'medicinal',
  'hanging', 'air_purifier', 'low_light', 'bright_light',
  'high_humidity', 'sun',
];
|
||||
|
||||
/**
 * Resolve a relative import specifier to an existing file on disk.
 *
 * Candidates are tried in order: the exact path, the path with a `.ts` or
 * `.tsx` extension, then `index.ts` / `index.tsx` inside the path when it is
 * a directory.
 *
 * @param {string} fromFile - Absolute path of the file containing the import.
 * @param {string} specifier - Import specifier as written in the source.
 * @returns {string|null} The first candidate that is a regular file, or
 *   `null` for non-relative specifiers and when nothing matches.
 */
const resolveTsFilePath = (fromFile, specifier) => {
  // Bare specifiers (packages, Node built-ins) are left to the native require.
  if (!specifier.startsWith('.')) return null;

  const absoluteBase = path.resolve(path.dirname(fromFile), specifier);
  const candidates = [
    absoluteBase,
    `${absoluteBase}.ts`,
    `${absoluteBase}.tsx`,
    path.join(absoluteBase, 'index.ts'),
    // `.tsx` is accepted for files, so accept it for directory indexes too.
    path.join(absoluteBase, 'index.tsx'),
  ];

  // A directory may share the specifier's name, so require a regular file.
  const isRegularFile = (candidate) => fs.existsSync(candidate) && fs.statSync(candidate).isFile();

  return candidates.find(isRegularFile) ?? null;
};
|
||||
|
||||
/**
 * Transpile a TypeScript file to CommonJS and evaluate it in a fresh VM
 * context, recursively loading the relative TypeScript files it requires.
 *
 * @param {string} absolutePath - Absolute path of the `.ts`/`.tsx` file.
 * @param {Map<string, object>} [cache] - Exports keyed by absolute path,
 *   shared across the recursive loads of one top-level call.
 * @returns {object} The module's `module.exports` value.
 */
const loadTsModule = (absolutePath, cache = new Map()) => {
  if (cache.has(absolutePath)) return cache.get(absolutePath);

  const source = fs.readFileSync(absolutePath, 'utf8');
  const transpiled = ts.transpileModule(source, {
    compilerOptions: {
      module: ts.ModuleKind.CommonJS,
      target: ts.ScriptTarget.ES2020,
      esModuleInterop: true,
      jsx: ts.JsxEmit.ReactJSX,
    },
    fileName: absolutePath,
    reportDiagnostics: false,
  }).outputText;

  const module = { exports: {} };
  // Register the exports object before executing so a circular import gets
  // the partially populated exports instead of recursing without end.
  cache.set(absolutePath, module.exports);

  const localRequire = (specifier) => {
    const resolvedPath = resolveTsFilePath(absolutePath, specifier);
    if (resolvedPath) {
      // An exact-path match can be a JSON file; it must not be transpiled
      // as TypeScript, so hand it to the native loader by absolute path.
      if (path.extname(resolvedPath) === '.json') return require(resolvedPath);
      return loadTsModule(resolvedPath, cache);
    }
    if (specifier.startsWith('.')) {
      // Relative specifiers are relative to the importing file, not to this
      // script, so anchor them before delegating to the native require.
      return require(path.resolve(path.dirname(absolutePath), specifier));
    }
    return require(specifier);
  };

  const sandbox = {
    module,
    exports: module.exports,
    require: localRequire,
    __dirname: path.dirname(absolutePath),
    __filename: absolutePath,
    console,
    process,
    Buffer,
    setTimeout,
    clearTimeout,
  };

  vm.runInNewContext(transpiled, sandbox, { filename: absolutePath });

  // The evaluated code may have replaced `module.exports`; cache the final value.
  cache.set(absolutePath, module.exports);
  return module.exports;
};
|
||||
|
||||
/**
 * Create a directory, including any missing parent directories.
 * Does nothing when the directory already exists.
 *
 * @param {string} directoryPath - Directory to create.
 * @returns {void}
 */
function ensureDir(directoryPath) {
  const options = { recursive: true };
  fs.mkdirSync(directoryPath, options);
}
|
||||
|
||||
/**
 * Render a value as a single CSV field.
 *
 * `null` and `undefined` become the empty string; everything else is
 * stringified. Fields containing a double quote, comma, carriage return or
 * line feed are wrapped in double quotes with embedded quotes doubled.
 *
 * @param {*} value - Cell value.
 * @returns {string} The escaped field text.
 */
const csvEscape = (value) => {
  const text = String(value ?? '');
  // A bare `\r` is a line break to CSV readers as well, so it must be quoted.
  if (!/[",\r\n]/.test(text)) return text;
  return `"${text.replace(/"/g, '""')}"`;
};
|
||||
|
||||
/**
 * Write an array of row objects to a CSV file.
 *
 * Column names and order come from the keys of the first row. An empty
 * `rows` array produces an empty file with no header line.
 *
 * @param {string} filePath - Destination file.
 * @param {object[]} rows - Row objects to serialize.
 * @returns {void}
 */
function writeCsv(filePath, rows) {
  if (!rows.length) {
    fs.writeFileSync(filePath, '', 'utf8');
    return;
  }

  const headers = Object.keys(rows[0]);
  const headerLine = headers.join(',');
  const dataLines = rows.map((row) => (
    headers.map((header) => csvEscape(row[header])).join(',')
  ));
  const content = [headerLine, ...dataLines].join('\n');

  fs.writeFileSync(filePath, `${content}\n`, 'utf8');
}
|
||||
|
||||
/**
 * Turn a category name into a lower-case file-name stem: every run of
 * characters other than ASCII letters, digits, `_` and `-` becomes one `-`.
 *
 * @param {string} category - Category name.
 * @returns {string} File-name-safe form of the category.
 */
function normalizeCategoryFilename(category) {
  const dashed = category.replace(/[^a-z0-9_-]+/gi, '-');
  return dashed.toLowerCase();
}
|
||||
|
||||
/**
 * Return a sorted copy of a category list.
 *
 * Categories listed in CATEGORY_DISPLAY_ORDER come first, in that order;
 * all others follow, ordered by `localeCompare`. The input is not mutated.
 *
 * @param {string[]} [categories] - Categories to sort.
 * @returns {string[]} New sorted array.
 */
const sortCategories = (categories = []) => {
  // Unknown categories share the largest rank so they sort last.
  const rankOf = (category) => {
    const position = CATEGORY_DISPLAY_ORDER.indexOf(category);
    return position === -1 ? Number.MAX_SAFE_INTEGER : position;
  };

  const sorted = [...categories];
  sorted.sort((left, right) => {
    const rankDelta = rankOf(left) - rankOf(right);
    if (rankDelta !== 0) return rankDelta;
    return left.localeCompare(right);
  });
  return sorted;
};
|
||||
|
||||
/**
 * Compute the review flags for one lexicon entry.
 *
 * Conflict flags (contradictory category pairs) come first, followed by one
 * `<category>_requires_external_evidence` flag for each of the entry's
 * categories found in HIGH_CONFIDENCE_MANUAL_REVIEW_CATEGORIES. Duplicates
 * are removed, keeping first occurrences.
 *
 * @param {{categories?: string[]}} entry - Lexicon entry.
 * @returns {string[]} Unique flags in the order described above.
 */
function buildRiskFlags(entry) {
  const assigned = entry.categories || [];
  const assignedSet = new Set(assigned);

  // [first category, second category, flag raised when both are present]
  const conflictRules = [
    ['low_light', 'sun', 'light_conflict_low_light_and_sun'],
    ['low_light', 'bright_light', 'light_conflict_low_light_and_bright_light'],
    ['succulent', 'high_humidity', 'succulent_high_humidity_combo_review'],
  ];

  const conflictFlags = conflictRules
    .filter(([first, second]) => assignedSet.has(first) && assignedSet.has(second))
    .map(([, , flag]) => flag);

  const evidenceFlags = assigned
    .filter((category) => HIGH_CONFIDENCE_MANUAL_REVIEW_CATEGORIES.has(category))
    .map((category) => `${category}_requires_external_evidence`);

  return [...new Set([...conflictFlags, ...evidenceFlags])];
}
|
||||
|
||||
/**
 * Build the audit-sheet row for one (entry, category) assignment.
 *
 * Key order matters: writeCsv derives the CSV column order from it. The
 * four trailing columns are left blank for the reviewer to fill in.
 *
 * @param {object} entry - Lexicon entry with `sourceFile`/`sourceIndex` attached.
 * @param {string} category - Category this row audits.
 * @returns {object} Flat row object.
 */
const toAuditRow = (entry, category) => {
  const care = entry.careInfo;
  const assigned = entry.categories || [];

  return {
    category,
    source_file: entry.sourceFile,
    source_index: entry.sourceIndex,
    name: entry.name,
    botanical_name: entry.botanicalName,
    description: entry.description || '',
    light: care?.light || '',
    temp: care?.temp || '',
    // `??` rather than `||` so an interval of 0 is kept.
    water_interval_days: care?.waterIntervalDays ?? '',
    all_categories: sortCategories(assigned).join('|'),
    risk_flags: buildRiskFlags(entry).join('|'),
    audit_status: '',
    evidence_source: '',
    evidence_url: '',
    notes: '',
  };
};
|
||||
|
||||
/**
 * Build the one-row-per-plant export row for a lexicon entry.
 *
 * Key order matters: writeCsv derives the CSV column order from it.
 *
 * @param {object} entry - Lexicon entry with `sourceFile`/`sourceIndex` attached.
 * @returns {object} Flat row object.
 */
const toPlantCategoryRow = (entry) => {
  const care = entry.careInfo;
  const assigned = entry.categories || [];

  return {
    source_file: entry.sourceFile,
    source_index: entry.sourceIndex,
    name: entry.name,
    botanical_name: entry.botanicalName,
    all_categories: sortCategories(assigned).join('|'),
    category_count: assigned.length,
    description: entry.description || '',
    light: care?.light || '',
    temp: care?.temp || '',
    // `??` rather than `||` so an interval of 0 is kept.
    water_interval_days: care?.waterIntervalDays ?? '',
  };
};
|
||||
|
||||
/**
 * Load both lexicon batch files and return their entries as one list.
 *
 * Each entry is copied and tagged with `sourceFile` (repo-relative path)
 * and `sourceIndex` (1-based position within its batch).
 *
 * @returns {object[]} Batch 1 entries followed by batch 2 entries.
 * @throws {Error} If either batch does not export an array.
 */
function loadBatchEntries() {
  const { LEXICON_BATCH_1_ENTRIES: batch1Entries } = loadTsModule(BATCH_1_PATH);
  const { LEXICON_BATCH_2_ENTRIES: batch2Entries } = loadTsModule(BATCH_2_PATH);

  const bothAreArrays = Array.isArray(batch1Entries) && Array.isArray(batch2Entries);
  if (!bothAreArrays) {
    throw new Error('Could not load lexicon batch entries.');
  }

  const tagWithSource = (batch, sourceFile) => batch.map((entry, index) => ({
    ...entry,
    sourceFile,
    sourceIndex: index + 1,
  }));

  return tagWithSource(batch1Entries, 'constants/lexiconBatch1.ts')
    .concat(tagWithSource(batch2Entries, 'constants/lexiconBatch2.ts'));
}
|
||||
|
||||
/**
 * Generate every audit artifact: per-category CSVs, the per-plant export
 * (written both to the output directory and the repository root),
 * master.csv, suspicious.csv/json, summary.json and README.md.
 *
 * @returns {void}
 */
function main() {
  ensureDir(CATEGORY_DIR);

  const entries = loadBatchEntries();
  const categoriesOf = (entry) => entry.categories || [];
  const byBotanicalThenCommonName = (left, right) => (
    left.botanicalName.localeCompare(right.botanicalName)
    || left.name.localeCompare(right.name)
  );

  const categories = [...new Set(entries.flatMap((entry) => categoriesOf(entry)))].sort();

  const generatedAt = new Date().toISOString();
  const categoryStats = categories.map((category) => {
    const priorityIndex = AUDIT_PRIORITY.indexOf(category);
    return {
      category,
      count: entries.filter((entry) => categoriesOf(entry).includes(category)).length,
      // Categories without an explicit priority sort after all listed ones.
      priority: priorityIndex >= 0 ? priorityIndex + 1 : 999,
    };
  });
  categoryStats.sort((left, right) => (
    left.priority - right.priority
    || right.count - left.count
    || left.category.localeCompare(right.category)
  ));
  const summary = {
    generatedAt,
    totalEntries: entries.length,
    categories: categoryStats,
  };

  const sortedEntries = [...entries];
  sortedEntries.sort(byBotanicalThenCommonName);
  const plantCategoryRows = sortedEntries.map((entry) => toPlantCategoryRow(entry));

  const masterRows = [];
  const suspiciousRows = [];

  for (const category of categories) {
    const members = entries
      .filter((entry) => categoriesOf(entry).includes(category))
      .sort(byBotanicalThenCommonName);

    const rows = [];
    for (const entry of members) {
      const row = toAuditRow(entry, category);
      rows.push(row);
      masterRows.push(row);

      const riskFlags = row.risk_flags ? row.risk_flags.split('|').filter(Boolean) : [];
      if (riskFlags.length === 0) continue;

      suspiciousRows.push({
        category,
        source_file: entry.sourceFile,
        source_index: entry.sourceIndex,
        name: entry.name,
        botanical_name: entry.botanicalName,
        risk_flags: riskFlags.join('|'),
      });
    }

    const categoryFile = `${normalizeCategoryFilename(category)}.csv`;
    writeCsv(path.join(CATEGORY_DIR, categoryFile), rows);
  }

  writeCsv(path.join(OUTPUT_DIR, 'all-plants-categories.csv'), plantCategoryRows);
  writeCsv(ROOT_EXPORT_PATH, plantCategoryRows);
  writeCsv(path.join(OUTPUT_DIR, 'master.csv'), masterRows);
  writeCsv(path.join(OUTPUT_DIR, 'suspicious.csv'), suspiciousRows);

  const toJsonText = (value) => `${JSON.stringify(value, null, 2)}\n`;
  fs.writeFileSync(path.join(OUTPUT_DIR, 'summary.json'), toJsonText(summary), 'utf8');
  fs.writeFileSync(path.join(OUTPUT_DIR, 'suspicious.json'), toJsonText(suspiciousRows), 'utf8');

  const auditOrderLines = summary.categories
    .map((item) => `- ${item.category} (${item.count})`)
    .join('\n');

  const readme = `# Semantic Search Audit

Generated: ${summary.generatedAt}

Files:
- \`summary.json\`: category counts and suggested audit order
- \`all-plants-categories.csv\`: one row per plant with its full category list
- \`master.csv\`: all category assignments with blank evidence columns
- \`suspicious.csv\`: entries that require elevated review based on rule flags
- \`categories/*.csv\`: per-category audit sheets

Suggested audit order:
${auditOrderLines}

Workflow:
1. Review one category CSV at a time.
2. Fill \`audit_status\`, \`evidence_source\`, \`evidence_url\`, and \`notes\`.
3. Apply only high-confidence source-tag corrections to the lexicon batch files.
4. Rebuild the server catalog from batches after source edits.
`;

  fs.writeFileSync(path.join(OUTPUT_DIR, 'README.md'), readme, 'utf8');

  console.log(`Audit artifacts written to ${OUTPUT_DIR}`);
  console.log(`Categories exported: ${categories.length}`);
  console.log(`Suspicious rows flagged: ${suspiciousRows.length}`);
}

main();
|
||||
#!/usr/bin/env node
|
||||
/* eslint-disable no-console */
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const vm = require('vm');
|
||||
const ts = require('typescript');
|
||||
|
||||
// Repository root: the parent of the directory that contains this script.
const ROOT_DIR = path.resolve(__dirname, '..');

// All audit artifacts are written below <root>/audits/semantic-search; the
// per-category CSV sheets go into the nested `categories` directory.
const OUTPUT_DIR = path.join(ROOT_DIR, 'audits', 'semantic-search');
const CATEGORY_DIR = path.join(OUTPUT_DIR, 'categories');

// The per-plant export is additionally written to the repository root.
const ROOT_EXPORT_PATH = path.join(ROOT_DIR, 'all-plants-categories.csv');

// TypeScript lexicon sources that get transpiled and evaluated at run time.
const [BATCH_1_PATH, BATCH_2_PATH] = ['lexiconBatch1.ts', 'lexiconBatch2.ts']
  .map((fileName) => path.join(ROOT_DIR, 'constants', fileName));
|
||||
|
||||
// Suggested audit order. A category's 1-based position in this list becomes
// its `priority` in summary.json; categories not listed here get 999.
const AUDIT_PRIORITY = [
  'pet_friendly', 'air_purifier', 'medicinal',
  'low_light', 'bright_light', 'sun',
  'easy', 'high_humidity', 'hanging',
  'tree', 'large', 'patterned',
  'flowering', 'succulent',
];

// Categories for which buildRiskFlags always emits a
// `<category>_requires_external_evidence` flag.
const HIGH_CONFIDENCE_MANUAL_REVIEW_CATEGORIES = new Set(
  ['pet_friendly', 'air_purifier', 'medicinal'],
);

// Order used by sortCategories when rendering a plant's category list;
// unknown categories sort after these, alphabetically.
const CATEGORY_DISPLAY_ORDER = [
  'easy', 'pet_friendly', 'flowering', 'succulent',
  'patterned', 'tree', 'large', 'medicinal',
  'hanging', 'air_purifier', 'low_light', 'bright_light',
  'high_humidity', 'sun',
];
|
||||
|
||||
/**
 * Resolve a relative import specifier to an existing file on disk.
 *
 * Candidates are tried in order: the exact path, the path with a `.ts` or
 * `.tsx` extension, then `index.ts` / `index.tsx` inside the path when it is
 * a directory.
 *
 * @param {string} fromFile - Absolute path of the file containing the import.
 * @param {string} specifier - Import specifier as written in the source.
 * @returns {string|null} The first candidate that is a regular file, or
 *   `null` for non-relative specifiers and when nothing matches.
 */
const resolveTsFilePath = (fromFile, specifier) => {
  // Bare specifiers (packages, Node built-ins) are left to the native require.
  if (!specifier.startsWith('.')) return null;

  const absoluteBase = path.resolve(path.dirname(fromFile), specifier);
  const candidates = [
    absoluteBase,
    `${absoluteBase}.ts`,
    `${absoluteBase}.tsx`,
    path.join(absoluteBase, 'index.ts'),
    // `.tsx` is accepted for files, so accept it for directory indexes too.
    path.join(absoluteBase, 'index.tsx'),
  ];

  // A directory may share the specifier's name, so require a regular file.
  const isRegularFile = (candidate) => fs.existsSync(candidate) && fs.statSync(candidate).isFile();

  return candidates.find(isRegularFile) ?? null;
};
|
||||
|
||||
/**
 * Transpile a TypeScript file to CommonJS and evaluate it in a fresh VM
 * context, recursively loading the relative TypeScript files it requires.
 *
 * @param {string} absolutePath - Absolute path of the `.ts`/`.tsx` file.
 * @param {Map<string, object>} [cache] - Exports keyed by absolute path,
 *   shared across the recursive loads of one top-level call.
 * @returns {object} The module's `module.exports` value.
 */
const loadTsModule = (absolutePath, cache = new Map()) => {
  if (cache.has(absolutePath)) return cache.get(absolutePath);

  const source = fs.readFileSync(absolutePath, 'utf8');
  const transpiled = ts.transpileModule(source, {
    compilerOptions: {
      module: ts.ModuleKind.CommonJS,
      target: ts.ScriptTarget.ES2020,
      esModuleInterop: true,
      jsx: ts.JsxEmit.ReactJSX,
    },
    fileName: absolutePath,
    reportDiagnostics: false,
  }).outputText;

  const module = { exports: {} };
  // Register the exports object before executing so a circular import gets
  // the partially populated exports instead of recursing without end.
  cache.set(absolutePath, module.exports);

  const localRequire = (specifier) => {
    const resolvedPath = resolveTsFilePath(absolutePath, specifier);
    if (resolvedPath) {
      // An exact-path match can be a JSON file; it must not be transpiled
      // as TypeScript, so hand it to the native loader by absolute path.
      if (path.extname(resolvedPath) === '.json') return require(resolvedPath);
      return loadTsModule(resolvedPath, cache);
    }
    if (specifier.startsWith('.')) {
      // Relative specifiers are relative to the importing file, not to this
      // script, so anchor them before delegating to the native require.
      return require(path.resolve(path.dirname(absolutePath), specifier));
    }
    return require(specifier);
  };

  const sandbox = {
    module,
    exports: module.exports,
    require: localRequire,
    __dirname: path.dirname(absolutePath),
    __filename: absolutePath,
    console,
    process,
    Buffer,
    setTimeout,
    clearTimeout,
  };

  vm.runInNewContext(transpiled, sandbox, { filename: absolutePath });

  // The evaluated code may have replaced `module.exports`; cache the final value.
  cache.set(absolutePath, module.exports);
  return module.exports;
};
|
||||
|
||||
/**
 * Create a directory, including any missing parent directories.
 * Does nothing when the directory already exists.
 *
 * @param {string} directoryPath - Directory to create.
 * @returns {void}
 */
function ensureDir(directoryPath) {
  const options = { recursive: true };
  fs.mkdirSync(directoryPath, options);
}
|
||||
|
||||
/**
 * Render a value as a single CSV field.
 *
 * `null` and `undefined` become the empty string; everything else is
 * stringified. Fields containing a double quote, comma, carriage return or
 * line feed are wrapped in double quotes with embedded quotes doubled.
 *
 * @param {*} value - Cell value.
 * @returns {string} The escaped field text.
 */
const csvEscape = (value) => {
  const text = String(value ?? '');
  // A bare `\r` is a line break to CSV readers as well, so it must be quoted.
  if (!/[",\r\n]/.test(text)) return text;
  return `"${text.replace(/"/g, '""')}"`;
};
|
||||
|
||||
/**
 * Write an array of row objects to a CSV file.
 *
 * Column names and order come from the keys of the first row. An empty
 * `rows` array produces an empty file with no header line.
 *
 * @param {string} filePath - Destination file.
 * @param {object[]} rows - Row objects to serialize.
 * @returns {void}
 */
function writeCsv(filePath, rows) {
  if (!rows.length) {
    fs.writeFileSync(filePath, '', 'utf8');
    return;
  }

  const headers = Object.keys(rows[0]);
  const headerLine = headers.join(',');
  const dataLines = rows.map((row) => (
    headers.map((header) => csvEscape(row[header])).join(',')
  ));
  const content = [headerLine, ...dataLines].join('\n');

  fs.writeFileSync(filePath, `${content}\n`, 'utf8');
}
|
||||
|
||||
/**
 * Turn a category name into a lower-case file-name stem: every run of
 * characters other than ASCII letters, digits, `_` and `-` becomes one `-`.
 *
 * @param {string} category - Category name.
 * @returns {string} File-name-safe form of the category.
 */
function normalizeCategoryFilename(category) {
  const dashed = category.replace(/[^a-z0-9_-]+/gi, '-');
  return dashed.toLowerCase();
}
|
||||
|
||||
/**
 * Return a sorted copy of a category list.
 *
 * Categories listed in CATEGORY_DISPLAY_ORDER come first, in that order;
 * all others follow, ordered by `localeCompare`. The input is not mutated.
 *
 * @param {string[]} [categories] - Categories to sort.
 * @returns {string[]} New sorted array.
 */
const sortCategories = (categories = []) => {
  // Unknown categories share the largest rank so they sort last.
  const rankOf = (category) => {
    const position = CATEGORY_DISPLAY_ORDER.indexOf(category);
    return position === -1 ? Number.MAX_SAFE_INTEGER : position;
  };

  const sorted = [...categories];
  sorted.sort((left, right) => {
    const rankDelta = rankOf(left) - rankOf(right);
    if (rankDelta !== 0) return rankDelta;
    return left.localeCompare(right);
  });
  return sorted;
};
|
||||
|
||||
/**
 * Compute the review flags for one lexicon entry.
 *
 * Conflict flags (contradictory category pairs) come first, followed by one
 * `<category>_requires_external_evidence` flag for each of the entry's
 * categories found in HIGH_CONFIDENCE_MANUAL_REVIEW_CATEGORIES. Duplicates
 * are removed, keeping first occurrences.
 *
 * @param {{categories?: string[]}} entry - Lexicon entry.
 * @returns {string[]} Unique flags in the order described above.
 */
function buildRiskFlags(entry) {
  const assigned = entry.categories || [];
  const assignedSet = new Set(assigned);

  // [first category, second category, flag raised when both are present]
  const conflictRules = [
    ['low_light', 'sun', 'light_conflict_low_light_and_sun'],
    ['low_light', 'bright_light', 'light_conflict_low_light_and_bright_light'],
    ['succulent', 'high_humidity', 'succulent_high_humidity_combo_review'],
  ];

  const conflictFlags = conflictRules
    .filter(([first, second]) => assignedSet.has(first) && assignedSet.has(second))
    .map(([, , flag]) => flag);

  const evidenceFlags = assigned
    .filter((category) => HIGH_CONFIDENCE_MANUAL_REVIEW_CATEGORIES.has(category))
    .map((category) => `${category}_requires_external_evidence`);

  return [...new Set([...conflictFlags, ...evidenceFlags])];
}
|
||||
|
||||
/**
 * Build the audit-sheet row for one (entry, category) assignment.
 *
 * Key order matters: writeCsv derives the CSV column order from it. The
 * four trailing columns are left blank for the reviewer to fill in.
 *
 * @param {object} entry - Lexicon entry with `sourceFile`/`sourceIndex` attached.
 * @param {string} category - Category this row audits.
 * @returns {object} Flat row object.
 */
const toAuditRow = (entry, category) => {
  const care = entry.careInfo;
  const assigned = entry.categories || [];

  return {
    category,
    source_file: entry.sourceFile,
    source_index: entry.sourceIndex,
    name: entry.name,
    botanical_name: entry.botanicalName,
    description: entry.description || '',
    light: care?.light || '',
    temp: care?.temp || '',
    // `??` rather than `||` so an interval of 0 is kept.
    water_interval_days: care?.waterIntervalDays ?? '',
    all_categories: sortCategories(assigned).join('|'),
    risk_flags: buildRiskFlags(entry).join('|'),
    audit_status: '',
    evidence_source: '',
    evidence_url: '',
    notes: '',
  };
};
|
||||
|
||||
/**
 * Build the one-row-per-plant export row for a lexicon entry.
 *
 * Key order matters: writeCsv derives the CSV column order from it.
 *
 * @param {object} entry - Lexicon entry with `sourceFile`/`sourceIndex` attached.
 * @returns {object} Flat row object.
 */
const toPlantCategoryRow = (entry) => {
  const care = entry.careInfo;
  const assigned = entry.categories || [];

  return {
    source_file: entry.sourceFile,
    source_index: entry.sourceIndex,
    name: entry.name,
    botanical_name: entry.botanicalName,
    all_categories: sortCategories(assigned).join('|'),
    category_count: assigned.length,
    description: entry.description || '',
    light: care?.light || '',
    temp: care?.temp || '',
    // `??` rather than `||` so an interval of 0 is kept.
    water_interval_days: care?.waterIntervalDays ?? '',
  };
};
|
||||
|
||||
/**
 * Load both lexicon batch files and return their entries as one list.
 *
 * Each entry is copied and tagged with `sourceFile` (repo-relative path)
 * and `sourceIndex` (1-based position within its batch).
 *
 * @returns {object[]} Batch 1 entries followed by batch 2 entries.
 * @throws {Error} If either batch does not export an array.
 */
function loadBatchEntries() {
  const { LEXICON_BATCH_1_ENTRIES: batch1Entries } = loadTsModule(BATCH_1_PATH);
  const { LEXICON_BATCH_2_ENTRIES: batch2Entries } = loadTsModule(BATCH_2_PATH);

  const bothAreArrays = Array.isArray(batch1Entries) && Array.isArray(batch2Entries);
  if (!bothAreArrays) {
    throw new Error('Could not load lexicon batch entries.');
  }

  const tagWithSource = (batch, sourceFile) => batch.map((entry, index) => ({
    ...entry,
    sourceFile,
    sourceIndex: index + 1,
  }));

  return tagWithSource(batch1Entries, 'constants/lexiconBatch1.ts')
    .concat(tagWithSource(batch2Entries, 'constants/lexiconBatch2.ts'));
}
|
||||
|
||||
/**
 * Generate every audit artifact: per-category CSVs, the per-plant export
 * (written both to the output directory and the repository root),
 * master.csv, suspicious.csv/json, summary.json and README.md.
 *
 * @returns {void}
 */
function main() {
  ensureDir(CATEGORY_DIR);

  const entries = loadBatchEntries();
  const categoriesOf = (entry) => entry.categories || [];
  const byBotanicalThenCommonName = (left, right) => (
    left.botanicalName.localeCompare(right.botanicalName)
    || left.name.localeCompare(right.name)
  );

  const categories = [...new Set(entries.flatMap((entry) => categoriesOf(entry)))].sort();

  const generatedAt = new Date().toISOString();
  const categoryStats = categories.map((category) => {
    const priorityIndex = AUDIT_PRIORITY.indexOf(category);
    return {
      category,
      count: entries.filter((entry) => categoriesOf(entry).includes(category)).length,
      // Categories without an explicit priority sort after all listed ones.
      priority: priorityIndex >= 0 ? priorityIndex + 1 : 999,
    };
  });
  categoryStats.sort((left, right) => (
    left.priority - right.priority
    || right.count - left.count
    || left.category.localeCompare(right.category)
  ));
  const summary = {
    generatedAt,
    totalEntries: entries.length,
    categories: categoryStats,
  };

  const sortedEntries = [...entries];
  sortedEntries.sort(byBotanicalThenCommonName);
  const plantCategoryRows = sortedEntries.map((entry) => toPlantCategoryRow(entry));

  const masterRows = [];
  const suspiciousRows = [];

  for (const category of categories) {
    const members = entries
      .filter((entry) => categoriesOf(entry).includes(category))
      .sort(byBotanicalThenCommonName);

    const rows = [];
    for (const entry of members) {
      const row = toAuditRow(entry, category);
      rows.push(row);
      masterRows.push(row);

      const riskFlags = row.risk_flags ? row.risk_flags.split('|').filter(Boolean) : [];
      if (riskFlags.length === 0) continue;

      suspiciousRows.push({
        category,
        source_file: entry.sourceFile,
        source_index: entry.sourceIndex,
        name: entry.name,
        botanical_name: entry.botanicalName,
        risk_flags: riskFlags.join('|'),
      });
    }

    const categoryFile = `${normalizeCategoryFilename(category)}.csv`;
    writeCsv(path.join(CATEGORY_DIR, categoryFile), rows);
  }

  writeCsv(path.join(OUTPUT_DIR, 'all-plants-categories.csv'), plantCategoryRows);
  writeCsv(ROOT_EXPORT_PATH, plantCategoryRows);
  writeCsv(path.join(OUTPUT_DIR, 'master.csv'), masterRows);
  writeCsv(path.join(OUTPUT_DIR, 'suspicious.csv'), suspiciousRows);

  const toJsonText = (value) => `${JSON.stringify(value, null, 2)}\n`;
  fs.writeFileSync(path.join(OUTPUT_DIR, 'summary.json'), toJsonText(summary), 'utf8');
  fs.writeFileSync(path.join(OUTPUT_DIR, 'suspicious.json'), toJsonText(suspiciousRows), 'utf8');

  const auditOrderLines = summary.categories
    .map((item) => `- ${item.category} (${item.count})`)
    .join('\n');

  const readme = `# Semantic Search Audit

Generated: ${summary.generatedAt}

Files:
- \`summary.json\`: category counts and suggested audit order
- \`all-plants-categories.csv\`: one row per plant with its full category list
- \`master.csv\`: all category assignments with blank evidence columns
- \`suspicious.csv\`: entries that require elevated review based on rule flags
- \`categories/*.csv\`: per-category audit sheets

Suggested audit order:
${auditOrderLines}

Workflow:
1. Review one category CSV at a time.
2. Fill \`audit_status\`, \`evidence_source\`, \`evidence_url\`, and \`notes\`.
3. Apply only high-confidence source-tag corrections to the lexicon batch files.
4. Rebuild the server catalog from batches after source edits.
`;

  fs.writeFileSync(path.join(OUTPUT_DIR, 'README.md'), readme, 'utf8');

  console.log(`Audit artifacts written to ${OUTPUT_DIR}`);
  console.log(`Categories exported: ${categories.length}`);
  console.log(`Suspicious rows flagged: ${suspiciousRows.length}`);
}

main();
|
||||
|
||||
Reference in New Issue
Block a user