Initial commit for GreenLens

This commit is contained in:
Timo Knuth
2026-03-16 21:31:46 +01:00
parent 307135671f
commit 05d4f6e78b
573 changed files with 54233 additions and 1891 deletions

192
scripts/fix_images.js Normal file
View File

@@ -0,0 +1,192 @@
#!/usr/bin/env node
/**
* fix_images.js
* Finds broken image URLs in lexicon/catalog files and replaces them
* using Wikimedia Commons API.
*/
const fs = require('fs');
const https = require('https');
// Target data files whose imageUri fields are checked and rewritten in place.
const FILES = [
'constants/lexiconBatch1.ts',
'constants/lexiconBatch2.ts',
'services/backend/mockCatalog.ts',
];
// Known manual fixes (botanicalName -> correct Wikimedia filename)
// Consulted before falling back to the Commons search API; values are bare
// Commons filenames that wikimediaThumbUrl() turns into 500px thumb URLs.
const MANUAL_FIXES = {
'Chlorophytum comosum': 'Chlorophytum_comosum_01.jpg',
'Syngonium podophyllum': 'Syngonium_podophyllum1.jpg',
'Fuchsia hybrida': 'Fuchsia_%27Beacon%27.jpg',
'Tillandsia usneoides': 'Tillandsia_usneoides_leaves.jpg',
'Tillandsia ionantha': 'Tillandsia_ionantha0.jpg',
};
// Promise-based delay, used to rate-limit requests against remote hosts.
function sleep(ms) {
  return new Promise((wake) => setTimeout(wake, ms));
}
/**
 * GET `url` and buffer the response body into a string.
 * Follows up to `redirectsLeft` 301/302 redirects (new optional parameter,
 * defaulted for backward compatibility). Resolves with `{ status, body }`;
 * rejects on network error, a 10s timeout, a redirect without a Location
 * header, or too many redirects. The original crashed https.get() with an
 * undefined URL when Location was missing and could recurse forever on a
 * redirect loop.
 */
function httpGet(url, redirectsLeft = 5) {
  return new Promise((resolve, reject) => {
    const req = https.get(url, {
      headers: {
        'User-Agent': 'GreenLens-ImageFixer/1.0 (educational plant app)'
      }
    }, (res) => {
      if (res.statusCode === 301 || res.statusCode === 302) {
        res.resume(); // drain the redirect body so the socket is released
        if (!res.headers.location || redirectsLeft <= 0) {
          reject(new Error(`bad redirect (missing Location or too many hops) for ${url}`));
          return;
        }
        resolve(httpGet(res.headers.location, redirectsLeft - 1));
        return;
      }
      let data = '';
      res.on('data', chunk => data += chunk);
      res.on('end', () => resolve({ status: res.statusCode, body: data }));
    });
    req.on('error', reject);
    req.setTimeout(10000, () => {
      req.destroy();
      reject(new Error('timeout'));
    });
  });
}
/**
 * Reachability probe: GET `url`, discard the body, resolve true iff the
 * final response is HTTP 200. Now follows up to `redirectsLeft` 301/302
 * redirects (new optional parameter, defaulted for backward compatibility);
 * previously any redirecting-but-valid URL was reported broken and then
 * needlessly rewritten. Never rejects — errors and timeouts resolve false.
 */
function checkUrl(url, redirectsLeft = 3) {
  return new Promise((resolve) => {
    const req = https.get(url, {
      headers: { 'User-Agent': 'GreenLens-ImageFixer/1.0' }
    }, (res) => {
      res.resume(); // drain so the socket can be reused
      if ((res.statusCode === 301 || res.statusCode === 302) &&
          res.headers.location && redirectsLeft > 0) {
        resolve(checkUrl(res.headers.location, redirectsLeft - 1));
        return;
      }
      resolve(res.statusCode === 200);
    });
    req.on('error', () => resolve(false));
    req.setTimeout(8000, () => { req.destroy(); resolve(false); });
  });
}
// Query the Wikimedia Commons search API (file namespace, top 5 hits) for
// `botanicalName` and return the first jpg/png thumbnail URL, or null when
// nothing usable comes back or the request/parse fails.
async function searchWikimediaImage(botanicalName) {
  const encoded = encodeURIComponent(botanicalName);
  const url = `https://commons.wikimedia.org/w/api.php?action=query&generator=search&gsrnamespace=6&gsrsearch=${encoded}&gsrlimit=5&prop=imageinfo&iiprop=url&iiurlwidth=500&format=json`;
  try {
    const res = await httpGet(url);
    if (res.status === 200) {
      const data = JSON.parse(res.body);
      const pages = data.query && data.query.pages;
      for (const page of Object.values(pages || {})) {
        const [info] = page.imageinfo || [];
        if (!info) continue;
        const thumbUrl = info.thumburl || info.url;
        // Only accept plain raster formats the app can render.
        if (thumbUrl && ['.jpg', '.png', '.JPG', '.PNG'].some((suffix) => thumbUrl.endsWith(suffix))) {
          return thumbUrl;
        }
      }
    }
  } catch (e) {
    console.error(` API error for "${botanicalName}": ${e.message}`);
  }
  return null;
}
function wikimediaThumbUrl(filename) {
// Build a 500px thumb URL from a bare filename
const name = filename.replace(/ /g, '_');
const hash = require('crypto').createHash('md5').update(name).digest('hex');
const d1 = hash[0];
const d2 = hash.substring(0, 2);
const ext = name.split('.').pop().toLowerCase();
const isJpg = ['jpg', 'jpeg'].includes(ext);
return `https://upload.wikimedia.org/wikipedia/commons/thumb/${d1}/${d2}/${name}/500px-${name}`;
}
// Scan a TS source string for entry literals and extract, in document order,
// { name, botanicalName, imageUri, index } where `index` is the offset of the
// match ("name:") within `content`.
function parseEntries(content) {
  const pattern = /name:\s*['"]([^'"]+)['"]\s*,[\s\S]*?botanicalName:\s*['"]([^'"]+)['"]\s*,[\s\S]*?imageUri:\s*['"]([^'"]+)['"]/g;
  const found = [];
  for (let match = pattern.exec(content); match !== null; match = pattern.exec(content)) {
    const [, name, botanicalName, imageUri] = match;
    found.push({ name, botanicalName, imageUri, index: match.index });
  }
  return found;
}
// Audit one data file: probe each entry's imageUri; for broken URLs, look up
// a replacement (MANUAL_FIXES first, then the Commons search API) and rewrite
// the file in place. Returns the number of entries fixed.
async function processFile(filepath) {
  console.log(`\n=== Processing ${filepath} ===`);
  let content = fs.readFileSync(filepath, 'utf8');
  const entries = parseEntries(content);
  console.log(`Found ${entries.length} entries`);
  let fixCount = 0;
  for (const entry of entries) {
    // `name` is destructured but unused below; kept for symmetry with the entry shape.
    const { name, botanicalName, imageUri } = entry;
    // Check if URL is broken
    process.stdout.write(` Checking ${botanicalName}... `);
    const ok = await checkUrl(imageUri);
    if (ok) {
      console.log('OK');
      // Short courtesy delay between probes of the image host.
      await sleep(100);
      continue;
    }
    console.log('BROKEN');
    let newUrl = null;
    // Check manual fixes first
    if (MANUAL_FIXES[botanicalName]) {
      const filename = MANUAL_FIXES[botanicalName];
      const thumb = wikimediaThumbUrl(filename);
      console.log(` -> Manual fix: ${thumb}`);
      newUrl = thumb;
    } else {
      // Query Wikimedia Commons API
      console.log(` -> Searching Wikimedia for "${botanicalName}"...`);
      newUrl = await searchWikimediaImage(botanicalName);
      if (newUrl) {
        console.log(` -> Found: ${newUrl}`);
      } else {
        console.log(` -> No result found, skipping`);
      }
    }
    if (newUrl) {
      // Replace the old URL in content (escape for regex)
      // NOTE(review): the 'g' replace rewrites every occurrence of this URL in
      // the file, including other entries sharing it; fixCount counts entries,
      // not occurrences.
      const escapedOld = imageUri.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      content = content.replace(new RegExp(escapedOld, 'g'), newUrl);
      fixCount++;
    }
    // Longer delay after a (possibly failed) lookup to respect API rate limits.
    await sleep(200);
  }
  // Only touch the file on disk when something actually changed.
  if (fixCount > 0) {
    fs.writeFileSync(filepath, content, 'utf8');
    console.log(` => Wrote ${fixCount} fixes to ${filepath}`);
  } else {
    console.log(` => No changes needed`);
  }
  return fixCount;
}
// CLI entry point: run the fixer over each known data file that exists on
// disk and report the total number of rewritten URLs.
async function main() {
  console.log('GreenLens Image URL Fixer');
  console.log('========================');
  let totalFixes = 0;
  for (const file of FILES) {
    if (fs.existsSync(file)) {
      totalFixes += await processFile(file);
    } else {
      console.log(`\nSkipping ${file} (not found)`);
    }
  }
  console.log(`\nDone. Total fixes: ${totalFixes}`);
}
main().catch(console.error);

View File

@@ -0,0 +1,314 @@
#!/usr/bin/env node
/* eslint-disable no-console */
const fs = require('fs');
const path = require('path');
const vm = require('vm');
const ts = require('typescript');
// Repository root (this script lives one level down, in scripts/).
const ROOT_DIR = path.resolve(__dirname, '..');
// All generated audit artifacts land under audits/semantic-search/.
const OUTPUT_DIR = path.join(ROOT_DIR, 'audits', 'semantic-search');
// Per-category audit sheets.
const CATEGORY_DIR = path.join(OUTPUT_DIR, 'categories');
// Convenience copy of the per-plant export kept at the repository root.
const ROOT_EXPORT_PATH = path.join(ROOT_DIR, 'all-plants-categories.csv');
// Source-of-truth lexicon batches the audit rows are derived from.
const BATCH_1_PATH = path.join(ROOT_DIR, 'constants', 'lexiconBatch1.ts');
const BATCH_2_PATH = path.join(ROOT_DIR, 'constants', 'lexiconBatch2.ts');
// Suggested review order: earlier categories get a lower (more urgent)
// priority number in summary.json; categories not listed here fall to 999.
const AUDIT_PRIORITY = [
'pet_friendly',
'air_purifier',
'medicinal',
'low_light',
'bright_light',
'sun',
'easy',
'high_humidity',
'hanging',
'tree',
'large',
'patterned',
'flowering',
'succulent',
];
// Categories whose every assignment gets a "<category>_requires_external_evidence"
// risk flag (see buildRiskFlags) and so always needs manual review.
const HIGH_CONFIDENCE_MANUAL_REVIEW_CATEGORIES = new Set([
'pet_friendly',
'air_purifier',
'medicinal',
]);
// Canonical ordering for rendering a plant's category list in CSV rows;
// tags absent from this list sort last, alphabetically (see sortCategories).
const CATEGORY_DISPLAY_ORDER = [
'easy',
'pet_friendly',
'flowering',
'succulent',
'patterned',
'tree',
'large',
'medicinal',
'hanging',
'air_purifier',
'low_light',
'bright_light',
'high_humidity',
'sun',
];
// Resolve a *relative* import specifier (as written in `fromFile`) to an
// existing .ts/.tsx file on disk, trying the bare path, .ts/.tsx extensions,
// then a directory index.ts. Bare (package) specifiers and unresolvable
// paths yield null.
const resolveTsFilePath = (fromFile, specifier) => {
  if (!specifier.startsWith('.')) return null;
  const base = path.resolve(path.dirname(fromFile), specifier);
  const candidates = [
    base,
    `${base}.ts`,
    `${base}.tsx`,
    path.join(base, 'index.ts'),
  ];
  const hit = candidates.find(
    (candidate) => fs.existsSync(candidate) && fs.statSync(candidate).isFile()
  );
  return hit ?? null;
};
// Load a TypeScript module at runtime: transpile it to CommonJS with the
// TypeScript compiler API, then evaluate it in an isolated vm context and
// return its exports. `cache` (absolute path -> exports) makes repeated and
// circular relative imports share a single module instance.
const loadTsModule = (absolutePath, cache = new Map()) => {
  if (cache.has(absolutePath)) return cache.get(absolutePath);
  const source = fs.readFileSync(absolutePath, 'utf8');
  const transpiled = ts.transpileModule(source, {
    compilerOptions: {
      module: ts.ModuleKind.CommonJS,
      target: ts.ScriptTarget.ES2020,
      esModuleInterop: true,
      jsx: ts.JsxEmit.ReactJSX,
    },
    fileName: absolutePath,
    reportDiagnostics: false,
  }).outputText;
  const module = { exports: {} };
  // Register the (still empty) exports object BEFORE executing, so circular
  // relative imports resolve to the partially-built exports instead of
  // recursing forever.
  cache.set(absolutePath, module.exports);
  // Relative specifiers route back through this loader; everything else
  // (node built-ins / node_modules) falls through to the host require.
  const localRequire = (specifier) => {
    const resolvedTsPath = resolveTsFilePath(absolutePath, specifier);
    if (resolvedTsPath) return loadTsModule(resolvedTsPath, cache);
    return require(specifier);
  };
  // Minimal CommonJS-like global surface for the evaluated module.
  const sandbox = {
    module,
    exports: module.exports,
    require: localRequire,
    __dirname: path.dirname(absolutePath),
    __filename: absolutePath,
    console,
    process,
    Buffer,
    setTimeout,
    clearTimeout,
  };
  vm.runInNewContext(transpiled, sandbox, { filename: absolutePath });
  // Re-cache after execution in case the module reassigned module.exports.
  cache.set(absolutePath, module.exports);
  return module.exports;
};
// mkdir -p equivalent: create the directory (and any missing parents);
// a no-op if it already exists.
const ensureDir = (directoryPath) => void fs.mkdirSync(directoryPath, { recursive: true });
// Quote a CSV field per RFC 4180 when it contains a comma, double quote, or
// any line break; embedded quotes are doubled. null/undefined stringify to ''.
// Fix: the original regex omitted \r, so fields with a bare carriage return
// were emitted unquoted and corrupted the row structure.
const csvEscape = (value) => {
  const stringValue = String(value ?? '');
  return /[",\r\n]/.test(stringValue)
    ? `"${stringValue.replace(/"/g, '""')}"`
    : stringValue;
};
// Serialize an array of flat row objects to CSV at filePath. The header row
// comes from the first row's keys; an empty row set produces an empty file.
const writeCsv = (filePath, rows) => {
  if (rows.length === 0) {
    fs.writeFileSync(filePath, '', 'utf8');
    return;
  }
  const headers = Object.keys(rows[0]);
  const body = rows.map((row) => headers.map((header) => csvEscape(row[header])).join(','));
  const lines = [headers.join(','), ...body];
  fs.writeFileSync(filePath, `${lines.join('\n')}\n`, 'utf8');
};
// Filesystem-safe slug for a category: runs of characters outside
// [a-z0-9_-] (case-insensitively) collapse to '-', then lowercase.
const normalizeCategoryFilename = (category) => {
  return category.replace(/[^a-z0-9_-]+/gi, '-').toLowerCase();
};
// Return a new array of category tags ordered by their position in
// CATEGORY_DISPLAY_ORDER; tags not listed there sort last, alphabetically.
// The input array is never mutated.
const sortCategories = (categories = []) => {
  const rank = (tag) => {
    const position = CATEGORY_DISPLAY_ORDER.indexOf(tag);
    return position === -1 ? Number.MAX_SAFE_INTEGER : position;
  };
  return [...categories].sort((left, right) => rank(left) - rank(right) || left.localeCompare(right));
};
// Derive review flags for an entry from contradictory light requirements,
// a suspicious succulent/high-humidity pairing, and any category that always
// needs external evidence. Returns a de-duplicated list (insertion order).
const buildRiskFlags = (entry) => {
  const tags = entry.categories || [];
  const tagSet = new Set(tags);
  const flags = new Set();
  if (tagSet.has('low_light')) {
    if (tagSet.has('sun')) flags.add('light_conflict_low_light_and_sun');
    if (tagSet.has('bright_light')) flags.add('light_conflict_low_light_and_bright_light');
  }
  if (tagSet.has('succulent') && tagSet.has('high_humidity')) {
    flags.add('succulent_high_humidity_combo_review');
  }
  for (const tag of tags) {
    if (HIGH_CONFIDENCE_MANUAL_REVIEW_CATEGORIES.has(tag)) {
      flags.add(`${tag}_requires_external_evidence`);
    }
  }
  return [...flags];
};
// Flatten one lexicon entry into a per-category audit CSV row. The trailing
// blank columns (audit_status / evidence_* / notes) are reviewer-filled.
const toAuditRow = (entry, category) => {
  const care = entry.careInfo;
  return {
    category,
    source_file: entry.sourceFile,
    source_index: entry.sourceIndex,
    name: entry.name,
    botanical_name: entry.botanicalName,
    description: entry.description || '',
    light: care?.light || '',
    temp: care?.temp || '',
    water_interval_days: care?.waterIntervalDays ?? '',
    all_categories: sortCategories(entry.categories || []).join('|'),
    risk_flags: buildRiskFlags(entry).join('|'),
    audit_status: '',
    evidence_source: '',
    evidence_url: '',
    notes: '',
  };
};
// One CSV row per plant: its full (display-ordered) category list plus the
// basic care fields, for the all-plants export.
const toPlantCategoryRow = (entry) => {
  const categories = entry.categories || [];
  return {
    source_file: entry.sourceFile,
    source_index: entry.sourceIndex,
    name: entry.name,
    botanical_name: entry.botanicalName,
    all_categories: sortCategories(categories).join('|'),
    category_count: categories.length,
    description: entry.description || '',
    light: entry.careInfo?.light || '',
    temp: entry.careInfo?.temp || '',
    water_interval_days: entry.careInfo?.waterIntervalDays ?? '',
  };
};
// Load both lexicon batches (transpiled on the fly via loadTsModule) and tag
// each entry with its source file and 1-based index within that file.
// Throws if either batch export is missing or not an array.
const loadBatchEntries = () => {
  const tag = (entries, sourceFile) =>
    entries.map((entry, index) => ({ ...entry, sourceFile, sourceIndex: index + 1 }));
  const batch1 = loadTsModule(BATCH_1_PATH).LEXICON_BATCH_1_ENTRIES;
  const batch2 = loadTsModule(BATCH_2_PATH).LEXICON_BATCH_2_ENTRIES;
  if (!Array.isArray(batch1) || !Array.isArray(batch2)) {
    throw new Error('Could not load lexicon batch entries.');
  }
  return [
    ...tag(batch1, 'constants/lexiconBatch1.ts'),
    ...tag(batch2, 'constants/lexiconBatch2.ts'),
  ];
};
// Orchestrate the full audit export: load entries, compute the category
// summary, then emit per-category CSVs, aggregate CSVs, JSON artifacts, and
// a README into OUTPUT_DIR (plus a convenience CSV at the repo root).
const main = () => {
  ensureDir(CATEGORY_DIR);
  const entries = loadBatchEntries();
  // Unique, alphabetically sorted list of every category tag in use.
  const categories = [...new Set(entries.flatMap((entry) => entry.categories || []))].sort();
  // summary.json payload: per-category counts ordered by audit priority,
  // then descending count, then name.
  const summary = {
    generatedAt: new Date().toISOString(),
    totalEntries: entries.length,
    categories: categories.map((category) => ({
      category,
      count: entries.filter((entry) => (entry.categories || []).includes(category)).length,
      // 1-based rank from AUDIT_PRIORITY; unlisted categories sink to 999.
      priority: AUDIT_PRIORITY.indexOf(category) >= 0 ? AUDIT_PRIORITY.indexOf(category) + 1 : 999,
    })).sort((left, right) =>
      left.priority - right.priority ||
      right.count - left.count ||
      left.category.localeCompare(right.category)),
  };
  // One row per plant, sorted by botanical name then common name.
  const plantCategoryRows = [...entries]
    .sort((left, right) =>
      left.botanicalName.localeCompare(right.botanicalName) ||
      left.name.localeCompare(right.name))
    .map((entry) => toPlantCategoryRow(entry));
  const masterRows = [];
  const suspiciousRows = [];
  // Build each per-category sheet; while doing so, accumulate the master
  // sheet and the subset of rows carrying risk flags.
  categories.forEach((category) => {
    const categoryEntries = entries
      .filter((entry) => (entry.categories || []).includes(category))
      .sort((left, right) =>
        left.botanicalName.localeCompare(right.botanicalName) ||
        left.name.localeCompare(right.name));
    const rows = categoryEntries.map((entry) => {
      const row = toAuditRow(entry, category);
      masterRows.push(row);
      const riskFlags = row.risk_flags ? row.risk_flags.split('|').filter(Boolean) : [];
      if (riskFlags.length > 0) {
        suspiciousRows.push({
          category,
          source_file: entry.sourceFile,
          source_index: entry.sourceIndex,
          name: entry.name,
          botanical_name: entry.botanicalName,
          risk_flags: riskFlags.join('|'),
        });
      }
      return row;
    });
    writeCsv(path.join(CATEGORY_DIR, `${normalizeCategoryFilename(category)}.csv`), rows);
  });
  // Aggregate exports; the per-plant CSV is also copied to the repo root.
  writeCsv(path.join(OUTPUT_DIR, 'all-plants-categories.csv'), plantCategoryRows);
  writeCsv(ROOT_EXPORT_PATH, plantCategoryRows);
  writeCsv(path.join(OUTPUT_DIR, 'master.csv'), masterRows);
  writeCsv(path.join(OUTPUT_DIR, 'suspicious.csv'), suspiciousRows);
  fs.writeFileSync(path.join(OUTPUT_DIR, 'summary.json'), `${JSON.stringify(summary, null, 2)}\n`, 'utf8');
  fs.writeFileSync(path.join(OUTPUT_DIR, 'suspicious.json'), `${JSON.stringify(suspiciousRows, null, 2)}\n`, 'utf8');
  // Human-readable index of the generated artifacts and the review workflow.
  const readme = `# Semantic Search Audit
Generated: ${summary.generatedAt}
Files:
- \`summary.json\`: category counts and suggested audit order
- \`all-plants-categories.csv\`: one row per plant with its full category list
- \`master.csv\`: all category assignments with blank evidence columns
- \`suspicious.csv\`: entries that require elevated review based on rule flags
- \`categories/*.csv\`: per-category audit sheets
Suggested audit order:
${summary.categories.map((item) => `- ${item.category} (${item.count})`).join('\n')}
Workflow:
1. Review one category CSV at a time.
2. Fill \`audit_status\`, \`evidence_source\`, \`evidence_url\`, and \`notes\`.
3. Apply only high-confidence source-tag corrections to the lexicon batch files.
4. Rebuild the server catalog from batches after source edits.
`;
  fs.writeFileSync(path.join(OUTPUT_DIR, 'README.md'), readme, 'utf8');
  console.log(`Audit artifacts written to ${OUTPUT_DIR}`);
  console.log(`Categories exported: ${categories.length}`);
  console.log(`Suspicious rows flagged: ${suspiciousRows.length}`);
};
// This script is a CLI entry point; run immediately.
main();

55
scripts/validate_all.ts Normal file
View File

@@ -0,0 +1,55 @@
import * as fs from 'fs';
import * as path from 'path';
// NOTE(review): fs/path appear unused in this file — kept as-is; confirm
// before removing. Batch entries are imported directly; run via tsx/ts-node
// so the TS imports resolve.
import { LEXICON_BATCH_1_ENTRIES } from '../constants/lexiconBatch1';
import { LEXICON_BATCH_2_ENTRIES } from '../constants/lexiconBatch2';
// Combined list of every lexicon entry whose imageUri will be probed.
const allPlants = [...LEXICON_BATCH_1_ENTRIES, ...LEXICON_BATCH_2_ENTRIES];
// Probe one image URL with browser-like request headers (some image hosts
// reject default/bot user agents) and report plain reachability: true only
// for an HTTP 200; any network failure counts as unreachable.
async function checkUrl(url: string): Promise<boolean> {
  const headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
    'Referer': 'https://commons.wikimedia.org/'
  };
  const response = await fetch(url, { method: 'GET', headers }).catch(() => null);
  return response !== null && response.status === 200;
}
// Validate every plant image URL in batches of 10 concurrent requests,
// logging each failure and a final pass/fail summary.
async function run() {
  console.log(`Checking ${allPlants.length} plants...`);
  const concurrency = 10;
  let failedCount = 0;
  for (let start = 0; start < allPlants.length; start += concurrency) {
    const slice = allPlants.slice(start, start + concurrency);
    const results = await Promise.all(
      slice.map(async (plant) => ({
        name: plant.name,
        url: plant.imageUri,
        ok: await checkUrl(plant.imageUri),
      }))
    );
    for (const res of results) {
      if (res.ok) continue;
      console.log(`❌ Failed: ${res.name} -> ${res.url}`);
      failedCount++;
    }
  }
  if (failedCount === 0) {
    console.log("✅ All image URLs are reachable!");
  } else {
    console.log(`${failedCount} URLs failed.`);
  }
}
run();