Initial commit for GreenLens

This commit is contained in:
Timo Knuth
2026-03-16 21:31:46 +01:00
parent 307135671f
commit 05d4f6e78b
573 changed files with 54233 additions and 1891 deletions

192
scripts/fix_images.js Normal file
View File

@@ -0,0 +1,192 @@
#!/usr/bin/env node
/**
* fix_images.js
* Finds broken image URLs in lexicon/catalog files and replaces them
* using Wikimedia Commons API.
*/
const fs = require('fs');
const https = require('https');
// Target data files whose imageUri fields are checked and rewritten in place.
const FILES = [
'constants/lexiconBatch1.ts',
'constants/lexiconBatch2.ts',
'services/backend/mockCatalog.ts',
];
// Known manual fixes (botanicalName -> correct Wikimedia filename)
// Consulted before falling back to the Commons search API; values are bare
// Commons filenames that wikimediaThumbUrl() turns into 500px thumb URLs.
const MANUAL_FIXES = {
'Chlorophytum comosum': 'Chlorophytum_comosum_01.jpg',
'Syngonium podophyllum': 'Syngonium_podophyllum1.jpg',
'Fuchsia hybrida': 'Fuchsia_%27Beacon%27.jpg',
'Tillandsia usneoides': 'Tillandsia_usneoides_leaves.jpg',
'Tillandsia ionantha': 'Tillandsia_ionantha0.jpg',
};
// Promise-based delay, used to rate-limit requests against remote hosts.
function sleep(ms) {
  return new Promise((wake) => setTimeout(wake, ms));
}
/**
 * GET `url` and buffer the response body into a string.
 * Follows up to `redirectsLeft` 301/302 redirects (new optional parameter,
 * defaulted for backward compatibility). Resolves with `{ status, body }`;
 * rejects on network error, a 10s timeout, a redirect without a Location
 * header, or too many redirects. The original crashed https.get() with an
 * undefined URL when Location was missing and could recurse forever on a
 * redirect loop.
 */
function httpGet(url, redirectsLeft = 5) {
  return new Promise((resolve, reject) => {
    const req = https.get(url, {
      headers: {
        'User-Agent': 'GreenLens-ImageFixer/1.0 (educational plant app)'
      }
    }, (res) => {
      if (res.statusCode === 301 || res.statusCode === 302) {
        res.resume(); // drain the redirect body so the socket is released
        if (!res.headers.location || redirectsLeft <= 0) {
          reject(new Error(`bad redirect (missing Location or too many hops) for ${url}`));
          return;
        }
        resolve(httpGet(res.headers.location, redirectsLeft - 1));
        return;
      }
      let data = '';
      res.on('data', chunk => data += chunk);
      res.on('end', () => resolve({ status: res.statusCode, body: data }));
    });
    req.on('error', reject);
    req.setTimeout(10000, () => {
      req.destroy();
      reject(new Error('timeout'));
    });
  });
}
/**
 * Reachability probe: GET `url`, discard the body, resolve true iff the
 * final response is HTTP 200. Now follows up to `redirectsLeft` 301/302
 * redirects (new optional parameter, defaulted for backward compatibility);
 * previously any redirecting-but-valid URL was reported broken and then
 * needlessly rewritten. Never rejects — errors and timeouts resolve false.
 */
function checkUrl(url, redirectsLeft = 3) {
  return new Promise((resolve) => {
    const req = https.get(url, {
      headers: { 'User-Agent': 'GreenLens-ImageFixer/1.0' }
    }, (res) => {
      res.resume(); // drain so the socket can be reused
      if ((res.statusCode === 301 || res.statusCode === 302) &&
          res.headers.location && redirectsLeft > 0) {
        resolve(checkUrl(res.headers.location, redirectsLeft - 1));
        return;
      }
      resolve(res.statusCode === 200);
    });
    req.on('error', () => resolve(false));
    req.setTimeout(8000, () => { req.destroy(); resolve(false); });
  });
}
// Query the Wikimedia Commons search API (file namespace, top 5 hits) for
// `botanicalName` and return the first jpg/png thumbnail URL, or null when
// nothing usable comes back or the request/parse fails.
async function searchWikimediaImage(botanicalName) {
  const encoded = encodeURIComponent(botanicalName);
  const url = `https://commons.wikimedia.org/w/api.php?action=query&generator=search&gsrnamespace=6&gsrsearch=${encoded}&gsrlimit=5&prop=imageinfo&iiprop=url&iiurlwidth=500&format=json`;
  try {
    const res = await httpGet(url);
    if (res.status === 200) {
      const data = JSON.parse(res.body);
      const pages = data.query && data.query.pages;
      for (const page of Object.values(pages || {})) {
        const [info] = page.imageinfo || [];
        if (!info) continue;
        const thumbUrl = info.thumburl || info.url;
        // Only accept plain raster formats the app can render.
        if (thumbUrl && ['.jpg', '.png', '.JPG', '.PNG'].some((suffix) => thumbUrl.endsWith(suffix))) {
          return thumbUrl;
        }
      }
    }
  } catch (e) {
    console.error(` API error for "${botanicalName}": ${e.message}`);
  }
  return null;
}
function wikimediaThumbUrl(filename) {
// Build a 500px thumb URL from a bare filename
const name = filename.replace(/ /g, '_');
const hash = require('crypto').createHash('md5').update(name).digest('hex');
const d1 = hash[0];
const d2 = hash.substring(0, 2);
const ext = name.split('.').pop().toLowerCase();
const isJpg = ['jpg', 'jpeg'].includes(ext);
return `https://upload.wikimedia.org/wikipedia/commons/thumb/${d1}/${d2}/${name}/500px-${name}`;
}
// Scan a TS source string for entry literals and extract, in document order,
// { name, botanicalName, imageUri, index } where `index` is the offset of the
// match ("name:") within `content`.
function parseEntries(content) {
  const pattern = /name:\s*['"]([^'"]+)['"]\s*,[\s\S]*?botanicalName:\s*['"]([^'"]+)['"]\s*,[\s\S]*?imageUri:\s*['"]([^'"]+)['"]/g;
  const found = [];
  for (let match = pattern.exec(content); match !== null; match = pattern.exec(content)) {
    const [, name, botanicalName, imageUri] = match;
    found.push({ name, botanicalName, imageUri, index: match.index });
  }
  return found;
}
// Audit one data file: probe each entry's imageUri; for broken URLs, look up
// a replacement (MANUAL_FIXES first, then the Commons search API) and rewrite
// the file in place. Returns the number of entries fixed.
async function processFile(filepath) {
  console.log(`\n=== Processing ${filepath} ===`);
  let content = fs.readFileSync(filepath, 'utf8');
  const entries = parseEntries(content);
  console.log(`Found ${entries.length} entries`);
  let fixCount = 0;
  for (const entry of entries) {
    // `name` is destructured but unused below; kept for symmetry with the entry shape.
    const { name, botanicalName, imageUri } = entry;
    // Check if URL is broken
    process.stdout.write(` Checking ${botanicalName}... `);
    const ok = await checkUrl(imageUri);
    if (ok) {
      console.log('OK');
      // Short courtesy delay between probes of the image host.
      await sleep(100);
      continue;
    }
    console.log('BROKEN');
    let newUrl = null;
    // Check manual fixes first
    if (MANUAL_FIXES[botanicalName]) {
      const filename = MANUAL_FIXES[botanicalName];
      const thumb = wikimediaThumbUrl(filename);
      console.log(` -> Manual fix: ${thumb}`);
      newUrl = thumb;
    } else {
      // Query Wikimedia Commons API
      console.log(` -> Searching Wikimedia for "${botanicalName}"...`);
      newUrl = await searchWikimediaImage(botanicalName);
      if (newUrl) {
        console.log(` -> Found: ${newUrl}`);
      } else {
        console.log(` -> No result found, skipping`);
      }
    }
    if (newUrl) {
      // Replace the old URL in content (escape for regex)
      // NOTE(review): the 'g' replace rewrites every occurrence of this URL in
      // the file, including other entries sharing it; fixCount counts entries,
      // not occurrences.
      const escapedOld = imageUri.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      content = content.replace(new RegExp(escapedOld, 'g'), newUrl);
      fixCount++;
    }
    // Longer delay after a (possibly failed) lookup to respect API rate limits.
    await sleep(200);
  }
  // Only touch the file on disk when something actually changed.
  if (fixCount > 0) {
    fs.writeFileSync(filepath, content, 'utf8');
    console.log(` => Wrote ${fixCount} fixes to ${filepath}`);
  } else {
    console.log(` => No changes needed`);
  }
  return fixCount;
}
// CLI entry point: run the fixer over each known data file that exists on
// disk and report the total number of rewritten URLs.
async function main() {
  console.log('GreenLens Image URL Fixer');
  console.log('========================');
  let totalFixes = 0;
  for (const file of FILES) {
    if (fs.existsSync(file)) {
      totalFixes += await processFile(file);
    } else {
      console.log(`\nSkipping ${file} (not found)`);
    }
  }
  console.log(`\nDone. Total fixes: ${totalFixes}`);
}
main().catch(console.error);

View File

@@ -0,0 +1,314 @@
#!/usr/bin/env node
/* eslint-disable no-console */
const fs = require('fs');
const path = require('path');
const vm = require('vm');
const ts = require('typescript');
// Repository root (this script lives one level down, in scripts/).
const ROOT_DIR = path.resolve(__dirname, '..');
// All generated audit artifacts land under audits/semantic-search/.
const OUTPUT_DIR = path.join(ROOT_DIR, 'audits', 'semantic-search');
// Per-category audit sheets.
const CATEGORY_DIR = path.join(OUTPUT_DIR, 'categories');
// Convenience copy of the per-plant export kept at the repository root.
const ROOT_EXPORT_PATH = path.join(ROOT_DIR, 'all-plants-categories.csv');
// Source-of-truth lexicon batches the audit rows are derived from.
const BATCH_1_PATH = path.join(ROOT_DIR, 'constants', 'lexiconBatch1.ts');
const BATCH_2_PATH = path.join(ROOT_DIR, 'constants', 'lexiconBatch2.ts');
// Suggested review order: earlier categories get a lower (more urgent)
// priority number in summary.json; categories not listed here fall to 999.
const AUDIT_PRIORITY = [
'pet_friendly',
'air_purifier',
'medicinal',
'low_light',
'bright_light',
'sun',
'easy',
'high_humidity',
'hanging',
'tree',
'large',
'patterned',
'flowering',
'succulent',
];
// Categories whose every assignment gets a "<category>_requires_external_evidence"
// risk flag (see buildRiskFlags) and so always needs manual review.
const HIGH_CONFIDENCE_MANUAL_REVIEW_CATEGORIES = new Set([
'pet_friendly',
'air_purifier',
'medicinal',
]);
// Canonical ordering for rendering a plant's category list in CSV rows;
// tags absent from this list sort last, alphabetically (see sortCategories).
const CATEGORY_DISPLAY_ORDER = [
'easy',
'pet_friendly',
'flowering',
'succulent',
'patterned',
'tree',
'large',
'medicinal',
'hanging',
'air_purifier',
'low_light',
'bright_light',
'high_humidity',
'sun',
];
// Resolve a *relative* import specifier (as written in `fromFile`) to an
// existing .ts/.tsx file on disk, trying the bare path, .ts/.tsx extensions,
// then a directory index.ts. Bare (package) specifiers and unresolvable
// paths yield null.
const resolveTsFilePath = (fromFile, specifier) => {
  if (!specifier.startsWith('.')) return null;
  const base = path.resolve(path.dirname(fromFile), specifier);
  const candidates = [
    base,
    `${base}.ts`,
    `${base}.tsx`,
    path.join(base, 'index.ts'),
  ];
  const hit = candidates.find(
    (candidate) => fs.existsSync(candidate) && fs.statSync(candidate).isFile()
  );
  return hit ?? null;
};
// Load a TypeScript module at runtime: transpile it to CommonJS with the
// TypeScript compiler API, then evaluate it in an isolated vm context and
// return its exports. `cache` (absolute path -> exports) makes repeated and
// circular relative imports share a single module instance.
const loadTsModule = (absolutePath, cache = new Map()) => {
  if (cache.has(absolutePath)) return cache.get(absolutePath);
  const source = fs.readFileSync(absolutePath, 'utf8');
  const transpiled = ts.transpileModule(source, {
    compilerOptions: {
      module: ts.ModuleKind.CommonJS,
      target: ts.ScriptTarget.ES2020,
      esModuleInterop: true,
      jsx: ts.JsxEmit.ReactJSX,
    },
    fileName: absolutePath,
    reportDiagnostics: false,
  }).outputText;
  const module = { exports: {} };
  // Register the (still empty) exports object BEFORE executing, so circular
  // relative imports resolve to the partially-built exports instead of
  // recursing forever.
  cache.set(absolutePath, module.exports);
  // Relative specifiers route back through this loader; everything else
  // (node built-ins / node_modules) falls through to the host require.
  const localRequire = (specifier) => {
    const resolvedTsPath = resolveTsFilePath(absolutePath, specifier);
    if (resolvedTsPath) return loadTsModule(resolvedTsPath, cache);
    return require(specifier);
  };
  // Minimal CommonJS-like global surface for the evaluated module.
  const sandbox = {
    module,
    exports: module.exports,
    require: localRequire,
    __dirname: path.dirname(absolutePath),
    __filename: absolutePath,
    console,
    process,
    Buffer,
    setTimeout,
    clearTimeout,
  };
  vm.runInNewContext(transpiled, sandbox, { filename: absolutePath });
  // Re-cache after execution in case the module reassigned module.exports.
  cache.set(absolutePath, module.exports);
  return module.exports;
};
// mkdir -p equivalent: create the directory (and any missing parents);
// a no-op if it already exists.
const ensureDir = (directoryPath) => void fs.mkdirSync(directoryPath, { recursive: true });
// Quote a CSV field per RFC 4180 when it contains a comma, double quote, or
// any line break; embedded quotes are doubled. null/undefined stringify to ''.
// Fix: the original regex omitted \r, so fields with a bare carriage return
// were emitted unquoted and corrupted the row structure.
const csvEscape = (value) => {
  const stringValue = String(value ?? '');
  return /[",\r\n]/.test(stringValue)
    ? `"${stringValue.replace(/"/g, '""')}"`
    : stringValue;
};
// Serialize an array of flat row objects to CSV at filePath. The header row
// comes from the first row's keys; an empty row set produces an empty file.
const writeCsv = (filePath, rows) => {
  if (rows.length === 0) {
    fs.writeFileSync(filePath, '', 'utf8');
    return;
  }
  const headers = Object.keys(rows[0]);
  const body = rows.map((row) => headers.map((header) => csvEscape(row[header])).join(','));
  const lines = [headers.join(','), ...body];
  fs.writeFileSync(filePath, `${lines.join('\n')}\n`, 'utf8');
};
// Filesystem-safe slug for a category: runs of characters outside
// [a-z0-9_-] (case-insensitively) collapse to '-', then lowercase.
const normalizeCategoryFilename = (category) => {
  return category.replace(/[^a-z0-9_-]+/gi, '-').toLowerCase();
};
// Return a new array of category tags ordered by their position in
// CATEGORY_DISPLAY_ORDER; tags not listed there sort last, alphabetically.
// The input array is never mutated.
const sortCategories = (categories = []) => {
  const rank = (tag) => {
    const position = CATEGORY_DISPLAY_ORDER.indexOf(tag);
    return position === -1 ? Number.MAX_SAFE_INTEGER : position;
  };
  return [...categories].sort((left, right) => rank(left) - rank(right) || left.localeCompare(right));
};
// Derive review flags for an entry from contradictory light requirements,
// a suspicious succulent/high-humidity pairing, and any category that always
// needs external evidence. Returns a de-duplicated list (insertion order).
const buildRiskFlags = (entry) => {
  const tags = entry.categories || [];
  const tagSet = new Set(tags);
  const flags = new Set();
  if (tagSet.has('low_light')) {
    if (tagSet.has('sun')) flags.add('light_conflict_low_light_and_sun');
    if (tagSet.has('bright_light')) flags.add('light_conflict_low_light_and_bright_light');
  }
  if (tagSet.has('succulent') && tagSet.has('high_humidity')) {
    flags.add('succulent_high_humidity_combo_review');
  }
  for (const tag of tags) {
    if (HIGH_CONFIDENCE_MANUAL_REVIEW_CATEGORIES.has(tag)) {
      flags.add(`${tag}_requires_external_evidence`);
    }
  }
  return [...flags];
};
// Flatten one lexicon entry into a per-category audit CSV row. The trailing
// blank columns (audit_status / evidence_* / notes) are reviewer-filled.
const toAuditRow = (entry, category) => {
  const care = entry.careInfo;
  return {
    category,
    source_file: entry.sourceFile,
    source_index: entry.sourceIndex,
    name: entry.name,
    botanical_name: entry.botanicalName,
    description: entry.description || '',
    light: care?.light || '',
    temp: care?.temp || '',
    water_interval_days: care?.waterIntervalDays ?? '',
    all_categories: sortCategories(entry.categories || []).join('|'),
    risk_flags: buildRiskFlags(entry).join('|'),
    audit_status: '',
    evidence_source: '',
    evidence_url: '',
    notes: '',
  };
};
// One CSV row per plant: its full (display-ordered) category list plus the
// basic care fields, for the all-plants export.
const toPlantCategoryRow = (entry) => {
  const categories = entry.categories || [];
  return {
    source_file: entry.sourceFile,
    source_index: entry.sourceIndex,
    name: entry.name,
    botanical_name: entry.botanicalName,
    all_categories: sortCategories(categories).join('|'),
    category_count: categories.length,
    description: entry.description || '',
    light: entry.careInfo?.light || '',
    temp: entry.careInfo?.temp || '',
    water_interval_days: entry.careInfo?.waterIntervalDays ?? '',
  };
};
// Load both lexicon batches (transpiled on the fly via loadTsModule) and tag
// each entry with its source file and 1-based index within that file.
// Throws if either batch export is missing or not an array.
const loadBatchEntries = () => {
  const tag = (entries, sourceFile) =>
    entries.map((entry, index) => ({ ...entry, sourceFile, sourceIndex: index + 1 }));
  const batch1 = loadTsModule(BATCH_1_PATH).LEXICON_BATCH_1_ENTRIES;
  const batch2 = loadTsModule(BATCH_2_PATH).LEXICON_BATCH_2_ENTRIES;
  if (!Array.isArray(batch1) || !Array.isArray(batch2)) {
    throw new Error('Could not load lexicon batch entries.');
  }
  return [
    ...tag(batch1, 'constants/lexiconBatch1.ts'),
    ...tag(batch2, 'constants/lexiconBatch2.ts'),
  ];
};
// Orchestrate the full audit export: load entries, compute the category
// summary, then emit per-category CSVs, aggregate CSVs, JSON artifacts, and
// a README into OUTPUT_DIR (plus a convenience CSV at the repo root).
const main = () => {
  ensureDir(CATEGORY_DIR);
  const entries = loadBatchEntries();
  // Unique, alphabetically sorted list of every category tag in use.
  const categories = [...new Set(entries.flatMap((entry) => entry.categories || []))].sort();
  // summary.json payload: per-category counts ordered by audit priority,
  // then descending count, then name.
  const summary = {
    generatedAt: new Date().toISOString(),
    totalEntries: entries.length,
    categories: categories.map((category) => ({
      category,
      count: entries.filter((entry) => (entry.categories || []).includes(category)).length,
      // 1-based rank from AUDIT_PRIORITY; unlisted categories sink to 999.
      priority: AUDIT_PRIORITY.indexOf(category) >= 0 ? AUDIT_PRIORITY.indexOf(category) + 1 : 999,
    })).sort((left, right) =>
      left.priority - right.priority ||
      right.count - left.count ||
      left.category.localeCompare(right.category)),
  };
  // One row per plant, sorted by botanical name then common name.
  const plantCategoryRows = [...entries]
    .sort((left, right) =>
      left.botanicalName.localeCompare(right.botanicalName) ||
      left.name.localeCompare(right.name))
    .map((entry) => toPlantCategoryRow(entry));
  const masterRows = [];
  const suspiciousRows = [];
  // Build each per-category sheet; while doing so, accumulate the master
  // sheet and the subset of rows carrying risk flags.
  categories.forEach((category) => {
    const categoryEntries = entries
      .filter((entry) => (entry.categories || []).includes(category))
      .sort((left, right) =>
        left.botanicalName.localeCompare(right.botanicalName) ||
        left.name.localeCompare(right.name));
    const rows = categoryEntries.map((entry) => {
      const row = toAuditRow(entry, category);
      masterRows.push(row);
      const riskFlags = row.risk_flags ? row.risk_flags.split('|').filter(Boolean) : [];
      if (riskFlags.length > 0) {
        suspiciousRows.push({
          category,
          source_file: entry.sourceFile,
          source_index: entry.sourceIndex,
          name: entry.name,
          botanical_name: entry.botanicalName,
          risk_flags: riskFlags.join('|'),
        });
      }
      return row;
    });
    writeCsv(path.join(CATEGORY_DIR, `${normalizeCategoryFilename(category)}.csv`), rows);
  });
  // Aggregate exports; the per-plant CSV is also copied to the repo root.
  writeCsv(path.join(OUTPUT_DIR, 'all-plants-categories.csv'), plantCategoryRows);
  writeCsv(ROOT_EXPORT_PATH, plantCategoryRows);
  writeCsv(path.join(OUTPUT_DIR, 'master.csv'), masterRows);
  writeCsv(path.join(OUTPUT_DIR, 'suspicious.csv'), suspiciousRows);
  fs.writeFileSync(path.join(OUTPUT_DIR, 'summary.json'), `${JSON.stringify(summary, null, 2)}\n`, 'utf8');
  fs.writeFileSync(path.join(OUTPUT_DIR, 'suspicious.json'), `${JSON.stringify(suspiciousRows, null, 2)}\n`, 'utf8');
  // Human-readable index of the generated artifacts and the review workflow.
  const readme = `# Semantic Search Audit
Generated: ${summary.generatedAt}
Files:
- \`summary.json\`: category counts and suggested audit order
- \`all-plants-categories.csv\`: one row per plant with its full category list
- \`master.csv\`: all category assignments with blank evidence columns
- \`suspicious.csv\`: entries that require elevated review based on rule flags
- \`categories/*.csv\`: per-category audit sheets
Suggested audit order:
${summary.categories.map((item) => `- ${item.category} (${item.count})`).join('\n')}
Workflow:
1. Review one category CSV at a time.
2. Fill \`audit_status\`, \`evidence_source\`, \`evidence_url\`, and \`notes\`.
3. Apply only high-confidence source-tag corrections to the lexicon batch files.
4. Rebuild the server catalog from batches after source edits.
`;
  fs.writeFileSync(path.join(OUTPUT_DIR, 'README.md'), readme, 'utf8');
  console.log(`Audit artifacts written to ${OUTPUT_DIR}`);
  console.log(`Categories exported: ${categories.length}`);
  console.log(`Suspicious rows flagged: ${suspiciousRows.length}`);
};
// This script is a CLI entry point; run immediately.
main();

55
scripts/validate_all.ts Normal file
View File

@@ -0,0 +1,55 @@
import * as fs from 'fs';
import * as path from 'path';
// NOTE(review): fs/path appear unused in this file — kept as-is; confirm
// before removing. Batch entries are imported directly; run via tsx/ts-node
// so the TS imports resolve.
import { LEXICON_BATCH_1_ENTRIES } from '../constants/lexiconBatch1';
import { LEXICON_BATCH_2_ENTRIES } from '../constants/lexiconBatch2';
// Combined list of every lexicon entry whose imageUri will be probed.
const allPlants = [...LEXICON_BATCH_1_ENTRIES, ...LEXICON_BATCH_2_ENTRIES];
// Probe one image URL with browser-like request headers (some image hosts
// reject default/bot user agents) and report plain reachability: true only
// for an HTTP 200; any network failure counts as unreachable.
async function checkUrl(url: string): Promise<boolean> {
  const headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
    'Referer': 'https://commons.wikimedia.org/'
  };
  const response = await fetch(url, { method: 'GET', headers }).catch(() => null);
  return response !== null && response.status === 200;
}
// Validate every plant image URL in batches of 10 concurrent requests,
// logging each failure and a final pass/fail summary.
async function run() {
  console.log(`Checking ${allPlants.length} plants...`);
  const concurrency = 10;
  let failedCount = 0;
  for (let start = 0; start < allPlants.length; start += concurrency) {
    const slice = allPlants.slice(start, start + concurrency);
    const results = await Promise.all(
      slice.map(async (plant) => ({
        name: plant.name,
        url: plant.imageUri,
        ok: await checkUrl(plant.imageUri),
      }))
    );
    for (const res of results) {
      if (res.ok) continue;
      console.log(`❌ Failed: ${res.name} -> ${res.url}`);
      failedCount++;
    }
  }
  if (failedCount === 0) {
    console.log("✅ All image URLs are reachable!");
  } else {
    console.log(`${failedCount} URLs failed.`);
  }
}
run();