Initial commit for Greenlens

This commit is contained in:
Timo Knuth
2026-03-16 21:31:46 +01:00
parent 307135671f
commit 05d4f6e78b
573 changed files with 54233 additions and 1891 deletions

View File

@@ -1,11 +1,15 @@
const crypto = require('crypto');
const { all, get, run } = require('./sqlite');
const { normalizeSearchText, rankHybridEntries } = require('./hybridSearch');
// Result-count bounds. NOTE(review): presumably the default and maximum page size
// for plant listings — the code applying them is outside this view; confirm.
const DEFAULT_LIMIT = 60;
const MAX_LIMIT = 500;
// NOTE(review): presumably caps how many detail records an import audit keeps —
// its use is not visible in this view; confirm.
const MAX_AUDIT_DETAILS = 80;
// Marker that convertWikimediaFilePathUrl searches for inside a URL.
const WIKIMEDIA_FILEPATH_SEGMENT = 'Special:FilePath/';
// Base URL to which an encoded file name is appended to form a Special:FilePath URL.
const WIKIMEDIA_REDIRECT_BASE = 'https://commons.wikimedia.org/wiki/Special:FilePath/';
// Prefix of `wikimedia-search:<query>` image URIs; prepareEntry marks entries whose
// imageUri starts with it as imageStatus 'pending'.
const WIKIMEDIA_SEARCH_PREFIX = 'wikimedia-search:';
// Required leading path of a local plant image.
const LOCAL_PLANT_IMAGE_PREFIX = '/plants/';
// Full shape of an accepted local image path: `/plants/`, then one or more of
// letters, digits, '/', '_' or '-', then a single '.' and an alphanumeric extension.
const LOCAL_PLANT_IMAGE_PATH_PATTERN = /^\/plants\/[A-Za-z0-9/_-]+\.[A-Za-z0-9]+$/;
class PlantImportValidationError extends Error {
constructor(message, details) {
@@ -19,12 +23,7 @@ const normalizeWhitespace = (value) => {
return value.trim().replace(/\s+/g, ' ');
};
/**
 * Builds a case- and accent-insensitive key from a string.
 *
 * The value is whitespace-normalized, lower-cased and decomposed (NFD) so that
 * accents become separate combining marks, which are then removed.
 *
 * @param {string} value - Text to normalize.
 * @returns {string} The normalized key.
 */
const normalizeKey = (value) => {
  const collapsed = normalizeWhitespace(value);
  const decomposed = collapsed.toLowerCase().normalize('NFD');
  // U+0300–U+036F: the combining marks left behind by the NFD decomposition.
  return decomposed.replace(/[\u0300-\u036f]/g, '');
};
/**
 * Builds the normalized key for a string: whitespace is normalized first and
 * the result is handed to `normalizeSearchText` from ./hybridSearch.
 *
 * @param {string} value - Text to normalize.
 * @returns {*} Whatever `normalizeSearchText` returns for the cleaned text.
 */
const normalizeKey = (value) => {
  const collapsed = normalizeWhitespace(value);
  return normalizeSearchText(collapsed);
};
const unwrapMarkdownLink = (value) => {
const markdownMatch = value.match(/^\[[^\]]+]\((https?:\/\/[^)]+)\)(.*)$/i);
@@ -41,6 +40,16 @@ const tryDecode = (value) => {
}
};
/**
 * Applies `tryDecode` to a value until the output stops changing or the round
 * budget is used up, so multiply-encoded input is unwrapped.
 *
 * @param {string} value - Text to decode.
 * @param {number} [rounds=3] - Maximum number of decode passes.
 * @returns {string} The value after the last pass that was applied.
 */
const decodeRepeatedly = (value, rounds = 3) => {
  let result = value;
  let pass = 0;
  while (pass < rounds) {
    const next = tryDecode(result);
    // A pass that changes nothing means the value is fully decoded.
    if (next === result) {
      return result;
    }
    result = next;
    pass += 1;
  }
  return result;
};
const convertWikimediaFilePathUrl = (value) => {
const segmentIndex = value.indexOf(WIKIMEDIA_FILEPATH_SEGMENT);
if (segmentIndex < 0) return null;
@@ -55,12 +64,75 @@ const convertWikimediaFilePathUrl = (value) => {
return `${WIKIMEDIA_REDIRECT_BASE}${encodedFileName}`;
};
/**
 * Converts a direct `upload.wikimedia.org/wikipedia/commons/...` file URL into
 * the matching Commons `Special:FilePath/<file name>` URL.
 *
 * For thumbnail URLs (`.../commons/thumb/<a>/<ab>/<file>/<rendition>`) the
 * original file name (second-to-last segment) is used, not the rendition name.
 *
 * @param {*} value - Candidate image URL; non-strings yield `null`.
 * @returns {string|null} The Special:FilePath URL, or `null` when `value` is
 *   not a Commons upload URL or no usable file name can be extracted.
 */
const toWikimediaFilePathUrl = (value) => {
  const marker = 'upload.wikimedia.org/wikipedia/commons/';
  if (typeof value !== 'string') return null;

  // Look for the marker in the URL *without* query/fragment: a foreign URL that
  // merely mentions Commons in a parameter must not be rewritten.
  const cleanUrl = value.split(/[?#]/)[0];
  const markerIndex = cleanUrl.indexOf(marker);
  if (markerIndex < 0) return null;

  // Only segments after the marker can name a file; a bare `.../commons/`
  // directory URL therefore has nothing to convert.
  const parts = cleanUrl
    .slice(markerIndex + marker.length)
    .split('/')
    .filter(Boolean);
  if (parts.length === 0) return null;

  // A complete thumbnail path has four segments after `thumb`:
  // two hash directories, the file name and the rendition name.
  const thumbIndex = parts.indexOf('thumb');
  const isThumbnail = thumbIndex >= 0 && parts.length >= thumbIndex + 5;
  const fileName = isThumbnail ? parts[parts.length - 2] : parts[parts.length - 1];

  const decoded = tryDecode(fileName).trim();
  if (!decoded) return null;

  try {
    return `${WIKIMEDIA_REDIRECT_BASE}${encodeURIComponent(decoded)}`;
  } catch (error) {
    // encodeURIComponent throws URIError on lone surrogates; such a name cannot
    // be represented in a URL, so report "not convertible" instead of throwing.
    return null;
  }
};
/**
 * Validates and canonicalizes a reference to a local plant image.
 *
 * Query string and fragment are dropped, backslashes become forward slashes
 * and a leading slash is added when missing. The result must start with
 * LOCAL_PLANT_IMAGE_PREFIX, contain no `..` and match
 * LOCAL_PLANT_IMAGE_PATH_PATTERN.
 *
 * @param {*} value - Candidate path; non-strings yield `null`.
 * @returns {string|null} The canonical path, or `null` when it is not accepted.
 */
const normalizeLocalImagePath = (value) => {
  if (typeof value !== 'string') return null;

  const candidate = value.trim();
  if (candidate === '') return null;

  const [pathOnly] = candidate.split(/[?#]/);
  const unixPath = pathOnly.replace(/\\/g, '/');
  const rooted = unixPath.startsWith('/') ? unixPath : `/${unixPath}`;

  const isAccepted = rooted.startsWith(LOCAL_PLANT_IMAGE_PREFIX)
    && !rooted.includes('..')
    && LOCAL_PLANT_IMAGE_PATH_PATTERN.test(rooted);

  return isAccepted ? rooted : null;
};
/**
 * Canonicalizes a `wikimedia-search:<query>` image URI.
 *
 * The prefix is matched case-insensitively and re-emitted in its canonical
 * form; the query is decoded, whitespace-normalized and percent-encoded again.
 *
 * @param {*} value - Candidate URI; non-strings yield `null`.
 * @returns {string|null} The canonical URI, or `null` when `value` does not
 *   carry the prefix, has an empty query, or the query cannot be encoded.
 */
const normalizeWikimediaSearchUri = (value) => {
  if (typeof value !== 'string') return null;

  const trimmed = value.trim();
  if (!trimmed.toLowerCase().startsWith(WIKIMEDIA_SEARCH_PREFIX)) return null;

  const rawQuery = trimmed.slice(WIKIMEDIA_SEARCH_PREFIX.length).trim();
  if (!rawQuery) return null;

  // Decode before encoding so an already-encoded query is not encoded twice.
  const normalizedQuery = normalizeWhitespace(decodeRepeatedly(rawQuery));
  if (!normalizedQuery) return null;

  try {
    return `${WIKIMEDIA_SEARCH_PREFIX}${encodeURIComponent(normalizedQuery)}`;
  } catch (error) {
    // encodeURIComponent throws URIError on lone surrogates. Treat such input
    // as invalid so the caller can report it instead of crashing.
    return null;
  }
};
const normalizeImageUri = (rawUri) => {
if (typeof rawUri !== 'string') return null;
const trimmed = rawUri.trim();
if (!trimmed) return null;
const localPath = normalizeLocalImagePath(trimmed);
if (localPath) return localPath;
const wikimediaSearchUri = normalizeWikimediaSearchUri(trimmed);
if (wikimediaSearchUri) return wikimediaSearchUri;
const normalized = unwrapMarkdownLink(trimmed);
const converted = convertWikimediaFilePathUrl(normalized);
const candidate = (converted || normalized).replace(/^http:\/\//i, 'https://');
@@ -142,10 +214,11 @@ const prepareEntry = (rawEntry, index, existingIdMap, preserveExistingIds) => {
errors.push({
index,
field: 'imageUri',
message: 'imageUri is missing or invalid. A valid http(s) URL is required.',
message: 'imageUri is missing or invalid. Use a valid http(s) URL, a local /plants/... path, or wikimedia-search:<query>.',
value: rawEntry?.imageUri ?? null,
});
}
const imageStatus = imageUri && imageUri.startsWith(WIKIMEDIA_SEARCH_PREFIX) ? 'pending' : 'ok';
const categories = toArrayOfStrings(rawEntry?.categories);
const confidence = parseNumber(rawEntry?.confidence, 1);
@@ -168,7 +241,7 @@ const prepareEntry = (rawEntry, index, existingIdMap, preserveExistingIds) => {
name,
botanicalName,
imageUri,
imageStatus: 'ok',
imageStatus,
description,
categories,
careInfo,
@@ -335,11 +408,12 @@ const parseJsonObject = (value) => {
const toApiPlant = (row) => {
const categories = parseJsonArray(row.categories);
const careInfo = parseJsonObject(row.careInfo);
const imageUri = toWikimediaFilePathUrl(row.imageUri) || row.imageUri;
return {
id: row.id,
name: row.name,
botanicalName: row.botanicalName,
imageUri: row.imageUri,
imageUri,
imageStatus: row.imageStatus || 'ok',
description: row.description || '',
categories,
@@ -349,7 +423,7 @@ const toApiPlant = (row) => {
};
const getPlants = async (db, options = {}) => {
const query = typeof options.query === 'string' ? options.query.trim().toLowerCase() : '';
const query = typeof options.query === 'string' ? options.query.trim() : '';
const category = typeof options.category === 'string' ? options.category.trim() : '';
const limitRaw = Number(options.limit);
const limit = Number.isFinite(limitRaw)
@@ -368,15 +442,6 @@ const getPlants = async (db, options = {}) => {
confidence
FROM plants`;
const params = [];
if (query) {
sql += ` WHERE (
LOWER(name) LIKE ?
OR LOWER(botanicalName) LIKE ?
OR LOWER(COALESCE(description, '')) LIKE ?
)`;
const likePattern = `%${query}%`;
params.push(likePattern, likePattern, likePattern);
}
sql += ' ORDER BY name COLLATE NOCASE ASC';
const rows = await all(db, sql, params);
@@ -386,7 +451,12 @@ const getPlants = async (db, options = {}) => {
results = results.filter((plant) => plant.categories.includes(category));
}
return results.slice(0, limit);
if (!query) {
return results.slice(0, limit);
}
return rankHybridEntries(results, query, limit)
.map((candidate) => candidate.entry);
};
const getPlantDiagnostics = async (db) => {
@@ -565,7 +635,7 @@ const rebuildPlantsCatalog = async (db, rawEntries, options = {}) => {
entry.name,
entry.botanicalName,
entry.imageUri,
'ok',
entry.imageStatus,
entry.description,
JSON.stringify(entry.categories),
JSON.stringify(entry.careInfo),
@@ -647,6 +717,8 @@ module.exports = {
ensurePlantSchema,
getPlantDiagnostics,
getPlants,
normalizeKey,
normalizeImageUri,
toWikimediaFilePathUrl,
rebuildPlantsCatalog,
};