// File: Greenlens/server/lib/plants.js
// Snapshot: 2026-04-02 11:39:57 +02:00 (739 lines, 20 KiB, JavaScript)

const crypto = require('crypto');
const { all, get, run } = require('./postgres');
const { normalizeSearchText, rankHybridEntries } = require('./hybridSearch');
// Number of plants getPlants returns when no finite limit is supplied.
const DEFAULT_LIMIT = 60;
// Upper bound applied to any caller-supplied limit in getPlants.
const MAX_LIMIT = 500;
// Maximum number of error records attached to a PlantImportValidationError.
const MAX_AUDIT_DETAILS = 80;
// Marker used to spot a Wikimedia "Special:FilePath" URL inside a string.
const WIKIMEDIA_FILEPATH_SEGMENT = 'Special:FilePath/';
// Base URL that an encoded file name is appended to when building a FilePath URL.
const WIKIMEDIA_REDIRECT_BASE = 'https://commons.wikimedia.org/wiki/Special:FilePath/';
// Prefix of the pseudo-URI form "wikimedia-search:<query>"; entries using it get imageStatus 'pending'.
const WIKIMEDIA_SEARCH_PREFIX = 'wikimedia-search:';
// Every accepted local image path must start with this prefix.
const LOCAL_PLANT_IMAGE_PREFIX = '/plants/';
// Full shape of an accepted local path: /plants/<letters, digits, '/', '_', '-'>.<alphanumeric extension>.
const LOCAL_PLANT_IMAGE_PATH_PATTERN = /^\/plants\/[A-Za-z0-9/_-]+\.[A-Za-z0-9]+$/;
/**
 * Error thrown when an import payload is rejected before anything is written.
 * `details` carries the per-field error records collected during validation.
 */
class PlantImportValidationError extends Error {
  /**
   * @param {string} message - Human-readable summary of the rejection.
   * @param {Array<object>} details - Individual validation or duplicate error records.
   */
  constructor(message, details) {
    super(message);
    Object.assign(this, { name: 'PlantImportValidationError', details });
  }
}
/**
 * Trims a string and collapses every internal run of whitespace to one space.
 *
 * @param {string} value - Text to clean; must be a string (callers type-check first).
 * @returns {string} The trimmed, single-spaced text.
 */
const normalizeWhitespace = (value) => value.trim().split(/\s+/).join(' ');
/**
 * Builds the comparison key for a name: whitespace is normalized first, then
 * the result is passed through normalizeSearchText from ./hybridSearch.
 *
 * @param {string} value - Raw name.
 * @returns {string} Key used for lookups and duplicate detection.
 */
const normalizeKey = (value) => {
  const singleSpaced = normalizeWhitespace(value);
  return normalizeSearchText(singleSpaced);
};
/**
 * Turns a Markdown link of the form `[label](http(s)://url)suffix` into
 * `urlsuffix`. Any other string is returned unchanged.
 *
 * @param {string} value - Candidate text.
 * @returns {string} The bare URL plus trailing text, or the input itself.
 */
const unwrapMarkdownLink = (value) => {
  const parts = value.match(/^\[[^\]]+]\((https?:\/\/[^)]+)\)(.*)$/i);
  if (parts === null) {
    return value;
  }
  const target = parts[1];
  const trailing = parts[2] || '';
  return `${target}${trailing}`;
};
/**
 * Percent-decodes a string once, returning the input untouched when it
 * contains a malformed escape sequence.
 *
 * @param {string} value - Possibly percent-encoded text.
 * @returns {string} Decoded text, or `value` when decoding throws.
 */
const tryDecode = (value) => {
  let decoded = value;
  try {
    decoded = decodeURIComponent(value);
  } catch {
    // Malformed escape sequence: keep the original text.
  }
  return decoded;
};
/**
 * Applies tryDecode up to `rounds` times, stopping early once a pass leaves
 * the text unchanged. Used to undo multiply-encoded input.
 *
 * @param {string} value - Possibly multiply-encoded text.
 * @param {number} [rounds=3] - Maximum number of decoding passes.
 * @returns {string} The text after the last effective pass.
 */
const decodeRepeatedly = (value, rounds = 3) => {
  let result = value;
  let remaining = rounds;
  while (remaining > 0) {
    const next = tryDecode(result);
    if (next === result) {
      return result;
    }
    result = next;
    remaining -= 1;
  }
  return result;
};
/**
 * Rebuilds a canonical Special:FilePath URL from any string that contains the
 * `Special:FilePath/` marker. Query and fragment are dropped, the file name is
 * decoded once, whitespace-collapsed and re-encoded (slashes are kept literal).
 *
 * @param {string} value - Candidate URL.
 * @returns {string|null} Canonical URL, or null when the marker or file name is missing.
 */
const convertWikimediaFilePathUrl = (value) => {
  const markerAt = value.indexOf(WIKIMEDIA_FILEPATH_SEGMENT);
  if (markerAt === -1) {
    return null;
  }

  const tail = value.slice(markerAt + WIKIMEDIA_FILEPATH_SEGMENT.length);
  const [beforeQuery] = tail.split(/[?#]/);
  const rawFileName = beforeQuery.trim();
  if (rawFileName === '') {
    return null;
  }

  const readableName = tryDecode(rawFileName).replace(/\s+/g, ' ').trim();
  if (readableName === '') {
    return null;
  }

  // encodeURIComponent escapes '/', which is restored afterwards.
  const encodedName = encodeURIComponent(readableName).replace(/%2F/g, '/');
  return `${WIKIMEDIA_REDIRECT_BASE}${encodedName}`;
};
/**
 * Converts a direct `upload.wikimedia.org/wikipedia/commons/` URL into the
 * equivalent Special:FilePath URL. For thumbnail URLs the original file name
 * is the second-to-last path segment; otherwise it is the last one.
 *
 * @param {*} value - Candidate URL; non-strings and other hosts yield null.
 * @returns {string|null} The FilePath URL, or null when not convertible.
 */
const toWikimediaFilePathUrl = (value) => {
  const isUploadUrl = typeof value === 'string'
    && value.includes('upload.wikimedia.org/wikipedia/commons/');
  if (!isUploadUrl) {
    return null;
  }

  const [withoutQuery] = value.split(/[?#]/);
  const segments = withoutQuery.split('/').filter((segment) => segment.length > 0);
  if (segments.length < 2) {
    return null;
  }

  // A thumbnail path has at least four segments after "thumb".
  const thumbAt = segments.indexOf('thumb');
  const isThumbnail = thumbAt >= 0 && segments.length >= thumbAt + 5;
  const fileName = segments[segments.length - (isThumbnail ? 2 : 1)];
  if (!fileName) {
    return null;
  }

  const readableName = tryDecode(fileName).trim();
  if (!readableName) {
    return null;
  }
  return `${WIKIMEDIA_REDIRECT_BASE}${encodeURIComponent(readableName)}`;
};
/**
 * Validates and normalizes a local plant image path. Query and fragment are
 * removed, backslashes become forward slashes and a leading slash is added if
 * missing. The result must start with LOCAL_PLANT_IMAGE_PREFIX, contain no
 * `..`, and match LOCAL_PLANT_IMAGE_PATH_PATTERN.
 *
 * @param {*} value - Candidate path.
 * @returns {string|null} The normalized path, or null when it is not acceptable.
 */
const normalizeLocalImagePath = (value) => {
  if (typeof value !== 'string') {
    return null;
  }
  const candidate = value.trim();
  if (candidate === '') {
    return null;
  }

  const [pathOnly] = candidate.split(/[?#]/);
  const forwardSlashed = pathOnly.replace(/\\/g, '/');
  const rooted = forwardSlashed.startsWith('/') ? forwardSlashed : `/${forwardSlashed}`;

  const isAccepted = rooted.startsWith(LOCAL_PLANT_IMAGE_PREFIX)
    && !rooted.includes('..')
    && LOCAL_PLANT_IMAGE_PATH_PATTERN.test(rooted);
  return isAccepted ? rooted : null;
};
/**
 * Normalizes a `wikimedia-search:<query>` pseudo-URI. The prefix is matched
 * case-insensitively; the query is repeatedly decoded, whitespace-normalized
 * and re-encoded once.
 *
 * @param {*} value - Candidate URI.
 * @returns {string|null} The canonical pseudo-URI, or null when the prefix or query is missing.
 */
const normalizeWikimediaSearchUri = (value) => {
  if (typeof value !== 'string') {
    return null;
  }
  const candidate = value.trim();
  const hasPrefix = candidate.toLowerCase().startsWith(WIKIMEDIA_SEARCH_PREFIX);
  if (!hasPrefix) {
    return null;
  }

  const encodedQuery = candidate.slice(WIKIMEDIA_SEARCH_PREFIX.length).trim();
  if (encodedQuery.length === 0) {
    return null;
  }

  const query = normalizeWhitespace(decodeRepeatedly(encodedQuery));
  return query ? `${WIKIMEDIA_SEARCH_PREFIX}${encodeURIComponent(query)}` : null;
};
/**
 * Normalizes an image reference into one of three accepted forms, tried in
 * this order: a local /plants/ path, a wikimedia-search pseudo-URI, or an
 * absolute https URL. Markdown links are unwrapped, Special:FilePath URLs are
 * canonicalized, and http is upgraded to https.
 *
 * @param {*} rawUri - Value from the import payload.
 * @returns {string|null} The normalized reference, or null when it is unusable.
 */
const normalizeImageUri = (rawUri) => {
  if (typeof rawUri !== 'string') {
    return null;
  }
  const input = rawUri.trim();
  if (input === '') {
    return null;
  }

  const recognised = normalizeLocalImagePath(input) || normalizeWikimediaSearchUri(input);
  if (recognised) {
    return recognised;
  }

  const unwrapped = unwrapMarkdownLink(input);
  const httpsCandidate = (convertWikimediaFilePathUrl(unwrapped) || unwrapped)
    .replace(/^http:\/\//i, 'https://');

  let url;
  try {
    url = new URL(httpsCandidate);
  } catch {
    return null;
  }

  const scheme = url.protocol.toLowerCase();
  const isWebScheme = scheme === 'https:' || scheme === 'http:';
  if (!isWebScheme || !url.hostname) {
    return null;
  }
  url.protocol = 'https:';
  return url.toString();
};
/**
 * Extracts the distinct, non-empty, whitespace-normalized strings from an
 * array, keeping first-occurrence order. Non-arrays yield an empty list and
 * non-string items are ignored.
 *
 * @param {*} value - Candidate array.
 * @returns {string[]} De-duplicated cleaned strings.
 */
const toArrayOfStrings = (value) => {
  if (!Array.isArray(value)) {
    return [];
  }
  const unique = new Set();
  for (const item of value) {
    if (typeof item !== 'string') continue;
    const cleaned = normalizeWhitespace(item);
    if (cleaned) {
      unique.add(cleaned);
    }
  }
  return [...unique];
};
/**
 * Parses a finite number from a payload value, falling back when none is
 * present.
 *
 * Only numbers and non-blank strings are converted. `Number()` coerces null,
 * '' and whitespace-only strings to 0 (and booleans to 0/1), which would turn
 * a missing value into a real 0 instead of the caller's default.
 *
 * @param {*} value - Raw value from the payload.
 * @param {number} fallback - Returned when `value` holds no finite number.
 * @returns {number} The parsed finite number, or `fallback`.
 */
const parseNumber = (value, fallback) => {
  const isNumericInput = typeof value === 'number'
    || (typeof value === 'string' && value.trim() !== '');
  if (!isNumericInput) {
    return fallback;
  }
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : fallback;
};
/**
 * Derives a deterministic plant id from a botanical name: `plant_` followed by
 * the first 16 hex characters of the SHA-1 of the normalized key.
 *
 * @param {string} botanicalName - Botanical name to derive the id from.
 * @returns {string} Id of the form `plant_<16 hex chars>`.
 */
const buildStablePlantId = (botanicalName) => {
  const hasher = crypto.createHash('sha1');
  hasher.update(normalizeKey(botanicalName));
  const hexDigest = hasher.digest('hex');
  return `plant_${hexDigest.slice(0, 16)}`;
};
/**
 * Indexes existing plant rows by normalized botanical name. Rows without a
 * string `botanicalName` and string `id` are skipped; later rows overwrite
 * earlier ones with the same key.
 *
 * @param {Array<object>} rows - Rows carrying `id` and `botanicalName`.
 * @returns {Map<string, string>} Normalized botanical key to plant id.
 */
const parseExistingIdMap = (rows) => rows.reduce((lookup, row) => {
  const isUsable = row
    && typeof row.botanicalName === 'string'
    && typeof row.id === 'string';
  if (isUsable) {
    lookup.set(normalizeKey(row.botanicalName), row.id);
  }
  return lookup;
}, new Map());
/**
 * Normalizes one raw import entry and collects its validation errors.
 *
 * The id is chosen in this order: the incoming `id`, the existing id for the
 * same botanical name (only when `preserveExistingIds` is set), then an id
 * derived from the botanical name. The entry is always returned, even when
 * errors were recorded.
 *
 * @param {*} rawEntry - Entry from the import payload.
 * @param {number} index - Position of the entry in the payload, echoed in errors.
 * @param {Map<string, string>} existingIdMap - Normalized botanical key to current id.
 * @param {boolean} preserveExistingIds - Whether existing ids may be reused.
 * @returns {{entry: object, errors: Array<object>}} Normalized entry and its errors.
 */
const prepareEntry = (rawEntry, index, existingIdMap, preserveExistingIds) => {
  const errors = [];
  const report = (field, message, extra = {}) => {
    errors.push({ index, field, message, ...extra });
  };
  const cleanText = (candidate) => (
    typeof candidate === 'string' ? normalizeWhitespace(candidate) : ''
  );
  const textOrUnknown = (candidate) => (
    typeof candidate === 'string' && candidate.trim()
      ? normalizeWhitespace(candidate)
      : 'Unknown'
  );

  const name = cleanText(rawEntry?.name);
  const botanicalName = cleanText(rawEntry?.botanicalName);
  if (!name) {
    report('name', 'name is required.');
  }
  if (!botanicalName) {
    report('botanicalName', 'botanicalName is required.');
  }

  const botanicalKey = botanicalName ? normalizeKey(botanicalName) : '';
  const existingId = preserveExistingIds ? existingIdMap.get(botanicalKey) : null;
  const incomingId = cleanText(rawEntry?.id);
  let id = incomingId || existingId;
  if (!id) {
    id = botanicalName ? buildStablePlantId(botanicalName) : '';
  }
  if (!id) {
    report('id', 'Could not derive stable plant id.');
  }

  const imageUri = normalizeImageUri(rawEntry?.imageUri);
  if (!imageUri) {
    report(
      'imageUri',
      'imageUri is missing or invalid. Use a valid http(s) URL, a local /plants/... path, or wikimedia-search:<query>.',
      { value: rawEntry?.imageUri ?? null },
    );
  }
  // Search pseudo-URIs still need resolving, so they are marked pending.
  const isPendingImage = Boolean(imageUri) && imageUri.startsWith(WIKIMEDIA_SEARCH_PREFIX);
  const imageStatus = isPendingImage ? 'pending' : 'ok';

  const categories = toArrayOfStrings(rawEntry?.categories);

  // Round to four decimals, then clamp into [0, 1].
  const parsedConfidence = parseNumber(rawEntry?.confidence, 1);
  const roundedConfidence = Number(parsedConfidence.toFixed(4));
  const confidence = Math.max(0, Math.min(1, roundedConfidence));

  let description = '';
  if (typeof rawEntry?.description === 'string') {
    description = rawEntry.description.trim();
  }

  const careSource = rawEntry?.careInfo || {};
  const wateringDays = Math.round(parseNumber(careSource.waterIntervalDays, 7));
  const careInfo = {
    waterIntervalDays: Math.max(1, wateringDays),
    light: textOrUnknown(careSource.light),
    temp: textOrUnknown(careSource.temp),
  };

  const entry = {
    id,
    name,
    botanicalName,
    imageUri,
    imageStatus,
    description,
    categories,
    careInfo,
    confidence,
  };
  return { entry, errors };
};
/**
 * Finds entries that share a key and produces one error record per offending
 * entry. Entries with a falsy key are ignored. Errors are grouped by key in
 * first-seen order, and by entry position within each key.
 *
 * @param {Array<object>} entries - Prepared entries to inspect.
 * @param {function(object): *} getKey - Extracts the key to compare.
 * @param {string} fieldName - Field name written to each error.
 * @param {string} message - Message written to each error.
 * @returns {Array<{index: number, field: string, message: string, value: *}>} Duplicate errors.
 */
const collectDuplicateErrors = (entries, getKey, fieldName, message) => {
  const positionsByKey = new Map();
  entries.forEach((entry, position) => {
    const key = getKey(entry);
    if (!key) return;
    if (positionsByKey.has(key)) {
      positionsByKey.get(key).push(position);
    } else {
      positionsByKey.set(key, [position]);
    }
  });

  const duplicateErrors = [];
  for (const [key, positions] of positionsByKey) {
    if (positions.length < 2) continue;
    for (const index of positions) {
      duplicateErrors.push({ index, field: fieldName, message, value: key });
    }
  }
  return duplicateErrors;
};
/**
 * Rejects a prepared payload that contains duplicates. Ids and normalized
 * botanical names are always checked; image URIs only when
 * `enforceUniqueImages` is set.
 *
 * @param {Array<object>} entries - Prepared entries.
 * @param {boolean} enforceUniqueImages - Whether shared image URIs are an error.
 * @returns {void}
 * @throws {PlantImportValidationError} With at most MAX_AUDIT_DETAILS error records.
 */
const assertValidPreparedEntries = (entries, enforceUniqueImages) => {
  const checks = [
    {
      getKey: (entry) => entry.id,
      field: 'id',
      message: 'Duplicate plant id detected in import payload.',
    },
    {
      getKey: (entry) => normalizeKey(entry.botanicalName),
      field: 'botanicalName',
      message: 'Duplicate botanicalName detected in import payload.',
    },
  ];
  if (enforceUniqueImages) {
    checks.push({
      getKey: (entry) => entry.imageUri,
      field: 'imageUri',
      message: 'Duplicate imageUri detected across multiple plants.',
    });
  }

  const duplicateErrors = checks.flatMap(
    ({ getKey, field, message }) => collectDuplicateErrors(entries, getKey, field, message),
  );
  if (duplicateErrors.length === 0) {
    return;
  }
  throw new PlantImportValidationError(
    'Import payload contains duplicate keys.',
    duplicateErrors.slice(0, MAX_AUDIT_DETAILS),
  );
};
/**
 * Coerces a stored value to an array. Arrays are returned as-is, JSON strings
 * are parsed, and anything else (including invalid JSON or JSON that is not an
 * array) yields an empty array.
 *
 * @param {*} value - Array, JSON text, or anything else.
 * @returns {Array} The array, or [] when none can be obtained.
 */
const parseJsonArray = (value) => {
  if (Array.isArray(value)) {
    return value;
  }
  if (typeof value !== 'string' || value === '') {
    return [];
  }
  let parsed;
  try {
    parsed = JSON.parse(value);
  } catch {
    return [];
  }
  return Array.isArray(parsed) ? parsed : [];
};
/**
 * Coerces a stored value to a plain object container. Non-array objects are
 * returned as-is, JSON strings are parsed, and anything else (including
 * invalid JSON, arrays and JSON primitives) yields an empty object.
 *
 * @param {*} value - Object, JSON text, or anything else.
 * @returns {object} The object, or {} when none can be obtained.
 */
const parseJsonObject = (value) => {
  const isObjectLike = (candidate) => Boolean(candidate)
    && typeof candidate === 'object'
    && !Array.isArray(candidate);

  if (isObjectLike(value)) {
    return value;
  }
  if (typeof value !== 'string' || value === '') {
    return {};
  }
  let parsed;
  try {
    parsed = JSON.parse(value);
  } catch {
    return {};
  }
  return isObjectLike(parsed) ? parsed : {};
};
/**
 * Maps a database row to the plant shape returned by this module. Direct
 * Wikimedia upload URLs are rewritten to Special:FilePath URLs, JSON columns
 * are parsed, and missing status, description and confidence get defaults.
 *
 * @param {object} row - Row with camelCase aliases as selected by getPlants.
 * @returns {object} Plant object.
 */
const toApiPlant = (row) => ({
  id: row.id,
  name: row.name,
  botanicalName: row.botanicalName,
  imageUri: toWikimediaFilePathUrl(row.imageUri) || row.imageUri,
  imageStatus: row.imageStatus || 'ok',
  description: row.description || '',
  categories: parseJsonArray(row.categories),
  careInfo: parseJsonObject(row.careInfo),
  confidence: Number(row.confidence) || 0,
});
/**
 * Creates the `plants` and `plant_import_audit` tables and their indexes when
 * they do not exist yet. Statements run one after another, in order.
 *
 * @param {object} db - Database handle accepted by `run` from ./postgres.
 * @returns {Promise<void>}
 */
const ensurePlantSchema = async (db) => {
  const statements = [
    `CREATE TABLE IF NOT EXISTS plants (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
botanical_name TEXT NOT NULL,
image_uri TEXT NOT NULL,
image_status TEXT NOT NULL DEFAULT 'ok',
description TEXT,
categories JSONB NOT NULL DEFAULT '[]'::jsonb,
care_info JSONB NOT NULL DEFAULT '{}'::jsonb,
confidence DOUBLE PRECISION NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP
)`,
    `CREATE TABLE IF NOT EXISTS plant_import_audit (
id BIGSERIAL PRIMARY KEY,
source TEXT NOT NULL,
imported_count INTEGER NOT NULL DEFAULT 0,
preserved_ids INTEGER NOT NULL DEFAULT 0,
duplicate_image_count INTEGER NOT NULL DEFAULT 0,
status TEXT NOT NULL,
details JSONB,
backup_table TEXT,
started_at TIMESTAMPTZ NOT NULL,
completed_at TIMESTAMPTZ NOT NULL
)`,
    // Botanical names are unique case-insensitively.
    'CREATE UNIQUE INDEX IF NOT EXISTS idx_plants_botanical_name_unique ON plants (LOWER(botanical_name))',
    'CREATE INDEX IF NOT EXISTS idx_plants_name ON plants (LOWER(name))',
    'CREATE INDEX IF NOT EXISTS idx_plant_import_audit_started_at ON plant_import_audit (started_at DESC)',
  ];

  for (const sql of statements) {
    await run(db, sql);
  }
};
/**
 * Lists plants, optionally filtered by category and ranked against a search
 * query.
 *
 * All rows are loaded ordered by lowercase name and converted with toApiPlant.
 * Without a query the first `limit` plants are returned; with a query the
 * plants are ranked by rankHybridEntries and the ranked entries are returned.
 *
 * A missing, null or blank `limit` falls back to DEFAULT_LIMIT. `Number()`
 * coerces null and '' to 0, which would otherwise be clamped to a single
 * result.
 *
 * @param {object} db - Database handle accepted by `all` from ./postgres.
 * @param {object} [options]
 * @param {string} [options.query] - Free-text search; blank means no ranking.
 * @param {string} [options.category] - Exact category a plant must include.
 * @param {number|string} [options.limit] - Maximum results, clamped to [1, MAX_LIMIT].
 * @returns {Promise<Array<object>>} Matching plants.
 */
const getPlants = async (db, options = {}) => {
  const query = typeof options.query === 'string' ? options.query.trim() : '';
  const category = typeof options.category === 'string' ? options.category.trim() : '';

  const requestedLimit = options.limit;
  const hasLimit = requestedLimit != null
    && !(typeof requestedLimit === 'string' && requestedLimit.trim() === '');
  const limitRaw = hasLimit ? Number(requestedLimit) : Number.NaN;
  const limit = Number.isFinite(limitRaw)
    ? Math.max(1, Math.min(MAX_LIMIT, Math.round(limitRaw)))
    : DEFAULT_LIMIT;

  const rows = await all(
    db,
    `SELECT
id,
name,
botanical_name AS "botanicalName",
image_uri AS "imageUri",
image_status AS "imageStatus",
description,
categories,
care_info AS "careInfo",
confidence
FROM plants
ORDER BY LOWER(name) ASC`,
  );

  let results = rows.map(toApiPlant);
  if (category) {
    results = results.filter((plant) => plant.categories.includes(category));
  }
  if (!query) {
    return results.slice(0, limit);
  }
  return rankHybridEntries(results, query, limit)
    .map((candidate) => candidate.entry);
};
/**
 * Collects health figures for the plant catalog: totals, rows with missing
 * images or a non-ok image status, image URIs and botanical names used by more
 * than one row (up to 200 each), and the 20 most recent import audit rows.
 *
 * Botanical-name duplicates are grouped by LOWER(botanical_name) only, so rows
 * that differ just by letter case are reported. Also grouping by the raw
 * column would split such rows into separate groups and hide them. One
 * spelling per group is returned via MIN(botanical_name).
 *
 * @param {object} db - Database handle accepted by `get` and `all` from ./postgres.
 * @returns {Promise<object>} Diagnostic report.
 */
const getPlantDiagnostics = async (db) => {
  const totals = await get(
    db,
    `SELECT
COUNT(*) AS "totalCount",
SUM(CASE WHEN image_uri IS NULL OR BTRIM(image_uri) = '' THEN 1 ELSE 0 END) AS "missingImageCount",
SUM(CASE WHEN COALESCE(image_status, 'ok') <> 'ok' THEN 1 ELSE 0 END) AS "nonOkImageStatusCount"
FROM plants`,
  );
  const duplicateImages = await all(
    db,
    `SELECT image_uri AS "imageUri", COUNT(*) AS count
FROM plants
WHERE image_uri IS NOT NULL AND BTRIM(image_uri) <> ''
GROUP BY image_uri
HAVING COUNT(*) > 1
ORDER BY count DESC, image_uri ASC
LIMIT 200`,
  );
  const duplicateBotanicalNames = await all(
    db,
    `SELECT MIN(botanical_name) AS "botanicalName", COUNT(*) AS count
FROM plants
WHERE botanical_name IS NOT NULL AND BTRIM(botanical_name) <> ''
GROUP BY LOWER(botanical_name)
HAVING COUNT(*) > 1
ORDER BY count DESC, "botanicalName" ASC
LIMIT 200`,
  );
  const recentAudits = await all(
    db,
    `SELECT
id,
source,
imported_count AS "importedCount",
preserved_ids AS "preservedIds",
duplicate_image_count AS "duplicateImageCount",
status,
details,
backup_table AS "backupTable",
started_at AS "startedAt",
completed_at AS "completedAt"
FROM plant_import_audit
ORDER BY started_at DESC
LIMIT 20`,
  );

  return {
    // Aggregates may be null or strings, so coerce with a 0 default.
    totalCount: Number(totals?.totalCount || 0),
    missingImageCount: Number(totals?.missingImageCount || 0),
    nonOkImageStatusCount: Number(totals?.nonOkImageStatusCount || 0),
    duplicateImageCount: duplicateImages.length,
    duplicateImages,
    duplicateBotanicalNameCount: duplicateBotanicalNames.length,
    duplicateBotanicalNames,
    recentAudits: recentAudits.map((audit) => ({
      ...audit,
      details: parseJsonObject(audit.details),
    })),
  };
};
/**
 * Inserts one row into `plant_import_audit`. Missing `details` are stored as
 * an empty JSON object and a missing `backupTable` as NULL.
 *
 * @param {object} db - Database handle accepted by `run` from ./postgres.
 * @param {object} audit - Audit fields: source, importedCount, preservedIds,
 *   duplicateImageCount, status, details, backupTable, startedAt, completedAt.
 * @returns {Promise<void>}
 */
const writeAuditRow = async (db, audit) => {
  const {
    source,
    importedCount,
    preservedIds,
    duplicateImageCount,
    status,
    details,
    backupTable,
    startedAt,
    completedAt,
  } = audit;

  const insertSql = `INSERT INTO plant_import_audit (
source,
imported_count,
preserved_ids,
duplicate_image_count,
status,
details,
backup_table,
started_at,
completed_at
) VALUES ($1, $2, $3, $4, $5, CAST($6 AS jsonb), $7, $8, $9)`;

  const params = [
    source,
    importedCount,
    preservedIds,
    duplicateImageCount,
    status,
    JSON.stringify(details || {}),
    backupTable || null,
    startedAt,
    completedAt,
  ];

  await run(db, insertSql, params);
};
/**
 * Validates a value that will be interpolated into SQL as an identifier.
 *
 * Only strings are accepted. `RegExp.prototype.test` coerces its argument, so
 * without the type check values such as null, undefined or ['abc'] would pass
 * as "null", "undefined" or "abc" and be returned as non-strings.
 *
 * @param {string} value - Candidate identifier.
 * @returns {string} The same identifier, unchanged.
 * @throws {Error} When `value` is not a string of letters, digits and
 *   underscores that starts with a letter or underscore.
 */
const sanitizeIdentifier = (value) => {
  if (typeof value !== 'string' || !/^[A-Za-z_][A-Za-z0-9_]*$/.test(value)) {
    throw new Error(`Invalid SQL identifier: ${String(value)}`);
  }
  return value;
};
/**
 * Starts a transaction and returns the handle to run statements on, plus a
 * `release` callback to call once the transaction has finished.
 *
 * When `db` exposes `connect()`, a dedicated client is checked out so every
 * statement runs on the same connection. If `BEGIN` fails on that client, the
 * client is released before the error is rethrown; the caller never receives
 * the `release` callback in that case and could not return it itself.
 *
 * @param {object} db - Handle with an optional `connect()` method.
 * @returns {Promise<{tx: object, release: function(): void}>} Transaction handle and releaser.
 */
const openTransaction = async (db) => {
  if (typeof db.connect !== 'function') {
    await run(db, 'BEGIN');
    return {
      tx: db,
      release: () => {},
    };
  }

  const client = await db.connect();
  try {
    await run(client, 'BEGIN');
  } catch (error) {
    client.release();
    throw error;
  }
  return {
    tx: client,
    release: () => client.release(),
  };
};
/**
 * Replaces the whole `plants` table with the given entries inside a single
 * transaction.
 *
 * Steps: validate and normalize every entry, reject duplicates, copy the
 * current table into `plants_backup_<yyyymmddhhmmss>`, delete all rows, insert
 * the prepared entries, make sure the unique indexes match the options, commit
 * and write an audit row.
 *
 * On failure the transaction is rolled back and a 'failed' audit row is
 * written on a best-effort basis. If that audit write also fails it is logged,
 * and the original error is still the one rethrown, so the real cause is not
 * hidden.
 *
 * @param {object} db - Database handle; given to openTransaction for the write phase.
 * @param {Array<object>} rawEntries - Import payload.
 * @param {object} [options]
 * @param {string} [options.source='manual'] - Label stored in the audit row.
 * @param {boolean} [options.preserveExistingIds=true] - Reuse ids of plants with the same botanical name.
 * @param {boolean} [options.enforceUniqueImages=true] - Reject and index against shared image URIs.
 * @returns {Promise<object>} Summary: source, importedCount, preservedIds,
 *   duplicateImageCount, backupTable, startedAt, completedAt.
 * @throws {PlantImportValidationError} When the payload is not an array, fails
 *   validation, or contains duplicates.
 */
const rebuildPlantsCatalog = async (db, rawEntries, options = {}) => {
  if (!Array.isArray(rawEntries)) {
    throw new PlantImportValidationError('Import payload must be an array of entries.', [
      { field: 'entries', message: 'Expected an array of plant objects.' },
    ]);
  }

  const source = typeof options.source === 'string' && options.source.trim()
    ? options.source.trim()
    : 'manual';
  const preserveExistingIds = options.preserveExistingIds !== false;
  const enforceUniqueImages = options.enforceUniqueImages !== false;
  const startedAtIso = new Date().toISOString();

  const existingRows = await all(
    db,
    'SELECT id, botanical_name AS "botanicalName" FROM plants',
  );
  const existingIdMap = parseExistingIdMap(existingRows);

  const validationErrors = [];
  const preparedEntries = rawEntries.map((rawEntry, index) => {
    const prepared = prepareEntry(rawEntry, index, existingIdMap, preserveExistingIds);
    if (prepared.errors.length > 0) {
      validationErrors.push(...prepared.errors);
    }
    return prepared.entry;
  });
  if (validationErrors.length > 0) {
    throw new PlantImportValidationError(
      'Import payload failed validation checks.',
      validationErrors.slice(0, MAX_AUDIT_DETAILS),
    );
  }
  assertValidPreparedEntries(preparedEntries, enforceUniqueImages);

  // Count entries whose final id equals the id already stored for that botanical name.
  const preservedIds = preparedEntries.reduce((count, entry) => {
    if (existingIdMap.get(normalizeKey(entry.botanicalName)) === entry.id) return count + 1;
    return count;
  }, 0);

  // Strip the ISO separators and keep the first 14 digits: yyyymmddhhmmss.
  const timestamp = startedAtIso.replace(/[-:.TZ]/g, '').slice(0, 14);
  const backupTable = sanitizeIdentifier(`plants_backup_${timestamp}`);
  const details = {
    enforceUniqueImages,
    preserveExistingIds,
    inputCount: rawEntries.length,
    preparedCount: preparedEntries.length,
  };

  const { tx, release } = await openTransaction(db);
  try {
    await run(tx, `DROP TABLE IF EXISTS ${backupTable}`);
    await run(tx, `CREATE TABLE ${backupTable} AS SELECT * FROM plants`);
    await run(tx, 'DELETE FROM plants');
    for (const entry of preparedEntries) {
      await run(
        tx,
        `INSERT INTO plants (
id,
name,
botanical_name,
image_uri,
image_status,
description,
categories,
care_info,
confidence,
created_at,
updated_at
) VALUES ($1, $2, $3, $4, $5, $6, CAST($7 AS jsonb), CAST($8 AS jsonb), $9, $10, $11)`,
        [
          entry.id,
          entry.name,
          entry.botanicalName,
          entry.imageUri,
          entry.imageStatus,
          entry.description,
          JSON.stringify(entry.categories),
          JSON.stringify(entry.careInfo),
          entry.confidence,
          startedAtIso,
          startedAtIso,
        ],
      );
    }
    await run(
      tx,
      'CREATE UNIQUE INDEX IF NOT EXISTS idx_plants_botanical_name_unique ON plants (LOWER(botanical_name))',
    );
    if (enforceUniqueImages) {
      await run(
        tx,
        'CREATE UNIQUE INDEX IF NOT EXISTS idx_plants_image_uri_unique ON plants (image_uri)',
      );
    } else {
      await run(tx, 'DROP INDEX IF EXISTS idx_plants_image_uri_unique');
    }
    await run(tx, 'COMMIT');
  } catch (error) {
    try {
      await run(tx, 'ROLLBACK');
    } catch (rollbackError) {
      console.error('Failed to rollback plant rebuild transaction.', rollbackError);
    }
    release();

    const failedAtIso = new Date().toISOString();
    try {
      await writeAuditRow(db, {
        source,
        importedCount: 0,
        preservedIds: 0,
        duplicateImageCount: 0,
        status: 'failed',
        details: {
          ...details,
          error: error instanceof Error ? error.message : String(error),
        },
        backupTable: null,
        startedAt: startedAtIso,
        completedAt: failedAtIso,
      });
    } catch (auditError) {
      // The audit row is best-effort here; the rebuild error below is the one callers need.
      console.error('Failed to write audit row for failed plant rebuild.', auditError);
    }
    throw error;
  }
  release();

  const duplicateImages = await all(
    db,
    `SELECT image_uri AS "imageUri", COUNT(*) AS count
FROM plants
GROUP BY image_uri
HAVING COUNT(*) > 1`,
  );
  const completedAtIso = new Date().toISOString();
  await writeAuditRow(db, {
    source,
    importedCount: preparedEntries.length,
    preservedIds,
    duplicateImageCount: duplicateImages.length,
    status: 'success',
    details,
    backupTable,
    startedAt: startedAtIso,
    completedAt: completedAtIso,
  });

  return {
    source,
    importedCount: preparedEntries.length,
    preservedIds,
    duplicateImageCount: duplicateImages.length,
    backupTable,
    startedAt: startedAtIso,
    completedAt: completedAtIso,
  };
};
// Public surface of this module: the validation error class, schema setup,
// read helpers, the normalizers that are also exported, and the catalog rebuild.
module.exports = {
PlantImportValidationError,
ensurePlantSchema,
getPlantDiagnostics,
getPlants,
normalizeKey,
normalizeImageUri,
toWikimediaFilePathUrl,
rebuildPlantsCatalog,
};