Initial commit for Greenlens
This commit is contained in:
@@ -1,11 +1,15 @@
|
||||
const crypto = require('crypto');
|
||||
const { all, get, run } = require('./sqlite');
|
||||
const { normalizeSearchText, rankHybridEntries } = require('./hybridSearch');
|
||||
|
||||
// NOTE(review): presumably the page size used when a caller supplies no limit — the use site is outside this view; confirm.
const DEFAULT_LIMIT = 60;
|
||||
// NOTE(review): presumably the upper bound for a caller-supplied limit — the clamp is outside this view; confirm.
const MAX_LIMIT = 500;
|
||||
// NOTE(review): looks like a cap on how many detail records an audit keeps — usage not visible here; confirm.
const MAX_AUDIT_DETAILS = 80;
|
||||
// Path fragment searched for (via indexOf) to recognise Wikimedia "Special:FilePath" URLs.
const WIKIMEDIA_FILEPATH_SEGMENT = 'Special:FilePath/';
|
||||
// Base URL that generated Wikimedia image links are built on; an encoded file name is appended to it.
const WIKIMEDIA_REDIRECT_BASE = 'https://commons.wikimedia.org/wiki/Special:FilePath/';
|
||||
// Scheme-like prefix of "search" pseudo-URIs; entries whose imageUri starts with it get imageStatus 'pending'.
const WIKIMEDIA_SEARCH_PREFIX = 'wikimedia-search:';
|
||||
// Required leading path of a locally hosted plant image.
const LOCAL_PLANT_IMAGE_PREFIX = '/plants/';
|
||||
// Whole-path shape of a local image: "/plants/", then letters, digits, "/", "_" or "-", then a single ".ext" suffix.
const LOCAL_PLANT_IMAGE_PATH_PATTERN = /^\/plants\/[A-Za-z0-9/_-]+\.[A-Za-z0-9]+$/;
|
||||
|
||||
class PlantImportValidationError extends Error {
|
||||
constructor(message, details) {
|
||||
@@ -19,12 +23,7 @@ const normalizeWhitespace = (value) => {
|
||||
return value.trim().replace(/\s+/g, ' ');
|
||||
};
|
||||
|
||||
/**
 * Builds the canonical comparison key for a text value.
 *
 * Whitespace is collapsed first with `normalizeWhitespace`, and the result is
 * handed to `normalizeSearchText` from `./hybridSearch`. Only this single
 * definition is kept: the earlier inline implementation (lower-casing plus
 * NFD accent stripping) declared the same `const` a second time, which is a
 * redeclaration error.
 *
 * @param {string} value - Raw text such as a plant name.
 * @returns {string} Whatever `normalizeSearchText` returns for the
 *   whitespace-normalised input.
 */
const normalizeKey = (value) => normalizeSearchText(normalizeWhitespace(value));
|
||||
|
||||
const unwrapMarkdownLink = (value) => {
|
||||
const markdownMatch = value.match(/^\[[^\]]+]\((https?:\/\/[^)]+)\)(.*)$/i);
|
||||
@@ -41,6 +40,16 @@ const tryDecode = (value) => {
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Applies `tryDecode` over and over until the value stops changing or the
 * round budget is used up, so multiply-encoded input is unwrapped.
 *
 * @param {string} value - Possibly (multiply) encoded text.
 * @param {number} [rounds=3] - Maximum number of decode passes.
 * @returns {string} The value after the last pass that was performed.
 */
const decodeRepeatedly = (value, rounds = 3) => {
  let result = value;
  let remaining = rounds;

  while (remaining > 0) {
    const next = tryDecode(result);
    // A pass that changes nothing means the value is fully decoded.
    if (next === result) {
      return result;
    }
    result = next;
    remaining -= 1;
  }

  return result;
};
|
||||
|
||||
const convertWikimediaFilePathUrl = (value) => {
|
||||
const segmentIndex = value.indexOf(WIKIMEDIA_FILEPATH_SEGMENT);
|
||||
if (segmentIndex < 0) return null;
|
||||
@@ -55,12 +64,75 @@ const convertWikimediaFilePathUrl = (value) => {
|
||||
return `${WIKIMEDIA_REDIRECT_BASE}${encodedFileName}`;
|
||||
};
|
||||
|
||||
/**
 * Converts a direct `upload.wikimedia.org/wikipedia/commons/...` image URL
 * into the equivalent `Special:FilePath` redirect URL.
 *
 * Both the plain layout (`.../commons/a/ab/File.jpg`) and the thumbnail layout
 * (`.../commons/thumb/a/ab/File.jpg/320px-File.jpg`) are handled. For
 * thumbnails the original file name is the second-to-last segment.
 *
 * The marker is looked up only after the query string and fragment are
 * removed, and the file name is read only from the path that follows the
 * marker. A value that merely mentions the marker in its query, or that has
 * no file segment, yields `null` rather than a link built from the host name
 * or the `commons` directory.
 *
 * @param {*} value - Candidate image URL.
 * @returns {string|null} The redirect URL, or `null` when the value is not a
 *   usable Wikimedia Commons upload URL.
 */
const toWikimediaFilePathUrl = (value) => {
  if (typeof value !== 'string') return null;

  const uploadMarker = 'upload.wikimedia.org/wikipedia/commons/';

  // Strip query/fragment first so the marker has to be part of the real path.
  const cleanUrl = value.split(/[?#]/)[0];
  const markerIndex = cleanUrl.indexOf(uploadMarker);
  if (markerIndex < 0) return null;

  const segments = cleanUrl
    .slice(markerIndex + uploadMarker.length)
    .split('/')
    .filter(Boolean);
  if (segments.length === 0) return null;

  // Thumbnail layout: thumb/<h>/<hh>/<file>/<size>-<file>.
  const thumbIndex = segments.indexOf('thumb');
  const isThumbnail = thumbIndex >= 0 && segments.length >= thumbIndex + 5;
  const fileName = isThumbnail
    ? segments[segments.length - 2]
    : segments[segments.length - 1];

  // Decode once, then re-encode, so the file name is not double-escaped.
  const decoded = tryDecode(fileName).trim();
  if (!decoded) return null;

  return `${WIKIMEDIA_REDIRECT_BASE}${encodeURIComponent(decoded)}`;
};
|
||||
|
||||
/**
 * Validates a reference to a locally hosted plant image and returns it in
 * canonical form.
 *
 * The query string and fragment are dropped, backslashes become forward
 * slashes, and a leading slash is added when missing. The result must start
 * with `LOCAL_PLANT_IMAGE_PREFIX`, contain no `..`, and match
 * `LOCAL_PLANT_IMAGE_PATH_PATTERN`.
 *
 * @param {*} value - Candidate path.
 * @returns {string|null} The canonical path, or `null` when the value is not
 *   an acceptable local image path.
 */
const normalizeLocalImagePath = (value) => {
  const candidate = typeof value === 'string' ? value.trim() : '';
  if (candidate === '') return null;

  // Keep only the path part, then unify Windows-style separators.
  const [pathOnly] = candidate.split(/[?#]/);
  const unixPath = pathOnly.replace(/\\/g, '/');
  const absolutePath = unixPath.startsWith('/') ? unixPath : `/${unixPath}`;

  const isAcceptable = absolutePath.startsWith(LOCAL_PLANT_IMAGE_PREFIX)
    && !absolutePath.includes('..')
    && LOCAL_PLANT_IMAGE_PATH_PATTERN.test(absolutePath);

  return isAcceptable ? absolutePath : null;
};
|
||||
|
||||
/**
 * Canonicalises a `wikimedia-search:<query>` pseudo-URI.
 *
 * The prefix is matched case-insensitively. The query is decoded with
 * `decodeRepeatedly`, whitespace-normalised, and re-encoded exactly once
 * behind the canonical prefix.
 *
 * @param {*} value - Candidate URI.
 * @returns {string|null} The canonical pseudo-URI, or `null` when the value is
 *   not a string, lacks the prefix, or carries an empty query.
 */
const normalizeWikimediaSearchUri = (value) => {
  if (typeof value !== 'string') return null;

  const candidate = value.trim();
  const hasPrefix = candidate.toLowerCase().startsWith(WIKIMEDIA_SEARCH_PREFIX);
  if (!hasPrefix) return null;

  const queryPart = candidate.slice(WIKIMEDIA_SEARCH_PREFIX.length).trim();
  if (!queryPart) return null;

  const decodedQuery = decodeRepeatedly(queryPart);
  const cleanQuery = normalizeWhitespace(decodedQuery);

  return cleanQuery
    ? `${WIKIMEDIA_SEARCH_PREFIX}${encodeURIComponent(cleanQuery)}`
    : null;
};
|
||||
|
||||
const normalizeImageUri = (rawUri) => {
|
||||
if (typeof rawUri !== 'string') return null;
|
||||
|
||||
const trimmed = rawUri.trim();
|
||||
if (!trimmed) return null;
|
||||
|
||||
const localPath = normalizeLocalImagePath(trimmed);
|
||||
if (localPath) return localPath;
|
||||
|
||||
const wikimediaSearchUri = normalizeWikimediaSearchUri(trimmed);
|
||||
if (wikimediaSearchUri) return wikimediaSearchUri;
|
||||
|
||||
const normalized = unwrapMarkdownLink(trimmed);
|
||||
const converted = convertWikimediaFilePathUrl(normalized);
|
||||
const candidate = (converted || normalized).replace(/^http:\/\//i, 'https://');
|
||||
@@ -142,10 +214,11 @@ const prepareEntry = (rawEntry, index, existingIdMap, preserveExistingIds) => {
|
||||
errors.push({
|
||||
index,
|
||||
field: 'imageUri',
|
||||
message: 'imageUri is missing or invalid. A valid http(s) URL is required.',
|
||||
message: 'imageUri is missing or invalid. Use a valid http(s) URL, a local /plants/... path, or wikimedia-search:<query>.',
|
||||
value: rawEntry?.imageUri ?? null,
|
||||
});
|
||||
}
|
||||
const imageStatus = imageUri && imageUri.startsWith(WIKIMEDIA_SEARCH_PREFIX) ? 'pending' : 'ok';
|
||||
|
||||
const categories = toArrayOfStrings(rawEntry?.categories);
|
||||
const confidence = parseNumber(rawEntry?.confidence, 1);
|
||||
@@ -168,7 +241,7 @@ const prepareEntry = (rawEntry, index, existingIdMap, preserveExistingIds) => {
|
||||
name,
|
||||
botanicalName,
|
||||
imageUri,
|
||||
imageStatus: 'ok',
|
||||
imageStatus,
|
||||
description,
|
||||
categories,
|
||||
careInfo,
|
||||
@@ -335,11 +408,12 @@ const parseJsonObject = (value) => {
|
||||
const toApiPlant = (row) => {
|
||||
const categories = parseJsonArray(row.categories);
|
||||
const careInfo = parseJsonObject(row.careInfo);
|
||||
const imageUri = toWikimediaFilePathUrl(row.imageUri) || row.imageUri;
|
||||
return {
|
||||
id: row.id,
|
||||
name: row.name,
|
||||
botanicalName: row.botanicalName,
|
||||
imageUri: row.imageUri,
|
||||
imageUri,
|
||||
imageStatus: row.imageStatus || 'ok',
|
||||
description: row.description || '',
|
||||
categories,
|
||||
@@ -349,7 +423,7 @@ const toApiPlant = (row) => {
|
||||
};
|
||||
|
||||
const getPlants = async (db, options = {}) => {
|
||||
const query = typeof options.query === 'string' ? options.query.trim().toLowerCase() : '';
|
||||
const query = typeof options.query === 'string' ? options.query.trim() : '';
|
||||
const category = typeof options.category === 'string' ? options.category.trim() : '';
|
||||
const limitRaw = Number(options.limit);
|
||||
const limit = Number.isFinite(limitRaw)
|
||||
@@ -368,15 +442,6 @@ const getPlants = async (db, options = {}) => {
|
||||
confidence
|
||||
FROM plants`;
|
||||
const params = [];
|
||||
if (query) {
|
||||
sql += ` WHERE (
|
||||
LOWER(name) LIKE ?
|
||||
OR LOWER(botanicalName) LIKE ?
|
||||
OR LOWER(COALESCE(description, '')) LIKE ?
|
||||
)`;
|
||||
const likePattern = `%${query}%`;
|
||||
params.push(likePattern, likePattern, likePattern);
|
||||
}
|
||||
sql += ' ORDER BY name COLLATE NOCASE ASC';
|
||||
|
||||
const rows = await all(db, sql, params);
|
||||
@@ -386,7 +451,12 @@ const getPlants = async (db, options = {}) => {
|
||||
results = results.filter((plant) => plant.categories.includes(category));
|
||||
}
|
||||
|
||||
return results.slice(0, limit);
|
||||
if (!query) {
|
||||
return results.slice(0, limit);
|
||||
}
|
||||
|
||||
return rankHybridEntries(results, query, limit)
|
||||
.map((candidate) => candidate.entry);
|
||||
};
|
||||
|
||||
const getPlantDiagnostics = async (db) => {
|
||||
@@ -565,7 +635,7 @@ const rebuildPlantsCatalog = async (db, rawEntries, options = {}) => {
|
||||
entry.name,
|
||||
entry.botanicalName,
|
||||
entry.imageUri,
|
||||
'ok',
|
||||
entry.imageStatus,
|
||||
entry.description,
|
||||
JSON.stringify(entry.categories),
|
||||
JSON.stringify(entry.careInfo),
|
||||
@@ -647,6 +717,8 @@ module.exports = {
|
||||
ensurePlantSchema,
|
||||
getPlantDiagnostics,
|
||||
getPlants,
|
||||
normalizeKey,
|
||||
normalizeImageUri,
|
||||
toWikimediaFilePathUrl,
|
||||
rebuildPlantsCatalog,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user