Files
bayarea/fix-encoding.mjs
2026-03-25 20:07:27 -05:00

39 lines
1.2 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { readFileSync, writeFileSync } from 'fs';
// Path of the file that is repaired in place.
const TARGET_PATH = 'src/data/seoData.ts';

// Mojibake table: each entry maps the three characters produced when the
// UTF-8 bytes of a punctuation mark (E2 80 xx) are decoded as Windows-1252
// back to the intended character. The '€' (U+20AC) in every sequence is byte
// 0x80 in that code page, which is why this is Windows-1252 rather than
// strict Latin-1.
//
// Everything is written with \u escapes on purpose: this script must stay
// correct even if it is itself re-saved or "repaired" with the wrong
// encoding, which would silently turn raw literals into no-ops or duplicates.
//
// Order matters: the bare two-character prefix must be the LAST entry,
// otherwise it would swallow the start of every longer sequence.
const MOJIBAKE_REPLACEMENTS = [
  ['\u00e2\u20ac\u201d', '\u2014'], // E2 80 94 -> em dash
  ['\u00e2\u20ac\u201c', '\u2013'], // E2 80 93 -> en dash
  ['\u00e2\u20ac\u2122', '\u2019'], // E2 80 99 -> right single quote / apostrophe
  ['\u00e2\u20ac\u02dc', '\u2018'], // E2 80 98 -> left single quote
  ['\u00e2\u20ac\u0153', '\u201c'], // E2 80 9C -> left double quote
  ['\u00e2\u20ac\u00a6', '\u2026'], // E2 80 A6 -> ellipsis
  ['\u00e2\u20ac\u00a2', '\u2022'], // E2 80 A2 -> bullet
  // Byte 0x9D has no printable Windows-1252 mapping: decoders either keep it
  // as the C1 control U+009D or drop it, so both shapes are handled.
  ['\u00e2\u20ac\u009d', '\u201d'], // E2 80 9D -> right double quote (control kept)
  ['\u00e2\u20ac', '\u201d'], // E2 80 9D -> right double quote (byte dropped); keep last
];

/**
 * Replace every known mojibake sequence in `text` with the character it was
 * meant to be, logging how many occurrences of each sequence were repaired.
 *
 * @param {string} text - Text that may contain mojibake.
 * @returns {string} The repaired text (identical to `text` if nothing matched).
 */
function repairMojibake(text) {
  let repaired = text;
  for (const [bad, good] of MOJIBAKE_REPLACEMENTS) {
    // Split once and reuse the pieces for both the count and the replacement.
    const pieces = repaired.split(bad);
    const count = pieces.length - 1;
    if (count > 0) {
      console.log(`Replacing ${count}x: "${bad}" → "${good}"`);
      repaired = pieces.join(good);
    }
  }
  return repaired;
}

const content = readFileSync(TARGET_PATH, 'utf8');
const fixed = repairMojibake(content);

// Report any leftover "â + punctuation" runs so that sequences missing from
// the table are noticed instead of silently staying in the file.
const remaining = fixed.match(/â[^\w\s'"]{1,3}/g);
if (remaining) {
  console.log('Remaining unhandled sequences:', [...new Set(remaining)]);
} else {
  console.log('No remaining mojibake found.');
}

// Only touch the file on disk when something actually changed.
if (content !== fixed) {
  writeFileSync(TARGET_PATH, fixed, 'utf8');
  console.log('File saved successfully.');
} else {
  console.log('No changes needed.');
}