domain_health_status
This commit is contained in:
52
backend/src/routes/health.ts
Normal file
52
backend/src/routes/health.ts
Normal file
@@ -0,0 +1,52 @@
|
||||
import { Router } from 'express';
|
||||
import { requireAuth, canAccessDomain } from '../middleware/auth.js';
|
||||
import { runDomainHealthChecks, getPersistedHealth } from '../services/health.js';
|
||||
import { audit } from '../services/audit.js';
|
||||
|
||||
/** Express router carrying the domain health endpoints defined in this file. */
export const healthRouter = Router();
|
||||
// Registered before any route so that requireAuth runs ahead of every handler below.
healthRouter.use(requireAuth);
|
||||
|
||||
/**
 * Guard that aborts the request when the authenticated user may not
 * access `domain`.
 *
 * @param req    Incoming request; only `req.user` is read.
 * @param domain Domain name the caller wants to operate on.
 * @throws Error with message 'Forbidden' and a `status` property of 403
 *         when `canAccessDomain` rejects the user/domain pair.
 */
function ensureDomain(req: any, domain: string): void {
  const allowed = canAccessDomain(req.user, domain);
  if (allowed) {
    return;
  }

  const forbidden = new Error('Forbidden');
  throw Object.assign(forbidden, { status: 403 });
}
|
||||
|
||||
/**
 * GET /api/health/domains/:domain
 * Read the last persisted health status without re-running checks.
 * Used by the mailbox view to decide whether to show the banner.
 *
 * Returns { domain, checked_at, has_problems, summary } or 404 if
 * the domain has never been checked.
 *
 * Any error (including the 403 thrown by ensureDomain) is forwarded to
 * next() so the application's error-handling middleware produces the
 * response. Without this, a throw inside an async handler is an unhandled
 * promise rejection on Express 4 and the request never completes.
 */
healthRouter.get('/domains/:domain', async (req, res, next) => {
  try {
    const domain = String(req.params.domain).toLowerCase();
    ensureDomain(req, domain);

    const status = await getPersistedHealth(domain);
    if (!status) {
      res.status(404).json({ error: 'No health check has been performed yet' });
      return;
    }
    res.json(status);
  } catch (err) {
    next(err);
  }
});
|
||||
|
||||
/**
 * POST /api/health/domains/:domain/check
 * Run all health checks now. Persists the result and returns the
 * full report for the modal.
 *
 * An audit entry ('domain.health_check') is written before the report is
 * returned. Any error (including the 403 thrown by ensureDomain) is
 * forwarded to next() so the application's error-handling middleware
 * produces the response; a throw inside an async handler would otherwise
 * be an unhandled promise rejection on Express 4.
 */
healthRouter.post('/domains/:domain/check', async (req, res, next) => {
  try {
    const domain = String(req.params.domain).toLowerCase();
    ensureDomain(req, domain);

    const report = await runDomainHealthChecks(domain);
    await audit(
      req.user!.email,
      'domain.health_check',
      'domain',
      domain,
      { has_problems: report.has_problems },
      req.ip,
    );
    res.json(report);
  } catch (err) {
    next(err);
  }
});
|
||||
@@ -11,6 +11,7 @@ import { mailboxesRouter } from './routes/mailboxes.js';
|
||||
import { auditRouter } from './routes/audit.js';
|
||||
import { adminsRouter } from './routes/admins.js';
|
||||
import { billingRouter } from './routes/billing.js';
|
||||
import { healthRouter } from './routes/health.js';
|
||||
import { SyncService } from './services/sync.js';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
@@ -45,6 +46,7 @@ app.use('/api/mailboxes', mailboxesRouter);
|
||||
app.use('/api/audit', auditRouter);
|
||||
app.use('/api/admins', adminsRouter);
|
||||
app.use('/api/billing', billingRouter);
|
||||
app.use('/api/health', healthRouter);
|
||||
|
||||
app.use((err: any, req: express.Request, res: express.Response, _next: express.NextFunction) => {
|
||||
const status = err.status ?? err.statusCode ?? 500;
|
||||
|
||||
392
backend/src/services/health.ts
Normal file
392
backend/src/services/health.ts
Normal file
@@ -0,0 +1,392 @@
|
||||
import dns from 'node:dns/promises';
|
||||
import tls from 'node:tls';
|
||||
import { pool } from '../db.js';
|
||||
import { config } from '../config.js';
|
||||
|
||||
// ============================================================
|
||||
// Types
|
||||
// ============================================================
|
||||
|
||||
/** Severity of a finding or check. worstLevel() ranks them fail > warn > unknown > ok. */
export type HealthLevel = 'ok' | 'warn' | 'fail' | 'unknown';
|
||||
|
||||
/** One individual observation inside a health check (e.g. a single DNS record or host). */
export interface HealthFinding {
|
||||
/** Severity of this single observation. */
level: HealthLevel;
|
||||
/** Short name of what was inspected, e.g. 'MX', 'SPF' or a host name. */
label: string;
|
||||
/** Optional human-readable explanation or the value that was observed. */
detail?: string;
|
||||
}
|
||||
|
||||
/** A group of related findings; this file produces the groups 'dms', 'dns' and 'caddy'. */
export interface HealthCheck {
|
||||
/** Stable machine identifier of the group. */
id: string;
|
||||
/** Display title of the group. */
title: string;
|
||||
/** Aggregated severity: the worst level among `findings`. */
level: HealthLevel;
|
||||
/** The individual observations that make up this check. */
findings: HealthFinding[];
|
||||
}
|
||||
|
||||
/** Full result of one health-check run for a domain, as returned to the client and persisted. */
export interface DomainHealthReport {
|
||||
/** Lower-cased domain name the checks were run for. */
domain: string;
|
||||
/** ISO-8601 timestamp of the run. */
checked_at: string;
|
||||
/** True when the worst level across all checks is 'fail' or 'warn' ('unknown' does not count). */
has_problems: boolean;
|
||||
/** All check groups that were executed. */
checks: HealthCheck[];
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// Helpers
|
||||
// ============================================================
|
||||
|
||||
// Host prefixes probed for every domain: each "<prefix>.<domain>" must resolve
// in DNS and is also probed for its TLS certificate.
const SUBDOMAINS_FOR_CADDY = ['mail', 'webmail', 'imap', 'smtp'];
|
||||
|
||||
// A certificate expiring within this many days is reported as "warn";
// an already expired certificate is reported as "fail".
|
||||
const CERT_WARN_DAYS = 14;
|
||||
|
||||
/**
 * Collapses a list of levels into the single most severe one.
 *
 * Severity order is fail > warn > unknown > ok; an empty list yields 'ok'.
 *
 * @param levels Levels of the child findings or checks.
 * @returns The most severe level present in `levels`.
 */
function worstLevel(levels: HealthLevel[]): HealthLevel {
  const bySeverity: HealthLevel[] = ['fail', 'warn', 'unknown'];
  const worst = bySeverity.find((candidate) => levels.includes(candidate));
  return worst ?? 'ok';
}
|
||||
|
||||
/**
 * Settles with the outcome of `p`, unless `ms` milliseconds pass first,
 * in which case it rejects with `Error("Timeout: <label>")`.
 *
 * The timer is always cleared once the race is decided. The underlying
 * operation behind `p` is not cancelled; its late result is simply ignored.
 *
 * @param p     Promise to wait for.
 * @param ms    Maximum time to wait, in milliseconds.
 * @param label Text appended to the timeout error message.
 */
async function withTimeout<T>(p: Promise<T>, ms: number, label: string): Promise<T> {
  let handle: NodeJS.Timeout | undefined;

  const deadline = new Promise<never>((_resolve, reject) => {
    handle = setTimeout(() => {
      reject(new Error(`Timeout: ${label}`));
    }, ms);
  });

  return Promise.race([p, deadline]).finally(() => {
    if (handle) clearTimeout(handle);
  });
}
|
||||
|
||||
// ============================================================
|
||||
// 1) DMS check
|
||||
// ============================================================
|
||||
/**
 * DMS check: counts the active mailboxes stored for `domain`.
 *
 * Produces exactly one finding:
 *  - 'fail'    when the domain has no active mailbox,
 *  - 'ok'      with the mailbox count otherwise,
 *  - 'unknown' when the database query itself fails.
 *
 * @param domain Domain name to look up in the `mailboxes` table.
 * @returns The 'dms' health check; never rejects because of a query error.
 */
async function checkDms(domain: string): Promise<HealthCheck> {
  let finding: HealthFinding;

  try {
    const { rows } = await pool.query(
      `SELECT count(*)::int AS n
         FROM mailboxes
        WHERE domain=$1 AND status='active'`,
      [domain],
    );
    const activeCount = rows[0]?.n ?? 0;

    finding =
      activeCount === 0
        ? {
            level: 'fail',
            label: 'No active mailboxes',
            detail: 'This domain has no active mailboxes in DMS.',
          }
        : {
            level: 'ok',
            label: `${activeCount} active mailbox${activeCount === 1 ? '' : 'es'}`,
          };
  } catch (err: any) {
    // A database problem is not a verdict about the domain, hence 'unknown'.
    finding = {
      level: 'unknown',
      label: 'Could not query DMS state',
      detail: err?.message ?? String(err),
    };
  }

  const findings: HealthFinding[] = [finding];
  return {
    id: 'dms',
    title: 'DMS',
    level: worstLevel(findings.map((entry) => entry.level)),
    findings,
  };
}
|
||||
|
||||
// ============================================================
|
||||
// 2) DNS check
|
||||
// ============================================================
|
||||
|
||||
/**
 * Resolves one DNS record type for `host` and flattens the answer to strings.
 *
 *  - A / AAAA / CNAME: the values as returned by the resolver.
 *  - MX: the exchange host names (priorities are dropped).
 *  - TXT: each record's chunks joined into one string.
 *
 * Every lookup is capped at 5 seconds. Any failure, including a timeout
 * or a resolver error, is reported as an empty array, so callers cannot
 * tell "no such record" apart from "lookup failed".
 *
 * @param host Fully qualified name to query.
 * @param type Record type to request.
 * @returns The record values, or [] when nothing was found or the lookup failed.
 */
async function dnsResolve(host: string, type: 'A' | 'AAAA' | 'MX' | 'TXT' | 'CNAME'): Promise<string[]> {
  const lookupTimeoutMs = 5000;

  try {
    switch (type) {
      case 'A':
        return await withTimeout<string[]>(dns.resolve4(host), lookupTimeoutMs, `A ${host}`);
      case 'AAAA':
        return await withTimeout<string[]>(dns.resolve6(host), lookupTimeoutMs, `AAAA ${host}`);
      case 'MX': {
        const records = await withTimeout<dns.MxRecord[]>(dns.resolveMx(host), lookupTimeoutMs, `MX ${host}`);
        return records.map((record) => record.exchange);
      }
      case 'TXT': {
        const records = await withTimeout<string[][]>(dns.resolveTxt(host), lookupTimeoutMs, `TXT ${host}`);
        return records.map((chunks) => chunks.join(''));
      }
      case 'CNAME':
        return await withTimeout<string[]>(dns.resolveCname(host), lookupTimeoutMs, `CNAME ${host}`);
      default:
        return [];
    }
  } catch {
    return [];
  }
}
|
||||
|
||||
/**
 * DNS check: inspects the mail-related records of `domain`.
 *
 * Findings are emitted in a fixed order:
 *  1. MX    - 'fail' if missing, 'ok' if one exchange ends in amazonaws.com, else 'warn'.
 *  2. SPF   - 'fail' if missing, 'ok' if it includes amazonses.com, else 'warn'.
 *  3. DMARC - 'warn' if missing, else 'ok'.
 *  4. DKIM  - always 'unknown' (cannot be verified from here).
 *  5. One finding per "<sub>.<domain>" host from SUBDOMAINS_FOR_CADDY - 'ok' if
 *     it resolves via A, AAAA or CNAME, else 'fail'.
 *
 * All lookups are independent of each other, so they are started together
 * instead of one after another; a slow or unreachable resolver then costs a
 * few timeouts in parallel rather than the sum of all of them.
 *
 * @param domain Lower-cased domain name to inspect.
 * @returns The 'dns' health check.
 */
async function checkDns(domain: string): Promise<HealthCheck> {
  // dnsResolve resolves to [] on any error, so these Promise.all calls cannot reject.
  const [mx, txt, dmarc, subdomainFindings] = await Promise.all([
    dnsResolve(domain, 'MX'),
    dnsResolve(domain, 'TXT'),
    dnsResolve(`_dmarc.${domain}`, 'TXT'),
    Promise.all(SUBDOMAINS_FOR_CADDY.map((sub) => checkSubdomainResolves(`${sub}.${domain}`))),
  ]);

  const findings: HealthFinding[] = [];

  // ---- MX ----
  if (mx.length === 0) {
    findings.push({ level: 'fail', label: 'MX', detail: 'No MX record found.' });
  } else {
    const sesMx = mx.find((m) => /amazonaws\.com\.?$/i.test(m));
    if (sesMx) {
      findings.push({ level: 'ok', label: 'MX', detail: `points to SES (${sesMx})` });
    } else {
      findings.push({
        level: 'warn',
        label: 'MX',
        detail: `Not an SES MX record: ${mx.join(', ')}`,
      });
    }
  }

  // ---- SPF (TXT on root) ----
  const spf = txt.find((t) => /^v=spf1\b/i.test(t));
  if (!spf) {
    findings.push({ level: 'fail', label: 'SPF', detail: 'No SPF record found.' });
  } else if (/include:amazonses\.com/i.test(spf)) {
    findings.push({ level: 'ok', label: 'SPF', detail: 'includes amazonses.com' });
  } else {
    findings.push({
      level: 'warn',
      label: 'SPF',
      detail: `SPF found but does not include amazonses.com: ${spf.slice(0, 100)}`,
    });
  }

  // ---- DMARC ----
  const dmarcRecord = dmarc.find((t) => /^v=DMARC1\b/i.test(t));
  if (!dmarcRecord) {
    findings.push({ level: 'warn', label: 'DMARC', detail: 'No DMARC record found.' });
  } else {
    findings.push({ level: 'ok', label: 'DMARC', detail: dmarcRecord.slice(0, 80) });
  }

  // ---- DKIM ----
  // SES publishes DKIM as CNAMEs at "<token>._domainkey.<domain>". The tokens
  // are not known here and DNS offers no way to enumerate the names under
  // _domainkey, so no lookup is attempted: the finding is always "unknown"
  // rather than a guess that could be a false positive.
  findings.push({
    level: 'unknown',
    label: 'DKIM',
    detail: 'Cannot verify automatically — confirm in SES console that 3 DKIM CNAMEs are published.',
  });

  // ---- Subdomains for Caddy (must resolve, content doesn't matter) ----
  findings.push(...subdomainFindings);

  return {
    id: 'dns',
    title: 'DNS',
    level: worstLevel(findings.map((f) => f.level)),
    findings,
  };
}

// Reports whether `host` resolves at all, trying A, then AAAA, then CNAME and
// stopping at the first record type that returns an answer.
async function checkSubdomainResolves(host: string): Promise<HealthFinding> {
  const label = `DNS ${host}`;

  const a = await dnsResolve(host, 'A');
  if (a.length > 0) {
    return { level: 'ok', label, detail: `A → ${a[0]}` };
  }

  const aaaa = await dnsResolve(host, 'AAAA');
  if (aaaa.length > 0) {
    return { level: 'ok', label, detail: `AAAA → ${aaaa[0]}` };
  }

  const cname = await dnsResolve(host, 'CNAME');
  if (cname.length > 0) {
    return { level: 'ok', label, detail: `CNAME → ${cname[0]}` };
  }

  return {
    level: 'fail',
    label,
    detail: 'Does not resolve. Caddy cannot issue a cert without DNS pointing here.',
  };
}
|
||||
|
||||
// ============================================================
|
||||
// 3) Caddy cert check
|
||||
// ============================================================
|
||||
|
||||
/** Outcome of a single TLS probe against one host. */
interface CertResult {
  /** Start of the certificate's validity, or null when unavailable/unparseable. */
  validFrom: Date | null;
  /** End of the certificate's validity, or null when unavailable/unparseable. */
  validTo: Date | null;
  /** Common name of the certificate subject, if present. */
  cn: string | null;
  /** Connection or handshake failure; null when a certificate was obtained. */
  error: string | null;
  /**
   * Reason the certificate failed chain or host-name verification, or null
   * when it verified. Only meaningful when `error` is null.
   */
  trustError?: string | null;
}

// Parses a certificate date string; missing or unparseable input yields null
// so callers never see an Invalid Date.
function parseCertDate(raw: string | undefined): Date | null {
  if (!raw) return null;
  const parsed = new Date(raw);
  return Number.isNaN(parsed.getTime()) ? null : parsed;
}

/**
 * Opens one TLS connection to `host` and reads the certificate it presents.
 *
 * The handshake is allowed to complete even for certificates that do not
 * verify (self-signed, wrong host name, unknown issuer) so their dates can
 * still be inspected; the verification result is reported in `trustError`
 * instead of being discarded.
 *
 * @param host      Host name to connect to; also sent as SNI server name.
 * @param port      TCP port, 443 by default.
 * @param timeoutMs Socket inactivity timeout in milliseconds.
 * @returns A promise that always resolves; failures are described in `error`.
 */
function checkCertOnce(host: string, port = 443, timeoutMs = 7000): Promise<CertResult> {
  const failure = (error: string): CertResult => ({ validFrom: null, validTo: null, cn: null, error });

  return new Promise((resolve) => {
    let opened: tls.TLSSocket;
    try {
      opened = tls.connect({
        host,
        port,
        servername: host,
        // We DO want to inspect even bad certs (e.g. self-signed) so we
        // can report useful info instead of just "connection failed".
        rejectUnauthorized: false,
        timeout: timeoutMs,
      });
    } catch (e: unknown) {
      // tls.connect can throw synchronously on invalid options; keep the
      // "always resolves" contract instead of rejecting.
      resolve(failure(e instanceof Error ? e.message : String(e)));
      return;
    }
    const socket = opened;

    let settled = false;
    const finish = (r: CertResult): void => {
      if (settled) return;
      settled = true;
      try { socket.destroy(); } catch { /* ignore */ }
      resolve(r);
    };

    socket.once('secureConnect', () => {
      try {
        const cert = socket.getPeerCertificate();
        if (!cert || Object.keys(cert).length === 0) {
          finish(failure('No peer certificate returned'));
          return;
        }

        // With rejectUnauthorized: false the handshake succeeds regardless of
        // trust, so the verification verdict has to be read explicitly.
        let trustError: string | null = null;
        if (!socket.authorized) {
          const reason: unknown = socket.authorizationError;
          if (typeof reason === 'string') trustError = reason;
          else if (reason instanceof Error) trustError = reason.message;
          else trustError = 'certificate could not be verified';
        }

        finish({
          validFrom: parseCertDate(cert.valid_from),
          validTo: parseCertDate(cert.valid_to),
          cn: cert.subject?.CN ?? null,
          error: null,
          trustError,
        });
      } catch (e: any) {
        finish(failure(e?.message ?? 'parse error'));
      }
    });

    socket.on('error', (e) => finish(failure(e.message)));
    socket.on('timeout', () => finish(failure('TLS handshake timed out')));
  });
}

// Turns one probe result into the finding shown for `host`.
// `now` is the reference time in epoch milliseconds.
function certFinding(host: string, r: CertResult, now: number): HealthFinding {
  if (r.error || !r.validTo) {
    return { level: 'fail', label: host, detail: r.error ?? 'No cert info available' };
  }

  const daysLeft = Math.floor((r.validTo.getTime() - now) / (1000 * 60 * 60 * 24));
  const expIso = r.validTo.toISOString().slice(0, 10);

  if (daysLeft < 0) {
    return {
      level: 'fail',
      label: host,
      detail: `Cert EXPIRED on ${expIso} (${Math.abs(daysLeft)} days ago)`,
    };
  }
  if (r.trustError) {
    // Dates are fine but clients that verify certificates will reject it.
    return {
      level: 'warn',
      label: host,
      detail: `Cert is not trusted (${r.trustError}); valid until ${expIso}`,
    };
  }
  if (daysLeft <= CERT_WARN_DAYS) {
    return {
      level: 'warn',
      label: host,
      detail: `Cert expires in ${daysLeft} days (${expIso})`,
    };
  }
  return {
    level: 'ok',
    label: host,
    detail: `Cert valid until ${expIso} (${daysLeft} days)`,
  };
}

/**
 * TLS certificate check: probes "<sub>.<domain>" on port 443 for every
 * prefix in SUBDOMAINS_FOR_CADDY and reports one finding per host.
 *
 *  - 'fail' when the connection fails, no usable certificate is returned,
 *    or the certificate has expired;
 *  - 'warn' when the certificate does not verify or expires within
 *    CERT_WARN_DAYS days;
 *  - 'ok' otherwise.
 *
 * The hosts are probed concurrently; findings keep the order of
 * SUBDOMAINS_FOR_CADDY.
 *
 * @param domain Lower-cased domain name whose subdomains are probed.
 * @returns The 'caddy' health check.
 */
async function checkCaddyCerts(domain: string): Promise<HealthCheck> {
  const now = Date.now();

  // checkCertOnce always resolves, so Promise.all cannot reject here.
  const findings: HealthFinding[] = await Promise.all(
    SUBDOMAINS_FOR_CADDY.map(async (sub) => {
      const host = `${sub}.${domain}`;
      return certFinding(host, await checkCertOnce(host), now);
    }),
  );

  return {
    id: 'caddy',
    title: 'TLS certificates',
    level: worstLevel(findings.map((f) => f.level)),
    findings,
  };
}
|
||||
|
||||
// ============================================================
|
||||
// Public: run all checks for a domain
|
||||
// ============================================================
|
||||
|
||||
/**
 * Runs the DMS, DNS and TLS-certificate checks for a domain, stores the
 * outcome for later banner display, and returns the full report.
 *
 * `has_problems` is true when the worst level across all checks is 'fail'
 * or 'warn'; 'unknown' alone does not count as a problem.
 *
 * Persistence is best-effort: a failure to write the row is logged and the
 * report is still returned.
 *
 * @param domain Domain name; it is lower-cased before use.
 * @returns The report for this run.
 */
export async function runDomainHealthChecks(domain: string): Promise<DomainHealthReport> {
  const d = domain.toLowerCase();

  const [dmsResult, dnsResult, caddyResult] = await Promise.all([
    checkDms(d),
    checkDns(d),
    checkCaddyCerts(d),
  ]);

  const checks: HealthCheck[] = [dmsResult, dnsResult, caddyResult];
  const overall = worstLevel(checks.map((c) => c.level));
  const has_problems = overall === 'fail' || overall === 'warn';

  const report: DomainHealthReport = {
    domain: d,
    checked_at: new Date().toISOString(),
    has_problems,
    checks,
  };

  // Persist for the banner. The checked_at column receives the same
  // timestamp as the report, so the persisted status and the report returned
  // here (and stored in `details`) agree on when the check ran, instead of
  // mixing the database clock with the application clock.
  try {
    await pool.query(
      `INSERT INTO domain_health_status (domain, checked_at, has_problems, details)
       VALUES ($1, $2::timestamptz, $3, $4::jsonb)
       ON CONFLICT (domain) DO UPDATE SET
         checked_at = EXCLUDED.checked_at,
         has_problems = EXCLUDED.has_problems,
         details = EXCLUDED.details`,
      [d, report.checked_at, has_problems, JSON.stringify(report)],
    );
  } catch (err) {
    console.warn('[health] could not persist health status:', err);
  }

  return report;
}
|
||||
|
||||
// ============================================================
|
||||
// Public: load last persisted status (used by mailbox view banner)
|
||||
// ============================================================
|
||||
|
||||
/** Compact view of the last stored health run for a domain, without the individual findings. */
export interface PersistedHealth {
|
||||
/** Domain name as stored in the status row. */
domain: string;
|
||||
/** Time of the stored run; an ISO-8601 string when the driver returns a Date. */
checked_at: string;
|
||||
/** Stored flag: whether that run found problems. */
has_problems: boolean;
|
||||
/** Number of findings per level, counted across all checks of the stored report. */
summary: { fail: number; warn: number; unknown: number; ok: number };
|
||||
}
|
||||
|
||||
/**
 * Loads the last stored health status of a domain and condenses the stored
 * report into per-level finding counts.
 *
 * The stored `details` JSON is not trusted to have the expected shape:
 * missing or non-array `checks` / `findings` are skipped, and findings whose
 * level is not one of the four known levels are ignored, so a malformed or
 * older row yields a (possibly empty) summary instead of an exception or a
 * summary with unexpected keys.
 *
 * @param domain Domain name; it is lower-cased before the lookup.
 * @returns The stored status, or null when the domain has never been checked.
 */
export async function getPersistedHealth(domain: string): Promise<PersistedHealth | null> {
  const result = await pool.query(
    `SELECT domain, checked_at, has_problems, details
       FROM domain_health_status WHERE domain=$1`,
    [domain.toLowerCase()],
  );
  const row = result.rows[0];
  if (!row) return null;

  // Build a quick summary of finding counts so the banner can say
  // "2 problems detected" without needing to rehydrate the whole modal.
  const counts: PersistedHealth['summary'] = { fail: 0, warn: 0, unknown: 0, ok: 0 };

  const checks: unknown = row.details?.checks;
  if (Array.isArray(checks)) {
    for (const check of checks) {
      const findings: unknown = check?.findings;
      if (!Array.isArray(findings)) continue;

      for (const finding of findings) {
        const level: unknown = finding?.level;
        if (level === 'fail' || level === 'warn' || level === 'unknown' || level === 'ok') {
          counts[level] += 1;
        }
      }
    }
  }

  return {
    domain: row.domain,
    checked_at: row.checked_at instanceof Date ? row.checked_at.toISOString() : row.checked_at,
    has_problems: row.has_problems,
    summary: counts,
  };
}
|
||||
Reference in New Issue
Block a user