domain_health_status

This commit is contained in:
2026-04-28 20:38:50 -05:00
parent f097f96d06
commit 62219a372a
8 changed files with 743 additions and 4 deletions

View File

@@ -0,0 +1,52 @@
import { Router } from 'express';
import { requireAuth, canAccessDomain } from '../middleware/auth.js';
import { runDomainHealthChecks, getPersistedHealth } from '../services/health.js';
import { audit } from '../services/audit.js';
// Router holding the domain health endpoints defined below.
export const healthRouter = Router();
// Registered before any route, so every request on this router goes
// through requireAuth first (presumably rejecting unauthenticated
// callers — confirm in ../middleware/auth.js).
healthRouter.use(requireAuth);
/**
 * Guard that aborts the request when the current user may not access `domain`.
 *
 * @param req    Incoming request; only `req.user` is read.
 * @param domain Domain name the caller wants to operate on.
 * @throws Error with message "Forbidden" and a `status` of 403 when
 *         canAccessDomain() returns a falsy value.
 */
function ensureDomain(req: any, domain: string): void {
  const allowed = canAccessDomain(req.user, domain);
  if (allowed) return;

  const forbidden = new Error('Forbidden');
  throw Object.assign(forbidden, { status: 403 });
}
/**
 * GET /api/health/domains/:domain
 * Read the last persisted health status without re-running checks.
 * Used by the mailbox view to decide whether to show the banner.
 *
 * Returns { domain, checked_at, has_problems, summary } or 404 if
 * the domain has never been checked.
 *
 * Any failure (including the 403 thrown by ensureDomain) is forwarded
 * with next(err). A rejected async handler is not guaranteed to reach
 * the application's error middleware on every Express version, so the
 * hand-off is made explicit.
 */
healthRouter.get('/domains/:domain', async (req, res, next) => {
  try {
    // Domains are handled in lower case throughout the health service.
    const domain = String(req.params.domain).toLowerCase();
    ensureDomain(req, domain);

    const status = await getPersistedHealth(domain);
    if (!status) {
      res.status(404).json({ error: 'No health check has been performed yet' });
      return;
    }
    res.json(status);
  } catch (err) {
    next(err);
  }
});
/**
 * POST /api/health/domains/:domain/check
 * Run all health checks now. Persists the result and returns the
 * full report for the modal.
 *
 * An audit entry ("domain.health_check") recording the caller's email,
 * the domain, whether problems were found, and the request IP is written
 * before the response is sent.
 *
 * Any failure (including the 403 thrown by ensureDomain) is forwarded
 * with next(err). A rejected async handler is not guaranteed to reach
 * the application's error middleware on every Express version, so the
 * hand-off is made explicit.
 */
healthRouter.post('/domains/:domain/check', async (req, res, next) => {
  try {
    // Domains are handled in lower case throughout the health service.
    const domain = String(req.params.domain).toLowerCase();
    ensureDomain(req, domain);

    const report = await runDomainHealthChecks(domain);
    await audit(
      req.user!.email,
      'domain.health_check',
      'domain',
      domain,
      { has_problems: report.has_problems },
      req.ip,
    );
    res.json(report);
  } catch (err) {
    next(err);
  }
});

View File

@@ -11,6 +11,7 @@ import { mailboxesRouter } from './routes/mailboxes.js';
import { auditRouter } from './routes/audit.js';
import { adminsRouter } from './routes/admins.js';
import { billingRouter } from './routes/billing.js';
import { healthRouter } from './routes/health.js';
import { SyncService } from './services/sync.js';
const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -45,6 +46,7 @@ app.use('/api/mailboxes', mailboxesRouter);
// Mount each feature router under its own /api/<area> prefix.
app.use('/api/audit', auditRouter);
app.use('/api/admins', adminsRouter);
app.use('/api/billing', billingRouter);
// Domain health endpoints (read persisted status / run checks on demand).
app.use('/api/health', healthRouter);
app.use((err: any, req: express.Request, res: express.Response, _next: express.NextFunction) => {
const status = err.status ?? err.statusCode ?? 500;

View File

@@ -0,0 +1,392 @@
import dns from 'node:dns/promises';
import tls from 'node:tls';
import { pool } from '../db.js';
import { config } from '../config.js';
// ============================================================
// Types
// ============================================================
/**
 * Severity of a single finding or of a whole check.
 * When levels are aggregated, 'fail' outranks 'warn', which outranks
 * 'unknown'; 'ok' results only when none of the others is present.
 */
export type HealthLevel = 'ok' | 'warn' | 'fail' | 'unknown';
/** One observation produced by a health check. */
export interface HealthFinding {
// Severity of this individual observation.
level: HealthLevel;
// Short name of what was inspected (a record type, a host name, ...).
label: string;
// Optional human-readable explanation or raw error message.
detail?: string;
}
/** Result of one category of checks together with its individual findings. */
export interface HealthCheck {
// Stable identifier of the category; this module emits 'dms', 'dns' and 'caddy'.
id: string;
// Display title of the category.
title: string;
// Worst level among `findings`.
level: HealthLevel;
// Individual observations, in the order they were produced.
findings: HealthFinding[];
}
/** Full result of running every health check for one domain. */
export interface DomainHealthReport {
// Lower-cased domain the report belongs to.
domain: string;
// ISO-8601 timestamp taken when the report was assembled.
checked_at: string;
// True when the worst level across all checks is 'fail' or 'warn';
// 'unknown' alone does not count as a problem.
has_problems: boolean;
// One entry per check category.
checks: HealthCheck[];
}
// ============================================================
// Helpers
// ============================================================
// Host name prefixes inspected under every domain: each "<prefix>.<domain>"
// must resolve in DNS and is probed for its TLS certificate.
const SUBDOMAINS_FOR_CADDY = ['mail', 'webmail', 'imap', 'smtp'];
// A certificate with this many days or fewer remaining is reported as "warn";
// one that has already expired is reported as "fail".
const CERT_WARN_DAYS = 14;
/**
 * Collapse a list of levels into the single most severe one.
 *
 * Severity order is fail > warn > unknown > ok; an empty list yields 'ok'.
 *
 * @param levels Levels of the child findings or checks.
 * @returns The most severe level present.
 */
function worstLevel(levels: HealthLevel[]): HealthLevel {
  // Ordered from most to least severe; 'ok' is the fallback.
  const bySeverity: HealthLevel[] = ['fail', 'warn', 'unknown'];
  for (const candidate of bySeverity) {
    if (levels.includes(candidate)) {
      return candidate;
    }
  }
  return 'ok';
}
/**
 * Settle with the outcome of `p`, or reject once `ms` milliseconds pass.
 *
 * The underlying operation is not cancelled on timeout; only the wait is
 * abandoned. The timer is always cleared once the race settles so it
 * cannot keep the process alive.
 *
 * @param p     Promise to wait for.
 * @param ms    Maximum time to wait, in milliseconds.
 * @param label Text appended to the timeout error message.
 * @returns The value `p` resolves with.
 * @throws Error("Timeout: <label>") when the deadline passes first;
 *         otherwise whatever `p` rejects with.
 */
async function withTimeout<T>(p: Promise<T>, ms: number, label: string): Promise<T> {
  let handle: NodeJS.Timeout | undefined;

  const deadline = new Promise<never>((_resolve, reject) => {
    handle = setTimeout(() => {
      reject(new Error(`Timeout: ${label}`));
    }, ms);
  });

  return Promise.race([p, deadline]).finally(() => {
    if (handle) clearTimeout(handle);
  });
}
// ============================================================
// 1) DMS check
// ============================================================
/**
 * Check whether the domain has at least one active mailbox.
 *
 * Counts rows in `mailboxes` with status 'active' for the domain:
 * zero is reported as 'fail', any other count as 'ok'. A failing query is
 * reported as 'unknown' (with the error message) rather than thrown.
 *
 * @param domain Domain to look up.
 * @returns The "dms" check containing exactly one finding.
 */
async function checkDms(domain: string): Promise<HealthCheck> {
  let finding: HealthFinding;

  try {
    const { rows } = await pool.query(
      `SELECT count(*)::int AS n
FROM mailboxes
WHERE domain=$1 AND status='active'`,
      [domain],
    );
    const active = rows[0]?.n ?? 0;
    if (active === 0) {
      finding = {
        level: 'fail',
        label: 'No active mailboxes',
        detail: 'This domain has no active mailboxes in DMS.',
      };
    } else {
      const suffix = active === 1 ? '' : 'es';
      finding = { level: 'ok', label: `${active} active mailbox${suffix}` };
    }
  } catch (err: any) {
    finding = {
      level: 'unknown',
      label: 'Could not query DMS state',
      detail: err?.message ?? String(err),
    };
  }

  const findings = [finding];
  const level = worstLevel(findings.map((entry) => entry.level));
  return { id: 'dms', title: 'DMS', level, findings };
}
// ============================================================
// 2) DNS check
// ============================================================
/**
 * Resolve one record type for `host` and return the values as plain strings.
 *
 * - A / AAAA / CNAME: the resolved values as returned by the resolver.
 * - MX: the exchange host names (priorities are dropped).
 * - TXT: one string per record, with its chunks concatenated.
 *
 * Every lookup is limited to 5 seconds. Any failure — timeout included —
 * yields an empty array instead of an error.
 * NOTE(review): callers therefore cannot tell "record missing" from
 * "resolver unreachable"; confirm that this is acceptable.
 *
 * @param host Fully qualified name to query.
 * @param type Record type to resolve.
 * @returns The resolved values, or [] when nothing could be resolved.
 */
async function dnsResolve(host: string, type: 'A' | 'AAAA' | 'MX' | 'TXT' | 'CNAME'): Promise<string[]> {
  const budgetMs = 5000;
  const tag = `${type} ${host}`;

  try {
    switch (type) {
      case 'A':
        return await withTimeout<string[]>(dns.resolve4(host), budgetMs, tag);
      case 'AAAA':
        return await withTimeout<string[]>(dns.resolve6(host), budgetMs, tag);
      case 'MX': {
        const records = await withTimeout<dns.MxRecord[]>(dns.resolveMx(host), budgetMs, tag);
        return records.map((record) => record.exchange);
      }
      case 'TXT': {
        const records = await withTimeout<string[][]>(dns.resolveTxt(host), budgetMs, tag);
        return records.map((chunks) => chunks.join(''));
      }
      case 'CNAME':
        return await withTimeout<string[]>(dns.resolveCname(host), budgetMs, tag);
      default:
        return [];
    }
  } catch {
    return [];
  }
}
/**
 * Check the public DNS records a hosted mail domain relies on.
 *
 * Findings are produced in a fixed order: MX, SPF, DMARC, DKIM, then one
 * "DNS <host>" entry for every prefix in SUBDOMAINS_FOR_CADDY.
 *
 * The lookups are independent of one another, so they are issued
 * concurrently. Run one after another, a slow or unreachable resolver
 * would cost the sum of all individual lookup timeouts.
 *
 * NOTE(review): dnsResolve yields an empty list for every kind of failure,
 * so a resolver outage is reported here as a missing record.
 *
 * @param domain Domain to inspect.
 * @returns The "dns" check with its findings.
 */
async function checkDns(domain: string): Promise<HealthCheck> {
  const [mx, rootTxt, dmarcTxt, subdomainFindings] = await Promise.all([
    dnsResolve(domain, 'MX'),
    dnsResolve(domain, 'TXT'),
    dnsResolve(`_dmarc.${domain}`, 'TXT'),
    Promise.all(SUBDOMAINS_FOR_CADDY.map((sub) => subdomainFinding(`${sub}.${domain}`))),
  ]);

  const findings: HealthFinding[] = [
    mxFinding(mx),
    spfFinding(rootTxt),
    dmarcFinding(dmarcTxt),
    // SES publishes DKIM keys as CNAMEs at "<token>._domainkey.<domain>".
    // The tokens are not known here and DNS offers no way to enumerate the
    // names under _domainkey, so no lookup is attempted: the finding is
    // always "unknown" rather than a guess.
    {
      level: 'unknown',
      label: 'DKIM',
      detail: 'Cannot verify automatically — confirm in SES console that 3 DKIM CNAMEs are published.',
    },
    ...subdomainFindings,
  ];

  return {
    id: 'dns',
    title: 'DNS',
    level: worstLevel(findings.map((f) => f.level)),
    findings,
  };
}

/** Classify the MX exchanges: none is a failure, a non-SES target is a warning. */
function mxFinding(mx: string[]): HealthFinding {
  if (mx.length === 0) {
    return { level: 'fail', label: 'MX', detail: 'No MX record found.' };
  }
  // Anchored on a label boundary so that e.g. "notamazonaws.com" is not
  // mistaken for an amazonaws.com host.
  const sesMx = mx.find((m) => /(^|\.)amazonaws\.com\.?$/i.test(m));
  if (sesMx) {
    return { level: 'ok', label: 'MX', detail: `points to SES (${sesMx})` };
  }
  return { level: 'warn', label: 'MX', detail: `Not an SES MX record: ${mx.join(', ')}` };
}

/** Classify the root TXT records: SPF must exist and should include amazonses.com. */
function spfFinding(rootTxt: string[]): HealthFinding {
  const spf = rootTxt.find((t) => /^v=spf1\b/i.test(t));
  if (!spf) {
    return { level: 'fail', label: 'SPF', detail: 'No SPF record found.' };
  }
  if (/include:amazonses\.com/i.test(spf)) {
    return { level: 'ok', label: 'SPF', detail: 'includes amazonses.com' };
  }
  return {
    level: 'warn',
    label: 'SPF',
    detail: `SPF found but does not include amazonses.com: ${spf.slice(0, 100)}`,
  };
}

/** Classify the TXT records at _dmarc.<domain>: a missing policy is only a warning. */
function dmarcFinding(dmarcTxt: string[]): HealthFinding {
  const dmarcRecord = dmarcTxt.find((t) => /^v=DMARC1\b/i.test(t));
  if (!dmarcRecord) {
    return { level: 'warn', label: 'DMARC', detail: 'No DMARC record found.' };
  }
  return { level: 'ok', label: 'DMARC', detail: dmarcRecord.slice(0, 80) });
}

/**
 * Report whether `host` resolves at all. A is tried first, then AAAA, then
 * CNAME; later lookups are skipped as soon as one returns data because only
 * resolvability matters, not the record content.
 */
async function subdomainFinding(host: string): Promise<HealthFinding> {
  const label = `DNS ${host}`;

  const a = await dnsResolve(host, 'A');
  if (a.length > 0) return { level: 'ok', label, detail: `A → ${a[0]}` };

  const aaaa = await dnsResolve(host, 'AAAA');
  if (aaaa.length > 0) return { level: 'ok', label, detail: `AAAA → ${aaaa[0]}` };

  const cname = await dnsResolve(host, 'CNAME');
  if (cname.length > 0) return { level: 'ok', label, detail: `CNAME → ${cname[0]}` };

  return {
    level: 'fail',
    label,
    detail: 'Does not resolve. Caddy cannot issue a cert without DNS pointing here.',
  };
}
// ============================================================
// 3) Caddy cert check
// ============================================================
/** Outcome of one TLS probe against a host. */
interface CertResult {
  /** Start of the certificate's validity period, or null when unavailable or unparseable. */
  validFrom: Date | null;
  /** End of the certificate's validity period, or null when unavailable or unparseable. */
  validTo: Date | null;
  /** Subject common name of the certificate, if present. */
  cn: string | null;
  /** Connection-level failure (socket error, timeout, no certificate); null on success. */
  error: string | null;
  /**
   * Reason Node's verification rejected the certificate (untrusted chain,
   * host name mismatch, ...); null or absent when it verified cleanly.
   */
  authError?: string | null;
}

/**
 * Parse a certificate date string.
 * Returns null for missing or unparseable input so callers never have to
 * deal with an Invalid Date (whose toISOString() throws).
 */
function parseCertDate(value: string | undefined): Date | null {
  if (!value) return null;
  const parsed = new Date(value);
  return Number.isNaN(parsed.getTime()) ? null : parsed;
}

/**
 * Open a TLS connection to `host` and read the certificate it presents.
 *
 * The handshake is allowed to complete for untrusted certificates so that
 * their dates can still be reported; the verification verdict is returned
 * separately in `authError` instead of being discarded.
 *
 * @param host      Host to connect to; also sent as the SNI server name.
 * @param port      TCP port, 443 by default.
 * @param timeoutMs Socket inactivity timeout in milliseconds.
 * @returns A CertResult. The promise never rejects; failures are reported
 *          through the `error` field.
 */
function checkCertOnce(host: string, port = 443, timeoutMs = 7000): Promise<CertResult> {
  return new Promise((resolve) => {
    let socket: tls.TLSSocket | undefined;
    let settled = false;

    // Resolve exactly once and always release the socket.
    const finish = (r: CertResult): void => {
      if (settled) return;
      settled = true;
      try { socket?.destroy(); } catch { /* ignore */ }
      resolve(r);
    };
    const failure = (error: string): CertResult => ({
      validFrom: null,
      validTo: null,
      cn: null,
      error,
    });

    try {
      const conn = tls.connect({
        host,
        port,
        servername: host,
        // We DO want to inspect even bad certs (e.g. self-signed) so we
        // can report useful info instead of just "connection failed".
        rejectUnauthorized: false,
        timeout: timeoutMs,
      }, () => {
        try {
          const cert = conn.getPeerCertificate();
          if (!cert || Object.keys(cert).length === 0) {
            finish(failure('No peer certificate returned'));
            return;
          }
          // With rejectUnauthorized disabled the handshake succeeds even for
          // untrusted certificates; the verdict has to be read explicitly.
          const authError = conn.authorized
            ? null
            : String(conn.authorizationError ?? 'certificate verification failed');
          finish({
            validFrom: parseCertDate(cert.valid_from),
            validTo: parseCertDate(cert.valid_to),
            cn: cert.subject?.CN ?? null,
            error: null,
            authError,
          });
        } catch (e: any) {
          finish(failure(e?.message ?? 'parse error'));
        }
      });
      socket = conn;
      conn.on('error', (e) => finish(failure(e.message)));
      conn.on('timeout', () => finish(failure('TLS handshake timed out')));
    } catch (e: unknown) {
      // tls.connect can throw synchronously on invalid options; report it
      // like any other connection failure instead of rejecting.
      finish(failure(e instanceof Error ? e.message : String(e)));
    }
  });
}

/**
 * Turn one probe result into a finding.
 * Order matters: connection failures first, then expiry (the most specific
 * message), then any other trust problem, then the soon-to-expire warning.
 */
function certFinding(host: string, r: CertResult, now: number): HealthFinding {
  if (r.error || !r.validTo) {
    return { level: 'fail', label: host, detail: r.error ?? 'No cert info available' };
  }

  const msPerDay = 1000 * 60 * 60 * 24;
  const daysLeft = Math.floor((r.validTo.getTime() - now) / msPerDay);
  const expIso = r.validTo.toISOString().slice(0, 10);

  if (daysLeft < 0) {
    return {
      level: 'fail',
      label: host,
      detail: `Cert EXPIRED on ${expIso} (${Math.abs(daysLeft)} days ago)`,
    };
  }
  if (r.authError) {
    // Valid dates alone do not make a certificate usable: clients reject a
    // self-signed or wrong-host certificate no matter when it expires.
    return {
      level: 'fail',
      label: host,
      detail: `Cert is not trusted for this host (${r.authError}); expires ${expIso}`,
    };
  }
  if (daysLeft <= CERT_WARN_DAYS) {
    return { level: 'warn', label: host, detail: `Cert expires in ${daysLeft} days (${expIso})` };
  }
  return { level: 'ok', label: host, detail: `Cert valid until ${expIso} (${daysLeft} days)` };
}

/**
 * Probe the TLS certificate of every "<prefix>.<domain>" host listed in
 * SUBDOMAINS_FOR_CADDY.
 *
 * The probes are independent, so they run concurrently; findings keep the
 * order of SUBDOMAINS_FOR_CADDY.
 *
 * @param domain Domain whose service hosts are probed.
 * @returns The "caddy" check with one finding per host.
 */
async function checkCaddyCerts(domain: string): Promise<HealthCheck> {
  const now = Date.now();
  const findings = await Promise.all(
    SUBDOMAINS_FOR_CADDY.map(async (sub) => {
      const host = `${sub}.${domain}`;
      return certFinding(host, await checkCertOnce(host), now);
    }),
  );

  return {
    id: 'caddy',
    title: 'TLS certificates',
    level: worstLevel(findings.map((f) => f.level)),
    findings,
  };
}
// ============================================================
// Public: run all checks for a domain
// ============================================================
/**
 * Run one check and convert an unexpected exception into an "unknown"
 * result, so a single broken check cannot discard the whole report.
 */
async function runCheckSafely(
  id: string,
  title: string,
  run: () => Promise<HealthCheck>,
): Promise<HealthCheck> {
  try {
    return await run();
  } catch (err: unknown) {
    return {
      id,
      title,
      level: 'unknown',
      findings: [{
        level: 'unknown',
        label: 'Check could not be completed',
        detail: err instanceof Error ? err.message : String(err),
      }],
    };
  }
}

/**
 * Run every health check for a domain, persist the outcome and return it.
 *
 * The three checks (DMS, DNS, TLS certificates) run concurrently.
 * `has_problems` is true when the worst level across them is 'fail' or
 * 'warn'; 'unknown' alone does not count as a problem.
 *
 * The result is upserted into `domain_health_status`. The report's own
 * `checked_at` is stored in the timestamp column so the persisted status
 * and the returned report carry the same time. A persistence failure is
 * logged and otherwise ignored; the report is still returned.
 *
 * @param domain Domain to check; lower-cased before use.
 * @returns The full report.
 */
export async function runDomainHealthChecks(domain: string): Promise<DomainHealthReport> {
  const d = domain.toLowerCase();

  const checks: HealthCheck[] = await Promise.all([
    runCheckSafely('dms', 'DMS', () => checkDms(d)),
    runCheckSafely('dns', 'DNS', () => checkDns(d)),
    runCheckSafely('caddy', 'TLS certificates', () => checkCaddyCerts(d)),
  ]);

  const overall = worstLevel(checks.map((c) => c.level));
  const has_problems = overall === 'fail' || overall === 'warn';
  const report: DomainHealthReport = {
    domain: d,
    checked_at: new Date().toISOString(),
    has_problems,
    checks,
  };

  // Persist for the banner.
  try {
    await pool.query(
      `INSERT INTO domain_health_status (domain, checked_at, has_problems, details)
       VALUES ($1, $2::timestamptz, $3, $4::jsonb)
       ON CONFLICT (domain) DO UPDATE SET
         checked_at = EXCLUDED.checked_at,
         has_problems = EXCLUDED.has_problems,
         details = EXCLUDED.details`,
      [d, report.checked_at, has_problems, JSON.stringify(report)],
    );
  } catch (err) {
    console.warn('[health] could not persist health status:', err);
  }

  return report;
}
// ============================================================
// Public: load last persisted status (used by mailbox view banner)
// ============================================================
/** Condensed view of the last stored health report for one domain. */
export interface PersistedHealth {
// Domain the stored status belongs to.
domain: string;
// Time of the last check, as an ISO-8601 string when the driver returned a Date.
checked_at: string;
// Stored flag: whether the last check found problems.
has_problems: boolean;
// Number of findings per level across all checks of the stored report.
summary: { fail: number; warn: number; unknown: number; ok: number };
}
/**
 * Load the last persisted health status for a domain without re-running
 * any check.
 *
 * The stored report is reduced to per-level finding counts so the banner
 * can say "2 problems detected" without rehydrating the whole modal.
 * The stored JSON is treated as untrusted: checks without a findings
 * array are skipped and findings with an unrecognized level are not
 * counted, so `summary` always has exactly the four documented keys.
 *
 * @param domain Domain to look up; lower-cased before querying.
 * @returns The condensed status, or null when the domain has no stored row.
 * @throws Whatever the database query rejects with.
 */
export async function getPersistedHealth(domain: string): Promise<PersistedHealth | null> {
  const result = await pool.query(
    `SELECT domain, checked_at, has_problems, details
     FROM domain_health_status WHERE domain=$1`,
    [domain.toLowerCase()],
  );
  const row = result.rows[0];
  if (!row) return null;

  const counts: PersistedHealth['summary'] = { fail: 0, warn: 0, unknown: 0, ok: 0 };
  const details = row.details as DomainHealthReport | null | undefined;
  const storedChecks = details?.checks;
  const checks = Array.isArray(storedChecks) ? storedChecks : [];

  for (const check of checks) {
    const findings = Array.isArray(check?.findings) ? check.findings : [];
    for (const finding of findings) {
      const level: unknown = finding?.level;
      if (level === 'fail' || level === 'warn' || level === 'unknown' || level === 'ok') {
        counts[level] += 1;
      }
    }
  }

  return {
    domain: row.domain,
    checked_at: row.checked_at instanceof Date ? row.checked_at.toISOString() : row.checked_at,
    has_problems: row.has_problems,
    summary: counts,
  };
}