Files
website-monitor/backend/src/services/fetcher.ts
Timo 2c1ec69a79 Initial implementation of Website Change Detection Monitor MVP
Features implemented:
- Backend API with Express + TypeScript
- User authentication (register/login with JWT)
- Monitor CRUD operations with plan-based limits
- Automated change detection engine
- Email alert system
- Frontend with Next.js + TypeScript
- Dashboard with monitor management
- Login/register pages
- Monitor history viewer
- PostgreSQL database schema
- Docker setup for local development

Technical stack:
- Backend: Express, TypeScript, PostgreSQL, Redis (ready)
- Frontend: Next.js 14, React Query, Tailwind CSS
- Database: PostgreSQL with migrations
- Services: Page fetching, diff detection, email alerts

Documentation:
- README with full setup instructions
- SETUP guide for quick start
- PROJECT_STATUS with current capabilities
- Complete technical specifications

Ready for local testing and feature expansion.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-16 18:46:40 +01:00

129 lines
2.9 KiB
TypeScript

import axios, { AxiosResponse } from 'axios';
import * as cheerio from 'cheerio';
import crypto from 'crypto';
/**
 * Outcome of a single page fetch performed by `fetchPage`.
 *
 * On failure the content fields (`html`, `text`, `hash`) are empty strings
 * and `error` carries a human-readable reason.
 */
export interface FetchResult {
/** Response body, or the inner HTML of the selected element when a selector was given. */
html: string;
/** Text content extracted from `html`, trimmed of surrounding whitespace. */
text: string;
/** Hex-encoded SHA-256 digest of `html`; empty string on failure. */
hash: string;
/** HTTP status code of the response, or 0 when no HTTP response was obtained. */
status: number;
/** Elapsed wall-clock time in milliseconds from the start of the fetch to its result. */
responseTime: number;
/** Failure description; absent when the fetch succeeded. */
error?: string;
}
/**
 * Structural view of whatever the fetch attempt threw. Axios errors carry
 * `response` (for rejected HTTP statuses) and `code` (for network failures);
 * plain `Error`s only carry `message`.
 */
type ThrownFetchError = {
  code?: string;
  message?: string;
  response?: { status: number; statusText: string };
};

/** Builds the result returned for any failed fetch: empty content plus status and reason. */
function failedFetch(
  status: number,
  responseTime: number,
  error: string
): FetchResult {
  return { html: '', text: '', hash: '', status, responseTime, error };
}

/**
 * Coerces a response body to a string. Some axios configurations/versions
 * hand back a parsed object for JSON bodies; hashing and HTML parsing need text.
 */
function bodyToString(data: unknown): string {
  if (typeof data === 'string') {
    return data;
  }
  if (data === null || data === undefined) {
    return '';
  }
  return JSON.stringify(data);
}

/**
 * Fetches a page and returns its content, extracted text and a content hash.
 *
 * Never rejects: every failure (invalid URL, network error, timeout, rejected
 * HTTP status, missing element) is reported through `FetchResult.error` with
 * empty content fields.
 *
 * @param url - Absolute http(s) URL of the page to fetch.
 * @param elementSelector - Optional CSS selector; when given, only the inner
 *   HTML of the matched element is kept (and hashed) instead of the whole body.
 * @returns The fetch outcome. Responses with status below 500 (including 4xx)
 *   are treated as successful fetches and carry their real status code;
 *   statuses of 500 and above are reported as failures with that status.
 *   All other failures use status 0.
 */
export async function fetchPage(
  url: string,
  elementSelector?: string
): Promise<FetchResult> {
  const startTime = Date.now();

  try {
    // `new URL` throws on malformed input, which lands in the catch below.
    const { protocol } = new URL(url);

    // The URL is user-supplied: only plain web schemes may be fetched.
    // NOTE(review): this does not stop requests to internal/private addresses
    // (SSRF); host filtering should be added where deployment policy is known.
    if (protocol !== 'http:' && protocol !== 'https:') {
      return failedFetch(
        0,
        Date.now() - startTime,
        `Unsupported URL protocol: ${protocol}`
      );
    }

    const response: AxiosResponse = await axios.get(url, {
      timeout: 30000,
      maxRedirects: 5,
      // Ask for the raw text so JSON bodies are not parsed into objects.
      responseType: 'text',
      headers: {
        'User-Agent':
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        Accept:
          'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        Connection: 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
      },
      // 4xx pages are still content worth recording; only 5xx rejects.
      validateStatus: (status) => status < 500,
    });

    const responseTime = Date.now() - startTime;
    let html = bodyToString(response.data);

    // If an element selector is provided, keep only that element's inner HTML.
    if (elementSelector) {
      const page = cheerio.load(html);
      const element = page(elementSelector);

      if (element.length === 0) {
        // Status stays 0 here (as before) so callers that key off `status`
        // keep treating a missing element as a failed check.
        return failedFetch(
          0,
          responseTime,
          `Element not found: ${elementSelector}`
        );
      }

      html = element.html() || '';
    }

    // Extract visible text: script/style source is not page text and would
    // only add noise, so it is dropped (same rule as extractTextFromHtml).
    const $ = cheerio.load(html);
    $('script').remove();
    $('style').remove();
    const text = $.text().trim();

    // The hash covers the HTML itself, so markup-only changes are detected too.
    const hash = crypto.createHash('sha256').update(html).digest('hex');

    return {
      html,
      text,
      hash,
      status: response.status,
      responseTime,
    };
  } catch (error: unknown) {
    const responseTime = Date.now() - startTime;

    // Anything can be thrown; only objects can carry the fields we look at.
    const thrown: ThrownFetchError =
      typeof error === 'object' && error !== null
        ? (error as ThrownFetchError)
        : {};

    // The server answered, but with a status rejected by validateStatus.
    if (thrown.response) {
      return failedFetch(
        thrown.response.status,
        responseTime,
        `HTTP ${thrown.response.status}: ${thrown.response.statusText}`
      );
    }

    if (thrown.code === 'ENOTFOUND') {
      return failedFetch(0, responseTime, 'Domain not found');
    }

    if (thrown.code === 'ETIMEDOUT' || thrown.code === 'ECONNABORTED') {
      return failedFetch(0, responseTime, 'Request timeout');
    }

    return failedFetch(0, responseTime, thrown.message || 'Unknown error');
  }
}
/**
 * Returns the text content of an HTML string with `<script>` and `<style>`
 * elements removed and surrounding whitespace trimmed.
 *
 * @param html - HTML document or fragment to extract text from.
 * @returns The trimmed text content.
 */
export function extractTextFromHtml(html: string): string {
  const dom = cheerio.load(html);

  // Script and style bodies are code, not page text: drop both in one pass.
  dom('script, style').remove();

  const rawText = dom.text();
  return rawText.trim();
}
/**
 * Computes the SHA-256 digest of a string.
 *
 * @param content - Text to hash.
 * @returns The digest as a lowercase hexadecimal string.
 */
export function calculateHash(content: string): string {
  const hasher = crypto.createHash('sha256');
  hasher.update(content);
  return hasher.digest('hex');
}