Initial implementation of Website Change Detection Monitor MVP
Features implemented:
- Backend API with Express + TypeScript
- User authentication (register/login with JWT)
- Monitor CRUD operations with plan-based limits
- Automated change detection engine
- Email alert system
- Frontend with Next.js + TypeScript
- Dashboard with monitor management
- Login/register pages
- Monitor history viewer
- PostgreSQL database schema
- Docker setup for local development

Technical stack:
- Backend: Express, TypeScript, PostgreSQL, Redis (ready)
- Frontend: Next.js 14, React Query, Tailwind CSS
- Database: PostgreSQL with migrations
- Services: Page fetching, diff detection, email alerts

Documentation:
- README with full setup instructions
- SETUP guide for quick start
- PROJECT_STATUS with current capabilities
- Complete technical specifications

Ready for local testing and feature expansion.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
128
backend/src/services/fetcher.ts
Normal file
128
backend/src/services/fetcher.ts
Normal file
@@ -0,0 +1,128 @@
|
||||
import axios, { AxiosResponse } from 'axios';
|
||||
import * as cheerio from 'cheerio';
|
||||
import crypto from 'crypto';
|
||||
|
||||
/**
 * Outcome of a single page fetch, as produced by `fetchPage`.
 *
 * On failure the content fields (`html`, `text`, `hash`) are empty strings and
 * `error` carries a human-readable reason.
 */
export interface FetchResult {
  /** Response body, or the inner HTML of the selected element when a selector was given. */
  html: string;
  /** Trimmed text content extracted from `html`. */
  text: string;
  /** Hex-encoded SHA-256 digest of `html`; empty string on failure. */
  hash: string;
  /** HTTP status code of the response, or 0 when no response was received. */
  status: number;
  /** Milliseconds elapsed between the start of the fetch and its completion or failure. */
  responseTime: number;
  /** Failure description; absent when the fetch succeeded. */
  error?: string;
}
|
||||
|
||||
/** URL schemes the fetcher is willing to request. */
const FETCHABLE_PROTOCOLS: ReadonlySet<string> = new Set(['http:', 'https:']);

/** Fields read from a caught value when classifying a fetch failure. */
type FetchFailureShape = {
  response?: { status: number; statusText: string };
  code?: string;
  message?: string;
};

/** Builds the result returned for every failed fetch: empty content plus a reason. */
function failedFetch(
  status: number,
  responseTime: number,
  error: string
): FetchResult {
  return { html: '', text: '', hash: '', status, responseTime, error };
}

/**
 * Fetches a page over HTTP(S) and returns its content, text and content hash.
 *
 * The request uses a 30 s timeout, follows at most 5 redirects and sends
 * browser-like headers. Responses with a status below 500 (including 4xx) are
 * treated as content; anything else is rejected by axios and reported through
 * `error`. This function never throws: every failure is returned as a
 * `FetchResult` with empty content fields.
 *
 * @param url - Absolute `http:` or `https:` URL to fetch.
 * @param elementSelector - Optional CSS selector; when given, only the inner
 *   HTML of the first matching element is kept. A selector that matches nothing
 *   is reported as a failure with `status: 0`.
 * @returns The fetched content, or a failure result with `error` set.
 */
export async function fetchPage(
  url: string,
  elementSelector?: string
): Promise<FetchResult> {
  const startTime = Date.now();

  try {
    // `new URL` throws on malformed input; additionally reject non-web schemes
    // (file:, ftp:, data:, ...) before any request is attempted.
    // NOTE(review): private/internal hosts are not blocked here - confirm that
    // user-supplied URLs are vetted elsewhere if SSRF is a concern.
    const { protocol } = new URL(url);
    if (!FETCHABLE_PROTOCOLS.has(protocol)) {
      throw new Error(`Unsupported protocol: ${protocol}`);
    }

    const response: AxiosResponse = await axios.get(url, {
      timeout: 30000,
      maxRedirects: 5,
      // Keep the body as raw text: without this axios parses JSON bodies into
      // objects, which cannot be loaded by cheerio or hashed.
      responseType: 'text',
      headers: {
        'User-Agent':
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        Accept:
          'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        Connection: 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
      },
      validateStatus: (status: number) => status < 500,
    });

    const responseTime = Date.now() - startTime;

    // Defensive coercion in case an interceptor or adapter still hands back a
    // non-string body.
    const body: unknown = response.data;
    let html: string;
    if (typeof body === 'string') {
      html = body;
    } else if (body == null) {
      html = '';
    } else {
      html = JSON.stringify(body);
    }

    // If element selector is provided, extract only that element
    if (elementSelector) {
      const element = cheerio.load(html)(elementSelector);

      if (element.length === 0) {
        throw new Error(`Element not found: ${elementSelector}`);
      }

      html = element.html() || '';
    }

    // Extract text content
    const text = cheerio.load(html).text().trim();

    // Generate hash
    const hash = crypto.createHash('sha256').update(html).digest('hex');

    return {
      html,
      text,
      hash,
      status: response.status,
      responseTime,
    };
  } catch (error: unknown) {
    const responseTime = Date.now() - startTime;
    const failure: FetchFailureShape =
      typeof error === 'object' && error !== null
        ? (error as FetchFailureShape)
        : {};

    // The server answered, but with a status rejected by validateStatus.
    if (failure.response) {
      const { status, statusText } = failure.response;
      return failedFetch(status, responseTime, `HTTP ${status}: ${statusText}`);
    }

    if (failure.code === 'ENOTFOUND') {
      return failedFetch(0, responseTime, 'Domain not found');
    }

    if (failure.code === 'ETIMEDOUT' || failure.code === 'ECONNABORTED') {
      return failedFetch(0, responseTime, 'Request timeout');
    }

    return failedFetch(0, responseTime, failure.message || 'Unknown error');
  }
}
|
||||
|
||||
/**
 * Returns the trimmed text content of an HTML string, ignoring the contents
 * of `<script>` and `<style>` elements.
 *
 * @param html - HTML markup to extract text from.
 * @returns The document's text with leading and trailing whitespace removed.
 */
export function extractTextFromHtml(html: string): string {
  const $ = cheerio.load(html);

  // Script and style bodies are code, not page text; drop them before reading.
  $('script, style').remove();

  return $.text().trim();
}
|
||||
|
||||
/**
 * Computes the SHA-256 digest of a string.
 *
 * @param content - Text to hash; encoded as UTF-8 before hashing.
 * @returns The digest as a lowercase hexadecimal string.
 */
export function calculateHash(content: string): string {
  const hasher = crypto.createHash('sha256');
  hasher.update(content, 'utf8');
  return hasher.digest('hex');
}
|
||||
Reference in New Issue
Block a user