#!/usr/bin/env node /** * IMPROVED CLEANUP WITH ARCHIVAL * * Instead of just marking tenders as "closed", this script: * 1. Validates all open tender URLs * 2. Saves full tender snapshot before archiving * 3. Marks removed tenders as "archived" (not just closed) * 4. Keeps tender data accessible even after source removes it * 5. Tracks validation failures */ import pg from 'pg'; import dotenv from 'dotenv'; dotenv.config(); const pool = new pg.Pool({ connectionString: process.env.DATABASE_URL || 'postgresql://tenderpilot:tenderpilot123@localhost:5432/tenderpilot' }); async function validateAndArchive() { try { console.log(`[${new Date().toISOString()}] Starting tender validation with archival...`); // Get all open tenders const result = await pool.query( `SELECT id, source, source_id, title, description, notice_url, deadline, value_low, value_high, currency, authority_name, location, sector, cpv_codes, published_date, created_at, validation_failures FROM tenders WHERE status = 'open' ORDER BY created_at DESC` ); console.log(`Found ${result.rows.length} open tenders to validate\\n`); let validated = 0; let archived = 0; let failed = 0; let errors = 0; for (const tender of result.rows) { try { // Validate URL const response = await fetch(tender.notice_url, { method: 'HEAD', redirect: 'follow', signal: AbortSignal.timeout(10000) }); const status = response.status; const finalUrl = response.url; const isRemoved = ( status === 404 || finalUrl.includes('/syserror/') || finalUrl.includes('/notfound') || finalUrl.includes('/error') ); if (isRemoved) { // Create snapshot before archiving const snapshot = { source: tender.source, source_id: tender.source_id, title: tender.title, description: tender.description, notice_url: tender.notice_url, deadline: tender.deadline, value_low: tender.value_low, value_high: tender.value_high, currency: tender.currency, authority_name: tender.authority_name, location: tender.location, sector: tender.sector, cpv_codes: tender.cpv_codes, published_date: tender.published_date, scraped_at: tender.created_at, archived_reason: 'URL removed from source', archived_status: status, archived_redirect: finalUrl !== tender.notice_url ? finalUrl : null }; // Archive the tender await pool.query( `UPDATE tenders SET status = 'closed', archived = TRUE, archived_at = NOW(), archived_snapshot = $1, last_validated = NOW(), validation_failures = validation_failures + 1 WHERE id = $2`, [JSON.stringify(snapshot), tender.id] ); archived++; console.log(`📦 Archived: [${tender.source}] ${tender.title.substring(0, 60)}`); } else { // URL still works - update validation timestamp await pool.query( `UPDATE tenders SET last_validated = NOW(), validation_failures = 0 WHERE id = $1`, [tender.id] ); validated++; } } catch (error) { // Network error or timeout const currentFailures = tender.validation_failures || 0; if (currentFailures >= 3) { // After 3 failures, mark as closed (probably permanently gone) const snapshot = { source: tender.source, source_id: tender.source_id, title: tender.title, description: tender.description, notice_url: tender.notice_url, deadline: tender.deadline, scraped_at: tender.created_at, archived_reason: `Validation failed ${currentFailures + 1} times: ${error.message}`, }; await pool.query( `UPDATE tenders SET status = 'closed', archived = TRUE, archived_at = NOW(), archived_snapshot = $1, validation_failures = validation_failures + 1 WHERE id = $2`, [JSON.stringify(snapshot), tender.id] ); archived++; console.log(`⚠️ Archived (after ${currentFailures + 1} failures): ${tender.title.substring(0, 60)}`); } else { // Increment failure count but don't close yet await pool.query( `UPDATE tenders SET validation_failures = validation_failures + 1, last_validated = NOW() WHERE id = $1`, [tender.id] ); failed++; console.log(`❌ Validation failed (${currentFailures + 1}/3): ${tender.title.substring(0, 60)}`); } errors++; } } console.log(`\\n=== VALIDATION COMPLETE ===`); console.log(`Total checked: ${result.rows.length}`); console.log(`✅ Still valid: ${validated}`); console.log(`📦 Archived: ${archived}`); console.log(`❌ Temporary failures: ${failed}`); console.log(`⚠️ Errors: ${errors}`); console.log(`Completion time: ${new Date().toISOString()}`); // Print statistics const stats = await pool.query( `SELECT COUNT(*) FILTER (WHERE status = 'open') as open, COUNT(*) FILTER (WHERE status = 'closed' AND NOT archived) as closed, COUNT(*) FILTER (WHERE archived) as archived, COUNT(*) as total FROM tenders` ); console.log(`\\n=== DATABASE STATS ===`); console.log(`Total tenders: ${stats.rows[0].total}`); console.log(`Open: ${stats.rows[0].open}`); console.log(`Closed: ${stats.rows[0].closed}`); console.log(`Archived: ${stats.rows[0].archived}`); } catch (error) { console.error('Fatal error:', error); } finally { await pool.end(); } } validateAndArchive();