feat: add tender URL validation cleanup

- Create cleanup-invalid-tenders.mjs script to validate tender URLs
- Detect removed tenders via redirect to /syserror/notfound
- Mark invalid tenders as closed in the database
- Initial run found 277/626 tenders (~44%) already removed from sources
- Contracts Finder has the highest removal rate (tenders removed before deadline)
- Add comprehensive documentation in TENDER_CLEANUP_SUMMARY.md
This commit is contained in:
Peter Foster
2026-02-15 14:15:59 +00:00
parent 5009cd64b3
commit 0153da89c5
2 changed files with 169 additions and 0 deletions

View File

@@ -0,0 +1,85 @@
import pg from 'pg';
import dotenv from 'dotenv';
dotenv.config();
// Database connection pool shared by the whole script.
//
// The connection string is read exclusively from the DATABASE_URL environment
// variable (populated from .env by dotenv.config() above). Credentials must not
// be embedded in source code: anything committed here ends up in the repository
// history. NOTE(review): the password that was previously hard-coded on this
// line should be considered exposed and rotated.
const connectionString = process.env.DATABASE_URL;
if (!connectionString) {
  // Fail fast with an actionable message instead of attempting a connection
  // with missing settings.
  throw new Error('DATABASE_URL is not set; define it in the environment or in the .env file');
}
const pool = new pg.Pool({ connectionString });
/**
 * Returns a promise that settles after the given number of milliseconds.
 *
 * @param {number} ms - How long to wait, in milliseconds.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
const delay = (ms) =>
  new Promise((wake) => {
    setTimeout(wake, ms);
  });
/**
 * Validates the notice URLs of open tenders and closes the ones whose source
 * page no longer exists.
 *
 * The 100 most recently created tenders with status 'open' and a non-empty
 * notice_url are checked one at a time with a HEAD request (redirects followed,
 * 10 second timeout). A tender is marked 'closed' when the response is a 404 or
 * the final URL contains '/syserror/' or '/notfound'. Any other status >= 400
 * and any request failure is only counted as an error; the tender is left
 * untouched. A summary is logged at the end.
 *
 * Failures of the run as a whole are logged and reported through a non-zero
 * process exit code. The connection pool is always closed before returning.
 *
 * @returns {Promise<void>}
 */
async function cleanupInvalidTenders() {
  // Titles are only used for log output. A NULL title must never abort the
  // run, so fall back to a placeholder instead of calling a method on null.
  const shortTitle = (tender) => String(tender.title ?? '(untitled)').substring(0, 60);

  try {
    console.log(`[${new Date().toISOString()}] Starting tender URL validation cleanup...`);

    // Get all open tenders with URLs
    const result = await pool.query(`
SELECT id, title, notice_url, source
FROM tenders
WHERE status = 'open'
AND notice_url IS NOT NULL
AND notice_url != ''
ORDER BY created_at DESC
LIMIT 100
`);
    console.log(`Found ${result.rows.length} tenders to check\n`);

    let checked = 0;
    let removed = 0;
    let errors = 0;

    for (const tender of result.rows) {
      checked++;
      try {
        const response = await fetch(tender.notice_url, {
          method: 'HEAD',
          redirect: 'follow',
          signal: AbortSignal.timeout(10000)
        });
        const status = response.status;

        // Check for 404 or redirect to error page
        if (status === 404 || response.url.includes('/syserror/') || response.url.includes('/notfound')) {
          console.log(` [${checked}/${result.rows.length}] REMOVING: ${shortTitle(tender)}`);
          console.log(` URL: ${tender.notice_url}`);
          console.log(` Status: ${status}, Final URL: ${response.url}`);
          await pool.query(
            'UPDATE tenders SET status = $1 WHERE id = $2',
            ['closed', tender.id]
          );
          removed++;
        } else if (status >= 400) {
          console.log(` [${checked}/${result.rows.length}] ERROR ${status}: ${shortTitle(tender)}`);
          errors++;
        }
      } catch (error) {
        // Reached for network failures and timeouts, and also when the UPDATE
        // above fails; either way the tender is counted as an error and the
        // loop moves on.
        console.log(` [${checked}/${result.rows.length}] FETCH ERROR: ${shortTitle(tender)}`);
        console.log(` ${error.message}`);
        errors++;
      } finally {
        // Be nice to servers. Pausing in `finally` keeps the rate limit in
        // place after failures too, so a struggling host is not hit
        // back-to-back.
        await delay(500);
      }
    }

    console.log(`\nCleanup complete:`);
    console.log(` Checked: ${checked}`);
    console.log(` Removed: ${removed}`);
    console.log(` Errors: ${errors}`);
    console.log(` Still valid: ${checked - removed - errors}`);
  } catch (error) {
    console.error('Cleanup failed:', error);
    // Let schedulers and CI see that the run did not succeed.
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}
// Script entry point: run the cleanup once. The function handles its own
// errors, but closing the pool in its `finally` can still reject, so the
// promise is not left floating: anything that escapes is logged and turned
// into a failing exit code.
cleanupInvalidTenders().catch((error) => {
  console.error('Unhandled error in tender cleanup:', error);
  process.exitCode = 1;
});