Files
tenderpilot/scrapers/update-existing-sectors.js

57 lines
1.6 KiB
JavaScript

import { classifySector } from './classify-sector.js';
import pg from 'pg';
import dotenv from 'dotenv';
// Load variables from a local .env file into process.env before any configuration is read.
dotenv.config();

// SECURITY: database credentials must come from the environment, never from source control.
// An earlier revision embedded a full connection string (including the password) as a
// fallback here; any password that was committed that way should be rotated.
const connectionString = process.env.DATABASE_URL;
if (!connectionString) {
  // Fail fast with an actionable message instead of silently connecting with baked-in credentials.
  throw new Error('DATABASE_URL is not set; refusing to start without database credentials');
}

/** PostgreSQL connection pool shared by this script. */
const pool = new pg.Pool({ connectionString });
/**
 * Reclassifies tenders that have no sector yet and stores the result in the database.
 *
 * Selects up to `limit` rows from `tenders` whose `sector` is NULL or equal to
 * `unclassifiedSector`, passes each row's title, description and authority name to
 * `classifySector`, and writes the returned value back to the row's `sector` column.
 *
 * A failure on a single tender is logged and counted; the run continues with the next
 * tender. A fatal failure (acquiring a client or running the SELECT) is logged and sets
 * `process.exitCode` to 1 so callers such as cron or CI can detect it. The client is
 * released and the shared pool is closed before the function returns, in every case.
 *
 * @param {object} [options] - Optional overrides; calling with no arguments is supported.
 * @param {string} [options.unclassifiedSector=''] - Sector value treated as "not yet classified".
 *   NOTE(review): the previous query used a bare `?` with no value, so the intended marker
 *   is not visible in this file; the empty string is an assumption — confirm against the data.
 * @param {number} [options.limit=10000] - Maximum number of tenders processed in one run.
 * @returns {Promise<void>} Resolves when the run has finished and the pool is closed.
 */
async function updateExistingSectors({ unclassifiedSector = '', limit = 10000 } = {}) {
  let client;
  try {
    // Acquire the client inside the try block so a failed connect is reported below
    // and the pool is still shut down in `finally`.
    client = await pool.connect();
    console.log('[INFO] Starting reclassification of existing tenders...');

    // node-postgres uses numbered placeholders ($1, $2, ...). A bare `?` is sent to
    // PostgreSQL verbatim and rejected, so every value is passed as a bound parameter.
    const { rows: tenders } = await client.query(
      'SELECT id, title, description, authority_name FROM tenders WHERE sector IS NULL OR sector = $1 ORDER BY id LIMIT $2',
      [unclassifiedSector, limit]
    );
    console.log(`[INFO] Found ${tenders.length} tenders to reclassify`);

    let updated = 0;
    let errors = 0;
    for (const tender of tenders) {
      try {
        // Missing text columns are passed to the classifier as empty strings.
        const sector = classifySector(
          tender.title || '',
          tender.description || '',
          tender.authority_name || ''
        );
        await client.query('UPDATE tenders SET sector = $1 WHERE id = $2', [sector, tender.id]);
        updated++;
        // Progress heartbeat every 100 successful updates.
        if (updated % 100 === 0) {
          console.log(`[INFO] Updated ${updated} tenders...`);
        }
      } catch (e) {
        // Best effort: one bad tender must not abort the whole batch.
        errors++;
        console.error(`[ERROR] Failed to update tender ${tender.id}: ${e.message}`);
      }
    }
    console.log(`[INFO] Reclassification complete: ${updated} updated, ${errors} errors`);
  } catch (error) {
    console.error('[ERROR] Fatal error:', error.message);
    // Signal failure to the caller of the script without cutting the cleanup short.
    process.exitCode = 1;
  } finally {
    // `client` is undefined when pool.connect() itself failed.
    if (client) {
      client.release();
    }
    await pool.end();
  }
}
// Script entry point. The returned promise is handled explicitly so that any rejection
// escaping updateExistingSectors is logged in the script's own format and the process
// reports a failing exit code instead of leaving a floating promise.
updateExistingSectors().catch((error) => {
  console.error('[ERROR] Unhandled failure:', error);
  process.exitCode = 1;
});