// tenderpilot/reclassify-sectors.js
// Maintenance script: re-runs sector classification over every row in the `tenders` table.

import { classifySector } from './scrapers/classify-sector.js';
import pg from 'pg';
import dotenv from 'dotenv';
// Load variables from a local .env file into process.env before DATABASE_URL is read.
dotenv.config();

// The connection string must come from the environment. Credentials are deliberately
// not embedded in this file: a password committed to source control is visible to
// everyone with repository access and cannot be rotated without a code change.
const connectionString = process.env.DATABASE_URL;
if (!connectionString) {
  throw new Error(
    'DATABASE_URL is not set; define it in the environment or in .env ' +
      '(e.g. postgresql://user:password@host:5432/dbname)'
  );
}

// Shared connection pool used by the rest of this script.
const pool = new pg.Pool({ connectionString });
/**
 * Recomputes the `sector` column for every row in `tenders`.
 *
 * Reads id, title, description and authority_name for all tenders, passes them
 * to `classifySector`, and writes the result back one row at a time. A failure
 * on a single row is logged and counted, and processing continues with the next
 * row. When the loop finishes, a summary is printed: number of rows updated,
 * number of errors, and the per-sector counts sorted by count, descending.
 *
 * Any failure (fatal or per-row) sets `process.exitCode` to 1 so that whatever
 * launched the script can detect it. The shared pool is closed before the
 * function returns, so it is meant to run once per process.
 *
 * @returns {Promise<void>} Resolves once the summary is printed and the pool is closed.
 */
async function reclassifyAllSectors() {
  // Acquired inside the try block so that a failed connection attempt is still
  // reported and the pool is still shut down by the finally clause.
  let client;
  try {
    client = await pool.connect();
    console.log('[INFO] Starting reclassification of all tenders...');

    // Fetch all tenders
    const { rows: tenders } = await client.query(
      'SELECT id, title, description, authority_name FROM tenders ORDER BY id'
    );
    console.log(`[INFO] Found ${tenders.length} tenders to reclassify`);

    let updated = 0;
    let errors = 0;
    // A Map rather than a plain object: sector labels are arbitrary strings, and a
    // label such as "constructor" would otherwise collide with an inherited property.
    const sectorCounts = new Map();

    for (const tender of tenders) {
      try {
        const sector = classifySector(
          tender.title || '',
          tender.description || '',
          tender.authority_name || ''
        );
        await client.query(
          'UPDATE tenders SET sector = $1 WHERE id = $2',
          [sector, tender.id]
        );
        sectorCounts.set(sector, (sectorCounts.get(sector) || 0) + 1);
        updated++;
        if (updated % 100 === 0) {
          console.log(`[INFO] Updated ${updated} tenders...`);
        }
      } catch (e) {
        // Best effort per row: record the failure and keep going.
        errors++;
        console.error(`[ERROR] Failed to update tender ${tender.id}: ${e.message}`);
      }
    }

    console.log('\n[INFO] Reclassification complete:');
    console.log(` Total updated: ${updated}`);
    console.log(` Errors: ${errors}`);
    console.log('\n[INFO] Sector distribution:');
    const byCountDescending = [...sectorCounts.entries()].sort((a, b) => b[1] - a[1]);
    for (const [sector, count] of byCountDescending) {
      console.log(` ${sector}: ${count}`);
    }

    // Partial failure must be visible in the exit status, not only in the log.
    if (errors > 0) {
      process.exitCode = 1;
    }
  } catch (error) {
    console.error('[ERROR] Fatal error:', error.message);
    process.exitCode = 1;
  } finally {
    // client is undefined when pool.connect() itself failed.
    if (client) {
      client.release();
    }
    await pool.end();
  }
}
// Entry point. The returned promise is handled explicitly so that a rejection
// escaping reclassifyAllSectors (for example from closing the pool) is logged and
// turned into a non-zero exit status instead of being left as a floating promise.
reclassifyAllSectors().catch((error) => {
  console.error('[ERROR] Unhandled failure:', error);
  process.exitCode = 1;
});