Add sector classification module, integrate into all 7 scrapers, fix CF pagination
This commit is contained in:
64
reclassify-sectors.js
Normal file
64
reclassify-sectors.js
Normal file
@@ -0,0 +1,64 @@
|
||||
import { classifySector } from './scrapers/classify-sector.js';
|
||||
import pg from 'pg';
|
||||
import dotenv from 'dotenv';
|
||||
|
||||
// Load variables from a local .env file (if present) into process.env.
dotenv.config();

// The connection string, including credentials, must come from the environment.
// A hard-coded fallback would commit a database password to source control, so
// the script fails fast with an actionable message instead.
const connectionString = process.env.DATABASE_URL;
if (!connectionString) {
  throw new Error(
    'DATABASE_URL is not set. Define it in the environment or in a .env file before running reclassify-sectors.js.'
  );
}

// Shared connection pool used by reclassifyAllSectors().
const pool = new pg.Pool({ connectionString });
|
||||
|
||||
/**
 * Recomputes the `sector` column for every row of the `tenders` table.
 *
 * Selects id, title, description and authority_name for all tenders, passes the
 * three text fields (falsy values replaced by '') to `classifySector`, and
 * writes the result back with one parameterized UPDATE per row. A row that
 * fails is logged and counted, but does not stop the run. Progress is logged
 * every 100 successful updates and a per-sector tally is printed at the end.
 *
 * Fatal errors (failing to obtain a client, failing SELECT) are logged and
 * signalled through `process.exitCode = 1` instead of being thrown. The shared
 * pool is always closed before the returned promise settles, so this function
 * is meant to be called once per process.
 *
 * @returns {Promise<void>}
 */
async function reclassifyAllSectors() {
  let client;
  try {
    // Acquire the client inside the try block so that a connection failure is
    // reported like any other fatal error and the pool is still shut down.
    client = await pool.connect();
    console.log('[INFO] Starting reclassification of all tenders...');

    // Fetch all tenders
    const result = await client.query(
      'SELECT id, title, description, authority_name FROM tenders ORDER BY id'
    );

    const tenders = result.rows;
    console.log(`[INFO] Found ${tenders.length} tenders to reclassify`);

    let updated = 0;
    let errors = 0;
    // A Map rather than a plain object: sector labels are dynamic keys and
    // must not collide with inherited members such as "constructor".
    const sectorCounts = new Map();

    for (const tender of tenders) {
      try {
        const sector = classifySector(
          tender.title || '',
          tender.description || '',
          tender.authority_name || ''
        );

        await client.query(
          'UPDATE tenders SET sector = $1 WHERE id = $2',
          [sector, tender.id]
        );

        sectorCounts.set(sector, (sectorCounts.get(sector) || 0) + 1);
        updated++;

        if (updated % 100 === 0) {
          console.log(`[INFO] Updated ${updated} tenders...`);
        }
      } catch (e) {
        // Best effort: keep going so one bad row does not abort the whole run.
        errors++;
        console.error(`[ERROR] Failed to update tender ${tender.id}: ${e.message}`);
      }
    }

    console.log('\n[INFO] Reclassification complete:');
    console.log(`  Total updated: ${updated}`);
    console.log(`  Errors: ${errors}`);
    console.log('\n[INFO] Sector distribution:');
    // Most frequent sector first.
    [...sectorCounts]
      .sort((a, b) => b[1] - a[1])
      .forEach(([sector, count]) => {
        console.log(`  ${sector}: ${count}`);
      });
  } catch (error) {
    console.error('[ERROR] Fatal error:', error.message);
    // Let shells, cron jobs and CI detect that the run failed.
    process.exitCode = 1;
  } finally {
    // `client` is undefined when pool.connect() itself failed.
    if (client) {
      client.release();
    }
    await pool.end();
  }
}
|
||||
|
||||
// Script entry point. The promise must not be left floating: any rejection is
// logged and turned into a non-zero exit code instead of an unhandled rejection.
reclassifyAllSectors().catch((error) => {
  console.error('[ERROR] Fatal error:', error.message);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user