145 lines
4.6 KiB
JavaScript
145 lines
4.6 KiB
JavaScript
/**
 * Shared sector classification module for TenderRadar scrapers.
 * Exports a classifySector function that categorizes tenders into 9 sectors.
 */

/**
 * Ordered [sector, pattern] rules used by classifySector.
 *
 * Order is the priority: the first pattern that matches decides the sector,
 * so e.g. a "school building construction" tender is Education, not
 * Construction. Patterns are written in lowercase because the input text is
 * lowercased before matching, and none of them use the `g`/`y` flags, so
 * `.test()` is stateless and the literals can be shared safely.
 */
const SECTOR_RULES = [
  // `hospital(?!ity)` keeps "hospital", "hospitals", "hospitalisation" but
  // rejects "hospitality"; `\bdental` rejects "accidental" / "incidental".
  ['Health', /\bnhs\b|hospital(?!ity)|clinical|pharmac|medical|health\s*(care|service)|maternity|mental\s*health|\bdental|ambulance|patient|surgery|pathology|\bward\b/],
  ['Education', /\bschool|universit|college|educat|academ|learning|pupil|student|teaching/],
  ['Construction', /\bconstruct|demoliti|renovati|building\s*(work|maint|repair)|roofing|plumbing|electrical\s*install|painting\s*(and|&)\s*decorat|repair\s*(of|work)|refurbish|scaffolding|paving|groundwork/],
  ['IT & Technology', /\bsoftware|\b(it|ict)\s+(service|system|support|infra)|digital\s*(platform|service|transform)|cyber|cloud\s*(comput|service|hosting)|network\s*infra|data\s*(centre|center|analy|manage)/],
  ['Transport', /\btransport|vehicle|fleet\s*(manage|maint)|highway|railway|bus\s*(service|route)|traffic|parking/],
  ['Defence', /\bdefence|defense|military|\bmod\b|armed\s*force|navy|royal\s*air/],
  ['Energy', /\benergy\s*(supply|effic|manage)|electricity|solar|renewable|power\s*generat|gas\s*supply|wind\s*(farm|turbin)/],
  // Government is deliberately last: it only wins when no specific sector matched.
  ['Government', /\bcouncil|government|civic|municipal|parliament|local\s*authorit/],
];

/**
 * Classify a tender into one of 9 sectors based on title, description, and authority.
 *
 * The three fields are joined into one lowercase string and tested against the
 * sector patterns in priority order; the first match wins. Missing (falsy)
 * fields are treated as empty strings, so the function never throws on
 * null/undefined input.
 *
 * @param {string} title - Tender title
 * @param {string} description - Tender description
 * @param {string} authorityName - Procuring authority name
 * @returns {string} One of: Health, Education, Construction, IT & Technology,
 *   Transport, Defence, Energy, Government, Other
 */
export function classifySector(title, description, authorityName) {
  // Combine all text and normalize to lowercase for case-insensitive matching.
  const combined = `${title || ''} ${description || ''} ${authorityName || ''}`.toLowerCase();

  for (const [sector, pattern] of SECTOR_RULES) {
    if (pattern.test(combined)) {
      return sector;
    }
  }

  // Default fallback when no sector pattern matched.
  return 'Other';
}

// Self-test: executed only when this file is the script handed to node,
// i.e. when the module URL equals "file://" + process.argv[1].
// NOTE(review): this is a plain string comparison; it presumably will not match
// when the script path contains characters that are percent-encoded in
// import.meta.url (e.g. spaces) or on Windows drive paths — confirm if that matters.
if (import.meta.url === `file://${process.argv[1]}`) {
  console.log('Running self-tests...\n');

  // Each fixture is [title, description, authority, expected sector].
  const fixtures = [
    ['NHS Hospital Supplies', 'Medical equipment for clinical use', 'NHS England', 'Health'],
    ['School Building Construction', 'New educational facility', 'Local Education Authority', 'Education'],
    ['Roofing and Painting Services', 'Building renovation and repairs', 'City Council', 'Construction'],
    ['Software Development Services', 'IT system and cloud hosting', 'Government IT Department', 'IT & Technology'],
    ['Public Transport Fleet Maintenance', 'Vehicle servicing and support', 'Transport Department', 'Transport'],
    ['Military Equipment Supply', 'Defence and armed forces supplies', 'Ministry of Defence', 'Defence'],
    ['Renewable Energy Installation', 'Solar power and wind turbine project', 'Energy Commission', 'Energy'],
    ['Council Office Supplies', 'General supplies for local government', 'City Council', 'Government'],
    ['Generic Office Supplies', 'Standard stationery and equipment', 'Random Organization', 'Other'],
  ];

  const tally = { passed: 0, failed: 0 };

  for (const [position, [title, description, authority, expected]] of fixtures.entries()) {
    const actual = classifySector(title, description, authority);
    const ok = actual === expected;

    if (ok) {
      tally.passed += 1;
    } else {
      tally.failed += 1;
    }

    console.log(`${ok ? '✓' : '✗'} Test ${position + 1}: "${title}"`);
    console.log(` Expected: ${expected}, Got: ${actual}\n`);
  }

  console.log(`\nResults: ${tally.passed} passed, ${tally.failed} failed`);

  // Non-zero exit code signals at least one failed case to the caller.
  process.exit(tally.failed > 0 ? 1 : 0);
}