/**
 * Shared sector classification module for TenderRadar scrapers.
 * Exports a classifySector function that categorizes tenders into 9 sectors.
 */

import { pathToFileURL } from 'node:url';

/**
 * Ordered classification rules; the first rule whose pattern matches wins,
 * so more specific sectors are listed before the generic "Government" bucket.
 *
 * Patterns are tested against lowercased text, so they are written in
 * lowercase. Note that a leading `\b` binds only to the alternative it
 * prefixes; every other alternative matches as a plain substring
 * (e.g. `universit` matches "university" and "universities").
 *
 * Deliberate exclusions that guard against substring false positives:
 *   - `hospital(?!ity)` so "hospitality" is not classified as Health
 *   - `\bdental`        so "accidental" / "incidental" are not Health
 *   - `traffic(?!k)`    so "trafficking" is not classified as Transport
 */
const SECTOR_RULES = Object.freeze([
  {
    sector: 'Health',
    pattern:
      /\bnhs\b|hospital(?!ity)|clinical|pharmac|medical|health\s*(care|service)|maternity|mental\s*health|\bdental|ambulance|patient|surgery|pathology|\bward\b/,
  },
  {
    sector: 'Education',
    pattern: /\bschool|universit|college|educat|academ|learning|pupil|student|teaching/,
  },
  {
    sector: 'Construction',
    pattern:
      /\bconstruct|demoliti|renovati|building\s*(work|maint|repair)|roofing|plumbing|electrical\s*install|painting\s*(and|&)\s*decorat|repair\s*(of|work)|refurbish|scaffolding|paving|groundwork/,
  },
  {
    sector: 'IT & Technology',
    pattern:
      /\bsoftware|\b(it|ict)\s+(service|system|support|infra)|digital\s*(platform|service|transform)|cyber|cloud\s*(comput|service|hosting)|network\s*infra|data\s*(centre|center|analy|manage)/,
  },
  {
    sector: 'Transport',
    pattern:
      /\btransport|vehicle|fleet\s*(manage|maint)|highway|railway|bus\s*(service|route)|traffic(?!k)|parking/,
  },
  {
    sector: 'Defence',
    pattern: /\bdefence|defense|military|\bmod\b|armed\s*force|navy|royal\s*air/,
  },
  {
    sector: 'Energy',
    pattern:
      /\benergy\s*(supply|effic|manage)|electricity|solar|renewable|power\s*generat|gas\s*supply|wind\s*(farm|turbin)/,
  },
  {
    // Generic bucket: only reached when no specific sector matched.
    sector: 'Government',
    pattern: /\bcouncil|government|civic|municipal|parliament|local\s*authorit/,
  },
]);

/**
 * Classify a tender into one of 9 sectors based on title, description, and
 * authority name.
 *
 * The three inputs are joined with spaces and lowercased, then checked
 * against the sector patterns in priority order; the first match wins.
 * Falsy inputs (null, undefined, '') are treated as empty strings.
 *
 * @param {string} title - Tender title
 * @param {string} description - Tender description
 * @param {string} authorityName - Procuring authority name
 * @returns {string} One of: Health, Education, Construction, IT & Technology,
 *   Transport, Defence, Energy, Government, Other
 */
export function classifySector(title, description, authorityName) {
  // Combine all text and normalize to lowercase for case-insensitive matching.
  const haystack = `${title || ''} ${description || ''} ${authorityName || ''}`.toLowerCase();

  for (const { sector, pattern } of SECTOR_RULES) {
    if (pattern.test(haystack)) {
      return sector;
    }
  }

  // Default fallback when no rule matched.
  return 'Other';
}

// Self-test when run directly (e.g. `node <this-file>`), not when imported.
// pathToFileURL handles percent-encoding and Windows paths, which a
// hand-built `file://${path}` string does not.
const entryPath = process.argv[1];
const isMainModule =
  typeof entryPath === 'string' && import.meta.url === pathToFileURL(entryPath).href;

if (isMainModule) {
  console.log('Running self-tests...\n');

  const testCases = [
    {
      title: 'NHS Hospital Supplies',
      description: 'Medical equipment for clinical use',
      authority: 'NHS England',
      expected: 'Health',
    },
    {
      title: 'School Building Construction',
      description: 'New educational facility',
      authority: 'Local Education Authority',
      expected: 'Education',
    },
    {
      title: 'Roofing and Painting Services',
      description: 'Building renovation and repairs',
      authority: 'City Council',
      expected: 'Construction',
    },
    {
      title: 'Software Development Services',
      description: 'IT system and cloud hosting',
      authority: 'Government IT Department',
      expected: 'IT & Technology',
    },
    {
      title: 'Public Transport Fleet Maintenance',
      description: 'Vehicle servicing and support',
      authority: 'Transport Department',
      expected: 'Transport',
    },
    {
      title: 'Military Equipment Supply',
      description: 'Defence and armed forces supplies',
      authority: 'Ministry of Defence',
      expected: 'Defence',
    },
    {
      title: 'Renewable Energy Installation',
      description: 'Solar power and wind turbine project',
      authority: 'Energy Commission',
      expected: 'Energy',
    },
    {
      title: 'Council Office Supplies',
      description: 'General supplies for local government',
      authority: 'City Council',
      expected: 'Government',
    },
    {
      title: 'Generic Office Supplies',
      description: 'Standard stationery and equipment',
      authority: 'Random Organization',
      expected: 'Other',
    },
    {
      // Regression: "hospitality" / "accidental" must not trigger Health.
      title: 'Hospitality and Catering Services',
      description: 'Event catering with accidental damage cover',
      authority: 'Random Organization',
      expected: 'Other',
    },
  ];

  let passed = 0;
  let failed = 0;

  for (const [index, test] of testCases.entries()) {
    const result = classifySector(test.title, test.description, test.authority);
    const ok = result === test.expected;

    if (ok) {
      passed++;
    } else {
      failed++;
    }

    console.log(`${ok ? '✓' : '✗'} Test ${index + 1}: "${test.title}"`);
    console.log(` Expected: ${test.expected}, Got: ${result}\n`);
  }

  console.log(`\nResults: ${passed} passed, ${failed} failed`);

  // Set the exit code instead of calling process.exit() so pending stdout
  // writes are flushed and the process ends naturally.
  if (failed > 0) {
    process.exitCode = 1;
  }
}