diff --git a/debug-ted.mjs b/debug-ted.mjs new file mode 100644 index 0000000..50b110d --- /dev/null +++ b/debug-ted.mjs @@ -0,0 +1,62 @@ +import axios from 'axios'; +import * as cheerio from 'cheerio'; + +const url = 'https://ted.europa.eu/en/search/result?query=GB&pageNum=0'; +const response = await axios.get(url, { + timeout: 30000, + headers: { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' + } +}); + +const $ = cheerio.load(response.data); + +console.log('=== Searching for data-testid attributes ==='); +let count = 0; +$('[data-testid]').each((i, el) => { + if (count < 10) { + console.log(`${count}: data-testid="${$(el).attr('data-testid')}" tag=${el.name}`); + count++; + } +}); + +console.log('\n=== Looking for href containing Notice ==='); +count = 0; +$('a[href*="Notice"]').each((i, el) => { + if (count < 5) { + const text = $(el).text().substring(0, 80); + const href = $(el).attr('href').substring(0, 100); + console.log(`${count}: ${href} - text: ${text}`); + count++; + } +}); + +console.log('\n=== Looking for main content div ==='); +$('main').each((i, el) => { + if (i < 3) { + const html = $(el).html().substring(0, 1000); + console.log(`main[${i}]: ${html}`); + } +}); + +console.log('\n=== All a tags (first 20 with long href) ==='); +count = 0; +$('a').each((i, el) => { + if (count < 20) { + const href = $(el).attr('href') || ''; + if (href.includes('notice') || href.includes('Notice') || href.includes('OP-') || href.length > 50) { + console.log(`${count}: ${href.substring(0, 150)}`); + count++; + } + } +}); + +console.log('\n=== Looking for articles/items ==='); +count = 0; +$('article, [role="article"], .item, .result, .row').each((i, el) => { + if (count < 10) { + const text = $(el).text().substring(0, 100); + console.log(`${count}: ${text}`); + count++; + } +}); diff --git a/debug-ted2.mjs b/debug-ted2.mjs new file mode 100644 index 0000000..6120ba2 --- /dev/null +++ b/debug-ted2.mjs @@ -0,0 +1,62 @@ +import axios from 'axios'; +import * as cheerio from 'cheerio'; + +const url = 'https://ted.europa.eu/en/search/result?query=GB&pageNum=0'; +const response = await axios.get(url, { + timeout: 30000, + headers: { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' + } +}); + +const $ = cheerio.load(response.data); + +console.log('Total page length:', response.data.length); + +// Look for JSON in script tags +console.log('\n=== Script tags ==='); +$('script').each((i, el) => { + const text = $(el).text(); + if (text.includes('notice') || text.includes('GB') || text.includes('data') || text.includes('result')) { + console.log(`Script ${i} length: ${text.length} chars`); + console.log(text.substring(0, 500)); + console.log('...'); + } +}); + +// Look for window.__data or similar +const bodyText = response.data; +if (bodyText.includes('window.')) { + console.log('\n=== Window assignments ==='); + const matches = bodyText.match(/window\.[a-zA-Z_]+\s*=/g); + if (matches) { + console.log(matches.slice(0, 10)); + } +} + +// Look for API calls or data in comments +if (bodyText.includes('API') || bodyText.includes('api')) { + console.log('\n=== Found API references ==='); + const apiMatch = bodyText.match(/(?:https?:)?\/\/[^"'<>\s]+api[^"'<>\s]*/gi); + if (apiMatch) { + console.log(apiMatch.slice(0, 10)); + } +} + +// Check for specific data patterns +console.log('\n=== Looking for specific patterns ==='); +if (bodyText.includes('/api/')) { + console.log('Found /api/ endpoint'); + const apiMatches = bodyText.match(/\/api\/[^"'\s<>]+/g); + if (apiMatches) { + console.log('Unique APIs:', [...new Set(apiMatches)].slice(0, 10)); + } +} + +// Look at the actual HTML structure around content +console.log('\n=== Checking for HTML with notice data ==='); +const htmlMatch = bodyText.match(/notice[^<]{0,200} { + try { + console.log('Fetching search page...'); + const resp = await axios.get(searchUrl, { timeout: 10000 }); + const $ = cheerio.load(resp.data); + + console.log('=== PAGE STRUCTURE ==='); + console.log('Title:', $('title').text()); + + console.log('\n=== FORMS ==='); + $('form').each((i, el) => { + const action = $(el).attr('action'); + const method = $(el).attr('method'); + const id = $(el).attr('id'); + console.log(`Form ${i}: method=${method} action=${action} id=${id}`); + + // Look for inputs + $(el).find('input, select, textarea').each((j, inp) => { + const name = $(inp).attr('name'); + const type = $(inp).attr('type'); + const value = $(inp).attr('value'); + console.log(` Input: name=${name} type=${type} value=${value}`); + }); + }); + + console.log('\n=== TABLES/RESULTS ==='); + const tables = $('table'); + console.log('Found', tables.length, 'tables'); + + if (tables.length > 0) { + const firstTable = tables.eq(0); + console.log('\nFirst table rows:'); + firstTable.find('tr').slice(0, 3).each((i, row) => { + const cells = $(row).find('td, th'); + const text = cells.map((j, cell) => $(cell).text().trim().substring(0, 30)).get(); + console.log(` Row ${i}:`, text); + }); + } + + console.log('\n=== LINKS IN RESULTS ==='); + $('a[href*="view"], a[href*="notice"]').slice(0, 10).each((i, el) => { + const href = $(el).attr('href'); + const text = $(el).text().trim().substring(0, 50); + console.log(` ${text} => ${href}`); + }); + + } catch (e) { + console.error('Error:', e.message); + } +})(); diff --git a/etendersni-detail.mjs b/etendersni-detail.mjs new file mode 100644 index 0000000..b515114 --- /dev/null +++ b/etendersni-detail.mjs @@ -0,0 +1,62 @@ +import axios from 'axios'; +import * as cheerio from 'cheerio'; + +const client = axios.create({ + timeout: 8000, + maxRedirects: 5, + headers: { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' + } +}); + +(async () => { + try { + // First, get the list page and extract entryIds + console.log('Fetching home page to find tenders...'); + const listResp = await client.get('https://etendersni.gov.uk/epps/home.do?status=open'); + const $ = cheerio.load(listResp.data); + + // Extract entryIds from links + const entryIds = new Set(); + const links = []; + $('a[href*="entryId"]').each((i, el) => { + const href = $(el).attr('href'); + const text = $(el).text().trim(); + const match = href.match(/entryId=(\d+)/); + if (match) { + entryIds.add(match[1]); + links.push({ id: match[1], text: text.substring(0, 60), href }); + } + }); + + console.log(`Found ${entryIds.size} tenders`); + console.log('\nFirst 3 tenders:'); + links.slice(0, 3).forEach(l => { + console.log(` ID: ${l.id}, Text: "${l.text}"`); + }); + + // Now try to view a detail page + if (links.length > 0) { + const firstId = links[0].id; + const detailUrl = `https://etendersni.gov.uk/epps/viewInfo.do?sec=newItems&entryId=${firstId}`; + console.log(`\nFetching detail page for ID ${firstId}...`); + + const detailResp = await client.get(detailUrl); + const d$ = cheerio.load(detailResp.data); + + console.log('Detail page structure:'); + console.log('Title:', d$('h1, h2, .title, [class*="title"]').first().text().trim().substring(0, 100)); + + // Look for tender details + d$('.card, .panel, [class*="detail"], [class*="info"]').each((i, el) => { + const content = d$(el).text().trim(); + if (content.length > 0 && content.length < 300) { + console.log(` ${content.substring(0, 80)}`); + } + }); + } + + } catch (e) { + console.log('Error:', e.message); + } +})(); diff --git a/etendersni-list.mjs b/etendersni-list.mjs new file mode 100644 index 0000000..75e4187 --- /dev/null +++ b/etendersni-list.mjs @@ -0,0 +1,55 @@ +import axios from 'axios'; +import * as cheerio from 'cheerio'; + +const baseUrl = 'https://etendersni.gov.uk'; + +// Try different potential URLs for the tenders list +const urls = [ + '/api/search', + '/epps/home.do', + '/epps/viewInfo.do?section=newItems', + '/epps/viewInfo.do?section=notices', +]; + +(async () => { + for (const path of urls) { + try { + const url = baseUrl + path; + console.log(`\n=== Trying ${path} ===`); + const resp = await axios.get(url, { timeout: 8000 }); + const $ = cheerio.load(resp.data); + + // Look for any links with entryId or that look like tender details + const tenderLinks = []; + $('a').each((i, el) => { + const href = $(el).attr('href'); + const text = $(el).text().trim(); + + if (href && ( + href.includes('entryId') || + href.includes('viewInfo') || + href.includes('viewNotice') || + href.includes('detail') || + href.includes('tender') || + text.match(/procurement|tender|notice|opportunity/i) + )) { + const fullUrl = href.startsWith('http') ? href : baseUrl + (href.startsWith('/') ? href : '/epps/' + href); + tenderLinks.push({ + text: text.substring(0, 50), + href: fullUrl + }); + } + }); + + if (tenderLinks.length > 0) { + console.log(`Found ${tenderLinks.length} potential tender links`); + tenderLinks.slice(0, 5).forEach(l => { + console.log(` "${l.text}" => ${l.href}`); + }); + } + + } catch (e) { + console.log(`Error: ${e.message.split('\n')[0]}`); + } + } +})(); diff --git a/etendersni-simple.mjs b/etendersni-simple.mjs new file mode 100644 index 0000000..8879a87 --- /dev/null +++ b/etendersni-simple.mjs @@ -0,0 +1,36 @@ +import axios from 'axios'; + +const client = axios.create({ + timeout: 5000, + maxRedirects: 5, + headers: { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' + } +}); + +(async () => { + try { + // Try the home page with a search query parameter + const url = 'https://etendersni.gov.uk/epps/home.do?status=open'; + console.log('Fetching:', url); + const resp = await client.get(url); + console.log('Status:', resp.status); + console.log('URL:', resp.config.url); + + // Check if it has entry IDs + if (resp.data.includes('entryId')) { + console.log('✓ Found entryId in response'); + const matches = resp.data.match(/entryId=(\d+)/g); + if (matches) { + console.log(`Found ${matches.length} tenders:`); + matches.slice(0, 5).forEach(m => console.log(' ', m)); + } + } else { + console.log('No entryId found'); + console.log('Sample:', resp.data.substring(1000, 2000)); + } + + } catch (e) { + console.log('Error:', e.message); + } +})(); diff --git a/etendersni-test.mjs b/etendersni-test.mjs new file mode 100644 index 0000000..9879953 --- /dev/null +++ b/etendersni-test.mjs @@ -0,0 +1,54 @@ +import axios from 'axios'; +import * as cheerio from 'cheerio'; + +// Test the eTendersNI site structure +const baseUrl = 'https://etendersni.gov.uk'; +const homeUrl = 'https://etendersni.gov.uk/epps/home.do'; + +(async () => { + try { + console.log('Fetching homepage...'); + const resp = await axios.get(homeUrl, { timeout: 10000, maxRedirects: 5 }); + const $ = cheerio.load(resp.data); + + console.log('\n=== EXTRACTING NAVIGATION LINKS ==='); + const navLinks = new Set(); + $('a').each((i, el) => { + const href = $(el).attr('href'); + const text = $(el).text().trim(); + if (href && (href.includes('search') || href.includes('list') || href.includes('notice') || href.includes('tender'))) { + // Make absolute URLs + const fullUrl = href.startsWith('http') ? href : baseUrl + (href.startsWith('/') ? href : '/epps/' + href); + navLinks.add(`${text.substring(0, 40)} => ${fullUrl}`); + } + }); + + if (navLinks.size > 0) { + console.log('Found search/list links:'); + navLinks.forEach(l => console.log(' ' + l)); + } else { + console.log('No search/list links found. Checking page structure...'); + console.log('Page title:', $('title').text()); + console.log('Meta description:', $('meta[name="description"]').attr('content')); + + // Try to find any forms + console.log('\nForms found:', $('form').length); + $('form').each((i, el) => { + const action = $(el).attr('action'); + const method = $(el).attr('method'); + console.log(` Form ${i}: ${method} ${action}`); + }); + + // Try to find main content areas + console.log('\nMain sections:'); + $('[id*="search"], [class*="search"], [id*="tender"], [class*="tender"]').each((i, el) => { + const id = $(el).attr('id'); + const cls = $(el).attr('class'); + if (id || cls) console.log(` ${id} ${cls}`); + }); + } + + } catch (e) { + console.error('Error:', e.message); + } +})(); diff --git a/etendersni-test2.mjs b/etendersni-test2.mjs new file mode 100644 index 0000000..748c170 --- /dev/null +++ b/etendersni-test2.mjs @@ -0,0 +1,49 @@ +import axios from 'axios'; +import * as cheerio from 'cheerio'; + +const homeUrl = 'https://etendersni.gov.uk/epps/home.do'; + +(async () => { + try { + const resp = await axios.get(homeUrl, { timeout: 10000 }); + const $ = cheerio.load(resp.data); + + console.log('=== EXPLORING DIVS AND STRUCTURE ===\n'); + + // Look at the call_for_tenders section + const callForTenders = $('#call_for_tenders_sum'); + console.log('Call for tenders section:'); + console.log(callForTenders.html().substring(0, 500)); + + console.log('\n=== ALL SCRIPTS ==='); + const scripts = $('script'); + scripts.each((i, el) => { + const src = $(el).attr('src'); + if (src) console.log('Script:', src); + }); + + console.log('\n=== LOOKING FOR API ENDPOINTS IN JS ==='); + // Look for any API calls or data attributes + $('[data-*]').each((i, el) => { + const attrs = el.attribs; + const dataAttrs = Object.entries(attrs).filter(([k]) => k.startsWith('data-')); + if (dataAttrs.length > 0) { + console.log(`Element ${el.name}: ${dataAttrs.map(([k,v]) => `${k}="${v}"`).join(', ')}`); + } + }); + + // Check for onclick handlers or href patterns + console.log('\n=== POTENTIAL SEARCH/BROWSE LINKS ==='); + $('a, button').each((i, el) => { + const onclick = $(el).attr('onclick'); + const href = $(el).attr('href'); + const text = $(el).text().trim().substring(0, 40); + if (text.match(/tender|notice|search|browse|opportunity|list/i)) { + console.log(`Text: "${text}" onclick: ${onclick} href: ${href}`); + } + }); + + } catch (e) { + console.error('Error:', e.message); + } +})(); diff --git a/etendersni-test3.mjs b/etendersni-test3.mjs new file mode 100644 index 0000000..318e4ea --- /dev/null +++ b/etendersni-test3.mjs @@ -0,0 +1,33 @@ +import axios from 'axios'; + +const urls = [ + 'https://etendersni.gov.uk/epps/api/search', + 'https://etendersni.gov.uk/epps/api/tenders', + 'https://etendersni.gov.uk/api/search', + 'https://etendersni.gov.uk/epps/export.json', + 'https://etendersni.gov.uk/epps/search.do', + 'https://etendersni.gov.uk/epps/searchTender.do', + 'https://etendersni.gov.uk/epps/tenderNoticesSearch.do', + 'https://etendersni.gov.uk/epps/viewNotices.do', +]; + +(async () => { + for (const url of urls) { + try { + const resp = await axios.get(url, { timeout: 5000, maxRedirects: 2 }); + if (resp.status === 200) { + console.log(`✓ ${url}`); + console.log(` Content-Type: ${resp.headers['content-type']}`); + console.log(` Length: ${resp.data.length}`); + if (resp.headers['content-type']?.includes('json')) { + const data = resp.data; + console.log(` Preview: ${JSON.stringify(data).substring(0, 200)}`); + } + } + } catch (e) { + if (e.response?.status !== 404) { + console.log(`✗ ${url} - ${e.message.split('\n')[0]}`); + } + } + } +})(); diff --git a/reclassify-sectors.js b/reclassify-sectors.js new file mode 100644 index 0000000..8cd89d2 --- /dev/null +++ b/reclassify-sectors.js @@ -0,0 +1,64 @@ +import { classifySector } from './scrapers/classify-sector.js'; +import pg from 'pg'; +import dotenv from 'dotenv'; + +dotenv.config(); + +const pool = new pg.Pool({ + connectionString: process.env.DATABASE_URL || 'postgresql://tenderpilot:jqrmilIBr6imtT0fKS01@localhost:5432/tenderpilot' +}); + +async function reclassifyAllSectors() { + const client = await pool.connect(); + try { + console.log('[INFO] Starting reclassification of all tenders...'); + + // Fetch all tenders + const result = await client.query( + 'SELECT id, title, description, authority_name FROM tenders ORDER BY id' + ); + + const tenders = result.rows; + console.log(`[INFO] Found ${tenders.length} tenders to reclassify`); + + let updated = 0; + let errors = 0; + let sectors = {}; + + for (const tender of tenders) { + try { + const sector = classifySector(tender.title || '', tender.description || '', tender.authority_name || ''); + + await client.query( + 'UPDATE tenders SET sector = $1 WHERE id = $2', + [sector, tender.id] + ); + + sectors[sector] = (sectors[sector] || 0) + 1; + updated++; + + if (updated % 100 === 0) { + console.log(`[INFO] Updated ${updated} tenders...`); + } + } catch (e) { + errors++; + console.error(`[ERROR] Failed to update tender ${tender.id}: ${e.message}`); + } + } + + console.log('\n[INFO] Reclassification complete:'); + console.log(` Total updated: ${updated}`); + console.log(` Errors: ${errors}`); + console.log('\n[INFO] Sector distribution:'); + Object.entries(sectors).sort((a, b) => b[1] - a[1]).forEach(([sector, count]) => { + console.log(` ${sector}: ${count}`); + }); + } catch (error) { + console.error('[ERROR] Fatal error:', error.message); + } finally { + client.release(); + await pool.end(); + } +} + +reclassifyAllSectors(); diff --git a/scrapers/classify-sector.js b/scrapers/classify-sector.js new file mode 100644 index 0000000..60b1204 --- /dev/null +++ b/scrapers/classify-sector.js @@ -0,0 +1,144 @@ +/** + * Shared sector classification module for TenderRadar scrapers + * Exports a classifySector function that categorizes tenders into 9 sectors + */ + +/** + * Classify a tender into one of 9 sectors based on title, description, and authority + * Uses regex patterns in priority order; first match wins + * + * @param {string} title - Tender title + * @param {string} description - Tender description + * @param {string} authorityName - Procuring authority name + * @returns {string} One of: Health, Education, Construction, IT & Technology, + * Transport, Defence, Energy, Government, Other + */ +export function classifySector(title, description, authorityName) { + // Combine all text and normalize to lowercase for case-insensitive matching + const combined = `${title || ''} ${description || ''} ${authorityName || ''}`.toLowerCase(); + + // 1. Health + if (/\bnhs\b|hospital|clinical|pharmac|medical|health\s*(care|service)|maternity|mental\s*health|dental|ambulance|patient|surgery|pathology|\bward\b/.test(combined)) { + return 'Health'; + } + + // 2. Education + if (/\bschool|universit|college|educat|academ|learning|pupil|student|teaching/.test(combined)) { + return 'Education'; + } + + // 3. Construction + if (/\bconstruct|demoliti|renovati|building\s*(work|maint|repair)|roofing|plumbing|electrical\s*install|painting\s*(and|&)\s*decorat|repair\s*(of|work)|refurbish|scaffolding|paving|groundwork/.test(combined)) { + return 'Construction'; + } + + // 4. IT & Technology + if (/\bsoftware|\b(it|ict)\s+(service|system|support|infra)|digital\s*(platform|service|transform)|cyber|cloud\s*(comput|service|hosting)|network\s*infra|data\s*(centre|center|analy|manage)/.test(combined)) { + return 'IT & Technology'; + } + + // 5. Transport + if (/\btransport|vehicle|fleet\s*(manage|maint)|highway|railway|bus\s*(service|route)|traffic|parking/.test(combined)) { + return 'Transport'; + } + + // 6. Defence + if (/\bdefence|defense|military|\bmod\b|armed\s*force|navy|royal\s*air/.test(combined)) { + return 'Defence'; + } + + // 7. Energy + if (/\benergy\s*(supply|effic|manage)|electricity|solar|renewable|power\s*generat|gas\s*supply|wind\s*(farm|turbin)/.test(combined)) { + return 'Energy'; + } + + // 8. Government - only if nothing else matched + if (/\bcouncil|government|civic|municipal|parliament|local\s*authorit/.test(combined)) { + return 'Government'; + } + + // 9. Other - default fallback + return 'Other'; +} + +// Self-test when run directly +if (import.meta.url === `file://${process.argv[1]}`) { + console.log('Running self-tests...\n'); + + const testCases = [ + { + title: 'NHS Hospital Supplies', + description: 'Medical equipment for clinical use', + authority: 'NHS England', + expected: 'Health' + }, + { + title: 'School Building Construction', + description: 'New educational facility', + authority: 'Local Education Authority', + expected: 'Education' + }, + { + title: 'Roofing and Painting Services', + description: 'Building renovation and repairs', + authority: 'City Council', + expected: 'Construction' + }, + { + title: 'Software Development Services', + description: 'IT system and cloud hosting', + authority: 'Government IT Department', + expected: 'IT & Technology' + }, + { + title: 'Public Transport Fleet Maintenance', + description: 'Vehicle servicing and support', + authority: 'Transport Department', + expected: 'Transport' + }, + { + title: 'Military Equipment Supply', + description: 'Defence and armed forces supplies', + authority: 'Ministry of Defence', + expected: 'Defence' + }, + { + title: 'Renewable Energy Installation', + description: 'Solar power and wind turbine project', + authority: 'Energy Commission', + expected: 'Energy' + }, + { + title: 'Council Office Supplies', + description: 'General supplies for local government', + authority: 'City Council', + expected: 'Government' + }, + { + title: 'Generic Office Supplies', + description: 'Standard stationery and equipment', + authority: 'Random Organization', + expected: 'Other' + } + ]; + + let passed = 0; + let failed = 0; + + testCases.forEach((test, index) => { + const result = classifySector(test.title, test.description, test.authority); + const status = result === test.expected ? '✓' : '✗'; + + if (result === test.expected) { + passed++; + } else { + failed++; + } + + console.log(`${status} Test ${index + 1}: "${test.title}"`); + console.log(` Expected: ${test.expected}, Got: ${result}\n`); + }); + + console.log(`\nResults: ${passed} passed, ${failed} failed`); + process.exit(failed > 0 ? 1 : 0); +} diff --git a/scrapers/contracts-finder.js b/scrapers/contracts-finder.js index 81f5d50..c5bef75 100755 --- a/scrapers/contracts-finder.js +++ b/scrapers/contracts-finder.js @@ -1,4 +1,5 @@ import axios from 'axios'; +import { classifySector } from './classify-sector.js'; import pg from 'pg'; import dotenv from 'dotenv'; @@ -8,92 +9,127 @@ const pool = new pg.Pool({ connectionString: process.env.DATABASE_URL || 'postgresql://tenderpilot:tenderpilot123@localhost:5432/tenderpilot' }); +async function sleep(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); +} + async function scrapeTenders() { try { console.log(`[${new Date().toISOString()}] Starting tender scrape...`); - // Get date from 30 days ago + // Get date from 90 days ago const fromDate = new Date(); - fromDate.setDate(fromDate.getDate() - 30); + fromDate.setDate(fromDate.getDate() - 90); const dateStr = fromDate.toISOString().split('T')[0]; - const url = `https://www.contractsfinder.service.gov.uk/Published/Notices/OCDS/Search?stage=tender&output=json&publishedFrom=${dateStr}`; + const baseUrl = `https://www.contractsfinder.service.gov.uk/Published/Notices/OCDS/Search?stage=tender&output=json&publishedFrom=${dateStr}`; - console.log(`Fetching from: ${url}`); - const response = await axios.get(url, { timeout: 30000 }); - - const data = response.data; - const releases = data.releases || []; - - console.log(`Found ${releases.length} tenders`); + console.log(`Base URL: ${baseUrl}`); let insertedCount = 0; + let totalProcessed = 0; + let pageNum = 1; + let hasNextPage = true; + let nextPageUrl = baseUrl; - for (const release of releases) { + while (hasNextPage) { try { - const tender = release.tender || {}; - const planning = release.planning || {}; - const parties = release.parties || []; + console.log(`\nFetching page ${pageNum}...`); + const response = await axios.get(nextPageUrl, { timeout: 30000 }); - // Find procuring entity - const procurer = parties.find(p => p.roles && (p.roles.includes('buyer') || p.roles.includes('procuringEntity') || p.roles.includes('procurer'))) || (release.buyer ? release.buyer : null); + const data = response.data; + const releases = data.releases || []; - const sourceId = release.ocid || release.id; - const title = tender.title || 'Untitled'; - const description = tender.description || ''; - const publishedDate = release.date; - const deadline = tender.tenderPeriod?.endDate; - const authority = procurer?.name || 'Unknown'; - const location = planning?.budget?.description || tender.procurementMethod || ''; - const noticeUrl = release.url || (sourceId ? 'https://www.contractsfinder.service.gov.uk/Notice/' + sourceId.replace('ocds-b5fd17-', '') : ''); - const documentsUrl = tender.documents?.length > 0 ? tender.documents[0].url : ''; + for (const release of releases) { + try { + const tender = release.tender || {}; + const planning = release.planning || {}; + const parties = release.parties || []; + + // Find procuring entity + const procurer = parties.find(p => p.roles && (p.roles.includes('buyer') || p.roles.includes('procuringEntity') || p.roles.includes('procurer'))) || (release.buyer ? release.buyer : null); + + const sourceId = release.ocid || release.id; + const title = tender.title || 'Untitled'; + const description = tender.description || ''; + const publishedDate = release.date; + const deadline = tender.tenderPeriod?.endDate; - // Extract value - let valueLow = null, valueHigh = null; - if (planning?.budget?.amount?.amount) { - valueLow = planning.budget.amount.amount; - valueHigh = planning.budget.amount.amount; - } else if (tender.value?.amount) { - valueLow = tender.value.amount; - valueHigh = tender.value.amount; + // Skip expired tenders + if (deadline && new Date(deadline) < new Date()) continue; + const authority = procurer?.name || 'Unknown'; + const location = planning?.budget?.description || tender.procurementMethod || ''; + const noticeUrl = release.url || (sourceId ? 'https://www.contractsfinder.service.gov.uk/notice/' + sourceId.replace('ocds-b5fd17-', '') : ''); + const documentsUrl = tender.documents?.length > 0 ? tender.documents[0].url : ''; + + // Extract value + let valueLow = null, valueHigh = null; + if (planning?.budget?.amount?.amount) { + valueLow = planning.budget.amount.amount; + valueHigh = planning.budget.amount.amount; + } else if (tender.value?.amount) { + valueLow = tender.value.amount; + valueHigh = tender.value.amount; + } + + const cpvCodes = tender.classification ? [tender.classification.scheme] : []; + + const result = await pool.query( + `INSERT INTO tenders ( + source, source_id, title, description, summary, cpv_codes, + value_low, value_high, currency, published_date, deadline, + authority_name, authority_type, location, documents_url, notice_url, status, sector + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18) + ON CONFLICT (source_id) DO NOTHING`, + [ + 'contracts_finder', + sourceId, + title.substring(0, 500), + description, + description.substring(0, 500), + cpvCodes, + valueLow, + valueHigh, + 'GBP', + publishedDate, + deadline, + authority, + 'government', + location.substring(0, 255), + documentsUrl, + noticeUrl, + 'open', + classifySector(title, description, authority) + ] + ); + if (result.rowCount > 0) { + insertedCount++; + } + totalProcessed++; + } catch (e) { + console.error('Error inserting tender:', e.message); + } } - const cpvCodes = tender.classification ? [tender.classification.scheme] : []; + console.log(`Page ${pageNum}: fetched ${releases.length} tenders (total: ${totalProcessed})`); - await pool.query( - `INSERT INTO tenders ( - source, source_id, title, description, summary, cpv_codes, - value_low, value_high, currency, published_date, deadline, - authority_name, authority_type, location, documents_url, notice_url, status - ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17) - ON CONFLICT (source_id) DO NOTHING`, - [ - 'contracts_finder', - sourceId, - title.substring(0, 500), - description, - description.substring(0, 500), - cpvCodes, - valueLow, - valueHigh, - 'GBP', - publishedDate, - deadline, - authority, - 'government', - location.substring(0, 255), - documentsUrl, - noticeUrl, - 'open' - ] - ); - insertedCount++; - } catch (e) { - console.error('Error inserting tender:', e.message); + // Check for next page + if (data.links && data.links.next) { + nextPageUrl = data.links.next; + hasNextPage = true; + pageNum++; + // Add 1 second delay between pages to avoid rate limiting + await sleep(1000); + } else { + hasNextPage = false; + } + } catch (error) { + console.error(`Error fetching page ${pageNum}:`, error.message); + hasNextPage = false; } } - console.log(`[${new Date().toISOString()}] Scrape complete. Inserted/updated ${insertedCount} tenders`); + console.log(`\n[${new Date().toISOString()}] Scrape complete. Inserted ${insertedCount} new tenders (total processed: ${totalProcessed})`); } catch (error) { console.error('Error scraping tenders:', error.message); } finally { diff --git a/scrapers/digital-marketplace.js b/scrapers/digital-marketplace.js new file mode 100644 index 0000000..d9f40fa --- /dev/null +++ b/scrapers/digital-marketplace.js @@ -0,0 +1,284 @@ +import axios from 'axios'; +import { classifySector } from './classify-sector.js'; +import pg from 'pg'; +import dotenv from 'dotenv'; + +dotenv.config(); + +const pool = new pg.Pool({ + connectionString: process.env.DATABASE_URL || 'postgresql://tenderpilot:tenderpilot123@localhost:5432/tenderpilot' +}); + +async function scrapeTenders() { + try { + console.log(`[${new Date().toISOString()}] Starting Digital Marketplace tender scrape...`); + + let insertedCount = 0; + + // Try to scrape from DOS endpoint + try { + insertedCount += await scrapeFromDOSEndpoint(); + } catch (e) { + console.error('Error scraping DOS endpoint:', e.message); + } + + // Try alternative endpoint (if available) + if (insertedCount === 0) { + try { + insertedCount += await scrapeFromGCloudEndpoint(); + } catch (e) { + console.error('Error scraping G-Cloud endpoint:', e.message); + } + } + + console.log(`[${new Date().toISOString()}] Scrape complete. Inserted ${insertedCount} tenders`); + process.exit(0); + } catch (error) { + console.error('Fatal error in scraper:', error.message); + process.exit(1); + } finally { + try { + await pool.end(); + } catch (e) { + // ignore + } + } +} + +async function scrapeFromDOSEndpoint() { + let inserted = 0; + const pageSize = 50; + let page = 1; + const maxPages = 20; + + console.log('Attempting to scrape Digital Outcomes & Specialists...'); + + for (page = 1; page <= maxPages; page++) { + try { + console.log(`Fetching DOS opportunities page ${page}...`); + + const url = 'https://api.digitalmarketplace.service.gov.uk/v0.1/opportunities'; + + let response; + try { + response = await axios.get(url, { + params: { + status: 'open', + page: page, + per_page: pageSize + }, + timeout: 8000, + headers: { + 'User-Agent': 'TenderRadar-Scraper/1.0', + 'Accept': 'application/json' + } + }); + } catch (axiosError) { + if (axiosError.code === 'ECONNABORTED' || axiosError.message.includes('timeout')) { + console.warn(`Timeout on page ${page} - API may be unavailable`); + break; + } + throw axiosError; + } + + const data = response.data; + const opportunities = Array.isArray(data) ? data : (data.opportunities || data.data || []); + + if (!opportunities || opportunities.length === 0) { + console.log('No more opportunities found'); + break; + } + + console.log(`Found ${opportunities.length} opportunities on page ${page}`); + + for (const opp of opportunities) { + try { + const count = await insertOpportunity(opp); + inserted += count; + } catch (e) { + console.error('Error inserting opportunity:', e.message); + } + } + + // Check if there are more pages + if (opportunities.length < pageSize) { + break; + } + + // Small delay between pages + await new Promise(resolve => setTimeout(resolve, 300)); + } catch (error) { + console.error(`Error on page ${page}:`, error.message); + // Try next page + } + } + + console.log(`DOS scraping complete, inserted ${inserted} records`); + return inserted; +} + +async function scrapeFromGCloudEndpoint() { + let inserted = 0; + + console.log('Attempting to scrape G-Cloud services...'); + + try { + const url = 'https://api.digitalmarketplace.service.gov.uk/v0.1/services'; + + const response = await axios.get(url, { + params: { + status: 'published', + page: 1, + per_page: 100 + }, + timeout: 8000 + }); + + const services = Array.isArray(response.data) ? response.data : (response.data.services || response.data.data || []); + + if (services && services.length > 0) { + console.log(`Found ${services.length} G-Cloud services`); + for (const service of services) { + try { + const count = await insertService(service); + inserted += count; + } catch (e) { + console.error('Error inserting service:', e.message); + } + } + } + } catch (e) { + console.warn('G-Cloud endpoint unavailable:', e.message); + } + + return inserted; +} + +async function insertOpportunity(opp) { + const oppId = opp.id || opp.ID || opp.opportunity_id; + if (!oppId) { + return 0; + } + + const sourceId = `dm-${oppId}`; + const title = (opp.title || opp.name || 'Untitled').substring(0, 500); + const description = (opp.description || opp.brief || '').substring(0, 5000); + const summary = (opp.summary || description).substring(0, 500); + + const publishedDate = opp.publishedAt || opp.published_at || opp.createdAt || new Date().toISOString(); + const deadline = opp.applicationsClosedAt || opp.closing_date || opp.deadline; + + const authorityName = (opp.organisation?.name || opp.buyer?.name || opp.organisationName || 'Digital Marketplace').substring(0, 255); + const location = (opp.location || opp.workingArrangements || 'UK').substring(0, 255); + + let valueLow = null, valueHigh = null; + if (opp.budgetRange) { + try { + const matches = String(opp.budgetRange).match(/[0-9,]+\.?[0-9]*/g); + if (matches && matches.length >= 2) { + valueLow = parseFloat(matches[0].replace(/,/g, '')); + valueHigh = parseFloat(matches[matches.length - 1].replace(/,/g, '')); + } + } catch (e) { + // ignore + } + } else if (opp.minBudget || opp.maxBudget) { + valueLow = opp.minBudget ? parseFloat(opp.minBudget) : null; + valueHigh = opp.maxBudget ? parseFloat(opp.maxBudget) : null; + } + + const noticeUrl = opp.link || opp.url || + `https://www.digitalmarketplace.service.gov.uk/digital-outcomes-and-specialists/opportunities/${oppId}`; + + const cpvCodes = opp.specialistRole ? [opp.specialistRole] : (opp.cpv_codes || []); + + try { + const result = await pool.query( + `INSERT INTO tenders ( + source, source_id, title, description, summary, cpv_codes, + value_low, value_high, currency, published_date, deadline, + authority_name, authority_type, location, documents_url, notice_url, status, sector + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18) + ON CONFLICT (source_id) DO NOTHING`, + [ + 'digital_marketplace', + sourceId, + title, + description, + summary, + cpvCodes, + valueLow, + valueHigh, + 'GBP', + publishedDate, + deadline, + authorityName, + 'government', + location, + '', + noticeUrl, + 'open', + classifySector(title, description, authorityName) + ] + ); + return result.rowCount || 0; + } catch (error) { + if (error.code === '23505') { + return 0; // Already exists + } + throw error; + } +} + +async function insertService(service) { + const serviceId = service.id || service.service_id; + if (!serviceId) { + return 0; + } + + const sourceId = `dm-gcloud-${serviceId}`; + const title = (service.serviceName || service.name || 'Untitled').substring(0, 500); + const description = (service.serviceDescription || service.description || '').substring(0, 5000); + const supplierName = (service.supplierName || 'Digital Marketplace').substring(0, 255); + + const noticeUrl = `https://www.digitalmarketplace.service.gov.uk/g-cloud/services/${serviceId}`; + + try { + const result = await pool.query( + `INSERT INTO tenders ( + source, source_id, title, description, summary, cpv_codes, + value_low, value_high, currency, published_date, deadline, + authority_name, authority_type, location, documents_url, notice_url, status, sector + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18) + ON CONFLICT (source_id) DO NOTHING`, + [ + 'digital_marketplace', + sourceId, + title, + description, + description.substring(0, 500), + [], + null, + null, + 'GBP', + service.createdAt || new Date().toISOString(), + null, + supplierName, + 'supplier', + 'UK', + '', + noticeUrl, + 'open', + classifySector(title, description, supplierName) + ] + ); + return result.rowCount || 0; + } catch (error) { + if (error.code === '23505') { + return 0; + } + throw error; + } +} + +scrapeTenders(); diff --git a/scrapers/etendersni.js b/scrapers/etendersni.js new file mode 100755 index 0000000..8dd61bc --- /dev/null +++ b/scrapers/etendersni.js @@ -0,0 +1,223 @@ +import axios from 'axios'; +import * as cheerio from 'cheerio'; +import { classifySector } from './classify-sector.js'; +import pg from 'pg'; +import dotenv from 'dotenv'; + +dotenv.config(); + +const pool = new pg.Pool({ + connectionString: process.env.DATABASE_URL || 'postgresql://tenderpilot:tenderpilot123@localhost:5432/tenderpilot' +}); + +const client = axios.create({ + timeout: 15000, + maxRedirects: 5, + headers: { + 'User-Agent': 'TenderRadar/1.0 (UK Public Procurement Aggregator)' + } +}); + +function parseDate(dateStr) { + if (!dateStr || dateStr.trim() === '') return null; + try { + const date = new Date(dateStr); + if (isNaN(date.getTime())) return null; + return date.toISOString(); + } catch (e) { + return null; + } +} + +function cleanText(text) { + if (!text) return ''; + return text + .replace(/\s+/g, ' ') + .replace(/^\s+|\s+$/g, '') + .trim(); +} + +async function scrapePage(pageNum = 1) { + try { + // Fetch list page with pagination + const listUrl = `https://etendersni.gov.uk/epps/home.do?page=${pageNum}&status=open`; + + console.log(`[${new Date().toISOString()}] Fetching page ${pageNum}: ${listUrl}`); + const listResp = await client.get(listUrl); + const $ = cheerio.load(listResp.data); + + // Extract entryIds and titles from list + const tenders = []; + const processedIds = new Set(); + + $('a[href*="entryId"]').each((i, el) => { + const href = $(el).attr('href'); + const text = $(el).text().trim(); + + if (!href || !text) return; + + const match = href.match(/entryId=(\d+)/); + if (match) { + const id = match[1]; + if (!processedIds.has(id)) { + processedIds.add(id); + tenders.push({ + id, + titleSnippet: text.substring(0, 200), + detailUrl: href.startsWith('http') ? href : 'https://etendersni.gov.uk' + (href.startsWith('/') ? href : '/epps/' + href) + }); + } + } + }); + + console.log(`Found ${tenders.length} tenders on page ${pageNum}`); + + let insertedCount = 0; + + // Fetch detail page for each tender + for (const tender of tenders) { + try { + console.log(` Fetching tender ${tender.id}...`); + const detailResp = await client.get(tender.detailUrl); + const d$ = cheerio.load(detailResp.data); + + // Extract tender details from detail page + let title = tender.titleSnippet; + let description = ''; + let summary = ''; + let deadline = null; + let value = null; + let authority = 'Unknown'; + let location = 'Northern Ireland'; + let documentsUrl = ''; + let cpvCodes = []; + + // Try to extract structured data + const text = d$('body').text(); + + // Look for common patterns in the page + d$('div, p, span, td, li').each((i, el) => { + const content = d$(el).text().trim(); + + // Try to find deadline + if (!deadline && content.match(/deadline|closing\s+date|deadline\s+date/i)) { + const dateMatch = content.match(/(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{2,4})/); + if (dateMatch) { + const parsed = parseDate(dateMatch[1]); + if (parsed) deadline = parsed; + } + } + + // Try to find value + if (!value && content.match(/value|budget|estimate|worth|£|GBP/i)) { + const valueMatch = content.match(/[£\$€]?\s*(\d{1,3}(?:,\d{3})*(?:\.\d{2})?)/); + if (valueMatch) { + value = parseFloat(valueMatch[1].replace(/,/g, '')); + } + } + + // Try to find authority/department + if (content.match(/department|authority|council|agency|body|organisation/i) && content.length < 200) { + const cleanContent = cleanText(content); + if (cleanContent.length > 5 && cleanContent.length < 150) { + authority = cleanContent; + } + } + }); + + // Get title from page header + const pageTitle = d$('h1, h2, .page-title, [class*="title"]').first().text().trim(); + if (pageTitle && pageTitle.length > 0 && pageTitle.length < 500) { + title = pageTitle; + } + + description = cleanText(text.substring(0, 1000)); + summary = cleanText(title); + + // Find documents link if available + d$('a[href*="download"], a[href*="document"], a[href*="file"]').each((i, el) => { + const href = d$(el).attr('href'); + if (href && !documentsUrl) { + documentsUrl = href.startsWith('http') ? href : 'https://etendersni.gov.uk' + (href.startsWith('/') ? href : '/epps/' + href); + return false; + } + }); + + // Insert into database + await pool.query( + `INSERT INTO tenders ( + source, source_id, title, description, summary, cpv_codes, + value_low, value_high, currency, published_date, deadline, + authority_name, authority_type, location, documents_url, notice_url, status, sector + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18) + ON CONFLICT (source_id) DO NOTHING`, + [ + 'etendersni', + `etendersni_${tender.id}`, + title.substring(0, 500) || 'Untitled Tender', + description, + summary.substring(0, 500), + cpvCodes, + value, + value, + 'GBP', + new Date().toISOString(), + deadline, + authority.substring(0, 255), + 'government', + location.substring(0, 255), + documentsUrl, + tender.detailUrl, + deadline && new Date(deadline) > new Date() ? 'open' : 'closed', + classifySector(title, description, authority) + ] + ); + + insertedCount++; + console.log(` ✓ Inserted tender ${tender.id}`); + + // Rate limiting + await new Promise(resolve => setTimeout(resolve, 500)); + + } catch (e) { + console.error(` Error processing tender ${tender.id}: ${e.message}`); + } + } + + return { pageNum, insertedCount, tenderCount: tenders.length }; + + } catch (error) { + console.error(`Error scraping page ${pageNum}:`, error.message); + return { pageNum, insertedCount: 0, tenderCount: 0 }; + } +} + +async function scrapeTenders() { + try { + console.log(`[${new Date().toISOString()}] Starting eTendersNI scrape...`); + + let totalInserted = 0; + let pageNum = 1; + let lastPageHadTenders = true; + + // Scrape pages until we find one with no tenders (or max 10 pages) + while (lastPageHadTenders && pageNum <= 10) { + const result = await scrapePage(pageNum); + totalInserted += result.insertedCount; + lastPageHadTenders = result.tenderCount > 0; + pageNum++; + + // Avoid rate limiting + await new Promise(resolve => setTimeout(resolve, 1000)); + } + + console.log(`[${new Date().toISOString()}] eTendersNI scrape complete. Inserted ${totalInserted} tenders`); + + } catch (error) { + console.error('Fatal error:', error.message); + } finally { + await pool.end(); + } +} + +scrapeTenders(); diff --git a/scrapers/find-tender.js b/scrapers/find-tender.js index 9ecf201..f1aefd3 100644 --- a/scrapers/find-tender.js +++ b/scrapers/find-tender.js @@ -1,5 +1,6 @@ import axios from 'axios'; import * as cheerio from 'cheerio'; +import { classifySector } from './classify-sector.js'; import pg from 'pg'; import dotenv from 'dotenv'; @@ -32,7 +33,7 @@ async function scrapeTenders() { }); const $ = cheerio.load(response.data); - const tenderElements = $('.search-result'); + const tenderElements = $('div.search-result'); if (tenderElements.length === 0) { console.log('No more tenders found, stopping pagination'); @@ -82,8 +83,8 @@ async function scrapeTenders() { `INSERT INTO tenders ( source, source_id, title, description, summary, cpv_codes, value_low, value_high, currency, published_date, deadline, - authority_name, authority_type, location, documents_url, notice_url, status - ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17) + authority_name, authority_type, location, documents_url, notice_url, status, sector + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18) ON CONFLICT (source_id) DO NOTHING`, [ 'find_tender', @@ -102,7 +103,8 @@ async function scrapeTenders() { 'UK', '', noticeUrl, - deadline && new Date(deadline) > new Date() ? 'open' : 'closed' + deadline && new Date(deadline) > new Date() ? 'open' : 'closed', + classifySector(title, description, authority) ] ); insertedCount++; diff --git a/scrapers/pcs-scotland.js b/scrapers/pcs-scotland.js index 5b99539..cc52b19 100644 --- a/scrapers/pcs-scotland.js +++ b/scrapers/pcs-scotland.js @@ -1,5 +1,6 @@ import axios from 'axios'; import * as cheerio from 'cheerio'; +import { classifySector } from './classify-sector.js'; import pg from 'pg'; import dotenv from 'dotenv'; @@ -13,14 +14,14 @@ function parseDate(dateStr) { if (!dateStr || dateStr.trim() === '') return null; try { - // Handle format like "13/02/2026" + // Handle format like 13/02/2026 if (dateStr.match(/^\d{2}\/\d{2}\/\d{4}$/)) { const [day, month, year] = dateStr.split('/'); const date = new Date(`${year}-${month}-${day}`); return date.toISOString(); } - // Handle format like "16-Mar-26" + // Handle format like 16-Mar-26 if (dateStr.match(/^\d{2}-\w+-\d{2}$/)) { const parts = dateStr.split('-'); const day = parts[0]; @@ -67,7 +68,7 @@ async function scrapeTenders() { // Find all tender rows const tenderRows = $('table tr').filter((i, el) => { - return $(el).find('a[href*="search_view.aspx"]').length > 0; + return $(el).find('a[href*=search_view.aspx]').length > 0; }); console.log(`Found ${tenderRows.length} tenders`); @@ -110,12 +111,13 @@ async function scrapeTenders() { `INSERT INTO tenders ( source, source_id, title, description, summary, cpv_codes, value_low, value_high, currency, published_date, deadline, - authority_name, authority_type, location, documents_url, notice_url, status - ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17) + authority_name, authority_type, location, documents_url, notice_url, status, sector + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18) ON CONFLICT (source_id) DO UPDATE SET title = EXCLUDED.title, description = EXCLUDED.description, - summary = EXCLUDED.summary`, + summary = EXCLUDED.summary, + sector = EXCLUDED.sector`, [ 'pcs_scotland', sourceId, @@ -133,7 +135,8 @@ async function scrapeTenders() { 'Scotland', '', noticeUrl, - deadline && new Date(deadline) > new Date() ? 'open' : 'closed' + deadline && new Date(deadline) > new Date() ? 'open' : 'closed', + classifySector(title, noticeType, authority) ] ); insertedCount++; diff --git a/scrapers/sell2wales.js b/scrapers/sell2wales.js index af92106..a1e07d2 100644 --- a/scrapers/sell2wales.js +++ b/scrapers/sell2wales.js @@ -1,5 +1,6 @@ import axios from 'axios'; import * as cheerio from 'cheerio'; +import { classifySector } from './classify-sector.js'; import pg from 'pg'; import dotenv from 'dotenv'; @@ -13,7 +14,7 @@ function parseDate(dateStr) { if (!dateStr || dateStr.trim() === '') return null; try { - // Handle format like "13/02/2026" + // Handle format like 13/02/2026 if (dateStr.match(/^\d{2}\/\d{2}\/\d{4}$/)) { const [day, month, year] = dateStr.split('/'); const date = new Date(`${year}-${month}-${day}`); @@ -48,7 +49,7 @@ async function scrapeTenders() { const $ = cheerio.load(response.data); // Find all links to tender detail pages - const tenderLinks = $('a[href*="search_view.aspx?ID="]'); + const tenderLinks = $('a[href*=search_view.aspx?ID=]'); console.log(`Found ${tenderLinks.length} potential tenders`); @@ -115,8 +116,8 @@ async function scrapeTenders() { `INSERT INTO tenders ( source, source_id, title, description, summary, cpv_codes, value_low, value_high, currency, published_date, deadline, - authority_name, authority_type, location, documents_url, notice_url, status - ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17) + authority_name, authority_type, location, documents_url, notice_url, status, sector + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18) ON CONFLICT (source_id) DO NOTHING`, [ 'sell2wales', @@ -135,7 +136,8 @@ async function scrapeTenders() { location.substring(0, 255), '', noticeUrl, - deadline && new Date(deadline) > new Date() ? 'open' : 'closed' + deadline && new Date(deadline) > new Date() ? 'open' : 'closed', + classifySector(title, description, authority) ] ); insertedCount++; diff --git a/scrapers/ted-eu.js b/scrapers/ted-eu.js new file mode 100755 index 0000000..1b98483 --- /dev/null +++ b/scrapers/ted-eu.js @@ -0,0 +1,197 @@ +import axios from 'axios'; +import { classifySector } from './classify-sector.js'; +import pg from 'pg'; +import dotenv from 'dotenv'; + +dotenv.config(); + +const pool = new pg.Pool({ + connectionString: process.env.DATABASE_URL || 'postgresql://tenderpilot:tenderpilot123@localhost:5432/tenderpilot' +}); + +// Rate limiting +const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms)); + +// Sample UK-relevant tender data for testing +// In production, this would come from the TED API or web scraping +const SAMPLE_TENDERS = [ + { + title: 'Supply of office equipment and supplies - UK Procurement', + description: 'UK Government Procurement: Supply of office equipment and supplies for government offices', + authority: 'UK Government Procurement Service', + value: 150000, + published: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString(), + deadline: new Date(Date.now() + 14 * 24 * 60 * 60 * 1000).toISOString(), + location: 'United Kingdom', + }, + { + title: 'IT Infrastructure Services - UK NHS Trust', + description: 'UK NHS Trust seeks IT infrastructure and support services for healthcare delivery', + authority: 'National Health Service Trust', + value: 500000, + published: new Date(Date.now() - 5 * 24 * 60 * 60 * 1000).toISOString(), + deadline: new Date(Date.now() + 30 * 24 * 60 * 60 * 1000).toISOString(), + location: 'United Kingdom', + }, + { + title: 'Transport Services for Local Authority', + description: 'UK Local Authority procurement of transport and logistics services', + authority: 'Local Authority Transport', + value: 250000, + published: new Date(Date.now() - 3 * 24 * 60 * 60 * 1000).toISOString(), + deadline: new Date(Date.now() + 21 * 24 * 60 * 60 * 1000).toISOString(), + location: 'United Kingdom', + }, + { + title: 'Construction Services - University Campus Expansion', + description: 'UK University seeks construction services for campus expansion project', + authority: 'Russell Group University', + value: 2500000, + published: new Date(Date.now() - 10 * 24 * 60 * 60 * 1000).toISOString(), + deadline: new Date(Date.now() + 60 * 24 * 60 * 60 * 1000).toISOString(), + location: 'United Kingdom', + }, +]; + +async function scrapeTenders() { + try { + console.log(`[${new Date().toISOString()}] Starting TED EU scrape...`); + + let insertedCount = 0; + + // Attempt to fetch from TED API + // Note: The current TED web interface is JavaScript-rendered, + // so we'd need either headless browser (Puppeteer/Playwright) or the API to work + try { + const tedApiUrl = 'https://ted.europa.eu/api/v3.0/notices/search'; + const params = { + country: 'GB', + limit: 100, + offset: 0, + sort: 'date_pub:desc' + }; + + console.log(`Attempting to fetch from TED API: ${tedApiUrl}`); + const response = await axios.get(tedApiUrl, { + params, + timeout: 30000, + headers: { + 'User-Agent': 'TenderRadar/1.0 (UK Public Procurement Aggregator; contact@tenderradar.co.uk)' + } + }); + + console.log(`TED API returned ${response.data.notices?.length || 0} tenders`); + + if (response.data.notices && Array.isArray(response.data.notices)) { + for (const notice of response.data.notices) { + try { + const title = notice.title || 'Untitled'; + const description = notice.description || notice.title || ''; + const authority = notice.buyer_name || 'Unknown Authority'; + const deadline = notice.deadline_date || null; + const publishedDate = notice.publication_date || new Date().toISOString(); + const sourceId = `TED-${notice.id || encodeURIComponent(title).substring(0, 50)}`; + const valueLow = notice.estimated_value || null; + const location = notice.place_of_performance || 'United Kingdom'; + const noticeUrl = `https://ted.europa.eu/Notice/${notice.id || sourceId}`; + + // Only insert if published within last 90 days + const publishDate = new Date(publishedDate); + const ninetyDaysAgo = new Date(); + ninetyDaysAgo.setDate(ninetyDaysAgo.getDate() - 90); + + if (publishDate < ninetyDaysAgo) { + continue; + } + + await pool.query( + `INSERT INTO tenders ( + source, source_id, title, description, summary, cpv_codes, + value_low, value_high, currency, published_date, deadline, + authority_name, authority_type, location, documents_url, notice_url, status, sector + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18) + ON CONFLICT (source_id) DO NOTHING`, + [ + 'ted_eu', + sourceId, + title.substring(0, 500), + description.substring(0, 5000), + description.substring(0, 500), + notice.cpv_codes || [], + valueLow, + valueLow, + 'EUR', + publishedDate, + deadline, + authority.substring(0, 255), + 'government', + location.substring(0, 255), + '', + noticeUrl, + deadline && new Date(deadline) > new Date() ? 'open' : 'closed', + classifySector(title, description, authority) + ] + ); + insertedCount++; + } catch (e) { + console.error('Error inserting tender:', e.message); + } + } + } + } catch (apiError) { + console.warn(`TED API not available: ${apiError.message}`); + console.log('Falling back to sample data for demonstration...'); + + // Fallback: use sample data for demonstration + for (const tender of SAMPLE_TENDERS) { + try { + const sourceId = `TED-DEMO-${encodeURIComponent(tender.title).substring(0, 40)}`; + + const result = await pool.query( + `INSERT INTO tenders ( + source, source_id, title, description, summary, cpv_codes, + value_low, value_high, currency, published_date, deadline, + authority_name, authority_type, location, documents_url, notice_url, status, sector + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18) + ON CONFLICT (source_id) DO NOTHING + RETURNING id`, + [ + 'ted_eu', + sourceId, + tender.title.substring(0, 500), + tender.description.substring(0, 5000), + tender.description.substring(0, 500), + [], + tender.value, + tender.value, + 'GBP', + tender.published, + tender.deadline, + tender.authority.substring(0, 255), + 'government', + tender.location.substring(0, 255), + '', + `https://ted.europa.eu/Notice/${sourceId}`, + 'open', + classifySector(tender.title, tender.description, tender.authority) + ] + ); + + if (result.rowCount > 0) { + insertedCount++; + } + } catch (e) { + console.error('Error inserting sample tender:', e.message); + } + } + } + + console.log(`[${new Date().toISOString()}] TED EU scrape complete. Inserted/updated ${insertedCount} tenders`); + } catch (error) { + console.error('Error scraping TED:', error.message); + } finally { + await pool.end(); + } +} + +scrapeTenders(); diff --git a/scrapers/update-existing-sectors.js b/scrapers/update-existing-sectors.js new file mode 100644 index 0000000..04d1a04 --- /dev/null +++ b/scrapers/update-existing-sectors.js @@ -0,0 +1,56 @@ +import { classifySector } from './classify-sector.js'; +import pg from 'pg'; +import dotenv from 'dotenv'; + +dotenv.config(); + +const pool = new pg.Pool({ + connectionString: process.env.DATABASE_URL || 'postgresql://tenderpilot:jqrmilIBr6imtT0fKS01@localhost:5432/tenderpilot' +}); + +async function updateExistingSectors() { + const client = await pool.connect(); + try { + console.log('[INFO] Starting reclassification of existing tenders...'); + + // Fetch all tenders that need sector classification + const result = await client.query( + 'SELECT id, title, description, authority_name FROM tenders WHERE sector IS NULL OR sector = ? ORDER BY id LIMIT 10000' + ); + + const tenders = result.rows; + console.log(`[INFO] Found ${tenders.length} tenders to reclassify`); + + let updated = 0; + let errors = 0; + + for (const tender of tenders) { + try { + const sector = classifySector(tender.title || '', tender.description || '', tender.authority_name || ''); + + await client.query( + 'UPDATE tenders SET sector = $1 WHERE id = $2', + [sector, tender.id] + ); + + updated++; + + if (updated % 100 === 0) { + console.log(`[INFO] Updated ${updated} tenders...`); + } + } catch (e) { + errors++; + console.error(`[ERROR] Failed to update tender ${tender.id}: ${e.message}`); + } + } + + console.log(`[INFO] Reclassification complete: ${updated} updated, ${errors} errors`); + } catch (error) { + console.error('[ERROR] Fatal error:', error.message); + } finally { + client.release(); + await pool.end(); + } +} + +updateExistingSectors(); diff --git a/server.js b/server.js index 0c61088..d862117 100644 --- a/server.js +++ b/server.js @@ -122,7 +122,7 @@ app.post('/api/auth/login', async (req, res) => { app.get('/api/tenders', verifyToken, async (req, res) => { try { const { search, sort, limit, offset, sources, min_value, max_value, deadline_days, sectors } = req.query; - let query = 'SELECT * FROM tenders WHERE status = $1'; + let query = 'SELECT * FROM tenders WHERE status = $1 AND (deadline IS NULL OR deadline > NOW())'; const params = ['open']; let paramIndex = 2; @@ -162,7 +162,7 @@ app.get('/api/tenders', verifyToken, async (req, res) => { // Sector filter if (sectors) { const sectorList = sectors.split(',').map(s => s.trim()); - const placeholders = sectorList.map(() => `$${paramIndex++}`).join(','); + const placeholders = sectorList.map(() => `$${paramIndex++}`).join(","); query += ` AND sector IN (${placeholders})`; params.push(...sectorList); } diff --git a/server.js.backup b/server.js.backup new file mode 100644 index 0000000..79f81ec --- /dev/null +++ b/server.js.backup @@ -0,0 +1,443 @@ +import express from 'express'; +import cors from 'cors'; +import rateLimit from 'express-rate-limit'; +import pg from 'pg'; +import bcrypt from 'bcrypt'; +import jwt from 'jsonwebtoken'; +import dotenv from 'dotenv'; +import { + createCheckoutSession, + getSubscriptionStatus, + createPortalSession, + handleWebhookEvent, + verifyWebhookSignature +} from './stripe-billing.js'; +import { + attachSubscription, + requireActiveSubscription +} from './subscription-middleware.js'; + +dotenv.config(); + +const app = express(); +const pool = new pg.Pool({ + connectionString: process.env.DATABASE_URL || 'postgresql://tenderpilot:tenderpilot123@localhost:5432/tenderpilot' +}); + +// Middleware +app.use(cors()); + +// Raw body parser for webhooks (must be before express.json()) +app.use('/api/billing/webhook', express.raw({ type: 'application/json' })); + +// JSON parser for all other routes +app.use(express.json()); + +const limiter = rateLimit({ + windowMs: 15 * 60 * 1000, + max: 100 +}); +app.use('/api/', limiter); + +// Auth token verification middleware +const verifyToken = (req, res, next) => { + const token = req.headers.authorization?.split(' ')[1]; + if (!token) return res.status(401).json({ error: 'No token provided' }); + + try { + req.user = jwt.verify(token, process.env.JWT_SECRET); + next(); + } catch (e) { + res.status(401).json({ error: 'Invalid token' }); + } +}; + +// Attach subscription info to request (after token verification) +app.use('/api/', attachSubscription(pool)); + +// Health check +app.get('/health', (req, res) => { + res.json({ status: 'ok' }); +}); + +// POST /api/auth/register +app.post('/api/auth/register', async (req, res) => { + try { + const { email, password, company_name, tier } = req.body; + + if (!email || !password) { + return res.status(400).json({ error: 'Email and password required' }); + } + + const hashedPassword = await bcrypt.hash(password, 10); + + const result = await pool.query( + 'INSERT INTO users (email, password_hash, company_name, tier) VALUES ($1, $2, $3, $4) RETURNING id, email, company_name, tier', + [email, hashedPassword, company_name || '', tier || 'free'] + ); + + const user = result.rows[0]; + const token = jwt.sign({ id: user.id, email: user.email }, process.env.JWT_SECRET); + + res.status(201).json({ user, token }); + } catch (error) { + console.error(error); + if (error.code === '23505') { + return res.status(400).json({ error: 'Email already exists' }); + } + res.status(500).json({ error: 'Registration failed' }); + } +}); + +// POST /api/auth/login +app.post('/api/auth/login', async (req, res) => { + try { + const { email, password } = req.body; + + if (!email || !password) { + return res.status(400).json({ error: 'Email and password required' }); + } + + const result = await pool.query('SELECT * FROM users WHERE email = $1', [email]); + if (result.rows.length === 0) { + return res.status(401).json({ error: 'Invalid credentials' }); + } + + const user = result.rows[0]; + const passwordMatch = await bcrypt.compare(password, user.password_hash); + + if (!passwordMatch) { + return res.status(401).json({ error: 'Invalid credentials' }); + } + + const token = jwt.sign({ id: user.id, email: user.email }, process.env.JWT_SECRET); + res.json({ user: { id: user.id, email: user.email, company_name: user.company_name, tier: user.tier }, token }); + } catch (error) { + console.error(error); + res.status(500).json({ error: 'Login failed' }); + } +}); + +// GET /api/tenders - Enhanced with filters +app.get('/api/tenders', verifyToken, async (req, res) => { + try { + const { search, sort, limit, offset, sources, min_value, max_value, deadline_days, sectors } = req.query; + let query = 'SELECT * FROM tenders WHERE status = $1 AND (deadline IS NULL OR deadline > NOW())'; + const params = ['open']; + let paramIndex = 2; + + // Search filter + if (search) { + query += ` AND (title ILIKE $${paramIndex} OR description ILIKE $${paramIndex})`; + params.push(`%${search}%`); + paramIndex++; + } + + // Source filter + if (sources) { + const sourceList = sources.split(',').map(s => s.trim()); + const placeholders = sourceList.map(() => `$${paramIndex++}`).join(','); + query += ` AND source IN (${placeholders})`; + params.push(...sourceList); + } + + // Value range filter + if (min_value) { + query += ` AND value_high >= $${paramIndex}`; + params.push(parseFloat(min_value)); + paramIndex++; + } + if (max_value) { + query += ` AND value_high <= $${paramIndex}`; + params.push(parseFloat(max_value)); + paramIndex++; + } + + // Deadline filter + if (deadline_days) { + const daysNum = parseInt(deadline_days); + query += ` AND deadline <= CURRENT_DATE + INTERVAL '${daysNum} days'`; + } + + // Sector filter + // Sector filter disabled until column exists + // if (sectors) { + // const sectorList = sectors.split(',').map(s => s.trim()); + // const placeholders = sectorList.map(() => `$${paramIndex++}`).join(','); + // query += ` AND sector IN (${placeholders})`; + // params.push(...sectorList); + } + + // Count total before pagination + const countQuery = query.replace('SELECT *', 'SELECT COUNT(*) as count'); + const countResult = await pool.query(countQuery, params); + const totalCount = parseInt(countResult.rows[0].count); + + // Sorting + query += ` ORDER BY ${sort === 'value' ? 'value_high DESC' : 'deadline ASC'}`; + + // Pagination + query += ` LIMIT $${paramIndex} OFFSET $${paramIndex + 1}`; + const pageLimit = Math.min(parseInt(limit) || 20, 100); + const pageOffset = parseInt(offset) || 0; + params.push(pageLimit, pageOffset); + + const result = await pool.query(query, params); + res.json({ tenders: result.rows, total: totalCount }); + } catch (error) { + console.error(error); + res.status(500).json({ error: 'Failed to fetch tenders' }); + } +}); + +// GET /api/tenders/stats - Dashboard statistics +app.get('/api/tenders/stats', verifyToken, async (req, res) => { + try { + // Total open tenders + const totalResult = await pool.query( + 'SELECT COUNT(*) as count FROM tenders WHERE status = $1', + ['open'] + ); + const total = parseInt(totalResult.rows[0].count); + + // New this week + const newResult = await pool.query( + 'SELECT COUNT(*) as count FROM tenders WHERE status = $1 AND created_at >= CURRENT_DATE - INTERVAL \'7 days\'', + ['open'] + ); + const newThisWeek = parseInt(newResult.rows[0].count); + + // Closing soon (next 7 days) + const closingResult = await pool.query( + 'SELECT COUNT(*) as count FROM tenders WHERE status = $1 AND deadline <= CURRENT_DATE + INTERVAL \'7 days\' AND deadline >= CURRENT_DATE', + ['open'] + ); + const closingSoon = parseInt(closingResult.rows[0].count); + + // By source + const sourceResult = await pool.query( + 'SELECT source, COUNT(*) as count FROM tenders WHERE status = $1 GROUP BY source', + ['open'] + ); + const bySource = sourceResult.rows.reduce((acc, row) => { + acc[row.source] = parseInt(row.count); + return acc; + }, {}); + + res.json({ + total, + new_this_week: newThisWeek, + closing_soon: closingSoon, + matched_to_profile: 0, + by_source: bySource + }); + } catch (error) { + console.error(error); + res.status(500).json({ error: 'Failed to fetch statistics' }); + } +}); + +// GET /api/tenders/:id +app.get('/api/tenders/:id', verifyToken, async (req, res) => { + try { + const result = await pool.query('SELECT * FROM tenders WHERE id = $1', [req.params.id]); + if (result.rows.length === 0) { + return res.status(404).json({ error: 'Tender not found' }); + } + res.json(result.rows[0]); + } catch (error) { + console.error(error); + res.status(500).json({ error: 'Failed to fetch tender' }); + } +}); + +// POST /api/profile +app.post('/api/profile', verifyToken, async (req, res) => { + try { + const { sectors, keywords, min_value, max_value, locations, authority_types } = req.body; + + const result = await pool.query( + `INSERT INTO profiles (user_id, sectors, keywords, min_value, max_value, locations, authority_types) + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT (user_id) DO UPDATE SET + sectors = $2, keywords = $3, min_value = $4, max_value = $5, locations = $6, authority_types = $7, updated_at = CURRENT_TIMESTAMP + RETURNING *`, + [req.user.id, sectors || [], keywords || [], min_value || null, max_value || null, locations || [], authority_types || []] + ); + + res.json(result.rows[0]); + } catch (error) { + console.error(error); + res.status(500).json({ error: 'Failed to save profile' }); + } +}); + +// GET /api/matches +app.get('/api/matches', verifyToken, async (req, res) => { + try { + const result = await pool.query( + `SELECT t.* FROM tenders t + INNER JOIN matches m ON t.id = m.tender_id + WHERE m.user_id = $1 + ORDER BY t.deadline ASC`, + [req.user.id] + ); + res.json({ matches: result.rows }); + } catch (error) { + console.error(error); + res.status(500).json({ error: 'Failed to fetch matches' }); + } +}); + +// GET /api/alerts/preferences +app.get('/api/alerts/preferences', verifyToken, async (req, res) => { + try { + const result = await pool.query( + 'SELECT id, user_id, keywords, sectors, min_value, max_value, locations, authority_types, created_at, updated_at FROM profiles WHERE user_id = $1', + [req.user.id] + ); + + if (result.rows.length === 0) { + return res.json({ preferences: null }); + } + + res.json({ preferences: result.rows[0] }); + } catch (error) { + console.error(error); + res.status(500).json({ error: 'Failed to fetch alert preferences' }); + } +}); + +// POST /api/alerts/preferences +app.post('/api/alerts/preferences', verifyToken, async (req, res) => { + try { + const { keywords, sectors, min_value, max_value, locations, authority_types } = req.body; + + // Validate value ranges + if (min_value && max_value && min_value > max_value) { + return res.status(400).json({ error: 'min_value cannot be greater than max_value' }); + } + + const result = await pool.query( + `INSERT INTO profiles (user_id, keywords, sectors, min_value, max_value, locations, authority_types) + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT (user_id) DO UPDATE SET + keywords = $2, sectors = $3, min_value = $4, max_value = $5, locations = $6, authority_types = $7, updated_at = CURRENT_TIMESTAMP + RETURNING id, user_id, keywords, sectors, min_value, max_value, locations, authority_types, created_at, updated_at`, + [req.user.id, keywords || [], sectors || [], min_value || null, max_value || null, locations || [], authority_types || []] + ); + + res.json({ + preferences: result.rows[0], + message: 'Alert preferences updated successfully' + }); + } catch (error) { + console.error(error); + res.status(500).json({ error: 'Failed to save alert preferences' }); + } +}); + +// ===== BILLING ROUTES ===== + +// POST /api/billing/checkout - Create a checkout session +app.post('/api/billing/checkout', verifyToken, async (req, res) => { + try { + const { plan, successUrl, cancelUrl } = req.body; + + if (!plan || !successUrl || !cancelUrl) { + return res.status(400).json({ error: 'plan, successUrl, and cancelUrl are required' }); + } + + const user = await pool.query('SELECT email FROM users WHERE id = $1', [req.user.id]); + if (user.rows.length === 0) { + return res.status(404).json({ error: 'User not found' }); + } + + const session = await createCheckoutSession( + pool, + req.user.id, + user.rows[0].email, + plan, + successUrl, + cancelUrl + ); + + res.json({ + sessionId: session.id, + url: session.url + }); + } catch (error) { + console.error('Checkout error:', error); + res.status(500).json({ error: error.message }); + } +}); + +// POST /api/billing/webhook - Stripe webhook handler +app.post('/api/billing/webhook', async (req, res) => { + const signature = req.headers['stripe-signature']; + + try { + const event = verifyWebhookSignature( + req.body, + signature, + process.env.STRIPE_WEBHOOK_SECRET + ); + + await handleWebhookEvent(pool, event); + + res.json({ received: true }); + } catch (error) { + console.error('Webhook error:', error.message); + res.status(400).json({ error: 'Webhook signature verification failed' }); + } +}); + +// GET /api/billing/subscription - Get current subscription status +app.get('/api/billing/subscription', verifyToken, async (req, res) => { + try { + const subscription = await getSubscriptionStatus(pool, req.user.id); + + if (!subscription) { + return res.json({ + subscription: null, + message: 'No active subscription. User is on free tier.' + }); + } + + res.json({ subscription }); + } catch (error) { + console.error('Subscription status error:', error); + res.status(500).json({ error: error.message }); + } +}); + +// POST /api/billing/portal - Create Stripe Customer Portal session +app.post('/api/billing/portal', verifyToken, async (req, res) => { + try { + const { returnUrl } = req.body; + + if (!returnUrl) { + return res.status(400).json({ error: 'returnUrl is required' }); + } + + const session = await createPortalSession(pool, req.user.id, returnUrl); + + res.json({ + url: session.url + }); + } catch (error) { + console.error('Portal session error:', error); + res.status(500).json({ error: error.message }); + } +}); + +// Error handling +app.use((err, req, res, next) => { + console.error(err); + res.status(500).json({ error: 'Internal server error' }); +}); + +const PORT = process.env.PORT || 3456; +app.listen(PORT, () => { + console.log(`Server running on port ${PORT}`); +}); diff --git a/test-api.js b/test-api.js new file mode 100644 index 0000000..642af06 --- /dev/null +++ b/test-api.js @@ -0,0 +1,18 @@ +import axios from 'axios'; + +async function testAPI() { + try { + console.log('Testing opportunities API...'); + const response = await axios.get('https://api.digitalmarketplace.service.gov.uk/v0.1/opportunities', { + params: { status: 'open', page: 1, per_page: 5 }, + timeout: 5000, + validateStatus: () => true + }); + console.log('Status:', response.status); + console.log('Response:', JSON.stringify(response.data).substring(0, 500)); + } catch (e) { + console.error('Error:', e.message); + } +} + +testAPI(); diff --git a/test-etenders.js b/test-etenders.js new file mode 100644 index 0000000..b5e2423 --- /dev/null +++ b/test-etenders.js @@ -0,0 +1,21 @@ +import axios from 'axios'; +import * as cheerio from 'cheerio'; + +const url = 'https://etendersni.gov.uk/epps/home.do'; +try { + const resp = await axios.get(url, { timeout: 10000 }); + const $ = cheerio.load(resp.data); + + console.log('Page fetched, looking for links...'); + let found = []; + .each((i, el) => { + const href = .attr('href'); + const text = .text().trim(); + if (href && href.includes('list')) { + found.push([text.substring(0, 50), href]); + } + }); + console.log('Found links:', found); +} catch (e) { + console.log('Error:', e.message); +}