import axios from 'axios';
import { classifySector } from './classify-sector.js';
import pg from 'pg';
import dotenv from 'dotenv';

dotenv.config();

// NOTE(review): the fallback connection string embeds credentials in source.
// It is kept so existing local setups keep working; confirm whether it should
// be removed in favour of requiring DATABASE_URL.
const pool = new pg.Pool({
  connectionString:
    process.env.DATABASE_URL ||
    'postgresql://tenderpilot:tenderpilot123@localhost:5432/tenderpilot',
});

// Rate limiting
// Promise-based sleep helper. Nothing in this file calls it at present.
const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

const DAY_MS = 24 * 60 * 60 * 1000;

// Notices published longer ago than this many days are skipped.
const MAX_NOTICE_AGE_DAYS = 90;

/**
 * Returns an ISO-8601 timestamp offset from the current time.
 *
 * @param {number} days - Offset in days; negative values are in the past.
 * @returns {string} ISO timestamp string.
 */
const isoDaysFromNow = (days) => new Date(Date.now() + days * DAY_MS).toISOString();

// Sample UK-relevant tender data for testing
// In production, this would come from the TED API or web scraping
const SAMPLE_TENDERS = [
  {
    title: 'Supply of office equipment and supplies - UK Procurement',
    description:
      'UK Government Procurement: Supply of office equipment and supplies for government offices',
    authority: 'UK Government Procurement Service',
    value: 150000,
    published: isoDaysFromNow(-7),
    deadline: isoDaysFromNow(14),
    location: 'United Kingdom',
  },
  {
    title: 'IT Infrastructure Services - UK NHS Trust',
    description:
      'UK NHS Trust seeks IT infrastructure and support services for healthcare delivery',
    authority: 'National Health Service Trust',
    value: 500000,
    published: isoDaysFromNow(-5),
    deadline: isoDaysFromNow(30),
    location: 'United Kingdom',
  },
  {
    title: 'Transport Services for Local Authority',
    description: 'UK Local Authority procurement of transport and logistics services',
    authority: 'Local Authority Transport',
    value: 250000,
    published: isoDaysFromNow(-3),
    deadline: isoDaysFromNow(21),
    location: 'United Kingdom',
  },
  {
    title: 'Construction Services - University Campus Expansion',
    description: 'UK University seeks construction services for campus expansion project',
    authority: 'Russell Group University',
    value: 2500000,
    published: isoDaysFromNow(-10),
    deadline: isoDaysFromNow(60),
    location: 'United Kingdom',
  },
];

// Single INSERT statement shared by the API path and the sample-data fallback.
// RETURNING id yields a row only when the insert was not skipped by ON CONFLICT.
const INSERT_TENDER_SQL = `INSERT INTO tenders (
  source, source_id, title, description, summary, cpv_codes,
  value_low, value_high, currency, published_date, deadline,
  authority_name, authority_type, location, documents_url, notice_url,
  status, sector
) VALUES (
  $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18
)
ON CONFLICT (source_id) DO NOTHING
RETURNING id`;

/**
 * Inserts one tender row, skipping it when a row with the same source_id exists.
 *
 * Title, description, authority and location are truncated to 500/5000/255/255
 * characters; the summary column receives the first 500 characters of the
 * description. The same value is written to both value_low and value_high.
 *
 * @param {object} tender
 * @param {string} tender.sourceId
 * @param {string} tender.title
 * @param {string} tender.description
 * @param {string} tender.authority
 * @param {string} tender.location
 * @param {Array} tender.cpvCodes
 * @param {?number} tender.value
 * @param {string} tender.currency
 * @param {string} tender.publishedDate
 * @param {?string} tender.deadline
 * @param {string} tender.noticeUrl
 * @param {string} tender.status
 * @returns {Promise<boolean>} true when a new row was written, false when the
 *   insert was skipped because of a source_id conflict.
 * @throws Propagates query errors and errors from truncating non-string fields.
 */
async function insertTender(tender) {
  const result = await pool.query(INSERT_TENDER_SQL, [
    'ted_eu',
    tender.sourceId,
    tender.title.substring(0, 500),
    tender.description.substring(0, 5000),
    tender.description.substring(0, 500),
    tender.cpvCodes,
    tender.value,
    tender.value,
    tender.currency,
    tender.publishedDate,
    tender.deadline,
    tender.authority.substring(0, 255),
    'government',
    tender.location.substring(0, 255),
    '',
    tender.noticeUrl,
    tender.status,
    classifySector(tender.title, tender.description, tender.authority),
  ]);
  return result.rowCount > 0;
}

/**
 * Fetches GB notices from the TED API and stores those published recently.
 *
 * A failure on a single notice is logged and does not stop the loop.
 *
 * @returns {Promise<number>} Number of rows actually inserted.
 * @throws When the HTTP request fails or the response has no data object;
 *   the caller treats that as "API not available".
 */
async function importFromTedApi() {
  // Note: The current TED web interface is JavaScript-rendered,
  // so we'd need either headless browser (Puppeteer/Playwright) or the API to work
  const tedApiUrl = 'https://ted.europa.eu/api/v3.0/notices/search';
  const params = {
    country: 'GB',
    limit: 100,
    offset: 0,
    sort: 'date_pub:desc',
  };

  console.log(`Attempting to fetch from TED API: ${tedApiUrl}`);

  const response = await axios.get(tedApiUrl, {
    params,
    timeout: 30000,
    headers: {
      'User-Agent':
        'TenderRadar/1.0 (UK Public Procurement Aggregator; contact@tenderradar.co.uk)',
    },
  });

  console.log(`TED API returned ${response.data.notices?.length || 0} tenders`);

  const notices = response.data.notices;
  if (!Array.isArray(notices)) {
    return 0;
  }

  // Only insert if published within last 90 days. The cutoff does not depend
  // on the notice, so it is computed once for the whole batch.
  const cutoff = new Date();
  cutoff.setDate(cutoff.getDate() - MAX_NOTICE_AGE_DAYS);

  let insertedCount = 0;
  for (const notice of notices) {
    try {
      const title = notice.title || 'Untitled';
      const description = notice.description || notice.title || '';
      const authority = notice.buyer_name || 'Unknown Authority';
      const deadline = notice.deadline_date || null;
      const publishedDate = notice.publication_date || new Date().toISOString();
      const sourceId = `TED-${notice.id || encodeURIComponent(title).substring(0, 50)}`;

      if (new Date(publishedDate) < cutoff) {
        continue;
      }

      const wasInserted = await insertTender({
        sourceId,
        title,
        description,
        authority,
        location: notice.place_of_performance || 'United Kingdom',
        cpvCodes: notice.cpv_codes || [],
        value: notice.estimated_value || null,
        currency: 'EUR',
        publishedDate,
        deadline,
        noticeUrl: `https://ted.europa.eu/Notice/${notice.id || sourceId}`,
        status: deadline && new Date(deadline) > new Date() ? 'open' : 'closed',
      });

      // Count only rows that were really written; duplicates skipped by
      // ON CONFLICT must not inflate the total.
      if (wasInserted) {
        insertedCount++;
      }
    } catch (e) {
      console.error('Error inserting tender:', e.message);
    }
  }
  return insertedCount;
}

/**
 * Inserts the built-in SAMPLE_TENDERS as demonstration data.
 *
 * A failure on a single tender is logged and does not stop the loop.
 *
 * @returns {Promise<number>} Number of rows actually inserted.
 */
async function importSampleTenders() {
  let insertedCount = 0;
  for (const tender of SAMPLE_TENDERS) {
    try {
      const sourceId = `TED-DEMO-${encodeURIComponent(tender.title).substring(0, 40)}`;
      const wasInserted = await insertTender({
        sourceId,
        title: tender.title,
        description: tender.description,
        authority: tender.authority,
        location: tender.location,
        cpvCodes: [],
        value: tender.value,
        currency: 'GBP',
        publishedDate: tender.published,
        deadline: tender.deadline,
        noticeUrl: `https://ted.europa.eu/Notice/${sourceId}`,
        status: 'open',
      });
      if (wasInserted) {
        insertedCount++;
      }
    } catch (e) {
      console.error('Error inserting sample tender:', e.message);
    }
  }
  return insertedCount;
}

/**
 * Entry point: tries the TED API first and falls back to the sample data when
 * the API step throws. Always closes the connection pool before returning.
 *
 * @returns {Promise<void>}
 */
async function scrapeTenders() {
  try {
    console.log(`[${new Date().toISOString()}] Starting TED EU scrape...`);

    let insertedCount = 0;
    try {
      // Attempt to fetch from TED API
      insertedCount = await importFromTedApi();
    } catch (apiError) {
      console.warn(`TED API not available: ${apiError.message}`);
      console.log('Falling back to sample data for demonstration...');

      // Fallback: use sample data for demonstration
      insertedCount = await importSampleTenders();
    }

    console.log(
      `[${new Date().toISOString()}] TED EU scrape complete. Inserted/updated ${insertedCount} tenders`
    );
  } catch (error) {
    console.error('Error scraping TED:', error.message);
  } finally {
    await pool.end();
  }
}

// scrapeTenders handles its own errors, but closing the pool in `finally` can
// still reject; report that instead of leaving the promise unhandled.
scrapeTenders().catch((error) => {
  console.error('Error scraping TED:', error.message);
  process.exitCode = 1;
});