import { pathToFileURL } from 'node:url';

import dotenv from 'dotenv';
import pg from 'pg';
import { chromium } from 'playwright';

import { classifySector } from './classify-sector.js';

dotenv.config();

// NOTE(review): this fallback embeds credentials in source. It is kept so
// existing local setups keep working, but it should be confirmed as
// development-only and DATABASE_URL should be set everywhere else.
const FALLBACK_DATABASE_URL = 'postgresql://tenderpilot:tenderpilot123@localhost:5432/tenderpilot';

const SEARCH_BASE_URL = 'https://ted.europa.eu/en/search/result';
const SEARCH_QUERY = 'united kingdom OR UK';
const MAX_PAGES = 3;
const NAVIGATION_TIMEOUT_MS = 30000;
const PAGE_PAUSE_MS = 3000;

const INSERT_TENDER_SQL = `
  INSERT INTO tenders (
    source, source_id, title, description, summary, cpv_codes,
    value_low, value_high, currency, published_date, deadline,
    authority_name, authority_type, location, documents_url,
    notice_url, status, sector
  ) VALUES (
    $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18
  )
  ON CONFLICT (source_id) DO NOTHING
  RETURNING id`;

/**
 * Resolve after the given number of milliseconds.
 *
 * @param {number} ms - Time to wait, in milliseconds.
 * @returns {Promise<void>}
 */
const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Build the TED search URL for one result page.
 *
 * @param {number} pageNum - 1-based result page number.
 * @returns {string} Absolute search URL.
 */
function buildSearchUrl(pageNum) {
  const url = new URL(SEARCH_BASE_URL);
  // URLSearchParams encodes spaces as '+', so this yields
  // "?q=united+kingdom+OR+UK&page=N".
  url.searchParams.set('q', SEARCH_QUERY);
  url.searchParams.set('page', String(pageNum));
  return url.href;
}

/**
 * Parse a date found in a TED table cell into an ISO-8601 timestamp.
 *
 * Accepts DD/MM/YYYY or YYYY-MM-DD anywhere in the text and interprets it as
 * midnight UTC of that day.
 *
 * @param {string} text - Raw cell text; may be empty.
 * @returns {string|null} ISO string, or null when no valid date is present.
 */
function parseTedDate(text) {
  if (!text) {
    return null;
  }

  const dayFirst = /(\d{2})\/(\d{2})\/(\d{4})/.exec(text);
  const yearFirst = dayFirst ? null : /(\d{4})-(\d{2})-(\d{2})/.exec(text);
  if (!dayFirst && !yearFirst) {
    return null;
  }

  const [year, month, day] = (dayFirst
    ? [dayFirst[3], dayFirst[2], dayFirst[1]]
    : [yearFirst[1], yearFirst[2], yearFirst[3]]
  ).map(Number);

  const parsed = new Date(Date.UTC(year, month - 1, day));
  // Reject impossible dates (month 13, 31 February, ...) instead of letting
  // them roll over silently or make toISOString() throw.
  const isRealDate =
    parsed.getUTCFullYear() === year &&
    parsed.getUTCMonth() === month - 1 &&
    parsed.getUTCDate() === day;

  return isRealDate ? parsed.toISOString() : null;
}

/**
 * Read the UK-related tender rows from the currently loaded result page.
 *
 * The callback runs inside the browser page, so it must be self-contained and
 * cannot reference anything from this module's scope.
 *
 * @param {object} page - Playwright page already navigated to a result page.
 * @returns {Promise<Array<{noticeId: string, href: string, title: string,
 *   country: string, publishedDate: string, deadline: string, fullText: string}>>}
 */
function extractTenders(page) {
  return page.evaluate(() => {
    // Whole-word match: a plain substring test for "uk" also hits words such
    // as "Ukraine".
    const ukPattern = /\b(?:united\s+kingdom|uk|great\s+britain)\b/i;
    const results = [];

    for (const row of document.querySelectorAll('tbody tr')) {
      const link = row.querySelector('a[href*="/notice/"]');
      if (!link) continue;

      const cells = row.querySelectorAll('td');
      if (cells.length < 4) continue;

      // An empty id would collapse every such row onto the same source id.
      const noticeId = link.textContent.trim();
      if (!noticeId) continue;

      const rowText = row.textContent;
      if (!ukPattern.test(rowText)) continue;

      const cellText = (index) => cells[index]?.textContent.trim() || '';

      results.push({
        noticeId,
        href: link.href,
        title: cellText(2),
        country: cellText(3),
        publishedDate: cellText(4),
        deadline: cellText(5),
        fullText: rowText.substring(0, 1000),
      });
    }

    return results;
  });
}

/**
 * Classify one scraped tender and insert it unless its deadline has passed.
 *
 * @param {object} pool - pg connection pool used for the insert.
 * @param {object} tender - Row data as returned by extractTenders().
 * @returns {Promise<boolean>} True when a new row was inserted; false when the
 *   tender was skipped (deadline passed) or its source id already existed.
 */
async function saveTender(pool, tender) {
  const title = tender.title.substring(0, 500);
  const description = tender.fullText || title;
  const publishedDate = parseTedDate(tender.publishedDate);
  const deadline = parseTedDate(tender.deadline);

  // Skip if deadline has passed.
  if (deadline && new Date(deadline) < new Date()) {
    return false;
  }

  const sector = await classifySector(title, description);

  const result = await pool.query(INSERT_TENDER_SQL, [
    'ted_eu',
    `TED-${tender.noticeId}`,
    title,
    description.substring(0, 2000),
    description.substring(0, 500),
    [],
    null,
    null,
    'EUR',
    publishedDate ?? new Date().toISOString(),
    deadline,
    'EU Tender Authority',
    'Public Sector',
    tender.country || 'United Kingdom',
    '',
    tender.href,
    'open',
    sector,
  ]);

  // ON CONFLICT DO NOTHING returns no row when the source id already exists.
  return result.rows.length > 0;
}

/**
 * Scrape up to MAX_PAGES pages of TED search results for UK-related tenders
 * and store the new ones in the `tenders` table.
 *
 * Failures are logged rather than thrown: a failing tender is skipped, a
 * failing page stops pagination, and any other error ends the run. The
 * browser and the database pool are always released.
 *
 * @returns {Promise<void>}
 */
async function scrapeTenders() {
  // A pool cannot be used again after end(), so each run gets its own; this
  // keeps the exported function callable more than once per process.
  const pool = new pg.Pool({
    connectionString: process.env.DATABASE_URL || FALLBACK_DATABASE_URL,
  });
  let browser;

  try {
    console.log(`[${new Date().toISOString()}] Starting TED EU scraper with Playwright...`);

    browser = await chromium.launch({ headless: true });
    const page = await browser.newPage();
    let insertedCount = 0;

    for (let pageNum = 1; pageNum <= MAX_PAGES; pageNum++) {
      try {
        console.log(`Fetching TED page ${pageNum}...`);
        await page.goto(buildSearchUrl(pageNum), {
          waitUntil: 'networkidle',
          timeout: NAVIGATION_TIMEOUT_MS,
        });
        await delay(PAGE_PAUSE_MS);

        const tenders = await extractTenders(page);
        console.log(`Found ${tenders.length} UK-related tenders on page ${pageNum}`);

        if (tenders.length === 0) {
          console.log('No tenders found, stopping');
          break;
        }

        for (const tender of tenders) {
          try {
            if (await saveTender(pool, tender)) {
              insertedCount++;
              console.log(` ✓ Inserted: ${tender.title.substring(0, 60)}...`);
            }
          } catch (itemError) {
            // One bad tender must not abort the rest of the page.
            console.error(`Error processing tender: ${itemError.message}`);
          }
        }

        await delay(PAGE_PAUSE_MS);
      } catch (pageError) {
        console.error(`Error fetching page ${pageNum}: ${pageError.message}`);
        break;
      }
    }

    console.log(`\nTED EU scrape complete. Inserted ${insertedCount} new tenders.`);
  } catch (error) {
    console.error('TED EU scraper failed:', error);
  } finally {
    // Nested so the pool is closed even if closing the browser throws.
    try {
      if (browser) {
        await browser.close();
      }
    } finally {
      await pool.end();
    }
  }
}

// Run if called directly. import.meta.url is a percent-encoded file URL, so
// compare against a real file URL instead of a hand-built "file://" string.
const entryPath = process.argv[1];
if (entryPath && import.meta.url === pathToFileURL(entryPath).href) {
  scrapeTenders().catch((error) => {
    console.error('TED EU scraper crashed during cleanup:', error);
    process.exitCode = 1;
  });
}

export { scrapeTenders };