2026-02-15 13:28:54 +00:00
|
|
|
import { pathToFileURL } from 'node:url';

import dotenv from 'dotenv';
import pg from 'pg';
import { chromium } from 'playwright';

import { classifySector } from './classify-sector.js';
|
|
|
|
|
|
|
|
|
|
// Load variables from a .env file into process.env before DATABASE_URL is read.
dotenv.config();

// Used whenever DATABASE_URL is unset or empty; points at a localhost database.
// NOTE(review): this fallback embeds a username and password in source control.
const connectionString =
  process.env.DATABASE_URL ||
  'postgresql://tenderpilot:tenderpilot123@localhost:5432/tenderpilot';

/** PostgreSQL connection pool shared by everything in this module. */
const pool = new pg.Pool({ connectionString });
|
|
|
|
|
|
2026-02-15 13:28:54 +00:00
|
|
|
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Time to wait, in milliseconds.
 * @returns {Promise<void>} Promise that fulfils (with no value) once the timer fires.
 */
const delay = (ms) =>
  new Promise((wake) => {
    setTimeout(wake, ms);
  });
|
2026-02-14 17:12:51 +00:00
|
|
|
|
|
|
|
|
/** TED search for UK-related notices; the page number is appended per request. */
const TED_SEARCH_URL = 'https://ted.europa.eu/en/search/result?q=united+kingdom+OR+UK';

/** Number of result pages fetched when the caller does not specify one. */
const DEFAULT_MAX_PAGES = 3;

/** Pause (ms) after each navigation and between result pages. */
const PAGE_PAUSE_MS = 3000;

/** Finds a DD/MM/YYYY date anywhere in a string (the only date format handled). */
const TED_DATE_PATTERN = /(\d{2})\/(\d{2})\/(\d{4})/;

/**
 * Whole-word UK markers. A plain substring test for "uk" would also accept
 * rows that merely contain "Ukraine", "bulk", etc.
 */
const UK_MENTION_PATTERN = /\b(?:united kingdom|uk|great britain)\b/i;

/** Insert statement; ON CONFLICT makes re-scraping an already stored notice a no-op. */
const INSERT_TENDER_SQL = `INSERT INTO tenders (
  source, source_id, title, description, summary, cpv_codes,
  value_low, value_high, currency, published_date, deadline,
  authority_name, authority_type, location, documents_url, notice_url, status, sector
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18)
ON CONFLICT (source_id) DO NOTHING RETURNING id`;

/**
 * Convert the first DD/MM/YYYY date found in `text` to an ISO-8601 timestamp
 * at midnight UTC.
 *
 * @param {string} text - Raw cell text that may contain a date.
 * @returns {string|null} ISO string, or null when no real calendar date is present.
 */
function parseTedDate(text) {
  const match = TED_DATE_PATTERN.exec(text ?? '');
  if (!match) return null;

  const day = Number(match[1]);
  const month = Number(match[2]);
  const year = Number(match[3]);
  const parsed = new Date(Date.UTC(year, month - 1, day));

  // Date.UTC silently rolls over out-of-range parts (31/02 -> 03/03); reject those.
  const isRealDate =
    parsed.getUTCFullYear() === year &&
    parsed.getUTCMonth() === month - 1 &&
    parsed.getUTCDate() === day;

  return isRealDate ? parsed.toISOString() : null;
}

/**
 * Tell whether a result row refers to the UK.
 *
 * @param {string} text - Full text of a result row.
 * @returns {boolean} True when "United Kingdom", "UK" or "Great Britain" occurs as a whole word.
 */
function mentionsUk(text) {
  return UK_MENTION_PATTERN.test(text);
}

/**
 * Collect raw tender rows from the current results table.
 *
 * Passed to `page.evaluate`, so it executes inside the browser page: it must
 * stay self-contained and may not reference anything from this module.
 *
 * @returns {Array<{noticeId: string, href: string, title: string, country: string,
 *   publishedDate: string, deadline: string, rowText: string}>}
 */
function extractTenderRows() {
  const cellText = (cell) => cell?.textContent.trim() || '';
  const extracted = [];

  for (const row of document.querySelectorAll('tbody tr')) {
    try {
      const link = row.querySelector('a[href*="/notice/"]');
      const cells = row.querySelectorAll('td');
      if (!link || cells.length < 4) continue;

      // Without a notice id every such row would share the source id "TED-".
      const noticeId = link.textContent.trim();
      if (!noticeId) continue;

      extracted.push({
        noticeId,
        href: link.href,
        title: cellText(cells[2]),
        country: cellText(cells[3]),
        publishedDate: cellText(cells[4]),
        deadline: cellText(cells[5]),
        rowText: row.textContent,
      });
    } catch {
      // Best effort: a malformed row must not abort extraction of the others.
    }
  }

  return extracted;
}

/**
 * Classify one scraped tender and store it.
 *
 * @param {{noticeId: string, href: string, title: string, country: string,
 *   publishedDate: string, deadline: string, rowText: string}} tender
 * @returns {Promise<boolean>} True when a new row was inserted; false when the
 *   deadline has already passed or the notice was already stored.
 */
async function insertTender(tender) {
  const title = tender.title.substring(0, 500);
  const description = tender.rowText.substring(0, 1000) || title;
  const publishedDate = parseTedDate(tender.publishedDate);
  const deadline = parseTedDate(tender.deadline);

  if (deadline && new Date(deadline) < new Date()) {
    return false;
  }

  const sector = await classifySector(title, description);

  const result = await pool.query(INSERT_TENDER_SQL, [
    'ted_eu',
    `TED-${tender.noticeId}`,
    title,
    description.substring(0, 2000),
    description.substring(0, 500),
    [],
    null,
    null,
    'EUR',
    publishedDate || new Date().toISOString(),
    deadline,
    'EU Tender Authority',
    'Public Sector',
    tender.country || 'United Kingdom',
    '',
    tender.href,
    'open',
    sector,
  ]);

  // RETURNING yields a row only when the insert was not skipped by ON CONFLICT.
  return result.rows.length > 0;
}

/**
 * Scrape UK-related tenders from the TED search results and insert the ones
 * that are not stored yet into the `tenders` table.
 *
 * Failures are logged rather than thrown: a failing tender is skipped, a
 * failing page stops pagination. The browser and the module-level connection
 * pool are both closed before returning, so the pool cannot be reused after a
 * call.
 *
 * @param {{maxPages?: number}} [options] - `maxPages` caps the number of result
 *   pages visited (defaults to 3).
 * @returns {Promise<void>}
 */
async function scrapeTenders(options) {
  const maxPages = options?.maxPages ?? DEFAULT_MAX_PAGES;
  let browser;

  try {
    console.log(`[${new Date().toISOString()}] Starting TED EU scraper with Playwright...`);

    browser = await chromium.launch({ headless: true });
    const page = await browser.newPage();
    let insertedCount = 0;

    for (let pageNum = 1; pageNum <= maxPages; pageNum++) {
      try {
        const url = `${TED_SEARCH_URL}&page=${pageNum}`;

        console.log(`Fetching TED page ${pageNum}...`);
        await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });
        await delay(PAGE_PAUSE_MS);

        const rows = await page.evaluate(extractTenderRows);
        const tenders = rows.filter(({ rowText }) => mentionsUk(rowText));

        console.log(`Found ${tenders.length} UK-related tenders on page ${pageNum}`);

        if (tenders.length === 0) {
          console.log('No tenders found, stopping');
          break;
        }

        for (const tender of tenders) {
          try {
            if (await insertTender(tender)) {
              insertedCount++;
              console.log(`  ✓ Inserted: ${tender.title.substring(0, 60)}...`);
            }
          } catch (itemError) {
            console.error(`Error processing tender: ${itemError.message}`);
          }
        }

        // Pause between pages only; there is nothing to wait for after the last one.
        if (pageNum < maxPages) {
          await delay(PAGE_PAUSE_MS);
        }
      } catch (pageError) {
        console.error(`Error fetching page ${pageNum}: ${pageError.message}`);
        break;
      }
    }

    console.log(`\nTED EU scrape complete. Inserted ${insertedCount} new tenders.`);
  } catch (error) {
    console.error('TED EU scraper failed:', error);
  } finally {
    try {
      if (browser) {
        await browser.close();
      }
    } finally {
      // Runs even if closing the browser fails, so connections are not leaked.
      await pool.end();
    }
  }
}
|
|
|
|
|
|
2026-02-15 13:28:54 +00:00
|
|
|
// Run if called directly
|
2026-02-15 13:18:50 +00:00
|
|
|
// import.meta.url is a percent-encoded file URL (and `file:///C:/...` on Windows),
// so the script path is converted to a URL instead of being prefixed with "file://".
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  // scrapeTenders logs its own failures, but its cleanup step can still reject.
  scrapeTenders().catch((error) => {
    console.error('TED EU scraper failed:', error);
    process.exitCode = 1;
  });
}
|
|
|
|
|
|
2026-02-15 13:18:50 +00:00
|
|
|
export { scrapeTenders };
|