Files
tenderpilot/scrapers/ted-eu.js

198 lines
7.5 KiB
JavaScript
Raw Normal View History

import axios from 'axios';
import { classifySector } from './classify-sector.js';
import pg from 'pg';
import dotenv from 'dotenv';
dotenv.config();
// Connection string used when DATABASE_URL is unset or empty.
// NOTE(review): this fallback embeds a username and password directly in
// source; consider requiring DATABASE_URL outside local development.
const FALLBACK_DATABASE_URL =
  'postgresql://tenderpilot:tenderpilot123@localhost:5432/tenderpilot';

// Shared PostgreSQL pool; every query in this script goes through it.
const pool = new pg.Pool({
  connectionString: process.env.DATABASE_URL || FALLBACK_DATABASE_URL,
});
/**
 * Rate-limiting helper: resolves after roughly `ms` milliseconds.
 *
 * @param {number} ms - How long to wait, in milliseconds.
 * @returns {Promise<void>} Promise that fulfils (with no value) once the timer fires.
 */
const delay = (ms) =>
  new Promise((wake) => {
    setTimeout(wake, ms);
  });
// Demo fixtures: UK-flavoured tenders used for testing.
// In production this data would come from the TED API or web scraping.
// Dates are computed relative to the moment the module is loaded, so every
// sample is published in the recent past and has a deadline in the future.
const SAMPLE_TENDERS = (() => {
  const MS_PER_DAY = 24 * 60 * 60 * 1000;

  // ISO-8601 timestamp `days` days away from now (negative = in the past).
  const isoDaysFromNow = (days) =>
    new Date(Date.now() + days * MS_PER_DAY).toISOString();

  return [
    {
      title: 'Supply of office equipment and supplies - UK Procurement',
      description: 'UK Government Procurement: Supply of office equipment and supplies for government offices',
      authority: 'UK Government Procurement Service',
      value: 150000,
      published: isoDaysFromNow(-7),
      deadline: isoDaysFromNow(14),
      location: 'United Kingdom',
    },
    {
      title: 'IT Infrastructure Services - UK NHS Trust',
      description: 'UK NHS Trust seeks IT infrastructure and support services for healthcare delivery',
      authority: 'National Health Service Trust',
      value: 500000,
      published: isoDaysFromNow(-5),
      deadline: isoDaysFromNow(30),
      location: 'United Kingdom',
    },
    {
      title: 'Transport Services for Local Authority',
      description: 'UK Local Authority procurement of transport and logistics services',
      authority: 'Local Authority Transport',
      value: 250000,
      published: isoDaysFromNow(-3),
      deadline: isoDaysFromNow(21),
      location: 'United Kingdom',
    },
    {
      title: 'Construction Services - University Campus Expansion',
      description: 'UK University seeks construction services for campus expansion project',
      authority: 'Russell Group University',
      value: 2500000,
      published: isoDaysFromNow(-10),
      deadline: isoDaysFromNow(60),
      location: 'United Kingdom',
    },
  ];
})();
// Parameterised INSERT shared by the live-API path and the sample-data path.
// Rows whose source_id already exists are left untouched (ON CONFLICT DO
// NOTHING); RETURNING id yields a row only when an insert really happened.
const INSERT_TENDER_SQL = `INSERT INTO tenders (
  source, source_id, title, description, summary, cpv_codes,
  value_low, value_high, currency, published_date, deadline,
  authority_name, authority_type, location, documents_url, notice_url, status, sector
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18)
ON CONFLICT (source_id) DO NOTHING
RETURNING id`;

/**
 * Insert a single tender row through the shared pool.
 *
 * @param {Array} values - The 18 positional values, in the column order of
 *   INSERT_TENDER_SQL.
 * @returns {Promise<boolean>} true when a new row was written, false when the
 *   insert was skipped because the source_id already exists.
 */
async function insertTender(values) {
  const result = await pool.query(INSERT_TENDER_SQL, values);
  return result.rowCount > 0;
}

/**
 * Fetch recent UK notices from the TED API and store them in the `tenders`
 * table; if the API request fails, insert SAMPLE_TENDERS instead.
 *
 * Per-row failures are logged and skipped so one bad notice does not abort
 * the run. Any other error is logged rather than rethrown. The pool is always
 * closed in `finally` (presumably because this is a one-shot script entry
 * point), so the returned promise can only reject if `pool.end()` itself fails.
 *
 * @returns {Promise<void>}
 */
async function scrapeTenders() {
  try {
    console.log(`[${new Date().toISOString()}] Starting TED EU scrape...`);
    let insertedCount = 0;

    // Attempt to fetch from the TED API.
    // Note: the current TED web interface is JavaScript-rendered, so without
    // the API we would need a headless browser (Puppeteer/Playwright).
    try {
      const tedApiUrl = 'https://ted.europa.eu/api/v3.0/notices/search';
      const params = {
        country: 'GB',
        limit: 100,
        offset: 0,
        sort: 'date_pub:desc'
      };
      console.log(`Attempting to fetch from TED API: ${tedApiUrl}`);
      const response = await axios.get(tedApiUrl, {
        params,
        timeout: 30000,
        headers: {
          'User-Agent': 'TenderRadar/1.0 (UK Public Procurement Aggregator; contact@tenderradar.co.uk)'
        }
      });
      console.log(`TED API returned ${response.data.notices?.length || 0} tenders`);

      if (Array.isArray(response.data.notices)) {
        // Only notices published within the last 90 days are stored. The
        // cutoff does not depend on the notice, so compute it once.
        const ninetyDaysAgo = new Date();
        ninetyDaysAgo.setDate(ninetyDaysAgo.getDate() - 90);

        for (const notice of response.data.notices) {
          try {
            const title = notice.title || 'Untitled';
            const description = notice.description || notice.title || '';
            const authority = notice.buyer_name || 'Unknown Authority';
            const deadline = notice.deadline_date || null;
            const publishedDate = notice.publication_date || new Date().toISOString();
            // Without an id, fall back to a key derived from the title.
            const sourceId = `TED-${notice.id || encodeURIComponent(title).substring(0, 50)}`;
            const valueLow = notice.estimated_value || null;
            const location = notice.place_of_performance || 'United Kingdom';
            const noticeUrl = `https://ted.europa.eu/Notice/${notice.id || sourceId}`;

            if (new Date(publishedDate) < ninetyDaysAgo) {
              continue;
            }

            const wasInserted = await insertTender([
              'ted_eu',
              sourceId,
              title.substring(0, 500),
              description.substring(0, 5000),
              description.substring(0, 500),
              notice.cpv_codes || [],
              valueLow,
              valueLow, // the single estimate is stored as both low and high
              'EUR',
              publishedDate,
              deadline,
              authority.substring(0, 255),
              'government',
              location.substring(0, 255),
              '',
              noticeUrl,
              // A notice without a deadline is recorded as closed.
              deadline && new Date(deadline) > new Date() ? 'open' : 'closed',
              classifySector(title, description, authority)
            ]);
            // Count only rows that were really written; duplicates skipped by
            // ON CONFLICT must not inflate the total.
            if (wasInserted) {
              insertedCount++;
            }
          } catch (e) {
            console.error('Error inserting tender:', e.message);
          }
        }
      }
    } catch (apiError) {
      console.warn(`TED API not available: ${apiError.message}`);
      console.log('Falling back to sample data for demonstration...');

      // Fallback: use sample data for demonstration.
      for (const tender of SAMPLE_TENDERS) {
        try {
          const sourceId = `TED-DEMO-${encodeURIComponent(tender.title).substring(0, 40)}`;
          const wasInserted = await insertTender([
            'ted_eu',
            sourceId,
            tender.title.substring(0, 500),
            tender.description.substring(0, 5000),
            tender.description.substring(0, 500),
            [],
            tender.value,
            tender.value,
            'GBP',
            tender.published,
            tender.deadline,
            tender.authority.substring(0, 255),
            'government',
            tender.location.substring(0, 255),
            '',
            `https://ted.europa.eu/Notice/${sourceId}`,
            'open',
            classifySector(tender.title, tender.description, tender.authority)
          ]);
          if (wasInserted) {
            insertedCount++;
          }
        } catch (e) {
          console.error('Error inserting sample tender:', e.message);
        }
      }
    }

    console.log(`[${new Date().toISOString()}] TED EU scrape complete. Inserted/updated ${insertedCount} tenders`);
  } catch (error) {
    console.error('Error scraping TED:', error.message);
  } finally {
    // Release all connections so the process can exit.
    await pool.end();
  }
}
// Script entry point: run the scrape as soon as the module is loaded.
// scrapeTenders() logs its own scrape errors, but closing the pool in its
// `finally` can still reject, so handle that instead of leaving the promise
// floating, and signal failure through the exit code.
scrapeTenders().catch((error) => {
  console.error('Unhandled error in TED scraper:', error);
  process.exitCode = 1;
});