Files
tenderpilot/scrapers/find-tender.js
Peter Foster d1aa21c59f fix: logo crop, navbar alignment, buyer names, tender URLs
- Crop logo image (remove 58% bottom whitespace)
- Logo 90px, centered with nav links
- Cursor fix restored (no I-beam on non-interactive content)
- Contracts Finder: fix empty authority_name (was looking for procurer role, CF uses buyer)
- Contracts Finder: generate notice_url from OCID when release.url is empty
- Find a Tender: fix doubled base URL in notice_url
- Dashboard: use authority_name field (not buyer) for tender cards
- Card shadows strengthened on auth pages
- Password eye icon repositioned inside input
2026-02-14 16:15:21 +00:00

129 lines
4.4 KiB
JavaScript

import axios from 'axios';
import * as cheerio from 'cheerio';
import pg from 'pg';
import dotenv from 'dotenv';
// Load environment variables via dotenv before DATABASE_URL is read below.
dotenv.config();

// NOTE(review): the fallback connection string embeds a username and password.
// It looks intended for local development only — confirm it is never relied on
// in a deployed environment.
const connectionString =
  process.env.DATABASE_URL || 'postgresql://tenderpilot:tenderpilot123@localhost:5432/tenderpilot';

// PostgreSQL connection pool used by the scraper for all inserts; closed when the scrape finishes.
const pool = new pg.Pool({ connectionString });
/**
 * Rate-limiting helper: resolves (with no value) after the given pause.
 *
 * @param {number} ms - How long to wait, in milliseconds.
 * @returns {Promise<void>} Promise that settles once the timer fires.
 */
const delay = (ms) =>
  new Promise((wake) => {
    setTimeout(wake, ms);
  });
/** Origin of the Find a Tender service; prepended to relative notice links. */
const FIND_TENDER_ORIGIN = 'https://www.find-tender.service.gov.uk';

/** Zero-based month numbers keyed by the first three lower-cased letters of the English month name. */
const MONTH_INDEX_BY_PREFIX = new Map(
  ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'].map(
    (prefix, index) => [prefix, index],
  ),
);

/**
 * Converts a "D Month YYYY" date (e.g. "14 February 2026") to an ISO-8601 string.
 *
 * The date is built explicitly as midnight UTC instead of going through
 * `new Date(string)`, whose handling of non-ISO text is engine-specific and
 * interprets the value in the machine's local time zone.
 *
 * @param {string} text - Date text captured from the result metadata.
 * @returns {string|null} ISO timestamp, or null when the text is not a real calendar date.
 */
function parseNoticeDate(text) {
  const parts = /^(\d{1,2})\s+([A-Za-z]+)\s+(\d{4})$/.exec(text.trim());
  if (!parts) {
    return null;
  }
  const [, dayText, monthName, yearText] = parts;
  const monthIndex = MONTH_INDEX_BY_PREFIX.get(monthName.slice(0, 3).toLowerCase());
  if (monthIndex === undefined) {
    return null;
  }
  const day = Number(dayText);
  const parsed = new Date(Date.UTC(Number(yearText), monthIndex, day));
  // Date.UTC rolls impossible days over (31 Feb -> 3 Mar); reject those instead of storing a wrong date.
  if (parsed.getUTCDate() !== day) {
    return null;
  }
  return parsed.toISOString();
}

/**
 * Reads one `.search-result` element into a plain tender record.
 *
 * @param {object} element - Cheerio selection wrapping a single search result.
 * @returns {object|null} Record with sourceId, title, description, authority, publishedDate,
 *   deadline, valueLow and noticeUrl; null when the result has no link, because without one
 *   there is nothing to derive a source ID or notice URL from.
 */
function extractTender(element) {
  const titleLink = element.find('.search-result-header a').first();
  const rawHref = titleLink.attr('href') || '';
  if (!rawHref) {
    return null;
  }

  // Absolute hrefs are used as-is so the origin is never doubled.
  const noticeUrl = rawHref.startsWith('http') ? rawHref : `${FIND_TENDER_ORIGIN}${rawHref}`;
  // The source ID is the last path segment of the URL; fall back to the whole URL when it has another shape.
  const idMatch = noticeUrl.match(/\/([A-Z0-9-]+)$/);

  const metadata = element.find('.search-result-metadata').text();
  const publishMatch = metadata.match(/Published:\s*(\d{1,2}\s+\w+\s+\d{4})/);
  const deadlineMatch = metadata.match(/Deadline:\s*(\d{1,2}\s+\w+\s+\d{4})/);
  const valueMatch = metadata.match(/£([\d,]+)/);

  return {
    sourceId: idMatch ? idMatch[1] : noticeUrl,
    title: titleLink.text().trim(),
    authority: element.find('.search-result-sub-header').text().trim(),
    description: element.find('.search-result-description').text().trim(),
    publishedDate: publishMatch ? parseNoticeDate(publishMatch[1]) : null,
    deadline: deadlineMatch ? parseNoticeDate(deadlineMatch[1]) : null,
    valueLow: valueMatch ? parseFloat(valueMatch[1].replace(/,/g, '')) : null,
    noticeUrl,
  };
}

/**
 * Inserts one tender record, leaving any existing row with the same source_id untouched.
 *
 * @param {object} tender - Record produced by extractTender.
 * @returns {Promise<number>} Number of rows actually written (0 when the row already existed).
 */
async function insertTender(tender) {
  // A tender counts as open only when it has a deadline that is still in the future.
  const status = tender.deadline && new Date(tender.deadline) > new Date() ? 'open' : 'closed';
  const result = await pool.query(
    `INSERT INTO tenders (
      source, source_id, title, description, summary, cpv_codes,
      value_low, value_high, currency, published_date, deadline,
      authority_name, authority_type, location, documents_url, notice_url, status
    ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17)
    ON CONFLICT (source_id) DO NOTHING`,
    [
      'find_tender',
      tender.sourceId,
      tender.title.substring(0, 500),
      tender.description,
      tender.description.substring(0, 500),
      [],
      tender.valueLow,
      tender.valueLow, // only one figure is scraped, so low and high share it
      'GBP',
      tender.publishedDate,
      tender.deadline,
      tender.authority,
      'government',
      'UK',
      '',
      tender.noticeUrl,
      status,
    ],
  );
  // With ON CONFLICT DO NOTHING the statement succeeds even when nothing was written,
  // so the driver's row count is the only reliable signal of a real insert.
  return result.rowCount ?? 0;
}

/**
 * Scrapes the most recent Find a Tender search results and stores them in the tenders table.
 *
 * Fetches up to five result pages (pausing two seconds between pages), stops early when a
 * page has no results, and inserts each result that carries a notice link. A failure on one
 * result is logged and does not stop the run; a failure fetching a page is logged and ends
 * the run. The connection pool is always closed afterwards.
 *
 * @returns {Promise<void>}
 */
async function scrapeTenders() {
  try {
    console.log(`[${new Date().toISOString()}] Starting Find a Tender scrape...`);
    let insertedCount = 0;
    const maxPages = 5; // Limit to first 5 pages to be respectful

    for (let page = 1; page <= maxPages; page++) {
      console.log(`Fetching page ${page}...`);
      const url = `${FIND_TENDER_ORIGIN}/Search/Results?page=${page}&sort=recent`;
      const response = await axios.get(url, {
        timeout: 30000,
        headers: {
          'User-Agent': 'TenderRadar/1.0 (UK Public Procurement Aggregator; contact@tenderradar.co.uk)',
        },
      });

      const $ = cheerio.load(response.data);
      const tenderElements = $('.search-result');
      if (tenderElements.length === 0) {
        console.log('No more tenders found, stopping pagination');
        break;
      }
      console.log(`Found ${tenderElements.length} tenders on page ${page}`);

      for (let i = 0; i < tenderElements.length; i++) {
        try {
          const tender = extractTender(tenderElements.eq(i));
          if (tender === null) {
            console.warn(`Skipping result ${i + 1} on page ${page}: no notice link found`);
            continue;
          }
          const written = await insertTender(tender);
          insertedCount += written;
        } catch (e) {
          console.error('Error inserting tender:', e.message);
        }
      }

      // Rate limiting: wait 2 seconds between pages
      if (page < maxPages) {
        await delay(2000);
      }
    }

    console.log(`[${new Date().toISOString()}] Find a Tender scrape complete. Inserted/updated ${insertedCount} tenders`);
  } catch (error) {
    console.error('Error scraping Find a Tender:', error.message);
  } finally {
    await pool.end();
  }
}
// Script entry point: start the scrape as soon as the module loads.
// The returned promise is deliberately not awaited here.
void scrapeTenders();