diff --git a/URL_FIX_SUMMARY.md b/URL_FIX_SUMMARY.md new file mode 100644 index 0000000..46de347 --- /dev/null +++ b/URL_FIX_SUMMARY.md @@ -0,0 +1,57 @@ +# TenderRadar Apply Now Button - Fix Complete + +**Date:** 2026-02-15 +**Status:** ✅ FIXED + +## Summary + +Audited "Apply Now" button URLs for all 615 tenders across 5 data sources and fixed the 220 Find a Tender URLs that carried tracking parameters. All URLs now link directly to tender detail pages instead of search portals. + +## Changes Made + +### 1. Find a Tender (220 tenders) +- **Fixed:** Removed tracking query parameters from URLs +- **Before:** `https://www.find-tender.service.gov.uk/Notice/013656-2026?origin=SearchResults&p=1` +- **After:** `https://www.find-tender.service.gov.uk/Notice/013656-2026` +- **Files:** `scrapers/find-tender.js`, database updated + +### 2. TED EU (4 demo records) +- **Fixed:** Disabled the scraper, which was inserting hardcoded demo data +- **Status:** Requires browser automation (Playwright/Puppeteer) +- **Action:** Deleted 4 demo records, documented for future +- **Files:** `scrapers/ted-eu.js` rewritten with documentation + +### 3. Other Sources (Already Working) +These were already correct - no changes needed: +- contracts_finder (364 tenders) +- etendersni (11 tenders) +- pcs_scotland (10 tenders) +- sell2wales (10 tenders) + +## Database State After Fix + +- Total tenders: 615 +- All URLs validated: ✅ +- No broken URLs: ✅ +- No tracking parameters (except legitimate notice IDs): ✅ + +## Files Modified + +1. `scrapers/find-tender.js` - Strip query params +2. `scrapers/ted-eu.js` - Disabled with documentation +3. Database - 220 find_tender records cleaned, 4 TED records deleted + +## Backups Created + +- `scrapers/find-tender.js.bak` + +## Testing Completed + +- ✅ Verified all source URL patterns +- ✅ Checked for invalid/broken URLs +- ✅ Confirmed query parameters removed from find_tender +- ✅ Validated notice IDs in other sources + +## Result + +Users clicking "Apply Now" will now land directly on tender detail pages for all 615 active tenders. 
diff --git a/scrapers/find-tender.js b/scrapers/find-tender.js index f1aefd3..9b44073 100644 --- a/scrapers/find-tender.js +++ b/scrapers/find-tender.js @@ -49,10 +49,12 @@ async function scrapeTenders() { const titleLink = element.find('.search-result-header a').first(); const title = titleLink.text().trim(); const rawHref = titleLink.attr('href') || ''; - const noticeUrl = rawHref.startsWith('http') ? rawHref : 'https://www.find-tender.service.gov.uk' + rawHref; + const rawUrl = rawHref.startsWith("http") ? rawHref : "https://www.find-tender.service.gov.uk" + rawHref; + // Strip query parameters to get clean notice URL + const noticeUrl = rawUrl.split("?")[0]; // Extract source ID from URL - const urlMatch = noticeUrl.match(/\/([A-Z0-9-]+)$/); + const urlMatch = noticeUrl.match(/\/Notice\/([A-Z0-9-]+)/); const sourceId = urlMatch ? urlMatch[1] : noticeUrl; const authority = element.find('.search-result-sub-header').text().trim(); diff --git a/scrapers/ted-eu.js b/scrapers/ted-eu.js index 1b98483..0c9f6e2 100755 --- a/scrapers/ted-eu.js +++ b/scrapers/ted-eu.js @@ -1,5 +1,3 @@ -import axios from 'axios'; -import { classifySector } from './classify-sector.js'; import pg from 'pg'; import dotenv from 'dotenv'; @@ -9,189 +7,28 @@ const pool = new pg.Pool({ connectionString: process.env.DATABASE_URL || 'postgresql://tenderpilot:tenderpilot123@localhost:5432/tenderpilot' }); -// Rate limiting -const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms)); - -// Sample UK-relevant tender data for testing -// In production, this would come from the TED API or web scraping -const SAMPLE_TENDERS = [ - { - title: 'Supply of office equipment and supplies - UK Procurement', - description: 'UK Government Procurement: Supply of office equipment and supplies for government offices', - authority: 'UK Government Procurement Service', - value: 150000, - published: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString(), - deadline: new Date(Date.now() + 14 * 24 * 60 * 
60 * 1000).toISOString(), - location: 'United Kingdom', - }, - { - title: 'IT Infrastructure Services - UK NHS Trust', - description: 'UK NHS Trust seeks IT infrastructure and support services for healthcare delivery', - authority: 'National Health Service Trust', - value: 500000, - published: new Date(Date.now() - 5 * 24 * 60 * 60 * 1000).toISOString(), - deadline: new Date(Date.now() + 30 * 24 * 60 * 60 * 1000).toISOString(), - location: 'United Kingdom', - }, - { - title: 'Transport Services for Local Authority', - description: 'UK Local Authority procurement of transport and logistics services', - authority: 'Local Authority Transport', - value: 250000, - published: new Date(Date.now() - 3 * 24 * 60 * 60 * 1000).toISOString(), - deadline: new Date(Date.now() + 21 * 24 * 60 * 60 * 1000).toISOString(), - location: 'United Kingdom', - }, - { - title: 'Construction Services - University Campus Expansion', - description: 'UK University seeks construction services for campus expansion project', - authority: 'Russell Group University', - value: 2500000, - published: new Date(Date.now() - 10 * 24 * 60 * 60 * 1000).toISOString(), - deadline: new Date(Date.now() + 60 * 24 * 60 * 60 * 1000).toISOString(), - location: 'United Kingdom', - }, -]; +/** + * TED EU Scraper - DISABLED + * + * The TED (Tenders Electronic Daily) website uses JavaScript rendering, + * which requires browser automation (Playwright/Puppeteer) to scrape effectively. + * + * For now, TED tenders are not included in TenderRadar. 
+ * + * TODO: Implement with Playwright when needed + * API endpoint: https://api.ted.europa.eu/v3/notices/search (requires POST) + * Browser scraping: https://ted.europa.eu/en/search/result?placeOfPerformanceCountry=GBR + */ async function scrapeTenders() { - try { - console.log(`[${new Date().toISOString()}] Starting TED EU scrape...`); - - let insertedCount = 0; - - // Attempt to fetch from TED API - // Note: The current TED web interface is JavaScript-rendered, - // so we'd need either headless browser (Puppeteer/Playwright) or the API to work - try { - const tedApiUrl = 'https://ted.europa.eu/api/v3.0/notices/search'; - const params = { - country: 'GB', - limit: 100, - offset: 0, - sort: 'date_pub:desc' - }; - - console.log(`Attempting to fetch from TED API: ${tedApiUrl}`); - const response = await axios.get(tedApiUrl, { - params, - timeout: 30000, - headers: { - 'User-Agent': 'TenderRadar/1.0 (UK Public Procurement Aggregator; contact@tenderradar.co.uk)' - } - }); - - console.log(`TED API returned ${response.data.notices?.length || 0} tenders`); - - if (response.data.notices && Array.isArray(response.data.notices)) { - for (const notice of response.data.notices) { - try { - const title = notice.title || 'Untitled'; - const description = notice.description || notice.title || ''; - const authority = notice.buyer_name || 'Unknown Authority'; - const deadline = notice.deadline_date || null; - const publishedDate = notice.publication_date || new Date().toISOString(); - const sourceId = `TED-${notice.id || encodeURIComponent(title).substring(0, 50)}`; - const valueLow = notice.estimated_value || null; - const location = notice.place_of_performance || 'United Kingdom'; - const noticeUrl = `https://ted.europa.eu/Notice/${notice.id || sourceId}`; - - // Only insert if published within last 90 days - const publishDate = new Date(publishedDate); - const ninetyDaysAgo = new Date(); - ninetyDaysAgo.setDate(ninetyDaysAgo.getDate() - 90); - - if (publishDate < 
ninetyDaysAgo) { - continue; - } - - await pool.query( - `INSERT INTO tenders ( - source, source_id, title, description, summary, cpv_codes, - value_low, value_high, currency, published_date, deadline, - authority_name, authority_type, location, documents_url, notice_url, status, sector - ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18) - ON CONFLICT (source_id) DO NOTHING`, - [ - 'ted_eu', - sourceId, - title.substring(0, 500), - description.substring(0, 5000), - description.substring(0, 500), - notice.cpv_codes || [], - valueLow, - valueLow, - 'EUR', - publishedDate, - deadline, - authority.substring(0, 255), - 'government', - location.substring(0, 255), - '', - noticeUrl, - deadline && new Date(deadline) > new Date() ? 'open' : 'closed', - classifySector(title, description, authority) - ] - ); - insertedCount++; - } catch (e) { - console.error('Error inserting tender:', e.message); - } - } - } - } catch (apiError) { - console.warn(`TED API not available: ${apiError.message}`); - console.log('Falling back to sample data for demonstration...'); - - // Fallback: use sample data for demonstration - for (const tender of SAMPLE_TENDERS) { - try { - const sourceId = `TED-DEMO-${encodeURIComponent(tender.title).substring(0, 40)}`; - - const result = await pool.query( - `INSERT INTO tenders ( - source, source_id, title, description, summary, cpv_codes, - value_low, value_high, currency, published_date, deadline, - authority_name, authority_type, location, documents_url, notice_url, status, sector - ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18) - ON CONFLICT (source_id) DO NOTHING - RETURNING id`, - [ - 'ted_eu', - sourceId, - tender.title.substring(0, 500), - tender.description.substring(0, 5000), - tender.description.substring(0, 500), - [], - tender.value, - tender.value, - 'GBP', - tender.published, - tender.deadline, - tender.authority.substring(0, 255), - 'government', - 
tender.location.substring(0, 255), - '', - `https://ted.europa.eu/Notice/${sourceId}`, - 'open', - classifySector(tender.title, tender.description, tender.authority) - ] - ); - - if (result.rowCount > 0) { - insertedCount++; - } - } catch (e) { - console.error('Error inserting sample tender:', e.message); - } - } - } - - console.log(`[${new Date().toISOString()}] TED EU scrape complete. Inserted/updated ${insertedCount} tenders`); - } catch (error) { - console.error('Error scraping TED:', error.message); - } finally { - await pool.end(); - } + console.log('[TED EU] Scraper disabled - requires browser automation'); + console.log('[TED EU] To enable: implement Playwright/Puppeteer scraping'); + await pool.end(); + return; } -scrapeTenders(); +if (import.meta.url === `file://${process.argv[1]}`) { + scrapeTenders(); +} + +export { scrapeTenders };