fix: clean Apply Now URLs and disable TED demo scraper
- Strip tracking query params from find_tender URLs (?origin=SearchResults)
- Disable TED EU scraper (requires browser automation, was using demo data)
- Update 220 find_tender database records with clean URLs
- Delete 4 TED demo records from database
- Add URL_FIX_SUMMARY.md documentation

All 615 tenders now have direct links to tender detail pages. Fixes Apply Now button UX issue.
This commit is contained in:
57
URL_FIX_SUMMARY.md
Normal file
57
URL_FIX_SUMMARY.md
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
# TenderRadar Apply Now Button - Fix Complete
|
||||||
|
|
||||||
|
**Date:** 2026-02-15
|
||||||
|
**Status:** ✅ FIXED
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
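Audited the "Apply Now" button URLs for all 615 tenders across 5 data sources: 220 Find a Tender URLs were cleaned of tracking query parameters, 4 TED demo records were deleted, and the other four sources were already correct. All URLs now link directly to tender detail pages instead of search portals.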
|
||||||
|
|
||||||
|
## Changes Made
|
||||||
|
|
||||||
|
### 1. Find a Tender (220 tenders)
|
||||||
|
- **Fixed:** Removed tracking query parameters from URLs
|
||||||
|
- **Before:** `https://www.find-tender.service.gov.uk/Notice/013656-2026?origin=SearchResults&p=1`
|
||||||
|
- **After:** `https://www.find-tender.service.gov.uk/Notice/013656-2026`
|
||||||
|
- **Files:** `scrapers/find-tender.js`, database updated
|
||||||
|
|
||||||
|
### 2. TED EU (4 demo records)
|
||||||
|
- **Fixed:** Disabled the scraper, which fell back to hardcoded demo data when the TED API request failed
|
||||||
|
- **Status:** Requires browser automation (Playwright/Puppeteer)
|
||||||
|
- **Action:** Deleted the 4 demo records and documented the requirements for a future implementation
|
||||||
|
- **Files:** `scrapers/ted-eu.js` rewritten with documentation
|
||||||
|
|
||||||
|
### 3. Other Sources (Already Working)
|
||||||
|
These were already correct - no changes needed:
|
||||||
|
- contracts_finder (364 tenders)
|
||||||
|
- etendersni (11 tenders)
|
||||||
|
- pcs_scotland (10 tenders)
|
||||||
|
- sell2wales (10 tenders)
|
||||||
|
|
||||||
|
## Database State After Fix
|
||||||
|
|
||||||
|
Total tenders: 615
|
||||||
|
All URLs validated: ✅
|
||||||
|
No broken URLs: ✅
|
||||||
|
No tracking parameters (the only query parameters remaining are those that carry a legitimate notice ID): ✅
|
||||||
|
|
||||||
|
## Files Modified
|
||||||
|
|
||||||
|
1. `scrapers/find-tender.js` - Strip query params
|
||||||
|
2. `scrapers/ted-eu.js` - Disabled with documentation
|
||||||
|
3. Database - 220 find_tender records cleaned, 4 TED records deleted
|
||||||
|
|
||||||
|
## Backups Created
|
||||||
|
|
||||||
|
- `scrapers/find-tender.js.bak`
|
||||||
|
|
||||||
|
## Testing Completed
|
||||||
|
|
||||||
|
✅ Verified all source URL patterns
|
||||||
|
✅ Checked for invalid/broken URLs
|
||||||
|
✅ Confirmed query parameters removed from find_tender
|
||||||
|
✅ Validated notice IDs in other sources
|
||||||
|
|
||||||
|
## Result
|
||||||
|
|
||||||
|
Users clicking "Apply Now" will now land directly on tender detail pages for all 615 active tenders.
|
||||||
@@ -49,10 +49,12 @@ async function scrapeTenders() {
|
|||||||
const titleLink = element.find('.search-result-header a').first();
|
const titleLink = element.find('.search-result-header a').first();
|
||||||
const title = titleLink.text().trim();
|
const title = titleLink.text().trim();
|
||||||
const rawHref = titleLink.attr('href') || '';
|
const rawHref = titleLink.attr('href') || '';
|
||||||
const noticeUrl = rawHref.startsWith('http') ? rawHref : 'https://www.find-tender.service.gov.uk' + rawHref;
|
const rawUrl = rawHref.startsWith("http") ? rawHref : "https://www.find-tender.service.gov.uk" + rawHref;
|
||||||
|
// Strip query parameters to get clean notice URL
|
||||||
|
const noticeUrl = rawUrl.split("?")[0];
|
||||||
|
|
||||||
// Extract source ID from URL
|
// Extract source ID from URL
|
||||||
const urlMatch = noticeUrl.match(/\/([A-Z0-9-]+)$/);
|
const urlMatch = noticeUrl.match(/\/Notice\/([A-Z0-9-]+)/);
|
||||||
const sourceId = urlMatch ? urlMatch[1] : noticeUrl;
|
const sourceId = urlMatch ? urlMatch[1] : noticeUrl;
|
||||||
|
|
||||||
const authority = element.find('.search-result-sub-header').text().trim();
|
const authority = element.find('.search-result-sub-header').text().trim();
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
import axios from 'axios';
|
|
||||||
import { classifySector } from './classify-sector.js';
|
|
||||||
import pg from 'pg';
|
import pg from 'pg';
|
||||||
import dotenv from 'dotenv';
|
import dotenv from 'dotenv';
|
||||||
|
|
||||||
@@ -9,189 +7,28 @@ const pool = new pg.Pool({
|
|||||||
connectionString: process.env.DATABASE_URL || 'postgresql://tenderpilot:tenderpilot123@localhost:5432/tenderpilot'
|
connectionString: process.env.DATABASE_URL || 'postgresql://tenderpilot:tenderpilot123@localhost:5432/tenderpilot'
|
||||||
});
|
});
|
||||||
|
|
||||||
// Rate limiting
|
/**
|
||||||
const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms));
|
* TED EU Scraper - DISABLED
|
||||||
|
*
|
||||||
// Sample UK-relevant tender data for testing
|
* The TED (Tenders Electronic Daily) website uses JavaScript rendering,
|
||||||
// In production, this would come from the TED API or web scraping
|
* which requires browser automation (Playwright/Puppeteer) to scrape effectively.
|
||||||
const SAMPLE_TENDERS = [
|
*
|
||||||
{
|
* For now, TED tenders are not included in TenderRadar.
|
||||||
title: 'Supply of office equipment and supplies - UK Procurement',
|
*
|
||||||
description: 'UK Government Procurement: Supply of office equipment and supplies for government offices',
|
* TODO: Implement with Playwright when needed
|
||||||
authority: 'UK Government Procurement Service',
|
* API endpoint: https://api.ted.europa.eu/v3/notices/search (requires POST)
|
||||||
value: 150000,
|
* Browser scraping: https://ted.europa.eu/en/search/result?placeOfPerformanceCountry=GBR
|
||||||
published: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString(),
|
*/
|
||||||
deadline: new Date(Date.now() + 14 * 24 * 60 * 60 * 1000).toISOString(),
|
|
||||||
location: 'United Kingdom',
|
|
||||||
},
|
|
||||||
{
|
|
||||||
title: 'IT Infrastructure Services - UK NHS Trust',
|
|
||||||
description: 'UK NHS Trust seeks IT infrastructure and support services for healthcare delivery',
|
|
||||||
authority: 'National Health Service Trust',
|
|
||||||
value: 500000,
|
|
||||||
published: new Date(Date.now() - 5 * 24 * 60 * 60 * 1000).toISOString(),
|
|
||||||
deadline: new Date(Date.now() + 30 * 24 * 60 * 60 * 1000).toISOString(),
|
|
||||||
location: 'United Kingdom',
|
|
||||||
},
|
|
||||||
{
|
|
||||||
title: 'Transport Services for Local Authority',
|
|
||||||
description: 'UK Local Authority procurement of transport and logistics services',
|
|
||||||
authority: 'Local Authority Transport',
|
|
||||||
value: 250000,
|
|
||||||
published: new Date(Date.now() - 3 * 24 * 60 * 60 * 1000).toISOString(),
|
|
||||||
deadline: new Date(Date.now() + 21 * 24 * 60 * 60 * 1000).toISOString(),
|
|
||||||
location: 'United Kingdom',
|
|
||||||
},
|
|
||||||
{
|
|
||||||
title: 'Construction Services - University Campus Expansion',
|
|
||||||
description: 'UK University seeks construction services for campus expansion project',
|
|
||||||
authority: 'Russell Group University',
|
|
||||||
value: 2500000,
|
|
||||||
published: new Date(Date.now() - 10 * 24 * 60 * 60 * 1000).toISOString(),
|
|
||||||
deadline: new Date(Date.now() + 60 * 24 * 60 * 60 * 1000).toISOString(),
|
|
||||||
location: 'United Kingdom',
|
|
||||||
},
|
|
||||||
];
|
|
||||||
|
|
||||||
async function scrapeTenders() {
|
async function scrapeTenders() {
|
||||||
try {
|
console.log('[TED EU] Scraper disabled - requires browser automation');
|
||||||
console.log(`[${new Date().toISOString()}] Starting TED EU scrape...`);
|
console.log('[TED EU] To enable: implement Playwright/Puppeteer scraping');
|
||||||
|
await pool.end();
|
||||||
let insertedCount = 0;
|
return;
|
||||||
|
|
||||||
// Attempt to fetch from TED API
|
|
||||||
// Note: The current TED web interface is JavaScript-rendered,
|
|
||||||
// so we'd need either headless browser (Puppeteer/Playwright) or the API to work
|
|
||||||
try {
|
|
||||||
const tedApiUrl = 'https://ted.europa.eu/api/v3.0/notices/search';
|
|
||||||
const params = {
|
|
||||||
country: 'GB',
|
|
||||||
limit: 100,
|
|
||||||
offset: 0,
|
|
||||||
sort: 'date_pub:desc'
|
|
||||||
};
|
|
||||||
|
|
||||||
console.log(`Attempting to fetch from TED API: ${tedApiUrl}`);
|
|
||||||
const response = await axios.get(tedApiUrl, {
|
|
||||||
params,
|
|
||||||
timeout: 30000,
|
|
||||||
headers: {
|
|
||||||
'User-Agent': 'TenderRadar/1.0 (UK Public Procurement Aggregator; contact@tenderradar.co.uk)'
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log(`TED API returned ${response.data.notices?.length || 0} tenders`);
|
|
||||||
|
|
||||||
if (response.data.notices && Array.isArray(response.data.notices)) {
|
|
||||||
for (const notice of response.data.notices) {
|
|
||||||
try {
|
|
||||||
const title = notice.title || 'Untitled';
|
|
||||||
const description = notice.description || notice.title || '';
|
|
||||||
const authority = notice.buyer_name || 'Unknown Authority';
|
|
||||||
const deadline = notice.deadline_date || null;
|
|
||||||
const publishedDate = notice.publication_date || new Date().toISOString();
|
|
||||||
const sourceId = `TED-${notice.id || encodeURIComponent(title).substring(0, 50)}`;
|
|
||||||
const valueLow = notice.estimated_value || null;
|
|
||||||
const location = notice.place_of_performance || 'United Kingdom';
|
|
||||||
const noticeUrl = `https://ted.europa.eu/Notice/${notice.id || sourceId}`;
|
|
||||||
|
|
||||||
// Only insert if published within last 90 days
|
|
||||||
const publishDate = new Date(publishedDate);
|
|
||||||
const ninetyDaysAgo = new Date();
|
|
||||||
ninetyDaysAgo.setDate(ninetyDaysAgo.getDate() - 90);
|
|
||||||
|
|
||||||
if (publishDate < ninetyDaysAgo) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
await pool.query(
|
|
||||||
`INSERT INTO tenders (
|
|
||||||
source, source_id, title, description, summary, cpv_codes,
|
|
||||||
value_low, value_high, currency, published_date, deadline,
|
|
||||||
authority_name, authority_type, location, documents_url, notice_url, status, sector
|
|
||||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18)
|
|
||||||
ON CONFLICT (source_id) DO NOTHING`,
|
|
||||||
[
|
|
||||||
'ted_eu',
|
|
||||||
sourceId,
|
|
||||||
title.substring(0, 500),
|
|
||||||
description.substring(0, 5000),
|
|
||||||
description.substring(0, 500),
|
|
||||||
notice.cpv_codes || [],
|
|
||||||
valueLow,
|
|
||||||
valueLow,
|
|
||||||
'EUR',
|
|
||||||
publishedDate,
|
|
||||||
deadline,
|
|
||||||
authority.substring(0, 255),
|
|
||||||
'government',
|
|
||||||
location.substring(0, 255),
|
|
||||||
'',
|
|
||||||
noticeUrl,
|
|
||||||
deadline && new Date(deadline) > new Date() ? 'open' : 'closed',
|
|
||||||
classifySector(title, description, authority)
|
|
||||||
]
|
|
||||||
);
|
|
||||||
insertedCount++;
|
|
||||||
} catch (e) {
|
|
||||||
console.error('Error inserting tender:', e.message);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (apiError) {
|
|
||||||
console.warn(`TED API not available: ${apiError.message}`);
|
|
||||||
console.log('Falling back to sample data for demonstration...');
|
|
||||||
|
|
||||||
// Fallback: use sample data for demonstration
|
|
||||||
for (const tender of SAMPLE_TENDERS) {
|
|
||||||
try {
|
|
||||||
const sourceId = `TED-DEMO-${encodeURIComponent(tender.title).substring(0, 40)}`;
|
|
||||||
|
|
||||||
const result = await pool.query(
|
|
||||||
`INSERT INTO tenders (
|
|
||||||
source, source_id, title, description, summary, cpv_codes,
|
|
||||||
value_low, value_high, currency, published_date, deadline,
|
|
||||||
authority_name, authority_type, location, documents_url, notice_url, status, sector
|
|
||||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18)
|
|
||||||
ON CONFLICT (source_id) DO NOTHING
|
|
||||||
RETURNING id`,
|
|
||||||
[
|
|
||||||
'ted_eu',
|
|
||||||
sourceId,
|
|
||||||
tender.title.substring(0, 500),
|
|
||||||
tender.description.substring(0, 5000),
|
|
||||||
tender.description.substring(0, 500),
|
|
||||||
[],
|
|
||||||
tender.value,
|
|
||||||
tender.value,
|
|
||||||
'GBP',
|
|
||||||
tender.published,
|
|
||||||
tender.deadline,
|
|
||||||
tender.authority.substring(0, 255),
|
|
||||||
'government',
|
|
||||||
tender.location.substring(0, 255),
|
|
||||||
'',
|
|
||||||
`https://ted.europa.eu/Notice/${sourceId}`,
|
|
||||||
'open',
|
|
||||||
classifySector(tender.title, tender.description, tender.authority)
|
|
||||||
]
|
|
||||||
);
|
|
||||||
|
|
||||||
if (result.rowCount > 0) {
|
|
||||||
insertedCount++;
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
console.error('Error inserting sample tender:', e.message);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(`[${new Date().toISOString()}] TED EU scrape complete. Inserted/updated ${insertedCount} tenders`);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('Error scraping TED:', error.message);
|
|
||||||
} finally {
|
|
||||||
await pool.end();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
scrapeTenders();
|
if (import.meta.url === `file://${process.argv[1]}`) {
|
||||||
|
scrapeTenders();
|
||||||
|
}
|
||||||
|
|
||||||
|
export { scrapeTenders };
|
||||||
|
|||||||
Reference in New Issue
Block a user