Add sector classification module, integrate into all 7 scrapers, fix CF pagination
This commit is contained in:
54
etendersni-test.mjs
Normal file
54
etendersni-test.mjs
Normal file
@@ -0,0 +1,54 @@
|
||||
import axios from 'axios';
|
||||
import * as cheerio from 'cheerio';
|
||||
|
||||
// Probe script for the eTendersNI site structure.
// Fetches the homepage, lists every link whose href looks like a
// search/list/notice/tender page, and — when none are found — dumps the page
// title, meta description, forms and search/tender-related elements instead.
const baseUrl = 'https://etendersni.gov.uk';
const homeUrl = new URL('/epps/home.do', baseUrl).href;

// Lower-case substrings that mark an href as worth reporting.
const linkKeywords = ['search', 'list', 'notice', 'tender'];

/**
 * Decide whether an href should be reported as a search/list link.
 *
 * Matching is case-insensitive so camel-cased paths (e.g. something like
 * `fooSearchAction.do`) are not missed.
 *
 * @param {string | undefined} href - Raw `href` attribute value, if any.
 * @returns {boolean} True when the href contains one of `linkKeywords`.
 */
function isListingHref(href) {
  if (!href) return false;
  const lowered = href.toLowerCase();
  return linkKeywords.some((keyword) => lowered.includes(keyword));
}

/**
 * Resolve an href against the homepage URL using standard URL resolution.
 *
 * This handles root-relative (`/x`), document-relative (`x`, `../x`, `?q`),
 * protocol-relative (`//host/x`) and already-absolute hrefs uniformly, which
 * plain string concatenation does not.
 *
 * @param {string} href - Raw `href` attribute value.
 * @returns {string} The absolute URL, or the raw href if it cannot be parsed.
 */
function toAbsoluteUrl(href) {
  try {
    return new URL(href, homeUrl).href;
  } catch (err) {
    // Unparseable href: report it verbatim rather than aborting the probe.
    return href;
  }
}

(async () => {
  try {
    console.log('Fetching homepage...');
    const resp = await axios.get(homeUrl, { timeout: 10000, maxRedirects: 5 });
    const $ = cheerio.load(resp.data);

    console.log('\n=== EXTRACTING NAVIGATION LINKS ===');
    // A Set de-duplicates links that appear more than once on the page.
    const navLinks = new Set();
    $('a').each((_, el) => {
      const href = $(el).attr('href');
      if (!isListingHref(href)) return;

      const text = $(el).text().trim();
      // Link text is truncated to 40 characters to keep the output readable.
      navLinks.add(`${text.substring(0, 40)} => ${toAbsoluteUrl(href)}`);
    });

    if (navLinks.size > 0) {
      console.log('Found search/list links:');
      navLinks.forEach((link) => console.log(`  ${link}`));
      return;
    }

    // Fallback: no matching links, so describe the page structure instead.
    console.log('No search/list links found. Checking page structure...');
    console.log('Page title:', $('title').text());
    console.log('Meta description:', $('meta[name="description"]').attr('content'));

    // Try to find any forms
    console.log('\nForms found:', $('form').length);
    $('form').each((i, el) => {
      const action = $(el).attr('action');
      const method = $(el).attr('method');
      console.log(`  Form ${i}: ${method} ${action}`);
    });

    // Try to find main content areas
    console.log('\nMain sections:');
    $('[id*="search"], [class*="search"], [id*="tender"], [class*="tender"]').each((_, el) => {
      const id = $(el).attr('id');
      const cls = $(el).attr('class');
      if (id || cls) console.log(`  ${id} ${cls}`);
    });
  } catch (e) {
    // Network, HTTP and parsing failures all end up here; only the message is reported.
    console.error('Error:', e.message);
  }
})();
|
||||
Reference in New Issue
Block a user