Add sector classification module, integrate into all 7 scrapers, fix CF pagination
This commit is contained in:
62
debug-ted2.mjs
Normal file
62
debug-ted2.mjs
Normal file
@@ -0,0 +1,62 @@
|
||||
import axios from 'axios';
|
||||
import * as cheerio from 'cheerio';
|
||||
|
||||
// TED search-result page requested by this debug script (query=GB, pageNum=0).
const url = 'https://ted.europa.eu/en/search/result?query=GB&pageNum=0';

// Request settings: 30 000 ms timeout and a Mozilla-style User-Agent header.
const requestHeaders = {
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
};
const requestOptions = {
  timeout: 30000,
  headers: requestHeaders,
};

const response = await axios.get(url, requestOptions);

// Load the body into cheerio so later sections can query it with selectors.
const $ = cheerio.load(response.data);

console.log('Total page length:', response.data.length);
|
||||
|
||||
// Look for JSON in script tags: print a preview of every <script> element
// whose text contains at least one of the marker substrings.
console.log('\n=== Script tags ===');
const scriptMarkers = ['notice', 'GB', 'data', 'result'];
$('script').each((index, node) => {
  const content = $(node).text();
  const isInteresting = scriptMarkers.some((marker) => content.includes(marker));
  if (!isInteresting) {
    return;
  }

  console.log(`Script ${index} length: ${content.length} chars`);
  // Preview is limited to the first 500 characters of the script text.
  console.log(content.substring(0, 500));
  console.log('...');
});
|
||||
|
||||
// Look for window.__data or similar: list the properties the page assigns on
// `window` (for example `window.__data = ...`).
const bodyText = response.data;

if (bodyText.includes('window.')) {
  console.log('\n=== Window assignments ===');
  // Match complete identifiers (digits and `$` are allowed after the first
  // character). The `(?!=)` lookahead keeps comparisons such as
  // `window.x == y` / `window.x === y` from being reported as assignments.
  const matches = bodyText.match(/window\.[A-Za-z_$][\w$]*\s*=(?!=)/g);
  if (matches) {
    // De-duplicate before truncating so repeated assignments do not use up
    // the 10 printed slots.
    console.log([...new Set(matches)].slice(0, 10));
  }
}
|
||||
|
||||
// Look for API calls or data in comments: when the body mentions "API" or
// "api", print up to 10 URL-like strings that contain "api" (any case).
const mentionsApi = ['API', 'api'].some((needle) => bodyText.includes(needle));
if (mentionsApi) {
  console.log('\n=== Found API references ===');
  // Absolute or protocol-relative URLs, cut at quotes, angle brackets or whitespace.
  const apiUrlPattern = /(?:https?:)?\/\/[^"'<>\s]+api[^"'<>\s]*/gi;
  const apiUrls = bodyText.match(apiUrlPattern);
  if (apiUrls !== null) {
    console.log(apiUrls.slice(0, 10));
  }
}
|
||||
|
||||
// Check for specific data patterns: collect distinct "/api/..." paths found
// anywhere in the body and print up to 10 of them.
console.log('\n=== Looking for specific patterns ===');
const hasApiPath = bodyText.includes('/api/');
if (hasApiPath) {
  console.log('Found /api/ endpoint');
  const endpointHits = bodyText.match(/\/api\/[^"'\s<>]+/g);
  if (endpointHits !== null) {
    // A Set drops repeated paths while keeping first-seen order.
    const distinctEndpoints = Array.from(new Set(endpointHits));
    console.log('Unique APIs:', distinctEndpoints.slice(0, 10));
  }
}
|
||||
|
||||
// Look at the actual HTML structure around content: print up to 5 snippets
// that start at "notice" (any case) and run up to the next "<".
console.log('\n=== Checking for HTML with notice data ===');
// At most 200 non-"<" characters may sit between "notice" and the closing "<".
const noticeSnippetPattern = /notice[^<]{0,200}</gi;
const htmlMatch = bodyText.match(noticeSnippetPattern);
if (htmlMatch !== null) {
  console.log('Found matches:');
  console.log(htmlMatch.slice(0, 5));
}
|
||||
Reference in New Issue
Block a user