63 lines
1.9 KiB
JavaScript
63 lines
1.9 KiB
JavaScript
import axios from 'axios';
|
|
import * as cheerio from 'cheerio';
|
|
|
|
// Download the TED search results page (query "GB", first page) so its
// markup can be inspected for embedded data.
const url = 'https://ted.europa.eu/en/search/result?query=GB&pageNum=0';
const response = await axios.get(url, {
  timeout: 30000, // axios timeout, in milliseconds
  // Keep the body as a raw string: everything below calls string methods
  // (`includes`, `match`, `length`) on `response.data`, which would break if
  // axios handed back a parsed object for a JSON reply.
  responseType: 'text',
  headers: {
    // Browser-like User-Agent sent with the request.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
  },
});

// Parse the body so elements can be queried with CSS selectors.
const $ = cheerio.load(response.data);

console.log('Total page length:', response.data.length);
|
|
|
|
// Dump the start of every <script> element whose text mentions one of the
// keywords below, to spot JSON or other data embedded in the page.
console.log('\n=== Script tags ===');
$('script').each((index, scriptEl) => {
  const content = $(scriptEl).text();
  const isInteresting = ['notice', 'GB', 'data', 'result'].some((keyword) =>
    content.includes(keyword),
  );
  if (!isInteresting) {
    return;
  }

  // Only the first 500 characters are shown to keep the output readable.
  const preview = content.substring(0, 500);
  console.log(`Script ${index} length: ${content.length} chars`);
  console.log(preview);
  console.log('...');
});
|
|
|
|
// Look for global state assigned on `window` (window.__data or similar).
const bodyText = response.data;
if (bodyText.includes('window.')) {
  console.log('\n=== Window assignments ===');
  // Property names may contain digits and `$` after the first character.
  // The `(?!=)` lookahead skips `==` / `===` comparisons so that only real
  // assignments are reported.
  const matches = bodyText.match(/window\.[A-Za-z_$][\w$]*\s*=(?!=)/g);
  if (matches) {
    // Show at most the first ten hits.
    console.log(matches.slice(0, 10));
  }
}
|
|
|
|
// List absolute or protocol-relative URLs in the page that contain "api"
// (matched case-insensitively), as hints to a backing data endpoint.
if (['API', 'api'].some((needle) => bodyText.includes(needle))) {
  console.log('\n=== Found API references ===');
  const apiUrls = bodyText.match(/(?:https?:)?\/\/[^"'<>\s]+api[^"'<>\s]*/gi);
  if (apiUrls !== null) {
    const firstTen = apiUrls.slice(0, 10);
    console.log(firstTen);
  }
}
|
|
|
|
// Probe for relative "/api/..." paths and print the distinct ones.
console.log('\n=== Looking for specific patterns ===');
if (bodyText.includes('/api/')) {
  console.log('Found /api/ endpoint');
  const endpointPaths = bodyText.match(/\/api\/[^"'\s<>]+/g);
  if (endpointPaths !== null) {
    // A Set drops repeats while keeping first-seen order.
    const distinctPaths = Array.from(new Set(endpointPaths));
    console.log('Unique APIs:', distinctPaths.slice(0, 10));
  }
}
|
|
|
|
// Sample the raw markup around "notice": each hit is the word (any case)
// plus up to 200 following non-"<" characters, ending at the next "<".
console.log('\n=== Checking for HTML with notice data ===');
const htmlMatch = bodyText.match(/notice[^<]{0,200}</gi);
if (htmlMatch !== null) {
  const firstFive = htmlMatch.slice(0, 5);
  console.log('Found matches:');
  console.log(firstFive);
}
|