// Exploratory probe: downloads one TED search-results page and prints hints
// about where the result data might live (inline <script> contents,
// `window.*` assignments, and API-looking URLs embedded in the markup).
//
// Each statement is on its own line on purpose: a `//` comment runs to the
// end of the physical line, so keeping the script on a single line silently
// disables everything after the first comment.
import axios from 'axios';
import * as cheerio from 'cheerio';

const url = 'https://ted.europa.eu/en/search/result?query=GB&pageNum=0';

const response = await axios.get(url, {
  timeout: 30000,
  headers: {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
  },
});

// axios hands back a parsed object when the body happens to be valid JSON;
// normalise to text so the string searches below cannot throw.
const bodyText =
  typeof response.data === 'string'
    ? response.data
    : (JSON.stringify(response.data) ?? '');

const $ = cheerio.load(bodyText);

console.log('Total page length:', bodyText.length);

// Look for JSON in script tags: print the first 500 characters of every
// script whose text mentions one of the keywords.
console.log('\n=== Script tags ===');
const scriptKeywords = ['notice', 'GB', 'data', 'result'];
$('script').each((i, el) => {
  const text = $(el).text();
  if (scriptKeywords.some((keyword) => text.includes(keyword))) {
    console.log(`Script ${i} length: ${text.length} chars`);
    console.log(text.substring(0, 500));
    console.log('...');
  }
});

// Look for window.__data or similar global assignments.
if (bodyText.includes('window.')) {
  console.log('\n=== Window assignments ===');
  const matches = bodyText.match(/window\.[a-zA-Z_]+\s*=/g);
  if (matches) {
    console.log(matches.slice(0, 10));
  }
}

// Look for API calls or data in comments: absolute or protocol-relative URLs
// that contain "api" (case-insensitive).
if (bodyText.includes('API') || bodyText.includes('api')) {
  console.log('\n=== Found API references ===');
  const apiMatch = bodyText.match(/(?:https?:)?\/\/[^"'<>\s]+api[^"'<>\s]*/gi);
  if (apiMatch) {
    console.log(apiMatch.slice(0, 10));
  }
}

// Check for specific data patterns: relative `/api/...` paths, de-duplicated.
console.log('\n=== Looking for specific patterns ===');
if (bodyText.includes('/api/')) {
  console.log('Found /api/ endpoint');
  const apiMatches = bodyText.match(/\/api\/[^"'\s<>]+/g);
  if (apiMatches) {
    console.log('Unique APIs:', [...new Set(apiMatches)].slice(0, 10));
  }
}

// Look at the actual HTML structure around content.
console.log('\n=== Checking for HTML with notice data ===');
// NOTE(review): the visible source is cut off in the middle of the next
// statement. It is preserved verbatim below (as a comment) rather than
// completed by guesswork — restore it from the full file:
//   const htmlMatch = bodyText.match(/notice[^<]{0,200}