63 lines
1.7 KiB
JavaScript
63 lines
1.7 KiB
JavaScript
|
|
import axios from 'axios';
|
||
|
|
import * as cheerio from 'cheerio';
|
||
|
|
|
||
|
|
// Debug probe: download one TED search-results page and parse it so the
// rest of the script can inspect its markup.
const url = 'https://ted.europa.eu/en/search/result?query=GB&pageNum=0';

// Request settings: a 30000 timeout and a browser-like User-Agent header.
const requestOptions = {
  headers: {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
  },
  timeout: 30000,
};
const response = await axios.get(url, requestOptions);

// Cheerio handle over the response body; every query below goes through it.
const $ = cheerio.load(response.data);
|
||
|
|
|
||
|
|
// Print the first 10 elements that carry a data-testid attribute, showing
// the attribute value and the element's tag name.
console.log('=== Searching for data-testid attributes ===');

// Module-level counter: it is declared here and reassigned by the sections
// that follow, so it must stay a `let` at this scope.
let count = 0;
$('[data-testid]').each((_index, el) => {
  // Nothing is printed past the limit, so stop the cheerio loop instead of
  // visiting every remaining match (returning false breaks out of .each).
  if (count >= 10) {
    return false;
  }
  console.log(`${count}: data-testid="${$(el).attr('data-testid')}" tag=${el.name}`);
  count++;
  return true;
});
|
||
|
|
|
||
|
|
// Print up to 5 anchors whose href contains "Notice": the href (first 100
// characters) followed by the link text (first 80 characters).
console.log('\n=== Looking for href containing Notice ===');
count = 0;
$('a[href*="Notice"]').each((_index, el) => {
  // Once the limit is reached there is nothing left to print, so end the
  // cheerio loop early (returning false breaks out of .each).
  if (count >= 5) {
    return false;
  }
  const link = $(el);
  const text = link.text().substring(0, 80);
  // The selector only matches anchors that have an href, so attr() yields a string here.
  const href = link.attr('href').substring(0, 100);
  console.log(`${count}: ${href} - text: ${text}`);
  count++;
  return true;
});
|
||
|
|
|
||
|
|
// Print the first 1000 characters of inner HTML for up to three <main> elements.
console.log('\n=== Looking for main content div ===');
$('main')
  .slice(0, 3) // keep only the first three matches; indices still start at 0
  .each((index, mainEl) => {
    const preview = $(mainEl).html().substring(0, 1000);
    console.log(`main[${index}]: ${preview}`);
  });
|
||
|
|
|
||
|
|
// Print up to 20 anchors whose href mentions "notice"/"Notice"/"OP-" or is
// longer than 50 characters; each href is truncated to 150 characters.
console.log('\n=== All a tags (first 20 with long href) ===');
count = 0;
$('a').each((_index, el) => {
  // Stop walking the document's anchors as soon as the print limit is hit
  // (returning false breaks out of .each).
  if (count >= 20) {
    return false;
  }
  // Anchors without an href are treated as an empty string.
  const href = $(el).attr('href') ?? '';
  const isInteresting =
    href.includes('notice') ||
    href.includes('Notice') ||
    href.includes('OP-') ||
    href.length > 50;
  if (isInteresting) {
    console.log(`${count}: ${href.substring(0, 150)}`);
    count++;
  }
  return true;
});
|
||
|
|
|
||
|
|
// Print the first 100 characters of text for up to 10 elements matching
// common "result item" selectors (article tags, role=article, .item, .result, .row).
console.log('\n=== Looking for articles/items ===');
count = 0;
$('article, [role="article"], .item, .result, .row').each((_index, el) => {
  // Nothing is printed past the limit, so end the cheerio loop early
  // (returning false breaks out of .each).
  if (count >= 10) {
    return false;
  }
  const text = $(el).text().substring(0, 100);
  console.log(`${count}: ${text}`);
  count++;
  return true;
});
|