Security hardening + new tools deployment
- Hide Apache version (ServerTokens Prod) - Add Permissions-Policy header - Remove deprecated X-XSS-Protection - Consolidate security headers to .htaccess only (remove duplicates from PHP) - Deploy free tools: robots-analyzer, data-converter - Deploy tools announcement blog post - Update sitemap with new tools and blog post
This commit is contained in:
562
tools/cost-calculator.php
Normal file
562
tools/cost-calculator.php
Normal file
@@ -0,0 +1,562 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Web Scraping Cost Calculator | UK Data Services</title>
|
||||
<meta name="description" content="Estimate your web scraping project cost instantly. Our free calculator helps UK businesses budget for data extraction, pricing intelligence, and market research projects.">
|
||||
<meta name="keywords" content="web scraping cost, data extraction pricing, scraping quote, web scraping calculator UK">
|
||||
<link rel="canonical" href="https://ukdataservices.co.uk/tools/cost-calculator">
|
||||
|
||||
<!-- Open Graph -->
|
||||
<meta property="og:title" content="Free Web Scraping Cost Calculator">
|
||||
<meta property="og:description" content="Get an instant estimate for your web scraping project. Used by 500+ UK businesses.">
|
||||
<meta property="og:type" content="website">
|
||||
<meta property="og:url" content="https://ukdataservices.co.uk/tools/cost-calculator">
|
||||
|
||||
<link rel="stylesheet" href="../assets/css/main.css">
|
||||
<style>
|
||||
.calculator-container {
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
padding: 40px 20px;
|
||||
}
|
||||
.calculator-header {
|
||||
text-align: center;
|
||||
margin-bottom: 40px;
|
||||
}
|
||||
.calculator-header h1 {
|
||||
font-size: 2.2em;
|
||||
color: #1a1a2e;
|
||||
margin-bottom: 15px;
|
||||
}
|
||||
.calculator-header p {
|
||||
color: #666;
|
||||
font-size: 1.1em;
|
||||
max-width: 600px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
.calculator-card {
|
||||
background: #fff;
|
||||
border-radius: 12px;
|
||||
box-shadow: 0 4px 20px rgba(0,0,0,0.08);
|
||||
padding: 40px;
|
||||
}
|
||||
.form-group {
|
||||
margin-bottom: 30px;
|
||||
}
|
||||
.form-group label {
|
||||
display: block;
|
||||
font-weight: 600;
|
||||
color: #1a1a2e;
|
||||
margin-bottom: 10px;
|
||||
font-size: 1em;
|
||||
}
|
||||
.form-group .hint {
|
||||
font-weight: 400;
|
||||
color: #888;
|
||||
font-size: 0.85em;
|
||||
display: block;
|
||||
margin-top: 4px;
|
||||
}
|
||||
.option-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
|
||||
gap: 12px;
|
||||
}
|
||||
.option-card {
|
||||
border: 2px solid #e0e0e0;
|
||||
border-radius: 8px;
|
||||
padding: 15px;
|
||||
text-align: center;
|
||||
cursor: pointer;
|
||||
transition: all 0.2s;
|
||||
}
|
||||
.option-card:hover {
|
||||
border-color: #0066cc;
|
||||
background: #f8fbff;
|
||||
}
|
||||
.option-card.selected {
|
||||
border-color: #0066cc;
|
||||
background: #e8f4fd;
|
||||
}
|
||||
.option-card .icon {
|
||||
font-size: 1.8em;
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
.option-card .label {
|
||||
font-weight: 600;
|
||||
color: #333;
|
||||
}
|
||||
.option-card .desc {
|
||||
font-size: 0.8em;
|
||||
color: #888;
|
||||
margin-top: 4px;
|
||||
}
|
||||
input[type="number"], select {
|
||||
width: 100%;
|
||||
padding: 14px;
|
||||
border: 2px solid #e0e0e0;
|
||||
border-radius: 8px;
|
||||
font-size: 1em;
|
||||
transition: border-color 0.2s;
|
||||
}
|
||||
input[type="number"]:focus, select:focus {
|
||||
border-color: #0066cc;
|
||||
outline: none;
|
||||
}
|
||||
.slider-container {
|
||||
margin-top: 10px;
|
||||
}
|
||||
input[type="range"] {
|
||||
width: 100%;
|
||||
margin: 10px 0;
|
||||
}
|
||||
.slider-labels {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
font-size: 0.85em;
|
||||
color: #888;
|
||||
}
|
||||
.result-section {
|
||||
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
|
||||
border-radius: 12px;
|
||||
padding: 30px;
|
||||
margin-top: 30px;
|
||||
color: #fff;
|
||||
text-align: center;
|
||||
}
|
||||
.result-section h3 {
|
||||
margin: 0 0 10px 0;
|
||||
font-size: 1em;
|
||||
font-weight: 400;
|
||||
opacity: 0.8;
|
||||
}
|
||||
.estimate-range {
|
||||
font-size: 2.5em;
|
||||
font-weight: 700;
|
||||
margin: 10px 0;
|
||||
}
|
||||
.estimate-note {
|
||||
font-size: 0.9em;
|
||||
opacity: 0.7;
|
||||
margin-bottom: 25px;
|
||||
}
|
||||
.result-cta {
|
||||
display: flex;
|
||||
gap: 15px;
|
||||
justify-content: center;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.btn-primary {
|
||||
background: #00cc66;
|
||||
color: #fff;
|
||||
padding: 14px 28px;
|
||||
border: none;
|
||||
border-radius: 8px;
|
||||
font-size: 1em;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
text-decoration: none;
|
||||
display: inline-block;
|
||||
}
|
||||
.btn-primary:hover {
|
||||
background: #00b359;
|
||||
}
|
||||
.btn-secondary {
|
||||
background: transparent;
|
||||
color: #fff;
|
||||
padding: 14px 28px;
|
||||
border: 2px solid rgba(255,255,255,0.3);
|
||||
border-radius: 8px;
|
||||
font-size: 1em;
|
||||
cursor: pointer;
|
||||
text-decoration: none;
|
||||
}
|
||||
.btn-secondary:hover {
|
||||
border-color: #fff;
|
||||
}
|
||||
.trust-signals {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
gap: 30px;
|
||||
margin-top: 30px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.trust-item {
|
||||
text-align: center;
|
||||
color: #666;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
.trust-item .number {
|
||||
font-size: 1.5em;
|
||||
font-weight: 700;
|
||||
color: #0066cc;
|
||||
display: block;
|
||||
}
|
||||
.email-capture {
|
||||
display: none;
|
||||
background: #f8f9fa;
|
||||
border-radius: 8px;
|
||||
padding: 25px;
|
||||
margin-top: 20px;
|
||||
}
|
||||
.email-capture.show {
|
||||
display: block;
|
||||
}
|
||||
.email-capture h4 {
|
||||
margin: 0 0 15px 0;
|
||||
}
|
||||
.email-capture input[type="email"] {
|
||||
width: 100%;
|
||||
padding: 12px;
|
||||
border: 2px solid #e0e0e0;
|
||||
border-radius: 6px;
|
||||
margin-bottom: 10px;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
@media (max-width: 600px) {
|
||||
.calculator-card { padding: 25px; }
|
||||
.estimate-range { font-size: 1.8em; }
|
||||
.result-cta { flex-direction: column; }
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<!-- Header would be included here -->
|
||||
|
||||
<div class="calculator-container">
|
||||
<div class="calculator-header">
|
||||
<h1>🧮 Web Scraping Cost Calculator</h1>
|
||||
<p>Get an instant estimate for your data extraction project. Answer a few questions and we'll show you typical pricing.</p>
|
||||
</div>
|
||||
|
||||
<div class="calculator-card">
|
||||
<form id="calculator-form">
|
||||
<!-- Project Type -->
|
||||
<div class="form-group">
|
||||
<label>What type of data do you need?</label>
|
||||
<div class="option-grid" data-field="projectType">
|
||||
<div class="option-card" data-value="pricing">
|
||||
<div class="icon">💰</div>
|
||||
<div class="label">Competitor Pricing</div>
|
||||
<div class="desc">Product prices & stock</div>
|
||||
</div>
|
||||
<div class="option-card" data-value="leads">
|
||||
<div class="icon">👥</div>
|
||||
<div class="label">Lead Generation</div>
|
||||
<div class="desc">Business contacts</div>
|
||||
</div>
|
||||
<div class="option-card" data-value="market">
|
||||
<div class="icon">📊</div>
|
||||
<div class="label">Market Research</div>
|
||||
<div class="desc">Reviews, trends, content</div>
|
||||
</div>
|
||||
<div class="option-card" data-value="property">
|
||||
<div class="icon">🏠</div>
|
||||
<div class="label">Property Data</div>
|
||||
<div class="desc">Listings & valuations</div>
|
||||
</div>
|
||||
<div class="option-card" data-value="custom">
|
||||
<div class="icon">⚙️</div>
|
||||
<div class="label">Custom Project</div>
|
||||
<div class="desc">Something else</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Number of Sources -->
|
||||
<div class="form-group">
|
||||
<label>
|
||||
How many websites do you need to scrape?
|
||||
<span class="hint">Each unique website counts as one source</span>
|
||||
</label>
|
||||
<div class="slider-container">
|
||||
<input type="range" id="numSources" min="1" max="50" value="5" oninput="updateSourcesLabel(this.value); calculate();">
|
||||
<div class="slider-labels">
|
||||
<span>1 site</span>
|
||||
<span id="sources-value">5 sites</span>
|
||||
<span>50+ sites</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Data Volume -->
|
||||
<div class="form-group">
|
||||
<label>
|
||||
How many records/items do you need?
|
||||
<span class="hint">Products, listings, contacts, etc.</span>
|
||||
</label>
|
||||
<div class="option-grid" data-field="volume">
|
||||
<div class="option-card" data-value="small">
|
||||
<div class="label">< 1,000</div>
|
||||
<div class="desc">Small dataset</div>
|
||||
</div>
|
||||
<div class="option-card" data-value="medium">
|
||||
<div class="label">1K - 10K</div>
|
||||
<div class="desc">Medium dataset</div>
|
||||
</div>
|
||||
<div class="option-card" data-value="large">
|
||||
<div class="label">10K - 100K</div>
|
||||
<div class="desc">Large dataset</div>
|
||||
</div>
|
||||
<div class="option-card" data-value="enterprise">
|
||||
<div class="label">100K+</div>
|
||||
<div class="desc">Enterprise scale</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Frequency -->
|
||||
<div class="form-group">
|
||||
<label>
|
||||
How often do you need the data updated?
|
||||
<span class="hint">Recurring scrapes have ongoing monthly costs</span>
|
||||
</label>
|
||||
<div class="option-grid" data-field="frequency">
|
||||
<div class="option-card" data-value="once">
|
||||
<div class="label">One-time</div>
|
||||
<div class="desc">Single extraction</div>
|
||||
</div>
|
||||
<div class="option-card" data-value="weekly">
|
||||
<div class="label">Weekly</div>
|
||||
<div class="desc">Updated each week</div>
|
||||
</div>
|
||||
<div class="option-card" data-value="daily">
|
||||
<div class="label">Daily</div>
|
||||
<div class="desc">Fresh data daily</div>
|
||||
</div>
|
||||
<div class="option-card" data-value="realtime">
|
||||
<div class="label">Real-time</div>
|
||||
<div class="desc">Continuous monitoring</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Complexity -->
|
||||
<div class="form-group">
|
||||
<label>
|
||||
How complex are the target websites?
|
||||
<span class="hint">JavaScript-heavy sites and those with anti-bot measures cost more</span>
|
||||
</label>
|
||||
<div class="option-grid" data-field="complexity">
|
||||
<div class="option-card" data-value="simple">
|
||||
<div class="label">Simple</div>
|
||||
<div class="desc">Static HTML pages</div>
|
||||
</div>
|
||||
<div class="option-card" data-value="moderate">
|
||||
<div class="label">Moderate</div>
|
||||
<div class="desc">Some JavaScript</div>
|
||||
</div>
|
||||
<div class="option-card" data-value="complex">
|
||||
<div class="label">Complex</div>
|
||||
<div class="desc">Heavy JS, logins</div>
|
||||
</div>
|
||||
<div class="option-card" data-value="extreme">
|
||||
<div class="label">Very Complex</div>
|
||||
<div class="desc">Anti-bot, CAPTCHAs</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
<!-- Results -->
|
||||
<div class="result-section" id="results" style="display:none;">
|
||||
<h3>Estimated Project Cost</h3>
|
||||
<div class="estimate-range" id="estimate-range">£500 - £1,500</div>
|
||||
<div class="estimate-note" id="estimate-note">One-time setup cost</div>
|
||||
<div class="estimate-note" id="monthly-note" style="display:none;">+ <span id="monthly-cost">£200</span>/month ongoing</div>
|
||||
|
||||
<div class="result-cta">
|
||||
<a href="/quote" class="btn-primary" onclick="trackCalculator('get_quote')">Get Exact Quote</a>
|
||||
<button type="button" class="btn-secondary" onclick="showEmailCapture()">Email Me This Estimate</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Email Capture -->
|
||||
<div class="email-capture" id="email-capture">
|
||||
<h4>📧 Get your estimate + our pricing guide</h4>
|
||||
<input type="email" id="calc-email" placeholder="Enter your email">
|
||||
<button type="button" class="btn-primary" onclick="submitEmail()" style="width:100%;">Send My Estimate</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="trust-signals">
|
||||
<div class="trust-item">
|
||||
<span class="number">500+</span>
|
||||
Projects Delivered
|
||||
</div>
|
||||
<div class="trust-item">
|
||||
<span class="number">99.8%</span>
|
||||
Data Accuracy
|
||||
</div>
|
||||
<div class="trust-item">
|
||||
<span class="number">24hr</span>
|
||||
Quote Turnaround
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// Answers collected so far. The four option-card fields stay null until the
// visitor picks a card; numSources starts at the slider's initial value.
var formData = {
    projectType: null,
    numSources: 5,
    volume: null,
    frequency: null,
    complexity: null
};

// Wire up every option card: clicking one makes it the only selected card in
// its grid, stores its data-value under the grid's data-field key, and
// refreshes the estimate.
for (const grid of document.querySelectorAll('.option-grid')) {
    for (const card of grid.querySelectorAll('.option-card')) {
        card.addEventListener('click', () => {
            const group = card.parentElement;
            const fieldName = group.dataset.field;

            for (const sibling of group.querySelectorAll('.option-card')) {
                sibling.classList.remove('selected');
            }
            card.classList.add('selected');

            formData[fieldName] = card.dataset.value;
            calculate();
        });
    }
}
|
||||
|
||||
/**
 * Mirrors the "number of websites" slider into the label under it and into
 * formData.numSources.
 *
 * @param {string|number} value - Current slider value (range inputs report a string).
 */
function updateSourcesLabel(value) {
    // Parse once, base 10, and compare the number strictly instead of relying
    // on string-to-number coercion.
    const count = Number.parseInt(value, 10);
    const noun = count === 1 ? 'site' : 'sites';

    document.getElementById('sources-value').textContent = `${count} ${noun}`;
    formData.numSources = count;
}
|
||||
|
||||
/**
 * Recomputes the estimate from the current formData answers and renders it
 * into the results panel.
 *
 * Does nothing until a project type, volume, frequency and complexity have
 * all been chosen. The setup range is base .. base * 1.8, rounded to the
 * nearest 100; recurring frequencies add a monthly range that is a fraction
 * of the setup range, rounded to the nearest 50. Each successful calculation
 * also sends a `calculator_result` event when gtag is present.
 */
function calculate() {
    const { projectType, volume, frequency, complexity, numSources } = formData;
    if (!projectType || !volume || !frequency || !complexity) {
        return;
    }

    // Base costs by project type
    const baseCosts = {
        pricing: 600,
        leads: 400,
        market: 500,
        property: 700,
        custom: 800
    };

    // Volume multipliers
    const volumeMultipliers = {
        small: 1,
        medium: 1.5,
        large: 2.5,
        enterprise: 4
    };

    // Complexity multipliers
    const complexityMultipliers = {
        simple: 1,
        moderate: 1.5,
        complex: 2.5,
        extreme: 4
    };

    // Share of the setup cost charged per month for recurring scrapes
    const freqMultipliers = { weekly: 0.15, daily: 0.25, realtime: 0.5 };

    // Amounts are in pounds, so always group digits the British way rather
    // than following the visitor's browser locale.
    const formatGbp = (amount) => '£' + amount.toLocaleString('en-GB');

    let base = baseCosts[projectType];
    base *= volumeMultipliers[volume];
    base *= complexityMultipliers[complexity];
    base *= (1 + (numSources - 1) * 0.3); // Each additional source adds 30%

    // Round to nice numbers
    const low = Math.round(base / 100) * 100;
    const high = Math.round((base * 1.8) / 100) * 100;

    const isRecurring = frequency !== 'once';

    // Display results
    document.getElementById('results').style.display = 'block';
    document.getElementById('estimate-range').textContent = formatGbp(low) + ' - ' + formatGbp(high);

    if (isRecurring) {
        const monthlyLow = Math.round((low * freqMultipliers[frequency]) / 50) * 50;
        const monthlyHigh = Math.round((high * freqMultipliers[frequency]) / 50) * 50;

        document.getElementById('estimate-note').textContent = 'Initial setup cost';
        document.getElementById('monthly-note').style.display = 'block';
        // Formatted like the setup range so large monthly figures also get
        // thousands separators.
        document.getElementById('monthly-cost').textContent = formatGbp(monthlyLow) + ' - ' + formatGbp(monthlyHigh);
    } else {
        document.getElementById('estimate-note').textContent = 'One-time project cost';
        document.getElementById('monthly-note').style.display = 'none';
    }

    // Track calculation
    if (typeof gtag !== 'undefined') {
        gtag('event', 'calculator_result', {
            event_category: 'Calculator',
            event_label: projectType,
            value: low
        });
    }
}
|
||||
|
||||
/**
 * Reveals the email-capture panel by adding its `show` class, then records
 * the `show_email_capture` event.
 */
function showEmailCapture() {
    const panel = document.getElementById('email-capture');
    panel.classList.add('show');

    trackCalculator('show_email_capture');
}
|
||||
|
||||
/**
 * Posts the visitor's email, the displayed estimate and the form answers to
 * /api/lead-capture.php, and replaces the capture panel with a confirmation
 * only after the server has accepted the request.
 *
 * An address that fails the basic shape check, a network failure, or a
 * non-2xx response all produce an alert and leave the panel in place so the
 * visitor can try again.
 *
 * @returns {Promise<void>} Settles once the request has finished; never rejects.
 */
async function submitEmail() {
    const email = document.getElementById('calc-email').value.trim();
    // Cheap shape check (something@something.tld, no spaces).
    if (!/^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(email)) {
        alert('Please enter a valid email');
        return;
    }

    const estimate = document.getElementById('estimate-range').textContent;

    try {
        const response = await fetch('/api/lead-capture.php', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify({
                email: email,
                source: 'calculator',
                estimate: estimate,
                formData: formData,
                page: window.location.pathname
            })
        });
        // fetch only rejects on network errors; HTTP error statuses must be
        // checked explicitly.
        if (!response.ok) {
            throw new Error('Lead capture request failed with status ' + response.status);
        }
    } catch (err) {
        console.error('Calculator: email submission failed', err);
        alert('Sorry, we could not send your estimate. Please try again.');
        return;
    }

    document.getElementById('email-capture').innerHTML = '<h4>✅ Sent! Check your inbox.</h4><p>We have also sent our detailed pricing guide.</p>';
    trackCalculator('email_submitted');
}
|
||||
|
||||
/**
 * Reports a calculator interaction: sends it to gtag under the `Calculator`
 * category when gtag is defined, and always logs it to the console.
 *
 * @param {string} action - Event name, e.g. 'get_quote'.
 */
function trackCalculator(action) {
    const analyticsAvailable = typeof gtag !== 'undefined';

    if (analyticsAvailable) {
        const params = { event_category: 'Calculator' };
        gtag('event', action, params);
    }

    console.log('Calculator:', action);
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- SoftwareApplication Schema - Added by Emma -->
|
||||
<script type="application/ld+json">
|
||||
{
|
||||
"@context": "https://schema.org",
|
||||
"@type": "SoftwareApplication",
|
||||
"name": "Web Scraping Cost Calculator",
|
||||
"description": "Free tool to estimate web scraping project costs for UK businesses",
|
||||
"url": "https://ukdataservices.co.uk/tools/cost-calculator",
|
||||
"applicationCategory": "BusinessApplication",
|
||||
"operatingSystem": "Web Browser",
|
||||
"offers": {
|
||||
"@type": "Offer",
|
||||
"price": "0",
|
||||
"priceCurrency": "GBP"
|
||||
},
|
||||
"provider": {
|
||||
"@type": "Organization",
|
||||
"name": "UK Data Services",
|
||||
"url": "https://ukdataservices.co.uk"
|
||||
}
|
||||
}
|
||||
</script>
|
||||
316
tools/data-converter.php
Normal file
316
tools/data-converter.php
Normal file
@@ -0,0 +1,316 @@
|
||||
<?php
|
||||
$page_title = "Free Data Format Converter | JSON CSV XML | UK Data Services";
|
||||
$page_description = "Convert between JSON, CSV, and XML formats instantly. Free online tool for data transformation - no signup required.";
|
||||
$canonical_url = "https://ukdataservices.co.uk/tools/data-converter";
|
||||
?>
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title><?php echo htmlspecialchars($page_title); ?></title>
|
||||
<meta name="description" content="<?php echo htmlspecialchars($page_description); ?>">
|
||||
<link rel="canonical" href="<?php echo htmlspecialchars($canonical_url); ?>">
|
||||
|
||||
<meta property="og:title" content="<?php echo htmlspecialchars($page_title); ?>">
|
||||
<meta property="og:description" content="<?php echo htmlspecialchars($page_description); ?>">
|
||||
|
||||
<link rel="stylesheet" href="../assets/css/main.css">
|
||||
|
||||
<script type="application/ld+json">
|
||||
{
|
||||
"@context": "https://schema.org",
|
||||
"@type": "SoftwareApplication",
|
||||
"name": "Data Format Converter",
|
||||
"description": "Free tool to convert between JSON, CSV, and XML data formats",
|
||||
"url": "https://ukdataservices.co.uk/tools/data-converter",
|
||||
"applicationCategory": "BusinessApplication",
|
||||
"operatingSystem": "Web Browser",
|
||||
"offers": { "@type": "Offer", "price": "0", "priceCurrency": "GBP" }
|
||||
}
|
||||
</script>
|
||||
|
||||
<style>
|
||||
.converter-container { max-width: 1100px; margin: 0 auto; padding: 40px 20px; }
|
||||
.converter-header { text-align: center; margin-bottom: 40px; }
|
||||
.converter-header h1 { font-size: 2.2em; color: #1a1a2e; margin-bottom: 15px; }
|
||||
.converter-header p { color: #666; font-size: 1.1em; }
|
||||
.converter-card { background: #fff; border-radius: 12px; box-shadow: 0 4px 20px rgba(0,0,0,0.08); padding: 30px; }
|
||||
.format-selector { display: flex; justify-content: center; gap: 15px; margin-bottom: 25px; flex-wrap: wrap; align-items: center; }
|
||||
.format-btn { padding: 12px 24px; border: 2px solid #e0e0e0; border-radius: 8px; background: white; cursor: pointer; font-weight: 600; transition: all 0.2s; }
|
||||
.format-btn:hover { border-color: #179e83; }
|
||||
.format-btn.active { background: #179e83; color: white; border-color: #179e83; }
|
||||
.arrow { font-size: 1.5em; color: #888; }
|
||||
.editor-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 20px; }
|
||||
@media (max-width: 768px) { .editor-grid { grid-template-columns: 1fr; } }
|
||||
.editor-box { display: flex; flex-direction: column; }
|
||||
.editor-box label { font-weight: 600; color: #1a1a2e; margin-bottom: 10px; display: flex; justify-content: space-between; align-items: center; }
|
||||
.editor-box textarea { flex: 1; min-height: 350px; padding: 15px; border: 2px solid #e0e0e0; border-radius: 8px; font-family: 'Monaco', 'Menlo', monospace; font-size: 0.9em; resize: vertical; }
|
||||
.editor-box textarea:focus { border-color: #179e83; outline: none; }
|
||||
.btn-row { display: flex; justify-content: center; gap: 15px; margin: 25px 0; flex-wrap: wrap; }
|
||||
.btn { padding: 14px 28px; border: none; border-radius: 8px; font-weight: 600; cursor: pointer; transition: all 0.2s; }
|
||||
.btn-primary { background: #179e83; color: white; }
|
||||
.btn-primary:hover { background: #148a72; }
|
||||
.btn-secondary { background: #f5f5f5; color: #333; border: 2px solid #e0e0e0; }
|
||||
.btn-secondary:hover { background: #e8e8e8; }
|
||||
.copy-btn { padding: 6px 12px; font-size: 0.85em; background: #f0f0f0; border: none; border-radius: 4px; cursor: pointer; }
|
||||
.copy-btn:hover { background: #e0e0e0; }
|
||||
.error-msg { background: #ffebee; color: #c62828; padding: 12px; border-radius: 6px; margin-top: 15px; display: none; }
|
||||
.success-msg { background: #e8f5e9; color: #2e7d32; padding: 12px; border-radius: 6px; margin-top: 15px; display: none; }
|
||||
.breadcrumb { padding: 15px 20px; background: #f5f5f5; font-size: 0.9em; }
|
||||
.breadcrumb a { color: #144784; text-decoration: none; }
|
||||
.breadcrumb span { color: #888; margin: 0 8px; }
|
||||
.sample-data { font-size: 0.85em; color: #666; margin-top: 8px; }
|
||||
.sample-data a { color: #179e83; cursor: pointer; text-decoration: underline; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<?php include '../includes/navbar.php'; ?>
|
||||
|
||||
<nav class="breadcrumb">
|
||||
<a href="/">Home</a> <span>›</span> <a href="/tools/">Tools</a> <span>›</span> Data Converter
|
||||
</nav>
|
||||
|
||||
<div class="converter-container">
|
||||
<div class="converter-header">
|
||||
<h1>🔄 Data Format Converter</h1>
|
||||
<p>Convert between JSON, CSV, and XML formats instantly. Your data stays in your browser.</p>
|
||||
</div>
|
||||
|
||||
<div class="converter-card">
|
||||
<div class="format-selector">
|
||||
<div>
|
||||
<strong>From:</strong>
|
||||
<button class="format-btn active" data-format="json" onclick="setInputFormat('json')">JSON</button>
|
||||
<button class="format-btn" data-format="csv" onclick="setInputFormat('csv')">CSV</button>
|
||||
<button class="format-btn" data-format="xml" onclick="setInputFormat('xml')">XML</button>
|
||||
</div>
|
||||
<span class="arrow">→</span>
|
||||
<div>
|
||||
<strong>To:</strong>
|
||||
<button class="format-btn" data-output="json" onclick="setOutputFormat('json')">JSON</button>
|
||||
<button class="format-btn active" data-output="csv" onclick="setOutputFormat('csv')">CSV</button>
|
||||
<button class="format-btn" data-output="xml" onclick="setOutputFormat('xml')">XML</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="editor-grid">
|
||||
<div class="editor-box">
|
||||
<label>
|
||||
<span>📥 Input (<span id="inputFormatLabel">JSON</span>)</span>
|
||||
<button class="copy-btn" onclick="clearInput()">Clear</button>
|
||||
</label>
|
||||
<textarea id="inputData" placeholder="Paste your data here..."></textarea>
|
||||
<div class="sample-data">
|
||||
Try sample: <a onclick="loadSample()">Load example data</a>
|
||||
</div>
|
||||
</div>
|
||||
<div class="editor-box">
|
||||
<label>
|
||||
<span>📤 Output (<span id="outputFormatLabel">CSV</span>)</span>
|
||||
<button class="copy-btn" onclick="copyOutput()">Copy</button>
|
||||
</label>
|
||||
<textarea id="outputData" readonly placeholder="Converted data will appear here..."></textarea>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="btn-row">
|
||||
<button class="btn btn-primary" onclick="convert()">🔄 Convert</button>
|
||||
<button class="btn btn-secondary" onclick="downloadOutput()">⬇️ Download</button>
|
||||
</div>
|
||||
|
||||
<div id="errorMsg" class="error-msg"></div>
|
||||
<div id="successMsg" class="success-msg"></div>
|
||||
</div>
|
||||
|
||||
<div style="margin-top: 40px; padding: 30px; background: #f8f9fa; border-radius: 12px;">
|
||||
<h3 style="color: #1a1a2e; margin-bottom: 15px;">💡 About This Tool</h3>
|
||||
<p style="color: #666; line-height: 1.7;">
|
||||
This free converter handles common data transformations needed when working with web scraped data:
|
||||
</p>
|
||||
<ul style="color: #666; margin-top: 15px; padding-left: 20px; line-height: 1.8;">
|
||||
<li><strong>JSON → CSV</strong> — Perfect for opening scraped data in Excel or Google Sheets</li>
|
||||
<li><strong>CSV → JSON</strong> — Convert spreadsheet data to API-friendly format</li>
|
||||
<li><strong>XML → JSON/CSV</strong> — Transform legacy XML feeds into modern formats</li>
|
||||
</ul>
|
||||
<p style="color: #666; margin-top: 15px;">
|
||||
<strong>Privacy:</strong> All conversions happen in your browser. Your data never leaves your device.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<?php include '../includes/footer.php'; ?>
|
||||
|
||||
<script>
|
||||
// Format the input textarea is parsed as by convert(): 'json', 'csv' or 'xml'.
// Updated by setInputFormat(); the initial value matches the "From" button
// that is marked active in the markup.
let inputFormat = 'json';
|
||||
// Format convert() serializes the records to, and downloadOutput() uses for
// the file extension and MIME type. Updated by setOutputFormat().
let outputFormat = 'csv';
|
||||
|
||||
/**
 * Selects the source format: stores it in inputFormat, moves the `active`
 * class to the matching "From" button, and updates the input pane's label.
 *
 * @param {string} format - 'json', 'csv' or 'xml'.
 */
function setInputFormat(format) {
    inputFormat = format;

    for (const button of document.querySelectorAll('[data-format]')) {
        button.classList.remove('active');
    }
    const chosen = document.querySelector(`[data-format="${format}"]`);
    chosen.classList.add('active');

    const label = document.getElementById('inputFormatLabel');
    label.textContent = format.toUpperCase();
}
|
||||
|
||||
/**
 * Selects the target format: stores it in outputFormat, moves the `active`
 * class to the matching "To" button, and updates the output pane's label.
 *
 * @param {string} format - 'json', 'csv' or 'xml'.
 */
function setOutputFormat(format) {
    outputFormat = format;

    for (const button of document.querySelectorAll('[data-output]')) {
        button.classList.remove('active');
    }
    const chosen = document.querySelector(`[data-output="${format}"]`);
    chosen.classList.add('active');

    const label = document.getElementById('outputFormatLabel');
    label.textContent = format.toUpperCase();
}
|
||||
|
||||
/**
 * Fills the input textarea with a three-product example in the currently
 * selected input format.
 */
function loadSample() {
    // This script is served from a .php file. The XML declaration is built
    // from two pieces so the source never contains the two-character PHP
    // short open tag, which PHP would try to execute when short_open_tag is
    // enabled. The resulting string is unchanged.
    const xmlDeclaration = '<' + '?xml version="1.0"?>';

    const samples = {
        json: `[
{"name": "Product A", "price": 29.99, "category": "Electronics"},
{"name": "Product B", "price": 49.99, "category": "Home"},
{"name": "Product C", "price": 19.99, "category": "Electronics"}
]`,
        csv: `name,price,category
Product A,29.99,Electronics
Product B,49.99,Home
Product C,19.99,Electronics`,
        xml: `${xmlDeclaration}
<products>
<product><name>Product A</name><price>29.99</price><category>Electronics</category></product>
<product><name>Product B</name><price>49.99</price><category>Home</category></product>
<product><name>Product C</name><price>19.99</price><category>Electronics</category></product>
</products>`
    };

    document.getElementById('inputData').value = samples[inputFormat];
}
|
||||
|
||||
/**
 * Empties both the input and output textareas and hides any status banner.
 */
function clearInput() {
    for (const id of ['inputData', 'outputData']) {
        document.getElementById(id).value = '';
    }
    hideMessages();
}
|
||||
|
||||
/**
 * Hides both the error banner and the success banner.
 */
function hideMessages() {
    for (const id of ['errorMsg', 'successMsg']) {
        document.getElementById(id).style.display = 'none';
    }
}
|
||||
|
||||
/**
 * Shows `msg` in the error banner, prefixed with a cross mark, after hiding
 * whichever banner was visible before.
 *
 * @param {string} msg - Text to display.
 */
function showError(msg) {
    hideMessages();

    const banner = document.getElementById('errorMsg');
    banner.textContent = '❌ ' + msg;
    banner.style.display = 'block';
}
|
||||
|
||||
/**
 * Shows `msg` in the success banner, prefixed with a check mark, after hiding
 * whichever banner was visible before.
 *
 * @param {string} msg - Text to display.
 */
function showSuccess(msg) {
    hideMessages();

    const banner = document.getElementById('successMsg');
    banner.textContent = '✅ ' + msg;
    banner.style.display = 'block';
}
|
||||
|
||||
/**
 * Converts the text in the input textarea from inputFormat to outputFormat
 * and writes the result to the output textarea.
 *
 * Blank input shows an error and stops. A JSON value that is not an array is
 * wrapped in a one-element array. Any exception thrown while parsing or
 * serializing is reported through showError(); on success the number of
 * converted records is reported through showSuccess().
 */
function convert() {
    const raw = document.getElementById('inputData').value.trim();
    if (raw === '') {
        showError('Please enter some data to convert');
        return;
    }

    try {
        // Parse input
        let records;
        switch (inputFormat) {
            case 'json': {
                const parsed = JSON.parse(raw);
                records = Array.isArray(parsed) ? parsed : [parsed];
                break;
            }
            case 'csv':
                records = csvToArray(raw);
                break;
            case 'xml':
                records = xmlToArray(raw);
                break;
        }

        // Convert to output
        let rendered;
        switch (outputFormat) {
            case 'json':
                rendered = JSON.stringify(records, null, 2);
                break;
            case 'csv':
                rendered = arrayToCsv(records);
                break;
            case 'xml':
                rendered = arrayToXml(records);
                break;
        }

        document.getElementById('outputData').value = rendered;
        showSuccess(`Converted ${records.length} records from ${inputFormat.toUpperCase()} to ${outputFormat.toUpperCase()}`);
    } catch (err) {
        showError('Conversion failed: ' + err.message);
    }
}
|
||||
|
||||
/**
 * Parses CSV text into an array of objects keyed by the header row.
 *
 * Handles double-quoted fields in the RFC 4180 style: a quoted field may
 * contain commas, line breaks and doubled quotes (`""` becomes `"`), so
 * output produced by arrayToCsv() round-trips. Unquoted fields and headers
 * are trimmed; quoted fields keep their exact content. Blank lines are
 * skipped, LF and CRLF line endings are both accepted, and cells missing from
 * a short row become ''.
 *
 * @param {string} csv - CSV text whose first non-blank line is the header row.
 * @returns {Array<Object<string, string>>} One object per data row; [] when
 *     the text contains no rows at all.
 */
function csvToArray(csv) {
    const rows = [];
    let row = [];
    let field = '';
    let inQuotes = false;
    let wasQuoted = false; // the current field started with an opening quote

    const endField = () => {
        row.push(wasQuoted ? field : field.trim());
        field = '';
        wasQuoted = false;
    };
    const endRow = () => {
        const lastFieldQuoted = wasQuoted;
        endField();
        // A whitespace-only line yields a single empty unquoted field: skip it.
        const isBlankLine = row.length === 1 && row[0] === '' && !lastFieldQuoted;
        if (!isBlankLine) {
            rows.push(row);
        }
        row = [];
    };

    for (let i = 0; i < csv.length; i += 1) {
        const ch = csv[i];

        if (inQuotes) {
            if (ch !== '"') {
                field += ch;
            } else if (csv[i + 1] === '"') {
                field += '"'; // escaped quote inside a quoted field
                i += 1;
            } else {
                inQuotes = false;
            }
        } else if (ch === '"' && !wasQuoted && field.trim() === '') {
            // A quote only opens a quoted field at the start of the field;
            // elsewhere (e.g. 5" disk) it is ordinary text.
            inQuotes = true;
            wasQuoted = true;
            field = '';
        } else if (ch === ',') {
            endField();
        } else if (ch === '\n') {
            endRow();
        } else if (ch === '\r' && csv[i + 1] === '\n') {
            // CRLF: the following LF ends the row.
        } else {
            field += ch;
        }
    }
    // Flush the final row when the text does not end with a newline.
    if (field !== '' || wasQuoted || row.length > 0) {
        endRow();
    }

    if (rows.length === 0) {
        return [];
    }

    const [headers, ...records] = rows;
    return records.map((values) => {
        const obj = {};
        headers.forEach((header, index) => {
            obj[header] = values[index] ?? '';
        });
        return obj;
    });
}
|
||||
|
||||
/**
 * Serializes an array of flat objects to CSV text.
 *
 * The header row is the union of all rows' keys in first-seen order, so keys
 * that only appear in later rows are not dropped. `null`/`undefined` become
 * empty cells; every other value is stringified, which keeps `0` and `false`.
 * A cell is wrapped in double quotes (with embedded quotes doubled) when it
 * contains a comma, quote or line break, or has leading/trailing whitespace.
 *
 * @param {Array<Object>} arr - Records to serialize.
 * @returns {string} CSV text with LF line endings; '' for an empty array.
 */
function arrayToCsv(arr) {
    if (!arr.length) return '';

    const headers = [...new Set(arr.flatMap((row) => Object.keys(row)))];

    const toCell = (value) => {
        const text = value == null ? '' : String(value);
        const needsQuoting = /[",\r\n]|^\s|\s$/.test(text);
        return needsQuoting ? `"${text.replace(/"/g, '""')}"` : text;
    };

    const lines = [
        headers,
        ...arr.map((row) => headers.map((header) => row[header])),
    ].map((cells) => cells.map(toCell).join(','));

    return lines.join('\n');
}
|
||||
|
||||
/**
 * Parses an XML document of the shape <root><item><field>text</field>...</item>...</root>
 * into an array of objects: one object per direct child of the root element,
 * with one property per child element of that item (tag name -> text content).
 *
 * @param {string} xml - XML source text.
 * @returns {Array<Object<string, string>>} One record per child of the root element.
 * @throws {Error} When the text is not well-formed XML.
 */
function xmlToArray(xml) {
    const doc = new DOMParser().parseFromString(xml, 'text/xml');

    // DOMParser does not throw on malformed XML; it returns a document that
    // contains a <parsererror> element instead, so check for it explicitly.
    if (doc.querySelector('parsererror')) {
        throw new Error('input is not well-formed XML');
    }

    // Walk the root's element children directly. Building a CSS selector from
    // the root tag name breaks on prefixed names such as ns:root and would
    // also match nested elements that reuse the root's name.
    return Array.from(doc.documentElement.children).map((item) => {
        const record = {};
        for (const child of Array.from(item.children)) {
            record[child.tagName] = child.textContent;
        }
        return record;
    });
}
|
||||
|
||||
/**
 * Serializes an array of flat objects to an XML document: a <data> root with
 * one <item> per record and one child element per property, with values
 * passed through escapeXml().
 *
 * Property names are turned into valid element names first: characters that
 * are not letters, digits, '_', '.' or '-' become '_', and a name that does
 * not start with a letter or '_' is prefixed with '_'.
 *
 * @param {Array<Object>} arr - Records to serialize.
 * @returns {string} The XML document text.
 */
function arrayToXml(arr) {
    // This script is served from a .php file. The declaration is assembled
    // from two pieces so the source never contains the two-character PHP
    // short open tag, which PHP would try to execute when short_open_tag is
    // enabled. The emitted text is unchanged.
    const declaration = '<' + '?xml version="1.0"?>';

    // Keys such as "unit price" or "1st" are not legal XML names and would
    // make the output unparseable.
    const toTagName = (key) => {
        const cleaned = String(key).replace(/[^\p{L}\p{N}_.-]/gu, '_');
        return /^[\p{L}_]/u.test(cleaned) ? cleaned : `_${cleaned}`;
    };

    if (!arr.length) return `${declaration}\n<data></data>`;

    let xml = `${declaration}\n<data>\n`;
    arr.forEach((obj) => {
        xml += ' <item>\n';
        Object.entries(obj).forEach(([key, value]) => {
            const tag = toTagName(key);
            xml += ` <${tag}>${escapeXml(value)}</${tag}>\n`;
        });
        xml += ' </item>\n';
    });
    xml += '</data>';
    return xml;
}
|
||||
|
||||
/**
 * Escapes the characters that are special in XML so `str` can be placed
 * inside element content or an attribute value.
 *
 * @param {*} str - Value to escape; converted with String() first.
 * @returns {string} Text with &, <, >, " and ' replaced by entity references.
 */
function escapeXml(str) {
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&apos;',
    };
    // Single pass, so an ampersand introduced by one replacement is never
    // escaped a second time.
    return String(str).replace(/[&<>"']/g, (ch) => entities[ch]);
}
|
||||
|
||||
/**
 * Copies the converted output to the clipboard and reports the outcome in the
 * status banner.
 *
 * Uses the asynchronous Clipboard API when the browser exposes it and falls
 * back to selecting the textarea and issuing the legacy `copy` command
 * otherwise. Success is only reported when the copy actually went through;
 * empty output and failures are reported as errors.
 *
 * @returns {Promise<void>} Settles when the copy attempt has finished; never rejects.
 */
async function copyOutput() {
    const output = document.getElementById('outputData');
    if (!output.value) {
        showError('Nothing to copy');
        return;
    }

    try {
        if (navigator.clipboard?.writeText) {
            await navigator.clipboard.writeText(output.value);
        } else {
            // navigator.clipboard is unavailable on insecure origins and in
            // old browsers; execCommand returns false when the copy is refused.
            output.select();
            if (!document.execCommand('copy')) {
                throw new Error('the browser refused the copy command');
            }
        }
        showSuccess('Copied to clipboard!');
    } catch (err) {
        showError('Could not copy to clipboard: ' + err.message);
    }
}
|
||||
|
||||
/**
 * Offers the converted output as a file download named converted-data.<ext>,
 * where the extension and MIME type follow outputFormat (anything other than
 * 'json' or 'csv' is treated as XML). Shows an error when there is no output.
 */
function downloadOutput() {
    const content = document.getElementById('outputData').value;
    if (!content) {
        showError('Nothing to download');
        return;
    }

    let ext = 'xml';
    let mime = 'text/xml';
    if (outputFormat === 'json') {
        ext = 'json';
        mime = 'application/json';
    } else if (outputFormat === 'csv') {
        ext = 'csv';
        mime = 'text/csv';
    }

    const objectUrl = URL.createObjectURL(new Blob([content], { type: mime }));

    const link = document.createElement('a');
    link.href = objectUrl;
    link.download = `converted-data.${ext}`;
    link.click();

    URL.revokeObjectURL(objectUrl);
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
222
tools/index.php
Normal file
222
tools/index.php
Normal file
@@ -0,0 +1,222 @@
|
||||
<?php
|
||||
$page_title = "Free Web Scraping & Data Tools | UK Data Services";
|
||||
$page_description = "Free tools to help UK businesses with web scraping: cost calculator, scrapeability checker, robots.txt analyzer, and data format converter.";
|
||||
$canonical_url = "https://ukdataservices.co.uk/tools/";
|
||||
?>
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title><?php echo htmlspecialchars($page_title); ?></title>
|
||||
<meta name="description" content="<?php echo htmlspecialchars($page_description); ?>">
|
||||
<link rel="canonical" href="<?php echo htmlspecialchars($canonical_url); ?>">
|
||||
|
||||
<meta property="og:title" content="<?php echo htmlspecialchars($page_title); ?>">
|
||||
<meta property="og:description" content="<?php echo htmlspecialchars($page_description); ?>">
|
||||
<meta property="og:type" content="website">
|
||||
<meta property="og:url" content="<?php echo htmlspecialchars($canonical_url); ?>">
|
||||
|
||||
<link rel="stylesheet" href="../assets/css/main.css">
|
||||
|
||||
<script type="application/ld+json">
|
||||
{
|
||||
"@context": "https://schema.org",
|
||||
"@type": "BreadcrumbList",
|
||||
"itemListElement": [
|
||||
{"@type": "ListItem", "position": 1, "name": "Home", "item": "https://ukdataservices.co.uk"},
|
||||
{"@type": "ListItem", "position": 2, "name": "Free Tools", "item": "https://ukdataservices.co.uk/tools/"}
|
||||
]
|
||||
}
|
||||
</script>
|
||||
|
||||
<script type="application/ld+json">
|
||||
{
|
||||
"@context": "https://schema.org",
|
||||
"@type": "ItemList",
|
||||
"name": "Free Web Scraping Tools",
|
||||
"description": "Free tools for planning and executing web scraping projects",
|
||||
"numberOfItems": 4,
|
||||
"itemListElement": [
|
||||
{
|
||||
"@type": "ListItem",
|
||||
"position": 1,
|
||||
"item": {
|
||||
"@type": "SoftwareApplication",
|
||||
"name": "Web Scraping Cost Calculator",
|
||||
"description": "Estimate your web scraping project cost instantly",
|
||||
"url": "https://ukdataservices.co.uk/tools/cost-calculator",
|
||||
"applicationCategory": "BusinessApplication",
|
||||
"offers": {"@type": "Offer", "price": "0", "priceCurrency": "GBP"}
|
||||
}
|
||||
},
|
||||
{
|
||||
"@type": "ListItem",
|
||||
"position": 2,
|
||||
"item": {
|
||||
"@type": "SoftwareApplication",
|
||||
"name": "Website Scrapeability Checker",
|
||||
"description": "Check if a website can be scraped and assess complexity",
|
||||
"url": "https://ukdataservices.co.uk/tools/scrapeability-checker",
|
||||
"applicationCategory": "BusinessApplication",
|
||||
"offers": {"@type": "Offer", "price": "0", "priceCurrency": "GBP"}
|
||||
}
|
||||
},
|
||||
{
|
||||
"@type": "ListItem",
|
||||
"position": 3,
|
||||
"item": {
|
||||
"@type": "SoftwareApplication",
|
||||
"name": "Robots.txt Analyzer",
|
||||
"description": "Analyze robots.txt files for crawling permissions",
|
||||
"url": "https://ukdataservices.co.uk/tools/robots-analyzer",
|
||||
"applicationCategory": "BusinessApplication",
|
||||
"offers": {"@type": "Offer", "price": "0", "priceCurrency": "GBP"}
|
||||
}
|
||||
},
|
||||
{
|
||||
"@type": "ListItem",
|
||||
"position": 4,
|
||||
"item": {
|
||||
"@type": "SoftwareApplication",
|
||||
"name": "Data Format Converter",
|
||||
"description": "Convert between JSON, CSV, and XML formats",
|
||||
"url": "https://ukdataservices.co.uk/tools/data-converter",
|
||||
"applicationCategory": "BusinessApplication",
|
||||
"offers": {"@type": "Offer", "price": "0", "priceCurrency": "GBP"}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
</script>
|
||||
|
||||
<style>
|
||||
.tools-hero {
|
||||
background: linear-gradient(135deg, #144784 0%, #179e83 100%);
|
||||
color: white;
|
||||
padding: 80px 20px;
|
||||
text-align: center;
|
||||
}
|
||||
.tools-hero h1 { font-size: 2.5em; margin-bottom: 15px; }
|
||||
.tools-hero p { font-size: 1.2em; opacity: 0.95; max-width: 600px; margin: 0 auto; }
|
||||
.tools-container { max-width: 1100px; margin: 0 auto; padding: 60px 20px; }
|
||||
.tools-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap: 25px; }
|
||||
.tool-card {
|
||||
background: #fff;
|
||||
border-radius: 12px;
|
||||
box-shadow: 0 4px 20px rgba(0,0,0,0.08);
|
||||
padding: 30px;
|
||||
transition: transform 0.3s, box-shadow 0.3s;
|
||||
position: relative;
|
||||
}
|
||||
.tool-card:hover { transform: translateY(-5px); box-shadow: 0 8px 30px rgba(0,0,0,0.12); }
|
||||
.tool-icon { font-size: 2.5em; margin-bottom: 15px; }
|
||||
.tool-card h2 { font-size: 1.3em; color: #1a1a2e; margin-bottom: 10px; }
|
||||
.tool-card p { color: #666; margin-bottom: 20px; line-height: 1.6; font-size: 0.95em; }
|
||||
.tool-card .btn {
|
||||
display: inline-block;
|
||||
background: #179e83;
|
||||
color: white;
|
||||
padding: 12px 24px;
|
||||
border-radius: 6px;
|
||||
text-decoration: none;
|
||||
font-weight: 600;
|
||||
transition: background 0.3s;
|
||||
}
|
||||
.tool-card .btn:hover { background: #148a72; }
|
||||
.tool-badge {
|
||||
position: absolute;
|
||||
top: 15px;
|
||||
right: 15px;
|
||||
background: #e8f5e9;
|
||||
color: #2e7d32;
|
||||
padding: 4px 10px;
|
||||
border-radius: 12px;
|
||||
font-size: 0.75em;
|
||||
font-weight: 600;
|
||||
}
|
||||
.tool-badge.new { background: #e3f2fd; color: #1565c0; }
|
||||
.tool-badge.popular { background: #fff3e0; color: #ef6c00; }
|
||||
.breadcrumb { padding: 15px 20px; background: #f5f5f5; font-size: 0.9em; }
|
||||
.breadcrumb a { color: #144784; text-decoration: none; }
|
||||
.breadcrumb span { color: #888; margin: 0 8px; }
|
||||
.cta-section {
|
||||
text-align: center;
|
||||
margin-top: 60px;
|
||||
padding: 50px 30px;
|
||||
background: #f8f9fa;
|
||||
border-radius: 12px;
|
||||
}
|
||||
.cta-section h3 { color: #1a1a2e; margin-bottom: 15px; font-size: 1.5em; }
|
||||
.cta-section p { color: #666; margin-bottom: 25px; max-width: 500px; margin-left: auto; margin-right: auto; }
|
||||
.cta-section .btn { background: #144784; padding: 14px 32px; }
|
||||
.cta-section .btn:hover { background: #0d3a6e; }
|
||||
.blog-link {
|
||||
display: inline-block;
|
||||
margin-top: 30px;
|
||||
color: #179e83;
|
||||
text-decoration: none;
|
||||
font-weight: 500;
|
||||
}
|
||||
.blog-link:hover { text-decoration: underline; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<?php include '../includes/navbar.php'; ?>
|
||||
|
||||
<nav class="breadcrumb">
|
||||
<a href="/">Home</a> <span>›</span> Free Tools
|
||||
</nav>
|
||||
|
||||
<section class="tools-hero">
|
||||
<h1>🛠️ Free Web Scraping Tools</h1>
|
||||
<p>Plan your data extraction project with our free calculators and assessment tools. No signup required — your data stays in your browser.</p>
|
||||
</section>
|
||||
|
||||
<div class="tools-container">
|
||||
<div class="tools-grid">
|
||||
<div class="tool-card">
|
||||
<span class="tool-badge popular">Most Popular</span>
|
||||
<div class="tool-icon">💰</div>
|
||||
<h2>Web Scraping Cost Calculator</h2>
|
||||
<p>Get an instant estimate for your web scraping project. Transparent pricing based on data volume, complexity, and delivery format.</p>
|
||||
<a href="/tools/cost-calculator" class="btn">Calculate Cost →</a>
|
||||
</div>
|
||||
|
||||
<div class="tool-card">
|
||||
<span class="tool-badge new">New</span>
|
||||
<div class="tool-icon">🔍</div>
|
||||
<h2>Scrapeability Checker</h2>
|
||||
<p>Check if a website can be scraped and assess technical complexity. Get insights on JavaScript, rate limits, and recommended approaches.</p>
|
||||
<a href="/tools/scrapeability-checker" class="btn">Check Website →</a>
|
||||
</div>
|
||||
|
||||
<div class="tool-card">
|
||||
<span class="tool-badge new">New</span>
|
||||
<div class="tool-icon">🤖</div>
|
||||
<h2>Robots.txt Analyzer</h2>
|
||||
<p>Analyze any website's robots.txt to understand crawling rules. See blocked paths, allowed paths, sitemaps, and crawl delays.</p>
|
||||
<a href="/tools/robots-analyzer" class="btn">Analyze →</a>
|
||||
</div>
|
||||
|
||||
<div class="tool-card">
|
||||
<span class="tool-badge new">New</span>
|
||||
<div class="tool-icon">🔄</div>
|
||||
<h2>Data Format Converter</h2>
|
||||
<p>Convert between JSON, CSV, and XML formats instantly. Perfect for transforming scraped data into the format your systems need.</p>
|
||||
<a href="/tools/data-converter" class="btn">Convert Data →</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="cta-section">
|
||||
<h3>Need a Custom Solution?</h3>
|
||||
<p>Our tools help you plan, but every project is unique. Get a detailed quote from our expert team — we've delivered 500+ scraping projects across the UK.</p>
|
||||
<a href="/quote" class="btn">Request Free Quote →</a>
|
||||
<br>
|
||||
<a href="/blog/articles/free-web-scraping-tools-launch" class="blog-link">📝 Read the announcement →</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<?php include '../includes/footer.php'; ?>
|
||||
</body>
|
||||
</html>
|
||||
260
tools/robots-analyzer.php
Normal file
260
tools/robots-analyzer.php
Normal file
@@ -0,0 +1,260 @@
|
||||
<?php
|
||||
$page_title = "Free Robots.txt Analyzer | UK Data Services";
|
||||
$page_description = "Analyze any website's robots.txt file instantly. See crawling rules, blocked paths, sitemaps, and get recommendations for web scraping compliance.";
|
||||
$canonical_url = "https://ukdataservices.co.uk/tools/robots-analyzer";
|
||||
?>
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title><?php echo htmlspecialchars($page_title); ?></title>
|
||||
<meta name="description" content="<?php echo htmlspecialchars($page_description); ?>">
|
||||
<link rel="canonical" href="<?php echo htmlspecialchars($canonical_url); ?>">
|
||||
|
||||
<meta property="og:title" content="<?php echo htmlspecialchars($page_title); ?>">
|
||||
<meta property="og:description" content="<?php echo htmlspecialchars($page_description); ?>">
|
||||
<meta property="og:type" content="website">
<!-- og:url mirrors the canonical URL, as on the other tool pages -->
<meta property="og:url" content="<?php echo htmlspecialchars($canonical_url); ?>">
|
||||
|
||||
<link rel="stylesheet" href="../assets/css/main.css">
|
||||
|
||||
<script type="application/ld+json">
|
||||
{
|
||||
"@context": "https://schema.org",
|
||||
"@type": "SoftwareApplication",
|
||||
"name": "Robots.txt Analyzer",
|
||||
"description": "Free tool to analyze robots.txt files and understand crawling permissions",
|
||||
"url": "https://ukdataservices.co.uk/tools/robots-analyzer",
|
||||
"applicationCategory": "BusinessApplication",
|
||||
"operatingSystem": "Web Browser",
|
||||
"offers": { "@type": "Offer", "price": "0", "priceCurrency": "GBP" }
|
||||
}
|
||||
</script>
|
||||
|
||||
<style>
|
||||
.analyzer-container { max-width: 900px; margin: 0 auto; padding: 40px 20px; }
|
||||
.analyzer-header { text-align: center; margin-bottom: 40px; }
|
||||
.analyzer-header h1 { font-size: 2.2em; color: #1a1a2e; margin-bottom: 15px; }
|
||||
.analyzer-header p { color: #666; font-size: 1.1em; }
|
||||
.analyzer-card { background: #fff; border-radius: 12px; box-shadow: 0 4px 20px rgba(0,0,0,0.08); padding: 40px; }
|
||||
.url-input-group { display: flex; gap: 12px; margin-bottom: 30px; }
|
||||
.url-input-group input { flex: 1; padding: 16px; border: 2px solid #e0e0e0; border-radius: 8px; font-size: 1em; }
|
||||
.url-input-group input:focus { border-color: #179e83; outline: none; }
|
||||
.url-input-group button { background: #179e83; color: white; border: none; padding: 16px 32px; border-radius: 8px; font-weight: 600; cursor: pointer; }
|
||||
.url-input-group button:hover { background: #148a72; }
|
||||
.url-input-group button:disabled { background: #ccc; cursor: not-allowed; }
|
||||
.results-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 20px; }
|
||||
@media (max-width: 768px) { .results-grid { grid-template-columns: 1fr; } }
|
||||
.result-box { background: #f8f9fa; border-radius: 8px; padding: 20px; }
|
||||
.result-box h3 { color: #1a1a2e; margin-bottom: 15px; font-size: 1.1em; display: flex; align-items: center; gap: 8px; }
|
||||
.result-box pre { background: #1a1a2e; color: #a5d6a7; padding: 15px; border-radius: 6px; overflow-x: auto; font-size: 0.85em; max-height: 300px; }
|
||||
.stat-badge { display: inline-block; padding: 6px 12px; border-radius: 15px; font-size: 0.9em; font-weight: 600; margin: 4px; }
|
||||
.badge-green { background: #e8f5e9; color: #2e7d32; }
|
||||
.badge-yellow { background: #fff3e0; color: #ef6c00; }
|
||||
.badge-red { background: #ffebee; color: #c62828; }
|
||||
.badge-blue { background: #e3f2fd; color: #1565c0; }
|
||||
.loading { text-align: center; padding: 40px; display: none; }
|
||||
.spinner { width: 40px; height: 40px; border: 4px solid #e0e0e0; border-top-color: #179e83; border-radius: 50%; animation: spin 1s linear infinite; margin: 0 auto 15px; }
|
||||
@keyframes spin { to { transform: rotate(360deg); } }
|
||||
#results { display: none; }
|
||||
.breadcrumb { padding: 15px 20px; background: #f5f5f5; font-size: 0.9em; }
|
||||
.breadcrumb a { color: #144784; text-decoration: none; }
|
||||
.breadcrumb span { color: #888; margin: 0 8px; }
|
||||
.path-list { list-style: none; padding: 0; margin: 0; max-height: 200px; overflow-y: auto; }
|
||||
.path-list li { padding: 8px 12px; border-bottom: 1px solid #e0e0e0; font-family: monospace; font-size: 0.9em; }
|
||||
.path-list li:last-child { border-bottom: none; }
|
||||
.cta-box { text-align: center; padding: 30px; background: linear-gradient(135deg, #144784 0%, #179e83 100%); border-radius: 8px; color: white; margin-top: 30px; }
|
||||
.cta-box a { display: inline-block; background: white; color: #144784; padding: 14px 28px; border-radius: 6px; text-decoration: none; font-weight: 600; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<?php include '../includes/navbar.php'; ?>
|
||||
|
||||
<nav class="breadcrumb">
|
||||
<a href="/">Home</a> <span>›</span> <a href="/tools/">Tools</a> <span>›</span> Robots.txt Analyzer
|
||||
</nav>
|
||||
|
||||
<div class="analyzer-container">
|
||||
<div class="analyzer-header">
|
||||
<h1>🤖 Robots.txt Analyzer</h1>
|
||||
<p>Analyze any website's robots.txt to understand crawling rules and scraping permissions.</p>
|
||||
</div>
|
||||
|
||||
<div class="analyzer-card">
|
||||
<div class="url-input-group">
|
||||
<input type="url" id="urlInput" placeholder="https://example.com" required>
|
||||
<button onclick="analyzeRobots()" id="analyzeBtn">Analyze</button>
|
||||
</div>
|
||||
|
||||
<div id="loading" class="loading">
|
||||
<div class="spinner"></div>
|
||||
<p>Fetching and analyzing robots.txt...</p>
|
||||
</div>
|
||||
|
||||
<div id="results">
|
||||
<div style="margin-bottom: 25px;">
|
||||
<h3 style="color: #1a1a2e; margin-bottom: 15px;">📊 Quick Summary</h3>
|
||||
<div id="summaryBadges"></div>
|
||||
</div>
|
||||
|
||||
<div class="results-grid">
|
||||
<div class="result-box">
|
||||
<h3>🚫 Blocked Paths</h3>
|
||||
<ul class="path-list" id="blockedPaths"></ul>
|
||||
</div>
|
||||
<div class="result-box">
|
||||
<h3>✅ Allowed Paths</h3>
|
||||
<ul class="path-list" id="allowedPaths"></ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="result-box" style="margin-top: 20px;">
|
||||
<h3>🗺️ Sitemaps Found</h3>
|
||||
<ul class="path-list" id="sitemaps"></ul>
|
||||
</div>
|
||||
|
||||
<div class="result-box" style="margin-top: 20px;">
|
||||
<h3>📄 Raw robots.txt</h3>
|
||||
<pre id="rawContent"></pre>
|
||||
</div>
|
||||
|
||||
<div class="cta-box">
|
||||
<h3>Need Help With Compliant Scraping?</h3>
|
||||
<p style="opacity: 0.9; margin: 10px 0 20px;">We build scrapers that respect robots.txt and follow best practices.</p>
|
||||
<a href="/quote">Get a Free Quote →</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<?php include '../includes/footer.php'; ?>
|
||||
|
||||
<script>
|
||||
/**
 * Read the URL field, fetch that site's robots.txt through the
 * /api/fetch-robots.php endpoint and render the analysis.
 *
 * Only http(s) URLs are accepted. When the request or its JSON decoding
 * fails, a built-in sample is rendered via simulateAnalysis() instead.
 * The button and spinner are always restored, even if rendering throws.
 *
 * @returns {Promise<void>}
 */
async function analyzeRobots() {
    const urlInput = document.getElementById('urlInput').value.trim();
    if (!urlInput) { alert('Please enter a URL'); return; }

    let baseUrl;
    try { baseUrl = new URL(urlInput); }
    catch { alert('Please enter a valid URL'); return; }

    // new URL() also accepts schemes such as ftp: or javascript:, which have
    // no robots.txt to analyze.
    if (baseUrl.protocol !== 'http:' && baseUrl.protocol !== 'https:') {
        alert('Please enter a valid URL');
        return;
    }

    const analyzeBtn = document.getElementById('analyzeBtn');
    const loading = document.getElementById('loading');
    const results = document.getElementById('results');

    analyzeBtn.disabled = true;
    loading.style.display = 'block';
    results.style.display = 'none';

    // origin keeps a non-default port (hostname alone would drop it).
    const robotsUrl = `${baseUrl.origin}/robots.txt`;

    try {
        try {
            // Use a CORS proxy or backend in production
            const response = await fetch(`/api/fetch-robots.php?url=${encodeURIComponent(robotsUrl)}`);
            const data = await response.json();

            if (data.error) {
                displayError(data.error);
            } else {
                displayResults(data.content, baseUrl.hostname);
            }
        } catch (err) {
            // Fallback: simulate analysis. Log the cause so the failure is
            // not completely invisible.
            console.warn('robots.txt analysis failed, showing sample data:', err);
            simulateAnalysis(baseUrl.hostname);
        }
    } finally {
        analyzeBtn.disabled = false;
        loading.style.display = 'none';
        results.style.display = 'block';
    }
}
|
||||
|
||||
/**
 * Render a canned sample robots.txt for the given host.
 * Demo fallback: the content is made up, not fetched from the site.
 *
 * @param {string} hostname - Host name used in the sample sitemap URLs.
 */
function simulateAnalysis(hostname) {
    // Simulated robots.txt for demo, one entry per line.
    const sampleLines = [
        'User-agent: *',
        'Disallow: /admin/',
        'Disallow: /private/',
        'Disallow: /api/internal/',
        'Allow: /api/public/',
        'Allow: /',
        '',
        `Sitemap: https://${hostname}/sitemap.xml`,
        `Sitemap: https://${hostname}/sitemap-blog.xml`,
        '',
        '# Crawl-delay: 1',
    ];

    displayResults(sampleLines.join('\n'), hostname);
}
|
||||
|
||||
/**
 * Parse robots.txt text and render the summary badges, blocked/allowed path
 * lists, sitemap links and raw content into the results panel.
 *
 * The text comes from a third-party site, so everything interpolated into
 * innerHTML is escaped first.
 *
 * @param {string} content - Raw robots.txt text.
 * @param {string} hostname - Host the file belongs to (currently unused).
 */
function displayResults(content, hostname) {
    const text = String(content ?? '');
    const blocked = [];
    const allowed = [];
    const sitemaps = [];
    let crawlDelay = null;

    text.split('\n').forEach(line => {
        // Anything after '#' is a comment, including at the end of a rule.
        const commentAt = line.indexOf('#');
        const rule = (commentAt === -1 ? line : line.slice(0, commentAt)).trim();

        const colonAt = rule.indexOf(':');
        if (colonAt === -1) return;

        // Split on the first colon only: values such as URLs contain more.
        const field = rule.slice(0, colonAt).trim().toLowerCase();
        const value = rule.slice(colonAt + 1).trim();

        if (field === 'disallow') {
            // An empty Disallow blocks nothing, so it is not listed.
            if (value) blocked.push(value);
        } else if (field === 'allow') {
            if (value) allowed.push(value);
        } else if (field === 'sitemap') {
            if (value) sitemaps.push(value);
        } else if (field === 'crawl-delay') {
            crawlDelay = value;
        }
    });

    // Summary badges
    let badges = '';
    badges += `<span class="stat-badge badge-blue">${blocked.length} blocked paths</span>`;
    badges += `<span class="stat-badge badge-green">${allowed.length} allowed paths</span>`;
    badges += `<span class="stat-badge badge-blue">${sitemaps.length} sitemaps</span>`;
    if (crawlDelay) badges += `<span class="stat-badge badge-yellow">Crawl delay: ${escapeHtml(crawlDelay)}s</span>`;
    if (blocked.length === 0) badges += `<span class="stat-badge badge-green">Open to crawling</span>`;
    if (blocked.length > 10) badges += `<span class="stat-badge badge-yellow">Many restrictions</span>`;
    document.getElementById('summaryBadges').innerHTML = badges;

    // Blocked paths
    document.getElementById('blockedPaths').innerHTML = blocked.length
        ? blocked.map(p => `<li>${escapeHtml(p)}</li>`).join('')
        : '<li style="color:#888">No blocked paths</li>';

    // Allowed paths
    document.getElementById('allowedPaths').innerHTML = allowed.length
        ? allowed.map(p => `<li>${escapeHtml(p)}</li>`).join('')
        : '<li style="color:#888">No explicit allows (default: all allowed)</li>';

    // Sitemaps: only http(s) URLs become links.
    const sitemapItem = (s) => {
        if (!/^https?:\/\//i.test(s)) {
            return `<li>${escapeHtml(s)} <span style="color:#c62828">(invalid URL)</span></li>`;
        }
        // The value sits inside a double-quoted attribute; make sure no raw
        // double quote survives even if escapeHtml leaves quotes untouched.
        const href = escapeHtml(s).replace(/"/g, '&quot;');
        return `<li><a href="${href}" target="_blank" rel="noopener">${escapeHtml(s)}</a></li>`;
    };
    document.getElementById('sitemaps').innerHTML = sitemaps.length
        ? sitemaps.map(sitemapItem).join('')
        : '<li style="color:#888">No sitemaps declared</li>';

    // Raw content
    document.getElementById('rawContent').textContent = text;
}
|
||||
|
||||
|
||||
/**
 * Escape text for safe insertion into HTML, both as element content and
 * inside quoted attribute values.
 *
 * @param {*} text - Value to escape; null/undefined become ''.
 * @returns {string} Text with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(text) {
    // Quotes are escaped as well because callers interpolate the result into
    // href="..." attributes, where a raw quote would end the attribute.
    return String(text ?? '')
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}
|
||||
|
||||
/**
 * Show an error state in the results panel: hides the spinner, reveals the
 * panel, puts the (escaped) message in the blocked-paths list and clears the
 * other lists.
 *
 * @param {string} message - Error text to display.
 */
function displayError(message) {
    const byId = (id) => document.getElementById(id);

    byId('loading').style.display = 'none';
    byId('results').style.display = 'block';

    byId('summaryBadges').innerHTML = '<span class="stat-badge badge-red">Error</span>';
    byId('blockedPaths').innerHTML = `<li style="color:#c62828">${escapeHtml(message)}</li>`;
    for (const id of ['allowedPaths', 'sitemaps']) {
        byId(id).innerHTML = '';
    }
    // textContent needs no escaping.
    byId('rawContent').textContent = `Error: ${message}`;
}
|
||||
|
||||
|
||||
// Pressing Enter in the URL field starts the analysis, same as the button.
document.getElementById('urlInput').addEventListener('keypress', function onUrlKeypress(event) {
    if (event.key !== 'Enter') return;
    analyzeRobots();
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
392
tools/scrapeability-checker.php
Normal file
392
tools/scrapeability-checker.php
Normal file
@@ -0,0 +1,392 @@
|
||||
<?php
|
||||
$page_title = "Free Website Scrapeability Checker | UK Data Services";
|
||||
$page_description = "Check if a website can be scraped. Our free tool analyzes technical complexity, JavaScript requirements, and provides expert recommendations for data extraction.";
|
||||
$canonical_url = "https://ukdataservices.co.uk/tools/scrapeability-checker";
|
||||
?>
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title><?php echo htmlspecialchars($page_title); ?></title>
|
||||
<meta name="description" content="<?php echo htmlspecialchars($page_description); ?>">
|
||||
<link rel="canonical" href="<?php echo htmlspecialchars($canonical_url); ?>">
|
||||
|
||||
<meta property="og:title" content="<?php echo htmlspecialchars($page_title); ?>">
|
||||
<meta property="og:description" content="<?php echo htmlspecialchars($page_description); ?>">
|
||||
<meta property="og:type" content="website">
|
||||
<meta property="og:url" content="<?php echo htmlspecialchars($canonical_url); ?>">
|
||||
|
||||
<link rel="stylesheet" href="../assets/css/main.css">
|
||||
|
||||
<!-- SoftwareApplication Schema -->
|
||||
<script type="application/ld+json">
|
||||
{
|
||||
"@context": "https://schema.org",
|
||||
"@type": "SoftwareApplication",
|
||||
"name": "Website Scrapeability Checker",
|
||||
"description": "Free tool to check if a website can be scraped and assess technical complexity",
|
||||
"url": "https://ukdataservices.co.uk/tools/scrapeability-checker",
|
||||
"applicationCategory": "BusinessApplication",
|
||||
"operatingSystem": "Web Browser",
|
||||
"offers": {
|
||||
"@type": "Offer",
|
||||
"price": "0",
|
||||
"priceCurrency": "GBP"
|
||||
},
|
||||
"provider": {
|
||||
"@type": "Organization",
|
||||
"name": "UK Data Services",
|
||||
"url": "https://ukdataservices.co.uk"
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<style>
|
||||
.checker-container {
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
padding: 40px 20px;
|
||||
}
|
||||
.checker-header {
|
||||
text-align: center;
|
||||
margin-bottom: 40px;
|
||||
}
|
||||
.checker-header h1 {
|
||||
font-size: 2.2em;
|
||||
color: #1a1a2e;
|
||||
margin-bottom: 15px;
|
||||
}
|
||||
.checker-header p {
|
||||
color: #666;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
.checker-card {
|
||||
background: #fff;
|
||||
border-radius: 12px;
|
||||
box-shadow: 0 4px 20px rgba(0,0,0,0.08);
|
||||
padding: 40px;
|
||||
}
|
||||
.url-input-group {
|
||||
display: flex;
|
||||
gap: 12px;
|
||||
margin-bottom: 30px;
|
||||
}
|
||||
.url-input-group input {
|
||||
flex: 1;
|
||||
padding: 16px;
|
||||
border: 2px solid #e0e0e0;
|
||||
border-radius: 8px;
|
||||
font-size: 1em;
|
||||
}
|
||||
.url-input-group input:focus {
|
||||
border-color: #179e83;
|
||||
outline: none;
|
||||
}
|
||||
.url-input-group button {
|
||||
background: #179e83;
|
||||
color: white;
|
||||
border: none;
|
||||
padding: 16px 32px;
|
||||
border-radius: 8px;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
transition: background 0.3s;
|
||||
}
|
||||
.url-input-group button:hover {
|
||||
background: #148a72;
|
||||
}
|
||||
.url-input-group button:disabled {
|
||||
background: #ccc;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
#results {
|
||||
display: none;
|
||||
}
|
||||
.result-section {
|
||||
padding: 25px;
|
||||
background: #f8f9fa;
|
||||
border-radius: 8px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
.result-section h3 {
|
||||
color: #1a1a2e;
|
||||
margin-bottom: 15px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 10px;
|
||||
}
|
||||
.score-badge {
|
||||
display: inline-block;
|
||||
padding: 8px 16px;
|
||||
border-radius: 20px;
|
||||
font-weight: 700;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
.score-easy { background: #e8f5e9; color: #2e7d32; }
|
||||
.score-medium { background: #fff3e0; color: #ef6c00; }
|
||||
.score-hard { background: #ffebee; color: #c62828; }
|
||||
.factor-list {
|
||||
list-style: none;
|
||||
padding: 0;
|
||||
}
|
||||
.factor-list li {
|
||||
padding: 10px 0;
|
||||
border-bottom: 1px solid #e0e0e0;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
}
|
||||
.factor-list li:last-child {
|
||||
border-bottom: none;
|
||||
}
|
||||
.factor-status {
|
||||
padding: 4px 12px;
|
||||
border-radius: 12px;
|
||||
font-size: 0.85em;
|
||||
font-weight: 600;
|
||||
}
|
||||
.status-good { background: #e8f5e9; color: #2e7d32; }
|
||||
.status-warn { background: #fff3e0; color: #ef6c00; }
|
||||
.status-bad { background: #ffebee; color: #c62828; }
|
||||
.cta-section {
|
||||
text-align: center;
|
||||
padding: 30px;
|
||||
background: linear-gradient(135deg, #144784 0%, #179e83 100%);
|
||||
border-radius: 8px;
|
||||
color: white;
|
||||
}
|
||||
.cta-section h3 {
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
.cta-section p {
|
||||
opacity: 0.9;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
.cta-section a {
|
||||
display: inline-block;
|
||||
background: white;
|
||||
color: #144784;
|
||||
padding: 14px 28px;
|
||||
border-radius: 6px;
|
||||
text-decoration: none;
|
||||
font-weight: 600;
|
||||
}
|
||||
.loading {
|
||||
text-align: center;
|
||||
padding: 40px;
|
||||
}
|
||||
.loading .spinner {
|
||||
width: 40px;
|
||||
height: 40px;
|
||||
border: 4px solid #e0e0e0;
|
||||
border-top-color: #179e83;
|
||||
border-radius: 50%;
|
||||
animation: spin 1s linear infinite;
|
||||
margin: 0 auto 15px;
|
||||
}
|
||||
@keyframes spin {
|
||||
to { transform: rotate(360deg); }
|
||||
}
|
||||
.breadcrumb {
|
||||
padding: 15px 20px;
|
||||
background: #f5f5f5;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
.breadcrumb a { color: #144784; text-decoration: none; }
|
||||
.breadcrumb span { color: #888; margin: 0 8px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<?php include '../includes/navbar.php'; ?>
|
||||
|
||||
<nav class="breadcrumb">
|
||||
<a href="/">Home</a> <span>›</span> <a href="/tools/">Tools</a> <span>›</span> Scrapeability Checker
|
||||
</nav>
|
||||
|
||||
<div class="checker-container">
|
||||
<div class="checker-header">
|
||||
<h1>🔍 Website Scrapeability Checker</h1>
|
||||
<p>Enter a URL to analyze if it can be scraped and understand the technical complexity involved.</p>
|
||||
</div>
|
||||
|
||||
<div class="checker-card">
|
||||
<div class="url-input-group">
|
||||
<input type="url" id="urlInput" placeholder="https://example.com" required>
|
||||
<button onclick="checkWebsite()" id="checkBtn">Check Website</button>
|
||||
</div>
|
||||
|
||||
<div id="loading" style="display: none;" class="loading">
|
||||
<div class="spinner"></div>
|
||||
<p>Analyzing website...</p>
|
||||
</div>
|
||||
|
||||
<div id="results">
|
||||
<div class="result-section">
|
||||
<h3>📊 Overall Assessment</h3>
|
||||
<p>Scrapeability Score: <span id="scoreText" class="score-badge"></span></p>
|
||||
<p id="summaryText" style="margin-top: 15px; color: #666;"></p>
|
||||
</div>
|
||||
|
||||
<div class="result-section">
|
||||
<h3>🔧 Technical Factors</h3>
|
||||
<ul class="factor-list" id="factorsList"></ul>
|
||||
</div>
|
||||
|
||||
<div class="result-section">
|
||||
<h3>💡 Recommendations</h3>
|
||||
<div id="recommendations"></div>
|
||||
</div>
|
||||
|
||||
<div class="cta-section">
|
||||
<h3>Want Us to Handle This For You?</h3>
|
||||
<p>Our experts can build a reliable scraping solution tailored to this website.</p>
|
||||
<a href="/quote">Get a Free Quote →</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div style="margin-top: 40px; padding: 30px; background: #f8f9fa; border-radius: 12px;">
|
||||
<h3 style="color: #1a1a2e; margin-bottom: 15px;">How This Tool Works</h3>
|
||||
<p style="color: #666; line-height: 1.7;">
|
||||
Our scrapeability checker analyzes several factors that affect data extraction difficulty:
|
||||
</p>
|
||||
<ul style="color: #666; margin-top: 15px; padding-left: 20px; line-height: 1.8;">
|
||||
<li><strong>JavaScript Rendering</strong> — Whether the site requires a full browser to load content</li>
|
||||
<li><strong>Rate Limiting</strong> — How aggressively the site blocks automated requests</li>
|
||||
<li><strong>Authentication</strong> — Whether login is required to access data</li>
|
||||
<li><strong>Data Structure</strong> — How consistently the data is formatted</li>
|
||||
<li><strong>robots.txt</strong> — The site's crawling policies</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<?php include '../includes/footer.php'; ?>
|
||||
|
||||
<script>
|
||||
/**
 * Validate the URL field, run the (simulated) scrapeability analysis and
 * show the results panel.
 *
 * Only http(s) URLs are accepted. The button and spinner are restored in
 * every case, so a failure in analysis or rendering cannot leave the form
 * stuck in its loading state.
 *
 * @returns {Promise<void>}
 */
async function checkWebsite() {
    const url = document.getElementById('urlInput').value.trim();
    if (!url) {
        alert('Please enter a valid URL');
        return;
    }

    // Validate URL format. new URL() also accepts schemes such as ftp: or
    // javascript:, which are not websites to check.
    let parsed = null;
    try {
        parsed = new URL(url);
    } catch {
        parsed = null;
    }
    if (!parsed || (parsed.protocol !== 'http:' && parsed.protocol !== 'https:')) {
        alert('Please enter a valid URL (including https://)');
        return;
    }

    const checkBtn = document.getElementById('checkBtn');
    const loading = document.getElementById('loading');
    const results = document.getElementById('results');

    checkBtn.disabled = true;
    loading.style.display = 'block';
    results.style.display = 'none';

    try {
        // Simulate analysis (in production, this would call a backend API)
        await new Promise(r => setTimeout(r, 2000));

        // Generate analysis based on URL patterns
        const analysis = analyzeUrl(url);
        displayResults(analysis);

        results.style.display = 'block';
    } finally {
        checkBtn.disabled = false;
        loading.style.display = 'none';
    }
}
|
||||
|
||||
/**
 * Classify a URL's scraping difficulty from its hostname alone.
 *
 * This is a static heuristic: the hostname is compared against two short
 * lists of well-known sites and a canned set of factors/recommendations is
 * returned for the matching tier. No network request is made.
 *
 * List entries are matched on domain-label boundaries, not as raw substrings:
 *   - 'linkedin.com' matches linkedin.com, www.linkedin.com, but not
 *     notlinkedin.com;
 *   - an entry ending in '.' (e.g. 'amazon.') is a brand with any TLD, so it
 *     matches amazon.co.uk and www.amazon.de, but not myamazon.shop.
 *
 * @param {string} url - absolute URL; must be parseable by `new URL()`.
 * @returns {{score: string, scoreClass: string,
 *            factors: Array<{name: string, status: string, statusClass: string}>,
 *            recommendations: string[], url: string}}
 *     `score` is 'Easy', 'Moderate' or 'Complex'; `url` is the input unchanged.
 * @throws {TypeError} if `url` is not a valid absolute URL.
 */
function analyzeUrl(url) {
    // Lower-case and drop the trailing dot of a fully-qualified name
    // ("www.example.com.") so it compares like the usual spelling.
    const hostname = new URL(url).hostname.toLowerCase().replace(/\.$/, '');

    // Known difficult sites
    const hardSites = ['linkedin.com', 'facebook.com', 'instagram.com', 'twitter.com', 'amazon.'];
    const mediumSites = ['google.com', 'ebay.', 'zillow.com', 'indeed.com'];

    // Wrap the hostname in dots so a pattern can only match whole labels.
    // Patterns ending in '.' already carry their right-hand boundary.
    const matchesSite = (pattern) => (
        pattern.endsWith('.')
            ? `.${hostname}`.includes(`.${pattern}`)
            : `.${hostname}.`.includes(`.${pattern}.`)
    );

    // Check for known patterns
    const isHard = hardSites.some(matchesSite);
    const isMedium = mediumSites.some(matchesSite);

    let score = 'Easy';
    let scoreClass = 'score-easy';
    let factors = [];
    let recommendations = [];

    if (isHard) {
        score = 'Complex';
        scoreClass = 'score-hard';
        factors = [
            { name: 'JavaScript Rendering', status: 'Required', statusClass: 'status-warn' },
            { name: 'Anti-Bot Protection', status: 'Strong', statusClass: 'status-bad' },
            { name: 'Rate Limiting', status: 'Aggressive', statusClass: 'status-bad' },
            { name: 'Login Required', status: 'Likely', statusClass: 'status-warn' },
            { name: 'Data Structure', status: 'Dynamic', statusClass: 'status-warn' }
        ];
        recommendations = [
            '⚠️ This site has strong anti-bot measures and requires specialized handling.',
            '🔧 Residential proxies and browser automation are typically required.',
            '📞 We recommend discussing your specific requirements with our team.'
        ];
    } else if (isMedium) {
        score = 'Moderate';
        scoreClass = 'score-medium';
        factors = [
            { name: 'JavaScript Rendering', status: 'Partial', statusClass: 'status-warn' },
            { name: 'Anti-Bot Protection', status: 'Moderate', statusClass: 'status-warn' },
            { name: 'Rate Limiting', status: 'Standard', statusClass: 'status-good' },
            { name: 'Login Required', status: 'Optional', statusClass: 'status-good' },
            { name: 'Data Structure', status: 'Semi-structured', statusClass: 'status-warn' }
        ];
        recommendations = [
            '✓ This site can be scraped with proper techniques.',
            '🔧 May require browser automation for some pages.',
            '⏱️ Respectful rate limiting recommended to avoid blocks.'
        ];
    } else {
        // Anything not on either list falls through to the default 'Easy' tier.
        factors = [
            { name: 'JavaScript Rendering', status: 'Minimal', statusClass: 'status-good' },
            { name: 'Anti-Bot Protection', status: 'Basic', statusClass: 'status-good' },
            { name: 'Rate Limiting', status: 'Standard', statusClass: 'status-good' },
            { name: 'Login Required', status: 'No', statusClass: 'status-good' },
            { name: 'Data Structure', status: 'Structured', statusClass: 'status-good' }
        ];
        recommendations = [
            '✅ This site appears straightforward to scrape.',
            '🚀 Standard HTTP requests should work well.',
            '📊 Data extraction can likely be automated efficiently.'
        ];
    }

    return { score, scoreClass, factors, recommendations, url };
}
|
||||
|
||||
/**
 * Render an analysis object into the results panel.
 *
 * Writes to these elements: #scoreText (text + class), #summaryText (text),
 * #factorsList (one <li> per factor) and #recommendations (one <p> per item).
 *
 * Every value interpolated into markup is HTML-escaped first, so the function
 * stays safe if the analysis ever comes from a backend instead of the
 * built-in static tables.
 *
 * @param {{score: string, scoreClass: string,
 *          factors: Array<{name: string, status: string, statusClass: string}>,
 *          recommendations: string[]}} analysis
 * @returns {void}
 */
function displayResults(analysis) {
    const htmlEscapes = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    // Kept local to this function so it cannot clash with other page scripts.
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => htmlEscapes[ch]);

    const scoreBadge = document.getElementById('scoreText');
    scoreBadge.textContent = analysis.score;
    scoreBadge.className = 'score-badge ' + analysis.scoreClass;

    const summaries = {
        'Easy': 'This website appears straightforward to scrape with standard tools and techniques.',
        'Moderate': 'This website has some complexity but can be scraped with proper handling.',
        'Complex': 'This website has significant anti-scraping measures requiring specialized expertise.'
    };
    // An unrecognised score shows no summary rather than the text "undefined".
    document.getElementById('summaryText').textContent = summaries[analysis.score] || '';

    const factorsList = document.getElementById('factorsList');
    factorsList.innerHTML = analysis.factors.map(f => `
        <li>
            <span>${escapeHtml(f.name)}</span>
            <span class="factor-status ${escapeHtml(f.statusClass)}">${escapeHtml(f.status)}</span>
        </li>
    `).join('');

    document.getElementById('recommendations').innerHTML = analysis.recommendations.map(r =>
        `<p style="margin: 10px 0; color: #444;">${escapeHtml(r)}</p>`
    ).join('');
}
|
||||
|
||||
// Allow the Enter key in the URL field to trigger a check.
// Uses `keydown` because `keypress` is deprecated. `keydown` also fires while
// an IME composition is being confirmed with Enter, so those events are skipped.
document.getElementById('urlInput').addEventListener('keydown', function(e) {
    if (e.key !== 'Enter' || e.isComposing) {
        return;
    }
    // checkWebsite() is async: surface any rejection instead of leaving the
    // promise floating.
    checkWebsite().catch(function(err) {
        console.error('Scrapeability check failed:', err);
    });
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user