- Updated 30 additional files to use shared includes/nav.php - Fixed references to old navbar.php and header.php nav includes - All 52 pages now use single shared navigation
261 lines
12 KiB
PHP
261 lines
12 KiB
PHP
<?php
// Page-level metadata. The <head> markup further down echoes each of these
// through htmlspecialchars() into the <title>, description, canonical and
// Open Graph tags.
$page_title       = 'Free Robots.txt Analyzer | UK Data Services';
$page_description = "Analyze any website's robots.txt file instantly. See crawling rules, blocked paths, sitemaps, and get recommendations for web scraping compliance.";
$canonical_url    = 'https://ukdataservices.co.uk/tools/robots-analyzer';
?>
|
||
<!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="UTF-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<title><?php echo htmlspecialchars($page_title); ?></title>
|
||
<meta name="description" content="<?php echo htmlspecialchars($page_description); ?>">
|
||
<link rel="canonical" href="<?php echo htmlspecialchars($canonical_url); ?>">
|
||
|
||
<meta property="og:title" content="<?php echo htmlspecialchars($page_title); ?>">
|
||
<meta property="og:description" content="<?php echo htmlspecialchars($page_description); ?>">
|
||
<meta property="og:type" content="website">
|
||
|
||
<link rel="stylesheet" href="../assets/css/main.css">
|
||
|
||
<script type="application/ld+json">
|
||
{
|
||
"@context": "https://schema.org",
|
||
"@type": "SoftwareApplication",
|
||
"name": "Robots.txt Analyzer",
|
||
"description": "Free tool to analyze robots.txt files and understand crawling permissions",
|
||
"url": "https://ukdataservices.co.uk/tools/robots-analyzer",
|
||
"applicationCategory": "BusinessApplication",
|
||
"operatingSystem": "Web Browser",
|
||
"offers": { "@type": "Offer", "price": "0", "priceCurrency": "GBP" }
|
||
}
|
||
</script>
|
||
|
||
<style>
|
||
.analyzer-container { max-width: 900px; margin: 0 auto; padding: 40px 20px; }
|
||
.analyzer-header { text-align: center; margin-bottom: 40px; }
|
||
.analyzer-header h1 { font-size: 2.2em; color: #1a1a2e; margin-bottom: 15px; }
|
||
.analyzer-header p { color: #666; font-size: 1.1em; }
|
||
.analyzer-card { background: #fff; border-radius: 12px; box-shadow: 0 4px 20px rgba(0,0,0,0.08); padding: 40px; }
|
||
.url-input-group { display: flex; gap: 12px; margin-bottom: 30px; }
|
||
.url-input-group input { flex: 1; padding: 16px; border: 2px solid #e0e0e0; border-radius: 8px; font-size: 1em; }
|
||
.url-input-group input:focus { border-color: #179e83; outline: none; }
|
||
.url-input-group button { background: #179e83; color: white; border: none; padding: 16px 32px; border-radius: 8px; font-weight: 600; cursor: pointer; }
|
||
.url-input-group button:hover { background: #148a72; }
|
||
.url-input-group button:disabled { background: #ccc; cursor: not-allowed; }
|
||
.results-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 20px; }
|
||
@media (max-width: 768px) { .results-grid { grid-template-columns: 1fr; } }
|
||
.result-box { background: #f8f9fa; border-radius: 8px; padding: 20px; }
|
||
.result-box h3 { color: #1a1a2e; margin-bottom: 15px; font-size: 1.1em; display: flex; align-items: center; gap: 8px; }
|
||
.result-box pre { background: #1a1a2e; color: #a5d6a7; padding: 15px; border-radius: 6px; overflow-x: auto; font-size: 0.85em; max-height: 300px; }
|
||
.stat-badge { display: inline-block; padding: 6px 12px; border-radius: 15px; font-size: 0.9em; font-weight: 600; margin: 4px; }
|
||
.badge-green { background: #e8f5e9; color: #2e7d32; }
|
||
.badge-yellow { background: #fff3e0; color: #ef6c00; }
|
||
.badge-red { background: #ffebee; color: #c62828; }
|
||
.badge-blue { background: #e3f2fd; color: #1565c0; }
|
||
.loading { text-align: center; padding: 40px; display: none; }
|
||
.spinner { width: 40px; height: 40px; border: 4px solid #e0e0e0; border-top-color: #179e83; border-radius: 50%; animation: spin 1s linear infinite; margin: 0 auto 15px; }
|
||
@keyframes spin { to { transform: rotate(360deg); } }
|
||
#results { display: none; }
|
||
.breadcrumb { padding: 15px 20px; background: #f5f5f5; font-size: 0.9em; }
|
||
.breadcrumb a { color: #144784; text-decoration: none; }
|
||
.breadcrumb span { color: #888; margin: 0 8px; }
|
||
.path-list { list-style: none; padding: 0; margin: 0; max-height: 200px; overflow-y: auto; }
|
||
.path-list li { padding: 8px 12px; border-bottom: 1px solid #e0e0e0; font-family: monospace; font-size: 0.9em; }
|
||
.path-list li:last-child { border-bottom: none; }
|
||
.cta-box { text-align: center; padding: 30px; background: linear-gradient(135deg, #144784 0%, #179e83 100%); border-radius: 8px; color: white; margin-top: 30px; }
|
||
.cta-box a { display: inline-block; background: white; color: #144784; padding: 14px 28px; border-radius: 6px; text-decoration: none; font-weight: 600; }
|
||
</style>
|
||
</head>
|
||
<body>
|
||
<?php include($_SERVER["DOCUMENT_ROOT"] . "/includes/nav.php"); ?>
|
||
|
||
<nav class="breadcrumb">
|
||
<a href="/">Home</a> <span>›</span> <a href="/tools/">Tools</a> <span>›</span> Robots.txt Analyzer
|
||
</nav>
|
||
|
||
<div class="analyzer-container">
|
||
<div class="analyzer-header">
|
||
<h1>🤖 Robots.txt Analyzer</h1>
|
||
<p>Analyze any website's robots.txt to understand crawling rules and scraping permissions.</p>
|
||
</div>
|
||
|
||
<div class="analyzer-card">
|
||
<div class="url-input-group">
|
||
<input type="url" id="urlInput" placeholder="https://example.com" required>
|
||
<button onclick="analyzeRobots()" id="analyzeBtn">Analyze</button>
|
||
</div>
|
||
|
||
<div id="loading" class="loading">
|
||
<div class="spinner"></div>
|
||
<p>Fetching and analyzing robots.txt...</p>
|
||
</div>
|
||
|
||
<div id="results">
|
||
<div style="margin-bottom: 25px;">
|
||
<h3 style="color: #1a1a2e; margin-bottom: 15px;">📊 Quick Summary</h3>
|
||
<div id="summaryBadges"></div>
|
||
</div>
|
||
|
||
<div class="results-grid">
|
||
<div class="result-box">
|
||
<h3>🚫 Blocked Paths</h3>
|
||
<ul class="path-list" id="blockedPaths"></ul>
|
||
</div>
|
||
<div class="result-box">
|
||
<h3>✅ Allowed Paths</h3>
|
||
<ul class="path-list" id="allowedPaths"></ul>
|
||
</div>
|
||
</div>
|
||
|
||
<div class="result-box" style="margin-top: 20px;">
|
||
<h3>🗺️ Sitemaps Found</h3>
|
||
<ul class="path-list" id="sitemaps"></ul>
|
||
</div>
|
||
|
||
<div class="result-box" style="margin-top: 20px;">
|
||
<h3>📄 Raw robots.txt</h3>
|
||
<pre id="rawContent"></pre>
|
||
</div>
|
||
|
||
<div class="cta-box">
|
||
<h3>Need Help With Compliant Scraping?</h3>
|
||
<p style="opacity: 0.9; margin: 10px 0 20px;">We build scrapers that respect robots.txt and follow best practices.</p>
|
||
<a href="/quote">Get a Free Quote →</a>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
<?php
// Anchor the footer path to this file's directory. A bare '../...' path is
// resolved against the process working directory, which is not guaranteed to
// be this script's directory (e.g. when the page is included from elsewhere).
include __DIR__ . '/../includes/footer.php';
?>
|
||
|
||
<script>
|
||
/**
 * Entry point for the "Analyze" button and the Enter key.
 *
 * Reads the URL typed into #urlInput, derives that site's /robots.txt URL,
 * requests it through /api/fetch-robots.php and hands the JSON reply to
 * displayResults() or displayError(). While the request is in flight the
 * button is disabled and the #loading panel is shown.
 *
 * @returns {Promise<void>}
 */
async function analyzeRobots() {
    const analyzeBtn = document.getElementById('analyzeBtn');

    // The Enter-key handler calls this function directly, so a disabled
    // button alone does not stop a second, overlapping request.
    if (analyzeBtn.disabled) return;

    const urlInput = document.getElementById('urlInput').value.trim();
    if (!urlInput) { alert('Please enter a URL'); return; }

    let baseUrl;
    try { baseUrl = new URL(urlInput); }
    catch { alert('Please enter a valid URL'); return; }

    // new URL() also accepts schemes such as ftp: or javascript:, for which
    // a robots.txt lookup is meaningless.
    if (baseUrl.protocol !== 'http:' && baseUrl.protocol !== 'https:') {
        alert('Please enter a valid URL');
        return;
    }

    const loadingPanel = document.getElementById('loading');
    const resultsPanel = document.getElementById('results');

    analyzeBtn.disabled = true;
    loadingPanel.style.display = 'block';
    resultsPanel.style.display = 'none';

    // origin keeps an explicit port (https://example.com:8443), which
    // protocol + hostname would silently drop.
    const robotsUrl = `${baseUrl.origin}/robots.txt`;

    try {
        // Use a CORS proxy or backend in production
        const response = await fetch(`/api/fetch-robots.php?url=${encodeURIComponent(robotsUrl)}`);
        const data = await response.json();

        if (data.error) {
            displayError(data.error);
        } else {
            displayResults(data.content, baseUrl.hostname);
        }
    } catch (err) {
        // Fallback: simulate analysis
        // NOTE(review): this renders sample rules, not the site's real
        // robots.txt, without telling the user - confirm this is intended
        // outside of demos.
        simulateAnalysis(baseUrl.hostname);
    } finally {
        // Always restore the UI, even if rendering itself throws; otherwise
        // the button would stay disabled until the page is reloaded.
        analyzeBtn.disabled = false;
        loadingPanel.style.display = 'none';
        resultsPanel.style.display = 'block';
    }
}
|
||
|
||
/**
 * Renders a canned robots.txt for demo purposes.
 *
 * Builds a fixed sample file whose two Sitemap lines point at the given
 * hostname, then passes it to displayResults() exactly as a fetched file
 * would be.
 *
 * @param {string} hostname Host name interpolated into the sample Sitemap URLs.
 */
function simulateAnalysis(hostname) {
    const siteRoot = `https://${hostname}`;

    // One array entry per line of the sample file; '' entries are blank lines.
    const demoLines = [
        'User-agent: *',
        'Disallow: /admin/',
        'Disallow: /private/',
        'Disallow: /api/internal/',
        'Allow: /api/public/',
        'Allow: /',
        '',
        `Sitemap: ${siteRoot}/sitemap.xml`,
        `Sitemap: ${siteRoot}/sitemap-blog.xml`,
        '',
        '# Crawl-delay: 1',
    ];

    displayResults(demoLines.join('\n'), hostname);
}
|
||
|
||
/**
 * Parses robots.txt text and renders the summary badges, the blocked and
 * allowed path lists, the sitemap list and the raw file.
 *
 * Everything taken from the file is untrusted remote content, so it is
 * written to the page through textContent / DOM properties only and never
 * concatenated into an HTML string.
 *
 * @param {string} content  Raw robots.txt text; null/undefined is treated as empty.
 * @param {string} hostname Host the file belongs to. Not used by this function;
 *                          kept so existing callers keep working.
 */
function displayResults(content, hostname) {
    const text = content == null ? '' : String(content);
    const blocked = [], allowed = [], sitemaps = [];
    let crawlDelay = null;

    text.split(/\r?\n/).forEach(rawLine => {
        // '#' starts a comment that runs to the end of the line.
        const line = rawLine.replace(/#.*$/, '').trim();
        const colonAt = line.indexOf(':');
        if (colonAt === -1) return;

        // Split on the first colon only: values such as sitemap URLs contain
        // further colons.
        const directive = line.slice(0, colonAt).trim().toLowerCase();
        const value = line.slice(colonAt + 1).trim();

        if (directive === 'disallow') {
            if (value) blocked.push(value);
        } else if (directive === 'allow') {
            if (value) allowed.push(value);
        } else if (directive === 'sitemap') {
            sitemaps.push(value);
        } else if (directive === 'crawl-delay') {
            crawlDelay = value;
        }
    });

    // Summary badges
    const summary = document.getElementById('summaryBadges');
    summary.textContent = '';
    const addBadge = (tone, label) => {
        const badge = document.createElement('span');
        badge.className = `stat-badge badge-${tone}`;
        badge.textContent = label;
        summary.appendChild(badge);
    };
    addBadge('blue', `${blocked.length} blocked paths`);
    addBadge('green', `${allowed.length} allowed paths`);
    addBadge('blue', `${sitemaps.length} sitemaps`);
    if (crawlDelay) addBadge('yellow', `Crawl delay: ${crawlDelay}s`);
    if (blocked.length === 0) addBadge('green', 'Open to crawling');
    if (blocked.length > 10) addBadge('yellow', 'Many restrictions');

    // Replaces a list's children with one <li> per item, or with a single
    // greyed-out placeholder when there are no items.
    const fillList = (listId, items, emptyLabel, buildItem) => {
        const list = document.getElementById(listId);
        list.textContent = '';
        if (items.length === 0) {
            const placeholder = document.createElement('li');
            placeholder.style.color = '#888';
            placeholder.textContent = emptyLabel;
            list.appendChild(placeholder);
            return;
        }
        items.forEach(item => list.appendChild(buildItem(item)));
    };

    // Plain monospace entry used for path rules.
    const pathItem = path => {
        const li = document.createElement('li');
        li.textContent = path;
        return li;
    };

    // Sitemap entry: a link for http(s) URLs, otherwise the text flagged as invalid.
    const sitemapItem = url => {
        const li = document.createElement('li');
        if (/^https?:\/\//i.test(url)) {
            const link = document.createElement('a');
            link.href = url;
            link.target = '_blank';
            link.rel = 'noopener';
            link.textContent = url;
            li.appendChild(link);
        } else {
            li.appendChild(document.createTextNode(`${url} `));
            const warning = document.createElement('span');
            warning.style.color = '#c62828';
            warning.textContent = '(invalid URL)';
            li.appendChild(warning);
        }
        return li;
    };

    // Blocked paths
    fillList('blockedPaths', blocked, 'No blocked paths', pathItem);

    // Allowed paths
    fillList('allowedPaths', allowed, 'No explicit allows (default: all allowed)', pathItem);

    // Sitemaps
    fillList('sitemaps', sitemaps, 'No sitemaps declared', sitemapItem);

    // Raw content
    document.getElementById('rawContent').textContent = text;
}
|
||
|
||
|
||
/**
 * Escapes a value for insertion into HTML, in both element content and
 * quoted attribute values.
 *
 * Replaces &, <, >, " and ' with character references. Serialising a text
 * node through innerHTML leaves both quote characters untouched, which is
 * unsafe when the result is placed inside an attribute such as href="...".
 *
 * @param {*} text Value to escape; null and undefined become ''.
 * @returns {string} The escaped string.
 */
function escapeHtml(text) {
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    const raw = text == null ? '' : String(text);
    return raw.replace(/[&<>"']/g, ch => entities[ch]);
}
|
||
|
||
/**
 * Shows a failed analysis in the results panel.
 *
 * Hides the loading indicator, reveals #results, replaces the summary with a
 * red "Error" badge, puts the escaped message in the blocked-paths list,
 * clears the other two lists and writes the message to the raw-content box.
 *
 * @param {string} message Error text to show to the user.
 */
function displayError(message) {
    const byId = id => document.getElementById(id);

    byId('loading').style.display = 'none';
    byId('results').style.display = 'block';

    byId('summaryBadges').innerHTML = '<span class="stat-badge badge-red">Error</span>';
    // The message goes into an HTML string here, so it is passed through escapeHtml().
    byId('blockedPaths').innerHTML = `<li style="color:#c62828">${escapeHtml(message)}</li>`;
    byId('allowedPaths').innerHTML = '';
    byId('sitemaps').innerHTML = '';

    // textContent needs no escaping.
    byId('rawContent').textContent = `Error: ${message}`;
}
|
||
|
||
|
||
// Pressing Enter inside the URL field starts the analysis, the same as
// clicking the Analyze button.
document.getElementById('urlInput').addEventListener('keypress', function (event) {
    if (event.key !== 'Enter') {
        return;
    }
    analyzeRobots();
});
|
||
</script>
|
||
</body>
|
||
</html>
|