2026-02-05 04:11:15 +00:00
<?php
// Page-level metadata. These values are echoed (through htmlspecialchars)
// into the <title>, meta description, canonical link and Open Graph tags
// further down this file.
$page_title = "Free Robots.txt Analyzer | UK Data Services";
$page_description = "Analyze any website's robots.txt file instantly. See crawling rules, blocked paths, sitemaps, and get recommendations for web scraping compliance.";
$canonical_url = "https://ukdataservices.co.uk/tools/robots-analyzer";
?>
<!DOCTYPE html>
<html lang="en">
<head>
<!-- Title and description are taken from the PHP variables defined at the top of this file. -->
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title><?php echo htmlspecialchars($page_title); ?></title>
<meta name="description" content="<?php echo htmlspecialchars($page_description); ?>">
Fix navbar across all pages: add nav include, fonts, active state, spacing, stats, error pages
- Add nav.php include to 5 missing pages (cost-calculator, thank-you, 403, 404, 500)
- Add ErrorDocument directives to .htaccess for custom 403/404/500 pages
- Fix bogus accuracy stats (homepage, web-scraping, location pages)
- Fix invisible CTA buttons on property and financial service pages
- Add Google Fonts (Roboto Slab + Lato) to all pages missing it (tools, blog articles, error pages)
- Add active nav link highlighting (teal underline for current page)
- Improve footer contrast to WCAG AA, equal-height cards, mobile text scaling
- Consistent navbar-to-content spacing across all pages
- Bump cache version to v1.1.3
2026-02-11 07:15:11 +00:00
<!-- Preconnect to both Google Fonts origins before requesting the font stylesheet. -->
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Roboto+Slab:wght@100;200;300;400;500;600;700;800;900&amp;family=Lato:wght@100;200;300;400;500;600;700;800;900&amp;display=swap" rel="stylesheet">
<link rel="canonical" href="<?php echo htmlspecialchars($canonical_url); ?>">
2026-02-05 04:11:15 +00:00
<!-- Open Graph tags reuse the page title, description and canonical URL. -->
<meta property="og:title" content="<?php echo htmlspecialchars($page_title); ?>">
<meta property="og:description" content="<?php echo htmlspecialchars($page_description); ?>">
<meta property="og:type" content="website">
<meta property="og:url" content="<?php echo htmlspecialchars($canonical_url); ?>">
2026-02-22 11:11:56 +00:00
<!-- Site stylesheet; the v= query string is a cache-busting version stamp. -->
<link rel="stylesheet" href="../assets/css/main.css?v=20260222">
2026-02-05 04:11:15 +00:00
<!-- Schema.org structured data describing this tool as a free SoftwareApplication. -->
<script type="application/ld+json">
{
  "@context": "https://schema.org",
  "@type": "SoftwareApplication",
  "name": "Robots.txt Analyzer",
  "description": "Free tool to analyze robots.txt files and understand crawling permissions",
  "url": "https://ukdataservices.co.uk/tools/robots-analyzer",
  "applicationCategory": "BusinessApplication",
  "operatingSystem": "Web Browser",
  "offers": { "@type": "Offer", "price": "0", "priceCurrency": "GBP" }
}
</script>
<style>
/* Page-specific styles for the robots.txt analyzer (loaded in addition to main.css). */

/* Layout and header */
.analyzer-container { max-width: 900px; margin: 0 auto; padding: 40px 20px; }
.analyzer-header { text-align: center; margin-bottom: 40px; }
.analyzer-header h1 { font-size: 2.2em; color: #1a1a2e; margin-bottom: 15px; }
.analyzer-header p { color: #666; font-size: 1.1em; }
.analyzer-card { background: #fff; border-radius: 12px; box-shadow: 0 4px 20px rgba(0,0,0,0.08); padding: 40px; }

/* URL input row. The default outline is removed on focus; the border colour change is the focus indicator. */
.url-input-group { display: flex; gap: 12px; margin-bottom: 30px; }
.url-input-group input { flex: 1; padding: 16px; border: 2px solid #e0e0e0; border-radius: 8px; font-size: 1em; }
.url-input-group input:focus { border-color: #179e83; outline: none; }
.url-input-group button { background: #179e83; color: white; border: none; padding: 16px 32px; border-radius: 8px; font-weight: 600; cursor: pointer; }
.url-input-group button:hover { background: #148a72; }
/* Declared after :hover so a disabled button stays grey while hovered. */
.url-input-group button:disabled { background: #ccc; cursor: not-allowed; }

/* Result panels: two columns, collapsing to one at 768px and below. */
.results-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 20px; }
@media (max-width: 768px) { .results-grid { grid-template-columns: 1fr; } }
.result-box { background: #f8f9fa; border-radius: 8px; padding: 20px; }
.result-box h3 { color: #1a1a2e; margin-bottom: 15px; font-size: 1.1em; display: flex; align-items: center; gap: 8px; }
.result-box pre { background: #1a1a2e; color: #a5d6a7; padding: 15px; border-radius: 6px; overflow-x: auto; font-size: 0.85em; max-height: 300px; }

/* Summary badges */
.stat-badge { display: inline-block; padding: 6px 12px; border-radius: 15px; font-size: 0.9em; font-weight: 600; margin: 4px; }
.badge-green { background: #e8f5e9; color: #2e7d32; }
.badge-yellow { background: #fff3e0; color: #ef6c00; }
.badge-red { background: #ffebee; color: #c62828; }
.badge-blue { background: #e3f2fd; color: #1565c0; }

/* Loading indicator and results start hidden; the script toggles their display. */
.loading { text-align: center; padding: 40px; display: none; }
.spinner { width: 40px; height: 40px; border: 4px solid #e0e0e0; border-top-color: #179e83; border-radius: 50%; animation: spin 1s linear infinite; margin: 0 auto 15px; }
@keyframes spin { to { transform: rotate(360deg); } }
#results { display: none; }

/* Breadcrumb */
.breadcrumb { padding: 15px 20px; background: #f5f5f5; font-size: 0.9em; }
.breadcrumb a { color: #144784; text-decoration: none; }
.breadcrumb span { color: #888; margin: 0 8px; }

/* Scrollable path and sitemap lists */
.path-list { list-style: none; padding: 0; margin: 0; max-height: 200px; overflow-y: auto; }
.path-list li { padding: 8px 12px; border-bottom: 1px solid #e0e0e0; font-family: monospace; font-size: 0.9em; }
.path-list li:last-child { border-bottom: none; }

/* Call-to-action panel */
.cta-box { text-align: center; padding: 30px; background: linear-gradient(135deg, #144784 0%, #179e83 100%); border-radius: 8px; color: white; margin-top: 30px; }
.cta-box a { display: inline-block; background: white; color: #144784; padding: 14px 28px; border-radius: 6px; text-decoration: none; font-weight: 600; }
</style>
</head>
<body>
2026-02-10 22:24:40 +00:00
<?php
// Navigation include, resolved from the web server's document root.
include($_SERVER["DOCUMENT_ROOT"] . "/includes/nav.php");
?>
2026-02-05 04:11:15 +00:00
<nav class="breadcrumb" aria-label="Breadcrumb">
  <a href="/">Home</a> <span aria-hidden="true">›</span> <a href="/tools/">Tools</a> <span aria-hidden="true">›</span> Robots.txt Analyzer
</nav>
<div class="analyzer-container">
  <div class="analyzer-header">
    <h1>🤖 Robots.txt Analyzer</h1>
    <p>Analyze any website's robots.txt to understand crawling rules and scraping permissions.</p>
  </div>
  <div class="analyzer-card">
    <!-- The ids below (urlInput, analyzeBtn, loading, results, ...) are looked up by the inline script at the end of the page. -->
    <div class="url-input-group">
      <!-- The placeholder is only a hint; aria-label gives the field its accessible name. -->
      <input type="url" id="urlInput" placeholder="https://example.com" aria-label="Website URL" required>
      <button type="button" onclick="analyzeRobots()" id="analyzeBtn">Analyze</button>
    </div>
    <!-- Shown by the script while a request is in flight. -->
    <div id="loading" class="loading">
      <div class="spinner"></div>
      <p>Fetching and analyzing robots.txt...</p>
    </div>
    <!-- Hidden by CSS until the script has results (or an error) to show. -->
    <div id="results">
      <div style="margin-bottom: 25px;">
        <h3 style="color: #1a1a2e; margin-bottom: 15px;">📊 Quick Summary</h3>
        <div id="summaryBadges"></div>
      </div>
      <div class="results-grid">
        <div class="result-box">
          <h3>🚫 Blocked Paths</h3>
          <ul class="path-list" id="blockedPaths"></ul>
        </div>
        <div class="result-box">
          <h3>✅ Allowed Paths</h3>
          <ul class="path-list" id="allowedPaths"></ul>
        </div>
      </div>
      <div class="result-box" style="margin-top: 20px;">
        <h3>🗺️ Sitemaps Found</h3>
        <ul class="path-list" id="sitemaps"></ul>
      </div>
      <div class="result-box" style="margin-top: 20px;">
        <h3>📄 Raw robots.txt</h3>
        <pre id="rawContent"></pre>
      </div>
      <div class="cta-box">
        <h3>Need Help With Compliant Scraping?</h3>
        <p style="opacity: 0.9; margin: 10px 0 20px;">We build scrapers that respect robots.txt and follow best practices.</p>
        <a href="/quote">Get a Free Quote →</a>
      </div>
    </div>
  </div>
</div>
<?php
// Footer include. __DIR__ anchors the path to this file's directory so the
// include does not depend on the current working directory or include_path.
include __DIR__ . '/../includes/footer.php';
?>
<script>
/**
 * Read the URL typed into #urlInput, request that site's robots.txt through
 * the /api/fetch-robots.php endpoint and render the outcome.
 *
 * The endpoint's JSON is expected to carry either `error` (passed to
 * displayError) or `content` (passed to displayResults). If the request or
 * the JSON parsing fails, simulateAnalysis() is used as a fallback.
 *
 * @returns {Promise<void>}
 */
async function analyzeRobots() {
    const analyzeBtn = document.getElementById('analyzeBtn');
    const loading = document.getElementById('loading');
    const results = document.getElementById('results');

    // The Enter-key handler can call this while a request is still running;
    // the disabled button doubles as the "busy" flag.
    if (analyzeBtn.disabled) return;

    const urlInput = document.getElementById('urlInput').value.trim();
    if (!urlInput) { alert('Please enter a URL'); return; }

    let baseUrl;
    try { baseUrl = new URL(urlInput); }
    catch { alert('Please enter a valid URL'); return; }

    // new URL() also accepts schemes such as javascript: or mailto:, which
    // have no robots.txt; only http(s) sites make sense here.
    if (baseUrl.protocol !== 'http:' && baseUrl.protocol !== 'https:') {
        alert('Please enter a valid URL');
        return;
    }

    analyzeBtn.disabled = true;
    loading.style.display = 'block';
    results.style.display = 'none';

    // origin = scheme + host + port, so a non-default port is preserved.
    const robotsUrl = `${baseUrl.origin}/robots.txt`;

    try {
        // Use a CORS proxy or backend in production
        const response = await fetch(`/api/fetch-robots.php?url=${encodeURIComponent(robotsUrl)}`);
        const data = await response.json();
        if (data.error) {
            displayError(data.error);
        } else {
            displayResults(data.content, baseUrl.hostname);
        }
    } catch (err) {
        // Fallback: simulate analysis.
        // NOTE(review): this shows sample data, not the site's real
        // robots.txt. Consider calling displayError() here instead.
        simulateAnalysis(baseUrl.hostname);
    } finally {
        // Always restore the UI, even if rendering or the fallback throws.
        analyzeBtn.disabled = false;
        loading.style.display = 'none';
        results.style.display = 'block';
    }
}
/**
 * Render a built-in sample robots.txt for the given host. Used by
 * analyzeRobots() as a fallback when the fetch endpoint cannot be reached.
 *
 * @param {string} hostname Host name inserted into the sample Sitemap URLs.
 */
function simulateAnalysis(hostname) {
    // Simulated robots.txt for demo. Built from an array so the source
    // indentation never leaks into the text shown in the raw-content panel.
    const sampleRobots = [
        'User-agent: *',
        'Disallow: /admin/',
        'Disallow: /private/',
        'Disallow: /api/internal/',
        'Allow: /api/public/',
        'Allow: /',
        `Sitemap: https://${hostname}/sitemap.xml`,
        `Sitemap: https://${hostname}/sitemap-blog.xml`,
        '# Crawl-delay: 1'
    ].join('\n');
    displayResults(sampleRobots, hostname);
}
/**
 * Parse robots.txt text and fill the summary badges, blocked/allowed path
 * lists, sitemap list and raw-content panel.
 *
 * The content comes from a third-party site, so everything is inserted with
 * DOM APIs and textContent rather than string-built HTML.
 *
 * @param {string} content  Raw robots.txt text.
 * @param {string} hostname Host that was analysed (currently unused here;
 *                          kept so existing callers stay valid).
 */
function displayResults(content, hostname) {
    const blocked = [], allowed = [], sitemaps = [];
    let crawlDelay = null;

    content.split('\n').forEach(rawLine => {
        // Everything from '#' to the end of the line is a comment.
        const line = rawLine.replace(/#.*$/, '').trim();
        const sep = line.indexOf(':');
        if (sep === -1) return;

        // Field names are case-insensitive and may have whitespace before
        // the colon. The value is everything after the FIRST colon, so
        // "https://..." sitemap URLs stay intact.
        const field = line.slice(0, sep).trim().toLowerCase();
        const value = line.slice(sep + 1).trim();

        switch (field) {
            case 'disallow':
                // An empty Disallow blocks nothing, so it is not listed.
                if (value) blocked.push(value);
                break;
            case 'allow':
                if (value) allowed.push(value);
                break;
            case 'sitemap':
                sitemaps.push(value);
                break;
            case 'crawl-delay':
                crawlDelay = value;
                break;
        }
    });

    // Small DOM builders, local to this function.
    const makeBadge = (tone, label) => {
        const badge = document.createElement('span');
        badge.className = `stat-badge badge-${tone}`;
        badge.textContent = label;
        return badge;
    };
    const makeItem = (text) => {
        const li = document.createElement('li');
        li.textContent = text;
        return li;
    };
    const makePlaceholder = (text) => {
        const li = makeItem(text);
        li.style.color = '#888';
        return li;
    };
    const fill = (id, nodes) => {
        const target = document.getElementById(id);
        target.textContent = '';
        nodes.forEach(node => target.appendChild(node));
    };

    // Summary badges
    const badges = [
        makeBadge('blue', `${blocked.length} blocked paths`),
        makeBadge('green', `${allowed.length} allowed paths`),
        makeBadge('blue', `${sitemaps.length} sitemaps`)
    ];
    if (crawlDelay) badges.push(makeBadge('yellow', `Crawl delay: ${crawlDelay}s`));
    if (blocked.length === 0) badges.push(makeBadge('green', 'Open to crawling'));
    if (blocked.length > 10) badges.push(makeBadge('yellow', 'Many restrictions'));
    fill('summaryBadges', badges);

    // Blocked paths
    fill('blockedPaths', blocked.length
        ? blocked.map(makeItem)
        : [makePlaceholder('No blocked paths')]);

    // Allowed paths
    fill('allowedPaths', allowed.length
        ? allowed.map(makeItem)
        : [makePlaceholder('No explicit allows (default: all allowed)')]);

    // Sitemaps: only http(s) URLs become links; anything else is shown as
    // plain text and flagged.
    const makeSitemapItem = (s) => {
        const li = document.createElement('li');
        if (/^https?:\/\//i.test(s)) {
            const link = document.createElement('a');
            link.href = s;
            link.target = '_blank';
            link.rel = 'noopener noreferrer';
            link.textContent = s;
            li.appendChild(link);
        } else {
            li.appendChild(document.createTextNode(s + ' '));
            const note = document.createElement('span');
            note.style.color = '#c62828';
            note.textContent = '(invalid URL)';
            li.appendChild(note);
        }
        return li;
    };
    fill('sitemaps', sitemaps.length
        ? sitemaps.map(makeSitemapItem)
        : [makePlaceholder('No sitemaps declared')]);

    // Raw content
    document.getElementById('rawContent').textContent = content;
}
/**
 * Escape text for safe insertion into HTML, in both element content and
 * quoted attribute values.
 *
 * Escapes & < > " and '. The quotes matter: without them a value placed
 * inside href="..." could close the attribute and inject new ones.
 *
 * @param {*} text Value to escape; null/undefined become an empty string,
 *                 other values are converted with String().
 * @returns {string} The escaped string.
 */
function escapeHtml(text) {
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    };
    const str = text == null ? '' : String(text);
    return str.replace(/[&<>"']/g, ch => entities[ch]);
}
/**
 * Show an error in the results area: a red "Error" badge, the message in the
 * blocked-paths list and raw-content panel, and empty allowed/sitemap lists.
 *
 * Element ids match the ones used by the other functions in this script.
 *
 * @param {string} message Error text to display (escaped before insertion
 *                         into HTML).
 */
function displayError(message) {
    document.getElementById('loading').style.display = 'none';
    document.getElementById('results').style.display = 'block';
    document.getElementById('summaryBadges').innerHTML = '<span class="stat-badge badge-red">Error</span>';
    document.getElementById('blockedPaths').innerHTML = '<li style="color:#c62828">' + escapeHtml(message) + '</li>';
    document.getElementById('allowedPaths').innerHTML = '';
    document.getElementById('sitemaps').innerHTML = '';
    // textContent needs no escaping.
    document.getElementById('rawContent').textContent = 'Error: ' + message;
}
// Pressing Enter in the URL field starts an analysis. keydown replaces the
// deprecated keypress event; auto-repeat and IME composition are ignored so
// a held key or a composition-confirming Enter does not trigger a run.
document.getElementById('urlInput').addEventListener('keydown', e => {
    if (e.key === 'Enter' && !e.isComposing && !e.repeat) analyzeRobots();
});
</script>
</body>
</html>