- Remove restricted FAQPage schema and duplicate Organization schema - Remove incomplete LocalBusiness schema - Fix Organization: founder Peter Foster, foundingDate 2015 - Rewrite H1 to include target keywords - Fix BreadcrumbList with correct page URLs - Remove SearchAction pointing to 404 endpoint - Simplify verbose content across 13 key paragraphs - Promote service card headings from H3 to H2 with keyword-rich names - Add width/height attributes to all images (CLS fix) - Create 1200x630 OG social card image - Update og:image, og:image:width, og:image:height meta tags - Fix broken LinkedIn (www prefix) and Twitter (x.com) social links - Add Free Tools footer section linking to individual tools - Remove google-oauth-callback and /index URLs from sitemap.xml - Add AI crawler rules to robots.txt (GPTBot, ClaudeBot, Perplexity, etc.) - Add Disallow for oauth callback endpoints in robots.txt - Create llms.txt for AI search engine readiness
99 lines
1.8 KiB
Plaintext
99 lines
1.8 KiB
Plaintext
# UK Data Services - robots.txt
|
|
# https://ukdataservices.co.uk
|
|
|
|
# Default group: applies to every crawler that has no more specific
# User-agent group of its own further down in this file.
#
# Notes for maintainers:
#  - No blanket "Allow: /" is needed: anything not matched by a Disallow rule
#    is allowed by default. A leading "Allow: /" would also match every URL
#    first in first-match parsers and neutralise the Disallow rules below.
#  - No blank lines inside the group: some older parsers treat a blank line
#    as the end of the record. Use "#" lines as visual separators instead.
#  - NOTE(review): robots.txt is public and is not access control. The paths
#    listed here must also be denied at the web-server level.
User-agent: *
# Crawl-delay lives inside the group (before the Sitemap lines) so parsers
# that honour it associate it with this group. Not all crawlers support it.
Crawl-delay: 1
#
# Allow important static assets for rendering.
# Placed ahead of the broader "Disallow: /assets/" so first-match parsers
# reach them first; longest-match parsers select them regardless of order.
Allow: /assets/css/*.css
Allow: /assets/js/*.js
Allow: /assets/images/*.webp
Allow: /assets/images/*.png
Allow: /assets/images/*.jpg
Allow: /assets/images/*.svg
#
# Block sensitive directories and files
Disallow: /includes/
Disallow: /assets/
Disallow: /admin/
Disallow: /logs/
Disallow: /vendor/
Disallow: /config/
Disallow: /database/
Disallow: /docker/
Disallow: /redis/
Disallow: /google-oauth-callback
Disallow: /google-oauth-callback.php
Disallow: /oauth-callback.php
#
# Block configuration and handler files
Disallow: /*-handler.php
Disallow: /*.log$
Disallow: /*.inc$
Disallow: /*.sql$
Disallow: /*.sh$
Disallow: /*.bak$
Disallow: /db-config.php
Disallow: /.email-config.php
Disallow: /.recaptcha-config.php
#
# Block query string URLs to prevent duplicate content
Disallow: /*?*

# Sitemaps (independent of any User-agent group)
Sitemap: https://ukdataservices.co.uk/sitemap.xml
Sitemap: https://ukdataservices.co.uk/sitemap-index.xml
Sitemap: https://ukdataservices.co.uk/sitemap-blog.xml
Sitemap: https://ukdataservices.co.uk/sitemap-services.xml
Sitemap: https://ukdataservices.co.uk/sitemap-tools.xml
|
|
|
|
# Specific instructions for major search engines
#
# A crawler obeys only the single most specific User-agent group that matches
# it and ignores the "User-agent: *" group entirely. A named group containing
# just "Allow: /" therefore lifts every restriction for that crawler, so each
# named group below repeats the full rule set.
# Keep these rules in sync with the "User-agent: *" group.
#
# Googlebot and Bingbot share one group. Google documents Crawl-delay as an
# unsupported directive, so the value here only affects Bingbot.
User-agent: Googlebot
User-agent: Bingbot
Crawl-delay: 1
Allow: /assets/css/*.css
Allow: /assets/js/*.js
Allow: /assets/images/*.webp
Allow: /assets/images/*.png
Allow: /assets/images/*.jpg
Allow: /assets/images/*.svg
Disallow: /includes/
Disallow: /assets/
Disallow: /admin/
Disallow: /logs/
Disallow: /vendor/
Disallow: /config/
Disallow: /database/
Disallow: /docker/
Disallow: /redis/
Disallow: /google-oauth-callback
Disallow: /google-oauth-callback.php
Disallow: /oauth-callback.php
Disallow: /*-handler.php
Disallow: /*.log$
Disallow: /*.inc$
Disallow: /*.sql$
Disallow: /*.sh$
Disallow: /*.bak$
Disallow: /db-config.php
Disallow: /.email-config.php
Disallow: /.recaptcha-config.php
Disallow: /*?*

# Slurp keeps its own group because it uses a longer crawl delay.
User-agent: Slurp
Crawl-delay: 2
Allow: /assets/css/*.css
Allow: /assets/js/*.js
Allow: /assets/images/*.webp
Allow: /assets/images/*.png
Allow: /assets/images/*.jpg
Allow: /assets/images/*.svg
Disallow: /includes/
Disallow: /assets/
Disallow: /admin/
Disallow: /logs/
Disallow: /vendor/
Disallow: /config/
Disallow: /database/
Disallow: /docker/
Disallow: /redis/
Disallow: /google-oauth-callback
Disallow: /google-oauth-callback.php
Disallow: /oauth-callback.php
Disallow: /*-handler.php
Disallow: /*.log$
Disallow: /*.inc$
Disallow: /*.sql$
Disallow: /*.sh$
Disallow: /*.bak$
Disallow: /db-config.php
Disallow: /.email-config.php
Disallow: /.recaptcha-config.php
Disallow: /*?*
|
|
|
|
# AI crawlers - explicitly allowed for citation
#
# All AI crawlers share one group. Because a crawler that matches a named
# group ignores the "User-agent: *" group, this group must carry the full
# rule set itself; with only "Allow: /" these crawlers would be free to fetch
# /admin/, /logs/, /config/, the OAuth callbacks and every other blocked path.
# Public pages remain allowed by default (no rule matches them).
# Keep these rules in sync with the "User-agent: *" group.
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: ClaudeBot
User-agent: anthropic-ai
User-agent: PerplexityBot
User-agent: Google-Extended
User-agent: Applebot-Extended
User-agent: Bytespider
User-agent: CCBot
User-agent: FacebookBot
User-agent: Amazonbot
Allow: /assets/css/*.css
Allow: /assets/js/*.js
Allow: /assets/images/*.webp
Allow: /assets/images/*.png
Allow: /assets/images/*.jpg
Allow: /assets/images/*.svg
Disallow: /includes/
Disallow: /assets/
Disallow: /admin/
Disallow: /logs/
Disallow: /vendor/
Disallow: /config/
Disallow: /database/
Disallow: /docker/
Disallow: /redis/
Disallow: /google-oauth-callback
Disallow: /google-oauth-callback.php
Disallow: /oauth-callback.php
Disallow: /*-handler.php
Disallow: /*.log$
Disallow: /*.inc$
Disallow: /*.sql$
Disallow: /*.sh$
Disallow: /*.bak$
Disallow: /db-config.php
Disallow: /.email-config.php
Disallow: /.recaptcha-config.php
Disallow: /*?*
|