Fix all UKDS SEO cross-contamination risks

- Remove old sitemaps pointing to ukdataservices.co.uk URLs
- Delete llms.txt and llms-full.txt (both contained ukdataservices references)
- Replace all web scraping text in PHP files with AI automation equivalents
- Add noindex to legal boilerplate pages (privacy, terms, cookie, GDPR) to prevent a duplicate-content penalty
- Change OG/Twitter social card image references from ukds-* filenames to ukaiautomation-*
- Fix lead-capture.php to write logs to the ukaiautomation directory
- Fix sitemap.php, quote-handler, canonical.php, related-services component
This commit is contained in:
Peter Foster
2026-03-21 10:13:14 +00:00
parent 37a6b01598
commit 8eafc04b0f
29 changed files with 181 additions and 853 deletions

View File

@@ -14,13 +14,13 @@ $canonical_url = "https://ukaiautomation.co.uk/blog/search.php" . ($search_query
// Define all articles with their content for searching
$articles = [
[
'title' => 'Complete Guide to Web Scraping Compliance in the UK',
'url' => 'articles/web-scraping-compliance-uk-guide.php',
'title' => 'Complete Guide to AI Automation Compliance in the UK',
'url' => 'articles/web-Automation-compliance-uk-guide.php',
'category' => 'Legal & Compliance',
'date' => '2025-06-08',
'excerpt' => 'Navigate the complex landscape of UK data protection laws and ensure your web scraping activities remain fully compliant with GDPR and industry regulations.',
'excerpt' => 'Navigate the complex landscape of UK data protection laws and ensure your AI Automation activities remain fully compliant with GDPR and industry regulations.',
'read_time' => '12 min read',
'keywords' => 'web scraping compliance UK GDPR data protection legal regulations'
'keywords' => 'AI Automation compliance UK GDPR data protection legal regulations'
],
[
'title' => 'Measuring ROI from Competitive Intelligence Programmes',
@@ -41,13 +41,13 @@ $articles = [
'keywords' => 'price monitoring retail UK competitive pricing strategies automation'
],
[
'title' => 'Scraping JavaScript-Heavy Sites: Advanced Techniques',
'url' => 'articles/javascript-heavy-sites-scraping.php',
'category' => 'Web Scraping',
'title' => 'Automation JavaScript-Heavy Sites: Advanced Techniques',
'url' => 'articles/javascript-heavy-sites-Automation.php',
'category' => 'AI Automation',
'date' => '2025-06-01',
'excerpt' => 'Master the challenges of extracting data from dynamic websites using modern browser automation and rendering techniques.',
'read_time' => '6 min read',
'keywords' => 'javascript scraping dynamic websites browser automation rendering'
'keywords' => 'javascript Automation dynamic websites browser automation rendering'
],
[
'title' => 'Building Robust Data Quality Validation Pipelines',
@@ -68,13 +68,13 @@ $articles = [
'keywords' => 'financial services data transformation automation market data case study'
],
[
'title' => 'Cloud-Native Scraping Architecture for Enterprise Scale',
'url' => 'articles/cloud-native-scraping-architecture.php',
'title' => 'Cloud-Native Automation Architecture for Enterprise Scale',
'url' => 'articles/cloud-native-Automation-architecture.php',
'category' => 'Technology',
'date' => '2025-05-25',
'excerpt' => 'Design scalable, resilient web scraping infrastructure using modern cloud technologies and containerization.',
'excerpt' => 'Design scalable, resilient AI Automation infrastructure using modern cloud technologies and containerization.',
'read_time' => '11 min read',
'keywords' => 'cloud native scraping architecture enterprise scalable infrastructure'
'keywords' => 'cloud native Automation architecture enterprise scalable infrastructure'
],
[
'title' => 'UK Property Market: Data-Driven Investment Insights',
@@ -104,22 +104,22 @@ $articles = [
'keywords' => 'python data pipeline tools frameworks enterprise 2025'
],
[
'title' => 'Professional Rate Limiting Strategies for Web Scraping',
'url' => 'articles/web-scraping-rate-limiting.php',
'category' => 'Web Scraping',
'title' => 'Professional Rate Limiting Strategies for AI Automation',
'url' => 'articles/web-Automation-rate-limiting.php',
'category' => 'AI Automation',
'date' => '2025-05-20',
'excerpt' => 'Master advanced rate limiting techniques to ensure respectful and sustainable web scraping operations.',
'excerpt' => 'Master advanced rate limiting techniques to ensure respectful and sustainable AI Automation operations.',
'read_time' => '9 min read',
'keywords' => 'rate limiting web scraping professional sustainable techniques'
'keywords' => 'rate limiting AI Automation professional sustainable techniques'
],
[
'title' => 'Kubernetes Deployment for Enterprise Scraping',
'url' => 'articles/kubernetes-scraping-deployment.php',
'title' => 'Kubernetes Deployment for Enterprise Automation',
'url' => 'articles/kubernetes-Automation-deployment.php',
'category' => 'Technology',
'date' => '2025-05-18',
'excerpt' => 'Deploy and scale web scraping applications using Kubernetes with best practices for production environments.',
'excerpt' => 'Deploy and scale AI Automation applications using Kubernetes with best practices for production environments.',
'read_time' => '13 min read',
'keywords' => 'kubernetes deployment enterprise scraping scaling production'
'keywords' => 'kubernetes deployment enterprise Automation scaling production'
],
[
'title' => 'UK Cookie Law Compliance for Data Collection',
@@ -144,7 +144,7 @@ $articles = [
'url' => 'articles/healthcare-research-data-collection.php',
'category' => 'Case Studies',
'date' => '2025-05-10',
'excerpt' => 'How a UK research institution improved data collection efficiency by 60% using automated web scraping solutions.',
'excerpt' => 'How a UK research institution improved data collection efficiency by 60% using automated AI Automation solutions.',
'read_time' => '8 min read',
'keywords' => 'healthcare research data collection automation efficiency case study'
]
@@ -175,7 +175,7 @@ if ($search_query) {
<!-- Preload critical resources -->
<link rel="preload" href="../assets/css/main.css?v=20260222" as="style">
<link rel="preload" href="../assets/images/ukds-main-logo.png" as="image">
<link rel="preload" href="../assets/images/ukaiautomation-logo.svg" as="image">
<!-- Open Graph / Social Media -->
<meta property="og:type" content="website">
@@ -270,7 +270,7 @@ if ($search_query) {
<h3>Popular Topics:</h3>
<div class="popular-topics">
<a href="?q=web+scraping" class="topic-tag">Web Scraping</a>
<a href="?q=web+Automation" class="topic-tag">AI Automation</a>
<a href="?q=data+analytics" class="topic-tag">Data Analytics</a>
<a href="?q=GDPR+compliance" class="topic-tag">GDPR Compliance</a>
<a href="?q=business+intelligence" class="topic-tag">Business Intelligence</a>
@@ -287,9 +287,9 @@ if ($search_query) {
<div class="container">
<h2>Browse by Category</h2>
<div class="categories-grid">
<a href="/blog/categories/web-scraping" class="category-card">
<a href="/blog/categories/web-Automation" class="category-card">
<span class="category-icon">🕷️</span>
<h3>Web Scraping</h3>
<h3>AI Automation</h3>
<p>Techniques, tools, and best practices</p>
</a>