2025-12-09 09:07:07 +00:00
|
|
|
<?php
|
|
|
|
|
/**
 * Canonical URL Helper
 * Generates consistent canonical URLs for the site.
 *
 * Usage:
 *   include($_SERVER['DOCUMENT_ROOT'] . '/includes/canonical.php');
 *   $canonical = getCanonicalUrl();
 */
|
|
|
|
|
|
|
|
|
|
/**
 * Get the canonical URL for the current page.
 *
 * The result is the fixed site base URL followed by the request path with the
 * query string, a trailing ".php" extension and trailing slashes removed.
 * The root path is always returned as "/".
 *
 * @param string|null $overrideUrl Optional URL or path to canonicalise instead
 *                                 of the current request; when truthy it is
 *                                 handed to cleanCanonicalUrl() unchanged.
 * @return string The canonical URL
 */
function getCanonicalUrl($overrideUrl = null) {
    $baseUrl = 'https://ukdataservices.co.uk';

    // If an override is provided, clean and return it.
    if ($overrideUrl) {
        return cleanCanonicalUrl($baseUrl, $overrideUrl);
    }

    // Auto-detect the current URL; default to the root when the server
    // did not provide a request URI (e.g. CLI execution).
    $requestUri = $_SERVER['REQUEST_URI'] ?? '/';

    // Keep only the path component (drops the query string). parse_url()
    // returns false for seriously malformed URIs and null when no path is
    // present, so anything that is not a non-empty string becomes the root.
    $path = parse_url($requestUri, PHP_URL_PATH);
    if (!is_string($path) || $path === '') {
        $path = '/';
    }

    // Remove the .php extension for clean URLs.
    if (substr($path, -4) === '.php') {
        $path = substr($path, 0, -4);
    }

    // Normalise trailing slashes: strip them everywhere, then restore the
    // single slash if the path was (or collapsed to) the root.
    $path = rtrim($path, '/');
    if ($path === '') {
        $path = '/';
    }

    return $baseUrl . $path;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Clean and normalize a canonical URL.
 *
 * Steps, in order: make a relative URL absolute against $baseUrl, drop the
 * query string, drop a trailing ".php", strip trailing slashes from any
 * non-root path, then force the "https://" scheme and remove a leading
 * "www." from the host.
 *
 * @param string $baseUrl The site base URL (no trailing slash)
 * @param string $url     The absolute or site-relative URL to clean
 * @return string Cleaned canonical URL
 */
function cleanCanonicalUrl($baseUrl, $url) {
    $url = (string) $url;

    // Only a real "http://" or "https://" prefix marks an absolute URL.
    // A bare "starts with http" test would misclassify relative slugs such
    // as "http-headers-guide" and return them without the site base.
    if (!preg_match('#^https?://#i', $url)) {
        $url = $baseUrl . '/' . ltrim($url, '/');
    }

    // Remove the query string. strpos()/substr() is used instead of strtok()
    // because strtok() keeps hidden global state that would disturb any
    // tokenisation the caller has in progress.
    $queryStart = strpos($url, '?');
    if ($queryStart !== false) {
        $url = substr($url, 0, $queryStart);
    }

    // Remove the .php extension.
    if (substr($url, -4) === '.php') {
        $url = substr($url, 0, -4);
    }

    // Normalise trailing slashes, leaving the root path "/" untouched.
    // parse_url() may return false/null, hence the is_string() guard.
    $path = parse_url($url, PHP_URL_PATH);
    if (is_string($path) && $path !== '' && $path !== '/' && substr($path, -1) === '/') {
        $url = rtrim($url, '/');
    }

    // Ensure https and non-www. The pattern is anchored so that only the
    // scheme/host prefix is rewritten, never a "http://" or "://www." that
    // happens to appear later inside the path.
    $normalized = preg_replace('#^https?://(?:www\.)?#i', 'https://', $url, 1);

    return $normalized === null ? $url : $normalized;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Build the <link rel="canonical"> element for a page.
 *
 * The URL is resolved through getCanonicalUrl() and HTML-escaped before it
 * is placed into the href attribute.
 *
 * @param string|null $url Optional URL override forwarded to getCanonicalUrl()
 * @return string HTML link tag
 */
function generateCanonicalTag($url = null) {
    $href = htmlspecialchars(getCanonicalUrl($url));

    return sprintf('<link rel="canonical" href="%s">', $href);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Resolve a page key to its full site URL.
 *
 * Keys map to fixed paths (some of them in-page anchors such as "/#contact").
 * A key that is not in the table resolves to the bare site base URL, the
 * same value as the "home" key.
 *
 * @param string $pageKey The page identifier
 * @return string The full canonical URL
 */
function getPageUrl($pageKey) {
    $routes = [
        // General pages
        'home' => '',
        'about' => '/about',
        'quote' => '/quote',
        'faq' => '/faq',
        'blog' => '/blog',
        'contact' => '/#contact',
        'privacy' => '/privacy-policy',
        'terms' => '/terms-of-service',
        'cookies' => '/cookie-policy',
        'gdpr' => '/gdpr-compliance',
        'project-types' => '/project-types',
        'case-studies' => '/case-studies',

        // Services
        'services' => '/#services',
        'web-scraping' => '/services/web-scraping',
        'competitive-intelligence' => '/services/competitive-intelligence',
        'price-monitoring' => '/services/price-monitoring',
        'data-cleaning' => '/services/data-cleaning',
        'data-analytics' => '/services/data-analysis-services',
        'api-development' => '/services/api-development',
        'property-data' => '/services/property-data-extraction',
        'financial-data' => '/services/financial-data-services',

        // Locations
        'london' => '/locations/london',
        'manchester' => '/locations/manchester',
        'birmingham' => '/locations/birmingham',
        'edinburgh' => '/locations/edinburgh',
        'cardiff' => '/locations/cardiff',
    ];

    // Unknown keys contribute an empty suffix, i.e. the site root.
    return 'https://ukdataservices.co.uk' . ($routes[$pageKey] ?? '');
}
|
|
|
|
|
|
|
|
|
|
/**
 * Check if the current request URL matches the canonical URL.
 *
 * The current URL is rebuilt from $_SERVER (HTTPS flag, HTTP_HOST,
 * REQUEST_URI), passed through cleanCanonicalUrl() and compared with
 * getCanonicalUrl().
 *
 * NOTE(review): both sides of the comparison are normalised (scheme, "www."
 * prefix, query string, ".php" suffix, trailing slash), so in practice this
 * mostly reports false only when the request arrived under a different host
 * name. Confirm whether a stricter comparison against the raw request URL is
 * intended before relying on this for http->https or www redirects.
 *
 * @return bool True if current URL is canonical
 */
function isCanonicalUrl() {
    $scheme = (isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] === 'on') ? 'https' : 'http';

    // Default missing server values (e.g. under CLI) instead of raising
    // undefined-key warnings; REQUEST_URI uses the same "/" fallback as
    // getCanonicalUrl().
    $host = $_SERVER['HTTP_HOST'] ?? '';
    $requestUri = $_SERVER['REQUEST_URI'] ?? '/';

    $currentUrl = $scheme . '://' . $host . $requestUri;
    $canonical = getCanonicalUrl();

    return cleanCanonicalUrl('https://ukdataservices.co.uk', $currentUrl) === $canonical;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Redirect to the canonical URL if the current request is not canonical.
 *
 * Call this at the very beginning of a page, before any output. When a
 * redirect is issued the script terminates; otherwise the function returns
 * and the page continues normally.
 *
 * @return void
 */
function enforceCanonicalUrl() {
    if (isCanonicalUrl()) {
        return;
    }

    // Once output has started a redirect can no longer be sent. Calling
    // header() would only raise a warning and exiting would truncate the
    // page, so leave the response alone in that case.
    if (headers_sent()) {
        return;
    }

    // Let PHP emit the status line for whatever protocol is in use rather
    // than hard-coding "HTTP/1.1".
    header('Location: ' . getCanonicalUrl(), true, 301);
    exit;
}
|