Files
ukaiautomation/.htaccess

255 lines
10 KiB
ApacheConf
Raw Normal View History

# Redirect www to non-www
# Any host that starts with a literal "www." is 301-redirected to the same
# path on the bare host over HTTPS. The dot after "www" is escaped so that
# only a real "www." prefix matches (an unescaped "." matches any character).
# %1 is the host without the prefix, $1 is the requested path.
<IfModule mod_rewrite.c>
    RewriteEngine On
    RewriteCond %{HTTP_HOST} ^www\.(.+)$ [NC]
    RewriteRule ^(.*)$ https://%1/$1 [L,R=301]
</IfModule>
# Custom error pages
# Each ErrorDocument maps an HTTP status code to a local URL path that is
# served in place of the server's default error response for that status.
ErrorDocument 403 /403.php
ErrorDocument 404 /404.php
ErrorDocument 500 /500.php
# Security Rules for UK Data Services
# Protect sensitive files and configs
# Denies every request whose file name matches one of these alternatives:
#   ^\.(.*)$                       - any name beginning with a dot
#   \.log$ \.sql$ \.conf$ \.bak$   - names ending in those extensions
#   config\.php$                   - names ending in "config.php"
#   \.email-config\.php$           - names ending in ".email-config.php"
#   \.htaccess \.htpasswd \.ini \.sh \.inc - names containing these anywhere
# NOTE(review): the last group has no trailing "$", so it also matches names
# such as "page.shtml" or "x.inc.php" - confirm that is intended before
# anchoring those alternatives.
<FilesMatch "^\.(.*)$|\.log$|\.sql$|\.conf$|config\.php$|\.email-config\.php$|\.htaccess|\.htpasswd|\.ini|\.sh|\.inc|\.bak$">
Require all denied
</FilesMatch>
# Form handlers accept POST only.
# For each handler file, every HTTP method other than POST is denied, so the
# scripts cannot be opened directly in a browser with a plain GET request.

# Contact form handler
<Files "contact-handler.php">
    <LimitExcept POST>
        Require all denied
    </LimitExcept>
</Files>

# Quote form handler
<Files "quote-handler.php">
    <LimitExcept POST>
        Require all denied
    </LimitExcept>
</Files>
# Security response headers (applied only when mod_headers is loaded)
<IfModule mod_headers.c>
    # "always" puts these headers on every response, error responses included.
    Header always set X-Content-Type-Options "nosniff"
    Header always set X-Frame-Options "SAMEORIGIN"
    Header always set Referrer-Policy "strict-origin-when-cross-origin"
    Header always set Permissions-Policy "geolocation=(), microphone=(), camera=(), payment=(), usb=()"

    # CRITICAL: form pages contain session-specific CSRF tokens and must not
    # be cached. Matches any file whose name ends in "quote.php" or
    # "contact.php". When several "Header set" rules for the same header match
    # a file, the last one in the file wins, so any later Cache-Control rule
    # that also matches .php files must not undo this one.
    <FilesMatch "(quote|contact)\.php$">
        Header set Cache-Control "no-store, no-cache, must-revalidate, max-age=0"
        Header set Pragma "no-cache"
        Header set Expires "Sat, 01 Jan 2000 00:00:00 GMT"
    </FilesMatch>
</IfModule>
# Enhanced Gzip compression
<IfModule mod_deflate.c>
    # Compress text-based responses, selected by MIME type.
    AddOutputFilterByType DEFLATE text/html text/plain text/xml text/css text/javascript
    AddOutputFilterByType DEFLATE application/javascript application/x-javascript
    AddOutputFilterByType DEFLATE application/xml application/xhtml+xml application/rss+xml
    AddOutputFilterByType DEFLATE application/json application/ld+json
    AddOutputFilterByType DEFLATE image/svg+xml
    AddOutputFilterByType DEFLATE font/ttf font/otf font/eot font/woff font/woff2

    # Legacy browser workarounds. BrowserMatch is provided by mod_setenvif, so
    # it is guarded separately: without the guard a server that has
    # mod_deflate but not mod_setenvif would reject the whole file.
    <IfModule mod_setenvif.c>
        BrowserMatch ^Mozilla/4 gzip-only-text/html
        BrowserMatch ^Mozilla/4\.0[678] no-gzip
        BrowserMatch \bMSIE !no-gzip !gzip-only-text/html
    </IfModule>

    # The rules above vary the response by browser, so tell caches about it.
    # Header is provided by mod_headers and is guarded for the same reason.
    <IfModule mod_headers.c>
        Header append Vary User-Agent
    </IfModule>
</IfModule>
# Brotli compression, used only when mod_brotli is loaded
<IfModule mod_brotli.c>
    # Markup, plain text, styles and scripts
    AddOutputFilterByType BROTLI_COMPRESS text/html text/plain text/xml text/css text/javascript
    AddOutputFilterByType BROTLI_COMPRESS application/javascript application/x-javascript
    # XML-based formats and feeds
    AddOutputFilterByType BROTLI_COMPRESS application/xml application/xhtml+xml application/rss+xml
    # JSON data
    AddOutputFilterByType BROTLI_COMPRESS application/json application/ld+json
    # Vector images
    AddOutputFilterByType BROTLI_COMPRESS image/svg+xml
    # Fonts
    AddOutputFilterByType BROTLI_COMPRESS font/ttf font/otf font/woff font/woff2
</IfModule>
# Browser caching lifetimes by MIME type (mod_expires)
# All lifetimes are counted from the time of the client's access.
<IfModule mod_expires.c>
    ExpiresActive On

    # Images: one year
    ExpiresByType image/jpeg "access plus 1 year"
    ExpiresByType image/jpg "access plus 1 year"
    ExpiresByType image/gif "access plus 1 year"
    ExpiresByType image/png "access plus 1 year"
    ExpiresByType image/webp "access plus 1 year"
    ExpiresByType image/svg+xml "access plus 1 year"
    ExpiresByType image/x-icon "access plus 1 year"
    ExpiresByType image/ico "access plus 1 year"

    # Fonts: one year
    ExpiresByType font/ttf "access plus 1 year"
    ExpiresByType font/otf "access plus 1 year"
    ExpiresByType font/woff "access plus 1 year"
    ExpiresByType font/woff2 "access plus 1 year"
    ExpiresByType application/font-woff "access plus 1 year"
    ExpiresByType application/font-woff2 "access plus 1 year"

    # Stylesheets and scripts: one month
    ExpiresByType text/css "access plus 1 month"
    ExpiresByType application/javascript "access plus 1 month"
    ExpiresByType text/javascript "access plus 1 month"
    ExpiresByType application/x-javascript "access plus 1 month"

    # HTML output: one hour
    ExpiresByType text/html "access plus 1 hour"
    ExpiresByType application/xhtml+xml "access plus 1 hour"

    # JSON and XML data: expires immediately
    ExpiresByType application/json "access plus 0 seconds"
    ExpiresByType application/xml "access plus 0 seconds"
    ExpiresByType text/xml "access plus 0 seconds"

    # Anything not listed above: one week
    ExpiresDefault "access plus 1 week"
</IfModule>
# Cache-Control Headers
<IfModule mod_headers.c>
    # Static assets (images and fonts) - 1 year, immutable
    <FilesMatch "\.(jpg|jpeg|png|gif|webp|svg|ico|woff|woff2|ttf|otf|eot)$">
        Header set Cache-Control "max-age=31536000, public, immutable"
    </FilesMatch>

    # CSS and JS - 1 month
    <FilesMatch "\.(css|js)$">
        Header set Cache-Control "max-age=2592000, public"
    </FilesMatch>

    # Static HTML - 1 hour. Only files ending in .html match this pattern;
    # .php files are not covered by it.
    <FilesMatch "\.(html)$">
        Header set Cache-Control "max-age=3600, public, must-revalidate"
    </FilesMatch>

    # No "Header set Connection keep-alive" here. Connection is a hop-by-hop
    # header that the server manages for each connection, and it is not
    # permitted in HTTP/2 responses. Persistent connections are switched on
    # with the KeepAlive directive in the main server configuration; forcing
    # the header from .htaccess does not enable them.
</IfModule>
# HTTP/2 Server Push
# Adds preload Link headers for the main stylesheet, logo and script when a
# file named exactly "index.php" is served.
<IfModule mod_http2.c>
    # Header is provided by mod_headers, so it needs its own guard; mod_http2
    # being loaded does not imply that mod_headers is.
    <IfModule mod_headers.c>
        # Anchored at both ends so that only "index.php" matches, not names
        # that merely contain it (for example "index.php.old").
        <FilesMatch "^index\.php$">
            Header add Link "</assets/css/main.min.css>; rel=preload; as=style"
            Header add Link "</assets/images/ukds-main-logo.webp>; rel=preload; as=image"
            Header add Link "</assets/js/main.min.js>; rel=preload; as=script"
        </FilesMatch>
    </IfModule>
</IfModule>
# ETags
# Stop the server generating ETags for static files, and strip any ETag
# header from responses.
FileETag None
# Header is provided by mod_headers; guarded like the other Header rules in
# this file so that a server without the module does not reject the file.
<IfModule mod_headers.c>
    Header unset ETag
</IfModule>
# Disable directory browsing
# Removes the Indexes option, so a directory without an index file does not
# get an automatically generated listing.
Options -Indexes
# URL rewriting: access blocking, service-page routing, 301 redirects and
# extension-less ("clean") URLs.
<IfModule mod_rewrite.c>
    RewriteEngine On

    # Security rules - prevent access to logs, database, .git and docker
    # directories. These run first: several rules further down end rewrite
    # processing with [L] or [END], and a deny rule placed after them is never
    # reached for a request those rules match (for example, an extension-less
    # request under logs/ that resolves to an existing .php file).
    RewriteRule ^logs(/.*)?$ - [F,L]
    RewriteRule ^database(/.*)?$ - [F,L]
    RewriteRule ^\.git(/.*)?$ - [F,L]
    RewriteRule ^docker(/.*)?$ - [F,L]

    # Block known scanner IPs
    RewriteCond %{REMOTE_ADDR} ^(20\.63\.96\.50|4\.193\.248\.52)$
    RewriteRule ^ - [F,L]

    # Block requests for PHP files that don't exist (webshell scanners).
    # These get 403 Forbidden rather than 404.
    RewriteCond %{REQUEST_FILENAME} !-f
    RewriteRule \.php$ - [F,L]

    # Skip already processed .php files: a request under services/ that maps
    # to an existing .php file is left untouched, so the catch-all services
    # redirect below does not apply to it.
    RewriteCond %{REQUEST_FILENAME} -f
    RewriteRule ^services/.*\.php$ - [L]

    # Explicitly allow existing service pages (optional trailing slash)
    RewriteRule ^services/competitive-intelligence/?$ /services/competitive-intelligence.php [L]
    RewriteRule ^services/data-cleaning/?$ /services/data-cleaning.php [L]
    RewriteRule ^services/financial-data-services/?$ /services/financial-data-services.php [L]
    RewriteRule ^services/price-monitoring/?$ /services/price-monitoring.php [L]
    RewriteRule ^services/property-data-extraction/?$ /services/property-data-extraction.php [L]
    RewriteRule ^services/web-scraping/?$ /services/web-scraping.php [L]
    RewriteRule ^services/csharp-development-services/?$ /services/csharp-development-services.php [L]
    RewriteRule ^services/data-processing-services/?$ /services/data-processing-services.php [L]

    # Redirect /services index to project-types
    RewriteRule ^services/?$ /project-types [R=301,L]

    # Further service pages served from their .php files
    RewriteRule ^services/data-analytics-london/?$ /services/data-analytics-london.php [L]
    RewriteRule ^services/data-analytics-consultancy-london/?$ /services/data-analytics-consultancy-london.php [L]
    RewriteRule ^services/data-validation-cleaning/?$ /services/data-validation-cleaning.php [L]
    RewriteRule ^services/data-analytics-services-uk/?$ /services/data-analytics-services-uk.php [L]

    # Alternative scraping URLs permanently redirect to the web-scraping page
    RewriteRule ^services/web-scraping-companies/?$ /services/web-scraping [R=301,L]
    RewriteRule ^services/data-scraping/?$ /services/web-scraping [R=301,L]
    RewriteRule ^web-scraping-services/?$ /services/web-scraping [R=301,L]
    RewriteRule ^data-scraping-services/?$ /services/web-scraping [R=301,L]

    # Redirect unknown service pages to project-types.
    # Must stay after every explicit services/ rule above.
    RewriteRule ^services/(.+)$ /project-types [R=301,L]

    # 301 Redirects for renamed pages
    RewriteRule ^blog/articles/data-analytics-companies-london-top-providers/?$ /blog/articles/data-analytics-companies-london-top-providers-compared [R=301,L]
    RewriteRule ^blog/articles/gdpr-compliance-web-scraping-uk-guide/?$ /blog/articles/web-scraping-compliance-uk-guide [R=301,L]
    RewriteRule ^blog/articles/airflow-alternatives-python/?$ /blog/articles/python-airflow-alternatives [R=301,L]
    RewriteRule ^blog/articles/kafka-performance-evaluation-real-time-streaming/?$ /blog/articles/performance-evaluation-apache-kafka-real-time-streaming [R=301,L]
    RewriteRule ^blog/articles/real-time-analytics-streaming/?$ /blog/articles/real-time-analytics-streaming-data [R=301,L]

    # Clean URL rewriting - remove .php extension.
    # Applies only when the request is neither an existing directory nor an
    # existing file and a file with ".php" appended exists. [END] stops all
    # further rewrite processing for the request.
    RewriteCond %{REQUEST_FILENAME} !-d
    RewriteCond %{REQUEST_FILENAME} !-f
    RewriteCond %{REQUEST_FILENAME}.php -f
    RewriteRule ^(.+?)/?$ $1.php [END]
</IfModule>
# Disable server signature
# Turns off the footer line that the server adds to documents it generates
# itself, such as error pages.
ServerSignature Off
# === Page Speed Optimizations ===
# Gzip compression for the common text-based response types.
# Every type listed here also appears in the earlier mod_deflate section of
# this file.
<IfModule mod_deflate.c>
    AddOutputFilterByType DEFLATE text/html text/plain text/css text/javascript
    AddOutputFilterByType DEFLATE application/javascript application/json image/svg+xml
</IfModule>
# Browser caching lifetimes (second mod_expires section)
# This section comes after the earlier mod_expires section in this file, so
# its ExpiresDefault of 2 days is the one that takes effect.
<IfModule mod_expires.c>
    ExpiresActive On

    # Images: one year
    ExpiresByType image/jpg "access plus 1 year"
    ExpiresByType image/jpeg "access plus 1 year"
    ExpiresByType image/gif "access plus 1 year"
    ExpiresByType image/png "access plus 1 year"
    ExpiresByType image/webp "access plus 1 year"
    ExpiresByType image/svg+xml "access plus 1 year"

    # Stylesheets, scripts and PDF documents: one month
    ExpiresByType text/css "access plus 1 month"
    ExpiresByType application/javascript "access plus 1 month"
    ExpiresByType text/javascript "access plus 1 month"
    ExpiresByType application/pdf "access plus 1 month"

    # Favicon: one year
    ExpiresByType image/x-icon "access plus 1 year"

    # Everything else: two days
    ExpiresDefault "access plus 2 days"
</IfModule>
# Cache-Control headers
<IfModule mod_headers.c>
    # Images, icons, PDF and FLV files - long cache is safe since filenames
    # don't change
    <FilesMatch "\.(ico|pdf|flv|jpg|jpeg|png|gif|webp|svg)$">
        Header set Cache-Control "max-age=31536000, public"
    </FilesMatch>

    # CSS and JS - short cache + must-revalidate so edits propagate within 1 hour
    <FilesMatch "\.(css|js)$">
        Header set Cache-Control "max-age=3600, public, must-revalidate"
    </FilesMatch>

    # HTML and PHP pages - 10 minutes, private (browser cache only)
    <FilesMatch "\.(html|htm|php)$">
        Header set Cache-Control "max-age=600, private, must-revalidate"
    </FilesMatch>

    # Form pages contain session-specific CSRF tokens and must never be
    # cached. The generic .php rule above also matches them, and when several
    # "Header set" rules match a file the last one wins, so no-store has to be
    # applied here, after that rule, to take effect.
    <FilesMatch "(quote|contact)\.php$">
        Header set Cache-Control "no-store, no-cache, must-revalidate, max-age=0"
    </FilesMatch>
</IfModule>
# Keep-Alive
# No "Header set Connection keep-alive" at this point. Connection is a
# hop-by-hop header that the server manages for each connection, and it is
# not permitted in HTTP/2 responses. Persistent connections are switched on
# with the KeepAlive directive in the main server configuration; forcing the
# header from .htaccess does not enable them.