Security hardening + new tools deployment
- Hide Apache version (ServerTokens Prod) - Add Permissions-Policy header - Remove deprecated X-XSS-Protection - Consolidate security headers to .htaccess only (remove duplicates from PHP) - Deploy free tools: robots-analyzer, data-converter - Deploy tools announcement blog post - Update sitemap with new tools and blog post
This commit is contained in:
122
api/fetch-robots.php
Normal file
122
api/fetch-robots.php
Normal file
@@ -0,0 +1,122 @@
|
||||
<?php
/**
 * robots.txt fetch proxy.
 *
 * Retrieves a remote robots.txt on behalf of the browser (sidestepping
 * cross-origin restrictions) and answers in JSON.
 * SECURITY: requests that resolve to internal/private IPs are refused further
 * down in this script to prevent SSRF.
 */

// Sent before any validation, so error responses carry these headers too:
// JSON body, readable from any origin, GET only, cacheable for five minutes.
$responseHeaders = [
    "Content-Type: application/json",
    "Access-Control-Allow-Origin: *",
    "Access-Control-Allow-Methods: GET",
    "Cache-Control: public, max-age=300",
];
foreach ($responseHeaders as $responseHeader) {
    header($responseHeader);
}

// Target URL supplied by the caller; an absent parameter becomes "".
$url = isset($_GET["url"]) ? $_GET["url"] : "";

// Nothing usable was supplied: reject before doing any further work.
if (!$url) {
    http_response_code(400);
    echo json_encode(["error" => "URL parameter required"]);
    exit;
}
|
||||
|
||||
// Validate URL format
if (filter_var($url, FILTER_VALIDATE_URL) === false) {
    http_response_code(400);
    echo json_encode(["error" => "Invalid URL"]);
    exit;
}

// Parse URL components (missing components default to "")
$parsed = parse_url($url);
$scheme = $parsed["scheme"] ?? "";
$host = $parsed["host"] ?? "";
$path = $parsed["path"] ?? "";

// Only allow http/https (strict comparison: no type juggling on user input)
if (!in_array(strtolower($scheme), ["http", "https"], true)) {
    http_response_code(400);
    echo json_encode(["error" => "Only http/https URLs allowed"]);
    exit;
}

// Path must be exactly /robots.txt
if ($path !== "/robots.txt") {
    http_response_code(400);
    echo json_encode(["error" => "Only /robots.txt paths allowed"]);
    exit;
}

// Block query strings and fragments.
// Compared against "" rather than tested with empty(): empty("0") is true,
// so a URL ending in "?0" or "#0" would otherwise slip past this check.
$hasQuery = ($parsed["query"] ?? "") !== "";
$hasFragment = ($parsed["fragment"] ?? "") !== "";
if ($hasQuery || $hasFragment) {
    http_response_code(400);
    echo json_encode(["error" => "Query strings not allowed"]);
    exit;
}
|
||||
|
||||
// Resolve hostname to IP
//
// Every address the name maps to is checked, not just the first one:
// gethostbyname() returns a single IPv4 address, which would leave additional
// A records and all AAAA records unvalidated.
$ipv4Addresses = gethostbynamel($host);

// Resolution failed, or the "hostname" was already an IP literal (the resolver
// hands literals back unchanged). Either way it is not an acceptable target.
if ($ipv4Addresses === false || $ipv4Addresses === [] || in_array($host, $ipv4Addresses, true)) {
    // DNS resolution failed - might be internal hostname
    http_response_code(400);
    echo json_encode(["error" => "Could not resolve hostname"]);
    exit;
}

// First IPv4 address, kept under the name the original single-address check used.
$ip = $ipv4Addresses[0];

// AAAA lookup is best effort: a failed lookup is treated as "no IPv6 addresses".
// The warning is suppressed so it cannot leak into the JSON body.
$aaaaRecords = @dns_get_record($host, DNS_AAAA);
$ipv6Addresses = is_array($aaaaRecords) ? array_column($aaaaRecords, "ipv6") : [];

// Block private and reserved IP ranges (SSRF protection)
$flags = FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE;

foreach (array_merge($ipv4Addresses, $ipv6Addresses) as $address) {
    $isPublic = filter_var($address, FILTER_VALIDATE_IP, $flags) !== false;

    // Also block IPv6 localhost variants (explicit prefix check kept alongside
    // the filter flags as a second line of defence)
    $isLocalIpv6 = preg_match("/^(::1|fe80:|fc00:|fd00:)/i", $address) === 1;

    if (!$isPublic || $isLocalIpv6) {
        http_response_code(400);
        echo json_encode(["error" => "Internal addresses not allowed"]);
        exit;
    }
}
|
||||
|
||||
// Fetch the robots.txt
//
// Redirects are followed by hand instead of via "follow_location": the HTTP
// wrapper would otherwise follow a redirect from a public host straight to an
// internal address, bypassing the SSRF checks applied to the original URL.
// NOTE(review): the wrapper resolves the hostname again when connecting, so a
// DNS answer that changes between validation and fetch is still not covered.
$maxRedirects = 3;

// Cap the body size so a hostile server cannot exhaust memory. 500 KiB is the
// minimum parsing limit RFC 9309 requires crawlers to support.
$maxBytes = 500 * 1024;

$context = stream_context_create([
    "http" => [
        "timeout" => 10,
        "user_agent" => "UK Data Services Robots Analyzer (+https://ukdataservices.co.uk/tools/robots-analyzer)",
        // Redirects are handled in the loop below.
        "follow_location" => 0,
        // Return the body for 3xx/4xx/5xx too, so the status can be inspected here.
        "ignore_errors" => true,
    ],
    "ssl" => [
        "verify_peer" => true,
        "verify_peer_name" => true,
    ],
]);

// True when $candidate is an http(s) URL whose host resolves only to public addresses.
$isPublicHttpUrl = static function (string $candidate): bool {
    $parts = parse_url($candidate);
    if (!is_array($parts)) {
        return false;
    }

    $scheme = strtolower($parts["scheme"] ?? "");
    $host = $parts["host"] ?? "";
    if ($host === "" || !in_array($scheme, ["http", "https"], true)) {
        return false;
    }

    // Unresolvable names and IP literals (returned unchanged by the resolver) are refused.
    $addresses = gethostbynamel($host);
    if ($addresses === false || $addresses === [] || in_array($host, $addresses, true)) {
        return false;
    }

    // Best-effort AAAA lookup; the warning is suppressed to keep the JSON body clean.
    $aaaaRecords = @dns_get_record($host, DNS_AAAA);
    if (is_array($aaaaRecords)) {
        $addresses = array_merge($addresses, array_column($aaaaRecords, "ipv6"));
    }

    $flags = FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE;
    foreach ($addresses as $address) {
        if (filter_var($address, FILTER_VALIDATE_IP, $flags) === false) {
            return false;
        }
    }

    return true;
};

// Turns a Location header value into an absolute URL relative to $base.
$resolveRedirect = static function (string $base, string $location): string {
    // Already absolute (has a scheme).
    if (preg_match("/^[a-z][a-z0-9+.-]*:/i", $location) === 1) {
        return $location;
    }

    $parts = parse_url($base);
    if (!is_array($parts) || !isset($parts["scheme"], $parts["host"])) {
        return "";
    }

    // Scheme-relative: //host/path
    if (strpos($location, "//") === 0) {
        return $parts["scheme"] . ":" . $location;
    }

    $origin = $parts["scheme"] . "://" . $parts["host"];
    if (isset($parts["port"])) {
        $origin .= ":" . $parts["port"];
    }

    // Host-relative: /path
    if (strpos($location, "/") === 0) {
        return $origin . $location;
    }

    // Path-relative: resolve against the directory of the base path.
    $basePath = $parts["path"] ?? "/";
    $lastSlash = strrpos($basePath, "/");
    $directory = $lastSlash === false ? "/" : substr($basePath, 0, $lastSlash + 1);

    return $origin . $directory . $location;
};

$target = $url;
$content = false;
$statusCode = 0;

for ($hop = 0; $hop <= $maxRedirects; $hop++) {
    $content = @file_get_contents($target, false, $context, 0, $maxBytes);
    if ($content === false) {
        // Connection-level failure (DNS, TLS, timeout, ...).
        break;
    }

    // The last status line wins, so interim 1xx responses are skipped.
    $statusCode = 0;
    $location = "";
    foreach ($http_response_header ?? [] as $header) {
        if (preg_match("/^HTTP\/\d\.\d\s+(\d+)/", $header, $matches)) {
            $statusCode = intval($matches[1]);
        } elseif (stripos($header, "Location:") === 0) {
            $location = trim(substr($header, 9));
        }
    }

    if ($statusCode < 300 || $statusCode >= 400) {
        // Final answer, not a redirect.
        break;
    }

    // Redirect: give up when the limit is reached, the target is missing, or
    // it points anywhere other than a public http(s) host.
    $target = $location === "" ? "" : $resolveRedirect($target, $location);
    if ($hop === $maxRedirects || $target === "" || !$isPublicHttpUrl($target)) {
        $content = false;
        $statusCode = 0;
        break;
    }
}

// A missing robots.txt is reported as an allow-all policy rather than an error.
if ($content !== false && $statusCode === 404) {
    echo json_encode([
        "content" => "# No robots.txt found\nUser-agent: *\nAllow: /",
        "status" => 404,
        "message" => "No robots.txt file found (this means the site allows all crawling by default)"
    ]);
    exit;
}

// Anything other than a 2xx answer is an upstream failure.
if ($content === false || $statusCode < 200 || $statusCode >= 300) {
    http_response_code(502);
    echo json_encode(["error" => "Failed to fetch robots.txt - site may be unreachable"]);
    exit;
}

// JSON_INVALID_UTF8_SUBSTITUTE (PHP 7.2+): a robots.txt containing non-UTF-8
// bytes would otherwise make json_encode() return false, yielding an empty body.
echo json_encode([
    "content" => $content,
    "status" => 200,
    "url" => $url,
    "fetchedAt" => date("c")
], JSON_INVALID_UTF8_SUBSTITUTE);
|
||||
36
api/lead-capture.php
Normal file
36
api/lead-capture.php
Normal file
@@ -0,0 +1,36 @@
|
||||
<?php
/**
 * Lead capture endpoint.
 *
 * Accepts a JSON POST body with "email", "source" and "page", validates the
 * email address and appends one line per lead to the leads log.
 * Responds with JSON: {"success": true, ...} or {"error": "..."}.
 */

header("Content-Type: application/json");
header("Access-Control-Allow-Origin: *");
header("Access-Control-Allow-Methods: POST");
header("Access-Control-Allow-Headers: Content-Type");

// CORS preflight: the headers above are the whole answer.
if ($_SERVER["REQUEST_METHOD"] === "OPTIONS") {
    http_response_code(200);
    exit;
}

if ($_SERVER["REQUEST_METHOD"] !== "POST") {
    http_response_code(405);
    echo json_encode(["error" => "Method not allowed"]);
    exit;
}

// A body that is not a JSON object decodes to null or a scalar; treat it as empty.
$input = json_decode(file_get_contents("php://input"), true);
if (!is_array($input)) {
    $input = [];
}

/*
 * Normalises a free-text field for the log:
 *  - non-scalar values (arrays from nested JSON) fall back to "unknown" instead
 *    of raising a TypeError inside htmlspecialchars() on PHP 8;
 *  - control characters, including CR/LF, collapse to a space so a value
 *    cannot start a forged log line;
 *  - the result is HTML-escaped, as before.
 * The pattern is single-quoted so PCRE, not PHP, interprets the \x escapes.
 */
$cleanField = static function ($value): string {
    if (!is_scalar($value)) {
        return "unknown";
    }

    $singleLine = preg_replace('/[\x00-\x1F\x7F]+/', " ", (string) $value) ?? "";

    return htmlspecialchars($singleLine);
};

// filter_var() returns false for anything that is not a valid address,
// including non-string input.
$email = filter_var($input["email"] ?? "", FILTER_VALIDATE_EMAIL);
$source = $cleanField($input["source"] ?? "unknown");
$page = $cleanField($input["page"] ?? "unknown");

if (!$email) {
    http_response_code(400);
    echo json_encode(["error" => "Invalid email"]);
    exit;
}

// Log the lead
// NOTE(review): the log path looks like it sits in the same api/ directory as
// this script - confirm the web server does not serve leads.log to visitors.
$log_entry = date("Y-m-d H:i:s") . " | $email | $source | $page\n";
$written = file_put_contents("/var/www/ukds/api/leads.log", $log_entry, FILE_APPEND | LOCK_EX);

// Do not claim success when the lead was not actually recorded.
if ($written === false) {
    http_response_code(500);
    echo json_encode(["error" => "Could not record lead"]);
    exit;
}

// Send notification email (optional - uncomment if you want email alerts)
// mail("peter.foster@ukdataservices.co.uk", "New Lead: $email", "Source: $source\nPage: $page");

echo json_encode(["success" => true, "message" => "Lead captured"]);
|
||||
Reference in New Issue
Block a user