Initial OFAC Civil Penalties scraper

Scrapes https://ofac.treasury.gov/civil-penalties-and-enforcement-information
for all years from 2003 to the present. Downloads the PDF documents and exports a
metadata.json per the CGSH Publication spec (v3) to the experimental S3 bucket under the ofac/ prefix.

Commands: ofac-full (all years), ofac-daily (current year incremental).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Peter Foster
2026-04-09 15:29:00 +01:00
commit ad7c5d55eb
110 changed files with 5075 additions and 0 deletions

View File

@@ -0,0 +1,27 @@
{
  "OFAC": {
    "BaseUrl": "https://ofac.treasury.gov",
    "YearUrlTemplate": "/civil-penalties-and-enforcement-information/{0}-enforcement-information",
    "StartYear": 2003,
    "UserAgent": "Mozilla/5.0 (compatible; OFACBot/1.0; +https://ukdataservices.co.uk)",
    "RequestDelayMs": 1000
  },
  "Storage": {
    "DatabasePath": "/git/cgsh-ofac/data/ofac.db",
    "ExportDirectory": "/git/cgsh-ofac/data/exports",
    "DownloadDirectory": "/git/cgsh-ofac/data/downloads"
  },
  "S3": {
    "BucketName": "uk-data-services-experimental-927681712454-eu-west-3-an",
    "AccessKeyId": "",
    "SecretAccessKey": "",
    "Region": "eu-west-3",
    "Prefix": "ofac"
  },
  "Logging": {
    "LogLevel": {
      "Default": "Information",
      "Microsoft.Extensions.Http": "Warning"
    }
  }
}