Files
cgsh-ofac/src/OFACScraper/Application.cs

94 lines
3.0 KiB
C#
Raw Normal View History

using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using OFACScraper.Configuration;
namespace OFACScraper;
/// <summary>
/// Orchestrates a scrape run: fetches the records for one or more years from the
/// scraper, exports every record that is not yet in the checkpoint store, and
/// marks each successfully exported record as processed.
/// </summary>
public class Application
{
    private readonly OFACScraper _scraper;
    private readonly Exporter _exporter;
    private readonly CheckpointStore _checkpoint;
    private readonly OFACOptions _options;
    private readonly ILogger<Application> _logger;

    /// <summary>
    /// Creates the application from its collaborators.
    /// </summary>
    /// <param name="scraper">Source of the per-year records.</param>
    /// <param name="exporter">Exports a single record.</param>
    /// <param name="checkpoint">Tracks which records have already been processed.</param>
    /// <param name="options">Scraper options; <c>StartYear</c> is read by <see cref="RunFullAsync"/>.</param>
    /// <param name="logger">Logger for progress messages.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public Application(
        OFACScraper scraper,
        Exporter exporter,
        CheckpointStore checkpoint,
        IOptions<OFACOptions> options,
        ILogger<Application> logger)
    {
        // Fail fast with a descriptive exception instead of a late NullReferenceException.
        if (options is null)
        {
            throw new ArgumentNullException(nameof(options));
        }

        _scraper = scraper ?? throw new ArgumentNullException(nameof(scraper));
        _exporter = exporter ?? throw new ArgumentNullException(nameof(exporter));
        _checkpoint = checkpoint ?? throw new ArgumentNullException(nameof(checkpoint));
        _options = options.Value;
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Full historical scrape: all years from StartYear to the current (UTC) year.
    /// Skips records already in checkpoint.
    /// </summary>
    /// <param name="ct">Token used to stop between years and between records.</param>
    /// <returns>Always 0 (presumably a process exit code; confirm against the caller).</returns>
    public async Task<int> RunFullAsync(CancellationToken ct = default)
    {
        var currentYear = DateTime.UtcNow.Year;
        _logger.LogInformation("Starting full scrape {StartYear}-{EndYear}", _options.StartYear, currentYear);

        var total = 0;
        for (var year = _options.StartYear; year <= currentYear; year++)
        {
            total += await ProcessYearAsync(year, ct);

            // Stop before starting another year once cancellation has been requested.
            if (ct.IsCancellationRequested)
            {
                break;
            }
        }

        _logger.LogInformation("Full scrape complete. {Total} new records exported. DB total: {DbTotal}",
            total, _checkpoint.GetTotalCount());
        return 0;
    }

    /// <summary>
    /// Daily/incremental run: scrapes the current (UTC) year only, exports any new records.
    /// </summary>
    /// <param name="ct">Token used to stop between records.</param>
    /// <returns>Always 0 (presumably a process exit code; confirm against the caller).</returns>
    public async Task<int> RunDailyAsync(CancellationToken ct = default)
    {
        var currentYear = DateTime.UtcNow.Year;
        _logger.LogInformation("Starting daily scrape for {Year}", currentYear);

        var newRecords = await ProcessYearAsync(currentYear, ct);

        _logger.LogInformation("Daily scrape complete. {New} new records exported.", newRecords);
        return 0;
    }

    /// <summary>
    /// Fetches the records for <paramref name="year"/> and exports each one that the
    /// checkpoint store does not already contain. A record is marked as processed
    /// only after its export reports success.
    /// </summary>
    /// <param name="year">Year whose records are requested from the scraper.</param>
    /// <param name="ct">Token checked before each record; also passed to the scraper and exporter.</param>
    /// <returns>Number of records exported and checkpointed during this call.</returns>
    private async Task<int> ProcessYearAsync(int year, CancellationToken ct)
    {
        var records = await _scraper.GetYearRecordsAsync(year, ct);
        var newCount = 0;

        foreach (var record in records)
        {
            if (ct.IsCancellationRequested)
            {
                break;
            }

            if (_checkpoint.HasRecord(record.TextId))
            {
                _logger.LogDebug("Skipping {TextId} (already processed)", record.TextId);
                continue;
            }

            var success = await _exporter.ExportRecordAsync(record, ct);
            if (!success)
            {
                // Surface the failure; the record is deliberately left out of the checkpoint.
                _logger.LogWarning("Export failed for {TextId}; record not checkpointed", record.TextId);
                continue;
            }

            _checkpoint.MarkProcessed(
                record.TextId, record.Date, record.Name, record.PenaltyTotalUsd,
                record.DocumentUrl, record.FileName, record.Year);
            newCount++;
        }

        if (newCount > 0)
        {
            _logger.LogInformation("Year {Year}: exported {Count} new records", year, newCount);
        }

        return newCount;
    }
}