94 lines
3.0 KiB
C#
94 lines
3.0 KiB
C#
|
|
using Microsoft.Extensions.Logging;
|
|||
|
|
using Microsoft.Extensions.Options;
|
|||
|
|
using OFACScraper.Configuration;
|
|||
|
|
|
|||
|
|
namespace OFACScraper;
|
|||
|
|
|
|||
|
|
/// <summary>
/// Coordinates a scrape run: fetches the records for one or more years from the scraper,
/// exports every record that the checkpoint store does not already contain, and marks each
/// successfully exported record as processed.
/// </summary>
public class Application
{
    private readonly OFACScraper _scraper;
    private readonly Exporter _exporter;
    private readonly CheckpointStore _checkpoint;
    private readonly OFACOptions _options;
    private readonly ILogger<Application> _logger;

    /// <summary>
    /// Creates the application from its collaborators.
    /// </summary>
    /// <param name="scraper">Source of per-year records.</param>
    /// <param name="exporter">Exports a single record and reports success or failure.</param>
    /// <param name="checkpoint">Store used to detect and record already-processed records.</param>
    /// <param name="options">Options wrapper; its <c>Value</c> supplies <c>StartYear</c>.</param>
    /// <param name="logger">Logger for progress and diagnostics.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public Application(
        OFACScraper scraper,
        Exporter exporter,
        CheckpointStore checkpoint,
        IOptions<OFACOptions> options,
        ILogger<Application> logger)
    {
        // Fail fast with a named argument instead of a NullReferenceException
        // surfacing later (e.g. from options.Value below).
        ArgumentNullException.ThrowIfNull(scraper);
        ArgumentNullException.ThrowIfNull(exporter);
        ArgumentNullException.ThrowIfNull(checkpoint);
        ArgumentNullException.ThrowIfNull(options);
        ArgumentNullException.ThrowIfNull(logger);

        _scraper = scraper;
        _exporter = exporter;
        _checkpoint = checkpoint;
        _options = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Full historical scrape: all years from StartYear to the current UTC year.
    /// Skips records already in the checkpoint.
    /// </summary>
    /// <param name="ct">
    /// Token checked after each year; once cancellation is requested no further years are processed.
    /// </param>
    /// <returns>
    /// Always <c>0</c>. NOTE(review): presumably a process exit code — confirm against the caller.
    /// </returns>
    public async Task<int> RunFullAsync(CancellationToken ct = default)
    {
        var currentYear = DateTime.UtcNow.Year;
        _logger.LogInformation("Starting full scrape {StartYear}–{EndYear}", _options.StartYear, currentYear);

        var total = 0;
        var cancelled = false;
        for (var year = _options.StartYear; year <= currentYear; year++)
        {
            total += await ProcessYearAsync(year, ct);
            if (ct.IsCancellationRequested)
            {
                cancelled = true;
                break;
            }
        }

        if (cancelled)
        {
            // Do not report an interrupted run as complete.
            _logger.LogWarning(
                "Full scrape cancelled. {Total} new records exported before cancellation. DB total: {DbTotal}",
                total, _checkpoint.GetTotalCount());
        }
        else
        {
            _logger.LogInformation(
                "Full scrape complete. {Total} new records exported. DB total: {DbTotal}",
                total, _checkpoint.GetTotalCount());
        }

        return 0;
    }

    /// <summary>
    /// Daily/incremental run: scrapes the current UTC year only and exports any new records.
    /// </summary>
    /// <param name="ct">Token forwarded to the per-year processing.</param>
    /// <returns>
    /// Always <c>0</c>. NOTE(review): presumably a process exit code — confirm against the caller.
    /// </returns>
    public async Task<int> RunDailyAsync(CancellationToken ct = default)
    {
        var currentYear = DateTime.UtcNow.Year;
        _logger.LogInformation("Starting daily scrape for {Year}", currentYear);

        var newRecords = await ProcessYearAsync(currentYear, ct);

        if (ct.IsCancellationRequested)
        {
            // Do not report an interrupted run as complete.
            _logger.LogWarning(
                "Daily scrape cancelled. {New} new records exported before cancellation.", newRecords);
        }
        else
        {
            _logger.LogInformation("Daily scrape complete. {New} new records exported.", newRecords);
        }

        return 0;
    }

    /// <summary>
    /// Fetches the records for <paramref name="year"/>, exports those not yet in the checkpoint,
    /// and marks each successful export as processed.
    /// </summary>
    /// <param name="year">Year whose records are requested from the scraper.</param>
    /// <param name="ct">
    /// Token passed to the scraper and exporter, and checked before each record; when cancellation
    /// is requested the loop stops and the count so far is returned.
    /// </param>
    /// <returns>Number of records exported and checkpointed during this call.</returns>
    private async Task<int> ProcessYearAsync(int year, CancellationToken ct)
    {
        var records = await _scraper.GetYearRecordsAsync(year, ct);
        var newCount = 0;

        foreach (var record in records)
        {
            if (ct.IsCancellationRequested)
            {
                _logger.LogWarning(
                    "Year {Year}: cancellation requested, stopping after {Count} new records", year, newCount);
                break;
            }

            if (_checkpoint.HasRecord(record.TextId))
            {
                _logger.LogDebug("Skipping {TextId} (already processed)", record.TextId);
                continue;
            }

            var success = await _exporter.ExportRecordAsync(record, ct);
            if (!success)
            {
                // A failed export is intentionally not checkpointed; make the failure visible
                // instead of dropping it silently.
                _logger.LogWarning(
                    "Export failed for {TextId}; record left unmarked in checkpoint", record.TextId);
                continue;
            }

            _checkpoint.MarkProcessed(
                record.TextId, record.Date, record.Name, record.PenaltyTotalUsd,
                record.DocumentUrl, record.FileName, record.Year);
            newCount++;
        }

        if (newCount > 0)
        {
            _logger.LogInformation("Year {Year}: exported {Count} new records", year, newCount);
        }

        return newCount;
    }
}
|