using Amazon;
using Amazon.S3;
using Amazon.S3.Model;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using OFACScraper.Configuration;
using System.Security.Cryptography;

namespace OFACScraper.Services;

/// <summary>
/// Uploads scraper output to S3. When S3 is not configured
/// (<see cref="S3Options.IsConfigured"/> is false) every operation is a
/// logged no-op, so the scraper can run locally without credentials.
/// </summary>
public class S3UploadService : IDisposable
{
    private readonly S3Options _options;
    private readonly ILogger<S3UploadService> _logger;

    // Null when S3 is not configured; every public method checks before use.
    private readonly IAmazonS3? _s3Client;

    public S3UploadService(IOptions<S3Options> options, ILogger<S3UploadService> logger)
    {
        _options = options.Value;
        _logger = logger;

        if (_options.IsConfigured)
        {
            var config = new AmazonS3Config
            {
                RegionEndpoint = RegionEndpoint.GetBySystemName(_options.Region)
            };
            _s3Client = new AmazonS3Client(_options.AccessKeyId, _options.SecretAccessKey, config);
            _logger.LogInformation("S3 configured: bucket={Bucket} region={Region} prefix={Prefix}",
                _options.BucketName, _options.Region, _options.Prefix);
        }
        else
        {
            _logger.LogWarning("S3 not configured — uploads will be skipped.");
        }
    }

    public void Dispose() => (_s3Client as IDisposable)?.Dispose();

    /// <summary>
    /// Uploads a single local file to the configured bucket under <paramref name="s3Key"/>.
    /// </summary>
    /// <param name="localPath">Path of the file on disk.</param>
    /// <param name="s3Key">Destination object key.</param>
    /// <returns>
    /// <c>true</c> on success; <c>false</c> when S3 is unconfigured or the upload fails.
    /// Failures are logged, never thrown, so one bad file cannot abort a directory sync.
    /// </returns>
    public async Task<bool> UploadFileAsync(string localPath, string s3Key)
    {
        if (_s3Client == null)
            return false;

        try
        {
            await _s3Client.PutObjectAsync(new PutObjectRequest
            {
                BucketName = _options.BucketName,
                Key = s3Key,
                FilePath = localPath
            });
            _logger.LogDebug("Uploaded {Path} → s3://{Bucket}/{Key}",
                localPath, _options.BucketName, s3Key);
            return true;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to upload {Path}", localPath);
            return false;
        }
    }

    /// <summary>
    /// Syncs <paramref name="localDirectory"/> to S3 under <paramref name="s3Prefix"/>,
    /// skipping files whose MD5 matches the existing S3 ETag.
    /// Uploads metadata.json last so CGSH processing triggers only after all documents are present.
    /// </summary>
    /// <returns>Counts of uploaded/skipped/failed files, or an error/not-configured marker.</returns>
    public async Task<S3SyncResult> SyncDirectoryAsync(string localDirectory, string s3Prefix)
    {
        var result = new S3SyncResult();

        if (_s3Client == null)
        {
            _logger.LogWarning("S3 not configured, skipping sync of {Path}", localDirectory);
            result.NotConfigured = true;
            return result;
        }

        if (!Directory.Exists(localDirectory))
        {
            result.Error = $"Directory not found: {localDirectory}";
            _logger.LogError("S3 sync aborted: {Error}", result.Error);
            return result;
        }

        // List existing objects (paged) so unchanged files can be skipped below.
        var existing = new Dictionary<string, string>();
        var listRequest = new ListObjectsV2Request
        {
            BucketName = _options.BucketName,
            Prefix = s3Prefix + "/"
        };
        ListObjectsV2Response listResponse;
        do
        {
            listResponse = await _s3Client.ListObjectsV2Async(listRequest);
            foreach (var obj in listResponse.S3Objects ?? [])
                existing[obj.Key] = obj.ETag?.Trim('"') ?? "";
            listRequest.ContinuationToken = listResponse.NextContinuationToken;
        } while (listResponse.IsTruncated == true);

        // metadata.json last — CGSH triggers on its arrival.
        var files = Directory.GetFiles(localDirectory, "*", SearchOption.AllDirectories)
            .OrderBy(f => Path.GetFileName(f) == "metadata.json" ? 1 : 0)
            .ThenBy(f => f)
            .ToArray();

        foreach (var file in files)
        {
            var relativePath = Path.GetRelativePath(localDirectory, file).Replace('\\', '/');
            var s3Key = $"{s3Prefix}/{relativePath}";

            if (existing.TryGetValue(s3Key, out var etag) && !string.IsNullOrEmpty(etag))
            {
                // NOTE(review): an S3 ETag equals the object's MD5 only for simple
                // (non-multipart, non-KMS) uploads; a multipart ETag simply won't match
                // and the file is re-uploaded, which is safe.
                // Hash via a stream to avoid buffering large files in memory.
                string localMd5;
                using (var stream = File.OpenRead(file))
                    localMd5 = Convert.ToHexString(MD5.HashData(stream)).ToLowerInvariant();

                if (string.Equals(localMd5, etag, StringComparison.OrdinalIgnoreCase))
                {
                    result.Skipped++;
                    continue;
                }
            }

            if (await UploadFileAsync(file, s3Key))
                result.Uploaded++;
            else
                result.Failed++;
        }

        _logger.LogInformation("S3 sync: {Uploaded} uploaded, {Skipped} unchanged, {Failed} failed",
            result.Uploaded, result.Skipped, result.Failed);
        return result;
    }
}

/// <summary>Outcome of a <see cref="S3UploadService.SyncDirectoryAsync"/> run.</summary>
public class S3SyncResult
{
    public int Uploaded { get; set; }
    public int Skipped { get; set; }
    public int Failed { get; set; }

    /// <summary>True when S3 credentials were absent and the sync was skipped entirely.</summary>
    public bool NotConfigured { get; set; }

    /// <summary>Non-null when the sync could not start (e.g. missing local directory).</summary>
    public string? Error { get; set; }
}