using System.Text.Json;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using TrueCV.Application.Helpers;
using TrueCV.Application.Interfaces;
using TrueCV.Application.Models;
using TrueCV.Domain.Entities;
using TrueCV.Domain.Enums;
using TrueCV.Infrastructure.Data;

namespace TrueCV.Infrastructure.Jobs;

/// <summary>
/// Job that processes a single CV check end to end: downloads the stored file, parses it,
/// verifies the employment and education entries, analyses the employment timeline,
/// computes a veracity score with its flags, and persists the flags and a serialized report.
/// </summary>
public sealed class ProcessCVCheckJob
{
    private readonly ApplicationDbContext _dbContext;
    private readonly IFileStorageService _fileStorageService;
    private readonly ICVParserService _cvParserService;
    private readonly ICompanyVerifierService _companyVerifierService;
    private readonly IEducationVerifierService _educationVerifierService;
    private readonly ITimelineAnalyserService _timelineAnalyserService;

    // NOTE(review): the generic argument was not visible in the reviewed text; the
    // category type is assumed to be this class - confirm against the DI registration.
    private readonly ILogger<ProcessCVCheckJob> _logger;

    // Scoring model: every check starts at BaseScore and loses points per finding.
    private const int BaseScore = 100;
    private const int UnverifiedCompanyPenalty = 10;
    private const int GapMonthPenalty = 1;
    private const int MaxGapPenalty = 10;
    private const int OverlapMonthPenalty = 2;
    private const int DiplomaMillPenalty = 25;
    private const int SuspiciousInstitutionPenalty = 15;
    private const int UnverifiedEducationPenalty = 5;
    private const int EducationDatePenalty = 10;

    // Overlaps up to this many months are not penalised (job-transition allowance).
    private const int OverlapGraceMonths = 2;

    // Gaps/overlaps of at least this many months are raised one severity level.
    private const int LongTimelineIssueMonths = 6;

    // Education status value that marks an institution as neither recognised nor flagged.
    private const string UnknownEducationStatus = "Unknown";

    /// <summary>Creates the job with the services it orchestrates.</summary>
    public ProcessCVCheckJob(
        ApplicationDbContext dbContext,
        IFileStorageService fileStorageService,
        ICVParserService cvParserService,
        ICompanyVerifierService companyVerifierService,
        IEducationVerifierService educationVerifierService,
        ITimelineAnalyserService timelineAnalyserService,
        ILogger<ProcessCVCheckJob> logger)
    {
        _dbContext = dbContext;
        _fileStorageService = fileStorageService;
        _cvParserService = cvParserService;
        _companyVerifierService = companyVerifierService;
        _educationVerifierService = educationVerifierService;
        _timelineAnalyserService = timelineAnalyserService;
        _logger = logger;
    }

    /// <summary>
    /// Runs the full processing pipeline for the CV check with the given id.
    /// </summary>
    /// <param name="cvCheckId">Identifier of the CV check record to process.</param>
    /// <param name="cancellationToken">Token passed to the database and parser calls.</param>
    /// <remarks>
    /// If the record does not exist, an error is logged and the method returns without throwing.
    /// If any step throws, the check is marked <see cref="CheckStatus.Failed"/> (best effort)
    /// and the original exception is rethrown.
    /// </remarks>
    public async Task ExecuteAsync(Guid cvCheckId, CancellationToken cancellationToken)
    {
        _logger.LogInformation("Starting CV check processing for: {CheckId}", cvCheckId);

        var cvCheck = await _dbContext.CVChecks
            .FirstOrDefaultAsync(c => c.Id == cvCheckId, cancellationToken);

        if (cvCheck is null)
        {
            _logger.LogError("CV check not found: {CheckId}", cvCheckId);
            return;
        }

        try
        {
            // Step 1: Update status to Processing
            cvCheck.Status = CheckStatus.Processing;
            await _dbContext.SaveChangesAsync(cancellationToken);
            _logger.LogDebug("CV check {CheckId} status updated to Processing", cvCheckId);

            // Step 2: Download file from blob
            // NOTE(review): no cancellation token is passed here; forward it if
            // IFileStorageService.DownloadAsync offers an overload that accepts one.
            await using var fileStream = await _fileStorageService.DownloadAsync(cvCheck.BlobUrl);
            _logger.LogDebug("Downloaded CV file for check {CheckId}", cvCheckId);

            // Step 3: Parse CV
            var cvData = await _cvParserService.ParseAsync(fileStream, cvCheck.OriginalFileName, cancellationToken);
            _logger.LogDebug(
                "Parsed CV for check {CheckId}: {EmploymentCount} employment entries",
                cvCheckId,
                cvData.Employment.Count);

            // Step 4: Save extracted data
            cvCheck.ExtractedDataJson = JsonSerializer.Serialize(cvData, JsonDefaults.CamelCaseIndented);
            await _dbContext.SaveChangesAsync(cancellationToken);

            // Step 5: Verify each employment entry. All verifications are started together and
            // awaited with Task.WhenAll; this method applies no throttling of its own, so any
            // rate limiting has to live inside the verifier service.
            var verificationTasks = cvData.Employment.Select(async employment =>
            {
                var result = await _companyVerifierService.VerifyCompanyAsync(
                    employment.CompanyName,
                    employment.StartDate,
                    employment.EndDate);

                _logger.LogDebug(
                    "Verified {Company}: {IsVerified} (Score: {Score}%)",
                    employment.CompanyName,
                    result.IsVerified,
                    result.MatchScore);

                return result;
            });

            var verificationResults = (await Task.WhenAll(verificationTasks)).ToList();

            // Step 6: Verify education entries
            var educationResults = _educationVerifierService.VerifyAll(
                cvData.Education,
                cvData.Employment);

            _logger.LogDebug(
                "Education verification for check {CheckId}: {Count} entries verified ({Recognised} recognised, {DiplomaMill} diploma mills)",
                cvCheckId,
                educationResults.Count,
                educationResults.Count(e => e.IsVerified),
                educationResults.Count(e => e.IsDiplomaMill));

            // Step 7: Analyse timeline
            var timelineAnalysis = _timelineAnalyserService.Analyse(cvData.Employment);
            _logger.LogDebug(
                "Timeline analysis for check {CheckId}: {GapCount} gaps, {OverlapCount} overlaps",
                cvCheckId,
                timelineAnalysis.Gaps.Count,
                timelineAnalysis.Overlaps.Count);

            // Step 8: Calculate veracity score
            var (score, flags) = CalculateVeracityScore(verificationResults, educationResults, timelineAnalysis);
            _logger.LogDebug("Calculated veracity score for check {CheckId}: {Score}", cvCheckId, score);

            // Step 9: Create CVFlag records (saved together with the report below)
            AddFlagEntities(cvCheckId, flags);

            // Step 10: Generate veracity report
            var report = new VeracityReport
            {
                OverallScore = score,
                ScoreLabel = GetScoreLabel(score),
                EmploymentVerifications = verificationResults,
                EducationVerifications = educationResults,
                TimelineAnalysis = timelineAnalysis,
                Flags = flags,
                GeneratedAt = DateTime.UtcNow
            };

            cvCheck.ReportJson = JsonSerializer.Serialize(report, JsonDefaults.CamelCaseIndented);
            cvCheck.VeracityScore = score;

            // Step 11: Update status to Completed
            cvCheck.Status = CheckStatus.Completed;
            cvCheck.CompletedAt = DateTime.UtcNow;
            await _dbContext.SaveChangesAsync(cancellationToken);

            _logger.LogInformation(
                "CV check {CheckId} completed successfully with score {Score}",
                cvCheckId,
                score);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error processing CV check {CheckId}", cvCheckId);
            cvCheck.Status = CheckStatus.Failed;

            try
            {
                // Use CancellationToken.None to ensure failure status is saved even if original token is cancelled
                await _dbContext.SaveChangesAsync(CancellationToken.None);
            }
            catch (Exception saveException)
            {
                // A failure while recording the Failed status must not replace the
                // exception that caused the failure in the first place.
                _logger.LogError(
                    saveException,
                    "Could not persist Failed status for CV check {CheckId}",
                    cvCheckId);
            }

            throw;
        }
    }

    /// <summary>
    /// Converts the computed flags into <see cref="CVFlag"/> entities and adds them to the
    /// context. Nothing is saved here; the caller's next SaveChanges persists them.
    /// </summary>
    private void AddFlagEntities(Guid cvCheckId, List<FlagResult> flags)
    {
        foreach (var flag in flags)
        {
            // Flags carry category/severity as strings; fall back to a default (and log)
            // rather than fail the whole check on an unrecognised value.
            if (!Enum.TryParse<FlagCategory>(flag.Category, out var category))
            {
                _logger.LogWarning("Unknown flag category: {Category}, defaulting to Timeline", flag.Category);
                category = FlagCategory.Timeline;
            }

            if (!Enum.TryParse<FlagSeverity>(flag.Severity, out var severity))
            {
                _logger.LogWarning("Unknown flag severity: {Severity}, defaulting to Info", flag.Severity);
                severity = FlagSeverity.Info;
            }

            _dbContext.CVFlags.Add(new CVFlag
            {
                Id = Guid.NewGuid(),
                CVCheckId = cvCheckId,
                Category = category,
                Severity = severity,
                Title = flag.Title,
                Description = flag.Description,
                ScoreImpact = flag.ScoreImpact
            });
        }
    }

    /// <summary>
    /// Computes the veracity score and the flags explaining every deduction.
    /// </summary>
    /// <param name="verifications">Employment verification results.</param>
    /// <param name="educationResults">Education verification results.</param>
    /// <param name="timeline">Gaps and overlaps found in the employment timeline.</param>
    /// <returns>The score, clamped so it never drops below 0, and the flags in evaluation order.</returns>
    // NOTE(review): the element types of the two list parameters were not visible in the
    // reviewed text; CompanyVerificationResult / EducationVerificationResult are assumed
    // names - confirm against TrueCV.Application.Models.
    private static (int Score, List<FlagResult> Flags) CalculateVeracityScore(
        List<CompanyVerificationResult> verifications,
        List<EducationVerificationResult> educationResults,
        TimelineAnalysisResult timeline)
    {
        var score = BaseScore;
        var flags = new List<FlagResult>();

        // Applies one deduction and records the flag that explains it.
        void Penalise(FlagCategory category, FlagSeverity severity, string title, string description, int penalty)
        {
            score -= penalty;
            flags.Add(new FlagResult
            {
                Category = category.ToString(),
                Severity = severity.ToString(),
                Title = title,
                Description = description,
                ScoreImpact = -penalty
            });
        }

        // Penalty for unverified companies
        foreach (var verification in verifications.Where(v => !v.IsVerified))
        {
            Penalise(
                FlagCategory.Employment,
                FlagSeverity.Warning,
                "Unverified Company",
                $"Could not verify employment at '{verification.ClaimedCompany}'. {verification.VerificationNotes}",
                UnverifiedCompanyPenalty);
        }

        // Penalty for diploma mills (critical)
        foreach (var edu in educationResults.Where(e => e.IsDiplomaMill))
        {
            Penalise(
                FlagCategory.Education,
                FlagSeverity.Critical,
                "Diploma Mill Detected",
                $"'{edu.ClaimedInstitution}' is a known diploma mill. {edu.VerificationNotes}",
                DiplomaMillPenalty);
        }

        // Penalty for suspicious institutions (diploma mills were already penalised above)
        foreach (var edu in educationResults.Where(e => e.IsSuspicious && !e.IsDiplomaMill))
        {
            Penalise(
                FlagCategory.Education,
                FlagSeverity.Warning,
                "Suspicious Institution",
                $"'{edu.ClaimedInstitution}' has suspicious characteristics. {edu.VerificationNotes}",
                SuspiciousInstitutionPenalty);
        }

        // Penalty for unverified education (not recognised, but not flagged as fake)
        foreach (var edu in educationResults.Where(e =>
                     !e.IsVerified && !e.IsDiplomaMill && !e.IsSuspicious && e.Status == UnknownEducationStatus))
        {
            Penalise(
                FlagCategory.Education,
                FlagSeverity.Info,
                "Unverified Institution",
                $"Could not verify '{edu.ClaimedInstitution}'. {edu.VerificationNotes}",
                UnverifiedEducationPenalty);
        }

        // Penalty for implausible education dates
        foreach (var edu in educationResults.Where(e => !e.DatesArePlausible))
        {
            Penalise(
                FlagCategory.Education,
                FlagSeverity.Warning,
                "Education Date Issues",
                $"Date issues for '{edu.ClaimedInstitution}': {edu.DatePlausibilityNotes}",
                EducationDatePenalty);
        }

        // Penalty for gaps (capped at MaxGapPenalty per gap)
        foreach (var gap in timeline.Gaps)
        {
            var gapPenalty = Math.Min(gap.Months * GapMonthPenalty, MaxGapPenalty);
            var gapSeverity = gap.Months >= LongTimelineIssueMonths ? FlagSeverity.Warning : FlagSeverity.Info;

            Penalise(
                FlagCategory.Timeline,
                gapSeverity,
                "Employment Gap",
                $"{gap.Months} month gap in employment from {gap.StartDate:MMM yyyy} to {gap.EndDate:MMM yyyy}",
                gapPenalty);
        }

        // Penalty for overlaps: only months beyond the grace period cost points, but every
        // overlap is still flagged (with a zero impact when it is within the grace period).
        foreach (var overlap in timeline.Overlaps)
        {
            var excessMonths = Math.Max(0, overlap.Months - OverlapGraceMonths);
            var overlapPenalty = excessMonths * OverlapMonthPenalty;
            var overlapSeverity = overlap.Months >= LongTimelineIssueMonths ? FlagSeverity.Critical : FlagSeverity.Warning;

            Penalise(
                FlagCategory.Timeline,
                overlapSeverity,
                "Employment Overlap",
                $"{overlap.Months} month overlap between '{overlap.Company1}' and '{overlap.Company2}' ({overlap.OverlapStart:MMM yyyy} to {overlap.OverlapEnd:MMM yyyy})",
                overlapPenalty);
        }

        // Ensure score doesn't go below 0
        score = Math.Max(0, score);

        return (score, flags);
    }

    /// <summary>Maps a numeric veracity score to its human-readable label.</summary>
    private static string GetScoreLabel(int score) => score switch
    {
        >= 90 => "Excellent",
        >= 75 => "Good",
        >= 60 => "Fair",
        >= 40 => "Poor",
        _ => "Very Poor"
    };
}