using System.Text.Json;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using TrueCV.Application.Helpers;
using TrueCV.Application.Interfaces;
using TrueCV.Application.Models;
using TrueCV.Domain.Entities;
using TrueCV.Domain.Enums;
using TrueCV.Infrastructure.Data;

namespace TrueCV.Infrastructure.Jobs;

/// <summary>
/// Job that processes one CV check: downloads the stored CV, parses it, verifies the
/// employment, director and education claims, analyses the employment timeline,
/// derives a veracity score with supporting flags, and persists the resulting report.
/// </summary>
public sealed class ProcessCVCheckJob
{
    private readonly ApplicationDbContext _dbContext;
    private readonly IFileStorageService _fileStorageService;
    private readonly ICVParserService _cvParserService;
    private readonly ICompanyVerifierService _companyVerifierService;
    private readonly IEducationVerifierService _educationVerifierService;
    private readonly ITimelineAnalyserService _timelineAnalyserService;
    private readonly IAuditService _auditService;
    private readonly ILogger _logger;

    // Every check starts from this score; each flag's ScoreImpact is then added to it.
    private const int BaseScore = 100;

    // Employment penalties.
    private const int UnverifiedCompanyPenalty = 10;
    private const int ImplausibleJobTitlePenalty = 15; // Not referenced in this class.
    private const int CompanyVerificationFlagPenalty = 5; // Not referenced in this class; company flags carry their own ScoreImpact.
    private const int UnverifiedDirectorClaimPenalty = 20;
    private const int RapidProgressionPenalty = 10;
    private const int EarlyCareerSeniorRolePenalty = 10;

    // Timeline penalties.
    private const int GapMonthPenalty = 1;
    private const int MaxGapPenalty = 10;
    private const int OverlapMonthPenalty = 2;
    private const int OverlapGraceMonths = 2;

    // Education penalties.
    private const int DiplomaMillPenalty = 25;
    private const int SuspiciousInstitutionPenalty = 15;
    private const int UnverifiedEducationPenalty = 5;
    private const int EducationDatePenalty = 10;

    /// <summary>
    /// Creates the job with the services it orchestrates.
    /// </summary>
    public ProcessCVCheckJob(
        ApplicationDbContext dbContext,
        IFileStorageService fileStorageService,
        ICVParserService cvParserService,
        ICompanyVerifierService companyVerifierService,
        IEducationVerifierService educationVerifierService,
        ITimelineAnalyserService timelineAnalyserService,
        IAuditService auditService,
        ILogger logger)
    {
        _dbContext = dbContext;
        _fileStorageService = fileStorageService;
        _cvParserService = cvParserService;
        _companyVerifierService = companyVerifierService;
        _educationVerifierService = educationVerifierService;
        _timelineAnalyserService = timelineAnalyserService;
        _auditService = auditService;
        _logger = logger;
    }

    /// <summary>
    /// Runs the full processing pipeline for the CV check with the given id.
    /// </summary>
    /// <param name="cvCheckId">Id of the CV check record to process.</param>
    /// <param name="cancellationToken">Token passed to the database and parser calls.</param>
    /// <remarks>
    /// If the record does not exist the method logs an error and returns. If any step
    /// throws, the record is marked <see cref="CheckStatus.Failed"/> and the exception
    /// is rethrown; when that status save itself fails with a concurrency exception
    /// (record deleted meanwhile) the method logs a warning and returns instead.
    /// </remarks>
    public async Task ExecuteAsync(Guid cvCheckId, CancellationToken cancellationToken)
    {
        _logger.LogInformation("Starting CV check processing for: {CheckId}", cvCheckId);

        var cvCheck = await _dbContext.CVChecks
            .FirstOrDefaultAsync(c => c.Id == cvCheckId, cancellationToken);

        if (cvCheck is null)
        {
            _logger.LogError("CV check not found: {CheckId}", cvCheckId);
            return;
        }

        try
        {
            // Step 1: Update status to Processing
            cvCheck.Status = CheckStatus.Processing;
            cvCheck.ProcessingStage = "Downloading CV";
            await _dbContext.SaveChangesAsync(cancellationToken);

            _logger.LogDebug("CV check {CheckId} status updated to Processing", cvCheckId);

            // Step 2: Download file from blob
            await using var fileStream = await _fileStorageService.DownloadAsync(cvCheck.BlobUrl);

            _logger.LogDebug("Downloaded CV file for check {CheckId}", cvCheckId);

            // Step 3: Parse CV
            cvCheck.ProcessingStage = "Parsing CV";
            await _dbContext.SaveChangesAsync(cancellationToken);

            var cvData = await _cvParserService.ParseAsync(fileStream, cvCheck.OriginalFileName, cancellationToken);

            _logger.LogDebug(
                "Parsed CV for check {CheckId}: {EmploymentCount} employment entries",
                cvCheckId,
                cvData.Employment.Count);

            // Step 4: Save extracted data
            cvCheck.ExtractedDataJson = JsonSerializer.Serialize(cvData, JsonDefaults.CamelCaseIndented);
            cvCheck.ProcessingStage = "Verifying Employment";
            await _dbContext.SaveChangesAsync(cancellationToken);

            // Step 5: Verify each employment entry. The lookups are started together and
            // awaited as a batch; nothing in this class throttles them, so any rate
            // limiting has to happen inside the verifier service.
            // Freelance entries are skipped as they cannot be verified against company registries.
            var verificationTasks = cvData.Employment
                .Where(e => !IsFreelance(e.CompanyName))
                .Select(async employment =>
                {
                    var result = await _companyVerifierService.VerifyCompanyAsync(
                        employment.CompanyName,
                        employment.StartDate,
                        employment.EndDate,
                        employment.JobTitle);

                    _logger.LogDebug(
                        "Verified {Company}: {IsVerified} (Score: {Score}%), JobTitle: {JobTitle}, Plausible: {Plausible}",
                        employment.CompanyName,
                        result.IsVerified,
                        result.MatchScore,
                        employment.JobTitle,
                        result.JobTitlePlausible);

                    return result;
                });

            var verificationResults = (await Task.WhenAll(verificationTasks)).ToList();

            // Add freelance entries as auto-verified (skipped)
            foreach (var employment in cvData.Employment.Where(e => IsFreelance(e.CompanyName)))
            {
                verificationResults.Add(new CompanyVerificationResult
                {
                    ClaimedCompany = employment.CompanyName,
                    IsVerified = true,
                    MatchScore = 100,
                    VerificationNotes = "Freelance/self-employed - verification skipped",
                    ClaimedJobTitle = employment.JobTitle,
                    JobTitlePlausible = true
                });

                _logger.LogDebug("Skipped verification for freelance entry: {Company}", employment.CompanyName);
            }

            // Step 5b: Verify director claims against Companies House officers
            cvCheck.ProcessingStage = "Verifying Directors";
            await _dbContext.SaveChangesAsync(cancellationToken);

            await VerifyDirectorClaims(cvData.FullName, verificationResults, cancellationToken);

            // Step 6: Verify education entries
            cvCheck.ProcessingStage = "Verifying Education";
            await _dbContext.SaveChangesAsync(cancellationToken);

            var educationResults = _educationVerifierService.VerifyAll(
                cvData.Education,
                cvData.Employment);

            _logger.LogDebug(
                "Education verification for check {CheckId}: {Count} entries verified ({Recognised} recognised, {DiplomaMill} diploma mills)",
                cvCheckId,
                educationResults.Count,
                educationResults.Count(e => e.IsVerified),
                educationResults.Count(e => e.IsDiplomaMill));

            // Step 7: Analyse timeline
            cvCheck.ProcessingStage = "Analyzing Timeline";
            await _dbContext.SaveChangesAsync(cancellationToken);

            var timelineAnalysis = _timelineAnalyserService.Analyse(cvData.Employment);

            _logger.LogDebug(
                "Timeline analysis for check {CheckId}: {GapCount} gaps, {OverlapCount} overlaps",
                cvCheckId,
                timelineAnalysis.Gaps.Count,
                timelineAnalysis.Overlaps.Count);

            // Step 8: Calculate veracity score
            cvCheck.ProcessingStage = "Calculating Score";
            await _dbContext.SaveChangesAsync(cancellationToken);

            var (score, flags) = CalculateVeracityScore(verificationResults, educationResults, timelineAnalysis, cvData);

            _logger.LogDebug("Calculated veracity score for check {CheckId}: {Score}", cvCheckId, score);

            // Step 9: Create CVFlag records. Flags carry category/severity as strings, so
            // they are parsed back into the enums, with a logged fallback when unknown.
            foreach (var flag in flags)
            {
                if (!Enum.TryParse<FlagCategory>(flag.Category, out var category))
                {
                    _logger.LogWarning("Unknown flag category: {Category}, defaulting to Timeline", flag.Category);
                    category = FlagCategory.Timeline;
                }

                if (!Enum.TryParse<FlagSeverity>(flag.Severity, out var severity))
                {
                    _logger.LogWarning("Unknown flag severity: {Severity}, defaulting to Info", flag.Severity);
                    severity = FlagSeverity.Info;
                }

                var cvFlag = new CVFlag
                {
                    Id = Guid.NewGuid(),
                    CVCheckId = cvCheckId,
                    Category = category,
                    Severity = severity,
                    Title = flag.Title,
                    Description = flag.Description,
                    ScoreImpact = flag.ScoreImpact
                };

                _dbContext.CVFlags.Add(cvFlag);
            }

            // Step 10: Generate veracity report
            cvCheck.ProcessingStage = "Generating Report";
            await _dbContext.SaveChangesAsync(cancellationToken);

            var report = new VeracityReport
            {
                OverallScore = score,
                ScoreLabel = GetScoreLabel(score),
                EmploymentVerifications = verificationResults,
                EducationVerifications = educationResults,
                TimelineAnalysis = timelineAnalysis,
                Flags = flags,
                GeneratedAt = DateTime.UtcNow
            };

            cvCheck.ReportJson = JsonSerializer.Serialize(report, JsonDefaults.CamelCaseIndented);
            cvCheck.VeracityScore = score;

            // Step 11: Update status to Completed
            cvCheck.Status = CheckStatus.Completed;
            cvCheck.ProcessingStage = null; // Clear stage on completion
            cvCheck.CompletedAt = DateTime.UtcNow;
            await _dbContext.SaveChangesAsync(cancellationToken);

            _logger.LogInformation(
                "CV check {CheckId} completed successfully with score {Score}",
                cvCheckId,
                score);

            await _auditService.LogAsync(cvCheck.UserId, AuditActions.CVProcessed, "CVCheck", cvCheckId, $"Score: {score}");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error processing CV check {CheckId}", cvCheckId);

            try
            {
                cvCheck.Status = CheckStatus.Failed;

                // Use CancellationToken.None to ensure failure status is saved even if original token is cancelled
                await _dbContext.SaveChangesAsync(CancellationToken.None);
            }
            catch (DbUpdateConcurrencyException)
            {
                // Record was deleted during processing - nothing to update
                _logger.LogWarning("CV check {CheckId} was deleted during processing", cvCheckId);
                return;
            }

            throw;
        }
    }

    /// <summary>
    /// Builds the list of flags for a CV and derives the veracity score from them.
    /// </summary>
    /// <param name="verifications">Company verification results, including any flags attached to them.</param>
    /// <param name="educationResults">
    /// Education verification results.
    /// NOTE(review): the element type name <c>EducationVerificationResult</c> is inferred
    /// from usage in this file - confirm against TrueCV.Application.Models.
    /// </param>
    /// <param name="timeline">Gaps and overlaps found in the employment timeline.</param>
    /// <param name="cvData">Parsed CV, used for the career-progression checks.</param>
    /// <returns>
    /// The score (<see cref="BaseScore"/> plus the sum of the de-duplicated flags'
    /// <c>ScoreImpact</c>, floored at 0) and the de-duplicated flags.
    /// </returns>
    private static (int Score, List<FlagResult> Flags) CalculateVeracityScore(
        List<CompanyVerificationResult> verifications,
        List<EducationVerificationResult> educationResults,
        TimelineAnalysisResult timeline,
        CVData cvData)
    {
        var flags = new List<FlagResult>();

        // Penalty for unverified companies
        foreach (var verification in verifications.Where(v => !v.IsVerified))
        {
            flags.Add(new FlagResult
            {
                Category = FlagCategory.Employment.ToString(),
                Severity = FlagSeverity.Warning.ToString(),
                Title = "Unverified Company",
                Description = $"Could not verify employment at '{verification.ClaimedCompany}'. {verification.VerificationNotes}",
                ScoreImpact = -UnverifiedCompanyPenalty
            });
        }

        // Process company verification flags (incorporation date, dissolution, dormant, etc.)
        foreach (var verification in verifications)
        {
            // Flags is treated as possibly null, matching VerifyDirectorClaims.
            foreach (var companyFlag in verification.Flags ?? [])
            {
                // The sign of the incoming impact is normalised: it is always applied as a deduction.
                var penalty = Math.Abs(companyFlag.ScoreImpact);

                var severity = companyFlag.Severity switch
                {
                    "Critical" => FlagSeverity.Critical,
                    "Warning" => FlagSeverity.Warning,
                    _ => FlagSeverity.Info
                };

                flags.Add(new FlagResult
                {
                    Category = FlagCategory.Employment.ToString(),
                    Severity = severity.ToString(),
                    Title = companyFlag.Type switch
                    {
                        "EmploymentBeforeIncorporation" => "Employment Before Company Existed",
                        "EmploymentAtDissolvedCompany" => "Employment at Dissolved Company",
                        "CurrentEmploymentAtDissolvedCompany" => "Current Employment at Dissolved Company",
                        "EmploymentAtDormantCompany" => "Employment at Dormant Company",
                        "SeniorRoleAtMicroCompany" => "Senior Role at Micro Company",
                        "SicCodeMismatch" => "Role/Industry Mismatch",
                        "ImplausibleJobTitle" => "Implausible Job Title",
                        "UnverifiedDirectorClaim" => "Unverified Director Claim",
                        _ => companyFlag.Type
                    },
                    Description = companyFlag.Message,
                    ScoreImpact = -penalty
                });
            }
        }

        // Check for rapid career progression
        CheckRapidCareerProgression(cvData, flags);

        // Check for early career senior roles (relative to education end date)
        CheckEarlyCareerSeniorRoles(cvData, flags);

        // Penalty for diploma mills (critical)
        foreach (var edu in educationResults.Where(e => e.IsDiplomaMill))
        {
            flags.Add(new FlagResult
            {
                Category = FlagCategory.Education.ToString(),
                Severity = FlagSeverity.Critical.ToString(),
                Title = "Diploma Mill Detected",
                Description = $"'{edu.ClaimedInstitution}' is a known diploma mill. {edu.VerificationNotes}",
                ScoreImpact = -DiplomaMillPenalty
            });
        }

        // Penalty for suspicious institutions
        foreach (var edu in educationResults.Where(e => e.IsSuspicious && !e.IsDiplomaMill))
        {
            flags.Add(new FlagResult
            {
                Category = FlagCategory.Education.ToString(),
                Severity = FlagSeverity.Warning.ToString(),
                Title = "Suspicious Institution",
                Description = $"'{edu.ClaimedInstitution}' has suspicious characteristics. {edu.VerificationNotes}",
                ScoreImpact = -SuspiciousInstitutionPenalty
            });
        }

        // Penalty for unverified education (not recognised, but not flagged as fake)
        foreach (var edu in educationResults.Where(e =>
                     !e.IsVerified && !e.IsDiplomaMill && !e.IsSuspicious && e.Status == "Unknown"))
        {
            flags.Add(new FlagResult
            {
                Category = FlagCategory.Education.ToString(),
                Severity = FlagSeverity.Info.ToString(),
                Title = "Unverified Institution",
                Description = $"Could not verify '{edu.ClaimedInstitution}'. {edu.VerificationNotes}",
                ScoreImpact = -UnverifiedEducationPenalty
            });
        }

        // Penalty for implausible education dates
        foreach (var edu in educationResults.Where(e => !e.DatesArePlausible))
        {
            flags.Add(new FlagResult
            {
                Category = FlagCategory.Education.ToString(),
                Severity = FlagSeverity.Warning.ToString(),
                Title = "Education Date Issues",
                Description = $"Date issues for '{edu.ClaimedInstitution}': {edu.DatePlausibilityNotes}",
                ScoreImpact = -EducationDatePenalty
            });
        }

        // Penalty for gaps (max -10 per gap)
        foreach (var gap in timeline.Gaps)
        {
            var gapPenalty = Math.Min(gap.Months * GapMonthPenalty, MaxGapPenalty);
            var severity = gap.Months >= 6 ? FlagSeverity.Warning : FlagSeverity.Info;

            flags.Add(new FlagResult
            {
                Category = FlagCategory.Timeline.ToString(),
                Severity = severity.ToString(),
                Title = "Employment Gap",
                Description = $"{gap.Months} month gap in employment from {gap.StartDate:MMM yyyy} to {gap.EndDate:MMM yyyy}",
                ScoreImpact = -gapPenalty
            });
        }

        // Overlaps: every overlap is flagged, but only the months beyond the grace
        // period cost points, so a short transition overlap yields a zero-impact flag.
        foreach (var overlap in timeline.Overlaps)
        {
            var excessMonths = Math.Max(0, overlap.Months - OverlapGraceMonths);
            var overlapPenalty = excessMonths * OverlapMonthPenalty;
            var severity = overlap.Months >= 6 ? FlagSeverity.Critical : FlagSeverity.Warning;

            flags.Add(new FlagResult
            {
                Category = FlagCategory.Timeline.ToString(),
                Severity = severity.ToString(),
                Title = "Employment Overlap",
                Description = $"{overlap.Months} month overlap between '{overlap.Company1}' and '{overlap.Company2}' ({overlap.OverlapStart:MMM yyyy} to {overlap.OverlapEnd:MMM yyyy})",
                ScoreImpact = -overlapPenalty
            });
        }

        // Deduplicate flags based on Title + Description, keeping the first occurrence.
        var uniqueFlags = flags
            .DistinctBy(f => (f.Title, f.Description))
            .ToList();

        // The score is derived from the unique flags only, so a duplicated flag is
        // never charged twice. Ensure it doesn't go below 0.
        var uniqueScore = Math.Max(0, BaseScore + uniqueFlags.Sum(f => f.ScoreImpact));

        return (uniqueScore, uniqueFlags);
    }

    /// <summary>
    /// Maps a veracity score to its display label.
    /// </summary>
    private static string GetScoreLabel(int score) => score switch
    {
        >= 90 => "Excellent",
        >= 75 => "Good",
        >= 60 => "Fair",
        >= 40 => "Poor",
        _ => "Very Poor"
    };

    /// <summary>
    /// Returns true when the company name denotes freelance or self-employed work.
    /// Null, empty or whitespace names return false.
    /// </summary>
    private static bool IsFreelance(string companyName)
    {
        if (string.IsNullOrWhiteSpace(companyName))
        {
            return false;
        }

        var name = companyName.Trim().ToLowerInvariant();

        // Ordinal comparisons throughout: StartsWith(string) without a comparison
        // argument is culture-sensitive, which is not wanted for fixed keywords.
        return name is "freelance" or "freelancer" or "self-employed" or "self employed"
            || name.StartsWith("freelance ", StringComparison.Ordinal)
            || name.StartsWith("self-employed ", StringComparison.Ordinal)
            || name.Contains("(freelance)", StringComparison.Ordinal)
            || name.Contains("(self-employed)", StringComparison.Ordinal);
    }

    /// <summary>
    /// For every verified company with a matched company number where the claimed job
    /// title is a director-type role, asks the company verifier whether the candidate
    /// is a director there, and attaches a critical flag to the result when the
    /// verifier answers false.
    /// </summary>
    /// <param name="candidateName">Candidate name taken from the parsed CV.</param>
    /// <param name="verificationResults">Results to inspect; entries are replaced in place when a flag is added.</param>
    /// <param name="cancellationToken">Checked before each director lookup.</param>
    private async Task VerifyDirectorClaims(
        string candidateName,
        List<CompanyVerificationResult> verificationResults,
        CancellationToken cancellationToken)
    {
        // Index loop: entries are replaced by position, which is safe while iterating by index.
        for (var index = 0; index < verificationResults.Count; index++)
        {
            var result = verificationResults[index];

            if (!result.IsVerified || string.IsNullOrEmpty(result.MatchedCompanyNumber))
            {
                continue;
            }

            var jobTitle = result.ClaimedJobTitle?.ToLowerInvariant() ?? "";

            // "managing director" is covered by the "director" match.
            var isDirectorClaim = jobTitle.Contains("director", StringComparison.Ordinal)
                || jobTitle.Contains("company secretary", StringComparison.Ordinal)
                || jobTitle == "md";

            if (!isDirectorClaim)
            {
                continue;
            }

            cancellationToken.ThrowIfCancellationRequested();

            _logger.LogDebug(
                "Verifying director claim for {Candidate} at {Company}",
                candidateName,
                result.MatchedCompanyName);

            var isVerifiedDirector = await _companyVerifierService.VerifyDirectorAsync(
                result.MatchedCompanyNumber!,
                candidateName,
                result.ClaimedStartDate,
                result.ClaimedEndDate);

            // Only explicit true/false answers are acted on; any other outcome is left unflagged.
            if (isVerifiedDirector == false)
            {
                // Copy the existing flags so the original result instance is not mutated.
                var flags = (result.Flags ?? []).ToList();
                flags.Add(new CompanyVerificationFlag
                {
                    Type = "UnverifiedDirectorClaim",
                    Severity = "Critical",
                    Message = $"Claimed director role at '{result.MatchedCompanyName}' but candidate name not found in Companies House officers list",
                    ScoreImpact = -UnverifiedDirectorClaimPenalty
                });

                // Update the result with the new flag
                verificationResults[index] = result with { Flags = flags };

                _logger.LogWarning(
                    "Director claim not verified for {Candidate} at {Company}",
                    candidateName,
                    result.MatchedCompanyName);
            }
            else if (isVerifiedDirector == true)
            {
                _logger.LogInformation(
                    "Director claim verified for {Candidate} at {Company}",
                    candidateName,
                    result.MatchedCompanyName);
            }
        }
    }

    /// <summary>
    /// Adds a "Rapid Career Progression" flag for each pair of consecutive roles at the
    /// same company (case-insensitive name match) where seniority rises by three or more
    /// levels with less than 24 months between the two start dates.
    /// </summary>
    /// <param name="cvData">Parsed CV whose employment entries are examined.</param>
    /// <param name="flags">List the new flags are appended to.</param>
    private static void CheckRapidCareerProgression(CVData cvData, List<FlagResult> flags)
    {
        // Only companies with more than one dated role can show an internal promotion.
        var byCompany = cvData.Employment
            .Where(e => !string.IsNullOrWhiteSpace(e.CompanyName) && e.StartDate.HasValue)
            .GroupBy(e => e.CompanyName.ToLowerInvariant())
            .Where(g => g.Count() > 1);

        foreach (var companyGroup in byCompany)
        {
            var orderedRoles = companyGroup.OrderBy(e => e.StartDate).ToList();

            for (var i = 1; i < orderedRoles.Count; i++)
            {
                var prevRole = orderedRoles[i - 1];
                var currRole = orderedRoles[i];

                // Check for jump of 3+ seniority levels
                var seniorityJump = GetSeniorityLevel(currRole.JobTitle) - GetSeniorityLevel(prevRole.JobTitle);
                if (seniorityJump < 3)
                {
                    continue;
                }

                // Whole calendar months between the two start dates (days are ignored).
                var prevStart = prevRole.StartDate!.Value;
                var currStart = currRole.StartDate!.Value;
                var monthsBetween = ((currStart.Year - prevStart.Year) * 12) + (currStart.Month - prevStart.Month);

                // If jumped 3+ levels in less than 2 years, flag it
                if (monthsBetween < 24)
                {
                    flags.Add(new FlagResult
                    {
                        Category = FlagCategory.Employment.ToString(),
                        Severity = FlagSeverity.Warning.ToString(),
                        Title = "Rapid Career Progression",
                        Description = $"Promoted from '{prevRole.JobTitle}' to '{currRole.JobTitle}' at '{companyGroup.First().CompanyName}' in {monthsBetween} months - unusually fast progression",
                        ScoreImpact = -RapidProgressionPenalty
                    });
                }
            }
        }
    }

    /// <summary>
    /// Adds an "Early Career Senior Role" flag for each role of seniority level 4 or
    /// above that starts between 0 and 23 months after the latest education end date.
    /// Does nothing when no education entry has an end date.
    /// </summary>
    /// <param name="cvData">Parsed CV whose education and employment entries are examined.</param>
    /// <param name="flags">List the new flags are appended to.</param>
    private static void CheckEarlyCareerSeniorRoles(CVData cvData, List<FlagResult> flags)
    {
        // Find the latest education end date to estimate career start
        var latestEducationEnd = cvData.Education
            .Where(e => e.EndDate.HasValue)
            .Select(e => e.EndDate!.Value)
            .DefaultIfEmpty(DateOnly.MinValue)
            .Max();

        if (latestEducationEnd == DateOnly.MinValue)
        {
            // No education dates available, skip check
            return;
        }

        foreach (var emp in cvData.Employment.Where(e => e.StartDate.HasValue))
        {
            var start = emp.StartDate!.Value;
            var monthsAfterEducation = ((start.Year - latestEducationEnd.Year) * 12) + (start.Month - latestEducationEnd.Month);

            // Only roles started within 2 years of finishing education are of interest.
            if (monthsAfterEducation is < 0 or >= 24)
            {
                continue;
            }

            // Flag if they're claiming a senior role (level 4+) very early in career
            if (GetSeniorityLevel(emp.JobTitle) >= 4)
            {
                flags.Add(new FlagResult
                {
                    Category = FlagCategory.Employment.ToString(),
                    Severity = FlagSeverity.Warning.ToString(),
                    Title = "Early Career Senior Role",
                    Description = $"Claimed senior role '{emp.JobTitle}' at '{emp.CompanyName}' only {monthsAfterEducation} months after completing education",
                    ScoreImpact = -EarlyCareerSeniorRolePenalty
                });
            }
        }
    }

    /// <summary>
    /// Estimates a seniority level from a job title: 0 for a missing title, 1 junior,
    /// 2 mid-level, 3 senior/lead/manager, 4 director/head, 5 VP/executive, 6 C-suite.
    /// </summary>
    /// <remarks>
    /// Short acronyms (CEO, CTO, VP, ...) and "intern" are matched as whole words.
    /// Matching them as substrings misclassifies ordinary titles: "director" contains
    /// "cto", "coordinator" contains "coo", "international" contains "intern".
    /// A bare "president" only counts as C-suite when the title is not "vice president".
    /// </remarks>
    private static int GetSeniorityLevel(string? jobTitle)
    {
        if (string.IsNullOrWhiteSpace(jobTitle))
        {
            return 0;
        }

        // Lower-case, turn punctuation into spaces and split into words, so that
        // "Vice-President" and "CTO/Co-Founder" tokenise the same way as spaced titles.
        var tokens = new string(jobTitle
                .ToLowerInvariant()
                .Select(c => char.IsLetterOrDigit(c) ? c : ' ')
                .ToArray())
            .Split(' ', StringSplitOptions.RemoveEmptyEntries);
        var words = tokens.ToHashSet(StringComparer.Ordinal);
        var title = string.Join(' ', tokens);

        bool HasWord(params string[] candidates) => candidates.Any(c => words.Contains(c));
        bool HasText(params string[] fragments) => fragments.Any(f => title.Contains(f, StringComparison.Ordinal));

        var isVicePresident = HasText("vice president");

        // Level 6: C-suite
        if (HasWord("ceo", "cto", "cfo", "coo", "cio", "md")
            || HasText("chief", "managing director", "chairman", "chairwoman", "chairperson")
            || (HasText("president") && !isVicePresident))
        {
            return 6;
        }

        // Level 5: VP / Executive. Any short token ending in "vp" (vp, svp, evp, ...) counts.
        if (isVicePresident
            || words.Any(w => w.Length <= 4 && w.EndsWith("vp", StringComparison.Ordinal))
            || HasText("executive director", "executive vice"))
        {
            return 5;
        }

        // Level 4: Director / Head
        if (HasText("director", "head of"))
        {
            return 4;
        }

        // Level 3: Senior / Lead / Principal / Manager ("team lead" is covered by "lead")
        if (HasText("senior", "lead", "principal", "manager", "staff"))
        {
            return 3;
        }

        // Level 1: Junior / Entry-level
        if (HasText("junior", "trainee", "graduate", "entry", "assistant")
            || HasWord("intern", "interns", "internship"))
        {
            return 1;
        }

        // Level 2: Mid-level (no junior, no senior)
        return 2;
    }
}