2026-01-18 19:20:50 +01:00
using System.Text.Json ;
using Microsoft.EntityFrameworkCore ;
using Microsoft.Extensions.Logging ;
2026-01-20 16:45:43 +01:00
using TrueCV.Application.Helpers ;
2026-01-18 19:20:50 +01:00
using TrueCV.Application.Interfaces ;
using TrueCV.Application.Models ;
using TrueCV.Domain.Entities ;
using TrueCV.Domain.Enums ;
using TrueCV.Infrastructure.Data ;
namespace TrueCV.Infrastructure.Jobs ;
/// <summary>
/// Job that runs the verification pipeline for a single CV check: downloads the stored
/// file, parses it, verifies employment and education entries, analyses the employment
/// timeline, calculates a veracity score and persists the resulting flags and report.
/// </summary>
public sealed class ProcessCVCheckJob
{
    private readonly ApplicationDbContext _dbContext;
    private readonly IFileStorageService _fileStorageService;
    private readonly ICVParserService _cvParserService;
    private readonly ICompanyVerifierService _companyVerifierService;
    private readonly IEducationVerifierService _educationVerifierService;
    private readonly ITimelineAnalyserService _timelineAnalyserService;
    private readonly ILogger<ProcessCVCheckJob> _logger;

    // Scoring model: every check starts at BaseScore and loses points per finding.
    private const int BaseScore = 100;
    private const int UnverifiedCompanyPenalty = 10;
    private const int GapMonthPenalty = 1;      // per month of gap
    private const int MaxGapPenalty = 10;       // cap applied to each individual gap
    private const int OverlapMonthPenalty = 2;  // per month of overlap beyond the allowed transition
    private const int DiplomaMillPenalty = 25;
    private const int SuspiciousInstitutionPenalty = 15;
    private const int UnverifiedEducationPenalty = 5;
    private const int EducationDatePenalty = 10;

    /// <summary>
    /// Creates the job with the services it needs to process a CV check.
    /// </summary>
    public ProcessCVCheckJob(
        ApplicationDbContext dbContext,
        IFileStorageService fileStorageService,
        ICVParserService cvParserService,
        ICompanyVerifierService companyVerifierService,
        IEducationVerifierService educationVerifierService,
        ITimelineAnalyserService timelineAnalyserService,
        ILogger<ProcessCVCheckJob> logger)
    {
        _dbContext = dbContext;
        _fileStorageService = fileStorageService;
        _cvParserService = cvParserService;
        _companyVerifierService = companyVerifierService;
        _educationVerifierService = educationVerifierService;
        _timelineAnalyserService = timelineAnalyserService;
        _logger = logger;
    }

    /// <summary>
    /// Processes the CV check with the given id from download through to the final report.
    /// </summary>
    /// <param name="cvCheckId">Identifier of the CV check to process.</param>
    /// <param name="cancellationToken">Token used to cancel parsing and database saves.</param>
    /// <remarks>
    /// If no check with the given id exists, an error is logged and the method returns.
    /// On any exception the check is marked <see cref="CheckStatus.Failed"/> (best effort)
    /// and the original exception is rethrown.
    /// </remarks>
    public async Task ExecuteAsync(Guid cvCheckId, CancellationToken cancellationToken)
    {
        _logger.LogInformation("Starting CV check processing for: {CheckId}", cvCheckId);

        var cvCheck = await _dbContext.CVChecks
            .FirstOrDefaultAsync(c => c.Id == cvCheckId, cancellationToken);

        if (cvCheck is null)
        {
            _logger.LogError("CV check not found: {CheckId}", cvCheckId);
            return;
        }

        try
        {
            // Step 1: Update status to Processing
            cvCheck.Status = CheckStatus.Processing;
            await _dbContext.SaveChangesAsync(cancellationToken);
            _logger.LogDebug("CV check {CheckId} status updated to Processing", cvCheckId);

            // Step 2: Download file from blob
            await using var fileStream = await _fileStorageService.DownloadAsync(cvCheck.BlobUrl);
            _logger.LogDebug("Downloaded CV file for check {CheckId}", cvCheckId);

            // Step 3: Parse CV
            var cvData = await _cvParserService.ParseAsync(fileStream, cvCheck.OriginalFileName, cancellationToken);
            _logger.LogDebug(
                "Parsed CV for check {CheckId}: {EmploymentCount} employment entries",
                cvCheckId, cvData.Employment.Count);

            // Step 4: Save extracted data
            cvCheck.ExtractedDataJson = JsonSerializer.Serialize(cvData, JsonDefaults.CamelCaseIndented);
            await _dbContext.SaveChangesAsync(cancellationToken);

            // Step 5: Verify each employment entry. All verifications are started at once and
            // awaited together; nothing here throttles them.
            // NOTE(review): any rate limiting must live inside ICompanyVerifierService - confirm.
            var verificationTasks = cvData.Employment.Select(async employment =>
            {
                var result = await _companyVerifierService.VerifyCompanyAsync(
                    employment.CompanyName,
                    employment.StartDate,
                    employment.EndDate);

                _logger.LogDebug(
                    "Verified {Company}: {IsVerified} (Score: {Score}%)",
                    employment.CompanyName, result.IsVerified, result.MatchScore);

                return result;
            });
            var verificationResults = (await Task.WhenAll(verificationTasks)).ToList();

            // Step 6: Verify education entries
            var educationResults = _educationVerifierService.VerifyAll(
                cvData.Education,
                cvData.Employment);
            _logger.LogDebug(
                "Education verification for check {CheckId}: {Count} entries verified ({Recognised} recognised, {DiplomaMill} diploma mills)",
                cvCheckId,
                educationResults.Count,
                educationResults.Count(e => e.IsVerified),
                educationResults.Count(e => e.IsDiplomaMill));

            // Step 7: Analyse timeline
            var timelineAnalysis = _timelineAnalyserService.Analyse(cvData.Employment);
            _logger.LogDebug(
                "Timeline analysis for check {CheckId}: {GapCount} gaps, {OverlapCount} overlaps",
                cvCheckId, timelineAnalysis.Gaps.Count, timelineAnalysis.Overlaps.Count);

            // Step 8: Calculate veracity score
            var (score, flags) = CalculateVeracityScore(verificationResults, educationResults, timelineAnalysis);
            _logger.LogDebug("Calculated veracity score for check {CheckId}: {Score}", cvCheckId, score);

            // Step 9: Create CVFlag records
            AddFlagRecords(cvCheckId, flags);

            // Step 10: Generate veracity report
            var report = new VeracityReport
            {
                OverallScore = score,
                ScoreLabel = GetScoreLabel(score),
                EmploymentVerifications = verificationResults,
                EducationVerifications = educationResults,
                TimelineAnalysis = timelineAnalysis,
                Flags = flags,
                GeneratedAt = DateTime.UtcNow
            };
            cvCheck.ReportJson = JsonSerializer.Serialize(report, JsonDefaults.CamelCaseIndented);
            cvCheck.VeracityScore = score;

            // Step 11: Update status to Completed
            cvCheck.Status = CheckStatus.Completed;
            cvCheck.CompletedAt = DateTime.UtcNow;
            await _dbContext.SaveChangesAsync(cancellationToken);

            _logger.LogInformation(
                "CV check {CheckId} completed successfully with score {Score}",
                cvCheckId, score);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error processing CV check {CheckId}", cvCheckId);

            try
            {
                // Flags queued by this run must not be written for a failed check. If the
                // final save was what failed, retrying the same inserts would most likely
                // fail again and prevent the Failed status from being stored.
                var pendingFlags = _dbContext.ChangeTracker
                    .Entries<CVFlag>()
                    .Where(entry => entry.State == EntityState.Added)
                    .ToList();
                foreach (var pendingFlag in pendingFlags)
                {
                    pendingFlag.State = EntityState.Detached;
                }

                cvCheck.Status = CheckStatus.Failed;

                // Use CancellationToken.None to ensure failure status is saved even if original token is cancelled
                await _dbContext.SaveChangesAsync(CancellationToken.None);
            }
            catch (Exception saveException)
            {
                // Recording the failure is best effort: log and fall through so the caller
                // still receives the original exception, not this secondary one.
                _logger.LogError(
                    saveException,
                    "Could not persist Failed status for CV check {CheckId}",
                    cvCheckId);
            }

            throw;
        }
    }

    /// <summary>
    /// Converts scoring flags into <see cref="CVFlag"/> entities and adds them to the context.
    /// Nothing is saved here; the caller is responsible for calling SaveChanges.
    /// </summary>
    /// <param name="cvCheckId">Identifier of the CV check the flags belong to.</param>
    /// <param name="flags">Flags produced by the scoring step.</param>
    private void AddFlagRecords(Guid cvCheckId, List<FlagResult> flags)
    {
        foreach (var flag in flags)
        {
            // Category and severity arrive as strings; fall back to a default on unknown values.
            if (!Enum.TryParse<FlagCategory>(flag.Category, out var category))
            {
                _logger.LogWarning("Unknown flag category: {Category}, defaulting to Timeline", flag.Category);
                category = FlagCategory.Timeline;
            }

            if (!Enum.TryParse<FlagSeverity>(flag.Severity, out var severity))
            {
                _logger.LogWarning("Unknown flag severity: {Severity}, defaulting to Info", flag.Severity);
                severity = FlagSeverity.Info;
            }

            _dbContext.CVFlags.Add(new CVFlag
            {
                Id = Guid.NewGuid(),
                CVCheckId = cvCheckId,
                Category = category,
                Severity = severity,
                Title = flag.Title,
                Description = flag.Description,
                ScoreImpact = flag.ScoreImpact
            });
        }
    }

    /// <summary>
    /// Calculates the veracity score and the list of flags explaining each deduction.
    /// </summary>
    /// <param name="verifications">Company verification results, one per employment entry.</param>
    /// <param name="educationResults">Education verification results.</param>
    /// <param name="timeline">Timeline analysis containing employment gaps and overlaps.</param>
    /// <returns>
    /// The score, starting at <see cref="BaseScore"/> and never below 0, together with one
    /// flag per finding. Flag impacts are not clamped, so they can sum to more than the
    /// points actually deducted.
    /// </returns>
    private static (int Score, List<FlagResult> Flags) CalculateVeracityScore(
        List<CompanyVerificationResult> verifications,
        List<EducationVerificationResult> educationResults,
        TimelineAnalysisResult timeline)
    {
        var score = BaseScore;
        var flags = new List<FlagResult>();

        // Deducts the penalty and records the matching flag in one place.
        void Penalise(int penalty, FlagCategory category, FlagSeverity severity, string title, string description)
        {
            score -= penalty;
            flags.Add(new FlagResult
            {
                Category = category.ToString(),
                Severity = severity.ToString(),
                Title = title,
                Description = description,
                ScoreImpact = -penalty
            });
        }

        // Penalty for unverified companies
        foreach (var verification in verifications.Where(v => !v.IsVerified))
        {
            Penalise(
                UnverifiedCompanyPenalty,
                FlagCategory.Employment,
                FlagSeverity.Warning,
                "Unverified Company",
                $"Could not verify employment at '{verification.ClaimedCompany}'. {verification.VerificationNotes}");
        }

        // Penalty for diploma mills (critical)
        foreach (var edu in educationResults.Where(e => e.IsDiplomaMill))
        {
            Penalise(
                DiplomaMillPenalty,
                FlagCategory.Education,
                FlagSeverity.Critical,
                "Diploma Mill Detected",
                $"'{edu.ClaimedInstitution}' is a known diploma mill. {edu.VerificationNotes}");
        }

        // Penalty for suspicious institutions (diploma mills were already penalised above)
        foreach (var edu in educationResults.Where(e => e.IsSuspicious && !e.IsDiplomaMill))
        {
            Penalise(
                SuspiciousInstitutionPenalty,
                FlagCategory.Education,
                FlagSeverity.Warning,
                "Suspicious Institution",
                $"'{edu.ClaimedInstitution}' has suspicious characteristics. {edu.VerificationNotes}");
        }

        // Penalty for unverified education (not recognised, but not flagged as fake)
        foreach (var edu in educationResults.Where(e =>
                     !e.IsVerified && !e.IsDiplomaMill && !e.IsSuspicious && e.Status == "Unknown"))
        {
            Penalise(
                UnverifiedEducationPenalty,
                FlagCategory.Education,
                FlagSeverity.Info,
                "Unverified Institution",
                $"Could not verify '{edu.ClaimedInstitution}'. {edu.VerificationNotes}");
        }

        // Penalty for implausible education dates
        foreach (var edu in educationResults.Where(e => !e.DatesArePlausible))
        {
            Penalise(
                EducationDatePenalty,
                FlagCategory.Education,
                FlagSeverity.Warning,
                "Education Date Issues",
                $"Date issues for '{edu.ClaimedInstitution}': {edu.DatePlausibilityNotes}");
        }

        // Penalty for gaps (capped at MaxGapPenalty per gap)
        foreach (var gap in timeline.Gaps)
        {
            var gapPenalty = Math.Min(gap.Months * GapMonthPenalty, MaxGapPenalty);
            var gapSeverity = gap.Months >= 6 ? FlagSeverity.Warning : FlagSeverity.Info;

            Penalise(
                gapPenalty,
                FlagCategory.Timeline,
                gapSeverity,
                "Employment Gap",
                $"{gap.Months} month gap in employment from {gap.StartDate:MMM yyyy} to {gap.EndDate:MMM yyyy}");
        }

        // Overlaps: every overlap is flagged, but only months beyond a 2 month transition
        // period cost points, so short overlaps produce a flag with zero impact.
        foreach (var overlap in timeline.Overlaps)
        {
            var excessMonths = Math.Max(0, overlap.Months - 2); // Allow 2 month transition, prevent negative
            var overlapPenalty = excessMonths * OverlapMonthPenalty;
            var overlapSeverity = overlap.Months >= 6 ? FlagSeverity.Critical : FlagSeverity.Warning;

            Penalise(
                overlapPenalty,
                FlagCategory.Timeline,
                overlapSeverity,
                "Employment Overlap",
                $"{overlap.Months} month overlap between '{overlap.Company1}' and '{overlap.Company2}' ({overlap.OverlapStart:MMM yyyy} to {overlap.OverlapEnd:MMM yyyy})");
        }

        // Ensure score doesn't go below 0
        score = Math.Max(0, score);

        return (score, flags);
    }

    /// <summary>
    /// Maps a numeric veracity score to its label.
    /// </summary>
    /// <param name="score">The veracity score.</param>
    /// <returns>
    /// "Excellent" (90+), "Good" (75+), "Fair" (60+), "Poor" (40+), otherwise "Very Poor".
    /// </returns>
    private static string GetScoreLabel(int score)
    {
        return score switch
        {
            >= 90 => "Excellent",
            >= 75 => "Good",
            >= 60 => "Fair",
            >= 40 => "Poor",
            _ => "Very Poor"
        };
    }
}