Files
RealCV/src/TrueCV.Infrastructure/Jobs/ProcessCVCheckJob.cs

332 lines
13 KiB
C#
Raw Normal View History

using System.Text.Json;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using TrueCV.Application.Helpers;
using TrueCV.Application.Interfaces;
using TrueCV.Application.Models;
using TrueCV.Domain.Entities;
using TrueCV.Domain.Enums;
using TrueCV.Infrastructure.Data;
namespace TrueCV.Infrastructure.Jobs;
public sealed class ProcessCVCheckJob
{
// Collaborators assigned once in the constructor and used by ExecuteAsync.
private readonly ApplicationDbContext _dbContext;
private readonly IFileStorageService _fileStorageService;
private readonly ICVParserService _cvParserService;
private readonly ICompanyVerifierService _companyVerifierService;
private readonly IEducationVerifierService _educationVerifierService;
private readonly ITimelineAnalyserService _timelineAnalyserService;
private readonly ILogger<ProcessCVCheckJob> _logger;
// Scoring constants used by CalculateVeracityScore. Every score starts at BaseScore,
// penalties are subtracted from it, and the result is floored at 0.
private const int BaseScore = 100;
// Deducted once for each employment entry whose company could not be verified.
private const int UnverifiedCompanyPenalty = 10;
// Deducted per month of an employment gap, up to MaxGapPenalty for a single gap.
private const int GapMonthPenalty = 1;
private const int MaxGapPenalty = 10;
// Deducted per month of employment overlap beyond the two-month allowance.
private const int OverlapMonthPenalty = 2;
// Deducted once for each education entry flagged as a diploma mill.
private const int DiplomaMillPenalty = 25;
// Deducted once for each education entry flagged as suspicious but not as a diploma mill.
private const int SuspiciousInstitutionPenalty = 15;
// Deducted once for each education entry that is unverified, unflagged and has status "Unknown".
private const int UnverifiedEducationPenalty = 5;
// Deducted once for each education entry whose dates are not plausible.
private const int EducationDatePenalty = 10;
/// <summary>
/// Creates the job and stores the services it orchestrates.
/// </summary>
/// <param name="dbContext">Database context used to load and save the CV check and its flags.</param>
/// <param name="fileStorageService">Storage service the CV file is downloaded from.</param>
/// <param name="cvParserService">Parser that extracts structured data from the CV file.</param>
/// <param name="companyVerifierService">Verifier called once per employment entry.</param>
/// <param name="educationVerifierService">Verifier called with all education entries.</param>
/// <param name="timelineAnalyserService">Analyser that reports employment gaps and overlaps.</param>
/// <param name="logger">Logger for progress and error messages.</param>
public ProcessCVCheckJob(
    ApplicationDbContext dbContext,
    IFileStorageService fileStorageService,
    ICVParserService cvParserService,
    ICompanyVerifierService companyVerifierService,
    IEducationVerifierService educationVerifierService,
    ITimelineAnalyserService timelineAnalyserService,
    ILogger<ProcessCVCheckJob> logger)
{
    // Plain field capture only: arguments are stored as given, without validation.
    (_dbContext, _logger) = (dbContext, logger);
    (_fileStorageService, _cvParserService) = (fileStorageService, cvParserService);
    (_companyVerifierService, _educationVerifierService, _timelineAnalyserService) =
        (companyVerifierService, educationVerifierService, timelineAnalyserService);
}
/// <summary>
/// Processes a single CV check end to end: loads the check, downloads and parses the CV,
/// verifies employment and education entries, analyses the timeline, calculates the
/// veracity score, and stores the resulting flags and serialized report.
/// </summary>
/// <param name="cvCheckId">Id of the CV check to process.</param>
/// <param name="cancellationToken">Token passed to the database and CV parser calls.</param>
/// <returns>
/// A task that completes once the check has been saved as Completed. If no check with the
/// given id exists, an error is logged and the task completes without further work.
/// </returns>
/// <remarks>
/// On any exception the check is marked Failed (best effort) and the original exception
/// is rethrown to the caller.
/// </remarks>
public async Task ExecuteAsync(Guid cvCheckId, CancellationToken cancellationToken)
{
    _logger.LogInformation("Starting CV check processing for: {CheckId}", cvCheckId);

    var cvCheck = await _dbContext.CVChecks
        .FirstOrDefaultAsync(c => c.Id == cvCheckId, cancellationToken);

    if (cvCheck is null)
    {
        _logger.LogError("CV check not found: {CheckId}", cvCheckId);
        return;
    }

    try
    {
        // Step 1: Update status to Processing
        cvCheck.Status = CheckStatus.Processing;
        await _dbContext.SaveChangesAsync(cancellationToken);
        _logger.LogDebug("CV check {CheckId} status updated to Processing", cvCheckId);

        // Step 2: Download file from blob
        // NOTE(review): the download call and the company verifier call below are made
        // without the cancellation token; whether token-accepting overloads exist is not
        // visible from this file.
        await using var fileStream = await _fileStorageService.DownloadAsync(cvCheck.BlobUrl);
        _logger.LogDebug("Downloaded CV file for check {CheckId}", cvCheckId);

        // Step 3: Parse CV
        var cvData = await _cvParserService.ParseAsync(fileStream, cvCheck.OriginalFileName, cancellationToken);
        _logger.LogDebug(
            "Parsed CV for check {CheckId}: {EmploymentCount} employment entries",
            cvCheckId, cvData.Employment.Count);

        // Step 4: Save extracted data
        cvCheck.ExtractedDataJson = JsonSerializer.Serialize(cvData, JsonDefaults.CamelCaseIndented);
        await _dbContext.SaveChangesAsync(cancellationToken);

        // Step 5: Verify each employment entry. All verifications are started together and
        // awaited as a group; this method applies no throttling of its own, so any rate
        // limiting has to be enforced inside the verifier service.
        var verificationTasks = cvData.Employment.Select(async employment =>
        {
            var result = await _companyVerifierService.VerifyCompanyAsync(
                employment.CompanyName,
                employment.StartDate,
                employment.EndDate);

            _logger.LogDebug(
                "Verified {Company}: {IsVerified} (Score: {Score}%)",
                employment.CompanyName, result.IsVerified, result.MatchScore);

            return result;
        });
        var verificationResults = (await Task.WhenAll(verificationTasks)).ToList();

        // Step 6: Verify education entries
        var educationResults = _educationVerifierService.VerifyAll(
            cvData.Education,
            cvData.Employment);
        _logger.LogDebug(
            "Education verification for check {CheckId}: {Count} entries verified ({Recognised} recognised, {DiplomaMill} diploma mills)",
            cvCheckId,
            educationResults.Count,
            educationResults.Count(e => e.IsVerified),
            educationResults.Count(e => e.IsDiplomaMill));

        // Step 7: Analyse timeline
        var timelineAnalysis = _timelineAnalyserService.Analyse(cvData.Employment);
        _logger.LogDebug(
            "Timeline analysis for check {CheckId}: {GapCount} gaps, {OverlapCount} overlaps",
            cvCheckId, timelineAnalysis.Gaps.Count, timelineAnalysis.Overlaps.Count);

        // Step 8: Calculate veracity score
        var (score, flags) = CalculateVeracityScore(verificationResults, educationResults, timelineAnalysis);
        _logger.LogDebug("Calculated veracity score for check {CheckId}: {Score}", cvCheckId, score);

        // Step 9: Create CVFlag records (queued here, written by the save in step 11)
        foreach (var flag in flags)
        {
            _dbContext.CVFlags.Add(ToFlagEntity(cvCheckId, flag));
        }

        // Step 10: Generate veracity report
        var report = new VeracityReport
        {
            OverallScore = score,
            ScoreLabel = GetScoreLabel(score),
            EmploymentVerifications = verificationResults,
            EducationVerifications = educationResults,
            TimelineAnalysis = timelineAnalysis,
            Flags = flags,
            GeneratedAt = DateTime.UtcNow
        };
        cvCheck.ReportJson = JsonSerializer.Serialize(report, JsonDefaults.CamelCaseIndented);
        cvCheck.VeracityScore = score;

        // Step 11: Update status to Completed
        cvCheck.Status = CheckStatus.Completed;
        cvCheck.CompletedAt = DateTime.UtcNow;
        await _dbContext.SaveChangesAsync(cancellationToken);

        _logger.LogInformation(
            "CV check {CheckId} completed successfully with score {Score}",
            cvCheckId, score);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error processing CV check {CheckId}", cvCheckId);
        cvCheck.Status = CheckStatus.Failed;

        try
        {
            // Flag rows queued in step 9 but never committed must not be written together
            // with the Failed status: if one of them caused the failure, this save would
            // fail the same way, and otherwise a re-run would insert them a second time.
            var pendingFlags = _dbContext.ChangeTracker
                .Entries<CVFlag>()
                .Where(entry => entry.State == EntityState.Added)
                .ToList();
            foreach (var pendingFlag in pendingFlags)
            {
                pendingFlag.State = EntityState.Detached;
            }

            // Use CancellationToken.None to ensure failure status is saved even if original token is cancelled
            await _dbContext.SaveChangesAsync(CancellationToken.None);
        }
        catch (Exception saveException)
        {
            // Persisting the failure is best effort. Swallow this secondary error so the
            // original exception, not this one, is what propagates to the caller.
            _logger.LogError(
                saveException,
                "Could not persist Failed status for CV check {CheckId}",
                cvCheckId);
        }

        throw;
    }
}

/// <summary>
/// Converts a scoring flag into a <see cref="CVFlag"/> entity for the given check, parsing
/// the flag's string category and severity back into their enum values.
/// </summary>
/// <param name="cvCheckId">Id of the CV check the flag belongs to.</param>
/// <param name="flag">Flag produced by the scoring step.</param>
/// <returns>A new, untracked entity with a freshly generated id.</returns>
private CVFlag ToFlagEntity(Guid cvCheckId, FlagResult flag)
{
    // Unparseable values are logged and replaced with a default rather than failing the job.
    if (!Enum.TryParse<FlagCategory>(flag.Category, out var category))
    {
        _logger.LogWarning("Unknown flag category: {Category}, defaulting to Timeline", flag.Category);
        category = FlagCategory.Timeline;
    }

    if (!Enum.TryParse<FlagSeverity>(flag.Severity, out var severity))
    {
        _logger.LogWarning("Unknown flag severity: {Severity}, defaulting to Info", flag.Severity);
        severity = FlagSeverity.Info;
    }

    return new CVFlag
    {
        Id = Guid.NewGuid(),
        CVCheckId = cvCheckId,
        Category = category,
        Severity = severity,
        Title = flag.Title,
        Description = flag.Description,
        ScoreImpact = flag.ScoreImpact
    };
}
/// <summary>
/// Calculates the veracity score for a CV and the flags that explain every deduction.
/// The score starts at <c>BaseScore</c>, each finding subtracts its penalty, and the
/// result is floored at 0.
/// </summary>
/// <param name="verifications">Company verification results, one per employment entry.</param>
/// <param name="educationResults">Education verification results.</param>
/// <param name="timeline">Timeline analysis containing employment gaps and overlaps.</param>
/// <returns>
/// The final score and the flags in the order the rules are applied: unverified companies,
/// diploma mills, suspicious institutions, unverified institutions, education date issues,
/// employment gaps, employment overlaps.
/// </returns>
private static (int Score, List<FlagResult> Flags) CalculateVeracityScore(
    List<CompanyVerificationResult> verifications,
    List<EducationVerificationResult> educationResults,
    TimelineAnalysisResult timeline)
{
    var score = BaseScore;
    var flags = new List<FlagResult>();

    // Subtracts one penalty and records the flag describing it, so the score and the
    // reported ScoreImpact can never drift apart.
    void Deduct(FlagCategory category, FlagSeverity severity, string title, string description, int penalty)
    {
        score -= penalty;
        flags.Add(new FlagResult
        {
            Category = category.ToString(),
            Severity = severity.ToString(),
            Title = title,
            Description = description,
            ScoreImpact = -penalty
        });
    }

    // Penalty for unverified companies
    foreach (var verification in verifications.Where(v => !v.IsVerified))
    {
        Deduct(
            FlagCategory.Employment,
            FlagSeverity.Warning,
            "Unverified Company",
            $"Could not verify employment at '{verification.ClaimedCompany}'. {verification.VerificationNotes}",
            UnverifiedCompanyPenalty);
    }

    // Penalty for diploma mills (critical)
    foreach (var edu in educationResults.Where(e => e.IsDiplomaMill))
    {
        Deduct(
            FlagCategory.Education,
            FlagSeverity.Critical,
            "Diploma Mill Detected",
            $"'{edu.ClaimedInstitution}' is a known diploma mill. {edu.VerificationNotes}",
            DiplomaMillPenalty);
    }

    // Penalty for suspicious institutions (diploma mills were already penalised above)
    foreach (var edu in educationResults.Where(e => e.IsSuspicious && !e.IsDiplomaMill))
    {
        Deduct(
            FlagCategory.Education,
            FlagSeverity.Warning,
            "Suspicious Institution",
            $"'{edu.ClaimedInstitution}' has suspicious characteristics. {edu.VerificationNotes}",
            SuspiciousInstitutionPenalty);
    }

    // Penalty for unverified education (not recognised, but not flagged as fake)
    foreach (var edu in educationResults.Where(e => !e.IsVerified && !e.IsDiplomaMill && !e.IsSuspicious && e.Status == "Unknown"))
    {
        Deduct(
            FlagCategory.Education,
            FlagSeverity.Info,
            "Unverified Institution",
            $"Could not verify '{edu.ClaimedInstitution}'. {edu.VerificationNotes}",
            UnverifiedEducationPenalty);
    }

    // Penalty for implausible education dates
    foreach (var edu in educationResults.Where(e => !e.DatesArePlausible))
    {
        Deduct(
            FlagCategory.Education,
            FlagSeverity.Warning,
            "Education Date Issues",
            $"Date issues for '{edu.ClaimedInstitution}': {edu.DatePlausibilityNotes}",
            EducationDatePenalty);
    }

    // Penalty for gaps (max -10 per gap)
    foreach (var gap in timeline.Gaps)
    {
        // Clamp at zero as well as at the cap: a negative month count must not turn into a
        // bonus that raises the score, mirroring the guard used for overlaps below.
        var gapPenalty = Math.Clamp(gap.Months * GapMonthPenalty, 0, MaxGapPenalty);
        var severity = gap.Months >= 6 ? FlagSeverity.Warning : FlagSeverity.Info;

        Deduct(
            FlagCategory.Timeline,
            severity,
            "Employment Gap",
            $"{gap.Months} month gap in employment from {gap.StartDate:MMM yyyy} to {gap.EndDate:MMM yyyy}",
            gapPenalty);
    }

    // Penalty for overlaps (only if > 2 months). Shorter overlaps are still flagged, with
    // a score impact of zero.
    foreach (var overlap in timeline.Overlaps)
    {
        var excessMonths = Math.Max(0, overlap.Months - 2); // Allow 2 month transition, prevent negative
        var overlapPenalty = excessMonths * OverlapMonthPenalty;
        var severity = overlap.Months >= 6 ? FlagSeverity.Critical : FlagSeverity.Warning;

        Deduct(
            FlagCategory.Timeline,
            severity,
            "Employment Overlap",
            $"{overlap.Months} month overlap between '{overlap.Company1}' and '{overlap.Company2}' ({overlap.OverlapStart:MMM yyyy} to {overlap.OverlapEnd:MMM yyyy})",
            overlapPenalty);
    }

    // Ensure score doesn't go below 0
    score = Math.Max(0, score);

    return (score, flags);
}
/// <summary>
/// Maps a veracity score to its descriptive band.
/// </summary>
/// <param name="score">Score to classify.</param>
/// <returns>
/// "Excellent" for 90 and above, "Good" for 75-89, "Fair" for 60-74, "Poor" for 40-59,
/// and "Very Poor" for anything lower.
/// </returns>
private static string GetScoreLabel(int score)
{
    // Thresholds are checked from highest to lowest; the first one met decides the band.
    if (score >= 90)
    {
        return "Excellent";
    }

    if (score >= 75)
    {
        return "Good";
    }

    if (score >= 60)
    {
        return "Fair";
    }

    if (score >= 40)
    {
        return "Poor";
    }

    return "Very Poor";
}
}