Add UK education verification and security fixes
Features:
- Add UK institution recognition (170+ universities)
- Add diploma mill detection (100+ blacklisted institutions)
- Add education verification service with date plausibility checks
- Add local file storage option (no Azure required)
- Add default admin user seeding on startup
- Enhance Serilog logging with file output

Security fixes:
- Fix path traversal vulnerability in LocalFileStorageService
- Fix open redirect in login endpoint (use LocalRedirect)
- Fix password validation message (12 chars, not 6)
- Fix login to use HTTP POST endpoint (avoid Blazor cookie issues)

Code improvements:
- Add CancellationToken propagation to CV parser
- Add shared helpers (JsonDefaults, DateHelpers, ScoreThresholds)
- Add IUserContextService for user ID extraction
- Parallelize company verification in ProcessCVCheckJob
- Add 28 unit tests for education verification

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using TrueCV.Application.Helpers;
|
||||
using TrueCV.Application.Interfaces;
|
||||
using TrueCV.Application.Models;
|
||||
using TrueCV.Domain.Entities;
|
||||
@@ -15,26 +16,26 @@ public sealed class ProcessCVCheckJob
|
||||
private readonly IFileStorageService _fileStorageService;
|
||||
private readonly ICVParserService _cvParserService;
|
||||
private readonly ICompanyVerifierService _companyVerifierService;
|
||||
private readonly IEducationVerifierService _educationVerifierService;
|
||||
private readonly ITimelineAnalyserService _timelineAnalyserService;
|
||||
private readonly ILogger<ProcessCVCheckJob> _logger;
|
||||
|
||||
private static readonly JsonSerializerOptions JsonOptions = new()
|
||||
{
|
||||
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
|
||||
WriteIndented = true
|
||||
};
|
||||
|
||||
private const int BaseScore = 100;
|
||||
private const int UnverifiedCompanyPenalty = 10;
|
||||
private const int GapMonthPenalty = 1;
|
||||
private const int MaxGapPenalty = 10;
|
||||
private const int OverlapMonthPenalty = 2;
|
||||
private const int DiplomaMillPenalty = 25;
|
||||
private const int SuspiciousInstitutionPenalty = 15;
|
||||
private const int UnverifiedEducationPenalty = 5;
|
||||
private const int EducationDatePenalty = 10;
|
||||
|
||||
public ProcessCVCheckJob(
|
||||
ApplicationDbContext dbContext,
|
||||
IFileStorageService fileStorageService,
|
||||
ICVParserService cvParserService,
|
||||
ICompanyVerifierService companyVerifierService,
|
||||
IEducationVerifierService educationVerifierService,
|
||||
ITimelineAnalyserService timelineAnalyserService,
|
||||
ILogger<ProcessCVCheckJob> logger)
|
||||
{
|
||||
@@ -42,6 +43,7 @@ public sealed class ProcessCVCheckJob
|
||||
_fileStorageService = fileStorageService;
|
||||
_cvParserService = cvParserService;
|
||||
_companyVerifierService = companyVerifierService;
|
||||
_educationVerifierService = educationVerifierService;
|
||||
_timelineAnalyserService = timelineAnalyserService;
|
||||
_logger = logger;
|
||||
}
|
||||
@@ -73,53 +75,78 @@ public sealed class ProcessCVCheckJob
|
||||
_logger.LogDebug("Downloaded CV file for check {CheckId}", cvCheckId);
|
||||
|
||||
// Step 3: Parse CV
|
||||
var cvData = await _cvParserService.ParseAsync(fileStream, cvCheck.OriginalFileName);
|
||||
var cvData = await _cvParserService.ParseAsync(fileStream, cvCheck.OriginalFileName, cancellationToken);
|
||||
|
||||
_logger.LogDebug(
|
||||
"Parsed CV for check {CheckId}: {EmploymentCount} employment entries",
|
||||
cvCheckId, cvData.Employment.Count);
|
||||
|
||||
// Step 4: Save extracted data
|
||||
cvCheck.ExtractedDataJson = JsonSerializer.Serialize(cvData, JsonOptions);
|
||||
cvCheck.ExtractedDataJson = JsonSerializer.Serialize(cvData, JsonDefaults.CamelCaseIndented);
|
||||
await _dbContext.SaveChangesAsync(cancellationToken);
|
||||
|
||||
// Step 5: Verify each employment entry
|
||||
var verificationResults = new List<CompanyVerificationResult>();
|
||||
foreach (var employment in cvData.Employment)
|
||||
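// Step 5: Verify each employment entry concurrently (all lookups start at once via Task.WhenAll; no throttling is applied here, so any rate limiting must happen inside the verifier service)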
|
||||
var verificationTasks = cvData.Employment.Select(async employment =>
|
||||
{
|
||||
var result = await _companyVerifierService.VerifyCompanyAsync(
|
||||
employment.CompanyName,
|
||||
employment.StartDate,
|
||||
employment.EndDate);
|
||||
|
||||
verificationResults.Add(result);
|
||||
|
||||
_logger.LogDebug(
|
||||
"Verified {Company}: {IsVerified} (Score: {Score}%)",
|
||||
employment.CompanyName, result.IsVerified, result.MatchScore);
|
||||
}
|
||||
|
||||
// Step 6: Analyse timeline
|
||||
return result;
|
||||
});
|
||||
|
||||
var verificationResults = (await Task.WhenAll(verificationTasks)).ToList();
|
||||
|
||||
// Step 6: Verify education entries
|
||||
var educationResults = _educationVerifierService.VerifyAll(
|
||||
cvData.Education,
|
||||
cvData.Employment);
|
||||
|
||||
_logger.LogDebug(
|
||||
"Education verification for check {CheckId}: {Count} entries verified ({Recognised} recognised, {DiplomaMill} diploma mills)",
|
||||
cvCheckId,
|
||||
educationResults.Count,
|
||||
educationResults.Count(e => e.IsVerified),
|
||||
educationResults.Count(e => e.IsDiplomaMill));
|
||||
|
||||
// Step 7: Analyse timeline
|
||||
var timelineAnalysis = _timelineAnalyserService.Analyse(cvData.Employment);
|
||||
|
||||
_logger.LogDebug(
|
||||
"Timeline analysis for check {CheckId}: {GapCount} gaps, {OverlapCount} overlaps",
|
||||
cvCheckId, timelineAnalysis.Gaps.Count, timelineAnalysis.Overlaps.Count);
|
||||
|
||||
// Step 7: Calculate veracity score
|
||||
var (score, flags) = CalculateVeracityScore(verificationResults, timelineAnalysis);
|
||||
// Step 8: Calculate veracity score
|
||||
var (score, flags) = CalculateVeracityScore(verificationResults, educationResults, timelineAnalysis);
|
||||
|
||||
_logger.LogDebug("Calculated veracity score for check {CheckId}: {Score}", cvCheckId, score);
|
||||
|
||||
// Step 8: Create CVFlag records
|
||||
// Step 9: Create CVFlag records
|
||||
foreach (var flag in flags)
|
||||
{
|
||||
if (!Enum.TryParse<FlagCategory>(flag.Category, out var category))
|
||||
{
|
||||
_logger.LogWarning("Unknown flag category: {Category}, defaulting to Timeline", flag.Category);
|
||||
category = FlagCategory.Timeline;
|
||||
}
|
||||
|
||||
if (!Enum.TryParse<FlagSeverity>(flag.Severity, out var severity))
|
||||
{
|
||||
_logger.LogWarning("Unknown flag severity: {Severity}, defaulting to Info", flag.Severity);
|
||||
severity = FlagSeverity.Info;
|
||||
}
|
||||
|
||||
var cvFlag = new CVFlag
|
||||
{
|
||||
Id = Guid.NewGuid(),
|
||||
CVCheckId = cvCheckId,
|
||||
Category = Enum.Parse<FlagCategory>(flag.Category),
|
||||
Severity = Enum.Parse<FlagSeverity>(flag.Severity),
|
||||
Category = category,
|
||||
Severity = severity,
|
||||
Title = flag.Title,
|
||||
Description = flag.Description,
|
||||
ScoreImpact = flag.ScoreImpact
|
||||
@@ -128,21 +155,22 @@ public sealed class ProcessCVCheckJob
|
||||
_dbContext.CVFlags.Add(cvFlag);
|
||||
}
|
||||
|
||||
// Step 9: Generate veracity report
|
||||
// Step 10: Generate veracity report
|
||||
var report = new VeracityReport
|
||||
{
|
||||
OverallScore = score,
|
||||
ScoreLabel = GetScoreLabel(score),
|
||||
EmploymentVerifications = verificationResults,
|
||||
EducationVerifications = educationResults,
|
||||
TimelineAnalysis = timelineAnalysis,
|
||||
Flags = flags,
|
||||
GeneratedAt = DateTime.UtcNow
|
||||
};
|
||||
|
||||
cvCheck.ReportJson = JsonSerializer.Serialize(report, JsonOptions);
|
||||
cvCheck.ReportJson = JsonSerializer.Serialize(report, JsonDefaults.CamelCaseIndented);
|
||||
cvCheck.VeracityScore = score;
|
||||
|
||||
// Step 10: Update status to Completed
|
||||
// Step 11: Update status to Completed
|
||||
cvCheck.Status = CheckStatus.Completed;
|
||||
cvCheck.CompletedAt = DateTime.UtcNow;
|
||||
await _dbContext.SaveChangesAsync(cancellationToken);
|
||||
@@ -156,7 +184,8 @@ public sealed class ProcessCVCheckJob
|
||||
_logger.LogError(ex, "Error processing CV check {CheckId}", cvCheckId);
|
||||
|
||||
cvCheck.Status = CheckStatus.Failed;
|
||||
await _dbContext.SaveChangesAsync(cancellationToken);
|
||||
// Use CancellationToken.None to ensure failure status is saved even if original token is cancelled
|
||||
await _dbContext.SaveChangesAsync(CancellationToken.None);
|
||||
|
||||
throw;
|
||||
}
|
||||
@@ -164,6 +193,7 @@ public sealed class ProcessCVCheckJob
|
||||
|
||||
private static (int Score, List<FlagResult> Flags) CalculateVeracityScore(
|
||||
List<CompanyVerificationResult> verifications,
|
||||
List<EducationVerificationResult> educationResults,
|
||||
TimelineAnalysisResult timeline)
|
||||
{
|
||||
var score = BaseScore;
|
||||
@@ -184,6 +214,66 @@ public sealed class ProcessCVCheckJob
|
||||
});
|
||||
}
|
||||
|
||||
// Penalty for diploma mills (critical)
|
||||
foreach (var edu in educationResults.Where(e => e.IsDiplomaMill))
|
||||
{
|
||||
score -= DiplomaMillPenalty;
|
||||
|
||||
flags.Add(new FlagResult
|
||||
{
|
||||
Category = FlagCategory.Education.ToString(),
|
||||
Severity = FlagSeverity.Critical.ToString(),
|
||||
Title = "Diploma Mill Detected",
|
||||
Description = $"'{edu.ClaimedInstitution}' is a known diploma mill. {edu.VerificationNotes}",
|
||||
ScoreImpact = -DiplomaMillPenalty
|
||||
});
|
||||
}
|
||||
|
||||
// Penalty for suspicious institutions
|
||||
foreach (var edu in educationResults.Where(e => e.IsSuspicious && !e.IsDiplomaMill))
|
||||
{
|
||||
score -= SuspiciousInstitutionPenalty;
|
||||
|
||||
flags.Add(new FlagResult
|
||||
{
|
||||
Category = FlagCategory.Education.ToString(),
|
||||
Severity = FlagSeverity.Warning.ToString(),
|
||||
Title = "Suspicious Institution",
|
||||
Description = $"'{edu.ClaimedInstitution}' has suspicious characteristics. {edu.VerificationNotes}",
|
||||
ScoreImpact = -SuspiciousInstitutionPenalty
|
||||
});
|
||||
}
|
||||
|
||||
// Penalty for unverified education (not recognised, but not flagged as fake)
|
||||
foreach (var edu in educationResults.Where(e => !e.IsVerified && !e.IsDiplomaMill && !e.IsSuspicious && e.Status == "Unknown"))
|
||||
{
|
||||
score -= UnverifiedEducationPenalty;
|
||||
|
||||
flags.Add(new FlagResult
|
||||
{
|
||||
Category = FlagCategory.Education.ToString(),
|
||||
Severity = FlagSeverity.Info.ToString(),
|
||||
Title = "Unverified Institution",
|
||||
Description = $"Could not verify '{edu.ClaimedInstitution}'. {edu.VerificationNotes}",
|
||||
ScoreImpact = -UnverifiedEducationPenalty
|
||||
});
|
||||
}
|
||||
|
||||
// Penalty for implausible education dates
|
||||
foreach (var edu in educationResults.Where(e => !e.DatesArePlausible))
|
||||
{
|
||||
score -= EducationDatePenalty;
|
||||
|
||||
flags.Add(new FlagResult
|
||||
{
|
||||
Category = FlagCategory.Education.ToString(),
|
||||
Severity = FlagSeverity.Warning.ToString(),
|
||||
Title = "Education Date Issues",
|
||||
Description = $"Date issues for '{edu.ClaimedInstitution}': {edu.DatePlausibilityNotes}",
|
||||
ScoreImpact = -EducationDatePenalty
|
||||
});
|
||||
}
|
||||
|
||||
// Penalty for gaps (max -10 per gap)
|
||||
foreach (var gap in timeline.Gaps)
|
||||
{
|
||||
@@ -205,7 +295,7 @@ public sealed class ProcessCVCheckJob
|
||||
// Penalty for overlaps (only if > 2 months)
|
||||
foreach (var overlap in timeline.Overlaps)
|
||||
{
|
||||
var excessMonths = overlap.Months - 2; // Allow 2 month transition
|
||||
var excessMonths = Math.Max(0, overlap.Months - 2); // Allow 2 month transition, prevent negative
|
||||
var overlapPenalty = excessMonths * OverlapMonthPenalty;
|
||||
score -= overlapPenalty;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user