Files
RealCV/src/RealCV.Infrastructure/Jobs/ProcessCVCheckJob.cs
Peter Foster 983fb5bd67 fix: Fail CV checks that return no extractable data
CVs that parse with no employment, no education, and unknown name are
likely scanned images or corrupted files. Instead of completing with
score 100 (misleading), fail with a clear error message.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-22 19:49:10 +00:00

1428 lines
57 KiB
C#

using System.Text.Json;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using RealCV.Application.Helpers;
using RealCV.Application.Interfaces;
using RealCV.Application.Models;
using RealCV.Domain.Entities;
using RealCV.Domain.Enums;
using RealCV.Infrastructure.Data;
namespace RealCV.Infrastructure.Jobs;
public sealed class ProcessCVCheckJob
{
// Collaborators, all assigned once in the constructor.
private readonly ApplicationDbContext _dbContext;
private readonly IFileStorageService _fileStorageService;
private readonly ICVParserService _cvParserService;
private readonly ICompanyVerifierService _companyVerifierService;
private readonly IEducationVerifierService _educationVerifierService;
private readonly ITimelineAnalyserService _timelineAnalyserService;
private readonly IAuditService _auditService;
private readonly ILogger<ProcessCVCheckJob> _logger;
// Scoring: every check starts from BaseScore and each flag's (negative) impact is added to it.
private const int BaseScore = 100;
// Deducted once per distinct unverified company name.
private const int UnverifiedCompanyPenalty = 10;
// NOTE(review): not referenced in the visible part of this file - confirm it is still used.
private const int ImplausibleJobTitlePenalty = 15;
private const int CompanyVerificationFlagPenalty = 5; // Base penalty for company flags, actual from flag.ScoreImpact
// Deducted for each rapid-progression flag (3+ seniority levels in under 24 months at one company).
private const int RapidProgressionPenalty = 10;
// Deducted for each senior role started within 24 months of the latest education end date.
private const int EarlyCareerSeniorRolePenalty = 10;
// Employment gaps cost GapMonthPenalty per month, capped at MaxGapPenalty per gap.
private const int GapMonthPenalty = 1;
private const int MaxGapPenalty = 10;
// NOTE(review): overlaps are reported with a zero score impact in the visible scoring code,
// so this constant appears unused there - confirm before relying on it.
private const int OverlapMonthPenalty = 2;
// Education penalties, applied per education result that matches the corresponding condition.
private const int DiplomaMillPenalty = 25;
private const int SuspiciousInstitutionPenalty = 15;
private const int UnverifiedEducationPenalty = 5;
private const int EducationDatePenalty = 10;
/// <summary>
/// Creates the job and stores the services it uses for storage access, parsing,
/// verification, timeline analysis, auditing and logging.
/// </summary>
public ProcessCVCheckJob(
    ApplicationDbContext dbContext,
    IFileStorageService fileStorageService,
    ICVParserService cvParserService,
    ICompanyVerifierService companyVerifierService,
    IEducationVerifierService educationVerifierService,
    ITimelineAnalyserService timelineAnalyserService,
    IAuditService auditService,
    ILogger<ProcessCVCheckJob> logger)
{
    // Positional assignment: each field on the left receives the argument in the same slot on the right.
    (_dbContext, _fileStorageService, _cvParserService, _companyVerifierService) =
        (dbContext, fileStorageService, cvParserService, companyVerifierService);
    (_educationVerifierService, _timelineAnalyserService, _auditService, _logger) =
        (educationVerifierService, timelineAnalyserService, auditService, logger);
}
/// <summary>
/// Runs the whole verification pipeline for a single CV check: download, parse, employment,
/// director and education verification, timeline analysis, scoring, flag persistence and
/// report generation. On success the uploaded file is deleted and its URL cleared.
/// </summary>
/// <param name="cvCheckId">Identifier of the CV check record to process.</param>
/// <param name="cancellationToken">Token passed to the database and parser calls.</param>
/// <exception cref="InvalidOperationException">
/// The parsed CV contains no employment, no education and no usable name.
/// </exception>
/// <remarks>
/// Any exception marks the check as <c>Failed</c> and is then rethrown, unless the record was
/// deleted while processing, in which case the method logs a warning and returns.
/// </remarks>
public async Task ExecuteAsync(Guid cvCheckId, CancellationToken cancellationToken)
{
    _logger.LogInformation("Starting CV check processing for: {CheckId}", cvCheckId);
    var cvCheck = await _dbContext.CVChecks
        .FirstOrDefaultAsync(c => c.Id == cvCheckId, cancellationToken);
    if (cvCheck is null)
    {
        _logger.LogError("CV check not found: {CheckId}", cvCheckId);
        return;
    }

    try
    {
        // Step 1: Update status to Processing
        cvCheck.Status = CheckStatus.Processing;
        cvCheck.ProcessingStage = "Downloading CV";
        await _dbContext.SaveChangesAsync(cancellationToken);
        _logger.LogDebug("CV check {CheckId} status updated to Processing", cvCheckId);

        // Step 2: Download file from blob
        await using var fileStream = await _fileStorageService.DownloadAsync(cvCheck.BlobUrl);
        _logger.LogDebug("Downloaded CV file for check {CheckId}", cvCheckId);

        // Step 3: Parse CV
        cvCheck.ProcessingStage = "Parsing CV";
        await _dbContext.SaveChangesAsync(cancellationToken);
        var cvData = await _cvParserService.ParseAsync(fileStream, cvCheck.OriginalFileName, cancellationToken);
        _logger.LogDebug(
            "Parsed CV for check {CheckId}: {EmploymentCount} employment entries",
            cvCheckId, cvData.Employment.Count);

        // Validate that the CV contains meaningful data
        // A CV with no name, no employment AND no education is likely a parsing failure
        if (cvData.Employment.Count == 0 && cvData.Education.Count == 0 &&
            (string.IsNullOrWhiteSpace(cvData.FullName) || cvData.FullName == "Unknown"))
        {
            _logger.LogWarning(
                "CV check {CheckId} parsed with no extractable data - possible scanned/image PDF or parsing failure",
                cvCheckId);
            throw new InvalidOperationException(
                "Could not extract any employment or education data from this CV. " +
                "The file may be a scanned image, password-protected, or in an unsupported format.");
        }

        // Step 4: Save extracted data
        cvCheck.ExtractedDataJson = JsonSerializer.Serialize(cvData, JsonDefaults.CamelCaseIndented);
        cvCheck.ProcessingStage = "Verifying Employment";
        await _dbContext.SaveChangesAsync(cancellationToken);

        // Step 5: Verify each employment entry (parallelized with rate limiting)
        // Freelance, public sector and charity entries cannot be verified against Companies House.
        // Each entry is placed in exactly ONE bucket (freelance wins over public sector, which wins
        // over charity) so a name matching several heuristics - e.g. "Contract NHS Trust" - yields a
        // single verification result instead of one per matching category.
        var verifiableEntries = new List<EmploymentEntry>();
        var freelanceEntries = new List<EmploymentEntry>();
        var publicSectorEntries = new List<EmploymentEntry>();
        var charityEntries = new List<EmploymentEntry>();
        foreach (var employment in cvData.Employment)
        {
            var bucket = IsFreelance(employment.CompanyName) ? freelanceEntries
                : IsPublicSectorEmployer(employment.CompanyName) ? publicSectorEntries
                : IsCharityOrVoluntary(employment.CompanyName) ? charityEntries
                : verifiableEntries;
            bucket.Add(employment);
        }

        var verificationTasks = verifiableEntries
            .Select(async employment =>
            {
                var result = await _companyVerifierService.VerifyCompanyAsync(
                    employment.CompanyName,
                    employment.StartDate,
                    employment.EndDate,
                    employment.JobTitle);
                _logger.LogDebug(
                    "Verified {Company}: {IsVerified} (Score: {Score}%), JobTitle: {JobTitle}, Plausible: {Plausible}",
                    employment.CompanyName, result.IsVerified, result.MatchScore,
                    employment.JobTitle, result.JobTitlePlausible);
                return result;
            });
        var verificationResults = (await Task.WhenAll(verificationTasks)).ToList();

        // Add freelance entries as auto-verified (skipped)
        foreach (var employment in freelanceEntries)
        {
            verificationResults.Add(CreateSkippedResult(
                employment, "Freelance/self-employed - verification skipped"));
            _logger.LogDebug("Skipped verification for freelance entry: {Company}", employment.CompanyName);
        }

        // Add public sector employers as auto-verified (not in Companies House)
        foreach (var employment in publicSectorEntries)
        {
            verificationResults.Add(CreateSkippedResult(
                employment, "Public sector employer - not registered at Companies House"));
            _logger.LogDebug("Skipped verification for public sector employer: {Company}", employment.CompanyName);
        }

        // Add charities/voluntary organisations as auto-verified (registered with Charity Commission, not Companies House)
        foreach (var employment in charityEntries)
        {
            verificationResults.Add(CreateSkippedResult(
                employment, "Charity/voluntary organisation - registered with Charity Commission"));
            _logger.LogDebug("Skipped verification for charity/voluntary organisation: {Company}", employment.CompanyName);
        }

        // Step 5b: Verify director claims against Companies House officers
        cvCheck.ProcessingStage = "Verifying Directors";
        await _dbContext.SaveChangesAsync(cancellationToken);
        await VerifyDirectorClaims(cvData.FullName, verificationResults, cancellationToken);

        // Step 6: Verify education entries
        cvCheck.ProcessingStage = "Verifying Education";
        await _dbContext.SaveChangesAsync(cancellationToken);
        var educationResults = _educationVerifierService.VerifyAll(
            cvData.Education,
            cvData.Employment);
        _logger.LogDebug(
            "Education verification for check {CheckId}: {Count} entries verified ({Recognised} recognised, {DiplomaMill} diploma mills)",
            cvCheckId,
            educationResults.Count,
            educationResults.Count(e => e.IsVerified),
            educationResults.Count(e => e.IsDiplomaMill));

        // Step 7: Analyse timeline
        cvCheck.ProcessingStage = "Analysing Timeline";
        await _dbContext.SaveChangesAsync(cancellationToken);
        var timelineAnalysis = _timelineAnalyserService.Analyse(cvData.Employment);
        _logger.LogDebug(
            "Timeline analysis for check {CheckId}: {GapCount} gaps, {OverlapCount} overlaps",
            cvCheckId, timelineAnalysis.Gaps.Count, timelineAnalysis.Overlaps.Count);

        // Step 8: Calculate veracity score
        cvCheck.ProcessingStage = "Calculating Score";
        await _dbContext.SaveChangesAsync(cancellationToken);
        var (score, flags) = CalculateVeracityScore(verificationResults, educationResults, timelineAnalysis, cvData);
        _logger.LogDebug("Calculated veracity score for check {CheckId}: {Score}", cvCheckId, score);

        // Step 9: Create CVFlag records
        foreach (var flag in flags)
        {
            if (!Enum.TryParse<FlagCategory>(flag.Category, out var category))
            {
                _logger.LogWarning("Unknown flag category: {Category}, defaulting to Timeline", flag.Category);
                category = FlagCategory.Timeline;
            }

            if (!Enum.TryParse<FlagSeverity>(flag.Severity, out var severity))
            {
                _logger.LogWarning("Unknown flag severity: {Severity}, defaulting to Info", flag.Severity);
                severity = FlagSeverity.Info;
            }

            var cvFlag = new CVFlag
            {
                Id = Guid.NewGuid(),
                CVCheckId = cvCheckId,
                Category = category,
                Severity = severity,
                Title = flag.Title,
                Description = flag.Description,
                ScoreImpact = flag.ScoreImpact
            };
            _dbContext.CVFlags.Add(cvFlag);
        }

        // Step 10: Generate veracity report
        cvCheck.ProcessingStage = "Generating Report";
        await _dbContext.SaveChangesAsync(cancellationToken);
        var report = new VeracityReport
        {
            CandidateName = cvData.FullName,
            OverallScore = score,
            ScoreLabel = GetScoreLabel(score),
            EmploymentVerifications = verificationResults,
            EducationVerifications = educationResults,
            TimelineAnalysis = timelineAnalysis,
            Flags = flags,
            GeneratedAt = DateTime.UtcNow
        };
        cvCheck.ReportJson = JsonSerializer.Serialize(report, JsonDefaults.CamelCaseIndented);
        cvCheck.VeracityScore = score;

        // Step 11: Update status to Completed
        cvCheck.Status = CheckStatus.Completed;
        cvCheck.ProcessingStage = null; // Clear stage on completion
        cvCheck.CompletedAt = DateTime.UtcNow;
        await _dbContext.SaveChangesAsync(cancellationToken);
        _logger.LogInformation(
            "CV check {CheckId} completed successfully with score {Score}",
            cvCheckId, score);
        await _auditService.LogAsync(cvCheck.UserId, AuditActions.CVProcessed, "CVCheck", cvCheckId, $"Score: {score}");

        // GDPR: Delete the uploaded CV file immediately after processing
        // We only need the extracted data and report, not the original file
        await DeleteCVFileAsync(cvCheck.BlobUrl, cvCheckId);
        cvCheck.BlobUrl = string.Empty; // Clear the URL as file no longer exists
        await _dbContext.SaveChangesAsync(cancellationToken);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error processing CV check {CheckId}", cvCheckId);
        try
        {
            cvCheck.Status = CheckStatus.Failed;
            // Use CancellationToken.None to ensure failure status is saved even if original token is cancelled
            await _dbContext.SaveChangesAsync(CancellationToken.None);
        }
        catch (DbUpdateConcurrencyException)
        {
            // Record was deleted during processing - nothing to update
            _logger.LogWarning("CV check {CheckId} was deleted during processing", cvCheckId);
            return;
        }

        throw;
    }

    // Builds the auto-verified placeholder recorded for an employer that is never looked up.
    static CompanyVerificationResult CreateSkippedResult(EmploymentEntry employment, string notes) =>
        new CompanyVerificationResult
        {
            ClaimedCompany = employment.CompanyName,
            IsVerified = true,
            MatchScore = 100,
            VerificationNotes = notes,
            ClaimedJobTitle = employment.JobTitle,
            JobTitlePlausible = true
        };
}
/// <summary>
/// GDPR clean-up: removes the uploaded CV from file storage once it is no longer needed.
/// A failed deletion is logged as a warning and swallowed so it cannot fail the job.
/// </summary>
/// <param name="blobUrl">Storage URL of the file; a blank value means there is nothing to delete.</param>
/// <param name="cvCheckId">Identifier of the owning CV check, used for logging only.</param>
private async Task DeleteCVFileAsync(string blobUrl, Guid cvCheckId)
{
    var hasStoredFile = !string.IsNullOrWhiteSpace(blobUrl);
    if (hasStoredFile)
    {
        try
        {
            await _fileStorageService.DeleteAsync(blobUrl);
            _logger.LogInformation("GDPR: Deleted CV file for check {CheckId}", cvCheckId);
        }
        catch (Exception deleteError)
        {
            // Deliberately best-effort: the check itself has already been processed.
            _logger.LogWarning(deleteError, "Failed to delete CV file for check {CheckId}: {BlobUrl}", cvCheckId, blobUrl);
        }
    }
    else
    {
        _logger.LogDebug("No file to delete for CV check {CheckId}", cvCheckId);
    }
}
/// <summary>
/// Turns the employment, education and timeline findings into a flag list and a score.
/// </summary>
/// <param name="verifications">Company verification results, including any per-company flags.</param>
/// <param name="educationResults">Education verification results.</param>
/// <param name="timeline">Gaps and overlaps found in the employment timeline.</param>
/// <param name="cvData">Parsed CV, used by the career-pattern checks and informational flags.</param>
/// <returns>
/// The flags de-duplicated by title and description, and a score computed as
/// <c>BaseScore</c> plus the sum of those flags' impacts, never below zero.
/// </returns>
private static (int Score, List<FlagResult> Flags) CalculateVeracityScore(
    List<CompanyVerificationResult> verifications,
    List<EducationVerificationResult> educationResults,
    TimelineAnalysisResult timeline,
    CVData cvData)
{
    // Running total handed to the ref-taking helpers; the returned score is recomputed from the flags.
    var runningScore = BaseScore;
    var collected = new List<FlagResult>();

    // Unverified companies: one penalty per distinct company name (case-insensitive).
    var unverifiedGroups = verifications
        .Where(v => !v.IsVerified)
        .GroupBy(v => v.ClaimedCompany, StringComparer.OrdinalIgnoreCase);
    foreach (var group in unverifiedGroups)
    {
        var sample = group.First();
        var roleCount = group.Count();
        var roleSuffix = roleCount > 1 ? $" ({roleCount} roles)" : "";
        runningScore -= UnverifiedCompanyPenalty;
        collected.Add(new FlagResult
        {
            Category = FlagCategory.Employment.ToString(),
            Severity = FlagSeverity.Warning.ToString(),
            Title = "Unverified Company",
            Description = $"Could not verify employment at '{sample.ClaimedCompany}'{roleSuffix}. {sample.VerificationNotes}",
            ScoreImpact = -UnverifiedCompanyPenalty
        });
    }

    // Company-level flags (incorporation date, dissolution, dormant, etc.),
    // counted once per (company, flag type) pair.
    var seenCompanyFlags = new HashSet<(string Company, string FlagType)>(
        new CompanyFlagComparer());
    foreach (var verification in verifications.Where(v => v.Flags.Count > 0))
    {
        foreach (var companyFlag in verification.Flags)
        {
            var isFirstOccurrence = seenCompanyFlags.Add((verification.ClaimedCompany, companyFlag.Type));
            if (!isFirstOccurrence)
            {
                continue;
            }

            var impact = Math.Abs(companyFlag.ScoreImpact);
            runningScore -= impact;

            var mappedSeverity = companyFlag.Severity == "Critical"
                ? FlagSeverity.Critical
                : companyFlag.Severity == "Warning"
                    ? FlagSeverity.Warning
                    : FlagSeverity.Info;

            var flagType = companyFlag.Type;
            var friendlyTitle = flagType switch
            {
                "EmploymentBeforeIncorporation" => "Employment Before Company Existed",
                "EmploymentAtDissolvedCompany" => "Employment at Dissolved Company",
                "CurrentEmploymentAtDissolvedCompany" => "Current Employment at Dissolved Company",
                "EmploymentAtDormantCompany" => "Employment at Dormant Company",
                "SeniorRoleAtMicroCompany" => "Senior Role at Micro Company",
                "SicCodeMismatch" => "Role/Industry Mismatch",
                "ImplausibleJobTitle" => "Implausible Job Title",
                "UnverifiedDirectorClaim" => "Unverified Director Claim",
                _ => flagType
            };

            collected.Add(new FlagResult
            {
                Category = FlagCategory.Employment.ToString(),
                Severity = mappedSeverity.ToString(),
                Title = friendlyTitle,
                Description = companyFlag.Message,
                ScoreImpact = -impact
            });
        }
    }

    // Career-pattern checks: the first two may apply penalties, the third is informational.
    CheckRapidCareerProgression(cvData.Employment, collected, ref runningScore);
    CheckEarlyCareerSeniorRoles(cvData.Employment, cvData.Education, collected, ref runningScore);
    CheckFrequentJobChanges(cvData.Employment, collected);

    // Context-only flags for recruiters (no penalty).
    AddCareerSpanFlag(cvData.Employment, collected);
    AddCurrentEmploymentStatusFlag(cvData.Employment, collected);
    AddLongTenureFlags(cvData.Employment, collected);
    AddManagementExperienceFlag(cvData.Employment, collected);
    AddCompanySizePatternFlag(verifications, collected);
    AddCareerTrajectoryFlag(cvData.Employment, collected);
    AddPLCExperienceFlag(verifications, collected);
    AddVerifiedDirectorFlag(verifications, collected);

    // Education: diploma mills (critical).
    foreach (var edu in educationResults)
    {
        if (!edu.IsDiplomaMill)
        {
            continue;
        }

        runningScore -= DiplomaMillPenalty;
        collected.Add(new FlagResult
        {
            Category = FlagCategory.Education.ToString(),
            Severity = FlagSeverity.Critical.ToString(),
            Title = "Unaccredited Institution",
            Description = $"'{edu.ClaimedInstitution}' was not found in accredited institutions databases. Manual verification recommended.",
            ScoreImpact = -DiplomaMillPenalty
        });
    }

    // Education: suspicious institutions that are not already counted as diploma mills.
    foreach (var edu in educationResults)
    {
        if (!edu.IsSuspicious || edu.IsDiplomaMill)
        {
            continue;
        }

        runningScore -= SuspiciousInstitutionPenalty;
        collected.Add(new FlagResult
        {
            Category = FlagCategory.Education.ToString(),
            Severity = FlagSeverity.Warning.ToString(),
            Title = "Unrecognised Institution",
            Description = $"'{edu.ClaimedInstitution}' was not found in recognised institutions databases. Manual verification recommended.",
            ScoreImpact = -SuspiciousInstitutionPenalty
        });
    }

    // Education: unrecognised but not flagged as fake. Blank or placeholder
    // institution names are skipped because there is nothing to verify.
    foreach (var edu in educationResults)
    {
        var isPlainUnverified = !edu.IsVerified && !edu.IsDiplomaMill && !edu.IsSuspicious && edu.Status == "Unknown";
        if (!isPlainUnverified)
        {
            continue;
        }

        var hasNamedInstitution = !string.IsNullOrWhiteSpace(edu.ClaimedInstitution)
            && !edu.ClaimedInstitution.Equals("Unknown Institution", StringComparison.OrdinalIgnoreCase)
            && !edu.ClaimedInstitution.Equals("Unknown", StringComparison.OrdinalIgnoreCase);
        if (!hasNamedInstitution)
        {
            continue;
        }

        runningScore -= UnverifiedEducationPenalty;
        collected.Add(new FlagResult
        {
            Category = FlagCategory.Education.ToString(),
            Severity = FlagSeverity.Info.ToString(),
            Title = "Unverified Institution",
            Description = $"Could not verify '{edu.ClaimedInstitution}'. {edu.VerificationNotes}",
            ScoreImpact = -UnverifiedEducationPenalty
        });
    }

    // Education: implausible dates.
    foreach (var edu in educationResults)
    {
        if (edu.DatesArePlausible)
        {
            continue;
        }

        runningScore -= EducationDatePenalty;
        collected.Add(new FlagResult
        {
            Category = FlagCategory.Education.ToString(),
            Severity = FlagSeverity.Warning.ToString(),
            Title = "Education Date Issues",
            Description = $"Date issues for '{edu.ClaimedInstitution}': {edu.DatePlausibilityNotes}",
            ScoreImpact = -EducationDatePenalty
        });
    }

    // Timeline gaps: per-month penalty capped at MaxGapPenalty for each gap.
    foreach (var gap in timeline.Gaps)
    {
        var cappedPenalty = Math.Min(gap.Months * GapMonthPenalty, MaxGapPenalty);
        runningScore -= cappedPenalty;
        var gapSeverity = gap.Months >= 6 ? FlagSeverity.Warning : FlagSeverity.Info;
        collected.Add(new FlagResult
        {
            Category = FlagCategory.Timeline.ToString(),
            Severity = gapSeverity.ToString(),
            Title = "Employment Gap",
            Description = $"{gap.Months} month gap in employment from {gap.StartDate:MMM yyyy} to {gap.EndDate:MMM yyyy}",
            ScoreImpact = -cappedPenalty
        });
    }

    // Overlaps are often legitimate (part-time, consulting, transitions): informational only.
    foreach (var overlap in timeline.Overlaps)
    {
        collected.Add(new FlagResult
        {
            Category = FlagCategory.Timeline.ToString(),
            Severity = FlagSeverity.Info.ToString(),
            Title = "Concurrent Employment",
            Description = $"Worked at both '{overlap.Company1}' and '{overlap.Company2}' simultaneously for {overlap.Months} months ({overlap.OverlapStart:MMM yyyy} to {overlap.OverlapEnd:MMM yyyy})",
            ScoreImpact = 0
        });
    }

    // Keep the first flag for every (Title, Description) pair, in original order.
    var distinctFlags = collected
        .DistinctBy(f => (f.Title, f.Description))
        .ToList();

    // The final score comes from the distinct flags only, floored at zero.
    var finalScore = Math.Max(0, BaseScore + distinctFlags.Sum(f => f.ScoreImpact));
    return (finalScore, distinctFlags);
}
/// <summary>
/// Maps a numeric score to its label: 90+ Excellent, 75+ Good, 60+ Fair, 40+ Poor, otherwise Very Poor.
/// </summary>
private static string GetScoreLabel(int score)
{
    if (score >= 90)
    {
        return "Excellent";
    }

    if (score >= 75)
    {
        return "Good";
    }

    if (score >= 60)
    {
        return "Fair";
    }

    if (score >= 40)
    {
        return "Poor";
    }

    return "Very Poor";
}
/// <summary>
/// Decides whether a claimed employer name actually describes freelance, self-employed or
/// contract work rather than a company.
/// </summary>
/// <param name="companyName">Employer name as written on the CV; may be null or blank.</param>
/// <returns>
/// <c>true</c> when the trimmed, lower-cased name equals a known freelance term, starts with one
/// followed by a space, or contains a bracketed marker such as "(contractor)"; otherwise <c>false</c>.
/// </returns>
private static bool IsFreelance(string companyName)
{
    if (string.IsNullOrWhiteSpace(companyName))
    {
        return false;
    }

    var name = companyName.Trim().ToLowerInvariant();

    // Whole-name matches ("contract" = working on contract basis, "various" = multiple short-term contracts).
    if (name is "freelance" or "freelancer"
        or "self-employed" or "self employed" or "selfemployed"
        or "contractor" or "contract" or "contract work" or "contract role"
        or "various" or "various clients" or "various companies")
    {
        return true;
    }

    // Prefix checks are ordinal: these are fixed keywords, so culture-sensitive comparison
    // (the default for string.StartsWith(string)) is neither needed nor wanted.
    if (name.StartsWith("freelance ", StringComparison.Ordinal) ||
        name.StartsWith("self-employed ", StringComparison.Ordinal) ||
        name.StartsWith("self employed ", StringComparison.Ordinal) ||
        name.StartsWith("contract ", StringComparison.Ordinal) ||
        name.StartsWith("contracting ", StringComparison.Ordinal))
    {
        return true;
    }

    // Bracketed markers appended to a real client name, e.g. "Acme Ltd (Contractor)".
    return name.Contains("(freelance)", StringComparison.Ordinal) ||
           name.Contains("(self-employed)", StringComparison.Ordinal) ||
           name.Contains("(self employed)", StringComparison.Ordinal) ||
           name.Contains("(contractor)", StringComparison.Ordinal) ||
           name.Contains("(contract)", StringComparison.Ordinal);
}
/// <summary>
/// Heuristically decides whether an employer name refers to a public sector body
/// (council, NHS, government department, emergency service or state education).
/// </summary>
/// <param name="companyName">Employer name as written on the CV; may be null or blank.</param>
/// <returns><c>true</c> when the name matches one of the public sector patterns.</returns>
/// <remarks>
/// Short abbreviations (MBC, BC, CC, DC, CCG, LEA) are matched as whole words only. Matching them
/// as raw substrings treated names such as "Acme CCTV Ltd" or "Nuclea Systems" as public bodies.
/// </remarks>
private static bool IsPublicSectorEmployer(string companyName)
{
    if (string.IsNullOrWhiteSpace(companyName))
    {
        return false;
    }

    var name = companyName.Trim().ToLowerInvariant();
    var words = name.Split(
        new[] { ' ', ',', '.', '(', ')', '-', '/', '&' },
        StringSplitOptions.RemoveEmptyEntries);

    // Council abbreviations must follow at least one other word (e.g. "Calderdale MBC"):
    // Metropolitan Borough / Borough / County / District Council.
    var hasCouncilAbbreviation = words.Skip(1).Any(w => w is "mbc" or "bc" or "cc" or "dc");

    // Local authorities and councils
    if (hasCouncilAbbreviation ||
        name.EndsWith(" council", StringComparison.Ordinal) ||
        name.Contains(" council ") ||
        name.Contains("borough council") ||
        name.Contains("county council") ||
        name.Contains("district council") ||
        name.Contains("city council") ||
        name.Contains("town council") ||
        name.Contains("parish council") ||
        name.Contains("metropolitan") ||
        name.Contains("local authority"))
    {
        return true;
    }

    // NHS and health ("ccg" = Clinical Commissioning Group; listed explicitly because
    // names like "Leeds CCG" used to match only via the loose " cc" substring test)
    if (name.Contains("nhs") ||
        words.Contains("ccg") ||
        name.Contains("national health service") ||
        name.Contains("health trust") ||
        name.Contains("hospital trust") ||
        name.Contains("clinical commissioning") ||
        name.Contains("primary care trust") ||
        name.Contains("ambulance service") ||
        name.Contains("ambulance trust"))
    {
        return true;
    }

    // Government departments and agencies (ordinal prefix checks on fixed keywords)
    if (name.StartsWith("hm ", StringComparison.Ordinal) || // HM Revenue, HM Treasury, etc.
        name.StartsWith("ministry of", StringComparison.Ordinal) ||
        name.StartsWith("department of", StringComparison.Ordinal) ||
        name.StartsWith("department for", StringComparison.Ordinal) ||
        name.Contains("civil service") ||
        name.Contains("home office") ||
        name.Contains("cabinet office") ||
        name.Contains("foreign office"))
    {
        return true;
    }

    // Emergency services
    if (name.Contains("police") ||
        name.Contains("fire service") ||
        name.Contains("fire brigade") ||
        name.Contains("fire and rescue"))
    {
        return true;
    }

    // Education (state sector); "lea" = Local Education Authority, whole word only
    return name.Contains("academy trust") ||
           name.Contains("multi academy") ||
           name.Contains("education authority") ||
           words.Contains("lea");
}
/// <summary>
/// Heuristically decides whether an employer name refers to a charity, voluntary organisation
/// or community partnership.
/// </summary>
/// <param name="companyName">Employer name as written on the CV; may be null or blank.</param>
/// <returns><c>true</c> when the name matches a known charity or a generic charity indicator.</returns>
/// <remarks>
/// The short terms "cab", "mind" and "trust" are matched as whole words. As substrings they
/// matched "Minicab", "Mastermind" and "Entrust", and the exact name "Mind" was missed.
/// </remarks>
private static bool IsCharityOrVoluntary(string companyName)
{
    if (string.IsNullOrWhiteSpace(companyName))
    {
        return false;
    }

    var name = companyName.Trim().ToLowerInvariant();
    var words = name.Split(
        new[] { ' ', ',', '.', '(', ')', '-', '/', '&' },
        StringSplitOptions.RemoveEmptyEntries);

    // Well-known charities/voluntary organisations (distinctive enough for substring matching)
    var knownCharities = new[]
    {
        "girlguiding", "girl guiding", "girl guides",
        "scouts", "scout association",
        "red cross", "british red cross",
        "st john ambulance", "st johns ambulance",
        "rotary", "lions club",
        "citizens advice",
        "oxfam", "save the children", "barnardos", "barnardo's",
        "nspcc", "rspca", "rspb", "rnli",
        "macmillan", "marie curie", "cancer research",
        "british heart foundation", "bhf",
        "age uk", "age concern",
        "samaritans",
        "national trust", "english heritage",
        "ymca", "ywca"
    };
    if (knownCharities.Any(c => name.Contains(c)))
    {
        return true;
    }

    // Short names that are only meaningful as whole words:
    // CAB (Citizens Advice Bureau), Mind, and the generic "Trust".
    if (words.Any(w => w is "cab" or "mind" or "trust"))
    {
        return true;
    }

    // Generic charity indicators
    if (name.Contains("charity") ||
        name.Contains("charitable") ||
        name.Contains("foundation") ||
        name.Contains("volunteer") ||
        name.Contains("voluntary"))
    {
        return true;
    }

    // Community partnerships (often registered as charities/CICs, e.g., "North Halifax Partnership");
    // legal LLPs are commercial and therefore excluded.
    return name.Contains("partnership") &&
           !name.Contains("llp") &&
           !name.Contains("limited liability");
}
/// <summary>
/// Checks director-type job titles at verified companies against the company verifier's
/// director lookup and attaches an "UnverifiedDirectorClaim" flag to every claim it rejects.
/// </summary>
/// <param name="candidateName">Candidate name passed to the director lookup.</param>
/// <param name="verificationResults">
/// Results to inspect. An entry is replaced in place (same index) when a flag is added.
/// </param>
/// <param name="cancellationToken">Checked before every remote lookup.</param>
/// <exception cref="OperationCanceledException">Cancellation was requested between lookups.</exception>
private async Task VerifyDirectorClaims(
    string candidateName,
    List<CompanyVerificationResult> verificationResults,
    CancellationToken cancellationToken)
{
    // Snapshot the candidates first (ToList) so the list can be updated by index while iterating.
    var directorCandidates = verificationResults
        .Select((result, index) => (result, index))
        .Where(x => x.result.IsVerified && !string.IsNullOrEmpty(x.result.MatchedCompanyNumber))
        .ToList();

    foreach (var (result, index) in directorCandidates)
    {
        var jobTitle = result.ClaimedJobTitle?.ToLowerInvariant() ?? "";

        // "managing director" is already covered by the "director" substring test.
        var isDirectorClaim = jobTitle.Contains("director") ||
                              jobTitle.Contains("company secretary") ||
                              jobTitle == "md";
        if (!isDirectorClaim)
        {
            continue;
        }

        // Lookups run one after another; stop promptly once the job has been cancelled.
        cancellationToken.ThrowIfCancellationRequested();

        _logger.LogDebug(
            "Verifying director claim for {Candidate} at {Company}",
            candidateName, result.MatchedCompanyName);
        var isVerifiedDirector = await _companyVerifierService.VerifyDirectorAsync(
            result.MatchedCompanyNumber!,
            candidateName,
            result.ClaimedStartDate,
            result.ClaimedEndDate);

        if (isVerifiedDirector == false)
        {
            // Add a flag for unverified director claim
            var flags = (result.Flags ?? []).ToList();
            flags.Add(new CompanyVerificationFlag
            {
                Type = "UnverifiedDirectorClaim",
                Severity = "Critical",
                Message = $"Claimed director role at '{result.MatchedCompanyName}' but candidate name not found in Companies House officers list",
                ScoreImpact = -20
            });

            // Update the result with the new flag
            verificationResults[index] = result with { Flags = flags };
            _logger.LogWarning(
                "Director claim not verified for {Candidate} at {Company}",
                candidateName, result.MatchedCompanyName);
        }
        else if (isVerifiedDirector == true)
        {
            _logger.LogInformation(
                "Director claim verified for {Candidate} at {Company}",
                candidateName, result.MatchedCompanyName);
        }

        // Any other outcome (neither true nor false) is left unflagged and unlogged.
    }
}
/// <summary>
/// Flags promotions inside one company that jump three or more seniority levels
/// in under 24 months, measured between the two roles' start dates.
/// </summary>
/// <param name="employment">Employment entries; entries without a company name or start date are ignored.</param>
/// <param name="flags">Receives one warning flag per rapid promotion found.</param>
/// <param name="score">Reduced by <c>RapidProgressionPenalty</c> for every flag added.</param>
private static void CheckRapidCareerProgression(
    List<EmploymentEntry> employment,
    List<FlagResult> flags,
    ref int score)
{
    // Only companies with more than one dated role can show an internal promotion.
    var multiRoleCompanies = employment
        .Where(e => !string.IsNullOrWhiteSpace(e.CompanyName) && e.StartDate.HasValue)
        .GroupBy(e => e.CompanyName.ToLowerInvariant())
        .Where(g => g.Count() > 1);

    foreach (var company in multiRoleCompanies)
    {
        var displayName = company.First().CompanyName;
        var chronological = company.OrderBy(e => e.StartDate).ToList();

        // Walk consecutive (earlier, later) role pairs.
        foreach (var (earlier, later) in chronological.Zip(chronological.Skip(1)))
        {
            var levelsGained = GetSeniorityLevel(later.JobTitle) - GetSeniorityLevel(earlier.JobTitle);
            if (levelsGained < 3)
            {
                continue;
            }

            var elapsedMonths = DateHelpers.MonthsBetween(earlier.StartDate!.Value, later.StartDate!.Value);
            if (elapsedMonths >= 24)
            {
                continue;
            }

            score -= RapidProgressionPenalty;
            flags.Add(new FlagResult
            {
                Category = FlagCategory.Employment.ToString(),
                Severity = FlagSeverity.Warning.ToString(),
                Title = "Rapid Career Progression",
                Description = $"Promoted from '{earlier.JobTitle}' to '{later.JobTitle}' at '{displayName}' in {elapsedMonths} months - unusually fast progression",
                ScoreImpact = -RapidProgressionPenalty
            });
        }
    }
}
/// <summary>
/// Flags senior roles (seniority level 4 or above) that start less than 24 months after the
/// latest education end date. Does nothing when no education entry has an end date.
/// </summary>
/// <param name="employment">Employment entries; entries without a start date are ignored.</param>
/// <param name="education">Education entries used to find the latest end date.</param>
/// <param name="flags">Receives one warning flag per early senior role.</param>
/// <param name="score">Reduced by <c>EarlyCareerSeniorRolePenalty</c> for every flag added.</param>
private static void CheckEarlyCareerSeniorRoles(
    List<EmploymentEntry> employment,
    List<EducationEntry> education,
    List<FlagResult> flags,
    ref int score)
{
    // DateOnly.MinValue doubles as the "no education dates available" marker.
    var educationFinished = DateOnly.MinValue;
    foreach (var entry in education)
    {
        if (entry.EndDate is { } finishedOn && finishedOn > educationFinished)
        {
            educationFinished = finishedOn;
        }
    }

    if (educationFinished == DateOnly.MinValue)
    {
        return;
    }

    foreach (var role in employment)
    {
        if (role.StartDate is not { } startedOn)
        {
            continue;
        }

        // Roles that began before education ended yield a negative value and are ignored.
        var monthsIntoCareer = DateHelpers.MonthsBetween(educationFinished, startedOn);
        if (monthsIntoCareer is < 0 or >= 24)
        {
            continue;
        }

        if (GetSeniorityLevel(role.JobTitle) < 4)
        {
            continue;
        }

        score -= EarlyCareerSeniorRolePenalty;
        flags.Add(new FlagResult
        {
            Category = FlagCategory.Employment.ToString(),
            Severity = FlagSeverity.Warning.ToString(),
            Title = "Early Career Senior Role",
            Description = $"Claimed senior role '{role.JobTitle}' at '{role.CompanyName}' only {monthsIntoCareer} months after completing education",
            ScoreImpact = -EarlyCareerSeniorRolePenalty
        });
    }
}
// An employer counts as a "short tenure" when the total time there is below this many months.
private const int ShortTenureMonths = 18;
// Number of short-tenure employers needed before "Frequent Job Changes" is reported.
private const int MinShortTenuresForFlag = 3;

/// <summary>
/// Adds an informational flag (no score impact) when the candidate has several short tenures,
/// or otherwise when the average tenure across three or more employers is under 24 months.
/// </summary>
/// <param name="employment">
/// Employment entries; freelance entries and entries lacking a company name or start date are ignored.
/// </param>
/// <param name="flags">Receives at most one flag.</param>
private static void CheckFrequentJobChanges(
    List<EmploymentEntry> employment,
    List<FlagResult> flags)
{
    // Roles are combined per normalised employer name and their durations summed.
    var rolesByEmployer = employment
        .Where(e => !string.IsNullOrWhiteSpace(e.CompanyName) && e.StartDate.HasValue && !IsFreelance(e.CompanyName))
        .GroupBy(e => NormalizeCompanyForGrouping(e.CompanyName));

    var tenures = new List<(string DisplayName, int TotalMonths)>();
    foreach (var employer in rolesByEmployer)
    {
        var monthsAtEmployer = 0;
        foreach (var role in employer)
        {
            if (role.StartDate is { } startedOn)
            {
                // Open-ended roles are measured up to today.
                var endedOn = role.EndDate ?? DateOnly.FromDateTime(DateTime.Today);
                monthsAtEmployer += Math.Max(0, DateHelpers.MonthsBetween(startedOn, endedOn));
            }
        }

        // Employers with no measurable tenure are left out entirely.
        if (monthsAtEmployer > 0)
        {
            tenures.Add((employer.First().CompanyName, monthsAtEmployer));
        }
    }

    if (tenures.Count == 0)
    {
        return;
    }

    var briefStays = tenures.Where(t => t.TotalMonths < ShortTenureMonths).ToList();
    var meanMonths = tenures.Average(t => t.TotalMonths);
    var meanYears = meanMonths / 12.0;

    if (briefStays.Count >= MinShortTenuresForFlag)
    {
        // List at most five employers by name and summarise the rest.
        var listed = string.Join(", ", briefStays.Take(5).Select(t => $"{t.DisplayName} ({t.TotalMonths}mo)"));
        var overflow = briefStays.Count > 5 ? $" and {briefStays.Count - 5} more" : "";
        flags.Add(new FlagResult
        {
            Category = FlagCategory.Timeline.ToString(),
            Severity = FlagSeverity.Info.ToString(),
            Title = "Frequent Job Changes",
            Description = $"Candidate has {briefStays.Count} employers with tenure under {ShortTenureMonths} months: {listed}{overflow}. Average tenure: {meanYears:F1} years across {tenures.Count} employers.",
            ScoreImpact = 0 // Informational only, no penalty
        });
        return;
    }

    // No cluster of short stays: still note a low average tenure.
    if (meanMonths < 24 && tenures.Count >= 3)
    {
        flags.Add(new FlagResult
        {
            Category = FlagCategory.Timeline.ToString(),
            Severity = FlagSeverity.Info.ToString(),
            Title = "Average Tenure",
            Description = $"Average tenure: {meanYears:F1} years across {tenures.Count} employers.",
            ScoreImpact = 0 // Informational only
        });
    }
}
/// <summary>
/// Normalizes company name for grouping purposes.
/// Groups companies like "BMW UK", "BMW Group", "BMW (UK) Ltd" together.
/// </summary>
/// <param name="companyName">Company name as written on the CV; may be null or blank.</param>
/// <returns>
/// A lower-case grouping key: the first word when it has four or more characters, otherwise the
/// first two words (or the single remaining word). Blank input yields an empty string.
/// </returns>
/// <remarks>
/// Parenthetical content is removed before suffixes, and suffixes are stripped repeatedly until
/// none is left, so the key does not depend on the order the suffixes appear in
/// (e.g. "EE UK Services Ltd" and "EE Ltd" both give "ee").
/// </remarks>
private static string NormalizeCompanyForGrouping(string companyName)
{
    if (string.IsNullOrWhiteSpace(companyName))
    {
        return "";
    }

    // Remove parenthetical content first so "(UK)" or "(formerly X)" cannot hide a trailing
    // suffix, then collapse the remaining whitespace to single spaces.
    var withoutParentheses = System.Text.RegularExpressions.Regex.Replace(
        companyName.ToLowerInvariant(), @"\([^)]*\)", " ");
    var name = string.Join(
        ' ',
        withoutParentheses.Split(' ', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries));

    // Every suffix starts with a space, so stripping one can never empty the name.
    var suffixes = new[] { " limited", " ltd", " plc", " llp", " inc", " corporation", " corp",
        " uk", " u.k.", " group", " holdings", " services" };

    // Repeat until stable: removing one suffix can expose another ("x uk services ltd").
    bool strippedAny;
    do
    {
        strippedAny = false;
        foreach (var suffix in suffixes)
        {
            if (name.EndsWith(suffix, StringComparison.Ordinal))
            {
                name = name[..^suffix.Length].TrimEnd();
                strippedAny = true;
            }
        }
    }
    while (strippedAny);

    // Take first significant word(s) as the company identifier
    // This helps group "Unilever Bestfood" with "Unilever UK"
    var words = name.Split(' ', StringSplitOptions.RemoveEmptyEntries);
    if (words.Length >= 1)
    {
        // Use first word if it's substantial (4+ chars), or first two words
        if (words[0].Length >= 4)
        {
            return words[0];
        }

        if (words.Length >= 2)
        {
            return words[0] + " " + words[1];
        }
    }

    return name;
}
#region Informational Flags (No Penalty)
/// <summary>
/// Adds an informational "Career Span" flag covering the period from the earliest start date
/// to the latest end date; roles without an end date count as running until today.
/// Nothing is added when no employment entry has a start date.
/// </summary>
/// <param name="employment">Employment entries to measure.</param>
/// <param name="flags">Receives the flag (score impact 0).</param>
private static void AddCareerSpanFlag(List<EmploymentEntry> employment, List<FlagResult> flags)
{
    var dated = employment.Where(e => e.StartDate.HasValue).ToList();
    if (dated.Count == 0)
    {
        return;
    }

    var careerStart = dated.Min(e => e.StartDate!.Value);
    var careerEnd = dated.Max(e => e.EndDate ?? DateOnly.FromDateTime(DateTime.Today));

    var totalMonths = DateHelpers.MonthsBetween(careerStart, careerEnd);
    var years = totalMonths / 12;
    var months = totalMonths % 12;

    // Human-readable duration; whole years omit the month part.
    string spanText;
    if (years <= 0)
    {
        spanText = $"{months} months";
    }
    else if (months > 0)
    {
        spanText = $"{years} years {months} months";
    }
    else
    {
        spanText = $"{years} years";
    }

    // Career stage label keyed on completed years.
    string label;
    if (years >= 20)
    {
        label = "Extensive Career";
    }
    else if (years >= 10)
    {
        label = "Established Professional";
    }
    else if (years >= 5)
    {
        label = "Experienced";
    }
    else if (years >= 2)
    {
        label = "Early-Mid Career";
    }
    else
    {
        label = "Early Career";
    }

    // A span that ends today is shown as "present".
    var endText = careerEnd == DateOnly.FromDateTime(DateTime.Today)
        ? "present"
        : careerEnd.ToString("MMM yyyy");

    flags.Add(new FlagResult
    {
        Category = FlagCategory.Timeline.ToString(),
        Severity = FlagSeverity.Info.ToString(),
        Title = "Career Span",
        Description = $"{label}: {spanText} of professional experience ({careerStart:MMM yyyy} to {endText})",
        ScoreImpact = 0
    });
}
/// <summary>
/// Adds an informational (zero-impact) "Current Status" flag. If any entry is marked current
/// or has no end date, the first such entry is reported as the current employer. Otherwise
/// the entry with the latest end date is reported as the last role, provided at least one
/// month has passed since it ended.
/// </summary>
/// <param name="employment">Employment entries to inspect.</param>
/// <param name="flags">List that receives the flag, if one applies.</param>
private static void AddCurrentEmploymentStatusFlag(List<EmploymentEntry> employment, List<FlagResult> flags)
{
    var currentRole = employment.FirstOrDefault(e => e.IsCurrent || !e.EndDate.HasValue);
    if (currentRole != null)
    {
        var startText = currentRole.StartDate.HasValue
            ? $" since {currentRole.StartDate.Value:MMM yyyy}"
            : "";

        flags.Add(new FlagResult
        {
            Category = FlagCategory.Employment.ToString(),
            Severity = FlagSeverity.Info.ToString(),
            Title = "Current Status",
            Description = $"Currently employed at {currentRole.CompanyName}{startText}",
            ScoreImpact = 0
        });
        return;
    }

    // No open-ended role: fall back to whichever role finished most recently.
    var lastRole = employment
        .Where(e => e.EndDate.HasValue)
        .OrderByDescending(e => e.EndDate)
        .FirstOrDefault();
    if (lastRole?.EndDate is not { } lastEnd)
    {
        return;
    }

    var monthsSince = DateHelpers.MonthsBetween(lastEnd, DateOnly.FromDateTime(DateTime.Today));
    if (monthsSince <= 0)
    {
        // Ended within the current month (or in the future): nothing is reported.
        return;
    }

    // Singular/plural unit: the previous text produced "(1 months ago)".
    var elapsedText = monthsSince == 1 ? "1 month ago" : $"{monthsSince} months ago";

    flags.Add(new FlagResult
    {
        Category = FlagCategory.Employment.ToString(),
        Severity = FlagSeverity.Info.ToString(),
        Title = "Current Status",
        Description = $"Currently available - last role ended {lastEnd:MMM yyyy} ({elapsedText})",
        ScoreImpact = 0
    });
}
/// <summary>
/// Adds a single informational (zero-impact) "Long Tenure" flag when one or more roles
/// lasted at least 60 months. Roles without a start date are skipped and roles without an
/// end date are measured up to today. One qualifying role is described on its own; several
/// are summarised, listing at most the three longest.
/// </summary>
/// <param name="employment">Employment entries to inspect.</param>
/// <param name="flags">List that receives the flag, if any role qualifies.</param>
private static void AddLongTenureFlags(List<EmploymentEntry> employment, List<FlagResult> flags)
{
    const int minimumMonths = 60; // 5 years

    var qualifying = (
        from entry in employment
        where entry.StartDate.HasValue
        let tenureMonths = DateHelpers.MonthsBetween(
            entry.StartDate!.Value,
            entry.EndDate ?? DateOnly.FromDateTime(DateTime.Today))
        where tenureMonths >= minimumMonths
        orderby tenureMonths descending
        select new { Entry = entry, Months = tenureMonths }
    ).ToList();

    if (qualifying.Count == 0)
    {
        return;
    }

    string description;
    if (qualifying.Count == 1)
    {
        var only = qualifying[0];
        var wholeYears = only.Months / 12;
        description = $"{wholeYears} years at {only.Entry.CompanyName} - demonstrates commitment and stability";
    }
    else
    {
        // The list is already sorted longest-first, so Take(3) yields the three longest.
        var topThree = qualifying
            .Take(3)
            .Select(t => $"{t.Entry.CompanyName} ({t.Months / 12}y)");
        var companies = string.Join(", ", topThree);
        description = $"{qualifying.Count} roles with 5+ year tenure: {companies}";
    }

    flags.Add(new FlagResult
    {
        Category = FlagCategory.Employment.ToString(),
        Severity = FlagSeverity.Info.ToString(),
        Title = "Long Tenure",
        Description = description,
        ScoreImpact = 0
    });
}
/// <summary>
/// Adds an informational (zero-impact) flag stating whether any job title contains a
/// management keyword. With at least one match a "Management Experience" flag is added,
/// naming up to two matching titles with the latest start dates; with none an
/// "Individual Contributor" flag is added. Entries with a blank title are ignored, and
/// nothing is added when every title is blank.
/// </summary>
/// <param name="employment">Employment entries to inspect.</param>
/// <param name="flags">List that receives the flag.</param>
private static void AddManagementExperienceFlag(List<EmploymentEntry> employment, List<FlagResult> flags)
{
    string[] keywords = { "manager", "head of", "director", "lead", "team lead", "supervisor", "chief", "vp", "vice president" };

    var titledRoles = employment
        .Where(e => !string.IsNullOrWhiteSpace(e.JobTitle))
        .ToList();
    if (titledRoles.Count == 0)
    {
        return;
    }

    // Keyword matching is a plain substring test on the lower-cased title.
    var managerial = titledRoles
        .Where(role =>
        {
            var lowered = role.JobTitle!.ToLowerInvariant();
            return keywords.Any(kw => lowered.Contains(kw));
        })
        .ToList();

    if (managerial.Count == 0)
    {
        flags.Add(new FlagResult
        {
            Category = FlagCategory.Employment.ToString(),
            Severity = FlagSeverity.Info.ToString(),
            Title = "Individual Contributor",
            Description = "All roles appear to be individual contributor positions - no management titles detected",
            ScoreImpact = 0
        });
        return;
    }

    // Roles without a start date sort last (treated as DateOnly.MinValue).
    var latestTitles = managerial
        .OrderByDescending(e => e.StartDate ?? DateOnly.MinValue)
        .Take(2)
        .Select(e => e.JobTitle)
        .ToList();
    var recentText = string.Join(", ", latestTitles);

    flags.Add(new FlagResult
    {
        Category = FlagCategory.Employment.ToString(),
        Severity = FlagSeverity.Info.ToString(),
        Title = "Management Experience",
        Description = $"{managerial.Count} of {titledRoles.Count} roles include management responsibility. Recent: {recentText}",
        ScoreImpact = 0
    });
}
/// <summary>
/// Adds an informational (zero-impact) "Company Size Pattern" flag that summarises the mix
/// of size bands among verified employers. Bands are derived from each verification's
/// accounts category. Nothing is added when fewer than two verified results carry an
/// accounts category, or when none of them maps to a known band.
/// </summary>
/// <param name="verifications">Company verification results to inspect.</param>
/// <param name="flags">List that receives the flag, if one applies.</param>
private static void AddCompanySizePatternFlag(List<CompanyVerificationResult> verifications, List<FlagResult> flags)
{
    var withAccounts = verifications
        .Where(v => v.IsVerified && !string.IsNullOrWhiteSpace(v.AccountsCategory))
        .ToList();
    if (withAccounts.Count < 2)
    {
        return;
    }

    // Count verifications per size band, dropping categories that map to no known band.
    var bandCounts = withAccounts
        .GroupBy(v => v.AccountsCategory?.ToLowerInvariant() switch
        {
            "micro-entity" or "micro" => "Micro/Startup",
            "small" => "Small",
            "medium" or "audit-exempt-subsidiary" => "Medium",
            "full" or "group" or "dormant" => "Large",
            _ => "Unknown"
        })
        .Where(band => band.Key != "Unknown")
        .ToDictionary(band => band.Key, band => band.Count());
    if (bandCounts.Count == 0)
    {
        return;
    }

    var topBand = bandCounts.OrderByDescending(kv => kv.Value).First();
    var classified = bandCounts.Values.Sum();
    var topShare = (topBand.Value * 100) / classified; // whole-number percentage

    string pattern;
    if (topShare < 70)
    {
        if (bandCounts.Count >= 3)
        {
            pattern = "Diverse company sizes - experience across startups to corporates";
        }
        else
        {
            var bandNames = string.Join(" and ", bandCounts.Keys);
            pattern = $"Mix of {bandNames}";
        }
    }
    else
    {
        // One band accounts for at least 70% of the classified employers.
        pattern = topBand.Key switch
        {
            "Micro/Startup" => "Startup/Early-stage focus",
            "Small" => "Small business specialist",
            "Medium" => "SME experience",
            "Large" => "Large corporate experience",
            _ => "Mixed company sizes"
        };
    }

    var breakdown = string.Join(", ", bandCounts.Select(kv => $"{kv.Key}: {kv.Value}"));

    flags.Add(new FlagResult
    {
        Category = FlagCategory.Employment.ToString(),
        Severity = FlagSeverity.Info.ToString(),
        Title = "Company Size Pattern",
        Description = $"{pattern} ({breakdown})",
        ScoreImpact = 0
    });
}
/// <summary>
/// Adds an informational (zero-impact) "Career Trajectory" flag classifying the career as
/// Upward, Step-down or Lateral. Roles that have both a start date and a job title are
/// ordered by start date and scored with <see cref="GetSeniorityLevel"/>; the classification
/// uses the average level change between consecutive roles together with the net change
/// from first to last role. Nothing is added when fewer than three roles qualify.
/// </summary>
/// <param name="employment">Employment entries to inspect.</param>
/// <param name="flags">List that receives the flag, if one applies.</param>
private static void AddCareerTrajectoryFlag(List<EmploymentEntry> employment, List<FlagResult> flags)
{
    var chronological = employment
        .Where(e => e.StartDate.HasValue && !string.IsNullOrWhiteSpace(e.JobTitle))
        .OrderBy(e => e.StartDate)
        .ToList();
    if (chronological.Count < 3)
    {
        return;
    }

    var levels = chronological.Select(e => GetSeniorityLevel(e.JobTitle)).ToList();

    // Average seniority change per move between consecutive roles.
    var averageStep = levels
        .Zip(levels.Skip(1), (previous, next) => next - previous)
        .Average();
    var netChange = levels[^1] - levels[0];

    // Both the average step and the net change must point the same way;
    // anything else is reported as lateral.
    var (trajectory, description) = (averageStep, netChange) switch
    {
        ( > 0.3, > 0) => (
            "Upward",
            $"Career shows upward progression from {chronological[0].JobTitle} to {chronological[^1].JobTitle} (net +{netChange} seniority levels)"),
        ( < -0.3, < 0) => (
            "Step-down",
            "Recent roles at lower seniority than earlier career (may indicate work-life balance choice, industry change, or consulting)"),
        _ => (
            "Lateral",
            $"Career shows lateral movement - consistent seniority level across {chronological.Count} roles")
    };

    flags.Add(new FlagResult
    {
        Category = FlagCategory.Employment.ToString(),
        Severity = FlagSeverity.Info.ToString(),
        Title = $"Career Trajectory: {trajectory}",
        Description = description,
        ScoreImpact = 0
    });
}
/// <summary>
/// Adds an informational (zero-impact) "Public Company Experience" flag when at least one
/// verified company has a company type containing "plc" or "public-limited"
/// (case-insensitive). Up to four distinct matched company names are listed, followed by
/// " and others" when there are more.
/// </summary>
/// <param name="verifications">Company verification results to inspect.</param>
/// <param name="flags">List that receives the flag, if one applies.</param>
private static void AddPLCExperienceFlag(List<CompanyVerificationResult> verifications, List<FlagResult> flags)
{
    static bool LooksLikePlc(string? companyType)
    {
        if (string.IsNullOrWhiteSpace(companyType))
        {
            return false;
        }

        var lowered = companyType.ToLowerInvariant();
        return lowered.Contains("plc") || lowered.Contains("public-limited");
    }

    var plcRoles = verifications
        .Where(v => v.IsVerified && LooksLikePlc(v.CompanyType))
        .ToList();
    if (plcRoles.Count == 0)
    {
        return;
    }

    // The role count covers every matching verification; the name list is de-duplicated.
    var distinctNames = plcRoles
        .Select(v => v.MatchedCompanyName)
        .Distinct()
        .ToList();
    var companies = string.Join(", ", distinctNames.Take(4));
    var moreText = distinctNames.Count > 4 ? " and others" : "";

    flags.Add(new FlagResult
    {
        Category = FlagCategory.Employment.ToString(),
        Severity = FlagSeverity.Info.ToString(),
        Title = "Public Company Experience",
        Description = $"{plcRoles.Count} role(s) at PLC/publicly listed companies: {companies}{moreText}",
        ScoreImpact = 0
    });
}
/// <summary>
/// Adds an informational (zero-impact) "Director Experience" flag listing up to three
/// distinct matched companies where the candidate claimed a director-type title
/// (director, company secretary, MD, managing director) on a verified company and the
/// verification carries no "UnverifiedDirectorClaim" flag.
/// </summary>
/// <remarks>
/// The absence of an "UnverifiedDirectorClaim" flag does not distinguish a confirmed claim
/// from one that was never checked, so the flag text deliberately makes no verification claim.
/// </remarks>
/// <param name="verifications">Company verification results to inspect.</param>
/// <param name="flags">List that receives the flag, if one applies.</param>
private static void AddVerifiedDirectorFlag(List<CompanyVerificationResult> verifications, List<FlagResult> flags)
{
    static bool IsOfficerTitle(string claimedTitle)
    {
        var lowered = claimedTitle.ToLowerInvariant();
        return lowered.Contains("director")
            || lowered.Contains("company secretary")
            || lowered == "md"
            || lowered.Contains("managing director");
    }

    var officerRoles = verifications
        .Where(v => v.IsVerified
            && !string.IsNullOrWhiteSpace(v.ClaimedJobTitle)
            && IsOfficerTitle(v.ClaimedJobTitle!)
            && v.Flags.All(f => f.Type != "UnverifiedDirectorClaim"))
        .ToList();
    if (officerRoles.Count == 0)
    {
        return;
    }

    var distinctNames = officerRoles
        .Select(v => v.MatchedCompanyName)
        .Distinct()
        .Take(3);
    var companies = string.Join(", ", distinctNames);

    flags.Add(new FlagResult
    {
        Category = FlagCategory.Employment.ToString(),
        Severity = FlagSeverity.Info.ToString(),
        Title = "Director Experience",
        Description = $"Director/senior officer role(s) at: {companies}",
        ScoreImpact = 0
    });
}
#endregion
/// <summary>
/// Maps a job title to a coarse seniority level:
/// 0 = blank title, 1 = junior/entry-level, 2 = mid-level, 3 = senior/lead/principal/manager,
/// 4 = director/head of, 5 = VP/executive, 6 = C-suite.
/// </summary>
/// <remarks>
/// Short acronyms (CEO, CTO, COO, VP, ...) and "intern" are matched as whole words only.
/// Plain substring matching classified "Director" and "Contractor" (both contain "cto"),
/// "Coordinator" (contains "coo") and "Vice President" (contains "president") as C-suite,
/// and "International"/"Internal" (contain "intern") as junior.
/// </remarks>
/// <param name="jobTitle">Job title to classify; may be null or blank.</param>
/// <returns>The seniority level, from 0 to 6.</returns>
private static int GetSeniorityLevel(string? jobTitle)
{
    if (string.IsNullOrWhiteSpace(jobTitle))
    {
        return 0;
    }

    var title = jobTitle.ToLowerInvariant();

    // Whole-word view of the title: every non-alphanumeric character acts as a separator.
    var words = new HashSet<string>(
        new string(title.Select(c => char.IsLetterOrDigit(c) ? c : ' ').ToArray())
            .Split(' ', StringSplitOptions.RemoveEmptyEntries),
        StringComparer.Ordinal);

    bool HasWord(params string[] candidates) => candidates.Any(w => words.Contains(w));
    bool HasPhrase(params string[] phrases) => phrases.Any(p => title.Contains(p));

    // Decided up front because "vice president" also contains "president".
    var isVicePresident =
        HasPhrase("vice president", "vice-president", "executive vice") ||
        HasWord("vp", "svp", "evp", "avp");

    // Level 6: C-suite
    if (HasWord("ceo", "cto", "cfo", "coo", "cio") ||
        HasPhrase("chief", "managing director", "chairman", "chairwoman", "chairperson") ||
        title == "md" ||
        (HasPhrase("president") && !isVicePresident))
    {
        return 6;
    }

    // Level 5: VP / Executive
    if (isVicePresident || HasPhrase("executive director"))
    {
        return 5;
    }

    // Level 4: Director / Head
    if (HasPhrase("director", "head of"))
    {
        return 4;
    }

    // Level 3: Senior / Lead / Principal / Manager
    if (HasPhrase("senior", "lead", "principal", "manager", "staff"))
    {
        return 3;
    }

    // Level 1: Junior / Entry-level
    if (HasPhrase("junior", "trainee", "graduate", "entry", "assistant") ||
        HasWord("intern", "interns", "internship"))
    {
        return 1;
    }

    // Level 2: Mid-level (neither junior nor senior markers present)
    return 2;
}
/// <summary>
/// Equality comparer for deduplicating company flags by (company name, flag type).
/// Both components are compared with ordinal, case-insensitive semantics.
/// </summary>
private sealed class CompanyFlagComparer : IEqualityComparer<(string Company, string FlagType)>
{
    /// <summary>Returns true when both company names and both flag types match, ignoring case.</summary>
    public bool Equals((string Company, string FlagType) x, (string Company, string FlagType) y) =>
        StringComparer.OrdinalIgnoreCase.Equals(x.Company, y.Company) &&
        StringComparer.OrdinalIgnoreCase.Equals(x.FlagType, y.FlagType);

    /// <summary>
    /// Returns a hash code consistent with <see cref="Equals"/>. The hash comes from the same
    /// ordinal-ignore-case comparer used for equality rather than from upper-cased copies of
    /// the strings, so the two can never disagree and no temporary strings are allocated.
    /// A null component hashes like an empty string.
    /// </summary>
    public int GetHashCode((string Company, string FlagType) obj) =>
        HashCode.Combine(
            StringComparer.OrdinalIgnoreCase.GetHashCode(obj.Company ?? ""),
            StringComparer.OrdinalIgnoreCase.GetHashCode(obj.FlagType ?? ""));
}
}