Add AI-powered company name matching using Claude API
Replace fuzzy string matching with semantic AI matching to fix false positives where similar-sounding but different companies were matched (e.g., "Families First CiC" incorrectly matching "FAMILIES AGAINST CONFORMITY LTD").

Changes:
- Add ICompanyNameMatcherService interface and AICompanyNameMatcherService implementation using Claude Sonnet 4 for semantic company name comparison
- Add SemanticMatchResult and related models for AI match results
- Update CompanyVerifierService to use AI matching with fuzzy fallback
- Add detection for public sector employers, charities, and self-employed entries that cannot be verified via Companies House
- Update tests to work with new AI matcher integration

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -98,9 +98,11 @@ public sealed class ProcessCVCheckJob
|
||||
await _dbContext.SaveChangesAsync(cancellationToken);
|
||||
|
||||
// Step 5: Verify each employment entry (parallelized with rate limiting)
|
||||
// Skip freelance entries as they cannot be verified against company registries
|
||||
// Skip freelance, public sector, and charity entries as they cannot be verified against Companies House
|
||||
var verificationTasks = cvData.Employment
|
||||
.Where(e => !IsFreelance(e.CompanyName))
|
||||
.Where(e => !IsFreelance(e.CompanyName) &&
|
||||
!IsPublicSectorEmployer(e.CompanyName) &&
|
||||
!IsCharityOrVoluntary(e.CompanyName))
|
||||
.Select(async employment =>
|
||||
{
|
||||
var result = await _companyVerifierService.VerifyCompanyAsync(
|
||||
@@ -135,6 +137,38 @@ public sealed class ProcessCVCheckJob
|
||||
_logger.LogDebug("Skipped verification for freelance entry: {Company}", employment.CompanyName);
|
||||
}
|
||||
|
||||
// Add public sector employers as auto-verified (not in Companies House)
|
||||
foreach (var employment in cvData.Employment.Where(e => IsPublicSectorEmployer(e.CompanyName)))
|
||||
{
|
||||
verificationResults.Add(new CompanyVerificationResult
|
||||
{
|
||||
ClaimedCompany = employment.CompanyName,
|
||||
IsVerified = true,
|
||||
MatchScore = 100,
|
||||
VerificationNotes = "Public sector employer - not registered at Companies House",
|
||||
ClaimedJobTitle = employment.JobTitle,
|
||||
JobTitlePlausible = true
|
||||
});
|
||||
|
||||
_logger.LogDebug("Skipped verification for public sector employer: {Company}", employment.CompanyName);
|
||||
}
|
||||
|
||||
// Add charities/voluntary organisations as auto-verified (registered with Charity Commission, not Companies House)
|
||||
foreach (var employment in cvData.Employment.Where(e => IsCharityOrVoluntary(e.CompanyName)))
|
||||
{
|
||||
verificationResults.Add(new CompanyVerificationResult
|
||||
{
|
||||
ClaimedCompany = employment.CompanyName,
|
||||
IsVerified = true,
|
||||
MatchScore = 100,
|
||||
VerificationNotes = "Charity/voluntary organisation - registered with Charity Commission",
|
||||
ClaimedJobTitle = employment.JobTitle,
|
||||
JobTitlePlausible = true
|
||||
});
|
||||
|
||||
_logger.LogDebug("Skipped verification for charity/voluntary organisation: {Company}", employment.CompanyName);
|
||||
}
|
||||
|
||||
// Step 5b: Verify director claims against Companies House officers
|
||||
cvCheck.ProcessingStage = "Verifying Directors";
|
||||
await _dbContext.SaveChangesAsync(cancellationToken);
|
||||
@@ -486,10 +520,132 @@ public sealed class ProcessCVCheckJob
|
||||
name == "freelancer" ||
|
||||
name == "self-employed" ||
|
||||
name == "self employed" ||
|
||||
name == "selfemployed" ||
|
||||
name == "contractor" ||
|
||||
name.StartsWith("freelance ") ||
|
||||
name.StartsWith("self-employed ") ||
|
||||
name.StartsWith("self employed ") ||
|
||||
name.Contains("(freelance)") ||
|
||||
name.Contains("(self-employed)");
|
||||
name.Contains("(self-employed)") ||
|
||||
name.Contains("(self employed)") ||
|
||||
name.Contains("(contractor)");
|
||||
}
|
||||
|
||||
// Heuristically decides whether a claimed employer name looks like a UK public
// sector body (council, NHS, government department, emergency service or state
// education body).
//
// companyName: the employer name as claimed; null, empty or whitespace-only
//              input yields false.
// Returns:     true when the trimmed, lower-cased name hits any indicator below.
//
// Short abbreviations are matched as whole words rather than as raw substrings.
// A substring test such as Contains(" cc") also fires on "Acme CCTV Ltd", and
// Contains("lea ") fires on "Azalea Care Ltd" while missing "Birmingham LEA".
// Because the visible caller marks every match as verified without a registry
// lookup, such false positives are costly.
private static bool IsPublicSectorEmployer(string companyName)
{
    if (string.IsNullOrWhiteSpace(companyName)) return false;

    var name = companyName.Trim().ToLowerInvariant();

    // Whole-word indicators. Splitting on punctuation as well as spaces means
    // "Rotherham MBC." or "Leeds Council, Housing" are still recognised.
    var seenEarlierWord = false;
    foreach (var word in name.Split(' ', '\t', ',', '.', ';', ':', '(', ')', '/', '\''))
    {
        if (word.Length == 0) continue;

        // Local Education Authority abbreviation, accepted in any position.
        if (word == "lea") return true;

        // Council-style abbreviations only count after another word
        // ("Kent CC"), never as the leading word ("BC Partners Ltd").
        // "ccg" (Clinical Commissioning Group) is listed explicitly so that it
        // stays covered now that "cc" is no longer a prefix match.
        if (seenEarlierWord &&
            (word == "mbc" ||      // Metropolitan Borough Council
             word == "bc" ||       // Borough Council
             word == "cc" ||       // County Council
             word == "dc" ||       // District Council
             word == "ccg" ||
             word == "council"))
        {
            return true;
        }

        seenEarlierWord = true;
    }

    // Local authorities and councils (phrase indicators)
    // NOTE(review): "metropolitan" also appears in names of ordinary registered
    // companies; kept as-is for backward compatibility - confirm it is wanted.
    if (name.Contains("borough council") ||
        name.Contains("county council") ||
        name.Contains("district council") ||
        name.Contains("city council") ||
        name.Contains("town council") ||
        name.Contains("parish council") ||
        name.Contains("metropolitan") ||
        name.Contains("local authority"))
    {
        return true;
    }

    // NHS and health
    if (name.Contains("nhs") ||
        name.Contains("national health service") ||
        name.Contains("health trust") ||
        name.Contains("hospital trust") ||
        name.Contains("clinical commissioning") ||
        name.Contains("primary care trust") ||
        name.Contains("ambulance service") ||
        name.Contains("ambulance trust"))
    {
        return true;
    }

    // Government departments and agencies
    if (name.StartsWith("hm ") ||          // HM Revenue, HM Treasury, etc.
        name.StartsWith("ministry of") ||
        name.StartsWith("department of") ||
        name.StartsWith("department for") ||
        name.Contains("civil service") ||
        name.Contains("home office") ||
        name.Contains("cabinet office") ||
        name.Contains("foreign office"))
    {
        return true;
    }

    // Emergency services
    if (name.Contains("police") ||
        name.Contains("fire service") ||
        name.Contains("fire brigade") ||
        name.Contains("fire and rescue"))
    {
        return true;
    }

    // Education (state sector); the "lea" abbreviation is handled above.
    return name.Contains("academy trust") ||
           name.Contains("multi academy") ||
           name.Contains("education authority");
}
|
||||
|
||||
// Name fragments of well-known charities/voluntary organisations. Each entry is
// tested as a substring of the trimmed, lower-cased employer name. Held in a
// static field so the table is not rebuilt on every call.
private static readonly string[] KnownCharityNameFragments =
{
    "girlguiding", "girl guiding", "girl guides",
    "scouts", "scout association",
    "red cross", "british red cross",
    "st john ambulance", "st johns ambulance",
    "rotary", "lions club",
    "citizens advice",
    "oxfam", "save the children", "barnardos", "barnardo's",
    "nspcc", "rspca", "rspb", "rnli",
    "macmillan", "marie curie", "cancer research",
    "british heart foundation", "bhf",
    "age uk", "age concern",
    "samaritans",
    "national trust", "english heritage",
    "ymca", "ywca"
};

// Heuristically decides whether a claimed employer name looks like a charity or
// voluntary organisation.
//
// companyName: the employer name as claimed; null, empty or whitespace-only
//              input yields false.
// Returns:     true when the trimmed, lower-cased name hits any indicator below.
//
// "cab", "mind" and "trust" are matched as whole words. The earlier substring
// tests with a trailing space ("cab ", "mind ", "trust ") fired on names such as
// "Taxicab Services Ltd", "Mastermind Events Ltd" and "Entrust Datacard Ltd",
// yet missed the bare names "Mind" and "Leeds CAB". Because the visible caller
// marks every match as verified without a registry lookup, such false positives
// are costly.
private static bool IsCharityOrVoluntary(string companyName)
{
    if (string.IsNullOrWhiteSpace(companyName)) return false;

    var name = companyName.Trim().ToLowerInvariant();

    // Whole-word indicators; punctuation is treated as a word boundary so that
    // "Wellcome Trust, London" is still recognised.
    foreach (var word in name.Split(' ', '\t', ',', '.', ';', ':', '(', ')', '/', '\''))
    {
        if (word == "cab" ||     // Citizens Advice Bureau
            word == "mind" ||
            word == "trust")
        {
            return true;
        }
    }

    // Well-known charities/voluntary organisations
    if (KnownCharityNameFragments.Any(fragment => name.Contains(fragment)))
    {
        return true;
    }

    // Generic charity indicators
    return name.Contains("charity") ||
           name.Contains("charitable") ||
           name.Contains("foundation") ||
           name.Contains("volunteer") ||
           name.Contains("voluntary");
}
|
||||
|
||||
private async Task VerifyDirectorClaims(
|
||||
|
||||
Reference in New Issue
Block a user