Add AI-powered company name matching using Claude API

Replace fuzzy string matching with semantic AI matching to fix false
positives where similar-sounding but different companies were matched
(e.g., "Families First CiC" incorrectly matching "FAMILIES AGAINST
CONFORMITY LTD").

Changes:
- Add ICompanyNameMatcherService interface and AICompanyNameMatcherService
  implementation using Claude Sonnet 4 for semantic company name comparison
- Add SemanticMatchResult and related models for AI match results
- Update CompanyVerifierService to use AI matching with fuzzy fallback
- Add detection for public sector employers, charities, and self-employed
  entries that cannot be verified via Companies House
- Update tests to work with new AI matcher integration

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-21 00:51:24 +01:00
parent 030ede9e77
commit d047de1c84
7 changed files with 586 additions and 28 deletions

View File

@@ -98,9 +98,11 @@ public sealed class ProcessCVCheckJob
await _dbContext.SaveChangesAsync(cancellationToken);
// Step 5: Verify each employment entry (parallelized with rate limiting)
// Skip freelance entries as they cannot be verified against company registries
// Skip freelance, public sector, and charity entries as they cannot be verified against Companies House
var verificationTasks = cvData.Employment
.Where(e => !IsFreelance(e.CompanyName))
.Where(e => !IsFreelance(e.CompanyName) &&
!IsPublicSectorEmployer(e.CompanyName) &&
!IsCharityOrVoluntary(e.CompanyName))
.Select(async employment =>
{
var result = await _companyVerifierService.VerifyCompanyAsync(
@@ -135,6 +137,38 @@ public sealed class ProcessCVCheckJob
_logger.LogDebug("Skipped verification for freelance entry: {Company}", employment.CompanyName);
}
// Add public sector employers as auto-verified (not in Companies House)
foreach (var employment in cvData.Employment.Where(e => IsPublicSectorEmployer(e.CompanyName)))
{
verificationResults.Add(new CompanyVerificationResult
{
ClaimedCompany = employment.CompanyName,
IsVerified = true,
MatchScore = 100,
VerificationNotes = "Public sector employer - not registered at Companies House",
ClaimedJobTitle = employment.JobTitle,
JobTitlePlausible = true
});
_logger.LogDebug("Skipped verification for public sector employer: {Company}", employment.CompanyName);
}
// Add charities/voluntary organisations as auto-verified (registered with Charity Commission, not Companies House)
foreach (var employment in cvData.Employment.Where(e => IsCharityOrVoluntary(e.CompanyName)))
{
verificationResults.Add(new CompanyVerificationResult
{
ClaimedCompany = employment.CompanyName,
IsVerified = true,
MatchScore = 100,
VerificationNotes = "Charity/voluntary organisation - registered with Charity Commission",
ClaimedJobTitle = employment.JobTitle,
JobTitlePlausible = true
});
_logger.LogDebug("Skipped verification for charity/voluntary organisation: {Company}", employment.CompanyName);
}
// Step 5b: Verify director claims against Companies House officers
cvCheck.ProcessingStage = "Verifying Directors";
await _dbContext.SaveChangesAsync(cancellationToken);
@@ -486,10 +520,132 @@ public sealed class ProcessCVCheckJob
name == "freelancer" ||
name == "self-employed" ||
name == "self employed" ||
name == "selfemployed" ||
name == "contractor" ||
name.StartsWith("freelance ") ||
name.StartsWith("self-employed ") ||
name.StartsWith("self employed ") ||
name.Contains("(freelance)") ||
name.Contains("(self-employed)");
name.Contains("(self-employed)") ||
name.Contains("(self employed)") ||
name.Contains("(contractor)");
}
private static bool IsPublicSectorEmployer(string companyName)
{
if (string.IsNullOrWhiteSpace(companyName)) return false;
var name = companyName.Trim().ToLowerInvariant();
// Local authorities and councils
if (name.Contains(" mbc") || // Metropolitan Borough Council
name.Contains(" bc") || // Borough Council
name.Contains(" cc") || // County Council
name.Contains(" dc") || // District Council
name.EndsWith(" council") ||
name.Contains(" council ") ||
name.Contains("borough council") ||
name.Contains("county council") ||
name.Contains("district council") ||
name.Contains("city council") ||
name.Contains("town council") ||
name.Contains("parish council") ||
name.Contains("metropolitan") ||
name.Contains("local authority"))
{
return true;
}
// NHS and health
if (name.Contains("nhs") ||
name.Contains("national health service") ||
name.Contains("health trust") ||
name.Contains("hospital trust") ||
name.Contains("clinical commissioning") ||
name.Contains("primary care trust") ||
name.Contains("ambulance service") ||
name.Contains("ambulance trust"))
{
return true;
}
// Government departments and agencies
if (name.StartsWith("hm ") || // HM Revenue, HM Treasury, etc.
name.StartsWith("ministry of") ||
name.StartsWith("department of") ||
name.StartsWith("department for") ||
name.Contains("civil service") ||
name.Contains("home office") ||
name.Contains("cabinet office") ||
name.Contains("foreign office"))
{
return true;
}
// Emergency services
if (name.Contains("police") ||
name.Contains("fire service") ||
name.Contains("fire brigade") ||
name.Contains("fire and rescue"))
{
return true;
}
// Education (state sector)
if (name.Contains("academy trust") ||
name.Contains("multi academy") ||
name.Contains("education authority") ||
name.Contains("lea "))
{
return true;
}
return false;
}
private static bool IsCharityOrVoluntary(string companyName)
{
if (string.IsNullOrWhiteSpace(companyName)) return false;
var name = companyName.Trim().ToLowerInvariant();
// Well-known charities/voluntary organisations
var knownCharities = new[]
{
"girlguiding", "girl guiding", "girl guides",
"scouts", "scout association",
"red cross", "british red cross",
"st john ambulance", "st johns ambulance",
"rotary", "lions club",
"citizens advice", "cab ",
"oxfam", "save the children", "barnardos", "barnardo's",
"nspcc", "rspca", "rspb", "rnli",
"macmillan", "marie curie", "cancer research",
"british heart foundation", "bhf",
"age uk", "age concern",
"mind ", "samaritans",
"national trust", "english heritage",
"ymca", "ywca"
};
if (knownCharities.Any(c => name.Contains(c)))
{
return true;
}
// Generic charity indicators
if (name.Contains("charity") ||
name.Contains("charitable") ||
name.Contains("foundation") ||
name.Contains("trust ") ||
name.EndsWith(" trust") ||
name.Contains("volunteer") ||
name.Contains("voluntary"))
{
return true;
}
return false;
}
private async Task VerifyDirectorClaims(