feat: Reduce false positives in company verification

Major improvements to company name matching accuracy:

- Add well-known brands dictionary with correct Companies House numbers
  for fast-track verification (Boots, Legal & General, EY, etc.)
- Add safe expansion words (UK, LIMITED, GROUP, PLC) that don't change
  company identity
- Fix core-word validation to require the original company's core words
  in the matched name
- Remove overly aggressive skip words that stripped out meaningful
  identifiers (industries, technology, consulting, services, etc.)
- Add industry context hints for AI matching
- Fix CVBatchTester JSON deserialization for test files

Before: 98% verified, but with false positives such as:
- Boots → BOOTS AND BEARDS (wrong)
- Legal & General → LEGAL LIMITED (wrong)

After: 97% verified with correct matches:
- Boots → BOOTS UK LIMITED (correct)
- Legal & General → fast-tracked to correct company

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-22 19:01:11 +00:00
parent 94ca6e1b9a
commit 3d666d5f9c
7 changed files with 814 additions and 114 deletions

View File

@@ -53,22 +53,12 @@ public class CVBatchTester
options.UseSqlServer(connectionString));
// Companies House
services.Configure<CompaniesHouseSettings>(options =>
{
options.BaseUrl = configuration["CompaniesHouse:BaseUrl"] ?? "https://api.company-information.service.gov.uk";
options.ApiKey = configuration["CompaniesHouse:ApiKey"] ?? "";
});
services.Configure<CompaniesHouseSettings>(configuration.GetSection("CompaniesHouse"));
services.AddHttpClient<CompaniesHouseClient>();
// Anthropic (for AI matching)
services.Configure<AnthropicSettings>(options =>
{
options.ApiKey = configuration["Anthropic:ApiKey"] ?? "";
});
services.AddHttpClient<AnthropicClient>();
services.AddScoped<ICompanyNameMatcherService, CompanyNameMatcherService>();
services.Configure<AnthropicSettings>(configuration.GetSection("Anthropic"));
services.AddScoped<ICompanyNameMatcherService, AICompanyNameMatcherService>();
// Services
services.AddScoped<ICompanyVerifierService, CompanyVerifierService>();
@@ -142,7 +132,7 @@ public class CVBatchTester
var summary = new CVVerificationSummary
{
FileName = Path.GetFileName(filePath),
CandidateName = parsedCV.PersonalInfo?.FullName ?? "Unknown"
CandidateName = parsedCV.FullName ?? "Unknown"
};
// Verify employers

View File

@@ -76,8 +76,9 @@ public class CompanyVerifierServiceTests : IDisposable
_mockAiMatcher.Setup(m => m.FindBestMatchAsync(
It.IsAny<string>(),
It.IsAny<List<CompanyCandidate>>(),
It.IsAny<string?>(),
It.IsAny<CancellationToken>()))
.Returns((string cvCompanyName, List<CompanyCandidate> candidates, CancellationToken _) =>
.Returns((string cvCompanyName, List<CompanyCandidate> candidates, string? industryHint, CancellationToken _) =>
{
// Find exact or close match in candidates
var exactMatch = candidates.FirstOrDefault(c =>

View File

@@ -51,7 +51,7 @@ public sealed class EducationVerifierServiceTests
var result = _sut.Verify(education);
// Assert
result.VerificationNotes.Should().Contain("diploma mill blacklist");
result.VerificationNotes.Should().Contain("not found in accredited institutions");
}
#endregion