feat: Reduce false positives in company verification

Major improvements to company name matching accuracy:

- Add well-known brands dictionary with correct Companies House numbers
  for fast-track verification (Boots, Legal & General, EY, etc.)
- Add safe expansion words (UK, LIMITED, GROUP, PLC) that don't change
  company identity
- Fix core word validation to require the original company's core words
- Remove overly aggressive skip words that stripped meaningful identifiers
  (industries, technology, consulting, services, etc.)
- Add industry context hints for AI matching
- Fix CVBatchTester JSON deserialization for test files

Before: 98% verified but with false positives like:
- Boots → BOOTS AND BEARDS (wrong)
- Legal & General → LEGAL LIMITED (wrong)

After: 97% verified with correct matches:
- Boots → BOOTS UK LIMITED (correct)
- Legal & General → fast-tracked to correct company

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-22 19:01:11 +00:00
parent 94ca6e1b9a
commit 3d666d5f9c
7 changed files with 814 additions and 114 deletions

View File

@@ -53,22 +53,12 @@ public class CVBatchTester
options.UseSqlServer(connectionString));
// Companies House
services.Configure<CompaniesHouseSettings>(options =>
{
options.BaseUrl = configuration["CompaniesHouse:BaseUrl"] ?? "https://api.company-information.service.gov.uk";
options.ApiKey = configuration["CompaniesHouse:ApiKey"] ?? "";
});
services.Configure<CompaniesHouseSettings>(configuration.GetSection("CompaniesHouse"));
services.AddHttpClient<CompaniesHouseClient>();
// Anthropic (for AI matching)
services.Configure<AnthropicSettings>(options =>
{
options.ApiKey = configuration["Anthropic:ApiKey"] ?? "";
});
services.AddHttpClient<AnthropicClient>();
services.AddScoped<ICompanyNameMatcherService, CompanyNameMatcherService>();
services.Configure<AnthropicSettings>(configuration.GetSection("Anthropic"));
services.AddScoped<ICompanyNameMatcherService, AICompanyNameMatcherService>();
// Services
services.AddScoped<ICompanyVerifierService, CompanyVerifierService>();
@@ -142,7 +132,7 @@ public class CVBatchTester
var summary = new CVVerificationSummary
{
FileName = Path.GetFileName(filePath),
CandidateName = parsedCV.PersonalInfo?.FullName ?? "Unknown"
CandidateName = parsedCV.FullName ?? "Unknown"
};
// Verify employers