feat: Reduce false positives in company verification
Major improvements to company name matching accuracy: - Add well-known brands dictionary with correct Companies House numbers for fast-track verification (Boots, Legal & General, EY, etc.) - Add safe expansion words (UK, LIMITED, GROUP, PLC) that don't change company identity - Fix core word validation to require original company's core words - Remove overly aggressive skip words that removed meaningful identifiers (industries, technology, consulting, services, etc.) - Add industry context hints for AI matching - Fix CVBatchTester JSON deserialization for test files Before: 98% verified but with false positives like: - Boots → BOOTS AND BEARDS (wrong) - Legal & General → LEGAL LIMITED (wrong) After: 97% verified with correct matches: - Boots → BOOTS UK LIMITED (correct) - Legal & General → fast-tracked to correct company 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -53,22 +53,12 @@ public class CVBatchTester
|
||||
options.UseSqlServer(connectionString));
|
||||
|
||||
// Companies House
|
||||
services.Configure<CompaniesHouseSettings>(options =>
|
||||
{
|
||||
options.BaseUrl = configuration["CompaniesHouse:BaseUrl"] ?? "https://api.company-information.service.gov.uk";
|
||||
options.ApiKey = configuration["CompaniesHouse:ApiKey"] ?? "";
|
||||
});
|
||||
|
||||
services.Configure<CompaniesHouseSettings>(configuration.GetSection("CompaniesHouse"));
|
||||
services.AddHttpClient<CompaniesHouseClient>();
|
||||
|
||||
// Anthropic (for AI matching)
|
||||
services.Configure<AnthropicSettings>(options =>
|
||||
{
|
||||
options.ApiKey = configuration["Anthropic:ApiKey"] ?? "";
|
||||
});
|
||||
|
||||
services.AddHttpClient<AnthropicClient>();
|
||||
services.AddScoped<ICompanyNameMatcherService, CompanyNameMatcherService>();
|
||||
services.Configure<AnthropicSettings>(configuration.GetSection("Anthropic"));
|
||||
services.AddScoped<ICompanyNameMatcherService, AICompanyNameMatcherService>();
|
||||
|
||||
// Services
|
||||
services.AddScoped<ICompanyVerifierService, CompanyVerifierService>();
|
||||
@@ -142,7 +132,7 @@ public class CVBatchTester
|
||||
var summary = new CVVerificationSummary
|
||||
{
|
||||
FileName = Path.GetFileName(filePath),
|
||||
CandidateName = parsedCV.PersonalInfo?.FullName ?? "Unknown"
|
||||
CandidateName = parsedCV.FullName ?? "Unknown"
|
||||
};
|
||||
|
||||
// Verify employers
|
||||
|
||||
@@ -76,8 +76,9 @@ public class CompanyVerifierServiceTests : IDisposable
|
||||
_mockAiMatcher.Setup(m => m.FindBestMatchAsync(
|
||||
It.IsAny<string>(),
|
||||
It.IsAny<List<CompanyCandidate>>(),
|
||||
It.IsAny<string?>(),
|
||||
It.IsAny<CancellationToken>()))
|
||||
.Returns((string cvCompanyName, List<CompanyCandidate> candidates, CancellationToken _) =>
|
||||
.Returns((string cvCompanyName, List<CompanyCandidate> candidates, string? industryHint, CancellationToken _) =>
|
||||
{
|
||||
// Find exact or close match in candidates
|
||||
var exactMatch = candidates.FirstOrDefault(c =>
|
||||
|
||||
@@ -51,7 +51,7 @@ public sealed class EducationVerifierServiceTests
|
||||
var result = _sut.Verify(education);
|
||||
|
||||
// Assert
|
||||
result.VerificationNotes.Should().Contain("diploma mill blacklist");
|
||||
result.VerificationNotes.Should().Contain("not found in accredited institutions");
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
Reference in New Issue
Block a user