Add AI-powered company name matching using Claude API
Replace fuzzy string matching with semantic AI matching to fix false positives where similar-sounding but different companies were matched (e.g., "Families First CiC" incorrectly matching "FAMILIES AGAINST CONFORMITY LTD").

Changes:
- Add ICompanyNameMatcherService interface and AICompanyNameMatcherService implementation using Claude Sonnet 4 for semantic company name comparison
- Add SemanticMatchResult and related models for AI match results
- Update CompanyVerifierService to use AI matching with fuzzy fallback
- Add detection for public sector employers, charities, and self-employed entries that cannot be verified via Companies House
- Update tests to work with new AI matcher integration

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -16,6 +16,7 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
{
|
||||
private readonly CompaniesHouseClient _companiesHouseClient;
|
||||
private readonly IDbContextFactory<ApplicationDbContext> _dbContextFactory;
|
||||
private readonly ICompanyNameMatcherService _aiMatcher;
|
||||
private readonly ILogger<CompanyVerifierService> _logger;
|
||||
|
||||
private const int FuzzyMatchThreshold = 85;
|
||||
@@ -75,10 +76,12 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
/// <summary>
/// Creates the verifier from its four collaborators and stores each one in the
/// matching private field: the Companies House client, the DbContext factory,
/// the company name matcher (kept as <c>_aiMatcher</c>), and the logger.
/// No validation or other work is performed here.
/// </summary>
public CompanyVerifierService(
|
||||
CompaniesHouseClient companiesHouseClient,
|
||||
IDbContextFactory<ApplicationDbContext> dbContextFactory,
|
||||
ICompanyNameMatcherService aiMatcher,
|
||||
ILogger<CompanyVerifierService> logger)
|
||||
{
|
||||
_companiesHouseClient = companiesHouseClient;
|
||||
_dbContextFactory = dbContextFactory;
|
||||
_aiMatcher = aiMatcher;
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
@@ -119,7 +122,10 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
var searchQueries = GenerateSearchQueries(companyName);
|
||||
_logger.LogDebug("Generated {Count} search queries for '{CompanyName}': {Queries}",
|
||||
searchQueries.Count, companyName, string.Join(", ", searchQueries.Select(q => $"'{q}'")));
|
||||
(CompaniesHouseSearchItem Item, int Score)? bestMatch = null;
|
||||
|
||||
// Collect all candidates from all search queries for AI matching
|
||||
var allCandidates = new Dictionary<string, CompaniesHouseSearchItem>();
|
||||
var fuzzyMatches = new List<(CompaniesHouseSearchItem Item, int Score)>();
|
||||
|
||||
foreach (var query in searchQueries)
|
||||
{
|
||||
@@ -131,25 +137,91 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
continue;
|
||||
}
|
||||
|
||||
// Find best fuzzy match, preferring companies that existed at claimed start date
|
||||
// Pass both original name and search query for matching flexibility
|
||||
bestMatch = FindBestMatch(companyName, query, searchResponse.Items, startDate);
|
||||
|
||||
if (bestMatch is not null)
|
||||
// Collect unique candidates
|
||||
foreach (var item in searchResponse.Items)
|
||||
{
|
||||
_logger.LogDebug("Found match with query '{Query}': {Company}", query, bestMatch.Value.Item.Title);
|
||||
break;
|
||||
if (!string.IsNullOrWhiteSpace(item.CompanyNumber) &&
|
||||
!allCandidates.ContainsKey(item.CompanyNumber))
|
||||
{
|
||||
allCandidates[item.CompanyNumber] = item;
|
||||
}
|
||||
}
|
||||
|
||||
// Find fuzzy matches (as before) for fallback
|
||||
var fuzzyMatch = FindBestMatch(companyName, query, searchResponse.Items, startDate);
|
||||
if (fuzzyMatch is not null)
|
||||
{
|
||||
fuzzyMatches.Add(fuzzyMatch.Value);
|
||||
}
|
||||
}
|
||||
|
||||
if (bestMatch is null)
|
||||
if (allCandidates.Count == 0)
|
||||
{
|
||||
_logger.LogDebug("No valid match found for: {CompanyName} after trying {Count} queries", companyName, searchQueries.Count);
|
||||
_logger.LogDebug("No candidates found for: {CompanyName} after trying {Count} queries", companyName, searchQueries.Count);
|
||||
return CreateUnverifiedResult(companyName, startDate, endDate, jobTitle,
|
||||
"Company name could not be verified against official records");
|
||||
}
|
||||
|
||||
var match = bestMatch.Value;
|
||||
// Use AI to find the best semantic match from all candidates
|
||||
_logger.LogDebug("Using AI to match '{CompanyName}' against {Count} candidates", companyName, allCandidates.Count);
|
||||
|
||||
var candidatesForAI = allCandidates.Values
|
||||
.Take(10) // Limit to top 10 candidates to reduce AI cost
|
||||
.Select(c => new CompanyCandidate
|
||||
{
|
||||
CompanyName = c.Title,
|
||||
CompanyNumber = c.CompanyNumber,
|
||||
CompanyStatus = c.CompanyStatus,
|
||||
DateOfCreation = c.DateOfCreation
|
||||
})
|
||||
.ToList();
|
||||
|
||||
var aiResult = await _aiMatcher.FindBestMatchAsync(companyName, candidatesForAI);
|
||||
|
||||
CompaniesHouseSearchItem? matchedItem = null;
|
||||
int matchScore;
|
||||
|
||||
if (aiResult is not null && aiResult.IsMatch)
|
||||
{
|
||||
// AI found a valid match
|
||||
matchedItem = allCandidates.GetValueOrDefault(aiResult.CandidateCompanyNumber);
|
||||
matchScore = aiResult.ConfidenceScore;
|
||||
_logger.LogInformation(
|
||||
"AI matched '{ClaimedName}' to '{MatchedName}' with {Score}% confidence. Reasoning: {Reasoning}",
|
||||
companyName, aiResult.CandidateCompanyName, aiResult.ConfidenceScore, aiResult.Reasoning);
|
||||
}
|
||||
else if (fuzzyMatches.Count > 0)
|
||||
{
|
||||
// AI didn't find a match - check if it explicitly rejected or just failed
|
||||
if (aiResult?.MatchType == "NoMatch")
|
||||
{
|
||||
_logger.LogDebug("AI explicitly rejected all candidates for '{CompanyName}'. Reasoning: {Reasoning}",
|
||||
companyName, aiResult?.Reasoning ?? "No match found");
|
||||
return CreateUnverifiedResult(companyName, startDate, endDate, jobTitle,
|
||||
"Company name could not be verified - no matching company found in official records");
|
||||
}
|
||||
|
||||
// AI failed (API error, etc.) - fall back to fuzzy matching
|
||||
_logger.LogWarning("AI matching failed for '{CompanyName}', falling back to fuzzy matching", companyName);
|
||||
var bestFuzzy = fuzzyMatches.OrderByDescending(m => m.Score).First();
|
||||
matchedItem = bestFuzzy.Item;
|
||||
matchScore = bestFuzzy.Score;
|
||||
}
|
||||
else
|
||||
{
|
||||
_logger.LogDebug("No valid match found for: {CompanyName}", companyName);
|
||||
return CreateUnverifiedResult(companyName, startDate, endDate, jobTitle,
|
||||
"Company name could not be verified against official records");
|
||||
}
|
||||
|
||||
if (matchedItem is null)
|
||||
{
|
||||
_logger.LogDebug("No valid match found for: {CompanyName}", companyName);
|
||||
return CreateUnverifiedResult(companyName, startDate, endDate, jobTitle,
|
||||
"Company name could not be verified against official records");
|
||||
}
|
||||
|
||||
var match = (Item: matchedItem, Score: matchScore);
|
||||
|
||||
// Fetch full company details for additional data
|
||||
var companyDetails = await _companiesHouseClient.GetCompanyAsync(match.Item.CompanyNumber);
|
||||
@@ -607,6 +679,7 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
{
|
||||
var itemTitle = item.Title.ToUpperInvariant();
|
||||
var itemTitleLower = item.Title.ToLowerInvariant();
|
||||
var itemCoreWords = ExtractCoreIdentifiers(item.Title);
|
||||
|
||||
// Validate that ALL core identifiers appear in the match
|
||||
// "Lloyds Bowmaker" must have BOTH "LLOYDS" and "BOWMAKER" in the match
|
||||
@@ -614,6 +687,19 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
var hasAllQueryCores = queryCoreWords.Count == 0 || queryCoreWords.All(w => itemTitle.Contains(w));
|
||||
if (!hasAllOriginalCores && !hasAllQueryCores) return false;
|
||||
|
||||
// Additional check: ensure the match doesn't have too many EXTRA core words
|
||||
// "Families First" should NOT match "Families Against Conformity" because
|
||||
// "Against" and "Conformity" are extra significant words
|
||||
if (coreWords.Count > 0 && hasAllOriginalCores)
|
||||
{
|
||||
var extraWordsInMatch = itemCoreWords.Count(w => !coreWords.Contains(w));
|
||||
// If the match has more than 1 extra core word, it's likely a different company
|
||||
if (extraWordsInMatch > 1 && itemCoreWords.Count > coreWords.Count + 1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Filter out non-employment entities unless explicitly searching for that type
|
||||
if (!IsValidEmploymentEntity(itemTitleLower, searchEntityTypes))
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user