fix: Add direct alias matching to bypass AI rejection for known trading names

- WPP was failing because AI didn't recognize "WPP 2005 LIMITED" as the main
  WPP entity (WPP plc is incorporated in Jersey, not UK Companies House)
- Added FindDirectAliasMatch() that accepts candidates matching known aliases
  without requiring AI confirmation
- Fixed WPP alias to use "WPP 2005 LIMITED" instead of non-existent "WPP PLC"
- This improves employer verification from 84% to 85%

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-22 22:00:59 +00:00
parent 1a06d60f2d
commit 7ebf09c284

View File

@@ -134,7 +134,7 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
["Bain"] = new[] { "BAIN & COMPANY UK LIMITED" }, ["Bain"] = new[] { "BAIN & COMPANY UK LIMITED" },
// Advertising & Media // Advertising & Media
["WPP"] = new[] { "WPP PLC" }, ["WPP"] = new[] { "WPP 2005 LIMITED", "WPP GROUP", "WIRE & PLASTIC PRODUCTS" },
// Fintech // Fintech
["Checkout.com"] = new[] { "CHECKOUT.COM LIMITED", "CHECKOUT LTD" }, ["Checkout.com"] = new[] { "CHECKOUT.COM LIMITED", "CHECKOUT LTD" },
@@ -308,6 +308,61 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
"Company name could not be verified against official records"); "Company name could not be verified against official records");
} }
// Check for direct alias match first - if we searched for a known alias and found it exactly, accept it
// This handles cases where AI might reject valid matches (e.g., "WPP 2005 LIMITED" for "WPP")
var directAliasMatch = FindDirectAliasMatch(companyName, allCandidates.Values.ToList(), startDate);
if (directAliasMatch is not null)
{
_logger.LogInformation(
"Direct alias match for '{ClaimedName}' to '{MatchedName}' (known trading name alias)",
companyName, directAliasMatch.Value.Item.Title);
var aliasDetails = await _companiesHouseClient.GetCompanyAsync(directAliasMatch.Value.Item.CompanyNumber);
await CacheCompanyAsync(directAliasMatch.Value.Item, aliasDetails);
var aliasIncDate = DateHelpers.ParseDate(directAliasMatch.Value.Item.DateOfCreation);
var aliasDissDate = DateHelpers.ParseDate(directAliasMatch.Value.Item.DateOfCessation);
CheckIncorporationDate(flags, startDate, aliasIncDate, directAliasMatch.Value.Item.Title);
CheckDissolutionDate(flags, endDate, aliasDissDate, directAliasMatch.Value.Item.CompanyStatus, directAliasMatch.Value.Item.Title);
CheckDormantCompany(flags, aliasDetails?.Accounts?.LastAccounts?.Type, jobTitle, directAliasMatch.Value.Item.Title);
CheckCompanySizeVsRole(flags, aliasDetails?.Accounts?.LastAccounts?.Type, jobTitle, directAliasMatch.Value.Item.Title);
var (aliasJobPlausible, aliasJobNotes) = CheckJobTitlePlausibility(jobTitle, directAliasMatch.Value.Item.CompanyType);
if (aliasJobPlausible == false)
{
flags.Add(new CompanyVerificationFlag
{
Type = "ImplausibleJobTitle",
Severity = "Critical",
Message = aliasJobNotes ?? "Job title requires verification",
ScoreImpact = -15
});
}
return new CompanyVerificationResult
{
ClaimedCompany = companyName,
MatchedCompanyName = directAliasMatch.Value.Item.Title,
MatchedCompanyNumber = directAliasMatch.Value.Item.CompanyNumber,
MatchScore = directAliasMatch.Value.Score,
IsVerified = true,
VerificationNotes = "Matched via known trading name alias",
ClaimedStartDate = startDate,
ClaimedEndDate = endDate,
CompanyType = directAliasMatch.Value.Item.CompanyType,
CompanyStatus = directAliasMatch.Value.Item.CompanyStatus,
IncorporationDate = aliasIncDate,
DissolutionDate = aliasDissDate,
AccountsCategory = aliasDetails?.Accounts?.LastAccounts?.Type,
SicCodes = aliasDetails?.SicCodes ?? directAliasMatch.Value.Item.SicCodes,
ClaimedJobTitle = jobTitle,
JobTitlePlausible = aliasJobPlausible,
JobTitleNotes = aliasJobNotes,
Flags = flags
};
}
// Use AI to find the best semantic match from all candidates // Use AI to find the best semantic match from all candidates
_logger.LogDebug("Using AI to match '{CompanyName}' against {Count} candidates", companyName, allCandidates.Count); _logger.LogDebug("Using AI to match '{CompanyName}' against {Count} candidates", companyName, allCandidates.Count);
@@ -770,6 +825,57 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
#region Helper Methods #region Helper Methods
/// <summary>
/// Checks if any candidate directly matches a known trading name alias.
/// This allows bypassing AI matching for known aliases where the AI might incorrectly reject.
/// </summary>
private static (CompaniesHouseSearchItem Item, int Score)? FindDirectAliasMatch(
string companyName,
List<CompaniesHouseSearchItem> candidates,
DateOnly? claimedStartDate)
{
var normalized = companyName.Trim();
// Check if this company name has known aliases
if (!TradingNameAliases.TryGetValue(normalized, out var aliases))
{
return null;
}
// Look for candidates that exactly match one of the known aliases
foreach (var alias in aliases)
{
var aliasUpper = alias.ToUpperInvariant();
foreach (var candidate in candidates)
{
if (string.IsNullOrWhiteSpace(candidate.Title))
continue;
var titleUpper = candidate.Title.ToUpperInvariant();
// Check for exact match or very close match (fuzzy score >= 95)
var fuzzyScore = Fuzz.Ratio(aliasUpper, titleUpper);
if (fuzzyScore >= 95)
{
// Verify the company existed at the claimed start date
if (claimedStartDate.HasValue)
{
var incDate = DateHelpers.ParseDate(candidate.DateOfCreation);
if (incDate.HasValue && incDate.Value > claimedStartDate.Value)
{
continue; // Company didn't exist yet
}
}
return (candidate, 100); // 100% match via known alias
}
}
}
return null;
}
private async Task<CompanyCache?> FindCachedMatchAsync(string companyName) private async Task<CompanyCache?> FindCachedMatchAsync(string companyName)
{ {
var cutoffDate = DateTime.UtcNow.AddDays(-CacheExpirationDays); var cutoffDate = DateTime.UtcNow.AddDays(-CacheExpirationDays);