Improve company matching and move points to employment table

Company Matching:
- Extract parent company from "Brand (Parent Company)" format
- Handle slash-separated names like "ASDA/WALMART"
- Match against both original name and search query for flexibility
- Add PLC/Plc case variations

Report UI:
- Remove separate Score Breakdown section
- Add Points column to Employment Verification table
- Calculate points per company from matching flags

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-20 21:49:26 +01:00
parent f4890b3049
commit 5d2ec4b98e
2 changed files with 132 additions and 91 deletions

View File

@@ -90,7 +90,8 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
}
// Find best fuzzy match, preferring companies that existed at claimed start date
bestMatch = FindBestMatch(companyName, searchResponse.Items, startDate);
// Pass both original name and search query for matching flexibility
bestMatch = FindBestMatch(companyName, query, searchResponse.Items, startDate);
if (bestMatch is not null)
{
@@ -586,18 +587,29 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
private (CompaniesHouseSearchItem Item, int Score)? FindBestMatch(
string companyName,
string searchQuery,
List<CompaniesHouseSearchItem> items,
DateOnly? claimedStartDate)
{
var normalizedSearch = companyName.ToUpperInvariant();
var normalizedOriginal = companyName.ToUpperInvariant();
var normalizedQuery = searchQuery.ToUpperInvariant();
// Score each result against both the original company name AND the search query used.
// This handles cases like "Matthew Walker (Northern Foods Plc)", where the query sent
// is "Northern Foods Plc" and results must be compared with that query rather than
// only with the full original name.
var matches = items
.Where(item => !string.IsNullOrWhiteSpace(item.Title))
.Select(item => (Item: item, Score: Fuzz.TokenSetRatio(normalizedSearch, item.Title.ToUpperInvariant())))
.Select(item =>
{
var itemTitle = item.Title.ToUpperInvariant();
var scoreVsOriginal = Fuzz.TokenSetRatio(normalizedOriginal, itemTitle);
var scoreVsQuery = Fuzz.TokenSetRatio(normalizedQuery, itemTitle);
return (Item: item, Score: Math.Max(scoreVsOriginal, scoreVsQuery));
})
.Where(m => m.Score >= FuzzyMatchThreshold)
.ToList();
_logger.LogDebug("Found {Count} matches above threshold for '{CompanyName}'", matches.Count, companyName);
_logger.LogDebug("Found {Count} matches above threshold for '{CompanyName}' (query: '{Query}')", matches.Count, companyName, searchQuery);
foreach (var m in matches.Take(5))
{
_logger.LogDebug(" Match: {Title} ({Number}), Score: {Score}, DateOfCreation: {Date}",
@@ -785,63 +797,125 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
/// <summary>
/// Generates alternative search queries to find companies that may be registered
/// with slightly different names (e.g., "U.K." vs "UK", "Limited" vs "Ltd").
/// Also handles "Brand (Parent Company)" format by extracting and prioritizing the parent,
/// and "Name1/Name2" format by trying each part separately.
/// </summary>
/// <param name="companyName">The company name as claimed; surrounding whitespace is ignored.</param>
/// <returns>
/// Distinct queries (case-insensitive) in priority order: parent-company variations,
/// brand variations, slash-separated part variations, then variations of the full name.
/// </returns>
private static List<string> GenerateSearchQueries(string companyName)
{
    // Fragments shorter than this are too generic to be useful as a search query.
    const int MinQueryLength = 3;

    // A list keeps the priority order explicit; the set is used only for
    // case-insensitive de-duplication (HashSet enumeration order is not guaranteed).
    var queries = new List<string>();
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var normalized = companyName.Trim();

    void AddVariations(string candidate)
    {
        foreach (var variation in GenerateNameVariations(candidate))
        {
            if (seen.Add(variation))
            {
                queries.Add(variation);
            }
        }
    }

    // Step 0a: "Brand (Parent Company)" format - a trailing parenthetical names the parent.
    // The parent company is more likely to be the registered name, so it is queried first.
    var parentMatch = System.Text.RegularExpressions.Regex.Match(normalized, @"\(([^)]+)\)\s*$");
    if (parentMatch.Success)
    {
        var parentCompany = parentMatch.Groups[1].Value.Trim();
        // Same length guard as the brand/parts below, so "Acme (UK)" does not search for "UK".
        if (parentCompany.Length >= MinQueryLength)
        {
            AddVariations(parentCompany);
        }

        // Also try the brand name without the parenthetical.
        var brandName = normalized[..parentMatch.Index].Trim();
        if (brandName.Length >= MinQueryLength)
        {
            AddVariations(brandName);
        }
    }

    // Step 0b: "Name1/Name2" format (e.g., "ASDA/WALMART").
    // Each part is tried separately as they may be different registered names.
    if (normalized.Contains('/'))
    {
        var parts = normalized.Split('/', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
        foreach (var part in parts)
        {
            if (part.Length >= MinQueryLength)
            {
                AddVariations(part);
            }
        }
    }

    // Finally, variations of the full original name.
    AddVariations(normalized);

    return queries;
}
/// <summary>
/// Generates name variations for a single company name: "UK"/"U.K." spellings,
/// swapped legal suffixes (Ltd/Limited, PLC/Public Limited Company), and the core
/// name with its legal suffix removed.
/// </summary>
/// <param name="name">The company name to expand.</param>
/// <returns>Distinct variations (case-insensitive), including <paramref name="name"/> itself.</returns>
private static List<string> GenerateNameVariations(string name)
{
    const string ShortLtd = " Ltd";
    const string LongLtd = " Limited";
    const string ShortPlc = " PLC";
    const string LongPlc = " Public Limited Company";

    var variations = new HashSet<string>(StringComparer.OrdinalIgnoreCase) { name };

    // Step 1: Generate UK/U.K. variations
    var ukVariants = new List<string> { name };

    if (name.Contains(" UK", StringComparison.OrdinalIgnoreCase))
    {
        // Add U.K. variant
        var withDots = name
            .Replace(" UK ", " U.K. ", StringComparison.OrdinalIgnoreCase)
            .Replace(" UK", " U.K.", StringComparison.OrdinalIgnoreCase);
        if (withDots != name)
        {
            ukVariants.Add(withDots);
        }
    }

    if (name.Contains(" U.K.", StringComparison.OrdinalIgnoreCase))
    {
        // Add UK variant (no dots)
        var withoutDots = name
            .Replace(" U.K. ", " UK ", StringComparison.OrdinalIgnoreCase)
            .Replace(" U.K.", " UK", StringComparison.OrdinalIgnoreCase);
        if (withoutDots != name)
        {
            ukVariants.Add(withoutDots);
        }
    }

    // Step 2: For each UK variant, swap the legal suffix for its alternative spelling.
    // Suffix checks are case-insensitive, so "Plc"/"plc" are handled by the PLC branch.
    // Slices use the suffix's own length so the stripped span always matches the suffix.
    foreach (var variant in ukVariants)
    {
        variations.Add(variant);

        if (variant.EndsWith(ShortLtd, StringComparison.OrdinalIgnoreCase))
        {
            variations.Add(variant[..^ShortLtd.Length] + LongLtd);
        }
        else if (variant.EndsWith(LongLtd, StringComparison.OrdinalIgnoreCase))
        {
            variations.Add(variant[..^LongLtd.Length] + ShortLtd);
        }
        else if (variant.EndsWith(ShortPlc, StringComparison.OrdinalIgnoreCase))
        {
            variations.Add(variant[..^ShortPlc.Length] + LongPlc);
        }
        else if (variant.EndsWith(LongPlc, StringComparison.OrdinalIgnoreCase))
        {
            variations.Add(variant[..^LongPlc.Length] + ShortPlc);
        }
    }

    // Step 3: Try core name without suffix (only the first matching suffix is removed)
    var suffixesToRemove = new[] { " Ltd", " Limited", " PLC", " LLP", " Inc", " Corporation", " Corp" };
    var coreName = name;
    foreach (var suffix in suffixesToRemove)
    {
        if (coreName.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
        {
            coreName = coreName[..^suffix.Length].TrimEnd();
            break;
        }
    }

    if (coreName != name && coreName.Length >= 3)
    {
        variations.Add(coreName);
        variations.Add(coreName + " Limited");
        variations.Add(coreName + " PLC");

        // Also add U.K. variant of core name if applicable
        if (coreName.Contains(" UK", StringComparison.OrdinalIgnoreCase))
        {
            var coreWithDots = coreName
                .Replace(" UK ", " U.K. ", StringComparison.OrdinalIgnoreCase)
                .Replace(" UK", " U.K.", StringComparison.OrdinalIgnoreCase);
            variations.Add(coreWithDots);
            variations.Add(coreWithDots + " Limited");
        }
    }

    return variations.ToList();
}
#endregion