Fix company name variation search to find older companies

The previous search tried only a single query, which often returned newer
companies with similar names. The search now generates all combinations of
name variations (UK/U.K. + Ltd/Limited) to find the correct company.

For "Mattel UK Ltd", the search now tries:
- Mattel UK Ltd
- Mattel U.K. Ltd
- Mattel UK Limited
- Mattel U.K. Limited (finds MATTEL U.K. LIMITED from 1980)
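- Plus core-name variations: the name with its company suffix removed
  (e.g. Mattel UK), with "Limited" re-appended, and the U.K. form of each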

This ensures companies like "MATTEL U.K. LIMITED" (incorporated 1980)
are found instead of incorrectly matching "UK MATTEL LTD" (2025).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-20 21:39:22 +01:00
parent 58c0e79a85
commit f4890b3049

View File

@@ -791,55 +791,55 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
var queries = new HashSet<string>(StringComparer.OrdinalIgnoreCase) { companyName };
var normalized = companyName.Trim();
// Common suffixes to try variations of
var suffixPatterns = new[]
{
(" Ltd", " Limited"),
(" Limited", " Ltd"),
(" PLC", " Public Limited Company"),
(" Public Limited Company", " PLC"),
(" LLP", " Limited Liability Partnership"),
(" Limited Liability Partnership", " LLP"),
};
// Step 1: Generate UK/U.K. variations
var ukVariants = new List<string> { normalized };
// Try suffix variations
foreach (var (from, to) in suffixPatterns)
if (normalized.Contains(" UK", StringComparison.OrdinalIgnoreCase))
{
if (normalized.EndsWith(from, StringComparison.OrdinalIgnoreCase))
// Add U.K. variant
var withDots = normalized
.Replace(" UK ", " U.K. ", StringComparison.OrdinalIgnoreCase)
.Replace(" UK", " U.K.", StringComparison.OrdinalIgnoreCase);
if (withDots != normalized)
ukVariants.Add(withDots);
}
if (normalized.Contains(" U.K.", StringComparison.OrdinalIgnoreCase))
{
// Add UK variant (no dots)
var withoutDots = normalized
.Replace(" U.K. ", " UK ", StringComparison.OrdinalIgnoreCase)
.Replace(" U.K.", " UK", StringComparison.OrdinalIgnoreCase);
if (withoutDots != normalized)
ukVariants.Add(withoutDots);
}
// Step 2: For each UK variant, generate suffix variations (Ltd/Limited)
foreach (var variant in ukVariants)
{
queries.Add(variant);
// Try Ltd -> Limited
if (variant.EndsWith(" Ltd", StringComparison.OrdinalIgnoreCase))
{
var variant = normalized[..^from.Length] + to;
queries.Add(variant);
queries.Add(variant[..^4] + " Limited");
}
// Try Limited -> Ltd
else if (variant.EndsWith(" Limited", StringComparison.OrdinalIgnoreCase))
{
queries.Add(variant[..^8] + " Ltd");
}
// Try PLC variations
else if (variant.EndsWith(" PLC", StringComparison.OrdinalIgnoreCase))
{
queries.Add(variant[..^4] + " Public Limited Company");
}
else if (variant.EndsWith(" Public Limited Company", StringComparison.OrdinalIgnoreCase))
{
queries.Add(variant[..^24] + " PLC");
}
}
// Try adding/removing periods in country codes (UK <-> U.K., US <-> U.S.)
var withPeriods = System.Text.RegularExpressions.Regex.Replace(
normalized,
@"\b([A-Z])([A-Z])\b",
"$1.$2.");
queries.Add(withPeriods);
var withoutPeriods = System.Text.RegularExpressions.Regex.Replace(
normalized,
@"\b([A-Z])\.([A-Z])\.\b",
"$1$2");
queries.Add(withoutPeriods);
// Also try replacing "UK" with "U.K." and vice versa specifically
if (normalized.Contains(" UK ", StringComparison.OrdinalIgnoreCase) ||
normalized.EndsWith(" UK", StringComparison.OrdinalIgnoreCase))
{
queries.Add(normalized.Replace(" UK ", " U.K. ", StringComparison.OrdinalIgnoreCase)
.Replace(" UK", " U.K.", StringComparison.OrdinalIgnoreCase));
}
if (normalized.Contains(" U.K. ", StringComparison.OrdinalIgnoreCase) ||
normalized.EndsWith(" U.K.", StringComparison.OrdinalIgnoreCase))
{
queries.Add(normalized.Replace(" U.K. ", " UK ", StringComparison.OrdinalIgnoreCase)
.Replace(" U.K.", " UK", StringComparison.OrdinalIgnoreCase));
}
// Remove common suffixes to get core name
// Step 3: Try core name without suffix
var suffixesToRemove = new[] { " Ltd", " Limited", " PLC", " LLP", " Inc", " Corporation", " Corp" };
var coreName = normalized;
foreach (var suffix in suffixesToRemove)
@@ -853,8 +853,17 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
if (coreName != normalized && coreName.Length >= 3)
{
queries.Add(coreName);
// Also try core name with "Limited" appended
queries.Add(coreName + " Limited");
// Also add U.K. variant of core name if applicable
if (coreName.Contains(" UK", StringComparison.OrdinalIgnoreCase))
{
var coreWithDots = coreName
.Replace(" UK ", " U.K. ", StringComparison.OrdinalIgnoreCase)
.Replace(" UK", " U.K.", StringComparison.OrdinalIgnoreCase);
queries.Add(coreWithDots);
queries.Add(coreWithDots + " Limited");
}
}
return queries.ToList();