Fix company name variation search to find older companies
The previous search issued only a single query, which often returned newer companies with similar names. The search now generates every combination of name variations (UK/U.K. crossed with Ltd/Limited) so that the correct company can be found.

For "Mattel UK Ltd", it now searches:
- Mattel UK Ltd
- Mattel U.K. Ltd
- Mattel UK Limited
- Mattel U.K. Limited (finds MATTEL U.K. LIMITED from 1980)
- Plus core name variations

This ensures that companies like "MATTEL U.K. LIMITED" (incorporated 1980) are found instead of incorrectly matching "UK MATTEL LTD" (2025).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -791,55 +791,55 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
var queries = new HashSet<string>(StringComparer.OrdinalIgnoreCase) { companyName };
|
||||
var normalized = companyName.Trim();
|
||||
|
||||
// Common suffixes to try variations of
|
||||
var suffixPatterns = new[]
|
||||
{
|
||||
(" Ltd", " Limited"),
|
||||
(" Limited", " Ltd"),
|
||||
(" PLC", " Public Limited Company"),
|
||||
(" Public Limited Company", " PLC"),
|
||||
(" LLP", " Limited Liability Partnership"),
|
||||
(" Limited Liability Partnership", " LLP"),
|
||||
};
|
||||
// Step 1: Generate UK/U.K. variations
|
||||
var ukVariants = new List<string> { normalized };
|
||||
|
||||
// Try suffix variations
|
||||
foreach (var (from, to) in suffixPatterns)
|
||||
if (normalized.Contains(" UK", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
if (normalized.EndsWith(from, StringComparison.OrdinalIgnoreCase))
|
||||
// Add U.K. variant
|
||||
var withDots = normalized
|
||||
.Replace(" UK ", " U.K. ", StringComparison.OrdinalIgnoreCase)
|
||||
.Replace(" UK", " U.K.", StringComparison.OrdinalIgnoreCase);
|
||||
if (withDots != normalized)
|
||||
ukVariants.Add(withDots);
|
||||
}
|
||||
if (normalized.Contains(" U.K.", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
// Add UK variant (no dots)
|
||||
var withoutDots = normalized
|
||||
.Replace(" U.K. ", " UK ", StringComparison.OrdinalIgnoreCase)
|
||||
.Replace(" U.K.", " UK", StringComparison.OrdinalIgnoreCase);
|
||||
if (withoutDots != normalized)
|
||||
ukVariants.Add(withoutDots);
|
||||
}
|
||||
|
||||
// Step 2: For each UK variant, generate suffix variations (Ltd/Limited)
|
||||
foreach (var variant in ukVariants)
|
||||
{
|
||||
queries.Add(variant);
|
||||
|
||||
// Try Ltd -> Limited
|
||||
if (variant.EndsWith(" Ltd", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
var variant = normalized[..^from.Length] + to;
|
||||
queries.Add(variant);
|
||||
queries.Add(variant[..^4] + " Limited");
|
||||
}
|
||||
// Try Limited -> Ltd
|
||||
else if (variant.EndsWith(" Limited", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
queries.Add(variant[..^8] + " Ltd");
|
||||
}
|
||||
// Try PLC variations
|
||||
else if (variant.EndsWith(" PLC", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
queries.Add(variant[..^4] + " Public Limited Company");
|
||||
}
|
||||
else if (variant.EndsWith(" Public Limited Company", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
queries.Add(variant[..^24] + " PLC");
|
||||
}
|
||||
}
|
||||
|
||||
// Try adding/removing periods in country codes (UK <-> U.K., US <-> U.S.)
|
||||
var withPeriods = System.Text.RegularExpressions.Regex.Replace(
|
||||
normalized,
|
||||
@"\b([A-Z])([A-Z])\b",
|
||||
"$1.$2.");
|
||||
queries.Add(withPeriods);
|
||||
|
||||
var withoutPeriods = System.Text.RegularExpressions.Regex.Replace(
|
||||
normalized,
|
||||
@"\b([A-Z])\.([A-Z])\.\b",
|
||||
"$1$2");
|
||||
queries.Add(withoutPeriods);
|
||||
|
||||
// Also try replacing "UK" with "U.K." and vice versa specifically
|
||||
if (normalized.Contains(" UK ", StringComparison.OrdinalIgnoreCase) ||
|
||||
normalized.EndsWith(" UK", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
queries.Add(normalized.Replace(" UK ", " U.K. ", StringComparison.OrdinalIgnoreCase)
|
||||
.Replace(" UK", " U.K.", StringComparison.OrdinalIgnoreCase));
|
||||
}
|
||||
if (normalized.Contains(" U.K. ", StringComparison.OrdinalIgnoreCase) ||
|
||||
normalized.EndsWith(" U.K.", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
queries.Add(normalized.Replace(" U.K. ", " UK ", StringComparison.OrdinalIgnoreCase)
|
||||
.Replace(" U.K.", " UK", StringComparison.OrdinalIgnoreCase));
|
||||
}
|
||||
|
||||
// Remove common suffixes to get core name
|
||||
// Step 3: Try core name without suffix
|
||||
var suffixesToRemove = new[] { " Ltd", " Limited", " PLC", " LLP", " Inc", " Corporation", " Corp" };
|
||||
var coreName = normalized;
|
||||
foreach (var suffix in suffixesToRemove)
|
||||
@@ -853,8 +853,17 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
if (coreName != normalized && coreName.Length >= 3)
|
||||
{
|
||||
queries.Add(coreName);
|
||||
// Also try core name with "Limited" appended
|
||||
queries.Add(coreName + " Limited");
|
||||
|
||||
// Also add U.K. variant of core name if applicable
|
||||
if (coreName.Contains(" UK", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
var coreWithDots = coreName
|
||||
.Replace(" UK ", " U.K. ", StringComparison.OrdinalIgnoreCase)
|
||||
.Replace(" UK", " U.K.", StringComparison.OrdinalIgnoreCase);
|
||||
queries.Add(coreWithDots);
|
||||
queries.Add(coreWithDots + " Limited");
|
||||
}
|
||||
}
|
||||
|
||||
return queries.ToList();
|
||||
|
||||
Reference in New Issue
Block a user