diff --git a/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs b/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs index cdc856a..23d0ff3 100644 --- a/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs +++ b/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs @@ -791,55 +791,55 @@ public sealed class CompanyVerifierService : ICompanyVerifierService var queries = new HashSet(StringComparer.OrdinalIgnoreCase) { companyName }; var normalized = companyName.Trim(); - // Common suffixes to try variations of - var suffixPatterns = new[] - { - (" Ltd", " Limited"), - (" Limited", " Ltd"), - (" PLC", " Public Limited Company"), - (" Public Limited Company", " PLC"), - (" LLP", " Limited Liability Partnership"), - (" Limited Liability Partnership", " LLP"), - }; + // Step 1: Generate UK/U.K. variations + var ukVariants = new List { normalized }; - // Try suffix variations - foreach (var (from, to) in suffixPatterns) + if (normalized.Contains(" UK", StringComparison.OrdinalIgnoreCase)) { - if (normalized.EndsWith(from, StringComparison.OrdinalIgnoreCase)) + // Add U.K. variant + var withDots = normalized + .Replace(" UK ", " U.K. ", StringComparison.OrdinalIgnoreCase) + .Replace(" UK", " U.K.", StringComparison.OrdinalIgnoreCase); + if (withDots != normalized) + ukVariants.Add(withDots); + } + if (normalized.Contains(" U.K.", StringComparison.OrdinalIgnoreCase)) + { + // Add UK variant (no dots) + var withoutDots = normalized + .Replace(" U.K. ", " UK ", StringComparison.OrdinalIgnoreCase) + .Replace(" U.K.", " UK", StringComparison.OrdinalIgnoreCase); + if (withoutDots != normalized) + ukVariants.Add(withoutDots); + } + + // Step 2: For each UK variant, generate suffix variations (Ltd/Limited) + foreach (var variant in ukVariants) + { + queries.Add(variant); + + // Try Ltd -> Limited + if (variant.EndsWith(" Ltd", StringComparison.OrdinalIgnoreCase)) { - var variant = normalized[..^from.Length] + to; - queries.Add(variant); + queries.Add(variant[..^4] + " Limited"); + } + // Try Limited -> Ltd + else if (variant.EndsWith(" Limited", StringComparison.OrdinalIgnoreCase)) + { + queries.Add(variant[..^8] + " Ltd"); + } + // Try PLC variations + else if (variant.EndsWith(" PLC", StringComparison.OrdinalIgnoreCase)) + { + queries.Add(variant[..^4] + " Public Limited Company"); + } + else if (variant.EndsWith(" Public Limited Company", StringComparison.OrdinalIgnoreCase)) + { + queries.Add(variant[..^24] + " PLC"); } } - // Try adding/removing periods in country codes (UK <-> U.K., US <-> U.S.) - var withPeriods = System.Text.RegularExpressions.Regex.Replace( - normalized, - @"\b([A-Z])([A-Z])\b", - "$1.$2."); - queries.Add(withPeriods); - - var withoutPeriods = System.Text.RegularExpressions.Regex.Replace( - normalized, - @"\b([A-Z])\.([A-Z])\.\b", - "$1$2"); - queries.Add(withoutPeriods); - - // Also try replacing "UK" with "U.K." and vice versa specifically - if (normalized.Contains(" UK ", StringComparison.OrdinalIgnoreCase) || - normalized.EndsWith(" UK", StringComparison.OrdinalIgnoreCase)) - { - queries.Add(normalized.Replace(" UK ", " U.K. ", StringComparison.OrdinalIgnoreCase) - .Replace(" UK", " U.K.", StringComparison.OrdinalIgnoreCase)); - } - if (normalized.Contains(" U.K. ", StringComparison.OrdinalIgnoreCase) || - normalized.EndsWith(" U.K.", StringComparison.OrdinalIgnoreCase)) - { - queries.Add(normalized.Replace(" U.K. ", " UK ", StringComparison.OrdinalIgnoreCase) - .Replace(" U.K.", " UK", StringComparison.OrdinalIgnoreCase)); - } - - // Remove common suffixes to get core name + // Step 3: Try core name without suffix var suffixesToRemove = new[] { " Ltd", " Limited", " PLC", " LLP", " Inc", " Corporation", " Corp" }; var coreName = normalized; foreach (var suffix in suffixesToRemove) @@ -853,8 +853,17 @@ public sealed class CompanyVerifierService : ICompanyVerifierService if (coreName != normalized && coreName.Length >= 3) { queries.Add(coreName); - // Also try core name with "Limited" appended queries.Add(coreName + " Limited"); + + // Also add U.K. variant of core name if applicable + if (coreName.Contains(" UK", StringComparison.OrdinalIgnoreCase)) + { + var coreWithDots = coreName + .Replace(" UK ", " U.K. ", StringComparison.OrdinalIgnoreCase) + .Replace(" UK", " U.K.", StringComparison.OrdinalIgnoreCase); + queries.Add(coreWithDots); + queries.Add(coreWithDots + " Limited"); + } } return queries.ToList();