diff --git a/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs b/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs
index 766b7b5..cdc856a 100644
--- a/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs
+++ b/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs
@@ -71,23 +71,37 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
             }
         }
 
-        // Search Companies House
+        // Search Companies House with fallback queries
         try
         {
-            var searchResponse = await _companiesHouseClient.SearchCompaniesAsync(companyName);
+            var searchQueries = GenerateSearchQueries(companyName);
+            _logger.LogDebug("Generated {Count} search queries for '{CompanyName}': {Queries}",
+                searchQueries.Count, companyName, string.Join(", ", searchQueries.Select(q => $"'{q}'")));
+            (CompaniesHouseSearchItem Item, int Score)? bestMatch = null;
 
-            if (searchResponse?.Items is null || searchResponse.Items.Count == 0)
+            foreach (var query in searchQueries)
             {
-                _logger.LogDebug("No companies found for: {CompanyName}", companyName);
-                return CreateUnverifiedResult(companyName, startDate, endDate, jobTitle, "No matching company found in Companies House");
-            }
+                _logger.LogDebug("Searching Companies House with query: {Query}", query);
+                var searchResponse = await _companiesHouseClient.SearchCompaniesAsync(query);
 
-            // Find best fuzzy match, preferring companies that existed at claimed start date
-            var bestMatch = FindBestMatch(companyName, searchResponse.Items, startDate);
+                if (searchResponse?.Items is null || searchResponse.Items.Count == 0)
+                {
+                    continue;
+                }
+
+                // Find best fuzzy match, preferring companies that existed at claimed start date
+                bestMatch = FindBestMatch(companyName, searchResponse.Items, startDate);
+
+                if (bestMatch is not null)
+                {
+                    _logger.LogDebug("Found match with query '{Query}': {Company}", query, bestMatch.Value.Item.Title);
+                    break;
+                }
+            }
 
             if (bestMatch is null)
             {
-                _logger.LogDebug("No fuzzy match above threshold for: {CompanyName}", companyName);
+                _logger.LogDebug("No valid match found for: {CompanyName} after trying {Count} queries", companyName, searchQueries.Count);
                 return CreateUnverifiedResult(companyName, startDate, endDate, jobTitle, "Company name could not be verified against official records");
             }
 
@@ -768,5 +782,101 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
         };
     }
 
+    /// <summary>
+    /// Generates alternative search queries to find companies that may be registered
+    /// with slightly different names (e.g., "U.K." vs "UK", "Limited" vs "Ltd").
+    /// </summary>
+    /// <param name="companyName">Company name to derive search queries from.</param>
+    /// <returns>
+    /// The trimmed name followed by its variants, in the order they should be tried,
+    /// with case-insensitive duplicates and empty strings removed.
+    /// </returns>
+    private static List<string> GenerateSearchQueries(string companyName)
+    {
+        var normalized = companyName.Trim();
+
+        // The caller issues one search request per query, so duplicates are dropped.
+        // A list keeps the try-order explicit; HashSet<T> alone guarantees no ordering.
+        var queries = new List<string>();
+        var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
+
+        void AddQuery(string candidate)
+        {
+            if (candidate.Length > 0 && seen.Add(candidate))
+            {
+                queries.Add(candidate);
+            }
+        }
+
+        AddQuery(normalized);
+
+        // Common suffixes to try variations of
+        var suffixPatterns = new[]
+        {
+            (" Ltd", " Limited"),
+            (" Limited", " Ltd"),
+            (" PLC", " Public Limited Company"),
+            (" Public Limited Company", " PLC"),
+            (" LLP", " Limited Liability Partnership"),
+            (" Limited Liability Partnership", " LLP"),
+        };
+
+        // Try suffix variations
+        foreach (var (from, to) in suffixPatterns)
+        {
+            if (normalized.EndsWith(from, StringComparison.OrdinalIgnoreCase))
+            {
+                AddQuery(normalized[..^from.Length] + to);
+            }
+        }
+
+        // Add periods to two-letter upper-case words (UK -> U.K., US -> U.S.)
+        AddQuery(System.Text.RegularExpressions.Regex.Replace(
+            normalized,
+            @"\b([A-Z])([A-Z])\b",
+            "$1.$2."));
+
+        // Remove periods (U.K. -> UK, U.S. -> US). The tail is (?!\w), not \b: there is no
+        // word boundary between '.' and a following space or the end of the string.
+        // The leading (?<![\w.]) leaves longer dotted runs such as "A.B.C." untouched.
+        AddQuery(System.Text.RegularExpressions.Regex.Replace(
+            normalized,
+            @"(?<![\w.])([A-Z])\.([A-Z])\.(?!\w)",
+            "$1$2"));
+
+        // Same two rewrites for "UK" in any letter case, matched as a whole word so that
+        // names merely starting with those letters (e.g. "Ukraine") are not rewritten.
+        AddQuery(System.Text.RegularExpressions.Regex.Replace(
+            normalized,
+            @"\bUK\b",
+            "U.K.",
+            System.Text.RegularExpressions.RegexOptions.IgnoreCase));
+        AddQuery(System.Text.RegularExpressions.Regex.Replace(
+            normalized,
+            @"(?<![\w.])U\.K\.(?!\w)",
+            "UK",
+            System.Text.RegularExpressions.RegexOptions.IgnoreCase));
+
+        // Remove common suffixes to get core name
+        var suffixesToRemove = new[] { " Ltd", " Limited", " PLC", " LLP", " Inc", " Corporation", " Corp" };
+        var coreName = normalized;
+        foreach (var suffix in suffixesToRemove)
+        {
+            if (coreName.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
+            {
+                coreName = coreName[..^suffix.Length].Trim();
+                break;
+            }
+        }
+        if (coreName != normalized && coreName.Length >= 3)
+        {
+            AddQuery(coreName);
+            // Also try core name with "Limited" appended
+            AddQuery(coreName + " Limited");
+        }
+
+        return queries;
+    }
+
     #endregion
 }