Add alternative search queries for company name variations
When searching Companies House, the verifier now generates multiple query variations so that it can find companies registered under different naming conventions (e.g., "U.K." vs "UK", "Limited" vs "Ltd"). This helps match older companies such as "MATTEL U.K. LIMITED" when a CV lists "Mattel UK Ltd".

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -71,23 +71,37 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
}
|
||||
}
|
||||
|
||||
// Search Companies House
|
||||
// Search Companies House with fallback queries
|
||||
try
|
||||
{
|
||||
var searchResponse = await _companiesHouseClient.SearchCompaniesAsync(companyName);
|
||||
var searchQueries = GenerateSearchQueries(companyName);
|
||||
_logger.LogDebug("Generated {Count} search queries for '{CompanyName}': {Queries}",
|
||||
searchQueries.Count, companyName, string.Join(", ", searchQueries.Select(q => $"'{q}'")));
|
||||
(CompaniesHouseSearchItem Item, int Score)? bestMatch = null;
|
||||
|
||||
if (searchResponse?.Items is null || searchResponse.Items.Count == 0)
|
||||
foreach (var query in searchQueries)
|
||||
{
|
||||
_logger.LogDebug("No companies found for: {CompanyName}", companyName);
|
||||
return CreateUnverifiedResult(companyName, startDate, endDate, jobTitle, "No matching company found in Companies House");
|
||||
}
|
||||
_logger.LogDebug("Searching Companies House with query: {Query}", query);
|
||||
var searchResponse = await _companiesHouseClient.SearchCompaniesAsync(query);
|
||||
|
||||
// Find best fuzzy match, preferring companies that existed at claimed start date
|
||||
var bestMatch = FindBestMatch(companyName, searchResponse.Items, startDate);
|
||||
if (searchResponse?.Items is null || searchResponse.Items.Count == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Find best fuzzy match, preferring companies that existed at claimed start date
|
||||
bestMatch = FindBestMatch(companyName, searchResponse.Items, startDate);
|
||||
|
||||
if (bestMatch is not null)
|
||||
{
|
||||
_logger.LogDebug("Found match with query '{Query}': {Company}", query, bestMatch.Value.Item.Title);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (bestMatch is null)
|
||||
{
|
||||
_logger.LogDebug("No fuzzy match above threshold for: {CompanyName}", companyName);
|
||||
_logger.LogDebug("No valid match found for: {CompanyName} after trying {Count} queries", companyName, searchQueries.Count);
|
||||
return CreateUnverifiedResult(companyName, startDate, endDate, jobTitle,
|
||||
"Company name could not be verified against official records");
|
||||
}
|
||||
@@ -768,5 +782,83 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Generates alternative search queries to find companies that may be registered
/// with slightly different names (e.g., "U.K." vs "UK", "Limited" vs "Ltd").
/// </summary>
/// <param name="companyName">The company name as claimed (for example, on a CV).</param>
/// <returns>
/// A de-duplicated (case-insensitive) list of queries in priority order: the trimmed
/// original name first, then suffix variants, period variants, UK-specific variants,
/// and finally the core name with its legal suffix removed. A null or blank input
/// yields a single-element list containing the input unchanged (or an empty string for null).
/// </returns>
private static List<string> GenerateSearchQueries(string companyName)
{
    // Nothing meaningful to vary for blank input; avoid dereferencing null.
    if (string.IsNullOrWhiteSpace(companyName))
    {
        return new List<string> { companyName ?? string.Empty };
    }

    var normalized = companyName.Trim();

    // A List keeps the priority order explicit (HashSet enumeration order is not
    // guaranteed); the HashSet is only used for case-insensitive de-duplication.
    var queries = new List<string>();
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    void AddQuery(string candidate)
    {
        if (candidate.Length > 0 && seen.Add(candidate))
        {
            queries.Add(candidate);
        }
    }

    // The name as given always has the highest priority.
    AddQuery(normalized);

    // Common suffixes to try variations of
    var suffixPatterns = new[]
    {
        (" Ltd", " Limited"),
        (" Limited", " Ltd"),
        (" PLC", " Public Limited Company"),
        (" Public Limited Company", " PLC"),
        (" LLP", " Limited Liability Partnership"),
        (" Limited Liability Partnership", " LLP"),
    };

    // Try suffix variations
    foreach (var (from, to) in suffixPatterns)
    {
        if (normalized.EndsWith(from, StringComparison.OrdinalIgnoreCase))
        {
            AddQuery(normalized[..^from.Length] + to);
        }
    }

    // Add periods to stand-alone two-letter upper-case codes (UK -> U.K., US -> U.S.)
    AddQuery(System.Text.RegularExpressions.Regex.Replace(
        normalized,
        @"\b([A-Z])([A-Z])\b",
        "$1.$2."));

    // Remove periods from dotted two-letter codes (U.K. -> UK, U.S. -> US).
    // A trailing \b cannot be used here: there is no word boundary between the final
    // '.' and a following space or end of string, so the pattern would never match.
    // The lookarounds also leave longer abbreviations such as "A.B.C." untouched.
    AddQuery(System.Text.RegularExpressions.Regex.Replace(
        normalized,
        @"(?<![\w.])([A-Z])\.([A-Z])\.(?!\w)",
        "$1$2"));

    // Also try "UK" <-> "U.K." specifically, ignoring case (e.g. "Mattel Uk Ltd").
    // Only a stand-alone token preceded by a space and followed by a space or the end
    // of the name is rewritten, so words that merely start with "UK" (e.g. "Ukraine")
    // are left alone.
    AddQuery(System.Text.RegularExpressions.Regex.Replace(
        normalized,
        @"(?<= )UK(?= |$)",
        "U.K.",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase));

    AddQuery(System.Text.RegularExpressions.Regex.Replace(
        normalized,
        @"(?<= )U\.K\.(?= |$)",
        "UK",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase));

    // Remove common suffixes to get core name (long forms included so they agree
    // with the suffix variations handled above)
    var suffixesToRemove = new[]
    {
        " Public Limited Company",
        " Limited Liability Partnership",
        " Ltd",
        " Limited",
        " PLC",
        " LLP",
        " Inc",
        " Corporation",
        " Corp",
    };

    var coreName = normalized;
    foreach (var suffix in suffixesToRemove)
    {
        if (coreName.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
        {
            coreName = coreName[..^suffix.Length].Trim();
            break;
        }
    }

    // Very short core names would produce overly broad searches, so skip them.
    if (coreName != normalized && coreName.Length >= 3)
    {
        AddQuery(coreName);

        // Also try core name with "Limited" appended
        AddQuery(coreName + " Limited");
    }

    return queries;
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user