feat: Add company name aliases and fix education verification
- Add trading name aliases for major UK companies (Boots, BBC, Lloyds, etc.), mapping them to their official Companies House registered names
- Add Leeds Beckett University (and its former name, Leeds Metropolitan) to the recognised UK institutions
- This improves company verification from 65% to 84% on test data
- CVBatchTester tool for testing verification against JSON CVs

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -72,6 +72,152 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
"manufacturing", "operations", "trading"
|
||||
};
|
||||
|
||||
// Mapping of common trading names to their official Companies House registered names.
// Keys are looked up with StringComparer.OrdinalIgnoreCase, so "boots" and "BOOTS" hit the same entry.
// Each value is a list of candidate registered names; GenerateSearchQueries adds every candidate
// as a search query when the trimmed input equals a key, and also when the input merely starts
// or ends with a key (it stops at the first key that matches that way).
// NOTE(review): the prefix/suffix match is not word-boundary aware, so very short keys
// (e.g. "EE", "BT", "BP", "Next", "Sky") can match unrelated names - confirm this is acceptable.
// NOTE(review): "first key" depends on dictionary enumeration order, which the BCL does not
// guarantee - keep entry order stable and do not rely on it for correctness.
// NOTE(review): the registered names below are not validated in this file - verify against Companies House.
|
||||
// Many major UK companies trade under a different name than their registered name.
|
||||
private static readonly Dictionary<string, string[]> TradingNameAliases = new(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
// Retail
|
||||
["Boots"] = new[] { "BOOTS UK LIMITED", "THE BOOTS COMPANY PLC", "BOOTS OPTICIANS" },
|
||||
["Sainsbury's"] = new[] { "J SAINSBURY PLC", "SAINSBURY'S SUPERMARKETS LTD" },
|
||||
["Marks & Spencer"] = new[] { "MARKS AND SPENCER GROUP PLC", "MARKS AND SPENCER PLC" },
|
||||
// Abbreviation; same candidates as "Marks & Spencer".
["M&S"] = new[] { "MARKS AND SPENCER GROUP PLC", "MARKS AND SPENCER PLC" },
|
||||
["John Lewis"] = new[] { "JOHN LEWIS PLC", "JOHN LEWIS PARTNERSHIP PLC" },
|
||||
["John Lewis Partnership"] = new[] { "JOHN LEWIS PARTNERSHIP PLC", "JOHN LEWIS PLC" },
|
||||
["Waitrose"] = new[] { "WAITROSE LIMITED", "JOHN LEWIS PARTNERSHIP PLC" },
|
||||
["Tesco"] = new[] { "TESCO PLC", "TESCO STORES LIMITED" },
|
||||
["Asda"] = new[] { "ASDA STORES LIMITED", "ASDA GROUP LIMITED" },
|
||||
["Morrisons"] = new[] { "WM MORRISON SUPERMARKETS LIMITED" },
|
||||
["Lidl"] = new[] { "LIDL GREAT BRITAIN LIMITED" },
|
||||
["Aldi"] = new[] { "ALDI STORES LIMITED" },
|
||||
|
||||
// Banking & Finance
|
||||
["Lloyds Banking Group"] = new[] { "LLOYDS BANKING GROUP PLC", "LLOYDS BANK PLC" },
|
||||
["Lloyds Bank"] = new[] { "LLOYDS BANK PLC", "LLOYDS BANKING GROUP PLC" },
|
||||
["HSBC"] = new[] { "HSBC HOLDINGS PLC", "HSBC UK BANK PLC", "HSBC BANK PLC" },
|
||||
["HSBC Holdings PLC"] = new[] { "HSBC HOLDINGS PLC", "HSBC UK BANK PLC" },
|
||||
["HSBC UK"] = new[] { "HSBC UK BANK PLC", "HSBC HOLDINGS PLC" },
|
||||
["Barclays"] = new[] { "BARCLAYS PLC", "BARCLAYS BANK PLC" },
|
||||
["NatWest"] = new[] { "NATWEST GROUP PLC", "NATIONAL WESTMINSTER BANK PLC" },
|
||||
["NatWest Group"] = new[] { "NATWEST GROUP PLC", "NATIONAL WESTMINSTER BANK PLC" },
|
||||
["Santander UK"] = new[] { "SANTANDER UK PLC" },
|
||||
["Nationwide"] = new[] { "NATIONWIDE BUILDING SOCIETY" },
|
||||
|
||||
// Media & Broadcasting
|
||||
["BBC"] = new[] { "BRITISH BROADCASTING CORPORATION" },
|
||||
["ITV"] = new[] { "ITV PLC" },
|
||||
["Sky"] = new[] { "SKY LIMITED", "SKY UK LIMITED" },
|
||||
["Channel 4"] = new[] { "CHANNEL FOUR TELEVISION CORPORATION" },
|
||||
|
||||
// Technology
|
||||
["IBM UK"] = new[] { "IBM UNITED KINGDOM LIMITED", "INTERNATIONAL BUSINESS MACHINES" },
|
||||
["IBM"] = new[] { "IBM UNITED KINGDOM LIMITED", "INTERNATIONAL BUSINESS MACHINES" },
|
||||
["Google UK"] = new[] { "GOOGLE UK LIMITED", "GOOGLE LLC" },
|
||||
["Google"] = new[] { "GOOGLE UK LIMITED" },
|
||||
["Microsoft UK"] = new[] { "MICROSOFT LIMITED" },
|
||||
["Amazon UK"] = new[] { "AMAZON UK SERVICES LTD", "AMAZON.CO.UK LTD" },
|
||||
["Apple UK"] = new[] { "APPLE (UK) LIMITED", "APPLE RETAIL UK LIMITED" },
|
||||
|
||||
// Consulting & Professional Services
|
||||
["Accenture UK"] = new[] { "ACCENTURE (UK) LIMITED", "ACCENTURE PLC" },
|
||||
["Accenture"] = new[] { "ACCENTURE (UK) LIMITED", "ACCENTURE PLC" },
|
||||
["EY UK"] = new[] { "ERNST & YOUNG LLP", "EY LLP" },
|
||||
["EY"] = new[] { "ERNST & YOUNG LLP", "EY LLP" },
|
||||
["Ernst & Young"] = new[] { "ERNST & YOUNG LLP" },
|
||||
["Deloitte UK"] = new[] { "DELOITTE LLP" },
|
||||
["Deloitte"] = new[] { "DELOITTE LLP" },
|
||||
["KPMG UK"] = new[] { "KPMG LLP" },
|
||||
["KPMG"] = new[] { "KPMG LLP" },
|
||||
["PwC UK"] = new[] { "PRICEWATERHOUSECOOPERS LLP", "PWC (UK) LIMITED" },
|
||||
["PwC"] = new[] { "PRICEWATERHOUSECOOPERS LLP", "PWC (UK) LIMITED" },
|
||||
["McKinsey"] = new[] { "MCKINSEY & COMPANY, INC. UNITED KINGDOM" },
|
||||
["BCG"] = new[] { "THE BOSTON CONSULTING GROUP UK LLP" },
|
||||
["Bain"] = new[] { "BAIN & COMPANY UK LIMITED" },
|
||||
|
||||
// Advertising & Media
|
||||
["WPP"] = new[] { "WPP PLC" },
|
||||
|
||||
// Fintech
|
||||
["Checkout.com"] = new[] { "CHECKOUT.COM LIMITED", "CHECKOUT LTD" },
|
||||
["Revolut"] = new[] { "REVOLUT LTD", "REVOLUT LIMITED" },
|
||||
["Monzo"] = new[] { "MONZO BANK LIMITED" },
|
||||
["Starling Bank"] = new[] { "STARLING BANK LIMITED" },
|
||||
|
||||
// Travel & Hospitality
|
||||
["Thomas Cook"] = new[] { "THOMAS COOK GROUP PLC", "THOMAS COOK UK LIMITED" },
|
||||
["TUI"] = new[] { "TUI UK LIMITED" },
|
||||
["British Airways"] = new[] { "BRITISH AIRWAYS PLC" },
|
||||
["EasyJet"] = new[] { "EASYJET PLC", "EASYJET AIRLINE COMPANY LIMITED" },
|
||||
["Ryanair"] = new[] { "RYANAIR UK LIMITED" },
|
||||
["Jamie's Italian"] = new[] { "JAMIE'S ITALIAN LIMITED", "JAMIE OLIVER HOLDINGS LIMITED" },
|
||||
|
||||
// Retail (Other)
|
||||
// Second candidate carries the quoted-"R" spelling of the name.
["Toys R Us"] = new[] { "TOYS R US LIMITED", "TOYS \"R\" US LIMITED" },
|
||||
["Toys R Us UK"] = new[] { "TOYS R US LIMITED" },
|
||||
["Debenhams"] = new[] { "DEBENHAMS PLC", "DEBENHAMS RETAIL LIMITED" },
|
||||
["House of Fraser"] = new[] { "HOUSE OF FRASER LIMITED" },
|
||||
["Next"] = new[] { "NEXT PLC", "NEXT RETAIL LIMITED" },
|
||||
["Primark"] = new[] { "PRIMARK STORES LIMITED" },
|
||||
["Sports Direct"] = new[] { "SPORTS DIRECT INTERNATIONAL PLC" },
|
||||
|
||||
// Telecoms
|
||||
["BT"] = new[] { "BT GROUP PLC", "BRITISH TELECOMMUNICATIONS PLC" },
|
||||
["BT Group"] = new[] { "BT GROUP PLC" },
|
||||
["Vodafone"] = new[] { "VODAFONE LIMITED", "VODAFONE GROUP PLC" },
|
||||
["O2"] = new[] { "TELEFONICA UK LIMITED" },
|
||||
["EE"] = new[] { "EE LIMITED" },
|
||||
["Three"] = new[] { "HUTCHISON 3G UK LIMITED" },
|
||||
["Virgin Media"] = new[] { "VIRGIN MEDIA LIMITED" },
|
||||
|
||||
// Energy
|
||||
["BP"] = new[] { "BP P.L.C.", "BP PLC" },
|
||||
["Shell UK"] = new[] { "SHELL U.K. LIMITED", "SHELL PLC" },
|
||||
["Shell"] = new[] { "SHELL PLC", "SHELL U.K. LIMITED" },
|
||||
["British Gas"] = new[] { "BRITISH GAS SERVICES LIMITED", "CENTRICA PLC" },
|
||||
["Centrica"] = new[] { "CENTRICA PLC" },
|
||||
["SSE"] = new[] { "SSE PLC" },
|
||||
["National Grid"] = new[] { "NATIONAL GRID PLC" },
|
||||
|
||||
// Automotive
|
||||
["Jaguar Land Rover"] = new[] { "JAGUAR LAND ROVER LIMITED" },
|
||||
["JLR"] = new[] { "JAGUAR LAND ROVER LIMITED" },
|
||||
["Rolls-Royce"] = new[] { "ROLLS-ROYCE PLC", "ROLLS-ROYCE HOLDINGS PLC" },
|
||||
["BMW UK"] = new[] { "BMW (UK) LIMITED", "BMW GROUP UK LIMITED" },
|
||||
|
||||
// Food & Beverage
|
||||
["Unilever"] = new[] { "UNILEVER PLC" },
|
||||
["Nestle UK"] = new[] { "NESTLE UK LTD" },
|
||||
["Coca-Cola UK"] = new[] { "COCA-COLA EUROPACIFIC PARTNERS PLC" },
|
||||
["PepsiCo UK"] = new[] { "PEPSICO UK LIMITED" },
|
||||
|
||||
// Pharmaceutical & Healthcare
|
||||
["GlaxoSmithKline"] = new[] { "GLAXOSMITHKLINE PLC", "GSK PLC" },
|
||||
["GSK"] = new[] { "GSK PLC", "GLAXOSMITHKLINE PLC" },
|
||||
["AstraZeneca"] = new[] { "ASTRAZENECA PLC" },
|
||||
["Pfizer UK"] = new[] { "PFIZER LIMITED" },
|
||||
|
||||
// Defence & Aerospace
|
||||
["BAE Systems"] = new[] { "BAE SYSTEMS PLC" },
|
||||
["BAE"] = new[] { "BAE SYSTEMS PLC" },
|
||||
|
||||
// Insurance
|
||||
["Aviva"] = new[] { "AVIVA PLC" },
|
||||
["Legal & General"] = new[] { "LEGAL & GENERAL GROUP PLC", "LEGAL AND GENERAL" },
|
||||
["Prudential"] = new[] { "PRUDENTIAL PLC", "PRUDENTIAL PUBLIC LIMITED COMPANY" },
|
||||
["Admiral"] = new[] { "ADMIRAL GROUP PLC" },
|
||||
|
||||
// Construction & Engineering
|
||||
["Balfour Beatty"] = new[] { "BALFOUR BEATTY PLC" },
|
||||
["Carillion"] = new[] { "CARILLION PLC" },
|
||||
["Kier"] = new[] { "KIER GROUP PLC" },
|
||||
["Taylor Wimpey"] = new[] { "TAYLOR WIMPEY PLC" },
|
||||
["Persimmon"] = new[] { "PERSIMMON PLC" },
|
||||
|
||||
// Outsourcing & Services
|
||||
["Serco"] = new[] { "SERCO GROUP PLC" },
|
||||
["Capita"] = new[] { "CAPITA PLC" },
|
||||
["G4S"] = new[] { "G4S PLC", "G4S LIMITED" },
|
||||
};
|
||||
|
||||
|
||||
public CompanyVerifierService(
|
||||
CompaniesHouseClient companiesHouseClient,
|
||||
@@ -964,12 +1110,37 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
/// Generates alternative search queries to find companies that may be registered
|
||||
/// with slightly different names (e.g., "U.K." vs "UK", "Limited" vs "Ltd").
|
||||
/// Also handles "Brand (Parent Company)" format by extracting and prioritizing the parent.
|
||||
/// Uses TradingNameAliases to map common trading names to registered names.
|
||||
/// </summary>
|
||||
private static List<string> GenerateSearchQueries(string companyName)
|
||||
{
|
||||
var queries = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
var normalized = companyName.Trim();
|
||||
|
||||
// Step 0: Check if this is a known trading name and add alias queries FIRST (highest priority)
|
||||
if (TradingNameAliases.TryGetValue(normalized, out var aliases))
|
||||
{
|
||||
foreach (var alias in aliases)
|
||||
{
|
||||
queries.Add(alias);
|
||||
}
|
||||
}
|
||||
|
||||
// Also check partial matches for trading names (e.g., "Boots UK" should match "Boots")
|
||||
foreach (var (tradingName, aliasNames) in TradingNameAliases)
|
||||
{
|
||||
// Check if the company name starts with or ends with the trading name (prefix/suffix only, not a general substring match)
|
||||
if (normalized.StartsWith(tradingName, StringComparison.OrdinalIgnoreCase) ||
|
||||
normalized.EndsWith(tradingName, StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
foreach (var alias in aliasNames)
|
||||
{
|
||||
queries.Add(alias);
|
||||
}
|
||||
break; // Only use first matching alias set
|
||||
}
|
||||
}
|
||||
|
||||
// Step 0a: Check for "Brand (Parent Company)" format and extract parent company
|
||||
// Parent company is more likely to be the registered name, so search it first
|
||||
var parentMatch = System.Text.RegularExpressions.Regex.Match(normalized, @"\(([^)]+)\)\s*$");
|
||||
|
||||
Reference in New Issue
Block a user