From 7f4c2362f06e82826e8d24af9164d7432b3db402 Mon Sep 17 00:00:00 2001 From: peter Date: Tue, 20 Jan 2026 22:04:44 +0100 Subject: [PATCH] Add first-word parent company extraction For company names like "UNILEVER BESTFOOD", now also tries searching for just the first word ("UNILEVER") as it may be the registered parent company name. Adds PLC/Limited variations for major corps. Skips common prefixes like "The", "UK", "British", etc. Co-Authored-By: Claude Opus 4.5 --- .../Services/CompanyVerifierService.cs | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs b/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs index 2aff125..3edad7e 100644 --- a/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs +++ b/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs @@ -843,6 +843,28 @@ public sealed class CompanyVerifierService : ICompanyVerifierService } } + // Step 0c: Try first word as potential parent company (e.g., "UNILEVER BESTFOOD" -> "UNILEVER") + // Many company names are "ParentCompany Division" or "ParentCompany Brand" + var words = normalized.Split(' ', StringSplitOptions.RemoveEmptyEntries); + if (words.Length >= 2) + { + var firstWord = words[0]; + // Only try if first word is substantial (not "The", "A", common prefixes) + var skipWords = new HashSet<string>(StringComparer.OrdinalIgnoreCase) + { "the", "a", "an", "uk", "british", "national", "international", "global", "new" }; + + if (firstWord.Length >= 4 && !skipWords.Contains(firstWord)) + { + foreach (var firstWordQuery in GenerateNameVariations(firstWord)) + { + queries.Add(firstWordQuery); + } + // Also try first word + PLC/Limited for major corporations + queries.Add(firstWord + " PLC"); + queries.Add(firstWord + " Limited"); + } + } + // Also add variations of the full original name foreach (var query in GenerateNameVariations(normalized)) {