Add first-word parent company extraction

For company names like "UNILEVER BESTFOOD", now also tries searching
for just the first word ("UNILEVER") as it may be the registered
parent company name. Adds PLC/Limited variations for major corps.

Skips common prefixes like "The", "UK", "British", etc., as well as any
first word shorter than four characters.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-20 22:04:44 +01:00
parent 02147d1221
commit 7f4c2362f0

View File

@@ -843,6 +843,28 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
} }
} }
// Step 0c: Also query the leading word on its own, since it may be the parent company
// (e.g., "UNILEVER BESTFOOD" -> "UNILEVER"). Many names follow the shape
// "ParentCompany Division" or "ParentCompany Brand".
var words = normalized.Split(' ', StringSplitOptions.RemoveEmptyEntries);
if (words.Length >= 2)
{
    var candidate = words[0];

    // Generic leading words that are not worth searching for by themselves.
    var genericPrefixes = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        "the", "a", "an", "uk", "british", "national", "international", "global", "new",
    };

    // The candidate must be at least four characters long and not a generic prefix.
    var isSubstantial = candidate.Length >= 4 && !genericPrefixes.Contains(candidate);
    if (isSubstantial)
    {
        foreach (var variation in GenerateNameVariations(candidate))
        {
            queries.Add(variation);
        }

        // Major corporations are often registered as "<Name> PLC" or "<Name> Limited".
        foreach (var suffix in new[] { " PLC", " Limited" })
        {
            queries.Add(candidate + suffix);
        }
    }
}
// Also add variations of the full original name // Also add variations of the full original name
foreach (var query in GenerateNameVariations(normalized)) foreach (var query in GenerateNameVariations(normalized))
{ {