Add first-word parent company extraction
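For company names like "UNILEVER BESTFOOD", the verifier now also
searches for just the first word ("UNILEVER"), since that may be the
registered parent company name. It also adds "PLC" and "Limited"
variations of the first word, to catch major corporations. The first
word is skipped when it is shorter than four characters or is a common
prefix such as "The", "UK", "British", etc.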
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -843,6 +843,28 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
}
|
||||
}
|
||||
|
||||
// Step 0c: Try first word as potential parent company (e.g., "UNILEVER BESTFOOD" -> "UNILEVER")
|
||||
// Many company names are "ParentCompany Division" or "ParentCompany Brand"
|
||||
var words = normalized.Split(' ', StringSplitOptions.RemoveEmptyEntries);
|
||||
if (words.Length >= 2)
|
||||
{
|
||||
var firstWord = words[0];
|
||||
// Only try if first word is substantial (not "The", "A", common prefixes)
|
||||
var skipWords = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
|
||||
{ "the", "a", "an", "uk", "british", "national", "international", "global", "new" };
|
||||
|
||||
if (firstWord.Length >= 4 && !skipWords.Contains(firstWord))
|
||||
{
|
||||
foreach (var firstWordQuery in GenerateNameVariations(firstWord))
|
||||
{
|
||||
queries.Add(firstWordQuery);
|
||||
}
|
||||
// Also try first word + PLC/Limited for major corporations
|
||||
queries.Add(firstWord + " PLC");
|
||||
queries.Add(firstWord + " Limited");
|
||||
}
|
||||
}
|
||||
|
||||
// Also add variations of the full original name
|
||||
foreach (var query in GenerateNameVariations(normalized))
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user