diff --git a/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs b/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs index 23d0ff3..2aff125 100644 --- a/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs +++ b/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs @@ -90,7 +90,8 @@ public sealed class CompanyVerifierService : ICompanyVerifierService } // Find best fuzzy match, preferring companies that existed at claimed start date - bestMatch = FindBestMatch(companyName, searchResponse.Items, startDate); + // Pass both original name and search query for matching flexibility + bestMatch = FindBestMatch(companyName, query, searchResponse.Items, startDate); if (bestMatch is not null) { @@ -586,18 +587,29 @@ public sealed class CompanyVerifierService : ICompanyVerifierService private (CompaniesHouseSearchItem Item, int Score)? FindBestMatch( string companyName, + string searchQuery, List items, DateOnly? claimedStartDate) { - var normalizedSearch = companyName.ToUpperInvariant(); + var normalizedOriginal = companyName.ToUpperInvariant(); + var normalizedQuery = searchQuery.ToUpperInvariant(); + // Match against both the original company name AND the search query used + // This handles cases like "Matthew Walker (Northern Foods Plc)" where we + // search for "Northern Foods Plc" but need to match against it, not the full name var matches = items .Where(item => !string.IsNullOrWhiteSpace(item.Title)) - .Select(item => (Item: item, Score: Fuzz.TokenSetRatio(normalizedSearch, item.Title.ToUpperInvariant()))) + .Select(item => + { + var itemTitle = item.Title.ToUpperInvariant(); + var scoreVsOriginal = Fuzz.TokenSetRatio(normalizedOriginal, itemTitle); + var scoreVsQuery = Fuzz.TokenSetRatio(normalizedQuery, itemTitle); + return (Item: item, Score: Math.Max(scoreVsOriginal, scoreVsQuery)); + }) .Where(m => m.Score >= FuzzyMatchThreshold) .ToList(); - _logger.LogDebug("Found {Count} matches above threshold for '{CompanyName}'", matches.Count, companyName); + _logger.LogDebug("Found {Count} matches above threshold for '{CompanyName}' (query: '{Query}')", matches.Count, companyName, searchQuery); foreach (var m in matches.Take(5)) { _logger.LogDebug(" Match: {Title} ({Number}), Score: {Score}, DateOfCreation: {Date}", @@ -785,63 +797,125 @@ public sealed class CompanyVerifierService : ICompanyVerifierService /// /// Generates alternative search queries to find companies that may be registered /// with slightly different names (e.g., "U.K." vs "UK", "Limited" vs "Ltd"). + /// Also handles "Brand (Parent Company)" format by extracting and prioritizing the parent. /// private static List GenerateSearchQueries(string companyName) { - var queries = new HashSet(StringComparer.OrdinalIgnoreCase) { companyName }; + var queries = new HashSet(StringComparer.OrdinalIgnoreCase); var normalized = companyName.Trim(); - // Step 1: Generate UK/U.K. variations - var ukVariants = new List { normalized }; + // Step 0a: Check for "Brand (Parent Company)" format and extract parent company + // Parent company is more likely to be the registered name, so search it first + var parentMatch = System.Text.RegularExpressions.Regex.Match(normalized, @"\(([^)]+)\)\s*$"); + if (parentMatch.Success) + { + var parentCompany = parentMatch.Groups[1].Value.Trim(); + // Generate queries for parent company first (higher priority) + foreach (var parentQuery in GenerateNameVariations(parentCompany)) + { + queries.Add(parentQuery); + } + // Also try the brand name without parenthetical + var brandName = normalized[..parentMatch.Index].Trim(); + if (brandName.Length >= 3) + { + foreach (var brandQuery in GenerateNameVariations(brandName)) + { + queries.Add(brandQuery); + } + } + } - if (normalized.Contains(" UK", StringComparison.OrdinalIgnoreCase)) + // Step 0b: Check for "Name1/Name2" format (e.g., "ASDA/WALMART") + // Try each part separately as they may be different registered names + if (normalized.Contains('/')) + { + var parts = normalized.Split('/', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); + foreach (var part in parts) + { + if (part.Length >= 3) + { + foreach (var partQuery in GenerateNameVariations(part)) + { + queries.Add(partQuery); + } + } + } + } + + // Also add variations of the full original name + foreach (var query in GenerateNameVariations(normalized)) + { + queries.Add(query); + } + + return queries.ToList(); + } + + /// + /// Generates name variations for a single company name (UK/U.K., Ltd/Limited, etc.) + /// + private static List GenerateNameVariations(string name) + { + var variations = new HashSet(StringComparer.OrdinalIgnoreCase) { name }; + + // Step 1: Generate UK/U.K. variations + var ukVariants = new List { name }; + + if (name.Contains(" UK", StringComparison.OrdinalIgnoreCase)) { // Add U.K. variant - var withDots = normalized + var withDots = name .Replace(" UK ", " U.K. ", StringComparison.OrdinalIgnoreCase) .Replace(" UK", " U.K.", StringComparison.OrdinalIgnoreCase); - if (withDots != normalized) + if (withDots != name) ukVariants.Add(withDots); } - if (normalized.Contains(" U.K.", StringComparison.OrdinalIgnoreCase)) + if (name.Contains(" U.K.", StringComparison.OrdinalIgnoreCase)) { // Add UK variant (no dots) - var withoutDots = normalized + var withoutDots = name .Replace(" U.K. ", " UK ", StringComparison.OrdinalIgnoreCase) .Replace(" U.K.", " UK", StringComparison.OrdinalIgnoreCase); - if (withoutDots != normalized) + if (withoutDots != name) ukVariants.Add(withoutDots); } // Step 2: For each UK variant, generate suffix variations (Ltd/Limited) foreach (var variant in ukVariants) { - queries.Add(variant); + variations.Add(variant); // Try Ltd -> Limited if (variant.EndsWith(" Ltd", StringComparison.OrdinalIgnoreCase)) { - queries.Add(variant[..^4] + " Limited"); + variations.Add(variant[..^4] + " Limited"); } // Try Limited -> Ltd else if (variant.EndsWith(" Limited", StringComparison.OrdinalIgnoreCase)) { - queries.Add(variant[..^8] + " Ltd"); + variations.Add(variant[..^8] + " Ltd"); } // Try PLC variations else if (variant.EndsWith(" PLC", StringComparison.OrdinalIgnoreCase)) { - queries.Add(variant[..^4] + " Public Limited Company"); + variations.Add(variant[..^4] + " Public Limited Company"); } else if (variant.EndsWith(" Public Limited Company", StringComparison.OrdinalIgnoreCase)) { - queries.Add(variant[..^24] + " PLC"); + variations.Add(variant[..^24] + " PLC"); + } + // Try Plc (mixed case) variations + else if (variant.EndsWith(" Plc", StringComparison.Ordinal)) + { + variations.Add(variant[..^4] + " PLC"); + variations.Add(variant[..^4] + " Public Limited Company"); } } // Step 3: Try core name without suffix - var suffixesToRemove = new[] { " Ltd", " Limited", " PLC", " LLP", " Inc", " Corporation", " Corp" }; - var coreName = normalized; + var suffixesToRemove = new[] { " Ltd", " Limited", " PLC", " Plc", " LLP", " Inc", " Corporation", " Corp" }; + var coreName = name; foreach (var suffix in suffixesToRemove) { if (coreName.EndsWith(suffix, StringComparison.OrdinalIgnoreCase)) @@ -850,10 +924,11 @@ public sealed class CompanyVerifierService : ICompanyVerifierService break; } } - if (coreName != normalized && coreName.Length >= 3) + if (coreName != name && coreName.Length >= 3) { - queries.Add(coreName); - queries.Add(coreName + " Limited"); + variations.Add(coreName); + variations.Add(coreName + " Limited"); + variations.Add(coreName + " PLC"); // Also add U.K. variant of core name if applicable if (coreName.Contains(" UK", StringComparison.OrdinalIgnoreCase)) @@ -861,12 +936,12 @@ public sealed class CompanyVerifierService : ICompanyVerifierService var coreWithDots = coreName .Replace(" UK ", " U.K. ", StringComparison.OrdinalIgnoreCase) .Replace(" UK", " U.K.", StringComparison.OrdinalIgnoreCase); - queries.Add(coreWithDots); - queries.Add(coreWithDots + " Limited"); + variations.Add(coreWithDots); + variations.Add(coreWithDots + " Limited"); } } - return queries.ToList(); + return variations.ToList(); } #endregion diff --git a/src/TrueCV.Web/Components/Pages/Report.razor b/src/TrueCV.Web/Components/Pages/Report.razor index 3bc1e6f..2f77264 100644 --- a/src/TrueCV.Web/Components/Pages/Report.razor +++ b/src/TrueCV.Web/Components/Pages/Report.razor @@ -144,70 +144,6 @@ - -
-
-
- - - - - Score Breakdown -
-
-
- @{ - var deductedFlags = _report.Flags - .GroupBy(f => (f.Title, f.Description)) - .Select(g => g.First()) - .Where(f => f.ScoreImpact < 0) - .ToList(); - var totalDeductions = deductedFlags.Sum(f => Math.Abs(f.ScoreImpact)); - } -
-
- - - - - - - @foreach (var flag in deductedFlags.OrderByDescending(f => Math.Abs(f.ScoreImpact))) - { - - - - - } - - - - - - - - - -
Base Score100 pts
@FormatFlagTitle(flag.Title)-@Math.Abs(flag.ScoreImpact) pts
Total Deductions-@totalDeductions pts
Final Score@_report.OverallScore pts
-
-
-
-
How Scoring Works
-
    -
  • Everyone starts with 100 points
  • -
  • Points are deducted for issues found during verification
  • -
  • Unverified companies: -10 pts each
  • -
  • Director claims not verified: -20 pts each
  • -
  • Employment before company existed: -20 pts
  • -
  • Employment gaps: -1 to -10 pts per gap
  • -
  • Concurrent employment is noted but not penalised
  • -
-
-
-
-
-
-
@@ -229,6 +165,7 @@ Matched Company Match Score Status + Points @@ -284,11 +221,24 @@ Unverified } + + @{ + var companyPoints = GetPointsForCompany(verification.ClaimedCompany, verification.MatchedCompanyName); + } + @if (companyPoints < 0) + { + @companyPoints + } + else + { + 0 + } + @if (!string.IsNullOrEmpty(verification.VerificationNotes)) { - + @@ -752,4 +702,20 @@ _ => title }; } + + private int GetPointsForCompany(string claimedCompany, string? matchedCompany) + { + if (_report?.Flags is null) return 0; + + // Sum up all flags that mention this company in their description + var companyFlags = _report.Flags + .Where(f => f.ScoreImpact < 0 && + ((!string.IsNullOrEmpty(f.Description) && f.Description.Contains(claimedCompany, StringComparison.OrdinalIgnoreCase)) || + (!string.IsNullOrEmpty(matchedCompany) && !string.IsNullOrEmpty(f.Description) && f.Description.Contains(matchedCompany, StringComparison.OrdinalIgnoreCase)))) + .GroupBy(f => (f.Title, f.Description)) + .Select(g => g.First()) + .ToList(); + + return companyFlags.Sum(f => f.ScoreImpact); + } }