Improve company matching and move points to employment table
Company Matching:
- Extract parent company from "Brand (Parent Company)" format
- Handle slash-separated names like "ASDA/WALMART"
- Match against both original name and search query for flexibility
- Add PLC/Plc case variations

Report UI:
- Remove separate Score Breakdown section
- Add Points column to Employment Verification table
- Calculate points per company from matching flags

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -90,7 +90,8 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
}
|
||||
|
||||
// Find best fuzzy match, preferring companies that existed at claimed start date
|
||||
bestMatch = FindBestMatch(companyName, searchResponse.Items, startDate);
|
||||
// Pass both original name and search query for matching flexibility
|
||||
bestMatch = FindBestMatch(companyName, query, searchResponse.Items, startDate);
|
||||
|
||||
if (bestMatch is not null)
|
||||
{
|
||||
@@ -586,18 +587,29 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
|
||||
private (CompaniesHouseSearchItem Item, int Score)? FindBestMatch(
|
||||
string companyName,
|
||||
string searchQuery,
|
||||
List<CompaniesHouseSearchItem> items,
|
||||
DateOnly? claimedStartDate)
|
||||
{
|
||||
var normalizedSearch = companyName.ToUpperInvariant();
|
||||
var normalizedOriginal = companyName.ToUpperInvariant();
|
||||
var normalizedQuery = searchQuery.ToUpperInvariant();
|
||||
|
||||
// Match against both the original company name AND the search query used
|
||||
// This handles cases like "Matthew Walker (Northern Foods Plc)" where we
|
||||
// search for "Northern Foods Plc" but need to match against it, not the full name
|
||||
var matches = items
|
||||
.Where(item => !string.IsNullOrWhiteSpace(item.Title))
|
||||
.Select(item => (Item: item, Score: Fuzz.TokenSetRatio(normalizedSearch, item.Title.ToUpperInvariant())))
|
||||
.Select(item =>
|
||||
{
|
||||
var itemTitle = item.Title.ToUpperInvariant();
|
||||
var scoreVsOriginal = Fuzz.TokenSetRatio(normalizedOriginal, itemTitle);
|
||||
var scoreVsQuery = Fuzz.TokenSetRatio(normalizedQuery, itemTitle);
|
||||
return (Item: item, Score: Math.Max(scoreVsOriginal, scoreVsQuery));
|
||||
})
|
||||
.Where(m => m.Score >= FuzzyMatchThreshold)
|
||||
.ToList();
|
||||
|
||||
_logger.LogDebug("Found {Count} matches above threshold for '{CompanyName}'", matches.Count, companyName);
|
||||
_logger.LogDebug("Found {Count} matches above threshold for '{CompanyName}' (query: '{Query}')", matches.Count, companyName, searchQuery);
|
||||
foreach (var m in matches.Take(5))
|
||||
{
|
||||
_logger.LogDebug(" Match: {Title} ({Number}), Score: {Score}, DateOfCreation: {Date}",
|
||||
@@ -785,63 +797,125 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
/// <summary>
/// Generates alternative search queries to find companies that may be registered
/// with slightly different names (e.g., "U.K." vs "UK", "Limited" vs "Ltd").
/// Also handles "Brand (Parent Company)" format by extracting and prioritizing the parent,
/// and "Name1/Name2" format (e.g., "ASDA/WALMART") by trying each part separately.
/// </summary>
/// <param name="companyName">Company name as claimed; leading/trailing whitespace is trimmed.</param>
/// <returns>
/// Distinct (case-insensitive) queries in priority order: parent company variations,
/// brand variations, slash-separated part variations, then variations of the full name.
/// </returns>
private static List<string> GenerateSearchQueries(string companyName)
{
    // HashSet<T> does not guarantee enumeration order, and the order matters here
    // (parent company should be searched first). Keep an ordered list for the result
    // and use the set purely for case-insensitive de-duplication.
    var ordered = new List<string>();
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    void AddVariations(string name)
    {
        foreach (var variation in GenerateNameVariations(name))
        {
            if (seen.Add(variation))
            {
                ordered.Add(variation);
            }
        }
    }

    var normalized = companyName.Trim();

    // Step 0a: Check for "Brand (Parent Company)" format and extract parent company.
    // Parent company is more likely to be the registered name, so search it first.
    var parentMatch = System.Text.RegularExpressions.Regex.Match(normalized, @"\(([^)]+)\)\s*$");
    if (parentMatch.Success)
    {
        // Parent company variations go first (higher priority)
        AddVariations(parentMatch.Groups[1].Value.Trim());

        // Also try the brand name without the parenthetical
        var brandName = normalized[..parentMatch.Index].Trim();
        if (brandName.Length >= 3)
        {
            AddVariations(brandName);
        }
    }

    // Step 0b: Check for "Name1/Name2" format (e.g., "ASDA/WALMART").
    // Try each part separately as they may be different registered names.
    if (normalized.Contains('/'))
    {
        var parts = normalized.Split('/', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
        foreach (var part in parts)
        {
            if (part.Length >= 3)
            {
                AddVariations(part);
            }
        }
    }

    // Finally, variations of the full original name
    AddVariations(normalized);

    return ordered;
}
|
||||
|
||||
/// <summary>
|
||||
/// Generates name variations for a single company name (UK/U.K., Ltd/Limited, etc.)
|
||||
/// </summary>
|
||||
private static List<string> GenerateNameVariations(string name)
|
||||
{
|
||||
var variations = new HashSet<string>(StringComparer.OrdinalIgnoreCase) { name };
|
||||
|
||||
// Step 1: Generate UK/U.K. variations
|
||||
var ukVariants = new List<string> { name };
|
||||
|
||||
if (name.Contains(" UK", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
// Add U.K. variant
|
||||
var withDots = normalized
|
||||
var withDots = name
|
||||
.Replace(" UK ", " U.K. ", StringComparison.OrdinalIgnoreCase)
|
||||
.Replace(" UK", " U.K.", StringComparison.OrdinalIgnoreCase);
|
||||
if (withDots != normalized)
|
||||
if (withDots != name)
|
||||
ukVariants.Add(withDots);
|
||||
}
|
||||
if (normalized.Contains(" U.K.", StringComparison.OrdinalIgnoreCase))
|
||||
if (name.Contains(" U.K.", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
// Add UK variant (no dots)
|
||||
var withoutDots = normalized
|
||||
var withoutDots = name
|
||||
.Replace(" U.K. ", " UK ", StringComparison.OrdinalIgnoreCase)
|
||||
.Replace(" U.K.", " UK", StringComparison.OrdinalIgnoreCase);
|
||||
if (withoutDots != normalized)
|
||||
if (withoutDots != name)
|
||||
ukVariants.Add(withoutDots);
|
||||
}
|
||||
|
||||
// Step 2: For each UK variant, generate suffix variations (Ltd/Limited)
|
||||
foreach (var variant in ukVariants)
|
||||
{
|
||||
queries.Add(variant);
|
||||
variations.Add(variant);
|
||||
|
||||
// Try Ltd -> Limited
|
||||
if (variant.EndsWith(" Ltd", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
queries.Add(variant[..^4] + " Limited");
|
||||
variations.Add(variant[..^4] + " Limited");
|
||||
}
|
||||
// Try Limited -> Ltd
|
||||
else if (variant.EndsWith(" Limited", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
queries.Add(variant[..^8] + " Ltd");
|
||||
variations.Add(variant[..^8] + " Ltd");
|
||||
}
|
||||
// Try PLC variations
|
||||
else if (variant.EndsWith(" PLC", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
queries.Add(variant[..^4] + " Public Limited Company");
|
||||
variations.Add(variant[..^4] + " Public Limited Company");
|
||||
}
|
||||
else if (variant.EndsWith(" Public Limited Company", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
queries.Add(variant[..^24] + " PLC");
|
||||
variations.Add(variant[..^24] + " PLC");
|
||||
}
|
||||
// Try Plc (mixed case) variations
|
||||
else if (variant.EndsWith(" Plc", StringComparison.Ordinal))
|
||||
{
|
||||
variations.Add(variant[..^4] + " PLC");
|
||||
variations.Add(variant[..^4] + " Public Limited Company");
|
||||
}
|
||||
}
|
||||
|
||||
// Step 3: Try core name without suffix
|
||||
var suffixesToRemove = new[] { " Ltd", " Limited", " PLC", " LLP", " Inc", " Corporation", " Corp" };
|
||||
var coreName = normalized;
|
||||
var suffixesToRemove = new[] { " Ltd", " Limited", " PLC", " Plc", " LLP", " Inc", " Corporation", " Corp" };
|
||||
var coreName = name;
|
||||
foreach (var suffix in suffixesToRemove)
|
||||
{
|
||||
if (coreName.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
|
||||
@@ -850,10 +924,11 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (coreName != normalized && coreName.Length >= 3)
|
||||
if (coreName != name && coreName.Length >= 3)
|
||||
{
|
||||
queries.Add(coreName);
|
||||
queries.Add(coreName + " Limited");
|
||||
variations.Add(coreName);
|
||||
variations.Add(coreName + " Limited");
|
||||
variations.Add(coreName + " PLC");
|
||||
|
||||
// Also add U.K. variant of core name if applicable
|
||||
if (coreName.Contains(" UK", StringComparison.OrdinalIgnoreCase))
|
||||
@@ -861,12 +936,12 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
var coreWithDots = coreName
|
||||
.Replace(" UK ", " U.K. ", StringComparison.OrdinalIgnoreCase)
|
||||
.Replace(" UK", " U.K.", StringComparison.OrdinalIgnoreCase);
|
||||
queries.Add(coreWithDots);
|
||||
queries.Add(coreWithDots + " Limited");
|
||||
variations.Add(coreWithDots);
|
||||
variations.Add(coreWithDots + " Limited");
|
||||
}
|
||||
}
|
||||
|
||||
return queries.ToList();
|
||||
return variations.ToList();
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
@@ -144,70 +144,6 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Score Breakdown -->
|
||||
<div class="card border-0 shadow-sm mb-4">
|
||||
<div class="card-header bg-white py-3">
|
||||
<h5 class="mb-0 fw-bold">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="bi bi-calculator me-2" viewBox="0 0 16 16">
|
||||
<path d="M12 1a1 1 0 0 1 1 1v12a1 1 0 0 1-1 1H4a1 1 0 0 1-1-1V2a1 1 0 0 1 1-1h8zM4 0a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h8a2 2 0 0 0 2-2V2a2 2 0 0 0-2-2H4z"/>
|
||||
<path d="M4 2.5a.5.5 0 0 1 .5-.5h7a.5.5 0 0 1 .5.5v2a.5.5 0 0 1-.5.5h-7a.5.5 0 0 1-.5-.5v-2zm0 4a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1zm0 3a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1zm0 3a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1zm3-6a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1zm0 3a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1zm0 3a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1zm3-6a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1zm0 3a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v4a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-4z"/>
|
||||
</svg>
|
||||
Score Breakdown
|
||||
</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
@{
|
||||
var deductedFlags = _report.Flags
|
||||
.GroupBy(f => (f.Title, f.Description))
|
||||
.Select(g => g.First())
|
||||
.Where(f => f.ScoreImpact < 0)
|
||||
.ToList();
|
||||
var totalDeductions = deductedFlags.Sum(f => Math.Abs(f.ScoreImpact));
|
||||
}
|
||||
<div class="row">
|
||||
<div class="col-md-6">
|
||||
<table class="table table-sm">
|
||||
<tbody>
|
||||
<tr class="table-success">
|
||||
<td><strong>Base Score</strong></td>
|
||||
<td class="text-end"><strong>100 pts</strong></td>
|
||||
</tr>
|
||||
@foreach (var flag in deductedFlags.OrderByDescending(f => Math.Abs(f.ScoreImpact)))
|
||||
{
|
||||
<tr class="text-danger">
|
||||
<td>@FormatFlagTitle(flag.Title)</td>
|
||||
<td class="text-end">-@Math.Abs(flag.ScoreImpact) pts</td>
|
||||
</tr>
|
||||
}
|
||||
<tr class="table-secondary">
|
||||
<td><strong>Total Deductions</strong></td>
|
||||
<td class="text-end text-danger"><strong>-@totalDeductions pts</strong></td>
|
||||
</tr>
|
||||
<tr class="table-primary">
|
||||
<td><strong>Final Score</strong></td>
|
||||
<td class="text-end"><strong>@_report.OverallScore pts</strong></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<div class="col-md-6">
|
||||
<div class="alert alert-light border">
|
||||
<h6 class="fw-bold mb-2">How Scoring Works</h6>
|
||||
<ul class="small mb-0">
|
||||
<li>Everyone starts with <strong>100 points</strong></li>
|
||||
<li>Points are deducted for issues found during verification</li>
|
||||
<li>Unverified companies: <strong>-10 pts</strong> each</li>
|
||||
<li>Director claims not verified: <strong>-20 pts</strong> each</li>
|
||||
<li>Employment before company existed: <strong>-20 pts</strong></li>
|
||||
<li>Employment gaps: <strong>-1 to -10 pts</strong> per gap</li>
|
||||
<li>Concurrent employment is noted but not penalised</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Employment Verification -->
|
||||
<div class="card border-0 shadow-sm mb-4">
|
||||
<div class="card-header bg-white py-3">
|
||||
@@ -229,6 +165,7 @@
|
||||
<th>Matched Company</th>
|
||||
<th class="text-center">Match Score</th>
|
||||
<th class="text-center">Status</th>
|
||||
<th class="text-center">Points</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@@ -284,11 +221,24 @@
|
||||
<span class="badge bg-warning text-dark">Unverified</span>
|
||||
}
|
||||
</td>
|
||||
<td class="text-center">
|
||||
@{
|
||||
var companyPoints = GetPointsForCompany(verification.ClaimedCompany, verification.MatchedCompanyName);
|
||||
}
|
||||
@if (companyPoints < 0)
|
||||
{
|
||||
<span class="text-danger fw-medium">@companyPoints</span>
|
||||
}
|
||||
else
|
||||
{
|
||||
<span class="text-success">0</span>
|
||||
}
|
||||
</td>
|
||||
</tr>
|
||||
@if (!string.IsNullOrEmpty(verification.VerificationNotes))
|
||||
{
|
||||
<tr class="table-light">
|
||||
<td colspan="5" class="small text-muted py-1 ps-4">
|
||||
<td colspan="6" class="small text-muted py-1 ps-4">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" fill="currentColor" class="bi bi-info-circle me-1" viewBox="0 0 16 16">
|
||||
<path d="M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z"/>
|
||||
<path d="m8.93 6.588-2.29.287-.082.38.45.083c.294.07.352.176.288.469l-.738 3.468c-.194.897.105 1.319.808 1.319.545 0 1.178-.252 1.465-.598l.088-.416c-.2.176-.492.246-.686.246-.275 0-.375-.193-.304-.533L8.93 6.588zM9 4.5a1 1 0 1 1-2 0 1 1 0 0 1 2 0z"/>
|
||||
@@ -752,4 +702,20 @@
|
||||
_ => title
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Sums the score impact of the report's negative flags whose description mentions
/// the given company, for display in the Points column of the employment table.
/// </summary>
/// <param name="claimedCompany">Company name as claimed by the candidate.</param>
/// <param name="matchedCompany">Name of the matched company, or null when there is no match.</param>
/// <returns>
/// Zero when the report has no flags or no flag mentions the company; otherwise the
/// (negative) sum of <c>ScoreImpact</c> over the distinct matching flags.
/// </returns>
private int GetPointsForCompany(string claimedCompany, string? matchedCompany)
{
    if (_report?.Flags is null) return 0;

    // An empty name is "contained" in every description, and a whitespace-only name
    // matches any description with a space, which would attribute unrelated
    // deductions to this row. Only search for names with real content.
    var names = new[] { claimedCompany, matchedCompany }
        .Where(n => !string.IsNullOrWhiteSpace(n))
        .ToArray();
    if (names.Length == 0) return 0;

    // NOTE(review): matching is a plain substring test, so a very short company name
    // can still match inside unrelated words — confirm whether flags carry a company key.
    return _report.Flags
        .Where(f => f.ScoreImpact < 0
            && !string.IsNullOrEmpty(f.Description)
            && names.Any(n => f.Description.Contains(n!, StringComparison.OrdinalIgnoreCase)))
        // The same flag can appear more than once; count each (Title, Description) pair once
        .GroupBy(f => (f.Title, f.Description))
        .Select(g => g.First())
        .Sum(f => f.ScoreImpact);
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user