Add debug logging to company matching (FindBestMatch) for diagnosis; make FindBestMatch non-static so it can use _logger

This commit is contained in:
2026-01-20 21:21:21 +01:00
parent 7bb68b2567
commit 04557dd8c4

View File

@@ -570,7 +570,7 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
return matches?.Company; return matches?.Company;
} }
private static (CompaniesHouseSearchItem Item, int Score)? FindBestMatch( private (CompaniesHouseSearchItem Item, int Score)? FindBestMatch(
string companyName, string companyName,
List<CompaniesHouseSearchItem> items, List<CompaniesHouseSearchItem> items,
DateOnly? claimedStartDate) DateOnly? claimedStartDate)
@@ -583,30 +583,46 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
.Where(m => m.Score >= FuzzyMatchThreshold) .Where(m => m.Score >= FuzzyMatchThreshold)
.ToList(); .ToList();
_logger.LogDebug("Found {Count} matches above threshold for '{CompanyName}'", matches.Count, companyName);
foreach (var m in matches.Take(5))
{
_logger.LogDebug(" Match: {Title} ({Number}), Score: {Score}, DateOfCreation: {Date}",
m.Item.Title, m.Item.CompanyNumber, m.Score, m.Item.DateOfCreation ?? "null");
}
if (matches.Count == 0) return null; if (matches.Count == 0) return null;
// If we have a claimed start date, prefer companies that existed at that time // If we have a claimed start date, prefer companies that existed at that time
if (claimedStartDate.HasValue) if (claimedStartDate.HasValue)
{ {
_logger.LogDebug("Filtering for companies that existed at claimed start date: {StartDate}", claimedStartDate.Value);
var existedAtStartDate = matches var existedAtStartDate = matches
.Where(m => .Where(m =>
{ {
var incDate = DateHelpers.ParseDate(m.Item.DateOfCreation); var incDate = DateHelpers.ParseDate(m.Item.DateOfCreation);
// Company existed if it was incorporated before the claimed start date var existed = incDate == null || incDate <= claimedStartDate.Value;
return incDate == null || incDate <= claimedStartDate.Value; _logger.LogDebug(" {Title}: IncDate={IncDate}, Existed={Existed}",
m.Item.Title, incDate?.ToString() ?? "null", existed);
return existed;
}) })
.OrderByDescending(m => m.Score) .OrderByDescending(m => m.Score)
.ToList(); .ToList();
_logger.LogDebug("Companies that existed at start date: {Count}", existedAtStartDate.Count);
// If any matches existed at the start date, prefer those // If any matches existed at the start date, prefer those
if (existedAtStartDate.Count > 0) if (existedAtStartDate.Count > 0)
{ {
_logger.LogDebug("Selected: {Title} ({Number})", existedAtStartDate[0].Item.Title, existedAtStartDate[0].Item.CompanyNumber);
return existedAtStartDate[0]; return existedAtStartDate[0];
} }
} }
// Fall back to highest score if no temporal match // Fall back to highest score if no temporal match
return matches.OrderByDescending(m => m.Score).First(); var fallback = matches.OrderByDescending(m => m.Score).First();
_logger.LogDebug("Falling back to highest score: {Title} ({Number})", fallback.Item.Title, fallback.Item.CompanyNumber);
return fallback;
} }
private async Task CacheCompanyAsync(CompaniesHouseSearchItem item, CompaniesHouseCompany? details) private async Task CacheCompanyAsync(CompaniesHouseSearchItem item, CompaniesHouseCompany? details)