2026-01-18 19:20:50 +01:00
|
|
|
using FuzzySharp;
|
|
|
|
|
using Microsoft.EntityFrameworkCore;
|
|
|
|
|
using Microsoft.Extensions.Logging;
|
|
|
|
|
using TrueCV.Application.DTOs;
|
2026-01-20 16:45:43 +01:00
|
|
|
using TrueCV.Application.Helpers;
|
2026-01-18 19:20:50 +01:00
|
|
|
using TrueCV.Application.Interfaces;
|
|
|
|
|
using TrueCV.Application.Models;
|
|
|
|
|
using TrueCV.Domain.Entities;
|
|
|
|
|
using TrueCV.Infrastructure.Data;
|
|
|
|
|
using TrueCV.Infrastructure.ExternalApis;
|
|
|
|
|
|
|
|
|
|
namespace TrueCV.Infrastructure.Services;
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Verifies claimed company names by fuzzy-matching them against a database cache of
/// previously matched companies and, when the cache has no match, against Companies House
/// search results.
/// </summary>
public sealed class CompanyVerifierService : ICompanyVerifierService
{
    private readonly CompaniesHouseClient _companiesHouseClient;
    private readonly ApplicationDbContext _dbContext;
    private readonly ILogger<CompanyVerifierService> _logger;

    /// <summary>
    /// Minimum <c>Fuzz.Ratio</c> score a candidate name must reach to count as a match.
    /// The score is reported to callers as a percentage.
    /// </summary>
    private const int FuzzyMatchThreshold = 70;

    /// <summary>Cache rows whose <c>CachedAt</c> is older than this many days are ignored by cache lookups.</summary>
    private const int CacheExpirationDays = 30;

    /// <summary>Creates the service with its Companies House client, database context and logger.</summary>
    /// <param name="companiesHouseClient">Client used to search Companies House.</param>
    /// <param name="dbContext">Database context that holds the company cache.</param>
    /// <param name="logger">Logger for diagnostic output.</param>
    public CompanyVerifierService(
        CompaniesHouseClient companiesHouseClient,
        ApplicationDbContext dbContext,
        ILogger<CompanyVerifierService> logger)
    {
        _companiesHouseClient = companiesHouseClient;
        _dbContext = dbContext;
        _logger = logger;
    }

    /// <summary>
    /// Verifies a claimed company name, first against the local cache and then against
    /// Companies House search results.
    /// </summary>
    /// <param name="companyName">The company name as claimed; must not be null or whitespace.</param>
    /// <param name="startDate">Claimed start date; copied to the result unchanged.</param>
    /// <param name="endDate">Claimed end date; copied to the result unchanged.</param>
    /// <returns>
    /// A verified result when a cached or searched company name scores at or above
    /// <see cref="FuzzyMatchThreshold"/>; otherwise an unverified result whose notes give the
    /// reason (no search results, no match above the threshold, or rate limiting).
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="companyName"/> is null, empty or whitespace.</exception>
    public async Task<CompanyVerificationResult> VerifyCompanyAsync(
        string companyName,
        DateOnly? startDate,
        DateOnly? endDate)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(companyName);

        _logger.LogDebug("Verifying company: {CompanyName}", companyName);

        // Try to find a cached match first; a hit avoids calling Companies House at all.
        var cachedMatch = await FindCachedMatchAsync(companyName);
        if (cachedMatch is not null)
        {
            _logger.LogDebug("Found cached company match for: {CompanyName}", companyName);
            return CreateVerificationResult(companyName, cachedMatch, startDate, endDate);
        }

        // Search Companies House
        try
        {
            var searchResponse = await _companiesHouseClient.SearchCompaniesAsync(companyName);

            if (searchResponse?.Items is null || searchResponse.Items.Count == 0)
            {
                _logger.LogDebug("No companies found for: {CompanyName}", companyName);
                return CreateUnverifiedResult(companyName, startDate, endDate, "No matching company found in Companies House");
            }

            // Find best fuzzy match
            var bestMatch = FindBestMatch(companyName, searchResponse.Items);

            if (bestMatch is null)
            {
                _logger.LogDebug("No fuzzy match above threshold for: {CompanyName}", companyName);
                return CreateUnverifiedResult(companyName, startDate, endDate,
                    $"No company name matched above {FuzzyMatchThreshold}% threshold");
            }

            // Cache the matched company so later lookups can be answered from the database.
            var match = bestMatch.Value;
            await CacheCompanyAsync(match.Item);

            _logger.LogInformation(
                "Verified company {ClaimedName} matched to {MatchedName} with score {Score}%",
                companyName, match.Item.Title, match.Score);

            return new CompanyVerificationResult
            {
                ClaimedCompany = companyName,
                MatchedCompanyName = match.Item.Title,
                MatchedCompanyNumber = match.Item.CompanyNumber,
                MatchScore = match.Score,
                IsVerified = true,
                VerificationNotes = $"Matched with {match.Score}% confidence",
                ClaimedStartDate = startDate,
                ClaimedEndDate = endDate
            };
        }
        catch (CompaniesHouseRateLimitException ex)
        {
            // Rate limiting is reported as an unverified result instead of an error.
            _logger.LogWarning(ex, "Rate limit hit while verifying company: {CompanyName}", companyName);
            return CreateUnverifiedResult(companyName, startDate, endDate,
                "Verification temporarily unavailable due to rate limiting");
        }
    }

    /// <summary>
    /// Searches Companies House and maps each returned item to a <see cref="CompanySearchResult"/>.
    /// </summary>
    /// <param name="query">Search text; must not be null or whitespace.</param>
    /// <returns>The mapped search results, or an empty list when the response has no items.</returns>
    /// <exception cref="ArgumentException"><paramref name="query"/> is null, empty or whitespace.</exception>
    public async Task<List<CompanySearchResult>> SearchCompaniesAsync(string query)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(query);

        _logger.LogDebug("Searching companies for query: {Query}", query);

        var response = await _companiesHouseClient.SearchCompaniesAsync(query);

        if (response?.Items is null)
        {
            return [];
        }

        return response.Items.Select(item => new CompanySearchResult
        {
            CompanyNumber = item.CompanyNumber,
            CompanyName = item.Title,
            CompanyStatus = item.CompanyStatus ?? "Unknown",
            IncorporationDate = DateHelpers.ParseDate(item.DateOfCreation),
            AddressSnippet = item.AddressSnippet
        }).ToList();
    }

    /// <summary>
    /// Looks for the cached company, cached within the last <see cref="CacheExpirationDays"/>
    /// days, whose name best matches <paramref name="companyName"/>.
    /// </summary>
    /// <param name="companyName">The claimed company name.</param>
    /// <returns>
    /// The highest-scoring cached company at or above <see cref="FuzzyMatchThreshold"/>,
    /// or <c>null</c> when no cached company qualifies.
    /// </returns>
    private async Task<CompanyCache?> FindCachedMatchAsync(string companyName)
    {
        var cutoffDate = DateTime.UtcNow.AddDays(-CacheExpirationDays);

        // The rows are only read here, so skip change tracking: otherwise every recent
        // cache row stays attached to the context for the rest of its lifetime.
        var cachedCompanies = await _dbContext.CompanyCache
            .AsNoTracking()
            .Where(c => c.CachedAt >= cutoffDate)
            .ToListAsync();

        if (cachedCompanies.Count == 0)
        {
            return null;
        }

        // Normalise the claimed name once instead of once per cached row.
        var normalizedSearch = NormalizeName(companyName);

        // MaxBy keeps the first of several equally scored candidates, which is the same
        // winner a stable descending sort followed by FirstOrDefault would pick.
        var bestMatch = cachedCompanies
            .Select(c => new { Company = c, Score = ScoreAgainst(normalizedSearch, c.CompanyName) })
            .Where(m => m.Score >= FuzzyMatchThreshold)
            .MaxBy(m => m.Score);

        return bestMatch?.Company;
    }

    /// <summary>
    /// Picks the search item whose title best matches <paramref name="companyName"/>.
    /// </summary>
    /// <param name="companyName">The claimed company name.</param>
    /// <param name="items">Search items to score.</param>
    /// <returns>
    /// The highest-scoring item and its score, or <c>null</c> when no item reaches
    /// <see cref="FuzzyMatchThreshold"/>. On equal scores the earliest item wins.
    /// </returns>
    private static (CompaniesHouseSearchItem Item, int Score)? FindBestMatch(
        string companyName,
        List<CompaniesHouseSearchItem> items)
    {
        var normalizedSearch = NormalizeName(companyName);
        (CompaniesHouseSearchItem Item, int Score)? best = null;

        foreach (var item in items)
        {
            // An item without a title cannot be name-matched; skip it rather than
            // letting one malformed hit fail the whole verification.
            if (string.IsNullOrWhiteSpace(item.Title))
            {
                continue;
            }

            var score = ScoreAgainst(normalizedSearch, item.Title);

            // Strict '>' keeps the earliest item when scores tie.
            if (score >= FuzzyMatchThreshold && (best is null || score > best.Value.Score))
            {
                best = (item, score);
            }
        }

        return best;
    }

    /// <summary>
    /// Inserts or refreshes the cache row for the given company, keyed by company number,
    /// and saves the change.
    /// </summary>
    /// <param name="item">The Companies House search item to cache.</param>
    private async Task CacheCompanyAsync(CompaniesHouseSearchItem item)
    {
        var existingCache = await _dbContext.CompanyCache
            .FirstOrDefaultAsync(c => c.CompanyNumber == item.CompanyNumber);

        if (existingCache is not null)
        {
            existingCache.CompanyName = item.Title;
            existingCache.Status = item.CompanyStatus ?? "Unknown";
            existingCache.IncorporationDate = DateHelpers.ParseDate(item.DateOfCreation);
            existingCache.DissolutionDate = DateHelpers.ParseDate(item.DateOfCessation);
            // Resetting the timestamp restarts the expiration window for this row.
            existingCache.CachedAt = DateTime.UtcNow;
        }
        else
        {
            var cacheEntry = new CompanyCache
            {
                CompanyNumber = item.CompanyNumber,
                CompanyName = item.Title,
                Status = item.CompanyStatus ?? "Unknown",
                IncorporationDate = DateHelpers.ParseDate(item.DateOfCreation),
                DissolutionDate = DateHelpers.ParseDate(item.DateOfCessation),
                CachedAt = DateTime.UtcNow
            };

            _dbContext.CompanyCache.Add(cacheEntry);
        }

        await _dbContext.SaveChangesAsync();
    }

    /// <summary>
    /// Builds a verified result from a cached company, recomputing the match score
    /// between the claimed name and the cached name.
    /// </summary>
    /// <param name="claimedCompany">The company name as claimed.</param>
    /// <param name="cached">The cached company that matched.</param>
    /// <param name="startDate">Claimed start date; copied to the result unchanged.</param>
    /// <param name="endDate">Claimed end date; copied to the result unchanged.</param>
    /// <returns>A result with <c>IsVerified</c> set to <c>true</c>.</returns>
    private static CompanyVerificationResult CreateVerificationResult(
        string claimedCompany,
        CompanyCache cached,
        DateOnly? startDate,
        DateOnly? endDate)
    {
        var matchScore = ScoreAgainst(NormalizeName(claimedCompany), cached.CompanyName);

        return new CompanyVerificationResult
        {
            ClaimedCompany = claimedCompany,
            MatchedCompanyName = cached.CompanyName,
            MatchedCompanyNumber = cached.CompanyNumber,
            MatchScore = matchScore,
            IsVerified = true,
            VerificationNotes = $"Matched from cache with {matchScore}% confidence",
            ClaimedStartDate = startDate,
            ClaimedEndDate = endDate
        };
    }

    /// <summary>
    /// Builds an unverified result with no matched company and a score of zero.
    /// </summary>
    /// <param name="companyName">The company name as claimed.</param>
    /// <param name="startDate">Claimed start date; copied to the result unchanged.</param>
    /// <param name="endDate">Claimed end date; copied to the result unchanged.</param>
    /// <param name="reason">Explanation stored in the result's verification notes.</param>
    /// <returns>A result with <c>IsVerified</c> set to <c>false</c>.</returns>
    private static CompanyVerificationResult CreateUnverifiedResult(
        string companyName,
        DateOnly? startDate,
        DateOnly? endDate,
        string reason)
    {
        return new CompanyVerificationResult
        {
            ClaimedCompany = companyName,
            MatchedCompanyName = null,
            MatchedCompanyNumber = null,
            MatchScore = 0,
            IsVerified = false,
            VerificationNotes = reason,
            ClaimedStartDate = startDate,
            ClaimedEndDate = endDate
        };
    }

    /// <summary>Upper-cases a name (invariant culture) so that scoring ignores letter case.</summary>
    private static string NormalizeName(string name) => name.ToUpperInvariant();

    /// <summary>
    /// Scores a candidate name against an already normalised claimed name using <c>Fuzz.Ratio</c>.
    /// </summary>
    /// <param name="normalizedClaimedName">Claimed name, already passed through <see cref="NormalizeName"/>.</param>
    /// <param name="candidateName">Candidate name in its original casing.</param>
    private static int ScoreAgainst(string normalizedClaimedName, string candidateName) =>
        Fuzz.Ratio(normalizedClaimedName, NormalizeName(candidateName));
}
|