Files
RealCV/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs

235 lines
8.2 KiB
C#
Raw Normal View History

using FuzzySharp;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using TrueCV.Application.DTOs;
using TrueCV.Application.Helpers;
using TrueCV.Application.Interfaces;
using TrueCV.Application.Models;
using TrueCV.Domain.Entities;
using TrueCV.Infrastructure.Data;
using TrueCV.Infrastructure.ExternalApis;
namespace TrueCV.Infrastructure.Services;
/// <summary>
/// Verifies claimed company names by fuzzy-matching them first against recently cached
/// companies and, when no cached match exists, against Companies House search results.
/// </summary>
public sealed class CompanyVerifierService : ICompanyVerifierService
{
    /// <summary>Minimum fuzzy-match score (percent) a candidate name must reach to be accepted.</summary>
    private const int FuzzyMatchThreshold = 70;

    /// <summary>Cache rows older than this many days are ignored when looking for a cached match.</summary>
    private const int CacheExpirationDays = 30;

    /// <summary>Status stored or returned when Companies House supplies no company status.</summary>
    private const string UnknownStatus = "Unknown";

    private readonly CompaniesHouseClient _companiesHouseClient;
    private readonly ApplicationDbContext _dbContext;
    private readonly ILogger<CompanyVerifierService> _logger;

    /// <summary>
    /// Creates the service with the API client, database context and logger it depends on.
    /// </summary>
    /// <param name="companiesHouseClient">Client used to search Companies House.</param>
    /// <param name="dbContext">Database context that holds the company cache.</param>
    /// <param name="logger">Logger for diagnostic output.</param>
    public CompanyVerifierService(
        CompaniesHouseClient companiesHouseClient,
        ApplicationDbContext dbContext,
        ILogger<CompanyVerifierService> logger)
    {
        _companiesHouseClient = companiesHouseClient;
        _dbContext = dbContext;
        _logger = logger;
    }

    /// <summary>
    /// Attempts to verify that <paramref name="companyName"/> corresponds to a known company.
    /// </summary>
    /// <remarks>
    /// The cache is consulted first; only when it yields no match above the threshold is
    /// Companies House searched, and a successful API match is written back to the cache.
    /// A rate-limit error from the client is logged and reported as an unverified result
    /// rather than propagated.
    /// </remarks>
    /// <param name="companyName">The company name as claimed; must not be null, empty or whitespace.</param>
    /// <param name="startDate">Claimed start date; copied to the result, not used for matching.</param>
    /// <param name="endDate">Claimed end date; copied to the result, not used for matching.</param>
    /// <returns>
    /// A verified result carrying the matched company and score, or an unverified result whose
    /// <c>VerificationNotes</c> explain why no match was produced.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="companyName"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="companyName"/> is empty or whitespace.</exception>
    public async Task<CompanyVerificationResult> VerifyCompanyAsync(
        string companyName,
        DateOnly? startDate,
        DateOnly? endDate)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(companyName);

        _logger.LogDebug("Verifying company: {CompanyName}", companyName);

        // Try to find a cached match first.
        var cachedMatch = await FindCachedMatchAsync(companyName);
        if (cachedMatch is not null)
        {
            var cached = cachedMatch.Value;
            _logger.LogDebug("Found cached company match for: {CompanyName}", companyName);
            return CreateCachedResult(companyName, cached.Candidate, cached.Score, startDate, endDate);
        }

        // Fall back to searching Companies House.
        try
        {
            var searchResponse = await _companiesHouseClient.SearchCompaniesAsync(companyName);
            var candidates = searchResponse?.Items;
            if (candidates is null || candidates.Count == 0)
            {
                _logger.LogDebug("No companies found for: {CompanyName}", companyName);
                return CreateUnverifiedResult(companyName, startDate, endDate, "No matching company found in Companies House");
            }

            var bestMatch = SelectBestMatch(companyName, candidates, item => item.Title);
            if (bestMatch is null)
            {
                _logger.LogDebug("No fuzzy match above threshold for: {CompanyName}", companyName);
                return CreateUnverifiedResult(companyName, startDate, endDate,
                    $"No company name matched above {FuzzyMatchThreshold}% threshold");
            }

            var match = bestMatch.Value;
            var matchedItem = match.Candidate;

            // Persist the match so later verifications of similar names can skip the API.
            await CacheCompanyAsync(matchedItem);

            _logger.LogInformation(
                "Verified company {ClaimedName} matched to {MatchedName} with score {Score}%",
                companyName, matchedItem.Title, match.Score);

            return new CompanyVerificationResult
            {
                ClaimedCompany = companyName,
                MatchedCompanyName = matchedItem.Title,
                MatchedCompanyNumber = matchedItem.CompanyNumber,
                MatchScore = match.Score,
                IsVerified = true,
                VerificationNotes = $"Matched with {match.Score}% confidence",
                ClaimedStartDate = startDate,
                ClaimedEndDate = endDate
            };
        }
        catch (CompaniesHouseRateLimitException ex)
        {
            _logger.LogWarning(ex, "Rate limit hit while verifying company: {CompanyName}", companyName);
            return CreateUnverifiedResult(companyName, startDate, endDate,
                "Verification temporarily unavailable due to rate limiting");
        }
    }

    /// <summary>
    /// Searches Companies House for <paramref name="query"/> and maps each hit to a
    /// <see cref="CompanySearchResult"/>.
    /// </summary>
    /// <param name="query">The search text; must not be null, empty or whitespace.</param>
    /// <returns>The mapped search hits, or an empty list when the response has no items.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="query"/> is empty or whitespace.</exception>
    /// <remarks>Exceptions thrown by the client (including rate limiting) are not caught here.</remarks>
    public async Task<List<CompanySearchResult>> SearchCompaniesAsync(string query)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(query);

        _logger.LogDebug("Searching companies for query: {Query}", query);

        var response = await _companiesHouseClient.SearchCompaniesAsync(query);
        var items = response?.Items;
        if (items is null)
        {
            return [];
        }

        return items
            .Select(item => new CompanySearchResult
            {
                CompanyNumber = item.CompanyNumber,
                CompanyName = item.Title,
                CompanyStatus = item.CompanyStatus ?? UnknownStatus,
                IncorporationDate = DateHelpers.ParseDate(item.DateOfCreation),
                AddressSnippet = item.AddressSnippet
            })
            .ToList();
    }

    /// <summary>
    /// Looks for the best fuzzy match for <paramref name="companyName"/> among cache rows
    /// written within the last <see cref="CacheExpirationDays"/> days.
    /// </summary>
    /// <param name="companyName">The claimed company name.</param>
    /// <returns>The best cached company and its score, or <c>null</c> when none reaches the threshold.</returns>
    private async Task<(CompanyCache Candidate, int Score)?> FindCachedMatchAsync(string companyName)
    {
        var cutoffDate = DateTime.UtcNow.AddDays(-CacheExpirationDays);

        // Read-only lookup: the rows are only compared and copied into a result, so there is
        // no need for the change tracker to hold on to every recent cache entry.
        var recentCompanies = await _dbContext.CompanyCache
            .AsNoTracking()
            .Where(c => c.CachedAt >= cutoffDate)
            .ToListAsync();

        return SelectBestMatch(companyName, recentCompanies, c => c.CompanyName);
    }

    /// <summary>
    /// Scores every candidate's name against <paramref name="claimedName"/> (case-insensitively,
    /// via upper-casing) and returns the highest-scoring candidate at or above
    /// <see cref="FuzzyMatchThreshold"/>.
    /// </summary>
    /// <typeparam name="T">The candidate type.</typeparam>
    /// <param name="claimedName">The claimed company name.</param>
    /// <param name="candidates">Candidates to score, in priority order.</param>
    /// <param name="nameSelector">Extracts the name to compare from a candidate.</param>
    /// <returns>
    /// The best candidate with its score, or <c>null</c> when no candidate qualifies. On equal
    /// scores the earliest candidate wins.
    /// </returns>
    private static (T Candidate, int Score)? SelectBestMatch<T>(
        string claimedName,
        IEnumerable<T> candidates,
        Func<T, string?> nameSelector)
    {
        // Normalise the claimed name once instead of once per candidate.
        var normalizedClaim = claimedName.ToUpperInvariant();
        (T Candidate, int Score)? best = null;

        foreach (var candidate in candidates)
        {
            var candidateName = nameSelector(candidate);
            if (candidateName is null)
            {
                // A candidate without a name cannot match; skip it rather than fail the whole lookup.
                continue;
            }

            var score = Fuzz.Ratio(normalizedClaim, candidateName.ToUpperInvariant());

            // Strictly-greater comparison keeps the first of several equally scored candidates.
            if (score >= FuzzyMatchThreshold && (best is null || score > best.Value.Score))
            {
                best = (candidate, score);
            }
        }

        return best;
    }

    /// <summary>
    /// Inserts or refreshes the cache row for <paramref name="item"/>, keyed by company number,
    /// and saves the change.
    /// </summary>
    /// <param name="item">The Companies House search item to cache.</param>
    private async Task CacheCompanyAsync(CompaniesHouseSearchItem item)
    {
        var existingEntry = await _dbContext.CompanyCache
            .FirstOrDefaultAsync(c => c.CompanyNumber == item.CompanyNumber);

        var status = item.CompanyStatus ?? UnknownStatus;
        var incorporationDate = DateHelpers.ParseDate(item.DateOfCreation);
        var dissolutionDate = DateHelpers.ParseDate(item.DateOfCessation);
        var cachedAt = DateTime.UtcNow;

        if (existingEntry is null)
        {
            _dbContext.CompanyCache.Add(new CompanyCache
            {
                CompanyNumber = item.CompanyNumber,
                CompanyName = item.Title,
                Status = status,
                IncorporationDate = incorporationDate,
                DissolutionDate = dissolutionDate,
                CachedAt = cachedAt
            });
        }
        else
        {
            // Refresh the stored details and restart the expiry window.
            existingEntry.CompanyName = item.Title;
            existingEntry.Status = status;
            existingEntry.IncorporationDate = incorporationDate;
            existingEntry.DissolutionDate = dissolutionDate;
            existingEntry.CachedAt = cachedAt;
        }

        await _dbContext.SaveChangesAsync();
    }

    /// <summary>
    /// Builds a verified result for a match that was served from the cache.
    /// </summary>
    /// <param name="claimedCompany">The company name as claimed.</param>
    /// <param name="cached">The cached company that matched.</param>
    /// <param name="matchScore">The fuzzy-match score already computed for the pair.</param>
    /// <param name="startDate">Claimed start date, copied to the result.</param>
    /// <param name="endDate">Claimed end date, copied to the result.</param>
    private static CompanyVerificationResult CreateCachedResult(
        string claimedCompany,
        CompanyCache cached,
        int matchScore,
        DateOnly? startDate,
        DateOnly? endDate)
    {
        return new CompanyVerificationResult
        {
            ClaimedCompany = claimedCompany,
            MatchedCompanyName = cached.CompanyName,
            MatchedCompanyNumber = cached.CompanyNumber,
            MatchScore = matchScore,
            IsVerified = true,
            VerificationNotes = $"Matched from cache with {matchScore}% confidence",
            ClaimedStartDate = startDate,
            ClaimedEndDate = endDate
        };
    }

    /// <summary>
    /// Builds an unverified result with no matched company, a score of zero and
    /// <paramref name="reason"/> as the verification note.
    /// </summary>
    /// <param name="companyName">The company name as claimed.</param>
    /// <param name="startDate">Claimed start date, copied to the result.</param>
    /// <param name="endDate">Claimed end date, copied to the result.</param>
    /// <param name="reason">Why verification did not succeed.</param>
    private static CompanyVerificationResult CreateUnverifiedResult(
        string companyName,
        DateOnly? startDate,
        DateOnly? endDate,
        string reason)
    {
        return new CompanyVerificationResult
        {
            ClaimedCompany = companyName,
            MatchedCompanyName = null,
            MatchedCompanyNumber = null,
            MatchScore = 0,
            IsVerified = false,
            VerificationNotes = reason,
            ClaimedStartDate = startDate,
            ClaimedEndDate = endDate
        };
    }
}