using System.Globalization;
using FuzzySharp;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using TrueCV.Application.DTOs;
using TrueCV.Application.Interfaces;
using TrueCV.Application.Models;
using TrueCV.Domain.Entities;
using TrueCV.Infrastructure.Data;
using TrueCV.Infrastructure.ExternalApis;

namespace TrueCV.Infrastructure.Services;

/// <summary>
/// Verifies claimed employer names by fuzzy-matching them against a local
/// company cache and, when the cache has no match, against Companies House
/// search results.
/// </summary>
public sealed class CompanyVerifierService : ICompanyVerifierService
{
    /// <summary>Minimum fuzzy-match score (0-100) for a candidate to count as a match.</summary>
    private const int FuzzyMatchThreshold = 70;

    /// <summary>Cache entries older than this many days are ignored by the cache lookup.</summary>
    private const int CacheExpirationDays = 30;

    private readonly CompaniesHouseClient _companiesHouseClient;
    private readonly ApplicationDbContext _dbContext;
    private readonly ILogger<CompanyVerifierService> _logger;

    /// <summary>
    /// Creates the service with its Companies House client, database context and logger.
    /// </summary>
    public CompanyVerifierService(
        CompaniesHouseClient companiesHouseClient,
        ApplicationDbContext dbContext,
        ILogger<CompanyVerifierService> logger)
    {
        _companiesHouseClient = companiesHouseClient;
        _dbContext = dbContext;
        _logger = logger;
    }

    /// <summary>
    /// Verifies a claimed company name, first against the local cache and then
    /// against a Companies House search.
    /// </summary>
    /// <param name="companyName">The company name as claimed; must not be null or whitespace.</param>
    /// <param name="startDate">Claimed start date, copied unchanged into the result.</param>
    /// <param name="endDate">Claimed end date, copied unchanged into the result.</param>
    /// <returns>
    /// A verified result when a cached or searched company scores at least
    /// <see cref="FuzzyMatchThreshold"/>; otherwise an unverified result whose
    /// notes state the reason (no search results, no match above the threshold,
    /// or rate limiting).
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="companyName"/> is null or whitespace.</exception>
    public async Task<CompanyVerificationResult> VerifyCompanyAsync(
        string companyName,
        DateOnly? startDate,
        DateOnly? endDate)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(companyName);

        _logger.LogDebug("Verifying company: {CompanyName}", companyName);

        // A sufficiently similar, non-expired cache entry short-circuits the remote call.
        var cachedMatch = await FindCachedMatchAsync(companyName);
        if (cachedMatch is not null)
        {
            _logger.LogDebug("Found cached company match for: {CompanyName}", companyName);
            return CreateVerificationResult(companyName, cachedMatch, startDate, endDate);
        }

        try
        {
            var searchResponse = await _companiesHouseClient.SearchCompaniesAsync(companyName);

            if (searchResponse?.Items is null || searchResponse.Items.Count == 0)
            {
                _logger.LogDebug("No companies found for: {CompanyName}", companyName);
                return CreateUnverifiedResult(companyName, startDate, endDate,
                    "No matching company found in Companies House");
            }

            var bestMatch = FindBestMatch(companyName, searchResponse.Items);
            if (bestMatch is null)
            {
                _logger.LogDebug("No fuzzy match above threshold for: {CompanyName}", companyName);
                return CreateUnverifiedResult(companyName, startDate, endDate,
                    $"No company name matched above {FuzzyMatchThreshold}% threshold");
            }

            // Persist the match so later lookups for similar names can be served from the cache.
            var match = bestMatch.Value;
            await CacheCompanyAsync(match.Item);

            _logger.LogInformation(
                "Verified company {ClaimedName} matched to {MatchedName} with score {Score}%",
                companyName, match.Item.Title, match.Score);

            return new CompanyVerificationResult
            {
                ClaimedCompany = companyName,
                MatchedCompanyName = match.Item.Title,
                MatchedCompanyNumber = match.Item.CompanyNumber,
                MatchScore = match.Score,
                IsVerified = true,
                VerificationNotes = $"Matched with {match.Score}% confidence",
                ClaimedStartDate = startDate,
                ClaimedEndDate = endDate
            };
        }
        catch (CompaniesHouseRateLimitException ex)
        {
            // Rate limiting is reported as an unverified result rather than an error.
            _logger.LogWarning(ex, "Rate limit hit while verifying company: {CompanyName}", companyName);
            return CreateUnverifiedResult(companyName, startDate, endDate,
                "Verification temporarily unavailable due to rate limiting");
        }
    }

    /// <summary>
    /// Searches Companies House and maps each returned item to a
    /// <see cref="CompanySearchResult"/>.
    /// </summary>
    /// <param name="query">Search text; must not be null or whitespace.</param>
    /// <returns>The mapped results, or an empty list when the response has no items.</returns>
    /// <exception cref="ArgumentException"><paramref name="query"/> is null or whitespace.</exception>
    public async Task<List<CompanySearchResult>> SearchCompaniesAsync(string query)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(query);

        _logger.LogDebug("Searching companies for query: {Query}", query);

        var response = await _companiesHouseClient.SearchCompaniesAsync(query);
        if (response?.Items is null)
        {
            return [];
        }

        return response.Items
            .Select(item => new CompanySearchResult
            {
                CompanyNumber = item.CompanyNumber,
                CompanyName = item.Title,
                CompanyStatus = item.CompanyStatus ?? "Unknown",
                IncorporationDate = ParseDate(item.DateOfCreation),
                AddressSnippet = item.AddressSnippet
            })
            .ToList();
    }

    /// <summary>
    /// Returns the non-expired cache entry whose name best matches
    /// <paramref name="companyName"/>, or <c>null</c> when none reaches
    /// <see cref="FuzzyMatchThreshold"/>.
    /// </summary>
    private async Task<CompanyCache?> FindCachedMatchAsync(string companyName)
    {
        var cutoffDate = DateTime.UtcNow.AddDays(-CacheExpirationDays);

        // Read-only lookup: the entities are only inspected, never modified here.
        var cachedCompanies = await _dbContext.CompanyCache
            .AsNoTracking()
            .Where(c => c.CachedAt >= cutoffDate)
            .ToListAsync();

        // Normalise the claimed name once rather than once per cached row.
        var normalizedSearch = companyName.ToUpperInvariant();

        CompanyCache? best = null;
        var bestScore = -1;

        foreach (var candidate in cachedCompanies)
        {
            var score = Fuzz.Ratio(normalizedSearch, candidate.CompanyName.ToUpperInvariant());

            // Strict '>' keeps the earliest entry when scores tie.
            if (score >= FuzzyMatchThreshold && score > bestScore)
            {
                best = candidate;
                bestScore = score;
            }
        }

        return best;
    }

    /// <summary>
    /// Picks the search item whose title scores highest against
    /// <paramref name="companyName"/>, or <c>null</c> when no item reaches
    /// <see cref="FuzzyMatchThreshold"/>. Items without a title are skipped.
    /// </summary>
    private static (CompaniesHouseSearchItem Item, int Score)? FindBestMatch(
        string companyName,
        IEnumerable<CompaniesHouseSearchItem> items)
    {
        var normalizedSearch = companyName.ToUpperInvariant();

        (CompaniesHouseSearchItem Item, int Score)? best = null;

        foreach (var item in items)
        {
            // A missing title cannot be scored; skip it instead of throwing.
            if (string.IsNullOrWhiteSpace(item.Title))
            {
                continue;
            }

            var score = Fuzz.Ratio(normalizedSearch, item.Title.ToUpperInvariant());

            // Strict '>' keeps the earliest item when scores tie.
            if (score >= FuzzyMatchThreshold && (best is null || score > best.Value.Score))
            {
                best = (item, score);
            }
        }

        return best;
    }

    /// <summary>
    /// Inserts or refreshes the cache row for <paramref name="item"/>, keyed by
    /// company number, and saves the change.
    /// </summary>
    private async Task CacheCompanyAsync(CompaniesHouseSearchItem item)
    {
        var existingCache = await _dbContext.CompanyCache
            .FirstOrDefaultAsync(c => c.CompanyNumber == item.CompanyNumber);

        if (existingCache is not null)
        {
            existingCache.CompanyName = item.Title;
            existingCache.Status = item.CompanyStatus ?? "Unknown";
            existingCache.IncorporationDate = ParseDate(item.DateOfCreation);
            existingCache.DissolutionDate = ParseDate(item.DateOfCessation);
            existingCache.CachedAt = DateTime.UtcNow;
        }
        else
        {
            var cacheEntry = new CompanyCache
            {
                CompanyNumber = item.CompanyNumber,
                CompanyName = item.Title,
                Status = item.CompanyStatus ?? "Unknown",
                IncorporationDate = ParseDate(item.DateOfCreation),
                DissolutionDate = ParseDate(item.DateOfCessation),
                CachedAt = DateTime.UtcNow
            };

            _dbContext.CompanyCache.Add(cacheEntry);
        }

        await _dbContext.SaveChangesAsync();
    }

    /// <summary>
    /// Builds a verified result from a cache entry, recomputing the match score
    /// between the claimed name and the cached name.
    /// </summary>
    private static CompanyVerificationResult CreateVerificationResult(
        string claimedCompany,
        CompanyCache cached,
        DateOnly? startDate,
        DateOnly? endDate)
    {
        var matchScore = Fuzz.Ratio(
            claimedCompany.ToUpperInvariant(),
            cached.CompanyName.ToUpperInvariant());

        return new CompanyVerificationResult
        {
            ClaimedCompany = claimedCompany,
            MatchedCompanyName = cached.CompanyName,
            MatchedCompanyNumber = cached.CompanyNumber,
            MatchScore = matchScore,
            IsVerified = true,
            VerificationNotes = $"Matched from cache with {matchScore}% confidence",
            ClaimedStartDate = startDate,
            ClaimedEndDate = endDate
        };
    }

    /// <summary>
    /// Builds an unverified result (score 0, no matched company) carrying
    /// <paramref name="reason"/> as its notes.
    /// </summary>
    private static CompanyVerificationResult CreateUnverifiedResult(
        string companyName,
        DateOnly? startDate,
        DateOnly? endDate,
        string reason)
    {
        return new CompanyVerificationResult
        {
            ClaimedCompany = companyName,
            MatchedCompanyName = null,
            MatchedCompanyNumber = null,
            MatchScore = 0,
            IsVerified = false,
            VerificationNotes = reason,
            ClaimedStartDate = startDate,
            ClaimedEndDate = endDate
        };
    }

    /// <summary>
    /// Parses a date string into a <see cref="DateOnly"/>, returning <c>null</c>
    /// for null, blank or unparseable input.
    /// </summary>
    private static DateOnly? ParseDate(string? dateString)
    {
        if (string.IsNullOrWhiteSpace(dateString))
        {
            return null;
        }

        // The input is machine-generated API data, so parse it with the invariant
        // culture; the host's current culture must not change how it is read.
        return DateOnly.TryParse(dateString, CultureInfo.InvariantCulture, DateTimeStyles.None, out var date)
            ? date
            : null;
    }
}