using System.Text.Json;
using FuzzySharp;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using TrueCV.Application.DTOs;
using TrueCV.Application.Helpers;
using TrueCV.Application.Interfaces;
using TrueCV.Application.Models;
using TrueCV.Domain.Entities;
using TrueCV.Infrastructure.Data;
using TrueCV.Infrastructure.ExternalApis;

namespace TrueCV.Infrastructure.Services;

/// <summary>
/// Verifies employer claims against Companies House: fuzzy-matches the claimed company name
/// (using a local cache first), then runs a set of plausibility checks (incorporation and
/// dissolution dates, dormant accounts, company size, SIC codes, job title) that append
/// <see cref="CompanyVerificationFlag"/> entries to the result.
/// </summary>
/// <remarks>
/// NOTE(review): the generic type arguments in this file were missing from the reviewed copy and
/// have been restored from usage. Two of them cannot be proven from this file alone and must be
/// confirmed: the DbContext type (assumed <c>ApplicationDbContext</c>) and the return type of
/// <see cref="SearchCompaniesAsync"/> (assumed <c>Task&lt;List&lt;CompanySearchResult&gt;&gt;</c>).
/// </remarks>
public sealed class CompanyVerifierService : ICompanyVerifierService
{
    private readonly CompaniesHouseClient _companiesHouseClient;
    private readonly IDbContextFactory<ApplicationDbContext> _dbContextFactory; // NOTE(review): context type assumed - confirm
    private readonly ILogger<CompanyVerifierService> _logger;

    /// <summary>Minimum FuzzySharp token-set score (0-100) for a company name to count as a match.</summary>
    private const int FuzzyMatchThreshold = 85;

    /// <summary>Cache rows older than this many days are ignored when looking for a cached match.</summary>
    private const int CacheExpirationDays = 30;

    /// <summary>Minimum FuzzySharp ratio (0-100) for an officer name to count as the candidate.</summary>
    private const int DirectorNameMatchThreshold = 80;

    // SIC codes for tech/software companies
    private static readonly HashSet<string> TechSicCodes = new()
    {
        "62011", "62012", "62020", "62030", "62090", // Computer programming and consultancy
        "63110", "63120",                            // Data processing, hosting
        "58210", "58290",                            // Publishing of computer games, other software
        "61100", "61200", "61300", "61900"           // Telecommunications
    };

    // Characters that separate words in a job title ("VP, Sales", "AI/ML Engineer", "Sr. VP").
    private static readonly char[] TitleWordSeparators =
        [' ', '\t', ',', '/', '-', '&', '(', ')', '.', ';', ':', '|', '+'];

    // Acronyms are matched as whole words only. As substrings they hit unrelated titles:
    // "director"/"contractor" contain "cto", "coordinator" contains "coo".
    private static readonly HashSet<string> CsuiteAcronyms = new(StringComparer.Ordinal)
    {
        "ceo", "cto", "cfo", "coo", "cio"
    };

    // "avp" is listed so that titles such as "AVP Sales" keep being treated as VP-level.
    private static readonly HashSet<string> VpAcronyms = new(StringComparer.Ordinal)
    {
        "vp", "svp", "evp", "avp"
    };

    // Multi-word or unambiguous phrases are still matched as substrings of the lower-cased title.
    private static readonly string[] CsuitePhrases =
    [
        "chief executive", "chief technology", "chief financial", "chief operating",
        "chief information", "managing director", "chairman", "chairwoman", "chairperson"
    ];

    public CompanyVerifierService(
        CompaniesHouseClient companiesHouseClient,
        IDbContextFactory<ApplicationDbContext> dbContextFactory,
        ILogger<CompanyVerifierService> logger)
    {
        _companiesHouseClient = companiesHouseClient;
        _dbContextFactory = dbContextFactory;
        _logger = logger;
    }

    /// <summary>
    /// Verifies a claimed employer against the cache and Companies House and runs all plausibility checks.
    /// </summary>
    /// <param name="companyName">Company name as claimed by the candidate; must not be null or whitespace.</param>
    /// <param name="startDate">Claimed employment start date, if any.</param>
    /// <param name="endDate">Claimed employment end date; <c>null</c> is treated as current employment.</param>
    /// <param name="jobTitle">Claimed job title, used by the role-related checks.</param>
    /// <returns>
    /// A verified result with flags when a match is found; otherwise an unverified result whose
    /// <c>VerificationNotes</c> explain why (no match, score below threshold, or rate limiting).
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="companyName"/> is null or whitespace.</exception>
    public async Task<CompanyVerificationResult> VerifyCompanyAsync(
        string companyName,
        DateOnly? startDate,
        DateOnly? endDate,
        string? jobTitle = null)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(companyName);

        _logger.LogDebug("Verifying company: {CompanyName}", companyName);

        var flags = new List<CompanyVerificationFlag>();

        // Try to find a cached match first (but only if it existed at claimed start date)
        var cachedMatch = await FindCachedMatchAsync(companyName);
        if (cachedMatch is not null)
        {
            // Check if cached company existed at the claimed start date
            var cacheValid = !startDate.HasValue
                || cachedMatch.IncorporationDate == null
                || cachedMatch.IncorporationDate <= startDate.Value;

            if (cacheValid)
            {
                _logger.LogDebug("Found cached company match for: {CompanyName}", companyName);
                return CreateResultFromCache(cachedMatch, companyName, startDate, endDate, jobTitle, flags);
            }

            _logger.LogDebug("Cached company {CachedName} was incorporated after claimed start date, searching for alternatives", cachedMatch.CompanyName);
        }

        // Search Companies House
        try
        {
            var searchResponse = await _companiesHouseClient.SearchCompaniesAsync(companyName);

            if (searchResponse?.Items is null || searchResponse.Items.Count == 0)
            {
                _logger.LogDebug("No companies found for: {CompanyName}", companyName);
                return CreateUnverifiedResult(companyName, startDate, endDate, jobTitle,
                    "No matching company found in Companies House");
            }

            // Find best fuzzy match, preferring companies that existed at claimed start date
            var bestMatch = FindBestMatch(companyName, searchResponse.Items, startDate);

            if (bestMatch is null)
            {
                _logger.LogDebug("No fuzzy match above threshold for: {CompanyName}", companyName);
                return CreateUnverifiedResult(companyName, startDate, endDate, jobTitle,
                    "Company name could not be verified against official records");
            }

            var match = bestMatch.Value;

            // Fetch full company details for additional data
            var companyDetails = await _companiesHouseClient.GetCompanyAsync(match.Item.CompanyNumber);

            // Cache the matched company with full details
            await CacheCompanyAsync(match.Item, companyDetails);

            _logger.LogInformation(
                "Verified company {ClaimedName} matched to {MatchedName} with score {Score}%",
                companyName, match.Item.Title, match.Score);

            var incorporationDate = DateHelpers.ParseDate(match.Item.DateOfCreation);
            var dissolutionDate = DateHelpers.ParseDate(match.Item.DateOfCessation);
            var companyStatus = match.Item.CompanyStatus;
            var companyType = match.Item.CompanyType;
            var sicCodes = companyDetails?.SicCodes ?? match.Item.SicCodes;
            var accountsCategory = companyDetails?.Accounts?.LastAccounts?.Type;

            // Run all verification checks (shared with the cached path)
            var (jobPlausible, jobNotes) = RunVerificationChecks(
                flags, match.Item.Title, startDate, endDate, jobTitle,
                incorporationDate, dissolutionDate, companyStatus, companyType,
                accountsCategory, sicCodes);

            return new CompanyVerificationResult
            {
                ClaimedCompany = companyName,
                MatchedCompanyName = match.Item.Title,
                MatchedCompanyNumber = match.Item.CompanyNumber,
                MatchScore = match.Score,
                IsVerified = true,
                VerificationNotes = null,
                ClaimedStartDate = startDate,
                ClaimedEndDate = endDate,
                CompanyType = companyType,
                CompanyStatus = companyStatus,
                IncorporationDate = incorporationDate,
                DissolutionDate = dissolutionDate,
                AccountsCategory = accountsCategory,
                SicCodes = sicCodes,
                ClaimedJobTitle = jobTitle,
                JobTitlePlausible = jobPlausible,
                JobTitleNotes = jobNotes,
                Flags = flags
            };
        }
        catch (CompaniesHouseRateLimitException ex)
        {
            _logger.LogWarning(ex, "Rate limit hit while verifying company: {CompanyName}", companyName);
            return CreateUnverifiedResult(companyName, startDate, endDate, jobTitle,
                "Verification temporarily unavailable due to rate limiting");
        }
    }

    /// <summary>
    /// Searches Companies House and maps the hits to <see cref="CompanySearchResult"/>.
    /// </summary>
    /// <param name="query">Search text; must not be null or whitespace.</param>
    /// <returns>The mapped results, or an empty list when the response has no items.</returns>
    /// <exception cref="ArgumentException"><paramref name="query"/> is null or whitespace.</exception>
    // NOTE(review): return type reconstructed as Task<List<CompanySearchResult>> - confirm against ICompanyVerifierService.
    public async Task<List<CompanySearchResult>> SearchCompaniesAsync(string query)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(query);

        _logger.LogDebug("Searching companies for query: {Query}", query);

        var response = await _companiesHouseClient.SearchCompaniesAsync(query);

        if (response?.Items is null)
        {
            return [];
        }

        return response.Items.Select(item => new CompanySearchResult
        {
            CompanyNumber = item.CompanyNumber,
            CompanyName = item.Title,
            CompanyStatus = item.CompanyStatus ?? "Unknown",
            IncorporationDate = DateHelpers.ParseDate(item.DateOfCreation),
            AddressSnippet = item.AddressSnippet
        }).ToList();
    }

    /// <summary>
    /// Checks whether the candidate appears among a company's directors or secretaries
    /// during the claimed employment period.
    /// </summary>
    /// <param name="companyNumber">Companies House company number.</param>
    /// <param name="candidateName">Candidate's name; compared fuzzily after normalisation.</param>
    /// <param name="startDate">Claimed start date, if any.</param>
    /// <param name="endDate">Claimed end date, if any.</param>
    /// <returns>
    /// <c>true</c> when a matching officer with overlapping dates is found (or a name match when no
    /// dates are claimed); <c>false</c> when officers exist but none match; <c>null</c> when the
    /// check could not be performed (blank input, no officers, rate limiting, or any other error).
    /// </returns>
    public async Task<bool?> VerifyDirectorAsync(
        string companyNumber,
        string candidateName,
        DateOnly? startDate,
        DateOnly? endDate)
    {
        if (string.IsNullOrWhiteSpace(companyNumber) || string.IsNullOrWhiteSpace(candidateName))
        {
            return null;
        }

        try
        {
            var officers = await _companiesHouseClient.GetOfficersAsync(companyNumber);

            if (officers?.Items is null || officers.Items.Count == 0)
            {
                _logger.LogDebug("No officers found for company {CompanyNumber}", companyNumber);
                return null;
            }

            // Normalize candidate name for comparison
            var normalizedCandidate = NormalizeName(candidateName);

            foreach (var officer in officers.Items)
            {
                // Check if officer role is director-like
                var role = officer.OfficerRole?.ToLowerInvariant() ?? "";
                if (!role.Contains("director") && !role.Contains("secretary"))
                {
                    continue;
                }

                // Fuzzy match the name
                var normalizedOfficer = NormalizeName(officer.Name);
                var matchScore = Fuzz.Ratio(normalizedCandidate, normalizedOfficer);
                if (matchScore < DirectorNameMatchThreshold)
                {
                    continue;
                }

                // If no claimed dates, just check if names match
                if (!startDate.HasValue && !endDate.HasValue)
                {
                    _logger.LogDebug(
                        "Found matching director {OfficerName} for candidate {CandidateName} at company {CompanyNumber}",
                        officer.Name, candidateName, companyNumber);
                    return true;
                }

                // Check if employment period overlaps with directorship
                var appointedOn = DateHelpers.ParseDate(officer.AppointedOn);
                var resignedOn = DateHelpers.ParseDate(officer.ResignedOn);

                if (DatesOverlap(startDate, endDate, appointedOn, resignedOn))
                {
                    _logger.LogDebug(
                        "Verified director {OfficerName} matches candidate {CandidateName} with overlapping dates",
                        officer.Name, candidateName);
                    return true;
                }
            }

            _logger.LogDebug(
                "No matching director found for candidate {CandidateName} at company {CompanyNumber}",
                candidateName, companyNumber);
            return false;
        }
        catch (CompaniesHouseRateLimitException ex)
        {
            // Pass the exception to the logger, consistent with VerifyCompanyAsync.
            _logger.LogWarning(ex, "Rate limit hit while verifying director for company {CompanyNumber}", companyNumber);
            return null;
        }
        catch (Exception ex)
        {
            // Deliberate best-effort: a failed director lookup yields "unknown" rather than an error.
            _logger.LogError(ex, "Error verifying director for company {CompanyNumber}", companyNumber);
            return null;
        }
    }

    /// <summary>
    /// Upper-cases and trims a person's name and rewrites "SURNAME, Firstname" as "FIRSTNAME SURNAME".
    /// Returns an empty string for null or whitespace input.
    /// </summary>
    private static string NormalizeName(string? name)
    {
        if (string.IsNullOrWhiteSpace(name))
        {
            return "";
        }

        // Companies House often stores names as "SURNAME, Firstname"
        // Convert to "Firstname Surname" format for comparison
        var normalized = name.ToUpperInvariant().Trim();

        if (normalized.Contains(','))
        {
            var parts = normalized.Split(',', 2);
            if (parts.Length == 2)
            {
                normalized = $"{parts[1].Trim()} {parts[0].Trim()}";
            }
        }

        return normalized;
    }

    /// <summary>
    /// Returns whether two date ranges overlap. A range with neither bound counts as overlapping
    /// everything; a single missing bound is treated as open-ended on that side.
    /// </summary>
    private static bool DatesOverlap(DateOnly? start1, DateOnly? end1, DateOnly? start2, DateOnly? end2)
    {
        // If no dates, assume overlap
        if (!start1.HasValue && !end1.HasValue)
        {
            return true;
        }

        if (!start2.HasValue && !end2.HasValue)
        {
            return true;
        }

        // Use default dates for missing values
        var s1 = start1 ?? DateOnly.MinValue;
        var e1 = end1 ?? DateOnly.MaxValue;
        var s2 = start2 ?? DateOnly.MinValue;
        var e2 = end2 ?? DateOnly.MaxValue;

        // Check overlap: periods overlap if one starts before the other ends
        return s1 <= e2 && s2 <= e1;
    }

    #region Verification Checks

    /// <summary>
    /// Runs every verification check in a fixed order and appends any resulting flags to
    /// <paramref name="flags"/>. Shared by the live and cached paths so they cannot diverge.
    /// </summary>
    /// <returns>The job-title plausibility verdict and notes from <see cref="CheckJobTitlePlausibility"/>.</returns>
    private static (bool? IsPlausible, string? Notes) RunVerificationChecks(
        List<CompanyVerificationFlag> flags,
        string matchedCompanyName,
        DateOnly? startDate,
        DateOnly? endDate,
        string? jobTitle,
        DateOnly? incorporationDate,
        DateOnly? dissolutionDate,
        string? companyStatus,
        string? companyType,
        string? accountsCategory,
        List<string>? sicCodes)
    {
        // Check 1: Employment before company incorporation
        CheckIncorporationDate(flags, startDate, incorporationDate, matchedCompanyName);

        // Check 2: Employment at dissolved company
        CheckDissolutionDate(flags, endDate, dissolutionDate, companyStatus, matchedCompanyName);

        // Check 3: Dormant company check
        CheckDormantCompany(flags, accountsCategory, jobTitle, matchedCompanyName);

        // Check 4: Company size vs job title
        CheckCompanySizeVsRole(flags, accountsCategory, jobTitle, matchedCompanyName);

        // Check 5: SIC code vs job title mismatch
        CheckSicCodeMismatch(flags, sicCodes, jobTitle, matchedCompanyName);

        // Check 6: Job title plausibility for PLCs
        var (jobPlausible, jobNotes) = CheckJobTitlePlausibility(jobTitle, companyType);
        if (jobPlausible == false)
        {
            flags.Add(new CompanyVerificationFlag
            {
                Type = "ImplausibleJobTitle",
                Severity = "Critical",
                Message = jobNotes ?? "Job title requires verification",
                ScoreImpact = -15
            });
        }

        return (jobPlausible, jobNotes);
    }

    /// <summary>Splits an already lower-cased job title into its distinct words.</summary>
    private static HashSet<string> SplitTitleWords(string lowerCasedTitle) =>
        lowerCasedTitle
            .Split(TitleWordSeparators, StringSplitOptions.RemoveEmptyEntries)
            .ToHashSet(StringComparer.Ordinal);

    /// <summary>Flags a claimed start date that precedes the company's incorporation date.</summary>
    private static void CheckIncorporationDate(
        List<CompanyVerificationFlag> flags,
        DateOnly? claimedStartDate,
        DateOnly? incorporationDate,
        string companyName)
    {
        if (!claimedStartDate.HasValue || !incorporationDate.HasValue)
        {
            return;
        }

        if (claimedStartDate.Value < incorporationDate.Value)
        {
            flags.Add(new CompanyVerificationFlag
            {
                Type = "EmploymentBeforeIncorporation",
                Severity = "Critical",
                Message = $"Claimed employment at '{companyName}' starting {claimedStartDate:MMM yyyy} is before company incorporation date {incorporationDate:MMM yyyy}",
                ScoreImpact = -20
            });
        }
    }

    /// <summary>
    /// Flags employment that continues more than three months past the dissolution date, or
    /// current employment (no end date), when the status is dissolved, liquidation or administration.
    /// </summary>
    private static void CheckDissolutionDate(
        List<CompanyVerificationFlag> flags,
        DateOnly? claimedEndDate,
        DateOnly? dissolutionDate,
        string? companyStatus,
        string companyName)
    {
        var isDissolvedStatus = companyStatus?.ToLowerInvariant() is "dissolved" or "liquidation" or "administration";

        if (!dissolutionDate.HasValue || !isDissolvedStatus)
        {
            return;
        }

        // Allow 3 month buffer for wind-down
        var bufferDate = dissolutionDate.Value.AddMonths(3);

        if (claimedEndDate.HasValue && claimedEndDate.Value > bufferDate)
        {
            flags.Add(new CompanyVerificationFlag
            {
                Type = "EmploymentAtDissolvedCompany",
                Severity = "Critical",
                Message = $"Claimed employment at '{companyName}' until {claimedEndDate:MMM yyyy} but company was dissolved on {dissolutionDate:MMM yyyy}",
                ScoreImpact = -20
            });
        }
        else if (!claimedEndDate.HasValue) // Current employment at dissolved company
        {
            flags.Add(new CompanyVerificationFlag
            {
                Type = "CurrentEmploymentAtDissolvedCompany",
                Severity = "Critical",
                Message = $"Claims current employment at '{companyName}' but company was dissolved on {dissolutionDate:MMM yyyy}",
                ScoreImpact = -25
            });
        }
    }

    /// <summary>
    /// Flags non-director roles at a company whose last accounts type contains "dormant".
    /// </summary>
    private static void CheckDormantCompany(
        List<CompanyVerificationFlag> flags,
        string? accountsCategory,
        string? jobTitle,
        string companyName)
    {
        if (string.IsNullOrWhiteSpace(accountsCategory))
        {
            return;
        }

        var isDormant = accountsCategory.ToLowerInvariant().Contains("dormant");
        if (!isDormant)
        {
            return;
        }

        // Directors can maintain dormant companies, but other roles are suspicious
        var title = jobTitle?.ToLowerInvariant() ?? "";
        var isDirectorRole = title.Contains("director") || title.Contains("company secretary");

        if (!isDirectorRole)
        {
            flags.Add(new CompanyVerificationFlag
            {
                Type = "EmploymentAtDormantCompany",
                Severity = "Warning",
                Message = $"Claimed active employment as '{jobTitle}' at '{companyName}' which files dormant accounts",
                ScoreImpact = -10
            });
        }
    }

    /// <summary>
    /// Flags senior-management titles at a company whose last accounts type contains "micro".
    /// </summary>
    private static void CheckCompanySizeVsRole(
        List<CompanyVerificationFlag> flags,
        string? accountsCategory,
        string? jobTitle,
        string companyName)
    {
        if (string.IsNullOrWhiteSpace(accountsCategory) || string.IsNullOrWhiteSpace(jobTitle))
        {
            return;
        }

        var category = accountsCategory.ToLowerInvariant();
        var title = jobTitle.ToLowerInvariant();

        // Micro-entity: < 10 employees, < £632k turnover
        var isMicroEntity = category.Contains("micro");
        if (!isMicroEntity)
        {
            return;
        }

        // Check for senior management roles at micro companies.
        // VP acronyms are matched as whole words rather than as a raw "vp" substring.
        var isSeniorRole = SplitTitleWords(title).Overlaps(VpAcronyms) ||
                           title.Contains("vice president") ||
                           title.Contains("head of") ||
                           title.Contains("chief") ||
                           title.Contains("director of") ||
                           title.Contains("senior director");

        // At micro companies, having many senior roles is suspicious
        if (isSeniorRole)
        {
            flags.Add(new CompanyVerificationFlag
            {
                Type = "SeniorRoleAtMicroCompany",
                Severity = "Warning",
                Message = $"Claimed senior role '{jobTitle}' at '{companyName}' which files micro-entity accounts (typically <10 employees)",
                ScoreImpact = -10
            });
        }
    }

    /// <summary>
    /// Adds an informational flag when a tech job title is claimed at a company that has SIC codes
    /// but none of them is in <see cref="TechSicCodes"/>.
    /// </summary>
    private static void CheckSicCodeMismatch(
        List<CompanyVerificationFlag> flags,
        List<string>? sicCodes,
        string? jobTitle,
        string companyName)
    {
        if (sicCodes is null || sicCodes.Count == 0 || string.IsNullOrWhiteSpace(jobTitle))
        {
            return;
        }

        var title = jobTitle.ToLowerInvariant();

        // Check if this is a tech role.
        // "ai" is matched as a whole word so "Head of AI" is caught and "thai chef" is not.
        var isTechRole = title.Contains("software") ||
                         title.Contains("developer") ||
                         title.Contains("engineer") ||
                         title.Contains("programmer") ||
                         title.Contains("data scientist") ||
                         title.Contains("data analyst") ||
                         title.Contains("devops") ||
                         title.Contains("cloud") ||
                         title.Contains("machine learning") ||
                         SplitTitleWords(title).Contains("ai") ||
                         title.Contains("frontend") ||
                         title.Contains("backend") ||
                         title.Contains("full stack") ||
                         title.Contains("fullstack");

        if (!isTechRole)
        {
            return;
        }

        // Check if company has any tech SIC codes
        var hasTechSic = sicCodes.Any(s => TechSicCodes.Contains(s));
        if (hasTechSic)
        {
            return;
        }

        // Get the primary SIC code description (simplified - just show code)
        var primarySic = sicCodes.FirstOrDefault() ?? "Unknown";

        flags.Add(new CompanyVerificationFlag
        {
            Type = "SicCodeMismatch",
            Severity = "Info",
            Message = $"Tech role '{jobTitle}' at '{companyName}' (SIC: {primarySic}) - company is not registered as a technology business",
            ScoreImpact = -5
        });
    }

    /// <summary>
    /// Decides whether a claimed job title is plausible for the company type. Only PLCs are
    /// scrutinised: C-suite, board-level and VP-level titles there return <c>false</c> with a note.
    /// </summary>
    /// <param name="jobTitle">Claimed job title.</param>
    /// <param name="companyType">Company type string from the matched company.</param>
    /// <returns>
    /// <c>(null, null)</c> when either input is blank; <c>(false, note)</c> for a senior title at a
    /// PLC; <c>(true, null)</c> otherwise.
    /// </returns>
    private static (bool? IsPlausible, string? Notes) CheckJobTitlePlausibility(string? jobTitle, string? companyType)
    {
        if (string.IsNullOrWhiteSpace(jobTitle) || string.IsNullOrWhiteSpace(companyType))
        {
            return (null, null);
        }

        var type = companyType.Trim().ToLowerInvariant();

        // Check if this is a PLC (Public Limited Company) - these are large companies
        var isPlc = type.Contains("plc") || type.Contains("public limited");
        if (!isPlc)
        {
            return (true, null);
        }

        var title = jobTitle.Trim().ToLowerInvariant();
        var words = SplitTitleWords(title);

        // Covers "senior vice president" and "executive vice president" as well.
        var mentionsVicePresident = title.Contains("vice president");

        // Check for C-suite / very senior roles.
        // Acronyms are whole-word matches: a substring test for "cto" also hits "director",
        // "contractor" and "doctor", and "coo" hits "coordinator".
        // "president" only counts when it is not part of "vice president", so VP titles get
        // the VP note below instead of the C-suite one.
        var isCsuiteRole = words.Overlaps(CsuiteAcronyms) ||
                           title == "md" ||
                           CsuitePhrases.Any(phrase => title.Contains(phrase)) ||
                           (title.Contains("president") && !mentionsVicePresident);

        // Check for board-level roles. Only the bare title "director" counts;
        // "director of X" is ordinary management.
        var isBoardRole = title.Contains("board member") ||
                          title.Contains("non-executive director") ||
                          title.Contains("executive director") ||
                          title == "director";

        if (isCsuiteRole || isBoardRole)
        {
            return (false, $"Claimed senior role '{jobTitle}' at a PLC requires verification - C-suite positions at public companies are publicly disclosed");
        }

        // Check for VP/SVP at PLCs (also usually disclosed)
        var isVpRole = mentionsVicePresident || words.Overlaps(VpAcronyms);

        if (isVpRole)
        {
            return (false, $"Claimed VP-level role '{jobTitle}' at a PLC - senior positions at public companies should be verifiable");
        }

        return (true, null);
    }

    #endregion

    #region Helper Methods

    /// <summary>
    /// Looks for a non-expired cache row whose name fuzzy-matches <paramref name="companyName"/>
    /// at or above <see cref="FuzzyMatchThreshold"/>, returning the highest-scoring one or <c>null</c>.
    /// </summary>
    private async Task<CompanyCache?> FindCachedMatchAsync(string companyName)
    {
        var cutoffDate = DateTime.UtcNow.AddDays(-CacheExpirationDays);

        await using var dbContext = await _dbContextFactory.CreateDbContextAsync();

        // Fuzzy scoring cannot be translated to SQL, so the non-expired rows are loaded and scored
        // in memory. The rows are only read, so change tracking is skipped.
        var cachedCompanies = await dbContext.CompanyCache
            .AsNoTracking()
            .Where(c => c.CachedAt >= cutoffDate)
            .ToListAsync();

        if (cachedCompanies.Count == 0)
        {
            return null;
        }

        // Normalise the search term once instead of once per cached row.
        var normalizedSearch = companyName.ToUpperInvariant();

        var bestMatch = cachedCompanies
            .Where(c => !string.IsNullOrWhiteSpace(c.CompanyName))
            .Select(c => new { Company = c, Score = Fuzz.TokenSetRatio(normalizedSearch, c.CompanyName.ToUpperInvariant()) })
            .Where(m => m.Score >= FuzzyMatchThreshold)
            .OrderByDescending(m => m.Score)
            .FirstOrDefault();

        return bestMatch?.Company;
    }

    /// <summary>
    /// Picks the best search hit at or above <see cref="FuzzyMatchThreshold"/>. When a start date is
    /// claimed, hits incorporated on or before it (or with no parseable date) are preferred;
    /// otherwise, or if none qualify, the highest score wins.
    /// </summary>
    /// <returns>The chosen item and its score, or <c>null</c> when nothing reaches the threshold.</returns>
    private (CompaniesHouseSearchItem Item, int Score)? FindBestMatch(
        string companyName,
        List<CompaniesHouseSearchItem> items,
        DateOnly? claimedStartDate)
    {
        var normalizedSearch = companyName.ToUpperInvariant();

        var matches = items
            .Where(item => !string.IsNullOrWhiteSpace(item.Title))
            .Select(item => (Item: item, Score: Fuzz.TokenSetRatio(normalizedSearch, item.Title.ToUpperInvariant())))
            .Where(m => m.Score >= FuzzyMatchThreshold)
            .ToList();

        _logger.LogDebug("Found {Count} matches above threshold for '{CompanyName}'", matches.Count, companyName);

        foreach (var m in matches.Take(5))
        {
            _logger.LogDebug(" Match: {Title} ({Number}), Score: {Score}, DateOfCreation: {Date}",
                m.Item.Title, m.Item.CompanyNumber, m.Score, m.Item.DateOfCreation ?? "null");
        }

        if (matches.Count == 0)
        {
            return null;
        }

        // If we have a claimed start date, prefer companies that existed at that time
        if (claimedStartDate.HasValue)
        {
            _logger.LogDebug("Filtering for companies that existed at claimed start date: {StartDate}", claimedStartDate.Value);

            var existedAtStartDate = matches
                .Where(m =>
                {
                    var incDate = DateHelpers.ParseDate(m.Item.DateOfCreation);
                    var existed = incDate == null || incDate <= claimedStartDate.Value;
                    _logger.LogDebug(" {Title}: IncDate={IncDate}, Existed={Existed}",
                        m.Item.Title, incDate?.ToString() ?? "null", existed);
                    return existed;
                })
                .OrderByDescending(m => m.Score)
                .ToList();

            _logger.LogDebug("Companies that existed at start date: {Count}", existedAtStartDate.Count);

            // If any matches existed at the start date, prefer those
            if (existedAtStartDate.Count > 0)
            {
                _logger.LogDebug("Selected: {Title} ({Number})", existedAtStartDate[0].Item.Title, existedAtStartDate[0].Item.CompanyNumber);
                return existedAtStartDate[0];
            }
        }

        // Fall back to highest score if no temporal match
        var fallback = matches.OrderByDescending(m => m.Score).First();
        _logger.LogDebug("Falling back to highest score: {Title} ({Number})", fallback.Item.Title, fallback.Item.CompanyNumber);
        return fallback;
    }

    /// <summary>
    /// Inserts or refreshes the cache row for a matched company. A primary-key conflict caused by a
    /// concurrent insert is logged and ignored; other database errors propagate.
    /// </summary>
    private async Task CacheCompanyAsync(CompaniesHouseSearchItem item, CompaniesHouseCompany? details)
    {
        try
        {
            await using var dbContext = await _dbContextFactory.CreateDbContextAsync();

            var existingCache = await dbContext.CompanyCache
                .FirstOrDefaultAsync(c => c.CompanyNumber == item.CompanyNumber);

            var sicCodes = details?.SicCodes ?? item.SicCodes;
            var sicCodesJson = sicCodes != null ? JsonSerializer.Serialize(sicCodes) : null;
            var accountsCategory = details?.Accounts?.LastAccounts?.Type;

            if (existingCache is not null)
            {
                existingCache.CompanyName = item.Title;
                existingCache.Status = item.CompanyStatus ?? "Unknown";
                existingCache.CompanyType = item.CompanyType;
                existingCache.IncorporationDate = DateHelpers.ParseDate(item.DateOfCreation);
                existingCache.DissolutionDate = DateHelpers.ParseDate(item.DateOfCessation);
                existingCache.AccountsCategory = accountsCategory;
                existingCache.SicCodesJson = sicCodesJson;
                existingCache.CachedAt = DateTime.UtcNow;
            }
            else
            {
                var cacheEntry = new CompanyCache
                {
                    CompanyNumber = item.CompanyNumber,
                    CompanyName = item.Title,
                    Status = item.CompanyStatus ?? "Unknown",
                    CompanyType = item.CompanyType,
                    IncorporationDate = DateHelpers.ParseDate(item.DateOfCreation),
                    DissolutionDate = DateHelpers.ParseDate(item.DateOfCessation),
                    AccountsCategory = accountsCategory,
                    SicCodesJson = sicCodesJson,
                    CachedAt = DateTime.UtcNow
                };

                dbContext.CompanyCache.Add(cacheEntry);
            }

            await dbContext.SaveChangesAsync();
        }
        catch (DbUpdateException ex) when (ex.InnerException?.Message.Contains("PK_CompanyCache") == true)
        {
            // Race condition: another task already cached this company - ignore
            _logger.LogDebug("Company {CompanyNumber} already cached by another task", item.CompanyNumber);
        }
    }

    /// <summary>
    /// Builds a verified result from a cache row, running the same checks as the live path.
    /// Malformed cached SIC-code JSON is ignored, leaving <c>SicCodes</c> as <c>null</c>.
    /// </summary>
    private CompanyVerificationResult CreateResultFromCache(
        CompanyCache cached,
        string claimedCompany,
        DateOnly? startDate,
        DateOnly? endDate,
        string? jobTitle,
        List<CompanyVerificationFlag> flags)
    {
        var matchScore = Fuzz.TokenSetRatio(
            claimedCompany.ToUpperInvariant(),
            cached.CompanyName.ToUpperInvariant());

        List<string>? sicCodes = null;
        if (!string.IsNullOrEmpty(cached.SicCodesJson))
        {
            try
            {
                sicCodes = JsonSerializer.Deserialize<List<string>>(cached.SicCodesJson);
            }
            catch (JsonException)
            {
                // Ignore malformed JSON in cache
            }
        }

        // Run all verification checks (shared with the live path)
        var (jobPlausible, jobNotes) = RunVerificationChecks(
            flags, cached.CompanyName, startDate, endDate, jobTitle,
            cached.IncorporationDate, cached.DissolutionDate, cached.Status, cached.CompanyType,
            cached.AccountsCategory, sicCodes);

        return new CompanyVerificationResult
        {
            ClaimedCompany = claimedCompany,
            MatchedCompanyName = cached.CompanyName,
            MatchedCompanyNumber = cached.CompanyNumber,
            MatchScore = matchScore,
            IsVerified = true,
            VerificationNotes = null,
            ClaimedStartDate = startDate,
            ClaimedEndDate = endDate,
            CompanyType = cached.CompanyType,
            CompanyStatus = cached.Status,
            IncorporationDate = cached.IncorporationDate,
            DissolutionDate = cached.DissolutionDate,
            AccountsCategory = cached.AccountsCategory,
            SicCodes = sicCodes,
            ClaimedJobTitle = jobTitle,
            JobTitlePlausible = jobPlausible,
            JobTitleNotes = jobNotes,
            Flags = flags
        };
    }

    /// <summary>Builds an unverified result carrying the claim details and the reason for failure.</summary>
    private static CompanyVerificationResult CreateUnverifiedResult(
        string companyName,
        DateOnly? startDate,
        DateOnly? endDate,
        string? jobTitle,
        string reason)
    {
        return new CompanyVerificationResult
        {
            ClaimedCompany = companyName,
            MatchedCompanyName = null,
            MatchedCompanyNumber = null,
            MatchScore = 0,
            IsVerified = false,
            VerificationNotes = reason,
            ClaimedStartDate = startDate,
            ClaimedEndDate = endDate,
            ClaimedJobTitle = jobTitle
        };
    }

    #endregion
}