2026-01-20 20:00:24 +01:00
using System.Text.Json ;
2026-01-18 19:20:50 +01:00
using FuzzySharp ;
using Microsoft.EntityFrameworkCore ;
using Microsoft.Extensions.Logging ;
using TrueCV.Application.DTOs ;
2026-01-20 16:45:43 +01:00
using TrueCV.Application.Helpers ;
2026-01-18 19:20:50 +01:00
using TrueCV.Application.Interfaces ;
using TrueCV.Application.Models ;
using TrueCV.Domain.Entities ;
using TrueCV.Infrastructure.Data ;
using TrueCV.Infrastructure.ExternalApis ;
namespace TrueCV.Infrastructure.Services ;
public sealed class CompanyVerifierService : ICompanyVerifierService
{
// Minimum fuzzy-match score a company name must reach to be accepted as a match.
private const int FuzzyMatchThreshold = 85;

// Cache entries older than this many days are excluded from cache lookups.
private const int CacheExpirationDays = 30;

// SIC codes for tech/software companies
private static readonly HashSet<string> TechSicCodes =
[
    // Computer programming and consultancy
    "62011", "62012", "62020", "62030", "62090",
    // Data processing, hosting
    "63110", "63120",
    // Publishing of computer games, other software
    "58210", "58290",
    // Telecommunications
    "61100", "61200", "61300", "61900"
];

// Collaborators supplied through the constructor.
private readonly CompaniesHouseClient _companiesHouseClient;
private readonly IDbContextFactory<ApplicationDbContext> _dbContextFactory;
private readonly ILogger<CompanyVerifierService> _logger;
2026-01-18 19:20:50 +01:00
/// <summary>
/// Creates the service from its injected collaborators.
/// </summary>
/// <param name="companiesHouseClient">Client used for company search, company detail and officer lookups.</param>
/// <param name="dbContextFactory">Factory that creates a database context per cache operation.</param>
/// <param name="logger">Logger for diagnostic output.</param>
public CompanyVerifierService(
    CompaniesHouseClient companiesHouseClient,
    IDbContextFactory<ApplicationDbContext> dbContextFactory,
    ILogger<CompanyVerifierService> logger)
{
    (_companiesHouseClient, _dbContextFactory, _logger) =
        (companiesHouseClient, dbContextFactory, logger);
}
/// <summary>
/// Verifies a claimed employer, first against the local company cache and then,
/// on a cache miss, against a Companies House search.
/// </summary>
/// <param name="companyName">Company name as claimed; must not be null or whitespace.</param>
/// <param name="startDate">Claimed employment start date, if known.</param>
/// <param name="endDate">Claimed employment end date, or null when not given.</param>
/// <param name="jobTitle">Claimed job title, used by the role-related checks.</param>
/// <returns>
/// A verified result (with any flags raised by the checks) when a match is found;
/// otherwise an unverified result whose notes explain why.
/// </returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="companyName"/> is null or whitespace.</exception>
public async Task<CompanyVerificationResult> VerifyCompanyAsync(
    string companyName,
    DateOnly? startDate,
    DateOnly? endDate,
    string? jobTitle = null)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(companyName);

    _logger.LogDebug("Verifying company: {CompanyName}", companyName);

    var flags = new List<CompanyVerificationFlag>();

    // A recent cached match short-circuits the remote lookup.
    if (await FindCachedMatchAsync(companyName) is { } cachedCompany)
    {
        _logger.LogDebug("Found cached company match for: {CompanyName}", companyName);
        return CreateResultFromCache(cachedCompany, companyName, startDate, endDate, jobTitle, flags);
    }

    try
    {
        // Cache miss: search Companies House.
        var searchResponse = await _companiesHouseClient.SearchCompaniesAsync(companyName);
        var candidates = searchResponse?.Items;

        if (candidates is null || candidates.Count == 0)
        {
            _logger.LogDebug("No companies found for: {CompanyName}", companyName);
            return CreateUnverifiedResult(
                companyName, startDate, endDate, jobTitle,
                "No matching company found in Companies House");
        }

        // Best fuzzy match, preferring companies that existed at the claimed start date.
        var bestMatch = FindBestMatch(companyName, candidates, startDate);
        if (!bestMatch.HasValue)
        {
            _logger.LogDebug("No fuzzy match above threshold for: {CompanyName}", companyName);
            return CreateUnverifiedResult(
                companyName, startDate, endDate, jobTitle,
                "Company name could not be verified against official records");
        }

        var (matchedItem, matchScore) = bestMatch.Value;

        // Full company details supply SIC codes and the accounts category.
        var companyDetails = await _companiesHouseClient.GetCompanyAsync(matchedItem.CompanyNumber);

        // Store the match (with details) so later lookups can be served from cache.
        await CacheCompanyAsync(matchedItem, companyDetails);

        _logger.LogInformation(
            "Verified company {ClaimedName} matched to {MatchedName} with score {Score}%",
            companyName, matchedItem.Title, matchScore);

        // Gather the facts the checks operate on.
        var matchedName = matchedItem.Title;
        var incorporationDate = DateHelpers.ParseDate(matchedItem.DateOfCreation);
        var dissolutionDate = DateHelpers.ParseDate(matchedItem.DateOfCessation);
        var companyStatus = matchedItem.CompanyStatus;
        var companyType = matchedItem.CompanyType;
        var sicCodes = companyDetails?.SicCodes ?? matchedItem.SicCodes;
        var accountsCategory = companyDetails?.Accounts?.LastAccounts?.Type;

        // Checks 1-5 append to the shared flag list.
        CheckIncorporationDate(flags, startDate, incorporationDate, matchedName);
        CheckDissolutionDate(flags, endDate, dissolutionDate, companyStatus, matchedName);
        CheckDormantCompany(flags, accountsCategory, jobTitle, matchedName);
        CheckCompanySizeVsRole(flags, accountsCategory, jobTitle, matchedName);
        CheckSicCodeMismatch(flags, sicCodes, jobTitle, matchedName);

        // Check 6 returns its verdict; only an explicit "not plausible" raises a flag.
        var (jobPlausible, jobNotes) = CheckJobTitlePlausibility(jobTitle, companyType);
        if (jobPlausible is false)
        {
            var implausibleTitleFlag = new CompanyVerificationFlag
            {
                Type = "ImplausibleJobTitle",
                Severity = "Critical",
                Message = jobNotes ?? "Job title requires verification",
                ScoreImpact = -15
            };
            flags.Add(implausibleTitleFlag);
        }

        return new CompanyVerificationResult
        {
            ClaimedCompany = companyName,
            MatchedCompanyName = matchedName,
            MatchedCompanyNumber = matchedItem.CompanyNumber,
            MatchScore = matchScore,
            IsVerified = true,
            VerificationNotes = null,
            ClaimedStartDate = startDate,
            ClaimedEndDate = endDate,
            CompanyType = companyType,
            CompanyStatus = companyStatus,
            IncorporationDate = incorporationDate,
            DissolutionDate = dissolutionDate,
            AccountsCategory = accountsCategory,
            SicCodes = sicCodes,
            ClaimedJobTitle = jobTitle,
            JobTitlePlausible = jobPlausible,
            JobTitleNotes = jobNotes,
            Flags = flags
        };
    }
    catch (CompaniesHouseRateLimitException ex)
    {
        // Rate limiting is reported as an unverified result rather than an error.
        _logger.LogWarning(ex, "Rate limit hit while verifying company: {CompanyName}", companyName);
        return CreateUnverifiedResult(
            companyName, startDate, endDate, jobTitle,
            "Verification temporarily unavailable due to rate limiting");
    }
}
/// <summary>
/// Searches Companies House and maps each hit to a <see cref="CompanySearchResult"/>.
/// </summary>
/// <param name="query">Search text; must not be null or whitespace.</param>
/// <returns>The mapped results, or an empty list when the response carries no items.</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="query"/> is null or whitespace.</exception>
public async Task<List<CompanySearchResult>> SearchCompaniesAsync(string query)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(query);

    _logger.LogDebug("Searching companies for query: {Query}", query);

    var hits = (await _companiesHouseClient.SearchCompaniesAsync(query))?.Items;
    if (hits is null)
    {
        return [];
    }

    var results = new List<CompanySearchResult>();
    foreach (var hit in hits)
    {
        results.Add(new CompanySearchResult
        {
            CompanyNumber = hit.CompanyNumber,
            CompanyName = hit.Title,
            // A missing status is reported as "Unknown".
            CompanyStatus = hit.CompanyStatus ?? "Unknown",
            IncorporationDate = DateHelpers.ParseDate(hit.DateOfCreation),
            AddressSnippet = hit.AddressSnippet
        });
    }

    return results;
}
2026-01-20 20:00:24 +01:00
/// <summary>
/// Checks whether a candidate appears among a company's officers in a director or
/// secretary role, optionally requiring the appointment to overlap the claimed dates.
/// </summary>
/// <param name="companyNumber">Companies House company number to look up.</param>
/// <param name="candidateName">Candidate's name as claimed.</param>
/// <param name="startDate">Claimed start date, if known.</param>
/// <param name="endDate">Claimed end date, if known.</param>
/// <returns>
/// <c>true</c> when a matching officer is found (and dates overlap, when dates were supplied);
/// <c>false</c> when officers were returned but none matched;
/// <c>null</c> when inputs are blank, no officers were returned, or the lookup failed.
/// </returns>
public async Task<bool?> VerifyDirectorAsync(
    string companyNumber,
    string candidateName,
    DateOnly? startDate,
    DateOnly? endDate)
{
    // Minimum fuzzy score (0-100) for an officer name to count as the candidate.
    const int nameMatchThreshold = 80;

    if (string.IsNullOrWhiteSpace(companyNumber) || string.IsNullOrWhiteSpace(candidateName))
    {
        return null;
    }

    try
    {
        var officers = await _companiesHouseClient.GetOfficersAsync(companyNumber);
        if (officers?.Items is null || officers.Items.Count == 0)
        {
            _logger.LogDebug("No officers found for company {CompanyNumber}", companyNumber);
            return null;
        }

        // Normalize candidate name for comparison
        var normalizedCandidate = NormalizeName(candidateName);
        var hasClaimedDates = startDate.HasValue || endDate.HasValue;

        foreach (var officer in officers.Items)
        {
            // Only director and secretary roles are considered.
            var role = officer.OfficerRole?.ToLowerInvariant() ?? "";
            if (!role.Contains("director") && !role.Contains("secretary"))
            {
                continue;
            }

            // Fuzzy match the name
            var normalizedOfficer = NormalizeName(officer.Name);
            if (Fuzz.Ratio(normalizedCandidate, normalizedOfficer) < nameMatchThreshold)
            {
                continue;
            }

            // If no claimed dates, a name match alone is sufficient
            if (!hasClaimedDates)
            {
                _logger.LogDebug(
                    "Found matching director {OfficerName} for candidate {CandidateName} at company {CompanyNumber}",
                    officer.Name, candidateName, companyNumber);
                return true;
            }

            // Otherwise the claimed period must overlap the appointment period
            var appointedOn = DateHelpers.ParseDate(officer.AppointedOn);
            var resignedOn = DateHelpers.ParseDate(officer.ResignedOn);
            if (DatesOverlap(startDate, endDate, appointedOn, resignedOn))
            {
                _logger.LogDebug(
                    "Verified director {OfficerName} matches candidate {CandidateName} with overlapping dates",
                    officer.Name, candidateName);
                return true;
            }
        }

        _logger.LogDebug(
            "No matching director found for candidate {CandidateName} at company {CompanyNumber}",
            candidateName, companyNumber);
        return false;
    }
    catch (CompaniesHouseRateLimitException ex)
    {
        // Pass the exception to the logger, matching how VerifyCompanyAsync reports rate limiting.
        _logger.LogWarning(ex, "Rate limit hit while verifying director for company {CompanyNumber}", companyNumber);
        return null;
    }
    catch (Exception ex)
    {
        // Any other failure yields "unknown" (null) instead of propagating to the caller.
        _logger.LogError(ex, "Error verifying director for company {CompanyNumber}", companyNumber);
        return null;
    }
}
/// <summary>
/// Upper-cases and trims a person's name and, when it contains a comma,
/// swaps the parts around the first comma ("SURNAME, Firstname" becomes "FIRSTNAME SURNAME").
/// </summary>
/// <param name="name">Name to normalize.</param>
/// <returns>The normalized name, or an empty string for null/blank input.</returns>
private static string NormalizeName(string name)
{
    if (string.IsNullOrWhiteSpace(name))
    {
        return "";
    }

    var upper = name.ToUpperInvariant().Trim();

    // Companies House often stores names as "SURNAME, Firstname".
    var commaIndex = upper.IndexOf(',');
    if (commaIndex < 0)
    {
        return upper;
    }

    var beforeComma = upper[..commaIndex].Trim();
    var afterComma = upper[(commaIndex + 1)..].Trim();
    return $"{afterComma} {beforeComma}";
}
/// <summary>
/// Determines whether two possibly open-ended date ranges overlap (bounds inclusive).
/// </summary>
/// <param name="start1">Start of the first range; null means unbounded.</param>
/// <param name="end1">End of the first range; null means unbounded.</param>
/// <param name="start2">Start of the second range; null means unbounded.</param>
/// <param name="end2">End of the second range; null means unbounded.</param>
/// <returns>
/// <c>true</c> when either range has no bounds at all, or when the ranges intersect.
/// </returns>
private static bool DatesOverlap(DateOnly? start1, DateOnly? end1, DateOnly? start2, DateOnly? end2)
{
    // A range with neither bound is treated as overlapping everything.
    var firstHasNoBounds = start1 is null && end1 is null;
    var secondHasNoBounds = start2 is null && end2 is null;
    if (firstHasNoBounds || secondHasNoBounds)
    {
        return true;
    }

    // Missing bounds extend to the extremes of DateOnly.
    var firstStart = start1.GetValueOrDefault(DateOnly.MinValue);
    var firstEnd = end1.GetValueOrDefault(DateOnly.MaxValue);
    var secondStart = start2.GetValueOrDefault(DateOnly.MinValue);
    var secondEnd = end2.GetValueOrDefault(DateOnly.MaxValue);

    // Ranges are disjoint only when one starts after the other has ended.
    var disjoint = firstStart > secondEnd || secondStart > firstEnd;
    return !disjoint;
}
#region Verification Checks
/// <summary>
/// Adds a critical flag when the claimed start date is earlier than the company's incorporation date.
/// </summary>
/// <param name="flags">List that receives the flag.</param>
/// <param name="claimedStartDate">Claimed employment start date; the check is skipped when null.</param>
/// <param name="incorporationDate">Company incorporation date; the check is skipped when null.</param>
/// <param name="companyName">Company name used in the flag message.</param>
private static void CheckIncorporationDate(
    List<CompanyVerificationFlag> flags,
    DateOnly? claimedStartDate,
    DateOnly? incorporationDate,
    string companyName)
{
    // Both dates are required for a comparison.
    if (claimedStartDate is not { } claimedStart || incorporationDate is not { } incorporatedOn)
    {
        return;
    }

    if (claimedStart >= incorporatedOn)
    {
        return;
    }

    var flag = new CompanyVerificationFlag
    {
        Type = "EmploymentBeforeIncorporation",
        Severity = "Critical",
        Message = $"Claimed employment at '{companyName}' starting {claimedStartDate:MMM yyyy} is before company incorporation date {incorporationDate:MMM yyyy}",
        ScoreImpact = -20
    };
    flags.Add(flag);
}
/// <summary>
/// Flags employment claims that extend beyond (or are still current after) the
/// cessation of a company whose status is dissolved, liquidation or administration.
/// </summary>
/// <param name="flags">List that receives any flag.</param>
/// <param name="claimedEndDate">Claimed end date; null is treated as current employment.</param>
/// <param name="dissolutionDate">Company cessation date; the check is skipped when null.</param>
/// <param name="companyStatus">Company status, compared case-insensitively.</param>
/// <param name="companyName">Company name used in the flag message.</param>
private static void CheckDissolutionDate(
    List<CompanyVerificationFlag> flags,
    DateOnly? claimedEndDate,
    DateOnly? dissolutionDate,
    string? companyStatus,
    string companyName)
{
    var status = companyStatus?.ToLowerInvariant();
    var hasCeasedStatus = status == "dissolved" || status == "liquidation" || status == "administration";

    if (!hasCeasedStatus || !dissolutionDate.HasValue)
    {
        return;
    }

    // Allow 3 month buffer for wind-down
    var latestPlausibleEnd = dissolutionDate.Value.AddMonths(3);

    if (!claimedEndDate.HasValue)
    {
        // No end date: the claim is of current employment at a company that has ceased.
        flags.Add(new CompanyVerificationFlag
        {
            Type = "CurrentEmploymentAtDissolvedCompany",
            Severity = "Critical",
            Message = $"Claims current employment at '{companyName}' but company was dissolved on {dissolutionDate:MMM yyyy}",
            ScoreImpact = -25
        });
        return;
    }

    if (claimedEndDate.Value > latestPlausibleEnd)
    {
        flags.Add(new CompanyVerificationFlag
        {
            Type = "EmploymentAtDissolvedCompany",
            Severity = "Critical",
            Message = $"Claimed employment at '{companyName}' until {claimedEndDate:MMM yyyy} but company was dissolved on {dissolutionDate:MMM yyyy}",
            ScoreImpact = -20
        });
    }
}
/// <summary>
/// Adds a warning flag when employment is claimed at a company whose accounts
/// category contains "dormant", unless the job title is a director or company secretary role.
/// </summary>
/// <param name="flags">List that receives the flag.</param>
/// <param name="accountsCategory">Accounts category; the check is skipped when null or blank.</param>
/// <param name="jobTitle">Claimed job title; may be null or blank.</param>
/// <param name="companyName">Company name used in the flag message.</param>
private static void CheckDormantCompany(
    List<CompanyVerificationFlag> flags,
    string? accountsCategory,
    string? jobTitle,
    string companyName)
{
    if (string.IsNullOrWhiteSpace(accountsCategory))
    {
        return;
    }

    if (!accountsCategory.ToLowerInvariant().Contains("dormant"))
    {
        return;
    }

    // Directors can maintain dormant companies, but other roles are suspicious
    var title = jobTitle?.ToLowerInvariant() ?? "";
    if (title.Contains("director") || title.Contains("company secretary"))
    {
        return;
    }

    // With no job title supplied, omit the role so the message does not read "as ''".
    var message = string.IsNullOrWhiteSpace(jobTitle)
        ? $"Claimed active employment at '{companyName}' which files dormant accounts"
        : $"Claimed active employment as '{jobTitle}' at '{companyName}' which files dormant accounts";

    flags.Add(new CompanyVerificationFlag
    {
        Type = "EmploymentAtDormantCompany",
        Severity = "Warning",
        Message = message,
        ScoreImpact = -10
    });
}
/// <summary>
/// Adds a warning flag when a senior-sounding job title is claimed at a company
/// whose accounts category contains "micro".
/// </summary>
/// <param name="flags">List that receives the flag.</param>
/// <param name="accountsCategory">Accounts category; the check is skipped when null or blank.</param>
/// <param name="jobTitle">Claimed job title; the check is skipped when null or blank.</param>
/// <param name="companyName">Company name used in the flag message.</param>
private static void CheckCompanySizeVsRole(
    List<CompanyVerificationFlag> flags,
    string? accountsCategory,
    string? jobTitle,
    string companyName)
{
    if (string.IsNullOrWhiteSpace(accountsCategory) || string.IsNullOrWhiteSpace(jobTitle))
    {
        return;
    }

    // Micro-entity: < 10 employees, < £632k turnover
    if (!accountsCategory.ToLowerInvariant().Contains("micro"))
    {
        return;
    }

    // Substrings that mark a title as senior management.
    string[] seniorMarkers =
    [
        "vp",
        "vice president",
        "head of",
        "chief",
        "director of",
        "senior director"
    ];

    var loweredTitle = jobTitle.ToLowerInvariant();
    var claimsSeniorRole = false;
    foreach (var marker in seniorMarkers)
    {
        if (loweredTitle.Contains(marker))
        {
            claimsSeniorRole = true;
            break;
        }
    }

    if (!claimsSeniorRole)
    {
        return;
    }

    flags.Add(new CompanyVerificationFlag
    {
        Type = "SeniorRoleAtMicroCompany",
        Severity = "Warning",
        Message = $"Claimed senior role '{jobTitle}' at '{companyName}' which files micro-entity accounts (typically <10 employees)",
        ScoreImpact = -10
    });
}
/// <summary>
/// Adds an informational flag when a technology job title is claimed at a company
/// that has none of the SIC codes in <c>TechSicCodes</c>.
/// </summary>
/// <param name="flags">List that receives the flag.</param>
/// <param name="sicCodes">Company SIC codes; the check is skipped when null or empty.</param>
/// <param name="jobTitle">Claimed job title; the check is skipped when null or blank.</param>
/// <param name="companyName">Company name used in the flag message.</param>
private static void CheckSicCodeMismatch(
    List<CompanyVerificationFlag> flags,
    List<string>? sicCodes,
    string? jobTitle,
    string companyName)
{
    if (sicCodes is null || sicCodes.Count == 0 || string.IsNullOrWhiteSpace(jobTitle))
    {
        return;
    }

    var title = jobTitle.ToLowerInvariant();

    // Phrases that identify a tech role wherever they appear in the title.
    string[] techPhrases =
    [
        "software", "developer", "engineer", "programmer",
        "data scientist", "data analyst", "devops", "cloud",
        "machine learning", "frontend", "backend", "full stack", "fullstack"
    ];

    // "ai" must be a whole word: a substring test on "ai " also matches titles such as
    // "thai chef" and misses "head of ai" or "ai/ml lead".
    char[] wordSeparators = [' ', '\t', '/', '\\', '-', ',', '.', '&', '(', ')', ':', ';', '|'];
    var titleWords = title.Split(wordSeparators, StringSplitOptions.RemoveEmptyEntries);

    var isTechRole = techPhrases.Any(phrase => title.Contains(phrase))
        || Array.IndexOf(titleWords, "ai") >= 0;
    if (!isTechRole)
    {
        return;
    }

    // Nothing to flag when the company has any tech SIC code.
    if (sicCodes.Any(code => TechSicCodes.Contains(code)))
    {
        return;
    }

    // Only the first SIC code is shown (code, not description).
    var primarySic = sicCodes.FirstOrDefault() ?? "Unknown";
    flags.Add(new CompanyVerificationFlag
    {
        Type = "SicCodeMismatch",
        Severity = "Info",
        Message = $"Tech role '{jobTitle}' at '{companyName}' (SIC: {primarySic}) - company is not registered as a technology business",
        ScoreImpact = -5
    });
}
/// <summary>
/// Judges whether a claimed job title is plausible for the given company type.
/// C-suite, board-level and VP-level titles at a PLC are reported as implausible.
/// </summary>
/// <param name="jobTitle">Claimed job title.</param>
/// <param name="companyType">Company type string; a value containing "plc" or "public limited" is treated as a PLC.</param>
/// <returns>
/// <c>(null, null)</c> when either input is null or blank; <c>(false, notes)</c> for a senior
/// title at a PLC; otherwise <c>(true, null)</c>.
/// </returns>
private static (bool? IsPlausible, string? Notes) CheckJobTitlePlausibility(string? jobTitle, string? companyType)
{
    if (string.IsNullOrWhiteSpace(jobTitle) || string.IsNullOrWhiteSpace(companyType))
    {
        return (null, null);
    }

    var title = jobTitle.Trim().ToLowerInvariant();
    var type = companyType.Trim().ToLowerInvariant();

    // Only PLCs (Public Limited Companies) are subject to these checks.
    var isPlc = type.Contains("plc") || type.Contains("public limited");
    if (!isPlc)
    {
        return (true, null);
    }

    // Acronyms are matched as whole words. Substring tests misfire on ordinary titles:
    // "cto" occurs in "director"/"doctor"/"contractor", "coo" in "coordinator", "vp" in "vpn".
    char[] wordSeparators = [' ', '\t', '/', '\\', '-', ',', '.', '&', '(', ')', ':', ';', '|'];
    var titleWords = title.Split(wordSeparators, StringSplitOptions.RemoveEmptyEntries);
    bool HasWord(string word) => Array.IndexOf(titleWords, word) >= 0;

    var isVicePresident = title.Contains("vice president");

    // C-suite / very senior roles. "president" excludes "vice president" so the VP branch is reachable.
    var isCsuiteRole = HasWord("ceo") || title.Contains("chief executive") ||
                       HasWord("cto") || title.Contains("chief technology") ||
                       HasWord("cfo") || title.Contains("chief financial") ||
                       HasWord("coo") || title.Contains("chief operating") ||
                       HasWord("cio") || title.Contains("chief information") ||
                       title.Contains("managing director") ||
                       title == "md" ||
                       title.Contains("chairman") ||
                       title.Contains("chairwoman") ||
                       title.Contains("chairperson") ||
                       (title.Contains("president") && !isVicePresident);

    // Board-level roles. "executive director" also covers "non-executive director";
    // a bare "director" counts, whereas "director of X" does not.
    var isBoardRole = title.Contains("board member") ||
                      title.Contains("executive director") ||
                      title == "director";

    if (isCsuiteRole || isBoardRole)
    {
        return (false, $"Claimed senior role '{jobTitle}' at a PLC requires verification - C-suite positions at public companies are publicly disclosed");
    }

    // VP/SVP/EVP at PLCs (also usually disclosed)
    var isVpRole = isVicePresident || HasWord("vp") || HasWord("svp") || HasWord("evp");
    if (isVpRole)
    {
        return (false, $"Claimed VP-level role '{jobTitle}' at a PLC - senior positions at public companies should be verifiable");
    }

    return (true, null);
}
#endregion
#region Helper Methods
2026-01-18 19:20:50 +01:00
/// <summary>
/// Looks for a non-expired cached company whose name fuzzy-matches the claimed name.
/// </summary>
/// <param name="companyName">Claimed company name.</param>
/// <returns>
/// The highest-scoring cache entry at or above <c>FuzzyMatchThreshold</c>, or null when none qualifies.
/// </returns>
private async Task<CompanyCache?> FindCachedMatchAsync(string companyName)
{
    var cutoffDate = DateTime.UtcNow.AddDays(-CacheExpirationDays);

    // Upper-case the search term once rather than once per cached row.
    var normalizedSearch = companyName.ToUpperInvariant();

    await using var dbContext = await _dbContextFactory.CreateDbContextAsync();

    // The entries are only read and the context is disposed on return, so change tracking is skipped.
    var cachedCompanies = await dbContext.CompanyCache
        .AsNoTracking()
        .Where(c => c.CachedAt >= cutoffDate)
        .ToListAsync();

    // Fuzzy scoring cannot run in the database, so it is done in memory.
    return cachedCompanies
        .Where(c => !string.IsNullOrWhiteSpace(c.CompanyName))
        .Select(c => (Company: c, Score: Fuzz.TokenSetRatio(normalizedSearch, c.CompanyName.ToUpperInvariant())))
        .Where(m => m.Score >= FuzzyMatchThreshold)
        .OrderByDescending(m => m.Score)
        .Select(m => m.Company)
        .FirstOrDefault();
}
/// <summary>
/// Picks the best fuzzy match for a claimed company name from a list of search results.
/// </summary>
/// <param name="companyName">Claimed company name.</param>
/// <param name="items">Search results to score.</param>
/// <param name="claimedStartDate">
/// Claimed start date; when supplied, results whose creation date is unknown or not
/// after this date are preferred.
/// </param>
/// <returns>
/// The chosen item with its score, or null when no result reaches <c>FuzzyMatchThreshold</c>.
/// </returns>
private static (CompaniesHouseSearchItem Item, int Score)? FindBestMatch(
    string companyName,
    List<CompaniesHouseSearchItem> items,
    DateOnly? claimedStartDate)
{
    var upperClaimed = companyName.ToUpperInvariant();

    // Score every titled result and keep those at or above the threshold.
    var qualifying = new List<(CompaniesHouseSearchItem Item, int Score)>();
    foreach (var candidate in items)
    {
        if (string.IsNullOrWhiteSpace(candidate.Title))
        {
            continue;
        }

        var score = Fuzz.TokenSetRatio(upperClaimed, candidate.Title.ToUpperInvariant());
        if (score >= FuzzyMatchThreshold)
        {
            qualifying.Add((candidate, score));
        }
    }

    if (qualifying.Count == 0)
    {
        return null;
    }

    // OrderByDescending is stable, so equally scored results keep their original order.
    var ranked = qualifying.OrderByDescending(m => m.Score).ToList();

    if (claimedStartDate is { } claimedStart)
    {
        // Prefer companies that existed at the claimed start date; an unknown
        // creation date is treated as "existed".
        var existedAtStart = ranked
            .Where(m =>
            {
                var createdOn = DateHelpers.ParseDate(m.Item.DateOfCreation);
                return createdOn == null || createdOn <= claimedStart;
            })
            .ToList();

        if (existedAtStart.Count > 0)
        {
            return existedAtStart[0];
        }
    }

    // Fall back to the highest score when no temporal match exists.
    return ranked[0];
}
2026-01-20 20:00:24 +01:00
/// <summary>
/// Inserts or refreshes the cache entry for a matched company.
/// </summary>
/// <param name="item">Search result supplying name, status, type and dates.</param>
/// <param name="details">Optional full company details supplying SIC codes and accounts category.</param>
private async Task CacheCompanyAsync(CompaniesHouseSearchItem item, CompaniesHouseCompany? details)
{
    try
    {
        await using var dbContext = await _dbContextFactory.CreateDbContextAsync();

        var cached = await dbContext.CompanyCache
            .FirstOrDefaultAsync(c => c.CompanyNumber == item.CompanyNumber);

        // Values shared by the insert and update paths; details take precedence for SIC codes.
        var codes = details?.SicCodes ?? item.SicCodes;
        var serializedCodes = codes is null ? null : JsonSerializer.Serialize(codes);
        var accountsType = details?.Accounts?.LastAccounts?.Type;
        var status = item.CompanyStatus ?? "Unknown";
        var incorporatedOn = DateHelpers.ParseDate(item.DateOfCreation);
        var dissolvedOn = DateHelpers.ParseDate(item.DateOfCessation);
        var cachedAt = DateTime.UtcNow;

        if (cached is null)
        {
            dbContext.CompanyCache.Add(new CompanyCache
            {
                CompanyNumber = item.CompanyNumber,
                CompanyName = item.Title,
                Status = status,
                CompanyType = item.CompanyType,
                IncorporationDate = incorporatedOn,
                DissolutionDate = dissolvedOn,
                AccountsCategory = accountsType,
                SicCodesJson = serializedCodes,
                CachedAt = cachedAt
            });
        }
        else
        {
            cached.CompanyName = item.Title;
            cached.Status = status;
            cached.CompanyType = item.CompanyType;
            cached.IncorporationDate = incorporatedOn;
            cached.DissolutionDate = dissolvedOn;
            cached.AccountsCategory = accountsType;
            cached.SicCodesJson = serializedCodes;
            cached.CachedAt = cachedAt;
        }

        await dbContext.SaveChangesAsync();
    }
    catch (DbUpdateException ex) when (ex.InnerException?.Message.Contains("PK_CompanyCache") == true)
    {
        // Race condition: another task already cached this company - ignore
        _logger.LogDebug("Company {CompanyNumber} already cached by another task", item.CompanyNumber);
    }
}
2026-01-20 20:00:24 +01:00
/// <summary>
/// Builds a verified result from a cached company, running the same checks as a live lookup.
/// </summary>
/// <param name="cached">Cache entry that matched the claimed company.</param>
/// <param name="claimedCompany">Company name as claimed.</param>
/// <param name="startDate">Claimed start date, if known.</param>
/// <param name="endDate">Claimed end date, if known.</param>
/// <param name="jobTitle">Claimed job title, if known.</param>
/// <param name="flags">List that receives any flags raised; it is also attached to the result.</param>
/// <returns>A verified result populated from the cache entry.</returns>
private CompanyVerificationResult CreateResultFromCache(
    CompanyCache cached,
    string claimedCompany,
    DateOnly? startDate,
    DateOnly? endDate,
    string? jobTitle,
    List<CompanyVerificationFlag> flags)
{
    var matchScore = Fuzz.TokenSetRatio(
        claimedCompany.ToUpperInvariant(),
        cached.CompanyName.ToUpperInvariant());

    List<string>? sicCodes = null;
    if (!string.IsNullOrEmpty(cached.SicCodesJson))
    {
        try
        {
            sicCodes = JsonSerializer.Deserialize<List<string>>(cached.SicCodesJson);
        }
        catch (JsonException ex)
        {
            // A corrupt cache entry must not fail verification: continue without SIC codes,
            // but log it so the bad row can be found.
            _logger.LogWarning(
                ex,
                "Ignoring malformed SIC codes JSON in cache for company {CompanyNumber}",
                cached.CompanyNumber);
        }
    }

    // Run all verification checks
    CheckIncorporationDate(flags, startDate, cached.IncorporationDate, cached.CompanyName);
    CheckDissolutionDate(flags, endDate, cached.DissolutionDate, cached.Status, cached.CompanyName);
    CheckDormantCompany(flags, cached.AccountsCategory, jobTitle, cached.CompanyName);
    CheckCompanySizeVsRole(flags, cached.AccountsCategory, jobTitle, cached.CompanyName);
    CheckSicCodeMismatch(flags, sicCodes, jobTitle, cached.CompanyName);

    // Only an explicit "not plausible" verdict raises a flag; null means "not assessed".
    var (jobPlausible, jobNotes) = CheckJobTitlePlausibility(jobTitle, cached.CompanyType);
    if (jobPlausible == false)
    {
        flags.Add(new CompanyVerificationFlag
        {
            Type = "ImplausibleJobTitle",
            Severity = "Critical",
            Message = jobNotes ?? "Job title requires verification",
            ScoreImpact = -15
        });
    }

    return new CompanyVerificationResult
    {
        ClaimedCompany = claimedCompany,
        MatchedCompanyName = cached.CompanyName,
        MatchedCompanyNumber = cached.CompanyNumber,
        MatchScore = matchScore,
        IsVerified = true,
        VerificationNotes = null,
        ClaimedStartDate = startDate,
        ClaimedEndDate = endDate,
        CompanyType = cached.CompanyType,
        CompanyStatus = cached.Status,
        IncorporationDate = cached.IncorporationDate,
        DissolutionDate = cached.DissolutionDate,
        AccountsCategory = cached.AccountsCategory,
        SicCodes = sicCodes,
        ClaimedJobTitle = jobTitle,
        JobTitlePlausible = jobPlausible,
        JobTitleNotes = jobNotes,
        Flags = flags
    };
}
/// <summary>
/// Builds the result returned when a company could not be verified.
/// </summary>
/// <param name="companyName">Company name as claimed.</param>
/// <param name="startDate">Claimed start date, if known.</param>
/// <param name="endDate">Claimed end date, if known.</param>
/// <param name="jobTitle">Claimed job title, if known.</param>
/// <param name="reason">Explanation stored in the result's verification notes.</param>
/// <returns>An unverified result with no matched company and a match score of 0.</returns>
private static CompanyVerificationResult CreateUnverifiedResult(
    string companyName,
    DateOnly? startDate,
    DateOnly? endDate,
    string? jobTitle,
    string reason) =>
    new()
    {
        // Outcome: nothing matched.
        IsVerified = false,
        VerificationNotes = reason,
        MatchedCompanyName = null,
        MatchedCompanyNumber = null,
        MatchScore = 0,

        // The claim is echoed back unchanged.
        ClaimedCompany = companyName,
        ClaimedStartDate = startDate,
        ClaimedEndDate = endDate,
        ClaimedJobTitle = jobTitle
    };
2026-01-20 20:00:24 +01:00
#endregion
2026-01-18 19:20:50 +01:00
}