// Files
// RealCV/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs
//
// 699 lines
// 27 KiB
// C#
// Raw Normal View History

using System.Text.Json;
using FuzzySharp;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using TrueCV.Application.DTOs;
using TrueCV.Application.Helpers;
using TrueCV.Application.Interfaces;
using TrueCV.Application.Models;
using TrueCV.Domain.Entities;
using TrueCV.Infrastructure.Data;
using TrueCV.Infrastructure.ExternalApis;
namespace TrueCV.Infrastructure.Services;
public sealed class CompanyVerifierService : ICompanyVerifierService
{
/// <summary>Client used for Companies House search, company and officer lookups.</summary>
private readonly CompaniesHouseClient _companiesHouseClient;

/// <summary>Factory used to create a fresh <see cref="ApplicationDbContext"/> for each cache operation.</summary>
private readonly IDbContextFactory<ApplicationDbContext> _dbContextFactory;

/// <summary>Logger for this service.</summary>
private readonly ILogger<CompanyVerifierService> _logger;

/// <summary>Minimum fuzzy-match score (reported as a percentage) for a company name to count as a match.</summary>
private const int FuzzyMatchThreshold = 70;

/// <summary>Cache entries older than this many days are ignored when looking for a cached match.</summary>
private const int CacheExpirationDays = 30;

/// <summary>SIC codes for tech/software companies.</summary>
private static readonly HashSet<string> TechSicCodes =
[
    // Computer programming and consultancy
    "62011", "62012", "62020", "62030", "62090",
    // Data processing, hosting
    "63110", "63120",
    // Publishing of computer games, other software
    "58210", "58290",
    // Telecommunications
    "61100", "61200", "61300", "61900",
];
/// <summary>
/// Creates the service with its Companies House client, database context factory and logger.
/// </summary>
/// <param name="companiesHouseClient">Client used for all Companies House calls.</param>
/// <param name="dbContextFactory">Factory used to create database contexts for the company cache.</param>
/// <param name="logger">Logger for diagnostic output.</param>
public CompanyVerifierService(
    CompaniesHouseClient companiesHouseClient,
    IDbContextFactory<ApplicationDbContext> dbContextFactory,
    ILogger<CompanyVerifierService> logger)
{
    (_companiesHouseClient, _dbContextFactory, _logger) =
        (companiesHouseClient, dbContextFactory, logger);
}
/// <summary>
/// Verifies a claimed employer. A sufficiently recent cached match is used when one exists;
/// otherwise Companies House is searched, the best fuzzy match is fetched and cached, and the
/// verification checks are run against it.
/// </summary>
/// <param name="companyName">Company name as claimed; must not be null or whitespace.</param>
/// <param name="startDate">Claimed employment start date, if known.</param>
/// <param name="endDate">Claimed employment end date, if known.</param>
/// <param name="jobTitle">Claimed job title, if known.</param>
/// <returns>
/// A verified result carrying any raised flags, or an unverified result when no company matches
/// or the Companies House rate limit is hit.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="companyName"/> is null or whitespace.</exception>
public async Task<CompanyVerificationResult> VerifyCompanyAsync(
    string companyName,
    DateOnly? startDate,
    DateOnly? endDate,
    string? jobTitle = null)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(companyName);
    _logger.LogDebug("Verifying company: {CompanyName}", companyName);

    var flags = new List<CompanyVerificationFlag>();

    // A cached match short-circuits the remote lookup entirely.
    if (await FindCachedMatchAsync(companyName) is { } cachedMatch)
    {
        _logger.LogDebug("Found cached company match for: {CompanyName}", companyName);
        return CreateResultFromCache(cachedMatch, companyName, startDate, endDate, jobTitle, flags);
    }

    try
    {
        // Search Companies House.
        var searchResponse = await _companiesHouseClient.SearchCompaniesAsync(companyName);
        var candidates = searchResponse?.Items;
        if (candidates is null || candidates.Count == 0)
        {
            _logger.LogDebug("No companies found for: {CompanyName}", companyName);
            return CreateUnverifiedResult(companyName, startDate, endDate, jobTitle, "No matching company found in Companies House");
        }

        // Pick the best fuzzy match at or above the threshold.
        var bestMatch = FindBestMatch(companyName, candidates);
        if (!bestMatch.HasValue)
        {
            _logger.LogDebug("No fuzzy match above threshold for: {CompanyName}", companyName);
            return CreateUnverifiedResult(companyName, startDate, endDate, jobTitle,
                $"No company name matched above {FuzzyMatchThreshold}% threshold");
        }

        var (company, score) = bestMatch.Value;

        // The full company record supplies data the search item may lack; cache both together.
        var companyDetails = await _companiesHouseClient.GetCompanyAsync(company.CompanyNumber);
        await CacheCompanyAsync(company, companyDetails);

        _logger.LogInformation(
            "Verified company {ClaimedName} matched to {MatchedName} with score {Score}%",
            companyName, company.Title, score);

        // Gather the facts the checks operate on.
        var incorporationDate = DateHelpers.ParseDate(company.DateOfCreation);
        var dissolutionDate = DateHelpers.ParseDate(company.DateOfCessation);
        var companyStatus = company.CompanyStatus;
        var companyType = company.CompanyType;
        var sicCodes = companyDetails?.SicCodes ?? company.SicCodes;
        var accountsCategory = companyDetails?.Accounts?.LastAccounts?.Type;

        // Checks 1-5: incorporation date, dissolution, dormancy, company size, SIC code.
        CheckIncorporationDate(flags, startDate, incorporationDate, company.Title);
        CheckDissolutionDate(flags, endDate, dissolutionDate, companyStatus, company.Title);
        CheckDormantCompany(flags, accountsCategory, jobTitle, company.Title);
        CheckCompanySizeVsRole(flags, accountsCategory, jobTitle, company.Title);
        CheckSicCodeMismatch(flags, sicCodes, jobTitle, company.Title);

        // Check 6: job title plausibility for PLCs.
        var (jobPlausible, jobNotes) = CheckJobTitlePlausibility(jobTitle, companyType);
        if (jobPlausible is false)
        {
            var implausibleTitleFlag = new CompanyVerificationFlag
            {
                Type = "ImplausibleJobTitle",
                Severity = "Critical",
                Message = jobNotes ?? "Job title requires verification",
                ScoreImpact = -15
            };
            flags.Add(implausibleTitleFlag);
        }

        return new CompanyVerificationResult
        {
            ClaimedCompany = companyName,
            MatchedCompanyName = company.Title,
            MatchedCompanyNumber = company.CompanyNumber,
            MatchScore = score,
            IsVerified = true,
            VerificationNotes = $"Matched with {score}% confidence",
            ClaimedStartDate = startDate,
            ClaimedEndDate = endDate,
            CompanyType = companyType,
            CompanyStatus = companyStatus,
            IncorporationDate = incorporationDate,
            DissolutionDate = dissolutionDate,
            AccountsCategory = accountsCategory,
            SicCodes = sicCodes,
            ClaimedJobTitle = jobTitle,
            JobTitlePlausible = jobPlausible,
            JobTitleNotes = jobNotes,
            Flags = flags
        };
    }
    catch (CompaniesHouseRateLimitException ex)
    {
        // Rate limiting is reported as "unverified" rather than surfaced as a failure.
        _logger.LogWarning(ex, "Rate limit hit while verifying company: {CompanyName}", companyName);
        return CreateUnverifiedResult(companyName, startDate, endDate, jobTitle,
            "Verification temporarily unavailable due to rate limiting");
    }
}
/// <summary>
/// Searches Companies House for <paramref name="query"/> and maps each hit to a
/// <see cref="CompanySearchResult"/>.
/// </summary>
/// <param name="query">Search text; must not be null or whitespace.</param>
/// <returns>The mapped results, or an empty list when the response has no items.</returns>
/// <exception cref="ArgumentException"><paramref name="query"/> is null or whitespace.</exception>
public async Task<List<CompanySearchResult>> SearchCompaniesAsync(string query)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(query);
    _logger.LogDebug("Searching companies for query: {Query}", query);

    var response = await _companiesHouseClient.SearchCompaniesAsync(query);

    var results = new List<CompanySearchResult>();
    if (response?.Items is not { } items)
    {
        return results;
    }

    foreach (var item in items)
    {
        results.Add(new CompanySearchResult
        {
            CompanyNumber = item.CompanyNumber,
            CompanyName = item.Title,
            CompanyStatus = item.CompanyStatus ?? "Unknown",
            IncorporationDate = DateHelpers.ParseDate(item.DateOfCreation),
            AddressSnippet = item.AddressSnippet
        });
    }

    return results;
}
/// <summary>
/// Checks whether a candidate appears among a company's officers in a director or secretary
/// role, optionally requiring the appointment to overlap the claimed employment period.
/// </summary>
/// <param name="companyNumber">Companies House company number to look up.</param>
/// <param name="candidateName">Candidate's name; compared fuzzily against officer names.</param>
/// <param name="startDate">Claimed start date, if known.</param>
/// <param name="endDate">Claimed end date, if known.</param>
/// <returns>
/// <c>true</c> when a matching officer is found (with overlapping dates if any were claimed);
/// <c>false</c> when officers exist but none match; <c>null</c> when the check cannot be made
/// (blank input, no officers returned, rate limiting, or any other error).
/// </returns>
public async Task<bool?> VerifyDirectorAsync(
    string companyNumber,
    string candidateName,
    DateOnly? startDate,
    DateOnly? endDate)
{
    // High threshold for name matching: personal names are short, so weak matches are unreliable.
    const int NameMatchThreshold = 80;

    if (string.IsNullOrWhiteSpace(companyNumber) || string.IsNullOrWhiteSpace(candidateName))
    {
        return null;
    }

    try
    {
        var officers = await _companiesHouseClient.GetOfficersAsync(companyNumber);
        if (officers?.Items is null || officers.Items.Count == 0)
        {
            _logger.LogDebug("No officers found for company {CompanyNumber}", companyNumber);
            return null;
        }

        // Normalize candidate name for comparison
        var normalizedCandidate = NormalizeName(candidateName);
        var hasClaimedDates = startDate.HasValue || endDate.HasValue;

        foreach (var officer in officers.Items)
        {
            // Only director and secretary roles are considered.
            var role = officer.OfficerRole?.ToLowerInvariant() ?? "";
            if (!role.Contains("director") && !role.Contains("secretary"))
            {
                continue;
            }

            // Fuzzy match the name
            var matchScore = Fuzz.Ratio(normalizedCandidate, NormalizeName(officer.Name));
            if (matchScore < NameMatchThreshold)
            {
                continue;
            }

            // If no claimed dates, a name match alone is sufficient.
            if (!hasClaimedDates)
            {
                _logger.LogDebug(
                    "Found matching director {OfficerName} for candidate {CandidateName} at company {CompanyNumber}",
                    officer.Name, candidateName, companyNumber);
                return true;
            }

            // Dates are only parsed once we know they are needed.
            var appointedOn = DateHelpers.ParseDate(officer.AppointedOn);
            var resignedOn = DateHelpers.ParseDate(officer.ResignedOn);

            // Check if employment period overlaps with directorship
            if (DatesOverlap(startDate, endDate, appointedOn, resignedOn))
            {
                _logger.LogDebug(
                    "Verified director {OfficerName} matches candidate {CandidateName} with overlapping dates",
                    officer.Name, candidateName);
                return true;
            }
        }

        _logger.LogDebug(
            "No matching director found for candidate {CandidateName} at company {CompanyNumber}",
            candidateName, companyNumber);
        return false;
    }
    catch (CompaniesHouseRateLimitException ex)
    {
        // Include the exception so rate-limit details are logged, as VerifyCompanyAsync does.
        _logger.LogWarning(ex, "Rate limit hit while verifying director for company {CompanyNumber}", companyNumber);
        return null;
    }
    catch (Exception ex)
    {
        // Best-effort check: any other failure is reported as "unknown" rather than thrown.
        _logger.LogError(ex, "Error verifying director for company {CompanyNumber}", companyNumber);
        return null;
    }
}
/// <summary>
/// Normalizes a personal name for comparison: upper-cases and trims it, and rewrites the
/// "SURNAME, Firstname" form into "FIRSTNAME SURNAME".
/// </summary>
/// <param name="name">Name to normalize; may be null or blank.</param>
/// <returns>
/// The normalized name, or an empty string for null/blank input. The result never carries
/// leading or trailing whitespace, even when one side of the comma is empty.
/// </returns>
private static string NormalizeName(string? name)
{
    if (string.IsNullOrWhiteSpace(name))
    {
        return "";
    }

    var normalized = name.ToUpperInvariant().Trim();

    // Companies House often stores names as "SURNAME, Firstname"
    // Convert to "Firstname Surname" format for comparison
    var commaIndex = normalized.IndexOf(',');
    if (commaIndex < 0)
    {
        return normalized;
    }

    // Split on the first comma only; any later commas stay with the forenames part.
    var surname = normalized[..commaIndex].Trim();
    var forenames = normalized[(commaIndex + 1)..].Trim();

    // Trim again so an empty surname or forename ("SMITH," / ", JOHN") leaves no stray space.
    return $"{forenames} {surname}".Trim();
}
/// <summary>
/// Determines whether two date ranges overlap, treating a missing start as the earliest
/// possible date and a missing end as the latest possible date.
/// </summary>
/// <param name="start1">Start of the first range, or null if open.</param>
/// <param name="end1">End of the first range, or null if open.</param>
/// <param name="start2">Start of the second range, or null if open.</param>
/// <param name="end2">End of the second range, or null if open.</param>
/// <returns>
/// <c>true</c> if the ranges share at least one day, or if either range has no dates at all.
/// </returns>
private static bool DatesOverlap(DateOnly? start1, DateOnly? end1, DateOnly? start2, DateOnly? end2)
{
    // A range with neither bound cannot contradict anything, so assume overlap.
    var firstIsUndated = start1 is null && end1 is null;
    var secondIsUndated = start2 is null && end2 is null;
    if (firstIsUndated || secondIsUndated)
    {
        return true;
    }

    // Open-ended bounds stretch to the limits of DateOnly.
    var firstStart = start1.GetValueOrDefault(DateOnly.MinValue);
    var firstEnd = end1.GetValueOrDefault(DateOnly.MaxValue);
    var secondStart = start2.GetValueOrDefault(DateOnly.MinValue);
    var secondEnd = end2.GetValueOrDefault(DateOnly.MaxValue);

    // The ranges are disjoint only when one starts after the other has ended.
    var disjoint = firstStart > secondEnd || secondStart > firstEnd;
    return !disjoint;
}
#region Verification Checks
/// <summary>
/// Adds a critical flag when the claimed employment start precedes the company's incorporation date.
/// </summary>
/// <param name="flags">Collection the flag is appended to.</param>
/// <param name="claimedStartDate">Claimed employment start date, if known.</param>
/// <param name="incorporationDate">Company incorporation date, if known.</param>
/// <param name="companyName">Matched company name, used in the flag message.</param>
private static void CheckIncorporationDate(
    List<CompanyVerificationFlag> flags,
    DateOnly? claimedStartDate,
    DateOnly? incorporationDate,
    string companyName)
{
    // Nothing can be concluded unless both dates are known.
    if (claimedStartDate is not { } claimedStart || incorporationDate is not { } incorporatedOn)
    {
        return;
    }

    if (claimedStart >= incorporatedOn)
    {
        return;
    }

    var flag = new CompanyVerificationFlag
    {
        Type = "EmploymentBeforeIncorporation",
        Severity = "Critical",
        Message = $"Claimed employment at '{companyName}' starting {claimedStartDate:MMM yyyy} is before company incorporation date {incorporationDate:MMM yyyy}",
        ScoreImpact = -20
    };
    flags.Add(flag);
}
/// <summary>
/// Flags employment claimed at a company after it ceased: either an end date more than three
/// months past the dissolution date, or a claim of current employment (no end date).
/// </summary>
/// <param name="flags">Collection the flag is appended to.</param>
/// <param name="claimedEndDate">Claimed employment end date; null means still employed.</param>
/// <param name="dissolutionDate">Company dissolution date, if known.</param>
/// <param name="companyStatus">Company status; only dissolved, liquidation and administration are checked.</param>
/// <param name="companyName">Matched company name, used in the flag message.</param>
private static void CheckDissolutionDate(
    List<CompanyVerificationFlag> flags,
    DateOnly? claimedEndDate,
    DateOnly? dissolutionDate,
    string? companyStatus,
    string companyName)
{
    if (dissolutionDate is not { } dissolvedOn)
    {
        return;
    }

    var status = companyStatus?.ToLowerInvariant();
    var isDissolvedStatus = status == "dissolved" || status == "liquidation" || status == "administration";
    if (!isDissolvedStatus)
    {
        return;
    }

    // Allow 3 month buffer for wind-down
    var bufferDate = dissolvedOn.AddMonths(3);

    if (!claimedEndDate.HasValue)
    {
        // Current employment at dissolved company
        flags.Add(new CompanyVerificationFlag
        {
            Type = "CurrentEmploymentAtDissolvedCompany",
            Severity = "Critical",
            Message = $"Claims current employment at '{companyName}' but company was dissolved on {dissolutionDate:MMM yyyy}",
            ScoreImpact = -25
        });
        return;
    }

    if (claimedEndDate.Value > bufferDate)
    {
        flags.Add(new CompanyVerificationFlag
        {
            Type = "EmploymentAtDissolvedCompany",
            Severity = "Critical",
            Message = $"Claimed employment at '{companyName}' until {claimedEndDate:MMM yyyy} but company was dissolved on {dissolutionDate:MMM yyyy}",
            ScoreImpact = -20
        });
    }
}
/// <summary>
/// Adds a warning when a non-director role is claimed at a company whose accounts category
/// indicates dormant accounts.
/// </summary>
/// <param name="flags">Collection the flag is appended to.</param>
/// <param name="accountsCategory">Accounts category of the company; blank skips the check.</param>
/// <param name="jobTitle">Claimed job title; a missing title is treated as a non-director role.</param>
/// <param name="companyName">Matched company name, used in the flag message.</param>
private static void CheckDormantCompany(
    List<CompanyVerificationFlag> flags,
    string? accountsCategory,
    string? jobTitle,
    string companyName)
{
    if (string.IsNullOrWhiteSpace(accountsCategory)
        || !accountsCategory.ToLowerInvariant().Contains("dormant"))
    {
        return;
    }

    // Directors can maintain dormant companies, but other roles are suspicious
    var normalizedTitle = (jobTitle ?? "").ToLowerInvariant();
    string[] officerKeywords = ["director", "company secretary"];
    if (officerKeywords.Any(keyword => normalizedTitle.Contains(keyword)))
    {
        return;
    }

    flags.Add(new CompanyVerificationFlag
    {
        Type = "EmploymentAtDormantCompany",
        Severity = "Warning",
        Message = $"Claimed active employment as '{jobTitle}' at '{companyName}' which files dormant accounts",
        ScoreImpact = -10
    });
}
/// <summary>
/// Adds a warning when a senior-management title is claimed at a company that files
/// micro-entity accounts.
/// </summary>
/// <param name="flags">Collection the flag is appended to.</param>
/// <param name="accountsCategory">Accounts category of the company; blank skips the check.</param>
/// <param name="jobTitle">Claimed job title; blank skips the check.</param>
/// <param name="companyName">Matched company name, used in the flag message.</param>
private static void CheckCompanySizeVsRole(
    List<CompanyVerificationFlag> flags,
    string? accountsCategory,
    string? jobTitle,
    string companyName)
{
    if (string.IsNullOrWhiteSpace(accountsCategory) || string.IsNullOrWhiteSpace(jobTitle))
    {
        return;
    }

    // Micro-entity: < 10 employees, < £632k turnover
    if (!accountsCategory.ToLowerInvariant().Contains("micro"))
    {
        return;
    }

    // Titles that imply a management layer a micro company is unlikely to have.
    string[] seniorKeywords =
    [
        "vp",
        "vice president",
        "head of",
        "chief",
        "director of",
        "senior director",
    ];

    var normalizedTitle = jobTitle.ToLowerInvariant();
    if (!seniorKeywords.Any(keyword => normalizedTitle.Contains(keyword)))
    {
        return;
    }

    flags.Add(new CompanyVerificationFlag
    {
        Type = "SeniorRoleAtMicroCompany",
        Severity = "Warning",
        Message = $"Claimed senior role '{jobTitle}' at '{companyName}' which files micro-entity accounts (typically <10 employees)",
        ScoreImpact = -10
    });
}
/// <summary>
/// Adds an informational flag when a technology job title is claimed at a company that has
/// none of the SIC codes listed in <c>TechSicCodes</c>.
/// </summary>
/// <param name="flags">Collection the flag is appended to.</param>
/// <param name="sicCodes">SIC codes of the company; null or empty skips the check.</param>
/// <param name="jobTitle">Claimed job title; blank skips the check.</param>
/// <param name="companyName">Matched company name, used in the flag message.</param>
private static void CheckSicCodeMismatch(
    List<CompanyVerificationFlag> flags,
    List<string>? sicCodes,
    string? jobTitle,
    string companyName)
{
    if (sicCodes is null || sicCodes.Count == 0 || string.IsNullOrWhiteSpace(jobTitle))
    {
        return;
    }

    var title = jobTitle.ToLowerInvariant();

    // Keywords that are safe to match as substrings.
    string[] techKeywords =
    [
        "software",
        "developer",
        "engineer",
        "programmer",
        "data scientist",
        "data analyst",
        "devops",
        "cloud",
        "machine learning",
        "frontend",
        "backend",
        "full stack",
        "fullstack",
    ];

    // "ai" is matched as a whole word: a plain substring test on "ai " misses titles that end
    // in "AI" (e.g. "Head of AI") and fires inside unrelated words (e.g. "Thai chef").
    var mentionsAi = System.Text.RegularExpressions.Regex.IsMatch(title, @"\bai\b");

    var isTechRole = mentionsAi || techKeywords.Any(keyword => title.Contains(keyword));
    if (!isTechRole)
    {
        return;
    }

    // Any single tech SIC code is enough to consider the company a technology business.
    if (sicCodes.Any(code => TechSicCodes.Contains(code)))
    {
        return;
    }

    // Get the primary SIC code description (simplified - just show code)
    var primarySic = sicCodes.FirstOrDefault() ?? "Unknown";
    flags.Add(new CompanyVerificationFlag
    {
        Type = "SicCodeMismatch",
        Severity = "Info",
        Message = $"Tech role '{jobTitle}' at '{companyName}' (SIC: {primarySic}) - company is not registered as a technology business",
        ScoreImpact = -5
    });
}
/// <summary>
/// Judges whether a claimed job title is plausible for the company type. Only PLCs are
/// scrutinised: C-suite, board-level and VP-level titles there are reported as needing
/// verification.
/// </summary>
/// <param name="jobTitle">Claimed job title; blank yields an undetermined result.</param>
/// <param name="companyType">Company type; blank yields an undetermined result.</param>
/// <returns>
/// <c>(null, null)</c> when either input is blank; <c>(false, notes)</c> for a senior title at a
/// PLC; <c>(true, null)</c> otherwise.
/// </returns>
private static (bool? IsPlausible, string? Notes) CheckJobTitlePlausibility(string? jobTitle, string? companyType)
{
    if (string.IsNullOrWhiteSpace(jobTitle) || string.IsNullOrWhiteSpace(companyType))
    {
        return (null, null);
    }

    var title = jobTitle.Trim().ToLowerInvariant();
    var type = companyType.Trim().ToLowerInvariant();

    // Check if this is a PLC (Public Limited Company) - these are large companies
    var isPlc = type.Contains("plc") || type.Contains("public limited");
    if (!isPlc)
    {
        return (true, null);
    }

    // Abbreviations must match whole words. As substrings, "cto" hits "director" and
    // "contractor", and "coo" hits "coordinator", which wrongly flags ordinary roles.
    var words = new HashSet<string>(
        System.Text.RegularExpressions.Regex.Split(title, "[^a-z0-9]+"),
        StringComparer.Ordinal);

    bool HasAnyWord(params string[] candidates) => candidates.Any(word => words.Contains(word));
    bool HasAnyPhrase(params string[] phrases) => phrases.Any(phrase => title.Contains(phrase, StringComparison.Ordinal));

    // "president" counts as C-suite only outside "vice president", so VP titles get the
    // VP-specific note below instead of the C-suite one.
    var titleWithoutVicePresident = title
        .Replace("vice president", " ", StringComparison.Ordinal)
        .Replace("vice-president", " ", StringComparison.Ordinal);

    // Check for C-suite / very senior roles
    var isCsuiteRole =
        HasAnyWord("ceo", "cto", "cfo", "coo", "cio") ||
        HasAnyPhrase(
            "chief executive",
            "chief technology",
            "chief financial",
            "chief operating",
            "chief information",
            "managing director",
            "chairman",
            "chairwoman",
            "chairperson") ||
        title == "md" ||
        titleWithoutVicePresident.Contains("president", StringComparison.Ordinal);

    // Check for board-level roles; a bare "director" counts, "director of X" does not.
    var isBoardRole =
        HasAnyPhrase("board member", "non-executive director", "executive director") ||
        title == "director";

    if (isCsuiteRole || isBoardRole)
    {
        return (false, $"Claimed senior role '{jobTitle}' at a PLC requires verification - C-suite positions at public companies are publicly disclosed");
    }

    // Check for VP/SVP at PLCs (also usually disclosed)
    var isVpRole =
        HasAnyWord("vp", "svp", "evp") ||
        HasAnyPhrase("vice president", "vice-president");

    if (isVpRole)
    {
        return (false, $"Claimed VP-level role '{jobTitle}' at a PLC - senior positions at public companies should be verifiable");
    }

    return (true, null);
}
#endregion
#region Helper Methods
/// <summary>
/// Looks for a non-expired cache entry whose name fuzzily matches <paramref name="companyName"/>.
/// </summary>
/// <param name="companyName">Claimed company name to match against cached names.</param>
/// <returns>
/// The cached company with the highest score at or above <c>FuzzyMatchThreshold</c> (the first
/// such entry on ties), or <c>null</c> when nothing qualifies.
/// </returns>
private async Task<CompanyCache?> FindCachedMatchAsync(string companyName)
{
    var cutoffDate = DateTime.UtcNow.AddDays(-CacheExpirationDays);

    await using var dbContext = await _dbContextFactory.CreateDbContextAsync();

    // Read-only lookup: the context is disposed on return, so change tracking is pure overhead.
    var cachedCompanies = await dbContext.CompanyCache
        .AsNoTracking()
        .Where(c => c.CachedAt >= cutoffDate)
        .ToListAsync();

    // Upper-case the search term once rather than once per cached row.
    var normalizedSearch = companyName.ToUpperInvariant();

    // Single pass for the best score; strict '>' keeps the earliest entry on ties, which is
    // what a stable descending sort followed by FirstOrDefault would return.
    CompanyCache? bestCompany = null;
    var bestScore = FuzzyMatchThreshold - 1;
    foreach (var candidate in cachedCompanies)
    {
        var score = Fuzz.Ratio(normalizedSearch, candidate.CompanyName.ToUpperInvariant());
        if (score > bestScore)
        {
            bestCompany = candidate;
            bestScore = score;
        }
    }

    return bestCompany;
}
/// <summary>
/// Picks the search item whose title best matches <paramref name="companyName"/>, ignoring case.
/// </summary>
/// <param name="companyName">Claimed company name.</param>
/// <param name="items">Search results to score.</param>
/// <returns>
/// The item and its score for the highest-scoring title at or above <c>FuzzyMatchThreshold</c>
/// (the earliest item on ties), or <c>null</c> when no title reaches the threshold.
/// </returns>
private static (CompaniesHouseSearchItem Item, int Score)? FindBestMatch(
    string companyName,
    List<CompaniesHouseSearchItem> items)
{
    var normalizedSearch = companyName.ToUpperInvariant();

    (CompaniesHouseSearchItem Item, int Score)? best = null;
    foreach (var item in items)
    {
        var score = Fuzz.Ratio(normalizedSearch, item.Title.ToUpperInvariant());
        if (score < FuzzyMatchThreshold)
        {
            continue;
        }

        // Replace only on a strictly better score so the first of equal candidates wins.
        if (best is null || score > best.Value.Score)
        {
            best = (item, score);
        }
    }

    return best;
}
/// <summary>
/// Inserts or refreshes the cache row for a matched company, keyed by company number.
/// </summary>
/// <param name="item">Search item supplying name, status, type and dates.</param>
/// <param name="details">Full company record, if fetched; preferred for SIC codes and accounts type.</param>
private async Task CacheCompanyAsync(CompaniesHouseSearchItem item, CompaniesHouseCompany? details)
{
    await using var dbContext = await _dbContextFactory.CreateDbContextAsync();

    var existingCache = await dbContext.CompanyCache
        .FirstOrDefaultAsync(c => c.CompanyNumber == item.CompanyNumber);

    // Work out the values once; both the insert and the update path store the same data.
    var sicCodes = details?.SicCodes ?? item.SicCodes;
    string? sicCodesJson = null;
    if (sicCodes != null)
    {
        sicCodesJson = JsonSerializer.Serialize(sicCodes);
    }

    var accountsCategory = details?.Accounts?.LastAccounts?.Type;
    var status = item.CompanyStatus ?? "Unknown";
    var incorporationDate = DateHelpers.ParseDate(item.DateOfCreation);
    var dissolutionDate = DateHelpers.ParseDate(item.DateOfCessation);
    var cachedAt = DateTime.UtcNow;

    if (existingCache is null)
    {
        dbContext.CompanyCache.Add(new CompanyCache
        {
            CompanyNumber = item.CompanyNumber,
            CompanyName = item.Title,
            Status = status,
            CompanyType = item.CompanyType,
            IncorporationDate = incorporationDate,
            DissolutionDate = dissolutionDate,
            AccountsCategory = accountsCategory,
            SicCodesJson = sicCodesJson,
            CachedAt = cachedAt
        });
    }
    else
    {
        existingCache.CompanyName = item.Title;
        existingCache.Status = status;
        existingCache.CompanyType = item.CompanyType;
        existingCache.IncorporationDate = incorporationDate;
        existingCache.DissolutionDate = dissolutionDate;
        existingCache.AccountsCategory = accountsCategory;
        existingCache.SicCodesJson = sicCodesJson;
        existingCache.CachedAt = cachedAt;
    }

    await dbContext.SaveChangesAsync();
}
/// <summary>
/// Builds a verified result from a cached company, running the same verification checks as
/// the live lookup path.
/// </summary>
/// <param name="cached">Cached company that matched the claim.</param>
/// <param name="claimedCompany">Company name as claimed.</param>
/// <param name="startDate">Claimed employment start date, if known.</param>
/// <param name="endDate">Claimed employment end date, if known.</param>
/// <param name="jobTitle">Claimed job title, if known.</param>
/// <param name="flags">Collection that receives any raised flags and is attached to the result.</param>
/// <returns>A verified result populated from the cache entry.</returns>
private CompanyVerificationResult CreateResultFromCache(
    CompanyCache cached,
    string claimedCompany,
    DateOnly? startDate,
    DateOnly? endDate,
    string? jobTitle,
    List<CompanyVerificationFlag> flags)
{
    var matchScore = Fuzz.Ratio(
        claimedCompany.ToUpperInvariant(),
        cached.CompanyName.ToUpperInvariant());

    // SIC codes are stored as a JSON array; an absent or empty value means "none recorded".
    List<string>? sicCodes = null;
    if (!string.IsNullOrEmpty(cached.SicCodesJson))
    {
        sicCodes = JsonSerializer.Deserialize<List<string>>(cached.SicCodesJson);
    }

    // Run all verification checks
    CheckIncorporationDate(flags, startDate, cached.IncorporationDate, cached.CompanyName);
    CheckDissolutionDate(flags, endDate, cached.DissolutionDate, cached.Status, cached.CompanyName);
    CheckDormantCompany(flags, cached.AccountsCategory, jobTitle, cached.CompanyName);
    CheckCompanySizeVsRole(flags, cached.AccountsCategory, jobTitle, cached.CompanyName);
    CheckSicCodeMismatch(flags, sicCodes, jobTitle, cached.CompanyName);

    var (jobPlausible, jobNotes) = CheckJobTitlePlausibility(jobTitle, cached.CompanyType);
    if (jobPlausible is false)
    {
        var implausibleTitleFlag = new CompanyVerificationFlag
        {
            Type = "ImplausibleJobTitle",
            Severity = "Critical",
            Message = jobNotes ?? "Job title requires verification",
            ScoreImpact = -15
        };
        flags.Add(implausibleTitleFlag);
    }

    return new CompanyVerificationResult
    {
        ClaimedCompany = claimedCompany,
        MatchedCompanyName = cached.CompanyName,
        MatchedCompanyNumber = cached.CompanyNumber,
        MatchScore = matchScore,
        IsVerified = true,
        VerificationNotes = $"Matched from cache with {matchScore}% confidence",
        ClaimedStartDate = startDate,
        ClaimedEndDate = endDate,
        CompanyType = cached.CompanyType,
        CompanyStatus = cached.Status,
        IncorporationDate = cached.IncorporationDate,
        DissolutionDate = cached.DissolutionDate,
        AccountsCategory = cached.AccountsCategory,
        SicCodes = sicCodes,
        ClaimedJobTitle = jobTitle,
        JobTitlePlausible = jobPlausible,
        JobTitleNotes = jobNotes,
        Flags = flags
    };
}
/// <summary>
/// Builds the result returned when a claimed company could not be verified.
/// </summary>
/// <param name="companyName">Company name as claimed.</param>
/// <param name="startDate">Claimed employment start date, if known.</param>
/// <param name="endDate">Claimed employment end date, if known.</param>
/// <param name="jobTitle">Claimed job title, if known.</param>
/// <param name="reason">Explanation stored in the result's verification notes.</param>
/// <returns>An unverified result with no matched company and a match score of zero.</returns>
private static CompanyVerificationResult CreateUnverifiedResult(
    string companyName,
    DateOnly? startDate,
    DateOnly? endDate,
    string? jobTitle,
    string reason) =>
    new()
    {
        // What was claimed.
        ClaimedCompany = companyName,
        ClaimedStartDate = startDate,
        ClaimedEndDate = endDate,
        ClaimedJobTitle = jobTitle,

        // Nothing was matched.
        MatchedCompanyName = null,
        MatchedCompanyNumber = null,
        MatchScore = 0,
        IsVerified = false,
        VerificationNotes = reason
    };
#endregion
}