Files
RealCV/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs

949 lines
38 KiB
C#
Raw Normal View History

using System.Text.Json;
using FuzzySharp;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using TrueCV.Application.DTOs;
using TrueCV.Application.Helpers;
using TrueCV.Application.Interfaces;
using TrueCV.Application.Models;
using TrueCV.Domain.Entities;
using TrueCV.Infrastructure.Data;
using TrueCV.Infrastructure.ExternalApis;
namespace TrueCV.Infrastructure.Services;
public sealed class CompanyVerifierService : ICompanyVerifierService
{
// Client used for Companies House company search, company profile and officer lookups.
private readonly CompaniesHouseClient _companiesHouseClient;
// Factory used to create a short-lived ApplicationDbContext for each cache read or write.
private readonly IDbContextFactory<ApplicationDbContext> _dbContextFactory;
private readonly ILogger<CompanyVerifierService> _logger;
// Minimum fuzzy-match score a cached or searched company name must reach to count as a match.
private const int FuzzyMatchThreshold = 85;
// Cache rows whose CachedAt is older than this many days are ignored when looking up a cached match.
private const int CacheExpirationDays = 30;
// SIC codes for tech/software companies
// Used by CheckSicCodeMismatch to decide whether a company is registered as a technology business.
private static readonly HashSet<string> TechSicCodes = new()
{
"62011", "62012", "62020", "62030", "62090", // Computer programming and consultancy
"63110", "63120", // Data processing, hosting
"58210", "58290", // Publishing of computer games, other software
"61100", "61200", "61300", "61900" // Telecommunications
};
/// <summary>
/// Creates the verifier from its Companies House client, database context factory and logger.
/// </summary>
/// <param name="companiesHouseClient">Client used for all Companies House lookups.</param>
/// <param name="dbContextFactory">Factory that produces contexts for the company cache.</param>
/// <param name="logger">Logger for diagnostic output.</param>
public CompanyVerifierService(
    CompaniesHouseClient companiesHouseClient,
    IDbContextFactory<ApplicationDbContext> dbContextFactory,
    ILogger<CompanyVerifierService> logger)
    => (_companiesHouseClient, _dbContextFactory, _logger) =
        (companiesHouseClient, dbContextFactory, logger);
/// <summary>
/// Verifies a claimed employer against the local company cache and, failing that, Companies House,
/// then runs the date, dormancy, size, SIC-code and job-title checks on the matched company.
/// </summary>
/// <param name="companyName">Company name as claimed; must not be null, empty or whitespace.</param>
/// <param name="startDate">Claimed employment start date, if known.</param>
/// <param name="endDate">Claimed employment end date; null is treated by the dissolution check as current employment.</param>
/// <param name="jobTitle">Claimed job title, if known.</param>
/// <returns>
/// A verified result with any flags raised, or an unverified result when no match is found
/// or Companies House rate limiting is hit.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="companyName"/> is null, empty or whitespace.</exception>
public async Task<CompanyVerificationResult> VerifyCompanyAsync(
string companyName,
DateOnly? startDate,
DateOnly? endDate,
string? jobTitle = null)
{
ArgumentException.ThrowIfNullOrWhiteSpace(companyName);
_logger.LogDebug("Verifying company: {CompanyName}", companyName);
// Single flag list shared by the cached path and the live-lookup path.
var flags = new List<CompanyVerificationFlag>();
// Try to find a cached match first (but only if it existed at claimed start date)
var cachedMatch = await FindCachedMatchAsync(companyName);
if (cachedMatch is not null)
{
// Check if cached company existed at the claimed start date
// An unknown start date or unknown incorporation date is treated as compatible.
var cacheValid = !startDate.HasValue ||
cachedMatch.IncorporationDate == null ||
cachedMatch.IncorporationDate <= startDate.Value;
if (cacheValid)
{
_logger.LogDebug("Found cached company match for: {CompanyName}", companyName);
return CreateResultFromCache(cachedMatch, companyName, startDate, endDate, jobTitle, flags);
}
else
{
_logger.LogDebug("Cached company {CachedName} was incorporated after claimed start date, searching for alternatives", cachedMatch.CompanyName);
}
}
// Search Companies House with fallback queries
try
{
var searchQueries = GenerateSearchQueries(companyName);
_logger.LogDebug("Generated {Count} search queries for '{CompanyName}': {Queries}",
searchQueries.Count, companyName, string.Join(", ", searchQueries.Select(q => $"'{q}'")));
(CompaniesHouseSearchItem Item, int Score)? bestMatch = null;
// Queries are tried in order; the first query that yields a match wins and later ones are skipped.
foreach (var query in searchQueries)
{
_logger.LogDebug("Searching Companies House with query: {Query}", query);
var searchResponse = await _companiesHouseClient.SearchCompaniesAsync(query);
if (searchResponse?.Items is null || searchResponse.Items.Count == 0)
{
continue;
}
// Find best fuzzy match, preferring companies that existed at claimed start date
// Pass both original name and search query for matching flexibility
bestMatch = FindBestMatch(companyName, query, searchResponse.Items, startDate);
if (bestMatch is not null)
{
_logger.LogDebug("Found match with query '{Query}': {Company}", query, bestMatch.Value.Item.Title);
break;
}
}
if (bestMatch is null)
{
_logger.LogDebug("No valid match found for: {CompanyName} after trying {Count} queries", companyName, searchQueries.Count);
return CreateUnverifiedResult(companyName, startDate, endDate, jobTitle,
"Company name could not be verified against official records");
}
var match = bestMatch.Value;
// Fetch full company details for additional data
var companyDetails = await _companiesHouseClient.GetCompanyAsync(match.Item.CompanyNumber);
// Cache the matched company with full details
await CacheCompanyAsync(match.Item, companyDetails);
_logger.LogInformation(
"Verified company {ClaimedName} matched to {MatchedName} with score {Score}%",
companyName, match.Item.Title, match.Score);
// Run all verification checks
var incorporationDate = DateHelpers.ParseDate(match.Item.DateOfCreation);
var dissolutionDate = DateHelpers.ParseDate(match.Item.DateOfCessation);
var companyStatus = match.Item.CompanyStatus;
var companyType = match.Item.CompanyType;
// Prefer SIC codes from the full company details; fall back to the search item when details are missing.
var sicCodes = companyDetails?.SicCodes ?? match.Item.SicCodes;
// The accounts category is only available from the full company details.
var accountsCategory = companyDetails?.Accounts?.LastAccounts?.Type;
// Check 1: Employment before company incorporation
CheckIncorporationDate(flags, startDate, incorporationDate, match.Item.Title);
// Check 2: Employment at dissolved company
CheckDissolutionDate(flags, endDate, dissolutionDate, companyStatus, match.Item.Title);
// Check 3: Dormant company check
CheckDormantCompany(flags, accountsCategory, jobTitle, match.Item.Title);
// Check 4: Company size vs job title
CheckCompanySizeVsRole(flags, accountsCategory, jobTitle, match.Item.Title);
// Check 5: SIC code vs job title mismatch
CheckSicCodeMismatch(flags, sicCodes, jobTitle, match.Item.Title);
// Check 6: Job title plausibility for PLCs
// jobPlausible is null when the title or company type is unknown; only an explicit false raises a flag.
var (jobPlausible, jobNotes) = CheckJobTitlePlausibility(jobTitle, companyType);
if (jobPlausible == false)
{
flags.Add(new CompanyVerificationFlag
{
Type = "ImplausibleJobTitle",
Severity = "Critical",
Message = jobNotes ?? "Job title requires verification",
ScoreImpact = -15
});
}
return new CompanyVerificationResult
{
ClaimedCompany = companyName,
MatchedCompanyName = match.Item.Title,
MatchedCompanyNumber = match.Item.CompanyNumber,
MatchScore = match.Score,
IsVerified = true,
VerificationNotes = null,
ClaimedStartDate = startDate,
ClaimedEndDate = endDate,
CompanyType = companyType,
CompanyStatus = companyStatus,
IncorporationDate = incorporationDate,
DissolutionDate = dissolutionDate,
AccountsCategory = accountsCategory,
SicCodes = sicCodes,
ClaimedJobTitle = jobTitle,
JobTitlePlausible = jobPlausible,
JobTitleNotes = jobNotes,
Flags = flags
};
}
// Only rate limiting is handled here; any other exception propagates to the caller.
catch (CompaniesHouseRateLimitException ex)
{
_logger.LogWarning(ex, "Rate limit hit while verifying company: {CompanyName}", companyName);
return CreateUnverifiedResult(companyName, startDate, endDate, jobTitle,
"Verification temporarily unavailable due to rate limiting");
}
}
/// <summary>
/// Searches Companies House for <paramref name="query"/> and maps each hit to a
/// <see cref="CompanySearchResult"/>.
/// </summary>
/// <param name="query">Search text; must not be null, empty or whitespace.</param>
/// <returns>The mapped search hits, or an empty list when the response carries no items.</returns>
/// <exception cref="ArgumentException"><paramref name="query"/> is null, empty or whitespace.</exception>
public async Task<List<CompanySearchResult>> SearchCompaniesAsync(string query)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(query);
    _logger.LogDebug("Searching companies for query: {Query}", query);

    var searchResponse = await _companiesHouseClient.SearchCompaniesAsync(query);
    var hits = searchResponse?.Items;
    if (hits is null)
    {
        return [];
    }

    var results = new List<CompanySearchResult>();
    foreach (var hit in hits)
    {
        results.Add(new CompanySearchResult
        {
            CompanyNumber = hit.CompanyNumber,
            CompanyName = hit.Title,
            // A missing status is reported as "Unknown" rather than null.
            CompanyStatus = hit.CompanyStatus ?? "Unknown",
            IncorporationDate = DateHelpers.ParseDate(hit.DateOfCreation),
            AddressSnippet = hit.AddressSnippet
        });
    }

    return results;
}
/// <summary>
/// Checks whether <paramref name="candidateName"/> appears among the director or secretary
/// officers of the given company, optionally requiring the appointment to overlap the claimed dates.
/// </summary>
/// <param name="companyNumber">Companies House company number.</param>
/// <param name="candidateName">Candidate's name as claimed.</param>
/// <param name="startDate">Claimed start date, if known.</param>
/// <param name="endDate">Claimed end date, if known.</param>
/// <returns>
/// <c>true</c> when a matching officer is found; <c>false</c> when officers exist but none match;
/// <c>null</c> when inputs are blank, no officers are returned, or the lookup fails.
/// </returns>
public async Task<bool?> VerifyDirectorAsync(
    string companyNumber,
    string candidateName,
    DateOnly? startDate,
    DateOnly? endDate)
{
    var inputsMissing = string.IsNullOrWhiteSpace(companyNumber) || string.IsNullOrWhiteSpace(candidateName);
    if (inputsMissing)
    {
        return null;
    }

    try
    {
        var officerResponse = await _companiesHouseClient.GetOfficersAsync(companyNumber);
        var officerList = officerResponse?.Items;
        if (officerList is null || officerList.Count == 0)
        {
            _logger.LogDebug("No officers found for company {CompanyNumber}", companyNumber);
            return null;
        }

        // Normalise once; every officer name is compared against this value.
        var candidateKey = NormalizeName(candidateName);
        var noClaimedDates = !startDate.HasValue && !endDate.HasValue;

        foreach (var officer in officerList)
        {
            // Only director and secretary roles are considered.
            var roleText = officer.OfficerRole?.ToLowerInvariant() ?? "";
            var isRelevantRole = role​Matches(roleText);
            if (!isRelevantRole)
            {
                continue;
            }

            // Names must reach a fuzzy ratio of at least 80 to count as the same person.
            var nameScore = Fuzz.Ratio(candidateKey, NormalizeName(officer.Name));
            if (nameScore < 80)
            {
                continue;
            }

            var appointedOn = DateHelpers.ParseDate(officer.AppointedOn);
            var resignedOn = DateHelpers.ParseDate(officer.ResignedOn);

            // Without claimed dates a name match alone is sufficient.
            if (noClaimedDates)
            {
                _logger.LogDebug(
                    "Found matching director {OfficerName} for candidate {CandidateName} at company {CompanyNumber}",
                    officer.Name, candidateName, companyNumber);
                return true;
            }

            // Otherwise the claimed period must overlap the appointment period.
            if (DatesOverlap(startDate, endDate, appointedOn, resignedOn))
            {
                _logger.LogDebug(
                    "Verified director {OfficerName} matches candidate {CandidateName} with overlapping dates",
                    officer.Name, candidateName);
                return true;
            }
        }

        _logger.LogDebug(
            "No matching director found for candidate {CandidateName} at company {CompanyNumber}",
            candidateName, companyNumber);
        return false;
    }
    catch (CompaniesHouseRateLimitException)
    {
        _logger.LogWarning("Rate limit hit while verifying director for company {CompanyNumber}", companyNumber);
        return null;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error verifying director for company {CompanyNumber}", companyNumber);
        return null;
    }

    static bool role​Matches(string role) => role.Contains("director") || role.Contains("secretary");
}
/// <summary>
/// Normalises a person's name for comparison: upper-cases it, trims it, and converts the
/// "SURNAME, Firstname" form into "FIRSTNAME SURNAME".
/// </summary>
/// <param name="name">Name to normalise; may be null or blank.</param>
/// <returns>The normalised name, or an empty string when <paramref name="name"/> is null or blank.</returns>
private static string NormalizeName(string? name)
{
    if (string.IsNullOrWhiteSpace(name))
    {
        return "";
    }

    var normalized = name.ToUpperInvariant().Trim();

    // Companies House often stores names as "SURNAME, Firstname"
    // Convert to "Firstname Surname" format for comparison
    var commaIndex = normalized.IndexOf(',');
    if (commaIndex < 0)
    {
        return normalized;
    }

    // Split on the first comma only, so any later commas stay with the forenames.
    var surname = normalized[..commaIndex].Trim();
    var forenames = normalized[(commaIndex + 1)..].Trim();

    // Trim the joined result: when either side of the comma is empty ("SMITH," or ", JOHN")
    // the join would otherwise leave a stray leading or trailing space that skews fuzzy scores.
    return $"{forenames} {surname}".Trim();
}
/// <summary>
/// Determines whether two date ranges overlap, treating a missing bound as open-ended.
/// A range with neither bound set is assumed to overlap anything.
/// </summary>
/// <returns><c>true</c> when the ranges share at least one day (end points inclusive).</returns>
private static bool DatesOverlap(DateOnly? start1, DateOnly? end1, DateOnly? start2, DateOnly? end2)
{
    var firstHasNoDates = start1 is null && end1 is null;
    var secondHasNoDates = start2 is null && end2 is null;
    if (firstHasNoDates || secondHasNoDates)
    {
        return true;
    }

    // Open bounds extend to the earliest/latest representable date.
    var firstStart = start1.GetValueOrDefault(DateOnly.MinValue);
    var firstEnd = end1.GetValueOrDefault(DateOnly.MaxValue);
    var secondStart = start2.GetValueOrDefault(DateOnly.MinValue);
    var secondEnd = end2.GetValueOrDefault(DateOnly.MaxValue);

    // The ranges are disjoint exactly when one starts after the other has ended.
    var disjoint = firstStart > secondEnd || secondStart > firstEnd;
    return !disjoint;
}
#region Verification Checks
/// <summary>
/// Adds a critical flag when the claimed start date is earlier than the company's incorporation date.
/// Does nothing when either date is unknown.
/// </summary>
private static void CheckIncorporationDate(
    List<CompanyVerificationFlag> flags,
    DateOnly? claimedStartDate,
    DateOnly? incorporationDate,
    string companyName)
{
    if (!claimedStartDate.HasValue || !incorporationDate.HasValue)
    {
        return;
    }

    // Starting on or after the incorporation date is fine.
    if (claimedStartDate.Value >= incorporationDate.Value)
    {
        return;
    }

    var flag = new CompanyVerificationFlag
    {
        Type = "EmploymentBeforeIncorporation",
        Severity = "Critical",
        Message = $"Claimed employment at '{companyName}' starting {claimedStartDate:MMM yyyy} is before company incorporation date {incorporationDate:MMM yyyy}",
        ScoreImpact = -20
    };
    flags.Add(flag);
}
/// <summary>
/// Flags employment claimed at a company after it ceased: either an end date more than three
/// months past the dissolution date, or no end date at all (current employment).
/// Only applies when a dissolution date is known and the status is dissolved, liquidation or administration.
/// </summary>
private static void CheckDissolutionDate(
    List<CompanyVerificationFlag> flags,
    DateOnly? claimedEndDate,
    DateOnly? dissolutionDate,
    string? companyStatus,
    string companyName)
{
    var status = companyStatus?.ToLowerInvariant();
    var countsAsDissolved = status == "dissolved" || status == "liquidation" || status == "administration";
    if (!countsAsDissolved || !dissolutionDate.HasValue)
    {
        return;
    }

    // Allow a three-month wind-down period after the recorded dissolution date.
    var graceCutoff = dissolutionDate.Value.AddMonths(3);

    if (!claimedEndDate.HasValue)
    {
        // No end date means the candidate claims to still work there.
        flags.Add(new CompanyVerificationFlag
        {
            Type = "CurrentEmploymentAtDissolvedCompany",
            Severity = "Critical",
            Message = $"Claims current employment at '{companyName}' but company was dissolved on {dissolutionDate:MMM yyyy}",
            ScoreImpact = -25
        });
        return;
    }

    if (claimedEndDate.Value > graceCutoff)
    {
        flags.Add(new CompanyVerificationFlag
        {
            Type = "EmploymentAtDissolvedCompany",
            Severity = "Critical",
            Message = $"Claimed employment at '{companyName}' until {claimedEndDate:MMM yyyy} but company was dissolved on {dissolutionDate:MMM yyyy}",
            ScoreImpact = -20
        });
    }
}
/// <summary>
/// Adds a warning when a non-director role is claimed at a company whose accounts category
/// contains "dormant". Does nothing when the accounts category is unknown.
/// </summary>
private static void CheckDormantCompany(
    List<CompanyVerificationFlag> flags,
    string? accountsCategory,
    string? jobTitle,
    string companyName)
{
    if (string.IsNullOrWhiteSpace(accountsCategory)
        || !accountsCategory.ToLowerInvariant().Contains("dormant"))
    {
        return;
    }

    // Directors and company secretaries can legitimately maintain a dormant company.
    var loweredTitle = jobTitle?.ToLowerInvariant() ?? "";
    if (loweredTitle.Contains("director") || loweredTitle.Contains("company secretary"))
    {
        return;
    }

    flags.Add(new CompanyVerificationFlag
    {
        Type = "EmploymentAtDormantCompany",
        Severity = "Warning",
        Message = $"Claimed active employment as '{jobTitle}' at '{companyName}' which files dormant accounts",
        ScoreImpact = -10
    });
}
/// <summary>
/// Adds a warning when a senior-management title is claimed at a company whose accounts
/// category contains "micro". Does nothing when the category or the title is unknown.
/// </summary>
private static void CheckCompanySizeVsRole(
    List<CompanyVerificationFlag> flags,
    string? accountsCategory,
    string? jobTitle,
    string companyName)
{
    if (string.IsNullOrWhiteSpace(accountsCategory) || string.IsNullOrWhiteSpace(jobTitle))
    {
        return;
    }

    // Micro-entity: < 10 employees, < £632k turnover
    if (!accountsCategory.ToLowerInvariant().Contains("micro"))
    {
        return;
    }

    // Substrings that mark a title as senior management.
    string[] seniorMarkers =
    [
        "vp",
        "vice president",
        "head of",
        "chief",
        "director of",
        "senior director"
    ];

    var loweredTitle = jobTitle.ToLowerInvariant();
    var claimsSeniorRole = false;
    foreach (var marker in seniorMarkers)
    {
        if (loweredTitle.Contains(marker))
        {
            claimsSeniorRole = true;
            break;
        }
    }

    if (!claimsSeniorRole)
    {
        return;
    }

    flags.Add(new CompanyVerificationFlag
    {
        Type = "SeniorRoleAtMicroCompany",
        Severity = "Warning",
        Message = $"Claimed senior role '{jobTitle}' at '{companyName}' which files micro-entity accounts (typically <10 employees)",
        ScoreImpact = -10
    });
}
/// <summary>
/// Adds an informational flag when a technology job title is claimed at a company that has
/// none of the SIC codes listed in <c>TechSicCodes</c>.
/// Does nothing when the SIC codes or the job title are unknown.
/// </summary>
/// <param name="flags">Flag list to append to.</param>
/// <param name="sicCodes">SIC codes registered for the company, if known.</param>
/// <param name="jobTitle">Claimed job title, if known.</param>
/// <param name="companyName">Matched company name, used in the flag message.</param>
private static void CheckSicCodeMismatch(
    List<CompanyVerificationFlag> flags,
    List<string>? sicCodes,
    string? jobTitle,
    string companyName)
{
    if (sicCodes is null || sicCodes.Count == 0 || string.IsNullOrWhiteSpace(jobTitle))
    {
        return;
    }

    var title = jobTitle.ToLowerInvariant();

    // Keywords that mark a title as a tech role when they appear anywhere in it.
    string[] techKeywords =
    [
        "software", "developer", "engineer", "programmer",
        "data scientist", "data analyst", "devops", "cloud",
        "machine learning", "frontend", "backend", "full stack", "fullstack"
    ];

    // "ai" is matched as a whole word: a plain substring test misses titles that end in "AI"
    // ("Head of AI") and wrongly matches words that merely end in "ai" ("Thai chef").
    var isTechRole = techKeywords.Any(keyword => title.Contains(keyword))
        || System.Text.RegularExpressions.Regex.IsMatch(title, @"\bai\b");
    if (!isTechRole)
    {
        return;
    }

    // Any single tech SIC code is enough to treat the company as a technology business.
    if (sicCodes.Any(code => TechSicCodes.Contains(code)))
    {
        return;
    }

    // Get the primary SIC code description (simplified - just show code)
    var primarySic = sicCodes.FirstOrDefault() ?? "Unknown";
    flags.Add(new CompanyVerificationFlag
    {
        Type = "SicCodeMismatch",
        Severity = "Info",
        Message = $"Tech role '{jobTitle}' at '{companyName}' (SIC: {primarySic}) - company is not registered as a technology business",
        ScoreImpact = -5
    });
}
/// <summary>
/// Judges whether a claimed job title is plausible for the company type. Only PLCs are
/// scrutinised: C-suite, board and VP-level titles at a PLC are reported as implausible
/// because they need independent verification.
/// </summary>
/// <param name="jobTitle">Claimed job title, if known.</param>
/// <param name="companyType">Company type of the matched company, if known.</param>
/// <returns>
/// <c>(null, null)</c> when either input is blank; <c>(false, notes)</c> for a senior title at a PLC;
/// otherwise <c>(true, null)</c>.
/// </returns>
private static (bool? IsPlausible, string? Notes) CheckJobTitlePlausibility(string? jobTitle, string? companyType)
{
    if (string.IsNullOrWhiteSpace(jobTitle) || string.IsNullOrWhiteSpace(companyType))
    {
        return (null, null);
    }

    var title = jobTitle.Trim().ToLowerInvariant();
    var type = companyType.Trim().ToLowerInvariant();

    // Check if this is a PLC (Public Limited Company) - these are large companies
    var isPlc = type.Contains("plc") || type.Contains("public limited");
    if (!isPlc)
    {
        return (true, null);
    }

    // Acronyms are matched as whole words. A substring test raises false alarms:
    // "cto" occurs inside "director", "doctor" and "contractor"; "coo" inside "coordinator".
    var hasCsuiteAcronym = System.Text.RegularExpressions.Regex.IsMatch(title, @"\b(?:ceo|cto|cfo|coo|cio)\b");

    // Check for C-suite / very senior roles
    var isCsuiteRole = hasCsuiteAcronym ||
        title.Contains("chief executive") ||
        title.Contains("chief technology") ||
        title.Contains("chief financial") ||
        title.Contains("chief operating") ||
        title.Contains("chief information") ||
        title.Contains("managing director") ||
        title == "md" ||
        title.Contains("chairman") ||
        title.Contains("chairwoman") ||
        title.Contains("chairperson") ||
        title.Contains("president");

    // Check for board-level roles. A bare "director" counts; "director of X" does not.
    var isBoardRole = title.Contains("board member") ||
        title.Contains("non-executive director") ||
        title.Contains("executive director") ||
        title == "director";

    if (isCsuiteRole || isBoardRole)
    {
        return (false, $"Claimed senior role '{jobTitle}' at a PLC requires verification - C-suite positions at public companies are publicly disclosed");
    }

    // Check for VP/SVP at PLCs (also usually disclosed). Whole-word matching catches a trailing
    // "VP" ("Engineering VP") and no longer fires on words that merely start with "vp" ("VPN").
    var isVpRole = title.Contains("vice president") ||
        System.Text.RegularExpressions.Regex.IsMatch(title, @"\b[sea]?vp\b");

    if (isVpRole)
    {
        return (false, $"Claimed VP-level role '{jobTitle}' at a PLC - senior positions at public companies should be verifiable");
    }

    return (true, null);
}
#endregion
#region Helper Methods
/// <summary>
/// Looks through unexpired cache rows for the one whose name best fuzzy-matches
/// <paramref name="companyName"/>.
/// </summary>
/// <param name="companyName">Claimed company name.</param>
/// <returns>
/// The highest-scoring cache row at or above <c>FuzzyMatchThreshold</c> (the earliest such row
/// on a tie), or <c>null</c> when none qualifies.
/// </returns>
private async Task<CompanyCache?> FindCachedMatchAsync(string companyName)
{
    var oldestAllowed = DateTime.UtcNow.AddDays(-CacheExpirationDays);

    await using var db = await _dbContextFactory.CreateDbContextAsync();
    var freshEntries = await db.CompanyCache
        .Where(c => c.CachedAt >= oldestAllowed)
        .ToListAsync();

    if (freshEntries.Count == 0)
    {
        return null;
    }

    var claimedUpper = companyName.ToUpperInvariant();

    CompanyCache? best = null;
    // Start just below the threshold so only scores >= FuzzyMatchThreshold can win.
    var bestScore = FuzzyMatchThreshold - 1;
    foreach (var entry in freshEntries)
    {
        if (string.IsNullOrWhiteSpace(entry.CompanyName))
        {
            continue;
        }

        var score = Fuzz.TokenSetRatio(claimedUpper, entry.CompanyName.ToUpperInvariant());
        // Strictly greater keeps the earliest entry when scores tie.
        if (score > bestScore)
        {
            best = entry;
            bestScore = score;
        }
    }

    return best;
}
/// <summary>
/// Picks the best fuzzy match for a claimed company from a Companies House search result.
/// Each item is scored against both the claimed name and the query that produced the results,
/// which handles cases like "Matthew Walker (Northern Foods Plc)" searched as "Northern Foods Plc".
/// When a claimed start date is given, only companies that existed by then are eligible.
/// </summary>
/// <param name="companyName">Company name as claimed.</param>
/// <param name="searchQuery">Query text that produced <paramref name="items"/>.</param>
/// <param name="items">Search results to score.</param>
/// <param name="claimedStartDate">Claimed employment start date, if known.</param>
/// <returns>The chosen item with its score, or <c>null</c> when nothing qualifies.</returns>
private (CompaniesHouseSearchItem Item, int Score)? FindBestMatch(
    string companyName,
    string searchQuery,
    List<CompaniesHouseSearchItem> items,
    DateOnly? claimedStartDate)
{
    var claimedUpper = companyName.ToUpperInvariant();
    var queryUpper = searchQuery.ToUpperInvariant();

    // Score every titled item and keep those that reach the threshold.
    var candidates = new List<(CompaniesHouseSearchItem Item, int Score)>();
    foreach (var item in items)
    {
        if (string.IsNullOrWhiteSpace(item.Title))
        {
            continue;
        }

        var titleUpper = item.Title.ToUpperInvariant();
        var scoreVsClaimed = Fuzz.TokenSetRatio(claimedUpper, titleUpper);
        var scoreVsQuery = Fuzz.TokenSetRatio(queryUpper, titleUpper);
        var score = Math.Max(scoreVsClaimed, scoreVsQuery);
        if (score >= FuzzyMatchThreshold)
        {
            candidates.Add((item, score));
        }
    }

    _logger.LogDebug("Found {Count} matches above threshold for '{CompanyName}' (query: '{Query}')", candidates.Count, companyName, searchQuery);
    for (var i = 0; i < candidates.Count && i < 5; i++)
    {
        var shown = candidates[i];
        _logger.LogDebug("  Match: {Title} ({Number}), Score: {Score}, DateOfCreation: {Date}",
            shown.Item.Title, shown.Item.CompanyNumber, shown.Score, shown.Item.DateOfCreation ?? "null");
    }

    if (candidates.Count == 0)
    {
        return null;
    }

    if (!claimedStartDate.HasValue)
    {
        // No start date provided - just use highest score
        var top = HighestScoring(candidates);
        _logger.LogDebug("No start date filter, using highest score: {Title} ({Number})", top.Item.Title, top.Item.CompanyNumber);
        return top;
    }

    _logger.LogDebug("Filtering for companies that existed at claimed start date: {StartDate}", claimedStartDate.Value);
    var eligible = new List<(CompaniesHouseSearchItem Item, int Score)>();
    foreach (var candidate in candidates)
    {
        // An unknown incorporation date is treated as "existed".
        var incDate = DateHelpers.ParseDate(candidate.Item.DateOfCreation);
        var existed = incDate == null || incDate <= claimedStartDate.Value;
        _logger.LogDebug("  {Title}: IncDate={IncDate}, Existed={Existed}",
            candidate.Item.Title, incDate?.ToString() ?? "null", existed);
        if (existed)
        {
            eligible.Add(candidate);
        }
    }

    _logger.LogDebug("Companies that existed at start date: {Count}", eligible.Count);
    if (eligible.Count == 0)
    {
        // No companies existed at the claimed start date - don't match a wrong company
        _logger.LogDebug("No companies found that existed at claimed start date {StartDate}, returning no match", claimedStartDate.Value);
        return null;
    }

    var selected = HighestScoring(eligible);
    _logger.LogDebug("Selected: {Title} ({Number})", selected.Item.Title, selected.Item.CompanyNumber);
    return selected;

    // Returns the first entry carrying the maximum score (earlier entries win ties).
    static (CompaniesHouseSearchItem Item, int Score) HighestScoring(
        List<(CompaniesHouseSearchItem Item, int Score)> pool)
    {
        var best = pool[0];
        for (var i = 1; i < pool.Count; i++)
        {
            if (pool[i].Score > best.Score)
            {
                best = pool[i];
            }
        }

        return best;
    }
}
/// <summary>
/// Inserts or refreshes the cache row for a matched company, keyed by company number.
/// </summary>
/// <param name="item">Search item that was matched.</param>
/// <param name="details">Full company details, if they could be fetched; supplies SIC codes and accounts type.</param>
private async Task CacheCompanyAsync(CompaniesHouseSearchItem item, CompaniesHouseCompany? details)
{
    try
    {
        await using var db = await _dbContextFactory.CreateDbContextAsync();
        var cached = await db.CompanyCache
            .FirstOrDefaultAsync(c => c.CompanyNumber == item.CompanyNumber);

        // Prefer SIC codes from the full details; fall back to the search item.
        var effectiveSicCodes = details?.SicCodes ?? item.SicCodes;
        var serializedSicCodes = effectiveSicCodes is null ? null : JsonSerializer.Serialize(effectiveSicCodes);
        var lastAccountsType = details?.Accounts?.LastAccounts?.Type;

        if (cached is null)
        {
            db.CompanyCache.Add(new CompanyCache
            {
                CompanyNumber = item.CompanyNumber,
                CompanyName = item.Title,
                Status = item.CompanyStatus ?? "Unknown",
                CompanyType = item.CompanyType,
                IncorporationDate = DateHelpers.ParseDate(item.DateOfCreation),
                DissolutionDate = DateHelpers.ParseDate(item.DateOfCessation),
                AccountsCategory = lastAccountsType,
                SicCodesJson = serializedSicCodes,
                CachedAt = DateTime.UtcNow
            });
        }
        else
        {
            // Refresh every cached field and restart the expiry clock.
            cached.CompanyName = item.Title;
            cached.Status = item.CompanyStatus ?? "Unknown";
            cached.CompanyType = item.CompanyType;
            cached.IncorporationDate = DateHelpers.ParseDate(item.DateOfCreation);
            cached.DissolutionDate = DateHelpers.ParseDate(item.DateOfCessation);
            cached.AccountsCategory = lastAccountsType;
            cached.SicCodesJson = serializedSicCodes;
            cached.CachedAt = DateTime.UtcNow;
        }

        await db.SaveChangesAsync();
    }
    catch (DbUpdateException ex) when (ex.InnerException?.Message.Contains("PK_CompanyCache") == true)
    {
        // Race condition: another task already cached this company - ignore
        _logger.LogDebug("Company {CompanyNumber} already cached by another task", item.CompanyNumber);
    }
}
/// <summary>
/// Builds a verified result from a cache row, running the same verification checks as a live lookup.
/// </summary>
/// <param name="cached">Cache row that matched the claimed company.</param>
/// <param name="claimedCompany">Company name as claimed.</param>
/// <param name="startDate">Claimed employment start date, if known.</param>
/// <param name="endDate">Claimed employment end date, if known.</param>
/// <param name="jobTitle">Claimed job title, if known.</param>
/// <param name="flags">Flag list that the checks append to; it becomes the result's flags.</param>
/// <returns>A verified result populated from the cache row.</returns>
private CompanyVerificationResult CreateResultFromCache(
    CompanyCache cached,
    string claimedCompany,
    DateOnly? startDate,
    DateOnly? endDate,
    string? jobTitle,
    List<CompanyVerificationFlag> flags)
{
    var matchScore = Fuzz.TokenSetRatio(
        claimedCompany.ToUpperInvariant(),
        cached.CompanyName.ToUpperInvariant());

    List<string>? sicCodes = null;
    if (!string.IsNullOrEmpty(cached.SicCodesJson))
    {
        try
        {
            sicCodes = JsonSerializer.Deserialize<List<string>>(cached.SicCodesJson);
        }
        catch (JsonException ex)
        {
            // Malformed cached JSON is not fatal: carry on without SIC codes, but leave a
            // trace so a corrupt cache row can be found instead of silently skipping the SIC check.
            _logger.LogWarning(ex, "Ignoring malformed SIC codes JSON in cache for company {CompanyNumber}", cached.CompanyNumber);
        }
    }

    // Run all verification checks
    CheckIncorporationDate(flags, startDate, cached.IncorporationDate, cached.CompanyName);
    CheckDissolutionDate(flags, endDate, cached.DissolutionDate, cached.Status, cached.CompanyName);
    CheckDormantCompany(flags, cached.AccountsCategory, jobTitle, cached.CompanyName);
    CheckCompanySizeVsRole(flags, cached.AccountsCategory, jobTitle, cached.CompanyName);
    CheckSicCodeMismatch(flags, sicCodes, jobTitle, cached.CompanyName);

    // Only an explicit false raises a flag; null means the title could not be judged.
    var (jobPlausible, jobNotes) = CheckJobTitlePlausibility(jobTitle, cached.CompanyType);
    if (jobPlausible == false)
    {
        flags.Add(new CompanyVerificationFlag
        {
            Type = "ImplausibleJobTitle",
            Severity = "Critical",
            Message = jobNotes ?? "Job title requires verification",
            ScoreImpact = -15
        });
    }

    return new CompanyVerificationResult
    {
        ClaimedCompany = claimedCompany,
        MatchedCompanyName = cached.CompanyName,
        MatchedCompanyNumber = cached.CompanyNumber,
        MatchScore = matchScore,
        IsVerified = true,
        VerificationNotes = null,
        ClaimedStartDate = startDate,
        ClaimedEndDate = endDate,
        CompanyType = cached.CompanyType,
        CompanyStatus = cached.Status,
        IncorporationDate = cached.IncorporationDate,
        DissolutionDate = cached.DissolutionDate,
        AccountsCategory = cached.AccountsCategory,
        SicCodes = sicCodes,
        ClaimedJobTitle = jobTitle,
        JobTitlePlausible = jobPlausible,
        JobTitleNotes = jobNotes,
        Flags = flags
    };
}
/// <summary>
/// Builds the result returned when a company could not be verified: no matched company,
/// a match score of zero, and <paramref name="reason"/> as the verification notes.
/// </summary>
private static CompanyVerificationResult CreateUnverifiedResult(
    string companyName,
    DateOnly? startDate,
    DateOnly? endDate,
    string? jobTitle,
    string reason) =>
    new()
    {
        ClaimedCompany = companyName,
        MatchedCompanyName = null,
        MatchedCompanyNumber = null,
        MatchScore = 0,
        IsVerified = false,
        VerificationNotes = reason,
        ClaimedStartDate = startDate,
        ClaimedEndDate = endDate,
        ClaimedJobTitle = jobTitle
    };
/// <summary>
/// Generates alternative search queries to find companies that may be registered
/// with slightly different names (e.g., "U.K." vs "UK", "Limited" vs "Ltd").
/// Also handles "Brand (Parent Company)" format by extracting and prioritizing the parent,
/// and "Name1/Name2" format by trying each part separately.
/// </summary>
/// <param name="companyName">Company name as claimed.</param>
/// <returns>
/// Distinct queries (case-insensitive) in priority order: parent company, brand name,
/// slash-separated parts, then the full original name.
/// </returns>
private static List<string> GenerateSearchQueries(string companyName)
{
    // The list carries the priority order explicitly; the set is only used for de-duplication.
    // Enumerating a HashSet is not guaranteed to follow insertion order, so it must not be
    // the thing that decides which query is tried first.
    var orderedQueries = new List<string>();
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    void AddVariationsOf(string candidateName)
    {
        foreach (var variation in GenerateNameVariations(candidateName))
        {
            if (seen.Add(variation))
            {
                orderedQueries.Add(variation);
            }
        }
    }

    var normalized = companyName.Trim();

    // Step 0a: Check for "Brand (Parent Company)" format and extract parent company
    // Parent company is more likely to be the registered name, so search it first
    var parentMatch = System.Text.RegularExpressions.Regex.Match(normalized, @"\(([^)]+)\)\s*$");
    if (parentMatch.Success)
    {
        var parentCompany = parentMatch.Groups[1].Value.Trim();

        // A whitespace-only parenthetical would otherwise produce an empty search query.
        if (parentCompany.Length > 0)
        {
            AddVariationsOf(parentCompany);
        }

        // Also try the brand name without parenthetical
        var brandName = normalized[..parentMatch.Index].Trim();
        if (brandName.Length >= 3)
        {
            AddVariationsOf(brandName);
        }
    }

    // Step 0b: Check for "Name1/Name2" format (e.g., "ASDA/WALMART")
    // Try each part separately as they may be different registered names
    if (normalized.Contains('/'))
    {
        var parts = normalized.Split('/', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
        foreach (var part in parts)
        {
            if (part.Length >= 3)
            {
                AddVariationsOf(part);
            }
        }
    }

    // Also add variations of the full original name
    AddVariationsOf(normalized);

    return orderedQueries;
}
/// <summary>
/// Generates name variations for a single company name (UK/U.K., Ltd/Limited, PLC/Public Limited
/// Company, and the core name with its legal suffix removed).
/// </summary>
/// <param name="name">Company name to vary.</param>
/// <returns>
/// Distinct variations (case-insensitive) in the order they were generated; the first entry is
/// always <paramref name="name"/> itself.
/// </returns>
private static List<string> GenerateNameVariations(string name)
{
    const string LtdSuffix = " Ltd";
    const string LimitedSuffix = " Limited";
    const string PlcSuffix = " PLC";
    const string PublicLimitedCompanySuffix = " Public Limited Company";

    // The list keeps generation order; the set only de-duplicates (HashSet enumeration order
    // is not guaranteed, so it must not decide the order of the returned queries).
    var ordered = new List<string>();
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    void Add(string candidate)
    {
        if (seen.Add(candidate))
        {
            ordered.Add(candidate);
        }
    }

    // Rewrites " UK" to " U.K." only where UK is a whole word, so that e.g. "Ukraine"
    // is not mangled into "U.K.raine".
    static string DotUk(string value) =>
        System.Text.RegularExpressions.Regex.Replace(
            value,
            @"(?<= )UK\b",
            "U.K.",
            System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    Add(name);

    // Step 1: Generate UK/U.K. variations
    var ukVariants = new List<string> { name };

    var withDots = DotUk(name);
    if (withDots != name)
    {
        ukVariants.Add(withDots);
    }

    if (name.Contains(" U.K.", StringComparison.OrdinalIgnoreCase))
    {
        var withoutDots = name.Replace(" U.K.", " UK", StringComparison.OrdinalIgnoreCase);
        if (withoutDots != name)
        {
            ukVariants.Add(withoutDots);
        }
    }

    // Step 2: For each UK variant, generate suffix variations (Ltd/Limited, PLC/Public Limited Company).
    // Slice lengths come from the suffix constants so they cannot drift from the text being removed.
    foreach (var variant in ukVariants)
    {
        Add(variant);

        if (variant.EndsWith(LtdSuffix, StringComparison.OrdinalIgnoreCase))
        {
            Add(variant[..^LtdSuffix.Length] + LimitedSuffix);
        }
        else if (variant.EndsWith(LimitedSuffix, StringComparison.OrdinalIgnoreCase))
        {
            Add(variant[..^LimitedSuffix.Length] + LtdSuffix);
        }
        else if (variant.EndsWith(PlcSuffix, StringComparison.OrdinalIgnoreCase))
        {
            // Case-insensitive, so this also covers the mixed-case "Plc" spelling.
            Add(variant[..^PlcSuffix.Length] + PublicLimitedCompanySuffix);
        }
        else if (variant.EndsWith(PublicLimitedCompanySuffix, StringComparison.OrdinalIgnoreCase))
        {
            Add(variant[..^PublicLimitedCompanySuffix.Length] + PlcSuffix);
        }
    }

    // Step 3: Try core name without suffix
    var suffixesToRemove = new[] { " Ltd", " Limited", " PLC", " Plc", " LLP", " Inc", " Corporation", " Corp" };
    var coreName = name;
    foreach (var suffix in suffixesToRemove)
    {
        if (coreName.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
        {
            coreName = coreName[..^suffix.Length].Trim();
            break;
        }
    }

    if (coreName != name && coreName.Length >= 3)
    {
        Add(coreName);
        Add(coreName + LimitedSuffix);
        Add(coreName + PlcSuffix);

        // Also add U.K. variant of core name if applicable
        var coreWithDots = DotUk(coreName);
        if (coreWithDots != coreName)
        {
            Add(coreWithDots);
            Add(coreWithDots + LimitedSuffix);
        }
    }

    return ordered;
}
#endregion
}