Initial commit: TrueCV CV verification platform
Clean architecture solution with:
- Domain: Entities (User, CVCheck, CVFlag, CompanyCache) and Enums
- Application: Service interfaces, DTOs, and models
- Infrastructure: EF Core, Identity, Hangfire, external API clients, services
- Web: Blazor Server UI with pages and components

Features:
- CV upload and parsing (PDF/DOCX) using Claude API
- Employment verification against Companies House API
- Timeline analysis for gaps and overlaps
- Veracity scoring algorithm
- Background job processing with Hangfire
- Azure Blob Storage for file storage

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
247
src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs
Normal file
247
src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs
Normal file
@@ -0,0 +1,247 @@
|
||||
using FuzzySharp;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using TrueCV.Application.DTOs;
|
||||
using TrueCV.Application.Interfaces;
|
||||
using TrueCV.Application.Models;
|
||||
using TrueCV.Domain.Entities;
|
||||
using TrueCV.Infrastructure.Data;
|
||||
using TrueCV.Infrastructure.ExternalApis;
|
||||
|
||||
namespace TrueCV.Infrastructure.Services;
|
||||
|
||||
public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
{
|
||||
private readonly CompaniesHouseClient _companiesHouseClient;
|
||||
private readonly ApplicationDbContext _dbContext;
|
||||
private readonly ILogger<CompanyVerifierService> _logger;
|
||||
|
||||
private const int FuzzyMatchThreshold = 70;
|
||||
private const int CacheExpirationDays = 30;
|
||||
|
||||
/// <summary>
/// Creates the verifier from its collaborators.
/// </summary>
/// <param name="companiesHouseClient">Client used to search Companies House.</param>
/// <param name="dbContext">Database context that exposes the company cache.</param>
/// <param name="logger">Logger for diagnostic messages.</param>
public CompanyVerifierService(
    CompaniesHouseClient companiesHouseClient,
    ApplicationDbContext dbContext,
    ILogger<CompanyVerifierService> logger)
    => (_companiesHouseClient, _dbContext, _logger) = (companiesHouseClient, dbContext, logger);
|
||||
|
||||
/// <summary>
/// Verifies a claimed company name, first against the local company cache and then
/// against a Companies House search, using fuzzy name matching.
/// </summary>
/// <param name="companyName">Company name as claimed; must not be null, empty or whitespace.</param>
/// <param name="startDate">Claimed start date; copied unchanged onto the result.</param>
/// <param name="endDate">Claimed end date; copied unchanged onto the result.</param>
/// <returns>
/// A verified result when a cached or searched company scores at or above the fuzzy
/// threshold; otherwise an unverified result whose notes state the reason.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="companyName"/> is null, empty or whitespace.
/// </exception>
public async Task<CompanyVerificationResult> VerifyCompanyAsync(
    string companyName,
    DateOnly? startDate,
    DateOnly? endDate)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(companyName);

    _logger.LogDebug("Verifying company: {CompanyName}", companyName);

    // A close-enough cache entry short-circuits the external lookup.
    if (await FindCachedMatchAsync(companyName) is { } cacheHit)
    {
        _logger.LogDebug("Found cached company match for: {CompanyName}", companyName);
        return CreateVerificationResult(companyName, cacheHit, startDate, endDate);
    }

    try
    {
        var response = await _companiesHouseClient.SearchCompaniesAsync(companyName);

        // Guard: nothing came back from the search.
        if (response is not { Items: { Count: > 0 } candidates })
        {
            _logger.LogDebug("No companies found for: {CompanyName}", companyName);
            return CreateUnverifiedResult(companyName, startDate, endDate, "No matching company found in Companies House");
        }

        // Guard: results came back but none is similar enough to the claimed name.
        if (FindBestMatch(companyName, candidates) is not { } winner)
        {
            _logger.LogDebug("No fuzzy match above threshold for: {CompanyName}", companyName);
            return CreateUnverifiedResult(
                companyName,
                startDate,
                endDate,
                $"No company name matched above {FuzzyMatchThreshold}% threshold");
        }

        // Persist the match so later lookups can be answered from the cache.
        await CacheCompanyAsync(winner.Item);

        _logger.LogInformation(
            "Verified company {ClaimedName} matched to {MatchedName} with score {Score}%",
            companyName, winner.Item.Title, winner.Score);

        return new CompanyVerificationResult
        {
            ClaimedCompany = companyName,
            MatchedCompanyName = winner.Item.Title,
            MatchedCompanyNumber = winner.Item.CompanyNumber,
            MatchScore = winner.Score,
            IsVerified = true,
            VerificationNotes = $"Matched with {winner.Score}% confidence",
            ClaimedStartDate = startDate,
            ClaimedEndDate = endDate
        };
    }
    catch (CompaniesHouseRateLimitException ex)
    {
        // Rate limiting is reported as an unverified result rather than an error.
        _logger.LogWarning(ex, "Rate limit hit while verifying company: {CompanyName}", companyName);
        return CreateUnverifiedResult(
            companyName,
            startDate,
            endDate,
            "Verification temporarily unavailable due to rate limiting");
    }
}
|
||||
|
||||
/// <summary>
/// Searches Companies House and maps each returned item to a <see cref="CompanySearchResult"/>.
/// </summary>
/// <param name="query">Search text; must not be null, empty or whitespace.</param>
/// <returns>
/// One result per returned item, in the order received; an empty list when the
/// response or its item collection is null.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="query"/> is null, empty or whitespace.
/// </exception>
public async Task<List<CompanySearchResult>> SearchCompaniesAsync(string query)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(query);

    _logger.LogDebug("Searching companies for query: {Query}", query);

    var searchResponse = await _companiesHouseClient.SearchCompaniesAsync(query);

    var results = new List<CompanySearchResult>();

    if (searchResponse?.Items is { } foundItems)
    {
        foreach (var found in foundItems)
        {
            results.Add(new CompanySearchResult
            {
                CompanyNumber = found.CompanyNumber,
                CompanyName = found.Title,
                // A missing status is surfaced as the literal "Unknown".
                CompanyStatus = found.CompanyStatus ?? "Unknown",
                IncorporationDate = ParseDate(found.DateOfCreation),
                AddressSnippet = found.AddressSnippet
            });
        }
    }

    return results;
}
|
||||
|
||||
/// <summary>
/// Looks for a non-expired cache entry whose name fuzzily matches the claimed company name.
/// </summary>
/// <param name="companyName">Company name as claimed.</param>
/// <returns>
/// The cached company with the highest fuzzy score at or above <c>FuzzyMatchThreshold</c>
/// (the first such entry on ties), or <c>null</c> when no entry qualifies.
/// </returns>
private async Task<CompanyCache?> FindCachedMatchAsync(string companyName)
{
    var cutoffDate = DateTime.UtcNow.AddDays(-CacheExpirationDays);

    // The rows are only read here, so skip change tracking; otherwise every lookup
    // would attach all recent cache rows to the DbContext.
    var cachedCompanies = await _dbContext.CompanyCache
        .AsNoTracking()
        .Where(c => c.CachedAt >= cutoffDate)
        .ToListAsync();

    if (cachedCompanies.Count == 0)
    {
        return null;
    }

    // Normalize the claimed name once instead of once per cached row.
    var normalizedSearch = companyName.ToUpperInvariant();

    // MaxBy keeps the first element among equal scores, which is the same entry a
    // stable descending sort followed by FirstOrDefault would select.
    var bestMatch = cachedCompanies
        .Select(c => new { Company = c, Score = Fuzz.Ratio(normalizedSearch, c.CompanyName.ToUpperInvariant()) })
        .Where(m => m.Score >= FuzzyMatchThreshold)
        .MaxBy(m => m.Score);

    return bestMatch?.Company;
}
|
||||
|
||||
/// <summary>
/// Picks the search item whose title is most similar to the claimed company name.
/// </summary>
/// <param name="companyName">Company name as claimed.</param>
/// <param name="items">Search items to score.</param>
/// <returns>
/// The highest-scoring item together with its score, provided the score is at least
/// <c>FuzzyMatchThreshold</c>; <c>null</c> when no item reaches the threshold. When
/// several items share the top score, the earliest one in <paramref name="items"/> wins.
/// </returns>
private static (CompaniesHouseSearchItem Item, int Score)? FindBestMatch(
    string companyName,
    List<CompaniesHouseSearchItem> items)
{
    var target = companyName.ToUpperInvariant();
    (CompaniesHouseSearchItem Item, int Score)? best = null;

    foreach (var candidate in items)
    {
        var score = Fuzz.Ratio(target, candidate.Title.ToUpperInvariant());

        if (score < FuzzyMatchThreshold)
        {
            continue;
        }

        // Strictly greater: an equal score never displaces an earlier candidate.
        if (best is null || score > best.Value.Score)
        {
            best = (candidate, score);
        }
    }

    return best;
}
|
||||
|
||||
/// <summary>
/// Inserts or refreshes the cache entry for the given search item, keyed by company
/// number, and saves the change.
/// </summary>
/// <param name="item">Search item whose details are written to the cache.</param>
private async Task CacheCompanyAsync(CompaniesHouseSearchItem item)
{
    var entry = await _dbContext.CompanyCache
        .FirstOrDefaultAsync(c => c.CompanyNumber == item.CompanyNumber);

    // Values shared by both the insert and the update path.
    var status = item.CompanyStatus ?? "Unknown";
    var incorporatedOn = ParseDate(item.DateOfCreation);
    var dissolvedOn = ParseDate(item.DateOfCessation);
    var cachedAt = DateTime.UtcNow;

    if (entry is null)
    {
        // No entry for this company number yet: add a new one.
        _dbContext.CompanyCache.Add(new CompanyCache
        {
            CompanyNumber = item.CompanyNumber,
            CompanyName = item.Title,
            Status = status,
            IncorporationDate = incorporatedOn,
            DissolutionDate = dissolvedOn,
            CachedAt = cachedAt
        });
    }
    else
    {
        // Existing entry: overwrite its details and reset its timestamp.
        entry.CompanyName = item.Title;
        entry.Status = status;
        entry.IncorporationDate = incorporatedOn;
        entry.DissolutionDate = dissolvedOn;
        entry.CachedAt = cachedAt;
    }

    await _dbContext.SaveChangesAsync();
}
|
||||
|
||||
/// <summary>
/// Builds a verified result from a cached company, recomputing the fuzzy score between
/// the claimed name and the cached name.
/// </summary>
/// <param name="claimedCompany">Company name as claimed.</param>
/// <param name="cached">Cache entry that was matched.</param>
/// <param name="startDate">Claimed start date; copied onto the result.</param>
/// <param name="endDate">Claimed end date; copied onto the result.</param>
/// <returns>A result with <c>IsVerified</c> set to <c>true</c> and notes citing the cache.</returns>
private static CompanyVerificationResult CreateVerificationResult(
    string claimedCompany,
    CompanyCache cached,
    DateOnly? startDate,
    DateOnly? endDate)
{
    var claimedUpper = claimedCompany.ToUpperInvariant();
    var cachedUpper = cached.CompanyName.ToUpperInvariant();
    var confidence = Fuzz.Ratio(claimedUpper, cachedUpper);

    var result = new CompanyVerificationResult
    {
        ClaimedCompany = claimedCompany,
        MatchedCompanyName = cached.CompanyName,
        MatchedCompanyNumber = cached.CompanyNumber,
        MatchScore = confidence,
        IsVerified = true,
        VerificationNotes = $"Matched from cache with {confidence}% confidence",
        ClaimedStartDate = startDate,
        ClaimedEndDate = endDate
    };

    return result;
}
|
||||
|
||||
/// <summary>
/// Builds an unverified result: no matched company, a score of zero, and the given
/// reason as the verification notes.
/// </summary>
/// <param name="companyName">Company name as claimed.</param>
/// <param name="startDate">Claimed start date; copied onto the result.</param>
/// <param name="endDate">Claimed end date; copied onto the result.</param>
/// <param name="reason">Explanation stored in the verification notes.</param>
/// <returns>A result with <c>IsVerified</c> set to <c>false</c>.</returns>
private static CompanyVerificationResult CreateUnverifiedResult(
    string companyName,
    DateOnly? startDate,
    DateOnly? endDate,
    string reason) =>
    new()
    {
        ClaimedCompany = companyName,
        MatchedCompanyName = null,
        MatchedCompanyNumber = null,
        MatchScore = 0,
        IsVerified = false,
        VerificationNotes = reason,
        ClaimedStartDate = startDate,
        ClaimedEndDate = endDate
    };
|
||||
|
||||
/// <summary>
/// Parses a date string into a <see cref="DateOnly"/> without depending on the
/// current thread culture.
/// </summary>
/// <param name="dateString">Date text to parse; may be null, empty or whitespace.</param>
/// <returns>
/// The parsed date, or <c>null</c> when the input is null, empty, whitespace, or not
/// a recognizable date.
/// </returns>
private static DateOnly? ParseDate(string? dateString)
{
    if (string.IsNullOrWhiteSpace(dateString))
    {
        return null;
    }

    // Parse with the invariant culture: the two-argument TryParse overload uses the
    // current culture, so the same input could yield a different date (or fail)
    // depending on the host's regional settings or calendar.
    return DateOnly.TryParse(
        dateString,
        System.Globalization.CultureInfo.InvariantCulture,
        System.Globalization.DateTimeStyles.None,
        out var parsed)
        ? parsed
        : null;
}
|
||||
}
|
||||
Reference in New Issue
Block a user