Initial commit: TrueCV CV verification platform
Clean architecture solution with:
- Domain: Entities (User, CVCheck, CVFlag, CompanyCache) and Enums
- Application: Service interfaces, DTOs, and models
- Infrastructure: EF Core, Identity, Hangfire, external API clients, services
- Web: Blazor Server UI with pages and components

Features:
- CV upload and parsing (PDF/DOCX) using Claude API
- Employment verification against Companies House API
- Timeline analysis for gaps and overlaps
- Veracity scoring algorithm
- Background job processing with Hangfire
- Azure Blob Storage for file storage

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
164
src/TrueCV.Infrastructure/Services/CVCheckService.cs
Normal file
164
src/TrueCV.Infrastructure/Services/CVCheckService.cs
Normal file
@@ -0,0 +1,164 @@
|
||||
using System.Text.Json;
|
||||
using Hangfire;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using TrueCV.Application.DTOs;
|
||||
using TrueCV.Application.Interfaces;
|
||||
using TrueCV.Application.Models;
|
||||
using TrueCV.Domain.Entities;
|
||||
using TrueCV.Domain.Enums;
|
||||
using TrueCV.Infrastructure.Data;
|
||||
using TrueCV.Infrastructure.Jobs;
|
||||
|
||||
namespace TrueCV.Infrastructure.Services;
|
||||
|
||||
/// <summary>
/// Creates CV checks (upload, database record, queued processing job) and reads
/// checks and their veracity reports back from the database.
/// </summary>
public sealed class CVCheckService : ICVCheckService
{
    private readonly ApplicationDbContext _dbContext;
    private readonly IFileStorageService _fileStorageService;
    private readonly IBackgroundJobClient _backgroundJobClient;
    private readonly ILogger<CVCheckService> _logger;

    public CVCheckService(
        ApplicationDbContext dbContext,
        IFileStorageService fileStorageService,
        IBackgroundJobClient backgroundJobClient,
        ILogger<CVCheckService> logger)
    {
        (_dbContext, _fileStorageService, _backgroundJobClient, _logger) =
            (dbContext, fileStorageService, backgroundJobClient, logger);
    }

    /// <summary>
    /// Uploads the CV file, stores a pending <see cref="CVCheck"/> row for it and
    /// enqueues a <see cref="ProcessCVCheckJob"/> to process the check.
    /// </summary>
    /// <param name="userId">Owner of the new check.</param>
    /// <param name="file">Content of the uploaded CV.</param>
    /// <param name="fileName">Original name of the uploaded file.</param>
    /// <returns>The identifier of the newly created check.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="file"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="fileName"/> is null, empty or whitespace.</exception>
    public async Task<Guid> CreateCheckAsync(Guid userId, Stream file, string fileName)
    {
        ArgumentNullException.ThrowIfNull(file);
        ArgumentException.ThrowIfNullOrWhiteSpace(fileName);

        _logger.LogDebug("Creating CV check for user {UserId}, file: {FileName}", userId, fileName);

        // Step 1: the file goes to storage first so the record can point at it.
        var storedAt = await _fileStorageService.UploadAsync(file, fileName);
        _logger.LogDebug("File uploaded to: {BlobUrl}", storedAt);

        // Step 2: persist the check in its initial Pending state.
        var record = new CVCheck
        {
            Id = Guid.NewGuid(),
            UserId = userId,
            OriginalFileName = fileName,
            BlobUrl = storedAt,
            Status = CheckStatus.Pending
        };
        _dbContext.CVChecks.Add(record);
        await _dbContext.SaveChangesAsync();

        var checkId = record.Id;
        _logger.LogDebug("CV check record created with ID: {CheckId}", checkId);

        // Step 3: hand the heavy lifting to the background job queue.
        _backgroundJobClient.Enqueue<ProcessCVCheckJob>(
            job => job.ExecuteAsync(checkId, CancellationToken.None));

        _logger.LogInformation(
            "CV check {CheckId} created for user {UserId}, processing queued",
            checkId, userId);

        return checkId;
    }

    /// <summary>
    /// Looks up a check by identifier only (no owner filter is applied).
    /// </summary>
    /// <returns>The check as a DTO, or <c>null</c> when no row has that identifier.</returns>
    public async Task<CVCheckDto?> GetCheckAsync(Guid id)
    {
        _logger.LogDebug("Retrieving CV check: {CheckId}", id);

        var found = await _dbContext.CVChecks
            .AsNoTracking()
            .FirstOrDefaultAsync(c => c.Id == id);

        if (found is not null)
        {
            return MapToDto(found);
        }

        _logger.LogDebug("CV check not found: {CheckId}", id);
        return null;
    }

    /// <summary>
    /// Lists every check owned by <paramref name="userId"/>, newest first.
    /// </summary>
    public async Task<List<CVCheckDto>> GetUserChecksAsync(Guid userId)
    {
        _logger.LogDebug("Retrieving CV checks for user: {UserId}", userId);

        var owned = await _dbContext.CVChecks
            .AsNoTracking()
            .Where(c => c.UserId == userId)
            .OrderByDescending(c => c.CreatedAt)
            .ToListAsync();

        _logger.LogDebug("Found {Count} CV checks for user {UserId}", owned.Count, userId);

        return owned.ConvertAll(MapToDto);
    }

    /// <summary>
    /// Looks up a check by identifier, but only if it belongs to <paramref name="userId"/>.
    /// </summary>
    /// <returns>The check as a DTO, or <c>null</c> when no matching row exists.</returns>
    public async Task<CVCheckDto?> GetCheckForUserAsync(Guid id, Guid userId)
    {
        _logger.LogDebug("Retrieving CV check {CheckId} for user {UserId}", id, userId);

        var found = await FindOwnedCheckAsync(id, userId);
        if (found is not null)
        {
            return MapToDto(found);
        }

        _logger.LogDebug("CV check not found: {CheckId} for user {UserId}", id, userId);
        return null;
    }

    /// <summary>
    /// Loads the stored veracity report of a completed check owned by <paramref name="userId"/>.
    /// </summary>
    /// <returns>
    /// The deserialized report, or <c>null</c> when the check does not exist for this user,
    /// is not completed, has no report JSON, or the JSON cannot be deserialized.
    /// </returns>
    public async Task<VeracityReport?> GetReportAsync(Guid checkId, Guid userId)
    {
        _logger.LogDebug("Retrieving report for CV check {CheckId}, user {UserId}", checkId, userId);

        var found = await FindOwnedCheckAsync(checkId, userId);
        if (found is null)
        {
            _logger.LogWarning("CV check not found: {CheckId} for user {UserId}", checkId, userId);
            return null;
        }

        var reportAvailable =
            found.Status == CheckStatus.Completed && !string.IsNullOrEmpty(found.ReportJson);
        if (!reportAvailable)
        {
            _logger.LogDebug("CV check {CheckId} not completed or has no report", checkId);
            return null;
        }

        try
        {
            return JsonSerializer.Deserialize<VeracityReport>(found.ReportJson);
        }
        catch (JsonException ex)
        {
            // Malformed stored JSON is reported to the caller as "no report".
            _logger.LogError(ex, "Failed to deserialize report JSON for check {CheckId}", checkId);
            return null;
        }
    }

    // Read-only lookup of a check restricted to its owner.
    private Task<CVCheck?> FindOwnedCheckAsync(Guid checkId, Guid userId) =>
        _dbContext.CVChecks
            .AsNoTracking()
            .FirstOrDefaultAsync(c => c.Id == checkId && c.UserId == userId);

    // Projects the entity onto the DTO; Status is exposed as the enum member name.
    private static CVCheckDto MapToDto(CVCheck cvCheck) => new()
    {
        Id = cvCheck.Id,
        OriginalFileName = cvCheck.OriginalFileName,
        Status = cvCheck.Status.ToString(),
        VeracityScore = cvCheck.VeracityScore,
        CreatedAt = cvCheck.CreatedAt,
        CompletedAt = cvCheck.CompletedAt
    };
}
|
||||
318
src/TrueCV.Infrastructure/Services/CVParserService.cs
Normal file
318
src/TrueCV.Infrastructure/Services/CVParserService.cs
Normal file
@@ -0,0 +1,318 @@
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using Anthropic.SDK;
|
||||
using Anthropic.SDK.Messaging;
|
||||
using DocumentFormat.OpenXml.Packaging;
|
||||
using DocumentFormat.OpenXml.Wordprocessing;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using TrueCV.Application.Interfaces;
|
||||
using TrueCV.Application.Models;
|
||||
using TrueCV.Infrastructure.Configuration;
|
||||
using UglyToad.PdfPig;
|
||||
|
||||
namespace TrueCV.Infrastructure.Services;
|
||||
|
||||
/// <summary>
/// Extracts plain text from an uploaded CV (PDF or DOCX) and sends it to the Claude API
/// to obtain structured <see cref="CVData"/>.
/// </summary>
public sealed class CVParserService : ICVParserService
{
    private readonly AnthropicClient _anthropicClient;
    private readonly ILogger<CVParserService> _logger;

    // Binding options for the camelCase JSON shape requested in ExtractionPrompt.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        PropertyNameCaseInsensitive = true
    };

    private const string SystemPrompt = """
        You are a CV/Resume parser. Your task is to extract structured information from CV text.
        You must respond ONLY with valid JSON, no other text or markdown.
        """;

    // The {CV_TEXT} placeholder is substituted with the extracted document text before sending.
    private const string ExtractionPrompt = """
        Parse the following CV text and extract the information into this exact JSON structure:

        {
        "fullName": "string (required)",
        "email": "string or null",
        "phone": "string or null",
        "employment": [
        {
        "companyName": "string (required)",
        "jobTitle": "string (required)",
        "location": "string or null",
        "startDate": "YYYY-MM-DD or null",
        "endDate": "YYYY-MM-DD or null (null if current)",
        "isCurrent": "boolean",
        "description": "string or null"
        }
        ],
        "education": [
        {
        "institution": "string (required)",
        "qualification": "string or null (e.g., BSc, MSc, PhD)",
        "subject": "string or null",
        "grade": "string or null",
        "startDate": "YYYY-MM-DD or null",
        "endDate": "YYYY-MM-DD or null"
        }
        ],
        "skills": ["array of skill strings"]
        }

        Rules:
        - For dates, use the first day of the month if only month/year is given (e.g., "Jan 2020" becomes "2020-01-01")
        - For dates with only year, use January 1st (e.g., "2020" becomes "2020-01-01")
        - Set isCurrent to true if the job appears to be ongoing (e.g., "Present", "Current", no end date mentioned with recent start)
        - Extract all employment history in chronological order
        - If information is not available, use null
        - Do not invent or assume information not present in the text

        CV TEXT:
        {CV_TEXT}
        """;

    public CVParserService(
        IOptions<AnthropicSettings> settings,
        ILogger<CVParserService> logger)
    {
        _logger = logger;
        _anthropicClient = new AnthropicClient(settings.Value.ApiKey);
    }

    /// <summary>
    /// Parses a CV file into structured data.
    /// </summary>
    /// <param name="fileStream">Content of the CV file.</param>
    /// <param name="fileName">File name; its extension selects the text extractor (.pdf or .docx).</param>
    /// <returns>The structured CV data produced from the AI response.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="fileStream"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="fileName"/> is null, empty or whitespace.</exception>
    /// <exception cref="NotSupportedException">The file extension is neither .pdf nor .docx.</exception>
    /// <exception cref="InvalidOperationException">
    /// No text could be extracted, or the AI response was empty or not valid JSON.
    /// </exception>
    public async Task<CVData> ParseAsync(Stream fileStream, string fileName)
    {
        ArgumentNullException.ThrowIfNull(fileStream);
        ArgumentException.ThrowIfNullOrWhiteSpace(fileName);

        _logger.LogDebug("Parsing CV file: {FileName}", fileName);

        var text = await ExtractTextAsync(fileStream, fileName);

        if (string.IsNullOrWhiteSpace(text))
        {
            _logger.LogWarning("No text content extracted from file: {FileName}", fileName);
            throw new InvalidOperationException($"Could not extract text content from file: {fileName}");
        }

        _logger.LogDebug("Extracted {CharCount} characters from {FileName}", text.Length, fileName);

        var cvData = await ParseWithClaudeAsync(text);

        // NOTE(review): this writes the candidate's name to the information log — confirm
        // that is acceptable under the project's data-protection policy.
        _logger.LogInformation(
            "Successfully parsed CV for {FullName} with {EmploymentCount} employment entries and {EducationCount} education entries",
            cvData.FullName,
            cvData.Employment.Count,
            cvData.Education.Count);

        return cvData;
    }

    // Dispatches to the extractor matching the (case-insensitive) file extension.
    private static async Task<string> ExtractTextAsync(Stream fileStream, string fileName)
    {
        var extension = Path.GetExtension(fileName).ToLowerInvariant();

        return extension switch
        {
            ".pdf" => await ExtractTextFromPdfAsync(fileStream),
            ".docx" => ExtractTextFromDocx(fileStream),
            _ => throw new NotSupportedException($"File type '{extension}' is not supported. Only PDF and DOCX files are accepted.")
        };
    }

    // Concatenates the text of every PDF page, one page per line block.
    private static async Task<string> ExtractTextFromPdfAsync(Stream fileStream)
    {
        // Copy stream to memory for PdfPig (requires seekable stream)
        using var memoryStream = new MemoryStream();
        await fileStream.CopyToAsync(memoryStream);
        memoryStream.Position = 0;

        using var document = PdfDocument.Open(memoryStream);
        var textBuilder = new StringBuilder();

        foreach (var page in document.GetPages())
        {
            textBuilder.AppendLine(page.Text);
        }

        return textBuilder.ToString();
    }

    // Concatenates the text of every non-blank paragraph in the DOCX body.
    private static string ExtractTextFromDocx(Stream fileStream)
    {
        using var document = WordprocessingDocument.Open(fileStream, false);
        var body = document.MainDocumentPart?.Document?.Body;

        if (body is null)
        {
            return string.Empty;
        }

        var textBuilder = new StringBuilder();

        // Descendants (not Elements) so that paragraphs nested inside tables are included;
        // Elements only enumerates direct children of the body and would skip table-based
        // CV layouts entirely.
        foreach (var paragraph in body.Descendants<Paragraph>())
        {
            var paragraphText = paragraph.InnerText;
            if (!string.IsNullOrWhiteSpace(paragraphText))
            {
                textBuilder.AppendLine(paragraphText);
            }
        }

        return textBuilder.ToString();
    }

    // Sends the CV text to Claude and converts the JSON reply into CVData.
    private async Task<CVData> ParseWithClaudeAsync(string cvText)
    {
        var prompt = ExtractionPrompt.Replace("{CV_TEXT}", cvText);

        var messages = new List<Message>
        {
            new(RoleType.User, prompt)
        };

        var parameters = new MessageParameters
        {
            Model = "claude-sonnet-4-20250514",
            MaxTokens = 4096,
            Messages = messages,
            System = [new SystemMessage(SystemPrompt)]
        };

        _logger.LogDebug("Sending CV text to Claude API for parsing");

        var response = await _anthropicClient.Messages.GetClaudeMessageAsync(parameters);

        var responseText = response.Content
            .OfType<TextContent>()
            .FirstOrDefault()?.Text;

        if (string.IsNullOrWhiteSpace(responseText))
        {
            _logger.LogError("Claude API returned empty response");
            throw new InvalidOperationException("Failed to parse CV: AI returned empty response");
        }

        // Clean up response - remove markdown code blocks if present
        responseText = CleanJsonResponse(responseText);

        _logger.LogDebug("Received response from Claude API, parsing JSON");

        try
        {
            var parsedResponse = JsonSerializer.Deserialize<ClaudeCVResponse>(responseText, JsonOptions);

            if (parsedResponse is null)
            {
                throw new InvalidOperationException("Failed to deserialize CV data from AI response");
            }

            return MapToCVData(parsedResponse);
        }
        catch (JsonException ex)
        {
            _logger.LogError(ex, "Failed to parse Claude response as JSON: {Response}", responseText);
            throw new InvalidOperationException("Failed to parse CV: AI returned invalid JSON", ex);
        }
    }

    // Strips a surrounding markdown code fence (three backticks, optionally tagged "json")
    // from the response.
    private static string CleanJsonResponse(string response)
    {
        var trimmed = response.Trim();

        // The fence markers are literal ASCII, so compare ordinally rather than by culture.
        if (trimmed.StartsWith("```json", StringComparison.OrdinalIgnoreCase))
        {
            trimmed = trimmed[7..];
        }
        else if (trimmed.StartsWith("```", StringComparison.Ordinal))
        {
            trimmed = trimmed[3..];
        }

        if (trimmed.EndsWith("```", StringComparison.Ordinal))
        {
            trimmed = trimmed[..^3];
        }

        return trimmed.Trim();
    }

    // Maps the loosely-typed AI response onto the application model, substituting
    // placeholder text for missing required names and empty lists for missing collections.
    private static CVData MapToCVData(ClaudeCVResponse response)
    {
        return new CVData
        {
            FullName = response.FullName ?? "Unknown",
            Email = response.Email,
            Phone = response.Phone,
            Employment = response.Employment?.Select(e => new EmploymentEntry
            {
                CompanyName = e.CompanyName ?? "Unknown Company",
                JobTitle = e.JobTitle ?? "Unknown Position",
                Location = e.Location,
                StartDate = ParseDate(e.StartDate),
                EndDate = ParseDate(e.EndDate),
                IsCurrent = e.IsCurrent ?? false,
                Description = e.Description
            }).ToList() ?? [],
            Education = response.Education?.Select(e => new EducationEntry
            {
                Institution = e.Institution ?? "Unknown Institution",
                Qualification = e.Qualification,
                Subject = e.Subject,
                Grade = e.Grade,
                StartDate = ParseDate(e.StartDate),
                EndDate = ParseDate(e.EndDate)
            }).ToList() ?? [],
            Skills = response.Skills ?? []
        };
    }

    // Parses a date string from the AI response; returns null for blank or unparseable input.
    private static DateOnly? ParseDate(string? dateString)
    {
        if (string.IsNullOrWhiteSpace(dateString))
        {
            return null;
        }

        // The prompt requests machine-formatted YYYY-MM-DD dates, so parse with the invariant
        // culture; the server's current culture (e.g. one with a non-Gregorian calendar)
        // must not influence the result.
        if (DateOnly.TryParse(
                dateString,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out var date))
        {
            return date;
        }

        return null;
    }

    // Internal DTOs for Claude response parsing
    private sealed record ClaudeCVResponse
    {
        public string? FullName { get; init; }
        public string? Email { get; init; }
        public string? Phone { get; init; }
        public List<ClaudeEmploymentEntry>? Employment { get; init; }
        public List<ClaudeEducationEntry>? Education { get; init; }
        public List<string>? Skills { get; init; }
    }

    private sealed record ClaudeEmploymentEntry
    {
        public string? CompanyName { get; init; }
        public string? JobTitle { get; init; }
        public string? Location { get; init; }
        public string? StartDate { get; init; }
        public string? EndDate { get; init; }
        public bool? IsCurrent { get; init; }
        public string? Description { get; init; }
    }

    private sealed record ClaudeEducationEntry
    {
        public string? Institution { get; init; }
        public string? Qualification { get; init; }
        public string? Subject { get; init; }
        public string? Grade { get; init; }
        public string? StartDate { get; init; }
        public string? EndDate { get; init; }
    }
}
|
||||
247
src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs
Normal file
247
src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs
Normal file
@@ -0,0 +1,247 @@
|
||||
using FuzzySharp;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using TrueCV.Application.DTOs;
|
||||
using TrueCV.Application.Interfaces;
|
||||
using TrueCV.Application.Models;
|
||||
using TrueCV.Domain.Entities;
|
||||
using TrueCV.Infrastructure.Data;
|
||||
using TrueCV.Infrastructure.ExternalApis;
|
||||
|
||||
namespace TrueCV.Infrastructure.Services;
|
||||
|
||||
/// <summary>
/// Verifies company names claimed on a CV by fuzzy-matching them against a local cache
/// and, on a cache miss, against Companies House search results.
/// </summary>
public sealed class CompanyVerifierService : ICompanyVerifierService
{
    private readonly CompaniesHouseClient _companiesHouseClient;
    private readonly ApplicationDbContext _dbContext;
    private readonly ILogger<CompanyVerifierService> _logger;

    // Minimum Fuzz.Ratio score for a candidate name to count as a match.
    private const int FuzzyMatchThreshold = 70;

    // Cache rows older than this many days are ignored when looking for a cached match.
    private const int CacheExpirationDays = 30;

    public CompanyVerifierService(
        CompaniesHouseClient companiesHouseClient,
        ApplicationDbContext dbContext,
        ILogger<CompanyVerifierService> logger)
    {
        _companiesHouseClient = companiesHouseClient;
        _dbContext = dbContext;
        _logger = logger;
    }

    /// <summary>
    /// Tries to match <paramref name="companyName"/> to a known company.
    /// </summary>
    /// <param name="companyName">Company name as claimed on the CV.</param>
    /// <param name="startDate">Claimed employment start; copied into the result unchanged.</param>
    /// <param name="endDate">Claimed employment end; copied into the result unchanged.</param>
    /// <returns>
    /// A verified result when a cached or Companies House name scores at or above the
    /// threshold; otherwise an unverified result whose notes explain why (no search hits,
    /// no match above the threshold, or rate limiting).
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="companyName"/> is null, empty or whitespace.</exception>
    public async Task<CompanyVerificationResult> VerifyCompanyAsync(
        string companyName,
        DateOnly? startDate,
        DateOnly? endDate)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(companyName);

        _logger.LogDebug("Verifying company: {CompanyName}", companyName);

        // Try to find a cached match first
        var cachedMatch = await FindCachedMatchAsync(companyName);
        if (cachedMatch is not null)
        {
            _logger.LogDebug("Found cached company match for: {CompanyName}", companyName);
            return CreateVerificationResult(companyName, cachedMatch, startDate, endDate);
        }

        // Search Companies House
        try
        {
            var searchResponse = await _companiesHouseClient.SearchCompaniesAsync(companyName);

            if (searchResponse?.Items is null || searchResponse.Items.Count == 0)
            {
                _logger.LogDebug("No companies found for: {CompanyName}", companyName);
                return CreateUnverifiedResult(companyName, startDate, endDate, "No matching company found in Companies House");
            }

            // Find best fuzzy match
            var bestMatch = FindBestMatch(companyName, searchResponse.Items);

            if (bestMatch is null)
            {
                _logger.LogDebug("No fuzzy match above threshold for: {CompanyName}", companyName);
                return CreateUnverifiedResult(companyName, startDate, endDate,
                    $"No company name matched above {FuzzyMatchThreshold}% threshold");
            }

            // Cache the matched company
            var match = bestMatch.Value;
            await CacheCompanyAsync(match.Item);

            _logger.LogInformation(
                "Verified company {ClaimedName} matched to {MatchedName} with score {Score}%",
                companyName, match.Item.Title, match.Score);

            return new CompanyVerificationResult
            {
                ClaimedCompany = companyName,
                MatchedCompanyName = match.Item.Title,
                MatchedCompanyNumber = match.Item.CompanyNumber,
                MatchScore = match.Score,
                IsVerified = true,
                VerificationNotes = $"Matched with {match.Score}% confidence",
                ClaimedStartDate = startDate,
                ClaimedEndDate = endDate
            };
        }
        catch (CompaniesHouseRateLimitException ex)
        {
            // Rate limiting is reported as "unverified" rather than failing the whole check.
            _logger.LogWarning(ex, "Rate limit hit while verifying company: {CompanyName}", companyName);
            return CreateUnverifiedResult(companyName, startDate, endDate,
                "Verification temporarily unavailable due to rate limiting");
        }
    }

    /// <summary>
    /// Searches Companies House and maps the hits to <see cref="CompanySearchResult"/>.
    /// </summary>
    /// <returns>The mapped hits; an empty list when the response carries no items.</returns>
    /// <exception cref="ArgumentException"><paramref name="query"/> is null, empty or whitespace.</exception>
    public async Task<List<CompanySearchResult>> SearchCompaniesAsync(string query)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(query);

        _logger.LogDebug("Searching companies for query: {Query}", query);

        var response = await _companiesHouseClient.SearchCompaniesAsync(query);

        if (response?.Items is null)
        {
            return [];
        }

        return response.Items.Select(item => new CompanySearchResult
        {
            CompanyNumber = item.CompanyNumber,
            CompanyName = item.Title,
            CompanyStatus = item.CompanyStatus ?? "Unknown",
            IncorporationDate = ParseDate(item.DateOfCreation),
            AddressSnippet = item.AddressSnippet
        }).ToList();
    }

    // Returns the best-scoring non-expired cache row at or above the threshold, or null.
    private async Task<CompanyCache?> FindCachedMatchAsync(string companyName)
    {
        var cutoffDate = DateTime.UtcNow.AddDays(-CacheExpirationDays);

        // Read-only scan: the rows are only compared and copied into a result, so they do
        // not need to be tracked by the context.
        var cachedCompanies = await _dbContext.CompanyCache
            .AsNoTracking()
            .Where(c => c.CachedAt >= cutoffDate)
            .ToListAsync();

        // Normalise the claimed name once instead of once per cached row.
        var normalizedSearch = companyName.ToUpperInvariant();

        var bestMatch = cachedCompanies
            .Select(c => new { Company = c, Score = Fuzz.Ratio(normalizedSearch, c.CompanyName.ToUpperInvariant()) })
            .Where(m => m.Score >= FuzzyMatchThreshold)
            .OrderByDescending(m => m.Score)
            .FirstOrDefault();

        return bestMatch?.Company;
    }

    // Returns the highest-scoring search item at or above the threshold, or null if none qualifies.
    private static (CompaniesHouseSearchItem Item, int Score)? FindBestMatch(
        string companyName,
        List<CompaniesHouseSearchItem> items)
    {
        var normalizedSearch = companyName.ToUpperInvariant();

        var matches = items
            .Select(item => (Item: item, Score: Fuzz.Ratio(normalizedSearch, item.Title.ToUpperInvariant())))
            .Where(m => m.Score >= FuzzyMatchThreshold)
            .OrderByDescending(m => m.Score)
            .ToList();

        return matches.Count > 0 ? matches[0] : null;
    }

    // Inserts or refreshes the cache row for the given company number and saves the context.
    private async Task CacheCompanyAsync(CompaniesHouseSearchItem item)
    {
        var existingCache = await _dbContext.CompanyCache
            .FirstOrDefaultAsync(c => c.CompanyNumber == item.CompanyNumber);

        if (existingCache is not null)
        {
            existingCache.CompanyName = item.Title;
            existingCache.Status = item.CompanyStatus ?? "Unknown";
            existingCache.IncorporationDate = ParseDate(item.DateOfCreation);
            existingCache.DissolutionDate = ParseDate(item.DateOfCessation);
            existingCache.CachedAt = DateTime.UtcNow;
        }
        else
        {
            var cacheEntry = new CompanyCache
            {
                CompanyNumber = item.CompanyNumber,
                CompanyName = item.Title,
                Status = item.CompanyStatus ?? "Unknown",
                IncorporationDate = ParseDate(item.DateOfCreation),
                DissolutionDate = ParseDate(item.DateOfCessation),
                CachedAt = DateTime.UtcNow
            };

            _dbContext.CompanyCache.Add(cacheEntry);
        }

        await _dbContext.SaveChangesAsync();
    }

    // Builds a verified result from a cache row, recomputing the score against the claimed name.
    private static CompanyVerificationResult CreateVerificationResult(
        string claimedCompany,
        CompanyCache cached,
        DateOnly? startDate,
        DateOnly? endDate)
    {
        var matchScore = Fuzz.Ratio(
            claimedCompany.ToUpperInvariant(),
            cached.CompanyName.ToUpperInvariant());

        return new CompanyVerificationResult
        {
            ClaimedCompany = claimedCompany,
            MatchedCompanyName = cached.CompanyName,
            MatchedCompanyNumber = cached.CompanyNumber,
            MatchScore = matchScore,
            IsVerified = true,
            VerificationNotes = $"Matched from cache with {matchScore}% confidence",
            ClaimedStartDate = startDate,
            ClaimedEndDate = endDate
        };
    }

    // Builds an unverified result carrying the given explanation in its notes.
    private static CompanyVerificationResult CreateUnverifiedResult(
        string companyName,
        DateOnly? startDate,
        DateOnly? endDate,
        string reason)
    {
        return new CompanyVerificationResult
        {
            ClaimedCompany = companyName,
            MatchedCompanyName = null,
            MatchedCompanyNumber = null,
            MatchScore = 0,
            IsVerified = false,
            VerificationNotes = reason,
            ClaimedStartDate = startDate,
            ClaimedEndDate = endDate
        };
    }

    // Parses a date string from an API payload; returns null for blank or unparseable input.
    private static DateOnly? ParseDate(string? dateString)
    {
        if (string.IsNullOrWhiteSpace(dateString))
        {
            return null;
        }

        // API dates are machine-formatted, so parse with the invariant culture; the server's
        // current culture must not change how they are read.
        if (DateOnly.TryParse(
                dateString,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out var date))
        {
            return date;
        }

        return null;
    }
}
|
||||
120
src/TrueCV.Infrastructure/Services/FileStorageService.cs
Normal file
120
src/TrueCV.Infrastructure/Services/FileStorageService.cs
Normal file
@@ -0,0 +1,120 @@
|
||||
using Azure.Storage.Blobs;
|
||||
using Azure.Storage.Blobs.Models;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using TrueCV.Application.Interfaces;
|
||||
using TrueCV.Infrastructure.Configuration;
|
||||
|
||||
namespace TrueCV.Infrastructure.Services;
|
||||
|
||||
/// <summary>
/// Stores, retrieves and deletes uploaded files in a single Azure Blob Storage container.
/// </summary>
public sealed class FileStorageService : IFileStorageService
{
    private readonly BlobContainerClient _containerClient;
    private readonly ILogger<FileStorageService> _logger;

    public FileStorageService(
        IOptions<AzureBlobSettings> settings,
        ILogger<FileStorageService> logger)
    {
        _logger = logger;

        var blobServiceClient = new BlobServiceClient(settings.Value.ConnectionString);
        _containerClient = blobServiceClient.GetBlobContainerClient(settings.Value.ContainerName);
    }

    /// <summary>
    /// Uploads a file under a freshly generated GUID-based blob name that keeps the
    /// original extension, creating the container first if it does not exist.
    /// </summary>
    /// <param name="fileStream">Content to upload.</param>
    /// <param name="fileName">Original file name; stored URL-encoded in the blob metadata.</param>
    /// <returns>The URL of the uploaded blob.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="fileStream"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="fileName"/> is null, empty or whitespace.</exception>
    public async Task<string> UploadAsync(Stream fileStream, string fileName)
    {
        ArgumentNullException.ThrowIfNull(fileStream);
        ArgumentException.ThrowIfNullOrWhiteSpace(fileName);

        var extension = Path.GetExtension(fileName);
        var uniqueBlobName = $"{Guid.NewGuid()}{extension}";

        _logger.LogDebug("Uploading file {FileName} as blob {BlobName}", fileName, uniqueBlobName);

        var blobClient = _containerClient.GetBlobClient(uniqueBlobName);

        await _containerClient.CreateIfNotExistsAsync();

        var httpHeaders = new BlobHttpHeaders
        {
            ContentType = GetContentType(extension)
        };

        await blobClient.UploadAsync(fileStream, new BlobUploadOptions
        {
            HttpHeaders = httpHeaders,
            Metadata = new Dictionary<string, string>
            {
                // Blob metadata is sent as HTTP headers, which must be ASCII. File names
                // such as "Résumé.pdf" would otherwise make the upload fail, so the name is
                // URL-encoded (decode with Uri.UnescapeDataString when reading it back).
                ["originalFileName"] = Uri.EscapeDataString(fileName),
                ["uploadedAt"] = DateTime.UtcNow.ToString("O")
            }
        });

        var blobUrl = blobClient.Uri.ToString();

        _logger.LogInformation("Successfully uploaded file {FileName} to {BlobUrl}", fileName, blobUrl);

        return blobUrl;
    }

    /// <summary>
    /// Opens a streaming download of the blob addressed by <paramref name="blobUrl"/>.
    /// </summary>
    /// <returns>The content stream of the blob download response.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="blobUrl"/> is null, empty, whitespace, or has no blob segment.
    /// </exception>
    public async Task<Stream> DownloadAsync(string blobUrl)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(blobUrl);

        var blobName = ExtractBlobNameFromUrl(blobUrl);

        _logger.LogDebug("Downloading blob {BlobName} from {BlobUrl}", blobName, blobUrl);

        var blobClient = _containerClient.GetBlobClient(blobName);

        var response = await blobClient.DownloadStreamingAsync();

        _logger.LogDebug("Successfully downloaded blob {BlobName}", blobName);

        return response.Value.Content;
    }

    /// <summary>
    /// Deletes the blob addressed by <paramref name="blobUrl"/> if it exists; a missing
    /// blob is logged as a warning rather than treated as an error.
    /// </summary>
    /// <exception cref="ArgumentException">
    /// <paramref name="blobUrl"/> is null, empty, whitespace, or has no blob segment.
    /// </exception>
    public async Task DeleteAsync(string blobUrl)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(blobUrl);

        var blobName = ExtractBlobNameFromUrl(blobUrl);

        _logger.LogDebug("Deleting blob {BlobName}", blobName);

        var blobClient = _containerClient.GetBlobClient(blobName);

        var deleted = await blobClient.DeleteIfExistsAsync();

        if (deleted)
        {
            _logger.LogInformation("Successfully deleted blob {BlobName}", blobName);
        }
        else
        {
            _logger.LogWarning("Blob {BlobName} did not exist when attempting to delete", blobName);
        }
    }

    // Returns the last path segment of the URL as the blob name.
    private static string ExtractBlobNameFromUrl(string blobUrl)
    {
        var uri = new Uri(blobUrl);
        var segments = uri.Segments;

        // The blob name is the last segment after the container name
        // URL format: https://account.blob.core.windows.net/container/blobname
        if (segments.Length <= 2)
        {
            throw new ArgumentException("Invalid blob URL", nameof(blobUrl));
        }

        // Uri.Segments keeps percent-encoding; decode it so a name containing escaped
        // characters (e.g. a space in the extension) is not escaped a second time when the
        // blob client rebuilds the URL.
        return Uri.UnescapeDataString(segments[^1]);
    }

    // Maps a file extension (case-insensitive) to its MIME type; unknown types are sent as
    // a generic binary stream.
    private static string GetContentType(string extension)
    {
        return extension.ToLowerInvariant() switch
        {
            ".pdf" => "application/pdf",
            ".docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            ".doc" => "application/msword",
            _ => "application/octet-stream"
        };
    }
}
|
||||
205
src/TrueCV.Infrastructure/Services/TimelineAnalyserService.cs
Normal file
205
src/TrueCV.Infrastructure/Services/TimelineAnalyserService.cs
Normal file
@@ -0,0 +1,205 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using TrueCV.Application.Interfaces;
|
||||
using TrueCV.Application.Models;
|
||||
|
||||
namespace TrueCV.Infrastructure.Services;
|
||||
|
||||
public sealed class TimelineAnalyserService : ITimelineAnalyserService
|
||||
{
|
||||
private readonly ILogger<TimelineAnalyserService> _logger;
|
||||
|
||||
private const int MinimumGapMonths = 3;
|
||||
private const int AllowedOverlapMonths = 2;
|
||||
|
||||
/// <summary>
/// Creates the analyser with the logger used for its diagnostic output.
/// </summary>
public TimelineAnalyserService(ILogger<TimelineAnalyserService> logger) => _logger = logger;
|
||||
|
||||
/// <summary>
/// Analyses an employment history for gaps and overlaps between positions.
/// </summary>
/// <param name="employmentHistory">Employment entries; those without a start date are ignored.</param>
/// <returns>
/// The detected gaps and overlaps with their summed month counts. The result is empty
/// (zero totals, no gaps, no overlaps) when there are no entries with a start date.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="employmentHistory"/> is null.</exception>
public TimelineAnalysisResult Analyse(List<EmploymentEntry> employmentHistory)
{
    ArgumentNullException.ThrowIfNull(employmentHistory);

    if (employmentHistory.Count == 0)
    {
        _logger.LogDebug("No employment history to analyse");
        return NothingToReport();
    }

    // Only dated entries can be placed on a timeline; order them by when they began.
    var datedEntries =
        (from entry in employmentHistory
         where entry.StartDate.HasValue
         orderby entry.StartDate!.Value
         select entry).ToList();

    if (datedEntries.Count == 0)
    {
        _logger.LogDebug("No employment entries with valid dates to analyse");
        return NothingToReport();
    }

    var gaps = DetectGaps(datedEntries);
    var overlaps = DetectOverlaps(datedEntries);

    var summary = new TimelineAnalysisResult
    {
        TotalGapMonths = gaps.Sum(g => g.Months),
        TotalOverlapMonths = overlaps.Sum(o => o.Months),
        Gaps = gaps,
        Overlaps = overlaps
    };

    _logger.LogInformation(
        "Timeline analysis complete: {GapCount} gaps ({TotalGapMonths} months), {OverlapCount} overlaps ({TotalOverlapMonths} months)",
        gaps.Count, summary.TotalGapMonths, overlaps.Count, summary.TotalOverlapMonths);

    return summary;

    // Result used whenever there is nothing on the timeline to analyse.
    static TimelineAnalysisResult NothingToReport() => new()
    {
        TotalGapMonths = 0,
        TotalOverlapMonths = 0,
        Gaps = [],
        Overlaps = []
    };
}
|
||||
|
||||
private List<TimelineGap> DetectGaps(List<EmploymentEntry> sortedEmployment)
|
||||
{
|
||||
var gaps = new List<TimelineGap>();
|
||||
|
||||
for (var i = 0; i < sortedEmployment.Count - 1; i++)
|
||||
{
|
||||
var current = sortedEmployment[i];
|
||||
var next = sortedEmployment[i + 1];
|
||||
|
||||
// Get the effective end date for the current position
|
||||
var currentEndDate = GetEffectiveEndDate(current);
|
||||
var nextStartDate = next.StartDate!.Value;
|
||||
|
||||
// Skip if there's no gap or overlap
|
||||
if (currentEndDate >= nextStartDate)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var gapMonths = CalculateMonthsDifference(currentEndDate, nextStartDate);
|
||||
|
||||
// Only report gaps of 3+ months
|
||||
if (gapMonths >= MinimumGapMonths)
|
||||
{
|
||||
_logger.LogDebug(
|
||||
"Detected {Months} month gap between {EndDate} and {StartDate}",
|
||||
gapMonths, currentEndDate, nextStartDate);
|
||||
|
||||
gaps.Add(new TimelineGap
|
||||
{
|
||||
StartDate = currentEndDate,
|
||||
EndDate = nextStartDate,
|
||||
Months = gapMonths
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return gaps;
|
||||
}
|
||||
|
||||
private List<TimelineOverlap> DetectOverlaps(List<EmploymentEntry> sortedEmployment)
|
||||
{
|
||||
var overlaps = new List<TimelineOverlap>();
|
||||
|
||||
for (var i = 0; i < sortedEmployment.Count; i++)
|
||||
{
|
||||
for (var j = i + 1; j < sortedEmployment.Count; j++)
|
||||
{
|
||||
var earlier = sortedEmployment[i];
|
||||
var later = sortedEmployment[j];
|
||||
|
||||
var overlap = CalculateOverlap(earlier, later);
|
||||
|
||||
if (overlap is not null && overlap.Value.Months > AllowedOverlapMonths)
|
||||
{
|
||||
_logger.LogDebug(
|
||||
"Detected {Months} month overlap between {Company1} and {Company2}",
|
||||
overlap.Value.Months, earlier.CompanyName, later.CompanyName);
|
||||
|
||||
overlaps.Add(new TimelineOverlap
|
||||
{
|
||||
Company1 = earlier.CompanyName,
|
||||
Company2 = later.CompanyName,
|
||||
OverlapStart = overlap.Value.Start,
|
||||
OverlapEnd = overlap.Value.End,
|
||||
Months = overlap.Value.Months
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return overlaps;
|
||||
}
|
||||
|
||||
private static (DateOnly Start, DateOnly End, int Months)? CalculateOverlap(
|
||||
EmploymentEntry earlier,
|
||||
EmploymentEntry later)
|
||||
{
|
||||
if (!earlier.StartDate.HasValue || !later.StartDate.HasValue)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var earlierEnd = GetEffectiveEndDate(earlier);
|
||||
var laterStart = later.StartDate.Value;
|
||||
|
||||
// No overlap if earlier job ended before later job started
|
||||
if (earlierEnd <= laterStart)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var laterEnd = GetEffectiveEndDate(later);
|
||||
|
||||
// The overlap period
|
||||
var overlapStart = laterStart;
|
||||
var overlapEnd = earlierEnd < laterEnd ? earlierEnd : laterEnd;
|
||||
|
||||
if (overlapStart >= overlapEnd)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var months = CalculateMonthsDifference(overlapStart, overlapEnd);
|
||||
|
||||
return (overlapStart, overlapEnd, months);
|
||||
}
|
||||
|
||||
private static DateOnly GetEffectiveEndDate(EmploymentEntry entry)
|
||||
{
|
||||
if (entry.EndDate.HasValue)
|
||||
{
|
||||
return entry.EndDate.Value;
|
||||
}
|
||||
|
||||
// If marked as current or no end date, use today
|
||||
return DateOnly.FromDateTime(DateTime.UtcNow);
|
||||
}
|
||||
|
||||
private static int CalculateMonthsDifference(DateOnly startDate, DateOnly endDate)
|
||||
{
|
||||
var yearDiff = endDate.Year - startDate.Year;
|
||||
var monthDiff = endDate.Month - startDate.Month;
|
||||
var totalMonths = (yearDiff * 12) + monthDiff;
|
||||
|
||||
// Add a month if we've passed the day in the month
|
||||
if (endDate.Day >= startDate.Day)
|
||||
{
|
||||
totalMonths++;
|
||||
}
|
||||
|
||||
return Math.Max(0, totalMonths);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user