Add UK education verification and security fixes
Features: - Add UK institution recognition (170+ universities) - Add diploma mill detection (100+ blacklisted institutions) - Add education verification service with date plausibility checks - Add local file storage option (no Azure required) - Add default admin user seeding on startup - Enhance Serilog logging with file output Security fixes: - Fix path traversal vulnerability in LocalFileStorageService - Fix open redirect in login endpoint (use LocalRedirect) - Fix password validation message (12 chars, not 6) - Fix login to use HTTP POST endpoint (avoid Blazor cookie issues) Code improvements: - Add CancellationToken propagation to CV parser - Add shared helpers (JsonDefaults, DateHelpers, ScoreThresholds) - Add IUserContextService for user ID extraction - Parallelized company verification in ProcessCVCheckJob - Add 28 unit tests for education verification Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -6,6 +6,7 @@ using DocumentFormat.OpenXml.Packaging;
|
||||
using DocumentFormat.OpenXml.Wordprocessing;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using TrueCV.Application.Helpers;
|
||||
using TrueCV.Application.Interfaces;
|
||||
using TrueCV.Application.Models;
|
||||
using TrueCV.Infrastructure.Configuration;
|
||||
@@ -18,12 +19,6 @@ public sealed class CVParserService : ICVParserService
|
||||
private readonly AnthropicClient _anthropicClient;
|
||||
private readonly ILogger<CVParserService> _logger;
|
||||
|
||||
private static readonly JsonSerializerOptions JsonOptions = new()
|
||||
{
|
||||
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
|
||||
PropertyNameCaseInsensitive = true
|
||||
};
|
||||
|
||||
private const string SystemPrompt = """
|
||||
You are a CV/Resume parser. Your task is to extract structured information from CV text.
|
||||
You must respond ONLY with valid JSON, no other text or markdown.
|
||||
@@ -80,14 +75,14 @@ public sealed class CVParserService : ICVParserService
|
||||
_anthropicClient = new AnthropicClient(settings.Value.ApiKey);
|
||||
}
|
||||
|
||||
public async Task<CVData> ParseAsync(Stream fileStream, string fileName)
|
||||
public async Task<CVData> ParseAsync(Stream fileStream, string fileName, CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(fileStream);
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(fileName);
|
||||
|
||||
_logger.LogDebug("Parsing CV file: {FileName}", fileName);
|
||||
|
||||
var text = await ExtractTextAsync(fileStream, fileName);
|
||||
var text = await ExtractTextAsync(fileStream, fileName, cancellationToken);
|
||||
|
||||
if (string.IsNullOrWhiteSpace(text))
|
||||
{
|
||||
@@ -97,7 +92,7 @@ public sealed class CVParserService : ICVParserService
|
||||
|
||||
_logger.LogDebug("Extracted {CharCount} characters from {FileName}", text.Length, fileName);
|
||||
|
||||
var cvData = await ParseWithClaudeAsync(text);
|
||||
var cvData = await ParseWithClaudeAsync(text, cancellationToken);
|
||||
|
||||
_logger.LogInformation(
|
||||
"Successfully parsed CV for {FullName} with {EmploymentCount} employment entries and {EducationCount} education entries",
|
||||
@@ -108,23 +103,23 @@ public sealed class CVParserService : ICVParserService
|
||||
return cvData;
|
||||
}
|
||||
|
||||
private async Task<string> ExtractTextAsync(Stream fileStream, string fileName)
|
||||
private async Task<string> ExtractTextAsync(Stream fileStream, string fileName, CancellationToken cancellationToken)
|
||||
{
|
||||
var extension = Path.GetExtension(fileName).ToLowerInvariant();
|
||||
|
||||
return extension switch
|
||||
{
|
||||
".pdf" => await ExtractTextFromPdfAsync(fileStream),
|
||||
".pdf" => await ExtractTextFromPdfAsync(fileStream, cancellationToken),
|
||||
".docx" => ExtractTextFromDocx(fileStream),
|
||||
_ => throw new NotSupportedException($"File type '{extension}' is not supported. Only PDF and DOCX files are accepted.")
|
||||
};
|
||||
}
|
||||
|
||||
private async Task<string> ExtractTextFromPdfAsync(Stream fileStream)
|
||||
private async Task<string> ExtractTextFromPdfAsync(Stream fileStream, CancellationToken cancellationToken)
|
||||
{
|
||||
// Copy stream to memory for PdfPig (requires seekable stream)
|
||||
using var memoryStream = new MemoryStream();
|
||||
await fileStream.CopyToAsync(memoryStream);
|
||||
await fileStream.CopyToAsync(memoryStream, cancellationToken);
|
||||
memoryStream.Position = 0;
|
||||
|
||||
using var document = PdfDocument.Open(memoryStream);
|
||||
@@ -132,6 +127,7 @@ public sealed class CVParserService : ICVParserService
|
||||
|
||||
foreach (var page in document.GetPages())
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
var pageText = page.Text;
|
||||
textBuilder.AppendLine(pageText);
|
||||
}
|
||||
@@ -163,7 +159,7 @@ public sealed class CVParserService : ICVParserService
|
||||
return textBuilder.ToString();
|
||||
}
|
||||
|
||||
private async Task<CVData> ParseWithClaudeAsync(string cvText)
|
||||
private async Task<CVData> ParseWithClaudeAsync(string cvText, CancellationToken cancellationToken)
|
||||
{
|
||||
var prompt = ExtractionPrompt.Replace("{CV_TEXT}", cvText);
|
||||
|
||||
@@ -182,7 +178,7 @@ public sealed class CVParserService : ICVParserService
|
||||
|
||||
_logger.LogDebug("Sending CV text to Claude API for parsing");
|
||||
|
||||
var response = await _anthropicClient.Messages.GetClaudeMessageAsync(parameters);
|
||||
var response = await _anthropicClient.Messages.GetClaudeMessageAsync(parameters, cancellationToken);
|
||||
|
||||
var responseText = response.Content
|
||||
.OfType<TextContent>()
|
||||
@@ -201,7 +197,7 @@ public sealed class CVParserService : ICVParserService
|
||||
|
||||
try
|
||||
{
|
||||
var parsedResponse = JsonSerializer.Deserialize<ClaudeCVResponse>(responseText, JsonOptions);
|
||||
var parsedResponse = JsonSerializer.Deserialize<ClaudeCVResponse>(responseText, JsonDefaults.CamelCase);
|
||||
|
||||
if (parsedResponse is null)
|
||||
{
|
||||
@@ -251,8 +247,8 @@ public sealed class CVParserService : ICVParserService
|
||||
CompanyName = e.CompanyName ?? "Unknown Company",
|
||||
JobTitle = e.JobTitle ?? "Unknown Position",
|
||||
Location = e.Location,
|
||||
StartDate = ParseDate(e.StartDate),
|
||||
EndDate = ParseDate(e.EndDate),
|
||||
StartDate = DateHelpers.ParseDate(e.StartDate),
|
||||
EndDate = DateHelpers.ParseDate(e.EndDate),
|
||||
IsCurrent = e.IsCurrent ?? false,
|
||||
Description = e.Description
|
||||
}).ToList() ?? [],
|
||||
@@ -262,28 +258,13 @@ public sealed class CVParserService : ICVParserService
|
||||
Qualification = e.Qualification,
|
||||
Subject = e.Subject,
|
||||
Grade = e.Grade,
|
||||
StartDate = ParseDate(e.StartDate),
|
||||
EndDate = ParseDate(e.EndDate)
|
||||
StartDate = DateHelpers.ParseDate(e.StartDate),
|
||||
EndDate = DateHelpers.ParseDate(e.EndDate)
|
||||
}).ToList() ?? [],
|
||||
Skills = response.Skills ?? []
|
||||
};
|
||||
}
|
||||
|
||||
private static DateOnly? ParseDate(string? dateString)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(dateString))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
if (DateOnly.TryParse(dateString, out var date))
|
||||
{
|
||||
return date;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
// Internal DTOs for Claude response parsing
|
||||
private sealed record ClaudeCVResponse
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user