Add UK education verification and security fixes

Features:
- Add UK institution recognition (170+ universities)
- Add diploma mill detection (100+ blacklisted institutions)
- Add education verification service with date plausibility checks
- Add local file storage option (no Azure required)
- Add default admin user seeding on startup
- Enhance Serilog logging with file output

Security fixes:
- Fix path traversal vulnerability in LocalFileStorageService
- Fix open redirect in login endpoint (use LocalRedirect)
- Fix password validation message (12 chars, not 6)
- Fix login to use HTTP POST endpoint (avoid Blazor cookie issues)

Code improvements:
- Add CancellationToken propagation to CV parser
- Add shared helpers (JsonDefaults, DateHelpers, ScoreThresholds)
- Add IUserContextService for user ID extraction
- Parallelize company verification in ProcessCVCheckJob
- Add 28 unit tests for education verification

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-20 16:45:43 +01:00
parent c6d52a38b2
commit f1ccd217d8
35 changed files with 1791 additions and 415 deletions

View File

@@ -6,6 +6,7 @@ using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using TrueCV.Application.Helpers;
using TrueCV.Application.Interfaces;
using TrueCV.Application.Models;
using TrueCV.Infrastructure.Configuration;
@@ -18,12 +19,6 @@ public sealed class CVParserService : ICVParserService
private readonly AnthropicClient _anthropicClient;
private readonly ILogger<CVParserService> _logger;
private static readonly JsonSerializerOptions JsonOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
PropertyNameCaseInsensitive = true
};
private const string SystemPrompt = """
You are a CV/Resume parser. Your task is to extract structured information from CV text.
You must respond ONLY with valid JSON, no other text or markdown.
@@ -80,14 +75,14 @@ public sealed class CVParserService : ICVParserService
_anthropicClient = new AnthropicClient(settings.Value.ApiKey);
}
public async Task<CVData> ParseAsync(Stream fileStream, string fileName)
public async Task<CVData> ParseAsync(Stream fileStream, string fileName, CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(fileStream);
ArgumentException.ThrowIfNullOrWhiteSpace(fileName);
_logger.LogDebug("Parsing CV file: {FileName}", fileName);
var text = await ExtractTextAsync(fileStream, fileName);
var text = await ExtractTextAsync(fileStream, fileName, cancellationToken);
if (string.IsNullOrWhiteSpace(text))
{
@@ -97,7 +92,7 @@ public sealed class CVParserService : ICVParserService
_logger.LogDebug("Extracted {CharCount} characters from {FileName}", text.Length, fileName);
var cvData = await ParseWithClaudeAsync(text);
var cvData = await ParseWithClaudeAsync(text, cancellationToken);
_logger.LogInformation(
"Successfully parsed CV for {FullName} with {EmploymentCount} employment entries and {EducationCount} education entries",
@@ -108,23 +103,23 @@ public sealed class CVParserService : ICVParserService
return cvData;
}
private async Task<string> ExtractTextAsync(Stream fileStream, string fileName)
private async Task<string> ExtractTextAsync(Stream fileStream, string fileName, CancellationToken cancellationToken)
{
var extension = Path.GetExtension(fileName).ToLowerInvariant();
return extension switch
{
".pdf" => await ExtractTextFromPdfAsync(fileStream),
".pdf" => await ExtractTextFromPdfAsync(fileStream, cancellationToken),
".docx" => ExtractTextFromDocx(fileStream),
_ => throw new NotSupportedException($"File type '{extension}' is not supported. Only PDF and DOCX files are accepted.")
};
}
private async Task<string> ExtractTextFromPdfAsync(Stream fileStream)
private async Task<string> ExtractTextFromPdfAsync(Stream fileStream, CancellationToken cancellationToken)
{
// Copy stream to memory for PdfPig (requires seekable stream)
using var memoryStream = new MemoryStream();
await fileStream.CopyToAsync(memoryStream);
await fileStream.CopyToAsync(memoryStream, cancellationToken);
memoryStream.Position = 0;
using var document = PdfDocument.Open(memoryStream);
@@ -132,6 +127,7 @@ public sealed class CVParserService : ICVParserService
foreach (var page in document.GetPages())
{
cancellationToken.ThrowIfCancellationRequested();
var pageText = page.Text;
textBuilder.AppendLine(pageText);
}
@@ -163,7 +159,7 @@ public sealed class CVParserService : ICVParserService
return textBuilder.ToString();
}
private async Task<CVData> ParseWithClaudeAsync(string cvText)
private async Task<CVData> ParseWithClaudeAsync(string cvText, CancellationToken cancellationToken)
{
var prompt = ExtractionPrompt.Replace("{CV_TEXT}", cvText);
@@ -182,7 +178,7 @@ public sealed class CVParserService : ICVParserService
_logger.LogDebug("Sending CV text to Claude API for parsing");
var response = await _anthropicClient.Messages.GetClaudeMessageAsync(parameters);
var response = await _anthropicClient.Messages.GetClaudeMessageAsync(parameters, cancellationToken);
var responseText = response.Content
.OfType<TextContent>()
@@ -201,7 +197,7 @@ public sealed class CVParserService : ICVParserService
try
{
var parsedResponse = JsonSerializer.Deserialize<ClaudeCVResponse>(responseText, JsonOptions);
var parsedResponse = JsonSerializer.Deserialize<ClaudeCVResponse>(responseText, JsonDefaults.CamelCase);
if (parsedResponse is null)
{
@@ -251,8 +247,8 @@ public sealed class CVParserService : ICVParserService
CompanyName = e.CompanyName ?? "Unknown Company",
JobTitle = e.JobTitle ?? "Unknown Position",
Location = e.Location,
StartDate = ParseDate(e.StartDate),
EndDate = ParseDate(e.EndDate),
StartDate = DateHelpers.ParseDate(e.StartDate),
EndDate = DateHelpers.ParseDate(e.EndDate),
IsCurrent = e.IsCurrent ?? false,
Description = e.Description
}).ToList() ?? [],
@@ -262,28 +258,13 @@ public sealed class CVParserService : ICVParserService
Qualification = e.Qualification,
Subject = e.Subject,
Grade = e.Grade,
StartDate = ParseDate(e.StartDate),
EndDate = ParseDate(e.EndDate)
StartDate = DateHelpers.ParseDate(e.StartDate),
EndDate = DateHelpers.ParseDate(e.EndDate)
}).ToList() ?? [],
Skills = response.Skills ?? []
};
}
/// <summary>
/// Converts a date string into a <see cref="DateOnly"/>.
/// </summary>
/// <param name="dateString">The text to parse; may be null, empty, or whitespace.</param>
/// <returns>
/// The parsed date, or <c>null</c> when the input is blank or
/// <see cref="DateOnly.TryParse(string?, out DateOnly)"/> rejects it.
/// </returns>
private static DateOnly? ParseDate(string? dateString)
{
    // Blank input and unparseable text are both reported as "no date" rather than an error.
    var hasDate = !string.IsNullOrWhiteSpace(dateString)
        && DateOnly.TryParse(dateString, out _);

    return hasDate && DateOnly.TryParse(dateString, out var parsed)
        ? (DateOnly?)parsed
        : null;
}
// Internal DTOs for Claude response parsing
private sealed record ClaudeCVResponse
{