Add UK education verification and security fixes

Features:
- Add UK institution recognition (170+ universities)
- Add diploma mill detection (100+ blacklisted institutions)
- Add education verification service with date plausibility checks
- Add local file storage option (no Azure required)
- Add default admin user seeding on startup
- Enhance Serilog logging with file output

Security fixes:
- Fix path traversal vulnerability in LocalFileStorageService
- Fix open redirect in login endpoint (use LocalRedirect)
- Fix password validation message (12 chars, not 6)
- Fix login to use HTTP POST endpoint (avoid Blazor cookie issues)

Code improvements:
- Add CancellationToken propagation to CV parser
- Add shared helpers (JsonDefaults, DateHelpers, ScoreThresholds)
- Add IUserContextService for user ID extraction
- Parallelize company verification in ProcessCVCheckJob
- Add 28 unit tests for education verification

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-20 16:45:43 +01:00
parent c6d52a38b2
commit f1ccd217d8
35 changed files with 1791 additions and 415 deletions

View File

@@ -3,6 +3,7 @@ using Hangfire;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using TrueCV.Application.DTOs;
using TrueCV.Application.Helpers;
using TrueCV.Application.Interfaces;
using TrueCV.Application.Models;
using TrueCV.Domain.Entities;
@@ -139,7 +140,7 @@ public sealed class CVCheckService : ICVCheckService
try
{
var report = JsonSerializer.Deserialize<VeracityReport>(cvCheck.ReportJson);
var report = JsonSerializer.Deserialize<VeracityReport>(cvCheck.ReportJson, JsonDefaults.CamelCase);
return report;
}
catch (JsonException ex)

View File

@@ -6,6 +6,7 @@ using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using TrueCV.Application.Helpers;
using TrueCV.Application.Interfaces;
using TrueCV.Application.Models;
using TrueCV.Infrastructure.Configuration;
@@ -18,12 +19,6 @@ public sealed class CVParserService : ICVParserService
private readonly AnthropicClient _anthropicClient;
private readonly ILogger<CVParserService> _logger;
private static readonly JsonSerializerOptions JsonOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
PropertyNameCaseInsensitive = true
};
private const string SystemPrompt = """
You are a CV/Resume parser. Your task is to extract structured information from CV text.
You must respond ONLY with valid JSON, no other text or markdown.
@@ -80,14 +75,14 @@ public sealed class CVParserService : ICVParserService
_anthropicClient = new AnthropicClient(settings.Value.ApiKey);
}
public async Task<CVData> ParseAsync(Stream fileStream, string fileName)
public async Task<CVData> ParseAsync(Stream fileStream, string fileName, CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(fileStream);
ArgumentException.ThrowIfNullOrWhiteSpace(fileName);
_logger.LogDebug("Parsing CV file: {FileName}", fileName);
var text = await ExtractTextAsync(fileStream, fileName);
var text = await ExtractTextAsync(fileStream, fileName, cancellationToken);
if (string.IsNullOrWhiteSpace(text))
{
@@ -97,7 +92,7 @@ public sealed class CVParserService : ICVParserService
_logger.LogDebug("Extracted {CharCount} characters from {FileName}", text.Length, fileName);
var cvData = await ParseWithClaudeAsync(text);
var cvData = await ParseWithClaudeAsync(text, cancellationToken);
_logger.LogInformation(
"Successfully parsed CV for {FullName} with {EmploymentCount} employment entries and {EducationCount} education entries",
@@ -108,23 +103,23 @@ public sealed class CVParserService : ICVParserService
return cvData;
}
private async Task<string> ExtractTextAsync(Stream fileStream, string fileName)
private async Task<string> ExtractTextAsync(Stream fileStream, string fileName, CancellationToken cancellationToken)
{
var extension = Path.GetExtension(fileName).ToLowerInvariant();
return extension switch
{
".pdf" => await ExtractTextFromPdfAsync(fileStream),
".pdf" => await ExtractTextFromPdfAsync(fileStream, cancellationToken),
".docx" => ExtractTextFromDocx(fileStream),
_ => throw new NotSupportedException($"File type '{extension}' is not supported. Only PDF and DOCX files are accepted.")
};
}
private async Task<string> ExtractTextFromPdfAsync(Stream fileStream)
private async Task<string> ExtractTextFromPdfAsync(Stream fileStream, CancellationToken cancellationToken)
{
// Copy stream to memory for PdfPig (requires seekable stream)
using var memoryStream = new MemoryStream();
await fileStream.CopyToAsync(memoryStream);
await fileStream.CopyToAsync(memoryStream, cancellationToken);
memoryStream.Position = 0;
using var document = PdfDocument.Open(memoryStream);
@@ -132,6 +127,7 @@ public sealed class CVParserService : ICVParserService
foreach (var page in document.GetPages())
{
cancellationToken.ThrowIfCancellationRequested();
var pageText = page.Text;
textBuilder.AppendLine(pageText);
}
@@ -163,7 +159,7 @@ public sealed class CVParserService : ICVParserService
return textBuilder.ToString();
}
private async Task<CVData> ParseWithClaudeAsync(string cvText)
private async Task<CVData> ParseWithClaudeAsync(string cvText, CancellationToken cancellationToken)
{
var prompt = ExtractionPrompt.Replace("{CV_TEXT}", cvText);
@@ -182,7 +178,7 @@ public sealed class CVParserService : ICVParserService
_logger.LogDebug("Sending CV text to Claude API for parsing");
var response = await _anthropicClient.Messages.GetClaudeMessageAsync(parameters);
var response = await _anthropicClient.Messages.GetClaudeMessageAsync(parameters, cancellationToken);
var responseText = response.Content
.OfType<TextContent>()
@@ -201,7 +197,7 @@ public sealed class CVParserService : ICVParserService
try
{
var parsedResponse = JsonSerializer.Deserialize<ClaudeCVResponse>(responseText, JsonOptions);
var parsedResponse = JsonSerializer.Deserialize<ClaudeCVResponse>(responseText, JsonDefaults.CamelCase);
if (parsedResponse is null)
{
@@ -251,8 +247,8 @@ public sealed class CVParserService : ICVParserService
CompanyName = e.CompanyName ?? "Unknown Company",
JobTitle = e.JobTitle ?? "Unknown Position",
Location = e.Location,
StartDate = ParseDate(e.StartDate),
EndDate = ParseDate(e.EndDate),
StartDate = DateHelpers.ParseDate(e.StartDate),
EndDate = DateHelpers.ParseDate(e.EndDate),
IsCurrent = e.IsCurrent ?? false,
Description = e.Description
}).ToList() ?? [],
@@ -262,28 +258,13 @@ public sealed class CVParserService : ICVParserService
Qualification = e.Qualification,
Subject = e.Subject,
Grade = e.Grade,
StartDate = ParseDate(e.StartDate),
EndDate = ParseDate(e.EndDate)
StartDate = DateHelpers.ParseDate(e.StartDate),
EndDate = DateHelpers.ParseDate(e.EndDate)
}).ToList() ?? [],
Skills = response.Skills ?? []
};
}
private static DateOnly? ParseDate(string? dateString)
{
if (string.IsNullOrWhiteSpace(dateString))
{
return null;
}
if (DateOnly.TryParse(dateString, out var date))
{
return date;
}
return null;
}
// Internal DTOs for Claude response parsing
private sealed record ClaudeCVResponse
{

View File

@@ -2,6 +2,7 @@ using FuzzySharp;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using TrueCV.Application.DTOs;
using TrueCV.Application.Helpers;
using TrueCV.Application.Interfaces;
using TrueCV.Application.Models;
using TrueCV.Domain.Entities;
@@ -113,7 +114,7 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
CompanyNumber = item.CompanyNumber,
CompanyName = item.Title,
CompanyStatus = item.CompanyStatus ?? "Unknown",
IncorporationDate = ParseDate(item.DateOfCreation),
IncorporationDate = DateHelpers.ParseDate(item.DateOfCreation),
AddressSnippet = item.AddressSnippet
}).ToList();
}
@@ -166,8 +167,8 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
{
existingCache.CompanyName = item.Title;
existingCache.Status = item.CompanyStatus ?? "Unknown";
existingCache.IncorporationDate = ParseDate(item.DateOfCreation);
existingCache.DissolutionDate = ParseDate(item.DateOfCessation);
existingCache.IncorporationDate = DateHelpers.ParseDate(item.DateOfCreation);
existingCache.DissolutionDate = DateHelpers.ParseDate(item.DateOfCessation);
existingCache.CachedAt = DateTime.UtcNow;
}
else
@@ -177,8 +178,8 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
CompanyNumber = item.CompanyNumber,
CompanyName = item.Title,
Status = item.CompanyStatus ?? "Unknown",
IncorporationDate = ParseDate(item.DateOfCreation),
DissolutionDate = ParseDate(item.DateOfCessation),
IncorporationDate = DateHelpers.ParseDate(item.DateOfCreation),
DissolutionDate = DateHelpers.ParseDate(item.DateOfCessation),
CachedAt = DateTime.UtcNow
};
@@ -230,18 +231,4 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
};
}
private static DateOnly? ParseDate(string? dateString)
{
if (string.IsNullOrWhiteSpace(dateString))
{
return null;
}
if (DateOnly.TryParse(dateString, out var date))
{
return date;
}
return null;
}
}

View File

@@ -0,0 +1,267 @@
using TrueCV.Application.Data;
using TrueCV.Application.Interfaces;
using TrueCV.Application.Models;
namespace TrueCV.Infrastructure.Services;
/// <summary>
/// Checks claimed education entries against the diploma-mill blacklist, the suspicious-name patterns and the
/// recognised UK institution list, and flags implausible or conflicting dates.
/// </summary>
public class EducationVerifierService : IEducationVerifierService
{
    // Shortest and longest course length, in whole years, accepted by CheckDatePlausibility without a flag.
    private const int MinimumDegreeYears = 1;
    private const int MaximumDegreeYears = 8;

    // Not referenced by any check in this class at present.
    private const int MinimumGraduationAge = 18;

    // Employment that starts more than this many (30-day) months before graduation is flagged.
    private const int MaximumMonthsEmployedBeforeGraduation = 24;

    // Job-title fragments that mark a role as an internship-style position.
    private static readonly string[] InternshipTitleKeywords = ["intern", "placement", "trainee"];

    /// <summary>
    /// Verifies a single education entry.
    /// </summary>
    /// <param name="education">The claimed education entry.</param>
    /// <returns>
    /// A result whose status is "DiplomaMill", "Suspicious", "Recognised" or "Unknown". The date plausibility
    /// check only runs for recognised institutions; every other status reports the dates as plausible.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="education"/> is null.</exception>
    public EducationVerificationResult Verify(EducationEntry education)
    {
        ArgumentNullException.ThrowIfNull(education);

        var institution = education.Institution;

        // Check for diploma mill first (highest priority flag)
        if (DiplomaMills.IsDiplomaMill(institution))
        {
            return CreateResult(
                education,
                status: "DiplomaMill",
                isVerified: false,
                isDiplomaMill: true,
                isSuspicious: true,
                verificationNotes: "Institution is on the diploma mill blacklist");
        }

        // Check for suspicious patterns
        if (DiplomaMills.HasSuspiciousPattern(institution))
        {
            return CreateResult(
                education,
                status: "Suspicious",
                isVerified: false,
                isDiplomaMill: false,
                isSuspicious: true,
                verificationNotes: "Institution name contains suspicious patterns common in diploma mills");
        }

        // Check if it's a recognised UK institution
        var officialName = UKInstitutions.GetOfficialName(institution);
        if (officialName is null)
        {
            // Not in our database - could be international or unrecognised
            return CreateResult(
                education,
                status: "Unknown",
                isVerified: false,
                isDiplomaMill: false,
                isSuspicious: false,
                verificationNotes: "Institution not found in UK recognised institutions database. May be an international institution.");
        }

        var (datesPlausible, dateNotes) = CheckDatePlausibility(education.StartDate, education.EndDate);
        var recognisedNotes = institution.Equals(officialName, StringComparison.OrdinalIgnoreCase)
            ? "Verified UK higher education institution"
            : $"Matched to official name: {officialName}";

        return CreateResult(
            education,
            status: "Recognised",
            isVerified: true,
            isDiplomaMill: false,
            isSuspicious: false,
            verificationNotes: recognisedNotes) with
        {
            MatchedInstitution = officialName,
            DatesArePlausible = datesPlausible,
            DatePlausibilityNotes = dateNotes
        };
    }

    /// <summary>
    /// Verifies every education entry, then cross-checks each graduation date against the employment history
    /// (when supplied) and annotates education periods that overlap one another.
    /// </summary>
    /// <param name="education">The claimed education entries.</param>
    /// <param name="employment">Optional employment history used for the timeline check.</param>
    /// <returns>One result per education entry, in the same order as <paramref name="education"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="education"/> is null.</exception>
    public List<EducationVerificationResult> VerifyAll(
        List<EducationEntry> education,
        List<EmploymentEntry>? employment = null)
    {
        ArgumentNullException.ThrowIfNull(education);

        var results = new List<EducationVerificationResult>(education.Count);

        foreach (var entry in education)
        {
            var result = Verify(entry);

            // If we have employment data, check for timeline issues
            if (employment is { Count: > 0 } && result.ClaimedEndDate.HasValue)
            {
                var (timelinePlausible, timelineNotes) = CheckEducationEmploymentTimeline(
                    result.ClaimedEndDate.Value,
                    employment);

                if (!timelinePlausible)
                {
                    result = result with
                    {
                        DatesArePlausible = false,
                        DatePlausibilityNotes = CombineNotes(result.DatePlausibilityNotes, timelineNotes)
                    };
                }
            }

            results.Add(result);
        }

        // Check for overlapping education periods
        CheckOverlappingEducation(results);

        return results;
    }

    /// <summary>
    /// Builds a result carrying the claimed values from <paramref name="education"/> plus the given verdict.
    /// Dates are marked plausible here; callers that ran a date check override that with a <c>with</c> expression.
    /// </summary>
    private static EducationVerificationResult CreateResult(
        EducationEntry education,
        string status,
        bool isVerified,
        bool isDiplomaMill,
        bool isSuspicious,
        string verificationNotes)
    {
        return new EducationVerificationResult
        {
            ClaimedInstitution = education.Institution,
            Status = status,
            IsVerified = isVerified,
            IsDiplomaMill = isDiplomaMill,
            IsSuspicious = isSuspicious,
            VerificationNotes = verificationNotes,
            ClaimedStartDate = education.StartDate,
            ClaimedEndDate = education.EndDate,
            DatesArePlausible = true,
            ClaimedQualification = education.Qualification,
            ClaimedSubject = education.Subject
        };
    }

    /// <summary>
    /// Checks that the claimed start and end dates describe a course of believable length.
    /// </summary>
    /// <returns>
    /// Whether the dates are plausible, plus an optional explanatory note. Missing dates are treated as plausible.
    /// </returns>
    private static (bool isPlausible, string? notes) CheckDatePlausibility(DateOnly? startDate, DateOnly? endDate)
    {
        if (!startDate.HasValue || !endDate.HasValue)
        {
            return (true, null);
        }

        var start = startDate.Value;
        var end = endDate.Value;

        // End date should be after start date
        if (end <= start)
        {
            return (false, "End date is before or equal to start date");
        }

        // Fractional years are only used for the message text.
        var years = (end.DayNumber - start.DayNumber) / 365.25;

        // Compare calendar dates rather than the fractional value: 365 days / 365.25 is just under 1.0, which
        // would flag a course lasting exactly one calendar year as too short.
        if (end < start.AddYears(MinimumDegreeYears))
        {
            return (false, $"Course duration ({years:F1} years) is unusually short for a degree");
        }

        if (end > start.AddYears(MaximumDegreeYears))
        {
            return (false, $"Course duration ({years:F1} years) is unusually long");
        }

        // Check if graduation date is in the future
        if (end > DateOnly.FromDateTime(DateTime.UtcNow))
        {
            return (true, "Graduation date is in the future - possibly currently studying");
        }

        return (true, null);
    }

    /// <summary>
    /// Flags a graduation date that falls long after the candidate's earliest dated employment began, unless
    /// that earliest role looks like an internship, placement or trainee position.
    /// </summary>
    private static (bool isPlausible, string? notes) CheckEducationEmploymentTimeline(
        DateOnly graduationDate,
        List<EmploymentEntry> employment)
    {
        // Find the earliest employment start date
        var earliestEmployment = employment
            .Where(e => e.StartDate.HasValue)
            .OrderBy(e => e.StartDate)
            .FirstOrDefault();

        if (earliestEmployment?.StartDate == null)
        {
            return (true, null);
        }

        var employmentStart = earliestEmployment.StartDate.Value;

        // If someone claims to have started full-time work significantly before graduating,
        // that's suspicious (unless it's clearly an internship/part-time role)
        var monthsBeforeGraduation = (graduationDate.DayNumber - employmentStart.DayNumber) / 30.0;

        if (monthsBeforeGraduation > MaximumMonthsEmployedBeforeGraduation
            && !IsLikelyInternship(earliestEmployment.JobTitle))
        {
            return (false, $"Employment at {earliestEmployment.CompanyName} started {monthsBeforeGraduation:F0} months before claimed graduation");
        }

        return (true, null);
    }

    /// <summary>
    /// Returns true when the job title contains one of the internship-style keywords (case-insensitive).
    /// A missing title is treated as a regular role.
    /// </summary>
    private static bool IsLikelyInternship(string? jobTitle)
    {
        return jobTitle is not null
            && InternshipTitleKeywords.Any(keyword => jobTitle.Contains(keyword, StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>
    /// Adds an "Overlaps with education at ..." note to both entries of every pair of dated education periods
    /// that overlap. The list is updated in place.
    /// </summary>
    private static void CheckOverlappingEducation(List<EducationVerificationResult> results)
    {
        for (var i = 0; i < results.Count; i++)
        {
            if (!results[i].ClaimedStartDate.HasValue || !results[i].ClaimedEndDate.HasValue)
            {
                continue;
            }

            for (var j = i + 1; j < results.Count; j++)
            {
                // Read both entries from the list on every pass. An earlier pair may already have replaced
                // them, and working on a stale copy would drop the notes added so far.
                var first = results[i];
                var second = results[j];

                if (!second.ClaimedStartDate.HasValue || !second.ClaimedEndDate.HasValue)
                {
                    continue;
                }

                if (!PeriodsOverlap(
                        first.ClaimedStartDate!.Value, first.ClaimedEndDate!.Value,
                        second.ClaimedStartDate.Value, second.ClaimedEndDate.Value))
                {
                    continue;
                }

                results[i] = first with
                {
                    DatePlausibilityNotes = CombineNotes(
                        first.DatePlausibilityNotes,
                        $"Overlaps with education at {second.ClaimedInstitution}")
                };

                results[j] = second with
                {
                    DatePlausibilityNotes = CombineNotes(
                        second.DatePlausibilityNotes,
                        $"Overlaps with education at {first.ClaimedInstitution}")
                };
            }
        }
    }

    /// <summary>
    /// Returns true when the two periods share at least one day strictly inside both; periods that merely
    /// touch at an endpoint do not overlap.
    /// </summary>
    private static bool PeriodsOverlap(DateOnly start1, DateOnly end1, DateOnly start2, DateOnly end2)
        => start1 < end2 && start2 < end1;

    /// <summary>
    /// Joins two optional notes with "; ", returning whichever one is present when the other is null or empty.
    /// </summary>
    private static string? CombineNotes(string? existing, string? additional)
    {
        if (string.IsNullOrEmpty(additional))
        {
            return existing;
        }

        if (string.IsNullOrEmpty(existing))
        {
            return additional;
        }

        return $"{existing}; {additional}";
    }
}

View File

@@ -68,11 +68,15 @@ public sealed class FileStorageService : IFileStorageService
var blobClient = _containerClient.GetBlobClient(blobName);
var response = await blobClient.DownloadStreamingAsync();
// Download to memory stream to ensure proper resource management
// The caller will own and dispose this stream
var memoryStream = new MemoryStream();
await blobClient.DownloadToAsync(memoryStream);
memoryStream.Position = 0;
_logger.LogDebug("Successfully downloaded blob {BlobName}", blobName);
return response.Value.Content;
return memoryStream;
}
public async Task DeleteAsync(string blobUrl)
@@ -99,12 +103,21 @@ public sealed class FileStorageService : IFileStorageService
private static string ExtractBlobNameFromUrl(string blobUrl)
{
var uri = new Uri(blobUrl);
if (!Uri.TryCreate(blobUrl, UriKind.Absolute, out var uri))
{
throw new ArgumentException($"Invalid blob URL format: '{blobUrl}'", nameof(blobUrl));
}
var segments = uri.Segments;
// The blob name is the last segment after the container name
// URL format: https://account.blob.core.windows.net/container/blobname
return segments.Length > 2 ? segments[^1] : throw new ArgumentException("Invalid blob URL", nameof(blobUrl));
if (segments.Length <= 2)
{
throw new ArgumentException($"Blob URL does not contain a valid blob name: '{blobUrl}'", nameof(blobUrl));
}
return segments[^1];
}
private static string GetContentType(string extension)

View File

@@ -0,0 +1,117 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using TrueCV.Application.Interfaces;
using TrueCV.Infrastructure.Configuration;
namespace TrueCV.Infrastructure.Services;
/// <summary>
/// <see cref="IFileStorageService"/> implementation that keeps files in a directory on the local file system
/// and identifies them by <c>file://</c> URLs.
/// </summary>
public sealed class LocalFileStorageService : IFileStorageService
{
    // Scheme prefix put in front of the file path when building the URL returned by UploadAsync.
    private const string FileUrlPrefix = "file://";

    private readonly string _storagePath;
    private readonly ILogger<LocalFileStorageService> _logger;

    /// <summary>
    /// Creates the service and makes sure the configured storage directory exists.
    /// </summary>
    /// <param name="settings">Options supplying the storage directory path.</param>
    /// <param name="logger">Logger for diagnostic output.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public LocalFileStorageService(
        IOptions<LocalStorageSettings> settings,
        ILogger<LocalFileStorageService> logger)
    {
        ArgumentNullException.ThrowIfNull(settings);
        ArgumentNullException.ThrowIfNull(logger);

        _logger = logger;
        _storagePath = settings.Value.StoragePath;

        if (!Directory.Exists(_storagePath))
        {
            Directory.CreateDirectory(_storagePath);
            _logger.LogInformation("Created local storage directory: {Path}", _storagePath);
        }
    }

    /// <summary>
    /// Copies <paramref name="fileStream"/> into the storage directory under a new GUID-based name that keeps
    /// the extension of <paramref name="fileName"/>.
    /// </summary>
    /// <param name="fileStream">Content to store.</param>
    /// <param name="fileName">Original file name; only its extension is used for the stored name.</param>
    /// <returns>A <c>file://</c> URL identifying the stored file.</returns>
    public async Task<string> UploadAsync(Stream fileStream, string fileName)
    {
        ArgumentNullException.ThrowIfNull(fileStream);
        ArgumentException.ThrowIfNullOrWhiteSpace(fileName);

        var extension = Path.GetExtension(fileName);
        var uniqueFileName = $"{Guid.NewGuid()}{extension}";
        var filePath = Path.Combine(_storagePath, uniqueFileName);

        _logger.LogDebug("Uploading file {FileName} to {FilePath}", fileName, filePath);

        await using var fileStreamOut = new FileStream(filePath, FileMode.Create, FileAccess.Write);
        await fileStream.CopyToAsync(fileStreamOut);

        // Return a file:// URL for local storage
        var fileUrl = $"{FileUrlPrefix}{filePath}";
        _logger.LogInformation("Successfully uploaded file {FileName} to {FileUrl}", fileName, fileUrl);

        return fileUrl;
    }

    /// <summary>
    /// Reads the file identified by <paramref name="blobUrl"/> into memory.
    /// </summary>
    /// <param name="blobUrl">A URL previously returned by <see cref="UploadAsync"/> (or a plain path).</param>
    /// <returns>A <see cref="MemoryStream"/> positioned at the start; the caller disposes it.</returns>
    /// <exception cref="FileNotFoundException">The file does not exist.</exception>
    /// <exception cref="UnauthorizedAccessException">The path resolves outside the storage directory.</exception>
    public async Task<Stream> DownloadAsync(string blobUrl)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(blobUrl);

        var filePath = ExtractFilePathFromUrl(blobUrl);

        _logger.LogDebug("Downloading file from {FilePath}", filePath);

        if (!File.Exists(filePath))
        {
            throw new FileNotFoundException($"File not found: {filePath}");
        }

        var memoryStream = new MemoryStream();
        await using var fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read);
        await fileStream.CopyToAsync(memoryStream);
        memoryStream.Position = 0;

        _logger.LogDebug("Successfully downloaded file from {FilePath}", filePath);

        return memoryStream;
    }

    /// <summary>
    /// Deletes the file identified by <paramref name="blobUrl"/>; a missing file is logged as a warning and
    /// otherwise ignored.
    /// </summary>
    /// <param name="blobUrl">A URL previously returned by <see cref="UploadAsync"/> (or a plain path).</param>
    /// <exception cref="UnauthorizedAccessException">The path resolves outside the storage directory.</exception>
    public Task DeleteAsync(string blobUrl)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(blobUrl);

        var filePath = ExtractFilePathFromUrl(blobUrl);

        _logger.LogDebug("Deleting file {FilePath}", filePath);

        if (File.Exists(filePath))
        {
            File.Delete(filePath);
            _logger.LogInformation("Successfully deleted file {FilePath}", filePath);
        }
        else
        {
            _logger.LogWarning("File {FilePath} did not exist when attempting to delete", filePath);
        }

        return Task.CompletedTask;
    }

    /// <summary>
    /// Converts a <c>file://</c> URL (or plain path) into an absolute path and rejects anything that does not
    /// resolve to a location inside the storage directory.
    /// </summary>
    /// <exception cref="UnauthorizedAccessException">The path resolves outside the storage directory.</exception>
    private string ExtractFilePathFromUrl(string fileUrl)
    {
        var filePath = fileUrl.StartsWith(FileUrlPrefix, StringComparison.OrdinalIgnoreCase)
            ? fileUrl[FileUrlPrefix.Length..]
            : fileUrl;

        // Resolve to absolute path (this collapses any ".." segments) before validating it
        var fullPath = Path.GetFullPath(filePath);

        // Compare against the root *with* a trailing separator. A bare prefix check would also accept sibling
        // directories that merely share the prefix, e.g. "/data/storage-evil" for root "/data/storage".
        var storageRoot = Path.GetFullPath(_storagePath);
        if (!Path.EndsInDirectorySeparator(storageRoot))
        {
            storageRoot += Path.DirectorySeparatorChar;
        }

        // Only ignore case where the file system normally does; on other platforms "Storage" and "storage"
        // are different directories.
        var comparison = OperatingSystem.IsWindows() || OperatingSystem.IsMacOS()
            ? StringComparison.OrdinalIgnoreCase
            : StringComparison.Ordinal;

        if (!fullPath.StartsWith(storageRoot, comparison))
        {
            throw new UnauthorizedAccessException("Access denied: path is outside storage directory");
        }

        return fullPath;
    }
}

View File

@@ -0,0 +1,28 @@
using System.Security.Claims;
using Microsoft.AspNetCore.Components.Authorization;
using TrueCV.Application.Interfaces;
namespace TrueCV.Infrastructure.Services;
/// <summary>
/// Resolves the current user's identifier from the authentication state.
/// </summary>
public sealed class UserContextService : IUserContextService
{
    private readonly AuthenticationStateProvider _authenticationStateProvider;

    public UserContextService(AuthenticationStateProvider authenticationStateProvider)
        => _authenticationStateProvider = authenticationStateProvider;

    /// <summary>
    /// Reads the <see cref="ClaimTypes.NameIdentifier"/> claim of the current user and parses it as a GUID.
    /// </summary>
    /// <returns>The user ID, or null when the claim is absent, empty or not a valid GUID.</returns>
    public async Task<Guid?> GetCurrentUserIdAsync()
    {
        var state = await _authenticationStateProvider.GetAuthenticationStateAsync();
        var rawUserId = state.User.FindFirst(ClaimTypes.NameIdentifier)?.Value;

        // Guid.TryParse already returns false for null and empty input, so no separate emptiness check is needed.
        if (Guid.TryParse(rawUserId, out var parsedId))
        {
            return parsedId;
        }

        return null;
    }
}