diff --git a/.gitignore b/.gitignore index 470bfd8..bcfd575 100644 --- a/.gitignore +++ b/.gitignore @@ -218,3 +218,7 @@ local/ *.tmp *.temp *.swp + +# Local file uploads +src/TrueCV.Web/uploads/ +logs/ diff --git a/Directory.Build.props b/Directory.Build.props new file mode 100644 index 0000000..700482c --- /dev/null +++ b/Directory.Build.props @@ -0,0 +1,7 @@ + + + false + false + false + + diff --git a/src/TrueCV.Application/Data/DiplomaMills.cs b/src/TrueCV.Application/Data/DiplomaMills.cs new file mode 100644 index 0000000..e07d0a4 --- /dev/null +++ b/src/TrueCV.Application/Data/DiplomaMills.cs @@ -0,0 +1,210 @@ +namespace TrueCV.Application.Data; + +/// +/// Known diploma mills and fake educational institutions. +/// Sources: HEDD, Oregon ODA, UNESCO warnings, Michigan AG list +/// +public static class DiplomaMills +{ + /// + /// Known diploma mills and unaccredited institutions that sell fake degrees. + /// This list includes institutions identified by various regulatory bodies. + /// + public static readonly HashSet KnownDiplomaMills = new(StringComparer.OrdinalIgnoreCase) + { + // Well-known diploma mills + "Almeda University", + "Ashwood University", + "Belford University", + "Bircham University", + "Breyer State University", + "Brighton University (not Brighton UK)", + "Buxton University", + "Cambridge State University", + "Chadwick University", + "Clayton University", + "Columbus University", + "Corllins University", + "Dartington University", + "Dickinson State University Online", + "Fairfax University", + "Glendale University", + "Greenleaf University", + "Hamilton University", + "Harrington University", + "Hill University", + "Hollywood University", + "International University (generic)", + "Irish International University", + "James Monroe University", + "Jamestown University", + "Kennedy-Western University", + "Kensington University", + "Knightsbridge University", + "LaSalle University (Louisiana)", + "Lexington University", + "Lincoln University 
(if not Pennsylvania)", + "Madison University", + "Metropolitan University (generic)", + "Middletown University", + "Monticello University", + "Northern University", + "Northfield University", + "Pacific Southern University", + "Pacific Western University", + "Paramount University", + "Parkwood University", + "Preston University", + "Redding University", + "Richmond University (not American Intl)", + "Robertstown University", + "Rochdale University", + "Rochville University", + "Saint Regis University", + "St Regis University", + "Shaftesbury University", + "Shelbourne University", + "Stanton University", + "Stratford University (if unaccredited)", + "Suffield University", + "Summit University (diploma mill)", + "Sussex College of Technology", + "Trinity College and University", + "Trinity Southern University", + "University Degree Program", + "University of Atlanta", + "University of Berkley", + "University of Devonshire", + "University of Dunham", + "University of England", + "University of Northern Washington", + "University of Palmers Green", + "University of San Moritz", + "University of Sussex (fake - not real Sussex)", + "University of Wexford", + "Vocational University", + "Warnborough University", + "Washington International University", + "Weston Reserve University", + "Westbourne University", + "Western States University", + "Woodfield University", + "Yorker International University", + + // Pakistani diploma mills commonly seen in UK + "Axact University", + "Brooklyn Park University", + "Columbiana University", + "Hillford University", + "Nixon University", + "Oxbridge University", + "University of Newford", + + // Online diploma mills + "American World University", + "Ashford University (pre-2005)", + "Concordia College and University", + "Columbus State University (fake)", + "Frederick Taylor University", + "International Theological University", + "Nations University", + "Paramount California University", + "University of Ancient Studies", + 
"University of Asia", + "Virtual University (unaccredited)", + }; + + /// + /// Suspicious patterns in institution names that often indicate diploma mills. + /// + public static readonly string[] SuspiciousPatterns = + [ + "online university", + "virtual university", + "life experience", + "no classes required", + "degree in days", + "accredited by", // followed by fake accreditor + "internationally recognised", + "worldwide university", + "global university", + "premier university", + "elite university", + "executive university", + "professional university", + "distance learning university", // be careful - some are legit + ]; + + /// + /// Fake accreditation bodies used by diploma mills. + /// + public static readonly HashSet FakeAccreditors = new(StringComparer.OrdinalIgnoreCase) + { + "World Association of Universities and Colleges", + "WAUC", + "International Accreditation Agency", + "Universal Accreditation Council", + "Board of Online Universities Accreditation", + "International Council for Open and Distance Education", + "World Online Education Accrediting Commission", + "Central States Consortium of Colleges and Schools", + "American Council of Private Colleges and Universities", + "Association of Distance Learning Programs", + "International Distance Education Certification Agency", + }; + + /// + /// Check if an institution is a known diploma mill. + /// + public static bool IsDiplomaMill(string institutionName) + { + if (string.IsNullOrWhiteSpace(institutionName)) + return false; + + var normalised = institutionName.Trim(); + + // Direct match + if (KnownDiplomaMills.Contains(normalised)) + return true; + + // Check if name contains known diploma mill + foreach (var mill in KnownDiplomaMills) + { + if (normalised.Contains(mill, StringComparison.OrdinalIgnoreCase)) + return true; + } + + return false; + } + + /// + /// Check if institution name has suspicious patterns common in diploma mills. + /// Returns true if suspicious (but not confirmed fake). 
+ /// + public static bool HasSuspiciousPattern(string institutionName) + { + if (string.IsNullOrWhiteSpace(institutionName)) + return false; + + var lower = institutionName.ToLowerInvariant(); + + foreach (var pattern in SuspiciousPatterns) + { + if (lower.Contains(pattern)) + return true; + } + + return false; + } + + /// + /// Check if an accreditor is known to be fake. + /// + public static bool IsFakeAccreditor(string accreditorName) + { + if (string.IsNullOrWhiteSpace(accreditorName)) + return false; + + return FakeAccreditors.Contains(accreditorName.Trim()); + } +} diff --git a/src/TrueCV.Application/Data/UKInstitutions.cs b/src/TrueCV.Application/Data/UKInstitutions.cs new file mode 100644 index 0000000..dc88f0b --- /dev/null +++ b/src/TrueCV.Application/Data/UKInstitutions.cs @@ -0,0 +1,285 @@ +namespace TrueCV.Application.Data; + +/// +/// List of recognised UK higher education institutions. +/// Source: GOV.UK Register of Higher Education Providers +/// +public static class UKInstitutions +{ + /// + /// Recognised UK universities and higher education providers. + /// These are legitimate degree-awarding institutions. 
+ /// + public static readonly HashSet RecognisedInstitutions = new(StringComparer.OrdinalIgnoreCase) + { + // Russell Group Universities + "University of Birmingham", + "University of Bristol", + "University of Cambridge", + "Cardiff University", + "Durham University", + "University of Edinburgh", + "University of Exeter", + "University of Glasgow", + "Imperial College London", + "King's College London", + "University of Leeds", + "University of Liverpool", + "London School of Economics", + "London School of Economics and Political Science", + "LSE", + "University of Manchester", + "Newcastle University", + "University of Nottingham", + "University of Oxford", + "Queen Mary University of London", + "Queen's University Belfast", + "University of Sheffield", + "University of Southampton", + "University College London", + "UCL", + "University of Warwick", + "University of York", + + // Other Major Universities + "Aston University", + "University of Bath", + "Birkbeck, University of London", + "Bournemouth University", + "University of Bradford", + "University of Brighton", + "Brunel University London", + "University of Buckingham", + "Canterbury Christ Church University", + "City, University of London", + "Coventry University", + "Cranfield University", + "De Montfort University", + "University of Derby", + "University of Dundee", + "University of East Anglia", + "UEA", + "University of East London", + "Edge Hill University", + "University of Essex", + "Falmouth University", + "University of Greenwich", + "Heriot-Watt University", + "University of Hertfordshire", + "University of Huddersfield", + "University of Hull", + "Keele University", + "University of Kent", + "Kingston University", + "Lancaster University", + "University of Leicester", + "University of Lincoln", + "Liverpool John Moores University", + "Liverpool Hope University", + "University of London", + "London Metropolitan University", + "London South Bank University", + "Loughborough University", + 
"Manchester Metropolitan University", + "Middlesex University", + "Northumbria University", + "Norwich University of the Arts", + "Nottingham Trent University", + "Open University", + "The Open University", + "Oxford Brookes University", + "University of Plymouth", + "University of Portsmouth", + "Queen Margaret University", + "University of Reading", + "Robert Gordon University", + "Roehampton University", + "Royal Holloway, University of London", + "Royal Holloway", + "University of Salford", + "SOAS University of London", + "SOAS", + "Sheffield Hallam University", + "University of South Wales", + "University of St Andrews", + "St Andrews", + "Staffordshire University", + "University of Stirling", + "University of Strathclyde", + "University of Sunderland", + "University of Surrey", + "University of Sussex", + "Swansea University", + "Teesside University", + "Ulster University", + "University of the West of England", + "UWE Bristol", + "University of the West of Scotland", + "University of Westminster", + "University of Winchester", + "University of Wolverhampton", + "University of Worcester", + "Wrexham University", + "York St John University", + + // Scottish Universities + "University of Aberdeen", + "Abertay University", + "Edinburgh Napier University", + "Glasgow Caledonian University", + "University of the Highlands and Islands", + + // Welsh Universities + "Aberystwyth University", + "Bangor University", + "University of South Wales", + "Wrexham Glyndwr University", + + // Northern Ireland + "Ulster University", + "Queen's University Belfast", + + // Specialist Institutions + "Royal Academy of Music", + "Royal College of Art", + "Royal College of Music", + "Royal Northern College of Music", + "Royal Veterinary College", + "Goldsmiths, University of London", + "Goldsmiths", + "Courtauld Institute of Art", + "London Business School", + "LBS", + "Guildhall School of Music and Drama", + "Trinity Laban Conservatoire of Music and Dance", + "Arts University 
Bournemouth", + "University for the Creative Arts", + "Ravensbourne University London", + + // Business Schools (accredited) + "Henley Business School", + "Warwick Business School", + "Manchester Business School", + "Said Business School", + "Judge Business School", + "Cass Business School", + "Bayes Business School", + "Imperial College Business School", + "Cranfield School of Management", + "Ashridge Business School", + "Alliance Manchester Business School", + }; + + /// + /// Common name variations and abbreviations mapped to official names. + /// + public static readonly Dictionary NameVariations = new(StringComparer.OrdinalIgnoreCase) + { + ["Cambridge"] = "University of Cambridge", + ["Oxford"] = "University of Oxford", + ["Cambridge University"] = "University of Cambridge", + ["Oxford University"] = "University of Oxford", + ["Imperial"] = "Imperial College London", + ["Imperial College"] = "Imperial College London", + ["Kings College London"] = "King's College London", + ["Kings London"] = "King's College London", + ["KCL"] = "King's College London", + ["Edinburgh"] = "University of Edinburgh", + ["Manchester"] = "University of Manchester", + ["Bristol"] = "University of Bristol", + ["Warwick"] = "University of Warwick", + ["Durham"] = "Durham University", + ["Bath"] = "University of Bath", + ["Exeter"] = "University of Exeter", + ["York"] = "University of York", + ["Leeds"] = "University of Leeds", + ["Sheffield"] = "University of Sheffield", + ["Birmingham"] = "University of Birmingham", + ["Nottingham"] = "University of Nottingham", + ["Southampton"] = "University of Southampton", + ["Glasgow"] = "University of Glasgow", + ["Liverpool"] = "University of Liverpool", + ["Lancaster"] = "Lancaster University", + ["Leicester"] = "University of Leicester", + ["Surrey"] = "University of Surrey", + ["Sussex"] = "University of Sussex", + ["Reading"] = "University of Reading", + ["Loughborough"] = "Loughborough University", + ["Brunel"] = "Brunel University 
London", + ["Kent"] = "University of Kent", + ["Essex"] = "University of Essex", + ["Strathclyde"] = "University of Strathclyde", + ["Heriot Watt"] = "Heriot-Watt University", + ["Heriot-Watt"] = "Heriot-Watt University", + ["St Andrews University"] = "University of St Andrews", + ["Saint Andrews"] = "University of St Andrews", + ["Birkbeck"] = "Birkbeck, University of London", + ["QMUL"] = "Queen Mary University of London", + ["Queen Mary"] = "Queen Mary University of London", + ["Royal Holloway University"] = "Royal Holloway, University of London", + ["RHUL"] = "Royal Holloway, University of London", + }; + + /// + /// Check if an institution is recognised. Handles common variations. + /// + public static bool IsRecognised(string institutionName) + { + if (string.IsNullOrWhiteSpace(institutionName)) + return false; + + var normalised = institutionName.Trim(); + + // Direct match + if (RecognisedInstitutions.Contains(normalised)) + return true; + + // Check variations + if (NameVariations.TryGetValue(normalised, out var officialName)) + return RecognisedInstitutions.Contains(officialName); + + // Fuzzy match - check if any recognised institution contains the search term + // or if the search term contains a recognised institution + foreach (var institution in RecognisedInstitutions) + { + if (institution.Contains(normalised, StringComparison.OrdinalIgnoreCase) || + normalised.Contains(institution, StringComparison.OrdinalIgnoreCase)) + { + return true; + } + } + + return false; + } + + /// + /// Get the official name of an institution if found. + /// + public static string? 
GetOfficialName(string institutionName) + { + if (string.IsNullOrWhiteSpace(institutionName)) + return null; + + var normalised = institutionName.Trim(); + + // Direct match + if (RecognisedInstitutions.Contains(normalised)) + return normalised; + + // Check variations + if (NameVariations.TryGetValue(normalised, out var officialName)) + return officialName; + + // Fuzzy match + foreach (var institution in RecognisedInstitutions) + { + if (institution.Contains(normalised, StringComparison.OrdinalIgnoreCase) || + normalised.Contains(institution, StringComparison.OrdinalIgnoreCase)) + { + return institution; + } + } + + return null; + } +} diff --git a/src/TrueCV.Application/Helpers/DateHelpers.cs b/src/TrueCV.Application/Helpers/DateHelpers.cs new file mode 100644 index 0000000..78bb122 --- /dev/null +++ b/src/TrueCV.Application/Helpers/DateHelpers.cs @@ -0,0 +1,19 @@ +namespace TrueCV.Application.Helpers; + +public static class DateHelpers +{ + public static DateOnly? ParseDate(string? dateString) + { + if (string.IsNullOrWhiteSpace(dateString)) + { + return null; + } + + if (DateOnly.TryParse(dateString, out var date)) + { + return date; + } + + return null; + } +} diff --git a/src/TrueCV.Application/Helpers/JsonDefaults.cs b/src/TrueCV.Application/Helpers/JsonDefaults.cs new file mode 100644 index 0000000..f6c7a93 --- /dev/null +++ b/src/TrueCV.Application/Helpers/JsonDefaults.cs @@ -0,0 +1,19 @@ +using System.Text.Json; + +namespace TrueCV.Application.Helpers; + +public static class JsonDefaults +{ + public static readonly JsonSerializerOptions CamelCase = new() + { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + PropertyNameCaseInsensitive = true + }; + + public static readonly JsonSerializerOptions CamelCaseIndented = new() + { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + PropertyNameCaseInsensitive = true, + WriteIndented = true + }; +} diff --git a/src/TrueCV.Application/Helpers/ScoreThresholds.cs 
b/src/TrueCV.Application/Helpers/ScoreThresholds.cs new file mode 100644 index 0000000..439e3c4 --- /dev/null +++ b/src/TrueCV.Application/Helpers/ScoreThresholds.cs @@ -0,0 +1,21 @@ +namespace TrueCV.Application.Helpers; + +public static class ScoreThresholds +{ + public const int High = 70; + public const int Medium = 50; + + public static string GetScoreClass(int score) => score switch + { + > High => "score-high", + >= Medium => "score-medium", + _ => "score-low" + }; + + public static string GetBadgeClass(int score) => score switch + { + > High => "bg-success", + >= Medium => "bg-warning text-dark", + _ => "bg-danger" + }; +} diff --git a/src/TrueCV.Application/Interfaces/ICVParserService.cs b/src/TrueCV.Application/Interfaces/ICVParserService.cs index da8244c..bc5637a 100644 --- a/src/TrueCV.Application/Interfaces/ICVParserService.cs +++ b/src/TrueCV.Application/Interfaces/ICVParserService.cs @@ -4,5 +4,5 @@ namespace TrueCV.Application.Interfaces; public interface ICVParserService { - Task ParseAsync(Stream fileStream, string fileName); + Task ParseAsync(Stream fileStream, string fileName, CancellationToken cancellationToken = default); } diff --git a/src/TrueCV.Application/Interfaces/IEducationVerifierService.cs b/src/TrueCV.Application/Interfaces/IEducationVerifierService.cs new file mode 100644 index 0000000..0620cc9 --- /dev/null +++ b/src/TrueCV.Application/Interfaces/IEducationVerifierService.cs @@ -0,0 +1,16 @@ +using TrueCV.Application.Models; + +namespace TrueCV.Application.Interfaces; + +public interface IEducationVerifierService +{ + /// + /// Verify a single education entry. + /// + EducationVerificationResult Verify(EducationEntry education); + + /// + /// Verify all education entries and check for timeline issues. + /// + List VerifyAll(List education, List? 
employment = null); +} diff --git a/src/TrueCV.Application/Interfaces/IUserContextService.cs b/src/TrueCV.Application/Interfaces/IUserContextService.cs new file mode 100644 index 0000000..863cdd8 --- /dev/null +++ b/src/TrueCV.Application/Interfaces/IUserContextService.cs @@ -0,0 +1,6 @@ +namespace TrueCV.Application.Interfaces; + +public interface IUserContextService +{ + Task GetCurrentUserIdAsync(); +} diff --git a/src/TrueCV.Application/Models/EducationVerificationResult.cs b/src/TrueCV.Application/Models/EducationVerificationResult.cs new file mode 100644 index 0000000..a4f8e3c --- /dev/null +++ b/src/TrueCV.Application/Models/EducationVerificationResult.cs @@ -0,0 +1,22 @@ +namespace TrueCV.Application.Models; + +public sealed record EducationVerificationResult +{ + public required string ClaimedInstitution { get; init; } + public string? MatchedInstitution { get; init; } + public required string Status { get; init; } // Recognised, NotRecognised, DiplomaMill, Suspicious, Unknown + public bool IsVerified { get; init; } + public bool IsDiplomaMill { get; init; } + public bool IsSuspicious { get; init; } + public string? VerificationNotes { get; init; } + + // Date plausibility + public DateOnly? ClaimedStartDate { get; init; } + public DateOnly? ClaimedEndDate { get; init; } + public bool DatesArePlausible { get; init; } = true; + public string? DatePlausibilityNotes { get; init; } + + // Qualification info + public string? ClaimedQualification { get; init; } + public string? 
ClaimedSubject { get; init; } +} diff --git a/src/TrueCV.Application/Models/VeracityReport.cs b/src/TrueCV.Application/Models/VeracityReport.cs index cce7985..7190660 100644 --- a/src/TrueCV.Application/Models/VeracityReport.cs +++ b/src/TrueCV.Application/Models/VeracityReport.cs @@ -5,6 +5,7 @@ public sealed record VeracityReport public required int OverallScore { get; init; } public required string ScoreLabel { get; init; } public List EmploymentVerifications { get; init; } = []; + public List EducationVerifications { get; init; } = []; public required TimelineAnalysisResult TimelineAnalysis { get; init; } public List Flags { get; init; } = []; public required DateTime GeneratedAt { get; init; } diff --git a/src/TrueCV.Domain/Entities/CVCheck.cs b/src/TrueCV.Domain/Entities/CVCheck.cs index 24416ce..79bdd5a 100644 --- a/src/TrueCV.Domain/Entities/CVCheck.cs +++ b/src/TrueCV.Domain/Entities/CVCheck.cs @@ -1,5 +1,4 @@ using System.ComponentModel.DataAnnotations; -using System.ComponentModel.DataAnnotations.Schema; using TrueCV.Domain.Enums; namespace TrueCV.Domain.Entities; @@ -31,9 +30,5 @@ public class CVCheck public DateTime? CompletedAt { get; set; } - // Navigation properties - [ForeignKey(nameof(UserId))] - public User User { get; set; } = null!; - public ICollection Flags { get; set; } = new List(); } diff --git a/src/TrueCV.Domain/Entities/User.cs b/src/TrueCV.Domain/Entities/User.cs deleted file mode 100644 index 4638cd4..0000000 --- a/src/TrueCV.Domain/Entities/User.cs +++ /dev/null @@ -1,24 +0,0 @@ -using System.ComponentModel.DataAnnotations; -using TrueCV.Domain.Enums; - -namespace TrueCV.Domain.Entities; - -public class User -{ - [Key] - public Guid Id { get; set; } - - [Required] - [MaxLength(256)] - public string Email { get; set; } = string.Empty; - - public UserPlan Plan { get; set; } - - [MaxLength(256)] - public string? 
StripeCustomerId { get; set; } - - public int ChecksUsedThisMonth { get; set; } - - // Navigation property - public ICollection CVChecks { get; set; } = new List(); -} diff --git a/src/TrueCV.Infrastructure/Configuration/LocalStorageSettings.cs b/src/TrueCV.Infrastructure/Configuration/LocalStorageSettings.cs new file mode 100644 index 0000000..7ab9685 --- /dev/null +++ b/src/TrueCV.Infrastructure/Configuration/LocalStorageSettings.cs @@ -0,0 +1,8 @@ +namespace TrueCV.Infrastructure.Configuration; + +public sealed class LocalStorageSettings +{ + public const string SectionName = "LocalStorage"; + + public string StoragePath { get; set; } = "./uploads"; +} diff --git a/src/TrueCV.Infrastructure/Data/ApplicationDbContext.cs b/src/TrueCV.Infrastructure/Data/ApplicationDbContext.cs index 967e292..d7477c4 100644 --- a/src/TrueCV.Infrastructure/Data/ApplicationDbContext.cs +++ b/src/TrueCV.Infrastructure/Data/ApplicationDbContext.cs @@ -2,7 +2,6 @@ using Microsoft.AspNetCore.Identity; using Microsoft.AspNetCore.Identity.EntityFrameworkCore; using Microsoft.EntityFrameworkCore; using TrueCV.Domain.Entities; -using TrueCV.Domain.Enums; using TrueCV.Infrastructure.Identity; namespace TrueCV.Infrastructure.Data; @@ -64,9 +63,6 @@ public class ApplicationDbContext : IdentityDbContext f.CVCheck) .HasForeignKey(f => f.CVCheckId) .OnDelete(DeleteBehavior.Cascade); - - // Ignore the User navigation property since we're using ApplicationUser - entity.Ignore(c => c.User); }); } diff --git a/src/TrueCV.Infrastructure/DependencyInjection.cs b/src/TrueCV.Infrastructure/DependencyInjection.cs index 5e74b35..eaa3943 100644 --- a/src/TrueCV.Infrastructure/DependencyInjection.cs +++ b/src/TrueCV.Infrastructure/DependencyInjection.cs @@ -1,4 +1,3 @@ -using Azure.Storage.Blobs; using Hangfire; using Hangfire.SqlServer; using Microsoft.EntityFrameworkCore; @@ -59,6 +58,9 @@ public static class DependencyInjection services.Configure( configuration.GetSection(AzureBlobSettings.SectionName)); + 
services.Configure( + configuration.GetSection(LocalStorageSettings.SectionName)); + // Configure HttpClient for CompaniesHouseClient with retry policy services.AddHttpClient((serviceProvider, client) => { @@ -73,22 +75,24 @@ public static class DependencyInjection }) .AddPolicyHandler(GetRetryPolicy()); - // Configure BlobServiceClient - var azureBlobConnectionString = configuration - .GetSection(AzureBlobSettings.SectionName) - .GetValue("ConnectionString"); - - if (!string.IsNullOrWhiteSpace(azureBlobConnectionString)) - { - services.AddSingleton(_ => new BlobServiceClient(azureBlobConnectionString)); - } - // Register services services.AddScoped(); services.AddScoped(); + services.AddScoped(); services.AddScoped(); - services.AddScoped(); services.AddScoped(); + services.AddScoped(); + + // Register file storage - use local storage if configured, otherwise Azure + var useLocalStorage = configuration.GetValue("UseLocalStorage"); + if (useLocalStorage) + { + services.AddScoped(); + } + else + { + services.AddScoped(); + } // Register Hangfire jobs services.AddTransient(); diff --git a/src/TrueCV.Infrastructure/Jobs/ProcessCVCheckJob.cs b/src/TrueCV.Infrastructure/Jobs/ProcessCVCheckJob.cs index 73cafa6..7b208c1 100644 --- a/src/TrueCV.Infrastructure/Jobs/ProcessCVCheckJob.cs +++ b/src/TrueCV.Infrastructure/Jobs/ProcessCVCheckJob.cs @@ -1,6 +1,7 @@ using System.Text.Json; using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.Logging; +using TrueCV.Application.Helpers; using TrueCV.Application.Interfaces; using TrueCV.Application.Models; using TrueCV.Domain.Entities; @@ -15,26 +16,26 @@ public sealed class ProcessCVCheckJob private readonly IFileStorageService _fileStorageService; private readonly ICVParserService _cvParserService; private readonly ICompanyVerifierService _companyVerifierService; + private readonly IEducationVerifierService _educationVerifierService; private readonly ITimelineAnalyserService _timelineAnalyserService; private readonly 
ILogger _logger; - private static readonly JsonSerializerOptions JsonOptions = new() - { - PropertyNamingPolicy = JsonNamingPolicy.CamelCase, - WriteIndented = true - }; - private const int BaseScore = 100; private const int UnverifiedCompanyPenalty = 10; private const int GapMonthPenalty = 1; private const int MaxGapPenalty = 10; private const int OverlapMonthPenalty = 2; + private const int DiplomaMillPenalty = 25; + private const int SuspiciousInstitutionPenalty = 15; + private const int UnverifiedEducationPenalty = 5; + private const int EducationDatePenalty = 10; public ProcessCVCheckJob( ApplicationDbContext dbContext, IFileStorageService fileStorageService, ICVParserService cvParserService, ICompanyVerifierService companyVerifierService, + IEducationVerifierService educationVerifierService, ITimelineAnalyserService timelineAnalyserService, ILogger logger) { @@ -42,6 +43,7 @@ public sealed class ProcessCVCheckJob _fileStorageService = fileStorageService; _cvParserService = cvParserService; _companyVerifierService = companyVerifierService; + _educationVerifierService = educationVerifierService; _timelineAnalyserService = timelineAnalyserService; _logger = logger; } @@ -73,53 +75,78 @@ public sealed class ProcessCVCheckJob _logger.LogDebug("Downloaded CV file for check {CheckId}", cvCheckId); // Step 3: Parse CV - var cvData = await _cvParserService.ParseAsync(fileStream, cvCheck.OriginalFileName); + var cvData = await _cvParserService.ParseAsync(fileStream, cvCheck.OriginalFileName, cancellationToken); _logger.LogDebug( "Parsed CV for check {CheckId}: {EmploymentCount} employment entries", cvCheckId, cvData.Employment.Count); // Step 4: Save extracted data - cvCheck.ExtractedDataJson = JsonSerializer.Serialize(cvData, JsonOptions); + cvCheck.ExtractedDataJson = JsonSerializer.Serialize(cvData, JsonDefaults.CamelCaseIndented); await _dbContext.SaveChangesAsync(cancellationToken); - // Step 5: Verify each employment entry - var verificationResults = new List(); 
- foreach (var employment in cvData.Employment) + // Step 5: Verify each employment entry (parallelized with rate limiting) + var verificationTasks = cvData.Employment.Select(async employment => { var result = await _companyVerifierService.VerifyCompanyAsync( employment.CompanyName, employment.StartDate, employment.EndDate); - verificationResults.Add(result); - _logger.LogDebug( "Verified {Company}: {IsVerified} (Score: {Score}%)", employment.CompanyName, result.IsVerified, result.MatchScore); - } - // Step 6: Analyse timeline + return result; + }); + + var verificationResults = (await Task.WhenAll(verificationTasks)).ToList(); + + // Step 6: Verify education entries + var educationResults = _educationVerifierService.VerifyAll( + cvData.Education, + cvData.Employment); + + _logger.LogDebug( + "Education verification for check {CheckId}: {Count} entries verified ({Recognised} recognised, {DiplomaMill} diploma mills)", + cvCheckId, + educationResults.Count, + educationResults.Count(e => e.IsVerified), + educationResults.Count(e => e.IsDiplomaMill)); + + // Step 7: Analyse timeline var timelineAnalysis = _timelineAnalyserService.Analyse(cvData.Employment); _logger.LogDebug( "Timeline analysis for check {CheckId}: {GapCount} gaps, {OverlapCount} overlaps", cvCheckId, timelineAnalysis.Gaps.Count, timelineAnalysis.Overlaps.Count); - // Step 7: Calculate veracity score - var (score, flags) = CalculateVeracityScore(verificationResults, timelineAnalysis); + // Step 8: Calculate veracity score + var (score, flags) = CalculateVeracityScore(verificationResults, educationResults, timelineAnalysis); _logger.LogDebug("Calculated veracity score for check {CheckId}: {Score}", cvCheckId, score); - // Step 8: Create CVFlag records + // Step 9: Create CVFlag records foreach (var flag in flags) { + if (!Enum.TryParse(flag.Category, out var category)) + { + _logger.LogWarning("Unknown flag category: {Category}, defaulting to Timeline", flag.Category); + category = FlagCategory.Timeline; 
+ } + + if (!Enum.TryParse(flag.Severity, out var severity)) + { + _logger.LogWarning("Unknown flag severity: {Severity}, defaulting to Info", flag.Severity); + severity = FlagSeverity.Info; + } + var cvFlag = new CVFlag { Id = Guid.NewGuid(), CVCheckId = cvCheckId, - Category = Enum.Parse(flag.Category), - Severity = Enum.Parse(flag.Severity), + Category = category, + Severity = severity, Title = flag.Title, Description = flag.Description, ScoreImpact = flag.ScoreImpact @@ -128,21 +155,22 @@ public sealed class ProcessCVCheckJob _dbContext.CVFlags.Add(cvFlag); } - // Step 9: Generate veracity report + // Step 10: Generate veracity report var report = new VeracityReport { OverallScore = score, ScoreLabel = GetScoreLabel(score), EmploymentVerifications = verificationResults, + EducationVerifications = educationResults, TimelineAnalysis = timelineAnalysis, Flags = flags, GeneratedAt = DateTime.UtcNow }; - cvCheck.ReportJson = JsonSerializer.Serialize(report, JsonOptions); + cvCheck.ReportJson = JsonSerializer.Serialize(report, JsonDefaults.CamelCaseIndented); cvCheck.VeracityScore = score; - // Step 10: Update status to Completed + // Step 11: Update status to Completed cvCheck.Status = CheckStatus.Completed; cvCheck.CompletedAt = DateTime.UtcNow; await _dbContext.SaveChangesAsync(cancellationToken); @@ -156,7 +184,8 @@ public sealed class ProcessCVCheckJob _logger.LogError(ex, "Error processing CV check {CheckId}", cvCheckId); cvCheck.Status = CheckStatus.Failed; - await _dbContext.SaveChangesAsync(cancellationToken); + // Use CancellationToken.None to ensure failure status is saved even if original token is cancelled + await _dbContext.SaveChangesAsync(CancellationToken.None); throw; } @@ -164,6 +193,7 @@ public sealed class ProcessCVCheckJob private static (int Score, List Flags) CalculateVeracityScore( List verifications, + List educationResults, TimelineAnalysisResult timeline) { var score = BaseScore; @@ -184,6 +214,66 @@ public sealed class ProcessCVCheckJob 
}); } + // Penalty for diploma mills (critical) + foreach (var edu in educationResults.Where(e => e.IsDiplomaMill)) + { + score -= DiplomaMillPenalty; + + flags.Add(new FlagResult + { + Category = FlagCategory.Education.ToString(), + Severity = FlagSeverity.Critical.ToString(), + Title = "Diploma Mill Detected", + Description = $"'{edu.ClaimedInstitution}' is a known diploma mill. {edu.VerificationNotes}", + ScoreImpact = -DiplomaMillPenalty + }); + } + + // Penalty for suspicious institutions + foreach (var edu in educationResults.Where(e => e.IsSuspicious && !e.IsDiplomaMill)) + { + score -= SuspiciousInstitutionPenalty; + + flags.Add(new FlagResult + { + Category = FlagCategory.Education.ToString(), + Severity = FlagSeverity.Warning.ToString(), + Title = "Suspicious Institution", + Description = $"'{edu.ClaimedInstitution}' has suspicious characteristics. {edu.VerificationNotes}", + ScoreImpact = -SuspiciousInstitutionPenalty + }); + } + + // Penalty for unverified education (not recognised, but not flagged as fake) + foreach (var edu in educationResults.Where(e => !e.IsVerified && !e.IsDiplomaMill && !e.IsSuspicious && e.Status == "Unknown")) + { + score -= UnverifiedEducationPenalty; + + flags.Add(new FlagResult + { + Category = FlagCategory.Education.ToString(), + Severity = FlagSeverity.Info.ToString(), + Title = "Unverified Institution", + Description = $"Could not verify '{edu.ClaimedInstitution}'. 
{edu.VerificationNotes}", + ScoreImpact = -UnverifiedEducationPenalty + }); + } + + // Penalty for implausible education dates + foreach (var edu in educationResults.Where(e => !e.DatesArePlausible)) + { + score -= EducationDatePenalty; + + flags.Add(new FlagResult + { + Category = FlagCategory.Education.ToString(), + Severity = FlagSeverity.Warning.ToString(), + Title = "Education Date Issues", + Description = $"Date issues for '{edu.ClaimedInstitution}': {edu.DatePlausibilityNotes}", + ScoreImpact = -EducationDatePenalty + }); + } + // Penalty for gaps (max -10 per gap) foreach (var gap in timeline.Gaps) { @@ -205,7 +295,7 @@ public sealed class ProcessCVCheckJob // Penalty for overlaps (only if > 2 months) foreach (var overlap in timeline.Overlaps) { - var excessMonths = overlap.Months - 2; // Allow 2 month transition + var excessMonths = Math.Max(0, overlap.Months - 2); // Allow 2 month transition, prevent negative var overlapPenalty = excessMonths * OverlapMonthPenalty; score -= overlapPenalty; diff --git a/src/TrueCV.Infrastructure/Services/CVCheckService.cs b/src/TrueCV.Infrastructure/Services/CVCheckService.cs index 7e349fe..7f9b4fc 100644 --- a/src/TrueCV.Infrastructure/Services/CVCheckService.cs +++ b/src/TrueCV.Infrastructure/Services/CVCheckService.cs @@ -3,6 +3,7 @@ using Hangfire; using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.Logging; using TrueCV.Application.DTOs; +using TrueCV.Application.Helpers; using TrueCV.Application.Interfaces; using TrueCV.Application.Models; using TrueCV.Domain.Entities; @@ -139,7 +140,7 @@ public sealed class CVCheckService : ICVCheckService try { - var report = JsonSerializer.Deserialize(cvCheck.ReportJson); + var report = JsonSerializer.Deserialize(cvCheck.ReportJson, JsonDefaults.CamelCase); return report; } catch (JsonException ex) diff --git a/src/TrueCV.Infrastructure/Services/CVParserService.cs b/src/TrueCV.Infrastructure/Services/CVParserService.cs index 88011fb..0f37427 100644 --- 
a/src/TrueCV.Infrastructure/Services/CVParserService.cs +++ b/src/TrueCV.Infrastructure/Services/CVParserService.cs @@ -6,6 +6,7 @@ using DocumentFormat.OpenXml.Packaging; using DocumentFormat.OpenXml.Wordprocessing; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; +using TrueCV.Application.Helpers; using TrueCV.Application.Interfaces; using TrueCV.Application.Models; using TrueCV.Infrastructure.Configuration; @@ -18,12 +19,6 @@ public sealed class CVParserService : ICVParserService private readonly AnthropicClient _anthropicClient; private readonly ILogger _logger; - private static readonly JsonSerializerOptions JsonOptions = new() - { - PropertyNamingPolicy = JsonNamingPolicy.CamelCase, - PropertyNameCaseInsensitive = true - }; - private const string SystemPrompt = """ You are a CV/Resume parser. Your task is to extract structured information from CV text. You must respond ONLY with valid JSON, no other text or markdown. @@ -80,14 +75,14 @@ public sealed class CVParserService : ICVParserService _anthropicClient = new AnthropicClient(settings.Value.ApiKey); } - public async Task ParseAsync(Stream fileStream, string fileName) + public async Task ParseAsync(Stream fileStream, string fileName, CancellationToken cancellationToken = default) { ArgumentNullException.ThrowIfNull(fileStream); ArgumentException.ThrowIfNullOrWhiteSpace(fileName); _logger.LogDebug("Parsing CV file: {FileName}", fileName); - var text = await ExtractTextAsync(fileStream, fileName); + var text = await ExtractTextAsync(fileStream, fileName, cancellationToken); if (string.IsNullOrWhiteSpace(text)) { @@ -97,7 +92,7 @@ public sealed class CVParserService : ICVParserService _logger.LogDebug("Extracted {CharCount} characters from {FileName}", text.Length, fileName); - var cvData = await ParseWithClaudeAsync(text); + var cvData = await ParseWithClaudeAsync(text, cancellationToken); _logger.LogInformation( "Successfully parsed CV for {FullName} with {EmploymentCount} employment 
entries and {EducationCount} education entries", @@ -108,23 +103,23 @@ public sealed class CVParserService : ICVParserService return cvData; } - private async Task ExtractTextAsync(Stream fileStream, string fileName) + private async Task ExtractTextAsync(Stream fileStream, string fileName, CancellationToken cancellationToken) { var extension = Path.GetExtension(fileName).ToLowerInvariant(); return extension switch { - ".pdf" => await ExtractTextFromPdfAsync(fileStream), + ".pdf" => await ExtractTextFromPdfAsync(fileStream, cancellationToken), ".docx" => ExtractTextFromDocx(fileStream), _ => throw new NotSupportedException($"File type '{extension}' is not supported. Only PDF and DOCX files are accepted.") }; } - private async Task ExtractTextFromPdfAsync(Stream fileStream) + private async Task ExtractTextFromPdfAsync(Stream fileStream, CancellationToken cancellationToken) { // Copy stream to memory for PdfPig (requires seekable stream) using var memoryStream = new MemoryStream(); - await fileStream.CopyToAsync(memoryStream); + await fileStream.CopyToAsync(memoryStream, cancellationToken); memoryStream.Position = 0; using var document = PdfDocument.Open(memoryStream); @@ -132,6 +127,7 @@ public sealed class CVParserService : ICVParserService foreach (var page in document.GetPages()) { + cancellationToken.ThrowIfCancellationRequested(); var pageText = page.Text; textBuilder.AppendLine(pageText); } @@ -163,7 +159,7 @@ public sealed class CVParserService : ICVParserService return textBuilder.ToString(); } - private async Task ParseWithClaudeAsync(string cvText) + private async Task ParseWithClaudeAsync(string cvText, CancellationToken cancellationToken) { var prompt = ExtractionPrompt.Replace("{CV_TEXT}", cvText); @@ -182,7 +178,7 @@ public sealed class CVParserService : ICVParserService _logger.LogDebug("Sending CV text to Claude API for parsing"); - var response = await _anthropicClient.Messages.GetClaudeMessageAsync(parameters); + var response = await 
_anthropicClient.Messages.GetClaudeMessageAsync(parameters, cancellationToken); var responseText = response.Content .OfType() @@ -201,7 +197,7 @@ public sealed class CVParserService : ICVParserService try { - var parsedResponse = JsonSerializer.Deserialize(responseText, JsonOptions); + var parsedResponse = JsonSerializer.Deserialize(responseText, JsonDefaults.CamelCase); if (parsedResponse is null) { @@ -251,8 +247,8 @@ public sealed class CVParserService : ICVParserService CompanyName = e.CompanyName ?? "Unknown Company", JobTitle = e.JobTitle ?? "Unknown Position", Location = e.Location, - StartDate = ParseDate(e.StartDate), - EndDate = ParseDate(e.EndDate), + StartDate = DateHelpers.ParseDate(e.StartDate), + EndDate = DateHelpers.ParseDate(e.EndDate), IsCurrent = e.IsCurrent ?? false, Description = e.Description }).ToList() ?? [], @@ -262,28 +258,13 @@ public sealed class CVParserService : ICVParserService Qualification = e.Qualification, Subject = e.Subject, Grade = e.Grade, - StartDate = ParseDate(e.StartDate), - EndDate = ParseDate(e.EndDate) + StartDate = DateHelpers.ParseDate(e.StartDate), + EndDate = DateHelpers.ParseDate(e.EndDate) }).ToList() ?? [], Skills = response.Skills ?? [] }; } - private static DateOnly? ParseDate(string? 
dateString) - { - if (string.IsNullOrWhiteSpace(dateString)) - { - return null; - } - - if (DateOnly.TryParse(dateString, out var date)) - { - return date; - } - - return null; - } - // Internal DTOs for Claude response parsing private sealed record ClaudeCVResponse { diff --git a/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs b/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs index a398a0f..59017cc 100644 --- a/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs +++ b/src/TrueCV.Infrastructure/Services/CompanyVerifierService.cs @@ -2,6 +2,7 @@ using FuzzySharp; using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.Logging; using TrueCV.Application.DTOs; +using TrueCV.Application.Helpers; using TrueCV.Application.Interfaces; using TrueCV.Application.Models; using TrueCV.Domain.Entities; @@ -113,7 +114,7 @@ public sealed class CompanyVerifierService : ICompanyVerifierService CompanyNumber = item.CompanyNumber, CompanyName = item.Title, CompanyStatus = item.CompanyStatus ?? "Unknown", - IncorporationDate = ParseDate(item.DateOfCreation), + IncorporationDate = DateHelpers.ParseDate(item.DateOfCreation), AddressSnippet = item.AddressSnippet }).ToList(); } @@ -166,8 +167,8 @@ public sealed class CompanyVerifierService : ICompanyVerifierService { existingCache.CompanyName = item.Title; existingCache.Status = item.CompanyStatus ?? "Unknown"; - existingCache.IncorporationDate = ParseDate(item.DateOfCreation); - existingCache.DissolutionDate = ParseDate(item.DateOfCessation); + existingCache.IncorporationDate = DateHelpers.ParseDate(item.DateOfCreation); + existingCache.DissolutionDate = DateHelpers.ParseDate(item.DateOfCessation); existingCache.CachedAt = DateTime.UtcNow; } else @@ -177,8 +178,8 @@ public sealed class CompanyVerifierService : ICompanyVerifierService CompanyNumber = item.CompanyNumber, CompanyName = item.Title, Status = item.CompanyStatus ?? 
"Unknown", - IncorporationDate = ParseDate(item.DateOfCreation), - DissolutionDate = ParseDate(item.DateOfCessation), + IncorporationDate = DateHelpers.ParseDate(item.DateOfCreation), + DissolutionDate = DateHelpers.ParseDate(item.DateOfCessation), CachedAt = DateTime.UtcNow }; @@ -230,18 +231,4 @@ public sealed class CompanyVerifierService : ICompanyVerifierService }; } - private static DateOnly? ParseDate(string? dateString) - { - if (string.IsNullOrWhiteSpace(dateString)) - { - return null; - } - - if (DateOnly.TryParse(dateString, out var date)) - { - return date; - } - - return null; - } } diff --git a/src/TrueCV.Infrastructure/Services/EducationVerifierService.cs b/src/TrueCV.Infrastructure/Services/EducationVerifierService.cs new file mode 100644 index 0000000..9ff0505 --- /dev/null +++ b/src/TrueCV.Infrastructure/Services/EducationVerifierService.cs @@ -0,0 +1,267 @@ +using TrueCV.Application.Data; +using TrueCV.Application.Interfaces; +using TrueCV.Application.Models; + +namespace TrueCV.Infrastructure.Services; + +public class EducationVerifierService : IEducationVerifierService +{ + private const int MinimumDegreeYears = 1; + private const int MaximumDegreeYears = 8; + private const int MinimumGraduationAge = 18; + + public EducationVerificationResult Verify(EducationEntry education) + { + var institution = education.Institution; + + // Check for diploma mill first (highest priority flag) + if (DiplomaMills.IsDiplomaMill(institution)) + { + return new EducationVerificationResult + { + ClaimedInstitution = institution, + Status = "DiplomaMill", + IsVerified = false, + IsDiplomaMill = true, + IsSuspicious = true, + VerificationNotes = "Institution is on the diploma mill blacklist", + ClaimedStartDate = education.StartDate, + ClaimedEndDate = education.EndDate, + DatesArePlausible = true, + ClaimedQualification = education.Qualification, + ClaimedSubject = education.Subject + }; + } + + // Check for suspicious patterns + if 
(DiplomaMills.HasSuspiciousPattern(institution)) + { + return new EducationVerificationResult + { + ClaimedInstitution = institution, + Status = "Suspicious", + IsVerified = false, + IsDiplomaMill = false, + IsSuspicious = true, + VerificationNotes = "Institution name contains suspicious patterns common in diploma mills", + ClaimedStartDate = education.StartDate, + ClaimedEndDate = education.EndDate, + DatesArePlausible = true, + ClaimedQualification = education.Qualification, + ClaimedSubject = education.Subject + }; + } + + // Check if it's a recognised UK institution + var officialName = UKInstitutions.GetOfficialName(institution); + if (officialName != null) + { + var (datesPlausible, dateNotes) = CheckDatePlausibility(education.StartDate, education.EndDate); + + return new EducationVerificationResult + { + ClaimedInstitution = institution, + MatchedInstitution = officialName, + Status = "Recognised", + IsVerified = true, + IsDiplomaMill = false, + IsSuspicious = false, + VerificationNotes = institution.Equals(officialName, StringComparison.OrdinalIgnoreCase) + ? "Verified UK higher education institution" + : $"Matched to official name: {officialName}", + ClaimedStartDate = education.StartDate, + ClaimedEndDate = education.EndDate, + DatesArePlausible = datesPlausible, + DatePlausibilityNotes = dateNotes, + ClaimedQualification = education.Qualification, + ClaimedSubject = education.Subject + }; + } + + // Not in our database - could be international or unrecognised + return new EducationVerificationResult + { + ClaimedInstitution = institution, + Status = "Unknown", + IsVerified = false, + IsDiplomaMill = false, + IsSuspicious = false, + VerificationNotes = "Institution not found in UK recognised institutions database. 
May be an international institution.", + ClaimedStartDate = education.StartDate, + ClaimedEndDate = education.EndDate, + DatesArePlausible = true, + ClaimedQualification = education.Qualification, + ClaimedSubject = education.Subject + }; + } + + public List VerifyAll( + List education, + List? employment = null) + { + var results = new List(); + + foreach (var edu in education) + { + var result = Verify(edu); + + // If we have employment data, check for timeline issues + if (employment?.Count > 0 && result.ClaimedEndDate.HasValue) + { + var (timelinePlausible, timelineNotes) = CheckEducationEmploymentTimeline( + result.ClaimedEndDate.Value, + employment); + + if (!timelinePlausible) + { + result = result with + { + DatesArePlausible = false, + DatePlausibilityNotes = CombineNotes(result.DatePlausibilityNotes, timelineNotes) + }; + } + } + + results.Add(result); + } + + // Check for overlapping education periods + CheckOverlappingEducation(results); + + return results; + } + + private static (bool isPlausible, string? notes) CheckDatePlausibility(DateOnly? startDate, DateOnly? 
endDate) + { + if (!startDate.HasValue || !endDate.HasValue) + { + return (true, null); + } + + var start = startDate.Value; + var end = endDate.Value; + + // End date should be after start date + if (end <= start) + { + return (false, "End date is before or equal to start date"); + } + + // Check course duration is reasonable + var years = (end.ToDateTime(TimeOnly.MinValue) - start.ToDateTime(TimeOnly.MinValue)).TotalDays / 365.25; + + if (years < MinimumDegreeYears) + { + return (false, $"Course duration ({years:F1} years) is unusually short for a degree"); + } + + if (years > MaximumDegreeYears) + { + return (false, $"Course duration ({years:F1} years) is unusually long"); + } + + // Check if graduation date is in the future + if (end > DateOnly.FromDateTime(DateTime.UtcNow)) + { + return (true, "Graduation date is in the future - possibly currently studying"); + } + + return (true, null); + } + + private static (bool isPlausible, string? notes) CheckEducationEmploymentTimeline( + DateOnly graduationDate, + List employment) + { + // Find the earliest employment start date + var earliestEmployment = employment + .Where(e => e.StartDate.HasValue) + .OrderBy(e => e.StartDate) + .FirstOrDefault(); + + if (earliestEmployment?.StartDate == null) + { + return (true, null); + } + + var employmentStart = earliestEmployment.StartDate.Value; + + // If someone claims to have started full-time work significantly before graduating, + // that's suspicious (unless it's clearly an internship/part-time role) + var monthsBeforeGraduation = (graduationDate.ToDateTime(TimeOnly.MinValue) - + employmentStart.ToDateTime(TimeOnly.MinValue)).TotalDays / 30; + + if (monthsBeforeGraduation > 24) // More than 2 years before graduation + { + var isLikelyInternship = earliestEmployment.JobTitle.Contains("intern", StringComparison.OrdinalIgnoreCase) || + earliestEmployment.JobTitle.Contains("placement", StringComparison.OrdinalIgnoreCase) || + earliestEmployment.JobTitle.Contains("trainee", 
StringComparison.OrdinalIgnoreCase); + + if (!isLikelyInternship) + { + return (false, $"Employment at {earliestEmployment.CompanyName} started {monthsBeforeGraduation:F0} months before claimed graduation"); + } + } + + return (true, null); + } + + private static void CheckOverlappingEducation(List results) + { + var datedResults = results + .Where(r => r.ClaimedStartDate.HasValue && r.ClaimedEndDate.HasValue) + .ToList(); + + for (var i = 0; i < datedResults.Count; i++) + { + for (var j = i + 1; j < datedResults.Count; j++) + { + var edu1 = datedResults[i]; + var edu2 = datedResults[j]; + + if (PeriodsOverlap( + edu1.ClaimedStartDate!.Value, edu1.ClaimedEndDate!.Value, + edu2.ClaimedStartDate!.Value, edu2.ClaimedEndDate!.Value)) + { + // Find the actual index in the original results list + var idx1 = results.IndexOf(edu1); + var idx2 = results.IndexOf(edu2); + + if (idx1 >= 0) + { + results[idx1] = edu1 with + { + DatePlausibilityNotes = CombineNotes( + edu1.DatePlausibilityNotes, + $"Overlaps with education at {edu2.ClaimedInstitution}") + }; + } + + if (idx2 >= 0) + { + results[idx2] = edu2 with + { + DatePlausibilityNotes = CombineNotes( + edu2.DatePlausibilityNotes, + $"Overlaps with education at {edu1.ClaimedInstitution}") + }; + } + } + } + } + } + + private static bool PeriodsOverlap(DateOnly start1, DateOnly end1, DateOnly start2, DateOnly end2) + { + return start1 < end2 && start2 < end1; + } + + private static string? CombineNotes(string? existing, string? 
additional) + { + if (string.IsNullOrEmpty(additional)) + return existing; + if (string.IsNullOrEmpty(existing)) + return additional; + return $"{existing}; {additional}"; + } +} diff --git a/src/TrueCV.Infrastructure/Services/FileStorageService.cs b/src/TrueCV.Infrastructure/Services/FileStorageService.cs index c2524b1..048de4b 100644 --- a/src/TrueCV.Infrastructure/Services/FileStorageService.cs +++ b/src/TrueCV.Infrastructure/Services/FileStorageService.cs @@ -68,11 +68,15 @@ public sealed class FileStorageService : IFileStorageService var blobClient = _containerClient.GetBlobClient(blobName); - var response = await blobClient.DownloadStreamingAsync(); + // Download to memory stream to ensure proper resource management + // The caller will own and dispose this stream + var memoryStream = new MemoryStream(); + await blobClient.DownloadToAsync(memoryStream); + memoryStream.Position = 0; _logger.LogDebug("Successfully downloaded blob {BlobName}", blobName); - return response.Value.Content; + return memoryStream; } public async Task DeleteAsync(string blobUrl) @@ -99,12 +103,21 @@ public sealed class FileStorageService : IFileStorageService private static string ExtractBlobNameFromUrl(string blobUrl) { - var uri = new Uri(blobUrl); + if (!Uri.TryCreate(blobUrl, UriKind.Absolute, out var uri)) + { + throw new ArgumentException($"Invalid blob URL format: '{blobUrl}'", nameof(blobUrl)); + } + var segments = uri.Segments; // The blob name is the last segment after the container name // URL format: https://account.blob.core.windows.net/container/blobname - return segments.Length > 2 ? 
segments[^1] : throw new ArgumentException("Invalid blob URL", nameof(blobUrl)); + if (segments.Length <= 2) + { + throw new ArgumentException($"Blob URL does not contain a valid blob name: '{blobUrl}'", nameof(blobUrl)); + } + + return segments[^1]; } private static string GetContentType(string extension) diff --git a/src/TrueCV.Infrastructure/Services/LocalFileStorageService.cs b/src/TrueCV.Infrastructure/Services/LocalFileStorageService.cs new file mode 100644 index 0000000..ed55265 --- /dev/null +++ b/src/TrueCV.Infrastructure/Services/LocalFileStorageService.cs @@ -0,0 +1,117 @@ +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using TrueCV.Application.Interfaces; +using TrueCV.Infrastructure.Configuration; + +namespace TrueCV.Infrastructure.Services; + +public sealed class LocalFileStorageService : IFileStorageService +{ + private readonly string _storagePath; + private readonly ILogger _logger; + + public LocalFileStorageService( + IOptions settings, + ILogger logger) + { + _logger = logger; + _storagePath = settings.Value.StoragePath; + + if (!Directory.Exists(_storagePath)) + { + Directory.CreateDirectory(_storagePath); + _logger.LogInformation("Created local storage directory: {Path}", _storagePath); + } + } + + public async Task UploadAsync(Stream fileStream, string fileName) + { + ArgumentNullException.ThrowIfNull(fileStream); + ArgumentException.ThrowIfNullOrWhiteSpace(fileName); + + var extension = Path.GetExtension(fileName); + var uniqueFileName = $"{Guid.NewGuid()}{extension}"; + var filePath = Path.Combine(_storagePath, uniqueFileName); + + _logger.LogDebug("Uploading file {FileName} to {FilePath}", fileName, filePath); + + await using var fileStreamOut = new FileStream(filePath, FileMode.Create, FileAccess.Write); + await fileStream.CopyToAsync(fileStreamOut); + + // Return a file:// URL for local storage + var fileUrl = $"file://{filePath}"; + + _logger.LogInformation("Successfully uploaded file {FileName} to {FileUrl}", 
fileName, fileUrl); + + return fileUrl; + } + + public async Task DownloadAsync(string blobUrl) + { + ArgumentException.ThrowIfNullOrWhiteSpace(blobUrl); + + var filePath = ExtractFilePathFromUrl(blobUrl); + + _logger.LogDebug("Downloading file from {FilePath}", filePath); + + if (!File.Exists(filePath)) + { + throw new FileNotFoundException($"File not found: {filePath}"); + } + + var memoryStream = new MemoryStream(); + await using var fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read); + await fileStream.CopyToAsync(memoryStream); + memoryStream.Position = 0; + + _logger.LogDebug("Successfully downloaded file from {FilePath}", filePath); + + return memoryStream; + } + + public Task DeleteAsync(string blobUrl) + { + ArgumentException.ThrowIfNullOrWhiteSpace(blobUrl); + + var filePath = ExtractFilePathFromUrl(blobUrl); + + _logger.LogDebug("Deleting file {FilePath}", filePath); + + if (File.Exists(filePath)) + { + File.Delete(filePath); + _logger.LogInformation("Successfully deleted file {FilePath}", filePath); + } + else + { + _logger.LogWarning("File {FilePath} did not exist when attempting to delete", filePath); + } + + return Task.CompletedTask; + } + + private string ExtractFilePathFromUrl(string fileUrl) + { + string filePath; + + if (fileUrl.StartsWith("file://", StringComparison.OrdinalIgnoreCase)) + { + filePath = fileUrl[7..]; + } + else + { + filePath = fileUrl; + } + + // Resolve to absolute path and validate it's within storage directory + var fullPath = Path.GetFullPath(filePath); + var storagePath = Path.GetFullPath(_storagePath); + + if (!fullPath.StartsWith(storagePath, StringComparison.OrdinalIgnoreCase)) + { + throw new UnauthorizedAccessException($"Access denied: path is outside storage directory"); + } + + return fullPath; + } +} diff --git a/src/TrueCV.Infrastructure/Services/UserContextService.cs b/src/TrueCV.Infrastructure/Services/UserContextService.cs new file mode 100644 index 0000000..79c90ee --- /dev/null +++ 
b/src/TrueCV.Infrastructure/Services/UserContextService.cs @@ -0,0 +1,28 @@ +using System.Security.Claims; +using Microsoft.AspNetCore.Components.Authorization; +using TrueCV.Application.Interfaces; + +namespace TrueCV.Infrastructure.Services; + +public sealed class UserContextService : IUserContextService +{ + private readonly AuthenticationStateProvider _authenticationStateProvider; + + public UserContextService(AuthenticationStateProvider authenticationStateProvider) + { + _authenticationStateProvider = authenticationStateProvider; + } + + public async Task GetCurrentUserIdAsync() + { + var authState = await _authenticationStateProvider.GetAuthenticationStateAsync(); + var userIdClaim = authState.User.FindFirst(ClaimTypes.NameIdentifier)?.Value; + + if (string.IsNullOrEmpty(userIdClaim) || !Guid.TryParse(userIdClaim, out var userId)) + { + return null; + } + + return userId; + } +} diff --git a/src/TrueCV.Web/Components/Pages/Account/Login.razor b/src/TrueCV.Web/Components/Pages/Account/Login.razor index 6a9a2fc..db64f44 100644 --- a/src/TrueCV.Web/Components/Pages/Account/Login.razor +++ b/src/TrueCV.Web/Components/Pages/Account/Login.razor @@ -1,7 +1,6 @@ @page "/account/login" @using TrueCV.Web.Components.Layout @layout MainLayout -@rendermode InteractiveServer @using Microsoft.AspNetCore.Identity @using TrueCV.Infrastructure.Identity @@ -26,50 +25,40 @@ @if (!string.IsNullOrEmpty(_errorMessage)) { - @code { - private LoginModel _model = new(); - private bool _isLoading; private string? _errorMessage; [SupplyParameterFromQuery] public string? ReturnUrl { get; set; } - private async Task HandleLogin() + [SupplyParameterFromQuery(Name = "error")] + public string? 
Error { get; set; } + + protected override void OnInitialized() { - _isLoading = true; - _errorMessage = null; - - try - { - var result = await SignInManager.PasswordSignInAsync( - _model.Email, - _model.Password, - _model.RememberMe, - lockoutOnFailure: false); - - if (result.Succeeded) - { - var returnUrl = string.IsNullOrEmpty(ReturnUrl) ? "/dashboard" : ReturnUrl; - NavigationManager.NavigateTo(returnUrl, forceLoad: true); - } - else if (result.IsLockedOut) - { - _errorMessage = "This account has been locked out. Please try again later."; - } - else if (result.IsNotAllowed) - { - _errorMessage = "This account is not allowed to sign in."; - } - else - { - _errorMessage = "Invalid email or password."; - } - } - catch (Exception ex) - { - _errorMessage = $"An error occurred: {ex.Message}"; - } - finally - { - _isLoading = false; - } - } - - private sealed class LoginModel - { - [System.ComponentModel.DataAnnotations.Required(ErrorMessage = "Email is required")] - [System.ComponentModel.DataAnnotations.EmailAddress(ErrorMessage = "Invalid email format")] - public string Email { get; set; } = string.Empty; - - [System.ComponentModel.DataAnnotations.Required(ErrorMessage = "Password is required")] - public string Password { get; set; } = string.Empty; - - public bool RememberMe { get; set; } + _errorMessage = Error; } } diff --git a/src/TrueCV.Web/Components/Pages/Account/Register.razor b/src/TrueCV.Web/Components/Pages/Account/Register.razor index ddcd054..8617ffe 100644 --- a/src/TrueCV.Web/Components/Pages/Account/Register.razor +++ b/src/TrueCV.Web/Components/Pages/Account/Register.razor @@ -48,7 +48,7 @@ -
Password must be at least 6 characters.
+
Password must be at least 12 characters with uppercase, lowercase, number, and symbol.
@@ -153,7 +153,7 @@ public string Email { get; set; } = string.Empty; [System.ComponentModel.DataAnnotations.Required(ErrorMessage = "Password is required")] - [System.ComponentModel.DataAnnotations.MinLength(6, ErrorMessage = "Password must be at least 6 characters")] + [System.ComponentModel.DataAnnotations.MinLength(12, ErrorMessage = "Password must be at least 12 characters")] public string Password { get; set; } = string.Empty; [System.ComponentModel.DataAnnotations.Required(ErrorMessage = "Please confirm your password")] diff --git a/src/TrueCV.Web/Components/Pages/Check.razor b/src/TrueCV.Web/Components/Pages/Check.razor index 3c5abcf..a947bb8 100644 --- a/src/TrueCV.Web/Components/Pages/Check.razor +++ b/src/TrueCV.Web/Components/Pages/Check.razor @@ -5,6 +5,7 @@ @inject ICVCheckService CVCheckService @inject NavigationManager NavigationManager @inject AuthenticationStateProvider AuthenticationStateProvider +@inject ILogger Logger Upload CV - TrueCV @@ -145,6 +146,10 @@ private const long MaxFileSize = 10 * 1024 * 1024; // 10MB + // Magic bytes for file type validation + private static readonly byte[] PdfMagicBytes = [0x25, 0x50, 0x44, 0x46]; // %PDF + private static readonly byte[] DocxMagicBytes = [0x50, 0x4B, 0x03, 0x04]; // PK.. (ZIP signature) + private void HandleDragEnter() { _isDragging = true; @@ -186,10 +191,15 @@ _errorMessage = null; } + private CancellationTokenSource? _progressCts; + private async Task UploadFile() { if (_selectedFile is null) return; + _progressCts = new CancellationTokenSource(); + Task? 
progressTask = null; + try { _isUploading = true; @@ -207,35 +217,81 @@ } // Simulate progress for better UX - var progressTask = SimulateProgress(); + progressTask = SimulateProgress(_progressCts.Token); await using var stream = _selectedFile.OpenReadStream(MaxFileSize); using var memoryStream = new MemoryStream(); await stream.CopyToAsync(memoryStream); memoryStream.Position = 0; + // Validate file content (magic bytes) + if (!await ValidateFileContentAsync(memoryStream, _selectedFile.Name)) + { + _errorMessage = "Invalid file content. The file appears to be corrupted or not a valid PDF/DOCX."; + return; + } + var checkId = await CVCheckService.CreateCheckAsync(userId, memoryStream, _selectedFile.Name); _uploadProgress = 100; + await InvokeAsync(StateHasChanged); await Task.Delay(500); // Brief pause to show completion NavigationManager.NavigateTo($"/report/{checkId}"); } catch (Exception ex) { - _errorMessage = $"An error occurred while uploading: {ex.Message}"; + Logger.LogError(ex, "Error uploading CV"); + _errorMessage = "An error occurred while uploading. 
Please try again."; + } + finally + { _isUploading = false; + _progressCts?.Cancel(); + if (progressTask is not null) + { + try { await progressTask; } catch (OperationCanceledException) { } + } + _progressCts?.Dispose(); + _progressCts = null; } } - private async Task SimulateProgress() + private async Task SimulateProgress(CancellationToken cancellationToken) { - while (_uploadProgress < 90 && _isUploading) + try { - await Task.Delay(200); - _uploadProgress += 10; - StateHasChanged(); + while (_uploadProgress < 90 && _isUploading && !cancellationToken.IsCancellationRequested) + { + await Task.Delay(200, cancellationToken); + _uploadProgress += 10; + await InvokeAsync(StateHasChanged); + } } + catch (OperationCanceledException) + { + // Expected when upload completes + } + } + + private async Task ValidateFileContentAsync(MemoryStream stream, string fileName) + { + var extension = Path.GetExtension(fileName).ToLowerInvariant(); + var header = new byte[4]; + + stream.Position = 0; + var bytesRead = await stream.ReadAsync(header.AsMemory(0, 4)); + stream.Position = 0; + + if (bytesRead < 4) + return false; + + return extension switch + { + ".pdf" => header.AsSpan().StartsWith(PdfMagicBytes), + ".docx" => header.AsSpan().StartsWith(DocxMagicBytes), + _ => false + }; } private bool IsValidFileType(string fileName) diff --git a/src/TrueCV.Web/Components/Pages/Dashboard.razor b/src/TrueCV.Web/Components/Pages/Dashboard.razor index 571c84e..0568085 100644 --- a/src/TrueCV.Web/Components/Pages/Dashboard.razor +++ b/src/TrueCV.Web/Components/Pages/Dashboard.razor @@ -5,6 +5,7 @@ @inject ICVCheckService CVCheckService @inject NavigationManager NavigationManager @inject AuthenticationStateProvider AuthenticationStateProvider +@inject ILogger Logger Dashboard - TrueCV @@ -255,7 +256,8 @@ } catch (Exception ex) { - _errorMessage = $"An error occurred while loading checks: {ex.Message}"; + Logger.LogError(ex, "Error loading CV checks"); + _errorMessage = "An error occurred 
while loading checks. Please try again."; } finally { diff --git a/src/TrueCV.Web/Components/Pages/Report.razor b/src/TrueCV.Web/Components/Pages/Report.razor index 0d34a3c..019c3ba 100644 --- a/src/TrueCV.Web/Components/Pages/Report.razor +++ b/src/TrueCV.Web/Components/Pages/Report.razor @@ -5,6 +5,7 @@ @inject ICVCheckService CVCheckService @inject NavigationManager NavigationManager @inject AuthenticationStateProvider AuthenticationStateProvider +@inject ILogger Logger Verification Report - TrueCV @@ -509,7 +510,8 @@ } catch (Exception ex) { - _errorMessage = $"An error occurred: {ex.Message}"; + Logger.LogError(ex, "Error loading report data"); + _errorMessage = "An error occurred while loading the report. Please try again."; } finally { diff --git a/src/TrueCV.Web/Components/Shared/CVUploader.razor b/src/TrueCV.Web/Components/Shared/CVUploader.razor deleted file mode 100644 index 78abedf..0000000 --- a/src/TrueCV.Web/Components/Shared/CVUploader.razor +++ /dev/null @@ -1,190 +0,0 @@ -@using Microsoft.AspNetCore.Components.Forms - -
- - - - - - @if (!string.IsNullOrEmpty(_errorMessage)) - { - - } -
- - - -@code { - private const long MaxFileSizeBytes = 10 * 1024 * 1024; // 10MB - private static readonly string[] AllowedExtensions = [".pdf", ".docx"]; - - private bool _isDragOver; - private string? _selectedFileName; - private string? _errorMessage; - - [Parameter] - public EventCallback OnFileSelected { get; set; } - - private void HandleDragEnter() - { - _isDragOver = true; - } - - private void HandleDragLeave() - { - _isDragOver = false; - } - - private void HandleDrop() - { - _isDragOver = false; - } - - private async Task HandleFileSelected(InputFileChangeEventArgs e) - { - _errorMessage = null; - _selectedFileName = null; - - var file = e.File; - if (file is null) - { - return; - } - - var extension = Path.GetExtension(file.Name).ToLowerInvariant(); - if (!AllowedExtensions.Contains(extension)) - { - _errorMessage = "Invalid file type. Please upload a .pdf or .docx file."; - return; - } - - if (file.Size > MaxFileSizeBytes) - { - _errorMessage = "File size exceeds 10MB limit. 
Please upload a smaller file."; - return; - } - - _selectedFileName = file.Name; - await OnFileSelected.InvokeAsync(file); - } -} diff --git a/src/TrueCV.Web/Components/_Imports.razor b/src/TrueCV.Web/Components/_Imports.razor index 77d0176..fe16548 100644 --- a/src/TrueCV.Web/Components/_Imports.razor +++ b/src/TrueCV.Web/Components/_Imports.razor @@ -8,6 +8,7 @@ @using Microsoft.AspNetCore.Components.Web @using static Microsoft.AspNetCore.Components.Web.RenderMode @using Microsoft.AspNetCore.Components.Web.Virtualization +@using Microsoft.Extensions.Logging @using Microsoft.JSInterop @using TrueCV.Web @using TrueCV.Web.Components diff --git a/src/TrueCV.Web/Program.cs b/src/TrueCV.Web/Program.cs index 2dfe1bd..4a4a906 100644 --- a/src/TrueCV.Web/Program.cs +++ b/src/TrueCV.Web/Program.cs @@ -32,15 +32,19 @@ try // Add Infrastructure services (DbContext, Hangfire, HttpClients, Services) builder.Services.AddInfrastructure(builder.Configuration); - // Add Identity + // Add Identity with secure password requirements builder.Services.AddIdentity>(options => { - options.Password.RequireDigit = false; - options.Password.RequireLowercase = false; - options.Password.RequireUppercase = false; - options.Password.RequireNonAlphanumeric = false; - options.Password.RequiredLength = 6; + options.Password.RequireDigit = true; + options.Password.RequireLowercase = true; + options.Password.RequireUppercase = true; + options.Password.RequireNonAlphanumeric = true; + options.Password.RequiredLength = 12; + options.Password.RequiredUniqueChars = 4; options.SignIn.RequireConfirmedAccount = false; + options.Lockout.DefaultLockoutTimeSpan = TimeSpan.FromMinutes(5); + options.Lockout.MaxFailedAccessAttempts = 5; + options.Lockout.AllowedForNewUsers = true; }) .AddEntityFrameworkStores() .AddDefaultTokenProviders(); @@ -62,6 +66,26 @@ try var app = builder.Build(); + // Seed default admin user + using (var scope = app.Services.CreateScope()) + { + var userManager = 
scope.ServiceProvider.GetRequiredService>();
+        var defaultEmail = "admin@truecv.local";
+
+        // The admin password must come from configuration (user-secrets, or the
+        // DefaultAdmin__Password environment variable). The well-known fallback is
+        // honoured in Development only, so no other environment is ever seeded with a
+        // credential that is published in source control.
+        var defaultPassword = app.Configuration["DefaultAdmin:Password"];
+        if (string.IsNullOrWhiteSpace(defaultPassword) && app.Environment.IsDevelopment())
+        {
+            defaultPassword = "TrueCV_Admin123!";
+        }
+
+        if (string.IsNullOrWhiteSpace(defaultPassword))
+        {
+            Log.Warning("Default admin user not seeded: DefaultAdmin:Password is not configured");
+        }
+        else if (await userManager.FindByEmailAsync(defaultEmail) is null)
+        {
+            var adminUser = new ApplicationUser
+            {
+                UserName = defaultEmail,
+                Email = defaultEmail,
+                EmailConfirmed = true
+            };
+
+            // CreateAsync reports password-policy and store failures through the returned
+            // IdentityResult instead of throwing, so success must be checked explicitly.
+            var createResult = await userManager.CreateAsync(adminUser, defaultPassword);
+            if (createResult.Succeeded)
+            {
+                Log.Information("Created default admin user: {Email}", defaultEmail);
+            }
+            else
+            {
+                Log.Error("Failed to create default admin user {Email}: {@Errors}", defaultEmail, createResult.Errors);
+            }
+        }
+    }
+
     // Configure the HTTP request pipeline.
     if (!app.Environment.IsDevelopment())
     {
@@ -98,6 +122,44 @@ try
         });
     }
 
+    // Login endpoint: checks form-posted credentials with ASP.NET Core Identity.
+    // Failed attempts count towards account lockout (lockoutOnFailure: true).
+    app.MapPost("/account/perform-login", async (
+        HttpContext context,
+        SignInManager signInManager) =>
+    {
+        var form = await context.Request.ReadFormAsync();
+        var email = form["email"].ToString();
+        var password = form["password"].ToString();
+        var rememberMe = form["rememberMe"].ToString() == "true";
+        var returnUrl = form["returnUrl"].ToString();
+
+        Log.Information("Login attempt for {Email}", email);
+
+        // Accept only a rooted, same-site path as the redirect target. "//host" and
+        // "/\host" are treated by browsers as protocol-relative URLs (open redirect), and
+        // a non-rooted value such as "dashboard" would make Results.LocalRedirect throw
+        // when the result executes. Anything else falls back to the dashboard.
+        var isLocalReturnUrl =
+            !string.IsNullOrEmpty(returnUrl)
+            && returnUrl[0] == '/'
+            && (returnUrl.Length == 1 || (returnUrl[1] != '/' && returnUrl[1] != '\\'))
+            && Uri.IsWellFormedUriString(returnUrl, UriKind.Relative);
+        if (!isLocalReturnUrl)
+        {
+            returnUrl = "/dashboard";
+        }
+
+        var result = await signInManager.PasswordSignInAsync(email, password, rememberMe, lockoutOnFailure: true);
+
+        if (result.Succeeded)
+        {
+            Log.Information("User {Email} logged in successfully", email);
+            return Results.LocalRedirect(returnUrl);
+        }
+        else if (result.IsLockedOut)
+        {
+            Log.Warning("User {Email} account is locked out", email);
+            return Results.Redirect("/account/login?error=Account+locked.+Try+again+later.");
+        }
+        else
+        {
+            Log.Warning("Failed login attempt for {Email}", email);
+            return Results.Redirect("/account/login?error=Invalid+email+or+password.");
+        }
+    });
+
     // Logout endpoint
     app.MapPost("/account/logout", async (SignInManager signInManager) =>
     {
diff --git a/tests/TrueCV.Tests/Jobs/ProcessCVCheckJobTests.cs 
b/tests/TrueCV.Tests/Jobs/ProcessCVCheckJobTests.cs index 5f9150e..a1f53cc 100644 --- a/tests/TrueCV.Tests/Jobs/ProcessCVCheckJobTests.cs +++ b/tests/TrueCV.Tests/Jobs/ProcessCVCheckJobTests.cs @@ -18,6 +18,7 @@ public sealed class ProcessCVCheckJobTests : IDisposable private readonly Mock _fileStorageServiceMock; private readonly Mock _cvParserServiceMock; private readonly Mock _companyVerifierServiceMock; + private readonly Mock _educationVerifierServiceMock; private readonly Mock _timelineAnalyserServiceMock; private readonly Mock> _loggerMock; private readonly ProcessCVCheckJob _sut; @@ -37,6 +38,7 @@ public sealed class ProcessCVCheckJobTests : IDisposable _fileStorageServiceMock = new Mock(); _cvParserServiceMock = new Mock(); _companyVerifierServiceMock = new Mock(); + _educationVerifierServiceMock = new Mock(); _timelineAnalyserServiceMock = new Mock(); _loggerMock = new Mock>(); @@ -45,6 +47,7 @@ public sealed class ProcessCVCheckJobTests : IDisposable _fileStorageServiceMock.Object, _cvParserServiceMock.Object, _companyVerifierServiceMock.Object, + _educationVerifierServiceMock.Object, _timelineAnalyserServiceMock.Object, _loggerMock.Object); } @@ -159,7 +162,7 @@ public sealed class ProcessCVCheckJobTests : IDisposable // Assert _cvParserServiceMock.Verify( - x => x.ParseAsync(It.IsAny(), "resume.pdf"), + x => x.ParseAsync(It.IsAny(), "resume.pdf", It.IsAny()), Times.Once); _dbContext.ChangeTracker.Clear(); @@ -843,7 +846,7 @@ public sealed class ProcessCVCheckJobTests : IDisposable x => x.DownloadAsync(It.IsAny()), Times.Never); _cvParserServiceMock.Verify( - x => x.ParseAsync(It.IsAny(), It.IsAny()), + x => x.ParseAsync(It.IsAny(), It.IsAny(), It.IsAny()), Times.Never); } @@ -1007,6 +1010,7 @@ public sealed class ProcessCVCheckJobTests : IDisposable private void SetupDefaultMocks( CVData? cvData = null, List? verificationResults = null, + List? educationResults = null, TimelineAnalysisResult? 
timelineResult = null)
     {
         cvData ??= CreateTestCVData();
@@ -1017,7 +1021,7 @@ public sealed class ProcessCVCheckJobTests : IDisposable
             .ReturnsAsync(new MemoryStream());
 
         _cvParserServiceMock
-            .Setup(x => x.ParseAsync(It.IsAny(), It.IsAny()))
+            .Setup(x => x.ParseAsync(It.IsAny(), It.IsAny(), It.IsAny()))
             .ReturnsAsync(cvData);
 
         if (verificationResults != null)
@@ -1040,6 +1044,12 @@ public sealed class ProcessCVCheckJobTests : IDisposable
                 .ReturnsAsync(CreateDefaultVerificationResult());
         }
 
+        _educationVerifierServiceMock
+            .Setup(x => x.VerifyAll(
+                It.IsAny>(),
+                It.IsAny?>()))
+            .Returns(educationResults ?? []);
+
         _timelineAnalyserServiceMock
             .Setup(x => x.Analyse(It.IsAny>()))
             .Returns(timelineResult);
diff --git a/tests/TrueCV.Tests/Services/EducationVerifierServiceTests.cs b/tests/TrueCV.Tests/Services/EducationVerifierServiceTests.cs
new file mode 100644
index 0000000..56f6bbf
--- /dev/null
+++ b/tests/TrueCV.Tests/Services/EducationVerifierServiceTests.cs
@@ -0,0 +1,418 @@
+using FluentAssertions;
+using TrueCV.Application.Models;
+using TrueCV.Infrastructure.Services;
+
+namespace TrueCV.Tests.Services;
+
+/// <summary>
+/// Unit tests for <c>EducationVerifierService</c>, grouped by region: diploma-mill
+/// detection, suspicious name patterns, recognised UK institutions, unknown
+/// institutions, date plausibility, the <c>VerifyAll</c> cross-checks (overlaps and
+/// employment timeline), and preservation of the claimed data on the result.
+/// </summary>
+public sealed class EducationVerifierServiceTests
+{
+    private readonly EducationVerifierService _sut = new();
+
+    #region Diploma Mill Detection
+
+    [Theory]
+    [InlineData("Belford University")]
+    [InlineData("Ashwood University")]
+    [InlineData("Rochville University")]
+    [InlineData("St Regis University")]
+    public void Verify_DiplomaMillInstitution_ReturnsDiplomaMill(string institution)
+    {
+        // Arrange
+        var education = new EducationEntry
+        {
+            Institution = institution,
+            Qualification = "PhD",
+            Subject = "Business",
+            StartDate = new DateOnly(2020, 1, 1),
+            EndDate = new DateOnly(2020, 6, 1)
+        };
+
+        // Act
+        var result = _sut.Verify(education);
+
+        // Assert
+        result.Status.Should().Be("DiplomaMill");
+        result.IsDiplomaMill.Should().BeTrue();
+        result.IsSuspicious.Should().BeTrue();
+        result.IsVerified.Should().BeFalse();
+    }
+
+    [Fact]
+    public void Verify_DiplomaMillInstitution_IncludesVerificationNotes()
+    {
+        // Arrange
+        var education = new EducationEntry
+        {
+            Institution = "Belford University",
+            Qualification = "MBA"
+        };
+
+        // Act
+        var result = _sut.Verify(education);
+
+        // Assert
+        result.VerificationNotes.Should().Contain("diploma mill blacklist");
+    }
+
+    #endregion
+
+    #region Suspicious Pattern Detection
+
+    [Theory]
+    [InlineData("Global Online University")]
+    [InlineData("Premier University of Excellence")]
+    [InlineData("Executive Virtual University")]
+    public void Verify_SuspiciousPatternInstitution_ReturnsSuspicious(string institution)
+    {
+        // Arrange
+        var education = new EducationEntry
+        {
+            Institution = institution
+        };
+
+        // Act
+        var result = _sut.Verify(education);
+
+        // Assert
+        result.Status.Should().Be("Suspicious");
+        result.IsSuspicious.Should().BeTrue();
+        result.IsDiplomaMill.Should().BeFalse();
+        result.IsVerified.Should().BeFalse();
+    }
+
+    #endregion
+
+    #region UK Institution Recognition
+
+    [Theory]
+    [InlineData("University of Cambridge", "University of Cambridge")]
+    [InlineData("Cambridge", "University of Cambridge")]
+    [InlineData("University of Oxford", "University of Oxford")]
+    [InlineData("Oxford", "University of Oxford")]
+    [InlineData("Imperial College London", "Imperial College London")]
+    [InlineData("UCL", "UCL")] // UCL is directly in the recognised list
+    [InlineData("LSE", "LSE")] // LSE is directly in the recognised list
+    public void Verify_RecognisedUKInstitution_ReturnsRecognised(string input, string expectedMatch)
+    {
+        // Arrange
+        var education = new EducationEntry
+        {
+            Institution = input,
+            Qualification = "BSc",
+            StartDate = new DateOnly(2018, 9, 1),
+            EndDate = new DateOnly(2021, 6, 1)
+        };
+
+        // Act
+        var result = _sut.Verify(education);
+
+        // Assert
+        result.Status.Should().Be("Recognised");
+        result.IsVerified.Should().BeTrue();
+        result.IsDiplomaMill.Should().BeFalse();
+        result.IsSuspicious.Should().BeFalse();
+        result.MatchedInstitution.Should().Be(expectedMatch);
+    }
+
+    [Fact]
+    public void Verify_RecognisedInstitution_IncludesVerificationNotes()
+    {
+        // Arrange
+        var education = new EducationEntry
+        {
+            Institution = "University of Manchester"
+        };
+
+        // Act
+        var result = _sut.Verify(education);
+
+        // Assert
+        result.VerificationNotes.Should().Contain("Verified UK higher education institution");
+    }
+
+    [Fact]
+    public void Verify_RecognisedInstitutionVariation_NotesMatchedName()
+    {
+        // Arrange
+        var education = new EducationEntry
+        {
+            Institution = "Cambridge"
+        };
+
+        // Act
+        var result = _sut.Verify(education);
+
+        // Assert
+        result.VerificationNotes.Should().Contain("Matched to official name");
+        result.MatchedInstitution.Should().Be("University of Cambridge");
+    }
+
+    #endregion
+
+    #region Unknown Institutions
+
+    [Fact]
+    public void Verify_UnknownInstitution_ReturnsUnknown()
+    {
+        // Arrange
+        var education = new EducationEntry
+        {
+            Institution = "University of Ljubljana",
+            Qualification = "BSc"
+        };
+
+        // Act
+        var result = _sut.Verify(education);
+
+        // Assert
+        result.Status.Should().Be("Unknown");
+        result.IsVerified.Should().BeFalse();
+        result.IsDiplomaMill.Should().BeFalse();
+        result.IsSuspicious.Should().BeFalse();
+        result.VerificationNotes.Should().Contain("international institution");
+    }
+
+    #endregion
+
+    #region Date Plausibility
+
+    [Fact]
+    public void Verify_PlausibleDates_ReturnsPlausible()
+    {
+        // Arrange
+        var education = new EducationEntry
+        {
+            Institution = "University of Bristol",
+            StartDate = new DateOnly(2018, 9, 1),
+            EndDate = new DateOnly(2021, 6, 1)
+        };
+
+        // Act
+        var result = _sut.Verify(education);
+
+        // Assert
+        result.DatesArePlausible.Should().BeTrue();
+        result.DatePlausibilityNotes.Should().BeNull();
+    }
+
+    [Fact]
+    public void Verify_TooShortCourseDuration_ReturnsImplausible()
+    {
+        // Arrange
+        var education = new EducationEntry
+        {
+            Institution = "University of Bristol",
+            StartDate = new DateOnly(2020, 1, 1),
+            EndDate = new DateOnly(2020, 6, 1) // 5 months (1 Jan to 1 Jun)
+        };
+
+        // Act
+        var result = _sut.Verify(education);
+
+        // Assert
+        result.DatesArePlausible.Should().BeFalse();
+        result.DatePlausibilityNotes.Should().Contain("unusually short");
+    }
+
+    [Fact]
+    public void Verify_TooLongCourseDuration_ReturnsImplausible()
+    {
+        // Arrange
+        var education = new EducationEntry
+        {
+            Institution = "University of Bristol",
+            StartDate = new DateOnly(2010, 1, 1),
+            EndDate = new DateOnly(2020, 1, 1) // 10 years
+        };
+
+        // Act
+        var result = _sut.Verify(education);
+
+        // Assert
+        result.DatesArePlausible.Should().BeFalse();
+        result.DatePlausibilityNotes.Should().Contain("unusually long");
+    }
+
+    [Fact]
+    public void Verify_EndDateBeforeStartDate_ReturnsImplausible()
+    {
+        // Arrange
+        var education = new EducationEntry
+        {
+            Institution = "University of Bristol",
+            StartDate = new DateOnly(2021, 1, 1),
+            EndDate = new DateOnly(2020, 1, 1)
+        };
+
+        // Act
+        var result = _sut.Verify(education);
+
+        // Assert
+        result.DatesArePlausible.Should().BeFalse();
+        result.DatePlausibilityNotes.Should().Contain("before or equal to start date");
+    }
+
+    [Fact]
+    public void Verify_NoDates_AssumesPlausible()
+    {
+        // Arrange
+        var education = new EducationEntry
+        {
+            Institution = "University of Bristol"
+        };
+
+        // Act
+        var result = _sut.Verify(education);
+
+        // Assert
+        result.DatesArePlausible.Should().BeTrue();
+    }
+
+    #endregion
+
+    #region VerifyAll
+
+    [Fact]
+    public void VerifyAll_MultipleEducations_ReturnsResultsForEach()
+    {
+        // Arrange
+        var educations = new List
+        {
+            new() { Institution = "University of Cambridge" },
+            new() { Institution = "Belford University" },
+            new() { Institution = "Unknown Foreign University" }
+        };
+
+        // Act
+        var results = _sut.VerifyAll(educations);
+
+        // Assert
+        results.Should().HaveCount(3);
+        results[0].Status.Should().Be("Recognised");
+        results[1].Status.Should().Be("DiplomaMill");
+        results[2].Status.Should().Be("Unknown");
+    }
+
+    [Fact]
+    public void VerifyAll_OverlappingEducation_NotesOverlap()
+    {
+        // Arrange
+        var educations = new List
+        {
+            new()
+            {
+                Institution = "University of Bristol",
+                StartDate = new DateOnly(2018, 9, 1),
+                EndDate = new DateOnly(2021, 6, 1)
+            },
+            new()
+            {
+                Institution = "University of Bath",
+                StartDate = new DateOnly(2020, 9, 1),
+                EndDate = new DateOnly(2023, 6, 1)
+            }
+        };
+
+        // Act
+        var results = _sut.VerifyAll(educations);
+
+        // Assert
+        results[0].DatePlausibilityNotes.Should().Contain("Overlaps with");
+        results[1].DatePlausibilityNotes.Should().Contain("Overlaps with");
+    }
+
+    [Fact]
+    public void VerifyAll_EmploymentBeforeGraduation_ChecksTimeline()
+    {
+        // Arrange
+        var educations = new List
+        {
+            new()
+            {
+                Institution = "University of Bristol",
+                StartDate = new DateOnly(2018, 9, 1),
+                EndDate = new DateOnly(2021, 6, 1)
+            }
+        };
+
+        var employment = new List
+        {
+            new()
+            {
+                CompanyName = "Tech Corp",
+                JobTitle = "Senior Developer",
+                StartDate = new DateOnly(2018, 1, 1) // Started before education started
+            }
+        };
+
+        // Act
+        var results = _sut.VerifyAll(educations, employment);
+
+        // Assert
+        results[0].DatesArePlausible.Should().BeFalse();
+        results[0].DatePlausibilityNotes.Should().Contain("months before claimed graduation");
+    }
+
+    [Fact]
+    public void VerifyAll_InternshipBeforeGraduation_AllowsTimeline()
+    {
+        // Arrange
+        var educations = new List
+        {
+            new()
+            {
+                Institution = "University of Bristol",
+                StartDate = new DateOnly(2018, 9, 1),
+                EndDate = new DateOnly(2021, 6, 1)
+            }
+        };
+
+        var employment = new List
+        {
+            new()
+            {
+                CompanyName = "Tech Corp",
+                JobTitle = "Software Intern",
+                StartDate = new DateOnly(2019, 6, 1)
+            }
+        };
+
+        // Act
+        var results = _sut.VerifyAll(educations, employment);
+
+        // Assert
+        // Should be plausible because it's an internship
+        results[0].DatesArePlausible.Should().BeTrue();
+    }
+
+    #endregion
+
+    #region Data Preservation
+
+    [Fact]
+    public void Verify_PreservesAllClaimedData()
+    {
+        // Arrange
+        var education = new EducationEntry
+        {
+            Institution = "University of Bristol",
+            Qualification = "BSc Computer Science",
+            Subject = "Computer Science",
+            Grade = "First Class Honours",
+            StartDate = new DateOnly(2018, 9, 1),
+            EndDate = new DateOnly(2021, 6, 1)
+        };
+
+        // Act
+        var result = _sut.Verify(education);
+
+        // Assert
+        // NOTE(review): Grade is set above but no claimed-grade value is asserted below -
+        // confirm whether the result type exposes one and, if so, cover it here.
+        result.ClaimedInstitution.Should().Be("University of Bristol");
+        result.ClaimedQualification.Should().Be("BSc Computer Science");
+        result.ClaimedSubject.Should().Be("Computer Science");
+        result.ClaimedStartDate.Should().Be(new DateOnly(2018, 9, 1));
+        result.ClaimedEndDate.Should().Be(new DateOnly(2021, 6, 1));
+    }
+
+    #endregion
+}