From 2575e2be95735e589099897b36905a2e49d9b014 Mon Sep 17 00:00:00 2001 From: Peter Foster Date: Sun, 25 Jan 2026 04:30:11 +0000 Subject: [PATCH] feat: Add text analysis checks for CV verification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement four new CV verification checks without external APIs: 1. Buzzword detection - flags excessive clichés (50+ patterns) 2. Vague achievement detection - identifies weak language vs quantified results 3. Skills/job title alignment - checks skills match claimed roles (25+ role mappings) 4. Unrealistic metrics detection - flags implausible claims (>200% growth, etc.) New files: - ITextAnalysisService interface - TextAnalysisResult models - TextAnalysisService implementation (~590 lines) Integration: - Added "Analysing Content" processing stage - Flags appear under Plausibility category - TextAnalysis section added to veracity report 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .../Interfaces/ITextAnalysisService.cs | 8 + .../Models/TextAnalysisResult.cs | 66 ++ .../Models/VeracityReport.cs | 1 + .../DependencyInjection.cs | 1 + .../Jobs/ProcessCVCheckJob.cs | 46 +- .../Services/TextAnalysisService.cs | 593 ++++++++++++++++++ .../Jobs/ProcessCVCheckJobTests.cs | 7 + 7 files changed, 721 insertions(+), 1 deletion(-) create mode 100644 src/RealCV.Application/Interfaces/ITextAnalysisService.cs create mode 100644 src/RealCV.Application/Models/TextAnalysisResult.cs create mode 100644 src/RealCV.Infrastructure/Services/TextAnalysisService.cs diff --git a/src/RealCV.Application/Interfaces/ITextAnalysisService.cs b/src/RealCV.Application/Interfaces/ITextAnalysisService.cs new file mode 100644 index 0000000..276ba35 --- /dev/null +++ b/src/RealCV.Application/Interfaces/ITextAnalysisService.cs @@ -0,0 +1,8 @@ +using RealCV.Application.Models; + +namespace RealCV.Application.Interfaces; + +public interface ITextAnalysisService +{ + 
TextAnalysisResult Analyse(CVData cvData); +} diff --git a/src/RealCV.Application/Models/TextAnalysisResult.cs b/src/RealCV.Application/Models/TextAnalysisResult.cs new file mode 100644 index 0000000..bb4e460 --- /dev/null +++ b/src/RealCV.Application/Models/TextAnalysisResult.cs @@ -0,0 +1,66 @@ +namespace RealCV.Application.Models; + +public sealed record TextAnalysisResult +{ + public BuzzwordAnalysis BuzzwordAnalysis { get; init; } = new(); + public AchievementAnalysis AchievementAnalysis { get; init; } = new(); + public SkillsAlignmentAnalysis SkillsAlignment { get; init; } = new(); + public MetricsAnalysis MetricsAnalysis { get; init; } = new(); + public List Flags { get; init; } = []; +} + +public sealed record BuzzwordAnalysis +{ + public int TotalBuzzwords { get; init; } + public List BuzzwordsFound { get; init; } = []; + public double BuzzwordDensity { get; init; } +} + +public sealed record AchievementAnalysis +{ + public int TotalStatements { get; init; } + public int VagueStatements { get; init; } + public int QuantifiedStatements { get; init; } + public int StrongActionVerbStatements { get; init; } + public List VagueExamples { get; init; } = []; +} + +public sealed record SkillsAlignmentAnalysis +{ + public int TotalRolesChecked { get; init; } + public int RolesWithMatchingSkills { get; init; } + public List Mismatches { get; init; } = []; +} + +public sealed record SkillMismatch +{ + public required string JobTitle { get; init; } + public required string CompanyName { get; init; } + public required List ExpectedSkills { get; init; } + public required List MatchingSkills { get; init; } +} + +public sealed record MetricsAnalysis +{ + public int TotalMetricsClaimed { get; init; } + public int PlausibleMetrics { get; init; } + public int SuspiciousMetrics { get; init; } + public int RoundNumberCount { get; init; } + public double RoundNumberRatio { get; init; } + public List SuspiciousMetricsList { get; init; } = []; +} + +public sealed record 
SuspiciousMetric +{ + public required string ClaimText { get; init; } + public required double Value { get; init; } + public required string Reason { get; init; } +} + +public sealed record TextAnalysisFlag +{ + public required string Type { get; init; } + public required string Severity { get; init; } + public required string Message { get; init; } + public int ScoreImpact { get; init; } +} diff --git a/src/RealCV.Application/Models/VeracityReport.cs b/src/RealCV.Application/Models/VeracityReport.cs index f1f329c..97ff3cb 100644 --- a/src/RealCV.Application/Models/VeracityReport.cs +++ b/src/RealCV.Application/Models/VeracityReport.cs @@ -8,6 +8,7 @@ public sealed record VeracityReport public List EmploymentVerifications { get; init; } = []; public List EducationVerifications { get; init; } = []; public required TimelineAnalysisResult TimelineAnalysis { get; init; } + public TextAnalysisResult? TextAnalysis { get; init; } public List Flags { get; init; } = []; public required DateTime GeneratedAt { get; init; } } diff --git a/src/RealCV.Infrastructure/DependencyInjection.cs b/src/RealCV.Infrastructure/DependencyInjection.cs index 05c4b53..afdd201 100644 --- a/src/RealCV.Infrastructure/DependencyInjection.cs +++ b/src/RealCV.Infrastructure/DependencyInjection.cs @@ -114,6 +114,7 @@ public static class DependencyInjection services.AddScoped(); services.AddScoped(); services.AddScoped(); + services.AddScoped(); services.AddScoped(); services.AddScoped(); services.AddScoped(); diff --git a/src/RealCV.Infrastructure/Jobs/ProcessCVCheckJob.cs b/src/RealCV.Infrastructure/Jobs/ProcessCVCheckJob.cs index 7070288..01a439d 100644 --- a/src/RealCV.Infrastructure/Jobs/ProcessCVCheckJob.cs +++ b/src/RealCV.Infrastructure/Jobs/ProcessCVCheckJob.cs @@ -18,6 +18,7 @@ public sealed class ProcessCVCheckJob private readonly ICompanyVerifierService _companyVerifierService; private readonly IEducationVerifierService _educationVerifierService; private readonly ITimelineAnalyserService 
_timelineAnalyserService; + private readonly ITextAnalysisService _textAnalysisService; private readonly IAuditService _auditService; private readonly ILogger _logger; @@ -41,6 +42,7 @@ public sealed class ProcessCVCheckJob ICompanyVerifierService companyVerifierService, IEducationVerifierService educationVerifierService, ITimelineAnalyserService timelineAnalyserService, + ITextAnalysisService textAnalysisService, IAuditService auditService, ILogger logger) { @@ -50,6 +52,7 @@ public sealed class ProcessCVCheckJob _companyVerifierService = companyVerifierService; _educationVerifierService = educationVerifierService; _timelineAnalyserService = timelineAnalyserService; + _textAnalysisService = textAnalysisService; _auditService = auditService; _logger = logger; } @@ -198,10 +201,23 @@ public sealed class ProcessCVCheckJob "Timeline analysis for check {CheckId}: {GapCount} gaps, {OverlapCount} overlaps", cvCheckId, timelineAnalysis.Gaps.Count, timelineAnalysis.Overlaps.Count); + // Step 7b: Analyse text for buzzwords, vague achievements, skills alignment, and metrics + cvCheck.ProcessingStage = "Analysing Content"; + await _dbContext.SaveChangesAsync(cancellationToken); + + var textAnalysis = _textAnalysisService.Analyse(cvData); + + _logger.LogDebug( + "Text analysis for check {CheckId}: {BuzzwordCount} buzzwords, {VagueCount} vague statements, {MismatchCount} skill mismatches", + cvCheckId, + textAnalysis.BuzzwordAnalysis.TotalBuzzwords, + textAnalysis.AchievementAnalysis.VagueStatements, + textAnalysis.SkillsAlignment.Mismatches.Count); + // Step 8: Calculate veracity score cvCheck.ProcessingStage = "Calculating Score"; await _dbContext.SaveChangesAsync(cancellationToken); - var (score, flags) = CalculateVeracityScore(verificationResults, educationResults, timelineAnalysis, cvData); + var (score, flags) = CalculateVeracityScore(verificationResults, educationResults, timelineAnalysis, textAnalysis, cvData); _logger.LogDebug("Calculated veracity score for check 
{CheckId}: {Score}", cvCheckId, score); @@ -246,6 +262,7 @@ public sealed class ProcessCVCheckJob EmploymentVerifications = verificationResults, EducationVerifications = educationResults, TimelineAnalysis = timelineAnalysis, + TextAnalysis = textAnalysis, Flags = flags, GeneratedAt = DateTime.UtcNow }; @@ -290,6 +307,7 @@ public sealed class ProcessCVCheckJob List verifications, List educationResults, TimelineAnalysisResult timeline, + TextAnalysisResult textAnalysis, CVData cvData) { var score = BaseScore; @@ -484,6 +502,32 @@ public sealed class ProcessCVCheckJob }); } + // Process text analysis flags (buzzwords, vague achievements, skills alignment, metrics) + foreach (var textFlag in textAnalysis.Flags) + { + score += textFlag.ScoreImpact; // ScoreImpact is already negative + + flags.Add(new FlagResult + { + Category = FlagCategory.Plausibility.ToString(), + Severity = textFlag.Severity, + Title = textFlag.Type switch + { + "ExcessiveBuzzwords" => "Excessive Buzzwords", + "HighBuzzwordCount" => "High Buzzword Count", + "VagueAchievements" => "Vague Achievements", + "LackOfQuantification" => "Lack of Quantification", + "SkillsJobMismatch" => "Skills/Job Mismatch", + "UnrealisticMetrics" => "Unrealistic Metrics", + "UnrealisticMetric" => "Unrealistic Metric", + "SuspiciouslyRoundNumbers" => "Suspiciously Round Numbers", + _ => textFlag.Type + }, + Description = textFlag.Message, + ScoreImpact = textFlag.ScoreImpact + }); + } + // Deduplicate flags based on Title + Description var uniqueFlags = flags .GroupBy(f => (f.Title, f.Description)) diff --git a/src/RealCV.Infrastructure/Services/TextAnalysisService.cs b/src/RealCV.Infrastructure/Services/TextAnalysisService.cs new file mode 100644 index 0000000..e3c9c18 --- /dev/null +++ b/src/RealCV.Infrastructure/Services/TextAnalysisService.cs @@ -0,0 +1,593 @@ +using System.Text.RegularExpressions; +using Microsoft.Extensions.Logging; +using RealCV.Application.Interfaces; +using RealCV.Application.Models; + +namespace 
RealCV.Infrastructure.Services; + +public sealed partial class TextAnalysisService : ITextAnalysisService +{ + private readonly ILogger _logger; + + public TextAnalysisService(ILogger logger) + { + _logger = logger; + } + + public TextAnalysisResult Analyse(CVData cvData) + { + _logger.LogDebug("Starting text analysis for CV: {Name}", cvData.FullName); + + var flags = new List(); + + // Run all analyses + var buzzwordAnalysis = AnalyseBuzzwords(cvData, flags); + var achievementAnalysis = AnalyseAchievements(cvData, flags); + var skillsAlignment = AnalyseSkillsAlignment(cvData, flags); + var metricsAnalysis = AnalyseMetrics(cvData, flags); + + _logger.LogDebug( + "Text analysis complete: {BuzzwordCount} buzzwords, {VagueCount} vague statements, {MismatchCount} skill mismatches, {SuspiciousCount} suspicious metrics", + buzzwordAnalysis.TotalBuzzwords, + achievementAnalysis.VagueStatements, + skillsAlignment.Mismatches.Count, + metricsAnalysis.SuspiciousMetrics); + + return new TextAnalysisResult + { + BuzzwordAnalysis = buzzwordAnalysis, + AchievementAnalysis = achievementAnalysis, + SkillsAlignment = skillsAlignment, + MetricsAnalysis = metricsAnalysis, + Flags = flags + }; + } + + #region Buzzword Detection + + private static readonly HashSet Buzzwords = new(StringComparer.OrdinalIgnoreCase) + { + // Overused personality descriptors + "results-driven", "detail-oriented", "team player", "self-starter", + "go-getter", "proactive", "dynamic", "passionate", "motivated", + "hardworking", "dedicated", "enthusiastic", "driven", + + // Corporate jargon + "synergy", "leverage", "paradigm", "holistic", "innovative", + "disruptive", "scalable", "agile", "optimization", "strategic", + "streamline", "spearhead", "champion", "facilitate", + + // Vague superlatives + "best-in-class", "world-class", "cutting-edge", "state-of-the-art", + "next-generation", "game-changer", "thought leader", + + // Empty phrases + "think outside the box", "hit the ground running", "move the needle", 
+ "low-hanging fruit", "value-add", "bandwidth", "circle back", + "deep dive", "pivot", "ecosystem" + }; + + private static readonly HashSet BuzzwordPhrases = new(StringComparer.OrdinalIgnoreCase) + { + "results-driven professional", + "highly motivated individual", + "proven track record", + "strong work ethic", + "excellent interpersonal skills", + "ability to work independently", + "thrive under pressure", + "fast-paced environment", + "excellent communication skills", + "strategic thinker", + "problem solver", + "out of the box", + "above and beyond", + "value proposition" + }; + + private static BuzzwordAnalysis AnalyseBuzzwords(CVData cvData, List flags) + { + var allText = GetAllDescriptionText(cvData); + var textLower = allText.ToLower(); + var wordCount = allText.Split(' ', StringSplitOptions.RemoveEmptyEntries).Length; + + var found = new List(); + + // Check for phrases first + foreach (var phrase in BuzzwordPhrases) + { + if (textLower.Contains(phrase.ToLower())) + { + found.Add(phrase); + } + } + + // Check individual buzzwords (avoiding duplicates from phrases) + foreach (var buzzword in Buzzwords) + { + if (textLower.Contains(buzzword.ToLower()) && + !found.Any(f => f.Contains(buzzword, StringComparison.OrdinalIgnoreCase))) + { + found.Add(buzzword); + } + } + + var density = wordCount > 0 ? found.Count / (wordCount / 100.0) : 0; + + // Generate flags based on severity + if (found.Count >= 10) + { + flags.Add(new TextAnalysisFlag + { + Type = "ExcessiveBuzzwords", + Severity = "Warning", + Message = $"CV contains {found.Count} buzzwords/clichés - may indicate template or AI-generated content. 
Examples: {string.Join(", ", found.Take(5))}", + ScoreImpact = -10 + }); + } + else if (found.Count >= 6) + { + flags.Add(new TextAnalysisFlag + { + Type = "HighBuzzwordCount", + Severity = "Info", + Message = $"CV contains {found.Count} common buzzwords: {string.Join(", ", found.Take(4))}", + ScoreImpact = -5 + }); + } + + return new BuzzwordAnalysis + { + TotalBuzzwords = found.Count, + BuzzwordsFound = found, + BuzzwordDensity = density + }; + } + + #endregion + + #region Vague Achievement Detection + + private static readonly string[] VaguePatterns = + [ + "responsible for", + "worked on", + "helped with", + "assisted in", + "involved in", + "participated in", + "contributed to", + "various tasks", + "many projects", + "multiple initiatives", + "day-to-day", + "duties included", + "tasked with" + ]; + + private static readonly string[] StrongActionVerbs = + [ + "achieved", "increased", "reduced", "decreased", "improved", + "generated", "saved", "developed", "created", "launched", + "implemented", "negotiated", "secured", "designed", "built", + "led", "managed", "delivered", "transformed", "accelerated", + "streamlined", "consolidated", "eliminated", "maximized", "minimized" + ]; + + private static AchievementAnalysis AnalyseAchievements(CVData cvData, List flags) + { + var totalStatements = 0; + var vagueStatements = 0; + var quantifiedStatements = 0; + var strongVerbStatements = 0; + var vagueExamples = new List(); + + foreach (var job in cvData.Employment) + { + if (string.IsNullOrWhiteSpace(job.Description)) continue; + + // Split into bullet points or sentences + var statements = job.Description + .Split(['\n', '•', '●', '■', '▪', '*', '-'], StringSplitOptions.RemoveEmptyEntries) + .Select(s => s.Trim()) + .Where(s => s.Length > 10) + .ToList(); + + foreach (var statement in statements) + { + totalStatements++; + var statementLower = statement.ToLower(); + + // Check for quantification (numbers, percentages, currency) + if 
(HasQuantification().IsMatch(statement)) + { + quantifiedStatements++; + } + + // Check for strong action verbs at the start + if (StrongActionVerbs.Any(v => statementLower.StartsWith(v))) + { + strongVerbStatements++; + } + + // Check for vague patterns + if (VaguePatterns.Any(p => statementLower.Contains(p))) + { + vagueStatements++; + if (vagueExamples.Count < 3) + { + var truncated = statement.Length > 60 ? statement[..57] + "..." : statement; + vagueExamples.Add(truncated); + } + } + } + } + + // Generate flags + if (totalStatements > 0) + { + var vagueRatio = (double)vagueStatements / totalStatements; + var quantifiedRatio = (double)quantifiedStatements / totalStatements; + + if (vagueRatio > 0.5 && totalStatements >= 5) + { + flags.Add(new TextAnalysisFlag + { + Type = "VagueAchievements", + Severity = "Warning", + Message = $"{vagueStatements} of {totalStatements} statements use vague language (e.g., 'responsible for', 'helped with'). Consider: \"{vagueExamples.FirstOrDefault()}\"", + ScoreImpact = -8 + }); + } + + if (quantifiedRatio < 0.2 && totalStatements >= 5) + { + flags.Add(new TextAnalysisFlag + { + Type = "LackOfQuantification", + Severity = "Info", + Message = $"Only {quantifiedStatements} of {totalStatements} achievement statements include measurable results", + ScoreImpact = 0 + }); + } + } + + return new AchievementAnalysis + { + TotalStatements = totalStatements, + VagueStatements = vagueStatements, + QuantifiedStatements = quantifiedStatements, + StrongActionVerbStatements = strongVerbStatements, + VagueExamples = vagueExamples + }; + } + + [GeneratedRegex(@"\d+%|\$[\d,]+|£[\d,]+|\d+\s*(million|thousand|k\b|m\b)|[0-9]+x\b", RegexOptions.IgnoreCase)] + private static partial Regex HasQuantification(); + + #endregion + + #region Skills Alignment + + private static readonly Dictionary> RoleSkillsMap = new(StringComparer.OrdinalIgnoreCase) + { + // Software/Tech roles + ["software engineer"] = ["programming", "coding", "development", "software", 
"git", "testing", "code", "developer", "engineering"], + ["software developer"] = ["programming", "coding", "development", "software", "git", "testing", "code", "developer"], + ["web developer"] = ["html", "css", "javascript", "web", "frontend", "backend", "react", "angular", "vue", "node"], + ["frontend developer"] = ["html", "css", "javascript", "react", "angular", "vue", "typescript", "ui", "ux"], + ["backend developer"] = ["api", "database", "sql", "server", "node", "python", "java", "c#", ".net"], + ["full stack"] = ["frontend", "backend", "javascript", "database", "api", "react", "node"], + ["devops engineer"] = ["ci/cd", "docker", "kubernetes", "aws", "azure", "jenkins", "terraform", "infrastructure"], + ["data scientist"] = ["python", "machine learning", "statistics", "data analysis", "sql", "r", "tensorflow", "pandas"], + ["data analyst"] = ["sql", "excel", "data", "analysis", "tableau", "power bi", "statistics", "reporting"], + ["data engineer"] = ["sql", "python", "etl", "data pipeline", "spark", "hadoop", "database", "aws", "azure"], + + // Project/Product roles + ["project manager"] = ["project management", "agile", "scrum", "stakeholder", "planning", "budget", "pmp", "prince2"], + ["product manager"] = ["product", "roadmap", "stakeholder", "agile", "user research", "strategy", "backlog"], + ["scrum master"] = ["scrum", "agile", "sprint", "kanban", "jira", "facilitation", "coaching"], + + // Business roles + ["business analyst"] = ["requirements", "analysis", "stakeholder", "documentation", "process", "sql", "jira"], + ["marketing manager"] = ["marketing", "campaigns", "branding", "analytics", "seo", "content", "social media", "digital"], + ["sales manager"] = ["sales", "revenue", "crm", "pipeline", "negotiation", "b2b", "b2c", "targets"], + + // Finance roles + ["accountant"] = ["accounting", "financial", "excel", "bookkeeping", "tax", "audit", "sage", "xero", "quickbooks"], + ["financial analyst"] = ["financial", "modelling", "excel", "forecasting", 
"budgeting", "analysis", "reporting"], + + // Design roles + ["ux designer"] = ["ux", "user experience", "wireframe", "prototype", "figma", "sketch", "user research", "usability"], + ["ui designer"] = ["ui", "visual design", "figma", "sketch", "adobe", "interface", "design systems"], + ["graphic designer"] = ["photoshop", "illustrator", "indesign", "adobe", "design", "creative", "branding"], + + // HR roles + ["hr manager"] = ["hr", "human resources", "recruitment", "employee relations", "policy", "training", "performance"], + ["recruiter"] = ["recruitment", "sourcing", "interviewing", "talent", "hiring", "ats", "linkedin"], + + // Other common roles + ["customer service"] = ["customer", "support", "service", "communication", "crm", "resolution"], + ["operations manager"] = ["operations", "logistics", "process", "efficiency", "supply chain", "management"] + }; + + private static SkillsAlignmentAnalysis AnalyseSkillsAlignment(CVData cvData, List flags) + { + var mismatches = new List(); + var rolesChecked = 0; + var rolesWithMatchingSkills = 0; + + // Normalize skills for matching + var skillsLower = cvData.Skills + .Select(s => s.ToLower().Trim()) + .ToHashSet(); + + // Also extract skills mentioned in descriptions + var allText = GetAllDescriptionText(cvData).ToLower(); + + foreach (var job in cvData.Employment) + { + var titleLower = job.JobTitle.ToLower(); + + foreach (var (rolePattern, expectedSkills) in RoleSkillsMap) + { + if (!titleLower.Contains(rolePattern)) continue; + + rolesChecked++; + + // Find matching skills (in skills list OR mentioned in descriptions) + var matchingSkills = expectedSkills + .Where(expected => + skillsLower.Any(s => s.Contains(expected)) || + allText.Contains(expected)) + .ToList(); + + if (matchingSkills.Count >= 2) + { + rolesWithMatchingSkills++; + } + else + { + mismatches.Add(new SkillMismatch + { + JobTitle = job.JobTitle, + CompanyName = job.CompanyName, + ExpectedSkills = expectedSkills.Take(5).ToList(), + MatchingSkills = 
matchingSkills + }); + } + + break; // Only match first role pattern + } + } + + // Generate flags for significant mismatches + if (mismatches.Count >= 2) + { + var examples = mismatches.Take(2) + .Select(m => $"'{m.JobTitle}' lacks typical skills") + .ToList(); + + flags.Add(new TextAnalysisFlag + { + Type = "SkillsJobMismatch", + Severity = "Warning", + Message = $"{mismatches.Count} roles have few matching skills listed. {string.Join("; ", examples)}. Expected skills like: {string.Join(", ", mismatches.First().ExpectedSkills.Take(3))}", + ScoreImpact = -8 + }); + } + else if (mismatches.Count == 1) + { + var m = mismatches.First(); + flags.Add(new TextAnalysisFlag + { + Type = "SkillsJobMismatch", + Severity = "Info", + Message = $"Role '{m.JobTitle}' at {m.CompanyName} has limited matching skills. Expected: {string.Join(", ", m.ExpectedSkills.Take(4))}", + ScoreImpact = -3 + }); + } + + return new SkillsAlignmentAnalysis + { + TotalRolesChecked = rolesChecked, + RolesWithMatchingSkills = rolesWithMatchingSkills, + Mismatches = mismatches + }; + } + + #endregion + + #region Unrealistic Metrics Detection + + private static MetricsAnalysis AnalyseMetrics(CVData cvData, List flags) + { + var allText = GetAllDescriptionText(cvData); + var suspiciousMetrics = new List(); + var totalMetrics = 0; + var plausibleMetrics = 0; + + // Revenue/growth increase patterns + var revenuePattern = RevenueIncreasePattern(); + foreach (Match match in revenuePattern.Matches(allText)) + { + totalMetrics++; + var value = double.Parse(match.Groups[1].Value); + + if (value > 300) + { + suspiciousMetrics.Add(new SuspiciousMetric + { + ClaimText = match.Value, + Value = value, + Reason = $"{value}% increase is exceptionally high - requires verification" + }); + } + else if (value > 200) + { + suspiciousMetrics.Add(new SuspiciousMetric + { + ClaimText = match.Value, + Value = value, + Reason = $"{value}% is unusually high for most contexts" + }); + } + else + { + plausibleMetrics++; + } + } 
+ + // Cost reduction patterns + var costPattern = CostReductionPattern(); + foreach (Match match in costPattern.Matches(allText)) + { + totalMetrics++; + var value = double.Parse(match.Groups[1].Value); + + if (value > 70) + { + suspiciousMetrics.Add(new SuspiciousMetric + { + ClaimText = match.Value, + Value = value, + Reason = $"{value}% cost reduction is extremely rare" + }); + } + else + { + plausibleMetrics++; + } + } + + // Efficiency/productivity improvements + var efficiencyPattern = EfficiencyPattern(); + foreach (Match match in efficiencyPattern.Matches(allText)) + { + totalMetrics++; + var value = double.Parse(match.Groups[1].Value); + + if (value > 500) + { + suspiciousMetrics.Add(new SuspiciousMetric + { + ClaimText = match.Value, + Value = value, + Reason = $"{value}% efficiency gain is implausible" + }); + } + else if (value > 200) + { + suspiciousMetrics.Add(new SuspiciousMetric + { + ClaimText = match.Value, + Value = value, + Reason = $"{value}% improvement is unusually high" + }); + } + else + { + plausibleMetrics++; + } + } + + // Check for suspiciously round numbers + var (roundCount, roundRatio) = AnalyseRoundNumbers(allText); + + // Generate flags + if (suspiciousMetrics.Count >= 2) + { + flags.Add(new TextAnalysisFlag + { + Type = "UnrealisticMetrics", + Severity = "Warning", + Message = $"{suspiciousMetrics.Count} achievement metrics appear exaggerated. 
Example: \"{suspiciousMetrics.First().ClaimText}\" - {suspiciousMetrics.First().Reason}", + ScoreImpact = -10 + }); + } + else if (suspiciousMetrics.Count == 1) + { + flags.Add(new TextAnalysisFlag + { + Type = "UnrealisticMetric", + Severity = "Info", + Message = $"Metric may be exaggerated: \"{suspiciousMetrics.First().ClaimText}\" - {suspiciousMetrics.First().Reason}", + ScoreImpact = -3 + }); + } + + if (roundRatio > 0.8 && totalMetrics >= 4) + { + flags.Add(new TextAnalysisFlag + { + Type = "SuspiciouslyRoundNumbers", + Severity = "Info", + Message = $"{roundCount} of {totalMetrics} metrics are round numbers (ending in 0 or 5) - real data is rarely this clean", + ScoreImpact = -3 + }); + } + + return new MetricsAnalysis + { + TotalMetricsClaimed = totalMetrics, + PlausibleMetrics = plausibleMetrics, + SuspiciousMetrics = suspiciousMetrics.Count, + RoundNumberCount = roundCount, + RoundNumberRatio = roundRatio, + SuspiciousMetricsList = suspiciousMetrics + }; + } + + [GeneratedRegex(@"(?:increased|grew|boosted|raised|improved)\s+(?:\w+\s+){0,3}(?:by\s+)?(\d+)%", RegexOptions.IgnoreCase)] + private static partial Regex RevenueIncreasePattern(); + + [GeneratedRegex(@"(?:reduced|cut|decreased|saved|lowered)\s+(?:\w+\s+){0,3}(?:by\s+)?(\d+)%", RegexOptions.IgnoreCase)] + private static partial Regex CostReductionPattern(); + + [GeneratedRegex(@"(\d+)%\s+(?:faster|quicker|more efficient|improvement|productivity|increase)", RegexOptions.IgnoreCase)] + private static partial Regex EfficiencyPattern(); + + private static (int RoundCount, double RoundRatio) AnalyseRoundNumbers(string text) + { + var numberPattern = NumberPattern(); + var matches = numberPattern.Matches(text); + + var total = 0; + var roundCount = 0; + + foreach (Match match in matches) + { + var numStr = match.Groups[1].Success ? 
match.Groups[1].Value : match.Groups[2].Value; + numStr = numStr.Replace(",", ""); + + if (int.TryParse(numStr, out var num) && num >= 10) + { + total++; + if (num % 10 == 0 || num % 5 == 0) + { + roundCount++; + } + } + } + + return (roundCount, total > 0 ? (double)roundCount / total : 0); + } + + [GeneratedRegex(@"(\d+)%|(?:\$|£)([\d,]+)")] + private static partial Regex NumberPattern(); + + #endregion + + #region Helpers + + private static string GetAllDescriptionText(CVData cvData) + { + var descriptions = cvData.Employment + .Where(e => !string.IsNullOrWhiteSpace(e.Description)) + .Select(e => e.Description!); + + return string.Join(" ", descriptions); + } + + #endregion +} diff --git a/tests/RealCV.Tests/Jobs/ProcessCVCheckJobTests.cs b/tests/RealCV.Tests/Jobs/ProcessCVCheckJobTests.cs index 2222dfd..4642b37 100644 --- a/tests/RealCV.Tests/Jobs/ProcessCVCheckJobTests.cs +++ b/tests/RealCV.Tests/Jobs/ProcessCVCheckJobTests.cs @@ -20,6 +20,7 @@ public sealed class ProcessCVCheckJobTests : IDisposable private readonly Mock _companyVerifierServiceMock; private readonly Mock _educationVerifierServiceMock; private readonly Mock _timelineAnalyserServiceMock; + private readonly Mock _textAnalysisServiceMock; private readonly Mock _auditServiceMock; private readonly Mock> _loggerMock; private readonly ProcessCVCheckJob _sut; @@ -41,6 +42,7 @@ public sealed class ProcessCVCheckJobTests : IDisposable _companyVerifierServiceMock = new Mock(); _educationVerifierServiceMock = new Mock(); _timelineAnalyserServiceMock = new Mock(); + _textAnalysisServiceMock = new Mock(); _auditServiceMock = new Mock(); _loggerMock = new Mock>(); @@ -51,6 +53,7 @@ public sealed class ProcessCVCheckJobTests : IDisposable _companyVerifierServiceMock.Object, _educationVerifierServiceMock.Object, _timelineAnalyserServiceMock.Object, + _textAnalysisServiceMock.Object, _auditServiceMock.Object, _loggerMock.Object); } @@ -1073,6 +1076,10 @@ public sealed class ProcessCVCheckJobTests : IDisposable 
_timelineAnalyserServiceMock .Setup(x => x.Analyse(It.IsAny>())) .Returns(timelineResult); + + _textAnalysisServiceMock + .Setup(x => x.Analyse(It.IsAny())) + .Returns(new TextAnalysisResult()); } private static CVData CreateTestCVData(int employmentCount = 1)