feat: Add text analysis checks for CV verification

Implement four new CV verification checks without external APIs:

1. Buzzword detection - flags excessive clichés (50+ patterns)
2. Vague achievement detection - identifies weak language vs quantified results
3. Skills/job title alignment - checks skills match claimed roles (25+ role mappings)
4. Unrealistic metrics detection - flags implausible claims (>200% growth, etc.)
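
All four checks run in a single local pass over the parsed CV data. The caller-side contract, sketched here with names matching the ProcessCVCheckJob wiring further down, is roughly:

// Sketch: how the processing job consumes the new service (no network calls involved)
var textAnalysis = _textAnalysisService.Analyse(cvData);

foreach (var flag in textAnalysis.Flags)
{
    // flag.Type      e.g. "ExcessiveBuzzwords", "VagueAchievements", "SkillsJobMismatch"
    // flag.Severity  "Info" or "Warning"
    // flag.ScoreImpact is zero or negative and is added directly to the veracity score
}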

New files:
- ITextAnalysisService interface (sketched below)
- TextAnalysisResult models
- TextAnalysisService implementation (~600 lines)
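
The interface and result models aren't included in the diff excerpts below; judging from how the implementation constructs them and the job consumes them, they look roughly like this (namespaces follow the usings in TextAnalysisService; property modifiers and exact file layout are assumptions):

// RealCV.Application.Interfaces (sketch)
public interface ITextAnalysisService
{
    TextAnalysisResult Analyse(CVData cvData);
}

// RealCV.Application.Models (sketch)
public sealed class TextAnalysisResult
{
    public required BuzzwordAnalysis BuzzwordAnalysis { get; init; }
    public required AchievementAnalysis AchievementAnalysis { get; init; }
    public required SkillsAlignmentAnalysis SkillsAlignment { get; init; }
    public required MetricsAnalysis MetricsAnalysis { get; init; }
    public required List<TextAnalysisFlag> Flags { get; init; }
}

public sealed class TextAnalysisFlag
{
    public required string Type { get; init; }       // e.g. "LackOfQuantification"
    public required string Severity { get; init; }   // "Info" or "Warning"
    public required string Message { get; init; }
    public required int ScoreImpact { get; init; }   // zero or negative
}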

Integration:
- Added "Analysing Content" processing stage
- Flags appear under the Plausibility category (example below)
- TextAnalysis section added to the veracity report
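
A buzzword finding, for instance, would surface in the report as something like this (values are illustrative):

var exampleFlag = new FlagResult
{
    Category = "Plausibility",
    Severity = "Warning",
    Title = "Excessive Buzzwords",
    Description = "CV contains 11 buzzwords/clichés - may indicate template or AI-generated content. Examples: results-driven, synergy, team player, proactive, leverage",
    ScoreImpact = -10   // already negative; added straight onto the veracity score
};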

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-25 04:30:11 +00:00
parent a132efd907
commit 2575e2be95
7 changed files with 721 additions and 1 deletion


@@ -114,6 +114,7 @@ public static class DependencyInjection
services.AddScoped<ICompanyVerifierService, CompanyVerifierService>();
services.AddScoped<IEducationVerifierService, EducationVerifierService>();
services.AddScoped<ITimelineAnalyserService, TimelineAnalyserService>();
services.AddScoped<ITextAnalysisService, TextAnalysisService>();
services.AddScoped<ICVCheckService, CVCheckService>();
services.AddScoped<IUserContextService, UserContextService>();
services.AddScoped<IAuditService, AuditService>();


@@ -18,6 +18,7 @@ public sealed class ProcessCVCheckJob
private readonly ICompanyVerifierService _companyVerifierService;
private readonly IEducationVerifierService _educationVerifierService;
private readonly ITimelineAnalyserService _timelineAnalyserService;
private readonly ITextAnalysisService _textAnalysisService;
private readonly IAuditService _auditService;
private readonly ILogger<ProcessCVCheckJob> _logger;
@@ -41,6 +42,7 @@ public sealed class ProcessCVCheckJob
ICompanyVerifierService companyVerifierService,
IEducationVerifierService educationVerifierService,
ITimelineAnalyserService timelineAnalyserService,
ITextAnalysisService textAnalysisService,
IAuditService auditService,
ILogger<ProcessCVCheckJob> logger)
{
@@ -50,6 +52,7 @@ public sealed class ProcessCVCheckJob
_companyVerifierService = companyVerifierService;
_educationVerifierService = educationVerifierService;
_timelineAnalyserService = timelineAnalyserService;
_textAnalysisService = textAnalysisService;
_auditService = auditService;
_logger = logger;
}
@@ -198,10 +201,23 @@ public sealed class ProcessCVCheckJob
"Timeline analysis for check {CheckId}: {GapCount} gaps, {OverlapCount} overlaps",
cvCheckId, timelineAnalysis.Gaps.Count, timelineAnalysis.Overlaps.Count);
// Step 7b: Analyse text for buzzwords, vague achievements, skills alignment, and metrics
cvCheck.ProcessingStage = "Analysing Content";
await _dbContext.SaveChangesAsync(cancellationToken);
var textAnalysis = _textAnalysisService.Analyse(cvData);
_logger.LogDebug(
"Text analysis for check {CheckId}: {BuzzwordCount} buzzwords, {VagueCount} vague statements, {MismatchCount} skill mismatches",
cvCheckId,
textAnalysis.BuzzwordAnalysis.TotalBuzzwords,
textAnalysis.AchievementAnalysis.VagueStatements,
textAnalysis.SkillsAlignment.Mismatches.Count);
// Step 8: Calculate veracity score
cvCheck.ProcessingStage = "Calculating Score";
await _dbContext.SaveChangesAsync(cancellationToken);
var (score, flags) = CalculateVeracityScore(verificationResults, educationResults, timelineAnalysis, cvData);
var (score, flags) = CalculateVeracityScore(verificationResults, educationResults, timelineAnalysis, textAnalysis, cvData);
_logger.LogDebug("Calculated veracity score for check {CheckId}: {Score}", cvCheckId, score);
@@ -246,6 +262,7 @@ public sealed class ProcessCVCheckJob
EmploymentVerifications = verificationResults,
EducationVerifications = educationResults,
TimelineAnalysis = timelineAnalysis,
TextAnalysis = textAnalysis,
Flags = flags,
GeneratedAt = DateTime.UtcNow
};
@@ -290,6 +307,7 @@ public sealed class ProcessCVCheckJob
List<CompanyVerificationResult> verifications,
List<EducationVerificationResult> educationResults,
TimelineAnalysisResult timeline,
TextAnalysisResult textAnalysis,
CVData cvData)
{
var score = BaseScore;
@@ -484,6 +502,32 @@ public sealed class ProcessCVCheckJob
});
}
// Process text analysis flags (buzzwords, vague achievements, skills alignment, metrics)
foreach (var textFlag in textAnalysis.Flags)
{
score += textFlag.ScoreImpact; // ScoreImpact is already negative
flags.Add(new FlagResult
{
Category = FlagCategory.Plausibility.ToString(),
Severity = textFlag.Severity,
Title = textFlag.Type switch
{
"ExcessiveBuzzwords" => "Excessive Buzzwords",
"HighBuzzwordCount" => "High Buzzword Count",
"VagueAchievements" => "Vague Achievements",
"LackOfQuantification" => "Lack of Quantification",
"SkillsJobMismatch" => "Skills/Job Mismatch",
"UnrealisticMetrics" => "Unrealistic Metrics",
"UnrealisticMetric" => "Unrealistic Metric",
"SuspiciouslyRoundNumbers" => "Suspiciously Round Numbers",
_ => textFlag.Type
},
Description = textFlag.Message,
ScoreImpact = textFlag.ScoreImpact
});
}
// Deduplicate flags based on Title + Description
var uniqueFlags = flags
.GroupBy(f => (f.Title, f.Description))


@@ -0,0 +1,593 @@
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
using RealCV.Application.Interfaces;
using RealCV.Application.Models;
namespace RealCV.Infrastructure.Services;
public sealed partial class TextAnalysisService : ITextAnalysisService
{
private readonly ILogger<TextAnalysisService> _logger;
public TextAnalysisService(ILogger<TextAnalysisService> logger)
{
_logger = logger;
}
public TextAnalysisResult Analyse(CVData cvData)
{
_logger.LogDebug("Starting text analysis for CV: {Name}", cvData.FullName);
var flags = new List<TextAnalysisFlag>();
// Run all analyses
var buzzwordAnalysis = AnalyseBuzzwords(cvData, flags);
var achievementAnalysis = AnalyseAchievements(cvData, flags);
var skillsAlignment = AnalyseSkillsAlignment(cvData, flags);
var metricsAnalysis = AnalyseMetrics(cvData, flags);
_logger.LogDebug(
"Text analysis complete: {BuzzwordCount} buzzwords, {VagueCount} vague statements, {MismatchCount} skill mismatches, {SuspiciousCount} suspicious metrics",
buzzwordAnalysis.TotalBuzzwords,
achievementAnalysis.VagueStatements,
skillsAlignment.Mismatches.Count,
metricsAnalysis.SuspiciousMetrics);
return new TextAnalysisResult
{
BuzzwordAnalysis = buzzwordAnalysis,
AchievementAnalysis = achievementAnalysis,
SkillsAlignment = skillsAlignment,
MetricsAnalysis = metricsAnalysis,
Flags = flags
};
}
#region Buzzword Detection
private static readonly HashSet<string> Buzzwords = new(StringComparer.OrdinalIgnoreCase)
{
// Overused personality descriptors
"results-driven", "detail-oriented", "team player", "self-starter",
"go-getter", "proactive", "dynamic", "passionate", "motivated",
"hardworking", "dedicated", "enthusiastic", "driven",
// Corporate jargon
"synergy", "leverage", "paradigm", "holistic", "innovative",
"disruptive", "scalable", "agile", "optimization", "strategic",
"streamline", "spearhead", "champion", "facilitate",
// Vague superlatives
"best-in-class", "world-class", "cutting-edge", "state-of-the-art",
"next-generation", "game-changer", "thought leader",
// Empty phrases
"think outside the box", "hit the ground running", "move the needle",
"low-hanging fruit", "value-add", "bandwidth", "circle back",
"deep dive", "pivot", "ecosystem"
};
private static readonly HashSet<string> BuzzwordPhrases = new(StringComparer.OrdinalIgnoreCase)
{
"results-driven professional",
"highly motivated individual",
"proven track record",
"strong work ethic",
"excellent interpersonal skills",
"ability to work independently",
"thrive under pressure",
"fast-paced environment",
"excellent communication skills",
"strategic thinker",
"problem solver",
"out of the box",
"above and beyond",
"value proposition"
};
private static BuzzwordAnalysis AnalyseBuzzwords(CVData cvData, List<TextAnalysisFlag> flags)
{
var allText = GetAllDescriptionText(cvData);
var textLower = allText.ToLower();
var wordCount = allText.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries).Length; // split on any whitespace, not just spaces
var found = new List<string>();
// Check for phrases first
foreach (var phrase in BuzzwordPhrases)
{
if (textLower.Contains(phrase.ToLower()))
{
found.Add(phrase);
}
}
// Check individual buzzwords (avoiding duplicates from phrases)
foreach (var buzzword in Buzzwords)
{
if (textLower.Contains(buzzword.ToLower()) &&
!found.Any(f => f.Contains(buzzword, StringComparison.OrdinalIgnoreCase)))
{
found.Add(buzzword);
}
}
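// Density is expressed as buzzwords per 100 words of description text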
var density = wordCount > 0 ? found.Count / (wordCount / 100.0) : 0;
// Generate flags based on severity
if (found.Count >= 10)
{
flags.Add(new TextAnalysisFlag
{
Type = "ExcessiveBuzzwords",
Severity = "Warning",
Message = $"CV contains {found.Count} buzzwords/clichés - may indicate template or AI-generated content. Examples: {string.Join(", ", found.Take(5))}",
ScoreImpact = -10
});
}
else if (found.Count >= 6)
{
flags.Add(new TextAnalysisFlag
{
Type = "HighBuzzwordCount",
Severity = "Info",
Message = $"CV contains {found.Count} common buzzwords: {string.Join(", ", found.Take(4))}",
ScoreImpact = -5
});
}
return new BuzzwordAnalysis
{
TotalBuzzwords = found.Count,
BuzzwordsFound = found,
BuzzwordDensity = density
};
}
#endregion
#region Vague Achievement Detection
private static readonly string[] VaguePatterns =
[
"responsible for",
"worked on",
"helped with",
"assisted in",
"involved in",
"participated in",
"contributed to",
"various tasks",
"many projects",
"multiple initiatives",
"day-to-day",
"duties included",
"tasked with"
];
private static readonly string[] StrongActionVerbs =
[
"achieved", "increased", "reduced", "decreased", "improved",
"generated", "saved", "developed", "created", "launched",
"implemented", "negotiated", "secured", "designed", "built",
"led", "managed", "delivered", "transformed", "accelerated",
"streamlined", "consolidated", "eliminated", "maximized", "minimized"
];
private static AchievementAnalysis AnalyseAchievements(CVData cvData, List<TextAnalysisFlag> flags)
{
var totalStatements = 0;
var vagueStatements = 0;
var quantifiedStatements = 0;
var strongVerbStatements = 0;
var vagueExamples = new List<string>();
foreach (var job in cvData.Employment)
{
if (string.IsNullOrWhiteSpace(job.Description)) continue;
// Split into bullet points or sentences (split on newlines and bullet glyphs only,
// so hyphenated words such as "day-to-day" are not broken apart)
var statements = job.Description
.Split(['\n', '•', '●', '■', '▪'], StringSplitOptions.RemoveEmptyEntries)
.Select(s => s.Trim().TrimStart('-', '*').Trim())
.Where(s => s.Length > 10)
.ToList();
foreach (var statement in statements)
{
totalStatements++;
var statementLower = statement.ToLower();
// Check for quantification (numbers, percentages, currency)
if (HasQuantification().IsMatch(statement))
{
quantifiedStatements++;
}
// Check for strong action verbs at the start
if (StrongActionVerbs.Any(v => statementLower.StartsWith(v)))
{
strongVerbStatements++;
}
// Check for vague patterns
if (VaguePatterns.Any(p => statementLower.Contains(p)))
{
vagueStatements++;
if (vagueExamples.Count < 3)
{
var truncated = statement.Length > 60 ? statement[..57] + "..." : statement;
vagueExamples.Add(truncated);
}
}
}
}
// Generate flags
if (totalStatements > 0)
{
var vagueRatio = (double)vagueStatements / totalStatements;
var quantifiedRatio = (double)quantifiedStatements / totalStatements;
if (vagueRatio > 0.5 && totalStatements >= 5)
{
flags.Add(new TextAnalysisFlag
{
Type = "VagueAchievements",
Severity = "Warning",
Message = $"{vagueStatements} of {totalStatements} statements use vague language (e.g., 'responsible for', 'helped with'). Consider: \"{vagueExamples.FirstOrDefault()}\"",
ScoreImpact = -8
});
}
if (quantifiedRatio < 0.2 && totalStatements >= 5)
{
flags.Add(new TextAnalysisFlag
{
Type = "LackOfQuantification",
Severity = "Info",
Message = $"Only {quantifiedStatements} of {totalStatements} achievement statements include measurable results",
ScoreImpact = 0
});
}
}
return new AchievementAnalysis
{
TotalStatements = totalStatements,
VagueStatements = vagueStatements,
QuantifiedStatements = quantifiedStatements,
StrongActionVerbStatements = strongVerbStatements,
VagueExamples = vagueExamples
};
}
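// Matches quantified claims such as "45%", "$1,200", "£30,000", "3 million" or "10x"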
[GeneratedRegex(@"\d+%|\$[\d,]+|£[\d,]+|\d+\s*(million|thousand|k\b|m\b)|[0-9]+x\b", RegexOptions.IgnoreCase)]
private static partial Regex HasQuantification();
#endregion
#region Skills Alignment
private static readonly Dictionary<string, HashSet<string>> RoleSkillsMap = new(StringComparer.OrdinalIgnoreCase)
{
// Software/Tech roles
["software engineer"] = ["programming", "coding", "development", "software", "git", "testing", "code", "developer", "engineering"],
["software developer"] = ["programming", "coding", "development", "software", "git", "testing", "code", "developer"],
["web developer"] = ["html", "css", "javascript", "web", "frontend", "backend", "react", "angular", "vue", "node"],
["frontend developer"] = ["html", "css", "javascript", "react", "angular", "vue", "typescript", "ui", "ux"],
["backend developer"] = ["api", "database", "sql", "server", "node", "python", "java", "c#", ".net"],
["full stack"] = ["frontend", "backend", "javascript", "database", "api", "react", "node"],
["devops engineer"] = ["ci/cd", "docker", "kubernetes", "aws", "azure", "jenkins", "terraform", "infrastructure"],
["data scientist"] = ["python", "machine learning", "statistics", "data analysis", "sql", "r", "tensorflow", "pandas"],
["data analyst"] = ["sql", "excel", "data", "analysis", "tableau", "power bi", "statistics", "reporting"],
["data engineer"] = ["sql", "python", "etl", "data pipeline", "spark", "hadoop", "database", "aws", "azure"],
// Project/Product roles
["project manager"] = ["project management", "agile", "scrum", "stakeholder", "planning", "budget", "pmp", "prince2"],
["product manager"] = ["product", "roadmap", "stakeholder", "agile", "user research", "strategy", "backlog"],
["scrum master"] = ["scrum", "agile", "sprint", "kanban", "jira", "facilitation", "coaching"],
// Business roles
["business analyst"] = ["requirements", "analysis", "stakeholder", "documentation", "process", "sql", "jira"],
["marketing manager"] = ["marketing", "campaigns", "branding", "analytics", "seo", "content", "social media", "digital"],
["sales manager"] = ["sales", "revenue", "crm", "pipeline", "negotiation", "b2b", "b2c", "targets"],
// Finance roles
["accountant"] = ["accounting", "financial", "excel", "bookkeeping", "tax", "audit", "sage", "xero", "quickbooks"],
["financial analyst"] = ["financial", "modelling", "excel", "forecasting", "budgeting", "analysis", "reporting"],
// Design roles
["ux designer"] = ["ux", "user experience", "wireframe", "prototype", "figma", "sketch", "user research", "usability"],
["ui designer"] = ["ui", "visual design", "figma", "sketch", "adobe", "interface", "design systems"],
["graphic designer"] = ["photoshop", "illustrator", "indesign", "adobe", "design", "creative", "branding"],
// HR roles
["hr manager"] = ["hr", "human resources", "recruitment", "employee relations", "policy", "training", "performance"],
["recruiter"] = ["recruitment", "sourcing", "interviewing", "talent", "hiring", "ats", "linkedin"],
// Other common roles
["customer service"] = ["customer", "support", "service", "communication", "crm", "resolution"],
["operations manager"] = ["operations", "logistics", "process", "efficiency", "supply chain", "management"]
};
private static SkillsAlignmentAnalysis AnalyseSkillsAlignment(CVData cvData, List<TextAnalysisFlag> flags)
{
var mismatches = new List<SkillMismatch>();
var rolesChecked = 0;
var rolesWithMatchingSkills = 0;
// Normalize skills for matching
var skillsLower = cvData.Skills
.Select(s => s.ToLower().Trim())
.ToHashSet();
// Also extract skills mentioned in descriptions
var allText = GetAllDescriptionText(cvData).ToLower();
foreach (var job in cvData.Employment)
{
var titleLower = job.JobTitle.ToLower();
foreach (var (rolePattern, expectedSkills) in RoleSkillsMap)
{
if (!titleLower.Contains(rolePattern)) continue;
rolesChecked++;
// Find matching skills (in skills list OR mentioned in descriptions)
var matchingSkills = expectedSkills
.Where(expected =>
skillsLower.Any(s => s.Contains(expected)) ||
allText.Contains(expected))
.ToList();
if (matchingSkills.Count >= 2)
{
rolesWithMatchingSkills++;
}
else
{
mismatches.Add(new SkillMismatch
{
JobTitle = job.JobTitle,
CompanyName = job.CompanyName,
ExpectedSkills = expectedSkills.Take(5).ToList(),
MatchingSkills = matchingSkills
});
}
break; // Only match first role pattern
}
}
// Generate flags for significant mismatches
if (mismatches.Count >= 2)
{
var examples = mismatches.Take(2)
.Select(m => $"'{m.JobTitle}' lacks typical skills")
.ToList();
flags.Add(new TextAnalysisFlag
{
Type = "SkillsJobMismatch",
Severity = "Warning",
Message = $"{mismatches.Count} roles have few matching skills listed. {string.Join("; ", examples)}. Expected skills like: {string.Join(", ", mismatches.First().ExpectedSkills.Take(3))}",
ScoreImpact = -8
});
}
else if (mismatches.Count == 1)
{
var m = mismatches.First();
flags.Add(new TextAnalysisFlag
{
Type = "SkillsJobMismatch",
Severity = "Info",
Message = $"Role '{m.JobTitle}' at {m.CompanyName} has limited matching skills. Expected: {string.Join(", ", m.ExpectedSkills.Take(4))}",
ScoreImpact = -3
});
}
return new SkillsAlignmentAnalysis
{
TotalRolesChecked = rolesChecked,
RolesWithMatchingSkills = rolesWithMatchingSkills,
Mismatches = mismatches
};
}
#endregion
#region Unrealistic Metrics Detection
private static MetricsAnalysis AnalyseMetrics(CVData cvData, List<TextAnalysisFlag> flags)
{
var allText = GetAllDescriptionText(cvData);
var suspiciousMetrics = new List<SuspiciousMetric>();
var totalMetrics = 0;
var plausibleMetrics = 0;
// Revenue/growth increase patterns
var revenuePattern = RevenueIncreasePattern();
foreach (Match match in revenuePattern.Matches(allText))
{
totalMetrics++;
var value = double.Parse(match.Groups[1].Value);
if (value > 300)
{
suspiciousMetrics.Add(new SuspiciousMetric
{
ClaimText = match.Value,
Value = value,
Reason = $"{value}% increase is exceptionally high - requires verification"
});
}
else if (value > 200)
{
suspiciousMetrics.Add(new SuspiciousMetric
{
ClaimText = match.Value,
Value = value,
Reason = $"{value}% is unusually high for most contexts"
});
}
else
{
plausibleMetrics++;
}
}
// Cost reduction patterns
var costPattern = CostReductionPattern();
foreach (Match match in costPattern.Matches(allText))
{
totalMetrics++;
var value = double.Parse(match.Groups[1].Value);
if (value > 70)
{
suspiciousMetrics.Add(new SuspiciousMetric
{
ClaimText = match.Value,
Value = value,
Reason = $"{value}% cost reduction is extremely rare"
});
}
else
{
plausibleMetrics++;
}
}
// Efficiency/productivity improvements
var efficiencyPattern = EfficiencyPattern();
foreach (Match match in efficiencyPattern.Matches(allText))
{
totalMetrics++;
var value = double.Parse(match.Groups[1].Value);
if (value > 500)
{
suspiciousMetrics.Add(new SuspiciousMetric
{
ClaimText = match.Value,
Value = value,
Reason = $"{value}% efficiency gain is implausible"
});
}
else if (value > 200)
{
suspiciousMetrics.Add(new SuspiciousMetric
{
ClaimText = match.Value,
Value = value,
Reason = $"{value}% improvement is unusually high"
});
}
else
{
plausibleMetrics++;
}
}
// Check for suspiciously round numbers
var (roundCount, roundRatio) = AnalyseRoundNumbers(allText);
// Generate flags
if (suspiciousMetrics.Count >= 2)
{
flags.Add(new TextAnalysisFlag
{
Type = "UnrealisticMetrics",
Severity = "Warning",
Message = $"{suspiciousMetrics.Count} achievement metrics appear exaggerated. Example: \"{suspiciousMetrics.First().ClaimText}\" - {suspiciousMetrics.First().Reason}",
ScoreImpact = -10
});
}
else if (suspiciousMetrics.Count == 1)
{
flags.Add(new TextAnalysisFlag
{
Type = "UnrealisticMetric",
Severity = "Info",
Message = $"Metric may be exaggerated: \"{suspiciousMetrics.First().ClaimText}\" - {suspiciousMetrics.First().Reason}",
ScoreImpact = -3
});
}
if (roundRatio > 0.8 && totalMetrics >= 4)
{
flags.Add(new TextAnalysisFlag
{
Type = "SuspiciouslyRoundNumbers",
Severity = "Info",
Message = $"{roundCount} of {totalMetrics} metrics are round numbers (ending in 0 or 5) - real data is rarely this clean",
ScoreImpact = -3
});
}
return new MetricsAnalysis
{
TotalMetricsClaimed = totalMetrics,
PlausibleMetrics = plausibleMetrics,
SuspiciousMetrics = suspiciousMetrics.Count,
RoundNumberCount = roundCount,
RoundNumberRatio = roundRatio,
SuspiciousMetricsList = suspiciousMetrics
};
}
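// Example matches: "increased revenue by 150%", "grew sales 40%"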
[GeneratedRegex(@"(?:increased|grew|boosted|raised|improved)\s+(?:\w+\s+){0,3}(?:by\s+)?(\d+)%", RegexOptions.IgnoreCase)]
private static partial Regex RevenueIncreasePattern();
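// Example matches: "reduced costs by 30%", "cut processing time 45%"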
[GeneratedRegex(@"(?:reduced|cut|decreased|saved|lowered)\s+(?:\w+\s+){0,3}(?:by\s+)?(\d+)%", RegexOptions.IgnoreCase)]
private static partial Regex CostReductionPattern();
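// Example matches: "60% faster", "35% improvement"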
[GeneratedRegex(@"(\d+)%\s+(?:faster|quicker|more efficient|improvement|productivity|increase)", RegexOptions.IgnoreCase)]
private static partial Regex EfficiencyPattern();
private static (int RoundCount, double RoundRatio) AnalyseRoundNumbers(string text)
{
var numberPattern = NumberPattern();
var matches = numberPattern.Matches(text);
var total = 0;
var roundCount = 0;
foreach (Match match in matches)
{
var numStr = match.Groups[1].Success ? match.Groups[1].Value : match.Groups[2].Value;
numStr = numStr.Replace(",", "");
if (int.TryParse(numStr, out var num) && num >= 10)
{
total++;
if (num % 5 == 0) // "round" = ends in 0 or 5
{
roundCount++;
}
}
}
return (roundCount, total > 0 ? (double)roundCount / total : 0);
}
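// Matches bare percentages ("45%") and currency amounts ("$5,000", "£12,500")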
[GeneratedRegex(@"(\d+)%|(?:\$|£)([\d,]+)")]
private static partial Regex NumberPattern();
#endregion
#region Helpers
private static string GetAllDescriptionText(CVData cvData)
{
var descriptions = cvData.Employment
.Where(e => !string.IsNullOrWhiteSpace(e.Description))
.Select(e => e.Description!);
return string.Join(" ", descriptions);
}
#endregion
}