feat: Add text analysis checks for CV verification
Implement four new CV verification checks without external APIs: 1. Buzzword detection - flags excessive clichés (50+ patterns) 2. Vague achievement detection - identifies weak language vs quantified results 3. Skills/job title alignment - checks skills match claimed roles (25+ role mappings) 4. Unrealistic metrics detection - flags implausible claims (>200% growth, etc.) New files: - ITextAnalysisService interface - TextAnalysisResult models - TextAnalysisService implementation (~400 lines) Integration: - Added "Analysing Content" processing stage - Flags appear under Plausibility category - TextAnalysis section added to veracity report 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -114,6 +114,7 @@ public static class DependencyInjection
|
||||
services.AddScoped<ICompanyVerifierService, CompanyVerifierService>();
|
||||
services.AddScoped<IEducationVerifierService, EducationVerifierService>();
|
||||
services.AddScoped<ITimelineAnalyserService, TimelineAnalyserService>();
|
||||
services.AddScoped<ITextAnalysisService, TextAnalysisService>();
|
||||
services.AddScoped<ICVCheckService, CVCheckService>();
|
||||
services.AddScoped<IUserContextService, UserContextService>();
|
||||
services.AddScoped<IAuditService, AuditService>();
|
||||
|
||||
@@ -18,6 +18,7 @@ public sealed class ProcessCVCheckJob
|
||||
private readonly ICompanyVerifierService _companyVerifierService;
|
||||
private readonly IEducationVerifierService _educationVerifierService;
|
||||
private readonly ITimelineAnalyserService _timelineAnalyserService;
|
||||
private readonly ITextAnalysisService _textAnalysisService;
|
||||
private readonly IAuditService _auditService;
|
||||
private readonly ILogger<ProcessCVCheckJob> _logger;
|
||||
|
||||
@@ -41,6 +42,7 @@ public sealed class ProcessCVCheckJob
|
||||
ICompanyVerifierService companyVerifierService,
|
||||
IEducationVerifierService educationVerifierService,
|
||||
ITimelineAnalyserService timelineAnalyserService,
|
||||
ITextAnalysisService textAnalysisService,
|
||||
IAuditService auditService,
|
||||
ILogger<ProcessCVCheckJob> logger)
|
||||
{
|
||||
@@ -50,6 +52,7 @@ public sealed class ProcessCVCheckJob
|
||||
_companyVerifierService = companyVerifierService;
|
||||
_educationVerifierService = educationVerifierService;
|
||||
_timelineAnalyserService = timelineAnalyserService;
|
||||
_textAnalysisService = textAnalysisService;
|
||||
_auditService = auditService;
|
||||
_logger = logger;
|
||||
}
|
||||
@@ -198,10 +201,23 @@ public sealed class ProcessCVCheckJob
|
||||
"Timeline analysis for check {CheckId}: {GapCount} gaps, {OverlapCount} overlaps",
|
||||
cvCheckId, timelineAnalysis.Gaps.Count, timelineAnalysis.Overlaps.Count);
|
||||
|
||||
// Step 7b: Analyse text for buzzwords, vague achievements, skills alignment, and metrics
|
||||
cvCheck.ProcessingStage = "Analysing Content";
|
||||
await _dbContext.SaveChangesAsync(cancellationToken);
|
||||
|
||||
var textAnalysis = _textAnalysisService.Analyse(cvData);
|
||||
|
||||
_logger.LogDebug(
|
||||
"Text analysis for check {CheckId}: {BuzzwordCount} buzzwords, {VagueCount} vague statements, {MismatchCount} skill mismatches",
|
||||
cvCheckId,
|
||||
textAnalysis.BuzzwordAnalysis.TotalBuzzwords,
|
||||
textAnalysis.AchievementAnalysis.VagueStatements,
|
||||
textAnalysis.SkillsAlignment.Mismatches.Count);
|
||||
|
||||
// Step 8: Calculate veracity score
|
||||
cvCheck.ProcessingStage = "Calculating Score";
|
||||
await _dbContext.SaveChangesAsync(cancellationToken);
|
||||
var (score, flags) = CalculateVeracityScore(verificationResults, educationResults, timelineAnalysis, cvData);
|
||||
var (score, flags) = CalculateVeracityScore(verificationResults, educationResults, timelineAnalysis, textAnalysis, cvData);
|
||||
|
||||
_logger.LogDebug("Calculated veracity score for check {CheckId}: {Score}", cvCheckId, score);
|
||||
|
||||
@@ -246,6 +262,7 @@ public sealed class ProcessCVCheckJob
|
||||
EmploymentVerifications = verificationResults,
|
||||
EducationVerifications = educationResults,
|
||||
TimelineAnalysis = timelineAnalysis,
|
||||
TextAnalysis = textAnalysis,
|
||||
Flags = flags,
|
||||
GeneratedAt = DateTime.UtcNow
|
||||
};
|
||||
@@ -290,6 +307,7 @@ public sealed class ProcessCVCheckJob
|
||||
List<CompanyVerificationResult> verifications,
|
||||
List<EducationVerificationResult> educationResults,
|
||||
TimelineAnalysisResult timeline,
|
||||
TextAnalysisResult textAnalysis,
|
||||
CVData cvData)
|
||||
{
|
||||
var score = BaseScore;
|
||||
@@ -484,6 +502,32 @@ public sealed class ProcessCVCheckJob
|
||||
});
|
||||
}
|
||||
|
||||
// Process text analysis flags (buzzwords, vague achievements, skills alignment, metrics)
|
||||
foreach (var textFlag in textAnalysis.Flags)
|
||||
{
|
||||
score += textFlag.ScoreImpact; // ScoreImpact is already negative
|
||||
|
||||
flags.Add(new FlagResult
|
||||
{
|
||||
Category = FlagCategory.Plausibility.ToString(),
|
||||
Severity = textFlag.Severity,
|
||||
Title = textFlag.Type switch
|
||||
{
|
||||
"ExcessiveBuzzwords" => "Excessive Buzzwords",
|
||||
"HighBuzzwordCount" => "High Buzzword Count",
|
||||
"VagueAchievements" => "Vague Achievements",
|
||||
"LackOfQuantification" => "Lack of Quantification",
|
||||
"SkillsJobMismatch" => "Skills/Job Mismatch",
|
||||
"UnrealisticMetrics" => "Unrealistic Metrics",
|
||||
"UnrealisticMetric" => "Unrealistic Metric",
|
||||
"SuspiciouslyRoundNumbers" => "Suspiciously Round Numbers",
|
||||
_ => textFlag.Type
|
||||
},
|
||||
Description = textFlag.Message,
|
||||
ScoreImpact = textFlag.ScoreImpact
|
||||
});
|
||||
}
|
||||
|
||||
// Deduplicate flags based on Title + Description
|
||||
var uniqueFlags = flags
|
||||
.GroupBy(f => (f.Title, f.Description))
|
||||
|
||||
593
src/RealCV.Infrastructure/Services/TextAnalysisService.cs
Normal file
593
src/RealCV.Infrastructure/Services/TextAnalysisService.cs
Normal file
@@ -0,0 +1,593 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using RealCV.Application.Interfaces;
|
||||
using RealCV.Application.Models;
|
||||
|
||||
namespace RealCV.Infrastructure.Services;
|
||||
|
||||
public sealed partial class TextAnalysisService : ITextAnalysisService
|
||||
{
|
||||
/// <summary>Logger that receives the start/completion debug messages of each analysis run.</summary>
private readonly ILogger<TextAnalysisService> _logger;

/// <summary>Creates the service.</summary>
/// <param name="logger">Logger used for debug diagnostics.</param>
public TextAnalysisService(ILogger<TextAnalysisService> logger) => _logger = logger;
|
||||
|
||||
/// <summary>
/// Runs the buzzword, achievement, skills-alignment and metrics analyses over a CV
/// and returns their individual results together with every flag they raised.
/// </summary>
/// <param name="cvData">Parsed CV to analyse.</param>
/// <returns>The four per-analysis results plus the combined flag list.</returns>
public TextAnalysisResult Analyse(CVData cvData)
{
    _logger.LogDebug("Starting text analysis for CV: {Name}", cvData.FullName);

    // Every analyser appends to this one list, so flags end up in call order:
    // buzzwords, achievements, skills alignment, metrics.
    List<TextAnalysisFlag> raisedFlags = new();

    // Member initialisers run top to bottom, which keeps the analyser order above.
    var result = new TextAnalysisResult
    {
        BuzzwordAnalysis = AnalyseBuzzwords(cvData, raisedFlags),
        AchievementAnalysis = AnalyseAchievements(cvData, raisedFlags),
        SkillsAlignment = AnalyseSkillsAlignment(cvData, raisedFlags),
        MetricsAnalysis = AnalyseMetrics(cvData, raisedFlags),
        Flags = raisedFlags
    };

    _logger.LogDebug(
        "Text analysis complete: {BuzzwordCount} buzzwords, {VagueCount} vague statements, {MismatchCount} skill mismatches, {SuspiciousCount} suspicious metrics",
        result.BuzzwordAnalysis.TotalBuzzwords,
        result.AchievementAnalysis.VagueStatements,
        result.SkillsAlignment.Mismatches.Count,
        result.MetricsAnalysis.SuspiciousMetrics);

    return result;
}
|
||||
|
||||
#region Buzzword Detection
|
||||
|
||||
/// <summary>Single words and short terms treated as CV clichés.</summary>
private static readonly HashSet<string> Buzzwords = new(StringComparer.OrdinalIgnoreCase)
{
    // Overused personality descriptors
    "results-driven", "detail-oriented", "team player", "self-starter",
    "go-getter", "proactive", "dynamic", "passionate", "motivated",
    "hardworking", "dedicated", "enthusiastic", "driven",

    // Corporate jargon
    "synergy", "leverage", "paradigm", "holistic", "innovative",
    "disruptive", "scalable", "agile", "optimization", "strategic",
    "streamline", "spearhead", "champion", "facilitate",

    // Vague superlatives
    "best-in-class", "world-class", "cutting-edge", "state-of-the-art",
    "next-generation", "game-changer", "thought leader",

    // Empty phrases
    "think outside the box", "hit the ground running", "move the needle",
    "low-hanging fruit", "value-add", "bandwidth", "circle back",
    "deep dive", "pivot", "ecosystem"
};

/// <summary>Longer stock phrases; checked before <see cref="Buzzwords"/> so a phrase absorbs the words it contains.</summary>
private static readonly HashSet<string> BuzzwordPhrases = new(StringComparer.OrdinalIgnoreCase)
{
    "results-driven professional",
    "highly motivated individual",
    "proven track record",
    "strong work ethic",
    "excellent interpersonal skills",
    "ability to work independently",
    "thrive under pressure",
    "fast-paced environment",
    "excellent communication skills",
    "strategic thinker",
    "problem solver",
    "out of the box",
    "above and beyond",
    "value proposition"
};

/// <summary>
/// Finds buzzwords and stock phrases in the employment descriptions and raises a flag
/// when six or more distinct ones are present (a stronger flag at ten or more).
/// </summary>
/// <param name="cvData">CV whose employment descriptions are scanned.</param>
/// <param name="flags">Shared flag list; at most one flag is appended.</param>
/// <returns>The distinct terms found, their count, and their density per 100 words.</returns>
private static BuzzwordAnalysis AnalyseBuzzwords(CVData cvData, List<TextAnalysisFlag> flags)
{
    var allText = GetAllDescriptionText(cvData);

    // Split on any whitespace: descriptions are usually newline-separated bullets, and
    // splitting on ' ' alone under-counted words and so inflated the density.
    var wordCount = allText.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries).Length;

    var found = new List<string>();

    // Check for phrases first
    foreach (var phrase in BuzzwordPhrases)
    {
        if (ContainsAtWordStart(allText, phrase))
        {
            found.Add(phrase);
        }
    }

    // Check individual buzzwords, skipping any already covered by a found phrase or a
    // longer found buzzword (e.g. "driven" once "results-driven" has been recorded).
    foreach (var buzzword in Buzzwords)
    {
        if (ContainsAtWordStart(allText, buzzword) &&
            !found.Any(f => f.Contains(buzzword, StringComparison.OrdinalIgnoreCase)))
        {
            found.Add(buzzword);
        }
    }

    var density = wordCount > 0 ? found.Count / (wordCount / 100.0) : 0;

    // Generate flags based on severity
    if (found.Count >= 10)
    {
        flags.Add(new TextAnalysisFlag
        {
            Type = "ExcessiveBuzzwords",
            Severity = "Warning",
            Message = $"CV contains {found.Count} buzzwords/clichés - may indicate template or AI-generated content. Examples: {string.Join(", ", found.Take(5))}",
            ScoreImpact = -10
        });
    }
    else if (found.Count >= 6)
    {
        flags.Add(new TextAnalysisFlag
        {
            Type = "HighBuzzwordCount",
            Severity = "Info",
            Message = $"CV contains {found.Count} common buzzwords: {string.Join(", ", found.Take(4))}",
            ScoreImpact = -5
        });
    }

    return new BuzzwordAnalysis
    {
        TotalBuzzwords = found.Count,
        BuzzwordsFound = found,
        BuzzwordDensity = density
    };

    // True when `term` occurs in `text` (ignoring case) starting at the beginning of a word.
    // Trailing letters are allowed so inflections still count ("leverage" matches "leveraged"),
    // but a term embedded in another word does not ("agile" no longer matches "fragile").
    static bool ContainsAtWordStart(string text, string term)
    {
        for (var at = text.IndexOf(term, StringComparison.OrdinalIgnoreCase);
             at >= 0;
             at = text.IndexOf(term, at + 1, StringComparison.OrdinalIgnoreCase))
        {
            if (at == 0 || !char.IsLetterOrDigit(text[at - 1]))
            {
                return true;
            }
        }

        return false;
    }
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Vague Achievement Detection
|
||||
|
||||
/// <summary>Phrases that describe duties rather than outcomes.</summary>
private static readonly string[] VaguePatterns =
[
    "responsible for",
    "worked on",
    "helped with",
    "assisted in",
    "involved in",
    "participated in",
    "contributed to",
    "various tasks",
    "many projects",
    "multiple initiatives",
    "day-to-day",
    "duties included",
    "tasked with"
];

/// <summary>Outcome-oriented verbs; a statement counts as "strong" when it opens with one of them.</summary>
private static readonly string[] StrongActionVerbs =
[
    "achieved", "increased", "reduced", "decreased", "improved",
    "generated", "saved", "developed", "created", "launched",
    "implemented", "negotiated", "secured", "designed", "built",
    "led", "managed", "delivered", "transformed", "accelerated",
    "streamlined", "consolidated", "eliminated", "maximized", "minimized"
];

/// <summary>
/// Boundaries between statements: line breaks, bullet glyphs, and a dash or asterisk
/// that stands alone between spaces. A bare '-' is deliberately NOT a separator, so
/// hyphenated words such as "day-to-day" or "cross-functional" stay intact.
/// </summary>
private static readonly string[] StatementSeparators =
[
    "\r\n", "\n", "\r", "•", "●", "■", "▪", " - ", " * "
];

/// <summary>
/// Splits each job description into statements and counts how many are vague,
/// quantified, or open with a strong action verb. Flags are only raised once at
/// least five statements have been seen.
/// </summary>
/// <param name="cvData">CV whose employment descriptions are analysed.</param>
/// <param name="flags">Shared flag list; up to two flags are appended.</param>
/// <returns>The statement counts and up to three truncated vague examples.</returns>
private static AchievementAnalysis AnalyseAchievements(CVData cvData, List<TextAnalysisFlag> flags)
{
    var totalStatements = 0;
    var vagueStatements = 0;
    var quantifiedStatements = 0;
    var strongVerbStatements = 0;
    var vagueExamples = new List<string>();

    foreach (var job in cvData.Employment)
    {
        if (string.IsNullOrWhiteSpace(job.Description))
        {
            continue;
        }

        // Split into bullet points / lines, then drop a leading "-" or "*" list marker.
        var statements = job.Description
            .Split(StatementSeparators, StringSplitOptions.RemoveEmptyEntries)
            .Select(s => s.Trim().TrimStart('-', '*').Trim())
            .Where(s => s.Length > 10)
            .ToList();

        foreach (var statement in statements)
        {
            totalStatements++;

            // Check for quantification (numbers, percentages, currency)
            if (HasQuantification().IsMatch(statement))
            {
                quantifiedStatements++;
            }

            // Check for strong action verbs at the start
            if (StrongActionVerbs.Any(verb => OpensWithWord(statement, verb)))
            {
                strongVerbStatements++;
            }

            // Check for vague patterns
            if (VaguePatterns.Any(p => statement.Contains(p, StringComparison.OrdinalIgnoreCase)))
            {
                vagueStatements++;
                if (vagueExamples.Count < 3)
                {
                    var truncated = statement.Length > 60 ? statement[..57] + "..." : statement;
                    vagueExamples.Add(truncated);
                }
            }
        }
    }

    // Generate flags
    if (totalStatements > 0)
    {
        var vagueRatio = (double)vagueStatements / totalStatements;
        var quantifiedRatio = (double)quantifiedStatements / totalStatements;

        if (vagueRatio > 0.5 && totalStatements >= 5)
        {
            flags.Add(new TextAnalysisFlag
            {
                Type = "VagueAchievements",
                Severity = "Warning",
                Message = $"{vagueStatements} of {totalStatements} statements use vague language (e.g., 'responsible for', 'helped with'). Consider: \"{vagueExamples.FirstOrDefault()}\"",
                ScoreImpact = -8
            });
        }

        if (quantifiedRatio < 0.2 && totalStatements >= 5)
        {
            // Informational only: reported without affecting the score.
            flags.Add(new TextAnalysisFlag
            {
                Type = "LackOfQuantification",
                Severity = "Info",
                Message = $"Only {quantifiedStatements} of {totalStatements} achievement statements include measurable results",
                ScoreImpact = 0
            });
        }
    }

    return new AchievementAnalysis
    {
        TotalStatements = totalStatements,
        VagueStatements = vagueStatements,
        QuantifiedStatements = quantifiedStatements,
        StrongActionVerbStatements = strongVerbStatements,
        VagueExamples = vagueExamples
    };

    // True when the statement begins with `word` as a complete word, ignoring case
    // ("Led a team" matches "led"; "Ledger reconciliation" does not).
    static bool OpensWithWord(string statement, string word) =>
        statement.StartsWith(word, StringComparison.OrdinalIgnoreCase) &&
        (statement.Length == word.Length || !char.IsLetter(statement[word.Length]));
}

/// <summary>Matches a percentage, a $ or £ amount, a number with a magnitude word/suffix, or a multiplier such as "3x".</summary>
[GeneratedRegex(@"\d+%|\$[\d,]+|£[\d,]+|\d+\s*(million|thousand|k\b|m\b)|[0-9]+x\b", RegexOptions.IgnoreCase)]
private static partial Regex HasQuantification();
|
||||
|
||||
#endregion
|
||||
|
||||
#region Skills Alignment
|
||||
|
||||
/// <summary>
/// Maps a job-title fragment (matched case-insensitively inside the title) to the
/// skill keywords typically associated with that role.
/// </summary>
private static readonly Dictionary<string, HashSet<string>> RoleSkillsMap = new(StringComparer.OrdinalIgnoreCase)
{
    // Software/Tech roles
    ["software engineer"] = ["programming", "coding", "development", "software", "git", "testing", "code", "developer", "engineering"],
    ["software developer"] = ["programming", "coding", "development", "software", "git", "testing", "code", "developer"],
    ["web developer"] = ["html", "css", "javascript", "web", "frontend", "backend", "react", "angular", "vue", "node"],
    ["frontend developer"] = ["html", "css", "javascript", "react", "angular", "vue", "typescript", "ui", "ux"],
    ["backend developer"] = ["api", "database", "sql", "server", "node", "python", "java", "c#", ".net"],
    ["full stack"] = ["frontend", "backend", "javascript", "database", "api", "react", "node"],
    ["devops engineer"] = ["ci/cd", "docker", "kubernetes", "aws", "azure", "jenkins", "terraform", "infrastructure"],
    ["data scientist"] = ["python", "machine learning", "statistics", "data analysis", "sql", "r", "tensorflow", "pandas"],
    ["data analyst"] = ["sql", "excel", "data", "analysis", "tableau", "power bi", "statistics", "reporting"],
    ["data engineer"] = ["sql", "python", "etl", "data pipeline", "spark", "hadoop", "database", "aws", "azure"],

    // Project/Product roles
    ["project manager"] = ["project management", "agile", "scrum", "stakeholder", "planning", "budget", "pmp", "prince2"],
    ["product manager"] = ["product", "roadmap", "stakeholder", "agile", "user research", "strategy", "backlog"],
    ["scrum master"] = ["scrum", "agile", "sprint", "kanban", "jira", "facilitation", "coaching"],

    // Business roles
    ["business analyst"] = ["requirements", "analysis", "stakeholder", "documentation", "process", "sql", "jira"],
    ["marketing manager"] = ["marketing", "campaigns", "branding", "analytics", "seo", "content", "social media", "digital"],
    ["sales manager"] = ["sales", "revenue", "crm", "pipeline", "negotiation", "b2b", "b2c", "targets"],

    // Finance roles
    ["accountant"] = ["accounting", "financial", "excel", "bookkeeping", "tax", "audit", "sage", "xero", "quickbooks"],
    ["financial analyst"] = ["financial", "modelling", "excel", "forecasting", "budgeting", "analysis", "reporting"],

    // Design roles
    ["ux designer"] = ["ux", "user experience", "wireframe", "prototype", "figma", "sketch", "user research", "usability"],
    ["ui designer"] = ["ui", "visual design", "figma", "sketch", "adobe", "interface", "design systems"],
    ["graphic designer"] = ["photoshop", "illustrator", "indesign", "adobe", "design", "creative", "branding"],

    // HR roles
    ["hr manager"] = ["hr", "human resources", "recruitment", "employee relations", "policy", "training", "performance"],
    ["recruiter"] = ["recruitment", "sourcing", "interviewing", "talent", "hiring", "ats", "linkedin"],

    // Other common roles
    ["customer service"] = ["customer", "support", "service", "communication", "crm", "resolution"],
    ["operations manager"] = ["operations", "logistics", "process", "efficiency", "supply chain", "management"]
};

/// <summary>
/// For every job whose title contains a known role pattern, checks that at least two
/// of that role's expected skills appear in the skills list or in any job description.
/// Roles with fewer than two matches are recorded as mismatches and flagged.
/// </summary>
/// <param name="cvData">CV providing the skills list and employment history.</param>
/// <param name="flags">Shared flag list; at most one flag is appended.</param>
/// <returns>How many roles were checked, how many aligned, and the mismatches.</returns>
private static SkillsAlignmentAnalysis AnalyseSkillsAlignment(CVData cvData, List<TextAnalysisFlag> flags)
{
    var mismatches = new List<SkillMismatch>();
    var rolesChecked = 0;
    var rolesWithMatchingSkills = 0;

    // Materialise once; the list is scanned for every expected skill of every role.
    var listedSkills = cvData.Skills.ToList();

    // Skills may also be evidenced by the job descriptions themselves.
    var allText = GetAllDescriptionText(cvData);

    foreach (var job in cvData.Employment)
    {
        if (string.IsNullOrWhiteSpace(job.JobTitle))
        {
            continue;
        }

        foreach (var (rolePattern, expectedSkills) in RoleSkillsMap)
        {
            if (!job.JobTitle.Contains(rolePattern, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            rolesChecked++;

            // Find matching skills (in skills list OR mentioned in descriptions)
            var matchingSkills = expectedSkills
                .Where(expected =>
                    listedSkills.Any(skill => Mentions(skill, expected)) ||
                    Mentions(allText, expected))
                .ToList();

            if (matchingSkills.Count >= 2)
            {
                rolesWithMatchingSkills++;
            }
            else
            {
                mismatches.Add(new SkillMismatch
                {
                    JobTitle = job.JobTitle,
                    CompanyName = job.CompanyName,
                    ExpectedSkills = expectedSkills.Take(5).ToList(),
                    MatchingSkills = matchingSkills
                });
            }

            break; // Only match first role pattern
        }
    }

    // Generate flags for significant mismatches
    if (mismatches.Count >= 2)
    {
        var examples = mismatches.Take(2)
            .Select(m => $"'{m.JobTitle}' lacks typical skills")
            .ToList();

        flags.Add(new TextAnalysisFlag
        {
            Type = "SkillsJobMismatch",
            Severity = "Warning",
            Message = $"{mismatches.Count} roles have few matching skills listed. {string.Join("; ", examples)}. Expected skills like: {string.Join(", ", mismatches.First().ExpectedSkills.Take(3))}",
            ScoreImpact = -8
        });
    }
    else if (mismatches.Count == 1)
    {
        var m = mismatches.First();
        flags.Add(new TextAnalysisFlag
        {
            Type = "SkillsJobMismatch",
            Severity = "Info",
            Message = $"Role '{m.JobTitle}' at {m.CompanyName} has limited matching skills. Expected: {string.Join(", ", m.ExpectedSkills.Take(4))}",
            ScoreImpact = -3
        });
    }

    return new SkillsAlignmentAnalysis
    {
        TotalRolesChecked = rolesChecked,
        RolesWithMatchingSkills = rolesWithMatchingSkills,
        Mismatches = mismatches
    };

    // Case-insensitive test for a skill keyword inside free text.
    // Keywords longer than two characters keep plain substring semantics (so "sql" still
    // matches "MySQL"). One- and two-character keywords ("r", "ui", "ux", "hr", "c#") must
    // stand alone as a token; as substrings they match almost any sentence ("r" is in
    // "responsible", "hr" is in "three") and handed every CV a free skill match.
    static bool Mentions(string text, string term)
    {
        if (term.Length > 2)
        {
            return text.Contains(term, StringComparison.OrdinalIgnoreCase);
        }

        for (var at = text.IndexOf(term, StringComparison.OrdinalIgnoreCase);
             at >= 0;
             at = text.IndexOf(term, at + 1, StringComparison.OrdinalIgnoreCase))
        {
            var end = at + term.Length;
            var cleanStart = at == 0 || !char.IsLetterOrDigit(text[at - 1]);
            var cleanEnd = end == text.Length || !char.IsLetterOrDigit(text[end]);
            if (cleanStart && cleanEnd)
            {
                return true;
            }
        }

        return false;
    }
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Unrealistic Metrics Detection
|
||||
|
||||
/// <summary>
/// Extracts percentage claims (growth, cost reduction, efficiency) from the employment
/// descriptions, classifies each as plausible or suspicious by fixed thresholds, and
/// checks whether the quoted figures are predominantly round numbers.
/// </summary>
/// <param name="cvData">CV whose employment descriptions are scanned.</param>
/// <param name="flags">Shared flag list; up to two flags are appended.</param>
/// <returns>Metric counts, the suspicious claims, and the round-number statistics.</returns>
private static MetricsAnalysis AnalyseMetrics(CVData cvData, List<TextAnalysisFlag> flags)
{
    var allText = GetAllDescriptionText(cvData);
    var suspiciousMetrics = new List<SuspiciousMetric>();
    var totalMetrics = 0;
    var plausibleMetrics = 0;

    // Revenue/growth increase patterns
    Classify(RevenueIncreasePattern(), v => v switch
    {
        > 300 => $"{v}% increase is exceptionally high - requires verification",
        > 200 => $"{v}% is unusually high for most contexts",
        _ => null
    });

    // Cost reduction patterns
    Classify(CostReductionPattern(), v => v > 70
        ? $"{v}% cost reduction is extremely rare"
        : null);

    // Efficiency/productivity improvements
    Classify(EfficiencyPattern(), v => v switch
    {
        > 500 => $"{v}% efficiency gain is implausible",
        > 200 => $"{v}% improvement is unusually high",
        _ => null
    });

    // Check for suspiciously round numbers
    var (roundCount, roundRatio, numbersChecked) = AnalyseRoundNumbers(allText);

    // Generate flags
    if (suspiciousMetrics.Count >= 2)
    {
        flags.Add(new TextAnalysisFlag
        {
            Type = "UnrealisticMetrics",
            Severity = "Warning",
            Message = $"{suspiciousMetrics.Count} achievement metrics appear exaggerated. Example: \"{suspiciousMetrics.First().ClaimText}\" - {suspiciousMetrics.First().Reason}",
            ScoreImpact = -10
        });
    }
    else if (suspiciousMetrics.Count == 1)
    {
        flags.Add(new TextAnalysisFlag
        {
            Type = "UnrealisticMetric",
            Severity = "Info",
            Message = $"Metric may be exaggerated: \"{suspiciousMetrics.First().ClaimText}\" - {suspiciousMetrics.First().Reason}",
            ScoreImpact = -3
        });
    }

    if (roundRatio > 0.8 && totalMetrics >= 4)
    {
        // Report the round count against the population it was measured over. Using
        // totalMetrics as the denominator could print impossible text such as "7 of 4".
        flags.Add(new TextAnalysisFlag
        {
            Type = "SuspiciouslyRoundNumbers",
            Severity = "Info",
            Message = $"{roundCount} of {numbersChecked} metrics are round numbers (ending in 0 or 5) - real data is rarely this clean",
            ScoreImpact = -3
        });
    }

    return new MetricsAnalysis
    {
        TotalMetricsClaimed = totalMetrics,
        PlausibleMetrics = plausibleMetrics,
        SuspiciousMetrics = suspiciousMetrics.Count,
        RoundNumberCount = roundCount,
        RoundNumberRatio = roundRatio,
        SuspiciousMetricsList = suspiciousMetrics
    };

    // Runs one pattern over the text. `suspicionFor` returns the reason a value is
    // suspicious, or null when the value is plausible.
    void Classify(Regex pattern, Func<double, string?> suspicionFor)
    {
        foreach (Match match in pattern.Matches(allText))
        {
            // \d also matches non-ASCII digits, which Parse would reject with an exception;
            // TryParse with the invariant culture skips such captures instead of failing the job.
            if (!double.TryParse(
                    match.Groups[1].Value,
                    System.Globalization.NumberStyles.None,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out var value))
            {
                continue;
            }

            totalMetrics++;

            var reason = suspicionFor(value);
            if (reason is null)
            {
                plausibleMetrics++;
                continue;
            }

            suspiciousMetrics.Add(new SuspiciousMetric
            {
                ClaimText = match.Value,
                Value = value,
                Reason = reason
            });
        }
    }
}

/// <summary>A growth verb, up to three intervening words, an optional "by", then an integer percentage.</summary>
[GeneratedRegex(@"(?:increased|grew|boosted|raised|improved)\s+(?:\w+\s+){0,3}(?:by\s+)?(\d+)%", RegexOptions.IgnoreCase)]
private static partial Regex RevenueIncreasePattern();

/// <summary>A reduction verb, up to three intervening words, an optional "by", then an integer percentage.</summary>
[GeneratedRegex(@"(?:reduced|cut|decreased|saved|lowered)\s+(?:\w+\s+){0,3}(?:by\s+)?(\d+)%", RegexOptions.IgnoreCase)]
private static partial Regex CostReductionPattern();

/// <summary>An integer percentage immediately followed by an efficiency/improvement word.</summary>
[GeneratedRegex(@"(\d+)%\s+(?:faster|quicker|more efficient|improvement|productivity|increase)", RegexOptions.IgnoreCase)]
private static partial Regex EfficiencyPattern();

/// <summary>
/// Counts how many percentage and $/£ figures of 10 or more are multiples of five.
/// </summary>
/// <param name="text">Text to scan.</param>
/// <returns>
/// The number of round figures, their share of all figures considered (0 when there
/// are none), and the number of figures considered.
/// </returns>
private static (int RoundCount, double RoundRatio, int Total) AnalyseRoundNumbers(string text)
{
    var total = 0;
    var roundCount = 0;

    foreach (Match match in NumberPattern().Matches(text))
    {
        // Group 1 is a percentage, group 2 a currency amount (may contain thousands separators).
        var digits = (match.Groups[1].Success ? match.Groups[1].Value : match.Groups[2].Value)
            .Replace(",", "");

        var parsed = long.TryParse(
            digits,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out var number);

        // Single-digit figures are ignored: they say nothing about rounding.
        if (!parsed || number < 10)
        {
            continue;
        }

        total++;

        // Every multiple of 10 is also a multiple of 5, so one test covers both endings.
        if (number % 5 == 0)
        {
            roundCount++;
        }
    }

    return (roundCount, total > 0 ? (double)roundCount / total : 0, total);
}

/// <summary>An integer percentage (group 1) or a $/£ amount with optional commas (group 2).</summary>
[GeneratedRegex(@"(\d+)%|(?:\$|£)([\d,]+)")]
private static partial Regex NumberPattern();
|
||||
|
||||
#endregion
|
||||
|
||||
#region Helpers
|
||||
|
||||
/// <summary>
/// Concatenates every non-blank employment description into one string,
/// separated by single spaces, in employment-list order.
/// </summary>
/// <param name="cvData">CV whose employment entries supply the descriptions.</param>
/// <returns>The joined text, or an empty string when no entry has a description.</returns>
private static string GetAllDescriptionText(CVData cvData) =>
    string.Join(
        " ",
        from entry in cvData.Employment
        let text = entry.Description
        where !string.IsNullOrWhiteSpace(text)
        select text);
|
||||
|
||||
#endregion
|
||||
}
|
||||
Reference in New Issue
Block a user