feat: Add text analysis checks for CV verification
Implement four new CV verification checks without external APIs: 1. Buzzword detection - flags excessive clichés (50+ patterns) 2. Vague achievement detection - identifies weak language vs quantified results 3. Skills/job title alignment - checks skills match claimed roles (25+ role mappings) 4. Unrealistic metrics detection - flags implausible claims (>200% growth, etc.) New files: - ITextAnalysisService interface - TextAnalysisResult models - TextAnalysisService implementation (~400 lines) Integration: - Added "Analysing Content" processing stage - Flags appear under Plausibility category - TextAnalysis section added to veracity report 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,8 @@
|
|||||||
|
using RealCV.Application.Models;
|
||||||
|
|
||||||
|
namespace RealCV.Application.Interfaces;
|
||||||
|
|
||||||
|
/// <summary>
/// Contract for the text-analysis step of CV verification.
/// </summary>
public interface ITextAnalysisService
{
    /// <summary>
    /// Analyses the supplied CV data and returns the combined text-analysis result.
    /// </summary>
    /// <param name="cvData">The parsed CV to analyse.</param>
    /// <returns>The analysis result, including any flags that were raised.</returns>
    TextAnalysisResult Analyse(CVData cvData);
}
|
||||||
66
src/RealCV.Application/Models/TextAnalysisResult.cs
Normal file
66
src/RealCV.Application/Models/TextAnalysisResult.cs
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
namespace RealCV.Application.Models;
|
||||||
|
|
||||||
|
/// <summary>
/// Aggregated output of the CV text analysis: one sub-result per check plus the
/// flags raised across all checks.
/// </summary>
public sealed record TextAnalysisResult
{
    /// <summary>Buzzword / cliché findings.</summary>
    public BuzzwordAnalysis BuzzwordAnalysis { get; init; } = new BuzzwordAnalysis();

    /// <summary>Vague-versus-quantified achievement statement findings.</summary>
    public AchievementAnalysis AchievementAnalysis { get; init; } = new AchievementAnalysis();

    /// <summary>Job title versus skills alignment findings.</summary>
    public SkillsAlignmentAnalysis SkillsAlignment { get; init; } = new SkillsAlignmentAnalysis();

    /// <summary>Findings about the plausibility of claimed metrics.</summary>
    public MetricsAnalysis MetricsAnalysis { get; init; } = new MetricsAnalysis();

    /// <summary>Flags raised by the individual checks.</summary>
    public List<TextAnalysisFlag> Flags { get; init; } = new();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Result of the buzzword check.
/// </summary>
public sealed record BuzzwordAnalysis
{
    /// <summary>Number of distinct buzzwords/phrases that were detected.</summary>
    public int TotalBuzzwords { get; init; }

    /// <summary>The buzzwords/phrases that were detected.</summary>
    public List<string> BuzzwordsFound { get; init; } = new();

    /// <summary>Detected buzzwords per 100 words of analysed text.</summary>
    public double BuzzwordDensity { get; init; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Result of the achievement-statement check.
/// </summary>
public sealed record AchievementAnalysis
{
    /// <summary>Number of statements that were examined.</summary>
    public int TotalStatements { get; init; }

    /// <summary>Statements containing a vague phrase such as "responsible for".</summary>
    public int VagueStatements { get; init; }

    /// <summary>Statements containing a number, percentage or currency amount.</summary>
    public int QuantifiedStatements { get; init; }

    /// <summary>Statements that open with a strong action verb.</summary>
    public int StrongActionVerbStatements { get; init; }

    /// <summary>Sample vague statements (the analyser keeps only a few, truncated).</summary>
    public List<string> VagueExamples { get; init; } = new();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Result of the job-title versus skills alignment check.
/// </summary>
public sealed record SkillsAlignmentAnalysis
{
    /// <summary>Number of employment entries whose title matched a known role pattern.</summary>
    public int TotalRolesChecked { get; init; }

    /// <summary>Number of checked roles for which enough expected skills were found.</summary>
    public int RolesWithMatchingSkills { get; init; }

    /// <summary>Roles for which too few expected skills were found.</summary>
    public List<SkillMismatch> Mismatches { get; init; } = new();
}
|
||||||
|
|
||||||
|
/// <summary>
/// A single role whose expected skills were largely absent from the CV.
/// </summary>
public sealed record SkillMismatch
{
    /// <summary>Job title as written on the CV.</summary>
    public required string JobTitle { get; init; }

    /// <summary>Employer name as written on the CV.</summary>
    public required string CompanyName { get; init; }

    /// <summary>Sample of skills typically expected for the role.</summary>
    public required List<string> ExpectedSkills { get; init; }

    /// <summary>The expected skills that were actually found.</summary>
    public required List<string> MatchingSkills { get; init; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Result of the metric-plausibility check.
/// </summary>
public sealed record MetricsAnalysis
{
    /// <summary>Number of percentage claims that were recognised.</summary>
    public int TotalMetricsClaimed { get; init; }

    /// <summary>Claims that fell within the plausibility thresholds.</summary>
    public int PlausibleMetrics { get; init; }

    /// <summary>Claims that exceeded a plausibility threshold.</summary>
    public int SuspiciousMetrics { get; init; }

    /// <summary>Count of round numbers reported by the round-number analysis.</summary>
    public int RoundNumberCount { get; init; }

    /// <summary>Ratio reported by the round-number analysis.</summary>
    public double RoundNumberRatio { get; init; }

    /// <summary>Details of each suspicious claim.</summary>
    public List<SuspiciousMetric> SuspiciousMetricsList { get; init; } = new();
}
|
||||||
|
|
||||||
|
/// <summary>
/// A single metric claim that was judged implausible.
/// </summary>
public sealed record SuspiciousMetric
{
    /// <summary>The matched text of the claim.</summary>
    public required string ClaimText { get; init; }

    /// <summary>The numeric value extracted from the claim.</summary>
    public required double Value { get; init; }

    /// <summary>Human-readable explanation of why the claim is suspicious.</summary>
    public required string Reason { get; init; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// A flag raised by one of the text-analysis checks.
/// </summary>
public sealed record TextAnalysisFlag
{
    /// <summary>Machine-readable flag identifier, e.g. "ExcessiveBuzzwords".</summary>
    public required string Type { get; init; }

    /// <summary>Severity label, e.g. "Warning" or "Info".</summary>
    public required string Severity { get; init; }

    /// <summary>Human-readable description of the finding.</summary>
    public required string Message { get; init; }

    /// <summary>Amount added to the veracity score; negative values lower it.</summary>
    public int ScoreImpact { get; init; }
}
|
||||||
@@ -8,6 +8,7 @@ public sealed record VeracityReport
|
|||||||
public List<CompanyVerificationResult> EmploymentVerifications { get; init; } = [];
|
public List<CompanyVerificationResult> EmploymentVerifications { get; init; } = [];
|
||||||
public List<EducationVerificationResult> EducationVerifications { get; init; } = [];
|
public List<EducationVerificationResult> EducationVerifications { get; init; } = [];
|
||||||
public required TimelineAnalysisResult TimelineAnalysis { get; init; }
|
public required TimelineAnalysisResult TimelineAnalysis { get; init; }
|
||||||
|
public TextAnalysisResult? TextAnalysis { get; init; }
|
||||||
public List<FlagResult> Flags { get; init; } = [];
|
public List<FlagResult> Flags { get; init; } = [];
|
||||||
public required DateTime GeneratedAt { get; init; }
|
public required DateTime GeneratedAt { get; init; }
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -114,6 +114,7 @@ public static class DependencyInjection
|
|||||||
services.AddScoped<ICompanyVerifierService, CompanyVerifierService>();
|
services.AddScoped<ICompanyVerifierService, CompanyVerifierService>();
|
||||||
services.AddScoped<IEducationVerifierService, EducationVerifierService>();
|
services.AddScoped<IEducationVerifierService, EducationVerifierService>();
|
||||||
services.AddScoped<ITimelineAnalyserService, TimelineAnalyserService>();
|
services.AddScoped<ITimelineAnalyserService, TimelineAnalyserService>();
|
||||||
|
services.AddScoped<ITextAnalysisService, TextAnalysisService>();
|
||||||
services.AddScoped<ICVCheckService, CVCheckService>();
|
services.AddScoped<ICVCheckService, CVCheckService>();
|
||||||
services.AddScoped<IUserContextService, UserContextService>();
|
services.AddScoped<IUserContextService, UserContextService>();
|
||||||
services.AddScoped<IAuditService, AuditService>();
|
services.AddScoped<IAuditService, AuditService>();
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ public sealed class ProcessCVCheckJob
|
|||||||
private readonly ICompanyVerifierService _companyVerifierService;
|
private readonly ICompanyVerifierService _companyVerifierService;
|
||||||
private readonly IEducationVerifierService _educationVerifierService;
|
private readonly IEducationVerifierService _educationVerifierService;
|
||||||
private readonly ITimelineAnalyserService _timelineAnalyserService;
|
private readonly ITimelineAnalyserService _timelineAnalyserService;
|
||||||
|
private readonly ITextAnalysisService _textAnalysisService;
|
||||||
private readonly IAuditService _auditService;
|
private readonly IAuditService _auditService;
|
||||||
private readonly ILogger<ProcessCVCheckJob> _logger;
|
private readonly ILogger<ProcessCVCheckJob> _logger;
|
||||||
|
|
||||||
@@ -41,6 +42,7 @@ public sealed class ProcessCVCheckJob
|
|||||||
ICompanyVerifierService companyVerifierService,
|
ICompanyVerifierService companyVerifierService,
|
||||||
IEducationVerifierService educationVerifierService,
|
IEducationVerifierService educationVerifierService,
|
||||||
ITimelineAnalyserService timelineAnalyserService,
|
ITimelineAnalyserService timelineAnalyserService,
|
||||||
|
ITextAnalysisService textAnalysisService,
|
||||||
IAuditService auditService,
|
IAuditService auditService,
|
||||||
ILogger<ProcessCVCheckJob> logger)
|
ILogger<ProcessCVCheckJob> logger)
|
||||||
{
|
{
|
||||||
@@ -50,6 +52,7 @@ public sealed class ProcessCVCheckJob
|
|||||||
_companyVerifierService = companyVerifierService;
|
_companyVerifierService = companyVerifierService;
|
||||||
_educationVerifierService = educationVerifierService;
|
_educationVerifierService = educationVerifierService;
|
||||||
_timelineAnalyserService = timelineAnalyserService;
|
_timelineAnalyserService = timelineAnalyserService;
|
||||||
|
_textAnalysisService = textAnalysisService;
|
||||||
_auditService = auditService;
|
_auditService = auditService;
|
||||||
_logger = logger;
|
_logger = logger;
|
||||||
}
|
}
|
||||||
@@ -198,10 +201,23 @@ public sealed class ProcessCVCheckJob
|
|||||||
"Timeline analysis for check {CheckId}: {GapCount} gaps, {OverlapCount} overlaps",
|
"Timeline analysis for check {CheckId}: {GapCount} gaps, {OverlapCount} overlaps",
|
||||||
cvCheckId, timelineAnalysis.Gaps.Count, timelineAnalysis.Overlaps.Count);
|
cvCheckId, timelineAnalysis.Gaps.Count, timelineAnalysis.Overlaps.Count);
|
||||||
|
|
||||||
|
// Step 7b: Analyse text for buzzwords, vague achievements, skills alignment, and metrics
|
||||||
|
cvCheck.ProcessingStage = "Analysing Content";
|
||||||
|
await _dbContext.SaveChangesAsync(cancellationToken);
|
||||||
|
|
||||||
|
var textAnalysis = _textAnalysisService.Analyse(cvData);
|
||||||
|
|
||||||
|
_logger.LogDebug(
|
||||||
|
"Text analysis for check {CheckId}: {BuzzwordCount} buzzwords, {VagueCount} vague statements, {MismatchCount} skill mismatches",
|
||||||
|
cvCheckId,
|
||||||
|
textAnalysis.BuzzwordAnalysis.TotalBuzzwords,
|
||||||
|
textAnalysis.AchievementAnalysis.VagueStatements,
|
||||||
|
textAnalysis.SkillsAlignment.Mismatches.Count);
|
||||||
|
|
||||||
// Step 8: Calculate veracity score
|
// Step 8: Calculate veracity score
|
||||||
cvCheck.ProcessingStage = "Calculating Score";
|
cvCheck.ProcessingStage = "Calculating Score";
|
||||||
await _dbContext.SaveChangesAsync(cancellationToken);
|
await _dbContext.SaveChangesAsync(cancellationToken);
|
||||||
var (score, flags) = CalculateVeracityScore(verificationResults, educationResults, timelineAnalysis, cvData);
|
var (score, flags) = CalculateVeracityScore(verificationResults, educationResults, timelineAnalysis, textAnalysis, cvData);
|
||||||
|
|
||||||
_logger.LogDebug("Calculated veracity score for check {CheckId}: {Score}", cvCheckId, score);
|
_logger.LogDebug("Calculated veracity score for check {CheckId}: {Score}", cvCheckId, score);
|
||||||
|
|
||||||
@@ -246,6 +262,7 @@ public sealed class ProcessCVCheckJob
|
|||||||
EmploymentVerifications = verificationResults,
|
EmploymentVerifications = verificationResults,
|
||||||
EducationVerifications = educationResults,
|
EducationVerifications = educationResults,
|
||||||
TimelineAnalysis = timelineAnalysis,
|
TimelineAnalysis = timelineAnalysis,
|
||||||
|
TextAnalysis = textAnalysis,
|
||||||
Flags = flags,
|
Flags = flags,
|
||||||
GeneratedAt = DateTime.UtcNow
|
GeneratedAt = DateTime.UtcNow
|
||||||
};
|
};
|
||||||
@@ -290,6 +307,7 @@ public sealed class ProcessCVCheckJob
|
|||||||
List<CompanyVerificationResult> verifications,
|
List<CompanyVerificationResult> verifications,
|
||||||
List<EducationVerificationResult> educationResults,
|
List<EducationVerificationResult> educationResults,
|
||||||
TimelineAnalysisResult timeline,
|
TimelineAnalysisResult timeline,
|
||||||
|
TextAnalysisResult textAnalysis,
|
||||||
CVData cvData)
|
CVData cvData)
|
||||||
{
|
{
|
||||||
var score = BaseScore;
|
var score = BaseScore;
|
||||||
@@ -484,6 +502,32 @@ public sealed class ProcessCVCheckJob
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Process text analysis flags (buzzwords, vague achievements, skills alignment, metrics)
|
||||||
|
foreach (var textFlag in textAnalysis.Flags)
|
||||||
|
{
|
||||||
|
score += textFlag.ScoreImpact; // ScoreImpact is already negative
|
||||||
|
|
||||||
|
flags.Add(new FlagResult
|
||||||
|
{
|
||||||
|
Category = FlagCategory.Plausibility.ToString(),
|
||||||
|
Severity = textFlag.Severity,
|
||||||
|
Title = textFlag.Type switch
|
||||||
|
{
|
||||||
|
"ExcessiveBuzzwords" => "Excessive Buzzwords",
|
||||||
|
"HighBuzzwordCount" => "High Buzzword Count",
|
||||||
|
"VagueAchievements" => "Vague Achievements",
|
||||||
|
"LackOfQuantification" => "Lack of Quantification",
|
||||||
|
"SkillsJobMismatch" => "Skills/Job Mismatch",
|
||||||
|
"UnrealisticMetrics" => "Unrealistic Metrics",
|
||||||
|
"UnrealisticMetric" => "Unrealistic Metric",
|
||||||
|
"SuspiciouslyRoundNumbers" => "Suspiciously Round Numbers",
|
||||||
|
_ => textFlag.Type
|
||||||
|
},
|
||||||
|
Description = textFlag.Message,
|
||||||
|
ScoreImpact = textFlag.ScoreImpact
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Deduplicate flags based on Title + Description
|
// Deduplicate flags based on Title + Description
|
||||||
var uniqueFlags = flags
|
var uniqueFlags = flags
|
||||||
.GroupBy(f => (f.Title, f.Description))
|
.GroupBy(f => (f.Title, f.Description))
|
||||||
|
|||||||
593
src/RealCV.Infrastructure/Services/TextAnalysisService.cs
Normal file
593
src/RealCV.Infrastructure/Services/TextAnalysisService.cs
Normal file
@@ -0,0 +1,593 @@
|
|||||||
|
using System.Text.RegularExpressions;
|
||||||
|
using Microsoft.Extensions.Logging;
|
||||||
|
using RealCV.Application.Interfaces;
|
||||||
|
using RealCV.Application.Models;
|
||||||
|
|
||||||
|
namespace RealCV.Infrastructure.Services;
|
||||||
|
|
||||||
|
public sealed partial class TextAnalysisService : ITextAnalysisService
|
||||||
|
{
|
||||||
|
// Logger used for debug diagnostics emitted by Analyse.
private readonly ILogger<TextAnalysisService> _logger;

/// <summary>
/// Creates the service.
/// </summary>
/// <param name="logger">Logger that receives the analysis diagnostics.</param>
public TextAnalysisService(ILogger<TextAnalysisService> logger) => _logger = logger;
|
||||||
|
|
||||||
|
/// <summary>
/// Runs the buzzword, achievement, skills-alignment and metrics checks against the CV
/// and combines their results. Every check appends its flags to one shared list,
/// which is returned on the result.
/// </summary>
/// <param name="cvData">The parsed CV to analyse.</param>
/// <returns>The per-check results together with all raised flags.</returns>
/// <exception cref="ArgumentNullException"><paramref name="cvData"/> is null.</exception>
public TextAnalysisResult Analyse(CVData cvData)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException
    // from the first property access below.
    ArgumentNullException.ThrowIfNull(cvData);

    _logger.LogDebug("Starting text analysis for CV: {Name}", cvData.FullName);

    var flags = new List<TextAnalysisFlag>();

    // The order of these calls determines the order of the flags in the result.
    var buzzwordAnalysis = AnalyseBuzzwords(cvData, flags);
    var achievementAnalysis = AnalyseAchievements(cvData, flags);
    var skillsAlignment = AnalyseSkillsAlignment(cvData, flags);
    var metricsAnalysis = AnalyseMetrics(cvData, flags);

    _logger.LogDebug(
        "Text analysis complete: {BuzzwordCount} buzzwords, {VagueCount} vague statements, {MismatchCount} skill mismatches, {SuspiciousCount} suspicious metrics",
        buzzwordAnalysis.TotalBuzzwords,
        achievementAnalysis.VagueStatements,
        skillsAlignment.Mismatches.Count,
        metricsAnalysis.SuspiciousMetrics);

    return new TextAnalysisResult
    {
        BuzzwordAnalysis = buzzwordAnalysis,
        AchievementAnalysis = achievementAnalysis,
        SkillsAlignment = skillsAlignment,
        MetricsAnalysis = metricsAnalysis,
        Flags = flags
    };
}
|
||||||
|
|
||||||
|
#region Buzzword Detection
|
||||||
|
|
||||||
|
// Single buzzwords and short clichés, matched case-insensitively as substrings of the CV text.
private static readonly HashSet<string> Buzzwords = new(StringComparer.OrdinalIgnoreCase)
{
    // Overused personality descriptors
    "results-driven", "detail-oriented", "team player", "self-starter",
    "go-getter", "proactive", "dynamic", "passionate", "motivated",
    "hardworking", "dedicated", "enthusiastic", "driven",

    // Corporate jargon
    "synergy", "leverage", "paradigm", "holistic", "innovative",
    "disruptive", "scalable", "agile", "optimization", "strategic",
    "streamline", "spearhead", "champion", "facilitate",

    // Vague superlatives
    "best-in-class", "world-class", "cutting-edge", "state-of-the-art",
    "next-generation", "game-changer", "thought leader",

    // Empty phrases
    "think outside the box", "hit the ground running", "move the needle",
    "low-hanging fruit", "value-add", "bandwidth", "circle back",
    "deep dive", "pivot", "ecosystem"
};

// Longer stock phrases; checked before the single buzzwords so that a buzzword
// already covered by a matched phrase is not counted twice.
private static readonly HashSet<string> BuzzwordPhrases = new(StringComparer.OrdinalIgnoreCase)
{
    "results-driven professional",
    "highly motivated individual",
    "proven track record",
    "strong work ethic",
    "excellent interpersonal skills",
    "ability to work independently",
    "thrive under pressure",
    "fast-paced environment",
    "excellent communication skills",
    "strategic thinker",
    "problem solver",
    "out of the box",
    "above and beyond",
    "value proposition"
};

/// <summary>
/// Detects buzzwords and stock phrases in the CV's description text and raises a flag
/// when 6 or more (Info) or 10 or more (Warning) distinct entries are found.
/// </summary>
/// <param name="cvData">The CV whose description text is scanned.</param>
/// <param name="flags">Shared flag list; at most one flag is appended.</param>
/// <returns>The detected buzzwords, their count and the density per 100 words.</returns>
private static BuzzwordAnalysis AnalyseBuzzwords(CVData cvData, List<TextAnalysisFlag> flags)
{
    var allText = GetAllDescriptionText(cvData);

    // A null separator splits on all whitespace, so words separated by newlines or
    // tabs are counted individually rather than as one long "word".
    var wordCount = allText.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries).Length;

    var found = new List<string>();

    // Check for phrases first. Ordinal case-insensitive matching avoids the
    // culture-dependent casing rules of ToLower().
    foreach (var phrase in BuzzwordPhrases)
    {
        if (allText.Contains(phrase, StringComparison.OrdinalIgnoreCase))
        {
            found.Add(phrase);
        }
    }

    // Check individual buzzwords, skipping any that is already part of an entry
    // found above (e.g. "driven" once "results-driven" has been recorded).
    foreach (var buzzword in Buzzwords)
    {
        if (allText.Contains(buzzword, StringComparison.OrdinalIgnoreCase) &&
            !found.Any(f => f.Contains(buzzword, StringComparison.OrdinalIgnoreCase)))
        {
            found.Add(buzzword);
        }
    }

    // Buzzwords per 100 words.
    var density = wordCount > 0 ? found.Count / (wordCount / 100.0) : 0;

    // Generate flags based on severity
    if (found.Count >= 10)
    {
        flags.Add(new TextAnalysisFlag
        {
            Type = "ExcessiveBuzzwords",
            Severity = "Warning",
            Message = $"CV contains {found.Count} buzzwords/clichés - may indicate template or AI-generated content. Examples: {string.Join(", ", found.Take(5))}",
            ScoreImpact = -10
        });
    }
    else if (found.Count >= 6)
    {
        flags.Add(new TextAnalysisFlag
        {
            Type = "HighBuzzwordCount",
            Severity = "Info",
            Message = $"CV contains {found.Count} common buzzwords: {string.Join(", ", found.Take(4))}",
            ScoreImpact = -5
        });
    }

    return new BuzzwordAnalysis
    {
        TotalBuzzwords = found.Count,
        BuzzwordsFound = found,
        BuzzwordDensity = density
    };
}
|
||||||
|
|
||||||
|
#endregion
|
||||||
|
|
||||||
|
#region Vague Achievement Detection
|
||||||
|
|
||||||
|
// Phrases that describe involvement rather than outcome.
private static readonly string[] VaguePatterns =
[
    "responsible for",
    "worked on",
    "helped with",
    "assisted in",
    "involved in",
    "participated in",
    "contributed to",
    "various tasks",
    "many projects",
    "multiple initiatives",
    "day-to-day",
    "duties included",
    "tasked with"
];

// Verbs that count as a strong opening for an achievement statement.
private static readonly string[] StrongActionVerbs =
[
    "achieved", "increased", "reduced", "decreased", "improved",
    "generated", "saved", "developed", "created", "launched",
    "implemented", "negotiated", "secured", "designed", "built",
    "led", "managed", "delivered", "transformed", "accelerated",
    "streamlined", "consolidated", "eliminated", "maximized", "minimized"
];

/// <summary>
/// Splits each job description into statements and counts how many are vague,
/// quantified, or open with a strong action verb. With at least five statements,
/// raises "VagueAchievements" when more than half are vague and
/// "LackOfQuantification" when fewer than 20% are quantified.
/// </summary>
/// <param name="cvData">The CV whose employment descriptions are examined.</param>
/// <param name="flags">Shared flag list; up to two flags are appended.</param>
/// <returns>The statement counts and up to three truncated vague examples.</returns>
private static AchievementAnalysis AnalyseAchievements(CVData cvData, List<TextAnalysisFlag> flags)
{
    var totalStatements = 0;
    var vagueStatements = 0;
    var quantifiedStatements = 0;
    var strongVerbStatements = 0;
    var vagueExamples = new List<string>();

    foreach (var job in cvData.Employment)
    {
        if (string.IsNullOrWhiteSpace(job.Description)) continue;

        // Split into bullet points or lines. '-' and '*' only separate statements when
        // they stand alone as bullet markers; splitting on every hyphen would cut
        // "day-to-day", "year-on-year" or "2019-2021" into fragments and make the
        // "day-to-day" vague pattern unmatchable.
        var statements = StatementSeparator()
            .Split(job.Description)
            .Select(s => s.Trim().TrimStart('-', '*').TrimStart()) // drop a marker glued to the text ("-Led ...")
            .Where(s => s.Length > 10)
            .ToList();

        foreach (var statement in statements)
        {
            totalStatements++;

            // Check for quantification (numbers, percentages, currency)
            if (HasQuantification().IsMatch(statement))
            {
                quantifiedStatements++;
            }

            // Check for strong action verbs at the start
            if (StrongActionVerbs.Any(v => StartsWithWord(statement, v)))
            {
                strongVerbStatements++;
            }

            // Check for vague patterns
            if (VaguePatterns.Any(p => statement.Contains(p, StringComparison.OrdinalIgnoreCase)))
            {
                vagueStatements++;
                if (vagueExamples.Count < 3)
                {
                    // Keep examples short enough to embed in a flag message.
                    var truncated = statement.Length > 60 ? statement[..57] + "..." : statement;
                    vagueExamples.Add(truncated);
                }
            }
        }
    }

    // Generate flags
    if (totalStatements > 0)
    {
        var vagueRatio = (double)vagueStatements / totalStatements;
        var quantifiedRatio = (double)quantifiedStatements / totalStatements;

        if (vagueRatio > 0.5 && totalStatements >= 5)
        {
            flags.Add(new TextAnalysisFlag
            {
                Type = "VagueAchievements",
                Severity = "Warning",
                Message = $"{vagueStatements} of {totalStatements} statements use vague language (e.g., 'responsible for', 'helped with'). Consider: \"{vagueExamples.FirstOrDefault()}\"",
                ScoreImpact = -8
            });
        }

        if (quantifiedRatio < 0.2 && totalStatements >= 5)
        {
            flags.Add(new TextAnalysisFlag
            {
                Type = "LackOfQuantification",
                Severity = "Info",
                Message = $"Only {quantifiedStatements} of {totalStatements} achievement statements include measurable results",
                ScoreImpact = 0
            });
        }
    }

    return new AchievementAnalysis
    {
        TotalStatements = totalStatements,
        VagueStatements = vagueStatements,
        QuantifiedStatements = quantifiedStatements,
        StrongActionVerbStatements = strongVerbStatements,
        VagueExamples = vagueExamples
    };
}

// True when the statement begins with the given word (ordinal, case-insensitive) and
// the word is not merely the prefix of a longer one ("led" must not match "ledger").
private static bool StartsWithWord(string statement, string word) =>
    statement.StartsWith(word, StringComparison.OrdinalIgnoreCase) &&
    (statement.Length == word.Length || !char.IsLetter(statement[word.Length]));

// Statement boundaries: a newline, a bullet glyph, or a '-' / '*' that stands alone
// between whitespace (or at the start/end of the text).
[GeneratedRegex(@"\n|[•●■▪]|(?<!\S)[-*](?!\S)")]
private static partial Regex StatementSeparator();

// Matches a percentage, a $ or £ amount, a number followed by million/thousand/k/m,
// or a multiplier such as "3x".
[GeneratedRegex(@"\d+%|\$[\d,]+|£[\d,]+|\d+\s*(million|thousand|k\b|m\b)|[0-9]+x\b", RegexOptions.IgnoreCase)]
private static partial Regex HasQuantification();
|
||||||
|
|
||||||
|
#endregion
|
||||||
|
|
||||||
|
#region Skills Alignment
|
||||||
|
|
||||||
|
// Role pattern (matched as a substring of the job title) -> skills typically expected for that role.
private static readonly Dictionary<string, HashSet<string>> RoleSkillsMap = new(StringComparer.OrdinalIgnoreCase)
{
    // Software/Tech roles
    ["software engineer"] = ["programming", "coding", "development", "software", "git", "testing", "code", "developer", "engineering"],
    ["software developer"] = ["programming", "coding", "development", "software", "git", "testing", "code", "developer"],
    ["web developer"] = ["html", "css", "javascript", "web", "frontend", "backend", "react", "angular", "vue", "node"],
    ["frontend developer"] = ["html", "css", "javascript", "react", "angular", "vue", "typescript", "ui", "ux"],
    ["backend developer"] = ["api", "database", "sql", "server", "node", "python", "java", "c#", ".net"],
    ["full stack"] = ["frontend", "backend", "javascript", "database", "api", "react", "node"],
    ["devops engineer"] = ["ci/cd", "docker", "kubernetes", "aws", "azure", "jenkins", "terraform", "infrastructure"],
    ["data scientist"] = ["python", "machine learning", "statistics", "data analysis", "sql", "r", "tensorflow", "pandas"],
    ["data analyst"] = ["sql", "excel", "data", "analysis", "tableau", "power bi", "statistics", "reporting"],
    ["data engineer"] = ["sql", "python", "etl", "data pipeline", "spark", "hadoop", "database", "aws", "azure"],

    // Project/Product roles
    ["project manager"] = ["project management", "agile", "scrum", "stakeholder", "planning", "budget", "pmp", "prince2"],
    ["product manager"] = ["product", "roadmap", "stakeholder", "agile", "user research", "strategy", "backlog"],
    ["scrum master"] = ["scrum", "agile", "sprint", "kanban", "jira", "facilitation", "coaching"],

    // Business roles
    ["business analyst"] = ["requirements", "analysis", "stakeholder", "documentation", "process", "sql", "jira"],
    ["marketing manager"] = ["marketing", "campaigns", "branding", "analytics", "seo", "content", "social media", "digital"],
    ["sales manager"] = ["sales", "revenue", "crm", "pipeline", "negotiation", "b2b", "b2c", "targets"],

    // Finance roles
    ["accountant"] = ["accounting", "financial", "excel", "bookkeeping", "tax", "audit", "sage", "xero", "quickbooks"],
    ["financial analyst"] = ["financial", "modelling", "excel", "forecasting", "budgeting", "analysis", "reporting"],

    // Design roles
    ["ux designer"] = ["ux", "user experience", "wireframe", "prototype", "figma", "sketch", "user research", "usability"],
    ["ui designer"] = ["ui", "visual design", "figma", "sketch", "adobe", "interface", "design systems"],
    ["graphic designer"] = ["photoshop", "illustrator", "indesign", "adobe", "design", "creative", "branding"],

    // HR roles
    ["hr manager"] = ["hr", "human resources", "recruitment", "employee relations", "policy", "training", "performance"],
    ["recruiter"] = ["recruitment", "sourcing", "interviewing", "talent", "hiring", "ats", "linkedin"],

    // Other common roles
    ["customer service"] = ["customer", "support", "service", "communication", "crm", "resolution"],
    ["operations manager"] = ["operations", "logistics", "process", "efficiency", "supply chain", "management"]
};

// Terms up to this length must also end on a token boundary; see ContainsSkillTerm.
private const int ShortSkillTermLength = 3;

/// <summary>
/// For every job whose title contains a known role pattern, checks whether at least two
/// of the role's expected skills appear in the skills list or anywhere in the CV's
/// description text. Raises a "SkillsJobMismatch" flag (Info for one mismatching role,
/// Warning for two or more).
/// </summary>
/// <param name="cvData">The CV whose employment, skills and descriptions are examined.</param>
/// <param name="flags">Shared flag list; at most one flag is appended.</param>
/// <returns>The number of roles checked, roles with matching skills, and the mismatches.</returns>
private static SkillsAlignmentAnalysis AnalyseSkillsAlignment(CVData cvData, List<TextAnalysisFlag> flags)
{
    var mismatches = new List<SkillMismatch>();
    var rolesChecked = 0;
    var rolesWithMatchingSkills = 0;

    // Normalize skills for matching; comparisons below are ordinal case-insensitive,
    // so no culture-dependent lower-casing is needed.
    var listedSkills = cvData.Skills
        .Select(s => s.Trim())
        .ToHashSet(StringComparer.OrdinalIgnoreCase);

    // Skills mentioned in descriptions also count.
    var allText = GetAllDescriptionText(cvData);

    foreach (var job in cvData.Employment)
    {
        foreach (var (rolePattern, expectedSkills) in RoleSkillsMap)
        {
            if (!job.JobTitle.Contains(rolePattern, StringComparison.OrdinalIgnoreCase)) continue;

            rolesChecked++;

            // Find matching skills (in skills list OR mentioned in descriptions)
            var matchingSkills = expectedSkills
                .Where(expected =>
                    listedSkills.Any(s => ContainsSkillTerm(s, expected)) ||
                    ContainsSkillTerm(allText, expected))
                .ToList();

            if (matchingSkills.Count >= 2)
            {
                rolesWithMatchingSkills++;
            }
            else
            {
                mismatches.Add(new SkillMismatch
                {
                    JobTitle = job.JobTitle,
                    CompanyName = job.CompanyName,
                    ExpectedSkills = expectedSkills.Take(5).ToList(),
                    MatchingSkills = matchingSkills
                });
            }

            break; // Only match first role pattern
        }
    }

    // Generate flags for significant mismatches
    if (mismatches.Count >= 2)
    {
        var examples = mismatches.Take(2)
            .Select(m => $"'{m.JobTitle}' lacks typical skills")
            .ToList();

        flags.Add(new TextAnalysisFlag
        {
            Type = "SkillsJobMismatch",
            Severity = "Warning",
            Message = $"{mismatches.Count} roles have few matching skills listed. {string.Join("; ", examples)}. Expected skills like: {string.Join(", ", mismatches.First().ExpectedSkills.Take(3))}",
            ScoreImpact = -8
        });
    }
    else if (mismatches.Count == 1)
    {
        var m = mismatches.First();
        flags.Add(new TextAnalysisFlag
        {
            Type = "SkillsJobMismatch",
            Severity = "Info",
            Message = $"Role '{m.JobTitle}' at {m.CompanyName} has limited matching skills. Expected: {string.Join(", ", m.ExpectedSkills.Take(4))}",
            ScoreImpact = -3
        });
    }

    return new SkillsAlignmentAnalysis
    {
        TotalRolesChecked = rolesChecked,
        RolesWithMatchingSkills = rolesWithMatchingSkills,
        Mismatches = mismatches
    };
}

/// <summary>
/// Case-insensitive search for a skill term that respects token boundaries, so that short
/// terms do not match inside unrelated words ("r" in "career", "ui" in "build",
/// "sage" in "message").
/// </summary>
/// <remarks>
/// A term that starts with a letter or digit must not be preceded by one. Terms of up to
/// <see cref="ShortSkillTermLength"/> characters that end with a letter or digit must
/// also not be followed by a letter, apart from an optional plural "s" ("APIs").
/// Longer terms may still act as prefixes ("design" matches "designed").
/// </remarks>
private static bool ContainsSkillTerm(string text, string term)
{
    if (string.IsNullOrEmpty(text) || string.IsNullOrEmpty(term)) return false;

    // Terms such as ".net" or "c#" begin/end with punctuation and need no guard on that side.
    var guardStart = char.IsLetterOrDigit(term[0]);
    var guardEnd = term.Length <= ShortSkillTermLength && char.IsLetterOrDigit(term[^1]);

    for (var index = text.IndexOf(term, StringComparison.OrdinalIgnoreCase);
         index >= 0;
         index = text.IndexOf(term, index + 1, StringComparison.OrdinalIgnoreCase))
    {
        if (guardStart && index > 0 && char.IsLetterOrDigit(text[index - 1])) continue;

        if (guardEnd)
        {
            var end = index + term.Length;
            if (end < text.Length && text[end] is 's' or 'S') end++; // tolerate a simple plural
            if (end < text.Length && char.IsLetter(text[end])) continue;
        }

        return true;
    }

    return false;
}
|
||||||
|
|
||||||
|
#endregion
|
||||||
|
|
||||||
|
#region Unrealistic Metrics Detection
|
||||||
|
|
||||||
|
private static MetricsAnalysis AnalyseMetrics(CVData cvData, List<TextAnalysisFlag> flags)
|
||||||
|
{
|
||||||
|
var allText = GetAllDescriptionText(cvData);
|
||||||
|
var suspiciousMetrics = new List<SuspiciousMetric>();
|
||||||
|
var totalMetrics = 0;
|
||||||
|
var plausibleMetrics = 0;
|
||||||
|
|
||||||
|
// Revenue/growth increase patterns
|
||||||
|
var revenuePattern = RevenueIncreasePattern();
|
||||||
|
foreach (Match match in revenuePattern.Matches(allText))
|
||||||
|
{
|
||||||
|
totalMetrics++;
|
||||||
|
var value = double.Parse(match.Groups[1].Value);
|
||||||
|
|
||||||
|
if (value > 300)
|
||||||
|
{
|
||||||
|
suspiciousMetrics.Add(new SuspiciousMetric
|
||||||
|
{
|
||||||
|
ClaimText = match.Value,
|
||||||
|
Value = value,
|
||||||
|
Reason = $"{value}% increase is exceptionally high - requires verification"
|
||||||
|
});
|
||||||
|
}
|
||||||
|
else if (value > 200)
|
||||||
|
{
|
||||||
|
suspiciousMetrics.Add(new SuspiciousMetric
|
||||||
|
{
|
||||||
|
ClaimText = match.Value,
|
||||||
|
Value = value,
|
||||||
|
Reason = $"{value}% is unusually high for most contexts"
|
||||||
|
});
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
plausibleMetrics++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cost reduction patterns
|
||||||
|
var costPattern = CostReductionPattern();
|
||||||
|
foreach (Match match in costPattern.Matches(allText))
|
||||||
|
{
|
||||||
|
totalMetrics++;
|
||||||
|
var value = double.Parse(match.Groups[1].Value);
|
||||||
|
|
||||||
|
if (value > 70)
|
||||||
|
{
|
||||||
|
suspiciousMetrics.Add(new SuspiciousMetric
|
||||||
|
{
|
||||||
|
ClaimText = match.Value,
|
||||||
|
Value = value,
|
||||||
|
Reason = $"{value}% cost reduction is extremely rare"
|
||||||
|
});
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
plausibleMetrics++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Efficiency/productivity improvements
|
||||||
|
var efficiencyPattern = EfficiencyPattern();
|
||||||
|
foreach (Match match in efficiencyPattern.Matches(allText))
|
||||||
|
{
|
||||||
|
totalMetrics++;
|
||||||
|
var value = double.Parse(match.Groups[1].Value);
|
||||||
|
|
||||||
|
if (value > 500)
|
||||||
|
{
|
||||||
|
suspiciousMetrics.Add(new SuspiciousMetric
|
||||||
|
{
|
||||||
|
ClaimText = match.Value,
|
||||||
|
Value = value,
|
||||||
|
Reason = $"{value}% efficiency gain is implausible"
|
||||||
|
});
|
||||||
|
}
|
||||||
|
else if (value > 200)
|
||||||
|
{
|
||||||
|
suspiciousMetrics.Add(new SuspiciousMetric
|
||||||
|
{
|
||||||
|
ClaimText = match.Value,
|
||||||
|
Value = value,
|
||||||
|
Reason = $"{value}% improvement is unusually high"
|
||||||
|
});
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
plausibleMetrics++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for suspiciously round numbers
|
||||||
|
var (roundCount, roundRatio) = AnalyseRoundNumbers(allText);
|
||||||
|
|
||||||
|
// Generate flags
|
||||||
|
if (suspiciousMetrics.Count >= 2)
|
||||||
|
{
|
||||||
|
flags.Add(new TextAnalysisFlag
|
||||||
|
{
|
||||||
|
Type = "UnrealisticMetrics",
|
||||||
|
Severity = "Warning",
|
||||||
|
Message = $"{suspiciousMetrics.Count} achievement metrics appear exaggerated. Example: \"{suspiciousMetrics.First().ClaimText}\" - {suspiciousMetrics.First().Reason}",
|
||||||
|
ScoreImpact = -10
|
||||||
|
});
|
||||||
|
}
|
||||||
|
else if (suspiciousMetrics.Count == 1)
|
||||||
|
{
|
||||||
|
flags.Add(new TextAnalysisFlag
|
||||||
|
{
|
||||||
|
Type = "UnrealisticMetric",
|
||||||
|
Severity = "Info",
|
||||||
|
Message = $"Metric may be exaggerated: \"{suspiciousMetrics.First().ClaimText}\" - {suspiciousMetrics.First().Reason}",
|
||||||
|
ScoreImpact = -3
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (roundRatio > 0.8 && totalMetrics >= 4)
|
||||||
|
{
|
||||||
|
flags.Add(new TextAnalysisFlag
|
||||||
|
{
|
||||||
|
Type = "SuspiciouslyRoundNumbers",
|
||||||
|
Severity = "Info",
|
||||||
|
Message = $"{roundCount} of {totalMetrics} metrics are round numbers (ending in 0 or 5) - real data is rarely this clean",
|
||||||
|
ScoreImpact = -3
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return new MetricsAnalysis
|
||||||
|
{
|
||||||
|
TotalMetricsClaimed = totalMetrics,
|
||||||
|
PlausibleMetrics = plausibleMetrics,
|
||||||
|
SuspiciousMetrics = suspiciousMetrics.Count,
|
||||||
|
RoundNumberCount = roundCount,
|
||||||
|
RoundNumberRatio = roundRatio,
|
||||||
|
SuspiciousMetricsList = suspiciousMetrics
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Matches growth claims such as "increased revenue by 40%": one of the verbs
/// increased / grew / boosted / raised / improved, up to three intervening words,
/// an optional "by", then a whole-number percentage (capture group 1). Case-insensitive.
/// </summary>
/// <remarks>
/// The leading <c>\b</c> anchors the verb to the start of a word so that words which merely
/// end in one of the verbs (for example "appraised 100%" containing "raised") are not
/// counted as growth claims.
/// </remarks>
[GeneratedRegex(@"\b(?:increased|grew|boosted|raised|improved)\s+(?:\w+\s+){0,3}(?:by\s+)?(\d+)%", RegexOptions.IgnoreCase)]
private static partial Regex RevenueIncreasePattern();
|
||||||
|
|
||||||
|
/// <summary>
/// Matches reduction claims such as "reduced costs by 30%": one of the verbs
/// reduced / cut / decreased / saved / lowered, up to three intervening words,
/// an optional "by", then a whole-number percentage (capture group 1). Case-insensitive.
/// </summary>
/// <remarks>
/// The leading <c>\b</c> anchors the verb to the start of a word so that words which merely
/// end in one of the verbs (for example "shortcut 40%" containing "cut") are not counted
/// as reduction claims.
/// </remarks>
[GeneratedRegex(@"\b(?:reduced|cut|decreased|saved|lowered)\s+(?:\w+\s+){0,3}(?:by\s+)?(\d+)%", RegexOptions.IgnoreCase)]
private static partial Regex CostReductionPattern();
|
||||||
|
|
||||||
|
/// <summary>
/// Matches a whole-number percentage followed by whitespace and one of the words
/// faster / quicker / more efficient / improvement / productivity / increase.
/// Capture group 1 holds the digits. Case-insensitive.
/// </summary>
/// <remarks>
/// NOTE(review): nothing anchors the start of the digits, so for a figure such as
/// "12.5% faster" only the trailing "5" is captured - confirm whether decimal or
/// comma-grouped percentages need to be handled.
/// </remarks>
[GeneratedRegex(@"(\d+)%\s+(?:faster|quicker|more efficient|improvement|productivity|increase)", RegexOptions.IgnoreCase)]
|
||||||
|
private static partial Regex EfficiencyPattern();
|
||||||
|
|
||||||
|
/// <summary>
/// Counts how many of the percentage and currency figures found in <paramref name="text"/>
/// are "round", i.e. multiples of five (which also covers multiples of ten).
/// </summary>
/// <param name="text">Text to scan using <see cref="NumberPattern"/>.</param>
/// <returns>
/// The number of round figures and their share of all figures considered.
/// Figures below 10 are ignored; the ratio is 0 when no figure qualifies.
/// </returns>
private static (int RoundCount, double RoundRatio) AnalyseRoundNumbers(string text)
{
    var total = 0;
    var roundCount = 0;

    foreach (Match match in NumberPattern().Matches(text))
    {
        // Group 1 is the percentage alternative; group 2 is the currency amount,
        // which may contain thousands separators that must be stripped before parsing.
        var digits = (match.Groups[1].Success ? match.Groups[1].Value : match.Groups[2].Value)
            .Replace(",", "");

        // Parse as long so that large currency amounts (above int.MaxValue) are still
        // counted instead of being silently dropped. The regex only yields ASCII digits,
        // so NumberStyles.None with the invariant culture is sufficient.
        var parsed = long.TryParse(
            digits,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out var amount);

        if (!parsed || amount < 10)
        {
            continue;
        }

        total++;

        // Every multiple of ten is also a multiple of five, so one check is enough.
        if (amount % 5 == 0)
        {
            roundCount++;
        }
    }

    return (roundCount, total > 0 ? (double)roundCount / total : 0);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Matches either a whole-number percentage (digits in capture group 1) or an amount
/// prefixed with "$" or "£" made up of digits and commas (capture group 2).
/// Exactly one of the two groups succeeds for any given match.
/// </summary>
[GeneratedRegex(@"(\d+)%|(?:\$|£)([\d,]+)")]
|
||||||
|
private static partial Regex NumberPattern();
|
||||||
|
|
||||||
|
#endregion
|
||||||
|
|
||||||
|
#region Helpers
|
||||||
|
|
||||||
|
/// <summary>
/// Builds a single string from the employment entries of <paramref name="cvData"/>:
/// every description that is not null, empty or whitespace, joined with single spaces
/// in their original order.
/// </summary>
/// <param name="cvData">CV whose employment descriptions are concatenated.</param>
/// <returns>The joined descriptions, or an empty string when none qualify.</returns>
private static string GetAllDescriptionText(CVData cvData) =>
    string.Join(
        " ",
        from job in cvData.Employment
        where !string.IsNullOrWhiteSpace(job.Description)
        select job.Description);
|
||||||
|
|
||||||
|
#endregion
|
||||||
|
}
|
||||||
@@ -20,6 +20,7 @@ public sealed class ProcessCVCheckJobTests : IDisposable
|
|||||||
private readonly Mock<ICompanyVerifierService> _companyVerifierServiceMock;
|
private readonly Mock<ICompanyVerifierService> _companyVerifierServiceMock;
|
||||||
private readonly Mock<IEducationVerifierService> _educationVerifierServiceMock;
|
private readonly Mock<IEducationVerifierService> _educationVerifierServiceMock;
|
||||||
private readonly Mock<ITimelineAnalyserService> _timelineAnalyserServiceMock;
|
private readonly Mock<ITimelineAnalyserService> _timelineAnalyserServiceMock;
|
||||||
|
private readonly Mock<ITextAnalysisService> _textAnalysisServiceMock;
|
||||||
private readonly Mock<IAuditService> _auditServiceMock;
|
private readonly Mock<IAuditService> _auditServiceMock;
|
||||||
private readonly Mock<ILogger<ProcessCVCheckJob>> _loggerMock;
|
private readonly Mock<ILogger<ProcessCVCheckJob>> _loggerMock;
|
||||||
private readonly ProcessCVCheckJob _sut;
|
private readonly ProcessCVCheckJob _sut;
|
||||||
@@ -41,6 +42,7 @@ public sealed class ProcessCVCheckJobTests : IDisposable
|
|||||||
_companyVerifierServiceMock = new Mock<ICompanyVerifierService>();
|
_companyVerifierServiceMock = new Mock<ICompanyVerifierService>();
|
||||||
_educationVerifierServiceMock = new Mock<IEducationVerifierService>();
|
_educationVerifierServiceMock = new Mock<IEducationVerifierService>();
|
||||||
_timelineAnalyserServiceMock = new Mock<ITimelineAnalyserService>();
|
_timelineAnalyserServiceMock = new Mock<ITimelineAnalyserService>();
|
||||||
|
_textAnalysisServiceMock = new Mock<ITextAnalysisService>();
|
||||||
_auditServiceMock = new Mock<IAuditService>();
|
_auditServiceMock = new Mock<IAuditService>();
|
||||||
_loggerMock = new Mock<ILogger<ProcessCVCheckJob>>();
|
_loggerMock = new Mock<ILogger<ProcessCVCheckJob>>();
|
||||||
|
|
||||||
@@ -51,6 +53,7 @@ public sealed class ProcessCVCheckJobTests : IDisposable
|
|||||||
_companyVerifierServiceMock.Object,
|
_companyVerifierServiceMock.Object,
|
||||||
_educationVerifierServiceMock.Object,
|
_educationVerifierServiceMock.Object,
|
||||||
_timelineAnalyserServiceMock.Object,
|
_timelineAnalyserServiceMock.Object,
|
||||||
|
_textAnalysisServiceMock.Object,
|
||||||
_auditServiceMock.Object,
|
_auditServiceMock.Object,
|
||||||
_loggerMock.Object);
|
_loggerMock.Object);
|
||||||
}
|
}
|
||||||
@@ -1073,6 +1076,10 @@ public sealed class ProcessCVCheckJobTests : IDisposable
|
|||||||
_timelineAnalyserServiceMock
|
_timelineAnalyserServiceMock
|
||||||
.Setup(x => x.Analyse(It.IsAny<List<EmploymentEntry>>()))
|
.Setup(x => x.Analyse(It.IsAny<List<EmploymentEntry>>()))
|
||||||
.Returns(timelineResult);
|
.Returns(timelineResult);
|
||||||
|
|
||||||
|
_textAnalysisServiceMock
|
||||||
|
.Setup(x => x.Analyse(It.IsAny<CVData>()))
|
||||||
|
.Returns(new TextAnalysisResult());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static CVData CreateTestCVData(int employmentCount = 1)
|
private static CVData CreateTestCVData(int employmentCount = 1)
|
||||||
|
|||||||
Reference in New Issue
Block a user