Uses Claude Haiku to intelligently detect when a company name contains multiple companies (e.g., "ASDA/WALMART", "Corus & Laura Ashley Hotels") vs. single companies with similar patterns (e.g., "Ernst & Young").

- Adds ExtractCompanyNamesAsync to ICompanyNameMatcherService
- Only triggers for names with potential separators (/, &, "and")
- Verifies each extracted part individually and returns the first match
- Uses the fast Haiku model to minimize cost

Results:
- ASDA/WALMART → verified via 'ASDA' → ASDA GROUP LIMITED
- Corus & Laura Ashley Hotels → verified via 'Corus' → Tata Steel UK
- Employers: 104/120 verified (86%)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
303 lines
12 KiB
C#
303 lines
12 KiB
C#
using System.Text.Json;
|
|
using Microsoft.EntityFrameworkCore;
|
|
using Microsoft.Extensions.Configuration;
|
|
using Microsoft.Extensions.DependencyInjection;
|
|
using Microsoft.Extensions.Logging;
|
|
using RealCV.Application.Interfaces;
|
|
using RealCV.Application.Models;
|
|
using RealCV.Infrastructure.Configuration;
|
|
using RealCV.Infrastructure.Data;
|
|
using RealCV.Infrastructure.ExternalApis;
|
|
using RealCV.Infrastructure.Services;
|
|
|
|
namespace CVBatchTester;
|
|
|
|
class Program
|
|
{
|
|
/// <summary>
/// Entry point. Parses every CV file (.pdf, .docx, .doc) found directly inside a folder,
/// verifies each claimed employer and education entry, prints a per-CV report and then
/// an aggregate summary listing everything that could not be verified.
/// </summary>
/// <param name="args">
/// Command-line arguments; the first one, when present, is the folder to scan.
/// When no argument is given the folder is requested interactively.
/// </param>
/// <returns>
/// 1 when the folder does not exist or contains no CV files; otherwise 0
/// (per-file failures are counted and reported but do not change the exit code).
/// </returns>
static async Task<int> Main(string[] args)
{
    var folderPath = args.FirstOrDefault() ?? AskForFolder();

    if (string.IsNullOrEmpty(folderPath) || !Directory.Exists(folderPath))
    {
        Console.WriteLine($"Error: Folder not found: {folderPath}");
        Console.WriteLine("Usage: CVBatchTester <folder-path>");
        Console.WriteLine(" e.g. CVBatchTester /home/user/cvs");
        return 1;
    }

    Console.WriteLine("CV Batch Verification Tester");
    Console.WriteLine($"Processing CVs from: {folderPath}");
    Console.WriteLine(new string('=', 80));

    // Setup DI
    var services = new ServiceCollection();
    ConfigureServices(services);

    // Dispose the root provider on every exit path so the disposable services it
    // created are released instead of being abandoned when Main returns.
    await using var provider = services.BuildServiceProvider();

    // Find CV files
    var cvFiles = Directory.GetFiles(folderPath, "*.*", SearchOption.TopDirectoryOnly)
        .Where(IsCvFile)
        .OrderBy(f => f)
        .ToList();

    Console.WriteLine($"Found {cvFiles.Count} CV files\n");

    if (cvFiles.Count == 0)
    {
        Console.WriteLine("No CV files found (.pdf, .docx, .doc)");
        return 1;
    }

    // Track results
    var allUnverifiedEmployers = new List<string>();
    var allUnverifiedInstitutions = new List<string>();
    var totalEmployers = 0;
    var verifiedEmployers = 0;
    var totalEducation = 0;
    var verifiedEducation = 0;
    var processedCount = 0;
    var errorCount = 0;

    foreach (var cvFile in cvFiles)
    {
        Console.WriteLine($"\n{new string('=', 80)}");
        Console.WriteLine($"[{++processedCount}/{cvFiles.Count}] {Path.GetFileName(cvFile)}");
        Console.WriteLine(new string('=', 80));

        try
        {
            // One DI scope per CV so scoped services are not shared between files
            using var scope = provider.CreateScope();
            var parser = scope.ServiceProvider.GetRequiredService<ICVParserService>();
            var companyVerifier = scope.ServiceProvider.GetRequiredService<ICompanyVerifierService>();
            var eduVerifier = scope.ServiceProvider.GetRequiredService<IEducationVerifierService>();

            // Parse CV
            await using var stream = File.OpenRead(cvFile);
            var cv = await parser.ParseAsync(stream, Path.GetFileName(cvFile));

            Console.WriteLine($"Candidate: {cv.FullName}");

            // Verify Employers
            if (cv.Employment is { Count: > 0 } employment)
            {
                Console.WriteLine($"\nEMPLOYERS ({employment.Count}):");
                Console.WriteLine(new string('-', 60));

                foreach (var emp in employment)
                {
                    totalEmployers++;
                    try
                    {
                        var result = await companyVerifier.VerifyCompanyAsync(
                            emp.CompanyName,
                            emp.StartDate,
                            emp.EndDate,
                            emp.JobTitle);

                        var icon = result.IsVerified ? "✓" : "✗";
                        var period = FormatPeriod(emp.StartDate, emp.EndDate);

                        Console.WriteLine($"\n {icon} {emp.CompanyName}");
                        Console.WriteLine($" Period: {period}");
                        Console.WriteLine($" Role: {emp.JobTitle}");

                        if (result.IsVerified)
                        {
                            verifiedEmployers++;
                            Console.WriteLine($" Match: {result.MatchedCompanyName} ({result.MatchScore}%)");
                            if (!string.IsNullOrEmpty(result.MatchedCompanyNumber))
                            {
                                Console.WriteLine($" Company #: {result.MatchedCompanyNumber}");
                            }
                            if (!string.IsNullOrEmpty(result.CompanyStatus))
                            {
                                Console.WriteLine($" Status: {result.CompanyStatus}");
                            }
                        }
                        else
                        {
                            allUnverifiedEmployers.Add(emp.CompanyName);
                        }

                        if (!string.IsNullOrEmpty(result.VerificationNotes))
                        {
                            Console.WriteLine($" Note: {result.VerificationNotes}");
                        }
                    }
                    catch (Exception ex)
                    {
                        // A failed lookup must not abort the rest of the CV:
                        // report it and count the employer as unverified.
                        Console.WriteLine($"\n ✗ {emp.CompanyName}");
                        Console.WriteLine($" ERROR: {ex.Message}");
                        allUnverifiedEmployers.Add(emp.CompanyName);
                    }
                }
            }

            // Verify Education
            if (cv.Education is { Count: > 0 } education)
            {
                Console.WriteLine($"\nEDUCATION ({education.Count}):");
                Console.WriteLine(new string('-', 60));

                var eduEntries = education.Select(e => new EducationEntry
                {
                    Institution = e.Institution,
                    Qualification = e.Qualification,
                    Subject = e.Subject,
                    StartDate = e.StartDate,
                    EndDate = e.EndDate
                }).ToList();

                var eduResults = eduVerifier.VerifyAll(eduEntries);

                foreach (var result in eduResults)
                {
                    totalEducation++;
                    var icon = result.IsVerified ? "✓" : "✗";

                    Console.WriteLine($"\n {icon} {result.ClaimedInstitution}");
                    Console.WriteLine($" Qualification: {result.ClaimedQualification}");
                    if (!string.IsNullOrEmpty(result.ClaimedSubject))
                    {
                        Console.WriteLine($" Subject: {result.ClaimedSubject}");
                    }

                    if (result.IsVerified)
                    {
                        verifiedEducation++;

                        // Only show the matched name when it differs from what the CV claimed
                        if (result.MatchedInstitution != null &&
                            !result.MatchedInstitution.Equals(result.ClaimedInstitution, StringComparison.OrdinalIgnoreCase))
                        {
                            Console.WriteLine($" Match: {result.MatchedInstitution}");
                        }
                    }
                    else
                    {
                        allUnverifiedInstitutions.Add(result.ClaimedInstitution ?? "Unknown");
                        Console.WriteLine($" Status: {result.Status}");
                    }

                    if (!string.IsNullOrEmpty(result.VerificationNotes))
                    {
                        Console.WriteLine($" Note: {result.VerificationNotes}");
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // Any failure outside the per-employer handler (parsing, education checks, ...)
            // skips the remainder of this file only.
            errorCount++;
            Console.WriteLine($"ERROR processing file: {ex.Message}");
        }
    }

    // Print Summary
    Console.WriteLine($"\n\n{new string('=', 80)}");
    Console.WriteLine("VERIFICATION SUMMARY");
    Console.WriteLine(new string('=', 80));

    Console.WriteLine($"\nCVs Processed: {processedCount - errorCount}/{cvFiles.Count}");
    if (errorCount > 0)
    {
        Console.WriteLine($"Errors: {errorCount}");
    }

    // Integer division: percentages are truncated to whole numbers
    var empRate = totalEmployers > 0 ? verifiedEmployers * 100 / totalEmployers : 0;
    var eduRate = totalEducation > 0 ? verifiedEducation * 100 / totalEducation : 0;

    Console.WriteLine($"\nEmployers: {verifiedEmployers}/{totalEmployers} verified ({empRate}%)");
    Console.WriteLine($"Education: {verifiedEducation}/{totalEducation} verified ({eduRate}%)");

    PrintUnverified("UNVERIFIED EMPLOYERS", allUnverifiedEmployers);
    PrintUnverified("UNVERIFIED INSTITUTIONS", allUnverifiedInstitutions);

    Console.WriteLine($"\n{new string('=', 80)}");
    return 0;

    // True when the file name carries one of the supported CV extensions (case-insensitive).
    static bool IsCvFile(string path) =>
        path.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase) ||
        path.EndsWith(".docx", StringComparison.OrdinalIgnoreCase) ||
        path.EndsWith(".doc", StringComparison.OrdinalIgnoreCase);

    // Prints one summary section: names grouped case-insensitively, most frequent first,
    // then by name. Prints nothing when there are no names.
    static void PrintUnverified(string heading, IEnumerable<string> names)
    {
        var groups = names
            .GroupBy(n => n, StringComparer.OrdinalIgnoreCase)
            .OrderByDescending(g => g.Count())
            .ThenBy(g => g.Key)
            .ToList();

        if (groups.Count == 0)
        {
            return;
        }

        Console.WriteLine($"\n{new string('-', 60)}");
        Console.WriteLine($"{heading} ({groups.Count} unique):");
        foreach (var group in groups)
        {
            var occurrences = group.Count();
            var suffix = occurrences > 1 ? $" (x{occurrences})" : "";
            Console.WriteLine($" - {group.Key}{suffix}");
        }
    }
}
|
|
|
|
/// <summary>
/// Prompts on the console for the CV folder and reads one line of input.
/// </summary>
/// <returns>The line that was entered, or an empty string when standard input has ended.</returns>
static string AskForFolder()
{
    Console.Write("Enter CV folder path: ");

    var typed = Console.ReadLine();
    if (typed is null)
    {
        // ReadLine yields null once no more input is available
        return "";
    }

    return typed;
}
|
|
|
|
/// <summary>
/// Renders an employment period as "&lt;start&gt; - &lt;end&gt;" using abbreviated month and year.
/// </summary>
/// <param name="start">Start date; shown as "?" when null.</param>
/// <param name="end">End date; shown as "Present" when null.</param>
/// <returns>The formatted period, with dates formatted by the current culture.</returns>
static string FormatPeriod(DateOnly? start, DateOnly? end)
{
    const string MonthYearFormat = "MMM yyyy";

    var from = start.HasValue ? start.Value.ToString(MonthYearFormat) : "?";
    var to = end.HasValue ? end.Value.ToString(MonthYearFormat) : "Present";

    return from + " - " + to;
}
|
|
|
|
/// <summary>
/// Registers everything the batch run resolves from DI: configuration-bound settings,
/// console logging, the EF Core context factory, the Companies House HTTP client,
/// the AI company-name matcher and the CV parsing / verification services.
/// </summary>
/// <param name="services">Collection that receives the registrations.</param>
/// <exception cref="InvalidOperationException">
/// No database connection string is available from the configuration files or the environment.
/// </exception>
static void ConfigureServices(IServiceCollection services)
{
    // Load configuration - try multiple locations
    var configPaths = new[]
    {
        "/var/www/realcv",
        "/git/RealCV/src/RealCV.Web",
        Path.GetFullPath(Path.Combine(AppContext.BaseDirectory, "..", "..", "..", "..", "..", "src", "RealCV.Web"))
    };

    // SetBasePath requires a directory that exists. When none of the probed locations is
    // present, fall back to the executable's own folder (always present) rather than to a
    // path already known to be missing, so the optional JSON files can simply be absent.
    var webProjectPath = configPaths.FirstOrDefault(Directory.Exists) ?? AppContext.BaseDirectory;
    Console.WriteLine($"Loading config from: {webProjectPath}");

    var configuration = new ConfigurationBuilder()
        .SetBasePath(webProjectPath)
        .AddJsonFile("appsettings.json", optional: true)
        .AddJsonFile("appsettings.Development.json", optional: true)
        .AddJsonFile("appsettings.Production.json", optional: true)
        .Build();

    // Logging - minimal output
    services.AddLogging(builder =>
    {
        builder.AddConsole();
        builder.SetMinimumLevel(LogLevel.Warning);
    });

    // Database
    // Credentials must come from configuration or the environment, never from source code.
    const string ConnectionStringEnvVar = "ConnectionStrings__DefaultConnection";

    var configuredConnectionString = configuration.GetConnectionString("DefaultConnection");
    if (string.IsNullOrWhiteSpace(configuredConnectionString))
    {
        configuredConnectionString = Environment.GetEnvironmentVariable(ConnectionStringEnvVar);
    }

    if (string.IsNullOrWhiteSpace(configuredConnectionString))
    {
        throw new InvalidOperationException(
            "No database connection string configured. Set ConnectionStrings:DefaultConnection in " +
            $"appsettings.json under '{webProjectPath}' or define the {ConnectionStringEnvVar} environment variable.");
    }

    // Explicitly non-nullable local so the captured value is known to be set inside the lambda
    string connectionString = configuredConnectionString;

    services.AddDbContextFactory<ApplicationDbContext>(options =>
        options.UseSqlServer(connectionString));

    // Companies House - use configuration binding
    services.Configure<CompaniesHouseSettings>(configuration.GetSection(CompaniesHouseSettings.SectionName));
    services.AddHttpClient<CompaniesHouseClient>();

    // Anthropic - use configuration binding
    services.Configure<AnthropicSettings>(configuration.GetSection(AnthropicSettings.SectionName));
    services.AddScoped<ICompanyNameMatcherService, AICompanyNameMatcherService>();

    // Services
    services.AddScoped<ICompanyVerifierService, CompanyVerifierService>();
    services.AddScoped<IEducationVerifierService, EducationVerifierService>();
    services.AddScoped<ICVParserService, CVParserService>();
}
|
|
}
|