diff --git a/src/RealCV.Application/Data/UKInstitutions.cs b/src/RealCV.Application/Data/UKInstitutions.cs
index 018e8f3..f1ceb00 100644
--- a/src/RealCV.Application/Data/UKInstitutions.cs
+++ b/src/RealCV.Application/Data/UKInstitutions.cs
@@ -43,6 +43,8 @@ public static class UKInstitutions
// Other Major Universities
"Aston University",
+ "Leeds Beckett University",
+ "Leeds Metropolitan University", // Former name of Leeds Beckett
"University of Bath",
"Birkbeck, University of London",
"Bournemouth University",
@@ -218,6 +220,9 @@ public static class UKInstitutions
["Queen Mary"] = "Queen Mary University of London",
["Royal Holloway University"] = "Royal Holloway, University of London",
["RHUL"] = "Royal Holloway, University of London",
+ ["Leeds Beckett"] = "Leeds Beckett University",
+ ["Leeds Met"] = "Leeds Beckett University",
+ ["Leeds Metropolitan"] = "Leeds Beckett University",
};
///
diff --git a/src/RealCV.Infrastructure/Services/CompanyVerifierService.cs b/src/RealCV.Infrastructure/Services/CompanyVerifierService.cs
index 5206b4c..19055e6 100644
--- a/src/RealCV.Infrastructure/Services/CompanyVerifierService.cs
+++ b/src/RealCV.Infrastructure/Services/CompanyVerifierService.cs
@@ -72,6 +72,152 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
"manufacturing", "operations", "trading"
};
+ // Mapping of common trading names to their official Companies House registered names.
+ // Many major UK companies trade under a different name than their registered name.
+ // Keys are compared case-insensitively (StringComparer.OrdinalIgnoreCase). Each value lists
+ // candidate registered names, which GenerateSearchQueries adds as search queries.
+ // NOTE(review): GenerateSearchQueries also matches keys at the start or end of the claimed
+ // company name, so short, generic keys (e.g. "Next", "Sky", "Three") can pick up unrelated
+ // employers - keep that in mind when adding entries.
+ private static readonly Dictionary TradingNameAliases = new(StringComparer.OrdinalIgnoreCase)
+ {
+ // Retail
+ ["Boots"] = new[] { "BOOTS UK LIMITED", "THE BOOTS COMPANY PLC", "BOOTS OPTICIANS" },
+ ["Sainsbury's"] = new[] { "J SAINSBURY PLC", "SAINSBURY'S SUPERMARKETS LTD" },
+ ["Marks & Spencer"] = new[] { "MARKS AND SPENCER GROUP PLC", "MARKS AND SPENCER PLC" },
+ ["M&S"] = new[] { "MARKS AND SPENCER GROUP PLC", "MARKS AND SPENCER PLC" },
+ ["John Lewis"] = new[] { "JOHN LEWIS PLC", "JOHN LEWIS PARTNERSHIP PLC" },
+ ["John Lewis Partnership"] = new[] { "JOHN LEWIS PARTNERSHIP PLC", "JOHN LEWIS PLC" },
+ ["Waitrose"] = new[] { "WAITROSE LIMITED", "JOHN LEWIS PARTNERSHIP PLC" },
+ ["Tesco"] = new[] { "TESCO PLC", "TESCO STORES LIMITED" },
+ ["Asda"] = new[] { "ASDA STORES LIMITED", "ASDA GROUP LIMITED" },
+ ["Morrisons"] = new[] { "WM MORRISON SUPERMARKETS LIMITED" },
+ ["Lidl"] = new[] { "LIDL GREAT BRITAIN LIMITED" },
+ ["Aldi"] = new[] { "ALDI STORES LIMITED" },
+
+ // Banking & Finance
+ ["Lloyds Banking Group"] = new[] { "LLOYDS BANKING GROUP PLC", "LLOYDS BANK PLC" },
+ ["Lloyds Bank"] = new[] { "LLOYDS BANK PLC", "LLOYDS BANKING GROUP PLC" },
+ ["HSBC"] = new[] { "HSBC HOLDINGS PLC", "HSBC UK BANK PLC", "HSBC BANK PLC" },
+ ["HSBC Holdings PLC"] = new[] { "HSBC HOLDINGS PLC", "HSBC UK BANK PLC" },
+ ["HSBC UK"] = new[] { "HSBC UK BANK PLC", "HSBC HOLDINGS PLC" },
+ ["Barclays"] = new[] { "BARCLAYS PLC", "BARCLAYS BANK PLC" },
+ ["NatWest"] = new[] { "NATWEST GROUP PLC", "NATIONAL WESTMINSTER BANK PLC" },
+ ["NatWest Group"] = new[] { "NATWEST GROUP PLC", "NATIONAL WESTMINSTER BANK PLC" },
+ ["Santander UK"] = new[] { "SANTANDER UK PLC" },
+ ["Nationwide"] = new[] { "NATIONWIDE BUILDING SOCIETY" },
+
+ // Media & Broadcasting
+ ["BBC"] = new[] { "BRITISH BROADCASTING CORPORATION" },
+ ["ITV"] = new[] { "ITV PLC" },
+ ["Sky"] = new[] { "SKY LIMITED", "SKY UK LIMITED" },
+ ["Channel 4"] = new[] { "CHANNEL FOUR TELEVISION CORPORATION" },
+
+ // Technology
+ ["IBM UK"] = new[] { "IBM UNITED KINGDOM LIMITED", "INTERNATIONAL BUSINESS MACHINES" },
+ ["IBM"] = new[] { "IBM UNITED KINGDOM LIMITED", "INTERNATIONAL BUSINESS MACHINES" },
+ ["Google UK"] = new[] { "GOOGLE UK LIMITED", "GOOGLE LLC" },
+ ["Google"] = new[] { "GOOGLE UK LIMITED" },
+ ["Microsoft UK"] = new[] { "MICROSOFT LIMITED" },
+ ["Amazon UK"] = new[] { "AMAZON UK SERVICES LTD", "AMAZON.CO.UK LTD" },
+ ["Apple UK"] = new[] { "APPLE (UK) LIMITED", "APPLE RETAIL UK LIMITED" },
+
+ // Consulting & Professional Services
+ ["Accenture UK"] = new[] { "ACCENTURE (UK) LIMITED", "ACCENTURE PLC" },
+ ["Accenture"] = new[] { "ACCENTURE (UK) LIMITED", "ACCENTURE PLC" },
+ ["EY UK"] = new[] { "ERNST & YOUNG LLP", "EY LLP" },
+ ["EY"] = new[] { "ERNST & YOUNG LLP", "EY LLP" },
+ ["Ernst & Young"] = new[] { "ERNST & YOUNG LLP" },
+ ["Deloitte UK"] = new[] { "DELOITTE LLP" },
+ ["Deloitte"] = new[] { "DELOITTE LLP" },
+ ["KPMG UK"] = new[] { "KPMG LLP" },
+ ["KPMG"] = new[] { "KPMG LLP" },
+ ["PwC UK"] = new[] { "PRICEWATERHOUSECOOPERS LLP", "PWC (UK) LIMITED" },
+ ["PwC"] = new[] { "PRICEWATERHOUSECOOPERS LLP", "PWC (UK) LIMITED" },
+ ["McKinsey"] = new[] { "MCKINSEY & COMPANY, INC. UNITED KINGDOM" },
+ ["BCG"] = new[] { "THE BOSTON CONSULTING GROUP UK LLP" },
+ ["Bain"] = new[] { "BAIN & COMPANY UK LIMITED" },
+
+ // Advertising & Media
+ ["WPP"] = new[] { "WPP PLC" },
+
+ // Fintech
+ ["Checkout.com"] = new[] { "CHECKOUT.COM LIMITED", "CHECKOUT LTD" },
+ ["Revolut"] = new[] { "REVOLUT LTD", "REVOLUT LIMITED" },
+ ["Monzo"] = new[] { "MONZO BANK LIMITED" },
+ ["Starling Bank"] = new[] { "STARLING BANK LIMITED" },
+
+ // Travel & Hospitality
+ ["Thomas Cook"] = new[] { "THOMAS COOK GROUP PLC", "THOMAS COOK UK LIMITED" },
+ ["TUI"] = new[] { "TUI UK LIMITED" },
+ ["British Airways"] = new[] { "BRITISH AIRWAYS PLC" },
+ ["EasyJet"] = new[] { "EASYJET PLC", "EASYJET AIRLINE COMPANY LIMITED" },
+ ["Ryanair"] = new[] { "RYANAIR UK LIMITED" },
+ ["Jamie's Italian"] = new[] { "JAMIE'S ITALIAN LIMITED", "JAMIE OLIVER HOLDINGS LIMITED" },
+
+ // Retail (Other)
+ ["Toys R Us"] = new[] { "TOYS R US LIMITED", "TOYS \"R\" US LIMITED" },
+ ["Toys R Us UK"] = new[] { "TOYS R US LIMITED" },
+ ["Debenhams"] = new[] { "DEBENHAMS PLC", "DEBENHAMS RETAIL LIMITED" },
+ ["House of Fraser"] = new[] { "HOUSE OF FRASER LIMITED" },
+ ["Next"] = new[] { "NEXT PLC", "NEXT RETAIL LIMITED" },
+ ["Primark"] = new[] { "PRIMARK STORES LIMITED" },
+ ["Sports Direct"] = new[] { "SPORTS DIRECT INTERNATIONAL PLC" },
+
+ // Telecoms
+ ["BT"] = new[] { "BT GROUP PLC", "BRITISH TELECOMMUNICATIONS PLC" },
+ ["BT Group"] = new[] { "BT GROUP PLC" },
+ ["Vodafone"] = new[] { "VODAFONE LIMITED", "VODAFONE GROUP PLC" },
+ ["O2"] = new[] { "TELEFONICA UK LIMITED" },
+ ["EE"] = new[] { "EE LIMITED" },
+ ["Three"] = new[] { "HUTCHISON 3G UK LIMITED" },
+ ["Virgin Media"] = new[] { "VIRGIN MEDIA LIMITED" },
+
+ // Energy
+ ["BP"] = new[] { "BP P.L.C.", "BP PLC" },
+ ["Shell UK"] = new[] { "SHELL U.K. LIMITED", "SHELL PLC" },
+ ["Shell"] = new[] { "SHELL PLC", "SHELL U.K. LIMITED" },
+ ["British Gas"] = new[] { "BRITISH GAS SERVICES LIMITED", "CENTRICA PLC" },
+ ["Centrica"] = new[] { "CENTRICA PLC" },
+ ["SSE"] = new[] { "SSE PLC" },
+ ["National Grid"] = new[] { "NATIONAL GRID PLC" },
+
+ // Automotive
+ ["Jaguar Land Rover"] = new[] { "JAGUAR LAND ROVER LIMITED" },
+ ["JLR"] = new[] { "JAGUAR LAND ROVER LIMITED" },
+ ["Rolls-Royce"] = new[] { "ROLLS-ROYCE PLC", "ROLLS-ROYCE HOLDINGS PLC" },
+ ["BMW UK"] = new[] { "BMW (UK) LIMITED", "BMW GROUP UK LIMITED" },
+
+ // Food & Beverage
+ ["Unilever"] = new[] { "UNILEVER PLC" },
+ ["Nestle UK"] = new[] { "NESTLE UK LTD" },
+ ["Coca-Cola UK"] = new[] { "COCA-COLA EUROPACIFIC PARTNERS PLC" },
+ ["PepsiCo UK"] = new[] { "PEPSICO UK LIMITED" },
+
+ // Pharmaceutical & Healthcare
+ ["GlaxoSmithKline"] = new[] { "GLAXOSMITHKLINE PLC", "GSK PLC" },
+ ["GSK"] = new[] { "GSK PLC", "GLAXOSMITHKLINE PLC" },
+ ["AstraZeneca"] = new[] { "ASTRAZENECA PLC" },
+ ["Pfizer UK"] = new[] { "PFIZER LIMITED" },
+
+ // Defence & Aerospace
+ ["BAE Systems"] = new[] { "BAE SYSTEMS PLC" },
+ ["BAE"] = new[] { "BAE SYSTEMS PLC" },
+
+ // Insurance
+ ["Aviva"] = new[] { "AVIVA PLC" },
+ ["Legal & General"] = new[] { "LEGAL & GENERAL GROUP PLC", "LEGAL AND GENERAL" },
+ ["Prudential"] = new[] { "PRUDENTIAL PLC", "PRUDENTIAL PUBLIC LIMITED COMPANY" },
+ ["Admiral"] = new[] { "ADMIRAL GROUP PLC" },
+
+ // Construction & Engineering
+ ["Balfour Beatty"] = new[] { "BALFOUR BEATTY PLC" },
+ ["Carillion"] = new[] { "CARILLION PLC" },
+ ["Kier"] = new[] { "KIER GROUP PLC" },
+ ["Taylor Wimpey"] = new[] { "TAYLOR WIMPEY PLC" },
+ ["Persimmon"] = new[] { "PERSIMMON PLC" },
+
+ // Outsourcing & Services
+ ["Serco"] = new[] { "SERCO GROUP PLC" },
+ ["Capita"] = new[] { "CAPITA PLC" },
+ ["G4S"] = new[] { "G4S PLC", "G4S LIMITED" },
+ };
+
public CompanyVerifierService(
CompaniesHouseClient companiesHouseClient,
@@ -964,12 +1110,37 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
/// Generates alternative search queries to find companies that may be registered
/// with slightly different names (e.g., "U.K." vs "UK", "Limited" vs "Ltd").
/// Also handles "Brand (Parent Company)" format by extracting and prioritizing the parent.
+ /// Uses TradingNameAliases to map common trading names to registered names.
///
private static List GenerateSearchQueries(string companyName)
{
var queries = new HashSet(StringComparer.OrdinalIgnoreCase);
var normalized = companyName.Trim();
+ // Step 0: Check if this is a known trading name and add alias queries FIRST (highest priority)
+ if (TradingNameAliases.TryGetValue(normalized, out var aliases))
+ {
+ foreach (var alias in aliases)
+ {
+ queries.Add(alias);
+ }
+ }
+
+        // Also check partial matches for trading names (e.g., "Boots UK" should match "Boots").
+        // The trading name must sit at the start or the end of the company name AND stop on a
+        // word boundary, so short keys such as "EE" or "Sky" no longer match unrelated names
+        // like "Coffee" or "Skyline". When several trading names match, the longest (most
+        // specific) one wins - e.g. "HSBC UK Bank" uses "HSBC UK" rather than "HSBC" - which
+        // also removes any dependence on the dictionary's enumeration order.
+        string? longestTradingName = null;
+        foreach (var (tradingName, _) in TradingNameAliases)
+        {
+            // Skip keys that cannot fit or cannot beat the best match found so far.
+            if (tradingName.Length > normalized.Length ||
+                (longestTradingName is not null && tradingName.Length <= longestTradingName.Length))
+            {
+                continue;
+            }
+
+            // Ordinal comparison matches char-for-char, so the matched span has the key's length.
+            var leftover = normalized.Length - tradingName.Length;
+            var matchesAtStart =
+                normalized.StartsWith(tradingName, StringComparison.OrdinalIgnoreCase) &&
+                (leftover == 0 || !char.IsLetterOrDigit(normalized[tradingName.Length]));
+            var matchesAtEnd =
+                normalized.EndsWith(tradingName, StringComparison.OrdinalIgnoreCase) &&
+                (leftover == 0 || !char.IsLetterOrDigit(normalized[leftover - 1]));
+
+            if (matchesAtStart || matchesAtEnd)
+            {
+                longestTradingName = tradingName;
+            }
+        }
+
+        if (longestTradingName is not null)
+        {
+            // Only the single best-matching alias set is used.
+            foreach (var alias in TradingNameAliases[longestTradingName])
+            {
+                queries.Add(alias);
+            }
+        }
+
// Step 0a: Check for "Brand (Parent Company)" format and extract parent company
// Parent company is more likely to be the registered name, so search it first
var parentMatch = System.Text.RegularExpressions.Regex.Match(normalized, @"\(([^)]+)\)\s*$");
diff --git a/tools/CVBatchTester/CVBatchTester.csproj b/tools/CVBatchTester/CVBatchTester.csproj
new file mode 100644
index 0000000..76d44ee
--- /dev/null
+++ b/tools/CVBatchTester/CVBatchTester.csproj
@@ -0,0 +1,15 @@
+
+
+
+ Exe
+ net8.0
+ enable
+ enable
+
+
+
+
+
+
+
+
diff --git a/tools/CVBatchTester/Program.cs b/tools/CVBatchTester/Program.cs
new file mode 100644
index 0000000..dc6c4fa
--- /dev/null
+++ b/tools/CVBatchTester/Program.cs
@@ -0,0 +1,445 @@
+using System.Text.Json;
+using System.Text.Json.Serialization;
+using Microsoft.EntityFrameworkCore;
+using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Logging;
+using RealCV.Application.Interfaces;
+using RealCV.Application.Models;
+using RealCV.Infrastructure.Configuration;
+using RealCV.Infrastructure.Data;
+using RealCV.Infrastructure.ExternalApis;
+using RealCV.Infrastructure.Services;
+
+namespace CVBatchTester;
+
+// DTOs for test JSON format (snake_case with nested personal object)
+// TestCVData is the root object of a test CV JSON file; Program.ConvertTestCVData maps it to CVData.
+// That conversion reads Personal, Employment, Education and Skills only; CvId, Category,
+// ExpectedFlags and Profile are deserialized but not read anywhere else in this file.
+record TestCVData
+{
+ public string? CvId { get; init; }
+ public string? Category { get; init; }
+ public List? ExpectedFlags { get; init; }
+ public TestPersonalData? Personal { get; init; }
+ public string? Profile { get; init; }
+ public List? Employment { get; init; }
+ public List? Education { get; init; }
+ public List? Skills { get; init; }
+}
+
+// Nested "personal" object of a test CV. Program.ConvertTestCVData copies Name, Email and Phone;
+// Address and LinkedIn are not mapped.
+record TestPersonalData
+{
+ public string? Name { get; init; }
+ public string? Email { get; init; }
+ public string? Phone { get; init; }
+ public string? Address { get; init; }
+ // NOTE(review): with JsonNamingPolicy.SnakeCaseLower this property presumably binds to
+ // "linked_in", not "linkedin" - confirm against the test JSON files.
+ public string? LinkedIn { get; init; }
+}
+
+// One employment entry of a test CV. StartDate and EndDate are kept as raw strings and parsed
+// by Program.ParseDate during conversion; a null EndDate marks the job as current (IsCurrent).
+// Achievements is not mapped by Program.ConvertTestCVData.
+record TestEmploymentEntry
+{
+ public string? Company { get; init; }
+ public string? JobTitle { get; init; }
+ public string? StartDate { get; init; }
+ public string? EndDate { get; init; }
+ public string? Location { get; init; }
+ public string? Description { get; init; }
+ public List? Achievements { get; init; }
+}
+
+// One education entry of a test CV. StartDate and EndDate are raw strings parsed by
+// Program.ParseDate during conversion; Classification is not mapped by Program.ConvertTestCVData.
+record TestEducationEntry
+{
+ public string? Institution { get; init; }
+ public string? Qualification { get; init; }
+ public string? Subject { get; init; }
+ public string? Classification { get; init; }
+ public string? StartDate { get; init; }
+ public string? EndDate { get; init; }
+}
+
+class Program
+{
+ // Log file writer, assigned in Main once the output path is known. While it is null,
+ // Log writes to the console only.
+ private static StreamWriter? _logWriter;
+
+ // Serializer options used to read test CV JSON files: snake_case property names,
+ // case-insensitive property matching, and a string converter for enum values.
+ private static readonly JsonSerializerOptions JsonOptions = new()
+ {
+ PropertyNameCaseInsensitive = true,
+ PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
+ Converters = { new JsonStringEnumConverter() }
+ };
+
+ // Entry point. Verifies the employers and education entries of every CV file in a folder
+ // and writes a per-file report plus a summary to the console and to a log file.
+ //
+ // args: the first argument is the CV folder (the user is prompted when it is absent); an
+ // optional "--output" flag followed by a path overrides the default log file location.
+ // Returns 1 when the folder does not exist or contains no CV files, otherwise 0.
+ static async Task Main(string[] args)
+ {
+ var folderPath = args.FirstOrDefault() ?? AskForFolder();
+
+ // _logWriter is still null here, so these messages reach the console only.
+ if (string.IsNullOrEmpty(folderPath) || !Directory.Exists(folderPath))
+ {
+ Log($"Error: Folder not found: {folderPath}");
+ Log("Usage: CVBatchTester [--output ]");
+ Log(" e.g. CVBatchTester /home/user/cvs");
+ Log(" e.g. CVBatchTester /home/user/cvs --output /tmp/results.log");
+ return 1;
+ }
+
+ // Check for --output flag; it only counts when a value follows it. Otherwise the log goes
+ // to a timestamped file inside the CV folder.
+ var outputIndex = Array.IndexOf(args, "--output");
+ var logPath = outputIndex >= 0 && outputIndex < args.Length - 1
+ ? args[outputIndex + 1]
+ : Path.Combine(folderPath, $"batch-results-{DateTime.Now:yyyyMMdd-HHmmss}.log");
+
+ // Overwrites any existing file at logPath; AutoFlush keeps the file current after each line.
+ _logWriter = new StreamWriter(logPath, false) { AutoFlush = true };
+
+ Log($"CV Batch Verification Tester");
+ Log($"Processing CVs from: {folderPath}");
+ Log($"Output log: {logPath}");
+ Log($"Started: {DateTime.Now:yyyy-MM-dd HH:mm:ss}");
+ Log(new string('=', 80));
+
+ // Setup DI
+ var services = new ServiceCollection();
+ ConfigureServices(services);
+ var provider = services.BuildServiceProvider();
+
+ // Find CV files: top-level files only, filtered by extension and sorted by path.
+ var cvFiles = Directory.GetFiles(folderPath, "*.*", SearchOption.TopDirectoryOnly)
+ .Where(f => f.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase) ||
+ f.EndsWith(".docx", StringComparison.OrdinalIgnoreCase) ||
+ f.EndsWith(".doc", StringComparison.OrdinalIgnoreCase) ||
+ f.EndsWith(".json", StringComparison.OrdinalIgnoreCase))
+ .OrderBy(f => f)
+ .ToList();
+
+ Log($"Found {cvFiles.Count} CV files\n");
+
+ // NOTE(review): this early return leaves _logWriter unclosed (AutoFlush has already
+ // written the lines above to disk).
+ if (cvFiles.Count == 0)
+ {
+ Log("No CV files found (.pdf, .docx, .doc, .json)");
+ return 1;
+ }
+
+ // Track results across all files for the final summary.
+ var allUnverifiedEmployers = new List();
+ var allUnverifiedInstitutions = new List();
+ var totalEmployers = 0;
+ var verifiedEmployers = 0;
+ var totalEducation = 0;
+ var verifiedEducation = 0;
+ var processedCount = 0;
+ var errorCount = 0;
+
+ foreach (var cvFile in cvFiles)
+ {
+ Log($"\n{new string('=', 80)}");
+ Log($"[{++processedCount}/{cvFiles.Count}] {Path.GetFileName(cvFile)}");
+ Log(new string('=', 80));
+
+ try
+ {
+ // A fresh DI scope per file, disposed when this iteration ends.
+ using var scope = provider.CreateScope();
+ var parser = scope.ServiceProvider.GetRequiredService();
+ var companyVerifier = scope.ServiceProvider.GetRequiredService();
+ var eduVerifier = scope.ServiceProvider.GetRequiredService();
+
+ // Parse CV - handle JSON files differently: they are deserialized directly and
+ // converted, without going through the parser service.
+ CVData cv;
+ if (cvFile.EndsWith(".json", StringComparison.OrdinalIgnoreCase))
+ {
+ var jsonContent = await File.ReadAllTextAsync(cvFile);
+ var testCv = JsonSerializer.Deserialize(jsonContent, JsonOptions)
+ ?? throw new InvalidOperationException("Failed to deserialize JSON CV");
+
+ // Convert TestCVData to CVData
+ cv = ConvertTestCVData(testCv);
+ Log($"Loaded JSON CV: {cv.FullName}");
+ }
+ else
+ {
+ await using var stream = File.OpenRead(cvFile);
+ cv = await parser.ParseAsync(stream, Path.GetFileName(cvFile));
+ Log($"Parsed CV: {cv.FullName}");
+ }
+
+ // Verify Employers
+ if (cv.Employment?.Count > 0)
+ {
+ Log($"\nEMPLOYERS ({cv.Employment.Count}):");
+ Log(new string('-', 60));
+
+ foreach (var emp in cv.Employment)
+ {
+ // Counted before verification so failures still appear in the totals.
+ totalEmployers++;
+ try
+ {
+ var result = await companyVerifier.VerifyCompanyAsync(
+ emp.CompanyName,
+ emp.StartDate,
+ emp.EndDate,
+ emp.JobTitle);
+
+ var icon = result.IsVerified ? "✓" : "✗";
+ var period = FormatPeriod(emp.StartDate, emp.EndDate);
+
+ Log($"\n {icon} {emp.CompanyName}");
+ Log($" Period: {period}");
+ Log($" Role: {emp.JobTitle}");
+
+ if (result.IsVerified)
+ {
+ verifiedEmployers++;
+ Log($" Match: {result.MatchedCompanyName} ({result.MatchScore}%)");
+ if (!string.IsNullOrEmpty(result.MatchedCompanyNumber))
+ Log($" Company #: {result.MatchedCompanyNumber}");
+ if (!string.IsNullOrEmpty(result.CompanyStatus))
+ Log($" Status: {result.CompanyStatus}");
+ }
+ else
+ {
+ allUnverifiedEmployers.Add(emp.CompanyName);
+ }
+
+ if (!string.IsNullOrEmpty(result.VerificationNotes))
+ Log($" Note: {result.VerificationNotes}");
+ }
+ catch (Exception ex)
+ {
+ // A failed lookup is reported as unverified and does not abort the rest of the file.
+ Log($"\n ✗ {emp.CompanyName}");
+ Log($" ERROR: {ex.Message}");
+ allUnverifiedEmployers.Add(emp.CompanyName);
+ }
+ }
+ }
+
+ // Verify Education
+ if (cv.Education?.Count > 0)
+ {
+ Log($"\nEDUCATION ({cv.Education.Count}):");
+ Log(new string('-', 60));
+
+ // Copy the fields the verifier is given into fresh EducationEntry instances.
+ var eduEntries = cv.Education.Select(e => new EducationEntry
+ {
+ Institution = e.Institution,
+ Qualification = e.Qualification,
+ Subject = e.Subject,
+ StartDate = e.StartDate,
+ EndDate = e.EndDate
+ }).ToList();
+
+ var eduResults = eduVerifier.VerifyAll(eduEntries);
+
+ foreach (var result in eduResults)
+ {
+ totalEducation++;
+ var icon = result.IsVerified ? "✓" : "✗";
+
+ Log($"\n {icon} {result.ClaimedInstitution}");
+ Log($" Qualification: {result.ClaimedQualification}");
+ if (!string.IsNullOrEmpty(result.ClaimedSubject))
+ Log($" Subject: {result.ClaimedSubject}");
+
+ if (result.IsVerified)
+ {
+ verifiedEducation++;
+ // Only print the match when it differs from the claimed name.
+ if (result.MatchedInstitution != null &&
+ !result.MatchedInstitution.Equals(result.ClaimedInstitution, StringComparison.OrdinalIgnoreCase))
+ {
+ Log($" Match: {result.MatchedInstitution}");
+ }
+ }
+ else
+ {
+ allUnverifiedInstitutions.Add(result.ClaimedInstitution ?? "Unknown");
+ Log($" Status: {result.Status}");
+ }
+
+ if (!string.IsNullOrEmpty(result.VerificationNotes))
+ Log($" Note: {result.VerificationNotes}");
+ }
+ }
+ }
+ catch (Exception ex)
+ {
+ // Any failure outside the per-employer try counts the whole file as an error.
+ errorCount++;
+ Log($"ERROR processing file: {ex.Message}");
+ }
+ }
+
+ // Print Summary
+ Log($"\n\n{new string('=', 80)}");
+ Log("VERIFICATION SUMMARY");
+ Log(new string('=', 80));
+
+ // processedCount includes failed files, so successes are processedCount - errorCount.
+ Log($"\nCVs Processed: {processedCount - errorCount}/{cvFiles.Count}");
+ if (errorCount > 0)
+ Log($"Errors: {errorCount}");
+
+ // Integer division: percentages are rounded down.
+ var empRate = totalEmployers > 0 ? verifiedEmployers * 100 / totalEmployers : 0;
+ var eduRate = totalEducation > 0 ? verifiedEducation * 100 / totalEducation : 0;
+
+ Log($"\nEmployers: {verifiedEmployers}/{totalEmployers} verified ({empRate}%)");
+ Log($"Education: {verifiedEducation}/{totalEducation} verified ({eduRate}%)");
+
+ // List unverified employers, grouped case-insensitively, most frequent first.
+ var uniqueUnverifiedEmployers = allUnverifiedEmployers
+ .GroupBy(e => e, StringComparer.OrdinalIgnoreCase)
+ .OrderByDescending(g => g.Count())
+ .ThenBy(g => g.Key)
+ .ToList();
+
+ if (uniqueUnverifiedEmployers.Count > 0)
+ {
+ Log($"\n{new string('-', 60)}");
+ Log($"UNVERIFIED EMPLOYERS ({uniqueUnverifiedEmployers.Count} unique):");
+ foreach (var group in uniqueUnverifiedEmployers)
+ {
+ var count = group.Count() > 1 ? $" (x{group.Count()})" : "";
+ Log($" - {group.Key}{count}");
+ }
+ }
+
+ // List unverified institutions, grouped case-insensitively, most frequent first.
+ var uniqueUnverifiedInstitutions = allUnverifiedInstitutions
+ .GroupBy(i => i, StringComparer.OrdinalIgnoreCase)
+ .OrderByDescending(g => g.Count())
+ .ThenBy(g => g.Key)
+ .ToList();
+
+ if (uniqueUnverifiedInstitutions.Count > 0)
+ {
+ Log($"\n{new string('-', 60)}");
+ Log($"UNVERIFIED INSTITUTIONS ({uniqueUnverifiedInstitutions.Count} unique):");
+ foreach (var group in uniqueUnverifiedInstitutions)
+ {
+ var count = group.Count() > 1 ? $" (x{group.Count()})" : "";
+ Log($" - {group.Key}{count}");
+ }
+ }
+
+ Log($"\nCompleted: {DateTime.Now:yyyy-MM-dd HH:mm:ss}");
+ Log($"\n{new string('=', 80)}");
+
+ // The final message goes to the console only because the log file is closed first.
+ _logWriter?.Close();
+ Console.WriteLine($"\nResults written to: {logPath}");
+
+ return 0;
+ }
+
+    // Writes one line to the console and, once the log file has been opened, to the log file too.
+    static void Log(string message)
+    {
+        Console.Out.WriteLine(message);
+
+        var writer = _logWriter;
+        if (writer is not null)
+        {
+            writer.WriteLine(message);
+        }
+    }
+
+    // Prompts on the console for the CV folder; returns an empty string when input has ended.
+    static string AskForFolder()
+    {
+        Console.Write("Enter CV folder path: ");
+
+        var typed = Console.ReadLine();
+        return typed is null ? "" : typed;
+    }
+
+    // Formats an employment period as "<start> - <end>" using the "MMM yyyy" date format.
+    // A missing start is shown as "?" and a missing end as "Present".
+    static string FormatPeriod(DateOnly? start, DateOnly? end)
+    {
+        string opening = start.HasValue ? start.Value.ToString("MMM yyyy") : "?";
+        string closing = end.HasValue ? end.Value.ToString("MMM yyyy") : "Present";
+
+        return opening + " - " + closing;
+    }
+
+    // Maps a deserialized test CV (TestCVData) onto the CVData model used by the verifiers.
+    // Missing names fall back to "Unknown", missing lists become empty lists, and date strings
+    // go through ParseDate.
+    // NOTE(review): IsCurrent is true only when the raw end date is null; an empty-string end
+    // date yields EndDate == null but IsCurrent == false - confirm that is intended.
+    static CVData ConvertTestCVData(TestCVData testCv)
+    {
+        var person = testCv.Personal;
+        var jobs = testCv.Employment?.Select(ToEmployment).ToList();
+        var studies = testCv.Education?.Select(ToEducation).ToList();
+
+        return new CVData
+        {
+            FullName = person?.Name ?? "Unknown",
+            Email = person?.Email,
+            Phone = person?.Phone,
+            Employment = jobs ?? [],
+            Education = studies ?? [],
+            Skills = testCv.Skills ?? []
+        };
+
+        // Converts one employment entry of the test JSON.
+        static EmploymentEntry ToEmployment(TestEmploymentEntry job) => new EmploymentEntry
+        {
+            CompanyName = job.Company ?? "Unknown",
+            JobTitle = job.JobTitle ?? "Unknown",
+            Location = job.Location,
+            StartDate = ParseDate(job.StartDate),
+            EndDate = ParseDate(job.EndDate),
+            IsCurrent = job.EndDate == null,
+            Description = job.Description
+        };
+
+        // Converts one education entry of the test JSON.
+        static EducationEntry ToEducation(TestEducationEntry study) => new EducationEntry
+        {
+            Institution = study.Institution ?? "Unknown",
+            Qualification = study.Qualification,
+            Subject = study.Subject,
+            StartDate = ParseDate(study.StartDate),
+            EndDate = ParseDate(study.EndDate)
+        };
+    }
+
+    // Parses a date string taken from a test CV JSON file.
+    //
+    // Accepts "YYYY-MM" (mapped to the first day of that month) and otherwise anything
+    // DateOnly.TryParse understands (that fallback uses the current culture).
+    // Returns null for null/empty input and for anything that cannot be parsed. An
+    // out-of-range year or month in a "YYYY-MM" value (e.g. "2020-13") also returns null
+    // instead of letting the DateOnly constructor throw ArgumentOutOfRangeException.
+    static DateOnly? ParseDate(string? dateStr)
+    {
+        if (string.IsNullOrEmpty(dateStr))
+        {
+            return null;
+        }
+
+        // Try parsing YYYY-MM format; the range checks guard the DateOnly constructor.
+        if (dateStr.Length == 7 && dateStr[4] == '-' &&
+            int.TryParse(dateStr[..4], out var year) &&
+            int.TryParse(dateStr[5..], out var month) &&
+            year is >= 1 and <= 9999 &&
+            month is >= 1 and <= 12)
+        {
+            return new DateOnly(year, month, 1);
+        }
+
+        // Try standard parsing
+        if (DateOnly.TryParse(dateStr, out var date))
+        {
+            return date;
+        }
+
+        return null;
+    }
+
+    // Registers everything Main resolves from the container: configuration-bound settings for
+    // Companies House and Anthropic, console logging, the SQL Server DbContext factory, an HTTP
+    // client and the scoped services.
+    //
+    // The connection string is read from the "DefaultConnection" entry of the loaded
+    // appsettings files, then from the ConnectionStrings__DefaultConnection environment
+    // variable. No credentials are embedded in source: when neither is set, an
+    // InvalidOperationException explains how to supply one.
+    static void ConfigureServices(IServiceCollection services)
+    {
+        // Load configuration - try multiple locations
+        var configPaths = new[]
+        {
+            "/var/www/realcv",
+            "/git/RealCV/src/RealCV.Web",
+            Path.GetFullPath(Path.Combine(AppContext.BaseDirectory, "..", "..", "..", "..", "..", "src", "RealCV.Web"))
+        };
+
+        // First location that exists wins; otherwise fall back to the repository path.
+        var webProjectPath = configPaths.FirstOrDefault(Directory.Exists) ?? "/git/RealCV/src/RealCV.Web";
+        Log($"Loading config from: {webProjectPath}");
+
+        // All three files are optional; later files override earlier ones.
+        var configuration = new ConfigurationBuilder()
+            .SetBasePath(webProjectPath)
+            .AddJsonFile("appsettings.json", optional: true)
+            .AddJsonFile("appsettings.Development.json", optional: true)
+            .AddJsonFile("appsettings.Production.json", optional: true)
+            .Build();
+
+        // Logging - show info level for verification details
+        services.AddLogging(builder =>
+        {
+            builder.AddConsole();
+            builder.SetMinimumLevel(LogLevel.Information);
+            // Filter out noisy libraries
+            builder.AddFilter("Microsoft", LogLevel.Warning);
+            builder.AddFilter("System", LogLevel.Warning);
+        });
+
+        // Database - the connection string must come from configuration or the environment.
+        var connectionString = configuration.GetConnectionString("DefaultConnection")
+            ?? Environment.GetEnvironmentVariable("ConnectionStrings__DefaultConnection")
+            ?? throw new InvalidOperationException(
+                "No 'DefaultConnection' connection string found. Add it to an appsettings file under " +
+                $"'{webProjectPath}' or set the ConnectionStrings__DefaultConnection environment variable.");
+
+        services.AddDbContextFactory(options =>
+            options.UseSqlServer(connectionString));
+
+        // Companies House - use configuration binding
+        services.Configure(configuration.GetSection(CompaniesHouseSettings.SectionName));
+        services.AddHttpClient();
+
+        // Anthropic - use configuration binding
+        services.Configure(configuration.GetSection(AnthropicSettings.SectionName));
+        services.AddScoped();
+
+        // Services
+        services.AddScoped();
+        services.AddScoped();
+        services.AddScoped();
+    }
+}