diff --git a/src/RealCV.Application/Data/UKInstitutions.cs b/src/RealCV.Application/Data/UKInstitutions.cs index 018e8f3..f1ceb00 100644 --- a/src/RealCV.Application/Data/UKInstitutions.cs +++ b/src/RealCV.Application/Data/UKInstitutions.cs @@ -43,6 +43,8 @@ public static class UKInstitutions // Other Major Universities "Aston University", + "Leeds Beckett University", + "Leeds Metropolitan University", // Former name of Leeds Beckett "University of Bath", "Birkbeck, University of London", "Bournemouth University", @@ -218,6 +220,9 @@ public static class UKInstitutions ["Queen Mary"] = "Queen Mary University of London", ["Royal Holloway University"] = "Royal Holloway, University of London", ["RHUL"] = "Royal Holloway, University of London", + ["Leeds Beckett"] = "Leeds Beckett University", + ["Leeds Met"] = "Leeds Beckett University", + ["Leeds Metropolitan"] = "Leeds Beckett University", }; /// diff --git a/src/RealCV.Infrastructure/Services/CompanyVerifierService.cs b/src/RealCV.Infrastructure/Services/CompanyVerifierService.cs index 5206b4c..19055e6 100644 --- a/src/RealCV.Infrastructure/Services/CompanyVerifierService.cs +++ b/src/RealCV.Infrastructure/Services/CompanyVerifierService.cs @@ -72,6 +72,152 @@ public sealed class CompanyVerifierService : ICompanyVerifierService "manufacturing", "operations", "trading" }; + // Mapping of common trading names to their official Companies House registered names. + // Many major UK companies trade under a different name than their registered name. 
+ private static readonly Dictionary TradingNameAliases = new(StringComparer.OrdinalIgnoreCase) + { + // Retail + ["Boots"] = new[] { "BOOTS UK LIMITED", "THE BOOTS COMPANY PLC", "BOOTS OPTICIANS" }, + ["Sainsbury's"] = new[] { "J SAINSBURY PLC", "SAINSBURY'S SUPERMARKETS LTD" }, + ["Marks & Spencer"] = new[] { "MARKS AND SPENCER GROUP PLC", "MARKS AND SPENCER PLC" }, + ["M&S"] = new[] { "MARKS AND SPENCER GROUP PLC", "MARKS AND SPENCER PLC" }, + ["John Lewis"] = new[] { "JOHN LEWIS PLC", "JOHN LEWIS PARTNERSHIP PLC" }, + ["John Lewis Partnership"] = new[] { "JOHN LEWIS PARTNERSHIP PLC", "JOHN LEWIS PLC" }, + ["Waitrose"] = new[] { "WAITROSE LIMITED", "JOHN LEWIS PARTNERSHIP PLC" }, + ["Tesco"] = new[] { "TESCO PLC", "TESCO STORES LIMITED" }, + ["Asda"] = new[] { "ASDA STORES LIMITED", "ASDA GROUP LIMITED" }, + ["Morrisons"] = new[] { "WM MORRISON SUPERMARKETS LIMITED" }, + ["Lidl"] = new[] { "LIDL GREAT BRITAIN LIMITED" }, + ["Aldi"] = new[] { "ALDI STORES LIMITED" }, + + // Banking & Finance + ["Lloyds Banking Group"] = new[] { "LLOYDS BANKING GROUP PLC", "LLOYDS BANK PLC" }, + ["Lloyds Bank"] = new[] { "LLOYDS BANK PLC", "LLOYDS BANKING GROUP PLC" }, + ["HSBC"] = new[] { "HSBC HOLDINGS PLC", "HSBC UK BANK PLC", "HSBC BANK PLC" }, + ["HSBC Holdings PLC"] = new[] { "HSBC HOLDINGS PLC", "HSBC UK BANK PLC" }, + ["HSBC UK"] = new[] { "HSBC UK BANK PLC", "HSBC HOLDINGS PLC" }, + ["Barclays"] = new[] { "BARCLAYS PLC", "BARCLAYS BANK PLC" }, + ["NatWest"] = new[] { "NATWEST GROUP PLC", "NATIONAL WESTMINSTER BANK PLC" }, + ["NatWest Group"] = new[] { "NATWEST GROUP PLC", "NATIONAL WESTMINSTER BANK PLC" }, + ["Santander UK"] = new[] { "SANTANDER UK PLC" }, + ["Nationwide"] = new[] { "NATIONWIDE BUILDING SOCIETY" }, + + // Media & Broadcasting + ["BBC"] = new[] { "BRITISH BROADCASTING CORPORATION" }, + ["ITV"] = new[] { "ITV PLC" }, + ["Sky"] = new[] { "SKY LIMITED", "SKY UK LIMITED" }, + ["Channel 4"] = new[] { "CHANNEL FOUR TELEVISION CORPORATION" }, + + // Technology + 
["IBM UK"] = new[] { "IBM UNITED KINGDOM LIMITED", "INTERNATIONAL BUSINESS MACHINES" }, + ["IBM"] = new[] { "IBM UNITED KINGDOM LIMITED", "INTERNATIONAL BUSINESS MACHINES" }, + ["Google UK"] = new[] { "GOOGLE UK LIMITED", "GOOGLE LLC" }, + ["Google"] = new[] { "GOOGLE UK LIMITED" }, + ["Microsoft UK"] = new[] { "MICROSOFT LIMITED" }, + ["Amazon UK"] = new[] { "AMAZON UK SERVICES LTD", "AMAZON.CO.UK LTD" }, + ["Apple UK"] = new[] { "APPLE (UK) LIMITED", "APPLE RETAIL UK LIMITED" }, + + // Consulting & Professional Services + ["Accenture UK"] = new[] { "ACCENTURE (UK) LIMITED", "ACCENTURE PLC" }, + ["Accenture"] = new[] { "ACCENTURE (UK) LIMITED", "ACCENTURE PLC" }, + ["EY UK"] = new[] { "ERNST & YOUNG LLP", "EY LLP" }, + ["EY"] = new[] { "ERNST & YOUNG LLP", "EY LLP" }, + ["Ernst & Young"] = new[] { "ERNST & YOUNG LLP" }, + ["Deloitte UK"] = new[] { "DELOITTE LLP" }, + ["Deloitte"] = new[] { "DELOITTE LLP" }, + ["KPMG UK"] = new[] { "KPMG LLP" }, + ["KPMG"] = new[] { "KPMG LLP" }, + ["PwC UK"] = new[] { "PRICEWATERHOUSECOOPERS LLP", "PWC (UK) LIMITED" }, + ["PwC"] = new[] { "PRICEWATERHOUSECOOPERS LLP", "PWC (UK) LIMITED" }, + ["McKinsey"] = new[] { "MCKINSEY & COMPANY, INC. 
UNITED KINGDOM" }, + ["BCG"] = new[] { "THE BOSTON CONSULTING GROUP UK LLP" }, + ["Bain"] = new[] { "BAIN & COMPANY UK LIMITED" }, + + // Advertising & Media + ["WPP"] = new[] { "WPP PLC" }, + + // Fintech + ["Checkout.com"] = new[] { "CHECKOUT.COM LIMITED", "CHECKOUT LTD" }, + ["Revolut"] = new[] { "REVOLUT LTD", "REVOLUT LIMITED" }, + ["Monzo"] = new[] { "MONZO BANK LIMITED" }, + ["Starling Bank"] = new[] { "STARLING BANK LIMITED" }, + + // Travel & Hospitality + ["Thomas Cook"] = new[] { "THOMAS COOK GROUP PLC", "THOMAS COOK UK LIMITED" }, + ["TUI"] = new[] { "TUI UK LIMITED" }, + ["British Airways"] = new[] { "BRITISH AIRWAYS PLC" }, + ["EasyJet"] = new[] { "EASYJET PLC", "EASYJET AIRLINE COMPANY LIMITED" }, + ["Ryanair"] = new[] { "RYANAIR UK LIMITED" }, + ["Jamie's Italian"] = new[] { "JAMIE'S ITALIAN LIMITED", "JAMIE OLIVER HOLDINGS LIMITED" }, + + // Retail (Other) + ["Toys R Us"] = new[] { "TOYS R US LIMITED", "TOYS \"R\" US LIMITED" }, + ["Toys R Us UK"] = new[] { "TOYS R US LIMITED" }, + ["Debenhams"] = new[] { "DEBENHAMS PLC", "DEBENHAMS RETAIL LIMITED" }, + ["House of Fraser"] = new[] { "HOUSE OF FRASER LIMITED" }, + ["Next"] = new[] { "NEXT PLC", "NEXT RETAIL LIMITED" }, + ["Primark"] = new[] { "PRIMARK STORES LIMITED" }, + ["Sports Direct"] = new[] { "SPORTS DIRECT INTERNATIONAL PLC" }, + + // Telecoms + ["BT"] = new[] { "BT GROUP PLC", "BRITISH TELECOMMUNICATIONS PLC" }, + ["BT Group"] = new[] { "BT GROUP PLC" }, + ["Vodafone"] = new[] { "VODAFONE LIMITED", "VODAFONE GROUP PLC" }, + ["O2"] = new[] { "TELEFONICA UK LIMITED" }, + ["EE"] = new[] { "EE LIMITED" }, + ["Three"] = new[] { "HUTCHISON 3G UK LIMITED" }, + ["Virgin Media"] = new[] { "VIRGIN MEDIA LIMITED" }, + + // Energy + ["BP"] = new[] { "BP P.L.C.", "BP PLC" }, + ["Shell UK"] = new[] { "SHELL U.K. LIMITED", "SHELL PLC" }, + ["Shell"] = new[] { "SHELL PLC", "SHELL U.K. 
LIMITED" }, + ["British Gas"] = new[] { "BRITISH GAS SERVICES LIMITED", "CENTRICA PLC" }, + ["Centrica"] = new[] { "CENTRICA PLC" }, + ["SSE"] = new[] { "SSE PLC" }, + ["National Grid"] = new[] { "NATIONAL GRID PLC" }, + + // Automotive + ["Jaguar Land Rover"] = new[] { "JAGUAR LAND ROVER LIMITED" }, + ["JLR"] = new[] { "JAGUAR LAND ROVER LIMITED" }, + ["Rolls-Royce"] = new[] { "ROLLS-ROYCE PLC", "ROLLS-ROYCE HOLDINGS PLC" }, + ["BMW UK"] = new[] { "BMW (UK) LIMITED", "BMW GROUP UK LIMITED" }, + + // Food & Beverage + ["Unilever"] = new[] { "UNILEVER PLC" }, + ["Nestle UK"] = new[] { "NESTLE UK LTD" }, + ["Coca-Cola UK"] = new[] { "COCA-COLA EUROPACIFIC PARTNERS PLC" }, + ["PepsiCo UK"] = new[] { "PEPSICO UK LIMITED" }, + + // Pharmaceutical & Healthcare + ["GlaxoSmithKline"] = new[] { "GLAXOSMITHKLINE PLC", "GSK PLC" }, + ["GSK"] = new[] { "GSK PLC", "GLAXOSMITHKLINE PLC" }, + ["AstraZeneca"] = new[] { "ASTRAZENECA PLC" }, + ["Pfizer UK"] = new[] { "PFIZER LIMITED" }, + + // Defence & Aerospace + ["BAE Systems"] = new[] { "BAE SYSTEMS PLC" }, + ["BAE"] = new[] { "BAE SYSTEMS PLC" }, + + // Insurance + ["Aviva"] = new[] { "AVIVA PLC" }, + ["Legal & General"] = new[] { "LEGAL & GENERAL GROUP PLC", "LEGAL AND GENERAL" }, + ["Prudential"] = new[] { "PRUDENTIAL PLC", "PRUDENTIAL PUBLIC LIMITED COMPANY" }, + ["Admiral"] = new[] { "ADMIRAL GROUP PLC" }, + + // Construction & Engineering + ["Balfour Beatty"] = new[] { "BALFOUR BEATTY PLC" }, + ["Carillion"] = new[] { "CARILLION PLC" }, + ["Kier"] = new[] { "KIER GROUP PLC" }, + ["Taylor Wimpey"] = new[] { "TAYLOR WIMPEY PLC" }, + ["Persimmon"] = new[] { "PERSIMMON PLC" }, + + // Outsourcing & Services + ["Serco"] = new[] { "SERCO GROUP PLC" }, + ["Capita"] = new[] { "CAPITA PLC" }, + ["G4S"] = new[] { "G4S PLC", "G4S LIMITED" }, + }; + public CompanyVerifierService( CompaniesHouseClient companiesHouseClient, @@ -964,12 +1110,37 @@ public sealed class CompanyVerifierService : ICompanyVerifierService /// Generates 
alternative search queries to find companies that may be registered
     /// with slightly different names (e.g., "U.K." vs "UK", "Limited" vs "Ltd").
     /// Also handles "Brand (Parent Company)" format by extracting and prioritizing the parent.
+    /// Uses TradingNameAliases to map trading names (exact, or whole-word prefix/suffix) to registered names.
     ///
     private static List GenerateSearchQueries(string companyName)
     {
         var queries = new HashSet(StringComparer.OrdinalIgnoreCase);
         var normalized = companyName.Trim();
 
+        // Step 0: Check if this is a known trading name and add alias queries FIRST (highest priority)
+        if (TradingNameAliases.TryGetValue(normalized, out var aliases))
+        {
+            foreach (var alias in aliases)
+            {
+                queries.Add(alias);
+            }
+        }
+
+        // Also check whole-word prefix/suffix matches for trading names (e.g., "Boots UK" should match "Boots")
+        foreach (var (tradingName, aliasNames) in TradingNameAliases)
+        {
+            // Require a space at the boundary so that "Skyscanner" or "Coffee" cannot match "Sky" or "EE"
+            if (normalized.StartsWith(tradingName + " ", StringComparison.OrdinalIgnoreCase) ||
+                normalized.EndsWith(" " + tradingName, StringComparison.OrdinalIgnoreCase))
+            {
+                foreach (var alias in aliasNames)
+                {
+                    queries.Add(alias);
+                }
+                break; // Only use first matching alias set
+            }
+        }
+
         // Step 0a: Check for "Brand (Parent Company)" format and extract parent company
         // Parent company is more likely to be the registered name, so search it first
         var parentMatch = System.Text.RegularExpressions.Regex.Match(normalized, @"\(([^)]+)\)\s*$");
diff --git a/tools/CVBatchTester/CVBatchTester.csproj b/tools/CVBatchTester/CVBatchTester.csproj
new file mode 100644
index 0000000..76d44ee
--- /dev/null
+++ b/tools/CVBatchTester/CVBatchTester.csproj
@@ -0,0 +1,15 @@
+
+
+
+    Exe
+    net8.0
+    enable
+    enable
+
+
+
+
+
+
+
+
diff --git a/tools/CVBatchTester/Program.cs b/tools/CVBatchTester/Program.cs
new file mode 100644
index 0000000..dc6c4fa
--- /dev/null
+++ b/tools/CVBatchTester/Program.cs
@@ -0,0 +1,445 @@
+using System.Text.Json;
+using 
System.Text.Json.Serialization; +using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using RealCV.Application.Interfaces; +using RealCV.Application.Models; +using RealCV.Infrastructure.Configuration; +using RealCV.Infrastructure.Data; +using RealCV.Infrastructure.ExternalApis; +using RealCV.Infrastructure.Services; + +namespace CVBatchTester; + +// DTOs for test JSON format (snake_case with nested personal object) +record TestCVData +{ + public string? CvId { get; init; } + public string? Category { get; init; } + public List? ExpectedFlags { get; init; } + public TestPersonalData? Personal { get; init; } + public string? Profile { get; init; } + public List? Employment { get; init; } + public List? Education { get; init; } + public List? Skills { get; init; } +} + +record TestPersonalData +{ + public string? Name { get; init; } + public string? Email { get; init; } + public string? Phone { get; init; } + public string? Address { get; init; } + public string? LinkedIn { get; init; } +} + +record TestEmploymentEntry +{ + public string? Company { get; init; } + public string? JobTitle { get; init; } + public string? StartDate { get; init; } + public string? EndDate { get; init; } + public string? Location { get; init; } + public string? Description { get; init; } + public List? Achievements { get; init; } +} + +record TestEducationEntry +{ + public string? Institution { get; init; } + public string? Qualification { get; init; } + public string? Subject { get; init; } + public string? Classification { get; init; } + public string? StartDate { get; init; } + public string? EndDate { get; init; } +} + +class Program +{ + private static StreamWriter? 
_logWriter; + + private static readonly JsonSerializerOptions JsonOptions = new() + { + PropertyNameCaseInsensitive = true, + PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower, + Converters = { new JsonStringEnumConverter() } + }; + + static async Task Main(string[] args) + { + var folderPath = args.FirstOrDefault() ?? AskForFolder(); + + if (string.IsNullOrEmpty(folderPath) || !Directory.Exists(folderPath)) + { + Log($"Error: Folder not found: {folderPath}"); + Log("Usage: CVBatchTester [--output ]"); + Log(" e.g. CVBatchTester /home/user/cvs"); + Log(" e.g. CVBatchTester /home/user/cvs --output /tmp/results.log"); + return 1; + } + + // Check for --output flag + var outputIndex = Array.IndexOf(args, "--output"); + var logPath = outputIndex >= 0 && outputIndex < args.Length - 1 + ? args[outputIndex + 1] + : Path.Combine(folderPath, $"batch-results-{DateTime.Now:yyyyMMdd-HHmmss}.log"); + + _logWriter = new StreamWriter(logPath, false) { AutoFlush = true }; + + Log($"CV Batch Verification Tester"); + Log($"Processing CVs from: {folderPath}"); + Log($"Output log: {logPath}"); + Log($"Started: {DateTime.Now:yyyy-MM-dd HH:mm:ss}"); + Log(new string('=', 80)); + + // Setup DI + var services = new ServiceCollection(); + ConfigureServices(services); + var provider = services.BuildServiceProvider(); + + // Find CV files + var cvFiles = Directory.GetFiles(folderPath, "*.*", SearchOption.TopDirectoryOnly) + .Where(f => f.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase) || + f.EndsWith(".docx", StringComparison.OrdinalIgnoreCase) || + f.EndsWith(".doc", StringComparison.OrdinalIgnoreCase) || + f.EndsWith(".json", StringComparison.OrdinalIgnoreCase)) + .OrderBy(f => f) + .ToList(); + + Log($"Found {cvFiles.Count} CV files\n"); + + if (cvFiles.Count == 0) + { + Log("No CV files found (.pdf, .docx, .doc, .json)"); + return 1; + } + + // Track results + var allUnverifiedEmployers = new List(); + var allUnverifiedInstitutions = new List(); + var totalEmployers = 0; + var 
verifiedEmployers = 0; + var totalEducation = 0; + var verifiedEducation = 0; + var processedCount = 0; + var errorCount = 0; + + foreach (var cvFile in cvFiles) + { + Log($"\n{new string('=', 80)}"); + Log($"[{++processedCount}/{cvFiles.Count}] {Path.GetFileName(cvFile)}"); + Log(new string('=', 80)); + + try + { + using var scope = provider.CreateScope(); + var parser = scope.ServiceProvider.GetRequiredService(); + var companyVerifier = scope.ServiceProvider.GetRequiredService(); + var eduVerifier = scope.ServiceProvider.GetRequiredService(); + + // Parse CV - handle JSON files differently + CVData cv; + if (cvFile.EndsWith(".json", StringComparison.OrdinalIgnoreCase)) + { + var jsonContent = await File.ReadAllTextAsync(cvFile); + var testCv = JsonSerializer.Deserialize(jsonContent, JsonOptions) + ?? throw new InvalidOperationException("Failed to deserialize JSON CV"); + + // Convert TestCVData to CVData + cv = ConvertTestCVData(testCv); + Log($"Loaded JSON CV: {cv.FullName}"); + } + else + { + await using var stream = File.OpenRead(cvFile); + cv = await parser.ParseAsync(stream, Path.GetFileName(cvFile)); + Log($"Parsed CV: {cv.FullName}"); + } + + // Verify Employers + if (cv.Employment?.Count > 0) + { + Log($"\nEMPLOYERS ({cv.Employment.Count}):"); + Log(new string('-', 60)); + + foreach (var emp in cv.Employment) + { + totalEmployers++; + try + { + var result = await companyVerifier.VerifyCompanyAsync( + emp.CompanyName, + emp.StartDate, + emp.EndDate, + emp.JobTitle); + + var icon = result.IsVerified ? 
"✓" : "✗"; + var period = FormatPeriod(emp.StartDate, emp.EndDate); + + Log($"\n {icon} {emp.CompanyName}"); + Log($" Period: {period}"); + Log($" Role: {emp.JobTitle}"); + + if (result.IsVerified) + { + verifiedEmployers++; + Log($" Match: {result.MatchedCompanyName} ({result.MatchScore}%)"); + if (!string.IsNullOrEmpty(result.MatchedCompanyNumber)) + Log($" Company #: {result.MatchedCompanyNumber}"); + if (!string.IsNullOrEmpty(result.CompanyStatus)) + Log($" Status: {result.CompanyStatus}"); + } + else + { + allUnverifiedEmployers.Add(emp.CompanyName); + } + + if (!string.IsNullOrEmpty(result.VerificationNotes)) + Log($" Note: {result.VerificationNotes}"); + } + catch (Exception ex) + { + Log($"\n ✗ {emp.CompanyName}"); + Log($" ERROR: {ex.Message}"); + allUnverifiedEmployers.Add(emp.CompanyName); + } + } + } + + // Verify Education + if (cv.Education?.Count > 0) + { + Log($"\nEDUCATION ({cv.Education.Count}):"); + Log(new string('-', 60)); + + var eduEntries = cv.Education.Select(e => new EducationEntry + { + Institution = e.Institution, + Qualification = e.Qualification, + Subject = e.Subject, + StartDate = e.StartDate, + EndDate = e.EndDate + }).ToList(); + + var eduResults = eduVerifier.VerifyAll(eduEntries); + + foreach (var result in eduResults) + { + totalEducation++; + var icon = result.IsVerified ? "✓" : "✗"; + + Log($"\n {icon} {result.ClaimedInstitution}"); + Log($" Qualification: {result.ClaimedQualification}"); + if (!string.IsNullOrEmpty(result.ClaimedSubject)) + Log($" Subject: {result.ClaimedSubject}"); + + if (result.IsVerified) + { + verifiedEducation++; + if (result.MatchedInstitution != null && + !result.MatchedInstitution.Equals(result.ClaimedInstitution, StringComparison.OrdinalIgnoreCase)) + { + Log($" Match: {result.MatchedInstitution}"); + } + } + else + { + allUnverifiedInstitutions.Add(result.ClaimedInstitution ?? 
"Unknown"); + Log($" Status: {result.Status}"); + } + + if (!string.IsNullOrEmpty(result.VerificationNotes)) + Log($" Note: {result.VerificationNotes}"); + } + } + } + catch (Exception ex) + { + errorCount++; + Log($"ERROR processing file: {ex.Message}"); + } + } + + // Print Summary + Log($"\n\n{new string('=', 80)}"); + Log("VERIFICATION SUMMARY"); + Log(new string('=', 80)); + + Log($"\nCVs Processed: {processedCount - errorCount}/{cvFiles.Count}"); + if (errorCount > 0) + Log($"Errors: {errorCount}"); + + var empRate = totalEmployers > 0 ? verifiedEmployers * 100 / totalEmployers : 0; + var eduRate = totalEducation > 0 ? verifiedEducation * 100 / totalEducation : 0; + + Log($"\nEmployers: {verifiedEmployers}/{totalEmployers} verified ({empRate}%)"); + Log($"Education: {verifiedEducation}/{totalEducation} verified ({eduRate}%)"); + + // List unverified employers + var uniqueUnverifiedEmployers = allUnverifiedEmployers + .GroupBy(e => e, StringComparer.OrdinalIgnoreCase) + .OrderByDescending(g => g.Count()) + .ThenBy(g => g.Key) + .ToList(); + + if (uniqueUnverifiedEmployers.Count > 0) + { + Log($"\n{new string('-', 60)}"); + Log($"UNVERIFIED EMPLOYERS ({uniqueUnverifiedEmployers.Count} unique):"); + foreach (var group in uniqueUnverifiedEmployers) + { + var count = group.Count() > 1 ? $" (x{group.Count()})" : ""; + Log($" - {group.Key}{count}"); + } + } + + // List unverified institutions + var uniqueUnverifiedInstitutions = allUnverifiedInstitutions + .GroupBy(i => i, StringComparer.OrdinalIgnoreCase) + .OrderByDescending(g => g.Count()) + .ThenBy(g => g.Key) + .ToList(); + + if (uniqueUnverifiedInstitutions.Count > 0) + { + Log($"\n{new string('-', 60)}"); + Log($"UNVERIFIED INSTITUTIONS ({uniqueUnverifiedInstitutions.Count} unique):"); + foreach (var group in uniqueUnverifiedInstitutions) + { + var count = group.Count() > 1 ? 
$" (x{group.Count()})" : ""; + Log($" - {group.Key}{count}"); + } + } + + Log($"\nCompleted: {DateTime.Now:yyyy-MM-dd HH:mm:ss}"); + Log($"\n{new string('=', 80)}"); + + _logWriter?.Close(); + Console.WriteLine($"\nResults written to: {logPath}"); + + return 0; + } + + static void Log(string message) + { + Console.WriteLine(message); + _logWriter?.WriteLine(message); + } + + static string AskForFolder() + { + Console.Write("Enter CV folder path: "); + return Console.ReadLine() ?? ""; + } + + static string FormatPeriod(DateOnly? start, DateOnly? end) + { + var startStr = start?.ToString("MMM yyyy") ?? "?"; + var endStr = end?.ToString("MMM yyyy") ?? "Present"; + return $"{startStr} - {endStr}"; + } + + static CVData ConvertTestCVData(TestCVData testCv) + { + return new CVData + { + FullName = testCv.Personal?.Name ?? "Unknown", + Email = testCv.Personal?.Email, + Phone = testCv.Personal?.Phone, + Employment = testCv.Employment?.Select(e => new EmploymentEntry + { + CompanyName = e.Company ?? "Unknown", + JobTitle = e.JobTitle ?? "Unknown", + Location = e.Location, + StartDate = ParseDate(e.StartDate), + EndDate = ParseDate(e.EndDate), + IsCurrent = e.EndDate == null, + Description = e.Description + }).ToList() ?? [], + Education = testCv.Education?.Select(e => new EducationEntry + { + Institution = e.Institution ?? "Unknown", + Qualification = e.Qualification, + Subject = e.Subject, + StartDate = ParseDate(e.StartDate), + EndDate = ParseDate(e.EndDate) + }).ToList() ?? [], + Skills = testCv.Skills ?? [] + }; + } + + static DateOnly? ParseDate(string? 
dateStr)
+    {
+        if (string.IsNullOrEmpty(dateStr)) return null;
+
+        // Try parsing YYYY-MM format; out-of-range parts (e.g. "2020-13") fall through instead of throwing
+        if (dateStr.Length == 7 && dateStr[4] == '-')
+        {
+            if (int.TryParse(dateStr[..4], out var year) && int.TryParse(dateStr[5..], out var month) && year is >= 1 and <= 9999 && month is >= 1 and <= 12)
+            {
+                return new DateOnly(year, month, 1);
+            }
+        }
+
+        // Try standard parsing
+        if (DateOnly.TryParse(dateStr, out var date))
+        {
+            return date;
+        }
+
+        return null;
+    }
+
+    static void ConfigureServices(IServiceCollection services)
+    {
+        // Load configuration - try multiple locations
+        var configPaths = new[]
+        {
+            "/var/www/realcv",
+            "/git/RealCV/src/RealCV.Web",
+            Path.GetFullPath(Path.Combine(AppContext.BaseDirectory, "..", "..", "..", "..", "..", "src", "RealCV.Web"))
+        };
+
+        var webProjectPath = configPaths.FirstOrDefault(Directory.Exists) ?? "/git/RealCV/src/RealCV.Web";
+        Log($"Loading config from: {webProjectPath}");
+
+        var configuration = new ConfigurationBuilder()
+            .SetBasePath(webProjectPath)
+            .AddJsonFile("appsettings.json", optional: true)
+            .AddJsonFile("appsettings.Development.json", optional: true)
+            .AddJsonFile("appsettings.Production.json", optional: true)
+            .Build();
+
+        // Logging - show info level for verification details
+        services.AddLogging(builder =>
+        {
+            builder.AddConsole();
+            builder.SetMinimumLevel(LogLevel.Information);
+            // Filter out noisy libraries
+            builder.AddFilter("Microsoft", LogLevel.Warning);
+            builder.AddFilter("System", LogLevel.Warning);
+        });
+
+        // Database - the connection string must come from configuration or the environment; no credentials in source
+        var connectionString = configuration.GetConnectionString("DefaultConnection")
+            ?? Environment.GetEnvironmentVariable("ConnectionStrings__DefaultConnection") ?? throw new InvalidOperationException("Connection string 'DefaultConnection' is not configured. Set it in appsettings.json or in the ConnectionStrings__DefaultConnection environment variable.");
+
+        services.AddDbContextFactory(options =>
+            options.UseSqlServer(connectionString));
+
+        // Companies House - use configuration binding
+        services.Configure(configuration.GetSection(CompaniesHouseSettings.SectionName));
+        services.AddHttpClient();
+
+        // Anthropic - use configuration binding
+        services.Configure(configuration.GetSection(AnthropicSettings.SectionName));
+        services.AddScoped();
+
+        // Services
+        services.AddScoped();
+        services.AddScoped();
+        services.AddScoped();
+    }
+}