using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using RealCV.Application.Interfaces;
using RealCV.Application.Models;
using RealCV.Infrastructure.Configuration;
using RealCV.Infrastructure.Data;
using RealCV.Infrastructure.ExternalApis;
using RealCV.Infrastructure.Services;

namespace CVBatchTester;

// NOTE(review): the reviewed copy of this file had every generic type argument
// stripped (e.g. "List?", "GetRequiredService()"). Arguments that the surrounding
// code determines have been restored silently; arguments that had to be guessed
// are individually marked with "NOTE(review)" and must be confirmed against the
// RealCV projects before merging.

// DTOs for test JSON format (snake_case with nested personal object)

/// <summary>Root object of a JSON test CV.</summary>
record TestCVData
{
    public string? CvId { get; init; }
    public string? Category { get; init; }

    // NOTE(review): element type assumed to be string — confirm against the test JSON.
    public List<string>? ExpectedFlags { get; init; }

    public TestPersonalData? Personal { get; init; }
    public string? Profile { get; init; }
    public List<TestEmploymentEntry>? Employment { get; init; }
    public List<TestEducationEntry>? Education { get; init; }

    // NOTE(review): element type assumed to be string (must match CVData.Skills) — confirm.
    public List<string>? Skills { get; init; }
}

/// <summary>The nested "personal" object of a JSON test CV.</summary>
record TestPersonalData
{
    public string? Name { get; init; }
    public string? Email { get; init; }
    public string? Phone { get; init; }
    public string? Address { get; init; }
    public string? LinkedIn { get; init; }
}

/// <summary>One employment entry of a JSON test CV. Dates are kept as raw strings.</summary>
record TestEmploymentEntry
{
    public string? Company { get; init; }
    public string? JobTitle { get; init; }
    public string? StartDate { get; init; }
    public string? EndDate { get; init; }
    public string? Location { get; init; }
    public string? Description { get; init; }

    // NOTE(review): element type assumed to be string — confirm against the test JSON.
    public List<string>? Achievements { get; init; }
}

/// <summary>One education entry of a JSON test CV. Dates are kept as raw strings.</summary>
record TestEducationEntry
{
    public string? Institution { get; init; }
    public string? Qualification { get; init; }
    public string? Subject { get; init; }
    public string? Classification { get; init; }
    public string? StartDate { get; init; }
    public string? EndDate { get; init; }
}

/// <summary>
/// Console tool that runs employer and education verification over every CV file
/// in a folder and writes the results to both the console and a log file.
/// </summary>
class Program
{
    /// <summary>Log file sink; null until the output path is known and after it is closed.</summary>
    private static StreamWriter? _logWriter;

    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
        PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
        Converters = { new JsonStringEnumConverter() }
    };

    /// <summary>File extensions (compared case-insensitively) that are treated as CVs.</summary>
    private static readonly string[] CvExtensions = [".pdf", ".docx", ".doc", ".json"];

    /// <summary>
    /// Entry point. The first argument is the CV folder (prompted for when absent);
    /// an optional <c>--output</c> flag followed by a path overrides the log location.
    /// </summary>
    /// <returns>0 when the batch ran; 1 when the folder is missing or holds no CV files.</returns>
    static async Task<int> Main(string[] args)
    {
        var folderPath = args.FirstOrDefault() ?? AskForFolder();

        if (string.IsNullOrEmpty(folderPath) || !Directory.Exists(folderPath))
        {
            Log($"Error: Folder not found: {folderPath}");
            // The placeholders were missing from the reviewed text; restored so the usage line is readable.
            Log("Usage: CVBatchTester <folder> [--output <file>]");
            Log(" e.g. CVBatchTester /home/user/cvs");
            Log(" e.g. CVBatchTester /home/user/cvs --output /tmp/results.log");
            return 1;
        }

        // Check for --output flag
        var outputIndex = Array.IndexOf(args, "--output");
        var logPath = outputIndex >= 0 && outputIndex < args.Length - 1
            ? args[outputIndex + 1]
            : Path.Combine(folderPath, $"batch-results-{DateTime.Now:yyyyMMdd-HHmmss}.log");

        var writer = new StreamWriter(logPath, false) { AutoFlush = true };
        _logWriter = writer;

        int exitCode;
        try
        {
            exitCode = await RunBatchAsync(folderPath, logPath);
        }
        finally
        {
            // Close the log on every exit path (early return or exception), and clear
            // the field so a later Log call cannot write to a disposed writer.
            _logWriter = null;
            writer.Dispose();
        }

        if (exitCode == 0)
        {
            Console.WriteLine($"\nResults written to: {logPath}");
        }

        return exitCode;
    }

    /// <summary>
    /// Builds the service container, verifies every CV file found in
    /// <paramref name="folderPath"/> and logs a summary.
    /// </summary>
    /// <returns>0 when at least one CV file was found; otherwise 1.</returns>
    static async Task<int> RunBatchAsync(string folderPath, string logPath)
    {
        Log($"CV Batch Verification Tester");
        Log($"Processing CVs from: {folderPath}");
        Log($"Output log: {logPath}");
        Log($"Started: {DateTime.Now:yyyy-MM-dd HH:mm:ss}");
        Log(new string('=', 80));

        // Setup DI. The provider owns disposable singletons, so dispose it when the batch ends.
        var services = new ServiceCollection();
        ConfigureServices(services);
        await using var provider = services.BuildServiceProvider();

        // Find CV files
        var cvFiles = FindCvFiles(folderPath);
        Log($"Found {cvFiles.Count} CV files\n");

        if (cvFiles.Count == 0)
        {
            Log("No CV files found (.pdf, .docx, .doc, .json)");
            return 1;
        }

        // Track results
        var allUnverifiedEmployers = new List<string>();
        var allUnverifiedInstitutions = new List<string>();
        var totalEmployers = 0;
        var verifiedEmployers = 0;
        var totalEducation = 0;
        var verifiedEducation = 0;
        var processedCount = 0;
        var errorCount = 0;

        foreach (var cvFile in cvFiles)
        {
            Log($"\n{new string('=', 80)}");
            Log($"[{++processedCount}/{cvFiles.Count}] {Path.GetFileName(cvFile)}");
            Log(new string('=', 80));

            try
            {
                using var scope = provider.CreateScope();

                // NOTE(review): the three service types below were missing from the reviewed
                // text and are reconstructed from how the instances are used — confirm the names.
                var parser = scope.ServiceProvider.GetRequiredService<ICVParserService>();
                var companyVerifier = scope.ServiceProvider.GetRequiredService<ICompanyVerifierService>();
                var eduVerifier = scope.ServiceProvider.GetRequiredService<IEducationVerifierService>();

                // Parse CV - handle JSON files differently
                CVData cv;
                if (cvFile.EndsWith(".json", StringComparison.OrdinalIgnoreCase))
                {
                    var jsonContent = await File.ReadAllTextAsync(cvFile);
                    var testCv = JsonSerializer.Deserialize<TestCVData>(jsonContent, JsonOptions)
                        ?? throw new InvalidOperationException("Failed to deserialize JSON CV");

                    // Convert TestCVData to CVData
                    cv = ConvertTestCVData(testCv);
                    Log($"Loaded JSON CV: {cv.FullName}");
                }
                else
                {
                    await using var stream = File.OpenRead(cvFile);
                    cv = await parser.ParseAsync(stream, Path.GetFileName(cvFile));
                    Log($"Parsed CV: {cv.FullName}");
                }

                // Verify Employers
                if (cv.Employment?.Count > 0)
                {
                    Log($"\nEMPLOYERS ({cv.Employment.Count}):");
                    Log(new string('-', 60));

                    foreach (var emp in cv.Employment)
                    {
                        totalEmployers++;

                        // A failure for one employer is logged and counted as unverified;
                        // it must not abort the remaining entries of this CV.
                        try
                        {
                            var result = await companyVerifier.VerifyCompanyAsync(
                                emp.CompanyName, emp.StartDate, emp.EndDate, emp.JobTitle);

                            var icon = result.IsVerified ? "✓" : "✗";
                            var period = FormatPeriod(emp.StartDate, emp.EndDate);

                            Log($"\n {icon} {emp.CompanyName}");
                            Log($" Period: {period}");
                            Log($" Role: {emp.JobTitle}");

                            if (result.IsVerified)
                            {
                                verifiedEmployers++;
                                Log($" Match: {result.MatchedCompanyName} ({result.MatchScore}%)");

                                if (!string.IsNullOrEmpty(result.MatchedCompanyNumber))
                                {
                                    Log($" Company #: {result.MatchedCompanyNumber}");
                                }

                                if (!string.IsNullOrEmpty(result.CompanyStatus))
                                {
                                    Log($" Status: {result.CompanyStatus}");
                                }
                            }
                            else
                            {
                                allUnverifiedEmployers.Add(emp.CompanyName);
                            }

                            if (!string.IsNullOrEmpty(result.VerificationNotes))
                            {
                                Log($" Note: {result.VerificationNotes}");
                            }

                            // Display any flags (warnings/issues)
                            if (result.Flags?.Count > 0)
                            {
                                foreach (var flag in result.Flags)
                                {
                                    var flagIcon = flag.Severity == "Critical" ? "⚠️" : "ℹ️";
                                    Log($" {flagIcon} FLAG [{flag.Type}]: {flag.Message}");
                                }
                            }
                        }
                        catch (Exception ex)
                        {
                            Log($"\n ✗ {emp.CompanyName}");
                            Log($" ERROR: {ex.Message}");
                            allUnverifiedEmployers.Add(emp.CompanyName);
                        }
                    }
                }

                // Verify Education
                if (cv.Education?.Count > 0)
                {
                    Log($"\nEDUCATION ({cv.Education.Count}):");
                    Log(new string('-', 60));

                    // Copy only the fields listed here into fresh entries before verification.
                    var eduEntries = cv.Education.Select(e => new EducationEntry
                    {
                        Institution = e.Institution,
                        Qualification = e.Qualification,
                        Subject = e.Subject,
                        StartDate = e.StartDate,
                        EndDate = e.EndDate
                    }).ToList();

                    var eduResults = eduVerifier.VerifyAll(eduEntries);

                    foreach (var result in eduResults)
                    {
                        totalEducation++;
                        var icon = result.IsVerified ? "✓" : "✗";

                        Log($"\n {icon} {result.ClaimedInstitution}");
                        Log($" Qualification: {result.ClaimedQualification}");

                        if (!string.IsNullOrEmpty(result.ClaimedSubject))
                        {
                            Log($" Subject: {result.ClaimedSubject}");
                        }

                        if (result.IsVerified)
                        {
                            verifiedEducation++;

                            // Only show the match when it differs from what the CV claimed.
                            if (result.MatchedInstitution != null &&
                                !result.MatchedInstitution.Equals(result.ClaimedInstitution, StringComparison.OrdinalIgnoreCase))
                            {
                                Log($" Match: {result.MatchedInstitution}");
                            }
                        }
                        else
                        {
                            allUnverifiedInstitutions.Add(result.ClaimedInstitution ?? "Unknown");
                            Log($" Status: {result.Status}");
                        }

                        if (!string.IsNullOrEmpty(result.VerificationNotes))
                        {
                            Log($" Note: {result.VerificationNotes}");
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: a CV that cannot be read or parsed is counted and skipped.
                errorCount++;
                Log($"ERROR processing file: {ex.Message}");
            }
        }

        // Print Summary
        Log($"\n\n{new string('=', 80)}");
        Log("VERIFICATION SUMMARY");
        Log(new string('=', 80));

        Log($"\nCVs Processed: {processedCount - errorCount}/{cvFiles.Count}");
        if (errorCount > 0)
        {
            Log($"Errors: {errorCount}");
        }

        // Integer percentages (truncated); 0 when there was nothing to verify.
        var empRate = totalEmployers > 0 ? verifiedEmployers * 100 / totalEmployers : 0;
        var eduRate = totalEducation > 0 ? verifiedEducation * 100 / totalEducation : 0;

        Log($"\nEmployers: {verifiedEmployers}/{totalEmployers} verified ({empRate}%)");
        Log($"Education: {verifiedEducation}/{totalEducation} verified ({eduRate}%)");

        LogUnverifiedGroup("UNVERIFIED EMPLOYERS", allUnverifiedEmployers);
        LogUnverifiedGroup("UNVERIFIED INSTITUTIONS", allUnverifiedInstitutions);

        Log($"\nCompleted: {DateTime.Now:yyyy-MM-dd HH:mm:ss}");
        Log($"\n{new string('=', 80)}");

        return 0;
    }

    /// <summary>
    /// Returns the files directly inside <paramref name="folderPath"/> whose name ends
    /// with one of <see cref="CvExtensions"/>, sorted by path.
    /// </summary>
    static List<string> FindCvFiles(string folderPath)
    {
        return Directory.GetFiles(folderPath, "*.*", SearchOption.TopDirectoryOnly)
            .Where(f => CvExtensions.Any(ext => f.EndsWith(ext, StringComparison.OrdinalIgnoreCase)))
            .OrderBy(f => f)
            .ToList();
    }

    /// <summary>
    /// Logs the distinct (case-insensitive) names under <paramref name="heading"/>,
    /// most frequent first, with a repeat count for names seen more than once.
    /// Logs nothing when <paramref name="names"/> is empty.
    /// </summary>
    static void LogUnverifiedGroup(string heading, IEnumerable<string> names)
    {
        var groups = names
            .GroupBy(n => n, StringComparer.OrdinalIgnoreCase)
            .Select(g => (Name: g.Key, Count: g.Count()))
            .OrderByDescending(g => g.Count)
            .ThenBy(g => g.Name)
            .ToList();

        if (groups.Count == 0)
        {
            return;
        }

        Log($"\n{new string('-', 60)}");
        Log($"{heading} ({groups.Count} unique):");

        foreach (var (name, count) in groups)
        {
            var suffix = count > 1 ? $" (x{count})" : "";
            Log($" - {name}{suffix}");
        }
    }

    /// <summary>Writes <paramref name="message"/> to the console and, when open, to the log file.</summary>
    static void Log(string message)
    {
        Console.WriteLine(message);
        _logWriter?.WriteLine(message);
    }

    /// <summary>Prompts on the console for the CV folder; returns an empty string when input ends.</summary>
    static string AskForFolder()
    {
        Console.Write("Enter CV folder path: ");
        return Console.ReadLine() ?? "";
    }

    /// <summary>
    /// Formats an employment period as "MMM yyyy - MMM yyyy", using "?" for a missing
    /// start and "Present" for a missing end.
    /// </summary>
    static string FormatPeriod(DateOnly? start, DateOnly? end)
    {
        var startStr = start?.ToString("MMM yyyy") ?? "?";
        var endStr = end?.ToString("MMM yyyy") ?? "Present";
        return $"{startStr} - {endStr}";
    }

    /// <summary>
    /// Maps a JSON test CV onto the application's <see cref="CVData"/> model, substituting
    /// "Unknown" for missing names and empty lists for missing sections.
    /// </summary>
    static CVData ConvertTestCVData(TestCVData testCv)
    {
        return new CVData
        {
            FullName = testCv.Personal?.Name ?? "Unknown",
            Email = testCv.Personal?.Email,
            Phone = testCv.Personal?.Phone,
            Employment = testCv.Employment?.Select(e => new EmploymentEntry
            {
                CompanyName = e.Company ?? "Unknown",
                JobTitle = e.JobTitle ?? "Unknown",
                Location = e.Location,
                StartDate = ParseDate(e.StartDate),
                EndDate = ParseDate(e.EndDate),
                // Only an absent end date marks the role as current; an unparseable one does not.
                IsCurrent = e.EndDate == null,
                Description = e.Description
            }).ToList() ?? [],
            Education = testCv.Education?.Select(e => new EducationEntry
            {
                Institution = e.Institution ?? "Unknown",
                Qualification = e.Qualification,
                Subject = e.Subject,
                StartDate = ParseDate(e.StartDate),
                EndDate = ParseDate(e.EndDate)
            }).ToList() ?? [],
            Skills = testCv.Skills ?? []
        };
    }

    /// <summary>
    /// Parses a date from test JSON. "YYYY-MM" yields the first day of that month;
    /// anything else goes through <see cref="DateOnly.TryParse(string, out DateOnly)"/>,
    /// which uses the current culture.
    /// </summary>
    /// <returns>The parsed date, or null when the input is null, empty or not a valid date.</returns>
    static DateOnly? ParseDate(string? dateStr)
    {
        if (string.IsNullOrEmpty(dateStr))
        {
            return null;
        }

        // Try parsing YYYY-MM format. The range checks matter: the DateOnly constructor
        // throws for values such as month 13 or year 0, which would otherwise abort the
        // whole CV instead of leaving just this date empty.
        if (dateStr.Length == 7 && dateStr[4] == '-'
            && int.TryParse(dateStr[..4], out var year)
            && int.TryParse(dateStr[5..], out var month)
            && year is >= 1 and <= 9999
            && month is >= 1 and <= 12)
        {
            return new DateOnly(year, month, 1);
        }

        // Try standard parsing
        return DateOnly.TryParse(dateStr, out var date) ? date : null;
    }

    /// <summary>
    /// Registers configuration, logging, the database context factory, the external API
    /// clients and the verification services on <paramref name="services"/>.
    /// </summary>
    static void ConfigureServices(IServiceCollection services)
    {
        // Load configuration - try multiple locations
        var configPaths = new[]
        {
            "/var/www/realcv",
            "/git/RealCV/src/RealCV.Web",
            Path.GetFullPath(Path.Combine(AppContext.BaseDirectory, "..", "..", "..", "..", "..", "src", "RealCV.Web"))
        };

        var webProjectPath = configPaths.FirstOrDefault(Directory.Exists) ?? "/git/RealCV/src/RealCV.Web";
        Log($"Loading config from: {webProjectPath}");

        // Later files override earlier ones; every file is optional.
        var configuration = new ConfigurationBuilder()
            .SetBasePath(webProjectPath)
            .AddJsonFile("appsettings.json", optional: true)
            .AddJsonFile("appsettings.Development.json", optional: true)
            .AddJsonFile("appsettings.Production.json", optional: true)
            .Build();

        // Logging - show info level for verification details
        services.AddLogging(builder =>
        {
            builder.AddConsole();
            builder.SetMinimumLevel(LogLevel.Information);

            // Filter out noisy libraries
            builder.AddFilter("Microsoft", LogLevel.Warning);
            builder.AddFilter("System", LogLevel.Warning);
        });

        // Database
        var connectionString = configuration.GetConnectionString("DefaultConnection");
        if (connectionString is null)
        {
            // SECURITY(review): this fallback embeds a credential in source control. It is kept
            // only so existing local runs keep working; move it to configuration/user-secrets
            // and rotate the password.
            Log("Warning: no 'DefaultConnection' connection string configured; using built-in local default.");
            connectionString = "Server=127.0.0.1;Database=RealCV;User Id=SA;Password=TrueCV_Sql2024!;TrustServerCertificate=True";
        }

        // NOTE(review): context type reconstructed — confirm the DbContext class name.
        services.AddDbContextFactory<ApplicationDbContext>(options =>
            options.UseSqlServer(connectionString));

        // Companies House - use configuration binding
        services.Configure<CompaniesHouseSettings>(configuration.GetSection(CompaniesHouseSettings.SectionName));
        // NOTE(review): typed-client type reconstructed — confirm.
        services.AddHttpClient<CompaniesHouseClient>();

        // Anthropic - use configuration binding
        services.Configure<AnthropicSettings>(configuration.GetSection(AnthropicSettings.SectionName));
        // NOTE(review): service/implementation pair reconstructed — this is the least certain
        // of the restored registrations; confirm against the web project's DI setup.
        services.AddScoped<ICompanyNameMatcherService, AICompanyNameMatcherService>();

        // Services
        // NOTE(review): implementation type names reconstructed from the interfaces resolved
        // in RunBatchAsync — confirm.
        services.AddScoped<ICVParserService, CVParserService>();
        services.AddScoped<ICompanyVerifierService, CompanyVerifierService>();
        services.AddScoped<IEducationVerifierService, EducationVerifierService>();
    }
}