using System.Text.Json;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using RealCV.Application.Interfaces;
using RealCV.Application.Models;
using RealCV.Infrastructure.Configuration;
using RealCV.Infrastructure.Data;
using RealCV.Infrastructure.ExternalApis;
using RealCV.Infrastructure.Services;

namespace RealCV.Tests.Integration;

/// <summary>
/// Test utility to batch process CVs and output verification findings.
/// Run with: dotnet test --filter "FullyQualifiedName~CVBatchTester" -- TestRunParameters.Parameter(name=\"CvFolder\", value=\"/path/to/cvs\")
/// Or call <see cref="ProcessFolderAsync"/> directly.
/// </summary>
/// <remarks>
/// The instance owns the dependency-injection container it builds in its constructor;
/// dispose the instance to release the container and the services it created.
/// </remarks>
public class CVBatchTester : IDisposable
{
    /// <summary>Maximum number of distinct unverified names listed in the overall summary.</summary>
    private const int MaxUnverifiedListed = 20;

    /// <summary>File extensions (compared case-insensitively) that are treated as CV documents.</summary>
    private static readonly HashSet<string> SupportedExtensions =
        new(StringComparer.OrdinalIgnoreCase) { ".pdf", ".docx", ".doc" };

    /// <summary>Serializer options reused by every <see cref="ExportToJson"/> call instead of allocating new ones.</summary>
    private static readonly JsonSerializerOptions ExportJsonOptions = new() { WriteIndented = true };

    private readonly ServiceProvider _serviceProvider;
    private bool _disposed;

    /// <summary>
    /// Builds the service container used to resolve the parser and verifier services.
    /// </summary>
    public CVBatchTester()
    {
        var services = new ServiceCollection();
        ConfigureServices(services);
        _serviceProvider = services.BuildServiceProvider();
    }

    /// <summary>
    /// Registers configuration, logging, the database context factory, the external API
    /// clients and the verification services on <paramref name="services"/>.
    /// </summary>
    /// <param name="services">The collection that receives the registrations.</param>
    private static void ConfigureServices(IServiceCollection services)
    {
        // NOTE(review): the generic type arguments on the registrations in this method were
        // missing from the source as received. The type names used below are inferred from
        // the imported RealCV namespaces and from how the services are consumed further down;
        // confirm each one against the project before merging.

        // Load configuration
        var configuration = new ConfigurationBuilder()
            .SetBasePath(Directory.GetCurrentDirectory())
            .AddJsonFile("appsettings.json", optional: true)
            .AddJsonFile("appsettings.Development.json", optional: true)
            .AddEnvironmentVariables()
            .Build();

        // Logging
        services.AddLogging(builder =>
        {
            builder.AddConsole();
            builder.SetMinimumLevel(LogLevel.Information);
        });

        // Database
        // NOTE(review): the fallback below embeds a database password in source control.
        // It is kept so existing local runs behave as before, but the credential should be
        // moved to user secrets / environment configuration and rotated.
        var connectionString = configuration.GetConnectionString("DefaultConnection")
            ?? "Server=127.0.0.1;Database=RealCV;User Id=SA;Password=TrueCV_Sql2024!;TrustServerCertificate=True";
        services.AddDbContextFactory<ApplicationDbContext>(options =>
            options.UseSqlServer(connectionString));

        // Companies House
        services.Configure<CompaniesHouseSettings>(options =>
        {
            options.BaseUrl = configuration["CompaniesHouse:BaseUrl"]
                ?? "https://api.company-information.service.gov.uk";
            options.ApiKey = configuration["CompaniesHouse:ApiKey"] ?? "";
        });
        services.AddHttpClient<CompaniesHouseClient>();

        // Anthropic (for AI matching)
        services.Configure<AnthropicSettings>(options =>
        {
            options.ApiKey = configuration["Anthropic:ApiKey"] ?? "";
        });
        services.AddHttpClient();
        services.AddScoped<ICompanyNameMatcherService, AICompanyNameMatcherService>();

        // Services
        services.AddScoped<ICVParserService, CVParserService>();
        services.AddScoped<ICompanyVerifierService, CompanyVerifierService>();
        services.AddScoped<IEducationVerifierService, EducationVerifierService>();
    }

    /// <summary>
    /// Process all CVs in a folder and return verification results.
    /// </summary>
    /// <param name="folderPath">Folder whose top-level .pdf, .docx and .doc files are processed.</param>
    /// <returns>
    /// One summary per CV file. A file that fails to process yields a summary with
    /// <see cref="CVVerificationSummary.Error"/> set instead of aborting the batch.
    /// </returns>
    /// <exception cref="DirectoryNotFoundException"><paramref name="folderPath"/> does not exist.</exception>
    public async Task<List<CVVerificationSummary>> ProcessFolderAsync(string folderPath)
    {
        if (!Directory.Exists(folderPath))
        {
            throw new DirectoryNotFoundException($"Folder not found: {folderPath}");
        }

        // Directory.GetFiles does not guarantee an order, so sort to keep the batch
        // output stable from run to run.
        var cvFiles = Directory.GetFiles(folderPath, "*.*", SearchOption.TopDirectoryOnly)
            .Where(f => SupportedExtensions.Contains(Path.GetExtension(f)))
            .OrderBy(f => f, StringComparer.OrdinalIgnoreCase)
            .ToList();

        Console.WriteLine($"Found {cvFiles.Count} CV files in {folderPath}");
        Console.WriteLine(new string('=', 80));

        var results = new List<CVVerificationSummary>();

        foreach (var cvFile in cvFiles)
        {
            Console.WriteLine($"\nProcessing: {Path.GetFileName(cvFile)}");
            Console.WriteLine(new string('-', 60));

            try
            {
                var result = await ProcessSingleCVAsync(cvFile);
                results.Add(result);
                PrintSummary(result);
            }
            catch (Exception ex)
            {
                // Deliberately best-effort: one unreadable CV must not stop the batch,
                // so the failure is recorded on the summary and processing continues.
                Console.WriteLine($"ERROR: {ex.Message}");
                results.Add(new CVVerificationSummary
                {
                    FileName = Path.GetFileName(cvFile),
                    Error = ex.Message
                });
            }
        }

        // Print overall summary
        Console.WriteLine("\n" + new string('=', 80));
        Console.WriteLine("OVERALL SUMMARY");
        Console.WriteLine(new string('=', 80));
        PrintOverallSummary(results);

        return results;
    }

    /// <summary>
    /// Parses one CV file and verifies each employer and education entry found in it.
    /// </summary>
    /// <param name="filePath">Path of the CV file to open and parse.</param>
    /// <returns>The verification summary for the file.</returns>
    private async Task<CVVerificationSummary> ProcessSingleCVAsync(string filePath)
    {
        using var scope = _serviceProvider.CreateScope();

        // NOTE(review): the service type arguments were missing from the source as received;
        // the interface names are inferred from the variable names - confirm.
        var cvParser = scope.ServiceProvider.GetRequiredService<ICVParserService>();
        var companyVerifier = scope.ServiceProvider.GetRequiredService<ICompanyVerifierService>();
        var educationVerifier = scope.ServiceProvider.GetRequiredService<IEducationVerifierService>();

        // Parse the CV
        await using var fileStream = File.OpenRead(filePath);
        var parsedCV = await cvParser.ParseAsync(fileStream, Path.GetFileName(filePath));

        var summary = new CVVerificationSummary
        {
            FileName = Path.GetFileName(filePath),
            CandidateName = parsedCV.PersonalInfo?.FullName ?? "Unknown"
        };

        // Verify employers
        if (parsedCV.Employment?.Count > 0)
        {
            foreach (var employment in parsedCV.Employment)
            {
                try
                {
                    var result = await companyVerifier.VerifyCompanyAsync(
                        employment.CompanyName,
                        employment.StartDate,
                        employment.EndDate,
                        employment.JobTitle);

                    summary.EmployerResults.Add(new EmployerVerificationSummary
                    {
                        ClaimedName = employment.CompanyName,
                        MatchedName = result.MatchedCompanyName,
                        CompanyNumber = result.MatchedCompanyNumber,
                        IsVerified = result.IsVerified,
                        MatchScore = result.MatchScore,
                        Notes = result.VerificationNotes,
                        Status = result.CompanyStatus
                    });
                }
                catch (Exception ex)
                {
                    // A failed lookup is reported as an unverified employer rather than
                    // failing the whole CV.
                    summary.EmployerResults.Add(new EmployerVerificationSummary
                    {
                        ClaimedName = employment.CompanyName,
                        IsVerified = false,
                        Notes = $"Error: {ex.Message}"
                    });
                }
            }
        }

        // Verify education
        if (parsedCV.Education?.Count > 0)
        {
            var educationEntries = parsedCV.Education
                .Select(e => new EducationEntry
                {
                    Institution = e.Institution,
                    Qualification = e.Qualification,
                    Subject = e.Subject,
                    StartDate = e.StartDate,
                    EndDate = e.EndDate
                })
                .ToList();

            var educationResults = educationVerifier.VerifyAll(educationEntries);

            foreach (var result in educationResults)
            {
                summary.EducationResults.Add(new EducationVerificationSummary
                {
                    ClaimedInstitution = result.ClaimedInstitution,
                    MatchedInstitution = result.MatchedInstitution,
                    Qualification = result.ClaimedQualification,
                    IsVerified = result.IsVerified,
                    Status = result.Status,
                    Notes = result.VerificationNotes
                });
            }
        }

        return summary;
    }

    /// <summary>
    /// Writes the employer and education findings for a single CV to the console.
    /// </summary>
    /// <param name="summary">The per-CV results to print.</param>
    private static void PrintSummary(CVVerificationSummary summary)
    {
        Console.WriteLine($"Candidate: {summary.CandidateName}");

        Console.WriteLine($"\n EMPLOYERS ({summary.EmployerResults.Count}):");
        foreach (var emp in summary.EmployerResults)
        {
            var status = emp.IsVerified ? "✓" : "✗";
            var matchInfo = emp.IsVerified
                ? $"-> {emp.MatchedName} ({emp.MatchScore}%)"
                : (emp.Notes ?? "Not found");

            Console.WriteLine($" {status} {emp.ClaimedName}");
            Console.WriteLine($" {matchInfo}");
        }

        Console.WriteLine($"\n EDUCATION ({summary.EducationResults.Count}):");
        foreach (var edu in summary.EducationResults)
        {
            var status = edu.IsVerified ? "✓" : "✗";
            var matchInfo = edu.IsVerified && edu.MatchedInstitution != null
                ? $"-> {edu.MatchedInstitution}"
                : (edu.Notes ?? edu.Status);

            Console.WriteLine($" {status} {edu.ClaimedInstitution}");
            Console.WriteLine($" {edu.Qualification}");
            Console.WriteLine($" {matchInfo}");
        }
    }

    /// <summary>
    /// Writes batch-wide totals and the most frequent unverified employers and institutions
    /// to the console.
    /// </summary>
    /// <param name="results">All per-CV summaries produced by the batch.</param>
    private static void PrintOverallSummary(List<CVVerificationSummary> results)
    {
        var successfulCVs = results.Count(r => r.Error == null);
        var totalEmployers = results.Sum(r => r.EmployerResults.Count);
        var verifiedEmployers = results.Sum(r => r.EmployerResults.Count(e => e.IsVerified));
        var totalEducation = results.Sum(r => r.EducationResults.Count);
        var verifiedEducation = results.Sum(r => r.EducationResults.Count(e => e.IsVerified));

        Console.WriteLine($"CVs Processed: {successfulCVs}/{results.Count}");
        Console.WriteLine($"Employers: {verifiedEmployers}/{totalEmployers} verified ({Percent(verifiedEmployers, totalEmployers)}%)");
        Console.WriteLine($"Education: {verifiedEducation}/{totalEducation} verified ({Percent(verifiedEducation, totalEducation)}%)");

        // List unverified employers
        PrintUnverifiedGroups(
            "EMPLOYERS",
            results.SelectMany(r => r.EmployerResults.Where(e => !e.IsVerified))
                .Select(e => e.ClaimedName));

        // List unverified institutions
        PrintUnverifiedGroups(
            "INSTITUTIONS",
            results.SelectMany(r => r.EducationResults.Where(e => !e.IsVerified))
                .Select(e => e.ClaimedInstitution));
    }

    /// <summary>
    /// Returns <paramref name="part"/> as a whole-number percentage of <paramref name="total"/>
    /// (integer division), or 0 when <paramref name="total"/> is not positive.
    /// </summary>
    private static int Percent(int part, int total) => total > 0 ? part * 100 / total : 0;

    /// <summary>
    /// Groups <paramref name="claimedNames"/> by exact name and prints the most frequent
    /// ones, capped at <see cref="MaxUnverifiedListed"/>. Prints nothing when there are none.
    /// </summary>
    /// <param name="heading">Upper-case label inserted into the section heading.</param>
    /// <param name="claimedNames">Claimed names of the unverified entries, duplicates included.</param>
    private static void PrintUnverifiedGroups(string heading, IEnumerable<string> claimedNames)
    {
        var groups = claimedNames
            .GroupBy(name => name)
            .Select(g => (Name: g.Key, Count: g.Count()))
            .OrderByDescending(g => g.Count)
            .ToList();

        if (groups.Count == 0)
        {
            return;
        }

        Console.WriteLine($"\nUNVERIFIED {heading} ({groups.Count} unique):");
        foreach (var (name, count) in groups.Take(MaxUnverifiedListed))
        {
            Console.WriteLine($" - {name} (x{count})");
        }
    }

    /// <summary>
    /// Export results to JSON for further analysis.
    /// </summary>
    /// <param name="results">The summaries to serialize.</param>
    /// <param name="outputPath">File that receives the indented JSON; it is overwritten if it exists.</param>
    public static void ExportToJson(List<CVVerificationSummary> results, string outputPath)
    {
        var json = JsonSerializer.Serialize(results, ExportJsonOptions);
        File.WriteAllText(outputPath, json);
        Console.WriteLine($"\nResults exported to: {outputPath}");
    }

    /// <summary>
    /// Releases the service container built by the constructor.
    /// </summary>
    public void Dispose()
    {
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Disposes the owned service provider once; later calls do nothing.
    /// </summary>
    /// <param name="disposing">True when called from <see cref="Dispose()"/>.</param>
    protected virtual void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (disposing)
        {
            _serviceProvider.Dispose();
        }

        _disposed = true;
    }
}

/// <summary>
/// Verification outcome for one CV file: either the employer and education findings,
/// or the error message recorded when the file could not be processed.
/// </summary>
public class CVVerificationSummary
{
    public string FileName { get; set; } = "";
    public string CandidateName { get; set; } = "";

    /// <summary>Set when processing the file failed; null otherwise.</summary>
    public string? Error { get; set; }

    public List<EmployerVerificationSummary> EmployerResults { get; set; } = new();
    public List<EducationVerificationSummary> EducationResults { get; set; } = new();
}

/// <summary>
/// Result of verifying one employer claimed on a CV.
/// </summary>
public class EmployerVerificationSummary
{
    public string ClaimedName { get; set; } = "";
    public string? MatchedName { get; set; }
    public string? CompanyNumber { get; set; }
    public bool IsVerified { get; set; }

    /// <summary>Match score reported by the company verifier; printed as a percentage.</summary>
    public int MatchScore { get; set; }

    public string? Notes { get; set; }
    public string? Status { get; set; }
}

/// <summary>
/// Result of verifying one education entry claimed on a CV.
/// </summary>
public class EducationVerificationSummary
{
    public string ClaimedInstitution { get; set; } = "";
    public string? MatchedInstitution { get; set; }
    public string? Qualification { get; set; }
    public bool IsVerified { get; set; }
    public string? Status { get; set; }
    public string? Notes { get; set; }
}