feat: Add AI-powered compound company name splitting
Uses Claude Haiku to intelligently detect when a company name contains multiple companies (e.g., "ASDA/WALMART", "Corus & Laura Ashley Hotels") vs single companies with similar patterns (e.g., "Ernst & Young"). - Adds ExtractCompanyNamesAsync to ICompanyNameMatcherService - Only triggers for names with potential separators (/, &, "and") - Verifies each extracted part individually, returns first match - Uses fast Haiku model to minimize cost Results: - ASDA/WALMART → verified via 'ASDA' → ASDA GROUP LIMITED - Corus & Laura Ashley Hotels → verified via 'Corus' → Tata Steel UK - Employers: 104/120 verified (86%) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -12,4 +12,13 @@ public interface ICompanyNameMatcherService
|
|||||||
string cvCompanyName,
|
string cvCompanyName,
|
||||||
List<CompanyCandidate> candidates,
|
List<CompanyCandidate> candidates,
|
||||||
CancellationToken cancellationToken = default);
|
CancellationToken cancellationToken = default);
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Uses AI to detect whether a company name contains multiple companies and, if so, extracts them.
|
||||||
|
/// Returns null or a single-item list when it is a single company (e.g., "Ernst &amp; Young").
|
||||||
|
/// Returns multiple items when it is compound (e.g., "ASDA/WALMART" -> ["ASDA", "WALMART"]).
|
||||||
|
/// </summary>
/// <param name="companyName">The company name to analyze.</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
|
||||||
|
Task<List<string>?> ExtractCompanyNamesAsync(
|
||||||
|
string companyName,
|
||||||
|
CancellationToken cancellationToken = default);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -207,4 +207,107 @@ public sealed class AICompanyNameMatcherService : ICompanyNameMatcherService
|
|||||||
return null; // Fall back to fuzzy matching
|
return null; // Fall back to fuzzy matching
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Prompt template for the compound-name check. The <c>{COMPANY_NAME}</c> placeholder is
/// substituted with the name under test before the prompt is sent; the model is asked to reply
/// with a JSON object carrying <c>isSingleCompany</c>, <c>companies</c> and <c>reasoning</c>.
/// </summary>
private const string CompoundNamePrompt = """
|
||||||
|
Analyze this company name from a CV and determine if it refers to ONE company or MULTIPLE companies.
|
||||||
|
|
||||||
|
Company name: "{COMPANY_NAME}"
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
- "Ernst & Young" → ONE company (it's the full name of the accounting firm)
|
||||||
|
- "Marks & Spencer" → ONE company (it's the full name of the retailer)
|
||||||
|
- "ASDA/WALMART" → TWO companies: ["ASDA", "WALMART"] (person worked at both or it's showing ownership)
|
||||||
|
- "Corus & Laura Ashley Hotels" → TWO companies: ["Corus", "Laura Ashley Hotels"] (different industries)
|
||||||
|
- "PwC" → ONE company
|
||||||
|
- "Deloitte and Touche" → ONE company (historical name of Deloitte)
|
||||||
|
- "BMW Group Ireland" → ONE company
|
||||||
|
- "Tesco Stores and Distribution" → ONE company (departments of same company)
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
1. Well-known company names with "&" or "and" are SINGLE companies (Ernst & Young, Marks & Spencer, Procter & Gamble)
|
||||||
|
2. A "/" usually indicates multiple companies or ownership relationship
|
||||||
|
3. If the parts are in completely different industries, they're likely separate companies
|
||||||
|
4. If one part is clearly a subsidiary/department of the other, treat as ONE company
|
||||||
|
|
||||||
|
Respond with ONLY valid JSON:
|
||||||
|
{
|
||||||
|
"isSingleCompany": boolean,
|
||||||
|
"companies": ["company1", "company2"] or ["single company name"],
|
||||||
|
"reasoning": "brief explanation"
|
||||||
|
}
|
||||||
|
""";
|
||||||
|
|
||||||
|
/// <summary>
/// Asks the AI model whether <paramref name="companyName"/> refers to one company or to several,
/// and returns the individual names when it is a compound.
/// </summary>
/// <param name="companyName">Company name to analyze; null or blank input yields <c>null</c>.</param>
/// <param name="cancellationToken">Token passed to the Anthropic request.</param>
/// <returns>
/// Two or more trimmed, case-insensitively distinct company names when the model reports a
/// compound name; <c>null</c> when the name is a single company, when the response is empty or
/// cannot be parsed, or when the request fails.
/// </returns>
/// <exception cref="OperationCanceledException">
/// Propagated (instead of being swallowed) when <paramref name="cancellationToken"/> has been cancelled.
/// </exception>
public async Task<List<string>?> ExtractCompanyNamesAsync(
    string companyName,
    CancellationToken cancellationToken = default)
{
    if (string.IsNullOrWhiteSpace(companyName))
    {
        return null;
    }

    _logger.LogDebug("Using AI to check if '{CompanyName}' is a compound name", companyName);

    try
    {
        // NOTE(review): companyName is inserted into the prompt verbatim; a name containing
        // double quotes or instructions reaches the model unescaped.
        var prompt = CompoundNamePrompt.Replace("{COMPANY_NAME}", companyName);

        var messages = new List<Message>
        {
            new(RoleType.User, prompt)
        };

        var parameters = new MessageParameters
        {
            Model = "claude-3-5-haiku-20241022",
            MaxTokens = 256,
            Messages = messages,
            System = [new SystemMessage("You are a company name parser. Respond only with valid JSON.")]
        };

        var response = await _anthropicClient.Messages.GetClaudeMessageAsync(parameters, cancellationToken);

        var responseText = response.Content
            .OfType<TextContent>()
            .FirstOrDefault()?.Text;

        if (string.IsNullOrWhiteSpace(responseText))
        {
            _logger.LogWarning("AI returned empty response for compound name check");
            return null;
        }

        responseText = JsonResponseHelper.CleanJsonResponse(responseText);

        var result = JsonSerializer.Deserialize<CompoundNameResponse>(responseText, JsonDefaults.CamelCase);

        if (result is null)
        {
            _logger.LogWarning("Failed to deserialize compound name response: {Response}", responseText);
            return null;
        }

        _logger.LogDebug("AI compound name result: IsSingle={IsSingle}, Companies=[{Companies}], Reasoning={Reasoning}",
            result.IsSingleCompany, string.Join(", ", result.Companies ?? []), result.Reasoning);

        if (result.IsSingleCompany || result.Companies is null)
        {
            return null; // Single company, no splitting needed
        }

        // The list comes straight from model output: JSON nulls, blank strings and repeated
        // names are all possible, so clean it before deciding whether this is a compound.
        List<string> companies = result.Companies
            .Where(name => !string.IsNullOrWhiteSpace(name))
            .Select(name => name.Trim())
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .ToList();

        return companies.Count < 2 ? null : companies;
    }
    catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
    {
        // Best-effort: any failure other than caller-requested cancellation means "no split".
        _logger.LogError(ex, "AI compound name detection failed for '{CompanyName}'", companyName);
        return null;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Shape of the JSON object the model is asked to return for the compound-name check.
/// </summary>
private sealed class CompoundNameResponse
{
    /// <summary>True when the model considers the name to be one company.</summary>
    public bool IsSingleCompany { get; set; }

    /// <summary>Company names reported by the model; may be absent from the response.</summary>
    public List<string>? Companies { get; set; }

    /// <summary>The model's short explanation; only used for logging.</summary>
    public string? Reasoning { get; set; }
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -226,6 +226,14 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check 4: Is this a compound company name (e.g., "ASDA/WALMART", "Corus & Laura Ashley Hotels")?
|
||||||
|
// Try to verify each part individually
|
||||||
|
var compoundResult = await TryVerifyCompoundNameAsync(normalizedName, companyName, startDate, endDate, jobTitle, flags);
|
||||||
|
if (compoundResult is not null)
|
||||||
|
{
|
||||||
|
return compoundResult;
|
||||||
|
}
|
||||||
|
|
||||||
// Try to find a cached match first (but only if it existed at claimed start date)
|
// Try to find a cached match first (but only if it existed at claimed start date)
|
||||||
var cachedMatch = await FindCachedMatchAsync(normalizedName);
|
var cachedMatch = await FindCachedMatchAsync(normalizedName);
|
||||||
if (cachedMatch is not null)
|
if (cachedMatch is not null)
|
||||||
@@ -833,6 +841,70 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
|||||||
return normalized;
|
return normalized;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Attempts to verify a compound company name by asking the AI matcher to split it and then
/// verifying each extracted part individually; the first part that verifies wins.
/// Only names containing a potential separator ("/", " &amp; ", " and ") trigger the AI call.
/// </summary>
/// <param name="normalizedName">Normalized name; used for the separator check and sent to the AI matcher.</param>
/// <param name="originalName">Name as claimed; used in logs and as <c>ClaimedCompany</c> on the result.</param>
/// <param name="startDate">Claimed start date, forwarded to the per-part verification.</param>
/// <param name="endDate">Claimed end date, forwarded to the per-part verification.</param>
/// <param name="jobTitle">Claimed job title, forwarded to the per-part verification.</param>
/// <param name="flags">Not read or modified by this method.</param>
/// <returns>
/// The verification result of the first part that verifies, re-labelled with the original claimed
/// name, or <c>null</c> when the name is not compound or no part could be verified.
/// </returns>
private async Task<CompanyVerificationResult?> TryVerifyCompoundNameAsync(
    string normalizedName,
    string originalName,
    DateOnly? startDate,
    DateOnly? endDate,
    string? jobTitle,
    List<CompanyVerificationFlag> flags)
{
    // Quick check: only names with a separator can be compound, so skip the AI call otherwise.
    var hasPotentialSeparator = normalizedName.Contains('/')
        || normalizedName.Contains(" & ")
        || normalizedName.Contains(" and ", StringComparison.OrdinalIgnoreCase);

    if (!hasPotentialSeparator)
    {
        return null;
    }

    // Use AI to determine if this is a compound name and extract parts
    var extractedParts = await _aiMatcher.ExtractCompanyNamesAsync(normalizedName);

    if (extractedParts is null || extractedParts.Count < 2)
    {
        // AI determined this is a single company (e.g., "Ernst & Young")
        return null;
    }

    _logger.LogDebug("AI detected compound company name '{Name}', extracted parts: {Parts}",
        originalName, string.Join(", ", extractedParts.Select(p => $"'{p}'")));

    // Try to verify each extracted part - return success on first match
    foreach (var rawPart in extractedParts)
    {
        // Parts come from model output, so tolerate null/blank entries and stray whitespace.
        var part = rawPart?.Trim();

        // Skip parts that are missing or too short
        if (string.IsNullOrEmpty(part) || part.Length < 3)
        {
            continue;
        }

        // VerifyCompanyAsync runs this compound check again for the part. A part identical to
        // the name being split would re-enter with the same input and could recurse without
        // bound, so never recurse on it.
        if (part.Equals(normalizedName, StringComparison.OrdinalIgnoreCase)
            || part.Equals(originalName, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        _logger.LogDebug("Trying to verify compound part: '{Part}'", part);

        // Recursively verify this part
        var partResult = await VerifyCompanyAsync(part, startDate, endDate, jobTitle);

        if (partResult.IsVerified)
        {
            _logger.LogInformation("Compound name '{Original}' verified via part '{Part}' -> {Match}",
                originalName, part, partResult.MatchedCompanyName);

            return partResult with
            {
                ClaimedCompany = originalName,
                VerificationNotes = $"Verified via '{part}': {partResult.VerificationNotes ?? partResult.MatchedCompanyName}"
            };
        }
    }

    // None of the parts could be verified
    _logger.LogDebug("No parts of compound name '{Name}' could be verified", originalName);
    return null;
}
|
||||||
|
|
||||||
private async Task<CompanyCache?> FindCachedMatchAsync(string companyName)
|
private async Task<CompanyCache?> FindCachedMatchAsync(string companyName)
|
||||||
{
|
{
|
||||||
var cutoffDate = DateTime.UtcNow.AddDays(-CacheExpirationDays);
|
var cutoffDate = DateTime.UtcNow.AddDays(-CacheExpirationDays);
|
||||||
|
|||||||
329
tests/RealCV.Tests/Integration/CVBatchTester.cs
Normal file
329
tests/RealCV.Tests/Integration/CVBatchTester.cs
Normal file
@@ -0,0 +1,329 @@
|
|||||||
|
using System.Text.Json;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
using Microsoft.Extensions.Configuration;
|
||||||
|
using Microsoft.Extensions.DependencyInjection;
|
||||||
|
using Microsoft.Extensions.Logging;
|
||||||
|
using RealCV.Application.Interfaces;
|
||||||
|
using RealCV.Application.Models;
|
||||||
|
using RealCV.Infrastructure.Data;
|
||||||
|
using RealCV.Infrastructure.ExternalApis;
|
||||||
|
using RealCV.Infrastructure.Services;
|
||||||
|
using RealCV.Infrastructure.Configuration;
|
||||||
|
|
||||||
|
namespace RealCV.Tests.Integration;
|
||||||
|
|
||||||
|
/// <summary>
/// Utility that batch-processes a folder of CVs through the CV parser and the company and
/// education verifiers, printing the findings to the console.
/// Call <see cref="ProcessFolderAsync"/> with the folder to scan and dispose the instance when
/// finished. The class itself contains no test method and reads no test-run parameter.
/// </summary>
public class CVBatchTester : IDisposable
{
    // Kept as the concrete type so the container (and the services it owns) can be disposed.
    private readonly ServiceProvider _serviceProvider;

    /// <summary>Builds the dependency-injection container used for every processed CV.</summary>
    /// <exception cref="InvalidOperationException">No "DefaultConnection" connection string is configured.</exception>
    public CVBatchTester()
    {
        var services = new ServiceCollection();
        ConfigureServices(services);
        _serviceProvider = services.BuildServiceProvider();
    }

    /// <summary>Disposes the service provider built by the constructor.</summary>
    public void Dispose()
    {
        _serviceProvider.Dispose();
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Registers logging, the database context factory, the external API clients and the
    /// verification services, reading settings from appsettings files and environment variables.
    /// </summary>
    private static void ConfigureServices(IServiceCollection services)
    {
        // Load configuration
        var configuration = new ConfigurationBuilder()
            .SetBasePath(Directory.GetCurrentDirectory())
            .AddJsonFile("appsettings.json", optional: true)
            .AddJsonFile("appsettings.Development.json", optional: true)
            .AddEnvironmentVariables()
            .Build();

        // Logging
        services.AddLogging(builder =>
        {
            builder.AddConsole();
            builder.SetMinimumLevel(LogLevel.Information);
        });

        // Database: credentials must come from configuration, never from source code.
        var connectionString = configuration.GetConnectionString("DefaultConnection");
        if (string.IsNullOrWhiteSpace(connectionString))
        {
            throw new InvalidOperationException(
                "Connection string 'DefaultConnection' is not configured. Provide it in appsettings.json, " +
                "appsettings.Development.json or the ConnectionStrings__DefaultConnection environment variable.");
        }

        services.AddDbContextFactory<ApplicationDbContext>(options =>
            options.UseSqlServer(connectionString));

        // Companies House
        services.Configure<CompaniesHouseSettings>(options =>
        {
            options.BaseUrl = configuration["CompaniesHouse:BaseUrl"] ?? "https://api.company-information.service.gov.uk";
            options.ApiKey = configuration["CompaniesHouse:ApiKey"] ?? "";
        });

        services.AddHttpClient<CompaniesHouseClient>();

        // Anthropic (for AI matching)
        services.Configure<AnthropicSettings>(options =>
        {
            options.ApiKey = configuration["Anthropic:ApiKey"] ?? "";
        });

        services.AddHttpClient<AnthropicClient>();
        // NOTE(review): confirm CompanyNameMatcherService is the intended implementation type.
        services.AddScoped<ICompanyNameMatcherService, CompanyNameMatcherService>();

        // Services
        services.AddScoped<ICompanyVerifierService, CompanyVerifierService>();
        services.AddScoped<IEducationVerifierService, EducationVerifierService>();
        services.AddScoped<ICVParserService, CVParserService>();
    }

    /// <summary>
    /// Process all CVs (.pdf, .docx, .doc) directly inside a folder and return verification results.
    /// A failure on one file is recorded in that file's summary and does not stop the batch.
    /// </summary>
    /// <param name="folderPath">Folder to scan (top level only).</param>
    /// <returns>One summary per CV file, in file-name order.</returns>
    /// <exception cref="DirectoryNotFoundException">The folder does not exist.</exception>
    public async Task<List<CVVerificationSummary>> ProcessFolderAsync(string folderPath)
    {
        if (!Directory.Exists(folderPath))
        {
            throw new DirectoryNotFoundException($"Folder not found: {folderPath}");
        }

        // Directory.GetFiles guarantees no order; sort so runs are reproducible.
        var cvFiles = Directory.GetFiles(folderPath, "*.*", SearchOption.TopDirectoryOnly)
            .Where(f => f.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase) ||
                        f.EndsWith(".docx", StringComparison.OrdinalIgnoreCase) ||
                        f.EndsWith(".doc", StringComparison.OrdinalIgnoreCase))
            .OrderBy(f => f, StringComparer.Ordinal)
            .ToList();

        Console.WriteLine($"Found {cvFiles.Count} CV files in {folderPath}");
        Console.WriteLine(new string('=', 80));

        var results = new List<CVVerificationSummary>();

        foreach (var cvFile in cvFiles)
        {
            Console.WriteLine($"\nProcessing: {Path.GetFileName(cvFile)}");
            Console.WriteLine(new string('-', 60));

            try
            {
                var result = await ProcessSingleCVAsync(cvFile);
                results.Add(result);
                PrintSummary(result);
            }
            catch (Exception ex)
            {
                Console.WriteLine($"ERROR: {ex.Message}");
                results.Add(new CVVerificationSummary
                {
                    FileName = Path.GetFileName(cvFile),
                    Error = ex.Message
                });
            }
        }

        // Print overall summary
        Console.WriteLine("\n" + new string('=', 80));
        Console.WriteLine("OVERALL SUMMARY");
        Console.WriteLine(new string('=', 80));
        PrintOverallSummary(results);

        return results;
    }

    /// <summary>
    /// Parses one CV file and verifies every employer and education entry found in it.
    /// An exception while verifying a single employer is recorded on that employer's entry.
    /// </summary>
    private async Task<CVVerificationSummary> ProcessSingleCVAsync(string filePath)
    {
        using var scope = _serviceProvider.CreateScope();
        var cvParser = scope.ServiceProvider.GetRequiredService<ICVParserService>();
        var companyVerifier = scope.ServiceProvider.GetRequiredService<ICompanyVerifierService>();
        var educationVerifier = scope.ServiceProvider.GetRequiredService<IEducationVerifierService>();

        // Parse the CV
        await using var fileStream = File.OpenRead(filePath);
        var parsedCV = await cvParser.ParseAsync(fileStream, Path.GetFileName(filePath));

        var summary = new CVVerificationSummary
        {
            FileName = Path.GetFileName(filePath),
            CandidateName = parsedCV.PersonalInfo?.FullName ?? "Unknown"
        };

        // Verify employers
        if (parsedCV.Employment?.Count > 0)
        {
            foreach (var employment in parsedCV.Employment)
            {
                try
                {
                    var result = await companyVerifier.VerifyCompanyAsync(
                        employment.CompanyName,
                        employment.StartDate,
                        employment.EndDate,
                        employment.JobTitle);

                    summary.EmployerResults.Add(new EmployerVerificationSummary
                    {
                        ClaimedName = employment.CompanyName,
                        MatchedName = result.MatchedCompanyName,
                        CompanyNumber = result.MatchedCompanyNumber,
                        IsVerified = result.IsVerified,
                        MatchScore = result.MatchScore,
                        Notes = result.VerificationNotes,
                        Status = result.CompanyStatus
                    });
                }
                catch (Exception ex)
                {
                    summary.EmployerResults.Add(new EmployerVerificationSummary
                    {
                        ClaimedName = employment.CompanyName,
                        IsVerified = false,
                        Notes = $"Error: {ex.Message}"
                    });
                }
            }
        }

        // Verify education
        if (parsedCV.Education?.Count > 0)
        {
            var educationResults = educationVerifier.VerifyAll(
                parsedCV.Education.Select(e => new EducationEntry
                {
                    Institution = e.Institution,
                    Qualification = e.Qualification,
                    Subject = e.Subject,
                    StartDate = e.StartDate,
                    EndDate = e.EndDate
                }).ToList());

            foreach (var result in educationResults)
            {
                summary.EducationResults.Add(new EducationVerificationSummary
                {
                    ClaimedInstitution = result.ClaimedInstitution,
                    MatchedInstitution = result.MatchedInstitution,
                    Qualification = result.ClaimedQualification,
                    IsVerified = result.IsVerified,
                    Status = result.Status,
                    Notes = result.VerificationNotes
                });
            }
        }

        return summary;
    }

    /// <summary>Prints the employer and education findings of a single CV.</summary>
    private static void PrintSummary(CVVerificationSummary summary)
    {
        Console.WriteLine($"Candidate: {summary.CandidateName}");

        Console.WriteLine($"\n EMPLOYERS ({summary.EmployerResults.Count}):");
        foreach (var emp in summary.EmployerResults)
        {
            var status = emp.IsVerified ? "✓" : "✗";
            var matchInfo = emp.IsVerified
                ? $"-> {emp.MatchedName} ({emp.MatchScore}%)"
                : emp.Notes ?? "Not found";
            Console.WriteLine($" {status} {emp.ClaimedName}");
            Console.WriteLine($" {matchInfo}");
        }

        Console.WriteLine($"\n EDUCATION ({summary.EducationResults.Count}):");
        foreach (var edu in summary.EducationResults)
        {
            var status = edu.IsVerified ? "✓" : "✗";
            var matchInfo = edu.IsVerified && edu.MatchedInstitution != null
                ? $"-> {edu.MatchedInstitution}"
                : edu.Notes ?? edu.Status;
            Console.WriteLine($" {status} {edu.ClaimedInstitution}");
            Console.WriteLine($" {edu.Qualification}");
            Console.WriteLine($" {matchInfo}");
        }
    }

    /// <summary>
    /// Prints batch totals (integer percentages) followed by the 20 most frequent unverified
    /// employers and institutions, grouped case-insensitively.
    /// </summary>
    private static void PrintOverallSummary(List<CVVerificationSummary> results)
    {
        var successfulCVs = results.Count(r => r.Error == null);
        var totalEmployers = results.Sum(r => r.EmployerResults.Count);
        var verifiedEmployers = results.Sum(r => r.EmployerResults.Count(e => e.IsVerified));
        var totalEducation = results.Sum(r => r.EducationResults.Count);
        var verifiedEducation = results.Sum(r => r.EducationResults.Count(e => e.IsVerified));

        Console.WriteLine($"CVs Processed: {successfulCVs}/{results.Count}");
        Console.WriteLine($"Employers: {verifiedEmployers}/{totalEmployers} verified ({(totalEmployers > 0 ? verifiedEmployers * 100 / totalEmployers : 0)}%)");
        Console.WriteLine($"Education: {verifiedEducation}/{totalEducation} verified ({(totalEducation > 0 ? verifiedEducation * 100 / totalEducation : 0)}%)");

        // List unverified employers; spelling variants that differ only by case count as one.
        var unverifiedEmployers = results
            .SelectMany(r => r.EmployerResults.Where(e => !e.IsVerified))
            .GroupBy(e => e.ClaimedName, StringComparer.OrdinalIgnoreCase)
            .OrderByDescending(g => g.Count())
            .ToList();

        if (unverifiedEmployers.Count > 0)
        {
            Console.WriteLine($"\nUNVERIFIED EMPLOYERS ({unverifiedEmployers.Count} unique):");
            foreach (var group in unverifiedEmployers.Take(20))
            {
                Console.WriteLine($" - {group.Key} (x{group.Count()})");
            }
        }

        // List unverified institutions
        var unverifiedEducation = results
            .SelectMany(r => r.EducationResults.Where(e => !e.IsVerified))
            .GroupBy(e => e.ClaimedInstitution, StringComparer.OrdinalIgnoreCase)
            .OrderByDescending(g => g.Count())
            .ToList();

        if (unverifiedEducation.Count > 0)
        {
            Console.WriteLine($"\nUNVERIFIED INSTITUTIONS ({unverifiedEducation.Count} unique):");
            foreach (var group in unverifiedEducation.Take(20))
            {
                Console.WriteLine($" - {group.Key} (x{group.Count()})");
            }
        }
    }

    /// <summary>
    /// Export results to an indented JSON file for further analysis.
    /// </summary>
    /// <param name="results">Summaries to serialize.</param>
    /// <param name="outputPath">Destination file; overwritten if it exists.</param>
    public static void ExportToJson(List<CVVerificationSummary> results, string outputPath)
    {
        var json = JsonSerializer.Serialize(results, new JsonSerializerOptions
        {
            WriteIndented = true
        });
        File.WriteAllText(outputPath, json);
        Console.WriteLine($"\nResults exported to: {outputPath}");
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Verification outcome for one CV file: who the candidate is, any processing error,
/// and the per-employer and per-education results.
/// </summary>
public class CVVerificationSummary
{
    /// <summary>File name of the CV (without directory).</summary>
    public string FileName { get; set; } = string.Empty;

    /// <summary>Candidate name taken from the parsed CV.</summary>
    public string CandidateName { get; set; } = string.Empty;

    /// <summary>Error message when the CV could not be processed; otherwise null.</summary>
    public string? Error { get; set; }

    /// <summary>One entry per employer claimed on the CV.</summary>
    public List<EmployerVerificationSummary> EmployerResults { get; set; } = new List<EmployerVerificationSummary>();

    /// <summary>One entry per education record claimed on the CV.</summary>
    public List<EducationVerificationSummary> EducationResults { get; set; } = new List<EducationVerificationSummary>();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Verification outcome for a single employer claimed on a CV.
/// </summary>
public class EmployerVerificationSummary
{
    /// <summary>Employer name as written on the CV.</summary>
    public string ClaimedName { get; set; } = string.Empty;

    /// <summary>Name of the matched company, if any.</summary>
    public string? MatchedName { get; set; }

    /// <summary>Registration number of the matched company, if any.</summary>
    public string? CompanyNumber { get; set; }

    /// <summary>Whether the employer was verified.</summary>
    public bool IsVerified { get; set; }

    /// <summary>Match score reported by the verifier (printed as a percentage).</summary>
    public int MatchScore { get; set; }

    /// <summary>Verifier notes, or an error description when verification threw.</summary>
    public string? Notes { get; set; }

    /// <summary>Status of the matched company as reported by the verifier.</summary>
    public string? Status { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Verification outcome for a single education record claimed on a CV.
/// </summary>
public class EducationVerificationSummary
{
    /// <summary>Institution name as written on the CV.</summary>
    public string ClaimedInstitution { get; set; } = string.Empty;

    /// <summary>Name of the matched institution, if any.</summary>
    public string? MatchedInstitution { get; set; }

    /// <summary>Qualification claimed on the CV.</summary>
    public string? Qualification { get; set; }

    /// <summary>Whether the education record was verified.</summary>
    public bool IsVerified { get; set; }

    /// <summary>Status text reported by the education verifier.</summary>
    public string? Status { get; set; }

    /// <summary>Verifier notes, if any.</summary>
    public string? Notes { get; set; }
}
|
||||||
15
tools/CVBatchTester/CVBatchTester.csproj
Normal file
15
tools/CVBatchTester/CVBatchTester.csproj
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<OutputType>Exe</OutputType>
|
||||||
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="../../src/RealCV.Application/RealCV.Application.csproj" />
|
||||||
|
<ProjectReference Include="../../src/RealCV.Infrastructure/RealCV.Infrastructure.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
302
tools/CVBatchTester/Program.cs
Normal file
302
tools/CVBatchTester/Program.cs
Normal file
@@ -0,0 +1,302 @@
|
|||||||
|
using System.Text.Json;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
using Microsoft.Extensions.Configuration;
|
||||||
|
using Microsoft.Extensions.DependencyInjection;
|
||||||
|
using Microsoft.Extensions.Logging;
|
||||||
|
using RealCV.Application.Interfaces;
|
||||||
|
using RealCV.Application.Models;
|
||||||
|
using RealCV.Infrastructure.Configuration;
|
||||||
|
using RealCV.Infrastructure.Data;
|
||||||
|
using RealCV.Infrastructure.ExternalApis;
|
||||||
|
using RealCV.Infrastructure.Services;
|
||||||
|
|
||||||
|
namespace CVBatchTester;
|
||||||
|
|
||||||
|
class Program
|
||||||
|
{
|
||||||
|
static async Task<int> Main(string[] args)
|
||||||
|
{
|
||||||
|
var folderPath = args.FirstOrDefault() ?? AskForFolder();
|
||||||
|
|
||||||
|
if (string.IsNullOrEmpty(folderPath) || !Directory.Exists(folderPath))
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Error: Folder not found: {folderPath}");
|
||||||
|
Console.WriteLine("Usage: CVBatchTester <folder-path>");
|
||||||
|
Console.WriteLine(" e.g. CVBatchTester /home/user/cvs");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Console.WriteLine($"CV Batch Verification Tester");
|
||||||
|
Console.WriteLine($"Processing CVs from: {folderPath}");
|
||||||
|
Console.WriteLine(new string('=', 80));
|
||||||
|
|
||||||
|
// Setup DI
|
||||||
|
var services = new ServiceCollection();
|
||||||
|
ConfigureServices(services);
|
||||||
|
var provider = services.BuildServiceProvider();
|
||||||
|
|
||||||
|
// Find CV files
|
||||||
|
var cvFiles = Directory.GetFiles(folderPath, "*.*", SearchOption.TopDirectoryOnly)
|
||||||
|
.Where(f => f.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase) ||
|
||||||
|
f.EndsWith(".docx", StringComparison.OrdinalIgnoreCase) ||
|
||||||
|
f.EndsWith(".doc", StringComparison.OrdinalIgnoreCase))
|
||||||
|
.OrderBy(f => f)
|
||||||
|
.ToList();
|
||||||
|
|
||||||
|
Console.WriteLine($"Found {cvFiles.Count} CV files\n");
|
||||||
|
|
||||||
|
if (cvFiles.Count == 0)
|
||||||
|
{
|
||||||
|
Console.WriteLine("No CV files found (.pdf, .docx, .doc)");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Track results
|
||||||
|
var allUnverifiedEmployers = new List<string>();
|
||||||
|
var allUnverifiedInstitutions = new List<string>();
|
||||||
|
var totalEmployers = 0;
|
||||||
|
var verifiedEmployers = 0;
|
||||||
|
var totalEducation = 0;
|
||||||
|
var verifiedEducation = 0;
|
||||||
|
var processedCount = 0;
|
||||||
|
var errorCount = 0;
|
||||||
|
|
||||||
|
foreach (var cvFile in cvFiles)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"\n{new string('=', 80)}");
|
||||||
|
Console.WriteLine($"[{++processedCount}/{cvFiles.Count}] {Path.GetFileName(cvFile)}");
|
||||||
|
Console.WriteLine(new string('=', 80));
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
using var scope = provider.CreateScope();
|
||||||
|
var parser = scope.ServiceProvider.GetRequiredService<ICVParserService>();
|
||||||
|
var companyVerifier = scope.ServiceProvider.GetRequiredService<ICompanyVerifierService>();
|
||||||
|
var eduVerifier = scope.ServiceProvider.GetRequiredService<IEducationVerifierService>();
|
||||||
|
|
||||||
|
// Parse CV
|
||||||
|
await using var stream = File.OpenRead(cvFile);
|
||||||
|
var cv = await parser.ParseAsync(stream, Path.GetFileName(cvFile));
|
||||||
|
|
||||||
|
Console.WriteLine($"Candidate: {cv.FullName}");
|
||||||
|
|
||||||
|
// Verify Employers
|
||||||
|
if (cv.Employment?.Count > 0)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"\nEMPLOYERS ({cv.Employment.Count}):");
|
||||||
|
Console.WriteLine(new string('-', 60));
|
||||||
|
|
||||||
|
foreach (var emp in cv.Employment)
|
||||||
|
{
|
||||||
|
totalEmployers++;
|
||||||
|
try
|
||||||
|
{
|
||||||
|
var result = await companyVerifier.VerifyCompanyAsync(
|
||||||
|
emp.CompanyName,
|
||||||
|
emp.StartDate,
|
||||||
|
emp.EndDate,
|
||||||
|
emp.JobTitle);
|
||||||
|
|
||||||
|
var icon = result.IsVerified ? "✓" : "✗";
|
||||||
|
var period = FormatPeriod(emp.StartDate, emp.EndDate);
|
||||||
|
|
||||||
|
Console.WriteLine($"\n {icon} {emp.CompanyName}");
|
||||||
|
Console.WriteLine($" Period: {period}");
|
||||||
|
Console.WriteLine($" Role: {emp.JobTitle}");
|
||||||
|
|
||||||
|
if (result.IsVerified)
|
||||||
|
{
|
||||||
|
verifiedEmployers++;
|
||||||
|
Console.WriteLine($" Match: {result.MatchedCompanyName} ({result.MatchScore}%)");
|
||||||
|
if (!string.IsNullOrEmpty(result.MatchedCompanyNumber))
|
||||||
|
Console.WriteLine($" Company #: {result.MatchedCompanyNumber}");
|
||||||
|
if (!string.IsNullOrEmpty(result.CompanyStatus))
|
||||||
|
Console.WriteLine($" Status: {result.CompanyStatus}");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
allUnverifiedEmployers.Add(emp.CompanyName);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!string.IsNullOrEmpty(result.VerificationNotes))
|
||||||
|
Console.WriteLine($" Note: {result.VerificationNotes}");
|
||||||
|
}
|
||||||
|
catch (Exception ex)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"\n ✗ {emp.CompanyName}");
|
||||||
|
Console.WriteLine($" ERROR: {ex.Message}");
|
||||||
|
allUnverifiedEmployers.Add(emp.CompanyName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify Education
|
||||||
|
if (cv.Education?.Count > 0)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"\nEDUCATION ({cv.Education.Count}):");
|
||||||
|
Console.WriteLine(new string('-', 60));
|
||||||
|
|
||||||
|
var eduEntries = cv.Education.Select(e => new EducationEntry
|
||||||
|
{
|
||||||
|
Institution = e.Institution,
|
||||||
|
Qualification = e.Qualification,
|
||||||
|
Subject = e.Subject,
|
||||||
|
StartDate = e.StartDate,
|
||||||
|
EndDate = e.EndDate
|
||||||
|
}).ToList();
|
||||||
|
|
||||||
|
var eduResults = eduVerifier.VerifyAll(eduEntries);
|
||||||
|
|
||||||
|
foreach (var result in eduResults)
|
||||||
|
{
|
||||||
|
totalEducation++;
|
||||||
|
var icon = result.IsVerified ? "✓" : "✗";
|
||||||
|
|
||||||
|
Console.WriteLine($"\n {icon} {result.ClaimedInstitution}");
|
||||||
|
Console.WriteLine($" Qualification: {result.ClaimedQualification}");
|
||||||
|
if (!string.IsNullOrEmpty(result.ClaimedSubject))
|
||||||
|
Console.WriteLine($" Subject: {result.ClaimedSubject}");
|
||||||
|
|
||||||
|
if (result.IsVerified)
|
||||||
|
{
|
||||||
|
verifiedEducation++;
|
||||||
|
if (result.MatchedInstitution != null &&
|
||||||
|
!result.MatchedInstitution.Equals(result.ClaimedInstitution, StringComparison.OrdinalIgnoreCase))
|
||||||
|
{
|
||||||
|
Console.WriteLine($" Match: {result.MatchedInstitution}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
allUnverifiedInstitutions.Add(result.ClaimedInstitution ?? "Unknown");
|
||||||
|
Console.WriteLine($" Status: {result.Status}");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!string.IsNullOrEmpty(result.VerificationNotes))
|
||||||
|
Console.WriteLine($" Note: {result.VerificationNotes}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (Exception ex)
|
||||||
|
{
|
||||||
|
errorCount++;
|
||||||
|
Console.WriteLine($"ERROR processing file: {ex.Message}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print Summary
|
||||||
|
Console.WriteLine($"\n\n{new string('=', 80)}");
|
||||||
|
Console.WriteLine("VERIFICATION SUMMARY");
|
||||||
|
Console.WriteLine(new string('=', 80));
|
||||||
|
|
||||||
|
Console.WriteLine($"\nCVs Processed: {processedCount - errorCount}/{cvFiles.Count}");
|
||||||
|
if (errorCount > 0)
|
||||||
|
Console.WriteLine($"Errors: {errorCount}");
|
||||||
|
|
||||||
|
var empRate = totalEmployers > 0 ? verifiedEmployers * 100 / totalEmployers : 0;
|
||||||
|
var eduRate = totalEducation > 0 ? verifiedEducation * 100 / totalEducation : 0;
|
||||||
|
|
||||||
|
Console.WriteLine($"\nEmployers: {verifiedEmployers}/{totalEmployers} verified ({empRate}%)");
|
||||||
|
Console.WriteLine($"Education: {verifiedEducation}/{totalEducation} verified ({eduRate}%)");
|
||||||
|
|
||||||
|
// List unverified employers
|
||||||
|
var uniqueUnverifiedEmployers = allUnverifiedEmployers
|
||||||
|
.GroupBy(e => e, StringComparer.OrdinalIgnoreCase)
|
||||||
|
.OrderByDescending(g => g.Count())
|
||||||
|
.ThenBy(g => g.Key)
|
||||||
|
.ToList();
|
||||||
|
|
||||||
|
if (uniqueUnverifiedEmployers.Count > 0)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"\n{new string('-', 60)}");
|
||||||
|
Console.WriteLine($"UNVERIFIED EMPLOYERS ({uniqueUnverifiedEmployers.Count} unique):");
|
||||||
|
foreach (var group in uniqueUnverifiedEmployers)
|
||||||
|
{
|
||||||
|
var count = group.Count() > 1 ? $" (x{group.Count()})" : "";
|
||||||
|
Console.WriteLine($" - {group.Key}{count}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// List unverified institutions
|
||||||
|
var uniqueUnverifiedInstitutions = allUnverifiedInstitutions
|
||||||
|
.GroupBy(i => i, StringComparer.OrdinalIgnoreCase)
|
||||||
|
.OrderByDescending(g => g.Count())
|
||||||
|
.ThenBy(g => g.Key)
|
||||||
|
.ToList();
|
||||||
|
|
||||||
|
if (uniqueUnverifiedInstitutions.Count > 0)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"\n{new string('-', 60)}");
|
||||||
|
Console.WriteLine($"UNVERIFIED INSTITUTIONS ({uniqueUnverifiedInstitutions.Count} unique):");
|
||||||
|
foreach (var group in uniqueUnverifiedInstitutions)
|
||||||
|
{
|
||||||
|
var count = group.Count() > 1 ? $" (x{group.Count()})" : "";
|
||||||
|
Console.WriteLine($" - {group.Key}{count}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Console.WriteLine($"\n{new string('=', 80)}");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Prompts on the console for the CV folder path and returns the line that was entered.
/// </summary>
/// <returns>The entered text, or an empty string when <c>Console.ReadLine</c> yields null.</returns>
static string AskForFolder()
{
    Console.Write("Enter CV folder path: ");

    var typed = Console.ReadLine();
    if (typed is null)
    {
        return "";
    }

    return typed;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Renders a date range as "start - end" using a "MMM yyyy" pattern for each side.
/// </summary>
/// <param name="start">Range start; rendered as "?" when null.</param>
/// <param name="end">Range end; rendered as "Present" when null.</param>
/// <returns>The two rendered sides joined by " - ".</returns>
static string FormatPeriod(DateOnly? start, DateOnly? end)
{
    const string monthYear = "MMM yyyy";

    string from = start.HasValue ? start.Value.ToString(monthYear) : "?";
    string to = end.HasValue ? end.Value.ToString(monthYear) : "Present";

    return from + " - " + to;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Registers what the batch tester resolves from DI: console logging, the database context
/// factory, the Companies House HTTP client, configuration-bound settings and the
/// parsing / verification services.
/// </summary>
/// <param name="services">Service collection that receives the registrations.</param>
static void ConfigureServices(IServiceCollection services)
{
    // Load configuration - try multiple locations
    var configPaths = new[]
    {
        "/var/www/realcv",
        "/git/RealCV/src/RealCV.Web",
        Path.GetFullPath(Path.Combine(AppContext.BaseDirectory, "..", "..", "..", "..", "..", "src", "RealCV.Web"))
    };

    // When none of the candidates exists, use the application's own directory. Falling back
    // to one of the candidate paths would name a directory that was just found missing, and
    // SetBasePath rejects a non-existent directory, so the tool would fail before it could
    // reach the optional JSON files or the connection-string default below.
    var webProjectPath = configPaths.FirstOrDefault(Directory.Exists) ?? AppContext.BaseDirectory;
    Console.WriteLine($"Loading config from: {webProjectPath}");

    // Every JSON file is optional, so a directory without any of them still builds.
    var configuration = new ConfigurationBuilder()
        .SetBasePath(webProjectPath)
        .AddJsonFile("appsettings.json", optional: true)
        .AddJsonFile("appsettings.Development.json", optional: true)
        .AddJsonFile("appsettings.Production.json", optional: true)
        .Build();

    // Logging - minimal output
    services.AddLogging(builder =>
    {
        builder.AddConsole();
        builder.SetMinimumLevel(LogLevel.Warning);
    });

    // Database
    // NOTE(review): the default below embeds an SA password in source. It is kept so existing
    // local runs keep working, but it should move to configuration or a secret store and the
    // credential should be rotated.
    var connectionString = configuration.GetConnectionString("DefaultConnection")
        ?? "Server=127.0.0.1;Database=RealCV;User Id=SA;Password=TrueCV_Sql2024!;TrustServerCertificate=True";

    services.AddDbContextFactory<ApplicationDbContext>(options =>
        options.UseSqlServer(connectionString));

    // Companies House - use configuration binding
    services.Configure<CompaniesHouseSettings>(configuration.GetSection(CompaniesHouseSettings.SectionName));
    services.AddHttpClient<CompaniesHouseClient>();

    // Anthropic - use configuration binding
    services.Configure<AnthropicSettings>(configuration.GetSection(AnthropicSettings.SectionName));
    services.AddScoped<ICompanyNameMatcherService, AICompanyNameMatcherService>();

    // Services
    services.AddScoped<ICompanyVerifierService, CompanyVerifierService>();
    services.AddScoped<IEducationVerifierService, EducationVerifierService>();
    services.AddScoped<ICVParserService, CVParserService>();
}
|
||||||
|
}
|
||||||
195
tools/batch-test-cvs.cs
Normal file
195
tools/batch-test-cvs.cs
Normal file
@@ -0,0 +1,195 @@
|
|||||||
|
#!/usr/bin/env dotnet-script
|
||||||
|
#r "nuget: Microsoft.EntityFrameworkCore.SqlServer, 8.0.0"
|
||||||
|
#r "nuget: Microsoft.Extensions.Configuration.Json, 8.0.0"
|
||||||
|
#r "nuget: Microsoft.Extensions.DependencyInjection, 8.0.0"
|
||||||
|
#r "nuget: Microsoft.Extensions.Logging.Console, 8.0.0"
|
||||||
|
#r "../src/RealCV.Application/bin/Debug/net8.0/RealCV.Application.dll"
|
||||||
|
#r "../src/RealCV.Infrastructure/bin/Debug/net8.0/RealCV.Infrastructure.dll"
|
||||||
|
#r "../src/RealCV.Domain/bin/Debug/net8.0/RealCV.Domain.dll"
|
||||||
|
|
||||||
|
// This is a dotnet-script file. Run with: dotnet script batch-test-cvs.cs -- /path/to/cvs
|
||||||
|
// Install dotnet-script: dotnet tool install -g dotnet-script
|
||||||
|
|
||||||
|
using System;
|
||||||
|
using System.IO;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Text.Json;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
using Microsoft.Extensions.Configuration;
|
||||||
|
using Microsoft.Extensions.DependencyInjection;
|
||||||
|
using Microsoft.Extensions.Logging;
|
||||||
|
using RealCV.Application.Interfaces;
|
||||||
|
using RealCV.Application.Models;
|
||||||
|
using RealCV.Infrastructure.Data;
|
||||||
|
using RealCV.Infrastructure.Services;
|
||||||
|
using RealCV.Infrastructure.ExternalApis;
|
||||||
|
using RealCV.Infrastructure.Configuration;
|
||||||
|
|
||||||
|
// Folder to scan: the first script argument, or the default test location when none is given.
var folderPath = Args.FirstOrDefault();
folderPath ??= "/tmp/test-cvs";

// Stop with exit code 1 and a usage hint when the folder does not exist.
var folderExists = Directory.Exists(folderPath);
if (folderExists == false)
{
    Console.WriteLine("Error: Folder not found: " + folderPath);
    Console.WriteLine("Usage: dotnet script batch-test-cvs.cs -- /path/to/cvs");
    return 1;
}

Console.WriteLine("Processing CVs from: " + folderPath);
Console.WriteLine("".PadRight(80, '='));
|
||||||
|
|
||||||
|
// Dependency injection container for the script.
var services = new ServiceCollection();

// Configuration is read from the web project's settings files, resolved relative to the
// current working directory; both JSON files are optional.
var configBuilder = new ConfigurationBuilder();
configBuilder.SetBasePath(Path.Combine(Directory.GetCurrentDirectory(), "../src/RealCV.Web"));
configBuilder.AddJsonFile("appsettings.json", optional: true);
configBuilder.AddJsonFile("appsettings.Development.json", optional: true);
var configuration = configBuilder.Build();

// Console logging, warnings and above only.
services.AddLogging(logging =>
{
    logging.AddConsole();
    logging.SetMinimumLevel(LogLevel.Warning);
});

// Settings bound from their configuration sections.
services.Configure<CompaniesHouseSettings>(configuration.GetSection("CompaniesHouse"));
services.Configure<AnthropicSettings>(configuration.GetSection("Anthropic"));

// Database context factory; the connection string is looked up when the options are built.
services.AddDbContextFactory<ApplicationDbContext>(dbOptions =>
{
    dbOptions.UseSqlServer(configuration.GetConnectionString("DefaultConnection"));
});

// Typed HTTP clients.
services.AddHttpClient<CompaniesHouseClient>();
services.AddHttpClient<AnthropicClient>();

// Application services.
// NOTE(review): the console program registers AICompanyNameMatcherService for this
// interface; confirm that CompanyNameMatcherService and AnthropicClient exist.
services.AddScoped<ICompanyNameMatcherService, CompanyNameMatcherService>();
services.AddScoped<ICompanyVerifierService, CompanyVerifierService>();
services.AddScoped<IEducationVerifierService, EducationVerifierService>();
services.AddScoped<ICVParserService, CVParserService>();

var provider = services.BuildServiceProvider();
|
||||||
|
|
||||||
|
// Collect the PDF and DOCX files returned for the folder (extension match ignores case).
var cvFiles = new List<string>();
foreach (var candidate in Directory.GetFiles(folderPath, "*.*"))
{
    var isPdf = candidate.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase);
    var isDocx = candidate.EndsWith(".docx", StringComparison.OrdinalIgnoreCase);
    if (isPdf || isDocx)
    {
        cvFiles.Add(candidate);
    }
}

Console.WriteLine($"Found {cvFiles.Count} CV files\n");

// Running totals across all files, reported in the summary.
int totalEmployers = 0;
int verifiedEmployers = 0;
int totalEducation = 0;
int verifiedEducation = 0;

// Names that failed verification, kept with duplicates as encountered.
List<string> allUnverifiedEmployers = new List<string>();
List<string> allUnverifiedInstitutions = new List<string>();
|
||||||
|
|
||||||
|
// Verify each CV inside its own DI scope. An exception raised while handling one file is
// printed and the loop continues with the next file.
foreach (var cvFile in cvFiles)
{
    // new string('=', 80) draws the rule. An interpolation alignment such as {'=',-80}
    // prints a single '=' padded with spaces instead of repeating the character.
    Console.WriteLine($"\n{new string('=', 80)}");
    Console.WriteLine($"FILE: {Path.GetFileName(cvFile)}");
    Console.WriteLine(new string('=', 80));

    try
    {
        using var scope = provider.CreateScope();
        var parser = scope.ServiceProvider.GetRequiredService<ICVParserService>();
        var companyVerifier = scope.ServiceProvider.GetRequiredService<ICompanyVerifierService>();
        var eduVerifier = scope.ServiceProvider.GetRequiredService<IEducationVerifierService>();

        await using var stream = File.OpenRead(cvFile);
        var cv = await parser.ParseAsync(stream, Path.GetFileName(cvFile));

        Console.WriteLine($"Candidate: {cv.PersonalInfo?.FullName ?? "Unknown"}");

        // Employers: one verification call per employment entry.
        if (cv.Employment?.Count > 0)
        {
            Console.WriteLine($"\nEMPLOYERS ({cv.Employment.Count}):");
            foreach (var emp in cv.Employment)
            {
                totalEmployers++;
                var result = await companyVerifier.VerifyCompanyAsync(
                    emp.CompanyName, emp.StartDate, emp.EndDate, emp.JobTitle);

                var icon = result.IsVerified ? "✓" : "✗";
                Console.WriteLine($" {icon} {emp.CompanyName}");

                if (result.IsVerified)
                {
                    verifiedEmployers++;
                    Console.WriteLine($" → {result.MatchedCompanyName} ({result.MatchScore}%)");
                    if (!string.IsNullOrEmpty(result.VerificationNotes))
                    {
                        Console.WriteLine($" Note: {result.VerificationNotes}");
                    }
                }
                else
                {
                    // Remember the claimed name for the summary list.
                    allUnverifiedEmployers.Add(emp.CompanyName);
                    Console.WriteLine($" Note: {result.VerificationNotes ?? "Not found"}");
                }
            }
        }

        // Education: entries are mapped to EducationEntry and verified in one call.
        if (cv.Education?.Count > 0)
        {
            Console.WriteLine($"\nEDUCATION ({cv.Education.Count}):");
            var eduEntries = cv.Education.Select(e => new EducationEntry
            {
                Institution = e.Institution,
                Qualification = e.Qualification,
                Subject = e.Subject,
                StartDate = e.StartDate,
                EndDate = e.EndDate
            }).ToList();

            var eduResults = eduVerifier.VerifyAll(eduEntries);
            foreach (var result in eduResults)
            {
                totalEducation++;
                var icon = result.IsVerified ? "✓" : "✗";
                Console.WriteLine($" {icon} {result.ClaimedInstitution}");
                Console.WriteLine($" {result.ClaimedQualification}");

                if (result.IsVerified)
                {
                    verifiedEducation++;

                    // Show the matched name only when it differs from the claimed one by
                    // more than letter case.
                    if (result.MatchedInstitution != null &&
                        !result.MatchedInstitution.Equals(result.ClaimedInstitution, StringComparison.OrdinalIgnoreCase))
                    {
                        Console.WriteLine($" → {result.MatchedInstitution}");
                    }
                }
                else
                {
                    allUnverifiedInstitutions.Add(result.ClaimedInstitution ?? "Unknown");
                    Console.WriteLine($" Status: {result.Status}");
                    if (!string.IsNullOrEmpty(result.VerificationNotes))
                    {
                        Console.WriteLine($" Note: {result.VerificationNotes}");
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"ERROR: {ex.Message}");
    }
}
|
||||||
|
|
||||||
|
// Summary
// new string('=', 80) draws the rule. An interpolation alignment such as {'=',-80} prints
// a single '=' padded with spaces instead of repeating the character.
Console.WriteLine($"\n\n{new string('=', 80)}");
Console.WriteLine("SUMMARY");
Console.WriteLine(new string('=', 80));
Console.WriteLine($"CVs Processed: {cvFiles.Count}");

// Integer percentages; a zero total reports 0% rather than dividing by zero.
Console.WriteLine($"Employers: {verifiedEmployers}/{totalEmployers} verified ({(totalEmployers > 0 ? verifiedEmployers * 100 / totalEmployers : 0)}%)");
Console.WriteLine($"Education: {verifiedEducation}/{totalEducation} verified ({(totalEducation > 0 ? verifiedEducation * 100 / totalEducation : 0)}%)");

// De-duplicate ignoring letter case so "ASDA" and "Asda" are listed once.
var uniqueUnverifiedEmployers = allUnverifiedEmployers
    .Distinct(StringComparer.OrdinalIgnoreCase)
    .OrderBy(x => x)
    .ToList();
if (uniqueUnverifiedEmployers.Count > 0)
{
    Console.WriteLine($"\nUNVERIFIED EMPLOYERS ({uniqueUnverifiedEmployers.Count}):");
    foreach (var emp in uniqueUnverifiedEmployers)
    {
        Console.WriteLine($" - {emp}");
    }
}

var uniqueUnverifiedInstitutions = allUnverifiedInstitutions
    .Distinct(StringComparer.OrdinalIgnoreCase)
    .OrderBy(x => x)
    .ToList();
if (uniqueUnverifiedInstitutions.Count > 0)
{
    Console.WriteLine($"\nUNVERIFIED INSTITUTIONS ({uniqueUnverifiedInstitutions.Count}):");
    foreach (var inst in uniqueUnverifiedInstitutions)
    {
        Console.WriteLine($" - {inst}");
    }
}

return 0;
|
||||||
Reference in New Issue
Block a user