- Renamed all directories (TrueCV.* -> RealCV.*) - Renamed all project files (.csproj) - Renamed solution file (TrueCV.sln -> RealCV.sln) - Updated all namespaces in C# and Razor files - Updated project references - Updated CSS variable names 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
168 lines
6.9 KiB
C#
168 lines
6.9 KiB
C#
using System.Text.Json;
|
|
using Anthropic.SDK;
|
|
using Anthropic.SDK.Messaging;
|
|
using Microsoft.Extensions.Logging;
|
|
using Microsoft.Extensions.Options;
|
|
using RealCV.Application.Helpers;
|
|
using RealCV.Application.Interfaces;
|
|
using RealCV.Application.Models;
|
|
using RealCV.Infrastructure.Configuration;
|
|
using RealCV.Infrastructure.Helpers;
|
|
|
|
namespace RealCV.Infrastructure.Services;
|
|
|
|
/// <summary>
/// Matches a company name taken from a CV against a list of Companies House candidates
/// by asking an Anthropic Claude model which candidate, if any, is the same organisation.
/// </summary>
public sealed class AICompanyNameMatcherService : ICompanyNameMatcherService
{
    /// <summary>Model identifier sent with every matching request.</summary>
    private const string ModelName = "claude-sonnet-4-20250514";

    /// <summary>Upper bound on the number of tokens the model may generate per reply.</summary>
    private const int MaxResponseTokens = 1024;

    /// <summary>Confidence (0-100) below which the model's pick is reported as "no match".</summary>
    private const int MinimumConfidenceScore = 50;

    /// <summary>Sentinel company number the prompt instructs the model to return when nothing matches.</summary>
    private const string NoMatchCompanyNumber = "NONE";

    private readonly AnthropicClient _anthropicClient;
    private readonly ILogger<AICompanyNameMatcherService> _logger;

    private const string SystemPrompt = """
        You are a UK company name matching expert. Your task is to determine if a company name
        from a CV matches any of the official company names from Companies House records.

        You understand:
        - Trading names vs registered names (e.g., "Tesco" = "TESCO PLC")
        - Subsidiaries vs parent companies (e.g., "ASDA" might work for "ASDA STORES LIMITED")
        - Common abbreviations (Ltd = Limited, PLC = Public Limited Company, CiC = Community Interest Company)
        - That completely different words mean different companies (e.g., "Families First" ≠ "Families Against Conformity")

        You must respond ONLY with valid JSON, no other text or markdown.
        """;

    private const string MatchingPrompt = """
        Compare the company name from a CV against official Companies House records.

        CV Company Name: "{CV_COMPANY}"

        Companies House Candidates:
        {CANDIDATES}

        Determine which candidate (if any) is the SAME company as the CV entry.

        Rules:
        1. A match requires the companies to be the SAME organisation, not just similar names
        2. "Families First CiC" is NOT the same as "FAMILIES AGAINST CONFORMITY LTD" - different words = different companies
        3. Trading names should match their registered entity (e.g., "Tesco" matches "TESCO PLC")
        4. Subsidiaries can match if clearly the same organisation (e.g., "ASDA" could match "ASDA STORES LIMITED")
        5. Acronyms in parentheses are abbreviations of the full name (e.g., "North Halifax Partnership (NHP)" = "NORTH HALIFAX PARTNERSHIP")
        6. CiC/CIC = Community Interest Company, LLP = Limited Liability Partnership - these are legal suffixes
        7. If the CV name contains all the key words of a candidate (ignoring Ltd/Limited/CIC/etc.), it's likely a match
        8. If NO candidate is clearly the same company, return "NONE" as the best match

        Respond with this exact JSON structure:
        {
            "bestMatchCompanyNumber": "string (company number of best match, or 'NONE' if no valid match)",
            "confidenceScore": number (0-100, where 100 = certain match, 0 = no match),
            "matchType": "string (Exact, TradingName, Subsidiary, Parent, NoMatch)",
            "reasoning": "string (brief explanation of why this is or isn't a match)"
        }
        """;

    /// <summary>
    /// Creates the service and an <see cref="AnthropicClient"/> using the API key from
    /// <paramref name="settings"/>.
    /// </summary>
    /// <param name="settings">Options wrapper whose <c>ApiKey</c> is passed to the Anthropic client.</param>
    /// <param name="logger">Logger used for diagnostics about each matching attempt.</param>
    public AICompanyNameMatcherService(
        IOptions<AnthropicSettings> settings,
        ILogger<AICompanyNameMatcherService> logger)
    {
        _logger = logger;
        _anthropicClient = new AnthropicClient(settings.Value.ApiKey);
    }

    /// <summary>
    /// Asks the AI model which of <paramref name="candidates"/> is the same company as
    /// <paramref name="cvCompanyName"/>.
    /// </summary>
    /// <param name="cvCompanyName">Company name as written on the CV.</param>
    /// <param name="candidates">Companies House candidates to compare against.</param>
    /// <param name="cancellationToken">Token forwarded to the Anthropic API call.</param>
    /// <returns>
    /// <list type="bullet">
    /// <item><c>null</c> when the name is blank, the candidate list is null or empty, the AI reply
    /// is empty or cannot be used, the returned company number is not among the candidates, or the
    /// call fails. Callers are expected to fall back to fuzzy matching.</item>
    /// <item>A result with company number <c>"NONE"</c> and score 0 when the AI reports no match or
    /// a confidence below 50.</item>
    /// <item>Otherwise the matched candidate together with the AI's score, match type and reasoning.</item>
    /// </list>
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// Thrown when <paramref name="cancellationToken"/> is cancelled by the caller; cancellation is
    /// not treated as an AI failure.
    /// </exception>
    public async Task<SemanticMatchResult?> FindBestMatchAsync(
        string cvCompanyName,
        List<CompanyCandidate> candidates,
        CancellationToken cancellationToken = default)
    {
        // A null list is handled like an empty one instead of throwing NullReferenceException.
        if (string.IsNullOrWhiteSpace(cvCompanyName) || candidates is null || candidates.Count == 0)
        {
            return null;
        }

        _logger.LogDebug("Using AI to match '{CVCompany}' against {Count} candidates",
            cvCompanyName, candidates.Count);

        try
        {
            var parameters = new MessageParameters
            {
                Model = ModelName,
                MaxTokens = MaxResponseTokens,
                Messages = new List<Message>
                {
                    new(RoleType.User, BuildPrompt(cvCompanyName, candidates))
                },
                System = [new SystemMessage(SystemPrompt)]
            };

            var response = await _anthropicClient.Messages.GetClaudeMessageAsync(parameters, cancellationToken);

            var responseText = response.Content
                .OfType<TextContent>()
                .FirstOrDefault()?.Text;

            if (string.IsNullOrWhiteSpace(responseText))
            {
                _logger.LogWarning("AI returned empty response for company matching");
                return null;
            }

            // NOTE(review): presumably strips markdown fences or other wrapping around the JSON - confirm in JsonResponseHelper.
            responseText = JsonResponseHelper.CleanJsonResponse(responseText);

            var aiResponse = JsonSerializer.Deserialize<AIMatchResponse>(responseText, JsonDefaults.CamelCase);

            if (aiResponse is null)
            {
                _logger.LogWarning("Failed to deserialize AI response: {Response}", responseText);
                return null;
            }

            _logger.LogDebug("AI match result: {CompanyNumber} with {Score}% confidence - {Reasoning}",
                aiResponse.BestMatchCompanyNumber, aiResponse.ConfidenceScore, aiResponse.Reasoning);

            // Model output is free text: tolerate stray whitespace and any casing of the sentinel.
            var bestMatchNumber = aiResponse.BestMatchCompanyNumber?.Trim();

            var isExplicitNoMatch = string.Equals(
                bestMatchNumber, NoMatchCompanyNumber, StringComparison.OrdinalIgnoreCase);

            if (isExplicitNoMatch || aiResponse.ConfidenceScore < MinimumConfidenceScore)
            {
                return new SemanticMatchResult
                {
                    CandidateCompanyName = "No match",
                    CandidateCompanyNumber = NoMatchCompanyNumber,
                    ConfidenceScore = 0,
                    MatchType = "NoMatch",
                    Reasoning = aiResponse.Reasoning
                };
            }

            // Static string.Equals is null-safe for candidates without a company number; the blank
            // guard stops a missing AI answer from "matching" such a candidate.
            var matchedCandidate = string.IsNullOrWhiteSpace(bestMatchNumber)
                ? null
                : candidates.FirstOrDefault(c =>
                    string.Equals(c.CompanyNumber, bestMatchNumber, StringComparison.OrdinalIgnoreCase));

            if (matchedCandidate is null)
            {
                _logger.LogWarning("AI returned company number {Number} not in candidates list",
                    aiResponse.BestMatchCompanyNumber);
                return null;
            }

            return new SemanticMatchResult
            {
                CandidateCompanyName = matchedCandidate.CompanyName,
                CandidateCompanyNumber = matchedCandidate.CompanyNumber,
                ConfidenceScore = aiResponse.ConfidenceScore,
                MatchType = aiResponse.MatchType,
                Reasoning = aiResponse.Reasoning
            };
        }
        // Caller-requested cancellation propagates; everything else (including timeouts that surface
        // as OperationCanceledException without the caller's token being cancelled) falls back.
        catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
        {
            _logger.LogError(ex, "AI company matching failed for '{CVCompany}'", cvCompanyName);
            return null; // Fall back to fuzzy matching
        }
    }

    /// <summary>
    /// Fills the <see cref="MatchingPrompt"/> template with the CV company name and a numbered,
    /// newline-separated list of the candidates (name, number and status, or "Unknown" status).
    /// </summary>
    private static string BuildPrompt(string cvCompanyName, List<CompanyCandidate> candidates)
    {
        var candidatesText = string.Join("\n", candidates.Select((c, i) =>
            $"{i + 1}. {c.CompanyName} (Number: {c.CompanyNumber}, Status: {c.CompanyStatus ?? "Unknown"})"));

        // The CV name is substituted last so that text coming from the CV is never rescanned for
        // the {CANDIDATES} placeholder.
        return MatchingPrompt
            .Replace("{CANDIDATES}", candidatesText)
            .Replace("{CV_COMPANY}", cvCompanyName);
    }
}
|