Add AI-powered company name matching using Claude API

Replace fuzzy string matching with semantic AI matching to fix false
positives where similar-sounding but different companies were matched
(e.g., "Families First CiC" incorrectly matching "FAMILIES AGAINST
CONFORMITY LTD").

Changes:
- Add ICompanyNameMatcherService interface and AICompanyNameMatcherService
  implementation using Claude Sonnet 4 for semantic company name comparison
- Add SemanticMatchResult and related models for AI match results
- Update CompanyVerifierService to use AI matching with fuzzy fallback
- Add detection for public sector employers, charities, and self-employed
  entries that cannot be verified via Companies House
- Update tests to work with new AI matcher integration

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-21 00:51:24 +01:00
parent 030ede9e77
commit d047de1c84
7 changed files with 586 additions and 28 deletions

View File

@@ -9,6 +9,8 @@ using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Moq;
using Moq.Protected;
using TrueCV.Application.Interfaces;
using TrueCV.Application.Models;
using TrueCV.Domain.Entities;
using TrueCV.Infrastructure.Configuration;
using TrueCV.Infrastructure.Data;
@@ -22,6 +24,7 @@ public class CompanyVerifierServiceTests : IDisposable
private readonly Mock<HttpMessageHandler> _mockHttpHandler;
private readonly Mock<ILogger<CompanyVerifierService>> _mockServiceLogger;
private readonly Mock<ILogger<CompaniesHouseClient>> _mockClientLogger;
private readonly Mock<ICompanyNameMatcherService> _mockAiMatcher;
private readonly ApplicationDbContext _dbContext;
private readonly CompanyVerifierService _sut;
private readonly HttpClient _httpClient;
@@ -39,6 +42,7 @@ public class CompanyVerifierServiceTests : IDisposable
_mockHttpHandler = new Mock<HttpMessageHandler>();
_mockServiceLogger = new Mock<ILogger<CompanyVerifierService>>();
_mockClientLogger = new Mock<ILogger<CompaniesHouseClient>>();
_mockAiMatcher = new Mock<ICompanyNameMatcherService>();
_httpClient = new HttpClient(_mockHttpHandler.Object);
@@ -68,7 +72,57 @@ public class CompanyVerifierServiceTests : IDisposable
mockFactory.Setup(f => f.CreateDbContextAsync(It.IsAny<CancellationToken>()))
.ReturnsAsync(() => new ApplicationDbContext(_dbOptions));
_sut = new CompanyVerifierService(client, mockFactory.Object, _mockServiceLogger.Object);
// Setup AI matcher to return matching results for exact company name matches
_mockAiMatcher.Setup(m => m.FindBestMatchAsync(
It.IsAny<string>(),
It.IsAny<List<CompanyCandidate>>(),
It.IsAny<CancellationToken>()))
.Returns((string cvCompanyName, List<CompanyCandidate> candidates, CancellationToken _) =>
{
// Find exact or close match in candidates
var exactMatch = candidates.FirstOrDefault(c =>
c.CompanyName.Equals(cvCompanyName, StringComparison.OrdinalIgnoreCase));
if (exactMatch != null)
{
return Task.FromResult<SemanticMatchResult?>(new SemanticMatchResult
{
CandidateCompanyName = exactMatch.CompanyName,
CandidateCompanyNumber = exactMatch.CompanyNumber,
ConfidenceScore = 100,
MatchType = "Exact",
Reasoning = "Exact name match"
});
}
// Try fuzzy match for close names (e.g., with/without Ltd)
var fuzzyMatch = candidates.FirstOrDefault(c =>
c.CompanyName.Contains(cvCompanyName, StringComparison.OrdinalIgnoreCase) ||
cvCompanyName.Contains(c.CompanyName.Replace(" Ltd", "").Replace(" Limited", ""), StringComparison.OrdinalIgnoreCase));
if (fuzzyMatch != null)
{
return Task.FromResult<SemanticMatchResult?>(new SemanticMatchResult
{
CandidateCompanyName = fuzzyMatch.CompanyName,
CandidateCompanyNumber = fuzzyMatch.CompanyNumber,
ConfidenceScore = 85,
MatchType = "TradingName",
Reasoning = "Similar name match"
});
}
return Task.FromResult<SemanticMatchResult?>(new SemanticMatchResult
{
CandidateCompanyName = "No match",
CandidateCompanyNumber = "NONE",
ConfidenceScore = 0,
MatchType = "NoMatch",
Reasoning = "No matching company found"
});
});
_sut = new CompanyVerifierService(client, mockFactory.Object, _mockAiMatcher.Object, _mockServiceLogger.Object);
}
public void Dispose()
@@ -146,7 +200,7 @@ public class CompanyVerifierServiceTests : IDisposable
result.IsVerified.Should().BeFalse();
result.MatchScore.Should().Be(0);
result.MatchedCompanyName.Should().BeNull();
result.VerificationNotes.Should().Contain("70%");
result.VerificationNotes.Should().Contain("could not be verified");
}
[Fact]
@@ -171,7 +225,7 @@ public class CompanyVerifierServiceTests : IDisposable
// Assert
result.IsVerified.Should().BeTrue();
result.MatchedCompanyNumber.Should().Be("99999999");
result.VerificationNotes.Should().Contain("cache");
result.VerificationNotes.Should().BeNull(); // Cached results have no specific notes
// Verify API was NOT called (no HTTP setup means it would fail if called)
_mockHttpHandler.Protected().Verify(
@@ -211,10 +265,10 @@ public class CompanyVerifierServiceTests : IDisposable
result.IsVerified.Should().BeTrue();
result.MatchedCompanyNumber.Should().Be("12345678"); // From API, not cache
// Verify API WAS called
// Verify API WAS called (at least once - multiple queries are generated for matching)
_mockHttpHandler.Protected().Verify(
"SendAsync",
Times.Once(),
Times.AtLeastOnce(),
ItExpr.Is<HttpRequestMessage>(r => r.RequestUri!.ToString().Contains("search/companies")),
ItExpr.IsAny<CancellationToken>());
}
@@ -235,7 +289,7 @@ public class CompanyVerifierServiceTests : IDisposable
result.IsVerified.Should().BeFalse();
result.MatchScore.Should().Be(0);
result.MatchedCompanyName.Should().BeNull();
result.VerificationNotes.Should().Contain("No matching company");
result.VerificationNotes.Should().Contain("could not be verified");
}
[Fact]
@@ -525,20 +579,48 @@ public class CompanyVerifierServiceTests : IDisposable
/// <summary>
/// Configures the mocked HTTP handler so that every <c>SendAsync</c> call is answered
/// with a newly created <see cref="HttpResponseMessage"/> carrying <paramref name="statusCode"/>.
/// </summary>
/// <remarks>
/// The body of each response depends on the request URL:
/// <list type="bullet">
/// <item><description>URLs containing <c>search/companies</c> receive <paramref name="content"/> (when it is not null).</description></item>
/// <item><description>URLs containing <c>/company/</c> (and not <c>search</c>) receive a minimal stub company whose number is taken from the URL.</description></item>
/// <item><description>Any other URL receives <paramref name="content"/> (when it is not null).</description></item>
/// </list>
/// </remarks>
/// <typeparam name="T">Type of the payload serialized into the response body.</typeparam>
/// <param name="statusCode">Status code applied to every response produced by the mock.</param>
/// <param name="content">Payload serialized with <c>JsonOptions</c>; when null, no body is attached for search/other requests.</param>
private void SetupHttpResponse<T>(HttpStatusCode statusCode, T? content)
{
    // Return a fresh response for each call to avoid stream disposal issues
    // when multiple API calls are made (e.g., multiple search queries).
    // A single pre-built response must not be shared between calls, so the
    // response is created inside the callback rather than captured from outside.
    _mockHttpHandler
        .Protected()
        .Setup<Task<HttpResponseMessage>>(
            "SendAsync",
            ItExpr.IsAny<HttpRequestMessage>(),
            ItExpr.IsAny<CancellationToken>())
        .ReturnsAsync((HttpRequestMessage request, CancellationToken _) =>
        {
            var url = request.RequestUri?.ToString() ?? "";
            var response = new HttpResponseMessage(statusCode);

            // For search requests, return the search response
            if (url.Contains("search/companies") && content != null)
            {
                response.Content = JsonContent.Create(content, options: JsonOptions);
            }
            // For company detail requests (e.g., /company/12345678), return a valid company response
            else if (url.Contains("/company/") && !url.Contains("search"))
            {
                // Extract company number from URL: take the text after "/company/",
                // cut at the next path segment and at any query string.
                // Falls back to "12345678" if nothing can be extracted.
                var companyNumber = url.Split("/company/").LastOrDefault()?.Split("/").FirstOrDefault()?.Split("?").FirstOrDefault() ?? "12345678";

                // Return a minimal valid company response
                var companyResponse = new
                {
                    company_number = companyNumber,
                    company_name = "Test Company Ltd",
                    company_status = "active",
                    type = "ltd"
                };
                response.Content = JsonContent.Create(companyResponse, options: JsonOptions);
            }
            // Any other endpoint still receives the supplied payload, if there is one
            else if (content != null)
            {
                response.Content = JsonContent.Create(content, options: JsonOptions);
            }

            return response;
        });
}
private static CompaniesHouseSearchResponseDto CreateSearchResponse(