Add AI-powered company name matching using Claude API
Replace fuzzy string matching with semantic AI matching to fix false positives where similar-sounding but different companies were matched (e.g., "Families First CiC" incorrectly matching "FAMILIES AGAINST CONFORMITY LTD").

Changes:
- Add ICompanyNameMatcherService interface and AICompanyNameMatcherService implementation using Claude Sonnet 4 for semantic company name comparison
- Add SemanticMatchResult and related models for AI match results
- Update CompanyVerifierService to use AI matching with fuzzy fallback
- Add detection for public sector employers, charities, and self-employed entries that cannot be verified via Companies House
- Update tests to work with new AI matcher integration

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -9,6 +9,8 @@ using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using Moq;
|
||||
using Moq.Protected;
|
||||
using TrueCV.Application.Interfaces;
|
||||
using TrueCV.Application.Models;
|
||||
using TrueCV.Domain.Entities;
|
||||
using TrueCV.Infrastructure.Configuration;
|
||||
using TrueCV.Infrastructure.Data;
|
||||
@@ -22,6 +24,7 @@ public class CompanyVerifierServiceTests : IDisposable
|
||||
private readonly Mock<HttpMessageHandler> _mockHttpHandler;
|
||||
private readonly Mock<ILogger<CompanyVerifierService>> _mockServiceLogger;
|
||||
private readonly Mock<ILogger<CompaniesHouseClient>> _mockClientLogger;
|
||||
private readonly Mock<ICompanyNameMatcherService> _mockAiMatcher;
|
||||
private readonly ApplicationDbContext _dbContext;
|
||||
private readonly CompanyVerifierService _sut;
|
||||
private readonly HttpClient _httpClient;
|
||||
@@ -39,6 +42,7 @@ public class CompanyVerifierServiceTests : IDisposable
|
||||
_mockHttpHandler = new Mock<HttpMessageHandler>();
|
||||
_mockServiceLogger = new Mock<ILogger<CompanyVerifierService>>();
|
||||
_mockClientLogger = new Mock<ILogger<CompaniesHouseClient>>();
|
||||
_mockAiMatcher = new Mock<ICompanyNameMatcherService>();
|
||||
|
||||
_httpClient = new HttpClient(_mockHttpHandler.Object);
|
||||
|
||||
@@ -68,7 +72,57 @@ public class CompanyVerifierServiceTests : IDisposable
|
||||
mockFactory.Setup(f => f.CreateDbContextAsync(It.IsAny<CancellationToken>()))
|
||||
.ReturnsAsync(() => new ApplicationDbContext(_dbOptions));
|
||||
|
||||
_sut = new CompanyVerifierService(client, mockFactory.Object, _mockServiceLogger.Object);
|
||||
// Setup AI matcher to return matching results for exact company name matches
|
||||
_mockAiMatcher.Setup(m => m.FindBestMatchAsync(
|
||||
It.IsAny<string>(),
|
||||
It.IsAny<List<CompanyCandidate>>(),
|
||||
It.IsAny<CancellationToken>()))
|
||||
.Returns((string cvCompanyName, List<CompanyCandidate> candidates, CancellationToken _) =>
|
||||
{
|
||||
// Find exact or close match in candidates
|
||||
var exactMatch = candidates.FirstOrDefault(c =>
|
||||
c.CompanyName.Equals(cvCompanyName, StringComparison.OrdinalIgnoreCase));
|
||||
|
||||
if (exactMatch != null)
|
||||
{
|
||||
return Task.FromResult<SemanticMatchResult?>(new SemanticMatchResult
|
||||
{
|
||||
CandidateCompanyName = exactMatch.CompanyName,
|
||||
CandidateCompanyNumber = exactMatch.CompanyNumber,
|
||||
ConfidenceScore = 100,
|
||||
MatchType = "Exact",
|
||||
Reasoning = "Exact name match"
|
||||
});
|
||||
}
|
||||
|
||||
// Try fuzzy match for close names (e.g., with/without Ltd)
|
||||
var fuzzyMatch = candidates.FirstOrDefault(c =>
|
||||
c.CompanyName.Contains(cvCompanyName, StringComparison.OrdinalIgnoreCase) ||
|
||||
cvCompanyName.Contains(c.CompanyName.Replace(" Ltd", "").Replace(" Limited", ""), StringComparison.OrdinalIgnoreCase));
|
||||
|
||||
if (fuzzyMatch != null)
|
||||
{
|
||||
return Task.FromResult<SemanticMatchResult?>(new SemanticMatchResult
|
||||
{
|
||||
CandidateCompanyName = fuzzyMatch.CompanyName,
|
||||
CandidateCompanyNumber = fuzzyMatch.CompanyNumber,
|
||||
ConfidenceScore = 85,
|
||||
MatchType = "TradingName",
|
||||
Reasoning = "Similar name match"
|
||||
});
|
||||
}
|
||||
|
||||
return Task.FromResult<SemanticMatchResult?>(new SemanticMatchResult
|
||||
{
|
||||
CandidateCompanyName = "No match",
|
||||
CandidateCompanyNumber = "NONE",
|
||||
ConfidenceScore = 0,
|
||||
MatchType = "NoMatch",
|
||||
Reasoning = "No matching company found"
|
||||
});
|
||||
});
|
||||
|
||||
_sut = new CompanyVerifierService(client, mockFactory.Object, _mockAiMatcher.Object, _mockServiceLogger.Object);
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
@@ -146,7 +200,7 @@ public class CompanyVerifierServiceTests : IDisposable
|
||||
result.IsVerified.Should().BeFalse();
|
||||
result.MatchScore.Should().Be(0);
|
||||
result.MatchedCompanyName.Should().BeNull();
|
||||
result.VerificationNotes.Should().Contain("70%");
|
||||
result.VerificationNotes.Should().Contain("could not be verified");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -171,7 +225,7 @@ public class CompanyVerifierServiceTests : IDisposable
|
||||
// Assert
|
||||
result.IsVerified.Should().BeTrue();
|
||||
result.MatchedCompanyNumber.Should().Be("99999999");
|
||||
result.VerificationNotes.Should().Contain("cache");
|
||||
result.VerificationNotes.Should().BeNull(); // Cached results have no specific notes
|
||||
|
||||
// Verify API was NOT called (no HTTP setup means it would fail if called)
|
||||
_mockHttpHandler.Protected().Verify(
|
||||
@@ -211,10 +265,10 @@ public class CompanyVerifierServiceTests : IDisposable
|
||||
result.IsVerified.Should().BeTrue();
|
||||
result.MatchedCompanyNumber.Should().Be("12345678"); // From API, not cache
|
||||
|
||||
// Verify API WAS called
|
||||
// Verify API WAS called (at least once - multiple queries are generated for matching)
|
||||
_mockHttpHandler.Protected().Verify(
|
||||
"SendAsync",
|
||||
Times.Once(),
|
||||
Times.AtLeastOnce(),
|
||||
ItExpr.Is<HttpRequestMessage>(r => r.RequestUri!.ToString().Contains("search/companies")),
|
||||
ItExpr.IsAny<CancellationToken>());
|
||||
}
|
||||
@@ -235,7 +289,7 @@ public class CompanyVerifierServiceTests : IDisposable
|
||||
result.IsVerified.Should().BeFalse();
|
||||
result.MatchScore.Should().Be(0);
|
||||
result.MatchedCompanyName.Should().BeNull();
|
||||
result.VerificationNotes.Should().Contain("No matching company");
|
||||
result.VerificationNotes.Should().Contain("could not be verified");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -525,20 +579,48 @@ public class CompanyVerifierServiceTests : IDisposable
|
||||
|
||||
private void SetupHttpResponse<T>(HttpStatusCode statusCode, T? content)
|
||||
{
|
||||
var response = new HttpResponseMessage(statusCode);
|
||||
|
||||
if (content != null)
|
||||
{
|
||||
response.Content = JsonContent.Create(content, options: JsonOptions);
|
||||
}
|
||||
|
||||
// Return a fresh response for each call to avoid stream disposal issues
|
||||
// when multiple API calls are made (e.g., multiple search queries)
|
||||
// Also handle both search and company detail endpoints
|
||||
_mockHttpHandler
|
||||
.Protected()
|
||||
.Setup<Task<HttpResponseMessage>>(
|
||||
"SendAsync",
|
||||
ItExpr.IsAny<HttpRequestMessage>(),
|
||||
ItExpr.IsAny<CancellationToken>())
|
||||
.ReturnsAsync(response);
|
||||
.ReturnsAsync((HttpRequestMessage request, CancellationToken _) =>
|
||||
{
|
||||
var url = request.RequestUri?.ToString() ?? "";
|
||||
var response = new HttpResponseMessage(statusCode);
|
||||
|
||||
// For search requests, return the search response
|
||||
if (url.Contains("search/companies") && content != null)
|
||||
{
|
||||
response.Content = JsonContent.Create(content, options: JsonOptions);
|
||||
}
|
||||
// For company detail requests (e.g., /company/12345678), return a valid company response
|
||||
else if (url.Contains("/company/") && !url.Contains("search"))
|
||||
{
|
||||
// Extract company number from URL
|
||||
var companyNumber = url.Split("/company/").LastOrDefault()?.Split("/").FirstOrDefault()?.Split("?").FirstOrDefault() ?? "12345678";
|
||||
|
||||
// Return a minimal valid company response
|
||||
var companyResponse = new
|
||||
{
|
||||
company_number = companyNumber,
|
||||
company_name = "Test Company Ltd",
|
||||
company_status = "active",
|
||||
type = "ltd"
|
||||
};
|
||||
response.Content = JsonContent.Create(companyResponse, options: JsonOptions);
|
||||
}
|
||||
else if (content != null)
|
||||
{
|
||||
response.Content = JsonContent.Create(content, options: JsonOptions);
|
||||
}
|
||||
|
||||
return response;
|
||||
});
|
||||
}
|
||||
|
||||
private static CompaniesHouseSearchResponseDto CreateSearchResponse(
|
||||
|
||||
Reference in New Issue
Block a user