using FluentAssertions;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using RealCV.Infrastructure.Configuration;
using RealCV.Infrastructure.Services;

namespace RealCV.Tests.Services;

/// <summary>
/// Tests for the rule-based compound company name detection.
/// </summary>
/// <remarks>
/// Every test calls <c>ExtractCompanyNamesAsync</c> on the service under test and
/// asserts either that the result is <c>null</c> (the name is not split) or that it
/// is equivalent to the expected list of parts (the name is split).
/// </remarks>
public sealed class CompoundNameDetectionTests
{
    private readonly AICompanyNameMatcherService _sut;

    /// <summary>
    /// Builds the service under test with a placeholder API key and a no-op logger.
    /// </summary>
    public CompoundNameDetectionTests()
    {
        // NOTE(review): "test-key" is a placeholder; these tests appear to target only
        // the rule-based paths - confirm no outbound API call is made with it.
        var settings = Options.Create(new AnthropicSettings { ApiKey = "test-key" });

        // The generic NullLogger<T> implements ILogger<T> (and therefore ILogger), so it
        // satisfies the constructor whether it asks for a typed or an untyped logger.
        _sut = new AICompanyNameMatcherService(
            settings,
            NullLogger<AICompanyNameMatcherService>.Instance);
    }

    #region Known Single Companies (should NOT be split)

    /// <summary>
    /// Names of well-known single companies that contain "&amp;" or "and" must not be split.
    /// </summary>
    [Theory]
    [InlineData("Ernst & Young")]
    [InlineData("Ernst and Young")]
    [InlineData("Marks & Spencer")]
    [InlineData("Marks and Spencer")]
    [InlineData("Procter & Gamble")]
    [InlineData("Johnson & Johnson")]
    [InlineData("Deloitte and Touche")]
    [InlineData("Allen & Overy")]
    [InlineData("Slaughter and May")]
    [InlineData("Holland & Barrett")]
    [InlineData("Smith & Nephew")]
    [InlineData("AT&T")]
    [InlineData("M&S")]
    public async Task ExtractCompanyNamesAsync_KnownSingleCompany_ReturnsNull(string companyName)
    {
        // Act
        var result = await _sut.ExtractCompanyNamesAsync(companyName);

        // Assert
        result.Should().BeNull($"'{companyName}' is a known single company and should not be split");
    }

    /// <summary>
    /// A known single company followed by extra words (legal suffix, region, division)
    /// must still not be split.
    /// </summary>
    [Theory]
    [InlineData("Ernst & Young LLP")]
    [InlineData("Marks & Spencer PLC")]
    [InlineData("Procter & Gamble UK")]
    [InlineData("Johnson & Johnson Medical")]
    public async Task ExtractCompanyNamesAsync_KnownSingleCompanyWithSuffix_ReturnsNull(string companyName)
    {
        // Act
        var result = await _sut.ExtractCompanyNamesAsync(companyName);

        // Assert
        result.Should().BeNull($"'{companyName}' contains a known single company and should not be split");
    }

    #endregion

    #region Department/Division Patterns (should NOT be split)

    /// <summary>
    /// Names that read as departments, divisions or regions joined by "and" must not be split.
    /// </summary>
    [Theory]
    [InlineData("Tesco Stores and Distribution")]
    [InlineData("BMW UK and Ireland")]
    [InlineData("Google Europe and Middle East")]
    [InlineData("Sales and Marketing")]
    [InlineData("Research and Development")]
    [InlineData("Finance and Operations")]
    public async Task ExtractCompanyNamesAsync_DepartmentPattern_ReturnsNull(string companyName)
    {
        // Act
        var result = await _sut.ExtractCompanyNamesAsync(companyName);

        // Assert
        result.Should().BeNull($"'{companyName}' looks like departments/divisions and should not be split");
    }

    #endregion

    #region Compound Names with Slash (SHOULD be split)

    /// <summary>
    /// Names joined by "/" are expected to be split into their individual parts.
    /// </summary>
    [Theory]
    [InlineData("ASDA/WALMART", new[] { "ASDA", "WALMART" })]
    [InlineData("BBC/ITV", new[] { "BBC", "ITV" })]
    [InlineData("Tesco/Sainsbury's", new[] { "Tesco", "Sainsbury's" })]
    [InlineData("Microsoft/Google", new[] { "Microsoft", "Google" })]
    public async Task ExtractCompanyNamesAsync_SlashSeparated_ReturnsParts(string companyName, string[] expectedParts)
    {
        // Act
        var result = await _sut.ExtractCompanyNamesAsync(companyName);

        // Assert
        result.Should().NotBeNull($"'{companyName}' contains '/' and should be split");
        result.Should().BeEquivalentTo(expectedParts);
    }

    #endregion

    #region Compound Names with And/Ampersand

    /// <summary>
    /// When both sides of the "&amp;" carry a company suffix, the name is expected to be split.
    /// </summary>
    [Theory]
    [InlineData("Acme Ltd & Beta Ltd", new[] { "Acme Ltd", "Beta Ltd" })]
    public async Task ExtractCompanyNamesAsync_BothPartsHaveCompanySuffix_ReturnsParts(string companyName, string[] expectedParts)
    {
        // When both parts clearly have company suffixes (Ltd, PLC, etc.), split them

        // Act
        var result = await _sut.ExtractCompanyNamesAsync(companyName);

        // Assert
        result.Should().NotBeNull($"'{companyName}' has company suffixes on both parts");
        result.Should().BeEquivalentTo(expectedParts);
    }

    /// <summary>
    /// Ambiguous "&amp;" / "and" names are expected to be left unsplit.
    /// </summary>
    [Theory]
    [InlineData("Corus & Laura Ashley Hotels")] // Ambiguous - neither has company suffix
    [InlineData("Smith & Jones Consulting")] // Could be a single partnership
    [InlineData("Acme PLC and Beta PLC")] // Matches " plc and " department pattern
    public async Task ExtractCompanyNamesAsync_AmbiguousWithAnd_ReturnsNull(string companyName)
    {
        // Rule-based system is conservative with ambiguous & and "and" cases

        // Act
        var result = await _sut.ExtractCompanyNamesAsync(companyName);

        // Assert
        result.Should().BeNull($"'{companyName}' is ambiguous and should not be split");
    }

    #endregion

    #region Edge Cases

    /// <summary>
    /// Empty, whitespace-only and null inputs are expected to yield null rather than throw.
    /// </summary>
    [Theory]
    [InlineData("")]
    [InlineData(" ")]
    [InlineData(null)]
    public async Task ExtractCompanyNamesAsync_EmptyOrNull_ReturnsNull(string? companyName)
    {
        // Act
        // The null-forgiving operator is deliberate: the null case is passed through
        // to the service on purpose to check how it handles a null argument.
        var result = await _sut.ExtractCompanyNamesAsync(companyName!);

        // Assert
        result.Should().BeNull();
    }

    /// <summary>
    /// Plain company names with no separator must not be split.
    /// </summary>
    [Theory]
    [InlineData("Microsoft")]
    [InlineData("Google")]
    [InlineData("Amazon")]
    [InlineData("Apple Inc")]
    [InlineData("Tesco PLC")]
    public async Task ExtractCompanyNamesAsync_SimpleCompanyName_ReturnsNull(string companyName)
    {
        // Act
        var result = await _sut.ExtractCompanyNamesAsync(companyName);

        // Assert
        result.Should().BeNull($"'{companyName}' is a simple company name and should not be split");
    }

    /// <summary>
    /// A name whose parts are single characters must not be split.
    /// </summary>
    [Fact]
    public async Task ExtractCompanyNamesAsync_ShortParts_ReturnsNull()
    {
        // Arrange - Parts too short to be valid company names
        var companyName = "A & B";

        // Act
        var result = await _sut.ExtractCompanyNamesAsync(companyName);

        // Assert
        result.Should().BeNull("parts are too short to be valid company names");
    }

    #endregion
}