// RealCV/tests/TrueCV.Tests/Services/CVParserServiceTests.cs

using System.Text;
using FluentAssertions;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Moq;
using TrueCV.Application.Models;
using TrueCV.Infrastructure.Configuration;
using TrueCV.Infrastructure.Services;

namespace TrueCV.Tests.Services;

public sealed class CVParserServiceTests : IDisposable
{
    private readonly Mock<ILogger<CVParserService>> _loggerMock;
    private readonly Mock<IOptions<AnthropicSettings>> _settingsMock;
    private readonly CVParserService _sut;

    /// <summary>
    /// Creates the system under test from a mocked logger and a mocked options object
    /// whose <c>Value</c> yields settings carrying a placeholder API key.
    /// </summary>
    public CVParserServiceTests()
    {
        var settings = new AnthropicSettings { ApiKey = "test-api-key" };

        _settingsMock = new();
        _settingsMock.Setup(options => options.Value).Returns(settings);

        _loggerMock = new();

        _sut = new(_settingsMock.Object, _loggerMock.Object);
    }

    /// <summary>
    /// Teardown hook required by <see cref="IDisposable"/>; it currently releases nothing.
    /// </summary>
    public void Dispose()
    {
        // Intentionally empty: add cleanup here if the fixture ever acquires resources.
    }

    #region File Extension Tests

    /// <summary>
    /// Each listed extension must be rejected with a <see cref="NotSupportedException"/>
    /// whose message quotes the extension and says it is not supported.
    /// </summary>
    [Theory]
    [InlineData(".txt")]
    [InlineData(".doc")]
    [InlineData(".rtf")]
    [InlineData(".xml")]
    [InlineData(".html")]
    [InlineData("")]
    public async Task ParseAsync_WithUnsupportedFileExtension_ThrowsNotSupportedException(string extension)
    {
        // Arrange
        var expectedMessagePattern = $"*'{extension}'*not supported*";
        var payload = Encoding.UTF8.GetBytes("test content");
        using var input = new MemoryStream(payload);

        // Act
        var parse = () => _sut.ParseAsync(input, $"resume{extension}");

        // Assert
        var failure = await parse.Should().ThrowAsync<NotSupportedException>();
        failure.WithMessage(expectedMessagePattern);
    }

    /// <summary>
    /// A file name without any extension is expected to fail with a
    /// <see cref="NotSupportedException"/> whose message quotes an empty extension.
    /// </summary>
    [Fact]
    public async Task ParseAsync_WithNoFileExtension_ThrowsNotSupportedException()
    {
        // Arrange
        var payload = Encoding.UTF8.GetBytes("test content");
        using var input = new MemoryStream(payload);

        // Act
        var parse = () => _sut.ParseAsync(input, "resume");

        // Assert
        var failure = await parse.Should().ThrowAsync<NotSupportedException>();
        failure.WithMessage("*''*not supported*");
    }

    /// <summary>
    /// File names containing spaces, parentheses, dashes, underscores or an upper-case
    /// extension must still have their extension recognised.
    /// </summary>
    /// <remarks>
    /// The stream does not hold a real document, so the call is still expected to throw;
    /// the test only requires that the failure is not the unsupported-extension rejection.
    /// </remarks>
    [Theory]
    [InlineData("my resume.pdf")]
    [InlineData("John Doe CV (2024).pdf")]
    [InlineData("resume-final-v2.docx")]
    [InlineData("CV_John_Doe.DOCX")]
    public async Task ParseAsync_WithSpecialCharactersInFileName_HandlesExtensionCorrectly(string fileName)
    {
        // Arrange
        using var stream = new MemoryStream(Encoding.UTF8.GetBytes("not valid content"));

        // Act
        var act = () => _sut.ParseAsync(stream, fileName);

        // Assert
        // Same assertion idiom as the parsing-path tests in this class: require that
        // something is thrown, then inspect the exception that was actually raised.
        var thrown = await act.Should().ThrowAsync<Exception>();
        thrown.Which.Should().NotBeOfType<NotSupportedException>("because file extension should be recognized");
    }

    /// <summary>
    /// For a name such as <c>my.resume.final.pdf</c> only the final segment (<c>.pdf</c>)
    /// should be treated as the extension, not <c>.resume</c> or <c>.final</c>.
    /// </summary>
    /// <remarks>
    /// The stream does not hold a real PDF, so the call is still expected to throw; the
    /// test only requires that the failure is not the unsupported-extension rejection.
    /// </remarks>
    [Fact]
    public async Task ParseAsync_WithMultipleDotsInFileName_UsesLastExtension()
    {
        // Arrange
        using var stream = new MemoryStream(Encoding.UTF8.GetBytes("not valid content"));
        const string fileName = "my.resume.final.pdf";

        // Act
        var act = () => _sut.ParseAsync(stream, fileName);

        // Assert
        // Same assertion idiom as the parsing-path tests in this class: require that
        // something is thrown, then inspect the exception that was actually raised.
        var thrown = await act.Should().ThrowAsync<Exception>();
        thrown.Which.Should().NotBeOfType<NotSupportedException>("because .pdf extension should be recognized");
    }

    /// <summary>
    /// The <c>.pdf</c> extension must be recognised regardless of letter casing.
    /// </summary>
    /// <remarks>
    /// The stream content is deliberately not a PDF, so parsing is expected to fail; the
    /// failure must come from somewhere other than the unsupported-extension check.
    /// </remarks>
    [Theory]
    [InlineData("resume.pdf")]
    [InlineData("resume.PDF")]
    [InlineData("resume.Pdf")]
    [InlineData("resume.pDf")]
    public async Task ParseAsync_WithPdfExtension_CaseInsensitive_AttemptsPdfParsing(string fileName)
    {
        // Arrange
        using var stream = new MemoryStream(Encoding.UTF8.GetBytes("not a valid pdf"));

        // Act
        var act = () => _sut.ParseAsync(stream, fileName);

        // Assert
        // Same assertion idiom as the parsing-path tests in this class: require that
        // something is thrown, then inspect the exception that was actually raised.
        var thrown = await act.Should().ThrowAsync<Exception>("because the method should throw for invalid PDF content");
        thrown.Which.Should().NotBeOfType<NotSupportedException>("because PDF extension should be recognized");
    }

    /// <summary>
    /// The <c>.docx</c> extension must be recognised regardless of letter casing.
    /// </summary>
    /// <remarks>
    /// The stream content is deliberately not a DOCX package, so parsing is expected to
    /// fail; the failure must come from somewhere other than the unsupported-extension check.
    /// </remarks>
    [Theory]
    [InlineData("resume.docx")]
    [InlineData("resume.DOCX")]
    [InlineData("resume.Docx")]
    [InlineData("resume.dOcX")]
    public async Task ParseAsync_WithDocxExtension_CaseInsensitive_AttemptsDocxParsing(string fileName)
    {
        // Arrange
        using var stream = new MemoryStream(Encoding.UTF8.GetBytes("not a valid docx"));

        // Act
        var act = () => _sut.ParseAsync(stream, fileName);

        // Assert
        // Same assertion idiom as the parsing-path tests in this class: require that
        // something is thrown, then inspect the exception that was actually raised.
        var thrown = await act.Should().ThrowAsync<Exception>("because the method should throw for invalid DOCX content");
        thrown.Which.Should().NotBeOfType<NotSupportedException>("because DOCX extension should be recognized");
    }

    #endregion

    #region Input Validation Tests

    /// <summary>
    /// Passing a null stream must raise an <see cref="ArgumentNullException"/> that names
    /// the <c>fileStream</c> parameter.
    /// </summary>
    [Fact]
    public async Task ParseAsync_WithNullStream_ThrowsArgumentNullException()
    {
        // Act
        var parse = () => _sut.ParseAsync(default(Stream)!, "resume.pdf");

        // Assert
        var failure = await parse.Should().ThrowAsync<ArgumentNullException>();
        failure.WithParameterName("fileStream");
    }

    /// <summary>
    /// A null, empty or whitespace-only file name must raise an
    /// <see cref="ArgumentException"/> that names the <c>fileName</c> parameter.
    /// </summary>
    [Theory]
    [InlineData(null)]
    [InlineData("")]
    [InlineData("   ")]
    public async Task ParseAsync_WithNullOrEmptyFileName_ThrowsArgumentException(string? fileName)
    {
        // Arrange
        var payload = Encoding.UTF8.GetBytes("test");
        using var input = new MemoryStream(payload);

        // Act
        var parse = () => _sut.ParseAsync(input, fileName!);

        // Assert
        var failure = await parse.Should().ThrowAsync<ArgumentException>();
        failure.WithParameterName("fileName");
    }

    #endregion

    #region Empty File Content Tests

    /// <summary>
    /// A PDF that yields no text must fail with an <see cref="InvalidOperationException"/>
    /// whose message mentions that text content could not be extracted.
    /// </summary>
    [Fact]
    public async Task ParseAsync_WithEmptyPdfContent_ThrowsInvalidOperationException()
    {
        // Arrange
        // Single-page PDF fixture that carries no text.
        using var input = new MemoryStream(CreateMinimalPdfWithNoText());

        // Act
        var parse = () => _sut.ParseAsync(input, "empty.pdf");

        // Assert
        var failure = await parse.Should().ThrowAsync<InvalidOperationException>();
        failure.WithMessage("*Could not extract text content*");
    }

    /// <summary>
    /// A DOCX whose body is empty must fail with an <see cref="InvalidOperationException"/>
    /// whose message mentions that text content could not be extracted.
    /// </summary>
    [Fact]
    public async Task ParseAsync_WithEmptyDocxContent_ThrowsInvalidOperationException()
    {
        // Arrange
        // DOCX fixture whose document body contains no paragraphs.
        using var input = new MemoryStream(CreateMinimalDocxWithNoText());

        // Act
        var parse = () => _sut.ParseAsync(input, "empty.docx");

        // Assert
        var failure = await parse.Should().ThrowAsync<InvalidOperationException>();
        failure.WithMessage("*Could not extract text content*");
    }

    #endregion

    #region PDF Parsing Path Tests

    /// <summary>
    /// With a PDF that contains text, the call is expected to get past extension
    /// detection and text extraction before it fails.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the fixture uses a placeholder API key, so the failure presumably
    /// comes from the Claude API call - confirm whether this test is meant to touch the
    /// network.
    /// </remarks>
    [Fact]
    public async Task ParseAsync_WithValidPdfExtension_TriggersPdfParsingPath()
    {
        // Arrange
        using var input = new MemoryStream(CreateMinimalPdfWithText("John Doe\njohn@example.com"));

        // Act
        var parse = () => _sut.ParseAsync(input, "resume.pdf");

        // Assert
        // Any exception is accepted except the two that would indicate an earlier stage
        // failed: the unsupported-extension rejection and the "no text extracted" error.
        var thrown = (await parse.Should().ThrowAsync<Exception>()).Which;

        thrown.Should().NotBeOfType<NotSupportedException>();
        thrown.Message.Should().NotContain("Could not extract text content");
    }

    #endregion

    #region DOCX Parsing Path Tests

    /// <summary>
    /// With a DOCX that contains text, the call is expected to get past extension
    /// detection and text extraction before it fails.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the fixture uses a placeholder API key, so the failure presumably
    /// comes from the Claude API call - confirm whether this test is meant to touch the
    /// network.
    /// </remarks>
    [Fact]
    public async Task ParseAsync_WithValidDocxExtension_TriggersDocxParsingPath()
    {
        // Arrange
        using var input = new MemoryStream(CreateMinimalDocxWithText("Jane Smith\njane@example.com"));

        // Act
        var parse = () => _sut.ParseAsync(input, "resume.docx");

        // Assert
        // Any exception is accepted except the two that would indicate an earlier stage
        // failed: the unsupported-extension rejection and the "no text extracted" error.
        var thrown = (await parse.Should().ThrowAsync<Exception>()).Which;

        thrown.Should().NotBeOfType<NotSupportedException>();
        thrown.Message.Should().NotContain("Could not extract text content");
    }

    #endregion

    #region Claude API Error Handling Tests

    /// <summary>
    /// With extractable DOCX text and the fixture's placeholder API key, the call must
    /// fail with something other than the service's own extraction/extension errors.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the method name promises an HttpRequestException, but the assertion
    /// accepts any exception type - confirm the concrete type the service surfaces before
    /// tightening it.
    /// </remarks>
    [Fact]
    public async Task ParseAsync_WhenClaudeApiFailsWithInvalidKey_ThrowsHttpRequestException()
    {
        // Arrange
        const string cvText = "John Doe\njohn@example.com\nSenior Developer at Tech Corp";
        using var input = new MemoryStream(CreateMinimalDocxWithText(cvText));

        // Act
        var parse = () => _sut.ParseAsync(input, "resume.docx");

        // Assert
        var thrown = (await parse.Should().ThrowAsync<Exception>()).Which;

        // Text extraction and extension detection must not be what failed.
        thrown.Message.Should().NotContain("Could not extract text content");
        thrown.Should().NotBeOfType<NotSupportedException>();
    }

    /// <summary>
    /// With extractable PDF text, the service must log (exactly once, at Debug level) that
    /// it is sending the CV text to the Claude API, even though the call then fails.
    /// </summary>
    [Fact]
    public async Task ParseAsync_WithValidTextContent_AttemptsClaudeApiCall()
    {
        // Arrange
        using var input = new MemoryStream(CreateMinimalPdfWithText("Jane Smith\njane@company.com\n+1-555-0123"));

        // Act
        var parse = () => _sut.ParseAsync(input, "resume.pdf");

        // Assert
        // The fixture's API key is a placeholder, so a failure is expected - but not the
        // text-extraction failure.
        var thrown = (await parse.Should().ThrowAsync<Exception>()).Which;
        thrown.Message.Should().NotContain("Could not extract text content");

        // The "sending" log entry shows the flow reached the API-call stage.
        _loggerMock.Verify(
            logger => logger.Log(
                LogLevel.Debug,
                It.IsAny<EventId>(),
                It.Is<It.IsAnyType>((state, stateType) => state.ToString()!.Contains("Sending CV text to Claude API")),
                It.IsAny<Exception?>(),
                It.IsAny<Func<It.IsAnyType, Exception?, string>>()),
            Times.Once);
    }

    #endregion

    #region Logging Tests

    /// <summary>
    /// Checks that a Debug-level entry containing "Parsing CV file" is logged exactly once,
    /// even when the call goes on to fail with <see cref="NotSupportedException"/>.
    /// </summary>
    [Fact]
    public async Task ParseAsync_LogsDebugMessageWithFileName()
    {
        // Arrange
        var payload = Encoding.UTF8.GetBytes("test");
        using var input = new MemoryStream(payload);

        // Act
        try
        {
            // A .txt name is unsupported, so the call is expected to fail early.
            await _sut.ParseAsync(input, "resume.txt");
        }
        catch (NotSupportedException)
        {
            // Expected
        }

        // Assert
        _loggerMock.Verify(
            logger => logger.Log(
                LogLevel.Debug,
                It.IsAny<EventId>(),
                It.Is<It.IsAnyType>((state, stateType) => state.ToString()!.Contains("Parsing CV file")),
                It.IsAny<Exception?>(),
                It.IsAny<Func<It.IsAnyType, Exception?, string>>()),
            Times.Once);
    }

    #endregion

    #region Helper Methods

    /// <summary>
    /// Creates a minimal single-page PDF (version 1.4) that has no content stream and
    /// therefore no text.
    /// </summary>
    /// <returns>The ASCII-encoded bytes of the PDF document.</returns>
    private static byte[] CreateMinimalPdfWithNoText()
    {
        return BuildMinimalPdf(
            "<< /Type /Catalog /Pages 2 0 R >>",
            "<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
            "<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>");
    }

    /// <summary>
    /// Creates a minimal single-page PDF (version 1.4) that shows <paramref name="text"/>
    /// in 12pt Helvetica.
    /// </summary>
    /// <param name="text">
    /// Text to place in the page's content stream. Backslashes and parentheses are escaped
    /// for use inside a PDF literal string; characters outside ASCII are replaced by the
    /// ASCII encoder.
    /// </param>
    /// <returns>The ASCII-encoded bytes of the PDF document.</returns>
    private static byte[] CreateMinimalPdfWithText(string text)
    {
        // Escape special characters for PDF
        var escapedText = text.Replace("\\", "\\\\").Replace("(", "\\(").Replace(")", "\\)");

        // /Length must equal the number of bytes between "stream\n" and the end-of-line
        // that precedes "endstream", so it is measured rather than hard-coded.
        var contentStream = $"BT\n/F1 12 Tf\n100 700 Td\n({escapedText}) Tj\nET";
        var contentLength = Encoding.ASCII.GetByteCount(contentStream);

        return BuildMinimalPdf(
            "<< /Type /Catalog /Pages 2 0 R >>",
            "<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
            "<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R /Resources << /Font << /F1 5 0 R >> >> >>",
            FormattableString.Invariant($"<< /Length {contentLength} >>\nstream\n{contentStream}\nendstream"),
            "<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>");
    }

    /// <summary>
    /// Serialises the given object bodies as indirect objects 1..N of a PDF 1.4 file and
    /// appends a cross-reference table and trailer whose offsets are measured from the
    /// bytes actually written.
    /// </summary>
    /// <param name="objectBodies">
    /// Bodies of the indirect objects in object-number order; the first one must be the
    /// document catalog because the trailer points /Root at object 1.
    /// </param>
    /// <returns>The ASCII-encoded bytes of the PDF document.</returns>
    private static byte[] BuildMinimalPdf(params string[] objectBodies)
    {
        using var pdf = new MemoryStream();

        // Explicit "\n" line endings keep the output independent of how this source file
        // was checked out (LF vs CRLF), which would otherwise shift every byte offset.
        void Append(string chunk)
        {
            var bytes = Encoding.ASCII.GetBytes(chunk);
            pdf.Write(bytes, 0, bytes.Length);
        }

        Append("%PDF-1.4\n");

        var objectOffsets = new long[objectBodies.Length];
        for (var index = 0; index < objectBodies.Length; index++)
        {
            objectOffsets[index] = pdf.Position;
            Append(FormattableString.Invariant($"{index + 1} 0 obj\n{objectBodies[index]}\nendobj\n"));
        }

        var xrefOffset = pdf.Position;
        var entryCount = objectBodies.Length + 1; // +1 for the mandatory free entry (object 0)

        Append(FormattableString.Invariant($"xref\n0 {entryCount}\n"));

        // Each cross-reference entry is exactly 20 bytes: 10-digit offset, space, 5-digit
        // generation, space, type keyword, then a two-byte end-of-line (space + LF here).
        Append("0000000000 65535 f \n");
        foreach (var offset in objectOffsets)
        {
            Append(FormattableString.Invariant($"{offset:D10} 00000 n \n"));
        }

        Append(FormattableString.Invariant(
            $"trailer\n<< /Size {entryCount} /Root 1 0 R >>\nstartxref\n{xrefOffset}\n%%EOF"));

        return pdf.ToArray();
    }

    /// <summary>
    /// Creates a minimal DOCX package whose document body is empty (no text content).
    /// </summary>
    /// <returns>The bytes of the zipped package.</returns>
    private static byte[] CreateMinimalDocxWithNoText()
    {
        return BuildMinimalDocx(string.Empty);
    }

    /// <summary>
    /// Creates a minimal DOCX package whose body holds one paragraph with a single run
    /// containing <paramref name="text"/>.
    /// </summary>
    /// <param name="text">Text to place in the run; XML special characters are escaped.</param>
    /// <returns>The bytes of the zipped package.</returns>
    private static byte[] CreateMinimalDocxWithText(string text)
    {
        var escapedText = System.Security.SecurityElement.Escape(text);

        // The fragment ends with a line break so the closing body tag written by the
        // builder stays on its own line.
        return BuildMinimalDocx($@"    <w:p>
      <w:r>
        <w:t>{escapedText}</w:t>
      </w:r>
    </w:p>
");
    }

    /// <summary>
    /// Writes the three parts both DOCX fixtures share - <c>[Content_Types].xml</c>,
    /// <c>_rels/.rels</c> and <c>word/document.xml</c> - into a new zip archive.
    /// </summary>
    /// <param name="bodyContentXml">
    /// Markup placed inside the <c>w:body</c> element, already escaped; pass an empty
    /// string for an empty body.
    /// </param>
    /// <returns>The bytes of the zipped package.</returns>
    private static byte[] BuildMinimalDocx(string bodyContentXml)
    {
        using var memoryStream = new MemoryStream();

        // leaveOpen: true so the archive can be disposed (which finalises the zip data)
        // while the backing stream stays readable for ToArray().
        using (var archive = new System.IO.Compression.ZipArchive(memoryStream, System.IO.Compression.ZipArchiveMode.Create, true))
        {
            WriteEntry(archive, "[Content_Types].xml", @"<?xml version=""1.0"" encoding=""UTF-8"" standalone=""yes""?>
<Types xmlns=""http://schemas.openxmlformats.org/package/2006/content-types"">
  <Default Extension=""rels"" ContentType=""application/vnd.openxmlformats-package.relationships+xml""/>
  <Default Extension=""xml"" ContentType=""application/xml""/>
  <Override PartName=""/word/document.xml"" ContentType=""application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml""/>
</Types>");

            WriteEntry(archive, "_rels/.rels", @"<?xml version=""1.0"" encoding=""UTF-8"" standalone=""yes""?>
<Relationships xmlns=""http://schemas.openxmlformats.org/package/2006/relationships"">
  <Relationship Id=""rId1"" Type=""http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"" Target=""word/document.xml""/>
</Relationships>");

            WriteEntry(archive, "word/document.xml", $@"<?xml version=""1.0"" encoding=""UTF-8"" standalone=""yes""?>
<w:document xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
  <w:body>
{bodyContentXml}  </w:body>
</w:document>");
        }

        return memoryStream.ToArray();

        // Creates one archive entry and writes the given text into it.
        static void WriteEntry(System.IO.Compression.ZipArchive target, string entryName, string content)
        {
            var entry = target.CreateEntry(entryName);
            using var writer = new StreamWriter(entry.Open());
            writer.Write(content);
        }
    }

    #endregion
}

/// <summary>
/// Tests for CVData model mapping that can be tested in isolation.
/// These tests verify the expected structure and constraints of the parsed CV data.
/// </summary>
/// <remarks>
/// Note: Integration tests for full Claude API response parsing require a valid Anthropic API key.
/// These tests should be run separately with a configured test environment.
/// To properly unit test Claude API response handling, consider refactoring CVParserService
/// to accept an IAnthropicClient interface for dependency injection.
/// </remarks>
public sealed class CVDataMappingTests
{
    /// <summary>
    /// A <c>CVData</c> created with only <c>FullName</c> set leaves the contact fields
    /// null and the three collections empty.
    /// </summary>
    [Fact]
    public void CVData_RequiresFullName()
    {
        // Arrange & Act
        var minimal = new CVData { FullName = "John Doe" };

        // Assert
        minimal.FullName.Should().Be("John Doe");

        // Optional contact details are unset.
        minimal.Email.Should().BeNull();
        minimal.Phone.Should().BeNull();

        // Collections start out empty.
        minimal.Employment.Should().BeEmpty();
        minimal.Education.Should().BeEmpty();
        minimal.Skills.Should().BeEmpty();
    }

    /// <summary>
    /// A fully populated <c>CVData</c> exposes the values it was initialised with:
    /// contact details, one employment entry, one education entry and four skills.
    /// </summary>
    [Fact]
    public void CVData_WithAllFields_MapsCorrectly()
    {
        // Arrange & Act
        var currentJob = new EmploymentEntry
        {
            CompanyName = "Tech Corp",
            JobTitle = "Senior Developer",
            Location = "New York",
            StartDate = new DateOnly(2020, 1, 1),
            EndDate = null,
            IsCurrent = true,
            Description = "Leading development team"
        };

        var degree = new EducationEntry
        {
            Institution = "MIT",
            Qualification = "BSc",
            Subject = "Computer Science",
            Grade = "3.9 GPA",
            StartDate = new DateOnly(2012, 9, 1),
            EndDate = new DateOnly(2016, 5, 15)
        };

        var populated = new CVData
        {
            FullName = "Jane Smith",
            Email = "jane@example.com",
            Phone = "+1-555-123-4567",
            Employment = [currentJob],
            Education = [degree],
            Skills = ["C#", ".NET", "Azure", "SQL"]
        };

        // Assert
        populated.FullName.Should().Be("Jane Smith");
        populated.Email.Should().Be("jane@example.com");
        populated.Phone.Should().Be("+1-555-123-4567");

        populated.Employment.Should().HaveCount(1);
        var mappedJob = populated.Employment[0];
        mappedJob.CompanyName.Should().Be("Tech Corp");
        mappedJob.IsCurrent.Should().BeTrue();
        mappedJob.EndDate.Should().BeNull();

        populated.Education.Should().HaveCount(1);
        var mappedDegree = populated.Education[0];
        mappedDegree.Institution.Should().Be("MIT");
        mappedDegree.Qualification.Should().Be("BSc");

        populated.Skills.Should().HaveCount(4);
        populated.Skills.Should().Contain("C#");
    }

    /// <summary>
    /// An <c>EmploymentEntry</c> created with only company name and job title leaves every
    /// other property at its default: null, or false for <c>IsCurrent</c>.
    /// </summary>
    [Fact]
    public void EmploymentEntry_RequiresCompanyNameAndJobTitle()
    {
        // Arrange & Act
        var minimal = new EmploymentEntry { CompanyName = "Company", JobTitle = "Developer" };

        // Assert
        minimal.CompanyName.Should().Be("Company");
        minimal.JobTitle.Should().Be("Developer");

        // Everything that was not set stays at its default.
        minimal.Location.Should().BeNull();
        minimal.StartDate.Should().BeNull();
        minimal.EndDate.Should().BeNull();
        minimal.IsCurrent.Should().BeFalse();
        minimal.Description.Should().BeNull();
    }

    /// <summary>
    /// An <c>EducationEntry</c> created with only the institution set leaves every other
    /// property null.
    /// </summary>
    [Fact]
    public void EducationEntry_RequiresInstitution()
    {
        // Arrange & Act
        var minimal = new EducationEntry { Institution = "University" };

        // Assert
        minimal.Institution.Should().Be("University");

        // Everything that was not set stays null.
        minimal.Qualification.Should().BeNull();
        minimal.Subject.Should().BeNull();
        minimal.Grade.Should().BeNull();
        minimal.StartDate.Should().BeNull();
        minimal.EndDate.Should().BeNull();
    }
}