using System.Text;
using FluentAssertions;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Moq;
using RealCV.Application.Models;
using RealCV.Infrastructure.Configuration;
using RealCV.Infrastructure.Services;

namespace RealCV.Tests.Services;

/// <summary>
/// Tests for <see cref="CVParserService"/>: file-extension handling, argument validation,
/// empty-document handling, the PDF/DOCX extraction paths and debug logging.
/// </summary>
/// <remarks>
/// NOTE(review): the generic type arguments in this file (mock types, expected exception types,
/// Moq logger matchers) were reconstructed from context - confirm against the service's actual
/// constructor signature and thrown exception types.
/// </remarks>
public sealed class CVParserServiceTests : IDisposable
{
    // Minimal package parts required for an Open Packaging Conventions (DOCX) archive.
    // NOTE(review): reconstructed minimal payloads - confirm they match what the service's DOCX reader expects.
    private const string ContentTypesXml = """
        <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
        <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
          <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
          <Default Extension="xml" ContentType="application/xml"/>
          <Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
        </Types>
        """;

    private const string PackageRelationshipsXml = """
        <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
        <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
          <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>
        </Relationships>
        """;

    private readonly Mock<ILogger<CVParserService>> _loggerMock;
    private readonly Mock<IOptions<AnthropicSettings>> _settingsMock;
    private readonly CVParserService _sut;

    /// <summary>
    /// Builds the service under test with a mocked logger and mocked settings carrying a dummy API key.
    /// </summary>
    public CVParserServiceTests()
    {
        _loggerMock = new Mock<ILogger<CVParserService>>();
        _settingsMock = new Mock<IOptions<AnthropicSettings>>();
        _settingsMock.Setup(x => x.Value).Returns(new AnthropicSettings { ApiKey = "test-api-key" });

        _sut = new CVParserService(_settingsMock.Object, _loggerMock.Object);
    }

    /// <summary>
    /// Per-test teardown hook; currently there is nothing to release.
    /// </summary>
    public void Dispose()
    {
        // Clean up any resources if needed
    }

    #region File Extension Tests

    /// <summary>
    /// Unsupported extensions (including none at all) are rejected with a message quoting the extension.
    /// </summary>
    [Theory]
    [InlineData(".txt")]
    [InlineData(".doc")]
    [InlineData(".rtf")]
    [InlineData(".xml")]
    [InlineData(".html")]
    [InlineData("")]
    public async Task ParseAsync_WithUnsupportedFileExtension_ThrowsNotSupportedException(string extension)
    {
        // Arrange
        using var stream = new MemoryStream(Encoding.UTF8.GetBytes("test content"));
        var fileName = $"resume{extension}";

        // Act
        var act = () => _sut.ParseAsync(stream, fileName);

        // Assert
        await act.Should().ThrowAsync<NotSupportedException>()
            .WithMessage($"*'{extension}'*not supported*");
    }

    /// <summary>
    /// A file name without any extension is reported as an empty, unsupported extension.
    /// </summary>
    [Fact]
    public async Task ParseAsync_WithNoFileExtension_ThrowsNotSupportedException()
    {
        // Arrange
        using var stream = new MemoryStream(Encoding.UTF8.GetBytes("test content"));
        const string fileName = "resume";

        // Act
        var act = () => _sut.ParseAsync(stream, fileName);

        // Assert
        await act.Should().ThrowAsync<NotSupportedException>()
            .WithMessage("*''*not supported*");
    }

    /// <summary>
    /// Spaces, parentheses, dashes and underscores in the name do not prevent extension recognition.
    /// </summary>
    [Theory]
    [InlineData("my resume.pdf")]
    [InlineData("John Doe CV (2024).pdf")]
    [InlineData("resume-final-v2.docx")]
    [InlineData("CV_John_Doe.DOCX")]
    public async Task ParseAsync_WithSpecialCharactersInFileName_HandlesExtensionCorrectly(string fileName)
    {
        // Arrange
        using var stream = new MemoryStream(Encoding.UTF8.GetBytes("not valid content"));

        // Act
        var thrownException = await CaptureExceptionAsync(() => _sut.ParseAsync(stream, fileName));

        // Assert
        // Should attempt to parse (not throw NotSupportedException).
        // Will fail on content parsing, but extension should be recognized.
        thrownException.Should().NotBeNull();
        thrownException.Should().NotBeOfType<NotSupportedException>("because file extension should be recognized");
    }

    /// <summary>
    /// Only the text after the last dot is treated as the extension.
    /// </summary>
    [Fact]
    public async Task ParseAsync_WithMultipleDotsInFileName_UsesLastExtension()
    {
        // Arrange
        using var stream = new MemoryStream(Encoding.UTF8.GetBytes("not valid content"));
        const string fileName = "my.resume.final.pdf";

        // Act
        var thrownException = await CaptureExceptionAsync(() => _sut.ParseAsync(stream, fileName));

        // Assert
        // Should recognize .pdf as the extension (not .resume or .final).
        thrownException.Should().NotBeNull();
        thrownException.Should().NotBeOfType<NotSupportedException>("because .pdf extension should be recognized");
    }

    /// <summary>
    /// The PDF extension is matched case-insensitively; invalid content then fails during parsing.
    /// </summary>
    [Theory]
    [InlineData("resume.pdf")]
    [InlineData("resume.PDF")]
    [InlineData("resume.Pdf")]
    [InlineData("resume.pDf")]
    public async Task ParseAsync_WithPdfExtension_CaseInsensitive_AttemptsPdfParsing(string fileName)
    {
        // Arrange
        // Content is deliberately not a PDF: parsing fails, but only after extension detection.
        using var stream = new MemoryStream(Encoding.UTF8.GetBytes("not a valid pdf"));

        // Act
        var thrownException = await CaptureExceptionAsync(() => _sut.ParseAsync(stream, fileName));

        // Assert
        // Should attempt PDF parsing and fail (not throw NotSupportedException).
        thrownException.Should().NotBeNull("because the method should throw for invalid PDF content");
        thrownException.Should().NotBeOfType<NotSupportedException>("because PDF extension should be recognized");
    }

    /// <summary>
    /// The DOCX extension is matched case-insensitively; invalid content then fails during parsing.
    /// </summary>
    [Theory]
    [InlineData("resume.docx")]
    [InlineData("resume.DOCX")]
    [InlineData("resume.Docx")]
    [InlineData("resume.dOcX")]
    public async Task ParseAsync_WithDocxExtension_CaseInsensitive_AttemptsDocxParsing(string fileName)
    {
        // Arrange
        // Content is deliberately not a DOCX: parsing fails, but only after extension detection.
        using var stream = new MemoryStream(Encoding.UTF8.GetBytes("not a valid docx"));

        // Act
        var thrownException = await CaptureExceptionAsync(() => _sut.ParseAsync(stream, fileName));

        // Assert
        // Should attempt DOCX parsing and fail (not throw NotSupportedException).
        thrownException.Should().NotBeNull("because the method should throw for invalid DOCX content");
        thrownException.Should().NotBeOfType<NotSupportedException>("because DOCX extension should be recognized");
    }

    #endregion

    #region Input Validation Tests

    /// <summary>
    /// A null stream is rejected and the offending parameter is named.
    /// </summary>
    [Fact]
    public async Task ParseAsync_WithNullStream_ThrowsArgumentNullException()
    {
        // Arrange
        Stream? stream = null;

        // Act
        var act = () => _sut.ParseAsync(stream!, "resume.pdf");

        // Assert
        await act.Should().ThrowAsync<ArgumentNullException>()
            .WithParameterName("fileStream");
    }

    /// <summary>
    /// Null, empty and whitespace-only file names are rejected and the offending parameter is named.
    /// </summary>
    [Theory]
    [InlineData(null)]
    [InlineData("")]
    [InlineData(" ")]
    public async Task ParseAsync_WithNullOrEmptyFileName_ThrowsArgumentException(string? fileName)
    {
        // Arrange
        using var stream = new MemoryStream(Encoding.UTF8.GetBytes("test"));

        // Act
        var act = () => _sut.ParseAsync(stream, fileName!);

        // Assert
        // ArgumentNullException derives from ArgumentException, so the null case matches as well.
        await act.Should().ThrowAsync<ArgumentException>()
            .WithParameterName("fileName");
    }

    #endregion

    #region Empty File Content Tests

    /// <summary>
    /// A structurally valid PDF without any text is reported as having no extractable content.
    /// </summary>
    [Fact]
    public async Task ParseAsync_WithEmptyPdfContent_ThrowsInvalidOperationException()
    {
        // Arrange
        var emptyPdfBytes = CreateMinimalPdfWithNoText();
        using var stream = new MemoryStream(emptyPdfBytes);

        // Act
        var act = () => _sut.ParseAsync(stream, "empty.pdf");

        // Assert
        await act.Should().ThrowAsync<InvalidOperationException>()
            .WithMessage("*Could not extract text content*");
    }

    /// <summary>
    /// A structurally valid DOCX with an empty body is reported as having no extractable content.
    /// </summary>
    [Fact]
    public async Task ParseAsync_WithEmptyDocxContent_ThrowsInvalidOperationException()
    {
        // Arrange
        var emptyDocxBytes = CreateMinimalDocxWithNoText();
        using var stream = new MemoryStream(emptyDocxBytes);

        // Act
        var act = () => _sut.ParseAsync(stream, "empty.docx");

        // Assert
        await act.Should().ThrowAsync<InvalidOperationException>()
            .WithMessage("*Could not extract text content*");
    }

    #endregion

    #region PDF Parsing Path Tests

    /// <summary>
    /// A PDF containing text gets past extraction; whatever fails afterwards is not an extraction error.
    /// </summary>
    [Fact]
    public async Task ParseAsync_WithValidPdfExtension_TriggersPdfParsingPath()
    {
        // Arrange
        var pdfBytes = CreateMinimalPdfWithText("John Doe\njohn@example.com");
        using var stream = new MemoryStream(pdfBytes);

        // Act
        // The Claude API call is expected to fail with the dummy key.
        // NOTE(review): this appears to depend on a real outbound call failing - confirm it is hermetic.
        var act = () => _sut.ParseAsync(stream, "resume.pdf");

        // Assert
        var exception = await act.Should().ThrowAsync<Exception>();

        // A failure during text extraction would carry the "Could not extract" message.
        exception.Which.Should().NotBeOfType<NotSupportedException>();
        exception.Which.Message.Should().NotContain("Could not extract text content");
    }

    #endregion

    #region DOCX Parsing Path Tests

    /// <summary>
    /// A DOCX containing text gets past extraction; whatever fails afterwards is not an extraction error.
    /// </summary>
    [Fact]
    public async Task ParseAsync_WithValidDocxExtension_TriggersDocxParsingPath()
    {
        // Arrange
        var docxBytes = CreateMinimalDocxWithText("Jane Smith\njane@example.com");
        using var stream = new MemoryStream(docxBytes);

        // Act
        // The Claude API call is expected to fail with the dummy key.
        var act = () => _sut.ParseAsync(stream, "resume.docx");

        // Assert
        var exception = await act.Should().ThrowAsync<Exception>();

        exception.Which.Should().NotBeOfType<NotSupportedException>();
        exception.Which.Message.Should().NotContain("Could not extract text content");
    }

    #endregion

    #region Claude API Error Handling Tests

    /// <summary>
    /// With an invalid API key the call fails, but not with one of the service's own extraction errors.
    /// </summary>
    [Fact]
    public async Task ParseAsync_WhenClaudeApiFailsWithInvalidKey_ThrowsHttpRequestException()
    {
        // Arrange
        // The dummy API key configured in the constructor should cause an authentication failure.
        var docxBytes = CreateMinimalDocxWithText("John Doe\njohn@example.com\nSenior Developer at Tech Corp");
        using var stream = new MemoryStream(docxBytes);

        // Act
        var act = () => _sut.ParseAsync(stream, "resume.docx");

        // Assert
        // NOTE(review): asserted as the base Exception type because the concrete type surfaced by the
        // API client is not visible from this file - tighten once confirmed.
        var exception = await act.Should().ThrowAsync<Exception>();

        // Should NOT be our domain exceptions (text extraction worked).
        exception.Which.Message.Should().NotContain("Could not extract text content");
        exception.Which.Should().NotBeOfType<NotSupportedException>();
    }

    /// <summary>
    /// After successful text extraction the service logs that it is sending the text to the Claude API.
    /// </summary>
    [Fact]
    public async Task ParseAsync_WithValidTextContent_AttemptsClaudeApiCall()
    {
        // Arrange
        var pdfBytes = CreateMinimalPdfWithText("Jane Smith\njane@company.com\n+1-555-0123");
        using var stream = new MemoryStream(pdfBytes);

        // Act
        var act = () => _sut.ParseAsync(stream, "resume.pdf");

        // Assert
        // With the dummy API key the call is expected to fail, which still confirms that
        // text was extracted and the API call was attempted.
        var exception = await act.Should().ThrowAsync<Exception>();

        exception.Which.Message.Should().NotContain("Could not extract text content");
        VerifyDebugLoggedOnce("Sending CV text to Claude API");
    }

    #endregion

    #region Logging Tests

    /// <summary>
    /// A debug entry is written for the file being parsed even when parsing is rejected early.
    /// </summary>
    [Fact]
    public async Task ParseAsync_LogsDebugMessageWithFileName()
    {
        // Arrange
        using var stream = new MemoryStream(Encoding.UTF8.GetBytes("test"));
        const string fileName = "resume.txt"; // unsupported to fail early

        // Act
        try
        {
            await _sut.ParseAsync(stream, fileName);
        }
        catch (NotSupportedException)
        {
            // Expected: only the log side effect matters for this test.
        }

        // Assert
        VerifyDebugLoggedOnce("Parsing CV file");
    }

    #endregion

    #region Helper Methods

    /// <summary>
    /// Runs <paramref name="action"/> and returns the exception it throws, or <c>null</c> when it completes.
    /// </summary>
    /// <param name="action">The asynchronous operation to observe.</param>
    /// <returns>The thrown exception, or <c>null</c> if nothing was thrown.</returns>
    private static async Task<Exception?> CaptureExceptionAsync(Func<Task> action)
    {
        try
        {
            await action();
            return null;
        }
        catch (Exception ex)
        {
            // Deliberately broad: callers assert on the captured exception's type themselves.
            return ex;
        }
    }

    /// <summary>
    /// Verifies that exactly one debug-level log entry whose rendered text contains
    /// <paramref name="messageFragment"/> was written to the mocked logger.
    /// </summary>
    /// <param name="messageFragment">Text expected inside the formatted log message.</param>
    private void VerifyDebugLoggedOnce(string messageFragment)
    {
        _loggerMock.Verify(
            x => x.Log(
                LogLevel.Debug,
                It.IsAny<EventId>(),
                It.Is<It.IsAnyType>((v, t) => v.ToString()!.Contains(messageFragment)),
                It.IsAny<Exception?>(),
                It.IsAny<Func<It.IsAnyType, Exception?, string>>()),
            Times.Once);
    }

    /// <summary>
    /// Creates a minimal valid PDF with no text content.
    /// </summary>
    /// <returns>The PDF bytes: catalog, page tree and a single page without a content stream.</returns>
    private static byte[] CreateMinimalPdfWithNoText()
    {
        return BuildPdf(
            "<< /Type /Catalog /Pages 2 0 R >>",
            "<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
            "<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>");
    }

    /// <summary>
    /// Creates a minimal valid PDF with text content.
    /// </summary>
    /// <param name="text">Text to show on the single page; encoded as ASCII.</param>
    /// <returns>The PDF bytes: catalog, page tree, page, content stream and a Helvetica font.</returns>
    private static byte[] CreateMinimalPdfWithText(string text)
    {
        // Backslash and parentheses are the delimiters of a PDF literal string and must be escaped.
        var escapedText = text.Replace("\\", "\\\\").Replace("(", "\\(").Replace(")", "\\)");
        var content = $"BT\n/F1 12 Tf\n100 700 Td\n({escapedText}) Tj\nET";

        // /Length is computed from the actual stream bytes rather than hard-coded.
        var contentLength = Encoding.ASCII.GetByteCount(content);
        var contentStream = FormattableString.Invariant(
            $"<< /Length {contentLength} >>\nstream\n{content}\nendstream");

        return BuildPdf(
            "<< /Type /Catalog /Pages 2 0 R >>",
            "<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
            "<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R /Resources << /Font << /F1 5 0 R >> >> >>",
            contentStream,
            "<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>");
    }

    /// <summary>
    /// Serializes the given object bodies as PDF 1.4 indirect objects numbered from 1 (object 1 is the
    /// document root) and appends a cross-reference table whose offsets are measured from the written bytes.
    /// </summary>
    /// <param name="objectBodies">Body of each indirect object, in object-number order.</param>
    /// <returns>The complete PDF file as ASCII bytes.</returns>
    private static byte[] BuildPdf(params string[] objectBodies)
    {
        using var pdf = new MemoryStream();

        void Write(string value)
        {
            var bytes = Encoding.ASCII.GetBytes(value);
            pdf.Write(bytes, 0, bytes.Length);
        }

        Write("%PDF-1.4\n");

        var offsets = new long[objectBodies.Length];
        for (var i = 0; i < objectBodies.Length; i++)
        {
            offsets[i] = pdf.Position;
            Write(FormattableString.Invariant($"{i + 1} 0 obj\n{objectBodies[i]}\nendobj\n"));
        }

        var xrefOffset = pdf.Position;
        var entryCount = objectBodies.Length + 1; // +1 for the mandatory free entry of object 0

        Write(FormattableString.Invariant($"xref\n0 {entryCount}\n"));
        Write("0000000000 65535 f \n");
        foreach (var offset in offsets)
        {
            // Each cross-reference entry is exactly 20 bytes: 10-digit offset, 5-digit generation, flag, EOL.
            Write(FormattableString.Invariant($"{offset:D10} 00000 n \n"));
        }

        Write(FormattableString.Invariant(
            $"trailer\n<< /Size {entryCount} /Root 1 0 R >>\nstartxref\n{xrefOffset}\n%%EOF"));

        return pdf.ToArray();
    }

    /// <summary>
    /// Creates a minimal valid DOCX with no text content.
    /// </summary>
    /// <returns>The DOCX bytes; the main document part has an empty body.</returns>
    private static byte[] CreateMinimalDocxWithNoText()
    {
        return BuildDocx(string.Empty);
    }

    /// <summary>
    /// Creates a minimal valid DOCX with text content.
    /// </summary>
    /// <param name="text">Text placed in a single paragraph/run; XML-escaped before being embedded.</param>
    /// <returns>The DOCX bytes.</returns>
    private static byte[] CreateMinimalDocxWithText(string text)
    {
        var escapedText = System.Security.SecurityElement.Escape(text);
        return BuildDocx($"<w:p><w:r><w:t>{escapedText}</w:t></w:r></w:p>");
    }

    /// <summary>
    /// Builds a DOCX package (ZIP archive) holding the content-types part, the package relationships
    /// part and a main document part whose body is <paramref name="bodyXml"/>.
    /// </summary>
    /// <param name="bodyXml">Well-formed WordprocessingML placed inside the body element; may be empty.</param>
    /// <returns>The archive bytes.</returns>
    private static byte[] BuildDocx(string bodyXml)
    {
        var documentXml = $"""
            <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
            <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
              <w:body>{bodyXml}</w:body>
            </w:document>
            """;

        using var memoryStream = new MemoryStream();

        // leaveOpen keeps the MemoryStream usable after the archive is disposed; disposing the
        // archive is what writes the ZIP central directory, so it must happen before ToArray().
        using (var archive = new System.IO.Compression.ZipArchive(memoryStream, System.IO.Compression.ZipArchiveMode.Create, true))
        {
            WriteArchiveEntry(archive, "[Content_Types].xml", ContentTypesXml);
            WriteArchiveEntry(archive, "_rels/.rels", PackageRelationshipsXml);
            WriteArchiveEntry(archive, "word/document.xml", documentXml);
        }

        return memoryStream.ToArray();
    }

    /// <summary>
    /// Adds a text entry named <paramref name="entryName"/> to <paramref name="archive"/>.
    /// </summary>
    /// <param name="archive">Archive opened in create mode.</param>
    /// <param name="entryName">Path of the entry inside the archive.</param>
    /// <param name="content">Text written with the writer's default encoding.</param>
    private static void WriteArchiveEntry(System.IO.Compression.ZipArchive archive, string entryName, string content)
    {
        var entry = archive.CreateEntry(entryName);
        using var writer = new StreamWriter(entry.Open());
        writer.Write(content);
    }

    #endregion
}

/// <summary>
/// Tests for CVData model mapping that can be tested in isolation.
/// These tests verify the expected structure and constraints of the parsed CV data.
/// </summary>
/// <remarks>
/// Note: Integration tests for full Claude API response parsing require a valid Anthropic API key.
/// These tests should be run separately with a configured test environment.
/// To properly unit test Claude API response handling, consider refactoring CVParserService
/// to accept an IAnthropicClient interface for dependency injection.
/// </remarks>
/// <seealso cref="CVData"/>
/// <seealso cref="EmploymentEntry"/>
/// <seealso cref="EducationEntry"/>
public sealed class CVDataMappingTests
{
    /// <summary>
    /// With only the name supplied, the contact fields are null and every collection is empty.
    /// </summary>
    [Fact]
    public void CVData_RequiresFullName()
    {
        // Arrange & Act
        var cv = new CVData { FullName = "John Doe" };

        // Assert
        cv.FullName.Should().Be("John Doe");
        cv.Email.Should().BeNull();
        cv.Phone.Should().BeNull();
        cv.Employment.Should().BeEmpty();
        cv.Education.Should().BeEmpty();
        cv.Skills.Should().BeEmpty();
    }

    /// <summary>
    /// Every value assigned through the object initializer is readable back unchanged.
    /// </summary>
    [Fact]
    public void CVData_WithAllFields_MapsCorrectly()
    {
        // Arrange & Act
        var currentJob = new EmploymentEntry
        {
            CompanyName = "Tech Corp",
            JobTitle = "Senior Developer",
            Location = "New York",
            StartDate = new DateOnly(2020, 1, 1),
            EndDate = null,
            IsCurrent = true,
            Description = "Leading development team"
        };

        var degree = new EducationEntry
        {
            Institution = "MIT",
            Qualification = "BSc",
            Subject = "Computer Science",
            Grade = "3.9 GPA",
            StartDate = new DateOnly(2012, 9, 1),
            EndDate = new DateOnly(2016, 5, 15)
        };

        var cv = new CVData
        {
            FullName = "Jane Smith",
            Email = "jane@example.com",
            Phone = "+1-555-123-4567",
            Employment = [currentJob],
            Education = [degree],
            Skills = ["C#", ".NET", "Azure", "SQL"]
        };

        // Assert: scalar fields
        cv.FullName.Should().Be("Jane Smith");
        cv.Email.Should().Be("jane@example.com");
        cv.Phone.Should().Be("+1-555-123-4567");

        // Assert: the single employment entry
        cv.Employment.Should().HaveCount(1);
        var mappedJob = cv.Employment[0];
        mappedJob.CompanyName.Should().Be("Tech Corp");
        mappedJob.IsCurrent.Should().BeTrue();
        mappedJob.EndDate.Should().BeNull();

        // Assert: the single education entry
        cv.Education.Should().HaveCount(1);
        var mappedDegree = cv.Education[0];
        mappedDegree.Institution.Should().Be("MIT");
        mappedDegree.Qualification.Should().Be("BSc");

        // Assert: skills
        cv.Skills.Should().HaveCount(4);
        cv.Skills.Should().Contain("C#");
    }

    /// <summary>
    /// With only company and title supplied, the remaining employment fields are null or false.
    /// </summary>
    [Fact]
    public void EmploymentEntry_RequiresCompanyNameAndJobTitle()
    {
        // Arrange & Act
        var job = new EmploymentEntry
        {
            CompanyName = "Company",
            JobTitle = "Developer"
        };

        // Assert
        job.CompanyName.Should().Be("Company");
        job.JobTitle.Should().Be("Developer");
        job.Location.Should().BeNull();
        job.StartDate.Should().BeNull();
        job.EndDate.Should().BeNull();
        job.IsCurrent.Should().BeFalse();
        job.Description.Should().BeNull();
    }

    /// <summary>
    /// With only the institution supplied, the remaining education fields are null.
    /// </summary>
    [Fact]
    public void EducationEntry_RequiresInstitution()
    {
        // Arrange & Act
        var education = new EducationEntry { Institution = "University" };

        // Assert
        education.Institution.Should().Be("University");
        education.Qualification.Should().BeNull();
        education.Subject.Should().BeNull();
        education.Grade.Should().BeNull();
        education.StartDate.Should().BeNull();
        education.EndDate.Should().BeNull();
    }
}