Files
RealCV/tests/TrueCV.Tests/Services/CVParserServiceTests.cs
peter 89d1f7e33b Add comprehensive unit test suite
Test project with 143 tests covering:
- TimelineAnalyserService (27 tests): gap/overlap detection, edge cases
- CVParserService (35 tests): file parsing, extension handling, API calls
- CompanyVerifierService (23 tests): verification, caching, fuzzy matching
- CVCheckService (24 tests): CRUD operations, file upload, job queuing
- ProcessCVCheckJob (34 tests): full workflow, scoring algorithm, flags

Uses xUnit, Moq, FluentAssertions, EF Core InMemory

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 19:45:07 +01:00

695 lines
22 KiB
C#

using System.Text;
using FluentAssertions;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Moq;
using TrueCV.Application.Models;
using TrueCV.Infrastructure.Configuration;
using TrueCV.Infrastructure.Services;
namespace TrueCV.Tests.Services;
public sealed class CVParserServiceTests : IDisposable
{
private readonly Mock<ILogger<CVParserService>> _loggerMock;
private readonly Mock<IOptions<AnthropicSettings>> _settingsMock;
private readonly CVParserService _sut;
/// <summary>
/// Creates the service under test, wiring it to a mocked logger and to mocked
/// <see cref="AnthropicSettings"/> options that carry a placeholder API key.
/// </summary>
public CVParserServiceTests()
{
    var settings = new AnthropicSettings { ApiKey = "test-api-key" };

    _settingsMock = new Mock<IOptions<AnthropicSettings>>();
    _settingsMock.Setup(options => options.Value).Returns(settings);

    _loggerMock = new Mock<ILogger<CVParserService>>();

    _sut = new CVParserService(_settingsMock.Object, _loggerMock.Object);
}
/// <summary>
/// Per-test teardown hook. It currently performs no work; it is the place to
/// release resources should the fixture ever acquire any.
/// </summary>
public void Dispose()
{
    // Nothing to clean up at present.
}
#region File Extension Tests
/// <summary>
/// A file name ending in an extension from the list below (including no extension at all)
/// must be rejected with a <see cref="NotSupportedException"/> whose message quotes the extension.
/// </summary>
[Theory]
[InlineData(".txt")]
[InlineData(".doc")]
[InlineData(".rtf")]
[InlineData(".xml")]
[InlineData(".html")]
[InlineData("")]
public async Task ParseAsync_WithUnsupportedFileExtension_ThrowsNotSupportedException(string extension)
{
    // Arrange
    var payload = Encoding.UTF8.GetBytes("test content");
    using var input = new MemoryStream(payload);
    var expectedMessagePattern = $"*'{extension}'*not supported*";

    // Act
    var parse = () => _sut.ParseAsync(input, "resume" + extension);

    // Assert
    var failure = await parse.Should().ThrowAsync<NotSupportedException>();
    failure.WithMessage(expectedMessagePattern);
}
/// <summary>
/// A file name without any extension must be rejected with a
/// <see cref="NotSupportedException"/> whose message quotes an empty extension.
/// </summary>
[Fact]
public async Task ParseAsync_WithNoFileExtension_ThrowsNotSupportedException()
{
    // Arrange
    var payload = Encoding.UTF8.GetBytes("test content");
    using var input = new MemoryStream(payload);

    // Act
    var parse = () => _sut.ParseAsync(input, "resume");

    // Assert
    var failure = await parse.Should().ThrowAsync<NotSupportedException>();
    failure.WithMessage("*''*not supported*");
}
/// <summary>
/// File names containing spaces, parentheses, dashes, underscores or upper-case extensions
/// must still have their extension recognised.
/// </summary>
/// <remarks>
/// The stream content is not a real document, so the call is expected to fail; the test only
/// requires that the failure is not a <see cref="NotSupportedException"/>.
/// </remarks>
[Theory]
[InlineData("my resume.pdf")]
[InlineData("John Doe CV (2024).pdf")]
[InlineData("resume-final-v2.docx")]
[InlineData("CV_John_Doe.DOCX")]
public async Task ParseAsync_WithSpecialCharactersInFileName_HandlesExtensionCorrectly(string fileName)
{
    // Arrange
    using var stream = new MemoryStream(Encoding.UTF8.GetBytes("not valid content"));

    // Act
    var act = () => _sut.ParseAsync(stream, fileName);

    // Assert
    // Some exception must be thrown (content is invalid), but not the "unsupported extension" one.
    var thrown = await act.Should().ThrowAsync<Exception>();
    thrown.Which.Should().NotBeOfType<NotSupportedException>("because file extension should be recognized");
}
/// <summary>
/// For a file name with several dots, only the final segment (<c>.pdf</c>) should be treated
/// as the extension, not <c>.resume</c> or <c>.final</c>.
/// </summary>
/// <remarks>
/// The stream content is not a real PDF, so the call is expected to fail; the test only
/// requires that the failure is not a <see cref="NotSupportedException"/>.
/// </remarks>
[Fact]
public async Task ParseAsync_WithMultipleDotsInFileName_UsesLastExtension()
{
    // Arrange
    using var stream = new MemoryStream(Encoding.UTF8.GetBytes("not valid content"));
    const string fileName = "my.resume.final.pdf";

    // Act
    var act = () => _sut.ParseAsync(stream, fileName);

    // Assert
    var thrown = await act.Should().ThrowAsync<Exception>();
    thrown.Which.Should().NotBeOfType<NotSupportedException>("because .pdf extension should be recognized");
}
/// <summary>
/// The <c>.pdf</c> extension must be recognised regardless of letter casing.
/// </summary>
/// <remarks>
/// The stream does not hold a valid PDF, so the call is expected to throw; the test asserts
/// that the exception is anything other than <see cref="NotSupportedException"/>.
/// </remarks>
[Theory]
[InlineData("resume.pdf")]
[InlineData("resume.PDF")]
[InlineData("resume.Pdf")]
[InlineData("resume.pDf")]
public async Task ParseAsync_WithPdfExtension_CaseInsensitive_AttemptsPdfParsing(string fileName)
{
    // Arrange
    // Deliberately invalid PDF bytes: this test is about extension detection only.
    using var stream = new MemoryStream(Encoding.UTF8.GetBytes("not a valid pdf"));

    // Act
    var act = () => _sut.ParseAsync(stream, fileName);

    // Assert
    var thrown = await act.Should().ThrowAsync<Exception>("because the method should throw for invalid PDF content");
    thrown.Which.Should().NotBeOfType<NotSupportedException>("because PDF extension should be recognized");
}
/// <summary>
/// The <c>.docx</c> extension must be recognised regardless of letter casing.
/// </summary>
/// <remarks>
/// The stream does not hold a valid DOCX package, so the call is expected to throw; the test
/// asserts that the exception is anything other than <see cref="NotSupportedException"/>.
/// </remarks>
[Theory]
[InlineData("resume.docx")]
[InlineData("resume.DOCX")]
[InlineData("resume.Docx")]
[InlineData("resume.dOcX")]
public async Task ParseAsync_WithDocxExtension_CaseInsensitive_AttemptsDocxParsing(string fileName)
{
    // Arrange
    // Deliberately invalid DOCX bytes: this test is about extension detection only.
    using var stream = new MemoryStream(Encoding.UTF8.GetBytes("not a valid docx"));

    // Act
    var act = () => _sut.ParseAsync(stream, fileName);

    // Assert
    var thrown = await act.Should().ThrowAsync<Exception>("because the method should throw for invalid DOCX content");
    thrown.Which.Should().NotBeOfType<NotSupportedException>("because DOCX extension should be recognized");
}
#endregion
#region Input Validation Tests
/// <summary>
/// Passing a null stream must raise an <see cref="ArgumentNullException"/> that names the
/// <c>fileStream</c> parameter.
/// </summary>
[Fact]
public async Task ParseAsync_WithNullStream_ThrowsArgumentNullException()
{
    // Arrange
    Stream? missingStream = null;

    // Act
    var parse = () => _sut.ParseAsync(missingStream!, "resume.pdf");

    // Assert
    await parse.Should()
        .ThrowAsync<ArgumentNullException>()
        .WithParameterName("fileStream");
}
/// <summary>
/// A null, empty or single-space file name must raise an <see cref="ArgumentException"/>
/// that names the <c>fileName</c> parameter.
/// </summary>
[Theory]
[InlineData(null)]
[InlineData("")]
[InlineData(" ")]
public async Task ParseAsync_WithNullOrEmptyFileName_ThrowsArgumentException(string? fileName)
{
    // Arrange
    var payload = Encoding.UTF8.GetBytes("test");
    using var input = new MemoryStream(payload);

    // Act
    var parse = () => _sut.ParseAsync(input, fileName!);

    // Assert
    await parse.Should()
        .ThrowAsync<ArgumentException>()
        .WithParameterName("fileName");
}
#endregion
#region Empty File Content Tests
/// <summary>
/// A structurally minimal PDF that carries no text must be rejected with an
/// <see cref="InvalidOperationException"/> reporting that no text could be extracted.
/// </summary>
[Fact]
public async Task ParseAsync_WithEmptyPdfContent_ThrowsInvalidOperationException()
{
    // Arrange
    using var input = new MemoryStream(CreateMinimalPdfWithNoText());

    // Act
    var parse = () => _sut.ParseAsync(input, "empty.pdf");

    // Assert
    var failure = await parse.Should().ThrowAsync<InvalidOperationException>();
    failure.WithMessage("*Could not extract text content*");
}
/// <summary>
/// A DOCX package whose document body is empty must be rejected with an
/// <see cref="InvalidOperationException"/> reporting that no text could be extracted.
/// </summary>
[Fact]
public async Task ParseAsync_WithEmptyDocxContent_ThrowsInvalidOperationException()
{
    // Arrange
    using var input = new MemoryStream(CreateMinimalDocxWithNoText());

    // Act
    var parse = () => _sut.ParseAsync(input, "empty.docx");

    // Assert
    var failure = await parse.Should().ThrowAsync<InvalidOperationException>();
    failure.WithMessage("*Could not extract text content*");
}
#endregion
#region PDF Parsing Path Tests
/// <summary>
/// A PDF that contains text should get past text extraction: the call is still expected to
/// fail, but not with <see cref="NotSupportedException"/> and not with the
/// "Could not extract text content" message.
/// </summary>
/// <remarks>
/// NOTE(review): the failure is presumably produced by the Claude API call made with the
/// placeholder key — confirm against <c>CVParserService</c>.
/// </remarks>
[Fact]
public async Task ParseAsync_WithValidPdfExtension_TriggersPdfParsingPath()
{
    // Arrange
    using var input = new MemoryStream(CreateMinimalPdfWithText("John Doe\njohn@example.com"));

    // Act
    var parse = () => _sut.ParseAsync(input, "resume.pdf");
    var failure = (await parse.Should().ThrowAsync<Exception>()).Which;

    // Assert
    // Neither the unsupported-extension error nor the text-extraction error is acceptable here.
    failure.Should().NotBeOfType<NotSupportedException>();
    failure.Message.Should().NotContain("Could not extract text content");
}
#endregion
#region DOCX Parsing Path Tests
/// <summary>
/// A DOCX that contains text should get past text extraction: the call is still expected to
/// fail, but not with <see cref="NotSupportedException"/> and not with the
/// "Could not extract text content" message.
/// </summary>
/// <remarks>
/// NOTE(review): the failure is presumably produced by the Claude API call made with the
/// placeholder key — confirm against <c>CVParserService</c>.
/// </remarks>
[Fact]
public async Task ParseAsync_WithValidDocxExtension_TriggersDocxParsingPath()
{
    // Arrange
    using var input = new MemoryStream(CreateMinimalDocxWithText("Jane Smith\njane@example.com"));

    // Act
    var parse = () => _sut.ParseAsync(input, "resume.docx");
    var failure = (await parse.Should().ThrowAsync<Exception>()).Which;

    // Assert
    failure.Should().NotBeOfType<NotSupportedException>();
    failure.Message.Should().NotContain("Could not extract text content");
}
#endregion
#region Claude API Error Handling Tests
/// <summary>
/// With a DOCX that contains text and the placeholder API key, the call must fail with an
/// exception that is neither the text-extraction error nor <see cref="NotSupportedException"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the method name promises an HttpRequestException, but the assertion accepts
/// any exception type. Tighten the assertion or rename the test once the exception type
/// surfaced by <c>CVParserService</c> is confirmed.
/// </remarks>
[Fact]
public async Task ParseAsync_WhenClaudeApiFailsWithInvalidKey_ThrowsHttpRequestException()
{
    // Arrange
    const string cvText = "John Doe\njohn@example.com\nSenior Developer at Tech Corp";
    using var input = new MemoryStream(CreateMinimalDocxWithText(cvText));

    // Act
    var parse = () => _sut.ParseAsync(input, "resume.docx");
    var failure = (await parse.Should().ThrowAsync<Exception>()).Which;

    // Assert
    // The failure must not be one of the errors raised before the API is reached.
    failure.Message.Should().NotContain("Could not extract text content");
    failure.Should().NotBeOfType<NotSupportedException>();
}
/// <summary>
/// With a PDF that contains text, the call is expected to fail after text extraction, and the
/// logger must have received exactly one Debug entry containing
/// "Sending CV text to Claude API".
/// </summary>
[Fact]
public async Task ParseAsync_WithValidTextContent_AttemptsClaudeApiCall()
{
    // Arrange
    const string cvText = "Jane Smith\njane@company.com\n+1-555-0123";
    using var input = new MemoryStream(CreateMinimalPdfWithText(cvText));

    // Act
    var parse = () => _sut.ParseAsync(input, "resume.pdf");
    var failure = (await parse.Should().ThrowAsync<Exception>()).Which;

    // Assert
    // The failure must come from somewhere after the text-extraction phase.
    failure.Message.Should().NotContain("Could not extract text content");

    // The hand-off to the API must have been logged once at Debug level.
    _loggerMock.Verify(
        logger => logger.Log(
            LogLevel.Debug,
            It.IsAny<EventId>(),
            It.Is<It.IsAnyType>((state, type) => state.ToString()!.Contains("Sending CV text to Claude API")),
            It.IsAny<Exception?>(),
            It.IsAny<Func<It.IsAnyType, Exception?, string>>()),
        Times.Once);
}
#endregion
#region Logging Tests
/// <summary>
/// Calling the parser must produce exactly one Debug log entry containing "Parsing CV file".
/// An unsupported <c>.txt</c> file name is used so the call ends early; the resulting
/// <see cref="NotSupportedException"/> is tolerated.
/// </summary>
[Fact]
public async Task ParseAsync_LogsDebugMessageWithFileName()
{
    // Arrange
    var payload = Encoding.UTF8.GetBytes("test");
    using var input = new MemoryStream(payload);

    // Act
    try
    {
        // unsupported to fail early
        await _sut.ParseAsync(input, "resume.txt");
    }
    catch (NotSupportedException)
    {
        // Expected
    }

    // Assert
    _loggerMock.Verify(
        logger => logger.Log(
            LogLevel.Debug,
            It.IsAny<EventId>(),
            It.Is<It.IsAnyType>((state, type) => state.ToString()!.Contains("Parsing CV file")),
            It.IsAny<Exception?>(),
            It.IsAny<Func<It.IsAnyType, Exception?, string>>()),
        Times.Once);
}
#endregion
#region Helper Methods
/// <summary>
/// Creates a minimal valid PDF with no text content.
/// </summary>
/// <returns>
/// ASCII bytes of a PDF 1.4 file with a catalog, a page tree and a single page that has
/// no content stream.
/// </returns>
private static byte[] CreateMinimalPdfWithNoText()
{
    return BuildPdf(
        "<< /Type /Catalog /Pages 2 0 R >>",
        "<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
        "<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>");
}

/// <summary>
/// Creates a minimal valid PDF with text content.
/// </summary>
/// <param name="text">
/// Text to show on the page. Backslashes and parentheses are escaped so the value is a
/// well-formed PDF literal string; characters outside ASCII are replaced by the ASCII encoder.
/// </param>
/// <returns>
/// ASCII bytes of a PDF 1.4 file with one page whose content stream draws
/// <paramref name="text"/> with a single <c>Tj</c> operator in Helvetica.
/// </returns>
private static byte[] CreateMinimalPdfWithText(string text)
{
    // Escape special characters for PDF
    var escapedText = text.Replace("\\", "\\\\").Replace("(", "\\(").Replace(")", "\\)");

    var content = $"BT\n/F1 12 Tf\n100 700 Td\n({escapedText}) Tj\nET";

    // /Length must be the real byte count of the stream data; the end-of-line before
    // "endstream" is not part of it.
    var contentLength = Encoding.ASCII.GetByteCount(content);

    return BuildPdf(
        "<< /Type /Catalog /Pages 2 0 R >>",
        "<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
        "<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R /Resources << /Font << /F1 5 0 R >> >> >>",
        FormattableString.Invariant($"<< /Length {contentLength} >>\nstream\n{content}\nendstream"),
        "<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>");
}

/// <summary>
/// Serialises the given object bodies as indirect objects 1..N of a PDF 1.4 file and appends
/// a cross-reference table and trailer whose offsets are computed from the bytes written.
/// </summary>
/// <param name="objectBodies">
/// Body of each indirect object, in object-number order; the first one must be the catalog
/// because the trailer's <c>/Root</c> points at object 1.
/// </param>
/// <returns>The complete file as ASCII bytes, using LF line endings throughout.</returns>
private static byte[] BuildPdf(params string[] objectBodies)
{
    using var pdf = new MemoryStream();

    void Append(string chunk)
    {
        var bytes = Encoding.ASCII.GetBytes(chunk);
        pdf.Write(bytes, 0, bytes.Length);
    }

    Append("%PDF-1.4\n");

    // Record where each object starts so the xref table is correct for any text length.
    var offsets = new long[objectBodies.Length];
    for (var index = 0; index < objectBodies.Length; index++)
    {
        offsets[index] = pdf.Position;
        Append(FormattableString.Invariant($"{index + 1} 0 obj\n{objectBodies[index]}\nendobj\n"));
    }

    var xrefOffset = pdf.Position;
    Append(FormattableString.Invariant($"xref\n0 {objectBodies.Length + 1}\n"));

    // Every xref entry is exactly 20 bytes: 10-digit offset, 5-digit generation,
    // type flag and a two-byte end-of-line (space + LF).
    Append("0000000000 65535 f \n");
    foreach (var offset in offsets)
    {
        Append(FormattableString.Invariant($"{offset:D10} 00000 n \n"));
    }

    Append(FormattableString.Invariant(
        $"trailer\n<< /Size {objectBodies.Length + 1} /Root 1 0 R >>\nstartxref\n{xrefOffset}\n%%EOF"));

    return pdf.ToArray();
}
/// <summary>
/// Creates a minimal valid DOCX with no text content.
/// </summary>
/// <returns>
/// Bytes of a zip package containing <c>[Content_Types].xml</c>, <c>_rels/.rels</c> and a
/// <c>word/document.xml</c> whose body is empty.
/// </returns>
private static byte[] CreateMinimalDocxWithNoText()
{
    // word/document.xml (empty body)
    return BuildDocxPackage(@"<?xml version=""1.0"" encoding=""UTF-8"" standalone=""yes""?>
<w:document xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
<w:body>
</w:body>
</w:document>");
}

/// <summary>
/// Creates a minimal valid DOCX with text content.
/// </summary>
/// <param name="text">
/// Text placed in a single run of a single paragraph; XML-special characters are escaped.
/// </param>
/// <returns>
/// Bytes of a zip package containing <c>[Content_Types].xml</c>, <c>_rels/.rels</c> and a
/// <c>word/document.xml</c> holding <paramref name="text"/>.
/// </returns>
private static byte[] CreateMinimalDocxWithText(string text)
{
    // word/document.xml with text
    var escapedText = System.Security.SecurityElement.Escape(text);
    return BuildDocxPackage($@"<?xml version=""1.0"" encoding=""UTF-8"" standalone=""yes""?>
<w:document xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
<w:body>
<w:p>
<w:r>
<w:t>{escapedText}</w:t>
</w:r>
</w:p>
</w:body>
</w:document>");
}

/// <summary>
/// Writes the three parts shared by both DOCX fixtures into an in-memory zip archive:
/// the content-types part, the package relationships part and the supplied main document part.
/// </summary>
/// <param name="documentXml">Complete XML to store as <c>word/document.xml</c>.</param>
/// <returns>The bytes of the finished zip archive.</returns>
private static byte[] BuildDocxPackage(string documentXml)
{
    const string contentTypesXml = @"<?xml version=""1.0"" encoding=""UTF-8"" standalone=""yes""?>
<Types xmlns=""http://schemas.openxmlformats.org/package/2006/content-types"">
<Default Extension=""rels"" ContentType=""application/vnd.openxmlformats-package.relationships+xml""/>
<Default Extension=""xml"" ContentType=""application/xml""/>
<Override PartName=""/word/document.xml"" ContentType=""application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml""/>
</Types>";

    const string relationshipsXml = @"<?xml version=""1.0"" encoding=""UTF-8"" standalone=""yes""?>
<Relationships xmlns=""http://schemas.openxmlformats.org/package/2006/relationships"">
<Relationship Id=""rId1"" Type=""http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"" Target=""word/document.xml""/>
</Relationships>";

    // In Create mode only one entry stream may be open at a time, so each entry is
    // written and closed before the next one is created.
    static void AddEntry(System.IO.Compression.ZipArchive zip, string entryName, string content)
    {
        using var writer = new StreamWriter(zip.CreateEntry(entryName).Open());
        writer.Write(content);
    }

    using var memoryStream = new MemoryStream();

    // leaveOpen: true keeps the MemoryStream usable after the archive is disposed;
    // disposing the archive is what finalises the zip.
    using (var archive = new System.IO.Compression.ZipArchive(memoryStream, System.IO.Compression.ZipArchiveMode.Create, true))
    {
        AddEntry(archive, "[Content_Types].xml", contentTypesXml);
        AddEntry(archive, "_rels/.rels", relationshipsXml);
        AddEntry(archive, "word/document.xml", documentXml);
    }

    return memoryStream.ToArray();
}
#endregion
}
/// <summary>
/// Isolated tests for the CV data model types (<see cref="CVData"/>,
/// <see cref="EmploymentEntry"/>, <see cref="EducationEntry"/>): they check that values set
/// through object initialisers are read back and that members left unset have the expected defaults.
/// </summary>
/// <remarks>
/// Note: Integration tests for full Claude API response parsing require a valid Anthropic API key
/// and should be run separately with a configured test environment.
/// To properly unit test Claude API response handling, consider refactoring CVParserService
/// to accept an IAnthropicClient interface for dependency injection.
/// </remarks>
public sealed class CVDataMappingTests
{
    /// <summary>
    /// A <see cref="CVData"/> initialised with only a full name keeps that name, leaves the
    /// contact fields null and exposes empty collections.
    /// </summary>
    [Fact]
    public void CVData_RequiresFullName()
    {
        // Arrange & Act
        var minimal = new CVData { FullName = "John Doe" };

        // Assert
        minimal.FullName.Should().Be("John Doe");
        minimal.Email.Should().BeNull();
        minimal.Phone.Should().BeNull();
        minimal.Employment.Should().BeEmpty();
        minimal.Education.Should().BeEmpty();
        minimal.Skills.Should().BeEmpty();
    }

    /// <summary>
    /// A fully populated <see cref="CVData"/> returns the values it was initialised with.
    /// </summary>
    [Fact]
    public void CVData_WithAllFields_MapsCorrectly()
    {
        // Arrange & Act
        var currentJob = new EmploymentEntry
        {
            CompanyName = "Tech Corp",
            JobTitle = "Senior Developer",
            Location = "New York",
            StartDate = new DateOnly(2020, 1, 1),
            EndDate = null,
            IsCurrent = true,
            Description = "Leading development team"
        };
        var degree = new EducationEntry
        {
            Institution = "MIT",
            Qualification = "BSc",
            Subject = "Computer Science",
            Grade = "3.9 GPA",
            StartDate = new DateOnly(2012, 9, 1),
            EndDate = new DateOnly(2016, 5, 15)
        };
        var populated = new CVData
        {
            FullName = "Jane Smith",
            Email = "jane@example.com",
            Phone = "+1-555-123-4567",
            Employment = [currentJob],
            Education = [degree],
            Skills = ["C#", ".NET", "Azure", "SQL"]
        };

        // Assert
        populated.FullName.Should().Be("Jane Smith");
        populated.Email.Should().Be("jane@example.com");
        populated.Phone.Should().Be("+1-555-123-4567");

        populated.Employment.Should().HaveCount(1);
        var storedJob = populated.Employment[0];
        storedJob.CompanyName.Should().Be("Tech Corp");
        storedJob.IsCurrent.Should().BeTrue();
        storedJob.EndDate.Should().BeNull();

        populated.Education.Should().HaveCount(1);
        var storedDegree = populated.Education[0];
        storedDegree.Institution.Should().Be("MIT");
        storedDegree.Qualification.Should().Be("BSc");

        populated.Skills.Should().HaveCount(4);
        populated.Skills.Should().Contain("C#");
    }

    /// <summary>
    /// An <see cref="EmploymentEntry"/> initialised with only company and job title keeps both,
    /// leaves the other reference/nullable members null and reports <c>IsCurrent</c> as false.
    /// </summary>
    [Fact]
    public void EmploymentEntry_RequiresCompanyNameAndJobTitle()
    {
        // Arrange & Act
        var job = new EmploymentEntry { CompanyName = "Company", JobTitle = "Developer" };

        // Assert
        job.CompanyName.Should().Be("Company");
        job.JobTitle.Should().Be("Developer");
        job.Location.Should().BeNull();
        job.StartDate.Should().BeNull();
        job.EndDate.Should().BeNull();
        job.IsCurrent.Should().BeFalse();
        job.Description.Should().BeNull();
    }

    /// <summary>
    /// An <see cref="EducationEntry"/> initialised with only an institution keeps it and leaves
    /// every other member null.
    /// </summary>
    [Fact]
    public void EducationEntry_RequiresInstitution()
    {
        // Arrange & Act
        var education = new EducationEntry { Institution = "University" };

        // Assert
        education.Institution.Should().Be("University");
        education.Qualification.Should().BeNull();
        education.Subject.Should().BeNull();
        education.Grade.Should().BeNull();
        education.StartDate.Should().BeNull();
        education.EndDate.Should().BeNull();
    }
}