feat: add EmailClassifier with Claude API integration and response parsing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-07 11:41:45 +01:00
parent 78f5ca864d
commit bd42cc3382
2 changed files with 230 additions and 0 deletions
--- a/src/SpamGuard/Services/EmailClassifier.cs
+++ b/src/SpamGuard/Services/EmailClassifier.cs
@@ -0,0 +1,130 @@
 // src/SpamGuard/Services/EmailClassifier.cs
 namespace SpamGuard.Services;
 using System.Text;
 using System.Text.Json;
 using System.Text.RegularExpressions;
 using Microsoft.Extensions.Logging;
 using Microsoft.Extensions.Options;
 using SpamGuard.Configuration;
 using SpamGuard.Models;
 /// <summary>
 /// Classifies emails as spam or legitimate by sending a short summary of each message to
 /// the Anthropic Messages API and parsing the JSON verdict out of the model's reply.
 /// </summary>
 public sealed partial class EmailClassifier
 {
    /// <summary>Endpoint every classification request is posted to.</summary>
    private const string MessagesEndpoint = "https://api.anthropic.com/v1/messages";
    /// <summary>Value sent in the <c>anthropic-version</c> request header.</summary>
    private const string AnthropicVersion = "2023-06-01";
    /// <summary>Value sent as <c>max_tokens</c>; the expected reply is one small JSON object.</summary>
    private const int MaxResponseTokens = 256;
    private readonly SpamGuardOptions _options;
    private readonly ILogger<EmailClassifier> _logger;
    private readonly HttpClient _httpClient;
    /// <summary>System prompt that defines the task and the JSON shape the model must answer with.</summary>
    private const string SystemPrompt = """
        You are an email spam classifier. Analyze the following email and determine if it is spam or legitimate.
        Spam includes:
        - Unsolicited marketing or promotional emails the recipient never signed up for
        - AI-generated emails designed to look like legitimate correspondence
        - Newsletter signups the recipient didn't request
        Legitimate includes:
        - Emails from known contacts or businesses the recipient has a relationship with
        - Transactional emails (receipts, shipping notifications, password resets)
        - Emails the recipient would expect to receive
        Respond with JSON only:
        {"classification": "spam" | "legitimate", "confidence": 0.0-1.0, "reason": "brief explanation"}
        """;
    /// <summary>Creates a classifier that reads its model, API key and body limit from <paramref name="options"/>.</summary>
    /// <param name="options">SpamGuard configuration; its <c>Claude</c> section is read on every call.</param>
    /// <param name="logger">Logger used for per-email classification diagnostics.</param>
    /// <param name="httpClient">HTTP client used to send requests; its lifetime is owned by the caller.</param>
    /// <exception cref="ArgumentNullException">Any argument is <see langword="null"/>.</exception>
    public EmailClassifier(
        IOptions<SpamGuardOptions> options,
        ILogger<EmailClassifier> logger,
        HttpClient httpClient)
    {
        ArgumentNullException.ThrowIfNull(options);
        ArgumentNullException.ThrowIfNull(logger);
        ArgumentNullException.ThrowIfNull(httpClient);
        _options = options.Value;
        _logger = logger;
        _httpClient = httpClient;
    }
    /// <summary>Builds the user message for one email: sender, subject and a length-limited body.</summary>
    /// <param name="email">The email to describe.</param>
    /// <returns>The prompt text; the body is cut to at most <c>Claude.MaxBodyLength</c> characters.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="email"/> is <see langword="null"/>.</exception>
    public string BuildPrompt(EmailSummary email)
    {
        ArgumentNullException.ThrowIfNull(email);
        var body = email.BodySnippet ?? string.Empty;
        // A misconfigured negative limit is treated as "send no body" rather than throwing.
        var limit = Math.Max(0, _options.Claude.MaxBodyLength);
        if (body.Length > limit)
        {
            // Never cut between the two halves of a surrogate pair: that would leave an
            // unpaired surrogate (invalid UTF-16) at the end of the prompt.
            if (limit > 0 && char.IsHighSurrogate(body[limit - 1]))
            {
                limit--;
            }
            body = body[..limit];
        }
        return $"""
            Email details:
            From: {email.From}
            Subject: {email.Subject}
            Body: {body}
            """;
    }
    /// <summary>Sends <paramref name="email"/> to the Messages API and returns the parsed verdict.</summary>
    /// <param name="email">The email to classify.</param>
    /// <param name="ct">Token that cancels the HTTP request and the response read.</param>
    /// <returns>
    /// The parsed classification, or <see langword="null"/> when the reply contains no text
    /// block or the text is not the expected JSON object.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="email"/> is <see langword="null"/>.</exception>
    /// <exception cref="HttpRequestException">The request failed or returned a non-success status code.</exception>
    /// <exception cref="JsonException">The HTTP response body is not valid JSON.</exception>
    public async Task<ClassificationResult?> ClassifyAsync(EmailSummary email, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(email);
        var userMessage = BuildPrompt(email);
        _logger.LogDebug("Classifying email UID={Uid} from {From}", email.Uid, email.From);
        var requestBody = new
        {
            model = _options.Claude.Model,
            max_tokens = MaxResponseTokens,
            system = SystemPrompt,
            messages = new[]
            {
                new { role = "user", content = userMessage }
            }
        };
        var json = JsonSerializer.Serialize(requestBody);
        // Request, response and document all hold disposable resources (content streams,
        // pooled buffers); release them deterministically when the method exits.
        using var request = new HttpRequestMessage(HttpMethod.Post, MessagesEndpoint)
        {
            Content = new StringContent(json, Encoding.UTF8, "application/json")
        };
        request.Headers.Add("x-api-key", _options.Claude.ApiKey);
        request.Headers.Add("anthropic-version", AnthropicVersion);
        using var response = await _httpClient.SendAsync(request, ct);
        response.EnsureSuccessStatusCode();
        var responseJson = await response.Content.ReadAsStringAsync(ct);
        using var doc = JsonDocument.Parse(responseJson);
        var text = ExtractFirstTextBlock(doc.RootElement);
        var result = ParseResponse(text);
        if (result is not null)
        {
            _logger.LogInformation(
                "UID={Uid} classified as {Classification} (confidence={Confidence}): {Reason}",
                email.Uid, result.Classification, result.Confidence, result.Reason);
        }
        else
        {
            _logger.LogWarning("UID={Uid} classification failed to parse: {Text}", email.Uid, text);
        }
        return result;
    }
    /// <summary>Parses the model's reply text into a <see cref="ClassificationResult"/>.</summary>
    /// <param name="text">Raw reply text, optionally wrapped in a Markdown code fence.</param>
    /// <returns>
    /// The parsed result, or <see langword="null"/> when <paramref name="text"/> is null, blank,
    /// not JSON, or lacks a usable <c>classification</c>, <c>confidence</c> or <c>reason</c>.
    /// </returns>
    public static ClassificationResult? ParseResponse(string text)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return null;
        }
        var fencing = StripMarkdownFencing();
        // First attempt: drop the fence markers and parse whatever remains.
        var result = ParseClassificationJson(fencing.Replace(text, "$1").Trim());
        if (result is not null)
        {
            return result;
        }
        // Fallback: the model wrapped the fenced JSON in prose, so parse only the
        // contents of the first fenced block.
        var fenced = fencing.Match(text);
        return fenced.Success ? ParseClassificationJson(fenced.Groups[1].Value) : null;
    }
    /// <summary>Returns the text of the first content block that carries a string <c>text</c> property, or an empty string.</summary>
    private static string ExtractFirstTextBlock(JsonElement root)
    {
        if (root.ValueKind != JsonValueKind.Object
            || !root.TryGetProperty("content", out var content)
            || content.ValueKind != JsonValueKind.Array)
        {
            return "";
        }
        foreach (var block in content.EnumerateArray())
        {
            if (block.ValueKind == JsonValueKind.Object
                && block.TryGetProperty("text", out var textElement)
                && textElement.ValueKind == JsonValueKind.String)
            {
                return textElement.GetString() ?? "";
            }
        }
        return "";
    }
    /// <summary>Parses one JSON object into a result; returns null when it is malformed or incomplete.</summary>
    private static ClassificationResult? ParseClassificationJson(string json)
    {
        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;
            if (root.ValueKind != JsonValueKind.Object
                || !root.TryGetProperty("classification", out var classification)
                || !root.TryGetProperty("confidence", out var confidence)
                || !root.TryGetProperty("reason", out var reason))
            {
                return null;
            }
            if (!IsStringOrNull(classification)
                || !IsStringOrNull(reason)
                || confidence.ValueKind != JsonValueKind.Number
                || !confidence.TryGetDouble(out var score))
            {
                return null;
            }
            return new ClassificationResult(
                Classification: classification.GetString() ?? "unknown",
                Confidence: score,
                Reason: reason.GetString() ?? ""
            );
        }
        catch (Exception ex) when (ex is JsonException or InvalidOperationException or FormatException)
        {
            // Only parse/shape failures mean "unparseable reply"; anything else propagates.
            return null;
        }
    }
    /// <summary>True when the element is a JSON string or JSON null, the two kinds <c>GetString</c> accepts.</summary>
    private static bool IsStringOrNull(JsonElement element) =>
        element.ValueKind is JsonValueKind.String or JsonValueKind.Null;
    // RegexOptions.Compiled is omitted: the regex source generator ignores it.
    [GeneratedRegex(@"```(?:json)?\s*([\s\S]*?)\s*```")]
    private static partial Regex StripMarkdownFencing();
 }
--- a/tests/SpamGuard.Tests/Services/EmailClassifierTests.cs
+++ b/tests/SpamGuard.Tests/Services/EmailClassifierTests.cs
@@ -0,0 +1,100 @@
 // tests/SpamGuard.Tests/Services/EmailClassifierTests.cs
 namespace SpamGuard.Tests.Services;
 using System.Net;
 using System.Text.Json;
 using Microsoft.Extensions.Logging.Abstractions;
 using Microsoft.Extensions.Options;
 using SpamGuard.Configuration;
 using SpamGuard.Models;
 using SpamGuard.Services;
 /// <summary>
 /// Unit tests for <see cref="EmailClassifier"/> prompt building and response parsing.
 /// No test here sends an HTTP request.
 /// </summary>
 public class EmailClassifierTests
 {
    // One shared client: these tests never send a request, so a client per test would
    // only create undisposed instances.
    private static readonly HttpClient SharedHttpClient = new();
    private static SpamGuardOptions DefaultOptions => new()
    {
        Claude = new ClaudeOptions
        {
            ApiKey = "test-key",
            Model = "claude-sonnet-4-6",
            MaxBodyLength = 2000
        }
    };
    private static EmailSummary SampleEmail => new(
        Uid: 1,
        From: "spammer@sketchy.com",
        Subject: "Buy now! Limited offer!",
        BodySnippet: "Click here to claim your prize...",
        Date: DateTimeOffset.UtcNow
    );
    /// <summary>Builds a classifier wired to <see cref="DefaultOptions"/> and a no-op logger.</summary>
    private static EmailClassifier CreateClassifier() => new(
        Options.Create(DefaultOptions),
        NullLogger<EmailClassifier>.Instance,
        SharedHttpClient
    );
    [Fact]
    public void BuildPrompt_ContainsSenderAndSubjectAndBody()
    {
        var classifier = CreateClassifier();
        var prompt = classifier.BuildPrompt(SampleEmail);
        Assert.Contains("spammer@sketchy.com", prompt);
        Assert.Contains("Buy now! Limited offer!", prompt);
        Assert.Contains("Click here to claim your prize...", prompt);
    }
    [Fact]
    public void BuildPrompt_TruncatesLongBody()
    {
        var maxLength = DefaultOptions.Claude.MaxBodyLength;
        var longBody = new string('x', 5000);
        var email = SampleEmail with { BodySnippet = longBody };
        var classifier = CreateClassifier();
        var prompt = classifier.BuildPrompt(email);
        // Pin the exact boundary: exactly MaxBodyLength body characters survive, not one more.
        Assert.Contains(new string('x', maxLength), prompt);
        Assert.DoesNotContain(new string('x', maxLength + 1), prompt);
    }
    [Fact]
    public void ParseResponse_ValidJson_ReturnsResult()
    {
        var json = """{"classification": "spam", "confidence": 0.95, "reason": "Unsolicited marketing"}""";
        var result = EmailClassifier.ParseResponse(json);
        Assert.NotNull(result);
        Assert.True(result.IsSpam);
        Assert.Equal(0.95, result.Confidence, 10);
        Assert.Equal("Unsolicited marketing", result.Reason);
    }
    [Theory]
    [InlineData("not json at all")]
    [InlineData("")]
    [InlineData("[]")]
    [InlineData("{}")]
    [InlineData("""{"classification": "spam"}""")]
    [InlineData("""{"classification": "spam", "confidence": "high", "reason": "x"}""")]
    public void ParseResponse_InvalidJson_ReturnsNull(string payload)
    {
        var result = EmailClassifier.ParseResponse(payload);
        Assert.Null(result);
    }
    [Fact]
    public void ParseResponse_JsonWithMarkdownFencing_ReturnsResult()
    {
        var json = """
            ```json
            {"classification": "legitimate", "confidence": 0.85, "reason": "Normal business email"}
            ```
            """;
        var result = EmailClassifier.ParseResponse(json);
        Assert.NotNull(result);
        Assert.False(result.IsSpam);
        Assert.Equal(0.85, result.Confidence, 10);
        Assert.Equal("Normal business email", result.Reason);
    }
 }