feat: add EmailClassifier with Claude API integration and response parsing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-07 11:41:45 +01:00
parent 78f5ca864d
commit bd42cc3382
2 changed files with 230 additions and 0 deletions
--- a/src/SpamGuard/Services/EmailClassifier.cs
+++ b/src/SpamGuard/Services/EmailClassifier.cs
@@ -0,0 +1,130 @@
+// src/SpamGuard/Services/EmailClassifier.cs
+namespace SpamGuard.Services;
+
+using System.Text;
+using System.Text.Json;
+using System.Text.RegularExpressions;
+using Microsoft.Extensions.Logging;
+using Microsoft.Extensions.Options;
+using SpamGuard.Configuration;
+using SpamGuard.Models;
+
+/// <summary>
+/// Classifies emails as spam or legitimate by posting a prompt to the Anthropic Messages
+/// endpoint and parsing the JSON verdict out of the model's reply.
+/// </summary>
+public sealed partial class EmailClassifier
+{
+    private const string MessagesEndpoint = "https://api.anthropic.com/v1/messages";
+    private const string ApiVersion = "2023-06-01";
+
+    // Upper bound on the reply length requested from the model.
+    private const int MaxResponseTokens = 256;
+
+    private readonly SpamGuardOptions _options;
+    private readonly ILogger<EmailClassifier> _logger;
+    private readonly HttpClient _httpClient;
+
+    private const string SystemPrompt = """
+        You are an email spam classifier. Analyze the following email and determine if it is spam or legitimate.
+
+        Spam includes:
+        - Unsolicited marketing or promotional emails the recipient never signed up for
+        - AI-generated emails designed to look like legitimate correspondence
+        - Newsletter signups the recipient didn't request
+
+        Legitimate includes:
+        - Emails from known contacts or businesses the recipient has a relationship with
+        - Transactional emails (receipts, shipping notifications, password resets)
+        - Emails the recipient would expect to receive
+
+        Respond with JSON only:
+        {"classification": "spam" | "legitimate", "confidence": 0.0-1.0, "reason": "brief explanation"}
+        """;
+
+    /// <summary>Creates a classifier bound to the given options, logger and HTTP client.</summary>
+    /// <param name="options">Supplies the Claude settings (model, API key, body length limit).</param>
+    /// <param name="logger">Receives debug, information and warning messages per classification.</param>
+    /// <param name="httpClient">Client used to send requests; it is not disposed by this class.</param>
+    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
+    public EmailClassifier(
+        IOptions<SpamGuardOptions> options,
+        ILogger<EmailClassifier> logger,
+        HttpClient httpClient)
+    {
+        ArgumentNullException.ThrowIfNull(options);
+        ArgumentNullException.ThrowIfNull(logger);
+        ArgumentNullException.ThrowIfNull(httpClient);
+
+        _options = options.Value;
+        _logger = logger;
+        _httpClient = httpClient;
+    }
+
+    /// <summary>
+    /// Builds the user message for one email: sender, subject, and the body snippet cut to
+    /// the configured <c>Claude.MaxBodyLength</c>.
+    /// </summary>
+    /// <param name="email">The email to describe.</param>
+    /// <returns>The prompt text sent as the user message.</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="email"/> is <c>null</c>.</exception>
+    public string BuildPrompt(EmailSummary email)
+    {
+        ArgumentNullException.ThrowIfNull(email);
+
+        var body = Truncate(email.BodySnippet ?? string.Empty, _options.Claude.MaxBodyLength);
+
+        // NOTE(review): From, Subject and Body are interpolated verbatim, so text inside the
+        // email can address the model directly. Consider delimiting these fields if that matters.
+        return $"""
+            Email details:
+            From: {email.From}
+            Subject: {email.Subject}
+            Body: {body}
+            """;
+    }
+
+    /// <summary>
+    /// Sends the email to the model and returns its parsed verdict.
+    /// </summary>
+    /// <param name="email">The email to classify.</param>
+    /// <param name="ct">Cancels the HTTP request and the response read.</param>
+    /// <returns>
+    /// The parsed classification, or <c>null</c> when the reply carries no text that parses
+    /// as a verdict (a warning is logged in that case).
+    /// </returns>
+    /// <exception cref="ArgumentNullException"><paramref name="email"/> is <c>null</c>.</exception>
+    /// <exception cref="HttpRequestException">The request failed or returned a non-success status.</exception>
+    /// <exception cref="JsonException">The response body is not valid JSON.</exception>
+    public async Task<ClassificationResult?> ClassifyAsync(EmailSummary email, CancellationToken ct = default)
+    {
+        ArgumentNullException.ThrowIfNull(email);
+
+        var userMessage = BuildPrompt(email);
+
+        _logger.LogDebug("Classifying email UID={Uid} from {From}", email.Uid, email.From);
+
+        var requestBody = new
+        {
+            model = _options.Claude.Model,
+            max_tokens = MaxResponseTokens,
+            system = SystemPrompt,
+            messages = new[]
+            {
+                new { role = "user", content = userMessage }
+            }
+        };
+
+        // Disposing the request also disposes its StringContent.
+        using var request = new HttpRequestMessage(HttpMethod.Post, MessagesEndpoint)
+        {
+            Content = new StringContent(JsonSerializer.Serialize(requestBody), Encoding.UTF8, "application/json")
+        };
+        request.Headers.Add("x-api-key", _options.Claude.ApiKey);
+        request.Headers.Add("anthropic-version", ApiVersion);
+
+        using var response = await _httpClient.SendAsync(request, ct);
+        response.EnsureSuccessStatusCode();
+
+        var responseJson = await response.Content.ReadAsStringAsync(ct);
+        var text = ExtractFirstText(responseJson);
+
+        var result = ParseResponse(text);
+
+        if (result is not null)
+        {
+            _logger.LogInformation(
+                "UID={Uid} classified as {Classification} (confidence={Confidence}): {Reason}",
+                email.Uid, result.Classification, result.Confidence, result.Reason);
+        }
+        else
+        {
+            _logger.LogWarning("UID={Uid} classification failed to parse: {Text}", email.Uid, text);
+        }
+
+        return result;
+    }
+
+    /// <summary>
+    /// Parses the model's reply text into a <see cref="ClassificationResult"/>.
+    /// </summary>
+    /// <remarks>
+    /// The text is first parsed as-is. Only if that fails is the content of the first
+    /// markdown code fence tried, so a reply that wraps the JSON in a fence (with or without
+    /// surrounding prose) is still accepted, while valid JSON whose strings happen to
+    /// contain backticks is left untouched.
+    /// </remarks>
+    /// <param name="text">Raw reply text from the model.</param>
+    /// <returns>The parsed verdict, or <c>null</c> when no candidate parses.</returns>
+    public static ClassificationResult? ParseResponse(string text)
+    {
+        if (string.IsNullOrWhiteSpace(text))
+        {
+            return null;
+        }
+
+        var trimmed = text.Trim();
+        if (TryParseVerdict(trimmed) is { } direct)
+        {
+            return direct;
+        }
+
+        var fence = StripMarkdownFencing().Match(trimmed);
+        return fence.Success ? TryParseVerdict(fence.Groups[1].Value) : null;
+    }
+
+    // Cuts value to at most maxLength UTF-16 code units without splitting a surrogate pair.
+    // A non-positive maxLength yields an empty string.
+    private static string Truncate(string value, int maxLength)
+    {
+        if (maxLength <= 0)
+        {
+            return string.Empty;
+        }
+
+        if (value.Length <= maxLength)
+        {
+            return value;
+        }
+
+        var end = maxLength;
+        if (char.IsHighSurrogate(value[end - 1]) && char.IsLowSurrogate(value[end]))
+        {
+            // Cutting here would leave a lone high surrogate; drop it as well.
+            end--;
+        }
+
+        return value[..end];
+    }
+
+    // Returns the first string-valued "text" found in the response's "content" array, or an
+    // empty string when the envelope has no such entry. Throws JsonException on invalid JSON.
+    private static string ExtractFirstText(string responseJson)
+    {
+        using var doc = JsonDocument.Parse(responseJson);
+        var root = doc.RootElement;
+
+        if (root.ValueKind != JsonValueKind.Object
+            || !root.TryGetProperty("content", out var content)
+            || content.ValueKind != JsonValueKind.Array)
+        {
+            return string.Empty;
+        }
+
+        foreach (var block in content.EnumerateArray())
+        {
+            if (block.ValueKind == JsonValueKind.Object
+                && block.TryGetProperty("text", out var textElement)
+                && textElement.ValueKind == JsonValueKind.String)
+            {
+                // GetString copies the value, so it stays valid after the document is disposed.
+                return textElement.GetString() ?? string.Empty;
+            }
+        }
+
+        return string.Empty;
+    }
+
+    // Parses one JSON candidate into a verdict; returns null when it is not valid JSON or
+    // lacks the expected properties or property types.
+    private static ClassificationResult? TryParseVerdict(string json)
+    {
+        try
+        {
+            using var doc = JsonDocument.Parse(json);
+            var root = doc.RootElement;
+
+            return new ClassificationResult(
+                Classification: root.GetProperty("classification").GetString() ?? "unknown",
+                Confidence: root.GetProperty("confidence").GetDouble(),
+                Reason: root.GetProperty("reason").GetString() ?? ""
+            );
+        }
+        catch (Exception ex) when (ex is JsonException
+            or KeyNotFoundException
+            or InvalidOperationException
+            or FormatException)
+        {
+            return null;
+        }
+    }
+
+    // Matches one markdown code fence (optionally tagged "json"); group 1 is its content.
+    [GeneratedRegex(@"```(?:json)?\s*([\s\S]*?)\s*```")]
+    private static partial Regex StripMarkdownFencing();
+}
--- a/tests/SpamGuard.Tests/Services/EmailClassifierTests.cs
+++ b/tests/SpamGuard.Tests/Services/EmailClassifierTests.cs
@@ -0,0 +1,100 @@
+// tests/SpamGuard.Tests/Services/EmailClassifierTests.cs
+namespace SpamGuard.Tests.Services;
+
+using System.Net;
+using System.Text.Json;
+using Microsoft.Extensions.Logging.Abstractions;
+using Microsoft.Extensions.Options;
+using SpamGuard.Configuration;
+using SpamGuard.Models;
+using SpamGuard.Services;
+
+/// <summary>
+/// Unit tests for <see cref="EmailClassifier"/> prompt building and response parsing.
+/// No test sends an HTTP request.
+/// </summary>
+public class EmailClassifierTests
+{
+    private const int BodyLimit = 2000;
+
+    // Shared across tests; it is only handed to the constructor and never used to send.
+    private static readonly HttpClient SharedHttpClient = new();
+
+    private static SpamGuardOptions DefaultOptions => new()
+    {
+        Claude = new ClaudeOptions
+        {
+            ApiKey = "test-key",
+            Model = "claude-sonnet-4-6",
+            MaxBodyLength = BodyLimit
+        }
+    };
+
+    // Fixed date keeps the fixture deterministic.
+    private static EmailSummary SampleEmail => new(
+        Uid: 1,
+        From: "spammer@sketchy.com",
+        Subject: "Buy now! Limited offer!",
+        BodySnippet: "Click here to claim your prize...",
+        Date: new DateTimeOffset(2026, 1, 1, 0, 0, 0, TimeSpan.Zero)
+    );
+
+    private static EmailClassifier CreateClassifier() => new(
+        Options.Create(DefaultOptions),
+        NullLogger<EmailClassifier>.Instance,
+        SharedHttpClient
+    );
+
+    [Fact]
+    public void BuildPrompt_ContainsSenderAndSubjectAndBody()
+    {
+        var classifier = CreateClassifier();
+
+        var prompt = classifier.BuildPrompt(SampleEmail);
+
+        Assert.Contains("spammer@sketchy.com", prompt);
+        Assert.Contains("Buy now! Limited offer!", prompt);
+        Assert.Contains("Click here to claim your prize...", prompt);
+    }
+
+    [Fact]
+    public void BuildPrompt_TruncatesLongBody()
+    {
+        var email = SampleEmail with { BodySnippet = new string('x', 5000) };
+        var classifier = CreateClassifier();
+
+        var prompt = classifier.BuildPrompt(email);
+
+        // Pin the exact cut: a run of BodyLimit characters is present, one more is not.
+        Assert.Contains(new string('x', BodyLimit), prompt);
+        Assert.DoesNotContain(new string('x', BodyLimit + 1), prompt);
+    }
+
+    [Fact]
+    public void BuildPrompt_BodyAtLimit_IsKeptWhole()
+    {
+        var body = new string('x', BodyLimit);
+        var email = SampleEmail with { BodySnippet = body };
+        var classifier = CreateClassifier();
+
+        var prompt = classifier.BuildPrompt(email);
+
+        Assert.Contains(body, prompt);
+    }
+
+    [Fact]
+    public void ParseResponse_ValidJson_ReturnsResult()
+    {
+        var json = """{"classification": "spam", "confidence": 0.95, "reason": "Unsolicited marketing"}""";
+
+        var result = EmailClassifier.ParseResponse(json);
+
+        Assert.NotNull(result);
+        Assert.True(result.IsSpam);
+        Assert.Equal(0.95, result.Confidence, 2);
+        Assert.Equal("Unsolicited marketing", result.Reason);
+    }
+
+    [Theory]
+    [InlineData("not json at all")]
+    [InlineData("")]
+    [InlineData("[]")]
+    [InlineData("{\"classification\": \"spam\"}")]
+    [InlineData("{\"classification\": \"spam\", \"confidence\": \"high\", \"reason\": \"x\"}")]
+    public void ParseResponse_InvalidOrIncompleteJson_ReturnsNull(string text)
+    {
+        var result = EmailClassifier.ParseResponse(text);
+
+        Assert.Null(result);
+    }
+
+    [Fact]
+    public void ParseResponse_JsonWithMarkdownFencing_ReturnsResult()
+    {
+        var json = """
+            ```json
+            {"classification": "legitimate", "confidence": 0.85, "reason": "Normal business email"}
+            ```
+            """;
+
+        var result = EmailClassifier.ParseResponse(json);
+
+        Assert.NotNull(result);
+        Assert.False(result.IsSpam);
+    }
+
+    [Fact]
+    public void ParseResponse_FencingWithoutLanguageTag_ReturnsResult()
+    {
+        var json = """
+            ```
+            {"classification": "spam", "confidence": 0.7, "reason": "Bulk promotion"}
+            ```
+            """;
+
+        var result = EmailClassifier.ParseResponse(json);
+
+        Assert.NotNull(result);
+        Assert.True(result.IsSpam);
+    }
+}