feat: add EmailClassifier with Claude API integration and response parsing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-07 11:41:45 +01:00
parent 78f5ca864d
commit bd42cc3382
2 changed files with 230 additions and 0 deletions

View File

@@ -0,0 +1,130 @@
// src/SpamGuard/Services/EmailClassifier.cs
namespace SpamGuard.Services;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using SpamGuard.Configuration;
using SpamGuard.Models;
/// <summary>
/// Classifies an email as spam or legitimate by posting a short summary of it to the
/// Anthropic Messages API and parsing the JSON verdict out of the model's reply.
/// </summary>
public sealed partial class EmailClassifier
{
    private const string MessagesEndpoint = "https://api.anthropic.com/v1/messages";
    private const string AnthropicVersion = "2023-06-01";
    private const int MaxResponseTokens = 256;

    private const string SystemPrompt = """
        You are an email spam classifier. Analyze the following email and determine if it is spam or legitimate.
        Spam includes:
        - Unsolicited marketing or promotional emails the recipient never signed up for
        - AI-generated emails designed to look like legitimate correspondence
        - Newsletter signups the recipient didn't request
        Legitimate includes:
        - Emails from known contacts or businesses the recipient has a relationship with
        - Transactional emails (receipts, shipping notifications, password resets)
        - Emails the recipient would expect to receive
        Respond with JSON only:
        {"classification": "spam" | "legitimate", "confidence": 0.0-1.0, "reason": "brief explanation"}
        """;

    private readonly SpamGuardOptions _options;
    private readonly ILogger<EmailClassifier> _logger;
    private readonly HttpClient _httpClient;

    /// <summary>Creates a classifier bound to the supplied options, logger and HTTP client.</summary>
    /// <param name="options">Options wrapper; its <c>Value</c> is read once and cached.</param>
    /// <param name="logger">Logger used for per-email diagnostics.</param>
    /// <param name="httpClient">Client used to send the classification request.</param>
    public EmailClassifier(
        IOptions<SpamGuardOptions> options,
        ILogger<EmailClassifier> logger,
        HttpClient httpClient)
    {
        _options = options.Value;
        _logger = logger;
        _httpClient = httpClient;
    }

    /// <summary>
    /// Builds the user message sent to the model: sender, subject and the body snippet,
    /// with the body cut to at most <c>Claude.MaxBodyLength</c> UTF-16 code units.
    /// </summary>
    /// <param name="email">The email to describe.</param>
    /// <returns>The formatted prompt text.</returns>
    public string BuildPrompt(EmailSummary email)
    {
        var snippet = email.BodySnippet;
        var limit = _options.Claude.MaxBodyLength;

        if (snippet.Length > limit)
        {
            // Never end on a lone high surrogate: splitting a surrogate pair would put an
            // invalid code unit into the prompt. Dropping one extra char keeps the text valid.
            var cut = limit > 0 && char.IsHighSurrogate(snippet[limit - 1]) ? limit - 1 : limit;
            snippet = snippet[..cut];
        }

        return $"""
            Email details:
            From: {email.From}
            Subject: {email.Subject}
            Body: {snippet}
            """;
    }

    /// <summary>
    /// Sends the email summary to the Messages API and parses the model's verdict.
    /// </summary>
    /// <param name="email">The email to classify.</param>
    /// <param name="ct">Token used to cancel the HTTP request and the body read.</param>
    /// <returns>
    /// The parsed classification, or <c>null</c> when the reply contains no text that can be
    /// parsed as a classification (a warning is logged in that case).
    /// </returns>
    /// <exception cref="HttpRequestException">The API returned a non-success status code.</exception>
    /// <exception cref="JsonException">The API response body is not valid JSON.</exception>
    public async Task<ClassificationResult?> ClassifyAsync(EmailSummary email, CancellationToken ct = default)
    {
        var userMessage = BuildPrompt(email);
        _logger.LogDebug("Classifying email UID={Uid} from {From}", email.Uid, email.From);

        var requestBody = new
        {
            model = _options.Claude.Model,
            max_tokens = MaxResponseTokens,
            system = SystemPrompt,
            messages = new[]
            {
                new { role = "user", content = userMessage }
            }
        };

        // Disposing the request also disposes its StringContent.
        using var request = new HttpRequestMessage(HttpMethod.Post, MessagesEndpoint)
        {
            Content = new StringContent(JsonSerializer.Serialize(requestBody), Encoding.UTF8, "application/json")
        };
        request.Headers.Add("x-api-key", _options.Claude.ApiKey);
        request.Headers.Add("anthropic-version", AnthropicVersion);

        using var response = await _httpClient.SendAsync(request, ct);
        response.EnsureSuccessStatusCode();

        var responseJson = await response.Content.ReadAsStringAsync(ct);

        // JsonDocument rents pooled buffers; it must be disposed to return them.
        using var doc = JsonDocument.Parse(responseJson);
        var text = ExtractFirstText(doc.RootElement);

        var result = ParseResponse(text);
        if (result is not null)
        {
            _logger.LogInformation(
                "UID={Uid} classified as {Classification} (confidence={Confidence}): {Reason}",
                email.Uid, result.Classification, result.Confidence, result.Reason);
        }
        else
        {
            _logger.LogWarning("UID={Uid} classification failed to parse: {Text}", email.Uid, text);
        }

        return result;
    }

    /// <summary>
    /// Parses the model's reply into a <see cref="ClassificationResult"/>.
    /// Markdown code fences around the JSON are tolerated, including a fenced block
    /// surrounded by extra prose.
    /// </summary>
    /// <param name="text">Raw text returned by the model.</param>
    /// <returns>The parsed result, or <c>null</c> if no valid classification JSON is found.</returns>
    public static ClassificationResult? ParseResponse(string text)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return null;
        }

        // First attempt: unwrap any fences in place and parse the whole remaining text.
        var cleaned = StripMarkdownFencing().Replace(text, "$1").Trim();
        var result = TryParseClassification(cleaned);
        if (result is not null)
        {
            return result;
        }

        // Fallback: the model wrapped the fenced JSON in prose; parse only the fenced content.
        var fenced = StripMarkdownFencing().Match(text);
        if (fenced.Success && fenced.Groups[1].Value != cleaned)
        {
            return TryParseClassification(fenced.Groups[1].Value);
        }

        return null;
    }

    /// <summary>
    /// Returns the <c>text</c> of the first element of the response's <c>content</c> array
    /// that carries a string <c>text</c> property, or an empty string if there is none.
    /// </summary>
    private static string ExtractFirstText(JsonElement root)
    {
        if (root.ValueKind != JsonValueKind.Object
            || !root.TryGetProperty("content", out var content)
            || content.ValueKind != JsonValueKind.Array)
        {
            return "";
        }

        foreach (var block in content.EnumerateArray())
        {
            if (block.ValueKind == JsonValueKind.Object
                && block.TryGetProperty("text", out var textElement)
                && textElement.ValueKind == JsonValueKind.String)
            {
                return textElement.GetString() ?? "";
            }
        }

        return "";
    }

    /// <summary>
    /// Parses a single JSON object with <c>classification</c>, <c>confidence</c> and
    /// <c>reason</c> properties; returns <c>null</c> when the text is not such an object.
    /// </summary>
    private static ClassificationResult? TryParseClassification(string json)
    {
        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;

            if (root.ValueKind != JsonValueKind.Object
                || !root.TryGetProperty("classification", out var classification)
                || !root.TryGetProperty("confidence", out var confidence)
                || !root.TryGetProperty("reason", out var reason))
            {
                return null;
            }

            // JSON null is accepted for the two string fields and mapped to a default below.
            if (classification.ValueKind is not (JsonValueKind.String or JsonValueKind.Null)
                || reason.ValueKind is not (JsonValueKind.String or JsonValueKind.Null)
                || confidence.ValueKind != JsonValueKind.Number
                || !confidence.TryGetDouble(out var confidenceValue))
            {
                return null;
            }

            return new ClassificationResult(
                Classification: classification.GetString() ?? "unknown",
                Confidence: confidenceValue,
                Reason: reason.GetString() ?? "");
        }
        catch (JsonException)
        {
            // Model output is untrusted free text; malformed JSON is an expected outcome.
            return null;
        }
    }

    // Matches a ``` or ```json fenced block and captures its trimmed content in group 1.
    // RegexOptions.Compiled is omitted because the regex source generator ignores it.
    [GeneratedRegex(@"```(?:json)?\s*([\s\S]*?)\s*```")]
    private static partial Regex StripMarkdownFencing();
}

View File

@@ -0,0 +1,100 @@
// tests/SpamGuard.Tests/Services/EmailClassifierTests.cs
namespace SpamGuard.Tests.Services;
using System.Net;
using System.Text.Json;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using SpamGuard.Configuration;
using SpamGuard.Models;
using SpamGuard.Services;
/// <summary>
/// Unit tests for prompt construction and response parsing in <see cref="EmailClassifier"/>.
/// None of these tests send an HTTP request.
/// </summary>
public class EmailClassifierTests
{
    private static SpamGuardOptions DefaultOptions => new()
    {
        Claude = new ClaudeOptions
        {
            ApiKey = "test-key",
            Model = "claude-sonnet-4-6",
            MaxBodyLength = 2000
        }
    };

    private static EmailSummary SampleEmail => new(
        Uid: 1,
        From: "spammer@sketchy.com",
        Subject: "Buy now! Limited offer!",
        BodySnippet: "Click here to claim your prize...",
        Date: DateTimeOffset.UtcNow
    );

    // Builds a classifier with the default options; the caller owns and disposes the HttpClient.
    private static EmailClassifier CreateClassifier(HttpClient httpClient) =>
        new(Options.Create(DefaultOptions), NullLogger<EmailClassifier>.Instance, httpClient);

    [Fact]
    public void BuildPrompt_ContainsSenderAndSubjectAndBody()
    {
        using var httpClient = new HttpClient();
        var classifier = CreateClassifier(httpClient);

        var prompt = classifier.BuildPrompt(SampleEmail);

        Assert.Contains("spammer@sketchy.com", prompt);
        Assert.Contains("Buy now! Limited offer!", prompt);
        Assert.Contains("Click here to claim your prize...", prompt);
    }

    [Fact]
    public void BuildPrompt_TruncatesLongBody()
    {
        var maxLength = DefaultOptions.Claude.MaxBodyLength;
        var longBody = new string('x', 5000);
        var email = SampleEmail with { BodySnippet = longBody };
        using var httpClient = new HttpClient();
        var classifier = CreateClassifier(httpClient);

        var prompt = classifier.BuildPrompt(email);

        // The full body must be gone, and exactly MaxBodyLength characters of it must remain:
        // checking only for absence would also pass if the body were dropped entirely.
        Assert.DoesNotContain(longBody, prompt);
        Assert.Contains(new string('x', maxLength), prompt);
        Assert.DoesNotContain(new string('x', maxLength + 1), prompt);
    }

    [Fact]
    public void ParseResponse_ValidJson_ReturnsResult()
    {
        var json = """{"classification": "spam", "confidence": 0.95, "reason": "Unsolicited marketing"}""";

        var result = EmailClassifier.ParseResponse(json);

        Assert.NotNull(result);
        Assert.True(result.IsSpam);
        Assert.Equal(0.95, result.Confidence);
        Assert.Equal("Unsolicited marketing", result.Reason);
    }

    [Fact]
    public void ParseResponse_InvalidJson_ReturnsNull()
    {
        var result = EmailClassifier.ParseResponse("not json at all");

        Assert.Null(result);
    }

    [Fact]
    public void ParseResponse_JsonWithMarkdownFencing_ReturnsResult()
    {
        var json = """
            ```json
            {"classification": "legitimate", "confidence": 0.85, "reason": "Normal business email"}
            ```
            """;

        var result = EmailClassifier.ParseResponse(json);

        Assert.NotNull(result);
        Assert.False(result.IsSpam);
    }
}