feat: add EmailClassifier with Claude API integration and response parsing
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
130
src/SpamGuard/Services/EmailClassifier.cs
Normal file
130
src/SpamGuard/Services/EmailClassifier.cs
Normal file
@@ -0,0 +1,130 @@
|
||||
// src/SpamGuard/Services/EmailClassifier.cs
|
||||
namespace SpamGuard.Services;
|
||||
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using SpamGuard.Configuration;
|
||||
using SpamGuard.Models;
|
||||
|
||||
/// <summary>
/// Classifies emails as spam or legitimate by sending a short summary of each message
/// (sender, subject, body snippet) to the Anthropic Messages API and parsing the JSON
/// verdict out of the model's reply.
/// </summary>
public sealed partial class EmailClassifier
{
    private const string MessagesEndpoint = "https://api.anthropic.com/v1/messages";
    private const string AnthropicVersion = "2023-06-01";
    private const int MaxResponseTokens = 256;

    private const string SystemPrompt = """
        You are an email spam classifier. Analyze the following email and determine if it is spam or legitimate.

        Spam includes:
        - Unsolicited marketing or promotional emails the recipient never signed up for
        - AI-generated emails designed to look like legitimate correspondence
        - Newsletter signups the recipient didn't request

        Legitimate includes:
        - Emails from known contacts or businesses the recipient has a relationship with
        - Transactional emails (receipts, shipping notifications, password resets)
        - Emails the recipient would expect to receive

        Respond with JSON only:
        {"classification": "spam" | "legitimate", "confidence": 0.0-1.0, "reason": "brief explanation"}
        """;

    private readonly SpamGuardOptions _options;
    private readonly ILogger<EmailClassifier> _logger;
    private readonly HttpClient _httpClient;

    /// <summary>Creates a classifier bound to the given options, logger and HTTP client.</summary>
    /// <param name="options">Options wrapper; its <c>Value</c> is read once and cached.</param>
    /// <param name="logger">Logger used for per-email diagnostics.</param>
    /// <param name="httpClient">Client used to call the Messages API. Not disposed by this class.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public EmailClassifier(
        IOptions<SpamGuardOptions> options,
        ILogger<EmailClassifier> logger,
        HttpClient httpClient)
    {
        ArgumentNullException.ThrowIfNull(options);
        ArgumentNullException.ThrowIfNull(logger);
        ArgumentNullException.ThrowIfNull(httpClient);

        _options = options.Value;
        _logger = logger;
        _httpClient = httpClient;
    }

    /// <summary>
    /// Builds the user message sent to the model: sender, subject and the body snippet,
    /// with the body cut to at most <c>Claude.MaxBodyLength</c> characters.
    /// </summary>
    /// <param name="email">The email to describe.</param>
    /// <returns>The prompt text.</returns>
    public string BuildPrompt(EmailSummary email)
    {
        // A negative configured limit would make the range expression below throw.
        var limit = Math.Max(0, _options.Claude.MaxBodyLength);
        var body = email.BodySnippet;

        if (body.Length > limit)
        {
            // Do not cut a surrogate pair in half: a lone high surrogate is not valid
            // UTF-16 and cannot be carried faithfully in the JSON request body.
            if (limit > 0 && char.IsHighSurrogate(body[limit - 1]))
            {
                limit--;
            }

            body = body[..limit];
        }

        return $"""
            Email details:
            From: {email.From}
            Subject: {email.Subject}
            Body: {body}
            """;
    }

    /// <summary>
    /// Sends the email summary to the Messages API and parses the model's verdict.
    /// </summary>
    /// <param name="email">The email to classify.</param>
    /// <param name="ct">Token used to cancel the HTTP call and the response read.</param>
    /// <returns>
    /// The parsed classification, or <c>null</c> when the response carries no text
    /// or the text is not the expected JSON object.
    /// </returns>
    /// <exception cref="HttpRequestException">The API returned a non-success status code.</exception>
    public async Task<ClassificationResult?> ClassifyAsync(EmailSummary email, CancellationToken ct = default)
    {
        var userMessage = BuildPrompt(email);

        _logger.LogDebug("Classifying email UID={Uid} from {From}", email.Uid, email.From);

        var requestBody = new
        {
            model = _options.Claude.Model,
            max_tokens = MaxResponseTokens,
            system = SystemPrompt,
            messages = new[]
            {
                new { role = "user", content = userMessage }
            }
        };

        // Disposing the request also disposes its StringContent.
        using var request = new HttpRequestMessage(HttpMethod.Post, MessagesEndpoint)
        {
            Content = new StringContent(JsonSerializer.Serialize(requestBody), Encoding.UTF8, "application/json")
        };
        request.Headers.Add("x-api-key", _options.Claude.ApiKey);
        request.Headers.Add("anthropic-version", AnthropicVersion);

        using var response = await _httpClient.SendAsync(request, ct);
        response.EnsureSuccessStatusCode();

        var responseJson = await response.Content.ReadAsStringAsync(ct);
        var text = ExtractReplyText(responseJson);

        if (text is null)
        {
            _logger.LogWarning("UID={Uid} response contained no text content: {Response}", email.Uid, responseJson);
            return null;
        }

        var result = ParseResponse(text);

        if (result != null)
        {
            _logger.LogInformation(
                "UID={Uid} classified as {Classification} (confidence={Confidence}): {Reason}",
                email.Uid, result.Classification, result.Confidence, result.Reason);
        }
        else
        {
            _logger.LogWarning("UID={Uid} classification failed to parse: {Text}", email.Uid, text);
        }

        return result;
    }

    /// <summary>
    /// Parses the model's reply into a <see cref="ClassificationResult"/>.
    /// Accepts bare JSON or JSON wrapped in a Markdown code fence, with or without
    /// extra prose around the fence.
    /// </summary>
    /// <param name="text">Raw reply text from the model.</param>
    /// <returns>
    /// The parsed result, or <c>null</c> when the text is blank, is not valid JSON, is not
    /// a JSON object, or lacks a usable <c>classification</c>, <c>confidence</c> or
    /// <c>reason</c> property. A JSON <c>null</c> classification becomes <c>"unknown"</c>
    /// and a JSON <c>null</c> reason becomes an empty string.
    /// </returns>
    public static ClassificationResult? ParseResponse(string text)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return null;
        }

        // Take only what is inside the first fence when one exists; replacing the fence
        // in place would keep any surrounding prose and break the JSON parse.
        var fence = StripMarkdownFencing().Match(text);
        var payload = (fence.Success ? fence.Groups[1].Value : text).Trim();

        try
        {
            using var doc = JsonDocument.Parse(payload);
            var root = doc.RootElement;

            if (root.ValueKind != JsonValueKind.Object
                || !root.TryGetProperty("classification", out var classification)
                || !root.TryGetProperty("confidence", out var confidence)
                || !root.TryGetProperty("reason", out var reason)
                || !IsStringOrNull(classification)
                || !IsStringOrNull(reason)
                || confidence.ValueKind != JsonValueKind.Number
                || !confidence.TryGetDouble(out var confidenceValue))
            {
                return null;
            }

            return new ClassificationResult(
                Classification: classification.GetString() ?? "unknown",
                Confidence: confidenceValue,
                Reason: reason.GetString() ?? "");
        }
        catch (JsonException)
        {
            // Not JSON at all: an expected outcome for free-form model output.
            return null;
        }
    }

    /// <summary>
    /// Returns the <c>text</c> of the first element of the response's <c>content</c> array
    /// that has a string <c>text</c> property, or <c>null</c> when the envelope is not
    /// valid JSON or has no such element.
    /// </summary>
    private static string? ExtractReplyText(string responseJson)
    {
        try
        {
            using var doc = JsonDocument.Parse(responseJson);
            var root = doc.RootElement;

            if (root.ValueKind != JsonValueKind.Object
                || !root.TryGetProperty("content", out var content)
                || content.ValueKind != JsonValueKind.Array)
            {
                return null;
            }

            foreach (var block in content.EnumerateArray())
            {
                if (block.ValueKind == JsonValueKind.Object
                    && block.TryGetProperty("text", out var textElement)
                    && textElement.ValueKind == JsonValueKind.String)
                {
                    return textElement.GetString();
                }
            }

            return null;
        }
        catch (JsonException)
        {
            return null;
        }
    }

    /// <summary>True when the element is a JSON string or JSON null.</summary>
    private static bool IsStringOrNull(JsonElement element) =>
        element.ValueKind is JsonValueKind.String or JsonValueKind.Null;

    // Matches a Markdown code fence (optionally tagged "json") and captures its contents.
    // RegexOptions.Compiled is omitted: the regex source generator ignores it.
    [GeneratedRegex(@"```(?:json)?\s*([\s\S]*?)\s*```")]
    private static partial Regex StripMarkdownFencing();
}
|
||||
100
tests/SpamGuard.Tests/Services/EmailClassifierTests.cs
Normal file
100
tests/SpamGuard.Tests/Services/EmailClassifierTests.cs
Normal file
@@ -0,0 +1,100 @@
|
||||
// tests/SpamGuard.Tests/Services/EmailClassifierTests.cs
|
||||
namespace SpamGuard.Tests.Services;
|
||||
|
||||
using System.Net;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using SpamGuard.Configuration;
|
||||
using SpamGuard.Models;
|
||||
using SpamGuard.Services;
|
||||
|
||||
/// <summary>
/// Unit tests for <see cref="EmailClassifier"/> prompt building and response parsing.
/// No test here performs a network call.
/// </summary>
public class EmailClassifierTests
{
    // Fixed timestamp keeps the fixture deterministic.
    private static readonly DateTimeOffset FixedDate = new(2024, 1, 1, 0, 0, 0, TimeSpan.Zero);

    private static SpamGuardOptions DefaultOptions => new()
    {
        Claude = new ClaudeOptions
        {
            ApiKey = "test-key",
            Model = "claude-sonnet-4-6",
            MaxBodyLength = 2000
        }
    };

    private static EmailSummary SampleEmail => new(
        Uid: 1,
        From: "spammer@sketchy.com",
        Subject: "Buy now! Limited offer!",
        BodySnippet: "Click here to claim your prize...",
        Date: FixedDate
    );

    /// <summary>Builds a classifier with the default test options and a no-op logger.</summary>
    private static EmailClassifier CreateClassifier(HttpClient httpClient) =>
        new(Options.Create(DefaultOptions), NullLogger<EmailClassifier>.Instance, httpClient);

    [Fact]
    public void BuildPrompt_ContainsSenderAndSubjectAndBody()
    {
        using var httpClient = new HttpClient();
        var classifier = CreateClassifier(httpClient);

        var prompt = classifier.BuildPrompt(SampleEmail);

        Assert.Contains("spammer@sketchy.com", prompt);
        Assert.Contains("Buy now! Limited offer!", prompt);
        Assert.Contains("Click here to claim your prize...", prompt);
    }

    [Fact]
    public void BuildPrompt_TruncatesLongBody()
    {
        var maxLength = DefaultOptions.Claude.MaxBodyLength;
        var longBody = new string('x', 5000);
        var email = SampleEmail with { BodySnippet = longBody };

        using var httpClient = new HttpClient();
        var classifier = CreateClassifier(httpClient);

        var prompt = classifier.BuildPrompt(email);

        // Pin the exact cut: MaxBodyLength characters survive, not one more.
        Assert.DoesNotContain(longBody, prompt);
        Assert.Contains(new string('x', maxLength), prompt);
        Assert.DoesNotContain(new string('x', maxLength + 1), prompt);
    }

    [Fact]
    public void ParseResponse_ValidJson_ReturnsResult()
    {
        var json = """{"classification": "spam", "confidence": 0.95, "reason": "Unsolicited marketing"}""";

        var result = EmailClassifier.ParseResponse(json);

        Assert.NotNull(result);
        Assert.True(result.IsSpam);
        Assert.Equal(0.95, result.Confidence);
        Assert.Equal("Unsolicited marketing", result.Reason);
    }

    [Fact]
    public void ParseResponse_InvalidJson_ReturnsNull()
    {
        var result = EmailClassifier.ParseResponse("not json at all");

        Assert.Null(result);
    }

    [Fact]
    public void ParseResponse_JsonWithMarkdownFencing_ReturnsResult()
    {
        var json = """
            ```json
            {"classification": "legitimate", "confidence": 0.85, "reason": "Normal business email"}
            ```
            """;

        var result = EmailClassifier.ParseResponse(json);

        Assert.NotNull(result);
        Assert.False(result.IsSpam);
    }
}
|
||||
Reference in New Issue
Block a user