diff --git a/src/SpamGuard/Services/EmailClassifier.cs b/src/SpamGuard/Services/EmailClassifier.cs
new file mode 100644
index 0000000..7e8395c
--- /dev/null
+++ b/src/SpamGuard/Services/EmailClassifier.cs
@@ -0,0 +1,188 @@
+// src/SpamGuard/Services/EmailClassifier.cs
+namespace SpamGuard.Services;
+
+using System.Text;
+using System.Text.Json;
+using System.Text.RegularExpressions;
+using Microsoft.Extensions.Logging;
+using Microsoft.Extensions.Options;
+using SpamGuard.Configuration;
+using SpamGuard.Models;
+
+/// <summary>
+/// Classifies emails as spam or legitimate by posting a summary of each message to the
+/// Anthropic Messages API and parsing the JSON verdict contained in the reply.
+/// </summary>
+public sealed partial class EmailClassifier
+{
+    private const string MessagesEndpoint = "https://api.anthropic.com/v1/messages";
+    private const string AnthropicVersion = "2023-06-01";
+    private const int MaxResponseTokens = 256;
+
+    private readonly SpamGuardOptions _options;
+    private readonly ILogger<EmailClassifier> _logger;
+    private readonly HttpClient _httpClient;
+
+    private const string SystemPrompt = """
+        You are an email spam classifier. Analyze the following email and determine if it is spam or legitimate.
+
+        Spam includes:
+        - Unsolicited marketing or promotional emails the recipient never signed up for
+        - AI-generated emails designed to look like legitimate correspondence
+        - Newsletter signups the recipient didn't request
+
+        Legitimate includes:
+        - Emails from known contacts or businesses the recipient has a relationship with
+        - Transactional emails (receipts, shipping notifications, password resets)
+        - Emails the recipient would expect to receive
+
+        Respond with JSON only:
+        {"classification": "spam" | "legitimate", "confidence": 0.0-1.0, "reason": "brief explanation"}
+        """;
+
+    /// <summary>Creates a classifier bound to the given options, logger and HTTP client.</summary>
+    /// <param name="options">Application options; the <c>Claude</c> section supplies the model, API key and body limit.</param>
+    /// <param name="logger">Logger for classification diagnostics.</param>
+    /// <param name="httpClient">Client used for the API call; it is not disposed by this class.</param>
+    /// <exception cref="ArgumentNullException">Any argument is <see langword="null"/>.</exception>
+    public EmailClassifier(
+        IOptions<SpamGuardOptions> options,
+        ILogger<EmailClassifier> logger,
+        HttpClient httpClient)
+    {
+        ArgumentNullException.ThrowIfNull(options);
+        ArgumentNullException.ThrowIfNull(logger);
+        ArgumentNullException.ThrowIfNull(httpClient);
+
+        _options = options.Value;
+        _logger = logger;
+        _httpClient = httpClient;
+    }
+
+    /// <summary>Builds the user message describing <paramref name="email"/> for the model.</summary>
+    /// <param name="email">The email to describe.</param>
+    /// <returns>The sender, subject and body, with the body cut to at most <c>Claude.MaxBodyLength</c> characters.</returns>
+    public string BuildPrompt(EmailSummary email)
+    {
+        ArgumentNullException.ThrowIfNull(email);
+
+        var snippet = email.BodySnippet ?? string.Empty;
+        var limit = Math.Max(0, _options.Claude.MaxBodyLength);
+
+        // Never cut between the two halves of a surrogate pair; a lone high surrogate is not valid text.
+        if (limit > 0 && limit < snippet.Length && char.IsHighSurrogate(snippet[limit - 1]))
+        {
+            limit--;
+        }
+
+        var body = snippet.Length > limit ? snippet[..limit] : snippet;
+
+        return $"""
+            Email details:
+            From: {email.From}
+            Subject: {email.Subject}
+            Body: {body}
+            """;
+    }
+
+    /// <summary>Asks the model to classify <paramref name="email"/>.</summary>
+    /// <param name="email">The email to classify.</param>
+    /// <param name="ct">Token that cancels the HTTP request and the response read.</param>
+    /// <returns>The parsed verdict, or <see langword="null"/> when the model's reply could not be parsed.</returns>
+    /// <exception cref="HttpRequestException">The API answered with a non-success status code.</exception>
+    public async Task<ClassificationResult?> ClassifyAsync(EmailSummary email, CancellationToken ct = default)
+    {
+        ArgumentNullException.ThrowIfNull(email);
+
+        var userMessage = BuildPrompt(email);
+
+        _logger.LogDebug("Classifying email UID={Uid} from {From}", email.Uid, email.From);
+
+        var requestBody = new
+        {
+            model = _options.Claude.Model,
+            max_tokens = MaxResponseTokens,
+            system = SystemPrompt,
+            messages = new[]
+            {
+                new { role = "user", content = userMessage }
+            }
+        };
+
+        var json = JsonSerializer.Serialize(requestBody);
+        using var request = new HttpRequestMessage(HttpMethod.Post, MessagesEndpoint)
+        {
+            Content = new StringContent(json, Encoding.UTF8, "application/json")
+        };
+        request.Headers.Add("x-api-key", _options.Claude.ApiKey);
+        request.Headers.Add("anthropic-version", AnthropicVersion);
+
+        using var response = await _httpClient.SendAsync(request, ct);
+        response.EnsureSuccessStatusCode();
+
+        var responseJson = await response.Content.ReadAsStringAsync(ct);
+
+        // The reply text is materialized as a string before the document is released.
+        string text;
+        using (var doc = JsonDocument.Parse(responseJson))
+        {
+            text = doc.RootElement
+                .GetProperty("content")[0]
+                .GetProperty("text")
+                .GetString() ?? "";
+        }
+
+        var result = ParseResponse(text);
+
+        if (result is not null)
+        {
+            _logger.LogInformation(
+                "UID={Uid} classified as {Classification} (confidence={Confidence}): {Reason}",
+                email.Uid, result.Classification, result.Confidence, result.Reason);
+        }
+        else
+        {
+            _logger.LogWarning("UID={Uid} classification failed to parse: {Text}", email.Uid, text);
+        }
+
+        return result;
+    }
+
+    /// <summary>Parses the model's reply into a <see cref="ClassificationResult"/>.</summary>
+    /// <param name="text">Raw reply text: a JSON object, optionally wrapped in a markdown code fence.</param>
+    /// <returns>
+    /// The parsed result, or <see langword="null"/> when <paramref name="text"/> is empty, is not valid JSON,
+    /// or lacks a correctly typed <c>classification</c>, <c>confidence</c> or <c>reason</c> property.
+    /// </returns>
+    public static ClassificationResult? ParseResponse(string text)
+    {
+        if (string.IsNullOrWhiteSpace(text))
+        {
+            return null;
+        }
+
+        // Use the fenced payload when there is one so that prose around the fence is ignored.
+        var fence = MarkdownFence().Match(text);
+        var cleaned = (fence.Success ? fence.Groups[1].Value : text).Trim();
+
+        try
+        {
+            using var doc = JsonDocument.Parse(cleaned);
+            var root = doc.RootElement;
+
+            return new ClassificationResult(
+                Classification: root.GetProperty("classification").GetString() ?? "unknown",
+                Confidence: root.GetProperty("confidence").GetDouble(),
+                Reason: root.GetProperty("reason").GetString() ?? "");
+        }
+        catch (Exception ex) when (ex is JsonException or KeyNotFoundException or InvalidOperationException or FormatException)
+        {
+            // Malformed or incomplete replies are an expected outcome; null signals them to the caller.
+            return null;
+        }
+    }
+
+    // RegexOptions.Compiled is omitted: it has no effect on a source-generated regex.
+    [GeneratedRegex(@"```(?:json)?\s*([\s\S]*?)\s*```")]
+    private static partial Regex MarkdownFence();
+}
diff --git a/tests/SpamGuard.Tests/Services/EmailClassifierTests.cs b/tests/SpamGuard.Tests/Services/EmailClassifierTests.cs
new file mode 100644
index 0000000..0268b18
--- /dev/null
+++ b/tests/SpamGuard.Tests/Services/EmailClassifierTests.cs
@@ -0,0 +1,123 @@
+// tests/SpamGuard.Tests/Services/EmailClassifierTests.cs
+namespace SpamGuard.Tests.Services;
+
+using System.Net;
+using System.Text.Json;
+using Microsoft.Extensions.Logging.Abstractions;
+using Microsoft.Extensions.Options;
+using SpamGuard.Configuration;
+using SpamGuard.Models;
+using SpamGuard.Services;
+
+public class EmailClassifierTests
+{
+    // One client for the whole class: none of these tests touches the network.
+    private static readonly HttpClient SharedHttpClient = new();
+
+    private static SpamGuardOptions DefaultOptions => new()
+    {
+        Claude = new ClaudeOptions
+        {
+            ApiKey = "test-key",
+            Model = "claude-sonnet-4-6",
+            MaxBodyLength = 2000
+        }
+    };
+
+    private static EmailSummary SampleEmail => new(
+        Uid: 1,
+        From: "spammer@sketchy.com",
+        Subject: "Buy now! Limited offer!",
+        BodySnippet: "Click here to claim your prize...",
+        Date: new DateTimeOffset(2024, 1, 1, 0, 0, 0, TimeSpan.Zero)
+    );
+
+    private static EmailClassifier CreateClassifier() => new(
+        Options.Create(DefaultOptions),
+        NullLogger<EmailClassifier>.Instance,
+        SharedHttpClient);
+
+    [Fact]
+    public void BuildPrompt_ContainsSenderAndSubjectAndBody()
+    {
+        var classifier = CreateClassifier();
+
+        var prompt = classifier.BuildPrompt(SampleEmail);
+
+        Assert.Contains("spammer@sketchy.com", prompt);
+        Assert.Contains("Buy now! Limited offer!", prompt);
+        Assert.Contains("Click here to claim your prize...", prompt);
+    }
+
+    [Fact]
+    public void BuildPrompt_TruncatesLongBody()
+    {
+        var maxLength = DefaultOptions.Claude.MaxBodyLength;
+        var email = SampleEmail with { BodySnippet = new string('x', 5000) };
+        var classifier = CreateClassifier();
+
+        var prompt = classifier.BuildPrompt(email);
+
+        // Exactly MaxBodyLength characters of the body survive: no more, no fewer.
+        Assert.Contains(new string('x', maxLength), prompt);
+        Assert.DoesNotContain(new string('x', maxLength + 1), prompt);
+    }
+
+    [Fact]
+    public void ParseResponse_ValidJson_ReturnsResult()
+    {
+        var json = """{"classification": "spam", "confidence": 0.95, "reason": "Unsolicited marketing"}""";
+
+        var result = EmailClassifier.ParseResponse(json);
+
+        Assert.NotNull(result);
+        Assert.True(result.IsSpam);
+        Assert.Equal(0.95, result.Confidence, 3);
+        Assert.Equal("Unsolicited marketing", result.Reason);
+    }
+
+    [Theory]
+    [InlineData("")]
+    [InlineData("   ")]
+    [InlineData("not json at all")]
+    [InlineData("""{"classification": "spam"}""")]
+    [InlineData("""{"classification": "spam", "confidence": "high", "reason": "x"}""")]
+    public void ParseResponse_InvalidJson_ReturnsNull(string text)
+    {
+        var result = EmailClassifier.ParseResponse(text);
+
+        Assert.Null(result);
+    }
+
+    [Fact]
+    public void ParseResponse_JsonWithMarkdownFencing_ReturnsResult()
+    {
+        var json = """
+            ```json
+            {"classification": "legitimate", "confidence": 0.85, "reason": "Normal business email"}
+            ```
+            """;
+
+        var result = EmailClassifier.ParseResponse(json);
+
+        Assert.NotNull(result);
+        Assert.False(result.IsSpam);
+    }
+
+    [Fact]
+    public void ParseResponse_FencedJsonWithSurroundingProse_ReturnsResult()
+    {
+        var reply = """
+            Here is my verdict:
+            ```json
+            {"classification": "spam", "confidence": 0.9, "reason": "Unsolicited marketing"}
+            ```
+            Let me know if you need anything else.
+            """;
+
+        var result = EmailClassifier.ParseResponse(reply);
+
+        Assert.NotNull(result);
+        Assert.True(result.IsSpam);
+    }
+}