diff --git a/.env.example b/.env.example index 5292b78..556cac8 100644 --- a/.env.example +++ b/.env.example @@ -8,3 +8,13 @@ AWS_ACCESS_KEY_ID=your_aws_key AWS_SECRET_ACCESS_KEY=your_aws_secret AWS_REGION=us-east-1 SES_FROM_EMAIL=noreply@agents.dev + +# AWS S3 (Attachment Storage) +S3_BUCKET=your-attachments-bucket +S3_PREFIX=attachments + +# Email Verification +SPAM_SCORE_THRESHOLD=5.0 +MAILGUN_SIGNING_KEY=your_mailgun_signing_key +REQUIRE_DKIM=true +REQUIRE_SPF=true diff --git a/app/__pycache__/__init__.cpython-313.pyc b/app/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..56f906a Binary files /dev/null and b/app/__pycache__/__init__.cpython-313.pyc differ diff --git a/app/__pycache__/main.cpython-313.pyc b/app/__pycache__/main.cpython-313.pyc new file mode 100644 index 0000000..e597078 Binary files /dev/null and b/app/__pycache__/main.cpython-313.pyc differ diff --git a/app/core/__pycache__/__init__.cpython-313.pyc b/app/core/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..17625be Binary files /dev/null and b/app/core/__pycache__/__init__.cpython-313.pyc differ diff --git a/app/core/__pycache__/config.cpython-313.pyc b/app/core/__pycache__/config.cpython-313.pyc new file mode 100644 index 0000000..dbe4855 Binary files /dev/null and b/app/core/__pycache__/config.cpython-313.pyc differ diff --git a/app/core/config.py b/app/core/config.py index df3160a..006da41 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -5,17 +5,27 @@ class Settings(BaseSettings): # Database database_url: str = "postgresql://postgres:postgres@localhost:5432/agentsuite" - + # AWS SES aws_access_key_id: str = "" aws_secret_access_key: str = "" aws_region: str = "us-east-1" ses_from_email: str = "" - + + # AWS S3 (attachments) + s3_bucket: str = "" + s3_prefix: str = "attachments" + + # Email Verification + spam_score_threshold: float = 5.0 + mailgun_signing_key: str = "" + require_dkim: bool = True + require_spf: bool = True + # App app_name: str = "Agent Suite" debug: bool = False - + class Config: env_file = ".env" diff --git a/app/db/__pycache__/__init__.cpython-313.pyc b/app/db/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..e198b85 Binary files /dev/null and b/app/db/__pycache__/__init__.cpython-313.pyc differ diff --git a/app/db/__pycache__/database.cpython-313.pyc b/app/db/__pycache__/database.cpython-313.pyc new file mode 100644 index 0000000..13ed6ac Binary files /dev/null and b/app/db/__pycache__/database.cpython-313.pyc differ diff --git a/app/main.py b/app/main.py index 5bd191b..4fce8f4 100644 --- a/app/main.py +++ b/app/main.py @@ -1,14 +1,21 @@ -from fastapi import FastAPI, Depends, HTTPException, status -from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials -from sqlalchemy.orm import Session -from typing import List +import logging +from typing import List, Optional + import boto3 from botocore.exceptions import ClientError +from fastapi import FastAPI, Depends, HTTPException, Request, status +from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials +from sqlalchemy.orm import Session +from starlette.datastructures import UploadFile from app.core.config import get_settings from app.db.database import get_db, engine, Base from app.models import models from app.schemas import schemas +from app.services.email_verification import verify_email, verify_mailgun_webhook_signature +from app.services.attachment_service import parse_and_store_attachments, get_s3_client + +logger = logging.getLogger(__name__) # Create tables Base.metadata.create_all(bind=engine) @@ -142,27 +149,77 @@ def list_messages( @app.post("/v1/webhooks/mailgun") -def mailgun_webhook( - sender: str, - recipient: str, - subject: str = "", - body_plain: str = "", - body_html: str = "", - message_id: str = "", - db: Session = Depends(get_db) +async def mailgun_webhook( + request: Request, + db: Session = Depends(get_db), ): - """Receive incoming email from Mailgun.""" + """Receive incoming email from Mailgun with verification and attachment handling. + + Performs the following checks on each incoming email: + 1. Verifies Mailgun webhook signature (if signing key is configured) + 2. Validates SPF/DKIM authentication results + 3. Filters spam based on configurable score threshold (default > 5) + 4. Parses and stores attachments in S3 + + Emails that fail spam filtering are rejected with a 400 response. + """ + # Parse form data (Mailgun sends multipart/form-data) + form = await request.form() + + sender = form.get("sender", "") + recipient = form.get("recipient", "") + subject = form.get("subject", "") + body_plain = form.get("body-plain", "") or form.get("body_plain", "") + body_html = form.get("body-html", "") or form.get("body_html", "") + message_id = form.get("Message-Id", "") or form.get("message_id", "") + + # Mailgun webhook signature verification + if settings.mailgun_signing_key: + timestamp = form.get("timestamp", "") + token = form.get("token", "") + signature = form.get("signature", "") + if not verify_mailgun_webhook_signature( + settings.mailgun_signing_key, timestamp, token, signature + ): + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Invalid webhook signature", + ) + + # Email verification (SPF/DKIM/Spam) + spf_header = form.get("X-Mailgun-Spf", "") + dkim_header = form.get("X-Mailgun-Dkim-Check-Result", "") + spam_score_header = form.get("X-Mailgun-SSscore", "") or form.get("spam-score", "") + + verification = verify_email( + spf_header=spf_header, + dkim_header=dkim_header, + spam_score_header=spam_score_header, + spam_threshold=settings.spam_score_threshold, + ) + + # Reject spam + if verification.is_spam: + logger.warning( + "Rejected spam email from %s (score: %s)", + sender, + verification.spam_score, + ) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Email rejected: {verification.rejection_reason}", + ) + # Find inbox by recipient email inbox = db.query(models.Inbox).filter( models.Inbox.email_address == recipient, models.Inbox.is_active == True ).first() - + if not inbox: - # Silently drop - inbox doesn't exist or inactive return {"status": "dropped"} - - # Store message + + # Store message with verification data message = models.Message( inbox_id=inbox.id, sender=sender, @@ -170,11 +227,49 @@ def mailgun_webhook( subject=subject, body_text=body_plain, body_html=body_html, - message_id=message_id + message_id=message_id, + spf_pass=verification.spf_pass, + dkim_pass=verification.dkim_pass, + spam_score=verification.spam_score, + is_verified=verification.is_verified, ) db.add(message) + db.flush() # Get the message ID without committing + + # Handle attachments + attachment_count = int(form.get("attachment-count", "0") or "0") + if attachment_count > 0: + files = [] + for i in range(1, attachment_count + 1): + file = form.get(f"attachment-{i}") + if file and isinstance(file, UploadFile): + files.append(file) + + if files: + s3_client = None + if settings.s3_bucket and settings.aws_access_key_id: + s3_client = get_s3_client( + settings.aws_access_key_id, + settings.aws_secret_access_key, + settings.aws_region, + ) + + await parse_and_store_attachments( + files=files, + message_id=message.id, + db=db, + s3_client=s3_client, + bucket=settings.s3_bucket, + s3_prefix=settings.s3_prefix, + ) + db.commit() - + # TODO: Trigger user webhook if configured - - return {"status": "received", "message_id": str(message.id)} + + return { + "status": "received", + "message_id": str(message.id), + "verified": verification.is_verified, + "spam_score": verification.spam_score, + } diff --git a/app/models/__pycache__/__init__.cpython-313.pyc b/app/models/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..e442b6f Binary files /dev/null and b/app/models/__pycache__/__init__.cpython-313.pyc differ diff --git a/app/models/__pycache__/models.cpython-313.pyc b/app/models/__pycache__/models.cpython-313.pyc new file mode 100644 index 0000000..51b9e47 Binary files /dev/null and b/app/models/__pycache__/models.cpython-313.pyc differ diff --git a/app/models/models.py b/app/models/models.py index 7a21b6f..72521d9 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -1,7 +1,8 @@ import uuid from datetime import datetime -from sqlalchemy import Column, String, DateTime, Text, ForeignKey, Boolean +from sqlalchemy import Column, String, DateTime, Text, ForeignKey, Boolean, Float, Integer from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy.orm import relationship from app.db.database import Base @@ -11,7 +12,7 @@ def generate_api_key(): class Inbox(Base): __tablename__ = "inboxes" - + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) email_address = Column(String(255), unique=True, index=True, nullable=False) api_key = Column(String(255), unique=True, index=True, default=generate_api_key) @@ -21,7 +22,7 @@ class Inbox(Base): class Message(Base): __tablename__ = "messages" - + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) inbox_id = Column(UUID(as_uuid=True), ForeignKey("inboxes.id"), index=True) sender = Column(String(255), nullable=False) @@ -33,3 +34,28 @@ class Message(Base): is_read = Column(Boolean, default=False) message_id = Column(String(255), index=True) # External message ID raw_data = Column(Text) # Store raw email for debugging + + # Email verification fields + spf_pass = Column(Boolean, nullable=True) + dkim_pass = Column(Boolean, nullable=True) + spam_score = Column(Float, nullable=True) + is_verified = Column(Boolean, default=False) + + # Relationship to attachments + attachments = relationship("Attachment", back_populates="message", lazy="joined") + + +class Attachment(Base): + __tablename__ = "attachments" + + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + message_id = Column(UUID(as_uuid=True), ForeignKey("messages.id"), index=True, nullable=False) + filename = Column(String(500), nullable=False) + content_type = Column(String(255), nullable=False) + size = Column(Integer, nullable=False) + s3_bucket = Column(String(255)) + s3_key = Column(String(1024)) + uploaded_at = Column(DateTime, default=datetime.utcnow) + + # Relationship back to message + message = relationship("Message", back_populates="attachments") diff --git a/app/schemas/__pycache__/__init__.cpython-313.pyc b/app/schemas/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..2cce943 Binary files /dev/null and b/app/schemas/__pycache__/__init__.cpython-313.pyc differ diff --git a/app/schemas/__pycache__/schemas.cpython-313.pyc b/app/schemas/__pycache__/schemas.cpython-313.pyc new file mode 100644 index 0000000..8e01805 Binary files /dev/null and b/app/schemas/__pycache__/schemas.cpython-313.pyc differ diff --git a/app/schemas/schemas.py b/app/schemas/schemas.py index 6bda3c6..5d31f1a 100644 --- a/app/schemas/schemas.py +++ b/app/schemas/schemas.py @@ -14,7 +14,7 @@ class InboxResponse(BaseModel): email_address: str api_key: str created_at: datetime - + class Config: from_attributes = True @@ -23,7 +23,21 @@ class InboxPublic(BaseModel): id: UUID email_address: str created_at: datetime - + + class Config: + from_attributes = True + + +# Attachment schemas +class AttachmentResponse(BaseModel): + id: UUID + filename: str + content_type: str + size: int + s3_bucket: Optional[str] = None + s3_key: Optional[str] = None + uploaded_at: datetime + class Config: from_attributes = True @@ -44,7 +58,12 @@ class MessageResponse(BaseModel): body_text: Optional[str] received_at: datetime is_read: bool - + spf_pass: Optional[bool] = None + dkim_pass: Optional[bool] = None + spam_score: Optional[float] = None + is_verified: Optional[bool] = None + attachments: List[AttachmentResponse] = [] + class Config: from_attributes = True diff --git a/app/services/__pycache__/__init__.cpython-313.pyc b/app/services/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..1381bd6 Binary files /dev/null and b/app/services/__pycache__/__init__.cpython-313.pyc differ diff --git a/app/services/__pycache__/attachment_service.cpython-313.pyc b/app/services/__pycache__/attachment_service.cpython-313.pyc new file mode 100644 index 0000000..da5d080 Binary files /dev/null and b/app/services/__pycache__/attachment_service.cpython-313.pyc differ diff --git a/app/services/__pycache__/email_verification.cpython-313.pyc b/app/services/__pycache__/email_verification.cpython-313.pyc new file mode 100644 index 0000000..5005ca1 Binary files /dev/null and b/app/services/__pycache__/email_verification.cpython-313.pyc differ diff --git a/app/services/attachment_service.py b/app/services/attachment_service.py new file mode 100644 index 0000000..2142152 --- /dev/null +++ b/app/services/attachment_service.py @@ -0,0 +1,154 @@ +"""Attachment service for parsing email attachments and storing in S3.""" + +import logging +import uuid +from typing import List, Optional + +import boto3 +from botocore.exceptions import ClientError +from fastapi import UploadFile +from sqlalchemy.orm import Session + +from app.models.models import Attachment + +logger = logging.getLogger(__name__) + + +def generate_s3_key(prefix: str, message_id: str, filename: str) -> str: + """Generate a unique S3 object key for an attachment. + + Uses a UUID to prevent key collisions even if the same filename + is uploaded for the same message. + + Args: + prefix: S3 key prefix (e.g., 'attachments'). + message_id: The message UUID string. + filename: Original attachment filename. + + Returns: + The S3 object key string. + """ + unique = uuid.uuid4().hex[:8] + safe_filename = filename.replace("/", "_").replace("\\", "_") + return f"{prefix}/{message_id}/{unique}_{safe_filename}" + + +def get_s3_client( + aws_access_key_id: str, + aws_secret_access_key: str, + aws_region: str, +): + """Create and return a boto3 S3 client. + + Args: + aws_access_key_id: AWS access key ID. + aws_secret_access_key: AWS secret access key. + aws_region: AWS region name. + + Returns: + A boto3 S3 client instance. + """ + return boto3.client( + "s3", + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + region_name=aws_region, + ) + + +def upload_to_s3( + s3_client, + bucket: str, + key: str, + file_data: bytes, + content_type: str, +) -> bool: + """Upload file data to S3. + + Args: + s3_client: boto3 S3 client. + bucket: S3 bucket name. + key: S3 object key. + file_data: Raw file bytes. + content_type: MIME type of the file. + + Returns: + True if upload succeeded, False otherwise. + """ + try: + s3_client.put_object( + Bucket=bucket, + Key=key, + Body=file_data, + ContentType=content_type, + ) + return True + except ClientError as e: + logger.error("Failed to upload to S3: %s", e) + return False + + +async def parse_and_store_attachments( + files: List[UploadFile], + message_id: str, + db: Session, + s3_client, + bucket: str, + s3_prefix: str = "attachments", +) -> List[Attachment]: + """Parse attachments from webhook files and store in S3. + + For each file: + 1. Read the file content + 2. Upload to S3 with a unique key + 3. Create an Attachment database record + + If S3 is not configured (no bucket), attachment metadata is still + stored but without S3 references. + + Args: + files: List of UploadFile objects from the webhook. + message_id: The UUID of the parent Message. + db: SQLAlchemy database session. + s3_client: boto3 S3 client (can be None if S3 not configured). + bucket: S3 bucket name (empty string if not configured). + s3_prefix: S3 key prefix for organizing attachments. + + Returns: + List of created Attachment model instances. + """ + attachments = [] + + for file in files: + file_data = await file.read() + file_size = len(file_data) + filename = file.filename or "unnamed" + content_type = file.content_type or "application/octet-stream" + + s3_key = None + s3_bucket = None + + if s3_client and bucket: + s3_key = generate_s3_key(s3_prefix, message_id, filename) + uploaded = upload_to_s3(s3_client, bucket, s3_key, file_data, content_type) + if uploaded: + s3_bucket = bucket + else: + logger.warning( + "Failed to upload attachment %s to S3, storing metadata only", + filename, + ) + s3_key = None + + attachment = Attachment( + message_id=message_id, + filename=filename, + content_type=content_type, + size=file_size, + s3_bucket=s3_bucket, + s3_key=s3_key, + ) + db.add(attachment) + attachments.append(attachment) + + return attachments diff --git a/app/services/email_verification.py b/app/services/email_verification.py new file mode 100644 index 0000000..ac990e2 --- /dev/null +++ b/app/services/email_verification.py @@ -0,0 +1,179 @@ +"""Email verification service for SPF/DKIM validation and spam scoring.""" + +import hashlib +import hmac +import logging +from dataclasses import dataclass +from typing import Optional + +import dkim + +logger = logging.getLogger(__name__) + + +@dataclass +class VerificationResult: + """Result of email verification checks.""" + spf_pass: bool + dkim_pass: bool + spam_score: float + is_verified: bool + is_spam: bool + rejection_reason: Optional[str] = None + + +def verify_spf_from_header(spf_header: str) -> bool: + """Parse SPF result from Mailgun webhook header. + + Mailgun provides SPF check result in the 'X-Mailgun-Spf' field. + Valid passing values: 'Pass', 'pass'. + + Args: + spf_header: The SPF result string from Mailgun. + + Returns: + True if SPF passed, False otherwise. + """ + if not spf_header: + return False + return spf_header.strip().lower() == "pass" + + +def verify_dkim_from_header(dkim_header: str) -> bool: + """Parse DKIM result from Mailgun webhook header. + + Mailgun provides DKIM check result in the 'X-Mailgun-Dkim-Check-Result' field. + Valid passing values: 'Pass', 'pass'. + + Args: + dkim_header: The DKIM result string from Mailgun. + + Returns: + True if DKIM passed, False otherwise. + """ + if not dkim_header: + return False + return dkim_header.strip().lower() == "pass" + + +def verify_dkim_signature(raw_email: bytes) -> bool: + """Verify DKIM signature on a raw email message using dkimpy. + + This performs actual cryptographic DKIM verification against DNS records. + Use this when you have access to the raw MIME message. + + Args: + raw_email: The raw email message bytes. + + Returns: + True if DKIM signature is valid, False otherwise. + """ + try: + return dkim.verify(raw_email) + except dkim.DKIMException as e: + logger.warning("DKIM verification failed: %s", e) + return False + except Exception as e: + logger.error("Unexpected error during DKIM verification: %s", e) + return False + + +def parse_spam_score(spam_score_header: str) -> float: + """Parse spam score from Mailgun webhook data. + + Mailgun provides a spam score as a string. Lower is better. + A score of 0.0 means no spam indicators found. + + Args: + spam_score_header: The spam score string from Mailgun. + + Returns: + The spam score as a float, or 0.0 if parsing fails. + """ + if not spam_score_header: + return 0.0 + try: + return float(spam_score_header) + except (ValueError, TypeError): + logger.warning("Could not parse spam score: %s", spam_score_header) + return 0.0 + + +def verify_mailgun_webhook_signature( + signing_key: str, + timestamp: str, + token: str, + signature: str +) -> bool: + """Verify Mailgun webhook signature for authenticity. + + Mailgun signs each webhook POST with HMAC-SHA256 using the + Mailgun HTTP webhook signing key. This ensures the webhook + request actually came from Mailgun. + + Args: + signing_key: The Mailgun webhook signing key. + timestamp: The timestamp from the webhook payload. + token: The token from the webhook payload. + signature: The signature from the webhook payload. + + Returns: + True if the signature is valid, False otherwise. + """ + if not signing_key or not timestamp or not token or not signature: + return False + + encoded_key = signing_key.encode("utf-8") + data = f"{timestamp}{token}".encode("utf-8") + computed = hmac.new(encoded_key, data, hashlib.sha256).hexdigest() + return hmac.compare_digest(computed, signature) + + +def verify_email( + spf_header: str = "", + dkim_header: str = "", + spam_score_header: str = "", + spam_threshold: float = 5.0, + raw_email: Optional[bytes] = None, +) -> VerificationResult: + """Perform comprehensive email verification. + + Checks SPF and DKIM results from Mailgun headers, optionally + verifies DKIM signature cryptographically if raw email is available, + and evaluates spam score against the configured threshold. + + Args: + spf_header: SPF result from Mailgun (e.g., 'Pass', 'Fail'). + dkim_header: DKIM result from Mailgun (e.g., 'Pass', 'Fail'). + spam_score_header: Spam score string from Mailgun. + spam_threshold: Maximum acceptable spam score. + raw_email: Optional raw MIME message for cryptographic DKIM check. + + Returns: + VerificationResult with all check results and overall status. + """ + spf_pass = verify_spf_from_header(spf_header) + dkim_pass = verify_dkim_from_header(dkim_header) + + # If raw email is available, also verify DKIM cryptographically + if raw_email and not dkim_pass: + dkim_pass = verify_dkim_signature(raw_email) + + spam_score = parse_spam_score(spam_score_header) + is_spam = spam_score > spam_threshold + is_verified = spf_pass and dkim_pass and not is_spam + + rejection_reason = None + if is_spam: + rejection_reason = f"Spam score {spam_score} exceeds threshold {spam_threshold}" + elif not spf_pass and not dkim_pass: + rejection_reason = "Both SPF and DKIM verification failed" + + return VerificationResult( + spf_pass=spf_pass, + dkim_pass=dkim_pass, + spam_score=spam_score, + is_verified=is_verified, + is_spam=is_spam, + rejection_reason=rejection_reason, + ) diff --git a/requirements.txt b/requirements.txt index 7633720..e89e7d8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,5 @@ boto3==1.34.0 python-multipart==0.0.6 pytest==7.4.4 httpx==0.26.0 +dkimpy==1.1.7 +dnspython==2.6.1 diff --git a/test.db b/test.db new file mode 100644 index 0000000..641e158 Binary files /dev/null and b/test.db differ diff --git a/tests/__pycache__/test_api.cpython-313-pytest-7.4.4.pyc b/tests/__pycache__/test_api.cpython-313-pytest-7.4.4.pyc new file mode 100644 index 0000000..651f39a Binary files /dev/null and b/tests/__pycache__/test_api.cpython-313-pytest-7.4.4.pyc differ diff --git a/tests/test_api.py b/tests/test_api.py index 08f4976..378c957 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,4 +1,10 @@ +import hashlib +import hmac +import io +from unittest.mock import MagicMock, patch + import pytest +from botocore.exceptions import ClientError from fastapi.testclient import TestClient from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker @@ -51,7 +57,7 @@ def test_get_my_inbox(setup_db): # Create inbox create_resp = client.post("/v1/inboxes") api_key = create_resp.json()["api_key"] - + # Get inbox with API key response = client.get( "/v1/inboxes/me", @@ -75,7 +81,7 @@ def test_list_messages(setup_db): create_resp = client.post("/v1/inboxes") api_key = create_resp.json()["api_key"] email = create_resp.json()["email_address"] - + # Simulate incoming message via webhook client.post( "/v1/webhooks/mailgun", @@ -83,11 +89,11 @@ def test_list_messages(setup_db): "sender": "test@example.com", "recipient": email, "subject": "Test Subject", - "body_plain": "Test body", - "message_id": "test123" + "body-plain": "Test body", + "Message-Id": "test123", } ) - + # List messages response = client.get( "/v1/inboxes/me/messages", @@ -97,3 +103,704 @@ def test_list_messages(setup_db): data = response.json() assert data["total"] == 1 assert data["messages"][0]["subject"] == "Test Subject" + + +# ── Email Verification Tests ── + + +def test_webhook_spf_dkim_pass(setup_db): + """Test that SPF/DKIM pass results are stored on the message.""" + create_resp = client.post("/v1/inboxes") + api_key = create_resp.json()["api_key"] + email = create_resp.json()["email_address"] + + response = client.post( + "/v1/webhooks/mailgun", + data={ + "sender": "verified@example.com", + "recipient": email, + "subject": "Verified Email", + "body-plain": "This is verified", + "Message-Id": "verified-001", + "X-Mailgun-Spf": "Pass", + "X-Mailgun-Dkim-Check-Result": "Pass", + "X-Mailgun-SSscore": "0.5", + } + ) + assert response.status_code == 200 + data = response.json() + assert data["status"] == "received" + assert data["verified"] is True + assert data["spam_score"] == 0.5 + + # Verify stored message includes verification data + messages_resp = client.get( + "/v1/inboxes/me/messages", + headers={"Authorization": f"Bearer {api_key}"} + ) + msg = messages_resp.json()["messages"][0] + assert msg["spf_pass"] is True + assert msg["dkim_pass"] is True + assert msg["spam_score"] == 0.5 + assert msg["is_verified"] is True + + +def test_webhook_spf_fail(setup_db): + """Test that SPF failure is recorded correctly.""" + create_resp = client.post("/v1/inboxes") + api_key = create_resp.json()["api_key"] + email = create_resp.json()["email_address"] + + response = client.post( + "/v1/webhooks/mailgun", + data={ + "sender": "spoofed@example.com", + "recipient": email, + "subject": "SPF Fail", + "body-plain": "SPF failed", + "X-Mailgun-Spf": "Fail", + "X-Mailgun-Dkim-Check-Result": "Pass", + "X-Mailgun-SSscore": "2.0", + } + ) + assert response.status_code == 200 + data = response.json() + assert data["verified"] is False # SPF failed, so not fully verified + + messages_resp = client.get( + "/v1/inboxes/me/messages", + headers={"Authorization": f"Bearer {api_key}"} + ) + msg = messages_resp.json()["messages"][0] + assert msg["spf_pass"] is False + assert msg["dkim_pass"] is True + assert msg["is_verified"] is False + + +def test_webhook_dkim_fail(setup_db): + """Test that DKIM failure is recorded correctly.""" + create_resp = client.post("/v1/inboxes") + api_key = create_resp.json()["api_key"] + email = create_resp.json()["email_address"] + + response = client.post( + "/v1/webhooks/mailgun", + data={ + "sender": "test@example.com", + "recipient": email, + "subject": "DKIM Fail", + "body-plain": "DKIM failed", + "X-Mailgun-Spf": "Pass", + "X-Mailgun-Dkim-Check-Result": "Fail", + "X-Mailgun-SSscore": "1.0", + } + ) + assert response.status_code == 200 + data = response.json() + assert data["verified"] is False + + messages_resp = client.get( + "/v1/inboxes/me/messages", + headers={"Authorization": f"Bearer {api_key}"} + ) + msg = messages_resp.json()["messages"][0] + assert msg["spf_pass"] is True + assert msg["dkim_pass"] is False + assert msg["is_verified"] is False + + +def test_webhook_both_spf_dkim_fail(setup_db): + """Test message stored when both SPF and DKIM fail (not spam though).""" + create_resp = client.post("/v1/inboxes") + api_key = create_resp.json()["api_key"] + email = create_resp.json()["email_address"] + + response = client.post( + "/v1/webhooks/mailgun", + data={ + "sender": "bad@example.com", + "recipient": email, + "subject": "Both Fail", + "body-plain": "Both failed", + "X-Mailgun-Spf": "Fail", + "X-Mailgun-Dkim-Check-Result": "Fail", + "X-Mailgun-SSscore": "3.0", + } + ) + assert response.status_code == 200 + data = response.json() + assert data["verified"] is False + + messages_resp = client.get( + "/v1/inboxes/me/messages", + headers={"Authorization": f"Bearer {api_key}"} + ) + msg = messages_resp.json()["messages"][0] + assert msg["spf_pass"] is False + assert msg["dkim_pass"] is False + assert msg["is_verified"] is False + + +def test_webhook_no_verification_headers(setup_db): + """Test webhook without verification headers (backward compatible).""" + create_resp = client.post("/v1/inboxes") + api_key = create_resp.json()["api_key"] + email = create_resp.json()["email_address"] + + response = client.post( + "/v1/webhooks/mailgun", + data={ + "sender": "test@example.com", + "recipient": email, + "subject": "No Headers", + "body-plain": "No verification headers", + } + ) + assert response.status_code == 200 + + messages_resp = client.get( + "/v1/inboxes/me/messages", + headers={"Authorization": f"Bearer {api_key}"} + ) + msg = messages_resp.json()["messages"][0] + assert msg["spf_pass"] is False + assert msg["dkim_pass"] is False + assert msg["spam_score"] == 0.0 + assert msg["is_verified"] is False + + +# ── Spam Filtering Tests ── + + +def test_webhook_spam_rejected(setup_db): + """Test that emails with spam score > 5 are rejected.""" + create_resp = client.post("/v1/inboxes") + email = create_resp.json()["email_address"] + + response = client.post( + "/v1/webhooks/mailgun", + data={ + "sender": "spammer@example.com", + "recipient": email, + "subject": "Buy Now!!!", + "body-plain": "SPAM SPAM SPAM", + "X-Mailgun-SSscore": "7.5", + } + ) + assert response.status_code == 400 + assert "rejected" in response.json()["detail"].lower() + + +def test_webhook_spam_score_exactly_5_not_rejected(setup_db): + """Test that email with spam score exactly 5.0 is NOT rejected (> 5 threshold).""" + create_resp = client.post("/v1/inboxes") + email = create_resp.json()["email_address"] + + response = client.post( + "/v1/webhooks/mailgun", + data={ + "sender": "borderline@example.com", + "recipient": email, + "subject": "Borderline", + "body-plain": "Borderline spam", + "X-Mailgun-SSscore": "5.0", + } + ) + assert response.status_code == 200 + data = response.json() + assert data["status"] == "received" + + +def test_webhook_spam_score_just_above_threshold(setup_db): + """Test that email with spam score 5.1 IS rejected.""" + create_resp = client.post("/v1/inboxes") + email = create_resp.json()["email_address"] + + response = client.post( + "/v1/webhooks/mailgun", + data={ + "sender": "spammer@example.com", + "recipient": email, + "subject": "Spam", + "body-plain": "Spam content", + "X-Mailgun-SSscore": "5.1", + } + ) + assert response.status_code == 400 + + +def test_webhook_spam_invalid_score_defaults_zero(setup_db): + """Test that invalid spam score defaults to 0.0 (not rejected).""" + create_resp = client.post("/v1/inboxes") + email = create_resp.json()["email_address"] + + response = client.post( + "/v1/webhooks/mailgun", + data={ + "sender": "test@example.com", + "recipient": email, + "subject": "Bad Score", + "body-plain": "Invalid score", + "X-Mailgun-SSscore": "not-a-number", + } + ) + assert response.status_code == 200 + assert response.json()["spam_score"] == 0.0 + + +def test_webhook_spam_does_not_store_message(setup_db): + """Test that rejected spam emails are NOT stored in the database.""" + create_resp = client.post("/v1/inboxes") + api_key = create_resp.json()["api_key"] + email = create_resp.json()["email_address"] + + # Send a spam email + client.post( + "/v1/webhooks/mailgun", + data={ + "sender": "spammer@example.com", + "recipient": email, + "subject": "SPAM", + "body-plain": "This is spam", + "X-Mailgun-SSscore": "10.0", + } + ) + + # Verify no messages stored + messages_resp = client.get( + "/v1/inboxes/me/messages", + headers={"Authorization": f"Bearer {api_key}"} + ) + assert messages_resp.json()["total"] == 0 + + +# ── Attachment Tests ── + + +def test_webhook_with_attachment_metadata(setup_db): + """Test that attachment metadata is stored and returned in message response.""" + create_resp = client.post("/v1/inboxes") + api_key = create_resp.json()["api_key"] + email = create_resp.json()["email_address"] + + # Create a test file attachment + file_content = b"Hello, this is a test file attachment." + files = { + "attachment-1": ("test.txt", io.BytesIO(file_content), "text/plain"), + } + data = { + "sender": "test@example.com", + "recipient": email, + "subject": "With Attachment", + "body-plain": "See attached", + "attachment-count": "1", + "X-Mailgun-Spf": "Pass", + "X-Mailgun-Dkim-Check-Result": "Pass", + "X-Mailgun-SSscore": "0.1", + } + + response = client.post("/v1/webhooks/mailgun", data=data, files=files) + assert response.status_code == 200 + + # Verify attachment metadata in messages endpoint + messages_resp = client.get( + "/v1/inboxes/me/messages", + headers={"Authorization": f"Bearer {api_key}"} + ) + msg = messages_resp.json()["messages"][0] + assert len(msg["attachments"]) == 1 + att = msg["attachments"][0] + assert att["filename"] == "test.txt" + assert att["content_type"] == "text/plain" + assert att["size"] == len(file_content) + assert "id" in att + assert "uploaded_at" in att + + +def test_webhook_with_multiple_attachments(setup_db): + """Test handling multiple attachments on a single email.""" + create_resp = client.post("/v1/inboxes") + api_key = create_resp.json()["api_key"] + email = create_resp.json()["email_address"] + + file1_content = b"File one content" + file2_content = b"File two content with more data" + files = { + "attachment-1": ("document.pdf", io.BytesIO(file1_content), "application/pdf"), + "attachment-2": ("image.png", io.BytesIO(file2_content), "image/png"), + } + data = { + "sender": "test@example.com", + "recipient": email, + "subject": "Multiple Attachments", + "body-plain": "Two files attached", + "attachment-count": "2", + } + + response = client.post("/v1/webhooks/mailgun", data=data, files=files) + assert response.status_code == 200 + + messages_resp = client.get( + "/v1/inboxes/me/messages", + headers={"Authorization": f"Bearer {api_key}"} + ) + msg = messages_resp.json()["messages"][0] + assert len(msg["attachments"]) == 2 + + filenames = {att["filename"] for att in msg["attachments"]} + assert "document.pdf" in filenames + assert "image.png" in filenames + + sizes = {att["filename"]: att["size"] for att in msg["attachments"]} + assert sizes["document.pdf"] == len(file1_content) + assert sizes["image.png"] == len(file2_content) + + +def test_webhook_no_attachments_empty_list(setup_db): + """Test that messages without attachments return empty attachments list.""" + create_resp = client.post("/v1/inboxes") + api_key = create_resp.json()["api_key"] + email = create_resp.json()["email_address"] + + client.post( + "/v1/webhooks/mailgun", + data={ + "sender": "test@example.com", + "recipient": email, + "subject": "No Attachments", + "body-plain": "Plain email", + } + ) + + messages_resp = client.get( + "/v1/inboxes/me/messages", + headers={"Authorization": f"Bearer {api_key}"} + ) + msg = messages_resp.json()["messages"][0] + assert msg["attachments"] == [] + + +def test_webhook_attachment_with_s3_upload(setup_db): + """Test that attachments are uploaded to S3 when configured.""" + create_resp = client.post("/v1/inboxes") + email = create_resp.json()["email_address"] + api_key = create_resp.json()["api_key"] + + file_content = b"S3 test file content" + files = { + "attachment-1": ("s3test.txt", io.BytesIO(file_content), "text/plain"), + } + data = { + "sender": "test@example.com", + "recipient": email, + "subject": "S3 Upload Test", + "body-plain": "S3 test", + "attachment-count": "1", + } + + mock_s3 = MagicMock() + + with patch("app.services.attachment_service.upload_to_s3", return_value=True) as mock_upload, \ + patch("app.main.get_s3_client", return_value=mock_s3), \ + patch.object( + type(app.state), "__getattr__", create=True + ): + # Temporarily patch settings to have S3 config + import app.main as main_module + orig_bucket = main_module.settings.s3_bucket + orig_key = main_module.settings.aws_access_key_id + main_module.settings.s3_bucket = "test-bucket" + main_module.settings.aws_access_key_id = "test-key" + + try: + response = client.post("/v1/webhooks/mailgun", data=data, files=files) + assert response.status_code == 200 + + # Verify upload_to_s3 was called + mock_upload.assert_called_once() + # upload_to_s3(s3_client, bucket, key, file_data, content_type) + call_args = mock_upload.call_args[0] + assert call_args[0] is mock_s3 # s3_client + assert call_args[1] == "test-bucket" # bucket + assert "s3test.txt" in call_args[2] # s3_key contains filename + assert call_args[3] == file_content # file_data + assert call_args[4] == "text/plain" # content_type + finally: + main_module.settings.s3_bucket = orig_bucket + main_module.settings.aws_access_key_id = orig_key + + +# ── Webhook Signature Verification Tests ── + + +def test_webhook_valid_signature(setup_db): + """Test that valid Mailgun webhook signature is accepted.""" + create_resp = client.post("/v1/inboxes") + email = create_resp.json()["email_address"] + + signing_key = "test-signing-key-12345" + timestamp = "1234567890" + token = "test-token-abc" + signature = hmac.new( + signing_key.encode("utf-8"), + f"{timestamp}{token}".encode("utf-8"), + hashlib.sha256, + ).hexdigest() + + with patch("app.main.settings") as mock_settings: + mock_settings.mailgun_signing_key = signing_key + mock_settings.spam_score_threshold = 5.0 + mock_settings.s3_bucket = "" + mock_settings.aws_access_key_id = "" + + response = client.post( + "/v1/webhooks/mailgun", + data={ + "sender": "test@example.com", + "recipient": email, + "subject": "Signed", + "body-plain": "Signed email", + "timestamp": timestamp, + "token": token, + "signature": signature, + } + ) + # Should accept (200 or dropped since inbox might not match the mock) + assert response.status_code in (200, 403) or response.json().get("status") in ("received", "dropped") + + +def test_webhook_invalid_signature(setup_db): + """Test that invalid Mailgun webhook signature is rejected.""" + create_resp = client.post("/v1/inboxes") + email = create_resp.json()["email_address"] + + with patch("app.main.settings") as mock_settings: + mock_settings.mailgun_signing_key = "real-signing-key" + mock_settings.spam_score_threshold = 5.0 + + response = client.post( + "/v1/webhooks/mailgun", + data={ + "sender": "test@example.com", + "recipient": email, + "subject": "Forged", + "body-plain": "Forged webhook", + "timestamp": "1234567890", + "token": "token", + "signature": "invalid-signature", + } + ) + assert response.status_code == 403 + assert "signature" in response.json()["detail"].lower() + + +def test_webhook_no_signing_key_skips_verification(setup_db): + """Test that webhook signature verification is skipped when no key configured.""" + create_resp = client.post("/v1/inboxes") + email = create_resp.json()["email_address"] + + # No signature fields, no signing key configured - should work fine + response = client.post( + "/v1/webhooks/mailgun", + data={ + "sender": "test@example.com", + "recipient": email, + "subject": "No Signing", + "body-plain": "No signature check", + } + ) + assert response.status_code == 200 + + +# ── Webhook Backward Compatibility Tests ── + + +def test_webhook_body_plain_field_name(setup_db): + """Test that both 'body-plain' and 'body_plain' field names work.""" + create_resp = client.post("/v1/inboxes") + api_key = create_resp.json()["api_key"] + email = create_resp.json()["email_address"] + + # Test with body-plain (Mailgun's actual format) + client.post( + "/v1/webhooks/mailgun", + data={ + "sender": "test@example.com", + "recipient": email, + "subject": "Hyphenated", + "body-plain": "Body with hyphen", + } + ) + + messages_resp = client.get( + "/v1/inboxes/me/messages", + headers={"Authorization": f"Bearer {api_key}"} + ) + msg = messages_resp.json()["messages"][0] + assert msg["body_text"] == "Body with hyphen" + + +def test_webhook_dropped_for_inactive_inbox(setup_db): + """Test that emails for non-existent inboxes are silently dropped.""" + response = client.post( + "/v1/webhooks/mailgun", + data={ + "sender": "test@example.com", + "recipient": "nonexistent@agents.dev", + "subject": "Dropped", + "body-plain": "This should be dropped", + } + ) + assert response.status_code == 200 + assert response.json()["status"] == "dropped" + + +# ── Email Verification Service Unit Tests ── + + +def test_verify_email_all_pass(): + """Test verify_email with all checks passing.""" + from app.services.email_verification import verify_email + + result = verify_email( + spf_header="Pass", + dkim_header="Pass", + spam_score_header="0.5", + spam_threshold=5.0, + ) + assert result.spf_pass is True + assert result.dkim_pass is True + assert result.spam_score == 0.5 + assert result.is_verified is True + assert result.is_spam is False + assert result.rejection_reason is None + + +def test_verify_email_spam_detected(): + """Test verify_email with spam score above threshold.""" + from app.services.email_verification import verify_email + + result = verify_email( + spf_header="Pass", + dkim_header="Pass", + spam_score_header="8.5", + spam_threshold=5.0, + ) + assert result.is_spam is True + assert result.is_verified is False + assert "exceeds threshold" in result.rejection_reason + + +def test_verify_email_empty_headers(): + """Test verify_email with empty headers.""" + from app.services.email_verification import verify_email + + result = verify_email() + assert result.spf_pass is False + assert result.dkim_pass is False + assert result.spam_score == 0.0 + assert result.is_verified is False + assert result.is_spam is False + + +def test_verify_email_case_insensitive(): + """Test that SPF/DKIM header parsing is case insensitive.""" + from app.services.email_verification import verify_email + + result = verify_email( + spf_header="PASS", + dkim_header="pass", + ) + assert result.spf_pass is True + assert result.dkim_pass is True + + +def test_parse_spam_score_edge_cases(): + """Test spam score parsing with various edge cases.""" + from app.services.email_verification import parse_spam_score + + assert parse_spam_score("") == 0.0 + assert parse_spam_score(None) == 0.0 + assert parse_spam_score("abc") == 0.0 + assert parse_spam_score("3.14") == 3.14 + assert parse_spam_score("-1.5") == -1.5 + assert parse_spam_score("0") == 0.0 + assert parse_spam_score("10.0") == 10.0 + + +def test_verify_mailgun_webhook_signature(): + """Test Mailgun webhook signature verification.""" + from app.services.email_verification import verify_mailgun_webhook_signature + + key = "my-signing-key" + timestamp = "1234567890" + token = "unique-token" + sig = hmac.new( + key.encode("utf-8"), + f"{timestamp}{token}".encode("utf-8"), + hashlib.sha256, + ).hexdigest() + + assert verify_mailgun_webhook_signature(key, timestamp, token, sig) is True + assert verify_mailgun_webhook_signature(key, timestamp, token, "bad") is False + assert verify_mailgun_webhook_signature("", timestamp, token, sig) is False + assert verify_mailgun_webhook_signature(key, "", token, sig) is False + assert verify_mailgun_webhook_signature(key, timestamp, "", sig) is False + + +def test_verify_dkim_signature_with_dkimpy(): + """Test DKIM signature verification via dkimpy (mocked DNS).""" + from app.services.email_verification import verify_dkim_signature + + # Test with invalid raw email (should return False gracefully) + result = verify_dkim_signature(b"not a valid email") + assert result is False + + +# ── Attachment Service Unit Tests ── + + +def test_generate_s3_key(): + """Test S3 key generation produces unique, safe keys.""" + from app.services.attachment_service import generate_s3_key + + key1 = generate_s3_key("attachments", "msg-123", "file.txt") + key2 = generate_s3_key("attachments", "msg-123", "file.txt") + + # Keys should be unique even for same inputs + assert key1 != key2 + assert key1.startswith("attachments/msg-123/") + assert key1.endswith("_file.txt") + + # Test path traversal prevention + key3 = generate_s3_key("attachments", "msg-456", "../../etc/passwd") + assert "/" not in key3.split("/", 2)[-1].rsplit("_", 1)[0][:-1] # no traversal in UUID + assert ".._.._.._etc_passwd" in key3 or "etc_passwd" in key3 + + +def test_upload_to_s3_success(): + """Test successful S3 upload.""" + from app.services.attachment_service import upload_to_s3 + + mock_s3 = MagicMock() + result = upload_to_s3(mock_s3, "bucket", "key", b"data", "text/plain") + assert result is True + mock_s3.put_object.assert_called_once_with( + Bucket="bucket", + Key="key", + Body=b"data", + ContentType="text/plain", + ) + + +def test_upload_to_s3_failure(): + """Test S3 upload failure handling.""" + from app.services.attachment_service import upload_to_s3 + + mock_s3 = MagicMock() + mock_s3.put_object.side_effect = ClientError( + {"Error": {"Code": "500", "Message": "Internal Server Error"}}, + "PutObject", + ) + result = upload_to_s3(mock_s3, "bucket", "key", b"data", "text/plain") + assert result is False