Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 52 additions & 1 deletion src/backend/core/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
# pylint: disable=too-many-lines

import json
from urllib.parse import quote

from bs4 import BeautifulSoup
from django.conf import settings
from django.db import transaction
from django.db.models import Count, Exists, OuterRef, Q
from django.utils.translation import gettext_lazy as _
Expand Down Expand Up @@ -661,7 +664,55 @@ def get_textBody(self, instance): # pylint: disable=invalid-name
@extend_schema_field(serializers.ListField(child=serializers.DictField()))
def get_htmlBody(self, instance):  # pylint: disable=invalid-name
    """Return the list of HTML body parts (JMAP style).

    When ``settings.PROXY_EXTERNAL_IMAGES`` is enabled, the request carries an
    authenticated user, and that user reaches the message's thread through a
    mailbox they have access to, the ``<img>`` sources of every part are
    rewritten by ``_proxy_images_in_html``. In every other case the parsed
    parts are returned as they are.
    """
    html_body_parts = instance.get_parsed_field("htmlBody") or []

    # Check the feature flag (and the trivial empty case) before anything
    # else: the thread-access lookup below is a database query, and this
    # method runs for every serialized message.
    if not settings.PROXY_EXTERNAL_IMAGES or not html_body_parts:
        return html_body_parts

    request = self.context.get("request")
    user = getattr(request, "user", None)
    if user is None or not user.is_authenticated:
        return html_body_parts

    # First thread access whose mailbox is accessible to the requesting user;
    # its mailbox id is embedded in the generated image-proxy URLs.
    thread_access = instance.thread.accesses.filter(
        mailbox__accesses__user=user
    ).first()
    if thread_access is None:
        return html_body_parts

    return self._proxy_images_in_html(
        html_body_parts, instance, thread_access.mailbox
    )

def _proxy_images_in_html(self, html_body_parts, instance, mailbox):
    """Rewrite external image URLs and CID references to use proxy.

    Args:
        html_body_parts: list of body-part dicts; the HTML of each part is
            read from its ``"content"`` key.
        instance: the message; its parsed ``"attachments"`` field is used to
            resolve ``cid:`` references, and its ``id`` is embedded in the
            blob download URL.
        mailbox: mailbox whose ``id`` is embedded in the image-proxy URL.

    Returns:
        A new list of parts. Parts without content are passed through
        unchanged; every other part is returned as a shallow copy whose
        ``"content"`` is the re-serialized HTML, so the caller's parsed data
        is never modified.
    """
    attachments = instance.get_parsed_field("attachments") or []
    cid_map = {
        att.get("cid"): idx for idx, att in enumerate(attachments) if att.get("cid")
    }
    api_root = f"/api/{settings.API_VERSION}"

    proxified_parts = []
    for part in html_body_parts:
        html_content = part.get("content", "")
        if not html_content:
            proxified_parts.append(part)
            continue

        soup = BeautifulSoup(html_content, "html.parser")

        # NOTE(review): only the ``src`` attribute of <img> is rewritten;
        # ``srcset`` and CSS ``url(...)`` references are left as they are and
        # can still trigger direct external loads.
        for img in soup.find_all("img"):
            # Browsers ignore surrounding whitespace and scheme case, so
            # normalise before matching; otherwise "HTTP://..." or
            # " https://..." would bypass the proxy.
            src = (img.get("src") or "").strip()
            if not src:
                continue
            lowered = src.lower()

            if lowered.startswith("cid:"):
                cid = src[4:]
                if cid in cid_map:
                    img["src"] = (
                        f"{api_root}/blob/msg_{instance.id}_{cid_map[cid]}/download/"
                    )
                continue

            if src.startswith("//"):
                # Protocol-relative URLs still point at an external host; the
                # proxy needs an absolute URL, so assume HTTPS.
                src = f"https:{src}"
                lowered = src.lower()

            if lowered.startswith(("http://", "https://")):
                img["src"] = (
                    f"{api_root}/mailboxes/{mailbox.id}/image-proxy/"
                    f"?url={quote(src)}"
                )

        # Copy instead of assigning into ``part`` so the dicts handed in by
        # the caller are left untouched.
        proxified_parts.append({**part, "content": str(soup)})

    return proxified_parts

@extend_schema_field(serializers.CharField(allow_null=True))
def get_draftBody(self, instance): # pylint: disable=invalid-name
Expand Down
4 changes: 4 additions & 0 deletions src/backend/core/api/viewsets/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@ def download(self, request, pk=None):
f'attachment; filename="{attachment["name"]}"'
)
response["Content-Length"] = attachment["size"]
# Enable browser caching for 30 days (inline images benefit from this)
response["Cache-Control"] = "private, max-age=2592000"

else:
# Get the blob
Expand All @@ -218,6 +220,8 @@ def download(self, request, pk=None):
# Add appropriate headers for download
response["Content-Disposition"] = f'attachment; filename="{filename}"'
response["Content-Length"] = blob.size
# Enable browser caching for 30 days (inline images benefit from this)
response["Cache-Control"] = "private, max-age=2592000"

return response

Expand Down
135 changes: 135 additions & 0 deletions src/backend/core/api/viewsets/image_proxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"""API ViewSet for proxying external images."""

import ipaddress
import logging
import socket
from urllib.parse import unquote, urljoin, urlsplit

import requests
from django.conf import settings
from django.http import HttpResponse
from drf_spectacular.utils import OpenApiParameter, OpenApiResponse, extend_schema
from rest_framework import status
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.viewsets import ViewSet

from core import models
from core.api import permissions

logger = logging.getLogger(__name__)


class ImageProxyViewSet(ViewSet):
"""
ViewSet for proxying external images to protect user privacy.
Images are fetched on-demand from external sources and served through
the application. This prevents tracking pixels from leaking user IP
addresses and browsing behavior to external servers.
"""

permission_classes = [permissions.IsAuthenticated]

@extend_schema(
description="""Proxy an external image through the server.
This endpoint fetches images from external sources and serves them
through the application to protect user privacy. Requires the
PROXY_EXTERNAL_IMAGES environment variable to be set to true.
""",
parameters=[
OpenApiParameter(
name="mailbox_id",
type=str,
location=OpenApiParameter.PATH,
description="ID of the mailbox",
required=True,
),
OpenApiParameter(
name="url",
type=str,
location=OpenApiParameter.QUERY,
description="The external image URL to proxy",
required=True,
),
],
responses={
200: OpenApiResponse(description="Image content"),
400: OpenApiResponse(description="Invalid request"),
403: OpenApiResponse(description="Forbidden"),
413: OpenApiResponse(description="Image too large"),
502: OpenApiResponse(description="Failed to fetch external image"),
},
Comment on lines +57 to +63
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Add 404 response to OpenAPI schema.

The implementation returns 404 when the mailbox is not found (lines 70-72), but this response code is not documented in the OpenAPI schema.

Apply this diff:

         responses={
+            404: OpenApiResponse(description="Mailbox not found"),
         },
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
responses={
200: OpenApiResponse(description="Image content"),
400: OpenApiResponse(description="Invalid request"),
403: OpenApiResponse(description="Forbidden"),
413: OpenApiResponse(description="Image too large"),
502: OpenApiResponse(description="Failed to fetch external image"),
},
responses={
200: OpenApiResponse(description="Image content"),
400: OpenApiResponse(description="Invalid request"),
403: OpenApiResponse(description="Forbidden"),
404: OpenApiResponse(description="Mailbox not found"),
413: OpenApiResponse(description="Image too large"),
502: OpenApiResponse(description="Failed to fetch external image"),
},
🤖 Prompt for AI Agents
In src/backend/core/api/viewsets/image_proxy.py around lines 57 to 63, the
OpenAPI responses mapping is missing the 404 entry even though the handler
returns 404 when a mailbox is not found (lines 70-72); add a 404:
OpenApiResponse(description="Not found" or "Mailbox not found") entry to the
responses dict so the OpenAPI schema documents the 404 case.

)
def list(self, request, mailbox_id=None):
    """Proxy an external image through the server.

    Checks, in order: the mailbox exists (404), the requesting user has an
    access on it (403), the proxy feature is enabled (403) and a ``url``
    query parameter is present (400). The image is then fetched by
    ``_fetch_image``; any ``requests`` failure is logged and reported as 502.
    """
    try:
        mailbox = models.Mailbox.objects.get(pk=mailbox_id)
    except models.Mailbox.DoesNotExist:
        return Response(
            {"error": "Mailbox not found"}, status=status.HTTP_404_NOT_FOUND
        )

    if not mailbox.accesses.filter(user=request.user).exists():
        return Response({"error": "Forbidden"}, status=status.HTTP_403_FORBIDDEN)

    if not settings.PROXY_EXTERNAL_IMAGES:
        return Response(
            {"error": "Image proxy not enabled"},
            status=status.HTTP_403_FORBIDDEN,
        )

    # Query parameters are already percent-decoded by the framework. Decoding
    # a second time would corrupt URLs that legitimately contain escapes
    # (e.g. "%2F" or "%25" inside a signed URL), so the value is used as is.
    url = request.query_params.get("url")
    if not url:
        return Response(
            {"error": "Missing url parameter"}, status=status.HTTP_400_BAD_REQUEST
        )

    max_size = settings.PROXY_MAX_IMAGE_SIZE_MB * 1024 * 1024

    try:
        return self._fetch_image(url, max_size)
    except requests.RequestException as exc:
        logger.warning("Failed to fetch external image from %s: %s", url, exc)
        return Response(
            {"error": "Failed to fetch image"}, status=status.HTTP_502_BAD_GATEWAY
        )

def _fetch_image(self, url, max_size):
    """Fetch ``url`` and build the HTTP response for it.

    Redirects are followed by hand so that every hop is validated with
    ``_is_public_http_url`` before it is requested, and the body is read in
    chunks so that at most ``max_size`` bytes (plus one chunk) are buffered.

    Raises:
        requests.RequestException: on network errors, HTTP error statuses or
            too many redirects.
    """
    max_redirects = 5
    current_url = url

    for _ in range(max_redirects + 1):
        if not self._is_public_http_url(current_url):
            logger.warning("Refusing to proxy disallowed URL %s", current_url)
            return Response(
                {"error": "URL not allowed"}, status=status.HTTP_400_BAD_REQUEST
            )

        # The context manager releases the upstream connection on every exit
        # path, including the early returns below.
        with requests.get(
            current_url,
            timeout=10,
            stream=True,
            allow_redirects=False,
            headers={"User-Agent": "Messages-ImageProxy/1.0"},
        ) as upstream:
            if upstream.is_redirect:
                current_url = urljoin(current_url, upstream.headers["location"])
                continue

            upstream.raise_for_status()

            content_type = upstream.headers.get("content-type", "")
            if not content_type.lower().startswith("image/"):
                return Response(
                    {"error": "Not an image"}, status=status.HTTP_400_BAD_REQUEST
                )

            # Content-Length is only a hint: it may be absent, malformed or
            # wrong, so the real limit is enforced while streaming.
            try:
                declared_length = int(upstream.headers.get("content-length", 0))
            except ValueError:
                declared_length = 0
            if declared_length > max_size:
                return Response(
                    {"error": "Image too large"},
                    status=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
                )

            body = bytearray()
            for chunk in upstream.iter_content(chunk_size=64 * 1024):
                body.extend(chunk)
                if len(body) > max_size:
                    return Response(
                        {"error": "Image too large"},
                        status=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
                    )

            return HttpResponse(
                bytes(body),
                content_type=content_type,
                headers={
                    # The endpoint is authenticated, so shared caches must
                    # not store the response.
                    "Cache-Control": "private, max-age=2592000",
                    "X-Proxied-From": url,
                    # Upstream content (e.g. SVG) is served from this origin:
                    # forbid sniffing and neutralise active content.
                    "X-Content-Type-Options": "nosniff",
                    "Content-Security-Policy": (
                        "default-src 'none'; style-src 'unsafe-inline'; sandbox"
                    ),
                },
            )

    raise requests.TooManyRedirects(f"Exceeded {max_redirects} redirects")

@staticmethod
def _is_public_http_url(url):
    """Return True if ``url`` is http(s) and resolves only to global addresses.

    This blocks requests to loopback, private, link-local and otherwise
    non-global addresses (server-side request forgery).

    NOTE(review): the hostname is resolved again when the request is made, so
    a DNS answer that changes between the check and the fetch is not covered.
    """
    # Control characters have no place in a URL and must not reach the
    # response headers.
    if not url.isprintable():
        return False

    try:
        parts = urlsplit(url)
        hostname = parts.hostname
        port = parts.port
    except ValueError:
        return False

    if parts.scheme not in ("http", "https") or not hostname:
        return False

    default_port = 443 if parts.scheme == "https" else 80
    try:
        resolved = socket.getaddrinfo(
            hostname, port or default_port, type=socket.SOCK_STREAM
        )
    except (socket.gaierror, UnicodeError):
        return False

    if not resolved:
        return False

    for info in resolved:
        try:
            address = ipaddress.ip_address(info[4][0])
        except ValueError:
            return False
        if not address.is_global:
            return False
    return True
13 changes: 13 additions & 0 deletions src/backend/core/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from core.api.viewsets.contacts import ContactViewSet
from core.api.viewsets.draft import DraftMessageView
from core.api.viewsets.flag import ChangeFlagView
from core.api.viewsets.image_proxy import ImageProxyViewSet
from core.api.viewsets.import_message import ImportViewSet, MessagesArchiveUploadViewSet
from core.api.viewsets.inbound.mta import InboundMTAViewSet
from core.api.viewsets.inbound.widget import InboundWidgetViewSet
Expand Down Expand Up @@ -66,6 +67,12 @@
r"accesses", MailboxAccessViewSet, basename="mailboxaccess"
)

# Router for /mailboxes/{mailbox_id}/image-proxy/
mailbox_image_proxy_nested_router = DefaultRouter()
mailbox_image_proxy_nested_router.register(
r"image-proxy", ImageProxyViewSet, basename="image-proxy"
)

# Router for /maildomains/{maildomain_pk}/**/
maildomain_nested_router = DefaultRouter()
# Register /maildomains/{maildomain_pk}/mailboxes/
Expand Down Expand Up @@ -129,6 +136,12 @@
mailbox_access_nested_router.urls
), # Includes /mailboxes/{id}/accesses/
),
path(
"mailboxes/<uuid:mailbox_id>/",
include(
mailbox_image_proxy_nested_router.urls
), # Includes /mailboxes/{id}/image-proxy/
),
path(
"mailboxes/<uuid:mailbox_id>/",
include(
Expand Down
8 changes: 8 additions & 0 deletions src/backend/messages/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,14 @@ class Base(Configuration):
None, environ_name="OPENSEARCH_CA_CERTS", environ_prefix=None
)

# Image Proxy
PROXY_EXTERNAL_IMAGES = values.BooleanValue(
False, environ_name="PROXY_EXTERNAL_IMAGES", environ_prefix=None
)
PROXY_MAX_IMAGE_SIZE_MB = values.PositiveIntegerValue(
5, environ_name="PROXY_MAX_IMAGE_SIZE_MB", environ_prefix=None
)

# Security
ALLOWED_HOSTS = values.ListValue([])
SECRET_KEY = values.Value(None)
Expand Down
38 changes: 36 additions & 2 deletions src/backend/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ requires-python = ">=3.13,<4.0"

# Note: after changing this list you must re-run `make back-poetry-lock`
dependencies = [
"beautifulsoup4==4.12.3",
"boto3==1.40.43",
"botocore==1.40.43",
"celery[redis]==5.5.2",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ const MessageBody = ({ rawHtmlBody, rawTextBody = '', attachments = [], isHidden
const instance = DomPurify();
instance.addHook(
'afterSanitizeAttributes',
function (node) {
function (node: Element) {
// Allow anchor tags to be opened in the parent window if the href is an anchor
// Other links are opened in a new tab and safe rel attributes are set

Expand All @@ -70,15 +70,20 @@ const MessageBody = ({ rawHtmlBody, rawTextBody = '', attachments = [], isHidden
node.setAttribute('rel', 'noopener noreferrer');
}

// Transform CID references in img src attributes
if (node.tagName === 'IMG' && cidToBlobUrlMap.size > 0) {
const src = node.getAttribute('src');
if (src && src.startsWith('cid:')) {
const cid = src.substring(4); // Remove 'cid:' prefix
const blobUrl = cidToBlobUrlMap.get(cid);
if (blobUrl) {
node.setAttribute('src', blobUrl);
node.setAttribute('loading', 'lazy');
// Transform CID references in img src attributes and add lazy loading to all images
if (node.tagName === 'IMG') {
// Add lazy loading to all images for better performance
node.setAttribute('loading', 'lazy');

// Transform CID references if applicable
if (cidToBlobUrlMap.size > 0) {
const src = node.getAttribute('src');
if (src && src.startsWith('cid:')) {
const cid = src.substring(4); // Remove 'cid:' prefix
const blobUrl = cidToBlobUrlMap.get(cid);
if (blobUrl) {
node.setAttribute('src', blobUrl);
}
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/nginx/servers.conf.erb
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ upstream backend_server {
}

server {
listen <%= ENV["PORT"] %>;

listen <%= ENV["PORT"] %> http2;
server_name _;
server_tokens off;

Expand Down
Loading