Skip to content

Commit 2f01be8

Browse files
ericapisani and claude committed
fix(pydantic-ai): Fix DATA_URL_BASE64_REGEX to match complex MIME types
The regex used to detect and redact base64 data URLs only allowed alphabetic characters in MIME types, causing it to fail for types like `image/svg+xml`, `application/vnd.ms-excel`, or `font/woff2`. When the match failed, the full raw data URL (including base64 content) was passed through to Sentry instead of being redacted with BLOB_DATA_SUBSTITUTE, resulting in unintended data leakage.

Expand the MIME type character class to include digits, `.`, `+`, and `-` to match all common MIME types per RFC 2045.

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 9c24bd9 commit 2f01be8

2 files changed

Lines changed: 32 additions & 1 deletion

File tree

sentry_sdk/integrations/pydantic_ai/consts.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@
55
# Matches data URLs with base64-encoded content, e.g. "data:image/png;base64,iVBORw0K..."
# Group 1: MIME type (e.g. "image/png", "image/svg+xml"), Group 2: base64 data
#
# The type and subtype accept the RFC 6838 "restricted-name" character set
# (a leading letter or digit, then letters, digits and any of ``!#$&^_.+-``),
# so names such as "application/vnd.ms-excel", "font/woff2" or
# "application/x_custom" match.
#
# Optional media-type parameters between the MIME type and ";base64"
# (e.g. "data:text/plain;charset=utf-8;base64,...", allowed by RFC 2397) are
# consumed by a non-capturing group, so the group numbering is unchanged.
# A data URL that fails to match this pattern is not recognised as base64
# content by code relying on it, so the pattern errs on the side of matching.
DATA_URL_BASE64_REGEX = re.compile(
    r"^data:"
    r"([a-zA-Z0-9][a-zA-Z0-9!#$&^_.+\-]*/[a-zA-Z0-9][a-zA-Z0-9!#$&^_.+\-]*)"
    r"(?:;[^;,]+)*"
    r";base64,([A-Za-z0-9+/\-_]+={0,2})$"
)

tests/integrations/pydantic_ai/test_pydantic_ai.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2956,6 +2956,37 @@ def test_image_url_http_url_with_base64_data_in_query_param_is_not_redacted_no_m
29562956
assert found_image, "Image content item should be found in messages data"
29572957

29582958

2959+
def test_image_url_redacts_base64_data_url_with_complex_mime_type(
    sentry_init, capture_events
):
    """A base64 data URL whose MIME type contains non-alphabetic characters
    (here ``image/svg+xml``) must still be found as image content for that
    MIME type in the span's recorded messages."""
    sentry_init(
        integrations=[PydanticAIIntegration()],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )
    captured = capture_events()

    with sentry_sdk.start_transaction(op="test", name="test"):
        test_span = sentry_sdk.start_span(op="test_span")

        # SVG payload delivered as a base64 data URL with a "+" in the subtype.
        svg_image = ImageUrl(
            url="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciLz4="
        )
        prompt_part = UserPromptPart(content=["Look at this image:", svg_image])

        # Stand-in message exposing only the attributes set here.
        message = MagicMock()
        message.instructions = None
        message.parts = [prompt_part]

        _set_input_messages(test_span, [message])
        test_span.finish()

    # Exactly one event (the transaction) is expected.
    [transaction_event] = captured
    recorded = _get_messages_from_span(transaction_event["spans"][0]["data"])
    assert _find_image_content(recorded, "image/svg+xml")
2988+
2989+
29592990
@pytest.mark.asyncio
29602991
async def test_invoke_agent_image_url_http_url_no_redaction(
29612992
sentry_init, capture_events

0 commit comments

Comments
 (0)