Skip to content

Commit b007228

Browse files
jgreer013 and claude committed
fix(litellm): store bookkeeping span off-band, not in forwarded metadata
With LiteLLMIntegration enabled, any call passing caller `metadata` crashed during request serialization. `_input_callback` stored the live Span in the caller's `metadata` dict, and some providers (e.g. Anthropic's /v1/messages passthrough) forward that dict into the outbound request body, so `json.dumps(request_body)` raised `TypeError: Object of type Span is not JSON serializable` before the request was sent. The span (holding the verbatim prompt under send_default_pii) could also leak to the provider.

Stash the span on a top-level key of the per-request kwargs dict (litellm's `model_call_details`) that litellm threads through the input/success/failure callbacks, instead of in the forwarded `metadata` sub-dict. This ties the span's lifetime to the request with no module-level tracking, mirroring how the clickhouse/dramatiq integrations stash a span on their per-request object.

The Anthropic request body is built only from recognized request params, not from `model_call_details`, so the span is never serialized onto the wire (verified end-to-end against the passthrough).

Fixes #6596

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 3d2deca commit b007228

2 files changed

Lines changed: 78 additions & 15 deletions

File tree

sentry_sdk/integrations/litellm.py

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,24 @@
3131
raise DidNotEnable("LiteLLM not installed")
3232

3333

34-
def _get_metadata_dict(kwargs: "Dict[str, Any]") -> "Dict[str, Any]":
35-
"""Get the metadata dictionary from the kwargs."""
36-
litellm_params = kwargs.setdefault("litellm_params", {})
34+
# Stash the span on a top-level key of the per-request kwargs dict litellm passes
35+
# to every callback, so it lives and dies with the request. Not in
36+
# kwargs["litellm_params"]["metadata"]: litellm forwards that caller dict to some
37+
# providers (e.g. Anthropic's /v1/messages passthrough), which would break
38+
# request serialization and leak the span to the provider.
39+
_SPAN_KEY = "_sentry_span"
3740

38-
# we need this weird little dance, as metadata might be set but may be None initially
39-
metadata = litellm_params.get("metadata")
40-
if metadata is None:
41-
metadata = {}
42-
litellm_params["metadata"] = metadata
43-
return metadata
41+
42+
def _store_span(kwargs: "Dict[str, Any]", span: "Any") -> None:
43+
kwargs[_SPAN_KEY] = span
44+
45+
46+
def _peek_span(kwargs: "Dict[str, Any]") -> "Any":
47+
return kwargs.get(_SPAN_KEY)
48+
49+
50+
def _pop_span(kwargs: "Dict[str, Any]") -> "Any":
51+
return kwargs.pop(_SPAN_KEY, None)
4452

4553

4654
def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
@@ -117,8 +125,7 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
117125
)
118126
span.__enter__()
119127

120-
# Store span for later
121-
_get_metadata_dict(kwargs)["_sentry_span"] = span
128+
_store_span(kwargs, span)
122129

123130
# Set basic data
124131
set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
@@ -198,8 +205,7 @@ def _success_callback(
198205
) -> None:
199206
"""Handle successful completion."""
200207

201-
metadata = _get_metadata_dict(kwargs)
202-
span = metadata.get("_sentry_span")
208+
span = _peek_span(kwargs)
203209
if span is None:
204210
return
205211

@@ -259,7 +265,7 @@ def _success_callback(
259265
or "complete_streaming_response" in kwargs
260266
or "async_complete_streaming_response" in kwargs
261267
):
262-
span = metadata.pop("_sentry_span", None)
268+
span = _pop_span(kwargs)
263269
if span is not None:
264270
span.__exit__(None, None, None)
265271

@@ -285,7 +291,7 @@ def _failure_callback(
285291
end_time: "datetime",
286292
) -> None:
287293
"""Handle request failure."""
288-
span = _get_metadata_dict(kwargs).get("_sentry_span")
294+
span = _pop_span(kwargs)
289295
if span is None:
290296
return
291297

tests/integrations/litellm/test_litellm.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from datetime import datetime
66
from unittest import mock
77

8+
import httpx
89
import pytest
910

1011
import sentry_sdk
@@ -2532,6 +2533,62 @@ def test_integration_setup(sentry_init):
25322533
assert _failure_callback in (litellm.failure_callback or [])
25332534

25342535

2536+
@pytest.mark.asyncio(loop_scope="session")
2537+
async def test_anthropic_passthrough_request_stays_serializable(
2538+
reset_litellm_executor, sentry_init
2539+
):
2540+
"""Regression test for GH-6596: litellm's Anthropic ``/v1/messages``
2541+
passthrough forwards the caller's ``metadata`` into the request body, so the
2542+
integration must not make that body unserializable. Drive the real
2543+
passthrough with a mocked transport and assert the request body serializes.
2544+
"""
2545+
sentry_init(
2546+
integrations=[LiteLLMIntegration()],
2547+
disabled_integrations=[StdlibIntegration],
2548+
traces_sample_rate=1.0,
2549+
send_default_pii=True,
2550+
)
2551+
2552+
captured = {}
2553+
anthropic_response = {
2554+
"id": "msg_1",
2555+
"type": "message",
2556+
"role": "assistant",
2557+
"content": [{"type": "text", "text": "Hi there"}],
2558+
"model": "claude-3-5-sonnet-latest",
2559+
"stop_reason": "end_turn",
2560+
"stop_sequence": None,
2561+
"usage": {"input_tokens": 1, "output_tokens": 1},
2562+
}
2563+
2564+
client = AsyncHTTPHandler()
2565+
2566+
def capture_post(*args, **kwargs):
2567+
captured["data"] = kwargs.get("data")
2568+
return httpx.Response(
2569+
200,
2570+
json=anthropic_response,
2571+
request=httpx.Request("POST", "https://api.anthropic.com/v1/messages"),
2572+
)
2573+
2574+
with mock.patch.object(client, "post", side_effect=capture_post), start_transaction(
2575+
name="litellm test"
2576+
):
2577+
await litellm.anthropic.messages.acreate(
2578+
model="anthropic/claude-3-5-sonnet-latest",
2579+
messages=[{"role": "user", "content": "Hello!"}],
2580+
max_tokens=16,
2581+
metadata={"user_id": "my-org"},
2582+
api_key="test-key",
2583+
client=client,
2584+
)
2585+
2586+
# The bug crashed in json.dumps before send; reaching the transport with a
2587+
# serializable body proves it is fixed.
2588+
assert "data" in captured
2589+
json.dumps(json.loads(captured["data"]))
2590+
2591+
25352592
def test_litellm_message_truncation(sentry_init, capture_events):
25362593
"""Test that large messages are truncated properly in LiteLLM integration."""
25372594
sentry_init(

0 commit comments

Comments
 (0)