Skip to content

Commit 986f979

Browse files
jgreer013 and claude committed
fix(litellm): bound the off-band span registry
Address self-review: the module-level span registry was only evicted by the terminal success/failure callback, so a call abandoned before a terminal callback fires (e.g. a stream the caller stops iterating) leaked its Span entry -- holding prompt data -- for the process lifetime. The prior kwargs-scoped storage was GC'd with the request, so this was a regression. Back the registry with an OrderedDict capped at _MAX_TRACKED_SPANS and evict oldest-first in _store_span, so abandoned calls cannot grow it unbounded. A WeakValueDictionary is not an option here: Span/Transaction objects are not weakly referenceable. Add tests for the bound, terminal-callback cleanup, and the litellm_call_id-absent fallback key; correct the registry comment. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent ef29c5f commit 986f979

2 files changed

Lines changed: 144 additions & 3 deletions

File tree

sentry_sdk/integrations/litellm.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import copy
22
import threading
3+
from collections import OrderedDict
34
from typing import TYPE_CHECKING
45

56
import sentry_sdk
@@ -40,8 +41,11 @@
4041
# to the provider. `litellm_call_id` is a per-request UUID that stays stable
4142
# across the input/success/failure callbacks; the identity of the (shared)
4243
# callback kwargs dict is the fallback for direct callback invocations that omit
43-
# it. Entries are removed by the terminal success/failure callback.
44-
_spans_by_call = {} # type: Dict[Any, Any]
44+
# it. The terminal success/failure callback removes the entry; the registry is
45+
# capped (oldest evicted first) so calls abandoned before a terminal callback
46+
# fires -- e.g. a stream the caller stops iterating -- cannot grow it unbounded.
47+
_MAX_TRACKED_SPANS = 1000
48+
_spans_by_call = OrderedDict() # type: OrderedDict[Any, Any]
4549
_spans_by_call_lock = threading.Lock()
4650

4751

@@ -50,8 +54,18 @@ def _span_key(kwargs: "Dict[str, Any]") -> "Any":
5054

5155

5256
def _store_span(kwargs: "Dict[str, Any]", span: "Any") -> None:
    """Track ``span`` for the call identified by ``kwargs``, keeping the registry bounded.

    The span is stored in ``_spans_by_call`` under ``_span_key(kwargs)`` as the
    most-recent entry. Two kinds of spans are dropped from the registry here and
    finished with ``__exit__(None, None, None)`` so they are not left unfinished:

    * a *different* span that was already stored under the same key (it would
      otherwise be overwritten and never finished by anyone looking it up
      through this registry), and
    * the oldest entries, while the registry holds more than
      ``_MAX_TRACKED_SPANS`` spans.

    :param kwargs: the callback kwargs used to derive the registry key.
    :param span: the span object to track; must support ``__exit__``.
    """
    key = _span_key(kwargs)
    to_finish = []  # type: List[Any]
    with _spans_by_call_lock:
        # Pop-then-insert puts the key at the most-recent end whether or not it
        # was already tracked, and hands back any span it displaces.
        # NOTE(review): a key collision presumably happens when the fallback
        # (kwargs identity) key of an abandoned call is reused -- confirm
        # against _span_key.
        displaced = _spans_by_call.pop(key, None)
        if displaced is not None and displaced is not span:
            to_finish.append(displaced)
        _spans_by_call[key] = span
        while len(_spans_by_call) > _MAX_TRACKED_SPANS:
            _, evicted_span = _spans_by_call.popitem(last=False)
            to_finish.append(evicted_span)
    # Finish dropped spans outside the lock so an over-cap call (heavy
    # concurrency) still records a span instead of leaking an unfinished one,
    # and so span teardown never runs while the registry lock is held.
    for stale_span in to_finish:
        stale_span.__exit__(None, None, None)
5569

5670

5771
def _peek_span(kwargs: "Dict[str, Any]") -> "Any":

tests/integrations/litellm/test_litellm.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ async def __call__(self, *args, **kwargs):
3939
from sentry_sdk import start_transaction
4040
from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
4141
from sentry_sdk.consts import OP, SPANDATA
42+
from sentry_sdk.integrations import litellm as litellm_integration
4243
from sentry_sdk.integrations.litellm import (
4344
LiteLLMIntegration,
4445
_convert_message_parts,
@@ -2587,6 +2588,132 @@ def test_caller_metadata_stays_json_serializable(
25872588
assert len(chat_spans) == 1
25882589

25892590

2591+
def test_span_registry_is_bounded():
    """The off-band span registry must stay bounded when calls are abandoned
    before a terminal callback fires (e.g. an interrupted stream), so a
    long-running process cannot accumulate Span objects without limit. Evicted
    spans are finished, so an over-cap call still records a span instead of
    leaking an unfinished one.

    The number of stored spans is derived from ``_MAX_TRACKED_SPANS`` so the
    test keeps exercising the overflow path if the cap is tuned, and the exact
    registry size and eviction boundary are asserted rather than only "fewer
    than everything".
    """

    class _FakeSpan:
        # Minimal stand-in: only records whether it was finished.
        def __init__(self):
            self.exited = False

        def __exit__(self, *exc_info):
            self.exited = True

    registry = litellm_integration._spans_by_call
    cap = litellm_integration._MAX_TRACKED_SPANS
    registry.clear()
    try:
        overflow = 10
        count = cap + overflow
        spans = []
        for i in range(count):
            span = _FakeSpan()
            spans.append(span)
            litellm_integration._store_span(
                {"litellm_call_id": "call-{}".format(i)}, span
            )

        # Bounded at exactly the configured cap, not merely "smaller than count".
        assert len(registry) == cap
        # Most-recent entries are kept and left running...
        assert (
            litellm_integration._peek_span(
                {"litellm_call_id": "call-{}".format(count - 1)}
            )
            is spans[-1]
        )
        assert spans[-1].exited is False
        # ...while the oldest are evicted and finished (not silently dropped).
        assert litellm_integration._peek_span({"litellm_call_id": "call-0"}) is None
        assert spans[0].exited is True
        # Eviction is oldest-first: exactly the first `overflow` spans are gone.
        assert spans[overflow - 1].exited is True
        assert spans[overflow].exited is False
        assert (
            litellm_integration._peek_span(
                {"litellm_call_id": "call-{}".format(overflow)}
            )
            is spans[overflow]
        )
    finally:
        # The registry is module-global; never leak fake spans into other tests.
        registry.clear()
2633+
2634+
2635+
def test_span_registry_cleaned_up_after_terminal_callbacks(sentry_init):
    """A call that reaches either terminal callback (success or failure) must
    no longer have an entry in the off-band span registry afterwards."""
    sentry_init(
        integrations=[LiteLLMIntegration()],
        disabled_integrations=[StdlibIntegration],
        traces_sample_rate=1.0,
        _experiments={"trace_lifecycle": "static"},
    )
    litellm_integration._spans_by_call.clear()

    def _call_kwargs(call_id):
        # Same request payload for both calls; only the call id differs.
        return {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": "hi"}],
            "litellm_call_id": call_id,
        }

    with start_transaction(name="litellm test"):
        # Success path: input callback registers the span, success removes it.
        ok_kwargs = _call_kwargs("success-call")
        _input_callback(ok_kwargs)
        assert "success-call" in litellm_integration._spans_by_call
        ok_response = MockCompletionResponse()
        ok_start = datetime.now()
        ok_end = datetime.now()
        _success_callback(ok_kwargs, ok_response, ok_start, ok_end)
        assert "success-call" not in litellm_integration._spans_by_call

        # Failure path: input callback registers the span, failure removes it.
        err_kwargs = _call_kwargs("failure-call")
        _input_callback(err_kwargs)
        assert "failure-call" in litellm_integration._spans_by_call
        error = ValueError("boom")
        err_start = datetime.now()
        err_end = datetime.now()
        _failure_callback(err_kwargs, error, err_start, err_end)
        assert "failure-call" not in litellm_integration._spans_by_call
2670+
2671+
2672+
def test_span_key_falls_back_to_kwargs_identity(sentry_init):
    """Without a litellm_call_id (direct callback use), each kwargs dict is
    tracked on its own: two distinct dicts get two distinct spans, and
    finishing one call does not disturb the other.
    """
    sentry_init(
        integrations=[LiteLLMIntegration()],
        disabled_integrations=[StdlibIntegration],
        traces_sample_rate=1.0,
        _experiments={"trace_lifecycle": "static"},
    )
    litellm_integration._spans_by_call.clear()

    with start_transaction(name="litellm test"):
        # Two separate dict objects, neither carrying a litellm_call_id.
        kwargs_a, kwargs_b = (
            {
                "model": "gpt-3.5-turbo",
                "messages": [{"role": "user", "content": text}],
            }
            for text in ("a", "b")
        )

        for call_kwargs in (kwargs_a, kwargs_b):
            _input_callback(call_kwargs)

        # Distinct dicts (no litellm_call_id) get distinct keys -> no cross-talk.
        span_a = litellm_integration._peek_span(kwargs_a)
        span_b = litellm_integration._peek_span(kwargs_b)
        assert span_a is not None
        assert span_b is not None
        assert span_a is not span_b

        # Closing A leaves B's span intact.
        response_a = MockCompletionResponse()
        start_a = datetime.now()
        end_a = datetime.now()
        _success_callback(kwargs_a, response_a, start_a, end_a)
        assert litellm_integration._peek_span(kwargs_a) is None
        assert litellm_integration._peek_span(kwargs_b) is span_b

        # Closing B afterwards removes the last tracked entry for these calls.
        response_b = MockCompletionResponse()
        start_b = datetime.now()
        end_b = datetime.now()
        _success_callback(kwargs_b, response_b, start_b, end_b)
        assert litellm_integration._peek_span(kwargs_b) is None
2715+
2716+
25902717
def test_litellm_message_truncation(sentry_init, capture_events):
25912718
"""Test that large messages are truncated properly in LiteLLM integration."""
25922719
sentry_init(

0 commit comments

Comments
 (0)