@@ -39,6 +39,7 @@ async def __call__(self, *args, **kwargs):
3939from sentry_sdk import start_transaction
4040from sentry_sdk ._types import BLOB_DATA_SUBSTITUTE
4141from sentry_sdk .consts import OP , SPANDATA
42+ from sentry_sdk .integrations import litellm as litellm_integration
4243from sentry_sdk .integrations .litellm import (
4344 LiteLLMIntegration ,
4445 _convert_message_parts ,
@@ -2587,6 +2588,132 @@ def test_caller_metadata_stays_json_serializable(
25872588 assert len (chat_spans ) == 1
25882589
25892590
def test_span_registry_is_bounded():
    """Abandoned calls must not grow the off-band span registry forever.

    A call whose terminal callback never fires (for example an interrupted
    stream) never removes its own entry. This test stores many entries
    without closing any of them and then checks three things: the registry
    holds fewer entries than were stored, the newest entry is still present
    and has not been exited, and the oldest entry is gone *and* had
    ``__exit__`` called on it (evicted spans are finished, not silently
    dropped).
    """

    class _StubSpan:
        """Stand-in for a span that only records whether it was exited."""

        def __init__(self):
            self.exited = False

        def __exit__(self, *exc_info):
            self.exited = True

    span_registry = litellm_integration._spans_by_call
    span_registry.clear()
    try:
        total = 5000
        stubs = [_StubSpan() for _ in range(total)]
        for index, stub in enumerate(stubs):
            litellm_integration._store_span(
                {"litellm_call_id": "call-{}".format(index)}, stub
            )

        # The registry is capped: it must not have kept every entry.
        assert len(span_registry) < total

        # The most recently stored span is retained and still running...
        newest = litellm_integration._peek_span(
            {"litellm_call_id": "call-{}".format(total - 1)}
        )
        assert newest is stubs[-1]
        assert stubs[-1].exited is False

        # ...whereas the first one stored was evicted and finished.
        oldest = litellm_integration._peek_span({"litellm_call_id": "call-0"})
        assert oldest is None
        assert stubs[0].exited is True
    finally:
        # Never leave stub spans behind in the shared registry.
        span_registry.clear()
2633+
2634+
def test_span_registry_cleaned_up_after_terminal_callbacks(sentry_init):
    """Both terminal callbacks must remove the off-band registry entry.

    For a successful call and for a failed call, the entry keyed by
    ``litellm_call_id`` must be present after ``_input_callback`` and absent
    after the matching terminal callback (``_success_callback`` or
    ``_failure_callback``), so a completed or failed call leaves nothing
    behind.
    """
    sentry_init(
        integrations=[LiteLLMIntegration()],
        disabled_integrations=[StdlibIntegration],
        traces_sample_rate=1.0,
        _experiments={"trace_lifecycle": "static"},
    )
    litellm_integration._spans_by_call.clear()

    try:
        with start_transaction(name="litellm test"):
            success_kwargs = {
                "model": "gpt-3.5-turbo",
                "messages": [{"role": "user", "content": "hi"}],
                "litellm_call_id": "success-call",
            }
            _input_callback(success_kwargs)
            assert "success-call" in litellm_integration._spans_by_call
            _success_callback(
                success_kwargs,
                MockCompletionResponse(),
                datetime.now(),
                datetime.now(),
            )
            assert "success-call" not in litellm_integration._spans_by_call

            failure_kwargs = {
                "model": "gpt-3.5-turbo",
                "messages": [{"role": "user", "content": "hi"}],
                "litellm_call_id": "failure-call",
            }
            _input_callback(failure_kwargs)
            assert "failure-call" in litellm_integration._spans_by_call
            _failure_callback(
                failure_kwargs, ValueError("boom"), datetime.now(), datetime.now()
            )
            assert "failure-call" not in litellm_integration._spans_by_call
    finally:
        # The registry is module-level state. If an assertion above fails or
        # a callback raises, entries would otherwise survive into later
        # tests, so always reset it on the way out.
        litellm_integration._spans_by_call.clear()
2670+
2671+
def test_span_key_falls_back_to_kwargs_identity(sentry_init):
    """Calls without ``litellm_call_id`` are tracked independently per kwargs.

    When litellm omits ``litellm_call_id`` (direct callback use), the shared
    kwargs dict identity is expected to key the registry. The test opens two
    calls with distinct kwargs dicts, checks each resolves to its own span,
    and checks that closing one call leaves the other call's span in place.
    """
    sentry_init(
        integrations=[LiteLLMIntegration()],
        disabled_integrations=[StdlibIntegration],
        traces_sample_rate=1.0,
        _experiments={"trace_lifecycle": "static"},
    )
    litellm_integration._spans_by_call.clear()

    try:
        with start_transaction(name="litellm test"):
            kwargs_a = {
                "model": "gpt-3.5-turbo",
                "messages": [{"role": "user", "content": "a"}],
            }
            kwargs_b = {
                "model": "gpt-3.5-turbo",
                "messages": [{"role": "user", "content": "b"}],
            }

            _input_callback(kwargs_a)
            _input_callback(kwargs_b)

            # Distinct dicts (no litellm_call_id) get distinct keys, so
            # there is no cross-talk between the two calls.
            span_a = litellm_integration._peek_span(kwargs_a)
            span_b = litellm_integration._peek_span(kwargs_b)
            assert span_a is not None
            assert span_b is not None
            assert span_a is not span_b

            # Closing A leaves B's span intact.
            _success_callback(
                kwargs_a, MockCompletionResponse(), datetime.now(), datetime.now()
            )
            assert litellm_integration._peek_span(kwargs_a) is None
            assert litellm_integration._peek_span(kwargs_b) is span_b

            _success_callback(
                kwargs_b, MockCompletionResponse(), datetime.now(), datetime.now()
            )
            assert litellm_integration._peek_span(kwargs_b) is None
    finally:
        # The registry is module-level state. If an assertion above fails or
        # a callback raises, entries tied to these short-lived kwargs dicts
        # would otherwise survive into later tests, so always reset it.
        litellm_integration._spans_by_call.clear()
2715+
2716+
25902717def test_litellm_message_truncation (sentry_init , capture_events ):
25912718 """Test that large messages are truncated properly in LiteLLM integration."""
25922719 sentry_init (
0 commit comments