@@ -934,3 +934,146 @@ async def test_inference_with_no_restrictions(self, mixin, mock_client_context):
                 model="gpt-4", messages=[OpenAIUserMessageParam(role="user", content="Hello")]
             )
         )
+
+
+class TestOpenAIMixinStreamingMetrics:
+    """Test cases for streaming metrics injection in OpenAIMixin"""
+
+    async def test_openai_chat_completion_streaming_metrics_injection(self, mixin, mock_client_context):
+        """Test that stream_options={"include_usage": True} is injected when streaming and telemetry is enabled"""
+
+        params = OpenAIChatCompletionRequestWithExtraBody(
+            model="test-model",
+            messages=[{"role": "user", "content": "hello"}],
+            stream=True,
+            stream_options=None,
+        )
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create = AsyncMock(return_value=MagicMock())
+
+        with mock_client_context(mixin, mock_client):
+            with patch("llama_stack.core.telemetry.tracing.get_current_span") as mock_get_span:
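+                # A real (non-None) span simulates telemetry being enabled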
+                mock_get_span.return_value = MagicMock()
+
+                with patch(
+                    "llama_stack.providers.utils.inference.openai_mixin.prepare_openai_completion_params"
+                ) as mock_prepare:
+                    mock_prepare.return_value = {"model": "test-model"}
+
+                    await mixin.openai_chat_completion(params)
+
+                    call_kwargs = mock_prepare.call_args.kwargs
+                    assert call_kwargs["stream_options"] == {"include_usage": True}
+
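+                    # The caller's params object must not be mutated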
+                    assert params.stream_options is None
+
+    async def test_openai_chat_completion_streaming_no_telemetry(self, mixin, mock_client_context):
+        """Test that stream_options is NOT injected when telemetry is disabled"""
+
+        params = OpenAIChatCompletionRequestWithExtraBody(
+            model="test-model",
+            messages=[{"role": "user", "content": "hello"}],
+            stream=True,
+            stream_options=None,
+        )
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create = AsyncMock(return_value=MagicMock())
+
+        with mock_client_context(mixin, mock_client):
+            with patch("llama_stack.core.telemetry.tracing.get_current_span") as mock_get_span:
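+                # A None span simulates telemetry being disabled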
+                mock_get_span.return_value = None
+
+                with patch(
+                    "llama_stack.providers.utils.inference.openai_mixin.prepare_openai_completion_params"
+                ) as mock_prepare:
+                    mock_prepare.return_value = {"model": "test-model"}
+
+                    await mixin.openai_chat_completion(params)
+
+                    call_kwargs = mock_prepare.call_args.kwargs
+                    assert call_kwargs["stream_options"] is None
+
+    async def test_openai_completion_streaming_metrics_injection(self, mixin, mock_client_context):
+        """Test that stream_options={"include_usage": True} is injected for the legacy completions API"""
+
+        params = OpenAICompletionRequestWithExtraBody(
+            model="test-model",
+            prompt="hello",
+            stream=True,
+            stream_options=None,
+        )
+
+        mock_client = MagicMock()
+        mock_client.completions.create = AsyncMock(return_value=MagicMock())
+
+        with mock_client_context(mixin, mock_client):
+            with patch("llama_stack.core.telemetry.tracing.get_current_span") as mock_get_span:
+                mock_get_span.return_value = MagicMock()
+
+                with patch(
+                    "llama_stack.providers.utils.inference.openai_mixin.prepare_openai_completion_params"
+                ) as mock_prepare:
+                    mock_prepare.return_value = {"model": "test-model"}
+
+                    await mixin.openai_completion(params)
+
+                    call_kwargs = mock_prepare.call_args.kwargs
+                    assert call_kwargs["stream_options"] == {"include_usage": True}
+                    assert params.stream_options is None
+
+    async def test_preserves_existing_stream_options(self, mixin, mock_client_context):
+        """Test that an explicit include_usage in stream_options is preserved, not overridden"""
+
+        params = OpenAIChatCompletionRequestWithExtraBody(
+            model="test-model",
+            messages=[{"role": "user", "content": "hello"}],
+            stream=True,
+            stream_options={"include_usage": False},
+        )
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create = AsyncMock(return_value=MagicMock())
+
+        with mock_client_context(mixin, mock_client):
+            with patch("llama_stack.core.telemetry.tracing.get_current_span") as mock_get_span:
+                mock_get_span.return_value = MagicMock()
+
+                with patch(
+                    "llama_stack.providers.utils.inference.openai_mixin.prepare_openai_completion_params"
+                ) as mock_prepare:
+                    mock_prepare.return_value = {"model": "test-model"}
+
+                    await mixin.openai_chat_completion(params)
+
+                    call_kwargs = mock_prepare.call_args.kwargs
+                    # include_usage stays False because the caller set it explicitly
+                    assert call_kwargs["stream_options"] == {"include_usage": False}
+
+    async def test_merges_existing_stream_options(self, mixin, mock_client_context):
+        """Test that include_usage is merged into existing stream_options"""
+
+        params = OpenAIChatCompletionRequestWithExtraBody(
+            model="test-model",
+            messages=[{"role": "user", "content": "hello"}],
+            stream=True,
+            stream_options={"other_option": True},
+        )
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create = AsyncMock(return_value=MagicMock())
+
+        with mock_client_context(mixin, mock_client):
+            with patch("llama_stack.core.telemetry.tracing.get_current_span") as mock_get_span:
+                mock_get_span.return_value = MagicMock()
+
+                with patch(
+                    "llama_stack.providers.utils.inference.openai_mixin.prepare_openai_completion_params"
+                ) as mock_prepare:
+                    mock_prepare.return_value = {"model": "test-model"}
+
+                    await mixin.openai_chat_completion(params)
+
+                    call_kwargs = mock_prepare.call_args.kwargs
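+                    # include_usage is injected alongside the caller's existing options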
+                    assert call_kwargs["stream_options"] == {"other_option": True, "include_usage": True}
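
For reference, a minimal sketch of the injection behavior these tests pin down, assuming it runs inside OpenAIMixin just before prepare_openai_completion_params is called. The helper name and exact structure are illustrative, not the actual implementation:

    # Hypothetical helper; the real OpenAIMixin code may be shaped differently.
    from llama_stack.core.telemetry import tracing

    def _inject_usage_stream_options(stream: bool | None, stream_options: dict | None) -> dict | None:
        # Only touch stream_options when streaming with an active telemetry span.
        if not stream or tracing.get_current_span() is None:
            return stream_options
        if stream_options is None:
            return {"include_usage": True}
        # Build a new dict (the caller's params must not be mutated) and let an
        # explicit include_usage from the caller win over the injected default.
        return {"include_usage": True, **stream_options}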