Skip to content

Commit 22e3bda

Browse files
committed
semi working tool calling
1 parent bd18c3d commit 22e3bda

File tree

2 files changed

+48
-4
lines changed

2 files changed

+48
-4
lines changed

predictionguard/src/chat.py

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,17 +69,34 @@ def __init__(self, api_key, url):
6969
def create(
7070
self,
7171
model: str,
72-
messages: Union[str, List[Dict[str, Any]]],
72+
messages: Union[
73+
str, List[
74+
Dict[str, Any]
75+
]
76+
],
7377
input: Optional[Dict[str, Any]] = None,
7478
output: Optional[Dict[str, Any]] = None,
7579
frequency_penalty: Optional[float] = None,
76-
logit_bias: Optional[Dict[str, int]] = None,
80+
logit_bias: Optional[
81+
Dict[str, int]
82+
] = None,
7783
max_completion_tokens: Optional[int] = 100,
7884
max_tokens: Optional[int] = None,
85+
parallel_tool_calls: Optional[bool] = None,
7986
presence_penalty: Optional[float] = None,
80-
stop: Optional[Union[str, List[str]]] = None,
87+
stop: Optional[
88+
Union[
89+
str, List[str]
90+
]
91+
] = None,
8192
stream: Optional[bool] = False,
8293
temperature: Optional[float] = 1.0,
94+
tool_choice: Optional[Union[
95+
str, Dict[
96+
str, Dict[str, str]
97+
]
98+
]] = None,
99+
tools: Optional[List[Dict[str, Union[str, Dict[str, str]]]]] = None,
83100
top_p: Optional[float] = 0.99,
84101
top_k: Optional[float] = 50,
85102
) -> Dict[str, Any]:
@@ -93,10 +110,13 @@ def create(
93110
:param frequency_penalty: The frequency penalty to use.
94111
:param logit_bias: The logit bias to use.
95112
:param max_completion_tokens: The maximum amount of tokens the model should return.
113+
:param parallel_tool_calls: Whether the model may invoke multiple tool calls in parallel.
96114
:param presence_penalty: The presence penalty to use.
97115
:param stop: The completion stopping criteria.
98116
:param stream: Option to stream the API response
99117
:param temperature: The consistency of the model responses to the same prompt. The higher the more consistent.
118+
:param tool_choice: Controls which (if any) tool the model calls; a string mode or a dict naming a specific tool.
119+
:param tools: A list of tool definitions the model may call.
100120
:param top_p: The sampling for the model to use.
101121
:param top_k: The Top-K sampling for the model to use.
102122
:return: A dictionary containing the chat response.
@@ -121,10 +141,13 @@ def create(
121141
frequency_penalty,
122142
logit_bias,
123143
max_completion_tokens,
124-
temperature,
144+
parallel_tool_calls,
125145
presence_penalty,
126146
stop,
127147
stream,
148+
temperature,
149+
tool_choice,
150+
tools,
128151
top_p,
129152
top_k
130153
)
@@ -143,10 +166,13 @@ def _generate_chat(
143166
frequency_penalty,
144167
logit_bias,
145168
max_completion_tokens,
169+
parallel_tool_calls,
146170
presence_penalty,
147171
stop,
148172
stream,
149173
temperature,
174+
tool_choice,
175+
tools,
150176
top_p,
151177
top_k,
152178
):
@@ -276,10 +302,13 @@ def stream_generator(url, headers, payload, stream):
276302
"frequency_penalty": frequency_penalty,
277303
"logit_bias": logit_bias,
278304
"max_completion_tokens": max_completion_tokens,
305+
"parallel_tool_calls": parallel_tool_calls,
279306
"presence_penalty": presence_penalty,
280307
"stop": stop,
281308
"stream": stream,
282309
"temperature": temperature,
310+
"tool_choice": tool_choice,
311+
"tools": tools,
283312
"top_p": top_p,
284313
"top_k": top_k,
285314
}

tests/test_chat.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,21 @@ def test_chat_completions_create_vision_stream_fail():
187187
response_list.append(res)
188188

189189

190+
def test_chat_completions_create_tool_call():
191+
test_client = PredictionGuard()
192+
193+
response = test_client.chat.completions.create(
194+
model=os.environ["TEST_MODEL_NAME"],
195+
messages=[
196+
{"role": "system", "content": "You are a helpful chatbot."},
197+
{"role": "user", "content": "Tell me a joke."},
198+
],
199+
200+
)
201+
202+
assert len(response["choices"][0]["message"]["content"]) > 0
203+
204+
190205
def test_chat_completions_list_models():
191206
test_client = PredictionGuard()
192207

0 commit comments

Comments
 (0)