berkeley-function-call-leaderboard/README.md (5 changes: 4 additions & 1 deletion)

@@ -150,6 +150,9 @@ You can provide multiple models or test categories by separating them with commas:

```bash
bfcl generate --model claude-3-5-sonnet-20241022-FC,gpt-4o-2024-11-20-FC --test-category simple,parallel,multiple,multi_turn
+
+
+bfcl generate --model moonshotai/Kimi-K2-Instruct-FC --num-threads 6
```

#### Selecting Specific Test Cases with `--run-ids`
@@ -242,7 +245,7 @@ VLLM_PORT=1053
For those who prefer using script execution instead of the CLI, you can run the following command:

```bash
-python -m bfcl_eval.openfunctions_evaluation --model MODEL_NAME --test-category TEST_CATEGORY
+python -m bfcl_eval.openfunctions_evaluation --model moonshotai/Kimi-K2-Instruct-FC --test-category TEST_CATEGORY
```

When specifying multiple models or test categories, separate them with **spaces**, not commas. All other flags mentioned earlier are compatible with the script execution method as well.
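For example, a sketch of a space-separated invocation (reusing model and category names that appear elsewhere in this PR; the exact pairing is illustrative):

```bash
python -m bfcl_eval.openfunctions_evaluation --model moonshotai/Kimi-K2-Instruct-FC gpt-4o-2024-11-20-FC --test-category simple parallel
```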
berkeley-function-call-leaderboard/bfcl_eval/.env.example (5 changes: 4 additions & 1 deletion)

@@ -32,10 +32,13 @@ NOVITA_API_KEY=sk-XXXXXX
# We use the API key from Alipay for inference with Bailing (Ling) models (see https://zxb.alipay.com/llm/landing)
LING_API_KEY=sk-XXXXXX

+# [OPTIONAL] For inference via the Together AI endpoint
+TOGETHER_API_KEY=
+
# [OPTIONAL] For local vllm/sglang server configuration
# Defaults to localhost port 1053 if not provided
VLLM_ENDPOINT=localhost
VLLM_PORT=1053

# [OPTIONAL] For WandB logging of the generated .csv, in the format 'entity:project'
-WANDB_BFCL_PROJECT=ENTITY:PROJECT
\ No newline at end of file
+WANDB_BFCL_PROJECT=ENTITY:PROJECT
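The new key is read with `os.getenv("TOGETHER_API_KEY")` in the `TogetherHandler` added later in this diff, so it can also be supplied from the shell instead of `.env`; a minimal sketch with a placeholder value:

```bash
export TOGETHER_API_KEY=sk-XXXXXX  # placeholder, same format as the other keys in this file
bfcl generate --model moonshotai/Kimi-K2-Instruct-FC --test-category simple
```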
berkeley-function-call-leaderboard/bfcl_eval/constants/model_config.py (path inferred)

@@ -7,6 +7,7 @@
from bfcl_eval.model_handler.api_inference.deepseek import DeepSeekAPIHandler
from bfcl_eval.model_handler.api_inference.dm_cito import DMCitoHandler
from bfcl_eval.model_handler.api_inference.fireworks import FireworksHandler
+from bfcl_eval.model_handler.api_inference.together import TogetherHandler
from bfcl_eval.model_handler.api_inference.functionary import FunctionaryHandler
from bfcl_eval.model_handler.api_inference.gemini import GeminiHandler
from bfcl_eval.model_handler.api_inference.gogoagent import GoGoAgentHandler
@@ -2034,6 +2035,18 @@ class ModelConfig:
        is_fc_model=True,
        underscore_to_dot=False,
    ),
+    "moonshotai/Kimi-K2-Instruct-FC": ModelConfig(
+        model_name="moonshotai/Kimi-K2-Instruct",
+        display_name="Kimi-K2-Instruct",
+        url="https://huggingface.co/moonshotai/Kimi-K2-Instruct",
+        org="moonshotai",
+        license="Modified MIT",
+        model_handler=TogetherHandler,
+        input_price=None,
+        output_price=None,
+        is_fc_model=True,
+        underscore_to_dot=False,
+    ),
}


(supported-models list; file path not captured)

@@ -167,4 +167,6 @@
"katanemo/Arch-Agent-3B",
"katanemo/Arch-Agent-7B",
"katanemo/Arch-Agent-32B"
"katanemo/Arch-Agent-32B",
"moonshotai/Kimi-K2-Instruct-FC"
]
berkeley-function-call-leaderboard/bfcl_eval/model_handler/api_inference/together.py (new file; path matches the TogetherHandler import above)

@@ -0,0 +1,51 @@
import os

from bfcl_eval.model_handler.api_inference.openai_completion import OpenAICompletionsHandler
from bfcl_eval.model_handler.model_style import ModelStyle
from openai import OpenAI


class TogetherHandler(OpenAICompletionsHandler):
    def __init__(self, model_name, temperature) -> None:
        super().__init__(model_name, temperature)
        self.model_style = ModelStyle.TOGETHER_AI
        # Together AI exposes an OpenAI-compatible API, so the stock OpenAI
        # client is simply pointed at the Together endpoint.
        self.client = OpenAI(
            base_url="https://api.together.xyz/v1",
            api_key=os.getenv("TOGETHER_API_KEY"),
        )

    #### FC methods ####

    def _query_FC(self, inference_data: dict):
        message: list[dict] = inference_data["message"]
        tools = inference_data["tools"]
        inference_data["inference_input_log"] = {
            "message": repr(message),
            "tools": tools,
        }

        # The -FC / -together suffixes are leaderboard bookkeeping, not part of
        # the model name Together expects, so they are stripped before the call.
        if len(tools) > 0:
            return self.generate_with_backoff(
                messages=message,
                model=self.model_name.replace("-FC", "").replace("-together", ""),
                temperature=self.temperature,
                tools=tools,
            )
        else:
            return self.generate_with_backoff(
                messages=message,
                model=self.model_name.replace("-FC", "").replace("-together", ""),
                temperature=self.temperature,
            )

    #### Prompting methods ####

    def _query_prompting(self, inference_data: dict):
        inference_data["inference_input_log"] = {"message": repr(inference_data["message"])}

        return self.generate_with_backoff(
            messages=inference_data["message"],
            model=self.model_name.replace("-together", ""),
            temperature=self.temperature,
        )
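As a quick sanity check on the naming convention, a hypothetical snippet (BFCL normally constructs handlers from the ModelConfig registry rather than directly, and the temperature value here is only illustrative):

```python
import os

from bfcl_eval.model_handler.api_inference.together import TogetherHandler

os.environ.setdefault("TOGETHER_API_KEY", "sk-XXXXXX")  # placeholder key

handler = TogetherHandler("moonshotai/Kimi-K2-Instruct-FC", temperature=0.001)

# The leaderboard key carries an -FC suffix; the query methods strip it
# before calling Together's OpenAI-compatible endpoint.
assert handler.model_name.replace("-FC", "") == "moonshotai/Kimi-K2-Instruct"
```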
berkeley-function-call-leaderboard/bfcl_eval/model_handler/model_style.py

@@ -11,6 +11,7 @@ class ModelStyle(Enum):
GOOGLE = "google"
AMAZON = "amazon"
FIREWORK_AI = "firework_ai"
TOGETHER_AI = "together_ai"
NEXUS = "nexus"
OSSMODEL = "ossmodel"
COHERE = "cohere"
berkeley-function-call-leaderboard/bfcl_eval/model_handler/utils.py (path inferred)

@@ -163,6 +163,7 @@ def convert_to_tool(functions, mapping, model_style):
        ModelStyle.WRITER,
        ModelStyle.AMAZON,
        ModelStyle.NOVITA_AI,
+        ModelStyle.TOGETHER_AI,
    ]:
        item[
            "description"

@@ -189,6 +190,7 @@
        ModelStyle.FIREWORK_AI,
        ModelStyle.WRITER,
        ModelStyle.NOVITA_AI,
+        ModelStyle.TOGETHER_AI,
    ]:
        oai_tool.append({"type": "function", "function": item})
    elif model_style == ModelStyle.AMAZON:
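With `TOGETHER_AI` added to both branches, Together-served models take the OpenAI-style tool path in `convert_to_tool`; a sketch of the resulting tool shape (the `get_weather` function and its schema are invented for illustration, not from this PR):

```python
# Illustrative element of oai_tool after the OpenAI-style branch runs;
# the function name and parameters are made up for this example.
example_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Return the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}
```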