Add new clients and update README with usage instructions #56


Merged: 3 commits, Apr 23, 2025
63 changes: 58 additions & 5 deletions clients/README.md
@@ -5,22 +5,75 @@ These clients are some baselines that we have implemented and evaluated to help

## Clients

- [GPT](/clients/gpt.py): A naive GPT4-based LLM agent with only shell access.
- [GPT](/clients/gpt.py): A naive GPT-series LLM agent with only shell access.
- [DeepSeek](/clients/deepseek.py): A naive DeepSeek-series LLM agent with only shell access.
- [Qwen](/clients/qwen.py): A naive Qwen-series LLM agent with only shell access.
- [vLLM](/clients/vllm.py): A naive vLLM agent with any open source LLM deployed locally and only shell access.
- [ReAct](/clients/react.py): A naive LLM agent that uses the ReAct framework.
- [FLASH](/clients/flash.py): A naive LLM agent that uses status supervision and hindsight integration components to ensure high reliability in workflow execution.

### Using the vLLM Client

The vLLM client allows you to run local open-source models as an agent for AIOpsLab tasks. This approach is particularly useful when you want to:
- Use your own hardware for inference
- Experiment with different open-source models
- Work in environments without internet access to cloud LLM providers

### Quick Setup Guide

1. **Launch the vLLM server**:
```bash
# Make the script executable
chmod +x ./clients/launch_vllm.sh

# Run the script
./clients/launch_vllm.sh
```
This will launch vLLM in the background using the default model (Qwen/Qwen2.5-3B-Instruct).

2. **Check server status**:
```bash
# View the log file to confirm the server is running
cat vllm_Qwen_Qwen2.5-3B-Instruct.log
```

3. **Customize the model** (optional):
Edit `launch_vllm.sh` to change the model:
```bash
# Open the file
nano ./clients/launch_vllm.sh

# Change the MODEL variable to your preferred model
# Example: MODEL="mistralai/Mistral-7B-Instruct-v0.1"
```

4. **Run the vLLM agent**:
```bash
python clients/vllm.py
```

### Requirements

- Poetry for dependency management
- Sufficient GPU resources for your chosen model
- The model must support the OpenAI chat completion API format
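
If you want to confirm that your model and server actually speak the OpenAI chat completion format before running the full agent, a quick check along the following lines works. This is a minimal sketch, assuming the server was started with the defaults from `launch_vllm.sh` and that the `openai` Python package is available:

```python
# Minimal sanity check against the local vLLM server.
# Assumes the default base URL and model from launch_vllm.sh.
from openai import OpenAI

# vLLM exposes an OpenAI-compatible endpoint; the key is unused but required.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="Qwen/Qwen2.5-3B-Instruct",
    messages=[{"role": "user", "content": "Reply with the single word: OK"}],
)
print(response.choices[0].message.content)
```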

### Advanced Configuration

The vLLM client connects to `http://localhost:8000/v1` by default. If you have configured vLLM to listen on a different host or port, update the `base_url` in the `vLLMClient` class in `clients/utils/llm.py`.
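
For reference, the relevant piece might look something like the sketch below. This is illustrative only: the actual `vLLMClient` internals may differ, and the `VLLM_BASE_URL` environment variable is an assumption for this example, not an existing option.

```python
# Hypothetical sketch of a configurable vLLMClient; check the real
# class in clients/utils/llm.py before relying on any of these names.
import os
from openai import OpenAI

class vLLMClient:
    def __init__(self, model: str = "Qwen/Qwen2.5-3B-Instruct"):
        # Override the server address without editing code, e.g.
        # VLLM_BASE_URL=http://my-gpu-box:8001/v1 python clients/vllm.py
        base_url = os.getenv("VLLM_BASE_URL", "http://localhost:8000/v1")
        self.model = model
        self.client = OpenAI(base_url=base_url, api_key="EMPTY")

    def run(self, messages: list) -> list:
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
        )
        # Return a list so callers can index response[0], matching the agents.
        return [response.choices[0].message.content]
```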

<!--
Note: The script [GPT-managed-identity](/clients/gpt_managed_identity.py) uses the `DefaultAzureCredential` method from the `azure-identity` package to authenticate. This method simplifies authentication by supporting various credential types, including managed identities.

We recommend using a [user-assigned managed identity](https://learn.microsoft.com/en-us/entra/identity/managed-identities-azure-resources/how-manage-user-assigned-managed-identities?pivots=identity-mi-methods-azp) for this setup. Ensure the following steps are completed:

1. **Role Assignment**: Assign the managed identity appropriate roles:
   - A role that provides read access to the VM, such as the built-in **Reader** role.
   - A role that grants read/write access to the Azure OpenAI Service, such as the **Azure AI Developer** role.

2. **Attach the Managed Identity to the Controller VM**:
   Follow the steps in the official documentation to add the managed identity to the VM:
   [Add a user-assigned managed identity to a VM](https://learn.microsoft.com/en-us/entra/identity/managed-identities-azure-resources/how-to-configure-managed-identities?pivots=qs-configure-portal-windows-vm#user-assigned-managed-identity).

Please ensure the required Azure configuration is provided via the `/configs/example_azure_config.yml` file, or use that file as a template to create a new configuration file.

86 changes: 86 additions & 0 deletions clients/deepseek.py
@@ -0,0 +1,86 @@
"""Naive DeepSeek-R1 client (with shell access) for AIOpsLab.

"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning" arXiv preprint arXiv:2501.12948 (2025).

Paper: https://arxiv.org/abs/2501.12948
"""


import os
import asyncio

import wandb
from aiopslab.orchestrator import Orchestrator
from clients.utils.llm import DeepSeekClient
from clients.utils.templates import DOCS_SHELL_ONLY
from dotenv import load_dotenv

load_dotenv()

class Agent:
def __init__(self):
self.history = []
self.llm = DeepSeekClient()

def init_context(self, problem_desc: str, instructions: str, apis: str):
"""Initialize the context for the agent."""

self.shell_api = self._filter_dict(
apis, lambda k, _: "exec_shell" in k)
self.submit_api = self._filter_dict(apis, lambda k, _: "submit" in k)

def stringify_apis(apis): return "\n\n".join(
[f"{k}\n{v}" for k, v in apis.items()]
)

self.system_message = DOCS_SHELL_ONLY.format(
prob_desc=problem_desc,
shell_api=stringify_apis(self.shell_api),
submit_api=stringify_apis(self.submit_api),
)

self.task_message = instructions

self.history.append({"role": "system", "content": self.system_message})
self.history.append({"role": "user", "content": self.task_message})
self.history.append({"role": "assistant", "content": ""}) # Interleave the user/assistant messages in the message sequence.

async def get_action(self, input) -> str:
"""Wrapper to interface the agent with OpsBench.

Args:
input (str): The input from the orchestrator/environment.

Returns:
str: The response from the agent.
"""
self.history.append({"role": "user", "content": input})
response = self.llm.run(self.history)
self.history.append({"role": "assistant", "content": response[0]})
return response[0]

def _filter_dict(self, dictionary, filter_func):
return {k: v for k, v in dictionary.items() if filter_func(k, v)}


if __name__ == "__main__":
# Load use_wandb from environment variable with a default of False
use_wandb = os.getenv("USE_WANDB", "false").lower() == "true"

if use_wandb:
        # Initialize the wandb run
wandb.init(project="AIOpsLab", entity="AIOpsLab")

agent = Agent()

orchestrator = Orchestrator()
orchestrator.register_agent(agent, name="deepseek-r1")

pid = "misconfig_app_hotel_res-mitigation-1"
problem_desc, instructs, apis = orchestrator.init_problem(pid)
agent.init_context(problem_desc, instructs, apis)
asyncio.run(orchestrator.start_problem(max_steps=10))

if use_wandb:
# Finish the wandb run
wandb.finish()
6 changes: 3 additions & 3 deletions clients/flash.py
@@ -4,7 +4,7 @@
import logging
from typing import List, Dict, Tuple, Any
from pydantic import BaseModel
from clients.utils.llm import GPT4Turbo
from clients.utils.llm import GPTClient
from aiopslab.orchestrator import Orchestrator

logging.basicConfig(level=logging.INFO)
@@ -14,7 +14,7 @@
class FlashAgent:
def __init__(self):
self.history = []
self.llm = GPT4Turbo()
self.llm = GPTClient()
self.hindsight_builder = HindsightBuilder()

def init_context(self, problem_desc: str, instructions: str, apis: dict):
@@ -76,7 +76,7 @@ async def diagnose_with_hindsight(self, input: str, history: dict):
class HindsightBuilder:
"""Agent hindsight generator."""

llm = GPT4Turbo()
llm = GPTClient()
Collaborator:
I'm curious why this change was made. I don't think there's anything wrong with this, I'm just curious what the difference between GPT4Turbo and GPTClient is.

Author (@Flemington8, Apr 22, 2025):
OpenAI said they will stop GPT-4 service in their app after April 30, although developers can still access the GPT-4 API for a while (I'm not sure when it will be deprecated, but in the future we may use a stronger model released by OpenAI instead), so I don't think the name 'GPT4Turbo' is appropriate anymore.
Naming the class in a more model-agnostic way also seems like a good fit for us: OpenAI has released many models recently, and with this class we can easily switch the model.

Collaborator:
Gotcha! I remember getting some emails about that. I'll try out this update later today and try and get it merged.


def generate_prompt(self, input: str, history: dict) -> str:
"""
4 changes: 2 additions & 2 deletions clients/gpt.py
@@ -11,7 +11,7 @@

import wandb
from aiopslab.orchestrator import Orchestrator
from clients.utils.llm import GPT4Turbo
from clients.utils.llm import GPTClient
from clients.utils.templates import DOCS_SHELL_ONLY
from dotenv import load_dotenv

@@ -21,7 +21,7 @@
class Agent:
def __init__(self):
self.history = []
self.llm = GPT4Turbo()
self.llm = GPTClient()

def init_context(self, problem_desc: str, instructions: str, apis: str):
"""Initialize the context for the agent."""
6 changes: 3 additions & 3 deletions clients/gpt_managed_identity.py
@@ -1,4 +1,4 @@
"""Naive GPT4 client (with shell access) for AIOpsLab. Uses Azure Managed Identity for authentication.
"""Naive GPT client (with shell access) for AIOpsLab. Uses Azure Managed Identity for authentication.

Achiam, Josh, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida et al.
"Gpt-4 technical report." arXiv preprint arXiv:2303.08774 (2023).
@@ -11,14 +11,14 @@
import asyncio

from aiopslab.orchestrator import Orchestrator
from clients.utils.llm import GPT4Turbo
from clients.utils.llm import GPTClient
from clients.utils.templates import DOCS_SHELL_ONLY


class Agent:
def __init__(self, azure_config_file: str):
self.history = []
self.llm = GPT4Turbo(auth_type="managed", azure_config_file=azure_config_file)
self.llm = GPTClient(auth_type="managed", azure_config_file=azure_config_file)
Copilot AI (Apr 22, 2025):
The new GPTClient implementation in clients/utils/llm.py does not appear to support the additional parameters (auth_type and azure_config_file). Consider updating the GPTClient constructor to handle these parameters or refactoring its usage in this file.

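One way to address this, sketched below under the assumption that `GPTClient` wraps the `openai` package (the real constructor and config handling may differ), is to branch on `auth_type` and build an Azure client via `DefaultAzureCredential` when a managed identity is requested:

```python
# Hypothetical constructor sketch for the review suggestion above; the
# actual GPTClient in clients/utils/llm.py may look quite different.
from typing import Optional

from openai import AzureOpenAI, OpenAI
from azure.identity import DefaultAzureCredential, get_bearer_token_provider

class GPTClient:
    def __init__(self, auth_type: str = "api_key",
                 azure_config_file: Optional[str] = None):
        if auth_type == "managed":
            # Managed identity: exchange the VM's identity for an AAD token.
            token_provider = get_bearer_token_provider(
                DefaultAzureCredential(),
                "https://cognitiveservices.azure.com/.default",
            )
            # In practice the endpoint and API version would be read from
            # azure_config_file; literal placeholders are used here.
            self.client = AzureOpenAI(
                azure_endpoint="https://<your-resource>.openai.azure.com",
                api_version="2024-02-01",
                azure_ad_token_provider=token_provider,
            )
        else:
            # Default path: read OPENAI_API_KEY from the environment.
            self.client = OpenAI()
```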


def init_context(self, problem_desc: str, instructions: str, apis: str):
"""Initialize the context for the agent."""
15 changes: 15 additions & 0 deletions clients/launch_vllm.sh
@@ -0,0 +1,15 @@
#!/bin/bash
# This script launches vLLM in the background, redirecting output to a log file.

# Set the model name or path to serve
MODEL="Qwen/Qwen2.5-3B-Instruct"

# Create a safe filename by replacing slashes with underscores
SAFE_MODEL_NAME=$(echo "$MODEL" | tr '/' '_')

# Launch vLLM in the background using nohup, redirecting both stdout and stderr to a log file.
# nohup poetry run vllm serve "$MODEL" --tensor-parallel-size 4 > "vllm_${SAFE_MODEL_NAME}.log" 2>&1 &
nohup poetry run vllm serve "$MODEL" > "vllm_${SAFE_MODEL_NAME}.log" 2>&1 &

# Print a message indicating that vLLM is running.
echo "vLLM has been launched in the background with the $MODEL model. Check vllm_$SAFE_MODEL_NAME.log for output."
78 changes: 78 additions & 0 deletions clients/qwen.py
@@ -0,0 +1,78 @@
"""Naive Qwen client (with shell access) for AIOpsLab.
"""

import os
import asyncio

import wandb
from aiopslab.orchestrator import Orchestrator
from clients.utils.llm import QwenClient
from clients.utils.templates import DOCS_SHELL_ONLY


class Agent:
def __init__(self):
self.history = []
self.llm = QwenClient()

def init_context(self, problem_desc: str, instructions: str, apis: str):
"""Initialize the context for the agent."""

self.shell_api = self._filter_dict(
apis, lambda k, _: "exec_shell" in k)
self.submit_api = self._filter_dict(apis, lambda k, _: "submit" in k)

def stringify_apis(apis): return "\n\n".join(
[f"{k}\n{v}" for k, v in apis.items()]
)

self.system_message = DOCS_SHELL_ONLY.format(
prob_desc=problem_desc,
shell_api=stringify_apis(self.shell_api),
submit_api=stringify_apis(self.submit_api),
)

self.task_message = instructions

self.history.append({"role": "system", "content": self.system_message})
self.history.append({"role": "user", "content": self.task_message})

async def get_action(self, input) -> str:
"""Wrapper to interface the agent with OpsBench.

Args:
input (str): The input from the orchestrator/environment.

Returns:
str: The response from the agent.
"""
self.history.append({"role": "user", "content": input})
response = self.llm.run(self.history)
self.history.append({"role": "assistant", "content": response[0]})
return response[0]

def _filter_dict(self, dictionary, filter_func):
return {k: v for k, v in dictionary.items() if filter_func(k, v)}


if __name__ == "__main__":
# Load use_wandb from environment variable with a default of False
use_wandb = os.getenv("USE_WANDB", "false").lower() == "true"

if use_wandb:
        # Initialize the wandb run
wandb.init(project="AIOpsLab", entity="AIOpsLab")

agent = Agent()

orchestrator = Orchestrator()
orchestrator.register_agent(agent, name="qwq-32b")

pid = "misconfig_app_hotel_res-mitigation-1"
problem_desc, instructs, apis = orchestrator.init_problem(pid)
agent.init_context(problem_desc, instructs, apis)
asyncio.run(orchestrator.start_problem(max_steps=10))

if use_wandb:
# Finish the wandb run
wandb.finish()
4 changes: 2 additions & 2 deletions clients/react.py
@@ -10,7 +10,7 @@
import asyncio

from aiopslab.orchestrator import Orchestrator
from clients.utils.llm import GPT4Turbo
from clients.utils.llm import GPTClient
from clients.utils.templates import DOCS

RESP_INSTR = """DO NOT REPEAT ACTIONS! Respond with:
@@ -22,7 +22,7 @@
class Agent:
def __init__(self):
self.history = []
self.llm = GPT4Turbo()
self.llm = GPTClient()

def init_context(self, problem_desc: str, instructions: str, apis: str):
"""Initialize the context for the agent."""