From 0dbb9201baee4419f0b7965d82294f9dabc1665b Mon Sep 17 00:00:00 2001
From: Farrel Mahaztra <15523645+farrelmahaztra@users.noreply.github.com>
Date: Tue, 7 Oct 2025 17:12:19 +0700
Subject: [PATCH] chore: convert agent list to an enum
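
The supported agent backends ("claude", "openai", "vllm", "litellm",
"integration_test") were previously spelled out as bare string literals
and repeated Literal[...] unions across the CLI, tests, and examples.
Centralize them in a single str-backed enum in hud.types (quoted from
the diff below):

    class AgentType(str, Enum):
        CLAUDE = "claude"
        OPENAI = "openai"
        VLLM = "vllm"
        LITELLM = "litellm"
        INTEGRATION_TEST = "integration_test"

Because the enum inherits from str, members compare equal to their
string values (AgentType.CLAUDE == "claude"), so existing string-based
comparisons and Typer's choice parsing keep working. Validated strings
are narrowed with AgentType(agent), which also removes the
`# type: ignore` previously needed on the eval_command() call.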
---
 examples/run_evaluation.py | 11 +++++----
 hud/cli/__init__.py        | 23 +++++++++++--------
 hud/cli/eval.py            | 47 +++++++++++++++++++-------------------
 hud/cli/tests/test_eval.py | 12 +++++-----
 hud/types.py               | 10 ++++++++
 5 files changed, 60 insertions(+), 43 deletions(-)

diff --git a/examples/run_evaluation.py b/examples/run_evaluation.py
index e5d2b4af..cb2d43f2 100644
--- a/examples/run_evaluation.py
+++ b/examples/run_evaluation.py
@@ -44,6 +44,7 @@
 from datasets import load_dataset
 
 from hud.agents import ClaudeAgent, OperatorAgent
 from hud.datasets import Task, run_dataset
+from hud.types import AgentType
 
 logger = logging.getLogger(__name__)
@@ -58,14 +59,14 @@
 
 
 def _build_agent(
-    agent_type: Literal["claude", "openai"],
+    agent_type: Literal[AgentType.CLAUDE, AgentType.OPENAI],
     *,
     model: str | None = None,
     allowed_tools: list[str] | None = None,
 ) -> ClaudeAgent | OperatorAgent:
     """Create and return the requested agent type."""
 
-    if agent_type == "openai":
+    if agent_type == AgentType.OPENAI:
         # Only pass allowed_tools if explicitly provided
         # This allows tasks to specify their own via agent_config
         if allowed_tools:
@@ -105,7 +106,7 @@
 async def run_single_task(
     dataset_name: str,
     *,
-    agent_type: Literal["claude", "openai"] = "claude",
+    agent_type: Literal[AgentType.CLAUDE, AgentType.OPENAI] = AgentType.CLAUDE,
     model: str | None = None,
     allowed_tools: list[str] | None = None,
     max_steps: int = 10,
@@ -144,7 +145,7 @@
 async def run_full_dataset(
     dataset_name: str,
     *,
-    agent_type: Literal["claude", "openai"] = "claude",
+    agent_type: Literal[AgentType.CLAUDE, AgentType.OPENAI] = AgentType.CLAUDE,
     model: str | None = None,
     allowed_tools: list[str] | None = None,
     max_concurrent: int = 50,
@@ -154,7 +155,7 @@
 
     # Build agent class + config for run_dataset – we pass the *class* and a minimal
     # config dict, run_dataset will create a fresh agent per task.
-    if agent_type == "openai":
+    if agent_type == AgentType.OPENAI:
         agent_class = OperatorAgent
         agent_config: dict[str, Any] = {
             "validate_api_key": False,
diff --git a/hud/cli/__init__.py b/hud/cli/__init__.py
index 3708cf0e..ba41ae91 100644
--- a/hud/cli/__init__.py
+++ b/hud/cli/__init__.py
@@ -12,6 +12,8 @@
 from rich.panel import Panel
 from rich.table import Table
 
+from hud.types import AgentType
+
 from . import list_func as list_module
 from .analyze import (
     analyze_environment,
@@ -844,7 +846,7 @@ def eval(
     hud_console = HUDConsole()
 
     if integration_test:
-        agent = "integration_test"
+        agent = AgentType.INTEGRATION_TEST
 
     # If no source provided, reuse RL helper to find a tasks file interactively
     if source is None:
@@ -891,17 +893,17 @@
         # Add standard agent choices
         choices.extend(
             [
-                {"name": "Claude 4 Sonnet", "value": "claude"},
-                {"name": "OpenAI Computer Use", "value": "openai"},
-                {"name": "vLLM (Local Server)", "value": "vllm"},
-                {"name": "LiteLLM (Multi-provider)", "value": "litellm"},
+                {"name": "Claude 4 Sonnet", "value": AgentType.CLAUDE},
+                {"name": "OpenAI Computer Use", "value": AgentType.OPENAI},
+                {"name": "vLLM (Local Server)", "value": AgentType.VLLM},
+                {"name": "LiteLLM (Multi-provider)", "value": AgentType.LITELLM},
             ]
         )
 
         agent = hud_console.select("Select an agent to use:", choices=choices, default=0)
 
     # Handle HUD model selection
-    if agent and agent not in ["claude", "openai", "vllm", "litellm", "integration_test"]:
+    if agent and agent not in [e.value for e in AgentType]:
         # Find remote model name
         model = agent
         if not vllm_base_url:
@@ -918,20 +920,23 @@
             hud_console.error(f"Model {model} not found")
             raise typer.Exit(1)
         model = base_model
-        agent = "vllm"  # Use vLLM backend for HUD models
+        agent = AgentType.VLLM  # Use vLLM backend for HUD models
         hud_console.info(f"Using HUD model: {model} (trained on {base_model})")
 
     # Validate agent choice
-    valid_agents = ["claude", "openai", "vllm", "litellm", "integration_test"]
+    valid_agents = [e.value for e in AgentType]
     if agent not in valid_agents:
         hud_console.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
         raise typer.Exit(1)
 
+    # Type narrowing: agent is now guaranteed to be an AgentType value after validation
+    agent = AgentType(agent)
+
     # Run the command
     eval_command(
         source=source,
         full=full,
-        agent=agent,  # type: ignore
+        agent=agent,
         model=model,
         allowed_tools=allowed_tools,
         max_concurrent=max_concurrent,
diff --git a/hud/cli/eval.py b/hud/cli/eval.py
index e8afceac..066c9b01 100644
--- a/hud/cli/eval.py
+++ b/hud/cli/eval.py
@@ -5,13 +5,14 @@
 import asyncio
 import logging
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Literal
+from typing import TYPE_CHECKING, Any
 
 import typer
 
 import hud
 from hud.cli.utils.env_check import ensure_built, find_environment_dir
 from hud.settings import settings
+from hud.types import AgentType
 from hud.utils.group_eval import display_group_statistics, run_tasks_grouped
 from hud.utils.hud_console import HUDConsole
 
@@ -113,7 +114,7 @@ def _build_vllm_config(
 
 
 def build_agent(
-    agent_type: Literal["claude", "openai", "vllm", "litellm", "integration_test"],
+    agent_type: AgentType,
     *,
     model: str | None = None,
     allowed_tools: list[str] | None = None,
@@ -123,11 +124,11 @@
     """Create and return the requested agent type."""
 
     # Import agents lazily to avoid dependency issues
-    if agent_type == "integration_test":
+    if agent_type == AgentType.INTEGRATION_TEST:
         from hud.agents.misc.integration_test_agent import IntegrationTestRunner
 
         return IntegrationTestRunner(verbose=verbose)
-    elif agent_type == "vllm":
+    elif agent_type == AgentType.VLLM:
         # Create a generic OpenAI agent for vLLM server
         try:
             from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
@@ -147,7 +148,7 @@
         )
 
         return GenericOpenAIChatAgent(**config)
-    elif agent_type == "openai":
+    elif agent_type == AgentType.OPENAI:
         try:
             from hud.agents import OperatorAgent
         except ImportError as e:
@@ -165,7 +166,7 @@
         else:
             return OperatorAgent(verbose=verbose)
 
-    elif agent_type == "litellm":
+    elif agent_type == AgentType.LITELLM:
         try:
             from hud.agents.lite_llm import LiteAgent
         except ImportError as e:
@@ -209,7 +210,7 @@
 async def run_single_task(
     source: str,
     *,
-    agent_type: Literal["claude", "openai", "vllm", "litellm", "integration_test"] = "claude",
+    agent_type: AgentType = AgentType.CLAUDE,
     model: str | None = None,
     allowed_tools: list[str] | None = None,
     max_steps: int = 10,
@@ -268,14 +269,14 @@
     # Use grouped evaluation if group_size > 1
     agent_config: dict[str, Any] = {}
 
-    if agent_type == "integration_test":
+    if agent_type == AgentType.INTEGRATION_TEST:
         from hud.agents.misc.integration_test_agent import IntegrationTestRunner
 
         agent_class = IntegrationTestRunner
         agent_config = {"verbose": verbose}
         if allowed_tools:
             agent_config["allowed_tools"] = allowed_tools
-    elif agent_type == "vllm":
+    elif agent_type == AgentType.VLLM:
         # Special handling for vLLM
         from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
 
@@ -288,14 +289,14 @@
             allowed_tools=allowed_tools,
             verbose=verbose,
         )
-    elif agent_type == "openai":
+    elif agent_type == AgentType.OPENAI:
         from hud.agents import OperatorAgent
 
         agent_class = OperatorAgent
         agent_config = {"verbose": verbose}
         if allowed_tools:
             agent_config["allowed_tools"] = allowed_tools
-    elif agent_type == "litellm":
+    elif agent_type == AgentType.LITELLM:
         from hud.agents.lite_llm import LiteAgent
         agent_class = LiteAgent
         agent_config = {
@@ -305,7 +306,7 @@
         }
         if allowed_tools:
             agent_config["allowed_tools"] = allowed_tools
-    elif agent_type == "claude":
+    elif agent_type == AgentType.CLAUDE:
         from hud.agents import ClaudeAgent
 
         agent_class = ClaudeAgent
@@ -353,7 +354,7 @@
 async def run_full_dataset(
     source: str,
     *,
-    agent_type: Literal["claude", "openai", "vllm", "litellm", "integration_test"] = "claude",
+    agent_type: AgentType = AgentType.CLAUDE,
     model: str | None = None,
     allowed_tools: list[str] | None = None,
     max_concurrent: int = 30,
@@ -395,12 +396,12 @@
     # Build agent class + config for run_dataset
     agent_config: dict[str, Any]
 
-    if agent_type == "integration_test":  # --integration-test mode
+    if agent_type == AgentType.INTEGRATION_TEST:  # --integration-test mode
         from hud.agents.misc.integration_test_agent import IntegrationTestRunner
 
         agent_class = IntegrationTestRunner
         agent_config = {"verbose": verbose}
-    elif agent_type == "vllm":
+    elif agent_type == AgentType.VLLM:
         try:
             from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
 
@@ -419,7 +420,7 @@
             allowed_tools=allowed_tools,
             verbose=verbose,
         )
-    elif agent_type == "openai":
+    elif agent_type == AgentType.OPENAI:
         try:
             from hud.agents import OperatorAgent
 
@@ -435,7 +436,7 @@
         if allowed_tools:
             agent_config["allowed_tools"] = allowed_tools
 
-    elif agent_type == "litellm":
+    elif agent_type == AgentType.LITELLM:
         try:
             from hud.agents.lite_llm import LiteAgent
 
@@ -539,8 +540,8 @@ def eval_command(
         "--full",
         help="Run the entire dataset (omit for single-task debug mode)",
     ),
-    agent: Literal["claude", "openai", "vllm", "litellm", "integration_test"] = typer.Option(
-        "claude",
+    agent: AgentType = typer.Option(  # noqa: B008
+        AgentType.CLAUDE,
         "--agent",
litellm)", ), @@ -648,21 +649,21 @@ def eval_command( # We pass integration_test as the agent_type if integration_test: - agent = "integration_test" + agent = AgentType.INTEGRATION_TEST # Check for required API keys - if agent == "claude": + if agent == AgentType.CLAUDE: if not settings.anthropic_api_key: hud_console.error("ANTHROPIC_API_KEY is required for Claude agent") hud_console.info( "Set it in your environment or run: hud set ANTHROPIC_API_KEY=your-key-here" ) raise typer.Exit(1) - elif agent == "openai" and not settings.openai_api_key: + elif agent == AgentType.OPENAI and not settings.openai_api_key: hud_console.error("OPENAI_API_KEY is required for OpenAI agent") hud_console.info("Set it in your environment or run: hud set OPENAI_API_KEY=your-key-here") raise typer.Exit(1) - elif agent == "vllm": + elif agent == AgentType.VLLM: if model: hud_console.info(f"Using vLLM with model: {model}") else: diff --git a/hud/cli/tests/test_eval.py b/hud/cli/tests/test_eval.py index 5e3afac8..4b4a18af 100644 --- a/hud/cli/tests/test_eval.py +++ b/hud/cli/tests/test_eval.py @@ -11,7 +11,7 @@ build_agent, run_single_task, ) -from hud.types import Task, Trace +from hud.types import AgentType, Task, Trace class TestBuildAgent: @@ -26,7 +26,7 @@ def test_builds_integration_test_agent(self) -> None: mock_runner.return_value = mock_instance # Test with verbose=False - result = build_agent("integration_test", verbose=False) + result = build_agent(AgentType.INTEGRATION_TEST, verbose=False) mock_runner.assert_called_once_with(verbose=False) assert result == mock_instance @@ -40,7 +40,7 @@ def test_builds_claude_agent(self) -> None: mock_runner.return_value = mock_instance # Test with verbose=False - result = build_agent("claude", verbose=False) + result = build_agent(AgentType.CLAUDE, verbose=False) mock_runner.assert_called_once_with(model="claude-sonnet-4-20250514", verbose=False) assert result == mock_instance @@ -55,7 +55,7 @@ def test_builds_claude_agent_with_custom_model_and_allowed_tools(self) -> None: # Test with verbose=False result = build_agent( - "claude", + AgentType.CLAUDE, model="claude-sonnet-4-20250514", allowed_tools=["act"], verbose=True, @@ -97,7 +97,7 @@ async def test_applies_agent_config_from_task(self) -> None: patch("hud.cli.eval.find_environment_dir", return_value=None), patch("hud.cli.eval.hud.trace"), ): - await run_single_task("test.json", agent_type="integration_test", max_steps=10) + await run_single_task("test.json", agent_type=AgentType.INTEGRATION_TEST, max_steps=10) # Verify agent.run was called with the task containing agent_config mock_agent.run.assert_called_once() @@ -119,7 +119,7 @@ async def test_runs_with_group_size_greater_than_one(self) -> None: mock_grouped.return_value = [{"task": mock_task, "rewards": [1.0, 0.5]}] await run_single_task( - "test.json", agent_type="integration_test", group_size=3, max_steps=10 + "test.json", agent_type=AgentType.INTEGRATION_TEST, group_size=3, max_steps=10 ) # Verify run_tasks_grouped was called with correct group_size diff --git a/hud/types.py b/hud/types.py index d9f81460..91b4dc29 100644 --- a/hud/types.py +++ b/hud/types.py @@ -5,6 +5,7 @@ import logging import uuid from collections import defaultdict +from enum import Enum from string import Template from typing import Any, Literal @@ -18,6 +19,14 @@ logger = logging.getLogger(__name__) +class AgentType(str, Enum): + CLAUDE = "claude" + OPENAI = "openai" + VLLM = "vllm" + LITELLM = "litellm" + INTEGRATION_TEST = "integration_test" + + class Task(BaseModel): """ A task 
     A task configuration that can be used to create a task.
@@ -319,6 +328,7 @@ def populate_from_context(self) -> None:
 
 __all__ = [
     "AgentResponse",
+    "AgentType",
     "MCPToolCall",
     "MCPToolResult",
     "Trace",