runloopai
diff --git a/src/runloop_api_client/sdk/_types.py b/src/runloop_api_client/sdk/_types.py
Lines changed: 6 additions & 0 deletions
diff --git a/src/runloop_api_client/sdk/async_benchmark_run.py b/src/runloop_api_client/sdk/async_benchmark_run.py
Lines changed: 126 additions & 0 deletions
diff --git a/src/runloop_api_client/sdk/benchmark_run.py b/src/runloop_api_client/sdk/benchmark_run.py
Lines changed: 126 additions & 0 deletions
diff --git a/tests/sdk/conftest.py b/tests/sdk/conftest.py
Lines changed: 30 additions & 0 deletions
@@ -5,6 +5,7 @@
 from ..lib.polling import PollingConfig
 from ..types.devboxes import DiskSnapshotListParams, DiskSnapshotUpdateParams
 from ..types.scenarios import ScorerListParams, ScorerCreateParams, ScorerUpdateParams, ScorerValidateParams
+from ..types.benchmarks import RunListScenarioRunsParams
 from ..types.input_context import InputContext
 from ..types.scenario_view import ScenarioView
 from ..types.agent_list_params import AgentListParams
@@ -203,3 +204,8 @@ class ScenarioPreview(ScenarioView):
 
     input_context: InputContextPreview  # type: ignore[assignment]
     """The input context for the Scenario."""
+
+
+# Benchmark Run params
+class SDKBenchmarkRunListScenarioRunsParams(RunListScenarioRunsParams, BaseRequestOptions):
+    pass
@@ -0,0 +1,126 @@
+"""AsyncBenchmarkRun resource class for asynchronous operations."""
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Unpack, override
+
+from ..types import ScenarioRunView, BenchmarkRunView
+from ._types import BaseRequestOptions, LongRequestOptions, SDKBenchmarkRunListScenarioRunsParams
+from .._client import AsyncRunloop
+
+
+class AsyncBenchmarkRun:
+    """A benchmark run for evaluating agent performance across scenarios (async).
+
+    Provides async methods for monitoring run status, managing the run lifecycle,
+    and accessing scenario run results. Obtain instances via
+    ``benchmark.run()`` or ``benchmark.list_runs()``.
+
+    Example:
+        >>> benchmark = runloop.benchmark.from_id("bench-xxx")
+        >>> run = await benchmark.run(run_name="evaluation-v1")
+        >>> info = await run.get_info()
+        >>> scenario_runs = await run.list_scenario_runs()
+    """
+
+    def __init__(self, client: AsyncRunloop, run_id: str, benchmark_id: str) -> None:
+        """Create an AsyncBenchmarkRun instance.
+
+        :param client: AsyncRunloop client instance
+        :type client: AsyncRunloop
+        :param run_id: Benchmark run ID
+        :type run_id: str
+        :param benchmark_id: Parent benchmark ID
+        :type benchmark_id: str
+        """
+        self._client = client
+        self._id = run_id
+        self._benchmark_id = benchmark_id
+
+    @override
+    def __repr__(self) -> str:
+        return f"<AsyncBenchmarkRun id={self._id!r}>"
+
+    @property
+    def id(self) -> str:
+        """Return the benchmark run ID.
+
+        :return: Unique benchmark run ID
+        :rtype: str
+        """
+        return self._id
+
+    @property
+    def benchmark_id(self) -> str:
+        """Return the parent benchmark ID.
+
+        :return: Parent benchmark ID
+        :rtype: str
+        """
+        return self._benchmark_id
+
+    async def get_info(
+        self,
+        **options: Unpack[BaseRequestOptions],
+    ) -> BenchmarkRunView:
+        """Retrieve current benchmark run status and metadata.
+
+        :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options
+        :return: Current benchmark run state info
+        :rtype: BenchmarkRunView
+        """
+        return await self._client.benchmarks.runs.retrieve(
+            self._id,
+            **options,
+        )
+
+    async def cancel(
+        self,
+        **options: Unpack[LongRequestOptions],
+    ) -> BenchmarkRunView:
+        """Cancel the benchmark run.
+
+        Stops all running scenarios and marks the run as canceled.
+
+        :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+        :return: Updated benchmark run state
+        :rtype: BenchmarkRunView
+        """
+        return await self._client.benchmarks.runs.cancel(
+            self._id,
+            **options,
+        )
+
+    async def complete(
+        self,
+        **options: Unpack[LongRequestOptions],
+    ) -> BenchmarkRunView:
+        """Complete the benchmark run.
+
+        Marks the run as completed. Call this after all scenarios have finished.
+
+        :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+        :return: Completed benchmark run state
+        :rtype: BenchmarkRunView
+        """
+        return await self._client.benchmarks.runs.complete(
+            self._id,
+            **options,
+        )
+
+    async def list_scenario_runs(
+        self,
+        **params: Unpack[SDKBenchmarkRunListScenarioRunsParams],
+    ) -> List[ScenarioRunView]:
+        """List all scenario runs for this benchmark run.
+
+        :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkRunListScenarioRunsParams` for available parameters
+        :return: List of scenario run views
+        :rtype: List[ScenarioRunView]
+        """
+        page = self._client.benchmarks.runs.list_scenario_runs(
+            self._id,
+            **params,
+        )
+        return [item async for item in page]
@@ -0,0 +1,126 @@
+"""BenchmarkRun resource class for synchronous operations."""
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Unpack, override
+
+from ..types import ScenarioRunView, BenchmarkRunView
+from ._types import BaseRequestOptions, LongRequestOptions, SDKBenchmarkRunListScenarioRunsParams
+from .._client import Runloop
+
+
+class BenchmarkRun:
+    """A benchmark run for evaluating agent performance across scenarios.
+
+    Provides methods for monitoring run status, managing the run lifecycle,
+    and accessing scenario run results. Obtain instances via
+    ``benchmark.run()`` or ``benchmark.list_runs()``.
+
+    Example:
+        >>> benchmark = runloop.benchmark.from_id("bench-xxx")
+        >>> run = benchmark.run(run_name="evaluation-v1")
+        >>> info = run.get_info()
+        >>> scenario_runs = run.list_scenario_runs()
+    """
+
+    def __init__(self, client: Runloop, run_id: str, benchmark_id: str) -> None:
+        """Create a BenchmarkRun instance.
+
+        :param client: Runloop client instance
+        :type client: Runloop
+        :param run_id: Benchmark run ID
+        :type run_id: str
+        :param benchmark_id: Parent benchmark ID
+        :type benchmark_id: str
+        """
+        self._client = client
+        self._id = run_id
+        self._benchmark_id = benchmark_id
+
+    @override
+    def __repr__(self) -> str:
+        return f"<BenchmarkRun id={self._id!r}>"
+
+    @property
+    def id(self) -> str:
+        """Return the benchmark run ID.
+
+        :return: Unique benchmark run ID
+        :rtype: str
+        """
+        return self._id
+
+    @property
+    def benchmark_id(self) -> str:
+        """Return the parent benchmark ID.
+
+        :return: Parent benchmark ID
+        :rtype: str
+        """
+        return self._benchmark_id
+
+    def get_info(
+        self,
+        **options: Unpack[BaseRequestOptions],
+    ) -> BenchmarkRunView:
+        """Retrieve current benchmark run status and metadata.
+
+        :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options
+        :return: Current benchmark run state info
+        :rtype: BenchmarkRunView
+        """
+        return self._client.benchmarks.runs.retrieve(
+            self._id,
+            **options,
+        )
+
+    def cancel(
+        self,
+        **options: Unpack[LongRequestOptions],
+    ) -> BenchmarkRunView:
+        """Cancel the benchmark run.
+
+        Stops all running scenarios and marks the run as canceled.
+
+        :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+        :return: Updated benchmark run state
+        :rtype: BenchmarkRunView
+        """
+        return self._client.benchmarks.runs.cancel(
+            self._id,
+            **options,
+        )
+
+    def complete(
+        self,
+        **options: Unpack[LongRequestOptions],
+    ) -> BenchmarkRunView:
+        """Complete the benchmark run.
+
+        Marks the run as completed. Call this after all scenarios have finished.
+
+        :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+        :return: Completed benchmark run state
+        :rtype: BenchmarkRunView
+        """
+        return self._client.benchmarks.runs.complete(
+            self._id,
+            **options,
+        )
+
+    def list_scenario_runs(
+        self,
+        **params: Unpack[SDKBenchmarkRunListScenarioRunsParams],
+    ) -> List[ScenarioRunView]:
+        """List all scenario runs for this benchmark run.
+
+        :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkRunListScenarioRunsParams` for available parameters
+        :return: List of scenario run views
+        :rtype: List[ScenarioRunView]
+        """
+        page = self._client.benchmarks.runs.list_scenario_runs(
+            self._id,
+            **params,
+        )
+        return list(page)
@@ -129,6 +129,30 @@ class MockScenarioRunView:
     scoring_contract_result: object = None
 
 
+@dataclass
+class MockBenchmarkRunView:
+    """Mock BenchmarkRunView for testing."""
+
+    id: str = "bench_run_123"
+    benchmark_id: str = "bench_123"
+    state: str = "running"
+    metadata: Dict[str, str] = field(default_factory=dict)
+    start_time_ms: int = 1234567890000
+    duration_ms: int | None = None
+    score: float | None = None
+
+
+class AsyncIterableMock:
+    """A simple async iterable mock for testing paginated responses."""
+
+    def __init__(self, items: list[Any]) -> None:
+        self._items = items
+
+    async def __aiter__(self):
+        for item in self._items:
+            yield item
+
+
 def create_mock_httpx_client(methods: dict[str, Any] | None = None) -> AsyncMock:
     """
     Create a mock httpx.AsyncClient with proper context manager setup.
@@ -237,6 +261,12 @@ def scenario_run_view() -> MockScenarioRunView:
     return MockScenarioRunView()
 
 
+@pytest.fixture
+def benchmark_run_view() -> MockBenchmarkRunView:
+    """Provide a MockBenchmarkRunView constructed with no arguments (all field defaults)."""
+    return MockBenchmarkRunView()
+
+
 @pytest.fixture
 def mock_httpx_response() -> Mock:
     """Create a mock httpx.Response."""