From db486de16d87f1d40686d1c8ce2b8ff3986cc05f Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 16 Dec 2025 13:01:58 -0800 Subject: [PATCH 1/5] update requirements-dev --- requirements-dev.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.lock b/requirements-dev.lock index b9f3f2862..c48025dbf 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -94,7 +94,7 @@ python-dateutil==2.9.0.post0 ; python_full_version < '3.10' # via time-machine respx==0.22.0 rich==14.2.0 -ruff==0.14.8 +ruff==0.14.9 six==1.17.0 ; python_full_version < '3.10' # via python-dateutil sniffio==1.3.1 From 3a88bc3a72fae6fb763efd64730d0c16ca1ac63d Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 16 Dec 2025 13:02:58 -0800 Subject: [PATCH 2/5] pyproject formatting nit --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2c90fa53a..93e6c93e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ dependencies = [ "anyio>=3.5.0, <5", "distro>=1.7.0, <2", "sniffio", - "uuid-utils>=0.11.0", + "uuid-utils>=0.11.0", ] requires-python = ">= 3.9" From 4650641013aa86914c3c3e36b951054bbcb56ce6 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 16 Dec 2025 17:36:50 -0800 Subject: [PATCH 3/5] feat(sdk): add BenchmarkRun and AsyncBenchmarkRun classes --- src/runloop_api_client/sdk/_types.py | 6 + .../sdk/async_benchmark_run.py | 126 ++++++++++++++++ src/runloop_api_client/sdk/benchmark_run.py | 126 ++++++++++++++++ tests/sdk/conftest.py | 30 ++++ tests/sdk/test_async_benchmark_run.py | 94 ++++++++++++ tests/sdk/test_benchmark_run.py | 92 ++++++++++++ .../sdk/test_async_benchmark_run.py | 138 ++++++++++++++++++ tests/smoketests/sdk/test_benchmark_run.py | 138 ++++++++++++++++++ 8 files changed, 750 insertions(+) create mode 100644 src/runloop_api_client/sdk/async_benchmark_run.py create mode 100644 src/runloop_api_client/sdk/benchmark_run.py create mode 100644 tests/sdk/test_async_benchmark_run.py create mode 100644 tests/sdk/test_benchmark_run.py create mode 100644 tests/smoketests/sdk/test_async_benchmark_run.py create mode 100644 tests/smoketests/sdk/test_benchmark_run.py diff --git a/src/runloop_api_client/sdk/_types.py b/src/runloop_api_client/sdk/_types.py index be09f6eed..6bf9da020 100644 --- a/src/runloop_api_client/sdk/_types.py +++ b/src/runloop_api_client/sdk/_types.py @@ -5,6 +5,7 @@ from ..lib.polling import PollingConfig from ..types.devboxes import DiskSnapshotListParams, DiskSnapshotUpdateParams from ..types.scenarios import ScorerListParams, ScorerCreateParams, ScorerUpdateParams, ScorerValidateParams +from ..types.benchmarks import RunListScenarioRunsParams from ..types.input_context import InputContext from ..types.scenario_view import ScenarioView from ..types.agent_list_params import AgentListParams @@ -203,3 +204,8 @@ class ScenarioPreview(ScenarioView): input_context: InputContextPreview # type: ignore[assignment] """The input context for the Scenario.""" + + +# Benchmark Run params +class SDKBenchmarkRunListScenarioRunsParams(RunListScenarioRunsParams, BaseRequestOptions): + pass diff --git a/src/runloop_api_client/sdk/async_benchmark_run.py b/src/runloop_api_client/sdk/async_benchmark_run.py new file mode 100644 index 000000000..bf4c3d96a --- /dev/null +++ b/src/runloop_api_client/sdk/async_benchmark_run.py @@ -0,0 +1,126 @@ +"""AsyncBenchmarkRun resource class for asynchronous operations.""" + +from __future__ import annotations + +from typing import List +from typing_extensions import Unpack, override + +from ..types import ScenarioRunView, BenchmarkRunView +from ._types import BaseRequestOptions, LongRequestOptions, SDKBenchmarkRunListScenarioRunsParams +from .._client import AsyncRunloop + + +class AsyncBenchmarkRun: + """A benchmark run for evaluating agent performance across scenarios (async). + + Provides async methods for monitoring run status, managing the run lifecycle, + and accessing scenario run results. Obtain instances via + ``benchmark.run()`` or ``benchmark.list_runs()``. + + Example: + >>> benchmark = runloop.benchmark.from_id("bench-xxx") + >>> run = await benchmark.run(run_name="evaluation-v1") + >>> info = await run.get_info() + >>> scenario_runs = await run.list_scenario_runs() + """ + + def __init__(self, client: AsyncRunloop, run_id: str, benchmark_id: str) -> None: + """Create an AsyncBenchmarkRun instance. + + :param client: AsyncRunloop client instance + :type client: AsyncRunloop + :param run_id: Benchmark run ID + :type run_id: str + :param benchmark_id: Parent benchmark ID + :type benchmark_id: str + """ + self._client = client + self._id = run_id + self._benchmark_id = benchmark_id + + @override + def __repr__(self) -> str: + return f"" + + @property + def id(self) -> str: + """Return the benchmark run ID. + + :return: Unique benchmark run ID + :rtype: str + """ + return self._id + + @property + def benchmark_id(self) -> str: + """Return the parent benchmark ID. + + :return: Parent benchmark ID + :rtype: str + """ + return self._benchmark_id + + async def get_info( + self, + **options: Unpack[BaseRequestOptions], + ) -> BenchmarkRunView: + """Retrieve current benchmark run status and metadata. + + :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options + :return: Current benchmark run state info + :rtype: BenchmarkRunView + """ + return await self._client.benchmarks.runs.retrieve( + self._id, + **options, + ) + + async def cancel( + self, + **options: Unpack[LongRequestOptions], + ) -> BenchmarkRunView: + """Cancel the benchmark run. + + Stops all running scenarios and marks the run as canceled. + + :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options + :return: Updated benchmark run state + :rtype: BenchmarkRunView + """ + return await self._client.benchmarks.runs.cancel( + self._id, + **options, + ) + + async def complete( + self, + **options: Unpack[LongRequestOptions], + ) -> BenchmarkRunView: + """Complete the benchmark run. + + Marks the run as completed. Call this after all scenarios have finished. + + :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options + :return: Completed benchmark run state + :rtype: BenchmarkRunView + """ + return await self._client.benchmarks.runs.complete( + self._id, + **options, + ) + + async def list_scenario_runs( + self, + **params: Unpack[SDKBenchmarkRunListScenarioRunsParams], + ) -> List[ScenarioRunView]: + """List all scenario runs for this benchmark run. + + :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkRunListScenarioRunsParams` for available parameters + :return: List of scenario run views + :rtype: List[ScenarioRunView] + """ + page = self._client.benchmarks.runs.list_scenario_runs( + self._id, + **params, + ) + return [item async for item in page] diff --git a/src/runloop_api_client/sdk/benchmark_run.py b/src/runloop_api_client/sdk/benchmark_run.py new file mode 100644 index 000000000..e281825a2 --- /dev/null +++ b/src/runloop_api_client/sdk/benchmark_run.py @@ -0,0 +1,126 @@ +"""BenchmarkRun resource class for synchronous operations.""" + +from __future__ import annotations + +from typing import List +from typing_extensions import Unpack, override + +from ..types import ScenarioRunView, BenchmarkRunView +from ._types import BaseRequestOptions, LongRequestOptions, SDKBenchmarkRunListScenarioRunsParams +from .._client import Runloop + + +class BenchmarkRun: + """A benchmark run for evaluating agent performance across scenarios. + + Provides methods for monitoring run status, managing the run lifecycle, + and accessing scenario run results. Obtain instances via + ``benchmark.run()`` or ``benchmark.list_runs()``. + + Example: + >>> benchmark = runloop.benchmark.from_id("bench-xxx") + >>> run = benchmark.run(run_name="evaluation-v1") + >>> info = run.get_info() + >>> scenario_runs = run.list_scenario_runs() + """ + + def __init__(self, client: Runloop, run_id: str, benchmark_id: str) -> None: + """Create a BenchmarkRun instance. + + :param client: Runloop client instance + :type client: Runloop + :param run_id: Benchmark run ID + :type run_id: str + :param benchmark_id: Parent benchmark ID + :type benchmark_id: str + """ + self._client = client + self._id = run_id + self._benchmark_id = benchmark_id + + @override + def __repr__(self) -> str: + return f"" + + @property + def id(self) -> str: + """Return the benchmark run ID. + + :return: Unique benchmark run ID + :rtype: str + """ + return self._id + + @property + def benchmark_id(self) -> str: + """Return the parent benchmark ID. + + :return: Parent benchmark ID + :rtype: str + """ + return self._benchmark_id + + def get_info( + self, + **options: Unpack[BaseRequestOptions], + ) -> BenchmarkRunView: + """Retrieve current benchmark run status and metadata. + + :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options + :return: Current benchmark run state info + :rtype: BenchmarkRunView + """ + return self._client.benchmarks.runs.retrieve( + self._id, + **options, + ) + + def cancel( + self, + **options: Unpack[LongRequestOptions], + ) -> BenchmarkRunView: + """Cancel the benchmark run. + + Stops all running scenarios and marks the run as canceled. + + :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options + :return: Updated benchmark run state + :rtype: BenchmarkRunView + """ + return self._client.benchmarks.runs.cancel( + self._id, + **options, + ) + + def complete( + self, + **options: Unpack[LongRequestOptions], + ) -> BenchmarkRunView: + """Complete the benchmark run. + + Marks the run as completed. Call this after all scenarios have finished. + + :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options + :return: Completed benchmark run state + :rtype: BenchmarkRunView + """ + return self._client.benchmarks.runs.complete( + self._id, + **options, + ) + + def list_scenario_runs( + self, + **params: Unpack[SDKBenchmarkRunListScenarioRunsParams], + ) -> List[ScenarioRunView]: + """List all scenario runs for this benchmark run. + + :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkRunListScenarioRunsParams` for available parameters + :return: List of scenario run views + :rtype: List[ScenarioRunView] + """ + page = self._client.benchmarks.runs.list_scenario_runs( + self._id, + **params, + ) + return list(page) diff --git a/tests/sdk/conftest.py b/tests/sdk/conftest.py index c5546fe55..10ddf6254 100644 --- a/tests/sdk/conftest.py +++ b/tests/sdk/conftest.py @@ -129,6 +129,30 @@ class MockScenarioRunView: scoring_contract_result: object = None +@dataclass +class MockBenchmarkRunView: + """Mock BenchmarkRunView for testing.""" + + id: str = "bench_run_123" + benchmark_id: str = "bench_123" + state: str = "running" + metadata: Dict[str, str] = field(default_factory=dict) + start_time_ms: int = 1234567890000 + duration_ms: int | None = None + score: float | None = None + + +class AsyncIterableMock: + """A simple async iterable mock for testing paginated responses.""" + + def __init__(self, items: list[Any]) -> None: + self._items = items + + async def __aiter__(self): + for item in self._items: + yield item + + def create_mock_httpx_client(methods: dict[str, Any] | None = None) -> AsyncMock: """ Create a mock httpx.AsyncClient with proper context manager setup. @@ -237,6 +261,12 @@ def scenario_run_view() -> MockScenarioRunView: return MockScenarioRunView() +@pytest.fixture +def benchmark_run_view() -> MockBenchmarkRunView: + """Create a mock BenchmarkRunView.""" + return MockBenchmarkRunView() + + @pytest.fixture def mock_httpx_response() -> Mock: """Create a mock httpx.Response.""" diff --git a/tests/sdk/test_async_benchmark_run.py b/tests/sdk/test_async_benchmark_run.py new file mode 100644 index 000000000..1e1102c6e --- /dev/null +++ b/tests/sdk/test_async_benchmark_run.py @@ -0,0 +1,94 @@ +"""Comprehensive tests for async AsyncBenchmarkRun class.""" + +from __future__ import annotations + +from unittest.mock import AsyncMock + +from tests.sdk.conftest import AsyncIterableMock, MockScenarioRunView, MockBenchmarkRunView +from runloop_api_client.sdk.async_benchmark_run import AsyncBenchmarkRun + + +class TestAsyncBenchmarkRun: + """Tests for AsyncBenchmarkRun class.""" + + def test_init(self, mock_async_client: AsyncMock) -> None: + """Test AsyncBenchmarkRun initialization.""" + run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123") + assert run.id == "bench_run_123" + assert run.benchmark_id == "bench_123" + + def test_repr(self, mock_async_client: AsyncMock) -> None: + """Test AsyncBenchmarkRun string representation.""" + run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123") + assert repr(run) == "" + + async def test_get_info(self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView) -> None: + """Test get_info method.""" + mock_async_client.benchmarks.runs.retrieve = AsyncMock(return_value=benchmark_run_view) + + run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123") + result = await run.get_info() + + assert result == benchmark_run_view + mock_async_client.benchmarks.runs.retrieve.assert_awaited_once_with("bench_run_123") + + async def test_cancel(self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView) -> None: + """Test cancel method.""" + benchmark_run_view.state = "canceled" + mock_async_client.benchmarks.runs.cancel = AsyncMock(return_value=benchmark_run_view) + + run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123") + result = await run.cancel() + + assert result == benchmark_run_view + assert result.state == "canceled" + mock_async_client.benchmarks.runs.cancel.assert_awaited_once_with("bench_run_123") + + async def test_complete(self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView) -> None: + """Test complete method.""" + benchmark_run_view.state = "completed" + mock_async_client.benchmarks.runs.complete = AsyncMock(return_value=benchmark_run_view) + + run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123") + result = await run.complete() + + assert result == benchmark_run_view + assert result.state == "completed" + mock_async_client.benchmarks.runs.complete.assert_awaited_once_with("bench_run_123") + + async def test_list_scenario_runs( + self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView + ) -> None: + """Test list_scenario_runs method.""" + mock_async_client.benchmarks.runs.list_scenario_runs.return_value = AsyncIterableMock([scenario_run_view]) + + run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123") + result = await run.list_scenario_runs() + + assert len(result) == 1 + assert result[0] == scenario_run_view + mock_async_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123") + + async def test_list_scenario_runs_with_params( + self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView + ) -> None: + """Test list_scenario_runs method with filtering parameters.""" + mock_async_client.benchmarks.runs.list_scenario_runs.return_value = AsyncIterableMock([scenario_run_view]) + + run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123") + result = await run.list_scenario_runs(limit=10, state="completed") + + assert len(result) == 1 + mock_async_client.benchmarks.runs.list_scenario_runs.assert_called_once_with( + "bench_run_123", limit=10, state="completed" + ) + + async def test_list_scenario_runs_empty(self, mock_async_client: AsyncMock) -> None: + """Test list_scenario_runs returns empty list when no scenario runs.""" + mock_async_client.benchmarks.runs.list_scenario_runs.return_value = AsyncIterableMock([]) + + run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123") + result = await run.list_scenario_runs() + + assert result == [] + mock_async_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123") diff --git a/tests/sdk/test_benchmark_run.py b/tests/sdk/test_benchmark_run.py new file mode 100644 index 000000000..125af81eb --- /dev/null +++ b/tests/sdk/test_benchmark_run.py @@ -0,0 +1,92 @@ +"""Comprehensive tests for sync BenchmarkRun class.""" + +from __future__ import annotations + +from unittest.mock import Mock + +from tests.sdk.conftest import MockScenarioRunView, MockBenchmarkRunView +from runloop_api_client.sdk.benchmark_run import BenchmarkRun + + +class TestBenchmarkRun: + """Tests for BenchmarkRun class.""" + + def test_init(self, mock_client: Mock) -> None: + """Test BenchmarkRun initialization.""" + run = BenchmarkRun(mock_client, "bench_run_123", "bench_123") + assert run.id == "bench_run_123" + assert run.benchmark_id == "bench_123" + + def test_repr(self, mock_client: Mock) -> None: + """Test BenchmarkRun string representation.""" + run = BenchmarkRun(mock_client, "bench_run_123", "bench_123") + assert repr(run) == "" + + def test_get_info(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None: + """Test get_info method.""" + mock_client.benchmarks.runs.retrieve.return_value = benchmark_run_view + + run = BenchmarkRun(mock_client, "bench_run_123", "bench_123") + result = run.get_info() + + assert result == benchmark_run_view + mock_client.benchmarks.runs.retrieve.assert_called_once_with("bench_run_123") + + def test_cancel(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None: + """Test cancel method.""" + benchmark_run_view.state = "canceled" + mock_client.benchmarks.runs.cancel.return_value = benchmark_run_view + + run = BenchmarkRun(mock_client, "bench_run_123", "bench_123") + result = run.cancel() + + assert result == benchmark_run_view + assert result.state == "canceled" + mock_client.benchmarks.runs.cancel.assert_called_once_with("bench_run_123") + + def test_complete(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None: + """Test complete method.""" + benchmark_run_view.state = "completed" + mock_client.benchmarks.runs.complete.return_value = benchmark_run_view + + run = BenchmarkRun(mock_client, "bench_run_123", "bench_123") + result = run.complete() + + assert result == benchmark_run_view + assert result.state == "completed" + mock_client.benchmarks.runs.complete.assert_called_once_with("bench_run_123") + + def test_list_scenario_runs(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None: + """Test list_scenario_runs method.""" + mock_page = [scenario_run_view] + mock_client.benchmarks.runs.list_scenario_runs.return_value = mock_page + + run = BenchmarkRun(mock_client, "bench_run_123", "bench_123") + result = run.list_scenario_runs() + + assert len(result) == 1 + assert result[0] == scenario_run_view + mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123") + + def test_list_scenario_runs_with_params(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None: + """Test list_scenario_runs method with filtering parameters.""" + mock_page = [scenario_run_view] + mock_client.benchmarks.runs.list_scenario_runs.return_value = mock_page + + run = BenchmarkRun(mock_client, "bench_run_123", "bench_123") + result = run.list_scenario_runs(limit=10, state="completed") + + assert len(result) == 1 + mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with( + "bench_run_123", limit=10, state="completed" + ) + + def test_list_scenario_runs_empty(self, mock_client: Mock) -> None: + """Test list_scenario_runs returns empty list when no scenario runs.""" + mock_client.benchmarks.runs.list_scenario_runs.return_value = [] + + run = BenchmarkRun(mock_client, "bench_run_123", "bench_123") + result = run.list_scenario_runs() + + assert result == [] + mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123") diff --git a/tests/smoketests/sdk/test_async_benchmark_run.py b/tests/smoketests/sdk/test_async_benchmark_run.py new file mode 100644 index 000000000..843376fcc --- /dev/null +++ b/tests/smoketests/sdk/test_async_benchmark_run.py @@ -0,0 +1,138 @@ +"""Asynchronous SDK smoke tests for AsyncBenchmarkRun operations. + +These tests validate the AsyncBenchmarkRun class against the real API. +Until AsyncBenchmarkOps is available (PR3), we use the raw async API client +to find or create benchmark runs for testing. +""" + +from __future__ import annotations + +import pytest + +from runloop_api_client.sdk import AsyncRunloopSDK +from runloop_api_client.sdk.async_benchmark_run import AsyncBenchmarkRun + +pytestmark = [pytest.mark.smoketest] + +TWO_MINUTE_TIMEOUT = 120 + + +class TestAsyncBenchmarkRunRetrieval: + """Test AsyncBenchmarkRun retrieval operations.""" + + @pytest.mark.timeout(TWO_MINUTE_TIMEOUT) + async def test_benchmark_run_from_existing(self, async_sdk_client: AsyncRunloopSDK) -> None: + """Test creating AsyncBenchmarkRun from existing benchmark run. + + This test: + 1. Lists benchmark runs via raw async API + 2. Creates an AsyncBenchmarkRun wrapper + 3. Validates get_info returns correct data + """ + # List existing benchmark runs via raw API + runs_page = async_sdk_client.api.benchmarks.runs.list(limit=1) + runs = [run async for run in runs_page] + + if not runs: + pytest.skip("No benchmark runs available to test") + + run_data = runs[0] + + # Create AsyncBenchmarkRun wrapper + benchmark_run = AsyncBenchmarkRun( + client=async_sdk_client.api, + run_id=run_data.id, + benchmark_id=run_data.benchmark_id, + ) + + assert benchmark_run.id == run_data.id + assert benchmark_run.benchmark_id == run_data.benchmark_id + + # Test get_info + info = await benchmark_run.get_info() + assert info.id == run_data.id + assert info.benchmark_id == run_data.benchmark_id + + @pytest.mark.timeout(TWO_MINUTE_TIMEOUT) + async def test_benchmark_run_list_scenario_runs(self, async_sdk_client: AsyncRunloopSDK) -> None: + """Test AsyncBenchmarkRun.list_scenario_runs method. + + This test: + 1. Finds an existing benchmark run + 2. Lists its scenario runs + """ + # List existing benchmark runs via raw API + runs_page = async_sdk_client.api.benchmarks.runs.list(limit=1) + runs = [run async for run in runs_page] + + if not runs: + pytest.skip("No benchmark runs available to test") + + run_data = runs[0] + + # Create AsyncBenchmarkRun wrapper + benchmark_run = AsyncBenchmarkRun( + client=async_sdk_client.api, + run_id=run_data.id, + benchmark_id=run_data.benchmark_id, + ) + + # List scenario runs (might be empty, that's okay) + scenario_runs = await benchmark_run.list_scenario_runs() + assert isinstance(scenario_runs, list) + + +class TestAsyncBenchmarkRunLifecycle: + """Test AsyncBenchmarkRun lifecycle operations.""" + + @pytest.mark.timeout(TWO_MINUTE_TIMEOUT) + async def test_benchmark_run_create_and_cancel(self, async_sdk_client: AsyncRunloopSDK) -> None: + """Test creating a benchmark run and canceling it. + + This test: + 1. Finds an existing benchmark + 2. Starts a new benchmark run + 3. Creates an AsyncBenchmarkRun wrapper + 4. Cancels the run + """ + # Find an existing benchmark via raw API + benchmarks_page = async_sdk_client.api.benchmarks.list(limit=1) + benchmarks = [b async for b in benchmarks_page] + + if not benchmarks: + pytest.skip("No benchmarks available to test") + + benchmark = benchmarks[0] + + # Start a new benchmark run + run_data = await async_sdk_client.api.benchmarks.start_run( + benchmark.id, + name="sdk-smoketest-async-benchmark-run", + ) + + try: + # Create AsyncBenchmarkRun wrapper + benchmark_run = AsyncBenchmarkRun( + client=async_sdk_client.api, + run_id=run_data.id, + benchmark_id=run_data.benchmark_id, + ) + + assert benchmark_run.id == run_data.id + + # Get info + info = await benchmark_run.get_info() + assert info.id == run_data.id + assert info.state in ["queued", "running", "completed", "canceled"] + + # Cancel the run + result = await benchmark_run.cancel() + assert result.state in ["canceled", "completed"] # May already be completed + + except Exception: + # Ensure cleanup on any error + try: + await async_sdk_client.api.benchmarks.runs.cancel(run_data.id) + except Exception: + pass + raise diff --git a/tests/smoketests/sdk/test_benchmark_run.py b/tests/smoketests/sdk/test_benchmark_run.py new file mode 100644 index 000000000..0002e06b8 --- /dev/null +++ b/tests/smoketests/sdk/test_benchmark_run.py @@ -0,0 +1,138 @@ +"""Synchronous SDK smoke tests for BenchmarkRun operations. + +These tests validate the BenchmarkRun class against the real API. +Until BenchmarkOps is available (PR3), we use the raw API client to +find or create benchmark runs for testing. +""" + +from __future__ import annotations + +import pytest + +from runloop_api_client.sdk import RunloopSDK +from runloop_api_client.sdk.benchmark_run import BenchmarkRun + +pytestmark = [pytest.mark.smoketest] + +TWO_MINUTE_TIMEOUT = 120 + + +class TestBenchmarkRunRetrieval: + """Test BenchmarkRun retrieval operations.""" + + @pytest.mark.timeout(TWO_MINUTE_TIMEOUT) + def test_benchmark_run_from_existing(self, sdk_client: RunloopSDK) -> None: + """Test creating BenchmarkRun from existing benchmark run. + + This test: + 1. Lists benchmark runs via raw API + 2. Creates a BenchmarkRun wrapper + 3. Validates get_info returns correct data + """ + # List existing benchmark runs via raw API + runs_page = sdk_client.api.benchmarks.runs.list(limit=1) + runs = list(runs_page) + + if not runs: + pytest.skip("No benchmark runs available to test") + + run_data = runs[0] + + # Create BenchmarkRun wrapper + benchmark_run = BenchmarkRun( + client=sdk_client.api, + run_id=run_data.id, + benchmark_id=run_data.benchmark_id, + ) + + assert benchmark_run.id == run_data.id + assert benchmark_run.benchmark_id == run_data.benchmark_id + + # Test get_info + info = benchmark_run.get_info() + assert info.id == run_data.id + assert info.benchmark_id == run_data.benchmark_id + + @pytest.mark.timeout(TWO_MINUTE_TIMEOUT) + def test_benchmark_run_list_scenario_runs(self, sdk_client: RunloopSDK) -> None: + """Test BenchmarkRun.list_scenario_runs method. + + This test: + 1. Finds an existing benchmark run + 2. Lists its scenario runs + """ + # List existing benchmark runs via raw API + runs_page = sdk_client.api.benchmarks.runs.list(limit=1) + runs = list(runs_page) + + if not runs: + pytest.skip("No benchmark runs available to test") + + run_data = runs[0] + + # Create BenchmarkRun wrapper + benchmark_run = BenchmarkRun( + client=sdk_client.api, + run_id=run_data.id, + benchmark_id=run_data.benchmark_id, + ) + + # List scenario runs (might be empty, that's okay) + scenario_runs = benchmark_run.list_scenario_runs() + assert isinstance(scenario_runs, list) + + +class TestBenchmarkRunLifecycle: + """Test BenchmarkRun lifecycle operations.""" + + @pytest.mark.timeout(TWO_MINUTE_TIMEOUT) + def test_benchmark_run_create_and_cancel(self, sdk_client: RunloopSDK) -> None: + """Test creating a benchmark run and canceling it. + + This test: + 1. Finds an existing benchmark + 2. Starts a new benchmark run + 3. Creates a BenchmarkRun wrapper + 4. Cancels the run + """ + # Find an existing benchmark via raw API + benchmarks_page = sdk_client.api.benchmarks.list(limit=1) + benchmarks = list(benchmarks_page) + + if not benchmarks: + pytest.skip("No benchmarks available to test") + + benchmark = benchmarks[0] + + # Start a new benchmark run + run_data = sdk_client.api.benchmarks.start_run( + benchmark.id, + name="sdk-smoketest-benchmark-run", + ) + + try: + # Create BenchmarkRun wrapper + benchmark_run = BenchmarkRun( + client=sdk_client.api, + run_id=run_data.id, + benchmark_id=run_data.benchmark_id, + ) + + assert benchmark_run.id == run_data.id + + # Get info + info = benchmark_run.get_info() + assert info.id == run_data.id + assert info.state in ["queued", "running", "completed", "canceled"] + + # Cancel the run + result = benchmark_run.cancel() + assert result.state in ["canceled", "completed"] # May already be completed + + except Exception: + # Ensure cleanup on any error + try: + sdk_client.api.benchmarks.runs.cancel(run_data.id) + except Exception: + pass + raise From 3dbc3ab3de4826167de50f7747c80b108e3228fd Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Wed, 17 Dec 2025 11:06:42 -0800 Subject: [PATCH 4/5] fixed smoketests --- tests/smoketests/sdk/test_async_benchmark_run.py | 6 +++--- tests/smoketests/sdk/test_benchmark_run.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/smoketests/sdk/test_async_benchmark_run.py b/tests/smoketests/sdk/test_async_benchmark_run.py index 843376fcc..aabc5606d 100644 --- a/tests/smoketests/sdk/test_async_benchmark_run.py +++ b/tests/smoketests/sdk/test_async_benchmark_run.py @@ -106,8 +106,8 @@ async def test_benchmark_run_create_and_cancel(self, async_sdk_client: AsyncRunl # Start a new benchmark run run_data = await async_sdk_client.api.benchmarks.start_run( - benchmark.id, - name="sdk-smoketest-async-benchmark-run", + benchmark_id=benchmark.id, + run_name="sdk-smoketest-async-benchmark-run", ) try: @@ -123,7 +123,7 @@ async def test_benchmark_run_create_and_cancel(self, async_sdk_client: AsyncRunl # Get info info = await benchmark_run.get_info() assert info.id == run_data.id - assert info.state in ["queued", "running", "completed", "canceled"] + assert info.state in ["running", "completed", "canceled"] # Cancel the run result = await benchmark_run.cancel() diff --git a/tests/smoketests/sdk/test_benchmark_run.py b/tests/smoketests/sdk/test_benchmark_run.py index 0002e06b8..ab267eae8 100644 --- a/tests/smoketests/sdk/test_benchmark_run.py +++ b/tests/smoketests/sdk/test_benchmark_run.py @@ -106,8 +106,8 @@ def test_benchmark_run_create_and_cancel(self, sdk_client: RunloopSDK) -> None: # Start a new benchmark run run_data = sdk_client.api.benchmarks.start_run( - benchmark.id, - name="sdk-smoketest-benchmark-run", + benchmark_id=benchmark.id, + run_name="sdk-smoketest-benchmark-run", ) try: @@ -123,7 +123,7 @@ def test_benchmark_run_create_and_cancel(self, sdk_client: RunloopSDK) -> None: # Get info info = benchmark_run.get_info() assert info.id == run_data.id - assert info.state in ["queued", "running", "completed", "canceled"] + assert info.state in ["running", "completed", "canceled"] # Cancel the run result = benchmark_run.cancel() From 24b53872f00b2760f681313f8ebbaca35c38706e Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Wed, 17 Dec 2025 12:22:28 -0800 Subject: [PATCH 5/5] `list_scenario_runs()` now returns a list of ScenarioRun/AsyncScenarioRun objects --- .../sdk/async_benchmark_run.py | 13 ++-- src/runloop_api_client/sdk/benchmark_run.py | 11 ++-- tests/sdk/test_async_benchmark_run.py | 62 +++++++++++++------ tests/sdk/test_benchmark_run.py | 58 ++++++++++++----- .../sdk/test_async_benchmark_run.py | 19 ++++-- tests/smoketests/sdk/test_benchmark_run.py | 16 +++-- 6 files changed, 121 insertions(+), 58 deletions(-) diff --git a/src/runloop_api_client/sdk/async_benchmark_run.py b/src/runloop_api_client/sdk/async_benchmark_run.py index bf4c3d96a..fed2f5b00 100644 --- a/src/runloop_api_client/sdk/async_benchmark_run.py +++ b/src/runloop_api_client/sdk/async_benchmark_run.py @@ -5,9 +5,10 @@ from typing import List from typing_extensions import Unpack, override -from ..types import ScenarioRunView, BenchmarkRunView +from ..types import BenchmarkRunView from ._types import BaseRequestOptions, LongRequestOptions, SDKBenchmarkRunListScenarioRunsParams from .._client import AsyncRunloop +from .async_scenario_run import AsyncScenarioRun class AsyncBenchmarkRun: @@ -112,15 +113,15 @@ async def complete( async def list_scenario_runs( self, **params: Unpack[SDKBenchmarkRunListScenarioRunsParams], - ) -> List[ScenarioRunView]: + ) -> List[AsyncScenarioRun]: """List all scenario runs for this benchmark run. :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkRunListScenarioRunsParams` for available parameters - :return: List of scenario run views - :rtype: List[ScenarioRunView] + :return: List of async scenario run objects + :rtype: List[AsyncScenarioRun] """ - page = self._client.benchmarks.runs.list_scenario_runs( + page = await self._client.benchmarks.runs.list_scenario_runs( self._id, **params, ) - return [item async for item in page] + return [AsyncScenarioRun(self._client, run.id, run.devbox_id) for run in page.runs] diff --git a/src/runloop_api_client/sdk/benchmark_run.py b/src/runloop_api_client/sdk/benchmark_run.py index e281825a2..ff6ed567a 100644 --- a/src/runloop_api_client/sdk/benchmark_run.py +++ b/src/runloop_api_client/sdk/benchmark_run.py @@ -5,9 +5,10 @@ from typing import List from typing_extensions import Unpack, override -from ..types import ScenarioRunView, BenchmarkRunView +from ..types import BenchmarkRunView from ._types import BaseRequestOptions, LongRequestOptions, SDKBenchmarkRunListScenarioRunsParams from .._client import Runloop +from .scenario_run import ScenarioRun class BenchmarkRun: @@ -112,15 +113,15 @@ def complete( def list_scenario_runs( self, **params: Unpack[SDKBenchmarkRunListScenarioRunsParams], - ) -> List[ScenarioRunView]: + ) -> List[ScenarioRun]: """List all scenario runs for this benchmark run. :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkRunListScenarioRunsParams` for available parameters - :return: List of scenario run views - :rtype: List[ScenarioRunView] + :return: List of scenario run objects + :rtype: List[ScenarioRun] """ page = self._client.benchmarks.runs.list_scenario_runs( self._id, **params, ) - return list(page) + return [ScenarioRun(self._client, run.id, run.devbox_id) for run in page.runs] diff --git a/tests/sdk/test_async_benchmark_run.py b/tests/sdk/test_async_benchmark_run.py index 1e1102c6e..1785f683a 100644 --- a/tests/sdk/test_async_benchmark_run.py +++ b/tests/sdk/test_async_benchmark_run.py @@ -2,9 +2,11 @@ from __future__ import annotations +from types import SimpleNamespace from unittest.mock import AsyncMock -from tests.sdk.conftest import AsyncIterableMock, MockScenarioRunView, MockBenchmarkRunView +from tests.sdk.conftest import MockScenarioRunView, MockBenchmarkRunView +from runloop_api_client.sdk.async_scenario_run import AsyncScenarioRun from runloop_api_client.sdk.async_benchmark_run import AsyncBenchmarkRun @@ -56,39 +58,63 @@ async def test_complete(self, mock_async_client: AsyncMock, benchmark_run_view: assert result.state == "completed" mock_async_client.benchmarks.runs.complete.assert_awaited_once_with("bench_run_123") - async def test_list_scenario_runs( + async def test_list_scenario_runs_empty(self, mock_async_client: AsyncMock) -> None: + """Test list_scenario_runs method with empty results.""" + page = SimpleNamespace(runs=[]) + mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page) + + run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123") + result = await run.list_scenario_runs() + + assert len(result) == 0 + mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with("bench_run_123") + + async def test_list_scenario_runs_single( self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView ) -> None: - """Test list_scenario_runs method.""" - mock_async_client.benchmarks.runs.list_scenario_runs.return_value = AsyncIterableMock([scenario_run_view]) + """Test list_scenario_runs method with single result.""" + page = SimpleNamespace(runs=[scenario_run_view]) + mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page) run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123") result = await run.list_scenario_runs() assert len(result) == 1 - assert result[0] == scenario_run_view - mock_async_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123") + assert isinstance(result[0], AsyncScenarioRun) + assert result[0].id == scenario_run_view.id + assert result[0].devbox_id == scenario_run_view.devbox_id + mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with("bench_run_123") + + async def test_list_scenario_runs_multiple(self, mock_async_client: AsyncMock) -> None: + """Test list_scenario_runs method with multiple results.""" + scenario_run_view1 = MockScenarioRunView(id="run_001", devbox_id="dev_001") + scenario_run_view2 = MockScenarioRunView(id="run_002", devbox_id="dev_002") + page = SimpleNamespace(runs=[scenario_run_view1, scenario_run_view2]) + mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page) + + run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123") + result = await run.list_scenario_runs() + + assert len(result) == 2 + assert isinstance(result[0], AsyncScenarioRun) + assert isinstance(result[1], AsyncScenarioRun) + assert result[0].id == "run_001" + assert result[1].id == "run_002" + mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with("bench_run_123") async def test_list_scenario_runs_with_params( self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView ) -> None: """Test list_scenario_runs method with filtering parameters.""" - mock_async_client.benchmarks.runs.list_scenario_runs.return_value = AsyncIterableMock([scenario_run_view]) + page = SimpleNamespace(runs=[scenario_run_view]) + mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page) run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123") result = await run.list_scenario_runs(limit=10, state="completed") assert len(result) == 1 - mock_async_client.benchmarks.runs.list_scenario_runs.assert_called_once_with( + assert isinstance(result[0], AsyncScenarioRun) + assert result[0].id == scenario_run_view.id + mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with( "bench_run_123", limit=10, state="completed" ) - - async def test_list_scenario_runs_empty(self, mock_async_client: AsyncMock) -> None: - """Test list_scenario_runs returns empty list when no scenario runs.""" - mock_async_client.benchmarks.runs.list_scenario_runs.return_value = AsyncIterableMock([]) - - run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123") - result = await run.list_scenario_runs() - - assert result == [] - mock_async_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123") diff --git a/tests/sdk/test_benchmark_run.py b/tests/sdk/test_benchmark_run.py index 125af81eb..d54fb9432 100644 --- a/tests/sdk/test_benchmark_run.py +++ b/tests/sdk/test_benchmark_run.py @@ -2,9 +2,11 @@ from __future__ import annotations +from types import SimpleNamespace from unittest.mock import Mock from tests.sdk.conftest import MockScenarioRunView, MockBenchmarkRunView +from runloop_api_client.sdk.scenario_run import ScenarioRun from runloop_api_client.sdk.benchmark_run import BenchmarkRun @@ -56,37 +58,59 @@ def test_complete(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunV assert result.state == "completed" mock_client.benchmarks.runs.complete.assert_called_once_with("bench_run_123") - def test_list_scenario_runs(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None: - """Test list_scenario_runs method.""" - mock_page = [scenario_run_view] - mock_client.benchmarks.runs.list_scenario_runs.return_value = mock_page + def test_list_scenario_runs_empty(self, mock_client: Mock) -> None: + """Test list_scenario_runs method with empty results.""" + page = SimpleNamespace(runs=[]) + mock_client.benchmarks.runs.list_scenario_runs.return_value = page + + run = BenchmarkRun(mock_client, "bench_run_123", "bench_123") + result = run.list_scenario_runs() + + assert len(result) == 0 + mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123") + + def test_list_scenario_runs_single(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None: + """Test list_scenario_runs method with single result.""" + page = SimpleNamespace(runs=[scenario_run_view]) + mock_client.benchmarks.runs.list_scenario_runs.return_value = page run = BenchmarkRun(mock_client, "bench_run_123", "bench_123") result = run.list_scenario_runs() assert len(result) == 1 - assert result[0] == scenario_run_view + assert isinstance(result[0], ScenarioRun) + assert result[0].id == scenario_run_view.id + assert result[0].devbox_id == scenario_run_view.devbox_id + mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123") + + def test_list_scenario_runs_multiple(self, mock_client: Mock) -> None: + """Test list_scenario_runs method with multiple results.""" + scenario_run_view1 = MockScenarioRunView(id="run_001", devbox_id="dev_001") + scenario_run_view2 = MockScenarioRunView(id="run_002", devbox_id="dev_002") + page = SimpleNamespace(runs=[scenario_run_view1, scenario_run_view2]) + mock_client.benchmarks.runs.list_scenario_runs.return_value = page + + run = BenchmarkRun(mock_client, "bench_run_123", "bench_123") + result = run.list_scenario_runs() + + assert len(result) == 2 + assert isinstance(result[0], ScenarioRun) + assert isinstance(result[1], ScenarioRun) + assert result[0].id == "run_001" + assert result[1].id == "run_002" mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123") def test_list_scenario_runs_with_params(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None: """Test list_scenario_runs method with filtering parameters.""" - mock_page = [scenario_run_view] - mock_client.benchmarks.runs.list_scenario_runs.return_value = mock_page + page = SimpleNamespace(runs=[scenario_run_view]) + mock_client.benchmarks.runs.list_scenario_runs.return_value = page run = BenchmarkRun(mock_client, "bench_run_123", "bench_123") result = run.list_scenario_runs(limit=10, state="completed") assert len(result) == 1 + assert isinstance(result[0], ScenarioRun) + assert result[0].id == scenario_run_view.id mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with( "bench_run_123", limit=10, state="completed" ) - - def test_list_scenario_runs_empty(self, mock_client: Mock) -> None: - """Test list_scenario_runs returns empty list when no scenario runs.""" - mock_client.benchmarks.runs.list_scenario_runs.return_value = [] - - run = BenchmarkRun(mock_client, "bench_run_123", "bench_123") - result = run.list_scenario_runs() - - assert result == [] - mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123") diff --git a/tests/smoketests/sdk/test_async_benchmark_run.py b/tests/smoketests/sdk/test_async_benchmark_run.py index aabc5606d..3eab471b5 100644 --- a/tests/smoketests/sdk/test_async_benchmark_run.py +++ b/tests/smoketests/sdk/test_async_benchmark_run.py @@ -10,6 +10,7 @@ import pytest from runloop_api_client.sdk import AsyncRunloopSDK +from runloop_api_client.sdk.async_scenario_run import AsyncScenarioRun from runloop_api_client.sdk.async_benchmark_run import AsyncBenchmarkRun pytestmark = [pytest.mark.smoketest] @@ -30,8 +31,8 @@ async def test_benchmark_run_from_existing(self, async_sdk_client: AsyncRunloopS 3. Validates get_info returns correct data """ # List existing benchmark runs via raw API - runs_page = async_sdk_client.api.benchmarks.runs.list(limit=1) - runs = [run async for run in runs_page] + runs_page = await async_sdk_client.api.benchmarks.runs.list(limit=1) + runs = runs_page.runs if not runs: pytest.skip("No benchmark runs available to test") @@ -62,8 +63,8 @@ async def test_benchmark_run_list_scenario_runs(self, async_sdk_client: AsyncRun 2. Lists its scenario runs """ # List existing benchmark runs via raw API - runs_page = async_sdk_client.api.benchmarks.runs.list(limit=1) - runs = [run async for run in runs_page] + runs_page = await async_sdk_client.api.benchmarks.runs.list(limit=1) + runs = runs_page.runs if not runs: pytest.skip("No benchmark runs available to test") @@ -81,6 +82,12 @@ async def test_benchmark_run_list_scenario_runs(self, async_sdk_client: AsyncRun scenario_runs = await benchmark_run.list_scenario_runs() assert isinstance(scenario_runs, list) + # Verify returned items are AsyncScenarioRun objects + for scenario_run in scenario_runs: + assert isinstance(scenario_run, AsyncScenarioRun) + assert scenario_run.id is not None + assert scenario_run.devbox_id is not None + class TestAsyncBenchmarkRunLifecycle: """Test AsyncBenchmarkRun lifecycle operations.""" @@ -96,8 +103,8 @@ async def test_benchmark_run_create_and_cancel(self, async_sdk_client: AsyncRunl 4. Cancels the run """ # Find an existing benchmark via raw API - benchmarks_page = async_sdk_client.api.benchmarks.list(limit=1) - benchmarks = [b async for b in benchmarks_page] + benchmarks_page = await async_sdk_client.api.benchmarks.list(limit=1) + benchmarks = benchmarks_page.benchmarks if not benchmarks: pytest.skip("No benchmarks available to test") diff --git a/tests/smoketests/sdk/test_benchmark_run.py b/tests/smoketests/sdk/test_benchmark_run.py index ab267eae8..f21e9e87e 100644 --- a/tests/smoketests/sdk/test_benchmark_run.py +++ b/tests/smoketests/sdk/test_benchmark_run.py @@ -10,6 +10,7 @@ import pytest from runloop_api_client.sdk import RunloopSDK +from runloop_api_client.sdk.scenario_run import ScenarioRun from runloop_api_client.sdk.benchmark_run import BenchmarkRun pytestmark = [pytest.mark.smoketest] @@ -30,8 +31,7 @@ def test_benchmark_run_from_existing(self, sdk_client: RunloopSDK) -> None: 3. Validates get_info returns correct data """ # List existing benchmark runs via raw API - runs_page = sdk_client.api.benchmarks.runs.list(limit=1) - runs = list(runs_page) + runs = sdk_client.api.benchmarks.runs.list(limit=1).runs if not runs: pytest.skip("No benchmark runs available to test") @@ -62,8 +62,7 @@ def test_benchmark_run_list_scenario_runs(self, sdk_client: RunloopSDK) -> None: 2. Lists its scenario runs """ # List existing benchmark runs via raw API - runs_page = sdk_client.api.benchmarks.runs.list(limit=1) - runs = list(runs_page) + runs = sdk_client.api.benchmarks.runs.list(limit=1).runs if not runs: pytest.skip("No benchmark runs available to test") @@ -81,6 +80,12 @@ def test_benchmark_run_list_scenario_runs(self, sdk_client: RunloopSDK) -> None: scenario_runs = benchmark_run.list_scenario_runs() assert isinstance(scenario_runs, list) + # Verify returned items are ScenarioRun objects + for scenario_run in scenario_runs: + assert isinstance(scenario_run, ScenarioRun) + assert scenario_run.id is not None + assert scenario_run.devbox_id is not None + class TestBenchmarkRunLifecycle: """Test BenchmarkRun lifecycle operations.""" @@ -96,8 +101,7 @@ def test_benchmark_run_create_and_cancel(self, sdk_client: RunloopSDK) -> None: 4. Cancels the run """ # Find an existing benchmark via raw API - benchmarks_page = sdk_client.api.benchmarks.list(limit=1) - benchmarks = list(benchmarks_page) + benchmarks = sdk_client.api.benchmarks.list(limit=1).benchmarks if not benchmarks: pytest.skip("No benchmarks available to test")