From db486de16d87f1d40686d1c8ce2b8ff3986cc05f Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 16 Dec 2025 13:01:58 -0800
Subject: [PATCH 1/5] update requirements-dev

---
 requirements-dev.lock | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements-dev.lock b/requirements-dev.lock
index b9f3f2862..c48025dbf 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -94,7 +94,7 @@ python-dateutil==2.9.0.post0 ; python_full_version < '3.10'
     # via time-machine
 respx==0.22.0
 rich==14.2.0
-ruff==0.14.8
+ruff==0.14.9
 six==1.17.0 ; python_full_version < '3.10'
     # via python-dateutil
 sniffio==1.3.1

From 3a88bc3a72fae6fb763efd64730d0c16ca1ac63d Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 16 Dec 2025 13:02:58 -0800
Subject: [PATCH 2/5] pyproject formatting nit

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2c90fa53a..93e6c93e5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,7 +15,7 @@ dependencies = [
   "anyio>=3.5.0, <5",
   "distro>=1.7.0, <2",
   "sniffio",
-    "uuid-utils>=0.11.0",
+  "uuid-utils>=0.11.0",
 ]
 
 requires-python = ">= 3.9"

From 4650641013aa86914c3c3e36b951054bbcb56ce6 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 16 Dec 2025 17:36:50 -0800
Subject: [PATCH 3/5] feat(sdk): add BenchmarkRun and AsyncBenchmarkRun classes

---
 src/runloop_api_client/sdk/_types.py          |   6 +
 .../sdk/async_benchmark_run.py                | 126 ++++++++++++++++
 src/runloop_api_client/sdk/benchmark_run.py   | 126 ++++++++++++++++
 tests/sdk/conftest.py                         |  30 ++++
 tests/sdk/test_async_benchmark_run.py         |  94 ++++++++++++
 tests/sdk/test_benchmark_run.py               |  92 ++++++++++++
 .../sdk/test_async_benchmark_run.py           | 138 ++++++++++++++++++
 tests/smoketests/sdk/test_benchmark_run.py    | 138 ++++++++++++++++++
 8 files changed, 750 insertions(+)
 create mode 100644 src/runloop_api_client/sdk/async_benchmark_run.py
 create mode 100644 src/runloop_api_client/sdk/benchmark_run.py
 create mode 100644 tests/sdk/test_async_benchmark_run.py
 create mode 100644 tests/sdk/test_benchmark_run.py
 create mode 100644 tests/smoketests/sdk/test_async_benchmark_run.py
 create mode 100644 tests/smoketests/sdk/test_benchmark_run.py

diff --git a/src/runloop_api_client/sdk/_types.py b/src/runloop_api_client/sdk/_types.py
index be09f6eed..6bf9da020 100644
--- a/src/runloop_api_client/sdk/_types.py
+++ b/src/runloop_api_client/sdk/_types.py
@@ -5,6 +5,7 @@
 from ..lib.polling import PollingConfig
 from ..types.devboxes import DiskSnapshotListParams, DiskSnapshotUpdateParams
 from ..types.scenarios import ScorerListParams, ScorerCreateParams, ScorerUpdateParams, ScorerValidateParams
+from ..types.benchmarks import RunListScenarioRunsParams
 from ..types.input_context import InputContext
 from ..types.scenario_view import ScenarioView
 from ..types.agent_list_params import AgentListParams
@@ -203,3 +204,8 @@ class ScenarioPreview(ScenarioView):
 
     input_context: InputContextPreview  # type: ignore[assignment]
     """The input context for the Scenario."""
+
+
+# Benchmark Run params: the RunListScenarioRunsParams fields plus the BaseRequestOptions fields
+class SDKBenchmarkRunListScenarioRunsParams(RunListScenarioRunsParams, BaseRequestOptions):
+    pass
diff --git a/src/runloop_api_client/sdk/async_benchmark_run.py b/src/runloop_api_client/sdk/async_benchmark_run.py
new file mode 100644
index 000000000..bf4c3d96a
--- /dev/null
+++ b/src/runloop_api_client/sdk/async_benchmark_run.py
@@ -0,0 +1,126 @@
+"""AsyncBenchmarkRun resource class for asynchronous operations."""
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Unpack, override
+
+from ..types import ScenarioRunView, BenchmarkRunView
+from ._types import BaseRequestOptions, LongRequestOptions, SDKBenchmarkRunListScenarioRunsParams
+from .._client import AsyncRunloop
+
+
+class AsyncBenchmarkRun:
+    """A benchmark run for evaluating agent performance across scenarios (async).
+
+    Provides async methods for monitoring run status, managing the run lifecycle,
+    and accessing scenario run results. Obtain instances via
+    ``benchmark.run()`` or ``benchmark.list_runs()``.
+
+    Example:
+        >>> benchmark = runloop.benchmark.from_id("bench-xxx")
+        >>> run = await benchmark.run(run_name="evaluation-v1")
+        >>> info = await run.get_info()
+        >>> scenario_runs = await run.list_scenario_runs()
+    """
+
+    def __init__(self, client: AsyncRunloop, run_id: str, benchmark_id: str) -> None:
+        """Create an AsyncBenchmarkRun instance (stores the arguments; makes no API call).
+
+        :param client: AsyncRunloop client instance
+        :type client: AsyncRunloop
+        :param run_id: Benchmark run ID
+        :type run_id: str
+        :param benchmark_id: Parent benchmark ID (only stored; requests made by this class send ``run_id`` alone)
+        :type benchmark_id: str
+        """
+        self._client = client
+        self._id = run_id
+        self._benchmark_id = benchmark_id
+
+    @override
+    def __repr__(self) -> str:
+        return f"<AsyncBenchmarkRun id={self._id!r}>"
+
+    @property
+    def id(self) -> str:
+        """Return the benchmark run ID.
+
+        :return: Unique benchmark run ID
+        :rtype: str
+        """
+        return self._id
+
+    @property
+    def benchmark_id(self) -> str:
+        """Return the parent benchmark ID.
+
+        :return: Parent benchmark ID
+        :rtype: str
+        """
+        return self._benchmark_id
+
+    async def get_info(
+        self,
+        **options: Unpack[BaseRequestOptions],
+    ) -> BenchmarkRunView:
+        """Retrieve current benchmark run status and metadata.
+
+        :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options
+        :return: Current benchmark run state info
+        :rtype: BenchmarkRunView
+        """
+        return await self._client.benchmarks.runs.retrieve(
+            self._id,
+            **options,
+        )
+
+    async def cancel(
+        self,
+        **options: Unpack[LongRequestOptions],
+    ) -> BenchmarkRunView:
+        """Cancel the benchmark run.
+
+        Stops all running scenarios and marks the run as canceled.
+
+        :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+        :return: Updated benchmark run state
+        :rtype: BenchmarkRunView
+        """
+        return await self._client.benchmarks.runs.cancel(
+            self._id,
+            **options,
+        )
+
+    async def complete(
+        self,
+        **options: Unpack[LongRequestOptions],
+    ) -> BenchmarkRunView:
+        """Complete the benchmark run.
+
+        Marks the run as completed. Call this after all scenarios have finished.
+
+        :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+        :return: Completed benchmark run state
+        :rtype: BenchmarkRunView
+        """
+        return await self._client.benchmarks.runs.complete(
+            self._id,
+            **options,
+        )
+
+    async def list_scenario_runs(
+        self,
+        **params: Unpack[SDKBenchmarkRunListScenarioRunsParams],
+    ) -> List[ScenarioRunView]:
+        """List all scenario runs for this benchmark run.
+
+        :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkRunListScenarioRunsParams` for available parameters
+        :return: Every item yielded by iterating the listing result, in iteration order
+        :rtype: List[ScenarioRunView]
+        """
+        page = self._client.benchmarks.runs.list_scenario_runs(  # not awaited: iterated with ``async for`` below
+            self._id,
+            **params,
+        )
+        return [item async for item in page]
diff --git a/src/runloop_api_client/sdk/benchmark_run.py b/src/runloop_api_client/sdk/benchmark_run.py
new file mode 100644
index 000000000..e281825a2
--- /dev/null
+++ b/src/runloop_api_client/sdk/benchmark_run.py
@@ -0,0 +1,126 @@
+"""BenchmarkRun resource class for synchronous operations."""
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Unpack, override
+
+from ..types import ScenarioRunView, BenchmarkRunView
+from ._types import BaseRequestOptions, LongRequestOptions, SDKBenchmarkRunListScenarioRunsParams
+from .._client import Runloop
+
+
+class BenchmarkRun:
+    """A benchmark run for evaluating agent performance across scenarios.
+
+    Provides methods for monitoring run status, managing the run lifecycle,
+    and accessing scenario run results. Obtain instances via
+    ``benchmark.run()`` or ``benchmark.list_runs()``.
+
+    Example:
+        >>> benchmark = runloop.benchmark.from_id("bench-xxx")
+        >>> run = benchmark.run(run_name="evaluation-v1")
+        >>> info = run.get_info()
+        >>> scenario_runs = run.list_scenario_runs()
+    """
+
+    def __init__(self, client: Runloop, run_id: str, benchmark_id: str) -> None:
+        """Create a BenchmarkRun instance (stores the arguments; makes no API call).
+
+        :param client: Runloop client instance
+        :type client: Runloop
+        :param run_id: Benchmark run ID
+        :type run_id: str
+        :param benchmark_id: Parent benchmark ID (only stored; requests made by this class send ``run_id`` alone)
+        :type benchmark_id: str
+        """
+        self._client = client
+        self._id = run_id
+        self._benchmark_id = benchmark_id
+
+    @override
+    def __repr__(self) -> str:
+        return f"<BenchmarkRun id={self._id!r}>"
+
+    @property
+    def id(self) -> str:
+        """Return the benchmark run ID.
+
+        :return: Unique benchmark run ID
+        :rtype: str
+        """
+        return self._id
+
+    @property
+    def benchmark_id(self) -> str:
+        """Return the parent benchmark ID.
+
+        :return: Parent benchmark ID
+        :rtype: str
+        """
+        return self._benchmark_id
+
+    def get_info(
+        self,
+        **options: Unpack[BaseRequestOptions],
+    ) -> BenchmarkRunView:
+        """Retrieve current benchmark run status and metadata.
+
+        :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options
+        :return: Current benchmark run state info
+        :rtype: BenchmarkRunView
+        """
+        return self._client.benchmarks.runs.retrieve(
+            self._id,
+            **options,
+        )
+
+    def cancel(
+        self,
+        **options: Unpack[LongRequestOptions],
+    ) -> BenchmarkRunView:
+        """Cancel the benchmark run.
+
+        Stops all running scenarios and marks the run as canceled.
+
+        :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+        :return: Updated benchmark run state
+        :rtype: BenchmarkRunView
+        """
+        return self._client.benchmarks.runs.cancel(
+            self._id,
+            **options,
+        )
+
+    def complete(
+        self,
+        **options: Unpack[LongRequestOptions],
+    ) -> BenchmarkRunView:
+        """Complete the benchmark run.
+
+        Marks the run as completed. Call this after all scenarios have finished.
+
+        :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+        :return: Completed benchmark run state
+        :rtype: BenchmarkRunView
+        """
+        return self._client.benchmarks.runs.complete(
+            self._id,
+            **options,
+        )
+
+    def list_scenario_runs(
+        self,
+        **params: Unpack[SDKBenchmarkRunListScenarioRunsParams],
+    ) -> List[ScenarioRunView]:
+        """List all scenario runs for this benchmark run.
+
+        :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkRunListScenarioRunsParams` for available parameters
+        :return: Every item yielded by iterating the listing result, in iteration order
+        :rtype: List[ScenarioRunView]
+        """
+        page = self._client.benchmarks.runs.list_scenario_runs(  # iterable result; drained by list() below
+            self._id,
+            **params,
+        )
+        return list(page)
diff --git a/tests/sdk/conftest.py b/tests/sdk/conftest.py
index c5546fe55..10ddf6254 100644
--- a/tests/sdk/conftest.py
+++ b/tests/sdk/conftest.py
@@ -129,6 +129,30 @@ class MockScenarioRunView:
     scoring_contract_result: object = None
 
 
+@dataclass
+class MockBenchmarkRunView:
+    """Mock BenchmarkRunView for testing; every field has a default so it can be built with no arguments."""
+
+    id: str = "bench_run_123"
+    benchmark_id: str = "bench_123"
+    state: str = "running"
+    metadata: Dict[str, str] = field(default_factory=dict)
+    start_time_ms: int = 1234567890000
+    duration_ms: int | None = None
+    score: float | None = None
+
+
+class AsyncIterableMock:
+    """A simple async iterable over a fixed list of items, for testing paginated responses."""
+
+    def __init__(self, items: list[Any]) -> None:
+        self._items = items
+
+    async def __aiter__(self):  # async generator: each ``async for`` yields the stored items in order
+        for item in self._items:
+            yield item
+
+
 def create_mock_httpx_client(methods: dict[str, Any] | None = None) -> AsyncMock:
     """
     Create a mock httpx.AsyncClient with proper context manager setup.
@@ -237,6 +261,12 @@ def scenario_run_view() -> MockScenarioRunView:
     return MockScenarioRunView()
 
 
+@pytest.fixture
+def benchmark_run_view() -> MockBenchmarkRunView:
+    """Return a new MockBenchmarkRunView populated with its default field values."""
+    return MockBenchmarkRunView()
+
+
 @pytest.fixture
 def mock_httpx_response() -> Mock:
     """Create a mock httpx.Response."""
diff --git a/tests/sdk/test_async_benchmark_run.py b/tests/sdk/test_async_benchmark_run.py
new file mode 100644
index 000000000..1e1102c6e
--- /dev/null
+++ b/tests/sdk/test_async_benchmark_run.py
@@ -0,0 +1,94 @@
+"""Comprehensive tests for the AsyncBenchmarkRun class."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock
+
+from tests.sdk.conftest import AsyncIterableMock, MockScenarioRunView, MockBenchmarkRunView
+from runloop_api_client.sdk.async_benchmark_run import AsyncBenchmarkRun
+
+
+class TestAsyncBenchmarkRun:
+    """Tests for AsyncBenchmarkRun class."""
+
+    def test_init(self, mock_async_client: AsyncMock) -> None:
+        """The ``id`` and ``benchmark_id`` properties return the constructor arguments."""
+        run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123")
+        assert run.id == "bench_run_123"
+        assert run.benchmark_id == "bench_123"
+
+    def test_repr(self, mock_async_client: AsyncMock) -> None:
+        """``repr`` shows the class name and the run ID only."""
+        run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123")
+        assert repr(run) == "<AsyncBenchmarkRun id='bench_run_123'>"
+
+    async def test_get_info(self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView) -> None:
+        """get_info awaits ``runs.retrieve`` with only the run ID and returns its result."""
+        mock_async_client.benchmarks.runs.retrieve = AsyncMock(return_value=benchmark_run_view)
+
+        run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123")
+        result = await run.get_info()
+
+        assert result == benchmark_run_view
+        mock_async_client.benchmarks.runs.retrieve.assert_awaited_once_with("bench_run_123")
+
+    async def test_cancel(self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView) -> None:
+        """cancel awaits ``runs.cancel`` with only the run ID and returns its result."""
+        benchmark_run_view.state = "canceled"
+        mock_async_client.benchmarks.runs.cancel = AsyncMock(return_value=benchmark_run_view)
+
+        run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123")
+        result = await run.cancel()
+
+        assert result == benchmark_run_view
+        assert result.state == "canceled"
+        mock_async_client.benchmarks.runs.cancel.assert_awaited_once_with("bench_run_123")
+
+    async def test_complete(self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView) -> None:
+        """complete awaits ``runs.complete`` with only the run ID and returns its result."""
+        benchmark_run_view.state = "completed"
+        mock_async_client.benchmarks.runs.complete = AsyncMock(return_value=benchmark_run_view)
+
+        run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123")
+        result = await run.complete()
+
+        assert result == benchmark_run_view
+        assert result.state == "completed"
+        mock_async_client.benchmarks.runs.complete.assert_awaited_once_with("bench_run_123")
+
+    async def test_list_scenario_runs(
+        self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView
+    ) -> None:
+        """list_scenario_runs collects the async-iterated items into a list."""
+        mock_async_client.benchmarks.runs.list_scenario_runs.return_value = AsyncIterableMock([scenario_run_view])
+
+        run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123")
+        result = await run.list_scenario_runs()
+
+        assert len(result) == 1
+        assert result[0] == scenario_run_view
+        mock_async_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123")
+
+    async def test_list_scenario_runs_with_params(
+        self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView
+    ) -> None:
+        """list_scenario_runs forwards keyword parameters unchanged to the client call."""
+        mock_async_client.benchmarks.runs.list_scenario_runs.return_value = AsyncIterableMock([scenario_run_view])
+
+        run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123")
+        result = await run.list_scenario_runs(limit=10, state="completed")
+
+        assert len(result) == 1
+        mock_async_client.benchmarks.runs.list_scenario_runs.assert_called_once_with(
+            "bench_run_123", limit=10, state="completed"
+        )
+
+    async def test_list_scenario_runs_empty(self, mock_async_client: AsyncMock) -> None:
+        """Test list_scenario_runs returns empty list when no scenario runs."""
+        mock_async_client.benchmarks.runs.list_scenario_runs.return_value = AsyncIterableMock([])
+
+        run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123")
+        result = await run.list_scenario_runs()
+
+        assert result == []
+        mock_async_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123")
diff --git a/tests/sdk/test_benchmark_run.py b/tests/sdk/test_benchmark_run.py
new file mode 100644
index 000000000..125af81eb
--- /dev/null
+++ b/tests/sdk/test_benchmark_run.py
@@ -0,0 +1,92 @@
+"""Comprehensive tests for the sync BenchmarkRun class."""
+
+from __future__ import annotations
+
+from unittest.mock import Mock
+
+from tests.sdk.conftest import MockScenarioRunView, MockBenchmarkRunView
+from runloop_api_client.sdk.benchmark_run import BenchmarkRun
+
+
+class TestBenchmarkRun:
+    """Tests for BenchmarkRun class."""
+
+    def test_init(self, mock_client: Mock) -> None:
+        """The ``id`` and ``benchmark_id`` properties return the constructor arguments."""
+        run = BenchmarkRun(mock_client, "bench_run_123", "bench_123")
+        assert run.id == "bench_run_123"
+        assert run.benchmark_id == "bench_123"
+
+    def test_repr(self, mock_client: Mock) -> None:
+        """``repr`` shows the class name and the run ID only."""
+        run = BenchmarkRun(mock_client, "bench_run_123", "bench_123")
+        assert repr(run) == "<BenchmarkRun id='bench_run_123'>"
+
+    def test_get_info(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None:
+        """get_info calls ``runs.retrieve`` with only the run ID and returns its result."""
+        mock_client.benchmarks.runs.retrieve.return_value = benchmark_run_view
+
+        run = BenchmarkRun(mock_client, "bench_run_123", "bench_123")
+        result = run.get_info()
+
+        assert result == benchmark_run_view
+        mock_client.benchmarks.runs.retrieve.assert_called_once_with("bench_run_123")
+
+    def test_cancel(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None:
+        """cancel calls ``runs.cancel`` with only the run ID and returns its result."""
+        benchmark_run_view.state = "canceled"
+        mock_client.benchmarks.runs.cancel.return_value = benchmark_run_view
+
+        run = BenchmarkRun(mock_client, "bench_run_123", "bench_123")
+        result = run.cancel()
+
+        assert result == benchmark_run_view
+        assert result.state == "canceled"
+        mock_client.benchmarks.runs.cancel.assert_called_once_with("bench_run_123")
+
+    def test_complete(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None:
+        """complete calls ``runs.complete`` with only the run ID and returns its result."""
+        benchmark_run_view.state = "completed"
+        mock_client.benchmarks.runs.complete.return_value = benchmark_run_view
+
+        run = BenchmarkRun(mock_client, "bench_run_123", "bench_123")
+        result = run.complete()
+
+        assert result == benchmark_run_view
+        assert result.state == "completed"
+        mock_client.benchmarks.runs.complete.assert_called_once_with("bench_run_123")
+
+    def test_list_scenario_runs(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
+        """list_scenario_runs returns the iterated items as a list."""
+        mock_page = [scenario_run_view]
+        mock_client.benchmarks.runs.list_scenario_runs.return_value = mock_page
+
+        run = BenchmarkRun(mock_client, "bench_run_123", "bench_123")
+        result = run.list_scenario_runs()
+
+        assert len(result) == 1
+        assert result[0] == scenario_run_view
+        mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123")
+
+    def test_list_scenario_runs_with_params(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
+        """list_scenario_runs forwards keyword parameters unchanged to the client call."""
+        mock_page = [scenario_run_view]
+        mock_client.benchmarks.runs.list_scenario_runs.return_value = mock_page
+
+        run = BenchmarkRun(mock_client, "bench_run_123", "bench_123")
+        result = run.list_scenario_runs(limit=10, state="completed")
+
+        assert len(result) == 1
+        mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with(
+            "bench_run_123", limit=10, state="completed"
+        )
+
+    def test_list_scenario_runs_empty(self, mock_client: Mock) -> None:
+        """Test list_scenario_runs returns empty list when no scenario runs."""
+        mock_client.benchmarks.runs.list_scenario_runs.return_value = []
+
+        run = BenchmarkRun(mock_client, "bench_run_123", "bench_123")
+        result = run.list_scenario_runs()
+
+        assert result == []
+        mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123")
diff --git a/tests/smoketests/sdk/test_async_benchmark_run.py b/tests/smoketests/sdk/test_async_benchmark_run.py
new file mode 100644
index 000000000..843376fcc
--- /dev/null
+++ b/tests/smoketests/sdk/test_async_benchmark_run.py
@@ -0,0 +1,138 @@
+"""Asynchronous SDK smoke tests for AsyncBenchmarkRun operations.
+
+These tests validate the AsyncBenchmarkRun class against the real API.
+Until AsyncBenchmarkOps is available (PR3), we use the raw async API client
+to find or create benchmark runs for testing.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from runloop_api_client.sdk import AsyncRunloopSDK
+from runloop_api_client.sdk.async_benchmark_run import AsyncBenchmarkRun
+
+pytestmark = [pytest.mark.smoketest]
+
+TWO_MINUTE_TIMEOUT = 120
+
+
+class TestAsyncBenchmarkRunRetrieval:
+    """Read-only smoke tests: wrap an existing run, then call get_info / list_scenario_runs."""
+
+    @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+    async def test_benchmark_run_from_existing(self, async_sdk_client: AsyncRunloopSDK) -> None:
+        """Test creating AsyncBenchmarkRun from existing benchmark run.
+
+        This test:
+        1. Lists benchmark runs via raw async API
+        2. Creates an AsyncBenchmarkRun wrapper
+        3. Validates get_info returns correct data
+        """
+        # List existing benchmark runs via raw API
+        runs_page = async_sdk_client.api.benchmarks.runs.list(limit=1)
+        runs = [run async for run in runs_page]
+
+        if not runs:
+            pytest.skip("No benchmark runs available to test")
+
+        run_data = runs[0]
+
+        # Create AsyncBenchmarkRun wrapper
+        benchmark_run = AsyncBenchmarkRun(
+            client=async_sdk_client.api,
+            run_id=run_data.id,
+            benchmark_id=run_data.benchmark_id,
+        )
+
+        assert benchmark_run.id == run_data.id
+        assert benchmark_run.benchmark_id == run_data.benchmark_id
+
+        # get_info must report the same IDs as the listing entry
+        info = await benchmark_run.get_info()
+        assert info.id == run_data.id
+        assert info.benchmark_id == run_data.benchmark_id
+
+    @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+    async def test_benchmark_run_list_scenario_runs(self, async_sdk_client: AsyncRunloopSDK) -> None:
+        """Test AsyncBenchmarkRun.list_scenario_runs method.
+
+        This test:
+        1. Finds an existing benchmark run
+        2. Lists its scenario runs
+        """
+        # List existing benchmark runs via raw API
+        runs_page = async_sdk_client.api.benchmarks.runs.list(limit=1)
+        runs = [run async for run in runs_page]
+
+        if not runs:
+            pytest.skip("No benchmark runs available to test")
+
+        run_data = runs[0]
+
+        # Create AsyncBenchmarkRun wrapper
+        benchmark_run = AsyncBenchmarkRun(
+            client=async_sdk_client.api,
+            run_id=run_data.id,
+            benchmark_id=run_data.benchmark_id,
+        )
+
+        # Only the return type is checked: the list may legitimately be empty
+        scenario_runs = await benchmark_run.list_scenario_runs()
+        assert isinstance(scenario_runs, list)
+
+
+class TestAsyncBenchmarkRunLifecycle:
+    """Test AsyncBenchmarkRun lifecycle operations."""
+
+    @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+    async def test_benchmark_run_create_and_cancel(self, async_sdk_client: AsyncRunloopSDK) -> None:
+        """Test creating a benchmark run and canceling it.
+
+        This test:
+        1. Finds an existing benchmark
+        2. Starts a new benchmark run
+        3. Creates an AsyncBenchmarkRun wrapper
+        4. Cancels the run
+        """
+        # Find an existing benchmark via raw API
+        benchmarks_page = async_sdk_client.api.benchmarks.list(limit=1)
+        benchmarks = [b async for b in benchmarks_page]
+
+        if not benchmarks:
+            pytest.skip("No benchmarks available to test")
+
+        benchmark = benchmarks[0]
+
+        # Start a new benchmark run
+        run_data = await async_sdk_client.api.benchmarks.start_run(
+            benchmark.id,
+            name="sdk-smoketest-async-benchmark-run",
+        )
+
+        try:
+            # Create AsyncBenchmarkRun wrapper
+            benchmark_run = AsyncBenchmarkRun(
+                client=async_sdk_client.api,
+                run_id=run_data.id,
+                benchmark_id=run_data.benchmark_id,
+            )
+
+            assert benchmark_run.id == run_data.id
+
+            # Get info
+            info = await benchmark_run.get_info()
+            assert info.id == run_data.id
+            assert info.state in ["queued", "running", "completed", "canceled"]
+
+            # Cancel the run
+            result = await benchmark_run.cancel()
+            assert result.state in ["canceled", "completed"]  # May already be completed
+
+        except Exception:
+            # Best-effort cleanup: try to cancel the started run, then re-raise the original error
+            try:
+                await async_sdk_client.api.benchmarks.runs.cancel(run_data.id)
+            except Exception:
+                pass  # a failed cleanup is ignored so the original error is the one reported
+            raise
diff --git a/tests/smoketests/sdk/test_benchmark_run.py b/tests/smoketests/sdk/test_benchmark_run.py
new file mode 100644
index 000000000..0002e06b8
--- /dev/null
+++ b/tests/smoketests/sdk/test_benchmark_run.py
@@ -0,0 +1,138 @@
+"""Synchronous SDK smoke tests for BenchmarkRun operations.
+
+These tests validate the BenchmarkRun class against the real API.
+Until BenchmarkOps is available (PR3), we use the raw API client to
+find or create benchmark runs for testing.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from runloop_api_client.sdk import RunloopSDK
+from runloop_api_client.sdk.benchmark_run import BenchmarkRun
+
+pytestmark = [pytest.mark.smoketest]
+
+TWO_MINUTE_TIMEOUT = 120
+
+
+class TestBenchmarkRunRetrieval:
+    """Read-only smoke tests: wrap an existing run, then call get_info / list_scenario_runs."""
+
+    @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+    def test_benchmark_run_from_existing(self, sdk_client: RunloopSDK) -> None:
+        """Test creating BenchmarkRun from existing benchmark run.
+
+        This test:
+        1. Lists benchmark runs via raw API
+        2. Creates a BenchmarkRun wrapper
+        3. Validates get_info returns correct data
+        """
+        # List existing benchmark runs via raw API
+        runs_page = sdk_client.api.benchmarks.runs.list(limit=1)
+        runs = list(runs_page)
+
+        if not runs:
+            pytest.skip("No benchmark runs available to test")
+
+        run_data = runs[0]
+
+        # Create BenchmarkRun wrapper
+        benchmark_run = BenchmarkRun(
+            client=sdk_client.api,
+            run_id=run_data.id,
+            benchmark_id=run_data.benchmark_id,
+        )
+
+        assert benchmark_run.id == run_data.id
+        assert benchmark_run.benchmark_id == run_data.benchmark_id
+
+        # get_info must report the same IDs as the listing entry
+        info = benchmark_run.get_info()
+        assert info.id == run_data.id
+        assert info.benchmark_id == run_data.benchmark_id
+
+    @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+    def test_benchmark_run_list_scenario_runs(self, sdk_client: RunloopSDK) -> None:
+        """Test BenchmarkRun.list_scenario_runs method.
+
+        This test:
+        1. Finds an existing benchmark run
+        2. Lists its scenario runs
+        """
+        # List existing benchmark runs via raw API
+        runs_page = sdk_client.api.benchmarks.runs.list(limit=1)
+        runs = list(runs_page)
+
+        if not runs:
+            pytest.skip("No benchmark runs available to test")
+
+        run_data = runs[0]
+
+        # Create BenchmarkRun wrapper
+        benchmark_run = BenchmarkRun(
+            client=sdk_client.api,
+            run_id=run_data.id,
+            benchmark_id=run_data.benchmark_id,
+        )
+
+        # Only the return type is checked: the list may legitimately be empty
+        scenario_runs = benchmark_run.list_scenario_runs()
+        assert isinstance(scenario_runs, list)
+
+
+class TestBenchmarkRunLifecycle:
+    """Test BenchmarkRun lifecycle operations."""
+
+    @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+    def test_benchmark_run_create_and_cancel(self, sdk_client: RunloopSDK) -> None:
+        """Test creating a benchmark run and canceling it.
+
+        This test:
+        1. Finds an existing benchmark
+        2. Starts a new benchmark run
+        3. Creates a BenchmarkRun wrapper
+        4. Cancels the run
+        """
+        # Find an existing benchmark via raw API
+        benchmarks_page = sdk_client.api.benchmarks.list(limit=1)
+        benchmarks = list(benchmarks_page)
+
+        if not benchmarks:
+            pytest.skip("No benchmarks available to test")
+
+        benchmark = benchmarks[0]
+
+        # Start a new benchmark run
+        run_data = sdk_client.api.benchmarks.start_run(
+            benchmark.id,
+            name="sdk-smoketest-benchmark-run",
+        )
+
+        try:
+            # Create BenchmarkRun wrapper
+            benchmark_run = BenchmarkRun(
+                client=sdk_client.api,
+                run_id=run_data.id,
+                benchmark_id=run_data.benchmark_id,
+            )
+
+            assert benchmark_run.id == run_data.id
+
+            # Get info
+            info = benchmark_run.get_info()
+            assert info.id == run_data.id
+            assert info.state in ["queued", "running", "completed", "canceled"]
+
+            # Cancel the run
+            result = benchmark_run.cancel()
+            assert result.state in ["canceled", "completed"]  # May already be completed
+
+        except Exception:
+            # Best-effort cleanup: try to cancel the started run, then re-raise the original error
+            try:
+                sdk_client.api.benchmarks.runs.cancel(run_data.id)
+            except Exception:
+                pass  # a failed cleanup is ignored so the original error is the one reported
+            raise

From 3dbc3ab3de4826167de50f7747c80b108e3228fd Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Wed, 17 Dec 2025 11:06:42 -0800
Subject: [PATCH 4/5] fixed smoketests

---
 tests/smoketests/sdk/test_async_benchmark_run.py | 6 +++---
 tests/smoketests/sdk/test_benchmark_run.py       | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/smoketests/sdk/test_async_benchmark_run.py b/tests/smoketests/sdk/test_async_benchmark_run.py
index 843376fcc..aabc5606d 100644
--- a/tests/smoketests/sdk/test_async_benchmark_run.py
+++ b/tests/smoketests/sdk/test_async_benchmark_run.py
@@ -106,8 +106,8 @@ async def test_benchmark_run_create_and_cancel(self, async_sdk_client: AsyncRunl
 
         # Start a new benchmark run
         run_data = await async_sdk_client.api.benchmarks.start_run(
-            benchmark.id,
-            name="sdk-smoketest-async-benchmark-run",
+            benchmark_id=benchmark.id,
+            run_name="sdk-smoketest-async-benchmark-run",
         )
 
         try:
@@ -123,7 +123,7 @@ async def test_benchmark_run_create_and_cancel(self, async_sdk_client: AsyncRunl
             # Get info
             info = await benchmark_run.get_info()
             assert info.id == run_data.id
-            assert info.state in ["queued", "running", "completed", "canceled"]
+            assert info.state in ["running", "completed", "canceled"]
 
             # Cancel the run
             result = await benchmark_run.cancel()
diff --git a/tests/smoketests/sdk/test_benchmark_run.py b/tests/smoketests/sdk/test_benchmark_run.py
index 0002e06b8..ab267eae8 100644
--- a/tests/smoketests/sdk/test_benchmark_run.py
+++ b/tests/smoketests/sdk/test_benchmark_run.py
@@ -106,8 +106,8 @@ def test_benchmark_run_create_and_cancel(self, sdk_client: RunloopSDK) -> None:
 
         # Start a new benchmark run
         run_data = sdk_client.api.benchmarks.start_run(
-            benchmark.id,
-            name="sdk-smoketest-benchmark-run",
+            benchmark_id=benchmark.id,
+            run_name="sdk-smoketest-benchmark-run",
         )
 
         try:
@@ -123,7 +123,7 @@ def test_benchmark_run_create_and_cancel(self, sdk_client: RunloopSDK) -> None:
             # Get info
             info = benchmark_run.get_info()
             assert info.id == run_data.id
-            assert info.state in ["queued", "running", "completed", "canceled"]
+            assert info.state in ["running", "completed", "canceled"]
 
             # Cancel the run
             result = benchmark_run.cancel()

From 24b53872f00b2760f681313f8ebbaca35c38706e Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Wed, 17 Dec 2025 12:22:28 -0800
Subject: [PATCH 5/5] `list_scenario_runs()` now returns a list of
 ScenarioRun/AsyncScenarioRun objects

---
 .../sdk/async_benchmark_run.py                | 13 ++--
 src/runloop_api_client/sdk/benchmark_run.py   | 11 ++--
 tests/sdk/test_async_benchmark_run.py         | 62 +++++++++++++------
 tests/sdk/test_benchmark_run.py               | 58 ++++++++++++-----
 .../sdk/test_async_benchmark_run.py           | 19 ++++--
 tests/smoketests/sdk/test_benchmark_run.py    | 16 +++--
 6 files changed, 121 insertions(+), 58 deletions(-)

diff --git a/src/runloop_api_client/sdk/async_benchmark_run.py b/src/runloop_api_client/sdk/async_benchmark_run.py
index bf4c3d96a..fed2f5b00 100644
--- a/src/runloop_api_client/sdk/async_benchmark_run.py
+++ b/src/runloop_api_client/sdk/async_benchmark_run.py
@@ -5,9 +5,10 @@
 from typing import List
 from typing_extensions import Unpack, override
 
-from ..types import ScenarioRunView, BenchmarkRunView
+from ..types import BenchmarkRunView
 from ._types import BaseRequestOptions, LongRequestOptions, SDKBenchmarkRunListScenarioRunsParams
 from .._client import AsyncRunloop
+from .async_scenario_run import AsyncScenarioRun
 
 
 class AsyncBenchmarkRun:
@@ -112,15 +113,15 @@ async def complete(
     async def list_scenario_runs(
         self,
         **params: Unpack[SDKBenchmarkRunListScenarioRunsParams],
-    ) -> List[ScenarioRunView]:
+    ) -> List[AsyncScenarioRun]:
         """List all scenario runs for this benchmark run.
 
         :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkRunListScenarioRunsParams` for available parameters
-        :return: List of scenario run views
-        :rtype: List[ScenarioRunView]
+        :return: List of async scenario run objects
+        :rtype: List[AsyncScenarioRun]
         """
-        page = self._client.benchmarks.runs.list_scenario_runs(
+        page = await self._client.benchmarks.runs.list_scenario_runs(
             self._id,
             **params,
         )
-        return [item async for item in page]
+        return [AsyncScenarioRun(self._client, run.id, run.devbox_id) for run in page.runs]
diff --git a/src/runloop_api_client/sdk/benchmark_run.py b/src/runloop_api_client/sdk/benchmark_run.py
index e281825a2..ff6ed567a 100644
--- a/src/runloop_api_client/sdk/benchmark_run.py
+++ b/src/runloop_api_client/sdk/benchmark_run.py
@@ -5,9 +5,10 @@
 from typing import List
 from typing_extensions import Unpack, override
 
-from ..types import ScenarioRunView, BenchmarkRunView
+from ..types import BenchmarkRunView
 from ._types import BaseRequestOptions, LongRequestOptions, SDKBenchmarkRunListScenarioRunsParams
 from .._client import Runloop
+from .scenario_run import ScenarioRun
 
 
 class BenchmarkRun:
@@ -112,15 +113,15 @@ def complete(
     def list_scenario_runs(
         self,
         **params: Unpack[SDKBenchmarkRunListScenarioRunsParams],
-    ) -> List[ScenarioRunView]:
+    ) -> List[ScenarioRun]:
         """List all scenario runs for this benchmark run.
 
         :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkRunListScenarioRunsParams` for available parameters
-        :return: List of scenario run views
-        :rtype: List[ScenarioRunView]
+        :return: List of scenario run objects
+        :rtype: List[ScenarioRun]
         """
         page = self._client.benchmarks.runs.list_scenario_runs(
             self._id,
             **params,
         )
-        return list(page)
+        return [ScenarioRun(self._client, run.id, run.devbox_id) for run in page.runs]
diff --git a/tests/sdk/test_async_benchmark_run.py b/tests/sdk/test_async_benchmark_run.py
index 1e1102c6e..1785f683a 100644
--- a/tests/sdk/test_async_benchmark_run.py
+++ b/tests/sdk/test_async_benchmark_run.py
@@ -2,9 +2,11 @@
 
 from __future__ import annotations
 
+from types import SimpleNamespace
 from unittest.mock import AsyncMock
 
-from tests.sdk.conftest import AsyncIterableMock, MockScenarioRunView, MockBenchmarkRunView
+from tests.sdk.conftest import MockScenarioRunView, MockBenchmarkRunView
+from runloop_api_client.sdk.async_scenario_run import AsyncScenarioRun
 from runloop_api_client.sdk.async_benchmark_run import AsyncBenchmarkRun
 
 
@@ -56,39 +58,63 @@ async def test_complete(self, mock_async_client: AsyncMock, benchmark_run_view:
         assert result.state == "completed"
         mock_async_client.benchmarks.runs.complete.assert_awaited_once_with("bench_run_123")
 
-    async def test_list_scenario_runs(
+    async def test_list_scenario_runs_empty(self, mock_async_client: AsyncMock) -> None:
+        """Test list_scenario_runs method with empty results."""
+        page = SimpleNamespace(runs=[])
+        mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page)
+
+        run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123")
+        result = await run.list_scenario_runs()
+
+        assert len(result) == 0
+        mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with("bench_run_123")
+
+    async def test_list_scenario_runs_single(
         self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView
     ) -> None:
-        """Test list_scenario_runs method."""
-        mock_async_client.benchmarks.runs.list_scenario_runs.return_value = AsyncIterableMock([scenario_run_view])
+        """Test list_scenario_runs method with single result."""
+        page = SimpleNamespace(runs=[scenario_run_view])
+        mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page)
 
         run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123")
         result = await run.list_scenario_runs()
 
         assert len(result) == 1
-        assert result[0] == scenario_run_view
-        mock_async_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123")
+        assert isinstance(result[0], AsyncScenarioRun)
+        assert result[0].id == scenario_run_view.id
+        assert result[0].devbox_id == scenario_run_view.devbox_id
+        mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with("bench_run_123")
+
+    async def test_list_scenario_runs_multiple(self, mock_async_client: AsyncMock) -> None:
+        """Test list_scenario_runs method with multiple results."""
+        scenario_run_view1 = MockScenarioRunView(id="run_001", devbox_id="dev_001")
+        scenario_run_view2 = MockScenarioRunView(id="run_002", devbox_id="dev_002")
+        page = SimpleNamespace(runs=[scenario_run_view1, scenario_run_view2])
+        mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page)
+
+        run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123")
+        result = await run.list_scenario_runs()
+
+        assert len(result) == 2
+        assert isinstance(result[0], AsyncScenarioRun)
+        assert isinstance(result[1], AsyncScenarioRun)
+        assert result[0].id == "run_001"
+        assert result[1].id == "run_002"
+        mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with("bench_run_123")
 
     async def test_list_scenario_runs_with_params(
         self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView
     ) -> None:
         """Test list_scenario_runs method with filtering parameters."""
-        mock_async_client.benchmarks.runs.list_scenario_runs.return_value = AsyncIterableMock([scenario_run_view])
+        page = SimpleNamespace(runs=[scenario_run_view])
+        mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page)
 
         run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123")
         result = await run.list_scenario_runs(limit=10, state="completed")
 
         assert len(result) == 1
-        mock_async_client.benchmarks.runs.list_scenario_runs.assert_called_once_with(
+        assert isinstance(result[0], AsyncScenarioRun)
+        assert result[0].id == scenario_run_view.id
+        mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with(
             "bench_run_123", limit=10, state="completed"
         )
-
-    async def test_list_scenario_runs_empty(self, mock_async_client: AsyncMock) -> None:
-        """Test list_scenario_runs returns empty list when no scenario runs."""
-        mock_async_client.benchmarks.runs.list_scenario_runs.return_value = AsyncIterableMock([])
-
-        run = AsyncBenchmarkRun(mock_async_client, "bench_run_123", "bench_123")
-        result = await run.list_scenario_runs()
-
-        assert result == []
-        mock_async_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123")
diff --git a/tests/sdk/test_benchmark_run.py b/tests/sdk/test_benchmark_run.py
index 125af81eb..d54fb9432 100644
--- a/tests/sdk/test_benchmark_run.py
+++ b/tests/sdk/test_benchmark_run.py
@@ -2,9 +2,11 @@
 
 from __future__ import annotations
 
+from types import SimpleNamespace
 from unittest.mock import Mock
 
 from tests.sdk.conftest import MockScenarioRunView, MockBenchmarkRunView
+from runloop_api_client.sdk.scenario_run import ScenarioRun
 from runloop_api_client.sdk.benchmark_run import BenchmarkRun
 
 
@@ -56,37 +58,59 @@ def test_complete(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunV
         assert result.state == "completed"
         mock_client.benchmarks.runs.complete.assert_called_once_with("bench_run_123")
 
-    def test_list_scenario_runs(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
-        """Test list_scenario_runs method."""
-        mock_page = [scenario_run_view]
-        mock_client.benchmarks.runs.list_scenario_runs.return_value = mock_page
+    def test_list_scenario_runs_empty(self, mock_client: Mock) -> None:
+        """Test list_scenario_runs method with empty results."""
+        page = SimpleNamespace(runs=[])
+        mock_client.benchmarks.runs.list_scenario_runs.return_value = page
+
+        run = BenchmarkRun(mock_client, "bench_run_123", "bench_123")
+        result = run.list_scenario_runs()
+
+        assert len(result) == 0
+        mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123")
+
+    def test_list_scenario_runs_single(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
+        """Test list_scenario_runs method with single result."""
+        page = SimpleNamespace(runs=[scenario_run_view])
+        mock_client.benchmarks.runs.list_scenario_runs.return_value = page
 
         run = BenchmarkRun(mock_client, "bench_run_123", "bench_123")
         result = run.list_scenario_runs()
 
         assert len(result) == 1
-        assert result[0] == scenario_run_view
+        assert isinstance(result[0], ScenarioRun)
+        assert result[0].id == scenario_run_view.id
+        assert result[0].devbox_id == scenario_run_view.devbox_id
+        mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123")
+
+    def test_list_scenario_runs_multiple(self, mock_client: Mock) -> None:
+        """Test list_scenario_runs method with multiple results."""
+        scenario_run_view1 = MockScenarioRunView(id="run_001", devbox_id="dev_001")
+        scenario_run_view2 = MockScenarioRunView(id="run_002", devbox_id="dev_002")
+        page = SimpleNamespace(runs=[scenario_run_view1, scenario_run_view2])
+        mock_client.benchmarks.runs.list_scenario_runs.return_value = page
+
+        run = BenchmarkRun(mock_client, "bench_run_123", "bench_123")
+        result = run.list_scenario_runs()
+
+        assert len(result) == 2
+        assert isinstance(result[0], ScenarioRun)
+        assert isinstance(result[1], ScenarioRun)
+        assert result[0].id == "run_001"
+        assert result[1].id == "run_002"
         mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123")
 
     def test_list_scenario_runs_with_params(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
         """Test list_scenario_runs method with filtering parameters."""
-        mock_page = [scenario_run_view]
-        mock_client.benchmarks.runs.list_scenario_runs.return_value = mock_page
+        page = SimpleNamespace(runs=[scenario_run_view])
+        mock_client.benchmarks.runs.list_scenario_runs.return_value = page
 
         run = BenchmarkRun(mock_client, "bench_run_123", "bench_123")
         result = run.list_scenario_runs(limit=10, state="completed")
 
         assert len(result) == 1
+        assert isinstance(result[0], ScenarioRun)
+        assert result[0].id == scenario_run_view.id
         mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with(
             "bench_run_123", limit=10, state="completed"
         )
-
-    def test_list_scenario_runs_empty(self, mock_client: Mock) -> None:
-        """Test list_scenario_runs returns empty list when no scenario runs."""
-        mock_client.benchmarks.runs.list_scenario_runs.return_value = []
-
-        run = BenchmarkRun(mock_client, "bench_run_123", "bench_123")
-        result = run.list_scenario_runs()
-
-        assert result == []
-        mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bench_run_123")
diff --git a/tests/smoketests/sdk/test_async_benchmark_run.py b/tests/smoketests/sdk/test_async_benchmark_run.py
index aabc5606d..3eab471b5 100644
--- a/tests/smoketests/sdk/test_async_benchmark_run.py
+++ b/tests/smoketests/sdk/test_async_benchmark_run.py
@@ -10,6 +10,7 @@
 import pytest
 
 from runloop_api_client.sdk import AsyncRunloopSDK
+from runloop_api_client.sdk.async_scenario_run import AsyncScenarioRun
 from runloop_api_client.sdk.async_benchmark_run import AsyncBenchmarkRun
 
 pytestmark = [pytest.mark.smoketest]
@@ -30,8 +31,8 @@ async def test_benchmark_run_from_existing(self, async_sdk_client: AsyncRunloopS
         3. Validates get_info returns correct data
         """
         # List existing benchmark runs via raw API
-        runs_page = async_sdk_client.api.benchmarks.runs.list(limit=1)
-        runs = [run async for run in runs_page]
+        runs_page = await async_sdk_client.api.benchmarks.runs.list(limit=1)
+        runs = runs_page.runs
 
         if not runs:
             pytest.skip("No benchmark runs available to test")
@@ -62,8 +63,8 @@ async def test_benchmark_run_list_scenario_runs(self, async_sdk_client: AsyncRun
         2. Lists its scenario runs
         """
         # List existing benchmark runs via raw API
-        runs_page = async_sdk_client.api.benchmarks.runs.list(limit=1)
-        runs = [run async for run in runs_page]
+        runs_page = await async_sdk_client.api.benchmarks.runs.list(limit=1)
+        runs = runs_page.runs
 
         if not runs:
             pytest.skip("No benchmark runs available to test")
@@ -81,6 +82,12 @@ async def test_benchmark_run_list_scenario_runs(self, async_sdk_client: AsyncRun
         scenario_runs = await benchmark_run.list_scenario_runs()
         assert isinstance(scenario_runs, list)
 
+        # Verify returned items are AsyncScenarioRun objects
+        for scenario_run in scenario_runs:
+            assert isinstance(scenario_run, AsyncScenarioRun)
+            assert scenario_run.id is not None
+            assert scenario_run.devbox_id is not None
+
 
 class TestAsyncBenchmarkRunLifecycle:
     """Test AsyncBenchmarkRun lifecycle operations."""
@@ -96,8 +103,8 @@ async def test_benchmark_run_create_and_cancel(self, async_sdk_client: AsyncRunl
         4. Cancels the run
         """
         # Find an existing benchmark via raw API
-        benchmarks_page = async_sdk_client.api.benchmarks.list(limit=1)
-        benchmarks = [b async for b in benchmarks_page]
+        benchmarks_page = await async_sdk_client.api.benchmarks.list(limit=1)
+        benchmarks = benchmarks_page.benchmarks
 
         if not benchmarks:
             pytest.skip("No benchmarks available to test")
diff --git a/tests/smoketests/sdk/test_benchmark_run.py b/tests/smoketests/sdk/test_benchmark_run.py
index ab267eae8..f21e9e87e 100644
--- a/tests/smoketests/sdk/test_benchmark_run.py
+++ b/tests/smoketests/sdk/test_benchmark_run.py
@@ -10,6 +10,7 @@
 import pytest
 
 from runloop_api_client.sdk import RunloopSDK
+from runloop_api_client.sdk.scenario_run import ScenarioRun
 from runloop_api_client.sdk.benchmark_run import BenchmarkRun
 
 pytestmark = [pytest.mark.smoketest]
@@ -30,8 +31,7 @@ def test_benchmark_run_from_existing(self, sdk_client: RunloopSDK) -> None:
         3. Validates get_info returns correct data
         """
         # List existing benchmark runs via raw API
-        runs_page = sdk_client.api.benchmarks.runs.list(limit=1)
-        runs = list(runs_page)
+        runs = sdk_client.api.benchmarks.runs.list(limit=1).runs
 
         if not runs:
             pytest.skip("No benchmark runs available to test")
@@ -62,8 +62,7 @@ def test_benchmark_run_list_scenario_runs(self, sdk_client: RunloopSDK) -> None:
         2. Lists its scenario runs
         """
         # List existing benchmark runs via raw API
-        runs_page = sdk_client.api.benchmarks.runs.list(limit=1)
-        runs = list(runs_page)
+        runs = sdk_client.api.benchmarks.runs.list(limit=1).runs
 
         if not runs:
             pytest.skip("No benchmark runs available to test")
@@ -81,6 +80,12 @@ def test_benchmark_run_list_scenario_runs(self, sdk_client: RunloopSDK) -> None:
         scenario_runs = benchmark_run.list_scenario_runs()
         assert isinstance(scenario_runs, list)
 
+        # Verify returned items are ScenarioRun objects
+        for scenario_run in scenario_runs:
+            assert isinstance(scenario_run, ScenarioRun)
+            assert scenario_run.id is not None
+            assert scenario_run.devbox_id is not None
+
 
 class TestBenchmarkRunLifecycle:
     """Test BenchmarkRun lifecycle operations."""
@@ -96,8 +101,7 @@ def test_benchmark_run_create_and_cancel(self, sdk_client: RunloopSDK) -> None:
         4. Cancels the run
         """
         # Find an existing benchmark via raw API
-        benchmarks_page = sdk_client.api.benchmarks.list(limit=1)
-        benchmarks = list(benchmarks_page)
+        benchmarks = sdk_client.api.benchmarks.list(limit=1).benchmarks
 
         if not benchmarks:
             pytest.skip("No benchmarks available to test")