|
| 1 | +"""AsyncBenchmark resource class for asynchronous operations.""" |
| 2 | + |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +from typing import List |
| 6 | +from typing_extensions import Unpack, override |
| 7 | + |
| 8 | +from ..types import BenchmarkView |
| 9 | +from ._types import ( |
| 10 | + BaseRequestOptions, |
| 11 | + LongRequestOptions, |
| 12 | + SDKBenchmarkUpdateParams, |
| 13 | + SDKBenchmarkListRunsParams, |
| 14 | + SDKBenchmarkStartRunParams, |
| 15 | +) |
| 16 | +from .._types import SequenceNotStr |
| 17 | +from .._client import AsyncRunloop |
| 18 | +from .async_benchmark_run import AsyncBenchmarkRun |
| 19 | + |
| 20 | + |
class AsyncBenchmark:
    """Async handle for a single benchmark, addressed by its ID.

    Holds an ``AsyncRunloop`` client plus a benchmark ID and exposes coroutine
    methods that fetch the benchmark's details, update it, add or remove
    scenarios, start runs, and list runs. Instances are normally obtained via
    ``runloop.benchmark.from_id()`` or ``runloop.benchmark.list()``.

    Example:
        >>> benchmark = runloop.benchmark.from_id("bmd_xxx")
        >>> info = await benchmark.get_info()
        >>> run = await benchmark.run(run_name="evaluation-v1")
    """

    def __init__(self, client: AsyncRunloop, benchmark_id: str) -> None:
        """Bind a client and a benchmark ID together.

        No request is made here; the arguments are only stored.

        :param client: AsyncRunloop client used for every API call
        :type client: AsyncRunloop
        :param benchmark_id: ID of the benchmark this object refers to
        :type benchmark_id: str
        """
        self._id = benchmark_id
        self._client = client

    @override
    def __repr__(self) -> str:
        """Return a short debug representation containing the benchmark ID."""
        return f"<AsyncBenchmark id={self._id!r}>"

    @property
    def id(self) -> str:
        """Benchmark ID supplied at construction time.

        :return: Unique benchmark ID
        :rtype: str
        """
        return self._id

    async def get_info(
        self,
        **options: Unpack[BaseRequestOptions],
    ) -> BenchmarkView:
        """Fetch the benchmark's current details from the API.

        :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options
        :return: Current benchmark info
        :rtype: BenchmarkView
        """
        benchmarks_api = self._client.benchmarks
        return await benchmarks_api.retrieve(self._id, **options)

    async def update(
        self,
        **params: Unpack[SDKBenchmarkUpdateParams],
    ) -> BenchmarkView:
        """Apply an update to the benchmark.

        Only the fields passed in ``params`` are updated.

        :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkUpdateParams` for available parameters
        :return: Updated benchmark info
        :rtype: BenchmarkView
        """
        benchmarks_api = self._client.benchmarks
        return await benchmarks_api.update(self._id, **params)

    async def run(
        self,
        **params: Unpack[SDKBenchmarkStartRunParams],
    ) -> AsyncBenchmarkRun:
        """Start a new run of this benchmark.

        Calls the start-run endpoint with this benchmark's ID and wraps the
        returned run in an AsyncBenchmarkRun bound to the same client.

        :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkStartRunParams` for available parameters
        :return: AsyncBenchmarkRun instance for managing the run
        :rtype: AsyncBenchmarkRun
        """
        started = await self._client.benchmarks.start_run(benchmark_id=self._id, **params)
        return AsyncBenchmarkRun(self._client, started.id, started.benchmark_id)

    async def add_scenarios(
        self,
        scenario_ids: SequenceNotStr[str],
        **options: Unpack[LongRequestOptions],
    ) -> BenchmarkView:
        """Attach scenarios to the benchmark.

        :param scenario_ids: Scenario IDs to add
        :type scenario_ids: SequenceNotStr[str]
        :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
        :return: Updated benchmark info
        :rtype: BenchmarkView
        """
        benchmarks_api = self._client.benchmarks
        return await benchmarks_api.update_scenarios(
            self._id,
            scenarios_to_add=scenario_ids,
            **options,
        )

    async def remove_scenarios(
        self,
        scenario_ids: SequenceNotStr[str],
        **options: Unpack[LongRequestOptions],
    ) -> BenchmarkView:
        """Detach scenarios from the benchmark.

        :param scenario_ids: Scenario IDs to remove
        :type scenario_ids: SequenceNotStr[str]
        :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
        :return: Updated benchmark info
        :rtype: BenchmarkView
        """
        benchmarks_api = self._client.benchmarks
        return await benchmarks_api.update_scenarios(
            self._id,
            scenarios_to_remove=scenario_ids,
            **options,
        )

    async def list_runs(
        self,
        **params: Unpack[SDKBenchmarkListRunsParams],
    ) -> List[AsyncBenchmarkRun]:
        """List runs for this benchmark.

        Requests runs filtered by this benchmark's ID and wraps each entry of
        the returned page's ``runs`` in an AsyncBenchmarkRun.

        :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkListRunsParams` for available parameters
        :return: List of async benchmark runs
        :rtype: List[AsyncBenchmarkRun]
        """
        runs_page = await self._client.benchmarks.runs.list(benchmark_id=self._id, **params)
        wrapped: List[AsyncBenchmarkRun] = []
        for entry in runs_page.runs:
            wrapped.append(AsyncBenchmarkRun(self._client, entry.id, entry.benchmark_id))
        return wrapped
0 commit comments