diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b7c4c0a77..c96a502b4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -57,7 +57,7 @@ jobs:
       - name: Get GitHub OIDC Token
         if: github.repository == 'stainless-sdks/runloop-python'
         id: github-oidc
-        uses: runloopai/github-script@main
+        uses: actions/github-script@v8
         with:
           script: core.setOutput('github_token', await core.getIDToken());
 
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 2a8f4ffdd..0e5b256d2 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "1.3.0"
+  ".": "1.3.1"
 }
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index 5eb10a624..f28b394ab 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 103
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/runloop-ai%2Frunloop-5359067a857aa94f69bae0d3311856be3e637da067fdc9dbf8bd26fe476efbd8.yml
-openapi_spec_hash: 5227ef7c306d5226c3aee8932b2e8c6a
-config_hash: cb43d4ca9e64d5a099199d6818d70539
+configured_endpoints: 106
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/runloop-ai%2Frunloop-63dab7833d6670810c4f4882df560ebbfe2de8e8e1a98d51422368607b5335ae.yml
+openapi_spec_hash: ebb5068064f7469f9239b18a51a6fe44
+config_hash: fd168de77f219e46a1427bbec2eecfb9
diff --git a/CHANGELOG.md b/CHANGELOG.md
index be0b78508..c5141bdbf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,18 @@
 # Changelog
 
+## 1.3.1 (2026-01-28)
+
+Full Changelog: [v1.3.0...v1.3.1](https://github.com/runloopai/api-client-python/compare/v1.3.0...v1.3.1)
+
+### Features
+
+* **benchmark:** adding in-progress benchmark runs for benchmark jobs ([#7183](https://github.com/runloopai/api-client-python/issues/7183)) ([2f11e9f](https://github.com/runloopai/api-client-python/commit/2f11e9f0298d0a30c03abd19c6c7096a26487b02))
+
+
+### Chores
+
+* **ci:** upgrade `actions/github-script` ([7ba3962](https://github.com/runloopai/api-client-python/commit/7ba3962181d62b0791dd6d064c52ca512443a2ca))
+
 ## 1.3.0 (2026-01-22)
 
 Full Changelog: [v1.3.0-alpha...v1.3.0](https://github.com/runloopai/api-client-python/compare/v1.3.0-alpha...v1.3.0)
diff --git a/api.md b/api.md
index 32d35cfbd..45d93e973 100644
--- a/api.md
+++ b/api.md
@@ -55,6 +55,24 @@ Methods:
 - <code title="post /v1/benchmark_runs/{id}/complete">client.benchmark_runs.<a href="./src/runloop_api_client/resources/benchmark_runs.py">complete</a>(id) -> <a href="./src/runloop_api_client/types/benchmark_run_view.py">BenchmarkRunView</a></code>
 - <code title="get /v1/benchmark_runs/{id}/scenario_runs">client.benchmark_runs.<a href="./src/runloop_api_client/resources/benchmark_runs.py">list_scenario_runs</a>(id, \*\*<a href="src/runloop_api_client/types/benchmark_run_list_scenario_runs_params.py">params</a>) -> <a href="./src/runloop_api_client/types/scenario_run_view.py">SyncBenchmarkRunsCursorIDPage[ScenarioRunView]</a></code>
 
+# BenchmarkJobs
+
+Types:
+
+```python
+from runloop_api_client.types import (
+    BenchmarkJobCreateParameters,
+    BenchmarkJobListView,
+    BenchmarkJobView,
+)
+```
+
+Methods:
+
+- <code title="post /v1/benchmark_jobs">client.benchmark_jobs.<a href="./src/runloop_api_client/resources/benchmark_jobs.py">create</a>(\*\*<a href="src/runloop_api_client/types/benchmark_job_create_params.py">params</a>) -> <a href="./src/runloop_api_client/types/benchmark_job_view.py">BenchmarkJobView</a></code>
+- <code title="get /v1/benchmark_jobs/{id}">client.benchmark_jobs.<a href="./src/runloop_api_client/resources/benchmark_jobs.py">retrieve</a>(id) -> <a href="./src/runloop_api_client/types/benchmark_job_view.py">BenchmarkJobView</a></code>
+- <code title="get /v1/benchmark_jobs">client.benchmark_jobs.<a href="./src/runloop_api_client/resources/benchmark_jobs.py">list</a>(\*\*<a href="src/runloop_api_client/types/benchmark_job_list_params.py">params</a>) -> <a href="./src/runloop_api_client/types/benchmark_job_list_view.py">BenchmarkJobListView</a></code>
+
 # Agents
 
 Types:
diff --git a/pyproject.toml b/pyproject.toml
index 5007d5e66..86f7930a3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "runloop_api_client"
-version = "1.3.0"
+version = "1.3.1"
 description = "The official Python library for the runloop API"
 dynamic = ["readme"]
 license = "MIT"
diff --git a/src/runloop_api_client/_client.py b/src/runloop_api_client/_client.py
index 557170af6..3b469e7ec 100644
--- a/src/runloop_api_client/_client.py
+++ b/src/runloop_api_client/_client.py
@@ -40,6 +40,7 @@
         benchmarks,
         blueprints,
         repositories,
+        benchmark_jobs,
         benchmark_runs,
         network_policies,
     )
@@ -49,6 +50,7 @@
     from .resources.benchmarks import BenchmarksResource, AsyncBenchmarksResource
     from .resources.blueprints import BlueprintsResource, AsyncBlueprintsResource
     from .resources.repositories import RepositoriesResource, AsyncRepositoriesResource
+    from .resources.benchmark_jobs import BenchmarkJobsResource, AsyncBenchmarkJobsResource
     from .resources.benchmark_runs import BenchmarkRunsResource, AsyncBenchmarkRunsResource
     from .resources.network_policies import NetworkPoliciesResource, AsyncNetworkPoliciesResource
     from .resources.devboxes.devboxes import DevboxesResource, AsyncDevboxesResource
@@ -126,6 +128,12 @@ def benchmark_runs(self) -> BenchmarkRunsResource:
 
         return BenchmarkRunsResource(self)
 
+    @cached_property
+    def benchmark_jobs(self) -> BenchmarkJobsResource:
+        from .resources.benchmark_jobs import BenchmarkJobsResource
+
+        return BenchmarkJobsResource(self)
+
     @cached_property
     def agents(self) -> AgentsResource:
         from .resources.agents import AgentsResource
@@ -356,6 +364,12 @@ def benchmark_runs(self) -> AsyncBenchmarkRunsResource:
 
         return AsyncBenchmarkRunsResource(self)
 
+    @cached_property
+    def benchmark_jobs(self) -> AsyncBenchmarkJobsResource:
+        from .resources.benchmark_jobs import AsyncBenchmarkJobsResource
+
+        return AsyncBenchmarkJobsResource(self)
+
     @cached_property
     def agents(self) -> AsyncAgentsResource:
         from .resources.agents import AsyncAgentsResource
@@ -535,6 +549,12 @@ def benchmark_runs(self) -> benchmark_runs.BenchmarkRunsResourceWithRawResponse:
 
         return BenchmarkRunsResourceWithRawResponse(self._client.benchmark_runs)
 
+    @cached_property
+    def benchmark_jobs(self) -> benchmark_jobs.BenchmarkJobsResourceWithRawResponse:
+        from .resources.benchmark_jobs import BenchmarkJobsResourceWithRawResponse
+
+        return BenchmarkJobsResourceWithRawResponse(self._client.benchmark_jobs)
+
     @cached_property
     def agents(self) -> agents.AgentsResourceWithRawResponse:
         from .resources.agents import AgentsResourceWithRawResponse
@@ -602,6 +622,12 @@ def benchmark_runs(self) -> benchmark_runs.AsyncBenchmarkRunsResourceWithRawResp
 
         return AsyncBenchmarkRunsResourceWithRawResponse(self._client.benchmark_runs)
 
+    @cached_property
+    def benchmark_jobs(self) -> benchmark_jobs.AsyncBenchmarkJobsResourceWithRawResponse:
+        from .resources.benchmark_jobs import AsyncBenchmarkJobsResourceWithRawResponse
+
+        return AsyncBenchmarkJobsResourceWithRawResponse(self._client.benchmark_jobs)
+
     @cached_property
     def agents(self) -> agents.AsyncAgentsResourceWithRawResponse:
         from .resources.agents import AsyncAgentsResourceWithRawResponse
@@ -669,6 +695,12 @@ def benchmark_runs(self) -> benchmark_runs.BenchmarkRunsResourceWithStreamingRes
 
         return BenchmarkRunsResourceWithStreamingResponse(self._client.benchmark_runs)
 
+    @cached_property
+    def benchmark_jobs(self) -> benchmark_jobs.BenchmarkJobsResourceWithStreamingResponse:
+        from .resources.benchmark_jobs import BenchmarkJobsResourceWithStreamingResponse
+
+        return BenchmarkJobsResourceWithStreamingResponse(self._client.benchmark_jobs)
+
     @cached_property
     def agents(self) -> agents.AgentsResourceWithStreamingResponse:
         from .resources.agents import AgentsResourceWithStreamingResponse
@@ -736,6 +768,12 @@ def benchmark_runs(self) -> benchmark_runs.AsyncBenchmarkRunsResourceWithStreami
 
         return AsyncBenchmarkRunsResourceWithStreamingResponse(self._client.benchmark_runs)
 
+    @cached_property
+    def benchmark_jobs(self) -> benchmark_jobs.AsyncBenchmarkJobsResourceWithStreamingResponse:
+        from .resources.benchmark_jobs import AsyncBenchmarkJobsResourceWithStreamingResponse
+
+        return AsyncBenchmarkJobsResourceWithStreamingResponse(self._client.benchmark_jobs)
+
     @cached_property
     def agents(self) -> agents.AsyncAgentsResourceWithStreamingResponse:
         from .resources.agents import AsyncAgentsResourceWithStreamingResponse
diff --git a/src/runloop_api_client/_version.py b/src/runloop_api_client/_version.py
index c746bdc5e..590b9e003 100644
--- a/src/runloop_api_client/_version.py
+++ b/src/runloop_api_client/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "runloop_api_client"
-__version__ = "1.3.0"  # x-release-please-version
+__version__ = "1.3.1"  # x-release-please-version
diff --git a/src/runloop_api_client/resources/__init__.py b/src/runloop_api_client/resources/__init__.py
index 90a4a2971..325158535 100644
--- a/src/runloop_api_client/resources/__init__.py
+++ b/src/runloop_api_client/resources/__init__.py
@@ -64,6 +64,14 @@
     RepositoriesResourceWithStreamingResponse,
     AsyncRepositoriesResourceWithStreamingResponse,
 )
+from .benchmark_jobs import (
+    BenchmarkJobsResource,
+    AsyncBenchmarkJobsResource,
+    BenchmarkJobsResourceWithRawResponse,
+    AsyncBenchmarkJobsResourceWithRawResponse,
+    BenchmarkJobsResourceWithStreamingResponse,
+    AsyncBenchmarkJobsResourceWithStreamingResponse,
+)
 from .benchmark_runs import (
     BenchmarkRunsResource,
     AsyncBenchmarkRunsResource,
@@ -94,6 +102,12 @@
     "AsyncBenchmarkRunsResourceWithRawResponse",
     "BenchmarkRunsResourceWithStreamingResponse",
     "AsyncBenchmarkRunsResourceWithStreamingResponse",
+    "BenchmarkJobsResource",
+    "AsyncBenchmarkJobsResource",
+    "BenchmarkJobsResourceWithRawResponse",
+    "AsyncBenchmarkJobsResourceWithRawResponse",
+    "BenchmarkJobsResourceWithStreamingResponse",
+    "AsyncBenchmarkJobsResourceWithStreamingResponse",
     "AgentsResource",
     "AsyncAgentsResource",
     "AgentsResourceWithRawResponse",
diff --git a/src/runloop_api_client/resources/benchmark_jobs.py b/src/runloop_api_client/resources/benchmark_jobs.py
new file mode 100644
index 000000000..f6172d118
--- /dev/null
+++ b/src/runloop_api_client/resources/benchmark_jobs.py
@@ -0,0 +1,394 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+
+import httpx
+
+from ..types import benchmark_job_list_params, benchmark_job_create_params
+from .._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
+from .._utils import maybe_transform, async_maybe_transform
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+    to_raw_response_wrapper,
+    to_streamed_response_wrapper,
+    async_to_raw_response_wrapper,
+    async_to_streamed_response_wrapper,
+)
+from .._base_client import make_request_options
+from ..types.benchmark_job_view import BenchmarkJobView
+from ..types.benchmark_job_list_view import BenchmarkJobListView
+
+__all__ = ["BenchmarkJobsResource", "AsyncBenchmarkJobsResource"]
+
+
+class BenchmarkJobsResource(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> BenchmarkJobsResourceWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/runloopai/api-client-python#accessing-raw-response-data-eg-headers
+        """
+        return BenchmarkJobsResourceWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> BenchmarkJobsResourceWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/runloopai/api-client-python#with_streaming_response
+        """
+        return BenchmarkJobsResourceWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        name: Optional[str] | Omit = omit,
+        spec: Optional[benchmark_job_create_params.Spec] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+        idempotency_key: str | None = None,
+    ) -> BenchmarkJobView:
+        """
+        [Beta] Create a BenchmarkJob that runs a set of scenarios entirely on runloop.
+
+        Args:
+          name: The name of the BenchmarkJob. If not provided, a name will be generated based on
+              the target dataset.
+
+          spec: The job specification. Exactly one spec type must be set.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+
+          idempotency_key: Specify a custom idempotency key for this request
+        """
+        return self._post(
+            "/v1/benchmark_jobs",
+            body=maybe_transform(
+                {
+                    "name": name,
+                    "spec": spec,
+                },
+                benchmark_job_create_params.BenchmarkJobCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                idempotency_key=idempotency_key,
+            ),
+            cast_to=BenchmarkJobView,
+        )
+
+    def retrieve(
+        self,
+        id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> BenchmarkJobView:
+        """
+        [Beta] Get a BenchmarkJob by its ID.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not id:
+            raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
+        return self._get(
+            f"/v1/benchmark_jobs/{id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BenchmarkJobView,
+        )
+
+    def list(
+        self,
+        *,
+        limit: int | Omit = omit,
+        name: str | Omit = omit,
+        starting_after: str | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> BenchmarkJobListView:
+        """
+        [Beta] List all BenchmarkJobs matching filter.
+
+        Args:
+          limit: The limit of items to return. Default is 20. Max is 5000.
+
+          name: Filter by name
+
+          starting_after: Load the next page of data starting after the item with the given ID.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get(
+            "/v1/benchmark_jobs",
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "limit": limit,
+                        "name": name,
+                        "starting_after": starting_after,
+                    },
+                    benchmark_job_list_params.BenchmarkJobListParams,
+                ),
+            ),
+            cast_to=BenchmarkJobListView,
+        )
+
+
+class AsyncBenchmarkJobsResource(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncBenchmarkJobsResourceWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/runloopai/api-client-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncBenchmarkJobsResourceWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncBenchmarkJobsResourceWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/runloopai/api-client-python#with_streaming_response
+        """
+        return AsyncBenchmarkJobsResourceWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        name: Optional[str] | Omit = omit,
+        spec: Optional[benchmark_job_create_params.Spec] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+        idempotency_key: str | None = None,
+    ) -> BenchmarkJobView:
+        """
+        [Beta] Create a BenchmarkJob that runs a set of scenarios entirely on runloop.
+
+        Args:
+          name: The name of the BenchmarkJob. If not provided, a name will be generated based on
+              the target dataset.
+
+          spec: The job specification. Exactly one spec type must be set.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+
+          idempotency_key: Specify a custom idempotency key for this request
+        """
+        return await self._post(
+            "/v1/benchmark_jobs",
+            body=await async_maybe_transform(
+                {
+                    "name": name,
+                    "spec": spec,
+                },
+                benchmark_job_create_params.BenchmarkJobCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                idempotency_key=idempotency_key,
+            ),
+            cast_to=BenchmarkJobView,
+        )
+
+    async def retrieve(
+        self,
+        id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> BenchmarkJobView:
+        """
+        [Beta] Get a BenchmarkJob by its ID.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not id:
+            raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
+        return await self._get(
+            f"/v1/benchmark_jobs/{id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BenchmarkJobView,
+        )
+
+    async def list(
+        self,
+        *,
+        limit: int | Omit = omit,
+        name: str | Omit = omit,
+        starting_after: str | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> BenchmarkJobListView:
+        """
+        [Beta] List all BenchmarkJobs matching filter.
+
+        Args:
+          limit: The limit of items to return. Default is 20. Max is 5000.
+
+          name: Filter by name
+
+          starting_after: Load the next page of data starting after the item with the given ID.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._get(
+            "/v1/benchmark_jobs",
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=await async_maybe_transform(
+                    {
+                        "limit": limit,
+                        "name": name,
+                        "starting_after": starting_after,
+                    },
+                    benchmark_job_list_params.BenchmarkJobListParams,
+                ),
+            ),
+            cast_to=BenchmarkJobListView,
+        )
+
+
+class BenchmarkJobsResourceWithRawResponse:
+    def __init__(self, benchmark_jobs: BenchmarkJobsResource) -> None:
+        self._benchmark_jobs = benchmark_jobs
+
+        self.create = to_raw_response_wrapper(
+            benchmark_jobs.create,
+        )
+        self.retrieve = to_raw_response_wrapper(
+            benchmark_jobs.retrieve,
+        )
+        self.list = to_raw_response_wrapper(
+            benchmark_jobs.list,
+        )
+
+
+class AsyncBenchmarkJobsResourceWithRawResponse:
+    def __init__(self, benchmark_jobs: AsyncBenchmarkJobsResource) -> None:
+        self._benchmark_jobs = benchmark_jobs
+
+        self.create = async_to_raw_response_wrapper(
+            benchmark_jobs.create,
+        )
+        self.retrieve = async_to_raw_response_wrapper(
+            benchmark_jobs.retrieve,
+        )
+        self.list = async_to_raw_response_wrapper(
+            benchmark_jobs.list,
+        )
+
+
+class BenchmarkJobsResourceWithStreamingResponse:
+    def __init__(self, benchmark_jobs: BenchmarkJobsResource) -> None:
+        self._benchmark_jobs = benchmark_jobs
+
+        self.create = to_streamed_response_wrapper(
+            benchmark_jobs.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            benchmark_jobs.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            benchmark_jobs.list,
+        )
+
+
+class AsyncBenchmarkJobsResourceWithStreamingResponse:
+    def __init__(self, benchmark_jobs: AsyncBenchmarkJobsResource) -> None:
+        self._benchmark_jobs = benchmark_jobs
+
+        self.create = async_to_streamed_response_wrapper(
+            benchmark_jobs.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            benchmark_jobs.retrieve,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            benchmark_jobs.list,
+        )
diff --git a/src/runloop_api_client/types/__init__.py b/src/runloop_api_client/types/__init__.py
index f9057658a..59130e662 100644
--- a/src/runloop_api_client/types/__init__.py
+++ b/src/runloop_api_client/types/__init__.py
@@ -28,6 +28,7 @@
 from .secret_list_view import SecretListView as SecretListView
 from .agent_list_params import AgentListParams as AgentListParams
 from .scenario_run_view import ScenarioRunView as ScenarioRunView
+from .benchmark_job_view import BenchmarkJobView as BenchmarkJobView
 from .benchmark_run_view import BenchmarkRunView as BenchmarkRunView
 from .devbox_list_params import DevboxListParams as DevboxListParams
 from .devbox_tunnel_view import DevboxTunnelView as DevboxTunnelView
@@ -58,6 +59,7 @@
 from .scoring_contract_param import ScoringContractParam as ScoringContractParam
 from .scoring_function_param import ScoringFunctionParam as ScoringFunctionParam
 from .benchmark_create_params import BenchmarkCreateParams as BenchmarkCreateParams
+from .benchmark_job_list_view import BenchmarkJobListView as BenchmarkJobListView
 from .benchmark_run_list_view import BenchmarkRunListView as BenchmarkRunListView
 from .benchmark_update_params import BenchmarkUpdateParams as BenchmarkUpdateParams
 from .blueprint_create_params import BlueprintCreateParams as BlueprintCreateParams
@@ -67,6 +69,7 @@
 from .object_download_url_view import ObjectDownloadURLView as ObjectDownloadURLView
 from .repository_create_params import RepositoryCreateParams as RepositoryCreateParams
 from .repository_manifest_view import RepositoryManifestView as RepositoryManifestView
+from .benchmark_job_list_params import BenchmarkJobListParams as BenchmarkJobListParams
 from .benchmark_run_list_params import BenchmarkRunListParams as BenchmarkRunListParams
 from .devbox_send_std_in_result import DevboxSendStdInResult as DevboxSendStdInResult
 from .devbox_snapshot_list_view import DevboxSnapshotListView as DevboxSnapshotListView
@@ -82,6 +85,7 @@
 from .network_policy_list_params import NetworkPolicyListParams as NetworkPolicyListParams
 from .repository_connection_view import RepositoryConnectionView as RepositoryConnectionView
 from .scenario_environment_param import ScenarioEnvironmentParam as ScenarioEnvironmentParam
+from .benchmark_job_create_params import BenchmarkJobCreateParams as BenchmarkJobCreateParams
 from .devbox_create_tunnel_params import DevboxCreateTunnelParams as DevboxCreateTunnelParams
 from .devbox_download_file_params import DevboxDownloadFileParams as DevboxDownloadFileParams
 from .devbox_execute_async_params import DevboxExecuteAsyncParams as DevboxExecuteAsyncParams
diff --git a/src/runloop_api_client/types/benchmark_job_create_params.py b/src/runloop_api_client/types/benchmark_job_create_params.py
new file mode 100644
index 000000000..8ac3a5475
--- /dev/null
+++ b/src/runloop_api_client/types/benchmark_job_create_params.py
@@ -0,0 +1,220 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from .._types import SequenceNotStr
+
+__all__ = [
+    "BenchmarkJobCreateParams",
+    "Spec",
+    "SpecHarborJobSpec",
+    "SpecBenchmarkDefinitionJobSpec",
+    "SpecBenchmarkDefinitionJobSpecAgentConfig",
+    "SpecBenchmarkDefinitionJobSpecAgentConfigAgentEnvironment",
+    "SpecBenchmarkDefinitionJobSpecOrchestratorConfig",
+    "SpecScenarioDefinitionJobSpec",
+    "SpecScenarioDefinitionJobSpecAgentConfig",
+    "SpecScenarioDefinitionJobSpecAgentConfigAgentEnvironment",
+    "SpecScenarioDefinitionJobSpecOrchestratorConfig",
+]
+
+
+class BenchmarkJobCreateParams(TypedDict, total=False):
+    name: Optional[str]
+    """The name of the BenchmarkJob.
+
+    If not provided, a name will be generated based on the target dataset.
+    """
+
+    spec: Optional[Spec]
+    """The job specification. Exactly one spec type must be set."""
+
+
+class SpecHarborJobSpec(TypedDict, total=False):
+    """Harbor-based job specification with inline YAML configuration."""
+
+    inline_yaml: Required[str]
+    """The Harbor job configuration as inline YAML content."""
+
+    type: Required[Literal["harbor"]]
+
+
+class SpecBenchmarkDefinitionJobSpecAgentConfigAgentEnvironment(TypedDict, total=False):
+    """Environment configuration to use for this agent"""
+
+    environment_variables: Optional[Dict[str, str]]
+    """Environment variables to set when launching the agent."""
+
+    secrets: Optional[Dict[str, str]]
+    """Secrets to inject as environment variables when launching the agent.
+
+    Map of environment variable names to secret IDs.
+    """
+
+
+class SpecBenchmarkDefinitionJobSpecAgentConfig(TypedDict, total=False):
+    """Configuration for an agent in a benchmark job"""
+
+    name: Required[str]
+    """Name of the agent"""
+
+    type: Required[Literal["job_agent"]]
+
+    agent_environment: Optional[SpecBenchmarkDefinitionJobSpecAgentConfigAgentEnvironment]
+    """Environment configuration to use for this agent"""
+
+    agent_id: Optional[str]
+    """ID of the agent to use (optional if agent exists by name)"""
+
+    kwargs: Optional[Dict[str, str]]
+    """Additional kwargs for agent configuration"""
+
+    model_name: Optional[str]
+    """Model name override for this agent"""
+
+    timeout_seconds: Optional[float]
+    """Timeout in seconds for this agent"""
+
+
+class SpecBenchmarkDefinitionJobSpecOrchestratorConfig(TypedDict, total=False):
+    """Orchestrator configuration (optional overrides).
+
+    If not provided, default values will be used.
+    """
+
+    n_attempts: Optional[int]
+    """Number of retry attempts on failure (default: 0).
+
+    This is the retry policy for failed scenarios. Default is 0.
+    """
+
+    n_concurrent_trials: Optional[int]
+    """Number of concurrent trials to run (default: 1).
+
+    Controls parallelism for scenario execution. Default is 1.
+    """
+
+    quiet: Optional[bool]
+    """Suppress verbose output (default: false)"""
+
+    timeout_multiplier: Optional[float]
+    """Timeout multiplier for retries (default: 1.0).
+
+    Each retry will multiply the timeout by this factor.
+    """
+
+
+class SpecBenchmarkDefinitionJobSpec(TypedDict, total=False):
+    """Specifies a benchmark definition with runtime configuration.
+
+    The benchmark definition's scenarios will be executed using the provided agent and orchestrator configurations.
+    """
+
+    agent_configs: Required[Iterable[SpecBenchmarkDefinitionJobSpecAgentConfig]]
+    """Agent configurations to use for this run. Must specify at least one agent."""
+
+    benchmark_id: Required[str]
+    """ID of the benchmark definition to run.
+
+    The scenarios from this benchmark will be executed.
+    """
+
+    type: Required[Literal["benchmark"]]
+
+    orchestrator_config: Optional[SpecBenchmarkDefinitionJobSpecOrchestratorConfig]
+    """Orchestrator configuration (optional overrides).
+
+    If not provided, default values will be used.
+    """
+
+
+class SpecScenarioDefinitionJobSpecAgentConfigAgentEnvironment(TypedDict, total=False):
+    """Environment configuration to use for this agent"""
+
+    environment_variables: Optional[Dict[str, str]]
+    """Environment variables to set when launching the agent."""
+
+    secrets: Optional[Dict[str, str]]
+    """Secrets to inject as environment variables when launching the agent.
+
+    Map of environment variable names to secret IDs.
+    """
+
+
+class SpecScenarioDefinitionJobSpecAgentConfig(TypedDict, total=False):
+    """Configuration for an agent in a benchmark job"""
+
+    name: Required[str]
+    """Name of the agent"""
+
+    type: Required[Literal["job_agent"]]
+
+    agent_environment: Optional[SpecScenarioDefinitionJobSpecAgentConfigAgentEnvironment]
+    """Environment configuration to use for this agent"""
+
+    agent_id: Optional[str]
+    """ID of the agent to use (optional if agent exists by name)"""
+
+    kwargs: Optional[Dict[str, str]]
+    """Additional kwargs for agent configuration"""
+
+    model_name: Optional[str]
+    """Model name override for this agent"""
+
+    timeout_seconds: Optional[float]
+    """Timeout in seconds for this agent"""
+
+
+class SpecScenarioDefinitionJobSpecOrchestratorConfig(TypedDict, total=False):
+    """Orchestrator configuration (optional overrides).
+
+    If not provided, default values will be used.
+    """
+
+    n_attempts: Optional[int]
+    """Number of retry attempts on failure (default: 0).
+
+    This is the retry policy for failed scenarios. Default is 0.
+    """
+
+    n_concurrent_trials: Optional[int]
+    """Number of concurrent trials to run (default: 1).
+
+    Controls parallelism for scenario execution. Default is 1.
+    """
+
+    quiet: Optional[bool]
+    """Suppress verbose output (default: false)"""
+
+    timeout_multiplier: Optional[float]
+    """Timeout multiplier for retries (default: 1.0).
+
+    Each retry will multiply the timeout by this factor.
+    """
+
+
+class SpecScenarioDefinitionJobSpec(TypedDict, total=False):
+    """Specifies a set of scenarios with runtime configuration.
+
+    The scenarios will be executed using the provided agent and orchestrator configurations.
+    """
+
+    agent_configs: Required[Iterable[SpecScenarioDefinitionJobSpecAgentConfig]]
+    """Agent configurations to use for this run. Must specify at least one agent."""
+
+    scenario_ids: Required[SequenceNotStr[str]]
+    """List of scenario IDs to execute"""
+
+    type: Required[Literal["scenarios"]]
+
+    orchestrator_config: Optional[SpecScenarioDefinitionJobSpecOrchestratorConfig]
+    """Orchestrator configuration (optional overrides).
+
+    If not provided, default values will be used.
+    """
+
+
+Spec: TypeAlias = Union[SpecHarborJobSpec, SpecBenchmarkDefinitionJobSpec, SpecScenarioDefinitionJobSpec]
diff --git a/src/runloop_api_client/types/benchmark_job_list_params.py b/src/runloop_api_client/types/benchmark_job_list_params.py
new file mode 100644
index 000000000..c0db8843c
--- /dev/null
+++ b/src/runloop_api_client/types/benchmark_job_list_params.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["BenchmarkJobListParams"]
+
+
+class BenchmarkJobListParams(TypedDict, total=False):
+    limit: int
+    """The limit of items to return. Default is 20. Max is 5000."""
+
+    name: str
+    """Filter by name"""
+
+    starting_after: str
+    """Load the next page of data starting after the item with the given ID."""
diff --git a/src/runloop_api_client/types/benchmark_job_list_view.py b/src/runloop_api_client/types/benchmark_job_list_view.py
new file mode 100644
index 000000000..5090fe8e8
--- /dev/null
+++ b/src/runloop_api_client/types/benchmark_job_list_view.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+
+from .._models import BaseModel
+from .benchmark_job_view import BenchmarkJobView
+
+__all__ = ["BenchmarkJobListView"]
+
+
+class BenchmarkJobListView(BaseModel):
+    has_more: bool
+
+    jobs: List[BenchmarkJobView]
+    """List of BenchmarkJobs matching filter."""
+
+    remaining_count: int
+
+    total_count: int
diff --git a/src/runloop_api_client/types/benchmark_job_view.py b/src/runloop_api_client/types/benchmark_job_view.py
new file mode 100644
index 000000000..f245f33ac
--- /dev/null
+++ b/src/runloop_api_client/types/benchmark_job_view.py
@@ -0,0 +1,344 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from pydantic import Field as FieldInfo
+
+from .._utils import PropertyInfo
+from .._models import BaseModel
+
+__all__ = [
+    "BenchmarkJobView",
+    "BenchmarkOutcome",
+    "BenchmarkOutcomeScenarioOutcome",
+    "BenchmarkOutcomeScenarioOutcomeFailureReason",
+    "InProgressRun",
+    "InProgressRunAgentConfig",
+    "InProgressRunAgentConfigExternalAPIAgentConfig",
+    "InProgressRunAgentConfigJobAgentConfig",
+    "InProgressRunAgentConfigJobAgentConfigAgentEnvironment",
+    "JobSource",
+    "JobSourceHarborJobSource",
+    "JobSourceBenchmarkDefJobSource",
+    "JobSourceScenariosJobSource",
+    "JobSpec",
+    "JobSpecAgentConfig",
+    "JobSpecAgentConfigAgentEnvironment",
+    "JobSpecOrchestratorConfig",
+]
+
+
+class BenchmarkOutcomeScenarioOutcomeFailureReason(BaseModel):
+    """Failure information if the scenario failed or timed out.
+
+    Contains exception type and message.
+    """
+
+    exception_message: str
+    """The exception message providing context"""
+
+    exception_type: str
+    """The exception class name (e.g., 'TimeoutException', 'AgentTimeoutError')"""
+
+
+class BenchmarkOutcomeScenarioOutcome(BaseModel):
+    """
+    Outcome data for a single scenario execution, including its final state and scoring results.
+    """
+
+    scenario_definition_id: str
+    """The ID of the scenario definition that was executed."""
+
+    scenario_name: str
+    """The name of the scenario."""
+
+    scenario_run_id: str
+    """The ID of the scenario run."""
+
+    state: Literal["COMPLETED", "FAILED", "TIMEOUT", "CANCELED"]
+    """The final state of the scenario execution."""
+
+    duration_ms: Optional[int] = None
+    """Duration of the scenario execution in milliseconds."""
+
+    failure_reason: Optional[BenchmarkOutcomeScenarioOutcomeFailureReason] = None
+    """Failure information if the scenario failed or timed out.
+
+    Contains exception type and message.
+    """
+
+    score: Optional[float] = None
+    """The score achieved for this scenario (0.0 to 1.0).
+
+    Only present if state is COMPLETED.
+    """
+
+
+class BenchmarkOutcome(BaseModel):
+    """
+    Outcome data for a single benchmark run within a benchmark job, representing results for one agent configuration.
+    """
+
+    agent_name: str
+    """The name of the agent configuration used."""
+
+    benchmark_run_id: str
+    """The ID of the benchmark run."""
+
+    n_completed: int
+    """Number of scenarios that completed successfully."""
+
+    n_failed: int
+    """Number of scenarios that failed."""
+
+    n_timeout: int
+    """Number of scenarios that timed out."""
+
+    scenario_outcomes: List[BenchmarkOutcomeScenarioOutcome]
+    """Detailed outcomes for each scenario in this benchmark run."""
+
+    average_score: Optional[float] = None
+    """Average score across all completed scenarios (0.0 to 1.0)."""
+
+    duration_ms: Optional[int] = None
+    """Total duration of the benchmark run in milliseconds."""
+
+    api_model_name: Optional[str] = FieldInfo(alias="model_name", default=None)
+    """The model name used by the agent."""
+
+
+class InProgressRunAgentConfigExternalAPIAgentConfig(BaseModel):
+    """Configuration for externally-driven benchmark runs via API"""
+
+    type: Literal["external_api"]
+
+    info: Optional[str] = None
+    """Placeholder for future external agent metadata"""
+
+
+class InProgressRunAgentConfigJobAgentConfigAgentEnvironment(BaseModel):
+    """Environment configuration to use for this agent"""
+
+    environment_variables: Optional[Dict[str, str]] = None
+    """Environment variables to set when launching the agent."""
+
+    secrets: Optional[Dict[str, str]] = None
+    """Secrets to inject as environment variables when launching the agent.
+
+    Map of environment variable names to secret IDs.
+    """
+
+
+class InProgressRunAgentConfigJobAgentConfig(BaseModel):
+    """Configuration for an agent in a benchmark job"""
+
+    name: str
+    """Name of the agent"""
+
+    type: Literal["job_agent"]
+
+    agent_environment: Optional[InProgressRunAgentConfigJobAgentConfigAgentEnvironment] = None
+    """Environment configuration to use for this agent"""
+
+    agent_id: Optional[str] = None
+    """ID of the agent to use (optional if agent exists by name)"""
+
+    kwargs: Optional[Dict[str, str]] = None
+    """Additional kwargs for agent configuration"""
+
+    api_model_name: Optional[str] = FieldInfo(alias="model_name", default=None)
+    """Model name override for this agent"""
+
+    timeout_seconds: Optional[float] = None
+    """Timeout in seconds for this agent"""
+
+
+InProgressRunAgentConfig: TypeAlias = Annotated[
+    Union[InProgressRunAgentConfigExternalAPIAgentConfig, InProgressRunAgentConfigJobAgentConfig, None],
+    PropertyInfo(discriminator="type"),
+]
+
+
+class InProgressRun(BaseModel):
+    """
+    A lightweight view of a benchmark run currently in progress, showing basic execution details without full outcome data.
+    """
+
+    benchmark_run_id: str
+    """The ID of the benchmark run."""
+
+    start_time_ms: int
+    """Start time (Unix milliseconds)."""
+
+    state: Literal["running", "canceled", "completed"]
+    """The current state of the run."""
+
+    agent_config: Optional[InProgressRunAgentConfig] = None
+    """Agent configuration used for this run.
+
+    Specifies whether the run was driven by an external API agent or a job-defined
+    agent.
+    """
+
+    duration_ms: Optional[int] = None
+    """Duration so far in milliseconds."""
+
+
+class JobSourceHarborJobSource(BaseModel):
+    """Harbor job source with inline YAML configuration"""
+
+    inline_yaml: str
+    """The Harbor job configuration as inline YAML content"""
+
+    type: Literal["harbor"]
+
+
+class JobSourceBenchmarkDefJobSource(BaseModel):
+    """Benchmark definition job source"""
+
+    benchmark_id: str
+    """The ID of the benchmark definition"""
+
+    type: Literal["benchmark"]
+
+    benchmark_name: Optional[str] = None
+    """Optional user-provided name for the benchmark definition"""
+
+
+class JobSourceScenariosJobSource(BaseModel):
+    """Scenarios job source with a list of scenario definition IDs"""
+
+    scenario_ids: List[str]
+    """List of scenario definition IDs to execute"""
+
+    type: Literal["scenarios"]
+
+
+JobSource: TypeAlias = Annotated[
+    Union[JobSourceHarborJobSource, JobSourceBenchmarkDefJobSource, JobSourceScenariosJobSource, None],
+    PropertyInfo(discriminator="type"),
+]
+
+
+class JobSpecAgentConfigAgentEnvironment(BaseModel):
+    """Environment configuration to use for this agent"""
+
+    environment_variables: Optional[Dict[str, str]] = None
+    """Environment variables to set when launching the agent."""
+
+    secrets: Optional[Dict[str, str]] = None
+    """Secrets to inject as environment variables when launching the agent.
+
+    Map of environment variable names to secret IDs.
+    """
+
+
+class JobSpecAgentConfig(BaseModel):
+    """Configuration for an agent in a benchmark job"""
+
+    name: str
+    """Name of the agent"""
+
+    type: Literal["job_agent"]
+
+    agent_environment: Optional[JobSpecAgentConfigAgentEnvironment] = None
+    """Environment configuration to use for this agent"""
+
+    agent_id: Optional[str] = None
+    """ID of the agent to use (optional if agent exists by name)"""
+
+    kwargs: Optional[Dict[str, str]] = None
+    """Additional kwargs for agent configuration"""
+
+    api_model_name: Optional[str] = FieldInfo(alias="model_name", default=None)
+    """Model name override for this agent"""
+
+    timeout_seconds: Optional[float] = None
+    """Timeout in seconds for this agent"""
+
+
+class JobSpecOrchestratorConfig(BaseModel):
+    """Orchestrator configuration"""
+
+    n_attempts: Optional[int] = None
+    """Number of retry attempts on failure (default: 0).
+
+    This is the retry policy for failed scenarios. Default is 0.
+    """
+
+    n_concurrent_trials: Optional[int] = None
+    """Number of concurrent trials to run (default: 1).
+
+    Controls parallelism for scenario execution. Default is 1.
+    """
+
+    quiet: Optional[bool] = None
+    """Suppress verbose output (default: false)"""
+
+    timeout_multiplier: Optional[float] = None
+    """Timeout multiplier for retries (default: 1.0).
+
+    Each retry will multiply the timeout by this factor.
+    """
+
+
+class JobSpec(BaseModel):
+    """The resolved job specification.
+
+    Contains scenarios, agents, and orchestrator config.
+    """
+
+    agent_configs: List[JobSpecAgentConfig]
+    """Agent configurations for this job"""
+
+    scenario_ids: List[str]
+    """List of scenario IDs to execute"""
+
+    orchestrator_config: Optional[JobSpecOrchestratorConfig] = None
+    """Orchestrator configuration"""
+
+
+class BenchmarkJobView(BaseModel):
+    """
+    A BenchmarkJobView represents a benchmark job that runs a set of scenarios entirely on Runloop.
+    """
+
+    id: str
+    """The ID of the BenchmarkJob."""
+
+    create_time_ms: int
+    """Timestamp when job was created (Unix milliseconds)."""
+
+    name: str
+    """The unique name of the BenchmarkJob."""
+
+    state: Literal["initializing", "queued", "running", "completed", "failed", "cancelled", "timeout"]
+    """The current state of the benchmark job."""
+
+    benchmark_outcomes: Optional[List[BenchmarkOutcome]] = None
+    """Detailed outcome data for each benchmark run created by this job.
+
+    Includes per-agent results and scenario-level details.
+    """
+
+    failure_reason: Optional[str] = None
+    """Failure reason if job failed."""
+
+    in_progress_runs: Optional[List[InProgressRun]] = None
+    """Benchmark runs currently in progress for this job.
+
+    Shows runs that have not yet completed.
+    """
+
+    job_source: Optional[JobSource] = None
+    """The source configuration that was used to create this job.
+
+    One of: Harbor YAML, a benchmark definition reference, or a list of scenario IDs.
+    """
+
+    job_spec: Optional[JobSpec] = None
+    """The resolved job specification.
+
+    Contains scenarios, agents, and orchestrator config.
+    """
diff --git a/tests/api_resources/test_benchmark_jobs.py b/tests/api_resources/test_benchmark_jobs.py
new file mode 100644
index 000000000..461943458
--- /dev/null
+++ b/tests/api_resources/test_benchmark_jobs.py
@@ -0,0 +1,243 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from tests.utils import assert_matches_type
+from runloop_api_client import Runloop, AsyncRunloop
+from runloop_api_client.types import (
+    BenchmarkJobView,
+    BenchmarkJobListView,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestBenchmarkJobs:
+    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+    @parametrize
+    def test_method_create(self, client: Runloop) -> None:
+        benchmark_job = client.benchmark_jobs.create()
+        assert_matches_type(BenchmarkJobView, benchmark_job, path=["response"])
+
+    @parametrize
+    def test_method_create_with_all_params(self, client: Runloop) -> None:
+        benchmark_job = client.benchmark_jobs.create(
+            name="name",
+            spec={
+                "inline_yaml": "inline_yaml",
+                "type": "harbor",
+            },
+        )
+        assert_matches_type(BenchmarkJobView, benchmark_job, path=["response"])
+
+    @parametrize
+    def test_raw_response_create(self, client: Runloop) -> None:
+        response = client.benchmark_jobs.with_raw_response.create()
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        benchmark_job = response.parse()
+        assert_matches_type(BenchmarkJobView, benchmark_job, path=["response"])
+
+    @parametrize
+    def test_streaming_response_create(self, client: Runloop) -> None:
+        with client.benchmark_jobs.with_streaming_response.create() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            benchmark_job = response.parse()
+            assert_matches_type(BenchmarkJobView, benchmark_job, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    def test_method_retrieve(self, client: Runloop) -> None:
+        benchmark_job = client.benchmark_jobs.retrieve(
+            "id",
+        )
+        assert_matches_type(BenchmarkJobView, benchmark_job, path=["response"])
+
+    @parametrize
+    def test_raw_response_retrieve(self, client: Runloop) -> None:
+        response = client.benchmark_jobs.with_raw_response.retrieve(
+            "id",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        benchmark_job = response.parse()
+        assert_matches_type(BenchmarkJobView, benchmark_job, path=["response"])
+
+    @parametrize
+    def test_streaming_response_retrieve(self, client: Runloop) -> None:
+        with client.benchmark_jobs.with_streaming_response.retrieve(
+            "id",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            benchmark_job = response.parse()
+            assert_matches_type(BenchmarkJobView, benchmark_job, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    def test_path_params_retrieve(self, client: Runloop) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"):
+            client.benchmark_jobs.with_raw_response.retrieve(
+                "",
+            )
+
+    @parametrize
+    def test_method_list(self, client: Runloop) -> None:
+        benchmark_job = client.benchmark_jobs.list()
+        assert_matches_type(BenchmarkJobListView, benchmark_job, path=["response"])
+
+    @parametrize
+    def test_method_list_with_all_params(self, client: Runloop) -> None:
+        benchmark_job = client.benchmark_jobs.list(
+            limit=0,
+            name="name",
+            starting_after="starting_after",
+        )
+        assert_matches_type(BenchmarkJobListView, benchmark_job, path=["response"])
+
+    @parametrize
+    def test_raw_response_list(self, client: Runloop) -> None:
+        response = client.benchmark_jobs.with_raw_response.list()
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        benchmark_job = response.parse()
+        assert_matches_type(BenchmarkJobListView, benchmark_job, path=["response"])
+
+    @parametrize
+    def test_streaming_response_list(self, client: Runloop) -> None:
+        with client.benchmark_jobs.with_streaming_response.list() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            benchmark_job = response.parse()
+            assert_matches_type(BenchmarkJobListView, benchmark_job, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncBenchmarkJobs:
+    parametrize = pytest.mark.parametrize(
+        "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+    )
+
+    @parametrize
+    async def test_method_create(self, async_client: AsyncRunloop) -> None:
+        benchmark_job = await async_client.benchmark_jobs.create()
+        assert_matches_type(BenchmarkJobView, benchmark_job, path=["response"])
+
+    @parametrize
+    async def test_method_create_with_all_params(self, async_client: AsyncRunloop) -> None:
+        benchmark_job = await async_client.benchmark_jobs.create(
+            name="name",
+            spec={
+                "inline_yaml": "inline_yaml",
+                "type": "harbor",
+            },
+        )
+        assert_matches_type(BenchmarkJobView, benchmark_job, path=["response"])
+
+    @parametrize
+    async def test_raw_response_create(self, async_client: AsyncRunloop) -> None:
+        response = await async_client.benchmark_jobs.with_raw_response.create()
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        benchmark_job = await response.parse()
+        assert_matches_type(BenchmarkJobView, benchmark_job, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_create(self, async_client: AsyncRunloop) -> None:
+        async with async_client.benchmark_jobs.with_streaming_response.create() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            benchmark_job = await response.parse()
+            assert_matches_type(BenchmarkJobView, benchmark_job, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_method_retrieve(self, async_client: AsyncRunloop) -> None:
+        benchmark_job = await async_client.benchmark_jobs.retrieve(
+            "id",
+        )
+        assert_matches_type(BenchmarkJobView, benchmark_job, path=["response"])
+
+    @parametrize
+    async def test_raw_response_retrieve(self, async_client: AsyncRunloop) -> None:
+        response = await async_client.benchmark_jobs.with_raw_response.retrieve(
+            "id",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        benchmark_job = await response.parse()
+        assert_matches_type(BenchmarkJobView, benchmark_job, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_retrieve(self, async_client: AsyncRunloop) -> None:
+        async with async_client.benchmark_jobs.with_streaming_response.retrieve(
+            "id",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            benchmark_job = await response.parse()
+            assert_matches_type(BenchmarkJobView, benchmark_job, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_path_params_retrieve(self, async_client: AsyncRunloop) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"):
+            await async_client.benchmark_jobs.with_raw_response.retrieve(
+                "",
+            )
+
+    @parametrize
+    async def test_method_list(self, async_client: AsyncRunloop) -> None:
+        benchmark_job = await async_client.benchmark_jobs.list()
+        assert_matches_type(BenchmarkJobListView, benchmark_job, path=["response"])
+
+    @parametrize
+    async def test_method_list_with_all_params(self, async_client: AsyncRunloop) -> None:
+        benchmark_job = await async_client.benchmark_jobs.list(
+            limit=0,
+            name="name",
+            starting_after="starting_after",
+        )
+        assert_matches_type(BenchmarkJobListView, benchmark_job, path=["response"])
+
+    @parametrize
+    async def test_raw_response_list(self, async_client: AsyncRunloop) -> None:
+        response = await async_client.benchmark_jobs.with_raw_response.list()
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        benchmark_job = await response.parse()
+        assert_matches_type(BenchmarkJobListView, benchmark_job, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_list(self, async_client: AsyncRunloop) -> None:
+        async with async_client.benchmark_jobs.with_streaming_response.list() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            benchmark_job = await response.parse()
+            assert_matches_type(BenchmarkJobListView, benchmark_job, path=["response"])
+
+        assert cast(Any, response.is_closed) is True