diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 7f3f5c846..d2d60a3df 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "0.23.0"
+  ".": "0.24.0"
 }
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index fb43e6c65..89dc8775e 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,2 +1,2 @@
 configured_endpoints: 77
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/runloop-ai%2Frunloop-a3d91c690527ff6a9040ade46943ba56916987f1f7d1fb45a9974546770ffe97.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/runloop-ai%2Frunloop-cb8add05a7b418d6f8a5624be8477564853da49e8bf9671ae89b8ce49a04b6cd.yml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 835e12e70..a27f0172d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,26 @@
 # Changelog
 
+## 0.24.0 (2025-02-19)
+
+Full Changelog: [v0.23.0...v0.24.0](https://github.com/runloopai/api-client-python/compare/v0.23.0...v0.24.0)
+
+### Features
+
+* **api:** api update ([#549](https://github.com/runloopai/api-client-python/issues/549)) ([3e19f41](https://github.com/runloopai/api-client-python/commit/3e19f41cf8e2b9cd225439d9df90f80b34d89660))
+
+
+### Bug Fixes
+
+* asyncify on non-asyncio runtimes ([#547](https://github.com/runloopai/api-client-python/issues/547)) ([8ce7003](https://github.com/runloopai/api-client-python/commit/8ce700397fc5d6755f0a95b637efad74d6dc3fe4))
+
+
+### Chores
+
+* **internal:** codegen related update ([#543](https://github.com/runloopai/api-client-python/issues/543)) ([5c44e84](https://github.com/runloopai/api-client-python/commit/5c44e84a8a029b22461df339c280885dc972fa96))
+* **internal:** codegen related update ([#546](https://github.com/runloopai/api-client-python/issues/546)) ([d8b620d](https://github.com/runloopai/api-client-python/commit/d8b620d93c687d37a843262d5ac60fd4a9b815e8))
+* **internal:** update client tests ([#545](https://github.com/runloopai/api-client-python/issues/545)) ([30307f4](https://github.com/runloopai/api-client-python/commit/30307f4b78ba76240793b851dd735a0beb835b75))
+* **internal:** update client tests ([#548](https://github.com/runloopai/api-client-python/issues/548)) ([4e782f1](https://github.com/runloopai/api-client-python/commit/4e782f19bfa7e08ae318eca6f00fb4d5ffe5c9af))
+
 ## 0.23.0 (2025-02-11)
 
 Full Changelog: [v0.22.0...v0.23.0](https://github.com/runloopai/api-client-python/compare/v0.22.0...v0.23.0)
diff --git a/pyproject.toml b/pyproject.toml
index 0d8b9c708..1e703a82d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "runloop_api_client"
-version = "0.23.0"
+version = "0.24.0"
 description = "The official Python library for the runloop API"
 dynamic = ["readme"]
 license = "MIT"
diff --git a/src/runloop_api_client/_utils/_sync.py b/src/runloop_api_client/_utils/_sync.py
index 8b3aaf2b5..ad7ec71b7 100644
--- a/src/runloop_api_client/_utils/_sync.py
+++ b/src/runloop_api_client/_utils/_sync.py
@@ -7,16 +7,20 @@
 from typing import Any, TypeVar, Callable, Awaitable
 from typing_extensions import ParamSpec
 
+import anyio
+import sniffio
+import anyio.to_thread
+
 T_Retval = TypeVar("T_Retval")
 T_ParamSpec = ParamSpec("T_ParamSpec")
 
 
 if sys.version_info >= (3, 9):
-    to_thread = asyncio.to_thread
+    _asyncio_to_thread = asyncio.to_thread
 else:
     # backport of https://docs.python.org/3/library/asyncio-task.html#asyncio.to_thread
     # for Python 3.8 support
-    async def to_thread(
+    async def _asyncio_to_thread(
         func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs
     ) -> Any:
         """Asynchronously run function *func* in a separate thread.
@@ -34,6 +38,17 @@ async def to_thread(
         return await loop.run_in_executor(None, func_call)
 
 
+async def to_thread(  # dispatch *func* to the thread helper that matches the running async library
+    func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs
+) -> T_Retval:
+    if sniffio.current_async_library() == "asyncio":  # sniffio reports which async library is driving this call
+        return await _asyncio_to_thread(func, *args, **kwargs)  # asyncio.to_thread on 3.9+, else the backport above
+
+    return await anyio.to_thread.run_sync(  # any other detected library is delegated to anyio
+        functools.partial(func, *args, **kwargs),  # bind args/kwargs up front so run_sync gets one zero-arg callable
+    )
+
+
 # inspired by `asyncer`, https://github.com/tiangolo/asyncer
 def asyncify(function: Callable[T_ParamSpec, T_Retval]) -> Callable[T_ParamSpec, Awaitable[T_Retval]]:
     """
diff --git a/src/runloop_api_client/_version.py b/src/runloop_api_client/_version.py
index 83f733319..c03b20ee6 100644
--- a/src/runloop_api_client/_version.py
+++ b/src/runloop_api_client/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "runloop_api_client"
-__version__ = "0.23.0"  # x-release-please-version
+__version__ = "0.24.0"  # x-release-please-version
diff --git a/src/runloop_api_client/resources/benchmarks/benchmarks.py b/src/runloop_api_client/resources/benchmarks/benchmarks.py
index f1ea0ffe7..54501bfa3 100644
--- a/src/runloop_api_client/resources/benchmarks/benchmarks.py
+++ b/src/runloop_api_client/resources/benchmarks/benchmarks.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import List, Optional
+from typing import Dict, List, Optional
 
 import httpx
 
@@ -69,6 +69,7 @@ def create(
         self,
         *,
         name: str,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
         scenario_ids: Optional[List[str]] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
@@ -84,6 +85,8 @@ def create(
         Args:
           name: The name of the Benchmark.
 
+          metadata: User defined metadata to attach to the benchmark for organization.
+
           scenario_ids: The Scenario IDs that make up the Benchmark.
 
           extra_headers: Send extra headers
@@ -101,6 +104,7 @@ def create(
             body=maybe_transform(
                 {
                     "name": name,
+                    "metadata": metadata,
                     "scenario_ids": scenario_ids,
                 },
                 benchmark_create_params.BenchmarkCreateParams,
@@ -246,6 +250,7 @@ def start_run(
         self,
         *,
         benchmark_id: str,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
         run_name: Optional[str] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
@@ -261,6 +266,8 @@ def start_run(
         Args:
           benchmark_id: ID of the Benchmark to run.
 
+          metadata: User defined metadata to attach to the benchmark run for organization.
+
           run_name: Display name of the run.
 
           extra_headers: Send extra headers
@@ -278,6 +285,7 @@ def start_run(
             body=maybe_transform(
                 {
                     "benchmark_id": benchmark_id,
+                    "metadata": metadata,
                     "run_name": run_name,
                 },
                 benchmark_start_run_params.BenchmarkStartRunParams,
@@ -321,6 +329,7 @@ async def create(
         self,
         *,
         name: str,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
         scenario_ids: Optional[List[str]] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
@@ -336,6 +345,8 @@ async def create(
         Args:
           name: The name of the Benchmark.
 
+          metadata: User defined metadata to attach to the benchmark for organization.
+
           scenario_ids: The Scenario IDs that make up the Benchmark.
 
           extra_headers: Send extra headers
@@ -353,6 +364,7 @@ async def create(
             body=await async_maybe_transform(
                 {
                     "name": name,
+                    "metadata": metadata,
                     "scenario_ids": scenario_ids,
                 },
                 benchmark_create_params.BenchmarkCreateParams,
@@ -498,6 +510,7 @@ async def start_run(
         self,
         *,
         benchmark_id: str,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
         run_name: Optional[str] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
@@ -513,6 +526,8 @@ async def start_run(
         Args:
           benchmark_id: ID of the Benchmark to run.
 
+          metadata: User defined metadata to attach to the benchmark run for organization.
+
           run_name: Display name of the run.
 
           extra_headers: Send extra headers
@@ -530,6 +545,7 @@ async def start_run(
             body=await async_maybe_transform(
                 {
                     "benchmark_id": benchmark_id,
+                    "metadata": metadata,
                     "run_name": run_name,
                 },
                 benchmark_start_run_params.BenchmarkStartRunParams,
diff --git a/src/runloop_api_client/resources/scenarios/scenarios.py b/src/runloop_api_client/resources/scenarios/scenarios.py
index ce7269441..54a3dfbbd 100644
--- a/src/runloop_api_client/resources/scenarios/scenarios.py
+++ b/src/runloop_api_client/resources/scenarios/scenarios.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Optional
+from typing import Dict, Optional
 
 import httpx
 
@@ -88,6 +88,8 @@ def create(
         name: str,
         scoring_contract: ScoringContractParam,
         environment_parameters: Optional[ScenarioEnvironmentParam] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        reference_output: Optional[str] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -109,6 +111,12 @@ def create(
 
           environment_parameters: The Environment in which the Scenario will run.
 
+          metadata: User defined metadata to attach to the scenario for organization.
+
+          reference_output: A string representation of the reference output to solve the scenario. Commonly
+              can be the result of a git diff or a sequence of command actions to apply to the
+              environment.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -127,6 +135,8 @@ def create(
                     "name": name,
                     "scoring_contract": scoring_contract,
                     "environment_parameters": environment_parameters,
+                    "metadata": metadata,
+                    "reference_output": reference_output,
                 },
                 scenario_create_params.ScenarioCreateParams,
             ),
@@ -281,6 +291,7 @@ def start_run(
         *,
         scenario_id: str,
         benchmark_run_id: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
         run_name: Optional[str] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
@@ -298,6 +309,8 @@ def start_run(
 
           benchmark_run_id: Benchmark to associate the run.
 
+          metadata: User defined metadata to attach to the run for organization.
+
           run_name: Display name of the run.
 
           extra_headers: Send extra headers
@@ -316,6 +329,7 @@ def start_run(
                 {
                     "scenario_id": scenario_id,
                     "benchmark_run_id": benchmark_run_id,
+                    "metadata": metadata,
                     "run_name": run_name,
                 },
                 scenario_start_run_params.ScenarioStartRunParams,
@@ -423,6 +437,8 @@ async def create(
         name: str,
         scoring_contract: ScoringContractParam,
         environment_parameters: Optional[ScenarioEnvironmentParam] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        reference_output: Optional[str] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -444,6 +460,12 @@ async def create(
 
           environment_parameters: The Environment in which the Scenario will run.
 
+          metadata: User defined metadata to attach to the scenario for organization.
+
+          reference_output: A string representation of the reference output to solve the scenario. Commonly
+              can be the result of a git diff or a sequence of command actions to apply to the
+              environment.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -462,6 +484,8 @@ async def create(
                     "name": name,
                     "scoring_contract": scoring_contract,
                     "environment_parameters": environment_parameters,
+                    "metadata": metadata,
+                    "reference_output": reference_output,
                 },
                 scenario_create_params.ScenarioCreateParams,
             ),
@@ -616,6 +640,7 @@ async def start_run(
         *,
         scenario_id: str,
         benchmark_run_id: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
         run_name: Optional[str] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
@@ -633,6 +658,8 @@ async def start_run(
 
           benchmark_run_id: Benchmark to associate the run.
 
+          metadata: User defined metadata to attach to the run for organization.
+
           run_name: Display name of the run.
 
           extra_headers: Send extra headers
@@ -651,6 +678,7 @@ async def start_run(
                 {
                     "scenario_id": scenario_id,
                     "benchmark_run_id": benchmark_run_id,
+                    "metadata": metadata,
                     "run_name": run_name,
                 },
                 scenario_start_run_params.ScenarioStartRunParams,
diff --git a/src/runloop_api_client/types/benchmark_create_params.py b/src/runloop_api_client/types/benchmark_create_params.py
index 3fea3d2a0..1597e3768 100644
--- a/src/runloop_api_client/types/benchmark_create_params.py
+++ b/src/runloop_api_client/types/benchmark_create_params.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import List, Optional
+from typing import Dict, List, Optional
 from typing_extensions import Required, TypedDict
 
 __all__ = ["BenchmarkCreateParams"]
@@ -12,5 +12,8 @@ class BenchmarkCreateParams(TypedDict, total=False):
     name: Required[str]
     """The name of the Benchmark."""
 
+    metadata: Optional[Dict[str, str]]
+    """User defined metadata to attach to the benchmark for organization."""
+
     scenario_ids: Optional[List[str]]
     """The Scenario IDs that make up the Benchmark."""
diff --git a/src/runloop_api_client/types/benchmark_run_view.py b/src/runloop_api_client/types/benchmark_run_view.py
index 51af18a0f..9bfbd3858 100644
--- a/src/runloop_api_client/types/benchmark_run_view.py
+++ b/src/runloop_api_client/types/benchmark_run_view.py
@@ -1,6 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import List, Optional
+from typing import Dict, List, Optional
 from typing_extensions import Literal
 
 from .._models import BaseModel
@@ -15,6 +15,12 @@ class BenchmarkRunView(BaseModel):
     benchmark_id: str
     """The ID of the Benchmark."""
 
+    metadata: Dict[str, str]
+    """User defined metadata to attach to the benchmark run for organization."""
+
+    pending_scenarios: List[str]
+    """List of Scenarios that need to be completed before benchmark can be completed."""
+
     start_time_ms: int
     """The time the benchmark run execution started (Unix timestamp milliseconds)."""
 
@@ -27,9 +33,6 @@ class BenchmarkRunView(BaseModel):
     name: Optional[str] = None
     """The name of the BenchmarkRun."""
 
-    pending_scenarios: Optional[List[str]] = None
-    """List of Scenarios that need to be completed before benchmark can be completed."""
-
     score: Optional[float] = None
     """The final score across the BenchmarkRun, present once completed.
 
diff --git a/src/runloop_api_client/types/benchmark_start_run_params.py b/src/runloop_api_client/types/benchmark_start_run_params.py
index 39d618669..9a0a70057 100644
--- a/src/runloop_api_client/types/benchmark_start_run_params.py
+++ b/src/runloop_api_client/types/benchmark_start_run_params.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Optional
+from typing import Dict, Optional
 from typing_extensions import Required, TypedDict
 
 __all__ = ["BenchmarkStartRunParams"]
@@ -12,5 +12,8 @@ class BenchmarkStartRunParams(TypedDict, total=False):
     benchmark_id: Required[str]
     """ID of the Benchmark to run."""
 
+    metadata: Optional[Dict[str, str]]
+    """User defined metadata to attach to the benchmark run for organization."""
+
     run_name: Optional[str]
     """Display name of the run."""
diff --git a/src/runloop_api_client/types/benchmark_view.py b/src/runloop_api_client/types/benchmark_view.py
index 071415151..7f2f1e1e3 100644
--- a/src/runloop_api_client/types/benchmark_view.py
+++ b/src/runloop_api_client/types/benchmark_view.py
@@ -1,6 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import List
+from typing import Dict, List
 
 from pydantic import Field as FieldInfo
 
@@ -13,6 +13,9 @@ class BenchmarkView(BaseModel):
     id: str
     """The ID of the Benchmark."""
 
+    metadata: Dict[str, str]
+    """User defined metadata to attach to the benchmark for organization."""
+
     name: str
     """The name of the Benchmark."""
 
diff --git a/src/runloop_api_client/types/scenario_create_params.py b/src/runloop_api_client/types/scenario_create_params.py
index 87a48b457..5bd2f3d90 100644
--- a/src/runloop_api_client/types/scenario_create_params.py
+++ b/src/runloop_api_client/types/scenario_create_params.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Optional
+from typing import Dict, Optional
 from typing_extensions import Required, TypedDict
 
 from .input_context_param import InputContextParam
@@ -24,3 +24,13 @@ class ScenarioCreateParams(TypedDict, total=False):
 
     environment_parameters: Optional[ScenarioEnvironmentParam]
     """The Environment in which the Scenario will run."""
+
+    metadata: Optional[Dict[str, str]]
+    """User defined metadata to attach to the scenario for organization."""
+
+    reference_output: Optional[str]
+    """A string representation of the reference output to solve the scenario.
+
+    Commonly can be the result of a git diff or a sequence of command actions to
+    apply to the environment.
+    """
diff --git a/src/runloop_api_client/types/scenario_environment.py b/src/runloop_api_client/types/scenario_environment.py
index 756ab33e1..97c2977fc 100644
--- a/src/runloop_api_client/types/scenario_environment.py
+++ b/src/runloop_api_client/types/scenario_environment.py
@@ -3,6 +3,7 @@
 from typing import Optional
 
 from .._models import BaseModel
+from .shared.launch_parameters import LaunchParameters
 
 __all__ = ["ScenarioEnvironment"]
 
@@ -11,8 +12,17 @@ class ScenarioEnvironment(BaseModel):
     blueprint_id: Optional[str] = None
     """Use the blueprint with matching ID."""
 
+    launch_parameters: Optional[LaunchParameters] = None
+    """Optional launch parameters to apply to the devbox environment at launch."""
+
     prebuilt_id: Optional[str] = None
     """Use the prebuilt with matching ID."""
 
     snapshot_id: Optional[str] = None
     """Use the snapshot with matching ID."""
+
+    working_directory: Optional[str] = None
+    """The working directory where the agent is expected to fulfill the scenario.
+
+    Scoring functions also run from the working directory.
+    """
diff --git a/src/runloop_api_client/types/scenario_environment_param.py b/src/runloop_api_client/types/scenario_environment_param.py
index a59c8b8b1..3b749bb76 100644
--- a/src/runloop_api_client/types/scenario_environment_param.py
+++ b/src/runloop_api_client/types/scenario_environment_param.py
@@ -5,6 +5,8 @@
 from typing import Optional
 from typing_extensions import TypedDict
 
+from .shared_params.launch_parameters import LaunchParameters
+
 __all__ = ["ScenarioEnvironmentParam"]
 
 
@@ -12,8 +14,17 @@ class ScenarioEnvironmentParam(TypedDict, total=False):
     blueprint_id: Optional[str]
     """Use the blueprint with matching ID."""
 
+    launch_parameters: Optional[LaunchParameters]
+    """Optional launch parameters to apply to the devbox environment at launch."""
+
     prebuilt_id: Optional[str]
     """Use the prebuilt with matching ID."""
 
     snapshot_id: Optional[str]
     """Use the snapshot with matching ID."""
+
+    working_directory: Optional[str]
+    """The working directory where the agent is expected to fulfill the scenario.
+
+    Scoring functions also run from the working directory.
+    """
diff --git a/src/runloop_api_client/types/scenario_run_view.py b/src/runloop_api_client/types/scenario_run_view.py
index 89d64dfd1..893279409 100644
--- a/src/runloop_api_client/types/scenario_run_view.py
+++ b/src/runloop_api_client/types/scenario_run_view.py
@@ -1,6 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import Optional
+from typing import Dict, Optional
 from typing_extensions import Literal
 
 from .._models import BaseModel
@@ -16,6 +16,9 @@ class ScenarioRunView(BaseModel):
     devbox_id: str
     """ID of the Devbox on which the Scenario is running."""
 
+    metadata: Dict[str, str]
+    """User defined metadata to attach to the scenario run for organization."""
+
     scenario_id: str
     """ID of the Scenario that has been run."""
 
diff --git a/src/runloop_api_client/types/scenario_start_run_params.py b/src/runloop_api_client/types/scenario_start_run_params.py
index 6704db386..f9161aa2f 100644
--- a/src/runloop_api_client/types/scenario_start_run_params.py
+++ b/src/runloop_api_client/types/scenario_start_run_params.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Optional
+from typing import Dict, Optional
 from typing_extensions import Required, TypedDict
 
 __all__ = ["ScenarioStartRunParams"]
@@ -15,5 +15,8 @@ class ScenarioStartRunParams(TypedDict, total=False):
     benchmark_run_id: Optional[str]
     """Benchmark to associate the run."""
 
+    metadata: Optional[Dict[str, str]]
+    """User defined metadata to attach to the run for organization."""
+
     run_name: Optional[str]
     """Display name of the run."""
diff --git a/src/runloop_api_client/types/scenario_view.py b/src/runloop_api_client/types/scenario_view.py
index d69c792b4..4ba20d9c9 100644
--- a/src/runloop_api_client/types/scenario_view.py
+++ b/src/runloop_api_client/types/scenario_view.py
@@ -1,6 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import Optional
+from typing import Dict, Optional
 
 from .._models import BaseModel
 from .input_context import InputContext
@@ -17,6 +17,9 @@ class ScenarioView(BaseModel):
     input_context: InputContext
     """The input context for the Scenario."""
 
+    metadata: Dict[str, str]
+    """User defined metadata to attach to the scenario for organization."""
+
     name: str
     """The name of the Scenario."""
 
@@ -25,3 +28,10 @@ class ScenarioView(BaseModel):
 
     environment: Optional[ScenarioEnvironment] = None
     """The Environment in which the Scenario is run."""
+
+    reference_output: Optional[str] = None
+    """A string representation of the reference output to solve the scenario.
+
+    Commonly can be the result of a git diff or a sequence of command actions to
+    apply to the environment.
+    """
diff --git a/src/runloop_api_client/types/scoring_function.py b/src/runloop_api_client/types/scoring_function.py
index a5adc2b75..bef775eca 100644
--- a/src/runloop_api_client/types/scoring_function.py
+++ b/src/runloop_api_client/types/scoring_function.py
@@ -9,10 +9,15 @@
 
 class ScoringFunction(BaseModel):
     name: str
-    """Name of scoring function."""
+    """Name of scoring function. Names must only contain [a-zA-Z0-9_-]."""
 
     type: str
-    """Type of the scoring function. Defaults to bash script."""
+    """Type of the scoring function.
+
+    Use 'bash' as type and fill out 'bash_script' field for scoring via custom bash
+    scripts. Otherwise use a type corresponding to a custom scorer function or a
+    public Runloop scorer type.
+    """
 
     weight: float
     """Wight to apply to scoring function score.
@@ -23,8 +28,8 @@ class ScoringFunction(BaseModel):
     bash_script: Optional[str] = None
     """
     A single bash script that sets up the environment, scores, and prints the final
-    score to standard out. Score should be an integer between 0 and 100, and look
-    like "score=[0..100].
+    score to standard out. Score should be a float between 0.0 and 1.0, and look
+    like "score=[0.0..1.0]".
     """
 
     scorer_params: Optional[object] = None
diff --git a/src/runloop_api_client/types/scoring_function_param.py b/src/runloop_api_client/types/scoring_function_param.py
index 1f101bad2..0caa61468 100644
--- a/src/runloop_api_client/types/scoring_function_param.py
+++ b/src/runloop_api_client/types/scoring_function_param.py
@@ -10,10 +10,15 @@
 
 class ScoringFunctionParam(TypedDict, total=False):
     name: Required[str]
-    """Name of scoring function."""
+    """Name of scoring function. Names must only contain [a-zA-Z0-9_-]."""
 
     type: Required[str]
-    """Type of the scoring function. Defaults to bash script."""
+    """Type of the scoring function.
+
+    Use 'bash' as type and fill out 'bash_script' field for scoring via custom bash
+    scripts. Otherwise use a type corresponding to a custom scorer function or a
+    public Runloop scorer type.
+    """
 
     weight: Required[float]
     """Wight to apply to scoring function score.
@@ -24,8 +29,8 @@ class ScoringFunctionParam(TypedDict, total=False):
     bash_script: Optional[str]
     """
     A single bash script that sets up the environment, scores, and prints the final
-    score to standard out. Score should be an integer between 0 and 100, and look
-    like "score=[0..100].
+    score to standard out. Score should be a float between 0.0 and 1.0, and look
+    like "score=[0.0..1.0]".
     """
 
     scorer_params: Optional[object]
diff --git a/src/runloop_api_client/types/scoring_function_result_view.py b/src/runloop_api_client/types/scoring_function_result_view.py
index 7fcbcd50c..8f782df11 100644
--- a/src/runloop_api_client/types/scoring_function_result_view.py
+++ b/src/runloop_api_client/types/scoring_function_result_view.py
@@ -1,5 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
+from typing_extensions import Literal
 
 from .._models import BaseModel
 
@@ -15,3 +16,6 @@ class ScoringFunctionResultView(BaseModel):
 
     scoring_function_name: str
     """Scoring function name that ran."""
+
+    state: Literal["unknown", "complete", "error"]
+    """The state of the scoring function application."""
diff --git a/tests/api_resources/scenarios/test_scorers.py b/tests/api_resources/scenarios/test_scorers.py
index f3cc6cb50..4c04211da 100644
--- a/tests/api_resources/scenarios/test_scorers.py
+++ b/tests/api_resources/scenarios/test_scorers.py
@@ -190,8 +190,19 @@ def test_method_validate_with_all_params(self, client: Runloop) -> None:
             scoring_context={},
             environment_parameters={
                 "blueprint_id": "blueprint_id",
+                "launch_parameters": {
+                    "after_idle": {
+                        "idle_time_seconds": 0,
+                        "on_idle": "shutdown",
+                    },
+                    "available_ports": [0],
+                    "keep_alive_time_seconds": 0,
+                    "launch_commands": ["string"],
+                    "resource_size_request": "SMALL",
+                },
                 "prebuilt_id": "prebuilt_id",
                 "snapshot_id": "snapshot_id",
+                "working_directory": "working_directory",
             },
         )
         assert_matches_type(ScorerValidateResponse, scorer, path=["response"])
@@ -400,8 +411,19 @@ async def test_method_validate_with_all_params(self, async_client: AsyncRunloop)
             scoring_context={},
             environment_parameters={
                 "blueprint_id": "blueprint_id",
+                "launch_parameters": {
+                    "after_idle": {
+                        "idle_time_seconds": 0,
+                        "on_idle": "shutdown",
+                    },
+                    "available_ports": [0],
+                    "keep_alive_time_seconds": 0,
+                    "launch_commands": ["string"],
+                    "resource_size_request": "SMALL",
+                },
                 "prebuilt_id": "prebuilt_id",
                 "snapshot_id": "snapshot_id",
+                "working_directory": "working_directory",
             },
         )
         assert_matches_type(ScorerValidateResponse, scorer, path=["response"])
diff --git a/tests/api_resources/test_benchmarks.py b/tests/api_resources/test_benchmarks.py
index 82ed0bfa1..087dffb1e 100644
--- a/tests/api_resources/test_benchmarks.py
+++ b/tests/api_resources/test_benchmarks.py
@@ -32,6 +32,7 @@ def test_method_create(self, client: Runloop) -> None:
     def test_method_create_with_all_params(self, client: Runloop) -> None:
         benchmark = client.benchmarks.create(
             name="name",
+            metadata={"foo": "string"},
             scenario_ids=["string"],
         )
         assert_matches_type(BenchmarkView, benchmark, path=["response"])
@@ -175,6 +176,7 @@ def test_method_start_run(self, client: Runloop) -> None:
     def test_method_start_run_with_all_params(self, client: Runloop) -> None:
         benchmark = client.benchmarks.start_run(
             benchmark_id="benchmark_id",
+            metadata={"foo": "string"},
             run_name="run_name",
         )
         assert_matches_type(BenchmarkRunView, benchmark, path=["response"])
@@ -218,6 +220,7 @@ async def test_method_create(self, async_client: AsyncRunloop) -> None:
     async def test_method_create_with_all_params(self, async_client: AsyncRunloop) -> None:
         benchmark = await async_client.benchmarks.create(
             name="name",
+            metadata={"foo": "string"},
             scenario_ids=["string"],
         )
         assert_matches_type(BenchmarkView, benchmark, path=["response"])
@@ -361,6 +364,7 @@ async def test_method_start_run(self, async_client: AsyncRunloop) -> None:
     async def test_method_start_run_with_all_params(self, async_client: AsyncRunloop) -> None:
         benchmark = await async_client.benchmarks.start_run(
             benchmark_id="benchmark_id",
+            metadata={"foo": "string"},
             run_name="run_name",
         )
         assert_matches_type(BenchmarkRunView, benchmark, path=["response"])
diff --git a/tests/api_resources/test_scenarios.py b/tests/api_resources/test_scenarios.py
index 78b3cebc2..245da96bd 100644
--- a/tests/api_resources/test_scenarios.py
+++ b/tests/api_resources/test_scenarios.py
@@ -59,9 +59,22 @@ def test_method_create_with_all_params(self, client: Runloop) -> None:
             },
             environment_parameters={
                 "blueprint_id": "blueprint_id",
+                "launch_parameters": {
+                    "after_idle": {
+                        "idle_time_seconds": 0,
+                        "on_idle": "shutdown",
+                    },
+                    "available_ports": [0],
+                    "keep_alive_time_seconds": 0,
+                    "launch_commands": ["string"],
+                    "resource_size_request": "SMALL",
+                },
                 "prebuilt_id": "prebuilt_id",
                 "snapshot_id": "snapshot_id",
+                "working_directory": "working_directory",
             },
+            metadata={"foo": "string"},
+            reference_output="reference_output",
         )
         assert_matches_type(ScenarioView, scenario, path=["response"])
 
@@ -227,6 +240,7 @@ def test_method_start_run_with_all_params(self, client: Runloop) -> None:
         scenario = client.scenarios.start_run(
             scenario_id="scenario_id",
             benchmark_run_id="benchmark_run_id",
+            metadata={"foo": "string"},
             run_name="run_name",
         )
         assert_matches_type(ScenarioRunView, scenario, path=["response"])
@@ -297,9 +311,22 @@ async def test_method_create_with_all_params(self, async_client: AsyncRunloop) -
             },
             environment_parameters={
                 "blueprint_id": "blueprint_id",
+                "launch_parameters": {
+                    "after_idle": {
+                        "idle_time_seconds": 0,
+                        "on_idle": "shutdown",
+                    },
+                    "available_ports": [0],
+                    "keep_alive_time_seconds": 0,
+                    "launch_commands": ["string"],
+                    "resource_size_request": "SMALL",
+                },
                 "prebuilt_id": "prebuilt_id",
                 "snapshot_id": "snapshot_id",
+                "working_directory": "working_directory",
             },
+            metadata={"foo": "string"},
+            reference_output="reference_output",
         )
         assert_matches_type(ScenarioView, scenario, path=["response"])
 
@@ -465,6 +492,7 @@ async def test_method_start_run_with_all_params(self, async_client: AsyncRunloop
         scenario = await async_client.scenarios.start_run(
             scenario_id="scenario_id",
             benchmark_run_id="benchmark_run_id",
+            metadata={"foo": "string"},
             run_name="run_name",
         )
         assert_matches_type(ScenarioRunView, scenario, path=["response"])
diff --git a/tests/test_client.py b/tests/test_client.py
index 4e2d14a39..638f65a36 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -23,6 +23,7 @@
 
 from runloop_api_client import Runloop, AsyncRunloop, APIResponseValidationError
 from runloop_api_client._types import Omit
+from runloop_api_client._utils import maybe_transform
 from runloop_api_client._models import BaseModel, FinalRequestOptions
 from runloop_api_client._constants import RAW_RESPONSE_HEADER
 from runloop_api_client._exceptions import RunloopError, APIStatusError, APITimeoutError, APIResponseValidationError
@@ -32,6 +33,7 @@
     BaseClient,
     make_request_options,
 )
+from runloop_api_client.types.devbox_create_params import DevboxCreateParams
 
 from .utils import update_env
 
@@ -772,7 +774,7 @@ def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> No
         with pytest.raises(APITimeoutError):
             self.client.post(
                 "/v1/devboxes",
-                body=cast(object, dict()),
+                body=cast(object, maybe_transform({}, DevboxCreateParams)),
                 cast_to=httpx.Response,
                 options={"headers": {RAW_RESPONSE_HEADER: "stream"}},
             )
@@ -787,7 +789,7 @@ def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> Non
         with pytest.raises(APIStatusError):
             self.client.post(
                 "/v1/devboxes",
-                body=cast(object, dict()),
+                body=cast(object, maybe_transform({}, DevboxCreateParams)),
                 cast_to=httpx.Response,
                 options={"headers": {RAW_RESPONSE_HEADER: "stream"}},
             )
@@ -1593,7 +1595,7 @@ async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter)
         with pytest.raises(APITimeoutError):
             await self.client.post(
                 "/v1/devboxes",
-                body=cast(object, dict()),
+                body=cast(object, maybe_transform({}, DevboxCreateParams)),
                 cast_to=httpx.Response,
                 options={"headers": {RAW_RESPONSE_HEADER: "stream"}},
             )
@@ -1608,7 +1610,7 @@ async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter)
         with pytest.raises(APIStatusError):
             await self.client.post(
                 "/v1/devboxes",
-                body=cast(object, dict()),
+                body=cast(object, maybe_transform({}, DevboxCreateParams)),
                 cast_to=httpx.Response,
                 options={"headers": {RAW_RESPONSE_HEADER: "stream"}},
             )