From 71d1ad5ef21ef7b7b47b4da0ee12e183f3c3fa14 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Fri, 5 Dec 2025 16:56:57 -0800
Subject: [PATCH 01/31] init commit

---
 src/runloop_api_client/sdk/__init__.py        |   4 +
 src/runloop_api_client/sdk/async_.py          |  18 +
 .../sdk/async_scenario_builder.py             | 451 ++++++++++++++++++
 .../sdk/scenario_builder.py                   | 451 ++++++++++++++++++
 src/runloop_api_client/sdk/sync.py            |  18 +
 tests/sdk/test_async_scenario_builder.py      | 178 +++++++
 tests/sdk/test_scenario_builder.py            | 365 ++++++++++++++
 7 files changed, 1485 insertions(+)
 create mode 100644 src/runloop_api_client/sdk/async_scenario_builder.py
 create mode 100644 src/runloop_api_client/sdk/scenario_builder.py
 create mode 100644 tests/sdk/test_async_scenario_builder.py
 create mode 100644 tests/sdk/test_scenario_builder.py

diff --git a/src/runloop_api_client/sdk/__init__.py b/src/runloop_api_client/sdk/__init__.py
index 1b95e585b..232ef9515 100644
--- a/src/runloop_api_client/sdk/__init__.py
+++ b/src/runloop_api_client/sdk/__init__.py
@@ -33,9 +33,11 @@
 from .async_blueprint import AsyncBlueprint
 from .async_execution import AsyncExecution
 from .execution_result import ExecutionResult
+from .scenario_builder import ScenarioBuilder
 from .async_scenario_run import AsyncScenarioRun
 from .async_storage_object import AsyncStorageObject
 from .async_execution_result import AsyncExecutionResult
+from .async_scenario_builder import AsyncScenarioBuilder
 
 __all__ = [
     # Main SDK entry points
@@ -71,6 +73,8 @@
     "AsyncScenario",
     "ScenarioRun",
     "AsyncScenarioRun",
+    "ScenarioBuilder",
+    "AsyncScenarioBuilder",
     "Scorer",
     "AsyncScorer",
     "Snapshot",
diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py
index b54e831a5..a3ef1ffe5 100644
--- a/src/runloop_api_client/sdk/async_.py
+++ b/src/runloop_api_client/sdk/async_.py
@@ -37,6 +37,7 @@
 from .async_blueprint import AsyncBlueprint
 from ..lib.context_loader import TarFilter, build_directory_tar
 from .async_storage_object import AsyncStorageObject
+from .async_scenario_builder import AsyncScenarioBuilder
 from ..types.object_create_params import ContentType
 from ..types.shared_params.agent_source import Git, Npm, Pip, Object
 
@@ -773,6 +774,13 @@ class AsyncScenarioOps:
         >>> scenario = runloop.scenario.from_id("scn-xxx")
         >>> run = await scenario.run()
         >>> scenarios = await runloop.scenario.list()
+
+    Example using builder:
+        >>> builder = runloop.scenario.builder("my-scenario")
+        >>> builder.from_blueprint_id("bp-xxx")
+        >>> builder.with_problem_statement("Fix the bug")
+        >>> builder.add_test_scorer("tests", test_command="pytest")
+        >>> scenario = await builder.push()
     """
 
     def __init__(self, client: AsyncRunloop) -> None:
@@ -783,6 +791,16 @@ def __init__(self, client: AsyncRunloop) -> None:
         """
         self._client = client
 
+    def builder(self, name: str) -> AsyncScenarioBuilder:
+        """Create a new scenario builder bound to this client.
+
+        :param name: Name for the scenario
+        :type name: str
+        :return: A new AsyncScenarioBuilder; nothing is created until push() is awaited
+        :rtype: AsyncScenarioBuilder
+        """
+        return AsyncScenarioBuilder(self._client, name)
+
     def from_id(self, scenario_id: str) -> AsyncScenario:
         """Get an AsyncScenario instance for an existing scenario ID.
 
diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
new file mode 100644
index 000000000..1d650afe2
--- /dev/null
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -0,0 +1,451 @@
+"""AsyncScenarioBuilder for constructing scenarios with a fluent API."""
+
+from __future__ import annotations
+
+from typing import Any, Dict, List, Iterable, Optional
+from typing_extensions import Self, Literal, override
+
+from .._client import AsyncRunloop
+from .async_scenario import AsyncScenario
+from ..types.scoring_function_param import (
+    Scorer,
+    ScoringFunctionParam,
+    ScorerCustomScoringFunction,
+    ScorerAstGrepScoringFunction,
+    ScorerCommandScoringFunction,
+    ScorerTestBasedScoringFunction,
+    ScorerBashScriptScoringFunction,
+    ScorerPythonScriptScoringFunction,
+    ScorerTestBasedScoringFunctionTestFile,
+)
+
+
+class AsyncScenarioBuilder:
+    """Async builder for constructing scenarios with a fluent API.
+
+    Provides a step-by-step interface for configuring all aspects of a scenario
+    before pushing it to the platform.
+
+    Example:
+        >>> builder = sdk.scenario.builder("my-scenario")
+        >>> builder.from_blueprint_id("bp-xxx")
+        >>> builder.with_working_directory("/app")
+        >>> builder.with_problem_statement("Fix the bug in main.py")
+        >>> builder.add_test_scorer("tests", test_command="pytest")
+        >>> scenario = await builder.push()
+    """
+
+    def __init__(self, client: AsyncRunloop, name: str) -> None:
+        """Initialize an empty builder; nothing is sent until push() is awaited.
+
+        :param client: AsyncRunloop client used by push() to create the scenario
+        :type client: AsyncRunloop
+        :param name: Name for the scenario
+        :type name: str
+        """
+        self._client = client
+        self._name = name
+
+        # Environment configuration (the blueprint and snapshot setters clear each other)
+        self._blueprint_id: Optional[str] = None
+        self._snapshot_id: Optional[str] = None
+        self._working_directory: Optional[str] = None
+
+        # Input context (the problem statement is required by _build_params)
+        self._problem_statement: Optional[str] = None
+        self._additional_context: Optional[object] = None
+
+        # Scoring: entries keep their raw weights; normalization happens in _build_params
+        self._scorers: List[ScoringFunctionParam] = []
+
+        # Metadata and other options (left out of the request while unset or empty)
+        self._metadata: Optional[Dict[str, str]] = None
+        self._reference_output: Optional[str] = None
+        self._required_env_vars: Optional[List[str]] = None
+        self._required_secrets: Optional[List[str]] = None
+        self._validation_type: Optional[Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]] = None
+
+    @override
+    def __repr__(self) -> str:  # Debug representation: shows only the scenario name
+        return f"<AsyncScenarioBuilder name={self._name!r}>"
+
+    @property
+    def name(self) -> str:
+        """Return the scenario name given at construction (read-only property).
+
+        :return: Scenario name
+        :rtype: str
+        """
+        return self._name
+
+    def from_blueprint_id(self, blueprint_id: str) -> Self:
+        """Use a blueprint for the scenario environment, discarding any snapshot ID.
+
+        :param blueprint_id: Blueprint ID to use
+        :type blueprint_id: str
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._blueprint_id = blueprint_id
+        self._snapshot_id = None  # Only one environment source is kept; the last call wins
+        return self
+
+    def from_snapshot_id(self, snapshot_id: str) -> Self:
+        """Use a snapshot for the scenario environment, discarding any blueprint ID.
+
+        :param snapshot_id: Snapshot ID to use
+        :type snapshot_id: str
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._snapshot_id = snapshot_id
+        self._blueprint_id = None  # Only one environment source is kept; the last call wins
+        return self
+
+    def with_working_directory(self, directory: str) -> Self:
+        """Set the working directory for the scenario environment.
+
+        :param directory: Working directory path (an empty string is treated as unset)
+        :type directory: str
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._working_directory = directory
+        return self
+
+    def with_problem_statement(self, statement: str) -> Self:
+        """Set the problem statement; push() raises ValueError while it is unset or empty.
+
+        :param statement: Problem statement text
+        :type statement: str
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._problem_statement = statement
+        return self
+
+    def with_additional_context(self, context: object) -> Self:
+        """Set additional structured context, sent alongside the problem statement.
+
+        :param context: Additional context (JSON-serializable); any non-None value is sent
+        :type context: object
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._additional_context = context
+        return self
+
+    def _add_scorer(self, name: str, weight: float, scorer: Scorer) -> Self:
+        """Append a named scorer entry; weights are normalized later in _build_params.
+
+        :raises ValueError: If weight is not a positive, finite number (NaN included)
+        """
+        if not 0 < weight < float("inf"):  # Chained form is False for NaN, so NaN is rejected
+            raise ValueError(f"Scorer weight must be positive, got {weight}")
+        self._scorers.append({"name": name, "weight": weight, "scorer": scorer})
+        return self
+
+    def add_test_scorer(
+        self,
+        name: str,
+        *,
+        test_command: str,
+        weight: float = 1.0,
+        test_files: Optional[Iterable[ScorerTestBasedScoringFunctionTestFile]] = None,
+    ) -> Self:
+        """Add a test-based scorer that runs a test command.
+
+        :param name: Name of the scoring function
+        :type name: str
+        :param test_command: Command to run tests (e.g., "pytest")
+        :type test_command: str
+        :param weight: Positive weight for this scorer (normalized automatically)
+        :type weight: float
+        :param test_files: Optional test files to create before running (copied into a list)
+        :type test_files: Optional[Iterable[ScorerTestBasedScoringFunctionTestFile]]
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        scorer: ScorerTestBasedScoringFunction = {
+            "type": "test_based_scorer",
+            "test_command": test_command,
+        }
+        if test_files:
+            scorer["test_files"] = list(test_files)  # Snapshot: one-shot iterators stay reusable
+        return self._add_scorer(name, weight, scorer)
+
+    def add_command_scorer(
+        self,
+        name: str,
+        *,
+        command: str,
+        weight: float = 1.0,
+    ) -> Self:
+        """Add a command scorer that runs a shell command.
+
+        :param name: Name of the scoring function
+        :type name: str
+        :param command: Shell command to execute
+        :type command: str
+        :param weight: Positive weight for this scorer (normalized automatically)
+        :type weight: float
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        scorer: ScorerCommandScoringFunction = {
+            "type": "command_scorer",
+            "command": command,
+        }
+        return self._add_scorer(name, weight, scorer)
+
+    def add_bash_scorer(
+        self,
+        name: str,
+        *,
+        bash_script: str,
+        weight: float = 1.0,
+    ) -> Self:
+        """Add a bash script scorer.
+
+        The script should output "score=X.X" where X.X is a float between 0.0 and 1.0.
+
+        :param name: Name of the scoring function
+        :type name: str
+        :param bash_script: Bash script content
+        :type bash_script: str
+        :param weight: Positive weight for this scorer (normalized automatically)
+        :type weight: float
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        scorer: ScorerBashScriptScoringFunction = {
+            "type": "bash_script_scorer",
+            "bash_script": bash_script,
+        }
+        return self._add_scorer(name, weight, scorer)
+
+    def add_python_scorer(
+        self,
+        name: str,
+        *,
+        python_script: str,
+        weight: float = 1.0,
+        python_version_constraint: Optional[str] = None,
+        requirements_contents: Optional[str] = None,
+    ) -> Self:
+        """Add a Python script scorer.
+
+        The script should print the score (0.0-1.0) to stdout.
+
+        :param name: Name of the scoring function
+        :type name: str
+        :param python_script: Python script content
+        :type python_script: str
+        :param weight: Positive weight for this scorer (normalized automatically)
+        :type weight: float
+        :param python_version_constraint: Python version (default "==3.12.10"); only sent when non-empty
+        :type python_version_constraint: Optional[str]
+        :param requirements_contents: pip requirements.txt content; only sent when non-empty
+        :type requirements_contents: Optional[str]
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        scorer: ScorerPythonScriptScoringFunction = {
+            "type": "python_script_scorer",
+            "python_script": python_script,
+        }
+        if python_version_constraint:
+            scorer["python_version_constraint"] = python_version_constraint
+        if requirements_contents:
+            scorer["requirements_contents"] = requirements_contents
+        return self._add_scorer(name, weight, scorer)
+
+    def add_ast_grep_scorer(
+        self,
+        name: str,
+        *,
+        pattern: str,
+        weight: float = 1.0,
+        search_directory: str = ".",
+        lang: Optional[str] = None,
+    ) -> Self:
+        """Add an AST grep scorer that matches code patterns.
+
+        :param name: Name of the scoring function
+        :type name: str
+        :param pattern: AST pattern to match
+        :type pattern: str
+        :param weight: Positive weight for this scorer (normalized automatically)
+        :type weight: float
+        :param search_directory: Directory to search (default "."); always sent
+        :type search_directory: str
+        :param lang: Language of the pattern (optional; only sent when non-empty)
+        :type lang: Optional[str]
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        scorer: ScorerAstGrepScoringFunction = {
+            "type": "ast_grep_scorer",
+            "pattern": pattern,
+            "search_directory": search_directory,
+        }
+        if lang:
+            scorer["lang"] = lang
+        return self._add_scorer(name, weight, scorer)
+
+    def add_custom_scorer(
+        self,
+        name: str,
+        *,
+        custom_scorer_type: str,
+        weight: float = 1.0,
+        scorer_params: Optional[object] = None,
+    ) -> Self:
+        """Add a custom scorer registered with Runloop.
+
+        :param name: Name of the scoring function
+        :type name: str
+        :param custom_scorer_type: Type identifier registered with Runloop
+        :type custom_scorer_type: str
+        :param weight: Positive weight for this scorer (normalized automatically)
+        :type weight: float
+        :param scorer_params: Additional JSON parameters; any non-None value is forwarded
+        :type scorer_params: Optional[object]
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        scorer: ScorerCustomScoringFunction = {
+            "type": "custom_scorer",
+            "custom_scorer_type": custom_scorer_type,
+        }
+        if scorer_params is not None:  # Keep explicit falsy params such as {} or 0
+            scorer["scorer_params"] = scorer_params
+        return self._add_scorer(name, weight, scorer)
+
+    def with_metadata(self, metadata: Dict[str, str]) -> Self:
+        """Set metadata for the scenario, replacing any previously set metadata.
+
+        :param metadata: Key-value metadata (stored by reference; an empty dict is omitted)
+        :type metadata: Dict[str, str]
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._metadata = metadata
+        return self
+
+    def with_reference_output(self, output: str) -> Self:
+        """Set the reference output/solution for validation.
+
+        :param output: Reference output (e.g., git diff); an empty string is omitted
+        :type output: str
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._reference_output = output
+        return self
+
+    def with_required_env_vars(self, env_vars: List[str]) -> Self:
+        """Set required environment variables, replacing any earlier list.
+
+        :param env_vars: List of required environment variable names (omitted when empty)
+        :type env_vars: List[str]
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._required_env_vars = env_vars
+        return self
+
+    def with_required_secrets(self, secrets: List[str]) -> Self:
+        """Set required secrets, replacing any earlier list.
+
+        :param secrets: List of required secret names (omitted when empty)
+        :type secrets: List[str]
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._required_secrets = secrets
+        return self
+
+    def with_validation_type(self, validation_type: Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]) -> Self:
+        """Set the validation strategy (left out of the request unless set).
+
+        :param validation_type: Validation type
+        :type validation_type: Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._validation_type = validation_type
+        return self
+
+    def _build_params(self) -> Dict[str, Any]:
+        """Validate the builder state and assemble the keyword arguments for scenario creation.
+
+        Weights are normalized to sum to 1.0 on copies; the stored scorers keep their raw weights.
+
+        :raises ValueError: If the problem statement is unset/empty or no scorer was added
+        :return: Parameters for scenario creation
+        :rtype: Dict[str, Any]
+        """
+        if not self._problem_statement:
+            raise ValueError("Problem statement is required. Call with_problem_statement() first.")
+
+        if not self._scorers:
+            raise ValueError(
+                "At least one scorer is required. "
+                "Call add_test_scorer(), add_bash_scorer(), or another scorer method first."
+            )
+
+        # Normalize weights to sum to 1.0 (every weight was checked to be positive in _add_scorer)
+        total_weight = sum(s["weight"] for s in self._scorers)
+        normalized_scorers = [{**s, "weight": s["weight"] / total_weight} for s in self._scorers]
+
+        params: Dict[str, Any] = {
+            "name": self._name,
+            "input_context": {
+                "problem_statement": self._problem_statement,
+            },
+            "scoring_contract": {
+                "scoring_function_parameters": normalized_scorers,
+            },
+        }
+
+        # Add additional context whenever it is not None (falsy values are still sent)
+        if self._additional_context is not None:
+            params["input_context"]["additional_context"] = self._additional_context
+
+        # Build environment parameters if any are set; unset or empty values are skipped
+        env_params: Dict[str, Any] = {}
+        if self._blueprint_id:
+            env_params["blueprint_id"] = self._blueprint_id
+        if self._snapshot_id:
+            env_params["snapshot_id"] = self._snapshot_id
+        if self._working_directory:
+            env_params["working_directory"] = self._working_directory
+
+        if env_params:
+            params["environment_parameters"] = env_params
+
+        # Add optional fields; unset or empty values are left out of the request
+        if self._metadata:
+            params["metadata"] = self._metadata
+        if self._reference_output:
+            params["reference_output"] = self._reference_output
+        if self._required_env_vars:
+            params["required_environment_variables"] = self._required_env_vars
+        if self._required_secrets:
+            params["required_secret_names"] = self._required_secrets
+        if self._validation_type:
+            params["validation_type"] = self._validation_type
+
+        return params
+
+    async def push(self) -> AsyncScenario:
+        """Validate the builder state and create the scenario on the platform.
+
+        :raises ValueError: If the problem statement or scorers are missing
+        :return: Wrapper around the ID of the newly created scenario
+        :rtype: AsyncScenario
+        """
+        params = self._build_params()
+        scenario_view = await self._client.scenarios.create(**params)
+        return AsyncScenario(self._client, scenario_view.id)
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
new file mode 100644
index 000000000..49e693113
--- /dev/null
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -0,0 +1,451 @@
+"""ScenarioBuilder for constructing scenarios with a fluent API."""
+
+from __future__ import annotations
+
+from typing import Any, Dict, List, Iterable, Optional
+from typing_extensions import Self, Literal, override
+
+from .._client import Runloop
+from .scenario import Scenario
+from ..types.scoring_function_param import (
+    Scorer,
+    ScoringFunctionParam,
+    ScorerCustomScoringFunction,
+    ScorerAstGrepScoringFunction,
+    ScorerCommandScoringFunction,
+    ScorerTestBasedScoringFunction,
+    ScorerBashScriptScoringFunction,
+    ScorerPythonScriptScoringFunction,
+    ScorerTestBasedScoringFunctionTestFile,
+)
+
+
+class ScenarioBuilder:
+    """Builder for constructing scenarios with a fluent API.
+
+    Provides a step-by-step interface for configuring all aspects of a scenario
+    before pushing it to the platform.
+
+    Example:
+        >>> builder = sdk.scenario.builder("my-scenario")
+        >>> builder.from_blueprint_id("bp-xxx")
+        >>> builder.with_working_directory("/app")
+        >>> builder.with_problem_statement("Fix the bug in main.py")
+        >>> builder.add_test_scorer("tests", test_command="pytest")
+        >>> scenario = builder.push()
+    """
+
+    def __init__(self, client: Runloop, name: str) -> None:
+        """Initialize an empty builder; nothing is sent until push() is called.
+
+        :param client: Runloop client used by push() to create the scenario
+        :type client: Runloop
+        :param name: Name for the scenario
+        :type name: str
+        """
+        self._client = client
+        self._name = name
+
+        # Environment configuration (the blueprint and snapshot setters clear each other)
+        self._blueprint_id: Optional[str] = None
+        self._snapshot_id: Optional[str] = None
+        self._working_directory: Optional[str] = None
+
+        # Input context (the problem statement is required by _build_params)
+        self._problem_statement: Optional[str] = None
+        self._additional_context: Optional[object] = None
+
+        # Scoring: entries keep their raw weights; normalization happens in _build_params
+        self._scorers: List[ScoringFunctionParam] = []
+
+        # Metadata and other options (left out of the request while unset or empty)
+        self._metadata: Optional[Dict[str, str]] = None
+        self._reference_output: Optional[str] = None
+        self._required_env_vars: Optional[List[str]] = None
+        self._required_secrets: Optional[List[str]] = None
+        self._validation_type: Optional[Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]] = None
+
+    @override
+    def __repr__(self) -> str:  # Debug representation: shows only the scenario name
+        return f"<ScenarioBuilder name={self._name!r}>"
+
+    @property
+    def name(self) -> str:
+        """Return the scenario name given at construction (read-only property).
+
+        :return: Scenario name
+        :rtype: str
+        """
+        return self._name
+
+    def from_blueprint_id(self, blueprint_id: str) -> Self:
+        """Use a blueprint for the scenario environment, discarding any snapshot ID.
+
+        :param blueprint_id: Blueprint ID to use
+        :type blueprint_id: str
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._blueprint_id = blueprint_id
+        self._snapshot_id = None  # Only one environment source is kept; the last call wins
+        return self
+
+    def from_snapshot_id(self, snapshot_id: str) -> Self:
+        """Use a snapshot for the scenario environment, discarding any blueprint ID.
+
+        :param snapshot_id: Snapshot ID to use
+        :type snapshot_id: str
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._snapshot_id = snapshot_id
+        self._blueprint_id = None  # Only one environment source is kept; the last call wins
+        return self
+
+    def with_working_directory(self, directory: str) -> Self:
+        """Set the working directory for the scenario environment.
+
+        :param directory: Working directory path (an empty string is treated as unset)
+        :type directory: str
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._working_directory = directory
+        return self
+
+    def with_problem_statement(self, statement: str) -> Self:
+        """Set the problem statement; push() raises ValueError while it is unset or empty.
+
+        :param statement: Problem statement text
+        :type statement: str
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._problem_statement = statement
+        return self
+
+    def with_additional_context(self, context: object) -> Self:
+        """Set additional structured context, sent alongside the problem statement.
+
+        :param context: Additional context (JSON-serializable); any non-None value is sent
+        :type context: object
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._additional_context = context
+        return self
+
+    def _add_scorer(self, name: str, weight: float, scorer: Scorer) -> Self:
+        """Append a named scorer entry; weights are normalized later in _build_params.
+
+        :raises ValueError: If weight is not a positive, finite number (NaN included)
+        """
+        if not 0 < weight < float("inf"):  # Chained form is False for NaN, so NaN is rejected
+            raise ValueError(f"Scorer weight must be positive, got {weight}")
+        self._scorers.append({"name": name, "weight": weight, "scorer": scorer})
+        return self
+
+    def add_test_scorer(
+        self,
+        name: str,
+        *,
+        test_command: str,
+        weight: float = 1.0,
+        test_files: Optional[Iterable[ScorerTestBasedScoringFunctionTestFile]] = None,
+    ) -> Self:
+        """Add a test-based scorer that runs a test command.
+
+        :param name: Name of the scoring function
+        :type name: str
+        :param test_command: Command to run tests (e.g., "pytest")
+        :type test_command: str
+        :param weight: Positive weight for this scorer (normalized automatically)
+        :type weight: float
+        :param test_files: Optional test files to create before running (copied into a list)
+        :type test_files: Optional[Iterable[ScorerTestBasedScoringFunctionTestFile]]
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        scorer: ScorerTestBasedScoringFunction = {
+            "type": "test_based_scorer",
+            "test_command": test_command,
+        }
+        if test_files:
+            scorer["test_files"] = list(test_files)  # Snapshot: one-shot iterators stay reusable
+        return self._add_scorer(name, weight, scorer)
+
+    def add_command_scorer(
+        self,
+        name: str,
+        *,
+        command: str,
+        weight: float = 1.0,
+    ) -> Self:
+        """Add a command scorer that runs a shell command.
+
+        :param name: Name of the scoring function
+        :type name: str
+        :param command: Shell command to execute
+        :type command: str
+        :param weight: Positive weight for this scorer (normalized automatically)
+        :type weight: float
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        scorer: ScorerCommandScoringFunction = {
+            "type": "command_scorer",
+            "command": command,
+        }
+        return self._add_scorer(name, weight, scorer)
+
+    def add_bash_scorer(
+        self,
+        name: str,
+        *,
+        bash_script: str,
+        weight: float = 1.0,
+    ) -> Self:
+        """Add a bash script scorer.
+
+        The script should output "score=X.X" where X.X is a float between 0.0 and 1.0.
+
+        :param name: Name of the scoring function
+        :type name: str
+        :param bash_script: Bash script content
+        :type bash_script: str
+        :param weight: Positive weight for this scorer (normalized automatically)
+        :type weight: float
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        scorer: ScorerBashScriptScoringFunction = {
+            "type": "bash_script_scorer",
+            "bash_script": bash_script,
+        }
+        return self._add_scorer(name, weight, scorer)
+
+    def add_python_scorer(
+        self,
+        name: str,
+        *,
+        python_script: str,
+        weight: float = 1.0,
+        python_version_constraint: Optional[str] = None,
+        requirements_contents: Optional[str] = None,
+    ) -> Self:
+        """Add a Python script scorer.
+
+        The script should print the score (0.0-1.0) to stdout.
+
+        :param name: Name of the scoring function
+        :type name: str
+        :param python_script: Python script content
+        :type python_script: str
+        :param weight: Positive weight for this scorer (normalized automatically)
+        :type weight: float
+        :param python_version_constraint: Python version (default "==3.12.10"); only sent when non-empty
+        :type python_version_constraint: Optional[str]
+        :param requirements_contents: pip requirements.txt content; only sent when non-empty
+        :type requirements_contents: Optional[str]
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        scorer: ScorerPythonScriptScoringFunction = {
+            "type": "python_script_scorer",
+            "python_script": python_script,
+        }
+        if python_version_constraint:
+            scorer["python_version_constraint"] = python_version_constraint
+        if requirements_contents:
+            scorer["requirements_contents"] = requirements_contents
+        return self._add_scorer(name, weight, scorer)
+
+    def add_ast_grep_scorer(
+        self,
+        name: str,
+        *,
+        pattern: str,
+        weight: float = 1.0,
+        search_directory: str = ".",
+        lang: Optional[str] = None,
+    ) -> Self:
+        """Add an AST grep scorer that matches code patterns.
+
+        :param name: Name of the scoring function
+        :type name: str
+        :param pattern: AST pattern to match
+        :type pattern: str
+        :param weight: Positive weight for this scorer (normalized automatically)
+        :type weight: float
+        :param search_directory: Directory to search (default "."); always sent
+        :type search_directory: str
+        :param lang: Language of the pattern (optional; only sent when non-empty)
+        :type lang: Optional[str]
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        scorer: ScorerAstGrepScoringFunction = {
+            "type": "ast_grep_scorer",
+            "pattern": pattern,
+            "search_directory": search_directory,
+        }
+        if lang:
+            scorer["lang"] = lang
+        return self._add_scorer(name, weight, scorer)
+
+    def add_custom_scorer(
+        self,
+        name: str,
+        *,
+        custom_scorer_type: str,
+        weight: float = 1.0,
+        scorer_params: Optional[object] = None,
+    ) -> Self:
+        """Add a custom scorer registered with Runloop.
+
+        :param name: Name of the scoring function
+        :type name: str
+        :param custom_scorer_type: Type identifier registered with Runloop
+        :type custom_scorer_type: str
+        :param weight: Positive weight for this scorer (normalized automatically)
+        :type weight: float
+        :param scorer_params: Additional JSON parameters; any non-None value is forwarded
+        :type scorer_params: Optional[object]
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        scorer: ScorerCustomScoringFunction = {
+            "type": "custom_scorer",
+            "custom_scorer_type": custom_scorer_type,
+        }
+        if scorer_params is not None:  # Keep explicit falsy params such as {} or 0
+            scorer["scorer_params"] = scorer_params
+        return self._add_scorer(name, weight, scorer)
+
+    def with_metadata(self, metadata: Dict[str, str]) -> Self:
+        """Set metadata for the scenario, replacing any previously set metadata.
+
+        :param metadata: Key-value metadata (stored by reference; an empty dict is omitted)
+        :type metadata: Dict[str, str]
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._metadata = metadata
+        return self
+
+    def with_reference_output(self, output: str) -> Self:
+        """Set the reference output/solution for validation.
+
+        :param output: Reference output (e.g., git diff); an empty string is omitted
+        :type output: str
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._reference_output = output
+        return self
+
+    def with_required_env_vars(self, env_vars: List[str]) -> Self:
+        """Set required environment variables, replacing any earlier list.
+
+        :param env_vars: List of required environment variable names (omitted when empty)
+        :type env_vars: List[str]
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._required_env_vars = env_vars
+        return self
+
+    def with_required_secrets(self, secrets: List[str]) -> Self:
+        """Set required secrets, replacing any earlier list.
+
+        :param secrets: List of required secret names (omitted when empty)
+        :type secrets: List[str]
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._required_secrets = secrets
+        return self
+
+    def with_validation_type(self, validation_type: Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]) -> Self:
+        """Set the validation strategy (left out of the request unless set).
+
+        :param validation_type: Validation type
+        :type validation_type: Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]
+        :return: Self for method chaining
+        :rtype: Self
+        """
+        self._validation_type = validation_type
+        return self
+
+    def _build_params(self) -> Dict[str, Any]:
+        """Validate the builder state and assemble the keyword arguments for scenario creation.
+
+        Weights are normalized to sum to 1.0 on copies; the stored scorers keep their raw weights.
+
+        :raises ValueError: If the problem statement is unset/empty or no scorer was added
+        :return: Parameters for scenario creation
+        :rtype: Dict[str, Any]
+        """
+        if not self._problem_statement:
+            raise ValueError("Problem statement is required. Call with_problem_statement() first.")
+
+        if not self._scorers:
+            raise ValueError(
+                "At least one scorer is required. "
+                "Call add_test_scorer(), add_bash_scorer(), or another scorer method first."
+            )
+
+        # Normalize weights to sum to 1.0 (every weight was checked to be positive in _add_scorer)
+        total_weight = sum(s["weight"] for s in self._scorers)
+        normalized_scorers = [{**s, "weight": s["weight"] / total_weight} for s in self._scorers]
+
+        params: Dict[str, Any] = {
+            "name": self._name,
+            "input_context": {
+                "problem_statement": self._problem_statement,
+            },
+            "scoring_contract": {
+                "scoring_function_parameters": normalized_scorers,
+            },
+        }
+
+        # Add additional context whenever it is not None (falsy values are still sent)
+        if self._additional_context is not None:
+            params["input_context"]["additional_context"] = self._additional_context
+
+        # Build environment parameters if any are set; unset or empty values are skipped
+        env_params: Dict[str, Any] = {}
+        if self._blueprint_id:
+            env_params["blueprint_id"] = self._blueprint_id
+        if self._snapshot_id:
+            env_params["snapshot_id"] = self._snapshot_id
+        if self._working_directory:
+            env_params["working_directory"] = self._working_directory
+
+        if env_params:
+            params["environment_parameters"] = env_params
+
+        # Add optional fields; unset or empty values are left out of the request
+        if self._metadata:
+            params["metadata"] = self._metadata
+        if self._reference_output:
+            params["reference_output"] = self._reference_output
+        if self._required_env_vars:
+            params["required_environment_variables"] = self._required_env_vars
+        if self._required_secrets:
+            params["required_secret_names"] = self._required_secrets
+        if self._validation_type:
+            params["validation_type"] = self._validation_type
+
+        return params
+
+    def push(self) -> Scenario:
+        """Validate the builder state and create the scenario on the platform.
+
+        :raises ValueError: If the problem statement or scorers are missing
+        :return: Wrapper around the ID of the newly created scenario
+        :rtype: Scenario
+        """
+        params = self._build_params()
+        scenario_view = self._client.scenarios.create(**params)
+        return Scenario(self._client, scenario_view.id)
diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py
index 6b38b5091..28e13e404 100644
--- a/src/runloop_api_client/sdk/sync.py
+++ b/src/runloop_api_client/sdk/sync.py
@@ -36,6 +36,7 @@
 from .blueprint import Blueprint
 from .storage_object import StorageObject
 from ..lib.context_loader import TarFilter, build_directory_tar
+from .scenario_builder import ScenarioBuilder
 from ..types.object_create_params import ContentType
 from ..types.shared_params.agent_source import Git, Npm, Pip, Object
 
@@ -794,6 +795,13 @@ class ScenarioOps:
         >>> scenario = runloop.scenario.from_id("scn-xxx")
         >>> run = scenario.run()
         >>> scenarios = runloop.scenario.list()
+
+    Example using builder:
+        >>> builder = runloop.scenario.builder("my-scenario")
+        >>> builder.from_blueprint_id("bp-xxx")
+        >>> builder.with_problem_statement("Fix the bug")
+        >>> builder.add_test_scorer("tests", test_command="pytest")
+        >>> scenario = builder.push()
     """
 
     def __init__(self, client: Runloop) -> None:
@@ -804,6 +812,16 @@ def __init__(self, client: Runloop) -> None:
         """
         self._client = client
 
+    def builder(self, name: str) -> ScenarioBuilder:
+        """Create a new scenario builder.
+
+        :param name: Name for the scenario
+        :type name: str
+        :return: A new ScenarioBuilder instance
+        :rtype: ScenarioBuilder
+        """
+        return ScenarioBuilder(self._client, name)
+
     def from_id(self, scenario_id: str) -> Scenario:
         """Get a Scenario instance for an existing scenario ID.
 
diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py
new file mode 100644
index 000000000..3ea9d2631
--- /dev/null
+++ b/tests/sdk/test_async_scenario_builder.py
@@ -0,0 +1,178 @@
+"""Unit tests for AsyncScenarioBuilder class."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from runloop_api_client.sdk.async_scenario_builder import AsyncScenarioBuilder
+
+
+class TestAsyncScenarioBuilder:
+    """Tests for the asynchronous AsyncScenarioBuilder."""
+
+    @pytest.fixture
+    def mock_async_client(self) -> MagicMock:
+        """Create a mock AsyncRunloop client."""
+        client = MagicMock()
+        client.scenarios = MagicMock()
+        client.scenarios.create = AsyncMock()
+        return client
+
+    @pytest.fixture
+    def builder(self, mock_async_client: MagicMock) -> AsyncScenarioBuilder:
+        """Create an AsyncScenarioBuilder instance with mock client."""
+        return AsyncScenarioBuilder(mock_async_client, "test-scenario")
+
+    def test_init(self, mock_async_client: MagicMock) -> None:
+        """Test builder initialization."""
+        builder = AsyncScenarioBuilder(mock_async_client, "my-scenario")
+
+        assert builder._client is mock_async_client
+        assert builder._name == "my-scenario"
+        assert builder.name == "my-scenario"
+
+    def test_repr(self, builder: AsyncScenarioBuilder) -> None:
+        """Test builder __repr__."""
+        assert repr(builder) == "<AsyncScenarioBuilder name='test-scenario'>"
+
+    def test_from_blueprint_id_returns_self(self, builder: AsyncScenarioBuilder) -> None:
+        """Test from_blueprint_id returns self for chaining."""
+        result = builder.from_blueprint_id("bp-123")
+
+        assert result is builder
+        assert builder._blueprint_id == "bp-123"
+        assert builder._snapshot_id is None
+
+    def test_from_snapshot_id_returns_self(self, builder: AsyncScenarioBuilder) -> None:
+        """Test from_snapshot_id returns self for chaining."""
+        result = builder.from_snapshot_id("snap-123")
+
+        assert result is builder
+        assert builder._snapshot_id == "snap-123"
+        assert builder._blueprint_id is None
+
+    def test_with_working_directory_returns_self(self, builder: AsyncScenarioBuilder) -> None:
+        """Test with_working_directory returns self for chaining."""
+        result = builder.with_working_directory("/app")
+
+        assert result is builder
+        assert builder._working_directory == "/app"
+
+    def test_with_problem_statement_returns_self(self, builder: AsyncScenarioBuilder) -> None:
+        """Test with_problem_statement returns self for chaining."""
+        result = builder.with_problem_statement("Fix the bug")
+
+        assert result is builder
+        assert builder._problem_statement == "Fix the bug"
+
+    def test_add_test_scorer(self, builder: AsyncScenarioBuilder) -> None:
+        """Test add_test_scorer method."""
+        result = builder.add_test_scorer(
+            "my-tests",
+            test_command="pytest",
+            weight=2.0,
+        )
+
+        assert result is builder
+        assert len(builder._scorers) == 1
+        assert builder._scorers[0]["name"] == "my-tests"
+        assert builder._scorers[0]["scorer"]["type"] == "test_based_scorer"
+
+    def test_add_command_scorer(self, builder: AsyncScenarioBuilder) -> None:
+        """Test add_command_scorer method."""
+        result = builder.add_command_scorer(
+            "cmd-scorer",
+            command="./check.sh",
+        )
+
+        assert result is builder
+        assert builder._scorers[0]["scorer"]["type"] == "command_scorer"
+
+    def test_add_bash_scorer(self, builder: AsyncScenarioBuilder) -> None:
+        """Test add_bash_scorer method."""
+        result = builder.add_bash_scorer(
+            "bash-scorer",
+            bash_script="echo 'score=1.0'",
+        )
+
+        assert result is builder
+        assert builder._scorers[0]["scorer"]["type"] == "bash_script_scorer"
+
+    def test_build_params_missing_problem_statement(self, builder: AsyncScenarioBuilder) -> None:
+        """Test _build_params raises if problem statement is missing."""
+        builder.add_test_scorer("test", test_command="pytest")
+
+        with pytest.raises(ValueError, match="Problem statement is required"):
+            builder._build_params()
+
+    def test_build_params_missing_scorer(self, builder: AsyncScenarioBuilder) -> None:
+        """Test _build_params raises if no scorers are added."""
+        builder.with_problem_statement("Fix the bug")
+
+        with pytest.raises(ValueError, match="At least one scorer is required"):
+            builder._build_params()
+
+    def test_build_params_minimal(self, builder: AsyncScenarioBuilder) -> None:
+        """Test _build_params with minimal configuration."""
+        builder.with_problem_statement("Fix the bug")
+        builder.add_test_scorer("tests", test_command="pytest")
+
+        params = builder._build_params()
+
+        assert params["name"] == "test-scenario"
+        assert params["input_context"]["problem_statement"] == "Fix the bug"
+        assert len(params["scoring_contract"]["scoring_function_parameters"]) == 1
+
+    def test_build_params_with_environment(self, builder: AsyncScenarioBuilder) -> None:
+        """Test _build_params includes environment parameters."""
+        builder.with_problem_statement("Fix the bug")
+        builder.add_test_scorer("tests", test_command="pytest")
+        builder.from_blueprint_id("bp-123")
+        builder.with_working_directory("/app")
+
+        params = builder._build_params()
+
+        assert params["environment_parameters"]["blueprint_id"] == "bp-123"
+        assert params["environment_parameters"]["working_directory"] == "/app"
+
+    @pytest.mark.asyncio
+    async def test_push_calls_api_and_returns_scenario(
+        self, builder: AsyncScenarioBuilder, mock_async_client: MagicMock
+    ) -> None:
+        """Test push() calls API with correct params and returns AsyncScenario."""
+        mock_async_client.scenarios.create.return_value.id = "scn-new-123"
+
+        builder.with_problem_statement("Fix the bug")
+        builder.add_test_scorer("tests", test_command="pytest")
+
+        scenario = await builder.push()
+
+        mock_async_client.scenarios.create.assert_called_once()
+        call_kwargs = mock_async_client.scenarios.create.call_args.kwargs
+        assert call_kwargs["name"] == "test-scenario"
+        assert call_kwargs["input_context"]["problem_statement"] == "Fix the bug"
+
+        assert scenario.id == "scn-new-123"
+
+    def test_fluent_chaining(self, builder: AsyncScenarioBuilder) -> None:
+        """Test that all builder methods can be chained fluently."""
+        result = (
+            builder.from_blueprint_id("bp-123")
+            .with_working_directory("/app")
+            .with_problem_statement("Fix the bug")
+            .with_additional_context({"hint": "check main.py"})
+            .add_test_scorer("tests", test_command="pytest")
+            .with_metadata({"team": "infra"})
+            .with_reference_output("diff content")
+            .with_required_env_vars(["API_KEY"])
+            .with_required_secrets(["secret"])
+            .with_validation_type("FORWARD")
+        )
+
+        assert result is builder
+        assert builder._blueprint_id == "bp-123"
+        assert builder._working_directory == "/app"
+        assert builder._problem_statement == "Fix the bug"
+        assert len(builder._scorers) == 1
diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py
new file mode 100644
index 000000000..a2941abbc
--- /dev/null
+++ b/tests/sdk/test_scenario_builder.py
@@ -0,0 +1,365 @@
+"""Unit tests for ScenarioBuilder class."""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from runloop_api_client.sdk.scenario_builder import ScenarioBuilder
+from runloop_api_client.types.scoring_function_param import ScorerTestBasedScoringFunctionTestFile
+
+
+class TestScenarioBuilder:
+    """Tests for the synchronous ScenarioBuilder."""
+
+    @pytest.fixture
+    def mock_client(self) -> MagicMock:
+        """Create a mock Runloop client."""
+        client = MagicMock()
+        return client
+
+    @pytest.fixture
+    def builder(self, mock_client: MagicMock) -> ScenarioBuilder:
+        """Create a ScenarioBuilder instance with mock client."""
+        return ScenarioBuilder(mock_client, "test-scenario")
+
+    def test_init(self, mock_client: MagicMock) -> None:
+        """Test builder initialization."""
+        builder = ScenarioBuilder(mock_client, "my-scenario")
+
+        assert builder._client is mock_client
+        assert builder._name == "my-scenario"
+        assert builder.name == "my-scenario"
+
+    def test_repr(self, builder: ScenarioBuilder) -> None:
+        """Test builder __repr__."""
+        assert repr(builder) == "<ScenarioBuilder name='test-scenario'>"
+
+    def test_from_blueprint_id_returns_self(self, builder: ScenarioBuilder) -> None:
+        """Test from_blueprint_id returns self for chaining."""
+        result = builder.from_blueprint_id("bp-123")
+
+        assert result is builder
+        assert builder._blueprint_id == "bp-123"
+        assert builder._snapshot_id is None
+
+    def test_from_snapshot_id_returns_self(self, builder: ScenarioBuilder) -> None:
+        """Test from_snapshot_id returns self for chaining."""
+        result = builder.from_snapshot_id("snap-123")
+
+        assert result is builder
+        assert builder._snapshot_id == "snap-123"
+        assert builder._blueprint_id is None
+
+    def test_from_blueprint_clears_snapshot(self, builder: ScenarioBuilder) -> None:
+        """Test that setting blueprint clears snapshot."""
+        builder.from_snapshot_id("snap-123")
+        builder.from_blueprint_id("bp-123")
+
+        assert builder._blueprint_id == "bp-123"
+        assert builder._snapshot_id is None
+
+    def test_from_snapshot_clears_blueprint(self, builder: ScenarioBuilder) -> None:
+        """Test that setting snapshot clears blueprint."""
+        builder.from_blueprint_id("bp-123")
+        builder.from_snapshot_id("snap-123")
+
+        assert builder._snapshot_id == "snap-123"
+        assert builder._blueprint_id is None
+
+    def test_with_working_directory_returns_self(self, builder: ScenarioBuilder) -> None:
+        """Test with_working_directory returns self for chaining."""
+        result = builder.with_working_directory("/app")
+
+        assert result is builder
+        assert builder._working_directory == "/app"
+
+    def test_with_problem_statement_returns_self(self, builder: ScenarioBuilder) -> None:
+        """Test with_problem_statement returns self for chaining."""
+        result = builder.with_problem_statement("Fix the bug")
+
+        assert result is builder
+        assert builder._problem_statement == "Fix the bug"
+
+    def test_with_additional_context_returns_self(self, builder: ScenarioBuilder) -> None:
+        """Test with_additional_context returns self for chaining."""
+        context = {"hint": "Look at line 42"}
+        result = builder.with_additional_context(context)
+
+        assert result is builder
+        assert builder._additional_context == context
+
+    def test_add_test_scorer(self, builder: ScenarioBuilder) -> None:
+        """Test add_test_scorer method."""
+        result = builder.add_test_scorer(
+            "my-tests",
+            test_command="pytest",
+            weight=2.0,
+        )
+
+        assert result is builder
+        assert len(builder._scorers) == 1
+        assert builder._scorers[0]["name"] == "my-tests"
+        assert builder._scorers[0]["weight"] == 2.0
+        assert builder._scorers[0]["scorer"]["type"] == "test_based_scorer"
+        assert "test_command" in builder._scorers[0]["scorer"]
+        assert builder._scorers[0]["scorer"]["test_command"] == "pytest"
+
+    def test_add_test_scorer_with_files(self, builder: ScenarioBuilder) -> None:
+        """Test add_test_scorer with test files."""
+        test_files: list[ScorerTestBasedScoringFunctionTestFile] = [
+            {"file_path": "test_main.py", "file_contents": "def test_foo(): pass"}
+        ]
+        result = builder.add_test_scorer("tests", test_command="pytest", test_files=test_files)
+
+        assert result is builder
+        assert "test_files" in builder._scorers[0]["scorer"]
+        assert builder._scorers[0]["scorer"]["test_files"] == test_files
+
+    def test_add_command_scorer(self, builder: ScenarioBuilder) -> None:
+        """Test add_command_scorer method."""
+        result = builder.add_command_scorer(
+            "cmd-scorer",
+            command="./check.sh",
+        )
+
+        assert result is builder
+        assert len(builder._scorers) == 1
+        assert builder._scorers[0]["scorer"]["type"] == "command_scorer"
+        assert "command" in builder._scorers[0]["scorer"]
+        assert builder._scorers[0]["scorer"]["command"] == "./check.sh"
+
+    def test_add_bash_scorer(self, builder: ScenarioBuilder) -> None:
+        """Test add_bash_scorer method."""
+        result = builder.add_bash_scorer(
+            "bash-scorer",
+            bash_script="echo 'score=1.0'",
+        )
+
+        assert result is builder
+        assert len(builder._scorers) == 1
+        assert builder._scorers[0]["scorer"]["type"] == "bash_script_scorer"
+        assert "bash_script" in builder._scorers[0]["scorer"]
+        assert builder._scorers[0]["scorer"]["bash_script"] == "echo 'score=1.0'"
+
+    def test_add_python_scorer(self, builder: ScenarioBuilder) -> None:
+        """Test add_python_scorer method."""
+        result = builder.add_python_scorer(
+            "python-scorer",
+            python_script="print('score=1.0')",
+            python_version_constraint=">=3.10",
+        )
+
+        assert result is builder
+        assert len(builder._scorers) == 1
+        assert builder._scorers[0]["scorer"]["type"] == "python_script_scorer"
+        assert builder._scorers[0]["scorer"]["python_script"] == "print('score=1.0')"
+        assert "python_version_constraint" in builder._scorers[0]["scorer"]
+        assert builder._scorers[0]["scorer"]["python_version_constraint"] == ">=3.10"
+
+    def test_add_ast_grep_scorer(self, builder: ScenarioBuilder) -> None:
+        """Test add_ast_grep_scorer method."""
+        result = builder.add_ast_grep_scorer(
+            "ast-scorer",
+            pattern="$A.foo()",
+            search_directory="/src",
+            lang="python",
+        )
+
+        assert result is builder
+        assert len(builder._scorers) == 1
+        assert builder._scorers[0]["scorer"]["type"] == "ast_grep_scorer"
+        assert builder._scorers[0]["scorer"]["pattern"] == "$A.foo()"
+        assert builder._scorers[0]["scorer"]["search_directory"] == "/src"
+        assert "lang" in builder._scorers[0]["scorer"]
+        assert builder._scorers[0]["scorer"]["lang"] == "python"
+
+    def test_add_custom_scorer(self, builder: ScenarioBuilder) -> None:
+        """Test add_custom_scorer method."""
+        result = builder.add_custom_scorer(
+            "custom-scorer",
+            custom_scorer_type="my_custom_scorer",
+            scorer_params={"threshold": 0.5},
+        )
+
+        assert result is builder
+        assert len(builder._scorers) == 1
+        assert builder._scorers[0]["scorer"]["type"] == "custom_scorer"
+        assert builder._scorers[0]["scorer"]["custom_scorer_type"] == "my_custom_scorer"
+        assert "scorer_params" in builder._scorers[0]["scorer"]
+        assert builder._scorers[0]["scorer"]["scorer_params"] == {"threshold": 0.5}
+
+    def test_add_multiple_scorers(self, builder: ScenarioBuilder) -> None:
+        """Test adding multiple scorers."""
+        builder.add_test_scorer("test1", test_command="pytest", weight=1.0)
+        builder.add_command_scorer("test2", command="./check.sh", weight=2.0)
+
+        assert len(builder._scorers) == 2
+        assert builder._scorers[0]["name"] == "test1"
+        assert builder._scorers[1]["name"] == "test2"
+
+    def test_add_scorer_rejects_zero_weight(self, builder: ScenarioBuilder) -> None:
+        """Test that adding a scorer with zero weight raises ValueError."""
+        with pytest.raises(ValueError, match="Scorer weight must be positive"):
+            builder.add_bash_scorer("bad", bash_script="echo 1", weight=0.0)
+
+    def test_add_scorer_rejects_negative_weight(self, builder: ScenarioBuilder) -> None:
+        """Test that adding a scorer with negative weight raises ValueError."""
+        with pytest.raises(ValueError, match="Scorer weight must be positive"):
+            builder.add_bash_scorer("bad", bash_script="echo 1", weight=-1.0)
+
+    def test_with_metadata_returns_self(self, builder: ScenarioBuilder) -> None:
+        """Test with_metadata returns self for chaining."""
+        result = builder.with_metadata({"team": "infra"})
+
+        assert result is builder
+        assert builder._metadata == {"team": "infra"}
+
+    def test_with_reference_output_returns_self(self, builder: ScenarioBuilder) -> None:
+        """Test with_reference_output returns self for chaining."""
+        result = builder.with_reference_output("--- a/file.py\n+++ b/file.py")
+
+        assert result is builder
+        assert builder._reference_output == "--- a/file.py\n+++ b/file.py"
+
+    def test_with_required_env_vars_returns_self(self, builder: ScenarioBuilder) -> None:
+        """Test with_required_env_vars returns self for chaining."""
+        result = builder.with_required_env_vars(["API_KEY", "SECRET"])
+
+        assert result is builder
+        assert builder._required_env_vars == ["API_KEY", "SECRET"]
+
+    def test_with_required_secrets_returns_self(self, builder: ScenarioBuilder) -> None:
+        """Test with_required_secrets returns self for chaining."""
+        result = builder.with_required_secrets(["db_password"])
+
+        assert result is builder
+        assert builder._required_secrets == ["db_password"]
+
+    def test_with_validation_type_returns_self(self, builder: ScenarioBuilder) -> None:
+        """Test with_validation_type returns self for chaining."""
+        result = builder.with_validation_type("FORWARD")
+
+        assert result is builder
+        assert builder._validation_type == "FORWARD"
+
+    def test_build_params_missing_problem_statement(self, builder: ScenarioBuilder) -> None:
+        """Test _build_params raises if problem statement is missing."""
+        builder.add_test_scorer("test", test_command="pytest")
+
+        with pytest.raises(ValueError, match="Problem statement is required"):
+            builder._build_params()
+
+    def test_build_params_missing_scorer(self, builder: ScenarioBuilder) -> None:
+        """Test _build_params raises if no scorers are added."""
+        builder.with_problem_statement("Fix the bug")
+
+        with pytest.raises(ValueError, match="At least one scorer is required.*add_test_scorer"):
+            builder._build_params()
+
+    def test_build_params_minimal(self, builder: ScenarioBuilder) -> None:
+        """Test _build_params with minimal configuration."""
+        builder.with_problem_statement("Fix the bug")
+        builder.add_test_scorer("tests", test_command="pytest")
+
+        params = builder._build_params()
+
+        assert params["name"] == "test-scenario"
+        assert params["input_context"]["problem_statement"] == "Fix the bug"
+        assert len(params["scoring_contract"]["scoring_function_parameters"]) == 1
+
+    def test_build_params_with_environment(self, builder: ScenarioBuilder) -> None:
+        """Test _build_params includes environment parameters."""
+        builder.with_problem_statement("Fix the bug")
+        builder.add_test_scorer("tests", test_command="pytest")
+        builder.from_blueprint_id("bp-123")
+        builder.with_working_directory("/app")
+
+        params = builder._build_params()
+
+        assert params["environment_parameters"]["blueprint_id"] == "bp-123"
+        assert params["environment_parameters"]["working_directory"] == "/app"
+
+    def test_build_params_with_all_options(self, builder: ScenarioBuilder) -> None:
+        """Test _build_params with all optional fields set."""
+        builder.with_problem_statement("Fix the bug")
+        builder.with_additional_context({"hint": "line 42"})
+        builder.add_test_scorer("tests", test_command="pytest")
+        builder.from_blueprint_id("bp-123")
+        builder.with_working_directory("/app")
+        builder.with_metadata({"team": "infra"})
+        builder.with_reference_output("diff content")
+        builder.with_required_env_vars(["API_KEY"])
+        builder.with_required_secrets(["db_pass"])
+        builder.with_validation_type("FORWARD")
+
+        params = builder._build_params()
+
+        assert params["name"] == "test-scenario"
+        assert params["input_context"]["problem_statement"] == "Fix the bug"
+        assert params["input_context"]["additional_context"] == {"hint": "line 42"}
+        assert params["environment_parameters"]["blueprint_id"] == "bp-123"
+        assert params["environment_parameters"]["working_directory"] == "/app"
+        assert params["metadata"] == {"team": "infra"}
+        assert params["reference_output"] == "diff content"
+        assert params["required_environment_variables"] == ["API_KEY"]
+        assert params["required_secret_names"] == ["db_pass"]
+        assert params["validation_type"] == "FORWARD"
+
+    def test_build_params_normalizes_weights(self, builder: ScenarioBuilder) -> None:
+        """Test that _build_params normalizes scorer weights to sum to 1.0."""
+        builder.with_problem_statement("Fix the bug")
+        builder.add_bash_scorer("scorer1", bash_script="echo 1", weight=1.0)
+        builder.add_bash_scorer("scorer2", bash_script="echo 2", weight=2.0)
+        builder.add_bash_scorer("scorer3", bash_script="echo 3", weight=3.0)
+
+        params = builder._build_params()
+        scorers = params["scoring_contract"]["scoring_function_parameters"]
+
+        # Weights 1, 2, 3 should normalize to 1/6, 2/6, 3/6
+        assert len(scorers) == 3
+        assert abs(scorers[0]["weight"] - 1 / 6) < 0.0001
+        assert abs(scorers[1]["weight"] - 2 / 6) < 0.0001
+        assert abs(scorers[2]["weight"] - 3 / 6) < 0.0001
+
+        # Total should be 1.0
+        total = sum(s["weight"] for s in scorers)
+        assert abs(total - 1.0) < 0.0001
+
+    def test_push_calls_api_and_returns_scenario(self, builder: ScenarioBuilder, mock_client: MagicMock) -> None:
+        """Test push() calls API with correct params and returns Scenario."""
+        mock_client.scenarios.create.return_value.id = "scn-new-123"
+
+        builder.with_problem_statement("Fix the bug")
+        builder.add_test_scorer("tests", test_command="pytest")
+
+        scenario = builder.push()
+
+        mock_client.scenarios.create.assert_called_once()
+        call_kwargs = mock_client.scenarios.create.call_args.kwargs
+        assert call_kwargs["name"] == "test-scenario"
+        assert call_kwargs["input_context"]["problem_statement"] == "Fix the bug"
+
+        assert scenario.id == "scn-new-123"
+
+    def test_fluent_chaining(self, builder: ScenarioBuilder) -> None:
+        """Test that all builder methods can be chained fluently."""
+        result = (
+            builder.from_blueprint_id("bp-123")
+            .with_working_directory("/app")
+            .with_problem_statement("Fix the bug")
+            .with_additional_context({"hint": "check main.py"})
+            .add_test_scorer("tests", test_command="pytest")
+            .with_metadata({"team": "infra"})
+            .with_reference_output("diff content")
+            .with_required_env_vars(["API_KEY"])
+            .with_required_secrets(["secret"])
+            .with_validation_type("FORWARD")
+        )
+
+        assert result is builder
+        assert builder._blueprint_id == "bp-123"
+        assert builder._working_directory == "/app"
+        assert builder._problem_statement == "Fix the bug"
+        assert len(builder._scorers) == 1

From 853a998f0a16044754fcaba47c23aae4642fe0aa Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Mon, 8 Dec 2025 14:42:14 -0800
Subject: [PATCH 02/31] formatting fix

---
 src/runloop_api_client/sdk/sync.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py
index 28e13e404..151ce5c73 100644
--- a/src/runloop_api_client/sdk/sync.py
+++ b/src/runloop_api_client/sdk/sync.py
@@ -35,8 +35,8 @@
 from .snapshot import Snapshot
 from .blueprint import Blueprint
 from .storage_object import StorageObject
-from ..lib.context_loader import TarFilter, build_directory_tar
 from .scenario_builder import ScenarioBuilder
+from ..lib.context_loader import TarFilter, build_directory_tar
 from ..types.object_create_params import ContentType
 from ..types.shared_params.agent_source import Git, Npm, Pip, Object
 

From 1630a87335c8670cf5db8948f03ed7a8913fce4f Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 12:16:46 -0800
Subject: [PATCH 03/31] clean up imports in scenario ops unit tests

---
 tests/sdk/test_async_ops.py |  9 ++-------
 tests/sdk/test_ops.py       | 10 ++--------
 2 files changed, 4 insertions(+), 15 deletions(-)

diff --git a/tests/sdk/test_async_ops.py b/tests/sdk/test_async_ops.py
index b276f29ee..49c4c4006 100644
--- a/tests/sdk/test_async_ops.py
+++ b/tests/sdk/test_async_ops.py
@@ -24,6 +24,7 @@
     AsyncAgent,
     AsyncDevbox,
     AsyncScorer,
+    AsyncScenario,
     AsyncSnapshot,
     AsyncBlueprint,
     AsyncStorageObject,
@@ -33,6 +34,7 @@
     AsyncDevboxOps,
     AsyncScorerOps,
     AsyncRunloopSDK,
+    AsyncScenarioOps,
     AsyncSnapshotOps,
     AsyncBlueprintOps,
     AsyncStorageObjectOps,
@@ -1122,8 +1124,6 @@ class TestAsyncScenarioOps:
 
     def test_from_id(self, mock_async_client: AsyncMock) -> None:
         """Test from_id method."""
-        from runloop_api_client.sdk import AsyncScenario
-        from runloop_api_client.sdk.async_ import AsyncScenarioOps
 
         ops = AsyncScenarioOps(mock_async_client)
         scenario = ops.from_id("scn_123")
@@ -1134,7 +1134,6 @@ def test_from_id(self, mock_async_client: AsyncMock) -> None:
     @pytest.mark.asyncio
     async def test_list_empty(self, mock_async_client: AsyncMock) -> None:
         """Test list method with empty results."""
-        from runloop_api_client.sdk.async_ import AsyncScenarioOps
 
         async def async_iter():
             return
@@ -1151,8 +1150,6 @@ async def async_iter():
     @pytest.mark.asyncio
     async def test_list_single(self, mock_async_client: AsyncMock, scenario_view: MockScenarioView) -> None:
         """Test list method with single result."""
-        from runloop_api_client.sdk import AsyncScenario
-        from runloop_api_client.sdk.async_ import AsyncScenarioOps
 
         async def async_iter():
             yield scenario_view
@@ -1170,8 +1167,6 @@ async def async_iter():
     @pytest.mark.asyncio
     async def test_list_multiple(self, mock_async_client: AsyncMock) -> None:
         """Test list method with multiple results."""
-        from runloop_api_client.sdk import AsyncScenario
-        from runloop_api_client.sdk.async_ import AsyncScenarioOps
 
         scenario_view1 = MockScenarioView(id="scn_001", name="scenario-1")
         scenario_view2 = MockScenarioView(id="scn_002", name="scenario-2")
diff --git a/tests/sdk/test_ops.py b/tests/sdk/test_ops.py
index fcca7bcbf..724b2ee17 100644
--- a/tests/sdk/test_ops.py
+++ b/tests/sdk/test_ops.py
@@ -20,12 +20,13 @@
     MockBlueprintView,
     create_mock_httpx_response,
 )
-from runloop_api_client.sdk import Agent, Devbox, Scorer, Snapshot, Blueprint, StorageObject
+from runloop_api_client.sdk import Agent, Devbox, Scorer, Scenario, Snapshot, Blueprint, StorageObject
 from runloop_api_client.sdk.sync import (
     AgentOps,
     DevboxOps,
     ScorerOps,
     RunloopSDK,
+    ScenarioOps,
     SnapshotOps,
     BlueprintOps,
     StorageObjectOps,
@@ -1015,8 +1016,6 @@ class TestScenarioOps:
 
     def test_from_id(self, mock_client: Mock) -> None:
         """Test from_id method."""
-        from runloop_api_client.sdk import Scenario
-        from runloop_api_client.sdk.sync import ScenarioOps
 
         ops = ScenarioOps(mock_client)
         scenario = ops.from_id("scn_123")
@@ -1026,7 +1025,6 @@ def test_from_id(self, mock_client: Mock) -> None:
 
     def test_list_empty(self, mock_client: Mock) -> None:
         """Test list method with empty results."""
-        from runloop_api_client.sdk.sync import ScenarioOps
 
         mock_client.scenarios.list.return_value = []
 
@@ -1038,8 +1036,6 @@ def test_list_empty(self, mock_client: Mock) -> None:
 
     def test_list_single(self, mock_client: Mock, scenario_view: MockScenarioView) -> None:
         """Test list method with single result."""
-        from runloop_api_client.sdk import Scenario
-        from runloop_api_client.sdk.sync import ScenarioOps
 
         mock_client.scenarios.list.return_value = [scenario_view]
 
@@ -1053,8 +1049,6 @@ def test_list_single(self, mock_client: Mock, scenario_view: MockScenarioView) -
 
     def test_list_multiple(self, mock_client: Mock) -> None:
         """Test list method with multiple results."""
-        from runloop_api_client.sdk import Scenario
-        from runloop_api_client.sdk.sync import ScenarioOps
 
         scenario_view1 = MockScenarioView(id="scn_001", name="scenario-1")
         scenario_view2 = MockScenarioView(id="scn_002", name="scenario-2")

From f3145a9c20e564e9d4f60a88329d9af8a3d70d80 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 13:27:18 -0800
Subject: [PATCH 04/31] use Blueprint and Snapshot objects directly in
 ScenarioBuilder

---
 .../sdk/async_scenario_builder.py             | 40 ++++++-----
 .../sdk/scenario_builder.py                   | 40 ++++++-----
 tests/sdk/test_async_scenario_builder.py      | 42 +++++++----
 tests/sdk/test_scenario_builder.py            | 70 ++++++++++++-------
 4 files changed, 112 insertions(+), 80 deletions(-)

diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index 1d650afe2..cabd51e5b 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -7,6 +7,8 @@
 
 from .._client import AsyncRunloop
 from .async_scenario import AsyncScenario
+from .async_snapshot import AsyncSnapshot
+from .async_blueprint import AsyncBlueprint
 from ..types.scoring_function_param import (
     Scorer,
     ScoringFunctionParam,
@@ -28,7 +30,7 @@ class AsyncScenarioBuilder:
 
     Example:
         >>> builder = sdk.scenario.builder("my-scenario")
-        >>> builder.from_blueprint_id("bp-xxx")
+        >>> builder.from_blueprint(blueprint)
         >>> builder.with_working_directory("/app")
         >>> builder.with_problem_statement("Fix the bug in main.py")
         >>> builder.add_test_scorer("tests", test_command="pytest")
@@ -47,8 +49,8 @@ def __init__(self, client: AsyncRunloop, name: str) -> None:
         self._name = name
 
         # Environment configuration
-        self._blueprint_id: Optional[str] = None
-        self._snapshot_id: Optional[str] = None
+        self._blueprint: Optional[AsyncBlueprint] = None
+        self._snapshot: Optional[AsyncSnapshot] = None
         self._working_directory: Optional[str] = None
 
         # Input context
@@ -78,28 +80,28 @@ def name(self) -> str:
         """
         return self._name
 
-    def from_blueprint_id(self, blueprint_id: str) -> Self:
-        """Set the blueprint ID for the scenario environment.
+    def from_blueprint(self, blueprint: AsyncBlueprint) -> Self:
+        """Set the blueprint for the scenario environment.
 
-        :param blueprint_id: Blueprint ID to use
-        :type blueprint_id: str
+        :param blueprint: Blueprint to use
+        :type blueprint: AsyncBlueprint
         :return: Self for method chaining
         :rtype: Self
         """
-        self._blueprint_id = blueprint_id
-        self._snapshot_id = None  # Clear snapshot if blueprint is set
+        self._blueprint = blueprint
+        self._snapshot = None  # Clear snapshot if blueprint is set
         return self
 
-    def from_snapshot_id(self, snapshot_id: str) -> Self:
-        """Set the snapshot ID for the scenario environment.
+    def from_snapshot(self, snapshot: AsyncSnapshot) -> Self:
+        """Set the snapshot for the scenario environment.
 
-        :param snapshot_id: Snapshot ID to use
-        :type snapshot_id: str
+        :param snapshot: Snapshot to use
+        :type snapshot: AsyncSnapshot
         :return: Self for method chaining
         :rtype: Self
         """
-        self._snapshot_id = snapshot_id
-        self._blueprint_id = None  # Clear blueprint if snapshot is set
+        self._snapshot = snapshot
+        self._blueprint = None  # Clear blueprint if snapshot is set
         return self
 
     def with_working_directory(self, directory: str) -> Self:
@@ -415,10 +417,10 @@ def _build_params(self) -> Dict[str, Any]:
 
         # Build environment parameters if any are set
         env_params: Dict[str, Any] = {}
-        if self._blueprint_id:
-            env_params["blueprint_id"] = self._blueprint_id
-        if self._snapshot_id:
-            env_params["snapshot_id"] = self._snapshot_id
+        if self._blueprint:
+            env_params["blueprint_id"] = self._blueprint.id
+        if self._snapshot:
+            env_params["snapshot_id"] = self._snapshot.id
         if self._working_directory:
             env_params["working_directory"] = self._working_directory
 
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index 49e693113..842980b4f 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -7,6 +7,8 @@
 
 from .._client import Runloop
 from .scenario import Scenario
+from .snapshot import Snapshot
+from .blueprint import Blueprint
 from ..types.scoring_function_param import (
     Scorer,
     ScoringFunctionParam,
@@ -28,7 +30,7 @@ class ScenarioBuilder:
 
     Example:
         >>> builder = sdk.scenario.builder("my-scenario")
-        >>> builder.from_blueprint_id("bp-xxx")
+        >>> builder.from_blueprint(blueprint)
         >>> builder.with_working_directory("/app")
         >>> builder.with_problem_statement("Fix the bug in main.py")
         >>> builder.add_test_scorer("tests", test_command="pytest")
@@ -47,8 +49,8 @@ def __init__(self, client: Runloop, name: str) -> None:
         self._name = name
 
         # Environment configuration
-        self._blueprint_id: Optional[str] = None
-        self._snapshot_id: Optional[str] = None
+        self._blueprint: Optional[Blueprint] = None
+        self._snapshot: Optional[Snapshot] = None
         self._working_directory: Optional[str] = None
 
         # Input context
@@ -78,28 +80,28 @@ def name(self) -> str:
         """
         return self._name
 
-    def from_blueprint_id(self, blueprint_id: str) -> Self:
-        """Set the blueprint ID for the scenario environment.
+    def from_blueprint(self, blueprint: Blueprint) -> Self:
+        """Set the blueprint for the scenario environment.
 
-        :param blueprint_id: Blueprint ID to use
-        :type blueprint_id: str
+        :param blueprint: Blueprint to use
+        :type blueprint: Blueprint
         :return: Self for method chaining
         :rtype: Self
         """
-        self._blueprint_id = blueprint_id
-        self._snapshot_id = None  # Clear snapshot if blueprint is set
+        self._blueprint = blueprint
+        self._snapshot = None  # Clear snapshot if blueprint is set
         return self
 
-    def from_snapshot_id(self, snapshot_id: str) -> Self:
-        """Set the snapshot ID for the scenario environment.
+    def from_snapshot(self, snapshot: Snapshot) -> Self:
+        """Set the snapshot for the scenario environment.
 
-        :param snapshot_id: Snapshot ID to use
-        :type snapshot_id: str
+        :param snapshot: Snapshot to use
+        :type snapshot: Snapshot
         :return: Self for method chaining
         :rtype: Self
         """
-        self._snapshot_id = snapshot_id
-        self._blueprint_id = None  # Clear blueprint if snapshot is set
+        self._snapshot = snapshot
+        self._blueprint = None  # Clear blueprint if snapshot is set
         return self
 
     def with_working_directory(self, directory: str) -> Self:
@@ -415,10 +417,10 @@ def _build_params(self) -> Dict[str, Any]:
 
         # Build environment parameters if any are set
         env_params: Dict[str, Any] = {}
-        if self._blueprint_id:
-            env_params["blueprint_id"] = self._blueprint_id
-        if self._snapshot_id:
-            env_params["snapshot_id"] = self._snapshot_id
+        if self._blueprint:
+            env_params["blueprint_id"] = self._blueprint.id
+        if self._snapshot:
+            env_params["snapshot_id"] = self._snapshot.id
         if self._working_directory:
             env_params["working_directory"] = self._working_directory
 
diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py
index 3ea9d2631..64ed2ba0c 100644
--- a/tests/sdk/test_async_scenario_builder.py
+++ b/tests/sdk/test_async_scenario_builder.py
@@ -6,6 +6,8 @@
 
 import pytest
 
+from runloop_api_client.sdk.async_snapshot import AsyncSnapshot
+from runloop_api_client.sdk.async_blueprint import AsyncBlueprint
 from runloop_api_client.sdk.async_scenario_builder import AsyncScenarioBuilder
 
 
@@ -20,6 +22,16 @@ def mock_async_client(self) -> MagicMock:
         client.scenarios.create = AsyncMock()
         return client
 
+    @pytest.fixture
+    def mock_blueprint(self, mock_async_client: MagicMock) -> AsyncBlueprint:
+        """Create an AsyncBlueprint backed by the mock client."""
+        return AsyncBlueprint(mock_async_client, "bp-123")
+
+    @pytest.fixture
+    def mock_snapshot(self, mock_async_client: MagicMock) -> AsyncSnapshot:
+        """Create an AsyncSnapshot backed by the mock client."""
+        return AsyncSnapshot(mock_async_client, "snap-123")
+
     @pytest.fixture
     def builder(self, mock_async_client: MagicMock) -> AsyncScenarioBuilder:
         """Create an AsyncScenarioBuilder instance with mock client."""
@@ -37,21 +49,21 @@ def test_repr(self, builder: AsyncScenarioBuilder) -> None:
         """Test builder __repr__."""
         assert repr(builder) == "<AsyncScenarioBuilder name='test-scenario'>"
 
-    def test_from_blueprint_id_returns_self(self, builder: AsyncScenarioBuilder) -> None:
-        """Test from_blueprint_id returns self for chaining."""
-        result = builder.from_blueprint_id("bp-123")
+    def test_from_blueprint_returns_self(self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None:
+        """Test from_blueprint returns self for chaining."""
+        result = builder.from_blueprint(mock_blueprint)
 
         assert result is builder
-        assert builder._blueprint_id == "bp-123"
-        assert builder._snapshot_id is None
+        assert builder._blueprint is mock_blueprint
+        assert builder._snapshot is None
 
-    def test_from_snapshot_id_returns_self(self, builder: AsyncScenarioBuilder) -> None:
-        """Test from_snapshot_id returns self for chaining."""
-        result = builder.from_snapshot_id("snap-123")
+    def test_from_snapshot_returns_self(self, builder: AsyncScenarioBuilder, mock_snapshot: AsyncSnapshot) -> None:
+        """Test from_snapshot returns self for chaining."""
+        result = builder.from_snapshot(mock_snapshot)
 
         assert result is builder
-        assert builder._snapshot_id == "snap-123"
-        assert builder._blueprint_id is None
+        assert builder._snapshot is mock_snapshot
+        assert builder._blueprint is None
 
     def test_with_working_directory_returns_self(self, builder: AsyncScenarioBuilder) -> None:
         """Test with_working_directory returns self for chaining."""
@@ -125,11 +137,11 @@ def test_build_params_minimal(self, builder: AsyncScenarioBuilder) -> None:
         assert params["input_context"]["problem_statement"] == "Fix the bug"
         assert len(params["scoring_contract"]["scoring_function_parameters"]) == 1
 
-    def test_build_params_with_environment(self, builder: AsyncScenarioBuilder) -> None:
+    def test_build_params_with_environment(self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None:
         """Test _build_params includes environment parameters."""
         builder.with_problem_statement("Fix the bug")
         builder.add_test_scorer("tests", test_command="pytest")
-        builder.from_blueprint_id("bp-123")
+        builder.from_blueprint(mock_blueprint)
         builder.with_working_directory("/app")
 
         params = builder._build_params()
@@ -156,10 +168,10 @@ async def test_push_calls_api_and_returns_scenario(
 
         assert scenario.id == "scn-new-123"
 
-    def test_fluent_chaining(self, builder: AsyncScenarioBuilder) -> None:
+    def test_fluent_chaining(self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None:
         """Test that all builder methods can be chained fluently."""
         result = (
-            builder.from_blueprint_id("bp-123")
+            builder.from_blueprint(mock_blueprint)
             .with_working_directory("/app")
             .with_problem_statement("Fix the bug")
             .with_additional_context({"hint": "check main.py"})
@@ -172,7 +184,7 @@ def test_fluent_chaining(self, builder: AsyncScenarioBuilder) -> None:
         )
 
         assert result is builder
-        assert builder._blueprint_id == "bp-123"
+        assert builder._blueprint is mock_blueprint
         assert builder._working_directory == "/app"
         assert builder._problem_statement == "Fix the bug"
         assert len(builder._scorers) == 1
diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py
index a2941abbc..d0f11bb06 100644
--- a/tests/sdk/test_scenario_builder.py
+++ b/tests/sdk/test_scenario_builder.py
@@ -6,6 +6,8 @@
 
 import pytest
 
+from runloop_api_client.sdk.snapshot import Snapshot
+from runloop_api_client.sdk.blueprint import Blueprint
 from runloop_api_client.sdk.scenario_builder import ScenarioBuilder
 from runloop_api_client.types.scoring_function_param import ScorerTestBasedScoringFunctionTestFile
 
@@ -19,6 +21,16 @@ def mock_client(self) -> MagicMock:
         client = MagicMock()
         return client
 
+    @pytest.fixture
+    def mock_blueprint(self, mock_client: MagicMock) -> Blueprint:
+        """Create a mock Blueprint object."""
+        """Create a Blueprint backed by the mock client."""
+
+    @pytest.fixture
+    def mock_snapshot(self, mock_client: MagicMock) -> Snapshot:
+        """Create a mock Snapshot object."""
+        """Create a Snapshot backed by the mock client."""
+
     @pytest.fixture
     def builder(self, mock_client: MagicMock) -> ScenarioBuilder:
         """Create a ScenarioBuilder instance with mock client."""
@@ -36,37 +48,41 @@ def test_repr(self, builder: ScenarioBuilder) -> None:
         """Test builder __repr__."""
         assert repr(builder) == "<ScenarioBuilder name='test-scenario'>"
 
-    def test_from_blueprint_id_returns_self(self, builder: ScenarioBuilder) -> None:
-        """Test from_blueprint_id returns self for chaining."""
-        result = builder.from_blueprint_id("bp-123")
+    def test_from_blueprint_returns_self(self, builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None:
+        """Test from_blueprint returns self for chaining."""
+        result = builder.from_blueprint(mock_blueprint)
 
         assert result is builder
-        assert builder._blueprint_id == "bp-123"
-        assert builder._snapshot_id is None
+        assert builder._blueprint is mock_blueprint
+        assert builder._snapshot is None
 
-    def test_from_snapshot_id_returns_self(self, builder: ScenarioBuilder) -> None:
-        """Test from_snapshot_id returns self for chaining."""
-        result = builder.from_snapshot_id("snap-123")
+    def test_from_snapshot_returns_self(self, builder: ScenarioBuilder, mock_snapshot: Snapshot) -> None:
+        """Test from_snapshot returns self for chaining."""
+        result = builder.from_snapshot(mock_snapshot)
 
         assert result is builder
-        assert builder._snapshot_id == "snap-123"
-        assert builder._blueprint_id is None
+        assert builder._snapshot is mock_snapshot
+        assert builder._blueprint is None
 
-    def test_from_blueprint_clears_snapshot(self, builder: ScenarioBuilder) -> None:
+    def test_from_blueprint_clears_snapshot(
+        self, builder: ScenarioBuilder, mock_blueprint: Blueprint, mock_snapshot: Snapshot
+    ) -> None:
         """Test that setting blueprint clears snapshot."""
-        builder.from_snapshot_id("snap-123")
-        builder.from_blueprint_id("bp-123")
+        builder.from_snapshot(mock_snapshot)
+        builder.from_blueprint(mock_blueprint)
 
-        assert builder._blueprint_id == "bp-123"
-        assert builder._snapshot_id is None
+        assert builder._blueprint is mock_blueprint
+        assert builder._snapshot is None
 
-    def test_from_snapshot_clears_blueprint(self, builder: ScenarioBuilder) -> None:
+    def test_from_snapshot_clears_blueprint(
+        self, builder: ScenarioBuilder, mock_blueprint: Blueprint, mock_snapshot: Snapshot
+    ) -> None:
         """Test that setting snapshot clears blueprint."""
-        builder.from_blueprint_id("bp-123")
-        builder.from_snapshot_id("snap-123")
+        builder.from_blueprint(mock_blueprint)
+        builder.from_snapshot(mock_snapshot)
 
-        assert builder._snapshot_id == "snap-123"
-        assert builder._blueprint_id is None
+        assert builder._snapshot is mock_snapshot
+        assert builder._blueprint is None
 
     def test_with_working_directory_returns_self(self, builder: ScenarioBuilder) -> None:
         """Test with_working_directory returns self for chaining."""
@@ -269,11 +285,11 @@ def test_build_params_minimal(self, builder: ScenarioBuilder) -> None:
         assert params["input_context"]["problem_statement"] == "Fix the bug"
         assert len(params["scoring_contract"]["scoring_function_parameters"]) == 1
 
-    def test_build_params_with_environment(self, builder: ScenarioBuilder) -> None:
+    def test_build_params_with_environment(self, builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None:
         """Test _build_params includes environment parameters."""
         builder.with_problem_statement("Fix the bug")
         builder.add_test_scorer("tests", test_command="pytest")
-        builder.from_blueprint_id("bp-123")
+        builder.from_blueprint(mock_blueprint)
         builder.with_working_directory("/app")
 
         params = builder._build_params()
@@ -281,12 +297,12 @@ def test_build_params_with_environment(self, builder: ScenarioBuilder) -> None:
         assert params["environment_parameters"]["blueprint_id"] == "bp-123"
         assert params["environment_parameters"]["working_directory"] == "/app"
 
-    def test_build_params_with_all_options(self, builder: ScenarioBuilder) -> None:
+    def test_build_params_with_all_options(self, builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None:
         """Test _build_params with all optional fields set."""
         builder.with_problem_statement("Fix the bug")
         builder.with_additional_context({"hint": "line 42"})
         builder.add_test_scorer("tests", test_command="pytest")
-        builder.from_blueprint_id("bp-123")
+        builder.from_blueprint(mock_blueprint)
         builder.with_working_directory("/app")
         builder.with_metadata({"team": "infra"})
         builder.with_reference_output("diff content")
@@ -343,10 +359,10 @@ def test_push_calls_api_and_returns_scenario(self, builder: ScenarioBuilder, moc
 
         assert scenario.id == "scn-new-123"
 
-    def test_fluent_chaining(self, builder: ScenarioBuilder) -> None:
+    def test_fluent_chaining(self, builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None:
         """Test that all builder methods can be chained fluently."""
         result = (
-            builder.from_blueprint_id("bp-123")
+            builder.from_blueprint(mock_blueprint)
             .with_working_directory("/app")
             .with_problem_statement("Fix the bug")
             .with_additional_context({"hint": "check main.py"})
@@ -359,7 +375,7 @@ def test_fluent_chaining(self, builder: ScenarioBuilder) -> None:
         )
 
         assert result is builder
-        assert builder._blueprint_id == "bp-123"
+        assert builder._blueprint is mock_blueprint
         assert builder._working_directory == "/app"
         assert builder._problem_statement == "Fix the bug"
         assert len(builder._scorers) == 1

From 01cdb365c873c2aa5fed05828ede633023548a25 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 13:32:47 -0800
Subject: [PATCH 05/31] consolidate from_blueprint and from_snapshot unit tests

---
 tests/sdk/test_async_scenario_builder.py | 17 +++++++++-----
 tests/sdk/test_scenario_builder.py       | 29 ++++++------------------
 2 files changed, 18 insertions(+), 28 deletions(-)

diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py
index 64ed2ba0c..03c38d8b2 100644
--- a/tests/sdk/test_async_scenario_builder.py
+++ b/tests/sdk/test_async_scenario_builder.py
@@ -49,22 +49,27 @@ def test_repr(self, builder: AsyncScenarioBuilder) -> None:
         """Test builder __repr__."""
         assert repr(builder) == "<AsyncScenarioBuilder name='test-scenario'>"
 
-    def test_from_blueprint_returns_self(self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None:
-        """Test from_blueprint returns self for chaining."""
+    def test_from_blueprint_and_snapshot(
+        self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint, mock_snapshot: AsyncSnapshot
+    ) -> None:
+        """Test that from_blueprint/from_snapshot return self and are mutually exclusive."""
+        # from_blueprint returns self and sets blueprint
         result = builder.from_blueprint(mock_blueprint)
-
         assert result is builder
         assert builder._blueprint is mock_blueprint
         assert builder._snapshot is None
 
-    def test_from_snapshot_returns_self(self, builder: AsyncScenarioBuilder, mock_snapshot: AsyncSnapshot) -> None:
-        """Test from_snapshot returns self for chaining."""
+        # from_snapshot returns self, sets snapshot, and clears blueprint
         result = builder.from_snapshot(mock_snapshot)
-
         assert result is builder
         assert builder._snapshot is mock_snapshot
         assert builder._blueprint is None
 
+        # from_blueprint clears snapshot
+        builder.from_blueprint(mock_blueprint)
+        assert builder._blueprint is mock_blueprint
+        assert builder._snapshot is None
+
     def test_with_working_directory_returns_self(self, builder: AsyncScenarioBuilder) -> None:
         """Test with_working_directory returns self for chaining."""
         result = builder.with_working_directory("/app")
diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py
index d0f11bb06..224e62b02 100644
--- a/tests/sdk/test_scenario_builder.py
+++ b/tests/sdk/test_scenario_builder.py
@@ -48,42 +48,27 @@ def test_repr(self, builder: ScenarioBuilder) -> None:
         """Test builder __repr__."""
         assert repr(builder) == "<ScenarioBuilder name='test-scenario'>"
 
-    def test_from_blueprint_returns_self(self, builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None:
-        """Test from_blueprint returns self for chaining."""
+    def test_from_blueprint_and_snapshot(
+        self, builder: ScenarioBuilder, mock_blueprint: Blueprint, mock_snapshot: Snapshot
+    ) -> None:
+        """Test that from_blueprint/from_snapshot return self and are mutually exclusive."""
+        # from_blueprint returns self and sets blueprint
         result = builder.from_blueprint(mock_blueprint)
-
         assert result is builder
         assert builder._blueprint is mock_blueprint
         assert builder._snapshot is None
 
-    def test_from_snapshot_returns_self(self, builder: ScenarioBuilder, mock_snapshot: Snapshot) -> None:
-        """Test from_snapshot returns self for chaining."""
+        # from_snapshot returns self, sets snapshot, and clears blueprint
         result = builder.from_snapshot(mock_snapshot)
-
         assert result is builder
         assert builder._snapshot is mock_snapshot
         assert builder._blueprint is None
 
-    def test_from_blueprint_clears_snapshot(
-        self, builder: ScenarioBuilder, mock_blueprint: Blueprint, mock_snapshot: Snapshot
-    ) -> None:
-        """Test that setting blueprint clears snapshot."""
-        builder.from_snapshot(mock_snapshot)
+        # from_blueprint clears snapshot
         builder.from_blueprint(mock_blueprint)
-
         assert builder._blueprint is mock_blueprint
         assert builder._snapshot is None
 
-    def test_from_snapshot_clears_blueprint(
-        self, builder: ScenarioBuilder, mock_blueprint: Blueprint, mock_snapshot: Snapshot
-    ) -> None:
-        """Test that setting snapshot clears blueprint."""
-        builder.from_blueprint(mock_blueprint)
-        builder.from_snapshot(mock_snapshot)
-
-        assert builder._snapshot is mock_snapshot
-        assert builder._blueprint is None
-
     def test_with_working_directory_returns_self(self, builder: ScenarioBuilder) -> None:
         """Test with_working_directory returns self for chaining."""
         result = builder.with_working_directory("/app")

From 1868d9fb697fbdec4c33d95438e4a928faaa173d Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 13:43:07 -0800
Subject: [PATCH 06/31] further consolidate scenario builder unit tests, make
 sure async coverage matches sync

---
 tests/sdk/test_async_scenario_builder.py | 169 ++++++++-------
 tests/sdk/test_scenario_builder.py       | 251 +++++------------------
 2 files changed, 153 insertions(+), 267 deletions(-)

diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py
index 03c38d8b2..792b7c6b6 100644
--- a/tests/sdk/test_async_scenario_builder.py
+++ b/tests/sdk/test_async_scenario_builder.py
@@ -9,6 +9,7 @@
 from runloop_api_client.sdk.async_snapshot import AsyncSnapshot
 from runloop_api_client.sdk.async_blueprint import AsyncBlueprint
 from runloop_api_client.sdk.async_scenario_builder import AsyncScenarioBuilder
+from runloop_api_client.types.scoring_function_param import ScorerTestBasedScoringFunctionTestFile
 
 
 class TestAsyncScenarioBuilder:
@@ -37,17 +38,14 @@ def builder(self, mock_async_client: MagicMock) -> AsyncScenarioBuilder:
         """Create an AsyncScenarioBuilder instance with mock client."""
         return AsyncScenarioBuilder(mock_async_client, "test-scenario")
 
-    def test_init(self, mock_async_client: MagicMock) -> None:
-        """Test builder initialization."""
+    def test_instantiation(self, mock_async_client: MagicMock) -> None:
+        """Test builder initialization and repr."""
         builder = AsyncScenarioBuilder(mock_async_client, "my-scenario")
 
         assert builder._client is mock_async_client
         assert builder._name == "my-scenario"
         assert builder.name == "my-scenario"
-
-    def test_repr(self, builder: AsyncScenarioBuilder) -> None:
-        """Test builder __repr__."""
-        assert repr(builder) == "<AsyncScenarioBuilder name='test-scenario'>"
+        assert repr(builder) == "<AsyncScenarioBuilder name='my-scenario'>"
 
     def test_from_blueprint_and_snapshot(
         self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint, mock_snapshot: AsyncSnapshot
@@ -70,89 +68,122 @@ def test_from_blueprint_and_snapshot(
         assert builder._blueprint is mock_blueprint
         assert builder._snapshot is None
 
-    def test_with_working_directory_returns_self(self, builder: AsyncScenarioBuilder) -> None:
-        """Test with_working_directory returns self for chaining."""
-        result = builder.with_working_directory("/app")
-
-        assert result is builder
-        assert builder._working_directory == "/app"
-
-    def test_with_problem_statement_returns_self(self, builder: AsyncScenarioBuilder) -> None:
-        """Test with_problem_statement returns self for chaining."""
-        result = builder.with_problem_statement("Fix the bug")
-
-        assert result is builder
-        assert builder._problem_statement == "Fix the bug"
-
-    def test_add_test_scorer(self, builder: AsyncScenarioBuilder) -> None:
-        """Test add_test_scorer method."""
-        result = builder.add_test_scorer(
-            "my-tests",
-            test_command="pytest",
-            weight=2.0,
-        )
-
+    def test_scorers(self, builder: AsyncScenarioBuilder) -> None:
+        """Test all scorer types, optional params, and multiple scorers."""
+        # Test scorer with test files
+        test_files: list[ScorerTestBasedScoringFunctionTestFile] = [
+            {"file_path": "test_main.py", "file_contents": "def test_foo(): pass"}
+        ]
+        result = builder.add_test_scorer("test-scorer", test_command="pytest", weight=2.0, test_files=test_files)
         assert result is builder
-        assert len(builder._scorers) == 1
-        assert builder._scorers[0]["name"] == "my-tests"
+        assert builder._scorers[0]["name"] == "test-scorer"
+        assert builder._scorers[0]["weight"] == 2.0
         assert builder._scorers[0]["scorer"]["type"] == "test_based_scorer"
-
-    def test_add_command_scorer(self, builder: AsyncScenarioBuilder) -> None:
-        """Test add_command_scorer method."""
-        result = builder.add_command_scorer(
-            "cmd-scorer",
-            command="./check.sh",
+        assert builder._scorers[0]["scorer"].get("test_command") == "pytest"
+        assert builder._scorers[0]["scorer"].get("test_files") == test_files
+
+        # Command scorer
+        builder.add_command_scorer("cmd-scorer", command="./check.sh")
+        assert builder._scorers[1]["scorer"]["type"] == "command_scorer"
+        assert builder._scorers[1]["scorer"].get("command") == "./check.sh"
+
+        # Bash scorer
+        builder.add_bash_scorer("bash-scorer", bash_script="echo 'score=1.0'")
+        assert builder._scorers[2]["scorer"]["type"] == "bash_script_scorer"
+        assert builder._scorers[2]["scorer"].get("bash_script") == "echo 'score=1.0'"
+
+        # Python scorer with optional params
+        builder.add_python_scorer(
+            "python-scorer",
+            python_script="print('1.0')",
+            python_version_constraint=">=3.10",
+            requirements_contents="numpy",
         )
-
-        assert result is builder
-        assert builder._scorers[0]["scorer"]["type"] == "command_scorer"
-
-    def test_add_bash_scorer(self, builder: AsyncScenarioBuilder) -> None:
-        """Test add_bash_scorer method."""
-        result = builder.add_bash_scorer(
-            "bash-scorer",
-            bash_script="echo 'score=1.0'",
-        )
-
-        assert result is builder
-        assert builder._scorers[0]["scorer"]["type"] == "bash_script_scorer"
-
-    def test_build_params_missing_problem_statement(self, builder: AsyncScenarioBuilder) -> None:
-        """Test _build_params raises if problem statement is missing."""
+        assert builder._scorers[3]["scorer"]["type"] == "python_script_scorer"
+        assert builder._scorers[3]["scorer"].get("python_version_constraint") == ">=3.10"
+        assert builder._scorers[3]["scorer"].get("requirements_contents") == "numpy"
+
+        # AST grep scorer with optional lang
+        builder.add_ast_grep_scorer("ast-scorer", pattern="$A.foo()", search_directory="/src", lang="python")
+        assert builder._scorers[4]["scorer"]["type"] == "ast_grep_scorer"
+        assert builder._scorers[4]["scorer"].get("pattern") == "$A.foo()"
+        assert builder._scorers[4]["scorer"].get("lang") == "python"
+
+        # Custom scorer with optional params
+        builder.add_custom_scorer("custom-scorer", custom_scorer_type="my_scorer", scorer_params={"threshold": 0.5})
+        assert builder._scorers[5]["scorer"]["type"] == "custom_scorer"
+        assert builder._scorers[5]["scorer"].get("custom_scorer_type") == "my_scorer"
+        assert builder._scorers[5]["scorer"].get("scorer_params") == {"threshold": 0.5}
+
+        # Verify multiple scorers accumulated
+        assert len(builder._scorers) == 6
+
+    def test_add_scorer_rejects_invalid_weight(self, builder: AsyncScenarioBuilder) -> None:
+        """Test that adding a scorer with zero or negative weight raises ValueError."""
+        with pytest.raises(ValueError, match="Scorer weight must be positive"):
+            builder.add_bash_scorer("bad", bash_script="echo 1", weight=0.0)
+
+        with pytest.raises(ValueError, match="Scorer weight must be positive"):
+            builder.add_bash_scorer("bad", bash_script="echo 1", weight=-1.0)
+
+    def test_build_params_validation(self, builder: AsyncScenarioBuilder) -> None:
+        """Test _build_params raises for missing required fields."""
+        # Missing problem statement
         builder.add_test_scorer("test", test_command="pytest")
-
         with pytest.raises(ValueError, match="Problem statement is required"):
             builder._build_params()
 
-    def test_build_params_missing_scorer(self, builder: AsyncScenarioBuilder) -> None:
-        """Test _build_params raises if no scorers are added."""
-        builder.with_problem_statement("Fix the bug")
-
+        # Missing scorer (new builder)
+        builder2 = AsyncScenarioBuilder(builder._client, "test2")
+        builder2.with_problem_statement("Fix the bug")
         with pytest.raises(ValueError, match="At least one scorer is required"):
-            builder._build_params()
+            builder2._build_params()
 
-    def test_build_params_minimal(self, builder: AsyncScenarioBuilder) -> None:
-        """Test _build_params with minimal configuration."""
+    def test_build_params_with_all_options(self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None:
+        """Test _build_params with all optional fields set."""
         builder.with_problem_statement("Fix the bug")
+        builder.with_additional_context({"hint": "line 42"})
         builder.add_test_scorer("tests", test_command="pytest")
+        builder.from_blueprint(mock_blueprint)
+        builder.with_working_directory("/app")
+        builder.with_metadata({"team": "infra"})
+        builder.with_reference_output("diff content")
+        builder.with_required_env_vars(["API_KEY"])
+        builder.with_required_secrets(["db_pass"])
+        builder.with_validation_type("FORWARD")
 
         params = builder._build_params()
 
         assert params["name"] == "test-scenario"
         assert params["input_context"]["problem_statement"] == "Fix the bug"
-        assert len(params["scoring_contract"]["scoring_function_parameters"]) == 1
-
-    def test_build_params_with_environment(self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None:
-        """Test _build_params includes environment parameters."""
+        assert params["input_context"]["additional_context"] == {"hint": "line 42"}
+        assert params["environment_parameters"]["blueprint_id"] == "bp-123"
+        assert params["environment_parameters"]["working_directory"] == "/app"
+        assert params["metadata"] == {"team": "infra"}
+        assert params["reference_output"] == "diff content"
+        assert params["required_environment_variables"] == ["API_KEY"]
+        assert params["required_secret_names"] == ["db_pass"]
+        assert params["validation_type"] == "FORWARD"
+
+    def test_build_params_normalizes_weights(self, builder: AsyncScenarioBuilder) -> None:
+        """Test that _build_params normalizes scorer weights to sum to 1.0."""
         builder.with_problem_statement("Fix the bug")
-        builder.add_test_scorer("tests", test_command="pytest")
-        builder.from_blueprint(mock_blueprint)
-        builder.with_working_directory("/app")
+        builder.add_bash_scorer("scorer1", bash_script="echo 1", weight=1.0)
+        builder.add_bash_scorer("scorer2", bash_script="echo 2", weight=2.0)
+        builder.add_bash_scorer("scorer3", bash_script="echo 3", weight=3.0)
 
         params = builder._build_params()
+        scorers = params["scoring_contract"]["scoring_function_parameters"]
 
-        assert params["environment_parameters"]["blueprint_id"] == "bp-123"
-        assert params["environment_parameters"]["working_directory"] == "/app"
+        # Weights 1, 2, 3 should normalize to 1/6, 2/6, 3/6
+        assert len(scorers) == 3
+        assert abs(scorers[0]["weight"] - 1 / 6) < 0.0001
+        assert abs(scorers[1]["weight"] - 2 / 6) < 0.0001
+        assert abs(scorers[2]["weight"] - 3 / 6) < 0.0001
+
+        # Total should be 1.0
+        total = sum(s["weight"] for s in scorers)
+        assert abs(total - 1.0) < 0.0001
 
     @pytest.mark.asyncio
     async def test_push_calls_api_and_returns_scenario(
diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py
index 224e62b02..d9d90b67f 100644
--- a/tests/sdk/test_scenario_builder.py
+++ b/tests/sdk/test_scenario_builder.py
@@ -36,17 +36,14 @@ def builder(self, mock_client: MagicMock) -> ScenarioBuilder:
         """Create a ScenarioBuilder instance with mock client."""
         return ScenarioBuilder(mock_client, "test-scenario")
 
-    def test_init(self, mock_client: MagicMock) -> None:
-        """Test builder initialization."""
+    def test_instantiation(self, mock_client: MagicMock) -> None:
+        """Test builder initialization and repr."""
         builder = ScenarioBuilder(mock_client, "my-scenario")
 
         assert builder._client is mock_client
         assert builder._name == "my-scenario"
         assert builder.name == "my-scenario"
-
-    def test_repr(self, builder: ScenarioBuilder) -> None:
-        """Test builder __repr__."""
-        assert repr(builder) == "<ScenarioBuilder name='test-scenario'>"
+        assert repr(builder) == "<ScenarioBuilder name='my-scenario'>"
 
     def test_from_blueprint_and_snapshot(
         self, builder: ScenarioBuilder, mock_blueprint: Blueprint, mock_snapshot: Snapshot
@@ -69,218 +66,76 @@ def test_from_blueprint_and_snapshot(
         assert builder._blueprint is mock_blueprint
         assert builder._snapshot is None
 
-    def test_with_working_directory_returns_self(self, builder: ScenarioBuilder) -> None:
-        """Test with_working_directory returns self for chaining."""
-        result = builder.with_working_directory("/app")
-
-        assert result is builder
-        assert builder._working_directory == "/app"
-
-    def test_with_problem_statement_returns_self(self, builder: ScenarioBuilder) -> None:
-        """Test with_problem_statement returns self for chaining."""
-        result = builder.with_problem_statement("Fix the bug")
-
-        assert result is builder
-        assert builder._problem_statement == "Fix the bug"
-
-    def test_with_additional_context_returns_self(self, builder: ScenarioBuilder) -> None:
-        """Test with_additional_context returns self for chaining."""
-        context = {"hint": "Look at line 42"}
-        result = builder.with_additional_context(context)
-
-        assert result is builder
-        assert builder._additional_context == context
-
-    def test_add_test_scorer(self, builder: ScenarioBuilder) -> None:
-        """Test add_test_scorer method."""
-        result = builder.add_test_scorer(
-            "my-tests",
-            test_command="pytest",
-            weight=2.0,
-        )
-
-        assert result is builder
-        assert len(builder._scorers) == 1
-        assert builder._scorers[0]["name"] == "my-tests"
-        assert builder._scorers[0]["weight"] == 2.0
-        assert builder._scorers[0]["scorer"]["type"] == "test_based_scorer"
-        assert "test_command" in builder._scorers[0]["scorer"]
-        assert builder._scorers[0]["scorer"]["test_command"] == "pytest"
-
-    def test_add_test_scorer_with_files(self, builder: ScenarioBuilder) -> None:
-        """Test add_test_scorer with test files."""
+    def test_scorers(self, builder: ScenarioBuilder) -> None:
+        """Test all scorer types, optional params, and multiple scorers."""
+        # Test scorer with test files
         test_files: list[ScorerTestBasedScoringFunctionTestFile] = [
             {"file_path": "test_main.py", "file_contents": "def test_foo(): pass"}
         ]
-        result = builder.add_test_scorer("tests", test_command="pytest", test_files=test_files)
-
+        result = builder.add_test_scorer("test-scorer", test_command="pytest", weight=2.0, test_files=test_files)
         assert result is builder
-        assert "test_files" in builder._scorers[0]["scorer"]
-        assert builder._scorers[0]["scorer"]["test_files"] == test_files
-
-    def test_add_command_scorer(self, builder: ScenarioBuilder) -> None:
-        """Test add_command_scorer method."""
-        result = builder.add_command_scorer(
-            "cmd-scorer",
-            command="./check.sh",
-        )
+        assert builder._scorers[0]["name"] == "test-scorer"
+        assert builder._scorers[0]["weight"] == 2.0
+        assert builder._scorers[0]["scorer"]["type"] == "test_based_scorer"
+        assert builder._scorers[0]["scorer"].get("test_command") == "pytest"
+        assert builder._scorers[0]["scorer"].get("test_files") == test_files
 
-        assert result is builder
-        assert len(builder._scorers) == 1
-        assert builder._scorers[0]["scorer"]["type"] == "command_scorer"
-        assert "command" in builder._scorers[0]["scorer"]
-        assert builder._scorers[0]["scorer"]["command"] == "./check.sh"
-
-    def test_add_bash_scorer(self, builder: ScenarioBuilder) -> None:
-        """Test add_bash_scorer method."""
-        result = builder.add_bash_scorer(
-            "bash-scorer",
-            bash_script="echo 'score=1.0'",
-        )
+        # Command scorer
+        builder.add_command_scorer("cmd-scorer", command="./check.sh")
+        assert builder._scorers[1]["scorer"]["type"] == "command_scorer"
+        assert builder._scorers[1]["scorer"].get("command") == "./check.sh"
 
-        assert result is builder
-        assert len(builder._scorers) == 1
-        assert builder._scorers[0]["scorer"]["type"] == "bash_script_scorer"
-        assert "bash_script" in builder._scorers[0]["scorer"]
-        assert builder._scorers[0]["scorer"]["bash_script"] == "echo 'score=1.0'"
+        # Bash scorer
+        builder.add_bash_scorer("bash-scorer", bash_script="echo 'score=1.0'")
+        assert builder._scorers[2]["scorer"]["type"] == "bash_script_scorer"
+        assert builder._scorers[2]["scorer"].get("bash_script") == "echo 'score=1.0'"
 
-    def test_add_python_scorer(self, builder: ScenarioBuilder) -> None:
-        """Test add_python_scorer method."""
-        result = builder.add_python_scorer(
+        # Python scorer with optional params
+        builder.add_python_scorer(
             "python-scorer",
-            python_script="print('score=1.0')",
+            python_script="print('1.0')",
             python_version_constraint=">=3.10",
+            requirements_contents="numpy",
         )
-
-        assert result is builder
-        assert len(builder._scorers) == 1
-        assert builder._scorers[0]["scorer"]["type"] == "python_script_scorer"
-        assert builder._scorers[0]["scorer"]["python_script"] == "print('score=1.0')"
-        assert "python_version_constraint" in builder._scorers[0]["scorer"]
-        assert builder._scorers[0]["scorer"]["python_version_constraint"] == ">=3.10"
-
-    def test_add_ast_grep_scorer(self, builder: ScenarioBuilder) -> None:
-        """Test add_ast_grep_scorer method."""
-        result = builder.add_ast_grep_scorer(
-            "ast-scorer",
-            pattern="$A.foo()",
-            search_directory="/src",
-            lang="python",
-        )
-
-        assert result is builder
-        assert len(builder._scorers) == 1
-        assert builder._scorers[0]["scorer"]["type"] == "ast_grep_scorer"
-        assert builder._scorers[0]["scorer"]["pattern"] == "$A.foo()"
-        assert builder._scorers[0]["scorer"]["search_directory"] == "/src"
-        assert "lang" in builder._scorers[0]["scorer"]
-        assert builder._scorers[0]["scorer"]["lang"] == "python"
-
-    def test_add_custom_scorer(self, builder: ScenarioBuilder) -> None:
-        """Test add_custom_scorer method."""
-        result = builder.add_custom_scorer(
-            "custom-scorer",
-            custom_scorer_type="my_custom_scorer",
-            scorer_params={"threshold": 0.5},
-        )
-
-        assert result is builder
-        assert len(builder._scorers) == 1
-        assert builder._scorers[0]["scorer"]["type"] == "custom_scorer"
-        assert builder._scorers[0]["scorer"]["custom_scorer_type"] == "my_custom_scorer"
-        assert "scorer_params" in builder._scorers[0]["scorer"]
-        assert builder._scorers[0]["scorer"]["scorer_params"] == {"threshold": 0.5}
-
-    def test_add_multiple_scorers(self, builder: ScenarioBuilder) -> None:
-        """Test adding multiple scorers."""
-        builder.add_test_scorer("test1", test_command="pytest", weight=1.0)
-        builder.add_command_scorer("test2", command="./check.sh", weight=2.0)
-
-        assert len(builder._scorers) == 2
-        assert builder._scorers[0]["name"] == "test1"
-        assert builder._scorers[1]["name"] == "test2"
-
-    def test_add_scorer_rejects_zero_weight(self, builder: ScenarioBuilder) -> None:
-        """Test that adding a scorer with zero weight raises ValueError."""
+        assert builder._scorers[3]["scorer"]["type"] == "python_script_scorer"
+        assert builder._scorers[3]["scorer"].get("python_version_constraint") == ">=3.10"
+        assert builder._scorers[3]["scorer"].get("requirements_contents") == "numpy"
+
+        # AST grep scorer with optional lang
+        builder.add_ast_grep_scorer("ast-scorer", pattern="$A.foo()", search_directory="/src", lang="python")
+        assert builder._scorers[4]["scorer"]["type"] == "ast_grep_scorer"
+        assert builder._scorers[4]["scorer"].get("pattern") == "$A.foo()"
+        assert builder._scorers[4]["scorer"].get("lang") == "python"
+
+        # Custom scorer with optional params
+        builder.add_custom_scorer("custom-scorer", custom_scorer_type="my_scorer", scorer_params={"threshold": 0.5})
+        assert builder._scorers[5]["scorer"]["type"] == "custom_scorer"
+        assert builder._scorers[5]["scorer"].get("custom_scorer_type") == "my_scorer"
+        assert builder._scorers[5]["scorer"].get("scorer_params") == {"threshold": 0.5}
+
+        # Verify multiple scorers accumulated
+        assert len(builder._scorers) == 6
+
+    def test_add_scorer_rejects_invalid_weight(self, builder: ScenarioBuilder) -> None:
+        """Test that adding a scorer with zero or negative weight raises ValueError."""
         with pytest.raises(ValueError, match="Scorer weight must be positive"):
             builder.add_bash_scorer("bad", bash_script="echo 1", weight=0.0)
 
-    def test_add_scorer_rejects_negative_weight(self, builder: ScenarioBuilder) -> None:
-        """Test that adding a scorer with negative weight raises ValueError."""
         with pytest.raises(ValueError, match="Scorer weight must be positive"):
             builder.add_bash_scorer("bad", bash_script="echo 1", weight=-1.0)
 
-    def test_with_metadata_returns_self(self, builder: ScenarioBuilder) -> None:
-        """Test with_metadata returns self for chaining."""
-        result = builder.with_metadata({"team": "infra"})
-
-        assert result is builder
-        assert builder._metadata == {"team": "infra"}
-
-    def test_with_reference_output_returns_self(self, builder: ScenarioBuilder) -> None:
-        """Test with_reference_output returns self for chaining."""
-        result = builder.with_reference_output("--- a/file.py\n+++ b/file.py")
-
-        assert result is builder
-        assert builder._reference_output == "--- a/file.py\n+++ b/file.py"
-
-    def test_with_required_env_vars_returns_self(self, builder: ScenarioBuilder) -> None:
-        """Test with_required_env_vars returns self for chaining."""
-        result = builder.with_required_env_vars(["API_KEY", "SECRET"])
-
-        assert result is builder
-        assert builder._required_env_vars == ["API_KEY", "SECRET"]
-
-    def test_with_required_secrets_returns_self(self, builder: ScenarioBuilder) -> None:
-        """Test with_required_secrets returns self for chaining."""
-        result = builder.with_required_secrets(["db_password"])
-
-        assert result is builder
-        assert builder._required_secrets == ["db_password"]
-
-    def test_with_validation_type_returns_self(self, builder: ScenarioBuilder) -> None:
-        """Test with_validation_type returns self for chaining."""
-        result = builder.with_validation_type("FORWARD")
-
-        assert result is builder
-        assert builder._validation_type == "FORWARD"
-
-    def test_build_params_missing_problem_statement(self, builder: ScenarioBuilder) -> None:
-        """Test _build_params raises if problem statement is missing."""
+    def test_build_params_validation(self, builder: ScenarioBuilder) -> None:
+        """Test _build_params raises for missing required fields."""
+        # Missing problem statement
         builder.add_test_scorer("test", test_command="pytest")
-
         with pytest.raises(ValueError, match="Problem statement is required"):
             builder._build_params()
 
-    def test_build_params_missing_scorer(self, builder: ScenarioBuilder) -> None:
-        """Test _build_params raises if no scorers are added."""
-        builder.with_problem_statement("Fix the bug")
-
-        with pytest.raises(ValueError, match="At least one scorer is required.*add_test_scorer"):
-            builder._build_params()
-
-    def test_build_params_minimal(self, builder: ScenarioBuilder) -> None:
-        """Test _build_params with minimal configuration."""
-        builder.with_problem_statement("Fix the bug")
-        builder.add_test_scorer("tests", test_command="pytest")
-
-        params = builder._build_params()
-
-        assert params["name"] == "test-scenario"
-        assert params["input_context"]["problem_statement"] == "Fix the bug"
-        assert len(params["scoring_contract"]["scoring_function_parameters"]) == 1
-
-    def test_build_params_with_environment(self, builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None:
-        """Test _build_params includes environment parameters."""
-        builder.with_problem_statement("Fix the bug")
-        builder.add_test_scorer("tests", test_command="pytest")
-        builder.from_blueprint(mock_blueprint)
-        builder.with_working_directory("/app")
-
-        params = builder._build_params()
-
-        assert params["environment_parameters"]["blueprint_id"] == "bp-123"
-        assert params["environment_parameters"]["working_directory"] == "/app"
+        # Missing scorer (new builder)
+        builder2 = ScenarioBuilder(builder._client, "test2")
+        builder2.with_problem_statement("Fix the bug")
+        with pytest.raises(ValueError, match="At least one scorer is required"):
+            builder2._build_params()
 
     def test_build_params_with_all_options(self, builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None:
         """Test _build_params with all optional fields set."""

From 9ecbc3d055031a5258cb8b6d6daa83951e319215 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 16:23:37 -0800
Subject: [PATCH 07/31] stricter type declaration for _build_params

---
 .../sdk/async_scenario_builder.py                | 16 +++++++++-------
 src/runloop_api_client/sdk/scenario_builder.py   | 16 +++++++++-------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index cabd51e5b..963c1171e 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -2,9 +2,10 @@
 
 from __future__ import annotations
 
-from typing import Any, Dict, List, Iterable, Optional
+from typing import Dict, List, Iterable, Optional
 from typing_extensions import Self, Literal, override
 
+from ..types import ScenarioCreateParams, ScenarioEnvironmentParam
 from .._client import AsyncRunloop
 from .async_scenario import AsyncScenario
 from .async_snapshot import AsyncSnapshot
@@ -379,14 +380,14 @@ def with_validation_type(self, validation_type: Literal["UNSPECIFIED", "FORWARD"
         self._validation_type = validation_type
         return self
 
-    def _build_params(self) -> Dict[str, Any]:
+    def _build_params(self) -> ScenarioCreateParams:
         """Build the scenario creation parameters.
 
         Weights are automatically normalized to sum to 1.0.
 
         :raises ValueError: If required fields are missing
         :return: Parameters for scenario creation
-        :rtype: Dict[str, Any]
+        :rtype: ScenarioCreateParams
         """
         if not self._problem_statement:
             raise ValueError("Problem statement is required. Call with_problem_statement() first.")
@@ -399,15 +400,19 @@ def _build_params(self) -> Dict[str, Any]:
 
         # Normalize weights to sum to 1.0
         total_weight = sum(s["weight"] for s in self._scorers)
-        normalized_scorers = [{**s, "weight": s["weight"] / total_weight} for s in self._scorers]
+        # Scale shallow copies rather than self._scorers itself: rescaling the builder's own
+        # entries would skew the weights of any scorer added after an earlier build/push.
+        normalized_scorers = [s.copy() for s in self._scorers]
+        for s in normalized_scorers:
+            s["weight"] = s["weight"] / total_weight
 
-        params: Dict[str, Any] = {
+        params: ScenarioCreateParams = {
             "name": self._name,
             "input_context": {
                 "problem_statement": self._problem_statement,
             },
             "scoring_contract": {
                 "scoring_function_parameters": normalized_scorers,
             },
         }
 
@@ -416,7 +421,7 @@ def _build_params(self) -> Dict[str, Any]:
             params["input_context"]["additional_context"] = self._additional_context
 
         # Build environment parameters if any are set
-        env_params: Dict[str, Any] = {}
+        env_params: ScenarioEnvironmentParam = {}
         if self._blueprint:
             env_params["blueprint_id"] = self._blueprint.id
         if self._snapshot:
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index 842980b4f..505982c7d 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -2,9 +2,10 @@
 
 from __future__ import annotations
 
-from typing import Any, Dict, List, Iterable, Optional
+from typing import Dict, List, Iterable, Optional
 from typing_extensions import Self, Literal, override
 
+from ..types import ScenarioCreateParams, ScenarioEnvironmentParam
 from .._client import Runloop
 from .scenario import Scenario
 from .snapshot import Snapshot
@@ -379,14 +380,14 @@ def with_validation_type(self, validation_type: Literal["UNSPECIFIED", "FORWARD"
         self._validation_type = validation_type
         return self
 
-    def _build_params(self) -> Dict[str, Any]:
+    def _build_params(self) -> ScenarioCreateParams:
         """Build the scenario creation parameters.
 
         Weights are automatically normalized to sum to 1.0.
 
         :raises ValueError: If required fields are missing
         :return: Parameters for scenario creation
-        :rtype: Dict[str, Any]
+        :rtype: ScenarioCreateParams
         """
         if not self._problem_statement:
             raise ValueError("Problem statement is required. Call with_problem_statement() first.")
@@ -399,15 +400,19 @@ def _build_params(self) -> Dict[str, Any]:
 
         # Normalize weights to sum to 1.0
         total_weight = sum(s["weight"] for s in self._scorers)
-        normalized_scorers = [{**s, "weight": s["weight"] / total_weight} for s in self._scorers]
+        # Scale shallow copies rather than self._scorers itself: rescaling the builder's own
+        # entries would skew the weights of any scorer added after an earlier build/push.
+        normalized_scorers = [s.copy() for s in self._scorers]
+        for s in normalized_scorers:
+            s["weight"] = s["weight"] / total_weight
 
-        params: Dict[str, Any] = {
+        params: ScenarioCreateParams = {
             "name": self._name,
             "input_context": {
                 "problem_statement": self._problem_statement,
             },
             "scoring_contract": {
                 "scoring_function_parameters": normalized_scorers,
             },
         }
 
@@ -416,7 +421,7 @@ def _build_params(self) -> Dict[str, Any]:
             params["input_context"]["additional_context"] = self._additional_context
 
         # Build environment parameters if any are set
-        env_params: Dict[str, Any] = {}
+        env_params: ScenarioEnvironmentParam = {}
         if self._blueprint:
             env_params["blueprint_id"] = self._blueprint.id
         if self._snapshot:

From 9908844dbedd54ab0005879110a6462f8add4d7d Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 16:41:51 -0800
Subject: [PATCH 08/31] expose request options in push()

---
 src/runloop_api_client/sdk/async_scenario_builder.py | 8 +++++---
 src/runloop_api_client/sdk/scenario_builder.py       | 8 +++++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index 963c1171e..9f64993ff 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -3,9 +3,10 @@
 from __future__ import annotations
 
 from typing import Dict, List, Iterable, Optional
-from typing_extensions import Self, Literal, override
+from typing_extensions import Self, Unpack, Literal, override
 
 from ..types import ScenarioCreateParams, ScenarioEnvironmentParam
+from ._types import LongRequestOptions
 from .._client import AsyncRunloop
 from .async_scenario import AsyncScenario
 from .async_snapshot import AsyncSnapshot
@@ -443,13 +444,14 @@ def _build_params(self) -> ScenarioCreateParams:
 
         return params
 
-    async def push(self) -> AsyncScenario:
+    async def push(self, **options: Unpack[LongRequestOptions]) -> AsyncScenario:
         """Create the scenario on the platform.
 
+        :param options: Optional long-running request configuration
         :raises ValueError: If required fields are missing
         :return: Created scenario wrapper
         :rtype: AsyncScenario
         """
         params = self._build_params()
-        scenario_view = await self._client.scenarios.create(**params)
+        scenario_view = await self._client.scenarios.create(**params, **options)
         return AsyncScenario(self._client, scenario_view.id)
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index 505982c7d..295d90f24 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -3,9 +3,10 @@
 from __future__ import annotations
 
 from typing import Dict, List, Iterable, Optional
-from typing_extensions import Self, Literal, override
+from typing_extensions import Self, Unpack, Literal, override
 
 from ..types import ScenarioCreateParams, ScenarioEnvironmentParam
+from ._types import LongRequestOptions
 from .._client import Runloop
 from .scenario import Scenario
 from .snapshot import Snapshot
@@ -443,13 +444,14 @@ def _build_params(self) -> ScenarioCreateParams:
 
         return params
 
-    def push(self) -> Scenario:
+    def push(self, **options: Unpack[LongRequestOptions]) -> Scenario:
         """Create the scenario on the platform.
 
+        :param options: Optional long-running request configuration
         :raises ValueError: If required fields are missing
         :return: Created scenario wrapper
         :rtype: Scenario
         """
         params = self._build_params()
-        scenario_view = self._client.scenarios.create(**params)
+        scenario_view = self._client.scenarios.create(**params, **options)
         return Scenario(self._client, scenario_view.id)

From 9511827827b4b4360abb3340c257d0f9481a9a1a Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 16:56:29 -0800
Subject: [PATCH 09/31] add scenario creation smoketests, with push_or_update
 logic

---
 tests/smoketests/sdk/test_async_scenario.py | 154 ++++++++++++++++++--
 tests/smoketests/sdk/test_scenario.py       | 153 +++++++++++++++++--
 2 files changed, 289 insertions(+), 18 deletions(-)

diff --git a/tests/smoketests/sdk/test_async_scenario.py b/tests/smoketests/sdk/test_async_scenario.py
index 1a6a6a6e7..20a23e47e 100644
--- a/tests/smoketests/sdk/test_async_scenario.py
+++ b/tests/smoketests/sdk/test_async_scenario.py
@@ -4,12 +4,63 @@
 
 import pytest
 
-from runloop_api_client.sdk import AsyncRunloopSDK
+from runloop_api_client.sdk import AsyncRunloopSDK, AsyncScenarioBuilder
+from tests.smoketests.utils import unique_name
+from runloop_api_client.types import ScenarioView
+from runloop_api_client.sdk._types import SDKScenarioUpdateParams
+from runloop_api_client.sdk._helpers import filter_params
 
 pytestmark = [pytest.mark.smoketest]
 
 TWO_MINUTE_TIMEOUT = 120
 FIVE_MINUTE_TIMEOUT = 300
+TEN_MINUTE_TIMEOUT = 600
+
+# Metadata tag for all smoketest scenarios (for easy identification/cleanup)
+SMOKETEST_METADATA = {"smoketest": "true"}
+
+
+async def push_or_update_scenario(sdk_client: AsyncRunloopSDK, builder: AsyncScenarioBuilder) -> ScenarioView:
+    """Push a new scenario, or update the existing one that has the same name.
+
+    Workaround until a scenario delete endpoint is available: the smoketests reuse fixed
+    scenario names so that repeated runs do not litter the platform with test scenarios.
+
+    On update, the scenario's OLD blueprint/snapshot is deleted when its ID differs from the
+    one set on the builder. The NEW blueprint/snapshot is kept so the scenario remains
+    runnable. Either way the scenario's ``ScenarioView`` is returned.
+    """
+    # Look up an existing scenario by the builder's name (at most one result is requested)
+    scenarios = await sdk_client.scenario.list(name=builder.name, limit=1)
+
+    if scenarios:
+        # Read the current environment first, so the old blueprint/snapshot IDs are known
+        scenario = scenarios[0]
+        old_scenario_info = await scenario.get_info()
+        old_env = old_scenario_info.environment
+        old_blueprint_id = old_env.blueprint_id if old_env else None
+        old_snapshot_id = old_env.snapshot_id if old_env else None
+
+        # New blueprint/snapshot IDs come from the builder's private state (None when unset)
+        new_blueprint_id = builder._blueprint.id if builder._blueprint else None
+        new_snapshot_id = builder._snapshot.id if builder._snapshot else None
+
+        # Update the existing scenario with the builder's params (passed through filter_params)
+        params = builder._build_params()
+        result = await scenario.update(**filter_params(params, SDKScenarioUpdateParams))
+
+        # Delete the OLD blueprint/snapshot when its ID differs from the builder's (incl. when it has none)
+        if old_blueprint_id and old_blueprint_id != new_blueprint_id:
+            await sdk_client.blueprint.from_id(old_blueprint_id).delete()
+
+        if old_snapshot_id and old_snapshot_id != new_snapshot_id:
+            await sdk_client.snapshot.from_id(old_snapshot_id).delete()
+
+        return result
+    else:
+        # No match: create the scenario and keep its blueprint/snapshot (the scenario needs them)
+        scenario = await builder.push()
+        return await scenario.get_info()
 
 
 class TestAsyncScenarioRetrieval:
@@ -52,7 +103,7 @@ class TestAsyncScenarioRun:
     """Test async scenario run operations."""
 
     @pytest.mark.timeout(FIVE_MINUTE_TIMEOUT)
-    async def test_scenario_run_lifecycle(self, async_sdk_client: AsyncRunloopSDK) -> None:
+    async def test_scenario_run_async_lifecycle(self, async_sdk_client: AsyncRunloopSDK) -> None:
         """Test running a scenario and accessing the devbox.
 
         This test:
@@ -63,7 +114,7 @@ async def test_scenario_run_lifecycle(self, async_sdk_client: AsyncRunloopSDK) -
         5. Cancels the run
         """
         # Find a scenario to run
-        scenarios = await async_sdk_client.scenario.list(limit=5)
+        scenarios = await async_sdk_client.scenario.list(limit=1)
         if not scenarios:
             pytest.skip("No scenarios available to test run")
 
@@ -72,6 +123,7 @@ async def test_scenario_run_lifecycle(self, async_sdk_client: AsyncRunloopSDK) -
 
         # Start a run
         run = await scenario.run_async(run_name="sdk-smoketest-async-run")
+        devbox = None
 
         try:
             assert run.id is not None
@@ -82,7 +134,8 @@ async def test_scenario_run_lifecycle(self, async_sdk_client: AsyncRunloopSDK) -
 
             # Access devbox
             devbox = run.devbox
-            assert devbox.id == run.devbox_id
+            info = await devbox.get_info()
+            assert info.status == "running"
 
             # Get run info
             info = await run.get_info()
@@ -94,13 +147,14 @@ async def test_scenario_run_lifecycle(self, async_sdk_client: AsyncRunloopSDK) -
             try:
                 await run.cancel()
             except Exception:
-                pass  # Best effort cleanup
+                if devbox:
+                    await devbox.shutdown()
 
     @pytest.mark.timeout(FIVE_MINUTE_TIMEOUT)
-    async def test_scenario_run_and_await_env_ready(self, async_sdk_client: AsyncRunloopSDK) -> None:
-        """Test run_and_await_env_ready convenience method."""
+    async def test_scenario_run(self, async_sdk_client: AsyncRunloopSDK) -> None:
+        """Test run convenience method."""
         # Find a scenario to run
-        scenarios = await async_sdk_client.scenario.list(limit=5)
+        scenarios = await async_sdk_client.scenario.list(limit=1)
         if not scenarios:
             pytest.skip("No scenarios available to test run")
 
@@ -108,6 +162,7 @@ async def test_scenario_run_and_await_env_ready(self, async_sdk_client: AsyncRun
 
         # Start a run and wait for environment in one call
         run = await scenario.run(run_name="sdk-smoketest-async-await")
+        devbox = None
 
         try:
             assert run.id is not None
@@ -123,4 +178,85 @@ async def test_scenario_run_and_await_env_ready(self, async_sdk_client: AsyncRun
             try:
                 await run.cancel()
             except Exception:
-                pass
+                if devbox:
+                    await devbox.shutdown()
+
+
+class TestAsyncScenarioBuilder:
+    """Smoketests that build scenarios with AsyncScenarioBuilder and push/update them via the SDK."""
+
+    @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+    async def test_scenario_builder_minimal(self, async_sdk_client: AsyncRunloopSDK) -> None:
+        """Test creating/updating a minimal scenario with just problem statement and scorer."""
+        builder = (
+            async_sdk_client.scenario.builder("sdk-smoketest-async-builder-minimal")
+            .with_problem_statement("Async minimal test problem statement")
+            .with_metadata(SMOKETEST_METADATA)
+            .add_command_scorer("async-minimal-scorer", command="echo 1.0")
+        )
+
+        info = await push_or_update_scenario(async_sdk_client, builder)
+
+        assert info.name == "sdk-smoketest-async-builder-minimal"
+        assert info.input_context.problem_statement == "Async minimal test problem statement"
+        assert len(info.scoring_contract.scoring_function_parameters) == 1
+        assert info.scoring_contract.scoring_function_parameters[0].name == "async-minimal-scorer"
+
+    @pytest.mark.timeout(FIVE_MINUTE_TIMEOUT)
+    async def test_scenario_builder_with_blueprint(self, async_sdk_client: AsyncRunloopSDK) -> None:
+        """Test creating/updating a scenario from a blueprint."""
+        # The blueprint name comes from unique_name(); the scenario name is a fixed literal
+        blueprint = await async_sdk_client.blueprint.create(
+            name=unique_name("sdk-smoketest-async-scenario-bp"),
+            dockerfile="FROM ubuntu:20.04",
+        )
+
+        builder = (
+            async_sdk_client.scenario.builder("sdk-smoketest-async-builder-blueprint")
+            .from_blueprint(blueprint)
+            .with_working_directory("/home/user")
+            .with_problem_statement("Async blueprint test problem")
+            .with_metadata(SMOKETEST_METADATA)
+            .add_command_scorer("async-blueprint-scorer", command="echo 1.0")
+        )
+
+        info = await push_or_update_scenario(async_sdk_client, builder)
+
+        assert info.name == "sdk-smoketest-async-builder-blueprint"
+        assert info.input_context.problem_statement == "Async blueprint test problem"
+        assert info.environment is not None
+        assert info.environment.blueprint_id == blueprint.id
+        assert info.environment.working_directory == "/home/user"
+
+    @pytest.mark.timeout(TEN_MINUTE_TIMEOUT)
+    async def test_scenario_builder_with_snapshot(self, async_sdk_client: AsyncRunloopSDK) -> None:
+        """Test creating/updating a scenario from a snapshot."""
+        # Create blueprint -> devbox -> snapshot chain
+        # (the blueprint only boots the devbox; the builder is given the snapshot alone)
+        blueprint = await async_sdk_client.blueprint.create(
+            name=unique_name("sdk-smoketest-async-scenario-snap-bp"),
+            dockerfile="FROM ubuntu:20.04",
+        )
+        devbox = await async_sdk_client.devbox.create(blueprint_id=blueprint.id)
+        snapshot = await devbox.snapshot_disk(name=unique_name("sdk-smoketest-async-scenario-snap"))
+
+        # Best-effort shutdown: the devbox is not needed once the snapshot exists, so errors are ignored
+        try:
+            await devbox.shutdown()
+        except Exception:
+            pass
+
+        builder = (
+            async_sdk_client.scenario.builder("sdk-smoketest-async-builder-snapshot")
+            .from_snapshot(snapshot)
+            .with_problem_statement("Async snapshot test problem")
+            .with_metadata(SMOKETEST_METADATA)
+            .add_command_scorer("async-snapshot-scorer", command="echo 1.0")
+        )
+
+        info = await push_or_update_scenario(async_sdk_client, builder)
+
+        assert info.name == "sdk-smoketest-async-builder-snapshot"
+        assert info.input_context.problem_statement == "Async snapshot test problem"
+        assert info.environment is not None
+        assert info.environment.snapshot_id == snapshot.id
diff --git a/tests/smoketests/sdk/test_scenario.py b/tests/smoketests/sdk/test_scenario.py
index af8d81486..0e9c82f73 100644
--- a/tests/smoketests/sdk/test_scenario.py
+++ b/tests/smoketests/sdk/test_scenario.py
@@ -4,12 +4,62 @@
 
 import pytest
 
-from runloop_api_client.sdk import RunloopSDK
+from runloop_api_client.sdk import RunloopSDK, ScenarioBuilder
+from tests.smoketests.utils import unique_name
+from runloop_api_client.types import ScenarioView
+from runloop_api_client.sdk._types import SDKScenarioUpdateParams
+from runloop_api_client.sdk._helpers import filter_params
 
 pytestmark = [pytest.mark.smoketest]
 
 TWO_MINUTE_TIMEOUT = 120
 FIVE_MINUTE_TIMEOUT = 300
+TEN_MINUTE_TIMEOUT = 600
+
+# Metadata tag for all smoketest scenarios (for easy identification/cleanup)
+SMOKETEST_METADATA = {"smoketest": "true"}
+
+
+def push_or_update_scenario(sdk_client: RunloopSDK, builder: ScenarioBuilder) -> ScenarioView:
+    """Push a new scenario or update existing one with the same name.
+
+    This is a workaround until scenario delete endpoint is available.
+    Uses fixed scenario names to avoid littering the platform with test scenarios.
+
+    When updating an existing scenario, this function will delete the OLD blueprint/snapshot
+    that's no longer needed (if different from the new one). The NEW blueprint/snapshot
+    is kept so the scenario remains runnable.
+    """
+    # Check if scenario already exists
+    scenarios = sdk_client.scenario.list(name=builder.name, limit=1)
+
+    if scenarios:
+        # Get old scenario info to find old blueprint/snapshot IDs
+        scenario = scenarios[0]
+        env = scenario.get_info().environment
+        old_blueprint_id = env.blueprint_id if env else None
+        old_snapshot_id = env.snapshot_id if env else None
+
+        # Get new blueprint/snapshot IDs from builder
+        new_blueprint_id = builder._blueprint.id if builder._blueprint else None
+        new_snapshot_id = builder._snapshot.id if builder._snapshot else None
+
+        # Update existing scenario with builder's params
+        params = builder._build_params()
+        result = scenario.update(**filter_params(params, SDKScenarioUpdateParams))
+
+        # Delete OLD blueprint/snapshot if they're being replaced
+        if old_blueprint_id and old_blueprint_id != new_blueprint_id:
+            sdk_client.blueprint.from_id(old_blueprint_id).delete()
+
+        if old_snapshot_id and old_snapshot_id != new_snapshot_id:
+            sdk_client.snapshot.from_id(old_snapshot_id).delete()
+
+        return result
+    else:
+        # Create new scenario - keep the blueprint/snapshot (scenario needs them)
+        scenario = builder.push()
+        return scenario.get_info()
 
 
 class TestScenarioRetrieval:
@@ -52,7 +102,7 @@ class TestScenarioRun:
     """Test scenario run operations."""
 
     @pytest.mark.timeout(FIVE_MINUTE_TIMEOUT)
-    def test_scenario_run_lifecycle(self, sdk_client: RunloopSDK) -> None:
+    def test_scenario_run_async_lifecycle(self, sdk_client: RunloopSDK) -> None:
         """Test running a scenario and accessing the devbox.
 
         This test:
@@ -63,7 +113,7 @@ def test_scenario_run_lifecycle(self, sdk_client: RunloopSDK) -> None:
         5. Cancels the run
         """
         # Find a scenario to run
-        scenarios = sdk_client.scenario.list(limit=5)
+        scenarios = sdk_client.scenario.list(limit=1)
         if not scenarios:
             pytest.skip("No scenarios available to test run")
 
@@ -72,6 +122,7 @@ def test_scenario_run_lifecycle(self, sdk_client: RunloopSDK) -> None:
 
         # Start a run
         run = scenario.run_async(run_name="sdk-smoketest-run")
+        devbox = None
 
         try:
             assert run.id is not None
@@ -82,7 +133,8 @@ def test_scenario_run_lifecycle(self, sdk_client: RunloopSDK) -> None:
 
             # Access devbox
             devbox = run.devbox
-            assert devbox.id == run.devbox_id
+            info = devbox.get_info()
+            assert info.status == "running"
 
             # Get run info
             info = run.get_info()
@@ -94,13 +146,14 @@ def test_scenario_run_lifecycle(self, sdk_client: RunloopSDK) -> None:
             try:
                 run.cancel()
             except Exception:
-                pass  # Best effort cleanup
+                if devbox:
+                    devbox.shutdown()
 
     @pytest.mark.timeout(FIVE_MINUTE_TIMEOUT)
-    def test_scenario_run_and_await_env_ready(self, sdk_client: RunloopSDK) -> None:
-        """Test run_and_await_env_ready convenience method."""
+    def test_scenario_run(self, sdk_client: RunloopSDK) -> None:
+        """Test run convenience method."""
         # Find a scenario to run
-        scenarios = sdk_client.scenario.list(limit=5)
+        scenarios = sdk_client.scenario.list(limit=1)
         if not scenarios:
             pytest.skip("No scenarios available to test run")
 
@@ -108,6 +161,7 @@ def test_scenario_run_and_await_env_ready(self, sdk_client: RunloopSDK) -> None:
 
         # Start a run and wait for environment in one call
         run = scenario.run(run_name="sdk-smoketest-await")
+        devbox = None
 
         try:
             assert run.id is not None
@@ -123,4 +177,85 @@ def test_scenario_run_and_await_env_ready(self, sdk_client: RunloopSDK) -> None:
             try:
                 run.cancel()
             except Exception:
-                pass
+                if devbox:
+                    devbox.shutdown()
+
+
+class TestScenarioBuilder:
+    """Test ScenarioBuilder operations."""
+
+    @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+    def test_scenario_builder_minimal(self, sdk_client: RunloopSDK) -> None:
+        """Test creating/updating a minimal scenario with just problem statement and scorer."""
+        builder = (
+            sdk_client.scenario.builder("sdk-smoketest-builder-minimal")
+            .with_problem_statement("Minimal test problem statement")
+            .with_metadata(SMOKETEST_METADATA)
+            .add_command_scorer("minimal-scorer", command="echo 1.0")
+        )
+
+        info = push_or_update_scenario(sdk_client, builder)
+
+        assert info.name == "sdk-smoketest-builder-minimal"
+        assert info.input_context.problem_statement == "Minimal test problem statement"
+        assert len(info.scoring_contract.scoring_function_parameters) == 1
+        assert info.scoring_contract.scoring_function_parameters[0].name == "minimal-scorer"
+
+    @pytest.mark.timeout(FIVE_MINUTE_TIMEOUT)
+    def test_scenario_builder_with_blueprint(self, sdk_client: RunloopSDK) -> None:
+        """Test creating/updating a scenario from a blueprint.
+        """
+        blueprint = sdk_client.blueprint.create(
+            name=unique_name("sdk-smoketest-scenario-bp"),
+            dockerfile="FROM ubuntu:20.04",
+        )
+
+        builder = (
+            sdk_client.scenario.builder("sdk-smoketest-builder-blueprint")
+            .from_blueprint(blueprint)
+            .with_working_directory("/home/user")
+            .with_problem_statement("Blueprint test problem")
+            .with_metadata(SMOKETEST_METADATA)
+            .add_command_scorer("blueprint-scorer", command="echo 1.0")
+        )
+
+        info = push_or_update_scenario(sdk_client, builder)
+
+        assert info.name == "sdk-smoketest-builder-blueprint"
+        assert info.input_context.problem_statement == "Blueprint test problem"
+        assert info.environment is not None
+        assert info.environment.blueprint_id == blueprint.id
+        assert info.environment.working_directory == "/home/user"
+
+    @pytest.mark.timeout(TEN_MINUTE_TIMEOUT)
+    def test_scenario_builder_with_snapshot(self, sdk_client: RunloopSDK) -> None:
+        """Test creating/updating a scenario from a snapshot.
+        """
+        # Create blueprint -> devbox -> snapshot chain
+        blueprint = sdk_client.blueprint.create(
+            name=unique_name("sdk-smoketest-scenario-snap-bp"),
+            dockerfile="FROM ubuntu:20.04",
+        )
+        devbox = sdk_client.devbox.create(blueprint_id=blueprint.id)
+        snapshot = devbox.snapshot_disk(name=unique_name("sdk-smoketest-scenario-snap"))
+
+        # Shut down the devbox - it's not needed after creating the snapshot
+        try:
+            devbox.shutdown()
+        except Exception:
+            pass
+
+        builder = (
+            sdk_client.scenario.builder("sdk-smoketest-builder-snapshot")
+            .from_snapshot(snapshot)
+            .with_problem_statement("Snapshot test problem")
+            .with_metadata(SMOKETEST_METADATA)
+            .add_command_scorer("snapshot-scorer", command="echo 1.0")
+        )
+
+        info = push_or_update_scenario(sdk_client, builder)
+
+        assert info.name == "sdk-smoketest-builder-snapshot"
+        assert info.input_context.problem_statement == "Snapshot test problem"
+        assert info.environment is not None
+        assert info.environment.snapshot_id == snapshot.id

From c145f3b1d1a263005ad75a79cc91c196bccae327 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 16:58:07 -0800
Subject: [PATCH 10/31] update sdk smoke tests with all ops

---
 tests/smoketests/sdk/test_async_sdk.py | 5 ++++-
 tests/smoketests/sdk/test_sdk.py       | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/tests/smoketests/sdk/test_async_sdk.py b/tests/smoketests/sdk/test_async_sdk.py
index 49f7e961d..fd8c03ca8 100644
--- a/tests/smoketests/sdk/test_async_sdk.py
+++ b/tests/smoketests/sdk/test_async_sdk.py
@@ -16,12 +16,15 @@ class TestAsyncRunloopSDKInitialization:
 
     @pytest.mark.timeout(THIRTY_SECOND_TIMEOUT)
     async def test_sdk_instance_creation(self, async_sdk_client: AsyncRunloopSDK) -> None:
-        """Test that async SDK instance is created successfully with all client properties."""
+        """Test that async SDK instance is created successfully with all operations."""
         assert async_sdk_client is not None
         assert async_sdk_client.devbox is not None
         assert async_sdk_client.blueprint is not None
         assert async_sdk_client.snapshot is not None
         assert async_sdk_client.storage_object is not None
+        assert async_sdk_client.scorer is not None
+        assert async_sdk_client.agent is not None
+        assert async_sdk_client.scenario is not None
 
     @pytest.mark.timeout(THIRTY_SECOND_TIMEOUT)
     async def test_legacy_api_access(self, async_sdk_client: AsyncRunloopSDK) -> None:
diff --git a/tests/smoketests/sdk/test_sdk.py b/tests/smoketests/sdk/test_sdk.py
index b55a98112..f79b88d43 100644
--- a/tests/smoketests/sdk/test_sdk.py
+++ b/tests/smoketests/sdk/test_sdk.py
@@ -16,12 +16,15 @@ class TestRunloopSDKInitialization:
 
     @pytest.mark.timeout(THIRTY_SECOND_TIMEOUT)
     def test_sdk_instance_creation(self, sdk_client: RunloopSDK) -> None:
-        """Test that SDK instance is created successfully with all client properties."""
+        """Test that SDK instance is created successfully with all operations."""
         assert sdk_client is not None
         assert sdk_client.devbox is not None
         assert sdk_client.blueprint is not None
         assert sdk_client.snapshot is not None
         assert sdk_client.storage_object is not None
+        assert sdk_client.scorer is not None
+        assert sdk_client.agent is not None
+        assert sdk_client.scenario is not None
 
     @pytest.mark.timeout(THIRTY_SECOND_TIMEOUT)
     def test_legacy_api_access(self, sdk_client: RunloopSDK) -> None:

From c1e993670cc301e0c6d6b954a492bec05a960d27 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 17:01:45 -0800
Subject: [PATCH 11/31] avoid modifying _scorers internal state when normalizing
 weights (create copy instead)

---
 src/runloop_api_client/sdk/async_scenario_builder.py | 5 ++---
 src/runloop_api_client/sdk/scenario_builder.py       | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index 9f64993ff..b46fa1abe 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -401,8 +401,7 @@ def _build_params(self) -> ScenarioCreateParams:
 
         # Normalize weights to sum to 1.0
         total_weight = sum(s["weight"] for s in self._scorers)
-        for s in self._scorers:
-            s["weight"] = s["weight"] / total_weight
+        normalized_scorers: List[ScoringFunctionParam] = [{**s, "weight": s["weight"] / total_weight} for s in self._scorers]
 
         params: ScenarioCreateParams = {
             "name": self._name,
@@ -410,7 +409,7 @@ def _build_params(self) -> ScenarioCreateParams:
                 "problem_statement": self._problem_statement,
             },
             "scoring_contract": {
-                "scoring_function_parameters": self._scorers,
+                "scoring_function_parameters": normalized_scorers,
             },
         }
 
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index 295d90f24..326f8d033 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -401,8 +401,7 @@ def _build_params(self) -> ScenarioCreateParams:
 
         # Normalize weights to sum to 1.0
         total_weight = sum(s["weight"] for s in self._scorers)
-        for s in self._scorers:
-            s["weight"] = s["weight"] / total_weight
+        normalized_scorers: List[ScoringFunctionParam] = [{**s, "weight": s["weight"] / total_weight} for s in self._scorers]
 
         params: ScenarioCreateParams = {
             "name": self._name,
@@ -410,7 +409,7 @@ def _build_params(self) -> ScenarioCreateParams:
                 "problem_statement": self._problem_statement,
             },
             "scoring_contract": {
-                "scoring_function_parameters": self._scorers,
+                "scoring_function_parameters": normalized_scorers,
             },
         }
 

From 259248233da28f380e8416a429e85ab66b79ca54 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 17:02:12 -0800
Subject: [PATCH 12/31] formatting fixes

---
 src/runloop_api_client/sdk/async_scenario_builder.py | 4 +++-
 src/runloop_api_client/sdk/scenario_builder.py       | 4 +++-
 tests/smoketests/sdk/test_async_scenario.py          | 6 ++----
 tests/smoketests/sdk/test_scenario.py                | 6 ++----
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index b46fa1abe..a04e399fd 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -401,7 +401,9 @@ def _build_params(self) -> ScenarioCreateParams:
 
         # Normalize weights to sum to 1.0
         total_weight = sum(s["weight"] for s in self._scorers)
-        normalized_scorers: List[ScoringFunctionParam] = [{**s, "weight": s["weight"] / total_weight} for s in self._scorers]
+        normalized_scorers: List[ScoringFunctionParam] = [
+            {**s, "weight": s["weight"] / total_weight} for s in self._scorers
+        ]
 
         params: ScenarioCreateParams = {
             "name": self._name,
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index 326f8d033..f76c34561 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -401,7 +401,9 @@ def _build_params(self) -> ScenarioCreateParams:
 
         # Normalize weights to sum to 1.0
         total_weight = sum(s["weight"] for s in self._scorers)
-        normalized_scorers: List[ScoringFunctionParam] = [{**s, "weight": s["weight"] / total_weight} for s in self._scorers]
+        normalized_scorers: List[ScoringFunctionParam] = [
+            {**s, "weight": s["weight"] / total_weight} for s in self._scorers
+        ]
 
         params: ScenarioCreateParams = {
             "name": self._name,
diff --git a/tests/smoketests/sdk/test_async_scenario.py b/tests/smoketests/sdk/test_async_scenario.py
index 20a23e47e..43a3b842b 100644
--- a/tests/smoketests/sdk/test_async_scenario.py
+++ b/tests/smoketests/sdk/test_async_scenario.py
@@ -204,8 +204,7 @@ async def test_scenario_builder_minimal(self, async_sdk_client: AsyncRunloopSDK)
 
     @pytest.mark.timeout(FIVE_MINUTE_TIMEOUT)
     async def test_scenario_builder_with_blueprint(self, async_sdk_client: AsyncRunloopSDK) -> None:
-        """Test creating/updating a scenario from a blueprint.
-        """
+        """Test creating/updating a scenario from a blueprint."""
         blueprint = await async_sdk_client.blueprint.create(
             name=unique_name("sdk-smoketest-async-scenario-bp"),
             dockerfile="FROM ubuntu:20.04",
@@ -230,8 +229,7 @@ async def test_scenario_builder_with_blueprint(self, async_sdk_client: AsyncRunl
 
     @pytest.mark.timeout(TEN_MINUTE_TIMEOUT)
     async def test_scenario_builder_with_snapshot(self, async_sdk_client: AsyncRunloopSDK) -> None:
-        """Test creating/updating a scenario from a snapshot.
-        """
+        """Test creating/updating a scenario from a snapshot."""
         # Create blueprint -> devbox -> snapshot chain
         blueprint = await async_sdk_client.blueprint.create(
             name=unique_name("sdk-smoketest-async-scenario-snap-bp"),
diff --git a/tests/smoketests/sdk/test_scenario.py b/tests/smoketests/sdk/test_scenario.py
index 0e9c82f73..e69ff3f7c 100644
--- a/tests/smoketests/sdk/test_scenario.py
+++ b/tests/smoketests/sdk/test_scenario.py
@@ -203,8 +203,7 @@ def test_scenario_builder_minimal(self, sdk_client: RunloopSDK) -> None:
 
     @pytest.mark.timeout(FIVE_MINUTE_TIMEOUT)
     def test_scenario_builder_with_blueprint(self, sdk_client: RunloopSDK) -> None:
-        """Test creating/updating a scenario from a blueprint.
-        """
+        """Test creating/updating a scenario from a blueprint."""
         blueprint = sdk_client.blueprint.create(
             name=unique_name("sdk-smoketest-scenario-bp"),
             dockerfile="FROM ubuntu:20.04",
@@ -229,8 +228,7 @@ def test_scenario_builder_with_blueprint(self, sdk_client: RunloopSDK) -> None:
 
     @pytest.mark.timeout(TEN_MINUTE_TIMEOUT)
     def test_scenario_builder_with_snapshot(self, sdk_client: RunloopSDK) -> None:
-        """Test creating/updating a scenario from a snapshot.
-        """
+        """Test creating/updating a scenario from a snapshot."""
         # Create blueprint -> devbox -> snapshot chain
         blueprint = sdk_client.blueprint.create(
             name=unique_name("sdk-smoketest-scenario-snap-bp"),

From 87501370caae5d1a31397274863cd65fcc21d4d8 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 17:45:56 -0800
Subject: [PATCH 13/31] update builder docstrings to use fluent pattern,
 rename all RunloopSDK() instance variables to 'runloop' instead of 'sdk' or
 'client'

---
 README.md                                     |  4 +-
 src/runloop_api_client/sdk/async_.py          | 10 +++--
 .../sdk/async_scenario_builder.py             | 12 +++---
 .../sdk/scenario_builder.py                   | 12 +++---
 src/runloop_api_client/sdk/sync.py            | 10 +++--
 tests/sdk/test_async_ops.py                   | 34 ++++++++---------
 tests/sdk/test_ops.py                         | 38 +++++++++----------
 tests/smoketests/sdk/conftest.py              | 14 +++----
 8 files changed, 71 insertions(+), 63 deletions(-)

diff --git a/README.md b/README.md
index 180f822bc..14e808a10 100644
--- a/README.md
+++ b/README.md
@@ -33,10 +33,10 @@ For a higher-level, Pythonic interface, check out the new [`RunloopSDK`](README-
 ```python
 from runloop_api_client import RunloopSDK
 
-sdk = RunloopSDK()  # Uses RUNLOOP_API_KEY environment variable by default
+runloop = RunloopSDK()  # Uses RUNLOOP_API_KEY environment variable by default
 
 # Create a devbox and execute commands with a clean, object-oriented interface
-with sdk.devbox.create(name="my-devbox") as devbox:
+with runloop.devbox.create(name="my-devbox") as devbox:
     result = devbox.cmd.exec("echo 'Hello from Runloop!'")
     print(result.stdout())
 ```
diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py
index a3ef1ffe5..558af33ba 100644
--- a/src/runloop_api_client/sdk/async_.py
+++ b/src/runloop_api_client/sdk/async_.py
@@ -776,10 +776,12 @@ class AsyncScenarioOps:
         >>> scenarios = await runloop.scenario.list()
 
     Example using builder:
-        >>> builder = runloop.scenario.builder("my-scenario")
-        >>> builder.from_blueprint_id("bp-xxx")
-        >>> builder.with_problem_statement("Fix the bug")
-        >>> builder.add_test_scorer("tests", test_command="pytest")
+        >>> builder = (
+        ...     runloop.scenario.builder("my-scenario")
+        ...     .from_blueprint(blueprint)
+        ...     .with_problem_statement("Fix the bug")
+        ...     .add_test_scorer("tests", test_command="pytest")
+        ... )
         >>> scenario = await builder.push()
     """
 
diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index a04e399fd..d83210a8e 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -31,11 +31,13 @@ class AsyncScenarioBuilder:
     before pushing it to the platform.
 
     Example:
-        >>> builder = sdk.scenario.builder("my-scenario")
-        >>> builder.from_blueprint(blueprint)
-        >>> builder.with_working_directory("/app")
-        >>> builder.with_problem_statement("Fix the bug in main.py")
-        >>> builder.add_test_scorer("tests", test_command="pytest")
+        >>> builder = (
+        ...     runloop.scenario.builder("my-scenario")
+        ...     .from_blueprint(blueprint)
+        ...     .with_working_directory("/app")
+        ...     .with_problem_statement("Fix the bug in main.py")
+        ...     .add_test_scorer("tests", test_command="pytest")
+        ... )
         >>> scenario = await builder.push()
     """
 
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index f76c34561..3c17b8e31 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -31,11 +31,13 @@ class ScenarioBuilder:
     before pushing it to the platform.
 
     Example:
-        >>> builder = sdk.scenario.builder("my-scenario")
-        >>> builder.from_blueprint(blueprint)
-        >>> builder.with_working_directory("/app")
-        >>> builder.with_problem_statement("Fix the bug in main.py")
-        >>> builder.add_test_scorer("tests", test_command="pytest")
+        >>> builder = (
+        ...     runloop.scenario.builder("my-scenario")
+        ...     .from_blueprint(blueprint)
+        ...     .with_working_directory("/app")
+        ...     .with_problem_statement("Fix the bug in main.py")
+        ...     .add_test_scorer("tests", test_command="pytest")
+        ... )
         >>> scenario = builder.push()
     """
 
diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py
index 151ce5c73..1252710c4 100644
--- a/src/runloop_api_client/sdk/sync.py
+++ b/src/runloop_api_client/sdk/sync.py
@@ -797,10 +797,12 @@ class ScenarioOps:
         >>> scenarios = runloop.scenario.list()
 
     Example using builder:
-        >>> builder = runloop.scenario.builder("my-scenario")
-        >>> builder.from_blueprint_id("bp-xxx")
-        >>> builder.with_problem_statement("Fix the bug")
-        >>> builder.add_test_scorer("tests", test_command="pytest")
+        >>> builder = (
+        ...     runloop.scenario.builder("my-scenario")
+        ...     .from_blueprint(blueprint)
+        ...     .with_problem_statement("Fix the bug")
+        ...     .add_test_scorer("tests", test_command="pytest")
+        ... )
         >>> scenario = builder.push()
     """
 
diff --git a/tests/sdk/test_async_ops.py b/tests/sdk/test_async_ops.py
index 49c4c4006..9fca5bb4b 100644
--- a/tests/sdk/test_async_ops.py
+++ b/tests/sdk/test_async_ops.py
@@ -1193,33 +1193,33 @@ class TestAsyncRunloopSDK:
 
     def test_init(self) -> None:
         """Test AsyncRunloopSDK initialization."""
-        sdk = AsyncRunloopSDK(bearer_token="test-token")
-        assert sdk.api is not None
-        assert isinstance(sdk.agent, AsyncAgentOps)
-        assert isinstance(sdk.devbox, AsyncDevboxOps)
-        assert isinstance(sdk.scorer, AsyncScorerOps)
-        assert isinstance(sdk.snapshot, AsyncSnapshotOps)
-        assert isinstance(sdk.blueprint, AsyncBlueprintOps)
-        assert isinstance(sdk.storage_object, AsyncStorageObjectOps)
+        runloop = AsyncRunloopSDK(bearer_token="test-token")
+        assert runloop.api is not None
+        assert isinstance(runloop.agent, AsyncAgentOps)
+        assert isinstance(runloop.devbox, AsyncDevboxOps)
+        assert isinstance(runloop.scorer, AsyncScorerOps)
+        assert isinstance(runloop.snapshot, AsyncSnapshotOps)
+        assert isinstance(runloop.blueprint, AsyncBlueprintOps)
+        assert isinstance(runloop.storage_object, AsyncStorageObjectOps)
 
     @pytest.mark.asyncio
     async def test_aclose(self) -> None:
         """Test aclose method."""
-        sdk = AsyncRunloopSDK(bearer_token="test-token")
+        runloop = AsyncRunloopSDK(bearer_token="test-token")
         # Verify aclose doesn't raise
-        await sdk.aclose()
+        await runloop.aclose()
 
     @pytest.mark.asyncio
     async def test_context_manager(self) -> None:
         """Test context manager behavior."""
-        async with AsyncRunloopSDK(bearer_token="test-token") as sdk:
-            assert sdk.api is not None
+        async with AsyncRunloopSDK(bearer_token="test-token") as runloop:
+            assert runloop.api is not None
         # Verify context manager properly closes (implementation detail of context manager protocol)
 
     def test_api_property(self) -> None:
         """Test api property access."""
-        sdk = AsyncRunloopSDK(bearer_token="test-token")
-        assert sdk.api is not None
-        assert hasattr(sdk.api, "devboxes")
-        assert hasattr(sdk.api, "blueprints")
-        assert hasattr(sdk.api, "objects")
+        runloop = AsyncRunloopSDK(bearer_token="test-token")
+        assert runloop.api is not None
+        assert hasattr(runloop.api, "devboxes")
+        assert hasattr(runloop.api, "blueprints")
+        assert hasattr(runloop.api, "objects")
diff --git a/tests/sdk/test_ops.py b/tests/sdk/test_ops.py
index 724b2ee17..f7a566265 100644
--- a/tests/sdk/test_ops.py
+++ b/tests/sdk/test_ops.py
@@ -1070,36 +1070,36 @@ class TestRunloopSDK:
 
     def test_init(self) -> None:
         """Test RunloopSDK initialization."""
-        sdk = RunloopSDK(bearer_token="test-token")
-        assert sdk.api is not None
-        assert isinstance(sdk.agent, AgentOps)
-        assert isinstance(sdk.devbox, DevboxOps)
-        assert isinstance(sdk.scorer, ScorerOps)
-        assert isinstance(sdk.snapshot, SnapshotOps)
-        assert isinstance(sdk.blueprint, BlueprintOps)
-        assert isinstance(sdk.storage_object, StorageObjectOps)
+        runloop = RunloopSDK(bearer_token="test-token")
+        assert runloop.api is not None
+        assert isinstance(runloop.agent, AgentOps)
+        assert isinstance(runloop.devbox, DevboxOps)
+        assert isinstance(runloop.scorer, ScorerOps)
+        assert isinstance(runloop.snapshot, SnapshotOps)
+        assert isinstance(runloop.blueprint, BlueprintOps)
+        assert isinstance(runloop.storage_object, StorageObjectOps)
 
     def test_init_with_max_retries(self) -> None:
         """Test RunloopSDK initialization with max_retries."""
-        sdk = RunloopSDK(bearer_token="test-token", max_retries=3)
-        assert sdk.api is not None
+        runloop = RunloopSDK(bearer_token="test-token", max_retries=3)
+        assert runloop.api is not None
 
     def test_close(self) -> None:
         """Test close method."""
-        sdk = RunloopSDK(bearer_token="test-token")
+        runloop = RunloopSDK(bearer_token="test-token")
         # Verify close doesn't raise
-        sdk.close()
+        runloop.close()
 
     def test_context_manager(self) -> None:
         """Test context manager behavior."""
-        with RunloopSDK(bearer_token="test-token") as sdk:
-            assert sdk.api is not None
+        with RunloopSDK(bearer_token="test-token") as runloop:
+            assert runloop.api is not None
         # Verify context manager properly closes (implementation detail of context manager protocol)
 
     def test_api_property(self) -> None:
         """Test api property access."""
-        sdk = RunloopSDK(bearer_token="test-token")
-        assert sdk.api is not None
-        assert hasattr(sdk.api, "devboxes")
-        assert hasattr(sdk.api, "blueprints")
-        assert hasattr(sdk.api, "objects")
+        runloop = RunloopSDK(bearer_token="test-token")
+        assert runloop.api is not None
+        assert hasattr(runloop.api, "devboxes")
+        assert hasattr(runloop.api, "blueprints")
+        assert hasattr(runloop.api, "objects")
diff --git a/tests/smoketests/sdk/conftest.py b/tests/smoketests/sdk/conftest.py
index 003b0f314..b17a4cc1c 100644
--- a/tests/smoketests/sdk/conftest.py
+++ b/tests/smoketests/sdk/conftest.py
@@ -24,16 +24,16 @@ def sdk_client() -> Iterator[RunloopSDK]:
     if not bearer_token:
         pytest.skip("RUNLOOP_API_KEY environment variable not set")
 
-    client = RunloopSDK(
+    runloop = RunloopSDK(
         bearer_token=bearer_token,
         base_url=base_url,
     )
 
     try:
-        yield client
+        yield runloop
     finally:
         try:
-            client.close()
+            runloop.close()
         except Exception:
             pass
 
@@ -52,17 +52,17 @@ async def async_sdk_client() -> AsyncIterator[AsyncRunloopSDK]:
     if not bearer_token:
         pytest.skip("RUNLOOP_API_KEY environment variable not set")
 
-    client = AsyncRunloopSDK(
+    runloop = AsyncRunloopSDK(
         bearer_token=bearer_token,
         base_url=base_url,
     )
 
     try:
-        async with client:
-            yield client
+        async with runloop:
+            yield runloop
     except Exception:
         # If context manager fails, try manual cleanup
         try:
-            await client.aclose()
+            await runloop.aclose()
         except Exception:
             pass

From b2084a9beda3a0579df5522eece67056448f8cc1 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 17:59:35 -0800
Subject: [PATCH 14/31] clarify from_blueprint and from_snapshot docstrings

---
 src/runloop_api_client/sdk/async_scenario_builder.py | 4 ++--
 src/runloop_api_client/sdk/scenario_builder.py       | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index d83210a8e..4138cbb32 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -85,7 +85,7 @@ def name(self) -> str:
         return self._name
 
     def from_blueprint(self, blueprint: AsyncBlueprint) -> Self:
-        """Set the blueprint for the scenario environment.
+        """Set a blueprint to define the baseline environment for the scenario.
 
         :param blueprint: Blueprint to use
         :type blueprint: AsyncBlueprint
@@ -97,7 +97,7 @@ def from_blueprint(self, blueprint: AsyncBlueprint) -> Self:
         return self
 
     def from_snapshot(self, snapshot: AsyncSnapshot) -> Self:
-        """Set the snapshot for the scenario environment.
+        """Set a snapshot to define the baseline environment for the scenario.
 
         :param snapshot: Snapshot to use
         :type snapshot: AsyncSnapshot
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index 3c17b8e31..17951f280 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -85,7 +85,7 @@ def name(self) -> str:
         return self._name
 
     def from_blueprint(self, blueprint: Blueprint) -> Self:
-        """Set the blueprint for the scenario environment.
+        """Set a blueprint to define the baseline environment for the scenario.
 
         :param blueprint: Blueprint to use
         :type blueprint: Blueprint
@@ -97,7 +97,7 @@ def from_blueprint(self, blueprint: Blueprint) -> Self:
         return self
 
     def from_snapshot(self, snapshot: Snapshot) -> Self:
-        """Set the snapshot for the scenario environment.
+        """Set a snapshot to define the baseline environment for the scenario.
 
         :param snapshot: Snapshot to use
         :type snapshot: Snapshot

From 6132ca8b922d26483236b6b1cc0becf68dcbcf2d Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 18:06:36 -0800
Subject: [PATCH 15/31] rename add_scorer methods to be more clear

---
 .../sdk/async_scenario_builder.py             |  8 +++---
 .../sdk/scenario_builder.py                   |  8 +++---
 tests/sdk/test_async_scenario_builder.py      | 26 +++++++++----------
 tests/sdk/test_scenario_builder.py            | 26 +++++++++----------
 tests/smoketests/sdk/test_async_scenario.py   |  6 ++---
 tests/smoketests/sdk/test_scenario.py         |  6 ++---
 6 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index 4138cbb32..5bc04eb0a 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -151,7 +151,7 @@ def _add_scorer(self, name: str, weight: float, scorer: Scorer) -> Self:
         self._scorers.append({"name": name, "weight": weight, "scorer": scorer})
         return self
 
-    def add_test_scorer(
+    def add_test_command_scorer(
         self,
         name: str,
         *,
@@ -180,7 +180,7 @@ def add_test_scorer(
             scorer["test_files"] = test_files
         return self._add_scorer(name, weight, scorer)
 
-    def add_command_scorer(
+    def add_shell_command_scorer(
         self,
         name: str,
         *,
@@ -204,7 +204,7 @@ def add_command_scorer(
         }
         return self._add_scorer(name, weight, scorer)
 
-    def add_bash_scorer(
+    def add_bash_script_scorer(
         self,
         name: str,
         *,
@@ -230,7 +230,7 @@ def add_bash_scorer(
         }
         return self._add_scorer(name, weight, scorer)
 
-    def add_python_scorer(
+    def add_python_script_scorer(
         self,
         name: str,
         *,
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index 17951f280..0ac06e31a 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -151,7 +151,7 @@ def _add_scorer(self, name: str, weight: float, scorer: Scorer) -> Self:
         self._scorers.append({"name": name, "weight": weight, "scorer": scorer})
         return self
 
-    def add_test_scorer(
+    def add_test_command_scorer(
         self,
         name: str,
         *,
@@ -180,7 +180,7 @@ def add_test_scorer(
             scorer["test_files"] = test_files
         return self._add_scorer(name, weight, scorer)
 
-    def add_command_scorer(
+    def add_shell_command_scorer(
         self,
         name: str,
         *,
@@ -204,7 +204,7 @@ def add_command_scorer(
         }
         return self._add_scorer(name, weight, scorer)
 
-    def add_bash_scorer(
+    def add_bash_script_scorer(
         self,
         name: str,
         *,
@@ -230,7 +230,7 @@ def add_bash_scorer(
         }
         return self._add_scorer(name, weight, scorer)
 
-    def add_python_scorer(
+    def add_python_script_scorer(
         self,
         name: str,
         *,
diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py
index 792b7c6b6..236aab520 100644
--- a/tests/sdk/test_async_scenario_builder.py
+++ b/tests/sdk/test_async_scenario_builder.py
@@ -74,7 +74,7 @@ def test_scorers(self, builder: AsyncScenarioBuilder) -> None:
         test_files: list[ScorerTestBasedScoringFunctionTestFile] = [
             {"file_path": "test_main.py", "file_contents": "def test_foo(): pass"}
         ]
-        result = builder.add_test_scorer("test-scorer", test_command="pytest", weight=2.0, test_files=test_files)
+        result = builder.add_test_command_scorer("test-scorer", test_command="pytest", weight=2.0, test_files=test_files)
         assert result is builder
         assert builder._scorers[0]["name"] == "test-scorer"
         assert builder._scorers[0]["weight"] == 2.0
@@ -83,17 +83,17 @@ def test_scorers(self, builder: AsyncScenarioBuilder) -> None:
         assert builder._scorers[0]["scorer"].get("test_files") == test_files
 
         # Command scorer
-        builder.add_command_scorer("cmd-scorer", command="./check.sh")
+        builder.add_shell_command_scorer("cmd-scorer", command="./check.sh")
         assert builder._scorers[1]["scorer"]["type"] == "command_scorer"
         assert builder._scorers[1]["scorer"].get("command") == "./check.sh"
 
         # Bash scorer
-        builder.add_bash_scorer("bash-scorer", bash_script="echo 'score=1.0'")
+        builder.add_bash_script_scorer("bash-scorer", bash_script="echo 'score=1.0'")
         assert builder._scorers[2]["scorer"]["type"] == "bash_script_scorer"
         assert builder._scorers[2]["scorer"].get("bash_script") == "echo 'score=1.0'"
 
         # Python scorer with optional params
-        builder.add_python_scorer(
+        builder.add_python_script_scorer(
             "python-scorer",
             python_script="print('1.0')",
             python_version_constraint=">=3.10",
@@ -121,15 +121,15 @@ def test_scorers(self, builder: AsyncScenarioBuilder) -> None:
     def test_add_scorer_rejects_invalid_weight(self, builder: AsyncScenarioBuilder) -> None:
         """Test that adding a scorer with zero or negative weight raises ValueError."""
         with pytest.raises(ValueError, match="Scorer weight must be positive"):
-            builder.add_bash_scorer("bad", bash_script="echo 1", weight=0.0)
+            builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=0.0)
 
         with pytest.raises(ValueError, match="Scorer weight must be positive"):
-            builder.add_bash_scorer("bad", bash_script="echo 1", weight=-1.0)
+            builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=-1.0)
 
     def test_build_params_validation(self, builder: AsyncScenarioBuilder) -> None:
         """Test _build_params raises for missing required fields."""
         # Missing problem statement
-        builder.add_test_scorer("test", test_command="pytest")
+        builder.add_test_command_scorer("test", test_command="pytest")
         with pytest.raises(ValueError, match="Problem statement is required"):
             builder._build_params()
 
@@ -143,7 +143,7 @@ def test_build_params_with_all_options(self, builder: AsyncScenarioBuilder, mock
         """Test _build_params with all optional fields set."""
         builder.with_problem_statement("Fix the bug")
         builder.with_additional_context({"hint": "line 42"})
-        builder.add_test_scorer("tests", test_command="pytest")
+        builder.add_test_command_scorer("tests", test_command="pytest")
         builder.from_blueprint(mock_blueprint)
         builder.with_working_directory("/app")
         builder.with_metadata({"team": "infra"})
@@ -168,9 +168,9 @@ def test_build_params_with_all_options(self, builder: AsyncScenarioBuilder, mock
     def test_build_params_normalizes_weights(self, builder: AsyncScenarioBuilder) -> None:
         """Test that _build_params normalizes scorer weights to sum to 1.0."""
         builder.with_problem_statement("Fix the bug")
-        builder.add_bash_scorer("scorer1", bash_script="echo 1", weight=1.0)
-        builder.add_bash_scorer("scorer2", bash_script="echo 2", weight=2.0)
-        builder.add_bash_scorer("scorer3", bash_script="echo 3", weight=3.0)
+        builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0)
+        builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0)
+        builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0)
 
         params = builder._build_params()
         scorers = params["scoring_contract"]["scoring_function_parameters"]
@@ -193,7 +193,7 @@ async def test_push_calls_api_and_returns_scenario(
         mock_async_client.scenarios.create.return_value.id = "scn-new-123"
 
         builder.with_problem_statement("Fix the bug")
-        builder.add_test_scorer("tests", test_command="pytest")
+        builder.add_test_command_scorer("tests", test_command="pytest")
 
         scenario = await builder.push()
 
@@ -211,7 +211,7 @@ def test_fluent_chaining(self, builder: AsyncScenarioBuilder, mock_blueprint: As
             .with_working_directory("/app")
             .with_problem_statement("Fix the bug")
             .with_additional_context({"hint": "check main.py"})
-            .add_test_scorer("tests", test_command="pytest")
+            .add_test_command_scorer("tests", test_command="pytest")
             .with_metadata({"team": "infra"})
             .with_reference_output("diff content")
             .with_required_env_vars(["API_KEY"])
diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py
index d9d90b67f..922fd6020 100644
--- a/tests/sdk/test_scenario_builder.py
+++ b/tests/sdk/test_scenario_builder.py
@@ -72,7 +72,7 @@ def test_scorers(self, builder: ScenarioBuilder) -> None:
         test_files: list[ScorerTestBasedScoringFunctionTestFile] = [
             {"file_path": "test_main.py", "file_contents": "def test_foo(): pass"}
         ]
-        result = builder.add_test_scorer("test-scorer", test_command="pytest", weight=2.0, test_files=test_files)
+        result = builder.add_test_command_scorer("test-scorer", test_command="pytest", weight=2.0, test_files=test_files)
         assert result is builder
         assert builder._scorers[0]["name"] == "test-scorer"
         assert builder._scorers[0]["weight"] == 2.0
@@ -81,17 +81,17 @@ def test_scorers(self, builder: ScenarioBuilder) -> None:
         assert builder._scorers[0]["scorer"].get("test_files") == test_files
 
         # Command scorer
-        builder.add_command_scorer("cmd-scorer", command="./check.sh")
+        builder.add_shell_command_scorer("cmd-scorer", command="./check.sh")
         assert builder._scorers[1]["scorer"]["type"] == "command_scorer"
         assert builder._scorers[1]["scorer"].get("command") == "./check.sh"
 
         # Bash scorer
-        builder.add_bash_scorer("bash-scorer", bash_script="echo 'score=1.0'")
+        builder.add_bash_script_scorer("bash-scorer", bash_script="echo 'score=1.0'")
         assert builder._scorers[2]["scorer"]["type"] == "bash_script_scorer"
         assert builder._scorers[2]["scorer"].get("bash_script") == "echo 'score=1.0'"
 
         # Python scorer with optional params
-        builder.add_python_scorer(
+        builder.add_python_script_scorer(
             "python-scorer",
             python_script="print('1.0')",
             python_version_constraint=">=3.10",
@@ -119,15 +119,15 @@ def test_scorers(self, builder: ScenarioBuilder) -> None:
     def test_add_scorer_rejects_invalid_weight(self, builder: ScenarioBuilder) -> None:
         """Test that adding a scorer with zero or negative weight raises ValueError."""
         with pytest.raises(ValueError, match="Scorer weight must be positive"):
-            builder.add_bash_scorer("bad", bash_script="echo 1", weight=0.0)
+            builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=0.0)
 
         with pytest.raises(ValueError, match="Scorer weight must be positive"):
-            builder.add_bash_scorer("bad", bash_script="echo 1", weight=-1.0)
+            builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=-1.0)
 
     def test_build_params_validation(self, builder: ScenarioBuilder) -> None:
         """Test _build_params raises for missing required fields."""
         # Missing problem statement
-        builder.add_test_scorer("test", test_command="pytest")
+        builder.add_test_command_scorer("test", test_command="pytest")
         with pytest.raises(ValueError, match="Problem statement is required"):
             builder._build_params()
 
@@ -141,7 +141,7 @@ def test_build_params_with_all_options(self, builder: ScenarioBuilder, mock_blue
         """Test _build_params with all optional fields set."""
         builder.with_problem_statement("Fix the bug")
         builder.with_additional_context({"hint": "line 42"})
-        builder.add_test_scorer("tests", test_command="pytest")
+        builder.add_test_command_scorer("tests", test_command="pytest")
         builder.from_blueprint(mock_blueprint)
         builder.with_working_directory("/app")
         builder.with_metadata({"team": "infra"})
@@ -166,9 +166,9 @@ def test_build_params_with_all_options(self, builder: ScenarioBuilder, mock_blue
     def test_build_params_normalizes_weights(self, builder: ScenarioBuilder) -> None:
         """Test that _build_params normalizes scorer weights to sum to 1.0."""
         builder.with_problem_statement("Fix the bug")
-        builder.add_bash_scorer("scorer1", bash_script="echo 1", weight=1.0)
-        builder.add_bash_scorer("scorer2", bash_script="echo 2", weight=2.0)
-        builder.add_bash_scorer("scorer3", bash_script="echo 3", weight=3.0)
+        builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0)
+        builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0)
+        builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0)
 
         params = builder._build_params()
         scorers = params["scoring_contract"]["scoring_function_parameters"]
@@ -188,7 +188,7 @@ def test_push_calls_api_and_returns_scenario(self, builder: ScenarioBuilder, moc
         mock_client.scenarios.create.return_value.id = "scn-new-123"
 
         builder.with_problem_statement("Fix the bug")
-        builder.add_test_scorer("tests", test_command="pytest")
+        builder.add_test_command_scorer("tests", test_command="pytest")
 
         scenario = builder.push()
 
@@ -206,7 +206,7 @@ def test_fluent_chaining(self, builder: ScenarioBuilder, mock_blueprint: Bluepri
             .with_working_directory("/app")
             .with_problem_statement("Fix the bug")
             .with_additional_context({"hint": "check main.py"})
-            .add_test_scorer("tests", test_command="pytest")
+            .add_test_command_scorer("tests", test_command="pytest")
             .with_metadata({"team": "infra"})
             .with_reference_output("diff content")
             .with_required_env_vars(["API_KEY"])
diff --git a/tests/smoketests/sdk/test_async_scenario.py b/tests/smoketests/sdk/test_async_scenario.py
index 43a3b842b..32359f957 100644
--- a/tests/smoketests/sdk/test_async_scenario.py
+++ b/tests/smoketests/sdk/test_async_scenario.py
@@ -192,7 +192,7 @@ async def test_scenario_builder_minimal(self, async_sdk_client: AsyncRunloopSDK)
             async_sdk_client.scenario.builder("sdk-smoketest-async-builder-minimal")
             .with_problem_statement("Async minimal test problem statement")
             .with_metadata(SMOKETEST_METADATA)
-            .add_command_scorer("async-minimal-scorer", command="echo 1.0")
+            .add_shell_command_scorer("async-minimal-scorer", command="echo 1.0")
         )
 
         info = await push_or_update_scenario(async_sdk_client, builder)
@@ -216,7 +216,7 @@ async def test_scenario_builder_with_blueprint(self, async_sdk_client: AsyncRunl
             .with_working_directory("/home/user")
             .with_problem_statement("Async blueprint test problem")
             .with_metadata(SMOKETEST_METADATA)
-            .add_command_scorer("async-blueprint-scorer", command="echo 1.0")
+            .add_shell_command_scorer("async-blueprint-scorer", command="echo 1.0")
         )
 
         info = await push_or_update_scenario(async_sdk_client, builder)
@@ -249,7 +249,7 @@ async def test_scenario_builder_with_snapshot(self, async_sdk_client: AsyncRunlo
             .from_snapshot(snapshot)
             .with_problem_statement("Async snapshot test problem")
             .with_metadata(SMOKETEST_METADATA)
-            .add_command_scorer("async-snapshot-scorer", command="echo 1.0")
+            .add_shell_command_scorer("async-snapshot-scorer", command="echo 1.0")
         )
 
         info = await push_or_update_scenario(async_sdk_client, builder)
diff --git a/tests/smoketests/sdk/test_scenario.py b/tests/smoketests/sdk/test_scenario.py
index e69ff3f7c..0b23d6b24 100644
--- a/tests/smoketests/sdk/test_scenario.py
+++ b/tests/smoketests/sdk/test_scenario.py
@@ -191,7 +191,7 @@ def test_scenario_builder_minimal(self, sdk_client: RunloopSDK) -> None:
             sdk_client.scenario.builder("sdk-smoketest-builder-minimal")
             .with_problem_statement("Minimal test problem statement")
             .with_metadata(SMOKETEST_METADATA)
-            .add_command_scorer("minimal-scorer", command="echo 1.0")
+            .add_shell_command_scorer("minimal-scorer", command="echo 1.0")
         )
 
         info = push_or_update_scenario(sdk_client, builder)
@@ -215,7 +215,7 @@ def test_scenario_builder_with_blueprint(self, sdk_client: RunloopSDK) -> None:
             .with_working_directory("/home/user")
             .with_problem_statement("Blueprint test problem")
             .with_metadata(SMOKETEST_METADATA)
-            .add_command_scorer("blueprint-scorer", command="echo 1.0")
+            .add_shell_command_scorer("blueprint-scorer", command="echo 1.0")
         )
 
         info = push_or_update_scenario(sdk_client, builder)
@@ -248,7 +248,7 @@ def test_scenario_builder_with_snapshot(self, sdk_client: RunloopSDK) -> None:
             .from_snapshot(snapshot)
             .with_problem_statement("Snapshot test problem")
             .with_metadata(SMOKETEST_METADATA)
-            .add_command_scorer("snapshot-scorer", command="echo 1.0")
+            .add_shell_command_scorer("snapshot-scorer", command="echo 1.0")
         )
 
         info = push_or_update_scenario(sdk_client, builder)

From e8d024d9a91cd154a3f4fe2a7ae50d4a0851bf83 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 18:07:11 -0800
Subject: [PATCH 16/31] format fix

---
 tests/sdk/test_async_scenario_builder.py | 4 +++-
 tests/sdk/test_scenario_builder.py       | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py
index 236aab520..f27425c35 100644
--- a/tests/sdk/test_async_scenario_builder.py
+++ b/tests/sdk/test_async_scenario_builder.py
@@ -74,7 +74,9 @@ def test_scorers(self, builder: AsyncScenarioBuilder) -> None:
         test_files: list[ScorerTestBasedScoringFunctionTestFile] = [
             {"file_path": "test_main.py", "file_contents": "def test_foo(): pass"}
         ]
-        result = builder.add_test_command_scorer("test-scorer", test_command="pytest", weight=2.0, test_files=test_files)
+        result = builder.add_test_command_scorer(
+            "test-scorer", test_command="pytest", weight=2.0, test_files=test_files
+        )
         assert result is builder
         assert builder._scorers[0]["name"] == "test-scorer"
         assert builder._scorers[0]["weight"] == 2.0
diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py
index 922fd6020..0e65d84de 100644
--- a/tests/sdk/test_scenario_builder.py
+++ b/tests/sdk/test_scenario_builder.py
@@ -72,7 +72,9 @@ def test_scorers(self, builder: ScenarioBuilder) -> None:
         test_files: list[ScorerTestBasedScoringFunctionTestFile] = [
             {"file_path": "test_main.py", "file_contents": "def test_foo(): pass"}
         ]
-        result = builder.add_test_command_scorer("test-scorer", test_command="pytest", weight=2.0, test_files=test_files)
+        result = builder.add_test_command_scorer(
+            "test-scorer", test_command="pytest", weight=2.0, test_files=test_files
+        )
         assert result is builder
         assert builder._scorers[0]["name"] == "test-scorer"
         assert builder._scorers[0]["weight"] == 2.0

From a70af1b269e93a878ee83a7d8f125f57f6558f70 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 18:16:06 -0800
Subject: [PATCH 17/31] address type check errors in scenario builder unit
 tests

---
 tests/sdk/test_async_scenario_builder.py | 20 +++++++++++---------
 tests/sdk/test_scenario_builder.py       | 20 +++++++++++---------
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py
index f27425c35..3491fff66 100644
--- a/tests/sdk/test_async_scenario_builder.py
+++ b/tests/sdk/test_async_scenario_builder.py
@@ -158,14 +158,16 @@ def test_build_params_with_all_options(self, builder: AsyncScenarioBuilder, mock
 
         assert params["name"] == "test-scenario"
         assert params["input_context"]["problem_statement"] == "Fix the bug"
-        assert params["input_context"]["additional_context"] == {"hint": "line 42"}
-        assert params["environment_parameters"]["blueprint_id"] == "bp-123"
-        assert params["environment_parameters"]["working_directory"] == "/app"
-        assert params["metadata"] == {"team": "infra"}
-        assert params["reference_output"] == "diff content"
-        assert params["required_environment_variables"] == ["API_KEY"]
-        assert params["required_secret_names"] == ["db_pass"]
-        assert params["validation_type"] == "FORWARD"
+        assert params["input_context"].get("additional_context") == {"hint": "line 42"}
+        env_params = params.get("environment_parameters")
+        assert env_params is not None
+        assert env_params.get("blueprint_id") == "bp-123"
+        assert env_params.get("working_directory") == "/app"
+        assert params.get("metadata") == {"team": "infra"}
+        assert params.get("reference_output") == "diff content"
+        assert params.get("required_environment_variables") == ["API_KEY"]
+        assert params.get("required_secret_names") == ["db_pass"]
+        assert params.get("validation_type") == "FORWARD"
 
     def test_build_params_normalizes_weights(self, builder: AsyncScenarioBuilder) -> None:
         """Test that _build_params normalizes scorer weights to sum to 1.0."""
@@ -175,7 +177,7 @@ def test_build_params_normalizes_weights(self, builder: AsyncScenarioBuilder) ->
         builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0)
 
         params = builder._build_params()
-        scorers = params["scoring_contract"]["scoring_function_parameters"]
+        scorers = list(params["scoring_contract"]["scoring_function_parameters"])
 
         # Weights 1, 2, 3 should normalize to 1/6, 2/6, 3/6
         assert len(scorers) == 3
diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py
index 0e65d84de..d9aee9fb1 100644
--- a/tests/sdk/test_scenario_builder.py
+++ b/tests/sdk/test_scenario_builder.py
@@ -156,14 +156,16 @@ def test_build_params_with_all_options(self, builder: ScenarioBuilder, mock_blue
 
         assert params["name"] == "test-scenario"
         assert params["input_context"]["problem_statement"] == "Fix the bug"
-        assert params["input_context"]["additional_context"] == {"hint": "line 42"}
-        assert params["environment_parameters"]["blueprint_id"] == "bp-123"
-        assert params["environment_parameters"]["working_directory"] == "/app"
-        assert params["metadata"] == {"team": "infra"}
-        assert params["reference_output"] == "diff content"
-        assert params["required_environment_variables"] == ["API_KEY"]
-        assert params["required_secret_names"] == ["db_pass"]
-        assert params["validation_type"] == "FORWARD"
+        assert params["input_context"].get("additional_context") == {"hint": "line 42"}
+        env_params = params.get("environment_parameters")
+        assert env_params is not None
+        assert env_params.get("blueprint_id") == "bp-123"
+        assert env_params.get("working_directory") == "/app"
+        assert params.get("metadata") == {"team": "infra"}
+        assert params.get("reference_output") == "diff content"
+        assert params.get("required_environment_variables") == ["API_KEY"]
+        assert params.get("required_secret_names") == ["db_pass"]
+        assert params.get("validation_type") == "FORWARD"
 
     def test_build_params_normalizes_weights(self, builder: ScenarioBuilder) -> None:
         """Test that _build_params normalizes scorer weights to sum to 1.0."""
@@ -173,7 +175,7 @@ def test_build_params_normalizes_weights(self, builder: ScenarioBuilder) -> None
         builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0)
 
         params = builder._build_params()
-        scorers = params["scoring_contract"]["scoring_function_parameters"]
+        scorers = list(params["scoring_contract"]["scoring_function_parameters"])
 
         # Weights 1, 2, 3 should normalize to 1/6, 2/6, 3/6
         assert len(scorers) == 3

From 1242c7f57a0dbb9f4435310a404579b7fe5b8a6e Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 18:25:14 -0800
Subject: [PATCH 18/31] rename add_scorer methods in docstrings

---
 src/runloop_api_client/sdk/async_.py                 | 2 +-
 src/runloop_api_client/sdk/async_scenario_builder.py | 4 ++--
 src/runloop_api_client/sdk/scenario_builder.py       | 4 ++--
 src/runloop_api_client/sdk/sync.py                   | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py
index 558af33ba..a75185594 100644
--- a/src/runloop_api_client/sdk/async_.py
+++ b/src/runloop_api_client/sdk/async_.py
@@ -780,7 +780,7 @@ class AsyncScenarioOps:
         ...     runloop.scenario.builder("my-scenario")
         ...     .from_blueprint(blueprint)
         ...     .with_problem_statement("Fix the bug")
-        ...     .add_test_scorer("tests", test_command="pytest")
+        ...     .add_test_command_scorer("tests", test_command="pytest")
         ... )
         >>> scenario = await builder.push()
     """
diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index 5bc04eb0a..b785422ab 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -36,7 +36,7 @@ class AsyncScenarioBuilder:
         ...     .from_blueprint(blueprint)
         ...     .with_working_directory("/app")
         ...     .with_problem_statement("Fix the bug in main.py")
-        ...     .add_test_scorer("tests", test_command="pytest")
+        ...     .add_test_command_scorer("tests", test_command="pytest")
         ... )
         >>> scenario = await builder.push()
     """
@@ -398,7 +398,7 @@ def _build_params(self) -> ScenarioCreateParams:
         if not self._scorers:
             raise ValueError(
                 "At least one scorer is required. "
-                "Call add_test_scorer(), add_bash_scorer(), or another scorer method first."
+                "Call add_test_command_scorer(), add_bash_script_scorer(), or another scorer method first."
             )
 
         # Normalize weights to sum to 1.0
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index 0ac06e31a..86527a690 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -36,7 +36,7 @@ class ScenarioBuilder:
         ...     .from_blueprint(blueprint)
         ...     .with_working_directory("/app")
         ...     .with_problem_statement("Fix the bug in main.py")
-        ...     .add_test_scorer("tests", test_command="pytest")
+        ...     .add_test_command_scorer("tests", test_command="pytest")
         ... )
         >>> scenario = builder.push()
     """
@@ -398,7 +398,7 @@ def _build_params(self) -> ScenarioCreateParams:
         if not self._scorers:
             raise ValueError(
                 "At least one scorer is required. "
-                "Call add_test_scorer(), add_bash_scorer(), or another scorer method first."
+                "Call add_test_command_scorer(), add_bash_script_scorer(), or another scorer method first."
             )
 
         # Normalize weights to sum to 1.0
diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py
index 1252710c4..008453236 100644
--- a/src/runloop_api_client/sdk/sync.py
+++ b/src/runloop_api_client/sdk/sync.py
@@ -801,7 +801,7 @@ class ScenarioOps:
         ...     runloop.scenario.builder("my-scenario")
         ...     .from_blueprint(blueprint)
         ...     .with_problem_statement("Fix the bug")
-        ...     .add_test_scorer("tests", test_command="pytest")
+        ...     .add_test_command_scorer("tests", test_command="pytest")
         ... )
         >>> scenario = builder.push()
     """

From cdb28a1933a5cd04dcc061475ad3580b27e1c134 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 18:28:36 -0800
Subject: [PATCH 19/31] make sure it is clear that score is 0.0-1.0 inclusive

---
 src/runloop_api_client/sdk/async_scenario_builder.py | 4 ++--
 src/runloop_api_client/sdk/async_scorer.py           | 2 +-
 src/runloop_api_client/sdk/scenario_builder.py       | 4 ++--
 src/runloop_api_client/sdk/scorer.py                 | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index b785422ab..da436d36c 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -213,7 +213,7 @@ def add_bash_script_scorer(
     ) -> Self:
         """Add a bash script scorer.
 
-        The script should output "score=X.X" where X.X is a float between 0.0 and 1.0.
+        The script should output "score=X.X" where X.X is a float between 0.0 and 1.0, inclusive.
 
         :param name: Name of the scoring function
         :type name: str
@@ -241,7 +241,7 @@ def add_python_script_scorer(
     ) -> Self:
         """Add a Python script scorer.
 
-        The script should print the score (0.0-1.0) to stdout.
+        The script should print the score in the range [0.0, 1.0] to stdout.
 
         :param name: Name of the scoring function
         :type name: str
diff --git a/src/runloop_api_client/sdk/async_scorer.py b/src/runloop_api_client/sdk/async_scorer.py
index 3df4fb4e0..91ced0c38 100644
--- a/src/runloop_api_client/sdk/async_scorer.py
+++ b/src/runloop_api_client/sdk/async_scorer.py
@@ -16,7 +16,7 @@
 class AsyncScorer:
     """A custom scorer for evaluating scenario outputs (async).
 
-    Scorers define bash scripts that produce a score (0.0-1.0) for scenario runs.
+    Scorers define bash scripts that produce a score in the range [0.0, 1.0] for scenario runs.
     Obtain instances via ``runloop.scorer.create()`` or ``runloop.scorer.from_id()``.
 
     Example:
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index 86527a690..0ac6139ea 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -213,7 +213,7 @@ def add_bash_script_scorer(
     ) -> Self:
         """Add a bash script scorer.
 
-        The script should output "score=X.X" where X.X is a float between 0.0 and 1.0.
+        The script should output "score=X.X" where X.X is a float between 0.0 and 1.0, inclusive.
 
         :param name: Name of the scoring function
         :type name: str
@@ -241,7 +241,7 @@ def add_python_script_scorer(
     ) -> Self:
         """Add a Python script scorer.
 
-        The script should print the score (0.0-1.0) to stdout.
+        The script should print the score in the range [0.0, 1.0] to stdout.
 
         :param name: Name of the scoring function
         :type name: str
diff --git a/src/runloop_api_client/sdk/scorer.py b/src/runloop_api_client/sdk/scorer.py
index a25bb44a8..8df57ac05 100644
--- a/src/runloop_api_client/sdk/scorer.py
+++ b/src/runloop_api_client/sdk/scorer.py
@@ -16,7 +16,7 @@
 class Scorer:
     """A custom scorer for evaluating scenario outputs.
 
-    Scorers define bash scripts that produce a score (0.0-1.0) for scenario runs.
+    Scorers define bash scripts that produce a score in the range [0.0, 1.0] for scenario runs.
     Obtain instances via ``runloop.scorer.create()`` or ``runloop.scorer.from_id()``.
 
     Example:

From 1727c2e567469a62295876fdbe5c0a1072c42e08 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 18:44:55 -0800
Subject: [PATCH 20/31] update script scorer docstrings

---
 src/runloop_api_client/sdk/async_scenario_builder.py | 7 +++++--
 src/runloop_api_client/sdk/scenario_builder.py       | 7 +++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index da436d36c..3bb9f19cf 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -211,7 +211,7 @@ def add_bash_script_scorer(
         bash_script: str,
         weight: float = 1.0,
     ) -> Self:
-        """Add a bash script scorer.
+        """Add a standalone bash script scorer.
 
         The script should output "score=X.X" where X.X is a float between 0.0 and 1.0, inclusive.
 
@@ -239,7 +239,10 @@ def add_python_script_scorer(
         python_version_constraint: Optional[str] = None,
         requirements_contents: Optional[str] = None,
     ) -> Self:
-        """Add a Python script scorer.
+        """Add a standalone Python script scorer.
+        
+        The script is run in an isolated uv environment, and the dependencies are declared in the
+        `uv script header <https://docs.astral.sh/uv/guides/scripts/#declaring-script-dependencies>`__.
 
         The script should print the score in the range [0.0, 1.0] to stdout.
 
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index 0ac6139ea..8dbd0b8ab 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -211,7 +211,7 @@ def add_bash_script_scorer(
         bash_script: str,
         weight: float = 1.0,
     ) -> Self:
-        """Add a bash script scorer.
+        """Add a standalone bash script scorer.
 
         The script should output "score=X.X" where X.X is a float between 0.0 and 1.0, inclusive.
 
@@ -239,7 +239,10 @@ def add_python_script_scorer(
         python_version_constraint: Optional[str] = None,
         requirements_contents: Optional[str] = None,
     ) -> Self:
-        """Add a Python script scorer.
+        """Add a standalone Python script scorer.
+        
+        The script is run in an isolated uv environment, and the dependencies are declared in the
+        `uv script header <https://docs.astral.sh/uv/guides/scripts/#declaring-script-dependencies>`__.
 
         The script should print the score in the range [0.0, 1.0] to stdout.
 

From 30c4497331e74e1e10e83335202f08a1068fd710 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 18:45:10 -0800
Subject: [PATCH 21/31] formatting

---
 src/runloop_api_client/sdk/async_scenario_builder.py | 2 +-
 src/runloop_api_client/sdk/scenario_builder.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index 3bb9f19cf..ecae369e7 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -240,7 +240,7 @@ def add_python_script_scorer(
         requirements_contents: Optional[str] = None,
     ) -> Self:
         """Add a standalone Python script scorer.
-        
+
         The script is run in an isolated uv environment, and the dependencies are declared in the
         `uv script header <https://docs.astral.sh/uv/guides/scripts/#declaring-script-dependencies>`__.
 
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index 8dbd0b8ab..bd5bb52ab 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -240,7 +240,7 @@ def add_python_script_scorer(
         requirements_contents: Optional[str] = None,
     ) -> Self:
         """Add a standalone Python script scorer.
-        
+
         The script is run in an isolated uv environment, and the dependencies are declared in the
         `uv script header <https://docs.astral.sh/uv/guides/scripts/#declaring-script-dependencies>`__.
 

From 797c6fd3fb951aadb8b9544176747d5253a34830 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 18:52:50 -0800
Subject: [PATCH 22/31] clarify reference solution/gold patch terminology and
 validation strategy

---
 src/runloop_api_client/sdk/async_scenario_builder.py | 7 ++++---
 src/runloop_api_client/sdk/scenario_builder.py       | 7 ++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index ecae369e7..03e6fdeb7 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -343,9 +343,10 @@ def with_metadata(self, metadata: Dict[str, str]) -> Self:
         return self
 
     def with_reference_output(self, output: str) -> Self:
-        """Set the reference output/solution for validation.
+        """Set the reference solution or gold patch for validation.
+        After application, the scorer is expected to return a score of 1.0.
 
-        :param output: Reference output (e.g., git diff)
+        :param output: Reference solution or gold patch (e.g., git diff)
         :type output: str
         :return: Self for method chaining
         :rtype: Self
@@ -376,7 +377,7 @@ def with_required_secrets(self, secrets: List[str]) -> Self:
         return self
 
     def with_validation_type(self, validation_type: Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]) -> Self:
-        """Set the validation strategy.
+        """Set the validation strategy to specify how the reference solution or gold patch is applied to the scenario.
 
         :param validation_type: Validation type
         :type validation_type: Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index bd5bb52ab..ab25580bd 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -343,9 +343,10 @@ def with_metadata(self, metadata: Dict[str, str]) -> Self:
         return self
 
     def with_reference_output(self, output: str) -> Self:
-        """Set the reference output/solution for validation.
+        """Set the reference solution or gold patch for validation.
+        After application, the scorer is expected to return a score of 1.0.
 
-        :param output: Reference output (e.g., git diff)
+        :param output: Reference solution or gold patch (e.g., git diff)
         :type output: str
         :return: Self for method chaining
         :rtype: Self
@@ -376,7 +377,7 @@ def with_required_secrets(self, secrets: List[str]) -> Self:
         return self
 
     def with_validation_type(self, validation_type: Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]) -> Self:
-        """Set the validation strategy.
+        """Set the validation strategy to specify how the reference solution or gold patch is applied to the scenario.
 
         :param validation_type: Validation type
         :type validation_type: Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]

From a099739f852d42de06f8f9ac95ec6faf172f23b1 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Tue, 9 Dec 2025 19:17:25 -0800
Subject: [PATCH 23/31] make name first argument passed to scenario builder

---
 src/runloop_api_client/sdk/async_.py                 | 2 +-
 src/runloop_api_client/sdk/async_scenario_builder.py | 6 +++---
 src/runloop_api_client/sdk/scenario_builder.py       | 6 +++---
 src/runloop_api_client/sdk/sync.py                   | 2 +-
 tests/sdk/test_async_scenario_builder.py             | 6 +++---
 tests/sdk/test_scenario_builder.py                   | 6 +++---
 6 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py
index a75185594..f121a60be 100644
--- a/src/runloop_api_client/sdk/async_.py
+++ b/src/runloop_api_client/sdk/async_.py
@@ -801,7 +801,7 @@ def builder(self, name: str) -> AsyncScenarioBuilder:
         :return: A new AsyncScenarioBuilder instance
         :rtype: AsyncScenarioBuilder
         """
-        return AsyncScenarioBuilder(self._client, name)
+        return AsyncScenarioBuilder(name, self._client)
 
     def from_id(self, scenario_id: str) -> AsyncScenario:
         """Get an AsyncScenario instance for an existing scenario ID.
diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index 03e6fdeb7..4edad16f3 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -41,13 +41,13 @@ class AsyncScenarioBuilder:
         >>> scenario = await builder.push()
     """
 
-    def __init__(self, client: AsyncRunloop, name: str) -> None:
+    def __init__(self, name: str, client: AsyncRunloop) -> None:
         """Initialize the builder.
 
-        :param client: AsyncRunloop client instance
-        :type client: AsyncRunloop
         :param name: Name for the scenario
         :type name: str
+        :param client: AsyncRunloop client instance
+        :type client: AsyncRunloop
         """
         self._client = client
         self._name = name
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index ab25580bd..b35740bcc 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -41,13 +41,13 @@ class ScenarioBuilder:
         >>> scenario = builder.push()
     """
 
-    def __init__(self, client: Runloop, name: str) -> None:
+    def __init__(self, name: str, client: Runloop) -> None:
         """Initialize the builder.
 
-        :param client: Runloop client instance
-        :type client: Runloop
         :param name: Name for the scenario
         :type name: str
+        :param client: Runloop client instance
+        :type client: Runloop
         """
         self._client = client
         self._name = name
diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py
index 008453236..22df39fe6 100644
--- a/src/runloop_api_client/sdk/sync.py
+++ b/src/runloop_api_client/sdk/sync.py
@@ -822,7 +822,7 @@ def builder(self, name: str) -> ScenarioBuilder:
         :return: A new ScenarioBuilder instance
         :rtype: ScenarioBuilder
         """
-        return ScenarioBuilder(self._client, name)
+        return ScenarioBuilder(name, self._client)
 
     def from_id(self, scenario_id: str) -> Scenario:
         """Get a Scenario instance for an existing scenario ID.
diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py
index 3491fff66..e75938434 100644
--- a/tests/sdk/test_async_scenario_builder.py
+++ b/tests/sdk/test_async_scenario_builder.py
@@ -36,11 +36,11 @@ def mock_snapshot(self, mock_async_client: MagicMock) -> AsyncSnapshot:
     @pytest.fixture
     def builder(self, mock_async_client: MagicMock) -> AsyncScenarioBuilder:
         """Create an AsyncScenarioBuilder instance with mock client."""
-        return AsyncScenarioBuilder(mock_async_client, "test-scenario")
+        return AsyncScenarioBuilder("test-scenario", mock_async_client)
 
     def test_instantiation(self, mock_async_client: MagicMock) -> None:
         """Test builder initialization and repr."""
-        builder = AsyncScenarioBuilder(mock_async_client, "my-scenario")
+        builder = AsyncScenarioBuilder("my-scenario", mock_async_client)
 
         assert builder._client is mock_async_client
         assert builder._name == "my-scenario"
@@ -136,7 +136,7 @@ def test_build_params_validation(self, builder: AsyncScenarioBuilder) -> None:
             builder._build_params()
 
         # Missing scorer (new builder)
-        builder2 = AsyncScenarioBuilder(builder._client, "test2")
+        builder2 = AsyncScenarioBuilder("test2", builder._client)
         builder2.with_problem_statement("Fix the bug")
         with pytest.raises(ValueError, match="At least one scorer is required"):
             builder2._build_params()
diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py
index d9aee9fb1..8a7284d17 100644
--- a/tests/sdk/test_scenario_builder.py
+++ b/tests/sdk/test_scenario_builder.py
@@ -34,11 +34,11 @@ def mock_snapshot(self, mock_client: MagicMock) -> Snapshot:
     @pytest.fixture
     def builder(self, mock_client: MagicMock) -> ScenarioBuilder:
         """Create a ScenarioBuilder instance with mock client."""
-        return ScenarioBuilder(mock_client, "test-scenario")
+        return ScenarioBuilder("test-scenario", mock_client)
 
     def test_instantiation(self, mock_client: MagicMock) -> None:
         """Test builder initialization and repr."""
-        builder = ScenarioBuilder(mock_client, "my-scenario")
+        builder = ScenarioBuilder("my-scenario", mock_client)
 
         assert builder._client is mock_client
         assert builder._name == "my-scenario"
@@ -134,7 +134,7 @@ def test_build_params_validation(self, builder: ScenarioBuilder) -> None:
             builder._build_params()
 
         # Missing scorer (new builder)
-        builder2 = ScenarioBuilder(builder._client, "test2")
+        builder2 = ScenarioBuilder("test2", builder._client)
         builder2.with_problem_statement("Fix the bug")
         with pytest.raises(ValueError, match="At least one scorer is required"):
             builder2._build_params()

From cd9adce37b034f9a8dfd547716872e080ba693fe Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Wed, 10 Dec 2025 11:51:12 -0800
Subject: [PATCH 24/31] add preview method

---
 src/runloop_api_client/sdk/__init__.py        |  2 +
 src/runloop_api_client/sdk/_types.py          | 29 ++++++-
 .../sdk/async_scenario_builder.py             | 87 +++++++++++--------
 .../sdk/scenario_builder.py                   | 87 +++++++++++--------
 4 files changed, 132 insertions(+), 73 deletions(-)

diff --git a/src/runloop_api_client/sdk/__init__.py b/src/runloop_api_client/sdk/__init__.py
index 232ef9515..5773b9d53 100644
--- a/src/runloop_api_client/sdk/__init__.py
+++ b/src/runloop_api_client/sdk/__init__.py
@@ -7,6 +7,7 @@
 
 from .sync import AgentOps, DevboxOps, ScorerOps, RunloopSDK, ScenarioOps, SnapshotOps, BlueprintOps, StorageObjectOps
 from .agent import Agent
+from ._types import ScenarioPreview
 from .async_ import (
     AsyncAgentOps,
     AsyncDevboxOps,
@@ -75,6 +76,7 @@
     "AsyncScenarioRun",
     "ScenarioBuilder",
     "AsyncScenarioBuilder",
+    "ScenarioPreview",
     "Scorer",
     "AsyncScorer",
     "Snapshot",
diff --git a/src/runloop_api_client/sdk/_types.py b/src/runloop_api_client/sdk/_types.py
index 31d543abb..ef180a56e 100644
--- a/src/runloop_api_client/sdk/_types.py
+++ b/src/runloop_api_client/sdk/_types.py
@@ -1,10 +1,13 @@
-from typing import Union, Callable, Optional
+from typing import Dict, Union, Callable, Optional
 from typing_extensions import TypedDict
 
 from .._types import Body, Query, Headers, Timeout, NotGiven
 from ..lib.polling import PollingConfig
 from ..types.devboxes import DiskSnapshotListParams, DiskSnapshotUpdateParams
 from ..types.scenarios import ScorerListParams, ScorerCreateParams, ScorerUpdateParams, ScorerValidateParams
+from ..types.input_context import InputContext
+from ..types.scenario_view import ScenarioView
+from ..types.scoring_contract import ScoringContract
 from ..types.agent_list_params import AgentListParams
 from ..types.devbox_list_params import DevboxListParams
 from ..types.object_list_params import ObjectListParams
@@ -186,3 +189,27 @@ class SDKScenarioRunAsyncParams(ScenarioStartRunBaseParams, LongRequestOptions):
 
 class SDKScenarioRunParams(ScenarioStartRunBaseParams, LongPollingRequestOptions):
     pass
+
+
+class InputContextPreview(InputContext):
+    problem_statement: Optional[str] = None  # type: ignore[assignment]
+    """The problem statement for the Scenario."""
+
+
+class ScenarioPreview(ScenarioView):
+    """Preview of scenario configuration with all fields optional."""
+
+    id: Optional[str] = None  # type: ignore[assignment]
+    """The ID of the Scenario."""
+
+    input_context: Optional[InputContextPreview] = None  # type: ignore[assignment]
+    """The input context for the Scenario."""
+
+    metadata: Optional[Dict[str, str]] = None  # type: ignore[assignment]
+    """User defined metadata to attach to the scenario for organization."""
+
+    name: Optional[str] = None  # type: ignore[assignment]
+    """The name of the Scenario."""
+
+    scoring_contract: Optional[ScoringContract] = None  # type: ignore[assignment]
+    """The scoring contract for the Scenario."""
diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index 4edad16f3..e74f1ee68 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -6,7 +6,7 @@
 from typing_extensions import Self, Unpack, Literal, override
 
 from ..types import ScenarioCreateParams, ScenarioEnvironmentParam
-from ._types import LongRequestOptions
+from ._types import ScenarioPreview, LongRequestOptions
 from .._client import AsyncRunloop
 from .async_scenario import AsyncScenario
 from .async_snapshot import AsyncSnapshot
@@ -387,6 +387,22 @@ def with_validation_type(self, validation_type: Literal["UNSPECIFIED", "FORWARD"
         self._validation_type = validation_type
         return self
 
+    def _build_normalized_scorers(self) -> List[ScoringFunctionParam]:
+        """Build normalized scorers list."""
+        total_weight = sum(s["weight"] for s in self._scorers)
+        return [{**s, "weight": s["weight"] / total_weight} for s in self._scorers]
+
+    def _build_environment_params(self) -> ScenarioEnvironmentParam:
+        """Build environment parameters."""
+        env_params: ScenarioEnvironmentParam = {}
+        if self._blueprint:
+            env_params["blueprint_id"] = self._blueprint.id
+        if self._snapshot:
+            env_params["snapshot_id"] = self._snapshot.id
+        if self._working_directory:
+            env_params["working_directory"] = self._working_directory
+        return env_params
+
     def _build_params(self) -> ScenarioCreateParams:
         """Build the scenario creation parameters.
 
@@ -405,51 +421,50 @@ def _build_params(self) -> ScenarioCreateParams:
                 "Call add_test_command_scorer(), add_bash_script_scorer(), or another scorer method first."
             )
 
-        # Normalize weights to sum to 1.0
-        total_weight = sum(s["weight"] for s in self._scorers)
-        normalized_scorers: List[ScoringFunctionParam] = [
-            {**s, "weight": s["weight"] / total_weight} for s in self._scorers
-        ]
-
-        params: ScenarioCreateParams = {
+        return {
             "name": self._name,
             "input_context": {
                 "problem_statement": self._problem_statement,
+                "additional_context": self._additional_context,
             },
             "scoring_contract": {
-                "scoring_function_parameters": normalized_scorers,
+                "scoring_function_parameters": self._build_normalized_scorers(),
             },
+            "environment_parameters": self._build_environment_params(),
+            "metadata": self._metadata,
+            "reference_output": self._reference_output,
+            "required_environment_variables": self._required_env_vars,
+            "required_secret_names": self._required_secrets,
+            "validation_type": self._validation_type,
         }
 
-        # Add additional context if set
-        if self._additional_context is not None:
-            params["input_context"]["additional_context"] = self._additional_context
+    def preview(self) -> ScenarioPreview:
+        """Preview the scenario configuration without pushing to the platform.
 
-        # Build environment parameters if any are set
-        env_params: ScenarioEnvironmentParam = {}
-        if self._blueprint:
-            env_params["blueprint_id"] = self._blueprint.id
-        if self._snapshot:
-            env_params["snapshot_id"] = self._snapshot.id
-        if self._working_directory:
-            env_params["working_directory"] = self._working_directory
+        Returns the current configuration state as a ScenarioPreview object.
+        Does not validate or raise errors for missing required fields.
 
-        if env_params:
-            params["environment_parameters"] = env_params
-
-        # Add optional fields
-        if self._metadata:
-            params["metadata"] = self._metadata
-        if self._reference_output:
-            params["reference_output"] = self._reference_output
-        if self._required_env_vars:
-            params["required_environment_variables"] = self._required_env_vars
-        if self._required_secrets:
-            params["required_secret_names"] = self._required_secrets
-        if self._validation_type:
-            params["validation_type"] = self._validation_type
-
-        return params
+        :return: Preview of the scenario configuration
+        :rtype: ScenarioPreview
+        """
+        return ScenarioPreview.model_validate(
+            {
+                "name": self._name,
+                "input_context": {
+                    "problem_statement": self._problem_statement,
+                    "additional_context": self._additional_context,
+                },
+                "scoring_contract": {
+                    "scoring_function_parameters": self._build_normalized_scorers(),
+                },
+                "environment": self._build_environment_params(),
+                "metadata": self._metadata,
+                "reference_output": self._reference_output,
+                "required_environment_variables": self._required_env_vars,
+                "required_secret_names": self._required_secrets,
+                "validation_type": self._validation_type,
+            }
+        )
 
     async def push(self, **options: Unpack[LongRequestOptions]) -> AsyncScenario:
         """Create the scenario on the platform.
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index b35740bcc..7e0c5094d 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -6,7 +6,7 @@
 from typing_extensions import Self, Unpack, Literal, override
 
 from ..types import ScenarioCreateParams, ScenarioEnvironmentParam
-from ._types import LongRequestOptions
+from ._types import ScenarioPreview, LongRequestOptions
 from .._client import Runloop
 from .scenario import Scenario
 from .snapshot import Snapshot
@@ -387,6 +387,22 @@ def with_validation_type(self, validation_type: Literal["UNSPECIFIED", "FORWARD"
         self._validation_type = validation_type
         return self
 
+    def _build_normalized_scorers(self) -> List[ScoringFunctionParam]:
+        """Build normalized scorers list."""
+        total_weight = sum(s["weight"] for s in self._scorers)
+        return [{**s, "weight": s["weight"] / total_weight} for s in self._scorers]
+
+    def _build_environment_params(self) -> ScenarioEnvironmentParam:
+        """Build environment parameters."""
+        env_params: ScenarioEnvironmentParam = {}
+        if self._blueprint:
+            env_params["blueprint_id"] = self._blueprint.id
+        if self._snapshot:
+            env_params["snapshot_id"] = self._snapshot.id
+        if self._working_directory:
+            env_params["working_directory"] = self._working_directory
+        return env_params
+
     def _build_params(self) -> ScenarioCreateParams:
         """Build the scenario creation parameters.
 
@@ -405,51 +421,50 @@ def _build_params(self) -> ScenarioCreateParams:
                 "Call add_test_command_scorer(), add_bash_script_scorer(), or another scorer method first."
             )
 
-        # Normalize weights to sum to 1.0
-        total_weight = sum(s["weight"] for s in self._scorers)
-        normalized_scorers: List[ScoringFunctionParam] = [
-            {**s, "weight": s["weight"] / total_weight} for s in self._scorers
-        ]
-
-        params: ScenarioCreateParams = {
+        return {
             "name": self._name,
             "input_context": {
                 "problem_statement": self._problem_statement,
+                "additional_context": self._additional_context,
             },
             "scoring_contract": {
-                "scoring_function_parameters": normalized_scorers,
+                "scoring_function_parameters": self._build_normalized_scorers(),
             },
+            "environment_parameters": self._build_environment_params(),
+            "metadata": self._metadata,
+            "reference_output": self._reference_output,
+            "required_environment_variables": self._required_env_vars,
+            "required_secret_names": self._required_secrets,
+            "validation_type": self._validation_type,
         }
 
-        # Add additional context if set
-        if self._additional_context is not None:
-            params["input_context"]["additional_context"] = self._additional_context
+    def preview(self) -> ScenarioPreview:
+        """Preview the scenario configuration without pushing to the platform.
 
-        # Build environment parameters if any are set
-        env_params: ScenarioEnvironmentParam = {}
-        if self._blueprint:
-            env_params["blueprint_id"] = self._blueprint.id
-        if self._snapshot:
-            env_params["snapshot_id"] = self._snapshot.id
-        if self._working_directory:
-            env_params["working_directory"] = self._working_directory
+        Returns the current configuration state as a ScenarioPreview object.
+        Does not validate or raise errors for missing required fields.
 
-        if env_params:
-            params["environment_parameters"] = env_params
-
-        # Add optional fields
-        if self._metadata:
-            params["metadata"] = self._metadata
-        if self._reference_output:
-            params["reference_output"] = self._reference_output
-        if self._required_env_vars:
-            params["required_environment_variables"] = self._required_env_vars
-        if self._required_secrets:
-            params["required_secret_names"] = self._required_secrets
-        if self._validation_type:
-            params["validation_type"] = self._validation_type
-
-        return params
+        :return: Preview of the scenario configuration
+        :rtype: ScenarioPreview
+        """
+        return ScenarioPreview.model_validate(
+            {
+                "name": self._name,
+                "input_context": {
+                    "problem_statement": self._problem_statement,
+                    "additional_context": self._additional_context,
+                },
+                "scoring_contract": {
+                    "scoring_function_parameters": self._build_normalized_scorers(),
+                },
+                "environment": self._build_environment_params(),
+                "metadata": self._metadata,
+                "reference_output": self._reference_output,
+                "required_environment_variables": self._required_env_vars,
+                "required_secret_names": self._required_secrets,
+                "validation_type": self._validation_type,
+            }
+        )
 
     def push(self, **options: Unpack[LongRequestOptions]) -> Scenario:
         """Create the scenario on the platform.

From 034399bb2cc72ca6e04c0439379f436290b3f752 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Wed, 10 Dec 2025 12:05:31 -0800
Subject: [PATCH 25/31] clean up unit test imports, rename builder fixture to
 mock_builder

---
 tests/sdk/test_async_execution.py        |   3 +-
 tests/sdk/test_async_execution_result.py |   2 +-
 tests/sdk/test_async_ops.py              |   6 +-
 tests/sdk/test_async_scenario_builder.py | 162 ++++++++++++-----------
 tests/sdk/test_execution.py              |   3 +-
 tests/sdk/test_execution_result.py       |   2 +-
 tests/sdk/test_ops.py                    |  10 +-
 tests/sdk/test_scenario_builder.py       | 160 +++++++++++-----------
 8 files changed, 178 insertions(+), 170 deletions(-)

diff --git a/tests/sdk/test_async_execution.py b/tests/sdk/test_async_execution.py
index b33b4cf1f..06629cf63 100644
--- a/tests/sdk/test_async_execution.py
+++ b/tests/sdk/test_async_execution.py
@@ -14,7 +14,8 @@
     TASK_COMPLETION_SHORT,
     MockExecutionView,
 )
-from runloop_api_client.sdk.async_execution import AsyncExecution, _AsyncStreamingGroup
+from runloop_api_client.sdk import AsyncExecution
+from runloop_api_client.sdk.async_execution import _AsyncStreamingGroup
 
 # Legacy aliases for backward compatibility
 SHORT_SLEEP = TASK_COMPLETION_SHORT
diff --git a/tests/sdk/test_async_execution_result.py b/tests/sdk/test_async_execution_result.py
index 2a71da1c7..cf8a23caa 100644
--- a/tests/sdk/test_async_execution_result.py
+++ b/tests/sdk/test_async_execution_result.py
@@ -8,7 +8,7 @@
 import pytest
 
 from tests.sdk.conftest import MockExecutionView
-from runloop_api_client.sdk.async_execution_result import AsyncExecutionResult
+from runloop_api_client.sdk import AsyncExecutionResult
 
 
 class TestAsyncExecutionResult:
diff --git a/tests/sdk/test_async_ops.py b/tests/sdk/test_async_ops.py
index 9fca5bb4b..f8a16e1c0 100644
--- a/tests/sdk/test_async_ops.py
+++ b/tests/sdk/test_async_ops.py
@@ -24,19 +24,17 @@
     AsyncAgent,
     AsyncDevbox,
     AsyncScorer,
+    AsyncAgentOps,
     AsyncScenario,
     AsyncSnapshot,
     AsyncBlueprint,
-    AsyncStorageObject,
-)
-from runloop_api_client.sdk.async_ import (
-    AsyncAgentOps,
     AsyncDevboxOps,
     AsyncScorerOps,
     AsyncRunloopSDK,
     AsyncScenarioOps,
     AsyncSnapshotOps,
     AsyncBlueprintOps,
+    AsyncStorageObject,
     AsyncStorageObjectOps,
 )
 from runloop_api_client.lib.polling import PollingConfig
diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py
index e75938434..79a128f11 100644
--- a/tests/sdk/test_async_scenario_builder.py
+++ b/tests/sdk/test_async_scenario_builder.py
@@ -6,9 +6,7 @@
 
 import pytest
 
-from runloop_api_client.sdk.async_snapshot import AsyncSnapshot
-from runloop_api_client.sdk.async_blueprint import AsyncBlueprint
-from runloop_api_client.sdk.async_scenario_builder import AsyncScenarioBuilder
+from runloop_api_client.sdk import AsyncSnapshot, AsyncBlueprint, AsyncScenarioBuilder
 from runloop_api_client.types.scoring_function_param import ScorerTestBasedScoringFunctionTestFile
 
 
@@ -34,7 +32,7 @@ def mock_snapshot(self, mock_async_client: MagicMock) -> AsyncSnapshot:
         return AsyncSnapshot(mock_async_client, "snap-123")
 
     @pytest.fixture
-    def builder(self, mock_async_client: MagicMock) -> AsyncScenarioBuilder:
+    def mock_builder(self, mock_async_client: MagicMock) -> AsyncScenarioBuilder:
         """Create an AsyncScenarioBuilder instance with mock client."""
         return AsyncScenarioBuilder("test-scenario", mock_async_client)
 
@@ -48,113 +46,117 @@ def test_instantiation(self, mock_async_client: MagicMock) -> None:
         assert repr(builder) == "<AsyncScenarioBuilder name='my-scenario'>"
 
     def test_from_blueprint_and_snapshot(
-        self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint, mock_snapshot: AsyncSnapshot
+        self, mock_builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint, mock_snapshot: AsyncSnapshot
     ) -> None:
         """Test blueprint/snapshot setting returns self and are mutually exclusive."""
         # from_blueprint returns self and sets blueprint
-        result = builder.from_blueprint(mock_blueprint)
-        assert result is builder
-        assert builder._blueprint is mock_blueprint
-        assert builder._snapshot is None
+        result = mock_builder.from_blueprint(mock_blueprint)
+        assert result is mock_builder
+        assert mock_builder._blueprint is mock_blueprint
+        assert mock_builder._snapshot is None
 
         # from_snapshot returns self, sets snapshot, and clears blueprint
-        result = builder.from_snapshot(mock_snapshot)
-        assert result is builder
-        assert builder._snapshot is mock_snapshot
-        assert builder._blueprint is None
+        result = mock_builder.from_snapshot(mock_snapshot)
+        assert result is mock_builder
+        assert mock_builder._snapshot is mock_snapshot
+        assert mock_builder._blueprint is None
 
         # from_blueprint clears snapshot
-        builder.from_blueprint(mock_blueprint)
-        assert builder._blueprint is mock_blueprint
-        assert builder._snapshot is None
+        mock_builder.from_blueprint(mock_blueprint)
+        assert mock_builder._blueprint is mock_blueprint
+        assert mock_builder._snapshot is None
 
-    def test_scorers(self, builder: AsyncScenarioBuilder) -> None:
+    def test_scorers(self, mock_builder: AsyncScenarioBuilder) -> None:
         """Test all scorer types, optional params, and multiple scorers."""
         # Test scorer with test files
         test_files: list[ScorerTestBasedScoringFunctionTestFile] = [
             {"file_path": "test_main.py", "file_contents": "def test_foo(): pass"}
         ]
-        result = builder.add_test_command_scorer(
+        result = mock_builder.add_test_command_scorer(
             "test-scorer", test_command="pytest", weight=2.0, test_files=test_files
         )
-        assert result is builder
-        assert builder._scorers[0]["name"] == "test-scorer"
-        assert builder._scorers[0]["weight"] == 2.0
-        assert builder._scorers[0]["scorer"]["type"] == "test_based_scorer"
-        assert builder._scorers[0]["scorer"].get("test_command") == "pytest"
-        assert builder._scorers[0]["scorer"].get("test_files") == test_files
+        assert result is mock_builder
+        assert mock_builder._scorers[0]["name"] == "test-scorer"
+        assert mock_builder._scorers[0]["weight"] == 2.0
+        assert mock_builder._scorers[0]["scorer"]["type"] == "test_based_scorer"
+        assert mock_builder._scorers[0]["scorer"].get("test_command") == "pytest"
+        assert mock_builder._scorers[0]["scorer"].get("test_files") == test_files
 
         # Command scorer
-        builder.add_shell_command_scorer("cmd-scorer", command="./check.sh")
-        assert builder._scorers[1]["scorer"]["type"] == "command_scorer"
-        assert builder._scorers[1]["scorer"].get("command") == "./check.sh"
+        mock_builder.add_shell_command_scorer("cmd-scorer", command="./check.sh")
+        assert mock_builder._scorers[1]["scorer"]["type"] == "command_scorer"
+        assert mock_builder._scorers[1]["scorer"].get("command") == "./check.sh"
 
         # Bash scorer
-        builder.add_bash_script_scorer("bash-scorer", bash_script="echo 'score=1.0'")
-        assert builder._scorers[2]["scorer"]["type"] == "bash_script_scorer"
-        assert builder._scorers[2]["scorer"].get("bash_script") == "echo 'score=1.0'"
+        mock_builder.add_bash_script_scorer("bash-scorer", bash_script="echo 'score=1.0'")
+        assert mock_builder._scorers[2]["scorer"]["type"] == "bash_script_scorer"
+        assert mock_builder._scorers[2]["scorer"].get("bash_script") == "echo 'score=1.0'"
 
         # Python scorer with optional params
-        builder.add_python_script_scorer(
+        mock_builder.add_python_script_scorer(
             "python-scorer",
             python_script="print('1.0')",
             python_version_constraint=">=3.10",
             requirements_contents="numpy",
         )
-        assert builder._scorers[3]["scorer"]["type"] == "python_script_scorer"
-        assert builder._scorers[3]["scorer"].get("python_version_constraint") == ">=3.10"
-        assert builder._scorers[3]["scorer"].get("requirements_contents") == "numpy"
+        assert mock_builder._scorers[3]["scorer"]["type"] == "python_script_scorer"
+        assert mock_builder._scorers[3]["scorer"].get("python_version_constraint") == ">=3.10"
+        assert mock_builder._scorers[3]["scorer"].get("requirements_contents") == "numpy"
 
         # AST grep scorer with optional lang
-        builder.add_ast_grep_scorer("ast-scorer", pattern="$A.foo()", search_directory="/src", lang="python")
-        assert builder._scorers[4]["scorer"]["type"] == "ast_grep_scorer"
-        assert builder._scorers[4]["scorer"].get("pattern") == "$A.foo()"
-        assert builder._scorers[4]["scorer"].get("lang") == "python"
+        mock_builder.add_ast_grep_scorer("ast-scorer", pattern="$A.foo()", search_directory="/src", lang="python")
+        assert mock_builder._scorers[4]["scorer"]["type"] == "ast_grep_scorer"
+        assert mock_builder._scorers[4]["scorer"].get("pattern") == "$A.foo()"
+        assert mock_builder._scorers[4]["scorer"].get("lang") == "python"
 
         # Custom scorer with optional params
-        builder.add_custom_scorer("custom-scorer", custom_scorer_type="my_scorer", scorer_params={"threshold": 0.5})
-        assert builder._scorers[5]["scorer"]["type"] == "custom_scorer"
-        assert builder._scorers[5]["scorer"].get("custom_scorer_type") == "my_scorer"
-        assert builder._scorers[5]["scorer"].get("scorer_params") == {"threshold": 0.5}
+        mock_builder.add_custom_scorer(
+            "custom-scorer", custom_scorer_type="my_scorer", scorer_params={"threshold": 0.5}
+        )
+        assert mock_builder._scorers[5]["scorer"]["type"] == "custom_scorer"
+        assert mock_builder._scorers[5]["scorer"].get("custom_scorer_type") == "my_scorer"
+        assert mock_builder._scorers[5]["scorer"].get("scorer_params") == {"threshold": 0.5}
 
         # Verify multiple scorers accumulated
-        assert len(builder._scorers) == 6
+        assert len(mock_builder._scorers) == 6
 
-    def test_add_scorer_rejects_invalid_weight(self, builder: AsyncScenarioBuilder) -> None:
+    def test_add_scorer_rejects_invalid_weight(self, mock_builder: AsyncScenarioBuilder) -> None:
         """Test that adding a scorer with zero or negative weight raises ValueError."""
         with pytest.raises(ValueError, match="Scorer weight must be positive"):
-            builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=0.0)
+            mock_builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=0.0)
 
         with pytest.raises(ValueError, match="Scorer weight must be positive"):
-            builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=-1.0)
+            mock_builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=-1.0)
 
-    def test_build_params_validation(self, builder: AsyncScenarioBuilder) -> None:
+    def test_build_params_validation(self, mock_builder: AsyncScenarioBuilder) -> None:
         """Test _build_params raises for missing required fields."""
         # Missing problem statement
-        builder.add_test_command_scorer("test", test_command="pytest")
+        mock_builder.add_test_command_scorer("test", test_command="pytest")
         with pytest.raises(ValueError, match="Problem statement is required"):
-            builder._build_params()
+            mock_builder._build_params()
 
         # Missing scorer (new builder)
-        builder2 = AsyncScenarioBuilder("test2", builder._client)
+        builder2 = AsyncScenarioBuilder("test2", mock_builder._client)
         builder2.with_problem_statement("Fix the bug")
         with pytest.raises(ValueError, match="At least one scorer is required"):
             builder2._build_params()
 
-    def test_build_params_with_all_options(self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None:
+    def test_build_params_with_all_options(
+        self, mock_builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint
+    ) -> None:
         """Test _build_params with all optional fields set."""
-        builder.with_problem_statement("Fix the bug")
-        builder.with_additional_context({"hint": "line 42"})
-        builder.add_test_command_scorer("tests", test_command="pytest")
-        builder.from_blueprint(mock_blueprint)
-        builder.with_working_directory("/app")
-        builder.with_metadata({"team": "infra"})
-        builder.with_reference_output("diff content")
-        builder.with_required_env_vars(["API_KEY"])
-        builder.with_required_secrets(["db_pass"])
-        builder.with_validation_type("FORWARD")
-
-        params = builder._build_params()
+        mock_builder.with_problem_statement("Fix the bug")
+        mock_builder.with_additional_context({"hint": "line 42"})
+        mock_builder.add_test_command_scorer("tests", test_command="pytest")
+        mock_builder.from_blueprint(mock_blueprint)
+        mock_builder.with_working_directory("/app")
+        mock_builder.with_metadata({"team": "infra"})
+        mock_builder.with_reference_output("diff content")
+        mock_builder.with_required_env_vars(["API_KEY"])
+        mock_builder.with_required_secrets(["db_pass"])
+        mock_builder.with_validation_type("FORWARD")
+
+        params = mock_builder._build_params()
 
         assert params["name"] == "test-scenario"
         assert params["input_context"]["problem_statement"] == "Fix the bug"
@@ -169,14 +171,14 @@ def test_build_params_with_all_options(self, builder: AsyncScenarioBuilder, mock
         assert params.get("required_secret_names") == ["db_pass"]
         assert params.get("validation_type") == "FORWARD"
 
-    def test_build_params_normalizes_weights(self, builder: AsyncScenarioBuilder) -> None:
+    def test_build_params_normalizes_weights(self, mock_builder: AsyncScenarioBuilder) -> None:
         """Test that _build_params normalizes scorer weights to sum to 1.0."""
-        builder.with_problem_statement("Fix the bug")
-        builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0)
-        builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0)
-        builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0)
+        mock_builder.with_problem_statement("Fix the bug")
+        mock_builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0)
+        mock_builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0)
+        mock_builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0)
 
-        params = builder._build_params()
+        params = mock_builder._build_params()
         scorers = list(params["scoring_contract"]["scoring_function_parameters"])
 
         # Weights 1, 2, 3 should normalize to 1/6, 2/6, 3/6
@@ -191,15 +193,15 @@ def test_build_params_normalizes_weights(self, builder: AsyncScenarioBuilder) ->
 
     @pytest.mark.asyncio
     async def test_push_calls_api_and_returns_scenario(
-        self, builder: AsyncScenarioBuilder, mock_async_client: MagicMock
+        self, mock_builder: AsyncScenarioBuilder, mock_async_client: MagicMock
     ) -> None:
         """Test push() calls API with correct params and returns AsyncScenario."""
         mock_async_client.scenarios.create.return_value.id = "scn-new-123"
 
-        builder.with_problem_statement("Fix the bug")
-        builder.add_test_command_scorer("tests", test_command="pytest")
+        mock_builder.with_problem_statement("Fix the bug")
+        mock_builder.add_test_command_scorer("tests", test_command="pytest")
 
-        scenario = await builder.push()
+        scenario = await mock_builder.push()
 
         mock_async_client.scenarios.create.assert_called_once()
         call_kwargs = mock_async_client.scenarios.create.call_args.kwargs
@@ -208,10 +210,10 @@ async def test_push_calls_api_and_returns_scenario(
 
         assert scenario.id == "scn-new-123"
 
-    def test_fluent_chaining(self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None:
+    def test_fluent_chaining(self, mock_builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None:
         """Test that all builder methods can be chained fluently."""
         result = (
-            builder.from_blueprint(mock_blueprint)
+            mock_builder.from_blueprint(mock_blueprint)
             .with_working_directory("/app")
             .with_problem_statement("Fix the bug")
             .with_additional_context({"hint": "check main.py"})
@@ -223,8 +225,8 @@ def test_fluent_chaining(self, builder: AsyncScenarioBuilder, mock_blueprint: As
             .with_validation_type("FORWARD")
         )
 
-        assert result is builder
-        assert builder._blueprint is mock_blueprint
-        assert builder._working_directory == "/app"
-        assert builder._problem_statement == "Fix the bug"
-        assert len(builder._scorers) == 1
+        assert result is mock_builder
+        assert mock_builder._blueprint is mock_blueprint
+        assert mock_builder._working_directory == "/app"
+        assert mock_builder._problem_statement == "Fix the bug"
+        assert len(mock_builder._scorers) == 1
diff --git a/tests/sdk/test_execution.py b/tests/sdk/test_execution.py
index fa2aaca2f..63b244d0e 100644
--- a/tests/sdk/test_execution.py
+++ b/tests/sdk/test_execution.py
@@ -12,7 +12,8 @@
     TASK_COMPLETION_SHORT,
     MockExecutionView,
 )
-from runloop_api_client.sdk.execution import Execution, _StreamingGroup
+from runloop_api_client.sdk import Execution
+from runloop_api_client.sdk.execution import _StreamingGroup
 
 # Legacy aliases for backward compatibility during transition
 SHORT_SLEEP = THREAD_STARTUP_DELAY
diff --git a/tests/sdk/test_execution_result.py b/tests/sdk/test_execution_result.py
index 60d51827f..689b108d5 100644
--- a/tests/sdk/test_execution_result.py
+++ b/tests/sdk/test_execution_result.py
@@ -6,7 +6,7 @@
 from unittest.mock import Mock
 
 from tests.sdk.conftest import MockExecutionView
-from runloop_api_client.sdk.execution_result import ExecutionResult
+from runloop_api_client.sdk import ExecutionResult
 
 
 class TestExecutionResult:
diff --git a/tests/sdk/test_ops.py b/tests/sdk/test_ops.py
index f7a566265..7ac503933 100644
--- a/tests/sdk/test_ops.py
+++ b/tests/sdk/test_ops.py
@@ -20,15 +20,21 @@
     MockBlueprintView,
     create_mock_httpx_response,
 )
-from runloop_api_client.sdk import Agent, Devbox, Scorer, Scenario, Snapshot, Blueprint, StorageObject
-from runloop_api_client.sdk.sync import (
+from runloop_api_client.sdk import (
+    Agent,
+    Devbox,
+    Scorer,
     AgentOps,
+    Scenario,
+    Snapshot,
+    Blueprint,
     DevboxOps,
     ScorerOps,
     RunloopSDK,
     ScenarioOps,
     SnapshotOps,
     BlueprintOps,
+    StorageObject,
     StorageObjectOps,
 )
 from runloop_api_client.lib.polling import PollingConfig
diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py
index 8a7284d17..1b42df054 100644
--- a/tests/sdk/test_scenario_builder.py
+++ b/tests/sdk/test_scenario_builder.py
@@ -6,9 +6,7 @@
 
 import pytest
 
-from runloop_api_client.sdk.snapshot import Snapshot
-from runloop_api_client.sdk.blueprint import Blueprint
-from runloop_api_client.sdk.scenario_builder import ScenarioBuilder
+from runloop_api_client.sdk import Snapshot, Blueprint, ScenarioBuilder
 from runloop_api_client.types.scoring_function_param import ScorerTestBasedScoringFunctionTestFile
 
 
@@ -32,7 +30,7 @@ def mock_snapshot(self, mock_client: MagicMock) -> Snapshot:
         return Snapshot(mock_client, "snap-123")
 
     @pytest.fixture
-    def builder(self, mock_client: MagicMock) -> ScenarioBuilder:
+    def mock_builder(self, mock_client: MagicMock) -> ScenarioBuilder:
         """Create a ScenarioBuilder instance with mock client."""
         return ScenarioBuilder("test-scenario", mock_client)
 
@@ -46,113 +44,115 @@ def test_instantiation(self, mock_client: MagicMock) -> None:
         assert repr(builder) == "<ScenarioBuilder name='my-scenario'>"
 
     def test_from_blueprint_and_snapshot(
-        self, builder: ScenarioBuilder, mock_blueprint: Blueprint, mock_snapshot: Snapshot
+        self, mock_builder: ScenarioBuilder, mock_blueprint: Blueprint, mock_snapshot: Snapshot
     ) -> None:
         """Test blueprint/snapshot setting returns self and are mutually exclusive."""
         # from_blueprint returns self and sets blueprint
-        result = builder.from_blueprint(mock_blueprint)
-        assert result is builder
-        assert builder._blueprint is mock_blueprint
-        assert builder._snapshot is None
+        result = mock_builder.from_blueprint(mock_blueprint)
+        assert result is mock_builder
+        assert mock_builder._blueprint is mock_blueprint
+        assert mock_builder._snapshot is None
 
         # from_snapshot returns self, sets snapshot, and clears blueprint
-        result = builder.from_snapshot(mock_snapshot)
-        assert result is builder
-        assert builder._snapshot is mock_snapshot
-        assert builder._blueprint is None
+        result = mock_builder.from_snapshot(mock_snapshot)
+        assert result is mock_builder
+        assert mock_builder._snapshot is mock_snapshot
+        assert mock_builder._blueprint is None
 
         # from_blueprint clears snapshot
-        builder.from_blueprint(mock_blueprint)
-        assert builder._blueprint is mock_blueprint
-        assert builder._snapshot is None
+        mock_builder.from_blueprint(mock_blueprint)
+        assert mock_builder._blueprint is mock_blueprint
+        assert mock_builder._snapshot is None
 
-    def test_scorers(self, builder: ScenarioBuilder) -> None:
+    def test_scorers(self, mock_builder: ScenarioBuilder) -> None:
         """Test all scorer types, optional params, and multiple scorers."""
         # Test scorer with test files
         test_files: list[ScorerTestBasedScoringFunctionTestFile] = [
             {"file_path": "test_main.py", "file_contents": "def test_foo(): pass"}
         ]
-        result = builder.add_test_command_scorer(
+        result = mock_builder.add_test_command_scorer(
             "test-scorer", test_command="pytest", weight=2.0, test_files=test_files
         )
-        assert result is builder
-        assert builder._scorers[0]["name"] == "test-scorer"
-        assert builder._scorers[0]["weight"] == 2.0
-        assert builder._scorers[0]["scorer"]["type"] == "test_based_scorer"
-        assert builder._scorers[0]["scorer"].get("test_command") == "pytest"
-        assert builder._scorers[0]["scorer"].get("test_files") == test_files
+        assert result is mock_builder
+        assert mock_builder._scorers[0]["name"] == "test-scorer"
+        assert mock_builder._scorers[0]["weight"] == 2.0
+        assert mock_builder._scorers[0]["scorer"]["type"] == "test_based_scorer"
+        assert mock_builder._scorers[0]["scorer"].get("test_command") == "pytest"
+        assert mock_builder._scorers[0]["scorer"].get("test_files") == test_files
 
         # Command scorer
-        builder.add_shell_command_scorer("cmd-scorer", command="./check.sh")
-        assert builder._scorers[1]["scorer"]["type"] == "command_scorer"
-        assert builder._scorers[1]["scorer"].get("command") == "./check.sh"
+        mock_builder.add_shell_command_scorer("cmd-scorer", command="./check.sh")
+        assert mock_builder._scorers[1]["scorer"]["type"] == "command_scorer"
+        assert mock_builder._scorers[1]["scorer"].get("command") == "./check.sh"
 
         # Bash scorer
-        builder.add_bash_script_scorer("bash-scorer", bash_script="echo 'score=1.0'")
-        assert builder._scorers[2]["scorer"]["type"] == "bash_script_scorer"
-        assert builder._scorers[2]["scorer"].get("bash_script") == "echo 'score=1.0'"
+        mock_builder.add_bash_script_scorer("bash-scorer", bash_script="echo 'score=1.0'")
+        assert mock_builder._scorers[2]["scorer"]["type"] == "bash_script_scorer"
+        assert mock_builder._scorers[2]["scorer"].get("bash_script") == "echo 'score=1.0'"
 
         # Python scorer with optional params
-        builder.add_python_script_scorer(
+        mock_builder.add_python_script_scorer(
             "python-scorer",
             python_script="print('1.0')",
             python_version_constraint=">=3.10",
             requirements_contents="numpy",
         )
-        assert builder._scorers[3]["scorer"]["type"] == "python_script_scorer"
-        assert builder._scorers[3]["scorer"].get("python_version_constraint") == ">=3.10"
-        assert builder._scorers[3]["scorer"].get("requirements_contents") == "numpy"
+        assert mock_builder._scorers[3]["scorer"]["type"] == "python_script_scorer"
+        assert mock_builder._scorers[3]["scorer"].get("python_version_constraint") == ">=3.10"
+        assert mock_builder._scorers[3]["scorer"].get("requirements_contents") == "numpy"
 
         # AST grep scorer with optional lang
-        builder.add_ast_grep_scorer("ast-scorer", pattern="$A.foo()", search_directory="/src", lang="python")
-        assert builder._scorers[4]["scorer"]["type"] == "ast_grep_scorer"
-        assert builder._scorers[4]["scorer"].get("pattern") == "$A.foo()"
-        assert builder._scorers[4]["scorer"].get("lang") == "python"
+        mock_builder.add_ast_grep_scorer("ast-scorer", pattern="$A.foo()", search_directory="/src", lang="python")
+        assert mock_builder._scorers[4]["scorer"]["type"] == "ast_grep_scorer"
+        assert mock_builder._scorers[4]["scorer"].get("pattern") == "$A.foo()"
+        assert mock_builder._scorers[4]["scorer"].get("lang") == "python"
 
         # Custom scorer with optional params
-        builder.add_custom_scorer("custom-scorer", custom_scorer_type="my_scorer", scorer_params={"threshold": 0.5})
-        assert builder._scorers[5]["scorer"]["type"] == "custom_scorer"
-        assert builder._scorers[5]["scorer"].get("custom_scorer_type") == "my_scorer"
-        assert builder._scorers[5]["scorer"].get("scorer_params") == {"threshold": 0.5}
+        mock_builder.add_custom_scorer(
+            "custom-scorer", custom_scorer_type="my_scorer", scorer_params={"threshold": 0.5}
+        )
+        assert mock_builder._scorers[5]["scorer"]["type"] == "custom_scorer"
+        assert mock_builder._scorers[5]["scorer"].get("custom_scorer_type") == "my_scorer"
+        assert mock_builder._scorers[5]["scorer"].get("scorer_params") == {"threshold": 0.5}
 
         # Verify multiple scorers accumulated
-        assert len(builder._scorers) == 6
+        assert len(mock_builder._scorers) == 6
 
-    def test_add_scorer_rejects_invalid_weight(self, builder: ScenarioBuilder) -> None:
+    def test_add_scorer_rejects_invalid_weight(self, mock_builder: ScenarioBuilder) -> None:
         """Test that adding a scorer with zero or negative weight raises ValueError."""
         with pytest.raises(ValueError, match="Scorer weight must be positive"):
-            builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=0.0)
+            mock_builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=0.0)
 
         with pytest.raises(ValueError, match="Scorer weight must be positive"):
-            builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=-1.0)
+            mock_builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=-1.0)
 
-    def test_build_params_validation(self, builder: ScenarioBuilder) -> None:
+    def test_build_params_validation(self, mock_builder: ScenarioBuilder) -> None:
         """Test _build_params raises for missing required fields."""
         # Missing problem statement
-        builder.add_test_command_scorer("test", test_command="pytest")
+        mock_builder.add_test_command_scorer("test", test_command="pytest")
         with pytest.raises(ValueError, match="Problem statement is required"):
-            builder._build_params()
+            mock_builder._build_params()
 
         # Missing scorer (new builder)
-        builder2 = ScenarioBuilder("test2", builder._client)
+        builder2 = ScenarioBuilder("test2", mock_builder._client)
         builder2.with_problem_statement("Fix the bug")
         with pytest.raises(ValueError, match="At least one scorer is required"):
             builder2._build_params()
 
-    def test_build_params_with_all_options(self, builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None:
+    def test_build_params_with_all_options(self, mock_builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None:
         """Test _build_params with all optional fields set."""
-        builder.with_problem_statement("Fix the bug")
-        builder.with_additional_context({"hint": "line 42"})
-        builder.add_test_command_scorer("tests", test_command="pytest")
-        builder.from_blueprint(mock_blueprint)
-        builder.with_working_directory("/app")
-        builder.with_metadata({"team": "infra"})
-        builder.with_reference_output("diff content")
-        builder.with_required_env_vars(["API_KEY"])
-        builder.with_required_secrets(["db_pass"])
-        builder.with_validation_type("FORWARD")
-
-        params = builder._build_params()
+        mock_builder.with_problem_statement("Fix the bug")
+        mock_builder.with_additional_context({"hint": "line 42"})
+        mock_builder.add_test_command_scorer("tests", test_command="pytest")
+        mock_builder.from_blueprint(mock_blueprint)
+        mock_builder.with_working_directory("/app")
+        mock_builder.with_metadata({"team": "infra"})
+        mock_builder.with_reference_output("diff content")
+        mock_builder.with_required_env_vars(["API_KEY"])
+        mock_builder.with_required_secrets(["db_pass"])
+        mock_builder.with_validation_type("FORWARD")
+
+        params = mock_builder._build_params()
 
         assert params["name"] == "test-scenario"
         assert params["input_context"]["problem_statement"] == "Fix the bug"
@@ -167,14 +167,14 @@ def test_build_params_with_all_options(self, builder: ScenarioBuilder, mock_blue
         assert params.get("required_secret_names") == ["db_pass"]
         assert params.get("validation_type") == "FORWARD"
 
-    def test_build_params_normalizes_weights(self, builder: ScenarioBuilder) -> None:
+    def test_build_params_normalizes_weights(self, mock_builder: ScenarioBuilder) -> None:
         """Test that _build_params normalizes scorer weights to sum to 1.0."""
-        builder.with_problem_statement("Fix the bug")
-        builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0)
-        builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0)
-        builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0)
+        mock_builder.with_problem_statement("Fix the bug")
+        mock_builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0)
+        mock_builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0)
+        mock_builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0)
 
-        params = builder._build_params()
+        params = mock_builder._build_params()
         scorers = list(params["scoring_contract"]["scoring_function_parameters"])
 
         # Weights 1, 2, 3 should normalize to 1/6, 2/6, 3/6
@@ -187,14 +187,14 @@ def test_build_params_normalizes_weights(self, builder: ScenarioBuilder) -> None
         total = sum(s["weight"] for s in scorers)
         assert abs(total - 1.0) < 0.0001
 
-    def test_push_calls_api_and_returns_scenario(self, builder: ScenarioBuilder, mock_client: MagicMock) -> None:
+    def test_push_calls_api_and_returns_scenario(self, mock_builder: ScenarioBuilder, mock_client: MagicMock) -> None:
         """Test push() calls API with correct params and returns Scenario."""
         mock_client.scenarios.create.return_value.id = "scn-new-123"
 
-        builder.with_problem_statement("Fix the bug")
-        builder.add_test_command_scorer("tests", test_command="pytest")
+        mock_builder.with_problem_statement("Fix the bug")
+        mock_builder.add_test_command_scorer("tests", test_command="pytest")
 
-        scenario = builder.push()
+        scenario = mock_builder.push()
 
         mock_client.scenarios.create.assert_called_once()
         call_kwargs = mock_client.scenarios.create.call_args.kwargs
@@ -203,10 +203,10 @@ def test_push_calls_api_and_returns_scenario(self, builder: ScenarioBuilder, moc
 
         assert scenario.id == "scn-new-123"
 
-    def test_fluent_chaining(self, builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None:
+    def test_fluent_chaining(self, mock_builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None:
         """Test that all builder methods can be chained fluently."""
         result = (
-            builder.from_blueprint(mock_blueprint)
+            mock_builder.from_blueprint(mock_blueprint)
             .with_working_directory("/app")
             .with_problem_statement("Fix the bug")
             .with_additional_context({"hint": "check main.py"})
@@ -218,8 +218,8 @@ def test_fluent_chaining(self, builder: ScenarioBuilder, mock_blueprint: Bluepri
             .with_validation_type("FORWARD")
         )
 
-        assert result is builder
-        assert builder._blueprint is mock_blueprint
-        assert builder._working_directory == "/app"
-        assert builder._problem_statement == "Fix the bug"
-        assert len(builder._scorers) == 1
+        assert result is mock_builder
+        assert mock_builder._blueprint is mock_blueprint
+        assert mock_builder._working_directory == "/app"
+        assert mock_builder._problem_statement == "Fix the bug"
+        assert len(mock_builder._scorers) == 1

From 1fffeeb26e723dac5504c15e6f430cbb96132644 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Wed, 10 Dec 2025 16:56:04 -0800
Subject: [PATCH 26/31] added `preview()` method to scenario builder

---
 src/runloop_api_client/sdk/_types.py          | 14 +---
 .../sdk/async_scenario_builder.py             | 19 +++---
 .../sdk/scenario_builder.py                   | 19 +++---
 tests/sdk/test_async_scenario_builder.py      | 64 ++++++++++++++++++-
 tests/sdk/test_scenario_builder.py            | 64 ++++++++++++++++++-
 5 files changed, 146 insertions(+), 34 deletions(-)

diff --git a/src/runloop_api_client/sdk/_types.py b/src/runloop_api_client/sdk/_types.py
index ef180a56e..be09f6eed 100644
--- a/src/runloop_api_client/sdk/_types.py
+++ b/src/runloop_api_client/sdk/_types.py
@@ -1,4 +1,4 @@
-from typing import Dict, Union, Callable, Optional
+from typing import Union, Callable, Optional
 from typing_extensions import TypedDict
 
 from .._types import Body, Query, Headers, Timeout, NotGiven
@@ -7,7 +7,6 @@
 from ..types.scenarios import ScorerListParams, ScorerCreateParams, ScorerUpdateParams, ScorerValidateParams
 from ..types.input_context import InputContext
 from ..types.scenario_view import ScenarioView
-from ..types.scoring_contract import ScoringContract
 from ..types.agent_list_params import AgentListParams
 from ..types.devbox_list_params import DevboxListParams
 from ..types.object_list_params import ObjectListParams
@@ -202,14 +201,5 @@ class ScenarioPreview(ScenarioView):
     id: Optional[str] = None  # type: ignore[assignment]
     """The ID of the Scenario."""
 
-    input_context: Optional[InputContextPreview] = None  # type: ignore[assignment]
+    input_context: InputContextPreview  # type: ignore[assignment]
     """The input context for the Scenario."""
-
-    metadata: Optional[Dict[str, str]] = None  # type: ignore[assignment]
-    """User defined metadata to attach to the scenario for organization."""
-
-    name: Optional[str] = None  # type: ignore[assignment]
-    """The name of the Scenario."""
-
-    scoring_contract: Optional[ScoringContract] = None  # type: ignore[assignment]
-    """The scoring contract for the Scenario."""
diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index e74f1ee68..008602867 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -65,7 +65,7 @@ def __init__(self, name: str, client: AsyncRunloop) -> None:
         self._scorers: List[ScoringFunctionParam] = []
 
         # Metadata and other options
-        self._metadata: Optional[Dict[str, str]] = None
+        self._metadata: Dict[str, str] = {}
         self._reference_output: Optional[str] = None
         self._required_env_vars: Optional[List[str]] = None
         self._required_secrets: Optional[List[str]] = None
@@ -392,16 +392,15 @@ def _build_normalized_scorers(self) -> List[ScoringFunctionParam]:
         total_weight = sum(s["weight"] for s in self._scorers)
         return [{**s, "weight": s["weight"] / total_weight} for s in self._scorers]
 
-    def _build_environment_params(self) -> ScenarioEnvironmentParam:
+    def _build_environment_params(self) -> Optional[ScenarioEnvironmentParam]:
         """Build environment parameters."""
-        env_params: ScenarioEnvironmentParam = {}
-        if self._blueprint:
-            env_params["blueprint_id"] = self._blueprint.id
-        if self._snapshot:
-            env_params["snapshot_id"] = self._snapshot.id
-        if self._working_directory:
-            env_params["working_directory"] = self._working_directory
-        return env_params
+        if not self._blueprint and not self._snapshot and not self._working_directory:
+            return None
+        return {
+            "blueprint_id": self._blueprint.id if self._blueprint else None,
+            "snapshot_id": self._snapshot.id if self._snapshot else None,
+            "working_directory": self._working_directory if self._working_directory else None,
+        }
 
     def _build_params(self) -> ScenarioCreateParams:
         """Build the scenario creation parameters.
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index 7e0c5094d..dec8d38b6 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -65,7 +65,7 @@ def __init__(self, name: str, client: Runloop) -> None:
         self._scorers: List[ScoringFunctionParam] = []
 
         # Metadata and other options
-        self._metadata: Optional[Dict[str, str]] = None
+        self._metadata: Dict[str, str] = {}
         self._reference_output: Optional[str] = None
         self._required_env_vars: Optional[List[str]] = None
         self._required_secrets: Optional[List[str]] = None
@@ -392,16 +392,15 @@ def _build_normalized_scorers(self) -> List[ScoringFunctionParam]:
         total_weight = sum(s["weight"] for s in self._scorers)
         return [{**s, "weight": s["weight"] / total_weight} for s in self._scorers]
 
-    def _build_environment_params(self) -> ScenarioEnvironmentParam:
+    def _build_environment_params(self) -> Optional[ScenarioEnvironmentParam]:
         """Build environment parameters"""
-        env_params: ScenarioEnvironmentParam = {}
-        if self._blueprint:
-            env_params["blueprint_id"] = self._blueprint.id
-        if self._snapshot:
-            env_params["snapshot_id"] = self._snapshot.id
-        if self._working_directory:
-            env_params["working_directory"] = self._working_directory
-        return env_params
+        if not self._blueprint and not self._snapshot and not self._working_directory:
+            return None
+        return {
+            "blueprint_id": self._blueprint.id if self._blueprint else None,
+            "snapshot_id": self._snapshot.id if self._snapshot else None,
+            "working_directory": self._working_directory if self._working_directory else None,
+        }
 
     def _build_params(self) -> ScenarioCreateParams:
         """Build the scenario creation parameters.
diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py
index 79a128f11..d0d1060b8 100644
--- a/tests/sdk/test_async_scenario_builder.py
+++ b/tests/sdk/test_async_scenario_builder.py
@@ -6,7 +6,7 @@
 
 import pytest
 
-from runloop_api_client.sdk import AsyncSnapshot, AsyncBlueprint, AsyncScenarioBuilder
+from runloop_api_client.sdk import AsyncSnapshot, AsyncBlueprint, ScenarioPreview, AsyncScenarioBuilder
 from runloop_api_client.types.scoring_function_param import ScorerTestBasedScoringFunctionTestFile
 
 
@@ -230,3 +230,65 @@ def test_fluent_chaining(self, mock_builder: AsyncScenarioBuilder, mock_blueprin
         assert mock_builder._working_directory == "/app"
         assert mock_builder._problem_statement == "Fix the bug"
         assert len(mock_builder._scorers) == 1
+
+    def test_preview_with_no_config(self, mock_builder: AsyncScenarioBuilder) -> None:
+        """Test preview() works with no configuration (only name from constructor)."""
+        preview = mock_builder.preview()
+
+        assert isinstance(preview, ScenarioPreview)
+        assert preview.name == "test-scenario"
+        assert preview.input_context is not None
+        assert preview.input_context.problem_statement is None
+        assert preview.input_context.additional_context is None
+        assert preview.scoring_contract is not None
+        assert len(preview.scoring_contract.scoring_function_parameters) == 0
+        assert preview.environment is None
+        assert len(preview.metadata) == 0
+        assert preview.reference_output is None
+        assert preview.required_environment_variables is None
+        assert preview.required_secret_names is None
+        assert preview.validation_type is None
+
+    def test_preview_with_full_config(self, mock_builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None:
+        """Test preview() with all fields configured, including weight normalization."""
+        mock_builder.with_problem_statement("Fix the bug")
+        mock_builder.with_additional_context({"hint": "line 42"})
+        mock_builder.from_blueprint(mock_blueprint)
+        mock_builder.with_working_directory("/app")
+        mock_builder.with_metadata({"team": "infra"})
+        mock_builder.with_reference_output("diff content")
+        mock_builder.with_required_env_vars(["API_KEY"])
+        mock_builder.with_required_secrets(["db_pass"])
+        mock_builder.with_validation_type("FORWARD")
+        # Add multiple scorers with different weights to test normalization
+        mock_builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0)
+        mock_builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0)
+        mock_builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0)
+
+        preview = mock_builder.preview()
+
+        # Verify it returns ScenarioPreview
+        assert isinstance(preview, ScenarioPreview)
+
+        # Verify all fields are populated
+        assert preview.name == "test-scenario"
+        assert preview.input_context is not None
+        assert preview.input_context.problem_statement == "Fix the bug"
+        assert preview.input_context.additional_context == {"hint": "line 42"}
+        assert preview.environment is not None
+        assert preview.environment.blueprint_id == "bp-123"
+        assert preview.environment.working_directory == "/app"
+        assert preview.metadata == {"team": "infra"}
+        assert preview.reference_output == "diff content"
+        assert preview.required_environment_variables == ["API_KEY"]
+        assert preview.required_secret_names == ["db_pass"]
+        assert preview.validation_type == "FORWARD"
+
+        # Verify weights are normalized (1, 2, 3 -> 1/6, 2/6, 3/6)
+        assert preview.scoring_contract is not None
+        scorers = preview.scoring_contract.scoring_function_parameters
+        assert len(scorers) == 3
+        assert abs(scorers[0].weight - 1 / 6) < 0.0001
+        assert abs(scorers[1].weight - 2 / 6) < 0.0001
+        assert abs(scorers[2].weight - 3 / 6) < 0.0001
+        assert abs(sum(s.weight for s in scorers) - 1.0) < 0.0001
diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py
index 1b42df054..85617d975 100644
--- a/tests/sdk/test_scenario_builder.py
+++ b/tests/sdk/test_scenario_builder.py
@@ -6,7 +6,7 @@
 
 import pytest
 
-from runloop_api_client.sdk import Snapshot, Blueprint, ScenarioBuilder
+from runloop_api_client.sdk import Snapshot, Blueprint, ScenarioBuilder, ScenarioPreview
 from runloop_api_client.types.scoring_function_param import ScorerTestBasedScoringFunctionTestFile
 
 
@@ -223,3 +223,65 @@ def test_fluent_chaining(self, mock_builder: ScenarioBuilder, mock_blueprint: Bl
         assert mock_builder._working_directory == "/app"
         assert mock_builder._problem_statement == "Fix the bug"
         assert len(mock_builder._scorers) == 1
+
+    def test_preview_with_no_config(self, mock_builder: ScenarioBuilder) -> None:
+        """Test preview() works with no configuration (only name from constructor)."""
+        preview = mock_builder.preview()
+
+        assert isinstance(preview, ScenarioPreview)
+        assert preview.name == "test-scenario"
+        assert preview.input_context is not None
+        assert preview.input_context.problem_statement is None
+        assert preview.input_context.additional_context is None
+        assert preview.scoring_contract is not None
+        assert len(preview.scoring_contract.scoring_function_parameters) == 0
+        assert preview.environment is None
+        assert len(preview.metadata) == 0
+        assert preview.reference_output is None
+        assert preview.required_environment_variables is None
+        assert preview.required_secret_names is None
+        assert preview.validation_type is None
+
+    def test_preview_with_full_config(self, mock_builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None:
+        """Test preview() with all fields configured, including weight normalization."""
+        mock_builder.with_problem_statement("Fix the bug")
+        mock_builder.with_additional_context({"hint": "line 42"})
+        mock_builder.from_blueprint(mock_blueprint)
+        mock_builder.with_working_directory("/app")
+        mock_builder.with_metadata({"team": "infra"})
+        mock_builder.with_reference_output("diff content")
+        mock_builder.with_required_env_vars(["API_KEY"])
+        mock_builder.with_required_secrets(["db_pass"])
+        mock_builder.with_validation_type("FORWARD")
+        # Add multiple scorers with different weights to test normalization
+        mock_builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0)
+        mock_builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0)
+        mock_builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0)
+
+        preview = mock_builder.preview()
+
+        # Verify it returns ScenarioPreview
+        assert isinstance(preview, ScenarioPreview)
+
+        # Verify all fields are populated
+        assert preview.name == "test-scenario"
+        assert preview.input_context is not None
+        assert preview.input_context.problem_statement == "Fix the bug"
+        assert preview.input_context.additional_context == {"hint": "line 42"}
+        assert preview.environment is not None
+        assert preview.environment.blueprint_id == "bp-123"
+        assert preview.environment.working_directory == "/app"
+        assert preview.metadata == {"team": "infra"}
+        assert preview.reference_output == "diff content"
+        assert preview.required_environment_variables == ["API_KEY"]
+        assert preview.required_secret_names == ["db_pass"]
+        assert preview.validation_type == "FORWARD"
+
+        # Verify weights are normalized (1, 2, 3 -> 1/6, 2/6, 3/6)
+        assert preview.scoring_contract is not None
+        scorers = preview.scoring_contract.scoring_function_parameters
+        assert len(scorers) == 3
+        assert abs(scorers[0].weight - 1 / 6) < 0.0001
+        assert abs(scorers[1].weight - 2 / 6) < 0.0001
+        assert abs(scorers[2].weight - 3 / 6) < 0.0001
+        assert abs(sum(s.weight for s in scorers) - 1.0) < 0.0001

From 44ae5d02ac19b44911b88ad2770ca78a8cef5eca Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Wed, 10 Dec 2025 17:51:42 -0800
Subject: [PATCH 27/31] rename `_build_params` to `build` (now publicly exposed
 method)

---
 .../sdk/async_scenario_builder.py             |  4 ++--
 .../sdk/scenario_builder.py                   |  4 ++--
 tests/sdk/test_async_scenario_builder.py      | 20 +++++++++----------
 tests/sdk/test_scenario_builder.py            | 20 +++++++++----------
 tests/smoketests/sdk/test_async_scenario.py   |  2 +-
 tests/smoketests/sdk/test_scenario.py         |  2 +-
 6 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index 008602867..7ee58f24b 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -402,7 +402,7 @@ def _build_environment_params(self) -> Optional[ScenarioEnvironmentParam]:
             "working_directory": self._working_directory if self._working_directory else None,
         }
 
-    def _build_params(self) -> ScenarioCreateParams:
+    def build(self) -> ScenarioCreateParams:
         """Build the scenario creation parameters.
 
         Weights are automatically normalized to sum to 1.0.
@@ -473,6 +473,6 @@ async def push(self, **options: Unpack[LongRequestOptions]) -> AsyncScenario:
         :return: Created scenario wrapper
         :rtype: AsyncScenario
         """
-        params = self._build_params()
+        params = self.build()
         scenario_view = await self._client.scenarios.create(**params, **options)
         return AsyncScenario(self._client, scenario_view.id)
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index dec8d38b6..bd2fddeda 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -402,7 +402,7 @@ def _build_environment_params(self) -> Optional[ScenarioEnvironmentParam]:
             "working_directory": self._working_directory if self._working_directory else None,
         }
 
-    def _build_params(self) -> ScenarioCreateParams:
+    def build(self) -> ScenarioCreateParams:
         """Build the scenario creation parameters.
 
         Weights are automatically normalized to sum to 1.0.
@@ -473,6 +473,6 @@ def push(self, **options: Unpack[LongRequestOptions]) -> Scenario:
         :return: Created scenario wrapper
         :rtype: Scenario
         """
-        params = self._build_params()
+        params = self.build()
         scenario_view = self._client.scenarios.create(**params, **options)
         return Scenario(self._client, scenario_view.id)
diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py
index d0d1060b8..618900188 100644
--- a/tests/sdk/test_async_scenario_builder.py
+++ b/tests/sdk/test_async_scenario_builder.py
@@ -128,23 +128,23 @@ def test_add_scorer_rejects_invalid_weight(self, mock_builder: AsyncScenarioBuil
         with pytest.raises(ValueError, match="Scorer weight must be positive"):
             mock_builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=-1.0)
 
-    def test_build_params_validation(self, mock_builder: AsyncScenarioBuilder) -> None:
-        """Test _build_params raises for missing required fields."""
+    def test_build_validation(self, mock_builder: AsyncScenarioBuilder) -> None:
+        """Test build raises for missing required fields."""
         # Missing problem statement
         mock_builder.add_test_command_scorer("test", test_command="pytest")
         with pytest.raises(ValueError, match="Problem statement is required"):
-            mock_builder._build_params()
+            mock_builder.build()
 
         # Missing scorer (new builder)
         builder2 = AsyncScenarioBuilder("test2", mock_builder._client)
         builder2.with_problem_statement("Fix the bug")
         with pytest.raises(ValueError, match="At least one scorer is required"):
-            builder2._build_params()
+            builder2.build()
 
-    def test_build_params_with_all_options(
+    def test_build_with_all_options(
         self, mock_builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint
     ) -> None:
-        """Test _build_params with all optional fields set."""
+        """Test build with all optional fields set."""
         mock_builder.with_problem_statement("Fix the bug")
         mock_builder.with_additional_context({"hint": "line 42"})
         mock_builder.add_test_command_scorer("tests", test_command="pytest")
@@ -156,7 +156,7 @@ def test_build_params_with_all_options(
         mock_builder.with_required_secrets(["db_pass"])
         mock_builder.with_validation_type("FORWARD")
 
-        params = mock_builder._build_params()
+        params = mock_builder.build()
 
         assert params["name"] == "test-scenario"
         assert params["input_context"]["problem_statement"] == "Fix the bug"
@@ -171,14 +171,14 @@ def test_build_params_with_all_options(
         assert params.get("required_secret_names") == ["db_pass"]
         assert params.get("validation_type") == "FORWARD"
 
-    def test_build_params_normalizes_weights(self, mock_builder: AsyncScenarioBuilder) -> None:
-        """Test that _build_params normalizes scorer weights to sum to 1.0."""
+    def test_build_normalizes_weights(self, mock_builder: AsyncScenarioBuilder) -> None:
+        """Test that build normalizes scorer weights to sum to 1.0."""
         mock_builder.with_problem_statement("Fix the bug")
         mock_builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0)
         mock_builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0)
         mock_builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0)
 
-        params = mock_builder._build_params()
+        params = mock_builder.build()
         scorers = list(params["scoring_contract"]["scoring_function_parameters"])
 
         # Weights 1, 2, 3 should normalize to 1/6, 2/6, 3/6
diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py
index 85617d975..75597d5f3 100644
--- a/tests/sdk/test_scenario_builder.py
+++ b/tests/sdk/test_scenario_builder.py
@@ -126,21 +126,21 @@ def test_add_scorer_rejects_invalid_weight(self, mock_builder: ScenarioBuilder)
         with pytest.raises(ValueError, match="Scorer weight must be positive"):
             mock_builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=-1.0)
 
-    def test_build_params_validation(self, mock_builder: ScenarioBuilder) -> None:
-        """Test _build_params raises for missing required fields."""
+    def test_build_validation(self, mock_builder: ScenarioBuilder) -> None:
+        """Test build raises for missing required fields."""
         # Missing problem statement
         mock_builder.add_test_command_scorer("test", test_command="pytest")
         with pytest.raises(ValueError, match="Problem statement is required"):
-            mock_builder._build_params()
+            mock_builder.build()
 
         # Missing scorer (new builder)
         builder2 = ScenarioBuilder("test2", mock_builder._client)
         builder2.with_problem_statement("Fix the bug")
         with pytest.raises(ValueError, match="At least one scorer is required"):
-            builder2._build_params()
+            builder2.build()
 
-    def test_build_params_with_all_options(self, mock_builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None:
-        """Test _build_params with all optional fields set."""
+    def test_build_with_all_options(self, mock_builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None:
+        """Test build with all optional fields set."""
         mock_builder.with_problem_statement("Fix the bug")
         mock_builder.with_additional_context({"hint": "line 42"})
         mock_builder.add_test_command_scorer("tests", test_command="pytest")
@@ -152,7 +152,7 @@ def test_build_params_with_all_options(self, mock_builder: ScenarioBuilder, mock
         mock_builder.with_required_secrets(["db_pass"])
         mock_builder.with_validation_type("FORWARD")
 
-        params = mock_builder._build_params()
+        params = mock_builder.build()
 
         assert params["name"] == "test-scenario"
         assert params["input_context"]["problem_statement"] == "Fix the bug"
@@ -167,14 +167,14 @@ def test_build_params_with_all_options(self, mock_builder: ScenarioBuilder, mock
         assert params.get("required_secret_names") == ["db_pass"]
         assert params.get("validation_type") == "FORWARD"
 
-    def test_build_params_normalizes_weights(self, mock_builder: ScenarioBuilder) -> None:
-        """Test that _build_params normalizes scorer weights to sum to 1.0."""
+    def test_build_normalizes_weights(self, mock_builder: ScenarioBuilder) -> None:
+        """Test that build normalizes scorer weights to sum to 1.0."""
         mock_builder.with_problem_statement("Fix the bug")
         mock_builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0)
         mock_builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0)
         mock_builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0)
 
-        params = mock_builder._build_params()
+        params = mock_builder.build()
         scorers = list(params["scoring_contract"]["scoring_function_parameters"])
 
         # Weights 1, 2, 3 should normalize to 1/6, 2/6, 3/6
diff --git a/tests/smoketests/sdk/test_async_scenario.py b/tests/smoketests/sdk/test_async_scenario.py
index 32359f957..b0abf6a41 100644
--- a/tests/smoketests/sdk/test_async_scenario.py
+++ b/tests/smoketests/sdk/test_async_scenario.py
@@ -46,7 +46,7 @@ async def push_or_update_scenario(sdk_client: AsyncRunloopSDK, builder: AsyncSce
         new_snapshot_id = builder._snapshot.id if builder._snapshot else None
 
         # Update existing scenario with builder's params
-        params = builder._build_params()
+        params = builder.build()
         result = await scenario.update(**filter_params(params, SDKScenarioUpdateParams))
 
         # Delete OLD blueprint/snapshot if they're being replaced
diff --git a/tests/smoketests/sdk/test_scenario.py b/tests/smoketests/sdk/test_scenario.py
index 0b23d6b24..4128cfa29 100644
--- a/tests/smoketests/sdk/test_scenario.py
+++ b/tests/smoketests/sdk/test_scenario.py
@@ -45,7 +45,7 @@ def push_or_update_scenario(sdk_client: RunloopSDK, builder: ScenarioBuilder) ->
         new_snapshot_id = builder._snapshot.id if builder._snapshot else None
 
         # Update existing scenario with builder's params
-        params = builder._build_params()
+        params = builder.build()
         result = scenario.update(**filter_params(params, SDKScenarioUpdateParams))
 
         # Delete OLD blueprint/snapshot if they're being replaced

From 49ff2c2190e3c547d0818ea8cf39fa9166596d23 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Wed, 10 Dec 2025 17:52:07 -0800
Subject: [PATCH 28/31] formatting

---
 tests/sdk/test_async_scenario_builder.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py
index 618900188..e20d99843 100644
--- a/tests/sdk/test_async_scenario_builder.py
+++ b/tests/sdk/test_async_scenario_builder.py
@@ -141,9 +141,7 @@ def test_build_validation(self, mock_builder: AsyncScenarioBuilder) -> None:
         with pytest.raises(ValueError, match="At least one scorer is required"):
             builder2.build()
 
-    def test_build_with_all_options(
-        self, mock_builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint
-    ) -> None:
+    def test_build_with_all_options(self, mock_builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None:
         """Test build with all optional fields set."""
         mock_builder.with_problem_statement("Fix the bug")
         mock_builder.with_additional_context({"hint": "line 42"})

From cc77ba49c11930b2ceb259585cd4f4a0e001792f Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Wed, 10 Dec 2025 17:57:17 -0800
Subject: [PATCH 29/31] update docstring examples to use builder.build()
 instead of builder.push()

---
 src/runloop_api_client/sdk/async_.py                 | 3 ++-
 src/runloop_api_client/sdk/async_scenario_builder.py | 3 ++-
 src/runloop_api_client/sdk/scenario_builder.py       | 3 ++-
 src/runloop_api_client/sdk/sync.py                   | 3 ++-
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py
index f121a60be..857fe1891 100644
--- a/src/runloop_api_client/sdk/async_.py
+++ b/src/runloop_api_client/sdk/async_.py
@@ -782,7 +782,8 @@ class AsyncScenarioOps:
         ...     .with_problem_statement("Fix the bug")
         ...     .add_test_command_scorer("tests", test_command="pytest")
         ... )
-        >>> scenario = await builder.push()
+        >>> params = builder.build()
+        >>> scenario = await runloop.scenario.create(**params) # equivalent to builder.push()
     """
 
     def __init__(self, client: AsyncRunloop) -> None:
diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index 7ee58f24b..0830a337e 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -38,7 +38,8 @@ class AsyncScenarioBuilder:
         ...     .with_problem_statement("Fix the bug in main.py")
         ...     .add_test_command_scorer("tests", test_command="pytest")
         ... )
-        >>> scenario = await builder.push()
+        >>> params = builder.build()
+        >>> scenario = await runloop.scenario.create(**params) # equivalent to builder.push()
     """
 
     def __init__(self, name: str, client: AsyncRunloop) -> None:
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index bd2fddeda..f95c0872d 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -38,7 +38,8 @@ class ScenarioBuilder:
         ...     .with_problem_statement("Fix the bug in main.py")
         ...     .add_test_command_scorer("tests", test_command="pytest")
         ... )
-        >>> scenario = builder.push()
+        >>> params = builder.build()
+        >>> scenario = runloop.scenario.create(**params) # equivalent to builder.push()
     """
 
     def __init__(self, name: str, client: Runloop) -> None:
diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py
index 22df39fe6..c34953eef 100644
--- a/src/runloop_api_client/sdk/sync.py
+++ b/src/runloop_api_client/sdk/sync.py
@@ -803,7 +803,8 @@ class ScenarioOps:
         ...     .with_problem_statement("Fix the bug")
         ...     .add_test_command_scorer("tests", test_command="pytest")
         ... )
-        >>> scenario = builder.push()
+        >>> params = builder.build()
+        >>> scenario = runloop.scenario.create(**params) # equivalent to builder.push()
     """
 
     def __init__(self, client: Runloop) -> None:

From e2c0b5c9d0bbfc9d04878909d46b2d1e9d2f69e9 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Wed, 10 Dec 2025 17:57:43 -0800
Subject: [PATCH 30/31] formatting

---
 src/runloop_api_client/sdk/async_.py                 | 2 +-
 src/runloop_api_client/sdk/async_scenario_builder.py | 2 +-
 src/runloop_api_client/sdk/scenario_builder.py       | 2 +-
 src/runloop_api_client/sdk/sync.py                   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py
index 857fe1891..4bcd08fc1 100644
--- a/src/runloop_api_client/sdk/async_.py
+++ b/src/runloop_api_client/sdk/async_.py
@@ -783,7 +783,7 @@ class AsyncScenarioOps:
         ...     .add_test_command_scorer("tests", test_command="pytest")
         ... )
         >>> params = builder.build()
-        >>> scenario = await runloop.scenario.create(**params) # equivalent to builder.push()
+        >>> scenario = await runloop.scenario.create(**params)  # equivalent to builder.push()
     """
 
     def __init__(self, client: AsyncRunloop) -> None:
diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index 0830a337e..d04044fc3 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -39,7 +39,7 @@ class AsyncScenarioBuilder:
         ...     .add_test_command_scorer("tests", test_command="pytest")
         ... )
         >>> params = builder.build()
-        >>> scenario = await runloop.scenario.create(**params) # equivalent to builder.push()
+        >>> scenario = await runloop.scenario.create(**params)  # equivalent to builder.push()
     """
 
     def __init__(self, name: str, client: AsyncRunloop) -> None:
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index f95c0872d..787518fe1 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -39,7 +39,7 @@ class ScenarioBuilder:
         ...     .add_test_command_scorer("tests", test_command="pytest")
         ... )
         >>> params = builder.build()
-        >>> scenario = runloop.scenario.create(**params) # equivalent to builder.push()
+        >>> scenario = runloop.scenario.create(**params)  # equivalent to builder.push()
     """
 
     def __init__(self, name: str, client: Runloop) -> None:
diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py
index c34953eef..f215c8116 100644
--- a/src/runloop_api_client/sdk/sync.py
+++ b/src/runloop_api_client/sdk/sync.py
@@ -804,7 +804,7 @@ class ScenarioOps:
         ...     .add_test_command_scorer("tests", test_command="pytest")
         ... )
         >>> params = builder.build()
-        >>> scenario = runloop.scenario.create(**params) # equivalent to builder.push()
+        >>> scenario = runloop.scenario.create(**params)  # equivalent to builder.push()
     """
 
     def __init__(self, client: Runloop) -> None:

From 5f1e260d6720d470e06ef34b340f02f4d2269505 Mon Sep 17 00:00:00 2001
From: Siddarth Chalasani <siddarth@runloop.ai>
Date: Wed, 10 Dec 2025 18:04:46 -0800
Subject: [PATCH 31/31] update with_problem_statement and
 with_additional_context docstrings

---
 src/runloop_api_client/sdk/async_scenario_builder.py | 3 ++-
 src/runloop_api_client/sdk/scenario_builder.py       | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py
index d04044fc3..37a3aa4b5 100644
--- a/src/runloop_api_client/sdk/async_scenario_builder.py
+++ b/src/runloop_api_client/sdk/async_scenario_builder.py
@@ -121,7 +121,7 @@ def with_working_directory(self, directory: str) -> Self:
         return self
 
     def with_problem_statement(self, statement: str) -> Self:
-        """Set the problem statement for the scenario.
+        """Set the problem statement for the scenario; this will be provided as input context to the agent.
 
         :param statement: Problem statement text
         :type statement: str
@@ -133,6 +133,7 @@ def with_problem_statement(self, statement: str) -> Self:
 
     def with_additional_context(self, context: object) -> Self:
         """Set additional structured context for the scenario.
+        This can be used to give the agent extra information, such as hints, examples, or other relevant details.
 
         :param context: Additional context (JSON-serializable)
         :type context: object
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py
index 787518fe1..e2fc15de4 100644
--- a/src/runloop_api_client/sdk/scenario_builder.py
+++ b/src/runloop_api_client/sdk/scenario_builder.py
@@ -121,7 +121,7 @@ def with_working_directory(self, directory: str) -> Self:
         return self
 
     def with_problem_statement(self, statement: str) -> Self:
-        """Set the problem statement for the scenario.
+        """Set the problem statement for the scenario; this will be provided as input context to the agent.
 
         :param statement: Problem statement text
         :type statement: str
@@ -133,6 +133,7 @@ def with_problem_statement(self, statement: str) -> Self:
 
     def with_additional_context(self, context: object) -> Self:
         """Set additional structured context for the scenario.
+        This can be used to give the agent extra information, such as hints, examples, or other relevant details.
 
         :param context: Additional context (JSON-serializable)
         :type context: object