From 71d1ad5ef21ef7b7b47b4da0ee12e183f3c3fa14 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Fri, 5 Dec 2025 16:56:57 -0800 Subject: [PATCH 01/31] init commit --- src/runloop_api_client/sdk/__init__.py | 4 + src/runloop_api_client/sdk/async_.py | 18 + .../sdk/async_scenario_builder.py | 451 ++++++++++++++++++ .../sdk/scenario_builder.py | 451 ++++++++++++++++++ src/runloop_api_client/sdk/sync.py | 18 + tests/sdk/test_async_scenario_builder.py | 178 +++++++ tests/sdk/test_scenario_builder.py | 365 ++++++++++++++ 7 files changed, 1485 insertions(+) create mode 100644 src/runloop_api_client/sdk/async_scenario_builder.py create mode 100644 src/runloop_api_client/sdk/scenario_builder.py create mode 100644 tests/sdk/test_async_scenario_builder.py create mode 100644 tests/sdk/test_scenario_builder.py diff --git a/src/runloop_api_client/sdk/__init__.py b/src/runloop_api_client/sdk/__init__.py index 1b95e585b..232ef9515 100644 --- a/src/runloop_api_client/sdk/__init__.py +++ b/src/runloop_api_client/sdk/__init__.py @@ -33,9 +33,11 @@ from .async_blueprint import AsyncBlueprint from .async_execution import AsyncExecution from .execution_result import ExecutionResult +from .scenario_builder import ScenarioBuilder from .async_scenario_run import AsyncScenarioRun from .async_storage_object import AsyncStorageObject from .async_execution_result import AsyncExecutionResult +from .async_scenario_builder import AsyncScenarioBuilder __all__ = [ # Main SDK entry points @@ -71,6 +73,8 @@ "AsyncScenario", "ScenarioRun", "AsyncScenarioRun", + "ScenarioBuilder", + "AsyncScenarioBuilder", "Scorer", "AsyncScorer", "Snapshot", diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py index b54e831a5..a3ef1ffe5 100644 --- a/src/runloop_api_client/sdk/async_.py +++ b/src/runloop_api_client/sdk/async_.py @@ -37,6 +37,7 @@ from .async_blueprint import AsyncBlueprint from ..lib.context_loader import TarFilter, build_directory_tar from .async_storage_object 
import AsyncStorageObject +from .async_scenario_builder import AsyncScenarioBuilder from ..types.object_create_params import ContentType from ..types.shared_params.agent_source import Git, Npm, Pip, Object @@ -773,6 +774,13 @@ class AsyncScenarioOps: >>> scenario = runloop.scenario.from_id("scn-xxx") >>> run = await scenario.run() >>> scenarios = await runloop.scenario.list() + + Example using builder: + >>> builder = runloop.scenario.builder("my-scenario") + >>> builder.from_blueprint_id("bp-xxx") + >>> builder.with_problem_statement("Fix the bug") + >>> builder.add_test_scorer("tests", test_command="pytest") + >>> scenario = await builder.push() """ def __init__(self, client: AsyncRunloop) -> None: @@ -783,6 +791,16 @@ def __init__(self, client: AsyncRunloop) -> None: """ self._client = client + def builder(self, name: str) -> AsyncScenarioBuilder: + """Create a new scenario builder. + + :param name: Name for the scenario + :type name: str + :return: A new AsyncScenarioBuilder instance + :rtype: AsyncScenarioBuilder + """ + return AsyncScenarioBuilder(self._client, name) + def from_id(self, scenario_id: str) -> AsyncScenario: """Get an AsyncScenario instance for an existing scenario ID. 
diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py new file mode 100644 index 000000000..1d650afe2 --- /dev/null +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -0,0 +1,451 @@ +"""AsyncScenarioBuilder for constructing scenarios with a fluent API.""" + +from __future__ import annotations + +from typing import Any, Dict, List, Iterable, Optional +from typing_extensions import Self, Literal, override + +from .._client import AsyncRunloop +from .async_scenario import AsyncScenario +from ..types.scoring_function_param import ( + Scorer, + ScoringFunctionParam, + ScorerCustomScoringFunction, + ScorerAstGrepScoringFunction, + ScorerCommandScoringFunction, + ScorerTestBasedScoringFunction, + ScorerBashScriptScoringFunction, + ScorerPythonScriptScoringFunction, + ScorerTestBasedScoringFunctionTestFile, +) + + +class AsyncScenarioBuilder: + """Async builder for constructing scenarios with a fluent API. + + Provides a step-by-step interface for configuring all aspects of a scenario + before pushing it to the platform. + + Example: + >>> builder = sdk.scenario.builder("my-scenario") + >>> builder.from_blueprint_id("bp-xxx") + >>> builder.with_working_directory("/app") + >>> builder.with_problem_statement("Fix the bug in main.py") + >>> builder.add_test_scorer("tests", test_command="pytest") + >>> scenario = await builder.push() + """ + + def __init__(self, client: AsyncRunloop, name: str) -> None: + """Initialize the builder. 
+ + :param client: AsyncRunloop client instance + :type client: AsyncRunloop + :param name: Name for the scenario + :type name: str + """ + self._client = client + self._name = name + + # Environment configuration + self._blueprint_id: Optional[str] = None + self._snapshot_id: Optional[str] = None + self._working_directory: Optional[str] = None + + # Input context + self._problem_statement: Optional[str] = None + self._additional_context: Optional[object] = None + + # Scoring + self._scorers: List[ScoringFunctionParam] = [] + + # Metadata and other options + self._metadata: Optional[Dict[str, str]] = None + self._reference_output: Optional[str] = None + self._required_env_vars: Optional[List[str]] = None + self._required_secrets: Optional[List[str]] = None + self._validation_type: Optional[Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]] = None + + @override + def __repr__(self) -> str: + return f"" + + @property + def name(self) -> str: + """Return the scenario name. + + :return: Scenario name + :rtype: str + """ + return self._name + + def from_blueprint_id(self, blueprint_id: str) -> Self: + """Set the blueprint ID for the scenario environment. + + :param blueprint_id: Blueprint ID to use + :type blueprint_id: str + :return: Self for method chaining + :rtype: Self + """ + self._blueprint_id = blueprint_id + self._snapshot_id = None # Clear snapshot if blueprint is set + return self + + def from_snapshot_id(self, snapshot_id: str) -> Self: + """Set the snapshot ID for the scenario environment. + + :param snapshot_id: Snapshot ID to use + :type snapshot_id: str + :return: Self for method chaining + :rtype: Self + """ + self._snapshot_id = snapshot_id + self._blueprint_id = None # Clear blueprint if snapshot is set + return self + + def with_working_directory(self, directory: str) -> Self: + """Set the working directory for the scenario. 
+ + :param directory: Working directory path + :type directory: str + :return: Self for method chaining + :rtype: Self + """ + self._working_directory = directory + return self + + def with_problem_statement(self, statement: str) -> Self: + """Set the problem statement for the scenario. + + :param statement: Problem statement text + :type statement: str + :return: Self for method chaining + :rtype: Self + """ + self._problem_statement = statement + return self + + def with_additional_context(self, context: object) -> Self: + """Set additional structured context for the scenario. + + :param context: Additional context (JSON-serializable) + :type context: object + :return: Self for method chaining + :rtype: Self + """ + self._additional_context = context + return self + + def _add_scorer(self, name: str, weight: float, scorer: Scorer) -> Self: + """Internal helper to add a scorer to the list. + + :raises ValueError: If weight is not positive + """ + if weight <= 0: + raise ValueError(f"Scorer weight must be positive, got {weight}") + self._scorers.append({"name": name, "weight": weight, "scorer": scorer}) + return self + + def add_test_scorer( + self, + name: str, + *, + test_command: str, + weight: float = 1.0, + test_files: Optional[Iterable[ScorerTestBasedScoringFunctionTestFile]] = None, + ) -> Self: + """Add a test-based scorer that runs a test command. 
+ + :param name: Name of the scoring function + :type name: str + :param test_command: Command to run tests (e.g., "pytest") + :type test_command: str + :param weight: Weight for this scorer (normalized automatically) + :type weight: float + :param test_files: Optional test files to create before running + :type test_files: Optional[Iterable[ScorerTestBasedScoringFunctionTestFile]] + :return: Self for method chaining + :rtype: Self + """ + scorer: ScorerTestBasedScoringFunction = { + "type": "test_based_scorer", + "test_command": test_command, + } + if test_files: + scorer["test_files"] = test_files + return self._add_scorer(name, weight, scorer) + + def add_command_scorer( + self, + name: str, + *, + command: str, + weight: float = 1.0, + ) -> Self: + """Add a command scorer that runs a shell command. + + :param name: Name of the scoring function + :type name: str + :param command: Shell command to execute + :type command: str + :param weight: Weight for this scorer (normalized automatically) + :type weight: float + :return: Self for method chaining + :rtype: Self + """ + scorer: ScorerCommandScoringFunction = { + "type": "command_scorer", + "command": command, + } + return self._add_scorer(name, weight, scorer) + + def add_bash_scorer( + self, + name: str, + *, + bash_script: str, + weight: float = 1.0, + ) -> Self: + """Add a bash script scorer. + + The script should output "score=X.X" where X.X is a float between 0.0 and 1.0. 
+ + :param name: Name of the scoring function + :type name: str + :param bash_script: Bash script content + :type bash_script: str + :param weight: Weight for this scorer (normalized automatically) + :type weight: float + :return: Self for method chaining + :rtype: Self + """ + scorer: ScorerBashScriptScoringFunction = { + "type": "bash_script_scorer", + "bash_script": bash_script, + } + return self._add_scorer(name, weight, scorer) + + def add_python_scorer( + self, + name: str, + *, + python_script: str, + weight: float = 1.0, + python_version_constraint: Optional[str] = None, + requirements_contents: Optional[str] = None, + ) -> Self: + """Add a Python script scorer. + + The script should print the score (0.0-1.0) to stdout. + + :param name: Name of the scoring function + :type name: str + :param python_script: Python script content + :type python_script: str + :param weight: Weight for this scorer (normalized automatically) + :type weight: float + :param python_version_constraint: Python version (default "==3.12.10") + :type python_version_constraint: Optional[str] + :param requirements_contents: pip requirements.txt content + :type requirements_contents: Optional[str] + :return: Self for method chaining + :rtype: Self + """ + scorer: ScorerPythonScriptScoringFunction = { + "type": "python_script_scorer", + "python_script": python_script, + } + if python_version_constraint: + scorer["python_version_constraint"] = python_version_constraint + if requirements_contents: + scorer["requirements_contents"] = requirements_contents + return self._add_scorer(name, weight, scorer) + + def add_ast_grep_scorer( + self, + name: str, + *, + pattern: str, + weight: float = 1.0, + search_directory: str = ".", + lang: Optional[str] = None, + ) -> Self: + """Add an AST grep scorer that matches code patterns. 
+ + :param name: Name of the scoring function + :type name: str + :param pattern: AST pattern to match + :type pattern: str + :param weight: Weight for this scorer (normalized automatically) + :type weight: float + :param search_directory: Directory to search (default ".") + :type search_directory: str + :param lang: Language of the pattern (optional) + :type lang: Optional[str] + :return: Self for method chaining + :rtype: Self + """ + scorer: ScorerAstGrepScoringFunction = { + "type": "ast_grep_scorer", + "pattern": pattern, + "search_directory": search_directory, + } + if lang: + scorer["lang"] = lang + return self._add_scorer(name, weight, scorer) + + def add_custom_scorer( + self, + name: str, + *, + custom_scorer_type: str, + weight: float = 1.0, + scorer_params: Optional[object] = None, + ) -> Self: + """Add a custom scorer registered with Runloop. + + :param name: Name of the scoring function + :type name: str + :param custom_scorer_type: Type identifier registered with Runloop + :type custom_scorer_type: str + :param weight: Weight for this scorer (normalized automatically) + :type weight: float + :param scorer_params: Additional JSON parameters for the scorer + :type scorer_params: Optional[object] + :return: Self for method chaining + :rtype: Self + """ + scorer: ScorerCustomScoringFunction = { + "type": "custom_scorer", + "custom_scorer_type": custom_scorer_type, + } + if scorer_params: + scorer["scorer_params"] = scorer_params + return self._add_scorer(name, weight, scorer) + + def with_metadata(self, metadata: Dict[str, str]) -> Self: + """Set metadata for the scenario. + + :param metadata: Key-value metadata + :type metadata: Dict[str, str] + :return: Self for method chaining + :rtype: Self + """ + self._metadata = metadata + return self + + def with_reference_output(self, output: str) -> Self: + """Set the reference output/solution for validation. 
+ + :param output: Reference output (e.g., git diff) + :type output: str + :return: Self for method chaining + :rtype: Self + """ + self._reference_output = output + return self + + def with_required_env_vars(self, env_vars: List[str]) -> Self: + """Set required environment variables. + + :param env_vars: List of required environment variable names + :type env_vars: List[str] + :return: Self for method chaining + :rtype: Self + """ + self._required_env_vars = env_vars + return self + + def with_required_secrets(self, secrets: List[str]) -> Self: + """Set required secrets. + + :param secrets: List of required secret names + :type secrets: List[str] + :return: Self for method chaining + :rtype: Self + """ + self._required_secrets = secrets + return self + + def with_validation_type(self, validation_type: Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]) -> Self: + """Set the validation strategy. + + :param validation_type: Validation type + :type validation_type: Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"] + :return: Self for method chaining + :rtype: Self + """ + self._validation_type = validation_type + return self + + def _build_params(self) -> Dict[str, Any]: + """Build the scenario creation parameters. + + Weights are automatically normalized to sum to 1.0. + + :raises ValueError: If required fields are missing + :return: Parameters for scenario creation + :rtype: Dict[str, Any] + """ + if not self._problem_statement: + raise ValueError("Problem statement is required. Call with_problem_statement() first.") + + if not self._scorers: + raise ValueError( + "At least one scorer is required. " + "Call add_test_scorer(), add_bash_scorer(), or another scorer method first." 
+ ) + + # Normalize weights to sum to 1.0 + total_weight = sum(s["weight"] for s in self._scorers) + normalized_scorers = [{**s, "weight": s["weight"] / total_weight} for s in self._scorers] + + params: Dict[str, Any] = { + "name": self._name, + "input_context": { + "problem_statement": self._problem_statement, + }, + "scoring_contract": { + "scoring_function_parameters": normalized_scorers, + }, + } + + # Add additional context if set + if self._additional_context is not None: + params["input_context"]["additional_context"] = self._additional_context + + # Build environment parameters if any are set + env_params: Dict[str, Any] = {} + if self._blueprint_id: + env_params["blueprint_id"] = self._blueprint_id + if self._snapshot_id: + env_params["snapshot_id"] = self._snapshot_id + if self._working_directory: + env_params["working_directory"] = self._working_directory + + if env_params: + params["environment_parameters"] = env_params + + # Add optional fields + if self._metadata: + params["metadata"] = self._metadata + if self._reference_output: + params["reference_output"] = self._reference_output + if self._required_env_vars: + params["required_environment_variables"] = self._required_env_vars + if self._required_secrets: + params["required_secret_names"] = self._required_secrets + if self._validation_type: + params["validation_type"] = self._validation_type + + return params + + async def push(self) -> AsyncScenario: + """Create the scenario on the platform. 
+ + :raises ValueError: If required fields are missing + :return: Created scenario wrapper + :rtype: AsyncScenario + """ + params = self._build_params() + scenario_view = await self._client.scenarios.create(**params) + return AsyncScenario(self._client, scenario_view.id) diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py new file mode 100644 index 000000000..49e693113 --- /dev/null +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -0,0 +1,451 @@ +"""ScenarioBuilder for constructing scenarios with a fluent API.""" + +from __future__ import annotations + +from typing import Any, Dict, List, Iterable, Optional +from typing_extensions import Self, Literal, override + +from .._client import Runloop +from .scenario import Scenario +from ..types.scoring_function_param import ( + Scorer, + ScoringFunctionParam, + ScorerCustomScoringFunction, + ScorerAstGrepScoringFunction, + ScorerCommandScoringFunction, + ScorerTestBasedScoringFunction, + ScorerBashScriptScoringFunction, + ScorerPythonScriptScoringFunction, + ScorerTestBasedScoringFunctionTestFile, +) + + +class ScenarioBuilder: + """Builder for constructing scenarios with a fluent API. + + Provides a step-by-step interface for configuring all aspects of a scenario + before pushing it to the platform. + + Example: + >>> builder = sdk.scenario.builder("my-scenario") + >>> builder.from_blueprint_id("bp-xxx") + >>> builder.with_working_directory("/app") + >>> builder.with_problem_statement("Fix the bug in main.py") + >>> builder.add_test_scorer("tests", test_command="pytest") + >>> scenario = builder.push() + """ + + def __init__(self, client: Runloop, name: str) -> None: + """Initialize the builder. 
+ + :param client: Runloop client instance + :type client: Runloop + :param name: Name for the scenario + :type name: str + """ + self._client = client + self._name = name + + # Environment configuration + self._blueprint_id: Optional[str] = None + self._snapshot_id: Optional[str] = None + self._working_directory: Optional[str] = None + + # Input context + self._problem_statement: Optional[str] = None + self._additional_context: Optional[object] = None + + # Scoring + self._scorers: List[ScoringFunctionParam] = [] + + # Metadata and other options + self._metadata: Optional[Dict[str, str]] = None + self._reference_output: Optional[str] = None + self._required_env_vars: Optional[List[str]] = None + self._required_secrets: Optional[List[str]] = None + self._validation_type: Optional[Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]] = None + + @override + def __repr__(self) -> str: + return f"" + + @property + def name(self) -> str: + """Return the scenario name. + + :return: Scenario name + :rtype: str + """ + return self._name + + def from_blueprint_id(self, blueprint_id: str) -> Self: + """Set the blueprint ID for the scenario environment. + + :param blueprint_id: Blueprint ID to use + :type blueprint_id: str + :return: Self for method chaining + :rtype: Self + """ + self._blueprint_id = blueprint_id + self._snapshot_id = None # Clear snapshot if blueprint is set + return self + + def from_snapshot_id(self, snapshot_id: str) -> Self: + """Set the snapshot ID for the scenario environment. + + :param snapshot_id: Snapshot ID to use + :type snapshot_id: str + :return: Self for method chaining + :rtype: Self + """ + self._snapshot_id = snapshot_id + self._blueprint_id = None # Clear blueprint if snapshot is set + return self + + def with_working_directory(self, directory: str) -> Self: + """Set the working directory for the scenario. 
+ + :param directory: Working directory path + :type directory: str + :return: Self for method chaining + :rtype: Self + """ + self._working_directory = directory + return self + + def with_problem_statement(self, statement: str) -> Self: + """Set the problem statement for the scenario. + + :param statement: Problem statement text + :type statement: str + :return: Self for method chaining + :rtype: Self + """ + self._problem_statement = statement + return self + + def with_additional_context(self, context: object) -> Self: + """Set additional structured context for the scenario. + + :param context: Additional context (JSON-serializable) + :type context: object + :return: Self for method chaining + :rtype: Self + """ + self._additional_context = context + return self + + def _add_scorer(self, name: str, weight: float, scorer: Scorer) -> Self: + """Internal helper to add a scorer to the list. + + :raises ValueError: If weight is not positive + """ + if weight <= 0: + raise ValueError(f"Scorer weight must be positive, got {weight}") + self._scorers.append({"name": name, "weight": weight, "scorer": scorer}) + return self + + def add_test_scorer( + self, + name: str, + *, + test_command: str, + weight: float = 1.0, + test_files: Optional[Iterable[ScorerTestBasedScoringFunctionTestFile]] = None, + ) -> Self: + """Add a test-based scorer that runs a test command. 
+ + :param name: Name of the scoring function + :type name: str + :param test_command: Command to run tests (e.g., "pytest") + :type test_command: str + :param weight: Weight for this scorer (normalized automatically) + :type weight: float + :param test_files: Optional test files to create before running + :type test_files: Optional[Iterable[ScorerTestBasedScoringFunctionTestFile]] + :return: Self for method chaining + :rtype: Self + """ + scorer: ScorerTestBasedScoringFunction = { + "type": "test_based_scorer", + "test_command": test_command, + } + if test_files: + scorer["test_files"] = test_files + return self._add_scorer(name, weight, scorer) + + def add_command_scorer( + self, + name: str, + *, + command: str, + weight: float = 1.0, + ) -> Self: + """Add a command scorer that runs a shell command. + + :param name: Name of the scoring function + :type name: str + :param command: Shell command to execute + :type command: str + :param weight: Weight for this scorer (normalized automatically) + :type weight: float + :return: Self for method chaining + :rtype: Self + """ + scorer: ScorerCommandScoringFunction = { + "type": "command_scorer", + "command": command, + } + return self._add_scorer(name, weight, scorer) + + def add_bash_scorer( + self, + name: str, + *, + bash_script: str, + weight: float = 1.0, + ) -> Self: + """Add a bash script scorer. + + The script should output "score=X.X" where X.X is a float between 0.0 and 1.0. 
+ + :param name: Name of the scoring function + :type name: str + :param bash_script: Bash script content + :type bash_script: str + :param weight: Weight for this scorer (normalized automatically) + :type weight: float + :return: Self for method chaining + :rtype: Self + """ + scorer: ScorerBashScriptScoringFunction = { + "type": "bash_script_scorer", + "bash_script": bash_script, + } + return self._add_scorer(name, weight, scorer) + + def add_python_scorer( + self, + name: str, + *, + python_script: str, + weight: float = 1.0, + python_version_constraint: Optional[str] = None, + requirements_contents: Optional[str] = None, + ) -> Self: + """Add a Python script scorer. + + The script should print the score (0.0-1.0) to stdout. + + :param name: Name of the scoring function + :type name: str + :param python_script: Python script content + :type python_script: str + :param weight: Weight for this scorer (normalized automatically) + :type weight: float + :param python_version_constraint: Python version (default "==3.12.10") + :type python_version_constraint: Optional[str] + :param requirements_contents: pip requirements.txt content + :type requirements_contents: Optional[str] + :return: Self for method chaining + :rtype: Self + """ + scorer: ScorerPythonScriptScoringFunction = { + "type": "python_script_scorer", + "python_script": python_script, + } + if python_version_constraint: + scorer["python_version_constraint"] = python_version_constraint + if requirements_contents: + scorer["requirements_contents"] = requirements_contents + return self._add_scorer(name, weight, scorer) + + def add_ast_grep_scorer( + self, + name: str, + *, + pattern: str, + weight: float = 1.0, + search_directory: str = ".", + lang: Optional[str] = None, + ) -> Self: + """Add an AST grep scorer that matches code patterns. 
+ + :param name: Name of the scoring function + :type name: str + :param pattern: AST pattern to match + :type pattern: str + :param weight: Weight for this scorer (normalized automatically) + :type weight: float + :param search_directory: Directory to search (default ".") + :type search_directory: str + :param lang: Language of the pattern (optional) + :type lang: Optional[str] + :return: Self for method chaining + :rtype: Self + """ + scorer: ScorerAstGrepScoringFunction = { + "type": "ast_grep_scorer", + "pattern": pattern, + "search_directory": search_directory, + } + if lang: + scorer["lang"] = lang + return self._add_scorer(name, weight, scorer) + + def add_custom_scorer( + self, + name: str, + *, + custom_scorer_type: str, + weight: float = 1.0, + scorer_params: Optional[object] = None, + ) -> Self: + """Add a custom scorer registered with Runloop. + + :param name: Name of the scoring function + :type name: str + :param custom_scorer_type: Type identifier registered with Runloop + :type custom_scorer_type: str + :param weight: Weight for this scorer (normalized automatically) + :type weight: float + :param scorer_params: Additional JSON parameters for the scorer + :type scorer_params: Optional[object] + :return: Self for method chaining + :rtype: Self + """ + scorer: ScorerCustomScoringFunction = { + "type": "custom_scorer", + "custom_scorer_type": custom_scorer_type, + } + if scorer_params: + scorer["scorer_params"] = scorer_params + return self._add_scorer(name, weight, scorer) + + def with_metadata(self, metadata: Dict[str, str]) -> Self: + """Set metadata for the scenario. + + :param metadata: Key-value metadata + :type metadata: Dict[str, str] + :return: Self for method chaining + :rtype: Self + """ + self._metadata = metadata + return self + + def with_reference_output(self, output: str) -> Self: + """Set the reference output/solution for validation. 
+ + :param output: Reference output (e.g., git diff) + :type output: str + :return: Self for method chaining + :rtype: Self + """ + self._reference_output = output + return self + + def with_required_env_vars(self, env_vars: List[str]) -> Self: + """Set required environment variables. + + :param env_vars: List of required environment variable names + :type env_vars: List[str] + :return: Self for method chaining + :rtype: Self + """ + self._required_env_vars = env_vars + return self + + def with_required_secrets(self, secrets: List[str]) -> Self: + """Set required secrets. + + :param secrets: List of required secret names + :type secrets: List[str] + :return: Self for method chaining + :rtype: Self + """ + self._required_secrets = secrets + return self + + def with_validation_type(self, validation_type: Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]) -> Self: + """Set the validation strategy. + + :param validation_type: Validation type + :type validation_type: Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"] + :return: Self for method chaining + :rtype: Self + """ + self._validation_type = validation_type + return self + + def _build_params(self) -> Dict[str, Any]: + """Build the scenario creation parameters. + + Weights are automatically normalized to sum to 1.0. + + :raises ValueError: If required fields are missing + :return: Parameters for scenario creation + :rtype: Dict[str, Any] + """ + if not self._problem_statement: + raise ValueError("Problem statement is required. Call with_problem_statement() first.") + + if not self._scorers: + raise ValueError( + "At least one scorer is required. " + "Call add_test_scorer(), add_bash_scorer(), or another scorer method first." 
+ ) + + # Normalize weights to sum to 1.0 + total_weight = sum(s["weight"] for s in self._scorers) + normalized_scorers = [{**s, "weight": s["weight"] / total_weight} for s in self._scorers] + + params: Dict[str, Any] = { + "name": self._name, + "input_context": { + "problem_statement": self._problem_statement, + }, + "scoring_contract": { + "scoring_function_parameters": normalized_scorers, + }, + } + + # Add additional context if set + if self._additional_context is not None: + params["input_context"]["additional_context"] = self._additional_context + + # Build environment parameters if any are set + env_params: Dict[str, Any] = {} + if self._blueprint_id: + env_params["blueprint_id"] = self._blueprint_id + if self._snapshot_id: + env_params["snapshot_id"] = self._snapshot_id + if self._working_directory: + env_params["working_directory"] = self._working_directory + + if env_params: + params["environment_parameters"] = env_params + + # Add optional fields + if self._metadata: + params["metadata"] = self._metadata + if self._reference_output: + params["reference_output"] = self._reference_output + if self._required_env_vars: + params["required_environment_variables"] = self._required_env_vars + if self._required_secrets: + params["required_secret_names"] = self._required_secrets + if self._validation_type: + params["validation_type"] = self._validation_type + + return params + + def push(self) -> Scenario: + """Create the scenario on the platform. 
+ + :raises ValueError: If required fields are missing + :return: Created scenario wrapper + :rtype: Scenario + """ + params = self._build_params() + scenario_view = self._client.scenarios.create(**params) + return Scenario(self._client, scenario_view.id) diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py index 6b38b5091..28e13e404 100644 --- a/src/runloop_api_client/sdk/sync.py +++ b/src/runloop_api_client/sdk/sync.py @@ -36,6 +36,7 @@ from .blueprint import Blueprint from .storage_object import StorageObject from ..lib.context_loader import TarFilter, build_directory_tar +from .scenario_builder import ScenarioBuilder from ..types.object_create_params import ContentType from ..types.shared_params.agent_source import Git, Npm, Pip, Object @@ -794,6 +795,13 @@ class ScenarioOps: >>> scenario = runloop.scenario.from_id("scn-xxx") >>> run = scenario.run() >>> scenarios = runloop.scenario.list() + + Example using builder: + >>> builder = runloop.scenario.builder("my-scenario") + >>> builder.from_blueprint_id("bp-xxx") + >>> builder.with_problem_statement("Fix the bug") + >>> builder.add_test_scorer("tests", test_command="pytest") + >>> scenario = builder.push() """ def __init__(self, client: Runloop) -> None: @@ -804,6 +812,16 @@ def __init__(self, client: Runloop) -> None: """ self._client = client + def builder(self, name: str) -> ScenarioBuilder: + """Create a new scenario builder. + + :param name: Name for the scenario + :type name: str + :return: A new ScenarioBuilder instance + :rtype: ScenarioBuilder + """ + return ScenarioBuilder(self._client, name) + def from_id(self, scenario_id: str) -> Scenario: """Get a Scenario instance for an existing scenario ID. 
diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py new file mode 100644 index 000000000..3ea9d2631 --- /dev/null +++ b/tests/sdk/test_async_scenario_builder.py @@ -0,0 +1,178 @@ +"""Unit tests for AsyncScenarioBuilder class.""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from runloop_api_client.sdk.async_scenario_builder import AsyncScenarioBuilder + + +class TestAsyncScenarioBuilder: + """Tests for the asynchronous AsyncScenarioBuilder.""" + + @pytest.fixture + def mock_async_client(self) -> MagicMock: + """Create a mock AsyncRunloop client.""" + client = MagicMock() + client.scenarios = MagicMock() + client.scenarios.create = AsyncMock() + return client + + @pytest.fixture + def builder(self, mock_async_client: MagicMock) -> AsyncScenarioBuilder: + """Create an AsyncScenarioBuilder instance with mock client.""" + return AsyncScenarioBuilder(mock_async_client, "test-scenario") + + def test_init(self, mock_async_client: MagicMock) -> None: + """Test builder initialization.""" + builder = AsyncScenarioBuilder(mock_async_client, "my-scenario") + + assert builder._client is mock_async_client + assert builder._name == "my-scenario" + assert builder.name == "my-scenario" + + def test_repr(self, builder: AsyncScenarioBuilder) -> None: + """Test builder __repr__.""" + assert repr(builder) == "" + + def test_from_blueprint_id_returns_self(self, builder: AsyncScenarioBuilder) -> None: + """Test from_blueprint_id returns self for chaining.""" + result = builder.from_blueprint_id("bp-123") + + assert result is builder + assert builder._blueprint_id == "bp-123" + assert builder._snapshot_id is None + + def test_from_snapshot_id_returns_self(self, builder: AsyncScenarioBuilder) -> None: + """Test from_snapshot_id returns self for chaining.""" + result = builder.from_snapshot_id("snap-123") + + assert result is builder + assert builder._snapshot_id == "snap-123" + assert 
builder._blueprint_id is None + + def test_with_working_directory_returns_self(self, builder: AsyncScenarioBuilder) -> None: + """Test with_working_directory returns self for chaining.""" + result = builder.with_working_directory("/app") + + assert result is builder + assert builder._working_directory == "/app" + + def test_with_problem_statement_returns_self(self, builder: AsyncScenarioBuilder) -> None: + """Test with_problem_statement returns self for chaining.""" + result = builder.with_problem_statement("Fix the bug") + + assert result is builder + assert builder._problem_statement == "Fix the bug" + + def test_add_test_scorer(self, builder: AsyncScenarioBuilder) -> None: + """Test add_test_scorer method.""" + result = builder.add_test_scorer( + "my-tests", + test_command="pytest", + weight=2.0, + ) + + assert result is builder + assert len(builder._scorers) == 1 + assert builder._scorers[0]["name"] == "my-tests" + assert builder._scorers[0]["scorer"]["type"] == "test_based_scorer" + + def test_add_command_scorer(self, builder: AsyncScenarioBuilder) -> None: + """Test add_command_scorer method.""" + result = builder.add_command_scorer( + "cmd-scorer", + command="./check.sh", + ) + + assert result is builder + assert builder._scorers[0]["scorer"]["type"] == "command_scorer" + + def test_add_bash_scorer(self, builder: AsyncScenarioBuilder) -> None: + """Test add_bash_scorer method.""" + result = builder.add_bash_scorer( + "bash-scorer", + bash_script="echo 'score=1.0'", + ) + + assert result is builder + assert builder._scorers[0]["scorer"]["type"] == "bash_script_scorer" + + def test_build_params_missing_problem_statement(self, builder: AsyncScenarioBuilder) -> None: + """Test _build_params raises if problem statement is missing.""" + builder.add_test_scorer("test", test_command="pytest") + + with pytest.raises(ValueError, match="Problem statement is required"): + builder._build_params() + + def test_build_params_missing_scorer(self, builder: 
AsyncScenarioBuilder) -> None: + """Test _build_params raises if no scorers are added.""" + builder.with_problem_statement("Fix the bug") + + with pytest.raises(ValueError, match="At least one scorer is required"): + builder._build_params() + + def test_build_params_minimal(self, builder: AsyncScenarioBuilder) -> None: + """Test _build_params with minimal configuration.""" + builder.with_problem_statement("Fix the bug") + builder.add_test_scorer("tests", test_command="pytest") + + params = builder._build_params() + + assert params["name"] == "test-scenario" + assert params["input_context"]["problem_statement"] == "Fix the bug" + assert len(params["scoring_contract"]["scoring_function_parameters"]) == 1 + + def test_build_params_with_environment(self, builder: AsyncScenarioBuilder) -> None: + """Test _build_params includes environment parameters.""" + builder.with_problem_statement("Fix the bug") + builder.add_test_scorer("tests", test_command="pytest") + builder.from_blueprint_id("bp-123") + builder.with_working_directory("/app") + + params = builder._build_params() + + assert params["environment_parameters"]["blueprint_id"] == "bp-123" + assert params["environment_parameters"]["working_directory"] == "/app" + + @pytest.mark.asyncio + async def test_push_calls_api_and_returns_scenario( + self, builder: AsyncScenarioBuilder, mock_async_client: MagicMock + ) -> None: + """Test push() calls API with correct params and returns AsyncScenario.""" + mock_async_client.scenarios.create.return_value.id = "scn-new-123" + + builder.with_problem_statement("Fix the bug") + builder.add_test_scorer("tests", test_command="pytest") + + scenario = await builder.push() + + mock_async_client.scenarios.create.assert_called_once() + call_kwargs = mock_async_client.scenarios.create.call_args.kwargs + assert call_kwargs["name"] == "test-scenario" + assert call_kwargs["input_context"]["problem_statement"] == "Fix the bug" + + assert scenario.id == "scn-new-123" + + def 
test_fluent_chaining(self, builder: AsyncScenarioBuilder) -> None: + """Test that all builder methods can be chained fluently.""" + result = ( + builder.from_blueprint_id("bp-123") + .with_working_directory("/app") + .with_problem_statement("Fix the bug") + .with_additional_context({"hint": "check main.py"}) + .add_test_scorer("tests", test_command="pytest") + .with_metadata({"team": "infra"}) + .with_reference_output("diff content") + .with_required_env_vars(["API_KEY"]) + .with_required_secrets(["secret"]) + .with_validation_type("FORWARD") + ) + + assert result is builder + assert builder._blueprint_id == "bp-123" + assert builder._working_directory == "/app" + assert builder._problem_statement == "Fix the bug" + assert len(builder._scorers) == 1 diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py new file mode 100644 index 000000000..a2941abbc --- /dev/null +++ b/tests/sdk/test_scenario_builder.py @@ -0,0 +1,365 @@ +"""Unit tests for ScenarioBuilder class.""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +import pytest + +from runloop_api_client.sdk.scenario_builder import ScenarioBuilder +from runloop_api_client.types.scoring_function_param import ScorerTestBasedScoringFunctionTestFile + + +class TestScenarioBuilder: + """Tests for the synchronous ScenarioBuilder.""" + + @pytest.fixture + def mock_client(self) -> MagicMock: + """Create a mock Runloop client.""" + client = MagicMock() + return client + + @pytest.fixture + def builder(self, mock_client: MagicMock) -> ScenarioBuilder: + """Create a ScenarioBuilder instance with mock client.""" + return ScenarioBuilder(mock_client, "test-scenario") + + def test_init(self, mock_client: MagicMock) -> None: + """Test builder initialization.""" + builder = ScenarioBuilder(mock_client, "my-scenario") + + assert builder._client is mock_client + assert builder._name == "my-scenario" + assert builder.name == "my-scenario" + + def test_repr(self, builder: 
ScenarioBuilder) -> None: + """Test builder __repr__.""" + assert repr(builder) == "" + + def test_from_blueprint_id_returns_self(self, builder: ScenarioBuilder) -> None: + """Test from_blueprint_id returns self for chaining.""" + result = builder.from_blueprint_id("bp-123") + + assert result is builder + assert builder._blueprint_id == "bp-123" + assert builder._snapshot_id is None + + def test_from_snapshot_id_returns_self(self, builder: ScenarioBuilder) -> None: + """Test from_snapshot_id returns self for chaining.""" + result = builder.from_snapshot_id("snap-123") + + assert result is builder + assert builder._snapshot_id == "snap-123" + assert builder._blueprint_id is None + + def test_from_blueprint_clears_snapshot(self, builder: ScenarioBuilder) -> None: + """Test that setting blueprint clears snapshot.""" + builder.from_snapshot_id("snap-123") + builder.from_blueprint_id("bp-123") + + assert builder._blueprint_id == "bp-123" + assert builder._snapshot_id is None + + def test_from_snapshot_clears_blueprint(self, builder: ScenarioBuilder) -> None: + """Test that setting snapshot clears blueprint.""" + builder.from_blueprint_id("bp-123") + builder.from_snapshot_id("snap-123") + + assert builder._snapshot_id == "snap-123" + assert builder._blueprint_id is None + + def test_with_working_directory_returns_self(self, builder: ScenarioBuilder) -> None: + """Test with_working_directory returns self for chaining.""" + result = builder.with_working_directory("/app") + + assert result is builder + assert builder._working_directory == "/app" + + def test_with_problem_statement_returns_self(self, builder: ScenarioBuilder) -> None: + """Test with_problem_statement returns self for chaining.""" + result = builder.with_problem_statement("Fix the bug") + + assert result is builder + assert builder._problem_statement == "Fix the bug" + + def test_with_additional_context_returns_self(self, builder: ScenarioBuilder) -> None: + """Test with_additional_context returns self for 
chaining.""" + context = {"hint": "Look at line 42"} + result = builder.with_additional_context(context) + + assert result is builder + assert builder._additional_context == context + + def test_add_test_scorer(self, builder: ScenarioBuilder) -> None: + """Test add_test_scorer method.""" + result = builder.add_test_scorer( + "my-tests", + test_command="pytest", + weight=2.0, + ) + + assert result is builder + assert len(builder._scorers) == 1 + assert builder._scorers[0]["name"] == "my-tests" + assert builder._scorers[0]["weight"] == 2.0 + assert builder._scorers[0]["scorer"]["type"] == "test_based_scorer" + assert "test_command" in builder._scorers[0]["scorer"] + assert builder._scorers[0]["scorer"]["test_command"] == "pytest" + + def test_add_test_scorer_with_files(self, builder: ScenarioBuilder) -> None: + """Test add_test_scorer with test files.""" + test_files: list[ScorerTestBasedScoringFunctionTestFile] = [ + {"file_path": "test_main.py", "file_contents": "def test_foo(): pass"} + ] + result = builder.add_test_scorer("tests", test_command="pytest", test_files=test_files) + + assert result is builder + assert "test_files" in builder._scorers[0]["scorer"] + assert builder._scorers[0]["scorer"]["test_files"] == test_files + + def test_add_command_scorer(self, builder: ScenarioBuilder) -> None: + """Test add_command_scorer method.""" + result = builder.add_command_scorer( + "cmd-scorer", + command="./check.sh", + ) + + assert result is builder + assert len(builder._scorers) == 1 + assert builder._scorers[0]["scorer"]["type"] == "command_scorer" + assert "command" in builder._scorers[0]["scorer"] + assert builder._scorers[0]["scorer"]["command"] == "./check.sh" + + def test_add_bash_scorer(self, builder: ScenarioBuilder) -> None: + """Test add_bash_scorer method.""" + result = builder.add_bash_scorer( + "bash-scorer", + bash_script="echo 'score=1.0'", + ) + + assert result is builder + assert len(builder._scorers) == 1 + assert 
builder._scorers[0]["scorer"]["type"] == "bash_script_scorer" + assert "bash_script" in builder._scorers[0]["scorer"] + assert builder._scorers[0]["scorer"]["bash_script"] == "echo 'score=1.0'" + + def test_add_python_scorer(self, builder: ScenarioBuilder) -> None: + """Test add_python_scorer method.""" + result = builder.add_python_scorer( + "python-scorer", + python_script="print('score=1.0')", + python_version_constraint=">=3.10", + ) + + assert result is builder + assert len(builder._scorers) == 1 + assert builder._scorers[0]["scorer"]["type"] == "python_script_scorer" + assert builder._scorers[0]["scorer"]["python_script"] == "print('score=1.0')" + assert "python_version_constraint" in builder._scorers[0]["scorer"] + assert builder._scorers[0]["scorer"]["python_version_constraint"] == ">=3.10" + + def test_add_ast_grep_scorer(self, builder: ScenarioBuilder) -> None: + """Test add_ast_grep_scorer method.""" + result = builder.add_ast_grep_scorer( + "ast-scorer", + pattern="$A.foo()", + search_directory="/src", + lang="python", + ) + + assert result is builder + assert len(builder._scorers) == 1 + assert builder._scorers[0]["scorer"]["type"] == "ast_grep_scorer" + assert builder._scorers[0]["scorer"]["pattern"] == "$A.foo()" + assert builder._scorers[0]["scorer"]["search_directory"] == "/src" + assert "lang" in builder._scorers[0]["scorer"] + assert builder._scorers[0]["scorer"]["lang"] == "python" + + def test_add_custom_scorer(self, builder: ScenarioBuilder) -> None: + """Test add_custom_scorer method.""" + result = builder.add_custom_scorer( + "custom-scorer", + custom_scorer_type="my_custom_scorer", + scorer_params={"threshold": 0.5}, + ) + + assert result is builder + assert len(builder._scorers) == 1 + assert builder._scorers[0]["scorer"]["type"] == "custom_scorer" + assert builder._scorers[0]["scorer"]["custom_scorer_type"] == "my_custom_scorer" + assert "scorer_params" in builder._scorers[0]["scorer"] + assert 
builder._scorers[0]["scorer"]["scorer_params"] == {"threshold": 0.5} + + def test_add_multiple_scorers(self, builder: ScenarioBuilder) -> None: + """Test adding multiple scorers.""" + builder.add_test_scorer("test1", test_command="pytest", weight=1.0) + builder.add_command_scorer("test2", command="./check.sh", weight=2.0) + + assert len(builder._scorers) == 2 + assert builder._scorers[0]["name"] == "test1" + assert builder._scorers[1]["name"] == "test2" + + def test_add_scorer_rejects_zero_weight(self, builder: ScenarioBuilder) -> None: + """Test that adding a scorer with zero weight raises ValueError.""" + with pytest.raises(ValueError, match="Scorer weight must be positive"): + builder.add_bash_scorer("bad", bash_script="echo 1", weight=0.0) + + def test_add_scorer_rejects_negative_weight(self, builder: ScenarioBuilder) -> None: + """Test that adding a scorer with negative weight raises ValueError.""" + with pytest.raises(ValueError, match="Scorer weight must be positive"): + builder.add_bash_scorer("bad", bash_script="echo 1", weight=-1.0) + + def test_with_metadata_returns_self(self, builder: ScenarioBuilder) -> None: + """Test with_metadata returns self for chaining.""" + result = builder.with_metadata({"team": "infra"}) + + assert result is builder + assert builder._metadata == {"team": "infra"} + + def test_with_reference_output_returns_self(self, builder: ScenarioBuilder) -> None: + """Test with_reference_output returns self for chaining.""" + result = builder.with_reference_output("--- a/file.py\n+++ b/file.py") + + assert result is builder + assert builder._reference_output == "--- a/file.py\n+++ b/file.py" + + def test_with_required_env_vars_returns_self(self, builder: ScenarioBuilder) -> None: + """Test with_required_env_vars returns self for chaining.""" + result = builder.with_required_env_vars(["API_KEY", "SECRET"]) + + assert result is builder + assert builder._required_env_vars == ["API_KEY", "SECRET"] + + def 
test_with_required_secrets_returns_self(self, builder: ScenarioBuilder) -> None: + """Test with_required_secrets returns self for chaining.""" + result = builder.with_required_secrets(["db_password"]) + + assert result is builder + assert builder._required_secrets == ["db_password"] + + def test_with_validation_type_returns_self(self, builder: ScenarioBuilder) -> None: + """Test with_validation_type returns self for chaining.""" + result = builder.with_validation_type("FORWARD") + + assert result is builder + assert builder._validation_type == "FORWARD" + + def test_build_params_missing_problem_statement(self, builder: ScenarioBuilder) -> None: + """Test _build_params raises if problem statement is missing.""" + builder.add_test_scorer("test", test_command="pytest") + + with pytest.raises(ValueError, match="Problem statement is required"): + builder._build_params() + + def test_build_params_missing_scorer(self, builder: ScenarioBuilder) -> None: + """Test _build_params raises if no scorers are added.""" + builder.with_problem_statement("Fix the bug") + + with pytest.raises(ValueError, match="At least one scorer is required.*add_test_scorer"): + builder._build_params() + + def test_build_params_minimal(self, builder: ScenarioBuilder) -> None: + """Test _build_params with minimal configuration.""" + builder.with_problem_statement("Fix the bug") + builder.add_test_scorer("tests", test_command="pytest") + + params = builder._build_params() + + assert params["name"] == "test-scenario" + assert params["input_context"]["problem_statement"] == "Fix the bug" + assert len(params["scoring_contract"]["scoring_function_parameters"]) == 1 + + def test_build_params_with_environment(self, builder: ScenarioBuilder) -> None: + """Test _build_params includes environment parameters.""" + builder.with_problem_statement("Fix the bug") + builder.add_test_scorer("tests", test_command="pytest") + builder.from_blueprint_id("bp-123") + builder.with_working_directory("/app") + + params = 
builder._build_params() + + assert params["environment_parameters"]["blueprint_id"] == "bp-123" + assert params["environment_parameters"]["working_directory"] == "/app" + + def test_build_params_with_all_options(self, builder: ScenarioBuilder) -> None: + """Test _build_params with all optional fields set.""" + builder.with_problem_statement("Fix the bug") + builder.with_additional_context({"hint": "line 42"}) + builder.add_test_scorer("tests", test_command="pytest") + builder.from_blueprint_id("bp-123") + builder.with_working_directory("/app") + builder.with_metadata({"team": "infra"}) + builder.with_reference_output("diff content") + builder.with_required_env_vars(["API_KEY"]) + builder.with_required_secrets(["db_pass"]) + builder.with_validation_type("FORWARD") + + params = builder._build_params() + + assert params["name"] == "test-scenario" + assert params["input_context"]["problem_statement"] == "Fix the bug" + assert params["input_context"]["additional_context"] == {"hint": "line 42"} + assert params["environment_parameters"]["blueprint_id"] == "bp-123" + assert params["environment_parameters"]["working_directory"] == "/app" + assert params["metadata"] == {"team": "infra"} + assert params["reference_output"] == "diff content" + assert params["required_environment_variables"] == ["API_KEY"] + assert params["required_secret_names"] == ["db_pass"] + assert params["validation_type"] == "FORWARD" + + def test_build_params_normalizes_weights(self, builder: ScenarioBuilder) -> None: + """Test that _build_params normalizes scorer weights to sum to 1.0.""" + builder.with_problem_statement("Fix the bug") + builder.add_bash_scorer("scorer1", bash_script="echo 1", weight=1.0) + builder.add_bash_scorer("scorer2", bash_script="echo 2", weight=2.0) + builder.add_bash_scorer("scorer3", bash_script="echo 3", weight=3.0) + + params = builder._build_params() + scorers = params["scoring_contract"]["scoring_function_parameters"] + + # Weights 1, 2, 3 should normalize to 1/6, 2/6, 
3/6 + assert len(scorers) == 3 + assert abs(scorers[0]["weight"] - 1 / 6) < 0.0001 + assert abs(scorers[1]["weight"] - 2 / 6) < 0.0001 + assert abs(scorers[2]["weight"] - 3 / 6) < 0.0001 + + # Total should be 1.0 + total = sum(s["weight"] for s in scorers) + assert abs(total - 1.0) < 0.0001 + + def test_push_calls_api_and_returns_scenario(self, builder: ScenarioBuilder, mock_client: MagicMock) -> None: + """Test push() calls API with correct params and returns Scenario.""" + mock_client.scenarios.create.return_value.id = "scn-new-123" + + builder.with_problem_statement("Fix the bug") + builder.add_test_scorer("tests", test_command="pytest") + + scenario = builder.push() + + mock_client.scenarios.create.assert_called_once() + call_kwargs = mock_client.scenarios.create.call_args.kwargs + assert call_kwargs["name"] == "test-scenario" + assert call_kwargs["input_context"]["problem_statement"] == "Fix the bug" + + assert scenario.id == "scn-new-123" + + def test_fluent_chaining(self, builder: ScenarioBuilder) -> None: + """Test that all builder methods can be chained fluently.""" + result = ( + builder.from_blueprint_id("bp-123") + .with_working_directory("/app") + .with_problem_statement("Fix the bug") + .with_additional_context({"hint": "check main.py"}) + .add_test_scorer("tests", test_command="pytest") + .with_metadata({"team": "infra"}) + .with_reference_output("diff content") + .with_required_env_vars(["API_KEY"]) + .with_required_secrets(["secret"]) + .with_validation_type("FORWARD") + ) + + assert result is builder + assert builder._blueprint_id == "bp-123" + assert builder._working_directory == "/app" + assert builder._problem_statement == "Fix the bug" + assert len(builder._scorers) == 1 From 853a998f0a16044754fcaba47c23aae4642fe0aa Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Mon, 8 Dec 2025 14:42:14 -0800 Subject: [PATCH 02/31] formatting fix --- src/runloop_api_client/sdk/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py index 28e13e404..151ce5c73 100644 --- a/src/runloop_api_client/sdk/sync.py +++ b/src/runloop_api_client/sdk/sync.py @@ -35,8 +35,8 @@ from .snapshot import Snapshot from .blueprint import Blueprint from .storage_object import StorageObject -from ..lib.context_loader import TarFilter, build_directory_tar from .scenario_builder import ScenarioBuilder +from ..lib.context_loader import TarFilter, build_directory_tar from ..types.object_create_params import ContentType from ..types.shared_params.agent_source import Git, Npm, Pip, Object From 1630a87335c8670cf5db8948f03ed7a8913fce4f Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 12:16:46 -0800 Subject: [PATCH 03/31] clean up imports in scenario ops unit tests --- tests/sdk/test_async_ops.py | 9 ++------- tests/sdk/test_ops.py | 10 ++-------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/tests/sdk/test_async_ops.py b/tests/sdk/test_async_ops.py index b276f29ee..49c4c4006 100644 --- a/tests/sdk/test_async_ops.py +++ b/tests/sdk/test_async_ops.py @@ -24,6 +24,7 @@ AsyncAgent, AsyncDevbox, AsyncScorer, + AsyncScenario, AsyncSnapshot, AsyncBlueprint, AsyncStorageObject, @@ -33,6 +34,7 @@ AsyncDevboxOps, AsyncScorerOps, AsyncRunloopSDK, + AsyncScenarioOps, AsyncSnapshotOps, AsyncBlueprintOps, AsyncStorageObjectOps, @@ -1122,8 +1124,6 @@ class TestAsyncScenarioOps: def test_from_id(self, mock_async_client: AsyncMock) -> None: """Test from_id method.""" - from runloop_api_client.sdk import AsyncScenario - from runloop_api_client.sdk.async_ import AsyncScenarioOps ops = AsyncScenarioOps(mock_async_client) scenario = ops.from_id("scn_123") @@ -1134,7 +1134,6 @@ def test_from_id(self, mock_async_client: AsyncMock) -> None: @pytest.mark.asyncio async def test_list_empty(self, mock_async_client: AsyncMock) -> None: """Test list method with empty results.""" - from runloop_api_client.sdk.async_ import AsyncScenarioOps 
async def async_iter(): return @@ -1151,8 +1150,6 @@ async def async_iter(): @pytest.mark.asyncio async def test_list_single(self, mock_async_client: AsyncMock, scenario_view: MockScenarioView) -> None: """Test list method with single result.""" - from runloop_api_client.sdk import AsyncScenario - from runloop_api_client.sdk.async_ import AsyncScenarioOps async def async_iter(): yield scenario_view @@ -1170,8 +1167,6 @@ async def async_iter(): @pytest.mark.asyncio async def test_list_multiple(self, mock_async_client: AsyncMock) -> None: """Test list method with multiple results.""" - from runloop_api_client.sdk import AsyncScenario - from runloop_api_client.sdk.async_ import AsyncScenarioOps scenario_view1 = MockScenarioView(id="scn_001", name="scenario-1") scenario_view2 = MockScenarioView(id="scn_002", name="scenario-2") diff --git a/tests/sdk/test_ops.py b/tests/sdk/test_ops.py index fcca7bcbf..724b2ee17 100644 --- a/tests/sdk/test_ops.py +++ b/tests/sdk/test_ops.py @@ -20,12 +20,13 @@ MockBlueprintView, create_mock_httpx_response, ) -from runloop_api_client.sdk import Agent, Devbox, Scorer, Snapshot, Blueprint, StorageObject +from runloop_api_client.sdk import Agent, Devbox, Scorer, Scenario, Snapshot, Blueprint, StorageObject from runloop_api_client.sdk.sync import ( AgentOps, DevboxOps, ScorerOps, RunloopSDK, + ScenarioOps, SnapshotOps, BlueprintOps, StorageObjectOps, @@ -1015,8 +1016,6 @@ class TestScenarioOps: def test_from_id(self, mock_client: Mock) -> None: """Test from_id method.""" - from runloop_api_client.sdk import Scenario - from runloop_api_client.sdk.sync import ScenarioOps ops = ScenarioOps(mock_client) scenario = ops.from_id("scn_123") @@ -1026,7 +1025,6 @@ def test_from_id(self, mock_client: Mock) -> None: def test_list_empty(self, mock_client: Mock) -> None: """Test list method with empty results.""" - from runloop_api_client.sdk.sync import ScenarioOps mock_client.scenarios.list.return_value = [] @@ -1038,8 +1036,6 @@ def 
test_list_empty(self, mock_client: Mock) -> None: def test_list_single(self, mock_client: Mock, scenario_view: MockScenarioView) -> None: """Test list method with single result.""" - from runloop_api_client.sdk import Scenario - from runloop_api_client.sdk.sync import ScenarioOps mock_client.scenarios.list.return_value = [scenario_view] @@ -1053,8 +1049,6 @@ def test_list_single(self, mock_client: Mock, scenario_view: MockScenarioView) - def test_list_multiple(self, mock_client: Mock) -> None: """Test list method with multiple results.""" - from runloop_api_client.sdk import Scenario - from runloop_api_client.sdk.sync import ScenarioOps scenario_view1 = MockScenarioView(id="scn_001", name="scenario-1") scenario_view2 = MockScenarioView(id="scn_002", name="scenario-2") From f3145a9c20e564e9d4f60a88329d9af8a3d70d80 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 13:27:18 -0800 Subject: [PATCH 04/31] use Blueprint and Snapshot objects directly in ScenarioBuilder --- .../sdk/async_scenario_builder.py | 40 ++++++----- .../sdk/scenario_builder.py | 40 ++++++----- tests/sdk/test_async_scenario_builder.py | 42 +++++++---- tests/sdk/test_scenario_builder.py | 70 ++++++++++++------- 4 files changed, 112 insertions(+), 80 deletions(-) diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index 1d650afe2..cabd51e5b 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -7,6 +7,8 @@ from .._client import AsyncRunloop from .async_scenario import AsyncScenario +from .async_snapshot import AsyncSnapshot +from .async_blueprint import AsyncBlueprint from ..types.scoring_function_param import ( Scorer, ScoringFunctionParam, @@ -28,7 +30,7 @@ class AsyncScenarioBuilder: Example: >>> builder = sdk.scenario.builder("my-scenario") - >>> builder.from_blueprint_id("bp-xxx") + >>> builder.from_blueprint(blueprint) >>> 
builder.with_working_directory("/app") >>> builder.with_problem_statement("Fix the bug in main.py") >>> builder.add_test_scorer("tests", test_command="pytest") @@ -47,8 +49,8 @@ def __init__(self, client: AsyncRunloop, name: str) -> None: self._name = name # Environment configuration - self._blueprint_id: Optional[str] = None - self._snapshot_id: Optional[str] = None + self._blueprint: Optional[AsyncBlueprint] = None + self._snapshot: Optional[AsyncSnapshot] = None self._working_directory: Optional[str] = None # Input context @@ -78,28 +80,28 @@ def name(self) -> str: """ return self._name - def from_blueprint_id(self, blueprint_id: str) -> Self: - """Set the blueprint ID for the scenario environment. + def from_blueprint(self, blueprint: AsyncBlueprint) -> Self: + """Set the blueprint for the scenario environment. - :param blueprint_id: Blueprint ID to use - :type blueprint_id: str + :param blueprint: Blueprint to use + :type blueprint: AsyncBlueprint :return: Self for method chaining :rtype: Self """ - self._blueprint_id = blueprint_id - self._snapshot_id = None # Clear snapshot if blueprint is set + self._blueprint = blueprint + self._snapshot = None # Clear snapshot if blueprint is set return self - def from_snapshot_id(self, snapshot_id: str) -> Self: - """Set the snapshot ID for the scenario environment. + def from_snapshot(self, snapshot: AsyncSnapshot) -> Self: + """Set the snapshot for the scenario environment. 
- :param snapshot_id: Snapshot ID to use - :type snapshot_id: str + :param snapshot: Snapshot to use + :type snapshot: AsyncSnapshot :return: Self for method chaining :rtype: Self """ - self._snapshot_id = snapshot_id - self._blueprint_id = None # Clear blueprint if snapshot is set + self._snapshot = snapshot + self._blueprint = None # Clear blueprint if snapshot is set return self def with_working_directory(self, directory: str) -> Self: @@ -415,10 +417,10 @@ def _build_params(self) -> Dict[str, Any]: # Build environment parameters if any are set env_params: Dict[str, Any] = {} - if self._blueprint_id: - env_params["blueprint_id"] = self._blueprint_id - if self._snapshot_id: - env_params["snapshot_id"] = self._snapshot_id + if self._blueprint: + env_params["blueprint_id"] = self._blueprint.id + if self._snapshot: + env_params["snapshot_id"] = self._snapshot.id if self._working_directory: env_params["working_directory"] = self._working_directory diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index 49e693113..842980b4f 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -7,6 +7,8 @@ from .._client import Runloop from .scenario import Scenario +from .snapshot import Snapshot +from .blueprint import Blueprint from ..types.scoring_function_param import ( Scorer, ScoringFunctionParam, @@ -28,7 +30,7 @@ class ScenarioBuilder: Example: >>> builder = sdk.scenario.builder("my-scenario") - >>> builder.from_blueprint_id("bp-xxx") + >>> builder.from_blueprint(blueprint) >>> builder.with_working_directory("/app") >>> builder.with_problem_statement("Fix the bug in main.py") >>> builder.add_test_scorer("tests", test_command="pytest") @@ -47,8 +49,8 @@ def __init__(self, client: Runloop, name: str) -> None: self._name = name # Environment configuration - self._blueprint_id: Optional[str] = None - self._snapshot_id: Optional[str] = None + self._blueprint: 
Optional[Blueprint] = None + self._snapshot: Optional[Snapshot] = None self._working_directory: Optional[str] = None # Input context @@ -78,28 +80,28 @@ def name(self) -> str: """ return self._name - def from_blueprint_id(self, blueprint_id: str) -> Self: - """Set the blueprint ID for the scenario environment. + def from_blueprint(self, blueprint: Blueprint) -> Self: + """Set the blueprint for the scenario environment. - :param blueprint_id: Blueprint ID to use - :type blueprint_id: str + :param blueprint: Blueprint to use + :type blueprint: Blueprint :return: Self for method chaining :rtype: Self """ - self._blueprint_id = blueprint_id - self._snapshot_id = None # Clear snapshot if blueprint is set + self._blueprint = blueprint + self._snapshot = None # Clear snapshot if blueprint is set return self - def from_snapshot_id(self, snapshot_id: str) -> Self: - """Set the snapshot ID for the scenario environment. + def from_snapshot(self, snapshot: Snapshot) -> Self: + """Set the snapshot for the scenario environment. 
- :param snapshot_id: Snapshot ID to use - :type snapshot_id: str + :param snapshot: Snapshot to use + :type snapshot: Snapshot :return: Self for method chaining :rtype: Self """ - self._snapshot_id = snapshot_id - self._blueprint_id = None # Clear blueprint if snapshot is set + self._snapshot = snapshot + self._blueprint = None # Clear blueprint if snapshot is set return self def with_working_directory(self, directory: str) -> Self: @@ -415,10 +417,10 @@ def _build_params(self) -> Dict[str, Any]: # Build environment parameters if any are set env_params: Dict[str, Any] = {} - if self._blueprint_id: - env_params["blueprint_id"] = self._blueprint_id - if self._snapshot_id: - env_params["snapshot_id"] = self._snapshot_id + if self._blueprint: + env_params["blueprint_id"] = self._blueprint.id + if self._snapshot: + env_params["snapshot_id"] = self._snapshot.id if self._working_directory: env_params["working_directory"] = self._working_directory diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py index 3ea9d2631..64ed2ba0c 100644 --- a/tests/sdk/test_async_scenario_builder.py +++ b/tests/sdk/test_async_scenario_builder.py @@ -6,6 +6,8 @@ import pytest +from runloop_api_client.sdk.async_snapshot import AsyncSnapshot +from runloop_api_client.sdk.async_blueprint import AsyncBlueprint from runloop_api_client.sdk.async_scenario_builder import AsyncScenarioBuilder @@ -20,6 +22,16 @@ def mock_async_client(self) -> MagicMock: client.scenarios.create = AsyncMock() return client + @pytest.fixture + def mock_blueprint(self, mock_async_client: MagicMock) -> AsyncBlueprint: + """Create a mock AsyncBlueprint object.""" + return AsyncBlueprint(mock_async_client, "bp-123") + + @pytest.fixture + def mock_snapshot(self, mock_async_client: MagicMock) -> AsyncSnapshot: + """Create a mock AsyncSnapshot object.""" + return AsyncSnapshot(mock_async_client, "snap-123") + @pytest.fixture def builder(self, mock_async_client: MagicMock) -> 
AsyncScenarioBuilder: """Create an AsyncScenarioBuilder instance with mock client.""" @@ -37,21 +49,21 @@ def test_repr(self, builder: AsyncScenarioBuilder) -> None: """Test builder __repr__.""" assert repr(builder) == "" - def test_from_blueprint_id_returns_self(self, builder: AsyncScenarioBuilder) -> None: - """Test from_blueprint_id returns self for chaining.""" - result = builder.from_blueprint_id("bp-123") + def test_from_blueprint_returns_self(self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None: + """Test from_blueprint returns self for chaining.""" + result = builder.from_blueprint(mock_blueprint) assert result is builder - assert builder._blueprint_id == "bp-123" - assert builder._snapshot_id is None + assert builder._blueprint is mock_blueprint + assert builder._snapshot is None - def test_from_snapshot_id_returns_self(self, builder: AsyncScenarioBuilder) -> None: - """Test from_snapshot_id returns self for chaining.""" - result = builder.from_snapshot_id("snap-123") + def test_from_snapshot_returns_self(self, builder: AsyncScenarioBuilder, mock_snapshot: AsyncSnapshot) -> None: + """Test from_snapshot returns self for chaining.""" + result = builder.from_snapshot(mock_snapshot) assert result is builder - assert builder._snapshot_id == "snap-123" - assert builder._blueprint_id is None + assert builder._snapshot is mock_snapshot + assert builder._blueprint is None def test_with_working_directory_returns_self(self, builder: AsyncScenarioBuilder) -> None: """Test with_working_directory returns self for chaining.""" @@ -125,11 +137,11 @@ def test_build_params_minimal(self, builder: AsyncScenarioBuilder) -> None: assert params["input_context"]["problem_statement"] == "Fix the bug" assert len(params["scoring_contract"]["scoring_function_parameters"]) == 1 - def test_build_params_with_environment(self, builder: AsyncScenarioBuilder) -> None: + def test_build_params_with_environment(self, builder: AsyncScenarioBuilder, mock_blueprint: 
AsyncBlueprint) -> None: """Test _build_params includes environment parameters.""" builder.with_problem_statement("Fix the bug") builder.add_test_scorer("tests", test_command="pytest") - builder.from_blueprint_id("bp-123") + builder.from_blueprint(mock_blueprint) builder.with_working_directory("/app") params = builder._build_params() @@ -156,10 +168,10 @@ async def test_push_calls_api_and_returns_scenario( assert scenario.id == "scn-new-123" - def test_fluent_chaining(self, builder: AsyncScenarioBuilder) -> None: + def test_fluent_chaining(self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None: """Test that all builder methods can be chained fluently.""" result = ( - builder.from_blueprint_id("bp-123") + builder.from_blueprint(mock_blueprint) .with_working_directory("/app") .with_problem_statement("Fix the bug") .with_additional_context({"hint": "check main.py"}) @@ -172,7 +184,7 @@ def test_fluent_chaining(self, builder: AsyncScenarioBuilder) -> None: ) assert result is builder - assert builder._blueprint_id == "bp-123" + assert builder._blueprint is mock_blueprint assert builder._working_directory == "/app" assert builder._problem_statement == "Fix the bug" assert len(builder._scorers) == 1 diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py index a2941abbc..d0f11bb06 100644 --- a/tests/sdk/test_scenario_builder.py +++ b/tests/sdk/test_scenario_builder.py @@ -6,6 +6,8 @@ import pytest +from runloop_api_client.sdk.snapshot import Snapshot +from runloop_api_client.sdk.blueprint import Blueprint from runloop_api_client.sdk.scenario_builder import ScenarioBuilder from runloop_api_client.types.scoring_function_param import ScorerTestBasedScoringFunctionTestFile @@ -19,6 +21,16 @@ def mock_client(self) -> MagicMock: client = MagicMock() return client + @pytest.fixture + def mock_blueprint(self, mock_client: MagicMock) -> Blueprint: + """Create a mock Blueprint object.""" + return Blueprint(mock_client, "bp-123") + 
+ @pytest.fixture + def mock_snapshot(self, mock_client: MagicMock) -> Snapshot: + """Create a mock Snapshot object.""" + return Snapshot(mock_client, "snap-123") + @pytest.fixture def builder(self, mock_client: MagicMock) -> ScenarioBuilder: """Create a ScenarioBuilder instance with mock client.""" @@ -36,37 +48,41 @@ def test_repr(self, builder: ScenarioBuilder) -> None: """Test builder __repr__.""" assert repr(builder) == "" - def test_from_blueprint_id_returns_self(self, builder: ScenarioBuilder) -> None: - """Test from_blueprint_id returns self for chaining.""" - result = builder.from_blueprint_id("bp-123") + def test_from_blueprint_returns_self(self, builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None: + """Test from_blueprint returns self for chaining.""" + result = builder.from_blueprint(mock_blueprint) assert result is builder - assert builder._blueprint_id == "bp-123" - assert builder._snapshot_id is None + assert builder._blueprint is mock_blueprint + assert builder._snapshot is None - def test_from_snapshot_id_returns_self(self, builder: ScenarioBuilder) -> None: - """Test from_snapshot_id returns self for chaining.""" - result = builder.from_snapshot_id("snap-123") + def test_from_snapshot_returns_self(self, builder: ScenarioBuilder, mock_snapshot: Snapshot) -> None: + """Test from_snapshot returns self for chaining.""" + result = builder.from_snapshot(mock_snapshot) assert result is builder - assert builder._snapshot_id == "snap-123" - assert builder._blueprint_id is None + assert builder._snapshot is mock_snapshot + assert builder._blueprint is None - def test_from_blueprint_clears_snapshot(self, builder: ScenarioBuilder) -> None: + def test_from_blueprint_clears_snapshot( + self, builder: ScenarioBuilder, mock_blueprint: Blueprint, mock_snapshot: Snapshot + ) -> None: """Test that setting blueprint clears snapshot.""" - builder.from_snapshot_id("snap-123") - builder.from_blueprint_id("bp-123") + builder.from_snapshot(mock_snapshot) + 
builder.from_blueprint(mock_blueprint) - assert builder._blueprint_id == "bp-123" - assert builder._snapshot_id is None + assert builder._blueprint is mock_blueprint + assert builder._snapshot is None - def test_from_snapshot_clears_blueprint(self, builder: ScenarioBuilder) -> None: + def test_from_snapshot_clears_blueprint( + self, builder: ScenarioBuilder, mock_blueprint: Blueprint, mock_snapshot: Snapshot + ) -> None: """Test that setting snapshot clears blueprint.""" - builder.from_blueprint_id("bp-123") - builder.from_snapshot_id("snap-123") + builder.from_blueprint(mock_blueprint) + builder.from_snapshot(mock_snapshot) - assert builder._snapshot_id == "snap-123" - assert builder._blueprint_id is None + assert builder._snapshot is mock_snapshot + assert builder._blueprint is None def test_with_working_directory_returns_self(self, builder: ScenarioBuilder) -> None: """Test with_working_directory returns self for chaining.""" @@ -269,11 +285,11 @@ def test_build_params_minimal(self, builder: ScenarioBuilder) -> None: assert params["input_context"]["problem_statement"] == "Fix the bug" assert len(params["scoring_contract"]["scoring_function_parameters"]) == 1 - def test_build_params_with_environment(self, builder: ScenarioBuilder) -> None: + def test_build_params_with_environment(self, builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None: """Test _build_params includes environment parameters.""" builder.with_problem_statement("Fix the bug") builder.add_test_scorer("tests", test_command="pytest") - builder.from_blueprint_id("bp-123") + builder.from_blueprint(mock_blueprint) builder.with_working_directory("/app") params = builder._build_params() @@ -281,12 +297,12 @@ def test_build_params_with_environment(self, builder: ScenarioBuilder) -> None: assert params["environment_parameters"]["blueprint_id"] == "bp-123" assert params["environment_parameters"]["working_directory"] == "/app" - def test_build_params_with_all_options(self, builder: ScenarioBuilder) -> 
None: + def test_build_params_with_all_options(self, builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None: """Test _build_params with all optional fields set.""" builder.with_problem_statement("Fix the bug") builder.with_additional_context({"hint": "line 42"}) builder.add_test_scorer("tests", test_command="pytest") - builder.from_blueprint_id("bp-123") + builder.from_blueprint(mock_blueprint) builder.with_working_directory("/app") builder.with_metadata({"team": "infra"}) builder.with_reference_output("diff content") @@ -343,10 +359,10 @@ def test_push_calls_api_and_returns_scenario(self, builder: ScenarioBuilder, moc assert scenario.id == "scn-new-123" - def test_fluent_chaining(self, builder: ScenarioBuilder) -> None: + def test_fluent_chaining(self, builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None: """Test that all builder methods can be chained fluently.""" result = ( - builder.from_blueprint_id("bp-123") + builder.from_blueprint(mock_blueprint) .with_working_directory("/app") .with_problem_statement("Fix the bug") .with_additional_context({"hint": "check main.py"}) @@ -359,7 +375,7 @@ def test_fluent_chaining(self, builder: ScenarioBuilder) -> None: ) assert result is builder - assert builder._blueprint_id == "bp-123" + assert builder._blueprint is mock_blueprint assert builder._working_directory == "/app" assert builder._problem_statement == "Fix the bug" assert len(builder._scorers) == 1 From 01cdb365c873c2aa5fed05828ede633023548a25 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 13:32:47 -0800 Subject: [PATCH 05/31] consolidate from_blueprint and from_snapshot unit tests --- tests/sdk/test_async_scenario_builder.py | 17 +++++++++----- tests/sdk/test_scenario_builder.py | 29 ++++++------------------ 2 files changed, 18 insertions(+), 28 deletions(-) diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py index 64ed2ba0c..03c38d8b2 100644 --- 
a/tests/sdk/test_async_scenario_builder.py +++ b/tests/sdk/test_async_scenario_builder.py @@ -49,22 +49,27 @@ def test_repr(self, builder: AsyncScenarioBuilder) -> None: """Test builder __repr__.""" assert repr(builder) == "" - def test_from_blueprint_returns_self(self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None: - """Test from_blueprint returns self for chaining.""" + def test_from_blueprint_and_snapshot( + self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint, mock_snapshot: AsyncSnapshot + ) -> None: + """Test blueprint/snapshot setting returns self and are mutually exclusive.""" + # from_blueprint returns self and sets blueprint result = builder.from_blueprint(mock_blueprint) - assert result is builder assert builder._blueprint is mock_blueprint assert builder._snapshot is None - def test_from_snapshot_returns_self(self, builder: AsyncScenarioBuilder, mock_snapshot: AsyncSnapshot) -> None: - """Test from_snapshot returns self for chaining.""" + # from_snapshot returns self, sets snapshot, and clears blueprint result = builder.from_snapshot(mock_snapshot) - assert result is builder assert builder._snapshot is mock_snapshot assert builder._blueprint is None + # from_blueprint clears snapshot + builder.from_blueprint(mock_blueprint) + assert builder._blueprint is mock_blueprint + assert builder._snapshot is None + def test_with_working_directory_returns_self(self, builder: AsyncScenarioBuilder) -> None: """Test with_working_directory returns self for chaining.""" result = builder.with_working_directory("/app") diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py index d0f11bb06..224e62b02 100644 --- a/tests/sdk/test_scenario_builder.py +++ b/tests/sdk/test_scenario_builder.py @@ -48,42 +48,27 @@ def test_repr(self, builder: ScenarioBuilder) -> None: """Test builder __repr__.""" assert repr(builder) == "" - def test_from_blueprint_returns_self(self, builder: ScenarioBuilder, mock_blueprint: 
Blueprint) -> None: - """Test from_blueprint returns self for chaining.""" + def test_from_blueprint_and_snapshot( + self, builder: ScenarioBuilder, mock_blueprint: Blueprint, mock_snapshot: Snapshot + ) -> None: + """Test blueprint/snapshot setting returns self and are mutually exclusive.""" + # from_blueprint returns self and sets blueprint result = builder.from_blueprint(mock_blueprint) - assert result is builder assert builder._blueprint is mock_blueprint assert builder._snapshot is None - def test_from_snapshot_returns_self(self, builder: ScenarioBuilder, mock_snapshot: Snapshot) -> None: - """Test from_snapshot returns self for chaining.""" + # from_snapshot returns self, sets snapshot, and clears blueprint result = builder.from_snapshot(mock_snapshot) - assert result is builder assert builder._snapshot is mock_snapshot assert builder._blueprint is None - def test_from_blueprint_clears_snapshot( - self, builder: ScenarioBuilder, mock_blueprint: Blueprint, mock_snapshot: Snapshot - ) -> None: - """Test that setting blueprint clears snapshot.""" - builder.from_snapshot(mock_snapshot) + # from_blueprint clears snapshot builder.from_blueprint(mock_blueprint) - assert builder._blueprint is mock_blueprint assert builder._snapshot is None - def test_from_snapshot_clears_blueprint( - self, builder: ScenarioBuilder, mock_blueprint: Blueprint, mock_snapshot: Snapshot - ) -> None: - """Test that setting snapshot clears blueprint.""" - builder.from_blueprint(mock_blueprint) - builder.from_snapshot(mock_snapshot) - - assert builder._snapshot is mock_snapshot - assert builder._blueprint is None - def test_with_working_directory_returns_self(self, builder: ScenarioBuilder) -> None: """Test with_working_directory returns self for chaining.""" result = builder.with_working_directory("/app") From 1868d9fb697fbdec4c33d95438e4a928faaa173d Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 13:43:07 -0800 Subject: [PATCH 06/31] further consolidate scenario 
builder unit tests, make sure async coverage matches sync --- tests/sdk/test_async_scenario_builder.py | 169 ++++++++------- tests/sdk/test_scenario_builder.py | 251 +++++------------------ 2 files changed, 153 insertions(+), 267 deletions(-) diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py index 03c38d8b2..792b7c6b6 100644 --- a/tests/sdk/test_async_scenario_builder.py +++ b/tests/sdk/test_async_scenario_builder.py @@ -9,6 +9,7 @@ from runloop_api_client.sdk.async_snapshot import AsyncSnapshot from runloop_api_client.sdk.async_blueprint import AsyncBlueprint from runloop_api_client.sdk.async_scenario_builder import AsyncScenarioBuilder +from runloop_api_client.types.scoring_function_param import ScorerTestBasedScoringFunctionTestFile class TestAsyncScenarioBuilder: @@ -37,17 +38,14 @@ def builder(self, mock_async_client: MagicMock) -> AsyncScenarioBuilder: """Create an AsyncScenarioBuilder instance with mock client.""" return AsyncScenarioBuilder(mock_async_client, "test-scenario") - def test_init(self, mock_async_client: MagicMock) -> None: - """Test builder initialization.""" + def test_instantiation(self, mock_async_client: MagicMock) -> None: + """Test builder initialization and repr.""" builder = AsyncScenarioBuilder(mock_async_client, "my-scenario") assert builder._client is mock_async_client assert builder._name == "my-scenario" assert builder.name == "my-scenario" - - def test_repr(self, builder: AsyncScenarioBuilder) -> None: - """Test builder __repr__.""" - assert repr(builder) == "" + assert repr(builder) == "" def test_from_blueprint_and_snapshot( self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint, mock_snapshot: AsyncSnapshot @@ -70,89 +68,122 @@ def test_from_blueprint_and_snapshot( assert builder._blueprint is mock_blueprint assert builder._snapshot is None - def test_with_working_directory_returns_self(self, builder: AsyncScenarioBuilder) -> None: - """Test with_working_directory 
returns self for chaining.""" - result = builder.with_working_directory("/app") - - assert result is builder - assert builder._working_directory == "/app" - - def test_with_problem_statement_returns_self(self, builder: AsyncScenarioBuilder) -> None: - """Test with_problem_statement returns self for chaining.""" - result = builder.with_problem_statement("Fix the bug") - - assert result is builder - assert builder._problem_statement == "Fix the bug" - - def test_add_test_scorer(self, builder: AsyncScenarioBuilder) -> None: - """Test add_test_scorer method.""" - result = builder.add_test_scorer( - "my-tests", - test_command="pytest", - weight=2.0, - ) - + def test_scorers(self, builder: AsyncScenarioBuilder) -> None: + """Test all scorer types, optional params, and multiple scorers.""" + # Test scorer with test files + test_files: list[ScorerTestBasedScoringFunctionTestFile] = [ + {"file_path": "test_main.py", "file_contents": "def test_foo(): pass"} + ] + result = builder.add_test_scorer("test-scorer", test_command="pytest", weight=2.0, test_files=test_files) assert result is builder - assert len(builder._scorers) == 1 - assert builder._scorers[0]["name"] == "my-tests" + assert builder._scorers[0]["name"] == "test-scorer" + assert builder._scorers[0]["weight"] == 2.0 assert builder._scorers[0]["scorer"]["type"] == "test_based_scorer" - - def test_add_command_scorer(self, builder: AsyncScenarioBuilder) -> None: - """Test add_command_scorer method.""" - result = builder.add_command_scorer( - "cmd-scorer", - command="./check.sh", + assert builder._scorers[0]["scorer"].get("test_command") == "pytest" + assert builder._scorers[0]["scorer"].get("test_files") == test_files + + # Command scorer + builder.add_command_scorer("cmd-scorer", command="./check.sh") + assert builder._scorers[1]["scorer"]["type"] == "command_scorer" + assert builder._scorers[1]["scorer"].get("command") == "./check.sh" + + # Bash scorer + builder.add_bash_scorer("bash-scorer", bash_script="echo 
'score=1.0'") + assert builder._scorers[2]["scorer"]["type"] == "bash_script_scorer" + assert builder._scorers[2]["scorer"].get("bash_script") == "echo 'score=1.0'" + + # Python scorer with optional params + builder.add_python_scorer( + "python-scorer", + python_script="print('1.0')", + python_version_constraint=">=3.10", + requirements_contents="numpy", ) - - assert result is builder - assert builder._scorers[0]["scorer"]["type"] == "command_scorer" - - def test_add_bash_scorer(self, builder: AsyncScenarioBuilder) -> None: - """Test add_bash_scorer method.""" - result = builder.add_bash_scorer( - "bash-scorer", - bash_script="echo 'score=1.0'", - ) - - assert result is builder - assert builder._scorers[0]["scorer"]["type"] == "bash_script_scorer" - - def test_build_params_missing_problem_statement(self, builder: AsyncScenarioBuilder) -> None: - """Test _build_params raises if problem statement is missing.""" + assert builder._scorers[3]["scorer"]["type"] == "python_script_scorer" + assert builder._scorers[3]["scorer"].get("python_version_constraint") == ">=3.10" + assert builder._scorers[3]["scorer"].get("requirements_contents") == "numpy" + + # AST grep scorer with optional lang + builder.add_ast_grep_scorer("ast-scorer", pattern="$A.foo()", search_directory="/src", lang="python") + assert builder._scorers[4]["scorer"]["type"] == "ast_grep_scorer" + assert builder._scorers[4]["scorer"].get("pattern") == "$A.foo()" + assert builder._scorers[4]["scorer"].get("lang") == "python" + + # Custom scorer with optional params + builder.add_custom_scorer("custom-scorer", custom_scorer_type="my_scorer", scorer_params={"threshold": 0.5}) + assert builder._scorers[5]["scorer"]["type"] == "custom_scorer" + assert builder._scorers[5]["scorer"].get("custom_scorer_type") == "my_scorer" + assert builder._scorers[5]["scorer"].get("scorer_params") == {"threshold": 0.5} + + # Verify multiple scorers accumulated + assert len(builder._scorers) == 6 + + def 
test_add_scorer_rejects_invalid_weight(self, builder: AsyncScenarioBuilder) -> None: + """Test that adding a scorer with zero or negative weight raises ValueError.""" + with pytest.raises(ValueError, match="Scorer weight must be positive"): + builder.add_bash_scorer("bad", bash_script="echo 1", weight=0.0) + + with pytest.raises(ValueError, match="Scorer weight must be positive"): + builder.add_bash_scorer("bad", bash_script="echo 1", weight=-1.0) + + def test_build_params_validation(self, builder: AsyncScenarioBuilder) -> None: + """Test _build_params raises for missing required fields.""" + # Missing problem statement builder.add_test_scorer("test", test_command="pytest") - with pytest.raises(ValueError, match="Problem statement is required"): builder._build_params() - def test_build_params_missing_scorer(self, builder: AsyncScenarioBuilder) -> None: - """Test _build_params raises if no scorers are added.""" - builder.with_problem_statement("Fix the bug") - + # Missing scorer (new builder) + builder2 = AsyncScenarioBuilder(builder._client, "test2") + builder2.with_problem_statement("Fix the bug") with pytest.raises(ValueError, match="At least one scorer is required"): - builder._build_params() + builder2._build_params() - def test_build_params_minimal(self, builder: AsyncScenarioBuilder) -> None: - """Test _build_params with minimal configuration.""" + def test_build_params_with_all_options(self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None: + """Test _build_params with all optional fields set.""" builder.with_problem_statement("Fix the bug") + builder.with_additional_context({"hint": "line 42"}) builder.add_test_scorer("tests", test_command="pytest") + builder.from_blueprint(mock_blueprint) + builder.with_working_directory("/app") + builder.with_metadata({"team": "infra"}) + builder.with_reference_output("diff content") + builder.with_required_env_vars(["API_KEY"]) + builder.with_required_secrets(["db_pass"]) + 
builder.with_validation_type("FORWARD") params = builder._build_params() assert params["name"] == "test-scenario" assert params["input_context"]["problem_statement"] == "Fix the bug" - assert len(params["scoring_contract"]["scoring_function_parameters"]) == 1 - - def test_build_params_with_environment(self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None: - """Test _build_params includes environment parameters.""" + assert params["input_context"]["additional_context"] == {"hint": "line 42"} + assert params["environment_parameters"]["blueprint_id"] == "bp-123" + assert params["environment_parameters"]["working_directory"] == "/app" + assert params["metadata"] == {"team": "infra"} + assert params["reference_output"] == "diff content" + assert params["required_environment_variables"] == ["API_KEY"] + assert params["required_secret_names"] == ["db_pass"] + assert params["validation_type"] == "FORWARD" + + def test_build_params_normalizes_weights(self, builder: AsyncScenarioBuilder) -> None: + """Test that _build_params normalizes scorer weights to sum to 1.0.""" builder.with_problem_statement("Fix the bug") - builder.add_test_scorer("tests", test_command="pytest") - builder.from_blueprint(mock_blueprint) - builder.with_working_directory("/app") + builder.add_bash_scorer("scorer1", bash_script="echo 1", weight=1.0) + builder.add_bash_scorer("scorer2", bash_script="echo 2", weight=2.0) + builder.add_bash_scorer("scorer3", bash_script="echo 3", weight=3.0) params = builder._build_params() + scorers = params["scoring_contract"]["scoring_function_parameters"] - assert params["environment_parameters"]["blueprint_id"] == "bp-123" - assert params["environment_parameters"]["working_directory"] == "/app" + # Weights 1, 2, 3 should normalize to 1/6, 2/6, 3/6 + assert len(scorers) == 3 + assert abs(scorers[0]["weight"] - 1 / 6) < 0.0001 + assert abs(scorers[1]["weight"] - 2 / 6) < 0.0001 + assert abs(scorers[2]["weight"] - 3 / 6) < 0.0001 + + # Total should 
be 1.0 + total = sum(s["weight"] for s in scorers) + assert abs(total - 1.0) < 0.0001 @pytest.mark.asyncio async def test_push_calls_api_and_returns_scenario( diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py index 224e62b02..d9d90b67f 100644 --- a/tests/sdk/test_scenario_builder.py +++ b/tests/sdk/test_scenario_builder.py @@ -36,17 +36,14 @@ def builder(self, mock_client: MagicMock) -> ScenarioBuilder: """Create a ScenarioBuilder instance with mock client.""" return ScenarioBuilder(mock_client, "test-scenario") - def test_init(self, mock_client: MagicMock) -> None: - """Test builder initialization.""" + def test_instantiation(self, mock_client: MagicMock) -> None: + """Test builder initialization and repr.""" builder = ScenarioBuilder(mock_client, "my-scenario") assert builder._client is mock_client assert builder._name == "my-scenario" assert builder.name == "my-scenario" - - def test_repr(self, builder: ScenarioBuilder) -> None: - """Test builder __repr__.""" - assert repr(builder) == "" + assert repr(builder) == "" def test_from_blueprint_and_snapshot( self, builder: ScenarioBuilder, mock_blueprint: Blueprint, mock_snapshot: Snapshot @@ -69,218 +66,76 @@ def test_from_blueprint_and_snapshot( assert builder._blueprint is mock_blueprint assert builder._snapshot is None - def test_with_working_directory_returns_self(self, builder: ScenarioBuilder) -> None: - """Test with_working_directory returns self for chaining.""" - result = builder.with_working_directory("/app") - - assert result is builder - assert builder._working_directory == "/app" - - def test_with_problem_statement_returns_self(self, builder: ScenarioBuilder) -> None: - """Test with_problem_statement returns self for chaining.""" - result = builder.with_problem_statement("Fix the bug") - - assert result is builder - assert builder._problem_statement == "Fix the bug" - - def test_with_additional_context_returns_self(self, builder: ScenarioBuilder) -> None: - """Test 
with_additional_context returns self for chaining.""" - context = {"hint": "Look at line 42"} - result = builder.with_additional_context(context) - - assert result is builder - assert builder._additional_context == context - - def test_add_test_scorer(self, builder: ScenarioBuilder) -> None: - """Test add_test_scorer method.""" - result = builder.add_test_scorer( - "my-tests", - test_command="pytest", - weight=2.0, - ) - - assert result is builder - assert len(builder._scorers) == 1 - assert builder._scorers[0]["name"] == "my-tests" - assert builder._scorers[0]["weight"] == 2.0 - assert builder._scorers[0]["scorer"]["type"] == "test_based_scorer" - assert "test_command" in builder._scorers[0]["scorer"] - assert builder._scorers[0]["scorer"]["test_command"] == "pytest" - - def test_add_test_scorer_with_files(self, builder: ScenarioBuilder) -> None: - """Test add_test_scorer with test files.""" + def test_scorers(self, builder: ScenarioBuilder) -> None: + """Test all scorer types, optional params, and multiple scorers.""" + # Test scorer with test files test_files: list[ScorerTestBasedScoringFunctionTestFile] = [ {"file_path": "test_main.py", "file_contents": "def test_foo(): pass"} ] - result = builder.add_test_scorer("tests", test_command="pytest", test_files=test_files) - + result = builder.add_test_scorer("test-scorer", test_command="pytest", weight=2.0, test_files=test_files) assert result is builder - assert "test_files" in builder._scorers[0]["scorer"] - assert builder._scorers[0]["scorer"]["test_files"] == test_files - - def test_add_command_scorer(self, builder: ScenarioBuilder) -> None: - """Test add_command_scorer method.""" - result = builder.add_command_scorer( - "cmd-scorer", - command="./check.sh", - ) + assert builder._scorers[0]["name"] == "test-scorer" + assert builder._scorers[0]["weight"] == 2.0 + assert builder._scorers[0]["scorer"]["type"] == "test_based_scorer" + assert builder._scorers[0]["scorer"].get("test_command") == "pytest" + assert 
builder._scorers[0]["scorer"].get("test_files") == test_files - assert result is builder - assert len(builder._scorers) == 1 - assert builder._scorers[0]["scorer"]["type"] == "command_scorer" - assert "command" in builder._scorers[0]["scorer"] - assert builder._scorers[0]["scorer"]["command"] == "./check.sh" - - def test_add_bash_scorer(self, builder: ScenarioBuilder) -> None: - """Test add_bash_scorer method.""" - result = builder.add_bash_scorer( - "bash-scorer", - bash_script="echo 'score=1.0'", - ) + # Command scorer + builder.add_command_scorer("cmd-scorer", command="./check.sh") + assert builder._scorers[1]["scorer"]["type"] == "command_scorer" + assert builder._scorers[1]["scorer"].get("command") == "./check.sh" - assert result is builder - assert len(builder._scorers) == 1 - assert builder._scorers[0]["scorer"]["type"] == "bash_script_scorer" - assert "bash_script" in builder._scorers[0]["scorer"] - assert builder._scorers[0]["scorer"]["bash_script"] == "echo 'score=1.0'" + # Bash scorer + builder.add_bash_scorer("bash-scorer", bash_script="echo 'score=1.0'") + assert builder._scorers[2]["scorer"]["type"] == "bash_script_scorer" + assert builder._scorers[2]["scorer"].get("bash_script") == "echo 'score=1.0'" - def test_add_python_scorer(self, builder: ScenarioBuilder) -> None: - """Test add_python_scorer method.""" - result = builder.add_python_scorer( + # Python scorer with optional params + builder.add_python_scorer( "python-scorer", - python_script="print('score=1.0')", + python_script="print('1.0')", python_version_constraint=">=3.10", + requirements_contents="numpy", ) - - assert result is builder - assert len(builder._scorers) == 1 - assert builder._scorers[0]["scorer"]["type"] == "python_script_scorer" - assert builder._scorers[0]["scorer"]["python_script"] == "print('score=1.0')" - assert "python_version_constraint" in builder._scorers[0]["scorer"] - assert builder._scorers[0]["scorer"]["python_version_constraint"] == ">=3.10" - - def 
test_add_ast_grep_scorer(self, builder: ScenarioBuilder) -> None: - """Test add_ast_grep_scorer method.""" - result = builder.add_ast_grep_scorer( - "ast-scorer", - pattern="$A.foo()", - search_directory="/src", - lang="python", - ) - - assert result is builder - assert len(builder._scorers) == 1 - assert builder._scorers[0]["scorer"]["type"] == "ast_grep_scorer" - assert builder._scorers[0]["scorer"]["pattern"] == "$A.foo()" - assert builder._scorers[0]["scorer"]["search_directory"] == "/src" - assert "lang" in builder._scorers[0]["scorer"] - assert builder._scorers[0]["scorer"]["lang"] == "python" - - def test_add_custom_scorer(self, builder: ScenarioBuilder) -> None: - """Test add_custom_scorer method.""" - result = builder.add_custom_scorer( - "custom-scorer", - custom_scorer_type="my_custom_scorer", - scorer_params={"threshold": 0.5}, - ) - - assert result is builder - assert len(builder._scorers) == 1 - assert builder._scorers[0]["scorer"]["type"] == "custom_scorer" - assert builder._scorers[0]["scorer"]["custom_scorer_type"] == "my_custom_scorer" - assert "scorer_params" in builder._scorers[0]["scorer"] - assert builder._scorers[0]["scorer"]["scorer_params"] == {"threshold": 0.5} - - def test_add_multiple_scorers(self, builder: ScenarioBuilder) -> None: - """Test adding multiple scorers.""" - builder.add_test_scorer("test1", test_command="pytest", weight=1.0) - builder.add_command_scorer("test2", command="./check.sh", weight=2.0) - - assert len(builder._scorers) == 2 - assert builder._scorers[0]["name"] == "test1" - assert builder._scorers[1]["name"] == "test2" - - def test_add_scorer_rejects_zero_weight(self, builder: ScenarioBuilder) -> None: - """Test that adding a scorer with zero weight raises ValueError.""" + assert builder._scorers[3]["scorer"]["type"] == "python_script_scorer" + assert builder._scorers[3]["scorer"].get("python_version_constraint") == ">=3.10" + assert builder._scorers[3]["scorer"].get("requirements_contents") == "numpy" + + # AST 
grep scorer with optional lang + builder.add_ast_grep_scorer("ast-scorer", pattern="$A.foo()", search_directory="/src", lang="python") + assert builder._scorers[4]["scorer"]["type"] == "ast_grep_scorer" + assert builder._scorers[4]["scorer"].get("pattern") == "$A.foo()" + assert builder._scorers[4]["scorer"].get("lang") == "python" + + # Custom scorer with optional params + builder.add_custom_scorer("custom-scorer", custom_scorer_type="my_scorer", scorer_params={"threshold": 0.5}) + assert builder._scorers[5]["scorer"]["type"] == "custom_scorer" + assert builder._scorers[5]["scorer"].get("custom_scorer_type") == "my_scorer" + assert builder._scorers[5]["scorer"].get("scorer_params") == {"threshold": 0.5} + + # Verify multiple scorers accumulated + assert len(builder._scorers) == 6 + + def test_add_scorer_rejects_invalid_weight(self, builder: ScenarioBuilder) -> None: + """Test that adding a scorer with zero or negative weight raises ValueError.""" with pytest.raises(ValueError, match="Scorer weight must be positive"): builder.add_bash_scorer("bad", bash_script="echo 1", weight=0.0) - def test_add_scorer_rejects_negative_weight(self, builder: ScenarioBuilder) -> None: - """Test that adding a scorer with negative weight raises ValueError.""" with pytest.raises(ValueError, match="Scorer weight must be positive"): builder.add_bash_scorer("bad", bash_script="echo 1", weight=-1.0) - def test_with_metadata_returns_self(self, builder: ScenarioBuilder) -> None: - """Test with_metadata returns self for chaining.""" - result = builder.with_metadata({"team": "infra"}) - - assert result is builder - assert builder._metadata == {"team": "infra"} - - def test_with_reference_output_returns_self(self, builder: ScenarioBuilder) -> None: - """Test with_reference_output returns self for chaining.""" - result = builder.with_reference_output("--- a/file.py\n+++ b/file.py") - - assert result is builder - assert builder._reference_output == "--- a/file.py\n+++ b/file.py" - - def 
test_with_required_env_vars_returns_self(self, builder: ScenarioBuilder) -> None: - """Test with_required_env_vars returns self for chaining.""" - result = builder.with_required_env_vars(["API_KEY", "SECRET"]) - - assert result is builder - assert builder._required_env_vars == ["API_KEY", "SECRET"] - - def test_with_required_secrets_returns_self(self, builder: ScenarioBuilder) -> None: - """Test with_required_secrets returns self for chaining.""" - result = builder.with_required_secrets(["db_password"]) - - assert result is builder - assert builder._required_secrets == ["db_password"] - - def test_with_validation_type_returns_self(self, builder: ScenarioBuilder) -> None: - """Test with_validation_type returns self for chaining.""" - result = builder.with_validation_type("FORWARD") - - assert result is builder - assert builder._validation_type == "FORWARD" - - def test_build_params_missing_problem_statement(self, builder: ScenarioBuilder) -> None: - """Test _build_params raises if problem statement is missing.""" + def test_build_params_validation(self, builder: ScenarioBuilder) -> None: + """Test _build_params raises for missing required fields.""" + # Missing problem statement builder.add_test_scorer("test", test_command="pytest") - with pytest.raises(ValueError, match="Problem statement is required"): builder._build_params() - def test_build_params_missing_scorer(self, builder: ScenarioBuilder) -> None: - """Test _build_params raises if no scorers are added.""" - builder.with_problem_statement("Fix the bug") - - with pytest.raises(ValueError, match="At least one scorer is required.*add_test_scorer"): - builder._build_params() - - def test_build_params_minimal(self, builder: ScenarioBuilder) -> None: - """Test _build_params with minimal configuration.""" - builder.with_problem_statement("Fix the bug") - builder.add_test_scorer("tests", test_command="pytest") - - params = builder._build_params() - - assert params["name"] == "test-scenario" - assert 
params["input_context"]["problem_statement"] == "Fix the bug" - assert len(params["scoring_contract"]["scoring_function_parameters"]) == 1 - - def test_build_params_with_environment(self, builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None: - """Test _build_params includes environment parameters.""" - builder.with_problem_statement("Fix the bug") - builder.add_test_scorer("tests", test_command="pytest") - builder.from_blueprint(mock_blueprint) - builder.with_working_directory("/app") - - params = builder._build_params() - - assert params["environment_parameters"]["blueprint_id"] == "bp-123" - assert params["environment_parameters"]["working_directory"] == "/app" + # Missing scorer (new builder) + builder2 = ScenarioBuilder(builder._client, "test2") + builder2.with_problem_statement("Fix the bug") + with pytest.raises(ValueError, match="At least one scorer is required"): + builder2._build_params() def test_build_params_with_all_options(self, builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None: """Test _build_params with all optional fields set.""" From 9ecbc3d055031a5258cb8b6d6daa83951e319215 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 16:23:37 -0800 Subject: [PATCH 07/31] stricter type declaration for _build_params --- .../sdk/async_scenario_builder.py | 16 +++++++++------- src/runloop_api_client/sdk/scenario_builder.py | 16 +++++++++------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index cabd51e5b..963c1171e 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -2,9 +2,10 @@ from __future__ import annotations -from typing import Any, Dict, List, Iterable, Optional +from typing import Dict, List, Iterable, Optional from typing_extensions import Self, Literal, override +from ..types import ScenarioCreateParams, 
ScenarioEnvironmentParam from .._client import AsyncRunloop from .async_scenario import AsyncScenario from .async_snapshot import AsyncSnapshot @@ -379,14 +380,14 @@ def with_validation_type(self, validation_type: Literal["UNSPECIFIED", "FORWARD" self._validation_type = validation_type return self - def _build_params(self) -> Dict[str, Any]: + def _build_params(self) -> ScenarioCreateParams: """Build the scenario creation parameters. Weights are automatically normalized to sum to 1.0. :raises ValueError: If required fields are missing :return: Parameters for scenario creation - :rtype: Dict[str, Any] + :rtype: ScenarioCreateParams """ if not self._problem_statement: raise ValueError("Problem statement is required. Call with_problem_statement() first.") @@ -399,15 +400,16 @@ def _build_params(self) -> Dict[str, Any]: # Normalize weights to sum to 1.0 total_weight = sum(s["weight"] for s in self._scorers) - normalized_scorers = [{**s, "weight": s["weight"] / total_weight} for s in self._scorers] + for s in self._scorers: + s["weight"] = s["weight"] / total_weight - params: Dict[str, Any] = { + params: ScenarioCreateParams = { "name": self._name, "input_context": { "problem_statement": self._problem_statement, }, "scoring_contract": { - "scoring_function_parameters": normalized_scorers, + "scoring_function_parameters": self._scorers, }, } @@ -416,7 +418,7 @@ def _build_params(self) -> Dict[str, Any]: params["input_context"]["additional_context"] = self._additional_context # Build environment parameters if any are set - env_params: Dict[str, Any] = {} + env_params: ScenarioEnvironmentParam = {} if self._blueprint: env_params["blueprint_id"] = self._blueprint.id if self._snapshot: diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index 842980b4f..505982c7d 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -2,9 +2,10 @@ from __future__ import 
annotations -from typing import Any, Dict, List, Iterable, Optional +from typing import Dict, List, Iterable, Optional from typing_extensions import Self, Literal, override +from ..types import ScenarioCreateParams, ScenarioEnvironmentParam from .._client import Runloop from .scenario import Scenario from .snapshot import Snapshot @@ -379,14 +380,14 @@ def with_validation_type(self, validation_type: Literal["UNSPECIFIED", "FORWARD" self._validation_type = validation_type return self - def _build_params(self) -> Dict[str, Any]: + def _build_params(self) -> ScenarioCreateParams: """Build the scenario creation parameters. Weights are automatically normalized to sum to 1.0. :raises ValueError: If required fields are missing :return: Parameters for scenario creation - :rtype: Dict[str, Any] + :rtype: ScenarioCreateParams """ if not self._problem_statement: raise ValueError("Problem statement is required. Call with_problem_statement() first.") @@ -399,15 +400,16 @@ def _build_params(self) -> Dict[str, Any]: # Normalize weights to sum to 1.0 total_weight = sum(s["weight"] for s in self._scorers) - normalized_scorers = [{**s, "weight": s["weight"] / total_weight} for s in self._scorers] + for s in self._scorers: + s["weight"] = s["weight"] / total_weight - params: Dict[str, Any] = { + params: ScenarioCreateParams = { "name": self._name, "input_context": { "problem_statement": self._problem_statement, }, "scoring_contract": { - "scoring_function_parameters": normalized_scorers, + "scoring_function_parameters": self._scorers, }, } @@ -416,7 +418,7 @@ def _build_params(self) -> Dict[str, Any]: params["input_context"]["additional_context"] = self._additional_context # Build environment parameters if any are set - env_params: Dict[str, Any] = {} + env_params: ScenarioEnvironmentParam = {} if self._blueprint: env_params["blueprint_id"] = self._blueprint.id if self._snapshot: From 9908844dbedd54ab0005879110a6462f8add4d7d Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: 
Tue, 9 Dec 2025 16:41:51 -0800 Subject: [PATCH 08/31] expose request options in push() --- src/runloop_api_client/sdk/async_scenario_builder.py | 8 +++++--- src/runloop_api_client/sdk/scenario_builder.py | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index 963c1171e..9f64993ff 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -3,9 +3,10 @@ from __future__ import annotations from typing import Dict, List, Iterable, Optional -from typing_extensions import Self, Literal, override +from typing_extensions import Self, Unpack, Literal, override from ..types import ScenarioCreateParams, ScenarioEnvironmentParam +from ._types import LongRequestOptions from .._client import AsyncRunloop from .async_scenario import AsyncScenario from .async_snapshot import AsyncSnapshot @@ -443,13 +444,14 @@ def _build_params(self) -> ScenarioCreateParams: return params - async def push(self) -> AsyncScenario: + async def push(self, **options: Unpack[LongRequestOptions]) -> AsyncScenario: """Create the scenario on the platform. 
+ :param options: Optional long-running request configuration :raises ValueError: If required fields are missing :return: Created scenario wrapper :rtype: AsyncScenario """ params = self._build_params() - scenario_view = await self._client.scenarios.create(**params) + scenario_view = await self._client.scenarios.create(**params, **options) return AsyncScenario(self._client, scenario_view.id) diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index 505982c7d..295d90f24 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -3,9 +3,10 @@ from __future__ import annotations from typing import Dict, List, Iterable, Optional -from typing_extensions import Self, Literal, override +from typing_extensions import Self, Unpack, Literal, override from ..types import ScenarioCreateParams, ScenarioEnvironmentParam +from ._types import LongRequestOptions from .._client import Runloop from .scenario import Scenario from .snapshot import Snapshot @@ -443,13 +444,14 @@ def _build_params(self) -> ScenarioCreateParams: return params - def push(self) -> Scenario: + def push(self, **options: Unpack[LongRequestOptions]) -> Scenario: """Create the scenario on the platform. 
+ :param options: Optional long-running request configuration :raises ValueError: If required fields are missing :return: Created scenario wrapper :rtype: Scenario """ params = self._build_params() - scenario_view = self._client.scenarios.create(**params) + scenario_view = self._client.scenarios.create(**params, **options) return Scenario(self._client, scenario_view.id) From 9511827827b4b4360abb3340c257d0f9481a9a1a Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 16:56:29 -0800 Subject: [PATCH 09/31] add scenario creation smoketests, with push_or_update logic --- tests/smoketests/sdk/test_async_scenario.py | 154 ++++++++++++++++++-- tests/smoketests/sdk/test_scenario.py | 153 +++++++++++++++++-- 2 files changed, 289 insertions(+), 18 deletions(-) diff --git a/tests/smoketests/sdk/test_async_scenario.py b/tests/smoketests/sdk/test_async_scenario.py index 1a6a6a6e7..20a23e47e 100644 --- a/tests/smoketests/sdk/test_async_scenario.py +++ b/tests/smoketests/sdk/test_async_scenario.py @@ -4,12 +4,63 @@ import pytest -from runloop_api_client.sdk import AsyncRunloopSDK +from runloop_api_client.sdk import AsyncRunloopSDK, AsyncScenarioBuilder +from tests.smoketests.utils import unique_name +from runloop_api_client.types import ScenarioView +from runloop_api_client.sdk._types import SDKScenarioUpdateParams +from runloop_api_client.sdk._helpers import filter_params pytestmark = [pytest.mark.smoketest] TWO_MINUTE_TIMEOUT = 120 FIVE_MINUTE_TIMEOUT = 300 +TEN_MINUTE_TIMEOUT = 600 + +# Metadata tag for all smoketest scenarios (for easy identification/cleanup) +SMOKETEST_METADATA = {"smoketest": "true"} + + +async def push_or_update_scenario(sdk_client: AsyncRunloopSDK, builder: AsyncScenarioBuilder) -> ScenarioView: + """Push a new scenario or update existing one with the same name. + + This is a workaround until scenario delete endpoint is available. + Uses fixed scenario names to avoid littering the platform with test scenarios. 
+ + When updating an existing scenario, this function will delete the OLD blueprint/snapshot + that's no longer needed (if different from the new one). The NEW blueprint/snapshot + is kept so the scenario remains runnable. + """ + # Check if scenario already exists + scenarios = await sdk_client.scenario.list(name=builder.name, limit=1) + + if scenarios: + # Get old scenario info to find old blueprint/snapshot IDs + scenario = scenarios[0] + old_scenario_info = await scenario.get_info() + old_env = old_scenario_info.environment + old_blueprint_id = old_env.blueprint_id if old_env else None + old_snapshot_id = old_env.snapshot_id if old_env else None + + # Get new blueprint/snapshot IDs from builder + new_blueprint_id = builder._blueprint.id if builder._blueprint else None + new_snapshot_id = builder._snapshot.id if builder._snapshot else None + + # Update existing scenario with builder's params + params = builder._build_params() + result = await scenario.update(**filter_params(params, SDKScenarioUpdateParams)) + + # Delete OLD blueprint/snapshot if they're being replaced + if old_blueprint_id and old_blueprint_id != new_blueprint_id: + await sdk_client.blueprint.from_id(old_blueprint_id).delete() + + if old_snapshot_id and old_snapshot_id != new_snapshot_id: + await sdk_client.snapshot.from_id(old_snapshot_id).delete() + + return result + else: + # Create new scenario - keep the blueprint/snapshot (scenario needs them) + scenario = await builder.push() + return await scenario.get_info() class TestAsyncScenarioRetrieval: @@ -52,7 +103,7 @@ class TestAsyncScenarioRun: """Test async scenario run operations.""" @pytest.mark.timeout(FIVE_MINUTE_TIMEOUT) - async def test_scenario_run_lifecycle(self, async_sdk_client: AsyncRunloopSDK) -> None: + async def test_scenario_run_async_lifecycle(self, async_sdk_client: AsyncRunloopSDK) -> None: """Test running a scenario and accessing the devbox. 
This test: @@ -63,7 +114,7 @@ async def test_scenario_run_lifecycle(self, async_sdk_client: AsyncRunloopSDK) - 5. Cancels the run """ # Find a scenario to run - scenarios = await async_sdk_client.scenario.list(limit=5) + scenarios = await async_sdk_client.scenario.list(limit=1) if not scenarios: pytest.skip("No scenarios available to test run") @@ -72,6 +123,7 @@ async def test_scenario_run_lifecycle(self, async_sdk_client: AsyncRunloopSDK) - # Start a run run = await scenario.run_async(run_name="sdk-smoketest-async-run") + devbox = None try: assert run.id is not None @@ -82,7 +134,8 @@ async def test_scenario_run_lifecycle(self, async_sdk_client: AsyncRunloopSDK) - # Access devbox devbox = run.devbox - assert devbox.id == run.devbox_id + info = await devbox.get_info() + assert info.status == "running" # Get run info info = await run.get_info() @@ -94,13 +147,14 @@ async def test_scenario_run_lifecycle(self, async_sdk_client: AsyncRunloopSDK) - try: await run.cancel() except Exception: - pass # Best effort cleanup + if devbox: + await devbox.shutdown() @pytest.mark.timeout(FIVE_MINUTE_TIMEOUT) - async def test_scenario_run_and_await_env_ready(self, async_sdk_client: AsyncRunloopSDK) -> None: - """Test run_and_await_env_ready convenience method.""" + async def test_scenario_run(self, async_sdk_client: AsyncRunloopSDK) -> None: + """Test run convenience method.""" # Find a scenario to run - scenarios = await async_sdk_client.scenario.list(limit=5) + scenarios = await async_sdk_client.scenario.list(limit=1) if not scenarios: pytest.skip("No scenarios available to test run") @@ -108,6 +162,7 @@ async def test_scenario_run_and_await_env_ready(self, async_sdk_client: AsyncRun # Start a run and wait for environment in one call run = await scenario.run(run_name="sdk-smoketest-async-await") + devbox = None try: assert run.id is not None @@ -123,4 +178,85 @@ async def test_scenario_run_and_await_env_ready(self, async_sdk_client: AsyncRun try: await run.cancel() except 
Exception: - pass + if devbox: + await devbox.shutdown() + + +class TestAsyncScenarioBuilder: + """Test AsyncScenarioBuilder operations.""" + + @pytest.mark.timeout(TWO_MINUTE_TIMEOUT) + async def test_scenario_builder_minimal(self, async_sdk_client: AsyncRunloopSDK) -> None: + """Test creating/updating a minimal scenario with just problem statement and scorer.""" + builder = ( + async_sdk_client.scenario.builder("sdk-smoketest-async-builder-minimal") + .with_problem_statement("Async minimal test problem statement") + .with_metadata(SMOKETEST_METADATA) + .add_command_scorer("async-minimal-scorer", command="echo 1.0") + ) + + info = await push_or_update_scenario(async_sdk_client, builder) + + assert info.name == "sdk-smoketest-async-builder-minimal" + assert info.input_context.problem_statement == "Async minimal test problem statement" + assert len(info.scoring_contract.scoring_function_parameters) == 1 + assert info.scoring_contract.scoring_function_parameters[0].name == "async-minimal-scorer" + + @pytest.mark.timeout(FIVE_MINUTE_TIMEOUT) + async def test_scenario_builder_with_blueprint(self, async_sdk_client: AsyncRunloopSDK) -> None: + """Test creating/updating a scenario from a blueprint. 
+ """ + blueprint = await async_sdk_client.blueprint.create( + name=unique_name("sdk-smoketest-async-scenario-bp"), + dockerfile="FROM ubuntu:20.04", + ) + + builder = ( + async_sdk_client.scenario.builder("sdk-smoketest-async-builder-blueprint") + .from_blueprint(blueprint) + .with_working_directory("/home/user") + .with_problem_statement("Async blueprint test problem") + .with_metadata(SMOKETEST_METADATA) + .add_command_scorer("async-blueprint-scorer", command="echo 1.0") + ) + + info = await push_or_update_scenario(async_sdk_client, builder) + + assert info.name == "sdk-smoketest-async-builder-blueprint" + assert info.input_context.problem_statement == "Async blueprint test problem" + assert info.environment is not None + assert info.environment.blueprint_id == blueprint.id + assert info.environment.working_directory == "/home/user" + + @pytest.mark.timeout(TEN_MINUTE_TIMEOUT) + async def test_scenario_builder_with_snapshot(self, async_sdk_client: AsyncRunloopSDK) -> None: + """Test creating/updating a scenario from a snapshot. 
+ """ + # Create blueprint -> devbox -> snapshot chain + blueprint = await async_sdk_client.blueprint.create( + name=unique_name("sdk-smoketest-async-scenario-snap-bp"), + dockerfile="FROM ubuntu:20.04", + ) + devbox = await async_sdk_client.devbox.create(blueprint_id=blueprint.id) + snapshot = await devbox.snapshot_disk(name=unique_name("sdk-smoketest-async-scenario-snap")) + + # Shut down the devbox - it's not needed after creating the snapshot + try: + await devbox.shutdown() + except Exception: + pass + + builder = ( + async_sdk_client.scenario.builder("sdk-smoketest-async-builder-snapshot") + .from_snapshot(snapshot) + .with_problem_statement("Async snapshot test problem") + .with_metadata(SMOKETEST_METADATA) + .add_command_scorer("async-snapshot-scorer", command="echo 1.0") + ) + + info = await push_or_update_scenario(async_sdk_client, builder) + + assert info.name == "sdk-smoketest-async-builder-snapshot" + assert info.input_context.problem_statement == "Async snapshot test problem" + assert info.environment is not None + assert info.environment.snapshot_id == snapshot.id diff --git a/tests/smoketests/sdk/test_scenario.py b/tests/smoketests/sdk/test_scenario.py index af8d81486..0e9c82f73 100644 --- a/tests/smoketests/sdk/test_scenario.py +++ b/tests/smoketests/sdk/test_scenario.py @@ -4,12 +4,62 @@ import pytest -from runloop_api_client.sdk import RunloopSDK +from runloop_api_client.sdk import RunloopSDK, ScenarioBuilder +from tests.smoketests.utils import unique_name +from runloop_api_client.types import ScenarioView +from runloop_api_client.sdk._types import SDKScenarioUpdateParams +from runloop_api_client.sdk._helpers import filter_params pytestmark = [pytest.mark.smoketest] TWO_MINUTE_TIMEOUT = 120 FIVE_MINUTE_TIMEOUT = 300 +TEN_MINUTE_TIMEOUT = 600 + +# Metadata tag for all smoketest scenarios (for easy identification/cleanup) +SMOKETEST_METADATA = {"smoketest": "true"} + + +def push_or_update_scenario(sdk_client: RunloopSDK, builder: ScenarioBuilder) 
-> ScenarioView: + """Push a new scenario or update existing one with the same name. + + This is a workaround until scenario delete endpoint is available. + Uses fixed scenario names to avoid littering the platform with test scenarios. + + When updating an existing scenario, this function will delete the OLD blueprint/snapshot + that's no longer needed (if different from the new one). The NEW blueprint/snapshot + is kept so the scenario remains runnable. + """ + # Check if scenario already exists + scenarios = sdk_client.scenario.list(name=builder.name, limit=1) + + if scenarios: + # Get old scenario info to find old blueprint/snapshot IDs + scenario = scenarios[0] + env = scenario.get_info().environment + old_blueprint_id = env.blueprint_id if env else None + old_snapshot_id = env.snapshot_id if env else None + + # Get new blueprint/snapshot IDs from builder + new_blueprint_id = builder._blueprint.id if builder._blueprint else None + new_snapshot_id = builder._snapshot.id if builder._snapshot else None + + # Update existing scenario with builder's params + params = builder._build_params() + result = scenario.update(**filter_params(params, SDKScenarioUpdateParams)) + + # Delete OLD blueprint/snapshot if they're being replaced + if old_blueprint_id and old_blueprint_id != new_blueprint_id: + sdk_client.blueprint.from_id(old_blueprint_id).delete() + + if old_snapshot_id and old_snapshot_id != new_snapshot_id: + sdk_client.snapshot.from_id(old_snapshot_id).delete() + + return result + else: + # Create new scenario - keep the blueprint/snapshot (scenario needs them) + scenario = builder.push() + return scenario.get_info() class TestScenarioRetrieval: @@ -52,7 +102,7 @@ class TestScenarioRun: """Test scenario run operations.""" @pytest.mark.timeout(FIVE_MINUTE_TIMEOUT) - def test_scenario_run_lifecycle(self, sdk_client: RunloopSDK) -> None: + def test_scenario_run_async_lifecycle(self, sdk_client: RunloopSDK) -> None: """Test running a scenario and accessing the devbox. 
This test: @@ -63,7 +113,7 @@ def test_scenario_run_lifecycle(self, sdk_client: RunloopSDK) -> None: 5. Cancels the run """ # Find a scenario to run - scenarios = sdk_client.scenario.list(limit=5) + scenarios = sdk_client.scenario.list(limit=1) if not scenarios: pytest.skip("No scenarios available to test run") @@ -72,6 +122,7 @@ def test_scenario_run_lifecycle(self, sdk_client: RunloopSDK) -> None: # Start a run run = scenario.run_async(run_name="sdk-smoketest-run") + devbox = None try: assert run.id is not None @@ -82,7 +133,8 @@ def test_scenario_run_lifecycle(self, sdk_client: RunloopSDK) -> None: # Access devbox devbox = run.devbox - assert devbox.id == run.devbox_id + info = devbox.get_info() + assert info.status == "running" # Get run info info = run.get_info() @@ -94,13 +146,14 @@ def test_scenario_run_lifecycle(self, sdk_client: RunloopSDK) -> None: try: run.cancel() except Exception: - pass # Best effort cleanup + if devbox: + devbox.shutdown() @pytest.mark.timeout(FIVE_MINUTE_TIMEOUT) - def test_scenario_run_and_await_env_ready(self, sdk_client: RunloopSDK) -> None: - """Test run_and_await_env_ready convenience method.""" + def test_scenario_run(self, sdk_client: RunloopSDK) -> None: + """Test run convenience method.""" # Find a scenario to run - scenarios = sdk_client.scenario.list(limit=5) + scenarios = sdk_client.scenario.list(limit=1) if not scenarios: pytest.skip("No scenarios available to test run") @@ -108,6 +161,7 @@ def test_scenario_run_and_await_env_ready(self, sdk_client: RunloopSDK) -> None: # Start a run and wait for environment in one call run = scenario.run(run_name="sdk-smoketest-await") + devbox = None try: assert run.id is not None @@ -123,4 +177,85 @@ def test_scenario_run_and_await_env_ready(self, sdk_client: RunloopSDK) -> None: try: run.cancel() except Exception: - pass + if devbox: + devbox.shutdown() + + +class TestScenarioBuilder: + """Test ScenarioBuilder operations.""" + + @pytest.mark.timeout(TWO_MINUTE_TIMEOUT) + def 
test_scenario_builder_minimal(self, sdk_client: RunloopSDK) -> None: + """Test creating/updating a minimal scenario with just problem statement and scorer.""" + builder = ( + sdk_client.scenario.builder("sdk-smoketest-builder-minimal") + .with_problem_statement("Minimal test problem statement") + .with_metadata(SMOKETEST_METADATA) + .add_command_scorer("minimal-scorer", command="echo 1.0") + ) + + info = push_or_update_scenario(sdk_client, builder) + + assert info.name == "sdk-smoketest-builder-minimal" + assert info.input_context.problem_statement == "Minimal test problem statement" + assert len(info.scoring_contract.scoring_function_parameters) == 1 + assert info.scoring_contract.scoring_function_parameters[0].name == "minimal-scorer" + + @pytest.mark.timeout(FIVE_MINUTE_TIMEOUT) + def test_scenario_builder_with_blueprint(self, sdk_client: RunloopSDK) -> None: + """Test creating/updating a scenario from a blueprint. + """ + blueprint = sdk_client.blueprint.create( + name=unique_name("sdk-smoketest-scenario-bp"), + dockerfile="FROM ubuntu:20.04", + ) + + builder = ( + sdk_client.scenario.builder("sdk-smoketest-builder-blueprint") + .from_blueprint(blueprint) + .with_working_directory("/home/user") + .with_problem_statement("Blueprint test problem") + .with_metadata(SMOKETEST_METADATA) + .add_command_scorer("blueprint-scorer", command="echo 1.0") + ) + + info = push_or_update_scenario(sdk_client, builder) + + assert info.name == "sdk-smoketest-builder-blueprint" + assert info.input_context.problem_statement == "Blueprint test problem" + assert info.environment is not None + assert info.environment.blueprint_id == blueprint.id + assert info.environment.working_directory == "/home/user" + + @pytest.mark.timeout(TEN_MINUTE_TIMEOUT) + def test_scenario_builder_with_snapshot(self, sdk_client: RunloopSDK) -> None: + """Test creating/updating a scenario from a snapshot. 
+ """ + # Create blueprint -> devbox -> snapshot chain + blueprint = sdk_client.blueprint.create( + name=unique_name("sdk-smoketest-scenario-snap-bp"), + dockerfile="FROM ubuntu:20.04", + ) + devbox = sdk_client.devbox.create(blueprint_id=blueprint.id) + snapshot = devbox.snapshot_disk(name=unique_name("sdk-smoketest-scenario-snap")) + + # Shut down the devbox - it's not needed after creating the snapshot + try: + devbox.shutdown() + except Exception: + pass + + builder = ( + sdk_client.scenario.builder("sdk-smoketest-builder-snapshot") + .from_snapshot(snapshot) + .with_problem_statement("Snapshot test problem") + .with_metadata(SMOKETEST_METADATA) + .add_command_scorer("snapshot-scorer", command="echo 1.0") + ) + + info = push_or_update_scenario(sdk_client, builder) + + assert info.name == "sdk-smoketest-builder-snapshot" + assert info.input_context.problem_statement == "Snapshot test problem" + assert info.environment is not None + assert info.environment.snapshot_id == snapshot.id From c145f3b1d1a263005ad75a79cc91c196bccae327 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 16:58:07 -0800 Subject: [PATCH 10/31] update sdk smoke tests with all ops --- tests/smoketests/sdk/test_async_sdk.py | 5 ++++- tests/smoketests/sdk/test_sdk.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/smoketests/sdk/test_async_sdk.py b/tests/smoketests/sdk/test_async_sdk.py index 49f7e961d..fd8c03ca8 100644 --- a/tests/smoketests/sdk/test_async_sdk.py +++ b/tests/smoketests/sdk/test_async_sdk.py @@ -16,12 +16,15 @@ class TestAsyncRunloopSDKInitialization: @pytest.mark.timeout(THIRTY_SECOND_TIMEOUT) async def test_sdk_instance_creation(self, async_sdk_client: AsyncRunloopSDK) -> None: - """Test that async SDK instance is created successfully with all client properties.""" + """Test that async SDK instance is created successfully with all operations.""" assert async_sdk_client is not None assert async_sdk_client.devbox is not None 
assert async_sdk_client is not None assert async_sdk_client.devbox is not None assert async_sdk_client.blueprint is not None assert async_sdk_client.snapshot is not None assert async_sdk_client.storage_object is not None + assert async_sdk_client.scorer is not None + assert async_sdk_client.agent is not None + assert async_sdk_client.scenario is not None @pytest.mark.timeout(THIRTY_SECOND_TIMEOUT) async def test_legacy_api_access(self, async_sdk_client: AsyncRunloopSDK) -> None: diff --git a/tests/smoketests/sdk/test_sdk.py b/tests/smoketests/sdk/test_sdk.py index b55a98112..f79b88d43 100644 --- a/tests/smoketests/sdk/test_sdk.py +++ b/tests/smoketests/sdk/test_sdk.py @@ -16,12 +16,15 @@ class TestRunloopSDKInitialization: @pytest.mark.timeout(THIRTY_SECOND_TIMEOUT) def test_sdk_instance_creation(self, sdk_client: RunloopSDK) -> None: - """Test that SDK instance is created successfully with all client properties.""" + """Test that SDK instance is created successfully with all operations.""" assert sdk_client is not None assert sdk_client.devbox is not None assert sdk_client.blueprint is not None assert sdk_client.snapshot is not None assert sdk_client.storage_object is not None + assert sdk_client.scorer is not None + assert sdk_client.agent is not None + assert sdk_client.scenario is not None @pytest.mark.timeout(THIRTY_SECOND_TIMEOUT) def test_legacy_api_access(self, sdk_client: RunloopSDK) -> None: From c1e993670cc301e0c6d6b954a492bec05a960d27 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 17:01:45 -0800 Subject: [PATCH 11/31] avoid modifying _scorers internal state when normalizing weights (create copy instead) --- src/runloop_api_client/sdk/async_scenario_builder.py | 5 ++--- src/runloop_api_client/sdk/scenario_builder.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index 9f64993ff..b46fa1abe 100644 --- 
a/src/runloop_api_client/sdk/async_scenario_builder.py +++
b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -401,8 +401,7 @@ def _build_params(self) -> ScenarioCreateParams: # Normalize weights to sum to 1.0 total_weight = sum(s["weight"] for s in self._scorers) - for s in self._scorers: - s["weight"] = s["weight"] / total_weight + normalized_scorers: List[ScoringFunctionParam] = [{**s, "weight": s["weight"] / total_weight} for s in self._scorers] params: ScenarioCreateParams = { "name": self._name, @@ -410,7 +409,7 @@ def _build_params(self) -> ScenarioCreateParams: "problem_statement": self._problem_statement, }, "scoring_contract": { - "scoring_function_parameters": self._scorers, + "scoring_function_parameters": normalized_scorers, }, } diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index 295d90f24..326f8d033 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -401,8 +401,7 @@ def _build_params(self) -> ScenarioCreateParams: # Normalize weights to sum to 1.0 total_weight = sum(s["weight"] for s in self._scorers) - for s in self._scorers: - s["weight"] = s["weight"] / total_weight + normalized_scorers: List[ScoringFunctionParam] = [{**s, "weight": s["weight"] / total_weight} for s in self._scorers] params: ScenarioCreateParams = { "name": self._name, @@ -410,7 +409,7 @@ def _build_params(self) -> ScenarioCreateParams: "problem_statement": self._problem_statement, }, "scoring_contract": { - "scoring_function_parameters": self._scorers, + "scoring_function_parameters": normalized_scorers, }, } From 259248233da28f380e8416a429e85ab66b79ca54 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 17:02:12 -0800 Subject: [PATCH 12/31] formatting fixes --- src/runloop_api_client/sdk/async_scenario_builder.py | 4 +++- src/runloop_api_client/sdk/scenario_builder.py | 4 +++- tests/smoketests/sdk/test_async_scenario.py | 6 ++---- tests/smoketests/sdk/test_scenario.py | 6 ++---- 4 
files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index b46fa1abe..a04e399fd 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -401,7 +401,9 @@ def _build_params(self) -> ScenarioCreateParams: # Normalize weights to sum to 1.0 total_weight = sum(s["weight"] for s in self._scorers) - normalized_scorers: List[ScoringFunctionParam] = [{**s, "weight": s["weight"] / total_weight} for s in self._scorers] + normalized_scorers: List[ScoringFunctionParam] = [ + {**s, "weight": s["weight"] / total_weight} for s in self._scorers + ] params: ScenarioCreateParams = { "name": self._name, diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index 326f8d033..f76c34561 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -401,7 +401,9 @@ def _build_params(self) -> ScenarioCreateParams: # Normalize weights to sum to 1.0 total_weight = sum(s["weight"] for s in self._scorers) - normalized_scorers: List[ScoringFunctionParam] = [{**s, "weight": s["weight"] / total_weight} for s in self._scorers] + normalized_scorers: List[ScoringFunctionParam] = [ + {**s, "weight": s["weight"] / total_weight} for s in self._scorers + ] params: ScenarioCreateParams = { "name": self._name, diff --git a/tests/smoketests/sdk/test_async_scenario.py b/tests/smoketests/sdk/test_async_scenario.py index 20a23e47e..43a3b842b 100644 --- a/tests/smoketests/sdk/test_async_scenario.py +++ b/tests/smoketests/sdk/test_async_scenario.py @@ -204,8 +204,7 @@ async def test_scenario_builder_minimal(self, async_sdk_client: AsyncRunloopSDK) @pytest.mark.timeout(FIVE_MINUTE_TIMEOUT) async def test_scenario_builder_with_blueprint(self, async_sdk_client: AsyncRunloopSDK) -> None: - """Test creating/updating a 
scenario from a blueprint. - """ + """Test creating/updating a scenario from a blueprint.""" blueprint = await async_sdk_client.blueprint.create( name=unique_name("sdk-smoketest-async-scenario-bp"), dockerfile="FROM ubuntu:20.04", @@ -230,8 +229,7 @@ async def test_scenario_builder_with_blueprint(self, async_sdk_client: AsyncRunl @pytest.mark.timeout(TEN_MINUTE_TIMEOUT) async def test_scenario_builder_with_snapshot(self, async_sdk_client: AsyncRunloopSDK) -> None: - """Test creating/updating a scenario from a snapshot. - """ + """Test creating/updating a scenario from a snapshot.""" # Create blueprint -> devbox -> snapshot chain blueprint = await async_sdk_client.blueprint.create( name=unique_name("sdk-smoketest-async-scenario-snap-bp"), diff --git a/tests/smoketests/sdk/test_scenario.py b/tests/smoketests/sdk/test_scenario.py index 0e9c82f73..e69ff3f7c 100644 --- a/tests/smoketests/sdk/test_scenario.py +++ b/tests/smoketests/sdk/test_scenario.py @@ -203,8 +203,7 @@ def test_scenario_builder_minimal(self, sdk_client: RunloopSDK) -> None: @pytest.mark.timeout(FIVE_MINUTE_TIMEOUT) def test_scenario_builder_with_blueprint(self, sdk_client: RunloopSDK) -> None: - """Test creating/updating a scenario from a blueprint. - """ + """Test creating/updating a scenario from a blueprint.""" blueprint = sdk_client.blueprint.create( name=unique_name("sdk-smoketest-scenario-bp"), dockerfile="FROM ubuntu:20.04", @@ -229,8 +228,7 @@ def test_scenario_builder_with_blueprint(self, sdk_client: RunloopSDK) -> None: @pytest.mark.timeout(TEN_MINUTE_TIMEOUT) def test_scenario_builder_with_snapshot(self, sdk_client: RunloopSDK) -> None: - """Test creating/updating a scenario from a snapshot. 
- """ + """Test creating/updating a scenario from a snapshot.""" # Create blueprint -> devbox -> snapshot chain blueprint = sdk_client.blueprint.create( name=unique_name("sdk-smoketest-scenario-snap-bp"), From 87501370caae5d1a31397274863cd65fcc21d4d8 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 17:45:56 -0800 Subject: [PATCH 13/31] update builder docstrings to use fluent pattern, replaced all references to RunloopSDK() to 'runloop' instead of 'sdk' or 'client' --- README.md | 4 +- src/runloop_api_client/sdk/async_.py | 10 +++-- .../sdk/async_scenario_builder.py | 12 +++--- .../sdk/scenario_builder.py | 12 +++--- src/runloop_api_client/sdk/sync.py | 10 +++-- tests/sdk/test_async_ops.py | 34 ++++++++--------- tests/sdk/test_ops.py | 38 +++++++++---------- tests/smoketests/sdk/conftest.py | 14 +++---- 8 files changed, 71 insertions(+), 63 deletions(-) diff --git a/README.md b/README.md index 180f822bc..14e808a10 100644 --- a/README.md +++ b/README.md @@ -33,10 +33,10 @@ For a higher-level, Pythonic interface, check out the new [`RunloopSDK`](README- ```python from runloop_api_client import RunloopSDK -sdk = RunloopSDK() # Uses RUNLOOP_API_KEY environment variable by default +runloop = RunloopSDK() # Uses RUNLOOP_API_KEY environment variable by default # Create a devbox and execute commands with a clean, object-oriented interface -with sdk.devbox.create(name="my-devbox") as devbox: +with runloop.devbox.create(name="my-devbox") as devbox: result = devbox.cmd.exec("echo 'Hello from Runloop!'") print(result.stdout()) ``` diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py index a3ef1ffe5..558af33ba 100644 --- a/src/runloop_api_client/sdk/async_.py +++ b/src/runloop_api_client/sdk/async_.py @@ -776,10 +776,12 @@ class AsyncScenarioOps: >>> scenarios = await runloop.scenario.list() Example using builder: - >>> builder = runloop.scenario.builder("my-scenario") - >>> builder.from_blueprint_id("bp-xxx") - >>> 
builder.with_problem_statement("Fix the bug") - >>> builder.add_test_scorer("tests", test_command="pytest") + >>> builder = ( + ... runloop.scenario.builder("my-scenario") + ... .from_blueprint(blueprint) + ... .with_problem_statement("Fix the bug") + ... .add_test_scorer("tests", test_command="pytest") + ... ) >>> scenario = await builder.push() """ diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index a04e399fd..d83210a8e 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -31,11 +31,13 @@ class AsyncScenarioBuilder: before pushing it to the platform. Example: - >>> builder = sdk.scenario.builder("my-scenario") - >>> builder.from_blueprint(blueprint) - >>> builder.with_working_directory("/app") - >>> builder.with_problem_statement("Fix the bug in main.py") - >>> builder.add_test_scorer("tests", test_command="pytest") + >>> builder = ( + ... runloop.scenario.builder("my-scenario") + ... .from_blueprint(blueprint) + ... .with_working_directory("/app") + ... .with_problem_statement("Fix the bug in main.py") + ... .add_test_scorer("tests", test_command="pytest") + ... ) >>> scenario = await builder.push() """ diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index f76c34561..3c17b8e31 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -31,11 +31,13 @@ class ScenarioBuilder: before pushing it to the platform. Example: - >>> builder = sdk.scenario.builder("my-scenario") - >>> builder.from_blueprint(blueprint) - >>> builder.with_working_directory("/app") - >>> builder.with_problem_statement("Fix the bug in main.py") - >>> builder.add_test_scorer("tests", test_command="pytest") + >>> builder = ( + ... runloop.scenario.builder("my-scenario") + ... .from_blueprint(blueprint) + ... 
.with_working_directory("/app") + ... .with_problem_statement("Fix the bug in main.py") + ... .add_test_scorer("tests", test_command="pytest") + ... ) >>> scenario = builder.push() """ diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py index 151ce5c73..1252710c4 100644 --- a/src/runloop_api_client/sdk/sync.py +++ b/src/runloop_api_client/sdk/sync.py @@ -797,10 +797,12 @@ class ScenarioOps: >>> scenarios = runloop.scenario.list() Example using builder: - >>> builder = runloop.scenario.builder("my-scenario") - >>> builder.from_blueprint_id("bp-xxx") - >>> builder.with_problem_statement("Fix the bug") - >>> builder.add_test_scorer("tests", test_command="pytest") + >>> builder = ( + ... runloop.scenario.builder("my-scenario") + ... .from_blueprint(blueprint) + ... .with_problem_statement("Fix the bug") + ... .add_test_scorer("tests", test_command="pytest") + ... ) >>> scenario = builder.push() """ diff --git a/tests/sdk/test_async_ops.py b/tests/sdk/test_async_ops.py index 49c4c4006..9fca5bb4b 100644 --- a/tests/sdk/test_async_ops.py +++ b/tests/sdk/test_async_ops.py @@ -1193,33 +1193,33 @@ class TestAsyncRunloopSDK: def test_init(self) -> None: """Test AsyncRunloopSDK initialization.""" - sdk = AsyncRunloopSDK(bearer_token="test-token") - assert sdk.api is not None - assert isinstance(sdk.agent, AsyncAgentOps) - assert isinstance(sdk.devbox, AsyncDevboxOps) - assert isinstance(sdk.scorer, AsyncScorerOps) - assert isinstance(sdk.snapshot, AsyncSnapshotOps) - assert isinstance(sdk.blueprint, AsyncBlueprintOps) - assert isinstance(sdk.storage_object, AsyncStorageObjectOps) + runloop = AsyncRunloopSDK(bearer_token="test-token") + assert runloop.api is not None + assert isinstance(runloop.agent, AsyncAgentOps) + assert isinstance(runloop.devbox, AsyncDevboxOps) + assert isinstance(runloop.scorer, AsyncScorerOps) + assert isinstance(runloop.snapshot, AsyncSnapshotOps) + assert isinstance(runloop.blueprint, AsyncBlueprintOps) + assert 
isinstance(runloop.storage_object, AsyncStorageObjectOps) @pytest.mark.asyncio async def test_aclose(self) -> None: """Test aclose method.""" - sdk = AsyncRunloopSDK(bearer_token="test-token") + runloop = AsyncRunloopSDK(bearer_token="test-token") # Verify aclose doesn't raise - await sdk.aclose() + await runloop.aclose() @pytest.mark.asyncio async def test_context_manager(self) -> None: """Test context manager behavior.""" - async with AsyncRunloopSDK(bearer_token="test-token") as sdk: - assert sdk.api is not None + async with AsyncRunloopSDK(bearer_token="test-token") as runloop: + assert runloop.api is not None # Verify context manager properly closes (implementation detail of context manager protocol) def test_api_property(self) -> None: """Test api property access.""" - sdk = AsyncRunloopSDK(bearer_token="test-token") - assert sdk.api is not None - assert hasattr(sdk.api, "devboxes") - assert hasattr(sdk.api, "blueprints") - assert hasattr(sdk.api, "objects") + runloop = AsyncRunloopSDK(bearer_token="test-token") + assert runloop.api is not None + assert hasattr(runloop.api, "devboxes") + assert hasattr(runloop.api, "blueprints") + assert hasattr(runloop.api, "objects") diff --git a/tests/sdk/test_ops.py b/tests/sdk/test_ops.py index 724b2ee17..f7a566265 100644 --- a/tests/sdk/test_ops.py +++ b/tests/sdk/test_ops.py @@ -1070,36 +1070,36 @@ class TestRunloopSDK: def test_init(self) -> None: """Test RunloopSDK initialization.""" - sdk = RunloopSDK(bearer_token="test-token") - assert sdk.api is not None - assert isinstance(sdk.agent, AgentOps) - assert isinstance(sdk.devbox, DevboxOps) - assert isinstance(sdk.scorer, ScorerOps) - assert isinstance(sdk.snapshot, SnapshotOps) - assert isinstance(sdk.blueprint, BlueprintOps) - assert isinstance(sdk.storage_object, StorageObjectOps) + runloop = RunloopSDK(bearer_token="test-token") + assert runloop.api is not None + assert isinstance(runloop.agent, AgentOps) + assert isinstance(runloop.devbox, DevboxOps) + assert 
isinstance(runloop.scorer, ScorerOps) + assert isinstance(runloop.snapshot, SnapshotOps) + assert isinstance(runloop.blueprint, BlueprintOps) + assert isinstance(runloop.storage_object, StorageObjectOps) def test_init_with_max_retries(self) -> None: """Test RunloopSDK initialization with max_retries.""" - sdk = RunloopSDK(bearer_token="test-token", max_retries=3) - assert sdk.api is not None + runloop = RunloopSDK(bearer_token="test-token", max_retries=3) + assert runloop.api is not None def test_close(self) -> None: """Test close method.""" - sdk = RunloopSDK(bearer_token="test-token") + runloop = RunloopSDK(bearer_token="test-token") # Verify close doesn't raise - sdk.close() + runloop.close() def test_context_manager(self) -> None: """Test context manager behavior.""" - with RunloopSDK(bearer_token="test-token") as sdk: - assert sdk.api is not None + with RunloopSDK(bearer_token="test-token") as runloop: + assert runloop.api is not None # Verify context manager properly closes (implementation detail of context manager protocol) def test_api_property(self) -> None: """Test api property access.""" - sdk = RunloopSDK(bearer_token="test-token") - assert sdk.api is not None - assert hasattr(sdk.api, "devboxes") - assert hasattr(sdk.api, "blueprints") - assert hasattr(sdk.api, "objects") + runloop = RunloopSDK(bearer_token="test-token") + assert runloop.api is not None + assert hasattr(runloop.api, "devboxes") + assert hasattr(runloop.api, "blueprints") + assert hasattr(runloop.api, "objects") diff --git a/tests/smoketests/sdk/conftest.py b/tests/smoketests/sdk/conftest.py index 003b0f314..b17a4cc1c 100644 --- a/tests/smoketests/sdk/conftest.py +++ b/tests/smoketests/sdk/conftest.py @@ -24,16 +24,16 @@ def sdk_client() -> Iterator[RunloopSDK]: if not bearer_token: pytest.skip("RUNLOOP_API_KEY environment variable not set") - client = RunloopSDK( + runloop = RunloopSDK( bearer_token=bearer_token, base_url=base_url, ) try: - yield client + yield runloop finally: try: - 
client.close() + runloop.close() except Exception: pass @@ -52,17 +52,17 @@ async def async_sdk_client() -> AsyncIterator[AsyncRunloopSDK]: if not bearer_token: pytest.skip("RUNLOOP_API_KEY environment variable not set") - client = AsyncRunloopSDK( + runloop = AsyncRunloopSDK( bearer_token=bearer_token, base_url=base_url, ) try: - async with client: - yield client + async with runloop: + yield runloop except Exception: # If context manager fails, try manual cleanup try: - await client.aclose() + await runloop.aclose() except Exception: pass From b2084a9beda3a0579df5522eece67056448f8cc1 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 17:59:35 -0800 Subject: [PATCH 14/31] clarify from_blueprint and from_snapshot docstrings --- src/runloop_api_client/sdk/async_scenario_builder.py | 4 ++-- src/runloop_api_client/sdk/scenario_builder.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index d83210a8e..4138cbb32 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -85,7 +85,7 @@ def name(self) -> str: return self._name def from_blueprint(self, blueprint: AsyncBlueprint) -> Self: - """Set the blueprint for the scenario environment. + """Set a blueprint to define the baseline environment for the scenario. :param blueprint: Blueprint to use :type blueprint: AsyncBlueprint @@ -97,7 +97,7 @@ def from_blueprint(self, blueprint: AsyncBlueprint) -> Self: return self def from_snapshot(self, snapshot: AsyncSnapshot) -> Self: - """Set the snapshot for the scenario environment. + """Set a snapshot to define the baseline environment for the scenario. 
:param snapshot: Snapshot to use :type snapshot: AsyncSnapshot diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index 3c17b8e31..17951f280 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -85,7 +85,7 @@ def name(self) -> str: return self._name def from_blueprint(self, blueprint: Blueprint) -> Self: - """Set the blueprint for the scenario environment. + """Set a blueprint to define the baseline environment for the scenario. :param blueprint: Blueprint to use :type blueprint: Blueprint @@ -97,7 +97,7 @@ def from_blueprint(self, blueprint: Blueprint) -> Self: return self def from_snapshot(self, snapshot: Snapshot) -> Self: - """Set the snapshot for the scenario environment. + """Set a snapshot to define the baseline environment for the scenario. :param snapshot: Snapshot to use :type snapshot: Snapshot From 6132ca8b922d26483236b6b1cc0becf68dcbcf2d Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 18:06:36 -0800 Subject: [PATCH 15/31] rename add_scorer methods to be more clear --- .../sdk/async_scenario_builder.py | 8 +++--- .../sdk/scenario_builder.py | 8 +++--- tests/sdk/test_async_scenario_builder.py | 26 +++++++++---------- tests/sdk/test_scenario_builder.py | 26 +++++++++---------- tests/smoketests/sdk/test_async_scenario.py | 6 ++--- tests/smoketests/sdk/test_scenario.py | 6 ++--- 6 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index 4138cbb32..5bc04eb0a 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -151,7 +151,7 @@ def _add_scorer(self, name: str, weight: float, scorer: Scorer) -> Self: self._scorers.append({"name": name, "weight": weight, "scorer": scorer}) return self - def add_test_scorer( + def 
add_test_command_scorer( self, name: str, *, @@ -180,7 +180,7 @@ def add_test_scorer( scorer["test_files"] = test_files return self._add_scorer(name, weight, scorer) - def add_command_scorer( + def add_shell_command_scorer( self, name: str, *, @@ -204,7 +204,7 @@ def add_command_scorer( } return self._add_scorer(name, weight, scorer) - def add_bash_scorer( + def add_bash_script_scorer( self, name: str, *, @@ -230,7 +230,7 @@ def add_bash_scorer( } return self._add_scorer(name, weight, scorer) - def add_python_scorer( + def add_python_script_scorer( self, name: str, *, diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index 17951f280..0ac06e31a 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -151,7 +151,7 @@ def _add_scorer(self, name: str, weight: float, scorer: Scorer) -> Self: self._scorers.append({"name": name, "weight": weight, "scorer": scorer}) return self - def add_test_scorer( + def add_test_command_scorer( self, name: str, *, @@ -180,7 +180,7 @@ def add_test_scorer( scorer["test_files"] = test_files return self._add_scorer(name, weight, scorer) - def add_command_scorer( + def add_shell_command_scorer( self, name: str, *, @@ -204,7 +204,7 @@ def add_command_scorer( } return self._add_scorer(name, weight, scorer) - def add_bash_scorer( + def add_bash_script_scorer( self, name: str, *, @@ -230,7 +230,7 @@ def add_bash_scorer( } return self._add_scorer(name, weight, scorer) - def add_python_scorer( + def add_python_script_scorer( self, name: str, *, diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py index 792b7c6b6..236aab520 100644 --- a/tests/sdk/test_async_scenario_builder.py +++ b/tests/sdk/test_async_scenario_builder.py @@ -74,7 +74,7 @@ def test_scorers(self, builder: AsyncScenarioBuilder) -> None: test_files: list[ScorerTestBasedScoringFunctionTestFile] = [ {"file_path": 
"test_main.py", "file_contents": "def test_foo(): pass"} ] - result = builder.add_test_scorer("test-scorer", test_command="pytest", weight=2.0, test_files=test_files) + result = builder.add_test_command_scorer("test-scorer", test_command="pytest", weight=2.0, test_files=test_files) assert result is builder assert builder._scorers[0]["name"] == "test-scorer" assert builder._scorers[0]["weight"] == 2.0 @@ -83,17 +83,17 @@ def test_scorers(self, builder: AsyncScenarioBuilder) -> None: assert builder._scorers[0]["scorer"].get("test_files") == test_files # Command scorer - builder.add_command_scorer("cmd-scorer", command="./check.sh") + builder.add_shell_command_scorer("cmd-scorer", command="./check.sh") assert builder._scorers[1]["scorer"]["type"] == "command_scorer" assert builder._scorers[1]["scorer"].get("command") == "./check.sh" # Bash scorer - builder.add_bash_scorer("bash-scorer", bash_script="echo 'score=1.0'") + builder.add_bash_script_scorer("bash-scorer", bash_script="echo 'score=1.0'") assert builder._scorers[2]["scorer"]["type"] == "bash_script_scorer" assert builder._scorers[2]["scorer"].get("bash_script") == "echo 'score=1.0'" # Python scorer with optional params - builder.add_python_scorer( + builder.add_python_script_scorer( "python-scorer", python_script="print('1.0')", python_version_constraint=">=3.10", @@ -121,15 +121,15 @@ def test_scorers(self, builder: AsyncScenarioBuilder) -> None: def test_add_scorer_rejects_invalid_weight(self, builder: AsyncScenarioBuilder) -> None: """Test that adding a scorer with zero or negative weight raises ValueError.""" with pytest.raises(ValueError, match="Scorer weight must be positive"): - builder.add_bash_scorer("bad", bash_script="echo 1", weight=0.0) + builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=0.0) with pytest.raises(ValueError, match="Scorer weight must be positive"): - builder.add_bash_scorer("bad", bash_script="echo 1", weight=-1.0) + builder.add_bash_script_scorer("bad", 
bash_script="echo 1", weight=-1.0) def test_build_params_validation(self, builder: AsyncScenarioBuilder) -> None: """Test _build_params raises for missing required fields.""" # Missing problem statement - builder.add_test_scorer("test", test_command="pytest") + builder.add_test_command_scorer("test", test_command="pytest") with pytest.raises(ValueError, match="Problem statement is required"): builder._build_params() @@ -143,7 +143,7 @@ def test_build_params_with_all_options(self, builder: AsyncScenarioBuilder, mock """Test _build_params with all optional fields set.""" builder.with_problem_statement("Fix the bug") builder.with_additional_context({"hint": "line 42"}) - builder.add_test_scorer("tests", test_command="pytest") + builder.add_test_command_scorer("tests", test_command="pytest") builder.from_blueprint(mock_blueprint) builder.with_working_directory("/app") builder.with_metadata({"team": "infra"}) @@ -168,9 +168,9 @@ def test_build_params_with_all_options(self, builder: AsyncScenarioBuilder, mock def test_build_params_normalizes_weights(self, builder: AsyncScenarioBuilder) -> None: """Test that _build_params normalizes scorer weights to sum to 1.0.""" builder.with_problem_statement("Fix the bug") - builder.add_bash_scorer("scorer1", bash_script="echo 1", weight=1.0) - builder.add_bash_scorer("scorer2", bash_script="echo 2", weight=2.0) - builder.add_bash_scorer("scorer3", bash_script="echo 3", weight=3.0) + builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0) + builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0) + builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0) params = builder._build_params() scorers = params["scoring_contract"]["scoring_function_parameters"] @@ -193,7 +193,7 @@ async def test_push_calls_api_and_returns_scenario( mock_async_client.scenarios.create.return_value.id = "scn-new-123" builder.with_problem_statement("Fix the bug") - builder.add_test_scorer("tests", 
test_command="pytest") + builder.add_test_command_scorer("tests", test_command="pytest") scenario = await builder.push() @@ -211,7 +211,7 @@ def test_fluent_chaining(self, builder: AsyncScenarioBuilder, mock_blueprint: As .with_working_directory("/app") .with_problem_statement("Fix the bug") .with_additional_context({"hint": "check main.py"}) - .add_test_scorer("tests", test_command="pytest") + .add_test_command_scorer("tests", test_command="pytest") .with_metadata({"team": "infra"}) .with_reference_output("diff content") .with_required_env_vars(["API_KEY"]) diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py index d9d90b67f..922fd6020 100644 --- a/tests/sdk/test_scenario_builder.py +++ b/tests/sdk/test_scenario_builder.py @@ -72,7 +72,7 @@ def test_scorers(self, builder: ScenarioBuilder) -> None: test_files: list[ScorerTestBasedScoringFunctionTestFile] = [ {"file_path": "test_main.py", "file_contents": "def test_foo(): pass"} ] - result = builder.add_test_scorer("test-scorer", test_command="pytest", weight=2.0, test_files=test_files) + result = builder.add_test_command_scorer("test-scorer", test_command="pytest", weight=2.0, test_files=test_files) assert result is builder assert builder._scorers[0]["name"] == "test-scorer" assert builder._scorers[0]["weight"] == 2.0 @@ -81,17 +81,17 @@ def test_scorers(self, builder: ScenarioBuilder) -> None: assert builder._scorers[0]["scorer"].get("test_files") == test_files # Command scorer - builder.add_command_scorer("cmd-scorer", command="./check.sh") + builder.add_shell_command_scorer("cmd-scorer", command="./check.sh") assert builder._scorers[1]["scorer"]["type"] == "command_scorer" assert builder._scorers[1]["scorer"].get("command") == "./check.sh" # Bash scorer - builder.add_bash_scorer("bash-scorer", bash_script="echo 'score=1.0'") + builder.add_bash_script_scorer("bash-scorer", bash_script="echo 'score=1.0'") assert builder._scorers[2]["scorer"]["type"] == "bash_script_scorer" assert 
builder._scorers[2]["scorer"].get("bash_script") == "echo 'score=1.0'" # Python scorer with optional params - builder.add_python_scorer( + builder.add_python_script_scorer( "python-scorer", python_script="print('1.0')", python_version_constraint=">=3.10", @@ -119,15 +119,15 @@ def test_scorers(self, builder: ScenarioBuilder) -> None: def test_add_scorer_rejects_invalid_weight(self, builder: ScenarioBuilder) -> None: """Test that adding a scorer with zero or negative weight raises ValueError.""" with pytest.raises(ValueError, match="Scorer weight must be positive"): - builder.add_bash_scorer("bad", bash_script="echo 1", weight=0.0) + builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=0.0) with pytest.raises(ValueError, match="Scorer weight must be positive"): - builder.add_bash_scorer("bad", bash_script="echo 1", weight=-1.0) + builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=-1.0) def test_build_params_validation(self, builder: ScenarioBuilder) -> None: """Test _build_params raises for missing required fields.""" # Missing problem statement - builder.add_test_scorer("test", test_command="pytest") + builder.add_test_command_scorer("test", test_command="pytest") with pytest.raises(ValueError, match="Problem statement is required"): builder._build_params() @@ -141,7 +141,7 @@ def test_build_params_with_all_options(self, builder: ScenarioBuilder, mock_blue """Test _build_params with all optional fields set.""" builder.with_problem_statement("Fix the bug") builder.with_additional_context({"hint": "line 42"}) - builder.add_test_scorer("tests", test_command="pytest") + builder.add_test_command_scorer("tests", test_command="pytest") builder.from_blueprint(mock_blueprint) builder.with_working_directory("/app") builder.with_metadata({"team": "infra"}) @@ -166,9 +166,9 @@ def test_build_params_with_all_options(self, builder: ScenarioBuilder, mock_blue def test_build_params_normalizes_weights(self, builder: ScenarioBuilder) -> None: """Test 
that _build_params normalizes scorer weights to sum to 1.0.""" builder.with_problem_statement("Fix the bug") - builder.add_bash_scorer("scorer1", bash_script="echo 1", weight=1.0) - builder.add_bash_scorer("scorer2", bash_script="echo 2", weight=2.0) - builder.add_bash_scorer("scorer3", bash_script="echo 3", weight=3.0) + builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0) + builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0) + builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0) params = builder._build_params() scorers = params["scoring_contract"]["scoring_function_parameters"] @@ -188,7 +188,7 @@ def test_push_calls_api_and_returns_scenario(self, builder: ScenarioBuilder, moc mock_client.scenarios.create.return_value.id = "scn-new-123" builder.with_problem_statement("Fix the bug") - builder.add_test_scorer("tests", test_command="pytest") + builder.add_test_command_scorer("tests", test_command="pytest") scenario = builder.push() @@ -206,7 +206,7 @@ def test_fluent_chaining(self, builder: ScenarioBuilder, mock_blueprint: Bluepri .with_working_directory("/app") .with_problem_statement("Fix the bug") .with_additional_context({"hint": "check main.py"}) - .add_test_scorer("tests", test_command="pytest") + .add_test_command_scorer("tests", test_command="pytest") .with_metadata({"team": "infra"}) .with_reference_output("diff content") .with_required_env_vars(["API_KEY"]) diff --git a/tests/smoketests/sdk/test_async_scenario.py b/tests/smoketests/sdk/test_async_scenario.py index 43a3b842b..32359f957 100644 --- a/tests/smoketests/sdk/test_async_scenario.py +++ b/tests/smoketests/sdk/test_async_scenario.py @@ -192,7 +192,7 @@ async def test_scenario_builder_minimal(self, async_sdk_client: AsyncRunloopSDK) async_sdk_client.scenario.builder("sdk-smoketest-async-builder-minimal") .with_problem_statement("Async minimal test problem statement") .with_metadata(SMOKETEST_METADATA) - 
.add_command_scorer("async-minimal-scorer", command="echo 1.0") + .add_shell_command_scorer("async-minimal-scorer", command="echo 1.0") ) info = await push_or_update_scenario(async_sdk_client, builder) @@ -216,7 +216,7 @@ async def test_scenario_builder_with_blueprint(self, async_sdk_client: AsyncRunl .with_working_directory("/home/user") .with_problem_statement("Async blueprint test problem") .with_metadata(SMOKETEST_METADATA) - .add_command_scorer("async-blueprint-scorer", command="echo 1.0") + .add_shell_command_scorer("async-blueprint-scorer", command="echo 1.0") ) info = await push_or_update_scenario(async_sdk_client, builder) @@ -249,7 +249,7 @@ async def test_scenario_builder_with_snapshot(self, async_sdk_client: AsyncRunlo .from_snapshot(snapshot) .with_problem_statement("Async snapshot test problem") .with_metadata(SMOKETEST_METADATA) - .add_command_scorer("async-snapshot-scorer", command="echo 1.0") + .add_shell_command_scorer("async-snapshot-scorer", command="echo 1.0") ) info = await push_or_update_scenario(async_sdk_client, builder) diff --git a/tests/smoketests/sdk/test_scenario.py b/tests/smoketests/sdk/test_scenario.py index e69ff3f7c..0b23d6b24 100644 --- a/tests/smoketests/sdk/test_scenario.py +++ b/tests/smoketests/sdk/test_scenario.py @@ -191,7 +191,7 @@ def test_scenario_builder_minimal(self, sdk_client: RunloopSDK) -> None: sdk_client.scenario.builder("sdk-smoketest-builder-minimal") .with_problem_statement("Minimal test problem statement") .with_metadata(SMOKETEST_METADATA) - .add_command_scorer("minimal-scorer", command="echo 1.0") + .add_shell_command_scorer("minimal-scorer", command="echo 1.0") ) info = push_or_update_scenario(sdk_client, builder) @@ -215,7 +215,7 @@ def test_scenario_builder_with_blueprint(self, sdk_client: RunloopSDK) -> None: .with_working_directory("/home/user") .with_problem_statement("Blueprint test problem") .with_metadata(SMOKETEST_METADATA) - .add_command_scorer("blueprint-scorer", command="echo 1.0") + 
.add_shell_command_scorer("blueprint-scorer", command="echo 1.0") ) info = push_or_update_scenario(sdk_client, builder) @@ -248,7 +248,7 @@ def test_scenario_builder_with_snapshot(self, sdk_client: RunloopSDK) -> None: .from_snapshot(snapshot) .with_problem_statement("Snapshot test problem") .with_metadata(SMOKETEST_METADATA) - .add_command_scorer("snapshot-scorer", command="echo 1.0") + .add_shell_command_scorer("snapshot-scorer", command="echo 1.0") ) info = push_or_update_scenario(sdk_client, builder) From e8d024d9a91cd154a3f4fe2a7ae50d4a0851bf83 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 18:07:11 -0800 Subject: [PATCH 16/31] format fix --- tests/sdk/test_async_scenario_builder.py | 4 +++- tests/sdk/test_scenario_builder.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py index 236aab520..f27425c35 100644 --- a/tests/sdk/test_async_scenario_builder.py +++ b/tests/sdk/test_async_scenario_builder.py @@ -74,7 +74,9 @@ def test_scorers(self, builder: AsyncScenarioBuilder) -> None: test_files: list[ScorerTestBasedScoringFunctionTestFile] = [ {"file_path": "test_main.py", "file_contents": "def test_foo(): pass"} ] - result = builder.add_test_command_scorer("test-scorer", test_command="pytest", weight=2.0, test_files=test_files) + result = builder.add_test_command_scorer( + "test-scorer", test_command="pytest", weight=2.0, test_files=test_files + ) assert result is builder assert builder._scorers[0]["name"] == "test-scorer" assert builder._scorers[0]["weight"] == 2.0 diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py index 922fd6020..0e65d84de 100644 --- a/tests/sdk/test_scenario_builder.py +++ b/tests/sdk/test_scenario_builder.py @@ -72,7 +72,9 @@ def test_scorers(self, builder: ScenarioBuilder) -> None: test_files: list[ScorerTestBasedScoringFunctionTestFile] = [ {"file_path": "test_main.py", 
"file_contents": "def test_foo(): pass"} ] - result = builder.add_test_command_scorer("test-scorer", test_command="pytest", weight=2.0, test_files=test_files) + result = builder.add_test_command_scorer( + "test-scorer", test_command="pytest", weight=2.0, test_files=test_files + ) assert result is builder assert builder._scorers[0]["name"] == "test-scorer" assert builder._scorers[0]["weight"] == 2.0 From a70af1b269e93a878ee83a7d8f125f57f6558f70 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 18:16:06 -0800 Subject: [PATCH 17/31] address type check errors in scenario builder unit tests --- tests/sdk/test_async_scenario_builder.py | 20 +++++++++++--------- tests/sdk/test_scenario_builder.py | 20 +++++++++++--------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py index f27425c35..3491fff66 100644 --- a/tests/sdk/test_async_scenario_builder.py +++ b/tests/sdk/test_async_scenario_builder.py @@ -158,14 +158,16 @@ def test_build_params_with_all_options(self, builder: AsyncScenarioBuilder, mock assert params["name"] == "test-scenario" assert params["input_context"]["problem_statement"] == "Fix the bug" - assert params["input_context"]["additional_context"] == {"hint": "line 42"} - assert params["environment_parameters"]["blueprint_id"] == "bp-123" - assert params["environment_parameters"]["working_directory"] == "/app" - assert params["metadata"] == {"team": "infra"} - assert params["reference_output"] == "diff content" - assert params["required_environment_variables"] == ["API_KEY"] - assert params["required_secret_names"] == ["db_pass"] - assert params["validation_type"] == "FORWARD" + assert params["input_context"].get("additional_context") == {"hint": "line 42"} + env_params = params.get("environment_parameters") + assert env_params is not None + assert env_params.get("blueprint_id") == "bp-123" + assert env_params.get("working_directory") == 
"/app" + assert params.get("metadata") == {"team": "infra"} + assert params.get("reference_output") == "diff content" + assert params.get("required_environment_variables") == ["API_KEY"] + assert params.get("required_secret_names") == ["db_pass"] + assert params.get("validation_type") == "FORWARD" def test_build_params_normalizes_weights(self, builder: AsyncScenarioBuilder) -> None: """Test that _build_params normalizes scorer weights to sum to 1.0.""" @@ -175,7 +177,7 @@ def test_build_params_normalizes_weights(self, builder: AsyncScenarioBuilder) -> builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0) params = builder._build_params() - scorers = params["scoring_contract"]["scoring_function_parameters"] + scorers = list(params["scoring_contract"]["scoring_function_parameters"]) # Weights 1, 2, 3 should normalize to 1/6, 2/6, 3/6 assert len(scorers) == 3 diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py index 0e65d84de..d9aee9fb1 100644 --- a/tests/sdk/test_scenario_builder.py +++ b/tests/sdk/test_scenario_builder.py @@ -156,14 +156,16 @@ def test_build_params_with_all_options(self, builder: ScenarioBuilder, mock_blue assert params["name"] == "test-scenario" assert params["input_context"]["problem_statement"] == "Fix the bug" - assert params["input_context"]["additional_context"] == {"hint": "line 42"} - assert params["environment_parameters"]["blueprint_id"] == "bp-123" - assert params["environment_parameters"]["working_directory"] == "/app" - assert params["metadata"] == {"team": "infra"} - assert params["reference_output"] == "diff content" - assert params["required_environment_variables"] == ["API_KEY"] - assert params["required_secret_names"] == ["db_pass"] - assert params["validation_type"] == "FORWARD" + assert params["input_context"].get("additional_context") == {"hint": "line 42"} + env_params = params.get("environment_parameters") + assert env_params is not None + assert 
env_params.get("blueprint_id") == "bp-123" + assert env_params.get("working_directory") == "/app" + assert params.get("metadata") == {"team": "infra"} + assert params.get("reference_output") == "diff content" + assert params.get("required_environment_variables") == ["API_KEY"] + assert params.get("required_secret_names") == ["db_pass"] + assert params.get("validation_type") == "FORWARD" def test_build_params_normalizes_weights(self, builder: ScenarioBuilder) -> None: """Test that _build_params normalizes scorer weights to sum to 1.0.""" @@ -173,7 +175,7 @@ def test_build_params_normalizes_weights(self, builder: ScenarioBuilder) -> None builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0) params = builder._build_params() - scorers = params["scoring_contract"]["scoring_function_parameters"] + scorers = list(params["scoring_contract"]["scoring_function_parameters"]) # Weights 1, 2, 3 should normalize to 1/6, 2/6, 3/6 assert len(scorers) == 3 From 1242c7f57a0dbb9f4435310a404579b7fe5b8a6e Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 18:25:14 -0800 Subject: [PATCH 18/31] rename add_scorer methods in docstrings --- src/runloop_api_client/sdk/async_.py | 2 +- src/runloop_api_client/sdk/async_scenario_builder.py | 4 ++-- src/runloop_api_client/sdk/scenario_builder.py | 4 ++-- src/runloop_api_client/sdk/sync.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py index 558af33ba..a75185594 100644 --- a/src/runloop_api_client/sdk/async_.py +++ b/src/runloop_api_client/sdk/async_.py @@ -780,7 +780,7 @@ class AsyncScenarioOps: ... runloop.scenario.builder("my-scenario") ... .from_blueprint(blueprint) ... .with_problem_statement("Fix the bug") - ... .add_test_scorer("tests", test_command="pytest") + ... .add_test_command_scorer("tests", test_command="pytest") ... 
) >>> scenario = await builder.push() """ diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index 5bc04eb0a..b785422ab 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -36,7 +36,7 @@ class AsyncScenarioBuilder: ... .from_blueprint(blueprint) ... .with_working_directory("/app") ... .with_problem_statement("Fix the bug in main.py") - ... .add_test_scorer("tests", test_command="pytest") + ... .add_test_command_scorer("tests", test_command="pytest") ... ) >>> scenario = await builder.push() """ @@ -398,7 +398,7 @@ def _build_params(self) -> ScenarioCreateParams: if not self._scorers: raise ValueError( "At least one scorer is required. " - "Call add_test_scorer(), add_bash_scorer(), or another scorer method first." + "Call add_test_command_scorer(), add_bash_script_scorer(), or another scorer method first." ) # Normalize weights to sum to 1.0 diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index 0ac06e31a..86527a690 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -36,7 +36,7 @@ class ScenarioBuilder: ... .from_blueprint(blueprint) ... .with_working_directory("/app") ... .with_problem_statement("Fix the bug in main.py") - ... .add_test_scorer("tests", test_command="pytest") + ... .add_test_command_scorer("tests", test_command="pytest") ... ) >>> scenario = builder.push() """ @@ -398,7 +398,7 @@ def _build_params(self) -> ScenarioCreateParams: if not self._scorers: raise ValueError( "At least one scorer is required. " - "Call add_test_scorer(), add_bash_scorer(), or another scorer method first." + "Call add_test_command_scorer(), add_bash_script_scorer(), or another scorer method first." 
) # Normalize weights to sum to 1.0 diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py index 1252710c4..008453236 100644 --- a/src/runloop_api_client/sdk/sync.py +++ b/src/runloop_api_client/sdk/sync.py @@ -801,7 +801,7 @@ class ScenarioOps: ... runloop.scenario.builder("my-scenario") ... .from_blueprint(blueprint) ... .with_problem_statement("Fix the bug") - ... .add_test_scorer("tests", test_command="pytest") + ... .add_test_command_scorer("tests", test_command="pytest") ... ) >>> scenario = builder.push() """ From cdb28a1933a5cd04dcc061475ad3580b27e1c134 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 18:28:36 -0800 Subject: [PATCH 19/31] make sure it is clear that score is 0.0-1.0 inclusive --- src/runloop_api_client/sdk/async_scenario_builder.py | 4 ++-- src/runloop_api_client/sdk/async_scorer.py | 2 +- src/runloop_api_client/sdk/scenario_builder.py | 4 ++-- src/runloop_api_client/sdk/scorer.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index b785422ab..da436d36c 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -213,7 +213,7 @@ def add_bash_script_scorer( ) -> Self: """Add a bash script scorer. - The script should output "score=X.X" where X.X is a float between 0.0 and 1.0. + The script should output "score=X.X" where X.X is a float between 0.0 and 1.0, inclusive. :param name: Name of the scoring function :type name: str @@ -241,7 +241,7 @@ def add_python_script_scorer( ) -> Self: """Add a Python script scorer. - The script should print the score (0.0-1.0) to stdout. + The script should print the score in the range [0.0, 1.0] to stdout. 
:param name: Name of the scoring function :type name: str diff --git a/src/runloop_api_client/sdk/async_scorer.py b/src/runloop_api_client/sdk/async_scorer.py index 3df4fb4e0..91ced0c38 100644 --- a/src/runloop_api_client/sdk/async_scorer.py +++ b/src/runloop_api_client/sdk/async_scorer.py @@ -16,7 +16,7 @@ class AsyncScorer: """A custom scorer for evaluating scenario outputs (async). - Scorers define bash scripts that produce a score (0.0-1.0) for scenario runs. + Scorers define bash scripts that produce a score in the range [0.0, 1.0] for scenario runs. Obtain instances via ``runloop.scorer.create()`` or ``runloop.scorer.from_id()``. Example: diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index 86527a690..0ac6139ea 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -213,7 +213,7 @@ def add_bash_script_scorer( ) -> Self: """Add a bash script scorer. - The script should output "score=X.X" where X.X is a float between 0.0 and 1.0. + The script should output "score=X.X" where X.X is a float between 0.0 and 1.0, inclusive. :param name: Name of the scoring function :type name: str @@ -241,7 +241,7 @@ def add_python_script_scorer( ) -> Self: """Add a Python script scorer. - The script should print the score (0.0-1.0) to stdout. + The script should print the score in the range [0.0, 1.0] to stdout. :param name: Name of the scoring function :type name: str diff --git a/src/runloop_api_client/sdk/scorer.py b/src/runloop_api_client/sdk/scorer.py index a25bb44a8..8df57ac05 100644 --- a/src/runloop_api_client/sdk/scorer.py +++ b/src/runloop_api_client/sdk/scorer.py @@ -16,7 +16,7 @@ class Scorer: """A custom scorer for evaluating scenario outputs. - Scorers define bash scripts that produce a score (0.0-1.0) for scenario runs. + Scorers define bash scripts that produce a score in the range [0.0, 1.0] for scenario runs. 
Obtain instances via ``runloop.scorer.create()`` or ``runloop.scorer.from_id()``. Example: From 1727c2e567469a62295876fdbe5c0a1072c42e08 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 18:44:55 -0800 Subject: [PATCH 20/31] update script scorer docstrings --- src/runloop_api_client/sdk/async_scenario_builder.py | 7 +++++-- src/runloop_api_client/sdk/scenario_builder.py | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index da436d36c..3bb9f19cf 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -211,7 +211,7 @@ def add_bash_script_scorer( bash_script: str, weight: float = 1.0, ) -> Self: - """Add a bash script scorer. + """Add a standalone bash script scorer. The script should output "score=X.X" where X.X is a float between 0.0 and 1.0, inclusive. @@ -239,7 +239,10 @@ def add_python_script_scorer( python_version_constraint: Optional[str] = None, requirements_contents: Optional[str] = None, ) -> Self: - """Add a Python script scorer. + """Add a standalone Python script scorer. + + The script is run in an isolated uv environment, and the dependencies are declared in the + `uv script header <https://docs.astral.sh/uv/guides/scripts/#declaring-script-dependencies>`__. The script should print the score in the range [0.0, 1.0] to stdout. diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index 0ac6139ea..8dbd0b8ab 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -211,7 +211,7 @@ def add_bash_script_scorer( bash_script: str, weight: float = 1.0, ) -> Self: - """Add a bash script scorer. + """Add a standalone bash script scorer. The script should output "score=X.X" where X.X is a float between 0.0 and 1.0, inclusive.
@@ -239,7 +239,10 @@ def add_python_script_scorer( python_version_constraint: Optional[str] = None, requirements_contents: Optional[str] = None, ) -> Self: - """Add a Python script scorer. + """Add a standalone Python script scorer. + + The script is run in an isolated uv environment, and the dependencies are declared in the + `uv script header <https://docs.astral.sh/uv/guides/scripts/#declaring-script-dependencies>`__. The script should print the score in the range [0.0, 1.0] to stdout. From 30c4497331e74e1e10e83335202f08a1068fd710 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 18:45:10 -0800 Subject: [PATCH 21/31] formatting --- src/runloop_api_client/sdk/async_scenario_builder.py | 2 +- src/runloop_api_client/sdk/scenario_builder.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index 3bb9f19cf..ecae369e7 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -240,7 +240,7 @@ def add_python_script_scorer( requirements_contents: Optional[str] = None, ) -> Self: """Add a standalone Python script scorer. - + The script is run in an isolated uv environment, and the dependencies are declared in the `uv script header <https://docs.astral.sh/uv/guides/scripts/#declaring-script-dependencies>`__. diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index 8dbd0b8ab..bd5bb52ab 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -240,7 +240,7 @@ def add_python_script_scorer( requirements_contents: Optional[str] = None, ) -> Self: """Add a standalone Python script scorer. - + The script is run in an isolated uv environment, and the dependencies are declared in the `uv script header <https://docs.astral.sh/uv/guides/scripts/#declaring-script-dependencies>`__.
From 797c6fd3fb951aadb8b9544176747d5253a34830 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 18:52:50 -0800 Subject: [PATCH 22/31] clarify reference solution/gold patch terminology and validation strategy --- src/runloop_api_client/sdk/async_scenario_builder.py | 7 ++++--- src/runloop_api_client/sdk/scenario_builder.py | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index ecae369e7..03e6fdeb7 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -343,9 +343,10 @@ def with_metadata(self, metadata: Dict[str, str]) -> Self: return self def with_reference_output(self, output: str) -> Self: - """Set the reference output/solution for validation. + """Set the reference solution or gold patch for validation. + After application, the scorer is expected to return a score of 1.0. - :param output: Reference output (e.g., git diff) + :param output: Reference solution or gold patch (e.g., git diff) :type output: str :return: Self for method chaining :rtype: Self @@ -376,7 +377,7 @@ def with_required_secrets(self, secrets: List[str]) -> Self: return self def with_validation_type(self, validation_type: Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]) -> Self: - """Set the validation strategy. + """Set the validation strategy to specify how the reference solution or gold patch is applied to the scenario. 
:param validation_type: Validation type :type validation_type: Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"] diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index bd5bb52ab..ab25580bd 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -343,9 +343,10 @@ def with_metadata(self, metadata: Dict[str, str]) -> Self: return self def with_reference_output(self, output: str) -> Self: - """Set the reference output/solution for validation. + """Set the reference solution or gold patch for validation. + After application, the scorer is expected to return a score of 1.0. - :param output: Reference output (e.g., git diff) + :param output: Reference solution or gold patch (e.g., git diff) :type output: str :return: Self for method chaining :rtype: Self @@ -376,7 +377,7 @@ def with_required_secrets(self, secrets: List[str]) -> Self: return self def with_validation_type(self, validation_type: Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]) -> Self: - """Set the validation strategy. + """Set the validation strategy to specify how the reference solution or gold patch is applied to the scenario. 
:param validation_type: Validation type :type validation_type: Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"] From a099739f852d42de06f8f9ac95ec6faf172f23b1 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Tue, 9 Dec 2025 19:17:25 -0800 Subject: [PATCH 23/31] make name first argument passed to scenario builder --- src/runloop_api_client/sdk/async_.py | 2 +- src/runloop_api_client/sdk/async_scenario_builder.py | 6 +++--- src/runloop_api_client/sdk/scenario_builder.py | 6 +++--- src/runloop_api_client/sdk/sync.py | 2 +- tests/sdk/test_async_scenario_builder.py | 6 +++--- tests/sdk/test_scenario_builder.py | 6 +++--- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py index a75185594..f121a60be 100644 --- a/src/runloop_api_client/sdk/async_.py +++ b/src/runloop_api_client/sdk/async_.py @@ -801,7 +801,7 @@ def builder(self, name: str) -> AsyncScenarioBuilder: :return: A new AsyncScenarioBuilder instance :rtype: AsyncScenarioBuilder """ - return AsyncScenarioBuilder(self._client, name) + return AsyncScenarioBuilder(name, self._client) def from_id(self, scenario_id: str) -> AsyncScenario: """Get an AsyncScenario instance for an existing scenario ID. diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index 03e6fdeb7..4edad16f3 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -41,13 +41,13 @@ class AsyncScenarioBuilder: >>> scenario = await builder.push() """ - def __init__(self, client: AsyncRunloop, name: str) -> None: + def __init__(self, name: str, client: AsyncRunloop) -> None: """Initialize the builder. 
- :param client: AsyncRunloop client instance - :type client: AsyncRunloop :param name: Name for the scenario :type name: str + :param client: AsyncRunloop client instance + :type client: AsyncRunloop """ self._client = client self._name = name diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index ab25580bd..b35740bcc 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -41,13 +41,13 @@ class ScenarioBuilder: >>> scenario = builder.push() """ - def __init__(self, client: Runloop, name: str) -> None: + def __init__(self, name: str, client: Runloop) -> None: """Initialize the builder. - :param client: Runloop client instance - :type client: Runloop :param name: Name for the scenario :type name: str + :param client: Runloop client instance + :type client: Runloop """ self._client = client self._name = name diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py index 008453236..22df39fe6 100644 --- a/src/runloop_api_client/sdk/sync.py +++ b/src/runloop_api_client/sdk/sync.py @@ -822,7 +822,7 @@ def builder(self, name: str) -> ScenarioBuilder: :return: A new ScenarioBuilder instance :rtype: ScenarioBuilder """ - return ScenarioBuilder(self._client, name) + return ScenarioBuilder(name, self._client) def from_id(self, scenario_id: str) -> Scenario: """Get a Scenario instance for an existing scenario ID. 
diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py index 3491fff66..e75938434 100644 --- a/tests/sdk/test_async_scenario_builder.py +++ b/tests/sdk/test_async_scenario_builder.py @@ -36,11 +36,11 @@ def mock_snapshot(self, mock_async_client: MagicMock) -> AsyncSnapshot: @pytest.fixture def builder(self, mock_async_client: MagicMock) -> AsyncScenarioBuilder: """Create an AsyncScenarioBuilder instance with mock client.""" - return AsyncScenarioBuilder(mock_async_client, "test-scenario") + return AsyncScenarioBuilder("test-scenario", mock_async_client) def test_instantiation(self, mock_async_client: MagicMock) -> None: """Test builder initialization and repr.""" - builder = AsyncScenarioBuilder(mock_async_client, "my-scenario") + builder = AsyncScenarioBuilder("my-scenario", mock_async_client) assert builder._client is mock_async_client assert builder._name == "my-scenario" @@ -136,7 +136,7 @@ def test_build_params_validation(self, builder: AsyncScenarioBuilder) -> None: builder._build_params() # Missing scorer (new builder) - builder2 = AsyncScenarioBuilder(builder._client, "test2") + builder2 = AsyncScenarioBuilder("test2", builder._client) builder2.with_problem_statement("Fix the bug") with pytest.raises(ValueError, match="At least one scorer is required"): builder2._build_params() diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py index d9aee9fb1..8a7284d17 100644 --- a/tests/sdk/test_scenario_builder.py +++ b/tests/sdk/test_scenario_builder.py @@ -34,11 +34,11 @@ def mock_snapshot(self, mock_client: MagicMock) -> Snapshot: @pytest.fixture def builder(self, mock_client: MagicMock) -> ScenarioBuilder: """Create a ScenarioBuilder instance with mock client.""" - return ScenarioBuilder(mock_client, "test-scenario") + return ScenarioBuilder("test-scenario", mock_client) def test_instantiation(self, mock_client: MagicMock) -> None: """Test builder initialization and repr.""" - builder = 
ScenarioBuilder(mock_client, "my-scenario") + builder = ScenarioBuilder("my-scenario", mock_client) assert builder._client is mock_client assert builder._name == "my-scenario" @@ -134,7 +134,7 @@ def test_build_params_validation(self, builder: ScenarioBuilder) -> None: builder._build_params() # Missing scorer (new builder) - builder2 = ScenarioBuilder(builder._client, "test2") + builder2 = ScenarioBuilder("test2", builder._client) builder2.with_problem_statement("Fix the bug") with pytest.raises(ValueError, match="At least one scorer is required"): builder2._build_params() From cd9adce37b034f9a8dfd547716872e080ba693fe Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Wed, 10 Dec 2025 11:51:12 -0800 Subject: [PATCH 24/31] add preview method --- src/runloop_api_client/sdk/__init__.py | 2 + src/runloop_api_client/sdk/_types.py | 29 ++++++- .../sdk/async_scenario_builder.py | 87 +++++++++++-------- .../sdk/scenario_builder.py | 87 +++++++++++-------- 4 files changed, 132 insertions(+), 73 deletions(-) diff --git a/src/runloop_api_client/sdk/__init__.py b/src/runloop_api_client/sdk/__init__.py index 232ef9515..5773b9d53 100644 --- a/src/runloop_api_client/sdk/__init__.py +++ b/src/runloop_api_client/sdk/__init__.py @@ -7,6 +7,7 @@ from .sync import AgentOps, DevboxOps, ScorerOps, RunloopSDK, ScenarioOps, SnapshotOps, BlueprintOps, StorageObjectOps from .agent import Agent +from ._types import ScenarioPreview from .async_ import ( AsyncAgentOps, AsyncDevboxOps, @@ -75,6 +76,7 @@ "AsyncScenarioRun", "ScenarioBuilder", "AsyncScenarioBuilder", + "ScenarioPreview", "Scorer", "AsyncScorer", "Snapshot", diff --git a/src/runloop_api_client/sdk/_types.py b/src/runloop_api_client/sdk/_types.py index 31d543abb..ef180a56e 100644 --- a/src/runloop_api_client/sdk/_types.py +++ b/src/runloop_api_client/sdk/_types.py @@ -1,10 +1,13 @@ -from typing import Union, Callable, Optional +from typing import Dict, Union, Callable, Optional from typing_extensions import TypedDict from 
.._types import Body, Query, Headers, Timeout, NotGiven from ..lib.polling import PollingConfig from ..types.devboxes import DiskSnapshotListParams, DiskSnapshotUpdateParams from ..types.scenarios import ScorerListParams, ScorerCreateParams, ScorerUpdateParams, ScorerValidateParams +from ..types.input_context import InputContext +from ..types.scenario_view import ScenarioView +from ..types.scoring_contract import ScoringContract from ..types.agent_list_params import AgentListParams from ..types.devbox_list_params import DevboxListParams from ..types.object_list_params import ObjectListParams @@ -186,3 +189,27 @@ class SDKScenarioRunAsyncParams(ScenarioStartRunBaseParams, LongRequestOptions): class SDKScenarioRunParams(ScenarioStartRunBaseParams, LongPollingRequestOptions): pass + + +class InputContextPreview(InputContext): + problem_statement: Optional[str] = None # type: ignore[assignment] + """The problem statement for the Scenario.""" + + +class ScenarioPreview(ScenarioView): + """Preview of scenario configuration with all fields optional.""" + + id: Optional[str] = None # type: ignore[assignment] + """The ID of the Scenario.""" + + input_context: Optional[InputContextPreview] = None # type: ignore[assignment] + """The input context for the Scenario.""" + + metadata: Optional[Dict[str, str]] = None # type: ignore[assignment] + """User defined metadata to attach to the scenario for organization.""" + + name: Optional[str] = None # type: ignore[assignment] + """The name of the Scenario.""" + + scoring_contract: Optional[ScoringContract] = None # type: ignore[assignment] + """The scoring contract for the Scenario.""" diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index 4edad16f3..e74f1ee68 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -6,7 +6,7 @@ from typing_extensions import Self, Unpack, Literal, override 
from ..types import ScenarioCreateParams, ScenarioEnvironmentParam -from ._types import LongRequestOptions +from ._types import ScenarioPreview, LongRequestOptions from .._client import AsyncRunloop from .async_scenario import AsyncScenario from .async_snapshot import AsyncSnapshot @@ -387,6 +387,22 @@ def with_validation_type(self, validation_type: Literal["UNSPECIFIED", "FORWARD" self._validation_type = validation_type return self + def _build_normalized_scorers(self) -> List[ScoringFunctionParam]: + """Build normalized scorers list.""" + total_weight = sum(s["weight"] for s in self._scorers) + return [{**s, "weight": s["weight"] / total_weight} for s in self._scorers] + + def _build_environment_params(self) -> ScenarioEnvironmentParam: + """Build environment parameters.""" + env_params: ScenarioEnvironmentParam = {} + if self._blueprint: + env_params["blueprint_id"] = self._blueprint.id + if self._snapshot: + env_params["snapshot_id"] = self._snapshot.id + if self._working_directory: + env_params["working_directory"] = self._working_directory + return env_params + def _build_params(self) -> ScenarioCreateParams: """Build the scenario creation parameters. @@ -405,51 +421,50 @@ def _build_params(self) -> ScenarioCreateParams: "Call add_test_command_scorer(), add_bash_script_scorer(), or another scorer method first." 
) - # Normalize weights to sum to 1.0 - total_weight = sum(s["weight"] for s in self._scorers) - normalized_scorers: List[ScoringFunctionParam] = [ - {**s, "weight": s["weight"] / total_weight} for s in self._scorers - ] - - params: ScenarioCreateParams = { + return { "name": self._name, "input_context": { "problem_statement": self._problem_statement, + "additional_context": self._additional_context, }, "scoring_contract": { - "scoring_function_parameters": normalized_scorers, + "scoring_function_parameters": self._build_normalized_scorers(), }, + "environment_parameters": self._build_environment_params(), + "metadata": self._metadata, + "reference_output": self._reference_output, + "required_environment_variables": self._required_env_vars, + "required_secret_names": self._required_secrets, + "validation_type": self._validation_type, } - # Add additional context if set - if self._additional_context is not None: - params["input_context"]["additional_context"] = self._additional_context + def preview(self) -> ScenarioPreview: + """Preview the scenario configuration without pushing to the platform. - # Build environment parameters if any are set - env_params: ScenarioEnvironmentParam = {} - if self._blueprint: - env_params["blueprint_id"] = self._blueprint.id - if self._snapshot: - env_params["snapshot_id"] = self._snapshot.id - if self._working_directory: - env_params["working_directory"] = self._working_directory + Returns the current configuration state as a ScenarioPreview object. + Does not validate or raise errors for missing required fields. 
- if env_params: - params["environment_parameters"] = env_params - - # Add optional fields - if self._metadata: - params["metadata"] = self._metadata - if self._reference_output: - params["reference_output"] = self._reference_output - if self._required_env_vars: - params["required_environment_variables"] = self._required_env_vars - if self._required_secrets: - params["required_secret_names"] = self._required_secrets - if self._validation_type: - params["validation_type"] = self._validation_type - - return params + :return: Preview of the scenario configuration + :rtype: ScenarioPreview + """ + return ScenarioPreview.model_validate( + { + "name": self._name, + "input_context": { + "problem_statement": self._problem_statement, + "additional_context": self._additional_context, + }, + "scoring_contract": { + "scoring_function_parameters": self._build_normalized_scorers(), + }, + "environment": self._build_environment_params(), + "metadata": self._metadata, + "reference_output": self._reference_output, + "required_environment_variables": self._required_env_vars, + "required_secret_names": self._required_secrets, + "validation_type": self._validation_type, + } + ) async def push(self, **options: Unpack[LongRequestOptions]) -> AsyncScenario: """Create the scenario on the platform. 
diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index b35740bcc..7e0c5094d 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -6,7 +6,7 @@ from typing_extensions import Self, Unpack, Literal, override from ..types import ScenarioCreateParams, ScenarioEnvironmentParam -from ._types import LongRequestOptions +from ._types import ScenarioPreview, LongRequestOptions from .._client import Runloop from .scenario import Scenario from .snapshot import Snapshot @@ -387,6 +387,22 @@ def with_validation_type(self, validation_type: Literal["UNSPECIFIED", "FORWARD" self._validation_type = validation_type return self + def _build_normalized_scorers(self) -> List[ScoringFunctionParam]: + """Build normalized scorers list.""" + total_weight = sum(s["weight"] for s in self._scorers) + return [{**s, "weight": s["weight"] / total_weight} for s in self._scorers] + + def _build_environment_params(self) -> ScenarioEnvironmentParam: + """Build environment parameters""" + env_params: ScenarioEnvironmentParam = {} + if self._blueprint: + env_params["blueprint_id"] = self._blueprint.id + if self._snapshot: + env_params["snapshot_id"] = self._snapshot.id + if self._working_directory: + env_params["working_directory"] = self._working_directory + return env_params + def _build_params(self) -> ScenarioCreateParams: """Build the scenario creation parameters. @@ -405,51 +421,50 @@ def _build_params(self) -> ScenarioCreateParams: "Call add_test_command_scorer(), add_bash_script_scorer(), or another scorer method first." 
) - # Normalize weights to sum to 1.0 - total_weight = sum(s["weight"] for s in self._scorers) - normalized_scorers: List[ScoringFunctionParam] = [ - {**s, "weight": s["weight"] / total_weight} for s in self._scorers - ] - - params: ScenarioCreateParams = { + return { "name": self._name, "input_context": { "problem_statement": self._problem_statement, + "additional_context": self._additional_context, }, "scoring_contract": { - "scoring_function_parameters": normalized_scorers, + "scoring_function_parameters": self._build_normalized_scorers(), }, + "environment_parameters": self._build_environment_params(), + "metadata": self._metadata, + "reference_output": self._reference_output, + "required_environment_variables": self._required_env_vars, + "required_secret_names": self._required_secrets, + "validation_type": self._validation_type, } - # Add additional context if set - if self._additional_context is not None: - params["input_context"]["additional_context"] = self._additional_context + def preview(self) -> ScenarioPreview: + """Preview the scenario configuration without pushing to the platform. - # Build environment parameters if any are set - env_params: ScenarioEnvironmentParam = {} - if self._blueprint: - env_params["blueprint_id"] = self._blueprint.id - if self._snapshot: - env_params["snapshot_id"] = self._snapshot.id - if self._working_directory: - env_params["working_directory"] = self._working_directory + Returns the current configuration state as a ScenarioPreview object. + Does not validate or raise errors for missing required fields. 
- if env_params: - params["environment_parameters"] = env_params - - # Add optional fields - if self._metadata: - params["metadata"] = self._metadata - if self._reference_output: - params["reference_output"] = self._reference_output - if self._required_env_vars: - params["required_environment_variables"] = self._required_env_vars - if self._required_secrets: - params["required_secret_names"] = self._required_secrets - if self._validation_type: - params["validation_type"] = self._validation_type - - return params + :return: Preview of the scenario configuration + :rtype: ScenarioPreview + """ + return ScenarioPreview.model_validate( + { + "name": self._name, + "input_context": { + "problem_statement": self._problem_statement, + "additional_context": self._additional_context, + }, + "scoring_contract": { + "scoring_function_parameters": self._build_normalized_scorers(), + }, + "environment": self._build_environment_params(), + "metadata": self._metadata, + "reference_output": self._reference_output, + "required_environment_variables": self._required_env_vars, + "required_secret_names": self._required_secrets, + "validation_type": self._validation_type, + } + ) def push(self, **options: Unpack[LongRequestOptions]) -> Scenario: """Create the scenario on the platform. 
From 034399bb2cc72ca6e04c0439379f436290b3f752 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Wed, 10 Dec 2025 12:05:31 -0800 Subject: [PATCH 25/31] clean up unit test imports, rename builder fixture to mock_builder --- tests/sdk/test_async_execution.py | 3 +- tests/sdk/test_async_execution_result.py | 2 +- tests/sdk/test_async_ops.py | 6 +- tests/sdk/test_async_scenario_builder.py | 162 ++++++++++++----------- tests/sdk/test_execution.py | 3 +- tests/sdk/test_execution_result.py | 2 +- tests/sdk/test_ops.py | 10 +- tests/sdk/test_scenario_builder.py | 160 +++++++++++----------- 8 files changed, 178 insertions(+), 170 deletions(-) diff --git a/tests/sdk/test_async_execution.py b/tests/sdk/test_async_execution.py index b33b4cf1f..06629cf63 100644 --- a/tests/sdk/test_async_execution.py +++ b/tests/sdk/test_async_execution.py @@ -14,7 +14,8 @@ TASK_COMPLETION_SHORT, MockExecutionView, ) -from runloop_api_client.sdk.async_execution import AsyncExecution, _AsyncStreamingGroup +from runloop_api_client.sdk import AsyncExecution +from runloop_api_client.sdk.async_execution import _AsyncStreamingGroup # Legacy aliases for backward compatibility SHORT_SLEEP = TASK_COMPLETION_SHORT diff --git a/tests/sdk/test_async_execution_result.py b/tests/sdk/test_async_execution_result.py index 2a71da1c7..cf8a23caa 100644 --- a/tests/sdk/test_async_execution_result.py +++ b/tests/sdk/test_async_execution_result.py @@ -8,7 +8,7 @@ import pytest from tests.sdk.conftest import MockExecutionView -from runloop_api_client.sdk.async_execution_result import AsyncExecutionResult +from runloop_api_client.sdk import AsyncExecutionResult class TestAsyncExecutionResult: diff --git a/tests/sdk/test_async_ops.py b/tests/sdk/test_async_ops.py index 9fca5bb4b..f8a16e1c0 100644 --- a/tests/sdk/test_async_ops.py +++ b/tests/sdk/test_async_ops.py @@ -24,19 +24,17 @@ AsyncAgent, AsyncDevbox, AsyncScorer, + AsyncAgentOps, AsyncScenario, AsyncSnapshot, AsyncBlueprint, - AsyncStorageObject, -) -from 
runloop_api_client.sdk.async_ import ( - AsyncAgentOps, AsyncDevboxOps, AsyncScorerOps, AsyncRunloopSDK, AsyncScenarioOps, AsyncSnapshotOps, AsyncBlueprintOps, + AsyncStorageObject, AsyncStorageObjectOps, ) from runloop_api_client.lib.polling import PollingConfig diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py index e75938434..79a128f11 100644 --- a/tests/sdk/test_async_scenario_builder.py +++ b/tests/sdk/test_async_scenario_builder.py @@ -6,9 +6,7 @@ import pytest -from runloop_api_client.sdk.async_snapshot import AsyncSnapshot -from runloop_api_client.sdk.async_blueprint import AsyncBlueprint -from runloop_api_client.sdk.async_scenario_builder import AsyncScenarioBuilder +from runloop_api_client.sdk import AsyncSnapshot, AsyncBlueprint, AsyncScenarioBuilder from runloop_api_client.types.scoring_function_param import ScorerTestBasedScoringFunctionTestFile @@ -34,7 +32,7 @@ def mock_snapshot(self, mock_async_client: MagicMock) -> AsyncSnapshot: return AsyncSnapshot(mock_async_client, "snap-123") @pytest.fixture - def builder(self, mock_async_client: MagicMock) -> AsyncScenarioBuilder: + def mock_builder(self, mock_async_client: MagicMock) -> AsyncScenarioBuilder: """Create an AsyncScenarioBuilder instance with mock client.""" return AsyncScenarioBuilder("test-scenario", mock_async_client) @@ -48,113 +46,117 @@ def test_instantiation(self, mock_async_client: MagicMock) -> None: assert repr(builder) == "" def test_from_blueprint_and_snapshot( - self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint, mock_snapshot: AsyncSnapshot + self, mock_builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint, mock_snapshot: AsyncSnapshot ) -> None: """Test blueprint/snapshot setting returns self and are mutually exclusive.""" # from_blueprint returns self and sets blueprint - result = builder.from_blueprint(mock_blueprint) - assert result is builder - assert builder._blueprint is mock_blueprint - assert 
builder._snapshot is None + result = mock_builder.from_blueprint(mock_blueprint) + assert result is mock_builder + assert mock_builder._blueprint is mock_blueprint + assert mock_builder._snapshot is None # from_snapshot returns self, sets snapshot, and clears blueprint - result = builder.from_snapshot(mock_snapshot) - assert result is builder - assert builder._snapshot is mock_snapshot - assert builder._blueprint is None + result = mock_builder.from_snapshot(mock_snapshot) + assert result is mock_builder + assert mock_builder._snapshot is mock_snapshot + assert mock_builder._blueprint is None # from_blueprint clears snapshot - builder.from_blueprint(mock_blueprint) - assert builder._blueprint is mock_blueprint - assert builder._snapshot is None + mock_builder.from_blueprint(mock_blueprint) + assert mock_builder._blueprint is mock_blueprint + assert mock_builder._snapshot is None - def test_scorers(self, builder: AsyncScenarioBuilder) -> None: + def test_scorers(self, mock_builder: AsyncScenarioBuilder) -> None: """Test all scorer types, optional params, and multiple scorers.""" # Test scorer with test files test_files: list[ScorerTestBasedScoringFunctionTestFile] = [ {"file_path": "test_main.py", "file_contents": "def test_foo(): pass"} ] - result = builder.add_test_command_scorer( + result = mock_builder.add_test_command_scorer( "test-scorer", test_command="pytest", weight=2.0, test_files=test_files ) - assert result is builder - assert builder._scorers[0]["name"] == "test-scorer" - assert builder._scorers[0]["weight"] == 2.0 - assert builder._scorers[0]["scorer"]["type"] == "test_based_scorer" - assert builder._scorers[0]["scorer"].get("test_command") == "pytest" - assert builder._scorers[0]["scorer"].get("test_files") == test_files + assert result is mock_builder + assert mock_builder._scorers[0]["name"] == "test-scorer" + assert mock_builder._scorers[0]["weight"] == 2.0 + assert mock_builder._scorers[0]["scorer"]["type"] == "test_based_scorer" + assert 
mock_builder._scorers[0]["scorer"].get("test_command") == "pytest" + assert mock_builder._scorers[0]["scorer"].get("test_files") == test_files # Command scorer - builder.add_shell_command_scorer("cmd-scorer", command="./check.sh") - assert builder._scorers[1]["scorer"]["type"] == "command_scorer" - assert builder._scorers[1]["scorer"].get("command") == "./check.sh" + mock_builder.add_shell_command_scorer("cmd-scorer", command="./check.sh") + assert mock_builder._scorers[1]["scorer"]["type"] == "command_scorer" + assert mock_builder._scorers[1]["scorer"].get("command") == "./check.sh" # Bash scorer - builder.add_bash_script_scorer("bash-scorer", bash_script="echo 'score=1.0'") - assert builder._scorers[2]["scorer"]["type"] == "bash_script_scorer" - assert builder._scorers[2]["scorer"].get("bash_script") == "echo 'score=1.0'" + mock_builder.add_bash_script_scorer("bash-scorer", bash_script="echo 'score=1.0'") + assert mock_builder._scorers[2]["scorer"]["type"] == "bash_script_scorer" + assert mock_builder._scorers[2]["scorer"].get("bash_script") == "echo 'score=1.0'" # Python scorer with optional params - builder.add_python_script_scorer( + mock_builder.add_python_script_scorer( "python-scorer", python_script="print('1.0')", python_version_constraint=">=3.10", requirements_contents="numpy", ) - assert builder._scorers[3]["scorer"]["type"] == "python_script_scorer" - assert builder._scorers[3]["scorer"].get("python_version_constraint") == ">=3.10" - assert builder._scorers[3]["scorer"].get("requirements_contents") == "numpy" + assert mock_builder._scorers[3]["scorer"]["type"] == "python_script_scorer" + assert mock_builder._scorers[3]["scorer"].get("python_version_constraint") == ">=3.10" + assert mock_builder._scorers[3]["scorer"].get("requirements_contents") == "numpy" # AST grep scorer with optional lang - builder.add_ast_grep_scorer("ast-scorer", pattern="$A.foo()", search_directory="/src", lang="python") - assert builder._scorers[4]["scorer"]["type"] == 
"ast_grep_scorer" - assert builder._scorers[4]["scorer"].get("pattern") == "$A.foo()" - assert builder._scorers[4]["scorer"].get("lang") == "python" + mock_builder.add_ast_grep_scorer("ast-scorer", pattern="$A.foo()", search_directory="/src", lang="python") + assert mock_builder._scorers[4]["scorer"]["type"] == "ast_grep_scorer" + assert mock_builder._scorers[4]["scorer"].get("pattern") == "$A.foo()" + assert mock_builder._scorers[4]["scorer"].get("lang") == "python" # Custom scorer with optional params - builder.add_custom_scorer("custom-scorer", custom_scorer_type="my_scorer", scorer_params={"threshold": 0.5}) - assert builder._scorers[5]["scorer"]["type"] == "custom_scorer" - assert builder._scorers[5]["scorer"].get("custom_scorer_type") == "my_scorer" - assert builder._scorers[5]["scorer"].get("scorer_params") == {"threshold": 0.5} + mock_builder.add_custom_scorer( + "custom-scorer", custom_scorer_type="my_scorer", scorer_params={"threshold": 0.5} + ) + assert mock_builder._scorers[5]["scorer"]["type"] == "custom_scorer" + assert mock_builder._scorers[5]["scorer"].get("custom_scorer_type") == "my_scorer" + assert mock_builder._scorers[5]["scorer"].get("scorer_params") == {"threshold": 0.5} # Verify multiple scorers accumulated - assert len(builder._scorers) == 6 + assert len(mock_builder._scorers) == 6 - def test_add_scorer_rejects_invalid_weight(self, builder: AsyncScenarioBuilder) -> None: + def test_add_scorer_rejects_invalid_weight(self, mock_builder: AsyncScenarioBuilder) -> None: """Test that adding a scorer with zero or negative weight raises ValueError.""" with pytest.raises(ValueError, match="Scorer weight must be positive"): - builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=0.0) + mock_builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=0.0) with pytest.raises(ValueError, match="Scorer weight must be positive"): - builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=-1.0) + 
mock_builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=-1.0) - def test_build_params_validation(self, builder: AsyncScenarioBuilder) -> None: + def test_build_params_validation(self, mock_builder: AsyncScenarioBuilder) -> None: """Test _build_params raises for missing required fields.""" # Missing problem statement - builder.add_test_command_scorer("test", test_command="pytest") + mock_builder.add_test_command_scorer("test", test_command="pytest") with pytest.raises(ValueError, match="Problem statement is required"): - builder._build_params() + mock_builder._build_params() # Missing scorer (new builder) - builder2 = AsyncScenarioBuilder("test2", builder._client) + builder2 = AsyncScenarioBuilder("test2", mock_builder._client) builder2.with_problem_statement("Fix the bug") with pytest.raises(ValueError, match="At least one scorer is required"): builder2._build_params() - def test_build_params_with_all_options(self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None: + def test_build_params_with_all_options( + self, mock_builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint + ) -> None: """Test _build_params with all optional fields set.""" - builder.with_problem_statement("Fix the bug") - builder.with_additional_context({"hint": "line 42"}) - builder.add_test_command_scorer("tests", test_command="pytest") - builder.from_blueprint(mock_blueprint) - builder.with_working_directory("/app") - builder.with_metadata({"team": "infra"}) - builder.with_reference_output("diff content") - builder.with_required_env_vars(["API_KEY"]) - builder.with_required_secrets(["db_pass"]) - builder.with_validation_type("FORWARD") - - params = builder._build_params() + mock_builder.with_problem_statement("Fix the bug") + mock_builder.with_additional_context({"hint": "line 42"}) + mock_builder.add_test_command_scorer("tests", test_command="pytest") + mock_builder.from_blueprint(mock_blueprint) + mock_builder.with_working_directory("/app") + 
mock_builder.with_metadata({"team": "infra"}) + mock_builder.with_reference_output("diff content") + mock_builder.with_required_env_vars(["API_KEY"]) + mock_builder.with_required_secrets(["db_pass"]) + mock_builder.with_validation_type("FORWARD") + + params = mock_builder._build_params() assert params["name"] == "test-scenario" assert params["input_context"]["problem_statement"] == "Fix the bug" @@ -169,14 +171,14 @@ def test_build_params_with_all_options(self, builder: AsyncScenarioBuilder, mock assert params.get("required_secret_names") == ["db_pass"] assert params.get("validation_type") == "FORWARD" - def test_build_params_normalizes_weights(self, builder: AsyncScenarioBuilder) -> None: + def test_build_params_normalizes_weights(self, mock_builder: AsyncScenarioBuilder) -> None: """Test that _build_params normalizes scorer weights to sum to 1.0.""" - builder.with_problem_statement("Fix the bug") - builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0) - builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0) - builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0) + mock_builder.with_problem_statement("Fix the bug") + mock_builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0) + mock_builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0) + mock_builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0) - params = builder._build_params() + params = mock_builder._build_params() scorers = list(params["scoring_contract"]["scoring_function_parameters"]) # Weights 1, 2, 3 should normalize to 1/6, 2/6, 3/6 @@ -191,15 +193,15 @@ def test_build_params_normalizes_weights(self, builder: AsyncScenarioBuilder) -> @pytest.mark.asyncio async def test_push_calls_api_and_returns_scenario( - self, builder: AsyncScenarioBuilder, mock_async_client: MagicMock + self, mock_builder: AsyncScenarioBuilder, mock_async_client: MagicMock ) -> None: """Test push() calls 
API with correct params and returns AsyncScenario.""" mock_async_client.scenarios.create.return_value.id = "scn-new-123" - builder.with_problem_statement("Fix the bug") - builder.add_test_command_scorer("tests", test_command="pytest") + mock_builder.with_problem_statement("Fix the bug") + mock_builder.add_test_command_scorer("tests", test_command="pytest") - scenario = await builder.push() + scenario = await mock_builder.push() mock_async_client.scenarios.create.assert_called_once() call_kwargs = mock_async_client.scenarios.create.call_args.kwargs @@ -208,10 +210,10 @@ async def test_push_calls_api_and_returns_scenario( assert scenario.id == "scn-new-123" - def test_fluent_chaining(self, builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None: + def test_fluent_chaining(self, mock_builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None: """Test that all builder methods can be chained fluently.""" result = ( - builder.from_blueprint(mock_blueprint) + mock_builder.from_blueprint(mock_blueprint) .with_working_directory("/app") .with_problem_statement("Fix the bug") .with_additional_context({"hint": "check main.py"}) @@ -223,8 +225,8 @@ def test_fluent_chaining(self, builder: AsyncScenarioBuilder, mock_blueprint: As .with_validation_type("FORWARD") ) - assert result is builder - assert builder._blueprint is mock_blueprint - assert builder._working_directory == "/app" - assert builder._problem_statement == "Fix the bug" - assert len(builder._scorers) == 1 + assert result is mock_builder + assert mock_builder._blueprint is mock_blueprint + assert mock_builder._working_directory == "/app" + assert mock_builder._problem_statement == "Fix the bug" + assert len(mock_builder._scorers) == 1 diff --git a/tests/sdk/test_execution.py b/tests/sdk/test_execution.py index fa2aaca2f..63b244d0e 100644 --- a/tests/sdk/test_execution.py +++ b/tests/sdk/test_execution.py @@ -12,7 +12,8 @@ TASK_COMPLETION_SHORT, MockExecutionView, ) -from 
runloop_api_client.sdk.execution import Execution, _StreamingGroup +from runloop_api_client.sdk import Execution +from runloop_api_client.sdk.execution import _StreamingGroup # Legacy aliases for backward compatibility during transition SHORT_SLEEP = THREAD_STARTUP_DELAY diff --git a/tests/sdk/test_execution_result.py b/tests/sdk/test_execution_result.py index 60d51827f..689b108d5 100644 --- a/tests/sdk/test_execution_result.py +++ b/tests/sdk/test_execution_result.py @@ -6,7 +6,7 @@ from unittest.mock import Mock from tests.sdk.conftest import MockExecutionView -from runloop_api_client.sdk.execution_result import ExecutionResult +from runloop_api_client.sdk import ExecutionResult class TestExecutionResult: diff --git a/tests/sdk/test_ops.py b/tests/sdk/test_ops.py index f7a566265..7ac503933 100644 --- a/tests/sdk/test_ops.py +++ b/tests/sdk/test_ops.py @@ -20,15 +20,21 @@ MockBlueprintView, create_mock_httpx_response, ) -from runloop_api_client.sdk import Agent, Devbox, Scorer, Scenario, Snapshot, Blueprint, StorageObject -from runloop_api_client.sdk.sync import ( +from runloop_api_client.sdk import ( + Agent, + Devbox, + Scorer, AgentOps, + Scenario, + Snapshot, + Blueprint, DevboxOps, ScorerOps, RunloopSDK, ScenarioOps, SnapshotOps, BlueprintOps, + StorageObject, StorageObjectOps, ) from runloop_api_client.lib.polling import PollingConfig diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py index 8a7284d17..1b42df054 100644 --- a/tests/sdk/test_scenario_builder.py +++ b/tests/sdk/test_scenario_builder.py @@ -6,9 +6,7 @@ import pytest -from runloop_api_client.sdk.snapshot import Snapshot -from runloop_api_client.sdk.blueprint import Blueprint -from runloop_api_client.sdk.scenario_builder import ScenarioBuilder +from runloop_api_client.sdk import Snapshot, Blueprint, ScenarioBuilder from runloop_api_client.types.scoring_function_param import ScorerTestBasedScoringFunctionTestFile @@ -32,7 +30,7 @@ def mock_snapshot(self, 
mock_client: MagicMock) -> Snapshot: return Snapshot(mock_client, "snap-123") @pytest.fixture - def builder(self, mock_client: MagicMock) -> ScenarioBuilder: + def mock_builder(self, mock_client: MagicMock) -> ScenarioBuilder: """Create a ScenarioBuilder instance with mock client.""" return ScenarioBuilder("test-scenario", mock_client) @@ -46,113 +44,115 @@ def test_instantiation(self, mock_client: MagicMock) -> None: assert repr(builder) == "" def test_from_blueprint_and_snapshot( - self, builder: ScenarioBuilder, mock_blueprint: Blueprint, mock_snapshot: Snapshot + self, mock_builder: ScenarioBuilder, mock_blueprint: Blueprint, mock_snapshot: Snapshot ) -> None: """Test blueprint/snapshot setting returns self and are mutually exclusive.""" # from_blueprint returns self and sets blueprint - result = builder.from_blueprint(mock_blueprint) - assert result is builder - assert builder._blueprint is mock_blueprint - assert builder._snapshot is None + result = mock_builder.from_blueprint(mock_blueprint) + assert result is mock_builder + assert mock_builder._blueprint is mock_blueprint + assert mock_builder._snapshot is None # from_snapshot returns self, sets snapshot, and clears blueprint - result = builder.from_snapshot(mock_snapshot) - assert result is builder - assert builder._snapshot is mock_snapshot - assert builder._blueprint is None + result = mock_builder.from_snapshot(mock_snapshot) + assert result is mock_builder + assert mock_builder._snapshot is mock_snapshot + assert mock_builder._blueprint is None # from_blueprint clears snapshot - builder.from_blueprint(mock_blueprint) - assert builder._blueprint is mock_blueprint - assert builder._snapshot is None + mock_builder.from_blueprint(mock_blueprint) + assert mock_builder._blueprint is mock_blueprint + assert mock_builder._snapshot is None - def test_scorers(self, builder: ScenarioBuilder) -> None: + def test_scorers(self, mock_builder: ScenarioBuilder) -> None: """Test all scorer types, optional params, and 
multiple scorers.""" # Test scorer with test files test_files: list[ScorerTestBasedScoringFunctionTestFile] = [ {"file_path": "test_main.py", "file_contents": "def test_foo(): pass"} ] - result = builder.add_test_command_scorer( + result = mock_builder.add_test_command_scorer( "test-scorer", test_command="pytest", weight=2.0, test_files=test_files ) - assert result is builder - assert builder._scorers[0]["name"] == "test-scorer" - assert builder._scorers[0]["weight"] == 2.0 - assert builder._scorers[0]["scorer"]["type"] == "test_based_scorer" - assert builder._scorers[0]["scorer"].get("test_command") == "pytest" - assert builder._scorers[0]["scorer"].get("test_files") == test_files + assert result is mock_builder + assert mock_builder._scorers[0]["name"] == "test-scorer" + assert mock_builder._scorers[0]["weight"] == 2.0 + assert mock_builder._scorers[0]["scorer"]["type"] == "test_based_scorer" + assert mock_builder._scorers[0]["scorer"].get("test_command") == "pytest" + assert mock_builder._scorers[0]["scorer"].get("test_files") == test_files # Command scorer - builder.add_shell_command_scorer("cmd-scorer", command="./check.sh") - assert builder._scorers[1]["scorer"]["type"] == "command_scorer" - assert builder._scorers[1]["scorer"].get("command") == "./check.sh" + mock_builder.add_shell_command_scorer("cmd-scorer", command="./check.sh") + assert mock_builder._scorers[1]["scorer"]["type"] == "command_scorer" + assert mock_builder._scorers[1]["scorer"].get("command") == "./check.sh" # Bash scorer - builder.add_bash_script_scorer("bash-scorer", bash_script="echo 'score=1.0'") - assert builder._scorers[2]["scorer"]["type"] == "bash_script_scorer" - assert builder._scorers[2]["scorer"].get("bash_script") == "echo 'score=1.0'" + mock_builder.add_bash_script_scorer("bash-scorer", bash_script="echo 'score=1.0'") + assert mock_builder._scorers[2]["scorer"]["type"] == "bash_script_scorer" + assert mock_builder._scorers[2]["scorer"].get("bash_script") == "echo 'score=1.0'" 
# Python scorer with optional params - builder.add_python_script_scorer( + mock_builder.add_python_script_scorer( "python-scorer", python_script="print('1.0')", python_version_constraint=">=3.10", requirements_contents="numpy", ) - assert builder._scorers[3]["scorer"]["type"] == "python_script_scorer" - assert builder._scorers[3]["scorer"].get("python_version_constraint") == ">=3.10" - assert builder._scorers[3]["scorer"].get("requirements_contents") == "numpy" + assert mock_builder._scorers[3]["scorer"]["type"] == "python_script_scorer" + assert mock_builder._scorers[3]["scorer"].get("python_version_constraint") == ">=3.10" + assert mock_builder._scorers[3]["scorer"].get("requirements_contents") == "numpy" # AST grep scorer with optional lang - builder.add_ast_grep_scorer("ast-scorer", pattern="$A.foo()", search_directory="/src", lang="python") - assert builder._scorers[4]["scorer"]["type"] == "ast_grep_scorer" - assert builder._scorers[4]["scorer"].get("pattern") == "$A.foo()" - assert builder._scorers[4]["scorer"].get("lang") == "python" + mock_builder.add_ast_grep_scorer("ast-scorer", pattern="$A.foo()", search_directory="/src", lang="python") + assert mock_builder._scorers[4]["scorer"]["type"] == "ast_grep_scorer" + assert mock_builder._scorers[4]["scorer"].get("pattern") == "$A.foo()" + assert mock_builder._scorers[4]["scorer"].get("lang") == "python" # Custom scorer with optional params - builder.add_custom_scorer("custom-scorer", custom_scorer_type="my_scorer", scorer_params={"threshold": 0.5}) - assert builder._scorers[5]["scorer"]["type"] == "custom_scorer" - assert builder._scorers[5]["scorer"].get("custom_scorer_type") == "my_scorer" - assert builder._scorers[5]["scorer"].get("scorer_params") == {"threshold": 0.5} + mock_builder.add_custom_scorer( + "custom-scorer", custom_scorer_type="my_scorer", scorer_params={"threshold": 0.5} + ) + assert mock_builder._scorers[5]["scorer"]["type"] == "custom_scorer" + assert 
mock_builder._scorers[5]["scorer"].get("custom_scorer_type") == "my_scorer" + assert mock_builder._scorers[5]["scorer"].get("scorer_params") == {"threshold": 0.5} # Verify multiple scorers accumulated - assert len(builder._scorers) == 6 + assert len(mock_builder._scorers) == 6 - def test_add_scorer_rejects_invalid_weight(self, builder: ScenarioBuilder) -> None: + def test_add_scorer_rejects_invalid_weight(self, mock_builder: ScenarioBuilder) -> None: """Test that adding a scorer with zero or negative weight raises ValueError.""" with pytest.raises(ValueError, match="Scorer weight must be positive"): - builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=0.0) + mock_builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=0.0) with pytest.raises(ValueError, match="Scorer weight must be positive"): - builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=-1.0) + mock_builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=-1.0) - def test_build_params_validation(self, builder: ScenarioBuilder) -> None: + def test_build_params_validation(self, mock_builder: ScenarioBuilder) -> None: """Test _build_params raises for missing required fields.""" # Missing problem statement - builder.add_test_command_scorer("test", test_command="pytest") + mock_builder.add_test_command_scorer("test", test_command="pytest") with pytest.raises(ValueError, match="Problem statement is required"): - builder._build_params() + mock_builder._build_params() # Missing scorer (new builder) - builder2 = ScenarioBuilder("test2", builder._client) + builder2 = ScenarioBuilder("test2", mock_builder._client) builder2.with_problem_statement("Fix the bug") with pytest.raises(ValueError, match="At least one scorer is required"): builder2._build_params() - def test_build_params_with_all_options(self, builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None: + def test_build_params_with_all_options(self, mock_builder: ScenarioBuilder, mock_blueprint: 
Blueprint) -> None: """Test _build_params with all optional fields set.""" - builder.with_problem_statement("Fix the bug") - builder.with_additional_context({"hint": "line 42"}) - builder.add_test_command_scorer("tests", test_command="pytest") - builder.from_blueprint(mock_blueprint) - builder.with_working_directory("/app") - builder.with_metadata({"team": "infra"}) - builder.with_reference_output("diff content") - builder.with_required_env_vars(["API_KEY"]) - builder.with_required_secrets(["db_pass"]) - builder.with_validation_type("FORWARD") - - params = builder._build_params() + mock_builder.with_problem_statement("Fix the bug") + mock_builder.with_additional_context({"hint": "line 42"}) + mock_builder.add_test_command_scorer("tests", test_command="pytest") + mock_builder.from_blueprint(mock_blueprint) + mock_builder.with_working_directory("/app") + mock_builder.with_metadata({"team": "infra"}) + mock_builder.with_reference_output("diff content") + mock_builder.with_required_env_vars(["API_KEY"]) + mock_builder.with_required_secrets(["db_pass"]) + mock_builder.with_validation_type("FORWARD") + + params = mock_builder._build_params() assert params["name"] == "test-scenario" assert params["input_context"]["problem_statement"] == "Fix the bug" @@ -167,14 +167,14 @@ def test_build_params_with_all_options(self, builder: ScenarioBuilder, mock_blue assert params.get("required_secret_names") == ["db_pass"] assert params.get("validation_type") == "FORWARD" - def test_build_params_normalizes_weights(self, builder: ScenarioBuilder) -> None: + def test_build_params_normalizes_weights(self, mock_builder: ScenarioBuilder) -> None: """Test that _build_params normalizes scorer weights to sum to 1.0.""" - builder.with_problem_statement("Fix the bug") - builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0) - builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0) - builder.add_bash_script_scorer("scorer3", bash_script="echo 3", 
weight=3.0) + mock_builder.with_problem_statement("Fix the bug") + mock_builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0) + mock_builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0) + mock_builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0) - params = builder._build_params() + params = mock_builder._build_params() scorers = list(params["scoring_contract"]["scoring_function_parameters"]) # Weights 1, 2, 3 should normalize to 1/6, 2/6, 3/6 @@ -187,14 +187,14 @@ def test_build_params_normalizes_weights(self, builder: ScenarioBuilder) -> None total = sum(s["weight"] for s in scorers) assert abs(total - 1.0) < 0.0001 - def test_push_calls_api_and_returns_scenario(self, builder: ScenarioBuilder, mock_client: MagicMock) -> None: + def test_push_calls_api_and_returns_scenario(self, mock_builder: ScenarioBuilder, mock_client: MagicMock) -> None: """Test push() calls API with correct params and returns Scenario.""" mock_client.scenarios.create.return_value.id = "scn-new-123" - builder.with_problem_statement("Fix the bug") - builder.add_test_command_scorer("tests", test_command="pytest") + mock_builder.with_problem_statement("Fix the bug") + mock_builder.add_test_command_scorer("tests", test_command="pytest") - scenario = builder.push() + scenario = mock_builder.push() mock_client.scenarios.create.assert_called_once() call_kwargs = mock_client.scenarios.create.call_args.kwargs @@ -203,10 +203,10 @@ def test_push_calls_api_and_returns_scenario(self, builder: ScenarioBuilder, moc assert scenario.id == "scn-new-123" - def test_fluent_chaining(self, builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None: + def test_fluent_chaining(self, mock_builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None: """Test that all builder methods can be chained fluently.""" result = ( - builder.from_blueprint(mock_blueprint) + mock_builder.from_blueprint(mock_blueprint) .with_working_directory("/app") 
.with_problem_statement("Fix the bug") .with_additional_context({"hint": "check main.py"}) @@ -218,8 +218,8 @@ def test_fluent_chaining(self, builder: ScenarioBuilder, mock_blueprint: Bluepri .with_validation_type("FORWARD") ) - assert result is builder - assert builder._blueprint is mock_blueprint - assert builder._working_directory == "/app" - assert builder._problem_statement == "Fix the bug" - assert len(builder._scorers) == 1 + assert result is mock_builder + assert mock_builder._blueprint is mock_blueprint + assert mock_builder._working_directory == "/app" + assert mock_builder._problem_statement == "Fix the bug" + assert len(mock_builder._scorers) == 1 From 1fffeeb26e723dac5504c15e6f430cbb96132644 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Wed, 10 Dec 2025 16:56:04 -0800 Subject: [PATCH 26/31] added `preview()` method to scenario builder --- src/runloop_api_client/sdk/_types.py | 14 +--- .../sdk/async_scenario_builder.py | 19 +++--- .../sdk/scenario_builder.py | 19 +++--- tests/sdk/test_async_scenario_builder.py | 64 ++++++++++++++++++- tests/sdk/test_scenario_builder.py | 64 ++++++++++++++++++- 5 files changed, 146 insertions(+), 34 deletions(-) diff --git a/src/runloop_api_client/sdk/_types.py b/src/runloop_api_client/sdk/_types.py index ef180a56e..be09f6eed 100644 --- a/src/runloop_api_client/sdk/_types.py +++ b/src/runloop_api_client/sdk/_types.py @@ -1,4 +1,4 @@ -from typing import Dict, Union, Callable, Optional +from typing import Union, Callable, Optional from typing_extensions import TypedDict from .._types import Body, Query, Headers, Timeout, NotGiven @@ -7,7 +7,6 @@ from ..types.scenarios import ScorerListParams, ScorerCreateParams, ScorerUpdateParams, ScorerValidateParams from ..types.input_context import InputContext from ..types.scenario_view import ScenarioView -from ..types.scoring_contract import ScoringContract from ..types.agent_list_params import AgentListParams from ..types.devbox_list_params import DevboxListParams from 
..types.object_list_params import ObjectListParams @@ -202,14 +201,5 @@ class ScenarioPreview(ScenarioView): id: Optional[str] = None # type: ignore[assignment] """The ID of the Scenario.""" - input_context: Optional[InputContextPreview] = None # type: ignore[assignment] + input_context: InputContextPreview # type: ignore[assignment] """The input context for the Scenario.""" - - metadata: Optional[Dict[str, str]] = None # type: ignore[assignment] - """User defined metadata to attach to the scenario for organization.""" - - name: Optional[str] = None # type: ignore[assignment] - """The name of the Scenario.""" - - scoring_contract: Optional[ScoringContract] = None # type: ignore[assignment] - """The scoring contract for the Scenario.""" diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index e74f1ee68..008602867 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -65,7 +65,7 @@ def __init__(self, name: str, client: AsyncRunloop) -> None: self._scorers: List[ScoringFunctionParam] = [] # Metadata and other options - self._metadata: Optional[Dict[str, str]] = None + self._metadata: Dict[str, str] = {} self._reference_output: Optional[str] = None self._required_env_vars: Optional[List[str]] = None self._required_secrets: Optional[List[str]] = None @@ -392,16 +392,15 @@ def _build_normalized_scorers(self) -> List[ScoringFunctionParam]: total_weight = sum(s["weight"] for s in self._scorers) return [{**s, "weight": s["weight"] / total_weight} for s in self._scorers] - def _build_environment_params(self) -> ScenarioEnvironmentParam: + def _build_environment_params(self) -> Optional[ScenarioEnvironmentParam]: """Build environment parameters.""" - env_params: ScenarioEnvironmentParam = {} - if self._blueprint: - env_params["blueprint_id"] = self._blueprint.id - if self._snapshot: - env_params["snapshot_id"] = self._snapshot.id - if 
self._working_directory: - env_params["working_directory"] = self._working_directory - return env_params + if not self._blueprint and not self._snapshot and not self._working_directory: + return None + return { + "blueprint_id": self._blueprint.id if self._blueprint else None, + "snapshot_id": self._snapshot.id if self._snapshot else None, + "working_directory": self._working_directory if self._working_directory else None, + } def _build_params(self) -> ScenarioCreateParams: """Build the scenario creation parameters. diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index 7e0c5094d..dec8d38b6 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -65,7 +65,7 @@ def __init__(self, name: str, client: Runloop) -> None: self._scorers: List[ScoringFunctionParam] = [] # Metadata and other options - self._metadata: Optional[Dict[str, str]] = None + self._metadata: Dict[str, str] = {} self._reference_output: Optional[str] = None self._required_env_vars: Optional[List[str]] = None self._required_secrets: Optional[List[str]] = None @@ -392,16 +392,15 @@ def _build_normalized_scorers(self) -> List[ScoringFunctionParam]: total_weight = sum(s["weight"] for s in self._scorers) return [{**s, "weight": s["weight"] / total_weight} for s in self._scorers] - def _build_environment_params(self) -> ScenarioEnvironmentParam: + def _build_environment_params(self) -> Optional[ScenarioEnvironmentParam]: """Build environment parameters""" - env_params: ScenarioEnvironmentParam = {} - if self._blueprint: - env_params["blueprint_id"] = self._blueprint.id - if self._snapshot: - env_params["snapshot_id"] = self._snapshot.id - if self._working_directory: - env_params["working_directory"] = self._working_directory - return env_params + if not self._blueprint and not self._snapshot and not self._working_directory: + return None + return { + "blueprint_id": self._blueprint.id if 
self._blueprint else None, + "snapshot_id": self._snapshot.id if self._snapshot else None, + "working_directory": self._working_directory if self._working_directory else None, + } def _build_params(self) -> ScenarioCreateParams: """Build the scenario creation parameters. diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py index 79a128f11..d0d1060b8 100644 --- a/tests/sdk/test_async_scenario_builder.py +++ b/tests/sdk/test_async_scenario_builder.py @@ -6,7 +6,7 @@ import pytest -from runloop_api_client.sdk import AsyncSnapshot, AsyncBlueprint, AsyncScenarioBuilder +from runloop_api_client.sdk import AsyncSnapshot, AsyncBlueprint, ScenarioPreview, AsyncScenarioBuilder from runloop_api_client.types.scoring_function_param import ScorerTestBasedScoringFunctionTestFile @@ -230,3 +230,65 @@ def test_fluent_chaining(self, mock_builder: AsyncScenarioBuilder, mock_blueprin assert mock_builder._working_directory == "/app" assert mock_builder._problem_statement == "Fix the bug" assert len(mock_builder._scorers) == 1 + + def test_preview_with_no_config(self, mock_builder: AsyncScenarioBuilder) -> None: + """Test preview() works with no configuration (only name from constructor).""" + preview = mock_builder.preview() + + assert isinstance(preview, ScenarioPreview) + assert preview.name == "test-scenario" + assert preview.input_context is not None + assert preview.input_context.problem_statement is None + assert preview.input_context.additional_context is None + assert preview.scoring_contract is not None + assert len(preview.scoring_contract.scoring_function_parameters) == 0 + assert preview.environment is None + assert len(preview.metadata) == 0 + assert preview.reference_output is None + assert preview.required_environment_variables is None + assert preview.required_secret_names is None + assert preview.validation_type is None + + def test_preview_with_full_config(self, mock_builder: AsyncScenarioBuilder, mock_blueprint: 
AsyncBlueprint) -> None: + """Test preview() with all fields configured, including weight normalization.""" + mock_builder.with_problem_statement("Fix the bug") + mock_builder.with_additional_context({"hint": "line 42"}) + mock_builder.from_blueprint(mock_blueprint) + mock_builder.with_working_directory("/app") + mock_builder.with_metadata({"team": "infra"}) + mock_builder.with_reference_output("diff content") + mock_builder.with_required_env_vars(["API_KEY"]) + mock_builder.with_required_secrets(["db_pass"]) + mock_builder.with_validation_type("FORWARD") + # Add multiple scorers with different weights to test normalization + mock_builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0) + mock_builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0) + mock_builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0) + + preview = mock_builder.preview() + + # Verify it returns ScenarioPreview + assert isinstance(preview, ScenarioPreview) + + # Verify all fields are populated + assert preview.name == "test-scenario" + assert preview.input_context is not None + assert preview.input_context.problem_statement == "Fix the bug" + assert preview.input_context.additional_context == {"hint": "line 42"} + assert preview.environment is not None + assert preview.environment.blueprint_id == "bp-123" + assert preview.environment.working_directory == "/app" + assert preview.metadata == {"team": "infra"} + assert preview.reference_output == "diff content" + assert preview.required_environment_variables == ["API_KEY"] + assert preview.required_secret_names == ["db_pass"] + assert preview.validation_type == "FORWARD" + + # Verify weights are normalized (1, 2, 3 -> 1/6, 2/6, 3/6) + assert preview.scoring_contract is not None + scorers = preview.scoring_contract.scoring_function_parameters + assert len(scorers) == 3 + assert abs(scorers[0].weight - 1 / 6) < 0.0001 + assert abs(scorers[1].weight - 2 / 6) < 0.0001 + assert 
abs(scorers[2].weight - 3 / 6) < 0.0001 + assert abs(sum(s.weight for s in scorers) - 1.0) < 0.0001 diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py index 1b42df054..85617d975 100644 --- a/tests/sdk/test_scenario_builder.py +++ b/tests/sdk/test_scenario_builder.py @@ -6,7 +6,7 @@ import pytest -from runloop_api_client.sdk import Snapshot, Blueprint, ScenarioBuilder +from runloop_api_client.sdk import Snapshot, Blueprint, ScenarioBuilder, ScenarioPreview from runloop_api_client.types.scoring_function_param import ScorerTestBasedScoringFunctionTestFile @@ -223,3 +223,65 @@ def test_fluent_chaining(self, mock_builder: ScenarioBuilder, mock_blueprint: Bl assert mock_builder._working_directory == "/app" assert mock_builder._problem_statement == "Fix the bug" assert len(mock_builder._scorers) == 1 + + def test_preview_with_no_config(self, mock_builder: ScenarioBuilder) -> None: + """Test preview() works with no configuration (only name from constructor).""" + preview = mock_builder.preview() + + assert isinstance(preview, ScenarioPreview) + assert preview.name == "test-scenario" + assert preview.input_context is not None + assert preview.input_context.problem_statement is None + assert preview.input_context.additional_context is None + assert preview.scoring_contract is not None + assert len(preview.scoring_contract.scoring_function_parameters) == 0 + assert preview.environment is None + assert len(preview.metadata) == 0 + assert preview.reference_output is None + assert preview.required_environment_variables is None + assert preview.required_secret_names is None + assert preview.validation_type is None + + def test_preview_with_full_config(self, mock_builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None: + """Test preview() with all fields configured, including weight normalization.""" + mock_builder.with_problem_statement("Fix the bug") + mock_builder.with_additional_context({"hint": "line 42"}) + 
mock_builder.from_blueprint(mock_blueprint) + mock_builder.with_working_directory("/app") + mock_builder.with_metadata({"team": "infra"}) + mock_builder.with_reference_output("diff content") + mock_builder.with_required_env_vars(["API_KEY"]) + mock_builder.with_required_secrets(["db_pass"]) + mock_builder.with_validation_type("FORWARD") + # Add multiple scorers with different weights to test normalization + mock_builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0) + mock_builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0) + mock_builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0) + + preview = mock_builder.preview() + + # Verify it returns ScenarioPreview + assert isinstance(preview, ScenarioPreview) + + # Verify all fields are populated + assert preview.name == "test-scenario" + assert preview.input_context is not None + assert preview.input_context.problem_statement == "Fix the bug" + assert preview.input_context.additional_context == {"hint": "line 42"} + assert preview.environment is not None + assert preview.environment.blueprint_id == "bp-123" + assert preview.environment.working_directory == "/app" + assert preview.metadata == {"team": "infra"} + assert preview.reference_output == "diff content" + assert preview.required_environment_variables == ["API_KEY"] + assert preview.required_secret_names == ["db_pass"] + assert preview.validation_type == "FORWARD" + + # Verify weights are normalized (1, 2, 3 -> 1/6, 2/6, 3/6) + assert preview.scoring_contract is not None + scorers = preview.scoring_contract.scoring_function_parameters + assert len(scorers) == 3 + assert abs(scorers[0].weight - 1 / 6) < 0.0001 + assert abs(scorers[1].weight - 2 / 6) < 0.0001 + assert abs(scorers[2].weight - 3 / 6) < 0.0001 + assert abs(sum(s.weight for s in scorers) - 1.0) < 0.0001 From 44ae5d02ac19b44911b88ad2770ca78a8cef5eca Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Wed, 10 Dec 2025 17:51:42 -0800 
Subject: [PATCH 27/31] rename `_build_params` to `build` (now publicly exposed method) --- .../sdk/async_scenario_builder.py | 4 ++-- .../sdk/scenario_builder.py | 4 ++-- tests/sdk/test_async_scenario_builder.py | 20 +++++++++---------- tests/sdk/test_scenario_builder.py | 20 +++++++++---------- tests/smoketests/sdk/test_async_scenario.py | 2 +- tests/smoketests/sdk/test_scenario.py | 2 +- 6 files changed, 26 insertions(+), 26 deletions(-) diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index 008602867..7ee58f24b 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -402,7 +402,7 @@ def _build_environment_params(self) -> Optional[ScenarioEnvironmentParam]: "working_directory": self._working_directory if self._working_directory else None, } - def _build_params(self) -> ScenarioCreateParams: + def build(self) -> ScenarioCreateParams: """Build the scenario creation parameters. Weights are automatically normalized to sum to 1.0. @@ -473,6 +473,6 @@ async def push(self, **options: Unpack[LongRequestOptions]) -> AsyncScenario: :return: Created scenario wrapper :rtype: AsyncScenario """ - params = self._build_params() + params = self.build() scenario_view = await self._client.scenarios.create(**params, **options) return AsyncScenario(self._client, scenario_view.id) diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index dec8d38b6..bd2fddeda 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -402,7 +402,7 @@ def _build_environment_params(self) -> Optional[ScenarioEnvironmentParam]: "working_directory": self._working_directory if self._working_directory else None, } - def _build_params(self) -> ScenarioCreateParams: + def build(self) -> ScenarioCreateParams: """Build the scenario creation parameters. 
Weights are automatically normalized to sum to 1.0. @@ -473,6 +473,6 @@ def push(self, **options: Unpack[LongRequestOptions]) -> Scenario: :return: Created scenario wrapper :rtype: Scenario """ - params = self._build_params() + params = self.build() scenario_view = self._client.scenarios.create(**params, **options) return Scenario(self._client, scenario_view.id) diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py index d0d1060b8..618900188 100644 --- a/tests/sdk/test_async_scenario_builder.py +++ b/tests/sdk/test_async_scenario_builder.py @@ -128,23 +128,23 @@ def test_add_scorer_rejects_invalid_weight(self, mock_builder: AsyncScenarioBuil with pytest.raises(ValueError, match="Scorer weight must be positive"): mock_builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=-1.0) - def test_build_params_validation(self, mock_builder: AsyncScenarioBuilder) -> None: - """Test _build_params raises for missing required fields.""" + def test_build_validation(self, mock_builder: AsyncScenarioBuilder) -> None: + """Test build raises for missing required fields.""" # Missing problem statement mock_builder.add_test_command_scorer("test", test_command="pytest") with pytest.raises(ValueError, match="Problem statement is required"): - mock_builder._build_params() + mock_builder.build() # Missing scorer (new builder) builder2 = AsyncScenarioBuilder("test2", mock_builder._client) builder2.with_problem_statement("Fix the bug") with pytest.raises(ValueError, match="At least one scorer is required"): - builder2._build_params() + builder2.build() - def test_build_params_with_all_options( + def test_build_with_all_options( self, mock_builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint ) -> None: - """Test _build_params with all optional fields set.""" + """Test build with all optional fields set.""" mock_builder.with_problem_statement("Fix the bug") mock_builder.with_additional_context({"hint": "line 42"}) 
mock_builder.add_test_command_scorer("tests", test_command="pytest") @@ -156,7 +156,7 @@ def test_build_params_with_all_options( mock_builder.with_required_secrets(["db_pass"]) mock_builder.with_validation_type("FORWARD") - params = mock_builder._build_params() + params = mock_builder.build() assert params["name"] == "test-scenario" assert params["input_context"]["problem_statement"] == "Fix the bug" @@ -171,14 +171,14 @@ def test_build_params_with_all_options( assert params.get("required_secret_names") == ["db_pass"] assert params.get("validation_type") == "FORWARD" - def test_build_params_normalizes_weights(self, mock_builder: AsyncScenarioBuilder) -> None: - """Test that _build_params normalizes scorer weights to sum to 1.0.""" + def test_build_normalizes_weights(self, mock_builder: AsyncScenarioBuilder) -> None: + """Test that build normalizes scorer weights to sum to 1.0.""" mock_builder.with_problem_statement("Fix the bug") mock_builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0) mock_builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0) mock_builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0) - params = mock_builder._build_params() + params = mock_builder.build() scorers = list(params["scoring_contract"]["scoring_function_parameters"]) # Weights 1, 2, 3 should normalize to 1/6, 2/6, 3/6 diff --git a/tests/sdk/test_scenario_builder.py b/tests/sdk/test_scenario_builder.py index 85617d975..75597d5f3 100644 --- a/tests/sdk/test_scenario_builder.py +++ b/tests/sdk/test_scenario_builder.py @@ -126,21 +126,21 @@ def test_add_scorer_rejects_invalid_weight(self, mock_builder: ScenarioBuilder) with pytest.raises(ValueError, match="Scorer weight must be positive"): mock_builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=-1.0) - def test_build_params_validation(self, mock_builder: ScenarioBuilder) -> None: - """Test _build_params raises for missing required fields.""" + def 
test_build_validation(self, mock_builder: ScenarioBuilder) -> None: + """Test build raises for missing required fields.""" # Missing problem statement mock_builder.add_test_command_scorer("test", test_command="pytest") with pytest.raises(ValueError, match="Problem statement is required"): - mock_builder._build_params() + mock_builder.build() # Missing scorer (new builder) builder2 = ScenarioBuilder("test2", mock_builder._client) builder2.with_problem_statement("Fix the bug") with pytest.raises(ValueError, match="At least one scorer is required"): - builder2._build_params() + builder2.build() - def test_build_params_with_all_options(self, mock_builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None: - """Test _build_params with all optional fields set.""" + def test_build_with_all_options(self, mock_builder: ScenarioBuilder, mock_blueprint: Blueprint) -> None: + """Test build with all optional fields set.""" mock_builder.with_problem_statement("Fix the bug") mock_builder.with_additional_context({"hint": "line 42"}) mock_builder.add_test_command_scorer("tests", test_command="pytest") @@ -152,7 +152,7 @@ def test_build_params_with_all_options(self, mock_builder: ScenarioBuilder, mock mock_builder.with_required_secrets(["db_pass"]) mock_builder.with_validation_type("FORWARD") - params = mock_builder._build_params() + params = mock_builder.build() assert params["name"] == "test-scenario" assert params["input_context"]["problem_statement"] == "Fix the bug" @@ -167,14 +167,14 @@ def test_build_params_with_all_options(self, mock_builder: ScenarioBuilder, mock assert params.get("required_secret_names") == ["db_pass"] assert params.get("validation_type") == "FORWARD" - def test_build_params_normalizes_weights(self, mock_builder: ScenarioBuilder) -> None: - """Test that _build_params normalizes scorer weights to sum to 1.0.""" + def test_build_normalizes_weights(self, mock_builder: ScenarioBuilder) -> None: + """Test that build normalizes scorer weights to sum to 1.0.""" 
mock_builder.with_problem_statement("Fix the bug") mock_builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0) mock_builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0) mock_builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0) - params = mock_builder._build_params() + params = mock_builder.build() scorers = list(params["scoring_contract"]["scoring_function_parameters"]) # Weights 1, 2, 3 should normalize to 1/6, 2/6, 3/6 diff --git a/tests/smoketests/sdk/test_async_scenario.py b/tests/smoketests/sdk/test_async_scenario.py index 32359f957..b0abf6a41 100644 --- a/tests/smoketests/sdk/test_async_scenario.py +++ b/tests/smoketests/sdk/test_async_scenario.py @@ -46,7 +46,7 @@ async def push_or_update_scenario(sdk_client: AsyncRunloopSDK, builder: AsyncSce new_snapshot_id = builder._snapshot.id if builder._snapshot else None # Update existing scenario with builder's params - params = builder._build_params() + params = builder.build() result = await scenario.update(**filter_params(params, SDKScenarioUpdateParams)) # Delete OLD blueprint/snapshot if they're being replaced diff --git a/tests/smoketests/sdk/test_scenario.py b/tests/smoketests/sdk/test_scenario.py index 0b23d6b24..4128cfa29 100644 --- a/tests/smoketests/sdk/test_scenario.py +++ b/tests/smoketests/sdk/test_scenario.py @@ -45,7 +45,7 @@ def push_or_update_scenario(sdk_client: RunloopSDK, builder: ScenarioBuilder) -> new_snapshot_id = builder._snapshot.id if builder._snapshot else None # Update existing scenario with builder's params - params = builder._build_params() + params = builder.build() result = scenario.update(**filter_params(params, SDKScenarioUpdateParams)) # Delete OLD blueprint/snapshot if they're being replaced From 49ff2c2190e3c547d0818ea8cf39fa9166596d23 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Wed, 10 Dec 2025 17:52:07 -0800 Subject: [PATCH 28/31] formatting --- tests/sdk/test_async_scenario_builder.py | 4 +--- 1 
file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/sdk/test_async_scenario_builder.py b/tests/sdk/test_async_scenario_builder.py index 618900188..e20d99843 100644 --- a/tests/sdk/test_async_scenario_builder.py +++ b/tests/sdk/test_async_scenario_builder.py @@ -141,9 +141,7 @@ def test_build_validation(self, mock_builder: AsyncScenarioBuilder) -> None: with pytest.raises(ValueError, match="At least one scorer is required"): builder2.build() - def test_build_with_all_options( - self, mock_builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint - ) -> None: + def test_build_with_all_options(self, mock_builder: AsyncScenarioBuilder, mock_blueprint: AsyncBlueprint) -> None: """Test build with all optional fields set.""" mock_builder.with_problem_statement("Fix the bug") mock_builder.with_additional_context({"hint": "line 42"}) From cc77ba49c11930b2ceb259585cd4f4a0e001792f Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Wed, 10 Dec 2025 17:57:17 -0800 Subject: [PATCH 29/31] update docstring examples to use builder.build() instead of builder.push() --- src/runloop_api_client/sdk/async_.py | 3 ++- src/runloop_api_client/sdk/async_scenario_builder.py | 3 ++- src/runloop_api_client/sdk/scenario_builder.py | 3 ++- src/runloop_api_client/sdk/sync.py | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py index f121a60be..857fe1891 100644 --- a/src/runloop_api_client/sdk/async_.py +++ b/src/runloop_api_client/sdk/async_.py @@ -782,7 +782,8 @@ class AsyncScenarioOps: ... .with_problem_statement("Fix the bug") ... .add_test_command_scorer("tests", test_command="pytest") ... 
) - >>> scenario = await builder.push() + >>> params = builder.build() + >>> scenario = await runloop.scenario.create(**params) # equivalent to builder.push() """ def __init__(self, client: AsyncRunloop) -> None: diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index 7ee58f24b..0830a337e 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -38,7 +38,8 @@ class AsyncScenarioBuilder: ... .with_problem_statement("Fix the bug in main.py") ... .add_test_command_scorer("tests", test_command="pytest") ... ) - >>> scenario = await builder.push() + >>> params = builder.build() + >>> scenario = await runloop.scenario.create(**params) # equivalent to builder.push() """ def __init__(self, name: str, client: AsyncRunloop) -> None: diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index bd2fddeda..f95c0872d 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -38,7 +38,8 @@ class ScenarioBuilder: ... .with_problem_statement("Fix the bug in main.py") ... .add_test_command_scorer("tests", test_command="pytest") ... ) - >>> scenario = builder.push() + >>> params = builder.build() + >>> scenario = runloop.scenario.create(**params) # equivalent to builder.push() """ def __init__(self, name: str, client: Runloop) -> None: diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py index 22df39fe6..c34953eef 100644 --- a/src/runloop_api_client/sdk/sync.py +++ b/src/runloop_api_client/sdk/sync.py @@ -803,7 +803,8 @@ class ScenarioOps: ... .with_problem_statement("Fix the bug") ... .add_test_command_scorer("tests", test_command="pytest") ... 
) - >>> scenario = builder.push() + >>> params = builder.build() + >>> scenario = runloop.scenario.create(**params) # equivalent to builder.push() """ def __init__(self, client: Runloop) -> None: From e2c0b5c9d0bbfc9d04878909d46b2d1e9d2f69e9 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Wed, 10 Dec 2025 17:57:43 -0800 Subject: [PATCH 30/31] formatting --- src/runloop_api_client/sdk/async_.py | 2 +- src/runloop_api_client/sdk/async_scenario_builder.py | 2 +- src/runloop_api_client/sdk/scenario_builder.py | 2 +- src/runloop_api_client/sdk/sync.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py index 857fe1891..4bcd08fc1 100644 --- a/src/runloop_api_client/sdk/async_.py +++ b/src/runloop_api_client/sdk/async_.py @@ -783,7 +783,7 @@ class AsyncScenarioOps: ... .add_test_command_scorer("tests", test_command="pytest") ... ) >>> params = builder.build() - >>> scenario = await runloop.scenario.create(**params) # equivalent to builder.push() + >>> scenario = await runloop.scenario.create(**params) # equivalent to builder.push() """ def __init__(self, client: AsyncRunloop) -> None: diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index 0830a337e..d04044fc3 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -39,7 +39,7 @@ class AsyncScenarioBuilder: ... .add_test_command_scorer("tests", test_command="pytest") ... 
) >>> params = builder.build() - >>> scenario = await runloop.scenario.create(**params) # equivalent to builder.push() + >>> scenario = await runloop.scenario.create(**params) # equivalent to builder.push() """ def __init__(self, name: str, client: AsyncRunloop) -> None: diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index f95c0872d..787518fe1 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -39,7 +39,7 @@ class ScenarioBuilder: ... .add_test_command_scorer("tests", test_command="pytest") ... ) >>> params = builder.build() - >>> scenario = runloop.scenario.create(**params) # equivalent to builder.push() + >>> scenario = runloop.scenario.create(**params) # equivalent to builder.push() """ def __init__(self, name: str, client: Runloop) -> None: diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py index c34953eef..f215c8116 100644 --- a/src/runloop_api_client/sdk/sync.py +++ b/src/runloop_api_client/sdk/sync.py @@ -804,7 +804,7 @@ class ScenarioOps: ... .add_test_command_scorer("tests", test_command="pytest") ... 
) >>> params = builder.build() - >>> scenario = runloop.scenario.create(**params) # equivalent to builder.push() + >>> scenario = runloop.scenario.create(**params) # equivalent to builder.push() """ def __init__(self, client: Runloop) -> None: From 5f1e260d6720d470e06ef34b340f02f4d2269505 Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Wed, 10 Dec 2025 18:04:46 -0800 Subject: [PATCH 31/31] update with_problem_statement and with_additional_context docstrings --- src/runloop_api_client/sdk/async_scenario_builder.py | 3 ++- src/runloop_api_client/sdk/scenario_builder.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/runloop_api_client/sdk/async_scenario_builder.py b/src/runloop_api_client/sdk/async_scenario_builder.py index d04044fc3..37a3aa4b5 100644 --- a/src/runloop_api_client/sdk/async_scenario_builder.py +++ b/src/runloop_api_client/sdk/async_scenario_builder.py @@ -121,7 +121,7 @@ def with_working_directory(self, directory: str) -> Self: return self def with_problem_statement(self, statement: str) -> Self: - """Set the problem statement for the scenario. + """Set the problem statement for the scenario; this will be provided as input context to the agent. :param statement: Problem statement text :type statement: str @@ -133,6 +133,7 @@ def with_problem_statement(self, statement: str) -> Self: def with_additional_context(self, context: object) -> Self: """Set additional structured context for the scenario. + This can be used to provide additional information to the agent, such as hints, examples, or other relevant information. 
:param context: Additional context (JSON-serializable) :type context: object diff --git a/src/runloop_api_client/sdk/scenario_builder.py b/src/runloop_api_client/sdk/scenario_builder.py index 787518fe1..e2fc15de4 100644 --- a/src/runloop_api_client/sdk/scenario_builder.py +++ b/src/runloop_api_client/sdk/scenario_builder.py @@ -121,7 +121,7 @@ def with_working_directory(self, directory: str) -> Self: return self def with_problem_statement(self, statement: str) -> Self: - """Set the problem statement for the scenario. + """Set the problem statement for the scenario; this will be provided as input context to the agent. :param statement: Problem statement text :type statement: str @@ -133,6 +133,7 @@ def with_problem_statement(self, statement: str) -> Self: def with_additional_context(self, context: object) -> Self: """Set additional structured context for the scenario. + This can be used to provide additional information to the agent, such as hints, examples, or other relevant information. :param context: Additional context (JSON-serializable) :type context: object