diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f4c594677..9f2035b59 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -95,4 +95,4 @@ jobs:
         run: ./scripts/bootstrap
 
       - name: Run tests
-        run: ./scripts/test
+        run: ./scripts/test --ignore=tests/smoketests
diff --git a/.github/workflows/smoketests.yml b/.github/workflows/smoketests.yml
new file mode 100644
index 000000000..d1d3999ef
--- /dev/null
+++ b/.github/workflows/smoketests.yml
@@ -0,0 +1,59 @@
+name: Smoketests
+
+on:
+  workflow_dispatch:
+    inputs:
+      environment:
+        description: "Target environment"
+        type: choice
+        default: dev
+        options:
+          - dev
+          - prod
+
+jobs:
+  smoke:
+    runs-on: ubuntu-latest
+    timeout-minutes: 120
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: |
+          # `uv pip` needs a virtual environment; `uv run` below reuses this .venv.
+          uv venv
+          uv pip install -r requirements-dev.lock
+
+      - name: Configure environment
+        env:
+          # Pass the workflow input through env instead of interpolating it
+          # into the script text.
+          TARGET_ENV: ${{ github.event.inputs.environment }}
+          DEV_KEY: ${{ secrets.RUNLOOP_SMOKETEST_DEV_API_KEY }}
+          PROD_KEY: ${{ secrets.RUNLOOP_SMOKETEST_PROD_API_KEY }}
+        run: |
+          if [ "${TARGET_ENV}" = "prod" ]; then
+            echo "RUNLOOP_API_KEY=${PROD_KEY}" >> "$GITHUB_ENV"
+            echo "RUNLOOP_BASE_URL=https://api.runloop.ai" >> "$GITHUB_ENV"
+          else
+            echo "RUNLOOP_API_KEY=${DEV_KEY}" >> "$GITHUB_ENV"
+            echo "RUNLOOP_BASE_URL=https://api.runloop.pro" >> "$GITHUB_ENV"
+          fi
+          echo "DEBUG=false" >> "$GITHUB_ENV"
+          echo "RUN_SMOKETESTS=1" >> "$GITHUB_ENV"
+          # The checkout places the repository at the workspace root, so the
+          # package sources live in <workspace>/src.
+          echo "PYTHONPATH=${{ github.workspace }}/src" >> "$GITHUB_ENV"
+
+      - name: Run smoke tests (pytest via uv)
+        env:
+          # Force sequential to avoid overloading remote resources
+          PYTEST_ADDOPTS: "-n 1 -m smoketest"
+        run: |
+          uv run pytest -q -vv tests/smoketests
diff --git a/.gitignore b/.gitignore
index 95ceb189a..898a822ad 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,5 @@ dist
 .envrc
 codegen.log
Brewfile.lock.json + +.DS_Store \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 3e52c5ea7..3aef4322c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ dev-dependencies = [ "respx", "pytest", "pytest-asyncio", + "pytest-timeout", "ruff", "time-machine", "nox", @@ -134,7 +135,7 @@ replacement = '[\1](https://github.com/runloopai/api-client-python/tree/main/\g< [tool.pytest.ini_options] testpaths = ["tests"] -addopts = "--tb=short -n auto" +addopts = "--tb=short -n auto --dist=loadfile -m 'not smoketest'" xfail_strict = true asyncio_mode = "auto" asyncio_default_fixture_loop_scope = "session" @@ -142,7 +143,9 @@ filterwarnings = [ "error" ] markers = [ - "skip_if_strict: skip test if using strict validation (for prism mock server issues)" + "skip_if_strict: skip test if using strict validation (for prism mock server issues)", + "timeout: per-test timeout provided by pytest-timeout", + "smoketest: end-to-end smoke tests against real API", ] [tool.pyright] diff --git a/requirements-dev.lock b/requirements-dev.lock index 2a74d01aa..8bad418e9 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -98,6 +98,8 @@ pyright==1.1.399 pytest==8.3.3 # via pytest-asyncio # via pytest-xdist +pytest-timeout==2.3.1 + # via runloop-api-client (dev) pytest-asyncio==0.24.0 pytest-xdist==3.7.0 python-dateutil==2.8.2 diff --git a/scripts/test b/scripts/test index dbeda2d21..f2ba82342 100755 --- a/scripts/test +++ b/scripts/test @@ -55,7 +55,15 @@ fi export DEFER_PYDANTIC_BUILD=false echo "==> Running tests" -rye run pytest "$@" + +# By default, exclude smoketests unless explicitly enabled. +# This ensures PR CI doesn't run E2E smoketests unless RUN_SMOKETESTS=1. 
+PYTEST_ARGS=()
+if [ "${RUN_SMOKETESTS}" != "1" ]; then
+  PYTEST_ARGS+=( -m "not smoketest" )
+fi
+
+rye run pytest "${PYTEST_ARGS[@]}" "$@"
 
 echo "==> Running Pydantic v1 tests"
-rye run nox -s test-pydantic-v1 -- "$@"
+rye run nox -s test-pydantic-v1 -- "${PYTEST_ARGS[@]}" "$@"
diff --git a/tests/smoketests/README.md b/tests/smoketests/README.md
new file mode 100644
index 000000000..ca0a3f187
--- /dev/null
+++ b/tests/smoketests/README.md
@@ -0,0 +1,32 @@
+# Smoke tests
+
+End-to-end smoke tests run against the real API to validate critical flows (devboxes, snapshots, blueprints, executions/log tailing, scenarios/benchmarks).
+
+- Local run (requires `RUNLOOP_API_KEY`):
+
+```bash
+export RUNLOOP_API_KEY=... # required
+# optionally override API base
+# export RUNLOOP_BASE_URL=https://api.runloop.ai
+
+# The default pytest options in pyproject.toml deselect smoke tests
+# (-m 'not smoketest'), so select them explicitly and run sequentially,
+# mirroring the CI workflow.
+export PYTEST_ADDOPTS="-n 1 -m smoketest"
+
+# Install deps and run via uv
+uv pip install -r requirements-dev.lock
+
+# Run all tests
+RUN_SMOKETESTS=1 uv run pytest -q -vv tests/smoketests
+
+# Run a single file
+RUN_SMOKETESTS=1 uv run pytest -q -vv tests/smoketests/test_devboxes.py
+
+# Run a single test by name
+RUN_SMOKETESTS=1 uv run pytest -q -k "test_create_and_await_running_timeout" tests/smoketests/test_devboxes.py
+```
+
+- GitHub Actions: add repo secrets `RUNLOOP_SMOKETEST_DEV_API_KEY` and `RUNLOOP_SMOKETEST_PROD_API_KEY`. The workflow `.github/workflows/smoketests.yml` supports an input `environment` (dev|prod) and runs these tests in CI.
+
+
diff --git a/tests/smoketests/__init__.py b/tests/smoketests/__init__.py
new file mode 100644
index 000000000..2f6a2ae7a
--- /dev/null
+++ b/tests/smoketests/__init__.py
@@ -0,0 +1,6 @@
+"""Test package for smoketests.
+
+Ensures relative imports like `from .utils import ...` work under pytest.
+""" + + diff --git a/tests/smoketests/test_blueprints.py b/tests/smoketests/test_blueprints.py new file mode 100644 index 000000000..3ca01bc89 --- /dev/null +++ b/tests/smoketests/test_blueprints.py @@ -0,0 +1,61 @@ +import pytest + +from runloop_api_client.lib.polling import PollingConfig + +from .utils import make_client, unique_name + +pytestmark = [pytest.mark.smoketest] + + +client = make_client() + + +""" +Tests are run sequentially and can be dependent on each other. +This is to avoid overloading resources and save efficiency. +""" +_blueprint_id = None +_blueprint_name = unique_name("bp") + + +def teardown_module() -> None: + global _blueprint_id + if _blueprint_id: + try: + client.blueprints.delete(_blueprint_id) + except Exception: + pass + + +@pytest.mark.timeout(30) +def test_create_blueprint_and_await_build() -> None: + global _blueprint_id + created = client.blueprints.create_and_await_build_complete( + name=_blueprint_name, + polling_config=PollingConfig(max_attempts=180, interval_seconds=5.0, timeout_seconds=30 * 60), + ) + assert created.status == "build_complete" + _blueprint_id = created.id + + +@pytest.mark.timeout(30) +def test_start_devbox_from_base_blueprint_by_id() -> None: + assert _blueprint_id + devbox = client.devboxes.create_and_await_running( + blueprint_id=_blueprint_id, + polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60), + ) + assert devbox.blueprint_id == _blueprint_id + assert devbox.status == "running" + client.devboxes.shutdown(devbox.id) + + +@pytest.mark.timeout(30) +def test_start_devbox_from_base_blueprint_by_name() -> None: + devbox = client.devboxes.create_and_await_running( + blueprint_name=_blueprint_name, + polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60), + ) + assert devbox.blueprint_id + assert devbox.status == "running" + client.devboxes.shutdown(devbox.id) diff --git a/tests/smoketests/test_devboxes.py 
b/tests/smoketests/test_devboxes.py
new file mode 100644
index 000000000..3ce930698
--- /dev/null
+++ b/tests/smoketests/test_devboxes.py
@@ -0,0 +1,85 @@
+import pytest
+
+from runloop_api_client.lib.polling import PollingConfig, PollingTimeout
+
+from .utils import make_client, unique_name
+
+pytestmark = [pytest.mark.smoketest]
+
+
+client = make_client()
+
+# Tests in this module run sequentially and share state: the devbox created by
+# test_await_running_create_and_await_running is reused by the retrieve and
+# shutdown tests, which limits the load placed on the remote API.
+_devbox_id = None
+
+
+@pytest.mark.timeout(30)
+def test_create_devbox() -> None:
+    """Create a devbox without waiting for it to run, then shut it down."""
+    created = client.devboxes.create(name=unique_name("smoke-devbox"))
+    assert created.id
+    client.devboxes.shutdown(created.id)
+
+
+# NOTE(review): pytest-timeout caps this test at 30 s although the polling
+# config allows up to 20 minutes; confirm which limit is intended.
+@pytest.mark.timeout(30)
+def test_await_running_create_and_await_running() -> None:
+    """Create a devbox, wait until it is running and keep its id for later tests."""
+    global _devbox_id
+    created = client.devboxes.create_and_await_running(
+        name=unique_name("smoketest-devbox2"),
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60),
+    )
+    # Record the id before asserting so test_shutdown_devbox can still clean up
+    # when the status check fails.
+    _devbox_id = created.id
+    assert created.status == "running"
+
+
+def test_list_devboxes() -> None:
+    """List devboxes and expect a non-empty page."""
+    page = client.devboxes.list(limit=10)
+    assert isinstance(page.devboxes, list)
+    assert len(page.devboxes) > 0
+
+
+def test_retrieve_devbox() -> None:
+    """Retrieve the shared devbox by id."""
+    assert _devbox_id
+    view = client.devboxes.retrieve(_devbox_id)
+    assert view.id == _devbox_id
+
+
+def test_shutdown_devbox() -> None:
+    """Shut down the shared devbox and check the reported status."""
+    assert _devbox_id
+    view = client.devboxes.shutdown(_devbox_id)
+    assert view.id == _devbox_id
+    assert view.status == "shutdown"
+
+
+# The launch command sleeps for 70 s and polling may take up to 80 s, so the
+# pytest cap must exceed that budget; a 30 s cap would abort the test early.
+@pytest.mark.timeout(120)
+def test_create_and_await_running_long_set_up() -> None:
+    """Await a devbox whose launch command runs for 70 seconds."""
+    created = client.devboxes.create_and_await_running(
+        name=unique_name("smoketest-devbox-await-running-long-set-up"),
+        launch_parameters={"launch_commands": ["sleep 70"], "keep_alive_time_seconds": 30},
+        polling_config=PollingConfig(interval_seconds=5.0, timeout_seconds=80),
+    )
+    assert created.status == "running"
+
+
+@pytest.mark.timeout(30)
+def test_create_and_await_running_timeout() -> None:
+    """Expect PollingTimeout when only a single polling attempt is allowed."""
+    with pytest.raises(PollingTimeout):
+        client.devboxes.create_and_await_running(
+            name=unique_name("smoketest-devbox-await-running-timeout"),
+            launch_parameters={"launch_commands": ["sleep 70"], "keep_alive_time_seconds": 30},
+            polling_config=PollingConfig(max_attempts=1, interval_seconds=0.1),
+        )
diff --git a/tests/smoketests/test_executions.py b/tests/smoketests/test_executions.py
new file mode 100644
index 000000000..8a0acf802
--- /dev/null
+++ b/tests/smoketests/test_executions.py
@@ -0,0 +1,66 @@
+import pytest
+
+from runloop_api_client.lib.polling import PollingConfig
+
+from .utils import make_client, unique_name
+
+pytestmark = [pytest.mark.smoketest]
+
+
+client = make_client()
+
+# Tests in this module run sequentially and share state: the devbox launched
+# by test_launch_devbox hosts the execution that the later tests inspect.
+_devbox_id = None
+_exec_id = None
+
+
+@pytest.fixture(scope="module", autouse=True)
+def _shutdown_devbox_after_module():
+    """Shut down the shared devbox once every test in this module has run.
+
+    The fixture is autouse because no test requests it by name; without that
+    the teardown below would never execute and the devbox would be leaked.
+    """
+    yield
+    if _devbox_id:
+        client.devboxes.shutdown(_devbox_id)
+
+
+@pytest.mark.timeout(30)
+def test_launch_devbox() -> None:
+    """Launch the devbox used by the execution tests."""
+    global _devbox_id
+    created = client.devboxes.create_and_await_running(
+        name=unique_name("exec-devbox"),
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60),
+    )
+    _devbox_id = created.id
+
+
+@pytest.mark.timeout(30)
+def test_execute_async_and_await_completion() -> None:
+    """Start a command asynchronously and wait for it to complete."""
+    global _exec_id
+    assert _devbox_id
+    started = client.devboxes.executions.execute_async(_devbox_id, command="echo hello && sleep 1")
+    _exec_id = started.execution_id
+    completed = client.devboxes.executions.await_completed(
+        _exec_id,
+        devbox_id=_devbox_id,
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=2.0, timeout_seconds=10 * 60),
+    )
+    assert completed.status == "completed"
+
+
+@pytest.mark.timeout(30)
+def test_tail_stdout_logs() -> None:
+    """Stream stdout of the recorded execution until some output arrives."""
+    assert _devbox_id and _exec_id
+    stream = 
client.devboxes.executions.stream_stdout_updates(execution_id=_exec_id, devbox_id=_devbox_id)
+    received = ""
+    for chunk in stream:
+        received += getattr(chunk, "output", "") or ""
+        if received:
+            break
+    assert isinstance(received, str)
diff --git a/tests/smoketests/test_scenarios_benchmarks.py b/tests/smoketests/test_scenarios_benchmarks.py
new file mode 100644
index 000000000..554048bf1
--- /dev/null
+++ b/tests/smoketests/test_scenarios_benchmarks.py
@@ -0,0 +1,84 @@
+import pytest
+
+from runloop_api_client.lib.polling import PollingConfig
+
+from .utils import make_client, unique_name
+
+pytestmark = [pytest.mark.smoketest]
+
+
+client = make_client()
+
+# Tests in this module run sequentially and share state: the scenario created
+# first is reused by the run, scoring and benchmark tests that follow.
+_scenario_id = None
+_run_id = None
+_devbox_id = None
+
+
+@pytest.fixture(scope="module", autouse=True)
+def _shutdown_devbox_after_module():
+    """Shut down the scenario run's devbox once every test in this module has run.
+
+    The fixture is autouse because no test requests it by name; without that
+    the teardown below would never execute and the devbox would be leaked.
+    """
+    yield
+    if _devbox_id:
+        client.devboxes.shutdown(_devbox_id)
+
+
+@pytest.mark.timeout(30)
+def test_create_scenario() -> None:
+    """Create a scenario scored by a single command scorer."""
+    global _scenario_id
+    scenario = client.scenarios.create(
+        name=unique_name("scenario"),
+        input_context={"problem_statement": "echo hello"},
+        scoring_contract={
+            "scoring_function_parameters": [
+                {
+                    "name": "cmd-zero",
+                    "scorer": {"type": "command_scorer", "command": "true"},
+                    "weight": 1,
+                }
+            ]
+        },
+    )
+    _scenario_id = scenario.id
+
+
+@pytest.mark.timeout(30)
+def test_start_scenario_run_and_await_env_ready() -> None:
+    """Start a run of the shared scenario and wait for its environment."""
+    global _run_id, _devbox_id
+    assert _scenario_id
+    run = client.scenarios.start_run_and_await_env_ready(
+        scenario_id=_scenario_id,
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60),
+    )
+    # Record the ids before asserting so the module teardown can shut the
+    # devbox down even when the check below fails.
+    _run_id = run.id
+    _devbox_id = run.devbox_id
+    assert run.scenario_id == _scenario_id
+
+
+@pytest.mark.timeout(30)
+def test_score_and_complete_scenario_run() -> None:
+    """Score and complete the shared run."""
+    assert _run_id
+    scored = client.scenarios.runs.score_and_complete(
+        _run_id, polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60)
+    )
+    assert scored.state in {"completed", "scored", "running", "failed", "timeout", "canceled"}
+
+
+@pytest.mark.timeout(30)
+def test_create_benchmark_and_start_run() -> None:
+    """Create a benchmark containing the shared scenario and start a run of it."""
+    assert _scenario_id
+    benchmark = client.benchmarks.create(name=unique_name("benchmark"), scenario_ids=[_scenario_id])
+    assert benchmark.id
+    run = client.benchmarks.start_run(benchmark_id=benchmark.id)
+    assert run.benchmark_id == benchmark.id
diff --git a/tests/smoketests/test_snapshots.py b/tests/smoketests/test_snapshots.py
new file mode 100644
index 000000000..c5e3daa16
--- /dev/null
+++ b/tests/smoketests/test_snapshots.py
@@ -0,0 +1,53 @@
+import pytest
+
+from runloop_api_client.lib.polling import PollingConfig
+
+from .utils import make_client, unique_name
+
+pytestmark = [pytest.mark.smoketest]
+
+
+client = make_client()
+
+# Tests in this module run sequentially and share state: the snapshot taken
+# by the first test is the launch source for the second one.
+_devbox_id = None
+_snapshot_id = None
+
+
+@pytest.fixture(scope="module", autouse=True)
+def _shutdown_devbox_after_module():
+    """Shut down the snapshotted devbox once every test in this module has run."""
+    yield
+    if _devbox_id:
+        client.devboxes.shutdown(_devbox_id)
+
+
+@pytest.mark.timeout(30)
+def test_snapshot_devbox() -> None:
+    """Create a running devbox and take a disk snapshot of it."""
+    global _devbox_id, _snapshot_id
+    created = client.devboxes.create_and_await_running(
+        name=unique_name("snap-devbox"),
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60),
+    )
+    _devbox_id = created.id
+
+    snap = client.devboxes.snapshot_disk(_devbox_id, name=unique_name("snap"))
+    assert snap.id
+    _snapshot_id = snap.id
+
+
+@pytest.mark.timeout(30)
+def test_launch_devbox_from_snapshot() -> None:
+    """Launch a devbox from the shared snapshot and shut it down again."""
+    assert _snapshot_id
+    launched = client.devboxes.create_and_await_running(
+        snapshot_id=_snapshot_id,
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60),
+    )
+    try:
+        assert launched.snapshot_id == _snapshot_id
+    finally:
+        # Shut down even when the assertion fails so the devbox is not leaked.
+        client.devboxes.shutdown(launched.id)
diff --git a/tests/smoketests/utils.py b/tests/smoketests/utils.py
new file mode 100644
index 000000000..60ab689ff
--- /dev/null
+++ b/tests/smoketests/utils.py
@@ -0,0 +1,35 @@
+import os
+import time
+import uuid
+from typing import Any
+
+from runloop_api_client import Runloop
+
+
+def unique_name(prefix: str) -> str:
+    """Return ``prefix`` followed by a millisecond timestamp and a random suffix.
+
+    The random suffix keeps names distinct when two calls share a prefix and
+    land in the same millisecond, e.g. under parallel test workers.
+    """
+    return f"{prefix}-{int(time.time() * 1000)}-{uuid.uuid4().hex[:8]}"
+
+
+THIRTY_SECOND_TIMEOUT = 30
+
+
+def make_client(**overrides: Any) -> Runloop:
+    """Create a Runloop client from local src with sane defaults.
+
+    Reads RUNLOOP_BASE_URL and RUNLOOP_API_KEY from the environment. Keyword
+    arguments are passed to ``Runloop`` and take precedence over the defaults.
+    """
+    # Default values similar to TS smoketests
+    kwargs: dict[str, Any] = {
+        "base_url": os.getenv("RUNLOOP_BASE_URL"),
+        "bearer_token": os.getenv("RUNLOOP_API_KEY"),
+        "timeout": 120.0,
+        "max_retries": 1,
+    }
+    kwargs.update(overrides)
+    return Runloop(**kwargs)