Add initial smoketests

alb-rl · alb-rl · commit c9dd72f2bb32 · 2025-09-10T11:03:47.000-07:00
diff --git a/.github/workflows/smoketests.yml b/.github/workflows/smoketests.yml
@@ -0,0 +1,55 @@
+name: Smoketests
+
+on:
+  workflow_dispatch:
+    inputs:
+      environment:
+        description: "Target environment"
+        type: choice
+        default: dev
+        options:
+          - dev
+          - prod
+
+jobs:
+  smoke:
+    runs-on: ubuntu-latest
+    timeout-minutes: 120
+    defaults:
+      run:
+        working-directory: api-client-python
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: |
+          uv pip install -r requirements-dev.lock
+
+      - name: Configure environment
+        env:
+          DEV_KEY: ${{ secrets.RUNLOOP_SMOKETEST_DEV_API_KEY }}
+          PROD_KEY: ${{ secrets.RUNLOOP_SMOKETEST_PROD_API_KEY }}
+        run: |
+          if [ "${{ github.event.inputs.environment }}" = "prod" ]; then
+            echo "RUNLOOP_API_KEY=${PROD_KEY}" >> $GITHUB_ENV
+            echo "RUNLOOP_BASE_URL=https://api.runloop.ai" >> $GITHUB_ENV
+          else
+            echo "RUNLOOP_API_KEY=${DEV_KEY}" >> $GITHUB_ENV
+            echo "RUNLOOP_BASE_URL=https://api.runloop.pro" >> $GITHUB_ENV
+          fi
+          echo "DEBUG=false" >> $GITHUB_ENV
+          echo "RUN_SMOKETESTS=1" >> $GITHUB_ENV
+          echo "PYTHONPATH=${{ github.workspace }}/api-client-python/src" >> $GITHUB_ENV
+
+      - name: Run smoke tests (pytest via uv)
+        env:
+          # Force sequential to avoid overloading remote resources
+          PYTEST_ADDOPTS: "-n 1"
+        run: |
+          uv run pytest -q -vv tests/smoketests
diff --git a/.gitignore b/.gitignore
@@ -13,3 +13,5 @@ dist
 .envrc
 codegen.log
 Brewfile.lock.json
+
+.DS_Store
diff --git a/pyproject.toml b/pyproject.toml
@@ -51,6 +51,7 @@ dev-dependencies = [
     "respx",
     "pytest",
     "pytest-asyncio",
+    "pytest-timeout",
     "ruff",
     "time-machine",
     "nox",
@@ -134,15 +135,16 @@ replacement = '[\1](https://github.com/runloopai/api-client-python/tree/main/\g<
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
-addopts = "--tb=short -n auto"
+addopts = "--tb=short -n auto --dist=loadfile"
 xfail_strict = true
 asyncio_mode = "auto"
 asyncio_default_fixture_loop_scope = "session"
 filterwarnings = [
   "error"
 ]
 markers = [
-  "skip_if_strict: skip test if using strict validation (for prism mock server issues)"
+  "skip_if_strict: skip test if using strict validation (for prism mock server issues)",
+  "timeout: per-test timeout provided by pytest-timeout",
 ]
 
 [tool.pyright]
diff --git a/requirements-dev.lock b/requirements-dev.lock
@@ -98,6 +98,8 @@ pyright==1.1.399
 pytest==8.3.3
     # via pytest-asyncio
     # via pytest-xdist
+pytest-timeout==2.3.1
+    # via runloop-api-client (dev)
 pytest-asyncio==0.24.0
 pytest-xdist==3.7.0
 python-dateutil==2.8.2
diff --git a/tests/smoketests/README.md b/tests/smoketests/README.md
@@ -0,0 +1,27 @@
+# Smoke tests
+
+End-to-end smoke tests run against the real API to validate critical flows (devboxes, snapshots, blueprints, executions/log tailing, scenarios/benchmarks).
+
+- Local run (requires `RUNLOOP_API_KEY`):
+
+```bash
+export RUNLOOP_API_KEY=...  # required
+# optionally override API base
+# export RUNLOOP_BASE_URL=https://api.runloop.ai
+
+# Install deps and run via uv
+uv pip install -r requirements-dev.lock
+
+# Run all tests
+RUN_SMOKETESTS=1 uv run pytest -q -vv tests/smoketests
+
+# Run a single file
+RUN_SMOKETESTS=1 uv run pytest -q -vv tests/smoketests/test_devboxes.py
+
+# Run a single test by name
+RUN_SMOKETESTS=1 uv run pytest -q -k "test_create_and_await_running_timeout" tests/smoketests/test_devboxes.py
+```
+
+- GitHub Actions: add repo secret `RUNLOOP_SMOKETEST_DEV_API_KEY` and `RUNLOOP_SMOKETEST_PROD_API_KEY`. The workflow `.github/workflows/smoketests.yml` supports an input `environment` (dev|prod) and runs these tests in CI.
+
+
diff --git a/tests/smoketests/__init__.py b/tests/smoketests/__init__.py
@@ -0,0 +1,6 @@
+"""Test package for smoketests.
+
+Ensures relative imports like `from .utils import ...` work under pytest.
+"""
+
+
diff --git a/tests/smoketests/test_blueprints.py b/tests/smoketests/test_blueprints.py
@@ -0,0 +1,61 @@
+import os
+
+import pytest
+
+from runloop_api_client.lib.polling import PollingConfig
+
+from .utils import make_client, unique_name
+
+pytestmark = pytest.mark.skipif(os.getenv("RUN_SMOKETESTS") != "1", reason="smoketests only run in CI")
+
+
+client = make_client()
+
+
+"""
+Tests are run sequentially and can be dependent on each other. 
+This is to avoid overloading resources and save efficiency.
+"""
+_blueprint_id = None
+_blueprint_name = unique_name("bp")
+
+
+def teardown_module() -> None:
+    global _blueprint_id
+    if _blueprint_id:
+        try:
+            client.blueprints.delete(_blueprint_id)
+        except Exception:
+            pass
+
+
+@pytest.mark.timeout(30)
+def test_create_blueprint_and_await_build() -> None:
+    global _blueprint_id
+    created = client.blueprints.create_and_await_build_complete(
+        name=_blueprint_name,
+        polling_config=PollingConfig(max_attempts=180, interval_seconds=5.0, timeout_seconds=30 * 60),
+    )
+    assert created.status == "build_complete"
+    _blueprint_id = created.id
+
+
+@pytest.mark.timeout(30)
+def test_start_devbox_from_base_blueprint_by_id() -> None:
+    assert _blueprint_id
+    devbox = client.devboxes.create_and_await_running(
+        blueprint_id=_blueprint_id,
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60),
+    )
+    assert devbox.blueprint_id == _blueprint_id
+    assert devbox.status == "running"
+
+
+@pytest.mark.timeout(30)
+def test_start_devbox_from_base_blueprint_by_name() -> None:
+    devbox = client.devboxes.create_and_await_running(
+        blueprint_name=_blueprint_name,
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60),
+    )
+    assert devbox.blueprint_id
+    assert devbox.status == "running"
diff --git a/tests/smoketests/test_devboxes.py b/tests/smoketests/test_devboxes.py
@@ -0,0 +1,75 @@
+import os
+
+import pytest
+
+from runloop_api_client.lib.polling import PollingConfig, PollingTimeout
+
+from .utils import make_client, unique_name
+
+pytestmark = pytest.mark.skipif(os.getenv("RUN_SMOKETESTS") != "1", reason="smoketests only run in CI")
+
+
+client = make_client()
+
+"""
+Tests are run sequentially and can be dependent on each other. 
+This is to avoid overloading resources and save efficiency.
+"""
+_devbox_id = None
+
+
+@pytest.mark.timeout(30)
+def test_create_devbox() -> None:
+    created = client.devboxes.create(name=unique_name("smoke-devbox"))
+    assert created.id
+    client.devboxes.shutdown(created.id)
+
+
+@pytest.mark.timeout(30)
+def test_await_running_create_and_await_running() -> None:
+    global _devbox_id
+    created = client.devboxes.create_and_await_running(
+        name=unique_name("smoketest-devbox2"),
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60),
+    )
+    assert created.status == "running"
+    _devbox_id = created.id
+
+
+def test_list_devboxes() -> None:
+    page = client.devboxes.list(limit=10)
+    assert isinstance(page.devboxes, list)
+    assert len(page.devboxes) > 0
+
+
+def test_retrieve_devbox() -> None:
+    assert _devbox_id
+    view = client.devboxes.retrieve(_devbox_id)
+    assert view.id == _devbox_id
+
+
+def test_shutdown_devbox() -> None:
+    assert _devbox_id
+    view = client.devboxes.shutdown(_devbox_id)
+    assert view.id == _devbox_id
+    assert view.status == "shutdown"
+
+
+@pytest.mark.timeout(30)
+def test_create_and_await_running_long_set_up() -> None:
+    created = client.devboxes.create_and_await_running(
+        name=unique_name("smoketest-devbox-await-running-long-set-up"),
+        launch_parameters={"launch_commands": ["sleep 70"]},
+        polling_config=PollingConfig(interval_seconds=5.0, timeout_seconds=80),
+    )
+    assert created.status == "running"
+
+
+@pytest.mark.timeout(30)
+def test_create_and_await_running_timeout() -> None:
+    with pytest.raises(PollingTimeout):
+        client.devboxes.create_and_await_running(
+            name=unique_name("smoketest-devbox-await-running-timeout"),
+            launch_parameters={"launch_commands": ["sleep 70"], "keep_alive_time_seconds": 30},
+            polling_config=PollingConfig(max_attempts=1, interval_seconds=0.1),
+        )
diff --git a/tests/smoketests/test_executions.py b/tests/smoketests/test_executions.py
@@ -0,0 +1,56 @@
+import os
+
+import pytest
+
+from runloop_api_client.lib.polling import PollingConfig
+
+from .utils import make_client, unique_name
+
+pytestmark = pytest.mark.skipif(os.getenv("RUN_SMOKETESTS") != "1", reason="smoketests only run in CI")
+
+
+client = make_client()
+
+
+"""
+Tests are run sequentially and can be dependent on each other. 
+This is to avoid overloading resources and save efficiency.
+"""
+_devbox_id = None
+_exec_id = None
+
+
+@pytest.mark.timeout(30)
+def test_launch_devbox() -> None:
+    global _devbox_id
+    created = client.devboxes.create_and_await_running(
+        name=unique_name("exec-devbox"),
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60),
+    )
+    _devbox_id = created.id
+
+
+@pytest.mark.timeout(30)
+def test_execute_async_and_await_completion() -> None:
+    assert _devbox_id
+    global _exec_id
+    started = client.devboxes.executions.execute_async(_devbox_id, command="echo hello && sleep 1")
+    _exec_id = started.execution_id
+    completed = client.devboxes.executions.await_completed(
+        _exec_id,
+        devbox_id=_devbox_id,
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=2.0, timeout_seconds=10 * 60),
+    )
+    assert completed.status == "completed"
+
+
+@pytest.mark.timeout(30)
+def test_tail_stdout_logs() -> None:
+    assert _devbox_id and _exec_id
+    stream = client.devboxes.executions.stream_stdout_updates(execution_id=_exec_id, devbox_id=_devbox_id)
+    received = ""
+    for chunk in stream:
+        received += getattr(chunk, "output", "") or ""
+        if received:
+            break
+    assert isinstance(received, str)
diff --git a/tests/smoketests/test_scenarios_benchmarks.py b/tests/smoketests/test_scenarios_benchmarks.py
@@ -0,0 +1,69 @@
+import os
+
+import pytest
+
+from runloop_api_client.lib.polling import PollingConfig
+
+from .utils import make_client, unique_name
+
+pytestmark = pytest.mark.skipif(os.getenv("RUN_SMOKETESTS") != "1", reason="smoketests only run in CI")
+
+
+client = make_client()
+
+
+"""
+Tests are run sequentially and can be dependent on each other. 
+This is to avoid overloading resources and save efficiency.
+"""
+_scenario_id = None
+_run_id = None
+
+
+@pytest.mark.timeout(30)
+def test_create_scenario() -> None:
+    global _scenario_id
+    scenario = client.scenarios.create(
+        name=unique_name("scenario"),
+        input_context={"problem_statement": "echo hello"},
+        scoring_contract={
+            "scoring_function_parameters": [
+                {
+                    "name": "cmd-zero",
+                    "scorer": {"type": "command_scorer", "command": "true"},
+                    "weight": 1,
+                }
+            ]
+        },
+    )
+    _scenario_id = scenario.id
+
+
+@pytest.mark.timeout(30)
+def test_start_scenario_run_and_await_env_ready() -> None:
+    assert _scenario_id
+    run = client.scenarios.start_run_and_await_env_ready(
+        scenario_id=_scenario_id,
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60),
+    )
+    assert run.scenario_id == _scenario_id
+    global _run_id
+    _run_id = run.id
+
+
+@pytest.mark.timeout(30)
+def test_score_and_complete_scenario_run() -> None:
+    assert _run_id
+    scored = client.scenarios.runs.score_and_complete(
+        _run_id, polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60)
+    )
+    assert scored.state in {"completed", "scored", "running", "failed", "timeout", "canceled"}
+
+
+@pytest.mark.timeout(30)
+def test_create_benchmark_and_start_run() -> None:
+    assert _scenario_id
+    benchmark = client.benchmarks.create(name=unique_name("benchmark"), scenario_ids=[_scenario_id])
+    assert benchmark.id
+    run = client.benchmarks.start_run(benchmark_id=benchmark.id)
+    assert run.benchmark_id == benchmark.id
diff --git a/tests/smoketests/test_snapshots.py b/tests/smoketests/test_snapshots.py
diff --git a/tests/smoketests/utils.py b/tests/smoketests/utils.py

-Original file line number
+Diff line change
@@ @@ -0,0 +1,6 @@ @@
 +"""Test package for smoketests.
++
 +Ensures relative imports like `from .utils import ...` work under pytest.
 +"""
++
++