diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f4c594677..9f2035b59 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -95,4 +95,4 @@ jobs:
         run: ./scripts/bootstrap
 
       - name: Run tests
-        run: ./scripts/test
+        run: ./scripts/test --ignore=tests/smoketests  # E2E smoke tests run from smoketests.yml
diff --git a/.github/workflows/smoketests.yml b/.github/workflows/smoketests.yml
new file mode 100644
index 000000000..d1d3999ef
--- /dev/null
+++ b/.github/workflows/smoketests.yml
@@ -0,0 +1,63 @@
+name: Smoketests
+
+on:
+  workflow_dispatch:
+    inputs:
+      environment:
+        description: "Target environment"
+        type: choice
+        default: dev
+        options:
+          - dev
+          - prod
+
+# The job only reads the repository; no write scopes are needed.
+permissions:
+  contents: read
+
+jobs:
+  smoke:
+    runs-on: ubuntu-latest
+    timeout-minutes: 120
+    steps:
+      # Checks out into the workspace root, so every later step runs from the
+      # repository root where requirements-dev.lock and tests/ live.
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: |
+          # `uv pip install` needs a virtual environment to install into.
+          uv venv
+          uv pip install -r requirements-dev.lock
+
+      - name: Configure environment
+        env:
+          # Passed through env (not inlined into the script) so the input
+          # value is never interpreted as shell code.
+          TARGET_ENVIRONMENT: ${{ github.event.inputs.environment }}
+          DEV_KEY: ${{ secrets.RUNLOOP_SMOKETEST_DEV_API_KEY }}
+          PROD_KEY: ${{ secrets.RUNLOOP_SMOKETEST_PROD_API_KEY }}
+        run: |
+          if [ "${TARGET_ENVIRONMENT}" = "prod" ]; then
+            echo "RUNLOOP_API_KEY=${PROD_KEY}" >> "$GITHUB_ENV"
+            echo "RUNLOOP_BASE_URL=https://api.runloop.ai" >> "$GITHUB_ENV"
+          else
+            echo "RUNLOOP_API_KEY=${DEV_KEY}" >> "$GITHUB_ENV"
+            echo "RUNLOOP_BASE_URL=https://api.runloop.pro" >> "$GITHUB_ENV"
+          fi
+          echo "DEBUG=false" >> "$GITHUB_ENV"
+          echo "RUN_SMOKETESTS=1" >> "$GITHUB_ENV"
+          echo "PYTHONPATH=${{ github.workspace }}/src" >> "$GITHUB_ENV"
+
+      - name: Run smoke tests (pytest via uv)
+        env:
+          # Force sequential to avoid overloading remote resources
+          PYTEST_ADDOPTS: "-n 1 -m smoketest"
+        run: |
+          uv run pytest -q -vv tests/smoketests
diff --git a/.gitignore b/.gitignore
index 95ceb189a..898a822ad 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,5 @@ dist
 .envrc
 codegen.log
 Brewfile.lock.json
+
+.DS_Store
diff --git a/pyproject.toml b/pyproject.toml
index 3e52c5ea7..3aef4322c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,6 +51,7 @@ dev-dependencies = [
     "respx",
     "pytest",
     "pytest-asyncio",
+    "pytest-timeout",
     "ruff",
     "time-machine",
     "nox",
@@ -134,7 +135,7 @@ replacement = '[\1](https://github.com/runloopai/api-client-python/tree/main/\g<
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
-addopts = "--tb=short -n auto"
+addopts = "--tb=short -n auto --dist=loadfile -m 'not smoketest'"  # smoketests are opt-in: pass -m smoketest
 xfail_strict = true
 asyncio_mode = "auto"
 asyncio_default_fixture_loop_scope = "session"
@@ -142,7 +143,9 @@ filterwarnings = [
   "error"
 ]
 markers = [
-  "skip_if_strict: skip test if using strict validation (for prism mock server issues)"
+  "skip_if_strict: skip test if using strict validation (for prism mock server issues)",
+  "timeout: per-test timeout provided by pytest-timeout",
+  "smoketest: end-to-end smoke tests against real API",
 ]
 
 [tool.pyright]
diff --git a/requirements-dev.lock b/requirements-dev.lock
index 2a74d01aa..8bad418e9 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -98,6 +98,8 @@ pyright==1.1.399
 pytest==8.3.3
     # via pytest-asyncio
     # via pytest-xdist
+pytest-timeout==2.3.1
+    # via runloop-api-client (dev)
 pytest-asyncio==0.24.0
 pytest-xdist==3.7.0
 python-dateutil==2.8.2
diff --git a/scripts/test b/scripts/test
index dbeda2d21..f2ba82342 100755
--- a/scripts/test
+++ b/scripts/test
@@ -55,7 +55,15 @@ fi
 export DEFER_PYDANTIC_BUILD=false
 
 echo "==> Running tests"
-rye run pytest "$@"
+
+# Deselect tests marked "smoketest" unless RUN_SMOKETESTS=1 is exported.
+# ":-" keeps the check safe when the variable is unset, even under "set -u".
+PYTEST_ARGS=()
+if [ "${RUN_SMOKETESTS:-}" != "1" ]; then
+  PYTEST_ARGS+=( -m "not smoketest" )
+fi
+
+rye run pytest "${PYTEST_ARGS[@]}" "$@"
 
 echo "==> Running Pydantic v1 tests"
-rye run nox -s test-pydantic-v1 -- "$@"
+rye run nox -s test-pydantic-v1 -- "${PYTEST_ARGS[@]}" "$@"
diff --git a/tests/smoketests/README.md b/tests/smoketests/README.md
new file mode 100644
index 000000000..ca0a3f187
--- /dev/null
+++ b/tests/smoketests/README.md
@@ -0,0 +1,27 @@
+# Smoke tests
+
+End-to-end smoke tests run against the real API to validate critical flows (devboxes, snapshots, blueprints, executions/log tailing, scenarios/benchmarks).
+
+- Local run (requires `RUNLOOP_API_KEY`):
+
+```bash
+export RUNLOOP_API_KEY=...  # required
+# optionally override API base
+# export RUNLOOP_BASE_URL=https://api.runloop.ai
+
+# Install deps and run via uv
+uv pip install -r requirements-dev.lock
+
+# Run all smoke tests (-m smoketest overrides the default "-m 'not smoketest'" from pyproject.toml)
+RUN_SMOKETESTS=1 uv run pytest -q -vv -m smoketest tests/smoketests
+
+# Run a single file
+RUN_SMOKETESTS=1 uv run pytest -q -vv -m smoketest tests/smoketests/test_devboxes.py
+
+# Run a single test by name
+RUN_SMOKETESTS=1 uv run pytest -q -m smoketest -k "test_create_and_await_running_timeout" tests/smoketests/test_devboxes.py
+```
+
+- GitHub Actions: add the repo secrets `RUNLOOP_SMOKETEST_DEV_API_KEY` and `RUNLOOP_SMOKETEST_PROD_API_KEY`. The workflow `.github/workflows/smoketests.yml` takes an `environment` input (`dev` or `prod`) and runs these tests in CI.
+
+
diff --git a/tests/smoketests/__init__.py b/tests/smoketests/__init__.py
new file mode 100644
index 000000000..2f6a2ae7a
--- /dev/null
+++ b/tests/smoketests/__init__.py
@@ -0,0 +1,6 @@
+"""Test package for smoketests.
+
+Ensures relative imports like `from .utils import ...` work under pytest.
+"""
+
+
diff --git a/tests/smoketests/test_blueprints.py b/tests/smoketests/test_blueprints.py
new file mode 100644
index 000000000..3ca01bc89
--- /dev/null
+++ b/tests/smoketests/test_blueprints.py
@@ -0,0 +1,75 @@
+"""Smoke tests for building a blueprint and launching devboxes from it."""
+
+import pytest
+
+from runloop_api_client.lib.polling import PollingConfig
+
+from .utils import make_client, unique_name
+
+pytestmark = [pytest.mark.smoketest]
+
+
+client = make_client()
+
+
+# Tests in this module run sequentially and may depend on state left behind by
+# earlier tests. This avoids overloading remote resources and saves time.
+_blueprint_id = None
+_blueprint_name = unique_name("bp")
+
+
+def teardown_module() -> None:
+    """Delete the blueprint created by this module, ignoring any failure."""
+    if _blueprint_id:
+        try:
+            client.blueprints.delete(_blueprint_id)
+        except Exception:
+            # Deliberately best-effort: a failed delete must not fail the run.
+            pass
+
+
+# NOTE(review): the 30s pytest-timeout is far below the 30-minute polling budget
+# configured below, so the marker is the effective limit - confirm the intent.
+@pytest.mark.timeout(30)
+def test_create_blueprint_and_await_build() -> None:
+    """Build a blueprint and record its id for the tests that follow."""
+    global _blueprint_id
+    created = client.blueprints.create_and_await_build_complete(
+        name=_blueprint_name,
+        polling_config=PollingConfig(max_attempts=180, interval_seconds=5.0, timeout_seconds=30 * 60),
+    )
+    # Record the id before asserting so teardown_module can still delete the
+    # blueprint when the status check fails.
+    _blueprint_id = created.id
+    assert created.status == "build_complete"
+
+
+@pytest.mark.timeout(30)
+def test_start_devbox_from_base_blueprint_by_id() -> None:
+    """Launch a devbox from the blueprint id recorded by the build test."""
+    assert _blueprint_id
+    devbox = client.devboxes.create_and_await_running(
+        blueprint_id=_blueprint_id,
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60),
+    )
+    try:
+        assert devbox.blueprint_id == _blueprint_id
+        assert devbox.status == "running"
+    finally:
+        # Shut down even when an assertion fails so the devbox is not leaked.
+        client.devboxes.shutdown(devbox.id)
+
+
+@pytest.mark.timeout(30)
+def test_start_devbox_from_base_blueprint_by_name() -> None:
+    """Launch a devbox from the blueprint name used by the build test."""
+    devbox = client.devboxes.create_and_await_running(
+        blueprint_name=_blueprint_name,
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60),
+    )
+    try:
+        assert devbox.blueprint_id
+        assert devbox.status == "running"
+    finally:
+        # Shut down even when an assertion fails so the devbox is not leaked.
+        client.devboxes.shutdown(devbox.id)
diff --git a/tests/smoketests/test_devboxes.py b/tests/smoketests/test_devboxes.py
new file mode 100644
index 000000000..3ce930698
--- /dev/null
+++ b/tests/smoketests/test_devboxes.py
@@ -0,0 +1,84 @@
+"""Smoke tests for the devbox lifecycle: create, await running, list, retrieve, shutdown."""
+
+import pytest
+
+from runloop_api_client.lib.polling import PollingConfig, PollingTimeout
+
+from .utils import make_client, unique_name
+
+pytestmark = [pytest.mark.smoketest]
+
+
+client = make_client()
+
+# Tests in this module run sequentially and may depend on state left behind by
+# earlier tests. This avoids overloading remote resources and saves time.
+_devbox_id = None
+
+
+@pytest.mark.timeout(30)
+def test_create_devbox() -> None:
+    """Create a devbox without waiting for it to run, then shut it down."""
+    created = client.devboxes.create(name=unique_name("smoke-devbox"))
+    assert created.id
+    client.devboxes.shutdown(created.id)
+
+
+@pytest.mark.timeout(30)
+def test_await_running_create_and_await_running() -> None:
+    """Create a devbox, wait until it is running, and record its id for later tests."""
+    global _devbox_id
+    created = client.devboxes.create_and_await_running(
+        name=unique_name("smoketest-devbox2"),
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60),
+    )
+    # Record the id first so test_shutdown_devbox can still clean up if the
+    # status assertion fails.
+    _devbox_id = created.id
+    assert created.status == "running"
+
+
+def test_list_devboxes() -> None:
+    """List devboxes and expect at least one entry."""
+    page = client.devboxes.list(limit=10)
+    assert isinstance(page.devboxes, list)
+    assert len(page.devboxes) > 0
+
+
+def test_retrieve_devbox() -> None:
+    """Retrieve the devbox recorded earlier and check that the id round-trips."""
+    assert _devbox_id
+    view = client.devboxes.retrieve(_devbox_id)
+    assert view.id == _devbox_id
+
+
+def test_shutdown_devbox() -> None:
+    """Shut down the devbox recorded earlier and check the reported status."""
+    assert _devbox_id
+    view = client.devboxes.shutdown(_devbox_id)
+    assert view.id == _devbox_id
+    assert view.status == "shutdown"
+
+
+# The launch command sleeps 70s and the polling budget is 80s; a 30s
+# pytest-timeout would abort the test before that budget could ever be used.
+@pytest.mark.timeout(120)
+def test_create_and_await_running_long_set_up() -> None:
+    """Await a devbox whose launch command sleeps for 70 seconds."""
+    created = client.devboxes.create_and_await_running(
+        name=unique_name("smoketest-devbox-await-running-long-set-up"),
+        launch_parameters={"launch_commands": ["sleep 70"], "keep_alive_time_seconds": 30},
+        polling_config=PollingConfig(interval_seconds=5.0, timeout_seconds=80),
+    )
+    assert created.status == "running"
+
+
+@pytest.mark.timeout(30)
+def test_create_and_await_running_timeout() -> None:
+    """Expect PollingTimeout when polling is limited to a single attempt."""
+    with pytest.raises(PollingTimeout):
+        client.devboxes.create_and_await_running(
+            name=unique_name("smoketest-devbox-await-running-timeout"),
+            launch_parameters={"launch_commands": ["sleep 70"], "keep_alive_time_seconds": 30},
+            polling_config=PollingConfig(max_attempts=1, interval_seconds=0.1),
+        )
diff --git a/tests/smoketests/test_executions.py b/tests/smoketests/test_executions.py
new file mode 100644
index 000000000..8a0acf802
--- /dev/null
+++ b/tests/smoketests/test_executions.py
@@ -0,0 +1,72 @@
+"""Smoke tests for running a command on a devbox and tailing its stdout."""
+
+import pytest
+
+from runloop_api_client.lib.polling import PollingConfig
+
+from .utils import make_client, unique_name
+
+pytestmark = [pytest.mark.smoketest]
+
+
+client = make_client()
+
+
+# Tests in this module run sequentially and may depend on state left behind by
+# earlier tests. This avoids overloading remote resources and saves time.
+_devbox_id = None
+_exec_id = None
+
+
+@pytest.fixture(scope="module", autouse=True)
+def shutdown_devbox_after_tests():
+    """Shut down the devbox launched by this module once its last test has run.
+
+    ``autouse`` is required: no test requests this fixture by name, so without it
+    the teardown below would never execute and the devbox would be left running.
+    """
+    yield
+    # _devbox_id is read at teardown time, after test_launch_devbox has set it.
+    if _devbox_id:
+        client.devboxes.shutdown(_devbox_id)
+
+
+@pytest.mark.timeout(30)
+def test_launch_devbox() -> None:
+    """Launch the devbox shared by the remaining tests in this module."""
+    global _devbox_id
+    created = client.devboxes.create_and_await_running(
+        name=unique_name("exec-devbox"),
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60),
+    )
+    _devbox_id = created.id
+
+
+@pytest.mark.timeout(30)
+def test_execute_async_and_await_completion() -> None:
+    """Start a command asynchronously and poll until it reports completion."""
+    global _exec_id
+    assert _devbox_id
+    started = client.devboxes.executions.execute_async(_devbox_id, command="echo hello && sleep 1")
+    _exec_id = started.execution_id
+    completed = client.devboxes.executions.await_completed(
+        _exec_id,
+        devbox_id=_devbox_id,
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=2.0, timeout_seconds=10 * 60),
+    )
+    assert completed.status == "completed"
+
+
+@pytest.mark.timeout(30)
+def test_tail_stdout_logs() -> None:
+    """Open the stdout stream of the earlier execution and read until output arrives."""
+    assert _devbox_id and _exec_id
+    stream = client.devboxes.executions.stream_stdout_updates(execution_id=_exec_id, devbox_id=_devbox_id)
+    received = ""
+    for chunk in stream:
+        received += getattr(chunk, "output", "") or ""
+        if received:
+            break
+    # NOTE(review): this only checks the type, so an empty stream also passes -
+    # consider asserting on the expected "hello" output.
+    assert isinstance(received, str)
diff --git a/tests/smoketests/test_scenarios_benchmarks.py b/tests/smoketests/test_scenarios_benchmarks.py
new file mode 100644
index 000000000..554048bf1
--- /dev/null
+++ b/tests/smoketests/test_scenarios_benchmarks.py
@@ -0,0 +1,87 @@
+"""Smoke tests for scenarios, scenario runs, and benchmarks."""
+
+import pytest
+
+from runloop_api_client.lib.polling import PollingConfig
+
+from .utils import make_client, unique_name
+
+pytestmark = [pytest.mark.smoketest]
+
+
+client = make_client()
+
+
+# Tests in this module run sequentially and may depend on state left behind by
+# earlier tests. This avoids overloading remote resources and saves time.
+_scenario_id = None
+_run_id = None
+_devbox_id = None
+
+
+@pytest.fixture(scope="module", autouse=True)
+def shutdown_devbox_after_tests():
+    """Shut down the scenario run's devbox once this module's last test has run.
+
+    ``autouse`` is required: no test requests this fixture by name, so without it
+    the teardown below would never execute and the devbox would be left running.
+    """
+    yield
+    # _devbox_id is read at teardown time, after the scenario run test has set it.
+    if _devbox_id:
+        client.devboxes.shutdown(_devbox_id)
+
+
+@pytest.mark.timeout(30)
+def test_create_scenario() -> None:
+    """Create a scenario with a single command scorer and record its id."""
+    global _scenario_id
+    scenario = client.scenarios.create(
+        name=unique_name("scenario"),
+        input_context={"problem_statement": "echo hello"},
+        scoring_contract={
+            "scoring_function_parameters": [
+                {
+                    "name": "cmd-zero",
+                    "scorer": {"type": "command_scorer", "command": "true"},
+                    "weight": 1,
+                }
+            ]
+        },
+    )
+    _scenario_id = scenario.id
+
+
+@pytest.mark.timeout(30)
+def test_start_scenario_run_and_await_env_ready() -> None:
+    """Start a run of the recorded scenario and wait for its environment."""
+    global _run_id, _devbox_id
+    assert _scenario_id
+    run = client.scenarios.start_run_and_await_env_ready(
+        scenario_id=_scenario_id,
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60),
+    )
+    # Record ids before asserting so later tests and teardown still see them.
+    _run_id = run.id
+    _devbox_id = run.devbox_id
+    assert run.scenario_id == _scenario_id
+
+
+@pytest.mark.timeout(30)
+def test_score_and_complete_scenario_run() -> None:
+    """Score and complete the scenario run started by the previous test."""
+    assert _run_id
+    scored = client.scenarios.runs.score_and_complete(
+        _run_id, polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60)
+    )
+    assert scored.state in {"completed", "scored", "running", "failed", "timeout", "canceled"}
+
+
+@pytest.mark.timeout(30)
+def test_create_benchmark_and_start_run() -> None:
+    """Create a benchmark from the recorded scenario and start a run of it."""
+    assert _scenario_id
+    benchmark = client.benchmarks.create(name=unique_name("benchmark"), scenario_ids=[_scenario_id])
+    assert benchmark.id
+    run = client.benchmarks.start_run(benchmark_id=benchmark.id)
+    assert run.benchmark_id == benchmark.id
diff --git a/tests/smoketests/test_snapshots.py b/tests/smoketests/test_snapshots.py
new file mode 100644
index 000000000..c5e3daa16
--- /dev/null
+++ b/tests/smoketests/test_snapshots.py
@@ -0,0 +1,54 @@
+"""Smoke tests for snapshotting a devbox disk and launching from the snapshot."""
+
+import pytest
+
+from runloop_api_client.lib.polling import PollingConfig
+
+from .utils import make_client, unique_name
+
+pytestmark = [pytest.mark.smoketest]
+
+
+client = make_client()
+
+
+# Tests in this module run sequentially and may depend on state left behind by
+# earlier tests. This avoids overloading remote resources and saves time.
+_devbox_id = None
+_snapshot_id = None
+
+
+def teardown_module() -> None:
+    """Shut down the devbox that test_snapshot_devbox leaves running."""
+    if _devbox_id:
+        client.devboxes.shutdown(_devbox_id)
+
+
+@pytest.mark.timeout(30)
+def test_snapshot_devbox() -> None:
+    """Launch a devbox, snapshot its disk, and record both ids."""
+    global _devbox_id, _snapshot_id
+    created = client.devboxes.create_and_await_running(
+        name=unique_name("snap-devbox"),
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60),
+    )
+    _devbox_id = created.id
+
+    snap = client.devboxes.snapshot_disk(_devbox_id, name=unique_name("snap"))
+    assert snap.id
+    _snapshot_id = snap.id
+
+
+@pytest.mark.timeout(30)
+def test_launch_devbox_from_snapshot() -> None:
+    """Launch a devbox from the recorded snapshot and check it reports that snapshot."""
+    assert _snapshot_id
+    launched = client.devboxes.create_and_await_running(
+        snapshot_id=_snapshot_id,
+        polling_config=PollingConfig(max_attempts=120, interval_seconds=5.0, timeout_seconds=20 * 60),
+    )
+    try:
+        assert launched.snapshot_id == _snapshot_id
+    finally:
+        # Shut down even when the assertion fails so the devbox is not leaked.
+        client.devboxes.shutdown(launched.id)
diff --git a/tests/smoketests/utils.py b/tests/smoketests/utils.py
new file mode 100644
index 000000000..60ab689ff
--- /dev/null
+++ b/tests/smoketests/utils.py
@@ -0,0 +1,37 @@
+"""Shared helpers for the smoke tests."""
+
+import os
+import time
+from typing import Any
+
+from runloop_api_client import Runloop
+
+
+def unique_name(prefix: str) -> str:
+    """Return ``prefix`` suffixed with the current Unix time in milliseconds."""
+    return f"{prefix}-{int(time.time() * 1000)}"
+
+
+THIRTY_SECOND_TIMEOUT = 30
+
+
+def make_client(**overrides: Any) -> Runloop:
+    """Create a Runloop client configured from the environment.
+
+    Reads RUNLOOP_BASE_URL and RUNLOOP_API_KEY from the environment. Keyword
+    arguments override the defaults below and are passed to ``Runloop``.
+    """
+    base_url = os.getenv("RUNLOOP_BASE_URL")
+    bearer_token = os.getenv("RUNLOOP_API_KEY")
+
+    # Default values similar to TS smoketests
+    kwargs: dict[str, Any] = {
+        "base_url": base_url,
+        "bearer_token": bearer_token,
+        "timeout": 120.0,
+        "max_retries": 1,
+    }
+    # An annotation on **overrides types each keyword value, so it is Any here.
+    kwargs.update(overrides)
+
+    return Runloop(**kwargs)