nightly-e2e #25
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Nightly E2E tests:
#
#   cloud-e2e               Cloud inference (NVIDIA Endpoint API) on ubuntu-latest.
#   cloud-experimental-e2e  Experimental cloud inference test (main script skips embedded
#                           check-docs + final cleanup; follow-up steps run check-docs,
#                           skip/05-network-policy.sh, then cleanup.sh --verify with if: always()).
#   gpu-e2e                 Local Ollama inference on a GPU self-hosted runner.
#                           Controlled by the GPU_E2E_ENABLED repository variable.
#                           Set vars.GPU_E2E_ENABLED to "true" in repo settings to enable.
#   notify-on-failure       Auto-creates a GitHub issue when any E2E job fails.
#
# Runs directly on the runner (not inside Docker) because OpenShell bootstraps
# a K3s cluster inside a privileged Docker container — nesting would break networking.
#
# NVIDIA_API_KEY for cloud-e2e and cloud-experimental-e2e:
#   - Repository secret: Settings → Secrets and variables → Actions → Repository secrets.
#   - Environment secret: only available if the job sets `environment: <that environment name>`.
#     (Storing the key under Environments / NVIDIA_API_KEY without `environment:` here leaves the
#     variable empty in the job — repository secrets and environment secrets are separate.)
# Only runs on schedule and manual dispatch — never on PRs (secret protection).
name: nightly-e2e
on:
  schedule:
    - cron: "0 0 * * *"  # daily at 00:00 UTC
  workflow_dispatch:     # allow manual runs from the Actions tab
permissions:
  contents: read         # least privilege; notify-on-failure elevates its own scope
concurrency:
  group: nightly-e2e
  cancel-in-progress: true  # a new nightly run supersedes a still-running one
jobs:
  cloud-e2e:
    # Fork guard: the schedule trigger would otherwise fire on forks,
    # where the NVIDIA_API_KEY secret is absent.
    if: github.repository == 'NVIDIA/NemoClaw'
    runs-on: ubuntu-latest
    timeout-minutes: 45
    steps:
      - name: Checkout
        uses: actions/checkout@v6
      - name: Run cloud E2E test
        env:
          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}  # repository secret (see header notes)
          NEMOCLAW_NON_INTERACTIVE: "1"                  # suppress interactive prompts in the test script
          NEMOCLAW_SANDBOX_NAME: "e2e-nightly"
          NEMOCLAW_RECREATE_SANDBOX: "1"                 # start from a fresh sandbox every run
          GITHUB_TOKEN: ${{ github.token }}
        run: bash test/e2e/test-full-e2e.sh
      - name: Upload install log on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: install-log
          path: /tmp/nemoclaw-e2e-install.log
          # Log may not exist if the failure happened before install started.
          if-no-files-found: ignore
| cloud-experimental-e2e: | |
| if: github.repository == 'NVIDIA/NemoClaw' | |
| runs-on: ubuntu-latest | |
| # Main suite + check-docs + network-policy skip script can exceed 45m on cold runners. | |
| timeout-minutes: 90 | |
| steps: | |
| - name: Checkout | |
| uses: actions/checkout@v6 | |
| # Split Phase 5f (check-docs) and Phase 6 (cleanup) out of the main script so CI shows | |
| # failures in dedicated steps; tear-down always runs last (if: always()). | |
| - name: Run cloud-experimental E2E test | |
| env: | |
| NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }} | |
| GITHUB_TOKEN: ${{ github.token }} | |
| # Non-interactive install (expect-driven Phase 3 optional). Runner has no expect; Phase 5e TUI skips if expect is absent. | |
| RUN_E2E_CLOUD_EXPERIMENTAL_INTERACTIVE_INSTALL: "0" | |
| NEMOCLAW_NON_INTERACTIVE: "1" | |
| NEMOCLAW_RECREATE_SANDBOX: "1" | |
| NEMOCLAW_POLICY_MODE: "custom" | |
| NEMOCLAW_POLICY_PRESETS: "npm,pypi" | |
| RUN_E2E_CLOUD_EXPERIMENTAL_SKIP_FINAL_CLEANUP: "1" | |
| RUN_E2E_CLOUD_EXPERIMENTAL_SKIP_CHECK_DOCS: "1" | |
| run: bash test/e2e/test-e2e-cloud-experimental.sh | |
| - name: Documentation checks (check-docs.sh) | |
| if: always() | |
| env: | |
| GITHUB_TOKEN: ${{ github.token }} | |
| run: | | |
| set -euo pipefail | |
| if [ -f "$HOME/.bashrc" ]; then | |
| # shellcheck source=/dev/null | |
| source "$HOME/.bashrc" 2>/dev/null || true | |
| fi | |
| export NVM_DIR="${NVM_DIR:-$HOME/.nvm}" | |
| if [ -s "$NVM_DIR/nvm.sh" ]; then | |
| # shellcheck source=/dev/null | |
| . "$NVM_DIR/nvm.sh" | |
| fi | |
| if [ -d "$HOME/.local/bin" ] && [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then | |
| export PATH="$HOME/.local/bin:$PATH" | |
| fi | |
| bash test/e2e/e2e-cloud-experimental/check-docs.sh | |
| - name: Network policy checks (skip/05-network-policy.sh) | |
| if: always() | |
| env: | |
| NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }} | |
| GITHUB_TOKEN: ${{ github.token }} | |
| SANDBOX_NAME: e2e-cloud-experimental | |
| NEMOCLAW_SANDBOX_NAME: e2e-cloud-experimental | |
| run: | | |
| set -euo pipefail | |
| if [ -f "$HOME/.bashrc" ]; then | |
| # shellcheck source=/dev/null | |
| source "$HOME/.bashrc" 2>/dev/null || true | |
| fi | |
| export NVM_DIR="${NVM_DIR:-$HOME/.nvm}" | |
| if [ -s "$NVM_DIR/nvm.sh" ]; then | |
| # shellcheck source=/dev/null | |
| . "$NVM_DIR/nvm.sh" | |
| fi | |
| if [ -d "$HOME/.local/bin" ] && [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then | |
| export PATH="$HOME/.local/bin:$PATH" | |
| fi | |
| bash test/e2e/e2e-cloud-experimental/skip/05-network-policy.sh | |
| - name: Tear down cloud-experimental sandbox (always) | |
| if: always() | |
| env: | |
| SANDBOX_NAME: e2e-cloud-experimental | |
| NEMOCLAW_SANDBOX_NAME: e2e-cloud-experimental | |
| run: | | |
| set -euo pipefail | |
| if [ -f "$HOME/.bashrc" ]; then | |
| # shellcheck source=/dev/null | |
| source "$HOME/.bashrc" 2>/dev/null || true | |
| fi | |
| export NVM_DIR="${NVM_DIR:-$HOME/.nvm}" | |
| if [ -s "$NVM_DIR/nvm.sh" ]; then | |
| # shellcheck source=/dev/null | |
| . "$NVM_DIR/nvm.sh" | |
| fi | |
| if [ -d "$HOME/.local/bin" ] && [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then | |
| export PATH="$HOME/.local/bin:$PATH" | |
| fi | |
| bash test/e2e/e2e-cloud-experimental/cleanup.sh --verify | |
| - name: Upload install log on failure | |
| if: failure() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: install-log-cloud-experimental | |
| path: /tmp/nemoclaw-e2e-cloud-experimental-install.log | |
| if-no-files-found: ignore | |
  # ── GPU E2E (Ollama local inference) ──────────────────────────
  # Enable by setting repository variable GPU_E2E_ENABLED=true
  # (Settings → Secrets and variables → Actions → Variables)
  #
  # Runner labels: using 'self-hosted' for now. Refine to
  # [self-hosted, linux, x64, gpu] once NVIDIA runner labels are confirmed.
  gpu-e2e:
    # Fork guard + opt-in flag: vars.GPU_E2E_ENABLED is unset by default, so this
    # job is skipped unless the repository variable is explicitly set to "true".
    if: github.repository == 'NVIDIA/NemoClaw' && vars.GPU_E2E_ENABLED == 'true'
    runs-on: self-hosted
    timeout-minutes: 60
    env:
      NEMOCLAW_NON_INTERACTIVE: "1"            # suppress interactive prompts
      NEMOCLAW_SANDBOX_NAME: "e2e-gpu-ollama"
      NEMOCLAW_RECREATE_SANDBOX: "1"           # fresh sandbox every run
      NEMOCLAW_PROVIDER: "ollama"              # local inference instead of cloud endpoint
    steps:
      - name: Checkout
        uses: actions/checkout@v6
      # Preflight: fail fast with readable diagnostics if the self-hosted runner
      # has no working GPU driver or Docker daemon before the long test starts.
      - name: Verify GPU availability
        run: |
          echo "=== GPU Info ==="
          nvidia-smi
          echo ""
          echo "=== VRAM ==="
          nvidia-smi --query-gpu=name,memory.total --format=csv,noheader
          echo ""
          echo "=== Docker ==="
          docker info --format '{{.ServerVersion}}'
      - name: Run GPU E2E test (Ollama local inference)
        run: bash test/e2e/test-gpu-e2e.sh
      - name: Upload install log on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: gpu-e2e-install-log
          path: /tmp/nemoclaw-gpu-e2e-install.log
          if-no-files-found: ignore
      - name: Upload test log on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: gpu-e2e-test-log
          path: /tmp/nemoclaw-gpu-e2e-test.log
          if-no-files-found: ignore
| notify-on-failure: | |
| runs-on: ubuntu-latest | |
| needs: [cloud-e2e, cloud-experimental-e2e, gpu-e2e] | |
| if: ${{ always() && (needs.cloud-e2e.result == 'failure' || needs.cloud-experimental-e2e.result == 'failure' || needs.gpu-e2e.result == 'failure') }} | |
| permissions: | |
| issues: write | |
| steps: | |
| - name: Create or update failure issue | |
| uses: actions/github-script@v7 | |
| with: | |
| script: | | |
| const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; | |
| const title = 'Nightly E2E failed'; | |
| const { data: existing } = await github.rest.issues.listForRepo({ | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| state: 'open', | |
| labels: 'CI/CD', | |
| per_page: 100, | |
| }); | |
| const match = existing.find(i => !i.pull_request && i.title.startsWith(title)); | |
| if (match) { | |
| await github.rest.issues.createComment({ | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| issue_number: match.number, | |
| body: `Failed again on ${new Date().toISOString().split('T')[0]}.\n\n**Run:** ${runUrl}\n**Artifacts:** Check the run artifacts for install/test logs (artifact names vary by job).`, | |
| }); | |
| } else { | |
| await github.rest.issues.create({ | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| title: `${title} — ${new Date().toISOString().split('T')[0]}`, | |
| body: `The nightly E2E pipeline failed.\n\n**Run:** ${runUrl}\n**Artifacts:** Check the run artifacts for install/test logs (artifact names vary by job).`, | |
| labels: ['bug', 'CI/CD'], | |
| }); | |
| } |