diff --git a/.claude/commands/klaud-pr-status-html.md b/.claude/commands/klaud-pr-status-html.md index 6051357c6..47d6024f8 100644 --- a/.claude/commands/klaud-pr-status-html.md +++ b/.claude/commands/klaud-pr-status-html.md @@ -30,7 +30,7 @@ State buckets: - **RUNNING** — no failed checks; at least one is `QUEUED` / `IN_PROGRESS` / `PENDING`. - **READY** — no failed, no pending, and at least one `Run Sweep` check is `SUCCESS`. - **NO_SUCCESS** — sweep ran but never produced a `SUCCESS` (e.g. all matrix jobs got SKIPPED). -- **NO_SWEEP** — no `Run Sweep` check exists for this head SHA at all (sweep never triggered — usually missing `full-sweep-enabled` label). +- **NO_SWEEP** — no `Run Sweep` check exists for this head SHA at all (sweep never triggered — usually missing a sweep label such as `full-sweep-enabled` or `non-canary-full-sweep-enabled`). ```bash : > /tmp/klaud_pr_status.tsv diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 2252d8b7c..8517d1580 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -180,18 +180,18 @@ test-config --config-keys *-b200-* --conc 4 8 --config-files .github/configs/nvi ## Reusing an Approved PR Full Sweep -If a PR has already run the full untrimmed sweep (`full-sweep-enabled` label), -a maintainer can avoid running the same sweep again after merge by leaving a -PR comment before merging: +If a PR has already run the full untrimmed sweep (`full-sweep-enabled` with a +sequential canary, or `non-canary-full-sweep-enabled` without one), a +maintainer can avoid running the same sweep again after merge by leaving a PR +comment before merging: ``` /reuse-sweep-run ``` -That reuses the latest successful `run-sweep.yml` `pull_request` run for the -PR's current head SHA. 
If the PR was rebased or had to merge `main` after the -successful sweep — so the current head no longer has a matching run — pin the -source run explicitly: +That reuses the latest successful `run-sweep.yml` `pull_request` run whose +commit is still part of the PR. To select a particular eligible successful +run, pin the source run explicitly: ``` /reuse-sweep-run <run_id> @@ -209,10 +209,11 @@ Only comments from `OWNER`, `MEMBER`, or `COLLABORATOR` users authorize reuse. The most recent matching comment wins, so a maintainer can supersede an earlier pin by leaving a new `/reuse-sweep-run [<run_id>]` comment. -Reuse fails closed: if the comment is present but the `full-sweep-enabled` -label, source PR run, or artifacts cannot be validated, the push-to-main -workflow fails instead of falling back to a cluster sweep. Without the comment, -the push-to-main workflow runs the normal full sweep. +Reuse fails closed: if the comment is present but neither full-sweep label +(`full-sweep-enabled` or `non-canary-full-sweep-enabled`) is present, or if +the source PR run or artifacts cannot be validated, the push-to-main workflow +fails instead of falling back to a cluster sweep. Without the comment, the +push-to-main workflow runs the normal full sweep. 
## Validation Architecture diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index 77c99c974..3533e8175 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -8,6 +8,7 @@ concurrency: (github.event.action == 'labeled' || github.event.action == 'unlabeled') && github.event.label.name != 'sweep-enabled' && github.event.label.name != 'full-sweep-enabled' && + github.event.label.name != 'non-canary-full-sweep-enabled' && github.run_id || 'active' }} @@ -39,7 +40,8 @@ jobs: ( (github.event.action != 'labeled' && github.event.action != 'unlabeled') || github.event.label.name == 'sweep-enabled' || - github.event.label.name == 'full-sweep-enabled' + github.event.label.name == 'full-sweep-enabled' || + github.event.label.name == 'non-canary-full-sweep-enabled' ) steps: - name: Checkout code @@ -61,12 +63,14 @@ jobs: !github.event.pull_request.draft && ( contains(github.event.pull_request.labels.*.name, 'sweep-enabled') || - contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') + contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') || + contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled') ) && ( (github.event.action != 'labeled' && github.event.action != 'unlabeled') || github.event.label.name == 'sweep-enabled' || - github.event.label.name == 'full-sweep-enabled' + github.event.label.name == 'full-sweep-enabled' || + github.event.label.name == 'non-canary-full-sweep-enabled' ) ) || ( @@ -85,10 +89,13 @@ jobs: - name: Reject conflicting sweep labels if: >- github.event_name == 'pull_request' && - contains(github.event.pull_request.labels.*.name, 'sweep-enabled') && - contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') + ( + (contains(github.event.pull_request.labels.*.name, 'sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled')) || + (contains(github.event.pull_request.labels.*.name, 
'sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled')) || + (contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled')) + ) run: | - echo "::error::PR has both 'sweep-enabled' and 'full-sweep-enabled' labels. Remove one — 'full-sweep-enabled' runs the full intermediate concurrency sweep; 'sweep-enabled' trims to min(conc) per parallelism config." + echo "::error::PR has multiple conflicting sweep labels. Pick exactly one of: 'sweep-enabled' (trims to min(conc) per parallelism config), 'full-sweep-enabled' (full intermediate concurrency sweep, with canary gate), or 'non-canary-full-sweep-enabled' (full sweep, no canary gate)." exit 1 - name: Checkout code @@ -135,9 +142,87 @@ jobs: --ref "${{ github.ref }}" \ --workflow-id "run-sweep.yml" - sweep-multi-node-1k1k: + canary-select: needs: setup - if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null' }} + if: >- + needs.setup.outputs.reuse-enabled != 'true' && + github.event_name == 'pull_request' && + contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') + runs-on: ubuntu-latest + outputs: + canary-config: ${{ steps.pick.outputs.canary-config }} + remaining-search-space-config: ${{ steps.pick.outputs.remaining-search-space-config }} + steps: + - id: pick + env: + SEARCH_SPACE: ${{ needs.setup.outputs.search-space-config }} + run: | + selection=$(jq -c ' + def remove_one($needle): + if $needle == null then . + else + (index($needle)) as $idx + | if $idx == null then . else del(.[$idx]) end + end; + + # Canary = the lowest-conc single-node 1k1k/8k1k entry, run as a + # benchmark-only smoke test. Entries with run-eval == true are + # excluded so the picked canary never runs an eval pass. 
+ (((.single_node["1k1k"] // []) + (.single_node["8k1k"] // [])) + | map(select(.["run-eval"] != true))) as $candidates + | (if ($candidates | length) == 0 then null else ($candidates | min_by(.conc)) end) as $canary + | { + canary: (if $canary == null then [] else [$canary] end), + remaining: ( + . + | .single_node = (.single_node // {}) + | .single_node["1k1k"] = ((.single_node["1k1k"] // []) | remove_one($canary)) + | .single_node["8k1k"] = ((.single_node["8k1k"] // []) | remove_one($canary)) + ) + } + ' <<<"$SEARCH_SPACE") + echo "canary-config=$(jq -c '.canary' <<<"$selection")" >> "$GITHUB_OUTPUT" + echo "remaining-search-space-config=$(jq -c '.remaining' <<<"$selection")" >> "$GITHUB_OUTPUT" + + canary-sweep: + needs: canary-select + if: ${{ needs.canary-select.outputs.canary-config != '' && needs.canary-select.outputs.canary-config != '[]' }} + uses: ./.github/workflows/benchmark-tmpl.yml + name: canary / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.canary-select.outputs.canary-config) }} + secrets: inherit + with: + exp-name: ${{ matrix.config.exp-name }} + isl: ${{ matrix.config.isl }} + osl: ${{ matrix.config.osl }} + max-model-len: ${{ matrix.config.max-model-len }} + runner: ${{ matrix.config.runner }} + image: ${{ matrix.config.image }} + model: ${{ matrix.config.model }} + model-prefix: ${{ matrix.config.model-prefix }} + framework: ${{ matrix.config.framework }} + precision: ${{ matrix.config.precision }} + tp: ${{ matrix.config.tp }} + ep: ${{ matrix.config.ep }} + dp-attn: ${{ matrix.config.dp-attn }} + conc: ${{ matrix.config.conc }} + spec-decoding: ${{ matrix.config.spec-decoding }} + disagg: ${{ matrix.config.disagg }} + run-eval: false + + sweep-multi-node-1k1k: + needs: [setup, canary-select, canary-sweep] + if: >- + ${{ + !cancelled() && + needs.setup.result == 'success' && + needs.setup.outputs.reuse-enabled != 'true' && + (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') && + 
toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null' + }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node 1k1k / strategy: @@ -174,8 +259,15 @@ jobs: run-eval: false sweep-multi-node-8k1k: - needs: setup - if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null' }} + needs: [setup, canary-select, canary-sweep] + if: >- + ${{ + !cancelled() && + needs.setup.result == 'success' && + needs.setup.outputs.reuse-enabled != 'true' && + (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') && + toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null' + }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node 8k1k / strategy: @@ -186,14 +278,22 @@ jobs: with: *multi-node-inputs sweep-single-node-1k1k: - needs: setup - if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' }} + needs: [setup, canary-select, canary-sweep] + if: >- + ${{ + !cancelled() && + needs.setup.result == 'success' && + needs.setup.outputs.reuse-enabled != 'true' && + (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') && + toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' && + toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k']) != '[]' + }} uses: ./.github/workflows/benchmark-tmpl.yml name: single-node 1k1k / strategy: fail-fast: false matrix: - config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }} + config: ${{ fromJson((needs.canary-sweep.result == 'success' && 
needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k'] }} secrets: inherit with: &single-node-inputs exp-name: ${{ matrix.config.exp-name }} @@ -215,20 +315,35 @@ jobs: run-eval: ${{ matrix.config.run-eval }} sweep-single-node-8k1k: - needs: setup - if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' }} + needs: [setup, canary-select, canary-sweep] + if: >- + ${{ + !cancelled() && + needs.setup.result == 'success' && + needs.setup.outputs.reuse-enabled != 'true' && + (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') && + toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' && + toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k']) != '[]' + }} uses: ./.github/workflows/benchmark-tmpl.yml name: single-node 8k1k / strategy: fail-fast: false matrix: - config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['8k1k'] }} + config: ${{ fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k'] }} secrets: inherit with: *single-node-inputs sweep-agentic: - needs: setup - if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).single_node['agentic']) != 'null' }} + needs: [setup, canary-select, canary-sweep] + if: >- + ${{ + !cancelled() && + needs.setup.result == 'success' && + needs.setup.outputs.reuse-enabled != 'true' && + (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') && + 
toJson(fromJson(needs.setup.outputs.search-space-config).single_node['agentic']) != 'null' + }} uses: ./.github/workflows/benchmark-tmpl.yml name: agentic / strategy: @@ -259,8 +374,15 @@ jobs: scenario-type: agentic-coding sweep-multi-node-agentic: - needs: setup - if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['agentic']) != 'null' }} + needs: [setup, canary-select, canary-sweep] + if: >- + ${{ + !cancelled() && + needs.setup.result == 'success' && + needs.setup.outputs.reuse-enabled != 'true' && + (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') && + toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['agentic']) != 'null' + }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node agentic / strategy: @@ -298,8 +420,16 @@ jobs: scenario-type: agentic-coding sweep-evals: - needs: setup - if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).evals) != '[]' && toJson(fromJson(needs.setup.outputs.search-space-config).evals) != 'null' }} + needs: [setup, canary-select, canary-sweep] + if: >- + ${{ + !cancelled() && + needs.setup.result == 'success' && + needs.setup.outputs.reuse-enabled != 'true' && + (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') && + toJson(fromJson(needs.setup.outputs.search-space-config).evals) != '[]' && + toJson(fromJson(needs.setup.outputs.search-space-config).evals) != 'null' + }} uses: ./.github/workflows/benchmark-tmpl.yml name: eval / strategy: @@ -328,8 +458,16 @@ jobs: eval-only: true sweep-multi-node-evals: - needs: setup - if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != '[]' && toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != 'null' }} + needs: [setup, canary-select, canary-sweep] + if: >- + 
${{ + !cancelled() && + needs.setup.result == 'success' && + needs.setup.outputs.reuse-enabled != 'true' && + (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') && + toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != '[]' && + toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != 'null' + }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node eval / strategy: @@ -368,6 +506,7 @@ jobs: collect-results: needs: [ + canary-sweep, sweep-single-node-1k1k, sweep-single-node-8k1k, sweep-agentic, @@ -381,6 +520,7 @@ jobs: always() && needs.setup.result == 'success' && ( + needs.canary-sweep.result == 'success' || needs.sweep-single-node-1k1k.result != 'skipped' || needs.sweep-single-node-8k1k.result != 'skipped' || needs.sweep-multi-node-1k1k.result != 'skipped' || @@ -592,12 +732,14 @@ jobs: !github.event.pull_request.draft && ( contains(github.event.pull_request.labels.*.name, 'sweep-enabled') || - contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') + contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') || + contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled') ) && ( (github.event.action != 'labeled' && github.event.action != 'unlabeled') || github.event.label.name == 'sweep-enabled' || - github.event.label.name == 'full-sweep-enabled' + github.event.label.name == 'full-sweep-enabled' || + github.event.label.name == 'non-canary-full-sweep-enabled' ) runs-on: ubuntu-latest permissions: diff --git a/AGENTS.md b/AGENTS.md index 173353b3d..56566ea8a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -59,12 +59,13 @@ Git: conventional commit messages. `[skip-sweep]` in commit message skips benchm ### Pull Request Sweep Labels -PRs do not run the sweep automatically - `run-sweep.yml` is gated on a label. Pick exactly one; setting both is rejected by the workflow's `setup` job. 
+PRs do not run the sweep automatically - `run-sweep.yml` is gated on a label. Pick exactly one; setting multiple sweep labels is rejected by the workflow's `setup` job. - `sweep-enabled` - runs the sweep with `--trim-conc` (each parallelism config reduced to its single lowest concurrency). Default for most PRs. -- `full-sweep-enabled` - runs the full intermediate concurrency sweep, identical to push-to-main. Use when intermediate points matter (e.g. a recipe change shifts the throughput/latency curve, not just its endpoints). +- `full-sweep-enabled` - runs the full intermediate concurrency sweep behind a sequential single-node canary gate. Use when intermediate points matter (e.g. a recipe change shifts the throughput/latency curve, not just its endpoints). +- `non-canary-full-sweep-enabled` - runs the full intermediate concurrency sweep without the canary gate. Use when the canary is flaky or not representative of the affected configuration. -**The sweep does not trigger while the PR has merge conflicts.** Even with `sweep-enabled` / `full-sweep-enabled` applied, the `run-sweep.yml` workflow will not start until the PR cleanly merges into main — a stale claude/* or update-* branch with a `perf-changelog.yaml` conflict (the common case) will sit in NO_SWEEP / NO_SUCCESS until rebased. Resolution recipe is documented in `KLAUD_DEBUG.md §1.1`: `git merge origin/main`, then `git checkout origin/main -- perf-changelog.yaml`, then re-append the PR's own changelog entry at the tail. Don't 3-way merge `perf-changelog.yaml`; whitespace edits silently re-trigger the deletion check. +**The sweep does not trigger while the PR has merge conflicts.** Even with `sweep-enabled`, `full-sweep-enabled`, or `non-canary-full-sweep-enabled` applied, the `run-sweep.yml` workflow will not start until the PR cleanly merges into main — a stale claude/* or update-* branch with a `perf-changelog.yaml` conflict (the common case) will sit in NO_SWEEP / NO_SUCCESS until rebased. 
Resolution recipe is documented in `KLAUD_DEBUG.md §1.1`: `git merge origin/main`, then `git checkout origin/main -- perf-changelog.yaml`, then re-append the PR's own changelog entry at the tail. Don't 3-way merge `perf-changelog.yaml`; whitespace edits silently re-trigger the deletion check. Push-to-main always runs the full untrimmed sweep unless `[skip-sweep]` is in the commit message. Trim logic lives in `trim_conc()` in `utils/process_changelog.py`: single-node entries are grouped by every non-`conc` field and only the lowest-`conc` entry per group is kept; multi-node entries have their `conc` list collapsed to `[min(conc)]`. diff --git a/KLAUD_DEBUG.md b/KLAUD_DEBUG.md index 92eb76bfc..1f81b6a9f 100644 --- a/KLAUD_DEBUG.md +++ b/KLAUD_DEBUG.md @@ -193,7 +193,7 @@ Or check whether any other recipe on main uses the proposed tag — if zero uses ## 9. PR conventions for this repo - Image-bump / new-recipe PRs I open on behalf of the user (or that the user creates) get the **`[Klaud Cold]`** title prefix. -- Add the `full-sweep-enabled` label so a full sweep actually runs (`gh api -X POST ... labels[]=full-sweep-enabled`). Without it, the sweep is mostly SKIPPED. +- Add the `full-sweep-enabled` label so a canary-gated full sweep actually runs (`gh api -X POST ... labels[]=full-sweep-enabled`). Use `non-canary-full-sweep-enabled` instead only when the single-node canary is flaky or unrepresentative; it runs the full sweep without the canary gate. Without one of the sweep labels, the sweep is mostly SKIPPED. - After any code change that shifts a PR's scope (drops a recipe, changes an image tag), **update the PR title AND body in the same step** and **verify** with `gh pr view --json title,body` — `gh pr edit` silently fails (see §8). - `utils/merge_with_reuse.sh <PR>` is the merge entrypoint; it handles the `perf-changelog.yaml` auto-append. 
diff --git a/utils/find_reusable_sweep_run.py b/utils/find_reusable_sweep_run.py index 8af018a8e..3f814d2e5 100644 --- a/utils/find_reusable_sweep_run.py +++ b/utils/find_reusable_sweep_run.py @@ -276,7 +276,11 @@ def main() -> int: parser.add_argument("--event-name", required=True) parser.add_argument("--ref", required=True) parser.add_argument("--workflow-id", default="run-sweep.yml") - parser.add_argument("--full-sweep-label", default="full-sweep-enabled") + parser.add_argument( + "--full-sweep-label", + default="full-sweep-enabled,non-canary-full-sweep-enabled", + help="Comma-separated PR labels treated as 'full sweep'; reuse requires at least one.", + ) parser.add_argument("--pinned-run-command", default="/reuse-sweep-run") parser.add_argument( "--allowed-author-associations", @@ -355,10 +359,16 @@ def main() -> int: pr = github_api(args.repo, f"/pulls/{pr_number}", token) labels = label_names(pr) - if args.full_sweep_label not in labels: + accepted_full_sweep_labels = { + value.strip() + for value in args.full_sweep_label.split(",") + if value.strip() + } + if not accepted_full_sweep_labels.intersection(labels): + accepted = ", ".join(sorted(accepted_full_sweep_labels)) raise RuntimeError( - f"PR #{pr_number} has {args.pinned_run_command} authorization but not " - f"{args.full_sweep_label}." + f"PR #{pr_number} has {args.pinned_run_command} authorization but is " + f"missing any of: {accepted}." 
) if not pr.get("merged_at"): raise RuntimeError(f"PR #{pr_number} is not marked as merged.") diff --git a/utils/merge_with_reuse.sh b/utils/merge_with_reuse.sh index 9336b81c2..a94ea0f69 100755 --- a/utils/merge_with_reuse.sh +++ b/utils/merge_with_reuse.sh @@ -38,8 +38,13 @@ PR_STATE="$(jq -r '.state' <<<"$PR_INFO")" [ "$PR_STATE" = "OPEN" ] || die "PR #${PR} is ${PR_STATE}, expected OPEN" HEAD_BRANCH="$(jq -r '.headRefName' <<<"$PR_INFO")" -HAS_FULL_SWEEP="$(jq -r '[.labels[].name] | index("full-sweep-enabled") // ""' <<<"$PR_INFO")" -[ -n "$HAS_FULL_SWEEP" ] || die "PR #${PR} is missing the 'full-sweep-enabled' label" +HAS_FULL_SWEEP="$(jq -r ' + [.labels[].name] as $names + | if (($names | index("full-sweep-enabled")) != null) + or (($names | index("non-canary-full-sweep-enabled")) != null) + then "1" else "" end +' <<<"$PR_INFO")" +[ -n "$HAS_FULL_SWEEP" ] || die "PR #${PR} is missing 'full-sweep-enabled' or 'non-canary-full-sweep-enabled' label" # Warn early if no successful run exists on any current PR commit. 
PR_SHAS="$(gh api "repos/${REPO}/pulls/${PR}/commits" --paginate --jq '.[].sha')" diff --git a/utils/test_find_reusable_sweep_run.py b/utils/test_find_reusable_sweep_run.py index e779bbe8b..0f8a09ac7 100644 --- a/utils/test_find_reusable_sweep_run.py +++ b/utils/test_find_reusable_sweep_run.py @@ -455,3 +455,136 @@ def fake_paginated_github_api(repo, path, token, item_key, params=None): assert outputs["reuse-enabled"] == "false" assert outputs["reuse-source-pr-number"] == "1321" assert outputs["reuse-reason"] == "PR #1321 has no /reuse-sweep-run authorization" + + +def test_main_accepts_non_canary_full_sweep_label(monkeypatch, tmp_path) -> None: + """Reuse is enabled when the PR has only the non-canary full-sweep label.""" + comments = [ + { + "created_at": "2026-05-13T00:00:00Z", + "author_association": "OWNER", + "body": "/reuse-sweep-run 25763404168", + }, + ] + run = { + "id": 25763404168, + "event": "pull_request", + "status": "completed", + "conclusion": "success", + "path": ".github/workflows/run-sweep.yml", + "pull_requests": [{"number": 1321}], + "run_attempt": 1, + "html_url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/25763404168", + "head_sha": "abc123", + } + + def fake_github_api(repo, path, token, params=None): + if path == "/commits/merge-sha/pulls": + return [{"number": 1321}] + if path == "/pulls/1321": + return { + "merged_at": "2026-05-13T00:01:00Z", + "labels": [{"name": "non-canary-full-sweep-enabled"}], + "head": {"sha": "abc123"}, + } + if path == "/actions/runs/25763404168": + return run + raise AssertionError(f"unexpected GitHub API path: {path}") + + def fake_paginated_github_api(repo, path, token, item_key, params=None): + if path == "/issues/1321/comments": + return comments + if path == "/pulls/1321/commits": + return [{"sha": "abc123"}] + if path == "/actions/runs/25763404168/artifacts": + return [{"name": "results_bmk"}] + raise AssertionError(f"unexpected paginated GitHub API path: {path}") + + output_path = tmp_path / "outputs" + monkeypatch.setenv("GITHUB_TOKEN", "token") + 
monkeypatch.setattr(reuse, "github_api", fake_github_api) + monkeypatch.setattr(reuse, "paginated_github_api", fake_paginated_github_api) + monkeypatch.setattr( + reuse.sys, + "argv", + [ + "find_reusable_sweep_run.py", + "--repo", + "SemiAnalysisAI/InferenceX", + "--commit-sha", + "merge-sha", + "--event-name", + "push", + "--ref", + "refs/heads/main", + "--github-output", + str(output_path), + ], + ) + + assert reuse.main() == 0 + + outputs = dict(line.split("=", 1) for line in output_path.read_text().splitlines()) + assert outputs["reuse-enabled"] == "true" + + +def test_main_rejects_pr_with_neither_full_sweep_label(monkeypatch, tmp_path) -> None: + """Reuse raises RuntimeError when the PR has neither full-sweep label.""" + comments = [ + { + "created_at": "2026-05-13T00:00:00Z", + "author_association": "OWNER", + "body": "/reuse-sweep-run 25763404168", + }, + ] + + def fake_github_api(repo, path, token, params=None): + if path == "/commits/merge-sha/pulls": + return [{"number": 1321}] + if path == "/pulls/1321": + return { + "merged_at": "2026-05-13T00:01:00Z", + "labels": [{"name": "sweep-enabled"}], + "head": {"sha": "abc123"}, + } + raise AssertionError(f"unexpected GitHub API path: {path}") + + def fake_paginated_github_api(repo, path, token, item_key, params=None): + if path == "/issues/1321/comments": + return comments + raise AssertionError(f"unexpected paginated GitHub API path: {path}") + + output_path = tmp_path / "outputs" + monkeypatch.setenv("GITHUB_TOKEN", "token") + monkeypatch.setattr(reuse, "github_api", fake_github_api) + monkeypatch.setattr(reuse, "paginated_github_api", fake_paginated_github_api) + monkeypatch.setattr( + reuse.sys, + "argv", + [ + "find_reusable_sweep_run.py", + "--repo", + "SemiAnalysisAI/InferenceX", + "--commit-sha", + "merge-sha", + "--event-name", + "push", + "--ref", + "refs/heads/main", + "--github-output", + str(output_path), + ], + ) + + try: + reuse.main() + except RuntimeError as error: + msg = str(error) + # "full-sweep-enabled" is a substring of "non-canary-full-sweep-enabled", + # so assert on the rendered label list to prove both names are reported. + assert "full-sweep-enabled, non-canary-full-sweep-enabled" in msg 
+ else: + raise AssertionError( + "expected RuntimeError when PR has neither full-sweep-enabled nor " + "non-canary-full-sweep-enabled label" + )