diff --git a/.claude/commands/klaud-pr-status-html.md b/.claude/commands/klaud-pr-status-html.md
index 6051357c6..47d6024f8 100644
--- a/.claude/commands/klaud-pr-status-html.md
+++ b/.claude/commands/klaud-pr-status-html.md
@@ -30,7 +30,7 @@ State buckets:
- **RUNNING** — no failed checks; at least one is `QUEUED` / `IN_PROGRESS` / `PENDING`.
- **READY** — no failed, no pending, and at least one `Run Sweep` check is `SUCCESS`.
- **NO_SUCCESS** — sweep ran but never produced a `SUCCESS` (e.g. all matrix jobs got SKIPPED).
-- **NO_SWEEP** — no `Run Sweep` check exists for this head SHA at all (sweep never triggered — usually missing `full-sweep-enabled` label).
+- **NO_SWEEP** — no `Run Sweep` check exists for this head SHA at all (sweep never triggered — usually missing a sweep label such as `full-sweep-enabled` or `non-canary-full-sweep-enabled`).
```bash
: > /tmp/klaud_pr_status.tsv
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
index 2252d8b7c..8517d1580 100644
--- a/.github/workflows/README.md
+++ b/.github/workflows/README.md
@@ -180,18 +180,18 @@ test-config --config-keys *-b200-* --conc 4 8 --config-files .github/configs/nvi
## Reusing an Approved PR Full Sweep
-If a PR has already run the full untrimmed sweep (`full-sweep-enabled` label),
-a maintainer can avoid running the same sweep again after merge by leaving a
-PR comment before merging:
+If a PR has already run the full untrimmed sweep (`full-sweep-enabled`, which
+gates it behind a single-node canary job, or `non-canary-full-sweep-enabled`,
+which runs it without that gate), a maintainer can avoid running the same
+sweep again after merge by leaving a PR comment before merging:
```
/reuse-sweep-run
```
-That reuses the latest successful `run-sweep.yml` `pull_request` run for the
-PR's current head SHA. If the PR was rebased or had to merge `main` after the
-successful sweep — so the current head no longer has a matching run — pin the
-source run explicitly:
+That reuses the latest successful `run-sweep.yml` `pull_request` run whose
+head commit is still one of the PR's commits. To reuse a specific eligible
+run rather than the latest one, pin the source run explicitly:
```
/reuse-sweep-run
@@ -209,10 +209,11 @@ Only comments from `OWNER`, `MEMBER`, or `COLLABORATOR` users authorize reuse.
The most recent matching comment wins, so a maintainer can supersede an earlier
pin by leaving a new `/reuse-sweep-run []` comment.
-Reuse fails closed: if the comment is present but the `full-sweep-enabled`
-label, source PR run, or artifacts cannot be validated, the push-to-main
-workflow fails instead of falling back to a cluster sweep. Without the comment,
-the push-to-main workflow runs the normal full sweep.
+Reuse fails closed: if the comment is present but neither full-sweep label
+(`full-sweep-enabled` or `non-canary-full-sweep-enabled`) is present, or if
+the source PR run or artifacts cannot be validated, the push-to-main workflow
+fails instead of falling back to a cluster sweep. Without the comment, the
+push-to-main workflow runs the normal full sweep.
## Validation Architecture
diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index 77c99c974..3533e8175 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -8,6 +8,7 @@ concurrency:
(github.event.action == 'labeled' || github.event.action == 'unlabeled') &&
github.event.label.name != 'sweep-enabled' &&
github.event.label.name != 'full-sweep-enabled' &&
+ github.event.label.name != 'non-canary-full-sweep-enabled' &&
github.run_id ||
'active'
}}
@@ -39,7 +40,8 @@ jobs:
(
(github.event.action != 'labeled' && github.event.action != 'unlabeled') ||
github.event.label.name == 'sweep-enabled' ||
- github.event.label.name == 'full-sweep-enabled'
+ github.event.label.name == 'full-sweep-enabled' ||
+ github.event.label.name == 'non-canary-full-sweep-enabled'
)
steps:
- name: Checkout code
@@ -61,12 +63,14 @@ jobs:
!github.event.pull_request.draft &&
(
contains(github.event.pull_request.labels.*.name, 'sweep-enabled') ||
- contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled')
+ contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') ||
+ contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled')
) &&
(
(github.event.action != 'labeled' && github.event.action != 'unlabeled') ||
github.event.label.name == 'sweep-enabled' ||
- github.event.label.name == 'full-sweep-enabled'
+ github.event.label.name == 'full-sweep-enabled' ||
+ github.event.label.name == 'non-canary-full-sweep-enabled'
)
) ||
(
@@ -85,10 +89,13 @@ jobs:
- name: Reject conflicting sweep labels
if: >-
github.event_name == 'pull_request' &&
- contains(github.event.pull_request.labels.*.name, 'sweep-enabled') &&
- contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled')
+ (
+ (contains(github.event.pull_request.labels.*.name, 'sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled')) ||
+ (contains(github.event.pull_request.labels.*.name, 'sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled')) ||
+ (contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled'))
+ )
run: |
- echo "::error::PR has both 'sweep-enabled' and 'full-sweep-enabled' labels. Remove one — 'full-sweep-enabled' runs the full intermediate concurrency sweep; 'sweep-enabled' trims to min(conc) per parallelism config."
+ echo "::error::PR has multiple conflicting sweep labels. Pick exactly one of: 'sweep-enabled' (trims to min(conc) per parallelism config), 'full-sweep-enabled' (full intermediate concurrency sweep, with canary gate), or 'non-canary-full-sweep-enabled' (full sweep, no canary gate)."
exit 1
- name: Checkout code
@@ -135,9 +142,87 @@ jobs:
--ref "${{ github.ref }}" \
--workflow-id "run-sweep.yml"
- sweep-multi-node-1k1k:
+ canary-select:
needs: setup
- if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null' }}
+ if: >-
+ needs.setup.outputs.reuse-enabled != 'true' &&
+ github.event_name == 'pull_request' &&
+ contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled')
+ runs-on: ubuntu-latest
+ outputs:
+ canary-config: ${{ steps.pick.outputs.canary-config }}
+ remaining-search-space-config: ${{ steps.pick.outputs.remaining-search-space-config }}
+ steps:
+ - id: pick
+ env:
+ SEARCH_SPACE: ${{ needs.setup.outputs.search-space-config }}
+ run: |
+ selection=$(jq -c '
+ def remove_one($needle):
+ if $needle == null then .
+ else
+ (index($needle)) as $idx
+ | if $idx == null then . else del(.[$idx]) end
+ end;
+
+ # Canary is a benchmark-only smoke test (canary-sweep forces run-eval:
+ # false) and is removed from the remaining matrix, so skip entries with
+ # run-eval == true — otherwise their eval pass would be silently dropped.
+ (((.single_node["1k1k"] // []) + (.single_node["8k1k"] // []))
+ | map(select(.["run-eval"] != true))) as $candidates
+ | (if ($candidates | length) == 0 then null else ($candidates | min_by(.conc)) end) as $canary
+ | {
+ canary: (if $canary == null then [] else [$canary] end),
+ remaining: (
+ .
+ | .single_node = (.single_node // {})
+ | .single_node["1k1k"] = ((.single_node["1k1k"] // []) | remove_one($canary))
+ | .single_node["8k1k"] = ((.single_node["8k1k"] // []) | remove_one($canary))
+ )
+ }
+ ' <<<"$SEARCH_SPACE")
+ echo "canary-config=$(jq -c '.canary' <<<"$selection")" >> "$GITHUB_OUTPUT"
+ echo "remaining-search-space-config=$(jq -c '.remaining' <<<"$selection")" >> "$GITHUB_OUTPUT"
+
+ canary-sweep:
+ needs: canary-select
+ if: ${{ needs.canary-select.outputs.canary-config != '' && needs.canary-select.outputs.canary-config != '[]' }}
+ uses: ./.github/workflows/benchmark-tmpl.yml
+ name: canary /
+ strategy:
+ fail-fast: false
+ matrix:
+ config: ${{ fromJson(needs.canary-select.outputs.canary-config) }}
+ secrets: inherit
+ with:
+ exp-name: ${{ matrix.config.exp-name }}
+ isl: ${{ matrix.config.isl }}
+ osl: ${{ matrix.config.osl }}
+ max-model-len: ${{ matrix.config.max-model-len }}
+ runner: ${{ matrix.config.runner }}
+ image: ${{ matrix.config.image }}
+ model: ${{ matrix.config.model }}
+ model-prefix: ${{ matrix.config.model-prefix }}
+ framework: ${{ matrix.config.framework }}
+ precision: ${{ matrix.config.precision }}
+ tp: ${{ matrix.config.tp }}
+ ep: ${{ matrix.config.ep }}
+ dp-attn: ${{ matrix.config.dp-attn }}
+ conc: ${{ matrix.config.conc }}
+ spec-decoding: ${{ matrix.config.spec-decoding }}
+ disagg: ${{ matrix.config.disagg }}
+ run-eval: false
+
+ sweep-multi-node-1k1k:
+ needs: [setup, canary-select, canary-sweep]
+ if: >-
+ ${{
+ !cancelled() &&
+ needs.setup.result == 'success' &&
+ needs.setup.outputs.reuse-enabled != 'true' &&
+ (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
+ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null'
+ }}
uses: ./.github/workflows/benchmark-multinode-tmpl.yml
name: multi-node 1k1k /
strategy:
@@ -174,8 +259,15 @@ jobs:
run-eval: false
sweep-multi-node-8k1k:
- needs: setup
- if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null' }}
+ needs: [setup, canary-select, canary-sweep]
+ if: >-
+ ${{
+ !cancelled() &&
+ needs.setup.result == 'success' &&
+ needs.setup.outputs.reuse-enabled != 'true' &&
+ (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
+ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null'
+ }}
uses: ./.github/workflows/benchmark-multinode-tmpl.yml
name: multi-node 8k1k /
strategy:
@@ -186,14 +278,22 @@ jobs:
with: *multi-node-inputs
sweep-single-node-1k1k:
- needs: setup
- if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' }}
+ needs: [setup, canary-select, canary-sweep]
+ if: >-
+ ${{
+ !cancelled() &&
+ needs.setup.result == 'success' &&
+ needs.setup.outputs.reuse-enabled != 'true' &&
+ (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
+ toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' &&
+ toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k']) != '[]'
+ }}
uses: ./.github/workflows/benchmark-tmpl.yml
name: single-node 1k1k /
strategy:
fail-fast: false
matrix:
- config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}
+ config: ${{ fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k'] }}
secrets: inherit
with: &single-node-inputs
exp-name: ${{ matrix.config.exp-name }}
@@ -215,20 +315,35 @@ jobs:
run-eval: ${{ matrix.config.run-eval }}
sweep-single-node-8k1k:
- needs: setup
- if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' }}
+ needs: [setup, canary-select, canary-sweep]
+ if: >-
+ ${{
+ !cancelled() &&
+ needs.setup.result == 'success' &&
+ needs.setup.outputs.reuse-enabled != 'true' &&
+ (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
+ toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' &&
+ toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k']) != '[]'
+ }}
uses: ./.github/workflows/benchmark-tmpl.yml
name: single-node 8k1k /
strategy:
fail-fast: false
matrix:
- config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['8k1k'] }}
+ config: ${{ fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k'] }}
secrets: inherit
with: *single-node-inputs
sweep-agentic:
- needs: setup
- if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).single_node['agentic']) != 'null' }}
+ needs: [setup, canary-select, canary-sweep]
+ if: >-
+ ${{
+ !cancelled() &&
+ needs.setup.result == 'success' &&
+ needs.setup.outputs.reuse-enabled != 'true' &&
+ (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
+ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['agentic']) != 'null'
+ }}
uses: ./.github/workflows/benchmark-tmpl.yml
name: agentic /
strategy:
@@ -259,8 +374,15 @@ jobs:
scenario-type: agentic-coding
sweep-multi-node-agentic:
- needs: setup
- if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['agentic']) != 'null' }}
+ needs: [setup, canary-select, canary-sweep]
+ if: >-
+ ${{
+ !cancelled() &&
+ needs.setup.result == 'success' &&
+ needs.setup.outputs.reuse-enabled != 'true' &&
+ (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
+ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['agentic']) != 'null'
+ }}
uses: ./.github/workflows/benchmark-multinode-tmpl.yml
name: multi-node agentic /
strategy:
@@ -298,8 +420,16 @@ jobs:
scenario-type: agentic-coding
sweep-evals:
- needs: setup
- if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).evals) != '[]' && toJson(fromJson(needs.setup.outputs.search-space-config).evals) != 'null' }}
+ needs: [setup, canary-select, canary-sweep]
+ if: >-
+ ${{
+ !cancelled() &&
+ needs.setup.result == 'success' &&
+ needs.setup.outputs.reuse-enabled != 'true' &&
+ (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
+ toJson(fromJson(needs.setup.outputs.search-space-config).evals) != '[]' &&
+ toJson(fromJson(needs.setup.outputs.search-space-config).evals) != 'null'
+ }}
uses: ./.github/workflows/benchmark-tmpl.yml
name: eval /
strategy:
@@ -328,8 +458,16 @@ jobs:
eval-only: true
sweep-multi-node-evals:
- needs: setup
- if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != '[]' && toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != 'null' }}
+ needs: [setup, canary-select, canary-sweep]
+ if: >-
+ ${{
+ !cancelled() &&
+ needs.setup.result == 'success' &&
+ needs.setup.outputs.reuse-enabled != 'true' &&
+ (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
+ toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != '[]' &&
+ toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != 'null'
+ }}
uses: ./.github/workflows/benchmark-multinode-tmpl.yml
name: multi-node eval /
strategy:
@@ -368,6 +506,7 @@ jobs:
collect-results:
needs:
[
+ canary-sweep,
sweep-single-node-1k1k,
sweep-single-node-8k1k,
sweep-agentic,
@@ -381,6 +520,7 @@ jobs:
always() &&
needs.setup.result == 'success' &&
(
+ needs.canary-sweep.result == 'success' ||
needs.sweep-single-node-1k1k.result != 'skipped' ||
needs.sweep-single-node-8k1k.result != 'skipped' ||
needs.sweep-multi-node-1k1k.result != 'skipped' ||
@@ -592,12 +732,14 @@ jobs:
!github.event.pull_request.draft &&
(
contains(github.event.pull_request.labels.*.name, 'sweep-enabled') ||
- contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled')
+ contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') ||
+ contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled')
) &&
(
(github.event.action != 'labeled' && github.event.action != 'unlabeled') ||
github.event.label.name == 'sweep-enabled' ||
- github.event.label.name == 'full-sweep-enabled'
+ github.event.label.name == 'full-sweep-enabled' ||
+ github.event.label.name == 'non-canary-full-sweep-enabled'
)
runs-on: ubuntu-latest
permissions:
diff --git a/AGENTS.md b/AGENTS.md
index 173353b3d..56566ea8a 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -59,12 +59,13 @@ Git: conventional commit messages. `[skip-sweep]` in commit message skips benchm
### Pull Request Sweep Labels
-PRs do not run the sweep automatically - `run-sweep.yml` is gated on a label. Pick exactly one; setting both is rejected by the workflow's `setup` job.
+PRs do not run the sweep automatically - `run-sweep.yml` is gated on a label. Pick exactly one; setting multiple sweep labels is rejected by the workflow's `setup` job.
- `sweep-enabled` - runs the sweep with `--trim-conc` (each parallelism config reduced to its single lowest concurrency). Default for most PRs.
-- `full-sweep-enabled` - runs the full intermediate concurrency sweep, identical to push-to-main. Use when intermediate points matter (e.g. a recipe change shifts the throughput/latency curve, not just its endpoints).
+- `full-sweep-enabled` - runs the full intermediate concurrency sweep behind a sequential single-node canary gate. Use when intermediate points matter (e.g. a recipe change shifts the throughput/latency curve, not just its endpoints).
+- `non-canary-full-sweep-enabled` - runs the full intermediate concurrency sweep without the canary gate. Use when the canary is flaky or not representative of the affected configuration.
-**The sweep does not trigger while the PR has merge conflicts.** Even with `sweep-enabled` / `full-sweep-enabled` applied, the `run-sweep.yml` workflow will not start until the PR cleanly merges into main — a stale claude/* or update-* branch with a `perf-changelog.yaml` conflict (the common case) will sit in NO_SWEEP / NO_SUCCESS until rebased. Resolution recipe is documented in `KLAUD_DEBUG.md §1.1`: `git merge origin/main`, then `git checkout origin/main -- perf-changelog.yaml`, then re-append the PR's own changelog entry at the tail. Don't 3-way merge `perf-changelog.yaml`; whitespace edits silently re-trigger the deletion check.
+**The sweep does not trigger while the PR has merge conflicts.** Even with `sweep-enabled`, `full-sweep-enabled`, or `non-canary-full-sweep-enabled` applied, the `run-sweep.yml` workflow will not start until the PR cleanly merges into main — a stale claude/* or update-* branch with a `perf-changelog.yaml` conflict (the common case) will sit in NO_SWEEP / NO_SUCCESS until rebased. Resolution recipe is documented in `KLAUD_DEBUG.md §1.1`: `git merge origin/main`, then `git checkout origin/main -- perf-changelog.yaml`, then re-append the PR's own changelog entry at the tail. Don't 3-way merge `perf-changelog.yaml`; whitespace edits silently re-trigger the deletion check.
Push-to-main always runs the full untrimmed sweep unless `[skip-sweep]` is in the commit message. Trim logic lives in `trim_conc()` in `utils/process_changelog.py`: single-node entries are grouped by every non-`conc` field and only the lowest-`conc` entry per group is kept; multi-node entries have their `conc` list collapsed to `[min(conc)]`.
diff --git a/KLAUD_DEBUG.md b/KLAUD_DEBUG.md
index 92eb76bfc..1f81b6a9f 100644
--- a/KLAUD_DEBUG.md
+++ b/KLAUD_DEBUG.md
@@ -193,7 +193,7 @@ Or check whether any other recipe on main uses the proposed tag — if zero uses
## 9. PR conventions for this repo
- Image-bump / new-recipe PRs I open on behalf of the user (or that the user creates) get the **`[Klaud Cold]`** title prefix.
-- Add the `full-sweep-enabled` label so a full sweep actually runs (`gh api -X POST ... labels[]=full-sweep-enabled`). Without it, the sweep is mostly SKIPPED.
+- Add the `full-sweep-enabled` label so a canary-gated full sweep actually runs (`gh api -X POST ... labels[]=full-sweep-enabled`). Use `non-canary-full-sweep-enabled` instead only when the single-node canary is flaky or unrepresentative; it runs the full sweep without the canary gate. Without one of the sweep labels, the sweep is mostly SKIPPED.
- After any code change that shifts a PR's scope (drops a recipe, changes an image tag), **update the PR title AND body in the same step** and **verify** with `gh pr view --json title,body` — `gh pr edit` silently fails (see §8).
- `utils/merge_with_reuse.sh ` is the merge entrypoint; it handles the `perf-changelog.yaml` auto-append.
diff --git a/utils/find_reusable_sweep_run.py b/utils/find_reusable_sweep_run.py
index 8af018a8e..3f814d2e5 100644
--- a/utils/find_reusable_sweep_run.py
+++ b/utils/find_reusable_sweep_run.py
@@ -276,7 +276,11 @@ def main() -> int:
parser.add_argument("--event-name", required=True)
parser.add_argument("--ref", required=True)
parser.add_argument("--workflow-id", default="run-sweep.yml")
- parser.add_argument("--full-sweep-label", default="full-sweep-enabled")
+ parser.add_argument(
+ "--full-sweep-label",
+ default="full-sweep-enabled,non-canary-full-sweep-enabled",
+ help="Comma-separated PR labels treated as 'full sweep'; reuse requires at least one.",
+ )
parser.add_argument("--pinned-run-command", default="/reuse-sweep-run")
parser.add_argument(
"--allowed-author-associations",
@@ -355,10 +359,16 @@ def main() -> int:
pr = github_api(args.repo, f"/pulls/{pr_number}", token)
labels = label_names(pr)
- if args.full_sweep_label not in labels:
+ accepted_full_sweep_labels = {
+ value.strip()
+ for value in args.full_sweep_label.split(",")
+ if value.strip()
+ }
+ if not accepted_full_sweep_labels.intersection(labels):
+ accepted = ", ".join(sorted(accepted_full_sweep_labels))
raise RuntimeError(
- f"PR #{pr_number} has {args.pinned_run_command} authorization but not "
- f"{args.full_sweep_label}."
+ f"PR #{pr_number} has {args.pinned_run_command} authorization but is "
+ f"missing any of: {accepted}."
)
if not pr.get("merged_at"):
raise RuntimeError(f"PR #{pr_number} is not marked as merged.")
diff --git a/utils/merge_with_reuse.sh b/utils/merge_with_reuse.sh
index 9336b81c2..a94ea0f69 100755
--- a/utils/merge_with_reuse.sh
+++ b/utils/merge_with_reuse.sh
@@ -38,8 +38,13 @@ PR_STATE="$(jq -r '.state' <<<"$PR_INFO")"
[ "$PR_STATE" = "OPEN" ] || die "PR #${PR} is ${PR_STATE}, expected OPEN"
HEAD_BRANCH="$(jq -r '.headRefName' <<<"$PR_INFO")"
-HAS_FULL_SWEEP="$(jq -r '[.labels[].name] | index("full-sweep-enabled") // ""' <<<"$PR_INFO")"
-[ -n "$HAS_FULL_SWEEP" ] || die "PR #${PR} is missing the 'full-sweep-enabled' label"
+HAS_FULL_SWEEP="$(jq -r '
+ [.labels[].name] as $names
+ | if (($names | index("full-sweep-enabled")) != null)
+ or (($names | index("non-canary-full-sweep-enabled")) != null)
+ then "1" else "" end
+' <<<"$PR_INFO")"
+[ -n "$HAS_FULL_SWEEP" ] || die "PR #${PR} is missing 'full-sweep-enabled' or 'non-canary-full-sweep-enabled' label"
# Warn early if no successful run exists on any current PR commit.
PR_SHAS="$(gh api "repos/${REPO}/pulls/${PR}/commits" --paginate --jq '.[].sha')"
diff --git a/utils/test_find_reusable_sweep_run.py b/utils/test_find_reusable_sweep_run.py
index e779bbe8b..0f8a09ac7 100644
--- a/utils/test_find_reusable_sweep_run.py
+++ b/utils/test_find_reusable_sweep_run.py
@@ -455,3 +455,133 @@ def fake_paginated_github_api(repo, path, token, item_key, params=None):
assert outputs["reuse-enabled"] == "false"
assert outputs["reuse-source-pr-number"] == "1321"
assert outputs["reuse-reason"] == "PR #1321 has no /reuse-sweep-run authorization"
+
+
+def test_main_accepts_non_canary_full_sweep_label(monkeypatch, tmp_path) -> None:
+ comments = [
+ {
+ "created_at": "2026-05-13T00:00:00Z",
+ "author_association": "OWNER",
+ "body": "/reuse-sweep-run 25763404168",
+ },
+ ]
+ run = {
+ "id": 25763404168,
+ "event": "pull_request",
+ "status": "completed",
+ "conclusion": "success",
+ "path": ".github/workflows/run-sweep.yml",
+ "pull_requests": [{"number": 1321}],
+ "run_attempt": 1,
+ "html_url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/25763404168",
+ "head_sha": "abc123",
+ }
+
+ def fake_github_api(repo, path, token, params=None):
+ if path == "/commits/merge-sha/pulls":
+ return [{"number": 1321}]
+ if path == "/pulls/1321":
+ return {
+ "merged_at": "2026-05-13T00:01:00Z",
+ "labels": [{"name": "non-canary-full-sweep-enabled"}],
+ "head": {"sha": "abc123"},
+ }
+ if path == "/actions/runs/25763404168":
+ return run
+ raise AssertionError(f"unexpected GitHub API path: {path}")
+
+ def fake_paginated_github_api(repo, path, token, item_key, params=None):
+ if path == "/issues/1321/comments":
+ return comments
+ if path == "/pulls/1321/commits":
+ return [{"sha": "abc123"}]
+ if path == "/actions/runs/25763404168/artifacts":
+ return [{"name": "results_bmk"}]
+ raise AssertionError(f"unexpected paginated GitHub API path: {path}")
+
+ output_path = tmp_path / "outputs"
+ monkeypatch.setenv("GITHUB_TOKEN", "token")
+ monkeypatch.setattr(reuse, "github_api", fake_github_api)
+ monkeypatch.setattr(reuse, "paginated_github_api", fake_paginated_github_api)
+ monkeypatch.setattr(
+ reuse.sys,
+ "argv",
+ [
+ "find_reusable_sweep_run.py",
+ "--repo",
+ "SemiAnalysisAI/InferenceX",
+ "--commit-sha",
+ "merge-sha",
+ "--event-name",
+ "push",
+ "--ref",
+ "refs/heads/main",
+ "--github-output",
+ str(output_path),
+ ],
+ )
+
+ assert reuse.main() == 0
+
+ outputs = dict(line.split("=", 1) for line in output_path.read_text().splitlines())
+ assert outputs["reuse-enabled"] == "true"
+
+
+def test_main_rejects_pr_with_neither_full_sweep_label(monkeypatch, tmp_path) -> None:
+ comments = [
+ {
+ "created_at": "2026-05-13T00:00:00Z",
+ "author_association": "OWNER",
+ "body": "/reuse-sweep-run 25763404168",
+ },
+ ]
+
+ def fake_github_api(repo, path, token, params=None):
+ if path == "/commits/merge-sha/pulls":
+ return [{"number": 1321}]
+ if path == "/pulls/1321":
+ return {
+ "merged_at": "2026-05-13T00:01:00Z",
+ "labels": [{"name": "sweep-enabled"}],
+ "head": {"sha": "abc123"},
+ }
+ raise AssertionError(f"unexpected GitHub API path: {path}")
+
+ def fake_paginated_github_api(repo, path, token, item_key, params=None):
+ if path == "/issues/1321/comments":
+ return comments
+ raise AssertionError(f"unexpected paginated GitHub API path: {path}")
+
+ output_path = tmp_path / "outputs"
+ monkeypatch.setenv("GITHUB_TOKEN", "token")
+ monkeypatch.setattr(reuse, "github_api", fake_github_api)
+ monkeypatch.setattr(reuse, "paginated_github_api", fake_paginated_github_api)
+ monkeypatch.setattr(
+ reuse.sys,
+ "argv",
+ [
+ "find_reusable_sweep_run.py",
+ "--repo",
+ "SemiAnalysisAI/InferenceX",
+ "--commit-sha",
+ "merge-sha",
+ "--event-name",
+ "push",
+ "--ref",
+ "refs/heads/main",
+ "--github-output",
+ str(output_path),
+ ],
+ )
+
+ try:
+ reuse.main()
+ except RuntimeError as error:
+ msg = str(error)
+ assert "full-sweep-enabled" in msg
+ assert "non-canary-full-sweep-enabled" in msg
+ else:
+ raise AssertionError(
+ "expected RuntimeError when PR has neither full-sweep-enabled nor "
+ "non-canary-full-sweep-enabled label"
+ )