From 5f1d953df9fef70a5698cf553d2a9733f380bf9f Mon Sep 17 00:00:00 2001 From: Scott Roy Date: Mon, 1 Jun 2026 09:43:27 -0700 Subject: [PATCH 1/5] up --- .github/workflows/_ci-run-decision.yml | 69 +++++++++++++++++++++++ .github/workflows/pull.yml | 14 ++++- .github/workflows/trunk.yml | 69 ++++++++++++++++------- .github/workflows/update-viablestrict.yml | 4 +- .github/workflows/viable-strict-gate.yml | 49 ++++++++++++++++ 5 files changed, 181 insertions(+), 24 deletions(-) create mode 100644 .github/workflows/_ci-run-decision.yml create mode 100644 .github/workflows/viable-strict-gate.yml diff --git a/.github/workflows/_ci-run-decision.yml b/.github/workflows/_ci-run-decision.yml new file mode 100644 index 00000000000..b58d8e30aa7 --- /dev/null +++ b/.github/workflows/_ci-run-decision.yml @@ -0,0 +1,69 @@ +name: CI Run Decision + +# Single source of truth for "should this commit force-run all CI jobs +# regardless of path filter?". Used by per-job ``if:`` gates in pull.yml +# and trunk.yml so the sampling logic isn't repeated per job. +# +# Returns ``is-full-run = 'true'`` for: +# - workflow_dispatch (manual run) +# - ciflow/* tag pushes (maintainer-forced full run) +# - push events whose SHA is in the deterministic 25% sample +# (last hex char in {0,4,8,c}) +# +# Returns ``is-full-run = 'false'`` for: +# - pull_request / pull_request_target (use path filter instead) +# - push events not matching any of the above (path-filtered runs) +# +# See ``viable-strict-gate.yml``: viable/strict only advances on +# commits where this is true, so the path-filtered fast path doesn't +# silently advance partial signal. + +on: + workflow_call: + outputs: + is-full-run: + description: "'true' if this commit should run all CI jobs regardless of path filter; 'false' otherwise." 
+ value: ${{ jobs.decide.outputs.is-full-run }} + +jobs: + decide: + runs-on: ubuntu-latest + outputs: + is-full-run: ${{ steps.compute.outputs.is-full-run }} + steps: + - name: Compute is-full-run + id: compute + env: + EVENT_NAME: ${{ github.event_name }} + REF: ${{ github.ref }} + SHA: ${{ github.sha }} + run: | + set -eu + + IS_FULL=false + + case "$EVENT_NAME" in + workflow_dispatch) + IS_FULL=true + ;; + esac + + case "$REF" in + refs/tags/ciflow/*) + IS_FULL=true + ;; + esac + + # Deterministic 25% sample on push: SHA's last hex char in {0,4,8,c}. + # Keep in sync with the sample in viable-strict-gate.yml. + if [ "$IS_FULL" = "false" ] && [ "$EVENT_NAME" = "push" ]; then + case "$SHA" in + *0|*4|*8|*c) IS_FULL=true ;; + esac + fi + + echo "Event: $EVENT_NAME" + echo "Ref: $REF" + echo "SHA: $SHA" + echo "is-full-run: $IS_FULL" + echo "is-full-run=$IS_FULL" >> "$GITHUB_OUTPUT" diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index c2787681d4e..9e0dd8a2164 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -21,6 +21,14 @@ jobs: name: Get changed files uses: ./.github/workflows/_get-changed-files.yml + # Emits is-full-run='true' for PR/dispatch/tag/sampled-push commits + # (the ~25% of pushes that should run full CI) and 'false' otherwise. + # Per-job `if:` checks this to bypass path filtering on full-run + # commits while staying path-filtered on the 75% non-sampled pushes. 
+ run-decision: + name: CI run decision + uses: ./.github/workflows/_ci-run-decision.yml + test-qnn-wheel-packages-linux: name: test-qnn-wheel-packages-linux uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main @@ -1517,9 +1525,8 @@ jobs: python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*torchao*" test-coreml-bc-macos: - needs: changed-files + needs: [changed-files, run-decision] if: | - github.event_name != 'pull_request' || contains(needs.changed-files.outputs.changed-files, 'backends/apple/coreml') || contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_coreml_bc.sh') || contains(needs.changed-files.outputs.changed-files, '.ci/scripts/utils.sh') || @@ -1527,7 +1534,8 @@ jobs: contains(needs.changed-files.outputs.changed-files, 'install_executorch.sh') || contains(needs.changed-files.outputs.changed-files, 'install_requirements.py') || contains(needs.changed-files.outputs.changed-files, 'install_requirements.sh') || - contains(needs.changed-files.outputs.changed-files, '.github/workflows/pull.yml') + contains(needs.changed-files.outputs.changed-files, '.github/workflows/pull.yml') || + needs.run-decision.outputs.is-full-run == 'true' name: test-coreml-bc-macos (${{ matrix.runner }}) uses: pytorch/test-infra/.github/workflows/macos_job.yml@main permissions: diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index cca1fe5fe45..ad0caeaf65d 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -25,8 +25,24 @@ jobs: name: Get changed files uses: ./.github/workflows/_get-changed-files.yml + # Emits is-full-run='true' for PR/dispatch/tag/sampled-push commits + # (the ~25% of pushes that should run full CI) and 'false' otherwise. + # Per-job `if:` checks this to bypass path filtering on full-run + # commits while staying path-filtered on the 75% non-sampled pushes. 
+ run-decision: + name: CI run decision + uses: ./.github/workflows/_ci-run-decision.yml + test-models-macos-cpu: name: test-models-macos-cpu + needs: run-decision + # Path-filter-on-push (sampled): runs on every PR / dispatch / + # ciflow tag, and on ~25% of pushes via _ci-run-decision.yml. + # The viable-strict-gate workflow blocks viable/strict from + # advancing on the 75% non-sampled pushes. + if: | + github.event_name == 'pull_request' || + needs.run-decision.outputs.is-full-run == 'true' uses: pytorch/test-infra/.github/workflows/macos_job.yml@main strategy: matrix: @@ -146,6 +162,10 @@ jobs: test-custom-ops-macos: name: test-custom-ops-macos + needs: run-decision + if: | + github.event_name == 'pull_request' || + needs.run-decision.outputs.is-full-run == 'true' uses: pytorch/test-infra/.github/workflows/macos_job.yml@main strategy: matrix: @@ -169,6 +189,10 @@ jobs: test-selective-build-macos: name: test-selective-build-macos + needs: run-decision + if: | + github.event_name == 'pull_request' || + needs.run-decision.outputs.is-full-run == 'true' uses: pytorch/test-infra/.github/workflows/macos_job.yml@main strategy: matrix: @@ -310,14 +334,15 @@ jobs: backends/arm/test/test_arm_backend.sh "${ARM_TEST}" test-coreml-delegate: - needs: changed-files + needs: [changed-files, run-decision] + # Path-filtered: see _ci-run-decision.yml for the sampling policy. 
if: | - github.event_name != 'pull_request' || contains(needs.changed-files.outputs.changed-files, 'backends/apple/coreml') || contains(needs.changed-files.outputs.changed-files, 'examples/apple/coreml') || contains(needs.changed-files.outputs.changed-files, '.ci/scripts/setup-macos.sh') || contains(needs.changed-files.outputs.changed-files, '.ci/scripts/setup-conda.sh') || - contains(needs.changed-files.outputs.changed-files, '.github/workflows/trunk.yml') + contains(needs.changed-files.outputs.changed-files, '.github/workflows/trunk.yml') || + needs.run-decision.outputs.is-full-run == 'true' name: test-coreml-delegate uses: pytorch/test-infra/.github/workflows/macos_job.yml@main with: @@ -337,9 +362,8 @@ jobs: PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh test-static-llama-ane: - needs: changed-files + needs: [changed-files, run-decision] if: | - github.event_name != 'pull_request' || contains(needs.changed-files.outputs.changed-files, 'backends/apple/coreml') || contains(needs.changed-files.outputs.changed-files, 'examples/apple/coreml') || contains(needs.changed-files.outputs.changed-files, 'examples/models/llama') || @@ -347,7 +371,8 @@ jobs: contains(needs.changed-files.outputs.changed-files, 'extension/llm/tokenizers') || contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_ane_static_llama.sh') || contains(needs.changed-files.outputs.changed-files, '.ci/scripts/utils.sh') || - contains(needs.changed-files.outputs.changed-files, '.github/workflows/trunk.yml') + contains(needs.changed-files.outputs.changed-files, '.github/workflows/trunk.yml') || + needs.run-decision.outputs.is-full-run == 'true' name: test-static-llama-ane uses: pytorch/test-infra/.github/workflows/macos_job.yml@main with: @@ -451,11 +476,10 @@ jobs: PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}" 
test-llama-runner-macos: - needs: changed-files + needs: [changed-files, run-decision] # Whole-job gate (matrix cells can't be individually if'd): # mps / coreml / xnnpack+custom+quantize_kv. if: | - github.event_name != 'pull_request' || contains(needs.changed-files.outputs.changed-files, 'backends/apple/coreml') || contains(needs.changed-files.outputs.changed-files, 'backends/apple/mps') || contains(needs.changed-files.outputs.changed-files, 'backends/xnnpack') || @@ -467,7 +491,8 @@ jobs: contains(needs.changed-files.outputs.changed-files, 'extension/llm/sampler') || contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_llama.sh') || contains(needs.changed-files.outputs.changed-files, '.ci/scripts/setup-macos.sh') || - contains(needs.changed-files.outputs.changed-files, '.github/workflows/trunk.yml') + contains(needs.changed-files.outputs.changed-files, '.github/workflows/trunk.yml') || + needs.run-decision.outputs.is-full-run == 'true' name: test-llama-runner-mac uses: pytorch/test-infra/.github/workflows/macos_job.yml@main strategy: @@ -551,7 +576,13 @@ jobs: bash .ci/scripts/test_torchao_huggingface_checkpoints.sh ${{ matrix.model }} --test_with_runner ${{ matrix.backend == 'torchao' && '--use_torchao_kernels' || '' }} test-multimodal-macos: - if: ${{ !github.event.pull_request.head.repo.fork }} + needs: run-decision + if: | + !github.event.pull_request.head.repo.fork && + ( + github.event_name == 'pull_request' || + needs.run-decision.outputs.is-full-run == 'true' + ) name: test-multimodal-macos uses: pytorch/test-infra/.github/workflows/macos_job.yml@main permissions: @@ -644,15 +675,15 @@ jobs: PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn" test-models-macos-coreml: - needs: changed-files + needs: [changed-files, run-decision] if: | - github.event_name != 'pull_request' || contains(needs.changed-files.outputs.changed-files, 'backends/apple/coreml') || 
contains(needs.changed-files.outputs.changed-files, 'examples/apple/coreml') || contains(needs.changed-files.outputs.changed-files, 'examples/models') || contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model.sh') || contains(needs.changed-files.outputs.changed-files, '.ci/scripts/setup-macos.sh') || - contains(needs.changed-files.outputs.changed-files, '.github/workflows/trunk.yml') + contains(needs.changed-files.outputs.changed-files, '.github/workflows/trunk.yml') || + needs.run-decision.outputs.is-full-run == 'true' name: test-models-macos-coreml uses: pytorch/test-infra/.github/workflows/macos_job.yml@main strategy: @@ -695,9 +726,8 @@ jobs: PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" test-models-macos-mps: - needs: changed-files + needs: [changed-files, run-decision] if: | - github.event_name != 'pull_request' || contains(needs.changed-files.outputs.changed-files, 'backends/apple/mps') || contains(needs.changed-files.outputs.changed-files, 'examples/apple/mps') || contains(needs.changed-files.outputs.changed-files, 'examples/models') || @@ -706,7 +736,8 @@ jobs: contains(needs.changed-files.outputs.changed-files, 'extension/llm/export') || contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model.sh') || contains(needs.changed-files.outputs.changed-files, '.ci/scripts/setup-macos.sh') || - contains(needs.changed-files.outputs.changed-files, '.github/workflows/trunk.yml') + contains(needs.changed-files.outputs.changed-files, '.github/workflows/trunk.yml') || + needs.run-decision.outputs.is-full-run == 'true' name: test-models-macos-mps uses: pytorch/test-infra/.github/workflows/macos_job.yml@main strategy: @@ -821,19 +852,19 @@ jobs: echo "::endgroup::" test-huggingface-transformers-macos: - needs: changed-files + needs: [changed-files, run-decision] # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway if: 
| !github.event.pull_request.head.repo.fork && ( - github.event_name != 'pull_request' || contains(needs.changed-files.outputs.changed-files, 'backends/apple/coreml') || contains(needs.changed-files.outputs.changed-files, 'extension/llm/runner') || contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_huggingface_optimum_model.py') || contains(needs.changed-files.outputs.changed-files, '.ci/docker/ci_commit_pins/optimum-executorch.txt') || contains(needs.changed-files.outputs.changed-files, 'install_executorch.py') || contains(needs.changed-files.outputs.changed-files, 'install_requirements.py') || - contains(needs.changed-files.outputs.changed-files, '.github/workflows/trunk.yml') + contains(needs.changed-files.outputs.changed-files, '.github/workflows/trunk.yml') || + needs.run-decision.outputs.is-full-run == 'true' ) name: test-huggingface-transformers-macos uses: pytorch/test-infra/.github/workflows/macos_job.yml@main diff --git a/.github/workflows/update-viablestrict.yml b/.github/workflows/update-viablestrict.yml index b77914d622a..36d3a3209a8 100644 --- a/.github/workflows/update-viablestrict.yml +++ b/.github/workflows/update-viablestrict.yml @@ -23,7 +23,7 @@ jobs: with: repository: pytorch/executorch stable-branch: viable/strict - requires: '[\"pull\", \"lint\", \"trunk\", \"Build documentation\", \"^Apple$\"]' + requires: '[\"pull\", \"lint\", \"trunk\", \"Build documentation\", \"^Apple$\", \"viable-strict-gate\"]' secret-bot-token: ${{ secrets.UPDATEBOT_TOKEN }} clickhouse-url: ${{ secrets.CLICKHOUSE_URL }} clickhouse-username: ${{ secrets.CLICKHOUSE_VIABLESTRICT_USERNAME }} @@ -42,7 +42,7 @@ jobs: # Pattern matching required workflows (must match 'requires' input above) # Uses exact matching with anchors and case-insensitive matching - REQUIRED_PATTERN="^pull$|^lint$|^trunk$|^Build documentation$|^Apple$" + REQUIRED_PATTERN="^pull$|^lint$|^trunk$|^Build documentation$|^Apple$|^viable-strict-gate$" echo "### Failures by commit (recent)" 
>> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/viable-strict-gate.yml b/.github/workflows/viable-strict-gate.yml new file mode 100644 index 00000000000..d055880b002 --- /dev/null +++ b/.github/workflows/viable-strict-gate.yml @@ -0,0 +1,49 @@ +name: viable-strict-gate + +# Sampled-full-run gating for viable/strict advancement. +# +# Path filtering on push to main saves runner cost but risks advancing +# viable/strict on commits where many jobs were skipped — a partial +# green from "no job ran" is indistinguishable from "everything passed" +# at the workflow-conclusion level. +# +# This workflow runs on every push to main / release branches and +# *fails* when ``_ci-run-decision.yml`` says this isn't a full-coverage +# commit (i.e. the SHA isn't sampled and there's no ciflow/* tag). +# Failure => the "viable-strict-gate" workflow conclusion is failure +# => update-viablestrict refuses to advance viable/strict. +# +# To force a full run on a specific commit (e.g. before tagging a +# release), push a ``ciflow/trunk/`` tag — on tag pushes +# ``_ci-run-decision.yml`` always returns ``is-full-run = true``. + +on: + push: + branches: + - main + - release/* + tags: + - ciflow/trunk/* + +jobs: + run-decision: + uses: ./.github/workflows/_ci-run-decision.yml + + full-run-required: + needs: run-decision + name: Full CI required for viable/strict + runs-on: ubuntu-22.04 + steps: + - name: Check whether this commit is a full-coverage run + env: + IS_FULL_RUN: ${{ needs.run-decision.outputs.is-full-run }} + run: | + set -eu + if [ "$IS_FULL_RUN" = "true" ]; then + echo "Full-coverage commit; viable/strict eligible." + exit 0 + fi + echo "::error::Non-full-run commit (path-filtered CI). viable/strict cannot advance from this commit." + echo "Full CI runs on commits whose SHA ends in 0, 4, 8, or c (~25% of commits)." + echo "To force a full run on this commit, push a 'ciflow/trunk/${{ github.sha }}' tag." 
+ exit 1 From 2d0350dabf94a20741770aeb18a9b71c4eab5a9f Mon Sep 17 00:00:00 2001 From: Scott Roy Date: Mon, 1 Jun 2026 10:02:38 -0700 Subject: [PATCH 2/5] up --- .../workflows/promote-to-viable-strict.yml | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 .github/workflows/promote-to-viable-strict.yml diff --git a/.github/workflows/promote-to-viable-strict.yml b/.github/workflows/promote-to-viable-strict.yml new file mode 100644 index 00000000000..ea07857f89d --- /dev/null +++ b/.github/workflows/promote-to-viable-strict.yml @@ -0,0 +1,98 @@ +name: Promote commit to viable/strict + +# Manual escape hatch for the sampled-CI gating in +# `_ci-run-decision.yml` + `viable-strict-gate.yml`. +# +# Pushes a `ciflow/trunk/` tag at a chosen commit, which: +# 1. Re-triggers `pull.yml` / `trunk.yml` against that commit with +# ``is-full-run = true`` (every gated job runs regardless of +# path filter or SHA sample). +# 2. Triggers `viable-strict-gate.yml` for that commit; the gate +# succeeds because tag pushes always count as a full-run. +# +# Once those tag-triggered runs all pass, the next +# `update-viablestrict` cron run will be able to advance viable/strict +# to the chosen commit. +# +# Use cases: +# - Bisecting a regression on a non-sampled commit. +# - Pre-release validation: pin viable/strict to a specific commit +# (e.g. release branch tip) regardless of its SHA's sample bit. +# - Recovering when recent sampled commits all happen to be red. + +on: + workflow_dispatch: + inputs: + sha: + description: "Full 40-char SHA on main / release/* to promote" + required: true + type: string + +permissions: + contents: write + +concurrency: + # One in-flight promotion at a time; safer than racing tag pushes. 
+ group: promote-to-viable-strict + cancel-in-progress: false + +jobs: + push-ciflow-tag: + if: ${{ github.repository_owner == 'pytorch' }} + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + token: ${{ secrets.UPDATEBOT_TOKEN }} # bot token; refs pushed with the default GITHUB_TOKEN do not start workflow runs, so the tag would trigger nothing + - name: Validate SHA and push ciflow tag + env: + SHA: ${{ inputs.sha }} + run: | + set -euo pipefail + + # Reject anything that isn't a full 40-char lowercase hex SHA. + if [[ ! "$SHA" =~ ^[0-9a-f]{40}$ ]]; then + echo "::error::Input must be a full 40-char lowercase hex SHA; got: '$SHA'" + exit 1 + fi + + # The commit must exist locally (fetch-depth: 0 above pulls + # everything, but defensively confirm it's an object). + if ! git cat-file -e "$SHA^{commit}" 2>/dev/null; then + echo "::error::SHA $SHA is not a commit in this repository." + exit 1 + fi + + # Restrict promotion to commits reachable from a release-track + # branch. Prevents tagging arbitrary commits (PR heads, + # rewritten branches, etc.) that aren't part of the official + # main/release history. + REACHABLE=false + for branch in main $(git branch -r | grep -E 'origin/release/' | sed 's|origin/||'); do + if git merge-base --is-ancestor "$SHA" "origin/$branch" 2>/dev/null; then + echo "SHA is reachable from origin/$branch" + REACHABLE=true + break + fi + done + if [ "$REACHABLE" = "false" ]; then + echo "::error::SHA $SHA is not reachable from main or any release/* branch." + exit 1 + fi + + TAG="ciflow/trunk/$SHA" + + # If the tag already exists (e.g. someone already promoted + # this commit), exit cleanly — no-op is a valid outcome. + if git ls-remote --tags --exit-code origin "refs/tags/$TAG" >/dev/null 2>&1; then + echo "Tag $TAG already exists on origin; nothing to do." + exit 0 + fi + + git config user.name "pytorchbot" + git config user.email "pytorchbot@users.noreply.github.com" + git tag "$TAG" "$SHA" + git push origin "$TAG" + + echo "::notice::Pushed $TAG.
Watch the tag-triggered workflow runs (pull / trunk / viable-strict-gate); once they pass, the next update-viablestrict cron (every 30 min) will advance viable/strict." From 1e4a688253ea391e07315f4b4a67ea09c11b41bd Mon Sep 17 00:00:00 2001 From: Scott Roy Date: Mon, 1 Jun 2026 13:38:19 -0700 Subject: [PATCH 3/5] up --- .github/workflows/_ci-run-decision.yml | 30 +++++++++++---- .github/workflows/mlx.yml | 48 ++++++++++++++++++++++-- .github/workflows/pull.yml | 4 -- .github/workflows/trunk.yml | 12 ++---- .github/workflows/viable-strict-gate.yml | 4 +- 5 files changed, 74 insertions(+), 24 deletions(-) diff --git a/.github/workflows/_ci-run-decision.yml b/.github/workflows/_ci-run-decision.yml index b58d8e30aa7..cd21f36810a 100644 --- a/.github/workflows/_ci-run-decision.yml +++ b/.github/workflows/_ci-run-decision.yml @@ -7,8 +7,8 @@ name: CI Run Decision # Returns ``is-full-run = 'true'`` for: # - workflow_dispatch (manual run) # - ciflow/* tag pushes (maintainer-forced full run) -# - push events whose SHA is in the deterministic 25% sample -# (last hex char in {0,4,8,c}) +# - push events at every 4th commit by depth from main's root +# (deterministic 25% sample, hard cap of 4 commits between samples) # # Returns ``is-full-run = 'false'`` for: # - pull_request / pull_request_target (use path filter instead) @@ -25,12 +25,23 @@ on: description: "'true' if this commit should run all CI jobs regardless of path filter; 'false' otherwise." value: ${{ jobs.decide.outputs.is-full-run }} +permissions: + contents: read + jobs: decide: runs-on: ubuntu-latest outputs: is-full-run: ${{ steps.compute.outputs.is-full-run }} steps: + # Full history needed to compute commit depth via `git rev-list --count`. + # The depth-based sample (every 4th commit) needs the SHA to be reachable + # from the repo root. 
+ - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Compute is-full-run id: compute env: @@ -54,12 +65,17 @@ jobs: ;; esac - # Deterministic 25% sample on push: SHA's last hex char in {0,4,8,c}. - # Keep in sync with the sample in viable-strict-gate.yml. + # Depth-based 25% sample on push: every 4th commit (depth from + # the repo root, mod 4 == 0). Hard guarantees: + # - Exactly 25% sample rate (no statistical variance). + # - At most 3 non-sampled commits between any two samples. + # Re-runs of the same commit always have the same outcome. if [ "$IS_FULL" = "false" ] && [ "$EVENT_NAME" = "push" ]; then - case "$SHA" in - *0|*4|*8|*c) IS_FULL=true ;; - esac + DEPTH=$(git rev-list --count "$SHA") + if [ $((DEPTH % 4)) -eq 0 ]; then + IS_FULL=true + fi + echo "Depth: $DEPTH (depth %% 4 = $((DEPTH % 4)))" fi echo "Event: $EVENT_NAME" diff --git a/.github/workflows/mlx.yml b/.github/workflows/mlx.yml index c51f126dbe6..1e5839c7789 100644 --- a/.github/workflows/mlx.yml +++ b/.github/workflows/mlx.yml @@ -25,7 +25,19 @@ concurrency: permissions: {} jobs: + # Emits is-full-run='true' for workflow_dispatch / ciflow tag / + # sampled-push commits (every 4th main/release commit by depth). + # Returns 'false' for pull_request events — PR jobs use the workflow- + # level `paths:` filter (above) for path-based gating instead. 
+ run-decision: + name: CI run decision + permissions: {contents: read} # this workflow defaults to permissions: {}; the called workflow requests contents: read and a caller may not grant less + uses: ./.github/workflows/_ci-run-decision.yml test-mlx: + needs: run-decision + if: | + github.event_name == 'pull_request' || + needs.run-decision.outputs.is-full-run == 'true' uses: pytorch/test-infra/.github/workflows/macos_job.yml@main with: default-packages: "" @@ -93,6 +105,10 @@ jobs: echo "::endgroup::" test-mlx-qwen35-moe: + needs: run-decision + if: | + github.event_name == 'pull_request' || + needs.run-decision.outputs.is-full-run == 'true' uses: pytorch/test-infra/.github/workflows/macos_job.yml@main with: default-packages: "" @@ -145,6 +161,10 @@ jobs: echo "::endgroup::" backend-tester: + needs: run-decision + if: | + github.event_name == 'pull_request' || + needs.run-decision.outputs.is-full-run == 'true' strategy: fail-fast: false matrix: @@ -191,6 +211,10 @@ jobs: fi test-mlx-parakeet: + needs: run-decision + if: | + github.event_name == 'pull_request' || + needs.run-decision.outputs.is-full-run == 'true' uses: pytorch/test-infra/.github/workflows/macos_job.yml@main with: default-packages: "" @@ -248,7 +272,10 @@ jobs: # Requires HuggingFace secrets — skip on fork PRs. # Maintainers can opt-in by applying the ciflow/mlx label, which # pushes a ciflow/mlx/ tag that re-runs this workflow with secrets. - if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request' + needs: run-decision + if: | + (github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request') && + (github.event_name == 'pull_request' || needs.run-decision.outputs.is-full-run == 'true') uses: pytorch/test-infra/.github/workflows/macos_job.yml@main secrets: inherit with: @@ -309,7 +336,10 @@ jobs: test-mlx-voxtral-realtime: # Requires HuggingFace secrets — skip on fork PRs. # Maintainers can opt-in by applying the ciflow/mlx label.
- if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request' + needs: run-decision + if: | + (github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request') && + (github.event_name == 'pull_request' || needs.run-decision.outputs.is-full-run == 'true') uses: pytorch/test-infra/.github/workflows/macos_job.yml@main secrets: inherit with: @@ -387,7 +417,10 @@ jobs: test-mlx-whisper: # Requires HuggingFace secrets — skip on fork PRs. # Maintainers can opt-in by applying the ciflow/mlx label. - if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request' + needs: run-decision + if: | + (github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request') && + (github.event_name == 'pull_request' || needs.run-decision.outputs.is-full-run == 'true') uses: pytorch/test-infra/.github/workflows/macos_job.yml@main secrets: inherit with: @@ -439,6 +472,10 @@ jobs: test-mlx-stories110m: + needs: run-decision + if: | + github.event_name == 'pull_request' || + needs.run-decision.outputs.is-full-run == 'true' uses: pytorch/test-infra/.github/workflows/macos_job.yml@main with: default-packages: "" @@ -505,7 +542,10 @@ jobs: test-mlx-llm: # Requires HuggingFace secrets — skip on fork PRs. # Maintainers can opt-in by applying the ciflow/mlx label. 
- if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request' + needs: run-decision + if: | + (github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request') && + (github.event_name == 'pull_request' || needs.run-decision.outputs.is-full-run == 'true') strategy: fail-fast: false matrix: diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 9e0dd8a2164..0337177e49f 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -21,10 +21,6 @@ jobs: name: Get changed files uses: ./.github/workflows/_get-changed-files.yml - # Emits is-full-run='true' for PR/dispatch/tag/sampled-push commits - # (the ~25% of pushes that should run full CI) and 'false' otherwise. - # Per-job `if:` checks this to bypass path filtering on full-run - # commits while staying path-filtered on the 75% non-sampled pushes. run-decision: name: CI run decision uses: ./.github/workflows/_ci-run-decision.yml diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index ad0caeaf65d..e65a5842484 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -25,10 +25,6 @@ jobs: name: Get changed files uses: ./.github/workflows/_get-changed-files.yml - # Emits is-full-run='true' for PR/dispatch/tag/sampled-push commits - # (the ~25% of pushes that should run full CI) and 'false' otherwise. - # Per-job `if:` checks this to bypass path filtering on full-run - # commits while staying path-filtered on the 75% non-sampled pushes. run-decision: name: CI run decision uses: ./.github/workflows/_ci-run-decision.yml @@ -36,10 +32,6 @@ jobs: test-models-macos-cpu: name: test-models-macos-cpu needs: run-decision - # Path-filter-on-push (sampled): runs on every PR / dispatch / - # ciflow tag, and on ~25% of pushes via _ci-run-decision.yml. - # The viable-strict-gate workflow blocks viable/strict from - # advancing on the 75% non-sampled pushes. 
if: | github.event_name == 'pull_request' || needs.run-decision.outputs.is-full-run == 'true' @@ -397,6 +389,10 @@ jobs: test-llama-torchao-lowbit: name: test-llama-torchao-lowbit + needs: run-decision + if: | + github.event_name == 'pull_request' || + needs.run-decision.outputs.is-full-run == 'true' uses: pytorch/test-infra/.github/workflows/macos_job.yml@main with: default-packages: "" diff --git a/.github/workflows/viable-strict-gate.yml b/.github/workflows/viable-strict-gate.yml index d055880b002..38beb4cf0fc 100644 --- a/.github/workflows/viable-strict-gate.yml +++ b/.github/workflows/viable-strict-gate.yml @@ -25,6 +25,8 @@ on: tags: - ciflow/trunk/* +permissions: {contents: read} # the called _ci-run-decision.yml requests contents: read; a caller may not grant less than the callee asks for + jobs: run-decision: uses: ./.github/workflows/_ci-run-decision.yml @@ -44,6 +46,6 @@ jobs: exit 0 fi echo "::error::Non-full-run commit (path-filtered CI). viable/strict cannot advance from this commit." - echo "Full CI runs on commits whose SHA ends in 0, 4, 8, or c (~25% of commits)." + echo "Full CI runs on every 4th commit on main / release/* (depth % 4 == 0)." echo "To force a full run on this commit, push a 'ciflow/trunk/${{ github.sha }}' tag." exit 1 From c5f6fb30f6d6a7cdc4c0aad6492f771b0ffd8d2e Mon Sep 17 00:00:00 2001 From: Scott Roy Date: Mon, 1 Jun 2026 14:26:36 -0700 Subject: [PATCH 4/5] up --- .github/workflows/_ci-run-decision.yml | 22 ++++++---- .../workflows/promote-to-viable-strict.yml | 41 +++++++++++++++++++ .github/workflows/pull.yml | 6 ++- .github/workflows/trunk.yml | 6 ++- 4 files changed, 63 insertions(+), 12 deletions(-) diff --git a/.github/workflows/_ci-run-decision.yml b/.github/workflows/_ci-run-decision.yml index cd21f36810a..99413f17d05 100644 --- a/.github/workflows/_ci-run-decision.yml +++ b/.github/workflows/_ci-run-decision.yml @@ -34,9 +34,11 @@ jobs: outputs: is-full-run: ${{ steps.compute.outputs.is-full-run }} steps: - # Full history needed to compute commit depth via `git rev-list --count`.
- # The depth-based sample (every 4th commit) needs the SHA to be reachable - # from the repo root. + # Full history needed to compute commit depth via + # `git rev-list --first-parent --count`. The --first-parent flag + # follows only the linear main-branch history through merge + # commits, so the count maps 1:1 to pushes on main regardless of + # how many commits were in any merged PR. - name: Checkout uses: actions/checkout@v4 with: @@ -65,17 +67,21 @@ jobs: ;; esac - # Depth-based 25% sample on push: every 4th commit (depth from - # the repo root, mod 4 == 0). Hard guarantees: - # - Exactly 25% sample rate (no statistical variance). + # Depth-based 25% sample on push: every 4th commit on the + # linear main-branch history (depth % 4 == 0). --first-parent + # is required — plain `git rev-list --count` would walk all + # merge parents, so the count would jump by (1 + PR_size) at + # each merge commit and the sample rate would be unpredictable. + # Hard guarantees with --first-parent: + # - Every 4th first-parent commit is sampled (25% of pushes when each push lands one commit). # - At most 3 non-sampled commits between any two samples. # Re-runs of the same commit always have the same outcome. if [ "$IS_FULL" = "false" ] && [ "$EVENT_NAME" = "push" ]; then - DEPTH=$(git rev-list --count "$SHA") + DEPTH=$(git rev-list --first-parent --count "$SHA") if [ $((DEPTH % 4)) -eq 0 ]; then IS_FULL=true fi - echo "Depth: $DEPTH (depth %% 4 = $((DEPTH % 4)))" + echo "Depth: $DEPTH (first-parent; depth % 4 = $((DEPTH % 4)))" fi echo "Event: $EVENT_NAME" diff --git a/.github/workflows/promote-to-viable-strict.yml b/.github/workflows/promote-to-viable-strict.yml index ea07857f89d..f15f9ca3236 100644 --- a/.github/workflows/promote-to-viable-strict.yml +++ b/.github/workflows/promote-to-viable-strict.yml @@ -30,6 +30,9 @@ on: permissions: contents: write + # Needed to delete the failed `viable-strict-gate` run that the + # original push triggered — see the "Delete failed viable-strict-gate runs on this SHA" step.
+ actions: write concurrency: # One in-flight promotion at a time; safer than racing tag pushes. @@ -96,3 +99,41 @@ jobs: git push origin "$TAG" echo "::notice::Pushed $TAG. Watch the tag-triggered workflow runs (pull / trunk / viable-strict-gate); once they pass, the next update-viablestrict cron (every 30 min) will advance viable/strict." + + # Defense-in-depth: the push that originally landed this commit + # triggered a `viable-strict-gate` run that failed (because the + # commit wasn't sampled). The tag push above triggers a NEW run + # of the gate workflow that will succeed. Standard PyTorch viable/ + # strict resolves multiple runs by taking the latest conclusion, + # so this is usually fine — but to remove ambiguity (and keep the + # commit's HUD row clean), explicitly delete any prior failed/ + # cancelled gate runs on this SHA. + - name: Delete failed viable-strict-gate runs on this SHA + env: + GH_TOKEN: ${{ github.token }} + SHA: ${{ inputs.sha }} + REPO: ${{ github.repository }} + run: | + set -euo pipefail + + # List all viable-strict-gate runs for the SHA, filter to + # those that completed unsuccessfully, and delete each one. + # Failures here are non-fatal: the tag push above is the + # primary mechanism; this cleanup is best-effort. + RUNS=$(gh api "repos/$REPO/actions/runs?head_sha=$SHA&per_page=100" \ + --jq '.workflow_runs[] + | select(.name == "viable-strict-gate") + | select(.conclusion == "failure" or .conclusion == "cancelled" or .conclusion == "timed_out") + | .id' 2>/dev/null || true) + + if [ -z "$RUNS" ]; then + echo "No prior failed viable-strict-gate runs to clean up." + exit 0 + fi + + while IFS= read -r RUN_ID; do + [ -z "$RUN_ID" ] && continue + echo "Deleting failed viable-strict-gate run $RUN_ID" + gh api -X DELETE "repos/$REPO/actions/runs/$RUN_ID" || \ + echo "::warning::Failed to delete run $RUN_ID; continuing anyway." 
+ done <<< "$RUNS" diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 0337177e49f..1f019383dd6 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -15,8 +15,10 @@ concurrency: cancel-in-progress: true jobs: - # Emits PR diff file list; non-PR events emit '*' so the per-job - # `if:` short-circuits via `event_name != 'pull_request'`. + # Emits PR diff file list; non-PR events emit '*' (every contains() + # check returns true). Gated jobs combine this with run-decision's + # is-full-run output: on push, jobs run only if a path matches OR + # the commit is a sampled full-run. changed-files: name: Get changed files uses: ./.github/workflows/_get-changed-files.yml diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index e65a5842484..6e1d0ea9a88 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -19,8 +19,10 @@ concurrency: cancel-in-progress: true jobs: - # Emits PR diff file list; non-PR events emit '*' so the per-job - # `if:` short-circuits via `event_name != 'pull_request'`. + # Emits PR diff file list; non-PR events emit '*' (every contains() + # check returns true). Gated jobs combine this with run-decision's + # is-full-run output: on push, jobs run only if a path matches OR + # the commit is a sampled full-run. 
changed-files: name: Get changed files uses: ./.github/workflows/_get-changed-files.yml From 964dc8ab0ed53a04a8051af2eb9a7ed68bbf3086 Mon Sep 17 00:00:00 2001 From: Scott Roy Date: Mon, 1 Jun 2026 14:47:40 -0700 Subject: [PATCH 5/5] up --- .github/workflows/_get-changed-files.yml | 76 ++++++++++++++++--- .../workflows/promote-to-viable-strict.yml | 8 +- .github/workflows/pull.yml | 15 +++- .github/workflows/trunk.yml | 15 +++- 4 files changed, 93 insertions(+), 21 deletions(-) diff --git a/.github/workflows/_get-changed-files.yml b/.github/workflows/_get-changed-files.yml index 55712b06527..7d12f23e08e 100644 --- a/.github/workflows/_get-changed-files.yml +++ b/.github/workflows/_get-changed-files.yml @@ -2,11 +2,24 @@ name: Get Changed Files on: workflow_call: + inputs: + include-push-diff: + description: | + When true, on push events the output is the diff between + `github.event.before` and `github.sha` (computed via the + GitHub Compare API). Default is false: push events emit '*', + matching the historical behavior. + type: boolean + required: false + default: false outputs: changed-files: - description: "List of changed files (space-separated) or '*' if not in a PR" + description: "Space-separated list of changed files for PR events (and push events when include-push-diff=true); '*' otherwise." 
value: ${{ jobs.get-changed-files.outputs.changed-files }} +permissions: + contents: read + jobs: get-changed-files: runs-on: ubuntu-latest @@ -18,26 +31,65 @@ jobs: id: get-files env: GH_TOKEN: ${{ github.token }} + INCLUDE_PUSH_DIFF: ${{ inputs.include-push-diff }} run: | - # Check if we're in a pull request context - if [ "${{ github.event_name }}" = "pull_request" ] || [ "${{ github.event_name }}" = "pull_request_target" ]; then - echo "Running in PR context" + set -eu - # Get the PR number from the github context - PR_NUMBER="${{ github.event.number }}" + EVENT_NAME="${{ github.event_name }}" + REPO="${{ github.repository }}" - # Use gh CLI to get changed files in the PR with explicit repo - CHANGED_FILES=$(gh api repos/${{ github.repository }}/pulls/$PR_NUMBER/files --paginate --jq '.[] | select(.status != "removed") | .filename' | tr '\n' ' ' | sed 's/ $//') + # PR context: list files modified by the PR. + if [ "$EVENT_NAME" = "pull_request" ] || [ "$EVENT_NAME" = "pull_request_target" ]; then + echo "Running in PR context" + PR_NUMBER="${{ github.event.number }}" + CHANGED_FILES=$(gh api "repos/$REPO/pulls/$PR_NUMBER/files" --paginate \ + --jq '.[] | select(.status != "removed") | .filename' | tr '\n' ' ' | sed 's/ $//') if [ -z "$CHANGED_FILES" ]; then echo "No changed files found, setting to '*'" CHANGED_FILES="*" fi - echo "Changed files: $CHANGED_FILES" echo "changed-files=$CHANGED_FILES" >> "$GITHUB_OUTPUT" + exit 0 + fi - else - echo "Not in PR context, setting changed files to '*'" - echo "changed-files=*" >> "$GITHUB_OUTPUT" + # Push context with opt-in: diff between previous tip and new + # tip via the GitHub Compare API. This is what lets path- + # filtered jobs skip on push commits that don't touch their + # relevant paths. Callers must explicitly request this with + # `include-push-diff: true` because some workflows (e.g. + # lint.yml) historically rely on the '*' value to take a + # broader code path. 
+ if [ "$EVENT_NAME" = "push" ] && [ "$INCLUDE_PUSH_DIFF" = "true" ]; then + BEFORE="${{ github.event.before }}" + AFTER="${{ github.sha }}" + ZERO_SHA="0000000000000000000000000000000000000000" + + if [ -z "$BEFORE" ] || [ "$BEFORE" = "$ZERO_SHA" ]; then + echo "No 'before' SHA on push event (tag/branch creation or initial push); setting changed files to '*'" + echo "changed-files=*" >> "$GITHUB_OUTPUT" + exit 0 + fi + + echo "Running in push context: comparing $BEFORE..$AFTER" + CHANGED_FILES=$(gh api "repos/$REPO/compare/$BEFORE...$AFTER" --paginate \ + --jq '.files[]? | select(.status != "removed") | .filename' 2>/dev/null \ + | tr '\n' ' ' | sed 's/ $//' || echo "") + + if [ -z "$CHANGED_FILES" ]; then + echo "Compare returned empty; setting changed files to '*'" + echo "changed-files=*" >> "$GITHUB_OUTPUT" + exit 0 + fi + + echo "Changed files: $CHANGED_FILES" + echo "changed-files=$CHANGED_FILES" >> "$GITHUB_OUTPUT" + exit 0 fi + + # Default for non-PR events (push without opt-in, + # workflow_dispatch, schedule, etc.): no diff. Emit '*' to + # preserve the historical behavior. + echo "Event '$EVENT_NAME' (or include-push-diff=false): emitting '*'" + echo "changed-files=*" >> "$GITHUB_OUTPUT" diff --git a/.github/workflows/promote-to-viable-strict.yml b/.github/workflows/promote-to-viable-strict.yml index f15f9ca3236..a750bef4d0d 100644 --- a/.github/workflows/promote-to-viable-strict.yml +++ b/.github/workflows/promote-to-viable-strict.yml @@ -72,7 +72,13 @@ jobs: # rewritten branches, etc.) that aren't part of the official # main/release history. REACHABLE=false - for branch in main $(git branch -r | grep -E 'origin/release/' | sed 's|origin/||'); do + # `git for-each-ref` produces clean refnames (no leading + # whitespace, no `origin/HEAD ->` lines), unlike `git branch -r`. 
+ BRANCHES="main" + while IFS= read -r RELEASE_BRANCH; do + BRANCHES="$BRANCHES $RELEASE_BRANCH" + done < <(git for-each-ref --format='%(refname:lstrip=3)' refs/remotes/origin/release/) + for branch in $BRANCHES; do if git merge-base --is-ancestor "$SHA" "origin/$branch" 2>/dev/null; then echo "SHA is reachable from origin/$branch" REACHABLE=true diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 1f019383dd6..fab05a57ecc 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -15,13 +15,20 @@ concurrency: cancel-in-progress: true jobs: - # Emits PR diff file list; non-PR events emit '*' (every contains() - # check returns true). Gated jobs combine this with run-decision's - # is-full-run output: on push, jobs run only if a path matches OR - # the commit is a sampled full-run. + # Emits the list of changed files for the current PR or push commit. + # On PR: PR diff. On push: diff against `github.event.before`. + # On events without a diff base (workflow_dispatch, tag creation, + # initial push), emits '*' — note that `contains('*', 'path')` is + # false (literal substring match, not glob), so path-filtered jobs + # rely on run-decision's is-full-run output for those events. changed-files: name: Get changed files uses: ./.github/workflows/_get-changed-files.yml + with: + # Opt in to push-event diff so path-filtered jobs can skip pushes + # that don't touch their relevant paths. Without this, push events + # emit '*' and `contains('*', 'path')` is always false. + include-push-diff: true run-decision: name: CI run decision diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 6e1d0ea9a88..c8fece93e9d 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -19,13 +19,20 @@ concurrency: cancel-in-progress: true jobs: - # Emits PR diff file list; non-PR events emit '*' (every contains() - # check returns true). 
Gated jobs combine this with run-decision's - # is-full-run output: on push, jobs run only if a path matches OR - # the commit is a sampled full-run. + # Emits the list of changed files for the current PR or push commit. + # On PR: PR diff. On push: diff against `github.event.before`. + # On events without a diff base (workflow_dispatch, tag creation, + # initial push), emits '*' — note that `contains('*', 'path')` is + # false (literal substring match, not glob), so path-filtered jobs + # rely on run-decision's is-full-run output for those events. changed-files: name: Get changed files uses: ./.github/workflows/_get-changed-files.yml + with: + # Opt in to push-event diff so path-filtered jobs can skip pushes + # that don't touch their relevant paths. Without this, push events + # emit '*' and `contains('*', 'path')` is always false. + include-push-diff: true run-decision: name: CI run decision