From 952e121f5148f3075f4981df4808a746088a6850 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Thu, 25 Jun 2026 21:37:18 -0700 Subject: [PATCH 1/2] Fix Windows unittest CI: force CPU-only build to avoid CUDA DLL load failure The Windows CI image ships CUDA toolkits on PATH. After adding (13, 2) to SUPPORTED_CUDA_VERSIONS (#20440), install_executorch's auto-detection (setup.py: is_cuda_available() via nvcc) started returning True on the Windows runner (which has the CUDA 13.2 toolkit), so it flipped EXECUTORCH_BUILD_CUDA=ON. But the unittest jobs install CPU torch, so the resulting CUDA build of _portable_lib fails to load its CUDA DLLs at import time: ImportError: DLL load failed while importing _portable_lib causing all pytest collection to error out (unittest / unittest-editable / unittest-release on windows). Add a -cpuOnly switch to setup-windows.ps1 that forces -DEXECUTORCH_BUILD_CUDA=OFF via CMAKE_ARGS, and pass it from the CPU unittest workflow. The CUDA Windows jobs (cuda-windows.yml) keep the default and are unaffected. --- .ci/scripts/setup-windows.ps1 | 13 ++++++++++++- .github/workflows/_unittest.yml | 2 +- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/.ci/scripts/setup-windows.ps1 b/.ci/scripts/setup-windows.ps1 index 329e81b3cf0..2cf28e0cdfe 100644 --- a/.ci/scripts/setup-windows.ps1 +++ b/.ci/scripts/setup-windows.ps1 @@ -1,5 +1,6 @@ param ( - [string]$editable = "false" + [string]$editable = "false", + [string]$cpuOnly = "false" ) conda create --yes --quiet -n et python=3.12 @@ -13,6 +14,16 @@ conda activate et # Install test dependencies pip install -r .ci/docker/requirements-ci.txt +# The Windows CI image ships CUDA toolkits on PATH, so install_executorch +# (setup.py) auto-enables EXECUTORCH_BUILD_CUDA whenever the detected nvcc +# version is in SUPPORTED_CUDA_VERSIONS. CPU-only jobs install CPU torch, so a +# CUDA build of _portable_lib then fails to load its CUDA DLLs at import time +# ("DLL load failed while importing _portable_lib"). Force a CPU-only build +# when the caller asks for it. +if ($cpuOnly -eq 'true') { + $env:CMAKE_ARGS = "$env:CMAKE_ARGS -DEXECUTORCH_BUILD_CUDA=OFF" +} + if ($editable -eq 'true') { install_executorch.bat --editable } else { diff --git a/.github/workflows/_unittest.yml b/.github/workflows/_unittest.yml index a253857d2c0..e300c1541b8 100644 --- a/.github/workflows/_unittest.yml +++ b/.github/workflows/_unittest.yml @@ -72,7 +72,7 @@ jobs: \$ErrorActionPreference = 'Stop' \$PSNativeCommandUseErrorActionPreference = \$true - .ci/scripts/setup-windows.ps1 -editable "${{ inputs.editable }}" + .ci/scripts/setup-windows.ps1 -editable "${{ inputs.editable }}" -cpuOnly true if (\$LASTEXITCODE -ne 0) { Write-Host "Setup failed. Exit code: \$LASTEXITCODE." exit \$LASTEXITCODE From d2b3fac132667268499de05d99bbe2678621bbd7 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Thu, 25 Jun 2026 22:29:28 -0700 Subject: [PATCH 2/2] Fix Windows wheel build: force CPU-only to avoid CUDA _portable_lib DLL load failure Same root cause as the unittest fix in this PR, second site. The Windows wheel build (build-wheels-windows.yml -> .ci/scripts/wheel/) does not go through setup-windows.ps1. The Windows CI image has the CUDA 13.2 toolkit on PATH, so after #20440 added (13, 2) to SUPPORTED_CUDA_VERSIONS, install_executorch's auto-detection enables EXECUTORCH_BUILD_CUDA and bakes a CUDA _portable_lib + aoti_cuda_shims.lib into the CPU wheel. The smoke test then fails with: ImportError: DLL load failed while importing _portable_lib Windows wheels are CPU-only (with-cuda: disabled), so force -DEXECUTORCH_BUILD_CUDA=OFF via CMAKE_ARGS in pre_build_script.sh on Windows. --- .ci/scripts/wheel/pre_build_script.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.ci/scripts/wheel/pre_build_script.sh b/.ci/scripts/wheel/pre_build_script.sh index ce3652e51f1..4f3f9a60f9b 100755 --- a/.ci/scripts/wheel/pre_build_script.sh +++ b/.ci/scripts/wheel/pre_build_script.sh @@ -50,6 +50,15 @@ if [[ $UNAME_S == *"MINGW"* || $UNAME_S == *"MSYS"* ]]; then echo "Enabling symlinks on Windows" git config core.symlinks true git checkout -f HEAD + + # Windows wheels are CPU-only (build-wheels-windows.yml sets + # with-cuda: disabled), but the Windows CI image ships a CUDA toolkit on + # PATH, which makes setup.py auto-enable EXECUTORCH_BUILD_CUDA. That bakes a + # CUDA _portable_lib into the CPU wheel, which then fails its DLL load in the + # smoke test ("DLL load failed while importing _portable_lib"). Force a + # CPU-only build. + export CMAKE_ARGS="${CMAKE_ARGS:-} -DEXECUTORCH_BUILD_CUDA=OFF" + echo "CMAKE_ARGS=${CMAKE_ARGS}" >> "${GITHUB_ENV}" fi # Manually install build requirements because `python setup.py bdist_wheel` does