diff --git a/.ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse.yaml b/.ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue.yaml similarity index 96% rename from .ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse.yaml rename to .ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue.yaml index 128dd0f14..50a508ca3 100644 --- a/.ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse.yaml +++ b/.ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue.yaml @@ -11,7 +11,7 @@ c_stdlib_version: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -29,7 +29,7 @@ fmt: github_actions_labels: - cirun-openstack-gpu-2xlarge is_rc: -- 'False' +- 'True' libabseil: - '20250512' libblas: diff --git a/.ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml b/.ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml similarity index 96% rename from .ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml rename to .ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml index 96be2927b..aa311390d 100644 --- a/.ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml +++ b/.ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml @@ -11,7 +11,7 @@ c_stdlib_version: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -29,7 +29,7 @@ fmt: github_actions_labels: - cirun-openstack-gpu-2xlarge is_rc: -- 'False' +- 'True' libabseil: - '20250512' libblas: diff --git a/.ci_support/linux_64_blas_implmklchannel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse.yaml b/.ci_support/linux_64_blas_implmklchannel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue.yaml similarity index 96% rename from .ci_support/linux_64_blas_implmklchannel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse.yaml rename to .ci_support/linux_64_blas_implmklchannel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue.yaml index 1134e6b03..673574c66 100644 --- a/.ci_support/linux_64_blas_implmklchannel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse.yaml +++ b/.ci_support/linux_64_blas_implmklchannel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue.yaml @@ -11,7 +11,7 @@ c_stdlib_version: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -29,7 +29,7 @@ fmt: github_actions_labels: - cirun-openstack-gpu-2xlarge is_rc: -- 'False' +- 'True' libabseil: - '20250512' libblas: diff --git a/.ci_support/linux_64_blas_implmklchannel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml b/.ci_support/linux_64_blas_implmklchannel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml similarity index 96% rename from 
.ci_support/linux_64_blas_implmklchannel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml rename to .ci_support/linux_64_blas_implmklchannel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml index 7ea16ec20..e413beb1a 100644 --- a/.ci_support/linux_64_blas_implmklchannel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml +++ b/.ci_support/linux_64_blas_implmklchannel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml @@ -11,7 +11,7 @@ c_stdlib_version: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -29,7 +29,7 @@ fmt: github_actions_labels: - cirun-openstack-gpu-2xlarge is_rc: -- 'False' +- 'True' libabseil: - '20250512' libblas: diff --git a/.ci_support/linux_aarch64_channel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse.yaml b/.ci_support/linux_aarch64_channel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue.yaml similarity index 96% rename from .ci_support/linux_aarch64_channel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse.yaml rename to .ci_support/linux_aarch64_channel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue.yaml index 693408283..1c9769b94 100644 --- a/.ci_support/linux_aarch64_channel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse.yaml +++ b/.ci_support/linux_aarch64_channel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue.yaml @@ -11,7 +11,7 @@ c_stdlib_version: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -29,7 +29,7 @@ fmt: github_actions_labels: - cirun-openstack-gpu-2xlarge is_rc: -- 'False' +- 'True' libabseil: - '20250512' libblas: diff --git a/.ci_support/linux_aarch64_channel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml b/.ci_support/linux_aarch64_channel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml similarity index 96% rename from .ci_support/linux_aarch64_channel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml rename to .ci_support/linux_aarch64_channel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml index 1721a78b1..1c7ea27a5 100644 --- a/.ci_support/linux_aarch64_channel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml +++ b/.ci_support/linux_aarch64_channel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml @@ -11,7 +11,7 @@ c_stdlib_version: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -29,7 +29,7 @@ fmt: github_actions_labels: - cirun-openstack-gpu-2xlarge is_rc: -- 'False' +- 'True' libabseil: - '20250512' libblas: diff --git a/.ci_support/osx_64_blas_implgenericchannel_targetsconda-forge_mainis_rcFalse.yaml b/.ci_support/osx_64_blas_implgenericchannel_targetsconda-forge_pytorch_rcis_rcTrue.yaml similarity index 95% rename from .ci_support/osx_64_blas_implgenericchannel_targetsconda-forge_mainis_rcFalse.yaml rename to .ci_support/osx_64_blas_implgenericchannel_targetsconda-forge_pytorch_rcis_rcTrue.yaml index 2ff370171..0e3c6fc57 100644 --- a/.ci_support/osx_64_blas_implgenericchannel_targetsconda-forge_mainis_rcFalse.yaml +++ b/.ci_support/osx_64_blas_implgenericchannel_targetsconda-forge_pytorch_rcis_rcTrue.yaml @@ -1,7 +1,7 @@ MACOSX_DEPLOYMENT_TARGET: - '11.0' MACOSX_SDK_VERSION: -- '13.3' +- '14.5' 
blas_impl: - generic c_compiler: @@ -15,7 +15,7 @@ c_stdlib_version: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -29,7 +29,7 @@ fmt: github_actions_labels: - cirun-macos-m4-large is_rc: -- 'False' +- 'True' libabseil: - '20250512' libblas: diff --git a/.ci_support/osx_64_blas_implmklchannel_targetsconda-forge_mainis_rcFalse.yaml b/.ci_support/osx_64_blas_implmklchannel_targetsconda-forge_pytorch_rcis_rcTrue.yaml similarity index 95% rename from .ci_support/osx_64_blas_implmklchannel_targetsconda-forge_mainis_rcFalse.yaml rename to .ci_support/osx_64_blas_implmklchannel_targetsconda-forge_pytorch_rcis_rcTrue.yaml index e08cae1bb..a79826c93 100644 --- a/.ci_support/osx_64_blas_implmklchannel_targetsconda-forge_mainis_rcFalse.yaml +++ b/.ci_support/osx_64_blas_implmklchannel_targetsconda-forge_pytorch_rcis_rcTrue.yaml @@ -1,7 +1,7 @@ MACOSX_DEPLOYMENT_TARGET: - '11.0' MACOSX_SDK_VERSION: -- '13.3' +- '14.5' blas_impl: - mkl c_compiler: @@ -15,7 +15,7 @@ c_stdlib_version: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -29,7 +29,7 @@ fmt: github_actions_labels: - cirun-macos-m4-large is_rc: -- 'False' +- 'True' libabseil: - '20250512' libblas: diff --git a/.ci_support/osx_arm64_channel_targetsconda-forge_mainis_rcFalse.yaml b/.ci_support/osx_arm64_channel_targetsconda-forge_pytorch_rcis_rcTrue.yaml similarity index 95% rename from .ci_support/osx_arm64_channel_targetsconda-forge_mainis_rcFalse.yaml rename to .ci_support/osx_arm64_channel_targetsconda-forge_pytorch_rcis_rcTrue.yaml index 9815c8bd2..6c92bb710 100644 --- a/.ci_support/osx_arm64_channel_targetsconda-forge_mainis_rcFalse.yaml +++ b/.ci_support/osx_arm64_channel_targetsconda-forge_pytorch_rcis_rcTrue.yaml @@ -1,7 +1,7 @@ MACOSX_DEPLOYMENT_TARGET: - '11.0' MACOSX_SDK_VERSION: -- '13.3' +- '14.5' blas_impl: - generic c_compiler: @@ -15,7 +15,7 @@ c_stdlib_version: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -29,7 +29,7 @@ fmt: github_actions_labels: - cirun-macos-m4-large is_rc: -- 'False' +- 'True' libabseil: - '20250512' libblas: diff --git a/.ci_support/win_64_channel_targetsconda-forge_maincuda_compiler_version12.8is_rcFalse.yaml b/.ci_support/win_64_channel_targetsconda-forge_pytorch_rccuda_compiler_version12.8is_rcTrue.yaml similarity index 95% rename from .ci_support/win_64_channel_targetsconda-forge_maincuda_compiler_version12.8is_rcFalse.yaml rename to .ci_support/win_64_channel_targetsconda-forge_pytorch_rccuda_compiler_version12.8is_rcTrue.yaml index 61349d925..e617d2fcb 100644 --- a/.ci_support/win_64_channel_targetsconda-forge_maincuda_compiler_version12.8is_rcFalse.yaml +++ b/.ci_support/win_64_channel_targetsconda-forge_pytorch_rccuda_compiler_version12.8is_rcTrue.yaml @@ -7,7 +7,7 @@ c_stdlib: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -21,7 +21,7 @@ fmt: github_actions_labels: - cirun-azure-windows-4xlarge is_rc: -- 'False' +- 'True' libabseil: - '20250512' libmagma_sparse: diff --git a/.ci_support/win_64_channel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml b/.ci_support/win_64_channel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml similarity index 95% rename from 
.ci_support/win_64_channel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml rename to .ci_support/win_64_channel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml index 8f4871b4d..b3a0d13a9 100644 --- a/.ci_support/win_64_channel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml +++ b/.ci_support/win_64_channel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml @@ -7,7 +7,7 @@ c_stdlib: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -21,7 +21,7 @@ fmt: github_actions_labels: - cirun-azure-windows-4xlarge is_rc: -- 'False' +- 'True' libabseil: - '20250512' libmagma_sparse: diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index d89b5d080..d3fce2674 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -21,73 +21,73 @@ jobs: fail-fast: false matrix: include: - - CONFIG: linux_64_blas_implgenericchannel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse - SHORT_CONFIG: linux_64_blas_implgenericchannel_targets_h71a96c7d + - CONFIG: linux_64_blas_implgenericchannel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue + SHORT_CONFIG: linux_64_blas_implgenericchannel_targets_hc637dd34 UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implgenericchannel_targets_h71a96c7d', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implgenericchannel_targets_hc637dd34', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 CONDA_FORGE_DOCKER_RUN_ARGS: "--gpus all" - - CONFIG: linux_64_blas_implgenericchannel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse - SHORT_CONFIG: linux_64_blas_implgenericchannel_targets_h17c608a0 + - CONFIG: linux_64_blas_implgenericchannel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue + SHORT_CONFIG: linux_64_blas_implgenericchannel_targets_h7548ab25 UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implgenericchannel_targets_h17c608a0', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implgenericchannel_targets_h7548ab25', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 CONDA_FORGE_DOCKER_RUN_ARGS: "--gpus all" - - CONFIG: linux_64_blas_implmklchannel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse - SHORT_CONFIG: linux_64_blas_implmklchannel_targetscond_h5b18f8bc + - CONFIG: linux_64_blas_implmklchannel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue + SHORT_CONFIG: linux_64_blas_implmklchannel_targetscond_h2dde49c7 UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implmklchannel_targetscond_h5b18f8bc', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implmklchannel_targetscond_h2dde49c7', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 CONDA_FORGE_DOCKER_RUN_ARGS: "--gpus all" - - CONFIG: linux_64_blas_implmklchannel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse - SHORT_CONFIG: linux_64_blas_implmklchannel_targetscond_h38f93959 + - CONFIG: 
linux_64_blas_implmklchannel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue + SHORT_CONFIG: linux_64_blas_implmklchannel_targetscond_h0aaf175a UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implmklchannel_targetscond_h38f93959', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implmklchannel_targetscond_h0aaf175a', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 CONDA_FORGE_DOCKER_RUN_ARGS: "--gpus all" - - CONFIG: linux_aarch64_channel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse - SHORT_CONFIG: linux_aarch64_channel_targetsconda-forge_heefc8d83 + - CONFIG: linux_aarch64_channel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue + SHORT_CONFIG: linux_aarch64_channel_targetsconda-forge_h562a5d25 UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_aarch64_channel_targetsconda-forge_heefc8d83', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_aarch64_channel_targetsconda-forge_h562a5d25', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 CONDA_FORGE_DOCKER_RUN_ARGS: "--gpus all" - - CONFIG: linux_aarch64_channel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse - SHORT_CONFIG: linux_aarch64_channel_targetsconda-forge_h56c2c839 + - CONFIG: linux_aarch64_channel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue + SHORT_CONFIG: linux_aarch64_channel_targetsconda-forge_he6bea9a8 UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_aarch64_channel_targetsconda-forge_h56c2c839', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_aarch64_channel_targetsconda-forge_he6bea9a8', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 CONDA_FORGE_DOCKER_RUN_ARGS: "--gpus all" - - CONFIG: osx_64_blas_implgenericchannel_targetsconda-forge_mainis_rcFalse - SHORT_CONFIG: osx_64_blas_implgenericchannel_targetsco_h709642d7 + - CONFIG: osx_64_blas_implgenericchannel_targetsconda-forge_pytorch_rcis_rcTrue + SHORT_CONFIG: osx_64_blas_implgenericchannel_targetsco_h36e27ba7 UPLOAD_PACKAGES: True os: macos - runs_on: ['cirun-macos-m4-large--${{ github.run_id }}-osx_64_blas_implgenericchannel_targetsco_h709642d7', 'macOS', 'arm64', 'self-hosted'] - - CONFIG: osx_64_blas_implmklchannel_targetsconda-forge_mainis_rcFalse - SHORT_CONFIG: osx_64_blas_implmklchannel_targetsconda-_h747b3c68 + runs_on: ['cirun-macos-m4-large--${{ github.run_id }}-osx_64_blas_implgenericchannel_targetsco_h36e27ba7', 'macOS', 'arm64', 'self-hosted'] + - CONFIG: osx_64_blas_implmklchannel_targetsconda-forge_pytorch_rcis_rcTrue + SHORT_CONFIG: osx_64_blas_implmklchannel_targetsconda-_hae519702 UPLOAD_PACKAGES: True os: macos - runs_on: ['cirun-macos-m4-large--${{ github.run_id }}-osx_64_blas_implmklchannel_targetsconda-_h747b3c68', 'macOS', 'arm64', 'self-hosted'] - - CONFIG: osx_arm64_channel_targetsconda-forge_mainis_rcFalse - SHORT_CONFIG: osx_arm64_channel_targetsconda-forge_mai_h5f57e26b + runs_on: ['cirun-macos-m4-large--${{ github.run_id }}-osx_64_blas_implmklchannel_targetsconda-_hae519702', 'macOS', 'arm64', 'self-hosted'] + - CONFIG: osx_arm64_channel_targetsconda-forge_pytorch_rcis_rcTrue + SHORT_CONFIG: 
osx_arm64_channel_targetsconda-forge_pyt_h292c857d UPLOAD_PACKAGES: True os: macos - runs_on: ['cirun-macos-m4-large--${{ github.run_id }}-osx_arm64_channel_targetsconda-forge_mai_h5f57e26b', 'macOS', 'arm64', 'self-hosted'] - - CONFIG: win_64_channel_targetsconda-forge_maincuda_compiler_version12.8is_rcFalse - SHORT_CONFIG: win_64_channel_targetsconda-forge_maincu_hca575dce + runs_on: ['cirun-macos-m4-large--${{ github.run_id }}-osx_arm64_channel_targetsconda-forge_pyt_h292c857d', 'macOS', 'arm64', 'self-hosted'] + - CONFIG: win_64_channel_targetsconda-forge_pytorch_rccuda_compiler_version12.8is_rcTrue + SHORT_CONFIG: win_64_channel_targetsconda-forge_pytorc_h650359c2 UPLOAD_PACKAGES: True os: windows - runs_on: ['cirun-azure-windows-4xlarge--${{ github.run_id }}-win_64_channel_targetsconda-forge_maincu_hca575dce', 'windows', 'x64', 'self-hosted'] - - CONFIG: win_64_channel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse - SHORT_CONFIG: win_64_channel_targetsconda-forge_maincu_hc68ac914 + runs_on: ['cirun-azure-windows-4xlarge--${{ github.run_id }}-win_64_channel_targetsconda-forge_pytorc_h650359c2', 'windows', 'x64', 'self-hosted'] + - CONFIG: win_64_channel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue + SHORT_CONFIG: win_64_channel_targetsconda-forge_pytorc_hffeaf219 UPLOAD_PACKAGES: True os: windows - runs_on: ['cirun-azure-windows-4xlarge--${{ github.run_id }}-win_64_channel_targetsconda-forge_maincu_hc68ac914', 'windows', 'x64', 'self-hosted'] + runs_on: ['cirun-azure-windows-4xlarge--${{ github.run_id }}-win_64_channel_targetsconda-forge_pytorc_hffeaf219', 'windows', 'x64', 'self-hosted'] steps: - name: Checkout code diff --git a/.scripts/run_osx_build.sh b/.scripts/run_osx_build.sh index 4bba44792..bac7141a9 100755 --- a/.scripts/run_osx_build.sh +++ b/.scripts/run_osx_build.sh @@ -63,8 +63,24 @@ if [[ "${sha:-}" == "" ]]; then sha=$(git rev-parse HEAD) fi -# HACK -cp .scripts/download_osx_sdk.sh $CONDA_PREFIX/bin/ +if [[ "${OSX_SDK_DIR:-}" == "" ]]; then + if [[ "${CI:-}" == "" ]]; then + echo "Please set OSX_SDK_DIR to a directory where SDKs can be downloaded to. Aborting" + exit 1 + else + export OSX_SDK_DIR=/opt/conda-sdks + /usr/bin/sudo mkdir -p "${OSX_SDK_DIR}" + /usr/bin/sudo chown "${USER}" "${OSX_SDK_DIR}" + fi +else + if tmpf=$(mktemp -p "$OSX_SDK_DIR" tmp.XXXXXXXX 2>/dev/null); then + rm -f "$tmpf" + echo "OSX_SDK_DIR is writeable without sudo, continuing" + else + echo "User-provided OSX_SDK_DIR is not writeable for current user! Aborting" + exit 1 + fi +fi echo -e "\n\nRunning the build setup script." 
source run_conda_forge_build_setup diff --git a/README.md b/README.md index 84e94ab74..7095e810c 100644 --- a/README.md +++ b/README.md @@ -40,14 +40,14 @@ Current release info Installing pytorch-cpu ====================== -Installing `pytorch-cpu` from the `conda-forge` channel can be achieved by adding `conda-forge` to your channels with: +Installing `pytorch-cpu` from the `conda-forge/label/pytorch_rc` channel can be achieved by adding `conda-forge/label/pytorch_rc` to your channels with: ``` -conda config --add channels conda-forge +conda config --add channels conda-forge/label/pytorch_rc conda config --set channel_priority strict ``` -Once the `conda-forge` channel has been enabled, `libtorch, pytorch, pytorch-cpu, pytorch-gpu, pytorch-tests` can be installed with `conda`: +Once the `conda-forge/label/pytorch_rc` channel has been enabled, `libtorch, pytorch, pytorch-cpu, pytorch-gpu, pytorch-tests` can be installed with `conda`: ``` conda install libtorch pytorch pytorch-cpu pytorch-gpu pytorch-tests @@ -62,26 +62,26 @@ mamba install libtorch pytorch pytorch-cpu pytorch-gpu pytorch-tests It is possible to list all of the versions of `libtorch` available on your platform with `conda`: ``` -conda search libtorch --channel conda-forge +conda search libtorch --channel conda-forge/label/pytorch_rc ``` or with `mamba`: ``` -mamba search libtorch --channel conda-forge +mamba search libtorch --channel conda-forge/label/pytorch_rc ``` Alternatively, `mamba repoquery` may provide more information: ``` # Search all versions available on your platform: -mamba repoquery search libtorch --channel conda-forge +mamba repoquery search libtorch --channel conda-forge/label/pytorch_rc # List packages depending on `libtorch`: -mamba repoquery whoneeds libtorch --channel conda-forge +mamba repoquery whoneeds libtorch --channel conda-forge/label/pytorch_rc # List dependencies of `libtorch`: -mamba repoquery depends libtorch --channel conda-forge +mamba repoquery depends libtorch --channel conda-forge/label/pytorch_rc ``` diff --git a/recipe/conda_build_config.yaml b/recipe/conda_build_config.yaml index 75d8bfde4..1ce6c8f1f 100644 --- a/recipe/conda_build_config.yaml +++ b/recipe/conda_build_config.yaml @@ -7,7 +7,7 @@ cxx_compiler_version: # [aarch64] - 13 # [aarch64] MACOSX_SDK_VERSION: # [osx] - - 13.3 # [osx] + - 14.5 # [osx] channel_targets: - conda-forge main diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 5e6672850..0767a1287 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -1,6 +1,6 @@ # if you wish to build release candidate number X, append the version string with ".rcX" -{% set version = "2.9.1" %} -{% set build = 1 %} +{% set version = "2.10.0-rc3" %} +{% set build = 0 %} # Use a higher build number for the CUDA variant, to ensure that it's # preferred by conda's solver, and it's preferentially @@ -24,14 +24,8 @@ package: source: {% if "rc" in version %} - # - git_url: https://github.com/pytorch/pytorch.git - # git_rev: v{{ version.replace(".rc", "-rc") }} - # we cannot apply patches to submodules when checking out with git_url, because - # then conda switches the patch-application to use git, which cannot construct - # a usable ancestor from outside the submodule; the only option then is to - # pull in the submodules separately. 
- - url: https://github.com/pytorch/pytorch/archive/refs/tags/v{{ version }}.tar.gz - sha256: 04ae0a8babdc9cb9dfc4f8746b2b8aa0f8ed0f9e92835cc4af0bcb01e3969e51 + - url: https://download.pytorch.org/source_code/test/pytorch-v{{ version }}.tar.gz + sha256: 3200721908010568b3715332a989948ea731242681935220beef090ae608aef2 {% else %} # The "pytorch-v" tarballs contain submodules; the "pytorch-" ones don't. - url: https://github.com/pytorch/pytorch/releases/download/v{{ version }}/pytorch-v{{ version }}.tar.gz @@ -39,30 +33,27 @@ source: {% endif %} patches: - patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch - - patches/0002-Fix-duplicate-linker-script.patch # [cuda_compiler_version != "None" and aarch64] # conda-specific patch, lets us override CUDA paths - - patches/0003-Allow-overriding-CUDA-related-paths.patch + - patches/0002-Allow-overriding-CUDA-related-paths.patch # fix BLAS calling convention for openblas - - patches/0004-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch - - patches/0005-Fix-FindOpenBLAS.patch + - patches/0003-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch + - patches/0004-Fix-FindOpenBLAS.patch # point to headers that are now living in $PREFIX/include instead of $SP_DIR/torch/include - - patches/0006-point-include-paths-to-Conda-prefix-include-dir.patch - - patches/0007-Add-conda-prefix-to-inductor-include-lib-paths.patch - - patches/0008-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch - - patches/0009-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch # [win] - - patches/0010-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch - # backport https://github.com/pytorch/pytorch/pull/148668 - - patches/0011-Fix-CUPTI-lookup-to-include-target-directory.patch + - patches/0005-point-include-paths-to-Conda-prefix-include-dir.patch + - patches/0006-Add-conda-prefix-to-inductor-include-lib-paths.patch + - patches/0007-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch + - patches/0008-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch # [win] + - patches/0009-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch # skip a test that fails with numpy v2.3; still triggers as of pytorch v2.9 - - patches/0012-skip-test_norm_matrix_degenerate_shapes-on-numpy-2.3.patch + - patches/0010-skip-test_norm_matrix_degenerate_shapes-on-numpy-2.3.patch # backport https://github.com/pytorch/pytorch/pull/127702 - - patches/0013-Define-PY_SSIZE_T_CLEAN-before-include-Python.h.patch + - patches/0011-Define-PY_SSIZE_T_CLEAN-before-include-Python.h.patch # backport https://github.com/pytorch/pytorch/pull/166824 - - patches/0014-Add-USE_SYSTEM-options-for-KLEIDI-CUDNN_FRONTEND-CUT.patch - - patches/0015-Fix-building-kineto-against-system-fmt.patch + - patches/0012-Add-USE_SYSTEM-options-for-KLEIDI-CUDNN_FRONTEND-CUT.patch + - patches/0013-Fix-building-kineto-against-system-fmt.patch # backport https://github.com/pytorch/pytorch/pull/159828 - - patches/0016-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch - - patches/0018-Use-Intel-LLVM-openmp.patch + - patches/0014-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch + - patches/0015-Use-Intel-LLVM-openmp.patch - patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch build: @@ -162,6 +153,7 @@ requirements: - eigen - zlib - fmt + - packaging run: - libblas * *{{ blas_impl }} # [blas_impl == "mkl"] run_constrained: @@ -304,6 +296,7 @@ outputs: - typing_extensions - zlib - fmt + - packaging run: - {{ pin_subpackage('libtorch', exact=True) }} - llvm-openmp @@ -446,6 
+439,8 @@ outputs: {% set skips = skips ~ " or test_addbmm or test_baddbmm or test_bmm" %} # [aarch64] # doesn't crash, but gets different result on aarch + CUDA {% set skips = skips ~ " or illcondition_matrix_input_should_not_crash_cpu" %} # [aarch64 and cuda_compiler_version != "None"] + # minor inaccuracy on aarch64 (emulation?) + {% set skips = skips ~ " or (TestNN and test_upsampling_bfloat16)" %} # [aarch64] # may crash spuriously {% set skips = skips ~ " or (TestAutograd and test_profiler_seq_nr)" %} {% set skips = skips ~ " or (TestAutograd and test_profiler_propagation)" %} @@ -505,6 +500,11 @@ outputs: {% set skips = skips ~ " or test_forward_nn_Bilinear_mps_float16" %} # [osx and arm64] # "quantized engine NoQEngine is not supported" {% set skips = skips ~ " or test_qengine" %} # [osx and arm64] + # some warning-related failure, maybe it's broken by --disable-warnings? + {% set skips = skips ~ " or test_cpp_warnings_have_python_context_cpu" %} + {% set skips = skips ~ " or test_cpp_warnings_have_python_context_cuda" %} + # "Attempt to trace generator" + {% set skips = skips ~ " or test_lite_regional_compile_flex_attention_cuda" %} # the whole test suite takes forever, but we should get a good enough coverage # for potential packaging problems by running a fixed subset diff --git a/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch b/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch index 0b209063f..e62791521 100644 --- a/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch +++ b/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch @@ -1,17 +1,17 @@ -From 7eeddc8b77fbcb44ce4c5d97c4962efc242a3f75 Mon Sep 17 00:00:00 2001 +From e639e26774c2de4a0a51013e386152cb084f4f19 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Sun, 1 Sep 2024 17:35:40 -0400 -Subject: [PATCH 01/13] Force usage of python 3 and error without numpy +Subject: [PATCH 01/15] Force usage of python 3 and error without numpy --- cmake/Dependencies.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index ef5c2fd4e97..72d9e298dcc 100644 +index 903c212de81..ecf8669649b 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake -@@ -804,9 +804,9 @@ if(BUILD_PYTHON) +@@ -818,9 +818,9 @@ if(BUILD_PYTHON) if(USE_NUMPY) list(APPEND PYTHON_COMPONENTS NumPy) endif() @@ -23,7 +23,7 @@ index ef5c2fd4e97..72d9e298dcc 100644 endif() if(NOT Python_Interpreter_FOUND) -@@ -823,7 +823,7 @@ if(BUILD_PYTHON) +@@ -837,7 +837,7 @@ if(BUILD_PYTHON) if(Python_Development.Module_FOUND) if(USE_NUMPY) if(NOT Python_NumPy_FOUND) diff --git a/recipe/patches/0003-Allow-overriding-CUDA-related-paths.patch b/recipe/patches/0002-Allow-overriding-CUDA-related-paths.patch similarity index 89% rename from recipe/patches/0003-Allow-overriding-CUDA-related-paths.patch rename to recipe/patches/0002-Allow-overriding-CUDA-related-paths.patch index 2fbe631ff..43af98d46 100644 --- a/recipe/patches/0003-Allow-overriding-CUDA-related-paths.patch +++ b/recipe/patches/0002-Allow-overriding-CUDA-related-paths.patch @@ -1,7 +1,7 @@ -From afa9e9bc6e2f6db7af3b3a46e608bc8d2833f18d Mon Sep 17 00:00:00 2001 +From 04f3324d69b41ff7336747d4b51b098541a6b4f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Wed, 27 Nov 2024 13:47:23 +0100 -Subject: [PATCH 03/13] Allow overriding CUDA-related paths +Subject: [PATCH 02/15] Allow overriding CUDA-related paths --- 
cmake/Modules/FindCUDAToolkit.cmake | 2 +- @@ -22,10 +22,10 @@ index ec9ae530aa6..b7c0bd9fc51 100644 set(CUDAToolkit_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_LIBRARY_ROOT}") set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_TOOLKIT_VERSION}") diff --git a/tools/setup_helpers/cmake.py b/tools/setup_helpers/cmake.py -index 02ab011dd48..447628cc4c1 100644 +index 88f0fe5d309..753da8f435d 100644 --- a/tools/setup_helpers/cmake.py +++ b/tools/setup_helpers/cmake.py -@@ -314,7 +314,7 @@ class CMake: +@@ -315,7 +315,7 @@ class CMake: true_var = additional_options.get(var) if true_var is not None: build_options[true_var] = val diff --git a/recipe/patches/0002-Fix-duplicate-linker-script.patch b/recipe/patches/0002-Fix-duplicate-linker-script.patch deleted file mode 100644 index d4adbe562..000000000 --- a/recipe/patches/0002-Fix-duplicate-linker-script.patch +++ /dev/null @@ -1,24 +0,0 @@ -From 532682e899bed8cb97b922e0d1ff92664100e521 Mon Sep 17 00:00:00 2001 -From: Jeongseok Lee -Date: Sun, 3 Nov 2024 01:12:36 -0700 -Subject: [PATCH 02/13] Fix duplicate linker script - ---- - setup.py | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/setup.py b/setup.py -index 11ca48482a7..1c5b75897df 100644 ---- a/setup.py -+++ b/setup.py -@@ -1632,7 +1632,9 @@ def main() -> None: - filein="cmake/prioritized_text.txt", fout="cmake/linker_script.ld" - ) - linker_script_path = os.path.abspath("cmake/linker_script.ld") -- os.environ["LDFLAGS"] = os.getenv("LDFLAGS", "") + f" -T{linker_script_path}" -+ ldflags = os.getenv("LDFLAGS", "") -+ if f"-T{linker_script_path}" not in ldflags: -+ os.environ["LDFLAGS"] = ldflags + f" -T{linker_script_path}" - os.environ["CFLAGS"] = ( - os.getenv("CFLAGS", "") + " -ffunction-sections -fdata-sections" - ) diff --git a/recipe/patches/0004-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch b/recipe/patches/0003-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch similarity index 86% rename from recipe/patches/0004-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch rename to recipe/patches/0003-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch index c3a933b6f..02b24ff2e 100644 --- a/recipe/patches/0004-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch +++ b/recipe/patches/0003-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch @@ -1,7 +1,7 @@ -From 6af9801059f087440b2a756884d6f1fe3d91d865 Mon Sep 17 00:00:00 2001 +From edde9415778a7ceafd0eadc1b49ca216a175f607 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Wed, 18 Dec 2024 03:59:00 +0000 -Subject: [PATCH 04/13] Use BLAS_USE_CBLAS_DOT for OpenBLAS builds +Subject: [PATCH 03/15] Use BLAS_USE_CBLAS_DOT for OpenBLAS builds There are two calling conventions for *dotu functions @@ -31,10 +31,10 @@ functional calls. 
1 file changed, 2 insertions(+) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 72d9e298dcc..f7e56828bdf 100644 +index ecf8669649b..e8d8bc58096 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake -@@ -186,6 +186,7 @@ elseif(BLAS STREQUAL "OpenBLAS") +@@ -194,6 +194,7 @@ elseif(BLAS STREQUAL "OpenBLAS") set(BLAS_FOUND 1) set(BLAS_LIBRARIES ${OpenBLAS_LIB}) set(BLAS_CHECK_F2C 1) @@ -42,7 +42,7 @@ index 72d9e298dcc..f7e56828bdf 100644 elseif(BLAS STREQUAL "BLIS") find_package(BLIS REQUIRED) include_directories(SYSTEM ${BLIS_INCLUDE_DIR}) -@@ -209,6 +210,7 @@ elseif(BLAS STREQUAL "MKL") +@@ -217,6 +218,7 @@ elseif(BLAS STREQUAL "MKL") set(BLAS_INFO "mkl") set(BLAS_FOUND 1) set(BLAS_LIBRARIES ${MKL_LIBRARIES}) diff --git a/recipe/patches/0005-Fix-FindOpenBLAS.patch b/recipe/patches/0004-Fix-FindOpenBLAS.patch similarity index 78% rename from recipe/patches/0005-Fix-FindOpenBLAS.patch rename to recipe/patches/0004-Fix-FindOpenBLAS.patch index b8e4c8ae8..47e1229e8 100644 --- a/recipe/patches/0005-Fix-FindOpenBLAS.patch +++ b/recipe/patches/0004-Fix-FindOpenBLAS.patch @@ -1,22 +1,22 @@ -From 4487751a213664c3d62023da331007c712c60f79 Mon Sep 17 00:00:00 2001 +From 4a9c995a1b907f2b15cf3179b3ccdfee398c0c06 Mon Sep 17 00:00:00 2001 From: Bas Zalmstra Date: Thu, 16 May 2024 10:46:49 +0200 -Subject: [PATCH 05/13] Fix FindOpenBLAS +Subject: [PATCH 04/15] Fix FindOpenBLAS --- cmake/Modules/FindOpenBLAS.cmake | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/cmake/Modules/FindOpenBLAS.cmake b/cmake/Modules/FindOpenBLAS.cmake -index 9ba86ba1ee0..19e1d508aa5 100644 +index 21ae9e2521e..b4aeeef4012 100644 --- a/cmake/Modules/FindOpenBLAS.cmake +++ b/cmake/Modules/FindOpenBLAS.cmake -@@ -31,22 +31,25 @@ SET(Open_BLAS_LIB_SEARCH_PATHS - $ENV{OpenBLAS_HOME}/lib - ) +@@ -36,22 +36,25 @@ IF(DEFINED ENV{OpenBLAS_LIB_NAME}) + SET(Open_BLAS_LIB_NAME $ENV{OpenBLAS_LIB_NAME}) + ENDIF() -FIND_PATH(OpenBLAS_INCLUDE_DIR NAMES cblas.h PATHS ${Open_BLAS_INCLUDE_SEARCH_PATHS}) --FIND_LIBRARY(OpenBLAS_LIB NAMES openblas PATHS ${Open_BLAS_LIB_SEARCH_PATHS}) +-FIND_LIBRARY(OpenBLAS_LIB NAMES ${Open_BLAS_LIB_NAME} PATHS ${Open_BLAS_LIB_SEARCH_PATHS}) - SET(OpenBLAS_FOUND ON) diff --git a/recipe/patches/0006-point-include-paths-to-Conda-prefix-include-dir.patch b/recipe/patches/0005-point-include-paths-to-Conda-prefix-include-dir.patch similarity index 51% rename from recipe/patches/0006-point-include-paths-to-Conda-prefix-include-dir.patch rename to recipe/patches/0005-point-include-paths-to-Conda-prefix-include-dir.patch index 2e7b475eb..2d1e0957b 100644 --- a/recipe/patches/0006-point-include-paths-to-Conda-prefix-include-dir.patch +++ b/recipe/patches/0005-point-include-paths-to-Conda-prefix-include-dir.patch @@ -1,31 +1,36 @@ -From b1d699a5efd5659ab7e1d0dd3a203d432e63ffb7 Mon Sep 17 00:00:00 2001 +From ed5cdb12101ce038ff6f4d07d26e8fd02b044925 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 23 Jan 2025 22:58:14 +1100 -Subject: [PATCH 06/13] point include paths to Conda prefix include dir +Subject: [PATCH 05/15] point include paths to Conda prefix include dir Updated to use `sysconfig.get_config_vars("prefix")` per https://github.com/conda-forge/pytorch-cpu-feedstock/issues/424 and https://github.com/conda-forge/pytorch-cpu-feedstock/issues/447. 
--- - torch/utils/cpp_extension.py | 5 +++++ - 1 file changed, 5 insertions(+) + torch/utils/cpp_extension.py | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py -index 902d2fe6ce0..3701edf1ce4 100644 +index f29c382f0e3..e9557d43ee1 100644 --- a/torch/utils/cpp_extension.py +++ b/torch/utils/cpp_extension.py -@@ -1506,9 +1506,14 @@ def include_paths(device_type: str = "cpu") -> list[str]: +@@ -1567,12 +1567,18 @@ def include_paths(device_type: str = "cpu", torch_include_dirs=True) -> list[str + Returns: A list of include path strings. """ +- paths = [] lib_include = os.path.join(_TORCH_PATH, 'include') + # Account for conda prefix. + conda_pieces = [sysconfig.get_config_var("prefix")] + IS_WINDOWS * ["Library"] + ["include"] + conda_include = os.path.join(*conda_pieces) - paths = [ ++ paths = [ + conda_include, - lib_include, - # Remove this once torch/torch.h is officially no longer supported for C++ extensions. -+ os.path.join(conda_include, 'torch', 'csrc', 'api', 'include'), - os.path.join(lib_include, 'torch', 'csrc', 'api', 'include'), - ] ++ ] + if torch_include_dirs: + paths.extend([ + lib_include, + # Remove this once torch/torch.h is officially no longer supported for C++ extensions. ++ os.path.join(conda_include, 'torch', 'csrc', 'api', 'include'), + os.path.join(lib_include, 'torch', 'csrc', 'api', 'include'), + ]) if device_type == "cuda" and IS_HIP_EXTENSION: diff --git a/recipe/patches/0007-Add-conda-prefix-to-inductor-include-lib-paths.patch b/recipe/patches/0006-Add-conda-prefix-to-inductor-include-lib-paths.patch similarity index 87% rename from recipe/patches/0007-Add-conda-prefix-to-inductor-include-lib-paths.patch rename to recipe/patches/0006-Add-conda-prefix-to-inductor-include-lib-paths.patch index f377341fc..65db92e67 100644 --- a/recipe/patches/0007-Add-conda-prefix-to-inductor-include-lib-paths.patch +++ b/recipe/patches/0006-Add-conda-prefix-to-inductor-include-lib-paths.patch @@ -1,7 +1,7 @@ -From 6c71d8d16faf7c13b786f3f35fef6ab533bad2c1 Mon Sep 17 00:00:00 2001 +From 7958145d5e1a178540033a112f4267b3e54842e1 Mon Sep 17 00:00:00 2001 From: Daniel Petry Date: Tue, 21 Jan 2025 17:45:23 -0600 -Subject: [PATCH 07/14] Add conda prefix to inductor include & lib paths +Subject: [PATCH 06/15] Add conda prefix to inductor include & lib paths Currently inductor doesn't look in conda's includes and libs. This results in errors when it tries to compile, if system versions are being used of @@ -18,10 +18,10 @@ and https://github.com/conda-forge/pytorch-cpu-feedstock/issues/447. 
1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/torch/_inductor/cpp_builder.py b/torch/_inductor/cpp_builder.py -index e2cb445ed10..89905bec464 100644 +index 6a6b7d15ae3..0a4724e5c17 100644 --- a/torch/_inductor/cpp_builder.py +++ b/torch/_inductor/cpp_builder.py -@@ -1410,10 +1410,12 @@ def get_cpp_torch_options( +@@ -1520,10 +1520,12 @@ def get_cpp_torch_options( + python_include_dirs + torch_include_dirs + omp_include_dir_paths diff --git a/recipe/patches/0008-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch b/recipe/patches/0007-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch similarity index 83% rename from recipe/patches/0008-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch rename to recipe/patches/0007-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch index a2db4fc88..f98c4c330 100644 --- a/recipe/patches/0008-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch +++ b/recipe/patches/0007-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch @@ -1,7 +1,7 @@ -From 7520f1737851d8a812dacfb150399e701fc27769 Mon Sep 17 00:00:00 2001 +From 36cb051937278f82ead2912dd11a0d996f9d3d8c Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 28 Jan 2025 14:15:34 +1100 -Subject: [PATCH 08/13] make ATEN_INCLUDE_DIR relative to TORCH_INSTALL_PREFIX +Subject: [PATCH 07/15] make ATEN_INCLUDE_DIR relative to TORCH_INSTALL_PREFIX we cannot set CMAKE_INSTALL_PREFIX without the pytorch build complaining, but we can use TORCH_INSTALL_PREFIX, which is set correctly relative to our CMake files already: @@ -11,10 +11,10 @@ https://github.com/pytorch/pytorch/blob/v2.5.1/cmake/TorchConfig.cmake.in#L47 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt -index 6c095680733..d7c7a74a302 100644 +index 6650db50b01..c413b589b5f 100644 --- a/aten/src/ATen/CMakeLists.txt +++ b/aten/src/ATen/CMakeLists.txt -@@ -793,7 +793,7 @@ if(USE_ROCM) +@@ -825,7 +825,7 @@ if(USE_ROCM) # list(APPEND ATen_HIP_DEPENDENCY_LIBS ATEN_CUDA_FILES_GEN_LIB) endif() diff --git a/recipe/patches/0009-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch b/recipe/patches/0008-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch similarity index 91% rename from recipe/patches/0009-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch rename to recipe/patches/0008-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch index aa4e3a029..6e3b67d7d 100644 --- a/recipe/patches/0009-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch +++ b/recipe/patches/0008-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch @@ -1,7 +1,7 @@ -From 755c05e421afd36dc38f07208f28e6df94d48323 Mon Sep 17 00:00:00 2001 +From d823b46ccfc9cea8c74ce62f612580ba9ad51407 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Tue, 28 Jan 2025 10:58:29 +1100 -Subject: [PATCH 09/13] remove `DESTINATION lib` from CMake `install(TARGETS` +Subject: [PATCH 08/15] remove `DESTINATION lib` from CMake `install(TARGETS` directives Suggested-By: Silvio Traversaro @@ -55,10 +55,10 @@ index ef24471dba8..a410383de27 100644 endif() diff --git a/c10/xpu/CMakeLists.txt b/c10/xpu/CMakeLists.txt -index 95b9f031c3e..f1ce6d1ad14 100644 +index c2fa65ba35e..3a384395e4a 100644 --- a/c10/xpu/CMakeLists.txt +++ b/c10/xpu/CMakeLists.txt -@@ -46,7 +46,7 @@ if(NOT BUILD_LIBTORCHLESS) +@@ -47,7 +47,7 @@ if(NOT BUILD_LIBTORCHLESS) $ $ ) @@ -68,10 +68,10 @@ index 95b9f031c3e..f1ce6d1ad14 100644 add_subdirectory(test) endif() diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt -index 6ab41b6c847..a78ad102aa6 100644 +index 6cbaecc5d2e..c6986007740 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt -@@ -567,7 +567,7 @@ if(USE_CUDA) +@@ -575,7 +575,7 @@ if(USE_CUDA) endif() target_link_libraries(caffe2_nvrtc PRIVATE caffe2::nvrtc ${DELAY_LOAD_FLAGS}) @@ -80,7 +80,7 @@ index 6ab41b6c847..a78ad102aa6 100644 if(USE_NCCL) list(APPEND Caffe2_GPU_SRCS ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp) -@@ -642,7 +642,7 @@ if(USE_ROCM) +@@ -656,7 +656,7 @@ if(USE_ROCM) target_link_libraries(caffe2_nvrtc hip::amdhip64 hiprtc::hiprtc) target_include_directories(caffe2_nvrtc PRIVATE ${CMAKE_BINARY_DIR}) target_compile_definitions(caffe2_nvrtc PRIVATE USE_ROCM __HIP_PLATFORM_AMD__) @@ -89,7 +89,7 @@ index 6ab41b6c847..a78ad102aa6 100644 endif() if(NOT NO_API AND NOT BUILD_LITE_INTERPRETER) -@@ -1112,7 +1112,7 @@ elseif(USE_CUDA) +@@ -1119,7 +1119,7 @@ elseif(USE_CUDA) CUDA::culibos ${CMAKE_DL_LIBS}) endif() set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/cuda/LinearAlgebraStubs.cpp PROPERTIES COMPILE_FLAGS "-DBUILD_LAZY_CUDA_LINALG") @@ -98,7 +98,7 @@ index 6ab41b6c847..a78ad102aa6 100644 endif() if(USE_PRECOMPILED_HEADERS) -@@ -1590,17 +1590,17 @@ endif() +@@ -1594,17 +1594,17 @@ endif() caffe2_interface_library(torch torch_library) @@ -121,7 +121,7 @@ index 6ab41b6c847..a78ad102aa6 100644 target_link_libraries(torch PUBLIC torch_cpu_library) -@@ -1743,7 +1743,7 @@ if(BUILD_SHARED_LIBS) +@@ -1747,7 +1747,7 @@ if(BUILD_SHARED_LIBS) target_link_libraries(torch_global_deps torch::nvtoolsext) endif() endif() @@ -131,10 +131,10 @@ index 6ab41b6c847..a78ad102aa6 100644 # ---[ Caffe2 HIP sources. 
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt -index 1632147f022..ca8a193adb5 100644 +index 3a3ca0f1236..0e2c682fd97 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt -@@ -468,7 +468,7 @@ if(NOT TORCH_PYTHON_LINK_FLAGS STREQUAL "") +@@ -466,7 +466,7 @@ if(NOT TORCH_PYTHON_LINK_FLAGS STREQUAL "") set_target_properties(torch_python PROPERTIES LINK_FLAGS ${TORCH_PYTHON_LINK_FLAGS}) endif() diff --git a/recipe/patches/0010-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch b/recipe/patches/0009-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch similarity index 90% rename from recipe/patches/0010-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch rename to recipe/patches/0009-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch index 381edb708..eea801850 100644 --- a/recipe/patches/0010-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch +++ b/recipe/patches/0009-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch @@ -1,24 +1,25 @@ -From 0ee779ff0a9573eeda2b67cf32ea7061d13dd75c Mon Sep 17 00:00:00 2001 +From 2a6d0bdcc31eee3a7a327ba8aea08b34a36279ee Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 30 Jan 2025 08:33:44 +1100 -Subject: [PATCH 10/13] avoid deprecated `find_package(CUDA)` in caffe2 CMake +Subject: [PATCH 09/15] avoid deprecated `find_package(CUDA)` in caffe2 CMake metadata vendor the not-available-anymore function torch_cuda_get_nvcc_gencode_flag from CMake --- caffe2/CMakeLists.txt | 10 +-- + cmake/Dependencies.cmake | 2 +- cmake/Summary.cmake | 10 +-- cmake/TorchConfig.cmake.in | 2 +- cmake/public/cuda.cmake | 48 ++++---------- cmake/public/utils.cmake | 127 +++++++++++++++++++++++++++++++++++++ setup.py | 2 +- - 6 files changed, 151 insertions(+), 48 deletions(-) + 7 files changed, 152 insertions(+), 49 deletions(-) diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt -index a78ad102aa6..fb89e8b2e3c 100644 +index c6986007740..0c54f830820 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt -@@ -953,25 +953,25 @@ if(USE_ROCM) +@@ -958,25 +958,25 @@ if(USE_ROCM) "$<$:ATen/core/ATen_pch.h>") endif() elseif(USE_CUDA) @@ -49,11 +50,24 @@ index a78ad102aa6..fb89e8b2e3c 100644 torch_compile_options(torch_cuda) # see cmake/public/utils.cmake target_compile_definitions(torch_cuda PRIVATE USE_CUDA) +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index e8d8bc58096..d2168da264b 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -968,7 +968,7 @@ endif() + + # ---[ nvtx + if(USE_SYSTEM_NVTX) +- find_path(nvtx3_dir NAMES nvtx3 PATHS ${CUDA_INCLUDE_DIRS}) ++ find_path(nvtx3_dir NAMES nvtx3 PATHS ${CUDAToolkit_INCLUDE_DIRS}) + else() + find_path(nvtx3_dir NAMES nvtx3 PATHS "${PROJECT_SOURCE_DIR}/third_party/NVTX/c/include" NO_DEFAULT_PATH) + endif() diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake -index 745d9ea0586..993892c6d80 100644 +index 60951d6c686..b6e2ce888dc 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake -@@ -75,7 +75,7 @@ function(caffe2_print_configuration_summary) +@@ -76,7 +76,7 @@ function(caffe2_print_configuration_summary) message(STATUS " USE_CUSPARSELT : ${USE_CUSPARSELT}") message(STATUS " USE_CUDSS : ${USE_CUDSS}") message(STATUS " USE_CUFILE : ${USE_CUFILE}") @@ -62,7 +76,7 @@ index 745d9ea0586..993892c6d80 100644 message(STATUS " USE_FLASH_ATTENTION : ${USE_FLASH_ATTENTION}") message(STATUS " USE_MEM_EFF_ATTENTION : ${USE_MEM_EFF_ATTENTION}") if(${USE_CUDNN}) -@@ -87,7 +87,7 @@ function(caffe2_print_configuration_summary) +@@ 
-88,7 +88,7 @@ function(caffe2_print_configuration_summary) if(${USE_CUFILE}) message(STATUS " cufile library : ${CUDA_cuFile_LIBRARY}") endif() @@ -71,7 +85,7 @@ index 745d9ea0586..993892c6d80 100644 message(STATUS " CUDA library : ${CUDA_cuda_driver_LIBRARY}") message(STATUS " cudart library : ${CUDA_cudart_LIBRARY}") message(STATUS " cublas library : ${CUDA_cublas_LIBRARY}") -@@ -107,12 +107,12 @@ function(caffe2_print_configuration_summary) +@@ -108,12 +108,12 @@ function(caffe2_print_configuration_summary) message(STATUS " cuDSS library : ${__tmp}") endif() message(STATUS " nvrtc : ${CUDA_nvrtc_LIBRARY}") @@ -101,7 +115,7 @@ index 0b32ffa99ce..471b7154ed0 100644 if(TARGET torch::nvtoolsext) list(APPEND TORCH_CUDA_LIBRARIES torch::nvtoolsext) diff --git a/cmake/public/cuda.cmake b/cmake/public/cuda.cmake -index 218c50a69c6..8ee4e2fe98b 100644 +index bc8855d23e6..b5c809daf20 100644 --- a/cmake/public/cuda.cmake +++ b/cmake/public/cuda.cmake @@ -26,8 +26,8 @@ if(NOT MSVC) @@ -112,10 +126,10 @@ index 218c50a69c6..8ee4e2fe98b 100644 -if(NOT CUDA_FOUND) +find_package(CUDAToolkit) +if(NOT CUDAToolkit_FOUND) - message(WARNING - "PyTorch: CUDA cannot be found. Depending on whether you are building " - "PyTorch or a PyTorch dependent library, the next warning / error will " -@@ -36,8 +36,6 @@ if(NOT CUDA_FOUND) + # If user explicitly set USE_CUDA=1, error out instead of falling back + if(_USE_CUDA_EXPLICITLY_SET AND USE_CUDA) + message(FATAL_ERROR +@@ -45,8 +45,6 @@ if(NOT CUDA_FOUND) return() endif() @@ -124,7 +138,7 @@ index 218c50a69c6..8ee4e2fe98b 100644 # Pass clang as host compiler, which according to the docs # Must be done before CUDA language is enabled, see # https://cmake.org/cmake/help/v3.15/variable/CMAKE_CUDA_HOST_COMPILER.html -@@ -56,24 +54,18 @@ if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.12.0) +@@ -65,24 +63,18 @@ if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.12.0) cmake_policy(SET CMP0074 NEW) endif() @@ -155,7 +169,7 @@ index 218c50a69c6..8ee4e2fe98b 100644 # Sometimes, we may mismatch nvcc with the CUDA headers we are # compiling with, e.g., if a ccache nvcc is fed to us by CUDA_NVCC_EXECUTABLE # but the PATH is not consistent with CUDA_HOME. It's better safe -@@ -97,8 +89,8 @@ if(CUDA_FOUND) +@@ -106,8 +98,8 @@ if(CUDA_FOUND) ) if(NOT CMAKE_CROSSCOMPILING) try_run(run_result compile_result ${PROJECT_RANDOM_BINARY_DIR} ${file} @@ -166,7 +180,7 @@ index 218c50a69c6..8ee4e2fe98b 100644 RUN_OUTPUT_VARIABLE cuda_version_from_header COMPILE_OUTPUT_VARIABLE output_var ) -@@ -106,30 +98,14 @@ if(CUDA_FOUND) +@@ -115,30 +107,14 @@ if(CUDA_FOUND) message(FATAL_ERROR "PyTorch: Couldn't determine version from header: " ${output_var}) endif() message(STATUS "PyTorch: Header version is: " ${cuda_version_from_header}) @@ -200,7 +214,7 @@ index 218c50a69c6..8ee4e2fe98b 100644 execute_process( COMMAND Python::Interpreter -c diff --git a/cmake/public/utils.cmake b/cmake/public/utils.cmake -index 68e66bb3fc3..e02a4abf8cc 100644 +index 3cdf5fb914b..729041636eb 100644 --- a/cmake/public/utils.cmake +++ b/cmake/public/utils.cmake @@ -293,6 +293,133 @@ macro(torch_hip_get_arch_list store_var) @@ -338,10 +352,10 @@ index 68e66bb3fc3..e02a4abf8cc 100644 # Get the XPU arch flags specified by TORCH_XPU_ARCH_LIST. 
# Usage: diff --git a/setup.py b/setup.py -index 1c5b75897df..759a756739c 100644 +index 3e82b9a6c81..e282c653a42 100644 --- a/setup.py +++ b/setup.py -@@ -1175,7 +1175,7 @@ class build_ext(setuptools.command.build_ext.build_ext): +@@ -1262,7 +1262,7 @@ class build_ext(setuptools.command.build_ext.build_ext): else: report("-- Not using cuDNN") if cmake_cache_vars["USE_CUDA"]: diff --git a/recipe/patches/0012-skip-test_norm_matrix_degenerate_shapes-on-numpy-2.3.patch b/recipe/patches/0010-skip-test_norm_matrix_degenerate_shapes-on-numpy-2.3.patch similarity index 65% rename from recipe/patches/0012-skip-test_norm_matrix_degenerate_shapes-on-numpy-2.3.patch rename to recipe/patches/0010-skip-test_norm_matrix_degenerate_shapes-on-numpy-2.3.patch index e0d52f2ef..4a8d5ce90 100644 --- a/recipe/patches/0012-skip-test_norm_matrix_degenerate_shapes-on-numpy-2.3.patch +++ b/recipe/patches/0010-skip-test_norm_matrix_degenerate_shapes-on-numpy-2.3.patch @@ -1,18 +1,18 @@ -From f38ba11d997851548be9682b37e1dba88cd7856b Mon Sep 17 00:00:00 2001 +From 574b5b7e647e207fb4595ae84632d6231e03265f Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sat, 14 Jun 2025 07:34:48 +1100 -Subject: [PATCH 12/13] skip test_norm_matrix_degenerate_shapes on numpy >=2.3 +Subject: [PATCH 10/15] skip test_norm_matrix_degenerate_shapes on numpy >=2.3 --- test/test_linalg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_linalg.py b/test/test_linalg.py -index 0f6c8f20742..f54dcce901d 100644 +index 960c18007d4..68a165fd488 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py -@@ -2042,6 +2042,7 @@ class TestLinalg(TestCase): - # Test degenerate shape results match numpy for linalg.norm matrix norms +@@ -2022,6 +2022,7 @@ class TestLinalg(TestCase): + @skipIf(np.lib.NumpyVersion(np.__version__) < '2.3.0', 'Numpy changed handling of degenerate inputs in 2.3.0') @skipCUDAIfNoMagma @skipCPUIfNoLapack + @unittest.skipIf(np.lib.NumpyVersion(np.__version__) >= "2.3.0", "Error behaviour changed") diff --git a/recipe/patches/0013-Define-PY_SSIZE_T_CLEAN-before-include-Python.h.patch b/recipe/patches/0011-Define-PY_SSIZE_T_CLEAN-before-include-Python.h.patch similarity index 81% rename from recipe/patches/0013-Define-PY_SSIZE_T_CLEAN-before-include-Python.h.patch rename to recipe/patches/0011-Define-PY_SSIZE_T_CLEAN-before-include-Python.h.patch index e7f0887ae..f71ab77f7 100644 --- a/recipe/patches/0013-Define-PY_SSIZE_T_CLEAN-before-include-Python.h.patch +++ b/recipe/patches/0011-Define-PY_SSIZE_T_CLEAN-before-include-Python.h.patch @@ -1,7 +1,7 @@ -From 8686ba30882206176dbbeba67a1063d1a08ef669 Mon Sep 17 00:00:00 2001 +From 445ccbdb5657221f95bd31ebfb6b5e83f74940bf Mon Sep 17 00:00:00 2001 From: LWisteria Date: Sun, 2 Jun 2024 19:13:23 +0900 -Subject: [PATCH 13/13] Define PY_SSIZE_T_CLEAN before #include +Subject: [PATCH 11/15] Define PY_SSIZE_T_CLEAN before #include See https://docs.python.org/3/c-api/intro.html#include-files --- diff --git a/recipe/patches/0011-Fix-CUPTI-lookup-to-include-target-directory.patch b/recipe/patches/0011-Fix-CUPTI-lookup-to-include-target-directory.patch deleted file mode 100644 index 629f10183..000000000 --- a/recipe/patches/0011-Fix-CUPTI-lookup-to-include-target-directory.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 846567d76fe17ac1c9f1154bb439c6a106445dce Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= -Date: Thu, 6 Mar 2025 13:57:25 +0100 -Subject: [PATCH 11/13] Fix CUPTI lookup to include target directory - ---- - cmake/Dependencies.cmake | 2 ++ - 1 file changed, 
2 insertions(+) - -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index f7e56828bdf..4a9fe193830 100644 ---- a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1626,6 +1626,7 @@ if(USE_KINETO) - endif() - - find_library(CUPTI_LIBRARY_PATH ${CUPTI_LIB_NAME} PATHS -+ ${CUDAToolkit_TARGET_DIR}/lib - ${CUDA_SOURCE_DIR} - ${CUDA_SOURCE_DIR}/extras/CUPTI/lib64 - ${CUDA_SOURCE_DIR}/lib -@@ -1633,6 +1634,7 @@ if(USE_KINETO) - NO_DEFAULT_PATH) - - find_path(CUPTI_INCLUDE_DIR cupti.h PATHS -+ ${CUDAToolkit_TARGET_DIR}/include - ${CUDA_SOURCE_DIR}/extras/CUPTI/include - ${CUDA_INCLUDE_DIRS} - ${CUDA_SOURCE_DIR} diff --git a/recipe/patches/0014-Add-USE_SYSTEM-options-for-KLEIDI-CUDNN_FRONTEND-CUT.patch b/recipe/patches/0012-Add-USE_SYSTEM-options-for-KLEIDI-CUDNN_FRONTEND-CUT.patch similarity index 86% rename from recipe/patches/0014-Add-USE_SYSTEM-options-for-KLEIDI-CUDNN_FRONTEND-CUT.patch rename to recipe/patches/0012-Add-USE_SYSTEM-options-for-KLEIDI-CUDNN_FRONTEND-CUT.patch index fea50e211..fb8c0df2d 100644 --- a/recipe/patches/0014-Add-USE_SYSTEM-options-for-KLEIDI-CUDNN_FRONTEND-CUT.patch +++ b/recipe/patches/0012-Add-USE_SYSTEM-options-for-KLEIDI-CUDNN_FRONTEND-CUT.patch @@ -1,7 +1,7 @@ -From 80d19fab767e27bc950b8e229b11294495192a1e Mon Sep 17 00:00:00 2001 +From cf7cb253294cabb9e460de8ce99f41e3dbf35360 Mon Sep 17 00:00:00 2001 From: Yukio Siraichi Date: Tue, 30 Sep 2025 01:10:13 +0000 -Subject: [PATCH 14/14] Add USE_SYSTEM options for KLEIDI, CUDNN_FRONTEND, +Subject: [PATCH 12/15] Add USE_SYSTEM options for KLEIDI, CUDNN_FRONTEND, CUTLASS, and FMT This commit adds CMake options to allow users to use system-installed versions of four libraries instead of the bundled versions @@ -10,16 +10,16 @@ Fixes #153863 PR: https://github.com/pytorch/pytorch/pull/166824 --- CMakeLists.txt | 8 +++++ - aten/src/ATen/CMakeLists.txt | 17 +++++++-- - cmake/Dependencies.cmake | 69 +++++++++++++++++++++++++++--------- + aten/src/ATen/CMakeLists.txt | 17 ++++++++-- + cmake/Dependencies.cmake | 65 ++++++++++++++++++++++++++---------- cmake/Summary.cmake | 4 +++ - 4 files changed, 80 insertions(+), 18 deletions(-) + 4 files changed, 75 insertions(+), 19 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt -index ce7890f002d..6d55828e61e 100644 +index 5304d054e84..a45da811631 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -467,6 +467,10 @@ option(USE_SYSTEM_BENCHMARK "Use system-provided google benchmark." OFF) +@@ -487,6 +487,10 @@ option(USE_SYSTEM_BENCHMARK "Use system-provided google benchmark." OFF) option(USE_SYSTEM_ONNX "Use system-provided onnx." OFF) option(USE_SYSTEM_XNNPACK "Use system-provided xnnpack." OFF) option(USE_SYSTEM_NVTX "Use system-provided nvtx." 
diff --git a/recipe/patches/0015-Fix-building-kineto-against-system-fmt.patch b/recipe/patches/0013-Fix-building-kineto-against-system-fmt.patch
similarity index 56%
rename from recipe/patches/0015-Fix-building-kineto-against-system-fmt.patch
rename to recipe/patches/0013-Fix-building-kineto-against-system-fmt.patch
index 5fd9277ec..82e4eae0a 100644
--- a/recipe/patches/0015-Fix-building-kineto-against-system-fmt.patch
+++ b/recipe/patches/0013-Fix-building-kineto-against-system-fmt.patch
@@ -1,26 +1,26 @@
-From 2d7566334da792a27f7a01f6e8ac34975915156c Mon Sep 17 00:00:00 2001
+From 9bebb0c39c084f5f4ec083294b2f6b27b3032c63 Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?=
 Date: Mon, 3 Nov 2025 14:59:04 +0100
-Subject: [PATCH 15/15] Fix building kineto against system fmt
+Subject: [PATCH 13/15] Fix building kineto against system fmt
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit
 Signed-off-by: Michał Górny
 ---
- cmake/Dependencies.cmake | 2 ++
- 1 file changed, 2 insertions(+)
+ cmake/Dependencies.cmake | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
 diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
-index 8b3a1b2629a..b99fbc5999f 100644
+index 07cf3dec461..b523948d09f 100644
 --- a/cmake/Dependencies.cmake
 +++ b/cmake/Dependencies.cmake
-@@ -1597,6 +1597,8 @@ else()
+@@ -1634,7 +1634,7 @@ else()
+ if(NOT fmt_FOUND)
 message(FATAL_ERROR "Cannot find system fmt library. Please install libfmt-dev or set USE_SYSTEM_FMT=OFF")
 endif()
- list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt)
-+ # for kineto
-+ add_custom_target(fmt)
+- list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt)
++ list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
 endif()
 # ---[ Kineto
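The reworked 0013 patch above makes the system-fmt code path link `fmt::fmt-header-only`, so only fmt's headers need to be present at build time. A small preflight sketch for checking that (the conda-style prefix layout is an assumption, not something the recipe itself does):

```python
import os
from pathlib import Path

# Assumed layout: headers under $CONDA_PREFIX/include, libraries under $CONDA_PREFIX/lib.
prefix = Path(os.environ.get("CONDA_PREFIX", "/usr"))
header = prefix / "include" / "fmt" / "core.h"
libdir = prefix / "lib"
libs = sorted(libdir.glob("libfmt.*")) if libdir.is_dir() else []

print(f"fmt header present : {header.exists()} ({header})")
print(f"fmt libraries      : {[p.name for p in libs] or 'none (header-only usage is fine)'}")
```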
diff --git a/recipe/patches/0016-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch b/recipe/patches/0014-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch
similarity index 87%
rename from recipe/patches/0016-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch
rename to recipe/patches/0014-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch
index 6ac645415..4fb3a06f2 100644
--- a/recipe/patches/0016-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch
+++ b/recipe/patches/0014-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch
@@ -1,7 +1,7 @@
-From 93be0373dd974e80eb9544892319dfc1a4d29c65 Mon Sep 17 00:00:00 2001
+From 4ae2acaff849e97005c5b4607dc541d089e91599 Mon Sep 17 00:00:00 2001
 From: Eddie Yan
 Date: Tue, 5 Aug 2025 00:49:50 +0000
-Subject: [PATCH 16/16] Attempt to fix torch.backends.cudnn.rnn import
+Subject: [PATCH 14/15] Attempt to fix torch.backends.cudnn.rnn import
 torch.backends.cudnn module in order to expose the .conv.fp32_precision
 and .rnn.fp32_precision settings. However, it fails to account for the
 frozen flags check.
 3 files changed, 43 insertions(+), 2 deletions(-)
 diff --git a/test/test_cuda.py b/test/test_cuda.py
-index 293bb2b7e70..477b0179f05 100644
+index 0ebfe192f8d..2aafc98064b 100644
 --- a/test/test_cuda.py
 +++ b/test/test_cuda.py
-@@ -911,6 +911,7 @@ print(t.is_pinned())
+@@ -853,6 +853,7 @@ print(t.is_pinned())
 self.assertEqual(torch.backends.cudnn.rnn.fp32_precision, "none")
 @recover_orig_fp32_precision
+
 def test_fp32_precision_with_float32_matmul_precision(self):
 torch.set_float32_matmul_precision("highest")
 self.assertEqual(torch.backends.cuda.matmul.fp32_precision, "ieee")
-@@ -920,6 +921,7 @@ print(t.is_pinned())
+@@ -862,6 +863,7 @@ print(t.is_pinned())
 self.assertEqual(torch.backends.cuda.matmul.fp32_precision, "tf32")
 @recover_orig_fp32_precision
+
 torch.backends.cudnn.conv.fp32_precision = "none"
 torch.backends.cudnn.rnn.fp32_precision = "tf32"
 diff --git a/torch/backends/cudnn/__init__.py b/torch/backends/cudnn/__init__.py
-index 9c155de7c04..a4106ea32cc 100644
+index 267594531db..7d06f503e14 100644
 --- a/torch/backends/cudnn/__init__.py
 +++ b/torch/backends/cudnn/__init__.py
 @@ -15,6 +15,8 @@ from torch.backends import (
+
 try:
 from torch._C import _cudnn
-@@ -212,7 +214,6 @@ class CudnnModule(PropModule):
+@@ -215,7 +217,6 @@ class CudnnModule(PropModule):
 torch._C._get_cudnn_allow_tf32, torch._C._set_cudnn_allow_tf32
 )
 conv = _FP32Precision("cuda", "conv")
-
 _get_fp32_precision_getter("cuda", "all"),
 _set_fp32_precision_setter("cuda", "all"),
 diff --git a/torch/backends/cudnn/rnn.py b/torch/backends/cudnn/rnn.py
-index 5b253e19054..8f6a3964fab 100644
+index 0dc9ca80aa6..9281234ae3e 100644
 --- a/torch/backends/cudnn/rnn.py
 +++ b/torch/backends/cudnn/rnn.py
 @@ -1,5 +1,13 @@
+
 try:
-@@ -20,7 +28,7 @@ def get_cudnn_mode(mode):
- elif mode == "GRU":
+@@ -24,7 +32,7 @@ def get_cudnn_mode(mode):
+ # pyrefly: ignore [missing-attribute]
 return int(_cudnn.RNNMode.gru)
 else:
- raise Exception(f"Unknown mode: {mode}") # noqa: TRY002
+
 # NB: We don't actually need this class anymore (in fact, we could serialize the
-@@ -42,6 +50,20 @@ class Unserializable:
+@@ -46,6 +54,20 @@ class Unserializable:
 self.inner = None
+
 def init_dropout_state(dropout, train, dropout_seed, dropout_state):
 dropout_desc_name = "desc_" + str(torch.cuda.current_device())
 dropout_p = dropout if train else 0
-@@ -62,3 +84,19 @@ def init_dropout_state(dropout, train, dropout_seed, dropout_state):
+@@ -67,3 +89,19 @@ def init_dropout_state(dropout, train, dropout_seed, dropout_state):
 )
 dropout_ts = dropout_state[dropout_desc_name].get()
 return dropout_ts
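The 0014 patch above is about making `torch.backends.cudnn.rnn` importable again so that the `fp32_precision` knobs on `conv` and `rnn` keep working. A defensive way to inspect those knobs from user code, since older builds do not expose them (a sketch, not part of the patch):

```python
import torch
from torch.backends import cudnn

for name in ("conv", "rnn"):
    sub = getattr(cudnn, name, None)  # may be absent or un-imported on older PyTorch builds
    precision = getattr(sub, "fp32_precision", None) if sub is not None else None
    print(f"torch.backends.cudnn.{name}.fp32_precision = {precision!r}")
```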
diff --git a/recipe/patches/0018-Use-Intel-LLVM-openmp.patch b/recipe/patches/0015-Use-Intel-LLVM-openmp.patch
similarity index 88%
rename from recipe/patches/0018-Use-Intel-LLVM-openmp.patch
rename to recipe/patches/0015-Use-Intel-LLVM-openmp.patch
index 624a9764e..40c203f86 100644
--- a/recipe/patches/0018-Use-Intel-LLVM-openmp.patch
+++ b/recipe/patches/0015-Use-Intel-LLVM-openmp.patch
@@ -1,7 +1,7 @@
-From ffbdd8bb8fa38397ad6d7958a8c4ddd80ff738eb Mon Sep 17 00:00:00 2001
+From 6161984bbdb6a3ca0ebbc5647b50c5d4db4c03f5 Mon Sep 17 00:00:00 2001
 From: Isuru Fernando
 Date: Thu, 31 Jul 2025 12:34:30 -0500
-Subject: [PATCH] Use Intel/LLVM openmp
+Subject: [PATCH 15/15] Use Intel/LLVM openmp
 This make MSVC use __kmp_* symbols available in both libiomp.dll and
 libiomp5md.dll instead of vcomp* symbols which are only in
 vcomp140.dll and libiomp5md.dll, but are missing in libiomp.dll
 1 file changed, 2 insertions(+), 2 deletions(-)
 diff --git a/cmake/Modules/FindOpenMP.cmake b/cmake/Modules/FindOpenMP.cmake
-index 8a9abff398..336a9a927a 100644
+index 8a9abff3982..336a9a927a9 100644
 --- a/cmake/Modules/FindOpenMP.cmake
 +++ b/cmake/Modules/FindOpenMP.cmake
 @@ -122,9 +122,9 @@ function(_OPENMP_FLAG_CANDIDATES LANG)
 endif()
 set(OMP_FLAG_PathScale "-openmp")
 set(OMP_FLAG_NAG "-openmp")
--- 
-2.45.2
-
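The 0015 patch above steers MSVC builds onto the Intel/LLVM OpenMP flags, so the resulting binaries resolve `__kmp_*` symbols from libiomp/libomp rather than MSVC's vcomp runtime. One way to sanity-check which OpenMP runtime a finished build reports (a sketch; the exact wording of the output differs between PyTorch versions):

```python
import torch

# parallel_info() returns a human-readable summary of the threading backends,
# typically including an OpenMP section when the build was made with OpenMP.
info = torch.__config__.parallel_info()
print(info)
print("OpenMP reported:", "OpenMP" in info)
```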
Exception(f"Unknown mode: {mode}") # noqa: TRY002 @@ -88,7 +88,7 @@ index 5b253e19054..8f6a3964fab 100644 # NB: We don't actually need this class anymore (in fact, we could serialize the -@@ -42,6 +50,20 @@ class Unserializable: +@@ -46,6 +54,20 @@ class Unserializable: self.inner = None @@ -109,7 +109,7 @@ index 5b253e19054..8f6a3964fab 100644 def init_dropout_state(dropout, train, dropout_seed, dropout_state): dropout_desc_name = "desc_" + str(torch.cuda.current_device()) dropout_p = dropout if train else 0 -@@ -62,3 +84,19 @@ def init_dropout_state(dropout, train, dropout_seed, dropout_state): +@@ -67,3 +89,19 @@ def init_dropout_state(dropout, train, dropout_seed, dropout_state): ) dropout_ts = dropout_state[dropout_desc_name].get() return dropout_ts diff --git a/recipe/patches/0018-Use-Intel-LLVM-openmp.patch b/recipe/patches/0015-Use-Intel-LLVM-openmp.patch similarity index 88% rename from recipe/patches/0018-Use-Intel-LLVM-openmp.patch rename to recipe/patches/0015-Use-Intel-LLVM-openmp.patch index 624a9764e..40c203f86 100644 --- a/recipe/patches/0018-Use-Intel-LLVM-openmp.patch +++ b/recipe/patches/0015-Use-Intel-LLVM-openmp.patch @@ -1,7 +1,7 @@ -From ffbdd8bb8fa38397ad6d7958a8c4ddd80ff738eb Mon Sep 17 00:00:00 2001 +From 6161984bbdb6a3ca0ebbc5647b50c5d4db4c03f5 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Thu, 31 Jul 2025 12:34:30 -0500 -Subject: [PATCH] Use Intel/LLVM openmp +Subject: [PATCH 15/15] Use Intel/LLVM openmp This make MSVC use __kmp_* symbols available in both libiomp.dll and libiomp5md.dll instead of vcomp* symbols which are only in @@ -11,7 +11,7 @@ vcomp140.dll and libiomp5md.dll, but are missing in libiomp.dll 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/Modules/FindOpenMP.cmake b/cmake/Modules/FindOpenMP.cmake -index 8a9abff398..336a9a927a 100644 +index 8a9abff3982..336a9a927a9 100644 --- a/cmake/Modules/FindOpenMP.cmake +++ b/cmake/Modules/FindOpenMP.cmake @@ -122,9 +122,9 @@ function(_OPENMP_FLAG_CANDIDATES LANG) @@ -26,6 +26,3 @@ index 8a9abff398..336a9a927a 100644 endif() set(OMP_FLAG_PathScale "-openmp") set(OMP_FLAG_NAG "-openmp") --- -2.45.2 - diff --git a/recipe/patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch b/recipe/patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch index fe411d716..c79c711db 100644 --- a/recipe/patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch +++ b/recipe/patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch @@ -3,10 +3,6 @@ From: "H. Vetinari" Date: Sun, 2 Feb 2025 08:54:01 +1100 Subject: [PATCH] switch away from find_package(CUDA) ---- - tensorpipe/CMakeLists.txt | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - diff --git a/third_party/tensorpipe/tensorpipe/CMakeLists.txt b/third_party/tensorpipe/tensorpipe/CMakeLists.txt index efcffc2..1c3b2ca 100644 --- a/third_party/tensorpipe/tensorpipe/CMakeLists.txt