diff --git a/.ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse.yaml b/.ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue.yaml similarity index 96% rename from .ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse.yaml rename to .ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue.yaml index 128dd0f14..50a508ca3 100644 --- a/.ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse.yaml +++ b/.ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue.yaml @@ -11,7 +11,7 @@ c_stdlib_version: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -29,7 +29,7 @@ fmt: github_actions_labels: - cirun-openstack-gpu-2xlarge is_rc: -- 'False' +- 'True' libabseil: - '20250512' libblas: diff --git a/.ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml b/.ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml similarity index 96% rename from .ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml rename to .ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml index 96be2927b..aa311390d 100644 --- a/.ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml +++ b/.ci_support/linux_64_blas_implgenericchannel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml @@ -11,7 +11,7 @@ c_stdlib_version: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -29,7 +29,7 @@ fmt: github_actions_labels: - cirun-openstack-gpu-2xlarge is_rc: -- 'False' +- 'True' libabseil: - '20250512' libblas: diff --git a/.ci_support/linux_64_blas_implmklchannel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse.yaml b/.ci_support/linux_64_blas_implmklchannel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue.yaml similarity index 96% rename from .ci_support/linux_64_blas_implmklchannel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse.yaml rename to .ci_support/linux_64_blas_implmklchannel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue.yaml index 1134e6b03..673574c66 100644 --- a/.ci_support/linux_64_blas_implmklchannel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse.yaml +++ b/.ci_support/linux_64_blas_implmklchannel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue.yaml @@ -11,7 +11,7 @@ c_stdlib_version: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -29,7 +29,7 @@ fmt: github_actions_labels: - cirun-openstack-gpu-2xlarge is_rc: -- 'False' +- 'True' libabseil: - '20250512' libblas: diff --git a/.ci_support/linux_64_blas_implmklchannel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml b/.ci_support/linux_64_blas_implmklchannel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml similarity index 96% rename from 
.ci_support/linux_64_blas_implmklchannel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml rename to .ci_support/linux_64_blas_implmklchannel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml index 7ea16ec20..e413beb1a 100644 --- a/.ci_support/linux_64_blas_implmklchannel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml +++ b/.ci_support/linux_64_blas_implmklchannel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml @@ -11,7 +11,7 @@ c_stdlib_version: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -29,7 +29,7 @@ fmt: github_actions_labels: - cirun-openstack-gpu-2xlarge is_rc: -- 'False' +- 'True' libabseil: - '20250512' libblas: diff --git a/.ci_support/linux_aarch64_channel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse.yaml b/.ci_support/linux_aarch64_channel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue.yaml similarity index 96% rename from .ci_support/linux_aarch64_channel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse.yaml rename to .ci_support/linux_aarch64_channel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue.yaml index 693408283..1c9769b94 100644 --- a/.ci_support/linux_aarch64_channel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse.yaml +++ b/.ci_support/linux_aarch64_channel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue.yaml @@ -11,7 +11,7 @@ c_stdlib_version: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -29,7 +29,7 @@ fmt: github_actions_labels: - cirun-openstack-gpu-2xlarge is_rc: -- 'False' +- 'True' libabseil: - '20250512' libblas: diff --git a/.ci_support/linux_aarch64_channel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml b/.ci_support/linux_aarch64_channel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml similarity index 96% rename from .ci_support/linux_aarch64_channel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml rename to .ci_support/linux_aarch64_channel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml index 1721a78b1..1c7ea27a5 100644 --- a/.ci_support/linux_aarch64_channel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml +++ b/.ci_support/linux_aarch64_channel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml @@ -11,7 +11,7 @@ c_stdlib_version: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -29,7 +29,7 @@ fmt: github_actions_labels: - cirun-openstack-gpu-2xlarge is_rc: -- 'False' +- 'True' libabseil: - '20250512' libblas: diff --git a/.ci_support/osx_64_blas_implgenericchannel_targetsconda-forge_mainis_rcFalse.yaml b/.ci_support/osx_64_blas_implgenericchannel_targetsconda-forge_pytorch_rcis_rcTrue.yaml similarity index 95% rename from .ci_support/osx_64_blas_implgenericchannel_targetsconda-forge_mainis_rcFalse.yaml rename to .ci_support/osx_64_blas_implgenericchannel_targetsconda-forge_pytorch_rcis_rcTrue.yaml index 2ff370171..0e3c6fc57 100644 --- a/.ci_support/osx_64_blas_implgenericchannel_targetsconda-forge_mainis_rcFalse.yaml +++ b/.ci_support/osx_64_blas_implgenericchannel_targetsconda-forge_pytorch_rcis_rcTrue.yaml @@ -1,7 +1,7 @@ MACOSX_DEPLOYMENT_TARGET: - '11.0' MACOSX_SDK_VERSION: -- '13.3' +- '14.5' 
blas_impl: - generic c_compiler: @@ -15,7 +15,7 @@ c_stdlib_version: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -29,7 +29,7 @@ fmt: github_actions_labels: - cirun-macos-m4-large is_rc: -- 'False' +- 'True' libabseil: - '20250512' libblas: diff --git a/.ci_support/osx_64_blas_implmklchannel_targetsconda-forge_mainis_rcFalse.yaml b/.ci_support/osx_64_blas_implmklchannel_targetsconda-forge_pytorch_rcis_rcTrue.yaml similarity index 95% rename from .ci_support/osx_64_blas_implmklchannel_targetsconda-forge_mainis_rcFalse.yaml rename to .ci_support/osx_64_blas_implmklchannel_targetsconda-forge_pytorch_rcis_rcTrue.yaml index e08cae1bb..a79826c93 100644 --- a/.ci_support/osx_64_blas_implmklchannel_targetsconda-forge_mainis_rcFalse.yaml +++ b/.ci_support/osx_64_blas_implmklchannel_targetsconda-forge_pytorch_rcis_rcTrue.yaml @@ -1,7 +1,7 @@ MACOSX_DEPLOYMENT_TARGET: - '11.0' MACOSX_SDK_VERSION: -- '13.3' +- '14.5' blas_impl: - mkl c_compiler: @@ -15,7 +15,7 @@ c_stdlib_version: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -29,7 +29,7 @@ fmt: github_actions_labels: - cirun-macos-m4-large is_rc: -- 'False' +- 'True' libabseil: - '20250512' libblas: diff --git a/.ci_support/osx_arm64_channel_targetsconda-forge_mainis_rcFalse.yaml b/.ci_support/osx_arm64_channel_targetsconda-forge_pytorch_rcis_rcTrue.yaml similarity index 95% rename from .ci_support/osx_arm64_channel_targetsconda-forge_mainis_rcFalse.yaml rename to .ci_support/osx_arm64_channel_targetsconda-forge_pytorch_rcis_rcTrue.yaml index 9815c8bd2..6c92bb710 100644 --- a/.ci_support/osx_arm64_channel_targetsconda-forge_mainis_rcFalse.yaml +++ b/.ci_support/osx_arm64_channel_targetsconda-forge_pytorch_rcis_rcTrue.yaml @@ -1,7 +1,7 @@ MACOSX_DEPLOYMENT_TARGET: - '11.0' MACOSX_SDK_VERSION: -- '13.3' +- '14.5' blas_impl: - generic c_compiler: @@ -15,7 +15,7 @@ c_stdlib_version: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -29,7 +29,7 @@ fmt: github_actions_labels: - cirun-macos-m4-large is_rc: -- 'False' +- 'True' libabseil: - '20250512' libblas: diff --git a/.ci_support/win_64_channel_targetsconda-forge_maincuda_compiler_version12.8is_rcFalse.yaml b/.ci_support/win_64_channel_targetsconda-forge_pytorch_rccuda_compiler_version12.8is_rcTrue.yaml similarity index 95% rename from .ci_support/win_64_channel_targetsconda-forge_maincuda_compiler_version12.8is_rcFalse.yaml rename to .ci_support/win_64_channel_targetsconda-forge_pytorch_rccuda_compiler_version12.8is_rcTrue.yaml index 61349d925..e617d2fcb 100644 --- a/.ci_support/win_64_channel_targetsconda-forge_maincuda_compiler_version12.8is_rcFalse.yaml +++ b/.ci_support/win_64_channel_targetsconda-forge_pytorch_rccuda_compiler_version12.8is_rcTrue.yaml @@ -7,7 +7,7 @@ c_stdlib: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -21,7 +21,7 @@ fmt: github_actions_labels: - cirun-azure-windows-4xlarge is_rc: -- 'False' +- 'True' libabseil: - '20250512' libmagma_sparse: diff --git a/.ci_support/win_64_channel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml b/.ci_support/win_64_channel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml similarity index 95% rename from 
.ci_support/win_64_channel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml rename to .ci_support/win_64_channel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml index 8f4871b4d..b3a0d13a9 100644 --- a/.ci_support/win_64_channel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse.yaml +++ b/.ci_support/win_64_channel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue.yaml @@ -7,7 +7,7 @@ c_stdlib: channel_sources: - conda-forge channel_targets: -- conda-forge main +- conda-forge pytorch_rc cuda_compiler: - cuda-nvcc cuda_compiler_version: @@ -21,7 +21,7 @@ fmt: github_actions_labels: - cirun-azure-windows-4xlarge is_rc: -- 'False' +- 'True' libabseil: - '20250512' libmagma_sparse: diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index d89b5d080..d3fce2674 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -21,73 +21,73 @@ jobs: fail-fast: false matrix: include: - - CONFIG: linux_64_blas_implgenericchannel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse - SHORT_CONFIG: linux_64_blas_implgenericchannel_targets_h71a96c7d + - CONFIG: linux_64_blas_implgenericchannel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue + SHORT_CONFIG: linux_64_blas_implgenericchannel_targets_hc637dd34 UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implgenericchannel_targets_h71a96c7d', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implgenericchannel_targets_hc637dd34', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 CONDA_FORGE_DOCKER_RUN_ARGS: "--gpus all" - - CONFIG: linux_64_blas_implgenericchannel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse - SHORT_CONFIG: linux_64_blas_implgenericchannel_targets_h17c608a0 + - CONFIG: linux_64_blas_implgenericchannel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue + SHORT_CONFIG: linux_64_blas_implgenericchannel_targets_h7548ab25 UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implgenericchannel_targets_h17c608a0', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implgenericchannel_targets_h7548ab25', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 CONDA_FORGE_DOCKER_RUN_ARGS: "--gpus all" - - CONFIG: linux_64_blas_implmklchannel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse - SHORT_CONFIG: linux_64_blas_implmklchannel_targetscond_h5b18f8bc + - CONFIG: linux_64_blas_implmklchannel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue + SHORT_CONFIG: linux_64_blas_implmklchannel_targetscond_h2dde49c7 UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implmklchannel_targetscond_h5b18f8bc', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implmklchannel_targetscond_h2dde49c7', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 CONDA_FORGE_DOCKER_RUN_ARGS: "--gpus all" - - CONFIG: linux_64_blas_implmklchannel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse - SHORT_CONFIG: linux_64_blas_implmklchannel_targetscond_h38f93959 + - CONFIG: 
linux_64_blas_implmklchannel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue + SHORT_CONFIG: linux_64_blas_implmklchannel_targetscond_h0aaf175a UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implmklchannel_targetscond_h38f93959', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_64_blas_implmklchannel_targetscond_h0aaf175a', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 CONDA_FORGE_DOCKER_RUN_ARGS: "--gpus all" - - CONFIG: linux_aarch64_channel_targetsconda-forge_maincuda_compiler_version12.9is_rcFalse - SHORT_CONFIG: linux_aarch64_channel_targetsconda-forge_heefc8d83 + - CONFIG: linux_aarch64_channel_targetsconda-forge_pytorch_rccuda_compiler_version12.9is_rcTrue + SHORT_CONFIG: linux_aarch64_channel_targetsconda-forge_h562a5d25 UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_aarch64_channel_targetsconda-forge_heefc8d83', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_aarch64_channel_targetsconda-forge_h562a5d25', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 CONDA_FORGE_DOCKER_RUN_ARGS: "--gpus all" - - CONFIG: linux_aarch64_channel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse - SHORT_CONFIG: linux_aarch64_channel_targetsconda-forge_h56c2c839 + - CONFIG: linux_aarch64_channel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue + SHORT_CONFIG: linux_aarch64_channel_targetsconda-forge_he6bea9a8 UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_aarch64_channel_targetsconda-forge_h56c2c839', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-gpu-2xlarge--${{ github.run_id }}-linux_aarch64_channel_targetsconda-forge_he6bea9a8', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 CONDA_FORGE_DOCKER_RUN_ARGS: "--gpus all" - - CONFIG: osx_64_blas_implgenericchannel_targetsconda-forge_mainis_rcFalse - SHORT_CONFIG: osx_64_blas_implgenericchannel_targetsco_h709642d7 + - CONFIG: osx_64_blas_implgenericchannel_targetsconda-forge_pytorch_rcis_rcTrue + SHORT_CONFIG: osx_64_blas_implgenericchannel_targetsco_h36e27ba7 UPLOAD_PACKAGES: True os: macos - runs_on: ['cirun-macos-m4-large--${{ github.run_id }}-osx_64_blas_implgenericchannel_targetsco_h709642d7', 'macOS', 'arm64', 'self-hosted'] - - CONFIG: osx_64_blas_implmklchannel_targetsconda-forge_mainis_rcFalse - SHORT_CONFIG: osx_64_blas_implmklchannel_targetsconda-_h747b3c68 + runs_on: ['cirun-macos-m4-large--${{ github.run_id }}-osx_64_blas_implgenericchannel_targetsco_h36e27ba7', 'macOS', 'arm64', 'self-hosted'] + - CONFIG: osx_64_blas_implmklchannel_targetsconda-forge_pytorch_rcis_rcTrue + SHORT_CONFIG: osx_64_blas_implmklchannel_targetsconda-_hae519702 UPLOAD_PACKAGES: True os: macos - runs_on: ['cirun-macos-m4-large--${{ github.run_id }}-osx_64_blas_implmklchannel_targetsconda-_h747b3c68', 'macOS', 'arm64', 'self-hosted'] - - CONFIG: osx_arm64_channel_targetsconda-forge_mainis_rcFalse - SHORT_CONFIG: osx_arm64_channel_targetsconda-forge_mai_h5f57e26b + runs_on: ['cirun-macos-m4-large--${{ github.run_id }}-osx_64_blas_implmklchannel_targetsconda-_hae519702', 'macOS', 'arm64', 'self-hosted'] + - CONFIG: osx_arm64_channel_targetsconda-forge_pytorch_rcis_rcTrue + SHORT_CONFIG: 
osx_arm64_channel_targetsconda-forge_pyt_h292c857d UPLOAD_PACKAGES: True os: macos - runs_on: ['cirun-macos-m4-large--${{ github.run_id }}-osx_arm64_channel_targetsconda-forge_mai_h5f57e26b', 'macOS', 'arm64', 'self-hosted'] - - CONFIG: win_64_channel_targetsconda-forge_maincuda_compiler_version12.8is_rcFalse - SHORT_CONFIG: win_64_channel_targetsconda-forge_maincu_hca575dce + runs_on: ['cirun-macos-m4-large--${{ github.run_id }}-osx_arm64_channel_targetsconda-forge_pyt_h292c857d', 'macOS', 'arm64', 'self-hosted'] + - CONFIG: win_64_channel_targetsconda-forge_pytorch_rccuda_compiler_version12.8is_rcTrue + SHORT_CONFIG: win_64_channel_targetsconda-forge_pytorc_h650359c2 UPLOAD_PACKAGES: True os: windows - runs_on: ['cirun-azure-windows-4xlarge--${{ github.run_id }}-win_64_channel_targetsconda-forge_maincu_hca575dce', 'windows', 'x64', 'self-hosted'] - - CONFIG: win_64_channel_targetsconda-forge_maincuda_compiler_versionNoneis_rcFalse - SHORT_CONFIG: win_64_channel_targetsconda-forge_maincu_hc68ac914 + runs_on: ['cirun-azure-windows-4xlarge--${{ github.run_id }}-win_64_channel_targetsconda-forge_pytorc_h650359c2', 'windows', 'x64', 'self-hosted'] + - CONFIG: win_64_channel_targetsconda-forge_pytorch_rccuda_compiler_versionNoneis_rcTrue + SHORT_CONFIG: win_64_channel_targetsconda-forge_pytorc_hffeaf219 UPLOAD_PACKAGES: True os: windows - runs_on: ['cirun-azure-windows-4xlarge--${{ github.run_id }}-win_64_channel_targetsconda-forge_maincu_hc68ac914', 'windows', 'x64', 'self-hosted'] + runs_on: ['cirun-azure-windows-4xlarge--${{ github.run_id }}-win_64_channel_targetsconda-forge_pytorc_hffeaf219', 'windows', 'x64', 'self-hosted'] steps: - name: Checkout code diff --git a/.scripts/run_osx_build.sh b/.scripts/run_osx_build.sh index 4bba44792..bac7141a9 100755 --- a/.scripts/run_osx_build.sh +++ b/.scripts/run_osx_build.sh @@ -63,8 +63,24 @@ if [[ "${sha:-}" == "" ]]; then sha=$(git rev-parse HEAD) fi -# HACK -cp .scripts/download_osx_sdk.sh $CONDA_PREFIX/bin/ +if [[ "${OSX_SDK_DIR:-}" == "" ]]; then + if [[ "${CI:-}" == "" ]]; then + echo "Please set OSX_SDK_DIR to a directory where SDKs can be downloaded to. Aborting" + exit 1 + else + export OSX_SDK_DIR=/opt/conda-sdks + /usr/bin/sudo mkdir -p "${OSX_SDK_DIR}" + /usr/bin/sudo chown "${USER}" "${OSX_SDK_DIR}" + fi +else + if tmpf=$(mktemp -p "$OSX_SDK_DIR" tmp.XXXXXXXX 2>/dev/null); then + rm -f "$tmpf" + echo "OSX_SDK_DIR is writeable without sudo, continuing" + else + echo "User-provided OSX_SDK_DIR is not writeable for current user! Aborting" + exit 1 + fi +fi echo -e "\n\nRunning the build setup script." 
source run_conda_forge_build_setup diff --git a/README.md b/README.md index 84e94ab74..7095e810c 100644 --- a/README.md +++ b/README.md @@ -40,14 +40,14 @@ Current release info Installing pytorch-cpu ====================== -Installing `pytorch-cpu` from the `conda-forge` channel can be achieved by adding `conda-forge` to your channels with: +Installing `pytorch-cpu` from the `conda-forge/label/pytorch_rc` channel can be achieved by adding `conda-forge/label/pytorch_rc` to your channels with: ``` -conda config --add channels conda-forge +conda config --add channels conda-forge/label/pytorch_rc conda config --set channel_priority strict ``` -Once the `conda-forge` channel has been enabled, `libtorch, pytorch, pytorch-cpu, pytorch-gpu, pytorch-tests` can be installed with `conda`: +Once the `conda-forge/label/pytorch_rc` channel has been enabled, `libtorch, pytorch, pytorch-cpu, pytorch-gpu, pytorch-tests` can be installed with `conda`: ``` conda install libtorch pytorch pytorch-cpu pytorch-gpu pytorch-tests @@ -62,26 +62,26 @@ mamba install libtorch pytorch pytorch-cpu pytorch-gpu pytorch-tests It is possible to list all of the versions of `libtorch` available on your platform with `conda`: ``` -conda search libtorch --channel conda-forge +conda search libtorch --channel conda-forge/label/pytorch_rc ``` or with `mamba`: ``` -mamba search libtorch --channel conda-forge +mamba search libtorch --channel conda-forge/label/pytorch_rc ``` Alternatively, `mamba repoquery` may provide more information: ``` # Search all versions available on your platform: -mamba repoquery search libtorch --channel conda-forge +mamba repoquery search libtorch --channel conda-forge/label/pytorch_rc # List packages depending on `libtorch`: -mamba repoquery whoneeds libtorch --channel conda-forge +mamba repoquery whoneeds libtorch --channel conda-forge/label/pytorch_rc # List dependencies of `libtorch`: -mamba repoquery depends libtorch --channel conda-forge +mamba repoquery depends libtorch --channel conda-forge/label/pytorch_rc ``` diff --git a/recipe/conda_build_config.yaml b/recipe/conda_build_config.yaml index 75d8bfde4..1ce6c8f1f 100644 --- a/recipe/conda_build_config.yaml +++ b/recipe/conda_build_config.yaml @@ -7,7 +7,7 @@ cxx_compiler_version: # [aarch64] - 13 # [aarch64] MACOSX_SDK_VERSION: # [osx] - - 13.3 # [osx] + - 14.5 # [osx] channel_targets: - conda-forge main diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 5e6672850..0767a1287 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -1,6 +1,6 @@ # if you wish to build release candidate number X, append the version string with ".rcX" -{% set version = "2.9.1" %} -{% set build = 1 %} +{% set version = "2.10.0-rc3" %} +{% set build = 0 %} # Use a higher build number for the CUDA variant, to ensure that it's # preferred by conda's solver, and it's preferentially @@ -24,14 +24,8 @@ package: source: {% if "rc" in version %} - # - git_url: https://github.com/pytorch/pytorch.git - # git_rev: v{{ version.replace(".rc", "-rc") }} - # we cannot apply patches to submodules when checking out with git_url, because - # then conda switches the patch-application to use git, which cannot construct - # a usable ancestor from outside the submodule; the only option then is to - # pull in the submodules separately. 
- - url: https://github.com/pytorch/pytorch/archive/refs/tags/v{{ version }}.tar.gz - sha256: 04ae0a8babdc9cb9dfc4f8746b2b8aa0f8ed0f9e92835cc4af0bcb01e3969e51 + - url: https://download.pytorch.org/source_code/test/pytorch-v{{ version }}.tar.gz + sha256: 3200721908010568b3715332a989948ea731242681935220beef090ae608aef2 {% else %} # The "pytorch-v" tarballs contain submodules; the "pytorch-" ones don't. - url: https://github.com/pytorch/pytorch/releases/download/v{{ version }}/pytorch-v{{ version }}.tar.gz @@ -39,30 +33,27 @@ source: {% endif %} patches: - patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch - - patches/0002-Fix-duplicate-linker-script.patch # [cuda_compiler_version != "None" and aarch64] # conda-specific patch, lets us override CUDA paths - - patches/0003-Allow-overriding-CUDA-related-paths.patch + - patches/0002-Allow-overriding-CUDA-related-paths.patch # fix BLAS calling convention for openblas - - patches/0004-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch - - patches/0005-Fix-FindOpenBLAS.patch + - patches/0003-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch + - patches/0004-Fix-FindOpenBLAS.patch # point to headers that are now living in $PREFIX/include instead of $SP_DIR/torch/include - - patches/0006-point-include-paths-to-Conda-prefix-include-dir.patch - - patches/0007-Add-conda-prefix-to-inductor-include-lib-paths.patch - - patches/0008-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch - - patches/0009-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch # [win] - - patches/0010-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch - # backport https://github.com/pytorch/pytorch/pull/148668 - - patches/0011-Fix-CUPTI-lookup-to-include-target-directory.patch + - patches/0005-point-include-paths-to-Conda-prefix-include-dir.patch + - patches/0006-Add-conda-prefix-to-inductor-include-lib-paths.patch + - patches/0007-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch + - patches/0008-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch # [win] + - patches/0009-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch # skip a test that fails with numpy v2.3; still triggers as of pytorch v2.9 - - patches/0012-skip-test_norm_matrix_degenerate_shapes-on-numpy-2.3.patch + - patches/0010-skip-test_norm_matrix_degenerate_shapes-on-numpy-2.3.patch # backport https://github.com/pytorch/pytorch/pull/127702 - - patches/0013-Define-PY_SSIZE_T_CLEAN-before-include-Python.h.patch + - patches/0011-Define-PY_SSIZE_T_CLEAN-before-include-Python.h.patch # backport https://github.com/pytorch/pytorch/pull/166824 - - patches/0014-Add-USE_SYSTEM-options-for-KLEIDI-CUDNN_FRONTEND-CUT.patch - - patches/0015-Fix-building-kineto-against-system-fmt.patch + - patches/0012-Add-USE_SYSTEM-options-for-KLEIDI-CUDNN_FRONTEND-CUT.patch + - patches/0013-Fix-building-kineto-against-system-fmt.patch # backport https://github.com/pytorch/pytorch/pull/159828 - - patches/0016-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch - - patches/0018-Use-Intel-LLVM-openmp.patch + - patches/0014-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch + - patches/0015-Use-Intel-LLVM-openmp.patch - patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch build: @@ -162,6 +153,7 @@ requirements: - eigen - zlib - fmt + - packaging run: - libblas * *{{ blas_impl }} # [blas_impl == "mkl"] run_constrained: @@ -304,6 +296,7 @@ outputs: - typing_extensions - zlib - fmt + - packaging run: - {{ pin_subpackage('libtorch', exact=True) }} - llvm-openmp @@ -446,6 
+439,8 @@ outputs: {% set skips = skips ~ " or test_addbmm or test_baddbmm or test_bmm" %} # [aarch64] # doesn't crash, but gets different result on aarch + CUDA {% set skips = skips ~ " or illcondition_matrix_input_should_not_crash_cpu" %} # [aarch64 and cuda_compiler_version != "None"] + # minor inaccuracy on aarch64 (emulation?) + {% set skips = skips ~ " or (TestNN and test_upsampling_bfloat16)" %} # [aarch64] # may crash spuriously {% set skips = skips ~ " or (TestAutograd and test_profiler_seq_nr)" %} {% set skips = skips ~ " or (TestAutograd and test_profiler_propagation)" %} @@ -505,6 +500,11 @@ outputs: {% set skips = skips ~ " or test_forward_nn_Bilinear_mps_float16" %} # [osx and arm64] # "quantized engine NoQEngine is not supported" {% set skips = skips ~ " or test_qengine" %} # [osx and arm64] + # some warning-related failure, maybe it's broken by --disable-warnings? + {% set skips = skips ~ " or test_cpp_warnings_have_python_context_cpu" %} + {% set skips = skips ~ " or test_cpp_warnings_have_python_context_cuda" %} + # "Attempt to trace generator" + {% set skips = skips ~ " or test_lite_regional_compile_flex_attention_cuda" %} # the whole test suite takes forever, but we should get a good enough coverage # for potential packaging problems by running a fixed subset diff --git a/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch b/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch index 0b209063f..e62791521 100644 --- a/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch +++ b/recipe/patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch @@ -1,17 +1,17 @@ -From 7eeddc8b77fbcb44ce4c5d97c4962efc242a3f75 Mon Sep 17 00:00:00 2001 +From e639e26774c2de4a0a51013e386152cb084f4f19 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Sun, 1 Sep 2024 17:35:40 -0400 -Subject: [PATCH 01/13] Force usage of python 3 and error without numpy +Subject: [PATCH 01/15] Force usage of python 3 and error without numpy --- cmake/Dependencies.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index ef5c2fd4e97..72d9e298dcc 100644 +index 903c212de81..ecf8669649b 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake -@@ -804,9 +804,9 @@ if(BUILD_PYTHON) +@@ -818,9 +818,9 @@ if(BUILD_PYTHON) if(USE_NUMPY) list(APPEND PYTHON_COMPONENTS NumPy) endif() @@ -23,7 +23,7 @@ index ef5c2fd4e97..72d9e298dcc 100644 endif() if(NOT Python_Interpreter_FOUND) -@@ -823,7 +823,7 @@ if(BUILD_PYTHON) +@@ -837,7 +837,7 @@ if(BUILD_PYTHON) if(Python_Development.Module_FOUND) if(USE_NUMPY) if(NOT Python_NumPy_FOUND) diff --git a/recipe/patches/0003-Allow-overriding-CUDA-related-paths.patch b/recipe/patches/0002-Allow-overriding-CUDA-related-paths.patch similarity index 89% rename from recipe/patches/0003-Allow-overriding-CUDA-related-paths.patch rename to recipe/patches/0002-Allow-overriding-CUDA-related-paths.patch index 2fbe631ff..43af98d46 100644 --- a/recipe/patches/0003-Allow-overriding-CUDA-related-paths.patch +++ b/recipe/patches/0002-Allow-overriding-CUDA-related-paths.patch @@ -1,7 +1,7 @@ -From afa9e9bc6e2f6db7af3b3a46e608bc8d2833f18d Mon Sep 17 00:00:00 2001 +From 04f3324d69b41ff7336747d4b51b098541a6b4f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Wed, 27 Nov 2024 13:47:23 +0100 -Subject: [PATCH 03/13] Allow overriding CUDA-related paths +Subject: [PATCH 02/15] Allow overriding CUDA-related paths --- 
cmake/Modules/FindCUDAToolkit.cmake | 2 +- @@ -22,10 +22,10 @@ index ec9ae530aa6..b7c0bd9fc51 100644 set(CUDAToolkit_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_LIBRARY_ROOT}") set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_TOOLKIT_VERSION}") diff --git a/tools/setup_helpers/cmake.py b/tools/setup_helpers/cmake.py -index 02ab011dd48..447628cc4c1 100644 +index 88f0fe5d309..753da8f435d 100644 --- a/tools/setup_helpers/cmake.py +++ b/tools/setup_helpers/cmake.py -@@ -314,7 +314,7 @@ class CMake: +@@ -315,7 +315,7 @@ class CMake: true_var = additional_options.get(var) if true_var is not None: build_options[true_var] = val diff --git a/recipe/patches/0002-Fix-duplicate-linker-script.patch b/recipe/patches/0002-Fix-duplicate-linker-script.patch deleted file mode 100644 index d4adbe562..000000000 --- a/recipe/patches/0002-Fix-duplicate-linker-script.patch +++ /dev/null @@ -1,24 +0,0 @@ -From 532682e899bed8cb97b922e0d1ff92664100e521 Mon Sep 17 00:00:00 2001 -From: Jeongseok Lee -Date: Sun, 3 Nov 2024 01:12:36 -0700 -Subject: [PATCH 02/13] Fix duplicate linker script - ---- - setup.py | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/setup.py b/setup.py -index 11ca48482a7..1c5b75897df 100644 ---- a/setup.py -+++ b/setup.py -@@ -1632,7 +1632,9 @@ def main() -> None: - filein="cmake/prioritized_text.txt", fout="cmake/linker_script.ld" - ) - linker_script_path = os.path.abspath("cmake/linker_script.ld") -- os.environ["LDFLAGS"] = os.getenv("LDFLAGS", "") + f" -T{linker_script_path}" -+ ldflags = os.getenv("LDFLAGS", "") -+ if f"-T{linker_script_path}" not in ldflags: -+ os.environ["LDFLAGS"] = ldflags + f" -T{linker_script_path}" - os.environ["CFLAGS"] = ( - os.getenv("CFLAGS", "") + " -ffunction-sections -fdata-sections" - ) diff --git a/recipe/patches/0004-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch b/recipe/patches/0003-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch similarity index 86% rename from recipe/patches/0004-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch rename to recipe/patches/0003-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch index c3a933b6f..02b24ff2e 100644 --- a/recipe/patches/0004-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch +++ b/recipe/patches/0003-Use-BLAS_USE_CBLAS_DOT-for-OpenBLAS-builds.patch @@ -1,7 +1,7 @@ -From 6af9801059f087440b2a756884d6f1fe3d91d865 Mon Sep 17 00:00:00 2001 +From edde9415778a7ceafd0eadc1b49ca216a175f607 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Wed, 18 Dec 2024 03:59:00 +0000 -Subject: [PATCH 04/13] Use BLAS_USE_CBLAS_DOT for OpenBLAS builds +Subject: [PATCH 03/15] Use BLAS_USE_CBLAS_DOT for OpenBLAS builds There are two calling conventions for *dotu functions @@ -31,10 +31,10 @@ functional calls. 
1 file changed, 2 insertions(+) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 72d9e298dcc..f7e56828bdf 100644 +index ecf8669649b..e8d8bc58096 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake -@@ -186,6 +186,7 @@ elseif(BLAS STREQUAL "OpenBLAS") +@@ -194,6 +194,7 @@ elseif(BLAS STREQUAL "OpenBLAS") set(BLAS_FOUND 1) set(BLAS_LIBRARIES ${OpenBLAS_LIB}) set(BLAS_CHECK_F2C 1) @@ -42,7 +42,7 @@ index 72d9e298dcc..f7e56828bdf 100644 elseif(BLAS STREQUAL "BLIS") find_package(BLIS REQUIRED) include_directories(SYSTEM ${BLIS_INCLUDE_DIR}) -@@ -209,6 +210,7 @@ elseif(BLAS STREQUAL "MKL") +@@ -217,6 +218,7 @@ elseif(BLAS STREQUAL "MKL") set(BLAS_INFO "mkl") set(BLAS_FOUND 1) set(BLAS_LIBRARIES ${MKL_LIBRARIES}) diff --git a/recipe/patches/0005-Fix-FindOpenBLAS.patch b/recipe/patches/0004-Fix-FindOpenBLAS.patch similarity index 78% rename from recipe/patches/0005-Fix-FindOpenBLAS.patch rename to recipe/patches/0004-Fix-FindOpenBLAS.patch index b8e4c8ae8..47e1229e8 100644 --- a/recipe/patches/0005-Fix-FindOpenBLAS.patch +++ b/recipe/patches/0004-Fix-FindOpenBLAS.patch @@ -1,22 +1,22 @@ -From 4487751a213664c3d62023da331007c712c60f79 Mon Sep 17 00:00:00 2001 +From 4a9c995a1b907f2b15cf3179b3ccdfee398c0c06 Mon Sep 17 00:00:00 2001 From: Bas Zalmstra Date: Thu, 16 May 2024 10:46:49 +0200 -Subject: [PATCH 05/13] Fix FindOpenBLAS +Subject: [PATCH 04/15] Fix FindOpenBLAS --- cmake/Modules/FindOpenBLAS.cmake | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/cmake/Modules/FindOpenBLAS.cmake b/cmake/Modules/FindOpenBLAS.cmake -index 9ba86ba1ee0..19e1d508aa5 100644 +index 21ae9e2521e..b4aeeef4012 100644 --- a/cmake/Modules/FindOpenBLAS.cmake +++ b/cmake/Modules/FindOpenBLAS.cmake -@@ -31,22 +31,25 @@ SET(Open_BLAS_LIB_SEARCH_PATHS - $ENV{OpenBLAS_HOME}/lib - ) +@@ -36,22 +36,25 @@ IF(DEFINED ENV{OpenBLAS_LIB_NAME}) + SET(Open_BLAS_LIB_NAME $ENV{OpenBLAS_LIB_NAME}) + ENDIF() -FIND_PATH(OpenBLAS_INCLUDE_DIR NAMES cblas.h PATHS ${Open_BLAS_INCLUDE_SEARCH_PATHS}) --FIND_LIBRARY(OpenBLAS_LIB NAMES openblas PATHS ${Open_BLAS_LIB_SEARCH_PATHS}) +-FIND_LIBRARY(OpenBLAS_LIB NAMES ${Open_BLAS_LIB_NAME} PATHS ${Open_BLAS_LIB_SEARCH_PATHS}) - SET(OpenBLAS_FOUND ON) diff --git a/recipe/patches/0006-point-include-paths-to-Conda-prefix-include-dir.patch b/recipe/patches/0005-point-include-paths-to-Conda-prefix-include-dir.patch similarity index 51% rename from recipe/patches/0006-point-include-paths-to-Conda-prefix-include-dir.patch rename to recipe/patches/0005-point-include-paths-to-Conda-prefix-include-dir.patch index 2e7b475eb..2d1e0957b 100644 --- a/recipe/patches/0006-point-include-paths-to-Conda-prefix-include-dir.patch +++ b/recipe/patches/0005-point-include-paths-to-Conda-prefix-include-dir.patch @@ -1,31 +1,36 @@ -From b1d699a5efd5659ab7e1d0dd3a203d432e63ffb7 Mon Sep 17 00:00:00 2001 +From ed5cdb12101ce038ff6f4d07d26e8fd02b044925 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 23 Jan 2025 22:58:14 +1100 -Subject: [PATCH 06/13] point include paths to Conda prefix include dir +Subject: [PATCH 05/15] point include paths to Conda prefix include dir Updated to use `sysconfig.get_config_vars("prefix")` per https://github.com/conda-forge/pytorch-cpu-feedstock/issues/424 and https://github.com/conda-forge/pytorch-cpu-feedstock/issues/447. 
--- - torch/utils/cpp_extension.py | 5 +++++ - 1 file changed, 5 insertions(+) + torch/utils/cpp_extension.py | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py -index 902d2fe6ce0..3701edf1ce4 100644 +index f29c382f0e3..e9557d43ee1 100644 --- a/torch/utils/cpp_extension.py +++ b/torch/utils/cpp_extension.py -@@ -1506,9 +1506,14 @@ def include_paths(device_type: str = "cpu") -> list[str]: +@@ -1567,12 +1567,18 @@ def include_paths(device_type: str = "cpu", torch_include_dirs=True) -> list[str + Returns: A list of include path strings. """ +- paths = [] lib_include = os.path.join(_TORCH_PATH, 'include') + # Account for conda prefix. + conda_pieces = [sysconfig.get_config_var("prefix")] + IS_WINDOWS * ["Library"] + ["include"] + conda_include = os.path.join(*conda_pieces) - paths = [ ++ paths = [ + conda_include, - lib_include, - # Remove this once torch/torch.h is officially no longer supported for C++ extensions. -+ os.path.join(conda_include, 'torch', 'csrc', 'api', 'include'), - os.path.join(lib_include, 'torch', 'csrc', 'api', 'include'), - ] ++ ] + if torch_include_dirs: + paths.extend([ + lib_include, + # Remove this once torch/torch.h is officially no longer supported for C++ extensions. ++ os.path.join(conda_include, 'torch', 'csrc', 'api', 'include'), + os.path.join(lib_include, 'torch', 'csrc', 'api', 'include'), + ]) if device_type == "cuda" and IS_HIP_EXTENSION: diff --git a/recipe/patches/0007-Add-conda-prefix-to-inductor-include-lib-paths.patch b/recipe/patches/0006-Add-conda-prefix-to-inductor-include-lib-paths.patch similarity index 87% rename from recipe/patches/0007-Add-conda-prefix-to-inductor-include-lib-paths.patch rename to recipe/patches/0006-Add-conda-prefix-to-inductor-include-lib-paths.patch index f377341fc..65db92e67 100644 --- a/recipe/patches/0007-Add-conda-prefix-to-inductor-include-lib-paths.patch +++ b/recipe/patches/0006-Add-conda-prefix-to-inductor-include-lib-paths.patch @@ -1,7 +1,7 @@ -From 6c71d8d16faf7c13b786f3f35fef6ab533bad2c1 Mon Sep 17 00:00:00 2001 +From 7958145d5e1a178540033a112f4267b3e54842e1 Mon Sep 17 00:00:00 2001 From: Daniel Petry Date: Tue, 21 Jan 2025 17:45:23 -0600 -Subject: [PATCH 07/14] Add conda prefix to inductor include & lib paths +Subject: [PATCH 06/15] Add conda prefix to inductor include & lib paths Currently inductor doesn't look in conda's includes and libs. This results in errors when it tries to compile, if system versions are being used of @@ -18,10 +18,10 @@ and https://github.com/conda-forge/pytorch-cpu-feedstock/issues/447. 
1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/torch/_inductor/cpp_builder.py b/torch/_inductor/cpp_builder.py -index e2cb445ed10..89905bec464 100644 +index 6a6b7d15ae3..0a4724e5c17 100644 --- a/torch/_inductor/cpp_builder.py +++ b/torch/_inductor/cpp_builder.py -@@ -1410,10 +1410,12 @@ def get_cpp_torch_options( +@@ -1520,10 +1520,12 @@ def get_cpp_torch_options( + python_include_dirs + torch_include_dirs + omp_include_dir_paths diff --git a/recipe/patches/0008-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch b/recipe/patches/0007-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch similarity index 83% rename from recipe/patches/0008-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch rename to recipe/patches/0007-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch index a2db4fc88..f98c4c330 100644 --- a/recipe/patches/0008-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch +++ b/recipe/patches/0007-make-ATEN_INCLUDE_DIR-relative-to-TORCH_INSTALL_PREF.patch @@ -1,7 +1,7 @@ -From 7520f1737851d8a812dacfb150399e701fc27769 Mon Sep 17 00:00:00 2001 +From 36cb051937278f82ead2912dd11a0d996f9d3d8c Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 28 Jan 2025 14:15:34 +1100 -Subject: [PATCH 08/13] make ATEN_INCLUDE_DIR relative to TORCH_INSTALL_PREFIX +Subject: [PATCH 07/15] make ATEN_INCLUDE_DIR relative to TORCH_INSTALL_PREFIX we cannot set CMAKE_INSTALL_PREFIX without the pytorch build complaining, but we can use TORCH_INSTALL_PREFIX, which is set correctly relative to our CMake files already: @@ -11,10 +11,10 @@ https://github.com/pytorch/pytorch/blob/v2.5.1/cmake/TorchConfig.cmake.in#L47 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt -index 6c095680733..d7c7a74a302 100644 +index 6650db50b01..c413b589b5f 100644 --- a/aten/src/ATen/CMakeLists.txt +++ b/aten/src/ATen/CMakeLists.txt -@@ -793,7 +793,7 @@ if(USE_ROCM) +@@ -825,7 +825,7 @@ if(USE_ROCM) # list(APPEND ATen_HIP_DEPENDENCY_LIBS ATEN_CUDA_FILES_GEN_LIB) endif() diff --git a/recipe/patches/0009-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch b/recipe/patches/0008-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch similarity index 91% rename from recipe/patches/0009-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch rename to recipe/patches/0008-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch index aa4e3a029..6e3b67d7d 100644 --- a/recipe/patches/0009-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch +++ b/recipe/patches/0008-remove-DESTINATION-lib-from-CMake-install-TARGETS-di.patch @@ -1,7 +1,7 @@ -From 755c05e421afd36dc38f07208f28e6df94d48323 Mon Sep 17 00:00:00 2001 +From d823b46ccfc9cea8c74ce62f612580ba9ad51407 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Tue, 28 Jan 2025 10:58:29 +1100 -Subject: [PATCH 09/13] remove `DESTINATION lib` from CMake `install(TARGETS` +Subject: [PATCH 08/15] remove `DESTINATION lib` from CMake `install(TARGETS` directives Suggested-By: Silvio Traversaro @@ -55,10 +55,10 @@ index ef24471dba8..a410383de27 100644 endif() diff --git a/c10/xpu/CMakeLists.txt b/c10/xpu/CMakeLists.txt -index 95b9f031c3e..f1ce6d1ad14 100644 +index c2fa65ba35e..3a384395e4a 100644 --- a/c10/xpu/CMakeLists.txt +++ b/c10/xpu/CMakeLists.txt -@@ -46,7 +46,7 @@ if(NOT BUILD_LIBTORCHLESS) +@@ -47,7 +47,7 @@ if(NOT BUILD_LIBTORCHLESS) $ $ ) @@ -68,10 +68,10 @@ index 95b9f031c3e..f1ce6d1ad14 100644 add_subdirectory(test) endif() diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt -index 6ab41b6c847..a78ad102aa6 100644 +index 6cbaecc5d2e..c6986007740 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt -@@ -567,7 +567,7 @@ if(USE_CUDA) +@@ -575,7 +575,7 @@ if(USE_CUDA) endif() target_link_libraries(caffe2_nvrtc PRIVATE caffe2::nvrtc ${DELAY_LOAD_FLAGS}) @@ -80,7 +80,7 @@ index 6ab41b6c847..a78ad102aa6 100644 if(USE_NCCL) list(APPEND Caffe2_GPU_SRCS ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp) -@@ -642,7 +642,7 @@ if(USE_ROCM) +@@ -656,7 +656,7 @@ if(USE_ROCM) target_link_libraries(caffe2_nvrtc hip::amdhip64 hiprtc::hiprtc) target_include_directories(caffe2_nvrtc PRIVATE ${CMAKE_BINARY_DIR}) target_compile_definitions(caffe2_nvrtc PRIVATE USE_ROCM __HIP_PLATFORM_AMD__) @@ -89,7 +89,7 @@ index 6ab41b6c847..a78ad102aa6 100644 endif() if(NOT NO_API AND NOT BUILD_LITE_INTERPRETER) -@@ -1112,7 +1112,7 @@ elseif(USE_CUDA) +@@ -1119,7 +1119,7 @@ elseif(USE_CUDA) CUDA::culibos ${CMAKE_DL_LIBS}) endif() set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/cuda/LinearAlgebraStubs.cpp PROPERTIES COMPILE_FLAGS "-DBUILD_LAZY_CUDA_LINALG") @@ -98,7 +98,7 @@ index 6ab41b6c847..a78ad102aa6 100644 endif() if(USE_PRECOMPILED_HEADERS) -@@ -1590,17 +1590,17 @@ endif() +@@ -1594,17 +1594,17 @@ endif() caffe2_interface_library(torch torch_library) @@ -121,7 +121,7 @@ index 6ab41b6c847..a78ad102aa6 100644 target_link_libraries(torch PUBLIC torch_cpu_library) -@@ -1743,7 +1743,7 @@ if(BUILD_SHARED_LIBS) +@@ -1747,7 +1747,7 @@ if(BUILD_SHARED_LIBS) target_link_libraries(torch_global_deps torch::nvtoolsext) endif() endif() @@ -131,10 +131,10 @@ index 6ab41b6c847..a78ad102aa6 100644 # ---[ Caffe2 HIP sources. 
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt -index 1632147f022..ca8a193adb5 100644 +index 3a3ca0f1236..0e2c682fd97 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt -@@ -468,7 +468,7 @@ if(NOT TORCH_PYTHON_LINK_FLAGS STREQUAL "") +@@ -466,7 +466,7 @@ if(NOT TORCH_PYTHON_LINK_FLAGS STREQUAL "") set_target_properties(torch_python PROPERTIES LINK_FLAGS ${TORCH_PYTHON_LINK_FLAGS}) endif() diff --git a/recipe/patches/0010-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch b/recipe/patches/0009-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch similarity index 90% rename from recipe/patches/0010-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch rename to recipe/patches/0009-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch index 381edb708..eea801850 100644 --- a/recipe/patches/0010-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch +++ b/recipe/patches/0009-avoid-deprecated-find_package-CUDA-in-caffe2-CMake-m.patch @@ -1,24 +1,25 @@ -From 0ee779ff0a9573eeda2b67cf32ea7061d13dd75c Mon Sep 17 00:00:00 2001 +From 2a6d0bdcc31eee3a7a327ba8aea08b34a36279ee Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 30 Jan 2025 08:33:44 +1100 -Subject: [PATCH 10/13] avoid deprecated `find_package(CUDA)` in caffe2 CMake +Subject: [PATCH 09/15] avoid deprecated `find_package(CUDA)` in caffe2 CMake metadata vendor the not-available-anymore function torch_cuda_get_nvcc_gencode_flag from CMake --- caffe2/CMakeLists.txt | 10 +-- + cmake/Dependencies.cmake | 2 +- cmake/Summary.cmake | 10 +-- cmake/TorchConfig.cmake.in | 2 +- cmake/public/cuda.cmake | 48 ++++---------- cmake/public/utils.cmake | 127 +++++++++++++++++++++++++++++++++++++ setup.py | 2 +- - 6 files changed, 151 insertions(+), 48 deletions(-) + 7 files changed, 152 insertions(+), 49 deletions(-) diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt -index a78ad102aa6..fb89e8b2e3c 100644 +index c6986007740..0c54f830820 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt -@@ -953,25 +953,25 @@ if(USE_ROCM) +@@ -958,25 +958,25 @@ if(USE_ROCM) "$<$:ATen/core/ATen_pch.h>") endif() elseif(USE_CUDA) @@ -49,11 +50,24 @@ index a78ad102aa6..fb89e8b2e3c 100644 torch_compile_options(torch_cuda) # see cmake/public/utils.cmake target_compile_definitions(torch_cuda PRIVATE USE_CUDA) +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index e8d8bc58096..d2168da264b 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -968,7 +968,7 @@ endif() + + # ---[ nvtx + if(USE_SYSTEM_NVTX) +- find_path(nvtx3_dir NAMES nvtx3 PATHS ${CUDA_INCLUDE_DIRS}) ++ find_path(nvtx3_dir NAMES nvtx3 PATHS ${CUDAToolkit_INCLUDE_DIRS}) + else() + find_path(nvtx3_dir NAMES nvtx3 PATHS "${PROJECT_SOURCE_DIR}/third_party/NVTX/c/include" NO_DEFAULT_PATH) + endif() diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake -index 745d9ea0586..993892c6d80 100644 +index 60951d6c686..b6e2ce888dc 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake -@@ -75,7 +75,7 @@ function(caffe2_print_configuration_summary) +@@ -76,7 +76,7 @@ function(caffe2_print_configuration_summary) message(STATUS " USE_CUSPARSELT : ${USE_CUSPARSELT}") message(STATUS " USE_CUDSS : ${USE_CUDSS}") message(STATUS " USE_CUFILE : ${USE_CUFILE}") @@ -62,7 +76,7 @@ index 745d9ea0586..993892c6d80 100644 message(STATUS " USE_FLASH_ATTENTION : ${USE_FLASH_ATTENTION}") message(STATUS " USE_MEM_EFF_ATTENTION : ${USE_MEM_EFF_ATTENTION}") if(${USE_CUDNN}) -@@ -87,7 +87,7 @@ function(caffe2_print_configuration_summary) +@@ 
-88,7 +88,7 @@ function(caffe2_print_configuration_summary) if(${USE_CUFILE}) message(STATUS " cufile library : ${CUDA_cuFile_LIBRARY}") endif() @@ -71,7 +85,7 @@ index 745d9ea0586..993892c6d80 100644 message(STATUS " CUDA library : ${CUDA_cuda_driver_LIBRARY}") message(STATUS " cudart library : ${CUDA_cudart_LIBRARY}") message(STATUS " cublas library : ${CUDA_cublas_LIBRARY}") -@@ -107,12 +107,12 @@ function(caffe2_print_configuration_summary) +@@ -108,12 +108,12 @@ function(caffe2_print_configuration_summary) message(STATUS " cuDSS library : ${__tmp}") endif() message(STATUS " nvrtc : ${CUDA_nvrtc_LIBRARY}") @@ -101,7 +115,7 @@ index 0b32ffa99ce..471b7154ed0 100644 if(TARGET torch::nvtoolsext) list(APPEND TORCH_CUDA_LIBRARIES torch::nvtoolsext) diff --git a/cmake/public/cuda.cmake b/cmake/public/cuda.cmake -index 218c50a69c6..8ee4e2fe98b 100644 +index bc8855d23e6..b5c809daf20 100644 --- a/cmake/public/cuda.cmake +++ b/cmake/public/cuda.cmake @@ -26,8 +26,8 @@ if(NOT MSVC) @@ -112,10 +126,10 @@ index 218c50a69c6..8ee4e2fe98b 100644 -if(NOT CUDA_FOUND) +find_package(CUDAToolkit) +if(NOT CUDAToolkit_FOUND) - message(WARNING - "PyTorch: CUDA cannot be found. Depending on whether you are building " - "PyTorch or a PyTorch dependent library, the next warning / error will " -@@ -36,8 +36,6 @@ if(NOT CUDA_FOUND) + # If user explicitly set USE_CUDA=1, error out instead of falling back + if(_USE_CUDA_EXPLICITLY_SET AND USE_CUDA) + message(FATAL_ERROR +@@ -45,8 +45,6 @@ if(NOT CUDA_FOUND) return() endif() @@ -124,7 +138,7 @@ index 218c50a69c6..8ee4e2fe98b 100644 # Pass clang as host compiler, which according to the docs # Must be done before CUDA language is enabled, see # https://cmake.org/cmake/help/v3.15/variable/CMAKE_CUDA_HOST_COMPILER.html -@@ -56,24 +54,18 @@ if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.12.0) +@@ -65,24 +63,18 @@ if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.12.0) cmake_policy(SET CMP0074 NEW) endif() @@ -155,7 +169,7 @@ index 218c50a69c6..8ee4e2fe98b 100644 # Sometimes, we may mismatch nvcc with the CUDA headers we are # compiling with, e.g., if a ccache nvcc is fed to us by CUDA_NVCC_EXECUTABLE # but the PATH is not consistent with CUDA_HOME. It's better safe -@@ -97,8 +89,8 @@ if(CUDA_FOUND) +@@ -106,8 +98,8 @@ if(CUDA_FOUND) ) if(NOT CMAKE_CROSSCOMPILING) try_run(run_result compile_result ${PROJECT_RANDOM_BINARY_DIR} ${file} @@ -166,7 +180,7 @@ index 218c50a69c6..8ee4e2fe98b 100644 RUN_OUTPUT_VARIABLE cuda_version_from_header COMPILE_OUTPUT_VARIABLE output_var ) -@@ -106,30 +98,14 @@ if(CUDA_FOUND) +@@ -115,30 +107,14 @@ if(CUDA_FOUND) message(FATAL_ERROR "PyTorch: Couldn't determine version from header: " ${output_var}) endif() message(STATUS "PyTorch: Header version is: " ${cuda_version_from_header}) @@ -200,7 +214,7 @@ index 218c50a69c6..8ee4e2fe98b 100644 execute_process( COMMAND Python::Interpreter -c diff --git a/cmake/public/utils.cmake b/cmake/public/utils.cmake -index 68e66bb3fc3..e02a4abf8cc 100644 +index 3cdf5fb914b..729041636eb 100644 --- a/cmake/public/utils.cmake +++ b/cmake/public/utils.cmake @@ -293,6 +293,133 @@ macro(torch_hip_get_arch_list store_var) @@ -338,10 +352,10 @@ index 68e66bb3fc3..e02a4abf8cc 100644 # Get the XPU arch flags specified by TORCH_XPU_ARCH_LIST. 
# Usage: diff --git a/setup.py b/setup.py -index 1c5b75897df..759a756739c 100644 +index 3e82b9a6c81..e282c653a42 100644 --- a/setup.py +++ b/setup.py -@@ -1175,7 +1175,7 @@ class build_ext(setuptools.command.build_ext.build_ext): +@@ -1262,7 +1262,7 @@ class build_ext(setuptools.command.build_ext.build_ext): else: report("-- Not using cuDNN") if cmake_cache_vars["USE_CUDA"]: diff --git a/recipe/patches/0012-skip-test_norm_matrix_degenerate_shapes-on-numpy-2.3.patch b/recipe/patches/0010-skip-test_norm_matrix_degenerate_shapes-on-numpy-2.3.patch similarity index 65% rename from recipe/patches/0012-skip-test_norm_matrix_degenerate_shapes-on-numpy-2.3.patch rename to recipe/patches/0010-skip-test_norm_matrix_degenerate_shapes-on-numpy-2.3.patch index e0d52f2ef..4a8d5ce90 100644 --- a/recipe/patches/0012-skip-test_norm_matrix_degenerate_shapes-on-numpy-2.3.patch +++ b/recipe/patches/0010-skip-test_norm_matrix_degenerate_shapes-on-numpy-2.3.patch @@ -1,18 +1,18 @@ -From f38ba11d997851548be9682b37e1dba88cd7856b Mon Sep 17 00:00:00 2001 +From 574b5b7e647e207fb4595ae84632d6231e03265f Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sat, 14 Jun 2025 07:34:48 +1100 -Subject: [PATCH 12/13] skip test_norm_matrix_degenerate_shapes on numpy >=2.3 +Subject: [PATCH 10/15] skip test_norm_matrix_degenerate_shapes on numpy >=2.3 --- test/test_linalg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_linalg.py b/test/test_linalg.py -index 0f6c8f20742..f54dcce901d 100644 +index 960c18007d4..68a165fd488 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py -@@ -2042,6 +2042,7 @@ class TestLinalg(TestCase): - # Test degenerate shape results match numpy for linalg.norm matrix norms +@@ -2022,6 +2022,7 @@ class TestLinalg(TestCase): + @skipIf(np.lib.NumpyVersion(np.__version__) < '2.3.0', 'Numpy changed handling of degenerate inputs in 2.3.0') @skipCUDAIfNoMagma @skipCPUIfNoLapack + @unittest.skipIf(np.lib.NumpyVersion(np.__version__) >= "2.3.0", "Error behaviour changed") diff --git a/recipe/patches/0013-Define-PY_SSIZE_T_CLEAN-before-include-Python.h.patch b/recipe/patches/0011-Define-PY_SSIZE_T_CLEAN-before-include-Python.h.patch similarity index 81% rename from recipe/patches/0013-Define-PY_SSIZE_T_CLEAN-before-include-Python.h.patch rename to recipe/patches/0011-Define-PY_SSIZE_T_CLEAN-before-include-Python.h.patch index e7f0887ae..f71ab77f7 100644 --- a/recipe/patches/0013-Define-PY_SSIZE_T_CLEAN-before-include-Python.h.patch +++ b/recipe/patches/0011-Define-PY_SSIZE_T_CLEAN-before-include-Python.h.patch @@ -1,7 +1,7 @@ -From 8686ba30882206176dbbeba67a1063d1a08ef669 Mon Sep 17 00:00:00 2001 +From 445ccbdb5657221f95bd31ebfb6b5e83f74940bf Mon Sep 17 00:00:00 2001 From: LWisteria Date: Sun, 2 Jun 2024 19:13:23 +0900 -Subject: [PATCH 13/13] Define PY_SSIZE_T_CLEAN before #include +Subject: [PATCH 11/15] Define PY_SSIZE_T_CLEAN before #include See https://docs.python.org/3/c-api/intro.html#include-files --- diff --git a/recipe/patches/0011-Fix-CUPTI-lookup-to-include-target-directory.patch b/recipe/patches/0011-Fix-CUPTI-lookup-to-include-target-directory.patch deleted file mode 100644 index 629f10183..000000000 --- a/recipe/patches/0011-Fix-CUPTI-lookup-to-include-target-directory.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 846567d76fe17ac1c9f1154bb439c6a106445dce Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= -Date: Thu, 6 Mar 2025 13:57:25 +0100 -Subject: [PATCH 11/13] Fix CUPTI lookup to include target directory - ---- - cmake/Dependencies.cmake | 2 ++ - 1 file changed, 
2 insertions(+) - -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index f7e56828bdf..4a9fe193830 100644 ---- a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1626,6 +1626,7 @@ if(USE_KINETO) - endif() - - find_library(CUPTI_LIBRARY_PATH ${CUPTI_LIB_NAME} PATHS -+ ${CUDAToolkit_TARGET_DIR}/lib - ${CUDA_SOURCE_DIR} - ${CUDA_SOURCE_DIR}/extras/CUPTI/lib64 - ${CUDA_SOURCE_DIR}/lib -@@ -1633,6 +1634,7 @@ if(USE_KINETO) - NO_DEFAULT_PATH) - - find_path(CUPTI_INCLUDE_DIR cupti.h PATHS -+ ${CUDAToolkit_TARGET_DIR}/include - ${CUDA_SOURCE_DIR}/extras/CUPTI/include - ${CUDA_INCLUDE_DIRS} - ${CUDA_SOURCE_DIR} diff --git a/recipe/patches/0014-Add-USE_SYSTEM-options-for-KLEIDI-CUDNN_FRONTEND-CUT.patch b/recipe/patches/0012-Add-USE_SYSTEM-options-for-KLEIDI-CUDNN_FRONTEND-CUT.patch similarity index 86% rename from recipe/patches/0014-Add-USE_SYSTEM-options-for-KLEIDI-CUDNN_FRONTEND-CUT.patch rename to recipe/patches/0012-Add-USE_SYSTEM-options-for-KLEIDI-CUDNN_FRONTEND-CUT.patch index fea50e211..fb8c0df2d 100644 --- a/recipe/patches/0014-Add-USE_SYSTEM-options-for-KLEIDI-CUDNN_FRONTEND-CUT.patch +++ b/recipe/patches/0012-Add-USE_SYSTEM-options-for-KLEIDI-CUDNN_FRONTEND-CUT.patch @@ -1,7 +1,7 @@ -From 80d19fab767e27bc950b8e229b11294495192a1e Mon Sep 17 00:00:00 2001 +From cf7cb253294cabb9e460de8ce99f41e3dbf35360 Mon Sep 17 00:00:00 2001 From: Yukio Siraichi Date: Tue, 30 Sep 2025 01:10:13 +0000 -Subject: [PATCH 14/14] Add USE_SYSTEM options for KLEIDI, CUDNN_FRONTEND, +Subject: [PATCH 12/15] Add USE_SYSTEM options for KLEIDI, CUDNN_FRONTEND, CUTLASS, and FMT This commit adds CMake options to allow users to use system-installed versions of four libraries instead of the bundled versions @@ -10,16 +10,16 @@ Fixes #153863 PR: https://github.com/pytorch/pytorch/pull/166824 --- CMakeLists.txt | 8 +++++ - aten/src/ATen/CMakeLists.txt | 17 +++++++-- - cmake/Dependencies.cmake | 69 +++++++++++++++++++++++++++--------- + aten/src/ATen/CMakeLists.txt | 17 ++++++++-- + cmake/Dependencies.cmake | 65 ++++++++++++++++++++++++++---------- cmake/Summary.cmake | 4 +++ - 4 files changed, 80 insertions(+), 18 deletions(-) + 4 files changed, 75 insertions(+), 19 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt -index ce7890f002d..6d55828e61e 100644 +index 5304d054e84..a45da811631 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -467,6 +467,10 @@ option(USE_SYSTEM_BENCHMARK "Use system-provided google benchmark." OFF) +@@ -487,6 +487,10 @@ option(USE_SYSTEM_BENCHMARK "Use system-provided google benchmark." OFF) option(USE_SYSTEM_ONNX "Use system-provided onnx." OFF) option(USE_SYSTEM_XNNPACK "Use system-provided xnnpack." OFF) option(USE_SYSTEM_NVTX "Use system-provided nvtx." 
diff --git a/recipe/patches/0015-Fix-building-kineto-against-system-fmt.patch b/recipe/patches/0013-Fix-building-kineto-against-system-fmt.patch
similarity index 56%
rename from recipe/patches/0015-Fix-building-kineto-against-system-fmt.patch
rename to recipe/patches/0013-Fix-building-kineto-against-system-fmt.patch
index 5fd9277ec..82e4eae0a 100644
--- a/recipe/patches/0015-Fix-building-kineto-against-system-fmt.patch
+++ b/recipe/patches/0013-Fix-building-kineto-against-system-fmt.patch
@@ -1,26 +1,26 @@
-From 2d7566334da792a27f7a01f6e8ac34975915156c Mon Sep 17 00:00:00 2001
+From 9bebb0c39c084f5f4ec083294b2f6b27b3032c63 Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?=
 Date: Mon, 3 Nov 2025 14:59:04 +0100
-Subject: [PATCH 15/15] Fix building kineto against system fmt
+Subject: [PATCH 13/15] Fix building kineto against system fmt
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit
 Signed-off-by: Michał Górny
 ---
- cmake/Dependencies.cmake | 2 ++
- 1 file changed, 2 insertions(+)
+ cmake/Dependencies.cmake | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
 diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
-index 8b3a1b2629a..b99fbc5999f 100644
+index 07cf3dec461..b523948d09f 100644
 --- a/cmake/Dependencies.cmake
 +++ b/cmake/Dependencies.cmake
-@@ -1597,6 +1597,8 @@ else()
+@@ -1634,7 +1634,7 @@ else()
+ if(NOT fmt_FOUND)
 message(FATAL_ERROR "Cannot find system fmt library. Please install libfmt-dev or set USE_SYSTEM_FMT=OFF")
 endif()
- list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt)
-+ # for kineto
-+ add_custom_target(fmt)
+- list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt)
++ list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
 endif()
 # ---[ Kineto
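The reworked 0013 patch above makes the system-fmt code path link `fmt::fmt-header-only`, so only fmt's headers need to be present at build time. A small preflight sketch for checking that (the conda-style prefix layout is an assumption, not something the recipe itself does):

```python
import os
from pathlib import Path

# Assumed layout: headers under $CONDA_PREFIX/include, libraries under $CONDA_PREFIX/lib.
prefix = Path(os.environ.get("CONDA_PREFIX", "/usr"))
header = prefix / "include" / "fmt" / "core.h"
libdir = prefix / "lib"
libs = sorted(libdir.glob("libfmt.*")) if libdir.is_dir() else []

print(f"fmt header present : {header.exists()} ({header})")
print(f"fmt libraries      : {[p.name for p in libs] or 'none (header-only usage is fine)'}")
```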
diff --git a/recipe/patches/0016-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch b/recipe/patches/0014-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch
similarity index 87%
rename from recipe/patches/0016-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch
rename to recipe/patches/0014-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch
index 6ac645415..4fb3a06f2 100644
--- a/recipe/patches/0016-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch
+++ b/recipe/patches/0014-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch
@@ -1,7 +1,7 @@
-From 93be0373dd974e80eb9544892319dfc1a4d29c65 Mon Sep 17 00:00:00 2001
+From 4ae2acaff849e97005c5b4607dc541d089e91599 Mon Sep 17 00:00:00 2001
 From: Eddie Yan
 Date: Tue, 5 Aug 2025 00:49:50 +0000
-Subject: [PATCH 16/16] Attempt to fix torch.backends.cudnn.rnn import
+Subject: [PATCH 14/15] Attempt to fix torch.backends.cudnn.rnn import
 torch.backends.cudnn module in order to expose the .conv.fp32_precision
 and .rnn.fp32_precision settings. However, it fails to account for the
 frozen flags check.
 3 files changed, 43 insertions(+), 2 deletions(-)
 diff --git a/test/test_cuda.py b/test/test_cuda.py
-index 293bb2b7e70..477b0179f05 100644
+index 0ebfe192f8d..2aafc98064b 100644
 --- a/test/test_cuda.py
 +++ b/test/test_cuda.py
-@@ -911,6 +911,7 @@ print(t.is_pinned())
+@@ -853,6 +853,7 @@ print(t.is_pinned())
 self.assertEqual(torch.backends.cudnn.rnn.fp32_precision, "none")
 @recover_orig_fp32_precision
+
 def test_fp32_precision_with_float32_matmul_precision(self):
 torch.set_float32_matmul_precision("highest")
 self.assertEqual(torch.backends.cuda.matmul.fp32_precision, "ieee")
-@@ -920,6 +921,7 @@ print(t.is_pinned())
+@@ -862,6 +863,7 @@ print(t.is_pinned())
 self.assertEqual(torch.backends.cuda.matmul.fp32_precision, "tf32")
 @recover_orig_fp32_precision
+
 torch.backends.cudnn.conv.fp32_precision = "none"
 torch.backends.cudnn.rnn.fp32_precision = "tf32"
 diff --git a/torch/backends/cudnn/__init__.py b/torch/backends/cudnn/__init__.py
-index 9c155de7c04..a4106ea32cc 100644
+index 267594531db..7d06f503e14 100644
 --- a/torch/backends/cudnn/__init__.py
 +++ b/torch/backends/cudnn/__init__.py
 @@ -15,6 +15,8 @@ from torch.backends import (
+
 try:
 from torch._C import _cudnn
-@@ -212,7 +214,6 @@ class CudnnModule(PropModule):
+@@ -215,7 +217,6 @@ class CudnnModule(PropModule):
 torch._C._get_cudnn_allow_tf32, torch._C._set_cudnn_allow_tf32
 )
 conv = _FP32Precision("cuda", "conv")
-
 _get_fp32_precision_getter("cuda", "all"),
 _set_fp32_precision_setter("cuda", "all"),
 diff --git a/torch/backends/cudnn/rnn.py b/torch/backends/cudnn/rnn.py
-index 5b253e19054..8f6a3964fab 100644
+index 0dc9ca80aa6..9281234ae3e 100644
 --- a/torch/backends/cudnn/rnn.py
 +++ b/torch/backends/cudnn/rnn.py
 @@ -1,5 +1,13 @@
+
 try:
-@@ -20,7 +28,7 @@ def get_cudnn_mode(mode):
- elif mode == "GRU":
+@@ -24,7 +32,7 @@ def get_cudnn_mode(mode):
+ # pyrefly: ignore [missing-attribute]
 return int(_cudnn.RNNMode.gru)
 else:
- raise Exception(f"Unknown mode: {mode}") # noqa: TRY002
+
 # NB: We don't actually need this class anymore (in fact, we could serialize the
-@@ -42,6 +50,20 @@ class Unserializable:
+@@ -46,6 +54,20 @@ class Unserializable:
 self.inner = None
+
 def init_dropout_state(dropout, train, dropout_seed, dropout_state):
 dropout_desc_name = "desc_" + str(torch.cuda.current_device())
 dropout_p = dropout if train else 0
-@@ -62,3 +84,19 @@ def init_dropout_state(dropout, train, dropout_seed, dropout_state):
+@@ -67,3 +89,19 @@ def init_dropout_state(dropout, train, dropout_seed, dropout_state):
 )
 dropout_ts = dropout_state[dropout_desc_name].get()
 return dropout_ts
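The 0014 patch above is about making `torch.backends.cudnn.rnn` importable again so that the `fp32_precision` knobs on `conv` and `rnn` keep working. A defensive way to inspect those knobs from user code, since older builds do not expose them (a sketch, not part of the patch):

```python
import torch
from torch.backends import cudnn

for name in ("conv", "rnn"):
    sub = getattr(cudnn, name, None)  # may be absent or un-imported on older PyTorch builds
    precision = getattr(sub, "fp32_precision", None) if sub is not None else None
    print(f"torch.backends.cudnn.{name}.fp32_precision = {precision!r}")
```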
diff --git a/recipe/patches/0018-Use-Intel-LLVM-openmp.patch b/recipe/patches/0015-Use-Intel-LLVM-openmp.patch
similarity index 88%
rename from recipe/patches/0018-Use-Intel-LLVM-openmp.patch
rename to recipe/patches/0015-Use-Intel-LLVM-openmp.patch
index 624a9764e..40c203f86 100644
--- a/recipe/patches/0018-Use-Intel-LLVM-openmp.patch
+++ b/recipe/patches/0015-Use-Intel-LLVM-openmp.patch
@@ -1,7 +1,7 @@
-From ffbdd8bb8fa38397ad6d7958a8c4ddd80ff738eb Mon Sep 17 00:00:00 2001
+From 6161984bbdb6a3ca0ebbc5647b50c5d4db4c03f5 Mon Sep 17 00:00:00 2001
 From: Isuru Fernando
 Date: Thu, 31 Jul 2025 12:34:30 -0500
-Subject: [PATCH] Use Intel/LLVM openmp
+Subject: [PATCH 15/15] Use Intel/LLVM openmp
 This make MSVC use __kmp_* symbols available in both libiomp.dll and
 libiomp5md.dll instead of vcomp* symbols which are only in
 vcomp140.dll and libiomp5md.dll, but are missing in libiomp.dll
 1 file changed, 2 insertions(+), 2 deletions(-)
 diff --git a/cmake/Modules/FindOpenMP.cmake b/cmake/Modules/FindOpenMP.cmake
-index 8a9abff398..336a9a927a 100644
+index 8a9abff3982..336a9a927a9 100644
 --- a/cmake/Modules/FindOpenMP.cmake
 +++ b/cmake/Modules/FindOpenMP.cmake
 @@ -122,9 +122,9 @@ function(_OPENMP_FLAG_CANDIDATES LANG)
 endif()
 set(OMP_FLAG_PathScale "-openmp")
 set(OMP_FLAG_NAG "-openmp")
--- 
-2.45.2
-
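The 0015 patch above steers MSVC builds onto the Intel/LLVM OpenMP flags, so the resulting binaries resolve `__kmp_*` symbols from libiomp/libomp rather than MSVC's vcomp runtime. One way to sanity-check which OpenMP runtime a finished build reports (a sketch; the exact wording of the output differs between PyTorch versions):

```python
import torch

# parallel_info() returns a human-readable summary of the threading backends,
# typically including an OpenMP section when the build was made with OpenMP.
info = torch.__config__.parallel_info()
print(info)
print("OpenMP reported:", "OpenMP" in info)
```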
Exception(f"Unknown mode: {mode}") # noqa: TRY002 @@ -88,7 +88,7 @@ index 5b253e19054..8f6a3964fab 100644 # NB: We don't actually need this class anymore (in fact, we could serialize the -@@ -42,6 +50,20 @@ class Unserializable: +@@ -46,6 +54,20 @@ class Unserializable: self.inner = None @@ -109,7 +109,7 @@ index 5b253e19054..8f6a3964fab 100644 def init_dropout_state(dropout, train, dropout_seed, dropout_state): dropout_desc_name = "desc_" + str(torch.cuda.current_device()) dropout_p = dropout if train else 0 -@@ -62,3 +84,19 @@ def init_dropout_state(dropout, train, dropout_seed, dropout_state): +@@ -67,3 +89,19 @@ def init_dropout_state(dropout, train, dropout_seed, dropout_state): ) dropout_ts = dropout_state[dropout_desc_name].get() return dropout_ts diff --git a/recipe/patches/0018-Use-Intel-LLVM-openmp.patch b/recipe/patches/0015-Use-Intel-LLVM-openmp.patch similarity index 88% rename from recipe/patches/0018-Use-Intel-LLVM-openmp.patch rename to recipe/patches/0015-Use-Intel-LLVM-openmp.patch index 624a9764e..40c203f86 100644 --- a/recipe/patches/0018-Use-Intel-LLVM-openmp.patch +++ b/recipe/patches/0015-Use-Intel-LLVM-openmp.patch @@ -1,7 +1,7 @@ -From ffbdd8bb8fa38397ad6d7958a8c4ddd80ff738eb Mon Sep 17 00:00:00 2001 +From 6161984bbdb6a3ca0ebbc5647b50c5d4db4c03f5 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Thu, 31 Jul 2025 12:34:30 -0500 -Subject: [PATCH] Use Intel/LLVM openmp +Subject: [PATCH 15/15] Use Intel/LLVM openmp This make MSVC use __kmp_* symbols available in both libiomp.dll and libiomp5md.dll instead of vcomp* symbols which are only in @@ -11,7 +11,7 @@ vcomp140.dll and libiomp5md.dll, but are missing in libiomp.dll 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/Modules/FindOpenMP.cmake b/cmake/Modules/FindOpenMP.cmake -index 8a9abff398..336a9a927a 100644 +index 8a9abff3982..336a9a927a9 100644 --- a/cmake/Modules/FindOpenMP.cmake +++ b/cmake/Modules/FindOpenMP.cmake @@ -122,9 +122,9 @@ function(_OPENMP_FLAG_CANDIDATES LANG) @@ -26,6 +26,3 @@ index 8a9abff398..336a9a927a 100644 endif() set(OMP_FLAG_PathScale "-openmp") set(OMP_FLAG_NAG "-openmp") --- -2.45.2 - diff --git a/recipe/patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch b/recipe/patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch index fe411d716..c79c711db 100644 --- a/recipe/patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch +++ b/recipe/patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch @@ -3,10 +3,6 @@ From: "H. Vetinari" Date: Sun, 2 Feb 2025 08:54:01 +1100 Subject: [PATCH] switch away from find_package(CUDA) ---- - tensorpipe/CMakeLists.txt | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - diff --git a/third_party/tensorpipe/tensorpipe/CMakeLists.txt b/third_party/tensorpipe/tensorpipe/CMakeLists.txt index efcffc2..1c3b2ca 100644 --- a/third_party/tensorpipe/tensorpipe/CMakeLists.txt