From df140b35b042e79658b92a1585c079d0749df050 Mon Sep 17 00:00:00 2001 From: Kirthi Shankar Sivamani Date: Tue, 19 May 2026 23:20:20 +0000 Subject: [PATCH 1/4] Add NS via cusolvermp to wheel build Signed-off-by: Kirthi Shankar Sivamani --- build_tools/wheel_utils/Dockerfile.aarch | 13 ++++++++++++- build_tools/wheel_utils/Dockerfile.x86 | 13 ++++++++++++- build_tools/wheel_utils/build_wheels.sh | 4 ++++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/build_tools/wheel_utils/Dockerfile.aarch b/build_tools/wheel_utils/Dockerfile.aarch index c040dadcdb..4f6635f507 100644 --- a/build_tools/wheel_utils/Dockerfile.aarch +++ b/build_tools/wheel_utils/Dockerfile.aarch @@ -35,12 +35,23 @@ RUN dnf clean all RUN dnf -y install glog.aarch64 glog-devel.aarch64 RUN dnf -y install libnccl libnccl-devel libnccl-static +# Install cuSolverMp and expose its headers and libraries under /opt/nvidia/cusolvermp for the TE CMake build. +RUN dnf -y install \ + libcusolvermp0-cuda-${CUDA_MAJOR} libcusolvermp0-devel-cuda-${CUDA_MAJOR} && \ + dnf clean all +RUN mkdir -p /opt/nvidia/cusolvermp && \ + ln -s /usr/include/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/include && \ + ln -s /usr/lib64/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/lib && \ + echo "/usr/lib64/libcusolvermp/${CUDA_MAJOR}" > /etc/ld.so.conf.d/999_nvidia_cusolvermp.conf && \ + ldconfig + ENV PATH="/usr/local/cuda/bin:${PATH}" -ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" +ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:/opt/nvidia/cusolvermp/lib:${LD_LIBRARY_PATH}" ENV CUDA_HOME=/usr/local/cuda ENV CUDA_ROOT=/usr/local/cuda ENV CUDA_PATH=/usr/local/cuda ENV CUDADIR=/usr/local/cuda +ENV CUSOLVERMP_HOME=/opt/nvidia/cusolvermp ENV NVTE_RELEASE_BUILD=1 CMD ["/bin/bash", "-c", "bash /TransformerEngine/build_tools/wheel_utils/build_wheels.sh manylinux_2_28_aarch64 $BUILD_METAPACKAGE $BUILD_COMMON $BUILD_PYTORCH $BUILD_JAX $CUDA_MAJOR"] diff --git a/build_tools/wheel_utils/Dockerfile.x86 b/build_tools/wheel_utils/Dockerfile.x86 index 2728b6b7c1..b01e443910 
100644 --- a/build_tools/wheel_utils/Dockerfile.x86 +++ b/build_tools/wheel_utils/Dockerfile.x86 @@ -35,12 +35,23 @@ RUN dnf clean all RUN dnf -y install glog.x86_64 glog-devel.x86_64 RUN dnf -y install libnccl libnccl-devel libnccl-static +# Install cuSolverMp and expose its headers and libraries under /opt/nvidia/cusolvermp for the TE CMake build. +RUN dnf -y install \ + libcusolvermp0-cuda-${CUDA_MAJOR} libcusolvermp0-devel-cuda-${CUDA_MAJOR} && \ + dnf clean all +RUN mkdir -p /opt/nvidia/cusolvermp && \ + ln -s /usr/include/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/include && \ + ln -s /usr/lib64/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/lib && \ + echo "/usr/lib64/libcusolvermp/${CUDA_MAJOR}" > /etc/ld.so.conf.d/999_nvidia_cusolvermp.conf && \ + ldconfig + ENV PATH="/usr/local/cuda/bin:${PATH}" -ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" +ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:/opt/nvidia/cusolvermp/lib:${LD_LIBRARY_PATH}" ENV CUDA_HOME=/usr/local/cuda ENV CUDA_ROOT=/usr/local/cuda ENV CUDA_PATH=/usr/local/cuda ENV CUDADIR=/usr/local/cuda +ENV CUSOLVERMP_HOME=/opt/nvidia/cusolvermp ENV NVTE_RELEASE_BUILD=1 CMD ["/bin/bash", "-c", "bash /TransformerEngine/build_tools/wheel_utils/build_wheels.sh manylinux_2_28_x86_64 $BUILD_METAPACKAGE $BUILD_COMMON $BUILD_PYTORCH $BUILD_JAX $CUDA_MAJOR"] diff --git a/build_tools/wheel_utils/build_wheels.sh b/build_tools/wheel_utils/build_wheels.sh index e9ec854dba..74a4f21dee 100644 --- a/build_tools/wheel_utils/build_wheels.sh +++ b/build_tools/wheel_utils/build_wheels.sh @@ -25,6 +25,10 @@ git submodule update --init --recursive # Install deps /opt/python/cp310-cp310/bin/pip install cmake pybind11[global] ninja setuptools wheel +# Enable optional build features. cuSolverMp is provided by the build image +# (see Dockerfile.x86 / Dockerfile.aarch), which also sets CUSOLVERMP_HOME. 
+export NVTE_WITH_CUSOLVERMP=1 + if $BUILD_METAPACKAGE ; then cd /TransformerEngine NVTE_BUILD_METAPACKAGE=1 /opt/python/cp310-cp310/bin/python setup.py bdist_wheel 2>&1 | tee /wheelhouse/logs/metapackage.txt From 50f17532ce88d65043ab6117bf71d4d396585ba7 Mon Sep 17 00:00:00 2001 From: ksivamani Date: Tue, 2 Jun 2026 16:58:25 -0400 Subject: [PATCH 2/4] Add cuSolverMp PyPI runtime dependency and load it at import Signed-off-by: ksivamani --- build_tools/utils.py | 11 +++++++++-- setup.py | 4 ++++ transformer_engine/common/__init__.py | 1 + 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/build_tools/utils.py b/build_tools/utils.py index d0f5eab425..f2548b4de6 100644 --- a/build_tools/utils.py +++ b/build_tools/utils.py @@ -14,7 +14,7 @@ import sys import platform from pathlib import Path -from importlib.metadata import version as get_version +from importlib.metadata import PackageNotFoundError, version as get_version from subprocess import CalledProcessError from typing import List, Optional, Tuple, Union @@ -292,10 +292,17 @@ def cuda_version() -> Tuple[int, ...]: version_str = get_version("nvidia-cuda-runtime-cu12") version_tuple = tuple(int(part) for part in version_str.split(".") if part.isdigit()) return version_tuple - except importlib.metadata.PackageNotFoundError: + except PackageNotFoundError: raise RuntimeError("Could neither find NVCC executable nor CUDA runtime Python package.") +def cusolvermp_pypi_package_name(cuda_major: Optional[int] = None) -> str: + """PyPI package providing cuSolverMp runtime libraries for a CUDA major version.""" + if cuda_major is None: + cuda_major = cuda_version()[0] + return f"nvidia-cusolvermp-cu{cuda_major}" + + def get_frameworks() -> List[str]: """DL frameworks to build support for""" _frameworks: List[str] = [] diff --git a/setup.py b/setup.py index ec277b6349..3d9e1de349 100644 --- a/setup.py +++ b/setup.py @@ -20,6 +20,7 @@ from build_tools.utils import ( cuda_archs, cuda_version, + cusolvermp_pypi_package_name, get_frameworks, 
remove_dups, min_python_version_str, @@ -112,6 +113,9 @@ def setup_requirements() -> Tuple[List[str], List[str]]: ] test_reqs: List[str] = ["pytest>=8.2.1"] + if bool(int(os.getenv("NVTE_WITH_CUSOLVERMP", "0"))): + install_reqs.append(cusolvermp_pypi_package_name()) + # Framework-specific requirements if not bool(int(os.getenv("NVTE_RELEASE_BUILD", "0"))): if "pytorch" in frameworks: diff --git a/transformer_engine/common/__init__.py b/transformer_engine/common/__init__.py index 40933f17a9..55235fe94c 100644 --- a/transformer_engine/common/__init__.py +++ b/transformer_engine/common/__init__.py @@ -369,6 +369,7 @@ def _load_core_library(): _, _CUDNN_LIB_CTYPES = _load_cuda_library("cudnn") system_nvrtc, _NVRTC_LIB_CTYPES = _load_cuda_library("nvrtc") system_curand, _CURAND_LIB_CTYPES = _load_cuda_library("curand") + _, _CUSOLVERMP_LIB_CTYPES = _load_cuda_library_from_python("cusolverMp", strict=False) # This additional step is necessary to be able to install TE wheels # and import TE (without any guards) in an environment where the cuda From ccaccd5cb2829ecff6491e443d1e8fd3cf64e60c Mon Sep 17 00:00:00 2001 From: Kirthi Shankar Sivamani Date: Wed, 3 Jun 2026 19:13:23 +0000 Subject: [PATCH 3/4] Make cuSolverMp an unconditional install requirement Signed-off-by: Kirthi Shankar Sivamani --- setup.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 3d9e1de349..7f6b51c148 100644 --- a/setup.py +++ b/setup.py @@ -110,12 +110,10 @@ def setup_requirements() -> Tuple[List[str], List[str]]: "pydantic", "importlib-metadata>=1.0", "packaging", + cusolvermp_pypi_package_name(), ] test_reqs: List[str] = ["pytest>=8.2.1"] - if bool(int(os.getenv("NVTE_WITH_CUSOLVERMP", "0"))): - install_reqs.append(cusolvermp_pypi_package_name()) - # Framework-specific requirements if not bool(int(os.getenv("NVTE_RELEASE_BUILD", "0"))): if "pytorch" in frameworks: From 799318095884085c39b76562ff71b5796cc66a57 Mon Sep 17 00:00:00 2001 From: ksivamani Date: Fri, 5 Jun 2026 16:57:11 -0400 Subject: 
[PATCH 4/4] Require nvidia-cublas>=13.3.0.5 when building for CUDA 13 Signed-off-by: ksivamani --- build_tools/utils.py | 9 +++++++++ setup.py | 4 ++++ 2 files changed, 13 insertions(+) diff --git a/build_tools/utils.py b/build_tools/utils.py index f2548b4de6..dd8812fed2 100644 --- a/build_tools/utils.py +++ b/build_tools/utils.py @@ -296,6 +296,15 @@ def cuda_version() -> Tuple[int, ...]: raise RuntimeError("Could neither find NVCC executable nor CUDA runtime Python package.") +def cublas_pypi_install_requirement(cuda_major: Optional[int] = None) -> Optional[str]: + """Pip install requirement for cuBLAS PyPI package, if a minimum version is needed.""" + if cuda_major is None: + cuda_major = cuda_version()[0] + if cuda_major == 13: + return "nvidia-cublas>=13.3.0.5" + return None + + def cusolvermp_pypi_package_name(cuda_major: Optional[int] = None) -> str: """PyPI package providing cuSolverMp runtime libraries for a CUDA major version.""" if cuda_major is None: diff --git a/setup.py b/setup.py index 7f6b51c148..cfaaef47be 100644 --- a/setup.py +++ b/setup.py @@ -20,6 +20,7 @@ from build_tools.utils import ( cuda_archs, cuda_version, + cublas_pypi_install_requirement, cusolvermp_pypi_package_name, get_frameworks, remove_dups, @@ -112,6 +113,9 @@ def setup_requirements() -> Tuple[List[str], List[str]]: "packaging", cusolvermp_pypi_package_name(), ] + cublas_req = cublas_pypi_install_requirement() + if cublas_req is not None: + install_reqs.append(cublas_req) test_reqs: List[str] = ["pytest>=8.2.1"] # Framework-specific requirements