Skip to content

Commit 20e185c

Browse files
authored
Add wheel support for Newton-Schulz method via cuSolverMp (#3004)
* Add NS via cusolvermp to wheel build
  Signed-off-by: Kirthi Shankar Sivamani <ksivamani@nvidia.com>
* Build dep runtime
  Signed-off-by: ksivamani <ksivamani@nvidia.com>
* Fix
  Signed-off-by: Kirthi Shankar Sivamani <ksivamani@nvidia.com>
* fix
  Signed-off-by: ksivamani <ksivamani@nvidia.com>
* Fix
  Signed-off-by: ksivamani <ksivamani@nvidia.com>
* rm prev cublas req
  Signed-off-by: ksivamani <ksivamani@nvidia.com>
---------
Signed-off-by: Kirthi Shankar Sivamani <ksivamani@nvidia.com>
Signed-off-by: ksivamani <ksivamani@nvidia.com>
1 parent 5fdfbec commit 20e185c

6 files changed

Lines changed: 67 additions & 4 deletions

File tree

build_tools/utils.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import sys
1515
import platform
1616
from pathlib import Path
17-
from importlib.metadata import version as get_version
17+
from importlib.metadata import PackageNotFoundError, distribution, version as get_version
1818
from subprocess import CalledProcessError
1919
from typing import List, Optional, Tuple, Union
2020

@@ -292,10 +292,17 @@ def cuda_version() -> Tuple[int, ...]:
292292
version_str = get_version("nvidia-cuda-runtime-cu12")
293293
version_tuple = tuple(int(part) for part in version_str.split(".") if part.isdigit())
294294
return version_tuple
295-
except importlib.metadata.PackageNotFoundError:
295+
except PackageNotFoundError:
296296
raise RuntimeError("Could neither find NVCC executable nor CUDA runtime Python package.")
297297

298298

299+
def cusolvermp_pypi_package_name(cuda_major: Optional[int] = None) -> str:
    """PyPI package providing cuSolverMp runtime libraries for a CUDA major version.

    Args:
        cuda_major: CUDA major version used as the ``cu<major>`` suffix of the
            package name. When ``None``, the first component of the tuple
            returned by ``cuda_version()`` is used instead.

    Returns:
        The package name, in the form ``nvidia-cusolvermp-cu<major>``.
    """
    if cuda_major is None:
        # Fall back to the CUDA version detected for the current build.
        cuda_major = cuda_version()[0]
    return f"nvidia-cusolvermp-cu{cuda_major}"
304+
305+
299306
def get_frameworks() -> List[str]:
300307
"""DL frameworks to build support for"""
301308
_frameworks: List[str] = []

build_tools/wheel_utils/Dockerfile.aarch

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,23 @@ RUN dnf clean all
3535
RUN dnf -y install glog.aarch64 glog-devel.aarch64
3636
RUN dnf -y install libnccl libnccl-devel libnccl-static
3737

38+
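# Install the cuSolverMp system packages and expose their headers and libraries
# under /opt/nvidia/cusolvermp for the Transformer Engine (TE) CMake build.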
39+
RUN dnf -y install \
40+
libcusolvermp0-cuda-${CUDA_MAJOR} libcusolvermp0-devel-cuda-${CUDA_MAJOR} && \
41+
dnf clean all
42+
RUN mkdir -p /opt/nvidia/cusolvermp && \
43+
ln -s /usr/include/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/include && \
44+
ln -s /usr/lib64/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/lib && \
45+
echo "/usr/lib64/libcusolvermp/${CUDA_MAJOR}" > /etc/ld.so.conf.d/999_nvidia_cusolvermp.conf && \
46+
ldconfig
47+
3848
ENV PATH="/usr/local/cuda/bin:${PATH}"
39-
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
49+
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:/opt/nvidia/cusolvermp/lib:${LD_LIBRARY_PATH}"
4050
ENV CUDA_HOME=/usr/local/cuda
4151
ENV CUDA_ROOT=/usr/local/cuda
4252
ENV CUDA_PATH=/usr/local/cuda
4353
ENV CUDADIR=/usr/local/cuda
54+
ENV CUSOLVERMP_HOME=/opt/nvidia/cusolvermp
4455
ENV NVTE_RELEASE_BUILD=1
4556

4657
CMD ["/bin/bash", "-c", "bash /TransformerEngine/build_tools/wheel_utils/build_wheels.sh manylinux_2_28_aarch64 $BUILD_METAPACKAGE $BUILD_COMMON $BUILD_PYTORCH $BUILD_JAX $CUDA_MAJOR"]

build_tools/wheel_utils/Dockerfile.x86

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,23 @@ RUN dnf clean all
3535
RUN dnf -y install glog.x86_64 glog-devel.x86_64
3636
RUN dnf -y install libnccl libnccl-devel libnccl-static
3737

38+
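# Install the cuSolverMp system packages and expose their headers and libraries
# under /opt/nvidia/cusolvermp for the Transformer Engine (TE) CMake build.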
39+
RUN dnf -y install \
40+
libcusolvermp0-cuda-${CUDA_MAJOR} libcusolvermp0-devel-cuda-${CUDA_MAJOR} && \
41+
dnf clean all
42+
RUN mkdir -p /opt/nvidia/cusolvermp && \
43+
ln -s /usr/include/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/include && \
44+
ln -s /usr/lib64/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/lib && \
45+
echo "/usr/lib64/libcusolvermp/${CUDA_MAJOR}" > /etc/ld.so.conf.d/999_nvidia_cusolvermp.conf && \
46+
ldconfig
47+
3848
ENV PATH="/usr/local/cuda/bin:${PATH}"
39-
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
49+
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:/opt/nvidia/cusolvermp/lib:${LD_LIBRARY_PATH}"
4050
ENV CUDA_HOME=/usr/local/cuda
4151
ENV CUDA_ROOT=/usr/local/cuda
4252
ENV CUDA_PATH=/usr/local/cuda
4353
ENV CUDADIR=/usr/local/cuda
54+
ENV CUSOLVERMP_HOME=/opt/nvidia/cusolvermp
4455
ENV NVTE_RELEASE_BUILD=1
4556

4657
CMD ["/bin/bash", "-c", "bash /TransformerEngine/build_tools/wheel_utils/build_wheels.sh manylinux_2_28_x86_64 $BUILD_METAPACKAGE $BUILD_COMMON $BUILD_PYTORCH $BUILD_JAX $CUDA_MAJOR"]

build_tools/wheel_utils/build_wheels.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ git submodule update --init --recursive
2525
# Install deps
2626
/opt/python/cp310-cp310/bin/pip install cmake pybind11[global] ninja setuptools wheel
2727

28+
# Enable optional build features. cuSolverMp is provided by the build image
29+
# (see Dockerfile.x86 / Dockerfile.aarch), which also sets CUSOLVERMP_HOME.
30+
export NVTE_WITH_CUSOLVERMP=1
31+
2832
if $BUILD_METAPACKAGE ; then
2933
cd /TransformerEngine
3034
NVTE_BUILD_METAPACKAGE=1 /opt/python/cp310-cp310/bin/python setup.py bdist_wheel 2>&1 | tee /wheelhouse/logs/metapackage.txt

setup.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from build_tools.utils import (
2121
cuda_archs,
2222
cuda_version,
23+
cusolvermp_pypi_package_name,
2324
get_frameworks,
2425
remove_dups,
2526
min_python_version_str,
@@ -109,6 +110,7 @@ def setup_requirements() -> Tuple[List[str], List[str]]:
109110
"pydantic",
110111
"importlib-metadata>=1.0",
111112
"packaging",
113+
cusolvermp_pypi_package_name(),
112114
]
113115
test_reqs: List[str] = ["pytest>=8.2.1"]
114116

transformer_engine/common/__init__.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,29 @@ def _nvidia_cudart_include_dir() -> str:
255255
return str(include_dir) if include_dir.exists() else ""
256256

257257

258+
@functools.lru_cache(maxsize=None)
259+
def _is_cusolvermp_installed_in_system() -> bool:
260+
"""Check if cuSolverMp is registered in the system library cache."""
261+
262+
if platform.system() != "Linux":
263+
return False
264+
265+
try:
266+
result = subprocess.run(
267+
["ldconfig", "-p"],
268+
capture_output=True,
269+
text=True,
270+
check=False,
271+
)
272+
except (OSError, subprocess.SubprocessError):
273+
return False
274+
275+
if result.returncode != 0:
276+
return False
277+
278+
return any("cusolvermp" in line.lower() for line in result.stdout.splitlines())
279+
280+
258281
@functools.lru_cache(maxsize=None)
259282
def _load_cuda_library_from_python(lib_name: str, strict: bool = False):
260283
"""
@@ -369,6 +392,11 @@ def _load_core_library():
369392
_, _CUDNN_LIB_CTYPES = _load_cuda_library("cudnn")
370393
system_nvrtc, _NVRTC_LIB_CTYPES = _load_cuda_library("nvrtc")
371394
system_curand, _CURAND_LIB_CTYPES = _load_cuda_library("curand")
395+
_CUSOLVERMP_LIB_CTYPES = None
396+
if not _is_cusolvermp_installed_in_system() and any(
397+
_is_package_installed(p) for p in ("nvidia-cusolvermp-cu12", "nvidia-cusolvermp-cu13")
398+
):
399+
_, _CUSOLVERMP_LIB_CTYPES = _load_cuda_library_from_python("cusolverMp", strict=False)
372400

373401
# This additional step is necessary to be able to install TE wheels
374402
# and import TE (without any guards) in an environment where the cuda

0 commit comments

Comments
 (0)