Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions build_tools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import sys
import platform
from pathlib import Path
from importlib.metadata import version as get_version
from importlib.metadata import PackageNotFoundError, distribution, version as get_version
from subprocess import CalledProcessError
from typing import List, Optional, Tuple, Union

Expand Down Expand Up @@ -292,10 +292,26 @@ def cuda_version() -> Tuple[int, ...]:
version_str = get_version("nvidia-cuda-runtime-cu12")
version_tuple = tuple(int(part) for part in version_str.split(".") if part.isdigit())
return version_tuple
except importlib.metadata.PackageNotFoundError:
except PackageNotFoundError:
raise RuntimeError("Could neither find NVCC executable nor CUDA runtime Python package.")


def cublas_pypi_install_requirement(cuda_major: Optional[int] = None) -> Optional[str]:
    """Return the pip requirement for the cuBLAS PyPI package, if one is needed.

    Parameters
    ----------
    cuda_major:
        CUDA major version to decide for. When ``None``, the first element
        of ``cuda_version()`` is used instead.

    Returns
    -------
    The requirement string with a minimum version pin when the CUDA major
    version is 13, otherwise ``None`` (no requirement is added).
    """
    # Fall back to the detected CUDA version only when the caller gave none.
    major = cuda_version()[0] if cuda_major is None else cuda_major
    return "nvidia-cublas>=13.3.0.5" if major == 13 else None


def cusolvermp_pypi_package_name(cuda_major: Optional[int] = None) -> str:
    """Return the name of the PyPI package with the cuSolverMp runtime libraries.

    Parameters
    ----------
    cuda_major:
        CUDA major version the package name is built for. When ``None``,
        the first element of ``cuda_version()`` is used instead.

    Returns
    -------
    The package name, formed by appending the CUDA major version to the
    ``nvidia-cusolvermp-cu`` prefix.
    """
    if cuda_major is not None:
        return f"nvidia-cusolvermp-cu{cuda_major}"
    # No explicit version given: derive it from the detected CUDA version.
    cuda_major = cuda_version()[0]
    return f"nvidia-cusolvermp-cu{cuda_major}"


def get_frameworks() -> List[str]:
"""DL frameworks to build support for"""
_frameworks: List[str] = []
Expand Down
13 changes: 12 additions & 1 deletion build_tools/wheel_utils/Dockerfile.aarch
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,23 @@ RUN dnf clean all
RUN dnf -y install glog.aarch64 glog-devel.aarch64
RUN dnf -y install libnccl libnccl-devel libnccl-static

# Expose system libs for TE CMake build.
# Install the cuSolverMp runtime and development packages matching the CUDA
# major version given by ${CUDA_MAJOR}, then drop the dnf cache.
RUN dnf -y install \
libcusolvermp0-cuda-${CUDA_MAJOR} libcusolvermp0-devel-cuda-${CUDA_MAJOR} && \
dnf clean all
# Symlink the versioned include/lib directories under /opt/nvidia/cusolvermp,
# add the library directory to the dynamic linker configuration, and refresh
# the linker cache with ldconfig.
RUN mkdir -p /opt/nvidia/cusolvermp && \
ln -s /usr/include/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/include && \
ln -s /usr/lib64/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/lib && \
echo "/usr/lib64/libcusolvermp/${CUDA_MAJOR}" > /etc/ld.so.conf.d/999_nvidia_cusolvermp.conf && \
ldconfig

ENV PATH="/usr/local/cuda/bin:${PATH}"
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:/opt/nvidia/cusolvermp/lib:${LD_LIBRARY_PATH}"
ENV CUDA_HOME=/usr/local/cuda
ENV CUDA_ROOT=/usr/local/cuda
ENV CUDA_PATH=/usr/local/cuda
ENV CUDADIR=/usr/local/cuda
ENV CUSOLVERMP_HOME=/opt/nvidia/cusolvermp
ENV NVTE_RELEASE_BUILD=1

CMD ["/bin/bash", "-c", "bash /TransformerEngine/build_tools/wheel_utils/build_wheels.sh manylinux_2_28_aarch64 $BUILD_METAPACKAGE $BUILD_COMMON $BUILD_PYTORCH $BUILD_JAX $CUDA_MAJOR"]
13 changes: 12 additions & 1 deletion build_tools/wheel_utils/Dockerfile.x86
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,23 @@ RUN dnf clean all
RUN dnf -y install glog.x86_64 glog-devel.x86_64
RUN dnf -y install libnccl libnccl-devel libnccl-static

# Expose system libs for TE CMake build.
# Install the cuSolverMp runtime and development packages matching the CUDA
# major version given by ${CUDA_MAJOR}, then drop the dnf cache.
RUN dnf -y install \
libcusolvermp0-cuda-${CUDA_MAJOR} libcusolvermp0-devel-cuda-${CUDA_MAJOR} && \
dnf clean all
# Symlink the versioned include/lib directories under /opt/nvidia/cusolvermp,
# add the library directory to the dynamic linker configuration, and refresh
# the linker cache with ldconfig.
RUN mkdir -p /opt/nvidia/cusolvermp && \
ln -s /usr/include/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/include && \
ln -s /usr/lib64/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/lib && \
echo "/usr/lib64/libcusolvermp/${CUDA_MAJOR}" > /etc/ld.so.conf.d/999_nvidia_cusolvermp.conf && \
ldconfig

ENV PATH="/usr/local/cuda/bin:${PATH}"
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:/opt/nvidia/cusolvermp/lib:${LD_LIBRARY_PATH}"
ENV CUDA_HOME=/usr/local/cuda
ENV CUDA_ROOT=/usr/local/cuda
ENV CUDA_PATH=/usr/local/cuda
ENV CUDADIR=/usr/local/cuda
ENV CUSOLVERMP_HOME=/opt/nvidia/cusolvermp
ENV NVTE_RELEASE_BUILD=1

CMD ["/bin/bash", "-c", "bash /TransformerEngine/build_tools/wheel_utils/build_wheels.sh manylinux_2_28_x86_64 $BUILD_METAPACKAGE $BUILD_COMMON $BUILD_PYTORCH $BUILD_JAX $CUDA_MAJOR"]
4 changes: 4 additions & 0 deletions build_tools/wheel_utils/build_wheels.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ git submodule update --init --recursive
# Install deps
/opt/python/cp310-cp310/bin/pip install cmake pybind11[global] ninja setuptools wheel

# Enable optional build features. cuSolverMp is provided by the build image
# (see Dockerfile.x86 / Dockerfile.aarch), which also sets CUSOLVERMP_HOME.
export NVTE_WITH_CUSOLVERMP=1
Comment on lines +28 to +30
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Three of the four advertised flags never get exported

The PR description and title claim to enable NVTE_WITH_CUSOLVERMP, NVTE_WITH_CUBLASMP, NVTE_ENABLE_NVSHMEM, and NVTE_UB_WITH_MPI in the wheel build, but only NVTE_WITH_CUSOLVERMP is exported here. None of NVTE_WITH_CUBLASMP, NVTE_ENABLE_NVSHMEM, or NVTE_UB_WITH_MPI is exported in build_wheels.sh, and no corresponding packages (cuBLASMP, NVSHMEM, OpenMPI) are installed in either Dockerfile. Wheels built from this script will silently omit those three features.


if $BUILD_METAPACKAGE ; then
cd /TransformerEngine
NVTE_BUILD_METAPACKAGE=1 /opt/python/cp310-cp310/bin/python setup.py bdist_wheel 2>&1 | tee /wheelhouse/logs/metapackage.txt
Expand Down
6 changes: 6 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
from build_tools.utils import (
cuda_archs,
cuda_version,
cublas_pypi_install_requirement,
cusolvermp_pypi_package_name,
get_frameworks,
remove_dups,
min_python_version_str,
Expand Down Expand Up @@ -109,7 +111,11 @@ def setup_requirements() -> Tuple[List[str], List[str]]:
"pydantic",
"importlib-metadata>=1.0",
"packaging",
cusolvermp_pypi_package_name(),
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 cuSolverMp added as unconditional install requirement

cusolvermp_pypi_package_name() is appended to install_reqs with no guard on NVTE_WITH_CUSOLVERMP, so the generated wheel's METADATA always lists nvidia-cusolvermp-cu12 (or cu13) as a mandatory runtime dependency — even when TE is built without cuSolverMp support (the default, since CMakeLists.txt has option(NVTE_WITH_CUSOLVERMP … OFF)). Any downstream user who installs a source-built wheel compiled without the flag will be forced to pull in the cuSolverMp library unnecessarily, and a pip solve in an environment that lacks the package will fail entirely.

The requirement should mirror the cmake-flag guard already used for cublasmp (lines 75-80):

if bool(int(os.getenv("NVTE_WITH_CUSOLVERMP", "0"))):
    install_reqs.append(cusolvermp_pypi_package_name())

]
cublas_req = cublas_pypi_install_requirement()
if cublas_req is not None:
install_reqs.append(cublas_req)
test_reqs: List[str] = ["pytest>=8.2.1"]

# Framework-specific requirements
Expand Down
1 change: 1 addition & 0 deletions transformer_engine/common/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ def _load_core_library():
_, _CUDNN_LIB_CTYPES = _load_cuda_library("cudnn")
system_nvrtc, _NVRTC_LIB_CTYPES = _load_cuda_library("nvrtc")
system_curand, _CURAND_LIB_CTYPES = _load_cuda_library("curand")
_, _CUSOLVERMP_LIB_CTYPES = _load_cuda_library_from_python("cusolverMp", strict=False)
Comment thread
ksivaman marked this conversation as resolved.

# This additional step is necessary to be able to install TE wheels
# and import TE (without any guards) in an environment where the cuda
Expand Down
Loading