diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3c8f99b53..da55f170e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -167,8 +167,8 @@ jobs: CIBW_BEFORE_ALL_MACOS: python/tools/prepare_build_environment_macos.sh CIBW_BEFORE_ALL_WINDOWS: bash python/tools/prepare_build_environment_windows.sh CIBW_BEFORE_BUILD: pip install -r python/install_requirements.txt - CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014 - CIBW_MANYLINUX_AARCH64_IMAGE: manylinux2014 + CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28 + CIBW_MANYLINUX_AARCH64_IMAGE: manylinux_2_28 CIBW_ARCHS: ${{ matrix.arch }} CIBW_SKIP: pp* *-musllinux_* diff --git a/include/ctranslate2/devices.h b/include/ctranslate2/devices.h index 674713b8f..232307a90 100644 --- a/include/ctranslate2/devices.h +++ b/include/ctranslate2/devices.h @@ -3,6 +3,7 @@ #include #include #include +#include #ifdef CT2_WITH_TENSOR_PARALLEL # include #endif diff --git a/python/tests/test_transformers.py b/python/tests/test_transformers.py index 324d4b8f6..ec9c8f102 100644 --- a/python/tests/test_transformers.py +++ b/python/tests/test_transformers.py @@ -987,13 +987,13 @@ def test_transformers_wav2vec2( ) device = "cuda" if os.environ.get("CUDA_VISIBLE_DEVICES") else "cpu" - cpu_threads = int(os.environ.get("OMP_NUM_THREADS", 0)) + # cpu_threads = int(os.environ.get("OMP_NUM_THREADS", 0)) model = ctranslate2.models.Wav2Vec2( output_dir, device=device, device_index=[0], compute_type="int8", - intra_threads=cpu_threads, + intra_threads=1, inter_threads=1, ) diff --git a/python/tools/prepare_build_environment_linux.sh b/python/tools/prepare_build_environment_linux.sh index 7e1b38762..e63335d1d 100755 --- a/python/tools/prepare_build_environment_linux.sh +++ b/python/tools/prepare_build_environment_linux.sh @@ -7,7 +7,7 @@ pip install "cmake==3.22.*" if [ "$CIBW_ARCHS" == "aarch64" ]; then - OPENBLAS_VERSION=0.3.21 + OPENBLAS_VERSION=0.3.26 curl -L -O https://github.com/xianyi/OpenBLAS/releases/download/v${OPENBLAS_VERSION}/OpenBLAS-${OPENBLAS_VERSION}.tar.gz tar xf *.tar.gz && rm *.tar.gz cd OpenBLAS-* @@ -19,26 +19,27 @@ if [ "$CIBW_ARCHS" == "aarch64" ]; then rm -r OpenBLAS-* else - # Install CUDA 12.4: - yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo + dnf install -y dnf-plugins-core + # Install CUDA 12.8: + dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo # error mirrorlist.centos.org doesn't exists anymore. sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo - yum install --setopt=obsoletes=0 -y \ - cuda-nvcc-12-4-12.4.99-1 \ - cuda-cudart-devel-12-4-12.4.99-1 \ - libcurand-devel-12-4-10.3.5.119-1 \ - libcudnn9-devel-cuda-12-9.1.0.70-1 \ - libcublas-devel-12-4-12.4.2.65-1 \ - libnccl-2.20.5-1+cuda12.4 \ - libnccl-devel-2.20.5-1+cuda12.4 - ln -s cuda-12.4 /usr/local/cuda + dnf install --setopt=obsoletes=0 -y \ + cuda-nvcc-12-8-12.8.93-1 \ + cuda-cudart-devel-12-8-12.8.90-1 \ + libcurand-devel-12-8-10.3.9.90-1 \ + libcudnn9-devel-cuda-12-9.10.2.21-1 \ + libcublas-devel-12-8-12.8.4.1-1 \ + libnccl-2.26.2-1+cuda12.8 \ + libnccl-devel-2.26.2-1+cuda12.8 + ln -s cuda-12.8 /usr/local/cuda ONEAPI_VERSION=2025.3.0 - yum-config-manager --add-repo https://yum.repos.intel.com/oneapi + dnf config-manager --add-repo https://yum.repos.intel.com/oneapi rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - yum install -y intel-oneapi-mkl-devel-$ONEAPI_VERSION + dnf install -y intel-oneapi-mkl-devel-$ONEAPI_VERSION ONEDNN_VERSION=3.1.1 curl -L -O https://github.com/oneapi-src/oneDNN/archive/refs/tags/v${ONEDNN_VERSION}.tar.gz diff --git a/python/tools/prepare_build_environment_windows.sh b/python/tools/prepare_build_environment_windows.sh index cede46f8f..7c8d798c5 100755 --- a/python/tools/prepare_build_environment_windows.sh +++ b/python/tools/prepare_build_environment_windows.sh @@ -3,14 +3,14 @@ set -e set -x -CUDA_ROOT="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.4" -curl --netrc-optional -L -nv -o cuda.exe https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe -./cuda.exe -s nvcc_12.4 cudart_12.4 cublas_dev_12.4 curand_dev_12.4 +CUDA_ROOT="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.8" +curl --netrc-optional -L -nv -o cuda.exe https://developer.download.nvidia.com/compute/cuda/12.8.1/local_installers/cuda_12.8.1_572.61_windows.exe +./cuda.exe -s nvcc_12.8 cudart_12.8 cublas_dev_12.8 curand_dev_12.8 rm cuda.exe -CUDNN_ROOT="C:/Program Files/NVIDIA/CUDNN/v9.1" -curl --netrc-optional -L -nv -o cudnn.exe https://developer.download.nvidia.com/compute/cudnn/9.1.0/local_installers/cudnn_9.1.0_windows.exe +CUDNN_ROOT="C:/Program Files/NVIDIA/CUDNN/v9.10" +curl --netrc-optional -L -nv -o cudnn.exe https://developer.download.nvidia.com/compute/cudnn/9.10.2/local_installers/cudnn_9.10.2_windows.exe ./cudnn.exe -s sleep 10 # Remove 11.8 folders @@ -18,15 +18,15 @@ rm -rf "$CUDNN_ROOT/bin/11.8" rm -rf "$CUDNN_ROOT/lib/11.8" rm -rf "$CUDNN_ROOT/include/11.8" -# Move contents of 12.4 to parent directories -mv "$CUDNN_ROOT/bin/12.4/"* "$CUDNN_ROOT/bin/" -mv "$CUDNN_ROOT/lib/12.4/"* "$CUDNN_ROOT/lib/" -mv "$CUDNN_ROOT/include/12.4/"* "$CUDNN_ROOT/include/" +# Move contents of 12.9 to parent directories +mv "$CUDNN_ROOT/bin/12.9/"* "$CUDNN_ROOT/bin/" +mv "$CUDNN_ROOT/lib/12.9/"* "$CUDNN_ROOT/lib/" +mv "$CUDNN_ROOT/include/12.9/"* "$CUDNN_ROOT/include/" -# Remove empty 12.4 folders -rmdir "$CUDNN_ROOT/bin/12.4" -rmdir "$CUDNN_ROOT/lib/12.4" -rmdir "$CUDNN_ROOT/include/12.4" +# Remove empty 12.9 folders +rmdir "$CUDNN_ROOT/bin/12.9" +rmdir "$CUDNN_ROOT/lib/12.9" +rmdir "$CUDNN_ROOT/include/12.9" cp -r "$CUDNN_ROOT"/* "$CUDA_ROOT" rm cudnn.exe