OpenNMT · Purfview · Nov 25, 2025 · Nov 25, 2025 · Nov 25, 2025 · Nov 25, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -167,8 +167,8 @@ jobs:
           CIBW_BEFORE_ALL_MACOS: python/tools/prepare_build_environment_macos.sh
           CIBW_BEFORE_ALL_WINDOWS: bash python/tools/prepare_build_environment_windows.sh
           CIBW_BEFORE_BUILD: pip install -r python/install_requirements.txt
-          CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014
-          CIBW_MANYLINUX_AARCH64_IMAGE: manylinux2014
+          CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
+          CIBW_MANYLINUX_AARCH64_IMAGE: manylinux_2_28
           CIBW_ARCHS: ${{ matrix.arch }}
           CIBW_SKIP: pp* *-musllinux_*
 

diff --git a/include/ctranslate2/devices.h b/include/ctranslate2/devices.h
@@ -3,6 +3,7 @@
 #include <stdexcept>
 #include <string>
 #include <vector>
+#include <cstdint>
 #ifdef CT2_WITH_TENSOR_PARALLEL
 #  include <nccl.h>
 #endif

diff --git a/python/tests/test_transformers.py b/python/tests/test_transformers.py
@@ -987,13 +987,13 @@ def test_transformers_wav2vec2(
         )
 
         device = "cuda" if os.environ.get("CUDA_VISIBLE_DEVICES") else "cpu"
-        cpu_threads = int(os.environ.get("OMP_NUM_THREADS", 0))
+        # intra_threads is pinned to 1 (was OMP_NUM_THREADS); TODO confirm why.
         model = ctranslate2.models.Wav2Vec2(
             output_dir,
             device=device,
             device_index=[0],
             compute_type="int8",
-            intra_threads=cpu_threads,
+            intra_threads=1,
             inter_threads=1,
         )
 

diff --git a/python/tools/prepare_build_environment_linux.sh b/python/tools/prepare_build_environment_linux.sh
@@ -7,7 +7,7 @@ pip install "cmake==3.22.*"
 
 if [ "$CIBW_ARCHS" == "aarch64" ]; then
 
-    OPENBLAS_VERSION=0.3.21
+    OPENBLAS_VERSION=0.3.26
     curl -L -O https://github.com/xianyi/OpenBLAS/releases/download/v${OPENBLAS_VERSION}/OpenBLAS-${OPENBLAS_VERSION}.tar.gz
     tar xf *.tar.gz && rm *.tar.gz
     cd OpenBLAS-*
@@ -19,26 +19,27 @@ if [ "$CIBW_ARCHS" == "aarch64" ]; then
     rm -r OpenBLAS-*
 
 else
-    # Install CUDA 12.4:
-    yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
+    dnf install -y dnf-plugins-core
+    # Install CUDA 12.8:
+    dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
     # error mirrorlist.centos.org doesn't exists anymore.
     sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
     sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
     sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
-    yum install --setopt=obsoletes=0 -y \
-        cuda-nvcc-12-4-12.4.99-1 \
-        cuda-cudart-devel-12-4-12.4.99-1 \
-        libcurand-devel-12-4-10.3.5.119-1 \
-        libcudnn9-devel-cuda-12-9.1.0.70-1 \
-        libcublas-devel-12-4-12.4.2.65-1 \
-        libnccl-2.20.5-1+cuda12.4 \
-        libnccl-devel-2.20.5-1+cuda12.4
-    ln -s cuda-12.4 /usr/local/cuda
+    dnf install --setopt=obsoletes=0 -y \
+        cuda-nvcc-12-8-12.8.93-1 \
+        cuda-cudart-devel-12-8-12.8.90-1 \
+        libcurand-devel-12-8-10.3.9.90-1 \
+        libcudnn9-devel-cuda-12-9.10.2.21-1 \
+        libcublas-devel-12-8-12.8.4.1-1 \
+        libnccl-2.26.2-1+cuda12.8 \
+        libnccl-devel-2.26.2-1+cuda12.8
+    ln -s cuda-12.8 /usr/local/cuda
 
     ONEAPI_VERSION=2025.3.0
-    yum-config-manager --add-repo https://yum.repos.intel.com/oneapi
+    dnf config-manager --add-repo https://yum.repos.intel.com/oneapi
     rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-    yum install -y intel-oneapi-mkl-devel-$ONEAPI_VERSION
+    dnf install -y intel-oneapi-mkl-devel-$ONEAPI_VERSION
 
     ONEDNN_VERSION=3.1.1
     curl -L -O https://github.com/oneapi-src/oneDNN/archive/refs/tags/v${ONEDNN_VERSION}.tar.gz

diff --git a/python/tools/prepare_build_environment_windows.sh b/python/tools/prepare_build_environment_windows.sh
@@ -3,30 +3,30 @@
 set -e
 set -x
 
-CUDA_ROOT="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.4"
-curl --netrc-optional -L -nv -o cuda.exe https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe
-./cuda.exe -s nvcc_12.4 cudart_12.4 cublas_dev_12.4 curand_dev_12.4
+CUDA_ROOT="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.8"
+curl --netrc-optional -L -nv -o cuda.exe https://developer.download.nvidia.com/compute/cuda/12.8.1/local_installers/cuda_12.8.1_572.61_windows.exe
+./cuda.exe -s nvcc_12.8 cudart_12.8 cublas_dev_12.8 curand_dev_12.8
 
 rm cuda.exe
 
-CUDNN_ROOT="C:/Program Files/NVIDIA/CUDNN/v9.1"
-curl --netrc-optional -L -nv -o cudnn.exe https://developer.download.nvidia.com/compute/cudnn/9.1.0/local_installers/cudnn_9.1.0_windows.exe
+CUDNN_ROOT="C:/Program Files/NVIDIA/CUDNN/v9.10"
+curl --netrc-optional -L -nv -o cudnn.exe https://developer.download.nvidia.com/compute/cudnn/9.10.2/local_installers/cudnn_9.10.2_windows.exe
 ./cudnn.exe -s
 sleep 10
 # Remove 11.8 folders
 rm -rf "$CUDNN_ROOT/bin/11.8"
 rm -rf "$CUDNN_ROOT/lib/11.8"
 rm -rf "$CUDNN_ROOT/include/11.8"
 
-# Move contents of 12.4 to parent directories
-mv "$CUDNN_ROOT/bin/12.4/"* "$CUDNN_ROOT/bin/"
-mv "$CUDNN_ROOT/lib/12.4/"* "$CUDNN_ROOT/lib/"
-mv "$CUDNN_ROOT/include/12.4/"* "$CUDNN_ROOT/include/"
+# Move contents of 12.9 to parent directories
+mv "$CUDNN_ROOT/bin/12.9/"* "$CUDNN_ROOT/bin/"
+mv "$CUDNN_ROOT/lib/12.9/"* "$CUDNN_ROOT/lib/"
+mv "$CUDNN_ROOT/include/12.9/"* "$CUDNN_ROOT/include/"
 
-# Remove empty 12.4 folders
-rmdir "$CUDNN_ROOT/bin/12.4"
-rmdir "$CUDNN_ROOT/lib/12.4"
-rmdir "$CUDNN_ROOT/include/12.4"
+# Remove empty 12.9 folders
+rmdir "$CUDNN_ROOT/bin/12.9"
+rmdir "$CUDNN_ROOT/lib/12.9"
+rmdir "$CUDNN_ROOT/include/12.9"
 cp -r "$CUDNN_ROOT"/* "$CUDA_ROOT"
 rm cudnn.exe