Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,8 @@ jobs:
CIBW_BEFORE_ALL_MACOS: python/tools/prepare_build_environment_macos.sh
CIBW_BEFORE_ALL_WINDOWS: bash python/tools/prepare_build_environment_windows.sh
CIBW_BEFORE_BUILD: pip install -r python/install_requirements.txt
CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014
CIBW_MANYLINUX_AARCH64_IMAGE: manylinux2014
CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
CIBW_MANYLINUX_AARCH64_IMAGE: manylinux_2_28
CIBW_ARCHS: ${{ matrix.arch }}
CIBW_SKIP: pp* *-musllinux_*

Expand Down
1 change: 1 addition & 0 deletions include/ctranslate2/devices.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <stdexcept>
#include <string>
#include <vector>
#include <cstdint>
#ifdef CT2_WITH_TENSOR_PARALLEL
# include <nccl.h>
#endif
Expand Down
4 changes: 2 additions & 2 deletions python/tests/test_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -987,13 +987,13 @@ def test_transformers_wav2vec2(
)

device = "cuda" if os.environ.get("CUDA_VISIBLE_DEVICES") else "cpu"
cpu_threads = int(os.environ.get("OMP_NUM_THREADS", 0))
# cpu_threads = int(os.environ.get("OMP_NUM_THREADS", 0))
model = ctranslate2.models.Wav2Vec2(
output_dir,
device=device,
device_index=[0],
compute_type="int8",
intra_threads=cpu_threads,
intra_threads=1,
inter_threads=1,
)

Expand Down
29 changes: 15 additions & 14 deletions python/tools/prepare_build_environment_linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pip install "cmake==3.22.*"

if [ "$CIBW_ARCHS" == "aarch64" ]; then

OPENBLAS_VERSION=0.3.21
OPENBLAS_VERSION=0.3.26
curl -L -O https://github.com/xianyi/OpenBLAS/releases/download/v${OPENBLAS_VERSION}/OpenBLAS-${OPENBLAS_VERSION}.tar.gz
tar xf *.tar.gz && rm *.tar.gz
cd OpenBLAS-*
Expand All @@ -19,26 +19,27 @@ if [ "$CIBW_ARCHS" == "aarch64" ]; then
rm -r OpenBLAS-*

else
# Install CUDA 12.4:
yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
dnf install -y dnf-plugins-core
# Install CUDA 12.8:
dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
# mirrorlist.centos.org no longer exists: point the repos at vault.centos.org and use baseurl instead of mirrorlist.
sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
yum install --setopt=obsoletes=0 -y \
cuda-nvcc-12-4-12.4.99-1 \
cuda-cudart-devel-12-4-12.4.99-1 \
libcurand-devel-12-4-10.3.5.119-1 \
libcudnn9-devel-cuda-12-9.1.0.70-1 \
libcublas-devel-12-4-12.4.2.65-1 \
libnccl-2.20.5-1+cuda12.4 \
libnccl-devel-2.20.5-1+cuda12.4
ln -s cuda-12.4 /usr/local/cuda
dnf install --setopt=obsoletes=0 -y \
cuda-nvcc-12-8-12.8.93-1 \
cuda-cudart-devel-12-8-12.8.90-1 \
libcurand-devel-12-8-10.3.9.90-1 \
libcudnn9-devel-cuda-12-9.10.2.21-1 \
libcublas-devel-12-8-12.8.4.1-1 \
libnccl-2.26.2-1+cuda12.8 \
libnccl-devel-2.26.2-1+cuda12.8
ln -s cuda-12.8 /usr/local/cuda

ONEAPI_VERSION=2025.3.0
yum-config-manager --add-repo https://yum.repos.intel.com/oneapi
dnf config-manager --add-repo https://yum.repos.intel.com/oneapi
rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
yum install -y intel-oneapi-mkl-devel-$ONEAPI_VERSION
dnf install -y intel-oneapi-mkl-devel-$ONEAPI_VERSION

ONEDNN_VERSION=3.1.1
curl -L -O https://github.com/oneapi-src/oneDNN/archive/refs/tags/v${ONEDNN_VERSION}.tar.gz
Expand Down
26 changes: 13 additions & 13 deletions python/tools/prepare_build_environment_windows.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,30 @@
set -e
set -x

CUDA_ROOT="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.4"
curl --netrc-optional -L -nv -o cuda.exe https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe
./cuda.exe -s nvcc_12.4 cudart_12.4 cublas_dev_12.4 curand_dev_12.4
CUDA_ROOT="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.8"
curl --netrc-optional -L -nv -o cuda.exe https://developer.download.nvidia.com/compute/cuda/12.8.1/local_installers/cuda_12.8.1_572.61_windows.exe
./cuda.exe -s nvcc_12.8 cudart_12.8 cublas_dev_12.8 curand_dev_12.8

rm cuda.exe

CUDNN_ROOT="C:/Program Files/NVIDIA/CUDNN/v9.1"
curl --netrc-optional -L -nv -o cudnn.exe https://developer.download.nvidia.com/compute/cudnn/9.1.0/local_installers/cudnn_9.1.0_windows.exe
CUDNN_ROOT="C:/Program Files/NVIDIA/CUDNN/v9.10"
curl --netrc-optional -L -nv -o cudnn.exe https://developer.download.nvidia.com/compute/cudnn/9.10.2/local_installers/cudnn_9.10.2_windows.exe
./cudnn.exe -s
sleep 10
# Remove 11.8 folders
rm -rf "$CUDNN_ROOT/bin/11.8"
rm -rf "$CUDNN_ROOT/lib/11.8"
rm -rf "$CUDNN_ROOT/include/11.8"

# Move contents of 12.4 to parent directories
mv "$CUDNN_ROOT/bin/12.4/"* "$CUDNN_ROOT/bin/"
mv "$CUDNN_ROOT/lib/12.4/"* "$CUDNN_ROOT/lib/"
mv "$CUDNN_ROOT/include/12.4/"* "$CUDNN_ROOT/include/"
# Move contents of 12.9 to parent directories
mv "$CUDNN_ROOT/bin/12.9/"* "$CUDNN_ROOT/bin/"
mv "$CUDNN_ROOT/lib/12.9/"* "$CUDNN_ROOT/lib/"
mv "$CUDNN_ROOT/include/12.9/"* "$CUDNN_ROOT/include/"

# Remove empty 12.4 folders
rmdir "$CUDNN_ROOT/bin/12.4"
rmdir "$CUDNN_ROOT/lib/12.4"
rmdir "$CUDNN_ROOT/include/12.4"
# Remove empty 12.9 folders
rmdir "$CUDNN_ROOT/bin/12.9"
rmdir "$CUDNN_ROOT/lib/12.9"
rmdir "$CUDNN_ROOT/include/12.9"
cp -r "$CUDNN_ROOT"/* "$CUDA_ROOT"
rm cudnn.exe

Expand Down
Loading