diff --git a/python/openai/README.md b/python/openai/README.md index 915dbd923b..c9c14b636e 100644 --- a/python/openai/README.md +++ b/python/openai/README.md @@ -34,13 +34,6 @@ ## Pre-requisites -> [!WARNING] -> **CuPy CUDA 13 Compatibility Issue**: The Triton Inference Server Image has been upgraded to CUDA 13. You may encounter issues when using CuPy before it officially supports CUDA 13 (see [this issue](https://github.com/cupy/cupy/issues/9286) requesting CUDA 13 support). Some issues may be resolved by linking CUDA 12 shared objects to CUDA 13, for example: -> ```bash -> ln -sf /usr/local/cuda/targets/x86_64-linux/lib/libnvrtc.so.13.0.48 /usr/local/cuda/targets/x86_64-linux/lib/libnvrtc.so.12 -> export LD_LIBRARY_PATH="/usr/local/cuda/targets/x86_64-linux/lib:$LD_LIBRARY_PATH" -> ``` - 1. Docker + NVIDIA Container Runtime 2. A correctly configured `HF_TOKEN` for access to HuggingFace models. - The current examples and testing primarily use the diff --git a/qa/L0_dlpack_multi_gpu/test.sh b/qa/L0_dlpack_multi_gpu/test.sh index b2fc239b92..65dd2ad0b3 100755 --- a/qa/L0_dlpack_multi_gpu/test.sh +++ b/qa/L0_dlpack_multi_gpu/test.sh @@ -44,7 +44,7 @@ pip3 uninstall -y torch pip3 install torch==2.3.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html # Install CuPy for testing non_blocking compute streams -pip3 install cupy-cuda12x +pip3 install cupy-cuda13x if [ ${CUDA_VERSION%%.*} -gt 12 ]; then curl -L https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/linux-x86_64/cuda_nvrtc-linux-x86_64-12.9.86-archive.tar.xz \ diff --git a/qa/L0_python_api/test.sh b/qa/L0_python_api/test.sh index a29af894dc..f8d38414cc 100755 --- a/qa/L0_python_api/test.sh +++ b/qa/L0_python_api/test.sh @@ -27,11 +27,6 @@ pip3 install pytest-asyncio==0.23.8 -# Create CUDA compatibility symlink for CuPy -# TODO: Remove patch once CuPy supports CUDA 13 -ln -sf /usr/local/cuda/targets/x86_64-linux/lib/libnvrtc.so.13.0.48 /usr/local/cuda/targets/x86_64-linux/lib/libnvrtc.so.12 -export LD_LIBRARY_PATH="/usr/local/cuda/targets/x86_64-linux/lib:$LD_LIBRARY_PATH" - RET=0 set +e diff --git a/src/python/setup.py b/src/python/setup.py index 067ff61035..2c7c12a9ee 100755 --- a/src/python/setup.py +++ b/src/python/setup.py @@ -71,7 +71,7 @@ def get_tag(self): "_c/triton_bindings.pyi", ] -gpu_extras = ["cupy-cuda12x"] +gpu_extras = ["cupy-cuda13x"] test_extras = ["pytest"] all_extras = gpu_extras + test_extras