diff --git a/docker/Makefile b/docker/Makefile
index b51ae8dfc25..def1798c091 100644
--- a/docker/Makefile
+++ b/docker/Makefile
@@ -192,7 +192,6 @@ jenkins-rockylinux8_%: PYTHON_VERSION_TAG_ID = $(if $(findstring 3.12,${PYTHON_V
 jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_ROCKYLINUX8_${PYTHON_VERSION_TAG_ID}_DOCKER_IMAGE)
 jenkins-rockylinux8_%: STAGE = tritondevel
 jenkins-rockylinux8_%: BASE_IMAGE = nvcr.io/nvidia/cuda
-# [TODO] Update to NVIDIA CUDA 13.0.2 when it's available
 jenkins-rockylinux8_%: BASE_TAG = 13.0.1-devel-rockylinux8

 rockylinux8_%: STAGE = tritondevel
diff --git a/examples/models/contrib/dit/vae_decoder_trt.py b/examples/models/contrib/dit/vae_decoder_trt.py
index 1374dbcbfa6..31803a6690d 100644
--- a/examples/models/contrib/dit/vae_decoder_trt.py
+++ b/examples/models/contrib/dit/vae_decoder_trt.py
@@ -34,15 +34,18 @@ def export_onnx(self, onnxFile):
             *self.latent_shape).cuda()
         self.pytorch_model.cuda().eval()
         with torch.inference_mode():
-            torch.onnx.export(self.pytorch_model,
-                              latent,
-                              onnxFile,
-                              opset_version=17,
-                              input_names=['input'],
-                              output_names=['output'],
-                              dynamic_axes={'input': {
-                                  0: 'batch'
-                              }})
+            torch.onnx.export(
+                self.pytorch_model,
+                latent,
+                onnxFile,
+                opset_version=17,
+                input_names=['input'],
+                output_names=['output'],
+                dynamic_axes={'input': {
+                    0: 'batch'
+                }},
+                # Required for pytorch>=2.9.0 as dynamo becomes the default and introduces bugs as it does not support opset_version=17 natively
+                dynamo=False)

     def generate_trt_engine(self, onnxFile, planFile):
         print(f"Start exporting TRT model to {planFile}!")
diff --git a/examples/models/core/qwenvl/vit_onnx_trt.py b/examples/models/core/qwenvl/vit_onnx_trt.py
index 4b2f197db37..a993c87b47d 100644
--- a/examples/models/core/qwenvl/vit_onnx_trt.py
+++ b/examples/models/core/qwenvl/vit_onnx_trt.py
@@ -89,7 +89,8 @@ def export_onnx(self, onnx_file_path, pretrained_model_path, image_url):
             dynamic_axes={"input": {
                 0: "batch"
             }},
-        )
+            # Required for pytorch>=2.9.0 as dynamo becomes the default and introduces bugs as it does not support opset_version=17 natively
+            dynamo=False)
         release_gc()  # Further release memory
         print(
             f"Export to ONNX file successfully! The ONNX file stays in {onnx_file_path}"
diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy
index e21837b1d8f..84daf168518 100644
--- a/jenkins/L0_Test.groovy
+++ b/jenkins/L0_Test.groovy
@@ -2358,7 +2358,8 @@ def launchTestJobs(pipeline, testFilter)
                 def platform = cpu_arch == X86_64_TRIPLE ? "x86_64" : "sbsa"
                 trtllm_utils.llmExecStepWithRetry(pipeline, script: "wget https://developer.download.nvidia.com/compute/cuda/repos/${ubuntu_version}/${platform}/cuda-keyring_1.1-1_all.deb")
                 trtllm_utils.llmExecStepWithRetry(pipeline, script: "dpkg -i cuda-keyring_1.1-1_all.deb")
-                trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get update && apt-get install -y cuda-toolkit-13-0")
+                trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get update")
+                trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-13-0")
             }
             // Extra PyTorch CUDA 13.0 install for all bare-metal environments (Default PyTorch is for CUDA 12.8)
             if (values[6]) {
diff --git a/requirements.txt b/requirements.txt
index 0dc90b39a7a..2c469ae3971 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,8 +19,6 @@ pandas
 h5py==3.12.1
 StrEnum
 sentencepiece>=0.1.99
-# WAR for tensorrt depending on the archived nvidia-cuda-runtime-cu13 package
-nvidia-cuda-runtime-cu13==0.0.0a0
 tensorrt~=10.13.0
 # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-10.html#rel-25-10 uses 2.9.0a0.
 torch>=2.9.0a0,<=2.9.0
diff --git a/tensorrt_llm/tools/multimodal_builder.py b/tensorrt_llm/tools/multimodal_builder.py
index de3943c5634..54cf819d1f4 100644
--- a/tensorrt_llm/tools/multimodal_builder.py
+++ b/tensorrt_llm/tools/multimodal_builder.py
@@ -163,13 +163,16 @@ def export_onnx(model,
     logger.log(trt.Logger.INFO, f"Exporting onnx to {onnx_dir}/{onnx_name}")
     os.makedirs(onnx_dir, exist_ok=True)

-    torch.onnx.export(model,
-                      input,
-                      f'{onnx_dir}/{onnx_name}',
-                      opset_version=17,
-                      input_names=input_names,
-                      output_names=output_names,
-                      dynamic_axes=dynamic_axes)
+    torch.onnx.export(
+        model,
+        input,
+        f'{onnx_dir}/{onnx_name}',
+        opset_version=17,
+        input_names=input_names,
+        output_names=output_names,
+        dynamic_axes=dynamic_axes,
+        # Required for pytorch>=2.9.0 as dynamo becomes the default and introduces bugs as it does not support opset_version=17 natively
+        dynamo=False)


 def build_trt_engine(model_type,
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index 4d3c94e452e..4b70aecdac2 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -355,9 +355,6 @@ triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-T
 triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-max_utilization---1-1-1-False-tensorrt_llm_bls] SKIP (https://nvbugs/5606136)
 accuracy/test_cli_flow.py::TestMinitron4BBase::test_fp8 SKIP (https://nvbugs/5606233)
 examples/test_gpt.py::test_llm_minitron_fp8_with_pseudo_loras[4b] SKIP (https://nvbugs/5606233)
+accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_bf16[multi_gpus_no_cache] SKIP (https://nvbugs/5606266)
+examples/test_llm_api_with_mpi.py::test_llm_api_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0] SKIP (https://nvbugs/5606268)
 disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[True-False-DeepSeek-V3-Lite-fp8/fp8] SKIP (https://nvbugs/5626197)
-disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[True-True-DeepSeek-V3-Lite-fp8/fp8] SKIP (https://nvbugs/5628952)
-cpp/test_e2e.py::test_benchmarks[t5-90] SKIP (https://nvbugs/5630196)
-accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4_4gpus[latency_moe_trtllm_eagle3] SKIP (https://nvbugs/5630700)
-accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend SKIP (https://nvbugs/5628952)
-accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[xgrammar-mtp_nextn=2] SKIP (https://nvbugs/5628952)
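
Note on the recurring dynamo=False additions above: PyTorch >= 2.9.0 makes the dynamo-based exporter the default for torch.onnx.export, and it does not natively support opset_version=17, so these call sites reselect the TorchScript exporter explicitly. Below is a minimal sketch of the same guard, written so it also runs on older PyTorch builds that lack the dynamo keyword; SmallNet and smallnet.onnx are illustrative placeholders, not part of this patch:

    import inspect

    import torch


    class SmallNet(torch.nn.Module):
        """Hypothetical stand-in for the exported vision/VAE models."""

        def forward(self, x):
            return torch.relu(x)


    model = SmallNet().eval()
    example = torch.randn(1, 8)
    kwargs = dict(opset_version=17,
                  input_names=['input'],
                  output_names=['output'],
                  dynamic_axes={'input': {
                      0: 'batch'
                  }})
    # PyTorch >= 2.9.0 defaults to the dynamo exporter, which does not natively
    # support opset_version=17; pass dynamo=False when the installed torch
    # exposes the keyword (much older releases would raise TypeError otherwise).
    if 'dynamo' in inspect.signature(torch.onnx.export).parameters:
        kwargs['dynamo'] = False
    torch.onnx.export(model, example, 'smallnet.onnx', **kwargs)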