NVIDIA · SimengLiu-nv · Oct 30, 2025 · Oct 30, 2025 · Oct 30, 2025 · Oct 30, 2025
@@ -192,7 +192,6 @@ jenkins-rockylinux8_%: PYTHON_VERSION_TAG_ID = $(if $(findstring 3.12,${PYTHON_V
 jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_ROCKYLINUX8_${PYTHON_VERSION_TAG_ID}_DOCKER_IMAGE)
 jenkins-rockylinux8_%: STAGE = tritondevel
 jenkins-rockylinux8_%: BASE_IMAGE = nvcr.io/nvidia/cuda
-# [TODO] Update to NVIDIA CUDA 13.0.2 when it's available
 jenkins-rockylinux8_%: BASE_TAG = 13.0.1-devel-rockylinux8
 
 rockylinux8_%: STAGE = tritondevel

@@ -34,15 +34,18 @@ def export_onnx(self, onnxFile):
                              *self.latent_shape).cuda()
         self.pytorch_model.cuda().eval()
         with torch.inference_mode():
-            torch.onnx.export(self.pytorch_model,
-                              latent,
-                              onnxFile,
-                              opset_version=17,
-                              input_names=['input'],
-                              output_names=['output'],
-                              dynamic_axes={'input': {
-                                  0: 'batch'
-                              }})
+            torch.onnx.export(
+                self.pytorch_model,
+                latent,
+                onnxFile,
+                opset_version=17,
+                input_names=['input'],
+                output_names=['output'],
+                dynamic_axes={'input': {
+                    0: 'batch'
+                }},
+                # Required for pytorch>=2.9.0: the dynamo-based exporter is now the default, and it introduces bugs because it does not natively support opset_version=17.
+                dynamo=False)
 
     def generate_trt_engine(self, onnxFile, planFile):
         print(f"Start exporting TRT model to {planFile}!")

@@ -89,7 +89,8 @@ def export_onnx(self, onnx_file_path, pretrained_model_path, image_url):
             dynamic_axes={"input": {
                 0: "batch"
             }},
-        )
+            # Required for pytorch>=2.9.0: the dynamo-based exporter is now the default, and it introduces bugs because it does not natively support opset_version=17.
+            dynamo=False)
         release_gc()  # Further release memory
         print(
             f"Export to ONNX file successfully! The ONNX file stays in {onnx_file_path}"

@@ -2358,7 +2358,8 @@ def launchTestJobs(pipeline, testFilter)
                             def platform = cpu_arch == X86_64_TRIPLE ? "x86_64" : "sbsa"
                             trtllm_utils.llmExecStepWithRetry(pipeline, script: "wget https://developer.download.nvidia.com/compute/cuda/repos/${ubuntu_version}/${platform}/cuda-keyring_1.1-1_all.deb")
                             trtllm_utils.llmExecStepWithRetry(pipeline, script: "dpkg -i cuda-keyring_1.1-1_all.deb")
-                            trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get update && apt-get install -y cuda-toolkit-13-0")
+                            trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get update")
+                            trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-13-0")
                         }
                         // Extra PyTorch CUDA 13.0 install for all bare-metal environments (Default PyTorch is for CUDA 12.8)
                         if (values[6]) {

@@ -19,8 +19,6 @@ pandas
 h5py==3.12.1
 StrEnum
 sentencepiece>=0.1.99
-# WAR for tensorrt depending on the archived nvidia-cuda-runtime-cu13 package
-nvidia-cuda-runtime-cu13==0.0.0a0
 tensorrt~=10.13.0
 # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-10.html#rel-25-10 uses 2.9.0a0.
 torch>=2.9.0a0,<=2.9.0

@@ -163,13 +163,16 @@ def export_onnx(model,
     logger.log(trt.Logger.INFO, f"Exporting onnx to {onnx_dir}/{onnx_name}")
     os.makedirs(onnx_dir, exist_ok=True)
 
-    torch.onnx.export(model,
-                      input,
-                      f'{onnx_dir}/{onnx_name}',
-                      opset_version=17,
-                      input_names=input_names,
-                      output_names=output_names,
-                      dynamic_axes=dynamic_axes)
+    torch.onnx.export(
+        model,
+        input,
+        f'{onnx_dir}/{onnx_name}',
+        opset_version=17,
+        input_names=input_names,
+        output_names=output_names,
+        dynamic_axes=dynamic_axes,
+        # Required for pytorch>=2.9.0: the dynamo-based exporter is now the default, and it introduces bugs because it does not natively support opset_version=17.
+        dynamo=False)
 
 
 def build_trt_engine(model_type,

@@ -355,9 +355,6 @@ triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-T
 triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-max_utilization---1-1-1-False-tensorrt_llm_bls] SKIP (https://nvbugs/5606136)
 accuracy/test_cli_flow.py::TestMinitron4BBase::test_fp8 SKIP (https://nvbugs/5606233)
 examples/test_gpt.py::test_llm_minitron_fp8_with_pseudo_loras[4b] SKIP (https://nvbugs/5606233)
+accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_bf16[multi_gpus_no_cache] SKIP (https://nvbugs/5606266)
+examples/test_llm_api_with_mpi.py::test_llm_api_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0] SKIP (https://nvbugs/5606268)
 disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[True-False-DeepSeek-V3-Lite-fp8/fp8] SKIP (https://nvbugs/5626197)
-disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[True-True-DeepSeek-V3-Lite-fp8/fp8] SKIP (https://nvbugs/5628952)
-cpp/test_e2e.py::test_benchmarks[t5-90] SKIP (https://nvbugs/5630196)
-accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4_4gpus[latency_moe_trtllm_eagle3] SKIP (https://nvbugs/5630700)
-accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend SKIP (https://nvbugs/5628952)
-accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[xgrammar-mtp_nextn=2] SKIP (https://nvbugs/5628952)