From 0e1f762c17a61e2ceb4e08bb3c135dedbf96dc20 Mon Sep 17 00:00:00 2001 From: hjjq <50634613+hjjq@users.noreply.github.com> Date: Wed, 20 May 2026 13:48:20 -0700 Subject: [PATCH 1/4] add recipes --- .github/configs/nvidia-master.yaml | 59 ++++++++ ...agg-gb300-1p1d-dep4-dep8-megamoe-mtp2.yaml | 128 ++++++++++++++++++ .../8k1k/disagg-gb300-1p1d-dep4-tp4-mtp2.yaml | 124 +++++++++++++++++ ...agg-gb300-2p1d-dep4-dep8-megamoe-mtp2.yaml | 128 ++++++++++++++++++ perf-changelog.yaml | 7 + 5 files changed, 446 insertions(+) create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-dep8-megamoe-mtp2.yaml create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-tp4-mtp2.yaml create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-2p1d-dep4-dep8-megamoe-mtp2.yaml diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 43b8be880..681584205 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -8758,6 +8758,65 @@ dsv4-fp4-gb300-dynamo-vllm: ep: 16 dp-attn: true +dsv4-fp4-gb300-dynamo-vllm-mtp2: + image: vllm/vllm-openai:v0.21.0-ubuntu2404 + model: deepseek-ai/DeepSeek-V4-Pro + model-prefix: dsv4 + runner: gb300-nv + precision: fp4 + framework: dynamo-vllm + multinode: true + disagg: true + scenarios: + fixed-seq-len: + - isl: 8192 + osl: 1024 + search-space: + - conc-list: [1, 4, 8] + spec-decoding: mtp + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/vllm/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-tp4-mtp2.yaml" + decode: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + + - conc-list: [64, 128, 1024] + spec-decoding: mtp + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/vllm/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-dep8-megamoe-mtp2.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + + - conc-list: [512, 1024] + spec-decoding: mtp + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/vllm/deepseek-v4/8k1k/disagg-gb300-2p1d-dep4-dep8-megamoe-mtp2.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + dsv4-fp4-gb300-dynamo-sglang: image: lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647 model: deepseek-ai/DeepSeek-V4-Pro diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-dep8-megamoe-mtp2.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-dep8-megamoe-mtp2.yaml new file mode 100644 index 000000000..2c7b5e060 --- /dev/null +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-dep8-megamoe-mtp2.yaml @@ -0,0 +1,128 @@ +name: "svf-vllm-disagg-gb300-1p1d-dep4-dep8-megamoe-mtp2" + +model: + path: "deepseek-v4-pro" + container: "vllm/vllm-openai:v0.21.0-ubuntu2404" + precision: "fp4" + +dynamo: + install: true + wheel: "1.2.0.dev20260426" + +setup_script: vllm-container-deps.sh + +slurm: + time_limit: "8:00:00" + +health_check: + max_attempts: 1440 + interval_seconds: 10 + +resources: + gpu_type: "gb300" + gpus_per_node: 4 + prefill_nodes: 1 + decode_nodes: 2 + prefill_workers: 1 + decode_workers: 1 + gpus_per_prefill: 4 + gpus_per_decode: 8 + +infra: + etcd_nats_dedicated_node: true + +frontend: + type: dynamo + enable_multiple_frontends: false + +backend: + type: vllm + connector: null + prefill_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + TILELANG_CLEANUP_TEMP_FILES: "1" + VLLM_USE_NCCL_SYMM_MEM: "1" + NCCL_CUMEM_ENABLE: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_NVLS_ENABLE: "1" + TORCH_SYMMMEM: "NVSHMEM" + decode_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + TILELANG_CLEANUP_TEMP_FILES: "1" + VLLM_USE_NCCL_SYMM_MEM: "1" + NCCL_CUMEM_ENABLE: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_NVLS_ENABLE: "1" + TORCH_SYMMMEM: "NVSHMEM" + + vllm_config: + prefill: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + served-model-name: "deepseek-ai/DeepSeek-V4-Pro" + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + data-parallel-size: 4 + data-parallel-rpc-port: 13345 + enable-expert-parallel: true + enable-ep-weight-filter: true + moe-backend: deep_gemm_mega_moe + enforce-eager: true + attention-config: '{"use_fp4_indexer_cache":true}' + max-model-len: 16384 + max-num-seqs: 256 + max-num-batched-tokens: 16384 + trust-remote-code: true + no-enable-prefix-caching: true + no-enable-flashinfer-autotune: true + no-async-scheduling: true + block-size: 256 + gpu-memory-utilization: 0.9 + no-disable-hybrid-kv-cache-manager: true + enable-sleep-mode: true + tokenizer-mode: deepseek_v4 + decode: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + served-model-name: "deepseek-ai/DeepSeek-V4-Pro" + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + data-parallel-size: 8 + data-parallel-rpc-port: 13345 + enable-expert-parallel: true + enable-ep-weight-filter: true + moe-backend: deep_gemm_mega_moe + speculative-config: '{"method":"mtp","num_speculative_tokens":2}' + attention-config: '{"use_fp4_indexer_cache":true}' + max-model-len: 16384 + max-num-seqs: 512 + max-cudagraph-capture-size: 512 + max-num-batched-tokens: 1024 + trust-remote-code: true + no-enable-prefix-caching: true + no-enable-flashinfer-autotune: true + block-size: 256 + compilation-config: '{"cudagraph_mode":"FULL_DECODE_ONLY","mode":0}' + gpu-memory-utilization: 0.9 + stream-interval: 50 + no-disable-hybrid-kv-cache-manager: true + enable-sleep-mode: true + tokenizer-mode: deepseek_v4 + +benchmark: + type: "sa-bench" + isl: 8192 + osl: 1024 + concurrencies: "64x128x1024" + req_rate: "inf" + use_chat_template: true + custom_tokenizer: "sa_bench_tokenizers.vllm_deepseek_v4.VLLMDeepseekV4Tokenizer" + +identity: + model: + repo: "deepseek-ai/DeepSeek-V4-Pro" + revision: "0366e4e064385807ea86b088a5c6c878ff23343b" + container: + image: "vllm/vllm-openai:v0.21.0-ubuntu2404" + frameworks: + dynamo: "1.2.0.dev20260426" diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-tp4-mtp2.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-tp4-mtp2.yaml new file mode 100644 index 000000000..993282a7a --- /dev/null +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-tp4-mtp2.yaml @@ -0,0 +1,124 @@ +name: "svf-vllm-disagg-gb300-1p1d-dep4-tp4-mtp2" + +model: + path: "deepseek-v4-pro" + container: "vllm/vllm-openai:v0.21.0-ubuntu2404" + precision: "fp4" + +dynamo: + install: true + wheel: "1.2.0.dev20260426" + +setup_script: vllm-container-deps.sh + +slurm: + time_limit: "8:00:00" + +health_check: + max_attempts: 1440 + interval_seconds: 10 + +resources: + gpu_type: "gb300" + gpus_per_node: 4 + prefill_nodes: 1 + decode_nodes: 1 + prefill_workers: 1 + decode_workers: 1 + gpus_per_prefill: 4 + gpus_per_decode: 4 + +infra: + etcd_nats_dedicated_node: true + +frontend: + type: dynamo + enable_multiple_frontends: false + +backend: + type: vllm + connector: null + prefill_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + TILELANG_CLEANUP_TEMP_FILES: "1" + VLLM_USE_NCCL_SYMM_MEM: "1" + NCCL_CUMEM_ENABLE: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_NVLS_ENABLE: "1" + TORCH_SYMMMEM: "NVSHMEM" + decode_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + TILELANG_CLEANUP_TEMP_FILES: "1" + VLLM_USE_NCCL_SYMM_MEM: "1" + NCCL_CUMEM_ENABLE: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_NVLS_ENABLE: "1" + TORCH_SYMMMEM: "NVSHMEM" + + vllm_config: + prefill: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + served-model-name: "deepseek-ai/DeepSeek-V4-Pro" + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + data-parallel-size: 4 + data-parallel-rpc-port: 13345 + enable-expert-parallel: true + enable-ep-weight-filter: true + moe-backend: deep_gemm_mega_moe + enforce-eager: true + attention-config: '{"use_fp4_indexer_cache":true}' + max-model-len: 16384 + max-num-seqs: 256 + max-num-batched-tokens: 16384 + trust-remote-code: true + no-enable-prefix-caching: true + no-enable-flashinfer-autotune: true + no-async-scheduling: true + block-size: 256 + gpu-memory-utilization: 0.9 + no-disable-hybrid-kv-cache-manager: true + enable-sleep-mode: true + tokenizer-mode: deepseek_v4 + decode: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + served-model-name: "deepseek-ai/DeepSeek-V4-Pro" + kv-cache-dtype: "fp8" + tensor-parallel-size: 4 + pipeline-parallel-size: 1 + enable-expert-parallel: false + speculative-config: '{"method":"mtp","num_speculative_tokens":2}' + attention-config: '{"use_fp4_indexer_cache":true}' + max-model-len: 16384 + max-num-seqs: 512 + max-cudagraph-capture-size: 512 + max-num-batched-tokens: 1024 + trust-remote-code: true + no-enable-prefix-caching: true + no-enable-flashinfer-autotune: true + block-size: 256 + compilation-config: '{"cudagraph_mode":"FULL_DECODE_ONLY","mode":0}' + gpu-memory-utilization: 0.9 + stream-interval: 50 + no-disable-hybrid-kv-cache-manager: true + enable-sleep-mode: true + tokenizer-mode: deepseek_v4 + +benchmark: + type: "sa-bench" + isl: 8192 + osl: 1024 + concurrencies: "1x4x8" + req_rate: "inf" + use_chat_template: true + custom_tokenizer: "sa_bench_tokenizers.vllm_deepseek_v4.VLLMDeepseekV4Tokenizer" + +identity: + model: + repo: "deepseek-ai/DeepSeek-V4-Pro" + revision: "0366e4e064385807ea86b088a5c6c878ff23343b" + container: + image: "vllm/vllm-openai:v0.21.0-ubuntu2404" + frameworks: + dynamo: "1.2.0.dev20260426" diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-2p1d-dep4-dep8-megamoe-mtp2.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-2p1d-dep4-dep8-megamoe-mtp2.yaml new file mode 100644 index 000000000..d3ad4fd14 --- /dev/null +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-2p1d-dep4-dep8-megamoe-mtp2.yaml @@ -0,0 +1,128 @@ +name: "svf-vllm-disagg-gb300-2p1d-dep4-dep8-megamoe-mtp2" + +model: + path: "deepseek-v4-pro" + container: "vllm/vllm-openai:v0.21.0-ubuntu2404" + precision: "fp4" + +dynamo: + install: true + wheel: "1.2.0.dev20260426" + +setup_script: vllm-container-deps.sh + +slurm: + time_limit: "8:00:00" + +health_check: + max_attempts: 1440 + interval_seconds: 10 + +resources: + gpu_type: "gb300" + gpus_per_node: 4 + prefill_nodes: 2 + decode_nodes: 2 + prefill_workers: 2 + decode_workers: 1 + gpus_per_prefill: 4 + gpus_per_decode: 8 + +infra: + etcd_nats_dedicated_node: true + +frontend: + type: dynamo + enable_multiple_frontends: false + +backend: + type: vllm + connector: null + prefill_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + TILELANG_CLEANUP_TEMP_FILES: "1" + VLLM_USE_NCCL_SYMM_MEM: "1" + NCCL_CUMEM_ENABLE: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_NVLS_ENABLE: "1" + TORCH_SYMMMEM: "NVSHMEM" + decode_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + TILELANG_CLEANUP_TEMP_FILES: "1" + VLLM_USE_NCCL_SYMM_MEM: "1" + NCCL_CUMEM_ENABLE: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_NVLS_ENABLE: "1" + TORCH_SYMMMEM: "NVSHMEM" + + vllm_config: + prefill: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + served-model-name: "deepseek-ai/DeepSeek-V4-Pro" + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + data-parallel-size: 4 + data-parallel-rpc-port: 13345 + enable-expert-parallel: true + enable-ep-weight-filter: true + moe-backend: deep_gemm_mega_moe + enforce-eager: true + attention-config: '{"use_fp4_indexer_cache":true}' + max-model-len: 16384 + max-num-seqs: 256 + max-num-batched-tokens: 16384 + trust-remote-code: true + no-enable-prefix-caching: true + no-enable-flashinfer-autotune: true + no-async-scheduling: true + block-size: 256 + gpu-memory-utilization: 0.9 + no-disable-hybrid-kv-cache-manager: true + enable-sleep-mode: true + tokenizer-mode: deepseek_v4 + decode: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + served-model-name: "deepseek-ai/DeepSeek-V4-Pro" + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + data-parallel-size: 8 + data-parallel-rpc-port: 13345 + enable-expert-parallel: true + enable-ep-weight-filter: true + moe-backend: deep_gemm_mega_moe + speculative-config: '{"method":"mtp","num_speculative_tokens":2}' + attention-config: '{"use_fp4_indexer_cache":true}' + max-model-len: 16384 + max-num-seqs: 512 + max-cudagraph-capture-size: 512 + max-num-batched-tokens: 1024 + trust-remote-code: true + no-enable-prefix-caching: true + no-enable-flashinfer-autotune: true + block-size: 256 + compilation-config: '{"cudagraph_mode":"FULL_DECODE_ONLY","mode":0}' + gpu-memory-utilization: 0.9 + stream-interval: 50 + no-disable-hybrid-kv-cache-manager: true + enable-sleep-mode: true + tokenizer-mode: deepseek_v4 + +benchmark: + type: "sa-bench" + isl: 8192 + osl: 1024 + concurrencies: "512x1024" + req_rate: "inf" + use_chat_template: true + custom_tokenizer: "sa_bench_tokenizers.vllm_deepseek_v4.VLLMDeepseekV4Tokenizer" + +identity: + model: + repo: "deepseek-ai/DeepSeek-V4-Pro" + revision: "0366e4e064385807ea86b088a5c6c878ff23343b" + container: + image: "vllm/vllm-openai:v0.21.0-ubuntu2404" + frameworks: + dynamo: "1.2.0.dev20260426" diff --git a/perf-changelog.yaml b/perf-changelog.yaml index c62d94781..851f0ec30 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3043,3 +3043,10 @@ description: - "Update SGLang image from v0.5.11-cu130 (5d old) to v0.5.12-cu130" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1475 + +- config-keys: + - dsv4-fp4-gb300-dynamo-vllm-mtp2 + description: + - "Add initial DeepSeek-V4-Pro FP4 8k/1k GB300 Dynamo vLLM MTP recipes" + - "1P/1D DEP4/TP4, 1P/1D DEP4/DEP8, 2P/1D DEP4/DEP8" + pr-link: TBD From b19081184396df56e261259598cf17b9e49eb75c Mon Sep 17 00:00:00 2001 From: hjjq <50634613+hjjq@users.noreply.github.com> Date: Wed, 20 May 2026 14:08:42 -0700 Subject: [PATCH 2/4] remove identity block --- .../8k1k/disagg-gb300-1p1d-dep4-dep8-megamoe-mtp2.yaml | 8 -------- .../deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-tp4-mtp2.yaml | 8 -------- .../8k1k/disagg-gb300-2p1d-dep4-dep8-megamoe-mtp2.yaml | 8 -------- 3 files changed, 24 deletions(-) diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-dep8-megamoe-mtp2.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-dep8-megamoe-mtp2.yaml index 2c7b5e060..c6f02d393 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-dep8-megamoe-mtp2.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-dep8-megamoe-mtp2.yaml @@ -118,11 +118,3 @@ benchmark: use_chat_template: true custom_tokenizer: "sa_bench_tokenizers.vllm_deepseek_v4.VLLMDeepseekV4Tokenizer" -identity: - model: - repo: "deepseek-ai/DeepSeek-V4-Pro" - revision: "0366e4e064385807ea86b088a5c6c878ff23343b" - container: - image: "vllm/vllm-openai:v0.21.0-ubuntu2404" - frameworks: - dynamo: "1.2.0.dev20260426" diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-tp4-mtp2.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-tp4-mtp2.yaml index 993282a7a..002b23e17 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-tp4-mtp2.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-tp4-mtp2.yaml @@ -114,11 +114,3 @@ benchmark: use_chat_template: true custom_tokenizer: "sa_bench_tokenizers.vllm_deepseek_v4.VLLMDeepseekV4Tokenizer" -identity: - model: - repo: "deepseek-ai/DeepSeek-V4-Pro" - revision: "0366e4e064385807ea86b088a5c6c878ff23343b" - container: - image: "vllm/vllm-openai:v0.21.0-ubuntu2404" - frameworks: - dynamo: "1.2.0.dev20260426" diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-2p1d-dep4-dep8-megamoe-mtp2.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-2p1d-dep4-dep8-megamoe-mtp2.yaml index d3ad4fd14..a2cf29426 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-2p1d-dep4-dep8-megamoe-mtp2.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-gb300-2p1d-dep4-dep8-megamoe-mtp2.yaml @@ -118,11 +118,3 @@ benchmark: use_chat_template: true custom_tokenizer: "sa_bench_tokenizers.vllm_deepseek_v4.VLLMDeepseekV4Tokenizer" -identity: - model: - repo: "deepseek-ai/DeepSeek-V4-Pro" - revision: "0366e4e064385807ea86b088a5c6c878ff23343b" - container: - image: "vllm/vllm-openai:v0.21.0-ubuntu2404" - frameworks: - dynamo: "1.2.0.dev20260426" From dec6b360ad7e94be21490a69b4b42423f17f5fb8 Mon Sep 17 00:00:00 2001 From: hjjq <50634613+hjjq@users.noreply.github.com> Date: Thu, 21 May 2026 11:23:06 -0700 Subject: [PATCH 3/4] gb300 runner: pin dsv4-vllm to aflowers/vllm-gb200-v0.20.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous pin (aflowers/gb200-dsv4-recipes) has a Makefile setup target that masks wget failures (uses ; instead of && and lacks set -e), so a transient GitHub release outage during the ETCD download silently leaves configs/etcd missing. The mount logic in srtctl runtime.py then skips the /configs bind mount, and the container fails with 'etcd binary not found: /configs/etcd'. vllm-gb200-v0.20.0's Makefile adds set -e to the recipe and wraps each binary download in an idempotent if-then-else block, so a failed wget aborts make setup loudly instead of cascading into a confusing container-time error. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 (1M context) --- runners/launch_gb300-nv.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runners/launch_gb300-nv.sh b/runners/launch_gb300-nv.sh index 4ef4709d2..d791ebee1 100644 --- a/runners/launch_gb300-nv.sh +++ b/runners/launch_gb300-nv.sh @@ -65,7 +65,7 @@ rm -rf "$SRT_REPO_DIR" if [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "dsv4" ]]; then git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR" cd "$SRT_REPO_DIR" - git checkout aflowers/gb200-dsv4-recipes + git checkout aflowers/vllm-gb200-v0.20.0 mkdir -p recipes/vllm/deepseek-v4 cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4" recipes/vllm/deepseek-v4 else From 2f2b06aa8ae545c99d25d019630fa800f3eba7e1 Mon Sep 17 00:00:00 2001 From: hjjq <50634613+hjjq@users.noreply.github.com> Date: Fri, 22 May 2026 07:26:55 -0700 Subject: [PATCH 4/4] trigger ci