From 7e3166eca33e18e915ba80e9c89724426ff5ca23 Mon Sep 17 00:00:00 2001 From: functionstackx <47992694+functionstackx@users.noreply.github.com> Date: Sun, 17 May 2026 19:44:41 -0400 Subject: [PATCH] Update dsv4-fp4-b300-sglang (+mtp) SGLang image to v0.5.12-cu130 --- .github/configs/nvidia-master.yaml | 4 ++-- perf-changelog.yaml | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index ff6386708..48fe6e6c6 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -1986,7 +1986,7 @@ dsr1-fp8-b300-sglang: # until a B300-specific recipe ships. Prefix caching is disabled. # Parallelisms and concurrency ranges mirror dsv4-fp4-b200-vllm. dsv4-fp4-b300-sglang: - image: lmsysorg/sglang:deepseek-v4-b300@sha256:2fec8d7958bb0d53b50d7bf04d6ae6a7de8a35503775826e0550a45dd8c3ee15 + image: lmsysorg/sglang:v0.5.12-cu130 model: deepseek-ai/DeepSeek-V4-Pro model-prefix: dsv4 runner: b300 @@ -2027,7 +2027,7 @@ dsv4-fp4-b300-sglang: # dp-attn: true -> DP-attn + flashinfer_mxfp4 + chunked-prefill 32768 # + EAGLE (1,1,2) + mem-fraction 0.92 + max-running 256 dsv4-fp4-b300-sglang-mtp: - image: lmsysorg/sglang:deepseek-v4-b300@sha256:26e116bd211e300dbb76924d56c5cbe6cc3ee5ee2fe314859cb8774f5bc070f3 + image: lmsysorg/sglang:v0.5.12-cu130 model: deepseek-ai/DeepSeek-V4-Pro model-prefix: dsv4 runner: b300 diff --git a/perf-changelog.yaml b/perf-changelog.yaml index fc763d93b..07bb3f38b 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3022,3 +3022,10 @@ description: - "Update SGLang image from nightly-dev-cu13-20260518-c67b2870 to nightly-dev-cu13-20260519-dbac4647" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1492 + +- config-keys: + - dsv4-fp4-b300-sglang + - dsv4-fp4-b300-sglang-mtp + description: + - "Update SGLang image from SHA-pinned deepseek-v4-b300 custom build (20/18d old) to v0.5.12-cu130" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1455