Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/sglang_benchmark_workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,18 @@ jobs:
exit 1
fi

# Step: run the performance benchmark for the model selected by the job matrix.
#  - Calls scripts/ci/sglang_benchmark_workflow.sh in "performance" mode inside the
#    `sglang_test` container via `docker exec`.
#  - Only "Qwen3-Omni" is handled; any other matrix.model value prints an error and exits 1.
#  - continue-on-error: true lets the job proceed to later steps even if this step fails.
#  - timeout-minutes: 60 caps the step's run time.
# NOTE(review): the trailing "4 2" arguments are presumably the TP and EP sizes read by
# the script (it references ${TP} and ${EP}) - confirm against the script's argument parsing.
- name: Run performance benchmark
continue-on-error: true
timeout-minutes: 60
run: |
model_name=${{ matrix.model }}
if [ "$model_name" == "Qwen3-Omni" ]; then
docker exec sglang_test bash -c "scripts/ci/sglang_benchmark_workflow.sh performance $model_name /models/Qwen/Qwen3-Omni-30B-A3B-Instruct/ 4 2"
else
echo "Unknown model_name: ${model_name}"
exit 1
fi

- name: Clean Up
if: always()
run:
Expand Down
30 changes: 17 additions & 13 deletions scripts/ci/sglang_benchmark_workflow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ if [[ "${TYPE}" == "launch" ]]; then
elif [[ "${model_name}" == "Qwen3-Omni" ]]; then
echo "Qwen3-Omni-Server Launch"
export SGLANG_USE_AITER=1
export SGLANG_ROCM_USE_AITER_PA_ASM_PRESHUFFLE_LAYOUT=0
export SGLANG_VLM_CACHE_SIZE_MB=0
python3 -m sglang.launch_server \
--model-path "${model_path}" \
--host localhost \
Expand All @@ -68,11 +70,11 @@ if [[ "${TYPE}" == "launch" ]]; then
--ep-size ${EP} \
--trust-remote-code \
--mm-attention-backend "aiter_attn"\
Comment thread
sammysun0711 marked this conversation as resolved.
--chunked-prefill-size 16384 \
--chunked-prefill-size 32768 \
--mem-fraction-static 0.85 \
--disable-radix-cache \
--max-prefill-tokens 16384 \
--cuda-graph-max-bs 64 \
--max-prefill-tokens 32768 \
--cuda-graph-max-bs 8 \
--page-size 64 &
sglang_pid=$!
else
Expand Down Expand Up @@ -144,16 +146,18 @@ elif [[ "${TYPE}" == "evaluation" ]]; then
elif [[ "${TYPE}" == "performance" ]]; then
echo
echo "========== STARTING PERFORMANCE BENCHMARK =========="
python3 -m sglang.bench_serving \
--backend sglang-oai-chat \
--dataset-name image \
--image-count 1 \
--image-resolution 800x800 \
--random-input-len 1000 \
--random-output-len 2000 \
--max-concurrency 64 \
--num-prompts 192 \
| tee performance_benchmark_${model_name}_TP${TP}_EP${EP}.log
# Performance benchmark for Qwen3-Omni.
# Runs sglang.bench_serving against the server with the image dataset settings below
# and mirrors the output into performance_benchmark_<model>_TP<TP>_EP<EP>.log via tee.
# Reads: model_name, TP, EP (set earlier in this script).
# For any other model_name this block does nothing.
if [[ "${model_name}" == "Qwen3-Omni" ]]; then
  python3 -m sglang.bench_serving \
    --backend sglang-oai-chat \
    --dataset-name image \
    --image-count 20 \
    --image-resolution 960x1280 \
    --random-input-len 8000 \
    --random-output-len 500 \
    --max-concurrency 2 \
    --num-prompts 128 \
    --skip-special-tokens \
    | tee "performance_benchmark_${model_name}_TP${TP}_EP${EP}.log"
# This `fi` is required: without it the `else` of the enclosing TYPE dispatch
# binds to this inner `if`, leaving the outer chain unterminated (parse error).
fi

else
echo "Unknown TYPE: ${TYPE}"
Expand Down
Loading