diff --git a/.github/workflows/sglang_benchmark_workflow.yaml b/.github/workflows/sglang_benchmark_workflow.yaml index 2a424151b8d..80ea95602b3 100644 --- a/.github/workflows/sglang_benchmark_workflow.yaml +++ b/.github/workflows/sglang_benchmark_workflow.yaml @@ -159,6 +159,18 @@ jobs: exit 1 fi + - name: Run performance benchmark + continue-on-error: true + timeout-minutes: 60 + run: | + model_name=${{ matrix.model }} + if [ "$model_name" == "Qwen3-Omni" ]; then + docker exec sglang_test bash -c "scripts/ci/sglang_benchmark_workflow.sh performance $model_name /models/Qwen/Qwen3-Omni-30B-A3B-Instruct/ 4 1" + else + echo "Skip performance benchmark for model_name: ${model_name}" + exit 0 + fi + - name: Clean Up if: always() run: diff --git a/scripts/ci/sglang_benchmark_workflow.sh b/scripts/ci/sglang_benchmark_workflow.sh index a2acbc1e135..458eddfeb4a 100755 --- a/scripts/ci/sglang_benchmark_workflow.sh +++ b/scripts/ci/sglang_benchmark_workflow.sh @@ -153,16 +153,26 @@ elif [[ "${TYPE}" == "evaluation" ]]; then elif [[ "${TYPE}" == "performance" ]]; then echo echo "========== STARTING PERFORMANCE BENCHMARK ==========" - python3 -m sglang.bench_serving \ - --backend sglang-oai-chat \ - --dataset-name image \ - --image-count 1 \ - --image-resolution 800x800 \ - --random-input-len 1000 \ - --random-output-len 2000 \ - --max-concurrency 64 \ - --num-prompts 192 \ - | tee performance_benchmark_${model_name}_TP${TP}_EP${EP}.log + if [[ "${model_name}" == "Qwen3-Omni" ]]; then + python3 -m sglang.bench_serving \ + --backend sglang-oai-chat \ + --host localhost \ + --port 9000 \ + --model "${model_path}" \ + --dataset-name image \ + --image-count 10 \ + --image-resolution 960x1280 \ + --random-input-len 8000 \ + --random-output-len 500 \ + --max-concurrency 1 \ + --num-prompts 128 \ + --flush-cache \ + --skip-special-tokens \ + 2>&1 | tee performance_benchmark_${model_name}_TP${TP}_EP${EP}.log + else + echo "Unknown model_name: ${model_name}" + exit 1 + fi else echo "Unknown TYPE: ${TYPE}"