diff --git a/scripts/ci/sglang_benchmark_workflow.sh b/scripts/ci/sglang_benchmark_workflow.sh index 9bd12540750c..074804267dd6 100755 --- a/scripts/ci/sglang_benchmark_workflow.sh +++ b/scripts/ci/sglang_benchmark_workflow.sh @@ -44,7 +44,8 @@ if [[ "${TYPE}" == "launch" ]]; then --max-prefill-tokens 32768 \ --cuda-graph-max-bs 256 \ --page-size 64 \ - --attention-backend triton & + --attention-backend triton \ + --max-running-requests 128 & sglang_pid=$! else echo "Unknown model_name: ${model_name}" @@ -140,4 +141,4 @@ else echo echo "========== SGLANG BENCHMARK ${TYPE} FAILED WITH EXIT CODE $exit_code ==========" exit $exit_code -fi \ No newline at end of file +fi