Skip to content

Commit fa179ab

Browse files
DarkLight1337 and yewentao256
authored and committed
[CI/Build] Replace vllm.entrypoints.openai.api_server entrypoint with vllm serve command (#25967)
Signed-off-by: DarkLight1337 <[email protected]> Signed-off-by: yewentao256 <[email protected]>
1 parent 5c8a4a2 commit fa179ab

File tree

22 files changed

+101
-66
lines changed

22 files changed

+101
-66
lines changed

.buildkite/nightly-benchmarks/scripts/launch-server.sh

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -181,18 +181,14 @@ launch_vllm_server() {
181181
if echo "$common_params" | jq -e 'has("fp8")' >/dev/null; then
182182
echo "Key 'fp8' exists in common params. Use neuralmagic fp8 model for convenience."
183183
model=$(echo "$common_params" | jq -r '.neuralmagic_quantized_model')
184-
server_command="python3 \
185-
-m vllm.entrypoints.openai.api_server \
184+
server_command="vllm serve $model \
186185
-tp $tp \
187-
--model $model \
188186
--port $port \
189187
$server_args"
190188
else
191189
echo "Key 'fp8' does not exist in common params."
192-
server_command="python3 \
193-
-m vllm.entrypoints.openai.api_server \
190+
server_command="vllm serve $model \
194191
-tp $tp \
195-
--model $model \
196192
--port $port \
197193
$server_args"
198194
fi

.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -365,8 +365,7 @@ run_serving_tests() {
365365
continue
366366
fi
367367

368-
server_command="$server_envs python3 \
369-
-m vllm.entrypoints.openai.api_server \
368+
server_command="$server_envs vllm serve \
370369
$server_args"
371370

372371
# run the server

.buildkite/scripts/run-benchmarks.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ vllm bench throughput --input-len 256 --output-len 256 --output-json throughput_
1818
bench_throughput_exit_code=$?
1919

2020
# run server-based benchmarks and upload the result to buildkite
21-
python3 -m vllm.entrypoints.openai.api_server --model meta-llama/Llama-2-7b-chat-hf &
21+
vllm serve meta-llama/Llama-2-7b-chat-hf &
2222
server_pid=$!
2323
wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
2424

benchmarks/disagg_benchmarks/disagg_overhead_benchmark.sh

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,19 +55,15 @@ benchmark() {
5555
output_len=$2
5656

5757

58-
CUDA_VISIBLE_DEVICES=0 python3 \
59-
-m vllm.entrypoints.openai.api_server \
60-
--model $model \
58+
CUDA_VISIBLE_DEVICES=0 vllm serve $model \
6159
--port 8100 \
6260
--max-model-len 10000 \
6361
--gpu-memory-utilization 0.6 \
6462
--kv-transfer-config \
6563
'{"kv_connector":"P2pNcclConnector","kv_role":"kv_producer","kv_rank":0,"kv_parallel_size":2,"kv_buffer_size":5e9}' &
6664

6765

68-
CUDA_VISIBLE_DEVICES=1 python3 \
69-
-m vllm.entrypoints.openai.api_server \
70-
--model $model \
66+
CUDA_VISIBLE_DEVICES=1 vllm serve $model \
7167
--port 8200 \
7268
--max-model-len 10000 \
7369
--gpu-memory-utilization 0.6 \

benchmarks/disagg_benchmarks/disagg_performance_benchmark.sh

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,16 +38,12 @@ wait_for_server() {
3838
launch_chunked_prefill() {
3939
model="meta-llama/Meta-Llama-3.1-8B-Instruct"
4040
# chunked prefill
41-
CUDA_VISIBLE_DEVICES=0 python3 \
42-
-m vllm.entrypoints.openai.api_server \
43-
--model $model \
41+
CUDA_VISIBLE_DEVICES=0 vllm serve $model \
4442
--port 8100 \
4543
--max-model-len 10000 \
4644
--enable-chunked-prefill \
4745
--gpu-memory-utilization 0.6 &
48-
CUDA_VISIBLE_DEVICES=1 python3 \
49-
-m vllm.entrypoints.openai.api_server \
50-
--model $model \
46+
CUDA_VISIBLE_DEVICES=1 vllm serve $model \
5147
--port 8200 \
5248
--max-model-len 10000 \
5349
--enable-chunked-prefill \
@@ -62,18 +58,14 @@ launch_chunked_prefill() {
6258
launch_disagg_prefill() {
6359
model="meta-llama/Meta-Llama-3.1-8B-Instruct"
6460
# disagg prefill
65-
CUDA_VISIBLE_DEVICES=0 python3 \
66-
-m vllm.entrypoints.openai.api_server \
67-
--model $model \
61+
CUDA_VISIBLE_DEVICES=0 vllm serve $model \
6862
--port 8100 \
6963
--max-model-len 10000 \
7064
--gpu-memory-utilization 0.6 \
7165
--kv-transfer-config \
7266
'{"kv_connector":"P2pNcclConnector","kv_role":"kv_producer","kv_rank":0,"kv_parallel_size":2,"kv_buffer_size":5e9}' &
7367

74-
CUDA_VISIBLE_DEVICES=1 python3 \
75-
-m vllm.entrypoints.openai.api_server \
76-
--model $model \
68+
CUDA_VISIBLE_DEVICES=1 vllm serve $model \
7769
--port 8200 \
7870
--max-model-len 10000 \
7971
--gpu-memory-utilization 0.6 \

docker/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -565,5 +565,5 @@ ENTRYPOINT ["./sagemaker-entrypoint.sh"]
565565

566566
FROM vllm-openai-base AS vllm-openai
567567

568-
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
568+
ENTRYPOINT ["vllm", "serve"]
569569
#################### OPENAI API SERVER ####################

docker/Dockerfile.cpu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,4 +177,4 @@ RUN --mount=type=cache,target=/root/.cache/uv \
177177
--mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \
178178
uv pip install dist/*.whl
179179

180-
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
180+
ENTRYPOINT ["vllm", "serve"]

docker/Dockerfile.ppc64le

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,4 +314,4 @@ WORKDIR /workspace/
314314

315315
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
316316

317-
ENTRYPOINT ["python", "-m", "vllm.entrypoints.openai.api_server"]
317+
ENTRYPOINT ["vllm", "serve"]

docker/Dockerfile.s390x

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,4 +309,4 @@ USER 2000
309309
WORKDIR /home/vllm
310310

311311
# Set the default entrypoint
312-
ENTRYPOINT ["python", "-m", "vllm.entrypoints.openai.api_server"]
312+
ENTRYPOINT ["vllm", "serve"]

docker/Dockerfile.xpu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,4 +69,4 @@ RUN --mount=type=cache,target=/root/.cache/pip \
6969

7070
# install development dependencies (for testing)
7171
RUN python3 -m pip install -e tests/vllm_test_utils
72-
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
72+
ENTRYPOINT ["vllm", "serve"]

0 commit comments

Comments
 (0)