Skip to content

Commit 3c5974f

Browse files
authored
[CB][FP8] throw error for batch size 1 (#467)
### [CB][FP8] throw error for batch size 1

Intermediate solution until #466 can be merged safely.

Signed-off-by: Yannick Schnider <[email protected]>
1 parent 2adf5c3 commit 3c5974f

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

vllm_spyre/platform.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -146,6 +146,10 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
146146
if envs_spyre.VLLM_SPYRE_ENABLE_PROMPT_LOGPROBS:
147147
raise ValueError("Prompt logprobs not supported with " \
148148
"continuous batching")
149+
if (vllm_config.model_config.quantization
150+
and vllm_config.scheduler_config.max_num_seqs == 1):
151+
raise ValueError(
152+
"Batch size 1 not supported for fp8 continuous batching.")
149153
else:
150154
# Static batching or embedding model.
151155
# Override --max-num-seqs to the biggest warmup batch size

0 commit comments

Comments
 (0)