diff --git a/vllm_spyre/platform.py b/vllm_spyre/platform.py
index 1ccda6f94..c4452313e 100644
--- a/vllm_spyre/platform.py
+++ b/vllm_spyre/platform.py
@@ -146,6 +146,10 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
             if envs_spyre.VLLM_SPYRE_ENABLE_PROMPT_LOGPROBS:
                 raise ValueError("Prompt logprobs not supported with " \
                     "continuous batching")
+            if (vllm_config.model_config.quantization
+                    and vllm_config.scheduler_config.max_num_seqs == 1):
+                raise ValueError(
+                    "Batch size 1 not supported for fp8 continuous batching.")
         else:
             # Static batching or embedding model.
             # Override --max-num-seqs to the biggest warmup batch size