We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent cc43fce commit 3676c94Copy full SHA for 3676c94
vllm/v1/attention/backends/mla/indexer.py
@@ -171,7 +171,7 @@ def get_max_prefill_buffer_size(vllm_config: VllmConfig):

 class DeepseekV32IndexerMetadataBuilder(AttentionMetadataBuilder):
     cudagraph_support: ClassVar[AttentionCGSupport] = \
-        AttentionCGSupport.UNIFORM_BATCH
+        AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE

     reorder_batch_threshold: int = 1
0 commit comments