We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent cc43fce commit 3676c94Copy full SHA for 3676c94
vllm/v1/attention/backends/mla/indexer.py
@@ -171,7 +171,7 @@ def get_max_prefill_buffer_size(vllm_config: VllmConfig):

 class DeepseekV32IndexerMetadataBuilder(AttentionMetadataBuilder):
     cudagraph_support: ClassVar[AttentionCGSupport] = \
-        AttentionCGSupport.UNIFORM_BATCH
+        AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE

     reorder_batch_threshold: int = 1
0 commit comments