@@ -400,7 +400,7 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
400
400
"exactly equal." , mamba_padding_pct )
401
401
402
402
403
- class DeepseekV3ForCausalLM (VerifyAndUpdateConfig ):
403
+ class DeepseekV32ForCausalLM (VerifyAndUpdateConfig ):
404
404
405
405
@classmethod
406
406
def verify_and_update_config (cls , vllm_config : "VllmConfig" ) -> None :
@@ -409,20 +409,20 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
409
409
"""
410
410
hf_config = vllm_config .model_config .hf_config
411
411
412
+ # Mirror the check in vllm/model_executor/models/deepseek_v2.py
412
413
is_v32 = hasattr (hf_config , "index_topk" )
414
+ assert is_v32
413
415
414
- if is_v32 :
415
- # For DeepSeekV3.2, we use a custom fp8 format as default (i.e.
416
- # "auto")
417
- cache_config = vllm_config .cache_config
418
- if cache_config .cache_dtype == "auto" or \
419
- cache_config .cache_dtype .startswith ("fp8" ):
420
- cache_config .cache_dtype = "fp8_ds_mla"
421
- logger .info (
422
- "Using custom fp8 kv-cache format for DeepSeekV3.2" )
423
- if cache_config .cache_dtype == "bfloat16" :
424
- cache_config .cache_dtype = "auto"
425
- logger .info ("Using bfloat16 kv-cache for DeepSeekV3.2" )
416
+ # For DeepSeekV3.2, we use a custom fp8 format as default (i.e.
417
+ # "auto")
418
+ cache_config = vllm_config .cache_config
419
+ if cache_config .cache_dtype == "auto" or \
420
+ cache_config .cache_dtype .startswith ("fp8" ):
421
+ cache_config .cache_dtype = "fp8_ds_mla"
422
+ logger .info ("Using custom fp8 kv-cache format for DeepSeekV3.2" )
423
+ if cache_config .cache_dtype == "bfloat16" :
424
+ cache_config .cache_dtype = "auto"
425
+ logger .info ("Using bfloat16 kv-cache for DeepSeekV3.2" )
426
426
427
427
428
428
MODELS_CONFIG_MAP : dict [str , type [VerifyAndUpdateConfig ]] = {
@@ -441,5 +441,5 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
441
441
"MambaForCausalLM" : MambaModelConfig ,
442
442
"Mamba2ForCausalLM" : MambaModelConfig ,
443
443
"FalconMambaForCausalLM" : MambaModelConfig ,
444
- "DeepseekV3ForCausalLM " : DeepseekV3ForCausalLM ,
444
+ "DeepseekV32ForCausalLM " : DeepseekV32ForCausalLM ,
445
445
}
0 commit comments