diff --git a/vllm/model_executor/models/deepseek_eagle.py b/vllm/model_executor/models/deepseek_eagle.py index c42a66d86912..fcbe6ff7c39f 100644 --- a/vllm/model_executor/models/deepseek_eagle.py +++ b/vllm/model_executor/models/deepseek_eagle.py @@ -50,6 +50,7 @@ def __init__( DeepseekV2DecoderLayer( vllm_config, prefix=maybe_prefix(prefix, f"layers.{i + start_layer_id}"), + config=self.config, ) for i in range(self.config.num_hidden_layers) ]) diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py index 03c43654d68f..76f1805aeec8 100644 --- a/vllm/model_executor/models/deepseek_v2.py +++ b/vllm/model_executor/models/deepseek_v2.py @@ -990,10 +990,11 @@ class DeepseekV2DecoderLayer(nn.Module): def __init__(self, vllm_config: VllmConfig, prefix: str, + config: Optional[DeepseekV2Config] = None, topk_indices_buffer: Optional[torch.Tensor] = None) -> None: super().__init__() - config = vllm_config.model_config.hf_config + config = config or vllm_config.model_config.hf_config model_config = vllm_config.model_config cache_config = vllm_config.cache_config quant_config = vllm_config.quant_config