Support vl rl #2921

Closed · wants to merge 2 commits

fastdeploy/config.py (21 additions, 1 deletion)

@@ -37,6 +37,25 @@ class MoEPhase(Enum):
     PREFILL = 1
     DECODER = 2
 
+class ErnieArchitectures:
+    """Helper class for ERNIE architecture check."""
+
+    ARCHITECTURES = {
+        "Ernie4_5_ForCausalLM",
+        "Ernie4_5_MoeForCausalLM",
+        "Ernie4_5_VLMoeForConditionalGeneration"
+    }
+
+    @classmethod
+    def contains_ernie_arch(cls, architectures):
+        """Check if any ERNIE architecture is present in the given architectures."""
+        return any(arch in architectures for arch in cls.ARCHITECTURES)
+
+    @classmethod
+    def is_ernie_arch(cls, architecture):
+        """Check if the given architecture is an ERNIE architecture."""
+        return architecture in cls.ARCHITECTURES
+
 PRETRAINED_INIT_CONFIGURATION = {
     "rope_theta" : 10000.0,
     "num_key_value_heads" : -1,

@@ -108,9 +127,10 @@ def __init__(
         self.vision_config = PretrainedConfig.from_dict(self.vision_config)
 
         self.ori_vocab_size = self.vocab_size
-        if "Ernie4_5_ForCausalLM" in self.architectures or "Ernie4_5_MoeForCausalLM" in self.architectures:
+        if ErnieArchitectures.contains_ernie_arch(self.architectures):
             self.ori_vocab_size = args["ori_vocab_size"]
 
+
 class ParallelConfig:
     """Configuration for the distributed execution."""
     def __init__(
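
For orientation, a minimal usage sketch of the new helper; the architectures list below is a made-up example, not taken from the PR.

from fastdeploy.config import ErnieArchitectures

# Hypothetical architectures list, as it would appear in a model config.
architectures = ["Ernie4_5_VLMoeForConditionalGeneration"]

# True if any entry is one of the known ERNIE architectures.
assert ErnieArchitectures.contains_ernie_arch(architectures)

# Single-architecture check.
assert not ErnieArchitectures.is_ernie_arch("LlamaForCausalLM")
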
fastdeploy/input/preprocess.py (2 additions, 2 deletions)

@@ -17,6 +17,7 @@
 
 from fastdeploy.engine.config import ModelConfig
 from fastdeploy.reasoning import ReasoningParserManager
+from fastdeploy.config import ErnieArchitectures
 
 
 class InputPreprocessor:

@@ -71,8 +72,7 @@ def create_processor(self):
                 self.reasoning_parser)
         architectures = ModelConfig(self.model_name_or_path).architectures
         if not self.enable_mm:
-            if "Ernie4_5_MoeForCausalLM" not in architectures \
-                    and "Ernie4_5_ForCausalLM" not in architectures:
+            if not ErnieArchitectures.contains_ernie_arch(architectures):
                 from fastdeploy.input.text_processor import DataProcessor
                 self.processor = DataProcessor(
                     model_name_or_path=self.model_name_or_path, reasoning_parser_obj=reasoning_parser_obj)
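
The behavior is unchanged: non-multimodal models that are not ERNIE still fall back to the generic text processor. A simplified sketch of that selection as a standalone function (an assumption for illustration; the ERNIE-specific branch is elided because its processor class does not appear in this hunk):

from fastdeploy.config import ErnieArchitectures
from fastdeploy.engine.config import ModelConfig

def pick_text_processor(model_name_or_path, reasoning_parser_obj=None):
    # Sketch of the non-multimodal branch of InputPreprocessor.create_processor.
    architectures = ModelConfig(model_name_or_path).architectures
    if not ErnieArchitectures.contains_ernie_arch(architectures):
        # Non-ERNIE models use the generic DataProcessor.
        from fastdeploy.input.text_processor import DataProcessor
        return DataProcessor(model_name_or_path=model_name_or_path,
                             reasoning_parser_obj=reasoning_parser_obj)
    # ERNIE models take a model-specific processor (not shown in this hunk).
    raise NotImplementedError("ERNIE-specific branch elided in this sketch")
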
Another changed file (path not shown in this view):

@@ -17,7 +17,7 @@
 import os
 from concurrent.futures import ThreadPoolExecutor
 
-from fastdeploy.config import FDConfig
+from fastdeploy.config import FDConfig, ErnieArchitectures
 from fastdeploy.engine.request import Request
 from fastdeploy.utils import llm_logger
 

@@ -268,8 +268,7 @@ def _get_tokenizer_hf(self):
         """
         try:
             architectures = self.fd_config.model_config.architectures
-            if "Ernie4_5_MoeForCausalLM" not in architectures \
-                    and "Ernie4_5_ForCausalLM" not in architectures:
+            if not ErnieArchitectures.contains_ernie_arch(architectures):
 
                 from transformers import AutoTokenizer, PreTrainedTokenizerFast
                 tokenizer = AutoTokenizer.from_pretrained(
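
The non-ERNIE path here simply loads a stock Hugging Face tokenizer. A rough sketch of that fallback; the from_pretrained arguments are assumptions, since the original call is truncated in the diff:

from transformers import AutoTokenizer

from fastdeploy.config import ErnieArchitectures

def load_tokenizer(model_name_or_path, architectures):
    # Sketch of the dispatch inside _get_tokenizer_hf.
    if not ErnieArchitectures.contains_ernie_arch(architectures):
        # Assumed arguments; the real from_pretrained(...) call is truncated above.
        return AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
    # ERNIE models keep FastDeploy's own tokenizer path (not shown in this hunk).
    raise NotImplementedError("ERNIE tokenizer branch elided in this sketch")
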
fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py (5 additions, 5 deletions)

@@ -161,7 +161,7 @@ def __init__(self, fd_config: FDConfig, layer_id: int,
 
         self.num_shared_experts = fd_config.model_config.moe_num_shared_experts
         if self.num_shared_experts > 0:
-            self.share_experts = Ernie4_5_VLMLP(
+            self.shared_experts = Ernie4_5_VLMLP(
                 fd_config=fd_config,
                 intermediate_size=self.num_shared_experts *
                 fd_config.model_config.moe_intermediate_size[0],

@@ -193,11 +193,11 @@ def load_state_dict(self, state_dict):
         if self.text_fused_moe.moe_use_gate_correction_bias:
             state_dict.pop(self.text_fused_moe.gate_correction_bias_key)
         if self.num_shared_experts > 0:
-            self.share_experts.load_state_dict(state_dict)
+            self.shared_experts.load_state_dict(state_dict)
 
     def forward(self, hidden_states: paddle.Tensor, vl_moe_meta: VLMoEMeta):
         if self.num_shared_experts > 0:
-            share_experts_out = self.share_experts(hidden_states)
+            shared_experts_out = self.shared_experts(hidden_states)
         if vl_moe_meta.image_input is not None:
             text_image_gather_scatter(
                 hidden_states,

@@ -222,7 +222,7 @@ def forward(self, hidden_states: paddle.Tensor, vl_moe_meta: VLMoEMeta):
         else:
             hidden_states = self.text_fused_moe(hidden_states)
         if self.num_shared_experts > 0:
-            hidden_states += share_experts_out
+            hidden_states += shared_experts_out
         if self.tp_size > 1:
             tensor_model_parallel_all_reduce(hidden_states)
         return hidden_states

@@ -759,4 +759,4 @@ def get_vison_parallel_split_mappings(num_layers: int):
             config.vision_config.get("depth")
         )
 
-        return {**mappings, **vision_mappings}
+        return {**mappings, **vision_mappings}
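
Beyond the rename from share_experts to shared_experts, the forward pass follows the common shared-expert MoE pattern: the shared experts see every token, the routed experts process the same hidden states, and the two outputs are summed before the tensor-parallel all-reduce. A simplified sketch of the text-only path; the callable names are stand-ins for the modules in the diff, not the actual class API:

def moe_forward_text_only(hidden_states, text_fused_moe, shared_experts=None,
                          tp_size=1, all_reduce=None):
    # Simplified shared-expert MoE flow mirroring the structure in the diff above.
    shared_out = None
    if shared_experts is not None:
        # Shared experts run on every token, independent of the router.
        shared_out = shared_experts(hidden_states)
    # Routed experts (the fused MoE) process the same hidden states.
    hidden_states = text_fused_moe(hidden_states)
    if shared_out is not None:
        hidden_states = hidden_states + shared_out
    if tp_size > 1 and all_reduce is not None:
        # Combine partial results across tensor-parallel ranks.
        all_reduce(hidden_states)
    return hidden_states
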