11# Copyright (c) OpenMMLab. All rights reserved.
22from .base import INPUT_MODELS
3+ from .gpt_oss import GptOssReader
34from .internlm2 import InternLM2Reader
45from .llama import LlamaModel , LlamaReader
5- from .qwen import Qwen3MoeReader
6+ from .qwen import Qwen3MoeReader , Qwen3Reader
67
78
89class InternVLReader (LlamaReader ):
@@ -34,8 +35,59 @@ def __init__(self, new_params: dict, unused_params: dict, last_bin: bool, model_
3435 super ().__init__ (new_params , unused_params , last_bin , model_cfg , ** kwargs )
3536
3637
class InternVL3d5Reader(Qwen3Reader):
    """Reader for the Qwen3 language model embedded in an InternVL3.5 checkpoint.

    The LLM weights live under the ``language_model.`` prefix of the composite
    checkpoint, so every lookup key of the plain :class:`Qwen3Reader` is
    re-rooted accordingly.
    """

    attn_layer_prefix = 'language_model.model.layers'
    attn_layer_patten = r'language_model\.model\.layers\.([0-9]+).'
    tok_embeddings_key = 'language_model.model.embed_tokens.weight'
    norm_weight_key = 'language_model.model.norm.weight'
    output_weight_key = 'language_model.lm_head.weight'

    def __init__(self, new_params: dict, unused_params: dict, last_bin: bool, model_cfg: dict, **kwargs):
        """Extract the LLM sub-config and delegate to the base reader.

        The sub-config is stored under ``llm_config`` in older checkpoints and
        ``text_config`` in newer ones.

        Raises:
            ValueError: if neither key is present, instead of failing later
                with an opaque error inside the base-class constructor.
        """
        llm_cfg = model_cfg.get('llm_config') or model_cfg.get('text_config')
        if llm_cfg is None:
            raise ValueError(f'Missing "llm_config"/"text_config" in model config: {model_cfg}')
        super().__init__(new_params, unused_params, last_bin, llm_cfg, **kwargs)
48+
49+
class InternVL3d5Qwen3MoEReader(Qwen3MoeReader):
    """Reader for the Qwen3-MoE language model embedded in an InternVL3.5 checkpoint.

    The LLM weights live under the ``language_model.`` prefix of the composite
    checkpoint, so every lookup key of the plain :class:`Qwen3MoeReader` is
    re-rooted accordingly.
    """

    attn_layer_prefix = 'language_model.model.layers'
    attn_layer_patten = r'language_model\.model\.layers\.([0-9]+).'
    tok_embeddings_key = 'language_model.model.embed_tokens.weight'
    norm_weight_key = 'language_model.model.norm.weight'
    output_weight_key = 'language_model.lm_head.weight'

    def __init__(self, new_params: dict, unused_params: dict, last_bin: bool, model_cfg: dict, **kwargs):
        """Extract the LLM sub-config and delegate to the base reader.

        The sub-config is stored under ``llm_config`` in older checkpoints and
        ``text_config`` in newer ones.

        Raises:
            ValueError: if neither key is present, instead of failing later
                with an opaque error inside the base-class constructor.
        """
        llm_cfg = model_cfg.get('llm_config') or model_cfg.get('text_config')
        if llm_cfg is None:
            raise ValueError(f'Missing "llm_config"/"text_config" in model config: {model_cfg}')
        super().__init__(new_params, unused_params, last_bin, llm_cfg, **kwargs)
60+
61+
class InternVL3d5GptOSSReader(GptOssReader):
    """Reader for the GPT-OSS language model embedded in an InternVL3.5 checkpoint.

    The LLM weights live under the ``language_model.`` prefix of the composite
    checkpoint, so every lookup key of the plain :class:`GptOssReader` is
    re-rooted accordingly.
    """

    attn_layer_prefix = 'language_model.model.layers'
    attn_layer_patten = r'language_model\.model\.layers\.([0-9]+).'
    tok_embeddings_key = 'language_model.model.embed_tokens.weight'
    norm_weight_key = 'language_model.model.norm.weight'
    output_weight_key = 'language_model.lm_head.weight'

    def __init__(self, new_params: dict, unused_params: dict, last_bin: bool, model_cfg: dict, **kwargs):
        """Extract the LLM sub-config and delegate to the base reader.

        The sub-config is stored under ``llm_config`` in older checkpoints and
        ``text_config`` in newer ones.

        Raises:
            ValueError: if neither key is present, instead of failing later
                with an opaque error inside the base-class constructor.
        """
        llm_cfg = model_cfg.get('llm_config') or model_cfg.get('text_config')
        if llm_cfg is None:
            raise ValueError(f'Missing "llm_config"/"text_config" in model config: {model_cfg}')
        super().__init__(new_params, unused_params, last_bin, llm_cfg, **kwargs)
72+
73+
class InternS1Reader(Qwen3MoeReader):
    """InternS1Reader for internlm/InternS1 model.

    The Qwen3-MoE language model is stored under the ``model.language_model.``
    prefix of the InternS1 checkpoint, so every lookup key of the plain
    :class:`Qwen3MoeReader` is re-rooted accordingly.
    """

    attn_layer_prefix = 'model.language_model.layers'
    attn_layer_patten = r'model\.language_model\.layers\.([0-9]+).'
    tok_embeddings_key = 'model.language_model.embed_tokens.weight'
    norm_weight_key = 'model.language_model.norm.weight'
    output_weight_key = 'lm_head.weight'

    def __init__(self, new_params: dict, unused_params: dict, last_bin: bool, model_cfg: dict, **kwargs):
        """Extract the ``text_config`` sub-config and delegate to the base reader.

        Raises:
            ValueError: if ``text_config`` is absent. The original config is
                reported; the previous code rebound ``model_cfg`` before
                formatting the message, so it always printed ``None``.
        """
        text_cfg = model_cfg.get('text_config')
        if text_cfg is None:
            raise ValueError(f'Missing "text_config" in model config: {model_cfg}')
        super().__init__(new_params, unused_params, last_bin, text_cfg, **kwargs)
88+
89+
90+ class InternS1MiniReader (Qwen3Reader ):
3991
4092 attn_layer_prefix = 'model.language_model.layers'
4193 attn_layer_patten = r'model\.language_model\.layers\.([0-9]+).'
def __init__(self, model_path: str, tokenizer_path: str, **kwargs):
    """Pick the LLM flavour and reader class from the checkpoint's config.

    Dispatch is two-staged: the *outer* architecture (``config.architectures``)
    selects which relation table applies, then the *inner* LLM architecture
    (``llm_config``/``text_config``) selects the concrete reader and input
    model within that table.

    Args:
        model_path: path or hub id of the composite checkpoint.
        tokenizer_path: path or hub id of the tokenizer.

    Raises:
        ValueError: for an unsupported outer architecture.
        KeyError: for an unsupported inner LLM architecture.
    """
    super().__init__(model_path, tokenizer_path, **kwargs)
    from transformers import AutoConfig
    config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)

    arch = config.architectures[0]
    if arch == 'InternVLChatModel':
        relations = dict(InternLM2ForCausalLM=('internlm2', InternVL2Reader),
                         LlamaForCausalLM=('llama', InternVLReader),
                         Qwen2ForCausalLM=('qwen2', InternVLReader),
                         Qwen3MoeForCausalLM=('qwen3-moe', InternVL3d5Qwen3MoEReader),
                         Qwen3ForCausalLM=('qwen3', InternVL3d5Reader),
                         GptOssForCausalLM=('gpt-oss', InternVL3d5GptOSSReader))
    elif arch == 'InternS1ForConditionalGeneration':
        relations = dict(Qwen3MoeForCausalLM=('qwen3-moe', InternS1Reader),
                         Qwen3ForCausalLM=('qwen3', InternS1MiniReader))
    else:
        # was a plain string literal: the message printed the text "{arch}"
        # instead of the actual architecture name
        raise ValueError(f'unsupported model arch {arch}')
    # inner LLM sub-config: 'llm_config' in older checkpoints, 'text_config' in newer ones
    self.llm_config = getattr(config, 'llm_config', None) or getattr(config, 'text_config', None)
    arch = self.llm_config.architectures[0]
    llm_model, self.Reader = relations[arch]
    self.llm_model = INPUT_MODELS.get(llm_model)(model_path=model_path, tokenizer_path=tokenizer_path, **kwargs)
71131
0 commit comments