Skip to content

Commit d46103a

Browse files
authored
support internvl3.5 (InternLM#3886)
* Fix interns1 LLM mapping for turbomind engine (InternLM#3848) * support internvl3.5 * update docs
1 parent d5b2716 commit d46103a

File tree

7 files changed

+77
-11
lines changed

7 files changed

+77
-11
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ LMDeploy is a toolkit for compressing, deploying, and serving LLM, developed by
159159
<li>InternVL2 (1B-76B)</li>
160160
<li>InternVL2.5(MPO) (1B-78B)</li>
161161
<li>InternVL3 (1B-78B)</li>
162+
<li>InternVL3.5 (1B-241BA28B)</li>
162163
<li>Intern-S1 (241B)</li>
163164
<li>Intern-S1-mini (8.3B)</li>
164165
<li>Mono-InternVL (2B)</li>

README_ja.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ LMDeploy TurboMindエンジンは卓越した推論能力を持ち、さまざ
157157
<li>InternVL2 (1B-76B)</li>
158158
<li>InternVL2.5(MPO) (1B-78B)</li>
159159
<li>InternVL3 (1B-78B)</li>
160+
<li>InternVL3.5 (1B-241BA28B)</li>
160161
<li>Intern-S1 (241B)</li>
161162
<li>Intern-S1-mini (8.3B)</li>
162163
<li>Mono-InternVL (2B)</li>

README_zh-CN.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ LMDeploy TurboMind 引擎拥有卓越的推理能力,在各种规模的模型
161161
<li>InternVL2 (1B-76B)</li>
162162
<li>InternVL2.5(MPO) (1B-78B)</li>
163163
<li>InternVL3 (1B-78B)</li>
164+
<li>InternVL3.5 (1B-241BA28B)</li>
164165
<li>Intern-S1 (241B)</li>
165166
<li>Intern-S1-mini (8.3B)</li>
166167
<li>Mono-InternVL (2B)</li>

docs/en/supported_models/supported_models.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ The following tables detail the models supported by LMDeploy's TurboMind engine
4040
| InternVL2<sup>\[2\]</sup> | 1 - 2B, 8B - 76B | MLLM | Yes | Yes\* | Yes\* | Yes |
4141
| InternVL2.5(MPO)<sup>\[2\]</sup> | 1 - 78B | MLLM | Yes | Yes\* | Yes\* | Yes |
4242
| InternVL3<sup>\[2\]</sup> | 1 - 78B | MLLM | Yes | Yes\* | Yes\* | Yes |
43+
| InternVL3.5<sup>\[3\]</sup> | 1 - 241BA28B | MLLM | Yes | Yes\* | Yes\* | No |
4344
| ChemVLM | 8B - 26B | MLLM | Yes | Yes | Yes | Yes |
4445
| MiniCPM-Llama3-V-2_5 | - | MLLM | Yes | Yes | Yes | Yes |
4546
| MiniCPM-V-2_6 | - | MLLM | Yes | Yes | Yes | Yes |
@@ -103,6 +104,7 @@ The following tables detail the models supported by LMDeploy's TurboMind engine
103104
| InternVL2 | 1B-76B | MLLM | Yes | Yes | Yes | - | - |
104105
| InternVL2.5(MPO) | 1B-78B | MLLM | Yes | Yes | Yes | - | - |
105106
| InternVL3 | 1B-78B | MLLM | Yes | Yes | Yes | - | - |
107+
| InternVL3.5 | 1B-241BA28B | MLLM | Yes | Yes | Yes | No | No |
106108
| Mono-InternVL<sup>\[1\]</sup> | 2B | MLLM | Yes | Yes | Yes | - | - |
107109
| ChemVLM | 8B-26B | MLLM | Yes | Yes | No | - | - |
108110
| Gemma2 | 9B-27B | LLM | Yes | Yes | Yes | - | - |

docs/zh_cn/supported_models/supported_models.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
| InternVL2 | 1-2B, 8B - 76B | MLLM | Yes | Yes\* | Yes\* | Yes |
4141
| InternVL2.5(MPO)<sup>\[2\]</sup> | 1 - 78B | MLLM | Yes | Yes\* | Yes\* | Yes |
4242
| InternVL3<sup>\[2\]</sup> | 1 - 78B | MLLM | Yes | Yes\* | Yes\* | Yes |
43+
| InternVL3.5<sup>\[3\]</sup> | 1 - 241BA28B | MLLM | Yes | Yes\* | Yes\* | No |
4344
| ChemVLM | 8B - 26B | MLLM | Yes | Yes | Yes | Yes |
4445
| MiniCPM-Llama3-V-2_5 | - | MLLM | Yes | Yes | Yes | Yes |
4546
| MiniCPM-V-2_6 | - | MLLM | Yes | Yes | Yes | Yes |
@@ -103,6 +104,7 @@
103104
| InternVL2 | 1B-76B | MLLM | Yes | Yes | Yes | - | - |
104105
| InternVL2.5(MPO) | 1B-78B | MLLM | Yes | Yes | Yes | - | - |
105106
| InternVL3 | 1B-78B | MLLM | Yes | Yes | Yes | - | - |
107+
| InternVL3.5 | 1B-241BA28B | MLLM | Yes | Yes | Yes | No | No |
106108
| Mono-InternVL<sup>\[1\]</sup> | 2B | MLLM | Yes\* | Yes | Yes | - | - |
107109
| ChemVLM | 8B-26B | MLLM | Yes | Yes | No | - | - |
108110
| Gemma2 | 9B-27B | LLM | Yes | Yes | Yes | - | - |

lmdeploy/turbomind/deploy/source_model/internvl.py

Lines changed: 68 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
# Copyright (c) OpenMMLab. All rights reserved.
22
from .base import INPUT_MODELS
3+
from .gpt_oss import GptOssReader
34
from .internlm2 import InternLM2Reader
45
from .llama import LlamaModel, LlamaReader
5-
from .qwen import Qwen3MoeReader
6+
from .qwen import Qwen3MoeReader, Qwen3Reader
67

78

89
class InternVLReader(LlamaReader):
@@ -34,8 +35,59 @@ def __init__(self, new_params: dict, unused_params: dict, last_bin: bool, model_
3435
super().__init__(new_params, unused_params, last_bin, model_cfg, **kwargs)
3536

3637

38+
class InternVL3d5Reader(Qwen3Reader):
39+
attn_layer_prefix = 'language_model.model.layers'
40+
attn_layer_patten = r'language_model\.model\.layers\.([0-9]+).'
41+
tok_embeddings_key = 'language_model.model.embed_tokens.weight'
42+
norm_weight_key = 'language_model.model.norm.weight'
43+
output_weight_key = 'language_model.lm_head.weight'
44+
45+
def __init__(self, new_params: dict, unused_params: dict, last_bin: bool, model_cfg: dict, **kwargs):
46+
model_cfg = model_cfg.get('llm_config') or model_cfg.get('text_config')
47+
super().__init__(new_params, unused_params, last_bin, model_cfg, **kwargs)
48+
49+
50+
class InternVL3d5Qwen3MoEReader(Qwen3MoeReader):
51+
attn_layer_prefix = 'language_model.model.layers'
52+
attn_layer_patten = r'language_model\.model\.layers\.([0-9]+).'
53+
tok_embeddings_key = 'language_model.model.embed_tokens.weight'
54+
norm_weight_key = 'language_model.model.norm.weight'
55+
output_weight_key = 'language_model.lm_head.weight'
56+
57+
def __init__(self, new_params: dict, unused_params: dict, last_bin: bool, model_cfg: dict, **kwargs):
58+
model_cfg = model_cfg.get('llm_config') or model_cfg.get('text_config')
59+
super().__init__(new_params, unused_params, last_bin, model_cfg, **kwargs)
60+
61+
62+
class InternVL3d5GptOSSReader(GptOssReader):
63+
attn_layer_prefix = 'language_model.model.layers'
64+
attn_layer_patten = r'language_model\.model\.layers\.([0-9]+).'
65+
tok_embeddings_key = 'language_model.model.embed_tokens.weight'
66+
norm_weight_key = 'language_model.model.norm.weight'
67+
output_weight_key = 'language_model.lm_head.weight'
68+
69+
def __init__(self, new_params: dict, unused_params: dict, last_bin: bool, model_cfg: dict, **kwargs):
70+
model_cfg = model_cfg.get('llm_config') or model_cfg.get('text_config')
71+
super().__init__(new_params, unused_params, last_bin, model_cfg, **kwargs)
72+
73+
3774
class InternS1Reader(Qwen3MoeReader):
38-
"""InternVL3Reader for InternVL+Qwen3MoE model."""
75+
"""InternS1Reader for internlm/InternS1 model."""
76+
77+
attn_layer_prefix = 'model.language_model.layers'
78+
attn_layer_patten = r'model\.language_model\.layers\.([0-9]+).'
79+
tok_embeddings_key = 'model.language_model.embed_tokens.weight'
80+
norm_weight_key = 'model.language_model.norm.weight'
81+
output_weight_key = 'lm_head.weight'
82+
83+
def __init__(self, new_params: dict, unused_params: dict, last_bin: bool, model_cfg: dict, **kwargs):
84+
model_cfg = model_cfg.get('text_config')
85+
if model_cfg is None:
86+
raise ValueError(f'Missing "text_config" in model config: {model_cfg}')
87+
super().__init__(new_params, unused_params, last_bin, model_cfg, **kwargs)
88+
89+
90+
class InternS1MiniReader(Qwen3Reader):
3991

4092
attn_layer_prefix = 'model.language_model.layers'
4193
attn_layer_patten = r'model\.language_model\.layers\.([0-9]+).'
@@ -58,14 +110,22 @@ def __init__(self, model_path: str, tokenizer_path: str, **kwargs):
58110
super().__init__(model_path, tokenizer_path, **kwargs)
59111
from transformers import AutoConfig
60112
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
113+
114+
arch = config.architectures[0]
115+
if arch == 'InternVLChatModel':
116+
relations = dict(InternLM2ForCausalLM=('internlm2', InternVL2Reader),
117+
LlamaForCausalLM=('llama', InternVLReader),
118+
Qwen2ForCausalLM=('qwen2', InternVLReader),
119+
Qwen3MoeForCausalLM=('qwen3-moe', InternVL3d5Qwen3MoEReader),
120+
Qwen3ForCausalLM=('qwen3', InternVL3d5Reader),
121+
GptOssForCausalLM=('gpt-oss', InternVL3d5GptOSSReader))
122+
elif arch == 'InternS1ForConditionalGeneration':
123+
relations = dict(Qwen3MoeForCausalLM=('qwen3-moe', InternS1Reader),
124+
Qwen3ForCausalLM=('qwen3', InternS1MiniReader))
125+
else:
126+
raise ValueError(f'unsupported model arch {arch}')
61127
self.llm_config = getattr(config, 'llm_config', None) or getattr(config, 'text_config', None)
62128
arch = self.llm_config.architectures[0]
63-
relations = dict(
64-
InternLM2ForCausalLM=('internlm2', InternVL2Reader),
65-
LlamaForCausalLM=('llama', InternVLReader),
66-
Qwen2ForCausalLM=('qwen2', InternVLReader),
67-
Qwen3MoeForCausalLM=('qwen3-moe', InternS1Reader),
68-
)
69129
llm_model, self.Reader = relations[arch]
70130
self.llm_model = INPUT_MODELS.get(llm_model)(model_path=model_path, tokenizer_path=tokenizer_path, **kwargs)
71131

lmdeploy/turbomind/supported_models.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,8 @@ def is_supported(model_path: str):
8282
import os
8383

8484
def _is_head_dim_supported(cfg):
85-
num_attn_head = cfg.num_attention_heads
86-
hidden_size = cfg.hidden_size
87-
return (hidden_size // num_attn_head) in [128, 64]
85+
head_dim = cfg.head_dim if hasattr(cfg, 'head_dim') else cfg.hidden_size // cfg.num_attention_heads
86+
return head_dim in [128, 64]
8887

8988
support_by_turbomind = False
9089
triton_model_path = os.path.join(model_path, 'triton_models')

0 commit comments

Comments
 (0)