Commit 2f54ca6

[Model] Add support for deepseek-vl2-tiny model (vllm-project#12068)
Signed-off-by: Isotr0py <[email protected]>
Parent: ba222b5

File tree: 6 files changed (+22 −21)

Diff for: docs/source/models/supported_models.md

+2 −3

`````diff
@@ -618,7 +618,7 @@ See [this page](#generative-models) for more information on how to use generative models.
 * - `DeepseekVLV2ForCausalLM`
   - DeepSeek-VL2
   - T + I<sup>+</sup>
-  - `deepseek-ai/deepseek-vl2-tiny`(WIP), `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2` etc. (see note)
+  - `deepseek-ai/deepseek-vl2-tiny`, `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2` etc. (see note)
   -
   - ✅︎
   - ✅︎
@@ -768,9 +768,8 @@ See [this page](#generative-models) for more information on how to use generative models.
 <sup>+</sup> Multiple items can be inputted per text prompt for this modality.
 
 ````{note}
-The `deepseek-ai/deepseek-vl2-tiny` is not supported yet.
-
 To use `DeepSeek-VL2` series models, you need to install a fork version `deepseek_vl2` package:
+
 ```shell
 pip install git+https://github.com/Isotr0py/DeepSeek-VL2.git
 ```
`````
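
With the WIP flag gone, a minimal usage sketch may help readers: the `hf_overrides` value and the `<|User|>`/`<|Assistant|>` chat format are taken from the test changes in this commit, while the image path is a placeholder, not a repo asset.

```python
from PIL import Image

from vllm import LLM, SamplingParams

# Architecture override mirrors vllm_runner_kwargs in the test config below.
llm = LLM(model="deepseek-ai/deepseek-vl2-tiny",
          max_model_len=4096,
          hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]})

# Prompt format as used by the test's prompt_formatter.
prompt = ("<|User|>: <image>\nWhat's the content in the center of the image?"
          "\n\n<|Assistant|>: ")

outputs = llm.generate(
    {
        "prompt": prompt,
        # "stop_sign.jpg" is a placeholder image path.
        "multi_modal_data": {"image": Image.open("stop_sign.jpg")},
    },
    SamplingParams(max_tokens=64),
)
print(outputs[0].outputs[0].text)
```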

Diff for: examples/offline_inference/vision_language.py

+1 −1

```diff
@@ -70,7 +70,7 @@ def run_chameleon(question: str, modality: str):
 def run_deepseek_vl2(question: str, modality: str):
     assert modality == "image"
 
-    model_name = "deepseek-ai/deepseek-vl2-small"
+    model_name = "deepseek-ai/deepseek-vl2-tiny"
 
     llm = LLM(model=model_name,
               max_model_len=4096,
```

Diff for: examples/offline_inference/vision_language_multi_image.py

+1 −1

```diff
@@ -55,7 +55,7 @@ def load_aria(question, image_urls: List[str]) -> ModelRequestData:
 
 
 def load_deepseek_vl2(question: str, image_urls: List[str]):
-    model_name = "deepseek-ai/deepseek-vl2-small"
+    model_name = "deepseek-ai/deepseek-vl2-tiny"
 
     llm = LLM(model=model_name,
               max_model_len=4096,
```
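
Alongside that one-line swap, a hedged sketch of the multi-image call path this script exercises; `limit_mm_per_prompt` is vLLM's knob for allowing more than one image per prompt, and the URLs and download helper here are placeholders:

```python
from io import BytesIO

import requests
from PIL import Image

from vllm import LLM, SamplingParams

# hf_overrides mirrors the test config in this commit; limit_mm_per_prompt
# raises vLLM's default cap of one image per prompt.
llm = LLM(model="deepseek-ai/deepseek-vl2-tiny",
          max_model_len=4096,
          hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]},
          limit_mm_per_prompt={"image": 2})


def fetch_image(url: str) -> Image.Image:
    # Placeholder helper: download one image into a PIL object.
    return Image.open(BytesIO(requests.get(url, timeout=10).content))


image_urls = ["https://example.com/car.jpg",    # placeholder URLs
              "https://example.com/tower.jpg"]
prompt = ("<|User|>: image_1:<image>\nimage_2:<image>\n"
          "Which image can we see the car and the tower?\n\n<|Assistant|>: ")

outputs = llm.generate(
    {
        "prompt": prompt,
        "multi_modal_data": {"image": [fetch_image(u) for u in image_urls]},
    },
    SamplingParams(max_tokens=64),
)
print(outputs[0].outputs[0].text)
```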

Diff for: tests/models/decoder_only/vision_language/test_models.py

+9 −11

```diff
@@ -9,6 +9,7 @@
 
 import pytest
 from transformers import AutoModelForVision2Seq
+from transformers import __version__ as TRANSFORMERS_VERSION
 from transformers.utils import is_flash_attn_2_available
 
 from vllm.platforms import current_platform
@@ -189,30 +190,27 @@
         dtype="bfloat16",
     ),
     "deepseek_vl_v2": VLMTestInfo(
-        models=["deepseek-ai/deepseek-vl2-small"],
+        models=["deepseek-ai/deepseek-vl2-tiny"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        dtype="bfloat16",
         prompt_formatter=lambda img_prompt: f"<|User|>: {img_prompt}\n\n<|Assistant|>: ", # noqa: E501
         max_model_len=4096,
         max_num_seqs=2,
         single_image_prompts=IMAGE_ASSETS.prompts({
-            "stop_sign": "<image>\nWhat's the color of the stop sign and car?",
-            "cherry_blossom": "<image>\nWhat's the color of the tower?",
+            "stop_sign": "<image>\nWhat's the content in the center of the image?", # noqa: E501
+            "cherry_blossom": "<image>\nPlease infer the season with reason in details.", # noqa: E501
         }),
-        multi_image_prompt="image_1:<image>\nimage_2:<image>\nDescribe the two images shortly.", # noqa: E501
+        multi_image_prompt="image_1:<image>\nimage_2:<image>\nWhich image can we see the car and the tower?", # noqa: E501
         vllm_runner_kwargs={"hf_overrides": {"architectures": ["DeepseekVLV2ForCausalLM"]}}, # noqa: E501
-        image_size_factors=[(0.10, 0.15)],
         patch_hf_runner=model_utils.deepseekvl2_patch_hf_runner,
         postprocess_inputs=model_utils.cast_dtype_post_processor("images"),
         hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output,
         stop_str=["<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"], # noqa: E501
-        num_logprobs=5,
+        image_size_factors=[(), (1.0, ), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
         marks=[
             pytest.mark.skipif(
-                not is_flash_attn_2_available(),
-                reason="Model needs flash-attn for numeric convergence.",
-            ),
-            large_gpu_mark(min_gb=48),
+                TRANSFORMERS_VERSION >= "4.48.0",
+                reason="HF model is not compatible with transformers>=4.48.0",
+            )
         ],
     ),
     "fuyu": VLMTestInfo(
```

Diff for: tests/models/registry.py

+1 −2

```diff
@@ -181,8 +181,7 @@ class _HfExamplesInfo:
                                               trust_remote_code=True),
     "ChatGLMForConditionalGeneration": _HfExamplesInfo("chatglm2-6b",
                                                        is_available_online=False),
-    # TODO(Isotr0py): Use deepseek-vl2-tiny for test after it's supported
-    "DeepseekVLV2ForCausalLM": _HfExamplesInfo("deepseek-ai/deepseek-vl2-small"), # noqa: E501
+    "DeepseekVLV2ForCausalLM": _HfExamplesInfo("deepseek-ai/deepseek-vl2-tiny"), # noqa: E501
     "FuyuForCausalLM": _HfExamplesInfo("adept/fuyu-8b"),
     "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m"),
     "InternVLChatModel": _HfExamplesInfo("OpenGVLab/InternVL2-1B",
```

Diff for: vllm/model_executor/models/deepseek_vl2.py

+8 −3

```diff
@@ -356,13 +356,18 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
                 f"Only 2D tile_tag is supported currently, got: {self.tile_tag}"
             )
 
+        if self.text_config.topk_method == "noaux_tc":
+            architectures = ["DeepseekV3ForCausalLM"]
+        elif not self.text_config.use_mla:
+            architectures = ["DeepseekForCausalLM"]
+        else:
+            architectures = ["DeepseekV2ForCausalLM"]
+
         self.language_model = init_vllm_registered_model(
             vllm_config=vllm_config,
             hf_config=self.text_config,
             prefix=maybe_prefix(prefix, "language"),
-            architectures=["DeepseekV3ForCausalLM"]
-            if self.text_config.topk_method == "noaux_tc" else
-            ["DeepseekV2ForCausalLM"],
+            architectures=architectures,
         )
 
         self.make_empty_intermediate_tensors = (
```
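
The replaced inline conditional only knew the V2 and V3 backbones; the new branch adds an MLA-free `DeepseekForCausalLM` path, which is what lets the tiny checkpoint load. Summarized as a standalone sketch (the helper name is hypothetical; the attribute and architecture names come straight from the diff):

```python
from typing import List


def select_language_arch(text_config) -> List[str]:
    # "noaux_tc" top-k routing identifies a DeepSeek-V3 text backbone.
    if text_config.topk_method == "noaux_tc":
        return ["DeepseekV3ForCausalLM"]
    # Backbones without multi-head latent attention (MLA) map to the
    # original Deepseek implementation; this is the branch that enables
    # deepseek-vl2-tiny.
    if not text_config.use_mla:
        return ["DeepseekForCausalLM"]
    # Everything else keeps the previous DeepSeek-V2 default.
    return ["DeepseekV2ForCausalLM"]
```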
