44import torch
55
66from lmdeploy .vl .model .base import VISION_MODELS , VisonModel
7- from lmdeploy .vl .utils import hash_multimodal_data
87
98
109def check_qwen_vl_deps_install ():
@@ -26,6 +25,7 @@ class Qwen2VLModel(VisonModel):
2625 """Qwen2VL model."""
2726
2827 _arch = ['Qwen2VLForConditionalGeneration' , 'Qwen2_5_VLForConditionalGeneration' ]
28+ support_prefix_caching : bool = False
2929
3030 def build_preprocessor (self ):
3131 check_qwen_vl_deps_install ()
@@ -41,20 +41,13 @@ def preprocess(self, messages: List[Dict]) -> List[Dict]:
4141 outputs = []
4242 for image , params in images :
4343 image = image .convert ('RGB' )
44- hash_value = None
45- if self .enable_prefix_caching :
46- hash_value = hash_multimodal_data (model_id = self .model_path , image = image , params = params )
4744 item = dict (type = 'image' , image = image )
4845 item .update ({key : params [key ] for key in params .keys () if key in optional_keys })
4946 image_inputs , _ = process_vision_info ([dict (content = [item ])])
5047 result = self .processor .image_processor (images = image_inputs , videos = None , return_tensors = 'pt' )
5148 merge_length = self .processor .image_processor .merge_size ** 2
5249 image_tokens = result ['image_grid_thw' ].prod (dim = 1 ) // merge_length
53- result .update (
54- dict (image_size = image .size ,
55- image_tokens = image_tokens ,
56- image_token_id = self .image_token_id ,
57- hash_value = hash_value ))
50+ result .update (dict (image_size = image .size , image_tokens = image_tokens , image_token_id = self .image_token_id ))
5851 outputs .append (result )
5952 messages .append (dict (role = 'preprocess' , content = outputs ))
6053 return messages
0 commit comments