
Commit b400ff2

Hash bug fix PR (#384)

Adding model_card_name to the hash. Including the model card name in the hash fixes an error that occurs when two models share the same architecture but have different model card names and weights. If more parameters are ever needed to keep the hash unique, they can be passed through kwargs, as implemented here.

Signed-off-by: Dipankar Sarkar <[email protected]>
1 parent 24ecc1e commit b400ff2
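
The gist of the fix, as a minimal standalone sketch: to_hashable below is a stand-in for QEfficient's helper of the same name (a deterministic JSON serialization is assumed), and the hash composition mirrors the model_hash properties touched in this diff.

import hashlib
import json


def to_hashable(obj) -> bytes:
    # Stand-in for QEfficient's to_hashable: any deterministic serialization works.
    return json.dumps(obj, sort_keys=True).encode("utf-8")


def model_hash(config: dict, transform_names: list, model_card_name: str) -> str:
    # Hashing only the architecture config collides when two model cards
    # share an architecture; mixing in the model card name disambiguates.
    mhash = hashlib.sha256()
    mhash.update(to_hashable(config))
    mhash.update(to_hashable(transform_names))
    mhash.update(to_hashable(model_card_name))
    return mhash.hexdigest()[:16]


# Same config, different model cards -> different cache hashes.
cfg = {"num_hidden_layers": 32, "hidden_size": 4096}
assert model_hash(cfg, [], "org/model-a") != model_hash(cfg, [], "org/model-b")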

File tree

4 files changed: 26 additions, 13 deletions

- QEfficient/transformers/models/modeling_auto.py
- tests/transformers/models/test_causal_lm_models.py
- tests/transformers/models/test_embedding_models.py
- tests/transformers/models/test_speech_seq2seq_models.py

QEfficient/transformers/models/modeling_auto.py

Lines changed: 19 additions & 8 deletions
@@ -85,7 +85,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, *args, **kwargs):
         kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False})
 
         model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
-        return cls(model)
+        return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path)
 
     @property
     def model_name(self) -> str:
@@ -160,6 +160,7 @@ def __init__(self, model: nn.Module, **kwargs):
         super().__init__(model)
         self.model.config.use_cache = True
         self.num_layers = model.config.num_hidden_layers
+        self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None)
 
     @classmethod
     @with_replaced_quantizers
@@ -212,7 +213,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
                 model, kv_offload=kv_offload
             )
 
-        return cls(model)
+        return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path)
 
     @property
     def model_hash(self) -> str:
@@ -226,6 +227,9 @@ def model_hash(self) -> str:
         mhash = hashlib.sha256()
         mhash.update(to_hashable(self.model.config.to_diff_dict()))
         mhash.update(to_hashable(self._transform_names()))
+
+        mhash.update(to_hashable(self.pretrained_model_name_or_path))
+
         mhash = mhash.hexdigest()[:16]
         return mhash
 
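Consolidated, the model_hash property after this hunk reads roughly as below; the surrounding class context and the hashlib/to_hashable imports are assumed unchanged from the original file.

@property
def model_hash(self) -> str:
    # Architecture config, applied transforms, and now the model card name
    # all feed the cache hash, so same-architecture model cards no longer collide.
    mhash = hashlib.sha256()
    mhash.update(to_hashable(self.model.config.to_diff_dict()))
    mhash.update(to_hashable(self._transform_names()))
    mhash.update(to_hashable(self.pretrained_model_name_or_path))
    mhash = mhash.hexdigest()[:16]
    return mhash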

@@ -441,6 +445,7 @@ def model_hash(self) -> str:
         mhash.update(to_hashable(self.model.model.config.to_diff_dict()))
         mhash.update(to_hashable(self._transform_names()))
         mhash.update(to_hashable({"QEffVisionEncoderForTextImageToTextModel": True}))
+        mhash.update(to_hashable(self.model.model.pretrained_model_name_or_path))
         mhash = mhash.hexdigest()[:16]
         return mhash
 

@@ -504,6 +509,7 @@ def model_hash(self) -> str:
         mhash.update(to_hashable(self.model.config.to_diff_dict()))
         mhash.update(to_hashable(self._transform_names()))
         mhash.update(to_hashable({"QEffCausalLMForTextImageToTextModel": True}))
+        mhash.update(to_hashable(self.model.model.pretrained_model_name_or_path))
         mhash = mhash.hexdigest()[:16]
         return mhash
 

@@ -531,9 +537,9 @@ def __init__(
             raise NotImplementedError("Continuous batching is not supported for image-text-to-text models yet.")
         self.model = model
         self.config = model.config
+        self.model.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None)
         self.vision_model = QEffVisionEncoderForTextImageToTextModel(model)
         self.lang_model = QEffCausalLMForTextImageToTextModel(model)
-
         self.input_shapes, self.output_names = None, None
 
     @property
@@ -553,7 +559,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
 
         kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False})
         model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
-        return cls(model, **kwargs)
+        return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs)
 
     @property
     def onnx_path(self):
@@ -878,6 +884,7 @@ def __init__(
             self.model.config.vision_config.use_flash_attn = "false"
         else:
             self.model.config.text_config.use_cache = True
+        self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None)
 
     @classmethod
     def from_pretrained(
@@ -900,7 +907,7 @@ def from_pretrained(
             config.vision_config.use_flash_attn = "false"
         model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, config, *args, **kwargs)
 
-        return cls(model, **kwargs)
+        return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs)
 
     def export(
         self,
@@ -1139,6 +1146,7 @@ def model_hash(self) -> str:
         mhash.update(to_hashable(self.model.config.to_diff_dict()))
         mhash.update(to_hashable(self._transform_names()))
         mhash.update(to_hashable({"QEFFAutoModelForImageTextToText1QPC": True}))
+        mhash.update(to_hashable(self.pretrained_model_name_or_path))
         mhash = mhash.hexdigest()[:16]
         return mhash
 

@@ -1254,7 +1262,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, kv_offload: Optiona
 
         kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False})
         model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
-        return cls(model, kv_offload=kv_offload, **kwargs)
+        return cls(model, kv_offload=kv_offload, pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs)
 
 
 MISCLASSIFIED_CAUSAL_LM_TO_QEFF_AUTO_CLASS_MAP = {"InternVLChatModel": QEFFAutoModelForImageTextToText}
@@ -1319,13 +1327,13 @@ def __init__(
         )
 
         super().__init__(model)
-
         # Set use_cache=True to get KV values as output during ONNX export
         self.model.config.use_cache = True
         self.num_layers = model.config.num_hidden_layers
         self.continuous_batching = continuous_batching
         self.model, transformed = SpDTransform.apply(self.model, qaic_config, **kwargs)
         self.is_tlm = transformed
+        self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None)
 
     @property
     def model_name(self) -> str:
@@ -1400,11 +1408,11 @@ def from_pretrained(
             return MISCLASSIFIED_CAUSAL_LM_TO_QEFF_AUTO_CLASS_MAP[model.__class__.__name__](
                 model, kv_offload=kv_offload
             )
-
         return cls(
             model,
             continuous_batching=continuous_batching,
             qaic_config=qaic_config,
+            pretrained_model_name_or_path=pretrained_model_name_or_path,
             **kwargs,
         )
 

@@ -1416,6 +1424,7 @@ def model_hash(self) -> str:
         mhash.update(to_hashable({"continuous_batching": self.continuous_batching}))
         mhash.update(to_hashable({"is_tlm": self.is_tlm}))
         mhash.update(to_hashable(self._transform_names()))
+        mhash.update(to_hashable(self.pretrained_model_name_or_path))
         mhash = mhash.hexdigest()[:16]
         return mhash
 

@@ -1756,6 +1765,7 @@ def __init__(self, model: nn.Module, **kwargs):
         super().__init__(model)
         self.model.config.use_cache = True
         self.num_layers = model.config.num_hidden_layers
+        self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None)
 
     @property
     def model_hash(self) -> str:
@@ -1769,6 +1779,7 @@ def model_hash(self) -> str:
         mhash = hashlib.sha256()
         mhash.update(to_hashable(self.model.config.to_diff_dict()))
         mhash.update(to_hashable(self._transform_names()))
+        mhash.update(to_hashable(self.pretrained_model_name_or_path))
         mhash = mhash.hexdigest()[:16]
         return mhash
 

tests/transformers/models/test_causal_lm_models.py

Lines changed: 5 additions & 3 deletions
@@ -123,7 +123,7 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(
     pytorch_hf_tokens = api_runner.run_hf_model_on_pytorch(model_hf)
 
     is_tlm = False if num_speculative_tokens is None else True
-    qeff_model = QEFFAutoModelForCausalLM(model_hf, is_tlm=is_tlm)
+    qeff_model = QEFFAutoModelForCausalLM(model_hf, is_tlm=is_tlm, pretrained_model_name_or_path=model_name)
 
     pytorch_kv_tokens = api_runner.run_kv_model_on_pytorch(qeff_model.model)
 
@@ -183,7 +183,9 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(
     pytorch_hf_tokens = api_runner.run_hf_model_on_pytorch_CB(model_hf)
     pytorch_hf_tokens = np.vstack(pytorch_hf_tokens)
 
-    qeff_model = QEFFAutoModelForCausalLM(model_hf, continuous_batching=True, is_tlm=is_tlm)
+    qeff_model = QEFFAutoModelForCausalLM(
+        model_hf, continuous_batching=True, is_tlm=is_tlm, pretrained_model_name_or_path=model_name
+    )
     onnx_model_path = qeff_model.export()
 
     if not get_available_device_id():
@@ -219,7 +221,7 @@ def test_causal_lm_export_with_deprecated_api(model_name):
     model_config["n_layer"] = 1
     model, _ = load_causal_lm_model(model_config)
     tokenizer = load_hf_tokenizer(pretrained_model_name_or_path=model_name)
-    qeff_model = QEFFAutoModelForCausalLM(model)
+    qeff_model = QEFFAutoModelForCausalLM(model, model_name=model_name, pretrained_model_name_or_path=model_name)
     new_api_onnx_model_path = qeff_model.export()
     _, old_api_onnx_model_path = qualcomm_efficient_converter(
         model_name=model_name, model_kv=qeff_model, tokenizer=tokenizer

tests/transformers/models/test_embedding_models.py

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ def check_embed_pytorch_vs_ort_vs_ai100(
     pt_outputs = pt_model(**inputs)
     pt_embeddings = pt_outputs[0][0].detach().numpy()
     # Pytorch transformed model
-    qeff_model = QEFFAutoModel(pt_model)
+    qeff_model = QEFFAutoModel(pt_model, pretrained_model_name_or_path=model_name)
     qeff_pt_outputs = qeff_model.generate(inputs=inputs, runtime_ai100=False)
     qeff_pt_embeddings = qeff_pt_outputs[0][0].detach().numpy()
     mad = np.mean(np.abs(pt_embeddings - qeff_pt_embeddings))

tests/transformers/models/test_speech_seq2seq_models.py

Lines changed: 1 addition & 1 deletion
@@ -314,7 +314,7 @@ def check_seq2seq_pytorch_vs_kv_vs_ort_vs_ai100(
 
     pytorch_hf_tokens = run_seq2seq_pytorch_hf(model_hf, processor, data, sample_rate, ctx_len)
 
-    qeff_model = QEFFAutoModelForSpeechSeq2Seq(model_hf)
+    qeff_model = QEFFAutoModelForSpeechSeq2Seq(model_hf, pretrained_model_name_or_path=model_name)
 
     pytorch_kv_tokens = run_seq2seq_pytorch_with_kv(qeff_model, processor, data, sample_rate, ctx_len)
 
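
End to end, callers need not pass anything extra: each from_pretrained override now forwards the model card name into the constructor, and from there into the hash. A hypothetical usage sketch (the model names are illustrative, not real model cards):

from QEfficient import QEFFAutoModelForCausalLM

# Two hypothetical fine-tunes sharing one architecture config. Before this
# fix their export caches could collide; now the model card name is part of
# the hash, so each gets a distinct one.
model_a = QEFFAutoModelForCausalLM.from_pretrained("org/base-model")
model_b = QEFFAutoModelForCausalLM.from_pretrained("org/base-model-finetuned")
assert model_a.model_hash != model_b.model_hash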
