Skip to content

Commit f8b5db6

Browse files
authored
Removed warning and override of mxfp6 for internal use (#277)
Compilation fix; enabled MXFP6 for the vision encoder. --------- Signed-off-by: Amit Raj <[email protected]>
1 parent 33a4b51 commit f8b5db6

File tree

1 file changed

+2
-23
lines changed

1 file changed

+2
-23
lines changed

QEfficient/transformers/models/modeling_auto.py

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,6 @@
5252
from QEfficient.utils.cache import to_hashable
5353
from QEfficient.utils.logging_utils import logger
5454

55-
MODELS_WITH_ACCURACY_ISSUE_FOR_MXFP6 = ["MllamaForConditionalGeneration"]
56-
5755

5856
class QEFFTransformersBase(QEFFBaseModel):
5957
"""
@@ -627,17 +625,12 @@ def compile(
627625
):
628626
self.export()
629627

630-
if mxfp6_matmul and self.model_name in MODELS_WITH_ACCURACY_ISSUE_FOR_MXFP6:
631-
logger.warning(
632-
"Due to accuracy issues of vision model fixing it's precision to fp16, while language model will be compiled for mxfp6"
633-
)
634-
635628
self.vision_model._compile(
636629
compile_dir,
637630
compile_only=True,
638631
specializations=specializations["vision"],
639632
convert_to_fp16=True,
640-
mxfp6_matmul=False,
633+
mxfp6_matmul=mxfp6_matmul,
641634
mdp_ts_num_devices=num_devices,
642635
aic_num_cores=num_cores,
643636
custom_io=custom_io_vision,
@@ -946,11 +939,6 @@ def compile(
946939
if output_name.endswith("_RetainedState"):
947940
custom_io[output_name] = kv_cache_dtype
948941

949-
if self.model_name in MODELS_WITH_ACCURACY_ISSUE_FOR_MXFP6 and mxfp6_matmul:
950-
logger.warning(
951-
f"It is advised to use fp16 precision during compilation for {self.model.__class__.__name__} to avoid accuracy issues, got mxfp6_matmul=True"
952-
)
953-
954942
self._compile(
955943
onnx_path,
956944
compile_dir,
@@ -1147,16 +1135,7 @@ class QEFFAutoModelForImageTextToText:
11471135

11481136
_hf_auto_class = AutoModelForImageTextToText
11491137

1150-
def __new__(self, model: nn.Module, kv_offload: Optional[bool] = None, **kwargs):
1151-
if model.config.architectures[0] in MODELS_WITH_ACCURACY_ISSUE_FOR_MXFP6 and not kv_offload:
1152-
# For models with mxfp6 accuracy issue, we will use kv_offload=True by default
1153-
if kv_offload is None:
1154-
kv_offload = True
1155-
else:
1156-
logger.warning(f"Advised to use kv_offload=True for {model.__class__.__name__}")
1157-
elif kv_offload is None:
1158-
kv_offload = False
1159-
1138+
def __new__(self, model: nn.Module, kv_offload: Optional[bool] = True, **kwargs):
11601139
if kv_offload:
11611140
return _QEffAutoModelForImageTextToTextDualQPC(model, **kwargs)
11621141
else:

0 commit comments

Comments (0)