pytorch · tom-arm · Jun 26, 2026
@@ -11,10 +11,12 @@
 import pytest
 import torch
 import torch.nn.functional as F
+from executorch.backends.arm.ao_ext import MXFPOpConfig
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.models.Qwen3_VL.qwen3_vl_test_config import (
     get_qwen3_vl_2b_instruct_checkpoint_config,
 )
+from executorch.backends.arm.test.ops.mxfp.common import MXFPTosaPipelineFP
 from executorch.backends.arm.test.tester.test_pipeline import (
     TosaPipelineFP,
     VgfPipeline,
@@ -25,6 +27,7 @@
 )
 
 input_t = Tuple[torch.Tensor, ...]
+aten_op_mxfp_linear = "torch.ops.tosa_mxfp.linear.default"
 
 
 def _make_qwen3_vl_2b_instruct_layer_config():
@@ -104,6 +107,10 @@ def _to_bfloat16_model_and_floating_inputs(
     )
 
 
+def _is_linear(module: torch.nn.Module, _fqn: str) -> bool:
+    return isinstance(module, torch.nn.Linear)
+
+
 class TextModelWrapper(Qwen3VLModelTestModule):
     def __init__(self, config) -> None:
         super().__init__()
@@ -300,3 +307,35 @@ def test_qwen3_vl_full_models_vgf_no_quant_bf16(test_case: Qwen3VLModelTestCase)
             tosa_spec="TOSA-1.0+FP+bf16",
         )
         pipeline.run()
+
+
+@pytest.mark.slow
+def test_qwen3_vl_text_model_tosa_mxfp8_bf16():
+    # The Qwen 3 VL FP8 model only quantizes the TextModel
+    model, inputs = TextModelWrapper.prepare_model_and_inputs()
+    model, inputs = _to_bfloat16_model_and_floating_inputs(model, inputs)
+    mxfp_config = MXFPOpConfig(weight_dtype=torch.float8_e4m3fn)
+    with torch.no_grad():
+        pipeline = MXFPTosaPipelineFP[input_t](
+            model,
+            inputs,
+            aten_op=aten_op_mxfp_linear,
+            exir_op=[],
+            filter_fn=_is_linear,
+            frobenius_threshold=0.1,
+            cosine_threshold=0.98,
+            mxfp_config=mxfp_config,
+            tosa_version="1.1",
+            tosa_extensions=["bf16", "mxfp"],
+        )
+        # Check all linear layers are converted to MXFP
+        linear_count = sum(
+            _is_linear(submodule, name) for name, submodule in model.named_modules()
+        )
+        pipeline.add_stage_after(
+            "export",
+            pipeline.tester.check_count,
+            {aten_op_mxfp_linear: linear_count},
+            suffix="mxfp_linear",
+        )
+        pipeline.run()