pytorch
diff --git a/backends/arm/_passes/insert_rescales_pass.py b/backends/arm/_passes/insert_rescales_pass.py
Lines changed: 3 additions & 0 deletions
diff --git a/backends/arm/_passes/rewrite_mxfp_linear.py b/backends/arm/_passes/rewrite_mxfp_linear.py
Lines changed: 17 additions & 2 deletions
diff --git a/backends/arm/ao_ext/mxfp.py b/backends/arm/ao_ext/mxfp.py
Lines changed: 5 additions & 1 deletion
diff --git a/backends/arm/ao_ext/ops/mxfp_linear_op.py b/backends/arm/ao_ext/ops/mxfp_linear_op.py
Lines changed: 21 additions & 5 deletions
diff --git a/backends/arm/operators/op_tosa_matmul_t_block_scaled.py b/backends/arm/operators/op_tosa_matmul_t_block_scaled.py
Lines changed: 1 addition & 1 deletion
diff --git a/backends/arm/process_node.py b/backends/arm/process_node.py
Lines changed: 59 additions & 28 deletions
diff --git a/backends/arm/test/misc/test_mxfp_linear_ao.py b/backends/arm/test/misc/test_mxfp_linear_ao.py
Lines changed: 48 additions & 2 deletions
diff --git a/backends/arm/test/misc/test_process_node.py b/backends/arm/test/misc/test_process_node.py
Lines changed: 38 additions & 2 deletions
@@ -18,6 +18,7 @@
 
 from executorch.backends.arm._passes.quant_args import QuantArgs
 from executorch.backends.arm.constants import DQ_OPS, Q_OPS
+from executorch.backends.arm.tosa.mapping import TosaSpecialDtype
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch.fx import GraphModule, Node
@@ -49,6 +50,8 @@ def _ensure_uint8_io_only(self, graph_module: GraphModule) -> None:
                 continue
             if meta_val.dtype != torch.uint8:
                 continue
+            if node.meta.get(TosaSpecialDtype.meta_key()) == TosaSpecialDtype.FP4E2M1:
+                continue
             if node.op in ("placeholder", "output"):
                 continue
             if node.op == "call_function" and node.target == operator.getitem:
 
@@ -13,10 +13,22 @@
     create_node,
     get_first_fake_tensor,
 )
+from executorch.backends.arm.tosa.mapping import TosaSpecialDtype
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
 
 
+def _get_block_scaled_payload_dtype(qdata: torch.Tensor) -> torch.dtype:
+    if qdata.dtype == torch.uint8:
+        return torch.float4_e2m1fn_x2
+    return qdata.dtype
+
+
+def _mark_fp4_payload(node: torch.fx.Node, payload_dtype: torch.dtype) -> None:
+    if payload_dtype == torch.float4_e2m1fn_x2:
+        node.meta[TosaSpecialDtype.meta_key()] = TosaSpecialDtype.FP4E2M1
+
+
 class RewriteMXFPLinearPass(ArmPass):
     """Rewrite ``tosa_mxfp.linear`` into explicit TOSA MXFP operators.
 
@@ -90,6 +102,8 @@ def _create_block_scaled_inputs(
         input_fake = get_first_fake_tensor(input_node)
         weight_qdata_fake = get_first_fake_tensor(weight_qdata_node)
         weight_scale_fake = get_first_fake_tensor(weight_scale_node)
+        weight_dtype = _get_block_scaled_payload_dtype(weight_qdata_fake)
+        _mark_fp4_payload(weight_qdata_node, weight_dtype)
 
         batches = reduce(operator.mul, input_fake.shape[:-1], 1)
         input_reshape_shape = [1, batches, input_fake.shape[-1]]
@@ -109,13 +123,13 @@ def _create_block_scaled_inputs(
             graph=graph,
             op_target=exir_ops.backend.tosa.CAST_TO_BLOCK_SCALED.default,
             args=(input_reshaped, block_size),
-            kwargs={"output_dtype": weight_qdata_fake.dtype},
+            kwargs={"output_dtype": weight_dtype},
             from_node=mxfp_linear_node,
         )
         cast_node.meta["val"] = exir_ops.backend.tosa.CAST_TO_BLOCK_SCALED.default(
             get_first_fake_tensor(input_reshaped),
             block_size,
-            output_dtype=weight_qdata_fake.dtype,
+            output_dtype=weight_dtype,
         )
 
         input_qdata_node = create_node(
@@ -126,6 +140,7 @@ def _create_block_scaled_inputs(
             from_node=mxfp_linear_node,
         )
         input_qdata_node.meta["val"] = cast_node.meta["val"][0]
+        _mark_fp4_payload(input_qdata_node, weight_dtype)
 
         input_scale_node = create_node(
             graph=graph,
 
@@ -32,7 +32,11 @@ def block_size(self) -> int:
         return 32
 
     def __post_init__(self) -> None:
-        if self.weight_dtype not in (torch.float8_e4m3fn, torch.float8_e5m2):
+        if self.weight_dtype not in (
+            torch.float4_e2m1fn_x2,
+            torch.float8_e4m3fn,
+            torch.float8_e5m2,
+        ):
             raise ValueError(f"Unsupported weight_dtype: {self.weight_dtype}")
         if not isinstance(self.weight_scaling_mode, ScaleCalculationMode):
             raise ValueError(
 
@@ -23,6 +23,19 @@
 )
 
 
+def _get_mx_elem_dtype(weight_qdata: torch.Tensor) -> torch.dtype:
+    if weight_qdata.dtype == torch.uint8:
+        return torch.float4_e2m1fn_x2
+    return weight_qdata.dtype
+
+
+def _get_num_input_features(weight_qdata: torch.Tensor) -> int:
+    num_input_features = weight_qdata.shape[-1]
+    if _get_mx_elem_dtype(weight_qdata) == torch.float4_e2m1fn_x2:
+        num_input_features *= 2
+    return num_input_features
+
+
 @torch.library.register_fake("tosa_mxfp::linear", lib=MXFP_TOSA_LIB)  # type: ignore[misc]
 def _mxfp_linear_fake(
     input: torch.Tensor,
@@ -39,15 +52,16 @@ def _mxfp_linear_fake(
         raise ValueError(
             f"Expected weight_qdata batch dim to be 1, got {weight_qdata.shape[0]}"
         )
-    if input.shape[-1] != weight_qdata.shape[-1]:
+    num_input_features = _get_num_input_features(weight_qdata)
+    if input.shape[-1] != num_input_features:
         raise ValueError(
             f"Input last dim {input.shape[-1]} must match linear in_features "
-            f"{weight_qdata.shape[-1]}"
+            f"{num_input_features}"
         )
     expected_scale_shape = (
         1,
         weight_qdata.shape[1],
-        weight_qdata.shape[-1] // block_size,
+        num_input_features // block_size,
     )
     if tuple(weight_scale.shape) != expected_scale_shape:
         raise ValueError(
@@ -92,17 +106,19 @@ def _mxfp_linear_cpu(
     if weight_qdata.ndim != 3 or weight_scale.ndim != 3:
         raise ValueError("Expected rank-3 weight tensors for MXFP linear")
 
+    elem_dtype = _get_mx_elem_dtype(weight_qdata)
+
     # Cast the input to block-scaled format and back again to match the
     # expected input format of the TOSA
     dequantized_input = _cast_to_block_scaled_cpu_ref(
         input,
-        weight_qdata.dtype,
+        elem_dtype,
         block_size,
     )
     dequantized_weight = to_dtype(
         weight_qdata,
         weight_scale,
-        weight_qdata.dtype,
+        elem_dtype,
         block_size,
         torch.float32,
     )
 
@@ -53,7 +53,7 @@ def define_node(
         validate_valid_dtype(
             self.target,
             [A_data, B_data],
-            [ts.DType.FP8E4M3, ts.DType.FP8E5M2],
+            [ts.DType.FP4E2M1, ts.DType.FP8E4M3, ts.DType.FP8E5M2],
             self.tosa_spec,
         )
         validate_valid_dtype(
 
@@ -56,14 +56,68 @@ def _tensor_to_numpy(tensor: torch.Tensor) -> np.ndarray:
 
 
 def _prepare_const_values_for_tosa_dtype(
-    values: np.ndarray, tosa_dtype: ts.DType
+    values: np.ndarray, tosa_arg: TosaArg
 ) -> np.ndarray:
     """Normalize constant storage to the expected TOSA serializer dtype."""
-    if tosa_dtype == ts.DType.INT48 and values.dtype != np.int64:
+    if tosa_arg.dtype == ts.DType.INT48 and values.dtype != np.int64:
         return values.astype(np.int64)
     return values
 
 
+def _get_const_shape(values: np.ndarray, tosa_arg: TosaArg) -> list[int]:
+    """Return the TOSA logical shape for a serialized constant."""
+    if tosa_arg.dtype == ts.DType.FP4E2M1:
+        return normalize_symint(tosa_arg.shape)
+    return normalize_symint(values.shape)
+
+
+def _is_packed_fp4_const(values: np.ndarray, tosa_arg: TosaArg) -> bool:
+    """FP4 elements are pairwise in each byte of a uint8 tensor.
+
+    This function checks if the given values and TOSA argument represent a
+    packed FP4 constant.
+
+    """
+
+    return (
+        tosa_arg.dtype == ts.DType.FP4E2M1
+        and values.dtype == np.uint8
+        and values.shape[-1] * 2 == tosa_arg.shape[-1]
+    )
+
+
+def _add_const(
+    tosa_graph: Any,
+    values: np.ndarray,
+    tosa_arg: TosaArg,
+    name: str,
+) -> None:
+    """Add a constant, preserving packed FP4 storage when required.
+
+    Args:
+        tosa_graph: Serializer graph that receives the constant through
+            ``addConst``.
+        values: Constant data to serialize.
+        tosa_arg: Supplies the TOSA dtype and, for FP4, the logical shape.
+        name: Name the constant tensor is registered under.
+
+    """
+    if _is_packed_fp4_const(values, tosa_arg):
+        # TOSA FP4 tensors have logical FP4 shape, but constants are stored as
+        # packed bytes (two values per byte). Add the raw bytes as INT8 first
+        # then set TOSA dtype and shape correctly on the tensor metadata.
+        tosa_graph.addConst(
+            normalize_symint(values.shape),
+            ts.DType.INT8,
+            values,
+            name=name,
+        )
+        # Look the just-added tensor up by name so its metadata can be patched.
+        tensor = tosa_graph.currRegion.currBasicBlock.tensors[name]
+        tensor.setDtype(ts.DType.FP4E2M1)
+        for dim, size in enumerate(normalize_symint(tosa_arg.shape)):
+            tensor.SetDimSize(dim, size)
+        return
+
+    # Every other constant goes through the regular dtype normalization path.
+    prepared_values = _prepare_const_values_for_tosa_dtype(values, tosa_arg)
+    tosa_graph.addConst(
+        _get_const_shape(prepared_values, tosa_arg),
+        tosa_arg.dtype,
+        prepared_values,
+        name=name,
+    )
+
+
 def process_call_function(
     node: torch.fx.Node,
     tosa_graph: Any,
@@ -154,16 +208,7 @@ def process_inputs_to_parameters(
             f"{type(parameter_data).__name__}"
         )
     parameter_values = _tensor_to_numpy(parameter_data)
-    parameter_values = _prepare_const_values_for_tosa_dtype(
-        parameter_values, tosa_arg.dtype
-    )
-
-    tosa_graph.addConst(
-        normalize_symint(parameter_values.shape),
-        tosa_arg.dtype,
-        parameter_values,
-        name=tosa_arg.name,
-    )
+    _add_const(tosa_graph, parameter_values, tosa_arg, name=tosa_arg.name)
 
 
 def process_inputs_to_buffers(
@@ -188,14 +233,7 @@ def process_inputs_to_buffers(
             f"{type(buffer_data).__name__}"
         )
     buffer_values = _tensor_to_numpy(buffer_data)
-    buffer_values = _prepare_const_values_for_tosa_dtype(buffer_values, tosa_arg.dtype)
-
-    tosa_graph.addConst(
-        normalize_symint(buffer_values.shape),
-        tosa_arg.dtype,
-        buffer_values,
-        name=tosa_arg.name,
-    )
+    _add_const(tosa_graph, buffer_values, tosa_arg, name=tosa_arg.name)
 
 
 def process_inputs_to_lifted_tensor_constants(
@@ -217,14 +255,7 @@ def process_inputs_to_lifted_tensor_constants(
         f"{type(tensor).__name__}"
     )
     tensor_values = _tensor_to_numpy(tensor)
-    tensor_values = _prepare_const_values_for_tosa_dtype(tensor_values, tosa_arg.dtype)
-
-    tosa_graph.addConst(
-        normalize_symint(tensor_values.shape),
-        tosa_arg.dtype,
-        tensor_values,
-        name=tosa_arg.name,
-    )
+    _add_const(tosa_graph, tensor_values, tosa_arg, name=tosa_arg.name)
 
 
 def _is_submodule_input(
 
@@ -31,9 +31,45 @@ def test_mxfp_linear_quantize_swaps_module() -> None:
     assert tuple(model.linear.weight_scale.shape) == (1, 8, 1)
 
 
-def test_mxfp_linear_export_preserves_custom_op() -> None:
+def test_mxfp4_linear_quantize_swaps_module() -> None:
     model = LinearModule().eval()
-    to_mxfp(model, MXFPOpConfig())
+
+    to_mxfp(
+        model,
+        MXFPOpConfig(weight_dtype=torch.float4_e2m1fn_x2),
+    )
+
+    assert isinstance(model.linear, MXFPLinearOp)
+    assert model.linear.weight_qdata.dtype == torch.uint8
+    assert model.linear.weight_scale.dtype == torch.float8_e8m0fnu
+    assert tuple(model.linear.weight_qdata.shape) == (1, 8, 16)
+    assert tuple(model.linear.weight_scale.shape) == (1, 8, 1)
+
+
+def test_mxfp_linear_quantize_filter_fn_selects_modules() -> None:
+    class TwoLinearModule(torch.nn.Module):
+        def __init__(self) -> None:
+            super().__init__()
+            self.selected = torch.nn.Linear(32, 8)
+            self.skipped = torch.nn.Linear(32, 8)
+
+        def forward(self, x: torch.Tensor) -> torch.Tensor:
+            return self.selected(x) + self.skipped(x)
+
+    def _is_selected_linear(module: torch.nn.Module, fqn: str) -> bool:
+        return isinstance(module, torch.nn.Linear) and fqn == "selected"
+
+    model = TwoLinearModule().eval()
+
+    to_mxfp(model, MXFPOpConfig(), filter_fn=_is_selected_linear)
+
+    assert isinstance(model.selected, MXFPLinearOp)
+    assert isinstance(model.skipped, torch.nn.Linear)
+
+
+def _test_mxfp_linear_export_preserves_custom_op(config: MXFPOpConfig) -> None:
+    model = LinearModule().eval()
+    to_mxfp(model, config)
 
     exported = export(model, (torch.randn(4, 32),), strict=False)
 
@@ -44,3 +80,13 @@ def test_mxfp_linear_export_preserves_custom_op() -> None:
     ]
 
     assert torch.ops.tosa_mxfp.linear.default in targets
+
+
+def test_mxfp_linear_export_preserves_custom_op() -> None:
+    """Run the shared export check with the default ``MXFPOpConfig``."""
+    _test_mxfp_linear_export_preserves_custom_op(MXFPOpConfig())
+
+
+def test_mxfp4_linear_export_preserves_custom_op() -> None:
+    """Run the shared export check with ``torch.float4_e2m1fn_x2`` weights."""
+    _test_mxfp_linear_export_preserves_custom_op(
+        MXFPOpConfig(weight_dtype=torch.float4_e2m1fn_x2)
+    )
@@ -3,14 +3,18 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+from types import SimpleNamespace
+from typing import cast
+
 import numpy as np
 import torch
 import tosa_serializer as ts
-from executorch.backends.arm.process_node import process_placeholder
-from executorch.backends.arm.tosa.mapping import TosaSpecialDtype
+from executorch.backends.arm.process_node import _add_const, process_placeholder
+from executorch.backends.arm.tosa.mapping import TosaArg, TosaSpecialDtype
 from executorch.backends.arm.tosa.specification import TosaSpecification
 from executorch.exir import to_edge
 from torch._export.utils import is_param
+from tosa.TosaGraph import TosaGraph  # type: ignore[import-untyped]
 
 
 class Int32BiasModule(torch.nn.Module):
@@ -94,3 +98,35 @@ def test_process_placeholder_int48_normalizes_int32_const_values() -> None:
     assert tosa_graph.values is not None
     assert tosa_graph.values.dtype == np.int64
     assert tosa_graph.serialized_bytes == _expected_int48_bytes(module.bias)
+
+
+def test_add_const_fp4_in_packed_storage() -> None:
+    """Check that ``_add_const`` keeps packed FP4 bytes and the logical shape."""
+    # Four packed bytes laid out as (1, 1, 4).
+    packed_values = np.array([0xDE, 0xFE, 0x6D, 0x55], dtype=np.uint8).reshape(
+        1,
+        1,
+        4,
+    )
+    # Stand-in for a TosaArg: only ``dtype`` and ``shape`` are provided. The
+    # last dimension (8) is twice the stored one (4).
+    tosa_arg = cast(
+        TosaArg,
+        SimpleNamespace(dtype=ts.DType.FP4E2M1, shape=(1, 1, 8)),
+    )
+    tosa_graph = ts.TosaSerializer()
+
+    _add_const(tosa_graph, packed_values, tosa_arg, name="fp4_weight")
+
+    # Serialize and read the graph back so the assertions inspect the
+    # serialized form rather than in-memory serializer state.
+    graph = TosaGraph.GetRootAs(bytes(tosa_graph.serialize()), 0)
+    block = graph.Regions(0).Blocks(0)
+    tensors = {
+        block.Tensors(index).Name().decode(): block.Tensors(index)
+        for index in range(block.TensorsLength())
+    }
+    tensor = tensors["fp4_weight"]
+
+    # Dtype and shape are the FP4 ones from ``tosa_arg``; the data bytes are
+    # the original packed bytes, unchanged.
+    assert tensor.Type() == ts.DType.FP4E2M1
+    assert [tensor.Shape(index) for index in range(tensor.ShapeLength())] == [1, 1, 8]
+    assert [tensor.Data(index) for index in range(tensor.DataLength())] == [
+        0xDE,
+        0xFE,
+        0x6D,
+        0x55,
+    ]
Original file line number	Diff line number	Diff line change
`@@ -53,7 +53,7 @@ def define_node(`
`53`	`53`	`validate_valid_dtype(`
`54`	`54`	`self.target,`
`55`	`55`	`[A_data, B_data],`
`56`		`- [ts.DType.FP8E4M3, ts.DType.FP8E5M2],`
	`56`	`+ [ts.DType.FP4E2M1, ts.DType.FP8E4M3, ts.DType.FP8E5M2],`
`57`	`57`	`self.tosa_spec,`
`58`	`58`	`)`
`59`	`59`	`validate_valid_dtype(`