Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .ci/scripts/test_cortex_m_e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ MODEL=$1
script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
et_root_dir=$(realpath "${script_dir}/../..")

# Quantization is the default for the cortex-m55+int8 target; run.sh's
# Quantization is the default for the cortex-m55 target; run.sh's
# arg parser only recognizes --no_quantize, so we omit any explicit flag.
bash "${et_root_dir}/examples/arm/run.sh" \
--model_name="${MODEL}" \
--target=cortex-m55+int8 \
--target=cortex-m55 \
--bundleio
30 changes: 24 additions & 6 deletions backends/arm/scripts/aot_arm_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
ReplaceQuantNodesPass,
)
from executorch.backends.cortex_m.quantizer.quantizer import CortexMQuantizer
from executorch.backends.cortex_m.target_config import CortexMTargetConfig
from executorch.devtools import BundledProgram, generate_etrecord
from executorch.devtools.backend_debug import get_delegation_info
from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite
Expand Down Expand Up @@ -465,7 +466,16 @@ def forward(self, x):
"TOSA-1.0+INT",
"TOSA-1.0+FP",
"TOSA-1.0+INT+int16",
"cortex-m55+int8",
"cortex-m0",
"cortex-m0plus",
"cortex-m3",
"cortex-m4",
"cortex-m7",
"cortex-m23",
"cortex-m33",
"cortex-m35p",
"cortex-m55",
"cortex-m85",
]


Expand Down Expand Up @@ -566,7 +576,7 @@ def _get_args():
required=False,
default="ethos-u55-128",
choices=TARGETS,
help=f"Target backend. For delegated models: Ethos-U/VGF/TOSA variants. For non-delegated: cortex-m55+int8 (CMSIS-NN portable kernels). Valid targets: {TARGETS}",
help=f"Target backend. For delegated models: Ethos-U/VGF/TOSA variants. For non-delegated: cortex-m<variant> (CMSIS-NN portable kernels). Valid targets: {TARGETS}",
)
# TODO: Remove --evaluate and --evaluate_config completely after a suitable time.
# They are deprecated and no longer functional in this script.
Expand Down Expand Up @@ -860,9 +870,13 @@ def _to_edge_cortex_m(
model: GraphModule,
example_inputs: Tuple[torch.Tensor],
calibration_samples: Optional[List[Tuple[torch.Tensor, ...]]],
target_config: CortexMTargetConfig,
):
"""Cortex-M/CMSIS-NN compilation path with no delegation."""
logging.info("Using Cortex-M/CMSIS-NN compilation path (no delegation)")
logging.info(
f"Using Cortex-M/CMSIS-NN compilation path for cpu={target_config.cpu.name} "
f"backend={target_config.backend.name}"
)

def _to_channels_last(x):
if isinstance(x, torch.Tensor):
Expand Down Expand Up @@ -915,7 +929,9 @@ def _to_channels_last(x):
),
)

pass_manager = CortexMPassManager(edge.exported_program())
pass_manager = CortexMPassManager(
edge.exported_program(), target_config=target_config
)
edge._edge_programs["forward"] = pass_manager.transform()

return model_quant, edge
Expand Down Expand Up @@ -1007,11 +1023,12 @@ def main() -> None: # noqa: C901
else:
quant_mode = None

if args.target == "cortex-m55+int8":
if args.target.startswith("cortex-m"):
# Cortex-M path: CMSIS-NN portable kernels, no delegation
target_config = CortexMTargetConfig.from_target_string(args.target)
if args.delegate:
logging.warning(
"--delegate is ignored for target 'cortex-m55+int8' "
f"--delegate is ignored for target {args.target!r} "
"(this target does not use delegated ops)."
)
args.delegate = False
Expand All @@ -1021,6 +1038,7 @@ def main() -> None: # noqa: C901
model,
example_inputs,
calibration_samples,
target_config,
)
elif args.delegate:
# As we can target multiple output encodings, one must
Expand Down
1 change: 1 addition & 0 deletions backends/cortex_m/passes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def _ensure_cortex_m_dependencies() -> None:
from .activation_fusion_pass import ActivationFusionPass # noqa
from .clamp_hardswish_pass import ClampHardswishPass # noqa
from .convert_to_cortex_m_pass import ConvertToCortexMPass # noqa
from .cortex_m_pass import CortexMPass # noqa
from .decompose_hardswish_pass import DecomposeHardswishPass # noqa
from .decompose_mean_pass import DecomposeMeanPass # noqa
from .quantized_clamp_activation_pass import QuantizedClampActivationPass # noqa
Expand Down
35 changes: 35 additions & 0 deletions backends/cortex_m/passes/cortex_m_pass.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from executorch.backends.cortex_m.target_config import CortexMTargetConfig
from executorch.exir.pass_base import ExportPass
from torch.export import ExportedProgram


class CortexMPass(ExportPass):
    """Common base for Cortex-M passes that need target information.

    Holds the exported program and the compile-target configuration handed
    over at construction time and exposes both read-only via properties.
    ``CortexMPassManager.transform()`` inspects each pass class's
    constructor signature and supplies these arguments automatically when
    instantiating passes from its pass list.
    """

    def __init__(
        self,
        exported_program: ExportedProgram,
        target_config: CortexMTargetConfig,
    ) -> None:
        super().__init__()
        # Stored under private names; consumers go through the properties.
        self._exported_program = exported_program
        self._target_config = target_config

    @property
    def exported_program(self) -> ExportedProgram:
        """The exported program this pass operates on (read-only)."""
        return self._exported_program

    @property
    def target_config(self) -> CortexMTargetConfig:
        """The Cortex-M compile-target configuration (read-only)."""
        return self._target_config
60 changes: 45 additions & 15 deletions backends/cortex_m/passes/cortex_m_pass_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@


import inspect
from typing import Callable, cast, Optional, Type
from typing import Any, Optional, Type

from executorch.backends.arm._passes import (
FoldAndAnnotateQParamsPass,
ScalarsToAttributePass,
)
from executorch.backends.cortex_m.target_config import CortexM, CortexMTargetConfig
from executorch.backends.transforms.remove_getitem_op import RemoveGetItemPass
from executorch.backends.transforms.replace_scalar_with_tensor import (
ReplaceScalarWithTensorArgPass,
Expand All @@ -19,9 +20,6 @@
from executorch.exir.pass_manager import PassManager
from executorch.exir.program._program import _transform, lift_constant_tensor_pass
from torch.export import ExportedProgram
from torch.fx.passes.infra.pass_base import PassResult

from torch.nn import Module

from .activation_fusion_pass import ActivationFusionPass
from .clamp_hardswish_pass import ClampHardswishPass
Expand Down Expand Up @@ -57,14 +55,33 @@ class CortexMPassManager(PassManager):
]

def __init__(
self, exported_program, passes: Optional[list[PassClass]] = None
self,
exported_program: ExportedProgram | None,
passes: Optional[list[PassClass]] = None,
target_config: Optional[CortexMTargetConfig] = None,
) -> None:
"""Initialize the Cortex-M pass manager.

Args:
exported_program: The exported program to transform. Required
before calling ``transform()``; may be ``None`` for callers
that only use ``transform_for_annotation()``.
passes: Optional override of the pass list. Defaults to
``CortexMPassManager.pass_list``.
target_config: Compilation target for passes that need it.
Defaults to ``CortexMTargetConfig(cpu=CortexM.M55)``, which
resolves through cmsis_nn to the MVE backend — matching the
pre-config historical behaviour.
"""
super().__init__(passes=[])
self.exported_program = exported_program
# PassManager.passes is typed as callables; this manager stores pass classes which are initialized at transform time with the exported_program.
self.passes: list[PassClass] = ( # type: ignore[assignment]
passes if passes is not None else self.pass_list # type: ignore[assignment]
)
self.target_config: CortexMTargetConfig = target_config or CortexMTargetConfig(
cpu=CortexM.M55
)

def transform_for_annotation(self, model):
passes = self.pass_list_transform_for_annotation
Expand All @@ -73,18 +90,31 @@ def transform_for_annotation(self, model):
return model

def transform(self) -> ExportedProgram:
ep = self.exported_program
exported_program = self.exported_program
if not isinstance(exported_program, ExportedProgram):
raise ValueError(
f"{type(self).__name__}.transform() needs a real ExportedProgram, "
f"got {exported_program!r}"
)

for pass_cls in self.passes:
if not isinstance(pass_cls, type):
raise ValueError(
f"{type(self).__name__} expects pass classes, not instances; "
f"got {pass_cls!r}"
)

signature = inspect.signature(pass_cls)
kwargs: dict[str, Any] = {}
if "exported_program" in signature.parameters:
ep_pass_ctor = cast(Callable[[ExportedProgram], ExportPass], pass_cls)
transform_pass = ep_pass_ctor(ep)
else:
transform_pass = pass_cls()
pass_callable = cast(Callable[[Module], PassResult], transform_pass)
ep = _transform(ep, pass_callable)
kwargs["exported_program"] = exported_program
if "target_config" in signature.parameters:
kwargs["target_config"] = self.target_config

transform_pass = pass_cls(**kwargs)
exported_program = _transform(exported_program, transform_pass)

# All constant tensors should be lifted to buffers at this point, re-run
# lift_constant_tensor_pass in case new ones have been introduced by the passes above.
ep = lift_constant_tensor_pass(ep)
return ep
# lift_constant_tensor_pass in case new ones have been introduced.
exported_program = lift_constant_tensor_pass(exported_program)
return exported_program
110 changes: 110 additions & 0 deletions backends/cortex_m/target_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import annotations

from dataclasses import dataclass
from enum import auto, Enum
from typing import Optional

import cmsis_nn # type: ignore[import-not-found, import-untyped]


class CortexM(Enum):
    """Cortex-M CPU variant.

    Member names deliberately mirror ``cmsis_nn.CortexM`` so the matching
    cmsis_nn enum member can be looked up by name.
    """

    # Explicit values match what enum.auto() would assign (1..10). Only the
    # member names carry meaning; the values are never used for lookups.
    M0 = 1
    M0PLUS = 2
    M3 = 3
    M4 = 4
    M7 = 5
    M23 = 6
    M33 = 7
    M35P = 8
    M55 = 9
    M85 = 10


# cmsis_nn backends each core is architecturally able to execute. SCALAR is
# universal; DSP requires the Armv7E-M or Armv8-M-Mainline DSP option; MVE
# requires Armv8.1-M Mainline with the MVE extension. The tiers nest
# (SCALAR < DSP < MVE): an MVE-capable core also runs DSP and scalar code,
# which is what makes an "M55 without MVE" -> DSP override legitimate.
_SCALAR: frozenset[cmsis_nn.Backend] = frozenset({cmsis_nn.Backend.SCALAR})
_SCALAR_DSP: frozenset[cmsis_nn.Backend] = _SCALAR | {cmsis_nn.Backend.DSP}
_SCALAR_DSP_MVE: frozenset[cmsis_nn.Backend] = _SCALAR_DSP | {cmsis_nn.Backend.MVE}

_SUPPORTED_BACKENDS: dict[CortexM, frozenset[cmsis_nn.Backend]] = {
    CortexM.M0: _SCALAR,
    CortexM.M0PLUS: _SCALAR,
    CortexM.M3: _SCALAR,
    CortexM.M23: _SCALAR,
    CortexM.M4: _SCALAR_DSP,
    CortexM.M7: _SCALAR_DSP,
    CortexM.M33: _SCALAR_DSP,
    CortexM.M35P: _SCALAR_DSP,
    CortexM.M55: _SCALAR_DSP_MVE,
    CortexM.M85: _SCALAR_DSP_MVE,
}


@dataclass(frozen=True)
class CortexMTargetConfig:
    """AOT compile target configuration for the Cortex-M backend.

    Attributes:
        cpu: The Cortex-M CPU variant to compile for.
        isa: Optional cmsis_nn backend override. Normally the backend is
            derived from ``cpu``; the override exists for cores with
            optional ISA extensions (an M55 built without MVE, an M33
            without DSP, etc.). Overrides are validated against the CPU's
            architectural capability set on construction, so e.g. forcing
            MVE on an M0 raises ``ValueError``.
    """

    cpu: CortexM
    isa: Optional[cmsis_nn.Backend] = None

    def __post_init__(self) -> None:
        if self.isa is None:
            # No override requested; `backend` derives everything from `cpu`.
            return
        supported = _SUPPORTED_BACKENDS.get(self.cpu)
        if supported is not None and self.isa in supported:
            return
        allowed = [] if not supported else sorted(b.name for b in supported)
        raise ValueError(
            f"Backend {self.isa.name} is not supported on "
            f"{self.cpu.name}; supported: {allowed}"
        )

    @property
    def backend(self) -> cmsis_nn.Backend:
        """Resolve the cmsis_nn backend for this target.

        An explicit ``isa`` override wins; otherwise cmsis_nn resolves the
        backend from the CPU variant, which is looked up by name (this is
        why ``CortexM`` mirrors ``cmsis_nn.CortexM`` member names).

        Raises:
            ValueError: if cmsis_nn has no member matching ``cpu``.
        """
        override = self.isa
        if override is not None:
            return override
        try:
            cmsis_member = getattr(cmsis_nn.CortexM, self.cpu.name)
        except AttributeError as e:
            raise ValueError(
                f"cmsis_nn does not yet support {self.cpu.name}; pass an "
                f"explicit `isa=` override or wait for upstream support."
            ) from e
        return cmsis_nn.resolve_backend(cmsis_member)

    @classmethod
    def from_target_string(cls, target: str) -> CortexMTargetConfig:
        """Build a config from a `cortex-m<variant>` target string."""
        prefix = "cortex-m"
        if not target.startswith(prefix):
            raise ValueError(
                f"Cortex-M target string must start with 'cortex-m', "
                f"got: {target!r}"
            )
        try:
            # e.g. "cortex-m0plus" -> "M0PLUS" -> CortexM.M0PLUS
            cpu = CortexM["M" + target[len(prefix) :].upper()]
        except KeyError as e:
            raise ValueError(
                f"Unsupported Cortex-M target string: {target!r}. "
                f"Supported: {sorted('cortex-m' + m.name[1:].lower() for m in CortexM)}"
            ) from e
        return cls(cpu=cpu)
Loading
Loading