pytorch · AdrianLundell · May 4, 2026 · May 25, 2026 · May 27, 2026 · May 27, 2026
@@ -62,7 +62,7 @@ signature = "EthosUPartitioner.register_custom_partition_op(self, op: torch._ops
 
 [python.EthosUQuantizer]
 kind = "class"
-signature = "EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'"
+signature = "EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = True) -> 'None'"
 
 [python.EthosUQuantizer.annotate]
 kind = "function"
@@ -146,7 +146,7 @@ signature = "VgfPartitioner.register_custom_partition_op(self, op: torch._ops.Op
 
 [python.VgfQuantizer]
 kind = "class"
-signature = "VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'"
+signature = "VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = True) -> 'None'"
 
 [python.VgfQuantizer.annotate]
 kind = "function"

@@ -470,21 +470,23 @@ class TOSAQuantizer(Quantizer):
     """Manage quantization annotations for TOSA-compatible backends.
 
     .. warning::
-        Setting ``use_composable_quantizer=True`` enables an experimental API
-        surface that may change without notice.
+        The composable quantizer is now the default implementation. Setting
+        ``use_composable_quantizer=False`` is deprecated and will be removed in
+        two minor releases.
 
     """
 
     def __init__(
         self,
         compile_spec_or_tosa_spec,
-        use_composable_quantizer: bool = False,
+        use_composable_quantizer: bool = True,
     ) -> None:
         """Create a TOSA quantizer from a TOSA spec or Arm compile spec.
 
         .. warning::
-            Setting ``use_composable_quantizer=True`` enables an experimental
-            API surface that may change without notice.
+            The composable quantizer is now the default implementation.
+            Setting ``use_composable_quantizer=False`` is deprecated and will
+            be removed in two minor releases.
 
         """
         self.use_composable_quantizer = use_composable_quantizer
@@ -496,7 +498,7 @@ def __init__(
             self.quantizer = _TOSAQuantizerV2(compile_spec_or_tosa_spec)
         else:
             logger.info(
-                "Using default quantizer in the arm backend. This quantizer is planned to be replaced by the composable quantizer implementation in the future, see https://github.com/pytorch/executorch/issues/17701"
+                "Using deprecated legacy quantizer implementation in the arm backend. Setting use_composable_quantizer=False will be removed in two minor releases. See https://github.com/pytorch/executorch/issues/17701"
             )
             self.quantizer = _TOSAQuantizerV1(compile_spec_or_tosa_spec)
 
@@ -1239,20 +1241,24 @@ class EthosUQuantizer(TOSAQuantizer):
     """Quantizer supported by the Arm Ethos-U backend.
 
     .. warning::
-        Setting ``use_composable_quantizer=True`` enables an experimental API
-        surface that may change without notice.
+        The composable quantizer is now the default implementation. Setting
+        ``use_composable_quantizer=False`` is deprecated and will be removed in
+        two minor releases.
 
     Args:
         compile_spec (EthosUCompileSpec): Backend compile specification for
             Ethos-U targets.
-        use_composable_quantizer (bool): Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701" for details.
+        use_composable_quantizer (bool): Whether to use the composable
+            quantizer implementation. Setting this to ``False`` is deprecated
+            and will be removed in two minor releases. See
+            https://github.com/pytorch/executorch/issues/17701 for details.
 
     """
 
     def __init__(
         self,
         compile_spec: EthosUCompileSpec,
-        use_composable_quantizer: bool = False,
+        use_composable_quantizer: bool = True,
     ) -> None:
         super().__init__(compile_spec, use_composable_quantizer)
 
@@ -1261,19 +1267,23 @@ class VgfQuantizer(TOSAQuantizer):
     """Quantizer supported by the Arm Vgf backend.
 
     .. warning::
-        Setting ``use_composable_quantizer=True`` enables an experimental API
-        surface that may change without notice.
+        The composable quantizer is now the default implementation. Setting
+        ``use_composable_quantizer=False`` is deprecated and will be removed in
+        two minor releases.
 
     Args:
         compile_spec (VgfCompileSpec): Backend compile specification for Vgf
             targets.
-        use_composable_quantizer (bool): Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701" for details.
+        use_composable_quantizer (bool): Whether to use the composable
+            quantizer implementation. Setting this to ``False`` is deprecated
+            and will be removed in two minor releases. See
+            https://github.com/pytorch/executorch/issues/17701 for details.
 
     """
 
     def __init__(
         self,
         compile_spec: VgfCompileSpec,
-        use_composable_quantizer: bool = False,
+        use_composable_quantizer: bool = True,
     ) -> None:
         super().__init__(compile_spec, use_composable_quantizer)
@@ -243,6 +243,18 @@ class PatternQuantizer(Quantizer, QuantizerReporterUser):
 
     """
 
+    PARAMETER_TARGETS = {
+        torch.ops.aten.linear.default,
+        torch.ops.aten.convolution.default,
+        torch.ops.aten.conv1d.default,
+        torch.ops.aten.conv1d.padding,
+        torch.ops.aten.conv2d.default,
+        torch.ops.aten.conv2d.padding,
+        torch.ops.aten.conv3d.default,
+        torch.ops.aten.conv3d.padding,
+        torch.ops.aten.conv_transpose2d.input,
+    }
+
     def __init__(
         self,
         quantization_config: QuantizationConfig | None,
@@ -275,75 +287,59 @@ def get_quantizer_info(self):
             support_config_path,
         )
 
-    def is_parameter(self, node: Node, model: torch.fx.GraphModule) -> bool:
-        """Returns True if the given node is a parameter of the model."""
-        try:
-            _ = model.get_parameter(node.target)  # type: ignore[arg-type]
-            return True
-        except Exception:
+    def is_weight(self, node: Node) -> bool:
+        """Returns True if node is used as a weight by all users."""
+        if node.op != "get_attr":
             return False
 
-    def is_weight(
-        self, node: Node, params: list[Node], model: torch.fx.GraphModule
-    ) -> bool:
-        """Returns True if node is the first parameter of the given
-        parameters.
-        """
-        return len(params) > 0 and node == params[0]
+        # Ensure that the node is used as a weight by all users
+        for user_node in node.users:
+            if user_node.target not in self.PARAMETER_TARGETS:
+                return False
 
-    def is_bias(
-        self, node: Node, params: list[Node], model: torch.fx.GraphModule
-    ) -> bool:
-        """Returns True if node is the second parameter of the given
-        parameters.
-        """
-        return len(params) == 2 and node == params[1]
+            args = list(user_node.args)
+            if not (len(args) > 1 and node == args[1]):
+                return False
+
+        return True
+
+    def is_bias(self, node: Node) -> bool:
+        """Returns True if node is used as a bias by all users."""
+        if node.op != "get_attr":
+            return False
+
+        # Ensure that the node is used as a bias by all users
+        for user_node in node.users:
+            if user_node.target not in self.PARAMETER_TARGETS:
+                return False
+
+            args = list(user_node.args)
+            if not (len(args) > 2 and node == args[2]):
+                return False
+
+        return True
 
     def annotate_match(
         self,
         match: list[Node],
         config: QuantizationConfig | None,
-        model: torch.fx.GraphModule,
     ) -> None:
         """Annotates a matched pattern according to the given quantization
         config.
         """
-        parameter_targets = {
-            torch.ops.aten.linear.default,
-            torch.ops.aten.convolution.default,
-            torch.ops.aten.conv1d.default,
-            torch.ops.aten.conv1d.padding,
-            torch.ops.aten.conv2d.default,
-            torch.ops.aten.conv2d.padding,
-            torch.ops.aten.conv3d.default,
-            torch.ops.aten.conv3d.padding,
-            torch.ops.aten.conv_transpose2d.input,
-        }
 
         for node in match:
             input_qspec_map = {}
             output_qspec = None
 
-            params = [n for n in node.all_input_nodes if self.is_parameter(n, model)]
-            if node.target in parameter_targets:
-                if len(params) == 0 or len(params) > 2:
-                    logger.warning(
-                        f"{node.name} is expected to have parameter tensors for weight/bias but no such inputs found, which may cause unexpected quantization annotations. This is likely caused by incorrect tensor instantiations or non-constant weight/biases."
-                    )
-            else:
-                if len(params) > 0:
-                    logger.warning(
-                        f"{node.name} is not expected to not have parameter tensors but found {[n.name for n in params]}, which may cause unexpected quantization annotations."
-                    )
-
             for input_node in node.all_input_nodes:
                 if not has_float_output(input_node):
                     continue
-                if self.is_weight(input_node, params, model):
+                if self.is_weight(input_node):
                     input_qspec_map[input_node] = (
                         config.get_weight_qspec(node) if config else None
                     )
-                elif self.is_bias(input_node, params, model):
+                elif self.is_bias(input_node):
                     input_qspec_map[input_node] = (
                         config.get_bias_qspec(node) if config else None  # type: ignore[assignment]
                     )
@@ -370,7 +366,7 @@ def annotate(self, model: torch.fx.GraphModule) -> None:  # type: ignore[overrid
         )
         for result in matches:
             if result.accepted:
-                self.annotate_match(result.pattern, self.quantization_config, model)
+                self.annotate_match(result.pattern, self.quantization_config)
                 self.report_accept(result.pattern)
             else:
                 self.report_reject(
@@ -424,6 +420,9 @@ class SharedQspecQuantizer(Quantizer, QuantizerReporterUser):
         torch.ops.aten.flip.default,
         torch.ops.aten.index_select.default,
         torch.ops.aten.index_put.default,
+        torch.ops.aten.index_put_.default,
+        torch.ops.aten.index_copy.default,
+        torch.ops.aten.index_copy_.default,
         torch.ops.aten.contiguous.default,
         torch.ops.aten.as_strided_copy.default,
         torch.ops.aten.pixel_shuffle.default,
@@ -571,6 +570,42 @@ def _get_shared_clique(self, root_node: Node) -> tuple[set[Node], list[Any]]:
 
         return shared_nodes, adjacent_qspecs
 
+    def _should_skip_while_shared_qspec(self, node: Node) -> bool:
+        return node.target == torch.ops.higher_order.while_loop and bool(
+            node.meta.get("additional_inputs")
+        )
+
+    def _annotate_while_with_additional_inputs(
+        self,
+        root_node: Node,
+        adjacent_qspecs: list[Any],
+    ) -> bool:
+        if not self._should_skip_while_shared_qspec(root_node):
+            return False
+        if len(adjacent_qspecs) == 0:
+            self.report_reject(
+                [root_node],
+                "Couldn't find any adjacent quantization spec to annotate while_loop.",
+            )
+            return True
+
+        input_qspec = adjacent_qspecs[0]
+        input_qspec_map: dict[Node, Optional[QuantizationSpec]] = {
+            n: input_qspec for n in self._get_input_nodes_with_float_output(root_node)
+        }
+        output_qspec: Optional[QuantizationSpec] = None
+        if len(self._get_user_nodes_with_float_input(root_node)) > 0:
+            output_qspec = input_qspec
+
+        _mark_node_as_quantized(
+            root_node,
+            input_qspec_map,
+            output_qspec,
+            is_quantized=True,
+        )
+        self.report_accept([root_node])
+        return True
+
     def _annotate_shared_cluster(self, root_node: Node) -> None:
         if (
             len(self._get_input_nodes_with_float_output(root_node)) == 0
@@ -592,9 +627,11 @@ def _annotate_shared_cluster(self, root_node: Node) -> None:
         node_order = {node: index for index, node in enumerate(root_node.graph.nodes)}
         ordered_nodes = sorted(shared_nodes, key=lambda node: node_order.get(node, 0))
 
+        if self._annotate_while_with_additional_inputs(root_node, adjacent_qspecs):
+            return
+
         # Ensure the root node is the first one in the graph.
         root_node = ordered_nodes[0]
-
         if len(adjacent_qspecs) > 0:
             root_node_float_inputs = self._get_input_nodes_with_float_output(root_node)
             if len(root_node_float_inputs) > 0:

@@ -21,6 +21,7 @@
 
 from torchao.quantization.pt2e.quantizer import (
     DerivedQuantizationSpec,
+    FixedQParamsQuantizationSpec,
     QuantizationSpec,
     QuantizationSpecBase,
     SharedQuantizationSpec,
@@ -284,10 +285,18 @@ def get_input_act_qspec(self, node=None, input_node=None):
 
         For comparison operators, make sure that both inputs share the same
         quantization spec, by returning a SharedQuantizationSpec that ties the
-        quantization of both inputs together. For other operators, return the
-        default input activation spec.
+        quantization of both inputs together.
+
+        For trigonometric ops, ensure that the input spec has fixed qparams.
+
+        For other operators, return the default input activation spec.
 
         """
+        # TODO(MLETORCH-1853): Fix this lazy import when moving files around.
+        from executorch.backends.arm.quantizer.quantization_annotator import (
+            _fixed_input_qspec_ops,
+        )
+
         if node is None or input_node is None:
             return super().get_input_act_qspec(node, input_node)
 
@@ -296,6 +305,20 @@ def get_input_act_qspec(self, node=None, input_node=None):
                 return super().get_input_act_qspec(node, input_node)
             else:
                 return SharedQuantizationSpec((node.args[0], node))
+        elif node.target in _fixed_input_qspec_ops:
+
+            input_act_qspec = super().get_input_act_qspec(node, input_node)
+            num_bits = torch.iinfo(input_act_qspec.dtype).bits
+            qparams = _fixed_input_qspec_ops[node.target][num_bits]
+            return FixedQParamsQuantizationSpec(
+                dtype=input_act_qspec.dtype,
+                scale=qparams.scale,
+                zero_point=qparams.zero_point,
+                quant_min=input_act_qspec.quant_min,
+                quant_max=input_act_qspec.quant_max,
+                qscheme=input_act_qspec.qscheme,
+                is_dynamic=input_act_qspec.is_dynamic,
+            )
 
         return super().get_input_act_qspec(node, input_node)
 

@@ -77,8 +77,6 @@ def check_pattern(cls, pattern):
     torch.ops.aten.relu_.default,
     torch.ops.aten.hardtanh.default,
     torch.ops.aten.hardtanh_.default,
-    torch.ops.aten.hardsigmoid.default,
-    torch.ops.aten.hardsigmoid_.default,
     torch.ops.aten.clamp.default,
     torch.ops.aten.clamp_.default,
 ]
@@ -168,6 +166,14 @@ def check_pattern(cls, pattern):
         (torch.ops.aten.ge.Scalar,),
         (torch.ops.aten.eq.Scalar,),
         (torch.ops.aten.ne.Scalar,),
+        (torch.ops.aten.lstm.input,),
+        (torch.ops.aten.rnn_tanh.input,),
+        (torch.ops.aten.rnn_relu.input,),
+        (torch.ops.aten.gru.input,),
+        (torch.ops.aten.asin.default,),
+        (torch.ops.aten.acos.default,),
+        (torch.ops.aten.atanh.default,),
+        (torch.ops.aten.einsum.default,),
     ]
 )
 TOSA_QUANTIZER_SUPPORT_DICT: dict[tuple[OpOverload, ...], type[PatternCheck] | None] = {

@@ -105,5 +105,6 @@ def test_quantized_to_float_transition_tosa_INT_FP(fp_extension: bool):
         )
     pipeline.quantizer.set_module_type(torch.nn.Sigmoid, None)  # type: ignore
     pipeline.quantizer.set_module_type(torch.nn.Conv1d, None)  # type: ignore
+    pipeline.quantizer.set_io(None)  # type: ignore
 
     pipeline.run()