Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions backends/arm/public_api_manifests/api_manifest_running.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ signature = "EthosUPartitioner.register_custom_partition_op(self, op: torch._ops

[python.EthosUQuantizer]
kind = "class"
signature = "EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'"
signature = "EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = True) -> 'None'"

[python.EthosUQuantizer.annotate]
kind = "function"
Expand Down Expand Up @@ -146,7 +146,7 @@ signature = "VgfPartitioner.register_custom_partition_op(self, op: torch._ops.Op

[python.VgfQuantizer]
kind = "class"
signature = "VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'"
signature = "VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = True) -> 'None'"

[python.VgfQuantizer.annotate]
kind = "function"
Expand Down
38 changes: 24 additions & 14 deletions backends/arm/quantizer/arm_quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,21 +470,23 @@ class TOSAQuantizer(Quantizer):
"""Manage quantization annotations for TOSA-compatible backends.

.. warning::
Setting ``use_composable_quantizer=True`` enables an experimental API
surface that may change without notice.
The composable quantizer is now the default implementation. Setting
``use_composable_quantizer=False`` is deprecated and will be removed in
two minor releases.

"""

def __init__(
self,
compile_spec_or_tosa_spec,
use_composable_quantizer: bool = False,
use_composable_quantizer: bool = True,
) -> None:
"""Create a TOSA quantizer from a TOSA spec or Arm compile spec.
Comment thread
AdrianLundell marked this conversation as resolved.

.. warning::
Setting ``use_composable_quantizer=True`` enables an experimental
API surface that may change without notice.
The composable quantizer is now the default implementation.
Setting ``use_composable_quantizer=False`` is deprecated and will
be removed in two minor releases.

"""
self.use_composable_quantizer = use_composable_quantizer
Expand All @@ -496,7 +498,7 @@ def __init__(
self.quantizer = _TOSAQuantizerV2(compile_spec_or_tosa_spec)
else:
logger.info(
"Using default quantizer in the arm backend. This quantizer is planned to be replaced by the composable quantizer implementation in the future, see https://github.com/pytorch/executorch/issues/17701"
"Using deprecated legacy quantizer implementation in the arm backend. Setting use_composable_quantizer=False will be removed in two minor releases. See https://github.com/pytorch/executorch/issues/17701"
)
self.quantizer = _TOSAQuantizerV1(compile_spec_or_tosa_spec)

Expand Down Expand Up @@ -1239,20 +1241,24 @@ class EthosUQuantizer(TOSAQuantizer):
"""Quantizer supported by the Arm Ethos-U backend.

.. warning::
Setting ``use_composable_quantizer=True`` enables an experimental API
surface that may change without notice.
The composable quantizer is now the default implementation. Setting
``use_composable_quantizer=False`` is deprecated and will be removed in
two minor releases.

Args:
compile_spec (EthosUCompileSpec): Backend compile specification for
Ethos-U targets.
use_composable_quantizer (bool): Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701" for details.
use_composable_quantizer (bool): Whether to use the composable
quantizer implementation. Setting this to ``False`` is deprecated
and will be removed in two minor releases. See
https://github.com/pytorch/executorch/issues/17701 for details.

"""

def __init__(
self,
compile_spec: EthosUCompileSpec,
use_composable_quantizer: bool = False,
use_composable_quantizer: bool = True,
) -> None:
super().__init__(compile_spec, use_composable_quantizer)

Expand All @@ -1261,19 +1267,23 @@ class VgfQuantizer(TOSAQuantizer):
"""Quantizer supported by the Arm Vgf backend.

.. warning::
Setting ``use_composable_quantizer=True`` enables an experimental API
surface that may change without notice.
The composable quantizer is now the default implementation. Setting
``use_composable_quantizer=False`` is deprecated and will be removed in
two minor releases.

Args:
compile_spec (VgfCompileSpec): Backend compile specification for Vgf
targets.
use_composable_quantizer (bool): Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701" for details.
use_composable_quantizer (bool): Whether to use the composable
quantizer implementation. Setting this to ``False`` is deprecated
and will be removed in two minor releases. See
https://github.com/pytorch/executorch/issues/17701 for details.

"""

def __init__(
self,
compile_spec: VgfCompileSpec,
use_composable_quantizer: bool = False,
use_composable_quantizer: bool = True,
) -> None:
super().__init__(compile_spec, use_composable_quantizer)
133 changes: 85 additions & 48 deletions backends/arm/quantizer/arm_quantizer_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,18 @@ class PatternQuantizer(Quantizer, QuantizerReporterUser):

"""

# Operator overloads whose users are inspected by ``is_weight``/``is_bias``:
# a ``get_attr`` input only counts as a weight (positional arg 1) or a bias
# (positional arg 2) when every one of its users has a target in this set.
PARAMETER_TARGETS = {
torch.ops.aten.linear.default,
torch.ops.aten.convolution.default,
torch.ops.aten.conv1d.default,
torch.ops.aten.conv1d.padding,
torch.ops.aten.conv2d.default,
torch.ops.aten.conv2d.padding,
torch.ops.aten.conv3d.default,
torch.ops.aten.conv3d.padding,
torch.ops.aten.conv_transpose2d.input,
}

def __init__(
self,
quantization_config: QuantizationConfig | None,
Expand Down Expand Up @@ -275,75 +287,59 @@ def get_quantizer_info(self):
support_config_path,
)

def is_parameter(self, node: Node, model: torch.fx.GraphModule) -> bool:
"""Returns True if the given node is a parameter of the model."""
try:
_ = model.get_parameter(node.target) # type: ignore[arg-type]
return True
except Exception:
def is_weight(self, node: Node) -> bool:
"""Returns True if node is used as a weight by all users."""
if node.op != "get_attr":
return False

def is_weight(
self, node: Node, params: list[Node], model: torch.fx.GraphModule
) -> bool:
"""Returns True if node is the first parameter of the given
parameters.
"""
return len(params) > 0 and node == params[0]
# Ensure that the node is used as a weight by all users
for user_node in node.users:
if user_node.target not in self.PARAMETER_TARGETS:
return False

def is_bias(
self, node: Node, params: list[Node], model: torch.fx.GraphModule
) -> bool:
"""Returns True if node is the second parameter of the given
parameters.
"""
return len(params) == 2 and node == params[1]
args = list(user_node.args)
if not (len(args) > 1 and node == args[1]):
return False

return True

def is_bias(self, node: Node) -> bool:
    """Return True if ``node`` is used as a bias by all of its users.

    A node qualifies only when it is a ``get_attr`` node and every user

    * has a target listed in ``self.PARAMETER_TARGETS``, and
    * receives ``node`` as its third positional argument (``args[2]``).

    Args:
        node: Graph node to classify.

    Returns:
        ``True`` when all users consume ``node`` in the bias position,
        ``False`` otherwise. A ``get_attr`` node without any users yields
        ``True`` because the check over users is vacuous.

    """
    if node.op != "get_attr":
        return False

    # Only positional arguments are inspected; index 2 is the bias slot
    # for the ops this check is applied to.
    return all(
        user_node.target in self.PARAMETER_TARGETS
        and len(user_node.args) > 2
        and node == user_node.args[2]
        for user_node in node.users
    )
Comment thread
AdrianLundell marked this conversation as resolved.

def annotate_match(
self,
match: list[Node],
config: QuantizationConfig | None,
model: torch.fx.GraphModule,
) -> None:
"""Annotates a matched pattern according to the given quantization
config.
"""
parameter_targets = {
torch.ops.aten.linear.default,
torch.ops.aten.convolution.default,
torch.ops.aten.conv1d.default,
torch.ops.aten.conv1d.padding,
torch.ops.aten.conv2d.default,
torch.ops.aten.conv2d.padding,
torch.ops.aten.conv3d.default,
torch.ops.aten.conv3d.padding,
torch.ops.aten.conv_transpose2d.input,
}

for node in match:
input_qspec_map = {}
output_qspec = None

params = [n for n in node.all_input_nodes if self.is_parameter(n, model)]
if node.target in parameter_targets:
if len(params) == 0 or len(params) > 2:
logger.warning(
f"{node.name} is expected to have parameter tensors for weight/bias but no such inputs found, which may cause unexpected quantization annotations. This is likely caused by incorrect tensor instantiations or non-constant weight/biases."
)
else:
if len(params) > 0:
logger.warning(
f"{node.name} is not expected to not have parameter tensors but found {[n.name for n in params]}, which may cause unexpected quantization annotations."
)

for input_node in node.all_input_nodes:
if not has_float_output(input_node):
continue
if self.is_weight(input_node, params, model):
if self.is_weight(input_node):
input_qspec_map[input_node] = (
config.get_weight_qspec(node) if config else None
)
elif self.is_bias(input_node, params, model):
elif self.is_bias(input_node):
input_qspec_map[input_node] = (
config.get_bias_qspec(node) if config else None # type: ignore[assignment]
)
Expand All @@ -370,7 +366,7 @@ def annotate(self, model: torch.fx.GraphModule) -> None: # type: ignore[overrid
)
for result in matches:
if result.accepted:
self.annotate_match(result.pattern, self.quantization_config, model)
self.annotate_match(result.pattern, self.quantization_config)
self.report_accept(result.pattern)
else:
self.report_reject(
Expand Down Expand Up @@ -424,6 +420,9 @@ class SharedQspecQuantizer(Quantizer, QuantizerReporterUser):
torch.ops.aten.flip.default,
torch.ops.aten.index_select.default,
torch.ops.aten.index_put.default,
torch.ops.aten.index_put_.default,
torch.ops.aten.index_copy.default,
torch.ops.aten.index_copy_.default,
torch.ops.aten.contiguous.default,
torch.ops.aten.as_strided_copy.default,
torch.ops.aten.pixel_shuffle.default,
Expand Down Expand Up @@ -571,6 +570,42 @@ def _get_shared_clique(self, root_node: Node) -> tuple[set[Node], list[Any]]:

return shared_nodes, adjacent_qspecs

def _should_skip_while_shared_qspec(self, node: Node) -> bool:
return node.target == torch.ops.higher_order.while_loop and bool(
node.meta.get("additional_inputs")
)

def _annotate_while_with_additional_inputs(
    self,
    root_node: Node,
    adjacent_qspecs: list[Any],
) -> bool:
    """Annotate a ``while_loop`` node with additional inputs on its own.

    Nodes for which ``_should_skip_while_shared_qspec`` is false are left
    untouched. Otherwise the first entry of ``adjacent_qspecs`` is applied
    to every float-producing input of ``root_node`` and, if the node has at
    least one user taking float input, to its output as well.

    Args:
        root_node: Node to annotate.
        adjacent_qspecs: Candidate quantization specs; only the first
            entry is used. NOTE(review): presumably the specs gathered by
            ``_get_shared_clique`` -- confirm against the caller.

    Returns:
        ``False`` if ``root_node`` is not handled here. ``True`` if it was
        handled, either annotated and reported as accepted, or reported as
        rejected because no spec was available.

    """
    if not self._should_skip_while_shared_qspec(root_node):
        return False

    if not adjacent_qspecs:
        self.report_reject(
            [root_node],
            "Couldn't find any adjacent quantization spec to annotate while_loop.",
        )
        # Rejected, but still handled: the caller must not annotate it again.
        return True

    shared_qspec = adjacent_qspecs[0]
    input_qspec_map: dict[Node, Optional[QuantizationSpec]] = {
        input_node: shared_qspec
        for input_node in self._get_input_nodes_with_float_output(root_node)
    }

    # The output is only quantized when some user actually takes float input.
    has_float_users = len(self._get_user_nodes_with_float_input(root_node)) > 0
    output_qspec: Optional[QuantizationSpec] = (
        shared_qspec if has_float_users else None
    )

    _mark_node_as_quantized(
        root_node,
        input_qspec_map,
        output_qspec,
        is_quantized=True,
    )
    self.report_accept([root_node])
    return True

def _annotate_shared_cluster(self, root_node: Node) -> None:
if (
len(self._get_input_nodes_with_float_output(root_node)) == 0
Expand All @@ -592,9 +627,11 @@ def _annotate_shared_cluster(self, root_node: Node) -> None:
node_order = {node: index for index, node in enumerate(root_node.graph.nodes)}
ordered_nodes = sorted(shared_nodes, key=lambda node: node_order.get(node, 0))

if self._annotate_while_with_additional_inputs(root_node, adjacent_qspecs):
return

# Ensure the root node is the first one in the graph.
root_node = ordered_nodes[0]

if len(adjacent_qspecs) > 0:
root_node_float_inputs = self._get_input_nodes_with_float_output(root_node)
if len(root_node_float_inputs) > 0:
Expand Down
27 changes: 25 additions & 2 deletions backends/arm/quantizer/quantization_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from torchao.quantization.pt2e.quantizer import (
DerivedQuantizationSpec,
FixedQParamsQuantizationSpec,
QuantizationSpec,
QuantizationSpecBase,
SharedQuantizationSpec,
Expand Down Expand Up @@ -284,10 +285,18 @@ def get_input_act_qspec(self, node=None, input_node=None):

For comparison operators, make sure that both inputs share the same
quantization spec, by returning a SharedQuantizationSpec that ties the
quantization of both inputs together. For other operators, return the
default input activation spec.
quantization of both inputs together.

For trigonometric ops, ensure that input spec has fixed qparams.

For other operators, return the default input activation spec.

"""
# MLETORCH-1853: Fix lazy import when moving files around
from executorch.backends.arm.quantizer.quantization_annotator import (
_fixed_input_qspec_ops,
)

if node is None or input_node is None:
return super().get_input_act_qspec(node, input_node)

Expand All @@ -296,6 +305,20 @@ def get_input_act_qspec(self, node=None, input_node=None):
return super().get_input_act_qspec(node, input_node)
else:
return SharedQuantizationSpec((node.args[0], node))
elif node.target in _fixed_input_qspec_ops:

input_act_qspec = super().get_input_act_qspec(node, input_node)
num_bits = torch.iinfo(input_act_qspec.dtype).bits
qparams = _fixed_input_qspec_ops[node.target][num_bits]
return FixedQParamsQuantizationSpec(
dtype=input_act_qspec.dtype,
scale=qparams.scale,
zero_point=qparams.zero_point,
quant_min=input_act_qspec.quant_min,
quant_max=input_act_qspec.quant_max,
qscheme=input_act_qspec.qscheme,
is_dynamic=input_act_qspec.is_dynamic,
)
Comment on lines +308 to +330

return super().get_input_act_qspec(node, input_node)

Expand Down
10 changes: 8 additions & 2 deletions backends/arm/quantizer/quantizer_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,6 @@ def check_pattern(cls, pattern):
torch.ops.aten.relu_.default,
torch.ops.aten.hardtanh.default,
torch.ops.aten.hardtanh_.default,
torch.ops.aten.hardsigmoid.default,
torch.ops.aten.hardsigmoid_.default,
torch.ops.aten.clamp.default,
torch.ops.aten.clamp_.default,
]
Comment thread
AdrianLundell marked this conversation as resolved.
Expand Down Expand Up @@ -168,6 +166,14 @@ def check_pattern(cls, pattern):
(torch.ops.aten.ge.Scalar,),
(torch.ops.aten.eq.Scalar,),
(torch.ops.aten.ne.Scalar,),
(torch.ops.aten.lstm.input,),
(torch.ops.aten.rnn_tanh.input,),
(torch.ops.aten.rnn_relu.input,),
(torch.ops.aten.gru.input,),
(torch.ops.aten.asin.default,),
(torch.ops.aten.acos.default,),
(torch.ops.aten.atanh.default,),
(torch.ops.aten.einsum.default,),
]
)
TOSA_QUANTIZER_SUPPORT_DICT: dict[tuple[OpOverload, ...], type[PatternCheck] | None] = {
Expand Down
1 change: 1 addition & 0 deletions backends/arm/test/misc/test_quant_custom_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,5 +105,6 @@ def test_quantized_to_float_transition_tosa_INT_FP(fp_extension: bool):
)
pipeline.quantizer.set_module_type(torch.nn.Sigmoid, None) # type: ignore
pipeline.quantizer.set_module_type(torch.nn.Conv1d, None) # type: ignore
pipeline.quantizer.set_io(None) # type: ignore

pipeline.run()
Loading
Loading