diff --git a/backends/nxp/backend/custom_delegation_options.py b/backends/nxp/backend/custom_delegation_options.py index 6f669604226..18eadc0bbbf 100644 --- a/backends/nxp/backend/custom_delegation_options.py +++ b/backends/nxp/backend/custom_delegation_options.py @@ -22,7 +22,3 @@ class CustomDelegationOptions: # not create any NeutronGraph that can be called. This is done by the partitioner itself, and is not handled by # the individual node converters. allow_no_op_partitions: bool = False - - # The new neutron converter flow has different constraints for supported operators. These need to be addressed when - # deciding is operator is delegated or not in _is_supported_on_target(). - use_new_flow_neutron_c: bool = False diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/abs_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/abs_converter.py index e3052ee1205..cb3a360f604 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/abs_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/abs_converter.py @@ -5,7 +5,6 @@ import torch - from executorch.backends.nxp.backend.ir.converter.node_converter import ( CustomDelegationOptions, NeutronTargetSpec, @@ -36,7 +35,7 @@ def _is_supported_on_target( custom_delegation_options: CustomDelegationOptions, ) -> bool: - if custom_delegation_options.use_new_flow_neutron_c: + if neutron_target_spec.use_new_flow_neutron_c: # Requirements specified by the new Neutron flow documentation. 
supported_types = [torch.int8, torch.uint8] diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/avg_pool_2d_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/avg_pool_2d_converter.py index b8ad7211a56..02cf73016b6 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/avg_pool_2d_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/avg_pool_2d_converter.py @@ -5,7 +5,6 @@ import numpy as np import torch - from executorch.backends.nxp.backend.ir.converter.conversion import ( aten_translator, common, @@ -22,7 +21,6 @@ from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import ( average_pool_2d_options, ) - from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec from torch.fx import Node from torch.nn import Parameter @@ -66,7 +64,7 @@ def _is_supported_on_target( kernel = node.args[1] stride = node.args[2] - if custom_delegation_options.use_new_flow_neutron_c: + if neutron_target_spec.use_new_flow_neutron_c: # Requirements specified by the new Neutron flow documentation. supported_types = [torch.int8, torch.uint8] diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py index 0917c03038c..7bb1a295a23 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py @@ -3,15 +3,32 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
+import math + +import numpy as np +import torch from executorch.backends.nxp.backend.edge_helper import try_get_arg +from executorch.backends.nxp.backend.ir.converter.conversion.translator import ( + torch_type_to_numpy_type, +) from executorch.backends.nxp.backend.ir.converter.node_converter import ( + _is_dequant_node, + _is_quant_node, CustomDelegationOptions, is_not_qdq_node, NodeConverter, ) +from executorch.backends.nxp.backend.ir.converter.quantization_utils import ( + propagate_quantization, +) from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import ( BuiltinOperator, ) +from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model +from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import ( + maximum_options, + minimum_options, +) from executorch.backends.nxp.backend.neutron_operator_support import ( activation_supported_on_target, ) @@ -21,15 +38,26 @@ from torch.nn import Parameter +def _is_convertible_to_relu(node): + bounds = ClampConverter._get_clamp_bounds(node) + bounds = tuple(v if v is not None and math.isfinite(v) else None for v in bounds) + + # Some specific bounds can be replaced with single op ReLU. + if bounds not in ClampConverter.RELU_COMPATIBLE_BOUNDS.values(): + return False + + return True + + class ClampConverter(NodeConverter): - SUPPORTED_BOUNDS = { + RELU_COMPATIBLE_BOUNDS = { "ReluN1To1": (-1, 1), "Relu0To1": (0, 1), "Relu6": (0, 6), "Relu": (0, None), } - BOUNDS_TO_NEUTRON_IR_OP = { + BOUNDS_TO_RELU_NEUTRON_IR_OP = { (-1, 1): BuiltinOperator.RELU_N1_TO_1, (0, 1): BuiltinOperator.RELU_0_TO_1, (0, 6): BuiltinOperator.RELU6, @@ -53,6 +81,21 @@ def _is_supported_in_IR( # No NeutronIR-specific restrictions. 
return True + @staticmethod + def _io_quant_is_same(node: Node): + quant = next(iter(node.users.keys())) + dequant = node.args[0] + + if not _is_dequant_node(dequant): + return False + + if not _is_quant_node(quant): + return False + + q_params = quant.args[1:] + dq_params = dequant.args[1:] + return all([q == dq for q, dq in zip(q_params, dq_params)]) + @staticmethod def _is_supported_on_target( node: Node, @@ -60,20 +103,30 @@ def _is_supported_on_target( parameters_mapping: dict[str, Parameter], custom_delegation_options: CustomDelegationOptions, ) -> bool: - bounds = ClampConverter._get_clamp_bounds(node) + relu_compatible = _is_convertible_to_relu(node) + + if neutron_target_spec.use_new_flow_neutron_c: + io_quant_consistent = ClampConverter._io_quant_is_same(node) + quant_supported = NodeConverter.uses_quantization_type_for_io( + node, + supported_types=[torch.int8, torch.uint8], + input_indices=[0], + output_indices=[0], + ) - # Only some specific bounds are supported on the target hardware. - if bounds not in ClampConverter.SUPPORTED_BOUNDS.values(): - return False + # We either convert to ReLU -> SingleInputQuantization pattern + # or we convert to Min/Max, which requires same quantization on + # both input and output. + return (relu_compatible | io_quant_consistent) and quant_supported - return True + return relu_compatible @classmethod def supports_partitioning_result( cls, node: Node, partition_list: list[Partition], - custom_delegation_options: CustomDelegationOptions, + _: CustomDelegationOptions, neutron_target_spec: NeutronTargetSpec, parameters_mapping: dict[str, Parameter], ) -> bool: @@ -82,7 +135,10 @@ def supports_partitioning_result( # Neutron cannot delegate a partition where ReLU or ReLU6 is the only operator # and at the same time the node does not satisfy delegation requirements. # In contrast, ReLUN1To1 and ReLU0To1 are supported and delegated successfuly. 
- if bounds in [cls.SUPPORTED_BOUNDS["Relu"], cls.SUPPORTED_BOUNDS["Relu6"]]: + if bounds in [ + cls.RELU_COMPATIBLE_BOUNDS["Relu"], + cls.RELU_COMPATIBLE_BOUNDS["Relu6"], + ]: is_alone_in_partition = cls.is_node_alone_in_partition( node, partition_list, filter_fn=is_not_qdq_node ) @@ -91,8 +147,21 @@ def supports_partitioning_result( return True + @staticmethod + def _quantize_value( + value: int, + zp: int, + scale: float, + quant_min: int, + quant_max: int, + dtype: type = np.int8, + ) -> np.integer: + rescaled_value = round(value / scale) + zp + return dtype(np.clip(rescaled_value, quant_min, quant_max)) + def convert(self, node: Node): - """Convert the `aten.clamp.default` operator to Neutron IR `Relu*` operators. + """Convert the `aten.clamp.default` operator to either + Neutron IR `Relu*` operator or combination of `Min` and `Max`. The schema is: aten::clamp( Tensor self, @@ -101,13 +170,83 @@ def convert(self, node: Node): ) -> Tensor """ self.assert_convertible(node) + to_relu = _is_convertible_to_relu(node) bounds = self._get_clamp_bounds(node) - + bounds = tuple( + v if v is not None and math.isfinite(v) else None for v in bounds + ) t_op = self._create_tflite_op_with_io_tensors(node) - # noinspection PyTypeChecker,PyUnboundLocalVariable - t_op.opcode_index = self.builder.op_code_index_for_op_type( - self.BOUNDS_TO_NEUTRON_IR_OP[bounds] - ) - self.builder.append_operators([t_op]) + if not self.neutron_target_spec.use_new_flow_neutron_c or to_relu: + # noinspection PyTypeChecker,PyUnboundLocalVariable + t_op.opcode_index = self.builder.op_code_index_for_op_type( + self.BOUNDS_TO_RELU_NEUTRON_IR_OP[bounds] + ) + self.builder.append_operators([t_op]) + return + + q_node = node.args[0] + assert _is_dequant_node(q_node) + _, scale, zp, quant_min, quant_max, q_type = q_node.args + q_type = torch_type_to_numpy_type(q_type).type + + x = t_op.tmp_inputs[0] + y = t_op.tmp_outputs[0] + + if x.quantization is not None and y.quantization is None: + 
propagate_quantization(x, y) + + min_value, max_value = bounds + + if min_value is not None: + min_value = self._quantize_value( + value=min_value, + zp=zp, + scale=scale, + quant_min=quant_min, + quant_max=quant_max, + dtype=q_type, + ) + min_tensor = self.builder.create_tensor_for_data( + np.array([min_value], q_type), "min" + ) + propagate_quantization(x, min_tensor) + + if max_value is not None: + max_value = self._quantize_value( + value=max_value, + zp=zp, + scale=scale, + quant_min=quant_min, + quant_max=quant_max, + dtype=q_type, + ) + max_tensor = self.builder.create_tensor_for_data( + np.array([max_value], q_type), "max" + ) + propagate_quantization(x, max_tensor) + + if None not in bounds: + tmp_y = self.builder.duplicate_tensor(x) + tmp_x = tmp_y + propagate_quantization(x, tmp_y) + else: + tmp_y = y + tmp_x = x + + ops_to_add = [] + if max_value is not None: + min_op = tflite_model.Operator(builtin_options=minimum_options.Minimum()) + min_op.tmp_inputs = [x, max_tensor] + min_op.tmp_outputs = [tmp_y] + ops_to_add.append(min_op) + + if min_value is not None: + max_op = tflite_model.Operator(builtin_options=maximum_options.Maximum()) + max_op.tmp_inputs = [tmp_x, min_tensor] + max_op.tmp_outputs = [y] + ops_to_add.append(max_op) + + ops_to_add = ops_to_add if len(ops_to_add) >= 1 else [x] + self.builder.append_operators(ops_to_add) diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py index e300d6bbe9f..73de100e8b2 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py @@ -7,7 +7,6 @@ import numpy as np import torch - from executorch.backends.nxp.backend.edge_helper import try_get_arg from 
executorch.backends.nxp.backend.ir.converter.conversion import ( aten_translator, @@ -74,7 +73,7 @@ def _is_supported_on_target( MaxPool2DWithIndicesConverter._get_node_args(node) ) - if custom_delegation_options.use_new_flow_neutron_c: + if neutron_target_spec.use_new_flow_neutron_c: # Requirements specified by the new Neutron flow documentation. supported_types = [torch.int8, torch.uint8] diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/mul_tensor_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/mul_tensor_converter.py index 0e13aeb9b44..673097dc8ae 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/mul_tensor_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/mul_tensor_converter.py @@ -4,7 +4,6 @@ # LICENSE file in the root directory of this source tree. import torch - from executorch.backends.nxp.backend.data_format import NXP_NODE_FORMAT from executorch.backends.nxp.backend.ir.converter.node_converter import ( CustomDelegationOptions, @@ -26,7 +25,7 @@ def _is_supported_on_target( parameters_mapping: dict[str, Parameter], custom_delegation_options: CustomDelegationOptions, ) -> bool: - if custom_delegation_options.use_new_flow_neutron_c: + if neutron_target_spec.use_new_flow_neutron_c: if not NodeConverter.at_least_one_input_shape_matches_the_output_shape( node ): diff --git a/backends/nxp/backend/neutron_target_spec.py b/backends/nxp/backend/neutron_target_spec.py index a1d71cabddb..2d29121dd00 100644 --- a/backends/nxp/backend/neutron_target_spec.py +++ b/backends/nxp/backend/neutron_target_spec.py @@ -1,4 +1,4 @@ -# Copyright 2025 NXP +# Copyright 2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -8,12 +8,10 @@ from enum import Enum import torch - from executorch.backends.nxp.backend.neutron_converter_manager import ( NeutronConverterManager, ) from executorch.exir.dialects._ops import ops as exir_ops - from torch.fx import Node @@ -98,13 +96,17 @@ class NeutronTargetSpec: The functionality for probing the properties of Neutron Target. """ - def __init__(self, target: str): + def __init__(self, target: str, use_new_flow_neutron_c: bool = False): converter_manager = NeutronConverterManager() converter_manager.verify_target(target) neutron_converter = converter_manager.get_converter() self.neutron_target = neutron_converter.getNeutronTarget(target) + # The new neutron converter flow has different constraints for supported operators. These need to be addressed when + # deciding whether an operator is delegated or not in _is_supported_on_target(). + self.use_new_flow_neutron_c = use_new_flow_neutron_c + if self.is_subsystem(): raise ValueError( f"Target `{target}` is not a neutron-C target. Only MCU targets are supported at the moment." 
diff --git a/backends/nxp/nxp_backend.py b/backends/nxp/nxp_backend.py index f5e89823ee2..5c3b056bf72 100644 --- a/backends/nxp/nxp_backend.py +++ b/backends/nxp/nxp_backend.py @@ -14,7 +14,6 @@ import numpy as np import torch - from executorch.backends.nxp.backend.custom_delegation_options import ( CustomDelegationOptions, ) @@ -86,7 +85,9 @@ def neutron_compile_spec( :return: self for method chaining """ - self.config = NeutronTargetSpec(config) + self.config = NeutronTargetSpec( + config, use_new_flow_neutron_c=use_new_flow_neutron_c + ) assert ( self.output_format is None @@ -230,11 +231,11 @@ def preprocess( # noqa C901 ) tflite_model, io_formats = EdgeProgramToIRConverter().convert_program( edge_program, - neutron_target_spec=NeutronTargetSpec(target), - conversion_config=conversion_config, - custom_delegation_options=CustomDelegationOptions( - use_new_flow_neutron_c=use_new_flow_neutron_c + neutron_target_spec=NeutronTargetSpec( + target, use_new_flow_neutron_c=use_new_flow_neutron_c ), + conversion_config=conversion_config, + custom_delegation_options=CustomDelegationOptions(), ) neutron_model = NeutronConverterManager(dump_kernel_selection_code).convert( diff --git a/backends/nxp/quantizer/neutron_quantizer.py b/backends/nxp/quantizer/neutron_quantizer.py index 0c46678b25a..bc2cc395002 100644 --- a/backends/nxp/quantizer/neutron_quantizer.py +++ b/backends/nxp/quantizer/neutron_quantizer.py @@ -9,7 +9,6 @@ _get_default_passes, NeutronAtenPassManager, ) - from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec from executorch.backends.nxp.quantizer.patterns import ( AbsPattern, @@ -255,53 +254,63 @@ def __init__(self, neutron_target_spec: NeutronTargetSpec, is_qat: bool = False) OpQuantizer = NeutronAtenQuantizer super().__init__( [ - OpQuantizer(AbsPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(AdaptiveAvgPoolPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(AddTensorPattern(is_qat=is_qat), static_qconfig), + 
OpQuantizer(AbsPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer( + AdaptiveAvgPoolPattern(self, is_qat=is_qat), static_qconfig + ), + OpQuantizer(AddTensorPattern(self, is_qat=is_qat), static_qconfig), OpQuantizer(AddmmPattern(self, is_qat=is_qat), static_fc_qconfig), - OpQuantizer(AvgPool1DPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(AvgPool2DPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(BatchNormPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(BMMPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(CatPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(ClampPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(AvgPool1DPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(AvgPool2DPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(BatchNormPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(BMMPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(CatPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(ClampPattern(self, is_qat=is_qat), static_qconfig), OpQuantizer(Conv2dPattern(self, is_qat=is_qat), static_qconfig), OpQuantizer( ConvTranspose2dPattern(self, is_qat=is_qat), static_qconfig ), - OpQuantizer(DropoutPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(FlattenPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(HardTanhPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(HardTanhInPlacePattern(is_qat=is_qat), static_qconfig), - OpQuantizer(LeakyReluPattern(is_qat=is_qat), static_fc_qconfig), - OpQuantizer(LeakyReluInPlacePattern(is_qat=is_qat), static_fc_qconfig), + OpQuantizer(DropoutPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(FlattenPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(HardTanhPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer( + HardTanhInPlacePattern(self, is_qat=is_qat), static_qconfig + ), + OpQuantizer(LeakyReluPattern(self, is_qat=is_qat), static_fc_qconfig), + OpQuantizer( + LeakyReluInPlacePattern(self, 
is_qat=is_qat), static_fc_qconfig + ), OpQuantizer(LinearPattern(self, is_qat=is_qat), static_fc_qconfig), - OpQuantizer(MaxPool1DPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(MaxPool2DPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(MeanDimPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(MaxPool1DPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(MaxPool2DPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(MeanDimPattern(self, is_qat=is_qat), static_qconfig), OpQuantizer(MmPattern(self, is_qat=is_qat), static_qconfig), - OpQuantizer(MulTensorPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(NegPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(PadPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(PermutePattern(is_qat=is_qat), static_qconfig), - OpQuantizer(PReLUPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(ReluPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(ReluInPlacePattern(is_qat=is_qat), static_qconfig), - OpQuantizer(ReshapePattern(is_qat=is_qat), static_qconfig), - OpQuantizer(SigmoidPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(SliceTensorPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(SoftMaxPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(SqueezeDimPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(SqueezeDimsPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(SqueezePattern(is_qat=is_qat), static_qconfig), - OpQuantizer(SubTensorPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(TanhPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(TanhInPlacePattern(is_qat=is_qat), static_qconfig), - OpQuantizer(TransposeIntPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(UnsqueezePattern(is_qat=is_qat), static_qconfig), - OpQuantizer(UpsampleBilinear2DPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(UpsampleNearest2DPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(ViewPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(MulTensorPattern(self, 
is_qat=is_qat), static_qconfig), + OpQuantizer(NegPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(PadPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(PermutePattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(PReLUPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(ReluPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(ReluInPlacePattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(ReshapePattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(SigmoidPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(SliceTensorPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(SoftMaxPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(SqueezeDimPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(SqueezeDimsPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(SqueezePattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(SubTensorPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(TanhPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(TanhInPlacePattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(TransposeIntPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(UnsqueezePattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer( + UpsampleBilinear2DPattern(self, is_qat=is_qat), static_qconfig + ), + OpQuantizer( + UpsampleNearest2DPattern(self, is_qat=is_qat), static_qconfig + ), + OpQuantizer(ViewPattern(self, is_qat=is_qat), static_qconfig), ] ) diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py index bda554e0cce..c55e561963a 100644 --- a/backends/nxp/quantizer/patterns.py +++ b/backends/nxp/quantizer/patterns.py @@ -10,7 +10,9 @@ from functools import partial import torch - +from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.clamp_converter import ( + _is_convertible_to_relu, +) from executorch.backends.nxp.quantizer.utils import ( get_bias_qparams, 
get_bias_qparams_transp_conv, @@ -86,7 +88,8 @@ class PartitionAnchors: class QuantizationPattern(ABC): - def __init__(self, is_qat: bool = False): + def __init__(self, neutron_quantizer, is_qat: bool = False): + self.neutron_quantizer = neutron_quantizer self.is_qat = is_qat @abstractmethod @@ -115,8 +118,9 @@ class SharedSpecPattern(QuantizationPattern): def partition_types(self) -> list[torch.nn.Module]: pass - def get_anchors( - self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] + @staticmethod + def get_shared_spec_anchors( + gm: fx.GraphModule, fused_partition: list[fx.GraphModule] ) -> PartitionAnchors | None: node = fused_partition[0].nodes[-1] assert len(fused_partition[0].input_nodes) == 1 @@ -137,15 +141,21 @@ def get_anchors( ], ) + def get_anchors( + self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] + ) -> PartitionAnchors | None: + return self.get_shared_spec_anchors(gm, fused_partition) + class SingleInputBasicPattern(QuantizationPattern): @abstractmethod def partition_types(self) -> list[OpOverload]: pass - def get_anchors( - self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] - ) -> PartitionAnchors | None: + @staticmethod + def get_single_input_anchors( + gm: fx.GraphModule, fused_partition: list[fx.GraphModule] + ): node = fused_partition[0].nodes[-1] return PartitionAnchors( @@ -155,11 +165,13 @@ def get_anchors( output=[(node,)], ) + def get_anchors( + self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] + ) -> PartitionAnchors | None: + return self.get_single_input_anchors(gm, fused_partition) + class BatchNormPattern(QuantizationPattern): - def __init__(self, is_qat: bool): - super().__init__(is_qat=is_qat) - def partition_types(self) -> list[OpOverload]: # BatchNorm quantization is needed only when in QAT mode return [torch.ops.aten.batch_norm.default] if self.is_qat else [] @@ -227,9 +239,8 @@ def partition_types(self): class AddmmPattern(QuantizationPattern): def __init__(self, 
neutron_quantizer, is_qat: bool): - super().__init__(is_qat=is_qat) + super().__init__(neutron_quantizer, is_qat=is_qat) - self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info ) @@ -412,12 +423,25 @@ def get_anchors( ) -class ClampPattern(SingleInputBasicPattern): +class ClampPattern(QuantizationPattern): """Quantizer for the `aten.clamp.default` operator.""" def partition_types(self): return [torch.ops.aten.clamp.default] + def get_anchors( + self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] + ) -> PartitionAnchors | None: + node = fused_partition[0].nodes[-1] + + if ( + self.neutron_quantizer.neutron_target_spec.use_new_flow_neutron_c + and not _is_convertible_to_relu(node) + ): + return SharedSpecPattern.get_shared_spec_anchors(gm, fused_partition) + else: + return SingleInputBasicPattern.get_single_input_anchors(gm, fused_partition) + def _is_batch_norm(node_: Node) -> bool: return node_.op == "call_function" and node_.target in [ @@ -488,9 +512,8 @@ def get_anchors( class Conv2dPattern(ConvPattern): def __init__(self, neutron_quantizer, is_qat: bool = False): - super().__init__(is_qat=is_qat) + super().__init__(neutron_quantizer, is_qat=is_qat) - self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info ) @@ -582,7 +605,7 @@ def get_anchors( class ConvTranspose2dPattern(QuantizationPattern): def __init__(self, neutron_quantizer, is_qat: bool = False): - super().__init__(is_qat=is_qat) + super().__init__(neutron_quantizer, is_qat=is_qat) self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( @@ -745,9 +768,8 @@ def partition_types(self): class LinearPattern(QuantizationPattern): def __init__(self, neutron_quantizer, is_qat: bool = False): - super().__init__(is_qat=is_qat) + super().__init__(neutron_quantizer, is_qat=is_qat) - self.neutron_quantizer = neutron_quantizer 
self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info ) @@ -836,9 +858,8 @@ def partition_types(self): class MmPattern(QuantizationPattern): def __init__(self, neutron_quantizer, is_qat: bool = False): - super().__init__(is_qat=is_qat) + super().__init__(neutron_quantizer, is_qat=is_qat) - self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info ) @@ -1172,9 +1193,8 @@ class ActivationsConcatClusterPattern(QuantizationPattern): """ def __init__(self, neutron_quantizer, is_qat: bool = False): - super().__init__(is_qat=is_qat) + super().__init__(neutron_quantizer, is_qat=is_qat) - self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info ) diff --git a/backends/nxp/tests/executorch_pipeline.py b/backends/nxp/tests/executorch_pipeline.py index 8f588be621d..69a1a246b1a 100644 --- a/backends/nxp/tests/executorch_pipeline.py +++ b/backends/nxp/tests/executorch_pipeline.py @@ -13,7 +13,6 @@ import eiq_neutron_sdk import numpy as np import torch - from executorch import exir from executorch.backends.nxp.backend.custom_delegation_options import ( CustomDelegationOptions, @@ -98,7 +97,7 @@ def _get_default_quantizer(target_spec: NeutronTargetSpec, use_qat: bool) -> Qua def to_model_input_spec( - input_spec: Iterable[ModelInputSpec] | tuple[int, ...] | list[tuple[int, ...]] + input_spec: Iterable[ModelInputSpec] | tuple[int, ...] | list[tuple[int, ...]], ) -> tuple[ModelInputSpec, ...]: match input_spec: case _ if isinstance(input_spec, Iterable) and all( @@ -122,7 +121,7 @@ def to_model_input_spec( def get_calibration_inputs_fn_from_dataset_dir(dataset_dir) -> GetCalibrationInputsFn: def _nested( - input_spec: tuple[ModelInputSpec, ...] 
+ input_spec: tuple[ModelInputSpec, ...], ) -> Iterable[tuple[torch.Tensor, ...]]: data = sorted(os.listdir(dataset_dir)) inputs_needed = len(input_spec) @@ -156,7 +155,7 @@ def _nested( def _get_example_input( - input_spec: tuple[ModelInputSpec, ...] + input_spec: tuple[ModelInputSpec, ...], ) -> tuple[torch.Tensor, ...]: example_input = [] for spec in input_spec: @@ -193,8 +192,9 @@ def to_quantized_edge_program( use_new_flow_neutron_c: bool = False, delegate_to_npu=True, ) -> EdgeProgramManager: - _neutron_target_spec = NeutronTargetSpec(target) - custom_delegation_options.use_new_flow_neutron_c = use_new_flow_neutron_c + _neutron_target_spec = NeutronTargetSpec( + target, use_new_flow_neutron_c=use_new_flow_neutron_c + ) if get_quantizer_fn is None: get_quantizer_fn = partial( _get_default_quantizer, _neutron_target_spec, use_qat diff --git a/backends/nxp/tests/generic_tests/test_per_channel_conversion.py b/backends/nxp/tests/generic_tests/test_per_channel_conversion.py index b3034ff17ed..92716b014aa 100644 --- a/backends/nxp/tests/generic_tests/test_per_channel_conversion.py +++ b/backends/nxp/tests/generic_tests/test_per_channel_conversion.py @@ -8,7 +8,6 @@ import kgb import numpy as np import torch - from executorch.backends.nxp.backend.edge_program_converter import ( EdgeProgramToIRConverter, ) @@ -32,7 +31,6 @@ from executorch.backends.nxp.tests.models import Conv2dModule from executorch.exir.dialects._ops import ops as exir_ops from parameterized import parameterized - from torch import fx from torch._ops import OpOverload from torch.export import ExportedProgram @@ -52,8 +50,8 @@ class Conv2dPatternPerChannel(QuantizationPattern): - def __init__(self, is_per_channel: bool, is_qat: bool): - super().__init__(is_qat=is_qat) + def __init__(self, neutron_quantizer, is_per_channel: bool, is_qat: bool): + super().__init__(neutron_quantizer, is_qat=is_qat) self.is_per_channel = is_per_channel def partition_types(self) -> list[OpOverload]: @@ -146,7 +144,7 @@ 
def test_per_channel_convolution(self, _, use_qat: bool): model, input_shape, get_quantizer_fn=lambda: NeutronAtenQuantizer( - Conv2dPatternPerChannel(is_per_channel=True, is_qat=use_qat), + Conv2dPatternPerChannel(None, is_per_channel=True, is_qat=use_qat), static_qconfig, ), use_qat=use_qat, diff --git a/backends/nxp/tests/ir/converter/node_converter/test_clamp_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_clamp_converter.py index 8ba3c97d19f..2918d160520 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_clamp_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_clamp_converter.py @@ -6,16 +6,34 @@ import numpy as np import pytest import torch - from executorch.backends.nxp.backend.edge_program_converter import ( EdgeProgramToIRConverter, ) -from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program +from executorch.backends.nxp.backend.ir.converter.builder.aten_model_builder_director import ( + AtenModelBuilderDirector, +) +from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import ( + BuiltinOperator as Ops, +) +from executorch.backends.nxp.tests.executorch_pipeline import ( + ModelInputSpec, + to_quantized_edge_program, +) from executorch.backends.nxp.tests.executors import ( convert_run_compare, graph_contains_any_of_ops, ) -from executorch.exir.dialects._ops import ops as exir_ops +from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier +from executorch.backends.nxp.tests.model_output_comparator import ( + NumericalStatsOutputComparator, +) +from executorch.backends.nxp.tests.nsys_testing import lower_run_compare +from executorch.backends.nxp.tests.ops_aliases import ( + AddTensor, + Clamp, + ExecutorchDelegateCall, +) +from executorch.backends.nxp.tests.use_qat import * # noqa: F403 @pytest.fixture(autouse=True) @@ -24,11 +42,6 @@ def reseed_model_per_test_run(): np.random.seed(23) -# noinspection PyProtectedMember -ExecutorchDelegateCall 
= torch.ops.higher_order.executorch_call_delegate -Clamp = exir_ops.edge.aten.clamp.default - - class ClampModule(torch.nn.Module): # noinspection PyShadowingBuiltins @@ -180,3 +193,119 @@ def test_convert_clamp__no_delegation__unsupported_bounds(min, max): # Make sure the `clamp` was NOT delegated. assert graph_contains_any_of_ops(delegated_ep.graph, [Clamp]) + + +class TestClampNewNeutronFlow: + @pytest.mark.parametrize( + "min, max", + [ + pytest.param(-1, 2, id="min = -1, max = 2 (Max/Min)"), + pytest.param(None, 1, id="min = None, max = 1 (Max/Min)"), + pytest.param(1, None, id="min = 1, max = None (Max/Min)"), + pytest.param(0, 2, id="min = 0, max = 2 (Max/Min)"), + pytest.param(0, 1, id="min = 0, max = 1 (Relu0To1)"), + pytest.param(-1, 1, id="min = -1, max = 1 (ReluN1To1)"), + pytest.param(0, None, id="min = 0, max = None (Relu)"), + # # Float bounds + pytest.param(-1.0, 2.0, id="min = -1.0, max = 2.0 (Max/Min)"), + pytest.param(None, 1.0, id="min = None, max = 1.0 (Max/Min)"), + pytest.param(1.0, None, id="min = 1.0, max = None (Max/Min)"), + pytest.param(1.0, float("inf"), id="min = 1.0, max = infinity (Max/Min)"), + pytest.param(-float("inf"), 1.0, id="min = infinity, max = 1.0 (Max/Min)"), + pytest.param(0.1, 0.5, id="min = 0.1, max = 0.5 (Max/Min)"), + pytest.param(0.0, 1.0, id="min = 0.0, max = 1.0 (Relu0To1)"), + pytest.param(-1.0, 1.0, id="min = -1.0, max = 1.0 (ReluN1To1)"), + pytest.param(0.0, None, id="min = 0, max = None (Relu)"), + ], + ) + def test_convert_clamp__full_pipeline(self, mocker, min, max, use_qat): + input_shape = (2, 7, 2) # Indivisible by num_macs + model = AddClampModule(min, max) + + x_input_spec = ModelInputSpec(input_shape) + comparator = NumericalStatsOutputComparator() + graph_verifier = DetailedGraphVerifier( + mocker, + expected_delegated_ops={ + AddTensor: 1, + Clamp: 1, + }, + expected_non_delegated_ops={}, + ) + + lower_run_compare( + model=model, + input_spec=[x_input_spec], + dlg_model_verifier=graph_verifier, + 
output_comparator=comparator, + use_new_flow_neutron_c=True, + use_qat=use_qat, + ) + + # noinspection PyShadowingBuiltins + @pytest.mark.parametrize( + "min, max, expected_tflite_ops", + [ + pytest.param( + 0.1, + 0.5, + [Ops.ADD, Ops.MAXIMUM, Ops.MINIMUM], + id="min = 0.1, max = 0.5 (Max/Min)", + ), + pytest.param( + 0.0, 1.0, [Ops.ADD, Ops.RELU_0_TO_1], id="min = 0, max = 1 (Relu0To1)" + ), + pytest.param( + -1.0, + 1.0, + [Ops.ADD, Ops.RELU_N1_TO_1], + id="min = -1, max = 1 (ReluN1To1)", + ), + pytest.param( + 0.0, None, [Ops.ADD, Ops.RELU], id="min = 0, max = None (Relu)" + ), + pytest.param( + 0.0, + float("inf"), + [Ops.ADD, Ops.RELU], + id="min = 0, max = infinity (Relu)", + ), + ], + ) + def test_convert_clamp__relu_vs_maxmin(self, mocker, min, max, expected_tflite_ops): + input_shape = (23,) + model = AddClampModule(min, max) + + converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") + tflite_spy = mocker.spy(AtenModelBuilderDirector, "finish") + + delegated_ep = to_quantized_edge_program( + model, + input_shape, + use_new_flow_neutron_c=True, + ).exported_program() + + # Make sure the `clamp` was delegated. 
+ assert graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall]) + assert not graph_contains_any_of_ops(delegated_ep.graph, [Clamp]) + + intermediate_ep = converter_spy.call_args.args[1] + quant_node = list(intermediate_ep.graph.nodes)[-2] + dequant_node = list(intermediate_ep.graph.nodes)[-4] + tflite_internal_ops = list( + op.builtin_code for op in tflite_spy.spy_return.operator_codes.vector + ) + + assert graph_contains_any_of_ops(intermediate_ep.graph, [Clamp]) + assert len(tflite_internal_ops) == len(expected_tflite_ops) + 1 # Transpose + assert all(op in tflite_internal_ops for op in expected_tflite_ops) + + if len(expected_tflite_ops) == 3: + # Min/Max variant should have same input and output quantization + assert all( + q == dq for q, dq in zip(quant_node.args[1:], dequant_node.args[1:]) + ) + else: + assert not all( + q == dq for q, dq in zip(quant_node.args[1:], dequant_node.args[1:]) + ) diff --git a/backends/nxp/tests/ops_aliases.py b/backends/nxp/tests/ops_aliases.py index f190ca91e1f..e79798868db 100644 --- a/backends/nxp/tests/ops_aliases.py +++ b/backends/nxp/tests/ops_aliases.py @@ -12,8 +12,10 @@ from executorch.exir.dialects._ops import ops as exir_ops Abs = exir_ops.edge.aten.abs.default +AddTensor = exir_ops.edge.aten.add.Tensor AvgPool2D = exir_ops.edge.aten.avg_pool2d.default Bmm = exir_ops.edge.aten.bmm.default +Clamp = exir_ops.edge.aten.clamp.default Convolution = exir_ops.edge.aten.convolution.default DequantizePerChannel = exir_ops.edge.quantized_decomposed.dequantize_per_channel.default DequantizePerTensor = exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default diff --git a/examples/nxp/aot_neutron_compile.py b/examples/nxp/aot_neutron_compile.py index dda223c5650..b64c8463d29 100644 --- a/examples/nxp/aot_neutron_compile.py +++ b/examples/nxp/aot_neutron_compile.py @@ -12,7 +12,6 @@ import executorch.extension.pybindings.portable_lib import executorch.kernels.quantized # noqa F401 - import torch from 
executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec from executorch.backends.nxp.edge_passes.neutron_edge_pass_manager import ( @@ -253,7 +252,9 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool): if args.debug: logging.basicConfig(level=logging.DEBUG, format=FORMAT, force=True) - neutron_target_spec = NeutronTargetSpec(target=args.target) + neutron_target_spec = NeutronTargetSpec( + target=args.target, use_new_flow_neutron_c=args.use_new_flow_neutron_c + ) # 1. pick model from one of the supported lists model, example_inputs, calibration_inputs = get_model_and_inputs_from_name(