diff --git a/backends/nxp/backend/ir/converter/conversion/common.py b/backends/nxp/backend/ir/converter/conversion/common.py index 9186f5d0ac6..72d1216e50c 100755 --- a/backends/nxp/backend/ir/converter/conversion/common.py +++ b/backends/nxp/backend/ir/converter/conversion/common.py @@ -23,8 +23,6 @@ transpose_conv_options, ) -from torch.fx import Node - def try_get_input(t_op: tflite_model.Operator, idx: int) -> tflite_model.Tensor | None: """Return the input tensors of 't_op' at index 'idx', or None if the operator doesn't have that input. @@ -135,34 +133,6 @@ def uses_shape_broadcasting(t_op: tflite_model.Operator) -> bool: ) -def node_uses_shape_broadcasting(node: Node) -> bool: - """Determine if given PyTorch fx Node uses shape broadcasting for it's input nodes or not. - - :param node: PyTorch fx Node with 'all_input_nodes' initialized. - :return: True, if the node uses shape broadcasting for it's input nodes. - False otherwise. - """ - - if node.all_input_nodes is None: - logger.e( - logger.Code.INTERNAL_ERROR, - "common.node_uses_shape_broadcasting(): 'all_input_nodes' are None!", - ) - - if len(node.all_input_nodes) == 0: - logger.e( - logger.Code.INTERNAL_ERROR, - "common.node_uses_shape_broadcasting(): Operator has no inputs!", - ) - - first_input_shape = node.all_input_nodes[0].meta["val"].shape - - return any( - input_tensor.meta["val"].shape != first_input_shape - for input_tensor in node.all_input_nodes[1:] - ) - - class OpsList: """ Holder of TFLite operator (middle_op) that can be prefixed (pre_ops) of suffixed (post_ops) diff --git a/backends/nxp/backend/ir/converter/node_converter.py b/backends/nxp/backend/ir/converter/node_converter.py index c1c1830c583..3abda419e8e 100755 --- a/backends/nxp/backend/ir/converter/node_converter.py +++ b/backends/nxp/backend/ir/converter/node_converter.py @@ -16,6 +16,7 @@ input_quantization_type, output_quantization_type, ) +from executorch.backends.nxp.backend.ir import logger as logger from 
executorch.backends.nxp.backend.ir.conversion_context import ConversionContext from executorch.backends.nxp.backend.ir.converter.builder.aten_model_builder_director import ( AtenModelBuilderDirector, ) @@ -377,3 +378,67 @@ def uses_quantization_type_for_io( ) and NodeConverter.uses_quantization_type_for_outputs( node, supported_types, output_indices ) + + @staticmethod + def uses_shape_broadcasting(node: Node) -> bool: + """Determine if given PyTorch fx Node uses shape broadcasting for its input nodes or not. + + :param node: PyTorch fx Node with 'all_input_nodes' initialized. + :return: True, if the node uses shape broadcasting for its input nodes. + False otherwise. + """ + + if node.all_input_nodes is None: + logger.e( + logger.Code.INTERNAL_ERROR, + "node_converter.uses_shape_broadcasting(): 'all_input_nodes' are None!", + ) + + if len(node.all_input_nodes) == 0: + logger.e( + logger.Code.INTERNAL_ERROR, + "node_converter.uses_shape_broadcasting(): Operator has no inputs!", + ) + + first_input_shape = node.all_input_nodes[0].meta["val"].shape + + return any( + input_tensor.meta["val"].shape != first_input_shape + for input_tensor in node.all_input_nodes[1:] + ) + + @staticmethod + def at_least_one_input_shape_matches_the_output_shape(node: Node) -> bool: + """Determine if at least one input of the given PyTorch fx Node has the same shape as its output. + + :param node: PyTorch fx Node with 'all_input_nodes' initialized. + :return: True, if at least one input has the same shape as the output node. + False otherwise. 
+ """ + + if node.all_input_nodes is None: + logger.e( + logger.Code.INTERNAL_ERROR, + "node_converter.at_least_one_input_shape_matches_the_output_shape(): 'all_input_nodes' are None!", + ) + + if len(node.all_input_nodes) == 0: + logger.e( + logger.Code.INTERNAL_ERROR, + "node_converter.at_least_one_input_shape_matches_the_output_shape(): Operator has no inputs!", + ) + + output_shape = node.meta["val"].shape + + return any( + input_tensor.meta["val"].shape == output_shape + for input_tensor in node.all_input_nodes + ) + + @staticmethod + def _node_inputs_ranks_not_equal(node) -> bool: + first_input_shape = node.all_input_nodes[0].meta["val"].shape + return not all( + len(input_node.meta["val"].shape) == len(first_input_shape) + for input_node in node.all_input_nodes[1:] + ) diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/add_tensor_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/add_tensor_converter.py index cd5aa2ead81..fd28b077b8a 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/add_tensor_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/add_tensor_converter.py @@ -1,11 +1,8 @@ -# Copyright 2025 NXP +# Copyright 2025-2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from executorch.backends.nxp.backend.ir.converter.conversion.common import ( - node_uses_shape_broadcasting, -) from executorch.backends.nxp.backend.ir.converter.node_converter import ( CustomDelegationOptions, NodeConverter, @@ -26,7 +23,7 @@ def _is_supported_on_target( parameters_mapping: dict[str, Parameter], custom_delegation_options: CustomDelegationOptions, ) -> bool: - if node_uses_shape_broadcasting(node): + if NodeConverter.uses_shape_broadcasting(node): # Shape broadcasting may require the addition of `Transpose` ops during conversion. 
return False diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/mul_tensor_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/mul_tensor_converter.py index d67b0aa4bcb..0e13aeb9b44 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/mul_tensor_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/mul_tensor_converter.py @@ -1,11 +1,11 @@ -# Copyright 2025 NXP +# Copyright 2025-2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from executorch.backends.nxp.backend.ir.converter.conversion.common import ( - node_uses_shape_broadcasting, -) +import torch + +from executorch.backends.nxp.backend.data_format import NXP_NODE_FORMAT from executorch.backends.nxp.backend.ir.converter.node_converter import ( CustomDelegationOptions, NodeConverter, @@ -26,19 +26,41 @@ def _is_supported_on_target( parameters_mapping: dict[str, Parameter], custom_delegation_options: CustomDelegationOptions, ) -> bool: - if node_uses_shape_broadcasting(node): - # Shape broadcasting may require the addition of `Transpose` ops during conversion. 
- return False + if custom_delegation_options.use_new_flow_neutron_c: + if not NodeConverter.at_least_one_input_shape_matches_the_output_shape( + node + ): + return False + + # If one input is in channel first and ranks of input tensors are not equal, we need to add Transposes + # Transpose is currently not supported for new flow + if any( + input_node.meta[NXP_NODE_FORMAT].is_channels_first() + for input_node in node.all_input_nodes + ) and NodeConverter._node_inputs_ranks_not_equal(node): + return False + + supported_types = [torch.int8, torch.uint8] + if not NodeConverter.uses_quantization_type_for_io( + node, supported_types, [0, 1], [0] + ): + return False + + return True + else: + if NodeConverter.uses_shape_broadcasting(node): + # Shape broadcasting may require the addition of `Transpose` ops during conversion. + return False - node_shape = node.meta["val"].shape + node_shape = node.meta["val"].shape - # Check that at least one dimension is divisible by number of MACS - # or all dimensions are equal to one - # Otherwise Neutron cannot convert it - dim_divisible = any(s % 8 == 0 for s in node_shape) or all( - s == 1 for s in node_shape - ) - return dim_divisible + # Check that at least one dimension is divisible by number of MACS + # or all dimensions are equal to one + # Otherwise Neutron cannot convert it + dim_divisible = any(s % 8 == 0 for s in node_shape) or all( + s == 1 for s in node_shape + ) + return dim_divisible @staticmethod def _is_supported_in_IR( @@ -51,9 +73,11 @@ def _is_supported_in_IR( return True - # mul.Tensor Node format: (Tensor self, Tensor other, *) def convert(self, node: Node): - """Convert 'mul_tensor' operator to NeutronIR 'Mul'.""" + """Convert 'mul_tensor' operator to NeutronIR 'Mul'. 
+ The ExecuTorch schema is: + mul.Tensor(Tensor self, Tensor other) + """ self.assert_convertible(node) t_op = self._create_tflite_op_with_io_tensors(node) t_op.builtin_options = mul_options.Mul() diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/sub_tensor_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/sub_tensor_converter.py index e9522c87114..e97f4bf63c2 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/sub_tensor_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/sub_tensor_converter.py @@ -1,11 +1,8 @@ -# Copyright 2025 NXP +# Copyright 2025-2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from executorch.backends.nxp.backend.ir.converter.conversion.common import ( - node_uses_shape_broadcasting, -) from executorch.backends.nxp.backend.ir.converter.node_converter import ( CustomDelegationOptions, NodeConverter, @@ -26,7 +23,7 @@ def _is_supported_on_target( parameters_mapping: dict[str, Parameter], custom_delegation_options: CustomDelegationOptions, ) -> bool: - if node_uses_shape_broadcasting(node): + if NodeConverter.uses_shape_broadcasting(node): # Shape broadcasting may require the addition of `Transpose` ops during conversion. return False diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py index 60afa6bf4d2..f9cd75a7359 100644 --- a/backends/nxp/quantizer/patterns.py +++ b/backends/nxp/quantizer/patterns.py @@ -830,7 +830,7 @@ class MulTensorPattern(QuantizationPattern): Basic quantization for all inputs and output. 
""" - def partition_types(self) -> list[torch.nn.Module]: + def partition_types(self) -> list[OpOverload]: return [torch.ops.aten.mul.Tensor] def get_anchors( diff --git a/backends/nxp/tests/ir/converter/node_converter/test_mul_tensor_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_mul_tensor_converter.py index 053cd96944d..e72b988a591 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_mul_tensor_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_mul_tensor_converter.py @@ -1,4 +1,4 @@ -# Copyright 2025 NXP +# Copyright 2025-2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -10,19 +10,30 @@ from executorch.backends.nxp.backend.edge_program_converter import ( EdgeProgramToIRConverter, ) -from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program +from executorch.backends.nxp.tests.executorch_pipeline import ( + ModelInputSpec, + to_quantized_edge_program, +) from executorch.backends.nxp.tests.executors import ( convert_run_compare, + graph_contains_any_of_ops, ToChannelFirstPreprocess, ToChannelLastPreprocess, ) +from executorch.backends.nxp.tests.graph_verifier import BaseGraphVerifier from executorch.backends.nxp.tests.models import ( MulTensorConvModule, MulTensorModule, MulTensorOneInputModule, ) -from executorch.exir.dialects._ops import ops as exir_ops +from executorch.backends.nxp.tests.nsys_testing import lower_run_compare +from executorch.backends.nxp.tests.ops_aliases import ( + Convolution, + ExecutorchDelegateCall, + MulTensor, +) from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -69,7 +80,7 @@ def test_mul_tensor_quant_conversion(mocker, x_input_shape): input_data = {0: input_data_1, 1: input_data_2} exported_nodes = list(exported_program.graph.nodes) - assert exported_nodes[4].target == 
exir_ops.edge.aten.mul.Tensor + assert exported_nodes[4].target == MulTensor convert_run_compare( exported_program, tfl_model=tflite_flatbuffers_model, input_data=input_data @@ -93,9 +104,7 @@ def test_mul_tensor_shape_unsupported_quant_conversion(x_input_shape): nodes = list(edge_program.graph.nodes) # Input tensor shape is not supported, node is not converted - assert ( - nodes[3].target == exir_ops.edge.aten.mul.Tensor - ) # Mul Tensor is not delegated. + assert nodes[3].target == MulTensor # Mul Tensor is not delegated. @pytest.mark.parametrize( @@ -128,7 +137,7 @@ def test_mul_tensor_one_input_quant_conversion(mocker, input_shape): input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8) exported_nodes = list(exported_program.graph.nodes) - assert exported_nodes[2].target == exir_ops.edge.aten.mul.Tensor + assert exported_nodes[2].target == MulTensor convert_run_compare( exported_program, tfl_model=tflite_flatbuffers_model, input_data=input_data @@ -176,8 +185,8 @@ def test_mul_tensor_w_conv_quant_conversion(mocker, x_input_shape): input_data = {0: input_data_1, 1: input_data_2} exported_nodes = list(exported_program.graph.nodes) - assert exported_nodes[12].target == exir_ops.edge.aten.convolution.default - assert exported_nodes[15].target == exir_ops.edge.aten.mul.Tensor + assert exported_nodes[12].target == Convolution + assert exported_nodes[15].target == MulTensor convert_run_compare( exported_program, @@ -207,6 +216,137 @@ def test_mul_tensor_broadcasting_unsupported_quant_conversion( nodes = list(edge_program.graph.nodes) # Broadcast is not supported, node is not converted - assert ( - nodes[6].target == exir_ops.edge.aten.mul.Tensor - ) # Mul Tensor is not delegated. + assert nodes[6].target == MulTensor # Mul Tensor is not delegated. 
+ + +class TestMulTensorNewNeutronFlow: + @pytest.mark.parametrize( + "x_input_shape", + [ + pytest.param((1,), id="1D."), + pytest.param((6, 8), id="2D."), + pytest.param((1, 4, 8), id="3D."), + pytest.param((1, 4, 8, 8), id="4D."), + ], + ) + def test__basic_nsys_inference(self, x_input_shape): + x_input_spec = ModelInputSpec(x_input_shape) + model = MulTensorModule() + graph_verifier = BaseGraphVerifier( + exp_num_delegate_call_nodes=1, + exp_non_delegated_nodes=[], + ) + + lower_run_compare( + model, + [x_input_spec, x_input_spec], + graph_verifier, + use_new_flow_neutron_c=True, + ) + + @pytest.mark.parametrize( + "input_spec", + [ + pytest.param( + [ModelInputSpec((4, 6)), ModelInputSpec((1, 6))], id="2 inputs 2D." + ), + pytest.param( + [ModelInputSpec((5, 3, 4)), ModelInputSpec((1, 3, 1))], + id="2 inputs 3D.", + ), + pytest.param( + [ModelInputSpec((4,)), ModelInputSpec((4, 4))], id="2 inputs 1D+2D." + ), + ], + ) + def test__correct_broadcast(self, input_spec): + model = MulTensorModule() + graph_verifier = BaseGraphVerifier( + exp_num_delegate_call_nodes=1, + exp_non_delegated_nodes=[], + ) + + lower_run_compare( + model, input_spec, graph_verifier, use_new_flow_neutron_c=True + ) + + @pytest.mark.parametrize( + "input_spec", + [ + pytest.param( + [ModelInputSpec((4, 1)), ModelInputSpec((1, 6))], id="2 inputs 2D." + ), + pytest.param( + [ModelInputSpec((1, 3, 4)), ModelInputSpec((5, 3, 1))], + id="2 inputs 3D.", + ), + pytest.param( + [ModelInputSpec((6, 4)), ModelInputSpec((6, 6, 1))], + id="2 inputs 2D+3D.", + ), + ], + ) + def test__incorrect_broadcast(self, input_spec): + # Broadcast where at least one of the inputs is not equal to output is not supported + model = MulTensorModule() + + delegated_ep = to_quantized_edge_program( + model, input_spec, use_new_flow_neutron_c=True + ).exported_program() + + # Make sure the `mul.Tensor` was NOT delegated. 
+ assert not graph_contains_any_of_ops( + delegated_ep.graph, [ExecutorchDelegateCall] + ) + assert graph_contains_any_of_ops(delegated_ep.graph, [MulTensor]) + + @pytest.mark.parametrize( + "x_input_shape", + [ + pytest.param( + (1, 4, 5, 5), id="4D, product of dims is not a multiple of 8." + ), + ], + ) + def test__w_conv(self, x_input_shape): + model = MulTensorConvModule() + + n, c, h, w = x_input_shape + y_input_spec = ModelInputSpec((n, 8, h, w)) + x_input_spec = ModelInputSpec(x_input_shape) + + graph_verifier = BaseGraphVerifier( + exp_num_delegate_call_nodes=1, + exp_non_delegated_nodes=[], + ) + + lower_run_compare( + model, + [x_input_spec, y_input_spec], + graph_verifier, + use_new_flow_neutron_c=True, + ) + + @pytest.mark.parametrize( + "input_spec", + [ + pytest.param( + [ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 5))], + id="2 inputs 4D ch last + 2D ch first.", + ), + pytest.param( + [ModelInputSpec((1, 4, 4, 10)), ModelInputSpec((1, 4, 1))], + id="2 inputs 4D ch last + 3D ch first.", + ), + ], + ) + def test__w_conv_unsupported(self, input_spec): + model = MulTensorConvModule() + + delegated_ep = to_quantized_edge_program( + model, input_spec, use_new_flow_neutron_c=True + ).exported_program() + + # Make sure the `mul.Tensor` was NOT delegated. 
+ assert graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall]) + assert graph_contains_any_of_ops(delegated_ep.graph, [MulTensor]) diff --git a/backends/nxp/tests/ops_aliases.py b/backends/nxp/tests/ops_aliases.py index ae4189e209f..cbf008d3dbc 100644 --- a/backends/nxp/tests/ops_aliases.py +++ b/backends/nxp/tests/ops_aliases.py @@ -11,9 +11,11 @@ AvgPool2D = exir_ops.edge.aten.avg_pool2d.default Bmm = exir_ops.edge.aten.bmm.default +Convolution = exir_ops.edge.aten.convolution.default ExecutorchDelegateCall = torch.ops.higher_order.executorch_call_delegate HardTanh = exir_ops.edge.aten.hardtanh.default HardTanh_ = exir_ops.edge.aten.hardtanh_.default +MulTensor = exir_ops.edge.aten.mul.Tensor Slice = exir_ops.edge.aten.slice.Tensor SliceCopy = exir_ops.edge.aten.slice_copy.Tensor Softmax = exir_ops.edge.aten._softmax.default diff --git a/examples/nxp/executor_runner/nxp_executor_runner.cpp b/examples/nxp/executor_runner/nxp_executor_runner.cpp index 52a62611cb5..65f5831e5c5 100644 --- a/examples/nxp/executor_runner/nxp_executor_runner.cpp +++ b/examples/nxp/executor_runner/nxp_executor_runner.cpp @@ -446,6 +446,9 @@ int main(int argc, char* argv[]) { } closedir(datasetDir); + // Sort inputsData to ensure correct input ordering + std::sort(inputsData.begin(), inputsData.end()); + setInputs(method.get(), inputsData); status = method->execute();