diff --git a/backends/nxp/backend/edge_helper.py b/backends/nxp/backend/edge_helper.py index 23924d364f4..0de0a5b0679 100644 --- a/backends/nxp/backend/edge_helper.py +++ b/backends/nxp/backend/edge_helper.py @@ -441,8 +441,10 @@ def output_quantization_type( │ """ users = list(node.users) - if len(users) == 1: + if output_index is None: + # Basic QDQ case (without getitem nodes). if not _is_quantize(quantize_node := users[0]): + # Broken QDQ schema. return None else: # Multiple users diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/abs_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/abs_converter.py index f2b26d6512e..851891bf1f2 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/abs_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/abs_converter.py @@ -1,11 +1,14 @@ -# Copyright 2025 NXP +# Copyright 2025-2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import torch + from executorch.backends.nxp.backend.ir.converter.node_converter import ( CustomDelegationOptions, + NeutronTargetSpec, NodeConverter, ) from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import ( @@ -25,6 +28,25 @@ def _is_supported_in_IR( ) -> bool: return True + @staticmethod + def _is_supported_on_target( + node: Node, + neutron_target_spec: NeutronTargetSpec, + parameters_mapping: dict[str, Parameter], + custom_delegation_options: CustomDelegationOptions, + ) -> bool: + + if custom_delegation_options.use_new_flow_neutron_c: + # Requirements specified by the new Neutron flow documentation. 
+ + supported_types = [torch.int8, torch.uint8] + if not NodeConverter.uses_quantization_type_for_io( + node, supported_types, [0], None + ): + return False + + return True + def convert(self, node: Node): """Convert 'aten::abs' operator to TFLite 'Abs'.""" self.assert_convertible(node) diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py index d8b3cdb3707..e300d6bbe9f 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py @@ -6,6 +6,7 @@ import operator import numpy as np +import torch from executorch.backends.nxp.backend.edge_helper import try_get_arg from executorch.backends.nxp.backend.ir.converter.conversion import ( @@ -73,32 +74,54 @@ def _is_supported_on_target( MaxPool2DWithIndicesConverter._get_node_args(node) ) - output_shape = node.meta["val"][0].shape # Shape of the main output (index 0) - if output_shape[0] != 1: - # /neutron-converter/src/OperatorC/MaxPoolPlugin.cpp?at=NEUTRON_SOFTWARE_2.2.2#106 - return False - - # Neutron only has a restriction on `stride_h`. `stride_w` is not restricted. 
- stride_h = stride[0] - if stride_h not in (1, 2): - # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#901 - # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#923 - return False - - channels = output_shape[1] - if channels % neutron_target_spec.get_num_macs() != 0: - # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#903 - # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#925 - return False - - if any(pad > kernel_dim for pad, kernel_dim in zip(padding, kernel_size)): - # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#904-907 - # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#926-929 - - # Cannot be tested as PyTorch crashes in this case. It requires the padding to be at most half of the - # effective kernel size, which is an even stricter requirement than what Neutron imposes. - # https://github.com/pytorch/pytorch/blob/449b1768410104d3ed79d3bcfe4ba1d65c7f22c0/torch/_meta_registrations.py#L4483-L4489 - return False + if custom_delegation_options.use_new_flow_neutron_c: + # Requirements specified by the new Neutron flow documentation. + + supported_types = [torch.int8, torch.uint8] + if not NodeConverter.uses_quantization_type_for_io( + node, supported_types, [0], [0] + ): + return False + + maximum_supported_kernel_size = 4096 + # If there is no padding, Neutron allows maximum stride of 4096. Otherwise, it's 32. But the converter + # always inserts a `Pad` operator to add the padding, so the `MaxPool` never pads its input itself, so + # 4096 is always the limit. And similarly, the `MaxPool` input padding limitation does not apply either.
+ maximum_supported_stride = 4096 + + if any(k > maximum_supported_kernel_size for k in kernel_size): + return False + if any(s > maximum_supported_stride for s in stride): + return False + + else: + # Shape of the main output (index 0) + output_shape = node.meta["val"][0].shape + if output_shape[0] != 1: + # /neutron-converter/src/OperatorC/MaxPoolPlugin.cpp?at=NEUTRON_SOFTWARE_2.2.2#106 + return False + + # Neutron only has a restriction on `stride_h`. `stride_w` is not restricted. + stride_h = stride[0] + if stride_h not in (1, 2): + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#901 + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#923 + return False + + channels = output_shape[1] + if channels % neutron_target_spec.get_num_macs() != 0: + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#903 + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#925 + return False + + if any(pad > kernel_dim for pad, kernel_dim in zip(padding, kernel_size)): + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#904-907 + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#926-929 + + # Cannot be tested as PyTorch crashes in this case. It requires the padding to be at most half of the + # effective kernel size, which is an even stricter requirement than what Neutron imposes. 
+ # https://github.com/pytorch/pytorch/blob/449b1768410104d3ed79d3bcfe4ba1d65c7f22c0/torch/_meta_registrations.py#L4483-L4489 + return False return True diff --git a/backends/nxp/tests/dataset_creator.py b/backends/nxp/tests/dataset_creator.py index 3377b85eadd..eaf267f4fcf 100644 --- a/backends/nxp/tests/dataset_creator.py +++ b/backends/nxp/tests/dataset_creator.py @@ -45,8 +45,10 @@ def generate_samples( class RandomDatasetCreator(DatasetCreator): """Dataset creator that generates random input samples.""" - def __init__(self, num_samples=2): + def __init__(self, num_samples=2, low=0.0, high=1.0): self._num_samples = num_samples + self.low = low + self.high = high def generate_samples( self, dataset_dir: str, input_spec: list[ModelInputSpec] @@ -103,9 +105,11 @@ def _gen_samples( case _: raise ValueError(f"Unsupported dim_order: {spec.dim_order}") - sample_vector = rng.random( - np.prod(shape), torch_type_to_numpy_type(spec.dtype) - ).reshape(shape) + sample_vector = ( + rng.uniform(self.low, self.high, size=np.prod(shape)) + .astype(torch_type_to_numpy_type(spec.dtype)) + .reshape(shape) + ) file_name = ( f"{str(spec_idx).zfill(2)}.bin" if len(input_spec) > 1 diff --git a/backends/nxp/tests/graph_verifier.py b/backends/nxp/tests/graph_verifier.py index 33dee7d3407..44900b6a11b 100644 --- a/backends/nxp/tests/graph_verifier.py +++ b/backends/nxp/tests/graph_verifier.py @@ -5,42 +5,85 @@ import abc import re +from collections import defaultdict +from copy import deepcopy from dataclasses import dataclass -from typing import Union +from typing import Callable, Union +from executorch.backends.nxp.neutron_partitioner import ( + NeutronPartitioner, + NXP_DELEGATION_TAG, +) +from executorch.backends.nxp.tests.ops_aliases import ( + DequantizePerChannel, + DequantizePerTensor, + QuantizePerChannel, + QuantizePerTensor, +) + +from executorch.exir.dialects.edge._ops import EdgeOpOverload + +from pytest_mock import MockerFixture + +from torch.fx import Node from 
torch.fx.graph import Graph @dataclass class NonDelegatedNode: + """Represents an expected non-delegated node in the graph. + + :param node_name: The name of the node to check for + :param num_occurrences: Expected number of occurrences. If None, just verifies that at least one exists + """ + node_name: str num_occurrences: Union[int, None] = None class GraphVerifier(abc.ABC): + """Abstract base class for graph verification strategies.""" + @abc.abstractmethod def verify_graph(self, graph: Graph): - pass + """Verifies the graph meets expected criteria. - @abc.abstractmethod - def check_num_delegated_nodes(self, num_dlg_nodes: int): + :param graph: The FX graph to verify + :raises AssertionError: If the graph does not meet expectations + """ pass class BaseGraphVerifier(GraphVerifier): - """Graph verifier base class. Checks for number of delegated nodes and number of selected expected nodes.""" + """Graph verifier base class. Checks for number of delegated nodes and number of selected expected nodes. + + This verifier performs the following checks: + - The total number of delegated call nodes matches expectations + - Specific non-delegated nodes appear with the expected frequency + - No unexpected aten nodes are present in the graph + """ def __init__( self, exp_num_delegate_call_nodes: int, exp_non_delegated_nodes: list[NonDelegatedNode] = None, ): + """Initializes the BaseGraphVerifier. + + :param exp_num_delegate_call_nodes: Expected number of delegated nodes + :param exp_non_delegated_nodes: List of expected non-delegated nodes to verify + """ self.exp_non_delegated_nodes = ( exp_non_delegated_nodes if exp_non_delegated_nodes is not None else [] ) self.exp_num_delegate_call_nodes = exp_num_delegate_call_nodes def check_num_delegated_nodes(self, num_dlg_nodes): + """Checks that the number of delegated nodes matches expectations. 
+ + :param num_dlg_nodes: Actual number of delegated nodes + :raises AssertionError: If the count doesn't match expectations + """ assert not ( num_dlg_nodes < self.exp_num_delegate_call_nodes ), f"Number of delegated nodes decreased from {self.exp_num_delegate_call_nodes} to {num_dlg_nodes}." @@ -49,6 +92,11 @@ def check_num_delegated_nodes(self, num_dlg_nodes): ), f"Number of delegated nodes increased from {self.exp_num_delegate_call_nodes} to {num_dlg_nodes}." def verify_graph(self, graph): + """Verifies the graph meets delegation and node presence expectations. + + :param graph: The FX graph to verify + :raises AssertionError: If verification fails + """ nodes = list(graph.nodes) # Check for specific non delegated nodes @@ -84,3 +132,133 @@ def verify_graph(self, graph): assert ( not unexpected_aten_fn_nodes ), f"Graphs contains unexpected aten nodes:\n{unexpected_aten_fn_nodes}." + + +# Type alias for operators - can be either EdgeOpOverload or any callable (e.g., operator.getitem). +Operator = EdgeOpOverload | Callable + + +class DetailedGraphVerifier(GraphVerifier): + """Graph verifier that checks for exact delegated and non-delegated operators. + + This verifier captures a snapshot of the graph immediately after partitioning and verifies + that specific operators were delegated/non-delegated the expected number of times. It uses + mocker to intercept the partition() call and create a deep copy of the nodes before they + can be modified. Quantization/dequantization operators are ignored by default as they are + typically not the focus of delegation verification. 
+ """ + + default_ops_to_ignore = { + QuantizePerTensor, + QuantizePerChannel, + DequantizePerTensor, + DequantizePerChannel, + } + + def __init__( + self, + mocker: MockerFixture, + *, + expected_delegated_ops: dict[Operator, int], + expected_non_delegated_ops: dict[Operator, int], + ops_to_ignore: set[Operator] | None = None, + ): + """Initializes the DetailedGraphVerifier and patches NeutronPartitioner.partition() to capture node state. + + :param expected_delegated_ops: Dictionary mapping operators to their expected delegation count + :param expected_non_delegated_ops: Dictionary mapping operators to their expected non-delegation count + :param mocker: Pytest mocker fixture for intercepting the partition method + :param ops_to_ignore: Set of operators to ignore during verification. Defaults to quantization ops + """ + self.expected_delegated_ops = expected_delegated_ops + self.expected_non_delegated_ops = expected_non_delegated_ops + + self.ops_to_ignore = ops_to_ignore or self.default_ops_to_ignore + + # We need to use mocker to capture a copy of the nodes returned by NeutronPartitioner.partition() to access + # their partition tag. The nodes in the returned graph may be modified after partition() returns, so we + # capture a deep copy immediately when the method completes. + self.captured_partitioned_nodes: list[Node] | None = None + + # Store original partition method for the wrapper. + # Note: pytest-mock automatically restores the original method after the test completes, + # so manual cleanup is not required. + original_partition_method = NeutronPartitioner.partition + + def partition_wrapper(self_, exported_program): + """Wraps NeutronPartitioner.partition() to capture a snapshot of nodes after partitioning. 
+ + :param self_: The NeutronPartitioner instance + :param exported_program: The ExportedProgram being partitioned + :return: The PartitionResult from the original partition method + """ + result = original_partition_method(self_, exported_program) + # Capture a deep copy of the nodes with their metadata. + # This ensures we have the exact state immediately after partitioning, + # before any subsequent transformations modify the graph. + self.captured_partitioned_nodes = list( + deepcopy(exported_program.graph.nodes) + ) + return result + + # Patch the partition method to intercept and capture results. + mocker.patch.object(NeutronPartitioner, "partition", partition_wrapper) + + def verify_graph(self, graph): + """Verifies that operators were delegated/non-delegated as expected by comparing actual counts against expectations. + + :param graph: The FX graph to verify (not directly used; we use captured nodes instead) + :raises AssertionError: If the NeutronPartitioner wasn't used or if delegation doesn't match expectations + """ + assert ( + self.captured_partitioned_nodes is not None + ), "The NeutronPartitioner was not used. Cannot access delegated nodes." + + delegated_ops = defaultdict(int) + non_delegated_ops = defaultdict(int) + + for node in self.captured_partitioned_nodes: + # Only process call_function nodes with a target + if not hasattr(node, "target") or node.op != "call_function": + continue + + # Skip operators we're configured to ignore (e.g., quantization ops) + if node.target in self.ops_to_ignore: + continue + + # Check if the node was tagged for delegation during partitioning + if NXP_DELEGATION_TAG in node.meta: + delegated_ops[node.target] += 1 + else: + non_delegated_ops[node.target] += 1 + + # All ops which were either expected to be delegated, or were actually delegated. 
+ all_delegated_ops = list(set(self.expected_delegated_ops).union(delegated_ops)) + + # All ops which were either expected to be non-delegated, or were actually non-delegated. + all_non_delegated_ops = list( + set(self.expected_non_delegated_ops).union(non_delegated_ops) + ) + + message = "" + + # Check delegated operators + for op in all_delegated_ops: + expected_count = self.expected_delegated_ops.get(op, 0) + real_count = delegated_ops.get(op, 0) + op_name = op.name() if hasattr(op, "name") else str(op) + if expected_count != real_count: + message += f"\t`{op_name}` was delegated {real_count} times instead of the expected {expected_count} times.\n" + + # Check non-delegated operators + for op in all_non_delegated_ops: + expected_count = self.expected_non_delegated_ops.get(op, 0) + real_count = non_delegated_ops.get(op, 0) + op_name = op.name() if hasattr(op, "name") else str(op) + if expected_count != real_count: + message += f"\t`{op_name}` was NON-delegated {real_count} times instead of the expected {expected_count} times.\n" + + if message: + raise AssertionError( + "Some operators were not delegated as expected:\n" + message + ) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py index 2e9a1b393ff..dfec6e85d57 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py @@ -1,4 +1,4 @@ -# Copyright 2025 NXP +# Copyright 2025-2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -6,7 +6,6 @@ import numpy as np import pytest import torch - from executorch.backends.nxp.backend.edge_program_converter import ( EdgeProgramToIRConverter, ) @@ -17,6 +16,13 @@ ToChannelFirstPreprocess, ToChannelLastPreprocess, ) +from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier + +from executorch.backends.nxp.tests.nsys_testing import ( + lower_run_compare, + RandomDatasetCreator, +) +from executorch.backends.nxp.tests.ops_aliases import Abs, Convolution, Relu from executorch.exir.dialects._ops import ops as exir_ops from torch.export import ExportedProgram @@ -29,7 +35,7 @@ def reseed_model_per_test_run(): np.random.seed(23) -class ConvBlocksWithAbs(torch.nn.Module): +class ConvBlocksWithAbsModule(torch.nn.Module): def __init__(self, conv_in_channels: int = 3): super().__init__() self.block1 = torch.nn.Sequential( @@ -56,7 +62,7 @@ def forward(self, x): return self.block2(x) -class Abs(torch.nn.Module): +class AbsModule(torch.nn.Module): def __init__(self): super().__init__() @@ -64,28 +70,100 @@ def forward(self, x): return x.abs() -def test_conv_abs(mocker, use_qat, input_shape: tuple[int] = (1, 3, 112, 112)): - model = ConvBlocksWithAbs(conv_in_channels=input_shape[1]) +class TestAbsLegacyNeutronFlow: + def test_conv_abs( + self, mocker, use_qat, input_shape: tuple[int, ...] 
= (1, 3, 112, 112) + ): + model = ConvBlocksWithAbsModule(conv_in_channels=input_shape[1]) - converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") + converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") - quantized_program = to_quantized_edge_program( - model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False - ).exported_program() + quantized_program = to_quantized_edge_program( + model, + input_shape, + use_qat=use_qat, + use_neutron_for_format_conversion=False, + use_new_flow_neutron_c=False, + ).exported_program() - tflite_flatbuffers_model, io_formats = converter_spy.spy_return - exported_program: ExportedProgram = converter_spy.call_args.args[1] + tflite_flatbuffers_model, io_formats = converter_spy.spy_return + exported_program: ExportedProgram = converter_spy.call_args.args[1] - assert not graph_contains_any_of_ops( - graph=quantized_program.graph, ops=[exir_ops.edge.aten.abs.default] - ) + assert not graph_contains_any_of_ops( + graph=quantized_program.graph, ops=[exir_ops.edge.aten.abs.default] + ) + + input_data = (np.random.random(input_shape) * 50).astype(np.int8) + convert_run_compare( + exported_program, + tfl_model=tflite_flatbuffers_model, + tflite_input_preprocess=ToChannelLastPreprocess(), + tflite_output_preprocess=ToChannelFirstPreprocess(), + input_data=input_data, + atol=1.0, + ) + + +class TestAbsNewNeutronFlow: + @staticmethod + def _get_dataset_creator(): + # to test `abs` reliably, we need to include negative values + low = -255.0 + high = 255.0 + + dataset = RandomDatasetCreator(low=low, high=high) + return dataset + + def test__basic_nsys_inference(self, mocker): + input_shape = (2, 3, 6, 7) + model = AbsModule() + graph_verifier = DetailedGraphVerifier( + mocker, expected_delegated_ops={Abs: 1}, expected_non_delegated_ops={} + ) - input_data = (np.random.random(input_shape) * 50).astype(np.int8) - convert_run_compare( - exported_program, - tfl_model=tflite_flatbuffers_model, - 
tflite_input_preprocess=ToChannelLastPreprocess(), - tflite_output_preprocess=ToChannelFirstPreprocess(), - input_data=input_data, - atol=1.0, - ) + dataset_creator = self._get_dataset_creator() + lower_run_compare( + model, + input_shape, + graph_verifier, + dataset_creator, + use_new_flow_neutron_c=True, + ) + + def test__basic_nsys_inference__big(self, mocker): + # some operators have delegation requirement that size must be < 4096 + input_shape = (4097, 1) + model = AbsModule() + graph_verifier = DetailedGraphVerifier( + mocker, expected_delegated_ops={Abs: 1}, expected_non_delegated_ops={} + ) + + dataset_creator = self._get_dataset_creator() + lower_run_compare( + model, + input_shape, + graph_verifier, + dataset_creator, + use_new_flow_neutron_c=True, + ) + + def test_basic_nsys_inference__with_conv(self, mocker): + input_shape = (2, 3, 6, 7) + in_channels = input_shape[1] + model = ConvBlocksWithAbsModule(conv_in_channels=in_channels) + + # one `relu` ends up in the same delegated partition as `abs` + graph_verifier = DetailedGraphVerifier( + mocker, + expected_delegated_ops={Abs: 1, Relu: 1}, + expected_non_delegated_ops={Relu: 1, Convolution: 2}, + ) + + dataset_creator = self._get_dataset_creator() + lower_run_compare( + model, + input_shape, + graph_verifier, + dataset_creator, + use_new_flow_neutron_c=True, + ) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py index e1766c3aabd..26d615e156f 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py @@ -28,7 +28,7 @@ ToNCHWPreprocess, ToNHWCPreprocess, ) -from executorch.backends.nxp.tests.graph_verifier import BaseGraphVerifier +from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier from executorch.backends.nxp.tests.models import AvgPool2dConvModule, 
AvgPool2dModule from executorch.backends.nxp.tests.nsys_testing import lower_run_compare @@ -303,25 +303,23 @@ def test_from_avg_pool_1d(mocker): class TestAvgPool2DNewNeutronFlow: - def test__basic_nsys_inference(self): + def test__basic_nsys_inference(self, mocker): input_shape = (2, 4, 6, 7) model = AvgPool2dModule(False, 0) - graph_verifier = BaseGraphVerifier( - exp_num_delegate_call_nodes=1, # Delegated AvgPool. - exp_non_delegated_nodes=[], + graph_verifier = DetailedGraphVerifier( + mocker, expected_delegated_ops={AvgPool2D: 1}, expected_non_delegated_ops={} ) lower_run_compare( model, input_shape, graph_verifier, use_new_flow_neutron_c=True ) - def test__kernel_size_limit(self): + def test__kernel_size_limit(self, mocker): kernel_size = (1, 4096) input_shape = (1, 4) + kernel_size model = AvgPool2dModule(False, 0, kernel_size) - graph_verifier = BaseGraphVerifier( - exp_num_delegate_call_nodes=1, # Delegated AvgPool. - exp_non_delegated_nodes=[], + graph_verifier = DetailedGraphVerifier( + mocker, expected_delegated_ops={AvgPool2D: 1}, expected_non_delegated_ops={} ) lower_run_compare( @@ -343,13 +341,12 @@ def test__kernel_size_limit_exceeded(self): ) assert graph_contains_any_of_ops(delegated_ep.graph, [AvgPool2D]) - def test__stride_limit(self): + def test__stride_limit(self, mocker): stride = 4096 input_shape = (1, 4, 1, 4096) model = AvgPool2dModule(False, 0, 1, stride) - graph_verifier = BaseGraphVerifier( - exp_num_delegate_call_nodes=1, # Delegated AvgPool. - exp_non_delegated_nodes=[], + graph_verifier = DetailedGraphVerifier( + mocker, expected_delegated_ops={AvgPool2D: 1}, expected_non_delegated_ops={} ) lower_run_compare( @@ -370,3 +367,20 @@ def test__stride_limit_exceeded(self): delegated_ep.graph, [ExecutorchDelegateCall] ) assert graph_contains_any_of_ops(delegated_ep.graph, [AvgPool2D]) + + +class TestAvgPool1DNewNeutronFlow: + + # Just a basic test to verify that the operator gets extended to the 2D variant correctly. 
+ def test__basic_nsys_inference__view_not_delegated(self, mocker): + input_shape = (2, 4, 6) # The old flow limited the batch size to 1. + model = AvgPool1DModule() + graph_verifier = DetailedGraphVerifier( + mocker, + expected_delegated_ops={AvgPool2D: 1}, + expected_non_delegated_ops={ViewCopy: 2}, + ) + + lower_run_compare( + model, input_shape, graph_verifier, use_new_flow_neutron_c=True + ) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py index 6bb1000b38b..7a1c798caa3 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py @@ -3,10 +3,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import operator - import numpy as np -import pytest import torch from executorch.backends.nxp.backend.edge_program_converter import ( @@ -19,19 +16,20 @@ ToChannelFirstPreprocess, ToChannelLastPreprocess, ) +from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier +from executorch.backends.nxp.tests.nsys_testing import lower_run_compare +from executorch.backends.nxp.tests.ops_aliases import ( + ExecutorchDelegateCall, + GetItem, + MaxPool2DWithIndices, + Squeeze, + SqueezeDim, + SqueezeDims, + Unsqueeze, + ViewCopy, +) from executorch.backends.nxp.tests.use_qat import * # noqa F403 - -# noinspection PyProtectedMember -from executorch.exir.dialects._ops import ops as exir_ops - -ExecutorchDelegateCall = torch.ops.higher_order.executorch_call_delegate -GetItem = operator.getitem -MaxPool2D = exir_ops.edge.aten.max_pool2d_with_indices.default -Squeeze = exir_ops.edge.aten.squeeze.default -SqueezeDim = exir_ops.edge.aten.squeeze.dim -SqueezeDims = exir_ops.edge.aten.squeeze.dims -Unsqueeze = exir_ops.edge.aten.unsqueeze.default -ViewCopy = 
exir_ops.edge.aten.view_copy.default +import pytest class MaxPool1DModule(torch.nn.Module): @@ -47,7 +45,7 @@ def forward(self, x): class MaxPool2dModule(torch.nn.Module): - def __init__(self, kernel_size=3, **kwargs): + def __init__(self, kernel_size: int | tuple[int, ...] = 3, **kwargs): super().__init__() self.max_pool2d = torch.nn.MaxPool2d(kernel_size, **kwargs) @@ -80,7 +78,7 @@ def _verify_successful_delegation(module, converter_spy, input_shape): ).exported_program() # Make sure the MaxPool was delegated. - assert not graph_contains_any_of_ops(edge_model.graph, [MaxPool2D]) + assert not graph_contains_any_of_ops(edge_model.graph, [MaxPool2DWithIndices]) assert graph_contains_any_of_ops(edge_model.graph, [ExecutorchDelegateCall]) # Verify correct behavior of the converted NeutronIR model. @@ -90,7 +88,7 @@ def _verify_successful_delegation(module, converter_spy, input_shape): input_data = _generate_test_data(input_shape) # Make sure the tested program contains the `MaxPool`. - assert graph_contains_any_of_ops(edge_partition.graph, [MaxPool2D]) + assert graph_contains_any_of_ops(edge_partition.graph, [MaxPool2DWithIndices]) assert graph_contains_any_of_ops(edge_partition.graph, [GetItem]) convert_run_compare( @@ -138,7 +136,7 @@ def _verify_no_delegation(module, input_shape): use_neutron_for_format_conversion=False, ).exported_program() - assert graph_contains_any_of_ops(edge_model.graph, [MaxPool2D]) + assert graph_contains_any_of_ops(edge_model.graph, [MaxPool2DWithIndices]) assert graph_contains_any_of_ops(edge_model.graph, [GetItem]) assert not graph_contains_any_of_ops(edge_model.graph, [ExecutorchDelegateCall]) @@ -219,7 +217,7 @@ def test_max_pool_2d__from_1d(self, mocker): # Make sure the `max_pool` was delegated. 
assert graph_contains_any_of_ops(edge_model.graph, [ExecutorchDelegateCall]) - assert not graph_contains_any_of_ops(edge_model.graph, [MaxPool2D]) + assert not graph_contains_any_of_ops(edge_model.graph, [MaxPool2DWithIndices]) # There is not `max_pool1d` in the edge dialect, so we cannot check for its absence by comparing with the target. # In order to detect any potential future changes (like the addition of `max_pool1d` to edge dialect), we check # the name of the target. @@ -240,7 +238,7 @@ def test_max_pool_2d__from_1d(self, mocker): input_data = _generate_test_data(extended_shape) # Make sure the tested program contains the `MaxPool`. - assert graph_contains_any_of_ops(edge_partition.graph, [MaxPool2D]) + assert graph_contains_any_of_ops(edge_partition.graph, [MaxPool2DWithIndices]) assert graph_contains_any_of_ops(edge_partition.graph, [GetItem]) convert_run_compare( @@ -250,3 +248,123 @@ def test_max_pool_2d__from_1d(self, mocker): tflite_input_preprocess=ToChannelLastPreprocess(), tflite_output_preprocess=ToChannelFirstPreprocess(), ) + + +class TestMaxPool2DNewNeutronFlow: + # noinspection PyMethodMayBeStatic + def assert_delegated(self, model, input_shape, mocker): + graph_verifier = DetailedGraphVerifier( + mocker, + expected_delegated_ops={MaxPool2DWithIndices: 1, GetItem: 1}, + expected_non_delegated_ops={}, + ) + + lower_run_compare( + model, input_shape, graph_verifier, use_new_flow_neutron_c=True + ) + + # noinspection PyMethodMayBeStatic + def assert_not_delegated(self, model, input_shape): + delegated_ep = to_quantized_edge_program( + model, input_shape, use_new_flow_neutron_c=True + ).exported_program() + + # Make sure the `max_pool2d` was NOT delegated. + assert not graph_contains_any_of_ops( + delegated_ep.graph, [ExecutorchDelegateCall] + ) + assert graph_contains_any_of_ops(delegated_ep.graph, [MaxPool2DWithIndices]) + + def test__basic_nsys_inference(self, mocker): + input_shape = (2, 4, 6, 7) # The old flow limited the batch size to 1. 
+ model = MaxPool2dModule() + self.assert_delegated(model, input_shape, mocker) + + def test__kernel_size_limit(self, mocker): + kernel_size = (1, 4096) + input_shape = (1, 4) + kernel_size + model = MaxPool2dModule(kernel_size) + self.assert_delegated(model, input_shape, mocker) + + def test__kernel_size_limit_exceeded(self): + kernel_size = (1, 4097) # Exceeds the kernel size limit. + input_shape = (1, 4) + kernel_size + model = MaxPool2dModule(kernel_size) + self.assert_not_delegated(model, input_shape) + + def test__stride_limit__no_padding(self, mocker): + stride = 4096 + input_shape = (1, 4, 1, 4096) + model = MaxPool2dModule(1, stride=stride) + self.assert_delegated(model, input_shape, mocker) + + def test__stride_limit_exceeded__no_padding(self): + stride = 4097 # Exceeds the stride limit. + input_shape = (1, 4, 1, 4096) + model = MaxPool2dModule(1, stride=stride) + self.assert_not_delegated(model, input_shape) + + def test__stride_limit__padding(self, mocker): + padding = 1 + stride = 4096 + input_shape = (1, 2, 3, stride) + model = MaxPool2dModule(3, stride=stride, padding=padding) + self.assert_delegated(model, input_shape, mocker) + + def test__stride_limit_exceeded__padding(self): + padding = 1 + stride = 4097 # Exceeds the stride limit. + input_shape = (1, 2, 3, stride) + model = MaxPool2dModule(3, stride=stride, padding=padding) + self.assert_not_delegated(model, input_shape) + + @pytest.mark.skip( + reason="Large padding requires large kernel size which results in an extremely slow test." + ) + def test__padding_limit(self, mocker): + # As the padding is added via a `Pad` operator (not the `MaxPool` arguments), there is no limit to the padded + # value. But as padding can be at most half of the kernel size (PyTorch requirement) and kernel size is limited + # to 4096, padding of 2048 is the limit.
+ padding = 2048 + kernel_size = padding * 2 + input_shape = (1, 1, 2, 3) + model = MaxPool2dModule(kernel_size, padding=padding) + self.assert_delegated(model, input_shape, mocker) + + def test__padding__max_pool_limit_exceeded(self, mocker): + # NeutronIR `MaxPool` padding is limited to 32. But as it is added by the `Pad` operator instead, there is no + # limit. This test ensures the `MaxPool` padding limit is not a problem. + padding = 33 + kernel_size = padding * 2 + input_shape = (1, 2, 3, 4) + model = MaxPool2dModule(kernel_size, padding=padding) + self.assert_delegated(model, input_shape, mocker) + + def test__padding_to_kernel_ratio_exceeded(self): + # Both PyTorch and Neutron require the padding to be at most half of the kernel size. + kernel_size = 3 + padding = 2 # More than half of the kernel size. + input_shape = (1, 2, 3, 4) + model = MaxPool2dModule(kernel_size, padding=padding) + with pytest.raises( + RuntimeError, match="pad should be at most half of effective kernel size" + ): + to_quantized_edge_program(model, input_shape, use_new_flow_neutron_c=True) + + +class TestMaxPool1DNewNeutronFlow: + + # Just a basic test to verify that the operator gets extended to the 2D variant correctly. + def test__basic_nsys_inference__view_not_delegated(self, mocker): + input_shape = (2, 4, 6) # The old flow limited the batch size to 1. 
+ model = MaxPool1DModule() + + graph_verifier = DetailedGraphVerifier( + mocker, + expected_delegated_ops={MaxPool2DWithIndices: 1, GetItem: 1}, + expected_non_delegated_ops={ViewCopy: 2}, + ) + + lower_run_compare( + model, input_shape, graph_verifier, use_new_flow_neutron_c=True + ) diff --git a/backends/nxp/tests/nsys_testing.py b/backends/nxp/tests/nsys_testing.py index 3e2767332eb..9d25c309d25 100644 --- a/backends/nxp/tests/nsys_testing.py +++ b/backends/nxp/tests/nsys_testing.py @@ -125,7 +125,9 @@ def wrapper(*args, **kwargs): use_new_flow_neutron_c=use_new_flow_neutron_c, ) except RuntimeError as e: - if "Model converted with neutron-converter has" in str(e): + if "Model converted with neutron-converter has" in str(e) and hasattr( + dlg_model_verifier, "check_num_delegated_nodes" + ): dlg_model_verifier.check_num_delegated_nodes(e.args[1]) raise diff --git a/backends/nxp/tests/ops_aliases.py b/backends/nxp/tests/ops_aliases.py index ae4189e209f..97d14f3df5c 100644 --- a/backends/nxp/tests/ops_aliases.py +++ b/backends/nxp/tests/ops_aliases.py @@ -6,20 +6,31 @@ # This file defines ops aliases for shorter and more readable test description. List is sorted alphabetically. # When finding a missing alias, add it at the correct place. 
+import operator + import torch from executorch.exir.dialects._ops import ops as exir_ops +Abs = exir_ops.edge.aten.abs.default AvgPool2D = exir_ops.edge.aten.avg_pool2d.default Bmm = exir_ops.edge.aten.bmm.default +Convolution = exir_ops.edge.aten.convolution.default +DequantizePerChannel = exir_ops.edge.quantized_decomposed.dequantize_per_channel.default +DequantizePerTensor = exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default ExecutorchDelegateCall = torch.ops.higher_order.executorch_call_delegate +GetItem = operator.getitem HardTanh = exir_ops.edge.aten.hardtanh.default HardTanh_ = exir_ops.edge.aten.hardtanh_.default +MaxPool2DWithIndices = exir_ops.edge.aten.max_pool2d_with_indices.default +QuantizePerChannel = exir_ops.edge.quantized_decomposed.quantize_per_channel.default +QuantizePerTensor = exir_ops.edge.quantized_decomposed.quantize_per_tensor.default +Relu = exir_ops.edge.aten.relu.default Slice = exir_ops.edge.aten.slice.Tensor SliceCopy = exir_ops.edge.aten.slice_copy.Tensor Softmax = exir_ops.edge.aten._softmax.default Squeeze = exir_ops.edge.aten.squeeze.default SqueezeDim = exir_ops.edge.aten.squeeze.dim SqueezeDims = exir_ops.edge.aten.squeeze.dims Unsqueeze = exir_ops.edge.aten.unsqueeze.default UpsampleBilinear2D = exir_ops.edge.aten.upsample_bilinear2d.vec UpsampleNearest2D = exir_ops.edge.aten.upsample_nearest2d.vec