diff --git a/backends/qualcomm/quantizer/annotators/htp_rules.py b/backends/qualcomm/quantizer/annotators/htp_rules.py index cd65d02c752..342db1cb633 100644 --- a/backends/qualcomm/quantizer/annotators/htp_rules.py +++ b/backends/qualcomm/quantizer/annotators/htp_rules.py @@ -260,6 +260,7 @@ def annotate(node: Node, quantization_config: QuantizationConfig) -> None: } ), ) + node.meta[Q_ANNOTATION_KEY] = QuantizationAnnotation( input_qspec_map=input_qspec_map, output_qspec=output_qspec, @@ -295,6 +296,7 @@ def annotate(node: Node, quantization_config: QuantizationConfig) -> None: @register_annotator( [ torch.ops.aten.split_with_sizes.default, + torch.ops.aten.split_with_sizes_copy.default, torch.ops.aten.split.Tensor, torch.ops.aten.chunk.default, ], @@ -1203,14 +1205,22 @@ def annotate(node: Node, quantization_config: QuantizationConfig) -> None: [torch.ops.aten.pixel_shuffle.default], QnnConstants.OpDepthToSpace.op_name ) class PixelShuffle(GeneralOpDef): - pass + @staticmethod + def annotate(node: Node, quantization_config: QuantizationConfig) -> None: + annotate_in_out_obs_sharing_op(node, quantization_config) + if not _is_annotated([node]): + annotate_single_in_share_out(node, quantization_config) @register_annotator( [torch.ops.aten.pixel_unshuffle.default], QnnConstants.OpSpaceToDepth.op_name ) class PixelUnshuffle(GeneralOpDef): - pass + @staticmethod + def annotate(node: Node, quantization_config: QuantizationConfig) -> None: + annotate_in_out_obs_sharing_op(node, quantization_config) + if not _is_annotated([node]): + annotate_single_in_share_out(node, quantization_config) @register_annotator( diff --git a/backends/qualcomm/quantizer/annotators/lpai_rules.py b/backends/qualcomm/quantizer/annotators/lpai_rules.py index 60cebfcc5c0..30a3cb1dc9d 100644 --- a/backends/qualcomm/quantizer/annotators/lpai_rules.py +++ b/backends/qualcomm/quantizer/annotators/lpai_rules.py @@ -223,6 +223,7 @@ def annotate(node: Node, quantization_config: QuantizationConfig) -> None: @register_annotator( [ torch.ops.aten.split_with_sizes.default, + torch.ops.aten.split_with_sizes_copy.default, torch.ops.aten.split.Tensor, torch.ops.aten.chunk.default, ], @@ -705,14 +706,22 @@ def annotate(node: Node, quantization_config: QuantizationConfig) -> None: [torch.ops.aten.pixel_shuffle.default], QnnConstants.OpDepthToSpace.op_name ) class PixelShuffle(GeneralOpDef): - pass + @staticmethod + def annotate(node: Node, quantization_config: QuantizationConfig) -> None: + annotate_in_out_obs_sharing_op(node, quantization_config) + if not _is_annotated([node]): + annotate_single_in_share_out(node, quantization_config) @register_annotator( [torch.ops.aten.pixel_unshuffle.default], QnnConstants.OpSpaceToDepth.op_name ) class PixelUnshuffle(GeneralOpDef): - pass + @staticmethod + def annotate(node: Node, quantization_config: QuantizationConfig) -> None: + annotate_in_out_obs_sharing_op(node, quantization_config) + if not _is_annotated([node]): + annotate_single_in_share_out(node, quantization_config) @register_annotator( diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index d76e3ea1df7..688dddf5c2a 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -8,6 +8,7 @@ import itertools import json import logging +import operator import subprocess import sys import tempfile @@ -33,6 +34,7 @@ make_quantizer, setup_common_args_and_variables, ) +from executorch.backends.qualcomm.quantizer.rules import Q_ANNOTATION_KEY from executorch.backends.qualcomm.serialization.qc_schema import ( QnnExecuTorchBackendType, QnnExecuTorchHtpPerformanceMode, @@ -97,6 +99,7 @@ from executorch.examples.models.wav2letter import Wav2LetterModel from executorch.exir import to_edge from executorch.exir.backend.backend_api import disable_validation +from torchao.quantization.pt2e.quantizer import SharedQuantizationSpec class TestQNNFloatingPointOperator(TestQNN): @@ -1730,12 +1733,16 @@ def test_qnn_backend_permute(self): def test_qnn_backend_pixel_shuffle(self): module = PixelShuffle(2) # noqa: F405 - sample_input = (torch.ones([2, 4, 3, 3]),) + sample_input = ( + torch.arange(2 * 4 * 3 * 3, dtype=torch.float32).reshape(2, 4, 3, 3), + ) self.lower_module_and_test_output(module, sample_input) def test_qnn_backend_pixel_unshuffle(self): module = PixelUnshuffle(2) # noqa: F405 - sample_input = (torch.ones([2, 2, 6, 6]),) + sample_input = ( + torch.arange(2 * 2 * 6 * 6, dtype=torch.float32).reshape(2, 2, 6, 6), + ) self.lower_module_and_test_output(module, sample_input) def test_qnn_backend_pow_tensor_scalar(self): @@ -4302,16 +4309,184 @@ def test_qnn_backend_permute(self): def test_qnn_backend_pixel_shuffle(self): module = PixelShuffle(2) # noqa: F405 - sample_input = (torch.ones([2, 4, 3, 3]),) + sample_input = ( + torch.arange(2 * 4 * 3 * 3, dtype=torch.float32).reshape(2, 4, 3, 3), + ) module = self.get_qdq_module(module, sample_input) self.lower_module_and_test_output(module, sample_input) def test_qnn_backend_pixel_unshuffle(self): module = PixelUnshuffle(2) # noqa: F405 - sample_input = (torch.ones([2, 2, 6, 6]),) + sample_input = ( + torch.arange(2 * 2 * 6 * 6, dtype=torch.float32).reshape(2, 2, 6, 6), + ) module = self.get_qdq_module(module, sample_input) self.lower_module_and_test_output(module, sample_input) + def _prepare_module_for_qparam_assertions(self, module, sample_input): + backend = get_backend_type(self.backend) + quantizer = make_quantizer( + quant_dtype=QuantDtype.use_8a8w, + custom_annotations=(), + per_channel_conv=True, + per_channel_linear=False, + per_channel_embedding=False, + backend=backend, + soc_model=self.soc_model, + ) + return prepare_pt2e( + torch.export.export(module, sample_input, strict=True).module(), + quantizer, + ) + + def _assert_prepared_nodes_share_qparams( + self, module, sample_input, target_tokens + ) -> list[torch.fx.Node]: + prepared = self._prepare_module_for_qparam_assertions(module, sample_input) + matching_nodes = [ + node + for node in prepared.graph.nodes + if node.op == "call_function" + and any(target_token in str(node.target) for target_token in target_tokens) + ] + + self.assertGreater( + len(matching_nodes), + 0, + f"Failed to find node matching any of {target_tokens}", + ) + for node in matching_nodes: + self.assertIsInstance( + node.meta[Q_ANNOTATION_KEY].output_qspec, + SharedQuantizationSpec, + ) + + return matching_nodes + + def test_qnn_backend_pixel_shuffle_unshuffle_share_qparams(self): + test_cases = [ + ( + "pixel_shuffle", + PixelShuffle(2), # noqa: F405 + (torch.arange(2 * 4 * 3 * 3, dtype=torch.float32).reshape(2, 4, 3, 3),), + torch.ops.aten.pixel_shuffle.default, + ), + ( + "pixel_unshuffle", + PixelUnshuffle(2), # noqa: F405 + (torch.arange(2 * 2 * 6 * 6, dtype=torch.float32).reshape(2, 2, 6, 6),), + torch.ops.aten.pixel_unshuffle.default, + ), + ] + + for name, module, sample_input, target in test_cases: + with self.subTest(name=name): + prepared = self._prepare_module_for_qparam_assertions( + module, sample_input + ) + for node in prepared.graph.nodes: + if node.op == "call_function" and node.target == target: + self.assertIsInstance( + node.meta[Q_ANNOTATION_KEY].output_qspec, + SharedQuantizationSpec, + ) + break + else: + self.fail(f"Failed to find {target} in prepared graph") + + def test_qnn_backend_value_preserving_ops_share_qparams(self): + test_cases = [ + ( + "channel_shuffle", + ChannelShuffle(2), # noqa: F405 + (torch.randn(1, 4, 3, 3),), + ("aten.channel_shuffle",), + ), + ( + "permute", + Permute([0, 2, 3, 1]), # noqa: F405 + (torch.randn(2, 3, 4, 5),), + ("aten.permute",), + ), + ( + "pixel_shuffle", + PixelShuffle(2), # noqa: F405 + (torch.arange(2 * 4 * 3 * 3, dtype=torch.float32).reshape(2, 4, 3, 3),), + ("aten.pixel_shuffle",), + ), + ( + "pixel_unshuffle", + PixelUnshuffle(2), # noqa: F405 + (torch.arange(2 * 2 * 6 * 6, dtype=torch.float32).reshape(2, 2, 6, 6),), + ("aten.pixel_unshuffle",), + ), + ( + "repeat", + Repeat(), # noqa: F405 + (torch.randn(2, 2, 2, 2),), + ("aten.repeat",), + ), + ( + "expand_as", + ExpandAs(), # noqa: F405 + (torch.randn(3, 4),), + ("aten.expand",), + ), + ( + "reshape", + Reshape(), # noqa: F405 + (torch.randn(3, 4),), + ("aten.reshape", "aten.view"), + ), + ] + + for name, module, sample_input, target_tokens in test_cases: + with self.subTest(name=name): + self._assert_prepared_nodes_share_qparams( + module, sample_input, target_tokens + ) + + def test_qnn_backend_split_with_sizes_copy_share_qparams(self): + class SplitWithSizesCopy(torch.nn.Module): + def forward(self, x): + out = torch.ops.aten.split_with_sizes_copy.default(x, [2, 2], 1) + return out[0] + out[1] + + backend = get_backend_type(self.backend) + sample_input = ( + torch.arange(2 * 4 * 3 * 3, dtype=torch.float32).reshape(2, 4, 3, 3), + ) + quantizer = make_quantizer( + quant_dtype=QuantDtype.use_8a8w, + custom_annotations=(), + per_channel_conv=True, + per_channel_linear=False, + per_channel_embedding=False, + backend=backend, + soc_model=self.soc_model, + ) + prepared = prepare_pt2e( + torch.export.export( + SplitWithSizesCopy(), sample_input, strict=True + ).module(), + quantizer, + ) + + getitem_count = 0 + for node in prepared.graph.nodes: + if ( + node.op == "call_function" + and node.target == operator.getitem + and node.args[0].target == torch.ops.aten.split_with_sizes_copy.default + ): + self.assertIsInstance( + node.meta[Q_ANNOTATION_KEY].output_qspec, + SharedQuantizationSpec, + ) + getitem_count += 1 + + self.assertGreater(getitem_count, 0) + def test_qnn_backend_pow_tensor_scalar(self): test_comb = [ {