From 6a0fbf2d2205dfbd1cb029430b52ce5315a137c5 Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Mon, 13 Oct 2025 08:04:23 +0100
Subject: [PATCH 01/22] minor improvements

---
 .../algorithms/smooth_quant/algorithm.py      | 28 +++++++------------
 1 file changed, 10 insertions(+), 18 deletions(-)

diff --git a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
index 701073a218b..7f7550e0f4c 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
@@ -109,11 +109,9 @@ def apply(
         alpha_map = self._get_alpha_map()
 
         nodes_to_smooth_data = self._get_nodes_to_smooth_data(graph, alpha_map.keys())
-        model_transformer = ModelTransformerFactory.create(model)
-        transformation_layout = TransformationLayout()
-
         node_groups = self._group_nodes_by_source(nodes_to_smooth_data, graph)
 
+        transformation_layout = TransformationLayout()
         for group_id, nodes in track(node_groups.items(), description="Applying Smooth Quant"):
             best_scale = None
             best_ratio = 0.0
@@ -178,6 +176,7 @@ def apply(
             )
             transformation_layout.register(scale_insertion_command)
 
+        model_transformer = ModelTransformerFactory.create(model)
         transformed_model = model_transformer.transform(transformation_layout)
         return transformed_model
 
@@ -204,7 +203,7 @@ def _calculate_scale_and_ratio(
         ratio = scales.min() / (scales.max() + eps)
         return scales, ratio
 
-    def _group_nodes_by_source(self, nodes_to_smooth: list[dict], nncf_graph: NNCFGraph) -> dict[tuple, list]:
+    def _group_nodes_by_source(self, nodes_to_smooth: list[tuple[NNCFNode, int]], nncf_graph: NNCFGraph) -> dict[tuple, list]:
         """
         Groups nodes that will be smoothed by source (parent node).
 
@@ -213,9 +212,7 @@ def _group_nodes_by_source(self, nodes_to_smooth: list[dict], nncf_graph: NNCFGr
         :return: Dictionary with the source info as key and grouped nodes as value.
         """
         groups = defaultdict(list)
-        for node_data in nodes_to_smooth:
-            node_to_smooth = node_data["node_to_smooth"]
-            input_act_port = node_data["input_act_port"]
+        for node_to_smooth, input_act_port in nodes_to_smooth:
            source_node = nncf_graph.get_input_edge_by_port_id(node_to_smooth, input_act_port).from_node
            edge = nncf_graph.get_edge(source_node, node_to_smooth)
            # Such group_id (with node, ports, and shape as a hash) allows us to be confident
@@ -254,15 +251,14 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin
 
         nodes_to_smooth_data = self._get_nodes_to_smooth_data(graph, alpha_map.keys())
 
-        for node_data in nodes_to_smooth_data:
-            node_to_smooth = node_data["node_to_smooth"]
+        for node_to_smooth, input_act_port in nodes_to_smooth_data:
             target_point = self._backend_entity.target_point(
                 target_type=self._backend_entity.pre_layer_target_type(),
                 target_node_name=node_to_smooth.node_name,
-                port_id=node_data["input_act_port"],
+                port_id=input_act_port,
             )
             input_reduction_axes = self._calculate_input_reduction_axes(
-                graph, node_to_smooth, node_data["input_act_port"]
+                graph, node_to_smooth, input_act_port
             )
             stat_collector = self._backend_entity.get_abs_max_channel_collector(
                 self._subset_size, input_reduction_axes, self._inplace_statistics, STATISTIC_BRANCH_KEY
@@ -276,7 +272,7 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin
             )
         return statistic_container
 
-    def _get_nodes_to_smooth_data(self, nncf_graph: NNCFGraph, node_metatypes: list[OperatorMetatype]) -> list[dict]:
+    def _get_nodes_to_smooth_data(self, nncf_graph: NNCFGraph, node_metatypes: list[OperatorMetatype]) -> list[tuple[NNCFNode, int]]:
         """
         Collects layers whose activations will be smoothed.
 
@@ -306,12 +302,8 @@ def _get_nodes_to_smooth_data(self, nncf_graph: NNCFGraph, node_metatypes: list[
             if self._backend_entity.is_node_with_shared_weight(node_with_weight, nncf_graph):
                 continue
 
-            nodes_to_smooth_data.append(
-                {
-                    "node_to_smooth": node_with_weight,
-                    "input_act_port": activation_port_id,
-                }
-            )
+            nodes_to_smooth_data.append((node_with_weight, activation_port_id))
+
         return nodes_to_smooth_data
 
     def _calculate_activation_scale(

From 17062a69d43ed0500b1e374318e4a7c08d51da0c Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Mon, 13 Oct 2025 08:19:54 +0100
Subject: [PATCH 02/22] minor update

---
 .../algorithms/smooth_quant/algorithm.py            | 12 ++++++++++--
 tests/cross_fw/test_templates/test_smooth_quant.py  |  2 +-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
index 7f7550e0f4c..911940a2ad4 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
@@ -203,7 +203,11 @@ def _calculate_scale_and_ratio(
         ratio = scales.min() / (scales.max() + eps)
         return scales, ratio
 
-    def _group_nodes_by_source(self, nodes_to_smooth: list[tuple[NNCFNode, int]], nncf_graph: NNCFGraph) -> dict[tuple, list]:
+    def _group_nodes_by_source(
+        self,
+        nodes_to_smooth: list[tuple[NNCFNode, int]],
+        nncf_graph: NNCFGraph
+    ) -> dict[tuple, list]:
         """
         Groups nodes that will be smoothed by source (parent node).
@@ -272,7 +276,11 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin
             )
         return statistic_container
 
-    def _get_nodes_to_smooth_data(self, nncf_graph: NNCFGraph, node_metatypes: list[OperatorMetatype]) -> list[tuple[NNCFNode, int]]:
+    def _get_nodes_to_smooth_data(
+        self,
+        nncf_graph: NNCFGraph,
+        node_metatypes: list[OperatorMetatype]
+    ) -> list[tuple[NNCFNode, int]]:
         """
         Collects layers whose activations will be smoothed.
 
diff --git a/tests/cross_fw/test_templates/test_smooth_quant.py b/tests/cross_fw/test_templates/test_smooth_quant.py
index 0cefaa9e791..a01689aeb40 100644
--- a/tests/cross_fw/test_templates/test_smooth_quant.py
+++ b/tests/cross_fw/test_templates/test_smooth_quant.py
@@ -227,7 +227,7 @@ def test__get_nodes_to_smooth_data(self, model_cls, references, tmpdir):
         algo._set_backend_entity(model)
         alpha_map = algo._get_alpha_map()
         smooth_data = algo._get_nodes_to_smooth_data(nncf_graph, alpha_map.keys())
-        smooth_data = {d["node_to_smooth"].node_name: d["input_act_port"] for d in smooth_data}
+        smooth_data = {node.node_name: input_act_port for node, input_act_port in smooth_data}
 
         name_map = self.get_node_name_map(model_cls)
         assert len(name_map) == len(smooth_data)

From 2d08025d9a6d36adf7ede3e3882a7f413dd9e4a5 Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Mon, 13 Oct 2025 09:54:59 +0100
Subject: [PATCH 03/22] retrieve shapes from stats

---
 .../algorithms/smooth_quant/algorithm.py      | 50 ++++++++++++++++---
 1 file changed, 42 insertions(+), 8 deletions(-)

diff --git a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
index 911940a2ad4..64f16538265 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
@@ -31,10 +31,13 @@
 from nncf.quantization.algorithms.algorithm import Algorithm
 from nncf.tensor import Tensor
 from nncf.tensor import functions as fns
+from nncf.experimental.common.tensor_statistics.collectors import NoopAggregator
+from nncf.experimental.common.tensor_statistics.collectors import ShapeReducer
 
 TModel = TypeVar("TModel")
 TTensor = TypeVar("TTensor")
 STATISTIC_BRANCH_KEY = "abs_max"
+SHAPE_BRANCH_KEY = "shape"
 ALPHA_MAP = {"convolution": 0.05, "matmul": 0.95}
 
 
@@ -108,16 +111,19 @@ def apply(
         self._set_backend_entity(model)
         alpha_map = self._get_alpha_map()
 
-        nodes_to_smooth_data = self._get_nodes_to_smooth_data(graph, alpha_map.keys())
-        node_groups = self._group_nodes_by_source(nodes_to_smooth_data, graph)
+        nodes = self._get_nodes_to_smooth_data(graph, alpha_map.keys())
+        nodes = self._retrieve_shape(nodes, statistic_points)
+        node_groups = self._group_nodes_by_source(nodes, graph)
 
         transformation_layout = TransformationLayout()
         for group_id, nodes in track(node_groups.items(), description="Applying Smooth Quant"):
             best_scale = None
             best_ratio = 0.0
             empty_statistic = False
+
+            source_node, input_port_id, source_output_port_id, shape = group_id
+
             for node_to_smooth in nodes:
-                source_node, input_port_id, source_output_port_id, _ = group_id
                 activations_value = self._get_statistics_for_node(
                     statistic_points, node_to_smooth.node_name, input_port_id
                 )
@@ -166,9 +172,7 @@ def apply(
             )
             transformation_layout.register(weight_update_command)
 
-            activations_by_output_id = {e.output_port_id: e for e in graph.get_output_edges(source_node)}
-            activations_shape = activations_by_output_id[source_output_port_id].tensor_shape
-            activation_scale = self._calculate_activation_scale(best_scale, activations_shape, nodes, graph)
+            activation_scale = self._calculate_activation_scale(best_scale, shape, nodes, graph)
 
             scale_node_name = self._create_scale_node_name(source_node.node_name, source_output_port_id)
             scale_insertion_command = self._backend_entity.scale_insertion_command(
@@ -216,16 +220,42 @@ def _group_nodes_by_source(
         :return: Dictionary with the source info as key and grouped nodes as value.
         """
         groups = defaultdict(list)
-        for node_to_smooth, input_act_port in nodes_to_smooth:
+        for node_to_smooth, input_act_port, shape in nodes_to_smooth:
             source_node = nncf_graph.get_input_edge_by_port_id(node_to_smooth, input_act_port).from_node
             edge = nncf_graph.get_edge(source_node, node_to_smooth)
             # Such group_id (with node, ports, and shape as a hash) allows us to be confident
             # that all sensitive parameters for successor nodes are equal.
-            group_id = (source_node, input_act_port, edge.output_port_id, hash(str(edge.tensor_shape)))
+
+            # TODO(andrey-churkin): Why hash(str(edge.tensor_shape))?
+            group_id = (source_node, input_act_port, edge.output_port_id, shape)
             groups[group_id].append(node_to_smooth)
 
         return groups
 
+    def _retrieve_shape(
+        self,
+        nodes: list[tuple[NNCFNode, int]],
+        statistic_points: StatisticPointsContainer
+    ) -> list[tuple[NNCFNode, int, tuple[int, ...]]]:
+        """
+        :param nodes:
+        :param statistic_points:
+        :return:
+        """
+        items = []
+        for node, input_port in nodes:
+            for tensor_collector in statistic_points.get_algo_statistics_for_node(
+                node.node_name,
+                self._backend_entity.get_filter_fn_for_statistics(input_port, self._algorithm_key),
+                self._algorithm_key,
+            ):
+                stats = tensor_collector.get_statistics()
+                shape = tuple(stats[SHAPE_BRANCH_KEY].tolist())
+
+                items.append((node, input_port, shape))
+
+        return items
+
     def _get_statistics_for_node(
         self, statistic_points: StatisticPointsContainer, node_name: str, act_port: int
     ) -> list[TTensor]:
@@ -267,6 +297,10 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin
             stat_collector = self._backend_entity.get_abs_max_channel_collector(
                 self._subset_size, input_reduction_axes, self._inplace_statistics, STATISTIC_BRANCH_KEY
             )
+
+            # TODO(andrey-churkin): OVShapeReducer
+            stat_collector.register_statistic_branch(SHAPE_BRANCH_KEY, ShapeReducer(), NoopAggregator(num_samples=1, return_first=True))
+
             statistic_container.add_statistic_point(
                 StatisticPoint(
                     target_point=target_point,

From 57d828f92c044db04ded4278ee9b7235f09363e3 Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Mon, 13 Oct 2025 11:09:53 +0100
Subject: [PATCH 04/22] raw version

---
 .../common/tensor_statistics/collectors.py    | 20 ++++++++++++-
 .../algorithms/smooth_quant/algorithm.py      | 28 +++++++++++++------
 2 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/src/nncf/experimental/common/tensor_statistics/collectors.py b/src/nncf/experimental/common/tensor_statistics/collectors.py
index b11c021a0a6..1a7e49615b3 100644
--- a/src/nncf/experimental/common/tensor_statistics/collectors.py
+++ b/src/nncf/experimental/common/tensor_statistics/collectors.py
@@ -43,13 +43,24 @@ class TensorReducerBase(ABC):
     the specified rule. Could handle tensors inplace or out of place.
     """
 
-    def __init__(self, reduction_axes: Optional[ReductionAxes] = None, inplace: bool = False):
+    def __init__(
+        self,
+        reduction_axes: Optional[ReductionAxes] = None,
+        keep_axes: Optional[tuple[int, ...]] = None,
+        inplace: bool = False
+    ):
         """
         :param reduction_axes: Reduction axes for reduction calculation.
            Equal to list(range(len(input.shape))) if empty.
+        :param keep_axes:
         :param inplace: Whether it should be calculated inplace or out of place.
         """
+        if reduction_axes is not None and keep_axes is not None:
+            msg = ""
+            raise nncf.ValidationError(msg)
+
         self._reduction_axes = reduction_axes
+        self._keep_axes = keep_axes
         self._inplace = inplace
         self._keepdims = True
 
@@ -107,6 +118,13 @@ def __hash__(self) -> int:
     def _get_reduction_axes(self, tensor: Tensor) -> ReductionAxes:
         if self._reduction_axes is not None:
             return self._reduction_axes
+
+        if self._keep_axes is not None:
+            axes = range(tensor.ndim)
+            # Ensure that all axes have positive values
+            keep_axes = tuple(axes[i] for i in self._keep_axes)
+            self._reduction_axes = tuple(set(axes) - set(keep_axes))
+
         return tuple(range(len(tensor.shape)))
 
diff --git a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
index 64f16538265..d40e387a1fe 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
@@ -33,6 +33,9 @@
 from nncf.tensor import functions as fns
 from nncf.experimental.common.tensor_statistics.collectors import NoopAggregator
 from nncf.experimental.common.tensor_statistics.collectors import ShapeReducer
+from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer
+from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator
+from nncf.experimental.common.tensor_statistics.collectors import TensorCollector
 
 TModel = TypeVar("TModel")
 TTensor = TypeVar("TTensor")
@@ -276,7 +281,7 @@ def _get_statistics_for_node(
         return statistics_for_node
 
     def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer:
-        statistic_container = StatisticPointsContainer()
-
+        model_backend = get_backend(model)
         self._set_backend_entity(model)
         alpha_map = self._get_alpha_map()
         nodes_to_smooth_data = self._get_nodes_to_smooth_data(graph, alpha_map.keys())
 
+        statistic_container = StatisticPointsContainer()
         for node_to_smooth, input_act_port in nodes_to_smooth_data:
             target_point = self._backend_entity.target_point(
                 target_type=self._backend_entity.pre_layer_target_type(),
                 target_node_name=node_to_smooth.node_name,
                 port_id=input_act_port,
             )
-            input_reduction_axes = self._calculate_input_reduction_axes(
-                graph, node_to_smooth, input_act_port
-            )
-            stat_collector = self._backend_entity.get_abs_max_channel_collector(
-                self._subset_size, input_reduction_axes, self._inplace_statistics, STATISTIC_BRANCH_KEY
-            )
+
+            if model_backend == BackendType.ONNX:
+                keep_axis = self._backend_entity.get_activation_channel_axis(node_to_smooth, input_act_port)
+                stat_collector = TensorCollector()
+                reducer = AbsMaxReducer(keep_axes=(keep_axis,))
+                aggregator = MaxAggregator(num_samples=self._subset_size)
+                stat_collector.register_statistic_branch(STATISTIC_BRANCH_KEY, reducer, aggregator)
+            else:
+                input_reduction_axes = self._calculate_input_reduction_axes(
+                    graph, node_to_smooth, input_act_port
+                )
+                stat_collector = self._backend_entity.get_abs_max_channel_collector(
+                    self._subset_size, input_reduction_axes, self._inplace_statistics, STATISTIC_BRANCH_KEY
+                )
 
             # TODO(andrey-churkin): OVShapeReducer
             stat_collector.register_statistic_branch(SHAPE_BRANCH_KEY, ShapeReducer(), NoopAggregator(num_samples=1, return_first=True))

From a801647a6b54495340afc1c067f566077a6a52d2 Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Mon, 13 Oct 2025 13:18:44 +0100
Subject: [PATCH 05/22] minor fix

---
 src/nncf/experimental/common/tensor_statistics/collectors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nncf/experimental/common/tensor_statistics/collectors.py b/src/nncf/experimental/common/tensor_statistics/collectors.py
index 1a7e49615b3..18f23f5449e 100644
--- a/src/nncf/experimental/common/tensor_statistics/collectors.py
+++ b/src/nncf/experimental/common/tensor_statistics/collectors.py
@@ -123,7 +123,7 @@ def _get_reduction_axes(self, tensor: Tensor) -> ReductionAxes:
             axes = range(tensor.ndim)
             # Ensure that all axes have positive values
             keep_axes = tuple(axes[i] for i in self._keep_axes)
-            self._reduction_axes = tuple(set(axes) - set(keep_axes))
+            return tuple(set(axes) - set(keep_axes))
 
         return tuple(range(len(tensor.shape)))
 

From 53c6d7283c4fb07c6c3f88ddd9159177289667c7 Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Tue, 14 Oct 2025 07:27:17 +0100
Subject: [PATCH 06/22] update

---
 src/nncf/experimental/common/tensor_statistics/collectors.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/nncf/experimental/common/tensor_statistics/collectors.py b/src/nncf/experimental/common/tensor_statistics/collectors.py
index 18f23f5449e..742db09fc75 100644
--- a/src/nncf/experimental/common/tensor_statistics/collectors.py
+++ b/src/nncf/experimental/common/tensor_statistics/collectors.py
@@ -110,10 +110,11 @@ def __eq__(self, __o: object) -> bool:
             isinstance(__o, self.__class__)
             and self._reduction_axes == __o._reduction_axes
             and self._inplace == __o.inplace
+            and self._keep_axes == __o._keep_axes
         )
 
     def __hash__(self) -> int:
-        return hash((self.__class__.__name__, self.inplace, self._reduction_axes))
+        return hash((self.__class__.__name__, self.inplace, self._reduction_axes, self._keep_axes))
 
     def _get_reduction_axes(self, tensor: Tensor) -> ReductionAxes:
         if self._reduction_axes is not None:

From 97cab6a6248698ec18084784e8ba06387fadbf11 Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Tue, 14 Oct 2025 07:58:16 +0100
Subject: [PATCH 07/22] minor fix

---
 .../common/tensor_statistics/collectors.py    | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/nncf/experimental/common/tensor_statistics/collectors.py b/src/nncf/experimental/common/tensor_statistics/collectors.py
index 742db09fc75..b3844d2baac 100644
--- a/src/nncf/experimental/common/tensor_statistics/collectors.py
+++ b/src/nncf/experimental/common/tensor_statistics/collectors.py
@@ -121,10 +121,12 @@ def _get_reduction_axes(self, tensor: Tensor) -> ReductionAxes:
             return self._reduction_axes
 
         if self._keep_axes is not None:
-            axes = range(tensor.ndim)
-            # Ensure that all axes have positive values
-            keep_axes = tuple(axes[i] for i in self._keep_axes)
-            return tuple(set(axes) - set(keep_axes))
+            axes = list(range(tensor.ndim))
+            if len(axes) > 1:
+                # Ensure that all axes have positive values
+                keep_axes = tuple(axes[i] for i in self._keep_axes)
+                return tuple(set(axes) - set(keep_axes))
+            return ()
 
         return tuple(range(len(tensor.shape)))
 

From 3a96172f41bb8362f1afde83879abf7149d0fe6b Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Tue, 14 Oct 2025 08:06:08 +0100
Subject: [PATCH 08/22] fix

---
 src/nncf/quantization/algorithms/smooth_quant/algorithm.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
index d40e387a1fe..55158e8d310 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
@@ -253,7 +253,11 @@ def _retrieve_shape(
                 self._algorithm_key,
             ):
                 stats = tensor_collector.get_statistics()
-                shape = tuple(stats[SHAPE_BRANCH_KEY].tolist())
+                shape = stats[SHAPE_BRANCH_KEY]
+                if shape is not None:
+                    shape = tuple(shape.tolist())
+                else:
+                    shape = tuple()
 
                 items.append((node, input_port, shape))
 

From 8f16b32125dfd7334512275195e914f997472ebd Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Tue, 14 Oct 2025 09:20:30 +0100
Subject: [PATCH 09/22] fix

---
 .../algorithms/smooth_quant/algorithm.py      | 32 ++++++++-----------
 1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
index 55158e8d310..4d9a5c641d4 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
@@ -28,14 +28,14 @@
 from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
 from nncf.common.utils.backend import BackendType
 from nncf.common.utils.backend import get_backend
-from nncf.quantization.algorithms.algorithm import Algorithm
-from nncf.tensor import Tensor
-from nncf.tensor import functions as fns
-from nncf.experimental.common.tensor_statistics.collectors import NoopAggregator
-from nncf.experimental.common.tensor_statistics.collectors import ShapeReducer
-from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer
-from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator
-from nncf.experimental.common.tensor_statistics.collectors import TensorCollector
+from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer
+from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator
+from nncf.experimental.common.tensor_statistics.collectors import NoopAggregator
+from nncf.experimental.common.tensor_statistics.collectors import ShapeReducer
+from nncf.experimental.common.tensor_statistics.collectors import TensorCollector
+from nncf.quantization.algorithms.algorithm import Algorithm
+from nncf.tensor import Tensor
+from nncf.tensor import functions as fns
 
 TModel = TypeVar("TModel")
 TTensor = TypeVar("TTensor")
@@ -211,9 +211,7 @@ def _calculate_scale_and_ratio(
         return scales, ratio
 
     def _group_nodes_by_source(
-        self,
-        nodes_to_smooth: list[tuple[NNCFNode, int]],
-        nncf_graph: NNCFGraph
+        self, nodes_to_smooth: list[tuple[NNCFNode, int]], nncf_graph: NNCFGraph
     ) -> dict[tuple, list]:
         """
         Groups nodes that will be smoothed by source (parent node).
@@ -229,16 +227,14 @@ def _group_nodes_by_source(
             source_node = nncf_graph.get_input_edge_by_port_id(node_to_smooth, input_act_port).from_node
             edge = nncf_graph.get_edge(source_node, node_to_smooth)
             # Such group_id (with node, ports, and shape as a hash) allows us to be confident
             # that all sensitive parameters for successor nodes are equal.
 
             # TODO(andrey-churkin): Why hash(str(edge.tensor_shape))?
             group_id = (source_node, input_act_port, edge.output_port_id, shape)
             groups[group_id].append(node_to_smooth)
 
         return groups
 
     def _retrieve_shape(
-        self,
-        nodes: list[tuple[NNCFNode, int]],
-        statistic_points: StatisticPointsContainer
+        self, nodes: list[tuple[NNCFNode, int]], statistic_points: StatisticPointsContainer
     ) -> list[tuple[NNCFNode, int, tuple[int, ...]]]:
         """
         :param nodes:
@@ -305,15 +301,15 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin
                 aggregator = MaxAggregator(num_samples=self._subset_size)
                 stat_collector.register_statistic_branch(STATISTIC_BRANCH_KEY, reducer, aggregator)
             else:
-                input_reduction_axes = self._calculate_input_reduction_axes(
-                    graph, node_to_smooth, input_act_port
-                )
+                input_reduction_axes = self._calculate_input_reduction_axes(graph, node_to_smooth, input_act_port)
                 stat_collector = self._backend_entity.get_abs_max_channel_collector(
                     self._subset_size, input_reduction_axes, self._inplace_statistics, STATISTIC_BRANCH_KEY
                 )
 
             # TODO(andrey-churkin): OVShapeReducer
-            stat_collector.register_statistic_branch(SHAPE_BRANCH_KEY, ShapeReducer(), NoopAggregator(num_samples=1, return_first=True))
+            stat_collector.register_statistic_branch(
+                SHAPE_BRANCH_KEY, ShapeReducer(), NoopAggregator(num_samples=1, return_first=True)
+            )
 
             statistic_container.add_statistic_point(
                 StatisticPoint(
                     target_point=target_point,
                     tensor_collector=stat_collector,
                     algorithm=self._algorithm_key,
                 )
             )
         return statistic_container
 
     def _get_nodes_to_smooth_data(
-        self,
-        nncf_graph: NNCFGraph,
-        node_metatypes: list[OperatorMetatype]
+        self, nncf_graph: NNCFGraph, node_metatypes: list[OperatorMetatype]
     ) -> list[tuple[NNCFNode, int]]:
         """
         Collects layers whose activations will be smoothed.

From acb88d733dacbf59ed74add6f38d114c8d60da6d Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Tue, 14 Oct 2025 09:59:04 +0100
Subject: [PATCH 10/22] fix

---
 src/nncf/experimental/common/tensor_statistics/collectors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nncf/experimental/common/tensor_statistics/collectors.py b/src/nncf/experimental/common/tensor_statistics/collectors.py
index b3844d2baac..56f51a4e3bc 100644
--- a/src/nncf/experimental/common/tensor_statistics/collectors.py
+++ b/src/nncf/experimental/common/tensor_statistics/collectors.py
@@ -47,7 +47,7 @@ def __init__(
         self,
         reduction_axes: Optional[ReductionAxes] = None,
         keep_axes: Optional[tuple[int, ...]] = None,
-        inplace: bool = False
+        inplace: bool = False,
     ):
         """
         :param reduction_axes: Reduction axes for reduction calculation.
            Equal to list(range(len(input.shape))) if empty.

From 104dea5b694c0ba7fbac747170ad532b1912dd2e Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Tue, 14 Oct 2025 13:30:28 +0100
Subject: [PATCH 11/22] improve code

---
 .../algorithms/smooth_quant/algorithm.py      | 68 ++++++++++++------
 .../algorithms/smooth_quant/backend.py        | 35 ++++++----
 .../algorithms/smooth_quant/onnx_backend.py   | 13 ----
 .../smooth_quant/openvino_backend.py          | 21 +++---
 .../algorithms/smooth_quant/torch_backend.py  | 13 ----
 .../smooth_quant/torch_fx_backend.py          | 13 ----
 .../test_templates/test_smooth_quant.py       | 24 -------
 7 files changed, 74 insertions(+), 113 deletions(-)

diff --git a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
index 4d9a5c641d4..7a54b547bd5 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
@@ -28,10 +28,8 @@
 from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
 from nncf.common.utils.backend import BackendType
 from nncf.common.utils.backend import get_backend
-from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer
 from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator
 from nncf.experimental.common.tensor_statistics.collectors import NoopAggregator
-from nncf.experimental.common.tensor_statistics.collectors import ShapeReducer
 from nncf.experimental.common.tensor_statistics.collectors import TensorCollector
 from nncf.quantization.algorithms.algorithm import Algorithm
 from nncf.tensor import Tensor
@@ -283,10 +281,11 @@ def _get_statistics_for_node(
     def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer:
         model_backend = get_backend(model)
         self._set_backend_entity(model)
+
         alpha_map = self._get_alpha_map()
         nodes_to_smooth_data = self._get_nodes_to_smooth_data(graph, alpha_map.keys())
 
-        statistic_container = StatisticPointsContainer()
+        container = StatisticPointsContainer()
         for node_to_smooth, input_act_port in nodes_to_smooth_data:
             target_point = self._backend_entity.target_point(
                 target_type=self._backend_entity.pre_layer_target_type(),
                 target_node_name=node_to_smooth.node_name,
                 port_id=input_act_port,
             )
 
+            # NOTE: TODO
             if model_backend == BackendType.ONNX:
-                keep_axis = self._backend_entity.get_activation_channel_axis(node_to_smooth, input_act_port)
-                stat_collector = TensorCollector()
-                reducer = AbsMaxReducer(keep_axes=(keep_axis,))
-                aggregator = MaxAggregator(num_samples=self._subset_size)
-                stat_collector.register_statistic_branch(STATISTIC_BRANCH_KEY, reducer, aggregator)
+                keep_axes = (self._backend_entity.get_activation_channel_axis(node_to_smooth, input_act_port),)
+                collector = self._create_tensor_collector(
+                    self._subset_size,
+                    self._inplace_statistics,
+                    keep_axes=keep_axes,
+                )
             else:
-                input_reduction_axes = self._calculate_input_reduction_axes(graph, node_to_smooth, input_act_port)
-                stat_collector = self._backend_entity.get_abs_max_channel_collector(
-                    self._subset_size, input_reduction_axes, self._inplace_statistics, STATISTIC_BRANCH_KEY
+                reduction_axes = self._calculate_input_reduction_axes(graph, node_to_smooth, input_act_port)
+                collector = self._create_tensor_collector(
+                    self._subset_size,
+                    self._inplace_statistics,
+                    reduction_axes=reduction_axes,
                 )
 
-            # TODO(andrey-churkin): OVShapeReducer
-            stat_collector.register_statistic_branch(
-                SHAPE_BRANCH_KEY, ShapeReducer(), NoopAggregator(num_samples=1, return_first=True)
-            )
+            container.add_statistic_point(StatisticPoint(target_point, collector, self._algorithm_key))
 
-            statistic_container.add_statistic_point(
-                StatisticPoint(
-                    target_point=target_point,
-                    tensor_collector=stat_collector,
-                    algorithm=self._algorithm_key,
-                )
-            )
-        return statistic_container
+        return container
+
+    def _create_tensor_collector(
+        self,
+        num_samples: int,
+        inplace: bool,
+        keep_axes: Optional[tuple[int, ...]] = None,
+        reduction_axes: Optional[tuple[int, ...]] = None,
+    ) -> TensorCollector:
+        """
+        :param num_samples:
+        :param inplace:
+        :param keep_axes:
+        :param reduction_axes:
+        :return:
+        """
+        collector = TensorCollector()
+
+        abs_max_reducer_cls = self._backend_entity.get_abs_max_reducer_cls()
+        collector.register_statistic_branch(
+            STATISTIC_BRANCH_KEY,
+            abs_max_reducer_cls(reduction_axes, keep_axes, inplace),
+            MaxAggregator(num_samples=num_samples),
+        )
+        shape_reducer_cls = self._backend_entity.get_shape_reducer_cls()
+        collector.register_statistic_branch(
+            SHAPE_BRANCH_KEY, shape_reducer_cls(inplace), NoopAggregator(num_samples=1, return_first=True)
+        )
+
+        return collector
 
     def _get_nodes_to_smooth_data(
         self, nncf_graph: NNCFGraph, node_metatypes: list[OperatorMetatype]
diff --git a/src/nncf/quantization/algorithms/smooth_quant/backend.py b/src/nncf/quantization/algorithms/smooth_quant/backend.py
index 8b62d5bbb51..5fc003041ba 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/backend.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/backend.py
@@ -20,7 +20,8 @@
 from nncf.common.graph.transformations.commands import TargetType
 from nncf.common.graph.transformations.commands import TransformationCommand
 from nncf.common.tensor_statistics.statistic_point import StatisticPoint
-from nncf.experimental.common.tensor_statistics.collectors import TensorCollector
+from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer
+from nncf.experimental.common.tensor_statistics.collectors import ShapeReducer
 from nncf.tensor import Tensor
 
 TModel = TypeVar("TModel")
@@ -97,20 +98,6 @@ def get_activations_port_id(node: NNCFNode, nncf_graph: NNCFGraph) -> int:
         :return: Map with the activation & weighted ports.
         """
 
-    @staticmethod
-    @abstractmethod
-    def get_abs_max_channel_collector(
-        num_samples: int, stats_reduction_axes: tuple[int], inplace: bool, branch_key: str
-    ) -> TensorCollector:
-        """
-        Returns TensorCollector with MaxAggregator and AbsMaxReducer.
-
-        :param stats_reduction_axes: Calculated reduction axes.
-        :param inplace: Whether to calculate statistic inplace or not.
-        :param branch_key: Specific string for branch key.
-        :return: TensorCollector instance.
-        """
-
     @staticmethod
     @abstractmethod
     def get_weight_value(node_with_weight: NNCFNode, model: TModel, nncf_graph: NNCFGraph) -> Tensor:
@@ -199,3 +186,21 @@ def get_filter_fn_for_statistics(activation_port_id: int, algorithm_key: str) ->
         :param algorithm_key: Current algorithm key.
         :return: Backend-specific callable to filter statistic containers according to its statistic point.
         """
+
+    @staticmethod
+    def get_abs_max_reducer_cls() -> type[AbsMaxReducer]:
+        """
+        Returns the backend-specific `AbsMaxReducer` class.
+
+        :return: The `AbsMaxReducer` class.
+        """
+        return AbsMaxReducer
+
+    @staticmethod
+    def get_shape_reducer_cls() -> type[ShapeReducer]:
+        """
+        Returns the backend-specific `ShapeReducer` class.
+
+        :return: The `ShapeReducer` class.
+        """
+        return ShapeReducer
diff --git a/src/nncf/quantization/algorithms/smooth_quant/onnx_backend.py b/src/nncf/quantization/algorithms/smooth_quant/onnx_backend.py
index be3c3edfa7a..f374f350afe 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/onnx_backend.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/onnx_backend.py
@@ -21,9 +21,6 @@
 from nncf.common.graph.operator_metatypes import OperatorMetatype
 from nncf.common.graph.transformations.commands import TargetType
 from nncf.common.tensor_statistics.statistic_point import StatisticPoint
-from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer
-from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator
-from nncf.experimental.common.tensor_statistics.collectors import TensorCollector
 from nncf.onnx.graph.metatypes.groups import MATMUL_METATYPES
 from nncf.onnx.graph.metatypes.groups import OPERATIONS_WITH_WEIGHTS
 from nncf.onnx.graph.metatypes.groups import QUANTIZE_AGNOSTIC_OPERATIONS
@@ -76,16 +73,6 @@ def get_activations_port_id(node: NNCFNode, nncf_graph: NNCFGraph) -> int:
 
         return activation_port
 
-    @staticmethod
-    def get_abs_max_channel_collector(
-        num_samples: int, stats_reduction_axes: tuple[int], inplace: bool, branch_key: str
-    ) -> TensorCollector:
-        collector = TensorCollector()
-        reducer = AbsMaxReducer(reduction_axes=stats_reduction_axes)
-        aggregator = MaxAggregator(num_samples=num_samples)
-        collector.register_statistic_branch(branch_key, reducer, aggregator)
-        return collector
-
     @staticmethod
     def _get_weight_tensor_port_id(node: NNCFNode) -> int:
         weight_ports = list(node.layer_attributes.weight_attrs)
diff --git a/src/nncf/quantization/algorithms/smooth_quant/openvino_backend.py b/src/nncf/quantization/algorithms/smooth_quant/openvino_backend.py
index 212242b44fe..9aeb202707c 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/openvino_backend.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/openvino_backend.py
@@ -20,8 +20,6 @@
 from nncf.common.graph.operator_metatypes import OperatorMetatype
 from nncf.common.graph.transformations.commands import TargetType
 from nncf.common.tensor_statistics.statistic_point import StatisticPoint
-from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator
-from nncf.experimental.common.tensor_statistics.collectors import TensorCollector
 from nncf.openvino.graph.layout import OVLayoutElem
 from nncf.openvino.graph.layout import get_linear_weights_layout_from_node
 from nncf.openvino.graph.metatypes.groups import QUANTIZE_AGNOSTIC_OPERATIONS
@@ -33,6 +31,7 @@
 from nncf.openvino.graph.transformations.commands import OVTargetPoint
 from nncf.openvino.graph.transformations.commands import OVWeightUpdateCommand
 from nncf.openvino.statistics.collectors import OVAbsMaxReducer
+from nncf.openvino.statistics.collectors import OVShapeReducer
 from nncf.quantization.algorithms.smooth_quant.backend import SmoothQuantAlgoBackend
 from nncf.tensor import Tensor
 
@@ -76,16 +75,6 @@ def get_activations_port_id(node: NNCFNode, nncf_graph: NNCFGraph) -> int:
             raise nncf.InternalError(msg)
         return activation_ports[0]
 
-    @staticmethod
-    def get_abs_max_channel_collector(
-        num_samples: int, stats_reduction_axes: tuple[int], inplace: bool, branch_key: str
-    ) -> TensorCollector:
-        collector = TensorCollector()
-        reducer = OVAbsMaxReducer(reduction_axes=stats_reduction_axes, inplace=inplace)
-        aggregator = MaxAggregator(num_samples=num_samples)
-        collector.register_statistic_branch(branch_key, reducer, aggregator)
-        return collector
-
     @staticmethod
     def get_weight_value(node_with_weight: NNCFNode, model: ov.Model, nncf_graph: NNCFGraph) -> Tensor:
         port_id = OVSmoothQuantAlgoBackend.get_weight_tensor_port_id(node_with_weight)
@@ -165,3 +154,11 @@ def filter_func(point: StatisticPoint) -> bool:
         )
 
         return filter_func
+
+    @staticmethod
+    def get_abs_max_reducer_cls() -> type[OVAbsMaxReducer]:
+        return OVAbsMaxReducer
+
+    @staticmethod
+    def get_shape_reducer_cls() -> type[OVShapeReducer]:
+        return OVShapeReducer
diff --git a/src/nncf/quantization/algorithms/smooth_quant/torch_backend.py b/src/nncf/quantization/algorithms/smooth_quant/torch_backend.py
index 356f342f903..2da75ce2433 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/torch_backend.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/torch_backend.py
@@ -20,9 +20,6 @@
 from nncf.common.graph.transformations.commands import TargetType
 from nncf.common.quantization.quantizer_propagation.structs import QuantizationTrait
 from nncf.common.tensor_statistics.statistic_point import StatisticPoint
-from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer
-from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator
-from nncf.experimental.common.tensor_statistics.collectors import TensorCollector
 from nncf.quantization.algorithms.smooth_quant.backend import SmoothQuantAlgoBackend
 from nncf.tensor import Tensor
 from nncf.torch.function_hook.commands import PT2ConstUpdateCommand
@@ -78,16 +75,6 @@ def get_activations_port_id(node: NNCFNode, nncf_graph: NNCFGraph) -> int:
         # all nodes with the metatypes have 0 activation port id.
         return 0
 
-    @staticmethod
-    def get_abs_max_channel_collector(
-        num_samples: int, stats_reduction_axes: tuple[int], inplace: bool, branch_key: str
-    ) -> TensorCollector:
-        collector = TensorCollector()
-        reducer = AbsMaxReducer(reduction_axes=stats_reduction_axes)
-        aggregator = MaxAggregator(num_samples=num_samples)
-        collector.register_statistic_branch(branch_key, reducer, aggregator)
-        return collector
-
     @staticmethod
     def get_weight_value(node_with_weight: NNCFNode, model: NNCFNetwork, nncf_graph: NNCFGraph) -> Tensor:
         if isinstance(model, GraphModelWrapper):
diff --git a/src/nncf/quantization/algorithms/smooth_quant/torch_fx_backend.py b/src/nncf/quantization/algorithms/smooth_quant/torch_fx_backend.py
index 883314cd3b5..e17dfef3fbd 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/torch_fx_backend.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/torch_fx_backend.py
@@ -20,9 +20,6 @@
 from nncf.common.graph.transformations.commands import TargetType
 from nncf.common.quantization.quantizer_propagation.structs import QuantizationTrait
 from nncf.common.tensor_statistics.statistic_point import StatisticPoint
-from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer
-from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator
-from nncf.experimental.common.tensor_statistics.collectors import TensorCollector
 from nncf.experimental.torch.fx.commands import FXApplyTransformationCommand
 from nncf.experimental.torch.fx.node_utils import get_tensor_constant_from_node
 from nncf.experimental.torch.fx.transformations import constant_update_transformation_builder
@@ -83,16 +80,6 @@ def is_node_with_weights(node: NNCFNode) -> bool:
     def get_activations_port_id(node: NNCFNode, nncf_graph: NNCFGraph) -> int:
         return 0
 
-    @staticmethod
-    def get_abs_max_channel_collector(
-        num_samples: int, stats_reduction_axes: tuple[int], inplace: bool, branch_key: str
-    ) -> TensorCollector:
-        collector = TensorCollector()
-        reducer = AbsMaxReducer(reduction_axes=stats_reduction_axes)
-        aggregator = MaxAggregator(num_samples=num_samples)
-        collector.register_statistic_branch(branch_key, reducer, aggregator)
-        return collector
-
     @staticmethod
     def get_weight_value(node_with_weight: NNCFNode, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> Tensor:
         weight_node = get_const_node(node_with_weight, node_with_weight.metatype.weight_port_ids[0], nncf_graph)
diff --git a/tests/cross_fw/test_templates/test_smooth_quant.py b/tests/cross_fw/test_templates/test_smooth_quant.py
index a01689aeb40..cd134355884 100644
--- a/tests/cross_fw/test_templates/test_smooth_quant.py
+++ b/tests/cross_fw/test_templates/test_smooth_quant.py
@@ -19,8 +19,6 @@
 from nncf.common.factory import NNCFGraphFactory
 from nncf.common.factory import StatisticsAggregatorFactory
 from nncf.common.graph.graph import NNCFNode
-from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer
-from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator
 from nncf.parameters import ModelType
 from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
 from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters
@@ -170,28 +168,6 @@ def test_smooth_quant_algo(self, model_cls, reference_values, tmpdir):
 
         self.check_scales(quantized_model, reference_values, model_cls)
 
-    def test_get_abs_max_channel_collector(self, inplace_statistics: bool):
-        backend = self.get_backend()
-        reduction_axes = (3, 2, 1)
-        samples = 1
-
-        backend_tensor_collector = backend.get_abs_max_channel_collector(
-            num_samples=samples,
-            stats_reduction_axes=reduction_axes,
-            inplace=inplace_statistics,
-            branch_key="test_branch",
-        )
-
-        assert len(backend_tensor_collector.aggregators) == 1
-        for aggregator in backend_tensor_collector.aggregators.values():
-            assert isinstance(aggregator, MaxAggregator)
-
-        assert len(backend_tensor_collector.reducers) == 1
-        for reducer in backend_tensor_collector.reducers:
-            assert isinstance(reducer, AbsMaxReducer)
-            assert reducer.inplace == inplace_statistics
-            assert reducer._reduction_axes == reduction_axes
-
     @pytest.mark.parametrize(
         "model_cls, references",
         (

From 4c1162709fb8f44fda83f947271d1d9f838cadfc Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Wed, 15 Oct 2025 07:22:07 +0100
Subject: [PATCH 12/22] minor fix

---
 src/nncf/quantization/algorithms/smooth_quant/algorithm.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
index 7a54b547bd5..b9e66223486 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
@@ -102,6 +102,10 @@ def _set_backend_entity(self, model: TModel) -> None:
             msg = f"Cannot return backend-specific entity because {model_backend.value} is not supported!"
             raise nncf.UnsupportedBackendError(msg)
 
+        # Only the OpenVINO backend supports in-place statistics, so we should set this variable here.
+        if model_backend != BackendType.OPENVINO:
+            self._inplace_statistics = False
+
     def apply(
         self,
         model: TModel,

From 8b2ea5f78a5b309851f4a5eb9c3e75f326598569 Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Wed, 15 Oct 2025 08:20:59 +0100
Subject: [PATCH 13/22] update

---
 .../common/tensor_statistics/collectors.py    |  5 ++-
 .../algorithms/smooth_quant/algorithm.py      | 44 +++++++++++++------
 2 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/src/nncf/experimental/common/tensor_statistics/collectors.py b/src/nncf/experimental/common/tensor_statistics/collectors.py
index 56f51a4e3bc..f4dfe2f1487 100644
--- a/src/nncf/experimental/common/tensor_statistics/collectors.py
+++ b/src/nncf/experimental/common/tensor_statistics/collectors.py
@@ -52,11 +52,12 @@ def __init__(
         """
         :param reduction_axes: Reduction axes for reduction calculation. Equal to list(range(len(input.shape)))
             if empty.
-        :param keep_axes:
+        :param keep_axes: Axes to preserve during the reduction operation. These will be used in
+            `_reduce_out_of_place()` to calculate the reduction axes once the tensor shape is known.
         :param inplace: Whether should be calculated inplace or out of place.
         """
         if reduction_axes is not None and keep_axes is not None:
-            msg = ""
+            msg = "Only one of `reduction_axes` or `keep_axes` should be specified, not both."
             raise nncf.ValidationError(msg)
 
         self._reduction_axes = reduction_axes
diff --git a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
index b9e66223486..8243242f4e2 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
@@ -213,12 +213,13 @@ def _calculate_scale_and_ratio(
         return scales, ratio
 
     def _group_nodes_by_source(
-        self, nodes_to_smooth: list[tuple[NNCFNode, int]], nncf_graph: NNCFGraph
+        self, nodes_to_smooth: list[tuple[NNCFNode, int, tuple[int, ...]]], nncf_graph: NNCFGraph
     ) -> dict[tuple, list]:
         """
         Groups nodes that will be smoothed by source (parent node).
 
-        :param nodes_to_smooth: List of the nodes that will be smoothed.
+        :param nodes_to_smooth: A list of tuples where each tuple consists of a node, an input port, and the
+            shape of the tensor associated with that node and input port.
         :param nncf_graph: NNCFGraph instance.
         :return: Dictionary with the source info as key and grouped nodes as value.
         """
@@ -228,8 +229,6 @@ def _group_nodes_by_source(
             edge = nncf_graph.get_edge(source_node, node_to_smooth)
             # Such group_id (with node, ports, and shape as a hash) allows us to be confident
             # that all sensitive parameters for successor nodes are equal.
-
-            # TODO(andrey-churkin): Why hash(str(edge.tensor_shape))?
             group_id = (source_node, input_act_port, edge.output_port_id, shape)
             groups[group_id].append(node_to_smooth)
 
@@ -239,9 +238,14 @@ def _retrieve_shape(
         self, nodes: list[tuple[NNCFNode, int]], statistic_points: StatisticPointsContainer
     ) -> list[tuple[NNCFNode, int, tuple[int, ...]]]:
         """
-        :param nodes:
-        :param statistic_points:
-        :return:
+        Retrieves the shapes of tensors associated with specific nodes and input ports
+        from the given statistic points container.
+
+        :param nodes: A list of tuples, each containing a node and its corresponding input port index.
+        :param statistic_points: Container holding statistics, used to retrieve tensor shapes.
+        :return: A list of tuples where each tuple consists of a node, an input port, and the
+            shape of the tensor associated with that node and input port. If shape information is
+            not available, an empty tuple is returned for the shape.
         """
         items = []
         for node, input_port in nodes:
@@ -297,7 +301,12 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin
                 port_id=input_act_port,
             )
 
-            # NOTE: TODO
+            # NOTE: The OpenVINO backend performs in-place statistic calculations.
+            # To insert reduction operations into the model graph, the reduction axes must be known before inference.
+            # However, when using `keep_axes`, the reduction axes are determined during statistics collection.
+            # Therefore, `keep_axes` and `inplace` cannot be used together with the OpenVINO backend.
+            # For the ONNX backend, we can't calculate reduction axes before inference because the tensor shape
+            # (actually, only the number of dimensions (ndim) is required) is unknown for some operations.
             if model_backend == BackendType.ONNX:
                 keep_axes = (self._backend_entity.get_activation_channel_axis(node_to_smooth, input_act_port),)
                 collector = self._create_tensor_collector(
@@ -325,12 +334,18 @@ def _create_tensor_collector(
         reduction_axes: Optional[tuple[int, ...]] = None,
     ) -> TensorCollector:
         """
-        :param num_samples:
-        :param inplace:
-        :param keep_axes:
-        :param reduction_axes:
-        :return:
+        Initializes and returns a configured tensor collector for the `SmoothQuant` algorithm.
+
+        :param num_samples: Maximum number of samples to collect for the aggregator.
+        :param inplace: If True, statistics will be computed in-place.
+        :param keep_axes: Axes to preserve during the reduction operation.
+        :param reduction_axes: Axes over which the reduction operation is applied.
+        :return: A tensor collector configured with the specified reduction and aggregation logic.
         """
+        if reduction_axes is not None and keep_axes is not None:
+            msg = "Only one of `reduction_axes` or `keep_axes` should be specified, not both."
+            raise nncf.ValidationError(msg)
+
         collector = TensorCollector()
 
         abs_max_reducer_cls = self._backend_entity.get_abs_max_reducer_cls()
@@ -354,7 +369,8 @@ def _get_nodes_to_smooth_data(
 
         :param nncf_graph: NNCFGraph instance.
         :param node_metatypes: Metatypes for nodes to search for.
-        :return: List with the data for each layer.
+        :return: A list of pairs, where each pair consists of a node and its corresponding
+            input activation port.
         """
         nodes_with_weights = nncf_graph.get_nodes_by_metatypes(node_metatypes)
         nodes_to_smooth_data = []

From dd61ed7f4a73025c23f53a279c9c0772f81f72fc Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Wed, 15 Oct 2025 09:23:21 +0100
Subject: [PATCH 14/22] add tests

---
 .../test_reducers_and_aggregators.py          | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tests/common/experimental/test_reducers_and_aggregators.py b/tests/common/experimental/test_reducers_and_aggregators.py
index a9c4df66846..33da32dab2c 100644
--- a/tests/common/experimental/test_reducers_and_aggregators.py
+++ b/tests/common/experimental/test_reducers_and_aggregators.py
@@ -261,6 +261,17 @@ def test_mean_abs_max_reducer(self, axes, reference):
         assert len(result) == 1
         assert fns.allclose(result[0], self.get_nncf_tensor(reference))
 
+    @pytest.mark.parametrize(
+        "axes, reference",
+        [[None, 10.0], [(1, 2), 4.16666], [(2,), 6.33333], [(), 10.0]],
+    )
+    def test_mean_abs_max_reducer_keep_axes(self, axes, reference):
+        reducer = MeanAbsMaxReducer(keep_axes=axes)
+        nncf_data = self.get_nncf_tensor(np.array(WEIGHT_COMPRESSION_REDUCERS_DATA), dtype=Dtype.FLOAT)
+        result = reducer._reduce_out_of_place([nncf_data])
+        assert len(result) == 1
+        assert fns.allclose(result[0], self.get_nncf_tensor(reference))
+
     @pytest.mark.parametrize(
         "axes, reference",
         [[None, 16.1666], [(0,), 64.0], [(0, 1), 36.1875], [(0, 1, 2), 16.1666]],
@@ -272,6 +283,17 @@ def test_max_variance_reducer(self, axes, reference):
         assert len(result) == 1
         assert fns.allclose(result[0], self.get_nncf_tensor(reference))
 
+    @pytest.mark.parametrize(
+        "axes, reference",
+        [[None, 16.1666], [(1, 2), 64.0], [(2,), 36.1875], [(), 16.1666]],
+    )
+    def test_max_variance_reducer_keep_axes(self, axes, reference):
+        reducer = MaxVarianceReducer(keep_axes=axes)
+        nncf_data = self.get_nncf_tensor(np.array(WEIGHT_COMPRESSION_REDUCERS_DATA), dtype=Dtype.FLOAT)
+        result = reducer._reduce_out_of_place([nncf_data])
+        assert len(result) == 1
+        assert fns.allclose(result[0], self.get_nncf_tensor(reference))
+
     @pytest.mark.parametrize(
         "reducer_name,ref,kwargs",
         [

From 92dee3eba54bd4ae1eadf8c13b07748a047b6276 Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Thu, 16 Oct 2025 07:57:30 +0100
Subject: [PATCH 15/22] Apply comments

---
 .../algorithms/smooth_quant/algorithm.py      | 27 +++++++------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
index 8243242f4e2..4a0afbae4ba 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
@@ -256,10 +256,7 @@ def _retrieve_shape(
         ):
             stats = tensor_collector.get_statistics()
             shape = stats[SHAPE_BRANCH_KEY]
-            if shape is not None:
-                shape = tuple(shape.tolist())
-            else:
-                shape = tuple()
+            shape = tuple() if shape is None else tuple(shape.tolist())
 
             items.append((node, input_port, shape))
 
@@ -309,18 +306,12 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin
            # NOTE: The OpenVINO backend performs in-place statistic calculations.
            # To insert reduction operations into the model graph, the reduction axes must be known before inference.
            # However, when using `keep_axes`, the reduction axes are determined during statistics collection.
            # Therefore, `keep_axes` and `inplace` cannot be used together with the OpenVINO backend.
            # For the ONNX backend, we can't calculate reduction axes before inference because the tensor shape
            # (actually, only the number of dimensions (ndim) is required) is unknown for some operations.
if model_backend == BackendType.ONNX: keep_axes = (self._backend_entity.get_activation_channel_axis(node_to_smooth, input_act_port),) - collector = self._create_tensor_collector( - self._subset_size, - self._inplace_statistics, - keep_axes=keep_axes, - ) + reduction_axes = None else: + keep_axes = None reduction_axes = self._calculate_input_reduction_axes(graph, node_to_smooth, input_act_port) - collector = self._create_tensor_collector( - self._subset_size, - self._inplace_statistics, - reduction_axes=reduction_axes, - ) + + collector = self._create_tensor_collector(self._subset_size, keep_axes, reduction_axes) container.add_statistic_point(StatisticPoint(target_point, collector, self._algorithm_key)) @@ -329,7 +320,6 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin def _create_tensor_collector( self, num_samples: int, - inplace: bool, keep_axes: Optional[tuple[int, ...]] = None, reduction_axes: Optional[tuple[int, ...]] = None, ) -> TensorCollector: @@ -337,7 +327,6 @@ def _create_tensor_collector( Initializes and returns a configured tensor collector for the `SmoothQuant` algorithm. :param num_samples: Maximum number of samples to collect for the aggregator. - :param inplace: If True, statistics will be computed in-place. :param keep_axes: Axes to preserve during the reduction operation. :param reduction_axes: Axes over which the reduction operation is applied. :return: A tensor collector configured with the specified reduction and aggregation logic. @@ -351,12 +340,14 @@ def _create_tensor_collector( abs_max_reducer_cls = self._backend_entity.get_abs_max_reducer_cls() collector.register_statistic_branch( STATISTIC_BRANCH_KEY, - abs_max_reducer_cls(reduction_axes, keep_axes, inplace), + abs_max_reducer_cls(reduction_axes, keep_axes, self._inplace_statistics), MaxAggregator(num_samples=num_samples), ) shape_reducer_cls = self._backend_entity.get_shape_reducer_cls() collector.register_statistic_branch( - SHAPE_BRANCH_KEY, shape_reducer_cls(inplace), NoopAggregator(num_samples=1, return_first=True) + SHAPE_BRANCH_KEY, + shape_reducer_cls(self._inplace_statistics), + NoopAggregator(num_samples=1, return_first=True), ) return collector From aa8691f99adbb26b7dae293de68d0cf21304dadc Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Thu, 23 Oct 2025 10:50:05 +0100 Subject: [PATCH 16/22] Add axes mode --- .../common/tensor_statistics/collectors.py | 119 +++++++++++------- src/nncf/openvino/statistics/collectors.py | 14 +-- .../channel_alignment/openvino_backend.py | 3 +- .../algorithms/min_max/algorithm.py | 6 +- .../algorithms/smooth_quant/algorithm.py | 27 ++-- .../torch/tensor_statistics/collectors.py | 2 +- .../test_reducers_and_aggregators.py | 36 ++---- .../test_tensor_collector_batch_size.py | 2 +- tests/common/test_statistics_aggregator.py | 4 +- .../test_templates/test_channel_alignment.py | 2 +- .../test_templates/test_quantizer_config.py | 2 +- .../test_reducers_and_aggregators.py | 2 +- 12 files changed, 112 insertions(+), 107 deletions(-) diff --git a/src/nncf/experimental/common/tensor_statistics/collectors.py b/src/nncf/experimental/common/tensor_statistics/collectors.py index f4dfe2f1487..f783c8e5267 100644 --- a/src/nncf/experimental/common/tensor_statistics/collectors.py +++ b/src/nncf/experimental/common/tensor_statistics/collectors.py @@ -17,6 +17,7 @@ from collections import defaultdict from collections import deque from copy import deepcopy +from enum import Enum from typing import Any, Optional, TypeVar, Union import nncf @@ -35,6 
+36,50 @@ InplaceInsertionFNType = TypeVar("InplaceInsertionFNType") AggregationAxes = tuple[int, ...] +Axes = tuple[int, ...] + + +class AxesMode(Enum): + """ + Represents different strategies for handling tensor axes. + + :param REDUCTION: Indicates that the specified axes should be reduced during an operation. + :param KEEP: Indicates that the specified axes should be preserved and not reduced during + an operation. + """ + + REDUCTION = "reduction" + KEEP = "keep" + + +def determine_reduction_axes( + ndim: int, axes: Optional[Axes] = None, axes_mode: AxesMode = AxesMode.REDUCTION +) -> ReductionAxes: + """ + Determines the set of axes along which a reduction operation should be performed + based on the specified axes mode. + + :param ndim: The number of dimensions in the input tensor. + :param axes: The axes specified for the reduction operation. If `None`, all axes + are considered (i.e., `tuple(range(ndim))`). + + :param axes_mode: Defines how the specified axes are interpreted: + - `AxesMode.REDUCTION`: the given axes will be reduced. + - `AxesMode.KEEP`: all axes except the specified ones will be reduced. + :return: The resolved set of axes along which the reduction operation should be performed. + """ + if axes is None: + return tuple(range(ndim)) + + if axes_mode == AxesMode.REDUCTION: + return axes + + all_axes = tuple(range(ndim)) + if len(all_axes) > 1: + # Ensure that all axes have positive values + keep_axes = tuple(all_axes[i] for i in axes) + return tuple(set(all_axes) - set(keep_axes)) + return () class TensorReducerBase(ABC): @@ -45,23 +90,19 @@ class TensorReducerBase(ABC): def __init__( self, - reduction_axes: Optional[ReductionAxes] = None, - keep_axes: Optional[tuple[int, ...]] = None, + axes: Optional[Axes] = None, + axes_mode: AxesMode = AxesMode.REDUCTION, inplace: bool = False, ): """ - :param reduction_axes: Reduction axes for reduction calculation. Equal to list(range(len(input.shape))) - if empty. - :param keep_axes: Axes to preserve during the reduction operation. These will be used in - `_reduce_out_of_place()` to calculate the reduction axes once the tensor shape is known. + :param axes: The axes along which the reduction operation should be applied. + If `None`, the operation will be applied to all axes (i.e., `tuple(range(tensor.ndim))`). + :param axes_mode: Determines how the specified `axes` are treated during the operation. + Use `AxesMode.REDUCTION` to reduce over the given axes, or `AxesMode.KEEP` to preserve them. :param inplace: Whether should be calculated inplace or out of place. """ - if reduction_axes is not None and keep_axes is not None: - msg = "Only one of `reduction_axes` or `keep_axes` should be specified, not both." 
-            raise nncf.ValidationError(msg)
-
-        self._reduction_axes = reduction_axes
-        self._keep_axes = keep_axes
+        self._axes = axes
+        self._axes_mode = axes_mode
         self._inplace = inplace
         self._keepdims = True
 
@@ -109,27 +150,13 @@ def __call__(self, x: list[Tensor]):
     def __eq__(self, __o: object) -> bool:
         return (
             isinstance(__o, self.__class__)
-            and self._reduction_axes == __o._reduction_axes
+            and self._axes == __o._axes
+            and self._axes_mode == __o._axes_mode
             and self._inplace == __o.inplace
-            and self._keep_axes == __o._keep_axes
         )
 
     def __hash__(self) -> int:
-        return hash((self.__class__.__name__, self.inplace, self._reduction_axes, self._keep_axes))
-
-    def _get_reduction_axes(self, tensor: Tensor) -> ReductionAxes:
-        if self._reduction_axes is not None:
-            return self._reduction_axes
-
-        if self._keep_axes is not None:
-            axes = list(range(tensor.ndim))
-            if len(axes) > 1:
-                # Ensure that all axes have positive values
-                keep_axes = tuple(axes[i] for i in self._keep_axes)
-                return tuple(set(axes) - set(keep_axes))
-            return ()
-
-        return tuple(range(len(tensor.shape)))
+        return hash((self.__class__.__name__, self.inplace, self._axes, self._axes_mode))
 
 
 class AggregatorBase:
@@ -466,35 +493,35 @@ def get_inplace_fn(self) -> Optional[InplaceInsertionFNType]:
 class MinReducer(TensorReducerBase):
     def _reduce_out_of_place(self, x: list[Tensor]) -> list[Tensor]:
         x = x[0]
-        reduction_axes = self._get_reduction_axes(x)
+        reduction_axes = determine_reduction_axes(x.ndim, self._axes, self._axes_mode)
         return [fns.min(x, reduction_axes, keepdims=self._keepdims)]
 
 
 class MaxReducer(TensorReducerBase):
     def _reduce_out_of_place(self, x: list[Tensor]) -> list[Tensor]:
         x = x[0]
-        reduction_axes = self._get_reduction_axes(x)
+        reduction_axes = determine_reduction_axes(x.ndim, self._axes, self._axes_mode)
         return [fns.max(x, reduction_axes, keepdims=self._keepdims)]
 
 
 class AbsMaxReducer(TensorReducerBase):
     def _reduce_out_of_place(self, x: list[Tensor]) -> list[Tensor]:
         x = fns.abs(x[0])
-        reduction_axes = self._get_reduction_axes(x)
+        reduction_axes = determine_reduction_axes(x.ndim, self._axes, self._axes_mode)
         return [fns.max(x, reduction_axes, keepdims=self._keepdims)]
 
 
 class MeanReducer(TensorReducerBase):
     def _reduce_out_of_place(self, x: list[Tensor]) -> list[Tensor]:
         x = x[0]
-        reduction_axes = self._get_reduction_axes(x)
+        reduction_axes = determine_reduction_axes(x.ndim, self._axes, self._axes_mode)
         return [fns.mean(x, reduction_axes, keepdims=self._keepdims)]
 
 
 class MeanVarianceReducer(TensorReducerBase):
     def _reduce_out_of_place(self, x: list[Tensor]) -> list[Tensor]:
         x = x[0]
-        reduction_axes = self._get_reduction_axes(x)
+        reduction_axes = determine_reduction_axes(x.ndim, self._axes, self._axes_mode)
         variance = fns.var(x, reduction_axes)
         return [fns.mean(variance)]
 
@@ -502,7 +529,7 @@ def _reduce_out_of_place(self, x: list[Tensor]) -> list[Tensor]:
 class MaxVarianceReducer(TensorReducerBase):
     def _reduce_out_of_place(self, x: list[Tensor]) -> list[Tensor]:
         x = x[0]
-        reduction_axes = self._get_reduction_axes(x)
+        reduction_axes = determine_reduction_axes(x.ndim, self._axes, self._axes_mode)
         variance = fns.var(x, reduction_axes)
         return [fns.max(variance)]
 
@@ -510,7 +537,7 @@ def _reduce_out_of_place(self, x: list[Tensor]) -> list[Tensor]:
 class MeanAbsMaxReducer(TensorReducerBase):
     def _reduce_out_of_place(self, x: list[Tensor]) -> list[Tensor]:
         x = fns.abs(x[0])
-        reduction_axes = self._get_reduction_axes(x)
+        reduction_axes = determine_reduction_axes(x.ndim, self._axes, self._axes_mode)
         abs_max = fns.max(x, reduction_axes, keepdims=self._keepdims)
         return [fns.mean(abs_max)]
 
 
@@ -518,40 +545,42 @@ def _reduce_out_of_place(self, x: list[Tensor]) -> list[Tensor]:
 class QuantileReducerBase(TensorReducerBase):
     def __init__(
         self,
-        reduction_axes: Optional[ReductionAxes] = None,
+        axes: Optional[Axes] = None,
+        axes_mode: AxesMode = AxesMode.REDUCTION,
         quantile: Optional[Union[float, tuple[float]]] = None,
         inplace: bool = False,
     ):
-        super().__init__(reduction_axes=reduction_axes, inplace=False)
+        super().__init__(axes, axes_mode, False)
         self._quantile = (0.01, 0.99) if quantile is None else quantile
 
     def __eq__(self, __o: object) -> bool:
         return super().__eq__(__o) and self._quantile == __o._quantile
 
     def __hash__(self) -> int:
-        return hash((self.__class__.__name__, self.inplace, self._reduction_axes, tuple(self._quantile)))
+        return hash((self.__class__.__name__, self.inplace, self._axes, self._axes_mode, tuple(self._quantile)))
 
 
 class QuantileReducer(QuantileReducerBase):
     def _reduce_out_of_place(self, x: list[Tensor]) -> list[Tensor]:
         x = x[0]
-        reduction_axes = self._get_reduction_axes(x)
+        reduction_axes = determine_reduction_axes(x.ndim, self._axes, self._axes_mode)
         return fns.quantile(x, self._quantile, reduction_axes, keepdims=self._keepdims)
 
 
 class AbsQuantileReducer(QuantileReducerBase):
     def __init__(
         self,
-        reduction_axes: Optional[ReductionAxes] = None,
-        quantile: Optional[Union[float, list[float]]] = None,
+        axes: Optional[Axes] = None,
+        axes_mode: AxesMode = AxesMode.REDUCTION,
+        quantile: Optional[Union[float, tuple[float]]] = None,
         inplace: bool = False,
     ):
         quantile = (0.99,) if quantile is None else quantile
-        super().__init__(reduction_axes=reduction_axes, quantile=quantile, inplace=False)
+        super().__init__(axes, axes_mode, quantile)
 
     def _reduce_out_of_place(self, x: list[Tensor]) -> list[Tensor]:
         x = fns.abs(x[0])
-        reduction_axes = self._get_reduction_axes(x)
+        reduction_axes = determine_reduction_axes(x.ndim, self._axes, self._axes_mode)
         return fns.quantile(x, self._quantile, reduction_axes, keepdims=self._keepdims)
 
 
@@ -575,7 +604,7 @@ def __eq__(self, __o: object) -> bool:
         return super().__eq__(__o) and self._channel_axis == __o._channel_axis
 
     def __hash__(self) -> int:
-        return hash((self.__class__.__name__, self.inplace, self._reduction_axes, self._channel_axis))
+        return hash((self.__class__.__name__, self.inplace, self._axes, self._axes_mode, self._channel_axis))
 
 
 ##################################################
 
diff --git a/src/nncf/openvino/statistics/collectors.py b/src/nncf/openvino/statistics/collectors.py
index e081015c138..35e01a55e99 100644
--- a/src/nncf/openvino/statistics/collectors.py
+++ b/src/nncf/openvino/statistics/collectors.py
@@ -44,37 +44,37 @@
 
 class OVMinReducer(MinReducer):
     def get_inplace_fn(self):
-        return get_inplace_min_op(self._reduction_axes)
+        return get_inplace_min_op(self._axes)
 
 
 class OVMaxReducer(MaxReducer):
     def get_inplace_fn(self):
-        return get_inplace_max_op(self._reduction_axes, False)
+        return get_inplace_max_op(self._axes, False)
 
 
 class OVAbsMaxReducer(AbsMaxReducer):
     def get_inplace_fn(self):
-        return get_inplace_max_op(self._reduction_axes, True)
+        return get_inplace_max_op(self._axes, True)
 
 
 class OVMeanReducer(MeanReducer):
     def get_inplace_fn(self):
-        return get_inplace_mean_op(self._reduction_axes)
+        return get_inplace_mean_op(self._axes)
 
 
 class OVMeanVarianceReducer(MeanVarianceReducer):
     def get_inplace_fn(self):
-        return get_inplace_mean_var_op(self._reduction_axes)
+        return get_inplace_mean_var_op(self._axes)
 
 
 class OVMaxVarianceReducer(MaxVarianceReducer):
     def get_inplace_fn(self):
-        return get_inplace_max_var_op(self._reduction_axes)
+        return get_inplace_max_var_op(self._axes)
 
 
 class OVMeanAbsMaxReducer(MeanAbsMaxReducer):
     def get_inplace_fn(self):
-        return get_inplace_mean_max_op(self._reduction_axes, True)
+        return get_inplace_mean_max_op(self._axes, True)
 
 
 class OVShapeReducer(ShapeReducer):
diff --git a/src/nncf/quantization/algorithms/channel_alignment/openvino_backend.py b/src/nncf/quantization/algorithms/channel_alignment/openvino_backend.py
index af111c72cb4..fcfc57c12bb 100644
--- a/src/nncf/quantization/algorithms/channel_alignment/openvino_backend.py
+++ b/src/nncf/quantization/algorithms/channel_alignment/openvino_backend.py
@@ -20,6 +20,7 @@
 from nncf.common.graph.layer_attributes import ConvolutionLayerAttributes
 from nncf.common.graph.transformations.commands import TargetType
 from nncf.common.tensor_statistics.collectors import TensorStatisticCollectorBase
+from nncf.experimental.common.tensor_statistics.collectors import AxesMode
 from nncf.experimental.common.tensor_statistics.collectors import MedianAggregator
 from nncf.experimental.common.tensor_statistics.collectors import TensorCollector
 from nncf.experimental.common.tensor_statistics.statistics import MinMaxTensorStatistic
@@ -81,7 +82,7 @@ def get_statistic_collector(
         reduction_axes, q: float, num_samples: int, inplace: bool
     ) -> TensorStatisticCollectorBase:
         tensor_collector = TensorCollector(MinMaxTensorStatistic)
-        quantile_reducer = OVQuantileReducer(reduction_axes, (q, 1 - q), inplace)
+        quantile_reducer = OVQuantileReducer(reduction_axes, AxesMode.REDUCTION, (q, 1 - q), inplace)
 
         for port_id, container_key in enumerate([MinMaxTensorStatistic.MIN_STAT, MinMaxTensorStatistic.MAX_STAT]):
             aggregator = MedianAggregator(num_samples=num_samples, aggregation_axes=(0, 1))
diff --git a/src/nncf/quantization/algorithms/min_max/algorithm.py b/src/nncf/quantization/algorithms/min_max/algorithm.py
index 643d46b61b5..5ff7c06f3b3 100644
--- a/src/nncf/quantization/algorithms/min_max/algorithm.py
+++ b/src/nncf/quantization/algorithms/min_max/algorithm.py
@@ -570,14 +570,12 @@ def _get_statistic_collector(
             else:
                 quantile = 1 - params.quantile_outlier_prob
             reducer = self._backend_entity.reducer_map[statistic_type](
-                reduction_axes=reduction_axes, inplace=inplace, quantile=[quantile]
+                axes=reduction_axes, inplace=inplace, quantile=[quantile]
             )
         else:
             if use_abs_max and statistic_type == StatisticsType.MAX:
                 statistic_type = StatisticsType.ABS_MAX
-            reducer = self._backend_entity.reducer_map[statistic_type](
-                reduction_axes=reduction_axes, inplace=inplace
-            )
+            reducer = self._backend_entity.reducer_map[statistic_type](axes=reduction_axes, inplace=inplace)
 
         kwargs = {
             "num_samples": num_samples,
diff --git a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
index 4a0afbae4ba..a146b729cee 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
@@ -28,6 +28,7 @@
 from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
 from nncf.common.utils.backend import BackendType
 from nncf.common.utils.backend import get_backend
+from nncf.experimental.common.tensor_statistics.collectors import AxesMode
 from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator
 from nncf.experimental.common.tensor_statistics.collectors import NoopAggregator
 from nncf.experimental.common.tensor_statistics.collectors import TensorCollector
@@ -305,13 +306,13 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin
             # For the ONNX backend, we can't calculate reduction axes before inference because the tensor shape
             # (actually, only the number of dimensions (ndim) is required) is unknown for some operations.
             if model_backend == BackendType.ONNX:
-                keep_axes = (self._backend_entity.get_activation_channel_axis(node_to_smooth, input_act_port),)
-                reduction_axes = None
+                axes_mode = AxesMode.KEEP
+                axes = (self._backend_entity.get_activation_channel_axis(node_to_smooth, input_act_port),)
             else:
-                keep_axes = None
-                reduction_axes = self._calculate_input_reduction_axes(graph, node_to_smooth, input_act_port)
+                axes_mode = AxesMode.REDUCTION
+                axes = self._calculate_input_reduction_axes(graph, node_to_smooth, input_act_port)
 
-            collector = self._create_tensor_collector(self._subset_size, keep_axes, reduction_axes)
+            collector = self._create_tensor_collector(self._subset_size, axes, axes_mode)
 
             container.add_statistic_point(StatisticPoint(target_point, collector, self._algorithm_key))
 
@@ -320,27 +321,25 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin
     def _create_tensor_collector(
         self,
         num_samples: int,
-        keep_axes: Optional[tuple[int, ...]] = None,
-        reduction_axes: Optional[tuple[int, ...]] = None,
+        axes: Optional[tuple[int, ...]],
+        axes_mode: AxesMode,
     ) -> TensorCollector:
         """
         Initializes and returns a configured tensor collector for the `SmoothQuant` algorithm.
 
         :param num_samples: Maximum number of samples to collect for the aggregator.
-        :param keep_axes: Axes to preserve during the reduction operation.
-        :param reduction_axes: Axes over which the reduction operation is applied.
+        :param axes: The axes specified for the reduction operation.
+        :param axes_mode: Defines how the specified axes are interpreted:
+            - `AxesMode.REDUCTION`: the given axes will be reduced.
+            - `AxesMode.KEEP`: all axes except the specified ones will be reduced.
         :return: A tensor collector configured with the specified reduction and aggregation logic.
         """
-        if reduction_axes is not None and keep_axes is not None:
-            msg = "Only one of `reduction_axes` or `keep_axes` should be specified, not both."
-            raise nncf.ValidationError(msg)
-
         collector = TensorCollector()
 
         abs_max_reducer_cls = self._backend_entity.get_abs_max_reducer_cls()
         collector.register_statistic_branch(
             STATISTIC_BRANCH_KEY,
-            abs_max_reducer_cls(reduction_axes, keep_axes, self._inplace_statistics),
+            abs_max_reducer_cls(axes, axes_mode, self._inplace_statistics),
             MaxAggregator(num_samples=num_samples),
         )
         shape_reducer_cls = self._backend_entity.get_shape_reducer_cls()
         collector.register_statistic_branch(
diff --git a/src/nncf/torch/tensor_statistics/collectors.py b/src/nncf/torch/tensor_statistics/collectors.py
index be2b8a0e319..9fe723f8339 100644
--- a/src/nncf/torch/tensor_statistics/collectors.py
+++ b/src/nncf/torch/tensor_statistics/collectors.py
@@ -277,7 +277,7 @@ def get_mean_percentile_statistic_collector(
     """
     tensor_collector = TensorCollector(_get_wrapped_percentile_tensor_statistic(target_shape=scale_shape))
     quantiles_to_collect = np.true_divide(percentiles_to_collect, 100)
-    reducer = QuantileReducer(reduction_axes=reduction_axes, quantile=quantiles_to_collect)
+    reducer = QuantileReducer(axes=reduction_axes, quantile=quantiles_to_collect)
     for output_port_id, p in enumerate(percentiles_to_collect):
         aggregator = MeanAggregator(
             aggregation_axes=aggregation_axes,
diff --git a/tests/common/experimental/test_reducers_and_aggregators.py b/tests/common/experimental/test_reducers_and_aggregators.py
index 33da32dab2c..8d84bf59601 100644
--- a/tests/common/experimental/test_reducers_and_aggregators.py
+++ b/tests/common/experimental/test_reducers_and_aggregators.py
@@ -220,7 +220,7 @@ def test_min_max_mean_reducers(self, reducer_name, ref, reducers):
         reduction_axes = (1, 2)
         input_ = np.arange(-26, 10).reshape((4, 3, 3))
         for i, reduction_axes_ in enumerate([reduction_axes, None]):
-            reducer = reducers[reducer_name](reduction_axes=reduction_axes_, inplace=False)
+            reducer = reducers[reducer_name](axes=reduction_axes_, inplace=False)
             val = reducer([self.get_nncf_tensor(input_, Dtype.FLOAT)])
             assert len(val) == 1
             assert fns.allclose(val[0], self.get_nncf_tensor(ref[i]))
@@ -233,7 +233,7 @@ def test_quantile_reducers(self, reducer_name, ref, reducers):
         input_ = np.arange(-26, 10).reshape((1, 4, 3, 3))
         input_[0][0][0] = -20000
         input_[0][0][1] = 10000
-        reducer = reducers[reducer_name](reduction_axes=reduction_axes, inplace=False)
+        reducer = reducers[reducer_name](axes=reduction_axes, inplace=False)
         val = reducer([self.get_nncf_tensor(input_, dtype=Dtype.FLOAT)])
         assert val.shape[0] == len(ref)
         for i, ref_ in enumerate(ref):
@@ -244,7 +244,7 @@ def test_quantile_reducers(self, reducer_name, ref, reducers):
         [[None, 16.1666], [(0,), 14.25], [(0, 1), 15.875], [(0, 1, 2), 16.1666]],
     )
     def test_mean_variance_reducer(self, axes, reference):
-        reducer = MeanVarianceReducer(reduction_axes=axes)
+        reducer = MeanVarianceReducer(axes)
         nncf_data = self.get_nncf_tensor(np.array(WEIGHT_COMPRESSION_REDUCERS_DATA), dtype=Dtype.FLOAT)
         result = reducer._reduce_out_of_place([nncf_data])
         assert len(result) == 1
@@ -255,18 +255,7 @@ def test_mean_variance_reducer(self, axes, reference):
         [[None, 10.0], [(0,), 4.16666], [(0, 1), 6.33333], [(0, 1, 2), 10.0]],
     )
     def test_mean_abs_max_reducer(self, axes, reference):
-        reducer = MeanAbsMaxReducer(reduction_axes=axes)
-        nncf_data = self.get_nncf_tensor(np.array(WEIGHT_COMPRESSION_REDUCERS_DATA), dtype=Dtype.FLOAT)
-        result = reducer._reduce_out_of_place([nncf_data])
-        assert len(result) == 1
-        assert fns.allclose(result[0], self.get_nncf_tensor(reference))
-
-    @pytest.mark.parametrize(
-        "axes, reference",
-        [[None, 10.0], [(1, 2), 4.16666], [(2,), 6.33333], [(), 10.0]],
-    )
-    def test_mean_abs_max_reducer_keep_axes(self, axes, reference):
-        reducer = MeanAbsMaxReducer(keep_axes=axes)
+        reducer = MeanAbsMaxReducer(axes)
         nncf_data = self.get_nncf_tensor(np.array(WEIGHT_COMPRESSION_REDUCERS_DATA), dtype=Dtype.FLOAT)
         result = reducer._reduce_out_of_place([nncf_data])
         assert len(result) == 1
         assert fns.allclose(result[0], self.get_nncf_tensor(reference))
@@ -277,18 +266,7 @@ def test_mean_abs_max_reducer_keep_axes(self, axes, reference):
         [[None, 16.1666], [(0,), 64.0], [(0, 1), 36.1875], [(0, 1, 2), 16.1666]],
     )
     def test_max_variance_reducer(self, axes, reference):
-        reducer = MaxVarianceReducer(reduction_axes=axes)
-        nncf_data = self.get_nncf_tensor(np.array(WEIGHT_COMPRESSION_REDUCERS_DATA), dtype=Dtype.FLOAT)
-        result = reducer._reduce_out_of_place([nncf_data])
-        assert len(result) == 1
-        assert fns.allclose(result[0], self.get_nncf_tensor(reference))
-
-    @pytest.mark.parametrize(
-        "axes, reference",
-        [[None, 16.1666], [(1, 2), 64.0], [(2,), 36.1875], [(), 16.1666]],
-    )
-    def test_max_variance_reducer_keep_axes(self, axes, reference):
-        reducer = MaxVarianceReducer(keep_axes=axes)
+        reducer = MaxVarianceReducer(axes)
         nncf_data = self.get_nncf_tensor(np.array(WEIGHT_COMPRESSION_REDUCERS_DATA), dtype=Dtype.FLOAT)
         result = reducer._reduce_out_of_place([nncf_data])
         assert len(result) == 1
@@ -588,10 +566,10 @@ def test_mad_percentile_aggregators_not_implemented_aggregation_axes(self, MAD_p
     def test_reducers_name_hash_equal(self, reducer_name, reducers):
         params = {}
         if reducer_name in ["min", "max", "abs_max", "mean"]:
-            params["reduction_axes"] = [None, (0, 1, 3), (1, 2, 3)]
+            params["axes"] = [None, (0, 1, 3), (1, 2, 3)]
             params["inplace"] = [False, True]
         elif reducer_name in ["quantile", "abs_quantile"]:
-            params["reduction_axes"] = [None, (0, 1, 3), (1, 2, 3)]
+            params["axes"] = [None, (0, 1, 3), (1, 2, 3)]
             params["quantile"] = [[0.01, 0.99], [0.001, 0.999]]
         elif reducer_name == "batch_mean":
             params["inplace"] = [False, True]
diff --git a/tests/common/experimental/test_tensor_collector_batch_size.py b/tests/common/experimental/test_tensor_collector_batch_size.py
index 13d2559cfaa..f91b876a8a5 100644
--- a/tests/common/experimental/test_tensor_collector_batch_size.py
+++ b/tests/common/experimental/test_tensor_collector_batch_size.py
@@ -73,7 +73,7 @@ def _create_tensor_collector(self, shape, inplace, reducer, aggregator) -> Tenso
         collector = TensorCollector(MinMaxTensorStatistic)
         reduction_axes = get_reduction_axes([batch_axis], shape)
         aggregation_axes = (0, 1)
-        kwargs = {"reduction_axes": reduction_axes, "inplace": inplace}
+        kwargs = {"axes": reduction_axes, "inplace": inplace}
         reducer = reducer(**kwargs)
         aggregator = aggregator(
             aggregation_axes=aggregation_axes,
diff --git a/tests/common/test_statistics_aggregator.py b/tests/common/test_statistics_aggregator.py
index 64ee950c323..91fe12e8e0b 100644
--- a/tests/common/test_statistics_aggregator.py
+++ b/tests/common/test_statistics_aggregator.py
@@ -839,10 +839,10 @@ def test_same_collectors_different_attrs_dont_merge(self, statistics_type, test_
         model = params["model"](dataset_samples)
         params = {}
         if statistics_type in [StatisticsType.MIN, StatisticsType.MAX, StatisticsType.ABS_MAX, StatisticsType.MEAN]:
-            params["reduction_axes"] = [None, (0, 1, 3), (1, 2, 3)]
+            params["axes"] = [None, (0, 1, 3), (1, 2, 3)]
             params["inplace"] = [False, True]
         elif statistics_type in [StatisticsType.QUANTILE, StatisticsType.ABS_QUANTILE]:
-            params["reduction_axes"] = [None, (0, 1, 3), (1, 2, 3)]
+            params["axes"] = [None, (0, 1, 3), (1, 2, 3)]
             params["quantile"] = [[0.01, 0.99], [0.001, 0.999]]
         elif statistics_type == "batch_mean":
             params["inplace"] = [False, True]
diff --git a/tests/cross_fw/test_templates/test_channel_alignment.py b/tests/cross_fw/test_templates/test_channel_alignment.py
index 697ee2c3505..bc5ca86daae 100644
--- a/tests/cross_fw/test_templates/test_channel_alignment.py
+++ b/tests/cross_fw/test_templates/test_channel_alignment.py
@@ -547,7 +547,7 @@ def test_statistic_collectors(self, inplace_ref, q_ref):
         assert len(statistic_collector.reducers) == 1
         reducer = statistic_collector.reducers.pop()
         assert isinstance(reducer, QuantileReducer)
-        assert reducer._reduction_axes == reduction_axes_ref
+        assert reducer._axes == reduction_axes_ref
         assert np.allclose(reducer._quantile, (q_ref, 1 - q_ref))
 
         assert len(statistic_collector.aggregators) == 2
diff --git a/tests/cross_fw/test_templates/test_quantizer_config.py b/tests/cross_fw/test_templates/test_quantizer_config.py
index f9c58b4530c..71c5867560a 100644
--- a/tests/cross_fw/test_templates/test_quantizer_config.py
+++ b/tests/cross_fw/test_templates/test_quantizer_config.py
@@ -69,7 +69,7 @@ def check_is_mean_min_max_statistic_collector(self, tensor_collector: TensorColl
         assert aggrs[0].__class__ == aggrs[1].__class__
 
     def get_reduction_axes(self, reducer: TensorReducerBase) -> ReductionAxes:
-        return reducer._reduction_axes
+        return reducer._axes
 
     @staticmethod
     def _transform_to_inference_graph(nncf_graph: NNCFGraph, min_max_algo: MinMaxQuantization):
diff --git a/tests/openvino/native/quantization/test_reducers_and_aggregators.py b/tests/openvino/native/quantization/test_reducers_and_aggregators.py
index 1f0d5a65e9d..a47773ff82f 100644
--- a/tests/openvino/native/quantization/test_reducers_and_aggregators.py
+++ b/tests/openvino/native/quantization/test_reducers_and_aggregators.py
@@ -81,7 +81,7 @@ def test_mixed_precision_reducers(self, reducer_cls, reduction_axes, ref_value,
         input_ = np.arange(2 * 4 * 8).reshape(2, 4, 8)
         input_[:, :2] *= 2
 
-        reducer = reducer_cls(reduction_axes=reduction_axes, inplace=inplace)
+        reducer = reducer_cls(axes=reduction_axes, inplace=inplace)
 
         inplace_fn = reducer.get_inplace_fn()
 
         ov_model_input = opset.parameter(input_.shape)
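
For reference, a short sketch of how the two `AxesMode` strategies introduced in
PATCH 16 resolve to concrete reduction axes (the calls assume the
`determine_reduction_axes` helper shown above; single-axis results are used so
the assertions do not depend on set iteration order):

    from nncf.experimental.common.tensor_statistics.collectors import AxesMode
    from nncf.experimental.common.tensor_statistics.collectors import determine_reduction_axes

    # REDUCTION mode reduces exactly the axes that were passed in.
    assert determine_reduction_axes(ndim=3, axes=(1, 2), axes_mode=AxesMode.REDUCTION) == (1, 2)

    # KEEP mode reduces everything except the given axes; negative indices are
    # normalized against ndim, so keeping axis -1 of a 2D tensor reduces axis 0.
    assert determine_reduction_axes(ndim=2, axes=(-1,), axes_mode=AxesMode.KEEP) == (0,)

    # axes=None means "reduce all axes", regardless of the mode.
    assert determine_reduction_axes(ndim=3, axes=None, axes_mode=AxesMode.KEEP) == (0, 1, 2)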

From 1458577621b3ca10833a9657d65da109de3c8db4 Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Thu, 23 Oct 2025 11:16:42 +0100
Subject: [PATCH 17/22] add tests

---
 tests/cross_fw/test_templates/test_smooth_quant.py | 7 +++++++
 tests/openvino/native/test_smooth_quant.py         | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/tests/cross_fw/test_templates/test_smooth_quant.py b/tests/cross_fw/test_templates/test_smooth_quant.py
index cd134355884..89c43dba40e 100644
--- a/tests/cross_fw/test_templates/test_smooth_quant.py
+++ b/tests/cross_fw/test_templates/test_smooth_quant.py
@@ -19,6 +19,8 @@
 from nncf.common.factory import NNCFGraphFactory
 from nncf.common.factory import StatisticsAggregatorFactory
 from nncf.common.graph.graph import NNCFNode
+from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer
+from nncf.experimental.common.tensor_statistics.collectors import ShapeReducer
 from nncf.parameters import ModelType
 from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
 from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters
@@ -292,3 +294,8 @@ def test_get_weight_channel_axis(self, node_metatype, layer_attributes, referenc
             pytest.xfail("Expected exception")
 
         assert activation_channel_axis == reference_value
+
+    def test_reducers_cls(self):
+        backend = self.get_backend()
+        assert backend.get_abs_max_reducer_cls() is AbsMaxReducer
+        assert backend.get_shape_reducer_cls() is ShapeReducer
diff --git a/tests/openvino/native/test_smooth_quant.py b/tests/openvino/native/test_smooth_quant.py
index 60780122084..a5c5290965e 100644
--- a/tests/openvino/native/test_smooth_quant.py
+++ b/tests/openvino/native/test_smooth_quant.py
@@ -21,6 +21,8 @@
 from nncf.openvino.graph.layout import OVLayoutElem
 from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionMetatype
 from nncf.openvino.graph.metatypes.openvino_metatypes import OVMatMulMetatype
+from nncf.openvino.statistics.collectors import OVAbsMaxReducer
+from nncf.openvino.statistics.collectors import OVShapeReducer
 from nncf.quantization.algorithms.smooth_quant.openvino_backend import OVSmoothQuantAlgoBackend
 from tests.cross_fw.test_templates.helpers import ConvTestModel
 from tests.cross_fw.test_templates.helpers import LinearMultiShapeModel
@@ -182,3 +184,8 @@ def test_get_weight_channel_axis(self, node_metatype, weights_layout, reference_
     @staticmethod
     def get_matmul_metatype():
         return [OVMatMulMetatype]
+
+    def test_reducers_cls(self):
+        backend = self.get_backend()
+        assert backend.get_abs_max_reducer_cls() is OVAbsMaxReducer
+        assert backend.get_shape_reducer_cls() is OVShapeReducer

From 55e171fff57270c820e47ca1c07449687d8fd425 Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Thu, 23 Oct 2025 11:51:54 +0100
Subject: [PATCH 18/22] add tests

---
 .../test_reducers_and_aggregators.py          | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tests/common/experimental/test_reducers_and_aggregators.py b/tests/common/experimental/test_reducers_and_aggregators.py
index 8d84bf59601..c6027432c5b 100644
--- a/tests/common/experimental/test_reducers_and_aggregators.py
+++ b/tests/common/experimental/test_reducers_and_aggregators.py
@@ -23,6 +23,7 @@
 from nncf.common.graph.layer_attributes import Dtype
 from nncf.common.tensor import NNCFTensor
 from nncf.experimental.common.tensor_statistics.collectors import AggregationAxes
+from nncf.experimental.common.tensor_statistics.collectors import AxesMode
 from nncf.experimental.common.tensor_statistics.collectors import HAWQAggregator
 from nncf.experimental.common.tensor_statistics.collectors import HistogramAggregator
 from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator
@@ -39,6 +40,7 @@
 from nncf.experimental.common.tensor_statistics.collectors import PercentileAggregator
 from nncf.experimental.common.tensor_statistics.collectors import RawReducer
 from nncf.experimental.common.tensor_statistics.collectors import ShapeReducer
+from nncf.experimental.common.tensor_statistics.collectors import determine_reduction_axes
 from nncf.experimental.common.tensor_statistics.statistics import MinMaxTensorStatistic
 from nncf.tensor import Tensor
 from nncf.tensor import functions as fns
@@ -716,3 +718,23 @@ def test_histogramm_aggregator(self, ref_hist, ref_min, ref_max, ref_aggr_min, r
         assert all(isinstance(val, Tensor) for val in aggr.values())
         assert fns.allclose(aggr[MinMaxTensorStatistic.MIN_STAT], ref_aggr_min)
         assert fns.allclose(aggr[MinMaxTensorStatistic.MAX_STAT], ref_aggr_max)
+
+
+@pytest.mark.parametrize(
+    "ndim, axes, axes_mode, expected_reduction_axes",
+    [
+        [3, (0, 1), AxesMode.REDUCTION, (0, 1)],
+        [3, None, AxesMode.REDUCTION, (0, 1, 2)],
+        [3, None, AxesMode.KEEP, (0, 1, 2)],
+        [2, (-1,), AxesMode.KEEP, (0,)],
+        [2, (-2,), AxesMode.KEEP, (1,)],
+        [2, (0,), AxesMode.KEEP, (1,)],
+        [2, (1,), AxesMode.KEEP, (0,)],
+        [0, (), AxesMode.KEEP, ()],
+    ],
+)
+def test_determine_reduction_axes(
+    ndim: int, axes: tuple[int, ...], axes_mode: AxesMode, expected_reduction_axes: tuple[int, ...]
+):
+    actual_reduction_axes = determine_reduction_axes(ndim, axes, axes_mode)
+    assert actual_reduction_axes == expected_reduction_axes
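
The same resolution can also be exercised through a reducer rather than the
helper. A minimal sketch in the style of the tests above; it calls the private
`_reduce_out_of_place` directly, as the suite itself does, and the input tensor
is purely illustrative:

    import numpy as np

    from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer
    from nncf.experimental.common.tensor_statistics.collectors import AxesMode
    from nncf.tensor import Tensor

    x = Tensor(np.arange(-12, 12, dtype=np.float32).reshape(2, 3, 4))

    # Keep the channel axis (1) and reduce axes 0 and 2; since the reducer uses
    # keepdims=True, the collected statistic has shape (1, 3, 1).
    reducer = AbsMaxReducer(axes=(1,), axes_mode=AxesMode.KEEP)
    (result,) = reducer._reduce_out_of_place([x])
    assert tuple(result.shape) == (1, 3, 1)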

From b9365c7d4fe44f882deffd8cd6f87774407de26 Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Thu, 23 Oct 2025 12:07:32 +0100
Subject: [PATCH 19/22] add tests

---
 tests/onnx/common.py                  | 61 ++++++++++++++++++++++++++-
 tests/onnx/test_nncf_graph_builder.py | 28 ++++++++++++
 2 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/tests/onnx/common.py b/tests/onnx/common.py
index 50cd89176db..f2d47eefebd 100644
--- a/tests/onnx/common.py
+++ b/tests/onnx/common.py
@@ -34,6 +34,31 @@ def __init__(self):
         self._outputs = []
         self._graph_name = "onnx-graph"
 
+    def add_shape(self, data: str, output: Optional[str] = None) -> str:
+        i = len(self._nodes)
+
+        output = f"Shape_{i}_output" if output is None else output
+        self._nodes.append(onnx.helper.make_node(op_type="Shape", inputs=[data], outputs=[output], name=f"Shape_{i}"))
+        return output
+
+    def add_gather(self, data: str, indices: str, axis: int = 0, output: Optional[str] = None) -> str:
+        i = len(self._nodes)
+
+        output = f"Gather_{i}_output" if output is None else output
+        self._nodes.append(
+            onnx.helper.make_node(
+                op_type="Gather", inputs=[data, indices], outputs=[output], axis=axis, name=f"Gather_{i}"
+            )
+        )
+        return output
+
+    def add_reshape(self, data: str, shape: str, output: Optional[str] = None) -> str:
+        i = len(self._nodes)
+
+        output = f"Reshape_{i}_output" if output is None else output
+        self._nodes.append(onnx.helper.make_node("Reshape", inputs=[data, shape], outputs=[output]))
+        return output
+
     def add_input(self, name: str, shape: tuple[int]) -> str:
         self._inputs.append(onnx.helper.make_tensor_value_info(name, onnx.TensorProto.FLOAT, shape))
         return name
@@ -63,6 +88,17 @@ def add_matmul(
         )
         return output
 
+    def add_initializer(self, data: np.ndarray) -> str:
+        i = len(self._nodes)
+
+        name = f"Initializer_{i}"
+        tensor_dtype = onnx.helper.np_dtype_to_tensor_dtype(data.dtype)
+        initializer = onnx.helper.make_tensor(
+            name=name, data_type=tensor_dtype, dims=data.shape, vals=data.tobytes(), raw=True
+        )
+        self._initializers.append(initializer)
+        return name
+
     def add_gemm(
         self,
         input: str,
@@ -133,10 +169,33 @@ def add_selu(self, input: str, output: Optional[str] = None) -> str:
         self._nodes.append(onnx.helper.make_node(op_type="Selu", inputs=[input], outputs=[output], name=f"Selu_{i}"))
         return output
 
+    def add_constant(self, data: np.ndarray, output: Optional[str] = None) -> str:
+        i = len(self._nodes)
+
+        output = f"Constant_{i}_output" if output is None else output
+
+        tensor_dtype = onnx.helper.np_dtype_to_tensor_dtype(data.dtype)
+
+        self._nodes.append(
+            onnx.helper.make_node(
+                "Constant",
+                inputs=[],
+                outputs=[output],
+                value=onnx.helper.make_tensor(
+                    name=f"Constant_{i}",
+                    data_type=tensor_dtype,
+                    dims=data.shape,
+                    vals=data.flatten(),
+                ),
+            )
+        )
+
+        return output
+
     def add_unsqueeze(self, input: str, axes: tuple[int, ...], output: Optional[str] = None) -> str:
         i = len(self._nodes)
 
-        axes_name = "Unsqueeze_{i}_axes"
+        axes_name = f"Unsqueeze_{i}_axes"
         axes_data = np.array(axes, dtype=np.int64)
         axes_initializer = onnx.helper.make_tensor(
             name=axes_name,
diff --git a/tests/onnx/test_nncf_graph_builder.py b/tests/onnx/test_nncf_graph_builder.py
index d81719f73f3..24314e56fe8 100644
--- a/tests/onnx/test_nncf_graph_builder.py
+++ b/tests/onnx/test_nncf_graph_builder.py
@@ -11,14 +11,17 @@
 
 import os
 
+import numpy as np
 import onnx
 import pytest
 import torch
 
+from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXMatMulMetatype
 from nncf.onnx.graph.model_transformer import ONNXModelTransformer
 from nncf.onnx.graph.nncf_graph_builder import GraphConverter
 from tests.cross_fw.shared.nx_graph import compare_nx_graph_with_reference
 from tests.cross_fw.shared.paths import TEST_ROOT
+from tests.onnx.common import ModelBuilder
 from tests.onnx.conftest import ONNX_TEST_ROOT
 from tests.onnx.models import ALL_SYNTHETIC_MODELS
 from tests.onnx.models import OneConvolutionalModel
@@ -112,3 +115,28 @@ def test_add_output_nodes_with_no_parents_node():
     nx_graph = nncf_graph.get_graph_for_structure_analysis(extended=True)
     path_to_dot = REFERENCE_GRAPHS_DIR / "synthetic" / "output_with_no_parents_model.dot"
     compare_nx_graph_with_reference(nx_graph, path_to_dot, check_edge_attrs=True)
+
+
+@pytest.mark.parametrize("opset_version, ref_shape", [[13, ()], [19, (-1, -1, -1)]])
+def test_unknown_shape(opset_version: int, ref_shape: tuple[int, ...]):
+    mb = ModelBuilder()
+
+    x = mb.add_input("x", ("batch", 3, 4, 5))
+
+    y = mb.add_shape(x)
+    y = mb.add_gather(y, mb.add_initializer(np.array(0, dtype=np.int64)))
+    y = mb.add_unsqueeze(y, axes=[0])
+    y = mb.add_concat([y, mb.add_initializer(np.array([-1, 60], dtype=np.int64))], axis=0)
+
+    x = mb.add_reshape(x, y)
+    x = mb.add_matmul(x, (60, 10))
+
+    mb.add_output(x, ("batch", 1, 10))
+
+    model = mb.build(opset_version, ir_version=9)
+
+    graph = GraphConverter.create_nncf_graph(model)
+    matmul = graph.get_nodes_by_metatypes([ONNXMatMulMetatype])[0]  # only 1 matmul
+
+    for e in graph.get_input_edges(matmul):
+        assert e.tensor_shape == ref_shape

From bbdd03738d32cf03a8be67d22257ac961a380e8e Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Fri, 24 Oct 2025 12:37:01 +0100
Subject: [PATCH 20/22] reply to comments

---
 .../algorithms/smooth_quant/algorithm.py      | 25 +--------
 .../algorithms/smooth_quant/backend.py        | 31 +++++++++++++
 .../algorithms/smooth_quant/onnx_backend.py   |  6 ++++
 3 files changed, 38 insertions(+), 24 deletions(-)

diff --git a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
index a146b729cee..8ad401bab30 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py
@@ -21,7 +21,6 @@
 from nncf.common.graph.graph import NNCFNode
 from nncf.common.graph.operator_metatypes import OperatorMetatype
 from nncf.common.graph.transformations.layout import TransformationLayout
-from nncf.common.graph.utils import get_reduction_axes
 from nncf.common.logging import nncf_logger
 from nncf.common.logging.track_progress import track
 from nncf.common.tensor_statistics.statistic_point import StatisticPoint
@@ -285,7 +284,6 @@ def _get_statistics_for_node(
         return statistics_for_node
 
     def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer:
-        model_backend = get_backend(model)
         self._set_backend_entity(model)
 
         alpha_map = self._get_alpha_map()
@@ -305,12 +303,7 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin
             # Therefore, `keep_axes` and `inplace` cannot be used together with the OpenVINO backend.
             # For the ONNX backend, we can't calculate reduction axes before inference because the tensor shape
             # (actually, only the number of dimensions (ndim) is required) is unknown for some operations.
-            if model_backend == BackendType.ONNX:
-                axes_mode = AxesMode.KEEP
-                axes = (self._backend_entity.get_activation_channel_axis(node_to_smooth, input_act_port),)
-            else:
-                axes_mode = AxesMode.REDUCTION
-                axes = self._calculate_input_reduction_axes(graph, node_to_smooth, input_act_port)
+            axes_mode, axes = self._backend_entity.get_tensor_collector_axes(graph, node_to_smooth, input_act_port)
 
             collector = self._create_tensor_collector(self._subset_size, axes, axes_mode)
 
@@ -436,22 +429,6 @@ def _calculate_weight_scale(self, scale_value: Tensor, node: NNCFNode, weights_v
             return weight_scale
         return scale_value
 
-    def _calculate_input_reduction_axes(self, nncf_graph: NNCFGraph, node: NNCFNode, input_port: int) -> tuple[int]:
-        """
-        Returns reduction axes for specified input.
-
-        :param nncf_graph: NNCFGraph instance.
-        :param node: NNCFNode to check.
-        :param input_port: Specified input port id.
-        :return: Calculated reduction axes.
-        """
-        shape = nncf_graph.get_input_edge_by_port_id(node, input_port).tensor_shape
-        reduction_axes = tuple([])
-        if len(shape) > 1:
-            channel_axis = self._backend_entity.get_activation_channel_axis(node, input_port)
-            reduction_axes = get_reduction_axes((channel_axis,), shape)
-        return reduction_axes
-
     def _process_weight_statistics(self, node: NNCFNode, weights: Tensor) -> Tensor:
         """
         Returns processed weight statistics for node.
diff --git a/src/nncf/quantization/algorithms/smooth_quant/backend.py b/src/nncf/quantization/algorithms/smooth_quant/backend.py
index 5fc003041ba..74b1afa2baa 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/backend.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/backend.py
@@ -19,8 +19,10 @@
 from nncf.common.graph.transformations.commands import TargetPoint
 from nncf.common.graph.transformations.commands import TargetType
 from nncf.common.graph.transformations.commands import TransformationCommand
+from nncf.common.graph.utils import get_reduction_axes
 from nncf.common.tensor_statistics.statistic_point import StatisticPoint
 from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer
+from nncf.experimental.common.tensor_statistics.collectors import AxesMode
 from nncf.experimental.common.tensor_statistics.collectors import ShapeReducer
 from nncf.tensor import Tensor
 
@@ -204,3 +206,32 @@ def get_shape_reducer_cls() -> type[ShapeReducer]:
         :return: The `ShapeReducer` class.
         """
         return ShapeReducer
+
+    def calculate_input_reduction_axes(self, nncf_graph: NNCFGraph, node: NNCFNode, input_port: int) -> tuple[int]:
+        """
+        Returns reduction axes for specified input.
+
+        :param nncf_graph: NNCFGraph instance.
+        :param node: NNCFNode to check.
+        :param input_port: Specified input port id.
+        :return: Calculated reduction axes.
+        """
+        shape = nncf_graph.get_input_edge_by_port_id(node, input_port).tensor_shape
+        reduction_axes = tuple([])
+        if len(shape) > 1:
+            channel_axis = self._backend_entity.get_activation_channel_axis(node, input_port)
+            reduction_axes = get_reduction_axes((channel_axis,), shape)
+        return reduction_axes
+
+    def get_tensor_collector_axes(self, nncf_graph: NNCFGraph, node_to_smooth: NNCFNode, input_port: int):
+        """
+        Returns axes and axes mode required for tensor collector.
+
+        :param nncf_graph: NNCFGraph instance.
+        :param node_to_smooth: NNCFNode to smooth.
+        :param input_port: Specified input port id.
+        :return: Axes and axes mode required for tensor collector.
+        """
+        axes_mode = AxesMode.REDUCTION
+        axes = self._calculate_input_reduction_axes(nncf_graph, node_to_smooth, input_port)
+        return axes_mode, axes
diff --git a/src/nncf/quantization/algorithms/smooth_quant/onnx_backend.py b/src/nncf/quantization/algorithms/smooth_quant/onnx_backend.py
index f374f350afe..250b09c68e0 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/onnx_backend.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/onnx_backend.py
@@ -21,6 +21,7 @@
 from nncf.common.graph.operator_metatypes import OperatorMetatype
 from nncf.common.graph.transformations.commands import TargetType
 from nncf.common.tensor_statistics.statistic_point import StatisticPoint
+from nncf.experimental.common.tensor_statistics.collectors import AxesMode
 from nncf.onnx.graph.metatypes.groups import MATMUL_METATYPES
 from nncf.onnx.graph.metatypes.groups import OPERATIONS_WITH_WEIGHTS
 from nncf.onnx.graph.metatypes.groups import QUANTIZE_AGNOSTIC_OPERATIONS
@@ -229,3 +230,8 @@ def filter_func(point: StatisticPoint) -> bool:
         )
 
         return filter_func
+
+    def get_tensor_collector_axes(self, nncf_graph: NNCFGraph, node_to_smooth: NNCFNode, input_port: int):
+        axes_mode = AxesMode.KEEP
+        axes = (self._backend_entity.get_activation_channel_axis(node_to_smooth, input_port),)
+        return axes_mode, axes

From e09ff06eb3cb405b4581ea31a32bcf2df15ff928 Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Fri, 24 Oct 2025 12:49:45 +0100
Subject: [PATCH 21/22] minor fix

---
 src/nncf/quantization/algorithms/smooth_quant/backend.py      | 4 ++--
 src/nncf/quantization/algorithms/smooth_quant/onnx_backend.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/nncf/quantization/algorithms/smooth_quant/backend.py b/src/nncf/quantization/algorithms/smooth_quant/backend.py
index 74b1afa2baa..0555e1e8747 100644
--- a/src/nncf/quantization/algorithms/smooth_quant/backend.py
+++ b/src/nncf/quantization/algorithms/smooth_quant/backend.py
@@ -219,7 +219,7 @@ def calculate_input_reduction_axes(self, nncf_graph: NNCFGraph, node: NNCFNode,
         shape = nncf_graph.get_input_edge_by_port_id(node, input_port).tensor_shape
         reduction_axes = tuple([])
         if len(shape) > 1:
-            channel_axis = self._backend_entity.get_activation_channel_axis(node, input_port)
+            channel_axis = self.get_activation_channel_axis(node, input_port)
             reduction_axes = get_reduction_axes((channel_axis,), shape)
         return reduction_axes
 
@@ -233,5 +233,5 @@ def get_tensor_collector_axes(self, nncf_graph: NNCFGraph, node_to_smooth: NNCFN
         :return: Axes and axes mode required for tensor collector.
""" axes_mode = AxesMode.REDUCTION - axes = self._calculate_input_reduction_axes(nncf_graph, node_to_smooth, input_port) + axes = self.calculate_input_reduction_axes(nncf_graph, node_to_smooth, input_port) return axes_mode, axes diff --git a/src/nncf/quantization/algorithms/smooth_quant/onnx_backend.py b/src/nncf/quantization/algorithms/smooth_quant/onnx_backend.py index 250b09c68e0..b88bd694a3c 100644 --- a/src/nncf/quantization/algorithms/smooth_quant/onnx_backend.py +++ b/src/nncf/quantization/algorithms/smooth_quant/onnx_backend.py @@ -233,5 +233,5 @@ def filter_func(point: StatisticPoint) -> bool: def get_tensor_collector_axes(self, nncf_graph: NNCFGraph, node_to_smooth: NNCFNode, input_port: int): axes_mode = AxesMode.KEEP - axes = (self._backend_entity.get_activation_channel_axis(node_to_smooth, input_port),) + axes = (self.get_activation_channel_axis(node_to_smooth, input_port),) return axes_mode, axes From 6579eb451eb1dad7b4fe1fef90554e3c438c13bb Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Fri, 24 Oct 2025 13:07:27 +0100 Subject: [PATCH 22/22] apply comments --- src/nncf/quantization/algorithms/smooth_quant/algorithm.py | 2 +- tests/cross_fw/test_templates/test_smooth_quant.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py index 8ad401bab30..4205aae4f5d 100644 --- a/src/nncf/quantization/algorithms/smooth_quant/algorithm.py +++ b/src/nncf/quantization/algorithms/smooth_quant/algorithm.py @@ -256,7 +256,7 @@ def _retrieve_shape( ): stats = tensor_collector.get_statistics() shape = stats[SHAPE_BRANCH_KEY] - shape = tuple() if shape is None else tuple(shape.tolist()) + shape = tuple(shape.tolist()) items.append((node, input_port, shape)) diff --git a/tests/cross_fw/test_templates/test_smooth_quant.py b/tests/cross_fw/test_templates/test_smooth_quant.py index 89c43dba40e..b3b54351372 100644 --- a/tests/cross_fw/test_templates/test_smooth_quant.py +++ b/tests/cross_fw/test_templates/test_smooth_quant.py @@ -237,7 +237,13 @@ def test_empty_stats(self, mocker, tmpdir): algo._set_backend_entity = lambda model: backend_entity mocked_transformer = mocker.MagicMock() + empty_shapes = [ + (node, port, ()) for node, port in algo._get_nodes_to_smooth_data(graph, algo._get_alpha_map().keys()) + ] mocker.patch("nncf.common.factory.ModelTransformerFactory.create", return_value=mocked_transformer) + mocker.patch( + "nncf.quantization.algorithms.smooth_quant.algorithm.SmoothQuant._retrieve_shape", return_value=empty_shapes + ) algo.apply(model, graph, algo_statistic_points) mocked_transformer.transform.assert_called_once()