-
Notifications
You must be signed in to change notification settings - Fork 265
[ONNX][SmoothQuant] Introduce new axes and axes_mode parameters #3687
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 15 commits
6a0fbf2
17062a6
2d08025
57d828f
a801647
53c6d72
97cab6a
3a96172
8f16b32
acb88d7
104dea5
4c11627
8b2ea5f
dd61ed7
92dee3e
aa8691f
1458577
55e171f
b9365c7
bbdd037
e09ff06
6579eb4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -43,13 +43,25 @@ class TensorReducerBase(ABC): | |||||
| the specified rule. Could handle tensors inplace or out of place. | ||||||
| """ | ||||||
|
|
||||||
| def __init__(self, reduction_axes: Optional[ReductionAxes] = None, inplace: bool = False): | ||||||
| def __init__( | ||||||
| self, | ||||||
| reduction_axes: Optional[ReductionAxes] = None, | ||||||
| keep_axes: Optional[tuple[int, ...]] = None, | ||||||
|
||||||
| keep_axes: Optional[tuple[int, ...]] = None, | |
| keep_axes: Optional[Axes] = None, |
Perhaps we could rename ReductionAxes and reuse them there?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
ljaljushkin marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps we should update __hash__ methods for some of the TensorReducerBase as well
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -28,13 +28,17 @@ | |||
| from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer | ||||
| from nncf.common.utils.backend import BackendType | ||||
| from nncf.common.utils.backend import get_backend | ||||
| from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator | ||||
| from nncf.experimental.common.tensor_statistics.collectors import NoopAggregator | ||||
| from nncf.experimental.common.tensor_statistics.collectors import TensorCollector | ||||
| from nncf.quantization.algorithms.algorithm import Algorithm | ||||
| from nncf.tensor import Tensor | ||||
| from nncf.tensor import functions as fns | ||||
|
|
||||
| TModel = TypeVar("TModel") | ||||
| TTensor = TypeVar("TTensor") | ||||
| STATISTIC_BRANCH_KEY = "abs_max" | ||||
| SHAPE_BRANCH_KEY = "shape" | ||||
| ALPHA_MAP = {"convolution": 0.05, "matmul": 0.95} | ||||
|
|
||||
|
|
||||
|
|
@@ -98,6 +102,10 @@ def _set_backend_entity(self, model: TModel) -> None: | |||
| msg = f"Cannot return backend-specific entity because {model_backend.value} is not supported!" | ||||
| raise nncf.UnsupportedBackendError(msg) | ||||
|
|
||||
| # Only the OpenVINO backend supports in-place statistics, so we should set this variable here. | ||||
| if model_backend != BackendType.OPENVINO: | ||||
| self._inplace_statistics = False | ||||
nikita-savelyevv marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
|
|
||||
| def apply( | ||||
| self, | ||||
| model: TModel, | ||||
|
|
@@ -108,18 +116,19 @@ def apply( | |||
| self._set_backend_entity(model) | ||||
| alpha_map = self._get_alpha_map() | ||||
|
|
||||
| nodes_to_smooth_data = self._get_nodes_to_smooth_data(graph, alpha_map.keys()) | ||||
| model_transformer = ModelTransformerFactory.create(model) | ||||
| transformation_layout = TransformationLayout() | ||||
|
|
||||
| node_groups = self._group_nodes_by_source(nodes_to_smooth_data, graph) | ||||
| nodes = self._get_nodes_to_smooth_data(graph, alpha_map.keys()) | ||||
| nodes = self._retrieve_shape(nodes, statistic_points) | ||||
| node_groups = self._group_nodes_by_source(nodes, graph) | ||||
|
|
||||
| transformation_layout = TransformationLayout() | ||||
| for group_id, nodes in track(node_groups.items(), description="Applying Smooth Quant"): | ||||
| best_scale = None | ||||
| best_ratio = 0.0 | ||||
| empty_statistic = False | ||||
|
|
||||
| source_node, input_port_id, source_output_port_id, shape = group_id | ||||
|
|
||||
| for node_to_smooth in nodes: | ||||
| source_node, input_port_id, source_output_port_id, _ = group_id | ||||
| activations_value = self._get_statistics_for_node( | ||||
| statistic_points, node_to_smooth.node_name, input_port_id | ||||
| ) | ||||
|
|
@@ -168,16 +177,15 @@ def apply( | |||
| ) | ||||
| transformation_layout.register(weight_update_command) | ||||
|
|
||||
| activations_by_output_id = {e.output_port_id: e for e in graph.get_output_edges(source_node)} | ||||
| activations_shape = activations_by_output_id[source_output_port_id].tensor_shape | ||||
| activation_scale = self._calculate_activation_scale(best_scale, activations_shape, nodes, graph) | ||||
| activation_scale = self._calculate_activation_scale(best_scale, shape, nodes, graph) | ||||
|
|
||||
| scale_node_name = self._create_scale_node_name(source_node.node_name, source_output_port_id) | ||||
| scale_insertion_command = self._backend_entity.scale_insertion_command( | ||||
| source_node, activation_scale.data, source_output_port_id, nodes, scale_node_name | ||||
| ) | ||||
| transformation_layout.register(scale_insertion_command) | ||||
|
|
||||
| model_transformer = ModelTransformerFactory.create(model) | ||||
| transformed_model = model_transformer.transform(transformation_layout) | ||||
| return transformed_model | ||||
|
|
||||
|
|
@@ -204,27 +212,56 @@ def _calculate_scale_and_ratio( | |||
| ratio = scales.min() / (scales.max() + eps) | ||||
| return scales, ratio | ||||
|
|
||||
| def _group_nodes_by_source(self, nodes_to_smooth: list[dict], nncf_graph: NNCFGraph) -> dict[tuple, list]: | ||||
| def _group_nodes_by_source( | ||||
| self, nodes_to_smooth: list[tuple[NNCFNode, int, tuple[int, ...]]], nncf_graph: NNCFGraph | ||||
| ) -> dict[tuple, list]: | ||||
| """ | ||||
| Groups nodes that will be smoothed by source (parent node). | ||||
|
|
||||
| :param nodes_to_smooth: List of the nodes that will be smoothed. | ||||
| :param nodes_to_smooth: A list of tuples where each tuple consists of a node, an input port, and the | ||||
| shape of the tensor associated with that node and input port. | ||||
| :param nncf_graph: NNCFGraph instance. | ||||
| :return: Dictionary with the source info as key and grouped nodes as value. | ||||
| """ | ||||
| groups = defaultdict(list) | ||||
| for node_data in nodes_to_smooth: | ||||
| node_to_smooth = node_data["node_to_smooth"] | ||||
| input_act_port = node_data["input_act_port"] | ||||
| for node_to_smooth, input_act_port, shape in nodes_to_smooth: | ||||
| source_node = nncf_graph.get_input_edge_by_port_id(node_to_smooth, input_act_port).from_node | ||||
| edge = nncf_graph.get_edge(source_node, node_to_smooth) | ||||
| # Such group_id (with node, ports, and shape as a hash) allows us to be confident | ||||
| # that all sensitive parameters for successor nodes are equal. | ||||
| group_id = (source_node, input_act_port, edge.output_port_id, hash(str(edge.tensor_shape))) | ||||
| group_id = (source_node, input_act_port, edge.output_port_id, shape) | ||||
| groups[group_id].append(node_to_smooth) | ||||
|
|
||||
| return groups | ||||
|
|
||||
| def _retrieve_shape( | ||||
| self, nodes: list[tuple[NNCFNode, int]], statistic_points: StatisticPointsContainer | ||||
| ) -> list[tuple[NNCFNode, int, tuple[int, ...]]]: | ||||
| """ | ||||
| Retrieves the shapes of tensors associated with specific nodes and input ports | ||||
| from the given statistic points container. | ||||
|
|
||||
| :param nodes: A list of tuples, each containing a node and its corresponding input port index. | ||||
| :param statistic_points: Container holding statistics, used to retrieve tensor shapes. | ||||
| :return: A list of tuples where each tuple consists of a node, an input port, and the | ||||
| shape of the tensor associated with that node and input port. If shape information is | ||||
| not available, an empty tuple is returned for the shape. | ||||
| """ | ||||
| items = [] | ||||
| for node, input_port in nodes: | ||||
| for tensor_collector in statistic_points.get_algo_statistics_for_node( | ||||
| node.node_name, | ||||
| self._backend_entity.get_filter_fn_for_statistics(input_port, self._algorithm_key), | ||||
| self._algorithm_key, | ||||
| ): | ||||
| stats = tensor_collector.get_statistics() | ||||
| shape = stats[SHAPE_BRANCH_KEY] | ||||
| shape = tuple() if shape is None else tuple(shape.tolist()) | ||||
|
||||
| def test_empty_statistics(self, mode, mocker): |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If shape can be None only during testing and not in any real-life scenario, then I would suggest properly mocking the returned shape in tests, rather than adapting the algorithm logic to support None shapes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Usually we create a method in the backend to resolve such situation, why don't you introduce a method in the backend? The comment could be placed as a docstring for the method
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It helps simplify the code and avoid duplication.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we forward this parameter in the children of the
TensorReducerBase? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done