Walk transparent ops when extracting input quant params (#20139)

ethansfng · facebook-github-bot · commit 710cea49794f · 2026-06-09T15:10:29.000-07:00
Summary:

SAM 3's encoder input passes through a transparent shape op before it is quantized — placeholder → reshape (patchify) → quantize_per_tensor — whereas the original code only recognized placeholder → quantize_per_tensor (a quantize applied directly to the input). This change walks through transparent ops (reshape/permute/transpose/etc.) starting from the input until it reaches the first quantize, so the indirect pattern now resolves.

Differential Revision: D107922730
diff --git a/backends/cadence/aot/compiler_funcs.py b/backends/cadence/aot/compiler_funcs.py
@@ -22,6 +22,28 @@
 
 logger: logging.Logger = logging.getLogger(__name__)
 QuantArgs = tuple[float, int, int, int, torch.dtype]
+TRANSPARENT_OPS: frozenset[torch._ops.OpOverloadPacket] = frozenset(
+    {
+        torch.ops.aten.view,
+        torch.ops.aten.view_copy,
+        torch.ops.aten._unsafe_view,
+        torch.ops.aten.reshape,
+        torch.ops.aten.permute,
+        torch.ops.aten.permute_copy,
+        torch.ops.aten.transpose,
+        torch.ops.aten.transpose_copy,
+        torch.ops.aten.squeeze,
+        torch.ops.aten.squeeze_copy,
+        torch.ops.aten.unsqueeze,
+        torch.ops.aten.unsqueeze_copy,
+        torch.ops.aten.slice,
+        torch.ops.aten.slice_copy,
+        torch.ops.aten.contiguous,
+        torch.ops.aten.clone,
+        torch.ops.aten.to,
+        torch.ops.aten._to_copy,
+    }
+)
 
 
 @torch.no_grad()
@@ -251,17 +273,27 @@ def extract_input_quant_params_from_graph(
     if not input_names:
         return quant_args
 
+    # Inputs are referenced by node name, which may be a placeholder or a node
+    # that unpacks/derives the input (e.g. a `getitem` off a tuple input), so
+    # look the start node up across all nodes -- not just placeholders.
+    nodes_by_name = {n.name: n for n in module.graph.nodes}
+
     for idx, name in enumerate(input_names):
-        for node in module.graph.nodes:
-            if node.op != "call_function":
+        start = nodes_by_name.get(name)
+        if start is None:
+            continue
+        seen: set[torch.fx.Node] = set()
+        to_visit: list[torch.fx.Node] = list(start.users)
+        while to_visit:
+            node = to_visit.pop()
+            if node in seen or node.op != "call_function":
                 continue
-
+            seen.add(node)
+            target_str = str(node.target)
             if (
-                node.args
-                and isinstance(node.args[0], torch.fx.Node)
-                and node.args[0].name == name
+                "quantize_per_tensor" in target_str
+                and "dequantize" not in target_str
                 and not node.name.startswith("_assert_tensor_metadata")
-                and "quantize_per_tensor" in str(node.target)
             ):
                 args = node.args[1:]
                 if len(args) >= 5:
@@ -274,6 +306,12 @@ def extract_input_quant_params_from_graph(
                     )
                     found_names.add(name)
                 break
+            target = node.target
+            if (
+                isinstance(target, torch._ops.OpOverload)
+                and target.overloadpacket in TRANSPARENT_OPS
+            ):
+                to_visit.extend(node.users)
 
     missing_names = set(input_names) - found_names
     if missing_names: