@@ -228,13 +228,9 @@ def __init__(
 
         self.active = self.original
 
-        # Importance dict is keyed by quant_module (where the quantization is applied)
+        # Importance dict is keyed by score_module (where the score is computed)
         self._importance_dict = {
-            quant_recipe: {
-                mod: torch.zeros((), device=mod.weight.device, dtype=torch.float32)
-                for mod in self.quant_modules
-            }
-            for quant_recipe in self.choices
+            quant_recipe: dict.fromkeys(self.score_modules) for quant_recipe in self.choices
         }
 
         # Attach this hparam to each score_module's set of hparams it scores
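
For context on the change above: dict.fromkeys(self.score_modules) seeds every slot with None instead of eagerly allocating a zero tensor per module, so a score tensor is only materialized the first time a gradient actually reaches that module. A minimal sketch of the pattern, using hypothetical module and recipe names rather than the searcher's real objects:

import torch
import torch.nn as nn

# Hypothetical stand-ins for the score modules and quantization recipes.
score_modules = [nn.Linear(4, 4), nn.Linear(4, 4)]
choices = ["int8_recipe", "nf4_recipe"]

# One None-valued slot per (recipe, module); no device memory is touched yet.
importance_dict = {recipe: dict.fromkeys(score_modules) for recipe in choices}

# On the first score for a module, the tensor replaces None; afterwards it accumulates.
mod = score_modules[0]
new_score = torch.tensor(0.123)
if importance_dict["int8_recipe"][mod] is None:
    importance_dict["int8_recipe"][mod] = new_score
else:
    importance_dict["int8_recipe"][mod] += new_score
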
@@ -266,7 +262,7 @@ def active(self, val: HPType | None):
     def importance(self) -> dict:
         """Return the importance dict mapping recipe and importance."""
         return {
-            quant_recipe: sum(v.cpu().item() for v in importance_dict.values())
+            quant_recipe: sum(v.cpu().item() for v in importance_dict.values() if v is not None)
             for quant_recipe, importance_dict in self._importance_dict.items()
         }
 
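
Continuing the sketch above, the if v is not None guard matters because modules that never accumulated a score still map to None and would otherwise break the reduction:

# Modules that never received a gradient stay None and are simply skipped.
importance = {
    recipe: sum(v.cpu().item() for v in per_module.values() if v is not None)
    for recipe, per_module in importance_dict.items()
}
print(importance)  # e.g. {'int8_recipe': ~0.123, 'nf4_recipe': 0}
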
@@ -275,11 +271,6 @@ def attrs(self) -> list[str]:
275271 """Return the attributes of the hparam for repr."""
276272 return ["name" , * super ().attrs ]
277273
278-
279- def _add_auto_quantize_score (grad_output , output_diff , score_tensor ):
280- score_tensor += ((grad_output .float () ** 2 ) * (output_diff .float () ** 2 )).sum ()
281-
282-
283274class _AutoQuantizeBaseSearcher (BaseSearcher , ABC ):
284275 """A base searcher for AutoQuantize algorithm."""
285276
@@ -665,6 +656,18 @@ def run_search(self):
         QuantRecipe.fold_pqs_to_weights(self.model)
 
 
+
+
+@torch.compile
+def _get_auto_quantize_score(grad_output, output_diff):
+    return ((grad_output.float() ** 2) * (output_diff.float() ** 2)).sum()
+
+
+@torch.compile
+def _add_auto_quantize_score(grad_output, output_diff, score_tensor):
+    score_tensor += _get_auto_quantize_score(grad_output, output_diff)
+
+
 class AutoQuantizeGradientSearcher(_AutoQuantizeBaseSearcher):
     """A searcher for AutoQuantize algorithm that uses gradient based score estimation.
 
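
The two new helpers compute the AutoQuantize sensitivity score: the sum over elements of the squared gradient of the loss w.r.t. the module output times the squared quantized-minus-original output difference. _get_auto_quantize_score returns a fresh scalar, _add_auto_quantize_score accumulates into an existing one, and @torch.compile can fuse the square/multiply/sum into a single kernel. A rough standalone sketch with illustrative shapes (the decorator is omitted here to keep the snippet dependency-light):

import torch

def get_score(grad_output: torch.Tensor, output_diff: torch.Tensor) -> torch.Tensor:
    # sum of (dL/dy)^2 * (y_quant - y_orig)^2 over all elements
    return ((grad_output.float() ** 2) * (output_diff.float() ** 2)).sum()

grad_output = torch.randn(8, 16)          # stand-in for the gradient w.r.t. a module output
output_diff = torch.randn(8, 16) * 1e-3   # stand-in for the quantization-induced output change

score = get_score(grad_output, output_diff)   # first batch: create the scalar score
score += get_score(grad_output, output_diff)  # later batches: accumulate in place
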
@@ -790,8 +793,14 @@ def auto_quantize_score_estimate_forward(module, input, *args, **kwargs):
         def backward_hook(module, grad_input, grad_output):
             for hparam, output_diff_dict in module.output_diff_dict.items():
                 for recipe, output_diff in output_diff_dict.items():
-                    score_tensor = hparam._importance_dict[recipe][module]
-                    _add_auto_quantize_score(grad_output[0], output_diff, score_tensor)
+                    if hparam._importance_dict[recipe][module] is None:
+                        hparam._importance_dict[recipe][module] = _get_auto_quantize_score(
+                            grad_output[0], output_diff
+                        )
+                    else:
+                        _add_auto_quantize_score(
+                            grad_output[0], output_diff, hparam._importance_dict[recipe][module]
+                        )
 
         def setup_params_for_score_estimation(name, param, params_metadata, enable_grad=True):
             # Let us delete the gradient as soon as they are computed to save memory
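
The reworked backward_hook now lazily creates the per-module score on the first backward pass and accumulates on subsequent ones, matching the None-seeded importance dict. A self-contained sketch of that create-or-accumulate pattern, using a hypothetical single module and a fixed output_diff (the real hook is keyed by hparam and recipe and gets output_diff from the forward pass):

import torch
import torch.nn as nn

def get_score(grad_output, output_diff):
    return ((grad_output.float() ** 2) * (output_diff.float() ** 2)).sum()

layer = nn.Linear(4, 4)
importance = dict.fromkeys([layer])          # {layer: None}, nothing allocated yet
output_diff = torch.randn(2, 4) * 1e-3       # stand-in for quantized-vs-original output delta

def backward_hook(module, grad_input, grad_output):
    if importance[module] is None:
        importance[module] = get_score(grad_output[0], output_diff)   # first backward: create
    else:
        importance[module] += get_score(grad_output[0], output_diff)  # later passes: accumulate

layer.register_full_backward_hook(backward_hook)
for _ in range(2):
    layer(torch.randn(2, 4)).sum().backward()
print(importance[layer])                     # accumulated scalar score for this module
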