[PyTorch] Propagate FP8 graph weight update flag in GroupedLinear (#3052)

allenphilipj · ksivaman · web-flow · commit 1ea48eb75a20 · 2026-06-05T20:42:53.000+05:30
Fix GroupedLinear FP8 graph weight update flag

Signed-off-by: allenphilipj <allenphilipj@users.noreply.github.com>
Co-authored-by: allenphilipj <allenphilipj@users.noreply.github.com>
Co-authored-by: Kirthi Shankar Sivamani <ksivamani@nvidia.com>
diff --git a/transformer_engine/pytorch/module/grouped_linear.py b/transformer_engine/pytorch/module/grouped_linear.py
@@ -1696,6 +1696,15 @@ def forward(
                 f"does not match number of GEMMs ({num_gemms})."
             )
 
+        if FP8GlobalStateManager.fp8_graph_capturing():
+            skip_fp8_weight_update = (
+                FP8GlobalStateManager.quantization_state.skip_fp8_weight_update_tensor
+            )
+        else:
+            skip_fp8_weight_update = None
+        if skip_fp8_weight_update is not None:
+            is_first_microbatch = False
+
         # Preprocess input tensor
         if isinstance(inp, QuantizedTensorStorage):
             raise TypeError("GroupedLinear doesn't support input tensor in FP8.")
@@ -1754,7 +1763,7 @@ def forward(
                 is_grad_enabled,
                 weight_workspaces,
                 cache_weight,
-                None,  # skip_fp8_weight_update
+                skip_fp8_weight_update,
                 self.save_original_input,
                 debug,
             )