ROCm · ipanfilo · May 30, 2026 · May 26, 2026 · May 26, 2026 · May 27, 2026
@@ -4,7 +4,6 @@
 #
 # See LICENSE for license information.
 import os
-import subprocess
 from pathlib import Path
 
 import pytest
@@ -15,6 +14,8 @@
 from torch.utils.cpp_extension import IS_HIP_EXTENSION
 from transformer_engine.pytorch.utils import get_device_compute_capability
 
+from utils import run_proctree_with_timeout as run_subprocess
+
 
 if torch.cuda.device_count() < 2:
     pytest.skip("Comm+GEMM overlap requires at least 2 GPUs.")
@@ -88,7 +89,8 @@ def _run_gemm_with_overlap(comm_type, bulk, p2p, atomic, aggregate, quantization
         if aggregate:
             test_cmd.append("--aggregate")
 
-    result = subprocess.run(test_cmd, env=os.environ, capture_output=True, check=False)
+    result = run_subprocess(test_cmd, 120 if IS_HIP_EXTENSION else None,
+                            env=os.environ, capture_output=True, check=False)
     if (
         result.returncode != 0
         or "NUMERICAL CHECK FAILED" in result.stderr.decode()
@@ -143,7 +145,8 @@ def _run_layer_with_overlap(
         # not show up in more recent GPUs.
         os.environ["NVTE_FLASH_ATTN"] = "0"
 
-    result = subprocess.run(test_cmd, env=os.environ, capture_output=True, check=False)
+    result = run_subprocess(test_cmd, 120 if IS_HIP_EXTENSION else None,
+                            env=os.environ, capture_output=True, check=False)
 
     os.unsetenv("PYTORCH_JIT")
     os.unsetenv("NVTE_TORCH_COMPILE")

@@ -1,14 +1,18 @@
+# This file was modified for portability to AMDGPU
+# Copyright (c) 2026, Advanced Micro Devices, Inc. All rights reserved.
 # Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # See LICENSE for license information.
 
 import os
 import pytest
-import subprocess
 from pathlib import Path
 import transformer_engine.pytorch as te
 
 import torch
+from torch.utils.cpp_extension import IS_HIP_EXTENSION
+
+from utils import run_proctree_with_timeout as run_subprocess
 
 
 fp8_available, reason_for_no_fp8 = te.is_fp8_available(return_reason=True)
@@ -32,7 +36,8 @@ def _run_test(fp_init, sharding_dims, recipe, layer_type):
     test_cmd += ["--recipe", recipe]
     test_cmd += ["--layer-type", layer_type]
 
-    result = subprocess.run(test_cmd, env=os.environ, check=True)
+    result = run_subprocess(test_cmd, 120 if IS_HIP_EXTENSION else None, env=os.environ,
+                            check=True)
 
 
 @pytest.mark.skipif(NUM_PROCS < 4, reason="Requires 4+ GPUs")

@@ -5,12 +5,12 @@
 import os
 from typing import List
 import pytest
-import subprocess
 from pathlib import Path
 from transformer_engine.pytorch import torch_version
 from transformer_engine.pytorch.quantization import FP8GlobalStateManager
 import torch
 from run_fsdp2_fp8_model import SimpleNet
+from utils import run_proctree_with_timeout as run_subprocess
 
 fp8_available, reason_for_no_fp8 = FP8GlobalStateManager.is_fp8_available()
 mxfp8_available, reason_for_no_mxfp8 = FP8GlobalStateManager.is_mxfp8_available()
@@ -49,8 +49,11 @@ def _run_test(fp_init, recipe):
         test_cmd += ["--fp8-init"]
     test_cmd += ["--recipe", recipe]
 
-    subprocess.run(test_cmd + ['--use-fsdp2','--gradients-save-file', 'all_iters_fsdp2.pt'], env=os.environ, check=True)
-    subprocess.run(test_cmd + ['--gradients-save-file', 'all_iters_dp.pt'], env=os.environ, check=True)
+    timeout = 120
+    run_subprocess(test_cmd + ['--use-fsdp2','--gradients-save-file', 'all_iters_fsdp2.pt'],
+                   timeout, env=os.environ, check=True)
+    run_subprocess(test_cmd + ['--gradients-save-file', 'all_iters_dp.pt'], timeout,
+                   env=os.environ, check=True)
 
     # Load outputs
     output_fsdp = torch.load("all_iters_fsdp2.pt", map_location="cpu")

diff --git a/tests/pytorch/distributed/utils.py b/tests/pytorch/distributed/utils.py
@@ -0,0 +1,60 @@
+# Copyright (c) 2026, Advanced Micro Devices, Inc. All rights reserved.
+#
+# See LICENSE for license information.
+
+import os, signal, subprocess
+
+
def run_proctree_with_timeout(cmd, timeout, **kwargs):
    """Run ``cmd`` in a subprocess, forcibly stopping it if it exceeds ``timeout``.

    With ``timeout=None`` this is a plain ``subprocess.run(cmd, **kwargs)``.
    Otherwise the command is started in a new session. If it does not finish
    within ``timeout`` seconds it is first asked to terminate; if it is still
    running after another ``timeout`` seconds, its whole process group is
    killed with ``SIGKILL``.

    A timeout is not reported through ``subprocess.TimeoutExpired``. The
    stopped command surfaces through its return code instead, which callers
    detect via ``returncode`` or ``check=True``.

    Args:
        cmd: Command to execute, as accepted by ``subprocess.Popen``.
        timeout: Time limit in seconds, or ``None`` for no limit.
        **kwargs: Forwarded to ``subprocess.run`` / ``subprocess.Popen``.
            ``capture_output`` and ``check`` are emulated here, and
            ``timeout`` must not be passed when a limit is set.

    Returns:
        A ``subprocess.CompletedProcess``. ``stdout`` and ``stderr`` are
        populated only when ``capture_output=True``.

    Raises:
        ValueError: If ``timeout`` is also present in ``kwargs``.
        subprocess.CalledProcessError: If ``check=True`` and the command
            exited with a non-zero return code.
    """
    if timeout is None:
        return subprocess.run(cmd, **kwargs)

    if "timeout" in kwargs:
        raise ValueError("Timeout should be passed as a separate argument, not in kwargs")

    # Popen does not understand these two options, so emulate them here.
    capture_output = kwargs.pop("capture_output", False)
    check = kwargs.pop("check", False)
    if capture_output:
        kwargs["stdout"] = subprocess.PIPE
        kwargs["stderr"] = subprocess.PIPE

    # A dedicated session makes the child a process-group leader, so killpg
    # can be used as the termination fallback for the whole tree.
    kwargs["start_new_session"] = True

    stdout = stderr = None
    p = subprocess.Popen(cmd, **kwargs)

    def _await(limit):
        """Wait up to ``limit`` seconds for exit; return captured output, if any."""
        if capture_output:
            return p.communicate(timeout=limit)
        p.wait(timeout=limit)
        return None, None

    try:
        stdout, stderr = _await(timeout)
    except subprocess.TimeoutExpired:
        p.terminate()
        try:
            # Give the process time to terminate gracefully.
            stdout, stderr = _await(timeout)
        except subprocess.TimeoutExpired:
            try:
                os.killpg(p.pid, signal.SIGKILL)
            except ProcessLookupError:
                # The group disappeared between the timeout and the kill.
                pass
            # Always reap the child so that returncode is populated and no
            # zombie is left behind, whether or not output is captured.
            stdout, stderr = _await(None)

    # Handle check=True
    if check and p.returncode != 0:
        raise subprocess.CalledProcessError(
            p.returncode,
            cmd,
            output=stdout,
            stderr=stderr,
        )

    return subprocess.CompletedProcess(cmd, p.returncode, stdout, stderr)