ROCm · vedithal-amd · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026
@@ -7,6 +7,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
 
 ### Added
 
+* Added ``--pc-sampling-rows`` analyze option to cap the PC sampling table at the top N rows (default: 10). The value must be a non-negative integer; set ``0`` to show all rows.
+
 * Added ``--bench-only`` profile mode option to run the roofline microbenchmark standalone (without profiling an application or collecting performance counters). No application run is required. Useful for regenerating ``roofline.csv`` in an existing workload directory or running the microbenchmark on systems where only HIP is available but rocprofiler-sdk is not.
 
 * Added ``--overwrite`` profile mode option to explicitly allow replacing existing workload output.
@@ -22,6 +24,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
 
 ### Changed
 
+* `--pc-sampling-sorting-type` now defaults to `count` (was `offset`), so the PC sampling table shows the most-sampled instructions first.
+
 * Renamed the `Pct of Peak` / `PoP` analysis column to `Percent of Peak` in analysis output.
 
 * Moved `--gui` and `--tui` analyze options to experimental status. These features now require the `--experimental` flag to be enabled (e.g., `rocprof-compute analyze --experimental --gui`).

@@ -40,7 +40,8 @@ Analysis options
 ================
 For using analysis options for PC sampling the configuration needed are:
 
-* ``--pc-sampling-sorting-type``: ``offset`` or ``count``. The default option is ``offset``. ``offset`` is an assembly instruction offset in the code object.
+* ``--pc-sampling-sorting-type``: ``offset`` or ``count``. The default option is ``count``, which surfaces the most-sampled instructions (hotspots) first. ``offset`` is an assembly instruction offset in the code object.
+* ``--pc-sampling-rows``: Maximum number of rows shown in the PC sampling table (DEFAULT: 10). Must be a non-negative integer; use ``0`` to show all rows.
 
 **Sample command:**
 

@@ -104,6 +104,18 @@ def block_token_or_alias(s: str) -> str:
         return s
 
 
+def non_negative_int(value: str) -> int:
+    """Parse *value* as a non-negative integer for use as an argparse ``type``.
+
+    :param value: Raw command-line string to convert.
+    :returns: The parsed integer; ``0`` is accepted.
+    :raises argparse.ArgumentTypeError: If *value* is not an integer or is
+        negative.
+    """
+    try:
+        parsed = int(value)
+    except ValueError:
+        # ``from None`` suppresses the int() traceback: the message already
+        # names the offending value, so the chained context adds only noise.
+        raise argparse.ArgumentTypeError(
+            f"expected an integer, got {value!r}"
+        ) from None
+    if parsed < 0:
+        raise argparse.ArgumentTypeError(
+            f"must be a non-negative integer (0 means all), got {parsed}"
+        )
+    return parsed
+
+
 def print_avail_arch(avail_arch: list[str], args: str) -> str:
     ret_str = f"List all available {args} for analysis on specified arch:"
     for arch in avail_arch:
@@ -786,10 +798,21 @@ def omniarg_parser(
         required=False,
         metavar="",
         dest="pc_sampling_sorting_type",
-        default="offset",
+        default="count",
         type=str,
+        choices=["offset", "count"],
         help="\t\tSet the sorting type of pc sampling: "
-        "offset or count (DEFAULT: offset).",
+        "offset or count (DEFAULT: count).",
+    )
+    analyze_group.add_argument(
+        "--pc-sampling-rows",
+        required=False,
+        metavar="",
+        dest="pc_sampling_rows",
+        default=10,
+        type=non_negative_int,
+        help="\t\tSpecify the maximum number of rows shown in the PC "
+        "sampling table; use 0 to show all rows (DEFAULT: 10).",
     )
 
     ## Roofline Command Line Options (analyze: visualization)

@@ -404,6 +404,7 @@ def load_pc_sampling_data_per_kernel(
     tool_data: dict[str, Any],
     sorting_type: str,
     kernel_name: Optional[str] = None,
+    num_rows: Optional[int] = None,
 ) -> pd.DataFrame:
     """Build the detailed per-instruction PC sampling table from *tool_data*.
 
@@ -413,6 +414,8 @@ def load_pc_sampling_data_per_kernel(
     :param tool_data: The parsed ``rocprofiler-sdk-tool[0]`` dict.
     :param sorting_type: "offset" or "count".
     :param kernel_name: Kernel to filter to, or None for all kernels.
+    :param num_rows: Keep only the first *num_rows* rows after sorting; None or
+        0 keeps every row.
     """
     kernel_context = f"kernel '{kernel_name}'" if kernel_name else "all kernels"
     pc_samples = tool_data["buffer_records"][
@@ -471,6 +474,10 @@ def load_pc_sampling_data_per_kernel(
         )
         return pd.DataFrame()
 
+    # num_rows of 0 or None (or a negative passed programmatically) shows all.
+    if num_rows and num_rows > 0:
+        df_sorted = df_sorted.head(num_rows)
+
     df_sorted["offset"] = df_sorted["offset"].apply(hex)
 
     # Stochastic adds issue/stall detail on top of the host_trap columns.
@@ -489,6 +496,7 @@ def load_pc_sampling_data(
     file_prefix: str,
     sorting_type: str,
     tool_data: Optional[dict[str, Any]],
+    num_rows: Optional[int] = None,
 ) -> pd.DataFrame:
     """Return the detailed per-instruction table for a single kernel or all.
 
@@ -513,6 +521,7 @@ def load_pc_sampling_data(
             pc_sampling_method,
             tool_data,
             sorting_type,
+            num_rows=num_rows,
         )
 
     if len(workload.filter_kernel_ids) > 1:
@@ -539,6 +548,7 @@ def load_pc_sampling_data(
         tool_data,
         sorting_type,
         kernel_name,
+        num_rows=num_rows,
     )
 
 
@@ -630,6 +640,7 @@ def load_non_mertrics_table(
                 df.loc[0, "from_pc_sampling"],
                 args.pc_sampling_sorting_type,
                 pc_sampling_tool_data,
+                num_rows=args.pc_sampling_rows,
             )
 
     workload.dfs.update(tmp)

@@ -7,6 +7,7 @@
 
 import argparse
 from pathlib import Path
+from unittest.mock import patch
 
 import pytest
 from common import SUPPORTED_ARCHS
@@ -113,3 +114,31 @@ def test_config_dir_requires_value(capsys):
         build_args(["--config-dir"])
     assert exc.value.code == 2
     assert "--config-dir" in capsys.readouterr().err
+
+
+def test_pc_sampling_analyze_options():
+    """Defaults, overrides, and validation for the analyze PC sampling options."""
+    defaults = build_args(["analyze"])
+    assert defaults.pc_sampling_sorting_type == "count"
+    assert defaults.pc_sampling_rows == 10
+
+    overrides = build_args([
+        "analyze",
+        "--pc-sampling-sorting-type",
+        "offset",
+        "--pc-sampling-rows",
+        "25",
+    ])
+    assert overrides.pc_sampling_sorting_type == "offset"
+    assert overrides.pc_sampling_rows == 25
+
+    # 0 is allowed and means "show all rows".
+    assert build_args(["analyze", "--pc-sampling-rows", "0"]).pc_sampling_rows == 0
+
+    # Invalid row counts trigger an argparse error. Checking the reported
+    # message (not just that ``error`` ran) ensures the rejection comes from
+    # the row-count validator rather than from an unrelated parsing failure.
+    rejected = [("-1", "non-negative"), ("abc", "expected an integer")]
+    for bad_value, expected_fragment in rejected:
+        with patch.object(
+            argparse.ArgumentParser, "error", side_effect=SystemExit(2)
+        ) as mock_error:
+            with pytest.raises(SystemExit):
+                build_args(["analyze", "--pc-sampling-rows", bad_value])
+        mock_error.assert_called_once()
+        assert expected_fragment in str(mock_error.call_args)
@@ -614,6 +614,22 @@ def test_load_per_kernel_offset_sort_is_numeric() -> None:
     assert df["offset"].tolist() == ["0x20", "0x100"]
 
 
+@pytest.mark.parametrize(
+    "num_rows, expected_rows",
+    [
+        (1, 1),
+        (0, 2),
+        (None, 2),
+        # The loader only truncates for positive values, so a negative count
+        # passed programmatically must also keep every row.
+        (-1, 2),
+    ],
+)
+def test_load_per_kernel_num_rows_limit(
+    num_rows: int | None,
+    expected_rows: int,
+) -> None:
+    """num_rows caps the table after sorting; 0, None, or a negative keeps all."""
+    df = load_pc_sampling_data_per_kernel(
+        method="host_trap",
+        tool_data=setup_per_kernel_data(),
+        kernel_name="vecCopy",
+        sorting_type="count",
+        num_rows=num_rows,
+    )
+    assert len(df) == expected_rows
+
+
 def make_per_kernel_guard_data(
     instructions: list | None,
     comments: list | None,
@@ -1240,7 +1256,7 @@ def test_load_non_mertrics_table_populates_pc_sampling_from_tool_data(
     tmp_path: Path,
 ) -> None:
     """A ``from_pc_sampling`` table is populated when tool data is provided."""
-    args = argparse.Namespace(pc_sampling_sorting_type="count")
+    args = argparse.Namespace(pc_sampling_sorting_type="count", pc_sampling_rows=10)
     workload = schema.Workload()
     workload.dfs = {2101: pd.DataFrame({"from_pc_sampling": ["ps_file"]})}
     tool_data = make_tool_data(**sample_tool_data_kwargs())
@@ -1254,7 +1270,7 @@ def test_load_non_mertrics_table_pc_sampling_empty_without_tool_data(
     tmp_path: Path,
 ) -> None:
     """Without tool data the ``from_pc_sampling`` table stays empty (no crash)."""
-    args = argparse.Namespace(pc_sampling_sorting_type="count")
+    args = argparse.Namespace(pc_sampling_sorting_type="count", pc_sampling_rows=10)
     workload = schema.Workload()
     workload.dfs = {2101: pd.DataFrame({"from_pc_sampling": ["ps_file"]})}
     load_non_mertrics_table(workload, str(tmp_path), args)