Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions projects/rocprofiler-compute/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.

### Added

* Added ``--pc-sampling-rows`` analyze option to cap the PC sampling table at the top N rows (default 10); set ``0`` to show all. Must be non-negative.

* Added ``--bench-only`` profile mode option to run the roofline microbenchmark standalone (without profiling an application or collecting performance counters). No application run is required. Useful for regenerating ``roofline.csv`` in an existing workload directory or running the microbenchmark on systems where only HIP is available but rocprofiler-sdk is not.

* Added ``--overwrite`` profile mode option to explicitly allow replacing existing workload output.
Expand All @@ -22,6 +24,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.

### Changed

* `--pc-sampling-sorting-type` now defaults to `count` (was `offset`), so the PC sampling table shows the most-sampled instructions first.

* Renamed the `Pct of Peak` / `PoP` analysis column to `Percent of Peak` in analysis output.

* Moved `--gui` and `--tui` analyze options to experimental status. These features now require the `--experimental` flag to be enabled (e.g., `rocprof-compute analyze --experimental --gui`).
Expand Down
3 changes: 2 additions & 1 deletion projects/rocprofiler-compute/docs/how-to/pc_sampling.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ Analysis options
================
The following analysis options are available for configuring PC sampling output:

* ``--pc-sampling-sorting-type``: ``offset`` or ``count``. The default option is ``offset``. ``offset`` is an assembly instruction offset in the code object.
* ``--pc-sampling-sorting-type``: ``offset`` or ``count``. The default option is ``count``, which surfaces the most-sampled instructions (hotspots) first. ``offset`` is an assembly instruction offset in the code object.
* ``--pc-sampling-rows``: Maximum number of rows shown in the PC sampling table (DEFAULT: 10). Must be a non-negative integer; use ``0`` to show all rows.

**Sample command:**

Expand Down
26 changes: 24 additions & 2 deletions projects/rocprofiler-compute/src/argparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,18 @@ def block_token_or_alias(s: str) -> str:
return s


def non_negative_int(value: str) -> int:
    """Argparse ``type=`` converter that accepts only integers >= 0.

    :param value: Raw command-line token to convert.
    :returns: The parsed integer (``0`` is allowed).
    :raises argparse.ArgumentTypeError: If *value* is not an integer or is
        negative.
    """
    try:
        parsed = int(value)
    except ValueError:
        # The message already carries the offending token, so suppress the
        # internal ValueError instead of chaining it implicitly.
        raise argparse.ArgumentTypeError(
            f"expected an integer, got {value!r}"
        ) from None
    if parsed < 0:
        raise argparse.ArgumentTypeError(
            f"must be a non-negative integer (0 means all), got {parsed}"
        )
    return parsed


def print_avail_arch(avail_arch: list[str], args: str) -> str:
ret_str = f"List all available {args} for analysis on specified arch:"
for arch in avail_arch:
Expand Down Expand Up @@ -786,10 +798,20 @@ def omniarg_parser(
required=False,
metavar="",
dest="pc_sampling_sorting_type",
default="offset",
default="count",
type=str,
help="\t\tSet the sorting type of pc sampling: "
"offset or count (DEFAULT: offset).",
"offset or count (DEFAULT: count).",
)
Comment thread
vedithal-amd marked this conversation as resolved.
analyze_group.add_argument(
"--pc-sampling-rows",
required=False,
metavar="",
dest="pc_sampling_rows",
default=10,
type=non_negative_int,
help="\t\tSpecify the maximum number of rows shown in the PC "
"sampling table; use 0 to show all rows (DEFAULT: 10).",
)

## Roofline Command Line Options (analyze: visualization)
Expand Down
11 changes: 11 additions & 0 deletions projects/rocprofiler-compute/src/utils/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ def load_pc_sampling_data_per_kernel(
tool_data: dict[str, Any],
sorting_type: str,
kernel_name: Optional[str] = None,
num_rows: Optional[int] = None,
) -> pd.DataFrame:
"""Build the detailed per-instruction PC sampling table from *tool_data*.

Expand All @@ -413,6 +414,8 @@ def load_pc_sampling_data_per_kernel(
:param tool_data: The parsed ``rocprofiler-sdk-tool[0]`` dict.
:param sorting_type: "offset" or "count".
:param kernel_name: Kernel to filter to, or None for all kernels.
:param num_rows: Keep only the first *num_rows* rows after sorting; None or
0 keeps every row.
"""
kernel_context = f"kernel '{kernel_name}'" if kernel_name else "all kernels"
pc_samples = tool_data["buffer_records"][
Expand Down Expand Up @@ -471,6 +474,10 @@ def load_pc_sampling_data_per_kernel(
)
return pd.DataFrame()

# num_rows of 0 (or None) means show all rows; argparse rejects negatives.
if num_rows:
df_sorted = df_sorted.head(num_rows)
Comment thread
vedithal-amd marked this conversation as resolved.
Outdated

df_sorted["offset"] = df_sorted["offset"].apply(hex)

# Stochastic adds issue/stall detail on top of the host_trap columns.
Expand All @@ -489,6 +496,7 @@ def load_pc_sampling_data(
file_prefix: str,
sorting_type: str,
tool_data: Optional[dict[str, Any]],
num_rows: Optional[int] = None,
) -> pd.DataFrame:
"""Return the detailed per-instruction table for a single kernel or all.

Expand All @@ -513,6 +521,7 @@ def load_pc_sampling_data(
pc_sampling_method,
tool_data,
sorting_type,
num_rows=num_rows,
)

if len(workload.filter_kernel_ids) > 1:
Expand All @@ -539,6 +548,7 @@ def load_pc_sampling_data(
tool_data,
sorting_type,
kernel_name,
num_rows=num_rows,
)


Expand Down Expand Up @@ -630,6 +640,7 @@ def load_non_mertrics_table(
df.loc[0, "from_pc_sampling"],
args.pc_sampling_sorting_type,
pc_sampling_tool_data,
num_rows=args.pc_sampling_rows,
)

workload.dfs.update(tmp)
Expand Down
29 changes: 29 additions & 0 deletions projects/rocprofiler-compute/tests/test_argparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import argparse
from pathlib import Path
from unittest.mock import patch

import pytest
from common import SUPPORTED_ARCHS
Expand Down Expand Up @@ -113,3 +114,31 @@ def test_config_dir_requires_value(capsys):
build_args(["--config-dir"])
assert exc.value.code == 2
assert "--config-dir" in capsys.readouterr().err


def test_pc_sampling_analyze_options():
    """Check defaults, explicit overrides, and validation of the PC sampling flags."""
    # With no PC sampling flags the parser falls back to its defaults.
    parsed_defaults = build_args(["analyze"])
    assert parsed_defaults.pc_sampling_rows == 10
    assert parsed_defaults.pc_sampling_sorting_type == "count"

    # Explicit values on the command line replace both defaults.
    override_argv = ["analyze"]
    override_argv += ["--pc-sampling-sorting-type", "offset"]
    override_argv += ["--pc-sampling-rows", "25"]
    parsed_overrides = build_args(override_argv)
    assert parsed_overrides.pc_sampling_rows == 25
    assert parsed_overrides.pc_sampling_sorting_type == "offset"

    # Zero is accepted; it is the "show all rows" value.
    parsed_zero = build_args(["analyze", "--pc-sampling-rows", "0"])
    assert parsed_zero.pc_sampling_rows == 0

    # A negative row count must be routed through ArgumentParser.error.
    error_patch = patch.object(
        argparse.ArgumentParser, "error", side_effect=SystemExit(2)
    )
    with error_patch as mock_error:
        with pytest.raises(SystemExit):
            build_args(["analyze", "--pc-sampling-rows", "-1"])
    mock_error.assert_called_once()
20 changes: 18 additions & 2 deletions projects/rocprofiler-compute/tests/test_pc_sampling_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,22 @@ def test_load_per_kernel_offset_sort_is_numeric() -> None:
assert df["offset"].tolist() == ["0x20", "0x100"]


@pytest.mark.parametrize("num_rows, expected_rows", [(1, 1), (0, 2), (None, 2)])
def test_load_per_kernel_num_rows_limit(
    num_rows: int | None,
    expected_rows: int,
) -> None:
    """A positive num_rows truncates the sorted table; 0 and None leave it whole."""
    per_kernel_tool_data = setup_per_kernel_data()
    table = load_pc_sampling_data_per_kernel(
        method="host_trap",
        tool_data=per_kernel_tool_data,
        sorting_type="count",
        kernel_name="vecCopy",
        num_rows=num_rows,
    )
    row_count = len(table)
    assert row_count == expected_rows


def make_per_kernel_guard_data(
instructions: list | None,
comments: list | None,
Expand Down Expand Up @@ -1240,7 +1256,7 @@ def test_load_non_mertrics_table_populates_pc_sampling_from_tool_data(
tmp_path: Path,
) -> None:
"""A ``from_pc_sampling`` table is populated when tool data is provided."""
args = argparse.Namespace(pc_sampling_sorting_type="count")
args = argparse.Namespace(pc_sampling_sorting_type="count", pc_sampling_rows=10)
workload = schema.Workload()
workload.dfs = {2101: pd.DataFrame({"from_pc_sampling": ["ps_file"]})}
tool_data = make_tool_data(**sample_tool_data_kwargs())
Expand All @@ -1254,7 +1270,7 @@ def test_load_non_mertrics_table_pc_sampling_empty_without_tool_data(
tmp_path: Path,
) -> None:
"""Without tool data the ``from_pc_sampling`` table stays empty (no crash)."""
args = argparse.Namespace(pc_sampling_sorting_type="count")
args = argparse.Namespace(pc_sampling_sorting_type="count", pc_sampling_rows=10)
workload = schema.Workload()
workload.dfs = {2101: pd.DataFrame({"from_pc_sampling": ["ps_file"]})}
load_non_mertrics_table(workload, str(tmp_path), args)
Expand Down
Loading