Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions projects/rocprofiler-compute/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.

### Added

* Added ``--pc-sampling-rows`` analyze option to cap the PC sampling table at the top N rows (default 10); set ``0`` to show all. Must be non-negative.

* Added ``--bench-only`` profile mode option to run the roofline microbenchmark standalone (without profiling an application or collecting performance counters). No application run is required. Useful for regenerating ``roofline.csv`` in an existing workload directory or running the microbenchmark on systems where only HIP is available but rocprofiler-sdk is not.

* Added ``--overwrite`` profile mode option to explicitly allow replacing existing workload output.
Expand All @@ -22,6 +24,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.

### Changed

* `--pc-sampling-sorting-type` now defaults to `count` (was `offset`), so the PC sampling table shows the most-sampled instructions first.

* Renamed the `Pct of Peak` / `PoP` analysis column to `Percent of Peak` in analysis output.

* Moved `--gui` and `--tui` analyze options to experimental status. These features now require the `--experimental` flag to be enabled (e.g., `rocprof-compute analyze --experimental --gui`).
Expand Down
3 changes: 2 additions & 1 deletion projects/rocprofiler-compute/docs/how-to/pc_sampling.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ Analysis options
================
The following options configure the analysis of PC sampling data:

* ``--pc-sampling-sorting-type``: ``offset`` or ``count``. The default option is ``offset``. ``offset`` is an assembly instruction offset in the code object.
* ``--pc-sampling-sorting-type``: ``offset`` or ``count``. The default option is ``count``, which surfaces the most-sampled instructions (hotspots) first. ``offset`` sorts by each assembly instruction's offset within the code object.
* ``--pc-sampling-rows``: Maximum number of rows shown in the PC sampling table (DEFAULT: 10). Must be a non-negative integer; use ``0`` to show all rows.

**Sample command:**

Expand Down
27 changes: 25 additions & 2 deletions projects/rocprofiler-compute/src/argparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,18 @@ def block_token_or_alias(s: str) -> str:
return s


def non_negative_int(value: str) -> int:
    """Parse *value* as a non-negative integer (argparse ``type=`` callable).

    :param value: Raw command-line token to convert.
    :return: The parsed integer, guaranteed to be ``>= 0``.
    :raises argparse.ArgumentTypeError: If *value* is not an integer literal
        or the parsed integer is negative.
    """
    try:
        parsed = int(value)
    except ValueError:
        # The message already quotes the offending token, so suppress the
        # implicit ValueError context rather than chaining a redundant
        # "During handling of the above exception..." traceback.
        raise argparse.ArgumentTypeError(
            f"expected an integer, got {value!r}"
        ) from None
    if parsed < 0:
        raise argparse.ArgumentTypeError(
            f"must be a non-negative integer (0 means all), got {parsed}"
        )
    return parsed


def print_avail_arch(avail_arch: list[str], args: str) -> str:
ret_str = f"List all available {args} for analysis on specified arch:"
for arch in avail_arch:
Expand Down Expand Up @@ -786,10 +798,21 @@ def omniarg_parser(
required=False,
metavar="",
dest="pc_sampling_sorting_type",
default="offset",
default="count",
type=str,
choices=["offset", "count"],
help="\t\tSet the sorting type of pc sampling: "
"offset or count (DEFAULT: offset).",
"offset or count (DEFAULT: count).",
)
Comment thread
vedithal-amd marked this conversation as resolved.
analyze_group.add_argument(
"--pc-sampling-rows",
required=False,
metavar="",
dest="pc_sampling_rows",
default=10,
type=non_negative_int,
help="\t\tSpecify the maximum number of rows shown in the PC "
"sampling table; use 0 to show all rows (DEFAULT: 10).",
)

## Roofline Command Line Options (analyze: visualization)
Expand Down
11 changes: 11 additions & 0 deletions projects/rocprofiler-compute/src/utils/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ def load_pc_sampling_data_per_kernel(
tool_data: dict[str, Any],
sorting_type: str,
kernel_name: Optional[str] = None,
num_rows: Optional[int] = None,
) -> pd.DataFrame:
"""Build the detailed per-instruction PC sampling table from *tool_data*.

Expand All @@ -413,6 +414,8 @@ def load_pc_sampling_data_per_kernel(
:param tool_data: The parsed ``rocprofiler-sdk-tool[0]`` dict.
:param sorting_type: "offset" or "count".
:param kernel_name: Kernel to filter to, or None for all kernels.
:param num_rows: Keep only the first *num_rows* rows after sorting; None or
0 keeps every row.
"""
kernel_context = f"kernel '{kernel_name}'" if kernel_name else "all kernels"
pc_samples = tool_data["buffer_records"][
Expand Down Expand Up @@ -471,6 +474,10 @@ def load_pc_sampling_data_per_kernel(
)
return pd.DataFrame()

# num_rows of 0 or None (or a negative passed programmatically) shows all.
if num_rows and num_rows > 0:
df_sorted = df_sorted.head(num_rows)

df_sorted["offset"] = df_sorted["offset"].apply(hex)

# Stochastic adds issue/stall detail on top of the host_trap columns.
Expand All @@ -489,6 +496,7 @@ def load_pc_sampling_data(
file_prefix: str,
sorting_type: str,
tool_data: Optional[dict[str, Any]],
num_rows: Optional[int] = None,
) -> pd.DataFrame:
"""Return the detailed per-instruction table for a single kernel or all.

Expand All @@ -513,6 +521,7 @@ def load_pc_sampling_data(
pc_sampling_method,
tool_data,
sorting_type,
num_rows=num_rows,
)

if len(workload.filter_kernel_ids) > 1:
Expand All @@ -539,6 +548,7 @@ def load_pc_sampling_data(
tool_data,
sorting_type,
kernel_name,
num_rows=num_rows,
)


Expand Down Expand Up @@ -630,6 +640,7 @@ def load_non_mertrics_table(
df.loc[0, "from_pc_sampling"],
args.pc_sampling_sorting_type,
pc_sampling_tool_data,
num_rows=args.pc_sampling_rows,
)

workload.dfs.update(tmp)
Expand Down
29 changes: 29 additions & 0 deletions projects/rocprofiler-compute/tests/test_argparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import argparse
from pathlib import Path
from unittest.mock import patch

import pytest
from common import SUPPORTED_ARCHS
Expand Down Expand Up @@ -113,3 +114,31 @@ def test_config_dir_requires_value(capsys):
build_args(["--config-dir"])
assert exc.value.code == 2
assert "--config-dir" in capsys.readouterr().err


def test_pc_sampling_analyze_options():
    """Check analyze-mode PC sampling flags: defaults, overrides, validation."""
    # With no PC sampling flags the parser's own defaults apply.
    parsed_defaults = build_args(["analyze"])
    assert parsed_defaults.pc_sampling_sorting_type == "count"
    assert parsed_defaults.pc_sampling_rows == 10

    # Both options can be overridden explicitly on the command line.
    override_argv = [
        "analyze",
        "--pc-sampling-sorting-type",
        "offset",
        "--pc-sampling-rows",
        "25",
    ]
    parsed_overrides = build_args(override_argv)
    assert parsed_overrides.pc_sampling_sorting_type == "offset"
    assert parsed_overrides.pc_sampling_rows == 25

    # 0 is allowed and means "show all rows".
    parsed_show_all = build_args(["analyze", "--pc-sampling-rows", "0"])
    assert parsed_show_all.pc_sampling_rows == 0

    # Negative row counts trigger an argparse error. ``error`` is patched to
    # raise SystemExit directly so the call can be observed on the mock.
    error_patch = patch.object(
        argparse.ArgumentParser, "error", side_effect=SystemExit(2)
    )
    with error_patch as mock_error:
        with pytest.raises(SystemExit):
            build_args(["analyze", "--pc-sampling-rows", "-1"])
        mock_error.assert_called_once()
20 changes: 18 additions & 2 deletions projects/rocprofiler-compute/tests/test_pc_sampling_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,22 @@ def test_load_per_kernel_offset_sort_is_numeric() -> None:
assert df["offset"].tolist() == ["0x20", "0x100"]


@pytest.mark.parametrize(
    "num_rows, expected_rows",
    [
        (1, 1),
        (0, 2),
        (None, 2),
        # A negative value can only arrive programmatically (the CLI rejects
        # it); the loader treats it like 0/None and keeps every row.
        (-1, 2),
    ],
)
def test_load_per_kernel_num_rows_limit(
    num_rows: int | None,
    expected_rows: int,
) -> None:
    """num_rows caps the table after sorting.

    A positive value keeps only that many rows; 0, None, or a negative value
    keeps every row.
    """
    df = load_pc_sampling_data_per_kernel(
        method="host_trap",
        tool_data=setup_per_kernel_data(),
        kernel_name="vecCopy",
        sorting_type="count",
        num_rows=num_rows,
    )
    assert len(df) == expected_rows


def make_per_kernel_guard_data(
instructions: list | None,
comments: list | None,
Expand Down Expand Up @@ -1240,7 +1256,7 @@ def test_load_non_mertrics_table_populates_pc_sampling_from_tool_data(
tmp_path: Path,
) -> None:
"""A ``from_pc_sampling`` table is populated when tool data is provided."""
args = argparse.Namespace(pc_sampling_sorting_type="count")
args = argparse.Namespace(pc_sampling_sorting_type="count", pc_sampling_rows=10)
workload = schema.Workload()
workload.dfs = {2101: pd.DataFrame({"from_pc_sampling": ["ps_file"]})}
tool_data = make_tool_data(**sample_tool_data_kwargs())
Expand All @@ -1254,7 +1270,7 @@ def test_load_non_mertrics_table_pc_sampling_empty_without_tool_data(
tmp_path: Path,
) -> None:
"""Without tool data the ``from_pc_sampling`` table stays empty (no crash)."""
args = argparse.Namespace(pc_sampling_sorting_type="count")
args = argparse.Namespace(pc_sampling_sorting_type="count", pc_sampling_rows=10)
workload = schema.Workload()
workload.dfs = {2101: pd.DataFrame({"from_pc_sampling": ["ps_file"]})}
load_non_mertrics_table(workload, str(tmp_path), args)
Expand Down
Loading