diff --git a/projects/rocprofiler-compute/CHANGELOG.md b/projects/rocprofiler-compute/CHANGELOG.md index 88dc4220349..1686cae4fe1 100644 --- a/projects/rocprofiler-compute/CHANGELOG.md +++ b/projects/rocprofiler-compute/CHANGELOG.md @@ -59,6 +59,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. ### Removed +* Removed the multi-node analysis options ``--nodes``, ``--list-nodes`` (analyze mode) and the experimental ``--spatial-multiplexing`` option (profile and analyze modes). These features did not work as expected and will be redesigned in a future release. + * ``--path`` and ``--subpath`` options have been removed from profile mode. Use ``--output-directory`` instead. * Removed redundant `if (X != 0) else None` divide-by-zero guards from metric equations across all analysis YAML configurations. Division by zero is already handled by the metric evaluation engine, which returns `"N/A"` for `inf` and `NaN` results. diff --git a/projects/rocprofiler-compute/PYTHON_CODING_STYLE.md b/projects/rocprofiler-compute/PYTHON_CODING_STYLE.md index c2fa1ad5edf..79f9b30dcde 100644 --- a/projects/rocprofiler-compute/PYTHON_CODING_STYLE.md +++ b/projects/rocprofiler-compute/PYTHON_CODING_STYLE.md @@ -378,17 +378,18 @@ Each function should do **ONE** thing well. If you use "and" to describe what it ```python def create_df_pmc( - raw_data_root_dir: str, - nodes: Optional[list[str]], - spatial_multiplexing: bool, + raw_data_dir: str, kernel_verbose: int, verbose: int, config_dict: dict[str, Any], ) -> pd.DataFrame: """Load all raw pmc counters and join into one dataframe.""" - # Single responsibility: create and return a DataFrame. - # Delegates the details to a focused helper. - return _create_single_df_pmc(...) + # Single responsibility: load counters into a DataFrame and return it. + df = pd.read_csv(Path(raw_data_dir) / "pmc_perf.csv") + if config_dict.get("format_rocprof_output") == "rocpd": + df = utils_analysis.process_rocpd_csv(df) + kernel_name_shortener(df, kernel_verbose) + return df ``` **Bad:** Multiple responsibilities @@ -427,9 +428,11 @@ def pre_processing(self) -> None: # Each operation delegated to a focused helper workload.raw_pmc = file_io.create_df_pmc(...) - if args.spatial_multiplexing: - workload.raw_pmc = self.spatial_multiplex_merge_counters( - workload.raw_pmc + if self._profiling_config.get("iteration_multiplexing") is not None: + workload.raw_pmc = self.iteration_multiplex_impute_counters( + workload.raw_pmc, + policy=self._profiling_config["iteration_multiplexing"], + workload_dir=Path(path_info[0]), ) file_io.create_df_kernel_top_stats(...) @@ -452,9 +455,9 @@ def pre_processing(self) -> None: for csv_file in Path(path_info[0]).rglob("*.csv"): # ... lots of processing - # 30 lines of inline merge logic - if args.spatial_multiplexing: - # ... complex merging + # 30 lines of inline imputation logic + if self._profiling_config.get("iteration_multiplexing"): + # ... complex counter imputation # 40 lines of inline stats creation # ... more processing diff --git a/projects/rocprofiler-compute/src/argparser.py b/projects/rocprofiler-compute/src/argparser.py index 6fcdf78fc99..3045a7b0664 100644 --- a/projects/rocprofiler-compute/src/argparser.py +++ b/projects/rocprofiler-compute/src/argparser.py @@ -170,7 +170,6 @@ def add_general_group( "Enable experimental feature(s):\n" " GUI (--gui)\n" " TUI (--tui)\n" - " Spatial multiplexing (--spatial-multiplexing)\n" " Torch trace (--torch-trace, --list-torch-operators, --torch-operator)\n" " PC Sampling (--pc-sampling, --pc-sampling-method, " "--pc-sampling-interval)\n" @@ -541,21 +540,6 @@ def omniarg_parser( # Experimental Features ## ---------------------------- - profile_group.add_argument( - "--spatial-multiplexing", - dest="spatial_multiplexing", - required=False, - default=None, - base_action="store", - action=ExperimentalAction, - experimental_enabled=experimental_enabled, - feature_label="Spatial multiplexing", - type=int, - nargs="*", - metavar="", - help="\t\t\tProvide Node ID and GPU number per node.", - ) - profile_group.add_argument( "--membw-analysis", dest="membw_analysis", @@ -972,40 +956,10 @@ def omniarg_parser( help="\t\tSpecify the specs to correct. e.g. " '--specs-correction="specname1:specvalue1,specname2:specvalue2"', ) - analyze_advanced_group.add_argument( - "--list-nodes", - action="store_true", - help="\t\tMulti-node option: list all node names.", - ) - analyze_advanced_group.add_argument( - "--nodes", - metavar="", - type=str, - dest="nodes", - nargs="*", - help=( - "\t\tMulti-node option: filter with node names. " - "Enable it without node names means ALL." - ), - ) ## ---------------------------- # Experimental Features ## ---------------------------- - analyze_group.add_argument( - "--spatial-multiplexing", - dest="spatial_multiplexing", - required=False, - default=False, - base_action="store_const", - action=ExperimentalAction, - experimental_enabled=experimental_enabled, - feature_label="Spatial multiplexing", - nargs=0, - const=True, - help="\t\tMode of spatial multiplexing.", - ) - analyze_group.add_argument( "--membw-analysis", dest="membw_analysis", diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py index 6d46686b720..17ee046b480 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py @@ -27,7 +27,6 @@ from utils.utils_analysis import ( impute_counters_iteration_multiplex, is_workload_empty, - merge_counters_spatial_multiplex, ) from utils.utils_common import ( PC_SAMPLING_BLOCK_IDS, @@ -116,7 +115,6 @@ def build_pc_sampling_only_workload( dir_path: str, args: argparse.Namespace, tool_data: Optional[dict[str, Any]], - filter_nodes: Optional[list[str]] = None, ) -> None: """Build dispatch scaffolding and tables for a run without counters.""" workload.raw_pmc = file_io.process_pc_sampling_kernel_trace(tool_data) @@ -128,9 +126,6 @@ def build_pc_sampling_only_workload( raw_data_dir=str(dir_path), filter_gpu_ids=workload.filter_gpu_ids, filter_dispatch_ids=workload.filter_dispatch_ids, - filter_nodes=( - filter_nodes if filter_nodes is not None else workload.filter_nodes - ), time_unit=args.time_unit, kernel_verbose=args.kernel_verbose, ) @@ -147,10 +142,6 @@ def set_soc(self, omni_socs: dict[str, OmniSoC_Base]) -> None: def get_socs(self) -> Optional[dict[str, OmniSoC_Base]]: return self.__socs - @demarcate - def spatial_multiplex_merge_counters(self, df: pd.DataFrame) -> pd.DataFrame: - return merge_counters_spatial_multiplex(df) - @demarcate def iteration_multiplex_impute_counters( self, df: pd.DataFrame, policy: str, workload_dir: Path @@ -230,16 +221,9 @@ def initalize_runs( ) -> OrderedDict[str, schema.Workload]: args = self.get_args() - def get_sysinfo_path(data_path: str) -> Optional[str]: - return ( - data_path - if args.nodes is None and not args.spatial_multiplexing - else file_io.find_1st_sub_dir(data_path) - ) - # load required configs for path_info in args.path: - sysinfo_path = get_sysinfo_path(path_info[0]) + sysinfo_path = path_info[0] if sysinfo_path: sys_info = pd.read_csv(f"{sysinfo_path}/sysinfo.csv") arch = sys_info.iloc[0]["gpu_arch"] @@ -255,12 +239,8 @@ def get_sysinfo_path(data_path: str) -> Optional[str]: self.load_options(normalization_filter) for path_info in args.path: - # FIXME: - # For regular single node case, load sysinfo.csv directly - # For multi-node, either the default "all", or specified some, - # pick up the one in the 1st sub_dir. We could fix it properly later. w = schema.Workload() - sysinfo_path = get_sysinfo_path(path_info[0]) + sysinfo_path = path_info[0] if sysinfo_path: w.sys_info = pd.read_csv(f"{sysinfo_path}/sysinfo.csv") if not getattr(args, "no_roof", False): @@ -353,66 +333,11 @@ def sanitize(self) -> None: for dir_info in args.path: if not any([ - args.nodes, - args.list_nodes, - args.spatial_multiplexing, profiling_config.get("iteration_multiplexing"), self.pc_sampling_only(), ]): is_workload_empty(dir_info[0]) - # FIXME: - # The proper location of this func should be in pre_processing(). - # However, because of reading soc depends on sys spec, and sys - # spec depends on sys_info. And we read sys_info too early so we - # . can not do it now. There should be a way to make it simpler. - if args.list_nodes: - # NB: - # There are 2 ways to do it: one is doing like the below, checking - # sub dirs only as we assume the profiling stage generate sub dirs - # with node name. The 2nd way would be checkign host name in each - # sub dir and very those. - nodes = [ - subdir.name - for subdir in Path(args.path[0][0]).iterdir() - if subdir.is_dir() - ] - print("Node list:", " ".join(nodes)) - sys.exit(0) - - # Validate --nodes option against workload structure - if args.nodes is not None: - for dir_info in args.path: - workload_path = dir_info[0] - valid_nodes = file_io.get_valid_nodes(workload_path) - - if not valid_nodes: - # Single-node workload: sysinfo.csv is in root, not in - # subdirectories - console_error( - "analysis", - f"The workload at '{workload_path}' is single-node " - "(sysinfo.csv is in the root directory).\n" - "The --nodes option is only supported for multi-node " - "workloads where each node subdirectory contains its " - "own sysinfo.csv.\n" - "Remove the --nodes option to analyze this " - "single-node workload.", - ) - - # If specific nodes are provided (not empty list), validate them - if args.nodes: - invalid_nodes = [n for n in args.nodes if n not in valid_nodes] - if invalid_nodes: - console_error( - "analysis", - f"Invalid node(s): {', '.join(invalid_nodes)}\n" - f"Valid nodes for '{workload_path}': " - f"{', '.join(valid_nodes)}\n" - "Each valid node must be a subdirectory " - "containing sysinfo.csv.", - ) - # Ensure analysis output does not overwrite existing files if args.output_name: if not re.match(r"^[A-Za-z0-9_-]+$", args.output_name): @@ -681,47 +606,26 @@ def join_prof( return None def join_workload_csvs(self, workload_dir: Path) -> None: - """Join CSV files for a workload directory. - - Handles multi-node and spatial multiplexing. - - This method checks if the workload uses multi-node or spatial multiplexing, - and joins CSV files accordingly: - - Multi-node/spatial: Joins CSV files in each subdirectory (0/, 1/, 2/, etc.) - - Regular single-node: Joins CSV files in the workload directory directly + """Join results_*.csv source files into pmc_perf.csv if needed. Args: workload_dir: Path to the workload directory """ - args = self.get_args() - - # Helper to process and join CSV files in a single directory - def process_and_join_directory(directory: Path) -> None: - pmc_perf = directory / "pmc_perf.csv" - results_files = list(directory.glob("results_*.csv")) - - if pmc_perf.exists(): - console_debug(f"Using existing {pmc_perf}") - elif results_files: - console_log(f"Joining results_*.csv for {directory}...") - self.join_prof(directory, out=str(pmc_perf)) - console_log(f"Created {pmc_perf}") - else: - console_error( - f"No profiling data found in {directory}.\n" - f"Expected: pmc_perf.csv or results_*.csv\n" - f"Please run 'rocprof-compute profile' first." - ) - - # Handle multi-node and spatial multiplexing cases - if args.nodes is not None or args.spatial_multiplexing: - # Multi-node or spatial case: CSV files are in subdirectories - for subdir in workload_dir.iterdir(): - if subdir.is_dir(): - process_and_join_directory(subdir) + pmc_perf = workload_dir / "pmc_perf.csv" + results_files = list(workload_dir.glob("results_*.csv")) + + if pmc_perf.exists(): + console_debug(f"Using existing {pmc_perf}") + elif results_files: + console_log(f"Joining results_*.csv for {workload_dir}...") + self.join_prof(workload_dir, out=str(pmc_perf)) + console_log(f"Created {pmc_perf}") else: - # Regular single-node case: CSV files are in workload_dir directly - process_and_join_directory(workload_dir) + console_error( + f"No profiling data found in {workload_dir}.\n" + f"Expected: pmc_perf.csv or results_*.csv\n" + f"Please run 'rocprof-compute profile' first." + ) # ---------------------------------------------------- # Required methods to be implemented by child classes @@ -750,7 +654,6 @@ def pre_processing(self) -> None: (args.gpu_kernel, "filter_kernel_ids"), (args.gpu_id, "filter_gpu_ids"), (args.gpu_dispatch_id, "filter_dispatch_ids"), - (args.nodes, "filter_nodes"), ] for filter_list, attr_name in filter_configs: diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_cli.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_cli.py index 5e8992ce6bc..5ff38db4e75 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_cli.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_cli.py @@ -80,18 +80,11 @@ def pre_processing(self) -> None: # create 'mega dataframe' workload.raw_pmc = file_io.create_df_pmc( path_info[0], - args.nodes, - args.spatial_multiplexing, args.kernel_verbose, args.verbose, self._profiling_config, ) - if args.spatial_multiplexing: - workload.raw_pmc = self.spatial_multiplex_merge_counters( - workload.raw_pmc - ) - if self._profiling_config.get("iteration_multiplexing") is not None: workload.raw_pmc = self.iteration_multiplex_impute_counters( workload.raw_pmc, @@ -104,7 +97,6 @@ def pre_processing(self) -> None: raw_data_dir=path_info[0], filter_gpu_ids=workload.filter_gpu_ids, filter_dispatch_ids=workload.filter_dispatch_ids, - filter_nodes=workload.filter_nodes, time_unit=args.time_unit, kernel_verbose=args.kernel_verbose, ) diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_db.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_db.py index bf926729307..fad27f7baa2 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_db.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_db.py @@ -317,7 +317,6 @@ def run_analysis_metrics( def calc_pmc_df_data(self) -> dict[str, pd.DataFrame]: pmc_df_per_workload: dict[str, pd.DataFrame] = {} - args = self.get_args() for workload_path in self._runs.keys(): if not (Path(workload_path) / "pmc_perf.csv").exists(): @@ -327,9 +326,6 @@ def calc_pmc_df_data(self) -> dict[str, pd.DataFrame]: pd.read_csv(Path(workload_path) / "pmc_perf.csv") ) - if args.spatial_multiplexing: - pmc_df = self.spatial_multiplex_merge_counters(pmc_df) - if self._profiling_config.get("iteration_multiplexing") is not None: pmc_df = self.iteration_multiplex_impute_counters( pmc_df, diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py index 8bfbabb633d..32e316f41db 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py @@ -129,24 +129,16 @@ def generate_from_filter( self.dest_dir, args, pc_sampling_data, - filter_nodes=self._runs[self.dest_dir].filter_nodes, ) else: # Generate original raw df run_workload.raw_pmc = file_io.create_df_pmc( self.dest_dir, - args.nodes, - args.spatial_multiplexing, args.kernel_verbose, args.verbose, self._profiling_config, ) - if args.spatial_multiplexing: - run_workload.raw_pmc = self.spatial_multiplex_merge_counters( - run_workload.raw_pmc - ) - if self._profiling_config.get("iteration_multiplexing") is not None: run_workload.raw_pmc = self.iteration_multiplex_impute_counters( run_workload.raw_pmc, @@ -177,7 +169,6 @@ def generate_from_filter( raw_data_dir=str(self.dest_dir), filter_gpu_ids=run_workload.filter_gpu_ids, filter_dispatch_ids=run_workload.filter_dispatch_ids, - filter_nodes=self._runs[self.dest_dir].filter_nodes, time_unit=args.time_unit, kernel_verbose=args.kernel_verbose, ) @@ -418,16 +409,11 @@ def pre_processing(self) -> None: workload.raw_pmc = file_io.create_df_pmc( self.dest_dir, - args.nodes, - args.spatial_multiplexing, args.kernel_verbose, args.verbose, self._profiling_config, ) - if args.spatial_multiplexing: - workload.raw_pmc = self.spatial_multiplex_merge_counters(workload.raw_pmc) - if self._profiling_config.get("iteration_multiplexing") is not None: workload.raw_pmc = self.iteration_multiplex_impute_counters( workload.raw_pmc, @@ -440,7 +426,6 @@ def pre_processing(self) -> None: raw_data_dir=self.dest_dir, filter_gpu_ids=workload.filter_gpu_ids, filter_dispatch_ids=workload.filter_dispatch_ids, - filter_nodes=workload.filter_nodes, time_unit=args.time_unit, kernel_verbose=args.kernel_verbose, ) diff --git a/projects/rocprofiler-compute/src/rocprof_compute_base.py b/projects/rocprofiler-compute/src/rocprof_compute_base.py index 7ac6ef53dca..e03b38e2959 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_base.py @@ -364,13 +364,6 @@ def handle_analyze_args(self) -> None: "supported in --tui mode. Please remove --tui or run " "without the torch-operator flags.", ) - if args.spatial_multiplexing: - console_error( - "ml api trace", - "--torch-operator and --list-torch-operators do not yet " - "support multi-node analysis via --spatial-multiplexing. " - "Please remove one of these options.", - ) if args.output_format != "stdout": console_error( "ml api trace", @@ -390,13 +383,6 @@ def handle_analyze_args(self) -> None: "full kernel stats table will be shown regardless " "of the operator filter.", ) - if args.list_nodes: - console_warning( - "ml api trace", - "--torch-operator is ignored by --list-nodes; the " - "node enumeration does not respect the operator " - "filter.", - ) if list_torch_operators: console_warning( "ml api trace", @@ -407,13 +393,6 @@ def handle_analyze_args(self) -> None: "--torch-operator to list all operators.", ) - # Block all filters during spatial-multiplexing - if self.__args.spatial_multiplexing: - self.__args.gpu_id = None - self.__args.gpu_kernel = None - self.__args.gpu_dispatch_id = None - self.__args.nodes = None - @demarcate def handle_list_args(self) -> None: if self.__args.specs: @@ -754,12 +733,10 @@ def _run_bench_only(self) -> None: @demarcate def run_analysis(self) -> None: - # Lazy import pandas and file_io since they are only used in analysis - # mode. This keeps analysis deps out of the profile path. + # Lazy import pandas since it is only used in analysis mode. + # This keeps analysis deps out of the profile path. import pandas as pd - from utils import file_io - self.print_graphic() console_log(f"Analysis mode = {self.__analyze_mode}") @@ -792,16 +769,7 @@ def run_analysis(self) -> None: for path_list in analyzer.get_args().path: base_path = path_list[0] if isinstance(path_list, list) else path_list - # Determine sysinfo path - if ( - analyzer.get_args().nodes is None - and not analyzer.get_args().spatial_multiplexing - ): - sysinfo_path = base_path - else: - sysinfo_path = file_io.find_1st_sub_dir(base_path) - - sys_info = pd.read_csv(f"{sysinfo_path}/sysinfo.csv") + sys_info = pd.read_csv(f"{base_path}/sysinfo.csv") sys_info_dict = { key: value[0] for key, value in sys_info.to_dict("list").items() } diff --git a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py index 714d237056d..bb28e9755f8 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py @@ -206,13 +206,6 @@ def sanitize(self) -> None: "these options." ) - if args.spatial_multiplexing is not None: - console_error( - "ML API tracing does not yet support multi-node profiling " - "via --spatial-multiplexing. Please remove one of these " - "options." - ) - # Each --dispatch token must be a positive integer or a range # ('start:end' or 'start-end') with start <= end (1-based indexing). if args.dispatch: diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py index 25b97cb2d1a..77ef51caf85 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py @@ -5,7 +5,6 @@ import argparse import functools -import math import os import shutil import sys @@ -766,112 +765,52 @@ def perfmon_coalesce(self, counters: set[str]) -> None: console_debug(f"Collecting following counters: {', '.join(counters)} ") - output_files, file_count, accu_file_count = ( - self._allocate_perfmon_counter_files(counters) - ) + output_files, file_count, _ = self._allocate_perfmon_counter_files(counters) console_debug("profiling", f"perfmon_coalesce file_count {file_count}") - # TODO: rewrite the above logic for spatial_multiplexing later - if self.get_args().spatial_multiplexing: - # TODO: more error checking - if len(self.get_args().spatial_multiplexing) != 3: - console_error( - "profiling", - "multiplexing need provide node_idx node_count and gpu_count", - ) - - node_idx, node_count, gpu_count = map( - int, self.get_args().spatial_multiplexing - ) - - old_group_num = file_count + accu_file_count - new_bucket_count = node_count * gpu_count - groups_per_bucket = math.ceil( - old_group_num / new_bucket_count - ) # It equals to file num per node - max_groups_per_node = groups_per_bucket * gpu_count - - group_start = node_idx * max_groups_per_node - group_end = min((node_idx + 1) * max_groups_per_node, old_group_num) - - console_debug( - "profiling", - f"spatial_multiplexing node_idx {node_idx}, node_count {node_count}, " - f"gpu_count: {gpu_count},\n" - f"old_group_num {old_group_num}, new_bucket_count {new_bucket_count}, " - f"groups_per_bucket {groups_per_bucket},\n" - f"max_groups_per_node {max_groups_per_node}, " - f"group_start {group_start}, group_end {group_end}", - ) - - for f_idx in range(groups_per_bucket): - file_name = ( - Path(workload_perfmon_dir) - / f"pmc_perf_node_{node_idx}_{f_idx}.yaml" - ) - - pmc = [] - for g_idx in range( - group_start + f_idx * gpu_count, - min(group_end, group_start + (f_idx + 1) * gpu_count), - ): - gpu_idx = g_idx % gpu_count - for block_name in output_files[g_idx].blocks.keys(): - for ctr in output_files[g_idx].blocks[block_name].elements: - pmc.append(f"{ctr}:device={gpu_idx}") - - # Write counters to file - with open(file_name, "w", encoding="utf-8") as fd: - fd.write(yaml.dump({"jobs": [{"pmc": pmc}]}, sort_keys=False)) - else: - # Output to files - for f in output_files: - pmc_filename = workload_perfmon_dir / f"pmc_perf_{f.name}.yaml" - counter_def_filename = ( - workload_perfmon_dir / f"counter_def_{f.name}.yaml" - ) - - pmc = [] - counter_def: dict[str, Any] = {} - - for ctr in [ - ctr - for block_name in f.blocks - for ctr in f.blocks[block_name].elements - ]: - pmc.append(ctr) - # Add TCC channel counters definitions - if is_tcc_channel_counter(ctr): - counter_name = ctr.split("[")[0] - idx = int(ctr.split("[")[1].split("]")[0]) - xcd_idx = idx // int(self._mspec.l2_banks) - channel_idx = idx % int(self._mspec.l2_banks) - expression = ( - f"select({counter_name}," - f"[DIMENSION_XCC=[{xcd_idx}], " - f"DIMENSION_INSTANCE=[{channel_idx}]])" - ) - description = ( - f"{counter_name} on {xcd_idx}th XCC and " - f"{channel_idx}th channel" - ) - counter_def = add_counter_extra_config_input_yaml( - counter_def, - ctr, - description, - expression, - [self.__arch], - ) + # Output to files + for f in output_files: + pmc_filename = workload_perfmon_dir / f"pmc_perf_{f.name}.yaml" + counter_def_filename = workload_perfmon_dir / f"counter_def_{f.name}.yaml" + + pmc = [] + counter_def: dict[str, Any] = {} + + for ctr in [ + ctr for block_name in f.blocks for ctr in f.blocks[block_name].elements + ]: + pmc.append(ctr) + # Add TCC channel counters definitions + if is_tcc_channel_counter(ctr): + counter_name = ctr.split("[")[0] + idx = int(ctr.split("[")[1].split("]")[0]) + xcd_idx = idx // int(self._mspec.l2_banks) + channel_idx = idx % int(self._mspec.l2_banks) + expression = ( + f"select({counter_name}," + f"[DIMENSION_XCC=[{xcd_idx}], " + f"DIMENSION_INSTANCE=[{channel_idx}]])" + ) + description = ( + f"{counter_name} on {xcd_idx}th XCC and {channel_idx}th channel" + ) + counter_def = add_counter_extra_config_input_yaml( + counter_def, + ctr, + description, + expression, + [self.__arch], + ) - # Write counters to file - with open(pmc_filename, "w", encoding="utf-8") as fd: - fd.write(yaml.dump({"jobs": [{"pmc": pmc}]}, sort_keys=False)) + # Write counters to file + with open(pmc_filename, "w", encoding="utf-8") as fd: + fd.write(yaml.dump({"jobs": [{"pmc": pmc}]}, sort_keys=False)) - # Write counter definitions to file - if counter_def: - with open(counter_def_filename, "w", encoding="utf-8") as fp: - fp.write(yaml.dump(counter_def, sort_keys=False)) + # Write counter definitions to file + if counter_def: + with open(counter_def_filename, "w", encoding="utf-8") as fp: + fp.write(yaml.dump(counter_def, sort_keys=False)) # ---------------------------------------------------- # Required methods to be implemented by child classes diff --git a/projects/rocprofiler-compute/src/rocprof_compute_tui/analysis_tui.py b/projects/rocprofiler-compute/src/rocprof_compute_tui/analysis_tui.py index f5b7b7e1510..23b2058bebb 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_tui/analysis_tui.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_tui/analysis_tui.py @@ -62,22 +62,16 @@ def pre_processing(self) -> None: workload.raw_pmc = file_io.create_df_pmc( self.path, - self.args.nodes, - self.args.spatial_multiplexing, self.args.kernel_verbose, self.args.verbose, self._profiling_config, ) - if self.args.spatial_multiplexing: - workload.raw_pmc = self.spatial_multiplex_merge_counters(workload.raw_pmc) - kernel_top_df, dispatch_info_df = file_io.create_df_kernel_top_stats( df_in=workload.raw_pmc, raw_data_dir=self.path, filter_gpu_ids=workload.filter_gpu_ids, filter_dispatch_ids=workload.filter_dispatch_ids, - filter_nodes=workload.filter_nodes, time_unit=self.args.time_unit, kernel_verbose=self.args.kernel_verbose, ) diff --git a/projects/rocprofiler-compute/src/utils/file_io.py b/projects/rocprofiler-compute/src/utils/file_io.py index 9eece6062e2..94bc497a30d 100644 --- a/projects/rocprofiler-compute/src/utils/file_io.py +++ b/projects/rocprofiler-compute/src/utils/file_io.py @@ -15,7 +15,6 @@ from utils.kernel_name_shortener import kernel_name_shortener from utils.logger import ( console_debug, - console_error, console_log, console_warning, demarcate, @@ -71,7 +70,6 @@ def create_df_kernel_top_stats( raw_data_dir: str, filter_gpu_ids: Optional[list[str]], filter_dispatch_ids: Optional[list[str]], - filter_nodes: Optional[str], time_unit: str, kernel_verbose: int, sortby: str = "sum", @@ -88,11 +86,6 @@ def create_df_kernel_top_stats( # The logic below for filters are the same as in parser.apply_filters(), # which can be merged together if need it. - if filter_nodes: - df = df.loc[ - df["Node"].astype(str).isin(normalize_filter_to_str_list(filter_nodes)) - ] - if filter_gpu_ids: df = df.loc[ df["GPU_ID"].astype(str).isin(normalize_filter_to_str_list(filter_gpu_ids)) @@ -116,8 +109,6 @@ def create_df_kernel_top_stats( dispatch_columns = ["Kernel_Name", "GPU_ID"] if "Dispatch_ID" in df.columns: dispatch_columns.insert(0, "Dispatch_ID") - if "Node" in df.columns: - dispatch_columns.insert(0, "Node") dispatch_info = df[dispatch_columns] dispatch_output_path = Path(raw_data_dir) / "pmc_dispatch_info.csv" @@ -295,9 +286,7 @@ def process_pc_sampling_kernel_trace( @demarcate def create_df_pmc( - raw_data_root_dir: str, - nodes: Optional[list[str]], - spatial_multiplexing: bool, + raw_data_dir: str, kernel_verbose: int, verbose: int, config_dict: dict[str, Any], @@ -305,73 +294,23 @@ def create_df_pmc( """ Load all raw pmc counters and join into one df. """ + pmc_perf_path = Path(raw_data_dir) / f"{schema.PMC_PERF_FILE_PREFIX}.csv" + if not pmc_perf_path.is_file(): + return pd.DataFrame() - def create_single_df_pmc( - raw_data_dir: str, node_name: Optional[str], kernel_verbose: int, verbose: int - ) -> pd.DataFrame: - pmc_perf_path = Path(raw_data_dir) / f"{schema.PMC_PERF_FILE_PREFIX}.csv" - if not pmc_perf_path.is_file(): - return pd.DataFrame() - - df = pd.read_csv(pmc_perf_path) - - if config_dict.get("format_rocprof_output") == "rocpd": - df = utils_analysis.process_rocpd_csv(df) - - # Demangle original KernelNames - # Skip for Standalone Roofline with -1 to keep full kernel names - if kernel_verbose >= 0: - kernel_name_shortener(df, kernel_verbose) - - if node_name is not None: - df.insert(0, "Node", node_name) - - if verbose >= 2: - console_debug(f"pmc_raw_data final_single_df {df.info}") - return df - - root_path = Path(raw_data_root_dir) - - # 1. spatial multiplexing case - if spatial_multiplexing: - dfs: list[pd.DataFrame] = [] - - for subdir in root_path.iterdir(): - if subdir.is_dir(): - new_df = create_single_df_pmc( - str(subdir), str(subdir.name), kernel_verbose, verbose - ) - if not new_df.empty: - dfs.append(new_df) - return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame() - - # 2. regular single node case (nodes=None) - if nodes is None: - return create_single_df_pmc(raw_data_root_dir, None, kernel_verbose, verbose) - - # 3. all nodes case (nodes=[]) - if not nodes: - dfs: list[pd.DataFrame] = [] - - for subdir in root_path.iterdir(): - if subdir.is_dir(): - new_df = create_single_df_pmc( - str(subdir), str(subdir.name), kernel_verbose, verbose - ) - if not new_df.empty: - dfs.append(new_df) - return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame() - - # 4. specified node list case (nodes=[...]) - dfs: list[pd.DataFrame] = [] - - for node in nodes: - node_path = root_path / node - if node_path.exists(): - new_df = create_single_df_pmc(str(node_path), node, kernel_verbose, verbose) - if not new_df.empty: - dfs.append(new_df) - return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame() + df = pd.read_csv(pmc_perf_path) + + if config_dict.get("format_rocprof_output") == "rocpd": + df = utils_analysis.process_rocpd_csv(df) + + # Demangle original KernelNames + # Skip for Standalone Roofline with -1 to keep full kernel names + if kernel_verbose >= 0: + kernel_name_shortener(df, kernel_verbose) + + if verbose >= 2: + console_debug(f"pmc_raw_data final_single_df {df.info}") + return df def collect_wave_occu_per_cu(in_dir: str, out_dir: str, num_se: int) -> None: @@ -435,30 +374,3 @@ def is_single_panel_config( console_warning( "Found multiple panel config sets but incomplete for all archs." ) - - -def find_1st_sub_dir(directory: str) -> Optional[str]: - """ - Find the first sub dir in a directory - """ - dir_path = Path(directory) - try: - # Iterate over entries in the directory - for entry in dir_path.iterdir(): - if entry.is_dir(): # Check if it's a directory - return str(entry) - return None - except FileNotFoundError: - console_error(f'The directory "{directory}" does not exist.', exit=False) - - -def get_valid_nodes(directory: str) -> list[str]: - """Return subdirectory names that contain sysinfo.csv""" - dir_path = Path(directory) - if not dir_path.is_dir(): - return [] - return [ - entry.name - for entry in dir_path.iterdir() - if entry.is_dir() and (entry / "sysinfo.csv").exists() - ] diff --git a/projects/rocprofiler-compute/src/utils/parser.py b/projects/rocprofiler-compute/src/utils/parser.py index fbf30e06ccb..3aba1d7ef9e 100755 --- a/projects/rocprofiler-compute/src/utils/parser.py +++ b/projects/rocprofiler-compute/src/utils/parser.py @@ -282,16 +282,6 @@ def apply_filters( # TODO: error out properly if filters out of bound filtered_df = workload.raw_pmc - # Apply node filter - if workload.filter_nodes: - filtered_df = filtered_df.loc[ - filtered_df["Node"] - .astype(str) - .isin(normalize_filter_to_str_list(workload.filter_nodes)) - ] - if filtered_df.empty: - console_error("analysis", f"{workload.filter_nodes} is invalid") - # Apply GPU ID filter if workload.filter_gpu_ids: filtered_df = filtered_df.loc[ diff --git a/projects/rocprofiler-compute/src/utils/schema.py b/projects/rocprofiler-compute/src/utils/schema.py index 7d6a23baa48..fe4d77f90fa 100644 --- a/projects/rocprofiler-compute/src/utils/schema.py +++ b/projects/rocprofiler-compute/src/utils/schema.py @@ -43,7 +43,6 @@ class Workload: filter_kernel_ids: list[int] = field(default_factory=list) filter_gpu_ids: list[int] = field(default_factory=list) filter_dispatch_ids: list[int] = field(default_factory=list) - filter_nodes: list[str] = field(default_factory=list) avail_ips: list[int] = field(default_factory=list) roofline_peaks: pd.DataFrame = field(default_factory=pd.DataFrame) roofline_metrics: dict[int, dict[str, Any]] = field(default_factory=dict) diff --git a/projects/rocprofiler-compute/src/utils/utils_analysis.py b/projects/rocprofiler-compute/src/utils/utils_analysis.py index d4e293dd8a6..b4489b68019 100644 --- a/projects/rocprofiler-compute/src/utils/utils_analysis.py +++ b/projects/rocprofiler-compute/src/utils/utils_analysis.py @@ -736,95 +736,6 @@ def _warn_kernels_with_incomplete_coverage(incomplete_kernel_names: set[str]) -> ) -def merge_counters_spatial_multiplex(df: pd.DataFrame) -> pd.DataFrame: - """ - For spatial multiplexing, this merges counter values for the same kernel that - runs on different devices. For time stamp, start time stamp will use median - while for end time stamp, it will be equal to the summation between median - start stamp and median delta time. - """ - non_counter_column_index = [ - "Dispatch_ID", - "GPU_ID", - "Queue_ID", - "PID", - "TID", - "Grid_Size", - "Workgroup_Size", - "LDS_Per_Workgroup", - "Scratch_Per_Workitem", - "Arch_VGPR", - "Accum_VGPR", - "SGPR", - "Wave_Size", - "Kernel_Name", - "Start_Timestamp", - "End_Timestamp", - "Correlation_ID", - "Kernel_ID", - "Node", - ] - - expired_column_index = [ - "Node", - "PID", - "TID", - "Queue_ID", - ] - - kernel_name_column_name = "Kernel_Name" - if "Kernel_Name" not in df and "Name" in df: - kernel_name_column_name = "Name" - - # Find the values in Kernel_Name that occur more than once - kernel_single_occurances = df[kernel_name_column_name].value_counts().index - - # Define a list to store the merged rows - result_data: list[dict[str, Any]] = [] - - for kernel_name in kernel_single_occurances: - # Get all rows for the current kernel_name - group = df[df[kernel_name_column_name] == kernel_name] - - # Create a dictionary to store the merged row for the current group - merged_row: dict[str, Any] = {} - - # Process non-counter columns - for col in [ - col for col in non_counter_column_index if col not in expired_column_index - ]: - if col == "Start_Timestamp": - # For Start_Timestamp, take the median - merged_row[col] = group["Start_Timestamp"].median() - elif col == "End_Timestamp": - # For End_Timestamp, calculate the median delta time - delta_time = group[col] - group["Start_Timestamp"] - merged_row[col] = group["Start_Timestamp"] + delta_time.median() - else: - # For other non-counter columns, take the first occurrence (0th row) - merged_row[col] = group.iloc[0][col] - - # Process counter columns (assumed to be all columns not in - # non_counter_column_index) - counter_columns = [ - col for col in group.columns if col not in non_counter_column_index - ] - for counter_col in counter_columns: - # for counter columns, take the first non-none (or non-nan) value - current_valid_counter_group = group[group[counter_col].notna()] - first_valid_value = ( - current_valid_counter_group.iloc[0][counter_col] - if len(current_valid_counter_group) > 0 - else None - ) - merged_row[counter_col] = first_valid_value - - # Append the merged row to the result list - result_data.append(merged_row) - - return pd.DataFrame(result_data) - - def process_rocpd_csv(df: pd.DataFrame) -> pd.DataFrame: """ Merge counters across unique dispatches from the diff --git a/projects/rocprofiler-compute/tests/test_analyze_commands.py b/projects/rocprofiler-compute/tests/test_analyze_commands.py index 086368bed7a..223e54d4fe8 100644 --- a/projects/rocprofiler-compute/tests/test_analyze_commands.py +++ b/projects/rocprofiler-compute/tests/test_analyze_commands.py @@ -972,10 +972,8 @@ def __init__(self): "vecMul", ], "Dispatch_ID": [0, 1, 2, 3], - "Node": ["node0", "node0", "node1", "node1"], }) - filter_nodes = None filter_gpu_ids = None filter_kernel_ids = None filter_dispatch_ids = None @@ -996,12 +994,6 @@ def __init__(self): result = apply_filters(workload, "/tmp", False, False) assert len(result) == 2 - # Test node filter with list of strings - workload = MockWorkload() - workload.filter_nodes = ["node0", "node1"] - result = apply_filters(workload, "/tmp", False, False) - assert len(result) == 4 - # Test GPU filter with list of integers workload = MockWorkload() workload.filter_gpu_ids = [0, 1] @@ -1537,7 +1529,6 @@ def test_create_df_kernel_top_stats_returns_valid_dataframes( raw_data_dir=temp_dir, filter_gpu_ids=None, filter_dispatch_ids=None, - filter_nodes=None, time_unit="ns", kernel_verbose=0, sortby="sum", @@ -1587,7 +1578,6 @@ def test_create_df_kernel_top_stats_grouping_and_aggregation( raw_data_dir=temp_dir, filter_gpu_ids=None, filter_dispatch_ids=None, - filter_nodes=None, time_unit="ns", kernel_verbose=0, sortby="sum", @@ -1608,7 +1598,6 @@ def test_create_df_kernel_top_stats_grouping_and_aggregation( raw_data_dir=temp_dir, filter_gpu_ids=None, filter_dispatch_ids=None, - filter_nodes=None, time_unit="ns", kernel_verbose=0, sortby="kernel", @@ -1622,16 +1611,14 @@ def test_create_df_kernel_top_stats_grouping_and_aggregation( @pytest.mark.misc def test_create_df_kernel_top_stats_filters(): """Test GPU ID, dispatch ID (including '> n' syntax), - node filters, and empty input handling.""" + and empty input handling.""" import tempfile from utils.file_io import create_df_kernel_top_stats - # Create test data with Node column for node filtering raw_pmc_with_node = pd.DataFrame({ "Kernel_Name": ["kernel_a", "kernel_b", "kernel_a", "kernel_c"], "GPU_ID": [0, 0, 1, 0], - "Node": ["node0", "node0", "node1", "node0"], "Dispatch_ID": [1, 2, 3, 4], "Start_Timestamp": [1000, 2000, 3000, 4000], "End_Timestamp": [1500, 2800, 3400, 4200], @@ -1644,7 +1631,6 @@ def test_create_df_kernel_top_stats_filters(): raw_data_dir=temp_dir, filter_gpu_ids="0", filter_dispatch_ids=None, - filter_nodes=None, time_unit="ns", kernel_verbose=0, ) @@ -1657,7 +1643,6 @@ def test_create_df_kernel_top_stats_filters(): raw_data_dir=temp_dir, filter_gpu_ids=None, filter_dispatch_ids=["> 2"], - filter_nodes=None, time_unit="ns", kernel_verbose=0, ) @@ -1671,25 +1656,11 @@ def test_create_df_kernel_top_stats_filters(): raw_data_dir=temp_dir, filter_gpu_ids=None, filter_dispatch_ids=["1", "2"], - filter_nodes=None, time_unit="ns", kernel_verbose=0, ) assert len(dispatch_df) == 2 - # Test node filter - kernel_top_df, dispatch_df = create_df_kernel_top_stats( - df_in=raw_pmc_with_node, - raw_data_dir=temp_dir, - filter_gpu_ids=None, - filter_dispatch_ids=None, - filter_nodes="node1", - time_unit="ns", - kernel_verbose=0, - ) - assert len(dispatch_df) == 1 - assert dispatch_df.iloc[0]["Kernel_Name"] == "kernel_a" - # Test empty input handling empty_raw_pmc = pd.DataFrame({ "Kernel_Name": [], @@ -1703,7 +1674,6 @@ def test_create_df_kernel_top_stats_filters(): raw_data_dir=temp_dir, filter_gpu_ids=None, filter_dispatch_ids=None, - filter_nodes=None, time_unit="ns", kernel_verbose=0, ) @@ -1871,8 +1841,6 @@ def test_join_prof_renames_sq_accum_prev_hires_to_bucket_target(tmp_path): inst = cli_analysis.__new__(cli_analysis) args = Namespace( path=[[str(tmp_path)]], - nodes=None, - spatial_multiplexing=False, join_type="kernel", kokkos_trace=False, ) diff --git a/projects/rocprofiler-compute/tests/test_profiler_base.py b/projects/rocprofiler-compute/tests/test_profiler_base.py index bdcf1865a64..7ec3fba65c0 100644 --- a/projects/rocprofiler-compute/tests/test_profiler_base.py +++ b/projects/rocprofiler-compute/tests/test_profiler_base.py @@ -36,7 +36,6 @@ def _make_sanitize_args(remaining, torch_trace=False, **overrides): iteration_multiplexing=None, attach_pid=None, attach_duration_msec=None, - spatial_multiplexing=None, remaining=["--"] + remaining, torch_trace=torch_trace, dispatch=None, diff --git a/projects/rocprofiler-compute/tests/test_utils.py b/projects/rocprofiler-compute/tests/test_utils.py index c6a4cad051b..1885e518eb6 100644 --- a/projects/rocprofiler-compute/tests/test_utils.py +++ b/projects/rocprofiler-compute/tests/test_utils.py @@ -3462,212 +3462,6 @@ def test_is_workload_empty_pandas_import_dependency(): mock_df.dropna.assert_called_once() -# ============================================================================= -# TESTS FOR merge_counters_spatial_multiplex FUNCTION -# ============================================================================= - - -def test_merge_counters_spatial_multiplex_basic_functionality(): - """ - Test merge_counters_spatial_multiplex with basic multi-index DataFrame. - - Returns: - None: Asserts function correctly merges counter values for spatial multiplexing. - """ - import pandas as pd - - data = { - "Dispatch_ID": [1, 2, 3, 4, 5, 6], - "GPU_ID": [0, 0, 1, 1, 2, 2], - "Grid_Size": [64, 128, 256, 512, 1024, 2048], - "Workgroup_Size": [16, 32, 64, 32, 64, 128], - "LDS_Per_Workgroup": [1024, 2048, 4096, 2048, 4096, 8192], - "Scratch_Per_Workitem": [0, 0, 0, 0, 0, 0], - "Arch_VGPR": [32, 64, 96, 64, 96, 128], - "Accum_VGPR": [0, 0, 0, 0, 0, 0], - "SGPR": [16, 32, 48, 32, 48, 64], - "Wave_Size": [64, 64, 64, 64, 64, 64], - "Correlation_ID": [1001, 1002, 1003, 2001, 2002, 2003], - "Kernel_ID": [501, 502, 503, 601, 602, 603], - "Kernel_Name": [ - "kernel_a", - "kernel_a", - "kernel_b", - "kernel_c", - "kernel_c", - "kernel_d", - ], - "Start_Timestamp": [1000, 1100, 2000, 3000, 3100, 4000], - "End_Timestamp": [1200, 1300, 2500, 3400, 3500, 4800], - "Counter1": [100, 200, 300, 400, 500, 600], - } - df = pd.DataFrame(data) - - result = utils_analysis.merge_counters_spatial_multiplex(df) - - assert isinstance(result, pd.DataFrame) - - -def test_merge_counters_spatial_multiplex_kernel_name_fallback(): - """ - Test merge_counters_spatial_multiplex when Kernel_Name is missing but Name exists. - - Returns: - None: Asserts function uses Name column when Kernel_Name is not available. - """ - import pandas as pd - - data = { - "Dispatch_ID": [1, 2], - "GPU_ID": [0, 0], - "Grid_Size": [64, 128], - "Workgroup_Size": [16, 32], - "LDS_Per_Workgroup": [1024, 2048], - "Scratch_Per_Workitem": [0, 0], - "Arch_VGPR": [32, 64], - "Accum_VGPR": [0, 0], - "SGPR": [16, 32], - "Wave_Size": [64, 64], - "Correlation_ID": [1001, 1002], - "Kernel_ID": [501, 502], - "Name": ["kernel_a", "kernel_a"], - "Start_Timestamp": [1000, 1100], - "End_Timestamp": [1200, 1300], - "Counter1": [100, 200], - } - df = pd.DataFrame(data) - - # The function currently has a bug where it doesn't properly check for 'Kernel_Name' - # existence before accessing it, even though it has fallback logic for 'Name' - try: - result = utils_analysis.merge_counters_spatial_multiplex(df) - - assert isinstance(result, pd.DataFrame) - assert len(result) > 0 - - except KeyError as e: - if "'Kernel_Name'" in str(e): - pytest.skip( - "Function doesn't properly check for Kernel_Name " - "existence before accessing - needs to validate column " - "presence in the check condition" - ) - else: - raise - - -def test_merge_counters_spatial_multiplex_single_kernel_occurrence(): - """ - Test merge_counters_spatial_multiplex with kernels that appear only once. - - Returns: - None: Asserts function handles single kernel occurrences correctly. - """ - import pandas as pd - - data = { - "Dispatch_ID": [1, 2, 3], - "GPU_ID": [0, 1, 2], - "Grid_Size": [64, 128, 256], - "Workgroup_Size": [16, 32, 64], - "LDS_Per_Workgroup": [1024, 2048, 4096], - "Scratch_Per_Workitem": [0, 0, 0], - "Arch_VGPR": [32, 64, 96], - "Accum_VGPR": [0, 0, 0], - "SGPR": [16, 32, 48], - "Wave_Size": [64, 64, 64], - "Correlation_ID": [1001, 1002, 1003], - "Kernel_ID": [501, 502, 503], - "Kernel_Name": ["kernel_a", "kernel_b", "kernel_c"], - "Start_Timestamp": [1000, 2000, 3000], - "End_Timestamp": [1200, 2500, 3800], - "Counter1": [100, 200, 300], - } - df = pd.DataFrame(data) - - result = utils_analysis.merge_counters_spatial_multiplex(df) - - assert isinstance(result, pd.DataFrame) - assert len(result) == 3 - - -def test_merge_counters_spatial_multiplex_multiple_duplicate_kernels(): - """ - Test merge_counters_spatial_multiplex with multiple kernels having duplicates. - - Returns: - None: Asserts function correctly handles multiple kernel duplicates. - """ - import pandas as pd - - data = { - "Dispatch_ID": [1, 2, 3, 4, 5, 6], - "GPU_ID": [0, 0, 1, 1, 2, 2], - "Grid_Size": [64, 64, 128, 128, 256, 256], - "Workgroup_Size": [16, 16, 32, 32, 64, 64], - "LDS_Per_Workgroup": [1024, 1024, 2048, 2048, 4096, 4096], - "Scratch_Per_Workitem": [0, 0, 0, 0, 0, 0], - "Arch_VGPR": [32, 32, 64, 64, 96, 96], - "Accum_VGPR": [0, 0, 0, 0, 0, 0], - "SGPR": [16, 16, 32, 32, 48, 48], - "Wave_Size": [64, 64, 64, 64, 64, 64], - "Correlation_ID": [1001, 1002, 1003, 1004, 1005, 1006], - "Kernel_ID": [501, 502, 503, 504, 505, 506], - "Kernel_Name": [ - "kernel_a", - "kernel_a", - "kernel_b", - "kernel_b", - "kernel_c", - "kernel_c", - ], - "Start_Timestamp": [1000, 1100, 2000, 2100, 3000, 3100], - "End_Timestamp": [1200, 1300, 2500, 2600, 3800, 3900], - "Counter1": [100, 200, 300, 400, 500, 600], - } - df = pd.DataFrame(data) - - result = utils_analysis.merge_counters_spatial_multiplex(df) - - assert isinstance(result, pd.DataFrame) - assert len(result) == 3 - - -def test_merge_counters_spatial_multiplex_timestamp_median_calculation(): - """ - Test merge_counters_spatial_multiplex timestamp median calculations. - - Returns: - None: Asserts function correctly calculates median timestamps. - """ - import pandas as pd - - data = { - "Dispatch_ID": [1, 2, 3], - "GPU_ID": [0, 0, 0], - "Grid_Size": [64, 64, 64], - "Workgroup_Size": [16, 16, 16], - "LDS_Per_Workgroup": [1024, 1024, 1024], - "Scratch_Per_Workitem": [0, 0, 0], - "Arch_VGPR": [32, 32, 32], - "Accum_VGPR": [0, 0, 0], - "SGPR": [16, 16, 16], - "Wave_Size": [64, 64, 64], - "Correlation_ID": [1001, 1002, 1003], - "Kernel_ID": [501, 502, 503], - "Kernel_Name": ["kernel_a", "kernel_a", "kernel_a"], - "Start_Timestamp": [1000, 1200, 1400], - "End_Timestamp": [1500, 1700, 1900], - "Counter1": [100, 200, 300], - } - df = pd.DataFrame(data) - - result = utils_analysis.merge_counters_spatial_multiplex(df) - - assert isinstance(result, pd.DataFrame) - assert len(result) == 1 - - # ============================================================================= # Tests for convert_metric_id_to_panel_info function # ============================================================================ diff --git a/projects/rocprofiler-compute/tools/counter_grouping_inspector.py b/projects/rocprofiler-compute/tools/counter_grouping_inspector.py index f8583fa0b15..a74f43d298d 100755 --- a/projects/rocprofiler-compute/tools/counter_grouping_inspector.py +++ b/projects/rocprofiler-compute/tools/counter_grouping_inspector.py @@ -178,7 +178,6 @@ def run_soc_detect_and_coalesce( membw_analysis=False, set_selected=None, roof_only=False, - spatial_multiplexing=None, no_roof=True, device=0, ) diff --git a/projects/rocprofiler-compute/tools/metric_validation.py b/projects/rocprofiler-compute/tools/metric_validation.py index 091d6e13452..2fb99b0aca6 100755 --- a/projects/rocprofiler-compute/tools/metric_validation.py +++ b/projects/rocprofiler-compute/tools/metric_validation.py @@ -93,8 +93,6 @@ def dump_values(self) -> None: # create 'mega dataframe' raw_pmc = file_io.create_df_pmc( path_info[0], - args.nodes, - args.spatial_multiplexing, args.kernel_verbose, args.verbose, self._profiling_config,