Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 38 additions & 72 deletions scripts/gemm_analysis/process_gpu_timeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,33 +13,29 @@
"""

import pandas as pd
import numpy as np
import os
import glob
import argparse
from pathlib import Path
import sys

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))

from utils.gpu_timeline_utils import (
read_gpu_timeline_from_excel,
aggregate_gpu_timeline,
get_method_suffix,
get_aggregation_description,
print_section,
)


# =============================================================================
# Utility Functions
# =============================================================================


def geometric_mean(values):
"""Calculate geometric mean, handling zeros."""
values = np.array(values)
# Replace zeros with small value to avoid log(0)
values = np.where(values == 0, 1e-10, values)
return np.exp(np.mean(np.log(values)))


def print_section(title, char="=", width=80):
"""Print a formatted section header."""
print(f"\n{char * width}")
print(title)
print(char * width)


def parse_perf_filename(filename):
"""
Parse performance filename to extract channel config and rank.
Expand Down Expand Up @@ -95,49 +91,32 @@ def read_rank_data(rank_files):
"""
rank_data = []
for rank, file_path in rank_files:
try:
df = pd.read_excel(file_path, sheet_name="gpu_timeline")
df["rank"] = rank
df, success = read_gpu_timeline_from_excel(Path(file_path), rank=rank)
if success:
rank_data.append(df)
except Exception as e:
print(f" Warning: Could not read {os.path.basename(file_path)}: {e}")
return rank_data


def aggregate_rank_data(
rank_data, thread_config, channel_config, num_ranks, use_geo_mean
):
def add_config_metadata(df, thread_config, channel_config, num_ranks):
"""
Aggregate data across ranks and add metadata.
Add configuration metadata columns to the dataframe.

Args:
rank_data: List of DataFrames
df: Source DataFrame
thread_config: Thread configuration string (e.g., '256thread')
channel_config: Channel configuration string (e.g., '28ch')
num_ranks: Number of ranks
use_geo_mean: Whether to use geometric mean

Returns:
DataFrame: Aggregated data with metadata
DataFrame: DataFrame with added metadata columns
"""
combined = pd.concat(rank_data, ignore_index=True)

agg_func = geometric_mean if use_geo_mean else "mean"
aggregated = (
combined.groupby("type")
.agg({"time ms": agg_func, "percent": agg_func})
.reset_index()
)

# Add metadata
aggregated["thread_config"] = thread_config
aggregated["threads_num"] = int(thread_config.replace("thread", ""))
aggregated["channel_config"] = channel_config
aggregated["channels_num"] = int(channel_config.replace("ch", ""))
aggregated["full_config"] = f"{thread_config}_{channel_config}"
aggregated["num_ranks"] = num_ranks

return aggregated
df["thread_config"] = thread_config
df["threads_num"] = int(thread_config.replace("thread", ""))
df["channel_config"] = channel_config
df["channels_num"] = int(channel_config.replace("ch", ""))
df["full_config"] = f"{thread_config}_{channel_config}"
df["num_ranks"] = num_ranks
return df


def process_channel_config(channel_config, channel_groups, use_geo_mean, thread_config):
Expand All @@ -164,9 +143,8 @@ def process_channel_config(channel_config, channel_groups, use_geo_mean, thread_
print(f" No valid data for {channel_config}")
return None

aggregated = aggregate_rank_data(
rank_data, thread_config, channel_config, num_ranks, use_geo_mean
)
aggregated = aggregate_gpu_timeline(rank_data, use_geo_mean)
aggregated = add_config_metadata(aggregated, thread_config, channel_config, num_ranks)
print(f" [OK] Aggregated across {num_ranks} ranks")

return aggregated
Expand Down Expand Up @@ -203,9 +181,7 @@ def process_thread_config(thread_config, tracelens_dir, use_geo_mean):
results = []

# Process each channel configuration (sorted by channel number)
sorted_channels = sorted(
channel_groups.keys(), key=lambda x: int(x.replace("ch", ""))
)
sorted_channels = sorted(channel_groups.keys(), key=lambda x: int(x.replace("ch", "")))
for channel_config in sorted_channels:
aggregated = process_channel_config(
channel_config, channel_groups, use_geo_mean, thread_config
Expand All @@ -232,9 +208,7 @@ def create_pivot_sheet(df, value_col):
Returns:
DataFrame: Pivot table
"""
return df.pivot_table(
values=value_col, index="type", columns="full_config", aggfunc="first"
)
return df.pivot_table(values=value_col, index="type", columns="full_config", aggfunc="first")


def create_summary_sheet(df):
Expand Down Expand Up @@ -278,15 +252,9 @@ def save_excel_output(final_df, output_path):
"""
with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
final_df.to_excel(writer, sheet_name="All_Data", index=False)
create_pivot_sheet(final_df, "time ms").to_excel(
writer, sheet_name="Pivot_Time_ms"
)
create_pivot_sheet(final_df, "percent").to_excel(
writer, sheet_name="Pivot_Percent"
)
create_summary_sheet(final_df).to_excel(
writer, sheet_name="Summary_By_Config", index=False
)
create_pivot_sheet(final_df, "time ms").to_excel(writer, sheet_name="Pivot_Time_ms")
create_pivot_sheet(final_df, "percent").to_excel(writer, sheet_name="Pivot_Percent")
create_summary_sheet(final_df).to_excel(writer, sheet_name="Summary_By_Config", index=False)

print(f"[SAVED] {output_path}")
print(" Sheets created:")
Expand All @@ -310,9 +278,9 @@ def print_metric_comparison(df, metric_type, description):
metric_type: Type of metric to filter
description: Description to print
"""
metric_data = df[df["type"] == metric_type][
["full_config", "time ms", "percent"]
].sort_values("time ms")
metric_data = df[df["type"] == metric_type][["full_config", "time ms", "percent"]].sort_values(
"time ms"
)
print(f"\n{description}:")
print(metric_data.to_string(index=False))

Expand Down Expand Up @@ -356,16 +324,14 @@ def process_gpu_timeline_data(sweep_dir, use_geo_mean=False):
print(f"Error: tracelens_analysis directory not found in {sweep_dir}")
return

agg_method = "Geometric Mean" if use_geo_mean else "Arithmetic Mean"
agg_method = get_aggregation_description(use_geo_mean)
print("=" * 80)
print(f"Processing GPU Timeline data from: {sweep_dir}")
print(f"Aggregation method: {agg_method}")
print("=" * 80)

# Find all thread configurations
thread_configs = [
d.name for d in tracelens_dir.iterdir() if d.is_dir() and "thread" in d.name
]
thread_configs = [d.name for d in tracelens_dir.iterdir() if d.is_dir() and "thread" in d.name]

if not thread_configs:
print("Error: No thread configuration directories found")
Expand Down Expand Up @@ -404,7 +370,7 @@ def process_gpu_timeline_data(sweep_dir, use_geo_mean=False):
final_df = final_df.sort_values(["threads_num", "channels_num", "type"])

# Save to Excel
method_suffix = "geomean" if use_geo_mean else "mean"
method_suffix = get_method_suffix(use_geo_mean)
output_path = tracelens_dir / f"gpu_timeline_all_configs_{method_suffix}.xlsx"
save_excel_output(final_df, output_path)

Expand Down
62 changes: 62 additions & 0 deletions scripts/tracelens_single_config/combine_reports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/usr/bin/env python3
import pandas as pd
import argparse
from pathlib import Path


def combine_collective_reports(
    baseline_path, test_path, output_path, baseline_label="baseline", test_label="saleelk"
):
    """
    Combine two collective reports into a single Excel file.

    For every sheet present in BOTH workbooks, the baseline and test rows are
    concatenated with an added ``source`` column identifying their origin.
    Sheets that exist only in the baseline workbook are skipped (with a
    message); sheets that exist only in the test workbook are ignored.

    Args:
        baseline_path: Path to baseline collective_all_ranks.xlsx.
        test_path: Path to test collective_all_ranks.xlsx.
        output_path: Destination path for the combined Excel file.
        baseline_label: Value written to the ``source`` column for baseline rows.
        test_label: Value written to the ``source`` column for test rows.

    Returns:
        int: Exit code (0 on success), suitable for ``sys.exit``.
    """

    print(f"Loading baseline: {baseline_path}")
    baseline_xl = pd.ExcelFile(baseline_path)

    print(f"Loading test: {test_path}")
    test_xl = pd.ExcelFile(test_path)

    try:
        print(f"\nBaseline sheets: {baseline_xl.sheet_names}")
        print(f"Test sheets: {test_xl.sheet_names}")

        with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
            for sheet_name in baseline_xl.sheet_names:
                if sheet_name not in test_xl.sheet_names:
                    print(f" Skip {sheet_name} - not in test file")
                    continue

                # Parse from the already-loaded ExcelFile objects instead of
                # re-opening the workbook from disk for every sheet
                # (pd.read_excel(path, ...) re-parses the whole file each call).
                baseline_df = baseline_xl.parse(sheet_name)
                test_df = test_xl.parse(sheet_name)

                baseline_df["source"] = baseline_label
                test_df["source"] = test_label

                combined = pd.concat([baseline_df, test_df], ignore_index=True)

                combined.to_excel(writer, sheet_name=sheet_name, index=False)
                print(
                    f" Combined {sheet_name}: {len(baseline_df)} + {len(test_df)} = {len(combined)} rows"
                )
    finally:
        # ExcelFile keeps the underlying file handle open; release both.
        baseline_xl.close()
        test_xl.close()

    print(f"\nSaved: {output_path}")
    return 0


def main():
    """Parse CLI arguments and combine the two collective reports."""
    parser = argparse.ArgumentParser(description="Combine two collective reports")

    # All three arguments are mandatory paths.
    for flag, help_text in (
        ("--baseline", "Path to baseline collective_all_ranks.xlsx"),
        ("--test", "Path to test collective_all_ranks.xlsx"),
        ("--output", "Output path for combined Excel file"),
    ):
        parser.add_argument(flag, required=True, help=help_text)

    opts = parser.parse_args()
    return combine_collective_reports(opts.baseline, opts.test, opts.output)


if __name__ == "__main__":
    # Propagate the function's return value to the shell as the exit code.
    exit(main())
110 changes: 110 additions & 0 deletions scripts/tracelens_single_config/process_gpu_timeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#!/usr/bin/env python3
"""
GPU Timeline Processing for Single Configuration.

Aggregates gpu_timeline data across all ranks in a tracelens analysis directory.

Usage:
python process_gpu_timeline.py --reports-dir /path/to/individual_reports [--geo-mean]
"""

import pandas as pd
import argparse
from pathlib import Path
import sys

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))

from utils.gpu_timeline_utils import (
read_gpu_timeline_from_excel,
aggregate_gpu_timeline,
get_method_suffix,
get_aggregation_description,
)


def process_gpu_timeline(reports_dir: str, use_geo_mean: bool = False) -> int:
    """
    Create mean/geometric mean aggregated GPU timeline across all ranks.

    Reads the gpu_timeline data from every ``perf_rank*.xlsx`` file in
    *reports_dir*, aggregates across the ranks that loaded successfully, and
    writes a summary workbook into the parent of *reports_dir*.

    Args:
        reports_dir: Path to directory containing perf_rank*.xlsx files
        use_geo_mean: If True, use geometric mean; otherwise arithmetic mean

    Returns:
        int: Exit code (0 for success, 1 for error)
    """
    reports_path = Path(reports_dir)

    if not reports_path.exists():
        print(f"Error: Directory not found: {reports_dir}")
        return 1

    print(f"Processing GPU timeline from: {reports_dir}")
    print(f"Aggregation: {get_aggregation_description(use_geo_mean)}")

    perf_files = sorted(reports_path.glob("perf_rank*.xlsx"))

    if not perf_files:
        print("Error: No perf_rank*.xlsx files found")
        return 1

    print(f"Found {len(perf_files)} rank files")

    rank_data = []
    for file_path in perf_files:
        # Filenames look like perf_rank<N>.xlsx; recover N from the stem.
        rank_num = int(file_path.stem.replace("perf_rank", ""))
        df, success = read_gpu_timeline_from_excel(file_path, rank=rank_num)
        if success:
            rank_data.append(df)
            print(f" Rank {rank_num}: OK")
        else:
            print(f" Rank {rank_num}: Error")

    if not rank_data:
        print("Error: No valid data loaded")
        return 1

    combined = pd.concat(rank_data, ignore_index=True)
    aggregated = aggregate_gpu_timeline(rank_data, use_geo_mean)
    # Bug fix: record the number of ranks that actually contributed data,
    # not the number of files found — ranks whose workbook failed to load
    # are excluded from the aggregate and must not be counted.
    aggregated["num_ranks"] = len(rank_data)

    method_suffix = get_method_suffix(use_geo_mean)
    output_path = reports_path.parent / f"gpu_timeline_summary_{method_suffix}.xlsx"

    with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
        # Sheet 1: aggregated summary across ranks.
        aggregated.to_excel(writer, sheet_name="Summary", index=False)

        # Sheet 2: every per-rank row, sorted for readability.
        combined_sorted = combined.sort_values(["rank", "type"])
        combined_sorted.to_excel(writer, sheet_name="All_Ranks_Combined", index=False)

        # Sheets 3-4: type x rank pivots. NOTE(review): aggfunc="first"
        # keeps only the first row per (type, rank) pair — assumes each pair
        # is unique in the per-rank data; verify upstream if duplicates are
        # possible.
        per_rank = combined.pivot_table(
            values="time ms", index="type", columns="rank", aggfunc="first"
        )
        per_rank.to_excel(writer, sheet_name="Per_Rank_Time_ms")

        per_rank_pct = combined.pivot_table(
            values="percent", index="type", columns="rank", aggfunc="first"
        )
        per_rank_pct.to_excel(writer, sheet_name="Per_Rank_Percent")

    print(f"\nSaved: {output_path}")
    print("\nSummary:")
    print(aggregated.to_string(index=False))

    return 0


def main():
    """CLI entry point: aggregate the GPU timeline for one configuration."""
    parser = argparse.ArgumentParser(description="Aggregate GPU timeline across ranks")
    parser.add_argument(
        "--reports-dir", required=True, help="Path to individual_reports directory"
    )
    parser.add_argument("--geo-mean", action="store_true", help="Use geometric mean")

    cli_args = parser.parse_args()

    # Hand off to the worker and surface its exit code.
    return process_gpu_timeline(cli_args.reports_dir, cli_args.geo_mean)


if __name__ == "__main__":
    # Propagate the function's return value to the shell as the exit code.
    exit(main())
1 change: 1 addition & 0 deletions scripts/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Utils package for shared script utilities
Loading