diff --git a/scripts/tracelens_single_config/combine_reports.py b/scripts/tracelens_single_config/combine_reports.py new file mode 100644 index 0000000..1fc2eba --- /dev/null +++ b/scripts/tracelens_single_config/combine_reports.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +import pandas as pd +import argparse +from pathlib import Path + + +def combine_collective_reports(baseline_path, test_path, output_path): + """ + Combine two collective reports into a single Excel file by adding a source column to the data. + """ + + print(f"Loading baseline: {baseline_path}") + baseline_xl = pd.ExcelFile(baseline_path) + + print(f"Loading test: {test_path}") + test_xl = pd.ExcelFile(test_path) + + print(f"\nBaseline sheets: {baseline_xl.sheet_names}") + print(f"Test sheets: {test_xl.sheet_names}") + + with pd.ExcelWriter(output_path, engine="openpyxl") as writer: + for sheet_name in baseline_xl.sheet_names: + if sheet_name not in test_xl.sheet_names: + print(f" Skip {sheet_name} - not in test file") + continue + + baseline_df = pd.read_excel(baseline_path, sheet_name=sheet_name) + test_df = pd.read_excel(test_path, sheet_name=sheet_name) + + baseline_df["source"] = "baseline" + test_df["source"] = "test" + + combined = pd.concat([baseline_df, test_df], ignore_index=True) + + combined.to_excel(writer, sheet_name=sheet_name, index=False) + print( + f" Combined {sheet_name}: {len(baseline_df)} + {len(test_df)} = {len(combined)} rows" + ) + + print(f"\nSaved: {output_path}") + return 0 + + +def main(): + parser = argparse.ArgumentParser(description="Combine two collective reports") + parser.add_argument( + "--baseline", required=True, help="Path to baseline collective_all_ranks.xlsx" + ) + parser.add_argument( + "--test", required=True, help="Path to test collective_all_ranks.xlsx" + ) + parser.add_argument( + "--output", required=True, help="Output path for combined Excel file" + ) + + args = parser.parse_args() + + return combine_collective_reports(args.baseline, args.test, args.output) + + +if __name__ == "__main__": + exit(main()) diff --git a/scripts/tracelens_single_config/process_gpu_timeline.py b/scripts/tracelens_single_config/process_gpu_timeline.py new file mode 100644 index 0000000..145f817 --- /dev/null +++ b/scripts/tracelens_single_config/process_gpu_timeline.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +import pandas as pd +import numpy as np +import argparse +from pathlib import Path + + +def geometric_mean(values): + values = np.array(values) + values = np.where(values == 0, 1e-10, values) + return np.exp(np.mean(np.log(values))) + + +def process_gpu_timeline(reports_dir, use_geo_mean=False): + """ + Create mean/geometric mean aggregated GPU timeline across all ranks inside tracelens analysis directory. + """ + reports_path = Path(reports_dir) + + if not reports_path.exists(): + print(f"Error: Directory not found: {reports_dir}") + return 1 + + print(f"Processing GPU timeline from: {reports_dir}") + print(f"Aggregation: {'Geometric Mean' if use_geo_mean else 'Arithmetic Mean'}") + + perf_files = sorted(reports_path.glob("perf_rank*.xlsx")) + + if not perf_files: + print("Error: No perf_rank*.xlsx files found") + return 1 + + print(f"Found {len(perf_files)} rank files") + + rank_data = [] + for file_path in perf_files: + rank_num = int(file_path.stem.replace("perf_rank", "")) + try: + df = pd.read_excel(file_path, sheet_name="gpu_timeline") + df["rank"] = rank_num + rank_data.append(df) + print(f" Rank {rank_num}: OK") + except Exception as e: + print(f" Rank {rank_num}: Error - {e}") + + if not rank_data: + print("Error: No valid data loaded") + return 1 + + combined = pd.concat(rank_data, ignore_index=True) + + agg_func = geometric_mean if use_geo_mean else "mean" + aggregated = ( + combined.groupby("type") + .agg({"time ms": agg_func, "percent": agg_func}) + .reset_index() + ) + + aggregated["num_ranks"] = len(perf_files) + + method_suffix = "geomean" if use_geo_mean else "mean" + output_path = reports_path.parent / f"gpu_timeline_summary_{method_suffix}.xlsx" + + with pd.ExcelWriter(output_path, engine="openpyxl") as writer: + aggregated.to_excel(writer, sheet_name="Summary", index=False) + + combined_sorted = combined.sort_values(["rank", "type"]) + combined_sorted.to_excel(writer, sheet_name="All_Ranks_Combined", index=False) + + per_rank = combined.pivot_table( + values="time ms", index="type", columns="rank", aggfunc="first" + ) + per_rank.to_excel(writer, sheet_name="Per_Rank_Time_ms") + + per_rank_pct = combined.pivot_table( + values="percent", index="type", columns="rank", aggfunc="first" + ) + per_rank_pct.to_excel(writer, sheet_name="Per_Rank_Percent") + + print(f"\nSaved: {output_path}") + print("\nSummary:") + print(aggregated.to_string(index=False)) + + return 0 + + +def main(): + parser = argparse.ArgumentParser(description="Aggregate GPU timeline across ranks") + parser.add_argument( + "--reports-dir", required=True, help="Path to individual_reports directory" + ) + parser.add_argument("--geo-mean", action="store_true", help="Use geometric mean") + + args = parser.parse_args() + + return process_gpu_timeline(args.reports_dir, args.geo_mean) + + +if __name__ == "__main__": + exit(main())