diff --git a/scripts/tracelens_single_config/README.md b/scripts/tracelens_single_config/README.md
new file mode 100644
index 0000000..b5c1321
--- /dev/null
+++ b/scripts/tracelens_single_config/README.md
@@ -0,0 +1,99 @@
+# TraceLens Single Configuration
+
+Analyze PyTorch profiler traces from one training run.
+
+For multi-config sweeps, see [../gemm_analysis/README.md](../gemm_analysis/README.md).
+
+## Quick Start
+
+```bash
+# Complete analysis
+python scripts/tracelens_single_config/run_full_analysis.py \
+    --baseline /path/to/baseline/traces \
+    --test /path/to/test/traces \
+    --output /path/to/output \
+    --all
+
+# Skip TraceLens if already done
+python scripts/tracelens_single_config/run_full_analysis.py \
+    --baseline /path/to/baseline \
+    --test /path/to/test \
+    --output /path/to/output \
+    --all --skip-tracelens
+```
+
+### Flags:
+- `--all` - Run everything including final report
+- `--gpu-timeline` - GPU timeline comparison
+- `--collective` - NCCL collective comparison
+- `--final-report` - Create comprehensive Excel report
+- `--skip-tracelens` - Skip TraceLens report generation if already done
+
+### Output:
+- `final_analysis_report.xlsx` - All comparisons with tables and color scale
+  - Color scale on percent_change: Red (worst) -> White (neutral) -> Green (best)
+
+### Using --skip-tracelens
+
+Keep the same `--baseline` and `--test` paths as the original run. The script looks for a `tracelens_analysis` subdirectory inside each:
+
+```bash
+# Expected structure when using --skip-tracelens
+baseline/
+└── tracelens_analysis/          # From previous run
+    ├── individual_reports/
+    └── collective_reports/
+
+test/
+└── tracelens_analysis/          # From previous run
+    ├── individual_reports/
+    └── collective_reports/
+```
+
+Example:
+```bash
+# Use same paths, script finds tracelens_analysis inside
+python run_full_analysis.py \
+    --baseline ~/data/baseline_traces \
+    --test ~/data/test_traces \
+    --output ~/results \
+    --all --skip-tracelens
+```
+
+
+## Expected Structure
+
+```
+traces/
+└── torch_profiler/
+    ├── rank0/
+    │   └── trace.json
+    ├── rank1/
+    │   └── trace.json
+    └── ...
+```
+
+## What the Master Script Does
+
+The `run_full_analysis.py` script automatically handles all steps:
+
+1. Runs TraceLens on baseline and test traces
+2. Processes GPU timelines using `process_gpu_timeline.py`
+3. Combines reports using `combine_reports.py`
+4. Adds comparison sheets using `add_comparison_sheets.py` and `add_collective_comparison.py`
+5. Creates final report using `create_final_report.py`
+
+All post-processing is handled automatically; there is no need to run the individual scripts by hand.
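+
+If you ever need to rerun a single stage, the manual equivalent of steps 2-4 looks
+roughly like this (a sketch using the default file names the pipeline writes;
+adjust the paths to your layout):
+
+```bash
+# Step 2: aggregate per-rank GPU timelines for each run; this writes
+# gpu_timeline_summary_mean.xlsx next to each individual_reports/ directory
+python scripts/tracelens_single_config/process_gpu_timeline.py \
+    --reports-dir baseline/tracelens_analysis/individual_reports
+python scripts/tracelens_single_config/process_gpu_timeline.py \
+    --reports-dir test/tracelens_analysis/individual_reports
+
+# Step 3: combine the two summaries into one workbook
+python scripts/tracelens_single_config/combine_reports.py \
+    --baseline baseline/tracelens_analysis/gpu_timeline_summary_mean.xlsx \
+    --test test/tracelens_analysis/gpu_timeline_summary_mean.xlsx \
+    --output results/gpu_timeline_combined.xlsx
+
+# Step 4: add the comparison sheets
+python scripts/tracelens_single_config/add_comparison_sheets.py \
+    --input results/gpu_timeline_combined.xlsx \
+    --output results/gpu_timeline_comparison.xlsx
+```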
+
+
+## Scripts
+
+```
+run_full_analysis.py           - Master script for complete pipeline
+create_final_report.py         - Create comprehensive Excel report
+run_tracelens_single_config.sh - Main TraceLens report generation
+process_gpu_timeline.py        - Aggregate GPU timeline across ranks
+combine_reports.py             - Combine two runs
+add_comparison_sheets.py       - Add GPU timeline comparison sheets
+add_collective_comparison.py   - Add collective/NCCL comparison sheets
+```
diff --git a/scripts/tracelens_single_config/add_collective_comparison.py b/scripts/tracelens_single_config/add_collective_comparison.py
new file mode 100644
index 0000000..ee54d46
--- /dev/null
+++ b/scripts/tracelens_single_config/add_collective_comparison.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python3
+import pandas as pd
+import argparse
+from openpyxl.formatting.rule import ColorScaleRule
+
+
+def add_collective_comparison_sheets(input_path, output_path):
+    """
+    Add comparison sheets to a combined collective report. Each comparison
+    sheet lines the baseline and saleelk rows up side by side.
+    TODO: generalize to n runs and drop the hardcoded source labels.
+    """
+    print(f"Loading: {input_path}")
+
+    xl = pd.ExcelFile(input_path)
+
+    with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
+        # Copy only summary sheets
+        for sheet_name in xl.sheet_names:
+            # Only keep sheets with 'summary' in the name
+            if "summary" not in sheet_name.lower():
+                print(f"  Skip {sheet_name} (keeping only summary sheets)")
+                continue
+            df = pd.read_excel(input_path, sheet_name=sheet_name)
+            df.to_excel(writer, sheet_name=sheet_name, index=False)
+            print(f"  Copied {sheet_name}")
+
+        # Process summary sheets for comparison
+        for sheet_name in ["nccl_summary_implicit_sync", "nccl_summary_long"]:
+            if sheet_name not in xl.sheet_names:
+                continue
+
+            df = pd.read_excel(input_path, sheet_name=sheet_name)
+
+            # Separate baseline and saleelk
+            baseline_df = df[df["source"] == "baseline"].copy()
+            saleelk_df = df[df["source"] == "saleelk"].copy()
+
+            if len(baseline_df) == 0 or len(saleelk_df) == 0:
+                print(f"  Skip {sheet_name} - missing data")
+                continue
+
+            # Create comparison dataframe
+            comparison = pd.DataFrame()
+
+            # Identify key columns for grouping
+            group_cols = ["Collective name", "dtype", "In msg nelems"]
+            if not all(col in baseline_df.columns for col in group_cols):
+                group_cols = ["Collective name"]
+
+            # Group the baseline and match each group against the saleelk rows
+            baseline_grouped = baseline_df.groupby(group_cols, as_index=False)
+
+            for name, base_group in baseline_grouped:
+                # Find matching saleelk group
+                if isinstance(name, tuple):
+                    mask = pd.Series([True] * len(saleelk_df), index=saleelk_df.index)
+                    for col, val in zip(group_cols, name):
+                        mask = mask & (saleelk_df[col] == val)
+                else:
+                    mask = saleelk_df[group_cols[0]] == name
+
+                sale_group = saleelk_df.loc[mask]
+
+                if len(sale_group) == 0:
+                    continue
+
+                # Create comparison row
+                comp_row = {}
+
+                # Copy grouping columns
+                if isinstance(name, tuple):
+                    for col, val in zip(group_cols, name):
+                        comp_row[col] = val
+                else:
+                    comp_row[group_cols[0]] = name
+
+                # Compare numeric columns
+                numeric_cols = [
+                    "comm_latency_mean",
+                    "algo bw (GB/s)_mean",
+                    "bus bw (GB/s)_mean",
+                    "Total comm latency (ms)",
+                    "count",
+                ]
+
+                for col in numeric_cols:
+                    if col not in base_group.columns or col not in sale_group.columns:
+                        continue
+
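+                    # The summary sheets are expected to hold one aggregated row
+                    # per (collective, dtype, message size) key, so .values[0]
+                    # below reads the first (and assumed only) match on each side.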
+                    base_val = base_group[col].values[0]
+                    sale_val = sale_group[col].values[0]
+
+                    comp_row[f"baseline_{col}"] = base_val
+                    comp_row[f"saleelk_{col}"] = sale_val
+                    comp_row[f"diff_{col}"] = sale_val - base_val
+
+                    # For latency/time: positive percent_change means faster (less time)
+                    # For bandwidth: positive percent_change means better (more bandwidth)
+                    if "latency" in col.lower() or "time" in col.lower():
+                        # Lower is better - positive when saleelk is faster
+                        pct_change = (
+                            (base_val - sale_val) / base_val * 100
+                            if base_val != 0
+                            else 0
+                        )
+                        comp_row[f"percent_change_{col}"] = pct_change
+                    elif "bw" in col.lower() or "bandwidth" in col.lower():
+                        # Higher is better - positive when saleelk is better
+                        pct_change = (
+                            (sale_val - base_val) / base_val * 100
+                            if base_val != 0
+                            else 0
+                        )
+                        comp_row[f"percent_change_{col}"] = pct_change
+
+                    comp_row[f"ratio_{col}"] = (
+                        sale_val / base_val if base_val != 0 else 0
+                    )
+
+                comparison = pd.concat(
+                    [comparison, pd.DataFrame([comp_row])], ignore_index=True
+                )
+
+            # Write comparison sheet (shorten name to fit Excel's 31 char limit)
+            # Replace 'nccl_summary_' with 'nccl_' and append '_cmp'
+            comparison_sheet_name = (
+                sheet_name.replace("nccl_summary_", "nccl_") + "_cmp"
+            )
+            comparison.to_excel(writer, sheet_name=comparison_sheet_name, index=False)
+            print(f"  Added {comparison_sheet_name}")
+
+            # Add conditional formatting to percent_change columns
+            print(f"  Applying conditional formatting to {comparison_sheet_name}...")
+
+            ws = writer.sheets[comparison_sheet_name]
+
+            # Format all percent_change columns with color scale
+            for col_idx, col in enumerate(comparison.columns, start=1):
+                if "percent_change" in col:
+                    # Convert the 1-based column index to an Excel column
+                    # letter (A..Z, AA..AZ, ...)
+                    col_letter = ""
+                    n = col_idx
+                    while n > 0:
+                        n, rem = divmod(n - 1, 26)
+                        col_letter = chr(65 + rem) + col_letter
+
+                    data_range = f"{col_letter}2:{col_letter}{len(comparison)+1}"
+
+                    # Color scale: red (min/negative) -> white (0) -> green (max/positive)
+                    ws.conditional_formatting.add(
+                        data_range,
+                        ColorScaleRule(
+                            start_type="min",
+                            start_color="F8696B",  # Red
+                            mid_type="num",
+                            mid_value=0,
+                            mid_color="FFFFFF",  # White
+                            end_type="max",
+                            end_color="63BE7B",  # Green
+                        ),
+                    )
+
+                    print(f"    Formatted {col}")
+
+    print(f"\nSaved: {output_path}")
+    print("\nNew comparison sheets added")
+    print("percent_change interpretation:")
+    print("  For latency/time: Positive = faster (less time)")
+    print("  For bandwidth: Positive = better (more bandwidth)")
+    return 0
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Add comparison sheets to combined collective reports"
+    )
+    parser.add_argument(
+        "--input", required=True, help="Input combined collective Excel file"
+    )
+    parser.add_argument(
+        "--output", required=True, help="Output Excel file with comparison sheets"
+    )
+
+    args = parser.parse_args()
+
+    return add_collective_comparison_sheets(args.input, args.output)
+
+
+if __name__ == "__main__":
+    exit(main())
diff --git a/scripts/tracelens_single_config/add_comparison_sheets.py b/scripts/tracelens_single_config/add_comparison_sheets.py
new file mode 100644
index 0000000..a50114a
--- /dev/null
+++ b/scripts/tracelens_single_config/add_comparison_sheets.py
@@ -0,0 +1,209 @@
+#!/usr/bin/env python3
+import pandas as pd
+import argparse
+from openpyxl.formatting.rule import ColorScaleRule
+
+
+def add_comparison_sheets(input_path, output_path):
+    """
+    Create comparison sheets for the
combined excel file of individual reports. + """ + print(f"Loading: {input_path}") + + xl = pd.ExcelFile(input_path) + + with pd.ExcelWriter(output_path, engine="openpyxl") as writer: + # Copy all original sheets + for sheet_name in xl.sheet_names: + df = pd.read_excel(input_path, sheet_name=sheet_name) + df.to_excel(writer, sheet_name=sheet_name, index=False) + print(f" Copied {sheet_name}") + + # Add comparison sheets + all_combined = pd.read_excel(input_path, sheet_name="All_Ranks_Combined") + + # Comparison 1: Side-by-side by rank + baseline_data = all_combined[all_combined["source"] == "baseline"] + saleelk_data = all_combined[all_combined["source"] == "saleelk"] + + comparison_by_rank = pd.DataFrame() + for rank in sorted(baseline_data["rank"].unique()): + base_rank = baseline_data[baseline_data["rank"] == rank].set_index("type") + sale_rank = saleelk_data[saleelk_data["rank"] == rank].set_index("type") + + for metric_type in base_rank.index: + if metric_type in sale_rank.index: + base_time = base_rank.loc[metric_type, "time ms"] + sale_time = sale_rank.loc[metric_type, "time ms"] + ratio_val = sale_time / base_time if base_time != 0 else 0 + # Percentage change: positive when saleelk is faster (takes less time) + pct_change = ( + (base_time - sale_time) / base_time * 100 + if base_time != 0 + else 0 + ) + + # Determine if better or worse + if pct_change > 1: + status = "Better" + elif pct_change < -1: + status = "Worse" + else: + status = "Similar" + + comparison_by_rank = pd.concat( + [ + comparison_by_rank, + pd.DataFrame( + { + "rank": [rank], + "type": [metric_type], + "baseline_time_ms": [base_time], + "saleelk_time_ms": [sale_time], + "diff_time_ms": [sale_time - base_time], + "percent_change": [pct_change], + "status": [status], + "ratio": [ratio_val], + "baseline_percent": [ + base_rank.loc[metric_type, "percent"] + ], + "saleelk_percent": [ + sale_rank.loc[metric_type, "percent"] + ], + "diff_percent": [ + sale_rank.loc[metric_type, "percent"] + - base_rank.loc[metric_type, "percent"] + ], + } + ), + ], + ignore_index=True, + ) + + comparison_by_rank.to_excel( + writer, sheet_name="Comparison_By_Rank", index=False + ) + print(f" Added Comparison_By_Rank") + + # Comparison 2: Summary comparison + summary = pd.read_excel(input_path, sheet_name="Summary") + baseline_summary = summary[summary["source"] == "baseline"].set_index("type") + saleelk_summary = summary[summary["source"] == "saleelk"].set_index("type") + + summary_comparison = pd.DataFrame() + for metric_type in baseline_summary.index: + if metric_type in saleelk_summary.index: + base_time = baseline_summary.loc[metric_type, "time ms"] + sale_time = saleelk_summary.loc[metric_type, "time ms"] + ratio_val = sale_time / base_time if base_time != 0 else 0 + # Percentage change: positive when saleelk is faster (takes less time) + pct_change = ( + (base_time - sale_time) / base_time * 100 if base_time != 0 else 0 + ) + + summary_comparison = pd.concat( + [ + summary_comparison, + pd.DataFrame( + { + "type": [metric_type], + "baseline_time_ms": [base_time], + "saleelk_time_ms": [sale_time], + "diff_time_ms": [sale_time - base_time], + "percent_change": [pct_change], + "ratio": [ratio_val], + "baseline_percent": [ + baseline_summary.loc[metric_type, "percent"] + ], + "saleelk_percent": [ + saleelk_summary.loc[metric_type, "percent"] + ], + "diff_percent": [ + saleelk_summary.loc[metric_type, "percent"] + - baseline_summary.loc[metric_type, "percent"] + ], + } + ), + ], + ignore_index=True, + ) + + summary_comparison.to_excel( 
+ writer, sheet_name="Summary_Comparison", index=False + ) + print(f" Added Summary_Comparison") + + # Add conditional formatting to percent_change columns + print("\n Applying conditional formatting...") + + # Create color scale: Red (negative) -> White (0) -> Green (positive) + + # Format Comparison_By_Rank + ws_rank = writer.sheets["Comparison_By_Rank"] + # Find percent_change column + for col_idx, col in enumerate(comparison_by_rank.columns, start=1): + if col == "percent_change": + col_letter = chr(64 + col_idx) # Convert to Excel column letter + data_range = f"{col_letter}2:{col_letter}{len(comparison_by_rank)+1}" + # Color scale: red (min) -> white (0) -> green (max) + ws_rank.conditional_formatting.add( + data_range, + ColorScaleRule( + start_type="min", + start_color="F8696B", # Red + mid_type="num", + mid_value=0, + mid_color="FFFFFF", # White + end_type="max", + end_color="63BE7B", # Green + ), + ) + print(f" Formatted Comparison_By_Rank column {col}") + break + + # Format Summary_Comparison + ws_summary = writer.sheets["Summary_Comparison"] + for col_idx, col in enumerate(summary_comparison.columns, start=1): + if col == "percent_change": + col_letter = chr(64 + col_idx) + data_range = f"{col_letter}2:{col_letter}{len(summary_comparison)+1}" + # Color scale: red (min) -> white (0) -> green (max) + ws_summary.conditional_formatting.add( + data_range, + ColorScaleRule( + start_type="min", + start_color="F8696B", # Red + mid_type="num", + mid_value=0, + mid_color="FFFFFF", # White + end_type="max", + end_color="63BE7B", # Green + ), + ) + print(f" Formatted Summary_Comparison column {col}") + break + + print(f"\nSaved: {output_path}") + print("\nNew sheets:") + print(" Comparison_By_Rank - Side-by-side comparison for each rank") + print(" Summary_Comparison - Overall comparison") + return 0 + + +def main(): + parser = argparse.ArgumentParser( + description="Add comparison sheets to combined GPU timeline" + ) + parser.add_argument("--input", required=True, help="Input combined Excel file") + parser.add_argument( + "--output", required=True, help="Output Excel file with comparison sheets" + ) + + args = parser.parse_args() + + return add_comparison_sheets(args.input, args.output) + + +if __name__ == "__main__": + exit(main()) diff --git a/scripts/tracelens_single_config/combine_reports.py b/scripts/tracelens_single_config/combine_reports.py new file mode 100644 index 0000000..5d8bfb1 --- /dev/null +++ b/scripts/tracelens_single_config/combine_reports.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +import pandas as pd +import argparse +from pathlib import Path + + +def combine_collective_reports(baseline_path, test_path, output_path): + """ + Combine two collective reports into a single Excel file by adding a source column to the data. 
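+    Baseline rows get source="baseline" and test rows get source="saleelk",
+    the labels the downstream comparison scripts currently filter on.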
+ """ + + print(f"Loading baseline: {baseline_path}") + baseline_xl = pd.ExcelFile(baseline_path) + + print(f"Loading test: {test_path}") + test_xl = pd.ExcelFile(test_path) + + print(f"\nBaseline sheets: {baseline_xl.sheet_names}") + print(f"Test sheets: {test_xl.sheet_names}") + + with pd.ExcelWriter(output_path, engine="openpyxl") as writer: + for sheet_name in baseline_xl.sheet_names: + if sheet_name not in test_xl.sheet_names: + print(f" Skip {sheet_name} - not in test file") + continue + + baseline_df = pd.read_excel(baseline_path, sheet_name=sheet_name) + test_df = pd.read_excel(test_path, sheet_name=sheet_name) + + baseline_df["source"] = "baseline" + test_df["source"] = "saleelk" + + combined = pd.concat([baseline_df, test_df], ignore_index=True) + + combined.to_excel(writer, sheet_name=sheet_name, index=False) + print( + f" Combined {sheet_name}: {len(baseline_df)} + {len(test_df)} = {len(combined)} rows" + ) + + print(f"\nSaved: {output_path}") + return 0 + + +def main(): + parser = argparse.ArgumentParser(description="Combine two collective reports") + parser.add_argument( + "--baseline", required=True, help="Path to baseline collective_all_ranks.xlsx" + ) + parser.add_argument( + "--test", required=True, help="Path to test collective_all_ranks.xlsx" + ) + parser.add_argument( + "--output", required=True, help="Output path for combined Excel file" + ) + + args = parser.parse_args() + + return combine_collective_reports(args.baseline, args.test, args.output) + + +if __name__ == "__main__": + exit(main()) diff --git a/scripts/tracelens_single_config/create_final_html.py b/scripts/tracelens_single_config/create_final_html.py new file mode 100644 index 0000000..74ea0c3 --- /dev/null +++ b/scripts/tracelens_single_config/create_final_html.py @@ -0,0 +1,103 @@ +from pathlib import Path +import base64 +import argparse + +from html_report_config import ( + HTML_HEADER, + HTML_FOOTER, + OVERALL_GPU_CHARTS, + CROSS_RANK_CHARTS, + NCCL_CHARTS, +) + + +def get_image_base64(image_path): + """Read an image file and return its base64-encoded string.""" + try: + with open(image_path, "rb") as f: + return base64.b64encode(f.read()).decode("utf-8") + except Exception as e: + print(f"Error getting image data from {image_path}: {e}") + return None + + +def create_chart_html(plot_dir, chart_config): + """Generate HTML for a single chart with title, image, and description.""" + image_data = get_image_base64(plot_dir / chart_config["file"]) + if image_data is None: + return "" + return f""" +

    <h3>{chart_config['name']}</h3>
+    <img src="data:image/png;base64,{image_data}" alt="{chart_config['alt']}">
+    <p>{chart_config['description']}</p>
+    """
+
+
+def create_section_html(title, plot_dir, charts):
+    """Generate HTML for a complete section with multiple charts."""
+    section_html = f"""
+    <h2>{title}</h2>
+    """
+    for chart in charts:
+        section_html += create_chart_html(plot_dir, chart)
+    return section_html
+
+
+def create_final_html(plot_file_path, output_path):
+    html_body = """
+    <h1>Performance Analysis Report</h1>
+
+    <h2>Executive Summary</h2>
+    <p>
+    Comparison of GPU performance metrics between baseline and Saleelk
+    implementations across 8 ranks.
+    </p>
+    """
+
+    # Build all sections
+    sections = [
+        create_section_html(
+            "1. Overall GPU Metrics Comparison", plot_file_path, OVERALL_GPU_CHARTS
+        ),
+        create_section_html(
+            "2. Cross-Rank Performance Comparison", plot_file_path, CROSS_RANK_CHARTS
+        ),
+        create_section_html(
+            "3. NCCL Collective Operations Analysis", plot_file_path, NCCL_CHARTS
+        ),
+    ]
+
+    final_html = HTML_HEADER + html_body + "".join(sections) + HTML_FOOTER
+    with open(output_path, "w") as f:
+        f.write(final_html)
+    print(f"Final HTML file created at: {output_path}")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Create a final HTML file for the analysis report."
+    )
+    parser.add_argument(
+        "-p",
+        "--plot-files-directory",
+        type=Path,
+        required=True,
+        help="Path to the plot files directory.",
+    )
+    parser.add_argument(
+        "-o", "--output-html", type=Path, default=None, help="Path to the output file."
+    )
+    args = parser.parse_args()
+    output_path = (
+        args.output_html
+        if args.output_html
+        else args.plot_files_directory.parent / "final_analysis_report.html"
+    )
+    create_final_html(args.plot_files_directory, output_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/tracelens_single_config/create_final_plots.py b/scripts/tracelens_single_config/create_final_plots.py
new file mode 100644
index 0000000..214ec5c
--- /dev/null
+++ b/scripts/tracelens_single_config/create_final_plots.py
@@ -0,0 +1,348 @@
+import argparse
+
+import pandas as pd
+import matplotlib.pyplot as plt
+from pathlib import Path
+import seaborn as sns
+
+
+def plot_improvement_chart(df, output_path):
+    fig, ax = plt.subplots(figsize=(10, 6))
+
+    # Color bars based on positive/negative values
+    colors = ["#2ecc71" if val > 0 else "#e74c3c" for val in df["Improvement (%)"]]
+
+    bars = ax.barh(df["Metric"], df["Improvement (%)"], color=colors)
+    ax.yaxis.grid(True, linestyle="--", alpha=0.7, color="gray")
+    ax.set_axisbelow(True)
+
+    ax.spines["top"].set_visible(False)
+    ax.spines["right"].set_visible(False)
+    ax.spines["bottom"].set_visible(False)
+    ax.spines["left"].set_visible(False)
+
+    # Customize the chart
+    ax.set_ylabel("Metric", fontsize=12)
+    ax.set_xlabel("Change (%)", fontsize=12)
+    ax.set_title(
+        "GPU Metrics Percentage Change (Test vs Baseline)\n(Positive = Test is better)",
+        fontsize=14,
+        fontweight="bold",
+    )
+
+    plt.tight_layout()
+    plt.savefig(output_path / "improvement_chart.png", dpi=150)
+    plt.close()
+
+
+def plot_abs_time_comparison(df, output_path):
+    fig, ax = plt.subplots(figsize=(10, 6))
+
+    # Set up bar positions
+    x = range(len(df))
+    width = 0.35
+
+    # Create bars for Baseline and Test
+    bars1 = ax.bar(
+        [i - width / 2 for i in x],
+        df["Baseline"],
+        width,
+        label="Baseline",
+        color="#3498db",
+    )
+    bars2 = ax.bar(
+        [i + width / 2 for i in x], df["Test"], width, label="Test", color="#e67e22"
+    )
+
+    # Add horizontal grid lines only
+    ax.yaxis.grid(True, linestyle="--", alpha=0.7, color="gray")
+    ax.set_axisbelow(True)
+
+    # Remove border/spines
+    ax.spines["top"].set_visible(False)
+    ax.spines["right"].set_visible(False)
+    ax.spines["bottom"].set_visible(False)
+    ax.spines["left"].set_visible(False)
+
+    # Customize the chart
+    ax.set_xlabel("Metric Type", fontsize=12)
+    ax.set_ylabel("Time (ms)", fontsize=12)
+    ax.set_title(
+        "GPU Metrics Absolute Time Comparison", fontsize=14, fontweight="bold"
+    )
+    ax.set_xticks(x)
+    ax.set_xticklabels(df["Metric"], rotation=45, ha="right")
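+    # ax.legend() picks up the labels attached to the two bar series above.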
+    ax.legend()
+
+    plt.tight_layout()
+    plt.savefig(output_path / "abs_time_comparison.png", dpi=150)
+    plt.close()
+
+
+def create_summary_charts(excel_path, output_path):
+    # Read the Summary_Dashboard sheet
+    df = pd.read_excel(excel_path, sheet_name="Summary_Dashboard")
+
+    plot_improvement_chart(df, output_path)
+    plot_abs_time_comparison(df, output_path)
+
+
+def plot_gpu_type_by_rank(metric_df, output_path, title):
+    # Create the line plot
+    fig, ax = plt.subplots(figsize=(12, 6))
+
+    # Plot baseline times by rank
+    ax.plot(
+        metric_df["rank"],
+        metric_df["baseline_time_ms"],
+        marker="o",
+        linewidth=2,
+        markersize=8,
+        color="#3498db",
+        label="Baseline",
+    )
+
+    # Plot Saleelk (test) times by rank
+    ax.plot(
+        metric_df["rank"],
+        metric_df["saleelk_time_ms"],
+        marker="s",
+        linewidth=2,
+        markersize=8,
+        color="#e67e22",
+        label="Test",
+    )
+
+    # Add horizontal grid lines only
+    ax.yaxis.grid(True, linestyle="--", alpha=0.7, color="gray")
+    ax.set_axisbelow(True)
+
+    # Customize the chart
+    ax.set_xlabel("Rank", fontsize=12)
+    ax.set_ylabel("Time (ms)", fontsize=12)
+    ax.set_title(f"{title} Comparison across all ranks", fontsize=14, fontweight="bold")
+    ax.legend()
+
+    plt.tight_layout()
+    plt.savefig(output_path, dpi=150)
+    plt.close()
+
+
+def create_gpu_time_across_all_ranks(excel_path, output_path):
+    # Read the GPU_ByRank_Cmp sheet
+    df = pd.read_excel(excel_path, sheet_name="GPU_ByRank_Cmp")
+
+    # Plot each key metric type across ranks
+    for metric_type in ["total_time", "computation_time", "total_comm_time", "idle_time"]:
+        metric_df = df[df["type"] == metric_type]
+        plot_gpu_type_by_rank(
+            metric_df, output_path / f"{metric_type}_by_rank.png", metric_type
+        )
+
+
+def plot_gpu_time_change_percentage_summary_by_rank(df, ax):
+    colors = ["#2ecc71" if val > 0 else "#e74c3c" for val in df["percent_change"]]
+    bars = ax.bar(df["rank"].astype(str), df["percent_change"], color=colors)
+    # Add horizontal line at 0
+    ax.axhline(y=0, color="black", linestyle="-", linewidth=0.5)
+
+    # Add horizontal grid lines only
+    ax.yaxis.grid(True, linestyle="--", alpha=0.7, color="gray")
+    ax.set_axisbelow(True)
+    ax.set_xlabel("Rank")
+    ax.set_ylabel("Percent Change (%)")
+
+
+def create_gpu_time_change_percentage_summary_by_rank(excel_path, output_path):
+    # Read the GPU_ByRank_Cmp sheet
+    df = pd.read_excel(excel_path, sheet_name="GPU_ByRank_Cmp")
+
+    fig, ax = plt.subplots(nrows=2, ncols=4, figsize=(12, 6))
+
+    row_types = [
+        "busy_time",
+        "computation_time",
+        "exposed_comm_time",
+        "exposed_memcpy_time",
+        "idle_time",
+        "total_comm_time",
+        "total_memcpy_time",
+        "total_time",
+    ]
+    # One subplot per metric type
+    for i, metric_type in enumerate(row_types):
+        type_df = df[df["type"] == metric_type]
+        plot_gpu_time_change_percentage_summary_by_rank(type_df, ax[i // 4, i % 4])
+        ax[i // 4, i % 4].set_title(metric_type)
+    plt.tight_layout()
+    plt.savefig(output_path / "gpu_time_change_percentage_summary_by_rank.png", dpi=150)
+    plt.close()
+
+
+def create_nccl_charts(excel_path, output_path):
+    # Read the NCCL_ImplSync_Cmp comparison sheet
+    df = pd.read_excel(excel_path, sheet_name="NCCL_ImplSync_Cmp")
+    df["label"] = df["Collective name"] + "\n" + df["In msg nelems"].astype(str)
+    x = range(len(df))
+
+    plot_item = {
+        "NCCL Communication Latency": {
+            "x_label": "Collective Operation (Message Size)",
+            "y_label": "Communication Latency (ms)",
+            "y_col_names": ["baseline_comm_latency_mean", "saleelk_comm_latency_mean"],
+        },
+        "NCCL Algorithm Bandwidth": {
+            "x_label": "Collective Operation (Message Size)",
+            "y_label": "Algorithm Bandwidth (GB/s)",
+            "y_col_names": [
+                "baseline_algo bw (GB/s)_mean",
+                "saleelk_algo bw (GB/s)_mean",
+            ],
+        },
+        "NCCL Bus Bandwidth": {
+            "x_label": "Collective Operation (Message Size)",
+            "y_label": "Bus Bandwidth (GB/s)",
+            "y_col_names": [
+                "baseline_bus bw (GB/s)_mean",
+                "saleelk_bus bw (GB/s)_mean",
+            ],
+        },
+        "NCCL Total Communication Latency": {
+            "x_label": "Collective Operation (Message Size)",
+            "y_label": "Total Communication Latency (ms)",
+            "y_col_names": [
+                "baseline_Total comm latency (ms)",
+                "saleelk_Total comm latency (ms)",
+            ],
+        },
+    }
+    for item in plot_item.keys():
+        fig, ax = plt.subplots(figsize=(14, 6))
+        width = 0.35
+        bars1 = ax.bar(
+            [i - width / 2 for i in x],
+            df[plot_item[item]["y_col_names"][0]],
+            width,
+            label="Baseline",
+            color="#3498db",
+        )
+        bars2 = ax.bar(
+            [i + width / 2 for i in x],
+            df[plot_item[item]["y_col_names"][1]],
+            width,
+            label="Test",
+            color="#e67e22",
+        )
+        ax.yaxis.grid(True, linestyle="--", alpha=0.7, color="gray")
+        ax.set_axisbelow(True)
+        ax.set_xticks(x)
+        ax.set_xticklabels(df["label"], rotation=45, ha="right", fontsize=8)
+        ax.set_xlabel(plot_item[item]["x_label"], fontsize=12)
+        ax.set_ylabel(plot_item[item]["y_label"], fontsize=12)
+        ax.set_title(f"{item} Comparison", fontsize=14, fontweight="bold")
+        ax.legend()
+        plt.tight_layout()
+        plt.savefig(output_path / f'{item.replace(" ", "_")}_comparison.png', dpi=150)
+        plt.close()
+
+    percentage_chart_item = {
+        "Comm Latency": "percent_change_comm_latency_mean",
+        "Algo BW": "percent_change_algo bw (GB/s)_mean",
+        "Bus BW": "percent_change_bus bw (GB/s)_mean",
+    }
+    fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12, 6))
+    plot_item_index = 0
+    for item in percentage_chart_item.keys():
+        colors = [
+            "#2ecc71" if val > 0 else "#e74c3c"
+            for val in df[percentage_chart_item[item]]
+        ]
+        bars = ax[plot_item_index].barh(
+            df["In msg nelems"].astype(str),
+            df[percentage_chart_item[item]],
+            color=colors,
+        )
+        ax[plot_item_index].yaxis.grid(True, linestyle="--", alpha=0.7, color="gray")
+        ax[plot_item_index].set_axisbelow(True)
+        ax[plot_item_index].set_xlabel("Percent Change (%)")
+        ax[plot_item_index].set_title(f"{item} \n Percent Change (Positive = better)")
+        plot_item_index += 1
+    fig.suptitle(
+        "NCCL Performance Percentage Change By Message Size",
+        fontsize=16,
+        fontweight="bold",
+    )
+    plt.tight_layout()
+    plt.savefig(
+        output_path / "NCCL_Performance_Percentage_Change_comparison.png", dpi=150
+    )
+    plt.close()
+
+
+def create_gpu_time_heatmap(excel_path, output_path):
+    # Read the GPU_ByRank_Cmp sheet
+    df = pd.read_excel(excel_path, sheet_name="GPU_ByRank_Cmp")
+    # Pivot to a type x rank matrix for the heatmap
+    pivot_df = df.pivot(index="type", columns="rank", values="percent_change")
+
+    # Create heatmap
+    fig, ax = plt.subplots(figsize=(12, 8))
+
+    sns.heatmap(
+        pivot_df,
+        annot=True,  # Show values in cells
+        fmt=".1f",  # Format as 1 decimal
+        cmap="RdYlGn",  # Red-Yellow-Green colormap (red=bad, green=good)
+        center=0,  # Center colormap at 0
+        linewidths=0.5,  # Add gridlines
+        cbar_kws={"label": "Percent Change (%)"},
+        ax=ax,
+    )
+
+    ax.set_title(
+        "GPU Metric Percentage Change by Rank (HeatMap) \n (Positive = Better Test)",
+        fontsize=14,
+        fontweight="bold",
+    )
+    ax.set_xlabel("Rank", fontsize=12)
+    ax.set_ylabel("Metric Type", fontsize=12)
+
+    plt.tight_layout()
+    plt.savefig(output_path / "gpu_time_heatmap.png", dpi=150)
+    plt.close()
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Generate improvement chart from generated reports"
+    )
+    parser.add_argument(
+        "--report-path",
+        type=Path,
+        default=Path(
+            "~/aorta/aorta_single_config/aorta/expt_compare/final_analysis_report.xlsx"
+        ).expanduser(),
+        help="Path to the input Excel file (should have Summary_Dashboard sheet)",
+    )
+    parser.add_argument(
+        "--output",
+        type=Path,
+        default=None,
+        help="Path to the output directory to save PNG files",
+    )
+
+    args = parser.parse_args()
+    output_path = args.output if args.output else args.report_path.parent / "plots"
+    output_path.mkdir(exist_ok=True, parents=True)
+    create_summary_charts(args.report_path, output_path)
+    print(f"Summary charts saved to: {output_path}")
+    create_gpu_time_heatmap(args.report_path, output_path)
+    print(f"GPU time heatmap saved to: {output_path}")
+    create_gpu_time_across_all_ranks(args.report_path, output_path)
+    print(f"GPU time across all ranks saved to: {output_path}")
+    create_gpu_time_change_percentage_summary_by_rank(args.report_path, output_path)
+    print(f"GPU time change percentage summary by rank saved to: {output_path}")
+    create_nccl_charts(args.report_path, output_path)
+    print(f"NCCL communication charts saved to: {output_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/tracelens_single_config/create_final_report.py b/scripts/tracelens_single_config/create_final_report.py
new file mode 100644
index 0000000..588edb3
--- /dev/null
+++ b/scripts/tracelens_single_config/create_final_report.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python3
+"""
+Create final comprehensive report with combined and comparison data.
+Raw data sheets are hidden and all data is formatted as Excel tables.
+"""
+import pandas as pd
+import argparse
+from pathlib import Path
+from openpyxl import load_workbook
+from openpyxl.worksheet.table import Table, TableStyleInfo
+from openpyxl.formatting.rule import ColorScaleRule
+
+
+def get_column_letter(col_num):
+    """Convert column number to Excel column letter."""
+    result = ""
+    while col_num > 0:
+        col_num -= 1
+        result = chr(65 + (col_num % 26)) + result
+        col_num //= 26
+    return result
+
+
+def add_excel_table(worksheet, table_name, start_row=1):
+    """Convert worksheet data to Excel table format."""
+    # Find data range
+    max_row = worksheet.max_row
+    max_col = worksheet.max_column
+
+    if max_row <= start_row:
+        return  # No data
+
+    # Ensure all column headers are strings
+    for col_idx in range(1, max_col + 1):
+        cell = worksheet.cell(row=start_row, column=col_idx)
+        if cell.value is not None and not isinstance(cell.value, str):
+            cell.value = str(cell.value)
+
+    # Create table reference using proper column letter conversion
+    start_cell = f"A{start_row}"
+    end_col_letter = get_column_letter(max_col)
+    end_cell = f"{end_col_letter}{max_row}"
+    table_ref = f"{start_cell}:{end_cell}"
+
+    # Create table with style
+    try:
+        tab = Table(displayName=table_name, ref=table_ref)
+        style = TableStyleInfo(
+            name="TableStyleMedium2",
+            showFirstColumn=False,
+            showLastColumn=False,
+            showRowStripes=True,
+            showColumnStripes=False,
+        )
+        tab.tableStyleInfo = style
+
+        # Add table to worksheet
+        worksheet.add_table(tab)
+    except Exception as e:
+        print(f"  Warning: Could not create table {table_name}: {e}")
+
+
+def create_final_report(
+    gpu_combined, gpu_comparison, coll_combined, coll_comparison, output_file
+):
+    """Create comprehensive report with all data."""
+
+    print("Creating comprehensive final report...")
+    print(f"  Output: {output_file}")
+
+    # Track sheet info for hiding/organizing
+    raw_sheets = []
+    comparison_sheets = []
+    summary_sheets = []
+
+    with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
+
+        # === GPU TIMELINE SHEETS ===
+        print("\nAdding GPU Timeline sheets...")
+
+        # Read GPU combined (raw data)
+        gpu_comb_xl = pd.ExcelFile(gpu_combined)
+        sheet_mapping = {
+            "Summary": "GPU_Summary_Raw",
+            "All_Ranks_Combined": "GPU_AllRanks_Raw",
+            "Per_Rank_Time_ms": "GPU_Time_Raw",
+            "Per_Rank_Percent": "GPU_Pct_Raw",
+        }
+        for sheet_name in gpu_comb_xl.sheet_names:
+            df = pd.read_excel(gpu_combined, sheet_name=sheet_name)
+            new_name = sheet_mapping.get(sheet_name, f"GPU_{sheet_name}_Raw")
+            df.to_excel(writer, sheet_name=new_name, index=False)
+            raw_sheets.append(new_name)
+            print(f"  Added {new_name} (will be hidden)")
+
+        # Read GPU comparison
+        gpu_comp_xl = pd.ExcelFile(gpu_comparison)
+        comp_mapping = {
+            "Summary_Comparison": "GPU_Summary_Cmp",
+            "Comparison_By_Rank": "GPU_ByRank_Cmp",
+        }
+        for sheet_name in gpu_comp_xl.sheet_names:
+            if "Comparison" in sheet_name:
+                df = pd.read_excel(gpu_comparison, sheet_name=sheet_name)
+                new_name = comp_mapping.get(sheet_name, f"GPU_{sheet_name}")
+                df.to_excel(writer, sheet_name=new_name, index=False)
+                comparison_sheets.append(new_name)
+                print(f"  Added {new_name}")
+
+        # === COLLECTIVE SHEETS ===
+        print("\nAdding Collective/NCCL sheets...")
+
+        # Read collective combined (raw data for hidden sheets)
+        coll_comb_xl = pd.ExcelFile(coll_combined)
+        coll_mapping = {
+            "nccl_summary_implicit_sync": "NCCL_ImplSync_Raw",
+            "nccl_summary_long": "NCCL_Long_Raw",
+        }
+        for sheet_name in coll_comb_xl.sheet_names:
+            if "summary" in sheet_name.lower():
+                df = pd.read_excel(coll_combined, sheet_name=sheet_name)
+                new_name = coll_mapping.get(sheet_name, f"NCCL_{sheet_name}_Raw")
+                df.to_excel(writer, sheet_name=new_name, index=False)
+                raw_sheets.append(new_name)
+                print(f"  Added {new_name} (will be hidden)")
+
+        # Read collective comparison
+        coll_comp_xl = pd.ExcelFile(coll_comparison)
+        coll_cmp_mapping = {
+            "nccl_implicit_sync_cmp": "NCCL_ImplSync_Cmp",
+            "nccl_long_cmp": "NCCL_Long_Cmp",
+        }
+        for sheet_name in coll_comp_xl.sheet_names:
+            if "_cmp" in sheet_name:
+                df = pd.read_excel(coll_comparison, sheet_name=sheet_name)
+                new_name = coll_cmp_mapping.get(sheet_name, f"NCCL_{sheet_name}")
+                df.to_excel(writer, sheet_name=new_name, index=False)
+                comparison_sheets.append(new_name)
+                print(f"  Added {new_name}")
+
+        # === CREATE SUMMARY DASHBOARD ===
+        print("\nCreating Summary Dashboard...")
+
+        # Read key metrics for dashboard
+        gpu_summary = pd.read_excel(gpu_comparison, sheet_name="Summary_Comparison")
+
+        # Create dashboard data
+        dashboard_data = {
+            "Metric": [],
+            "Baseline": [],
+            "Test": [],
+            "Improvement (%)": [],
+            "Status": [],
+        }
+
+        # Add GPU metrics; thresholds match the Better/Worse/Similar logic
+        # used in add_comparison_sheets.py
+        for _, row in gpu_summary.iterrows():
+            metric_type = row["type"]
+            dashboard_data["Metric"].append(f"GPU_{metric_type}")
+            dashboard_data["Baseline"].append(round(row["baseline_time_ms"], 2))
+            dashboard_data["Test"].append(round(row["saleelk_time_ms"], 2))
+            dashboard_data["Improvement (%)"].append(round(row["percent_change"], 2))
+            dashboard_data["Status"].append(
+                "Better"
+                if row["percent_change"] > 1
+                else "Worse" if row["percent_change"] < -1 else "Similar"
+            )
+
+        dashboard_df = pd.DataFrame(dashboard_data)
+        dashboard_df.to_excel(writer, sheet_name="Summary_Dashboard", index=False)
+        summary_sheets.append("Summary_Dashboard")
+        print("  Added Summary_Dashboard")
+
+    # Now modify the workbook to hide
sheets and add tables + print("\nApplying formatting...") + wb = load_workbook(output_file) + + # Hide raw data sheets + for sheet_name in raw_sheets: + if sheet_name in wb.sheetnames: + wb[sheet_name].sheet_state = "hidden" + print(f" Hidden: {sheet_name}") + + # Convert all sheets to tables + for sheet_name in wb.sheetnames: + ws = wb[sheet_name] + + # Skip if sheet is empty + if ws.max_row <= 1: + continue + + # Create unique table name from sheet name (remove special chars) + table_name = ( + sheet_name.replace(" ", "_") + .replace("-", "_") + .replace("(", "") + .replace(")", "") + ) + # Ensure name starts with letter and is max 255 chars + if not table_name[0].isalpha(): + table_name = "Tbl_" + table_name + table_name = table_name[:255] + + add_excel_table(ws, table_name) + print(f" Converted to table: {sheet_name}") + + # Add conditional formatting for percent_change columns + if "Cmp" in sheet_name or "Comparison" in sheet_name: + # Find percent_change columns + for col_idx in range(1, ws.max_column + 1): + cell_value = ws.cell(row=1, column=col_idx).value + if cell_value and "percent_change" in str(cell_value): + col_letter = get_column_letter(col_idx) + data_range = f"{col_letter}2:{col_letter}{ws.max_row}" + + # Apply color scale: red (min/negative) -> white (0) -> green (max/positive) + try: + ws.conditional_formatting.add( + data_range, + ColorScaleRule( + start_type="min", + start_color="F8696B", # Red + mid_type="num", + mid_value=0, + mid_color="FFFFFF", # White + end_type="max", + end_color="63BE7B", # Green + ), + ) + print( + f" Applied color scale to {sheet_name} column {cell_value}" + ) + except Exception as e: + print( + f" Warning: Could not apply formatting to {cell_value}: {e}" + ) + + # Move Summary Dashboard to first position + if "Summary_Dashboard" in wb.sheetnames: + dashboard_sheet = wb["Summary_Dashboard"] + wb.move_sheet(dashboard_sheet, offset=-(len(wb.sheetnames) - 1)) + wb.active = 0 # Set dashboard as active sheet + print("\n Moved Summary_Dashboard to first position") + + # Save workbook + wb.save(output_file) + print(f"\nFinal report saved: {output_file}") + + # Report structure + print("\nReport Structure:") + print(" Visible Sheets (Analysis):") + print(f" - Summary_Dashboard") + for sheet in comparison_sheets: + print(f" - {sheet}") + print("\n Hidden Sheets (Raw Data):") + for sheet in raw_sheets: + print(f" - {sheet}") + print("\n All data formatted as Excel tables with filters") + print(" Percent change columns are color-coded (green=better, red=worse)") + print( + "\nUsers can unhide raw data sheets in Excel: Right-click any sheet tab → Unhide" + ) + + +def main(): + parser = argparse.ArgumentParser( + description="Create final comprehensive report with all data", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Example: + python create_final_report.py \\ + --gpu-combined gpu_timeline_combined.xlsx \\ + --gpu-comparison gpu_timeline_comparison.xlsx \\ + --coll-combined collective_combined.xlsx \\ + --coll-comparison collective_comparison.xlsx \\ + --output final_analysis_report.xlsx + """, + ) + + parser.add_argument( + "--gpu-combined", required=True, help="Path to GPU timeline combined file" + ) + parser.add_argument( + "--gpu-comparison", required=True, help="Path to GPU timeline comparison file" + ) + parser.add_argument( + "--coll-combined", required=True, help="Path to collective combined file" + ) + parser.add_argument( + "--coll-comparison", required=True, help="Path to collective comparison file" + ) + 
parser.add_argument("--output", required=True, help="Output path for final report")
+
+    args = parser.parse_args()
+
+    # Validate inputs
+    for file_arg in [
+        "gpu_combined",
+        "gpu_comparison",
+        "coll_combined",
+        "coll_comparison",
+    ]:
+        file_path = getattr(args, file_arg)
+        if not Path(file_path).exists():
+            print(f"Error: File not found: {file_path}")
+            return 1
+
+    create_final_report(
+        args.gpu_combined,
+        args.gpu_comparison,
+        args.coll_combined,
+        args.coll_comparison,
+        args.output,
+    )
+
+    return 0
+
+
+if __name__ == "__main__":
+    exit(main())
diff --git a/scripts/tracelens_single_config/html_report_config.py b/scripts/tracelens_single_config/html_report_config.py
new file mode 100644
index 0000000..5f14d2c
--- /dev/null
+++ b/scripts/tracelens_single_config/html_report_config.py
@@ -0,0 +1,119 @@
+"""Configuration constants for HTML report generation."""
+
+HTML_HEADER = """<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Performance Analysis Report</title>
+</head>
+<body>
+"""
+
+HTML_FOOTER = """
+</body>
+</html>
+"""
+
+# Chart configuration for each section
+OVERALL_GPU_CHARTS = [
+    {
+        "name": "Percentage Change Overview",
+        "file": "improvement_chart.png",
+        "alt": "Summary Chart",
+        "description": "Overall performance change across key GPU metrics. Positive values indicate improvement (Test is faster/better).",
+    },
+    {
+        "name": "Absolute Time Comparison",
+        "file": "abs_time_comparison.png",
+        "alt": "Absolute Time Comparison",
+        "description": "Side-by-side comparison of absolute execution times for all GPU metrics.",
+    },
+]
+
+CROSS_RANK_CHARTS = [
+    {
+        "name": "Performance Heatmap by Rank",
+        "file": "gpu_time_heatmap.png",
+        "alt": "GPU Metric Percentage Change by Rank (HeatMap)",
+        "description": "Comprehensive heatmap showing percent change for all metrics across all ranks. Green indicates better performance (positive % change).",
+    },
+    {
+        "name": "Total Time",
+        "file": "total_time_by_rank.png",
+        "alt": "total_time by Rank",
+        "description": "Total execution time comparison across all ranks, showing end-to-end performance characteristics.",
+    },
+    {
+        "name": "Computation Time",
+        "file": "computation_time_by_rank.png",
+        "alt": "computation_time by Rank",
+        "description": "Pure computation time excluding communication overhead, analyzed per rank.",
+    },
+    {
+        "name": "Communication Time",
+        "file": "total_comm_time_by_rank.png",
+        "alt": "total_comm_time by Rank",
+        "description": "Total time spent in collective communication operations across ranks.",
+    },
+    {
+        "name": "Idle Time",
+        "file": "idle_time_by_rank.png",
+        "alt": "idle_time by Rank",
+        "description": "GPU idle time comparison showing resource utilization efficiency per rank.",
+    },
+    {
+        "name": "Detailed Percentage Change by Metric",
+        "file": "gpu_time_change_percentage_summary_by_rank.png",
+        "alt": "gpu_time_change_percentage_summary_by_rank by Rank",
+        "description": "Detailed breakdown of percent change for each metric type across all ranks.",
+    },
+]
+
+NCCL_CHARTS = [
+    {
+        "name": "NCCL Communication Latency",
+        "file": "NCCL_Communication_Latency_comparison.png",
+        "alt": "NCCL Communication Latency Comparison",
+        "description": "Mean communication latency for NCCL allreduce operations across different message sizes.",
+    },
+    {
+        "name": "NCCL Algorithm Bandwidth",
+        "file": "NCCL_Algorithm_Bandwidth_comparison.png",
+        "alt": "NCCL Algorithm Bandwidth Comparison",
+        "description": "Algorithm bandwidth achieved for different message sizes in NCCL collective operations.",
+    },
+    {
+        "name": "NCCL Bus Bandwidth",
+        "file": "NCCL_Bus_Bandwidth_comparison.png",
+        "alt": "NCCL Bus Bandwidth Comparison",
+        "description": "Bus bandwidth utilization across NCCL operations and message sizes.",
+    },
+    {
+        "name": "NCCL Performance Percentage Change",
+        "file": "NCCL_Performance_Percentage_Change_comparison.png",
+        "alt": "NCCL Performance Percentage Change Comparison",
+        "description": "Percent change in communication latency and bandwidth metrics for each message size configuration.",
+    },
+    {
+        "name": "NCCL Total Communication Latency",
+        "file": "NCCL_Total_Communication_Latency_comparison.png",
+        "alt": "NCCL Total Communication Latency Comparison",
+        "description": "Aggregate communication latency summed across all operations for each message size.",
+    },
+]
diff --git a/scripts/tracelens_single_config/process_gpu_timeline.py b/scripts/tracelens_single_config/process_gpu_timeline.py
new file mode 100644
index 0000000..145f817
--- /dev/null
+++ b/scripts/tracelens_single_config/process_gpu_timeline.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python3
+import pandas as pd
+import numpy as np
+import argparse
+from pathlib import Path
+
+
+def geometric_mean(values):
+    values = np.array(values)
+    # Clamp zeros to a tiny positive value so the log stays defined
+    values = np.where(values == 0, 1e-10, values)
+    return np.exp(np.mean(np.log(values)))
+
+
+def process_gpu_timeline(reports_dir, use_geo_mean=False):
+    """
+    Aggregate the per-rank GPU timelines in a TraceLens analysis directory,
+    using the arithmetic or geometric mean across ranks.
+    """
+    reports_path = Path(reports_dir)
+
+    if not reports_path.exists():
+        print(f"Error: Directory not found: {reports_dir}")
+        return 1
+
+    print(f"Processing GPU timeline from: {reports_dir}")
+    print(f"Aggregation: {'Geometric Mean' if use_geo_mean else 'Arithmetic Mean'}")
+
+    perf_files = sorted(reports_path.glob("perf_rank*.xlsx"))
+
+    if not perf_files:
+        print("Error: No perf_rank*.xlsx files found")
+        return 1
+
+    print(f"Found {len(perf_files)} rank files")
+
+    rank_data = []
+    for file_path in perf_files:
+        rank_num = int(file_path.stem.replace("perf_rank", ""))
+        try:
+            df = pd.read_excel(file_path, sheet_name="gpu_timeline")
+            df["rank"] = rank_num
+            rank_data.append(df)
+            print(f"  Rank {rank_num}: OK")
+        except Exception as e:
+            print(f"  Rank {rank_num}: Error - {e}")
+
+    if not rank_data:
+        print("Error: No valid data loaded")
+        return 1
+
+    combined = pd.concat(rank_data, ignore_index=True)
+
+    agg_func = geometric_mean if use_geo_mean else "mean"
+    aggregated = (
+        combined.groupby("type")
+        .agg({"time ms": agg_func, "percent": agg_func})
+        .reset_index()
+    )
+
+    aggregated["num_ranks"] = len(perf_files)
+
+    method_suffix = "geomean" if use_geo_mean else "mean"
+    output_path = reports_path.parent / f"gpu_timeline_summary_{method_suffix}.xlsx"
+
+    with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
+        aggregated.to_excel(writer, sheet_name="Summary", index=False)
+
+        combined_sorted = combined.sort_values(["rank", "type"])
+        combined_sorted.to_excel(writer, sheet_name="All_Ranks_Combined", index=False)
+
+        per_rank = combined.pivot_table(
+            values="time ms", index="type", columns="rank", aggfunc="first"
+        )
+        per_rank.to_excel(writer, sheet_name="Per_Rank_Time_ms")
+
+        per_rank_pct = combined.pivot_table(
+            values="percent", index="type", columns="rank", aggfunc="first"
+        )
+        per_rank_pct.to_excel(writer, sheet_name="Per_Rank_Percent")
+
+    print(f"\nSaved: {output_path}")
+    print("\nSummary:")
+    print(aggregated.to_string(index=False))
+
+    return 0
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Aggregate GPU timeline
across ranks") + parser.add_argument( + "--reports-dir", required=True, help="Path to individual_reports directory" + ) + parser.add_argument("--geo-mean", action="store_true", help="Use geometric mean") + + args = parser.parse_args() + + return process_gpu_timeline(args.reports_dir, args.geo_mean) + + +if __name__ == "__main__": + exit(main()) diff --git a/scripts/tracelens_single_config/run_full_analysis.py b/scripts/tracelens_single_config/run_full_analysis.py new file mode 100644 index 0000000..5385ec4 --- /dev/null +++ b/scripts/tracelens_single_config/run_full_analysis.py @@ -0,0 +1,387 @@ +#!/usr/bin/env python3 +""" +Master script for complete TraceLens analysis pipeline. +Runs analysis on baseline and test traces, then performs all comparisons. +""" +import argparse +import subprocess +import os +import sys +from pathlib import Path + + +def run_command(cmd, description): + """Execute a command and handle errors.""" + print(f"\n{'='*80}") + print(f"{description}") + print(f"{'='*80}") + print(f"Command: {' '.join(cmd)}") + + result = subprocess.run(cmd, capture_output=True, text=True) + + if result.returncode != 0: + print(f"Error: {description} failed!") + print(f"Stderr: {result.stderr}") + return False + + print(result.stdout) + return True + + +def run_tracelens_analysis( + trace_dir, output_name, individual_only=False, collective_only=False +): + """Run TraceLens analysis on a single trace directory.""" + print(f"\nAnalyzing: {trace_dir}") + + # Build command + script_path = Path(__file__).parent / "run_tracelens_single_config.sh" + cmd = ["bash", str(script_path), trace_dir] + + if individual_only: + cmd.append("--individual-only") + elif collective_only: + cmd.append("--collective-only") + + return run_command(cmd, f"TraceLens analysis for {output_name}") + + +def process_gpu_timeline(reports_dir): + """Process GPU timeline from individual reports.""" + script_path = Path(__file__).parent / "process_gpu_timeline.py" + cmd = ["python3", str(script_path), "--reports-dir", reports_dir] + + return run_command(cmd, "Processing GPU timeline") + + +def combine_reports(baseline_file, test_file, output_file): + """Combine baseline and test reports.""" + script_path = Path(__file__).parent / "combine_reports.py" + cmd = [ + "python3", + str(script_path), + "--baseline", + baseline_file, + "--test", + test_file, + "--output", + output_file, + ] + + return run_command(cmd, f"Combining reports to {output_file}") + + +def add_comparison_sheets(input_file, output_file): + """Add comparison sheets for GPU timeline.""" + script_path = Path(__file__).parent / "add_comparison_sheets.py" + cmd = ["python3", str(script_path), "--input", input_file, "--output", output_file] + + return run_command(cmd, "Adding GPU timeline comparison sheets") + + +def add_collective_comparison(input_file, output_file): + """Add comparison sheets for collective operations.""" + script_path = Path(__file__).parent / "add_collective_comparison.py" + cmd = ["python3", str(script_path), "--input", input_file, "--output", output_file] + + return run_command(cmd, "Adding collective comparison sheets") + + +def create_final_report( + gpu_combined, gpu_comparison, coll_combined, coll_comparison, output_file +): + """Create comprehensive final report with all data.""" + script_path = Path(__file__).parent / "create_final_report.py" + cmd = [ + "python3", + str(script_path), + "--gpu-combined", + gpu_combined, + "--gpu-comparison", + gpu_comparison, + "--coll-combined", + coll_combined, + "--coll-comparison", + 
coll_comparison, + "--output", + output_file, + ] + + if run_command(cmd, "Creating comprehensive final report"): + plot_script_path = Path(__file__).parent / "create_final_plots.py" + cmd = ["python3", str(plot_script_path), "--report-path", output_file] + if run_command(cmd, "Creating final plots"): + html_script_path = Path(__file__).parent / "create_final_html.py" + cmd = [ + "python3", + str(html_script_path), + "--plot-files-directory", + str(Path(output_file).parent / "plots"), + ] + if run_command(cmd, "Creating final HTML"): + return True + return False + + +def main(): + parser = argparse.ArgumentParser( + description="Complete TraceLens analysis pipeline with comparisons", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Full analysis with everything including final report + python run_full_analysis.py \\ + --baseline /path/to/baseline/traces \\ + --test /path/to/test/traces \\ + --output /path/to/output \\ + --all + + # Only GPU timeline comparison + python run_full_analysis.py \\ + --baseline /path/to/baseline \\ + --test /path/to/test \\ + --output /path/to/output \\ + --gpu-timeline + + # Create final report (skip TraceLens if already done) + python run_full_analysis.py \\ + --baseline /path/to/baseline \\ + --test /path/to/test \\ + --output /path/to/output \\ + --gpu-timeline --collective --final-report \\ + --skip-tracelens + """, + ) + + # Required arguments + parser.add_argument( + "--baseline", required=True, help="Path to baseline trace directory" + ) + parser.add_argument("--test", required=True, help="Path to test trace directory") + parser.add_argument( + "--output", required=True, help="Output directory for comparison results" + ) + + # Analysis options + parser.add_argument( + "--skip-tracelens", + action="store_true", + help="Skip TraceLens report generation (if already done)", + ) + parser.add_argument( + "--individual-only", + action="store_true", + help="Generate only individual reports", + ) + parser.add_argument( + "--collective-only", + action="store_true", + help="Generate only collective reports", + ) + + # Comparison options + parser.add_argument( + "--gpu-timeline", action="store_true", help="Perform GPU timeline comparison" + ) + parser.add_argument( + "--collective", action="store_true", help="Perform collective/NCCL comparison" + ) + parser.add_argument( + "--final-report", + action="store_true", + help="Create comprehensive final report with tables and hidden raw data", + ) + parser.add_argument( + "--all", + action="store_true", + help="Perform all analyses and comparisons including final report", + ) + + args = parser.parse_args() + + # Handle --all flag + if args.all: + args.gpu_timeline = True + args.collective = True + args.final_report = True + + # Validate inputs + baseline_path = Path(args.baseline) + test_path = Path(args.test) + output_path = Path(args.output) + + if not baseline_path.exists(): + print(f"Error: Baseline path not found: {args.baseline}") + return 1 + + if not test_path.exists(): + print(f"Error: Test path not found: {args.test}") + return 1 + + # Create output directory + output_path.mkdir(parents=True, exist_ok=True) + + print("\n" + "=" * 80) + print("TRACELENS FULL ANALYSIS PIPELINE") + print("=" * 80) + print(f"Baseline: {args.baseline}") + print(f"Test: {args.test}") + print(f"Output: {args.output}") + print(f"Options:") + print(f" Skip TraceLens: {args.skip_tracelens}") + print(f" GPU timeline: {args.gpu_timeline}") + print(f" Collective: {args.collective}") + print(f" Final report: 
{args.final_report}")
+
+    # Step 1: Run TraceLens analysis on both directories
+    if not args.skip_tracelens:
+        print("\n" + "=" * 80)
+        print("STEP 1: Running TraceLens Analysis")
+        print("=" * 80)
+
+        if not run_tracelens_analysis(
+            args.baseline, "baseline", args.individual_only, args.collective_only
+        ):
+            return 1
+
+        if not run_tracelens_analysis(
+            args.test, "test", args.individual_only, args.collective_only
+        ):
+            return 1
+    else:
+        print("\nSkipping TraceLens report generation (--skip-tracelens flag)")
+
+    # Determine analysis directories
+    baseline_analysis = baseline_path / "tracelens_analysis"
+    test_analysis = test_path / "tracelens_analysis"
+
+    if not baseline_analysis.exists():
+        print(f"Error: Baseline analysis not found: {baseline_analysis}")
+        print("Run without --skip-tracelens flag first")
+        return 1
+
+    if not test_analysis.exists():
+        print(f"Error: Test analysis not found: {test_analysis}")
+        print("Run without --skip-tracelens flag first")
+        return 1
+
+    # Step 2: GPU Timeline Comparison
+    if args.gpu_timeline:
+        print("\n" + "=" * 80)
+        print("STEP 2: GPU Timeline Comparison")
+        print("=" * 80)
+
+        # Process GPU timelines
+        baseline_reports = baseline_analysis / "individual_reports"
+        test_reports = test_analysis / "individual_reports"
+
+        if not baseline_reports.exists() or not test_reports.exists():
+            print(
+                "Error: Individual reports not found. Run without --collective-only flag"
+            )
+            return 1
+
+        print("\nProcessing baseline GPU timeline...")
+        if not process_gpu_timeline(str(baseline_reports)):
+            return 1
+
+        print("\nProcessing test GPU timeline...")
+        if not process_gpu_timeline(str(test_reports)):
+            return 1
+
+        # Combine GPU timeline summaries
+        baseline_gpu = baseline_analysis / "gpu_timeline_summary_mean.xlsx"
+        test_gpu = test_analysis / "gpu_timeline_summary_mean.xlsx"
+        combined_gpu = output_path / "gpu_timeline_combined.xlsx"
+
+        if not combine_reports(str(baseline_gpu), str(test_gpu), str(combined_gpu)):
+            return 1
+
+        # Add comparison sheets
+        gpu_comparison = output_path / "gpu_timeline_comparison.xlsx"
+        if not add_comparison_sheets(str(combined_gpu), str(gpu_comparison)):
+            return 1
+
+        print(f"\nGPU timeline comparison saved to: {gpu_comparison}")
+
+    # Step 3: Collective Comparison
+    if args.collective:
+        print("\n" + "=" * 80)
+        print("STEP 3: Collective/NCCL Comparison")
+        print("=" * 80)
+
+        baseline_collective = (
+            baseline_analysis / "collective_reports" / "collective_all_ranks.xlsx"
+        )
+        test_collective = (
+            test_analysis / "collective_reports" / "collective_all_ranks.xlsx"
+        )
+
+        if not baseline_collective.exists() or not test_collective.exists():
+            print(
+
+        # Combine collective reports
+        combined_collective = output_path / "collective_combined.xlsx"
+        if not combine_reports(
+            str(baseline_collective), str(test_collective), str(combined_collective)
+        ):
+            return 1
+
+        # Add collective comparison
+        collective_comparison = output_path / "collective_comparison.xlsx"
+        if not add_collective_comparison(
+            str(combined_collective), str(collective_comparison)
+        ):
+            return 1
+
+        print(f"\nCollective comparison saved to: {collective_comparison}")
+
+    # Step 4: Create final comprehensive report
+    if args.final_report and args.gpu_timeline and args.collective:
+        print("\n" + "=" * 80)
+        print("STEP 4: Creating Final Comprehensive Report")
+        print("=" * 80)
+
+        gpu_combined = output_path / "gpu_timeline_combined.xlsx"
+        gpu_comparison = output_path / "gpu_timeline_comparison.xlsx"
+        collective_combined = output_path / "collective_combined.xlsx"
+        collective_comparison = output_path / "collective_comparison.xlsx"
+        final_report = output_path / "final_analysis_report.xlsx"
+
+        if not create_final_report(
+            str(gpu_combined),
+            str(gpu_comparison),
+            str(collective_combined),
+            str(collective_comparison),
+            str(final_report),
+        ):
+            return 1
+
+        print(f"\nFinal comprehensive report saved to: {final_report}")
+        print("  - Summary Dashboard as first sheet")
+        print("  - All comparison sheets visible")
+        print("  - Raw data sheets hidden (can be unhidden in Excel)")
+        print("  - All data formatted as Excel tables with filters")
+        print("  - Color coding applied (green=better, red=worse)")
+    elif args.final_report:
+        print("\nSkipping final report: --final-report also requires --gpu-timeline and --collective (or use --all)")
+
+    # Summary
+    print("\n" + "=" * 80)
+    print("ANALYSIS COMPLETE!")
+    print("=" * 80)
+    print(f"\nResults saved to: {output_path}")
+
+    files = list(output_path.glob("*.xlsx"))
+    if files:
+        print("\nGenerated files:")
+        for f in sorted(files):
+            print(f"  - {f.name}")
+
+    print("\nAnalysis pipeline completed successfully!")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/tracelens_single_config/run_tracelens_single_config.sh b/scripts/tracelens_single_config/run_tracelens_single_config.sh
new file mode 100644
index 0000000..96831ff
--- /dev/null
+++ b/scripts/tracelens_single_config/run_tracelens_single_config.sh
@@ -0,0 +1,266 @@
+#!/bin/bash
+# TraceLens Analysis for Single Configuration (No Sweep)
+# Usage: ./run_tracelens_single_config.sh <trace_directory> [options]
+#
+# The script accepts either:
+#   - Path to parent directory containing torch_profiler/
+#   - Path to torch_profiler/ directory directly
+#
+# Examples:
+#   ./run_tracelens_single_config.sh /path/to/traces
+#   ./run_tracelens_single_config.sh /path/to/traces/torch_profiler
+#
+# Note: Uses GEMM-patched TraceLens wrapper to recognize ROCm Tensile kernels
+
+set -e
+
+# Get the directory where this script is located
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Use patched TraceLens wrapper for GEMM recognition
+TRACELENS_WRAPPER="python $SCRIPT_DIR/../tracelens_with_gemm_patch.py"
+
+# Parse options
+RUN_INDIVIDUAL=true
+RUN_COLLECTIVE=true
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --individual-only)
+            RUN_COLLECTIVE=false
+            shift
+            ;;
+        --collective-only)
+            RUN_INDIVIDUAL=false
+            shift
+            ;;
+        *)
+            INPUT_DIR="$1"
+            shift
+            ;;
+    esac
+done
+
+# Check if directory provided
+if [ -z "$INPUT_DIR" ]; then
+    echo "Error: Please provide trace directory"
+    echo ""
+    echo "Usage: $0 <trace_directory> [options]"
+    echo ""
+    echo "Options:"
+    echo "  --individual-only   Generate only individual reports"
+    echo "  --collective-only   Generate only collective report"
+   
echo "" + echo "Examples:" + echo " $0 /path/to/traces" + echo " $0 /path/to/traces --individual-only" + echo " $0 /path/to/traces --collective-only" + echo "" + exit 1 +fi + +# Verify directory exists +if [ ! -d "$INPUT_DIR" ]; then + echo "Error: Directory not found: $INPUT_DIR" + exit 1 +fi + +# Auto-detect structure: is this torch_profiler/ or its parent? +TORCH_PROF_DIR="" +BASE_DIR="" + +# Check if INPUT_DIR contains rank directories (i.e., it IS torch_profiler/) +if find "$INPUT_DIR" -maxdepth 1 -type d -name "rank*" | grep -q .; then + TORCH_PROF_DIR="$INPUT_DIR" + BASE_DIR=$(dirname "$INPUT_DIR") + echo "Detected torch_profiler directory: $TORCH_PROF_DIR" +# Check if INPUT_DIR contains torch_profiler/ subdirectory +elif [ -d "$INPUT_DIR/torch_profiler" ]; then + TORCH_PROF_DIR="$INPUT_DIR/torch_profiler" + BASE_DIR="$INPUT_DIR" + echo "Found torch_profiler subdirectory: $TORCH_PROF_DIR" +else + echo "Error: Cannot find rank directories in expected structure" + echo "" + echo "Expected one of:" + echo " 1. Directory with rank0/, rank1/, ... subdirectories (torch_profiler/)" + echo " 2. Parent directory containing torch_profiler/rank0/, rank1/, ..." + echo "" + echo "Provided: $INPUT_DIR" + exit 1 +fi + +echo "════════════════════════════════════════════════════════════════" +echo " TraceLens Analysis - Single Configuration" +echo "════════════════════════════════════════════════════════════════" +echo "" +echo "Input directory: $INPUT_DIR" +echo "Torch profiler traces: $TORCH_PROF_DIR" +echo "" + +# Create output directory in the base directory +OUTPUT_DIR="${BASE_DIR}/tracelens_analysis" +mkdir -p "$OUTPUT_DIR" +mkdir -p "$OUTPUT_DIR/individual_reports" +mkdir -p "$OUTPUT_DIR/collective_reports" + +# Detect number of ranks +NUM_RANKS=$(find "$TORCH_PROF_DIR" -maxdepth 1 -type d -name "rank*" | wc -l) + +if [ $NUM_RANKS -eq 0 ]; then + echo "Error: No rank directories found in $TORCH_PROF_DIR" + exit 1 +fi + +echo "Detected $NUM_RANKS ranks" + +# Show sample trace files +echo "" +echo "Sample trace files:" +for rank_dir in $(find "$TORCH_PROF_DIR" -maxdepth 1 -type d -name "rank*" | sort | head -3); do + rank_name=$(basename "$rank_dir") + trace_file=$(find "$rank_dir" -name "*.json" | head -1) + if [ -n "$trace_file" ]; then + echo " $rank_name: $(basename "$trace_file")" + fi +done +if [ "$RUN_INDIVIDUAL" = true ]; then + echo "" + echo "════════════════════════════════════════════════════════════════" + echo "Step 1: Generating Individual Performance Reports" + echo "════════════════════════════════════════════════════════════════" + echo "" + +# Process each rank +for rank_idx in $(seq 0 $((NUM_RANKS - 1))); do + # Try multiple directory naming patterns + RANK_DIR="" + if [ -d "$TORCH_PROF_DIR/rank${rank_idx}" ]; then + RANK_DIR="$TORCH_PROF_DIR/rank${rank_idx}" + elif [ -d "$TORCH_PROF_DIR/rank_${rank_idx}" ]; then + RANK_DIR="$TORCH_PROF_DIR/rank_${rank_idx}" + elif [ -d "$TORCH_PROF_DIR/rank_$(printf "%02d" $rank_idx)" ]; then + RANK_DIR="$TORCH_PROF_DIR/rank_$(printf "%02d" $rank_idx)" + fi + + if [ -z "$RANK_DIR" ] || [ ! -d "$RANK_DIR" ]; then + echo " Skip rank ${rank_idx} - directory not found" + continue + fi + + # Find trace file + TRACE=$(find "$RANK_DIR" -name "*.json" -type f | head -1) + + if [ -z "$TRACE" ]; then + echo "⚠️ Skip rank ${rank_idx} - no trace file found" + continue + fi + + OUTPUT="$OUTPUT_DIR/individual_reports/perf_rank${rank_idx}.xlsx" + + echo "Processing rank ${rank_idx}..." 
+    echo "    Trace: $(basename "$TRACE")"
+
+    $TRACELENS_WRAPPER generate_perf_report \
+        --profile_json_path "$TRACE" \
+        --output_xlsx_path "$OUTPUT" \
+        --include_unlinked_kernels \
+        --short_kernel_study \
+        --short_kernel_threshold_us 50 \
+        --topk_ops 100 \
+        --topk_roofline_ops 100
+
+    echo "  Done: $OUTPUT"
+    echo ""
+done
+
+fi
+
+if [ "$RUN_COLLECTIVE" = true ]; then
+    echo ""
+    echo "════════════════════════════════════════════════════════════════"
+    echo "Step 2: Generating Multi-Rank Collective Report"
+    echo "════════════════════════════════════════════════════════════════"
+    echo ""
+
+# Find a sample trace file to get the filename pattern
+SAMPLE_TRACE=$(find "$TORCH_PROF_DIR/rank0" -name "*.json" -type f | head -1)
+if [ -z "$SAMPLE_TRACE" ]; then
+    # Try alternative rank naming
+    SAMPLE_TRACE=$(find "$TORCH_PROF_DIR/rank_0" -name "*.json" -type f | head -1)
+fi
+
+if [ -z "$SAMPLE_TRACE" ]; then
+    # Try rank_00
+    SAMPLE_TRACE=$(find "$TORCH_PROF_DIR/rank_00" -name "*.json" -type f | head -1)
+fi
+
+if [ -n "$SAMPLE_TRACE" ]; then
+    OUTPUT="$OUTPUT_DIR/collective_reports/collective_all_ranks.xlsx"
+
+    echo "Generating collective report for all $NUM_RANKS ranks..."
+
+    # Create symlinks with consistent names for collective report, using the
+    # same rank-directory naming fallbacks as the individual-report loop
+    for rank_idx in $(seq 0 $((NUM_RANKS - 1))); do
+        RANK_DIR=""
+        if [ -d "$TORCH_PROF_DIR/rank${rank_idx}" ]; then
+            RANK_DIR="$TORCH_PROF_DIR/rank${rank_idx}"
+        elif [ -d "$TORCH_PROF_DIR/rank_${rank_idx}" ]; then
+            RANK_DIR="$TORCH_PROF_DIR/rank_${rank_idx}"
+        elif [ -d "$TORCH_PROF_DIR/rank_$(printf "%02d" $rank_idx)" ]; then
+            RANK_DIR="$TORCH_PROF_DIR/rank_$(printf "%02d" $rank_idx)"
+        fi
+        if [ -n "$RANK_DIR" ]; then
+            TRACE=$(find "$RANK_DIR" -name "*.json" -type f | head -1)
+            # Skip traces already named trace.json to avoid a self-referencing link
+            if [ -n "$TRACE" ] && [ "$(basename "$TRACE")" != "trace.json" ]; then
+                ln -sf "$(basename "$TRACE")" "$RANK_DIR/trace.json"
+            fi
+        fi
+    done
+
+    echo "  Trace pattern: rank*/trace.json"
+
+    $TRACELENS_WRAPPER generate_multi_rank_collective \
+        --trace_pattern "$TORCH_PROF_DIR/rank*/trace.json" \
+        --world_size $NUM_RANKS \
+        --output_xlsx_path "$OUTPUT" \
+        --detailed_analysis \
+        --use_multiprocessing
+
+    echo "  Done: $OUTPUT"
+else
+    echo "  Could not generate collective report - no trace files found"
+fi
+
+fi
+
+echo ""
+echo "════════════════════════════════════════════════════════════════"
+echo "Analysis Complete!"
+echo "════════════════════════════════════════════════════════════════"
+echo ""
+echo "📁 Results saved to:"
+echo "   $OUTPUT_DIR/"
+echo ""
+
+# Count generated reports
+INDIV_COUNT=$(find "$OUTPUT_DIR/individual_reports" -name "*.xlsx" 2>/dev/null | wc -l)
+COLL_COUNT=$(find "$OUTPUT_DIR/collective_reports" -name "*.xlsx" 2>/dev/null | wc -l)
+
+echo "Generated reports:"
+echo "  Individual reports (per rank): $INDIV_COUNT"
+echo "  Collective reports (all ranks): $COLL_COUNT"
+echo ""
+
+echo "📊 Report Files:"
+echo ""
+echo "Individual Performance Reports:"
+if [ $INDIV_COUNT -gt 0 ]; then
+    find "$OUTPUT_DIR/individual_reports" -name "*.xlsx" | sort | sed 's/^/  /'
+else
+    echo "  (none generated)"
+fi
+echo ""
+
+echo "Collective Reports:"
+if [ $COLL_COUNT -gt 0 ]; then
+    find "$OUTPUT_DIR/collective_reports" -name "*.xlsx" | sed 's/^/  /'
+else
+    echo "  (none generated)"
+fi
+
+echo ""
+echo "Done!"
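+
+# Example session (hypothetical paths): generate this run's reports, then feed
+# two such runs to run_full_analysis.py for a baseline-vs-test comparison:
+#   ./run_tracelens_single_config.sh ~/runs/exp1/traces
+#   ls ~/runs/exp1/traces/tracelens_analysis/individual_reports/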