diff --git a/scripts/tracelens_single_config/README.md b/scripts/tracelens_single_config/README.md
new file mode 100644
index 0000000..b5c1321
--- /dev/null
+++ b/scripts/tracelens_single_config/README.md
@@ -0,0 +1,99 @@
+# TraceLens Single Configuration
+
+Analyze PyTorch profiler traces from one training run.
+
+For multiple configurations, see [../gemm_analysis/README.md](../gemm_analysis/README.md).
+
+## Quick Start
+
+```bash
+# Complete analysis
+python scripts/tracelens_single_config/run_full_analysis.py \
+ --baseline /path/to/baseline/traces \
+ --test /path/to/test/traces \
+ --output /path/to/output \
+ --all
+
+# Skip TraceLens if already done
+python scripts/tracelens_single_config/run_full_analysis.py \
+ --baseline /path/to/baseline \
+ --test /path/to/test \
+ --output /path/to/output \
+ --all --skip-tracelens
+```
+
+### Flags:
+- `--all` - Run everything including final report
+- `--gpu-timeline` - GPU timeline comparison
+- `--collective` - NCCL collective comparison
+- `--final-report` - Create comprehensive Excel report
+- `--skip-tracelens` - Skip TraceLens report generation if already done
+
+### Output:
+- `final_analysis_report.xlsx` - All comparisons with tables and color scale
+  - Color scale on percent_change: Red (worst) -> White (neutral) -> Green (best)
+- `final_analysis_report.html` and `plots/` - Standalone HTML report with the comparison charts embedded
+
+### Using --skip-tracelens
+
+Pass the same `--baseline` and `--test` paths as in the original run; the script looks for a `tracelens_analysis` subdirectory inside each:
+
+```bash
+# Expected structure when using --skip-tracelens
+baseline/
+└── tracelens_analysis/ # From previous run
+ ├── individual_reports/
+ └── collective_reports/
+
+test/
+└── tracelens_analysis/ # From previous run
+ ├── individual_reports/
+ └── collective_reports/
+```
+
+Example:
+```bash
+# Use same paths, script finds tracelens_analysis inside
+python run_full_analysis.py \
+ --baseline ~/data/baseline_traces \
+ --test ~/data/test_traces \
+ --output ~/results \
+ --all --skip-tracelens
+```
+
+
+## Expected Structure
+
+```
+traces/
+└── torch_profiler/
+ ├── rank0/
+ │ └── trace.json
+ ├── rank1/
+ │ └── trace.json
+ └── ...
+```
+
+## What the Master Script Does
+
+The `run_full_analysis.py` script automatically handles all steps:
+
+1. Runs TraceLens on baseline and test traces
+2. Processes GPU timelines using `process_gpu_timeline.py`
+3. Combines reports using `combine_reports.py`
+4. Adds comparison sheets using `add_comparison_sheets.py` and `add_collective_comparison.py`
+5. Creates the final report using `create_final_report.py`, then renders charts with `create_final_plots.py` and a standalone HTML report with `create_final_html.py`
+
+All post-processing is handled automatically - no need to run individual scripts.
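+
+If a single stage needs to be rerun by hand (for example while debugging one step), the
+post-processing CLIs can also be invoked directly. A rough sketch of the GPU-timeline half
+of the pipeline, with placeholder paths:
+
+```bash
+# 1. Aggregate per-rank GPU timelines for each run
+python scripts/tracelens_single_config/process_gpu_timeline.py \
+    --reports-dir /path/to/baseline/tracelens_analysis/individual_reports
+
+# 2. Combine the baseline and test summaries into one workbook
+python scripts/tracelens_single_config/combine_reports.py \
+    --baseline /path/to/baseline/tracelens_analysis/gpu_timeline_summary_mean.xlsx \
+    --test /path/to/test/tracelens_analysis/gpu_timeline_summary_mean.xlsx \
+    --output /path/to/output/gpu_timeline_combined.xlsx
+
+# 3. Add comparison sheets with the percent_change color scale
+python scripts/tracelens_single_config/add_comparison_sheets.py \
+    --input /path/to/output/gpu_timeline_combined.xlsx \
+    --output /path/to/output/gpu_timeline_comparison.xlsx
+```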
+
+
+## Scripts
+
+```
+run_full_analysis.py - Master script for complete pipeline
+create_final_report.py - Create comprehensive Excel report
+run_tracelens_single_config.sh - Main TraceLens report generation
+process_gpu_timeline.py - Aggregate GPU timeline across ranks
+combine_reports.py - Combine two runs
+add_comparison_sheets.py - Add GPU timeline comparison sheets
+add_collective_comparison.py - Add collective/NCCL comparison sheets
+create_final_plots.py - Generate comparison charts (PNG) from the final report
+create_final_html.py - Build a standalone HTML report embedding the charts
+html_report_config.py - HTML header/footer and chart configuration constants
+```
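+
+`run_full_analysis.py --all` also renders charts and a standalone HTML report after the
+final Excel report is written. To regenerate just those artifacts, the two scripts can be
+run on their own; a sketch with placeholder paths:
+
+```bash
+# Generate PNG charts next to the report (written to a plots/ subdirectory by default)
+python scripts/tracelens_single_config/create_final_plots.py \
+    --report-path /path/to/output/final_analysis_report.xlsx
+
+# Embed the charts into a single self-contained HTML report
+python scripts/tracelens_single_config/create_final_html.py \
+    --plot-files-directory /path/to/output/plots
+```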
diff --git a/scripts/tracelens_single_config/add_collective_comparison.py b/scripts/tracelens_single_config/add_collective_comparison.py
new file mode 100644
index 0000000..ee54d46
--- /dev/null
+++ b/scripts/tracelens_single_config/add_collective_comparison.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python3
+import pandas as pd
+import argparse
+from openpyxl.formatting.rule import ColorScaleRule
+
+
+def add_collective_comparison_sheets(input_path, output_path):
+ """
+    Add comparison sheets to the combined collective report.
+    For each NCCL summary sheet, baseline and saleelk rows are matched on the
+    grouping columns and compared side by side (diff, percent change, ratio).
+    TODO: generalize to n runs and remove the hardcoded source labels.
+ """
+ print(f"Loading: {input_path}")
+
+ xl = pd.ExcelFile(input_path)
+
+ with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
+ # Copy only summary sheets
+ for sheet_name in xl.sheet_names:
+ # Only keep sheets with 'summary' in the name
+ if "summary" not in sheet_name.lower():
+ print(f" Skip {sheet_name} (keeping only summary sheets)")
+ continue
+ df = pd.read_excel(input_path, sheet_name=sheet_name)
+ df.to_excel(writer, sheet_name=sheet_name, index=False)
+ print(f" Copied {sheet_name}")
+
+ # Process summary sheets for comparison
+ for sheet_name in ["nccl_summary_implicit_sync", "nccl_summary_long"]:
+ if sheet_name not in xl.sheet_names:
+ continue
+
+ df = pd.read_excel(input_path, sheet_name=sheet_name)
+
+ # Separate baseline and saleelk
+ baseline_df = df[df["source"] == "baseline"].copy()
+ saleelk_df = df[df["source"] == "saleelk"].copy()
+
+ if len(baseline_df) == 0 or len(saleelk_df) == 0:
+ print(f" Skip {sheet_name} - missing data")
+ continue
+
+ # Create comparison dataframe
+ comparison = pd.DataFrame()
+
+ # Identify key columns for grouping
+ group_cols = ["Collective name", "dtype", "In msg nelems"]
+ if not all(col in baseline_df.columns for col in group_cols):
+ group_cols = ["Collective name"]
+
+ # Group and compare
+ baseline_grouped = baseline_df.groupby(group_cols, as_index=False)
+ saleelk_grouped = saleelk_df.groupby(group_cols, as_index=False)
+
+ for name, base_group in baseline_grouped:
+ # Find matching saleelk group
+ if isinstance(name, tuple):
+ mask = pd.Series([True] * len(saleelk_df), index=saleelk_df.index)
+ for col, val in zip(group_cols, name):
+ mask = mask & (saleelk_df[col] == val)
+ else:
+ mask = saleelk_df[group_cols[0]] == name
+
+ sale_group = saleelk_df.loc[mask]
+
+ if len(sale_group) == 0:
+ continue
+
+ # Create comparison row
+ comp_row = {}
+
+ # Copy grouping columns
+ if isinstance(name, tuple):
+ for col, val in zip(group_cols, name):
+ comp_row[col] = val
+ else:
+ comp_row[group_cols[0]] = name
+
+ # Compare numeric columns
+ numeric_cols = [
+ "comm_latency_mean",
+ "algo bw (GB/s)_mean",
+ "bus bw (GB/s)_mean",
+ "Total comm latency (ms)",
+ "count",
+ ]
+
+ for col in numeric_cols:
+ if col not in base_group.columns or col not in sale_group.columns:
+ continue
+
+ base_val = base_group[col].values[0]
+ sale_val = sale_group[col].values[0]
+
+ comp_row[f"baseline_{col}"] = base_val
+ comp_row[f"saleelk_{col}"] = sale_val
+ comp_row[f"diff_{col}"] = sale_val - base_val
+
+ # For latency/time: positive percent_change means faster (less time)
+ # For bandwidth: positive percent_change means better (more bandwidth)
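+                    # e.g. a 10 ms -> 8 ms latency is reported as +20% (faster), and a
+                    # 100 GB/s -> 120 GB/s bandwidth is also +20% (better).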
+ if "latency" in col.lower() or "time" in col.lower():
+ # Lower is better - positive when saleelk is faster
+ pct_change = (
+ (base_val - sale_val) / base_val * 100
+ if base_val != 0
+ else 0
+ )
+ comp_row[f"percent_change_{col}"] = pct_change
+ elif "bw" in col.lower() or "bandwidth" in col.lower():
+ # Higher is better - positive when saleelk is better
+ pct_change = (
+ (sale_val - base_val) / base_val * 100
+ if base_val != 0
+ else 0
+ )
+ comp_row[f"percent_change_{col}"] = pct_change
+
+ comp_row[f"ratio_{col}"] = (
+ sale_val / base_val if base_val != 0 else 0
+ )
+
+ comparison = pd.concat(
+ [comparison, pd.DataFrame([comp_row])], ignore_index=True
+ )
+
+ # Write comparison sheet (shorten name to fit Excel's 31 char limit)
+            # Shorten the name: replace the 'nccl_summary_' prefix with 'nccl_' and append '_cmp'
+ comparison_sheet_name = (
+ sheet_name.replace("nccl_summary_", "nccl_") + "_cmp"
+ )
+ comparison.to_excel(writer, sheet_name=comparison_sheet_name, index=False)
+ print(f" Added {comparison_sheet_name}")
+
+ # Add conditional formatting to percent_change columns
+ print(f" Applying conditional formatting to {comparison_sheet_name}...")
+
+ ws = writer.sheets[comparison_sheet_name]
+
+ # Format all percent_change columns with color scale
+ for col_idx, col in enumerate(comparison.columns, start=1):
+ if "percent_change" in col:
+ # Convert column index to Excel letter (A, B, C, ...)
+                    if col_idx <= 26:
+                        col_letter = chr(64 + col_idx)
+                    else:
+                        # Two-letter columns (AA, AB, ...); 0-based arithmetic avoids
+                        # the off-by-one at multiples of 26 (e.g. column 52 -> "AZ")
+                        col_letter = chr(64 + (col_idx - 1) // 26) + chr(
+                            65 + (col_idx - 1) % 26
+                        )
+
+ data_range = f"{col_letter}2:{col_letter}{len(comparison)+1}"
+
+ # Color scale: red (min/negative) -> white (0) -> green (max/positive)
+ ws.conditional_formatting.add(
+ data_range,
+ ColorScaleRule(
+ start_type="min",
+ start_color="F8696B", # Red
+ mid_type="num",
+ mid_value=0,
+ mid_color="FFFFFF", # White
+ end_type="max",
+ end_color="63BE7B", # Green
+ ),
+ )
+
+ print(f" Formatted {col}")
+
+ print(f"\nSaved: {output_path}")
+ print("\nNew comparison sheets added")
+ print("percent_change interpretation:")
+ print(" For latency/time: Positive = faster (less time)")
+ print(" For bandwidth: Positive = better (more bandwidth)")
+ return 0
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description="Add comparison sheets to combined collective reports"
+ )
+ parser.add_argument(
+ "--input", required=True, help="Input combined collective Excel file"
+ )
+ parser.add_argument(
+ "--output", required=True, help="Output Excel file with comparison sheets"
+ )
+
+ args = parser.parse_args()
+
+ return add_collective_comparison_sheets(args.input, args.output)
+
+
+if __name__ == "__main__":
+ exit(main())
diff --git a/scripts/tracelens_single_config/add_comparison_sheets.py b/scripts/tracelens_single_config/add_comparison_sheets.py
new file mode 100644
index 0000000..a50114a
--- /dev/null
+++ b/scripts/tracelens_single_config/add_comparison_sheets.py
@@ -0,0 +1,209 @@
+#!/usr/bin/env python3
+import pandas as pd
+import argparse
+from openpyxl.formatting.rule import ColorScaleRule
+
+
+def add_comparison_sheets(input_path, output_path):
+ """
+    Create per-rank and summary comparison sheets for the combined GPU timeline Excel file.
+ """
+ print(f"Loading: {input_path}")
+
+ xl = pd.ExcelFile(input_path)
+
+ with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
+ # Copy all original sheets
+ for sheet_name in xl.sheet_names:
+ df = pd.read_excel(input_path, sheet_name=sheet_name)
+ df.to_excel(writer, sheet_name=sheet_name, index=False)
+ print(f" Copied {sheet_name}")
+
+ # Add comparison sheets
+ all_combined = pd.read_excel(input_path, sheet_name="All_Ranks_Combined")
+
+ # Comparison 1: Side-by-side by rank
+ baseline_data = all_combined[all_combined["source"] == "baseline"]
+ saleelk_data = all_combined[all_combined["source"] == "saleelk"]
+
+ comparison_by_rank = pd.DataFrame()
+ for rank in sorted(baseline_data["rank"].unique()):
+ base_rank = baseline_data[baseline_data["rank"] == rank].set_index("type")
+ sale_rank = saleelk_data[saleelk_data["rank"] == rank].set_index("type")
+
+ for metric_type in base_rank.index:
+ if metric_type in sale_rank.index:
+ base_time = base_rank.loc[metric_type, "time ms"]
+ sale_time = sale_rank.loc[metric_type, "time ms"]
+ ratio_val = sale_time / base_time if base_time != 0 else 0
+ # Percentage change: positive when saleelk is faster (takes less time)
+ pct_change = (
+ (base_time - sale_time) / base_time * 100
+ if base_time != 0
+ else 0
+ )
+
+ # Determine if better or worse
+ if pct_change > 1:
+ status = "Better"
+ elif pct_change < -1:
+ status = "Worse"
+ else:
+ status = "Similar"
+
+ comparison_by_rank = pd.concat(
+ [
+ comparison_by_rank,
+ pd.DataFrame(
+ {
+ "rank": [rank],
+ "type": [metric_type],
+ "baseline_time_ms": [base_time],
+ "saleelk_time_ms": [sale_time],
+ "diff_time_ms": [sale_time - base_time],
+ "percent_change": [pct_change],
+ "status": [status],
+ "ratio": [ratio_val],
+ "baseline_percent": [
+ base_rank.loc[metric_type, "percent"]
+ ],
+ "saleelk_percent": [
+ sale_rank.loc[metric_type, "percent"]
+ ],
+ "diff_percent": [
+ sale_rank.loc[metric_type, "percent"]
+ - base_rank.loc[metric_type, "percent"]
+ ],
+ }
+ ),
+ ],
+ ignore_index=True,
+ )
+
+ comparison_by_rank.to_excel(
+ writer, sheet_name="Comparison_By_Rank", index=False
+ )
+ print(f" Added Comparison_By_Rank")
+
+ # Comparison 2: Summary comparison
+ summary = pd.read_excel(input_path, sheet_name="Summary")
+ baseline_summary = summary[summary["source"] == "baseline"].set_index("type")
+ saleelk_summary = summary[summary["source"] == "saleelk"].set_index("type")
+
+ summary_comparison = pd.DataFrame()
+ for metric_type in baseline_summary.index:
+ if metric_type in saleelk_summary.index:
+ base_time = baseline_summary.loc[metric_type, "time ms"]
+ sale_time = saleelk_summary.loc[metric_type, "time ms"]
+ ratio_val = sale_time / base_time if base_time != 0 else 0
+ # Percentage change: positive when saleelk is faster (takes less time)
+ pct_change = (
+ (base_time - sale_time) / base_time * 100 if base_time != 0 else 0
+ )
+
+ summary_comparison = pd.concat(
+ [
+ summary_comparison,
+ pd.DataFrame(
+ {
+ "type": [metric_type],
+ "baseline_time_ms": [base_time],
+ "saleelk_time_ms": [sale_time],
+ "diff_time_ms": [sale_time - base_time],
+ "percent_change": [pct_change],
+ "ratio": [ratio_val],
+ "baseline_percent": [
+ baseline_summary.loc[metric_type, "percent"]
+ ],
+ "saleelk_percent": [
+ saleelk_summary.loc[metric_type, "percent"]
+ ],
+ "diff_percent": [
+ saleelk_summary.loc[metric_type, "percent"]
+ - baseline_summary.loc[metric_type, "percent"]
+ ],
+ }
+ ),
+ ],
+ ignore_index=True,
+ )
+
+ summary_comparison.to_excel(
+ writer, sheet_name="Summary_Comparison", index=False
+ )
+ print(f" Added Summary_Comparison")
+
+ # Add conditional formatting to percent_change columns
+ print("\n Applying conditional formatting...")
+
+ # Create color scale: Red (negative) -> White (0) -> Green (positive)
+
+ # Format Comparison_By_Rank
+ ws_rank = writer.sheets["Comparison_By_Rank"]
+ # Find percent_change column
+ for col_idx, col in enumerate(comparison_by_rank.columns, start=1):
+ if col == "percent_change":
+ col_letter = chr(64 + col_idx) # Convert to Excel column letter
+ data_range = f"{col_letter}2:{col_letter}{len(comparison_by_rank)+1}"
+ # Color scale: red (min) -> white (0) -> green (max)
+ ws_rank.conditional_formatting.add(
+ data_range,
+ ColorScaleRule(
+ start_type="min",
+ start_color="F8696B", # Red
+ mid_type="num",
+ mid_value=0,
+ mid_color="FFFFFF", # White
+ end_type="max",
+ end_color="63BE7B", # Green
+ ),
+ )
+ print(f" Formatted Comparison_By_Rank column {col}")
+ break
+
+ # Format Summary_Comparison
+ ws_summary = writer.sheets["Summary_Comparison"]
+ for col_idx, col in enumerate(summary_comparison.columns, start=1):
+ if col == "percent_change":
+ col_letter = chr(64 + col_idx)
+ data_range = f"{col_letter}2:{col_letter}{len(summary_comparison)+1}"
+ # Color scale: red (min) -> white (0) -> green (max)
+ ws_summary.conditional_formatting.add(
+ data_range,
+ ColorScaleRule(
+ start_type="min",
+ start_color="F8696B", # Red
+ mid_type="num",
+ mid_value=0,
+ mid_color="FFFFFF", # White
+ end_type="max",
+ end_color="63BE7B", # Green
+ ),
+ )
+ print(f" Formatted Summary_Comparison column {col}")
+ break
+
+ print(f"\nSaved: {output_path}")
+ print("\nNew sheets:")
+ print(" Comparison_By_Rank - Side-by-side comparison for each rank")
+ print(" Summary_Comparison - Overall comparison")
+ return 0
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description="Add comparison sheets to combined GPU timeline"
+ )
+ parser.add_argument("--input", required=True, help="Input combined Excel file")
+ parser.add_argument(
+ "--output", required=True, help="Output Excel file with comparison sheets"
+ )
+
+ args = parser.parse_args()
+
+ return add_comparison_sheets(args.input, args.output)
+
+
+if __name__ == "__main__":
+ exit(main())
diff --git a/scripts/tracelens_single_config/combine_reports.py b/scripts/tracelens_single_config/combine_reports.py
new file mode 100644
index 0000000..5d8bfb1
--- /dev/null
+++ b/scripts/tracelens_single_config/combine_reports.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+import pandas as pd
+import argparse
+from pathlib import Path
+
+
+def combine_collective_reports(baseline_path, test_path, output_path):
+ """
+    Combine two Excel reports (baseline and test) into a single file, sheet by sheet,
+    tagging each row with a 'source' column. Used for both the GPU timeline summaries
+    and the collective reports.
+ """
+
+ print(f"Loading baseline: {baseline_path}")
+ baseline_xl = pd.ExcelFile(baseline_path)
+
+ print(f"Loading test: {test_path}")
+ test_xl = pd.ExcelFile(test_path)
+
+ print(f"\nBaseline sheets: {baseline_xl.sheet_names}")
+ print(f"Test sheets: {test_xl.sheet_names}")
+
+ with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
+ for sheet_name in baseline_xl.sheet_names:
+ if sheet_name not in test_xl.sheet_names:
+ print(f" Skip {sheet_name} - not in test file")
+ continue
+
+ baseline_df = pd.read_excel(baseline_path, sheet_name=sheet_name)
+ test_df = pd.read_excel(test_path, sheet_name=sheet_name)
+
+ baseline_df["source"] = "baseline"
+ test_df["source"] = "saleelk"
+
+ combined = pd.concat([baseline_df, test_df], ignore_index=True)
+
+ combined.to_excel(writer, sheet_name=sheet_name, index=False)
+ print(
+ f" Combined {sheet_name}: {len(baseline_df)} + {len(test_df)} = {len(combined)} rows"
+ )
+
+ print(f"\nSaved: {output_path}")
+ return 0
+
+
+def main():
+ parser = argparse.ArgumentParser(description="Combine two collective reports")
+ parser.add_argument(
+ "--baseline", required=True, help="Path to baseline collective_all_ranks.xlsx"
+ )
+ parser.add_argument(
+ "--test", required=True, help="Path to test collective_all_ranks.xlsx"
+ )
+ parser.add_argument(
+ "--output", required=True, help="Output path for combined Excel file"
+ )
+
+ args = parser.parse_args()
+
+ return combine_collective_reports(args.baseline, args.test, args.output)
+
+
+if __name__ == "__main__":
+ exit(main())
diff --git a/scripts/tracelens_single_config/create_final_html.py b/scripts/tracelens_single_config/create_final_html.py
new file mode 100644
index 0000000..74ea0c3
--- /dev/null
+++ b/scripts/tracelens_single_config/create_final_html.py
@@ -0,0 +1,103 @@
+from pathlib import Path
+import base64
+import argparse
+
+from html_report_config import (
+ HTML_HEADER,
+ HTML_FOOTER,
+ OVERALL_GPU_CHARTS,
+ CROSS_RANK_CHARTS,
+ NCCL_CHARTS,
+)
+
+
+def get_image_base64(image_path):
+ """Read an image file and return its base64-encoded string."""
+ try:
+ with open(image_path, "rb") as f:
+ return base64.b64encode(f.read()).decode("utf-8")
+ except Exception as e:
+ print(f"Error getting image data from {image_path}: {e}")
+ return None
+
+
+def create_chart_html(plot_dir, chart_config):
+ """Generate HTML for a single chart with title, image, and description."""
+ image_data = get_image_base64(plot_dir / chart_config["file"])
+ if image_data is None:
+ return ""
+    return f"""
+    <div>
+        <h3>{chart_config['name']}</h3>
+        <img src="data:image/png;base64,{image_data}" alt="{chart_config['alt']}">
+        <p>{chart_config['description']}</p>
+    </div>
+    """
+
+
+def create_section_html(title, plot_dir, charts):
+ """Generate HTML for a complete section with multiple charts."""
+ section_html = f"""
+    <h2>{title}</h2>
+ """
+ for chart in charts:
+ section_html += create_chart_html(plot_dir, chart)
+ return section_html
+
+
+def create_final_html(plot_file_path, output_path):
+    html_body = """
+    <h1>Performance Analysis Report</h1>
+
+    <h2>Executive Summary</h2>
+    <p>Comparison of GPU performance metrics between baseline and Saleelk
+    implementations across 8 ranks.</p>
+"""
+
+ # Build all sections
+ sections = [
+ create_section_html(
+ "1. Overall GPU Metrics Comparison", plot_file_path, OVERALL_GPU_CHARTS
+ ),
+ create_section_html(
+ "2. Cross-Rank Performance Comparison", plot_file_path, CROSS_RANK_CHARTS
+ ),
+ create_section_html(
+ "3. NCCL Collective Operations Analysis", plot_file_path, NCCL_CHARTS
+ ),
+ ]
+
+ final_html = HTML_HEADER + html_body + "".join(sections) + HTML_FOOTER
+ with open(output_path, "w") as f:
+ f.write(final_html)
+ print(f"Final HTML file created at: {output_path}")
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description="Create a final HTML file for the analysis report."
+ )
+ parser.add_argument(
+ "-p",
+ "--plot-files-directory",
+ type=Path,
+ required=True,
+        help="Path to the plot files directory.",
+ )
+ parser.add_argument(
+        "-o", "--output-html", type=Path, default=None, help="Path to the output HTML file."
+ )
+ args = parser.parse_args()
+ output_path = (
+ args.output_html
+ if args.output_html
+ else args.plot_files_directory.parent / "final_analysis_report.html"
+ )
+ create_final_html(args.plot_files_directory, output_path)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/scripts/tracelens_single_config/create_final_plots.py b/scripts/tracelens_single_config/create_final_plots.py
new file mode 100644
index 0000000..214ec5c
--- /dev/null
+++ b/scripts/tracelens_single_config/create_final_plots.py
@@ -0,0 +1,348 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+from pathlib import Path
+import seaborn as sns
+
+
+def plot_improvement_chart(df, output_path):
+ fig, ax = plt.subplots(figsize=(10, 6))
+
+ # Color bars based on positive/negative values
+ colors = ["#2ecc71" if val > 0 else "#e74c3c" for val in df["Improvement (%)"]]
+
+ bars = ax.barh(df["Metric"], df["Improvement (%)"], color=colors)
+ ax.yaxis.grid(True, linestyle="--", alpha=0.7, color="gray")
+ ax.set_axisbelow(True)
+
+ ax.spines["top"].set_visible(False)
+ ax.spines["right"].set_visible(False)
+ ax.spines["bottom"].set_visible(False)
+ ax.spines["left"].set_visible(False)
+
+ # Customize the chart
+ ax.set_ylabel("Metric", fontsize=12)
+ ax.set_xlabel("Change (%)", fontsize=12)
+ ax.set_title(
+ "GPU Metrics Percentage Change (Test vs Baseline)\n(Positive = Test is better)",
+ fontsize=14,
+ fontweight="bold",
+ )
+
+ plt.tight_layout()
+ plt.savefig(output_path / "improvement_chart.png", dpi=150)
+ plt.close()
+
+
+def plot_abs_time_comparison(df, output_path):
+
+ fig, ax = plt.subplots(figsize=(10, 6))
+
+ # Set up bar positions
+ x = range(len(df))
+ width = 0.35
+
+ # Create bars for Baseline and Test
+ bars1 = ax.bar(
+ [i - width / 2 for i in x],
+ df["Baseline"],
+ width,
+ label="Baseline",
+ color="#3498db",
+ )
+ bars2 = ax.bar(
+ [i + width / 2 for i in x], df["Test"], width, label="Test", color="#e67e22"
+ )
+
+ # Add horizontal grid lines only
+    ax.yaxis.grid(True, linestyle="--", alpha=0.7, color="gray")
+ ax.set_axisbelow(True)
+
+ # Remove border/spines
+ ax.spines["top"].set_visible(False)
+ ax.spines["right"].set_visible(False)
+ ax.spines["bottom"].set_visible(False)
+ ax.spines["left"].set_visible(False)
+
+ # Customize the chart
+ ax.set_xlabel("Metric Type", fontsize=12)
+ ax.set_ylabel("Time (ms)", fontsize=12)
+ ax.set_title(
+ "GPU Metrics Absolute Time Comparison ", fontsize=14, fontweight="bold"
+ )
+ ax.set_xticks(x)
+ ax.set_xticklabels(df["Metric"], rotation=45, ha="right")
+ ax.legend()
+
+ plt.tight_layout()
+ plt.savefig(output_path / "abs_time_comparison.png", dpi=150)
+ plt.close()
+
+
+def create_summary_charts(excel_path, output_path):
+
+ # Read the Summary_Dashboard sheet
+ df = pd.read_excel(excel_path, sheet_name="Summary_Dashboard")
+
+ plot_improvement_chart(df, output_path)
+ plot_abs_time_comparison(df, output_path)
+
+
+def plot_gpu_type_by_rank(total_time_df, output_path, title):
+ # Create the line plot
+ fig, ax = plt.subplots(figsize=(12, 6))
+
+ # Plot baseline total_time by rank
+ ax.plot(
+ total_time_df["rank"],
+ total_time_df["baseline_time_ms"],
+ marker="o",
+ linewidth=2,
+ markersize=8,
+ color="#3498db",
+ label="Baseline",
+ )
+
+ # Plot Saleelk (test) total_time by rank
+ ax.plot(
+ total_time_df["rank"],
+ total_time_df["saleelk_time_ms"],
+ marker="s",
+ linewidth=2,
+ markersize=8,
+ color="#e67e22",
+ label="Test",
+ )
+
+ # Add horizontal grid lines only
+ ax.yaxis.grid(True, linestyle="--", alpha=0.7, color="gray")
+ ax.set_axisbelow(True)
+
+ # Customize the chart
+ ax.set_xlabel("Rank", fontsize=12)
+ ax.set_ylabel("Total Time (ms)", fontsize=12)
+ ax.set_title(f"{title} Comparison across all ranks", fontsize=14, fontweight="bold")
+ ax.legend()
+
+ plt.tight_layout()
+ plt.savefig(output_path, dpi=150)
+ plt.close()
+
+
+def create_gpu_time_accross_all_ranks(excel_path, output_path):
+ # Read the GPU_ByRank_Cmp sheet
+ df = pd.read_excel(excel_path, sheet_name="GPU_ByRank_Cmp")
+
+    # Plot each key metric type across ranks
+ for type in ["total_time", "computation_time", "total_comm_time", "idle_time"]:
+ total_time_df = df[df["type"] == type]
+ plot_gpu_type_by_rank(total_time_df, output_path / f"{type}_by_rank.png", type)
+
+
+def plot_gpu_time_change_percentage_summaryby_rank(df, ax):
+ colors = ["#2ecc71" if val > 0 else "#e74c3c" for val in df["percent_change"]]
+ bars = ax.bar(df["rank"].astype(str), df["percent_change"], color=colors)
+ # Add horizontal line at 0
+ ax.axhline(y=0, color="black", linestyle="-", linewidth=0.5)
+
+ # Add horizontal grid lines only
+ ax.yaxis.grid(True, linestyle="--", alpha=0.7, color="gray")
+ ax.set_axisbelow(True)
+ ax.set_xlabel("Rank")
+ ax.set_ylabel("Percent Change (%)")
+
+
+def create_gpu_time_change_percentage_summaryby_rank(excel_path, output_path):
+ # Read the GPU_ByRank_Cmp sheet
+ df = pd.read_excel(excel_path, sheet_name="GPU_ByRank_Cmp")
+
+ fig, ax = plt.subplots(nrows=2, ncols=4, figsize=(12, 6))
+
+ row_types = [
+ "busy_time",
+ "computation_time",
+ "exposed_comm_time",
+ "exposed_memcpy_time",
+ "idle_time",
+ "total_comm_time",
+ "total_memcpy_time",
+ "total_time",
+ ]
+    # Plot each metric type in its own subplot
+ for i, type in enumerate(row_types):
+ type_df = df[df["type"] == type]
+ plot_gpu_time_change_percentage_summaryby_rank(type_df, ax[i // 4, i % 4])
+ ax[i // 4, i % 4].set_title(f"{type}")
+ plt.tight_layout()
+ plt.savefig(output_path / "gpu_time_change_percentage_summary_by_rank.png", dpi=150)
+ plt.close()
+
+
+def create_nccl_charts(excel_path, output_path):
+    # Read the NCCL_ImplSync_Cmp sheet
+ df = pd.read_excel(excel_path, sheet_name="NCCL_ImplSync_Cmp")
+ df["label"] = df["Collective name"] + "\n" + df["In msg nelems"].astype(str)
+ x = range(len(df))
+
+ plot_item = {
+ "NCCL Communication Latency": {
+ "x_label": "Collective Operation (Message Size)",
+ "y_label": "Communication Latency (ms)",
+ "y_col_names": ["baseline_comm_latency_mean", "saleelk_comm_latency_mean"],
+ },
+ "NCCL Algorithm Bandwidth": {
+ "x_label": "Collective Operation (Message Size)",
+ "y_label": "Algorithm Bandwidth (GB/s)",
+ "y_col_names": [
+ "baseline_algo bw (GB/s)_mean",
+ "saleelk_algo bw (GB/s)_mean",
+ ],
+ },
+ "NCCL Bus Bandwidth": {
+ "x_label": "Collective Operation (Message Size)",
+ "y_label": "Bus Bandwidth (GB/s)",
+ "y_col_names": [
+ "baseline_bus bw (GB/s)_mean",
+ "saleelk_bus bw (GB/s)_mean",
+ ],
+ },
+ "NCCL Total Communication Latency": {
+ "x_label": "Collective Operation (Message Size)",
+ "y_label": "Total Communication Latency (ms)",
+ "y_col_names": [
+ "baseline_Total comm latency (ms)",
+ "saleelk_Total comm latency (ms)",
+ ],
+ },
+ }
+ for item in plot_item.keys():
+ fig, ax = plt.subplots(figsize=(14, 6))
+ width = 0.35
+ bars1 = ax.bar(
+ [i - width / 2 for i in x],
+ df[plot_item[item]["y_col_names"][0]],
+ width,
+ label="Baseline",
+ color="#3498db",
+ )
+ bars2 = ax.bar(
+ [i + width / 2 for i in x],
+ df[plot_item[item]["y_col_names"][1]],
+ width,
+ label="Test",
+ color="#e67e22",
+ )
+ ax.yaxis.grid(True, linestyle="--", alpha=0.7, color="gray")
+ ax.set_axisbelow(True)
+ ax.set_xticks(x)
+ ax.set_xticklabels(df["label"], rotation=45, ha="right", fontsize=8)
+ ax.set_xlabel(plot_item[item]["x_label"], fontsize=12)
+ ax.set_ylabel(plot_item[item]["y_label"], fontsize=12)
+ ax.set_title(f"{item} Comparison", fontsize=14, fontweight="bold")
+ ax.legend()
+ plt.tight_layout()
+ plt.savefig(output_path / f'{item.replace(" ", "_")}_comparison.png', dpi=150)
+ plt.close()
+
+ percentage_chart_item = {
+ "Comm Latency": "percent_change_comm_latency_mean",
+ "Algo BW": "percent_change_algo bw (GB/s)_mean",
+ "Bus BW": "percent_change_bus bw (GB/s)_mean",
+ }
+ fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12, 6))
+ plot_item_index = 0
+ for item in percentage_chart_item.keys():
+ colors = [
+ "#2ecc71" if val > 0 else "#e74c3c"
+ for val in df[percentage_chart_item[item]]
+ ]
+ bars = ax[plot_item_index].barh(
+ df["In msg nelems"].astype(str),
+ df[percentage_chart_item[item]],
+ color=colors,
+ )
+ ax[plot_item_index].yaxis.grid(True, linestyle="--", alpha=0.7, color="gray")
+ ax[plot_item_index].set_axisbelow(True)
+ ax[plot_item_index].set_xlabel("Percent Change (%)")
+ ax[plot_item_index].set_title(f"{item} \n Percent Change (Positive = better)")
+ plot_item_index += 1
+ fig.suptitle(
+ "NCCL Performance Percentage Change By Message Size",
+ fontsize=16,
+ fontweight="bold",
+ )
+ plt.tight_layout()
+ plt.savefig(
+ output_path / f"NCCL_Performance_Percentage_Change_comparison.png", dpi=150
+ )
+ plt.close()
+
+
+def create_gpu_time_heatmap(excel_path, output_path):
+ # Read the GPU_ByRank_Cmp sheet
+ df = pd.read_excel(excel_path, sheet_name="GPU_ByRank_Cmp")
+ # Plot the GPU time heatmap
+ pivot_df = df.pivot(index="type", columns="rank", values="percent_change")
+
+ # Create heatmap
+ fig, ax = plt.subplots(figsize=(12, 8))
+
+ sns.heatmap(
+ pivot_df,
+ annot=True, # Show values in cells
+ fmt=".1f", # Format as 1 decimal
+ cmap="RdYlGn", # Red-Yellow-Green colormap (red=bad, green=good)
+ center=0, # Center colormap at 0
+ linewidths=0.5, # Add gridlines
+ cbar_kws={"label": "Percent Change (%)"},
+ )
+
+ ax.set_title(
+ "GPU Metric Percentage Change by Rank (HeatMap) \n (Positive = Better Test)",
+ fontsize=14,
+ fontweight="bold",
+ )
+ ax.set_xlabel("Rank", fontsize=12)
+ ax.set_ylabel("Metric Type", fontsize=12)
+
+ plt.tight_layout()
+ plt.savefig(output_path / "gpu_time_heatmap.png", dpi=150)
+    plt.close()
+
+
+def main():
+ import argparse
+
+ parser = argparse.ArgumentParser(
+ description="Generate improvement chart from generated reports"
+ )
+ parser.add_argument(
+ "--report-path",
+ type=Path,
+ default="~/aorta/aorta_single_config/aorta/expt_compare/final_analysis_report.xlsx",
+ help="Path to the input Excel file (should have Summary_Dashboard sheet)",
+ )
+ parser.add_argument(
+ "--output",
+ type=Path,
+ default=None,
+ help="Path to the output directory to save PNG files",
+ )
+
+ args = parser.parse_args()
+ output_path = args.output if args.output else args.report_path.parent / "plots"
+ output_path.mkdir(exist_ok=True, parents=True)
+ create_summary_charts(args.report_path, output_path)
+    print(f"Summary charts saved to: {output_path}")
+ create_gpu_time_heatmap(args.report_path, output_path)
+ print(f"GPU time heatmap saved to: {output_path}")
+ create_gpu_time_accross_all_ranks(args.report_path, output_path)
+    print(f"GPU time across all ranks saved to: {output_path}")
+ create_gpu_time_change_percentage_summaryby_rank(args.report_path, output_path)
+ print(f"GPU time change percentage summary by rank saved to: {output_path}")
+ create_nccl_charts(args.report_path, output_path)
+ print(f"NCCL communication charts saved to: {output_path}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/scripts/tracelens_single_config/create_final_report.py b/scripts/tracelens_single_config/create_final_report.py
new file mode 100644
index 0000000..588edb3
--- /dev/null
+++ b/scripts/tracelens_single_config/create_final_report.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python3
+"""
+Create final comprehensive report with combined and comparison data.
+Raw data sheets are hidden and all data is formatted as Excel tables.
+"""
+import pandas as pd
+import argparse
+from pathlib import Path
+from openpyxl import load_workbook
+from openpyxl.worksheet.table import Table, TableStyleInfo
+from openpyxl.formatting.rule import ColorScaleRule
+
+
+def get_column_letter(col_num):
+ """Convert column number to Excel column letter."""
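+    # e.g. 1 -> "A", 26 -> "Z", 27 -> "AA", 703 -> "AAA"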
+ result = ""
+ while col_num > 0:
+ col_num -= 1
+ result = chr(65 + (col_num % 26)) + result
+ col_num //= 26
+ return result
+
+
+def add_excel_table(worksheet, table_name, start_row=1):
+ """Convert worksheet data to Excel table format."""
+ # Find data range
+ max_row = worksheet.max_row
+ max_col = worksheet.max_column
+
+ if max_row <= start_row:
+ return # No data
+
+ # Ensure all column headers are strings
+ for col_idx in range(1, max_col + 1):
+ cell = worksheet.cell(row=start_row, column=col_idx)
+ if cell.value is not None and not isinstance(cell.value, str):
+ cell.value = str(cell.value)
+
+ # Create table reference using proper column letter conversion
+ start_cell = f"A{start_row}"
+ end_col_letter = get_column_letter(max_col)
+ end_cell = f"{end_col_letter}{max_row}"
+ table_ref = f"{start_cell}:{end_cell}"
+
+ # Create table with style
+ try:
+ tab = Table(displayName=table_name, ref=table_ref)
+ style = TableStyleInfo(
+ name="TableStyleMedium2",
+ showFirstColumn=False,
+ showLastColumn=False,
+ showRowStripes=True,
+ showColumnStripes=False,
+ )
+ tab.tableStyleInfo = style
+
+ # Add table to worksheet
+ worksheet.add_table(tab)
+ except Exception as e:
+ print(f" Warning: Could not create table {table_name}: {e}")
+
+
+def create_final_report(
+ gpu_combined, gpu_comparison, coll_combined, coll_comparison, output_file
+):
+ """Create comprehensive report with all data."""
+
+ print("Creating comprehensive final report...")
+ print(f" Output: {output_file}")
+
+ # Track sheet info for hiding/organizing
+ raw_sheets = []
+ comparison_sheets = []
+ summary_sheets = []
+
+ with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
+
+ # === GPU TIMELINE SHEETS ===
+ print("\nAdding GPU Timeline sheets...")
+
+ # Read GPU combined (raw data)
+ gpu_comb_xl = pd.ExcelFile(gpu_combined)
+ sheet_mapping = {
+ "Summary": "GPU_Summary_Raw",
+ "All_Ranks_Combined": "GPU_AllRanks_Raw",
+ "Per_Rank_Time_ms": "GPU_Time_Raw",
+ "Per_Rank_Percent": "GPU_Pct_Raw",
+ }
+ for sheet_name in gpu_comb_xl.sheet_names:
+ df = pd.read_excel(gpu_combined, sheet_name=sheet_name)
+ new_name = sheet_mapping.get(sheet_name, f"GPU_{sheet_name}_Raw")
+ df.to_excel(writer, sheet_name=new_name, index=False)
+ raw_sheets.append(new_name)
+ print(f" Added {new_name} (will be hidden)")
+
+ # Read GPU comparison
+ gpu_comp_xl = pd.ExcelFile(gpu_comparison)
+ comp_mapping = {
+ "Summary_Comparison": "GPU_Summary_Cmp",
+ "Comparison_By_Rank": "GPU_ByRank_Cmp",
+ }
+ for sheet_name in gpu_comp_xl.sheet_names:
+ if "Comparison" in sheet_name:
+ df = pd.read_excel(gpu_comparison, sheet_name=sheet_name)
+ new_name = comp_mapping.get(sheet_name, f"GPU_{sheet_name}")
+ df.to_excel(writer, sheet_name=new_name, index=False)
+ comparison_sheets.append(new_name)
+ print(f" Added {new_name}")
+
+ # === COLLECTIVE SHEETS ===
+ print("\nAdding Collective/NCCL sheets...")
+
+ # Read collective combined (raw data for hidden sheets)
+ coll_comb_xl = pd.ExcelFile(coll_combined)
+ coll_mapping = {
+ "nccl_summary_implicit_sync": "NCCL_ImplSync_Raw",
+ "nccl_summary_long": "NCCL_Long_Raw",
+ }
+ for sheet_name in coll_comb_xl.sheet_names:
+ if "summary" in sheet_name.lower():
+ df = pd.read_excel(coll_combined, sheet_name=sheet_name)
+ new_name = coll_mapping.get(sheet_name, f"NCCL_{sheet_name}_Raw")
+ df.to_excel(writer, sheet_name=new_name, index=False)
+ raw_sheets.append(new_name)
+ print(f" Added {new_name} (will be hidden)")
+
+ # Read collective comparison
+ coll_comp_xl = pd.ExcelFile(coll_comparison)
+ coll_cmp_mapping = {
+ "nccl_implicit_sync_cmp": "NCCL_ImplSync_Cmp",
+ "nccl_long_cmp": "NCCL_Long_Cmp",
+ }
+ for sheet_name in coll_comp_xl.sheet_names:
+ if "_cmp" in sheet_name:
+ df = pd.read_excel(coll_comparison, sheet_name=sheet_name)
+ new_name = coll_cmp_mapping.get(sheet_name, f"NCCL_{sheet_name}")
+ df.to_excel(writer, sheet_name=new_name, index=False)
+ comparison_sheets.append(new_name)
+ print(f" Added {new_name}")
+
+ # === CREATE SUMMARY DASHBOARD ===
+ print("\nCreating Summary Dashboard...")
+
+ # Read key metrics for dashboard
+ gpu_summary = pd.read_excel(gpu_comparison, sheet_name="Summary_Comparison")
+
+ # Create dashboard data
+ dashboard_data = {
+ "Metric": [],
+ "Baseline": [],
+ "Test": [],
+ "Improvement (%)": [],
+ "Status": [],
+ }
+
+ # Add GPU metrics
+ for _, row in gpu_summary.iterrows():
+ metric_type = row["type"]
+ dashboard_data["Metric"].append(f"GPU_{metric_type}")
+ dashboard_data["Baseline"].append(round(row["baseline_time_ms"], 2))
+ dashboard_data["Test"].append(round(row["saleelk_time_ms"], 2))
+ dashboard_data["Improvement (%)"].append(round(row["percent_change"], 2))
+            dashboard_data["Status"].append(
+                "Better"
+                if row["percent_change"] > 1
+                else "Worse" if row["percent_change"] < -1 else "Similar"
+            )
+
+ dashboard_df = pd.DataFrame(dashboard_data)
+ dashboard_df.to_excel(writer, sheet_name="Summary_Dashboard", index=False)
+ summary_sheets.append("Summary_Dashboard")
+ print(f" Added Summary_Dashboard")
+
+ # Now modify the workbook to hide sheets and add tables
+ print("\nApplying formatting...")
+ wb = load_workbook(output_file)
+
+ # Hide raw data sheets
+ for sheet_name in raw_sheets:
+ if sheet_name in wb.sheetnames:
+ wb[sheet_name].sheet_state = "hidden"
+ print(f" Hidden: {sheet_name}")
+
+ # Convert all sheets to tables
+ for sheet_name in wb.sheetnames:
+ ws = wb[sheet_name]
+
+ # Skip if sheet is empty
+ if ws.max_row <= 1:
+ continue
+
+ # Create unique table name from sheet name (remove special chars)
+ table_name = (
+ sheet_name.replace(" ", "_")
+ .replace("-", "_")
+ .replace("(", "")
+ .replace(")", "")
+ )
+ # Ensure name starts with letter and is max 255 chars
+ if not table_name[0].isalpha():
+ table_name = "Tbl_" + table_name
+ table_name = table_name[:255]
+
+ add_excel_table(ws, table_name)
+ print(f" Converted to table: {sheet_name}")
+
+ # Add conditional formatting for percent_change columns
+ if "Cmp" in sheet_name or "Comparison" in sheet_name:
+ # Find percent_change columns
+ for col_idx in range(1, ws.max_column + 1):
+ cell_value = ws.cell(row=1, column=col_idx).value
+ if cell_value and "percent_change" in str(cell_value):
+ col_letter = get_column_letter(col_idx)
+ data_range = f"{col_letter}2:{col_letter}{ws.max_row}"
+
+ # Apply color scale: red (min/negative) -> white (0) -> green (max/positive)
+ try:
+ ws.conditional_formatting.add(
+ data_range,
+ ColorScaleRule(
+ start_type="min",
+ start_color="F8696B", # Red
+ mid_type="num",
+ mid_value=0,
+ mid_color="FFFFFF", # White
+ end_type="max",
+ end_color="63BE7B", # Green
+ ),
+ )
+ print(
+ f" Applied color scale to {sheet_name} column {cell_value}"
+ )
+ except Exception as e:
+ print(
+ f" Warning: Could not apply formatting to {cell_value}: {e}"
+ )
+
+ # Move Summary Dashboard to first position
+ if "Summary_Dashboard" in wb.sheetnames:
+ dashboard_sheet = wb["Summary_Dashboard"]
+ wb.move_sheet(dashboard_sheet, offset=-(len(wb.sheetnames) - 1))
+ wb.active = 0 # Set dashboard as active sheet
+ print("\n Moved Summary_Dashboard to first position")
+
+ # Save workbook
+ wb.save(output_file)
+ print(f"\nFinal report saved: {output_file}")
+
+ # Report structure
+ print("\nReport Structure:")
+ print(" Visible Sheets (Analysis):")
+ print(f" - Summary_Dashboard")
+ for sheet in comparison_sheets:
+ print(f" - {sheet}")
+ print("\n Hidden Sheets (Raw Data):")
+ for sheet in raw_sheets:
+ print(f" - {sheet}")
+ print("\n All data formatted as Excel tables with filters")
+ print(" Percent change columns are color-coded (green=better, red=worse)")
+ print(
+ "\nUsers can unhide raw data sheets in Excel: Right-click any sheet tab → Unhide"
+ )
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description="Create final comprehensive report with all data",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog="""
+Example:
+ python create_final_report.py \\
+ --gpu-combined gpu_timeline_combined.xlsx \\
+ --gpu-comparison gpu_timeline_comparison.xlsx \\
+ --coll-combined collective_combined.xlsx \\
+ --coll-comparison collective_comparison.xlsx \\
+ --output final_analysis_report.xlsx
+ """,
+ )
+
+ parser.add_argument(
+ "--gpu-combined", required=True, help="Path to GPU timeline combined file"
+ )
+ parser.add_argument(
+ "--gpu-comparison", required=True, help="Path to GPU timeline comparison file"
+ )
+ parser.add_argument(
+ "--coll-combined", required=True, help="Path to collective combined file"
+ )
+ parser.add_argument(
+ "--coll-comparison", required=True, help="Path to collective comparison file"
+ )
+ parser.add_argument("--output", required=True, help="Output path for final report")
+
+ args = parser.parse_args()
+
+ # Validate inputs
+ for file_arg in [
+ "gpu_combined",
+ "gpu_comparison",
+ "coll_combined",
+ "coll_comparison",
+ ]:
+ file_path = getattr(args, file_arg)
+ if not Path(file_path).exists():
+ print(f"Error: File not found: {file_path}")
+ return 1
+
+ create_final_report(
+ args.gpu_combined,
+ args.gpu_comparison,
+ args.coll_combined,
+ args.coll_comparison,
+ args.output,
+ )
+
+ return 0
+
+
+if __name__ == "__main__":
+ exit(main())
diff --git a/scripts/tracelens_single_config/html_report_config.py b/scripts/tracelens_single_config/html_report_config.py
new file mode 100644
index 0000000..5f14d2c
--- /dev/null
+++ b/scripts/tracelens_single_config/html_report_config.py
@@ -0,0 +1,119 @@
+"""Configuration constants for HTML report generation."""
+
+HTML_HEADER = """<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="utf-8">
+    <title>Performance Analysis Report</title>
+</head>
+<body>
+"""
+
+HTML_FOOTER = """
+</body>
+</html>
+"""
+
+# Chart configuration for each section
+OVERALL_GPU_CHARTS = [
+ {
+ "name": "Percentage Change Overview",
+ "file": "improvement_chart.png",
+ "alt": "Summary Chart",
+ "description": "Overall performance change across key GPU metrics. Positive values indicate improvement (Test is faster/better).",
+ },
+ {
+ "name": "Absolute Time Comparison",
+ "file": "abs_time_comparison.png",
+ "alt": "Absolute Time Comparison",
+ "description": "Side-by-side comparison of absolute execution times for all GPU metrics.",
+ },
+]
+
+CROSS_RANK_CHARTS = [
+ {
+ "name": "Performance Heatmap by Rank",
+ "file": "gpu_time_heatmap.png",
+ "alt": "GPU Metric Percentage Change by Rank (HeatMap)",
+ "description": "Comprehensive heatmap showing percent change for all metrics across all ranks. Green indicates better performance (positive % change).",
+ },
+ {
+ "name": "Total Time",
+ "file": "total_time_by_rank.png",
+ "alt": "total_time by Rank",
+ "description": "Total execution time comparison across all ranks, showing end-to-end performance characteristics.",
+ },
+ {
+ "name": "Computation Time",
+ "file": "computation_time_by_rank.png",
+ "alt": "computation_time by Rank",
+ "description": "Pure computation time excluding communication overhead, analyzed per rank.",
+ },
+ {
+ "name": "Communication Time",
+ "file": "total_comm_time_by_rank.png",
+ "alt": "total_comm_time by Rank",
+ "description": "Total time spent in collective communication operations across ranks.",
+ },
+ {
+ "name": "Idle Time",
+ "file": "idle_time_by_rank.png",
+ "alt": "idle_time by Rank",
+ "description": "GPU idle time comparison showing resource utilization efficiency per rank.",
+ },
+ {
+ "name": "Detailed Percentage Change by Metric",
+        "file": "gpu_time_change_percentage_summary_by_rank.png",
+ "alt": "gpu_time_change_percentage_summaryby_rank by Rank",
+ "description": "Detailed breakdown of percent change for each metric type across all ranks.",
+ },
+]
+
+NCCL_CHARTS = [
+ {
+ "name": "NCCL Communication Latency",
+ "file": "NCCL_Communication_Latency_comparison.png",
+ "alt": "NCCL Communication Latency Comparison",
+ "description": "Mean communication latency for NCCL allreduce operations across different message sizes",
+ },
+ {
+ "name": "NCCL Algorithm Bandwidth",
+ "file": "NCCL_Algorithm_Bandwidth_comparison.png",
+ "alt": "NCCL Algorithm Bandwidth Comparison",
+ "description": "Algorithm bandwidth achieved for different message sizes in NCCL collective operations.",
+ },
+ {
+ "name": "NCCL Bus Bandwidth",
+ "file": "NCCL_Bus_Bandwidth_comparison.png",
+ "alt": "NCCL Bus Bandwidth Comparison",
+ "description": "Bus bandwidth utilization across NCCL operations and message sizes.",
+ },
+ {
+ "name": "NCCL Performance Percentage Change",
+ "file": "NCCL_Performance_Percentage_Change_comparison.png",
+ "alt": "NCCL Performance Percentage Change Comparison",
+        "description": "Percent change in communication latency and bandwidth metrics for each message size configuration.",
+ },
+ {
+ "name": "NCCL Total Communication Latency",
+ "file": "NCCL_Total_Communication_Latency_comparison.png",
+ "alt": "NCCL Total Communication Latency Comparison",
+ "description": "Aggregate communication latency summed across all operations for each message size.",
+ },
+]
diff --git a/scripts/tracelens_single_config/process_gpu_timeline.py b/scripts/tracelens_single_config/process_gpu_timeline.py
new file mode 100644
index 0000000..145f817
--- /dev/null
+++ b/scripts/tracelens_single_config/process_gpu_timeline.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python3
+import pandas as pd
+import numpy as np
+import argparse
+from pathlib import Path
+
+
+def geometric_mean(values):
+ values = np.array(values)
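+    # Replace exact zeros with a tiny epsilon so np.log() stays finite;
+    # e.g. geometric_mean([2, 8]) -> 4.0, geometric_mean([1, 10, 100]) -> ~10.0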
+ values = np.where(values == 0, 1e-10, values)
+ return np.exp(np.mean(np.log(values)))
+
+
+def process_gpu_timeline(reports_dir, use_geo_mean=False):
+ """
+    Aggregate the per-rank GPU timelines (arithmetic or geometric mean) from a TraceLens individual_reports directory.
+ """
+ reports_path = Path(reports_dir)
+
+ if not reports_path.exists():
+ print(f"Error: Directory not found: {reports_dir}")
+ return 1
+
+ print(f"Processing GPU timeline from: {reports_dir}")
+ print(f"Aggregation: {'Geometric Mean' if use_geo_mean else 'Arithmetic Mean'}")
+
+ perf_files = sorted(reports_path.glob("perf_rank*.xlsx"))
+
+ if not perf_files:
+ print("Error: No perf_rank*.xlsx files found")
+ return 1
+
+ print(f"Found {len(perf_files)} rank files")
+
+ rank_data = []
+ for file_path in perf_files:
+ rank_num = int(file_path.stem.replace("perf_rank", ""))
+ try:
+ df = pd.read_excel(file_path, sheet_name="gpu_timeline")
+ df["rank"] = rank_num
+ rank_data.append(df)
+ print(f" Rank {rank_num}: OK")
+ except Exception as e:
+ print(f" Rank {rank_num}: Error - {e}")
+
+ if not rank_data:
+ print("Error: No valid data loaded")
+ return 1
+
+ combined = pd.concat(rank_data, ignore_index=True)
+
+ agg_func = geometric_mean if use_geo_mean else "mean"
+ aggregated = (
+ combined.groupby("type")
+ .agg({"time ms": agg_func, "percent": agg_func})
+ .reset_index()
+ )
+
+ aggregated["num_ranks"] = len(perf_files)
+
+ method_suffix = "geomean" if use_geo_mean else "mean"
+ output_path = reports_path.parent / f"gpu_timeline_summary_{method_suffix}.xlsx"
+
+ with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
+ aggregated.to_excel(writer, sheet_name="Summary", index=False)
+
+ combined_sorted = combined.sort_values(["rank", "type"])
+ combined_sorted.to_excel(writer, sheet_name="All_Ranks_Combined", index=False)
+
+ per_rank = combined.pivot_table(
+ values="time ms", index="type", columns="rank", aggfunc="first"
+ )
+ per_rank.to_excel(writer, sheet_name="Per_Rank_Time_ms")
+
+ per_rank_pct = combined.pivot_table(
+ values="percent", index="type", columns="rank", aggfunc="first"
+ )
+ per_rank_pct.to_excel(writer, sheet_name="Per_Rank_Percent")
+
+ print(f"\nSaved: {output_path}")
+ print("\nSummary:")
+ print(aggregated.to_string(index=False))
+
+ return 0
+
+
+def main():
+ parser = argparse.ArgumentParser(description="Aggregate GPU timeline across ranks")
+ parser.add_argument(
+ "--reports-dir", required=True, help="Path to individual_reports directory"
+ )
+ parser.add_argument("--geo-mean", action="store_true", help="Use geometric mean")
+
+ args = parser.parse_args()
+
+ return process_gpu_timeline(args.reports_dir, args.geo_mean)
+
+
+if __name__ == "__main__":
+ exit(main())
diff --git a/scripts/tracelens_single_config/run_full_analysis.py b/scripts/tracelens_single_config/run_full_analysis.py
new file mode 100644
index 0000000..5385ec4
--- /dev/null
+++ b/scripts/tracelens_single_config/run_full_analysis.py
@@ -0,0 +1,387 @@
+#!/usr/bin/env python3
+"""
+Master script for complete TraceLens analysis pipeline.
+Runs analysis on baseline and test traces, then performs all comparisons.
+"""
+import argparse
+import subprocess
+import sys
+from pathlib import Path
+
+
+def run_command(cmd, description):
+ """Execute a command and handle errors."""
+ print(f"\n{'='*80}")
+ print(f"{description}")
+ print(f"{'='*80}")
+ print(f"Command: {' '.join(cmd)}")
+
+ result = subprocess.run(cmd, capture_output=True, text=True)
+
+ if result.returncode != 0:
+ print(f"Error: {description} failed!")
+ print(f"Stderr: {result.stderr}")
+ return False
+
+ print(result.stdout)
+ return True
+
+
+def run_tracelens_analysis(
+ trace_dir, output_name, individual_only=False, collective_only=False
+):
+ """Run TraceLens analysis on a single trace directory."""
+ print(f"\nAnalyzing: {trace_dir}")
+
+ # Build command
+ script_path = Path(__file__).parent / "run_tracelens_single_config.sh"
+ cmd = ["bash", str(script_path), trace_dir]
+
+ if individual_only:
+ cmd.append("--individual-only")
+ elif collective_only:
+ cmd.append("--collective-only")
+
+ return run_command(cmd, f"TraceLens analysis for {output_name}")
+
+
+def process_gpu_timeline(reports_dir):
+ """Process GPU timeline from individual reports."""
+ script_path = Path(__file__).parent / "process_gpu_timeline.py"
+ cmd = ["python3", str(script_path), "--reports-dir", reports_dir]
+
+ return run_command(cmd, "Processing GPU timeline")
+
+
+def combine_reports(baseline_file, test_file, output_file):
+ """Combine baseline and test reports."""
+ script_path = Path(__file__).parent / "combine_reports.py"
+ cmd = [
+ "python3",
+ str(script_path),
+ "--baseline",
+ baseline_file,
+ "--test",
+ test_file,
+ "--output",
+ output_file,
+ ]
+
+ return run_command(cmd, f"Combining reports to {output_file}")
+
+
+def add_comparison_sheets(input_file, output_file):
+ """Add comparison sheets for GPU timeline."""
+ script_path = Path(__file__).parent / "add_comparison_sheets.py"
+ cmd = ["python3", str(script_path), "--input", input_file, "--output", output_file]
+
+ return run_command(cmd, "Adding GPU timeline comparison sheets")
+
+
+def add_collective_comparison(input_file, output_file):
+ """Add comparison sheets for collective operations."""
+ script_path = Path(__file__).parent / "add_collective_comparison.py"
+ cmd = ["python3", str(script_path), "--input", input_file, "--output", output_file]
+
+ return run_command(cmd, "Adding collective comparison sheets")
+
+
+def create_final_report(
+ gpu_combined, gpu_comparison, coll_combined, coll_comparison, output_file
+):
+ """Create comprehensive final report with all data."""
+ script_path = Path(__file__).parent / "create_final_report.py"
+ cmd = [
+ "python3",
+ str(script_path),
+ "--gpu-combined",
+ gpu_combined,
+ "--gpu-comparison",
+ gpu_comparison,
+ "--coll-combined",
+ coll_combined,
+ "--coll-comparison",
+ coll_comparison,
+ "--output",
+ output_file,
+ ]
+
+ if run_command(cmd, "Creating comprehensive final report"):
+ plot_script_path = Path(__file__).parent / "create_final_plots.py"
+ cmd = ["python3", str(plot_script_path), "--report-path", output_file]
+ if run_command(cmd, "Creating final plots"):
+ html_script_path = Path(__file__).parent / "create_final_html.py"
+ cmd = [
+ "python3",
+ str(html_script_path),
+ "--plot-files-directory",
+ str(Path(output_file).parent / "plots"),
+ ]
+ if run_command(cmd, "Creating final HTML"):
+ return True
+ return False
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description="Complete TraceLens analysis pipeline with comparisons",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog="""
+Examples:
+ # Full analysis with everything including final report
+ python run_full_analysis.py \\
+ --baseline /path/to/baseline/traces \\
+ --test /path/to/test/traces \\
+ --output /path/to/output \\
+ --all
+
+ # Only GPU timeline comparison
+ python run_full_analysis.py \\
+ --baseline /path/to/baseline \\
+ --test /path/to/test \\
+ --output /path/to/output \\
+ --gpu-timeline
+
+ # Create final report (skip TraceLens if already done)
+ python run_full_analysis.py \\
+ --baseline /path/to/baseline \\
+ --test /path/to/test \\
+ --output /path/to/output \\
+ --gpu-timeline --collective --final-report \\
+ --skip-tracelens
+ """,
+ )
+
+ # Required arguments
+ parser.add_argument(
+ "--baseline", required=True, help="Path to baseline trace directory"
+ )
+ parser.add_argument("--test", required=True, help="Path to test trace directory")
+ parser.add_argument(
+ "--output", required=True, help="Output directory for comparison results"
+ )
+
+ # Analysis options
+ parser.add_argument(
+ "--skip-tracelens",
+ action="store_true",
+ help="Skip TraceLens report generation (if already done)",
+ )
+ parser.add_argument(
+ "--individual-only",
+ action="store_true",
+ help="Generate only individual reports",
+ )
+ parser.add_argument(
+ "--collective-only",
+ action="store_true",
+ help="Generate only collective reports",
+ )
+
+ # Comparison options
+ parser.add_argument(
+ "--gpu-timeline", action="store_true", help="Perform GPU timeline comparison"
+ )
+ parser.add_argument(
+ "--collective", action="store_true", help="Perform collective/NCCL comparison"
+ )
+ parser.add_argument(
+ "--final-report",
+ action="store_true",
+ help="Create comprehensive final report with tables and hidden raw data",
+ )
+ parser.add_argument(
+ "--all",
+ action="store_true",
+ help="Perform all analyses and comparisons including final report",
+ )
+
+ args = parser.parse_args()
+
+ # Handle --all flag
+ if args.all:
+ args.gpu_timeline = True
+ args.collective = True
+ args.final_report = True
+
+ # Validate inputs
+ baseline_path = Path(args.baseline)
+ test_path = Path(args.test)
+ output_path = Path(args.output)
+
+ if not baseline_path.exists():
+ print(f"Error: Baseline path not found: {args.baseline}")
+ return 1
+
+ if not test_path.exists():
+ print(f"Error: Test path not found: {args.test}")
+ return 1
+
+ # Create output directory
+ output_path.mkdir(parents=True, exist_ok=True)
+
+ print("\n" + "=" * 80)
+ print("TRACELENS FULL ANALYSIS PIPELINE")
+ print("=" * 80)
+ print(f"Baseline: {args.baseline}")
+ print(f"Test: {args.test}")
+ print(f"Output: {args.output}")
+ print(f"Options:")
+ print(f" Skip TraceLens: {args.skip_tracelens}")
+ print(f" GPU timeline: {args.gpu_timeline}")
+ print(f" Collective: {args.collective}")
+ print(f" Final report: {args.final_report}")
+
+ # Step 1: Run TraceLens analysis on both directories
+ if not args.skip_tracelens:
+ print("\n" + "=" * 80)
+ print("STEP 1: Running TraceLens Analysis")
+ print("=" * 80)
+
+ if not run_tracelens_analysis(
+ args.baseline, "baseline", args.individual_only, args.collective_only
+ ):
+ return 1
+
+ if not run_tracelens_analysis(
+ args.test, "test", args.individual_only, args.collective_only
+ ):
+ return 1
+ else:
+ print("\nSkipping TraceLens report generation (--skip-tracelens flag)")
+
+ # Determine analysis directories
+ baseline_analysis = baseline_path / "tracelens_analysis"
+ test_analysis = test_path / "tracelens_analysis"
+
+ if not baseline_analysis.exists():
+ print(f"Error: Baseline analysis not found: {baseline_analysis}")
+ print("Run without --skip-tracelens flag first")
+ return 1
+
+ if not test_analysis.exists():
+ print(f"Error: Test analysis not found: {test_analysis}")
+ print("Run without --skip-tracelens flag first")
+ return 1
+
+ # Step 2: GPU Timeline Comparison
+ if args.gpu_timeline:
+ print("\n" + "=" * 80)
+ print("STEP 2: GPU Timeline Comparison")
+ print("=" * 80)
+
+ # Process GPU timelines
+ baseline_reports = baseline_analysis / "individual_reports"
+ test_reports = test_analysis / "individual_reports"
+
+ if not baseline_reports.exists() or not test_reports.exists():
+            print(
+                "Error: Individual reports not found. Run without the --collective-only flag"
+            )
+ return 1
+
+ print("\nProcessing baseline GPU timeline...")
+ if not process_gpu_timeline(str(baseline_reports)):
+ return 1
+
+ print("\nProcessing test GPU timeline...")
+ if not process_gpu_timeline(str(test_reports)):
+ return 1
+
+ # Combine GPU timeline summaries
+ baseline_gpu = baseline_analysis / "gpu_timeline_summary_mean.xlsx"
+ test_gpu = test_analysis / "gpu_timeline_summary_mean.xlsx"
+ combined_gpu = output_path / "gpu_timeline_combined.xlsx"
+
+ if not combine_reports(str(baseline_gpu), str(test_gpu), str(combined_gpu)):
+ return 1
+
+ # Add comparison sheets
+ gpu_comparison = output_path / "gpu_timeline_comparison.xlsx"
+ if not add_comparison_sheets(str(combined_gpu), str(gpu_comparison)):
+ return 1
+
+ print(f"\nGPU timeline comparison saved to: {gpu_comparison}")
+
+ # Step 3: Collective Comparison
+ if args.collective:
+ print("\n" + "=" * 80)
+ print("STEP 3: Collective/NCCL Comparison")
+ print("=" * 80)
+
+ baseline_collective = (
+ baseline_analysis / "collective_reports" / "collective_all_ranks.xlsx"
+ )
+ test_collective = (
+ test_analysis / "collective_reports" / "collective_all_ranks.xlsx"
+ )
+
+        if not baseline_collective.exists() or not test_collective.exists():
+            print(
+                "Error: Collective reports not found. Run without --individual-only flag"
+            )
+            return 1
+
+ # Combine collective reports
+ combined_collective = output_path / "collective_combined.xlsx"
+ if not combine_reports(
+ str(baseline_collective), str(test_collective), str(combined_collective)
+ ):
+ return 1
+
+ # Add collective comparison
+ collective_comparison = output_path / "collective_comparison.xlsx"
+ if not add_collective_comparison(
+ str(combined_collective), str(collective_comparison)
+ ):
+ return 1
+
+ print(f"\nCollective comparison saved to: {collective_comparison}")
+
+ # Step 4: Create final comprehensive report
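+    # The final report pulls from both comparison outputs above, so it only runs
+    # when the GPU timeline and collective steps ran in this same invocation.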
+ if args.final_report and args.gpu_timeline and args.collective:
+ print("\n" + "=" * 80)
+ print("STEP 4: Creating Final Comprehensive Report")
+ print("=" * 80)
+
+ gpu_combined = output_path / "gpu_timeline_combined.xlsx"
+ gpu_comparison = output_path / "gpu_timeline_comparison.xlsx"
+ collective_combined = output_path / "collective_combined.xlsx"
+ collective_comparison = output_path / "collective_comparison.xlsx"
+ final_report = output_path / "final_analysis_report.xlsx"
+
+ if not create_final_report(
+ str(gpu_combined),
+ str(gpu_comparison),
+ str(collective_combined),
+ str(collective_comparison),
+ str(final_report),
+ ):
+ return 1
+
+ print(f"\nFinal comprehensive report saved to: {final_report}")
+ print(" - Summary Dashboard as first sheet")
+ print(" - All comparison sheets visible")
+ print(" - Raw data sheets hidden (can be unhidden in Excel)")
+ print(" - All data formatted as Excel tables with filters")
+ print(" - Color coding applied (green=better, red=worse)")
+
+ # Summary
+ print("\n" + "=" * 80)
+ print("ANALYSIS COMPLETE!")
+ print("=" * 80)
+ print(f"\nResults saved to: {output_path}")
+
+ files = list(output_path.glob("*.xlsx"))
+ if files:
+ print("\nGenerated files:")
+ for f in sorted(files):
+ print(f" - {f.name}")
+
+ print("\nAnalysis pipeline completed successfully!")
+ return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/scripts/tracelens_single_config/run_tracelens_single_config.sh b/scripts/tracelens_single_config/run_tracelens_single_config.sh
new file mode 100644
index 0000000..96831ff
--- /dev/null
+++ b/scripts/tracelens_single_config/run_tracelens_single_config.sh
@@ -0,0 +1,266 @@
+#!/bin/bash
+# TraceLens Analysis for Single Configuration (No Sweep)
+# Usage: ./run_tracelens_single_config.sh <trace_directory> [--individual-only|--collective-only]
+#
+# The script accepts either:
+# - Path to parent directory containing torch_profiler/
+# - Path to torch_profiler/ directory directly
+#
+# Examples:
+# ./run_tracelens_single_config.sh /path/to/traces
+# ./run_tracelens_single_config.sh /path/to/traces/torch_profiler
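+#   ./run_tracelens_single_config.sh /path/to/traces --individual-only
+#   ./run_tracelens_single_config.sh /path/to/traces --collective-only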
+#
+# Note: Uses GEMM-patched TraceLens wrapper to recognize ROCm Tensile kernels
+
+set -e
+
+# Get the directory where this script is located
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Use patched TraceLens wrapper for GEMM recognition
+TRACELENS_WRAPPER="python $SCRIPT_DIR/../tracelens_with_gemm_patch.py"
+
+# Parse options
+RUN_INDIVIDUAL=true
+RUN_COLLECTIVE=true
+
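+# Flags select which reports to generate; any other argument is taken as the trace directory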
+while [[ $# -gt 0 ]]; do
+ case $1 in
+ --individual-only)
+ RUN_COLLECTIVE=false
+ shift
+ ;;
+ --collective-only)
+ RUN_INDIVIDUAL=false
+ shift
+ ;;
+ *)
+ INPUT_DIR="$1"
+ shift
+ ;;
+ esac
+done
+
+# Check if directory provided
+if [ -z "$INPUT_DIR" ]; then
+ echo "Error: Please provide trace directory"
+ echo ""
+    echo "Usage: $0 <trace_directory> [options]"
+ echo ""
+ echo "Options:"
+ echo " --individual-only Generate only individual reports"
+ echo " --collective-only Generate only collective report"
+ echo ""
+ echo "Examples:"
+ echo " $0 /path/to/traces"
+ echo " $0 /path/to/traces --individual-only"
+ echo " $0 /path/to/traces --collective-only"
+ echo ""
+ exit 1
+fi
+
+# Verify directory exists
+if [ ! -d "$INPUT_DIR" ]; then
+ echo "Error: Directory not found: $INPUT_DIR"
+ exit 1
+fi
+
+# Auto-detect structure: is this torch_profiler/ or its parent?
+TORCH_PROF_DIR=""
+BASE_DIR=""
+
+# Check if INPUT_DIR contains rank directories (i.e., it IS torch_profiler/)
+if find "$INPUT_DIR" -maxdepth 1 -type d -name "rank*" | grep -q .; then
+ TORCH_PROF_DIR="$INPUT_DIR"
+ BASE_DIR=$(dirname "$INPUT_DIR")
+ echo "Detected torch_profiler directory: $TORCH_PROF_DIR"
+# Check if INPUT_DIR contains torch_profiler/ subdirectory
+elif [ -d "$INPUT_DIR/torch_profiler" ]; then
+ TORCH_PROF_DIR="$INPUT_DIR/torch_profiler"
+ BASE_DIR="$INPUT_DIR"
+ echo "Found torch_profiler subdirectory: $TORCH_PROF_DIR"
+else
+ echo "Error: Cannot find rank directories in expected structure"
+ echo ""
+ echo "Expected one of:"
+ echo " 1. Directory with rank0/, rank1/, ... subdirectories (torch_profiler/)"
+ echo " 2. Parent directory containing torch_profiler/rank0/, rank1/, ..."
+ echo ""
+ echo "Provided: $INPUT_DIR"
+ exit 1
+fi
+
+echo "════════════════════════════════════════════════════════════════"
+echo " TraceLens Analysis - Single Configuration"
+echo "════════════════════════════════════════════════════════════════"
+echo ""
+echo "Input directory: $INPUT_DIR"
+echo "Torch profiler traces: $TORCH_PROF_DIR"
+echo ""
+
+# Create output directory in the base directory
+OUTPUT_DIR="${BASE_DIR}/tracelens_analysis"
+mkdir -p "$OUTPUT_DIR"
+mkdir -p "$OUTPUT_DIR/individual_reports"
+mkdir -p "$OUTPUT_DIR/collective_reports"
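+# run_full_analysis.py expects this tracelens_analysis/ layout when run with --skip-tracelens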
+
+# Detect number of ranks
+NUM_RANKS=$(find "$TORCH_PROF_DIR" -maxdepth 1 -type d -name "rank*" | wc -l)
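+# Assumes ranks are numbered contiguously from 0 (rank0/, rank_0/, or rank_00/ style)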
+
+if [ $NUM_RANKS -eq 0 ]; then
+ echo "Error: No rank directories found in $TORCH_PROF_DIR"
+ exit 1
+fi
+
+echo "Detected $NUM_RANKS ranks"
+
+# Show sample trace files
+echo ""
+echo "Sample trace files:"
+for rank_dir in $(find "$TORCH_PROF_DIR" -maxdepth 1 -type d -name "rank*" | sort | head -3); do
+ rank_name=$(basename "$rank_dir")
+ trace_file=$(find "$rank_dir" -name "*.json" | head -1)
+ if [ -n "$trace_file" ]; then
+ echo " $rank_name: $(basename "$trace_file")"
+ fi
+done
+if [ "$RUN_INDIVIDUAL" = true ]; then
+ echo ""
+ echo "════════════════════════════════════════════════════════════════"
+ echo "Step 1: Generating Individual Performance Reports"
+ echo "════════════════════════════════════════════════════════════════"
+ echo ""
+
+    # Process each rank
+    for rank_idx in $(seq 0 $((NUM_RANKS - 1))); do
+        # Try multiple directory naming patterns
+        RANK_DIR=""
+        if [ -d "$TORCH_PROF_DIR/rank${rank_idx}" ]; then
+            RANK_DIR="$TORCH_PROF_DIR/rank${rank_idx}"
+        elif [ -d "$TORCH_PROF_DIR/rank_${rank_idx}" ]; then
+            RANK_DIR="$TORCH_PROF_DIR/rank_${rank_idx}"
+        elif [ -d "$TORCH_PROF_DIR/rank_$(printf "%02d" $rank_idx)" ]; then
+            RANK_DIR="$TORCH_PROF_DIR/rank_$(printf "%02d" $rank_idx)"
+        fi
+
+        if [ -z "$RANK_DIR" ] || [ ! -d "$RANK_DIR" ]; then
+            echo "⚠️  Skip rank ${rank_idx} - directory not found"
+            continue
+        fi
+
+        # Find trace file
+        TRACE=$(find "$RANK_DIR" -name "*.json" -type f | head -1)
+
+        if [ -z "$TRACE" ]; then
+            echo "⚠️  Skip rank ${rank_idx} - no trace file found"
+            continue
+        fi
+
+        OUTPUT="$OUTPUT_DIR/individual_reports/perf_rank${rank_idx}.xlsx"
+
+        echo "Processing rank ${rank_idx}..."
+        echo "  Trace: $(basename "$TRACE")"
+
+        $TRACELENS_WRAPPER generate_perf_report \
+            --profile_json_path "$TRACE" \
+            --output_xlsx_path "$OUTPUT" \
+            --include_unlinked_kernels \
+            --short_kernel_study \
+            --short_kernel_threshold_us 50 \
+            --topk_ops 100 \
+            --topk_roofline_ops 100
+
+        echo "  Done: $OUTPUT"
+        echo ""
+    done
+
+fi
+
+if [ "$RUN_COLLECTIVE" = true ]; then
+ echo ""
+ echo "════════════════════════════════════════════════════════════════"
+ echo "Step 2: Generating Multi-Rank Collective Report"
+ echo "════════════════════════════════════════════════════════════════"
+ echo ""
+
+    # Find a sample trace file to confirm that traces exist
+    SAMPLE_TRACE=$(find "$TORCH_PROF_DIR/rank0" -name "*.json" -type f 2>/dev/null | head -1)
+    if [ -z "$SAMPLE_TRACE" ]; then
+        # Try alternative rank naming
+        SAMPLE_TRACE=$(find "$TORCH_PROF_DIR/rank_0" -name "*.json" -type f 2>/dev/null | head -1)
+    fi
+
+    if [ -z "$SAMPLE_TRACE" ]; then
+        # Try rank_00
+        SAMPLE_TRACE=$(find "$TORCH_PROF_DIR/rank_00" -name "*.json" -type f 2>/dev/null | head -1)
+    fi
+
+    if [ -n "$SAMPLE_TRACE" ]; then
+        OUTPUT="$OUTPUT_DIR/collective_reports/collective_all_ranks.xlsx"
+
+        echo "Generating collective report for all $NUM_RANKS ranks..."
+
+        # Create symlinks named trace.json so a single glob pattern covers every rank
+        for rank_idx in $(seq 0 $((NUM_RANKS - 1))); do
+            # Resolve the same directory naming patterns as the individual-report loop
+            RANK_DIR=""
+            if [ -d "$TORCH_PROF_DIR/rank${rank_idx}" ]; then
+                RANK_DIR="$TORCH_PROF_DIR/rank${rank_idx}"
+            elif [ -d "$TORCH_PROF_DIR/rank_${rank_idx}" ]; then
+                RANK_DIR="$TORCH_PROF_DIR/rank_${rank_idx}"
+            elif [ -d "$TORCH_PROF_DIR/rank_$(printf "%02d" $rank_idx)" ]; then
+                RANK_DIR="$TORCH_PROF_DIR/rank_$(printf "%02d" $rank_idx)"
+            fi
+            if [ -n "$RANK_DIR" ]; then
+                TRACE=$(find "$RANK_DIR" -name "*.json" -type f | head -1)
+                if [ -n "$TRACE" ]; then
+                    # Relative target keeps the link valid inside the rank directory
+                    ln -sf "$(basename "$TRACE")" "$RANK_DIR/trace.json"
+                fi
+            fi
+        done
+
+        echo "  Trace pattern: rank*/trace.json"
+
+        $TRACELENS_WRAPPER generate_multi_rank_collective \
+            --trace_pattern "$TORCH_PROF_DIR/rank*/trace.json" \
+            --world_size $NUM_RANKS \
+            --output_xlsx_path "$OUTPUT" \
+            --detailed_analysis \
+            --use_multiprocessing
+
+        echo "  Done: $OUTPUT"
+    else
+        echo "  Could not generate collective report - no trace files found"
+    fi
+
+fi
+
+echo ""
+echo "════════════════════════════════════════════════════════════════"
+echo "Analysis Complete!"
+echo "════════════════════════════════════════════════════════════════"
+echo ""
+echo "📁 Results saved to:"
+echo " $OUTPUT_DIR/"
+echo ""
+
+# Count generated reports
+INDIV_COUNT=$(find "$OUTPUT_DIR/individual_reports" -name "*.xlsx" 2>/dev/null | wc -l)
+COLL_COUNT=$(find "$OUTPUT_DIR/collective_reports" -name "*.xlsx" 2>/dev/null | wc -l)
+
+echo "Generated reports:"
+echo " Individual reports (per rank): $INDIV_COUNT"
+echo " Collective reports (all ranks): $COLL_COUNT"
+echo ""
+
+echo "📊 Report Files:"
+echo ""
+echo "Individual Performance Reports:"
+if [ $INDIV_COUNT -gt 0 ]; then
+ find "$OUTPUT_DIR/individual_reports" -name "*.xlsx" | sort | sed 's/^/ /'
+else
+ echo " (none generated)"
+fi
+echo ""
+
+echo "Collective Reports:"
+if [ $COLL_COUNT -gt 0 ]; then
+ find "$OUTPUT_DIR/collective_reports" -name "*.xlsx" | sed 's/^/ /'
+else
+ echo " (none generated)"
+fi
+
+echo ""
+echo "Done!"