diff --git a/requirements.txt b/requirements.txt index 1e16fe4..6009e19 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,6 +21,7 @@ seaborn>=0.12.0 # For GEMM analysis (scripts/gemm_analysis/) git+https://github.com/AMD-AGI/TraceLens.git +jinja2>=3.0.0 # For hw_queue_eval (optional - install with: pip install -e ".[hw-queue]") # click>=8.0.0 diff --git a/scripts/gemm_analysis/create_embeded_html_report.py b/scripts/gemm_analysis/create_embeded_html_report.py index c9cd7e2..59597ee 100644 --- a/scripts/gemm_analysis/create_embeded_html_report.py +++ b/scripts/gemm_analysis/create_embeded_html_report.py @@ -3,25 +3,27 @@ Create a self-contained HTML report comparing two experiment sweeps. Embeds all images as base64 for easy sharing. +Uses the report_generator framework with ComparisonReportBuilder. + TODO: Future enhancement - support multiple sweep comparisons using comma-separated input (e.g., --sweeps sweep1,sweep2,sweep3) for N-way comparisons. Current implementation focuses on pairwise comparison which covers the most common use case of A/B testing. 
""" -import base64 +import sys import argparse from pathlib import Path -from html_template import get_comparison_template -def image_to_base64(image_path): - """Convert an image file to base64 string""" - try: - with open(image_path, 'rb') as img_file: - return base64.b64encode(img_file.read()).decode('utf-8') - except FileNotFoundError: - print(f"Warning: Image not found: {image_path}") - return None +# Add scripts directory to path for report_generator imports +sys.path.insert(0, str(Path(__file__).parent.parent)) +from report_generator.comparison_builder import ComparisonReportBuilder + + +def get_default_config_path() -> Path: + """Return the default path to the config JSON file.""" + return Path(__file__).parent.parent / "utils" / "gemm_comparison_config.json" + def parse_args(): """Parse command line arguments""" @@ -43,105 +45,59 @@ def parse_args(): --label1 "Base ROCm" \\ --label2 "ROCm 7.0" \\ --output comparison_report.html - """ + """, ) - parser.add_argument( - '--sweep1', - type=Path, - required=True, - help='Path to first sweep directory' - ) + parser.add_argument("--sweep1", type=Path, required=True, help="Path to first sweep directory") + + parser.add_argument("--sweep2", type=Path, required=True, help="Path to second sweep directory") parser.add_argument( - '--sweep2', - type=Path, - required=True, - help='Path to second sweep directory' + "--label1", type=str, default=None, help="Label for first sweep (default: directory name)" ) parser.add_argument( - '--label1', - type=str, - default=None, - help='Label for first sweep (default: directory name)' + "--label2", type=str, default=None, help="Label for second sweep (default: directory name)" ) parser.add_argument( - '--label2', - type=str, + "--output", + type=Path, default=None, - help='Label for second sweep (default: directory name)' + help="Output HTML file path (default: sweep_comparison_report.html in current directory)", ) parser.add_argument( - '--output', + "--config", type=Path, 
default=None, - help='Output HTML file path (default: sweep_comparison_report.html in current directory)' + help="Path to JSON config file (default: utils/gemm_comparison_config.json)", ) return parser.parse_args() -def get_plot_images(sweep_path): - """Get paths to all plot images for a sweep""" - plots_dir = sweep_path / "tracelens_analysis" / "plots" - - return { - 'threads': plots_dir / 'variance_by_threads_boxplot.png', - 'channels': plots_dir / 'variance_by_channels_boxplot.png', - 'ranks': plots_dir / 'variance_by_ranks_boxplot.png', - 'violin': plots_dir / 'variance_violin_combined.png', - 'interaction': plots_dir / 'variance_thread_channel_interaction.png', - } - -def create_html_report(sweep1_path, sweep2_path, label1, label2, output_path): - """Create HTML report comparing two sweeps""" - - # Get sweep names from paths if labels not provided - if label1 is None: - label1 = sweep1_path.name - if label2 is None: - label2 = sweep2_path.name - - # Get image paths for both sweeps - images_sweep1 = get_plot_images(sweep1_path) - images_sweep2 = get_plot_images(sweep2_path) - - # Convert images to base64 - print("Converting images to base64...") - print(f"\nSweep 1: {label1}") - image_data = {} - for key, path in images_sweep1.items(): - print(f" Processing: {key}") - b64 = image_to_base64(path) - if b64: - image_data[f'{key}_sweep1'] = f"data:image/png;base64,{b64}" - print(f" [OK]") - else: - image_data[f'{key}_sweep1'] = "" - print(f" [MISSING] {path}") - - print(f"\nSweep 2: {label2}") - for key, path in images_sweep2.items(): - print(f" Processing: {key}") - b64 = image_to_base64(path) - if b64: - image_data[f'{key}_sweep2'] = f"data:image/png;base64,{b64}" - print(f" [OK]") - else: - image_data[f'{key}_sweep2'] = "" - print(f" [MISSING] {path}") - - # Create HTML with embedded images - html_content = get_comparison_template(label1, label2, sweep1_path, sweep2_path, image_data) - - # Write the HTML file - with open(output_path, 'w', encoding='utf-8') as f: 
- f.write(html_content) - - print(f"\n[OK] HTML report created: {output_path}") - print(f" File size: {output_path.stat().st_size / 1024 / 1024:.2f} MB") + +def create_comparison_report( + sweep1_path: Path, + sweep2_path: Path, + output_path: Path, + config_path: Path | None = None, + label1: str | None = None, + label2: str | None = None, +) -> Path: + """Create HTML report comparing two sweeps using ComparisonReportBuilder.""" + if config_path is None: + config_path = get_default_config_path() + + builder = ComparisonReportBuilder( + sweep1_path=sweep1_path, + sweep2_path=sweep2_path, + output_path=output_path, + config_path=config_path, + label1=label1, + label2=label2, + ) + builder.save() return output_path @@ -171,12 +127,13 @@ def main(): print() # Create the report - create_html_report( - args.sweep1, - args.sweep2, - args.label1, - args.label2, - args.output + create_comparison_report( + sweep1_path=args.sweep1, + sweep2_path=args.sweep2, + output_path=args.output, + config_path=args.config, + label1=args.label1, + label2=args.label2, ) return 0 diff --git a/scripts/gemm_analysis/html_template.py b/scripts/gemm_analysis/html_template.py deleted file mode 100644 index 98c4bc0..0000000 --- a/scripts/gemm_analysis/html_template.py +++ /dev/null @@ -1,265 +0,0 @@ -"""HTML template for GEMM sweep comparison report. - -Currently optimized for pairwise (2-sweep) comparison with side-by-side layout. -TODO: Future enhancement - support N-way comparisons with adaptive grid layout. -""" - -def get_comparison_template(label1, label2, sweep1_path, sweep2_path, image_data): - """ - Generate HTML content for sweep comparison report. - - Args: - label1: Label for first sweep - label2: Label for second sweep - sweep1_path: Path to first sweep directory - sweep2_path: Path to second sweep directory - image_data: Dictionary of base64-encoded images - - Returns: - HTML content as string - """ - return f""" - - - - GEMM Kernel Variance - Sweep Comparison - - - -
- -

GEMM Kernel Variance - Sweep Comparison

- -
-

Visual comparison of GEMM kernel performance variance between two training sweeps.

-

This report compares kernel variance across different thread counts, channel configurations, and ranks.

-
- -
- -

Sweep Information

- - - - - - - - - - - - - - -
SweepPath
Sweep 1{label1}
Sweep 2{label2}
- -
- -

Variance by Thread Count

- - - - - - - - - - -
{label1}{label2}
-Threads Sweep 1 - -Threads Sweep 2 -
- -
- -

Variance by Channel Count

- - - - - - - - - - -
{label1}{label2}
-Channels Sweep 1 - -Channels Sweep 2 -
- -
- -

Variance by Rank

- - - - - - - - - - -
{label1}{label2}
-Ranks Sweep 1 - -Ranks Sweep 2 -
- -
- -

Variance Distribution (Violin Plots)

- - - - - - - - - - -
{label1}{label2}
-Violin Sweep 1 - -Violin Sweep 2 -
- -
- -

Thread-Channel Interaction

- - - - - - - - - - -
{label1}{label2}
-Interaction Sweep 1 - -Interaction Sweep 2 -
- -
- -
-

Data Files Information

- -

Sweep 1: {label1}

- - -

Sweep 2: {label2}

- - -
- -
- - -""" diff --git a/scripts/report_generator/__init__.py b/scripts/report_generator/__init__.py new file mode 100644 index 0000000..5f682cc --- /dev/null +++ b/scripts/report_generator/__init__.py @@ -0,0 +1,13 @@ +"""Report generator package for HTML report generation.""" + +from .config_reader import ReportConfigReader +from .report_builder import HTMLReportBuilder, SingleReportBuilder +from .comparison_builder import ComparisonReportBuilder + +__all__ = [ + "ReportConfigReader", + "HTMLReportBuilder", + "SingleReportBuilder", + "ComparisonReportBuilder", +] + diff --git a/scripts/report_generator/comparison_builder.py b/scripts/report_generator/comparison_builder.py new file mode 100644 index 0000000..8d69268 --- /dev/null +++ b/scripts/report_generator/comparison_builder.py @@ -0,0 +1,143 @@ +"""Comparison Report Builder for generating side-by-side comparison reports.""" + +from pathlib import Path + +from .report_builder import HTMLReportBuilder +from .comparison_templates import ( + COMPARISON_CHART_TEMPLATE, + SWEEP_INFO_TEMPLATE, + DATA_FILES_TEMPLATE, + COMPARISON_BODY_TEMPLATE, +) + + +class ComparisonReportBuilder(HTMLReportBuilder): + """ + HTML report builder for comparing two experiment sweeps. + Extends HTMLReportBuilder with side-by-side comparison capabilities. + + Overrides: + - build_chart: Loads images from two sweep directories + - render_chart: Uses comparison template for side-by-side display + - render_body: Adds sweep info and data files sections + """ + + def __init__( + self, + sweep1_path: Path, + sweep2_path: Path, + output_path: Path, + config_path: Path, + label1: str | None = None, + label2: str | None = None, + ): + """ + Initialize the comparison report builder. 
+ + Args: + sweep1_path: Path to first sweep directory + sweep2_path: Path to second sweep directory + output_path: Path where the HTML report will be saved + config_path: Path to the JSON configuration file + label1: Label for first sweep (default: directory name) + label2: Label for second sweep (default: directory name) + """ + super().__init__(output_path, config_path) + self.sweep1_path = sweep1_path + self.sweep2_path = sweep2_path + + # Use directory names as labels if not provided + self.label1 = label1 if label1 else sweep1_path.name + self.label2 = label2 if label2 else sweep2_path.name + + # ------------------------------------------------------------------------- + # Data/Utility Methods + # ------------------------------------------------------------------------- + + def get_plots_dir(self, sweep_path: Path) -> Path: + """Get the plots directory for a sweep.""" + return sweep_path / "tracelens_analysis" / "plots" + + # ------------------------------------------------------------------------- + # Render Methods (Jinja2 Templates) - Comparison-specific + # ------------------------------------------------------------------------- + + def render_chart(self, **kwargs) -> str: + """Render HTML layout for a side-by-side chart comparison.""" + return COMPARISON_CHART_TEMPLATE.render( + label1=self.label1, + label2=self.label2, + **kwargs, + ) + + def render_sweep_info(self) -> str: + """Render HTML layout for sweep information section.""" + return SWEEP_INFO_TEMPLATE.render( + label1=self.label1, + label2=self.label2, + ) + + def render_data_files(self) -> str: + """Render HTML layout for data files section.""" + return DATA_FILES_TEMPLATE.render( + label1=self.label1, + label2=self.label2, + sweep1_path=self.sweep1_path, + sweep2_path=self.sweep2_path, + ) + + def render_body(self, sections_html: str) -> str: + """Render HTML layout for the body with comparison-specific elements.""" + return COMPARISON_BODY_TEMPLATE.render( + title=self.get_report_title(), + 
summary=self.get_executive_summary(), + sweep_info=self.render_sweep_info(), + sections_html=sections_html, + data_files=self.render_data_files(), + ) + + # ------------------------------------------------------------------------- + # Build Methods (Structure Traversal) - Only override build_chart + # ------------------------------------------------------------------------- + + def build_chart(self, chart_config: dict, section_title: str) -> str: + """Build a comparison chart with images from both sweeps.""" + plots_dir1 = self.get_plots_dir(self.sweep1_path) + plots_dir2 = self.get_plots_dir(self.sweep2_path) + + image_path1 = plots_dir1 / chart_config["file"] + image_path2 = plots_dir2 / chart_config["file"] + + print(f" Processing: {chart_config['name']}") + image_data1 = self.get_image_base64(image_path1) + image_data2 = self.get_image_base64(image_path2) + + if image_data1: + print(f" Sweep 1 ({self.label1}): [OK]") + else: + print(f" Sweep 1 ({self.label1}): [MISSING] {image_path1}") + + if image_data2: + print(f" Sweep 2 ({self.label2}): [OK]") + else: + print(f" Sweep 2 ({self.label2}): [MISSING] {image_path2}") + + return self.render_chart( + title=section_title, + alt=chart_config["alt"], + image_data1=image_data1 or "", + image_data2=image_data2 or "", + ) + + # def build_body(self) -> str: + # """Build the HTML body (adds logging message).""" + # print("Converting images to base64...") + # return super().build_body() + + # def save(self) -> None: + # """Build and save the HTML report with file size info.""" + # final_html = self.build() + # with open(self.output_path, "w", encoding="utf-8") as f: + # f.write(final_html) + # print(f"\n[OK] HTML report created: {self.output_path}") + # print(f" File size: {self.output_path.stat().st_size / 1024 / 1024:.2f} MB") diff --git a/scripts/report_generator/comparison_builder.py.bkp b/scripts/report_generator/comparison_builder.py.bkp new file mode 100644 index 0000000..c8823e2 --- /dev/null +++ 
b/scripts/report_generator/comparison_builder.py.bkp @@ -0,0 +1,180 @@ +"""Comparison Report Builder for generating side-by-side comparison reports.""" + +from pathlib import Path +import base64 + +from .config_reader import ReportConfigReader +from .comparison_templates import ( + COMPARISON_CHART_TEMPLATE, + SWEEP_INFO_TEMPLATE, + DATA_FILES_TEMPLATE, + COMPARISON_BODY_TEMPLATE, + COMPARISON_DOCUMENT_TEMPLATE, +) + + +class ComparisonReportBuilder: + """ + HTML report builder for comparing two experiment sweeps. + Generates side-by-side comparison with embedded images. + + Structure: + - build_* methods: Handle structure traversal and data preparation + - render_* methods: Handle HTML layout using Jinja2 templates + """ + + def __init__( + self, + sweep1_path: Path, + sweep2_path: Path, + output_path: Path, + config_path: Path, + label1: str | None = None, + label2: str | None = None, + ): + self.sweep1_path = sweep1_path + self.sweep2_path = sweep2_path + self.output_path = output_path + self.config = ReportConfigReader(config_path) + + # Use directory names as labels if not provided + self.label1 = label1 if label1 else sweep1_path.name + self.label2 = label2 if label2 else sweep2_path.name + + # ------------------------------------------------------------------------- + # Data/Utility Methods + # ------------------------------------------------------------------------- + + def get_image_base64(self, image_path: Path) -> str | None: + """Read an image file and return its base64-encoded string.""" + try: + with open(image_path, "rb") as f: + return base64.b64encode(f.read()).decode("utf-8") + except Exception as e: + print(f" Warning: Image not found: {image_path}") + return None + + def get_plots_dir(self, sweep_path: Path) -> Path: + """Get the plots directory for a sweep.""" + return sweep_path / "tracelens_analysis" / "plots" + + def get_report_title(self) -> str: + """Return the main title for the report.""" + return self.config.get_title() + + def 
get_executive_summary(self) -> str: + """Return the executive summary HTML content.""" + return self.config.get_executive_summary() + + def get_sections(self) -> list[dict]: + """Return all section configurations from config.""" + return self.config.get_all_sections() + + # ------------------------------------------------------------------------- + # Render Methods (Jinja2 Templates) + # ------------------------------------------------------------------------- + + def render_comparison_chart( + self, + title: str, + alt: str, + image_data1: str, + image_data2: str, + ) -> str: + """Render HTML layout for a side-by-side chart comparison.""" + return COMPARISON_CHART_TEMPLATE.render( + title=title, + alt=alt, + label1=self.label1, + label2=self.label2, + image_data1=image_data1 or "", + image_data2=image_data2 or "", + ) + + def render_sweep_info(self) -> str: + """Render HTML layout for sweep information section.""" + return SWEEP_INFO_TEMPLATE.render( + label1=self.label1, + label2=self.label2, + ) + + def render_data_files(self) -> str: + """Render HTML layout for data files section.""" + return DATA_FILES_TEMPLATE.render( + label1=self.label1, + label2=self.label2, + sweep1_path=self.sweep1_path, + sweep2_path=self.sweep2_path, + ) + + def render_body(self, sections_html: str) -> str: + """Render HTML layout for the body.""" + return COMPARISON_BODY_TEMPLATE.render( + title=self.get_report_title(), + summary=self.get_executive_summary(), + sweep_info=self.render_sweep_info(), + sections_html=sections_html, + data_files=self.render_data_files(), + ) + + def render_document(self, body: str) -> str: + """Render the complete HTML document layout.""" + return COMPARISON_DOCUMENT_TEMPLATE.render( + header=self.config.get_html_header(), + body=body, + footer=self.config.get_html_footer(), + ) + + # ------------------------------------------------------------------------- + # Build Methods (Structure Traversal) + # 
------------------------------------------------------------------------- + + def build_comparison_chart(self, section: dict) -> str: + """Build a comparison chart section from config.""" + plots_dir1 = self.get_plots_dir(self.sweep1_path) + plots_dir2 = self.get_plots_dir(self.sweep2_path) + + image_path1 = plots_dir1 / section["file"] + image_path2 = plots_dir2 / section["file"] + + print(f" Processing: {section['id']}") + image_data1 = self.get_image_base64(image_path1) + image_data2 = self.get_image_base64(image_path2) + + if image_data1: + print(f" Sweep 1: [OK]") + else: + print(f" Sweep 1: [MISSING] {image_path1}") + + if image_data2: + print(f" Sweep 2: [OK]") + else: + print(f" Sweep 2: [MISSING] {image_path2}") + + return self.render_comparison_chart( + title=section["title"], + alt=section["alt"], + image_data1=image_data1 or "", + image_data2=image_data2 or "", + ) + + def build_body(self) -> str: + """Build the HTML body by building all comparison sections.""" + print("Converting images to base64...") + sections_html = "" + for section in self.get_sections(): + sections_html += self.build_comparison_chart(section) + return self.render_body(sections_html) + + def build(self) -> str: + """Build the complete HTML document.""" + return self.render_document(self.build_body()) + + def save(self) -> None: + """Build and save the HTML report to the output path.""" + final_html = self.build() + with open(self.output_path, "w", encoding="utf-8") as f: + f.write(final_html) + print(f"\n[OK] HTML report created: {self.output_path}") + print(f" File size: {self.output_path.stat().st_size / 1024 / 1024:.2f} MB") + diff --git a/scripts/report_generator/comparison_templates.py b/scripts/report_generator/comparison_templates.py new file mode 100644 index 0000000..b24130d --- /dev/null +++ b/scripts/report_generator/comparison_templates.py @@ -0,0 +1,95 @@ +"""Jinja2 templates for comparison HTML report generation.""" + +from jinja2 import Template + + 
+COMPARISON_CHART_TEMPLATE = Template( + """ +

{{ title }}

+ + + + + + + + + + +
{{ label1 }}{{ label2 }}
+{{ alt }} - {{ label1 }} + +{{ alt }} - {{ label2 }} +
+ +
+""" +) + +SWEEP_INFO_TEMPLATE = Template( + """ +

Sweep Information

+ + + + + + + + + + + + + + +
SweepPath
Sweep 1{{ label1 }}
Sweep 2{{ label2 }}
+ +
+""" +) + +DATA_FILES_TEMPLATE = Template( + """ +
+

Data Files Information

+ +

Sweep 1: {{ label1 }}

+ + +

Sweep 2: {{ label2 }}

+ + +
+""" +) + +COMPARISON_BODY_TEMPLATE = Template( + """ +

{{ title }}

+ +
+

{{ summary }}

+
+ +
+ +{{ sweep_info }} + +{{ sections_html }} + +{{ data_files }} +""" +) + +COMPARISON_DOCUMENT_TEMPLATE = Template("""{{ header }}{{ body }}{{ footer }}""") diff --git a/scripts/report_generator/config_reader.py b/scripts/report_generator/config_reader.py new file mode 100644 index 0000000..9767835 --- /dev/null +++ b/scripts/report_generator/config_reader.py @@ -0,0 +1,61 @@ +"""Configuration reader for HTML report generation.""" + +from pathlib import Path +import json + + +class ReportConfigReader: + """ + Reads and provides access to HTML report configuration from a JSON file. + """ + + def __init__(self, config_path: Path): + self.config_path = config_path + self._config = self._load_config() + + def _load_config(self) -> dict: + """Load the JSON configuration file.""" + with open(self.config_path, "r") as f: + return json.load(f) + + def get_title(self) -> str: + """Return the report title.""" + return self._config.get("title", "") + + def get_executive_summary(self) -> str: + """Return the executive summary text.""" + return self._config.get("executive_summary", "") + + def get_html_header(self) -> str: + """Return the HTML header template.""" + return self._config.get("html_header", "") + + def get_html_footer(self) -> str: + """Return the HTML footer template.""" + return self._config.get("html_footer", "") + + def get_all_sections(self) -> list[dict]: + """Return all section configurations.""" + return self._config.get("sections", []) + + def get_section_by_id(self, section_id: str) -> dict | None: + """Return a specific section by its ID.""" + for section in self.get_all_sections(): + if section.get("id") == section_id: + return section + return None + + def get_section_title(self, section_id: str) -> str: + """Return the title of a section by ID.""" + section = self.get_section_by_id(section_id) + return section.get("title", "") if section else "" + + def get_section_charts(self, section_id: str) -> list[dict]: + """Return the charts configuration for a 
section by ID.""" + section = self.get_section_by_id(section_id) + return section.get("charts", []) if section else [] + + def get_section_ids(self) -> list[str]: + """Return a list of all section IDs.""" + return [section.get("id", "") for section in self.get_all_sections()] + diff --git a/scripts/report_generator/report_builder.py b/scripts/report_generator/report_builder.py new file mode 100644 index 0000000..40f3c9e --- /dev/null +++ b/scripts/report_generator/report_builder.py @@ -0,0 +1,170 @@ +"""HTML Report Builder for generating reports from JSON configuration.""" + +from pathlib import Path +import base64 + +from .config_reader import ReportConfigReader +from .templates import ( + CHART_TEMPLATE, + SECTION_TEMPLATE, + BODY_TEMPLATE, + DOCUMENT_TEMPLATE, +) + + +class HTMLReportBuilder: + """ + Base HTML report builder that generates reports from JSON configuration. + Uses ReportConfigReader to populate sections and chart configurations. + + Structure: + - build_* methods: Handle structure traversal and data preparation + - render_* methods: Handle HTML layout using Jinja2 templates + + Subclasses should override: + - build_chart(): Define how to load and render chart data + - render_chart(): Define chart template (optional) + - render_body(): Add custom body sections (optional) + """ + + def __init__(self, output_path: Path, config_path: Path): + """ + Initialize the report builder. 
+ + Args: + output_path: Path where the HTML report will be saved + config_path: Path to the JSON configuration file + """ + self.output_path = output_path + self.config = ReportConfigReader(config_path) + + # ------------------------------------------------------------------------- + # Data/Utility Methods + # ------------------------------------------------------------------------- + + def get_image_base64(self, image_path: Path) -> str | None: + """Read an image file and return its base64-encoded string.""" + try: + with open(image_path, "rb") as f: + return base64.b64encode(f.read()).decode("utf-8") + except Exception as e: + print(f"Error getting image data from {image_path}: {e}") + return None + + def get_report_title(self) -> str: + """Return the main title for the report.""" + return self.config.get_title() + + def get_executive_summary(self) -> str: + """Return the executive summary HTML content.""" + return self.config.get_executive_summary() + + def get_sections(self) -> list[dict]: + """Return all section configurations from config.""" + return self.config.get_all_sections() + + # ------------------------------------------------------------------------- + # Render Methods (Jinja2 Templates) - Override for custom layouts + # ------------------------------------------------------------------------- + + def render_chart(self, **kwargs) -> str: + """Render HTML layout for a single chart using Jinja2.""" + return CHART_TEMPLATE.render(**kwargs) + + def render_section(self, title: str, charts_html: str) -> str: + """Render HTML layout for a section using Jinja2.""" + return SECTION_TEMPLATE.render( + title=title, + charts_html=charts_html, + ) + + def render_body(self, sections_html: str) -> str: + """Render HTML layout for the body using Jinja2.""" + return BODY_TEMPLATE.render( + title=self.get_report_title(), + summary=self.get_executive_summary(), + sections_html=sections_html, + ) + + def render_document(self, body: str) -> str: + """Render the complete 
HTML document layout using Jinja2.""" + return DOCUMENT_TEMPLATE.render( + header=self.config.get_html_header(), + body=body, + footer=self.config.get_html_footer(), + ) + + # ------------------------------------------------------------------------- + # Build Methods (Structure Traversal) - Override for custom data handling + # ------------------------------------------------------------------------- + + def build_chart(self, chart_config: dict, section_title: str) -> str: + """ + Build a chart from config. Override in subclass to define image loading. + + Args: + chart_config: Chart configuration dict with file, name, alt, description + section_title: Title of the parent section (for context) + + Returns: + Rendered chart HTML string + """ + raise NotImplementedError("Subclass must implement build_chart") + + def build_section(self, section: dict) -> str: + """Build a section by iterating through its charts.""" + charts_html = "" + charts = section.get("charts", []) + for chart in charts: + charts_html += self.build_chart(chart, section["title"]) + return self.render_section(section["title"], charts_html) + + def build_body(self) -> str: + """Traverse all sections and build the body HTML.""" + sections_html = "" + for section in self.get_sections(): + sections_html += self.build_section(section) + return self.render_body(sections_html) + + def build(self) -> str: + """Build the complete HTML document.""" + return self.render_document(self.build_body()) + + def save(self) -> None: + """Build and save the HTML report to the output path.""" + final_html = self.build() + with open(self.output_path, "w", encoding="utf-8") as f: + f.write(final_html) + print(f"Final HTML file created at: {self.output_path}") + + +class SingleReportBuilder(HTMLReportBuilder): + """ + Report builder for single-source reports with one plot directory. + Only overrides build_chart to define single-image loading. 
+ """ + + def __init__(self, plot_dir: Path, output_path: Path, config_path: Path): + """ + Initialize the single report builder. + + Args: + plot_dir: Directory containing plot images + output_path: Path where the HTML report will be saved + config_path: Path to the JSON configuration file + """ + super().__init__(output_path, config_path) + self.plot_dir = plot_dir + + def build_chart(self, chart_config: dict, section_title: str) -> str: + """Build a single chart by loading one image from plot_dir.""" + image_path = self.plot_dir / chart_config["file"] + image_data = self.get_image_base64(image_path) + if image_data is None: + return "" + return self.render_chart( + name=chart_config["name"], + image_data=image_data, + alt=chart_config["alt"], + description=chart_config["description"], + ) diff --git a/scripts/report_generator/templates.py b/scripts/report_generator/templates.py new file mode 100644 index 0000000..cffaa08 --- /dev/null +++ b/scripts/report_generator/templates.py @@ -0,0 +1,141 @@ +"""Jinja2 templates for HTML report generation.""" + +from jinja2 import Template + + +# ============================================================================= +# Single Report Templates +# ============================================================================= + +CHART_TEMPLATE = Template( + """ +

{{ name }}

+ {{ alt }} +

{{ description }}

+""" +) + +SECTION_TEMPLATE = Template( + """ +

{{ title }}

+ {{ charts_html }} +""" +) + +BODY_TEMPLATE = Template( + """ + + +

{{ title }}

+ +
+ +

Executive Summary

+ +

{{ summary }}

+ +{{ sections_html }} + + +""" +) + +DOCUMENT_TEMPLATE = Template("""{{ header }}{{ body }}{{ footer }}""") + + +# ============================================================================= +# Comparison Report Templates (Side-by-Side) +# ============================================================================= + +COMPARISON_CHART_TEMPLATE = Template( + """ + + + + + + + + + +
{{ label1 }}{{ label2 }}
+{% if image_data1 %} +{{ alt }} - {{ label1 }} +{% else %} +

Image not available

+{% endif %} +
+{% if image_data2 %} +{{ alt }} - {{ label2 }} +{% else %} +

Image not available

+{% endif %} +
+""" +) + +COMPARISON_SECTION_TEMPLATE = Template( + """ +

{{ title }}

+{{ charts_html }} +
+""" +) + +COMPARISON_INFO_TABLE_TEMPLATE = Template( + """ +

Sweep Information

+ + + + + + + + + + + + + +
SweepPath
Sweep 1{{ label1 }} ({{ path1 }})
Sweep 2{{ label2 }} ({{ path2 }})
+
+""" +) + +COMPARISON_BODY_TEMPLATE = Template( + """ +

{{ title }}

+ +
+

{{ summary }}

+
+ +
+ +{{ info_table }} + +{{ sections_html }} + +
+

Data Files Information

+ +

Sweep 1: {{ label1 }}

+ + +

Sweep 2: {{ label2 }}

+ +
+""" +) + diff --git a/scripts/tracelens_single_config/create_final_html.py b/scripts/tracelens_single_config/create_final_html.py index 0728b95..06e0e7c 100644 --- a/scripts/tracelens_single_config/create_final_html.py +++ b/scripts/tracelens_single_config/create_final_html.py @@ -1,79 +1,29 @@ +"""Main entry point for creating HTML analysis reports.""" + +import sys from pathlib import Path -import base64 import argparse -from html_report_config import ( - HTML_HEADER, - HTML_FOOTER, - OVERALL_GPU_CHARTS, - CROSS_RANK_CHARTS, - NCCL_CHARTS, -) - - -def get_image_base64(image_path): - """Read an image file and return its base64-encoded string.""" - try: - with open(image_path, "rb") as f: - return base64.b64encode(f.read()).decode("utf-8") - except Exception as e: - print(f"Error getting image data from {image_path}: {e}") - return None - - -def create_chart_html(plot_dir, chart_config): - """Generate HTML for a single chart with title, image, and description.""" - image_data = get_image_base64(plot_dir / chart_config["file"]) - if image_data is None: - return "" - return f""" -

{chart_config['name']}

- {chart_config['alt']} - {chart_config['description']} - """ - - -def create_section_html(title, plot_dir, charts): - """Generate HTML for a complete section with multiple charts.""" - section_html = f""" -

{title}

- """ - for chart in charts: - section_html += create_chart_html(plot_dir, chart) - return section_html +# Add scripts directory to path for report_generator imports +sys.path.insert(0, str(Path(__file__).parent.parent)) +from report_generator.report_builder import SingleReportBuilder -def create_final_html(plot_file_path, output_path): - html_body = """ - +def get_default_config_path() -> Path: + """Return the default path to the config JSON file.""" + return Path(__file__).parent.parent / "utils" / "html_report_config.json" -

Performance Analysis Report

-
- -

Executive Summary

- -Comparison of GPU performance metrics between baseline and Test -implementations across 8 ranks. -""" - - # Build all sections - sections = [ - create_section_html( - "1. Overall GPU Metrics Comparison", plot_file_path, OVERALL_GPU_CHARTS - ), - create_section_html( - "2. Cross-Rank Performance Comparison", plot_file_path, CROSS_RANK_CHARTS - ), - create_section_html( - "3. NCCL Collective Operations Analysis", plot_file_path, NCCL_CHARTS - ), - ] - - final_html = HTML_HEADER + html_body + "".join(sections) + HTML_FOOTER - with open(output_path, "w") as f: - f.write(final_html) - print(f"Final HTML file created at: {output_path}") +def create_final_html( + plot_file_path: Path, + output_path: Path, + config_path: Path | None = None, +) -> None: + """Factory function to create and save a report.""" + if config_path is None: + config_path = get_default_config_path() + builder = SingleReportBuilder(plot_file_path, output_path, config_path) + builder.save() def main(): @@ -85,10 +35,17 @@ def main(): "--plot-files-directory", type=Path, required=True, - help="Path to the plot files direcotry.", + help="Path to the plot files directory.", ) parser.add_argument( - "-o", "--output-html", type=None, default=None, help="Path to the output file." + "-o", "--output-html", type=Path, default=None, help="Path to the output file." 
+ ) + parser.add_argument( + "-c", + "--config", + type=Path, + default=None, + help="Path to the JSON config file (default: utils/html_report_config.json).", ) args = parser.parse_args() output_path = ( @@ -96,7 +53,7 @@ def main(): if args.output_html else args.plot_files_directory.parent / "final_analysis_report.html" ) - create_final_html(args.plot_files_directory, output_path) + create_final_html(args.plot_files_directory, output_path, args.config) if __name__ == "__main__": diff --git a/scripts/tracelens_single_config/html_report_config.py b/scripts/tracelens_single_config/html_report_config.py deleted file mode 100644 index 5f14d2c..0000000 --- a/scripts/tracelens_single_config/html_report_config.py +++ /dev/null @@ -1,119 +0,0 @@ -"""Configuration constants for HTML report generation.""" - -HTML_HEADER = """ - - - -Performance Analysis Report - - -""" - -HTML_FOOTER = """ - - -""" - -# Chart configuration for each section -OVERALL_GPU_CHARTS = [ - { - "name": "Percentage Change Overview", - "file": "improvement_chart.png", - "alt": "Summary Chart", - "description": "Overall performance change across key GPU metrics. Positive values indicate improvement (Test is faster/better).", - }, - { - "name": "Absolute Time Comparison", - "file": "abs_time_comparison.png", - "alt": "Absolute Time Comparison", - "description": "Side-by-side comparison of absolute execution times for all GPU metrics.", - }, -] - -CROSS_RANK_CHARTS = [ - { - "name": "Performance Heatmap by Rank", - "file": "gpu_time_heatmap.png", - "alt": "GPU Metric Percentage Change by Rank (HeatMap)", - "description": "Comprehensive heatmap showing percent change for all metrics across all ranks. 
Green indicates better performance (positive % change).", - }, - { - "name": "Total Time", - "file": "total_time_by_rank.png", - "alt": "total_time by Rank", - "description": "Total execution time comparison across all ranks, showing end-to-end performance characteristics.", - }, - { - "name": "Computation Time", - "file": "computation_time_by_rank.png", - "alt": "computation_time by Rank", - "description": "Pure computation time excluding communication overhead, analyzed per rank.", - }, - { - "name": "Communication Time", - "file": "total_comm_time_by_rank.png", - "alt": "total_comm_time by Rank", - "description": "Total time spent in collective communication operations across ranks.", - }, - { - "name": "Idle Time", - "file": "idle_time_by_rank.png", - "alt": "idle_time by Rank", - "description": "GPU idle time comparison showing resource utilization efficiency per rank.", - }, - { - "name": "Detailed Percentage Change by Metric", - "file": "gpu_time_change_percentage_summaryby_rank.png", - "alt": "gpu_time_change_percentage_summaryby_rank by Rank", - "description": "Detailed breakdown of percent change for each metric type across all ranks.", - }, -] - -NCCL_CHARTS = [ - { - "name": "NCCL Communication Latency", - "file": "NCCL_Communication_Latency_comparison.png", - "alt": "NCCL Communication Latency Comparison", - "description": "Mean communication latency for NCCL allreduce operations across different message sizes", - }, - { - "name": "NCCL Algorithm Bandwidth", - "file": "NCCL_Algorithm_Bandwidth_comparison.png", - "alt": "NCCL Algorithm Bandwidth Comparison", - "description": "Algorithm bandwidth achieved for different message sizes in NCCL collective operations.", - }, - { - "name": "NCCL Bus Bandwidth", - "file": "NCCL_Bus_Bandwidth_comparison.png", - "alt": "NCCL Bus Bandwidth Comparison", - "description": "Bus bandwidth utilization across NCCL operations and message sizes.", - }, - { - "name": "NCCL Performance Percentage Change", - "file": 
"NCCL_Performance_Percentage_Change_comparison.png", - "alt": "NCCL Performance Percentage Change Comparison", - "description": "Percent change in communication latency and bandwidth metrics for each message sizec configuration", - }, - { - "name": "NCCL Total Communication Latency", - "file": "NCCL_Total_Communication_Latency_comparison.png", - "alt": "NCCL Total Communication Latency Comparison", - "description": "Aggregate communication latency summed across all operations for each message size.", - }, -] diff --git a/scripts/utils/gemm_comparison_config.json b/scripts/utils/gemm_comparison_config.json new file mode 100644 index 0000000..9f01bde --- /dev/null +++ b/scripts/utils/gemm_comparison_config.json @@ -0,0 +1,68 @@ +{ + "title": "GEMM Kernel Variance - Sweep Comparison", + "executive_summary": "Visual comparison of GEMM kernel performance variance between two training sweeps. This report compares kernel variance across different thread counts, channel configurations, and ranks.", + "html_header": "\n\n\n \n GEMM Kernel Variance - Sweep Comparison\n \n\n\n
\n", + "html_footer": "\n
\n\n", + "sections": [ + { + "id": "threads", + "title": "Variance by Thread Count", + "charts": [ + { + "name": "Thread Count Variance", + "file": "variance_by_threads_boxplot.png", + "alt": "Threads comparison", + "description": "Box plot showing GEMM kernel variance distribution across different thread counts." + } + ] + }, + { + "id": "channels", + "title": "Variance by Channel Count", + "charts": [ + { + "name": "Channel Count Variance", + "file": "variance_by_channels_boxplot.png", + "alt": "Channels comparison", + "description": "Box plot showing GEMM kernel variance distribution across different channel configurations." + } + ] + }, + { + "id": "ranks", + "title": "Variance by Rank", + "charts": [ + { + "name": "Rank Variance", + "file": "variance_by_ranks_boxplot.png", + "alt": "Ranks comparison", + "description": "Box plot showing GEMM kernel variance distribution across different GPU ranks." + } + ] + }, + { + "id": "violin", + "title": "Variance Distribution (Violin Plots)", + "charts": [ + { + "name": "Variance Distribution", + "file": "variance_violin_combined.png", + "alt": "Violin comparison", + "description": "Violin plots showing the full distribution of GEMM kernel variance." + } + ] + }, + { + "id": "interaction", + "title": "Thread-Channel Interaction", + "charts": [ + { + "name": "Thread-Channel Interaction", + "file": "variance_thread_channel_interaction.png", + "alt": "Interaction comparison", + "description": "Interaction plot showing how thread count and channel count jointly affect kernel variance." 
+ } + ] + } + ] +} \ No newline at end of file diff --git a/scripts/utils/html_report_config.json b/scripts/utils/html_report_config.json new file mode 100644 index 0000000..2079f66 --- /dev/null +++ b/scripts/utils/html_report_config.json @@ -0,0 +1,104 @@ +{ + "title": "Performance Analysis Report", + "executive_summary": "Comparison of GPU performance metrics between baseline and Test implementations across 8 ranks.", + "html_header": "\n\n\n\nPerformance Analysis Report\n\n\n", + "html_footer": "\n\n\n", + "sections": [ + { + "id": "overall_gpu", + "title": "1. Overall GPU Metrics Comparison", + "charts": [ + { + "name": "Percentage Change Overview", + "file": "improvement_chart.png", + "alt": "Summary Chart", + "description": "Overall performance change across key GPU metrics. Positive values indicate improvement (Test is faster/better)." + }, + { + "name": "Absolute Time Comparison", + "file": "abs_time_comparison.png", + "alt": "Absolute Time Comparison", + "description": "Side-by-side comparison of absolute execution times for all GPU metrics." + } + ] + }, + { + "id": "cross_rank", + "title": "2. Cross-Rank Performance Comparison", + "charts": [ + { + "name": "Performance Heatmap by Rank", + "file": "gpu_time_heatmap.png", + "alt": "GPU Metric Percentage Change by Rank (HeatMap)", + "description": "Comprehensive heatmap showing percent change for all metrics across all ranks. Green indicates better performance (positive % change)." + }, + { + "name": "Total Time", + "file": "total_time_by_rank.png", + "alt": "total_time by Rank", + "description": "Total execution time comparison across all ranks, showing end-to-end performance characteristics." + }, + { + "name": "Computation Time", + "file": "computation_time_by_rank.png", + "alt": "computation_time by Rank", + "description": "Pure computation time excluding communication overhead, analyzed per rank." 
+ }, + { + "name": "Communication Time", + "file": "total_comm_time_by_rank.png", + "alt": "total_comm_time by Rank", + "description": "Total time spent in collective communication operations across ranks." + }, + { + "name": "Idle Time", + "file": "idle_time_by_rank.png", + "alt": "idle_time by Rank", + "description": "GPU idle time comparison showing resource utilization efficiency per rank." + }, + { + "name": "Detailed Percentage Change by Metric", + "file": "gpu_time_change_percentage_summaryby_rank.png", + "alt": "gpu_time_change_percentage_summaryby_rank by Rank", + "description": "Detailed breakdown of percent change for each metric type across all ranks." + } + ] + }, + { + "id": "nccl", + "title": "3. NCCL Collective Operations Analysis", + "charts": [ + { + "name": "NCCL Communication Latency", + "file": "NCCL_Communication_Latency_comparison.png", + "alt": "NCCL Communication Latency Comparison", + "description": "Mean communication latency for NCCL allreduce operations across different message sizes." + }, + { + "name": "NCCL Algorithm Bandwidth", + "file": "NCCL_Algorithm_Bandwidth_comparison.png", + "alt": "NCCL Algorithm Bandwidth Comparison", + "description": "Algorithm bandwidth achieved for different message sizes in NCCL collective operations." + }, + { + "name": "NCCL Bus Bandwidth", + "file": "NCCL_Bus_Bandwidth_comparison.png", + "alt": "NCCL Bus Bandwidth Comparison", + "description": "Bus bandwidth utilization across NCCL operations and message sizes."
+ }, + { + "name": "NCCL Performance Percentage Change", + "file": "NCCL_Performance_Percentage_Change_comparison.png", + "alt": "NCCL Performance Percentage Change Comparison", + "description": "Percent change in communication latency and bandwidth metrics for each message size configuration." + }, + { + "name": "NCCL Total Communication Latency", + "file": "NCCL_Total_Communication_Latency_comparison.png", + "alt": "NCCL Total Communication Latency Comparison", + "description": "Aggregate communication latency summed across all operations for each message size." + } + ] + } + ] +} \ No newline at end of file