diff --git a/scripts/gemm_analysis/create_embeded_html_report.py b/scripts/gemm_analysis/create_embeded_html_report.py index c08a142..4df4037 100644 --- a/scripts/gemm_analysis/create_embeded_html_report.py +++ b/scripts/gemm_analysis/create_embeded_html_report.py @@ -1,3 +1,4 @@ + #!/usr/bin/env python3 """ Create a self-contained HTML report comparing two experiment sweeps. @@ -68,6 +69,13 @@ def parse_args(): default=None, help='Label for second sweep (default: directory name)' ) + + parser.add_argument( + '--variance', + type=str, + default='channel', + help='Label for second sweep (default: directory name)' + ) parser.add_argument( '--output', @@ -78,7 +86,7 @@ def parse_args(): return parser.parse_args() -def get_plot_images(sweep_path): +def get_plot_images(sweep_path, variance): """Get paths to all plot images for a sweep""" plots_dir = sweep_path / "tracelens_analysis" / "plots" @@ -88,9 +96,23 @@ def get_plot_images(sweep_path): 'ranks': plots_dir / 'variance_by_ranks_boxplot.png', 'violin': plots_dir / 'variance_violin_combined.png', 'interaction': plots_dir / 'variance_thread_channel_interaction.png', + 'gpu_summary' : plots_dir / 'comparison_summary.png', + 'busy_time' : plots_dir / f'busy_time_{variance}_variance.png', + 'computation_time' : plots_dir / f'computation_time_{variance}_variance.png', + 'exposed_comm_time' : plots_dir / f'exposed_comm_time_{variance}_variance.png', + 'exposed_memcpy_time' : plots_dir / f'exposed_memcpy_time_{variance}_variance.png', + 'idle_time' : plots_dir / f'idle_time_{variance}_variance.png', + 'total_comm_time' : plots_dir / f'total_comm_time_{variance}_variance.png', + 'total_memcpy_time' : plots_dir / f'total_memcpy_time_{variance}_variance.png', + 'total_time' : plots_dir / f'total_time_{variance}_variance.png', + 'k0' : plots_dir / 'Kernel_0.png', + 'k1' : plots_dir / 'Kernel_1.png', + 'k2' : plots_dir / 'Kernel_2.png', + 'k3' : plots_dir / 'Kernel_3.png', } -def create_html_report(sweep1_path, sweep2_path, 
label1, label2, output_path): + +def create_html_report(sweep1_path, sweep2_path, label1, label2, variance, output_path): """Create HTML report comparing two sweeps""" # Get sweep names from paths if labels not provided @@ -99,9 +121,10 @@ def create_html_report(sweep1_path, sweep2_path, label1, label2, output_path): if label2 is None: label2 = sweep2_path.name + # Get image paths for both sweeps - images_sweep1 = get_plot_images(sweep1_path) - images_sweep2 = get_plot_images(sweep2_path) + images_sweep1 = get_plot_images(sweep1_path, variance=variance) + images_sweep2 = get_plot_images(sweep2_path, variance=variance) # Convert images to base64 print("Converting images to base64...") @@ -349,6 +372,116 @@ def create_html_report(sweep1_path, sweep2_path, label1, label2, output_path):
+

GPU Component Time Summary

+ + + + + + + + + +
{label1} | {label2}
Summary Sweep 1 | Summary Sweep 2
+ +
+ +

GPU Component Time over {variance}

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Component | {label1} | {label2}
Busy Time | busy_time Sweep 1 | busy_time Sweep 2
Computation Time | computation_time Sweep 1 | computation_time Sweep 2
Exposed Comm Time | exposed_comm_time Sweep 1 | exposed_comm_time Sweep 2
Exposed Memcpy Time | exposed_memcpy_time Sweep 1 | exposed_memcpy_time Sweep 2
Idle Time | idle_time Sweep 1 | idle_time Sweep 2
Total Comm Time | total_comm_time Sweep 1 | total_comm_time Sweep 2
Total Memcpy Time | total_memcpy_time Sweep 1 | total_memcpy_time Sweep 2
Total Time | total_time Sweep 1 | total_time Sweep 2
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

Data Files Information

@@ -416,6 +549,7 @@ def main(): args.sweep2, args.label1, args.label2, + args.variance, args.output ) @@ -424,6 +558,3 @@ def main(): if __name__ == "__main__": exit(main()) - - - diff --git a/scripts/gemm_analysis/generate_comparison_report.py b/scripts/gemm_analysis/generate_comparison_report.py index 51ed992..eb37954 100644 --- a/scripts/gemm_analysis/generate_comparison_report.py +++ b/scripts/gemm_analysis/generate_comparison_report.py @@ -50,14 +50,18 @@ def get_thread_and_type_values_over_ranks_with_mean_channel(all_result) : print("Done computing geomeans across channels.") return mean_result -def plot_mean_result(output_dir, rank_length, mean_result) : +def plot_mean_result(output_dir, rank_length, mean_result, threads) : bar_width = 0.35 x_pos = np.arange(rank_length) for type, type_info in mean_result.items() : - output_file = output_dir / f"{type}.png" + output_file = output_dir / f"{type}_rank_variance.png" plt.figure() - plt.bar(x_pos - (bar_width/2), type_info['256'], bar_width, label="256", color='r') - plt.bar(x_pos + (bar_width/2), type_info['512'], bar_width, label="512", color='b') + + if(len(threads) > 1) : + plt.bar(x_pos - (bar_width/2), type_info[str(threads[0])], bar_width, label="256", color='r') + plt.bar(x_pos + (bar_width/2), type_info[str(threads[1])], bar_width, label="512", color='b') + else : + plt.bar(x_pos, type_info[str(threads[0])], bar_width, label="256", color='b') plt.ylabel("Time") plt.xlabel("Rank") plt.title(type) @@ -165,7 +169,7 @@ def main(): all_results = process_comparison_data(base_path=base_path, channel=channels, thread=thread_configs, rank=ranks) mean_results = get_thread_and_type_values_over_ranks_with_mean_channel(all_results) - plot_mean_result(output_dir, len(ranks), mean_results) + plot_mean_result(output_dir, len(ranks), mean_results, threads=thread_configs) if __name__ == "__main__": main() diff --git a/scripts/gemm_analysis/generate_comparison_report_channel_variance.py 
b/scripts/gemm_analysis/generate_comparison_report_channel_variance.py new file mode 100644 index 0000000..cf6349b --- /dev/null +++ b/scripts/gemm_analysis/generate_comparison_report_channel_variance.py @@ -0,0 +1,178 @@ +import argparse +from collections import defaultdict +from pathlib import Path +from openpyxl import load_workbook +from statistics import geometric_mean +import matplotlib.pyplot as plt +import numpy as np + +def process_comparison_data(base_path, channel, thread, rank) : + #all_results = defaultdict(lambda : defaultdict(lambda : defaultdict (lambda : defaultdict[list]))) + all_results = defaultdict(lambda : defaultdict (lambda : defaultdict(list))) + #all_results['busy_time']["256"]["0"].append(10) + #all_results['busy_time']["256"]["0"].append(29) + + comparison_path = base_path / "comparisons" + + col_type_name = 0 + col_256_time = 3 + col_512_time = 5 + + for t in thread : + for c in channel : + for r in rank : + file_name = f"compare_{c}ch_rank{r}_across_threads.xlsx" + file_path = comparison_path / file_name + + if not file_path.exists(): + print(f" Warning: File not found: {file_path}") + continue + + workbook = load_workbook(file_path) + gpu_sheet = workbook['gpu_timeline'] + + for i, row in enumerate(gpu_sheet.iter_rows(values_only=True)): + if i==0 : + continue + all_results[row[col_type_name]]['256'][c].append(float(row[col_256_time])) + all_results[row[col_type_name]]['512'][c].append(float(row[col_512_time])) + print("Done reading excels.") + return all_results +def get_thread_and_type_values_over_ranks_with_mean_channel(all_result) : + mean_result = defaultdict(lambda : defaultdict(list)) + + for type, type_info in all_result.items() : + mean_result[type] = {} + for t_id, rank_info in type_info.items() : + mean_result[type][t_id] = [] + for rank, ch_arr in rank_info.items() : + mean_result[type][t_id].append(geometric_mean(ch_arr)) + print("Done computing geomeans across channels.") + return mean_result + +def 
plot_mean_result(output_dir, channels, mean_result, threads) : + bar_width = 0.35 + x_pos = np.arange(len(channels)) + for type, type_info in mean_result.items() : + output_file = output_dir / f"{type}_channel_variance.png" + plt.figure() + + if(len(threads) > 1) : + plt.bar(x_pos - (bar_width/2), type_info[str(threads[0])], bar_width, label="256", color='r') + plt.bar(x_pos + (bar_width/2), type_info[str(threads[1])], bar_width, label="512", color='b') + else : + plt.bar(x_pos, type_info[str(threads[0])], bar_width, label="256", color='b') + plt.ylabel("Time") + plt.xlabel("Rank") + plt.xticks(x_pos, channels) + plt.title(type) + plt.legend() + plt.tight_layout() + plt.savefig(output_file) + plt.close() + print("Done plotting.") + +def parse_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Analyze tracelens comparison report for components", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Use default settings + python analyze_gemm_reports.py + # Specify custom base path + python analyze_gemm_reports.py --base-path /path/to/tracelens_analysis + # Specify custom configurations + python analyze_gemm_reports.py --threads 256 512 --channels 28 42 56 70 --ranks 0 1 2 3 4 5 6 7 + # Extract top 10 kernels instead of top 5 + python analyze_gemm_reports.py --top-k 10 + # Custom output file + python analyze_gemm_reports.py --output-plot-directory path/to/directory/to/save/plots + """ + ) + + parser.add_argument( + '--base-path', + type=Path, + default=Path("/home/oyazdanb/aorta/experiments/sweep_20251121_155219/tracelens_analysis"), + help='Base path to tracelens_analysis directory (default: %(default)s)' + ) + + parser.add_argument( + '--threads', + type=int, + nargs='+', + default=[256, 512], + help='Thread configurations to analyze (default: %(default)s)' + ) + + parser.add_argument( + '--channels', + type=int, + nargs='+', + default=[28, 42, 56, 70], + help='Channel configurations to 
analyze (default: %(default)s)' + ) + + parser.add_argument( + '--ranks', + type=int, + nargs='+', + default=list(range(8)), + help='Ranks to analyze (default: 0 1 2 3 4 5 6 7)' + ) + + parser.add_argument( + '-o', + '--output-plot-directory', + type=Path, + default=None, + help='Output CSV filename (default: %(default)s)' + ) + + parser.add_argument( + '--types', + type=str, + nargs='+', + default=['busy_time', 'computation_time', 'exposed_comm_time'] + ) + + + return parser.parse_args() +def main(): + # Parse command line arguments + args = parse_args() + + base_path = args.base_path + thread_configs = args.threads + channels = args.channels + ranks = args.ranks + + + output_dir = args.output_plot_directory + if(args.output_plot_directory is None) : + output_dir = base_path / "plots" + + output_dir.mkdir(exist_ok=True, parents=True) + + # Validate base path + if not base_path.exists(): + print(f"Error: Base path does not exist: {base_path}") + return + + print(f"Configuration:") + print(f" Base path: {base_path}") + print(f" Threads: {thread_configs}") + print(f" Channels: {channels}") + print(f" Ranks: {ranks}") + print(f" Output plot directory: {output_dir}") + + all_results = process_comparison_data(base_path=base_path, channel=channels, thread=thread_configs, rank=ranks) + mean_results = get_thread_and_type_values_over_ranks_with_mean_channel(all_results) + plot_mean_result(output_dir, channels, mean_results, threads=thread_configs) + +if __name__ == "__main__": + main() + + diff --git a/scripts/gemm_analysis/generate_comparison_report_summary.py b/scripts/gemm_analysis/generate_comparison_report_summary.py new file mode 100644 index 0000000..99beb79 --- /dev/null +++ b/scripts/gemm_analysis/generate_comparison_report_summary.py @@ -0,0 +1,176 @@ +import argparse +from collections import defaultdict +from pathlib import Path +from openpyxl import load_workbook +from statistics import geometric_mean +import matplotlib.pyplot as plt +import numpy as np + +def 
process_comparison_data(base_path, channel, thread, rank) : + all_results = defaultdict(lambda : defaultdict(list)) + #all_results['busy_time']["256"]["0"].append(10) + #all_results['busy_time']["256"]["0"].append(29) + + comparison_path = base_path / "comparisons" + + col_type_name = 0 + col_256_time = 3 + col_512_time = 5 + + for t in thread : + for c in channel : + for r in rank : + file_name = f"compare_{c}ch_rank{r}_across_threads.xlsx" + file_path = comparison_path / file_name + + if not file_path.exists(): + print(f" Warning: File not found: {file_path}") + continue + + workbook = load_workbook(file_path) + gpu_sheet = workbook['gpu_timeline'] + + for i, row in enumerate(gpu_sheet.iter_rows(values_only=True)): + if i==0 : + continue + all_results[row[col_type_name]]['256'].append(float(row[col_256_time])) + all_results[row[col_type_name]]['512'].append(float(row[col_512_time])) + print("Done reading excels.") + return all_results +def get_thread_and_type_values_over_ranks_with_mean_channel(all_result) : + #mean_result = defaultdict(lambda : defaultdict(list)) + type_list = [] + mean_result = defaultdict(list) + for type, type_info in all_result.items() : + type_list.append(type) + for t_id, time_info in type_info.items() : + mean_result[t_id].append(geometric_mean(time_info)) + print("Done computing geomeans across channels.") + return type_list, mean_result + +def plot_mean_result(output_dir, types, mean_result, threads) : + bar_width = 0.35 + x_pos = np.arange(len(types)) + output_file = output_dir / "comparison_summary.png" + + plt.figure() + + if(len(threads) > 1) : + plt.bar(x_pos - (bar_width/2), mean_result[str(threads[0])], bar_width, label="256", color='r') + plt.bar(x_pos + (bar_width/2), mean_result[str(threads[1])], bar_width, label="512", color='b') + else : + plt.bar(x_pos, mean_result[str(threads[0])], bar_width, label="256", color='b') + plt.ylabel("Time") + plt.xlabel("GPU Component") + plt.xticks(x_pos, types, rotation=45, ha='right', 
fontsize=14) + plt.title('GPU Component Summary v/s Time') + plt.legend() + plt.tight_layout() + plt.savefig(output_file) + plt.close() + print("Done plotting.") + +def parse_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Analyze tracelens comparison report for components", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Use default settings + python analyze_gemm_reports.py + # Specify custom base path + python analyze_gemm_reports.py --base-path /path/to/tracelens_analysis + # Specify custom configurations + python analyze_gemm_reports.py --threads 256 512 --channels 28 42 56 70 --ranks 0 1 2 3 4 5 6 7 + # Extract top 10 kernels instead of top 5 + python analyze_gemm_reports.py --top-k 10 + # Custom output file + python analyze_gemm_reports.py --output-plot-directory path/to/directory/to/save/plots + """ + ) + + parser.add_argument( + '--base-path', + type=Path, + default=Path("/home/oyazdanb/aorta/experiments/sweep_20251121_155219/tracelens_analysis"), + help='Base path to tracelens_analysis directory (default: %(default)s)' + ) + + parser.add_argument( + '--threads', + type=int, + nargs='+', + default=[256, 512], + help='Thread configurations to analyze (default: %(default)s)' + ) + + parser.add_argument( + '--channels', + type=int, + nargs='+', + default=[28, 42, 56, 70], + help='Channel configurations to analyze (default: %(default)s)' + ) + + parser.add_argument( + '--ranks', + type=int, + nargs='+', + default=list(range(8)), + help='Ranks to analyze (default: 0 1 2 3 4 5 6 7)' + ) + + parser.add_argument( + '-o', + '--output-plot-directory', + type=Path, + default=None, + help='Output CSV filename (default: %(default)s)' + ) + + parser.add_argument( + '--types', + type=str, + nargs='+', + default=['busy_time', 'computation_time', 'exposed_comm_time'] + ) + + + return parser.parse_args() +def main(): + # Parse command line arguments + args = parse_args() + + base_path = 
args.base_path + thread_configs = args.threads + channels = args.channels + ranks = args.ranks + + + output_dir = args.output_plot_directory + if(args.output_plot_directory is None) : + output_dir = base_path / "plots" + + output_dir.mkdir(exist_ok=True, parents=True) + + # Validate base path + if not base_path.exists(): + print(f"Error: Base path does not exist: {base_path}") + return + + print(f"Configuration:") + print(f" Base path: {base_path}") + print(f" Threads: {thread_configs}") + print(f" Channels: {channels}") + print(f" Ranks: {ranks}") + print(f" Output plot directory: {output_dir}") + + all_results = process_comparison_data(base_path=base_path, channel=channels, thread=thread_configs, rank=ranks) + type_list, mean_results = get_thread_and_type_values_over_ranks_with_mean_channel(all_results) + plot_mean_result(output_dir, type_list, mean_results, threads=thread_configs) + +if __name__ == "__main__": + main() + +
Index | {label1} | {label2}
0 | Kernel 0 Sweep 1 | Kernel 0 Sweep 2
1 | Kernel 1 Sweep 1 | Kernel 1 Sweep 2
2 | Kernel 2 Sweep 1 | Kernel 2 Sweep 2
3 | Kernel 3 Sweep 1 | Kernel 3 Sweep 2