[Nightly] Enhance op microbench #1602

Open · wants to merge 5 commits into base: main
237 changes: 237 additions & 0 deletions .github/scripts/microbench_summary.py
@@ -0,0 +1,237 @@
"""
Microbenchmark Summary Tool - Parses performance logs and generates CSV/Excel reports
# Usage
# Summary forward op time, forward_op_summary.csv is forward summary file
python microbench_summary.py path/to/profile's log forward_op_summary.csv
# Summary backward op time, backward_op_summary.csv is backward summary file, True means summary backward, default is false.
python microbench_summary.py path/to/profile's log backward_op_summary.csv --backward
"""

import argparse
import glob
import os
import re
from pathlib import Path
from typing import Dict, List

import pandas as pd

def main():
    parser = argparse.ArgumentParser(
        description="Parse performance logs and generate summary reports",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("log_dir", help="Directory containing log files")
    parser.add_argument("output_file", help="Output CSV file path")
    parser.add_argument("--backward", action="store_true",
                        help="Process backward operations instead of forward")
    args = parser.parse_args()

    try:
        df = parse_logs(args.log_dir, args.backward)
        if df.empty:
            print("Warning: No valid data found in log files!")
            return

        save_reports(df, args.output_file)
        print(f"Successfully generated reports: {args.output_file} and "
              f"{args.output_file.replace('.csv', '.xlsx')}")
    except Exception as e:
        print(f"Error: {e}")
        raise

def parse_logs(log_dir: str, get_backward: bool = False) -> pd.DataFrame:
    data = []
    columns = [
        "case_name", "datatype", "op_name", "shape", "channels_last", "dim",
        "output_size", "P", "reduce", "kernel_size", "stride", "replacement",
        "num_samples", "scale_factor", "mode", "padding_mode", "align_corners",
        "shifts", "affine", "backward", "time(us)",
    ]

    for log_file in glob.glob(os.path.join(log_dir, "*.log")):
        try:
            with open(log_file) as f:
                content = f.read()

            # The base op name is the last dot-separated component of the
            # file stem, e.g. "suite.max_pool2d.log" -> "max_pool2d".
            case_name = Path(log_file).stem
            base_op_name = case_name.split('.')[-1]
            op_name, time_pattern = get_op_pattern(base_op_name, get_backward)

            # l1_loss backward is reported as separate Abs/Mean autograd
            # nodes and needs dedicated handling.
            if get_backward and base_op_name == "l1_loss":
                process_l1_loss(content, case_name, data, columns)
                continue

            time_matches = extract_times(content, time_pattern, get_backward)
            shape_lines = re.findall(r"(shape\s*[:=].*?)(?=\n\S|$)", content)

            # Pair each timing row with its shape/param line by index.
            for i, (time, unit) in enumerate(time_matches[:len(shape_lines)]):
                time_us = convert_to_us(float(time), unit)
                params = extract_params(shape_lines[i])

                if get_backward and params.get("backward", "False") == "False":
                    continue

                record = create_record(params, case_name, op_name, str(get_backward), time_us)
                data.append([record.get(col, "") for col in columns])

        except Exception as e:
            print(f"Warning: Error processing {log_file} - {e}")
            continue

    return pd.DataFrame(data, columns=columns) if data else pd.DataFrame()

def get_op_pattern(base_op_name: str, get_backward: bool) -> tuple:
    # Map a case's base op name to (reported op name, pattern used to find
    # its timing row). Order matters: more specific keys (e.g. 'log_normal',
    # 'scatter_add', 'fractional_max_pool2d') must precede the shorter names
    # they contain, because matching is a substring test on base_op_name.
    op_name_map = {
        'forward': {
            'batch_norm': ('aten::batch_norm', 'aten::batch_norm'),
            'unique': ('unique2', 'unique2'),
            'fractional_max_pool2d': ('fractional_max_pool2d', r'\bfractional_max_pool2d\b'),
            'fractional_max_pool3d': ('fractional_max_pool3d', r'\bfractional_max_pool3d\b'),
            'adaptive_max_pool2d': ('adaptive_max_pool2d', r'\badaptive_max_pool2d\b'),
            'max_pool3d': ('max_pool3d_with_indices', 'max_pool3d_with_indices '),
            'max_pool2d': ('max_pool2d_with_indices', 'max_pool2d_with_indices '),
            'exponential': ('exponential_', r'\bexponential_\b'),
            'geometric': ('geometric_', r'\bgeometric_\b'),
            'uniform': ('uniform_', r'\buniform_\b'),
            'random': ('random_', r'\brandom_\b'),
            'log_normal': ('log_normal_', r'\blog_normal_\b'),
            'normal': ('normal_', r'\bnormal_\b'),
            'bernoulli': ('bernoulli_', r'\bbernoulli_\b'),
            'cauchy': ('cauchy_', r'\bcauchy_\b'),
            'embedding_bag': ('_embedding_bag', r'\b_embedding_bag\b'),
            'nonzero': ('nonzero', r'\bnonzero\b'),
            'index_fill': ('index_fill_', r'\bindex_fill_\b'),
            'index_put': ('index_put_', r'\bindex_put_\b'),
            'put': ('put_', r'\bput_\b'),
            'masked_fill': ('masked_fill_', r'\bmasked_fill_\b'),
            'scatter_add': ('scatter_add_', r'\bscatter_add_\b'),
            'scatter': ('scatter_', r'\bscatter_\b'),
            'dropout': ('dropout', r'\bdropout\b'),
            'layer_norm': ('layer_norm', r'\blayer_norm\b'),
            'ctc_loss': ('_ctc_loss', r'\b_ctc_loss\b'),
            'adaptive_avg_pool2d': ('adaptive_avg_pool2d', r'\badaptive_avg_pool2d\b'),
            'softmax': ('aten::softmax', 'aten::softmax'),
            'group_norm': ('aten::group_norm', 'aten::group_norm'),
        },
        'backward': {
            'batch_norm': ('batch_norm_backward', 'batch_norm_backward'),
            'fractional_max_pool2d': ('fractional_max_pool2d_backward', r'\bfractional_max_pool2d_backward\b'),
            'fractional_max_pool3d': ('fractional_max_pool3d_backward', r'\bfractional_max_pool3d_backward\b'),
            'adaptive_max_pool2d': ('adaptive_max_pool2d_backward', r'\badaptive_max_pool2d_backward\b'),
            'max_unpool2d': ('MaxUnpool2DBackward0', 'MaxUnpool2DBackward0 '),
            'max_unpool3d': ('MaxUnpool3DBackward0', 'MaxUnpool3DBackward0 '),
            'max_pool3d': ('max_pool3d_with_indices_backward', 'max_pool3d_with_indices_backward '),
            'max_pool2d': ('max_pool2d_with_indices_backward', 'max_pool2d_with_indices_backward '),
            'col2im': ('Col2ImBackward0', 'Col2ImBackward0 '),
            'im2col': ('Im2ColBackward0', 'Im2ColBackward0 '),
            'flip': ('FlipBackward0', 'FlipBackward0 '),
            'matmul': ('MmBackward0', 'MmBackward0 '),
            'roll': ('RollBackward0', 'RollBackward0 '),
            'softmax': ('softmax_backward_data', 'softmax_backward_data '),
            'remainder': ('RemainderBackward0', 'RemainderBackward0 '),
            'smooth_l1_loss': ('smooth_l1_loss_backward', 'smooth_l1_loss_backward'),
            'l1_loss': ('l1_loss', 'l1_loss'),
        },
    }

    mode = 'backward' if get_backward else 'forward'

    for op_pattern in op_name_map[mode]:
        if op_pattern in base_op_name:
            return op_name_map[mode][op_pattern]

    # Default: the op name itself, with a trailing space so the pattern does
    # not match longer op names sharing the same prefix.
    if get_backward:
        return (f"{base_op_name}_backward", f"{base_op_name}_backward ")
    return (base_op_name, f"{base_op_name} ")
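# For example, get_op_pattern("max_pool2d", False) returns
# ('max_pool2d_with_indices', 'max_pool2d_with_indices '), while an unmapped
# op such as "abs" falls through to ('abs', 'abs ').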

def process_l1_loss(content: str, case_name: str, data: List, columns: List):
    # l1_loss backward shows up as AbsBackward0 and MeanBackward0 autograd
    # nodes; drop "autograd::engine" wrapper rows so only kernel rows match.
    filtered_content = '\n'.join(
        line for line in content.split('\n') if "autograd::engine" not in line
    )
    abs_times = re.findall(r"AbsBackward0(?:\s+\S+){8}\s+(\d+\.?\d*)([a-zA-Z]*)", filtered_content)
    mean_times = re.findall(r"MeanBackward0(?:\s+\S+){8}\s+(\d+\.?\d*)([a-zA-Z]*)", filtered_content)
    shape_lines = re.findall(r"(shape\s*[:=].*?)(?=\n\S|$)", content)

    # The first six shape lines are paired with AbsBackward0 timings, the
    # remainder with MeanBackward0.
    for i, (time, unit) in enumerate(abs_times[:6]):
        if i >= len(shape_lines):
            break
        time_us = convert_to_us(float(time), unit)
        params = extract_params(shape_lines[i])
        record = create_record(params, case_name, "AbsBackward0", "True", time_us)
        data.append([record.get(col, "") for col in columns])

    for i, (time, unit) in enumerate(mean_times):
        if (i + 6) >= len(shape_lines):
            break
        time_us = convert_to_us(float(time), unit)
        params = extract_params(shape_lines[i + 6])
        record = create_record(params, case_name, "MeanBackward0", "True", time_us)
        data.append([record.get(col, "") for col in columns])

def extract_times(content: str, pattern: str, get_backward: bool) -> List:
    # For backward ops timed via autograd graph nodes, skip the
    # "autograd::engine" wrapper rows so each kernel is counted once.
    graph_node_ops = ("Col2ImBackward0", "Im2ColBackward0", "FlipBackward0",
                      "MmBackward0", "RollBackward0",
                      "MaxUnpool2DBackward0", "MaxUnpool3DBackward0")
    skip_engine_rows = get_backward and any(x in pattern for x in graph_node_ops)

    results = []
    for line in content.split('\n'):
        if skip_engine_rows and "autograd::engine" in line:
            continue

        # Match the op name, skip eight whitespace-separated fields, then
        # capture the next numeric value and its unit suffix (us/ms/s).
        match = re.search(fr"{pattern}.*?(?:\s+\S+){{8}}\s+(\d+\.?\d*)([a-zA-Z]*)", line)
        if match:
            results.append((match.group(1), match.group(2)))

    return results
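# A minimal doctest-style sketch on a hypothetical profiler row (the real
# log layout is assumed, but the behavior on this exact string holds):
#   >>> extract_times(
#   ...     "aten::abs 1.00% 2.0us 1.0% 2.0us 2.0us 99.0% 5.0us 5.0us 5.0us 1",
#   ...     "aten::abs", False)
#   [('5.0', 'us')]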

def create_record(params: Dict, case_name: str, op_name: str,
                  backward: str, time_us: float) -> Dict:
    # Seed "P" from the lowercase 'p' param; the fixed fields come after
    # **params so they always take precedence over parsed values.
    return {
        "P": params.get("p", ""),
        **params,
        "case_name": case_name,
        "op_name": op_name,
        "backward": backward,
        "time(us)": time_us,
    }

def convert_to_us(value: float, unit: str) -> float:
    # Normalize profiler time units to microseconds; bare or unrecognized
    # units are assumed to already be in us.
    unit = unit.lower()
    if unit == "ms":
        return value * 1000
    elif unit == "s":
        return value * 1_000_000
    return value
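# Examples: convert_to_us(2.5, "ms") == 2500.0, convert_to_us(1.0, "s")
# == 1000000.0, and a bare "us" (or missing) unit returns the value as-is.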

def extract_params(text: str) -> Dict:
    params = {}
    # Param lines look like "shape: ...; key=value; ..."; split on ';' and
    # accept either ':' or '=' as the key/value delimiter.
    pairs = re.split(r'[;]', text.strip())

    for pair in pairs:
        if not any(delim in pair for delim in [':', '=']):
            continue

        delim = ':' if ':' in pair else '='
        key, value = pair.split(delim, 1)
        key = key.strip().lower()
        value = value.strip()

        # Normalize aliases to canonical column names (keys are already
        # lowercased, so 'P' -> 'p' and 'shape' need no extra handling).
        if key in ('dims', 'dim'):
            key = 'dim'

        params[key] = value

    return params
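# On a hypothetical param line such as
#   "shape: (8, 3, 224, 224); datatype: torch.float16; dims: 1; backward: True"
# this yields {'shape': '(8, 3, 224, 224)', 'datatype': 'torch.float16',
# 'dim': '1', 'backward': 'True'}.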

def save_reports(df: pd.DataFrame, csv_path: str):
    os.makedirs(os.path.dirname(csv_path) or '.', exist_ok=True)
    # ';' separator keeps comma-containing values (e.g. shape tuples) in a
    # single CSV field.
    df.to_csv(csv_path, index=False, sep=';')
    excel_path = csv_path.replace('.csv', '.xlsx')
    df.to_excel(excel_path, index=False)


if __name__ == "__main__":
    main()