Commit 8bba737

[BENCHMARK] added script to generate html summary (#55)
* add +x flag to bash script
* commit changes to benchmarker_helper_script.sh
* fix error in local predictions
* add exe flag to script
* column renaming and new generation script for html files
* added aws configuration and upload to s3
* fixed s3 cp command
* adjusted csv and html format
* copy special_operations array
* added timestamp to benchmark summary
* added help description

---------

Co-authored-by: John Calderon <[email protected]>
1 parent 1590dbc commit 8bba737

File tree

6 files changed: +311 / -47 lines

.github/workflows/benchmarking.yaml

Lines changed: 19 additions & 1 deletion
@@ -31,7 +31,7 @@ jobs:
       - name: generate end-to-end and per operation results
         run: |
           sudo apt install python3-pip -y
-          pip3 install pandas
+          pip3 install pandas dominate
           python3 ./experiments/process_results.py --in-dir all_results --out-e2e out_e2e --out-ops out_ops
 
       - name: upload artifact
@@ -42,6 +42,24 @@ jobs:
            ./experiments/out_e2e/
            ./experiments/out_ops/
 
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ secrets.HABITAT_AWS_ACCESS_KEY }}
+          aws-region: ${{ secrets.HABITAT_AWS_REGION }}
+          aws-secret-access-key: ${{ secrets.HABITAT_AWS_SECRET_ACCESS_KEY }}
+
+      - name: Generate html pages
+        run: |
+          python3 ./experiments/generate_html_summary.py --e2e ./experiments/out_e2e --ops ./experiments/out_ops
+          mkdir benchmark_results
+          mv *.html benchmark_results
+
+      - name: Upload to S3
+        run: |
+          aws s3 cp ./benchmark_results s3://centml-releases/habitat/benchmark --recursive
+
+
   experiments-t4:
     runs-on: [self-hosted, dev, t4]
     steps:
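
The new steps install dominate for HTML generation, run the summary script, and push the rendered pages to S3 with the AWS CLI. For illustration only, a rough Python/boto3 equivalent of the recursive upload; the bucket and prefix are taken from the workflow, everything else here (function name, directory layout) is assumed, not part of the change:

# Rough Python sketch of the "Upload to S3" step above, using boto3 instead of the
# AWS CLI. Credentials are expected to come from the environment, as the
# configure-aws-credentials action provides them in CI.
import os
import boto3

def upload_dir(local_dir="benchmark_results",
               bucket="centml-releases", prefix="habitat/benchmark"):
    s3 = boto3.client("s3")
    for root, _, files in os.walk(local_dir):
        for name in files:
            path = os.path.join(root, name)
            key = f"{prefix}/{os.path.relpath(path, local_dir)}"
            s3.upload_file(path, bucket, key)  # mirrors `aws s3 cp --recursive`

if __name__ == "__main__":
    upload_dir()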

analyzer/habitat/analysis/predictor.py

Lines changed: 16 additions & 2 deletions
@@ -1,7 +1,6 @@
 import functools
 import logging
 import operator
-import os
 
 from habitat.analysis import SPECIAL_OPERATIONS
 from habitat.analysis.operation import PredictedOperation
@@ -191,6 +190,9 @@ def _conv2d_scale(self, operation, dest_device, unscaled=False):
 
         if unscaled:
             return pred_dest
+
+        if dest_device.name == operation.device.name:  # local prediction
+            return pred_orig
 
         return operation.run_time_ms * pred_dest / pred_orig
 
@@ -228,7 +230,10 @@ def _conv_transpose2d_scale(self, operation, dest_device, unscaled=False):
 
         if unscaled:
             return pred_dest
-
+
+        if dest_device.name == operation.device.name:  # local prediction
+            return pred_orig
+
         return operation.run_time_ms * pred_dest / pred_orig
 
     def _linear_scale(self, operation, dest_device, unscaled=False):
@@ -268,6 +273,9 @@ def _linear_scale(self, operation, dest_device, unscaled=False):
         if unscaled:
             return pred_dest
 
+        if dest_device.name == operation.device.name:  # local prediction
+            return pred_orig
+
         return operation.run_time_ms * pred_dest / pred_orig
 
     def _bmm_scale(self, operation, dest_device, unscaled=False):
@@ -291,6 +299,9 @@ def _bmm_scale(self, operation, dest_device, unscaled=False):
         if unscaled:
             return pred_dest
 
+        if dest_device.name == operation.device.name:  # local prediction
+            return pred_orig
+
         return operation.run_time_ms * pred_dest / pred_orig
 
     def _lstm_scale(self, operation, dest_device, unscaled=False):
@@ -339,4 +350,7 @@ def _lstm_scale(self, operation, dest_device, unscaled=False):
         if unscaled:
             return pred_dest
 
+        if dest_device.name == operation.device.name:  # local prediction
+            return pred_orig
+
         return operation.run_time_ms * pred_dest / pred_orig
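
For reference, the scaling rule these methods share, plus the added local-prediction branch, can be condensed as below. This is a minimal standalone sketch; the timings and device names are hypothetical, only the formula and the branch come from the diff.

# Minimal sketch of the cross-device scaling rule in predictor.py, including the
# new same-device ("local prediction") branch. Numbers and device names are made up.
def scale_run_time(run_time_ms, pred_orig, pred_dest,
                   origin_name, dest_name, unscaled=False):
    """pred_orig / pred_dest: predicted times on the origin and destination devices;
    run_time_ms: the time measured on the origin device."""
    if unscaled:
        return pred_dest
    if dest_name == origin_name:  # local prediction: return the model's own estimate
        return pred_orig
    return run_time_ms * pred_dest / pred_orig

# e.g. measured 10 ms on the origin GPU, predictor says 8 ms there and 4 ms on the
# destination GPU: cross-device estimate = 10 * 4 / 8 = 5 ms. With the fix, a
# same-device prediction returns the 8 ms estimate instead of scaling by a ratio of 1.
print(scale_run_time(10.0, 8.0, 4.0, "T4", "A100"))  # 5.0
print(scale_run_time(10.0, 8.0, 8.0, "T4", "T4"))    # 8.0 (local prediction)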

experiments/benchmarker_helper_script.sh

File mode changed: 100644 → 100755
experiments/generate_html_summary.py

Lines changed: 246 additions & 0 deletions
@@ -0,0 +1,246 @@
"""
THIS FILE IS INTENDED TO BE USED BY benchmarking.yaml
IT GENERATES A SUMMARY OF CROSS-DEVICE PREDICTIONS IN HTML FORMAT
"""

import argparse
import dominate
from dominate.tags import *
import glob
import pandas as pd
import math
from datetime import datetime

SPECIAL_OPERATIONS = {
    # Convolution
    "conv2d",
    "conv_transpose2d",
    # Matrix multiply operations
    "linear",
    "__matmul__",  # calls the same kernel as linear
    "bmm",
    # Recurrent operations
    "lstm",
    "gru",
    "rnn_tanh",
    "rnn_relu",
}


BENCHMARKER_TITLE = "deepview.predict-benchmark"


def get_pct_error_color(pct_err):
    pct_err = abs(pct_err)
    if pct_err < 0.2:
        return "#088567"
    elif 0.2 <= pct_err < 0.4:
        return "#ffa500"
    else:
        return "#ff0000"


def get_pct_err(predicted, measured):
    return round((predicted - measured) / measured, 3)


def generate_summary(e2e_files):
    doc = dominate.document(title=BENCHMARKER_TITLE)

    with doc.head:
        style(
            """\
            body {
                padding-left: 10px;
                margin-bottom: 50px;
                background-color: #F9F8F1;
                color: #2C232A;
                font-family: sans-serif;
            }
            .model-div {
                display: flex;
                flex-direction: row;
                align-items: flex-start;
                gap: 30px;
            }
            """
        )

    with doc:
        h1(f"Last update: {str(datetime.now())}")

    for f in sorted(list(glob.glob(f"{e2e_files}/*.csv"))):
        table_tile = f.split("/")[-1].split("-")[0]
        df = pd.read_csv(f)
        devices_names = list(df["origin_device"].unique())
        with doc:
            h2(table_tile)
            with div():
                attr(cls="model-div")
                # end-to-end predictions
                file_table = table()
                table_head = thead()
                header_row = tr()
                header_row += th("org_device")
                header_row += th("dst_device")
                header_row += th("run_time_ms_predicted")
                header_row += th("run_time_ms_measured")
                header_row += th("pct_error")
                table_head += header_row
                file_table.add(table_head)

                table_body = tbody()
                for _, item in df.iterrows():
                    row = tr()
                    row += td(item["origin_device"])
                    row += td(item["dest_device"])
                    row += td(round(item["run_time_ms_predicted"], 3))
                    row += td(round(item["run_time_ms_measured"], 3))
                    row += td(round(item["pct_error"], 3))
                    table_body += row

                file_table.add(table_body)

                # cross prediction table
                cross_pred_table = table()
                table_head = thead()
                header_row = tr()
                header_row += th("from \\ to")
                for device in devices_names:
                    header_row += th(f"{device}")
                table_head += header_row
                cross_pred_table.add(table_head)

                # create an NxN table with N = number of devices
                placeholder = [
                    ["x"] * len(devices_names) for _ in range(len(devices_names))
                ]
                hyperlink_names = [
                    ["x"] * len(devices_names) for _ in range(len(devices_names))
                ]
                for _, item in df.iterrows():
                    t_row = devices_names.index(item["origin_device"])
                    t_col = devices_names.index(item["dest_device"])
                    model_bs = table_tile.replace("+", "-")
                    hyperlink_item_name = f"{model_bs}-{item['origin_device']}-{item['dest_device']}-breakdown-combined.html"
                    placeholder[t_row][t_col] = round(item["pct_error"], 3)
                    hyperlink_names[t_row][t_col] = hyperlink_item_name

                table_body = tbody()
                for i, item in enumerate(placeholder):
                    hyperlink_list = hyperlink_names[i]
                    row = tr()
                    row += td(devices_names[i])
                    for j, cross_pred in enumerate(item):
                        if cross_pred == "x":
                            row += td(cross_pred)
                        else:
                            link_name = hyperlink_list[j]
                            color = get_pct_error_color(cross_pred)
                            row += td(
                                a(cross_pred, href=link_name, style=f"color:{color};")
                            )
                    table_body += row
                cross_pred_table.add(table_body)

            footer()

    with open("benchmark_summary.html", "w") as file:
        file.write(doc.render())


def generate_details(ops_files):
    for f in sorted(list(glob.glob(f"{ops_files}/*.csv"))):
        doc = dominate.document(title=BENCHMARKER_TITLE)
        with doc.head:
            style(
                """\
                body {
                    padding-left: 10px;
                    margin-bottom: 150px;
                    background-color: #F9F8F1;
                    color: #2C232A;
                    font-family: sans-serif;
                }
                """
            )

        file_name = f.replace("+", "-").split("/")[-1].replace(".csv", "")
        df = pd.read_csv(f)
        df_special_ops = df[df["operation"].isin(SPECIAL_OPERATIONS)]
        df_no_special_ops = df[~df["operation"].isin(SPECIAL_OPERATIONS)]
        mlp_err = get_pct_err(
            df_special_ops["run_time_ms_predicted"].sum(),
            df_special_ops["run_time_ms_measured"].sum(),
        )
        wave_scale_err = get_pct_err(
            df_no_special_ops["run_time_ms_predicted"].sum(),
            df_no_special_ops["run_time_ms_measured"].sum(),
        )

        err_tbl = [("mlp err", mlp_err), ("wave scale err", wave_scale_err)]

        col_names = df.columns.to_list()
        with doc:
            h1(file_name)
            with div():
                err_table = table()
                table_head = thead()
                header_row = tr()
                header_row += th("category")
                header_row += th("pct err")
                table_head += header_row
                err_table.add(table_head)

                table_body = tbody()
                for name, err in err_tbl:
                    row = tr()
                    row += td(name)
                    row += td(err)
                    table_body += row
                err_table.add(table_body)

            br()

            with div():
                file_table = table()
                table_head = thead()
                header_row = tr()
                for c in col_names:
                    header_row += th(c)
                table_head += header_row
                file_table.add(table_head)

                table_body = tbody()
                for _, item in df.iterrows():
                    row = tr()
                    row += td(item["operation"])
                    row += td(round(item["run_time_ms_predicted"], 3))
                    row += td(round(item["unscaled_predicted_ms"], 3))
                    row += td(round(item["run_time_ms_measured"], 3))
                    row += td(round(item["wgt_pred_time"], 3))
                    row += td(round(item["pct_error"], 3))
                    row += td(
                        item["args"]
                        if isinstance(item["args"], str) or not math.isnan(item["args"])
                        else "[]"
                    )
                    row += td(round(item["ktime_local_ms"], 3))
                    row += td(round(item["runtime_local_ms"], 3))
                    row += td(round(item["predicted_local_ms"], 3))
                    table_body += row

                file_table.add(table_body)

        with open(f"{file_name}.html", "w") as f:
            f.write(doc.render())


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--e2e", required=True, help="Path to e2e folder")
    parser.add_argument("--ops", required=True, help="Path to ops folder")
    args = parser.parse_args()

    generate_summary(args.e2e)
    generate_details(args.ops)
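
As a quick way to exercise the script outside CI, here is a hypothetical smoke test: write one tiny e2e CSV with the columns generate_summary() reads and render the summary page from it. The directory, file-name pattern, device names, and module name below are assumptions for illustration, not part of the commit.

# Hypothetical smoke test: create a toy e2e CSV with the columns generate_summary()
# reads, then render benchmark_summary.html from it.
import os
import pandas as pd

os.makedirs("toy_e2e", exist_ok=True)
pd.DataFrame({
    "origin_device": ["T4", "RTX2080Ti"],
    "dest_device": ["RTX2080Ti", "T4"],
    "run_time_ms_predicted": [5.1, 9.8],
    "run_time_ms_measured": [5.0, 10.2],
    "pct_error": [0.020, -0.039],
}).to_csv("toy_e2e/resnet50+32-e2e.csv", index=False)  # "<model>+<batch>-..." naming assumed

# from generate_html_summary import generate_summary  # module name assumed
# generate_summary("toy_e2e")  # writes benchmark_summary.html in the working directory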
