import glob
import os
import re
from decimal import Decimal

import distro

from ...util.constants import LOG
from ..constants import UNKNOWN_RESULT
from .oltpbench.config_parser import parse_config_file
from .oltpbench.res_parser import parse_res_file
from .oltpbench.summary_parser import parse_summary_file


def parse_oltpbench_data(results_dir):
    """
    Collect the information needed to send to the performance storage service
    from the files produced by OLTPBench.

    Parameters
    ----------
    results_dir : str
        The directory where the OLTPBench results were stored.

    Returns
    -------
    metadata : dict
        The metadata of the OLTPBench test.
    timestamp : int
        When the test was run, in milliseconds.
    benchmark_type : str
        The benchmark type (e.g., tpcc).
    parameters : dict
        The parameters that were used to run the test.
    metrics : dict
        The metrics gathered from the result of the test.
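
    Examples
    --------
    A minimal sketch of the expected call pattern; 'oltp_result' is a
    hypothetical directory produced by a prior OLTPBench run.

    >>> metadata, timestamp, benchmark_type, parameters, metrics = (
    ...     parse_oltpbench_data('oltp_result'))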
"""
env_metadata = _parse_jenkins_env_vars()
files_metadata, timestamp, benchmark_type, parameters, metrics = parse_oltpbench_files(
results_dir)
metadata = {**env_metadata, **files_metadata}
return metadata, timestamp, benchmark_type, parameters, metrics


def parse_microbenchmark_data(artifact_processor_comparison):
    """
    Collect the information needed to send to the performance storage service
    from the files produced by the microbenchmark.

    Args:
        artifact_processor_comparison (dict): The comparison object generated
            by the artifact processor.

    Returns:
        metadata (dict): The metadata of the microbenchmark test.
        test_suite (str): The name of the test suite.
        test_name (str): The name of the specific benchmark test.
        metrics (dict): The metrics gathered from the result of the test.
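
    Example (hypothetical comparison object; assumes the Jenkins environment
    variables used by parse_standard_metadata are set):
        >>> comparison = {'suite': 'runner', 'test': 'simple_insert',
        ...               'throughput': 1000.0, 'status': 'PASS'}
        >>> metadata, suite, name, metrics = parse_microbenchmark_data(comparison)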
"""
metadata = parse_standard_metadata()
test_suite, test_name, metrics = parse_microbenchmark_comparison(
artifact_processor_comparison)
return metadata, test_suite, test_name, metrics


def parse_standard_metadata():
    """
    Gather the standard metadata information from Jenkins and the DBMS.

    Returns
    -------
    metadata : dict
        The metadata obtained from Jenkins and the DBMS.

    Warnings
    --------
    Underlying implementation is hacky right now.
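
    Examples
    --------
    A sketch of the merged result; the top-level keys below follow from
    _parse_jenkins_env_vars and _parse_db_metadata.

    >>> metadata = parse_standard_metadata()  # requires Jenkins env vars
    >>> sorted(metadata.keys())
    ['environment', 'github', 'jenkins', 'noisepage']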
"""
return {**_parse_jenkins_env_vars(), **_parse_db_metadata()}


def _parse_jenkins_env_vars():
    """
    Parse environment variables from Jenkins and the OS.

    Returns
    -------
    metadata : dict
        Metadata about the Jenkins environment.
        WARNING: Note that cpu_socket is a completely garbage value.
        TODO(WAN): I'd remove cpu_socket except I'm afraid of breakages.
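
    Examples
    --------
    A sketch with hypothetical values for the environment variables that
    Jenkins normally provides (BUILD_ID, GIT_BRANCH, GIT_COMMIT).

    >>> os.environ.update({'BUILD_ID': '42', 'GIT_BRANCH': 'main',
    ...                    'GIT_COMMIT': 'abc123'})
    >>> _parse_jenkins_env_vars()['jenkins']['jenkins_job_id']
    '42'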
"""
# TODO find a way to get the socket number of
os_cpu_socket = 'true'
metadata = {
'jenkins': {
'jenkins_job_id': os.environ['BUILD_ID'],
},
'github': {
'git_branch': os.environ['GIT_BRANCH'],
'git_commit_id': os.environ['GIT_COMMIT'],
},
'environment': {
'os_version': ' '.join(distro.linux_distribution()),
'cpu_number': os.cpu_count(),
'cpu_socket': os_cpu_socket
}
}
return metadata


def parse_oltpbench_files(results_dir):
    """
    Parse information from the config and summary files generated by OLTPBench.

    Parameters
    ----------
    results_dir : str
        The directory where OLTPBench results are stored.

    Returns
    -------
    metadata : dict
        An object containing metadata information.
    timestamp : int
        The timestamp when the benchmark was created, in milliseconds.
        TODO(WAN): wtf is this?
    benchmark_type : str
        The benchmark that was run (e.g., tatp, noop).
    parameters : dict
        Information about the parameters with which the test was run.
    metrics : dict
        The summary measurements that were gathered from the test.
"""
def hack_rename(old_glob_target, new_name):
"""
Wan wants to avoid a rabbit hole of refactoring.
Therefore the new OLTPBench files are being renamed to match old expectations here.
"""
matches = glob.glob(old_glob_target)
assert len(matches) == 1
os.rename(matches[0], new_name)
hack_rename(f'{results_dir}/*.results.csv', f'{results_dir}/oltpbench.res')
hack_rename(f'{results_dir}/*.raw.csv', f'{results_dir}/oltpbench.csv')
hack_rename(f'{results_dir}/*.samples.csv', f'{results_dir}/oltpbench.samples')
hack_rename(f'{results_dir}/*.summary.json', f'{results_dir}/oltpbench.summary')
hack_rename(f'{results_dir}/*.params.json', f'{results_dir}/oltpbench.params')
hack_rename(f'{results_dir}/*.metrics.json', f'{results_dir}/oltpbench.metrics')
hack_rename(f'{results_dir}/*.config.xml', f'{results_dir}/oltpbench.expconfig')
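    # After the renames above, results_dir should contain exactly one of each:
    # oltpbench.res, oltpbench.csv, oltpbench.samples, oltpbench.summary,
    # oltpbench.params, oltpbench.metrics, and oltpbench.expconfig.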
    config_parameters = parse_config_file(results_dir + '/oltpbench.expconfig')
    metadata, timestamp, benchmark_type, summary_parameters, metrics = parse_summary_file(
        results_dir + '/oltpbench.summary')
    metrics['incremental_metrics'] = parse_res_file(results_dir + '/oltpbench.res')
    parameters = {**summary_parameters, **config_parameters}
    return metadata, timestamp, benchmark_type, parameters, metrics


def parse_microbenchmark_comparison(artifact_processor_comparison):
    """
    Extract the test suite, test name, and relevant metrics from the
    comparison object generated by the artifact processor.
    """
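    # Example (hypothetical input): given
    #     {'suite': 'runner', 'test': 'simple_insert',
    #      'throughput': 1000.0, 'status': 'PASS', 'git_branch': 'main'}
    # this returns ('runner', 'simple_insert',
    #     {'throughput': 1000.0, 'status': 'PASS'});
    # keys outside metrics_fields, such as 'git_branch', are dropped.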
    metrics_fields = [
        'throughput', 'stdev_throughput', 'tolerance', 'status', 'iterations',
        'ref_throughput', 'num_results'
    ]
    test_suite = artifact_processor_comparison.get('suite')
    test_name = artifact_processor_comparison.get('test')
    metrics = {}
    for key, value in artifact_processor_comparison.items():
        if key in metrics_fields:
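            # Round floats and Decimals to 15 decimal places; pass other
            # values (e.g., the status string) through unchanged.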
            metrics[key] = round(value, 15) if isinstance(
                value, (float, Decimal)) else value
    return test_suite, test_name, metrics


def _parse_db_metadata():
    """
    Parse metadata from the DBMS.

    Returns
    -------
    metadata : dict
        A dictionary containing metadata about the database.

    Warnings
    --------
    Giant hack that hardcodes the version number.
    The intended fallback is UNKNOWN_RESULT, although the current
    hardcoded implementation never fails.
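
    Examples
    --------
    >>> _parse_db_metadata()
    {'noisepage': {'db_version': '1.0.0'}}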
"""
return {'noisepage': {'db_version': '1.0.0'}}