Commit e517138

download_benchmarks: Script for downloading benchmark results from CircleCI
1 parent: 803585d

File tree

5 files changed: +630 −3 lines changed

.circleci/config.yml (+10 −3)
@@ -666,9 +666,16 @@ jobs:
           name: Install pip
           command: apt -q update && apt install -y python3-pip
       - run:
-          name: Install pylint
-          command: python3 -m pip install pylint z3-solver pygments-lexer-solidity parsec tabulate deepdiff colorama
-          # also z3-solver, parsec and tabulate to make sure pylint knows about this module, pygments-lexer-solidity for docs
+          name: Install pylint and dependencies of the scripts that will be linted
+          command: python3 -m pip install
+              pylint
+              z3-solver
+              pygments-lexer-solidity
+              parsec
+              tabulate
+              deepdiff
+              colorama
+              requests
       - run:
           name: Linting Python Scripts
           command: ./scripts/pylint_all.py
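The reformatted step is behavior-preserving: YAML folds the indented continuation lines of a plain scalar into a single space-separated string, so pip still receives one install command. The new requests entry covers the scripts added below, matching the step's new name. A minimal sketch of the folding behavior, assuming PyYAML is available (it is not a dependency of this commit):

import yaml  # assumption: PyYAML, used here only to illustrate scalar folding

step = yaml.safe_load(
    'command: python3 -m pip install\n'
    '    pylint\n'
    '    z3-solver\n'
)
# Continuation lines of a plain scalar fold into single spaces.
assert step['command'] == 'python3 -m pip install pylint z3-solver'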

scripts/common/git_helpers.py (new file, +19)
import subprocess


def run_git_command(command):
    process = subprocess.run(
        command,
        encoding='utf8',
        capture_output=True,
        check=True,
    )
    return process.stdout.strip()


def git_current_branch():
    return run_git_command(['git', 'symbolic-ref', 'HEAD', '--short'])


def git_commit_hash(ref: str = 'HEAD'):
    return run_git_command(['git', 'rev-parse', '--verify', ref])
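Both helpers shell out to git and return trimmed stdout; note that check=True makes them raise subprocess.CalledProcessError on failure, e.g. git_current_branch() on a detached HEAD. A minimal usage sketch, assuming it runs from a git checkout with scripts/ on sys.path (as download_benchmarks.py arranges below):

from common.git_helpers import git_current_branch, git_commit_hash

print(git_current_branch())               # e.g. 'develop'
print(git_commit_hash())                  # full hash of HEAD
print(git_commit_hash('origin/develop'))  # any ref accepted by rev-parse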

scripts/common/rest_api_helpers.py (new file, +171)
from pathlib import Path
from typing import List, Mapping, Optional
import functools
import json
import operator
import shutil

import requests


class APIHelperError(Exception):
    pass

class DataUnavailable(APIHelperError):
    pass

class InvalidResponse(APIHelperError):
    pass

class FileAlreadyExists(APIHelperError):
    pass


def query_api(url: str, params: Mapping[str, str], debug_requests=False) -> dict:
    if debug_requests:
        print(f'REQUEST URL: {url}')
        if len(params) > 0:
            print(f'QUERY: {params}')

    response = requests.get(url, params=params)
    response.raise_for_status()

    if debug_requests:
        json_response = response.json()
        print('========== RESPONSE ==========')
        if json_response is not None:
            print(json.dumps(json_response, indent=4))
        else:
            print(response.content)
        print('==============================')

    return response.json()


def download_file(url: str, target_path: Path, overwrite=False):
    if not overwrite and target_path.exists():
        raise FileAlreadyExists(f"Refusing to overwrite existing file: '{target_path}'.")

    with requests.get(url, stream=True) as request:
        with open(target_path, 'wb') as target_file:
            shutil.copyfileobj(request.raw, target_file)

class Github:
    BASE_URL = 'https://api.github.com'

    project_slug: str
    debug_requests: bool

    def __init__(self, project_slug: str, debug_requests: bool):
        self.project_slug = project_slug
        self.debug_requests = debug_requests

    def pull_request(self, pr_id: int) -> dict:
        return query_api(
            f'{self.BASE_URL}/repos/{self.project_slug}/pulls/{pr_id}',
            {},
            self.debug_requests
        )


class CircleCI:
    # None might be a more logical default for max_pages but in most cases we'll actually
    # want some limit to prevent flooding the API with requests in case of a bug.
    DEFAULT_MAX_PAGES = 10
    BASE_URL = 'https://circleci.com/api/v2'

    project_slug: str
    debug_requests: bool

    def __init__(self, project_slug: str, debug_requests: bool):
        self.project_slug = project_slug
        self.debug_requests = debug_requests

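    # The CircleCI v2 API paginates list endpoints: each response carries a
    # 'next_page_token' that must be sent back as the 'page-token' query
    # parameter to fetch the next page; a null token marks the last page.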
    def paginated_query_api_iterator(self, url: str, params: Mapping[str, str], max_pages: Optional[int] = DEFAULT_MAX_PAGES):
        assert 'page-token' not in params

        page_count = 0
        next_page_token = None
        while max_pages is None or page_count < max_pages:
            if next_page_token is not None:
                params = {**params, 'page-token': next_page_token}

            json_response = query_api(url, params, self.debug_requests)

            yield json_response['items']
            next_page_token = json_response['next_page_token']
            page_count += 1
            if next_page_token is None:
                break

    def paginated_query_api(self, url: str, params: Mapping[str, str], max_pages: Optional[int] = DEFAULT_MAX_PAGES):
        return functools.reduce(operator.add, self.paginated_query_api_iterator(url, params, max_pages), [])

    def pipelines(
        self,
        branch: Optional[str] = None,
        commit_hash: Optional[str] = None,
        excluded_trigger_types: Optional[List[str]] = None,
    ) -> List[dict]:
        if excluded_trigger_types is None:
            excluded_trigger_types = []

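        # Pipelines come back newest first; stop at the first page that
        # contains any matches instead of walking all pages.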
        for items in self.paginated_query_api_iterator(
            f'{self.BASE_URL}/project/gh/{self.project_slug}/pipeline',
            {'branch': branch} if branch is not None else {},
            max_pages=10,
        ):
            matching_items = [
                item
                for item in items
                if (
                    (commit_hash is None or item['vcs']['revision'] == commit_hash) and
                    item['trigger']['type'] not in excluded_trigger_types
                )
            ]
            if len(matching_items) > 0:
                return matching_items

        return []

    def workflows(self, pipeline_id: str) -> List[dict]:
        return self.paginated_query_api(f'{self.BASE_URL}/pipeline/{pipeline_id}/workflow', {})

    def jobs(self, workflow_id: str) -> Mapping[str, dict]:
        items = self.paginated_query_api(f'{self.BASE_URL}/workflow/{workflow_id}/job', {})
        jobs_by_name = {job['name']: job for job in items}

        assert len(jobs_by_name) <= len(items)
        if len(jobs_by_name) < len(items):
            raise InvalidResponse("Job names in the workflow are not unique.")

        return jobs_by_name

    def job(self, workflow_id: str, name: str, require_success: bool = False) -> dict:
        jobs = self.jobs(workflow_id)
        if name not in jobs:
            raise DataUnavailable(f"Job {name} is not present in the workflow.")

        if require_success and jobs[name]['status'] != 'success':
            raise DataUnavailable(
                f"Job {name} has failed or is still running. "
                f"Current status: {jobs[name]['status']}."
            )

        return jobs[name]

    def artifacts(self, job_number: int) -> Mapping[str, dict]:
        items = self.paginated_query_api(f'{self.BASE_URL}/project/gh/{self.project_slug}/{job_number}/artifacts', {})
        artifacts_by_name = {artifact['path']: artifact for artifact in items}

        assert len(artifacts_by_name) <= len(items)
        if len(artifacts_by_name) < len(items):
            raise InvalidResponse("Names of artifacts attached to the job are not unique.")

        return artifacts_by_name

    @staticmethod
    def latest_item(items: List[dict]) -> Optional[dict]:
        sorted_items = sorted(items, key=lambda item: item['created_at'], reverse=True)
        return sorted_items[0] if len(sorted_items) > 0 else None
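Putting the helpers together: a minimal sketch (branch and job names taken from the script below, results hypothetical) of walking from a branch to a job's artifacts. No authentication is sent, so this relies on the project's API being publicly readable:

from common.rest_api_helpers import CircleCI

circleci = CircleCI('ethereum/solidity', debug_requests=False)

# Newest matching pipeline on the branch, excluding scheduled (nightly) runs.
pipeline = CircleCI.latest_item(
    circleci.pipelines(branch='develop', excluded_trigger_types=['schedule'])
)
workflow_id = CircleCI.latest_item(circleci.workflows(pipeline['id']))['id']
job = circleci.job(workflow_id, 'c_ext_benchmarks', require_success=True)
artifacts = circleci.artifacts(int(job['job_number']))
print(sorted(artifacts))  # artifact paths, keyed as returned by artifacts()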
scripts/externalTests/download_benchmarks.py (new file, +172)
#!/usr/bin/env python3

from argparse import ArgumentParser, Namespace
from pathlib import Path
from typing import Mapping, Optional
import sys

import requests

# Our scripts/ is not a proper Python package so we need to modify PYTHONPATH to import from it
# pragma pylint: disable=import-error,wrong-import-position
SCRIPTS_DIR = Path(__file__).parent.parent
sys.path.insert(0, str(SCRIPTS_DIR))

from common.git_helpers import git_current_branch, git_commit_hash
from common.rest_api_helpers import APIHelperError, CircleCI, Github, download_file
# pragma pylint: enable=import-error,wrong-import-position


def process_commandline() -> Namespace:
    script_description = (
        "Downloads benchmark results attached as artifacts to the c_ext_benchmarks job on CircleCI. "
        "If no options are specified, downloads results for the currently checked out git branch."
    )

    parser = ArgumentParser(description=script_description)

    target_definition = parser.add_mutually_exclusive_group()
    target_definition.add_argument(
        '--branch',
        dest='branch',
        help="Git branch that the job ran on.",
    )
    target_definition.add_argument(
        '--pr',
        dest='pull_request_id',
        type=int,
        help="Github PR ID that the job ran on.",
    )
    target_definition.add_argument(
        '--base-of-pr',
        dest='base_of_pr',
        type=int,
        help="ID of a Github PR that's based on top of the branch we're interested in."
    )

    parser.add_argument(
        '--any-commit',
        dest='ignore_commit_hash',
        default=False,
        action='store_true',
        help="Include pipelines that ran on a different commit as long as branch/PR matches."
    )
    parser.add_argument(
        '--overwrite',
        dest='overwrite',
        default=False,
        action='store_true',
        help="If artifacts already exist on disk, overwrite them.",
    )
    parser.add_argument(
        '--debug-requests',
        dest='debug_requests',
        default=False,
        action='store_true',
        help="Print detailed info about performed API requests and received responses.",
    )

    return parser.parse_args()


def download_benchmark_artifact(
    artifacts: Mapping[str, dict],
    benchmark_name: str,
    branch: str,
    commit_hash: str,
    overwrite: bool,
    silent: bool = False
):
    if not silent:
        print(f"Downloading artifact: {benchmark_name}-{branch}-{commit_hash[:8]}.json.")

    artifact_path = f'reports/externalTests/{benchmark_name}.json'

    if artifact_path not in artifacts:
        raise RuntimeError(f"Missing artifact: {artifact_path}.")

    download_file(
        artifacts[artifact_path]['url'],
        Path(f'{benchmark_name}-{branch}-{commit_hash[:8]}.json'),
        overwrite,
    )


def download_benchmarks(
    branch: Optional[str],
    pull_request_id: Optional[int],
    base_of_pr: Optional[int],
    ignore_commit_hash: bool = False,
    overwrite: bool = False,
    debug_requests: bool = False,
    silent: bool = False,
):
    github = Github('ethereum/solidity', debug_requests)
    circleci = CircleCI('ethereum/solidity', debug_requests)

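    # Resolve the target branch and expected commit: default to the local
    # checkout, or use the head of --pr, or the base branch of --base-of-pr.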
    expected_commit_hash = None
    if branch is None and pull_request_id is None and base_of_pr is None:
        branch = git_current_branch()
        expected_commit_hash = git_commit_hash()
    elif branch is not None:
        expected_commit_hash = git_commit_hash(branch)
    elif pull_request_id is not None:
        pr_info = github.pull_request(pull_request_id)
        branch = pr_info['head']['ref']
        expected_commit_hash = pr_info['head']['sha']
    elif base_of_pr is not None:
        pr_info = github.pull_request(base_of_pr)
        branch = pr_info['base']['ref']
        expected_commit_hash = pr_info['base']['sha']

    if not silent:
        print(
            f"Looking for pipelines that ran on branch {branch}" +
            (f", commit {expected_commit_hash}." if not ignore_commit_hash else " (any commit).")
        )

    pipeline = circleci.latest_item(circleci.pipelines(
        branch,
        expected_commit_hash if not ignore_commit_hash else None,
        # Skip nightly workflows. They don't have the c_ext_benchmarks job and even if they did,
        # they would likely be running a different set of external tests.
        excluded_trigger_types=['schedule'],
    ))
    if pipeline is None:
        raise RuntimeError("No matching pipelines found.")

    actual_commit_hash = pipeline['vcs']['revision']
    workflow_id = circleci.latest_item(circleci.workflows(pipeline['id']))['id']
    benchmark_collector_job = circleci.job(workflow_id, 'c_ext_benchmarks', require_success=True)

    artifacts = circleci.artifacts(int(benchmark_collector_job['job_number']))

    download_benchmark_artifact(artifacts, 'summarized-benchmarks', branch, actual_commit_hash, overwrite, silent)
    download_benchmark_artifact(artifacts, 'all-benchmarks', branch, actual_commit_hash, overwrite, silent)


def main():
    try:
        options = process_commandline()
        download_benchmarks(
            options.branch,
            options.pull_request_id,
            options.base_of_pr,
            options.ignore_commit_hash,
            options.overwrite,
            options.debug_requests,
        )

        return 0
    except APIHelperError as exception:
        print(f"[ERROR] {exception}", file=sys.stderr)
        return 1
    except requests.exceptions.HTTPError as exception:
        print(f"[ERROR] {exception}", file=sys.stderr)
        return 1
    except RuntimeError as exception:
        print(f"[ERROR] {exception}", file=sys.stderr)
        return 1

if __name__ == '__main__':
    sys.exit(main())
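Because download_benchmarks() is separate from the CLI wrapper, it can also be driven programmatically. A hypothetical example, equivalent to running download_benchmarks.py --base-of-pr 12345 --overwrite (the PR number is made up; the import assumes scripts/ is on sys.path):

from externalTests.download_benchmarks import download_benchmarks

download_benchmarks(
    branch=None,
    pull_request_id=None,
    base_of_pr=12345,  # hypothetical PR number
    overwrite=True,
)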
