Commit e517138

download_benchmarks: Script for downloading benchmark results from CircleCI
1 parent: 803585d

File tree

5 files changed: +630 −3 lines changed

.circleci/config.yml (+10 −3)
@@ -666,9 +666,16 @@ jobs:
           name: Install pip
           command: apt -q update && apt install -y python3-pip
       - run:
-          name: Install pylint
-          command: python3 -m pip install pylint z3-solver pygments-lexer-solidity parsec tabulate deepdiff colorama
-          # also z3-solver, parsec and tabulate to make sure pylint knows about this module, pygments-lexer-solidity for docs
+          name: Install pylint and dependencies of the scripts that will be linted
+          command: python3 -m pip install
+              pylint
+              z3-solver
+              pygments-lexer-solidity
+              parsec
+              tabulate
+              deepdiff
+              colorama
+              requests
       - run:
           name: Linting Python Scripts
           command: ./scripts/pylint_all.py
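The reformatted step is behavior-preserving: YAML folds the indented continuation lines of a plain scalar into a single space-separated string, so pip still receives one install command. The new requests entry covers the scripts added below, matching the step's new name. A minimal sketch of the folding behavior, assuming PyYAML is available (it is not a dependency of this commit):

import yaml  # assumption: PyYAML, used here only to illustrate scalar folding

step = yaml.safe_load(
    'command: python3 -m pip install\n'
    '    pylint\n'
    '    z3-solver\n'
)
# Continuation lines of a plain scalar fold into single spaces.
assert step['command'] == 'python3 -m pip install pylint z3-solver'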

scripts/common/git_helpers.py (new file, +19)
import subprocess


def run_git_command(command):
    process = subprocess.run(
        command,
        encoding='utf8',
        capture_output=True,
        check=True,
    )
    return process.stdout.strip()


def git_current_branch():
    return run_git_command(['git', 'symbolic-ref', 'HEAD', '--short'])


def git_commit_hash(ref: str = 'HEAD'):
    return run_git_command(['git', 'rev-parse', '--verify', ref])
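Both helpers shell out to git and return trimmed stdout; note that check=True makes them raise subprocess.CalledProcessError on failure, e.g. git_current_branch() on a detached HEAD. A minimal usage sketch, assuming it runs from a git checkout with scripts/ on sys.path (as download_benchmarks.py arranges below):

from common.git_helpers import git_current_branch, git_commit_hash

print(git_current_branch())               # e.g. 'develop'
print(git_commit_hash())                  # full hash of HEAD
print(git_commit_hash('origin/develop'))  # any ref accepted by rev-parse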

scripts/common/rest_api_helpers.py (new file, +171)
from pathlib import Path
from typing import List, Mapping, Optional
import functools
import json
import operator
import shutil

import requests


class APIHelperError(Exception):
    pass

class DataUnavailable(APIHelperError):
    pass

class InvalidResponse(APIHelperError):
    pass

class FileAlreadyExists(APIHelperError):
    pass


def query_api(url: str, params: Mapping[str, str], debug_requests=False) -> dict:
    if debug_requests:
        print(f'REQUEST URL: {url}')
        if len(params) > 0:
            print(f'QUERY: {params}')

    response = requests.get(url, params=params)
    response.raise_for_status()

    if debug_requests:
        json_response = response.json()
        print('========== RESPONSE ==========')
        if json_response is not None:
            print(json.dumps(json_response, indent=4))
        else:
            print(response.content)
        print('==============================')

    return response.json()


def download_file(url: str, target_path: Path, overwrite=False):
    if not overwrite and target_path.exists():
        raise FileAlreadyExists(f"Refusing to overwrite existing file: '{target_path}'.")

    with requests.get(url, stream=True) as request:
        with open(target_path, 'wb') as target_file:
            shutil.copyfileobj(request.raw, target_file)

class Github:
    BASE_URL = 'https://api.github.com'

    project_slug: str
    debug_requests: bool

    def __init__(self, project_slug: str, debug_requests: bool):
        self.project_slug = project_slug
        self.debug_requests = debug_requests

    def pull_request(self, pr_id: int) -> dict:
        return query_api(
            f'{self.BASE_URL}/repos/{self.project_slug}/pulls/{pr_id}',
            {},
            self.debug_requests
        )


class CircleCI:
    # None might be a more logical default for max_pages but in most cases we'll actually
    # want some limit to prevent flooding the API with requests in case of a bug.
    DEFAULT_MAX_PAGES = 10
    BASE_URL = 'https://circleci.com/api/v2'

    project_slug: str
    debug_requests: bool

    def __init__(self, project_slug: str, debug_requests: bool):
        self.project_slug = project_slug
        self.debug_requests = debug_requests

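    # The CircleCI v2 API paginates list endpoints: each response carries a
    # 'next_page_token' that must be sent back as the 'page-token' query
    # parameter to fetch the next page; a null token marks the last page.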
    def paginated_query_api_iterator(self, url: str, params: Mapping[str, str], max_pages: Optional[int] = DEFAULT_MAX_PAGES):
        assert 'page-token' not in params

        page_count = 0
        next_page_token = None
        while max_pages is None or page_count < max_pages:
            if next_page_token is not None:
                params = {**params, 'page-token': next_page_token}

            json_response = query_api(url, params, self.debug_requests)

            yield json_response['items']
            next_page_token = json_response['next_page_token']
            page_count += 1
            if next_page_token is None:
                break

    def paginated_query_api(self, url: str, params: Mapping[str, str], max_pages: Optional[int] = DEFAULT_MAX_PAGES):
        return functools.reduce(operator.add, self.paginated_query_api_iterator(url, params, max_pages), [])

    def pipelines(
        self,
        branch: Optional[str] = None,
        commit_hash: Optional[str] = None,
        excluded_trigger_types: Optional[List[str]] = None,
    ) -> List[dict]:
        if excluded_trigger_types is None:
            excluded_trigger_types = []

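        # Pipelines come back newest first; stop at the first page that
        # contains any matches instead of walking all pages.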
        for items in self.paginated_query_api_iterator(
            f'{self.BASE_URL}/project/gh/{self.project_slug}/pipeline',
            {'branch': branch} if branch is not None else {},
            max_pages=10,
        ):
            matching_items = [
                item
                for item in items
                if (
                    (commit_hash is None or item['vcs']['revision'] == commit_hash) and
                    item['trigger']['type'] not in excluded_trigger_types
                )
            ]
            if len(matching_items) > 0:
                return matching_items

        return []

    def workflows(self, pipeline_id: str) -> List[dict]:
        return self.paginated_query_api(f'{self.BASE_URL}/pipeline/{pipeline_id}/workflow', {})

    def jobs(self, workflow_id: str) -> Mapping[str, dict]:
        items = self.paginated_query_api(f'{self.BASE_URL}/workflow/{workflow_id}/job', {})
        jobs_by_name = {job['name']: job for job in items}

        assert len(jobs_by_name) <= len(items)
        if len(jobs_by_name) < len(items):
            raise InvalidResponse("Job names in the workflow are not unique.")

        return jobs_by_name

    def job(self, workflow_id: str, name: str, require_success: bool = False) -> dict:
        jobs = self.jobs(workflow_id)
        if name not in jobs:
            raise DataUnavailable(f"Job {name} is not present in the workflow.")

        if require_success and jobs[name]['status'] != 'success':
            raise DataUnavailable(
                f"Job {name} has failed or is still running. "
                f"Current status: {jobs[name]['status']}."
            )

        return jobs[name]

    def artifacts(self, job_number: int) -> Mapping[str, dict]:
        items = self.paginated_query_api(f'{self.BASE_URL}/project/gh/{self.project_slug}/{job_number}/artifacts', {})
        artifacts_by_name = {artifact['path']: artifact for artifact in items}

        assert len(artifacts_by_name) <= len(items)
        if len(artifacts_by_name) < len(items):
            raise InvalidResponse("Names of artifacts attached to the job are not unique.")

        return artifacts_by_name

    @staticmethod
    def latest_item(items: List[dict]) -> Optional[dict]:
        sorted_items = sorted(items, key=lambda item: item['created_at'], reverse=True)
        return sorted_items[0] if len(sorted_items) > 0 else None
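Putting the helpers together: a minimal sketch (branch and job names taken from the script below, results hypothetical) of walking from a branch to a job's artifacts. No authentication is sent, so this relies on the project's API being publicly readable:

from common.rest_api_helpers import CircleCI

circleci = CircleCI('ethereum/solidity', debug_requests=False)

# Newest matching pipeline on the branch, excluding scheduled (nightly) runs.
pipeline = CircleCI.latest_item(
    circleci.pipelines(branch='develop', excluded_trigger_types=['schedule'])
)
workflow_id = CircleCI.latest_item(circleci.workflows(pipeline['id']))['id']
job = circleci.job(workflow_id, 'c_ext_benchmarks', require_success=True)
artifacts = circleci.artifacts(int(job['job_number']))
print(sorted(artifacts))  # artifact paths, keyed as returned by artifacts()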
scripts/externalTests/download_benchmarks.py (new file, +172)
#!/usr/bin/env python3

from argparse import ArgumentParser, Namespace
from pathlib import Path
from typing import Mapping, Optional
import sys

import requests

# Our scripts/ is not a proper Python package so we need to modify PYTHONPATH to import from it
# pragma pylint: disable=import-error,wrong-import-position
SCRIPTS_DIR = Path(__file__).parent.parent
sys.path.insert(0, str(SCRIPTS_DIR))

from common.git_helpers import git_current_branch, git_commit_hash
from common.rest_api_helpers import APIHelperError, CircleCI, Github, download_file
# pragma pylint: enable=import-error,wrong-import-position


def process_commandline() -> Namespace:
    script_description = (
        "Downloads benchmark results attached as artifacts to the c_ext_benchmarks job on CircleCI. "
        "If no options are specified, downloads results for the currently checked out git branch."
    )

    parser = ArgumentParser(description=script_description)

    target_definition = parser.add_mutually_exclusive_group()
    target_definition.add_argument(
        '--branch',
        dest='branch',
        help="Git branch that the job ran on.",
    )
    target_definition.add_argument(
        '--pr',
        dest='pull_request_id',
        type=int,
        help="Github PR ID that the job ran on.",
    )
    target_definition.add_argument(
        '--base-of-pr',
        dest='base_of_pr',
        type=int,
        help="ID of a Github PR that's based on top of the branch we're interested in."
    )

    parser.add_argument(
        '--any-commit',
        dest='ignore_commit_hash',
        default=False,
        action='store_true',
        help="Include pipelines that ran on a different commit as long as branch/PR matches."
    )
    parser.add_argument(
        '--overwrite',
        dest='overwrite',
        default=False,
        action='store_true',
        help="If artifacts already exist on disk, overwrite them.",
    )
    parser.add_argument(
        '--debug-requests',
        dest='debug_requests',
        default=False,
        action='store_true',
        help="Print detailed info about performed API requests and received responses.",
    )

    return parser.parse_args()


def download_benchmark_artifact(
    artifacts: Mapping[str, dict],
    benchmark_name: str,
    branch: str,
    commit_hash: str,
    overwrite: bool,
    silent: bool = False
):
    if not silent:
        print(f"Downloading artifact: {benchmark_name}-{branch}-{commit_hash[:8]}.json.")

    artifact_path = f'reports/externalTests/{benchmark_name}.json'

    if artifact_path not in artifacts:
        raise RuntimeError(f"Missing artifact: {artifact_path}.")

    download_file(
        artifacts[artifact_path]['url'],
        Path(f'{benchmark_name}-{branch}-{commit_hash[:8]}.json'),
        overwrite,
    )


def download_benchmarks(
    branch: Optional[str],
    pull_request_id: Optional[int],
    base_of_pr: Optional[int],
    ignore_commit_hash: bool = False,
    overwrite: bool = False,
    debug_requests: bool = False,
    silent: bool = False,
):
    github = Github('ethereum/solidity', debug_requests)
    circleci = CircleCI('ethereum/solidity', debug_requests)

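    # Resolve the target branch and expected commit: default to the local
    # checkout, or use the head of --pr, or the base branch of --base-of-pr.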
    expected_commit_hash = None
    if branch is None and pull_request_id is None and base_of_pr is None:
        branch = git_current_branch()
        expected_commit_hash = git_commit_hash()
    elif branch is not None:
        expected_commit_hash = git_commit_hash(branch)
    elif pull_request_id is not None:
        pr_info = github.pull_request(pull_request_id)
        branch = pr_info['head']['ref']
        expected_commit_hash = pr_info['head']['sha']
    elif base_of_pr is not None:
        pr_info = github.pull_request(base_of_pr)
        branch = pr_info['base']['ref']
        expected_commit_hash = pr_info['base']['sha']

    if not silent:
        print(
            f"Looking for pipelines that ran on branch {branch}" +
            (f", commit {expected_commit_hash}." if not ignore_commit_hash else " (any commit).")
        )

    pipeline = circleci.latest_item(circleci.pipelines(
        branch,
        expected_commit_hash if not ignore_commit_hash else None,
        # Skip nightly workflows. They don't have the c_ext_benchmarks job and even if they did,
        # they would likely be running a different set of external tests.
        excluded_trigger_types=['schedule'],
    ))
    if pipeline is None:
        raise RuntimeError("No matching pipelines found.")

    actual_commit_hash = pipeline['vcs']['revision']
    workflow_id = circleci.latest_item(circleci.workflows(pipeline['id']))['id']
    benchmark_collector_job = circleci.job(workflow_id, 'c_ext_benchmarks', require_success=True)

    artifacts = circleci.artifacts(int(benchmark_collector_job['job_number']))

    download_benchmark_artifact(artifacts, 'summarized-benchmarks', branch, actual_commit_hash, overwrite, silent)
    download_benchmark_artifact(artifacts, 'all-benchmarks', branch, actual_commit_hash, overwrite, silent)


def main():
    try:
        options = process_commandline()
        download_benchmarks(
            options.branch,
            options.pull_request_id,
            options.base_of_pr,
            options.ignore_commit_hash,
            options.overwrite,
            options.debug_requests,
        )

        return 0
    except APIHelperError as exception:
        print(f"[ERROR] {exception}", file=sys.stderr)
        return 1
    except requests.exceptions.HTTPError as exception:
        print(f"[ERROR] {exception}", file=sys.stderr)
        return 1
    except RuntimeError as exception:
        print(f"[ERROR] {exception}", file=sys.stderr)
        return 1

if __name__ == '__main__':
    sys.exit(main())
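Because download_benchmarks() is separate from the CLI wrapper, it can also be driven programmatically. A hypothetical example, equivalent to running download_benchmarks.py --base-of-pr 12345 --overwrite (the PR number is made up; the import assumes scripts/ is on sys.path):

from externalTests.download_benchmarks import download_benchmarks

download_benchmarks(
    branch=None,
    pull_request_id=None,
    base_of_pr=12345,  # hypothetical PR number
    overwrite=True,
)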
