diff --git a/augur/tasks/github/facade_github/contributor_interfaceable/contributor_interface.py b/augur/tasks/github/facade_github/contributor_interfaceable/contributor_interface.py index ff65da319b..3bd018aec3 100644 --- a/augur/tasks/github/facade_github/contributor_interfaceable/contributor_interface.py +++ b/augur/tasks/github/facade_github/contributor_interfaceable/contributor_interface.py @@ -8,7 +8,7 @@ # Debugger from augur.tasks.github.util.github_paginator import GithubApiResult from augur.application.db.lib import get_repo_by_repo_id, bulk_insert_dicts, execute_sql, get_contributors_by_github_user_id - +from typing_extensions import deprecated ##TODO: maybe have a TaskSession class that holds information about the database, logger, config, etc. @@ -107,7 +107,7 @@ def request_dict_from_endpoint(logger, session, url, timeout_wait=10): return response_data - +@deprecated("Please use GithubDataAccess.endpoint_url() instead") def create_endpoint_from_email(email): # Note: I added "+type:user" to avoid having user owned organizations be returned # Also stopped splitting per note above.
@@ -117,7 +117,7 @@ def create_endpoint_from_email(email): return url - +@deprecated("Please use GithubDataAccess.endpoint_url() instead") def create_endpoint_from_commit_sha(logger, commit_sha, repo_id): logger.debug( f"Trying to create endpoint from commit hash: {commit_sha}") diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 60cc9e1e66..96343b2886 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -75,10 +75,15 @@ def retrieve_all_pr_data(repo_git: str, logger, key_auth, since): #-> Generator[ logger.debug(f"Collecting pull requests for {owner}/{repo}") - url = f"https://api.github.com/repos/{owner}/{repo}/pulls?state=all&direction=desc&sort=updated" - github_data_access = GithubDataAccess(key_auth, logger) + search_args = {"state": "all", "direction": "desc", "sort": "updated"} + url = github_data_access.endpoint_url(f"repos/{owner}/{repo}/pulls", search_args) + + if not github_data_access.check_prs_enabled(owner, repo): + logger.info(f"{owner}/{repo}: Pull requests appear to be disabled for this repo. Skipping.") + return + num_pages = github_data_access.get_resource_page_count(url) logger.debug(f"{owner}/{repo}: Retrieving {num_pages} pages of pull requests") diff --git a/augur/tasks/github/util/github_data_access.py b/augur/tasks/github/util/github_data_access.py index 6df216dfda..99496a9627 100644 --- a/augur/tasks/github/util/github_data_access.py +++ b/augur/tasks/github/util/github_data_access.py @@ -44,6 +44,27 @@ def __init__(self, key_manager, logger: logging.Logger, feature="rest"): self.key = None self.expired_keys_for_request = [] + def endpoint_url(self, path: str, params: dict = None) -> str: + """Build a URL for a github endpoint using the specified path and query parameters + + Args: + path (str): the path to use (e.g. 
"/users/MoralCode") + params (dict): optional query parameters to add to the url, as a dict + + Returns: + str: the full URL to the specified resource. + """ + # delegating to __add_query_params (rather than formatting the query string + # by hand) is meant to cope with query parameters accidentally included in + # the path and to keep every parameter properly encoded and escaped + + if not path.startswith("/"): + path = "/" + path + + url = "https://api.github.com" + path + + return self.__add_query_params(url, params or {}) + def get_resource_count(self, url): # set per_page to 100 explicitly so we know each page is 100 long @@ -60,6 +81,20 @@ def get_resource_count(self, url): return (100 * (num_pages -1)) + len(data) + def check_prs_enabled(self, owner: str, repo: str) -> bool: + """ + Checks whether pull requests are enabled for a repository. + Returns False if PRs are disabled (404 on /pulls) and True when the /pulls endpoint can be queried. + """ + try: + url = self.endpoint_url(f"repos/{owner}/{repo}/pulls", {"per_page": "1"}) + self.get_resource_page_count(url) + return True + except UrlNotFoundException: + self.logger.info(f"{owner}/{repo}: Pull requests are disabled. Skipping PR collection.") + return False + + def paginate_resource(self, url): response = self.make_request_with_retries(url)