Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# Debugger
from augur.tasks.github.util.github_paginator import GithubApiResult
from augur.application.db.lib import get_repo_by_repo_id, bulk_insert_dicts, execute_sql, get_contributors_by_github_user_id

from typing_extensions import deprecated

##TODO: maybe have a TaskSession class that holds information about the database, logger, config, etc.

Expand Down Expand Up @@ -107,7 +107,7 @@ def request_dict_from_endpoint(logger, session, url, timeout_wait=10):

return response_data


@deprecated("Please use GithubDataAcess.endpoint_url() instead")
def create_endpoint_from_email(email):
# Note: I added "+type:user" to avoid having user owned organizations be returned
# Also stopped splitting per note above.
Expand All @@ -117,7 +117,7 @@ def create_endpoint_from_email(email):

return url


@deprecated("Please use GithubDataAcess.endpoint_url() instead")
def create_endpoint_from_commit_sha(logger, commit_sha, repo_id):
logger.debug(
f"Trying to create endpoint from commit hash: {commit_sha}")
Expand Down
9 changes: 7 additions & 2 deletions augur/tasks/github/pull_requests/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,15 @@ def retrieve_all_pr_data(repo_git: str, logger, key_auth, since): #-> Generator[

logger.debug(f"Collecting pull requests for {owner}/{repo}")

url = f"https://api.github.com/repos/{owner}/{repo}/pulls?state=all&direction=desc&sort=updated"

github_data_access = GithubDataAccess(key_auth, logger)

search_args = {"state": "all", "direction": "desc", "sort": "updated"}
url = github_data_access.endpoint_url(f"repos/{owner}/{repo}/pulls", search_args)

if not github_data_access.check_prs_enabled(owner, repo):
logger.info(f"{owner}/{repo}: Pull requests appear to be disabled for this repo. Skipping.")
return

num_pages = github_data_access.get_resource_page_count(url)

logger.debug(f"{owner}/{repo}: Retrieving {num_pages} pages of pull requests")
Expand Down
35 changes: 35 additions & 0 deletions augur/tasks/github/util/github_data_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,27 @@ def __init__(self, key_manager, logger: logging.Logger, feature="rest"):
self.key = None
self.expired_keys_for_request = []

def endpoint_url(self, path: str, params: dict = None) -> str:
    """Assemble the full GitHub API URL for a resource path.

    The path is anchored to ``https://api.github.com``; a leading slash is
    added when the caller left it off, so ``"users/MoralCode"`` and
    ``"/users/MoralCode"`` produce the same base URL.

    Args:
        path (str): resource path, e.g. "/users/MoralCode"
        params (dict): optional query parameters to attach, as a dict.
            ``None`` (or any falsy value) is treated as "no parameters".

    Returns:
        str: whatever the private ``__add_query_params`` helper returns for
            the base URL and the parameters.
    """
    # Attaching (and presumably encoding/escaping) the query string is
    # delegated to __add_query_params, which is defined elsewhere in this
    # class -- NOTE(review): confirm how it treats a path that already
    # contains a query string.
    normalized_path = path if path.startswith("/") else "/" + path
    query_params = params or {}

    return self.__add_query_params(
        "https://api.github.com" + normalized_path,
        query_params,
    )

def get_resource_count(self, url):

# set per_page to 100 explicitly so we know each page is 100 long
Expand All @@ -60,6 +81,20 @@ def get_resource_count(self, url):

return (100 * (num_pages -1)) + len(data)

def check_prs_enabled(self, owner: str, repo: str) -> bool:
    """Report whether the pull request listing of a repository is reachable.

    Asks for the page count of ``repos/{owner}/{repo}/pulls`` (one item per
    page) and only looks at whether that lookup raises; the count itself is
    discarded.

    Args:
        owner (str): owner segment of the repository path
        repo (str): repository name segment of the repository path

    Returns:
        bool: True when the lookup finishes without raising
            UrlNotFoundException, regardless of how many pull requests
            exist. False when UrlNotFoundException is raised, which this
            method interprets (and logs) as pull requests being disabled.
    """
    try:
        probe_url = self.endpoint_url(
            f"repos/{owner}/{repo}/pulls",
            {"per_page": "1"},
        )
        self.get_resource_page_count(probe_url)
    except UrlNotFoundException:
        # NOTE(review): a not-found response is taken to mean "PRs disabled";
        # confirm it cannot also indicate a missing or inaccessible repo.
        self.logger.info(f"{owner}/{repo}: Pull requests are disabled. Skipping PR collection.")
        return False
    else:
        return True


def paginate_resource(self, url):

response = self.make_request_with_retries(url)
Expand Down
Loading