augurlabs · MoralCode · Mar 19, 2026 · Mar 20, 2026 · Mar 20, 2026 · Mar 20, 2026
@@ -8,7 +8,7 @@
 # Debugger
 from augur.tasks.github.util.github_paginator import GithubApiResult
 from augur.application.db.lib import get_repo_by_repo_id, bulk_insert_dicts, execute_sql, get_contributors_by_github_user_id
-
+from typing_extensions import deprecated
 
 ##TODO: maybe have a TaskSession class that holds information about the database, logger, config, etc.
 
@@ -107,7 +107,7 @@ def request_dict_from_endpoint(logger, session, url, timeout_wait=10):
 
     return response_data
 
-
+@deprecated("Please use GithubDataAccess.endpoint_url() instead")
 def create_endpoint_from_email(email):
     # Note: I added "+type:user" to avoid having user owned organizations be returned
     # Also stopped splitting per note above.
@@ -117,7 +117,7 @@ def create_endpoint_from_email(email):
 
     return url
 
-
+@deprecated("Please use GithubDataAccess.endpoint_url() instead")
 def create_endpoint_from_commit_sha(logger, commit_sha, repo_id):
     logger.debug(
         f"Trying to create endpoint from commit hash: {commit_sha}")

@@ -75,10 +75,15 @@ def retrieve_all_pr_data(repo_git: str, logger, key_auth, since): #-> Generator[
 
     logger.debug(f"Collecting pull requests for {owner}/{repo}")
 
-    url = f"https://api.github.com/repos/{owner}/{repo}/pulls?state=all&direction=desc&sort=updated"
-
     github_data_access = GithubDataAccess(key_auth, logger)
 
+    search_args = {"state": "all", "direction": "desc", "sort": "updated"}
+    url = github_data_access.endpoint_url(f"repos/{owner}/{repo}/pulls", search_args)
+
+    if not github_data_access.check_prs_enabled(owner, repo):
+        logger.info(f"{owner}/{repo}: Pull requests appear to be disabled for this repo. Skipping.")
+        return
+
     num_pages = github_data_access.get_resource_page_count(url)
 
     logger.debug(f"{owner}/{repo}: Retrieving {num_pages} pages of pull requests")

@@ -44,6 +44,27 @@ def __init__(self, key_manager, logger: logging.Logger, feature="rest"):
         self.key = None
         self.expired_keys_for_request = []
 
+    def endpoint_url(self, path: str, params: dict = None) -> str:
+        """Assemble the full api.github.com URL for a REST resource.
+
+        Args:
+            path (str): resource path, with or without a leading slash
+                (e.g. "/users/MoralCode" or "users/MoralCode")
+            params (dict): optional query parameters to append to the url;
+                None and an empty dict are treated the same way
+
+        Returns:
+            str: the URL returned by __add_query_params for the resource.
+        """
+        # Tolerate callers that leave off the leading slash.
+        resource_path = path if path.startswith("/") else f"/{path}"
+        query = params if params else {}
+
+        # Query-string handling is delegated to __add_query_params instead of
+        # being done with string formatting here. The intent is that all
+        # parameters end up properly encoded and escaped, even when some were
+        # accidentally included in the path string.
+        return self.__add_query_params(f"https://api.github.com{resource_path}", query)
+
     def get_resource_count(self, url):
 
         # set per_page to 100 explicitly so we know each page is 100 long
@@ -60,6 +81,20 @@ def get_resource_count(self, url):
 
         return (100 * (num_pages -1)) + len(data)
 
+    def check_prs_enabled(self, owner: str, repo: str) -> bool:
+        """Check whether the pull request listing of a repository is reachable.
+
+        Asks for the page count of the repo's /pulls endpoint, using one item
+        per page, and interprets the outcome.
+
+        Args:
+            owner (str): account or organization that owns the repository
+            repo (str): name of the repository
+
+        Returns:
+            bool: True if the page count request completed without raising
+                UrlNotFoundException. This says nothing about how many pull
+                requests exist; there may be none. False if
+                UrlNotFoundException was raised, which is treated as pull
+                requests being disabled for the repo.
+        """
+        url = self.endpoint_url(f"repos/{owner}/{repo}/pulls", {"per_page": "1"})
+
+        # Only the request itself belongs in the try block; building the URL
+        # is plain string work and should not be covered by the handler.
+        try:
+            self.get_resource_page_count(url)
+        except UrlNotFoundException:
+            # NOTE(review): presumably raised for a 404 on /pulls - confirm
+            # whether a missing or inaccessible repo ends up here as well.
+            self.logger.info(f"{owner}/{repo}: Pull requests are disabled. Skipping PR collection.")
+            return False
+
+        return True
+
+
     def paginate_resource(self, url):
 
         response = self.make_request_with_retries(url)