Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions augur/tasks/github/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from augur.tasks.init.celery_app import celery_app as celery
from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask
from augur.application.db.data_parse import *
from augur.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException
from augur.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException, ResourceGoneException
from augur.tasks.github.util.github_task_session import GithubTaskManifest
from augur.tasks.util.worker_util import remove_duplicate_dicts
from augur.tasks.github.util.util import get_owner_repo
Expand Down Expand Up @@ -124,8 +124,8 @@ def process_large_issue_and_pr_message_collection(repo_id, repo_git: str, logger
try:
messages = list(github_data_access.paginate_resource(comment_url))
all_data += messages
except UrlNotFoundException:
logger.info(f"{task_name}: PR or issue comment url of {comment_url} returned 404. Skipping.")
except (UrlNotFoundException, ResourceGoneException) as e:
logger.info(f"{task_name}: Skipping {comment_url}: {e}")
Comment on lines -127 to +128
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good start, but I think we can do better by checking for missing PR/issue messages sooner.

I think it's worth either:

  • checking a few URLs and, if all of them return 404, skipping the rest of the message collection
  • checking for this sooner and returning from the function early

skipped_urls += 1

if len(all_data) >= message_batch_size:
Expand Down
Loading