Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions augur/tasks/git/util/facade_worker/facade_worker/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ def insert_or_update_data(self, query, **bind_args)-> None:
def inc_repos_processed(self):
self.repos_processed += 1

def run_git_command(self, cmd: str, timeout: int, capture_output: bool = False, operation_description: str = None) -> tuple:
def run_git_command(self, cmd: list[str], timeout: int, capture_output: bool = False, operation_description: str = None) -> tuple:
"""
Execute a git command with timeout handling.

Expand All @@ -277,12 +277,11 @@ def run_git_command(self, cmd: str, timeout: int, capture_output: bool = False,
stdout_content is empty string if capture_output=False
"""
if operation_description is None:
operation_description = cmd
operation_description = ' '.join(cmd)

try:
# Common options for all subprocess.run calls
run_options = {
'shell': True,
'timeout': timeout,
'check': False
}
Expand Down
51 changes: 27 additions & 24 deletions augur/tasks/git/util/facade_worker/facade_worker/repofetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import os
import pathlib
import sqlalchemy as s
from .utilitymethods import update_repo_log, get_absolute_repo_path
from .utilitymethods import update_repo_log, get_absolute_repo_path, parse_remote_default_branch
from sqlalchemy.orm.exc import NoResultFound
from augur.application.db.models.augur_data import *
from augur.application.db.models.augur_operations import CollectionStatus
Expand Down Expand Up @@ -148,7 +148,7 @@ def git_repo_initialize(facade_helper, session, repo_git):

facade_helper.log_activity('Verbose', f"Cloning: {git}")

cmd = f"git -C {repo_path} clone '{git}' {repo_name}"
cmd = ["git", "-C", repo_path, "clone", git, repo_name]
return_code, _ = facade_helper.run_git_command(
cmd,
timeout=7200, # 2 hours for large repos
Expand Down Expand Up @@ -320,7 +320,7 @@ def git_repo_updates(facade_helper, repo_git):

try:

firstpull = (f"git -C {absolute_path} pull")
firstpull = ["git", "-C", absolute_path, "pull"]

return_code_remote, _ = facade_helper.run_git_command(
firstpull,
Expand All @@ -340,24 +340,26 @@ def git_repo_updates(facade_helper, repo_git):

# session.log_activity('Verbose', f'remote default is {logremotedefault}.')

getremotedefault = (
f"git -C {absolute_path} remote show origin | sed -n '/HEAD branch/s/.*: //p'")
getremotedefault = ["git", "-C", absolute_path, "remote", "show", "origin"]

return_code_remote, remotedefault = facade_helper.run_git_command(
return_code_remote, output = facade_helper.run_git_command(
getremotedefault,
timeout=60, # 1 minute for remote query
capture_output=True,
operation_description='get remote default branch'
)

remotedefault = ""
if return_code_remote == 0 and output:
remotedefault = parse_remote_default_branch(output)

facade_helper.log_activity(
'Verbose', f'remote default getting checked out is: {remotedefault}.')

getremotedefault = (
f"git -C {absolute_path} checkout {remotedefault}")
getremotedefault = ["git", "-C", absolute_path, "checkout", remotedefault]

facade_helper.log_activity(
'Verbose', f"get remote default command is: \n \n {getremotedefault} \n \n ")
'Verbose', f"get remote default command is: \n \n git -C {absolute_path} checkout {remotedefault} \n \n ")

return_code_remote_default_again, _ = facade_helper.run_git_command(
getremotedefault,
Expand All @@ -368,7 +370,7 @@ def git_repo_updates(facade_helper, repo_git):

if return_code_remote_default_again == 0:
facade_helper.log_activity('Verbose', "local checkout worked.")
cmd = (f"git -C {absolute_path} pull")
cmd = ["git", "-C", absolute_path, "pull"]

return_code, _ = facade_helper.run_git_command(
cmd,
Expand All @@ -384,7 +386,7 @@ def git_repo_updates(facade_helper, repo_git):

finally:

cmd = (f"git -C {absolute_path} pull")
cmd = ["git", "-C", absolute_path, "pull"]

return_code, _ = facade_helper.run_git_command(
cmd,
Expand All @@ -411,20 +413,22 @@ def git_repo_updates(facade_helper, repo_git):

# session.log_activity('Verbose', f'remote default is {logremotedefault}.')

getremotedefault = (
f"git -C {absolute_path} remote show origin | sed -n '/HEAD branch/s/.*: //p'")
getremotedefault = ["git", "-C", absolute_path, "remote", "show", "origin"]

return_code_remote, remotedefault = facade_helper.run_git_command(
return_code_remote, output = facade_helper.run_git_command(
Comment on lines -417 to +418
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This feels like we may be duplicating a lot of code here. Could we refactor the git-access logic into shared functions?

getremotedefault,
timeout=60, # 1 minute for remote query
capture_output=True,
operation_description='get remote default branch'
)

remotedefault = ""
if return_code_remote == 0 and output:
remotedefault = parse_remote_default_branch(output)

try:

getremotedefault = (
f"git -C {absolute_path} checkout {remotedefault}")
getremotedefault = ["git", "-C", absolute_path, "checkout", remotedefault]

return_code_remote_default, _ = facade_helper.run_git_command(
getremotedefault,
Expand All @@ -436,7 +440,7 @@ def git_repo_updates(facade_helper, repo_git):
facade_helper.log_activity(
'Verbose', f'get remote default result (return code): {return_code_remote_default}')

getcurrentbranch = (f"git -C {absolute_path} branch")
getcurrentbranch = ["git", "-C", absolute_path, "branch"]

return_code_local, localdefault = facade_helper.run_git_command(
getcurrentbranch,
Expand All @@ -448,8 +452,7 @@ def git_repo_updates(facade_helper, repo_git):
facade_helper.log_activity(
'Verbose', f'remote default is: {remotedefault}, and localdefault is {localdefault}.')

cmd_checkout_default = (
f"git -C {absolute_path} checkout {remotedefault}")
cmd_checkout_default = ["git", "-C", absolute_path, "checkout", remotedefault]

cmd_checkout_default_wait, _ = facade_helper.run_git_command(
cmd_checkout_default,
Expand All @@ -458,9 +461,9 @@ def git_repo_updates(facade_helper, repo_git):
operation_description=f'git checkout {remotedefault}'
)

cmdpull2 = (f"git -C {absolute_path} pull")
cmdpull2 = ["git", "-C", absolute_path, "pull"]

cmd_reset = (f"git -C {absolute_path} reset --hard origin/{remotedefault}")
cmd_reset = ["git", "-C", absolute_path, "reset", "--hard", f"origin/{remotedefault}"]

cmd_reset_wait, _ = facade_helper.run_git_command(
cmd_reset,
Expand All @@ -469,7 +472,7 @@ def git_repo_updates(facade_helper, repo_git):
operation_description=f'git reset --hard origin/{remotedefault}'
)

cmd_clean = (f"git -C {absolute_path} clean -df")
cmd_clean = ["git", "-C", absolute_path, "clean", "-df"]

return_code_clean, _ = facade_helper.run_git_command(
cmd_clean,
Expand All @@ -483,9 +486,9 @@ def git_repo_updates(facade_helper, repo_git):
facade_helper.log_activity('Verbose', f'Second pass failed: {e}.')
pass

cmdpull2 = (f"git -C {absolute_path} pull")
cmdpull2 = ["git", "-C", absolute_path, "pull"]


print(cmdpull2)
return_code, _ = facade_helper.run_git_command(
cmdpull2,
timeout=600, # 10 minutes for git pull
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def get_absolute_repo_path(repo_base_dir, repo_id, repo_path,repo_name):

def get_parent_commits_set(absolute_repo_path, facade_helper, logger=None):

cmd = "git --git-dir %s log --ignore-missing --pretty=format:'%%H'" % (absolute_repo_path)
cmd = ["git", "--git-dir", absolute_repo_path, "log", "--ignore-missing", "--pretty=format:%H"]

# Use facade_helper's unified git command runner
return_code, stdout = facade_helper.run_git_command(
Expand Down Expand Up @@ -219,4 +219,16 @@ def update_facade_scheduling_fields(repo_git, weight, commit_count):
session.commit()



def parse_remote_default_branch(git_remote_output):
    """
    Parse the output of 'git remote show origin' to find the HEAD branch.

    Args:
        git_remote_output: Text printed by 'git remote show origin'. May be
            empty or None.

    Returns:
        The text following the first ':' on the first line that contains
        "HEAD branch", with surrounding whitespace stripped. Returns an empty
        string when the input is empty/None or no such line has a ':'.
    """
    if not git_remote_output:
        return ""

    # splitlines() breaks on real line boundaries (including '\r\n').
    # Splitting on the two-character sequence backslash + 'n' would leave
    # multi-line output as a single "line" and return everything after the
    # first colon in the whole output.
    for line in git_remote_output.splitlines():
        if "HEAD branch" in line:
            # Split on the first ':' only so a branch name that itself
            # contains ':' is returned intact.
            _, separator, branch = line.partition(":")
            if separator:
                return branch.strip()
    return ""
Loading