Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions augur/tasks/git/util/facade_worker/facade_worker/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ def insert_or_update_data(self, query, **bind_args)-> None:
def inc_repos_processed(self):
    """Increment this object's ``repos_processed`` counter by one."""
    self.repos_processed += 1

def run_git_command(self, cmd: str, timeout: int, capture_output: bool = False, operation_description: str = None) -> tuple:
def run_git_command(self, cmd: list[str], timeout: int, capture_output: bool = False, operation_description: str = None) -> tuple:
"""
Execute a git command with timeout handling.

Expand All @@ -277,12 +277,11 @@ def run_git_command(self, cmd: str, timeout: int, capture_output: bool = False,
stdout_content is empty string if capture_output=False
"""
if operation_description is None:
operation_description = cmd
operation_description = ' '.join(cmd)

try:
# Common options for all subprocess.run calls
run_options = {
'shell': True,
'timeout': timeout,
'check': False
}
Expand Down
51 changes: 27 additions & 24 deletions augur/tasks/git/util/facade_worker/facade_worker/repofetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import os
import pathlib
import sqlalchemy as s
from .utilitymethods import update_repo_log, get_absolute_repo_path
from .utilitymethods import update_repo_log, get_absolute_repo_path, parse_remote_default_branch
from sqlalchemy.orm.exc import NoResultFound
from augur.application.db.models.augur_data import *
from augur.application.db.models.augur_operations import CollectionStatus
Expand Down Expand Up @@ -148,7 +148,7 @@ def git_repo_initialize(facade_helper, session, repo_git):

facade_helper.log_activity('Verbose', f"Cloning: {git}")

cmd = f"git -C {repo_path} clone '{git}' {repo_name}"
cmd = ["git", "-C", repo_path, "clone", git, repo_name]
return_code, _ = facade_helper.run_git_command(
cmd,
timeout=7200, # 2 hours for large repos
Expand Down Expand Up @@ -320,7 +320,7 @@ def git_repo_updates(facade_helper, repo_git):

try:

firstpull = (f"git -C {absolute_path} pull")
firstpull = ["git", "-C", absolute_path, "pull"]

return_code_remote, _ = facade_helper.run_git_command(
firstpull,
Expand All @@ -340,24 +340,26 @@ def git_repo_updates(facade_helper, repo_git):

# session.log_activity('Verbose', f'remote default is {logremotedefault}.')

getremotedefault = (
f"git -C {absolute_path} remote show origin | sed -n '/HEAD branch/s/.*: //p'")
getremotedefault = ["git", "-C", absolute_path, "remote", "show", "origin"]

return_code_remote, remotedefault = facade_helper.run_git_command(
return_code_remote, output = facade_helper.run_git_command(
getremotedefault,
timeout=60, # 1 minute for remote query
capture_output=True,
operation_description='get remote default branch'
)

remotedefault = ""
if return_code_remote == 0 and output:
remotedefault = parse_remote_default_branch(output)

facade_helper.log_activity(
'Verbose', f'remote default getting checked out is: {remotedefault}.')

getremotedefault = (
f"git -C {absolute_path} checkout {remotedefault}")
getremotedefault = ["git", "-C", absolute_path, "checkout", remotedefault]

facade_helper.log_activity(
'Verbose', f"get remote default command is: \n \n {getremotedefault} \n \n ")
'Verbose', f"get remote default command is: \n \n {' '.join(getremotedefault)} \n \n ")

return_code_remote_default_again, _ = facade_helper.run_git_command(
getremotedefault,
Expand All @@ -368,7 +370,7 @@ def git_repo_updates(facade_helper, repo_git):

if return_code_remote_default_again == 0:
facade_helper.log_activity('Verbose', "local checkout worked.")
cmd = (f"git -C {absolute_path} pull")
cmd = ["git", "-C", absolute_path, "pull"]

return_code, _ = facade_helper.run_git_command(
cmd,
Expand All @@ -384,7 +386,7 @@ def git_repo_updates(facade_helper, repo_git):

finally:

cmd = (f"git -C {absolute_path} pull")
cmd = ["git", "-C", absolute_path, "pull"]

return_code, _ = facade_helper.run_git_command(
cmd,
Expand All @@ -411,20 +413,22 @@ def git_repo_updates(facade_helper, repo_git):

# session.log_activity('Verbose', f'remote default is {logremotedefault}.')

getremotedefault = (
f"git -C {absolute_path} remote show origin | sed -n '/HEAD branch/s/.*: //p'")
getremotedefault = ["git", "-C", absolute_path, "remote", "show", "origin"]

return_code_remote, remotedefault = facade_helper.run_git_command(
return_code_remote, output = facade_helper.run_git_command(
Comment on lines -417 to +418
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This feels like we may be duplicating a lot of code here. Can we refactor some of this git-access logic into shared functions?

getremotedefault,
timeout=60, # 1 minute for remote query
capture_output=True,
operation_description='get remote default branch'
)

remotedefault = ""
if return_code_remote == 0 and output:
remotedefault = parse_remote_default_branch(output)

try:

getremotedefault = (
f"git -C {absolute_path} checkout {remotedefault}")
getremotedefault = ["git", "-C", absolute_path, "checkout", remotedefault]

return_code_remote_default, _ = facade_helper.run_git_command(
getremotedefault,
Expand All @@ -436,7 +440,7 @@ def git_repo_updates(facade_helper, repo_git):
facade_helper.log_activity(
'Verbose', f'get remote default result (return code): {return_code_remote_default}')

getcurrentbranch = (f"git -C {absolute_path} branch")
getcurrentbranch = ["git", "-C", absolute_path, "branch"]

return_code_local, localdefault = facade_helper.run_git_command(
getcurrentbranch,
Expand All @@ -448,8 +452,7 @@ def git_repo_updates(facade_helper, repo_git):
facade_helper.log_activity(
'Verbose', f'remote default is: {remotedefault}, and localdefault is {localdefault}.')

cmd_checkout_default = (
f"git -C {absolute_path} checkout {remotedefault}")
cmd_checkout_default = ["git", "-C", absolute_path, "checkout", remotedefault]

cmd_checkout_default_wait, _ = facade_helper.run_git_command(
cmd_checkout_default,
Expand All @@ -458,9 +461,9 @@ def git_repo_updates(facade_helper, repo_git):
operation_description=f'git checkout {remotedefault}'
)

cmdpull2 = (f"git -C {absolute_path} pull")
cmdpull2 = ["git", "-C", absolute_path, "pull"]

cmd_reset = (f"git -C {absolute_path} reset --hard origin/{remotedefault}")
cmd_reset = ["git", "-C", absolute_path, "reset", "--hard", f"origin/{remotedefault}"]

cmd_reset_wait, _ = facade_helper.run_git_command(
cmd_reset,
Expand All @@ -469,7 +472,7 @@ def git_repo_updates(facade_helper, repo_git):
operation_description=f'git reset --hard origin/{remotedefault}'
)

cmd_clean = (f"git -C {absolute_path} clean -df")
cmd_clean = ["git", "-C", absolute_path, "clean", "-df"]

return_code_clean, _ = facade_helper.run_git_command(
cmd_clean,
Expand All @@ -483,9 +486,9 @@ def git_repo_updates(facade_helper, repo_git):
facade_helper.log_activity('Verbose', f'Second pass failed: {e}.')
pass

cmdpull2 = (f"git -C {absolute_path} pull")
cmdpull2 = ["git", "-C", absolute_path, "pull"]


print(cmdpull2)
return_code, _ = facade_helper.run_git_command(
cmdpull2,
timeout=600, # 10 minutes for git pull
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def get_absolute_repo_path(repo_base_dir, repo_id, repo_path,repo_name):

def get_parent_commits_set(absolute_repo_path, facade_helper, logger=None):

cmd = "git --git-dir %s log --ignore-missing --pretty=format:'%%H'" % (absolute_repo_path)
cmd = ["git", "--git-dir", absolute_repo_path, "log", "--ignore-missing", "--pretty=format:%H"]

# Use facade_helper's unified git command runner
return_code, stdout = facade_helper.run_git_command(
Expand Down Expand Up @@ -219,4 +219,16 @@ def update_facade_scheduling_fields(repo_git, weight, commit_count):
session.commit()



def parse_remote_default_branch(git_remote_output):
    """
    Parse the output of 'git remote show origin' to find the HEAD branch.

    Args:
        git_remote_output: Text captured from ``git remote show origin``.
            May be empty or None.

    Returns:
        The text following the colon on the first line whose label contains
        "HEAD branch", stripped of surrounding whitespace. Returns an empty
        string when the input is empty or no such line is present.
    """
    if not git_remote_output:
        return ""

    # splitlines() breaks on real line terminators ("\n", "\r\n", ...).
    # Splitting on the two-character sequence backslash + "n" would leave the
    # whole output as a single "line" and return everything after the first
    # colon (typically the fetch URL) instead of the branch name.
    for line in git_remote_output.splitlines():
        label, separator, value = line.partition(":")
        # Require the marker in the label so a URL or branch name that merely
        # contains the text "HEAD branch" after a colon is not matched.
        if separator and "HEAD branch" in label:
            return value.strip()
    return ""
Loading