From c67890388b493affe564e46ba9324429919e8e24 Mon Sep 17 00:00:00 2001 From: guptapratykshh Date: Thu, 15 Jan 2026 18:21:00 +0530 Subject: [PATCH 1/4] fix(#3577): remove shell=True and convert commands to argument lists to prevent command injection Signed-off-by: guptapratykshh --- .../facade_worker/facade_worker/config.py | 7 ++- .../facade_worker/facade_worker/repofetch.py | 50 ++++++++----------- .../facade_worker/utilitymethods.py | 2 +- 3 files changed, 26 insertions(+), 33 deletions(-) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py index a83d09390c..7165945826 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -257,7 +257,7 @@ def insert_or_update_data(self, query, **bind_args)-> None: def inc_repos_processed(self): self.repos_processed += 1 - def run_git_command(self, cmd: str, timeout: int, capture_output: bool = False, operation_description: str = None) -> tuple: + def run_git_command(self, cmd: list, timeout: int, capture_output: bool = False, operation_description: str = None) -> tuple: """ Execute a git command with timeout handling. @@ -265,7 +265,7 @@ def run_git_command(self, cmd: str, timeout: int, capture_output: bool = False, consistent timeout handling and error logging across all facade operations. Args: - cmd: The git command to execute + cmd: The git command to execute as a list of arguments (e.g., ["git", "clone", url]) timeout: Timeout in seconds capture_output: If True, capture stdout/stderr; if False, discard them operation_description: Human-readable description for error logging @@ -277,12 +277,11 @@ def run_git_command(self, cmd: str, timeout: int, capture_output: bool = False, stdout_content is empty string if capture_output=False """ if operation_description is None: - operation_description = cmd + operation_description = ' '.join(cmd) try: # Common options for all subprocess.run calls run_options = { - 'shell': True, 'timeout': timeout, 'check': False } diff --git a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py index 6e911f6fd9..c8551805b7 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py @@ -148,7 +148,7 @@ def git_repo_initialize(facade_helper, session, repo_git): facade_helper.log_activity('Verbose', f"Cloning: {git}") - cmd = f"git -C {repo_path} clone '{git}' {repo_name}" + cmd = ["git", "-C", repo_path, "clone", git, repo_name] return_code, _ = facade_helper.run_git_command( cmd, timeout=7200, # 2 hours for large repos @@ -320,7 +320,7 @@ def git_repo_updates(facade_helper, repo_git): try: - firstpull = (f"git -C {absolute_path} pull") + firstpull = ["git", "-C", absolute_path, "pull"] return_code_remote, _ = facade_helper.run_git_command( firstpull, @@ -340,27 +340,24 @@ def git_repo_updates(facade_helper, repo_git): # session.log_activity('Verbose', f'remote default is {logremotedefault}.') - getremotedefault = ( - f"git -C {absolute_path} remote show origin | sed -n '/HEAD branch/s/.*: //p'") + getremotedefault = ["git", "-C", absolute_path, "symbolic-ref", "refs/remotes/origin/HEAD"] return_code_remote, remotedefault = facade_helper.run_git_command( getremotedefault, - timeout=60, # 1 minute for remote query + timeout=60, capture_output=True, operation_description='get remote default branch' ) + if return_code_remote == 0 and remotedefault: + remotedefault = remotedefault.split('/')[-1] facade_helper.log_activity( 'Verbose', f'remote default getting checked out is: {remotedefault}.') - getremotedefault = ( - f"git -C {absolute_path} checkout {remotedefault}") - - facade_helper.log_activity( - 'Verbose', f"get remote default command is: \n \n {getremotedefault} \n \n ") + checkout_cmd = ["git", "-C", absolute_path, "checkout", remotedefault] return_code_remote_default_again, _ = facade_helper.run_git_command( - getremotedefault, + checkout_cmd, timeout=600, # 10 minutes for git checkout capture_output=False, operation_description=f'git checkout {remotedefault}' @@ -368,7 +365,7 @@ def git_repo_updates(facade_helper, repo_git): if return_code_remote_default_again == 0: facade_helper.log_activity('Verbose', "local checkout worked.") - cmd = (f"git -C {absolute_path} pull") + cmd = ["git", "-C", absolute_path, "pull"] return_code, _ = facade_helper.run_git_command( cmd, @@ -384,7 +381,7 @@ def git_repo_updates(facade_helper, repo_git): finally: - cmd = (f"git -C {absolute_path} pull") + cmd = ["git", "-C", absolute_path, "pull"] return_code, _ = facade_helper.run_git_command( cmd, @@ -411,23 +408,23 @@ def git_repo_updates(facade_helper, repo_git): # session.log_activity('Verbose', f'remote default is {logremotedefault}.') - getremotedefault = ( - f"git -C {absolute_path} remote show origin | sed -n '/HEAD branch/s/.*: //p'") + getremotedefault = ["git", "-C", absolute_path, "symbolic-ref", "refs/remotes/origin/HEAD"] return_code_remote, remotedefault = facade_helper.run_git_command( getremotedefault, - timeout=60, # 1 minute for remote query + timeout=60, capture_output=True, operation_description='get remote default branch' ) + if return_code_remote == 0 and remotedefault: + remotedefault = remotedefault.split('/')[-1] try: - getremotedefault = ( - f"git -C {absolute_path} checkout {remotedefault}") + checkout_cmd = ["git", "-C", absolute_path, "checkout", remotedefault] return_code_remote_default, _ = facade_helper.run_git_command( - getremotedefault, + checkout_cmd, timeout=600, # 10 minutes for git checkout capture_output=False, operation_description=f'git checkout {remotedefault}' @@ -436,7 +433,7 @@ def git_repo_updates(facade_helper, repo_git): facade_helper.log_activity( 'Verbose', f'get remote default result (return code): {return_code_remote_default}') - getcurrentbranch = (f"git -C {absolute_path} branch") + getcurrentbranch = ["git", "-C", absolute_path, "branch"] return_code_local, localdefault = facade_helper.run_git_command( getcurrentbranch, @@ -448,8 +445,7 @@ def git_repo_updates(facade_helper, repo_git): facade_helper.log_activity( 'Verbose', f'remote default is: {remotedefault}, and localdefault is {localdefault}.') - cmd_checkout_default = ( - f"git -C {absolute_path} checkout {remotedefault}") + cmd_checkout_default = ["git", "-C", absolute_path, "checkout", remotedefault] cmd_checkout_default_wait, _ = facade_helper.run_git_command( cmd_checkout_default, @@ -458,9 +454,9 @@ def git_repo_updates(facade_helper, repo_git): operation_description=f'git checkout {remotedefault}' ) - cmdpull2 = (f"git -C {absolute_path} pull") + cmdpull2 = ["git", "-C", absolute_path, "pull"] - cmd_reset = (f"git -C {absolute_path} reset --hard origin/{remotedefault}") + cmd_reset = ["git", "-C", absolute_path, "reset", "--hard", f"origin/{remotedefault}"] cmd_reset_wait, _ = facade_helper.run_git_command( cmd_reset, @@ -469,7 +465,7 @@ def git_repo_updates(facade_helper, repo_git): operation_description=f'git reset --hard origin/{remotedefault}' ) - cmd_clean = (f"git -C {absolute_path} clean -df") + cmd_clean = ["git", "-C", absolute_path, "clean", "-df"] return_code_clean, _ = facade_helper.run_git_command( cmd_clean, @@ -483,9 +479,7 @@ def git_repo_updates(facade_helper, repo_git): facade_helper.log_activity('Verbose', f'Second pass failed: {e}.') pass - cmdpull2 = (f"git -C {absolute_path} pull") - - print(cmdpull2) + cmdpull2 = ["git", "-C", absolute_path, "pull"] return_code, _ = facade_helper.run_git_command( cmdpull2, timeout=600, # 10 minutes for git pull diff --git a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py index 92546002ae..105eaf484c 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py @@ -107,7 +107,7 @@ def get_absolute_repo_path(repo_base_dir, repo_id, repo_path,repo_name): def get_parent_commits_set(absolute_repo_path, facade_helper, logger=None): - cmd = "git --git-dir %s log --ignore-missing --pretty=format:'%%H'" % (absolute_repo_path) + cmd = ["git", "--git-dir", absolute_repo_path, "log", "--ignore-missing", "--pretty=format:%H"] # Use facade_helper's unified git command runner return_code, stdout = facade_helper.run_git_command( From 3da58cecabd22ea0adbadcd9c69010a59404ce27 Mon Sep 17 00:00:00 2001 From: guptapratykshh Date: Thu, 15 Jan 2026 22:16:58 +0530 Subject: [PATCH 2/4] fix(#3577): implement python-based remote default parsing and strict typing Replaces git symbolic-ref with git remote show origin parsed in Python to ensure accuracy while preventing command injection. Updates type hints. Signed-off-by: guptapratykshh --- .../facade_worker/facade_worker/config.py | 4 +- .../facade_worker/facade_worker/repofetch.py | 47 +++++++++++++------ 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py index 7165945826..ee1a4c2f2b 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -257,7 +257,7 @@ def insert_or_update_data(self, query, **bind_args)-> None: def inc_repos_processed(self): self.repos_processed += 1 - def run_git_command(self, cmd: list, timeout: int, capture_output: bool = False, operation_description: str = None) -> tuple: + def run_git_command(self, cmd: list[str], timeout: int, capture_output: bool = False, operation_description: str = None) -> tuple: """ Execute a git command with timeout handling. @@ -265,7 +265,7 @@ def run_git_command(self, cmd: list, timeout: int, capture_output: bool = False, consistent timeout handling and error logging across all facade operations. Args: - cmd: The git command to execute as a list of arguments (e.g., ["git", "clone", url]) + cmd: The git command to execute timeout: Timeout in seconds capture_output: If True, capture stdout/stderr; if False, discard them operation_description: Human-readable description for error logging diff --git a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py index c8551805b7..95e8c1bd1e 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py @@ -340,24 +340,34 @@ def git_repo_updates(facade_helper, repo_git): # session.log_activity('Verbose', f'remote default is {logremotedefault}.') - getremotedefault = ["git", "-C", absolute_path, "symbolic-ref", "refs/remotes/origin/HEAD"] + getremotedefault = ["git", "-C", absolute_path, "remote", "show", "origin"] - return_code_remote, remotedefault = facade_helper.run_git_command( + return_code_remote, output = facade_helper.run_git_command( getremotedefault, - timeout=60, + timeout=60, # 1 minute for remote query capture_output=True, operation_description='get remote default branch' ) - if return_code_remote == 0 and remotedefault: - remotedefault = remotedefault.split('/')[-1] + + remotedefault = "" + if return_code_remote == 0 and output: + for line in output.split('\n'): + if "HEAD branch" in line: + parts = line.split(":", 1) + if len(parts) > 1: + remotedefault = parts[1].strip() + break facade_helper.log_activity( 'Verbose', f'remote default getting checked out is: {remotedefault}.') - checkout_cmd = ["git", "-C", absolute_path, "checkout", remotedefault] + getremotedefault = ["git", "-C", absolute_path, "checkout", remotedefault] + + facade_helper.log_activity( + 'Verbose', f"get remote default command is: \n \n git -C {absolute_path} checkout {remotedefault} \n \n ") return_code_remote_default_again, _ = facade_helper.run_git_command( - checkout_cmd, + getremotedefault, timeout=600, # 10 minutes for git checkout capture_output=False, operation_description=f'git checkout {remotedefault}' @@ -408,23 +418,30 @@ def git_repo_updates(facade_helper, repo_git): # session.log_activity('Verbose', f'remote default is {logremotedefault}.') - getremotedefault = ["git", "-C", absolute_path, "symbolic-ref", "refs/remotes/origin/HEAD"] + getremotedefault = ["git", "-C", absolute_path, "remote", "show", "origin"] - return_code_remote, remotedefault = facade_helper.run_git_command( + return_code_remote, output = facade_helper.run_git_command( getremotedefault, - timeout=60, + timeout=60, # 1 minute for remote query capture_output=True, operation_description='get remote default branch' ) - if return_code_remote == 0 and remotedefault: - remotedefault = remotedefault.split('/')[-1] + + remotedefault = "" + if return_code_remote == 0 and output: + for line in output.split('\n'): + if "HEAD branch" in line: + parts = line.split(":", 1) + if len(parts) > 1: + remotedefault = parts[1].strip() + break try: - checkout_cmd = ["git", "-C", absolute_path, "checkout", remotedefault] + getremotedefault = ["git", "-C", absolute_path, "checkout", remotedefault] return_code_remote_default, _ = facade_helper.run_git_command( - checkout_cmd, + getremotedefault, timeout=600, # 10 minutes for git checkout capture_output=False, operation_description=f'git checkout {remotedefault}' @@ -480,6 +497,8 @@ def git_repo_updates(facade_helper, repo_git): pass cmdpull2 = ["git", "-C", absolute_path, "pull"] + + print(cmdpull2) return_code, _ = facade_helper.run_git_command( cmdpull2, timeout=600, # 10 minutes for git pull From a71e04f9b347e2eeac65d0140c702aa8ab3d45a3 Mon Sep 17 00:00:00 2001 From: guptapratykshh Date: Thu, 15 Jan 2026 23:51:16 +0530 Subject: [PATCH 3/4] refactor: extract remote branch parsing to helper and clean up debug code Signed-off-by: guptapratykshh --- .../facade_worker/facade_worker/repofetch.py | 18 ++++-------------- .../facade_worker/utilitymethods.py | 14 +++++++++++++- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py index 95e8c1bd1e..c8476ba256 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py @@ -30,7 +30,7 @@ import os import pathlib import sqlalchemy as s -from .utilitymethods import update_repo_log, get_absolute_repo_path +from .utilitymethods import update_repo_log, get_absolute_repo_path, parse_remote_default_branch from sqlalchemy.orm.exc import NoResultFound from augur.application.db.models.augur_data import * from augur.application.db.models.augur_operations import CollectionStatus @@ -351,12 +351,7 @@ def git_repo_updates(facade_helper, repo_git): remotedefault = "" if return_code_remote == 0 and output: - for line in output.split('\n'): - if "HEAD branch" in line: - parts = line.split(":", 1) - if len(parts) > 1: - remotedefault = parts[1].strip() - break + remotedefault = parse_remote_default_branch(output) facade_helper.log_activity( 'Verbose', f'remote default getting checked out is: {remotedefault}.') @@ -429,12 +424,7 @@ def git_repo_updates(facade_helper, repo_git): remotedefault = "" if return_code_remote == 0 and output: - for line in output.split('\n'): - if "HEAD branch" in line: - parts = line.split(":", 1) - if len(parts) > 1: - remotedefault = parts[1].strip() - break + remotedefault = parse_remote_default_branch(output) try: @@ -498,7 +488,7 @@ def git_repo_updates(facade_helper, repo_git): cmdpull2 = ["git", "-C", absolute_path, "pull"] - print(cmdpull2) + return_code, _ = facade_helper.run_git_command( cmdpull2, timeout=600, # 10 minutes for git pull diff --git a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py index 105eaf484c..8316ce16b1 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py @@ -219,4 +219,16 @@ def update_facade_scheduling_fields(repo_git, weight, commit_count): session.commit() - +def parse_remote_default_branch(git_remote_output): + """ + Parses the output of 'git remote show origin' to find the HEAD branch. + """ + if not git_remote_output: + return "" + + for line in git_remote_output.split('\\n'): + if "HEAD branch" in line: + parts = line.split(":", 1) + if len(parts) > 1: + return parts[1].strip() + return "" From 8d7d2a38db33881f052d6ea6defcf76083245ddd Mon Sep 17 00:00:00 2001 From: guptapratykshh Date: Fri, 16 Jan 2026 01:04:12 +0530 Subject: [PATCH 4/4] style: revert logging format to use joined command list Signed-off-by: guptapratykshh --- augur/tasks/git/util/facade_worker/facade_worker/repofetch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py index c8476ba256..e60db9772d 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py @@ -359,7 +359,7 @@ def git_repo_updates(facade_helper, repo_git): getremotedefault = ["git", "-C", absolute_path, "checkout", remotedefault] facade_helper.log_activity( - 'Verbose', f"get remote default command is: \n \n git -C {absolute_path} checkout {remotedefault} \n \n ") + 'Verbose', f"get remote default command is: \n \n {' '.join(getremotedefault)} \n \n ") return_code_remote_default_again, _ = facade_helper.run_git_command( getremotedefault,