diff --git a/lib/galaxy/datatypes/binary.py b/lib/galaxy/datatypes/binary.py index f8dfa4b58bc8..0db1eb270fab 100644 --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -28,6 +28,7 @@ ) from galaxy.datatypes.metadata import DictParameter, ListParameter, MetadataElement, MetadataParameter from galaxy.datatypes.sniff import build_sniff_from_prefix +from galaxy.datatypes.util.generic_util import call_pysam_index from galaxy.util import nice_size, sqlite from galaxy.util.checkers import is_bz2, is_gzip from . import data, dataproviders @@ -488,13 +489,8 @@ def dataset_content_needs_grooming(self, file_name): # If pysam fails to index a file it will write to stderr, # and this causes the set_meta script to fail. So instead # we start another process and discard stderr. - if index_flag == '-b': - # IOError: No such file or directory: '-b' if index_flag is set to -b (pysam 0.15.4) - cmd = ['python', '-c', f"import pysam; pysam.set_verbosity(0); pysam.index('{file_name}', '{index_name}')"] - else: - cmd = ['python', '-c', f"import pysam; pysam.set_verbosity(0); pysam.index('{index_flag}', '{file_name}', '{index_name}')"] - with open(os.devnull, 'w') as devnull: - subprocess.check_call(cmd, stderr=devnull, shell=False) + call_pysam_index(file_name, index_name, index_flag=index_flag, + stderr=os.devnull) needs_sorting = False except subprocess.CalledProcessError: needs_sorting = True @@ -516,11 +512,7 @@ def set_meta(self, dataset, overwrite=True, **kwd): index_file = dataset.metadata.bam_csi_index if not index_file: index_file = dataset.metadata.spec[spec_key].param.new_file(dataset=dataset) - if index_flag == '-b': - # IOError: No such file or directory: '-b' if index_flag is set to -b (pysam 0.15.4) - pysam.index(dataset.file_name, index_file.file_name) - else: - pysam.index(index_flag, dataset.file_name, index_file.file_name) + call_pysam_index(dataset.file_name, index_file.file_name, index_flag=index_flag) dataset.metadata.bam_index = index_file 
def call_pysam_index(file_name, index_name, index_flag=None, stderr=None):
    """
    Build an index for ``file_name`` by running ``pysam.index`` in a child process.

    The pysam.index call can block the GIL, which can pause all threads, including
    the heartbeat thread. Therefore, start it as an external process.

    :param file_name: path of the alignment file to index.
    :param index_name: path the index is written to.
    :param index_flag: optional flag placed before the paths in the
        ``pysam.index`` call (e.g. ``'-c'``). ``None``, an empty value and
        ``'-b'`` all result in the flag being omitted.
    :param stderr: optional path of a file that receives the child's stderr
        (e.g. ``os.devnull``). When falsy, the child inherits the parent's stderr.
    :raises subprocess.CalledProcessError: if the child process exits non-zero.
    """
    # Function-scope import so this helper does not depend on the module's
    # top-level imports already providing ``subprocess``.
    import subprocess

    if index_flag == '-b' or not index_flag:
        # IOError: No such file or directory: '-b' if index_flag is set to -b (pysam 0.15.4)
        pysam_args = [str(file_name), str(index_name)]
    else:
        pysam_args = [str(index_flag), str(file_name), str(index_name)]

    # The paths are handed over through sys.argv instead of being pasted into
    # the source of the ``-c`` script: a quote or backslash in a file name can
    # then neither break the script nor inject code into it. ``-c`` ends
    # interpreter option parsing, so a leading flag such as '-c' in pysam_args
    # reaches the script untouched.
    script = "import sys, pysam; pysam.set_verbosity(0); pysam.index(*sys.argv[1:])"
    cmd = ['python', '-c', script] + pysam_args

    if stderr:
        # Use a separate name for the handle so the ``stderr`` argument is not shadowed.
        with open(stderr, 'w') as stderr_handle:
            subprocess.check_call(cmd, stderr=stderr_handle, shell=False)
    else:
        subprocess.check_call(cmd, shell=False)