Skip to content

Commit

Permalink
Merge pull request #116 from martinghunt/mafft
Browse files Browse the repository at this point in the history
Add mafft
  • Loading branch information
martinghunt authored Jan 14, 2025
2 parents bb52a43 + 5bc6b36 commit 8be5b4e
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 2 deletions.
17 changes: 16 additions & 1 deletion .ci/install_dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -137,11 +137,26 @@ cd $install_root
mv read-it-and-keep/src/readItAndKeep .
rm -rf read-it-and-keep

#_________________________ mafft __________________________#
# The apt package of mafft cannot be used: its version is too old and does
# not work with how mafft is called from python (which avoids using files).
# Build it from source instead, following the no-root install instructions:
# https://mafft.cbrc.jp/alignment/software/installation_without_root.html
cd $install_root
wget https://mafft.cbrc.jp/alignment/software/mafft-7.525-without-extensions-src.tgz
tar xf mafft-7.525-without-extensions-src.tgz
cd mafft-7.525-without-extensions/core
# Rewrite the Makefile's PREFIX so that "make install" targets
# $install_root/mafft_install instead of /usr/local.
sed -i "s~PREFIX = /usr/local~PREFIX = $install_root/mafft_install~" Makefile
make
make install
cd $install_root
# cp -s creates a symbolic link rather than a copy, so the mafft command is
# reachable directly from $install_root.
cp -s mafft_install/bin/mafft .
# The glob matches both the downloaded tarball and the unpacked source tree.
rm -rf mafft-7.525-without-extensions*

#________________________ varifier __________________________#
# Install varifier with pip from a git clone pinned to a specific commit,
# then delete the clone.
cd $install_root
git clone https://github.com/iqbal-lab-org/varifier.git
cd varifier
# Two checkouts run back to back: the working tree is left at the second
# commit, which is therefore the one that pip installs.
git checkout 8bc8726ed3cdb337dc47b62515e709759e451137
git checkout 940d4503671f5dd09ce51116be6e77c85d50ada5
pip3 install .
cd ..
rm -rf varifier
Expand Down
5 changes: 5 additions & 0 deletions tests/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ def test_load_single_seq_fasta():
utils.load_single_seq_fasta(infile)


def test_seq_length_of_single_seq_fasta():
    """The single-sequence test FASTA should be reported as length 4."""
    fasta_path = os.path.join(data_dir, "load_single_seq_fasta.ok.fa")
    seq_length = utils.seq_length_of_single_seq_fasta(fasta_path)
    assert seq_length == 4


def test_check_tech_and_reads_options():
f = utils.check_tech_and_reads_options
options = mock.Mock()
Expand Down
1 change: 1 addition & 0 deletions tests/varifier_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def test_run_varifier():
varifier_out,
50,
900,
force_use_mafft=True,
)
assert set(got_seqs.values()) == {None}
assert got_vcf_header is None
Expand Down
5 changes: 5 additions & 0 deletions viridian/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,11 @@ def main(args=None):
help="Use this to not gzip the final output FASTA file and log.json file",
action="store_true",
)
advanced_parser.add_argument(
"--force_mafft",
help="Force use of mafft for global align between consensus and ref, instead of nucmer-based method. By default, mafft only used when the reference genome is longer than 30kbp",
action="store_true",
)

# --------------------------- ref options --------------------------------
ref_parser = argparse.ArgumentParser(add_help=False)
Expand Down
3 changes: 3 additions & 0 deletions viridian/one_sample_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def __init__(
temp_root=None,
fix_small_indels=True,
gzip_files=True,
force_mafft=False,
):
self.tech = tech
self.outdir = os.path.abspath(outdir)
Expand Down Expand Up @@ -111,6 +112,7 @@ def __init__(
if self.gzip_files:
self.json_log_file += ".gz"
self.final_masked_fasta += ".gz"
self.force_mafft = force_mafft

def set_command_line_dict(self):
# Make a dict of the command line options to go in the JSON output file.
Expand Down Expand Up @@ -481,6 +483,7 @@ def run_varifier(self):
sanitise_gaps=not self.force_consensus,
indel_fix_length=indel_fix_length,
debug=self.debug,
force_use_mafft=self.force_mafft,
)
if error_message is not None:
logging.warning(
Expand Down
1 change: 1 addition & 0 deletions viridian/tasks/run_one_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,5 @@ def run(options):
command_line_args=options,
temp_root=options.tmp_dir,
gzip_files=not options.no_gzip,
force_mafft=options.force_mafft,
)
4 changes: 4 additions & 0 deletions viridian/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ def load_single_seq_fasta(infile):
return ref


def seq_length_of_single_seq_fasta(infile):
    """Return the length of the sequence loaded from the FASTA file infile.

    Loading is delegated to load_single_seq_fasta; the value returned here
    is len() of whatever that function returns.
    """
    seq = load_single_seq_fasta(infile)
    return len(seq)


def check_tech_and_reads_options(args):
if args.run_accession is not None:
return True
Expand Down
7 changes: 6 additions & 1 deletion viridian/varifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@ def run_varifier(
sanitise_gaps=False,
indel_fix_length=None,
debug=False,
force_use_mafft=False,
):
use_mafft_opt = ""
if force_use_mafft or utils.seq_length_of_single_seq_fasta(ref_fasta) > 30_000:
use_mafft_opt = "--use_mafft"

debug = "--debug" if debug else ""
if sanitise_gaps:
unmasked_cons_fa = os.path.join(outdir, "04.qry_sanitised_gaps.fa")
Expand All @@ -32,7 +37,7 @@ def run_varifier(
"msa_unmasked_consensus": None,
"msa_ref": None,
}
command = f"varifier make_truth_vcf {debug} {sanitise_gaps_opt} {indel_fix} --global_align --global_align_min_coord {min_coord} --global_align_max_coord {max_coord} {cons_fasta} {ref_fasta} {outdir}"
command = f"varifier make_truth_vcf {debug} {sanitise_gaps_opt} {indel_fix} --global_align {use_mafft_opt} --global_align_min_coord {min_coord} --global_align_max_coord {max_coord} {cons_fasta} {ref_fasta} {outdir}"

try:
utils.syscall(command)
Expand Down

0 comments on commit 8be5b4e

Please sign in to comment.