Skip to content

Commit

Permalink
load samples and genomes from old config if not provided to iterate
Browse files Browse the repository at this point in the history
  • Loading branch information
AroneyS committed Nov 27, 2023
1 parent 17cd3cb commit e433bb0
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 3 deletions.
23 changes: 20 additions & 3 deletions ibis/ibis.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,18 @@ def combine_genome_singlem(genome_singlem, new_genome_singlem, path):
f.write(g.read())

def iterate(args):
if not (args.genomes or args.forward):
logging.info("Loading inputs from old config")
config_path = os.path.join(args.coassemble_output, "config.yaml")
old_config = load_config(config_path)

if not args.genomes:
args.genomes = [v for _,v in old_config["genomes"].items()]
args.no_genomes = False
if not args.forward:
args.forward = [v for _,v in old_config["reads_1"].items()]
args.reverse = [v for _,v in old_config["reads_2"].items()]

logging.info("Evaluating new bins")
if args.new_genomes_list:
args.new_genomes = read_list(args.new_genomes_list)
Expand Down Expand Up @@ -1169,8 +1181,9 @@ def base_argument_verification(args):
if (args.forward and args.forward_list) or (args.reverse and args.reverse_list) or (args.genomes and args.genomes_list):
raise Exception("General and list arguments are mutually exclusive")

def coassemble_argument_verification(args):
base_argument_verification(args)
def coassemble_argument_verification(args, iterate=False):
if not iterate:
base_argument_verification(args)
if (args.sample_query or args.sample_query_list or args.sample_query_dir) and not (args.sample_singlem or args.sample_singlem_list or args.sample_singlem_dir):
raise Exception("Input SingleM query (--sample-query) requires SingleM otu tables (--sample-singlem) for coverage")
if args.assemble_unmapped and args.single_assembly:
Expand Down Expand Up @@ -1233,7 +1246,11 @@ def coassemble_output_argument_verification(args):
raise Exception("Single assembly is incompatible with Ibis iterate")
if not args.aviary_outputs and not (args.new_genomes or args.new_genomes_list):
raise Exception("New genomes or aviary outputs must be provided for iteration")
coassemble_argument_verification(args)
if (args.forward and args.forward_list) or (args.reverse and args.reverse_list) or (args.genomes and args.genomes_list):
raise Exception("General and list arguments are mutually exclusive")
if not (args.genomes or args.forward) and not args.coassemble_output:
raise Exception("Reference genomes or forward reads must be provided if --coassemble-output not given")
coassemble_argument_verification(args, iterate=True)
iterate(args)

elif args.subparser_name == "build":
Expand Down
47 changes: 47 additions & 0 deletions test/data/mock_coassemble/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
%YAML 1.1
---
# Mock Ibis coassemble config (test/data/mock_coassemble/config.yaml), added as a test fixture.
# `ibis iterate` loads <coassemble-output>/config.yaml when neither genomes nor forward reads are given.
# NOTE(review): every path below is absolute under /mnt/hpccs01/home/aroneys/..., so it presumably
# only resolves on the machine that generated it — confirm how the tests cope with this.
Ibis_version: 0.9.6
max_threads: 8
coassembly_samples: []
singlem_metapackage: /mnt/hpccs01/home/aroneys/src/ibis_dev/test/data/singlem_metapackage.smpkg
# Genome name -> FASTA path; the values are reused as the genomes input when none is passed to iterate.
genomes:
GB_GCA_013286235.1: /mnt/hpccs01/home/aroneys/src/ibis_dev/test/data/GB_GCA_013286235.1.fna
taxa_of_interest: ''
max_coassembly_size:
assemble_unmapped: false
run_qc: false
sra: false
prodigal_meta: false
single_assembly: false
no_genomes: false
new_genomes: false
exclude_coassemblies:
appraise_sequence_identity: 0.86
min_coassembly_coverage: 10
num_coassembly_samples: 2
max_coassembly_samples: 2
max_recovery_samples: 20
unmapping_min_appraised: 0.1
unmapping_max_identity: 99
unmapping_max_alignment: 99
aviary_speed: fast
run_aviary: false
aviary_gtdbtk: gtdb_release
aviary_checkm2: CheckM2_database
aviary_memory: 500
aviary_threads: 64
test: false
mock_sra: false
aviary_dryrun: false
conda_prefix: /mnt/hpccs01/home/aroneys/src/ibis_dev/test/data/.conda
tmpdir: /tmp
# Sample name -> forward read path; the values are reused as the forward reads when none are passed to iterate.
reads_1:
sample_1: /mnt/hpccs01/home/aroneys/src/ibis_dev/test/data/sample_1.1.fq
sample_2: /mnt/hpccs01/home/aroneys/src/ibis_dev/test/data/sample_2.1.fq
sample_3: /mnt/hpccs01/home/aroneys/src/ibis_dev/test/data/sample_3.1.fq
# Sample name -> reverse read path; loaded together with reads_1 when forward reads are not passed.
reads_2:
sample_1: /mnt/hpccs01/home/aroneys/src/ibis_dev/test/data/sample_1.2.fq
sample_2: /mnt/hpccs01/home/aroneys/src/ibis_dev/test/data/sample_2.2.fq
sample_3: /mnt/hpccs01/home/aroneys/src/ibis_dev/test/data/sample_3.2.fq
snakemake_profile: ''
cluster_retries: 0
66 changes: 66 additions & 0 deletions test/test_iterate.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,72 @@ def test_iterate(self):
with open(cluster_path) as f:
self.assertEqual(expected, f.read())

def test_iterate_minimal(self):
    """Check `ibis iterate` when reads and genomes are not given on the command line.

    The command passes --coassemble-output but none of --forward, --reverse
    or --genomes. The written config is then expected to contain GENOMES
    plus the two new bins (relocated under test/recovered_bins), the
    SAMPLE_READS_FORWARD / SAMPLE_READS_REVERSE reads keyed by sample name,
    and the elusive clusters table is expected to hold one coassembly row.
    """
    with in_tempdir():
        command = "".join([
            f"ibis iterate ",
            f"--coassemble-output {MOCK_COASSEMBLE} ",
            f"--coassemble-unbinned {MOCK_UNBINNED} ",
            f"--coassemble-binned {MOCK_BINNED} ",
            f"--aviary-outputs {MOCK_COASSEMBLIES} ",
            f"--elusive-clusters {ELUSIVE_CLUSTERS} ",
            f"--singlem-metapackage {METAPACKAGE} ",
            f"--output test ",
            f"--conda-prefix {path_to_conda} ",
        ])
        extern.run(command)

        output_config = os.path.join("test", "config.yaml")
        self.assertTrue(os.path.exists(output_config))
        loaded = load_configfile(output_config)

        # The two bins expected to be picked up from the mock coassemble output.
        final_bins_dir = os.path.join(
            MOCK_COASSEMBLE, "coassemble", "coassembly_0", "recover", "bins", "final_bins"
        )
        new_bins = " ".join([
            os.path.join(final_bins_dir, "iteration_0-coassembly_0-0.fna"),
            os.path.join(final_bins_dir, "iteration_0-coassembly_0-1.fna"),
        ])

        # New bins are expected under the output's recovered_bins directory;
        # paths without the source prefix are left untouched by replace().
        source_prefix = MOCK_COASSEMBLE + "/coassemble/coassembly_0/recover/bins/final_bins/"
        target_prefix = os.getcwd() + "/test/recovered_bins/"
        expected_genomes = {}
        for genome_path in (GENOMES + " " + new_bins).split(" "):
            genome_name = os.path.splitext(os.path.basename(genome_path))[0]
            expected_genomes[genome_name] = genome_path.replace(source_prefix, target_prefix)
        self.assertEqual(expected_genomes, loaded["genomes"])

        expected_forward = {}
        for read_path in SAMPLE_READS_FORWARD.split(" "):
            stem = os.path.splitext(os.path.basename(read_path))[0]
            expected_forward[stem.removesuffix(".1")] = read_path
        self.assertEqual(expected_forward, loaded["reads_1"])

        expected_reverse = {}
        for read_path in SAMPLE_READS_REVERSE.split(" "):
            stem = os.path.splitext(os.path.basename(read_path))[0]
            expected_reverse[stem.removesuffix(".2")] = read_path
        self.assertEqual(expected_reverse, loaded["reads_2"])

        clusters_file = os.path.join("test", "coassemble", "target", "elusive_clusters.tsv")
        self.assertTrue(os.path.exists(clusters_file))

        header_row = "\t".join([
            "samples",
            "length",
            "total_targets",
            "total_size",
            "recover_samples",
            "coassembly",
        ])
        cluster_row = "\t".join([
            "sample_1,sample_3",
            "2",
            "1",
            "8456",
            "sample_1,sample_3",
            "coassembly_0",
        ])
        # Trailing empty element yields the final newline of the file.
        expected_table = "\n".join([header_row, cluster_row, ""])
        with open(clusters_file) as handle:
            self.assertEqual(expected_table, handle.read())

def test_iterate_genome_input(self):
with in_tempdir():
cmd = (
Expand Down

0 comments on commit e433bb0

Please sign in to comment.