Skip to content

Commit f53c157

Browse files
committed
Merge branch 'master' of github.com:AroneyS/ibis
2 parents 7de3e20 + 1c57c1e commit f53c157

4 files changed

Lines changed: 146 additions & 4 deletions

File tree

ibis/workflow/coassemble.smk

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -211,6 +211,7 @@ rule query_processing:
211211
params:
212212
sequence_identity=config["appraise_sequence_identity"],
213213
window_size=60,
214+
taxa_of_interest=config["taxa_of_interest"],
214215
threads:
215216
64
216217
script:

ibis/workflow/scripts/query_processing.py

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -20,12 +20,19 @@ def processing(
2020
query_read,
2121
pipe_read,
2222
SEQUENCE_IDENTITY=0.86,
23-
WINDOW_SIZE=60):
23+
WINDOW_SIZE=60,
24+
TAXA_OF_INTEREST=None):
2425

2526
if len(query_read) == 0:
2627
empty_output = pl.DataFrame(schema=OUTPUT_COLUMNS)
2728
return empty_output, empty_output
2829

30+
# Filter TAXA_OF_INTEREST
31+
if TAXA_OF_INTEREST:
32+
pipe_read = pipe_read.filter(
33+
pl.col("taxonomy").str.contains(TAXA_OF_INTEREST, literal=True)
34+
)
35+
2936
appraised = query_read.rename(
3037
# Rename to match appraise output
3138
# Query output: query_name, query_sequence, divergence, num_hits, coverage, sample, marker, hit_sequence, taxonomy
@@ -64,7 +71,8 @@ def pipeline(
6471
query_reads,
6572
pipe_reads,
6673
SEQUENCE_IDENTITY=0.86,
67-
WINDOW_SIZE=60):
74+
WINDOW_SIZE=60,
75+
TAXA_OF_INTEREST=None):
6876

6977
print(f"Polars using {str(pl.threadpool_size())} threads")
7078

@@ -73,7 +81,8 @@ def pipeline(
7381
pl.read_csv(query, separator="\t"),
7482
pl.read_csv(pipe, separator="\t"),
7583
SEQUENCE_IDENTITY,
76-
WINDOW_SIZE)
84+
WINDOW_SIZE,
85+
TAXA_OF_INTEREST)
7786
yield binned, unbinned
7887

7988
if __name__ == "__main__":
@@ -82,6 +91,7 @@ def pipeline(
8291

8392
SEQUENCE_IDENTITY = snakemake.params.sequence_identity
8493
WINDOW_SIZE = snakemake.params.window_size
94+
TAXA_OF_INTEREST = snakemake.params.taxa_of_interest
8595
query_reads = snakemake.input.query_reads
8696
pipe_reads = snakemake.input.pipe_reads
8797
binned_path = snakemake.output.binned
@@ -91,7 +101,8 @@ def pipeline(
91101
query_reads,
92102
pipe_reads,
93103
SEQUENCE_IDENTITY=SEQUENCE_IDENTITY,
94-
WINDOW_SIZE=WINDOW_SIZE
104+
WINDOW_SIZE=WINDOW_SIZE,
105+
TAXA_OF_INTEREST=TAXA_OF_INTEREST
95106
)
96107

97108
with open(binned_path, "ab") as binned_file, open(unbinned_path, "ab") as unbinned_file:

test/test_coassemble.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,111 @@ def test_coassemble_query_input(self):
334334
with open(cluster_path) as f:
335335
self.assertEqual(expected, f.read())
336336

337+
def test_coassemble_query_input_taxa_of_interest(self):
338+
with in_tempdir():
339+
cmd = (
340+
f"ibis coassemble "
341+
f"--forward {SAMPLE_READS_FORWARD} "
342+
f"--reverse {SAMPLE_READS_REVERSE} "
343+
f"--genomes {GENOMES} "
344+
f"--genome-transcripts {GENOME_TRANSCRIPTS} "
345+
f"--sample-query {SAMPLE_QUERY} "
346+
f"--sample-singlem {SAMPLE_QUERY_SINGLEM} "
347+
f"--sample-read-size {SAMPLE_READ_SIZE} "
348+
f"--taxa-of-interest \"p__Actinobacteriota\" "
349+
f"--output test "
350+
f"--conda-prefix {path_to_conda} "
351+
f"--snakemake-args \"cluster_graph\" "
352+
)
353+
extern.run(cmd)
354+
355+
config_path = os.path.join("test", "config.yaml")
356+
self.assertTrue(os.path.exists(config_path))
357+
358+
binned_path = os.path.join("test", "coassemble", "appraise", "binned.otu_table.tsv")
359+
self.assertTrue(os.path.exists(binned_path))
360+
expected = "\n".join(
361+
[
362+
"\t".join(["gene", "sample", "sequence", "num_hits", "coverage", "taxonomy", "found_in"]),
363+
""
364+
]
365+
)
366+
with open(binned_path) as f:
367+
self.assertEqual(expected, f.read())
368+
369+
unbinned_path = os.path.join("test", "coassemble", "appraise", "unbinned.otu_table.tsv")
370+
self.assertTrue(os.path.exists(unbinned_path))
371+
expected = "\n".join(
372+
[
373+
"\t".join([
374+
"gene",
375+
"sample",
376+
"sequence",
377+
"num_hits",
378+
"coverage",
379+
"taxonomy",
380+
"found_in",
381+
]),
382+
"\t".join([
383+
"S3.7.ribosomal_protein_S7",
384+
"sample_1",
385+
"TACCAGGTCCCGGTCGAGGTCCGTCCGATCCGCCAGACGACGCTCGCCCTGCGCTGGCTC",
386+
"5",
387+
"8.21",
388+
"Root; d__Bacteria; p__Actinobacteriota; c__Actinomycetia; o__Mycobacteriales; f__Mycobacteriaceae; g__Nocardia; s__Nocardia grenadensis",
389+
"",
390+
]),
391+
"\t".join([
392+
"S3.7.ribosomal_protein_S7",
393+
"sample_1",
394+
"TATCAGGTGCCTATTGAGGTAAGACCTGAAAGAAGACAGACTTTAGCGCTTCGCTGGATA",
395+
"1",
396+
"1.64",
397+
"Root; d__Bacteria; p__Actinobacteriota; c__Actinomycetia; o__Mycobacteriales; f__Mycobacteriaceae; g__Nocardia; s__Nocardia grenadensis2",
398+
"",
399+
]),
400+
"\t".join([
401+
"S3.7.ribosomal_protein_S7",
402+
"sample_1",
403+
"TATCAGGTGCCTATTGAGGTAAGACCTGAAAGAAGACAGACTTTAGCGCTTCGCTGGATC",
404+
"5",
405+
"8.21",
406+
"Root; d__Bacteria; p__Actinobacteriota; c__Actinomycetia; o__Mycobacteriales; f__Mycobacteriaceae; g__Nocardia; s__Nocardia grenadensis3",
407+
"",
408+
]),
409+
"\t".join([
410+
"S3.7.ribosomal_protein_S7",
411+
"sample_2",
412+
"TACCAGGTCCCGGTCGAGGTCCGTCCGATCCGCCAGACGACGCTCGCCCTGCGCTGGCTC",
413+
"3",
414+
"4.92",
415+
"Root; d__Bacteria; p__Actinobacteriota; c__Actinomycetia; o__Mycobacteriales; f__Mycobacteriaceae; g__Nocardia; s__Nocardia grenadensis",
416+
"",
417+
]),
418+
"\t".join([
419+
"S3.7.ribosomal_protein_S7",
420+
"sample_3",
421+
"TATCAGGTGCCTATTGAGGTAAGACCTGAAAGAAGACAGACTTTAGCGCTTCGCTGGATA",
422+
"6",
423+
"9.85",
424+
"Root; d__Bacteria; p__Actinobacteriota; c__Actinomycetia; o__Mycobacteriales; f__Mycobacteriaceae; g__Nocardia; s__Nocardia grenadensis2",
425+
"",
426+
]),
427+
""
428+
]
429+
)
430+
with open(unbinned_path) as f:
431+
self.assertEqual(expected, f.read())
432+
433+
edges_path = os.path.join("test", "coassemble", "target", "targets.tsv")
434+
self.assertTrue(os.path.exists(edges_path))
435+
436+
edges_path = os.path.join("test", "coassemble", "target", "elusive_edges.tsv")
437+
self.assertTrue(os.path.exists(edges_path))
438+
439+
cluster_path = os.path.join("test", "coassemble", "target", "elusive_clusters.tsv")
440+
self.assertTrue(os.path.exists(cluster_path))
441+
337442
def test_coassemble_single_assembly(self):
338443
with in_tempdir():
339444
cmd = (

test/test_query_processing.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,31 @@ def test_query_processing_remove_EIF(self):
224224
self.assertDataFrameEqual(expected_binned, observed_binned)
225225
self.assertDataFrameEqual(expected_unbinned, observed_unbinned)
226226

227+
def test_query_processing_target_taxa(self):
228+
query = pl.DataFrame([
229+
["sample_1", "AAA", 1, 5, 10, "genome_1", "S3.1", "AAA", "Root; d__Bacteria"],
230+
["sample_1", "AAB", 10, 5, 10, "genome_1", "S3.1", "AAA", "Root; d__Bacteria"],
231+
["sample_1", "CCC", 1, 5, 10, "genome_1", "S3.1", "AAA", "Root"],
232+
["sample_1", "CCD", 10, 5, 10, "genome_1", "S3.1", "AAA", "Root"],
233+
], schema=QUERY_COLUMNS)
234+
pipe = pl.DataFrame([
235+
["S3.1", "sample_1", "AAA", 5, 10, "Root; d__Bacteria; p__Planctomycetota"],
236+
["S3.1", "sample_1", "AAB", 5, 10, "Root; d__Bacteria; p__Planctomycetota"],
237+
["S3.1", "sample_1", "CCC", 5, 10, "Root"],
238+
["S3.1", "sample_1", "CCD", 5, 10, "Root"],
239+
], schema=PIPE_COLUMNS)
240+
241+
expected_binned = pl.DataFrame([
242+
["S3.1", "sample_1", "AAA", 5, 10, "Root; d__Bacteria; p__Planctomycetota", "genome_1"],
243+
], schema=APPRAISE_COLUMNS)
244+
expected_unbinned = pl.DataFrame([
245+
["S3.1", "sample_1", "AAB", 5, 10, "Root; d__Bacteria; p__Planctomycetota", None],
246+
], schema=APPRAISE_COLUMNS)
247+
248+
observed_binned, observed_unbinned = processing(query, pipe, TAXA_OF_INTEREST="p__Planctomycetota")
249+
self.assertDataFrameEqual(expected_binned, observed_binned)
250+
self.assertDataFrameEqual(expected_unbinned, observed_unbinned)
251+
227252

228253
if __name__ == '__main__':
229254
unittest.main()

0 commit comments

Comments
 (0)