Skip to content

Commit 5e46038

Browse files
Merge pull request #52 from KCCG/release/1.1.0
Release/1.1.0
2 parents cb2394e + 02566c7 commit 5e46038

37 files changed

+357
-39
lines changed

Dockerfile

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@ FROM python:3.10.6-slim
33

44
ARG TAG
55

6-
# Install freebayes dependencies
6+
# Install dependencies
77
RUN apt-get update -yqq && \
88
apt-get install -yqq \
99
make build-essential libssl-dev zlib1g-dev libbz2-dev \
1010
libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev \
11-
xz-utils tk-dev libffi-dev liblzma-dev python3-openssl git tabix \
11+
xz-utils tk-dev libffi-dev liblzma-dev python3-openssl git tabix vcfanno \
1212
freebayes && \
1313
apt-get clean
1414

Dockerfile.dev

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@ FROM python:3.10.6-slim
33

44
ARG TAG
55

6-
# Install freebayes dependencies
6+
# Install dependencies
77
RUN apt-get update -yqq && \
88
apt-get install -yqq \
99
make build-essential libssl-dev zlib1g-dev libbz2-dev \
1010
libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev \
11-
xz-utils tk-dev libffi-dev liblzma-dev python3-openssl git tabix \
11+
xz-utils tk-dev libffi-dev liblzma-dev python3-openssl git tabix vcfanno \
1212
freebayes && \
1313
apt-get clean
1414

395 Bytes
Binary file not shown.
Binary file not shown.
70.3 KB
Binary file not shown.
109 Bytes
Binary file not shown.
47.3 KB
Binary file not shown.
126 Bytes
Binary file not shown.
24 KB
Binary file not shown.
125 Bytes
Binary file not shown.
84.5 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.

mitylib/call.py

+12-8
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Mitochondrial variant calling."""
2+
23
import subprocess
34
import logging
45
import os.path
@@ -78,7 +79,7 @@ def run(self):
7879
else:
7980
logger.setLevel(logging.INFO)
8081

81-
if (self.bam_list):
82+
if self.bam_list:
8283
self.get_files_from_list()
8384
self.run_checks()
8485
self.set_strings()
@@ -228,7 +229,8 @@ def run_checks(self):
228229

229230
def bam_has_rg(self, bam):
230231
"""
231-
Check whether a BAM or CRAM file contains a valid @RG header, which is critical for accurate variant calling with mity.
232+
Check whether a BAM or CRAM file contains a valid @RG header,
233+
which is critical for accurate variant calling with mity.
232234
233235
Parameters:
234236
- bam (str): Path to a BAM or CRAM file.
@@ -275,7 +277,8 @@ def make_prefix(self, file_name: str, prefix: str = None):
275277
276278
Parameters:
277279
file_name (str): The filename, including extensions (e.g., .vcf, .bam, .cram, .bed).
278-
prefix (str, optional): An optional custom prefix. If None, the function generates a prefix from the file name.
280+
prefix (str, optional): An optional custom prefix. If None, the function generates a
281+
prefix from the file name.
279282
280283
Returns:
281284
str: The generated or custom prefix for the Mity function.
@@ -291,24 +294,25 @@ def make_prefix(self, file_name: str, prefix: str = None):
291294
)
292295
else:
293296
raise ValueError("Unsupported file type")
294-
297+
295298
def get_files_from_list(self, die: bool = True):
296299
"""
297300
Get the list of BAM / CRAM files from the provided file
298301
"""
299302
if len(self.files) > 1:
300-
raise ValueError("--bam-file-list Argument expects only 1 file to be provided.")
301-
with open(self.files[0], 'r') as f:
303+
raise ValueError(
304+
"--bam-file-list Argument expects only 1 file to be provided."
305+
)
306+
with open(self.files[0], "r") as f:
302307
self.files = f.read().splitlines()
303308

304-
305309
def check_missing_file(self, file_list: str, die: bool = True):
306310
"""
307311
Check if input files exist.
308312
"""
309313
missing_files = []
310314
for item in file_list:
311-
if item.lower().startswith('http'):
315+
if item.lower().startswith("http"):
312316
try:
313317
urllib.request.urlopen(item).getcode()
314318
except:

mitylib/commands.py

+219
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@
77
Authors: Clare Puttick, Mark Cowley, Trent Zeng, Christian Fares
88
License: MIT
99
"""
10+
1011
import argparse
1112
import logging
13+
import os
1214

1315
from mitylib import call, normalise, report, merge, util
1416
from ._version import __version__
@@ -247,10 +249,13 @@ def _cmd_report(args):
247249
report.Report(
248250
debug=args.debug,
249251
vcfs=args.vcf,
252+
contig=args.contig,
250253
prefix=args.prefix,
251254
min_vaf=args.min_vaf,
252255
output_dir=args.output_dir,
253256
keep=args.keep,
257+
vcfanno_config=args.vcfanno_config,
258+
report_config=args.report_config,
254259
)
255260

256261

@@ -287,6 +292,25 @@ def _cmd_report(args):
287292
required=False,
288293
help="Keep all intermediate files",
289294
)
295+
P_report.add_argument(
296+
"--contig",
297+
choices=["MT", "chrM"],
298+
default="MT",
299+
required=False,
300+
help="Contig used for annotation purposes",
301+
)
302+
P_report.add_argument(
303+
"--custom-vcfanno-config",
304+
action="store",
305+
help="Provide a custom vcfanno-config.toml for custom annotations.",
306+
dest="vcfanno_config",
307+
)
308+
P_report.add_argument(
309+
"--custom-report-config",
310+
action="store",
311+
help="Provide a custom report-config.yaml for custom report generation.",
312+
dest="report_config",
313+
)
290314
P_report.set_defaults(func=_cmd_report)
291315

292316

@@ -349,6 +373,201 @@ def _cmd_merge(args):
349373
)
350374
P_merge.set_defaults(func=_cmd_merge)
351375

376+
377+
# runall -----------------------------------------------------------------------
378+
379+
380+
def _cmd_runall(args):
381+
"""Run MITY call, normalise and report all in one go."""
382+
logging.info("mity %s", __version__)
383+
logging.info("mity runall")
384+
385+
genome = util.MityUtil.select_reference_genome(args.reference, None)
386+
args.reference = util.MityUtil.select_reference_fasta(args.reference, None)
387+
388+
call.Call(
389+
debug=args.debug,
390+
files=args.files,
391+
reference=args.reference,
392+
genome=genome,
393+
prefix=args.prefix,
394+
min_mq=args.min_mq,
395+
min_bq=args.min_bq,
396+
min_af=args.min_af,
397+
min_ac=args.min_ac,
398+
p=args.p,
399+
# normalise flag is set to true instead of running normalise separately
400+
normalise=True,
401+
output_dir=args.output_dir,
402+
region=args.region,
403+
bam_list=args.bam_file_list,
404+
keep=args.keep,
405+
)
406+
407+
logging.debug("mity call and normalise completed")
408+
409+
# This makes use of the uniform naming scheme of mity command outputs.
410+
normalised_vcf_path = os.path.join(
411+
args.output_dir, args.prefix + ".normalise.vcf.gz"
412+
)
413+
414+
logging.debug("assumed mity normalise vcf output path is: %s", normalised_vcf_path)
415+
416+
# matching argparse quirk
417+
normalised_vcf_path = [normalised_vcf_path]
418+
419+
report.Report(
420+
debug=args.debug,
421+
vcfs=normalised_vcf_path,
422+
contig=args.contig,
423+
prefix=args.prefix,
424+
min_vaf=args.min_vaf,
425+
output_dir=args.output_dir,
426+
keep=args.keep,
427+
vcfanno_config=args.vcfanno_config,
428+
report_config=args.report_config,
429+
)
430+
431+
432+
P_runall = AP_subparsers.add_parser("runall", help=_cmd_runall.__doc__)
433+
P_runall.add_argument(
434+
"-d", "--debug", action="store_true", help="Enter debug mode", required=False
435+
)
436+
P_runall.add_argument(
437+
"files",
438+
action="append",
439+
nargs="+",
440+
help="BAM / CRAM files to run the analysis on. If --bam-file-list is included, this argument is the file containing the list of bam/cram files.",
441+
)
442+
P_runall.add_argument(
443+
"--reference",
444+
choices=["hs37d5", "hg19", "hg38", "mm10"],
445+
default="hs37d5",
446+
required=False,
447+
help="Reference genome version to use. Default: hs37d5",
448+
)
449+
# For the runall command, we mandate that the prefix option is set. This is not
450+
# true for regular mity call, normalise or report separately.
451+
P_runall.add_argument(
452+
"--prefix",
453+
action="store",
454+
required=True,
455+
help="Output files will be named with PREFIX",
456+
)
457+
P_runall.add_argument(
458+
"--min-mapping-quality",
459+
action="store",
460+
type=int,
461+
default=30,
462+
help="Exclude alignments from analysis if they have a "
463+
"mapping quality less than MIN_MAPPING_QUALITY. "
464+
"Default: 30",
465+
dest="min_mq",
466+
)
467+
P_runall.add_argument(
468+
"--min-base-quality",
469+
action="store",
470+
type=int,
471+
default=24,
472+
help="Exclude alleles from analysis if their supporting "
473+
"base quality is less than MIN_BASE_QUALITY. "
474+
"Default: 24",
475+
dest="min_bq",
476+
)
477+
P_runall.add_argument(
478+
"--min-alternate-fraction",
479+
action="store",
480+
type=float,
481+
default=0.01,
482+
help="Require at least MIN_ALTERNATE_FRACTION "
483+
"observations supporting an alternate allele within "
484+
"a single individual in the in order to evaluate the "
485+
"position. Default: 0.01, range = [0,1]",
486+
dest="min_af",
487+
)
488+
P_runall.add_argument(
489+
"--min-alternate-count",
490+
action="store",
491+
type=int,
492+
default=4,
493+
help="Require at least MIN_ALTERNATE_COUNT observations "
494+
"supporting an alternate allele within a single "
495+
"individual in order to evaluate the position. "
496+
"Default: 4",
497+
dest="min_ac",
498+
)
499+
P_runall.add_argument(
500+
"--p",
501+
action="store",
502+
type=float,
503+
default=0.002,
504+
help="Minimum noise level. This is used to calculate QUAL score. "
505+
"Default: 0.002, range = [0,1]",
506+
dest="p",
507+
)
508+
P_runall.add_argument(
509+
"--output-dir",
510+
action="store",
511+
type=str,
512+
default=".",
513+
help="Output files will be saved in OUTPUT_DIR. " "Default: '.' ",
514+
dest="output_dir",
515+
)
516+
P_runall.add_argument(
517+
"--region",
518+
action="store",
519+
type=str,
520+
default=None,
521+
help="Region of MT genome to call variants in. "
522+
"If unset will call variants in entire MT genome as specified in BAM header. "
523+
"Default: Entire MT genome. ",
524+
dest="region",
525+
)
526+
P_runall.add_argument(
527+
"--bam-file-list",
528+
action="store_true",
529+
default=False,
530+
help="Treat the file as a text file of BAM files to be processed."
531+
" The path to each file should be on one row per bam file.",
532+
dest="bam_file_list",
533+
)
534+
P_runall.add_argument(
535+
"-k",
536+
"--keep",
537+
action="store_true",
538+
required=False,
539+
help="Keep all intermediate files",
540+
)
541+
P_runall.add_argument(
542+
"--min_vaf",
543+
action="store",
544+
type=float,
545+
default=0,
546+
help="A variant must have at least this VAF to be included in the report. Default: "
547+
"0.",
548+
)
549+
P_runall.add_argument(
550+
"--contig",
551+
choices=["MT", "chrM"],
552+
default="MT",
553+
required=False,
554+
help="Contig used for annotation purposes",
555+
)
556+
P_runall.add_argument(
557+
"--custom-vcfanno-config",
558+
action="store",
559+
help="Provide a custom vcfanno-config.toml for custom annotations.",
560+
dest="vcfanno_config",
561+
)
562+
P_runall.add_argument(
563+
"--custom-report-config",
564+
action="store",
565+
help="Provide a custom report-config.yaml for custom report generation.",
566+
dest="report_config",
567+
)
568+
P_runall.set_defaults(func=_cmd_runall)
569+
570+
352571
# version ----------------------------------------------------------------------
353572

354573

File renamed without changes.
+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
[[annotation]]
2+
file="annot_chrm/chrm_anticodon_positions.bed.gz"
3+
columns=[4]
4+
names=["anticodon"]
5+
ops=["self"]
6+
7+
[[annotation]]
8+
file="annot_chrm/chrm_gtf_annotations.bed.gz"
9+
columns=[4,5]
10+
names=["GENE","GENE_BIOTYPE"]
11+
ops=["self", "self"]
12+
13+
[[annotation]]
14+
file="annot_chrm/chrm_haplotype_data.vcf.gz"
15+
fields=["phylotree_mut","phylotree_haplotype"]
16+
ops=["self", "self"]
17+
18+
[[annotation]]
19+
file="annot_chrm/chrm_mgrb_variants.vcf.gz"
20+
fields = ["MGRB_FILTER","MGRB_AN","MGRB_AC","MGRB_frequency"]
21+
ops = ["self","self","self","self"]
22+
23+
[[annotation]]
24+
file="annot_chrm/chrm_mito_dna_func_loc.bed.gz"
25+
columns=[4,7]
26+
names=["Map_Locus", "Description"]
27+
ops=["self", "self"]
28+
29+
[[annotation]]
30+
file="annot_chrm/chrm_mitomap_panel_annotations.vcf.gz"
31+
fields = ["locus_mitomap","num_references_mitomap","variant_amino_acid_change_mitomap","codon_position_mitomap","codon_number_mitomap","disease_mitomap","num_disease_references_mitomap","RNA_mitomap","homoplasmy_mitomap","heteroplasmy_mitomap","status_mitomap","disease_amino_acid_change_mitomap","GenBank_frequency_mitomap","commercial_panels"]
32+
ops=["self","self","self","self","self","self","self","self","self","self","self","self","self","self"]
33+
34+
[[annotation]]
35+
file="annot_chrm/chrm_mitotip_score_fixed_del.vcf.gz"
36+
fields=["MitoTip_score","MitoTip_percentile","MitoTip_interpretation"]
37+
ops=["self","self","self"]

0 commit comments

Comments
 (0)