diff --git a/bio/gatk/validatevariants/environment.yaml b/bio/gatk/validatevariants/environment.yaml new file mode 100644 index 00000000000..9559e675cbb --- /dev/null +++ b/bio/gatk/validatevariants/environment.yaml @@ -0,0 +1,7 @@ +channels: + - bioconda + - conda-forge + - defaults +dependencies: + - gatk4 ==4.1.4.1 + - snakemake-wrapper-utils ==0.1.3 \ No newline at end of file diff --git a/bio/gatk/validatevariants/meta.yaml b/bio/gatk/validatevariants/meta.yaml new file mode 100644 index 00000000000..d98eee89dca --- /dev/null +++ b/bio/gatk/validatevariants/meta.yaml @@ -0,0 +1,12 @@ +name: GATK ValidateVariants +description: Validate the correctness of the formatting of a VCF file with GATK ValidateVariants +url: https://gatk.broadinstitute.org/hc/en-us/articles/360037057272-ValidateVariants +authors: + - Graeme Ford +input: + vcf: VCF file to be validated +output: + - VCF output file +params: + extra: any extra commands as a string +notes: The Java heap size (``-Xmx``) is set automatically from ``resources.mem_mb``; further JVM options can be passed via the ``java_opts`` param. diff --git a/bio/gatk/validatevariants/test/Snakefile b/bio/gatk/validatevariants/test/Snakefile new file mode 100644 index 00000000000..e5ddd640ceb --- /dev/null +++ b/bio/gatk/validatevariants/test/Snakefile @@ -0,0 +1,13 @@ +rule vcf_spec_validation: + input: + vcf="sample.vcf", + output: + "results/sample_VALID.vcf", + log: + "results/sample_VALID.log", + params: + R="genome.fasta", + resources: + mem_mb=1024, + wrapper: + "master/bio/gatk/validatevariants" diff --git a/bio/gatk/validatevariants/test/genome.dict b/bio/gatk/validatevariants/test/genome.dict new file mode 100644 index 00000000000..128d1d54cb3 --- /dev/null +++ b/bio/gatk/validatevariants/test/genome.dict @@ -0,0 +1,3 @@ +@HD VN:1.5 +@SQ SN:ref LN:45 M5:7a66cae8ab14aef8d635bc80649e730b UR:file:/home/johannes/scms/snakemake-wrappers/bio/picard/createsequencedictionary/test/genome.fasta +@SQ SN:ref2 LN:40 M5:1636753510ec27476fdd109a6684680e 
UR:file:/home/johannes/scms/snakemake-wrappers/bio/picard/createsequencedictionary/test/genome.fasta diff --git a/bio/gatk/validatevariants/test/genome.fasta b/bio/gatk/validatevariants/test/genome.fasta new file mode 100644 index 00000000000..afe990a63bc --- /dev/null +++ b/bio/gatk/validatevariants/test/genome.fasta @@ -0,0 +1,4 @@ +>ref +AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT +>ref2 +aggttttataaaacaattaagtctacagagcaactacgcg diff --git a/bio/gatk/validatevariants/test/genome.fasta.fai b/bio/gatk/validatevariants/test/genome.fasta.fai new file mode 100644 index 00000000000..3daa621e608 --- /dev/null +++ b/bio/gatk/validatevariants/test/genome.fasta.fai @@ -0,0 +1,2 @@ +ref 45 5 45 46 +ref2 40 57 40 41 diff --git a/bio/gatk/validatevariants/test/sample.vcf b/bio/gatk/validatevariants/test/sample.vcf new file mode 100644 index 00000000000..f45893d467d --- /dev/null +++ b/bio/gatk/validatevariants/test/sample.vcf @@ -0,0 +1,18 @@ +##fileformat=VCFv4.0 +##fileDate=20170110 +##source=pindel +##reference=hg38 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT a +ref 8 . A G . PASS END=140434574;HOMLEN=5;HOMSEQ=AAAAA;SVLEN=35;SVTYPE=INS GT:AD 0/0:317,6 +ref2 10 . A AGTTA . 
"""Snakemake wrapper for GATK ValidateVariants.

Runs ``gatk ValidateVariants`` on ``snakemake.input.vcf``. The tool's stdout
is written to the first output file and its stderr to the rule's log file.
"""

__author__ = "Graeme Ford"
__copyright__ = "Copyright 2021, Graeme Ford"
__email__ = "graeme.ford@tuks.co.za"
__license__ = "MIT"

from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

# Optional free-form arguments, appended verbatim to the GATK command line.
extra = snakemake.params.get("extra", "")
# JVM options assembled by snakemake-wrapper-utils from the rule definition.
java_opts = get_java_opts(snakemake)

# stdout is redirected to the output file below, so only stderr is sent to the
# log; capturing stdout here as well would put two competing ">" redirections
# on the same command.
log = snakemake.log_fmt_shell(stdout=False, stderr=True)

shell(
    "gatk "
    "--java-options '{java_opts}' "
    "ValidateVariants "
    "-V {snakemake.input.vcf} "
    "{extra} "
    # A bare "{output}" is not defined inside a wrapper script; the output
    # path has to be reached through the injected ``snakemake`` object.
    "> {snakemake.output[0]} "
    "{log}"
)