Skip to content

Commit

Permalink
[refs #110] Init commit for Picard tasks
Browse files Browse the repository at this point in the history
  • Loading branch information
mark-welsh committed Sep 12, 2019
1 parent 0386ef8 commit 574b6a2
Show file tree
Hide file tree
Showing 5 changed files with 373 additions and 0 deletions.
61 changes: 61 additions & 0 deletions picard/CalcReadGroupChecksum.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
version 1.0

# Runs Picard CalculateReadGroupChecksum on a BAM file and exposes the
# checksum file it writes (<filename_prefix>.read_group_md5) as the output.
task CalcReadGroupChecksum {
    input {
        File ? java
        File picard

        File input_bam
        File input_bam_idx
        String filename_prefix

        String ? userString

        Array[String] modules = []
        Float memory = 16
        Int cpu = 1
    }

    # JVM heap (-Xmx) in whole GB; the runtime block requests 1.5x this value.
    Int jvm_memory = round(memory)
    String output_filename = filename_prefix + ".read_group_md5"

    command {
        set -Eeuxo pipefail;

        for MODULE in ~{sep=' ' modules}; do
            module load $MODULE
        done;

        ~{default="java" java} \
            -Xmx~{jvm_memory}g \
            -jar ~{default="picard" picard} CalculateReadGroupChecksum \
            INPUT=~{input_bam} \
            ~{userString} \
            OUTPUT=~{output_filename};
    }

    output {
        File md5_file = "~{output_filename}"
    }

    runtime {
        memory: jvm_memory * 1.5 + " GB"
        cpu: cpu
    }

    parameter_meta {
        java: "Path to Java."
        picard: "Picard jar file."
        input_bam: "BAM file to generate checksum for"
        input_bam_idx: "Index file for input_bam."
        filename_prefix: "prefix for output files."
        userString: "An optional parameter which allows the user to specify additions to the command line at run time."
        modules: "Environment modules to load with 'module load' before running the command."
        memory: "GB of RAM to use at runtime."
        cpu: "Number of CPUs to use at runtime."
    }

    meta {
        author: "Mark Welsh"
        email: "[email protected]"
        picard_version: "2.19.0"
        version: "0.1.0"
    }
}
91 changes: 91 additions & 0 deletions picard/CollectMultipleMetrics.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
version 1.0

# Runs Picard CollectMultipleMetrics with one PROGRAM= argument per entry in
# `programs`, using filename_prefix as the OUTPUT basename. Every output is
# optional because which metrics files exist depends on the programs selected.
task CollectMultipleMetrics {
    input {
        File ? java
        File picard
        Array[String] programs = ["CollectAlignmentSummaryMetrics",
                                  "CollectInsertSizeMetrics",
                                  "CollectSequencingArtifactMetrics",
                                  "CollectGcBiasMetrics",
                                  "QualityScoreDistribution"]

        File input_bam
        String filename_prefix

        File reference
        File reference_idx
        File reference_dict

        String validation_stringency = "SILENT"
        String ? sort_order
        String ? userString

        Array[String] modules = []
        Float memory = 16
        Int cpu = 1
    }

    # JVM heap (-Xmx) in whole GB; the runtime block requests 1.5x this value.
    Int jvm_memory = round(memory)

    # NOTE(review): when sort_order is set the command passes
    # ASSUME_SORT_ORDER=<value>. Picard's CollectMultipleMetrics documentation
    # lists a boolean ASSUME_SORTED option instead; confirm that
    # ASSUME_SORT_ORDER is accepted by the Picard version in use before relying
    # on this input.
    command {
        set -Eeuxo pipefail;

        for MODULE in ~{sep=' ' modules}; do
            module load $MODULE
        done;

        ~{default="java" java} \
            -Xmx~{jvm_memory}g \
            -jar ~{default="picard" picard} CollectMultipleMetrics \
            ~{sep=" " prefix("PROGRAM=", programs)} \
            ~{userString} \
            VALIDATION_STRINGENCY=~{validation_stringency} \
            REFERENCE_SEQUENCE=~{reference} \
            INPUT=~{input_bam} \
            ~{"ASSUME_SORT_ORDER=" + sort_order} \
            OUTPUT=~{filename_prefix};
    }

    output {
        File ? alignment_summary_metrics = "~{filename_prefix}" + ".alignment_summary_metrics"
        File ? bait_bias_detail_metrics = "~{filename_prefix}" + ".bait_bias_detail_metrics"
        File ? bait_bias_summary_metrics = "~{filename_prefix}" + ".bait_bias_summary_metrics"
        File ? base_distribution_metrics = "~{filename_prefix}" + ".base_distribution_by_cycle_metrics"
        File ? error_summary_metrics = "~{filename_prefix}" + ".error_summary_metrics"
        File ? gc_bias_detail_metrics = "~{filename_prefix}" + ".gc_bias.detail_metrics"
        File ? gc_bias_summary_metrics = "~{filename_prefix}" + ".gc_bias.summary_metrics"
        File ? insert_size_metrics = "~{filename_prefix}" + ".insert_size_metrics"
        File ? pre_adapter_detail_metrics = "~{filename_prefix}" + ".pre_adapter_detail_metrics"
        File ? pre_adapter_summary_metrics = "~{filename_prefix}" + ".pre_adapter_summary_metrics"
        File ? quality_by_cycle_metrics = "~{filename_prefix}" + ".quality_by_cycle_metrics"
        File ? quality_distribution_metrics = "~{filename_prefix}" + ".quality_distribution_metrics"
    }

    runtime {
        memory: jvm_memory * 1.5 + " GB"
        cpu: cpu
    }

    parameter_meta {
        java: "Path to Java."
        picard: "Picard jar file."
        programs: "List of strings, one string for each metrics tool to run."
        reference: "Reference sequence file."
        reference_idx: "Reference sequence index (.fai)."
        reference_dict: "Reference sequence dictionary (.dict)."
        filename_prefix: "prefix for output files."
        input_bam: "Sorted BAM file."
        validation_stringency: "Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded."
        sort_order: "Optional value appended to the command line as ASSUME_SORT_ORDER=<value>."
        userString: "An optional parameter which allows the user to specify additions to the command line at run time."
        modules: "Environment modules to load with 'module load' before running the command."
        memory: "GB of RAM to use at runtime."
        cpu: "Number of CPUs to use at runtime."
    }

    meta {
        author: "Mark Welsh"
        email: "[email protected]"
        picard_version: "2.19.0"
        version: "0.1.0"
    }
}
63 changes: 63 additions & 0 deletions picard/CollectQualityYieldMetrics.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
version 1.0

# Runs Picard CollectQualityYieldMetrics on a BAM file and exposes the metrics
# file it writes (<filename_prefix>.unmapped.quality_yield_metrics).
task CollectQualityYieldMetrics {
    input {
        File ? java
        File picard

        File input_bam
        String filename_prefix

        String use_original_qualities = "true"
        String ? userString

        Array[String] modules = []
        Float memory = 16
        Int cpu = 1
    }

    # JVM heap (-Xmx) in whole GB; the runtime block requests 1.5x this value.
    Int jvm_memory = round(memory)
    String output_filename = filename_prefix + ".unmapped.quality_yield_metrics"

    command {
        set -Eeuxo pipefail;

        for MODULE in ~{sep=' ' modules}; do
            module load $MODULE
        done;

        # OUTPUT must be the same path the output block declares; writing to
        # the bare prefix left the declared output file missing.
        ~{default="java" java} \
            -Xmx~{jvm_memory}g \
            -jar ~{default="picard" picard} CollectQualityYieldMetrics \
            ~{userString} \
            USE_ORIGINAL_QUALITIES=~{use_original_qualities} \
            INPUT=~{input_bam} \
            OUTPUT=~{output_filename};
    }

    output {
        File quality_yield_metrics = "~{output_filename}"
    }

    runtime {
        memory: jvm_memory * 1.5 + " GB"
        cpu: cpu
    }

    parameter_meta {
        java: "Path to Java."
        picard: "Picard jar file."
        filename_prefix: "prefix for output files."
        input_bam: "Sorted BAM file."
        use_original_qualities: "Value passed to Picard as USE_ORIGINAL_QUALITIES (default 'true')."
        userString: "An optional parameter which allows the user to specify additions to the command line at run time."
        modules: "Environment modules to load with 'module load' before running the command."
        memory: "GB of RAM to use at runtime."
        cpu: "Number of CPUs to use at runtime."
    }

    meta {
        author: "Mark Welsh"
        email: "[email protected]"
        picard_version: "2.19.0"
        version: "0.1.0"
    }
}
80 changes: 80 additions & 0 deletions picard/CollectVariantCallingMetrics.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
version 1.0

# Runs Picard CollectVariantCallingMetrics on a gVCF (GVCF_INPUT=true) and
# exposes the summary and detail metrics files it writes.
task CollectVariantCallingMetrics {
    input {
        File ? java
        File picard

        File input_gvcf
        File input_gvcf_idx
        String filename_prefix

        Array[File] intervals

        File reference
        File reference_idx
        File reference_dict
        File dbsnp
        File dbsnp_idx

        String ? userString

        Array[String] modules = []
        Float memory = 16
        Int cpu = 16
    }

    # JVM heap (-Xmx) in whole GB; the runtime block requests 1.5x this value.
    Int jvm_memory = round(memory)

    command {
        set -Eeuxo pipefail;

        for MODULE in ~{sep=' ' modules}; do
            module load $MODULE
        done;

        # OUTPUT is a basename: Picard appends .variant_calling_summary_metrics
        # and .variant_calling_detail_metrics to it. Each interval file gets
        # its own TARGET_INTERVALS= prefix so that no path is passed as a bare
        # positional argument.
        ~{default="java" java} \
            -Xmx~{jvm_memory}g \
            -jar ~{default="picard" picard} CollectVariantCallingMetrics \
            INPUT=~{input_gvcf} \
            OUTPUT=~{filename_prefix} \
            SEQUENCE_DICTIONARY=~{reference_dict} \
            DBSNP=~{dbsnp} \
            THREAD_COUNT=~{cpu} \
            ~{userString} \
            GVCF_INPUT=true \
            ~{sep=" " prefix("TARGET_INTERVALS=", intervals)}
    }

    output {
        # gvcf_metrics keeps its original name and now points at the summary
        # file Picard actually produces; the detail file is exposed alongside.
        File gvcf_metrics = "~{filename_prefix}.variant_calling_summary_metrics"
        File gvcf_detail_metrics = "~{filename_prefix}.variant_calling_detail_metrics"
    }

    runtime {
        memory: jvm_memory * 1.5 + " GB"
        cpu: cpu
    }

    parameter_meta {
        java: "Path to Java."
        picard: "Picard jar file."
        input_gvcf: "gVCF file to collect variant calling metrics for."
        input_gvcf_idx: "Index file for input_gvcf."
        reference: "Reference sequence file."
        reference_idx: "Reference sequence index (.fai)."
        reference_dict: "Reference sequence dictionary (.dict)."
        filename_prefix: "prefix for output files."
        dbsnp: "dbSNP VCF file. This is required by this Picard command."
        dbsnp_idx: "Index file for dbsnp."
        intervals: "Interval list file(s) passed to Picard as TARGET_INTERVALS. Picard documents TARGET_INTERVALS as a single-valued option, so supply one file."
        userString: "An optional parameter which allows the user to specify additions to the command line at run time."
        modules: "Environment modules to load with 'module load' before running the command."
        memory: "GB of RAM to use at runtime."
        cpu: "Number of CPUs to use at runtime."
    }

    meta {
        author: "Mark Welsh"
        email: "[email protected]"
        picard_version: "2.19.0"
        version: "0.1.0"
    }
}

78 changes: 78 additions & 0 deletions picard/CollectWgsMetrics.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
version 1.0

# Runs Picard CollectWgsMetrics on a BAM file and exposes the metrics file it
# writes (<filename_prefix>.wgs_metrics.txt).
task CollectWgsMetrics {
    input {
        File ? java
        File picard

        File input_bam
        File input_bam_idx
        String filename_prefix

        Array[File] intervals

        File reference
        File reference_idx
        File reference_dict

        String validation_stringency = "SILENT"
        String userString = "USE_FAST_ALGORITHM=true"

        Array[String] modules = []
        Float memory = 16
        Int cpu = 1
    }

    # JVM heap (-Xmx) in whole GB; the runtime block requests 1.5x this value.
    Int jvm_memory = round(memory)
    String output_filename = filename_prefix + ".wgs_metrics.txt"

    command {
        set -Eeuxo pipefail;

        for MODULE in ~{sep=' ' modules}; do
            module load $MODULE
        done;

        # Each interval file gets its own INTERVALS= prefix: a plain
        # space-joined list would pass every file after the first as a bare
        # positional argument, and an empty list would emit a dangling
        # "INTERVALS=".
        ~{default="java" java} \
            -Xmx~{jvm_memory}g \
            -jar ~{default="picard" picard} CollectWgsMetrics \
            INPUT=~{input_bam} \
            OUTPUT=~{output_filename} \
            REFERENCE_SEQUENCE=~{reference} \
            VALIDATION_STRINGENCY=~{validation_stringency} \
            ~{userString} \
            ~{sep=" " prefix("INTERVALS=", intervals)}
    }

    output {
        File wgs_metrics = "~{output_filename}"
    }

    runtime {
        memory: jvm_memory * 1.5 + " GB"
        cpu: cpu
    }

    parameter_meta {
        java: "Path to Java."
        picard: "Picard jar file."
        reference: "Reference sequence file."
        reference_idx: "Reference sequence index (.fai)."
        reference_dict: "Reference sequence dictionary (.dict)."
        filename_prefix: "prefix for output files."
        input_bam: "Sorted BAM file."
        input_bam_idx: "Sorted BAM index file."
        validation_stringency: "Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded."
        intervals: "Interval list files to operate over."
        userString: "An optional parameter which allows the user to specify additions to the command line at run time."
        modules: "Environment modules to load with 'module load' before running the command."
        memory: "GB of RAM to use at runtime."
        cpu: "Number of CPUs to use at runtime."
    }

    meta {
        author: "Mark Welsh"
        email: "[email protected]"
        picard_version: "2.19.0"
        version: "0.1.0"
    }
}

0 comments on commit 574b6a2

Please sign in to comment.