From 24ab69c60931154daa41b32c557cbc0df411b735 Mon Sep 17 00:00:00 2001 From: Michele Berselli Date: Thu, 15 Feb 2024 14:58:21 -0500 Subject: [PATCH 1/8] updating objects --- .../metaworkflows/Hi-C_alignment_GRCh38.yaml | 7 + .../Illumina_alignment_GRCh38.yaml | 741 +++++++++++++++++- .../metaworkflows/ONT_alignment_GRCh38.yaml | 506 +++++++++++- .../PacBio_alignment_GRCh38.yaml | 506 +++++++++++- .../hifi_long_reads_alignment_GRCh38.yaml | 9 + ...ifi_long_reads_alignment_merge_GRCh38.yaml | 12 +- ...nt_long_reads_alignment_merge_GRCh38.yaml} | 14 +- ...ired-end_short_reads_alignment_GRCh38.yaml | 12 +- ...rt_reads_alignment_distributed_GRCh38.yaml | 18 +- ...ling_TNhaplotyper2_distributed_GRCh38.yaml | 22 +- ...plotyper2_distributed_TNfilter_GRCh38.yaml | 32 +- 11 files changed, 1832 insertions(+), 47 deletions(-) rename portal_objects/metaworkflows/{ont_long_reads_alignment_GRCh38.yaml => ont_long_reads_alignment_merge_GRCh38.yaml} (93%) diff --git a/portal_objects/metaworkflows/Hi-C_alignment_GRCh38.yaml b/portal_objects/metaworkflows/Hi-C_alignment_GRCh38.yaml index 5fa192f..02ee214 100644 --- a/portal_objects/metaworkflows/Hi-C_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/Hi-C_alignment_GRCh38.yaml @@ -88,6 +88,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -154,6 +155,12 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + output_status: Final Output + # These fields are required to link metadata for the naming + software: smaht:Software-Sentieon_BWA-MEM_202308.01 + alignment_details: + - Sorted + reference_genome: GRCh38 ## EC2 Configuration to use ######## #################################### diff --git a/portal_objects/metaworkflows/Illumina_alignment_GRCh38.yaml b/portal_objects/metaworkflows/Illumina_alignment_GRCh38.yaml index c55ffa0..a1f7100 100644 --- a/portal_objects/metaworkflows/Illumina_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/Illumina_alignment_GRCh38.yaml @@ -1,7 +1,7 @@ ## Pipeline information ##################################### # General information for the pipeline ############################################################# -name: paired-end_short_reads_alignment_distributed_GRCh38 +name: Illumina_alignment_GRCh38 description: Pipeline to align paired-end short reads data for a single sample. | Run Sentieon BWA-MEM for alignment, incorporate read groups, | mark duplicate reads, and recalibrate base and indel scores. | @@ -65,16 +65,129 @@ input: library_id: argument_type: parameter.string + length_required: + # Reads shorter than length_required will be discarded + argument_type: parameter.integer + shards_index: # These indexes need to match the number of shards in shards_file argument_type: parameter.array value: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15"] + read_length: + argument_type: parameter.integer + value: 150 + ## Workflows and dependencies ############################### # Information for the workflows and their dependencies ############################################################# workflows: +## Pre-processing ########################################### +# Steps to pre-process input files +# for alignment +############################################################# + + ## Workflow definition ##################### + # fastp_paired-end + ############################################ + fastp_paired-end: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_r1_fastq_gz: + argument_type: file.fastq_gz + source_argument_name: input_files_r1_fastq_gz + scatter: 1 + + input_file_r2_fastq_gz: + argument_type: file.fastq_gz + source_argument_name: input_files_r2_fastq_gz + scatter: 1 + + # Parameter argument + trim_poly_g: + # Force polyG tail trimming + argument_type: parameter.boolean + value: True + + disable_quality_filtering: + # If this option is specified, quality filtering is disabled + argument_type: parameter.boolean + value: True + + disable_adapter_trimming: + # If this option is specified, adapter trimming is disabled + argument_type: parameter.boolean + value: True + + length_required: + argument_type: parameter.integer + + nthreads: + argument_type: parameter.integer + value: 16 + + ## Output ########################## + # Output files for the workflow + #################################### + output: + + # File output + output_file_r1_fastq_gz: + description: fastp pre-processed FASTQ + read_pair_number: "R1" + data_category: + - Sequencing Reads + data_type: + - Unaligned Reads + output_status: Final Output + + output_file_r2_fastq_gz: + description: fastp pre-processed FASTQ + read_pair_number: "R2" + data_category: + - Sequencing Reads + data_type: + - Unaligned Reads + output_status: Final Output + + output_file_json: + description: fastp summary JSON + data_category: + - Quality Control + data_type: + - Statistics + + output_file_failed_fastq_gz: + description: fastp failed reads FASTQ + data_category: + - Sequencing Reads + data_type: + - Unaligned Reads + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.2xlarge + - m5.2xlarge + - m6i.2xlarge + - m6a.2xlarge + ebs_size: "3x" + ebs_optimized: True + spot_instance: True + run_name: run_fastp_paired-end + behavior_on_capacity_limit: wait_and_retry + +## Alignment ################################################ +# Alignment and post-processing steps +# to generate the final output +############################################################# + ## Workflow definition ##################### # sentieon_bwa-mem_sort ############################################ @@ -91,13 +204,13 @@ workflows: # File argument input_file_r1_fastq_gz: argument_type: file.fastq_gz - source_argument_name: input_files_r1_fastq_gz - scatter: 1 + source: fastp_paired-end + source_argument_name: output_file_r1_fastq_gz input_file_r2_fastq_gz: argument_type: file.fastq_gz - source_argument_name: input_files_r2_fastq_gz - scatter: 1 + source: fastp_paired-end + source_argument_name: output_file_r2_fastq_gz genome_reference_fasta: argument_type: file.fa @@ -117,6 +230,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -176,6 +290,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -237,7 +352,8 @@ workflows: data_category: - Sequencing Reads data_type: - - Aligned Reads + - Statistics + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -313,6 +429,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -331,7 +448,7 @@ workflows: behavior_on_capacity_limit: wait_and_retry ## Workflow definition ##################### - # shards_to_ReadGroups + # shards_to_ReadGroups ############################################ shards_to_ReadGroups: @@ -371,6 +488,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -426,6 +544,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -444,7 +563,7 @@ workflows: behavior_on_capacity_limit: wait_and_retry ## Workflow definition ##################### - # samtools_merge + # samtools_merge ############################################ samtools_merge: @@ -480,6 +599,12 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + output_status: Final Output + # These fields are required to link metadata for the naming + software: smaht:Software-Sentieon_BWA-MEM_202308.01 + alignment_details: + - Sorted + reference_genome: GRCh38 ## EC2 Configuration to use ######## #################################### @@ -494,3 +619,603 @@ workflows: spot_instance: False run_name: run_samtools_merge behavior_on_capacity_limit: wait_and_retry + +## Quality Controls ######################################### +# Steps to calculate quality metrics for +# the final output +############################################################# + + ## Workflow definition ##################### + ############################################ + samtools_stats: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + # Parameter argument + nthreads: + argument_type: parameter.integer + value: 2 + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: samtools stats output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_samtools_stats + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + samtools_flagstat: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + # Parameter argument + nthreads: + argument_type: parameter.integer + value: 2 + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: samtools flagstat output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_samtools_flagstat + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + samtools_idxstats: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: samtools idxstats output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - t3.small + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_samtools_idxstats + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + picard_CollectAlignmentSummaryMetrics: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + genome_reference_fasta: + argument_type: file.fa + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: picard CollectAlignmentSummaryMetrics output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_picard_CollectAlignmentSummaryMetrics + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + picard_CollectBaseDistributionByCycle: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: picard CollectBaseDistributionByCycle output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + output_chart_pdf: + description: picard CollectBaseDistributionByCycle output PDF + data_category: + - Quality Control + data_type: + - Image + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_picard_CollectBaseDistributionByCycle + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + picard_CollectGcBiasMetrics: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + genome_reference_fasta: + argument_type: file.fa + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: picard CollectGcBiasMetrics output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + output_summary_txt: + description: picard CollectGcBiasMetrics summary TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + output_chart_pdf: + description: picard CollectGcBiasMetrics output PDF + data_category: + - Quality Control + data_type: + - Image + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_picard_CollectGcBiasMetrics + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + picard_CollectInsertSizeMetrics: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: picard CollectInsertSizeMetrics output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + output_histogram_pdf: + description: picard CollectInsertSizeMetrics output PDF + data_category: + - Quality Control + data_type: + - Image + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_picard_CollectInsertSizeMetrics + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + picard_CollectWgsMetrics: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + genome_reference_fasta: + argument_type: file.fa + + # Parameter argument + read_length: + argument_type: parameter.integer + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: picard CollectWgsMetrics output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - r5.2xlarge + - r5n.2xlarge + - r5a.2xlarge + - r6i.2xlarge + - r6a.2xlarge + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: False + run_name: run_picard_CollectWgsMetrics + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + picard_MeanQualityByCycle: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: picard MeanQualityByCycle output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + output_chart_pdf: + description: picard MeanQualityByCycle output PDF + data_category: + - Quality Control + data_type: + - Image + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_picard_MeanQualityByCycle + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + bamstats: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: bamstats output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_bamstats + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + # parse-qc_BAM_Quality_Metrics_paired-end + ############################################ + parse-qc_BAM_Quality_Metrics_paired-end: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + mount: True + # this file is actually not used by the code but is needed + # to specify where the quality metrics object need to be linked + + SAMTOOLS_stats_OUTPUT: + argument_type: file.txt + source: samtools_stats + source_argument_name: output_file_txt + + SAMTOOLS_flagstat_OUTPUT: + argument_type: file.txt + source: samtools_flagstat + source_argument_name: output_file_txt + + SAMTOOLS_idxstats_OUTPUT: + argument_type: file.txt + source: samtools_idxstats + source_argument_name: output_file_txt + + PICARD_CollectAlignmentSummaryMetrics_OUTPUT: + argument_type: file.txt + source: picard_CollectAlignmentSummaryMetrics + source_argument_name: output_file_txt + + PICARD_CollectBaseDistributionByCycle_OUTPUT: + argument_type: file.txt + source: picard_CollectBaseDistributionByCycle + source_argument_name: output_file_txt + + PICARD_CollectBaseDistributionByCycle_PDF: + argument_type: file.pdf + source: picard_CollectBaseDistributionByCycle + source_argument_name: output_chart_pdf + + PICARD_CollectGcBiasMetrics_OUTPUT: + argument_type: file.txt + source: picard_CollectGcBiasMetrics + source_argument_name: output_file_txt + + PICARD_CollectGcBiasMetrics_SUMMARY: + argument_type: file.txt + source: picard_CollectGcBiasMetrics + source_argument_name: output_summary_txt + + PICARD_CollectGcBiasMetrics_PDF: + argument_type: file.pdf + source: picard_CollectGcBiasMetrics + source_argument_name: output_chart_pdf + + PICARD_CollectInsertSizeMetrics_OUTPUT: + argument_type: file.txt + source: picard_CollectInsertSizeMetrics + source_argument_name: output_file_txt + + PICARD_CollectInsertSizeMetrics_PDF: + argument_type: file.pdf + source: picard_CollectInsertSizeMetrics + source_argument_name: output_histogram_pdf + + PICARD_CollectWgsMetrics_OUTPUT: + argument_type: file.txt + source: picard_CollectWgsMetrics + source_argument_name: output_file_txt + + PICARD_MeanQualityByCycle_OUTPUT: + argument_type: file.txt + source: picard_MeanQualityByCycle + source_argument_name: output_file_txt + + PICARD_MeanQualityByCycle_PDF: + argument_type: file.pdf + source: picard_MeanQualityByCycle + source_argument_name: output_chart_pdf + + BAMSTATS_OUTPUT: + argument_type: file.txt + source: bamstats + source_argument_name: output_file_txt + + ## Output ########################## + #################################### + output: + + # File output + qc_values_json: + description: QC output JSON + data_category: + - Quality Control + data_type: + - Statistics + + metrics_zip: + description: QC compressed output + data_category: + - Quality Control + data_type: + - Statistics + - Image + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - t3.small + ebs_size: 10 + ebs_optimized: True + spot_instance: True + run_name: run_parse-qc_BAM_Quality_Metrics_paired-end + behavior_on_capacity_limit: wait_and_retry diff --git a/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml b/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml index 00469ae..5fa3d34 100644 --- a/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml @@ -1,7 +1,7 @@ ## Pipeline information ##################################### # General information for the pipeline ############################################################# -name: ont_long_reads_alignment_GRCh38 +name: ONT_alignment_GRCh38 description: Run Sentieon minimap2 to align input FASTQ files from Oxford Nanopore Technology. | Link methyl tags and read groups from corresponding unaligned input BAM files. | Sort the alignment BAM files by coordinates. | @@ -46,6 +46,11 @@ input: ############################################################# workflows: +## Alignment ################################################ +# Alignment and post-processing steps +# to generate the final output +############################################################# + ## Workflow definition ##################### # sentieon_minimap2_sort ############################################ @@ -59,6 +64,7 @@ workflows: #################################### input: + # File argument input_file_fastq_gz: argument_type: file.fastq_gz source_argument_name: input_files_fastq_gz @@ -83,6 +89,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -143,6 +150,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -198,6 +206,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -212,7 +221,7 @@ workflows: behavior_on_capacity_limit: wait_and_retry ## Workflow definition ##################### - # samtools_merge + # samtools_merge ############################################ samtools_merge: @@ -248,6 +257,12 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + output_status: Final Output + # These fields are required to link metadata for the naming + software: smaht:Software-Sentieon_minimap2_202308.01 + alignment_details: + - Sorted + reference_genome: GRCh38 ## EC2 Configuration to use ######## #################################### @@ -262,3 +277,490 @@ workflows: spot_instance: False run_name: run_samtools_merge behavior_on_capacity_limit: wait_and_retry + +## Quality Controls ######################################### +# Steps to calculate quality metrics for +# the final output +############################################################# + + ## Workflow definition ##################### + ############################################ + samtools_stats: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + # Parameter argument + nthreads: + argument_type: parameter.integer + value: 2 + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: samtools stats output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_samtools_stats + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + samtools_flagstat: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + # Parameter argument + nthreads: + argument_type: parameter.integer + value: 2 + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: samtools flagstat output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_samtools_flagstat + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + samtools_idxstats: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: samtools idxstats output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - t3.small + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_samtools_idxstats + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + picard_CollectAlignmentSummaryMetrics: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + genome_reference_fasta: + argument_type: file.fa + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: picard CollectAlignmentSummaryMetrics output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_picard_CollectAlignmentSummaryMetrics + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + picard_CollectBaseDistributionByCycle: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: picard CollectBaseDistributionByCycle output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + output_chart_pdf: + description: picard CollectBaseDistributionByCycle output PDF + data_category: + - Quality Control + data_type: + - Image + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_picard_CollectBaseDistributionByCycle + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + picard_CollectGcBiasMetrics: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + genome_reference_fasta: + argument_type: file.fa + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: picard CollectGcBiasMetrics output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + output_summary_txt: + description: picard CollectGcBiasMetrics summary TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + output_chart_pdf: + description: picard CollectGcBiasMetrics output PDF + data_category: + - Quality Control + data_type: + - Image + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_picard_CollectGcBiasMetrics + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + picard_MeanQualityByCycle: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: picard MeanQualityByCycle output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + output_chart_pdf: + description: picard MeanQualityByCycle output PDF + data_category: + - Quality Control + data_type: + - Image + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_picard_MeanQualityByCycle + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + bamstats: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: bamstats output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_bamstats + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + # parse-qc_BAM_Quality_Metrics_single-end + ############################################ + parse-qc_BAM_Quality_Metrics_single-end: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + mount: True + # this file is actually not used by the code but is needed + # to specify where the quality metrics object need to be linked + + SAMTOOLS_stats_OUTPUT: + argument_type: file.txt + source: samtools_stats + source_argument_name: output_file_txt + + SAMTOOLS_flagstat_OUTPUT: + argument_type: file.txt + source: samtools_flagstat + source_argument_name: output_file_txt + + SAMTOOLS_idxstats_OUTPUT: + argument_type: file.txt + source: samtools_idxstats + source_argument_name: output_file_txt + + PICARD_CollectAlignmentSummaryMetrics_OUTPUT: + argument_type: file.txt + source: picard_CollectAlignmentSummaryMetrics + source_argument_name: output_file_txt + + PICARD_CollectBaseDistributionByCycle_OUTPUT: + argument_type: file.txt + source: picard_CollectBaseDistributionByCycle + source_argument_name: output_file_txt + + PICARD_CollectBaseDistributionByCycle_PDF: + argument_type: file.pdf + source: picard_CollectBaseDistributionByCycle + source_argument_name: output_chart_pdf + + PICARD_CollectGcBiasMetrics_OUTPUT: + argument_type: file.txt + source: picard_CollectGcBiasMetrics + source_argument_name: output_file_txt + + PICARD_CollectGcBiasMetrics_SUMMARY: + argument_type: file.txt + source: picard_CollectGcBiasMetrics + source_argument_name: output_summary_txt + + PICARD_CollectGcBiasMetrics_PDF: + argument_type: file.pdf + source: picard_CollectGcBiasMetrics + source_argument_name: output_chart_pdf + + PICARD_MeanQualityByCycle_OUTPUT: + argument_type: file.txt + source: picard_MeanQualityByCycle + source_argument_name: output_file_txt + + PICARD_MeanQualityByCycle_PDF: + argument_type: file.pdf + source: picard_MeanQualityByCycle + source_argument_name: output_chart_pdf + + BAMSTATS_OUTPUT: + argument_type: file.txt + source: bamstats + source_argument_name: output_file_txt + + ## Output ########################## + #################################### + output: + + # File output + qc_values_json: + description: QC output JSON + data_category: + - Quality Control + data_type: + - Statistics + + metrics_zip: + description: QC compressed output + data_category: + - Quality Control + data_type: + - Statistics + - Image + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - t3.small + ebs_size: 10 + ebs_optimized: True + spot_instance: True + run_name: run_parse-qc_BAM_Quality_Metrics_single-end + behavior_on_capacity_limit: wait_and_retry diff --git a/portal_objects/metaworkflows/PacBio_alignment_GRCh38.yaml b/portal_objects/metaworkflows/PacBio_alignment_GRCh38.yaml index 8e81671..e2f392e 100644 --- a/portal_objects/metaworkflows/PacBio_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/PacBio_alignment_GRCh38.yaml @@ -1,7 +1,7 @@ ## Pipeline information ##################################### # General information for the pipeline ############################################################# -name: hifi_long_reads_alignment_merge_GRCh38 +name: PacBio_alignment_GRCh38 description: Run pbmm2 to align unaligned BAM files from PacBio HiFi. | Sort the alignment BAM files by coordinates. | Merge the aligned BAM files. | @@ -38,6 +38,11 @@ input: ############################################################# workflows: +## Alignment ################################################ +# Alignment and post-processing steps +# to generate the final output +############################################################# + ## Workflow definition ##################### # pbmm2 ############################################ @@ -51,6 +56,7 @@ workflows: #################################### input: + # File argument input_file_reads: argument_type: file.bam source_argument_name: input_files_bam @@ -59,6 +65,7 @@ workflows: genome_reference_fasta: argument_type: file.fa + # Parameter argument nthreads_sorting: argument_type: parameter.integer value: 4 @@ -79,6 +86,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -139,6 +147,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -153,7 +162,7 @@ workflows: behavior_on_capacity_limit: wait_and_retry ## Workflow definition ##################### - # samtools_merge + # samtools_merge ############################################ samtools_merge: @@ -189,6 +198,12 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + output_status: Final Output + # These fields are required to link metadata for the naming + software: smaht:Software-pbmm2_1.13.0 + alignment_details: + - Sorted + reference_genome: GRCh38 ## EC2 Configuration to use ######## #################################### @@ -203,3 +218,490 @@ workflows: spot_instance: False run_name: run_samtools_merge behavior_on_capacity_limit: wait_and_retry + +## Quality Controls ######################################### +# Steps to calculate quality metrics for +# the final output +############################################################# + + ## Workflow definition ##################### + ############################################ + samtools_stats: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + # Parameter argument + nthreads: + argument_type: parameter.integer + value: 2 + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: samtools stats output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_samtools_stats + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + samtools_flagstat: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + # Parameter argument + nthreads: + argument_type: parameter.integer + value: 2 + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: samtools flagstat output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_samtools_flagstat + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + samtools_idxstats: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: samtools idxstats output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - t3.small + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_samtools_idxstats + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + picard_CollectAlignmentSummaryMetrics: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + genome_reference_fasta: + argument_type: file.fa + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: picard CollectAlignmentSummaryMetrics output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_picard_CollectAlignmentSummaryMetrics + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + picard_CollectBaseDistributionByCycle: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: picard CollectBaseDistributionByCycle output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + output_chart_pdf: + description: picard CollectBaseDistributionByCycle output PDF + data_category: + - Quality Control + data_type: + - Image + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_picard_CollectBaseDistributionByCycle + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + picard_CollectGcBiasMetrics: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + genome_reference_fasta: + argument_type: file.fa + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: picard CollectGcBiasMetrics output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + output_summary_txt: + description: picard CollectGcBiasMetrics summary TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + output_chart_pdf: + description: picard CollectGcBiasMetrics output PDF + data_category: + - Quality Control + data_type: + - Image + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_picard_CollectGcBiasMetrics + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + picard_MeanQualityByCycle: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: picard MeanQualityByCycle output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + output_chart_pdf: + description: picard MeanQualityByCycle output PDF + data_category: + - Quality Control + data_type: + - Image + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_picard_MeanQualityByCycle + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + bamstats: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: bamstats output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_bamstats + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + # parse-qc_BAM_Quality_Metrics_single-end + ############################################ + parse-qc_BAM_Quality_Metrics_single-end: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: samtools_merge + source_argument_name: output_file_bam + mount: True + # this file is actually not used by the code but is needed + # to specify where the quality metrics object need to be linked + + SAMTOOLS_stats_OUTPUT: + argument_type: file.txt + source: samtools_stats + source_argument_name: output_file_txt + + SAMTOOLS_flagstat_OUTPUT: + argument_type: file.txt + source: samtools_flagstat + source_argument_name: output_file_txt + + SAMTOOLS_idxstats_OUTPUT: + argument_type: file.txt + source: samtools_idxstats + source_argument_name: output_file_txt + + PICARD_CollectAlignmentSummaryMetrics_OUTPUT: + argument_type: file.txt + source: picard_CollectAlignmentSummaryMetrics + source_argument_name: output_file_txt + + PICARD_CollectBaseDistributionByCycle_OUTPUT: + argument_type: file.txt + source: picard_CollectBaseDistributionByCycle + source_argument_name: output_file_txt + + PICARD_CollectBaseDistributionByCycle_PDF: + argument_type: file.pdf + source: picard_CollectBaseDistributionByCycle + source_argument_name: output_chart_pdf + + PICARD_CollectGcBiasMetrics_OUTPUT: + argument_type: file.txt + source: picard_CollectGcBiasMetrics + source_argument_name: output_file_txt + + PICARD_CollectGcBiasMetrics_SUMMARY: + argument_type: file.txt + source: picard_CollectGcBiasMetrics + source_argument_name: output_summary_txt + + PICARD_CollectGcBiasMetrics_PDF: + argument_type: file.pdf + source: picard_CollectGcBiasMetrics + source_argument_name: output_chart_pdf + + PICARD_MeanQualityByCycle_OUTPUT: + argument_type: file.txt + source: picard_MeanQualityByCycle + source_argument_name: output_file_txt + + PICARD_MeanQualityByCycle_PDF: + argument_type: file.pdf + source: picard_MeanQualityByCycle + source_argument_name: output_chart_pdf + + BAMSTATS_OUTPUT: + argument_type: file.txt + source: bamstats + source_argument_name: output_file_txt + + ## Output ########################## + #################################### + output: + + # File output + qc_values_json: + description: QC output JSON + data_category: + - Quality Control + data_type: + - Statistics + + metrics_zip: + description: QC compressed output + data_category: + - Quality Control + data_type: + - Statistics + - Image + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - t3.small + ebs_size: 10 + ebs_optimized: True + spot_instance: True + run_name: run_parse-qc_BAM_Quality_Metrics_single-end + behavior_on_capacity_limit: wait_and_retry diff --git a/portal_objects/metaworkflows/hifi_long_reads_alignment_GRCh38.yaml b/portal_objects/metaworkflows/hifi_long_reads_alignment_GRCh38.yaml index 72fbbcc..73e4061 100644 --- a/portal_objects/metaworkflows/hifi_long_reads_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/hifi_long_reads_alignment_GRCh38.yaml @@ -50,6 +50,7 @@ workflows: #################################### input: + # File argument input_file_reads: argument_type: file.bam source_argument_name: input_files_bam @@ -58,6 +59,7 @@ workflows: genome_reference_fasta: argument_type: file.fa + # Parameter argument nthreads_sorting: argument_type: parameter.integer value: 4 @@ -78,6 +80,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -138,6 +141,12 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + output_status: Final Output + # These fields are required to link metadata for the naming + software: smaht:Software-pbmm2_1.13.0 + alignment_details: + - Sorted + reference_genome: GRCh38 ## EC2 Configuration to use ######## #################################### diff --git a/portal_objects/metaworkflows/hifi_long_reads_alignment_merge_GRCh38.yaml b/portal_objects/metaworkflows/hifi_long_reads_alignment_merge_GRCh38.yaml index 8e81671..d4fd630 100644 --- a/portal_objects/metaworkflows/hifi_long_reads_alignment_merge_GRCh38.yaml +++ b/portal_objects/metaworkflows/hifi_long_reads_alignment_merge_GRCh38.yaml @@ -51,6 +51,7 @@ workflows: #################################### input: + # File argument input_file_reads: argument_type: file.bam source_argument_name: input_files_bam @@ -59,6 +60,7 @@ workflows: genome_reference_fasta: argument_type: file.fa + # Parameter argument nthreads_sorting: argument_type: parameter.integer value: 4 @@ -79,6 +81,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -139,6 +142,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -153,7 +157,7 @@ workflows: behavior_on_capacity_limit: wait_and_retry ## Workflow definition ##################### - # samtools_merge + # samtools_merge ############################################ samtools_merge: @@ -189,6 +193,12 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + output_status: Final Output + # These fields are required to link metadata for the naming + software: smaht:Software-pbmm2_1.13.0 + alignment_details: + - Sorted + reference_genome: GRCh38 ## EC2 Configuration to use ######## #################################### diff --git a/portal_objects/metaworkflows/ont_long_reads_alignment_GRCh38.yaml b/portal_objects/metaworkflows/ont_long_reads_alignment_merge_GRCh38.yaml similarity index 93% rename from portal_objects/metaworkflows/ont_long_reads_alignment_GRCh38.yaml rename to portal_objects/metaworkflows/ont_long_reads_alignment_merge_GRCh38.yaml index 00469ae..965c9eb 100644 --- a/portal_objects/metaworkflows/ont_long_reads_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/ont_long_reads_alignment_merge_GRCh38.yaml @@ -1,7 +1,7 @@ ## Pipeline information ##################################### # General information for the pipeline ############################################################# -name: ont_long_reads_alignment_GRCh38 +name: ont_long_reads_alignment_merge_GRCh38 description: Run Sentieon minimap2 to align input FASTQ files from Oxford Nanopore Technology. | Link methyl tags and read groups from corresponding unaligned input BAM files. | Sort the alignment BAM files by coordinates. | @@ -59,6 +59,7 @@ workflows: #################################### input: + # File argument input_file_fastq_gz: argument_type: file.fastq_gz source_argument_name: input_files_fastq_gz @@ -83,6 +84,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -143,6 +145,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -198,6 +201,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -212,7 +216,7 @@ workflows: behavior_on_capacity_limit: wait_and_retry ## Workflow definition ##################### - # samtools_merge + # samtools_merge ############################################ samtools_merge: @@ -248,6 +252,12 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + output_status: Final Output + # These fields are required to link metadata for the naming + software: smaht:Software-Sentieon_minimap2_202308.01 + alignment_details: + - Sorted + reference_genome: GRCh38 ## EC2 Configuration to use ######## #################################### diff --git a/portal_objects/metaworkflows/paired-end_short_reads_alignment_GRCh38.yaml b/portal_objects/metaworkflows/paired-end_short_reads_alignment_GRCh38.yaml index 7602507..f29571f 100644 --- a/portal_objects/metaworkflows/paired-end_short_reads_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/paired-end_short_reads_alignment_GRCh38.yaml @@ -100,6 +100,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -166,6 +167,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -180,7 +182,7 @@ workflows: behavior_on_capacity_limit: wait_and_retry ## Workflow definition ##################### - # samtools_merge + # samtools_merge ############################################ samtools_merge: @@ -216,6 +218,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -262,6 +265,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -323,6 +327,12 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + output_status: Final Output + # These fields are required to link metadata for the naming + software: smaht:Software-Sentieon_BWA-MEM_202308.01 + alignment_details: + - Sorted + reference_genome: GRCh38 ## EC2 Configuration to use ######## #################################### diff --git a/portal_objects/metaworkflows/paired-end_short_reads_alignment_distributed_GRCh38.yaml b/portal_objects/metaworkflows/paired-end_short_reads_alignment_distributed_GRCh38.yaml index c55ffa0..3e2460a 100644 --- a/portal_objects/metaworkflows/paired-end_short_reads_alignment_distributed_GRCh38.yaml +++ b/portal_objects/metaworkflows/paired-end_short_reads_alignment_distributed_GRCh38.yaml @@ -117,6 +117,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -176,6 +177,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -237,7 +239,8 @@ workflows: data_category: - Sequencing Reads data_type: - - Aligned Reads + - Statistics + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -313,6 +316,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -331,7 +335,7 @@ workflows: behavior_on_capacity_limit: wait_and_retry ## Workflow definition ##################### - # shards_to_ReadGroups + # shards_to_ReadGroups ############################################ shards_to_ReadGroups: @@ -371,6 +375,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -426,6 +431,7 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -444,7 +450,7 @@ workflows: behavior_on_capacity_limit: wait_and_retry ## Workflow definition ##################### - # samtools_merge + # samtools_merge ############################################ samtools_merge: @@ -480,6 +486,12 @@ workflows: - Sequencing Reads data_type: - Aligned Reads + output_status: Final Output + # These fields are required to link metadata for the naming + software: smaht:Software-Sentieon_BWA-MEM_202308.01 + alignment_details: + - Sorted + reference_genome: GRCh38 ## EC2 Configuration to use ######## #################################### diff --git a/portal_objects/metaworkflows/paired-end_short_reads_variant_calling_TNhaplotyper2_distributed_GRCh38.yaml b/portal_objects/metaworkflows/paired-end_short_reads_variant_calling_TNhaplotyper2_distributed_GRCh38.yaml index 03b1a15..d1a4cd3 100644 --- a/portal_objects/metaworkflows/paired-end_short_reads_variant_calling_TNhaplotyper2_distributed_GRCh38.yaml +++ b/portal_objects/metaworkflows/paired-end_short_reads_variant_calling_TNhaplotyper2_distributed_GRCh38.yaml @@ -100,14 +100,13 @@ workflows: # File output output_file_vcf_gz: - description: TNhaplotyper2 output VCF (shard) + description: sentieon TNhaplotyper2 output VCF (shard) data_category: - - Variant Calls - data_type: - Somatic Variant Calls - variant_type: - - Single Nucleotide Variant - - Insertion-deletion + data_type: + - SNV + - Indel + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -159,14 +158,13 @@ workflows: # File output output_file_vcf_gz: - description: TNhaplotyper2 output VCF + description: sentieon TNhaplotyper2 output VCF data_category: - - Variant Calls - data_type: - Somatic Variant Calls - variant_type: - - Single Nucleotide Variant - - Insertion-deletion + data_type: + - SNV + - Indel + output_status: Final Output ## EC2 Configuration to use ######## #################################### diff --git a/portal_objects/metaworkflows/paired-end_short_reads_variant_calling_TNhaplotyper2_distributed_TNfilter_GRCh38.yaml b/portal_objects/metaworkflows/paired-end_short_reads_variant_calling_TNhaplotyper2_distributed_TNfilter_GRCh38.yaml index 65ce42b..df774a1 100644 --- a/portal_objects/metaworkflows/paired-end_short_reads_variant_calling_TNhaplotyper2_distributed_TNfilter_GRCh38.yaml +++ b/portal_objects/metaworkflows/paired-end_short_reads_variant_calling_TNhaplotyper2_distributed_TNfilter_GRCh38.yaml @@ -101,28 +101,29 @@ workflows: # File output output_file_vcf_gz: - description: TNhaplotyper2 output VCF (shard) + description: sentieon TNhaplotyper2 output VCF (shard) data_category: - - Variant Calls - data_type: - Somatic Variant Calls - variant_type: - - Single Nucleotide Variant - - Insertion-deletion + data_type: + - SNV + - Indel + s3_lifecycle_category: no_storage output_file_priors: - description: OrientationBias output TXT (shard) + description: sentieon OrientationBias output TXT (shard) data_category: - - Variant Calls + - Somatic Variant Calls data_type: - Statistics + s3_lifecycle_category: no_storage output_file_contamination: - description: ContaminationModel output TXT (shard) + description: sentieon ContaminationModel output TXT (shard) data_category: - - Variant Calls + - Somatic Variant Calls data_type: - Statistics + s3_lifecycle_category: no_storage ## EC2 Configuration to use ######## #################################### @@ -193,14 +194,13 @@ workflows: # File output output_file_vcf_gz: - description: TNfilter output VCF + description: sentieon TNfilter output VCF data_category: - - Variant Calls - data_type: - Somatic Variant Calls - variant_type: - - Single Nucleotide Variant - - Insertion-deletion + data_type: + - SNV + - Indel + output_status: Final Output ## EC2 Configuration to use ######## #################################### From d64dafc0b0c518113ae23b0af3b04cc83c25cdd5 Mon Sep 17 00:00:00 2001 From: Michele Berselli Date: Fri, 16 Feb 2024 11:06:44 -0500 Subject: [PATCH 2/8] . --- .../metaworkflows/Hi-C_alignment_GRCh38.yaml | 490 +++++++++++++++++- .../Illumina_alignment_GRCh38.yaml | 12 +- .../metaworkflows/ONT_alignment_GRCh38.yaml | 6 +- .../PacBio_alignment_GRCh38.yaml | 5 +- 4 files changed, 506 insertions(+), 7 deletions(-) diff --git a/portal_objects/metaworkflows/Hi-C_alignment_GRCh38.yaml b/portal_objects/metaworkflows/Hi-C_alignment_GRCh38.yaml index 02ee214..9301771 100644 --- a/portal_objects/metaworkflows/Hi-C_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/Hi-C_alignment_GRCh38.yaml @@ -2,8 +2,9 @@ # General information for the pipeline ############################################################# name: Hi-C_alignment_GRCh38 -description: Pipeline to align paired-end short reads Hi-C data for a single sample. | +description: Pipeline to align paired-end Illumina Hi-C data. | Run Sentieon BWA-MEM for alignment, and incorporate read groups. | + Implemented to run per single sample and library. | Build hg38/GRCh38 category: @@ -173,3 +174,490 @@ workflows: spot_instance: True run_name: run_AddReadGroups behavior_on_capacity_limit: wait_and_retry + +## Quality Controls ######################################### +# Steps to calculate quality metrics for +# the final output +############################################################# + + ## Workflow definition ##################### + ############################################ + samtools_stats: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: AddReadGroups + source_argument_name: output_file_bam + + # Parameter argument + nthreads: + argument_type: parameter.integer + value: 2 + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: samtools stats output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_samtools_stats + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + samtools_flagstat: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: AddReadGroups + source_argument_name: output_file_bam + + # Parameter argument + nthreads: + argument_type: parameter.integer + value: 2 + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: samtools flagstat output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_samtools_flagstat + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + samtools_idxstats: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: AddReadGroups + source_argument_name: output_file_bam + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: samtools idxstats output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - t3.small + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_samtools_idxstats + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + picard_CollectAlignmentSummaryMetrics: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: AddReadGroups + source_argument_name: output_file_bam + + genome_reference_fasta: + argument_type: file.fa + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: picard CollectAlignmentSummaryMetrics output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_picard_CollectAlignmentSummaryMetrics + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + picard_CollectBaseDistributionByCycle: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: AddReadGroups + source_argument_name: output_file_bam + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: picard CollectBaseDistributionByCycle output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + output_chart_pdf: + description: picard CollectBaseDistributionByCycle output PDF + data_category: + - Quality Control + data_type: + - Image + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_picard_CollectBaseDistributionByCycle + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + picard_CollectGcBiasMetrics: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: AddReadGroups + source_argument_name: output_file_bam + + genome_reference_fasta: + argument_type: file.fa + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: picard CollectGcBiasMetrics output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + output_summary_txt: + description: picard CollectGcBiasMetrics summary TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + output_chart_pdf: + description: picard CollectGcBiasMetrics output PDF + data_category: + - Quality Control + data_type: + - Image + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_picard_CollectGcBiasMetrics + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + picard_MeanQualityByCycle: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: AddReadGroups + source_argument_name: output_file_bam + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: picard MeanQualityByCycle output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + output_chart_pdf: + description: picard MeanQualityByCycle output PDF + data_category: + - Quality Control + data_type: + - Image + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_picard_MeanQualityByCycle + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + ############################################ + bamstats: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: AddReadGroups + source_argument_name: output_file_bam + + ## Output ########################## + #################################### + output: + + # File output + output_file_txt: + description: bamstats output TXT + data_category: + - Quality Control + data_type: + - Statistics + s3_lifecycle_category: no_storage + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - m5a.large + - m5.large + - m6i.large + - m6a.large + ebs_size: "1.1x" + ebs_optimized: True + spot_instance: True + run_name: run_bamstats + behavior_on_capacity_limit: wait_and_retry + + ## Workflow definition ##################### + # parse-qc_BAM_Quality_Metrics_single-end + ############################################ + parse-qc_BAM_Quality_Metrics_single-end: + + ## Specific arguments ############## + #################################### + input: + + # File argument + input_file_bam: + argument_type: file.bam + source: AddReadGroups + source_argument_name: output_file_bam + mount: True + # this file is actually not used by the code but is needed + # to specify where the quality metrics object need to be linked + + SAMTOOLS_stats_OUTPUT: + argument_type: file.txt + source: samtools_stats + source_argument_name: output_file_txt + + SAMTOOLS_flagstat_OUTPUT: + argument_type: file.txt + source: samtools_flagstat + source_argument_name: output_file_txt + + SAMTOOLS_idxstats_OUTPUT: + argument_type: file.txt + source: samtools_idxstats + source_argument_name: output_file_txt + + PICARD_CollectAlignmentSummaryMetrics_OUTPUT: + argument_type: file.txt + source: picard_CollectAlignmentSummaryMetrics + source_argument_name: output_file_txt + + PICARD_CollectBaseDistributionByCycle_OUTPUT: + argument_type: file.txt + source: picard_CollectBaseDistributionByCycle + source_argument_name: output_file_txt + + PICARD_CollectBaseDistributionByCycle_PDF: + argument_type: file.pdf + source: picard_CollectBaseDistributionByCycle + source_argument_name: output_chart_pdf + + PICARD_CollectGcBiasMetrics_OUTPUT: + argument_type: file.txt + source: picard_CollectGcBiasMetrics + source_argument_name: output_file_txt + + PICARD_CollectGcBiasMetrics_SUMMARY: + argument_type: file.txt + source: picard_CollectGcBiasMetrics + source_argument_name: output_summary_txt + + PICARD_CollectGcBiasMetrics_PDF: + argument_type: file.pdf + source: picard_CollectGcBiasMetrics + source_argument_name: output_chart_pdf + + PICARD_MeanQualityByCycle_OUTPUT: + argument_type: file.txt + source: picard_MeanQualityByCycle + source_argument_name: output_file_txt + + PICARD_MeanQualityByCycle_PDF: + argument_type: file.pdf + source: picard_MeanQualityByCycle + source_argument_name: output_chart_pdf + + BAMSTATS_OUTPUT: + argument_type: file.txt + source: bamstats + source_argument_name: output_file_txt + + ## Output ########################## + #################################### + output: + + # File output + qc_values_json: + description: QC output JSON + data_category: + - Quality Control + data_type: + - Statistics + + metrics_zip: + description: QC compressed output + data_category: + - Quality Control + data_type: + - Statistics + - Image + + ## EC2 Configuration to use ######## + #################################### + config: + instance_type: + - t3.small + ebs_size: 10 + ebs_optimized: True + spot_instance: True + run_name: run_parse-qc_BAM_Quality_Metrics_single-end + behavior_on_capacity_limit: wait_and_retry diff --git a/portal_objects/metaworkflows/Illumina_alignment_GRCh38.yaml b/portal_objects/metaworkflows/Illumina_alignment_GRCh38.yaml index a1f7100..7b3842d 100644 --- a/portal_objects/metaworkflows/Illumina_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/Illumina_alignment_GRCh38.yaml @@ -2,12 +2,16 @@ # General information for the pipeline ############################################################# name: Illumina_alignment_GRCh38 -description: Pipeline to align paired-end short reads data for a single sample. | +description: End-to-end alignment pipeline for paired-end Illumina data. | + Run fastp to pre-process input FASTQ files and remove polyG artifacts. | Run Sentieon BWA-MEM for alignment, incorporate read groups, | mark duplicate reads, and recalibrate base and indel scores. | - The output is an analysis-ready file in BAM format. | - Build hg38/GRCh38. | - Implemented to run in distributed mode and processing reads by lane + Sort the alignment BAM files by coordinates. | + Merge the aligned BAM files. | + Calculate quality metrics. | + Implemented to run in distributed mode and | + processing reads by lane, per single sample and library. | + Build hg38/GRCh38 category: - Alignment diff --git a/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml b/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml index 5fa3d34..e530354 100644 --- a/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml @@ -2,9 +2,13 @@ # General information for the pipeline ############################################################# name: ONT_alignment_GRCh38 -description: Run Sentieon minimap2 to align input FASTQ files from Oxford Nanopore Technology. | +description: End-to-end alignment pipeline for Oxford Nanopore Technology. | + Run Sentieon minimap2 to align input FASTQ files. | Link methyl tags and read groups from corresponding unaligned input BAM files. | Sort the alignment BAM files by coordinates. | + Merge the aligned BAM files. | + Calculate quality metrics. | + Implemented to run per single sample and library. | Build hg38/GRCh38 category: diff --git a/portal_objects/metaworkflows/PacBio_alignment_GRCh38.yaml b/portal_objects/metaworkflows/PacBio_alignment_GRCh38.yaml index e2f392e..9a0c4e8 100644 --- a/portal_objects/metaworkflows/PacBio_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/PacBio_alignment_GRCh38.yaml @@ -2,9 +2,12 @@ # General information for the pipeline ############################################################# name: PacBio_alignment_GRCh38 -description: Run pbmm2 to align unaligned BAM files from PacBio HiFi. | +description: End-to-end alignment pipeline for PacBio data. | + Run pbmm2 to align unaligned input BAM files. | Sort the alignment BAM files by coordinates. | Merge the aligned BAM files. | + Calculate quality metrics. | + Implemented to run per single sample and library. | Build hg38/GRCh38 category: From 14aa6c7854c2593b4a490ed502050aa300572896 Mon Sep 17 00:00:00 2001 From: Michele Berselli Date: Fri, 23 Feb 2024 14:19:03 -0500 Subject: [PATCH 3/8] updated submodules --- alignment-pipelines | 2 +- qc-pipelines | 2 +- sentieon-pipelines | 2 +- shared-pipelines | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/alignment-pipelines b/alignment-pipelines index ff0fe5b..fa2ef75 160000 --- a/alignment-pipelines +++ b/alignment-pipelines @@ -1 +1 @@ -Subproject commit ff0fe5b3db2ac095979e8a72ee5601c57924ec4b +Subproject commit fa2ef75a91aed5b852681c609f2c0e96ce816eb1 diff --git a/qc-pipelines b/qc-pipelines index 0399490..94ddb5f 160000 --- a/qc-pipelines +++ b/qc-pipelines @@ -1 +1 @@ -Subproject commit 039949028b63fd89d709f9df7da70cc861a7796d +Subproject commit 94ddb5fedfb447566d05010701fa7ac2ecd70fb2 diff --git a/sentieon-pipelines b/sentieon-pipelines index 2b6dc6b..08c59ad 160000 --- a/sentieon-pipelines +++ b/sentieon-pipelines @@ -1 +1 @@ -Subproject commit 2b6dc6badfe5473ae8c1ba90fc621b4248504dac +Subproject commit 08c59ad918e5ba0f0e06185b0affe4c3d6f12a15 diff --git a/shared-pipelines b/shared-pipelines index 28dfcd5..522f36e 160000 --- a/shared-pipelines +++ b/shared-pipelines @@ -1 +1 @@ -Subproject commit 28dfcd547ec79d34d397667e5cbea3f9cb1f325f +Subproject commit 522f36ede83addd8cfcc3e3bbde4389d31c3a87b From 539636a440e6ff81ed422fd3dd068e3dd08c0654 Mon Sep 17 00:00:00 2001 From: Michele Berselli Date: Mon, 26 Feb 2024 12:19:23 -0500 Subject: [PATCH 4/8] software cpf fields is a list now to match schema --- portal_objects/metaworkflows/Hi-C_alignment_GRCh38.yaml | 3 ++- portal_objects/metaworkflows/Illumina_alignment_GRCh38.yaml | 3 ++- portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml | 3 ++- portal_objects/metaworkflows/PacBio_alignment_GRCh38.yaml | 3 ++- .../metaworkflows/hifi_long_reads_alignment_GRCh38.yaml | 3 ++- .../metaworkflows/hifi_long_reads_alignment_merge_GRCh38.yaml | 3 ++- .../metaworkflows/ont_long_reads_alignment_merge_GRCh38.yaml | 3 ++- .../metaworkflows/paired-end_short_reads_alignment_GRCh38.yaml | 3 ++- .../paired-end_short_reads_alignment_distributed_GRCh38.yaml | 3 ++- 9 files changed, 18 insertions(+), 9 deletions(-) diff --git a/portal_objects/metaworkflows/Hi-C_alignment_GRCh38.yaml b/portal_objects/metaworkflows/Hi-C_alignment_GRCh38.yaml index 9301771..5e3f4a2 100644 --- a/portal_objects/metaworkflows/Hi-C_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/Hi-C_alignment_GRCh38.yaml @@ -158,7 +158,8 @@ workflows: - Aligned Reads output_status: Final Output # These fields are required to link metadata for the naming - software: smaht:Software-Sentieon_BWA-MEM_202308.01 + software: + - smaht:Software-Sentieon_BWA-MEM_202308.01 alignment_details: - Sorted reference_genome: GRCh38 diff --git a/portal_objects/metaworkflows/Illumina_alignment_GRCh38.yaml b/portal_objects/metaworkflows/Illumina_alignment_GRCh38.yaml index 7b3842d..c95f954 100644 --- a/portal_objects/metaworkflows/Illumina_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/Illumina_alignment_GRCh38.yaml @@ -605,7 +605,8 @@ workflows: - Aligned Reads output_status: Final Output # These fields are required to link metadata for the naming - software: smaht:Software-Sentieon_BWA-MEM_202308.01 + software: + - smaht:Software-Sentieon_BWA-MEM_202308.01 alignment_details: - Sorted reference_genome: GRCh38 diff --git a/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml b/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml index e530354..d43db5c 100644 --- a/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml @@ -263,7 +263,8 @@ workflows: - Aligned Reads output_status: Final Output # These fields are required to link metadata for the naming - software: smaht:Software-Sentieon_minimap2_202308.01 + software: + - smaht:Software-Sentieon_minimap2_202308.01 alignment_details: - Sorted reference_genome: GRCh38 diff --git a/portal_objects/metaworkflows/PacBio_alignment_GRCh38.yaml b/portal_objects/metaworkflows/PacBio_alignment_GRCh38.yaml index 9a0c4e8..c3d0bb0 100644 --- a/portal_objects/metaworkflows/PacBio_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/PacBio_alignment_GRCh38.yaml @@ -203,7 +203,8 @@ workflows: - Aligned Reads output_status: Final Output # These fields are required to link metadata for the naming - software: smaht:Software-pbmm2_1.13.0 + software: + - smaht:Software-pbmm2_1.13.0 alignment_details: - Sorted reference_genome: GRCh38 diff --git a/portal_objects/metaworkflows/hifi_long_reads_alignment_GRCh38.yaml b/portal_objects/metaworkflows/hifi_long_reads_alignment_GRCh38.yaml index 73e4061..e7048e2 100644 --- a/portal_objects/metaworkflows/hifi_long_reads_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/hifi_long_reads_alignment_GRCh38.yaml @@ -143,7 +143,8 @@ workflows: - Aligned Reads output_status: Final Output # These fields are required to link metadata for the naming - software: smaht:Software-pbmm2_1.13.0 + software: + - smaht:Software-pbmm2_1.13.0 alignment_details: - Sorted reference_genome: GRCh38 diff --git a/portal_objects/metaworkflows/hifi_long_reads_alignment_merge_GRCh38.yaml b/portal_objects/metaworkflows/hifi_long_reads_alignment_merge_GRCh38.yaml index d4fd630..f643858 100644 --- a/portal_objects/metaworkflows/hifi_long_reads_alignment_merge_GRCh38.yaml +++ b/portal_objects/metaworkflows/hifi_long_reads_alignment_merge_GRCh38.yaml @@ -195,7 +195,8 @@ workflows: - Aligned Reads output_status: Final Output # These fields are required to link metadata for the naming - software: smaht:Software-pbmm2_1.13.0 + software: + - smaht:Software-pbmm2_1.13.0 alignment_details: - Sorted reference_genome: GRCh38 diff --git a/portal_objects/metaworkflows/ont_long_reads_alignment_merge_GRCh38.yaml b/portal_objects/metaworkflows/ont_long_reads_alignment_merge_GRCh38.yaml index 965c9eb..5cd4009 100644 --- a/portal_objects/metaworkflows/ont_long_reads_alignment_merge_GRCh38.yaml +++ b/portal_objects/metaworkflows/ont_long_reads_alignment_merge_GRCh38.yaml @@ -254,7 +254,8 @@ workflows: - Aligned Reads output_status: Final Output # These fields are required to link metadata for the naming - software: smaht:Software-Sentieon_minimap2_202308.01 + software: + - smaht:Software-Sentieon_minimap2_202308.01 alignment_details: - Sorted reference_genome: GRCh38 diff --git a/portal_objects/metaworkflows/paired-end_short_reads_alignment_GRCh38.yaml b/portal_objects/metaworkflows/paired-end_short_reads_alignment_GRCh38.yaml index f29571f..9b01805 100644 --- a/portal_objects/metaworkflows/paired-end_short_reads_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/paired-end_short_reads_alignment_GRCh38.yaml @@ -329,7 +329,8 @@ workflows: - Aligned Reads output_status: Final Output # These fields are required to link metadata for the naming - software: smaht:Software-Sentieon_BWA-MEM_202308.01 + software: + - smaht:Software-Sentieon_BWA-MEM_202308.01 alignment_details: - Sorted reference_genome: GRCh38 diff --git a/portal_objects/metaworkflows/paired-end_short_reads_alignment_distributed_GRCh38.yaml b/portal_objects/metaworkflows/paired-end_short_reads_alignment_distributed_GRCh38.yaml index 3e2460a..c21c0d7 100644 --- a/portal_objects/metaworkflows/paired-end_short_reads_alignment_distributed_GRCh38.yaml +++ b/portal_objects/metaworkflows/paired-end_short_reads_alignment_distributed_GRCh38.yaml @@ -488,7 +488,8 @@ workflows: - Aligned Reads output_status: Final Output # These fields are required to link metadata for the naming - software: smaht:Software-Sentieon_BWA-MEM_202308.01 + software: + - smaht:Software-Sentieon_BWA-MEM_202308.01 alignment_details: - Sorted reference_genome: GRCh38 From 1ba1516357f99598dc53710195af5dabb3c18a65 Mon Sep 17 00:00:00 2001 From: Michele Berselli Date: Tue, 27 Feb 2024 14:11:05 -0500 Subject: [PATCH 5/8] . --- portal_objects/metaworkflows/Hi-C_alignment_GRCh38.yaml | 2 ++ portal_objects/metaworkflows/Illumina_alignment_GRCh38.yaml | 2 ++ portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml | 1 + portal_objects/metaworkflows/PacBio_alignment_GRCh38.yaml | 1 + smaht-pipeline-utils | 2 +- 5 files changed, 7 insertions(+), 1 deletion(-) diff --git a/portal_objects/metaworkflows/Hi-C_alignment_GRCh38.yaml b/portal_objects/metaworkflows/Hi-C_alignment_GRCh38.yaml index 5e3f4a2..f1eaa1a 100644 --- a/portal_objects/metaworkflows/Hi-C_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/Hi-C_alignment_GRCh38.yaml @@ -5,10 +5,12 @@ name: Hi-C_alignment_GRCh38 description: Pipeline to align paired-end Illumina Hi-C data. | Run Sentieon BWA-MEM for alignment, and incorporate read groups. | Implemented to run per single sample and library. | + Calculate quality metrics. | Build hg38/GRCh38 category: - Alignment + - Quality Control ## General arguments ######################################## # Pipeline input, reference files, and general arguments diff --git a/portal_objects/metaworkflows/Illumina_alignment_GRCh38.yaml b/portal_objects/metaworkflows/Illumina_alignment_GRCh38.yaml index c95f954..d5a29e9 100644 --- a/portal_objects/metaworkflows/Illumina_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/Illumina_alignment_GRCh38.yaml @@ -14,7 +14,9 @@ description: End-to-end alignment pipeline for paired-end Illumina data. | Build hg38/GRCh38 category: + - Read Manipulation - Alignment + - Quality Control ## General arguments ######################################## # Pipeline input, reference files, and general arguments diff --git a/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml b/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml index d43db5c..4c85dfd 100644 --- a/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml @@ -13,6 +13,7 @@ description: End-to-end alignment pipeline for Oxford Nanopore Technology. | category: - Alignment + - Quality Control ## General arguments ######################################## # Pipeline input, reference files, and general arguments diff --git a/portal_objects/metaworkflows/PacBio_alignment_GRCh38.yaml b/portal_objects/metaworkflows/PacBio_alignment_GRCh38.yaml index c3d0bb0..da20cb7 100644 --- a/portal_objects/metaworkflows/PacBio_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/PacBio_alignment_GRCh38.yaml @@ -12,6 +12,7 @@ description: End-to-end alignment pipeline for PacBio data. | category: - Alignment + - Quality Control ## General arguments ######################################## # Pipeline input, reference files, and general arguments diff --git a/smaht-pipeline-utils b/smaht-pipeline-utils index 8038605..af5cb1e 160000 --- a/smaht-pipeline-utils +++ b/smaht-pipeline-utils @@ -1 +1 @@ -Subproject commit 8038605e74bd8ff8a26c7929e50bfc291abe818d +Subproject commit af5cb1e600590f0bb056e0206ad89f084d8e6b74 From 596cd8ad138f5a0b1dd185df35e3e84cbb84973e Mon Sep 17 00:00:00 2001 From: Michele Berselli Date: Tue, 27 Feb 2024 15:48:06 -0500 Subject: [PATCH 6/8] . --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 2e688fe..efb67fb 100644 --- a/Makefile +++ b/Makefile @@ -29,6 +29,7 @@ deploy-base: --post-software \ --post-file-format \ --post-file-reference \ + --post-reference-genome \ --post-workflow \ --post-metaworkflow \ --post-wfl \ @@ -48,6 +49,7 @@ deploy-all: --post-software \ --post-file-format \ --post-file-reference \ + --post-reference-genome \ --post-workflow \ --post-metaworkflow \ --post-wfl \ From 8b53ed985810e85c5e114b86f36c7782b351d7cf Mon Sep 17 00:00:00 2001 From: Michele Berselli Date: Wed, 28 Feb 2024 11:05:07 -0500 Subject: [PATCH 7/8] . --- shared-pipelines | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared-pipelines b/shared-pipelines index 522f36e..062a6e1 160000 --- a/shared-pipelines +++ b/shared-pipelines @@ -1 +1 @@ -Subproject commit 522f36ede83addd8cfcc3e3bbde4389d31c3a87b +Subproject commit 062a6e128571b68f7650d473d60304974aab00b0 From 2d2260e7411bf7ffaca62687efbf77e0297c1d04 Mon Sep 17 00:00:00 2001 From: Michele Berselli Date: Tue, 5 Mar 2024 10:58:50 -0500 Subject: [PATCH 8/8] . --- portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml | 8 ++++---- shared-pipelines | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml b/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml index 4c85dfd..7ed556c 100644 --- a/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml +++ b/portal_objects/metaworkflows/ONT_alignment_GRCh38.yaml @@ -325,10 +325,10 @@ workflows: #################################### config: instance_type: - - m5a.large - - m5.large - - m6i.large - - m6a.large + - m5a.xlarge + - m5.xlarge + - m6i.xlarge + - m6a.xlarge ebs_size: "1.1x" ebs_optimized: True spot_instance: True diff --git a/shared-pipelines b/shared-pipelines index 062a6e1..44888f8 160000 --- a/shared-pipelines +++ b/shared-pipelines @@ -1 +1 @@ -Subproject commit 062a6e128571b68f7650d473d60304974aab00b0 +Subproject commit 44888f87aafcca49f48a15ee2fe070712bf90218