diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml deleted file mode 100644 index a639c3fa..00000000 --- a/.github/workflows/claude-code-review.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: Claude Code Review - -on: - pull_request: - types: [opened, synchronize, ready_for_review, reopened] - # Optional: Only run on specific file changes - # paths: - # - "src/**/*.ts" - # - "src/**/*.tsx" - # - "src/**/*.js" - # - "src/**/*.jsx" - -jobs: - claude-review: - # Optional: Filter by PR author - # if: | - # github.event.pull_request.user.login == 'external-contributor' || - # github.event.pull_request.user.login == 'new-developer' || - # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR' - - runs-on: ubuntu-latest - permissions: - contents: read - pull-requests: read - issues: read - id-token: write - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - fetch-depth: 1 - - - name: Run Claude Code Review - id: claude-review - uses: anthropics/claude-code-action@v1 - with: - claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} - plugin_marketplaces: "https://github.com/anthropics/claude-code.git" - plugins: "code-review@claude-code-plugins" - prompt: "/code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}" - # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md - # or https://code.claude.com/docs/en/cli-reference for available options diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml deleted file mode 100644 index 9471a059..00000000 --- a/.github/workflows/claude.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: Claude Code - -on: - issue_comment: - types: [created] - pull_request_review_comment: - types: [created] - issues: - types: [opened, assigned] - pull_request_review: - types: [submitted] - -jobs: - claude: - if: | - (github.event_name == 'issue_comment' && contains(github.event.comment.body, 
'@claude')) || - (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || - (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) || - (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) - runs-on: ubuntu-latest - permissions: - contents: read - pull-requests: read - issues: read - id-token: write - actions: read # Required for Claude to read CI results on PRs - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - fetch-depth: 1 - - - name: Run Claude Code - id: claude - uses: anthropics/claude-code-action@v1 - with: - claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} - - # This is an optional setting that allows Claude to read CI results on PRs - additional_permissions: | - actions: read - - # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it. - # prompt: 'Update the pull request description to include a summary of changes.' - - # Optional: Add claude_args to customize behavior and configuration - # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md - # or https://code.claude.com/docs/en/cli-reference for available options - # claude_args: '--allowed-tools Bash(gh pr:*)' diff --git a/.gitignore b/.gitignore index 9e307203..c5c144e3 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ testing* null/ .nf-test .nf-test.log +out/ diff --git a/.nf-core.yml b/.nf-core.yml index 662ad88c..07124956 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -27,7 +27,7 @@ lint: - validation.summary.beforeText - validation.summary.afterText schema_params: false -nf_core_version: 3.5.1 +nf_core_version: 3.5.2 repository_type: pipeline template: author: Jonas Demeulemeester @@ -39,4 +39,4 @@ template: outdir: . 
skip_features: - fastqc - version: 1.0.0 + version: 1.1.0dev diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f89d7ad..1de261e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,34 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0.0dev - [date] +## v1.1.0dev - [2026-04-10] + +### `Added` + +- [#152](https://github.com/IntGenomicsLab/lrsomatic/pull/152) - Integrated modkit module for long-read base modification detection and analysis. +- [#149](https://github.com/IntGenomicsLab/lrsomatic/pull/149) - Added DeepVariant and DeepSomatic modules for germline and somatic variant calling from long-read sequencing data. +- [#147](https://github.com/IntGenomicsLab/lrsomatic/pull/147) - Implemented whatshap_stats module to generate phase block statistics and phasing quality metrics. +- [#141](https://github.com/IntGenomicsLab/lrsomatic/pull/141) - Added output of phased variants in separate VCF files for improved downstream analysis. +- [#143](https://github.com/IntGenomicsLab/lrsomatic/pull/143) - Added Severus min_support parameter. +- [#145](https://github.com/IntGenomicsLab/lrsomatic/pull/145) - Integrated MultiQC and nanoplot for comprehensive QC reporting with long-read sequencing metrics. +- [#149](https://github.com/IntGenomicsLab/lrsomatic/pull/149) - Added GPU support for Clair3, DeepVariant, and fibertools +- [#150](https://github.com/IntGenomicsLab/lrsomatic/pull/150) - Added Claude GitHub Actions workflows for automated code review and PR assistance. + +### `Changed` + +- [#137](https://github.com/IntGenomicsLab/lrsomatic/pull/137) - Bulk module versions update. Fixed some issues with Wakhan. +- [#138](https://github.com/IntGenomicsLab/lrsomatic/pull/138) - Perform QC before merging replicates +- [#152](https://github.com/IntGenomicsLab/lrsomatic/pull/152) - Updated container versions and dependencies for modkit and related tools. 
+- [#149](https://github.com/IntGenomicsLab/lrsomatic/pull/149) - Refactored variant calling workflow to support both DeepVariant and existing callers with improved configuration handling. +- [#140](https://github.com/IntGenomicsLab/lrsomatic/pull/140) - Improved documentation with additional pipeline usage examples and configuration guidance. + +### `Fixed` + +- [#137](https://github.com/IntGenomicsLab/lrsomatic/pull/137) - Resolved Nextflow strict syntax compliance issues for compatibility with latest Nextflow versions. +- [#118](https://github.com/IntGenomicsLab/lrsomatic/pull/118) - Updated nf-core template components to align with latest pipeline standards. +- [#149](https://github.com/IntGenomicsLab/lrsomatic/pull/149) - Corrected bcftools and vcfsplit operations for accurate variant filtering and merging. + +## v1.0.0 - [28 Nov 2025] Initial release of IntGenomicsLab/lrsomatic, created with the [nf-core](https://nf-co.re/) template. diff --git a/CITATIONS.md b/CITATIONS.md index 19daeb37..e13600d0 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -8,6 +8,10 @@ > Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. +## [LRSomatic](https://doi.org/10.64898/2026.02.26.707772) + +> LRSomatic: a highly scalable and robust pipeline for somatic variant calling in long-read sequencing data Robert A. Forsyth*, Luuk Harbers*, Amber Verhasselt, Ana-Lucía Rocha Iraizós, Sidi Yang, Joris Vande Velde, Christopher Davies, Nischalan Pillay, Laurens Lambrechts, Jonas Demeulemeester bioRxiv 2026.02.26.707772; doi: https://doi.org/10.64898/2026.02.26.707772 + ## Pipeline tools - [ASCAT](https://pubmed.ncbi.nlm.nih.gov/20837533/) @@ -95,3 +99,9 @@ - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 
2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. + +## Resources + +- [ASAP Panel of Normals](https://www.biorxiv.org/content/10.64898/2026.03.15.711881v1) + + > Olga M. Sigalova, Alexandra Pančíková, Julie De Man, Koen Theunis, Gert J. Hulselmans, Vasileios Konstantakos, Bram Stuyven, Anton De Brabandere, Jarne Geurts, Antonina Mikorska, Shinjini Mukherjee, Sara Abouelasrar Salama, Katy Vandereyken, Kristofer Davie, Lukas Mahieu, Charles H. Adler, Thomas G. Beach, Geidy E. Serrano, Thierry Voet, Jonas Demeulemeester, Stein Aerts. Modeling cis-regulatory variation in human brain enhancers across a large Parkinson’s Disease cohort bioRxiv 2026.03.15.711881; doi: https://doi.org/10.64898/2026.03.15.711881 diff --git a/README.md b/README.md index d4ecfde9..dd8d110e 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,13 @@ # IntGenomicsLab/lrsomatic +[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/IntGenomicsLab/lrsomatic) [![GitHub Actions CI Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml) [![GitHub Actions Linting Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.17751829-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.17751829) +[![GitHub Actions Linting Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.17751829-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.17751829) 
[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) [![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) -[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2) +[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) @@ -21,6 +23,8 @@ It can be run in both **matched tumour-normal** and **tumour-only mode**, offeri Developed using **Nextflow DSL2**, it offers high portability and scalability across diverse computing environments. By leveraging Docker or Singularity containers, installation is streamlined and results are highly reproducible. Each process runs in an isolated container, simplifying dependency management and updates. Where applicable, pipeline components are sourced from **nf-core/modules**, promoting reuse, interoperability, and consistency within the broader Nextflow and nf-core ecosystems. +For more information on how to run the pipeline, you can also go [here](https://intgenomicslab.github.io/lrsomatic). 
+ ## Pipeline summary ![image](./assets/lrsomatic_1.0.png) @@ -161,7 +165,13 @@ If you would like to contribute to this pipeline, please see the [contributing g ## Citations -If you use IntGenomicsLab/lrsomatic for your analysis, please cite it using the following doi: [10.5281/zenodo.17751829](https://doi.org/10.5281/zenodo.17751829) +If you use `IntGenomicsLab/lrsomatic` for your analysis, please cite it using the following: + +> LRSomatic: a highly scalable and robust pipeline for somatic variant calling in long-read sequencing data +> +> Robert A. Forsyth*, Luuk Harbers*, Amber Verhasselt, Ana-Lucía Rocha Iraizós, Sidi Yang, Joris Vande Velde, Christopher Davies, Nischalan Pillay, Laurens Lambrechts, Jonas Demeulemeester +> +> bioRxiv 2026.02.26.707772; doi: https://doi.org/10.64898/2026.02.26.707772 An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. diff --git a/assets/lrsomatic_1.0.png b/assets/lrsomatic_1.0.png index dd803e0c..fe203b75 100644 Binary files a/assets/lrsomatic_1.0.png and b/assets/lrsomatic_1.0.png differ diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 4705d98d..95c83b7c 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,5 +1,5 @@ report_comment: > - This report has been generated by the IntGenomicsLab/lrsomatic analysis pipeline. + This report has been generated by the IntGenomicsLab/lrsomatic analysis pipeline. 
report_section_order: "IntGenomicsLab-lrsomatic-methods-description": order: -1000 diff --git a/assets/schema_input.json b/assets/schema_input.json index 89ccc0bb..d078dd00 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -50,6 +50,14 @@ }, "clairS_model": { "type": "string" + }, + "tumor_replicate": { + "type": "integer", + "default": 1 + }, + "normal_replicate": { + "type": "integer", + "default": 1 } }, "required": ["sample", "bam_tumor", "platform", "sex", "fiber"] diff --git a/conf/base.config b/conf/base.config index 84625158..df70b052 100644 --- a/conf/base.config +++ b/conf/base.config @@ -26,6 +26,26 @@ process { // adding in your local modules too. // TODO nf-core: Customise requirements for specific processes. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_gpu_high { + ext.use_gpu = { params.use_gpu as boolean } + accelerator = { (params.use_gpu as boolean) ? 1 : null } + cpus = { 8 * task.attempt } + memory = { 48.GB * task.attempt } + time = { 8.h * task.attempt } + } + withLabel:process_gpu_very_high { + ext.use_gpu = { params.use_gpu as boolean } + accelerator = { (params.use_gpu as boolean) ? 1 : null } + cpus = { 16 * task.attempt } + memory = { 96.GB * task.attempt } + time = { 10.h * task.attempt } + } + withLabel:process_gpu_very_high_memory { + ext.use_gpu = { params.use_gpu as boolean } + accelerator = { (params.use_gpu as boolean) ? 
1 : null } + cpus = { 16 * task.attempt } + memory = { 128.GB * task.attempt } + } withLabel:process_single { cpus = { 1 } memory = { 6.GB * task.attempt } @@ -52,11 +72,14 @@ process { time = { 16.h * task.attempt } } withLabel:process_long { - time = { 20.h * task.attempt } + time = { 24.h * task.attempt } } withLabel:process_high_memory { memory = { 200.GB * task.attempt } } + withLabel:process_short { + time = { 1.h } + } withLabel:error_ignore { errorStrategy = 'ignore' } @@ -64,8 +87,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withLabel: process_gpu { - ext.use_gpu = { workflow.profile.contains('gpu') } - accelerator = { workflow.profile.contains('gpu') ? 1 : null } - } } diff --git a/conf/igenomes.config b/conf/igenomes.config index 6e0a9349..d9c42c81 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -16,6 +16,8 @@ params { genome_name = "hg38" ascat_alleles = "https://raw.githubusercontent.com/IntGenomicsLab/test-datasets/main/references/ascat/G1000_alleles_hg38.zip" ascat_loci = "https://raw.githubusercontent.com/IntGenomicsLab/test-datasets/main/references/ascat/G1000_loci_hg38.zip" + ascat_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/GC_G1000_hg38.zip" + ascat_loci_rt = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/RT_G1000_hg38.zip" centromere_bed = "https://raw.githubusercontent.com/KolmogorovLab/Wakhan/refs/heads/main/src/annotations/grch38.cen_coord.curated.bed" pon_file = "https://raw.githubusercontent.com/KolmogorovLab/Severus/refs/heads/main/pon/PoN_1000G_hg38.tsv.gz" bed_file = "https://raw.githubusercontent.com/KolmogorovLab/Severus/refs/heads/main/vntrs/human_GRCh38_no_alt_analysis_set.trf.bed" @@ -31,6 +33,7 @@ params { genome_name = "CHM13" ascat_alleles = "https://raw.githubusercontent.com/IntGenomicsLab/test-datasets/main/references/ascat/G1000_alleles_CHM13.zip" ascat_loci = 
"https://raw.githubusercontent.com/IntGenomicsLab/test-datasets/main/references/ascat/G1000_loci_CHM13.zip" + ascat_loci_gc = "https://github.com/IntGenomicsLab/test-datasets/raw/refs/heads/main/references/ascat/GCcontent_SNPloci_CHM13.txt.zip" centromere_bed = "https://raw.githubusercontent.com/KolmogorovLab/Wakhan/refs/heads/main/src/annotations/chm13v2_cen_coord.bed" pon_file = "https://raw.githubusercontent.com/KolmogorovLab/Severus/refs/heads/main/pon/PoN_1000G_chm13.tsv.gz" bed_file = "https://raw.githubusercontent.com/KolmogorovLab/Severus/refs/heads/main/vntrs/chm13.bed" @@ -40,6 +43,7 @@ params { dbsnp = "${params.igenomes_base}/Homo_sapiens/ClairSTO/CHM13/Annotation/ClairSTO-pon/final_dbsnp.vcf.gz" onekgenomes = "${params.igenomes_base}/Homo_sapiens/ClairSTO/CHM13/Annotation/ClairSTO-pon/final_1kgenomes.vcf.gz" colors = "${params.igenomes_base}/Homo_sapiens/ClairSTO/CHM13/Annotation/ClairSTO-pon/final_colors.vcf.gz" + asap = "${params.igenomes_base}/Homo_sapiens/ClairSTO/CHM13/Annotation/ClairSTO-pon/WGS_CHM13_ASAP.vcf.gz" } 'GRCm38' { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" diff --git a/conf/modules.config b/conf/modules.config index d82196bb..25de8b9b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -41,20 +41,39 @@ process { ] } - withName: '.*:CRAMINO_PRE' { + ext.prefix = { "${meta.id}_${meta.type}" } ext.args = '--ubam' publishDir = [ - path: { "${params.outdir}/${meta.id}/qc/${meta.type}/cramino_ubam" }, + path: { "${params.outdir}/${meta.id}/qc/${meta.type}/cramino_ubam_rep${meta.replicate}/" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('.arrow') ? 
null : filename } ] } withName: '.*:CRAMINO_POST' { + ext.prefix = { "${meta.id}_${meta.type}" } publishDir = [ path: { "${params.outdir}/${meta.id}/qc/${meta.type}/cramino_aln" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('.arrow') ? null : filename } + ] + } + + withName: '.*:NANOPLOT_PRE' { + ext.prefix = { "${meta.id}_${meta.type}_ubam" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/qc/${meta.type}/nanoplot_ubam_rep${meta.replicate}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:NANOPLOT_POST' { + ext.prefix = { "${meta.id}_${meta.type}_aln" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/qc/${meta.type}/nanoplot_aln" }, + mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } @@ -77,6 +96,16 @@ process { ] } + withName: '.*:WHATSHAP_STATS' { + ext.prefix = { "${meta.id}_whatshap_stats" } + ext.args = { "--sample ${meta.id}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/qc/whatshap_stats" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // // Preprocessing and other processes // @@ -87,6 +116,132 @@ process { ] } + withName: '.*:BCFTOOLS_NORM' { + ext.prefix = { "${meta.id}.${meta.caller}_norm" } + ext.args = { + "-Oz" + } + publishDir = [ + enabled: false + ] + } + + withName: '.*:BCFTOOLS_ISEC' { + ext.prefix = { "${meta.id}_isec" } + ext.args ={ + "-Oz" + } + publishDir = [ + enabled: false + ] + } + withName: '.*STANDARDIZE_AF' { + ext.prefix = { "${meta.id}.${meta.caller}_standardized" } + ext.args = { + meta.rename_to == 'VAF' + ? 
"--rename-annots <(printf 'FORMAT/AF\\tFORMAT/VAF\\n') -Oz -W=tbi" + : "--rename-annots <(printf 'FORMAT/VAF\\tFORMAT/AF\\n') -Oz -W=tbi" + } + publishDir = [ + enabled: false + ] + } + withName: '.*:BCFTOOLS_ANNOTATE' { + ext.prefix = { "${meta.id}.${meta.caller}" } + ext.args = { + '''-h <(echo '##INFO=') \ + -c CHROM,POS,REF,ALT,INFO/CALLER \ + -Oz \ + -W=tbi''' + } + publishDir = [ + enabled: false + ] + } + withName: '.*:BCFTOOLS_QUERY' { + ext.args = { + "-f '%CHROM\t%POS\t%REF\t%ALT\t${meta.caller}\n'" + } + publishDir = [ + enabled: false + ] + } + + withName: '.*DEEPVARIANT_MAKEEXAMPLES' { + ext.args = { + meta.platform == 'pb' + ? '--channel_list "BASE_CHANNELS,haplotype,base_6ma" --alt_aligned_pileup "diff_channels" --pileup_image_width "147"' + : '--channel_list "BASE_CHANNELS,haplotype" --alt_aligned_pileup "diff_channels" --pileup_image_width "99"' + } + publishDir = [ + enabled: false + ] + } + + withName: '.*DEEPVARIANT_POSTPROCESSVARIANTS' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/variants/deepvariant" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.equals('versions.yml')) return null + if ((filename.endsWith('.g.vcf.gz') || filename.endsWith('.g.vcf.gz.tbi')) && !params.generate_gvcf) return null + return filename + } + ] + + } + + withName: '.*DEEPVARIANT_CALLVARIANTS' { + ext.args = { + meta.platform == 'pb' ? ("--checkpoint '/opt/models/pacbio' ") : ("--checkpoint '/opt/models/ont_r104'") + } + publishDir = [ + enabled : false + ] + } + + withName: '.*DEEPSOMATIC_MAKEEXAMPLES' { + ext.args = { + meta.paired_data + ? 
'--channel_list "BASE_CHANNELS,haplotype" --alt_aligned_pileup "diff_channels" --pileup_image_width "99"' + : '--channel_list "BASE_CHANNELS,haplotype,allele_frequency" --alt_aligned_pileup "diff_channels" --pileup_image_width "99"' + } + publishDir = [ + enabled: false + ] + } + + withName: '.*DEEPSOMATIC_POSTPROCESSVARIANTS' { + ext.args = { + '--process_somatic=true' + } + publishDir = [ + path: { "${params.outdir}/${meta.id}/variants/deepsomatic" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.equals('versions.yml')) return null + if ((filename.endsWith('.g.vcf.gz') || filename.endsWith('.g.vcf.gz.tbi')) && !params.generate_gvcf) return null + return filename + } + ] + } + + withName: '.*DEEPSOMATIC_CALLVARIANTS' { + ext.args = { + meta.platform == 'pb' + ? (meta.paired_data + ? "--checkpoint '/opt/models/deepsomatic/pacbio'" + : "--checkpoint '/opt/models/deepsomatic/pacbio_tumor_only'" ) + : (meta.paired_data + ? "--checkpoint '/opt/models/deepsomatic/ont'" + : "--checkpoint '/opt/models/deepsomatic/ont_tumor_only'") + } + publishDir = [ + enabled : false + ] + } + + withName: '.*:UNZIP_.*' { publishDir = [ enabled: false @@ -132,7 +287,7 @@ process { // withName: '.*:MINIMAP2_ALIGN' { - ext.prefix = { "${meta.id}_mapped" } + ext.prefix = { "${meta.id}_${meta.type}_mapped" } ext.args = { [ meta.platform == 'pb' ? ( params.minimap2_pb_model ? "-ax $params.minimap2_pb_model" : "-ax map-hifi" ) : @@ -189,7 +344,8 @@ process { // Phasing processes // - withName: '.*:LONGPHASE_PHASE' { + withName: '.*:LONGPHASE_PHASE_GERMLINE' { + ext.prefix = { "germline_smallvariants" } ext.args = { [ meta.platform == 'pb' ? '--pb' : '--ont', @@ -197,11 +353,26 @@ process { ].join(' ').trim() } publishDir = [ - enabled: false + path: { "${params.outdir}/${meta.id}/variants/phased" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } - - withName: '.*:LONGPHASE_HAPLOTAG' { + withName: '.*:LONGPHASE_PHASE_SOMATIC' { + ext.prefix = { "somatic_smallvariants" } + ext.args = { + [ + meta.platform == 'pb' ? '--pb' : '--ont', + "--indels", + ].join(' ').trim() + } + publishDir = [ + path: { "${params.outdir}/${meta.id}/variants/phased" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*:LONGPHASE_HAPLOTAG*' { ext.prefix = { "${meta.id}_${meta.type}" } publishDir = [ path: { "${params.outdir}/${meta.id}/bamfiles" }, @@ -209,13 +380,19 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + withName: '.*:LONGPHASE_MODCALL.*' { + publishDir = [ + enabled: false + ] + } // // Structural variant calling processes // withName: '.*:SEVERUS' { - ext.args = '--min-support 3 --output-read-ids ' + ext.prefix = "." + ext.args = { "--min-support ${params.severus_minsupport} --output-read-ids " } publishDir = [ path: { "${params.outdir}/${meta.id}/variants/severus" }, mode: params.publish_dir_mode, @@ -227,18 +404,55 @@ process { // Small variant calling processes // withName: '.*:BCFTOOLS_CONCAT' { - ext.args = '--output-type z -a' + ext.args = {'-Oz -a -W=tbi'} publishDir = [ enabled: false ] } withName: '.*:BCFTOOLS_SORT' { - ext.arge = '--output-type z' + ext.prefix = { "${meta.id}_sorted" } + ext.args = {'-Oz -W=tbi'} + publishDir = [ + enabled: false + ] + } + withName: '.*:GERMLINE_CONSENSUS:BCFTOOLS_SORT' { + ext.prefix = { "${meta.id}_germline_sorted" } + ext.args = {'-Oz -W=tbi'} + publishDir = [ + enabled: false + ] + } + withName: '.*:SOMATIC_CONSENSUS:BCFTOOLS_SORT' { + ext.prefix = { "${meta.id}_somatic_sorted" } + ext.args = {'-Oz -W=tbi'} + publishDir = [ + enabled: false + ] + } + withName: '.*:GERMLINE_CONSENSUS:SORT_POST_NORM' { + ext.prefix = { "${meta.id}.${meta.caller}_norm_sorted" } + ext.args = { '-Oz -W=tbi' } + publishDir = [ + enabled: false + ] + 
} + withName: '.*:SOMATIC_CONSENSUS:SORT_POST_NORM' { + ext.prefix = { "${meta.id}.${meta.caller}_norm_sorted" } + ext.args = { '-Oz -W=tbi' } + publishDir = [ + enabled: false + ] + } + withName: '.*:PAIRED_SMALLVAR_SOMATIC:BCFTOOLS_SORT' { + ext.prefix = { "${meta.id}_somatic_sorted" } + ext.args = {'-Oz -W=tbi'} publishDir = [ enabled: false ] } withName: '.*:CLAIRSTO' { + ext.args = { "--sample_name ${meta.id}" } publishDir = [ path: { "${params.outdir}/${meta.id}/variants/clairsto" }, mode: params.publish_dir_mode, @@ -256,6 +470,7 @@ process { } withName: '.*:CLAIR3' { + ext.args = { "--sample_name=${meta.id}" } publishDir = [ path: { "${params.outdir}/${meta.id}/variants/clair3" }, mode: params.publish_dir_mode, @@ -278,7 +493,8 @@ process { "min_map_qual": params.ascat_min_map_qual, "longread_bins": params.ascat_longread_bins, "allele_counter_flags": params.ascat_allelecounter_flags, - "penalty": params.ascat_penalty + "penalty": params.ascat_penalty, + "pdf_plots": params.ascat_pdf_plots ] } publishDir = [ path: { "${params.outdir}/${meta.id}/ascat" }, @@ -292,8 +508,7 @@ process { [ params.wakhan_chroms ? "--contigs ${params.wakhan_chroms}" : (meta.sex == "female" ? 
"--contigs chr1-22,chrX" : "--contigs chr1-22,chrX,chrY"), - "--pdf-enable", - "--centromere ${params.centromere_bed}" + "--pdf-enable" ].join(' ').trim() } publishDir = [ path: { "${params.outdir}/${meta.id}/wakhan" }, diff --git a/conf/test.config b/conf/test.config index 416ed610..8494fbf9 100644 --- a/conf/test.config +++ b/conf/test.config @@ -18,6 +18,29 @@ process { time: '1.h' ] } + + withName: '.*DEEPVARIANT_MAKEEXAMPLES' { + ext.args = { + "--regions 'chr19'" + } + } + + withName: '.*DEEPVARIANT_POSTPROCESSVARIANTS' { + ext.args = { + "--regions 'chr19'" + } + } + withName: '.*DEEPSOMATIC_MAKEEXAMPLES' { + ext.args = { + "--regions 'chr19'" + } + } + withName: '.*DEEPSOMATIC_POSTPROCESSVARIANTS' { + ext.args = { + "--regions 'chr19'" + } + } + } params { @@ -29,9 +52,10 @@ params { fasta = "https://raw.githubusercontent.com/IntGenomicsLab/test-datasets/main/references/GRCh38_chr19.fasta.gz" // Additional params - genome = "GRCh38" + genome = "CHM13" vep_genome = "WBcel235" vep_species = "caenorhabditis_elegans" skip_wakhan = true skip_ascat = true + skip_modkit = true } diff --git a/docs/output.md b/docs/output.md index 29d1c7a3..61d82b28 100644 --- a/docs/output.md +++ b/docs/output.md @@ -8,53 +8,82 @@ The directories listed below will be created in the results directory after the ### Output Example +The pipeline produces per-sample output directories. Two modes exist depending on whether a matched normal sample is provided. 
+ +**Tumor-only sample** (no matched normal, `-TO` variant callers): + ``` -├── Sample 1 +├── Sample ID │ ├── ascat │ ├── bamfiles +│ ├── methylation +│ │ └── tumor +│ │ └── modkit_pileup │ ├── qc │ │ ├── tumor │ │ │ ├── cramino_aln -│ │ │ ├── cramino_ubam +│ │ │ ├── cramino_ubam_rep1 │ │ │ ├── fibertoolsrs │ │ │ ├── mosdepth -│ │ │ ├── samtools +│ │ │ ├── nanoplot_aln +│ │ │ ├── nanoplot_ubam_rep1 +│ │ │ └── samtools +│ │ └── whatshap_stats │ ├── variants -│ │ ├──clairS-TO -│ │ ├──severus +│ │ ├── clairsto +│ │ ├── deepsomatic +│ │ ├── deepvariant +│ │ ├── phased +│ │ └── severus │ ├── vep -│ │ ├── germline │ │ ├── somatic -│ │ ├── SVs -│ -├── Sample 2 +│ │ └── SVs +│ └── wakhan +``` + +**Paired tumor + normal sample**: + +``` +├── Sample ID │ ├── ascat │ ├── bamfiles +│ ├── methylation +│ │ ├── tumor +│ │ │ └── modkit_pileup +│ │ └── normal +│ │ └── modkit_pileup │ ├── qc │ │ ├── tumor │ │ │ ├── cramino_aln -│ │ │ ├── cramino_ubam +│ │ │ ├── cramino_ubam_rep1 │ │ │ ├── fibertoolsrs │ │ │ ├── mosdepth -│ │ │ ├── samtools +│ │ │ ├── nanoplot_aln +│ │ │ ├── nanoplot_ubam_rep1 +│ │ │ └── samtools │ │ ├── normal │ │ │ ├── cramino_aln -│ │ │ ├── cramino_ubam +│ │ │ ├── cramino_ubam_rep1 │ │ │ ├── fibertoolsrs │ │ │ ├── mosdepth -│ │ │ ├── samtools +│ │ │ ├── nanoplot_aln +│ │ │ ├── nanoplot_ubam_rep1 +│ │ │ └── samtools +│ │ └── whatshap_stats │ ├── variants │ │ ├── clair3 -│ │ ├── clairS -│ │ ├── severus +│ │ ├── clairs +│ │ ├── deepsomatic +│ │ ├── deepvariant +│ │ ├── phased +│ │ └── severus │ ├── vep │ │ ├── germline │ │ ├── somatic -│ │ ├── SVs -│ ├── wakhan +│ │ └── SVs +│ └── wakhan ├── pipeline_info -├── multiqc - +└── multiqc ``` ### `ascat` @@ -66,11 +95,14 @@ The directories listed below will be created in the results directory after the ├── ascat │ ├── sample.before_correction.sample.tumour.germline.png │ ├── sample.before_correction.sample.tumour.tumour.png +│ ├── sample.after_correction.sample.tumour.germline.png +│ ├── 
sample.after_correction.sample.tumour.tumour.png │ ├── sample.cnvs.txt │ ├── sample.metrics.txt │ ├── sample.normal_alleleFrequencies_chr(1-22,X).txt │ ├── sample.purityploidy.txt │ ├── sample.segments.txt +│ ├── sample.segments_raw.txt │ ├── sample.tumour_alleleFrequencies_chr(1-22,X).txt │ ├── sample.tumour_normalBAF_rawBAF.txt │ ├── sample.tumour_normalBAF.txt @@ -90,6 +122,7 @@ The directories listed below will be created in the results directory after the | `sample.normal_alleleFrequencies_chr(1-22,X).txt` | a tsv file describing the snp counts for the normal sample at each position and their respective depths | | `sample.purityploidy.txt` | a tsv file describing the purity and ploidy values of the sample | | `sample.segments.txt` | a tsv file describing each chromosome segment and it's major and minor copy number | +| `sample.segments_raw.txt` | a tsv file describing each chromosome segment and it's major and minor rounded and raw copy number | | `sample.tumour_alleleFrequencies_chr(1-22,X).txt` | a tsv file describing the snp counts for the tumor sample at each position and their respective depths | | `sample.tumour_normalBAF_rawBAF.txt` | a tsv file with the raw BAF values in the normal sample | | `sample.tumour_normalBAF.txt` | a tsv file with the BAF values in the normal sample | @@ -131,34 +164,74 @@ The directories listed below will be created in the results directory after the
Output files +QC outputs are placed under `tumor/` for all samples, and additionally under `normal/` for paired tumor + normal samples. `whatshap_stats/` appears at the top level of `qc/`. + ``` ├── qc │ ├── tumor │ │ ├── cramino_aln -│ │ │ ├── sample.cramino.txt -│ │ ├── cramino_ubam -│ │ │ ├── sample.cramino.txt +│ │ │ ├── sample_tumor_cramino.txt +│ │ ├── cramino_ubam_rep1 +│ │ │ ├── sample_tumor_cramino.txt │ │ ├── fibertoolsrs │ │ │ ├── sample_qc.txt │ │ ├── mosdepth │ │ │ ├── sample.mosdepth.global.dist.txt │ │ │ ├── sample.mosdepth.summary.txt +│ │ ├── nanoplot_aln +│ │ │ ├── sample_tumor_aln_NanoStats.txt +│ │ │ ├── sample_tumor_aln_NanoPlot-report.html +│ │ ├── nanoplot_ubam_rep1 +│ │ │ ├── sample_tumor_ubam_NanoStats.txt +│ │ │ ├── sample_tumor_ubam_NanoPlot-report.html │ │ ├── samtools │ │ │ ├── sample.flagstat │ │ │ ├── sample.idxstats │ │ │ ├── sample.stats +│ ├── normal # paired samples only +│ │ └── [same subdirectories as tumor] +│ ├── whatshap_stats +│ │ ├── sample.stats.tsv +│ │ ├── sample.blocklist.tsv +``` + +| File | Description | +| ------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------ | +| `cramino_aln/sample_{type}_cramino.txt` | cramino QC summary statistics for the aligned bam file | +| `cramino_ubam_rep1/sample_{type}_cramino.txt` | cramino QC summary statistics for the unaligned bam files | +| `fibertoolsrs/sample_qc.txt` | fibertools QC summary for the bam file | +| `mosdepth/sample.mosdepth.global.dist.txt` | a cumulative distribution indicating the proportion of total bases that were covered for at least a given coverage value | +| `mosdepth/sample.mosdepth.summary.txt` | overall summary file from mosdepth tool | +| `nanoplot_aln/sample_{type}_aln_NanoStats.txt` | NanoPlot summary statistics for the aligned BAM file | +| `nanoplot_aln/sample_{type}_aln_NanoPlot-report.html` | NanoPlot interactive HTML 
report for the aligned BAM file | +| `nanoplot_ubam_rep1/sample_{type}_ubam_NanoStats.txt` | NanoPlot summary statistics for the unaligned BAM file | +| `nanoplot_ubam_rep1/sample_{type}_ubam_NanoPlot-report.html` | NanoPlot interactive HTML report for the unaligned BAM file | +| `samtools/sample.flagstat` | a summary of the counts of different samtools flags | +| `samtools/sample.idxstats` | a summary of the number of mapped and unmapped reads | +| `samtools/sample.stats` | summary statistics from the bamfile | +| `whatshap_stats/sample.stats.tsv` | WhatsHap phasing statistics per chromosome including phase block N50 and switch error rates | +| `whatshap_stats/sample.blocklist.tsv` | list of all phase blocks with their genomic coordinates | + +
+ +### `methylation` + +
+Output files + +``` +├── methylation +│ ├── tumor +│ │ └── modkit_pileup +│ │ └── sample.bed.gz +│ ├── normal # paired samples only +│ │ └── modkit_pileup +│ │ └── sample.bed.gz ``` -| File | Description | -| ------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------ | -| `cramino_aln/sample.cramino.txt` | cramino QC summary statistics for the aligned bam file | -| `cramino_ubam/sample.cramino.txt` | cramino QC summary statistics for the unaligned bam files | -| `fibertoolsrs/sample_qc.txt` | fibertools QC summary for the bam file | -| `mosdepth/sample.mosdepth.global.dist.txt` | a cumulative distribution indicating the proportion of total bases that were covered for at least a given coverage value | -| `mosdepth/sample.mosdepth.summary.txt` | overall summary file from mosdepth tool | -| `samtools/sample.flagstat` | a summary of the counts of different samtools flags | -| `samtools/sample.idxstats` | a summary of the number of mapped and unmapped reads | -| `samtools/sample.stats` | summary statistics from the bamfile | +| File | Description | +| -------------------------------------------- | ----------------------------------------------------------------------------------- | +| `{tumor,normal}/modkit_pileup/sample.bed.gz` | Modkit pileup BED file containing per-CpG methylation frequency and coverage values |
@@ -182,8 +255,10 @@ The directories listed below will be created in the results directory after the #### `clairS` +Present in **paired** (tumor + normal) samples. + ``` -├── clairS +├── clairs │ ├── indel.vcf.gz │ ├── indel.vcf.gz.tbi │ ├── snv.vcf.gz @@ -199,8 +274,10 @@ The directories listed below will be created in the results directory after the #### `clairS-TO` +Present in **tumor-only** samples (no matched normal). + ``` -├── clairS +├── clairsto │ ├── germline.vcf.gz │ ├── germline.vcf.gz.tbi │ ├── indel.vcf.gz @@ -220,7 +297,7 @@ The directories listed below will be created in the results directory after the | `snv.vcf.gz` | Raw SNV calls in vcf format | | `snv.vcf.gz.tbi` | Index for SNV calls | | `somatic.vcf.gz` | SNV and indel calls marked as PASS and without a germline tag | -| `somatic.vcf.gz` | Index for osmatic small variatn calls | +| `somatic.vcf.gz.tbi` | Index for somatic small variant calls | #### `severus` @@ -262,6 +339,63 @@ The directories listed below will be created in the results directory after the | `read_qual.txt` | file containing quality statistics about identified segements | | `severus.log` | log file | +#### `deepvariant` + +DeepVariant germline small variant calls. Present in all samples. + +``` +├── deepvariant +│ ├── sample.vcf.gz +│ ├── sample.vcf.gz.tbi +│ ├── sample.g.vcf.gz # only when --generate_gvcf is true +│ ├── sample.g.vcf.gz.tbi # only when --generate_gvcf is true +``` + +| File | Description | +| --------------------- | ------------------------------------------------------------------------------- | +| `sample.vcf.gz` | DeepVariant germline SNV and indel calls in VCF format | +| `sample.vcf.gz.tbi` | Index for DeepVariant germline calls | +| `sample.g.vcf.gz` | DeepVariant gVCF file with calls at all positions (only with `--generate_gvcf`) | +| `sample.g.vcf.gz.tbi` | Index for DeepVariant gVCF (only with `--generate_gvcf`) | + +#### `deepsomatic` + +DeepSomatic somatic small variant calls. 
Present in all samples. + +``` +├── deepsomatic +│ ├── sample.vcf.gz +│ ├── sample.vcf.gz.tbi +│ ├── sample.g.vcf.gz # only when --generate_gvcf is true +│ ├── sample.g.vcf.gz.tbi # only when --generate_gvcf is true +``` + +| File | Description | +| --------------------- | ------------------------------------------------------------------------------- | +| `sample.vcf.gz` | DeepSomatic somatic SNV and indel calls in VCF format | +| `sample.vcf.gz.tbi` | Index for DeepSomatic somatic calls | +| `sample.g.vcf.gz` | DeepSomatic gVCF file with calls at all positions (only with `--generate_gvcf`) | +| `sample.g.vcf.gz.tbi` | Index for DeepSomatic gVCF (only with `--generate_gvcf`) | + +#### `phased` + +Phased variant calls produced by Longphase. Present in all samples. + +``` +├── phased +│ ├── germline_smallvariants.vcf.gz +│ ├── germline_smallvariants.vcf.gz.tbi +│ ├── somatic_smallvariants.vcf.gz +│ ├── somatic_smallvariants.vcf.gz.tbi +``` + +| File | Description | +| ----------------------------------- | ---------------------------------------------------------------- | +| `germline_smallvariants.vcf.gz` | Longphase-phased germline SNV/indel VCF with haplotype (PS) tags | +| `germline_smallvariants.vcf.gz.tbi` | Index for the phased germline VCF | +| `somatic_smallvariants.vcf.gz` | Longphase-phased somatic SNV/indel VCF with haplotype (PS) tags | +| `somatic_smallvariants.vcf.gz.tbi` | Index for the phased somatic VCF | + ### `vep` @@ -331,13 +465,15 @@ The directories listed below will be created in the results directory after the │ │ ├── chr{1-22,X,Y}_cov.html │ │ ├── chr{1-22,X,Y}.pdf │ │ ├── COVERAGE_INDEX.html -│ ├── phasing output +│ ├── phasing_output │ │ ├── chr{1-22,X,Y}_phase_correction_0.html │ │ ├── chr{1-22,X,Y}_phase_correction_1.html │ │ ├── chr{1-22,X,Y}_without_phase_correction.html │ │ ├── chr{1-22,X,Y}.pdf │ │ ├── sample.rephased.vcf.gz │ │ ├── sample.rephased.vcf.gz.tbi +│ ├── snps_loh_plots +│ │ ├── chr{1-22,X,Y}_snps_loh.html │ ├── 
sample_heatmap_ploidy_purity.html │ ├── sample_heatmap_ploidy_purity.html.pdf │ ├── sample_optimized_peak.html @@ -345,38 +481,38 @@ The directories listed below will be created in the results directory after the ``` -| File | Description | -| ------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------ | -| `{ploidy}_{purity}_{confidence}/bed_output/genes_copynumber_states.bed` | bed file containing allele specific copy number values with coverage information | -| `{ploidy}_{purity}_{confidence}/bed_output/loh_regions.bed` | bed file containing positions of loss of heterozygosity regions | -| `{ploidy}_{purity}_{confidence}/bed_output/sample_{ploidy}_{purity}_{confidence}_HP_1.bed` | bed file containing copy number states, coverage, and SV breakpoints for haplotype 1 | -| `{ploidy}_{purity}_{confidence}/bed_output/sample_{ploidy}_{purity}_{confidence}_HP_2.bed` | bed file containing copy number states, coverage, and SV breakpoints for haplotype 2 | -| `{ploidy}_{purity}_{confidence}/variation_plots/chr{1-22,X,Y}_cn.html` | html based plotly plot of copy number and coverage for individual chromosomes | -| `{ploidy}_{purity}_{confidence}/variation_plots/chr{1-22,X,Y}_cn.pdf` | pdf based plotly plot of copy number and coverage for individual chromosomes | -| `{ploidy}_{purity}_{confidence}/variation_plots/CN_VARIATION_INDEX.html` | unclear html plot | -| `{ploidy}_{purity}_{confidence}/sample_{purity}_{ploidy}_{confidence}_genes_genome.html` | html plots of copy number variations in highlighted genes | -| `{ploidy}_{purity}_{confidence}/sample_{purity}_{ploidy}_{confidence}_genes_genome.pdf` | pdf plots of copy number variations in highlighted genes | -| `{ploidy}_{purity}_{confidence}/sample_{purity}_{ploidy}_{confidence}_genome_copynumbers_details.html` | genome-wide html copy number plots with coverage information on same axis | -| 
`{ploidy}_{purity}_{confidence}/sample_{purity}_{ploidy}_{confidence}_genome_copynumbers_details.pdf` | genome-wide pdf copy number plots with coverage information on same axis | -| `coverage_data/{0-23}_SNP.csv` | CSV of coverage data per chromosome | -| `coverage_data/coverage_ps.csv` | CSV of overall haplotype specific coverage data | -| `coverage_data/coverage.csv` | CSV of overall coverage data | -| `coverage_data/phase_corrected_coverage.csv` | CSV of overall phase-corrected coverage data | -| `coverage_data/pileup_SNPs.csv` | CSV of SNP pileup data | -| `coverage_plots/chr{1-22,X,Y}_cov.html` | chromosome specific html coverage plots | -| `coverage_plots/chr{1-22,X,Y}_cov.pdf` | chromosome specific pdf coverage plots | -| `coverage_plots/COVERAGE_INDEX.html` | unclear html plot | -| `phasing_output/chr{1-23,X,Y}_phase_correction_0.html` | Phase-switch error correction plot per chromosome | -| `phasing_output/chr{1-23,X,Y}_phase_correction_1.html` | Phase-switch error correction plot per chromosome | -| `phasing_output/chr{1-22,X,Y}_without_phase_correction.html` | Phase-switch error without phase correction plot per chromosome | -| `phasing_output/chr{1-22,X,Y}.pdf` | Phase-switch error correction plot | -| `phasing_output/PHASE_CORRECTION_INDEX` | unclear html plot | -| `phasing_output/sample_rephased.vcf.gz` | phase corrected SNP vcf file | -| `phasing_output/sample_rephased.vcf.gz.tbi` | phase corrected SNP vcf index file | -| `sample_heatmap_ploidy_purity.html` | heatmap html plot of purity ploidy fit | -| `sample_heatmap_ploidy_purity.html.pdf` | heatmap html plot of purity ploidy fit | -| `sample_optimized_peak.html` | optimization peak plot | -| `solutions_ranks.tsv` | rank of potential purity ploidy solutions | +| File | Description | +| ------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------- | +| 
`{ploidy}_{purity}_{confidence}/bed_output/genes_copynumber_states.bed` | bed file containing allele specific copy number values with coverage information | +| `{ploidy}_{purity}_{confidence}/bed_output/loh_regions.bed` | bed file containing positions of loss of heterozygosity regions | +| `{ploidy}_{purity}_{confidence}/bed_output/sample_{ploidy}_{purity}_{confidence}_HP_1.bed` | bed file containing copy number states, coverage, and SV breakpoints for haplotype 1 | +| `{ploidy}_{purity}_{confidence}/bed_output/sample_{ploidy}_{purity}_{confidence}_HP_2.bed` | bed file containing copy number states, coverage, and SV breakpoints for haplotype 2 | +| `{ploidy}_{purity}_{confidence}/variation_plots/chr{1-22,X,Y}_cn.html` | html based plotly plot of copy number and coverage for individual chromosomes | +| `{ploidy}_{purity}_{confidence}/variation_plots/chr{1-22,X,Y}_cn.pdf` | pdf based plotly plot of copy number and coverage for individual chromosomes | +| `{ploidy}_{purity}_{confidence}/variation_plots/CN_VARIATION_INDEX.html` | unclear html plot | +| `{ploidy}_{purity}_{confidence}/sample_{purity}_{ploidy}_{confidence}_genes_genome.html` | html plots of copy number variations in highlighted genes | +| `{ploidy}_{purity}_{confidence}/sample_{purity}_{ploidy}_{confidence}_genes_genome.pdf` | pdf plots of copy number variations in highlighted genes | +| `{ploidy}_{purity}_{confidence}/sample_{purity}_{ploidy}_{confidence}_genome_copynumbers_details.html` | genome-wide html copy number plots with coverage information on same axis | +| `{ploidy}_{purity}_{confidence}/sample_{purity}_{ploidy}_{confidence}_genome_copynumbers_details.pdf` | genome-wide pdf copy number plots with coverage information on same axis | +| `coverage_data/{0-23}_SNP.csv` | CSV of coverage data per chromosome | +| `coverage_data/coverage_ps.csv` | CSV of overall haplotype specific coverage data | +| `coverage_data/coverage.csv` | CSV of overall coverage data | +| 
`coverage_data/phase_corrected_coverage.csv` | CSV of overall phase-corrected coverage data | +| `coverage_data/pileup_SNPs.csv` | CSV of SNP pileup data | +| `coverage_plots/chr{1-22,X,Y}_cov.html` | chromosome specific html coverage plots | +| `coverage_plots/chr{1-22,X,Y}_cov.pdf` | chromosome specific pdf coverage plots | +| `coverage_plots/COVERAGE_INDEX.html` | unclear html plot | +| `phasing_output/chr{1-23,X,Y}_phase_correction_0.html` | Phase-switch error correction plot per chromosome | +| `phasing_output/chr{1-23,X,Y}_phase_correction_1.html` | Phase-switch error correction plot per chromosome | +| `phasing_output/chr{1-22,X,Y}_without_phase_correction.html` | Phase-switch error without phase correction plot per chromosome | +| `phasing_output/chr{1-22,X,Y}.pdf` | Phase-switch error correction plot | +| `phasing_output/sample_rephased.vcf.gz` | phase corrected SNP vcf file | +| `phasing_output/sample_rephased.vcf.gz.tbi` | phase corrected SNP vcf index file | +| `snps_loh_plots/chr{1-22,X,Y}_snps_loh.html` | interactive HTML plots of SNP allele frequencies and loss of heterozygosity regions per chromosome | +| `sample_heatmap_ploidy_purity.html` | heatmap html plot of purity ploidy fit | +| `sample_heatmap_ploidy_purity.html.pdf` | heatmap pdf plot of purity ploidy fit | +| `sample_optimized_peak.html` | optimization peak plot | +| `solutions_ranks.tsv` | rank of potential purity ploidy solutions | @@ -465,20 +601,23 @@ The directories listed below will be created in the results directory after the ``` ├── pipeline_info -│ ├── execution_report_{DATE}.html │ ├── execution_timeline_{DATE}.html │ ├── execution_trace_{DATE}.txt -│ ├── lrsomatic_softwar_mqc_versions.yml +│ ├── final_sample_disk_usage.tsv +│ ├── lrsomatic_software_mqc_versions.yml │ ├── params_{DATE}.json -│ ├── pipeline_dag_{DATE}/html +│ ├── pipeline_dag_{DATE}.html +│ ├── raw_task_disk_usage.tsv ``` -| File | Description | -| ------------------------------------ | 
------------------------------------------------------------------------------------------- | -| `execution_report_{DATE}.hmtl` | summary of pipeline resource and timing usage in a html report | -| `execution_timeline_{DATE}.hmtl` | a graphical summary of the timing of each module's task over the course of the pipeline run | -| `lrsomatic_softwar_mqc_versions.yml` | summary of the versions of each tool used by the pipeline | -| `params_{DATE}.json` | summary of the paramaters used in the pipeline | -| `pipeline_dag_{DATE}.html` | flow chart summarizing the pipeline run | +| File | Description | +| ------------------------------------- | ------------------------------------------------------------------------------------------- | +| `execution_timeline_{DATE}.html` | a graphical summary of the timing of each module's task over the course of the pipeline run | +| `execution_trace_{DATE}.txt` | detailed per-task resource usage log (CPU, memory, wall time) | +| `final_sample_disk_usage.tsv` | summary of disk usage per sample at pipeline completion | +| `lrsomatic_software_mqc_versions.yml` | summary of the versions of each tool used by the pipeline | +| `params_{DATE}.json` | summary of the parameters used in the pipeline run | +| `pipeline_dag_{DATE}.html` | flow chart summarizing the pipeline structure | +| `raw_task_disk_usage.tsv` | per-task disk usage across all pipeline tasks | diff --git a/docs/usage.md b/docs/usage.md index 30fa00af..a6447322 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -97,6 +97,34 @@ genome: 'GRCh37' You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). +## CHM13 Support + +Our pipeline fully supports CHM13 and most reference and annotation files are automatically downloaded when specifying `--genome CHM13`. + +However, VEP will need a bit of additional setup. The VEP cache for CHM13 needs to be manually downloaded. This can be done using the following code. 
Feel free to change any of the paths, ensuring that the correct path is pointed to in the pipeline parameters. + +Download CHM13 Cache: + +```bash +cd $HOME/.vep +curl -O https://ftp.ensembl.org/pub/rapid-release/species/Homo_sapiens/GCA_009914755.4/ensembl/variation/2022_10/indexed_vep_cache/Homo_sapiens-GCA_009914755.4-2022_10.tar.gz +tar xzf Homo_sapiens-GCA_009914755.4-2022_10.tar.gz +``` + +Then you can run the pipeline as follows: + +```bash +nextflow run IntGenomicsLab/lrsomatic \ + --input samplesheet.csv \ + --outdir ./results \ + --genome CHM13 \ + --vep_cache $HOME/.vep \ + --vep_cache_version 107 \ + -profile docker +``` + +If you want to run with a CHM13 reference without using `--genome CHM13` (for example, via a custom FASTA or configuration), you must also specify `--vep_genome T2T-CHM13v2.0` and `--vep_species homo_sapiens_gca009914755v4`. + ### Pipeline options | Parameter | Description | @@ -107,48 +135,120 @@ You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-c #### Skipping options: -| Parameter | Description | -| ----------------- | ----------------------------------------------------------------------------------------------------------- | -| `--skip_qc` | A boolean to skip all QC steps, including `mosdepth`, `samtools`,`fibertools`, `cramino`. Default = `false` | -| `--skip_fiber` | A boolean to skip all `fibertools` related modules. Default = `false` | -| `--skip_cramino` | A boolean to skip `cramino`. Default = `false` | -| `--skip_mosdepth` | A boolean to skip `mosdepth`. Default = `false` | -| `--skip_ascat` | A boolean to skip `ascat`. Default = `false` | -| `--skip_bamstats` | A boolean to skip `bamstats`. Default = `false` | -| `--skip_wakhan` | A boolean to skip `wakhan`. Default = `false` | -| `--skip_vep` | A boolean to skip `vep`. 
Default = `false` | +| Parameter | Description | +| ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `--skip_qc` | A boolean to skip all QC steps, including `mosdepth`, `samtools`,`fibertools`, `cramino`. Default = `false` | +| `--skip_fiber` | A boolean to skip all `fibertools` related modules. Default = `false` | +| `--skip_cramino` | A boolean to skip `cramino`. Default = `false` | +| `--skip_mosdepth` | A boolean to skip `mosdepth`. Default = `false` | +| `--skip_ascat` | A boolean to skip `ascat`. Default = `false` | +| `--skip_bamstats` | A boolean to skip `bamstats`. Default = `false` | +| `--skip_wakhan` | A boolean to skip `wakhan`. Default = `false` | +| `--skip_vep` | A boolean to skip `vep`. Default = `false` | +| `--skip_m6a` | A boolean to skip `fibertools_m6a`, used if you have m6a calls but would still like nucleosome positions for PacBio data (ONT data is required to have m6a calls). Default = `false` | +| `--skip_nanoplot` | A boolean to skip NanoPlot QC on aligned and unaligned BAM files. Default = `false` | +| `--skip_normalfiber` | A boolean to skip fibertools processing for the normal sample. Default = `false` | +| `--skip_modcall` | A boolean to skip modkit methylation calling. Default = `false` | +| `--skip_modkit` | A boolean to skip the modkit pileup step. Default = `false` | +| `--skip_whatshapstats` | A boolean to skip WhatsHap phasing statistics. Default = `false` | #### VEP options: -| Parameter | Description | -| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | -| `--vep_cache` | Full path to a vep cache. If left blank, this will default to pulling from this [Annotation Cache Storage](https://annotation-cache.github.io/). 
| -| `--vep_cache_version` | Integer specifying version of vep cache. Default = `113` | -| `--vep_args` | A string specifying arguments to vep. Default = `"--everything --filter_common --per_gene --total_length --offline --format vcf"` | -| `--vep_custom` | A full path to a vcf file containing custom variants for annotation. Must be bgzipped and have `.vcf.gz` format. Default = `null` | -| `--vep_custom_tbi` | A full path to a index file for cutom vcf for vep. Default = `null` | +| Parameter | Description | +| ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | +| `--vep_cache` | Full path to a vep cache. If left blank, this will default to pulling from this [Annotation Cache Storage](https://annotation-cache.github.io/). | +| `--vep_cache_version` | Integer specifying version of vep cache. Default = `113` | +| `--vep_args` | A string specifying arguments to vep. Default = `"--everything --filter_common --per_gene --total_length --offline --format vcf"` | +| `--vep_custom` | A full path to a vcf file containing custom variants for annotation. Must be bgzipped and have `.vcf.gz` format. Default = `null` | +| `--vep_custom_tbi` | A full path to an index file for custom vcf for vep. Default = `null` | +| `--download_vep_cache` | A boolean to automatically download the VEP cache if not found locally. Default = `false` | + +#### Minimap2 Options -| Parameter | Description | -| ----------------------------- | ------------------------------------------------------------------------------------------- | -| `--minimap2_ont_model` | specifies which model to use minimap2 with for ONT samples. Default = `null` | -| `--minimap2_pb_model` | specifies which model to use minimap2 with for PacBio samples. Default = `null` | -| `--save_secondary_alignments` | A boolean to specify if secondary alignmetns are kept in aligned bam file. 
Defualt = `true` | +| Parameter | Description | +| ---------------------------- | ------------------------------------------------------------------------------------------- | +| `--minimap2_ont_model` | specifies which model to use minimap2 with for ONT samples. Default = `null` | +| `--minimap2_pb_model` | specifies which model to use minimap2 with for PacBio samples. Default = `null` | +| `--save_secondary_alignment` | A boolean to specify if secondary alignments are kept in aligned bam file. Default = `true` | #### ASCAT Options -| Parameter | Description | -| ----------------------------- | ------------------------------------------------------------------------------------------------- | -| `--ascat_ploidy` | integer to enforce a given ploidy value. Default = `null` | -| `--ascat_purity` | integer to enforce a given purity value. Default = `null` | -| `--ascat_min_base_qual` | integer to specify a minimum base quality for ascat's allele counter. Default = `20` | -| `--ascat_min_counts` | integer to specify a minimum number of counts for ascat's allele counter. Default = `10` | -| `--ascat_min_map_qual` | integer to specify a minimum mapping quality for ascat's allele counter. Default = `10` | -| `--ascat_penalty` | integer to specify a penalty value for ascat. Default = `150` | -| `--ascat_longread_bins` | integer to specify the binsize for ascat long reads. Default = `2000` | -| `--ascat_allelecounter_flags` | flags to pass to ascat's allele counter. Default = `"-f 0"` | -| `--ascat_chroms` | string to enforce a subset of chromosomes on the sample, ie `"(c(1:21,'X','Y')). Default = `null` | +| Parameter | Description | +| ----------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `--ascat_ploidy` | integer to enforce a given ploidy value. 
Default = `null` | +| `--ascat_purity` | integer to enforce a given purity value. Default = `null` | +| `--ascat_min_base_qual` | integer to specify a minimum base quality for ascat's allele counter. Default = `20` | +| `--ascat_min_counts` | integer to specify a minimum number of counts for ascat's allele counter. Default = `10` | +| `--ascat_min_map_qual` | integer to specify a minimum mapping quality for ascat's allele counter. Default = `10` | +| `--ascat_penalty` | integer to specify a penalty value for ascat. Default = `150` | +| `--ascat_longread_bins` | integer to specify the binsize for ascat long reads. Default = `2000` | +| `--ascat_allelecounter_flags` | flags to pass to ascat's allele counter. Default = `"-f 0"` | +| `--ascat_chroms` | string to enforce a subset of chromosomes on the sample, ie `"(c(1:21,'X','Y')). Default = `null` | +| `--ascat_allele_files` | A full path to a zipped folder containing allele files for [ASCAT](https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS). Must be zipped and have `.zip` format. Default = `null` | +| `--ascat_loci_files` | A full path to a zipped folder containing loci files for [ASCAT](https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS). Must be zipped and have `.zip` format. Default = `null` | +| `--ascat_gc_file` | A full path to a GC correction file for [ASCAT](https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS). Optionally can be zipped and have either `.txt` or `.txt.zip` format. Default = `null` | +| `--ascat_rt_file` | A full path to a replication timing correction file for [ASCAT](https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS). Optionally can be zipped and have either `.txt` or `.txt.zip` format. Default = `null` | +| `--ascat_pdf_plots` | string to enable output pltos in pdf format. 
Default = `false` | + +#### Fibertools Options + +| Parameter | Description | +| ------------------- | -------------------------------------------------------------------------------- | +| `--autocorrelation` | A boolean to enable autocorrelation computation in fibertools. Default = `false` | + +#### SEVERUS Options + +| Parameter | Description | +| ---------------------- | ------------------------------------------------------------------------------------ | +| `--severus_minsupport` | Minimum number of supporting reads required for SEVERUS to call an SV. Default = `5` | + +#### WAKHAN Options + +| Parameter | Description | +| ----------------- | ------------------------------------------------------------------------------------------------------- | +| `--wakhan_chroms` | A string specifying a subset of chromosomes for WAKHAN to process, e.g. `"chr1,chr2"`. Default = `null` | + +#### Variant Filtering and Combining Options + +These options control how variants from multiple callers are filtered and merged. + +| Parameter | Description | +| ------------------------------ | --------------------------------------------------------------------------------------------------- | +| `--germline_var_keep` | Expression or threshold for retaining germline variants after calling. Default = `null` | +| `--somatic_var_keep` | Expression or threshold for retaining somatic variants after calling. Default = `null` | +| `--germline_var_combine` | Strategy for combining germline variant caller outputs (e.g. union, intersection). Default = `null` | +| `--somatic_var_combine` | Strategy for combining somatic variant caller outputs (e.g. union, intersection). Default = `null` | +| `--prioritize_caller_germline` | Comma-separated caller priority order used when combining germline calls. Default = `null` | +| `--prioritize_caller_somatic` | Comma-separated caller priority order used when combining somatic calls. 
Default = `null` | + +#### PON Options + +| Parameter | Description | +| ------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `--clairsto_pon_vcfs` | Full path to one or more Panel of Normals VCF files for ClairS-TO small variant filtering. Default = `null` | +| `--clairsto_pon_flags` | Population allele matching flags for ClairS-TO PON VCFs (one per VCF, comma-separated). Default = `null` | +| `--deepsomatic_pon_vcfs` | Full path to one or more bgzipped, tabix-indexed PON VCF files (for example, `.vcf.gz`) passed to DeepSomatic `--population_vcfs`. If not set, uses container-bundled defaults in tumor-only mode or no PON in paired mode. Default = `null` | + +#### Advanced Options + +| Parameter | Description | +| ----------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `--use_gpu` | A boolean to enable GPU acceleration for DeepVariant and DeepSomatic. Requires a GPU-enabled compute environment. Default = `false` | +| `--generate_gvcf` | A boolean to enable gVCF output from DeepVariant (germline) and DeepSomatic (somatic). gVCF files include calls at all positions, not just variant sites. Default = `false` | + +#### Genome-Derived Parameters + +The following parameters are automatically populated from the `--genome` iGenomes configuration and do not normally need to be set manually. They can be overridden when using a custom genome or reference build not present in the iGenomes configuration. 
+ +| Parameter | Description | +| ------------------ | -------------------------------------------------------------------------------------------------------------------------------------------- | +| `--fasta` | Full path to the reference FASTA file. Auto-populated from `--genome`. Override for custom genomes. | +| `--bed_file` | BED file of callable/target regions passed to SEVERUS for SV calling. Auto-populated from `--genome`. | +| `--pon_file` | Panel of Normals VCF file used by SEVERUS for somatic SV filtering. Auto-populated from `--genome`. | +| `--centromere_bed` | BED file of centromere coordinates passed to WAKHAN. Auto-populated from `--genome`. | +| `--genome_name` | Assembly name string passed to ASCAT for genome-specific reference file selection. Auto-populated from `--genome`. | +| `--vep_genome` | VEP genome identifier (e.g. `GRCh38`, `T2T-CHM13v2.0`). Auto-populated from `--genome`. Override for CHM13 or custom assemblies. | +| `--vep_species` | VEP species identifier. Auto-populated from `--genome`. Override for non-standard assemblies (e.g. `homo_sapiens_gca009914755v4` for CHM13). 
| ### Updating the pipeline @@ -204,7 +304,7 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - `shifter` - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) - `charliecloud` - - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) + - A generic configuration profile to be used with [Charliecloud](https://charliecloud.io/) - `apptainer` - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) - `wave` diff --git a/modules.json b/modules.json index 7bc71b09..e7597448 100644 --- a/modules.json +++ b/modules.json @@ -7,132 +7,198 @@ "nf-core": { "ascat": { "branch": "master", - "git_sha": "98ffb090029d17a9fb8de75dadcfe6bc8b6377ec", + "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", + "installed_by": ["modules"], + "patch": "modules/nf-core/ascat/ascat.diff" + }, + "bcftools/annotate": { + "branch": "master", + "git_sha": "3d9c2f4beaa4f62b3f006928fd9095a496d1e5a8", "installed_by": ["modules"] }, "bcftools/concat": { "branch": "master", - "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", + "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", "installed_by": ["modules", "vcf_gather_bcftools"] }, + "bcftools/isec": { + "branch": "master", + "git_sha": "3b2c3559699a7bca6a7c2b220695a072e030e17d", + "installed_by": ["modules"], + "patch": "modules/nf-core/bcftools/isec/bcftools-isec.diff" + }, "bcftools/merge": { "branch": "master", - "git_sha": "f17049e03697726ace7499d2fe342f892594f6f3", + "git_sha": "3d9c2f4beaa4f62b3f006928fd9095a496d1e5a8", + "installed_by": ["modules"], + "patch": "modules/nf-core/bcftools/merge/bcftools-merge.diff" + }, + "bcftools/norm": { + "branch": "master", + "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", "installed_by": ["modules"] }, + "bcftools/query": { + "branch": "master", + "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", + "installed_by": 
["modules"], + "patch": "modules/nf-core/bcftools/query/bcftools-query.diff" + }, "bcftools/sort": { "branch": "master", - "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", - "installed_by": ["vcf_gather_bcftools"], + "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", + "installed_by": ["modules", "vcf_gather_bcftools"], "patch": "modules/nf-core/bcftools/sort/bcftools-sort.diff" }, + "deepvariant/callvariants": { + "branch": "master", + "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", + "installed_by": ["deepvariant"], + "patch": "modules/nf-core/deepvariant/callvariants/deepvariant-callvariants.diff" + }, + "deepvariant/makeexamples": { + "branch": "master", + "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", + "installed_by": ["deepvariant"], + "patch": "modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff" + }, + "deepvariant/postprocessvariants": { + "branch": "master", + "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", + "installed_by": ["deepvariant"], + "patch": "modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff" + }, "ensemblvep/download": { "branch": "master", - "git_sha": "2fcc53751152a999bfc9c24f75f494b9e5bb338f", + "git_sha": "90cdd21fd96ccbdb3bc90797ca69570d18391055", "installed_by": ["modules"] }, "ensemblvep/vep": { "branch": "master", - "git_sha": "0567eee9276d4a358e5f9f01c810a149fbd241f8", + "git_sha": "890fdcff71928fc1470d3e669d4c430c8c770297", "installed_by": ["modules"], "patch": "modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff" }, "longphase/haplotag": { "branch": "master", - "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "git_sha": "b8d30a43f33aee3148b0e9e9f00587984a4ac195", "installed_by": ["modules"] }, "longphase/phase": { "branch": "master", - "git_sha": "47983538e45e539f783ed8ab0d1c96d39df2af8f", + "git_sha": "b8d30a43f33aee3148b0e9e9f00587984a4ac195", "installed_by": ["modules"], "patch": "modules/nf-core/longphase/phase/longphase-phase.diff" }, 
"minimap2/align": { "branch": "master", - "git_sha": "1a5a9e7b4009dcf34e6867dd1a5a1d9a718b027b", - "installed_by": ["modules"] + "git_sha": "5c9f8d5b7671237c906abadc9ff732b301ca15ca", + "installed_by": ["modules"], + "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, "minimap2/index": { "branch": "master", - "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", + "git_sha": "14980f759266eec42dac401fcafeb83d6c957b41", "installed_by": ["modules"] }, "modkit/pileup": { "branch": "master", - "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", - "installed_by": ["modules"] + "git_sha": "3d81317a30d1016b533982d6b84df07713ae520a", + "installed_by": ["modules"], + "patch": "modules/nf-core/modkit/pileup/modkit-pileup.diff" }, "mosdepth": { "branch": "master", - "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "git_sha": "6832b69ef7f98c54876d6436360b6b945370c615", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "af27af1be706e6a2bb8fe454175b0cdf77f47b49", + "git_sha": "2c73cc8fa92cf48de3da0b643fdf357a8a290b36", + "installed_by": ["modules"] + }, + "nanoplot": { + "branch": "master", + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c", "installed_by": ["modules"] }, "pigz/uncompress": { "branch": "master", - "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", + "git_sha": "f84336b7fa91a65aa61d215b8c109fbb8e4b4ac6", "installed_by": ["modules"] }, "samtools/cat": { "branch": "master", - "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "git_sha": "f9edc59be2fe25bb6fc73ca4dfc0d28246f2a2d6", "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", - "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "git_sha": "b2e78932ef01165fd85829513eaca29eff8e640a", "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", - "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", - "installed_by": ["modules"] + "git_sha": "1d2fbdcbca677bbe8da0f9d0d2bb7c02f2cab1c9", + "installed_by": 
["bam_stats_samtools"] }, "samtools/idxstats": { "branch": "master", - "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", - "installed_by": ["modules"] + "git_sha": "1d2fbdcbca677bbe8da0f9d0d2bb7c02f2cab1c9", + "installed_by": ["bam_stats_samtools"] }, "samtools/index": { "branch": "master", - "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "git_sha": "1d2fbdcbca677bbe8da0f9d0d2bb7c02f2cab1c9", "installed_by": ["modules"] }, "samtools/stats": { "branch": "master", - "git_sha": "f4eab7945952dc4934224309701a49913ea05ae6", - "installed_by": ["modules"] + "git_sha": "fe93fde0845f907fc91ad7cc7d797930408824df", + "installed_by": ["bam_stats_samtools"], + "patch": "modules/nf-core/samtools/stats/samtools-stats.diff" }, "severus": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": "4dd9d8439a429c7ee566e0e2347f76ddeef27e66", "installed_by": ["modules"], "patch": "modules/nf-core/severus/severus.diff" }, "untar": { "branch": "master", - "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "git_sha": "447f7bc0fa41dfc2400c8cad4c0291880dc060cf", "installed_by": ["modules"] }, "unzip": { "branch": "master", - "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "git_sha": "4dd9d8439a429c7ee566e0e2347f76ddeef27e66", "installed_by": ["modules"] }, "wget": { "branch": "master", "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", "installed_by": ["modules"] + }, + "whatshap/stats": { + "branch": "master", + "git_sha": "bfab71f4d68c1aaff09335a3433e7b2836918b2a", + "installed_by": ["modules"] } } }, "subworkflows": { "nf-core": { + "bam_stats_samtools": { + "branch": "master", + "git_sha": "7ac6cbe7c17c2dad685da7f70496c8f48ea48687", + "installed_by": ["subworkflows"] + }, + "deepvariant": { + "branch": "master", + "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", + "installed_by": ["subworkflows"], + "patch": "subworkflows/nf-core/deepvariant/deepvariant.diff" + }, "utils_nextflow_pipeline": { "branch": "master", 
"git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", @@ -140,12 +206,12 @@ }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "271e7fc14eb1320364416d996fb077421f3faed2", + "git_sha": "65f5e638d901a51534c68fd5c1c19e8112fb4df1", "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", - "git_sha": "4b406a74dc0449c0401ed87d5bfff4252fd277fd", + "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", "installed_by": ["subworkflows"] } } diff --git a/modules/local/clair3/main.nf b/modules/local/clair3/main.nf index 85d793fa..6adf6934 100644 --- a/modules/local/clair3/main.nf +++ b/modules/local/clair3/main.nf @@ -1,14 +1,14 @@ process CLAIR3 { tag "$meta.id" - label 'process_very_high' + label "${params.use_gpu ? 'process_gpu_very_high' : 'process_very_high'}" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/clair3:1.2.0--py310h779eee5_0': - 'quay.io/biocontainers/clair3:1.2.0--py310h779eee5_0' }" + (params.use_gpu ? 'docker://hkubal/clair3-gpu:v1.2.0' : 'https://depot.galaxyproject.org/singularity/clair3:1.2.0--py310h779eee5_0') : + (params.use_gpu ? 
'docker.io/hkubal/clair3-gpu:v1.2.0' : 'quay.io/biocontainers/clair3:1.2.0--py310h779eee5_0' }" input: - tuple val(meta), path(bam), path(bai), path(model), val(platform) + tuple val(meta) , path(bam), path(bai), path(model), val(platform) tuple val(meta2), path(reference) tuple val(meta3), path(index) @@ -17,43 +17,36 @@ process CLAIR3 { tuple val(meta), path("*merge_output.vcf.gz.tbi"), emit: tbi tuple val(meta), path("*phased_merge_output.vcf.gz"), emit: phased_vcf, optional: true tuple val(meta), path("*phased_merge_output.vcf.gz.tbi"), emit: phased_tbi, optional: true - path "versions.yml", emit: versions + tuple val("${task.process}"), val('clair3'), eval("run_clair3.sh --version |& sed '1!d ; s/Clair3 v//'"), topic: versions, emit: versions_clair3 when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def use_gpu = task.ext.use_gpu as boolean + """ + ${use_gpu ? 'export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0}' : ':'} run_clair3.sh \\ - --bam_fn=$bam \\ - --ref_fn=$reference \\ - --threads=$task.cpus \\ + --bam_fn=${bam} \\ + --ref_fn=${reference} \\ + --threads=${task.cpus} \\ --output=. \\ - --platform=$platform \\ - --model=$model \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - clair3: \$(run_clair3.sh --version |& sed '1!d ; s/Clair3 v//') - END_VERSIONS + --platform=${platform} \\ + --model=${model} \\ + --sample_name=${prefix} \\ + ${use_gpu ? 
'--use_gpu --device=cuda:0' : ''} \\ + ${args} """ stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" """ echo "" | gzip > ${prefix}.phased_merge_output.vcf.gz touch ${prefix}.phased_merge_output.vcf.gz.tbi echo "" | gzip > ${prefix}.merge_output.vcf.gz touch ${prefix}.merge_output.vcf.gz.tbi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - clair3: \$(run_clair3.sh --version |& sed '1!d ; s/Clair3 v//') - END_VERSIONS """ } diff --git a/modules/local/clair3/meta.yml b/modules/local/clair3/meta.yml index cecf7564..8f2e06f3 100644 --- a/modules/local/clair3/meta.yml +++ b/modules/local/clair3/meta.yml @@ -69,54 +69,72 @@ input: description: reference index file pattern: "*.fai" output: - - vcf: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1' ]` - pattern: "*.{vcf,vcf.gz}" - - "*merge_output.vcf.gz": - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1' ]` - pattern: "*.{vcf,vcf.gz}" - - tbi: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1' ]` - - "*merge_output.vcf.gz.tbi": - type: file - description: index for vcf files - pattern: "*.{vcf.tbi,vcf.tbi.gz}" - - phased_vcf: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1' ]` - - "*phased_merge_output.vcf.gz": - type: file - description: phased vcf - pattern: "*.{vcf,vcf.gz}" - - phased_tbi: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
`[ id:'sample1' ]` - - "*phased_merge_output.vcf.gz.tbi": - type: file - description: index for vcf files - pattern: "*.{vcf.tbi,vcf.tbi.gz}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + pattern: "*.{vcf,vcf.gz}" + - "*merge_output.vcf.gz": + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + pattern: "*.{vcf,vcf.gz}" + tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*merge_output.vcf.gz.tbi": + type: file + description: index for vcf files + pattern: "*.{vcf.tbi,vcf.tbi.gz}" + phased_vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*phased_merge_output.vcf.gz": + type: file + description: phased vcf + pattern: "*.{vcf,vcf.gz}" + phased_tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "*phased_merge_output.vcf.gz.tbi": + type: file + description: index for vcf files + pattern: "*.{vcf.tbi,vcf.tbi.gz}" + versions_clair3: + - - ${task.process}: + type: string + description: The process the versions were collected from + - clair3: + type: string + description: The tool name + - "run_clair3.sh --version |& sed '1!d ; s/Clair3 v//'": + type: string + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - clair3: + type: string + description: The tool name + - "run_clair3.sh --version |& sed '1!d ; s/Clair3 v//'": + type: string + description: The command used to generate the version of the tool + authors: - "@robert-a-forsyth" maintainers: diff --git a/modules/local/clairs/main.nf b/modules/local/clairs/main.nf index 29837fd9..a7a310b5 100644 --- a/modules/local/clairs/main.nf +++ b/modules/local/clairs/main.nf @@ -3,8 +3,8 @@ process CLAIRS { label 'process_very_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'docker.io/hkubal/clairs:v0.4.1': - 'docker.io/hkubal/clairs:v0.4.1' }" + 'docker.io/hkubal/clairs:v0.4.4': + 'docker.io/hkubal/clairs:v0.4.4' }" input: tuple val(meta), path(tumor_bam), path(tumor_bai), path(normal_bam), path(normal_bai), val(model) @@ -14,14 +14,14 @@ process CLAIRS { output: tuple val(meta), path("*.vcf.gz"), emit: vcfs tuple val(meta), path("*.vcf.gz.tbi"), emit: tbi - path "versions.yml", emit: versions + tuple val("${task.process}"), val('clairs'), eval("/opt/bin/run_clairs --version |& sed '1!d ; s/run_clairs //'"), topic: versions, emit: versions_clairs when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' """ /opt/bin/run_clairs \ @@ -30,6 +30,7 @@ process CLAIRS { --ref_fn $reference \\ --threads $task.cpus \\ --platform $model \\ + --sample_name ${prefix} \\ --output_dir . \\ --output_prefix snvs \\ $args @@ -38,27 +39,14 @@ process CLAIRS { rm snv.vcf.gz rm snv.vcf.gz.tbi fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - clairs: \$(/opt/bin/run_clairs --version |& sed '1!d ; s/run_clairs //') - END_VERSIONS """ stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ echo "" | gzip > snvs.vcf.gz touch snvs.vcf.gz.tbi echo "" | gzip > indel.vcf.gz touch indel.vcf.gz.tbi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - clairs: \$(/opt/bin/run_clairs --version |& sed '1!d ; s/run_clairs //') - END_VERSIONS """ } diff --git a/modules/local/clairs/meta.yml b/modules/local/clairs/meta.yml index 46c5b6ce..1c28ae45 100644 --- a/modules/local/clairs/meta.yml +++ b/modules/local/clairs/meta.yml @@ -1,20 +1,21 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "clairs" -description: write your description here +description: ClairS is a deep-learning method for long-read somatic small variant calling 
keywords: + - somatic - small - variant - SNV - indel + - long-read tools: - "clairs": - description: "" + description: "ClairS is a deep-learning method for long-read somatic small variant calling" homepage: "https://github.com/HKU-BAL/ClairS" documentation: "https://github.com/HKU-BAL/ClairS" tool_dev_url: "https://github.com/HKU-BAL/ClairS" - doi: "10.1101/2023.08.17.553778 " + doi: "10.1101/2023.08.17.553778" licence: ["BSD-3-clause"] - identifier: biotools:clairs input: - - meta: @@ -25,7 +26,7 @@ input: type: file description: | BAM file for tumor sample - pattern: "*.vcf.gz" + pattern: "*.bam" - tumor_bai: type: file description: | @@ -64,29 +65,47 @@ input: index file for the reference fasta file pattern: "*.fai" output: - - vcf: - - meta: - type: map - description: | - Groovy Map containing sample information - - "*.vcf.gz": - type: file - description: VCF file containing small somatic variants - pattern: "*.vcf.gz" - - tbi: - - meta: - type: map - description: | - Groovy Map containing sample information - - "*.vcf.gz.tbi": - type: file - description: VCF index file for small somatic variants - pattern: "*.vcf.gz.tbi" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + - "*.vcf.gz": + type: file + description: VCF file containing small somatic variants + pattern: "*.vcf.gz" + tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + - "*.vcf.gz.tbi": + type: file + description: VCF index file for small somatic variants + pattern: "*.vcf.gz.tbi" + versions_clairs: + - - ${task.process}: + type: string + description: The process the versions were collected from + - clairs: + type: string + description: The tool name + - "run_clairs --version |& sed '1!d ; s/run_clairs //'": + type: string + description: The command used to generate the version of the tool + +topics: + 
versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - clairs: + type: string + description: The tool name + - "run_clairs --version |& sed '1!d ; s/run_clairs //'": + type: string + description: The command used to generate the version of the tool + authors: - "@robert-a-forsyth" maintainers: diff --git a/modules/local/clairsto/main.nf b/modules/local/clairsto/main.nf index 130baebd..7147061e 100644 --- a/modules/local/clairsto/main.nf +++ b/modules/local/clairsto/main.nf @@ -4,24 +4,20 @@ process CLAIRSTO { label 'process_very_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker.io/hkubal/clairs-to:v0.4.0': - 'docker.io/hkubal/clairs-to:v0.4.0' }" + 'docker.io/hkubal/clairs-to:v0.4.2': + 'docker.io/hkubal/clairs-to:v0.4.2' }" input: - tuple val(meta), path(tumor_bam), path(tumor_bai), val(model) + tuple val(meta), path(tumor_bam), path(tumor_bai), val(model), path(pon_vcfs), val(pon_flags) tuple val(meta2), path(reference) tuple val(meta3), path(index) - path(dbSNP) - path(colors) - path(onekgenomes) - path(gnomad) output: tuple val(meta), path("indel.vcf.gz"), emit: indel_vcf tuple val(meta), path("indel.vcf.gz.tbi"), emit: indel_tbi tuple val(meta), path("snv.vcf.gz"), emit: snv_vcf tuple val(meta), path("snv.vcf.gz.tbi"), emit: snv_tbi - path "versions.yml", emit: versions + tuple val("${task.process}"), val('clairsto'), eval("run_clairs_to --version |& sed '1!d ; s/run_clairs_to //'"), topic: versions, emit: versions_clairsto when: task.ext.when == null || task.ext.when @@ -30,11 +26,8 @@ process CLAIRSTO { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def conda_prefix = workflow.containerEngine == 'singularity' ? 
'--conda_prefix /opt/micromamba/envs/clairs-to' : '' - def gnomad = gnomad ?: 'gnomad.r2.1.af-ge-0.001.sites.vcf.gz' - def dbSNP = dbSNP ?: 'dbsnp.b138.non-somatic.sites.vcf.gz' - def onekgenomes = onekgenomes ?: '1000g-pon.sites.vcf.gz' - def colors = colors ?: 'colors-pon.sites.vcf.gz' - + def pon_string = pon_vcfs.join(',') + def flags_string = pon_flags.join(',') """ /opt/bin/run_clairs_to \ @@ -43,33 +36,19 @@ process CLAIRSTO { --platform $model \\ --threads $task.cpus \\ --output_dir . \\ - --panel_of_normals "${gnomad},${dbSNP},${onekgenomes},${colors}" \\ - --panel_of_normals_require_allele_matching 'True,True,False,False' \\ + --sample_name ${prefix} \\ + --panel_of_normals ${pon_string} \\ + --panel_of_normals_require_allele_matching ${flags_string} \\ $conda_prefix \\ $args \\ - - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - clairsto: \$(/opt/bin/run_clairs_to --version |& sed '1!d ; s/run_clairs_to //') - END_VERSIONS """ stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ mkdir -p output echo "" | gzip > snv.vcf.gz touch snv.vcf.gz.tbi echo "" | gzip > indel.vcf.gz touch indel.vcf.gz.tbi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - clairsto: \$(/opt/bin/run_clairs_to --version |& sed '1!d ; s/run_clairs_to //') - END_VERSIONS """ } diff --git a/modules/local/clairsto/meta.yml b/modules/local/clairsto/meta.yml index f283f56a..39201688 100644 --- a/modules/local/clairsto/meta.yml +++ b/modules/local/clairsto/meta.yml @@ -1,21 +1,22 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "clairsto" -description: write your description here +description: ClairS-TO is a deep-learning method for long-read tumor-only somatic small variant calling keywords: + - somatic - small - variant - SNV - indel - tumor-only + - long-read tools: - - "clairs": - description: "" + - "clairsto": + description: "ClairS-TO is a 
deep-learning method for long-read tumor-only somatic small variant calling" homepage: "https://github.com/HKU-BAL/ClairS-TO" documentation: "https://github.com/HKU-BAL/ClairS-TO" tool_dev_url: "https://github.com/HKU-BAL/ClairS-TO" - doi: "10.1101/2023.08.17.553778 " + doi: "10.1038/s41467-025-64547-z" licence: ["BSD-3-clause"] - identifier: biotools:clairsto input: - - meta: @@ -26,7 +27,7 @@ input: type: file description: | BAM file for tumor sample - pattern: "*.vcf.gz" + pattern: "*.bam" - tumor_bai: type: file description: | @@ -35,70 +36,110 @@ input: - model: type: string description: | - Name for ClairS model + Name for ClairS-TO model (platform: ont or hifi) - - meta2: type: map description: | - Groovy Map containing sample information + Groovy Map containing reference information - reference: type: file description: | - A reference fasta file - pattern: "*.fasta" + Reference genome fasta file + pattern: "*.{fasta,fa,fna}" - - meta3: type: map description: | - Groovy Map containing sample information + Groovy Map containing reference index information - index: type: file description: | - index file for the reference fasta file + Reference genome fasta index file pattern: "*.fai" + - - dbSNP: + type: file + description: | + dbSNP VCF file for panel of normals + pattern: "*.vcf.gz" + - - colors: + type: file + description: | + COLORS panel of normals VCF file + pattern: "*.vcf.gz" + - - onekgenomes: + type: file + description: | + 1000 Genomes panel of normals VCF file + pattern: "*.vcf.gz" + - - gnomad: + type: file + description: | + gnomAD panel of normals VCF file + pattern: "*.vcf.gz" + output: - - indel_vcf: - - meta: - type: map - description: | - Groovy Map containing sample information - pattern: "*.vcf.gz" - - "*/indel.vcf.gz": - type: map - description: | - Groovy Map containing sample information - pattern: "*.vcf.gz" - - indel_tbi: - - meta: - type: map - description: | - Groovy Map containing sample information - pattern: "*.vcf.gz.tbi" - - 
"*/indel.vcf.gz.tbi": - type: map - description: | - Groovy Map containing sample information - pattern: "*.vcf.gz.tbi" - - snv_vcf: - - meta: - type: map - description: | - Groovy Map containing sample information - pattern: "*.vcf.gz" - - "*/snv.vcf.gz": - type: map - description: | - Groovy Map containing sample information - pattern: "*.vcf.gz" - - snv_tbi: - - meta: - type: map - description: | - Groovy Map containing sample information - pattern: "*.vcf.gz.tbi" - - "*/snv.vcf.gz.tbi": - type: map - description: | - Groovy Map containing sample information - pattern: "*.vcf.gz.tbi" + indel_vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "indel.vcf.gz": + type: file + description: Indel somatic variants VCF file + pattern: "indel.vcf.gz" + indel_tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "indel.vcf.gz.tbi": + type: file + description: Indel VCF index file + pattern: "indel.vcf.gz.tbi" + snv_vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "snv.vcf.gz": + type: file + description: SNV somatic variants VCF file + pattern: "snv.vcf.gz" + snv_tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "snv.vcf.gz.tbi": + type: file + description: SNV VCF index file + pattern: "*.vcf.gz.tbi" + versions_clairsto: + - - ${task.process}: + type: string + description: The process the versions were collected from + - clairsto: + type: string + description: The tool name + - "/opt/bin/run_clairs_to --version |& sed '1!d ; s/run_clairs_to //'": + type: string + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - clairsto: + type: string + description: The tool name + - "/opt/bin/run_clairs_to --version |& sed '1!d ; s/run_clairs_to //'": + type: string + description: The command used to generate the version of the tool + authors: - "@robert-a-forsyth" maintainers: diff --git a/modules/local/cramino/main.nf b/modules/local/cramino/main.nf index ce64c8e7..e93d100f 100644 --- a/modules/local/cramino/main.nf +++ b/modules/local/cramino/main.nf @@ -1,18 +1,19 @@ process CRAMINO { tag "$meta.id" - label 'process_single' + label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/cramino:1.0.0--h3dc2dae_0': - 'biocontainers/cramino:1.0.0--h3dc2dae_0' }" + 'https://depot.galaxyproject.org/singularity/cramino:1.3.0--h3dc2dae_0': + 'biocontainers/cramino:1.3.0--h3dc2dae_0' }" input: tuple val(meta), path(bam) output: tuple val(meta), path("*.txt"), emit: txt - path "versions.yml" , emit: versions + tuple val(meta), path("*.arrow"), emit: arrow + tuple val("${task.process}"), val('cramino'), eval("cramino --version |& sed '1!d ; s/cramino //'"), topic: versions, emit: versions_cramino when: task.ext.when == null || task.ext.when @@ -21,33 +22,13 @@ process CRAMINO { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - - - - """ - cramino $args $bam > ${prefix}_cramino.txt - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - cramino: \$(cramino --version |& sed '1!d ; s/cramino //') - END_VERSIONS + cramino $args $bam --arrow ${prefix}.arrow > ${prefix}_cramino.txt """ - - - - stub: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_cramino.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - cramino: \$(cramino --version |& sed '1!d ; s/cramino //') - END_VERSIONS """ } diff --git a/modules/local/cramino/meta.yml b/modules/local/cramino/meta.yml index b63a18af..93fbea6c 100644 --- a/modules/local/cramino/meta.yml +++ b/modules/local/cramino/meta.yml @@ -2,32 +2,29 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "cramino" ## TODO nf-core: Add a description of the module and list keywords -description: write your description here +description: A tool for quick quality assessment of cram and bam files, intended for long read sequencing. 
keywords: - - sort - - example - - genomics + - quality + - assessment + - cram + - bam + - long-read tools: - "cramino": - ## TODO nf-core: Add a description and other details for the software below description: "A tool for very fast quality assessment of long read cram/bam files." - homepage: "None" - documentation: "None" - tool_dev_url: "None" - doi: "" + homepage: "https://github.com/wdecoster/cramino" + documentation: "https://github.com/wdecoster/cramino" + tool_dev_url: "https://github.com/wdecoster/cramino" + doi: "10.1093/bioinformatics/btad311" licence: ["MIT"] - identifier: + identifier: biotools:cramino -## TODO nf-core: Add a description of all of the variables used as input input: - # Only when we have meta - - meta: type: map description: | Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` - - ## TODO nf-core: Delete / customise this example input - bam: type: file description: Sorted BAM/CRAM/SAM file @@ -37,30 +34,40 @@ input: - edam: "http://edamontology.org/format_2573" - edam: "http://edamontology.org/format_3462" -## TODO nf-core: Add a description of all of the variables used as output output: - - bam: - #Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - ## TODO nf-core: Delete / customise this example output - - "*.bam": - type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - ontologies: - - edam: "http://edamontology.org/format_25722" - - edam: "http://edamontology.org/format_2573" - - edam: "http://edamontology.org/format_3462" + txt: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.txt": + type: file + description: Quality assessment text file + pattern: "*.txt" + versions_cramino: + - - ${task.process}: + type: string + description: The process the versions were collected from + - cramino: + type: string + description: The tool name + - "cramino --version |& sed '1!d ; s/cramino //'": + type: string + description: The command used to generate the version of the tool - - versions: - - "versions.yml": - type: file - description: File containing software versions - pattern: "versions.yml" +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - cramino: + type: string + description: The tool name + - "cramino --version |& sed '1!d ; s/cramino //'": + type: string + description: The command used to generate the version of the tool authors: - "@alexanRNA" diff --git a/modules/local/deepsomatic/callvariants/main.nf b/modules/local/deepsomatic/callvariants/main.nf new file mode 100644 index 00000000..d5cc0ab9 --- /dev/null +++ b/modules/local/deepsomatic/callvariants/main.nf @@ -0,0 +1,50 @@ +process DEEPSOMATIC_CALLVARIANTS { + tag "$meta.id" + label "${params.use_gpu ? 'process_gpu_high' : 'process_very_high'}" + label "${params.use_gpu ? '' : 'process_long'}" + + //Conda is not supported at the moment + container params.use_gpu ? 
"docker.io/google/deepsomatic:1.7.0-gpu" : "docker.io/google/deepsomatic:1.7.0" + + input: + tuple val(meta), path(make_examples_tfrecords) + + output: + tuple val(meta), path("${prefix}.call-*-of-*.tfrecord.gz") , emit: call_variants_tfrecords + tuple val("${task.process}"), val('deepsomatic'), val('1.7.0'), topic: versions, emit: versions_deepsomatic + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def matcher = make_examples_tfrecords[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/ + if (!matcher.matches()) { + throw new IllegalArgumentException("tfrecord baseName '" + make_examples_tfrecords[0].baseName + "' doesn't match the expected pattern") + } + def examples_tfrecord_name = matcher[0][1] + def shardCount = matcher[0][2] + // Reconstruct the logical name - ${tfrecord_name}@.gz + def examples_tfrecords_logical_name = "${examples_tfrecord_name}@${shardCount}.gz" + + """ + /opt/deepvariant/bin/call_variants \\ + ${args} \\ + --outfile "${prefix}.call.tfrecord.gz" \\ + --examples "${examples_tfrecords_logical_name}" + + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.call-00000-of-00001.tfrecord.gz + + """ +} diff --git a/modules/local/deepsomatic/makeexamples/main.nf b/modules/local/deepsomatic/makeexamples/main.nf new file mode 100644 index 00000000..dba74c16 --- /dev/null +++ b/modules/local/deepsomatic/makeexamples/main.nf @@ -0,0 +1,103 @@ +process DEEPSOMATIC_MAKEEXAMPLES { + tag "$meta.id" + label 'process_very_high' + label 'process_long' + + //Conda is not supported at the moment + container params.use_gpu ? 
"docker.io/google/deepsomatic:1.7.0-gpu" : "docker.io/google/deepsomatic:1.7.0" + + input: + tuple val(meta), path(normal_input), path(normal_index), path(tumor_input), path(tumor_index) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(gzi) + tuple val(meta5), path(ds_pon) + + output: + tuple val(meta), path("${prefix}.examples.tfrecord-*-of-*.gz{,.example_info.json}") , emit: examples + tuple val(meta), path("${prefix}.gvcf.tfrecord-*-of-*.gz") , emit: gvcf, optional:true + tuple val(meta), path("${prefix}_call_variant_outputs.tfrecord-*-of-*.gz", arity: "0..*") , emit: small_model_calls + tuple val("${task.process}"), val('deepsomatic'), val('1.7.0'), topic: versions, emit: versions_deepsomatic + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def normalReadsArg = (normal_input?.toString() && normal_input.toString() != '[]') ? "--reads_normal \"${normal_input}\"" : "" + def normalSampleArg = (normal_input?.toString() && normal_input.toString() != '[]') ? "--sample_name_normal \"${prefix}_normal\"" : "" + def gvcf_arg = params.generate_gvcf ? "--gvcf \"./${prefix}.gvcf.tfrecord@${task.cpus}.gz\"" : "" + def isTumorOnly = !(meta.paired_data) + + // Build list of PON VCF file paths (excluding .tbi index files) + def ponFiles = [] + if (ds_pon?.toString() && ds_pon.toString() != '[]') { + ponFiles = (ds_pon instanceof List) + ? 
ds_pon.findAll { f -> !f.toString().endsWith('.tbi') } + : [ds_pon] + } + def nPonFiles = ponFiles.size() + def ponArrayLiteral = ponFiles.collect { f -> "${f}" }.join(' ') + + // Shell block to prepare the PON VCF (merge if multiple, copy if single, skip if none) + // Runs before make_examples_somatic so the result is available as merged_pon.vcf.gz + def ponPrepareBlock = (isTumorOnly && nPonFiles > 0) ? """ + # Prepare PON VCF for --population_vcfs: merge multiple databases into one sorted+indexed file, + # or copy a single VCF. DeepSomatic requires no chromosome overlap across population VCFs. + _PON_VCFS=( ${ponArrayLiteral} ) + if [ \${#_PON_VCFS[@]} -gt 1 ]; then + gzip -dc "\${_PON_VCFS[0]}" | grep '^##fileformat' > _pon_hdr.txt + for vcf in "\${_PON_VCFS[@]}"; do gzip -dc "\$vcf" | grep '^##' | grep -v '^##fileformat'; done | sort -T . -u >> _pon_hdr.txt + gzip -dc "\${_PON_VCFS[0]}" | grep '^#CHROM' >> _pon_hdr.txt + for vcf in "\${_PON_VCFS[@]}"; do gzip -dc "\$vcf" | grep -v '^#'; done \\ + | sort -T . -t\$'\\t' -k1,1V -k2,2n | uniq > _pon_data.txt + cat _pon_hdr.txt _pon_data.txt | bgzip -c > merged_pon.vcf.gz + rm _pon_hdr.txt _pon_data.txt + else + cp "\${_PON_VCFS[0]}" merged_pon.vcf.gz + fi + tabix -p vcf merged_pon.vcf.gz + """ : "" + + // --population_vcfs argument for make_examples_somatic + def ponArg = "" + if (isTumorOnly) { + ponArg = nPonFiles > 0 + ? 
'--population_vcfs "merged_pon.vcf.gz"' + : '--population_vcfs "/opt/models/deepsomatic/pons/AF_pacbio_PON_CoLoRSdb.GRCh38.AF0.05.vcf.gz"' + } + // In paired mode ponArg stays "" (no --population_vcfs, matching prior behaviour) + + """ + ${ponPrepareBlock} + seq 0 ${task.cpus - 1} | parallel -q --halt 2 --line-buffer /opt/deepvariant/bin/make_examples_somatic \\ + --mode calling \\ + --ref "${fasta}" \\ + --reads_tumor "${tumor_input}" \\ + ${normalReadsArg} \\ + --sample_name_tumor "${prefix}" \\ + ${normalSampleArg} \\ + --examples "./${prefix}.examples.tfrecord@${task.cpus}.gz" \\ + ${gvcf_arg} \\ + ${ponArg} \\ + ${args} \\ + --task {} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + printf -v SHARD_COUNT "%04d" ${task.cpus} + for i in \$( seq -f "%04g" 0 ${task.cpus-1} ) + do + echo "" | gzip > ${prefix}.examples.tfrecord-\$i-of-\$SHARD_COUNT.gz + touch ${prefix}.examples.tfrecord-\$i-of-\$SHARD_COUNT.gz.example_info.json + echo "" | gzip > ${prefix}.gvcf.tfrecord-\$i-of-\$SHARD_COUNT.gz + done + """ +} diff --git a/modules/local/deepsomatic/postprocessvariants/main.nf b/modules/local/deepsomatic/postprocessvariants/main.nf new file mode 100644 index 00000000..b2b0c4ba --- /dev/null +++ b/modules/local/deepsomatic/postprocessvariants/main.nf @@ -0,0 +1,132 @@ +process DEEPSOMATIC_POSTPROCESSVARIANTS { + tag "$meta.id" + label 'process_high' + label 'process_short' + + container "docker.io/google/deepsomatic:1.7.0" + + input: + tuple val(meta), path(variant_calls_tfrecord_files), path(gvcf_tfrecords), val(small_model_calls), val(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(gzi) + tuple val(meta5), path(pon_vcf) + + output: + tuple val(meta), path("${prefix}.vcf.gz"), emit: vcf + tuple val(meta), path("${prefix}.vcf.gz.{tbi,csi}"), emit: vcf_index + tuple val(meta), path("${prefix}.g.vcf.gz"), emit: gvcf, optional: true + tuple val(meta), path("${prefix}.g.vcf.gz.{tbi,csi}"), emit: 
gvcf_index, optional: true + tuple val("${task.process}"), val('deepsomatic'), val('1.7.0'), topic: versions, emit: versions_deepsomatic + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def regions = intervals ? "--regions ${intervals}" : "" + def variant_calls_tfrecord_name = variant_calls_tfrecord_files[0].name.replaceFirst(/-\d{5}-of-\d{5}/, "") + + def isTumorOnly = !(meta.paired_data) + + + def gvcf_arg = "" + if (gvcf_tfrecords) { + def gvcf_matcher = gvcf_tfrecords[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/ + if (!gvcf_matcher.matches()) { + throw new IllegalArgumentException("tfrecord baseName '" + gvcf_tfrecords[0].baseName + "' doesn't match the expected pattern") + } + def gvcf_tfrecord_name = gvcf_matcher[0][1] + def gvcf_shardCount = gvcf_matcher[0][2] + def gvcf_tfrecords_logical_name = "${gvcf_tfrecord_name}@${gvcf_shardCount}.gz" + gvcf_arg = "--nonvariant_site_tfrecord_path \"${gvcf_tfrecords_logical_name}\" --gvcf_outfile \"${prefix}.g.vcf.gz\"" + } + + // The following block determines whether the small model was used, and if so, adds the variant calls from it + // to the argument --small_model_cvo_records. + def small_model_arg = "" + if (small_model_calls && small_model_calls.size() > 0) { + def small_model_matcher = (small_model_calls[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/) + if (!small_model_matcher.matches()) { + throw new IllegalArgumentException("tfrecord baseName '" + small_model_calls[0].baseName + "' doesn't match the expected pattern") + } + def small_model_tfrecord_name = small_model_matcher[0][1] + def small_model_shardCount = small_model_matcher[0][2] + // Reconstruct the logical name. 
Example: test_call_variant_outputs.examples.tfrecord@12.gz + def small_model_tfrecords_logical_name = "${small_model_tfrecord_name}@${small_model_shardCount}.gz" + small_model_arg = "--small_model_cvo_records ${small_model_tfrecords_logical_name}" + } + + // Build list of PON VCF file paths (excluding .tbi index files) + def ponFiles = [] + if (pon_vcf?.toString() && pon_vcf.toString() != '[]') { + ponFiles = (pon_vcf instanceof List) + ? pon_vcf.findAll { f -> !f.toString().endsWith('.tbi') } + : [pon_vcf] + } + def nPonFiles = ponFiles.size() + def ponArrayLiteral = ponFiles.collect { f -> "${f}" }.join(' ') + + // Shell block to prepare the PON VCF for --pon_filtering (merge if multiple, copy if single) + def ponPrepareBlock = (isTumorOnly && nPonFiles > 0) ? """ + # Prepare PON VCF for --pon_filtering: merge multiple databases into one sorted+indexed file, + # or copy a single VCF. DeepSomatic requires a single VCF for --pon_filtering. + _PON_VCFS=( ${ponArrayLiteral} ) + if [ \${#_PON_VCFS[@]} -gt 1 ]; then + gzip -dc "\${_PON_VCFS[0]}" | grep '^##fileformat' > _pon_hdr.txt + for vcf in "\${_PON_VCFS[@]}"; do gzip -dc "\$vcf" | grep '^##' | grep -v '^##fileformat'; done | sort -T . -u >> _pon_hdr.txt + gzip -dc "\${_PON_VCFS[0]}" | grep '^#CHROM' >> _pon_hdr.txt + for vcf in "\${_PON_VCFS[@]}"; do gzip -dc "\$vcf" | grep -v '^#'; done \\ + | sort -T . -t\$'\\t' -k1,1V -k2,2n | uniq > _pon_data.txt + cat _pon_hdr.txt _pon_data.txt | bgzip -c > merged_pon.vcf.gz + rm _pon_hdr.txt _pon_data.txt + else + cp "\${_PON_VCFS[0]}" merged_pon.vcf.gz + fi + tabix -p vcf merged_pon.vcf.gz + """ : "" + + // --pon_filtering argument for postprocess_variants (tumor-only only) + def ponFilterArg = "" + if (isTumorOnly) { + ponFilterArg = nPonFiles > 0 + ? 
'--pon_filtering "merged_pon.vcf.gz"' + : '--pon_filtering "/opt/models/deepsomatic/pons/PON_dbsnp138_gnomad_PB1000g_pon.vcf.gz"' + } + // Paired samples: ponFilterArg stays "" (no PON filtering) + + """ + ${ponPrepareBlock} + /opt/deepvariant/bin/postprocess_variants \\ + ${args} \\ + --ref "${fasta}" \\ + --infile "${variant_calls_tfrecord_name}" \\ + --outfile "${prefix}.vcf.gz" \\ + --process_somatic=true \\ + ${regions} \\ + ${small_model_arg} \\ + ${gvcf_arg} \\ + ${ponFilterArg} \\ + --cpus ${task.cpus} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead." + } + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + echo "" | gzip > ${prefix}.g.vcf.gz + touch ${prefix}.g.vcf.gz.tbi + + """ +} diff --git a/modules/local/fibertoolsrs/fire/main.nf b/modules/local/fibertoolsrs/fire/main.nf index 08d2dbaf..eed76d97 100644 --- a/modules/local/fibertoolsrs/fire/main.nf +++ b/modules/local/fibertoolsrs/fire/main.nf @@ -1,48 +1,19 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. 
-// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process FIBERTOOLSRS_FIRE { tag "$meta.id" label 'process_very_high' - label 'process_high_memory' + label "${params.use_gpu ? 'process_gpu_very_high_memory' : 'process_high_memory'}" - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fibertools-rs:0.6.2--h3b373d1_0': - 'biocontainers/fibertools-rs:0.6.2--h3b373d1_0' }" + 'https://depot.galaxyproject.org/singularity/fibertools-rs:0.8.1--h3b373d1_0': + 'biocontainers/fibertools-rs:0.8.1--h3b373d1_0' }" input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. 
tuple val(meta), path(bam) output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels tuple val(meta), path("*.bam"), emit: bam - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('fibertoolsrs'), eval("ft --version |& sed 's/^fibertools-rs v//; s/\tgit-details.*//'"), topic: versions, emit: versions_fibertoolsrs when: task.ext.when == null || task.ext.when @@ -50,15 +21,6 @@ process FIBERTOOLSRS_FIRE { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;) """ ft \\ fire \\ @@ -66,26 +28,11 @@ process FIBERTOOLSRS_FIRE { -t $task.cpus \\ $bam \\ ${prefix}_fire.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fibertoolsrs: \$(ft --version |& sed '1!d ; s/ft //') - END_VERSIONS """ stub: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 """ touch ${prefix}_fire.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fibertoolsrs: \$(ft --version |& sed '1!d ; s/ft //') - END_VERSIONS """ } diff --git a/modules/local/fibertoolsrs/fire/meta.yml b/modules/local/fibertoolsrs/fire/meta.yml index 63f0a8b0..708c654e 100644 --- a/modules/local/fibertoolsrs/fire/meta.yml +++ b/modules/local/fibertoolsrs/fire/meta.yml @@ -1,33 +1,28 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "fibertoolsrs_fire" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here +description: Fibertools-rs FIRE (Fiber-seq Inferred Regulatory Elements) predicts regulatory elements from fiber-seq data keywords: - - sort - - example - - genomics + - fiberseq + - regulatory-elements + - FIRE + - long-read + - epigenetics tools: - "fibertoolsrs": - ## TODO nf-core: Add a description and other details for the software below - description: "Mitchell Vollger's rust tools for fiberseq data." 
+ description: "DNA-m6A calling and integrated long-read epigenetic and genetic analysis with fibertools" homepage: "https://fiberseq.github.io/fibertools/fibertools.html" documentation: "https://fiberseq.github.io/fibertools/fibertools.html" tool_dev_url: "https://github.com/fiberseq/fibertools-rs" - doi: "" + doi: "10.1101/gr.279095.124" licence: ["MIT"] - identifier: -## TODO nf-core: Add a description of all of the variables used as input input: - # Only when we have meta - - meta: type: map description: | Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` - - ## TODO nf-core: Delete / customise this example input - bam: type: file description: Sorted BAM/CRAM/SAM file @@ -37,30 +32,43 @@ input: - edam: "http://edamontology.org/format_2573" - edam: "http://edamontology.org/format_3462" -## TODO nf-core: Add a description of all of the variables used as output output: - - bam: - #Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - ## TODO nf-core: Delete / customise this example output - - "*.bam": - type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - ontologies: - - edam: "http://edamontology.org/format_25722" - - edam: "http://edamontology.org/format_2573" - - edam: "http://edamontology.org/format_3462" + bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.bam": + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: "http://edamontology.org/format_25722" + - edam: "http://edamontology.org/format_2573" + - edam: "http://edamontology.org/format_3462" + versions_fibertoolsrs: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fibertoolsrs: + type: string + description: The tool name + - "ft --version |& sed '1!d ; s/ft //'": + type: string + description: The command used to generate the version of the tool - - versions: - - "versions.yml": - type: file - description: File containing software versions - pattern: "versions.yml" +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fibertoolsrs: + type: string + description: The tool name + - "ft --version |& sed '1!d ; s/ft //'": + type: string + description: The command used to generate the version of the tool authors: - "@MariosEft97" diff --git a/modules/local/fibertoolsrs/nucleosomes/main.nf b/modules/local/fibertoolsrs/nucleosomes/main.nf index 6722aab2..db42d106 100644 --- a/modules/local/fibertoolsrs/nucleosomes/main.nf +++ b/modules/local/fibertoolsrs/nucleosomes/main.nf @@ -1,48 +1,19 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. 
single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process FIBERTOOLSRS_NUCLEOSOMES { tag "$meta.id" label 'process_very_high' - label 'process_high_memory' + label "${params.use_gpu ? 'process_gpu_very_high_memory' : 'process_high_memory'}" - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fibertools-rs:0.6.2--h3b373d1_0': - 'biocontainers/fibertools-rs:0.6.2--h3b373d1_0' }" + 'https://depot.galaxyproject.org/singularity/fibertools-rs:0.8.1--h3b373d1_0': + 'biocontainers/fibertools-rs:0.8.1--h3b373d1_0' }" input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. 
indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. tuple val(meta), path(bam) output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels tuple val(meta), path("*.bam"), emit: bam - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('fibertoolsrs'), eval("ft --version |& sed 's/^fibertools-rs v//; s/\tgit-details.*//'"), topic: versions, emit: versions_fibertoolsrs when: task.ext.when == null || task.ext.when @@ -50,15 +21,7 @@ process FIBERTOOLSRS_NUCLEOSOMES { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;) + """ ft \\ add-nucleosomes \\ @@ -66,26 +29,11 @@ process FIBERTOOLSRS_NUCLEOSOMES { -t $task.cpus \\ $bam \\ ${prefix}_nuc.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fibertoolsrs: \$(ft --version |& sed '1!d ; s/ft //') - END_VERSIONS """ stub: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 """ touch ${prefix}_nuc.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fibertoolsrs: \$(ft --version |& sed '1!d ; s/ft //') - END_VERSIONS """ } diff --git a/modules/local/fibertoolsrs/nucleosomes/meta.yml b/modules/local/fibertoolsrs/nucleosomes/meta.yml index 1863d9f1..120eea88 100644 --- a/modules/local/fibertoolsrs/nucleosomes/meta.yml +++ b/modules/local/fibertoolsrs/nucleosomes/meta.yml @@ -1,33 +1,29 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "fibertoolssrs_nucleosomes" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here +name: "fibertoolsrs_nucleosomes" +description: Fibertools-rs add-nucleosomes adds nucleosome positions to fiber-seq BAM files keywords: - - sort - - example - - genomics + - fiberseq + - nucleosomes + - long-read + - epigenetics + - chromatin tools: - - "fibertoolssrs": - ## TODO nf-core: Add a description and other details for the software below - description: "" - homepage: "" - documentation: "" - tool_dev_url: "" - doi: "" - licence: - identifier: + - "fibertoolsrs": 
+ description: "DNA-m6A calling and integrated long-read epigenetic and genetic analysis with fibertools" + homepage: "https://fiberseq.github.io/fibertools/fibertools.html" + documentation: "https://fiberseq.github.io/fibertools/fibertools.html" + tool_dev_url: "https://github.com/fiberseq/fibertools-rs" + doi: "10.1101/gr.279095.124" + licence: ["MIT"] -## TODO nf-core: Add a description of all of the variables used as input input: - # Only when we have meta - - meta: type: map description: | Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` - ## TODO nf-core: Delete / customise this example input - bam: type: file description: Sorted BAM/CRAM/SAM file @@ -36,31 +32,43 @@ input: - edam: "http://edamontology.org/format_25722" - edam: "http://edamontology.org/format_2573" - edam: "http://edamontology.org/format_3462" - -## TODO nf-core: Add a description of all of the variables used as output output: - - bam: - #Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - ## TODO nf-core: Delete / customise this example output - - "*.bam": - type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - ontologies: - - edam: "http://edamontology.org/format_25722" - - edam: "http://edamontology.org/format_2573" - - edam: "http://edamontology.org/format_3462" + bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.bam": + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: "http://edamontology.org/format_25722" + - edam: "http://edamontology.org/format_2573" + - edam: "http://edamontology.org/format_3462" + versions_fibertoolsrs: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fibertoolsrs: + type: string + description: The tool name + - "ft --version |& sed '1!d ; s/ft //'": + type: string + description: The command used to generate the version of the tool - - versions: - - "versions.yml": - type: file - description: File containing software versions - pattern: "versions.yml" +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fibertoolsrs: + type: string + description: The tool name + - "ft --version |& sed '1!d ; s/ft //'": + type: string + description: The command used to generate the version of the tool authors: - "@robert-a-forsyth" diff --git a/modules/local/fibertoolsrs/predictm6a/main.nf b/modules/local/fibertoolsrs/predictm6a/main.nf index e91572b9..bb355bfe 100644 --- a/modules/local/fibertoolsrs/predictm6a/main.nf +++ b/modules/local/fibertoolsrs/predictm6a/main.nf @@ -1,48 +1,19 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. 
single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process FIBERTOOLSRS_PREDICTM6A { tag "$meta.id" label 'process_very_high' - label 'process_high_memory' + label "${params.use_gpu ? 'process_gpu_very_high_memory' : 'process_high_memory'}" - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fibertools-rs:0.6.2--h3b373d1_0': - 'biocontainers/fibertools-rs:0.6.2--h3b373d1_0' }" + 'https://depot.galaxyproject.org/singularity/fibertools-rs:0.8.1--h3b373d1_0': + 'biocontainers/fibertools-rs:0.8.1--h3b373d1_0' }" input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. 
indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. tuple val(meta), path(bam) output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels tuple val(meta), path("*.bam"), emit: bam - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('fibertoolsrs'), eval("ft --version |& sed 's/^fibertools-rs v//; s/\tgit-details.*//'"), topic: versions, emit: versions_fibertoolsrs when: task.ext.when == null || task.ext.when @@ -50,15 +21,7 @@ process FIBERTOOLSRS_PREDICTM6A { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;) + """ ft \\ predict-m6a \\ @@ -66,26 +29,12 @@ process FIBERTOOLSRS_PREDICTM6A { -t $task.cpus \\ $bam \\ ${prefix}_m6a.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fibertoolsrs: \$(ft --version |& sed '1!d ; s/ft //') - END_VERSIONS """ stub: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 + """ touch ${prefix}_m6a.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fibertoolsrs: \$(ft --version |& sed '1!d ; s/ft //') - END_VERSIONS """ } diff --git a/modules/local/fibertoolsrs/predictm6a/meta.yml b/modules/local/fibertoolsrs/predictm6a/meta.yml index f9b25d64..dfb5b2e1 100644 --- a/modules/local/fibertoolsrs/predictm6a/meta.yml +++ b/modules/local/fibertoolsrs/predictm6a/meta.yml @@ -1,33 +1,29 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "fibertoolsrs_predictm6a" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here +description: Fibertools-rs predict-m6a predicts m6A modifications from fiber-seq data keywords: - - sort - - example - - genomics + - fiberseq + - m6A + - methylation + - long-read + - epigenetics + - DNA-modification tools: - "fibertoolsrs": - ## TODO nf-core: Add a description and other details for the software below - description: "Mitchell Vollger's rust tools for fiberseq data." 
+ description: "DNA-m6A calling and integrated long-read epigenetic and genetic analysis with fibertools" homepage: "https://fiberseq.github.io/fibertools/fibertools.html" documentation: "https://fiberseq.github.io/fibertools/fibertools.html" tool_dev_url: "https://github.com/fiberseq/fibertools-rs" - doi: "" + doi: "10.1101/gr.279095.124" licence: ["MIT"] - identifier: -## TODO nf-core: Add a description of all of the variables used as input input: - # Only when we have meta - - meta: type: map description: | Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` - - ## TODO nf-core: Delete / customise this example input - bam: type: file description: Sorted BAM/CRAM/SAM file @@ -37,30 +33,43 @@ input: - edam: "http://edamontology.org/format_2573" - edam: "http://edamontology.org/format_3462" -## TODO nf-core: Add a description of all of the variables used as output output: - - bam: - #Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - ## TODO nf-core: Delete / customise this example output - - "*.bam": - type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - ontologies: - - edam: "http://edamontology.org/format_25722" - - edam: "http://edamontology.org/format_2573" - - edam: "http://edamontology.org/format_3462" + bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.bam": + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: "http://edamontology.org/format_25722" + - edam: "http://edamontology.org/format_2573" + - edam: "http://edamontology.org/format_3462" + versions_fibertoolsrs: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fibertoolsrs: + type: string + description: The tool name + - "ft --version |& sed '1!d ; s/ft //'": + type: string + description: The command used to generate the version of the tool - - versions: - - "versions.yml": - type: file - description: File containing software versions - pattern: "versions.yml" +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fibertoolsrs: + type: string + description: The tool name + - "ft --version |& sed '1!d ; s/ft //'": + type: string + description: The command used to generate the version of the tool authors: - "@MariosEft97" diff --git a/modules/local/fibertoolsrs/qc/main.nf b/modules/local/fibertoolsrs/qc/main.nf index b99424d6..db89d71a 100644 --- a/modules/local/fibertoolsrs/qc/main.nf +++ b/modules/local/fibertoolsrs/qc/main.nf @@ -1,47 +1,18 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. 
single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process FIBERTOOLSRS_QC { tag "$meta.id" label 'process_very_high' - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fibertools-rs:0.6.2--h3b373d1_0': - 'biocontainers/fibertools-rs:0.6.2--h3b373d1_0' }" + 'https://depot.galaxyproject.org/singularity/fibertools-rs:0.8.1--h3b373d1_0': + 'biocontainers/fibertools-rs:0.8.1--h3b373d1_0' }" input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. 
tuple val(meta), path(bam) output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels tuple val(meta), path("*.txt"), emit: qc_txt - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('fibertoolsrs'), eval("ft --version |& sed 's/^fibertools-rs v//; s/\tgit-details.*//'"), topic: versions, emit: versions_fibertoolsrs when: task.ext.when == null || task.ext.when @@ -49,15 +20,6 @@ process FIBERTOOLSRS_QC { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;) """ ft \\ qc \\ @@ -65,26 +27,12 @@ process FIBERTOOLSRS_QC { -t $task.cpus \\ $bam \\ ${prefix}_qc.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fibertoolsrs: \$(ft --version |& sed '1!d ; s/ft //') - END_VERSIONS """ stub: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 + """ touch ${prefix}_qc.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fibertoolsrs: \$(ft --version |& sed '1!d ; s/ft //') - END_VERSIONS """ } diff --git a/modules/local/fibertoolsrs/qc/meta.yml b/modules/local/fibertoolsrs/qc/meta.yml index 3a15bee4..ffce7d2a 100644 --- a/modules/local/fibertoolsrs/qc/meta.yml +++ b/modules/local/fibertoolsrs/qc/meta.yml @@ -1,33 +1,28 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "fibertoolsrs_qc" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here +description: Fibertools-rs qc generates quality control statistics for fiber-seq BAM files keywords: - - sort - - example - - genomics + - fiberseq + - qc + - quality-control + - long-read + - epigenetics tools: - "fibertoolsrs": - ## TODO nf-core: Add a description and other details for the software below - description: "Mitchell Vollger's rust tools for fiberseq data." 
+ description: "DNA-m6A calling and integrated long-read epigenetic and genetic analysis with fibertools" homepage: "https://fiberseq.github.io/fibertools/fibertools.html" documentation: "https://fiberseq.github.io/fibertools/fibertools.html" tool_dev_url: "https://github.com/fiberseq/fibertools-rs" - doi: "" + doi: "10.1101/gr.279095.124" licence: ["MIT"] - identifier: -## TODO nf-core: Add a description of all of the variables used as input input: - # Only when we have meta - - meta: type: map description: | Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` - - ## TODO nf-core: Delete / customise this example input - bam: type: file description: Sorted BAM/CRAM/SAM file @@ -37,30 +32,39 @@ input: - edam: "http://edamontology.org/format_2573" - edam: "http://edamontology.org/format_3462" -## TODO nf-core: Add a description of all of the variables used as output output: - - bam: - #Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - ## TODO nf-core: Delete / customise this example output - - "*.bam": - type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - ontologies: - - edam: "http://edamontology.org/format_25722" - - edam: "http://edamontology.org/format_2573" - - edam: "http://edamontology.org/format_3462" + qc_txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.txt": + type: file + description: QC text file + pattern: "*.txt" + versions_fibertoolsrs: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fibertoolsrs: + type: string + description: The tool name + - "ft --version |& sed '1!d ; s/ft //'": + type: string + description: The command used to generate the version of the tool - - versions: - - "versions.yml": - type: file - description: File containing software versions - pattern: "versions.yml" +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fibertoolsrs: + type: string + description: The tool name + - "ft --version |& sed '1!d ; s/ft //'": + type: string + description: The command used to generate the version of the tool authors: - "@MariosEft97" diff --git a/modules/local/longphase/modcall/environment.yml b/modules/local/longphase/modcall/environment.yml new file mode 100644 index 00000000..f436bdae --- /dev/null +++ b/modules/local/longphase/modcall/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.23.1 + - bioconda::longphase=2.0.1 diff --git a/modules/local/longphase/modcall/main.nf b/modules/local/longphase/modcall/main.nf new file mode 100644 index 00000000..9417ad54 --- /dev/null +++ b/modules/local/longphase/modcall/main.nf @@ -0,0 +1,51 @@ +process LONGPHASE_MODCALL { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/83/83fce1d397cf71705cc096fc0e0e52f7013bdd471ef68ee53ae765688e5c439c/data': + 'community.wave.seqera.io/library/longphase_samtools:8c61296cae7a5fc0' }" + + input: + tuple val(meta), path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + + output: + tuple val(meta), path("*.vcf") , emit: mod_vcf + tuple val(meta), path("*.log") , emit: log , optional: true + tuple val("${task.process}"), val('longphase'), eval("longphase --version | head -n 1 | sed 's/Version: //'"), topic: versions, emit: versions_longphase + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + longphase \\ + modcall \\ + $args \\ + --threads 1 \\ + -o ${prefix} \\ + --reference ${fasta} \\ + -b ${bam} \\ + --out-prefix ${prefix} + + if [ -f "${prefix}.out" ]; then + mv ${prefix}.out ${prefix}.log + fi + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def log = args.contains('--log') ? 
"touch ${prefix}.log" : '' + """ + touch ${prefix}.vcf + ${log} + """ +} diff --git a/modules/local/metaextract/main.nf b/modules/local/metaextract/main.nf index 7f032250..fb3c474d 100644 --- a/modules/local/metaextract/main.nf +++ b/modules/local/metaextract/main.nf @@ -12,7 +12,7 @@ process METAEXTRACT { output: tuple val(meta), env(basecall_model), env(kinetics) , emit: meta_ext - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools when: task.ext.when == null || task.ext.when @@ -20,30 +20,22 @@ process METAEXTRACT { script: def args = task.ext.args ?: '' def ont = meta.platform == 'ont' + basecall_model = '' + kinetics = '' """ - basecall_model="" - kinetics="" + export basecall_model="${basecall_model}" + export kinetics="${kinetics}" if [ $ont = 'true' ]; then basecall_model=\$(samtools view -H "${bam}" ${args} | awk -F'basecall_model=' '/basecall_model=/ {print \$2; exit}' | awk '{print \$1}' | tr -d '[:space:]') else kinetics=\$(samtools view -H ${bam} | awk '/--keep-kinetics/ {found=1} END {print (found ? 
"true" : "false")}') basecall_model="hifi_revio" fi - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ stub: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ } diff --git a/modules/local/metaextract/meta.yml b/modules/local/metaextract/meta.yml index 542861a6..7850791b 100644 --- a/modules/local/metaextract/meta.yml +++ b/modules/local/metaextract/meta.yml @@ -1,33 +1,30 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "metaextract" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here +description: Extract basecall model and kinetics metadata from BAM file headers keywords: - - sort - - example - - genomics + - metadata + - bam + - header + - basecall + - kinetics + - long-read tools: - - "metaextract": - ## TODO nf-core: Add a description and other details for the software below - description: "" - homepage: "" - documentation: "" - tool_dev_url: "" - doi: "" - licence: - identifier: + - "samtools": + description: "Tools for manipulating next-generation sequencing data" + homepage: "http://www.htslib.org/" + documentation: "http://www.htslib.org/doc/samtools.html" + tool_dev_url: "https://github.com/samtools/samtools" + doi: "10.1093/bioinformatics/btp352" + licence: ["MIT"] + identifier: biotools:samtools -## TODO nf-core: Add a description of all of the variables used as input input: - # Only when we have meta - - meta: type: map description: | Groovy Map containing sample information e.g. 
`[ id:'sample1', single_end:false ]` - - ## TODO nf-core: Delete / customise this example input - bam: type: file description: Sorted BAM/CRAM/SAM file @@ -37,30 +34,41 @@ input: - edam: "http://edamontology.org/format_2573" - edam: "http://edamontology.org/format_3462" -## TODO nf-core: Add a description of all of the variables used as output output: - - bam: - #Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - ## TODO nf-core: Delete / customise this example output - - "*.bam": - type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - ontologies: - - edam: "http://edamontology.org/format_25722" - - edam: "http://edamontology.org/format_2573" - - edam: "http://edamontology.org/format_3462" + meta_ext: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - env(basecall_model): + type: string + description: Basecall model extracted from BAM header + - env(kinetics): + type: string + description: Kinetics information extracted from BAM header + versions_samtools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool - - versions: - - "versions.yml": - type: file - description: File containing software versions - pattern: "versions.yml" +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool authors: - "@robert-a-forsyth" diff --git a/modules/local/vcfsplit/main.nf b/modules/local/vcfsplit/main.nf 
index ec69277d..f6156d34 100644 --- a/modules/local/vcfsplit/main.nf +++ b/modules/local/vcfsplit/main.nf @@ -16,14 +16,12 @@ process VCFSPLIT { tuple val(meta), path("*germline.vcf.gz") , emit: germline_vcf tuple val(meta), path("*germline.vcf.gz.tbi") , emit: germline_tbi - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version |& sed '1!d ; s/bcftools //'"), topic: versions, emit: versions_bcftools when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" """ bcftools view -i 'FILTER="PASS"' $indel_vcf | bgzip -c > indels_pass.vcf.gz @@ -33,8 +31,8 @@ process VCFSPLIT { bcftools concat -a -Oz -o somatic.vcf.gz indels_pass.vcf.gz snv_pass.vcf.gz tabix -p vcf somatic.vcf.gz - bcftools view -i 'FILTER="NonSomatic"' $indel_vcf | bgzip -c > indels_filtered.vcf.gz - bcftools view -i 'FILTER="NonSomatic"' $snv_vcf | bgzip -c > snv_filtered.vcf.gz + bcftools view -i 'FILTER~"NonSomatic" || INFO/Verdict_Germline=1' $indel_vcf | bgzip -c > indels_filtered.vcf.gz + bcftools view -i 'FILTER~"NonSomatic" || INFO/Verdict_Germline=1' $snv_vcf | bgzip -c > snv_filtered.vcf.gz tabix -p vcf indels_filtered.vcf.gz tabix -p vcf snv_filtered.vcf.gz bcftools concat -a -Oz -o germline_tmp.vcf.gz indels_filtered.vcf.gz snv_filtered.vcf.gz @@ -47,24 +45,13 @@ process VCFSPLIT { # Cleanup intermediate files rm indels_pass.vcf.gz snv_pass.vcf.gz rm indels_pass.vcf.gz.tbi snv_pass.vcf.gz.tbi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - vcfsplit: \$(bcftools --version |& sed '1!d ; s/bcftools //') - END_VERSIONS """ stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" """ echo "" | gzip > somatic.vcf.gz echo "" | gzip > germline.vcf.gz echo "" | gzip > somatic.vcf.gz.tbi echo "" | gzip > germline.vcf.gz.tbi - cat <<-END_VERSIONS > versions.yml - "${task.process}": - vcfsplit: \$(bcftools --version |& sed '1!d ; 
s/bcftools //') - END_VERSIONS """ } diff --git a/modules/local/vcfsplit/meta.yml b/modules/local/vcfsplit/meta.yml index 8f115005..464141a2 100644 --- a/modules/local/vcfsplit/meta.yml +++ b/modules/local/vcfsplit/meta.yml @@ -1,66 +1,102 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "vcfsplit" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here +description: Split somatic VCF files into somatic and germline variants based on FILTER field keywords: - - sort - - example - - genomics + - vcf + - somatic + - germline + - variant-filtering + - split tools: - - "vcfsplit": - ## TODO nf-core: Add a description and other details for the software below - description: "" - homepage: "" - documentation: "" - tool_dev_url: "" - doi: "" - licence: - identifier: + - "bcftools": + description: "Tools for variant calling and manipulating VCFs and BCFs" + homepage: "http://samtools.github.io/bcftools/bcftools.html" + documentation: "http://www.htslib.org/doc/bcftools.html" + tool_dev_url: "https://github.com/samtools/bcftools" + doi: "10.1093/gigascience/giab008" + licence: ["MIT"] + identifier: biotools:bcftools -## TODO nf-core: Add a description of all of the variables used as input input: - # Only when we have meta - - meta: type: map description: | Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - ## TODO nf-core: Delete / customise this example input - - bam: + e.g. 
`[ id:'sample1' ]` + - snv_vcf: + type: file + description: SNV VCF file from somatic caller + pattern: "*.vcf.gz" + - indel_vcf: type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - ontologies: - - edam: "http://edamontology.org/format_25722" - - edam: "http://edamontology.org/format_2573" - - edam: "http://edamontology.org/format_3462" + description: Indel VCF file from somatic caller + pattern: "*.vcf.gz" -## TODO nf-core: Add a description of all of the variables used as output output: - - bam: - #Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - ## TODO nf-core: Delete / customise this example output - - "*.bam": - type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - ontologies: - - edam: "http://edamontology.org/format_25722" - - edam: "http://edamontology.org/format_2573" - - edam: "http://edamontology.org/format_3462" + somatic_vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*somatic.vcf.gz": + type: file + description: Somatic variants VCF file + pattern: "*somatic.vcf.gz" + somatic_tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*somatic.vcf.gz.tbi": + type: file + description: Somatic variants VCF index file + pattern: "*somatic.vcf.gz.tbi" + germline_vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*germline.vcf.gz": + type: file + description: Germline variants VCF file + pattern: "*germline.vcf.gz" + germline_tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*germline.vcf.gz.tbi": + type: file + description: Germline variants VCF index file + pattern: "*germline.vcf.gz.tbi" + + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version |& sed '1!d ; s/bcftools //'": + type: string + description: The command used to generate the version of the tool - - versions: - - "versions.yml": - type: file - description: File containing software versions - pattern: "versions.yml" +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version |& sed '1!d ; s/bcftools //'": + type: string + description: The command used to generate the version of the tool authors: - "@robert-a-forsyth" diff --git a/modules/local/wakhan/environment.yml b/modules/local/wakhan/environment.yml index a1450d1b..e26b3ee4 100644 --- a/modules/local/wakhan/environment.yml +++ b/modules/local/wakhan/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - "bioconda::wakhan=0.1.1" + - "bioconda::wakhan=0.4.2" diff --git a/modules/local/wakhan/main.nf b/modules/local/wakhan/main.nf index 8bd5d5b9..a3e05037 100644 --- a/modules/local/wakhan/main.nf +++ b/modules/local/wakhan/main.nf @@ -1,32 +1,16 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. 
-// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process WAKHAN { tag "$meta.id" label 'process_medium' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/wakhan:0.2.0--pyhdfd78af_1': - 'biocontainers/wakhan:0.2.0--pyhdfd78af_1' }" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'docker://robertaforsyth/wakhan:0.4.2_iss58': + 'robertaforsyth/wakhan:0.4.2_iss58' }" input: tuple val(meta), path(tumor_input), path(tumor_index), path(normal_input), path(normal_index), path(vcf), path(breakpoints) tuple val(meta2), path(reference) + path(centromere_bed) output: tuple val(meta), path("*/*_genes_genome.html") , emit: genes_genome_html @@ -42,17 +26,18 @@ process WAKHAN { tuple val(meta), path("*/vcf_output/*_wakhan_cna_*.vcf") , emit: vcf_files tuple val(meta), path("*_heatmap_ploidy_purity.html") , emit: heatmap_html tuple val(meta), path("*_heatmap_ploidy_purity.html.pdf") , emit: heatmap_pdf - tuple val(meta), path("*_optimized_peak.html") , emit: optimized_peak_html tuple val(meta), path("coverage_data/*.csv") , emit: coverage_csv + tuple val(meta), path("coverage_data/*.png") , emit: coverage_png tuple val(meta), path("coverage_plots/*.html") , emit: coverage_plots_html tuple val(meta), path("coverage_plots/*.pdf") , emit: coverage_plots_pdf tuple val(meta), path("phasing_output/*.html") , emit: phasing_html tuple val(meta), path("phasing_output/*.pdf") , emit: phasing_pdf - tuple val(meta), path("phasing_output/*.rephased.vcf.gz") , emit: rephased_vcf - tuple val(meta), path("phasing_output/*.rephased.vcf.gz.csi") , emit: rephased_vcf_index + tuple val(meta), path("phasing_output/*rephased.vcf.gz") , emit: rephased_vcf + tuple val(meta), path("phasing_output/*rephased.vcf.gz.csi") , emit: rephased_vcf_index tuple val(meta), path("snps_loh_plots/*_genome_snps_ratio_loh.html") , emit: snps_loh_plot, optional: true tuple val(meta), path("solutions_ranks.tsv") , emit: solutions_ranks - path "versions.yml" , emit: versions + // WARN: Manually update version information as tool does not provide on CLI + tuple val("${task.process}"), val('wakhan'), val("0.4.2"), topic: versions, emit: versions_wakhan when: task.ext.when == null || task.ext.when @@ -61,40 +46,26 @@ process WAKHAN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: 
"${meta.id}" def phased_vcf = normal_input ? "--normal-phased-vcf $vcf" : "--tumor-phased-vcf $vcf" - // WARN: Version information not provided by tool on CLI. Please update this string when upgrading BLAZE code - def VERSION = "0.2.0" + def centromere = centromere_bed ? "--centromere \$PWD/${centromere_bed}" : "" + """ wakhan \\ + all \\ --target-bam ${tumor_input} \\ --breakpoints ${breakpoints} \\ --reference ${reference} \\ --genome-name ${prefix} \\ --out-dir-plots . \\ ${phased_vcf} \\ + ${centromere} \\ ${args} \\ --threads ${task.cpus} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - wakhan: $VERSION - END_VERSIONS """ stub: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = "0.2.0" - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 """ touch ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - wakhan: $VERSION - END_VERSIONS """ } diff --git a/modules/local/wakhan/meta.yml b/modules/local/wakhan/meta.yml index 6695af4a..ee0a90c1 100644 --- a/modules/local/wakhan/meta.yml +++ b/modules/local/wakhan/meta.yml @@ -1,66 +1,315 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "wakhan" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here +description: A tool to analyze haplotype-specific chromosome-scale somatic copy number aberrations and aneuploidy using long reads keywords: - - sort - - example - - genomics + - copy-number + - aneuploidy + - somatic + - 
haplotype-specific + - long-read + - structural-variation tools: - "wakhan": - ## TODO nf-core: Add a description and other details for the software below - description: "A tool to analyze haplotype-specific chromosome-scale somatic copy number aberrations and aneuploidy using long reads" - homepage: "None" - documentation: "None" - tool_dev_url: "None" - doi: "" + description: "A tool to analyze haplotype-specific chromosome-scale somatic copy number aberrations and aneuploidy using long reads (Oxford Nanopore, PacBio)" + homepage: "https://github.com/KolmogorovLab/Wakhan" + documentation: "https://github.com/KolmogorovLab/Wakhan" + tool_dev_url: "https://github.com/KolmogorovLab/Wakhan" + doi: "10.64898/2025.12.11.25342098v1" licence: ["MIT"] - identifier: -## TODO nf-core: Add a description of all of the variables used as input input: - # Only when we have meta - - meta: type: map description: | Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - ## TODO nf-core: Delete / customise this example input - - bam: - type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - ontologies: - - edam: "http://edamontology.org/format_25722" - - edam: "http://edamontology.org/format_2573" - - edam: "http://edamontology.org/format_3462" + e.g. 
`[ id:'sample1' ]` + - tumor_input: + type: file + description: Tumor BAM file + pattern: "*.bam" + - tumor_index: + type: file + description: Tumor BAM index file + pattern: "*.bai" + - normal_input: + type: file + description: Normal BAM file (optional) + pattern: "*.bam" + - normal_index: + type: file + description: Normal BAM index file (optional) + pattern: "*.bai" + - vcf: + type: file + description: Phased VCF file + pattern: "*.vcf.gz" + - breakpoints: + type: file + description: Breakpoints file from structural variant caller + pattern: "*.{bed,vcf}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + - reference: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa,fna}" -## TODO nf-core: Add a description of all of the variables used as output output: - - bam: - #Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - ## TODO nf-core: Delete / customise this example output - - "*.bam": - type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - ontologies: - - edam: "http://edamontology.org/format_25722" - - edam: "http://edamontology.org/format_2573" - - edam: "http://edamontology.org/format_3462" + genes_genome_html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*/*_genes_genome.html": + type: file + description: Genes genome HTML plot + pattern: "*/*_genes_genome.html" + genes_genome_pdf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*/*_genes_genome.pdf": + type: file + description: Genes genome PDF plot + pattern: "*/*_genes_genome.pdf" + breakpoints_html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*/*_genome_copynumbers_breakpoints.html": + type: file + description: Genome copy numbers with breakpoints HTML plot + pattern: "*/*_genome_copynumbers_breakpoints.html" + breakpoints_pdf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*/*_genome_copynumbers_breakpoints.pdf": + type: file + description: Genome copy numbers with breakpoints PDF plot + pattern: "*/*_genome_copynumbers_breakpoints.pdf" + breakpoints_subclonal_html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*/*_genome_copynumbers_breakpoints_subclonal.html": + type: file + description: Genome copy numbers with subclonal breakpoints HTML plot + pattern: "*/*_genome_copynumbers_breakpoints_subclonal.html" + breakpoints_subclonal_pdf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*/*_genome_copynumbers_breakpoints_subclonal.pdf": + type: file + description: Genome copy numbers with subclonal breakpoints PDF plot + pattern: "*/*_genome_copynumbers_breakpoints_subclonal.pdf" + copynumbers_details_html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*/*_genome_copynumbers_details.html": + type: file + description: Genome copy numbers details HTML plot + pattern: "*/*_genome_copynumbers_details.html" + copynumbers_details_pdf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*/*_genome_copynumbers_details.pdf": + type: file + description: Genome copy numbers details PDF plot + pattern: "*/*_genome_copynumbers_details.pdf" + bed_files: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*/bed_output/*.bed": + type: file + description: BED output files + pattern: "*/bed_output/*.bed" + variation_plots: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*/variation_plots/*.html": + type: file + description: Variation plots HTML files + pattern: "*/variation_plots/*.html" + vcf_files: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*/vcf_output/*_wakhan_cna_*.vcf": + type: file + description: Wakhan CNA VCF files + pattern: "*/vcf_output/*_wakhan_cna_*.vcf" + heatmap_html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*_heatmap_ploidy_purity.html": + type: file + description: Heatmap ploidy purity HTML plot + pattern: "*_heatmap_ploidy_purity.html" + heatmap_pdf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*_heatmap_ploidy_purity.html.pdf": + type: file + description: Heatmap ploidy purity PDF plot + pattern: "*_heatmap_ploidy_purity.html.pdf" + optimized_peak_html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*_optimized_peak.html": + type: file + description: Optimized peak HTML plot + pattern: "*_optimized_peak.html" + coverage_csv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "coverage_data/*.csv": + type: file + description: Coverage data CSV files + pattern: "coverage_data/*.csv" + coverage_plots_html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "coverage_plots/*.html": + type: file + description: Coverage plots HTML files + pattern: "coverage_plots/*.html" + coverage_plots_pdf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "coverage_plots/*.pdf": + type: file + description: Coverage plots PDF files + pattern: "coverage_plots/*.pdf" + phasing_html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "phasing_output/*.html": + type: file + description: Phasing output HTML plots + pattern: "phasing_output/*.html" + phasing_pdf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "phasing_output/*.pdf": + type: file + description: Phasing output PDF plots + pattern: "phasing_output/*.pdf" + rephased_vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "phasing_output/*.rephased.vcf.gz": + type: file + description: Rephased VCF file + pattern: "phasing_output/*.rephased.vcf.gz" + rephased_vcf_index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "phasing_output/*.rephased.vcf.gz.csi": + type: file + description: Rephased VCF index file + pattern: "phasing_output/*.rephased.vcf.gz.csi" + snps_loh_plot: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "snps_loh_plots/*_genome_snps_ratio_loh.html": + type: file + description: SNPs LOH plot HTML file + pattern: "snps_loh_plots/*_genome_snps_ratio_loh.html" + solutions_ranks: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "solutions_ranks.tsv": + type: file + description: Solutions ranks TSV file + pattern: "solutions_ranks.tsv" + versions_wakhan: + - - ${task.process}: + type: string + description: The process the versions were collected from + - wakhan: + type: string + description: The tool name + - "0.2.0": + type: string + description: The version of wakhan - - versions: - - "versions.yml": - type: file - description: File containing software versions - pattern: "versions.yml" +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - wakhan: + type: string + description: The tool name + - "0.2.0": + type: string + description: The version of wakhan authors: - "@ljwharbers" diff --git a/modules/nf-core/ascat/ascat.diff b/modules/nf-core/ascat/ascat.diff new file mode 100644 index 00000000..345cc304 --- /dev/null +++ b/modules/nf-core/ascat/ascat.diff @@ -0,0 +1,496 @@ +Changes in component 'nf-core/ascat' +'modules/nf-core/ascat/meta.yml' is unchanged +Changes in 'ascat/main.nf': +--- modules/nf-core/ascat/main.nf ++++ modules/nf-core/ascat/main.nf +@@ -1,211 +1,189 @@ + process ASCAT { +- tag "${meta.id}" ++ tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" +- container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container +- ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/4c/4cf02c7911ee5e974ce7db978810770efbd8d872ff5ab3462d2a11bcf022fab5/data' +- : 'community.wave.seqera.io/library/ascat_cancerit-allelecount:c3e8749fa4af0e99'}" ++ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
++ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/4c/4cf02c7911ee5e974ce7db978810770efbd8d872ff5ab3462d2a11bcf022fab5/data': ++ 'community.wave.seqera.io/library/ascat_cancerit-allelecount:c3e8749fa4af0e99' }" + + input: + tuple val(meta), path(input_normal), path(index_normal), path(input_tumor), path(index_tumor) +- path allele_files +- path loci_files +- path bed_file +- path fasta +- path gc_file +- path rt_file ++ val(genomeVersion) ++ path(allele_files) ++ path(loci_files) ++ path(bed_file) // optional ++ path(fasta) // optional ++ path(gc_file) // optional ++ path(rt_file) // optional + + output: +- tuple val(meta), path("*alleleFrequencies_chr*.txt"), emit: allelefreqs +- tuple val(meta), path("*BAF.txt"), emit: bafs +- tuple val(meta), path("*cnvs.txt"), emit: cnvs +- tuple val(meta), path("*LogR.txt"), emit: logrs +- tuple val(meta), path("*metrics.txt"), emit: metrics +- tuple val(meta), path("*png"), emit: png +- tuple val(meta), path("*purityploidy.txt"), emit: purityploidy +- tuple val(meta), path("*segments.txt"), emit: segments +- path "versions.yml", emit: versions ++ tuple val(meta), path("*alleleFrequencies_chr*.txt"), emit: allelefreqs ++ tuple val(meta), path("*BAF.txt"), emit: bafs ++ tuple val(meta), path("*cnvs.txt"), emit: cnvs ++ tuple val(meta), path("*LogR.txt"), emit: logrs ++ tuple val(meta), path("*metrics.txt"), emit: metrics ++ tuple val(meta), path("*png"), emit: png ++ tuple val(meta), path("*pdf"), emit: pdf, optional: true ++ tuple val(meta), path("*purityploidy.txt"), emit: purityploidy ++ tuple val(meta), path("*segments.txt"), emit: segments ++ tuple val(meta), path("*segments_raw.txt"), emit: segments_raw, optional: true ++ path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: +- def args = task.ext.args ?: '' +- def prefix = task.ext.prefix ?: "${meta.id}" +- +- def gender = args.gender ? "${args.gender}" : "NULL" +- def genomeVersion = args.genomeVersion ? 
"${args.genomeVersion}" : "NULL" +- def purity = args.purity ? "${args.purity}" : "NULL" +- def ploidy = args.ploidy ? "${args.ploidy}" : "NULL" +- def gc_input = gc_file ? "${gc_file}" : "NULL" +- def rt_input = rt_file ? "${rt_file}" : "NULL" +- +- def minCounts_arg = args.minCounts ? ", minCounts = ${args.minCounts}" : "" +- def bed_file_arg = bed_file ? ", BED_file = '${bed_file}'" : "" +- def chrom_names_arg = args.chrom_names ? ", chrom_names = ${args.chrom_names}" : "" +- def min_base_qual_arg = args.min_base_qual ? ", min_base_qual = ${args.min_base_qual}" : "" +- def min_map_qual_arg = args.min_map_qual ? ", min_map_qual = ${args.min_map_qual}" : "" +- def skip_allele_counting_tumour_arg = args.skip_allele_counting_tumour ? ", skip_allele_counting_tumour = ${args.skip_allele_counting_tumour}" : "" +- def skip_allele_counting_normal_arg = args.skip_allele_counting_normal ? ", skip_allele_counting_normal = ${args.skip_allele_counting_normal}" : "" +- +- if (args.additional_allelecounter_flags && fasta) { +- additional_allelecounter_arg = ", additional_allelecounter_flags = \"${args.additional_allelecounter_flags} -r ${fasta}\" " +- } +- else if (args.additional_allelecounter_flags) { +- additional_allelecounter_arg = ", additional_allelecounter_flags = \"${args.additional_allelecounter_flags}\" " +- } +- else if (fasta) { +- additional_allelecounter_arg = ", additional_allelecounter_flags = '-r \"${fasta}\"'" +- } +- else { +- additional_allelecounter_arg = "" +- } +- ++ def args = task.ext.args ?: '' ++ def prefix = task.ext.prefix ?: "${meta.id}" ++ def gender = args.gender ? "$args.gender" : "NULL" ++ def purity = args.purity ? "$args.purity" : "NULL" ++ def ploidy = args.ploidy ? "$args.ploidy" : "NULL" ++ def penalty = args.penalty ? "$args.penalty" : "NULL" ++ def gc_input = gc_file ? "$gc_file" : "NULL" ++ def rt_input = rt_file ? "$rt_file" : "NULL" ++ def pdf_plots = (args.pdf_plots ?: false) ? 
"TRUE" : "FALSE" ++ def minCounts_arg = args.minCounts ? ",minCounts = $args.minCounts" : "" ++ def bed_file_arg = bed_file ? ",BED_file = '$bed_file'": "" ++ def chrom_names_arg = args.chrom_names ? ",chrom_names = $args.chrom_names" : "" ++ def min_base_qual_arg = args.min_base_qual ? ",min_base_qual = $args.min_base_qual" : "" ++ def min_map_qual_arg = args.min_map_qual ? ",min_map_qual = $args.min_map_qual" : "" ++ def fasta_arg = fasta ? ",ref.fasta = '$fasta'" : "" ++ def skip_allele_counting_tumour_arg = args.skip_allele_counting_tumour ? ",skip_allele_counting_tumour = $args.skip_allele_counting_tumour" : "" ++ def skip_allele_counting_normal_arg = args.skip_allele_counting_normal ? ",skip_allele_counting_normal = $args.skip_allele_counting_normal" : "" ++ ++ def normal_exists = input_normal ? 'TRUE' : 'FALSE' ++ def normal_bam = input_normal ? ",normalseqfile = '$input_normal'" : "" ++ def normal_name = input_normal ? ",normalname = '${prefix}.normal'" : "" ++ def longread_bins = args.longread_bins ? ",loci_binsize = $args.longread_bins" : "" ++ def allele_counter_flags = args.allele_counter_flags ? 
",additional_allelecounter_flags = '$args.allele_counter_flags'" : "" + """ + #!/usr/bin/env Rscript + library(RColorBrewer) + library(ASCAT) + options(bitmapType='cairo') + +- if(dir.exists("${allele_files}")) { +- # expected production use of a directory +- allele_path = normalizePath("${allele_files}") +- allele_prefix = paste0(allele_path, "/", "${allele_files}", "_chr") +- } else if(file.exists("${allele_files}")) { +- # expected testing use of a single file +- allele_path = basename(normalizePath("${allele_files}")) +- allele_prefix = sub('_chr[0-9]+\\\\.txt\$', "_chr", allele_path) +- } else { +- stop("The specified allele files do not exist.") +- } +- +- if(length(Sys.glob(paste0(allele_prefix,"*")) ) == 0) { +- stop(paste("No allele files found matching", allele_prefix)) +- } +- +- if(dir.exists("${loci_files}")) { +- # expected production use of a directory +- loci_path = normalizePath("${loci_files}") +- loci_prefix = paste0(loci_path, "/", "${loci_files}", "_chr") +- } else if(file.exists("${loci_files}")) { +- # expected testing use of a single file +- loci_path = basename(normalizePath("${loci_files}")) +- loci_prefix = sub('_chr[0-9]+\\\\.txt\$', "_chr", loci_path) +- } else { +- stop("The specified loci files do not exist.") +- } +- +- if(length(Sys.glob(paste0(loci_prefix,"*")) ) == 0) { +- stop(paste("No loci files found matching", loci_prefix)) +- } +- +- # Prepare from BAM files ++ #build prefixes: ++ allele_path = normalizePath("$allele_files") ++ allele_prefix = paste0(allele_path, "/", "$allele_files", "_chr") ++ ++ loci_path = normalizePath("$loci_files") ++ loci_prefix = paste0(loci_path, "/", "$loci_files", "_chr") ++ ++ #prepare from BAM files + ascat.prepareHTS( +- tumourseqfile = "${input_tumor}", +- normalseqfile = "${input_normal}", +- tumourname = paste0("${prefix}", ".tumour"), +- normalname = paste0("${prefix}", ".normal"), ++ tumourseqfile = "$input_tumor", ++ tumourname = paste0("$prefix", ".tumour"), + allelecounter_exe = 
"alleleCounter", + alleles.prefix = allele_prefix, + loci.prefix = loci_prefix, +- gender = "${gender}", +- genomeVersion = "${genomeVersion}", +- nthreads = ${task.cpus} +- ${minCounts_arg} +- ${bed_file_arg} +- ${chrom_names_arg} +- ${min_base_qual_arg} +- ${min_map_qual_arg} +- ${skip_allele_counting_tumour_arg} +- ${skip_allele_counting_normal_arg} +- ${additional_allelecounter_arg} +- , seed = 42 ++ gender = "$gender", ++ genomeVersion = "$genomeVersion", ++ nthreads = $task.cpus ++ $normal_bam ++ $normal_name ++ $minCounts_arg ++ $bed_file_arg ++ $chrom_names_arg ++ $min_base_qual_arg ++ $min_map_qual_arg ++ $longread_bins ++ $fasta_arg ++ $allele_counter_flags ++ $skip_allele_counting_tumour_arg ++ $skip_allele_counting_normal_arg, ++ seed = 42 + ) + +- # Load the data +- ascat.bc = ascat.loadData( +- Tumor_LogR_file = paste0("${prefix}", ".tumour_tumourLogR.txt"), +- Tumor_BAF_file = paste0("${prefix}", ".tumour_tumourBAF.txt"), +- Germline_LogR_file = paste0("${prefix}", ".tumour_normalLogR.txt"), +- Germline_BAF_file = paste0("${prefix}", ".tumour_normalBAF.txt"), +- genomeVersion = "${genomeVersion}", +- gender = "${gender}" +- ) +- +- # Plot the raw data +- ascat.plotRawData(ascat.bc, img.prefix = paste0("${prefix}", ".before_correction.")) +- +- # Optional LogRCorrection +- if("${gc_input}" != "NULL") { +- +- if(dir.exists("${gc_input}")) { +- # sarek production use of an unzipped folder containing one file +- gc_input = list.files("${gc_input}", recursive = TRUE, full.names = TRUE) +- if(length(gc_input) != 1 | !file.exists(gc_input)) { +- stop("A single gc_input should be provided!") +- } +- } else if(file.exists("${gc_input}")) { +- gc_input = normalizePath("${gc_input}") +- } else { +- stop("gc_input must be a file or folder containing one file") +- } +- +- if("${rt_input}" != "NULL"){ +- +- if(dir.exists("${rt_input}")) { +- # sarek production use of an unzipped folder containing one file +- rt_input = list.files("${rt_input}", recursive = TRUE, 
full.names = TRUE) +- if(length(rt_input) != 1 | !file.exists(rt_input)) { +- stop("A single rt_input should be provided!") +- } +- } else if(file.exists("${rt_input}")) { +- rt_input = normalizePath("${rt_input}") +- } else { +- stop("rt_input must be a file or folder containing one file") +- } +- ++ ++ #Load the data ++ if($normal_exists) { ++ print("normal exists") ++ ascat.bc = ascat.loadData( ++ Tumor_LogR_file = paste0("$prefix", ".tumour_tumourLogR.txt"), ++ Tumor_BAF_file = paste0("$prefix", ".tumour_tumourBAF.txt"), ++ Germline_LogR_file = paste0("$prefix", ".tumour_normalLogR.txt"), ++ Germline_BAF_file = paste0("$prefix", ".tumour_normalBAF.txt"), ++ genomeVersion = "$genomeVersion", ++ gender = "$gender" ++ ) ++ } else { ++ print("normal does not exist") ++ ascat.bc = ascat.loadData( ++ Tumor_LogR_file = paste0("$prefix", ".tumour_tumourLogR.txt"), ++ Tumor_BAF_file = paste0("$prefix", ".tumour_tumourBAF.txt"), ++ genomeVersion = "$genomeVersion", ++ gender = "$gender") ++ gg = ascat.predictGermlineGenotypes(ascat.bc, platform = "WGS_hg38_50X") ++ ++ } ++ print("printing ascat.bc") ++ print(ascat.bc) ++ ++ #Plot the raw data ++ ascat.plotRawData(ascat.bc, img.prefix = paste0("$prefix", ".before_correction.")) ++ ++ # optional LogRCorrection ++ if("$gc_input" != "NULL") { ++ gc_input = paste0(normalizePath("$gc_input")) ++ ++ if("$rt_input" != "NULL"){ ++ rt_input = paste0(normalizePath("$rt_input")) + ascat.bc = ascat.correctLogR(ascat.bc, GCcontentfile = gc_input, replictimingfile = rt_input) +- # Plot raw data after correction +- ascat.plotRawData(ascat.bc, img.prefix = paste0("${prefix}", ".after_correction_gc_rt.")) ++ #Plot raw data after correction ++ ascat.plotRawData(ascat.bc, img.prefix = paste0("$prefix", ".after_correction_gc_rt.")) + } + else { +- ascat.bc = ascat.correctLogR(ascat.bc, GCcontentfile = gc_input, replictimingfile = ${rt_input}) +- # Plot raw data after correction +- ascat.plotRawData(ascat.bc, img.prefix = paste0("${prefix}", 
".after_correction_gc.")) ++ ascat.bc = ascat.correctLogR(ascat.bc, GCcontentfile = gc_input) ++ #Plot raw data after correction ++ ascat.plotRawData(ascat.bc, img.prefix = paste0("$prefix", ".after_correction_gc.")) + } + } + +- # Segment the data +- ascat.bc = ascat.aspcf(ascat.bc, seed=42) +- +- # Plot the segmented data ++ #Segment the data ++ if($normal_exists) { ++ ascat.bc = ascat.aspcf(ascat.bc, seed=42, penalty = $penalty) ++ } else { ++ ascat.bc = ascat.aspcf(ascat.bc, seed=42, penalty = $penalty, ascat.gg = gg) ++ } ++ ++ #Plot the segmented data + ascat.plotSegmentedData(ascat.bc) + +- # Run ASCAT to fit every tumor to a model, inferring ploidy, normal cell contamination, +- # and discrete copy numbers +- # If psi and rho are manually set: +- if (!is.null(${purity}) && !is.null(${ploidy})){ +- ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=${purity}, psi_manual=${ploidy}) +- } else if(!is.null(${purity}) && is.null(${ploidy})){ +- ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=${purity}) +- } else if(!is.null(${ploidy}) && is.null(${purity})){ +- ascat.output <- ascat.runAscat(ascat.bc, gamma=1, psi_manual=${ploidy}) ++ #Run ASCAT to fit every tumor to a model, inferring ploidy, normal cell contamination, and discrete copy numbers ++ #If psi and rho are manually set: ++ if (!is.null($purity) && !is.null($ploidy)){ ++ ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=$purity, psi_manual=$ploidy, pdfPlot = $pdf_plots) ++ } else if(!is.null($purity) && is.null($ploidy)){ ++ ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=$purity, pdfPlot = $pdf_plots) ++ } else if(!is.null($ploidy) && is.null($purity)){ ++ ascat.output <- ascat.runAscat(ascat.bc, gamma=1, psi_manual=$ploidy, pdfPlot = $pdf_plots) + } else { +- ascat.output <- ascat.runAscat(ascat.bc, gamma=1) +- } +- +- # Extract metrics from ASCAT profiles ++ ascat.output <- ascat.runAscat(ascat.bc, gamma=1, pdfPlot = $pdf_plots) ++ } ++ ++ #Extract 
metrics from ASCAT profiles + QC = ascat.metrics(ascat.bc,ascat.output) + +- # Write out segmented regions (including regions with one copy of each allele) +- write.table(ascat.output[["segments"]], file=paste0("${prefix}", ".segments.txt"), sep="\t", quote=F, row.names=F) +- +- # Write out CNVs in bed format ++ #Write out segmented regions (including regions with one copy of each allele) ++ write.table(ascat.output[["segments"]], file=paste0("$prefix", ".segments.txt"), sep="\t", quote=F, row.names=F) ++ ++ #Write out raw segmented regions (including regions with one copy of each allele) ++ tryCatch({ # In case segments_raw is not selected ++ write.table( ++ ascat.output[["segments_raw"]], ++ file = paste0("$prefix", ".segments_raw.txt"), ++ sep = "\t", quote = FALSE, row.names = FALSE ++ ) ++ }, error = function(e) { ++ message("Error in writing segments_raw: ", conditionMessage(e)) ++ }) ++ ++ #Write out CNVs in bed format + cnvs=ascat.output[["segments"]][2:6] +- write.table(cnvs, file=paste0("${prefix}",".cnvs.txt"), sep="\t", quote=F, row.names=F, col.names=T) +- +- # Write out purity and ploidy info ++ write.table(cnvs, file=paste0("$prefix",".cnvs.txt"), sep="\t", quote=F, row.names=F, col.names=T) ++ ++ #Write out purity and ploidy info + summary <- tryCatch({ + matrix(c(ascat.output[["aberrantcellfraction"]], ascat.output[["ploidy"]]), ncol=2, byrow=TRUE)}, error = function(err) { + # error handler picks up where error was generated +@@ -214,46 +192,48 @@ + } + ) + colnames(summary) <- c("AberrantCellFraction","Ploidy") +- write.table(summary, file=paste0("${prefix}",".purityploidy.txt"), sep="\t", quote=F, row.names=F, col.names=T) +- +- write.table(QC, file=paste0("${prefix}", ".metrics.txt"), sep="\t", quote=F, row.names=F) +- +- # Version export ++ write.table(summary, file=paste0("$prefix",".purityploidy.txt"), sep="\t", quote=F, row.names=F, col.names=T) ++ ++ write.table(QC, file=paste0("$prefix", ".metrics.txt"), sep="\t", quote=F, row.names=F) ++ 
++ # version export + f <- file("versions.yml","w") + alleleCounter_version = system(paste("alleleCounter --version"), intern = T) +- ascat_version = as.character(packageVersion('ASCAT')) +- writeLines(paste0('"', "${task.process}", '"', ":"), f) ++ ascat_version = sessionInfo()\$otherPkgs\$ASCAT\$Version ++ writeLines(paste0('"', "$task.process", '"', ":"), f) ++ writeLines(paste(" alleleCounter:", alleleCounter_version), f) + writeLines(paste(" ascat:", ascat_version), f) +- writeLines(paste(" alleleCounter:", alleleCounter_version), f) + close(f) ++ + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ +- touch ${prefix}.after_correction.gc_rt.test.tumour.germline.png +- touch ${prefix}.after_correction.gc_rt.test.tumour.tumour.png +- touch ${prefix}.before_correction.test.tumour.germline.png +- touch ${prefix}.before_correction.test.tumour.tumour.png +- touch ${prefix}.cnvs.txt +- touch ${prefix}.metrics.txt +- touch ${prefix}.normal_alleleFrequencies_chr21.txt +- touch ${prefix}.normal_alleleFrequencies_chr22.txt +- touch ${prefix}.purityploidy.txt +- touch ${prefix}.segments.txt +- touch ${prefix}.tumour.ASPCF.png +- touch ${prefix}.tumour.sunrise.png +- touch ${prefix}.tumour_alleleFrequencies_chr21.txt +- touch ${prefix}.tumour_alleleFrequencies_chr22.txt +- touch ${prefix}.tumour_normalBAF.txt +- touch ${prefix}.tumour_normalLogR.txt +- touch ${prefix}.tumour_tumourBAF.txt +- touch ${prefix}.tumour_tumourLogR.txt +- +- cat <<-END_VERSIONS > versions.yml +- "${task.process}": +- bioconductor-ascat: \$(Rscript -e "library(ASCAT); cat(as.character(packageVersion('ASCAT')))") +- alleleCounter: \$(alleleCounter --version) +- END_VERSIONS +- """ ++ echo stub > ${prefix}.after_correction.gc_rt.test.tumour.germline.png ++ echo stub > ${prefix}.after_correction.gc_rt.test.tumour.tumour.png ++ echo stub > ${prefix}.before_correction.test.tumour.germline.png ++ echo stub > ${prefix}.before_correction.test.tumour.tumour.png ++ echo stub > 
${prefix}.cnvs.txt ++ echo stub > ${prefix}.metrics.txt ++ echo stub > ${prefix}.normal_alleleFrequencies_chr21.txt ++ echo stub > ${prefix}.normal_alleleFrequencies_chr22.txt ++ echo stub > ${prefix}.purityploidy.txt ++ echo stub > ${prefix}.segments.txt ++ echo stub > ${prefix}.segments_raw.txt ++ echo stub > ${prefix}.tumour.ASPCF.png ++ echo stub > ${prefix}.tumour.sunrise.png ++ echo stub > ${prefix}.tumour_alleleFrequencies_chr21.txt ++ echo stub > ${prefix}.tumour_alleleFrequencies_chr22.txt ++ echo stub > ${prefix}.tumour_normalBAF.txt ++ echo stub > ${prefix}.tumour_normalLogR.txt ++ echo stub > ${prefix}.tumour_tumourBAF.txt ++ echo stub > ${prefix}.tumour_tumourLogR.txt ++ ++ echo "${task.process}:" > versions.yml ++ echo ' alleleCounter: 4.3.0' >> versions.yml ++ echo ' ascat: 3.2.0' >> versions.yml ++ ++ """ ++ + } + +'modules/nf-core/ascat/environment.yml' is unchanged +Changes in 'ascat/tests/main.nf.test': +--- modules/nf-core/ascat/tests/main.nf.test ++++ modules/nf-core/ascat/tests/main.nf.test +@@ -55,6 +55,7 @@ + process.out.png.collect{it[1].collect{file(it).name}}, + process.out.purityploidy, + process.out.segments, ++ process.out.segments_raw, + process.out.versions, + path(process.out.versions[0]).yaml + ).match() } +@@ -109,6 +110,7 @@ + process.out.png.collect{it[1].collect{file(it).name}}, + process.out.purityploidy, + process.out.segments, ++ process.out.segments_raw, + process.out.versions, + path(process.out.versions[0]).yaml + ).match() } +@@ -163,6 +165,7 @@ + process.out.png.collect{it[1].collect{file(it).name}}, + process.out.purityploidy, + process.out.segments, ++ process.out.segments_raw, + process.out.versions, + path(process.out.versions[0]).yaml + ).match() } +@@ -218,6 +221,7 @@ + process.out.png.collect{it[1].collect{file(it).name}}, + process.out.purityploidy, + process.out.segments, ++ process.out.segments_raw, + process.out.versions, + path(process.out.versions[0]).yaml + ).match() } +@@ -296,6 +300,7 @@ + 
process.out.png.collect{it[1].collect{file(it).name}}, + process.out.purityploidy, + process.out.segments, ++ process.out.segments_raw, + process.out.versions, + path(process.out.versions[0]).yaml + ).match() } + +'modules/nf-core/ascat/tests/main.nf.test.snap' is unchanged +'modules/nf-core/ascat/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/ascat/environment.yml b/modules/nf-core/ascat/environment.yml index c5cfc59e..d19645cb 100644 --- a/modules/nf-core/ascat/environment.yml +++ b/modules/nf-core/ascat/environment.yml @@ -4,5 +4,5 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::ascat=3.1.1 + - bioconda::ascat=3.2.0 - bioconda::cancerit-allelecount=4.3.0 diff --git a/modules/nf-core/ascat/main.nf b/modules/nf-core/ascat/main.nf index 366a4ba4..846e9d6a 100644 --- a/modules/nf-core/ascat/main.nf +++ b/modules/nf-core/ascat/main.nf @@ -4,8 +4,8 @@ process ASCAT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-c278c7398beb73294d78639a864352abef2931ce:03f4a075e359bb32a613b098d13dba7b4c8c967f-0': - 'biocontainers/mulled-v2-c278c7398beb73294d78639a864352abef2931ce:03f4a075e359bb32a613b098d13dba7b4c8c967f-0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/4c/4cf02c7911ee5e974ce7db978810770efbd8d872ff5ab3462d2a11bcf022fab5/data': + 'community.wave.seqera.io/library/ascat_cancerit-allelecount:c3e8749fa4af0e99' }" input: tuple val(meta), path(input_normal), path(index_normal), path(input_tumor), path(index_tumor) @@ -42,7 +42,7 @@ process ASCAT { def penalty = args.penalty ? "$args.penalty" : "NULL" def gc_input = gc_file ? "$gc_file" : "NULL" def rt_input = rt_file ? "$rt_file" : "NULL" - + def pdf_plots = (args.pdf_plots ?: false) ? "TRUE" : "FALSE" def minCounts_arg = args.minCounts ? 
",minCounts = $args.minCounts" : "" def bed_file_arg = bed_file ? ",BED_file = '$bed_file'": "" def chrom_names_arg = args.chrom_names ? ",chrom_names = $args.chrom_names" : "" @@ -56,7 +56,7 @@ process ASCAT { def normal_bam = input_normal ? ",normalseqfile = '$input_normal'" : "" def normal_name = input_normal ? ",normalname = '${prefix}.normal'" : "" def longread_bins = args.longread_bins ? ",loci_binsize = $args.longread_bins" : "" - def allele_counter_flags = args.allele_counter_flags ? ",additional_allelecounter_flags = '$args.allele_counter_flags'" : "" + def allele_counter_flags = args.allele_counter_flags ? ",additional_allelecounter_flags = '$args.allele_counter_flags'" : "" """ #!/usr/bin/env Rscript library(RColorBrewer) @@ -153,13 +153,13 @@ process ASCAT { #Run ASCAT to fit every tumor to a model, inferring ploidy, normal cell contamination, and discrete copy numbers #If psi and rho are manually set: if (!is.null($purity) && !is.null($ploidy)){ - ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=$purity, psi_manual=$ploidy) + ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=$purity, psi_manual=$ploidy, pdfPlot = $pdf_plots) } else if(!is.null($purity) && is.null($ploidy)){ - ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=$purity) + ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=$purity, pdfPlot = $pdf_plots) } else if(!is.null($ploidy) && is.null($purity)){ - ascat.output <- ascat.runAscat(ascat.bc, gamma=1, psi_manual=$ploidy) + ascat.output <- ascat.runAscat(ascat.bc, gamma=1, psi_manual=$ploidy, pdfPlot = $pdf_plots) } else { - ascat.output <- ascat.runAscat(ascat.bc, gamma=1) + ascat.output <- ascat.runAscat(ascat.bc, gamma=1, pdfPlot = $pdf_plots) } #Extract metrics from ASCAT profiles @@ -172,7 +172,7 @@ process ASCAT { tryCatch({ # In case segments_raw is not selected write.table( ascat.output[["segments_raw"]], - file = paste0(prefix, ".segments_raw.txt"), + file = paste0("$prefix", 
".segments_raw.txt"), sep = "\t", quote = FALSE, row.names = FALSE ) }, error = function(e) { @@ -220,6 +220,7 @@ process ASCAT { echo stub > ${prefix}.normal_alleleFrequencies_chr22.txt echo stub > ${prefix}.purityploidy.txt echo stub > ${prefix}.segments.txt + echo stub > ${prefix}.segments_raw.txt echo stub > ${prefix}.tumour.ASPCF.png echo stub > ${prefix}.tumour.sunrise.png echo stub > ${prefix}.tumour_alleleFrequencies_chr21.txt @@ -231,9 +232,8 @@ process ASCAT { echo "${task.process}:" > versions.yml echo ' alleleCounter: 4.3.0' >> versions.yml - echo ' ascat: 3.0.0' >> versions.yml + echo ' ascat: 3.2.0' >> versions.yml """ - -} \ No newline at end of file +} diff --git a/modules/nf-core/ascat/meta.yml b/modules/nf-core/ascat/meta.yml index db7c9292..d69674ec 100644 --- a/modules/nf-core/ascat/meta.yml +++ b/modules/nf-core/ascat/meta.yml @@ -30,47 +30,57 @@ input: modifying chromosome notation in bam files please follow https://josephcckuo.wordpress.com/2016/11/17/modify-chromosome-notation-in-bam-file/. pattern: "*.{bam,cram}" + ontologies: [] - index_normal: type: file description: index for normal_bam/cram pattern: "*.{bai,crai}" + ontologies: [] - input_tumor: type: file description: BAM/CRAM file, must adhere to chr1, chr2, ...chrX notation pattern: "*.{bam,cram}" + ontologies: [] - index_tumor: type: file description: index for tumor_bam/cram pattern: "*.{bai,crai}" - - - allele_files: - type: file - description: allele files for ASCAT WGS. Can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS - - - loci_files: - type: file - description: loci files for ASCAT WGS. Loci files without chromosome notation - can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS - Make sure the chromosome notation matches the bam/cram input files. 
To add - the chromosome notation to loci files (hg19/hg38) if necessary, you can run - this command `if [[ $(samtools view | head -n1 | cut -f3)\" - == *\"chr\"* ]]; then for i in {1..22} X; do sed -i 's/^/chr/' G1000_loci_hg19_chr_${i}.txt; - done; fi` - - - bed_file: - type: file - description: Bed file for ASCAT WES (optional, but recommended for WES) - - - fasta: - type: file - description: Reference fasta file (optional) - - - gc_file: - type: file - description: GC correction file (optional) - Used to do logR correction of the - tumour sample(s) with genomic GC content - - - rt_file: - type: file - description: replication timing correction file (optional, provide only in combination - with gc_file) + ontologies: [] + - allele_files: + type: file + description: allele files for ASCAT WGS. Can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS + ontologies: [] + - loci_files: + type: file + description: loci files for ASCAT WGS. Loci files without chromosome notation + can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS + Make sure the chromosome notation matches the bam/cram input files. 
To add the + chromosome notation to loci files (hg19/hg38) if necessary, you can run this + command `if [[ $(samtools view | head -n1 | cut -f3)\" == + *\"chr\"* ]]; then for i in {1..22} X; do sed -i 's/^/chr/' G1000_loci_hg19_chr_${i}.txt; + done; fi` + ontologies: [] + - bed_file: + type: file + description: Bed file for ASCAT WES (optional, but recommended for WES) + ontologies: [] + - fasta: + type: file + description: Reference fasta file (optional) + ontologies: [] + - gc_file: + type: file + description: GC correction file (optional) - Used to do logR correction of the + tumour sample(s) with genomic GC content + ontologies: [] + - rt_file: + type: file + description: replication timing correction file (optional, provide only in combination + with gc_file) + ontologies: [] output: - - allelefreqs: - - meta: + allelefreqs: + - - meta: type: map description: | Groovy Map containing sample information @@ -79,8 +89,9 @@ output: type: file description: Files containing allee frequencies per chromosome pattern: "*{alleleFrequencies_chr*.txt}" - - bafs: - - meta: + ontologies: [] + bafs: + - - meta: type: map description: | Groovy Map containing sample information @@ -88,8 +99,9 @@ output: - "*BAF.txt": type: file description: BAF file - - cnvs: - - meta: + ontologies: [] + cnvs: + - - meta: type: map description: | Groovy Map containing sample information @@ -97,8 +109,9 @@ output: - "*cnvs.txt": type: file description: CNV file - - logrs: - - meta: + ontologies: [] + logrs: + - - meta: type: map description: | Groovy Map containing sample information @@ -106,8 +119,9 @@ output: - "*LogR.txt": type: file description: LogR file - - metrics: - - meta: + ontologies: [] + metrics: + - - meta: type: map description: | Groovy Map containing sample information @@ -116,8 +130,9 @@ output: type: file description: File containing quality metrics pattern: "*.{metrics.txt}" - - png: - - meta: + ontologies: [] + png: + - - meta: type: map description: | Groovy Map containing 
sample information @@ -126,8 +141,9 @@ output: type: file description: ASCAT plots pattern: "*.{png}" - - purityploidy: - - meta: + ontologies: [] + purityploidy: + - - meta: type: map description: | Groovy Map containing sample information @@ -136,8 +152,9 @@ output: type: file description: File with purity and ploidy data pattern: "*.{purityploidy.txt}" - - segments: - - meta: + ontologies: [] + segments: + - - meta: type: map description: | Groovy Map containing sample information @@ -146,11 +163,14 @@ output: type: file description: File with segments data pattern: "*.{segments.txt}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@aasNGC" - "@lassefolkersen" diff --git a/modules/nf-core/ascat/tests/main.nf.test b/modules/nf-core/ascat/tests/main.nf.test new file mode 100644 index 00000000..759abb38 --- /dev/null +++ b/modules/nf-core/ascat/tests/main.nf.test @@ -0,0 +1,345 @@ +nextflow_process { + + name "Test Process ASCAT" + script "../main.nf" + process "ASCAT" + + tag "modules" + tag "modules_nfcore" + tag "ascat" + config "./nextflow.config" + + test("human - bam - GC") { + when { + params { + module_args = [ + gender : 'XY', + genomeVersion : 'hg19', + minCounts : '1', + min_base_qual : '1', + min_map_qual : '1', + chrom_names : 'c("21")' + ] + } + process { + """ + input[0] = [ + [ id: 'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_alleles_hg38_chr21.txt', checkIfExists: true)] + input[2] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_loci_hg38_chr21.txt', checkIfExists: true)] + input[3] = [] + input[4] = [] + input[5] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/GC_G1000_hg38_21.txt', checkIfExists: true)] + input[6] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.allelefreqs, + process.out.bafs, + process.out.cnvs, + // Logrs Tumour has a float margin discrepancy in conda due to + // log and mean transformation + process.out.logrs.collect{it[1].collect{file(it).name}}, + process.out.metrics, + // This discrepancy affect the png generated + process.out.png.collect{it[1].collect{file(it).name}}, + process.out.purityploidy, + process.out.segments, + process.out.segments_raw, + process.out.versions, + path(process.out.versions[0]).yaml + ).match() } + ) + } + } + + test("human - bam - GC - additional_allelecounter") { + when { + params { + module_args = [ + gender : 'XY', + genomeVersion : 'hg19', + minCounts : '1', + min_base_qual : '1', + min_map_qual : '1', + chrom_names : 'c("21")', + additional_allelecounter_flags: '-f 0' + ] + } + process { + """ + input[0] = [ + [ id: 'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam.bai', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_alleles_hg38_chr21.txt', checkIfExists: true)] + input[2] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_loci_hg38_chr21.txt', checkIfExists: true)] + input[3] = [] + input[4] = [] + input[5] = [] + input[6] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.allelefreqs, + process.out.bafs, + process.out.cnvs, + // Logrs Tumour has a float margin discrepancy in conda due to + // log and mean transformation + process.out.logrs.collect{it[1].collect{file(it).name}}, + process.out.metrics, + // This discrepancy affect the png generated + process.out.png.collect{it[1].collect{file(it).name}}, + process.out.purityploidy, + process.out.segments, + process.out.segments_raw, + process.out.versions, + path(process.out.versions[0]).yaml + ).match() } + ) + } + } + + test("human - cram - GC - RT") { + + when { + params { + module_args = [ + gender : 'XY', + genomeVersion : 'hg19', + minCounts : '1', + min_base_qual : '1', + min_map_qual : '1', + chrom_names : 'c("21")' + ] + } + process { + """ + input[0] = [ + [ id: 'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram.crai', checkIfExists: true) + ] + input[1] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_alleles_hg38_chr21.txt', checkIfExists: true)] + input[2] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_loci_hg38_chr21.txt', checkIfExists: true)] + input[3] = [] + input[4] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)] + input[5] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/GC_G1000_hg38_21.txt', checkIfExists: true)] + input[6] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/RT_G1000_hg38_21.txt', checkIfExists: true)] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.allelefreqs, + process.out.bafs, + process.out.cnvs, + // Logrs Tumour has a float margin discrepancy in conda due to + // log and mean transformation + process.out.logrs.collect{it[1].collect{file(it).name}}, + process.out.metrics, + // This discrepancy affect the png generated + process.out.png.collect{it[1].collect{file(it).name}}, + process.out.purityploidy, + process.out.segments, + process.out.segments_raw, + process.out.versions, + path(process.out.versions[0]).yaml + ).match() } + ) + } + } + + test("human - cram - GC - RT - additional_allelecounter") { + + when { + params { + module_args = [ + gender : 'XY', + genomeVersion : 'hg19', + minCounts : '1', + min_base_qual : '1', + min_map_qual : '1', + chrom_names : 'c("21")', + additional_allelecounter_flags: '-f 0' + ] + } + process { + """ + input[0] = [ + [ id: 'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram.crai', checkIfExists: true) + ] + input[1] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_alleles_hg38_chr21.txt', checkIfExists: true)] + input[2] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_loci_hg38_chr21.txt', checkIfExists: true)] + input[3] = [] + input[4] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)] + input[5] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/GC_G1000_hg38_21.txt', checkIfExists: true)] + input[6] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/RT_G1000_hg38_21.txt', checkIfExists: true)] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.allelefreqs, + process.out.bafs, + process.out.cnvs, + // Logrs Tumour has a float margin discrepancy in conda due to + // log and mean transformation + process.out.logrs.collect{it[1].collect{file(it).name}}, + process.out.metrics, + // This discrepancy affect the png generated + process.out.png.collect{it[1].collect{file(it).name}}, + process.out.purityploidy, + process.out.segments, + process.out.segments_raw, + process.out.versions, + path(process.out.versions[0]).yaml + ).match() } + ) + } + } + + test("human - bam - GC - folder inputs") { + when { + params { + module_args = [ + gender : 'XY', + genomeVersion : 'hg19', + minCounts : '1', + min_base_qual : '1', + min_map_qual : '1', + chrom_names : 'c("21")' + ] + 
allele_folder = "G1000_alleles_hg38" + loci_folder = "G1000_loci_hg38" + gc_folder = "GC_G1000_hg38_21" + rt_folder = "RT_G1000_hg38_21" + } + process { + """ + + file(params.allele_folder).mkdirs() + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_alleles_hg38_chr21.txt', checkIfExists: true) + .copyTo(params.allele_folder + '/G1000_alleles_hg38_chr21.txt') + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_alleles_hg38_chr22.txt', checkIfExists: true) + .copyTo(params.allele_folder + '/G1000_alleles_hg38_chr22.txt') + + file(params.loci_folder).mkdirs() + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_loci_hg38_chr21.txt', checkIfExists: true) + .copyTo(params.loci_folder + '/G1000_loci_hg38_chr21.txt') + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_loci_hg38_chr22.txt', checkIfExists: true) + .copyTo(params.loci_folder + '/G1000_loci_hg38_chr22.txt') + + file(params.gc_folder).mkdirs() + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/GC_G1000_hg38_21.txt', checkIfExists: true) + .copyTo(params.gc_folder + '/GC_G1000_hg38_21.txt') + + file(params.rt_folder).mkdirs() + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/RT_G1000_hg38_21.txt', checkIfExists: true) + .copyTo(params.rt_folder + '/RT_G1000_hg38_21.txt') + + input[0] = [ + [ id: 'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam.bai', checkIfExists: true) + ] + input[1] = file(params.allele_folder, checkIfExists: true) + input[2] = file(params.loci_folder, checkIfExists: true) + input[3] = [] + input[4] = [] + input[5] = file(params.gc_folder, checkIfExists: true) + input[6] = file(params.rt_folder, checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.allelefreqs, + process.out.bafs, + process.out.cnvs, + // Logrs Tumour has a float margin discrepancy in conda due to + // log and mean transformation + process.out.logrs.collect{it[1].collect{file(it).name}}, + process.out.metrics, + // This discrepancy affect the png generated + process.out.png.collect{it[1].collect{file(it).name}}, + process.out.purityploidy, + process.out.segments, + process.out.segments_raw, + process.out.versions, + path(process.out.versions[0]).yaml + ).match() } + ) + } + } + + test("human - bam - stub") { + + options "-stub" + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id: 'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + input[6] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/ascat/tests/main.nf.test.snap 
b/modules/nf-core/ascat/tests/main.nf.test.snap new file mode 100644 index 00000000..7db5d76d --- /dev/null +++ b/modules/nf-core/ascat/tests/main.nf.test.snap @@ -0,0 +1,677 @@ +{ + "human - bam - GC": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.normal_alleleFrequencies_chr21.txt:md5,627382ea8ab013d2fb3307a4f9abb058", + "test.tumour_alleleFrequencies_chr21.txt:md5,79000d7e2f57c3492204a19649d327b1" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.tumour_normalBAF.txt:md5,bee93180f7346edc1ead76f2ac150290", + "test.tumour_normalBAF_rawBAF.txt:md5,4c6e12d9e04ac14e115796a08c82a1a9", + "test.tumour_tumourBAF.txt:md5,33bab5381edd65675458e72a492c4ef1", + "test.tumour_tumourBAF_rawBAF.txt:md5,7bab22a530cb8294542a6bc6cace9ae5" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.cnvs.txt:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + [ + "test.tumour_normalLogR.txt", + "test.tumour_tumourLogR.txt" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.metrics.txt:md5,6b7f492c84c2ee3038c9f2856a87b496" + ] + ], + [ + [ + "test.after_correction_gc.test.tumour.germline.png", + "test.after_correction_gc.test.tumour.tumour.png", + "test.before_correction.test.tumour.germline.png", + "test.before_correction.test.tumour.tumour.png", + "test.tumour.ASPCF.png", + "test.tumour.sunrise.png" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.purityploidy.txt:md5,f1484c2b120834d3db8774ad02a038b9" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.segments.txt:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + "versions.yml:md5,75598081a811afded7d7c1a0d08405ac" + ], + { + "ASCAT": { + "ascat": "3.2.0", + "alleleCounter": "4.3.0" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.3" + }, + "timestamp": "2025-06-25T12:07:29.534085987" + }, + "human - bam - GC - folder inputs": { + "content": [ + [ + [ + { + "id": 
"test", + "single_end": false + }, + [ + "test.normal_alleleFrequencies_chr21.txt:md5,627382ea8ab013d2fb3307a4f9abb058", + "test.tumour_alleleFrequencies_chr21.txt:md5,79000d7e2f57c3492204a19649d327b1" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.tumour_normalBAF.txt:md5,bee93180f7346edc1ead76f2ac150290", + "test.tumour_normalBAF_rawBAF.txt:md5,4c6e12d9e04ac14e115796a08c82a1a9", + "test.tumour_tumourBAF.txt:md5,33bab5381edd65675458e72a492c4ef1", + "test.tumour_tumourBAF_rawBAF.txt:md5,7bab22a530cb8294542a6bc6cace9ae5" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.cnvs.txt:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + [ + "test.tumour_normalLogR.txt", + "test.tumour_tumourLogR.txt" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.metrics.txt:md5,3502b78584e5251db0fcbd71ed134480" + ] + ], + [ + [ + "test.after_correction_gc_rt.test.tumour.germline.png", + "test.after_correction_gc_rt.test.tumour.tumour.png", + "test.before_correction.test.tumour.germline.png", + "test.before_correction.test.tumour.tumour.png", + "test.tumour.ASPCF.png", + "test.tumour.sunrise.png" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.purityploidy.txt:md5,f1484c2b120834d3db8774ad02a038b9" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.segments.txt:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + "versions.yml:md5,75598081a811afded7d7c1a0d08405ac" + ], + { + "ASCAT": { + "ascat": "3.2.0", + "alleleCounter": "4.3.0" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.3" + }, + "timestamp": "2025-06-25T12:18:54.510148361" + }, + "human - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.normal_alleleFrequencies_chr21.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.normal_alleleFrequencies_chr22.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"test.tumour_alleleFrequencies_chr21.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tumour_alleleFrequencies_chr22.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.tumour_normalBAF.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tumour_tumourBAF.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.cnvs.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.tumour_normalLogR.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tumour_tumourLogR.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.after_correction.gc_rt.test.tumour.germline.png:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.after_correction.gc_rt.test.tumour.tumour.png:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.before_correction.test.tumour.germline.png:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.before_correction.test.tumour.tumour.png:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tumour.ASPCF.png:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tumour.sunrise.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.purityploidy.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.segments.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + "versions.yml:md5,5c8663bfecf62e5eb460f76ee167d410" + ], + "allelefreqs": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.normal_alleleFrequencies_chr21.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.normal_alleleFrequencies_chr22.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"test.tumour_alleleFrequencies_chr21.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tumour_alleleFrequencies_chr22.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bafs": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.tumour_normalBAF.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tumour_tumourBAF.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "cnvs": [ + [ + { + "id": "test", + "single_end": false + }, + "test.cnvs.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "logrs": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.tumour_normalLogR.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tumour_tumourLogR.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "metrics": [ + [ + { + "id": "test", + "single_end": false + }, + "test.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "png": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.after_correction.gc_rt.test.tumour.germline.png:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.after_correction.gc_rt.test.tumour.tumour.png:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.before_correction.test.tumour.germline.png:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.before_correction.test.tumour.tumour.png:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tumour.ASPCF.png:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tumour.sunrise.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "purityploidy": [ + [ + { + "id": "test", + "single_end": false + }, + "test.purityploidy.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "segments": [ + [ + { + "id": "test", + "single_end": false + }, + "test.segments.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5c8663bfecf62e5eb460f76ee167d410" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.3" + }, + "timestamp": "2025-06-16T12:14:45.315601183" + }, + "human - cram - GC - RT - additional_allelecounter": { + "content": [ + [ + [ + 
{ + "id": "test", + "single_end": false + }, + [ + "test.normal_alleleFrequencies_chr21.txt:md5,627382ea8ab013d2fb3307a4f9abb058", + "test.tumour_alleleFrequencies_chr21.txt:md5,79000d7e2f57c3492204a19649d327b1" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.tumour_normalBAF.txt:md5,bee93180f7346edc1ead76f2ac150290", + "test.tumour_normalBAF_rawBAF.txt:md5,4c6e12d9e04ac14e115796a08c82a1a9", + "test.tumour_tumourBAF.txt:md5,33bab5381edd65675458e72a492c4ef1", + "test.tumour_tumourBAF_rawBAF.txt:md5,7bab22a530cb8294542a6bc6cace9ae5" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.cnvs.txt:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + [ + "test.tumour_normalLogR.txt", + "test.tumour_tumourLogR.txt" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.metrics.txt:md5,3502b78584e5251db0fcbd71ed134480" + ] + ], + [ + [ + "test.after_correction_gc_rt.test.tumour.germline.png", + "test.after_correction_gc_rt.test.tumour.tumour.png", + "test.before_correction.test.tumour.germline.png", + "test.before_correction.test.tumour.tumour.png", + "test.tumour.ASPCF.png", + "test.tumour.sunrise.png" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.purityploidy.txt:md5,f1484c2b120834d3db8774ad02a038b9" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.segments.txt:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + "versions.yml:md5,75598081a811afded7d7c1a0d08405ac" + ], + { + "ASCAT": { + "ascat": "3.2.0", + "alleleCounter": "4.3.0" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.3" + }, + "timestamp": "2025-06-20T12:46:05.832790403" + }, + "human - cram - GC - RT": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.normal_alleleFrequencies_chr21.txt:md5,627382ea8ab013d2fb3307a4f9abb058", + "test.tumour_alleleFrequencies_chr21.txt:md5,79000d7e2f57c3492204a19649d327b1" + ] + ] + ], + [ + [ + { + "id": 
"test", + "single_end": false + }, + [ + "test.tumour_normalBAF.txt:md5,bee93180f7346edc1ead76f2ac150290", + "test.tumour_normalBAF_rawBAF.txt:md5,4c6e12d9e04ac14e115796a08c82a1a9", + "test.tumour_tumourBAF.txt:md5,33bab5381edd65675458e72a492c4ef1", + "test.tumour_tumourBAF_rawBAF.txt:md5,7bab22a530cb8294542a6bc6cace9ae5" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.cnvs.txt:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + [ + "test.tumour_normalLogR.txt", + "test.tumour_tumourLogR.txt" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.metrics.txt:md5,3502b78584e5251db0fcbd71ed134480" + ] + ], + [ + [ + "test.after_correction_gc_rt.test.tumour.germline.png", + "test.after_correction_gc_rt.test.tumour.tumour.png", + "test.before_correction.test.tumour.germline.png", + "test.before_correction.test.tumour.tumour.png", + "test.tumour.ASPCF.png", + "test.tumour.sunrise.png" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.purityploidy.txt:md5,f1484c2b120834d3db8774ad02a038b9" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.segments.txt:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + "versions.yml:md5,75598081a811afded7d7c1a0d08405ac" + ], + { + "ASCAT": { + "ascat": "3.2.0", + "alleleCounter": "4.3.0" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.3" + }, + "timestamp": "2025-06-16T15:51:14.807808184" + }, + "human - bam - GC - additional_allelecounter": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.normal_alleleFrequencies_chr21.txt:md5,627382ea8ab013d2fb3307a4f9abb058", + "test.tumour_alleleFrequencies_chr21.txt:md5,79000d7e2f57c3492204a19649d327b1" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.tumour_normalBAF.txt:md5,bee93180f7346edc1ead76f2ac150290", + "test.tumour_normalBAF_rawBAF.txt:md5,4c6e12d9e04ac14e115796a08c82a1a9", + 
"test.tumour_tumourBAF.txt:md5,33bab5381edd65675458e72a492c4ef1", + "test.tumour_tumourBAF_rawBAF.txt:md5,7bab22a530cb8294542a6bc6cace9ae5" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.cnvs.txt:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + [ + "test.tumour_normalLogR.txt", + "test.tumour_tumourLogR.txt" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.metrics.txt:md5,6e473be3fb2226d493e48a73df2f4501" + ] + ], + [ + [ + "test.before_correction.test.tumour.germline.png", + "test.before_correction.test.tumour.tumour.png", + "test.tumour.ASPCF.png", + "test.tumour.sunrise.png" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.purityploidy.txt:md5,f1484c2b120834d3db8774ad02a038b9" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.segments.txt:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + "versions.yml:md5,75598081a811afded7d7c1a0d08405ac" + ], + { + "ASCAT": { + "ascat": "3.2.0", + "alleleCounter": "4.3.0" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.3" + }, + "timestamp": "2025-06-20T12:45:23.273839886" + } +} \ No newline at end of file diff --git a/modules/nf-core/ascat/tests/nextflow.config b/modules/nf-core/ascat/tests/nextflow.config new file mode 100644 index 00000000..8e4f96a1 --- /dev/null +++ b/modules/nf-core/ascat/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: "ASCAT" { + ext.args = params.module_args + } +} diff --git a/modules/nf-core/bcftools/annotate/environment.yml b/modules/nf-core/bcftools/annotate/environment.yml new file mode 100644 index 00000000..cb55500b --- /dev/null +++ b/modules/nf-core/bcftools/annotate/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::bcftools=1.22 + 
- bioconda::htslib=1.22.1 diff --git a/modules/nf-core/bcftools/annotate/main.nf b/modules/nf-core/bcftools/annotate/main.nf new file mode 100644 index 00000000..18778cc2 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/main.nf @@ -0,0 +1,81 @@ +process BCFTOOLS_ANNOTATE { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(input), path(index), path(annotations), path(annotations_index), path(columns), path(header_lines), path(rename_chrs) + + output: + tuple val(meta), path("${prefix}.${extension}"), emit: vcf + tuple val(meta), path("${prefix}.${extension}.tbi"), emit: tbi, optional: true + tuple val(meta), path("${prefix}.${extension}.csi"), emit: csi, optional: true + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def annotations_file = annotations ? "--annotations ${annotations}" : '' + def columns_file = columns ? "--columns-file ${columns}" : '' + def header_file = header_lines ? "--header-lines ${header_lines}" : '' + def rename_chrs_file = rename_chrs ? "--rename-chrs ${rename_chrs}" : '' + extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : "vcf" + def index_command = !index ? 
"bcftools index ${input}" : '' + + if ("${input}" == "${prefix}.${extension}") { + error("Input and output names are the same, set prefix in module configuration to disambiguate!") + } + """ + ${index_command} + + bcftools \\ + annotate \\ + ${args} \\ + ${annotations_file} \\ + ${columns_file} \\ + ${header_file} \\ + ${rename_chrs_file} \\ + --output ${prefix}.${extension} \\ + --threads ${task.cpus} \\ + ${input} + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : "vcf" + def index_extension = args.contains("--write-index=tbi") || args.contains("-W=tbi") + ? "tbi" + : args.contains("--write-index=csi") || args.contains("-W=csi") + ? "csi" + : args.contains("--write-index") || args.contains("-W") ? "csi" : "" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index_extension.matches("csi|tbi") ? "touch ${prefix}.${extension}.${index_extension}" : "" + + if ("${input}" == "${prefix}.${extension}") { + error("Input and output names are the same, set prefix in module configuration to disambiguate!") + } + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + """ +} diff --git a/modules/nf-core/bcftools/annotate/meta.yml b/modules/nf-core/bcftools/annotate/meta.yml new file mode 100644 index 00000000..86331661 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/meta.yml @@ -0,0 +1,112 @@ +name: bcftools_annotate +description: Add or remove annotations. +keywords: + - bcftools + - annotate + - vcf + - remove + - add +tools: + - annotate: + description: Add or remove annotations. 
+ homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: https://samtools.github.io/bcftools/bcftools.html#annotate + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: Query VCF or BCF file, can be either uncompressed or compressed + ontologies: [] + - index: + type: file + description: Index of the query VCF or BCF file + ontologies: [] + - annotations: + type: file + description: Bgzip-compressed file with annotations + ontologies: [] + - annotations_index: + type: file + description: Index of the annotations file + ontologies: [] + - columns: + type: file + description: List of columns in the annotations file, one name per row + ontologies: [] + - header_lines: + type: file + description: Contains lines to append to the output VCF header + ontologies: [] + - rename_chrs: + type: file + description: Rename annotations according to this file containing "old_name new_name\n" + pairs separated by whitespaces, each on a separate line. + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.${extension}": + type: file + description: Compressed annotated VCF file + pattern: "*{vcf,vcf.gz,bcf,bcf.gz}" + ontologies: [] + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.${extension}.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "${prefix}.${extension}.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@projectoriented" + - "@ramprasadn" +maintainers: + - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/annotate/tests/main.nf.test b/modules/nf-core/bcftools/annotate/tests/main.nf.test new file mode 100644 index 00000000..3e1d2573 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/main.nf.test @@ -0,0 +1,429 @@ +nextflow_process { + + name "Test Process BCFTOOLS_ANNOTATE" + script "../main.nf" + config "./nextflow.config" + process "BCFTOOLS_ANNOTATE" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/annotate" + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_output") { + + when { + params { + args_modules = "-x ID,INFO/DP,FORMAT/DP --output-type z" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name + ":variantsMD5," + path(it).vcf.variantsMD5 }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, [], annotation, annotation_tbi], [], [], [] - vcf_output") { + + when { + params { + args_modules = "-x ID,INFO/DP,FORMAT/DP --output-type z" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name + ":variantsMD5," + path(it).vcf.variantsMD5 }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index") { + + when { + params { + args_modules = "--output-type z --write-index --no-version" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name + ":variantsMD5," + path(it).vcf.variantsMD5 }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_csi") { + + when { + params { + args_modules = "--output-type z --write-index=csi --no-version" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name + ":variantsMD5," + path(it).vcf.variantsMD5 }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_tbi") { + + when { + params { + args_modules = "--output-type z --write-index=tbi --no-version" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name + ":variantsMD5," + path(it).vcf.variantsMD5 }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, [], annotation, annotation_tbi], [], header, [] - bcf_output") { + + when { + params { + args_modules = "-x ID,INFO/DP,FORMAT/DP --output-type u" + } + process { + """ + header = channel.of( + '##INFO=', + '##INFO=' + ).collectFile(name:"headers.vcf", newLine:true) + input[0] = channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [] + ]) + .combine(header) + .combine(channel.of([[]])) + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, [], annotation, annotation_tbi], columns, [], [] - bcf_output") { + + when { + params { + args_modules = "-x ID,INFO/DP,FORMAT/DP --output-type u" + } + process { + """ + columns = channel.of('INFO/ICB', 'INFO/HOB', 'INFO/DP4').collectFile(name:"columns.txt", newLine:true) + input[0] = channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ]).combine(columns) + .combine(channel.of([[], []])) + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, [], annotation, annotation_tbi], [], header, rename_chrs - vcf_gz_index") { + + when { + params { + args_modules = "--output-type z --write-index --no-version" + } + process { + """ + headers = channel.of( + '##INFO=', + '##INFO=' + ).collectFile(name:"headers.vcf", newLine:true) + rename = channel.of('MT192765.1 renamed').collectFile(name:"rename.txt", newLine:true) + input[0] = channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [] + ]).combine(headers) + .combine(rename) + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - stub") { + + options "-stub" + + when { + params { + args_modules = "-x ID,INFO/DP,FORMAT/DP --output-type z" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index - stub") { + + options "-stub" + + when { + params { + args_modules = "--output-type z --write-index --no-version" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_csi - stub") { + + options "-stub" + + when { + params { + args_modules = "--output-type z --write-index=csi --no-version" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_tbi - stub") { + + options "-stub" + + when { + params { + args_modules = "--output-type z --write-index=tbi --no-version" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } +} diff --git a/modules/nf-core/bcftools/annotate/tests/main.nf.test.snap b/modules/nf-core/bcftools/annotate/tests/main.nf.test.snap new file mode 100644 index 00000000..10af196a --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/main.nf.test.snap @@ -0,0 +1,440 @@ +{ + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.csi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:29:19.618749659" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz:variantsMD5,bc7bf3ee9e8430e064c539eb81e59bf9" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.tbi" + ] + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:43.350060834" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz:variantsMD5,bc7bf3ee9e8430e064c539eb81e59bf9" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.csi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:36.101003418" + }, + "sarscov2 - [vcf, 
tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_tbi - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.tbi" + ] + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:29:34.19449127" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_csi - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.csi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:29:26.927815399" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz:variantsMD5,bc7bf3ee9e8430e064c539eb81e59bf9" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.csi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:28.891823681" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz" + ] + ], + [ + + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:29:12.400301681" + }, + "sarscov2 - [vcf, [], annotation, annotation_tbi], [], header, 
[] - bcf_output": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.bcf" + ] + ], + [ + + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:50.375384421" + }, + "sarscov2 - [vcf, [], annotation, annotation_tbi], [], [], [] - vcf_output": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz:variantsMD5,250b64289ab9d48f76359d01699fdf7d" + ] + ], + [ + + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:21.320211288" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_output": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz:variantsMD5,250b64289ab9d48f76359d01699fdf7d" + ] + ], + [ + + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:13.274072987" + }, + "sarscov2 - [vcf, [], annotation, annotation_tbi], [], header, rename_chrs - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.csi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:29:05.094685409" + }, + "sarscov2 - [vcf, [], annotation, annotation_tbi], columns, [], [] - bcf_output": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.bcf" + ] + ], + [ + + ], + [ + + ], + { + 
"versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:57.906382655" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/annotate/tests/nextflow.config b/modules/nf-core/bcftools/annotate/tests/nextflow.config new file mode 100644 index 00000000..10235100 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/nextflow.config @@ -0,0 +1,4 @@ +process { + ext.args = "${params.args_modules}" + ext.prefix = { "${meta.id}_ann" } +} diff --git a/modules/nf-core/bcftools/concat/environment.yml b/modules/nf-core/bcftools/concat/environment.yml index ba863b38..cb55500b 100644 --- a/modules/nf-core/bcftools/concat/environment.yml +++ b/modules/nf-core/bcftools/concat/environment.yml @@ -5,6 +5,5 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/htslib - - bioconda::htslib=1.22.1 - # renovate: datasource=conda depName=bioconda/bcftools - bioconda::bcftools=1.22 + - bioconda::htslib=1.22.1 diff --git a/modules/nf-core/bcftools/concat/main.nf b/modules/nf-core/bcftools/concat/main.nf index 5415b069..269da8e0 100644 --- a/modules/nf-core/bcftools/concat/main.nf +++ b/modules/nf-core/bcftools/concat/main.nf @@ -1,72 +1,73 @@ process BCFTOOLS_CONCAT { - tag "$meta.id" + tag "${meta.id}" label 'process_medium' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data': - 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" input: tuple val(meta), path(vcfs), path(tbi) output: - tuple val(meta), path("${prefix}.${extension}") , emit: vcf + tuple val(meta), path("${prefix}.${extension}"), emit: vcf tuple val(meta), path("${prefix}.${extension}.tbi"), emit: tbi, optional: true tuple val(meta), path("${prefix}.${extension}.csi"), emit: csi, optional: true - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" def tbi_names = tbi.findAll { file -> !(file instanceof List) }.collect { file -> file.name } def create_input_index = vcfs.collect { vcf -> tbi_names.contains(vcf.name + ".tbi") || tbi_names.contains(vcf.name + ".csi") ? "" : "tabix ${vcf}" }.join("\n ") - extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : - args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : - args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : - args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : - "vcf" - def input = vcfs.sort{it.toString()}.join(" ") + extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? 
"vcf" + : "vcf" + def input = vcfs.sort { vcf -> vcf.toString() }.join(" ") """ ${create_input_index} bcftools concat \\ --output ${prefix}.${extension} \\ - $args \\ - --threads $task.cpus \\ + ${args} \\ + --threads ${task.cpus} \\ ${input} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS """ stub: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : - args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : - args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : - args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : - "vcf" - def index_extension = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : - args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : - args.contains("--write-index") || args.contains("-W") ? "csi" : - "" + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? "vcf" + : "vcf" + def index_extension = args.contains("--write-index=tbi") || args.contains("-W=tbi") + ? "tbi" + : args.contains("--write-index=csi") || args.contains("-W=csi") + ? "csi" + : args.contains("--write-index") || args.contains("-W") + ? "csi" + : "" def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" def create_index = extension.endsWith(".gz") && index_extension.matches("csi|tbi") ? 
"touch ${prefix}.${extension}.${index_extension}" : "" """ ${create_cmd} ${prefix}.${extension} ${create_index} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/bcftools/concat/meta.yml b/modules/nf-core/bcftools/concat/meta.yml index 5e14b5a3..1734a8c3 100644 --- a/modules/nf-core/bcftools/concat/meta.yml +++ b/modules/nf-core/bcftools/concat/meta.yml @@ -67,13 +67,27 @@ output: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] pattern: "*.csi" + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool authors: - "@abhi18av" - "@nvnieuwk" diff --git a/modules/nf-core/bcftools/concat/tests/main.nf.test b/modules/nf-core/bcftools/concat/tests/main.nf.test index 442f4b4e..58e19991 100644 --- a/modules/nf-core/bcftools/concat/tests/main.nf.test +++ b/modules/nf-core/bcftools/concat/tests/main.nf.test @@ -19,12 +19,12 @@ nextflow_process { input[0] = [ [ id:'test3' ], // meta map [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz', checkIfExists: true) ], [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz.csi', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz.csi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi', checkIfExists: true) ] ] """ @@ -37,7 +37,6 @@ nextflow_process { { assert snapshot(process.out).match() } ) } - } test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index") { @@ -50,12 +49,12 @@ nextflow_process { input[0] = [ [ id:'test3' ], // meta map [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz', checkIfExists: true) ], [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz.csi', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi', checkIfExists: true) + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz.csi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi', checkIfExists: true) ] ] """ @@ -69,12 +68,11 @@ nextflow_process { process.out.vcf, process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions") } ).match() }, { assert process.out.csi[0][1].endsWith(".csi") } ) } - } test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_csi") { @@ -87,12 +85,12 @@ nextflow_process { input[0] = [ [ id:'test3' ], // meta map [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz', checkIfExists: true) ], [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz.csi', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz.csi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi', checkIfExists: true) ] ] """ @@ -106,12 +104,11 @@ nextflow_process { process.out.vcf, process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions") } ).match() }, { assert process.out.csi[0][1].endsWith(".csi") } ) } - } test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_tbi") { @@ -124,12 +121,12 @@ nextflow_process { input[0] = [ [ id:'test3' ], // meta map [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz', checkIfExists: true) ], [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz.csi', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz.csi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi', checkIfExists: true) ] ] """ @@ -143,12 +140,11 @@ nextflow_process { process.out.vcf, process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, process.out.tbi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions") } ).match() }, { assert process.out.tbi[0][1].endsWith(".tbi") } ) } - } @@ -162,8 +158,8 @@ nextflow_process { input[0] = [ [ id:'test3' ], // meta map [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz', checkIfExists: true) ], [] ] @@ -177,7 +173,6 @@ nextflow_process { { assert snapshot(process.out).match() } ) } - } test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - stub") { @@ -191,12 +186,12 @@ nextflow_process { input[0] = [ [ id:'test3' ], // meta map [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz', checkIfExists: true) ], [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz.csi', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz.csi', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi', checkIfExists: true) ] ] """ @@ -209,7 +204,6 @@ nextflow_process { { assert snapshot(process.out).match() } ) } - } test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index - stub") { @@ -223,12 +217,12 @@ nextflow_process { input[0] = [ [ id:'test3' ], // meta map [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz', checkIfExists: true) ], [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz.csi', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz.csi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi', checkIfExists: true) ] ] """ @@ -242,7 +236,6 @@ nextflow_process { { assert process.out.csi[0][1].endsWith(".csi") } ) } - } test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_csi - stub") { @@ -256,12 +249,12 @@ nextflow_process { input[0] = [ [ id:'test3' ], // meta map [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz', checkIfExists: true) + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz', checkIfExists: true) ], [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz.csi', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz.csi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi', checkIfExists: true) ] ] """ @@ -275,7 +268,6 @@ nextflow_process { { assert process.out.csi[0][1].endsWith(".csi") } ) } - } test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_tbi - stub") { @@ -289,12 +281,12 @@ nextflow_process { input[0] = [ [ id:'test3' ], // meta map [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz', checkIfExists: true) ], [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz.csi', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz.csi', 
checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi', checkIfExists: true) ] ] """ @@ -308,7 +300,6 @@ nextflow_process { { assert process.out.tbi[0][1].endsWith(".tbi") } ) } - } diff --git a/modules/nf-core/bcftools/concat/tests/main.nf.test.snap b/modules/nf-core/bcftools/concat/tests/main.nf.test.snap index b82169c9..5edf5beb 100644 --- a/modules/nf-core/bcftools/concat/tests/main.nf.test.snap +++ b/modules/nf-core/bcftools/concat/tests/main.nf.test.snap @@ -6,7 +6,7 @@ { "id": "test3" }, - "test3_vcf.vcf.gz:md5,85db49dd1629d60e1165f491df6348f6" + "test3_vcf.vcf.gz:md5,0b4986bbb6bdc9e0ee12ba09e70092e9" ] ], [ @@ -20,15 +20,21 @@ [ ], - [ - "versions.yml:md5,689f810ab8f069cb75cfebed99a52df8" - ] + { + "versions_bcftools": [ + [ + "BCFTOOLS_CONCAT", + "bcftools", + "1.22" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-15T11:26:25.259752" + "timestamp": "2026-01-20T11:59:52.705734505" }, "homo_sapiens - [[vcf1, vcf2], []]": { "content": [ @@ -38,7 +44,7 @@ { "id": "test3" }, - "test3.vcf:md5,bba76244f79c4e307bd0c4c09095885f" + "test3.vcf:md5,0b4986bbb6bdc9e0ee12ba09e70092e9" ] ], "1": [ @@ -48,7 +54,11 @@ ], "3": [ - "versions.yml:md5,689f810ab8f069cb75cfebed99a52df8" + [ + "BCFTOOLS_CONCAT", + "bcftools", + "1.22" + ] ], "csi": [ @@ -61,19 +71,23 @@ { "id": "test3" }, - "test3.vcf:md5,bba76244f79c4e307bd0c4c09095885f" + "test3.vcf:md5,0b4986bbb6bdc9e0ee12ba09e70092e9" ] ], - "versions": [ - "versions.yml:md5,689f810ab8f069cb75cfebed99a52df8" + "versions_bcftools": [ + [ + "BCFTOOLS_CONCAT", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-15T10:03:17.250013" + "timestamp": "2026-01-20T12:00:08.498123366" }, "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - stub": { 
"content": [ @@ -93,7 +107,11 @@ ], "3": [ - "versions.yml:md5,689f810ab8f069cb75cfebed99a52df8" + [ + "BCFTOOLS_CONCAT", + "bcftools", + "1.22" + ] ], "csi": [ @@ -109,16 +127,20 @@ "test3.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,689f810ab8f069cb75cfebed99a52df8" + "versions_bcftools": [ + [ + "BCFTOOLS_CONCAT", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-15T09:52:50.140505" + "timestamp": "2026-01-20T12:00:15.986212097" }, "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index - stub": { "content": [ @@ -143,7 +165,11 @@ ] ], "3": [ - "versions.yml:md5,689f810ab8f069cb75cfebed99a52df8" + [ + "BCFTOOLS_CONCAT", + "bcftools", + "1.22" + ] ], "csi": [ [ @@ -164,16 +190,20 @@ "test3_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "versions": [ - "versions.yml:md5,689f810ab8f069cb75cfebed99a52df8" + "versions_bcftools": [ + [ + "BCFTOOLS_CONCAT", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-15T09:52:55.544079" + "timestamp": "2026-01-20T12:00:24.007593166" }, "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_tbi": { "content": [ @@ -182,7 +212,7 @@ { "id": "test3" }, - "test3_vcf.vcf.gz:md5,85db49dd1629d60e1165f491df6348f6" + "test3_vcf.vcf.gz:md5,0b4986bbb6bdc9e0ee12ba09e70092e9" ] ], [ @@ -196,15 +226,21 @@ "test3_vcf.vcf.gz.tbi" ] ], - [ - "versions.yml:md5,689f810ab8f069cb75cfebed99a52df8" - ] + { + "versions_bcftools": [ + [ + "BCFTOOLS_CONCAT", + "bcftools", + "1.22" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-15T11:26:31.742638" + "timestamp": "2026-01-20T12:00:00.937832062" }, "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]]": { "content": [ @@ -214,7 +250,7 @@ { "id": 
"test3" }, - "test3.vcf:md5,bba76244f79c4e307bd0c4c09095885f" + "test3.vcf:md5,0b4986bbb6bdc9e0ee12ba09e70092e9" ] ], "1": [ @@ -224,7 +260,11 @@ ], "3": [ - "versions.yml:md5,689f810ab8f069cb75cfebed99a52df8" + [ + "BCFTOOLS_CONCAT", + "bcftools", + "1.22" + ] ], "csi": [ @@ -237,19 +277,23 @@ { "id": "test3" }, - "test3.vcf:md5,bba76244f79c4e307bd0c4c09095885f" + "test3.vcf:md5,0b4986bbb6bdc9e0ee12ba09e70092e9" ] ], - "versions": [ - "versions.yml:md5,689f810ab8f069cb75cfebed99a52df8" + "versions_bcftools": [ + [ + "BCFTOOLS_CONCAT", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-15T10:29:33.203302" + "timestamp": "2026-01-20T11:59:37.378838683" }, "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index": { "content": [ @@ -258,7 +302,7 @@ { "id": "test3" }, - "test3_vcf.vcf.gz:md5,85db49dd1629d60e1165f491df6348f6" + "test3_vcf.vcf.gz:md5,0b4986bbb6bdc9e0ee12ba09e70092e9" ] ], [ @@ -272,15 +316,21 @@ [ ], - [ - "versions.yml:md5,689f810ab8f069cb75cfebed99a52df8" - ] + { + "versions_bcftools": [ + [ + "BCFTOOLS_CONCAT", + "bcftools", + "1.22" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-15T11:26:17.482474" + "timestamp": "2026-01-20T11:59:45.296771646" }, "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_tbi - stub": { "content": [ @@ -305,7 +355,11 @@ ], "3": [ - "versions.yml:md5,689f810ab8f069cb75cfebed99a52df8" + [ + "BCFTOOLS_CONCAT", + "bcftools", + "1.22" + ] ], "csi": [ @@ -326,16 +380,20 @@ "test3_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "versions": [ - "versions.yml:md5,689f810ab8f069cb75cfebed99a52df8" + "versions_bcftools": [ + [ + "BCFTOOLS_CONCAT", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": 
"2025-09-15T09:53:07.368971" + "timestamp": "2026-01-20T12:00:39.792930532" }, "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_csi - stub": { "content": [ @@ -360,7 +418,11 @@ ] ], "3": [ - "versions.yml:md5,689f810ab8f069cb75cfebed99a52df8" + [ + "BCFTOOLS_CONCAT", + "bcftools", + "1.22" + ] ], "csi": [ [ @@ -381,15 +443,19 @@ "test3_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "versions": [ - "versions.yml:md5,689f810ab8f069cb75cfebed99a52df8" + "versions_bcftools": [ + [ + "BCFTOOLS_CONCAT", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-15T09:53:01.405995" + "timestamp": "2026-01-20T12:00:31.985353981" } } \ No newline at end of file diff --git a/modules/nf-core/bcftools/concat/tests/nextflow.config b/modules/nf-core/bcftools/concat/tests/nextflow.config index c6a5142a..1d5fee9e 100644 --- a/modules/nf-core/bcftools/concat/tests/nextflow.config +++ b/modules/nf-core/bcftools/concat/tests/nextflow.config @@ -1,3 +1,3 @@ process { - ext.args = "--no-version" + ext.args = "--no-version -a" } diff --git a/modules/nf-core/bcftools/concat/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/concat/tests/vcf_gz_index_csi.config index afa03d74..2455b145 100644 --- a/modules/nf-core/bcftools/concat/tests/vcf_gz_index_csi.config +++ b/modules/nf-core/bcftools/concat/tests/vcf_gz_index_csi.config @@ -1,4 +1,4 @@ process { ext.prefix = { "${meta.id}_vcf" } - ext.args = "--output-type z --write-index=csi --no-version --allow-overlaps" + ext.args = "--output-type z --write-index=csi --no-version --allow-overlaps -a" } diff --git a/modules/nf-core/bcftools/isec/bcftools-isec.diff b/modules/nf-core/bcftools/isec/bcftools-isec.diff new file mode 100644 index 00000000..bea9e9d9 --- /dev/null +++ b/modules/nf-core/bcftools/isec/bcftools-isec.diff @@ -0,0 +1,46 @@ +Changes in component 'nf-core/bcftools/isec' 
+'modules/nf-core/bcftools/isec/meta.yml' is unchanged +Changes in 'bcftools/isec/main.nf': +--- modules/nf-core/bcftools/isec/main.nf ++++ modules/nf-core/bcftools/isec/main.nf +@@ -12,6 +12,15 @@ + + output: + tuple val(meta), path("${prefix}", type: "dir"), emit: results ++ tuple val(meta), path("${prefix}/0002.vcf.gz"), emit: deepvar_consensus_vcf ++ tuple val(meta), path("${prefix}/0002.vcf.gz.tbi"), emit: deepvar_consensus_tbi ++ tuple val(meta), path("${prefix}/0003.vcf.gz"), emit: clair_consensus_vcf ++ tuple val(meta), path("${prefix}/0003.vcf.gz.tbi"), emit: clair_consensus_tbi ++ tuple val(meta), path("${prefix}/0001.vcf.gz"), emit: clair_private_vcf ++ tuple val(meta), path("${prefix}/0001.vcf.gz.tbi"), emit: clair_private_tbi ++ tuple val(meta), path("${prefix}/0000.vcf.gz"), emit: deepvar_private_vcf ++ tuple val(meta), path("${prefix}/0000.vcf.gz.tbi"), emit: deepvar_private_tbi ++ + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: +@@ -30,7 +39,7 @@ + ${targets_file_args} \\ + ${regions_file_args} \\ + -p ${prefix} \\ +- ${vcf_files} \\ ++ ${vcf_files} + """ + + stub: +@@ -43,5 +52,9 @@ + touch ${prefix}/0000.vcf.gz.tbi + echo "" | gzip > ${prefix}/0001.vcf.gz + touch ${prefix}/0001.vcf.gz.tbi ++ echo "" | gzip > ${prefix}/0002.vcf.gz ++ touch ${prefix}/0002.vcf.gz.tbi ++ echo "" | gzip > ${prefix}/0003.vcf.gz ++ touch ${prefix}/0003.vcf.gz.tbi + """ + } + +'modules/nf-core/bcftools/isec/environment.yml' is unchanged +'modules/nf-core/bcftools/isec/tests/main.nf.test' is unchanged +'modules/nf-core/bcftools/isec/tests/main.nf.test.snap' is unchanged +'modules/nf-core/bcftools/isec/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/bcftools/isec/environment.yml b/modules/nf-core/bcftools/isec/environment.yml new file mode 100644 index 00000000..cb55500b --- /dev/null +++ 
b/modules/nf-core/bcftools/isec/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::bcftools=1.22 + - bioconda::htslib=1.22.1 diff --git a/modules/nf-core/bcftools/isec/main.nf b/modules/nf-core/bcftools/isec/main.nf new file mode 100644 index 00000000..cda1a662 --- /dev/null +++ b/modules/nf-core/bcftools/isec/main.nf @@ -0,0 +1,60 @@ +process BCFTOOLS_ISEC { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(vcfs), path(tbis), path(file_list), path(targets_file), path(regions_file) + + output: + tuple val(meta), path("${prefix}", type: "dir"), emit: results + tuple val(meta), path("${prefix}/0002.vcf.gz"), emit: deepvar_consensus_vcf + tuple val(meta), path("${prefix}/0002.vcf.gz.tbi"), emit: deepvar_consensus_tbi + tuple val(meta), path("${prefix}/0003.vcf.gz"), emit: clair_consensus_vcf + tuple val(meta), path("${prefix}/0003.vcf.gz.tbi"), emit: clair_consensus_tbi + tuple val(meta), path("${prefix}/0001.vcf.gz"), emit: clair_private_vcf + tuple val(meta), path("${prefix}/0001.vcf.gz.tbi"), emit: clair_private_tbi + tuple val(meta), path("${prefix}/0000.vcf.gz"), emit: deepvar_private_vcf + tuple val(meta), path("${prefix}/0000.vcf.gz.tbi"), emit: deepvar_private_tbi + + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null 
|| task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + targets_file_args = targets_file ? "-T ${targets_file}" : '' + regions_file_args = regions_file ? "-R ${regions_file}" : '' + vcf_files = file_list ? "-l ${file_list}" : "${vcfs}" + + """ + bcftools isec \\ + ${args} \\ + ${targets_file_args} \\ + ${regions_file_args} \\ + -p ${prefix} \\ + ${vcf_files} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${prefix} + touch ${prefix}/README.txt + touch ${prefix}/sites.txt + echo "" | gzip > ${prefix}/0000.vcf.gz + touch ${prefix}/0000.vcf.gz.tbi + echo "" | gzip > ${prefix}/0001.vcf.gz + touch ${prefix}/0001.vcf.gz.tbi + echo "" | gzip > ${prefix}/0002.vcf.gz + touch ${prefix}/0002.vcf.gz.tbi + echo "" | gzip > ${prefix}/0003.vcf.gz + touch ${prefix}/0003.vcf.gz.tbi + """ +} diff --git a/modules/nf-core/bcftools/isec/meta.yml b/modules/nf-core/bcftools/isec/meta.yml new file mode 100644 index 00000000..051e141e --- /dev/null +++ b/modules/nf-core/bcftools/isec/meta.yml @@ -0,0 +1,101 @@ +name: bcftools_isec +description: Apply set operations to VCF files +keywords: + - variant calling + - intersect + - union + - complement + - VCF + - BCF +tools: + - isec: + description: | + Computes intersections, unions and complements of VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcfs: + type: list + description: | + List containing 2 or more vcf/bcf files. These must be compressed and have an associated index. + e.g. 
[ 'file1.vcf.gz', 'file2.vcf' ] + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + ontologies: + - edam: "http://edamontology.org/format_3016" # VCF + - edam: "http://edamontology.org/format_3570" # BCF + - tbis: + type: list + description: | + List containing the tbi index files corresponding to the vcf/bcf input files + pattern: "*.tbi" + ontologies: + - edam: "http://edamontology.org/format_3475" # Tabix index + - file_list: + type: file + description: | + Optional text file containing the list of VCF/BCF files to be processed by bcftools isec, one per line. + ontologies: + - edam: "http://edamontology.org/format_2330" # Text file + - targets_file: + type: file + description: | + Optional file containing target regions to restrict the analysis to. + ontologies: + - edam: "http://edamontology.org/format_3003" # BED + - edam: "http://edamontology.org/format_3475" # Tab-separated + - regions_file: + type: file + description: | + Optional file containing regions to restrict the analysis to. + ontologies: + - edam: "http://edamontology.org/format_3003" # BED + - edam: "http://edamontology.org/format_3475" # Tab-separated +output: + results: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}: + type: directory + description: Directory containing the output files from bcftools isec + pattern: "${prefix}/" + ontologies: + - edam: "http://edamontology.org/format_3016" # VCF + - edam: "http://edamontology.org/format_3570" # BCF + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - bcftools --version | sed '1!d; s/^.*bcftools //': + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - bcftools --version | sed '1!d; s/^.*bcftools //': + type: string + description: The command used to generate the version of the tool +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/isec/tests/main.nf.test b/modules/nf-core/bcftools/isec/tests/main.nf.test new file mode 100644 index 00000000..d0a1f751 --- /dev/null +++ b/modules/nf-core/bcftools/isec/tests/main.nf.test @@ -0,0 +1,318 @@ +nextflow_process { + + name "Test Process BCFTOOLS_ISEC" + script "../main.nf" + process "BCFTOOLS_ISEC" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/isec" + + config "./nextflow.config" + + test("sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]]") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [], + [], + [] + ] + """ + } + } + + then { + def results_dir = new File(process.out.results[0][1]) + def results_list = [] + results_dir.eachFileRecurse { file -> results_list << file.getName() } + assertAll( + { assert process.success }, + { assert snapshot( + process.out.findAll { key, val -> key.startsWith("versions") }, + results_list.sort(), + path("${process.out.results[0][1]}").list().findAll { + it.getFileName().toString() != "0000.vcf.gz.tbi" && it.getFileName().toString() != "0001.vcf.gz.tbi" + } + ).match() + } + ) + } + } + + test("sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [], + [], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - targets") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + 
[], + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true)], + [] + ] + """ + } + } + + then { + def results_dir = new File(process.out.results[0][1]) + def results_list = [] + results_dir.eachFileRecurse { file -> results_list << file.getName() } + assertAll( + { assert process.success }, + { assert snapshot( + process.out.findAll { key, val -> key.startsWith("versions") }, + results_list.sort(), + path("${process.out.results[0][1]}").list().findAll { + it.getFileName().toString() != "0000.vcf.gz.tbi" && it.getFileName().toString() != "0001.vcf.gz.tbi" && it.getFileName().toString() != "sites.txt" + } + ).match() + } + ) + } + } + + test("sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - targets - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [], + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true)], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - regions") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [], + [], + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true)] + ] + """ + } + } + + then { + def results_dir = new File(process.out.results[0][1]) + def results_list = [] + results_dir.eachFileRecurse { file -> results_list << file.getName() } + assertAll( + { assert process.success }, + { assert snapshot( + process.out.findAll { key, val -> key.startsWith("versions") }, + results_list.sort(), + path("${process.out.results[0][1]}").list().findAll { + it.getFileName().toString() != "0000.vcf.gz.tbi" && it.getFileName().toString() != "0001.vcf.gz.tbi" && it.getFileName().toString() != "sites.txt" + } + ).match() + } + ) + } + } + + test("sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - regions - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [], + [], + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true)] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - file_list") { + + setup{ + new File("${launchDir}/file_list.txt").text = """ + test.vcf.gz + test2.vcf.gz + """.stripIndent().trim() + } + + + when { + process { + """ + input[0] = 
[ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [file("${launchDir}/file_list.txt", checkIfExists: true)], + [], + [] + ] + """ + } + } + + then { + def results_dir = new File(process.out.results[0][1]) + def results_list = [] + results_dir.eachFileRecurse { file -> results_list << file.getName() } + assertAll( + { assert process.success }, + { assert snapshot( + process.out.findAll { key, val -> key.startsWith("versions") }, + results_list.sort(), + path("${process.out.results[0][1]}").list().findAll { + it.getFileName().toString() != "0000.vcf.gz.tbi" && it.getFileName().toString() != "0001.vcf.gz.tbi" + } + ).match() + } + ) + } + } + test("sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - file_list - stub") { + options "-stub" + + setup{ + new File("${launchDir}/file_list.txt").text = """ + test.vcf.gz + test2.vcf.gz + """.stripIndent().trim() + } + + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [file("${launchDir}/file_list.txt", checkIfExists: true)], + [], + [] + ] + """ + } + } + + then { + assertAll( + { assert 
process.success }, + { assert snapshot(process.out).match() } + + ) + } + } +} diff --git a/modules/nf-core/bcftools/isec/tests/main.nf.test.snap b/modules/nf-core/bcftools/isec/tests/main.nf.test.snap new file mode 100644 index 00000000..8a2cb65f --- /dev/null +++ b/modules/nf-core/bcftools/isec/tests/main.nf.test.snap @@ -0,0 +1,348 @@ +{ + "sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]]": { + "content": [ + { + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + }, + [ + "0000.vcf.gz", + "0000.vcf.gz.tbi", + "0001.vcf.gz", + "0001.vcf.gz.tbi", + "README.txt", + "sites.txt" + ], + [ + "0000.vcf.gz:md5,8e722884ffb75155212a3fc053918766", + "0001.vcf.gz:md5,b39a72f91458b94b346dd73690207649", + "README.txt:md5,10fc33b66522645600d44afbd41fb792", + "sites.txt:md5,1cea3fbde7f6d3c97f3d39036f9690df" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:03:48.711543241" + }, + "sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - targets": { + "content": [ + { + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + }, + [ + "0000.vcf.gz", + "0000.vcf.gz.tbi", + "0001.vcf.gz", + "0001.vcf.gz.tbi", + "README.txt", + "sites.txt" + ], + [ + "0000.vcf.gz:md5,565cbbb0d930be20fc235604da695623", + "0001.vcf.gz:md5,d65e9e45a4c5f45873cb26b80c81b213", + "README.txt:md5,f4190b7943f8f12886ad57ecaedd0c43" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2026-01-28T11:48:46.533255686" + }, + "sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - regions - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + 
[ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ], + "results": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2026-01-23T19:06:04.239620535" + }, + "sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - targets - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ], + "results": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2026-01-23T18:58:08.73508502" + }, + "sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - file_list": { + "content": [ + { + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + }, + [ + "0000.vcf.gz", + "0000.vcf.gz.tbi", + "0001.vcf.gz", + 
"0001.vcf.gz.tbi", + "README.txt", + "sites.txt" + ], + [ + "0000.vcf.gz:md5,8e722884ffb75155212a3fc053918766", + "0001.vcf.gz:md5,b39a72f91458b94b346dd73690207649", + "README.txt:md5,4426b6b26b177d85e150f06bd5138411", + "sites.txt:md5,1cea3fbde7f6d3c97f3d39036f9690df" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2026-01-27T12:52:50.066330847" + }, + "sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ], + "results": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:03:56.874977547" + }, + "sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - regions": { + "content": [ + { + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + }, + [ + "0000.vcf.gz", + "0000.vcf.gz.tbi", + "0001.vcf.gz", + "0001.vcf.gz.tbi", + "README.txt", + "sites.txt" + ], + [ + "0000.vcf.gz:md5,565cbbb0d930be20fc235604da695623", + "0001.vcf.gz:md5,d65e9e45a4c5f45873cb26b80c81b213", + "README.txt:md5,16eeab1b2463bab4d498a4dfdaa297fa" + ] + ], + "meta": { + "nf-test": "0.9.3", + 
"nextflow": "25.04.8" + }, + "timestamp": "2026-01-28T11:49:26.428693544" + }, + "sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - file_list - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ], + "results": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2026-01-28T11:55:27.123701797" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/isec/tests/nextflow.config b/modules/nf-core/bcftools/isec/tests/nextflow.config new file mode 100644 index 00000000..ac887d6b --- /dev/null +++ b/modules/nf-core/bcftools/isec/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = '--nfiles +2 --output-type z --no-version' +} diff --git a/modules/nf-core/bcftools/merge/bcftools-merge.diff b/modules/nf-core/bcftools/merge/bcftools-merge.diff new file mode 100644 index 00000000..6a8812db --- /dev/null +++ b/modules/nf-core/bcftools/merge/bcftools-merge.diff @@ -0,0 +1,21 @@ +Changes in component 'nf-core/bcftools/merge' +'modules/nf-core/bcftools/merge/meta.yml' is unchanged +Changes in 'bcftools/merge/main.nf': +--- modules/nf-core/bcftools/merge/main.nf 
++++ modules/nf-core/bcftools/merge/main.nf +@@ -9,7 +9,8 @@ + + input: + tuple val(meta), path(vcfs), path(tbis), path(bed) +- tuple val(meta2), path(fasta), path(fai) ++ tuple val(meta2), path(fasta) ++ tuple val(meta3), path(fai) + + output: + tuple val(meta), path("*.{bcf,vcf}{,.gz}"), emit: vcf + +'modules/nf-core/bcftools/merge/environment.yml' is unchanged +'modules/nf-core/bcftools/merge/tests/main.nf.test' is unchanged +'modules/nf-core/bcftools/merge/tests/main.nf.test.snap' is unchanged +'modules/nf-core/bcftools/merge/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/bcftools/merge/environment.yml b/modules/nf-core/bcftools/merge/environment.yml index ba863b38..cb55500b 100644 --- a/modules/nf-core/bcftools/merge/environment.yml +++ b/modules/nf-core/bcftools/merge/environment.yml @@ -5,6 +5,5 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/htslib - - bioconda::htslib=1.22.1 - # renovate: datasource=conda depName=bioconda/bcftools - bioconda::bcftools=1.22 + - bioconda::htslib=1.22.1 diff --git a/modules/nf-core/bcftools/merge/main.nf b/modules/nf-core/bcftools/merge/main.nf index c560a902..f295c0e6 100644 --- a/modules/nf-core/bcftools/merge/main.nf +++ b/modules/nf-core/bcftools/merge/main.nf @@ -1,22 +1,21 @@ process BCFTOOLS_MERGE { - tag "$meta.id" + tag "${meta.id}" label 'process_medium' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data': - 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" input: - tuple val(meta), path(vcfs), path(tbis) + tuple val(meta), path(vcfs), path(tbis), path(bed) tuple val(meta2), path(fasta) tuple val(meta3), path(fai) - tuple val(meta4), path(bed) output: tuple val(meta), path("*.{bcf,vcf}{,.gz}"), emit: vcf - tuple val(meta), path("*.{csi,tbi}") , emit: index, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("*.{csi,tbi}"), emit: index, optional: true + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools when: task.ext.when == null || task.ext.when @@ -25,50 +24,51 @@ process BCFTOOLS_MERGE { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def input = (vcfs.collect().size() > 1) ? vcfs.sort{ it.name } : vcfs - def regions = bed ? "--regions-file $bed" : "" - def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : - args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : - args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : - args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : - "vcf" + def input = vcfs.collect().size() > 1 ? vcfs.sort { vcf -> vcf.name } : vcfs + def regions = bed ? "--regions-file ${bed}" : "" + def extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? 
"vcf" + : "vcf" """ bcftools merge \\ - $args \\ - $regions \\ - --threads $task.cpus \\ + ${args} \\ + ${regions} \\ + --threads ${task.cpus} \\ --output ${prefix}.${extension} \\ - $input - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS + ${input} """ stub: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : - args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : - args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : - args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : - "vcf" - def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : - args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : - args.contains("--write-index") || args.contains("-W") ? "csi" : - "" + def extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? "vcf" + : "vcf" + def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") + ? "tbi" + : args.contains("--write-index=csi") || args.contains("-W=csi") + ? "csi" + : args.contains("--write-index") || args.contains("-W") + ? "csi" + : "" def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? 
"touch ${prefix}.${extension}.${index}" : "" """ ${create_cmd} ${prefix}.${extension} ${create_index} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/bcftools/merge/meta.yml b/modules/nf-core/bcftools/merge/meta.yml index 09af245a..445e5c56 100644 --- a/modules/nf-core/bcftools/merge/meta.yml +++ b/modules/nf-core/bcftools/merge/meta.yml @@ -31,6 +31,11 @@ input: List containing the tbi index files corresponding to the vcfs input files e.g. [ 'file1.vcf.tbi', 'file2.vcf.tbi' ] ontologies: [] + - bed: + type: file + description: "(Optional) The bed regions to merge on" + pattern: "*.bed" + ontologies: [] - - meta2: type: map description: | @@ -42,27 +47,12 @@ input: FILE` parameter)" pattern: "*.{fasta,fa}" ontologies: [] - - - meta3: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'genome' ] - fai: type: file description: "(Optional) The fasta reference file index (only necessary for the `--gvcf FILE` parameter)" pattern: "*.fai" ontologies: [] - - - meta4: - type: map - description: | - Groovy Map containing bed information - e.g. 
[ id:'genome' ] - - bed: - type: file - description: "(Optional) The bed regions to merge on" - pattern: "*.bed" - ontologies: [] output: vcf: - - meta: @@ -86,13 +76,27 @@ output: description: index of merged output pattern: "*.{csi,tbi}" ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool authors: - "@joseespinosa" - "@drpatelh" diff --git a/modules/nf-core/bcftools/merge/tests/bcf.config b/modules/nf-core/bcftools/merge/tests/bcf.config deleted file mode 100644 index 4467d07d..00000000 --- a/modules/nf-core/bcftools/merge/tests/bcf.config +++ /dev/null @@ -1,3 +0,0 @@ -process { - ext.args = '--output-type u --no-version' -} diff --git a/modules/nf-core/bcftools/merge/tests/bcf_gz.config b/modules/nf-core/bcftools/merge/tests/bcf_gz.config deleted file mode 100644 index 280de8db..00000000 --- a/modules/nf-core/bcftools/merge/tests/bcf_gz.config +++ /dev/null @@ -1,3 +0,0 @@ -process { - ext.args = '--output-type b --no-version' -} diff --git a/modules/nf-core/bcftools/merge/tests/main.nf.test b/modules/nf-core/bcftools/merge/tests/main.nf.test index 3995fc1a..3a1f1220 100644 --- a/modules/nf-core/bcftools/merge/tests/main.nf.test +++ b/modules/nf-core/bcftools/merge/tests/main.nf.test @@ -2,6 +2,8 @@ 
nextflow_process { name "Test Process BCFTOOLS_MERGE" script "../main.nf" + config "./nextflow.config" + process "BCFTOOLS_MERGE" tag "modules" @@ -11,9 +13,10 @@ nextflow_process { test("sarscov2 - [vcf, tbi], [], [], []") { - config "./nextflow.config" - when { + params { + args_modules = "--force-samples --force-single --no-version" + } process { """ input[0] = [ @@ -25,11 +28,10 @@ nextflow_process { [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), - ] + ], + [] ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [[],[]] + input[1] = [[],[],[]] """ } } @@ -40,18 +42,18 @@ nextflow_process { { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, { assert snapshot( path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions") }, ).match() } ) } - } test("sarscov2 - [vcf, tbi], [], [], [] - vcf output") { - config "./vcf.config" - when { + params { + args_modules = "--output-type v --no-version" + } process { """ input[0] = [ @@ -63,11 +65,10 @@ nextflow_process { [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), - ] + ], + [] ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [[],[]] + input[1] = [[],[],[]] """ } } @@ -78,18 +79,18 @@ nextflow_process { { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, { assert snapshot( path(process.out.vcf.get(0).get(1)).md5, - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions") }, ).match() } ) } - } test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output") { - config "./vcf_gz.config" - when { + params { + args_modules = "--output-type z 
--no-version" + } process { """ input[0] = [ @@ -101,11 +102,10 @@ nextflow_process { [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), - ] + ], + [] ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [[],[]] + input[1] = [[],[],[]] """ } } @@ -116,18 +116,18 @@ nextflow_process { { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, { assert snapshot( path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions") }, ).match() } ) } - } test("sarscov2 - [vcf, tbi], [], [], [] - bcf output") { - config "./bcf.config" - when { + params { + args_modules = "--output-type u --no-version" + } process { """ input[0] = [ @@ -139,11 +139,10 @@ nextflow_process { [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), - ] + ], + [] ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [[],[]] + input[1] = [[],[],[]] """ } } @@ -154,18 +153,18 @@ nextflow_process { { assert process.out.vcf.get(0).get(1).endsWith("bcf") }, { assert snapshot( file(process.out.vcf.get(0).get(1)).name, - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions") }, ).match() } ) } - } test("sarscov2 - [vcf, tbi], [], [], [] - bcf.gz output") { - config "./bcf_gz.config" - when { + params { + args_modules = "--output-type b --no-version" + } process { """ input[0] = [ @@ -177,11 +176,10 @@ nextflow_process { [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), 
- ] + ], + [] ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [[],[]] + input[1] = [[],[],[]] """ } } @@ -192,18 +190,18 @@ nextflow_process { { assert process.out.vcf.get(0).get(1).endsWith("bcf.gz") }, { assert snapshot( file(process.out.vcf.get(0).get(1)).name, - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions") }, ).match() } ) } - } test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - index") { - config "./vcf_gz_index.config" - when { + params { + args_modules = "--output-type z --write-index --no-version" + } process { """ input[0] = [ @@ -215,11 +213,10 @@ nextflow_process { [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), - ] + ], + [] ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [[],[]] + input[1] = [[],[],[]] """ } } @@ -232,19 +229,18 @@ nextflow_process { { assert snapshot( path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, file(process.out.index.get(0).get(1)).name, - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions") }, ).match() } ) } - } test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - csi index") { - config "./vcf_gz_index_csi.config" - when { - + params { + args_modules = "--output-type z --write-index=csi --no-version" + } process { """ input[0] = [ @@ -256,11 +252,10 @@ nextflow_process { [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), - ] + ], + [] ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [[],[]] + input[1] = [[],[],[]] """ } } @@ -273,18 +268,18 @@ nextflow_process { { assert snapshot( path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, 
file(process.out.index.get(0).get(1)).name, - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions") }, ).match() } ) } - } test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - tbi index") { - config "./vcf_gz_index_tbi.config" - when { + params { + args_modules = "--output-type z --write-index=tbi --no-version" + } process { """ input[0] = [ @@ -296,11 +291,10 @@ nextflow_process { [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), - ] + ], + [] ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [[],[]] + input[1] = [[],[],[]] """ } } @@ -313,18 +307,18 @@ nextflow_process { { assert snapshot( path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, file(process.out.index.get(0).get(1)).name, - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions") }, ).match() } ) } - } test("sarscov2 - [vcf, tbi], [], [], bed") { - config "./nextflow.config" - when { + params { + args_modules = "--force-samples --force-single --no-version" + } process { """ input[0] = [ @@ -336,14 +330,10 @@ nextflow_process { [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), - ] - ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [ - [ id:'test' ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] + ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] + input[1] = [[],[],[]] """ } } @@ -354,18 +344,18 @@ nextflow_process { { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, { assert snapshot( path(process.out.vcf.get(0).get(1)).md5, - 
process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions") }, ).match() } ) } - } test("homo_sapiens - [vcf, tbi], fasta, fai, bed - vcf.gz output") { - config "./nextflow.gvcf.config" - when { + params { + args_modules = "--force-samples --no-version --output-type z --gvcf genome.fasta" + } process { """ input[0] = [ @@ -377,20 +367,14 @@ nextflow_process { [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz.tbi', checkIfExists: true), - ] + ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) ] input[1] = [ [ id:'test' ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] - ] - input[2] = [ - [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ], [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] ] - input[3] = [ - [ id:'test' ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) ] - ] """ } } @@ -401,17 +385,18 @@ nextflow_process { { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, { assert snapshot( path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions") }, ).match() } ) } - } test("sarscov2 - [vcf, tbi], [], [], [] - one sample") { - config "./nextflow.config" when { + params { + args_modules = "--force-samples --force-single --no-version" + } process { """ input[0] = [ @@ -421,11 +406,10 @@ nextflow_process { ], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) - 
] + ], + [] ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [[],[]] + input[1] = [[],[],[]] """ } } @@ -436,19 +420,20 @@ nextflow_process { { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, { assert snapshot( path(process.out.vcf.get(0).get(1)).md5, - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions") }, ).match() } ) } - } test("sarscov2 - [vcf, tbi], [], [], [] - stub") { options "-stub" - config "./nextflow.config" when { + params { + args_modules = "--force-samples --force-single --no-version" + } process { """ input[0] = [ @@ -460,11 +445,10 @@ nextflow_process { [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), - ] + ], + [] ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [[],[]] + input[1] = [[],[],[]] """ } } @@ -476,15 +460,16 @@ nextflow_process { { assert snapshot(process.out).match() } ) } - } test("sarscov2 - [vcf, tbi], [], [], [] - vcf output - stub") { options "-stub" - config "./vcf.config" when { + params { + args_modules = "--output-type v --no-version" + } process { """ input[0] = [ @@ -496,11 +481,10 @@ nextflow_process { [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), - ] + ], + [] ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [[],[]] + input[1] = [[],[],[]] """ } } @@ -512,15 +496,16 @@ nextflow_process { { assert snapshot(process.out).match() } ) } - } test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - stub") { options "-stub" - config "./vcf_gz.config" when { + params { + args_modules = "--output-type z --no-version" + } process { """ input[0] = [ @@ -532,11 +517,10 @@ nextflow_process { [ 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), - ] + ], + [] ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [[],[]] + input[1] = [[],[],[]] """ } } @@ -548,15 +532,16 @@ nextflow_process { { assert snapshot(process.out).match() } ) } - } test("sarscov2 - [vcf, tbi], [], [], [] - bcf output - stub") { options "-stub" - config "./bcf.config" when { + params { + args_modules = "--output-type u --no-version" + } process { """ input[0] = [ @@ -568,11 +553,10 @@ nextflow_process { [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), - ] + ], + [] ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [[],[]] + input[1] = [[],[],[]] """ } } @@ -584,15 +568,16 @@ nextflow_process { { assert snapshot(process.out).match() } ) } - } test("sarscov2 - [vcf, tbi], [], [], [] - bcf.gz output - stub") { options "-stub" - config "./bcf_gz.config" when { + params { + args_modules = "--output-type b --no-version" + } process { """ input[0] = [ @@ -604,11 +589,10 @@ nextflow_process { [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), - ] + ], + [] ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [[],[]] + input[1] = [[],[],[]] """ } } @@ -620,15 +604,16 @@ nextflow_process { { assert snapshot(process.out).match() } ) } - } test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - index - stub") { options "-stub" - config "./vcf_gz_index.config" when { + params { + args_modules = "--output-type z --write-index --no-version" + } process { 
""" input[0] = [ @@ -640,11 +625,10 @@ nextflow_process { [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), - ] + ], + [] ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [[],[]] + input[1] = [[],[],[]] """ } } @@ -657,15 +641,16 @@ nextflow_process { { assert snapshot(process.out).match() } ) } - } test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - csi index - stub") { options "-stub" - config "./vcf_gz_index_csi.config" when { + params { + args_modules = "--output-type z --write-index=csi --no-version" + } process { """ input[0] = [ @@ -677,11 +662,10 @@ nextflow_process { [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), - ] + ], + [] ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [[],[]] + input[1] = [[],[],[]] """ } } @@ -694,52 +678,16 @@ nextflow_process { { assert snapshot(process.out).match() } ) } - } test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - tbi index - stub") { options "-stub" - config "./vcf_gz_index_tbi.config" when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) - ], - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), - ] - ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [[],[]] - """ + params { + args_modules = "--output-type z 
--write-index=tbi --no-version" } - } - - then { - assertAll( - { assert process.success }, - { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, - { assert process.out.index.get(0).get(1).endsWith("tbi") }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("sarscov2 - [vcf, tbi], [], [], bed - stub") { - - options "-stub" - config "./nextflow.config" - - when { process { """ input[0] = [ @@ -751,58 +699,10 @@ nextflow_process { [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), - ] - ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [ - [ id:'test' ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("homo_sapiens - [vcf, tbi], fasta, fai, bed - vcf.gz output - stub") { - - options "-stub" - config "./nextflow.gvcf.config" - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz', checkIfExists: true) ], - [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz.tbi', checkIfExists: true), - ] - ] - input[1] = [ - [ id:'test' ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] - ] - input[2] = [ - [ id:'test' ], 
// meta map - [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] - ] - input[3] = [ - [ id:'test' ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) ] + [] ] + input[1] = [[],[],[]] """ } } @@ -811,43 +711,9 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert process.out.index.get(0).get(1).endsWith("tbi") }, { assert snapshot(process.out).match() } ) } - - } - - test("sarscov2 - [vcf, tbi], [], [], [] - one sample - stub") { - - options "-stub" - config "./nextflow.config" - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) - ], - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) - ] - ] - input[1] = [[],[]] - input[2] = [[],[]] - input[3] = [[],[]] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, - { assert snapshot(process.out).match() } - ) - } - } } diff --git a/modules/nf-core/bcftools/merge/tests/main.nf.test.snap b/modules/nf-core/bcftools/merge/tests/main.nf.test.snap index d340b7b4..9dd09178 100644 --- a/modules/nf-core/bcftools/merge/tests/main.nf.test.snap +++ b/modules/nf-core/bcftools/merge/tests/main.nf.test.snap @@ -3,15 +3,21 @@ "content": [ "e0de448dc8e712956a03ce68d79a0b3a", "test.vcf.gz.tbi", - [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" - ] + { + "versions_bcftools": [ + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:20:16.859885" + "timestamp": "2026-01-20T12:05:01.453191274" }, "sarscov2 - [vcf, 
tbi], [], [], [] - vcf output - stub": { "content": [ @@ -28,7 +34,11 @@ ], "2": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] ], "index": [ @@ -41,29 +51,39 @@ "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" + "versions_bcftools": [ + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:20:43.269991" + "timestamp": "2026-01-20T12:05:43.064165277" }, "sarscov2 - [vcf, tbi], [], [], bed": { "content": [ "febdcfb851dcfc83d8248520830aef10", - [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" - ] + { + "versions_bcftools": [ + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:20:21.848388" + "timestamp": "2026-01-20T12:05:09.906419282" }, "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - index - stub": { "content": [ @@ -85,7 +105,11 @@ ] ], "2": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] ], "index": [ [ @@ -103,29 +127,39 @@ "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "versions": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" + "versions_bcftools": [ + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:21:03.763345" + "timestamp": "2026-01-20T12:06:17.06290054" }, "sarscov2 - [vcf, tbi], [], [], [] - vcf output": { "content": [ "57bb84274f336465d0a0946b532093b0", - [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" - ] + { + "versions_bcftools": [ + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] + ] + } ], "meta": { - 
"nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:19:47.331149" + "timestamp": "2026-01-20T12:04:14.044948834" }, "sarscov2 - [vcf, tbi], [], [], [] - bcf.gz output - stub": { "content": [ @@ -142,7 +176,11 @@ ], "2": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] ], "index": [ @@ -155,16 +193,20 @@ "test.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "versions": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" + "versions_bcftools": [ + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:20:59.170567" + "timestamp": "2026-01-20T12:06:08.633006073" }, "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - tbi index - stub": { "content": [ @@ -186,7 +228,11 @@ ] ], "2": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] ], "index": [ [ @@ -204,16 +250,20 @@ "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "versions": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" + "versions_bcftools": [ + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:21:13.022634" + "timestamp": "2026-01-20T12:06:33.897407991" }, "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - stub": { "content": [ @@ -230,7 +280,11 @@ ], "2": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] ], "index": [ @@ -243,95 +297,78 @@ "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "versions": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" + "versions_bcftools": [ + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": 
"0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:20:48.592261" + "timestamp": "2026-01-20T12:05:51.445673109" }, "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - csi index": { "content": [ "e0de448dc8e712956a03ce68d79a0b3a", "test.vcf.gz.csi", - [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" - ] + { + "versions_bcftools": [ + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:20:11.953139" + "timestamp": "2026-01-20T12:04:53.868676326" }, "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output": { "content": [ "e0de448dc8e712956a03ce68d79a0b3a", - [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" - ] - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" - }, - "timestamp": "2025-09-11T14:19:51.935426" - }, - "sarscov2 - [vcf, tbi], [], [], bed - stub": { - "content": [ { - "0": [ + "versions_bcftools": [ [ - { - "id": "test" - }, - "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "BCFTOOLS_MERGE", + "bcftools", + "1.22" ] - ], - "1": [ - - ], - "2": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" - ], - "index": [ - - ], - "vcf": [ - [ - { - "id": "test" - }, - "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:21:18.319666" + "timestamp": "2026-01-20T12:04:22.660557224" }, "sarscov2 - [vcf, tbi], [], [], [] - bcf output": { "content": [ "test.bcf", - [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" - ] + { + "versions_bcftools": [ + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": 
"2025-09-11T14:19:56.742352" + "timestamp": "2026-01-20T12:04:30.965022315" }, "sarscov2 - [vcf, tbi], [], [], [] - bcf output - stub": { "content": [ @@ -348,7 +385,11 @@ ], "2": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] ], "index": [ @@ -361,134 +402,78 @@ "test.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" + "versions_bcftools": [ + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:20:53.962449" + "timestamp": "2026-01-20T12:05:59.932077434" }, - "sarscov2 - [vcf, tbi], [], [], [] - one sample - stub": { + "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - index": { "content": [ + "e0de448dc8e712956a03ce68d79a0b3a", + "test.vcf.gz.csi", { - "0": [ + "versions_bcftools": [ [ - { - "id": "test" - }, - "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "BCFTOOLS_MERGE", + "bcftools", + "1.22" ] - ], - "1": [ - - ], - "2": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" - ], - "index": [ - - ], - "vcf": [ - [ - { - "id": "test" - }, - "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:21:29.155018" + "timestamp": "2026-01-20T12:04:46.41923299" }, - "homo_sapiens - [vcf, tbi], fasta, fai, bed - vcf.gz output - stub": { + "homo_sapiens - [vcf, tbi], fasta, fai, bed - vcf.gz output": { "content": [ + "645b7f7f9131bfe350a9ec3cf82c17fe", { - "0": [ + "versions_bcftools": [ [ - { - "id": "test" - }, - "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + "BCFTOOLS_MERGE", + "bcftools", + "1.22" ] - ], - "1": [ - - ], - "2": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" 
- ], - "index": [ - - ], - "vcf": [ - [ - { - "id": "test" - }, - "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "versions": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" - }, - "timestamp": "2025-09-11T14:21:23.944931" - }, - "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - index": { - "content": [ - "e0de448dc8e712956a03ce68d79a0b3a", - "test.vcf.gz.csi", - [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" - ] - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:20:06.894016" - }, - "homo_sapiens - [vcf, tbi], fasta, fai, bed - vcf.gz output": { - "content": [ - "645b7f7f9131bfe350a9ec3cf82c17fe", - [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" - ] - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" - }, - "timestamp": "2025-09-11T14:20:28.149857" + "timestamp": "2026-01-20T12:05:18.989455751" }, "sarscov2 - [vcf, tbi], [], [], [] - one sample": { "content": [ "2a374cf02f0c32cf607646167e7f153b", - [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" - ] + { + "versions_bcftools": [ + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:20:32.592911" + "timestamp": "2026-01-20T12:05:26.431607458" }, "sarscov2 - [vcf, tbi], [], [], [] - stub": { "content": [ @@ -505,7 +490,11 @@ ], "2": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] ], "index": [ @@ -518,16 +507,20 @@ "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" + "versions_bcftools": [ + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": 
"25.10.2" }, - "timestamp": "2025-09-11T14:20:37.830691" + "timestamp": "2026-01-20T12:05:34.692613749" }, "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - csi index - stub": { "content": [ @@ -549,7 +542,11 @@ ] ], "2": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] ], "index": [ [ @@ -567,41 +564,57 @@ "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "versions": [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" + "versions_bcftools": [ + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:21:08.5748" + "timestamp": "2026-01-20T12:06:25.137261969" }, "sarscov2 - [vcf, tbi], [], [], []": { "content": [ "e0de448dc8e712956a03ce68d79a0b3a", - [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" - ] + { + "versions_bcftools": [ + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:19:41.986954" + "timestamp": "2026-01-20T12:04:05.637837157" }, "sarscov2 - [vcf, tbi], [], [], [] - bcf.gz output": { "content": [ "test.bcf.gz", - [ - "versions.yml:md5,46d60729adb9ea9a4e4ab722b487a56b" - ] + { + "versions_bcftools": [ + [ + "BCFTOOLS_MERGE", + "bcftools", + "1.22" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:20:01.801297" + "timestamp": "2026-01-20T12:04:38.467108836" } } \ No newline at end of file diff --git a/modules/nf-core/bcftools/merge/tests/nextflow.config b/modules/nf-core/bcftools/merge/tests/nextflow.config index c3f0b715..36f2fa1a 100644 --- a/modules/nf-core/bcftools/merge/tests/nextflow.config +++ b/modules/nf-core/bcftools/merge/tests/nextflow.config @@ -1,5 +1,5 @@ process { withName: 
BCFTOOLS_MERGE { - ext.args = '--force-samples --force-single --no-version' + ext.args = "${params.args_modules}" } } diff --git a/modules/nf-core/bcftools/merge/tests/nextflow.gvcf.config b/modules/nf-core/bcftools/merge/tests/nextflow.gvcf.config deleted file mode 100644 index 8c457b71..00000000 --- a/modules/nf-core/bcftools/merge/tests/nextflow.gvcf.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: BCFTOOLS_MERGE { - ext.args = { "--force-samples --no-version --output-type z --gvcf $fasta" } - } -} diff --git a/modules/nf-core/bcftools/merge/tests/vcf.config b/modules/nf-core/bcftools/merge/tests/vcf.config deleted file mode 100644 index 759222e5..00000000 --- a/modules/nf-core/bcftools/merge/tests/vcf.config +++ /dev/null @@ -1,3 +0,0 @@ -process { - ext.args = '--output-type v --no-version' -} diff --git a/modules/nf-core/bcftools/merge/tests/vcf_gz.config b/modules/nf-core/bcftools/merge/tests/vcf_gz.config deleted file mode 100644 index 8b6ad8b4..00000000 --- a/modules/nf-core/bcftools/merge/tests/vcf_gz.config +++ /dev/null @@ -1,3 +0,0 @@ -process { - ext.args = '--output-type z --no-version' -} diff --git a/modules/nf-core/bcftools/norm/environment.yml b/modules/nf-core/bcftools/norm/environment.yml new file mode 100644 index 00000000..cb55500b --- /dev/null +++ b/modules/nf-core/bcftools/norm/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::bcftools=1.22 + - bioconda::htslib=1.22.1 diff --git a/modules/nf-core/bcftools/norm/main.nf b/modules/nf-core/bcftools/norm/main.nf new file mode 100644 index 00000000..443c8bbb --- /dev/null +++ b/modules/nf-core/bcftools/norm/main.nf @@ -0,0 +1,71 @@ +process BCFTOOLS_NORM { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container 
"${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi"), emit: tbi, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? "vcf" + : "vcf.gz" + """ + bcftools norm \\ + --fasta-ref ${fasta} \\ + --output ${prefix}.${extension} \\ + ${args} \\ + --threads ${task.cpus} \\ + ${vcf} + """ + + stub: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? 
"vcf" + : "vcf.gz" + def index = '' + if (extension in ['vcf.gz', 'bcf', 'bcf.gz']) { + if (['--write-index=tbi', '-W=tbi'].any { arg -> args.contains(arg) } && extension == 'vcf.gz') { + index = 'tbi' + } + else if (['--write-index=tbi', '-W=tbi', '--write-index=csi', '-W=csi', '--write-index', '-W'].any { arg -> args.contains(arg) }) { + index = 'csi' + } + } + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = index ? "touch ${prefix}.${extension}.${index}" : "" + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + """ +} diff --git a/modules/nf-core/bcftools/norm/meta.yml b/modules/nf-core/bcftools/norm/meta.yml new file mode 100644 index 00000000..9feecac0 --- /dev/null +++ b/modules/nf-core/bcftools/norm/meta.yml @@ -0,0 +1,107 @@ +name: bcftools_norm +description: Normalize VCF file +keywords: + - normalize + - norm + - variant calling + - VCF +tools: + - norm: + description: | + Normalize VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be normalized + e.g. 'file1.vcf' + pattern: "*.{vcf,vcf.gz}" + ontologies: [] + - tbi: + type: file + description: | + An optional index of the VCF file (for when the VCF is compressed) + pattern: "*.vcf.gz.tbi" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: One of uncompressed VCF (.vcf), compressed VCF (.vcf.gz), compressed + BCF (.bcf.gz) or uncompressed BCF (.bcf) normalized output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + ontologies: [] + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@abhi18av" + - "@ramprasadn" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/norm/tests/main.nf.test b/modules/nf-core/bcftools/norm/tests/main.nf.test new file mode 100644 index 00000000..05851753 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/main.nf.test @@ -0,0 +1,545 @@ +nextflow_process { + + name "Test Process BCFTOOLS_NORM" + script "../main.nf" + process "BCFTOOLS_NORM" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/norm" + + test("sarscov2 - [ vcf, [] ], fasta") { + + config "./nextflow.config" + + when { + process 
{ + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index") { + + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi") { + + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } + ).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf output") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf_gz output") { + + config "./nextflow.vcf.config" // FIXME(review): test name says vcf_gz but this loads the vcf config; ./nextflow.vcf_gz.config is added in this change yet never referenced — switch the config and regenerate the snapshot + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf output") { + + config "./nextflow.bcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf_gz output") { + + config "./nextflow.bcf_gz.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { 
assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta -stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf output -stub") { + + config "./nextflow.vcf.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf_gz output - stub") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + 
""" + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf output - stub") { + + config "./nextflow.bcf.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf_gz output - stub") { + + config "./nextflow.bcf_gz.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index - stub") { + + config "./vcf_gz_index.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] 
+ """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi - stub") { + + config "./vcf_gz_index_csi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi - stub") { + + config "./vcf_gz_index_tbi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + } + + +} diff --git a/modules/nf-core/bcftools/norm/tests/main.nf.test.snap b/modules/nf-core/bcftools/norm/tests/main.nf.test.snap new file mode 100644 index 00000000..ee2dadf7 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/main.nf.test.snap @@ -0,0 +1,876 @@ +{ + "sarscov2 - [ vcf, tbi ], fasta - vcf_gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", 
+ "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:29.987030961" + }, + "sarscov2 - [ vcf, [] ], fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:06.488086505" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:34.863776359" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + [ + { + "id": "test" + }, + 
"test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:54.718705045" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf_gz output": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:43.007377633" + }, + "sarscov2 - [ vcf, [] ], fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:07:54.877084219" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf output -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:22.220435939" 
+ }, + "sarscov2 - [ vcf, tbi ], fasta - bcf_gz output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,bf88706ef69c44ca9e287bc953ba3593" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,bf88706ef69c44ca9e287bc953ba3593" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:58.483532889" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:10:03.22576704" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T13:53:28.356741947" + }, + "sarscov2 - [ vcf, tbi ], fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + 
"tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:27.281315407" + }, + "sarscov2 - [ vcf, tbi ], fasta -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:14.249715835" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf_gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:46.665932019" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T07:52:58.381931979" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf output - stub": { + "content": [ + { + "0": [ + [ + { + 
"id": "test" + }, + "test_norm.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:38.144449162" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:10:10.602984345" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T13:53:09.808834237" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,638c3c25bdd495c90ecbccb69ee77f07" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + 
"vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,638c3c25bdd495c90ecbccb69ee77f07" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:51.053195842" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config b/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config new file mode 100644 index 00000000..b79af868 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type b --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config b/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config new file mode 100644 index 00000000..f36f397c --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type u --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.config b/modules/nf-core/bcftools/norm/tests/nextflow.config new file mode 100644 index 00000000..510803b4 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config b/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config new file mode 100644 index 00000000..10bf93e3 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type v --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config b/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config new file mode 100644 index 00000000..b31dd2de --- /dev/null +++ 
b/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type z --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/merge/tests/vcf_gz_index.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config similarity index 65% rename from modules/nf-core/bcftools/merge/tests/vcf_gz_index.config rename to modules/nf-core/bcftools/norm/tests/vcf_gz_index.config index 9f1e9b1d..7dd696ee 100644 --- a/modules/nf-core/bcftools/merge/tests/vcf_gz_index.config +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config @@ -1,3 +1,4 @@ process { + ext.prefix = { "${meta.id}_vcf" } ext.args = "--output-type z --write-index --no-version" } diff --git a/modules/nf-core/bcftools/merge/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config similarity index 66% rename from modules/nf-core/bcftools/merge/tests/vcf_gz_index_csi.config rename to modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config index 8308ee1a..aebffb6f 100644 --- a/modules/nf-core/bcftools/merge/tests/vcf_gz_index_csi.config +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config @@ -1,3 +1,4 @@ process { + ext.prefix = { "${meta.id}_vcf" } ext.args = "--output-type z --write-index=csi --no-version" } diff --git a/modules/nf-core/bcftools/merge/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config similarity index 66% rename from modules/nf-core/bcftools/merge/tests/vcf_gz_index_tbi.config rename to modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config index 9be4075b..b192ae7d 100644 --- a/modules/nf-core/bcftools/merge/tests/vcf_gz_index_tbi.config +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config @@ -1,3 +1,4 @@ process { + ext.prefix = { "${meta.id}_vcf" } ext.args = "--output-type z --write-index=tbi --no-version" } diff --git a/modules/nf-core/bcftools/query/bcftools-query.diff 
b/modules/nf-core/bcftools/query/bcftools-query.diff new file mode 100644 index 00000000..790c3808 --- /dev/null +++ b/modules/nf-core/bcftools/query/bcftools-query.diff @@ -0,0 +1,40 @@ +Changes in component 'nf-core/bcftools/query' +'modules/nf-core/bcftools/query/meta.yml' is unchanged +Changes in 'bcftools/query/main.nf': +--- modules/nf-core/bcftools/query/main.nf ++++ modules/nf-core/bcftools/query/main.nf +@@ -14,7 +14,8 @@ + path samples + + output: +- tuple val(meta), path("*.${suffix}"), emit: output ++ tuple val(meta), path("*.${suffix}.gz"), emit: output ++ tuple val(meta), path("*.${suffix}.gz.tbi"), emit: index + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: +@@ -35,12 +36,17 @@ + ${args} \\ + ${vcf} \\ + > ${prefix}.${suffix} ++ bgzip -c ${prefix}.${suffix} > ${prefix}.${suffix}.gz ++ tabix -s 1 -b 2 -e 2 ${prefix}.${suffix}.gz + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + """ +- touch ${prefix}.${suffix} \\ ++ touch ${prefix}.${suffix} ++ bgzip -c ${prefix}.${suffix} > ${prefix}.${suffix}.gz ++ touch ${prefix}.${suffix}.gz.tbi ++ tabix -s 1 -b 2 -e 2 ${prefix}.${suffix}.gz + """ + } + +'modules/nf-core/bcftools/query/environment.yml' is unchanged +'modules/nf-core/bcftools/query/tests/main.nf.test' is unchanged +'modules/nf-core/bcftools/query/tests/main.nf.test.snap' is unchanged +'modules/nf-core/bcftools/query/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/bcftools/query/environment.yml b/modules/nf-core/bcftools/query/environment.yml new file mode 100644 index 00000000..cb55500b --- /dev/null +++ b/modules/nf-core/bcftools/query/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - 
conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::bcftools=1.22 + - bioconda::htslib=1.22.1 diff --git a/modules/nf-core/bcftools/query/main.nf b/modules/nf-core/bcftools/query/main.nf new file mode 100644 index 00000000..4d2da568 --- /dev/null +++ b/modules/nf-core/bcftools/query/main.nf @@ -0,0 +1,52 @@ +process BCFTOOLS_QUERY { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(vcf), path(tbi) + path regions + path targets + path samples + + output: + tuple val(meta), path("*.${suffix}.gz"), emit: output + tuple val(meta), path("*.${suffix}.gz.tbi"), emit: index + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? 
"--samples-file ${samples}" : "" + """ + bcftools query \\ + ${regions_file} \\ + ${targets_file} \\ + ${samples_file} \\ + ${args} \\ + ${vcf} \\ + > ${prefix}.${suffix} + bgzip -c ${prefix}.${suffix} > ${prefix}.${suffix}.gz + tabix -s 1 -b 2 -e 2 ${prefix}.${suffix}.gz + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + """ + touch ${prefix}.${suffix} + bgzip -c ${prefix}.${suffix} > ${prefix}.${suffix}.gz + touch ${prefix}.${suffix}.gz.tbi + tabix -s 1 -b 2 -e 2 ${prefix}.${suffix}.gz + """ +} diff --git a/modules/nf-core/bcftools/query/meta.yml b/modules/nf-core/bcftools/query/meta.yml new file mode 100644 index 00000000..6bcb5e57 --- /dev/null +++ b/modules/nf-core/bcftools/query/meta.yml @@ -0,0 +1,89 @@ +name: bcftools_query +description: Extracts fields from VCF or BCF files and outputs them in user-defined + format. +keywords: + - query + - variant calling + - bcftools + - VCF +tools: + - query: + description: | + Extracts fields from VCF or BCF files and outputs them in user-defined format. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be queried. + pattern: "*.{vcf.gz, vcf}" + ontologies: [] + - tbi: + type: file + description: | + The tab index for the VCF file to be inspected. + pattern: "*.tbi" + ontologies: [] + - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. 
+ ontologies: [] + - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon index files) + ontologies: [] + - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' + ontologies: [] +output: + output: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.${suffix}": + type: file + description: BCFTools query output file + ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@abhi18av" + - "@drpatelh" +maintainers: + - "@abhi18av" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/query/tests/main.nf.test b/modules/nf-core/bcftools/query/tests/main.nf.test new file mode 100644 index 00000000..63ac5af8 --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/main.nf.test @@ -0,0 +1,97 @@ +nextflow_process { + + name "Test Process BCFTOOLS_QUERY" + script "../main.nf" + process "BCFTOOLS_QUERY" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/query" + + config "./nextflow.config" + + test("sarscov2 - [vcf, tbi], [], [], []") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi], vcf, tsv, []") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi], [], [], [] - stub") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.output[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } +} diff --git a/modules/nf-core/bcftools/query/tests/main.nf.test.snap b/modules/nf-core/bcftools/query/tests/main.nf.test.snap new file mode 100644 index 00000000..5168ef3f --- /dev/null 
+++ b/modules/nf-core/bcftools/query/tests/main.nf.test.snap @@ -0,0 +1,73 @@ +{ + "sarscov2 - [vcf, tbi], vcf, tsv, []": { + "content": [ + [ + [ + { + "id": "out" + }, + "out.txt:md5,75a6bd0084e2e1838cf7baba11b99d19" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_QUERY", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:16:54.523612853" + }, + "sarscov2 - [vcf, tbi], [], [], [] - stub": { + "content": [ + "out.txt", + { + "versions_bcftools": [ + [ + "BCFTOOLS_QUERY", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:17:00.64798632" + }, + "sarscov2 - [vcf, tbi], [], [], []": { + "content": [ + [ + [ + { + "id": "out" + }, + "out.txt:md5,87a2ab194e1ee3219b44e58429ec3307" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_QUERY", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:16:47.953130141" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/query/tests/nextflow.config b/modules/nf-core/bcftools/query/tests/nextflow.config new file mode 100644 index 00000000..8547ec10 --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = "-f '%CHROM %POS %REF %ALT[%SAMPLE=%GT]'" +} diff --git a/modules/nf-core/bcftools/sort/bcftools-sort.diff b/modules/nf-core/bcftools/sort/bcftools-sort.diff index b1696f25..0b3e62f8 100644 --- a/modules/nf-core/bcftools/sort/bcftools-sort.diff +++ b/modules/nf-core/bcftools/sort/bcftools-sort.diff @@ -1,15 +1,18 @@ Changes in component 'nf-core/bcftools/sort' 'modules/nf-core/bcftools/sort/meta.yml' is unchanged -Changes in 'bcftools/sort/main.nf': ---- modules/nf-core/bcftools/sort/main.nf -+++ modules/nf-core/bcftools/sort/main.nf -@@ -67,4 +67,4 @@ - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 
's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ --} -+} -'modules/nf-core/bcftools/sort/environment.yml' is unchanged +'modules/nf-core/bcftools/sort/main.nf' is unchanged +Changes in 'bcftools/sort/environment.yml': +--- modules/nf-core/bcftools/sort/environment.yml ++++ modules/nf-core/bcftools/sort/environment.yml +@@ -5,6 +5,5 @@ + - bioconda + dependencies: + # renovate: datasource=conda depName=bioconda/htslib ++ - bioconda::bcftools=1.22 + - bioconda::htslib=1.22.1 +- # renovate: datasource=conda depName=bioconda/bcftools +- - bioconda::bcftools=1.22 + 'modules/nf-core/bcftools/sort/tests/vcf_gz_index.config' is unchanged 'modules/nf-core/bcftools/sort/tests/main.nf.test' is unchanged 'modules/nf-core/bcftools/sort/tests/vcf_gz_index_csi.config' is unchanged diff --git a/modules/nf-core/bcftools/sort/environment.yml b/modules/nf-core/bcftools/sort/environment.yml index ba863b38..cb55500b 100644 --- a/modules/nf-core/bcftools/sort/environment.yml +++ b/modules/nf-core/bcftools/sort/environment.yml @@ -5,6 +5,5 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/htslib - - bioconda::htslib=1.22.1 - # renovate: datasource=conda depName=bioconda/bcftools - bioconda::bcftools=1.22 + - bioconda::htslib=1.22.1 diff --git a/modules/nf-core/bcftools/sort/main.nf b/modules/nf-core/bcftools/sort/main.nf index 302c7311..e0dfad2d 100644 --- a/modules/nf-core/bcftools/sort/main.nf +++ b/modules/nf-core/bcftools/sort/main.nf @@ -1,20 +1,20 @@ process BCFTOOLS_SORT { - tag "$meta.id" + tag "${meta.id}" label 'process_medium' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data': - 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" input: tuple val(meta), path(vcf) output: tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf - tuple val(meta), path("*.tbi") , emit: tbi, optional: true - tuple val(meta), path("*.csi") , emit: csi, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("*.tbi"), emit: tbi, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools when: task.ext.when == null || task.ext.when @@ -22,49 +22,51 @@ process BCFTOOLS_SORT { script: def args = task.ext.args ?: '--output-type z' def prefix = task.ext.prefix ?: "${meta.id}" - def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : - args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : - args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : - args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : - "vcf" - + def extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? "vcf" + : "vcf" + def max_memory = task.memory ? 
"--max-mem ${task.memory.toUnit('MB') * 0.9}M" : "" """ bcftools \\ sort \\ - --output ${prefix}.sorted.${extension} \\ + --output ${prefix}.${extension} \\ --temp-dir . \\ - $args \\ - $vcf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS + ${max_memory} \\ + ${args} \\ + ${vcf} """ stub: def args = task.ext.args ?: '--output-type z' def prefix = task.ext.prefix ?: "${meta.id}" - def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : - args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : - args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : - args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : - "vcf" - def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : - args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : - args.contains("--write-index") || args.contains("-W") ? "csi" : - "" + def extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? "vcf" + : "vcf" + def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") + ? "tbi" + : args.contains("--write-index=csi") || args.contains("-W=csi") + ? "csi" + : args.contains("--write-index") || args.contains("-W") + ? "csi" + : "" def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? 
"touch ${prefix}.${extension}.${index}" : "" """ ${create_cmd} ${prefix}.${extension} ${create_index} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/nf-core/bcftools/sort/meta.yml b/modules/nf-core/bcftools/sort/meta.yml index c15487c7..1c3f2a54 100644 --- a/modules/nf-core/bcftools/sort/meta.yml +++ b/modules/nf-core/bcftools/sort/meta.yml @@ -58,13 +58,27 @@ output: description: Default VCF file index pattern: "*.csi" ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool authors: - "@Gwennid" maintainers: diff --git a/modules/nf-core/bcftools/sort/tests/main.nf.test b/modules/nf-core/bcftools/sort/tests/main.nf.test index 7d580e4f..bda7bace 100644 --- a/modules/nf-core/bcftools/sort/tests/main.nf.test +++ b/modules/nf-core/bcftools/sort/tests/main.nf.test @@ -27,7 +27,6 @@ nextflow_process { { assert snapshot(process.out).match("vcf") } ) } - } test("sarscov2 - vcf_gz_index") { @@ -52,12 +51,11 @@ nextflow_process { process.out.vcf, process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions") } ).match() }, { assert process.out.csi[0][1].endsWith(".csi") } ) } - } test("sarscov2 - vcf_gz_index_csi") { @@ -82,12 +80,11 @@ nextflow_process { process.out.vcf, process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions") } ).match() }, { assert process.out.csi[0][1].endsWith(".csi") } ) } - } test("sarscov2 - vcf_gz_index_tbi") { @@ -112,12 +109,11 @@ nextflow_process { process.out.vcf, process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions") } ).match() }, { assert process.out.tbi[0][1].endsWith(".tbi") } ) } - } test("sarscov2 - vcf - stub") { @@ -139,7 +135,6 @@ nextflow_process { { assert snapshot(process.out).match() } ) } - } test("sarscov2 - vcf_gz_index - stub") { @@ -165,7 +160,6 @@ nextflow_process { { assert process.out.csi[0][1].endsWith(".csi") } ) } - } test("sarscov2 - vcf_gz_index_csi - stub") { @@ -191,7 +185,6 @@ nextflow_process { { assert process.out.csi[0][1].endsWith(".csi") } ) } - } test("sarscov2 - vcf_gz_index_tbi - stub") { @@ -217,6 +210,5 @@ nextflow_process { { assert process.out.tbi[0][1].endsWith(".tbi") } ) } - } } diff --git a/modules/nf-core/bcftools/sort/tests/main.nf.test.snap b/modules/nf-core/bcftools/sort/tests/main.nf.test.snap index 3cbca56c..9b9e4dc7 100644 --- a/modules/nf-core/bcftools/sort/tests/main.nf.test.snap +++ b/modules/nf-core/bcftools/sort/tests/main.nf.test.snap @@ -22,7 +22,11 @@ ], "3": [ - 
"versions.yml:md5,9699a51675cf58ed9d61b4063de92229" + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] ], "csi": [ @@ -43,16 +47,20 @@ "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "versions": [ - "versions.yml:md5,9699a51675cf58ed9d61b4063de92229" + "versions_bcftools": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:49:07.959267" + "timestamp": "2026-01-20T12:19:24.286732272" }, "vcf": { "content": [ @@ -72,7 +80,11 @@ ], "3": [ - "versions.yml:md5,9699a51675cf58ed9d61b4063de92229" + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] ], "csi": [ @@ -88,16 +100,20 @@ "test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" ] ], - "versions": [ - "versions.yml:md5,9699a51675cf58ed9d61b4063de92229" + "versions_bcftools": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:48:35.236174" + "timestamp": "2026-01-20T12:18:38.783455397" }, "sarscov2 - vcf_gz_index": { "content": [ @@ -120,15 +136,21 @@ [ ], - [ - "versions.yml:md5,9699a51675cf58ed9d61b4063de92229" - ] + { + "versions_bcftools": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:48:39.95133" + "timestamp": "2026-01-20T12:18:45.081447395" }, "sarscov2 - vcf_gz_index_csi": { "content": [ @@ -151,15 +173,21 @@ [ ], - [ - "versions.yml:md5,9699a51675cf58ed9d61b4063de92229" - ] + { + "versions_bcftools": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:48:44.50977" + "timestamp": "2026-01-20T12:18:51.194304667" }, "sarscov2 - 
vcf_gz_index - stub": { "content": [ @@ -184,7 +212,11 @@ ] ], "3": [ - "versions.yml:md5,9699a51675cf58ed9d61b4063de92229" + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] ], "csi": [ [ @@ -205,16 +237,20 @@ "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "versions": [ - "versions.yml:md5,9699a51675cf58ed9d61b4063de92229" + "versions_bcftools": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:48:58.749279" + "timestamp": "2026-01-20T12:19:11.413154111" }, "sarscov2 - vcf_gz_index_csi - stub": { "content": [ @@ -239,7 +275,11 @@ ] ], "3": [ - "versions.yml:md5,9699a51675cf58ed9d61b4063de92229" + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] ], "csi": [ [ @@ -260,16 +300,20 @@ "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "versions": [ - "versions.yml:md5,9699a51675cf58ed9d61b4063de92229" + "versions_bcftools": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:49:03.283017" + "timestamp": "2026-01-20T12:19:17.770087535" }, "sarscov2 - vcf - stub": { "content": [ @@ -289,7 +333,11 @@ ], "3": [ - "versions.yml:md5,9699a51675cf58ed9d61b4063de92229" + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] ], "csi": [ @@ -305,16 +353,20 @@ "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "versions": [ - "versions.yml:md5,9699a51675cf58ed9d61b4063de92229" + "versions_bcftools": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:48:54.113947" + "timestamp": "2026-01-20T12:19:04.409593559" }, "sarscov2 - vcf_gz_index_tbi": { "content": [ @@ -337,14 +389,20 @@ "test_vcf.vcf.gz.tbi" ] ], - [ - 
"versions.yml:md5,9699a51675cf58ed9d61b4063de92229" - ] + { + "versions_bcftools": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-11T14:48:48.979311" + "timestamp": "2026-01-20T12:18:57.536633502" } } \ No newline at end of file diff --git a/modules/nf-core/deepvariant/callvariants/deepvariant-callvariants.diff b/modules/nf-core/deepvariant/callvariants/deepvariant-callvariants.diff new file mode 100644 index 00000000..4caed4cc --- /dev/null +++ b/modules/nf-core/deepvariant/callvariants/deepvariant-callvariants.diff @@ -0,0 +1,25 @@ +Changes in component 'nf-core/deepvariant/callvariants' +'modules/nf-core/deepvariant/callvariants/meta.yml' is unchanged +Changes in 'deepvariant/callvariants/main.nf': +--- modules/nf-core/deepvariant/callvariants/main.nf ++++ modules/nf-core/deepvariant/callvariants/main.nf +@@ -1,10 +1,11 @@ + + process DEEPVARIANT_CALLVARIANTS { + tag "$meta.id" +- label 'process_high' +- ++ label "${params.use_gpu ? 'process_gpu_very_high' : 'process_very_high'}" ++ label "${params.use_gpu ? '' : 'process_long'}" ++ + //Conda is not supported at the moment +- container "docker.io/google/deepvariant:1.9.0" ++ container params.use_gpu ? 
"docker.io/google/deepvariant:1.9.0-gpu" : "docker.io/google/deepvariant:1.9.0" + + input: + tuple val(meta), path(make_examples_tfrecords) + +'modules/nf-core/deepvariant/callvariants/tests/main.nf.test' is unchanged +'modules/nf-core/deepvariant/callvariants/tests/main.nf.test.snap' is unchanged +'modules/nf-core/deepvariant/callvariants/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/deepvariant/callvariants/main.nf b/modules/nf-core/deepvariant/callvariants/main.nf new file mode 100644 index 00000000..fafdd634 --- /dev/null +++ b/modules/nf-core/deepvariant/callvariants/main.nf @@ -0,0 +1,51 @@ + +process DEEPVARIANT_CALLVARIANTS { + tag "$meta.id" + label "${params.use_gpu ? 'process_gpu_very_high' : 'process_very_high'}" + label "${params.use_gpu ? '' : 'process_long'}" + + //Conda is not supported at the moment + container params.use_gpu ? "docker.io/google/deepvariant:1.9.0-gpu" : "docker.io/google/deepvariant:1.9.0" + + input: + tuple val(meta), path(make_examples_tfrecords) + + output: + tuple val(meta), path("${prefix}.call-*-of-*.tfrecord.gz"), emit: call_variants_tfrecords + tuple val("${task.process}"), val('deepvariant'), eval("/opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //'"), topic: versions, emit: versions_deepvariant + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def matcher = make_examples_tfrecords[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/ + if (!matcher.matches()) { + throw new IllegalArgumentException("tfrecord baseName '" + make_examples_tfrecords[0].baseName + "' doesn't match the expected pattern") + } + def examples_tfrecord_name = matcher[0][1] + def shardCount = matcher[0][2] + // Reconstruct the logical name - ${tfrecord_name}.examples.tfrecord@${task.cpus}.gz + def examples_tfrecords_logical_name = "${examples_tfrecord_name}@${shardCount}.gz" + + """ + /opt/deepvariant/bin/call_variants \\ + ${args} \\ + --outfile "${prefix}.call.tfrecord.gz" \\ + --examples "${examples_tfrecords_logical_name}" + + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.call-00000-of-00001.tfrecord.gz + + """ +} diff --git a/modules/nf-core/deepvariant/callvariants/meta.yml b/modules/nf-core/deepvariant/callvariants/meta.yml new file mode 100644 index 00000000..fa1aaa42 --- /dev/null +++ b/modules/nf-core/deepvariant/callvariants/meta.yml @@ -0,0 +1,68 @@ +name: deepvariant_callvariants +description: Call variants from the examples produced by make_examples +keywords: + - variant calling + - machine learning + - neural network +tools: + - deepvariant: + description: DeepVariant is an analysis pipeline that uses a deep neural network + to call genetic variants from next-generation DNA sequencing data + homepage: https://github.com/google/deepvariant + documentation: https://github.com/google/deepvariant + tool_dev_url: https://github.com/google/deepvariant + doi: "10.1038/nbt.4235" + licence: ["BSD-3-clause"] + identifier: biotools:deepvariant +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - make_examples_tfrecords: + type: file + description: The actual sharded input files, from DEEPVARIANT_MAKEEXAMPLES process + pattern: "*.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format +output: + call_variants_tfrecords: + - - meta: + type: list + description: | + Each output contains: unique ID string from input channel, meta, tfrecord file with variant calls. + - ${prefix}.call-*-of-*.tfrecord.gz: + type: list + description: | + Each output contains: unique ID string from input channel, meta, tfrecord file with variant calls. + versions_deepvariant: + - - ${task.process}: + type: string + description: The process the versions were collected from + - deepvariant: + type: string + description: The tool name + - /opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //': + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - deepvariant: + type: string + description: The tool name + - /opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //': + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@abhi18av" + - "@ramprasadn" + - "@fa2k" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/deepvariant/callvariants/tests/main.nf.test b/modules/nf-core/deepvariant/callvariants/tests/main.nf.test new file mode 100644 index 00000000..d617650b --- /dev/null +++ b/modules/nf-core/deepvariant/callvariants/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process DEEPVARIANT_CALLVARIANTS" + script "../main.nf" + config "./nextflow.config" + process "DEEPVARIANT_CALLVARIANTS" + + tag "deepvariant/makeexamples" + tag "deepvariant/callvariants" + tag "deepvariant" + tag "modules" + tag "modules_nfcore" + + test("homo_sapiens - wgs") { + setup { + 
run("DEEPVARIANT_MAKEEXAMPLES") { + script "../../makeexamples/main.nf" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + } + when { + process { + """ + input[0] = DEEPVARIANT_MAKEEXAMPLES.out.examples + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.call_variants_tfrecords.get(0).get(0) == [ id:'test', single_end:false ] }, + // The tfrecord binary representation is not stable, but we check the name of the output. 
+ { assert snapshot(file(process.out.call_variants_tfrecords.get(0).get(1)).name).match("homo_sapiens-wgs-call_variants_tfrecords-filenames")}, + ) + } + } + + test("homo_sapiens - wgs - stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta + [] // No input paths are needed in stub mode + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/deepvariant/callvariants/tests/main.nf.test.snap b/modules/nf-core/deepvariant/callvariants/tests/main.nf.test.snap new file mode 100644 index 00000000..ce71dac2 --- /dev/null +++ b/modules/nf-core/deepvariant/callvariants/tests/main.nf.test.snap @@ -0,0 +1,55 @@ +{ + "homo_sapiens-wgs-call_variants_tfrecords-filenames": { + "content": [ + "test.call-00000-of-00001.tfrecord.gz" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-04T17:04:33.276938" + }, + "homo_sapiens - wgs - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.call-00000-of-00001.tfrecord.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + "DEEPVARIANT_CALLVARIANTS", + "deepvariant", + "1.9.0" + ] + ], + "call_variants_tfrecords": [ + [ + { + "id": "test", + "single_end": false + }, + "test.call-00000-of-00001.tfrecord.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_deepvariant": [ + [ + "DEEPVARIANT_CALLVARIANTS", + "deepvariant", + "1.9.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T17:02:54.403068431" + } +} \ No newline at end of file diff --git a/modules/nf-core/deepvariant/callvariants/tests/nextflow.config b/modules/nf-core/deepvariant/callvariants/tests/nextflow.config new file mode 100644 index 00000000..68aec144 --- /dev/null +++ b/modules/nf-core/deepvariant/callvariants/tests/nextflow.config @@ -0,0 +1,11 @@ 
+process { + withName: "DEEPVARIANT_CALLVARIANTS" { + ext.args = '--checkpoint "/opt/models/wgs"' + cpus = 2 // Keep CPUs fixed so the number of output files is reproducible + } +} +process { + withName: "DEEPVARIANT_MAKEEXAMPLES" { + ext.args = '--checkpoint "/opt/models/wgs" --call_small_model_examples --small_model_indel_gq_threshold "30" --small_model_snp_gq_threshold "25" --small_model_vaf_context_window_size "51" --trained_small_model_path "/opt/smallmodels/wgs"' + } +} diff --git a/modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff b/modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff new file mode 100644 index 00000000..a747261a --- /dev/null +++ b/modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff @@ -0,0 +1,54 @@ +Changes in component 'nf-core/deepvariant/makeexamples' +'modules/nf-core/deepvariant/makeexamples/meta.yml' is unchanged +Changes in 'deepvariant/makeexamples/main.nf': +--- modules/nf-core/deepvariant/makeexamples/main.nf ++++ modules/nf-core/deepvariant/makeexamples/main.nf +@@ -1,11 +1,11 @@ + process DEEPVARIANT_MAKEEXAMPLES { + tag "$meta.id" +- label 'process_high' ++ label 'process_very_high' ++ label 'process_short' ++ ++ container params.use_gpu ? 
"docker.io/google/deepvariant:1.9.0-gpu" : "docker.io/google/deepvariant:1.9.0" + +- //Conda is not supported at the moment +- container "docker.io/google/deepvariant:1.9.0" +- +- input: ++ input: + tuple val(meta), path(input), path(index), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) +@@ -14,7 +14,7 @@ + + output: + tuple val(meta), path("${prefix}.examples.tfrecord-*-of-*.gz{,.example_info.json}"), emit: examples +- tuple val(meta), path("${prefix}.gvcf.tfrecord-*-of-*.gz"), emit: gvcf ++ tuple val(meta), path("${prefix}.gvcf.tfrecord-*-of-*.gz"), optional: true, emit: gvcf + tuple val(meta), path("${prefix}_call_variant_outputs.examples.tfrecord-*-of-*.gz", arity: "0..*"), emit: small_model_calls + tuple val("${task.process}"), val('deepvariant'), eval("/opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //'"), topic: versions, emit: versions_deepvariant + +@@ -30,14 +30,16 @@ + prefix = task.ext.prefix ?: "${meta.id}" + def regions = intervals ? "--regions ${intervals}" : "" + def par_regions = par_bed ? "--par_regions_bed=${par_bed}" : "" ++ def gvcf_arg = params.generate_gvcf ? 
"--gvcf \"./${prefix}.gvcf.tfrecord@${task.cpus}.gz\"" : "" + + """ + seq 0 ${task.cpus - 1} | parallel -q --halt 2 --line-buffer /opt/deepvariant/bin/make_examples \\ + --mode calling \\ + --ref "${fasta}" \\ + --reads "${input}" \\ ++ --sample_name ${prefix} \\ + --examples "./${prefix}.examples.tfrecord@${task.cpus}.gz" \\ +- --gvcf "./${prefix}.gvcf.tfrecord@${task.cpus}.gz" \\ ++ ${gvcf_arg} \\ + ${regions} \\ + ${par_regions} \\ + ${args} \\ + +'modules/nf-core/deepvariant/makeexamples/tests/main.nf.test' is unchanged +'modules/nf-core/deepvariant/makeexamples/tests/main.nf.test.snap' is unchanged +'modules/nf-core/deepvariant/makeexamples/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/deepvariant/makeexamples/main.nf b/modules/nf-core/deepvariant/makeexamples/main.nf new file mode 100644 index 00000000..55a932d4 --- /dev/null +++ b/modules/nf-core/deepvariant/makeexamples/main.nf @@ -0,0 +1,59 @@ +process DEEPVARIANT_MAKEEXAMPLES { + tag "$meta.id" + label 'process_very_high' + + container params.use_gpu ? 
"docker.io/google/deepvariant:1.9.0-gpu" : "docker.io/google/deepvariant:1.9.0" + + input: + tuple val(meta), path(input), path(index), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(gzi) + tuple val(meta5), path(par_bed) + + output: + tuple val(meta), path("${prefix}.examples.tfrecord-*-of-*.gz{,.example_info.json}"), emit: examples + tuple val(meta), path("${prefix}.gvcf.tfrecord-*-of-*.gz"), optional: true, emit: gvcf + tuple val(meta), path("${prefix}_call_variant_outputs.examples.tfrecord-*-of-*.gz", arity: "0..*"), emit: small_model_calls + tuple val("${task.process}"), val('deepvariant'), eval("/opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //'"), topic: versions, emit: versions_deepvariant + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def regions = intervals ? "--regions ${intervals}" : "" + def par_regions = par_bed ? "--par_regions_bed=${par_bed}" : "" + def gvcf_arg = params.generate_gvcf ? 
"--gvcf \"./${prefix}.gvcf.tfrecord@${task.cpus}.gz\"" : "" + + """ + seq 0 ${task.cpus - 1} | parallel -q --halt 2 --line-buffer /opt/deepvariant/bin/make_examples \\ + --mode calling \\ + --ref "${fasta}" \\ + --reads "${input}" \\ + --sample_name ${prefix} \\ + --examples "./${prefix}.examples.tfrecord@${task.cpus}.gz" \\ + ${gvcf_arg} \\ + ${regions} \\ + ${par_regions} \\ + ${args} \\ + --task {} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + printf -v SHARD_COUNT "%04d" ${task.cpus} + for i in \$( seq -f "%04g" 0 ${task.cpus-1} ) + do + echo "" | gzip > ${prefix}.examples.tfrecord-\$i-of-\$SHARD_COUNT.tfrecord.gz + touch ${prefix}.examples.tfrecord-\$i-of-\$SHARD_COUNT.tfrecord.gz.example_info.json + echo "" | gzip > ${prefix}.gvcf.tfrecord-\$i-of-\$SHARD_COUNT.tfrecord.gz + done + """ +} diff --git a/modules/nf-core/deepvariant/makeexamples/meta.yml b/modules/nf-core/deepvariant/makeexamples/meta.yml new file mode 100644 index 00000000..12056fbd --- /dev/null +++ b/modules/nf-core/deepvariant/makeexamples/meta.yml @@ -0,0 +1,135 @@ +name: deepvariant_makeexamples +description: Transforms the input alignments to a format suitable for the deep neural + network variant caller +keywords: + - variant calling + - machine learning + - neural network +tools: + - deepvariant: + description: DeepVariant is an analysis pipeline that uses a deep neural network + to call genetic variants from next-generation DNA sequencing data + homepage: https://github.com/google/deepvariant + documentation: https://github.com/google/deepvariant + tool_dev_url: https://github.com/google/deepvariant + doi: "10.1038/nbt.4235" + licence: ["BSD-3-clause"] + identifier: biotools:deepvariant +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file + pattern: "*.bam/cram" + ontologies: [] + - index: + type: file + description: Index of BAM/CRAM file + pattern: "*.bai/crai" + ontologies: [] + - intervals: + type: file + description: Interval file for targeted regions + pattern: "*.bed" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - gzi: + type: file + description: GZI index of reference fasta file + ontologies: [] + - - meta5: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + pattern: "*.gzi" + - par_bed: + type: file + description: BED file containing PAR regions + pattern: "*.bed" + ontologies: [] +output: + examples: + - - meta: + type: list + description: | + Tuple containing sample metadata and examples that can be used for calling + - ${prefix}.examples.tfrecord-*-of-*.gz{,.example_info.json}: + type: list + description: | + Tuple containing sample metadata and examples that can be used for calling + gvcf: + - - meta: + type: list + description: | + Tuple containing sample metadata and examples that can be used for calling + - ${prefix}.gvcf.tfrecord-*-of-*.gz: + type: list + description: | + Tuple containing sample metadata and the GVCF data in tfrecord format + small_model_calls: + - - meta: + type: list + description: | + Tuple containing sample metadata and examples that can be used for calling + - ${prefix}_call_variant_outputs.examples.tfrecord-*-of-*.gz: + type: list + description: | + Optional variant calls from the small model, if enabled, in tfrecord format + versions_deepvariant: + - - ${task.process}: + type: string + description: The process the versions were collected from + - deepvariant: + type: string + description: The tool name + - /opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //': + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - deepvariant: + type: string + description: The tool name + - /opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@abhi18av" + - "@ramprasadn" + - "@fa2k" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/deepvariant/makeexamples/tests/main.nf.test b/modules/nf-core/deepvariant/makeexamples/tests/main.nf.test new file mode 100644 index 00000000..cc06f780 --- 
/dev/null +++ b/modules/nf-core/deepvariant/makeexamples/tests/main.nf.test @@ -0,0 +1,204 @@ +nextflow_process { + + name "Test Process DEEPVARIANT_MAKEEXAMPLES" + script "../main.nf" + config "./nextflow.config" + process "DEEPVARIANT_MAKEEXAMPLES" + + tag "deepvariant/makeexamples" + tag "deepvariant" + tag "modules" + tag "modules_nfcore" + + test("homo_sapiens - [bam, bai] - fasta - fai") { + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + + { assert process.out.examples[0][0] == [id:'test', single_end:false] }, + { assert process.out.gvcf[0][0] == [id:'test', single_end:false] }, + { assert process.out.examples[0][1].size() == 4 }, + { assert snapshot( + process.out.examples[0][1].collect { file(it).name } + ).match("test1-examples-filenames") }, + { assert process.out.gvcf[0][1].size() == 2 }, + { assert snapshot(process.out.versions_deepvariant).match("test1-versions") }, + { assert snapshot( + process.out.gvcf[0][1].collect { file(it).name } + ).match("test1-gvcf-filenames") } + ) + } + } + + test("homo_sapiens - [cram, crai, genome_bed] - fasta - fai") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 
'/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.examples[0][0] == [id:'test', single_end:false] }, + { assert process.out.gvcf[0][0] == [id:'test', single_end:false] }, + { assert process.out.examples[0][1].size() == 4 }, + { assert snapshot( + process.out.examples[0][1].collect { file(it).name } + ).match("test2-examples-filenames") }, + { assert process.out.gvcf[0][1].size() == 2 }, + { assert snapshot(process.out.versions_deepvariant).match("test2-versions") }, + { assert snapshot( + process.out.gvcf[0][1].collect { file(it).name } + ).match("test2-gvcf-filenames") } + ) + } + } + + test("homo_sapiens - [bam, bai] - fasta_gz - fasta_gz_fai") { + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.gz', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + 
'/genomics/homo_sapiens/genome/genome.fasta.gz.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.gz.gzi', checkIfExists: true) + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.examples[0][0] == [id:'test', single_end:false] }, + { assert process.out.gvcf[0][0] == [id:'test', single_end:false] }, + { assert process.out.examples[0][1].size() == 4 }, + { assert snapshot( + process.out.examples[0][1].collect { file(it).name } + ).match("test3-examples-filenames") }, + { assert process.out.gvcf[0][1].size() == 2 }, + { assert snapshot(process.out.versions_deepvariant).match("test3-versions") }, + { assert snapshot( + process.out.gvcf[0][1].collect { file(it).name } + ).match("test3-gvcf-filenames") } + ) + } + } + + test("stub") { + + options "-stub" + + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.gvcf[0][0] == [id:'test', single_end:false] }, + { assert process.out.examples[0][1].size() == 4 }, + { assert snapshot( + process.out.examples[0][1].collect { file(it).name } + ).match("test4-examples-filenames") }, + { assert process.out.gvcf[0][1].size() 
== 2 }, + { assert snapshot(process.out.versions_deepvariant).match("test4-versions") }, + { assert snapshot( + process.out.gvcf[0][1].collect { file(it).name } + ).match("test4-gvcf-filenames") } + ) + } + } +} diff --git a/modules/nf-core/deepvariant/makeexamples/tests/main.nf.test.snap b/modules/nf-core/deepvariant/makeexamples/tests/main.nf.test.snap new file mode 100644 index 00000000..729f0dc5 --- /dev/null +++ b/modules/nf-core/deepvariant/makeexamples/tests/main.nf.test.snap @@ -0,0 +1,178 @@ +{ + "test1-gvcf-filenames": { + "content": [ + [ + "test.gvcf.tfrecord-00000-of-00002.gz", + "test.gvcf.tfrecord-00001-of-00002.gz" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:12:57.93412258" + }, + "test3-versions": { + "content": [ + [ + [ + "DEEPVARIANT_MAKEEXAMPLES", + "deepvariant", + "1.9.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T17:09:55.034298895" + }, + "test2-examples-filenames": { + "content": [ + [ + "test.examples.tfrecord-00000-of-00002.gz", + "test.examples.tfrecord-00000-of-00002.gz.example_info.json", + "test.examples.tfrecord-00001-of-00002.gz", + "test.examples.tfrecord-00001-of-00002.gz.example_info.json" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:14:02.467533548" + }, + "test1-examples-filenames": { + "content": [ + [ + "test.examples.tfrecord-00000-of-00002.gz", + "test.examples.tfrecord-00000-of-00002.gz.example_info.json", + "test.examples.tfrecord-00001-of-00002.gz", + "test.examples.tfrecord-00001-of-00002.gz.example_info.json" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:12:57.790379812" + }, + "test2-versions": { + "content": [ + [ + [ + "DEEPVARIANT_MAKEEXAMPLES", + "deepvariant", + "1.9.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": 
"2026-02-02T17:07:52.050411549" + }, + "test4-versions": { + "content": [ + [ + [ + "DEEPVARIANT_MAKEEXAMPLES", + "deepvariant", + "1.9.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T17:12:07.012233232" + }, + "test4-examples-filenames": { + "content": [ + [ + "test.examples.tfrecord-0000-of-0002.tfrecord.gz", + "test.examples.tfrecord-0000-of-0002.tfrecord.gz.example_info.json", + "test.examples.tfrecord-0001-of-0002.tfrecord.gz", + "test.examples.tfrecord-0001-of-0002.tfrecord.gz.example_info.json" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:15:58.286077155" + }, + "test1-versions": { + "content": [ + [ + [ + "DEEPVARIANT_MAKEEXAMPLES", + "deepvariant", + "1.9.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T17:05:28.75651648" + }, + "test3-gvcf-filenames": { + "content": [ + [ + "test.gvcf.tfrecord-00000-of-00002.gz", + "test.gvcf.tfrecord-00001-of-00002.gz" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:15:03.780115231" + }, + "test3-examples-filenames": { + "content": [ + [ + "test.examples.tfrecord-00000-of-00002.gz", + "test.examples.tfrecord-00000-of-00002.gz.example_info.json", + "test.examples.tfrecord-00001-of-00002.gz", + "test.examples.tfrecord-00001-of-00002.gz.example_info.json" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:15:03.702565392" + }, + "test2-gvcf-filenames": { + "content": [ + [ + "test.gvcf.tfrecord-00000-of-00002.gz", + "test.gvcf.tfrecord-00001-of-00002.gz" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:14:02.550236324" + }, + "test4-gvcf-filenames": { + "content": [ + [ + "test.gvcf.tfrecord-0000-of-0002.tfrecord.gz", + "test.gvcf.tfrecord-0001-of-0002.tfrecord.gz" + ] + ], + "meta": { + "nf-test": 
"0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:15:58.412547051" + } +} \ No newline at end of file diff --git a/modules/nf-core/deepvariant/makeexamples/tests/nextflow.config b/modules/nf-core/deepvariant/makeexamples/tests/nextflow.config new file mode 100644 index 00000000..6811fe48 --- /dev/null +++ b/modules/nf-core/deepvariant/makeexamples/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: "DEEPVARIANT_MAKEEXAMPLES" { + cpus = 2 // The number of output files is determined by cpus - keep it the same for tests + ext.args = '--checkpoint "/opt/models/wgs" --call_small_model_examples --small_model_indel_gq_threshold "30" --small_model_snp_gq_threshold "25" --small_model_vaf_context_window_size "51" --trained_small_model_path "/opt/smallmodels/wgs"' + } +} diff --git a/modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff b/modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff new file mode 100644 index 00000000..b721ee76 --- /dev/null +++ b/modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff @@ -0,0 +1,67 @@ +Changes in component 'nf-core/deepvariant/postprocessvariants' +'modules/nf-core/deepvariant/postprocessvariants/meta.yml' is unchanged +Changes in 'deepvariant/postprocessvariants/main.nf': +--- modules/nf-core/deepvariant/postprocessvariants/main.nf ++++ modules/nf-core/deepvariant/postprocessvariants/main.nf +@@ -1,6 +1,6 @@ + process DEEPVARIANT_POSTPROCESSVARIANTS { + tag "$meta.id" +- label 'process_medium' ++ label 'process_high' + + //Conda is not supported at the moment + container "docker.io/google/deepvariant:1.9.0" +@@ -14,8 +14,8 @@ + output: + tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf + tuple val(meta), path("${prefix}.vcf.gz.{tbi,csi}") , emit: vcf_index +- tuple val(meta), path("${prefix}.g.vcf.gz") , emit: gvcf +- tuple val(meta), path("${prefix}.g.vcf.gz.{tbi,csi}") , emit: gvcf_index ++ tuple val(meta), 
path("${prefix}.g.vcf.gz") , emit: gvcf, optional: true ++ tuple val(meta), path("${prefix}.g.vcf.gz.{tbi,csi}") , emit: gvcf_index, optional: true + tuple val("${task.process}"), val('deepvariant'), eval("/opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //'"), topic: versions, emit: versions_deepvariant + + when: +@@ -32,14 +32,17 @@ + def regions = intervals ? "--regions ${intervals}" : "" + def variant_calls_tfrecord_name = variant_calls_tfrecord_files[0].name.replaceFirst(/-\d{5}-of-\d{5}/, "") + +- def gvcf_matcher = gvcf_tfrecords[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/ +- if (!gvcf_matcher.matches()) { +- throw new IllegalArgumentException("tfrecord baseName '" + gvcf_tfrecords[0].baseName + "' doesn't match the expected pattern") ++ def gvcf_arg = "" ++ if (gvcf_tfrecords) { ++ def gvcf_matcher = gvcf_tfrecords[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/ ++ if (!gvcf_matcher.matches()) { ++ throw new IllegalArgumentException("tfrecord baseName '" + gvcf_tfrecords[0].baseName + "' doesn't match the expected pattern") ++ } ++ def gvcf_tfrecord_name = gvcf_matcher[0][1] ++ def gvcf_shardCount = gvcf_matcher[0][2] ++ def gvcf_tfrecords_logical_name = "${gvcf_tfrecord_name}@${gvcf_shardCount}.gz" ++ gvcf_arg = "--nonvariant_site_tfrecord_path \"${gvcf_tfrecords_logical_name}\" --gvcf_outfile \"${prefix}.g.vcf.gz\"" + } +- def gvcf_tfrecord_name = gvcf_matcher[0][1] +- def gvcf_shardCount = gvcf_matcher[0][2] +- // Reconstruct the logical name - ${tfrecord_name}.examples.tfrecord@${task.cpus}.gz +- def gvcf_tfrecords_logical_name = "${gvcf_tfrecord_name}@${gvcf_shardCount}.gz" + + // The following block determines whether the small model was used, and if so, adds the variant calls from it + // to the argument --small_model_cvo_records. 
+@@ -62,10 +65,10 @@ + --ref "${fasta}" \\ + --infile "${variant_calls_tfrecord_name}" \\ + --outfile "${prefix}.vcf.gz" \\ +- --nonvariant_site_tfrecord_path "${gvcf_tfrecords_logical_name}" \\ +- --gvcf_outfile "${prefix}.g.vcf.gz" \\ ++ --sample_name ${prefix} \\ + ${regions} \\ + ${small_model_arg} \\ ++ ${gvcf_arg} \\ + --cpus $task.cpus + + """ + +'modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test' is unchanged +'modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test.snap' is unchanged +'modules/nf-core/deepvariant/postprocessvariants/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/deepvariant/postprocessvariants/main.nf b/modules/nf-core/deepvariant/postprocessvariants/main.nf new file mode 100644 index 00000000..b1fa93ac --- /dev/null +++ b/modules/nf-core/deepvariant/postprocessvariants/main.nf @@ -0,0 +1,90 @@ +process DEEPVARIANT_POSTPROCESSVARIANTS { + tag "$meta.id" + label 'process_high' + label 'process_short' + + //Conda is not supported at the moment + container "docker.io/google/deepvariant:1.9.0" + + input: + tuple val(meta), path(variant_calls_tfrecord_files), path(gvcf_tfrecords), path(small_model_calls), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(gzi) + + output: + tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf + tuple val(meta), path("${prefix}.vcf.gz.{tbi,csi}") , emit: vcf_index + tuple val(meta), path("${prefix}.g.vcf.gz") , emit: gvcf, optional: true + tuple val(meta), path("${prefix}.g.vcf.gz.{tbi,csi}") , emit: gvcf_index, optional: true + tuple val("${task.process}"), val('deepvariant'), eval("/opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //'"), topic: versions, emit: versions_deepvariant + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if 
(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def regions = intervals ? "--regions ${intervals}" : "" + def variant_calls_tfrecord_name = variant_calls_tfrecord_files[0].name.replaceFirst(/-\d{5}-of-\d{5}/, "") + + def gvcf_arg = "" + if (gvcf_tfrecords) { + def gvcf_matcher = gvcf_tfrecords[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/ + if (!gvcf_matcher.matches()) { + throw new IllegalArgumentException("tfrecord baseName '" + gvcf_tfrecords[0].baseName + "' doesn't match the expected pattern") + } + def gvcf_tfrecord_name = gvcf_matcher[0][1] + def gvcf_shardCount = gvcf_matcher[0][2] + def gvcf_tfrecords_logical_name = "${gvcf_tfrecord_name}@${gvcf_shardCount}.gz" + gvcf_arg = "--nonvariant_site_tfrecord_path \"${gvcf_tfrecords_logical_name}\" --gvcf_outfile \"${prefix}.g.vcf.gz\"" + } + + // The following block determines whether the small model was used, and if so, adds the variant calls from it + // to the argument --small_model_cvo_records. + def small_model_arg = "" + if (small_model_calls) { + small_model_matcher = (small_model_calls[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/) + if (!small_model_matcher.matches()) { + throw new IllegalArgumentException("tfrecord baseName '" + small_model_calls[0].baseName + "' doesn't match the expected pattern") + } + small_model_tfrecord_name = small_model_matcher[0][1] + small_model_shardCount = small_model_matcher[0][2] + // Reconstruct the logical name. 
Example: test_call_variant_outputs.examples.tfrecord@12.gz + small_model_tfrecords_logical_name = "${small_model_tfrecord_name}@${small_model_shardCount}.gz" + small_model_arg = "--small_model_cvo_records ${small_model_tfrecords_logical_name}" + } + + """ + /opt/deepvariant/bin/postprocess_variants \\ + ${args} \\ + --ref "${fasta}" \\ + --infile "${variant_calls_tfrecord_name}" \\ + --outfile "${prefix}.vcf.gz" \\ + --sample_name ${prefix} \\ + ${regions} \\ + ${small_model_arg} \\ + ${gvcf_arg} \\ + --cpus $task.cpus + + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + echo "" | gzip > ${prefix}.g.vcf.gz + touch ${prefix}.g.vcf.gz.tbi + + """ +} diff --git a/modules/nf-core/deepvariant/postprocessvariants/meta.yml b/modules/nf-core/deepvariant/postprocessvariants/meta.yml new file mode 100644 index 00000000..4a087011 --- /dev/null +++ b/modules/nf-core/deepvariant/postprocessvariants/meta.yml @@ -0,0 +1,155 @@ +name: deepvariant_postprocessvariants +description: DeepVariant is an analysis pipeline that uses a deep neural network to + call genetic variants from next-generation DNA sequencing data +keywords: + - variant calling + - machine learning + - neural network +tools: + - deepvariant: + description: DeepVariant is an analysis pipeline that uses a deep neural network + to call genetic variants from next-generation DNA sequencing data + homepage: https://github.com/google/deepvariant + documentation: https://github.com/google/deepvariant + tool_dev_url: https://github.com/google/deepvariant + doi: "10.1038/nbt.4235" + licence: ["BSD-3-clause"] + identifier: biotools:deepvariant +input: + - - meta: + type: map + description: | 
+ Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - variant_calls_tfrecord_files: + type: file + description: | + One or more data files containing variant calls from DEEPVARIANT_CALLVARIANTS + pattern: "*.tfrecord.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - gvcf_tfrecords: + type: file + description: | + Sharded tfrecord file from DEEPVARIANT_MAKEEXAMPLES with the coverage information used for GVCF output + pattern: "*.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - small_model_calls: + type: file + description: | + Sharded tfrecord file from DEEPVARIANT_MAKEEXAMPLES with variant calls from the small model + pattern: "*.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - intervals: + type: file + description: Interval file for targeted regions + pattern: "*.bed" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - gzi: + type: file + description: GZI index of reference fasta file + pattern: "*.gzi" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.vcf.gz: + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + vcf_index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.vcf.gz.{tbi,csi}: + type: file + description: Index for VCF + pattern: "$*.vcf.gz.{tbi,csi}" + ontologies: [] + gvcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.g.vcf.gz: + type: file + description: Compressed GVCF file + pattern: "*.g.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + gvcf_index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.g.vcf.gz.{tbi,csi}: + type: file + description: Index for GVCF + pattern: "*.g.vcf.gz.{tbi,csi}" + ontologies: [] + versions_deepvariant: + - - ${task.process}: + type: string + description: The process the versions were collected from + - deepvariant: + type: string + description: The tool name + - /opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //': + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - deepvariant: + type: string + description: The tool name + - /opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@abhi18av" + - "@ramprasadn" + - "@fa2k" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test b/modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test new file mode 100644 index 00000000..ef9110b0 --- /dev/null +++ b/modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test @@ -0,0 +1,123 @@ +nextflow_process { + + name "Test Process DEEPVARIANT_POSTPROCESSVARIANTS" + script "../main.nf" + process "DEEPVARIANT_POSTPROCESSVARIANTS" + config "./nextflow.config" + + tag 
"deepvariant/makeexamples" + tag "deepvariant/callvariants" + tag "deepvariant/postprocessvariants" + tag "deepvariant" + tag "modules" + tag "modules_nfcore" + + test("homo_sapiens - wgs") { + setup { + run("DEEPVARIANT_MAKEEXAMPLES") { + script "../../makeexamples/main.nf" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [], + + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + run("DEEPVARIANT_CALLVARIANTS") { + script "../../callvariants/main.nf" + process { + """ + input[0] = DEEPVARIANT_MAKEEXAMPLES.out.examples + """ + } + } + } + when { + process { + """ + input[0] = DEEPVARIANT_CALLVARIANTS.out.call_variants_tfrecords.join( + DEEPVARIANT_MAKEEXAMPLES.out.gvcf, + failOnMismatch: true + ).join( + DEEPVARIANT_MAKEEXAMPLES.out.small_model_calls, + failOnMismatch: true + ).map { meta, tf, gvcf, small_model_calls -> [ meta, tf, gvcf, small_model_calls, [] ] } + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - wgs - stub") { + options "-stub" 
+ + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [], + [], + [], + [], + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("stub") } + ) + } + } + +} diff --git a/modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test.snap b/modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test.snap new file mode 100644 index 00000000..a981cf84 --- /dev/null +++ b/modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test.snap @@ -0,0 +1,196 @@ +{ + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + "DEEPVARIANT_POSTPROCESSVARIANTS", + "deepvariant", + "1.9.0" + ] + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "gvcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "vcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_deepvariant": [ + [ + "DEEPVARIANT_POSTPROCESSVARIANTS", + "deepvariant", + "1.9.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T17:22:12.888323156" + }, + "homo_sapiens - wgs": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ] + ], + "4": [ + [ + "DEEPVARIANT_POSTPROCESSVARIANTS", + "deepvariant", + "1.9.0" + ] + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ] + ], + "gvcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ] + ], + "vcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ] + ], + "versions_deepvariant": [ + [ + "DEEPVARIANT_POSTPROCESSVARIANTS", + "deepvariant", + "1.9.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T17:19:32.037352523" + } +} \ No newline at end of file diff --git a/modules/nf-core/deepvariant/postprocessvariants/tests/nextflow.config b/modules/nf-core/deepvariant/postprocessvariants/tests/nextflow.config new file mode 100644 index 00000000..b8f3f47a --- /dev/null +++ b/modules/nf-core/deepvariant/postprocessvariants/tests/nextflow.config @@ 
-0,0 +1,10 @@ +process { + withName: "DEEPVARIANT_CALLVARIANTS" { + ext.args = '--checkpoint "/opt/models/wgs"' + } +} +process { + withName: "DEEPVARIANT_MAKEEXAMPLES" { + ext.args = '--checkpoint "/opt/models/wgs" --call_small_model_examples --small_model_indel_gq_threshold "30" --small_model_snp_gq_threshold "25" --small_model_vaf_context_window_size "51" --trained_small_model_path "/opt/smallmodels/wgs"' + } +} diff --git a/modules/nf-core/ensemblvep/download/environment.yml b/modules/nf-core/ensemblvep/download/environment.yml index c73d7e1e..7e60f7f9 100644 --- a/modules/nf-core/ensemblvep/download/environment.yml +++ b/modules/nf-core/ensemblvep/download/environment.yml @@ -4,4 +4,5 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::ensembl-vep=115 + - bioconda::ensembl-vep=115.2 + - bioconda::perl-math-cdf=0.1 diff --git a/modules/nf-core/ensemblvep/download/main.nf b/modules/nf-core/ensemblvep/download/main.nf index 714d7088..7e1aeefe 100644 --- a/modules/nf-core/ensemblvep/download/main.nf +++ b/modules/nf-core/ensemblvep/download/main.nf @@ -4,15 +4,16 @@ process ENSEMBLVEP_DOWNLOAD { conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f1/f1872dbae2edaae3b7591ac2769efb2de3969adb34752a3ce7cdc9a1409640bb/data' - : 'community.wave.seqera.io/library/ensembl-vep:115--3f10c53a4cdeedf2'}" + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/3d/3da6e21cbf9803529421d7e136d1ebec5ff71ec50e0d996eda2ce11ec2c19bf9/data' + : 'community.wave.seqera.io/library/ensembl-vep_perl-math-cdf:1e13f65f931a6954'}" input: tuple val(meta), val(assembly), val(species), val(cache_version) output: tuple val(meta), path(prefix), emit: cache - path "versions.yml", emit: versions + tuple val("${task.process}"), val('ensemblvep'), eval("vep --help | sed -n '/ensembl-vep/s/.*: //p'"), topic: versions, emit: versions_ensemblvep + tuple val("${task.process}"), val('perl-math-cdf'), eval("perl -MMath::CDF -e 'print \$Math::CDF::VERSION'"), topic: versions, emit: versions_perlmathcdf when: task.ext.when == null || task.ext.when @@ -27,21 +28,11 @@ process ENSEMBLVEP_DOWNLOAD { --ASSEMBLY ${assembly} \\ --CACHE_VERSION ${cache_version} \\ ${args} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') - END_VERSIONS """ stub: prefix = task.ext.prefix ?: 'vep_cache' """ mkdir ${prefix} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/ensemblvep/download/meta.yml b/modules/nf-core/ensemblvep/download/meta.yml index df7cf260..ae1b9c6f 100644 --- a/modules/nf-core/ensemblvep/download/meta.yml +++ b/modules/nf-core/ensemblvep/download/meta.yml @@ -1,6 +1,6 @@ name: ensemblvep_download -description: Ensembl Variant Effect Predictor (VEP). The cache downloading options - are controlled through `task.ext.args`. +description: Ensembl Variant Effect Predictor (VEP). The cache downloading + options are controlled through `task.ext.args`. keywords: - annotation - cache @@ -12,7 +12,8 @@ tools: or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions. 
homepage: https://www.ensembl.org/info/docs/tools/vep/index.html documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html - licence: ["Apache-2.0"] + licence: + - "Apache-2.0" identifier: "" input: - - meta: @@ -44,13 +45,46 @@ output: description: cache pattern: "*" ontologies: [] + versions_ensemblvep: + - - ${task.process}: + type: string + description: The process the versions were collected from + - ensemblvep: + type: string + description: The tool name + - "vep --help | sed -n '/ensembl-vep/s/.*: //p'": + type: eval + description: The command used to generate the version of the tool + versions_perlmathcdf: + - - ${task.process}: + type: string + description: The process the versions were collected from + - perl-math-cdf: + type: string + description: The name of the tool + - perl -MMath::CDF -e 'print \$Math::CDF::VERSION': + type: eval + description: The expression to obtain the version of the tool +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The process the versions were collected from + - ensemblvep: + type: string + description: The tool name + - "vep --help | sed -n '/ensembl-vep/s/.*: //p'": + type: eval + description: The command used to generate the version of the tool + - - ${task.process}: + type: string + description: The process the versions were collected from + - perl-math-cdf: + type: string + description: The name of the tool + - perl -MMath::CDF -e 'print \$Math::CDF::VERSION': + type: eval + description: The expression to obtain the version of the tool authors: - "@maxulysse" maintainers: diff --git a/modules/nf-core/ensemblvep/download/tests/main.nf.test b/modules/nf-core/ensemblvep/download/tests/main.nf.test index 496dbeca..0206fe11 100644 --- a/modules/nf-core/ensemblvep/download/tests/main.nf.test +++ 
b/modules/nf-core/ensemblvep/download/tests/main.nf.test @@ -10,7 +10,7 @@ nextflow_process { tag "ensemblvep" tag "ensemblvep/download" - test("celegans - download") { + test("ENSEMBLVEP download cache for 115_WBcel235") { when { process { @@ -28,12 +28,12 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } } - test("celegans - download - stub") { + test("ENSEMBLVEP download cache for 115_WBcel235 - stub") { options "-stub" @@ -53,7 +53,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } } diff --git a/modules/nf-core/ensemblvep/download/tests/main.nf.test.snap b/modules/nf-core/ensemblvep/download/tests/main.nf.test.snap index 2f7c7aed..e5618485 100644 --- a/modules/nf-core/ensemblvep/download/tests/main.nf.test.snap +++ b/modules/nf-core/ensemblvep/download/tests/main.nf.test.snap @@ -1,8 +1,8 @@ { - "celegans - download": { + "ENSEMBLVEP download cache for 115_WBcel235": { "content": [ { - "0": [ + "cache": [ [ { "id": "115_WBcel235" @@ -11,281 +11,156 @@ [ [ [ - "1-1000000.gz:md5,bb3f43b7512715df72792988ed262d0e", - "10000001-11000000.gz:md5,3a2acf075f478e42dab768cc3913d3f7", - "1000001-2000000.gz:md5,2a1fa1b46a47f90fe36b5c8ab35d01a1", - "11000001-12000000.gz:md5,e16032d944e54a4f41a82925817f88c6", - "12000001-13000000.gz:md5,f51d695b264752299a031b9dbcc44de1", - "13000001-14000000.gz:md5,1532d1764ee39bfd8e1be8b50974ab29", - "14000001-15000000.gz:md5,24173fdc2f65e39aac7ecc4c2726fcf2", - "15000001-16000000.gz:md5,d36d8154114de70c6779ebfb08f1ff8a", - "2000001-3000000.gz:md5,dadb500858b247daf0cf5562938a081d", - "3000001-4000000.gz:md5,eaaebe12f3402b7794c1d82a677ff5ad", - "4000001-5000000.gz:md5,84633b5ee211f4e23170233aefc415ee", - "5000001-6000000.gz:md5,de297b2d604da2070f9630a83db73b22", - 
"6000001-7000000.gz:md5,0fa47dad82ca5bce6ef96afc491a602c", - "7000001-8000000.gz:md5,2387061609a13eeaeb89a90dd13b1495", - "8000001-9000000.gz:md5,1bfd2fea5051d0a09dde436e75733739", - "9000001-10000000.gz:md5,dbb48f71832f61b9ec01c6f60548e954" + "1-1000000.gz:md5,f9cb33a3ddbb1b82597b3af3e9643dd3", + "10000001-11000000.gz:md5,71aa93ae6388be95fa5269be60236b95", + "1000001-2000000.gz:md5,956f858010c1346ee042ef0ea522043b", + "11000001-12000000.gz:md5,30274a3b8244e492cdf1f611a04848db", + "12000001-13000000.gz:md5,b28d20155f72c1261ba4bb08bea3d302", + "13000001-14000000.gz:md5,293a59ed7b397c1817d77412e124df53", + "14000001-15000000.gz:md5,5291aeb2805c8823b44562d875c7905e", + "15000001-16000000.gz:md5,7fd86078be5f3a400635b269a02185ea", + "2000001-3000000.gz:md5,7f6241cf8e3c91e1c882f3cc86d2dc3a", + "3000001-4000000.gz:md5,e77dc4834119301bbe19dc2a4a05348a", + "4000001-5000000.gz:md5,ac34c2980fea824b21e04874847904ad", + "5000001-6000000.gz:md5,3dfdb3bcd812de1e2b69cfca89a1c1f9", + "6000001-7000000.gz:md5,8446c31796e1aee447af2e353b3734a1", + "7000001-8000000.gz:md5,f84e88618cf747c774ddb98d971e2615", + "8000001-9000000.gz:md5,3094317bb21e35067424e9ad957dfcbe", + "9000001-10000000.gz:md5,3883799cae1ed356ecb416550a89ebde" ], [ - "1-1000000.gz:md5,701d7a2d7afa18b4efd7db488089f162", - "10000001-11000000.gz:md5,105bf3809d7be6f5c310383d5013e633", - "1000001-2000000.gz:md5,9b9eae5303b82666138619afd7b562dd", - "11000001-12000000.gz:md5,c5cc2c2dd5afc3028f3f0a7c03c067c1", - "12000001-13000000.gz:md5,721a1108f6d3b83f1ec96cf39ac9844d", - "13000001-14000000.gz:md5,9fec39204bcdcc5e0458b4ae19ca6f60", - "14000001-15000000.gz:md5,234b0a0b377b6740b70b29ecf0beb9fb", - "15000001-16000000.gz:md5,18444392a5aa3a7e2f09606f3481b765", - "2000001-3000000.gz:md5,e7ed6a622c2759de225b4602feabc1d1", - "3000001-4000000.gz:md5,fde2094cb236ed5cca8f1d6ef2d1e2ed", - "4000001-5000000.gz:md5,2035077fec25cb75beaaf61a0801ec0b", - "5000001-6000000.gz:md5,7d5534332f9aee366ad57add71bb0625", - 
"6000001-7000000.gz:md5,3da3614a656ed04dd8ba6f06b4c12c37", - "7000001-8000000.gz:md5,f24538b6263452002c9eef88657f332a", - "8000001-9000000.gz:md5,c1357499904580cb780ef275493d06a5", - "9000001-10000000.gz:md5,ed7fee5ec049da066556e033cec1f9b1" + "1-1000000.gz:md5,c2393dd5d120068d496b8789bee91974", + "10000001-11000000.gz:md5,106ab30775ba5fa950f8df7909a163b2", + "1000001-2000000.gz:md5,beeda6a82ca6800093d96bbf2eaa81f9", + "11000001-12000000.gz:md5,27d27c0245dbbb270b2001ca75b03042", + "12000001-13000000.gz:md5,8eea54682f0ef9e1977527999f6bfe29", + "13000001-14000000.gz:md5,5f4581505ebf8c4488bff65b16f125e0", + "14000001-15000000.gz:md5,00e8035d7e179ee51faa7032db090fad", + "15000001-16000000.gz:md5,c8438ec1765eb98cfca60eb2165f4d70", + "2000001-3000000.gz:md5,cf3f572db267b4a8552b37d1c3a78b2d", + "3000001-4000000.gz:md5,ec7df072bf71748c26e25afc2ea353f1", + "4000001-5000000.gz:md5,9dd91cde9d18bae0c884dff7050abaef", + "5000001-6000000.gz:md5,1be8b286a79eebb80ff0684f5c110407", + "6000001-7000000.gz:md5,a0debd5341d3e5ba986225f0f45d01b9", + "7000001-8000000.gz:md5,fc2d82666540059fea44c14569208b14", + "8000001-9000000.gz:md5,0a75c0b27da2c4ae946fdae5e5d83e59", + "9000001-10000000.gz:md5,172900e4145c9a9aa783f01550f4d8a2" ], [ - "1-1000000.gz:md5,e4167c4f03a10c72e22231bce4138383", - "10000001-11000000.gz:md5,a0d9708cbad465edc12f3ce420aa2516", - "1000001-2000000.gz:md5,0b7849c9659b74518f94f0ff760c07c3", - "11000001-12000000.gz:md5,743d76706105cf702cd1eac015c0afd3", - "12000001-13000000.gz:md5,f66f0aa9a02ab3e781cb04d10cc05093", - "13000001-14000000.gz:md5,56dd08b3ddc0b003221e1a1d16331d61", - "2000001-3000000.gz:md5,5477e38b3d53108e4e8ef70b4576dacd", - "3000001-4000000.gz:md5,bd40e0f8602020d14e3e38aeb3a9f31c", - "4000001-5000000.gz:md5,6ae06d70e1fa445e46d66e69b0ca629f", - "5000001-6000000.gz:md5,db03cd98e32f6adb5b424a81420b983c", - "6000001-7000000.gz:md5,4dffec87c3e18cd5c5cdb0befa3230c8", - "7000001-8000000.gz:md5,96ed0ed8f00382c43f6686f70f2b3925", - 
"8000001-9000000.gz:md5,5c20ed1c637e1182766e66f5741bd070", - "9000001-10000000.gz:md5,2ecd0f2778a09425a2c83e4b5e76ba1b" + "1-1000000.gz:md5,d1d97f733248a030e650a9bd1e7f41bc", + "10000001-11000000.gz:md5,429d681ffa8ca9dc23c62cdab4ffc42b", + "1000001-2000000.gz:md5,58be924f576c85b462df995b76074c9d", + "11000001-12000000.gz:md5,f789a4312ddd0bd423b3c55f66af782c", + "12000001-13000000.gz:md5,76a0fdd867ca559eaf5e8fe3e0f8ea94", + "13000001-14000000.gz:md5,867d9a3b7b6a720b634180efebf65658", + "2000001-3000000.gz:md5,e93c4b398494843b309abf71b218c14a", + "3000001-4000000.gz:md5,ff5b0d6680207d23490279c5f62ba33b", + "4000001-5000000.gz:md5,8b5e33878c599de252a931205a034446", + "5000001-6000000.gz:md5,25b9f4a4edafb9572c323ca1783dd7c1", + "6000001-7000000.gz:md5,c52c31d11ab225f7c9793f0c82ce72e6", + "7000001-8000000.gz:md5,a894863e34285ca3b042cd0eeed253c6", + "8000001-9000000.gz:md5,bcce1adf098aa44c0e55a2df2d801449", + "9000001-10000000.gz:md5,b27661dc26cd23c6a9c41cc885187b4f" ], [ - "1-1000000.gz:md5,28ab3ff34fdd100a65afb89b4ae0f825", - "10000001-11000000.gz:md5,feaf25015c12e72ea5c9e444ffb9a6a9", - "1000001-2000000.gz:md5,8878c44d092a2f7de7298bcecc43da73", - "11000001-12000000.gz:md5,60764928b738a5afb38e739747945ee0", - "12000001-13000000.gz:md5,a397368a23323df7104469537ef4e363", - "13000001-14000000.gz:md5,ecefb54ca22247017c75d8af2fc72e67", - "14000001-15000000.gz:md5,513213c222473a434e73b79927fa3eb3", - "15000001-16000000.gz:md5,b381b0e58227aec9cd1e78f3c2ace485", - "16000001-17000000.gz:md5,44a78638fbdd9d56f20462ba50867d14", - "17000001-18000000.gz:md5,fb6bdfe6078d861895a83f4f7232f74d", - "2000001-3000000.gz:md5,3fe93a5ff45260e3814e177e0f91a9e9", - "3000001-4000000.gz:md5,d449609384c8c0a6cf159b556ef29d47", - "4000001-5000000.gz:md5,201ec9eb75148838ef97bb931acdc02f", - "5000001-6000000.gz:md5,44166dce1082f3e3ce9128930a7aa6a3", - "6000001-7000000.gz:md5,0a54b8acee2e4531423079f361e465ee", - "7000001-8000000.gz:md5,17f245e52522d1270e7005a633eea682", - 
"8000001-9000000.gz:md5,5f364dabdef97a204e89bce1b9835822", - "9000001-10000000.gz:md5,bb77aff8a7673b2153476b0b9374ffd8" + "1-1000000.gz:md5,70dfe9bedc7a87a063f9b6f2e48b846b", + "10000001-11000000.gz:md5,767a1bd59613ad669cbb835cc11d06ff", + "1000001-2000000.gz:md5,9e1276159b8a073611f048cf6c4a0738", + "11000001-12000000.gz:md5,93dcc27f1185c7af20489ce5796da68e", + "12000001-13000000.gz:md5,0bc55d7b3f7419adcdbf474b7192c5ba", + "13000001-14000000.gz:md5,2a8c5ee78d9d2b462ec889733f311c19", + "14000001-15000000.gz:md5,68e79d710886838294c8de27ea7c1c00", + "15000001-16000000.gz:md5,d975f06bf6f8f1722c29c7b8cce7095b", + "16000001-17000000.gz:md5,bf59b0b7461b4c97520762754a91e41c", + "17000001-18000000.gz:md5,3174d1bff1351ee17f632c6adec54c90", + "2000001-3000000.gz:md5,22ac8f1975b085ed3f5773bc220ff120", + "3000001-4000000.gz:md5,0a8a89f524f7955f3db2847344e688d9", + "4000001-5000000.gz:md5,648d45d681cc4cbe68db4638665122ca", + "5000001-6000000.gz:md5,dfe8c94f45221ed152874083ec0991e2", + "6000001-7000000.gz:md5,e9c705d31a7c1cac9d02c6a018a0d655", + "7000001-8000000.gz:md5,723513cca5842d5c238dc110075ad8dd", + "8000001-9000000.gz:md5,3703d6fbdad1f21a2c1f40c5d8ad5371", + "9000001-10000000.gz:md5,afb3ba977fc7cd18b47bb5fa6fc85832" ], [ - "1-1000000.gz:md5,ad7333dd27732b000f1fb91c69accc47" + "1-1000000.gz:md5,397d664998c2dbfd56c0af94c385f5ee" ], [ - "1-1000000.gz:md5,a104f41c6b74d3657094341960b3e9aa", - "10000001-11000000.gz:md5,cb73357d42446a2a62d41b4ab6814486", - "1000001-2000000.gz:md5,c3d1f590019cb2d9a3fe138ebc3d4adb", - "11000001-12000000.gz:md5,f5de9fb9e6a24c3a0176951fe70cfd35", - "12000001-13000000.gz:md5,ad46cdbd9087eeac328e8dcecb60bdd6", - "13000001-14000000.gz:md5,635dbb498a367214ca45ea4466e15c27", - "14000001-15000000.gz:md5,b4e72a2a01c2d524c1076a9aeb5be6ed", - "15000001-16000000.gz:md5,4ce9dd41ed032eac6234ae6d414dead8", - "16000001-17000000.gz:md5,908fc89908bf5be9862c9f989e105a97", - "17000001-18000000.gz:md5,7bda940567e9c6fa9ba5493911e607b7", - 
"18000001-19000000.gz:md5,94c81ccdf2110781989774198c725e76", - "19000001-20000000.gz:md5,8e39cf5f5b24bb28c117d502a5d4cd64", - "20000001-21000000.gz:md5,d9a72ba97be5648b08ac53bc4649086e", - "2000001-3000000.gz:md5,e90cc7adc6c60b2b37736f572fe79166", - "3000001-4000000.gz:md5,6b5bbecf824fc18ce9b46c1cbedf0984", - "4000001-5000000.gz:md5,beccbf5aee5031d2d6bc29f2c7486672", - "5000001-6000000.gz:md5,23810c84b4f8607537321f0823a48db3", - "6000001-7000000.gz:md5,d9008100f5e23d3f42849d2bb9c45432", - "7000001-8000000.gz:md5,079f7fa854d40d426925d85a58d688e0", - "8000001-9000000.gz:md5,f000a7feaa7b461cdb6fc3df3651f6ae", - "9000001-10000000.gz:md5,154d5d7cf4d70cc3d9ea2349af7103be" + "1-1000000.gz:md5,ce88865c73633b2b57e6f3481a21b3f8", + "10000001-11000000.gz:md5,8f1f45979e56451d44a3887321910d30", + "1000001-2000000.gz:md5,c40b987ac5b22a6674055849e24204ad", + "11000001-12000000.gz:md5,4e5a2ba329951bd2e01738886766e777", + "12000001-13000000.gz:md5,0670c5ee8dbf7c5f497a948071f94026", + "13000001-14000000.gz:md5,24d4532d1e9c0839f750f6dc798beacd", + "14000001-15000000.gz:md5,3a3de4c21b35ae9cd349e4641d8023a8", + "15000001-16000000.gz:md5,d60e0088ed511f809d9f2137fe46731c", + "16000001-17000000.gz:md5,2ee41d67e7cc73cf0e1bf8e47c708436", + "17000001-18000000.gz:md5,87304db95265191a4368c51ee7993801", + "18000001-19000000.gz:md5,b943b2b6865fa97a04fac7e17ac7c789", + "19000001-20000000.gz:md5,4d1338b66252cbc28c9991967cc5805e", + "20000001-21000000.gz:md5,a81d47d38d78d30bb4575f7a65c9707e", + "2000001-3000000.gz:md5,2ff7e93b14741df46621a15840fa5276", + "3000001-4000000.gz:md5,1ff84c0fb7dd85b55a1f32954f3bd625", + "4000001-5000000.gz:md5,4079a7728625d842f231d1f664b50264", + "5000001-6000000.gz:md5,399986007f0bc002054aff687bce85fc", + "6000001-7000000.gz:md5,c97f0d1a323011c5eb59d2c09a12634e", + "7000001-8000000.gz:md5,0973af953b9632efa1813d0c1265902d", + "8000001-9000000.gz:md5,1c9695b7857c8494396edbf68e2d8095", + "9000001-10000000.gz:md5,7e3e78052518c97155260eba1975ec41" ], [ - 
"1-1000000.gz:md5,d7a153a75f81d0b45eab68bfe526ffb5", - "10000001-11000000.gz:md5,db38af65cff53b1d1f06489cc872b015", - "1000001-2000000.gz:md5,0a7fff7f67d786d9281ca4783a5ec163", - "11000001-12000000.gz:md5,d3231f8fd2e05281d4388d67c5dfaed2", - "12000001-13000000.gz:md5,779879ffc3cdf8cd4021fef90b130791", - "13000001-14000000.gz:md5,0dea4526b0aa9085afae63249ab361ba", - "14000001-15000000.gz:md5,c25184561e98dd04343f49a8dc339134", - "15000001-16000000.gz:md5,146861be6570e64a0ceb555c9f8e3367", - "16000001-17000000.gz:md5,8e497ba1fe43f39adc47f625b2f0d0cd", - "17000001-18000000.gz:md5,d90fbba28b69487d8948e22f1b7b4217", - "2000001-3000000.gz:md5,2c6990365afec01242bcf698edc7e9da", - "3000001-4000000.gz:md5,9b6c90fd96f4d781d1ccf1339c7fb094", - "4000001-5000000.gz:md5,ee14bc5bfb88da22b27004a948570904", - "5000001-6000000.gz:md5,b8f609f3fb3d607f2da254a217b9fc6a", - "6000001-7000000.gz:md5,be9317f3c10182edd13426cdab8e1607", - "7000001-8000000.gz:md5,a5ab699b93ad721e676f0694a288ae36", - "8000001-9000000.gz:md5,6b5e1b10f154d5b046b7ac58f0957fcb", - "9000001-10000000.gz:md5,db16ac0616d71ce8647e885be9ed0cad" + "1-1000000.gz:md5,e5b943beee3102d4bbcf931618e4516f", + "10000001-11000000.gz:md5,2258cce59acc40383eba19df4adab977", + "1000001-2000000.gz:md5,48ffa67b4638d76f54feffeaf3ceb919", + "11000001-12000000.gz:md5,810e31cc6ef2995163dc4a249c73f131", + "12000001-13000000.gz:md5,0bf858719d427d5dcff6640dd0126ec0", + "13000001-14000000.gz:md5,f337fe31c4cf04d2b61c8cc4e65b3d31", + "14000001-15000000.gz:md5,51fa13d1ceefd98a6a093a41d565aa08", + "15000001-16000000.gz:md5,eaa3a4f3c8e447a78f482f8ff94ee488", + "16000001-17000000.gz:md5,95d704615bf688dea4fcf8916ea35326", + "17000001-18000000.gz:md5,356290a920138412ac131046246fe606", + "2000001-3000000.gz:md5,b32d23dedc91e0f05047cb3f75ab9679", + "3000001-4000000.gz:md5,304b9eb63522e575fb97a7c18e602c93", + "4000001-5000000.gz:md5,d2fa3e7f01321332dc5a65feef9defc0", + "5000001-6000000.gz:md5,d150eec8b35ccf2e136fdc6e7bd17481", + 
"6000001-7000000.gz:md5,fde4ba9ba1cbe463b65afd45f64d0809", + "7000001-8000000.gz:md5,40a5fe0f827dc040e851c5769896d4f5", + "8000001-9000000.gz:md5,0112761790ef2e78c5515bb3f9923277", + "9000001-10000000.gz:md5,f2d25d2a55f146875f0e5d51d3108341" ], - "chr_synonyms.txt:md5,9b3745f472606bf05c7068b21e2b31aa", + "chr_synonyms.txt:md5,8a7e0941aa4f0e676ab5594f0226b2b5", "info.txt:md5,33ccb74a030a9a345051628c337cb8af" ] ] ] ] ], - "1": [ - "versions.yml:md5,906ef8bda2ae1461f20649a6ba8611f1" - ], - "cache": [ + "versions_ensemblvep": [ [ - { - "id": "115_WBcel235" - }, - [ - [ - [ - [ - "1-1000000.gz:md5,bb3f43b7512715df72792988ed262d0e", - "10000001-11000000.gz:md5,3a2acf075f478e42dab768cc3913d3f7", - "1000001-2000000.gz:md5,2a1fa1b46a47f90fe36b5c8ab35d01a1", - "11000001-12000000.gz:md5,e16032d944e54a4f41a82925817f88c6", - "12000001-13000000.gz:md5,f51d695b264752299a031b9dbcc44de1", - "13000001-14000000.gz:md5,1532d1764ee39bfd8e1be8b50974ab29", - "14000001-15000000.gz:md5,24173fdc2f65e39aac7ecc4c2726fcf2", - "15000001-16000000.gz:md5,d36d8154114de70c6779ebfb08f1ff8a", - "2000001-3000000.gz:md5,dadb500858b247daf0cf5562938a081d", - "3000001-4000000.gz:md5,eaaebe12f3402b7794c1d82a677ff5ad", - "4000001-5000000.gz:md5,84633b5ee211f4e23170233aefc415ee", - "5000001-6000000.gz:md5,de297b2d604da2070f9630a83db73b22", - "6000001-7000000.gz:md5,0fa47dad82ca5bce6ef96afc491a602c", - "7000001-8000000.gz:md5,2387061609a13eeaeb89a90dd13b1495", - "8000001-9000000.gz:md5,1bfd2fea5051d0a09dde436e75733739", - "9000001-10000000.gz:md5,dbb48f71832f61b9ec01c6f60548e954" - ], - [ - "1-1000000.gz:md5,701d7a2d7afa18b4efd7db488089f162", - "10000001-11000000.gz:md5,105bf3809d7be6f5c310383d5013e633", - "1000001-2000000.gz:md5,9b9eae5303b82666138619afd7b562dd", - "11000001-12000000.gz:md5,c5cc2c2dd5afc3028f3f0a7c03c067c1", - "12000001-13000000.gz:md5,721a1108f6d3b83f1ec96cf39ac9844d", - "13000001-14000000.gz:md5,9fec39204bcdcc5e0458b4ae19ca6f60", - 
"14000001-15000000.gz:md5,234b0a0b377b6740b70b29ecf0beb9fb", - "15000001-16000000.gz:md5,18444392a5aa3a7e2f09606f3481b765", - "2000001-3000000.gz:md5,e7ed6a622c2759de225b4602feabc1d1", - "3000001-4000000.gz:md5,fde2094cb236ed5cca8f1d6ef2d1e2ed", - "4000001-5000000.gz:md5,2035077fec25cb75beaaf61a0801ec0b", - "5000001-6000000.gz:md5,7d5534332f9aee366ad57add71bb0625", - "6000001-7000000.gz:md5,3da3614a656ed04dd8ba6f06b4c12c37", - "7000001-8000000.gz:md5,f24538b6263452002c9eef88657f332a", - "8000001-9000000.gz:md5,c1357499904580cb780ef275493d06a5", - "9000001-10000000.gz:md5,ed7fee5ec049da066556e033cec1f9b1" - ], - [ - "1-1000000.gz:md5,e4167c4f03a10c72e22231bce4138383", - "10000001-11000000.gz:md5,a0d9708cbad465edc12f3ce420aa2516", - "1000001-2000000.gz:md5,0b7849c9659b74518f94f0ff760c07c3", - "11000001-12000000.gz:md5,743d76706105cf702cd1eac015c0afd3", - "12000001-13000000.gz:md5,f66f0aa9a02ab3e781cb04d10cc05093", - "13000001-14000000.gz:md5,56dd08b3ddc0b003221e1a1d16331d61", - "2000001-3000000.gz:md5,5477e38b3d53108e4e8ef70b4576dacd", - "3000001-4000000.gz:md5,bd40e0f8602020d14e3e38aeb3a9f31c", - "4000001-5000000.gz:md5,6ae06d70e1fa445e46d66e69b0ca629f", - "5000001-6000000.gz:md5,db03cd98e32f6adb5b424a81420b983c", - "6000001-7000000.gz:md5,4dffec87c3e18cd5c5cdb0befa3230c8", - "7000001-8000000.gz:md5,96ed0ed8f00382c43f6686f70f2b3925", - "8000001-9000000.gz:md5,5c20ed1c637e1182766e66f5741bd070", - "9000001-10000000.gz:md5,2ecd0f2778a09425a2c83e4b5e76ba1b" - ], - [ - "1-1000000.gz:md5,28ab3ff34fdd100a65afb89b4ae0f825", - "10000001-11000000.gz:md5,feaf25015c12e72ea5c9e444ffb9a6a9", - "1000001-2000000.gz:md5,8878c44d092a2f7de7298bcecc43da73", - "11000001-12000000.gz:md5,60764928b738a5afb38e739747945ee0", - "12000001-13000000.gz:md5,a397368a23323df7104469537ef4e363", - "13000001-14000000.gz:md5,ecefb54ca22247017c75d8af2fc72e67", - "14000001-15000000.gz:md5,513213c222473a434e73b79927fa3eb3", - "15000001-16000000.gz:md5,b381b0e58227aec9cd1e78f3c2ace485", - 
"16000001-17000000.gz:md5,44a78638fbdd9d56f20462ba50867d14", - "17000001-18000000.gz:md5,fb6bdfe6078d861895a83f4f7232f74d", - "2000001-3000000.gz:md5,3fe93a5ff45260e3814e177e0f91a9e9", - "3000001-4000000.gz:md5,d449609384c8c0a6cf159b556ef29d47", - "4000001-5000000.gz:md5,201ec9eb75148838ef97bb931acdc02f", - "5000001-6000000.gz:md5,44166dce1082f3e3ce9128930a7aa6a3", - "6000001-7000000.gz:md5,0a54b8acee2e4531423079f361e465ee", - "7000001-8000000.gz:md5,17f245e52522d1270e7005a633eea682", - "8000001-9000000.gz:md5,5f364dabdef97a204e89bce1b9835822", - "9000001-10000000.gz:md5,bb77aff8a7673b2153476b0b9374ffd8" - ], - [ - "1-1000000.gz:md5,ad7333dd27732b000f1fb91c69accc47" - ], - [ - "1-1000000.gz:md5,a104f41c6b74d3657094341960b3e9aa", - "10000001-11000000.gz:md5,cb73357d42446a2a62d41b4ab6814486", - "1000001-2000000.gz:md5,c3d1f590019cb2d9a3fe138ebc3d4adb", - "11000001-12000000.gz:md5,f5de9fb9e6a24c3a0176951fe70cfd35", - "12000001-13000000.gz:md5,ad46cdbd9087eeac328e8dcecb60bdd6", - "13000001-14000000.gz:md5,635dbb498a367214ca45ea4466e15c27", - "14000001-15000000.gz:md5,b4e72a2a01c2d524c1076a9aeb5be6ed", - "15000001-16000000.gz:md5,4ce9dd41ed032eac6234ae6d414dead8", - "16000001-17000000.gz:md5,908fc89908bf5be9862c9f989e105a97", - "17000001-18000000.gz:md5,7bda940567e9c6fa9ba5493911e607b7", - "18000001-19000000.gz:md5,94c81ccdf2110781989774198c725e76", - "19000001-20000000.gz:md5,8e39cf5f5b24bb28c117d502a5d4cd64", - "20000001-21000000.gz:md5,d9a72ba97be5648b08ac53bc4649086e", - "2000001-3000000.gz:md5,e90cc7adc6c60b2b37736f572fe79166", - "3000001-4000000.gz:md5,6b5bbecf824fc18ce9b46c1cbedf0984", - "4000001-5000000.gz:md5,beccbf5aee5031d2d6bc29f2c7486672", - "5000001-6000000.gz:md5,23810c84b4f8607537321f0823a48db3", - "6000001-7000000.gz:md5,d9008100f5e23d3f42849d2bb9c45432", - "7000001-8000000.gz:md5,079f7fa854d40d426925d85a58d688e0", - "8000001-9000000.gz:md5,f000a7feaa7b461cdb6fc3df3651f6ae", - "9000001-10000000.gz:md5,154d5d7cf4d70cc3d9ea2349af7103be" - ], - [ - 
"1-1000000.gz:md5,d7a153a75f81d0b45eab68bfe526ffb5", - "10000001-11000000.gz:md5,db38af65cff53b1d1f06489cc872b015", - "1000001-2000000.gz:md5,0a7fff7f67d786d9281ca4783a5ec163", - "11000001-12000000.gz:md5,d3231f8fd2e05281d4388d67c5dfaed2", - "12000001-13000000.gz:md5,779879ffc3cdf8cd4021fef90b130791", - "13000001-14000000.gz:md5,0dea4526b0aa9085afae63249ab361ba", - "14000001-15000000.gz:md5,c25184561e98dd04343f49a8dc339134", - "15000001-16000000.gz:md5,146861be6570e64a0ceb555c9f8e3367", - "16000001-17000000.gz:md5,8e497ba1fe43f39adc47f625b2f0d0cd", - "17000001-18000000.gz:md5,d90fbba28b69487d8948e22f1b7b4217", - "2000001-3000000.gz:md5,2c6990365afec01242bcf698edc7e9da", - "3000001-4000000.gz:md5,9b6c90fd96f4d781d1ccf1339c7fb094", - "4000001-5000000.gz:md5,ee14bc5bfb88da22b27004a948570904", - "5000001-6000000.gz:md5,b8f609f3fb3d607f2da254a217b9fc6a", - "6000001-7000000.gz:md5,be9317f3c10182edd13426cdab8e1607", - "7000001-8000000.gz:md5,a5ab699b93ad721e676f0694a288ae36", - "8000001-9000000.gz:md5,6b5e1b10f154d5b046b7ac58f0957fcb", - "9000001-10000000.gz:md5,db16ac0616d71ce8647e885be9ed0cad" - ], - "chr_synonyms.txt:md5,9b3745f472606bf05c7068b21e2b31aa", - "info.txt:md5,33ccb74a030a9a345051628c337cb8af" - ] - ] - ] + "ENSEMBLVEP_DOWNLOAD", + "ensemblvep", + "115.2" ] ], - "versions": [ - "versions.yml:md5,906ef8bda2ae1461f20649a6ba8611f1" + "versions_perlmathcdf": [ + [ + "ENSEMBLVEP_DOWNLOAD", + "perl-math-cdf", + "" + ] ] } ], + "timestamp": "2026-02-13T13:36:59.060515944", "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" - }, - "timestamp": "2025-09-04T10:00:46.010443" + "nf-test": "0.9.4", + "nextflow": "26.01.1" + } }, - "celegans - download - stub": { + "ENSEMBLVEP download cache for 115_WBcel235 - stub": { "content": [ { - "0": [ + "cache": [ [ { "id": "115_WBcel235" @@ -295,28 +170,26 @@ ] ] ], - "1": [ - "versions.yml:md5,906ef8bda2ae1461f20649a6ba8611f1" - ], - "cache": [ + "versions_ensemblvep": [ [ - { - "id": "115_WBcel235" - }, - [ - - ] + 
"ENSEMBLVEP_DOWNLOAD", + "ensemblvep", + "115.2" ] ], - "versions": [ - "versions.yml:md5,906ef8bda2ae1461f20649a6ba8611f1" + "versions_perlmathcdf": [ + [ + "ENSEMBLVEP_DOWNLOAD", + "perl-math-cdf", + "" + ] ] } ], + "timestamp": "2026-02-13T13:37:10.696285488", "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" - }, - "timestamp": "2025-09-04T10:00:57.964733" + "nf-test": "0.9.4", + "nextflow": "26.01.1" + } } } \ No newline at end of file diff --git a/modules/nf-core/ensemblvep/download/tests/nextflow.config b/modules/nf-core/ensemblvep/download/tests/nextflow.config index 49485470..882ef8ee 100644 --- a/modules/nf-core/ensemblvep/download/tests/nextflow.config +++ b/modules/nf-core/ensemblvep/download/tests/nextflow.config @@ -1,5 +1,5 @@ params { - vep_cache_version = "114" + vep_cache_version = "115" vep_genome = "WBcel235" vep_species = "caenorhabditis_elegans" } diff --git a/modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff b/modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff index 659e2264..f4675ea2 100644 --- a/modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff +++ b/modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff @@ -3,7 +3,7 @@ Changes in component 'nf-core/ensemblvep/vep' Changes in 'ensemblvep/vep/main.nf': --- modules/nf-core/ensemblvep/vep/main.nf +++ modules/nf-core/ensemblvep/vep/main.nf -@@ -1,11 +1,11 @@ +@@ -1,6 +1,6 @@ process ENSEMBLVEP_VEP { tag "${meta.id}" - label 'process_medium' @@ -11,121 +11,29 @@ Changes in 'ensemblvep/vep/main.nf': conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container -- ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/4b/4b5a8c173dc9beaa93effec76b99687fc926b1bd7be47df5d6ce19d7d6b4d6b7/data' -- : 'community.wave.seqera.io/library/ensembl-vep:115.2--90ec797ecb088e9a'}" -+ ? 
'https://depot.galaxyproject.org/singularity/ensembl-vep:114.2--pl5321h2a3209d_0' -+ : 'biocontainers/ensembl-vep:114.2--pl5321h2a3209d_0'}" - - input: - tuple val(meta), path(vcf), path(custom_extra_files) @@ -15,6 +15,8 @@ - path cache - tuple val(meta2), path(fasta) + tuple val(meta2), path(cache) + tuple val(meta3), path(fasta) path extra_files + path custom_vep + path custom_vep_tbi output: - tuple val(meta), path("*.vcf.gz"), emit: vcf, optional: true -@@ -36,6 +38,7 @@ + tuple val(meta), path("${prefix}.vcf.gz"), emit: vcf, optional: true +@@ -38,6 +40,8 @@ def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" def reference = fasta ? "--fasta ${fasta}" : "" def create_index = file_extension == "vcf" ? "tabix ${args2} ${prefix}.${file_extension}.gz" : "" + args = args.replaceAll(/--custom file=[^,]+/, "--custom file=${custom_vep}") ++ """ vep \\ -i ${vcf} \\ -Changes in 'ensemblvep/vep/environment.yml': ---- modules/nf-core/ensemblvep/vep/environment.yml -+++ modules/nf-core/ensemblvep/vep/environment.yml -@@ -4,5 +4,4 @@ - - conda-forge - - bioconda - dependencies: -- # renovate: datasource=conda depName=bioconda/ensembl-vep -- - bioconda::ensembl-vep=115.2=pl5321h2a3209d_1 -+ - bioconda::ensembl-vep=114.2 - -Changes in 'ensemblvep/vep/tests/main.nf.test': ---- modules/nf-core/ensemblvep/vep/tests/main.nf.test -+++ modules/nf-core/ensemblvep/vep/tests/main.nf.test -@@ -16,7 +16,7 @@ - when { - process { - """ -- vep_cache = Channel.of(file('s3://annotation-cache/vep_cache/115_WBcel235/')).collect() -+ vep_cache = Channel.of(file('s3://annotation-cache/vep_cache/113_WBcel235/')).collect() - - input[0] = Channel.of([ - [ id:'test' ], // meta map -@@ -55,7 +55,7 @@ - when { - process { - """ -- vep_cache = Channel.of(file('s3://annotation-cache/vep_cache/115_WBcel235/')).collect() -+ vep_cache = Channel.of(file('s3://annotation-cache/vep_cache/113_WBcel235/')).collect() - - input[0] = Channel.of([ - [ id:'test' ], // meta map -@@ -79,7 +79,7 @@ - assert 
process.success - assertAll( - { assert snapshot(process.out.versions).match() }, -- { assert path(process.out.tab.get(0).get(1)).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v115.2") } -+ { assert path(process.out.tab.get(0).get(1)).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v114.2") } - ) - } - } - +'modules/nf-core/ensemblvep/vep/environment.yml' is unchanged +'modules/nf-core/ensemblvep/vep/tests/main.nf.test' is unchanged 'modules/nf-core/ensemblvep/vep/tests/vcf.config' is unchanged 'modules/nf-core/ensemblvep/vep/tests/tab.gz.config' is unchanged -Changes in 'ensemblvep/vep/tests/main.nf.test.snap': ---- modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap -+++ modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap -@@ -2,27 +2,27 @@ - "test_ensemblvep_vep_fasta_tab_gz": { - "content": [ - [ -- "versions.yml:md5,1e9ba958f2a6c180c064505b29f843ef" -+ "versions.yml:md5,01653f5a713b20d56ed2468a2dab959a" - ] - ], - "meta": { - "nf-test": "0.9.2", -- "nextflow": "25.04.7" -+ "nextflow": "25.04.6" - }, -- "timestamp": "2025-09-29T10:00:37.755173" -+ "timestamp": "2025-08-21T13:16:46.760065318" - }, - "test_ensemblvep_vep_fasta_vcf - stub (not really but linting complains otherwise)": { - "content": [ - [ -- "versions.yml:md5,1e9ba958f2a6c180c064505b29f843ef" -+ "versions.yml:md5,01653f5a713b20d56ed2468a2dab959a" - ], - "d41d8cd98f00b204e9800998ecf8427e", - "test.vcf.gz.tbi" - ], - "meta": { - "nf-test": "0.9.2", -- "nextflow": "25.04.7" -+ "nextflow": "25.04.6" - }, -- "timestamp": "2025-09-29T10:00:03.682447" -+ "timestamp": "2025-08-21T13:16:00.65871573" - } - } -Changes in 'ensemblvep/vep/tests/nextflow.config': ---- modules/nf-core/ensemblvep/vep/tests/nextflow.config -+++ modules/nf-core/ensemblvep/vep/tests/nextflow.config -@@ -1,5 +1,5 @@ - params { -- vep_cache_version = "115" -+ vep_cache_version = "113" - vep_genome = "WBcel235" - vep_species = "caenorhabditis_elegans" - } - 
+'modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap' is unchanged +'modules/nf-core/ensemblvep/vep/tests/nextflow.config' is unchanged ************************************************************ diff --git a/modules/nf-core/ensemblvep/vep/environment.yml b/modules/nf-core/ensemblvep/vep/environment.yml index d8e22f70..7e60f7f9 100644 --- a/modules/nf-core/ensemblvep/vep/environment.yml +++ b/modules/nf-core/ensemblvep/vep/environment.yml @@ -4,4 +4,5 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::ensembl-vep=114.2 + - bioconda::ensembl-vep=115.2 + - bioconda::perl-math-cdf=0.1 diff --git a/modules/nf-core/ensemblvep/vep/main.nf b/modules/nf-core/ensemblvep/vep/main.nf index 9daaadab..6a9d676e 100644 --- a/modules/nf-core/ensemblvep/vep/main.nf +++ b/modules/nf-core/ensemblvep/vep/main.nf @@ -4,27 +4,29 @@ process ENSEMBLVEP_VEP { conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://depot.galaxyproject.org/singularity/ensembl-vep:114.2--pl5321h2a3209d_0' - : 'biocontainers/ensembl-vep:114.2--pl5321h2a3209d_0'}" + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/3d/3da6e21cbf9803529421d7e136d1ebec5ff71ec50e0d996eda2ce11ec2c19bf9/data' + : 'community.wave.seqera.io/library/ensembl-vep_perl-math-cdf:1e13f65f931a6954'}" input: tuple val(meta), path(vcf), path(custom_extra_files) val genome val species val cache_version - path cache - tuple val(meta2), path(fasta) + tuple val(meta2), path(cache) + tuple val(meta3), path(fasta) path extra_files path custom_vep path custom_vep_tbi output: - tuple val(meta), path("*.vcf.gz"), emit: vcf, optional: true - tuple val(meta), path("*.vcf.gz.tbi"), emit: tbi, optional: true - tuple val(meta), path("*.tab.gz"), emit: tab, optional: true - tuple val(meta), path("*.json.gz"), emit: json, optional: true - path "*.html", emit: report, optional: true - path "versions.yml", emit: versions + tuple val(meta), path("${prefix}.vcf.gz"), emit: vcf, optional: true + tuple val(meta), path("${prefix}.vcf.gz.tbi"), emit: tbi, optional: true + tuple val(meta), path("${prefix}.tab.gz"), emit: tab, optional: true + tuple val(meta), path("${prefix}.json.gz"), emit: json, optional: true + tuple val(meta), val("${task.process}"), val('ensemblvep'), path("*.html"), topic: multiqc_files, emit: report, optional: true + tuple val("${task.process}"), val('ensemblvep'), eval("vep --help | sed -n '/ensembl-vep/s/.*: //p'"), topic: versions, emit: versions_ensemblvep + tuple val("${task.process}"), val('tabix'), eval("tabix -h 2>&1 | grep -oP 'Version:\\s*\\K[^\\s]+'"), topic: versions, emit: versions_tabix + tuple val("${task.process}"), val('perl-math-cdf'), eval("perl -MMath::CDF -e 'print \\\$Math::CDF::VERSION'"), topic: versions, emit: versions_perlmathcdf when: task.ext.when == null || task.ext.when @@ -34,11 +36,12 @@ process ENSEMBLVEP_VEP { def args2 = task.ext.args2 ?: '' def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json") ? 'json' : args.contains("--tab") ? 
'tab' : 'vcf' def compress_cmd = args.contains("--compress_output") ? '' : '--compress_output bgzip' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" def reference = fasta ? "--fasta ${fasta}" : "" def create_index = file_extension == "vcf" ? "tabix ${args2} ${prefix}.${file_extension}.gz" : "" args = args.replaceAll(/--custom file=[^,]+/, "--custom file=${custom_vep}") + """ vep \\ -i ${vcf} \\ @@ -54,27 +57,15 @@ process ENSEMBLVEP_VEP { --fork ${task.cpus} ${create_index} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') - tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json") ? 'json' : args.contains("--tab") ? 'tab' : 'vcf' def create_index = file_extension == "vcf" ? "touch ${prefix}.${file_extension}.gz.tbi" : "" """ echo "" | gzip > ${prefix}.${file_extension}.gz ${create_index} touch ${prefix}_summary.html - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') - tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/ensemblvep/vep/meta.yml b/modules/nf-core/ensemblvep/vep/meta.yml index 7d91a7d0..daa62526 100644 --- a/modules/nf-core/ensemblvep/vep/meta.yml +++ b/modules/nf-core/ensemblvep/vep/meta.yml @@ -1,6 +1,6 @@ name: ensemblvep_vep -description: Ensembl Variant Effect Predictor (VEP). The output-file-format is controlled - through `task.ext.args`. +description: Ensembl Variant Effect Predictor (VEP). The output-file-format is + controlled through `task.ext.args`. 
keywords: - annotation - vcf @@ -13,7 +13,8 @@ tools: or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions. homepage: https://www.ensembl.org/info/docs/tools/vep/index.html documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html - licence: ["Apache-2.0"] + licence: + - "Apache-2.0" identifier: "" input: - - meta: @@ -44,12 +45,17 @@ input: type: integer description: | which version of the cache to annotate with - - cache: - type: file - description: | - path to VEP cache (optional) - ontologies: [] - - meta2: + type: map + description: | + Groovy Map containing cache information + e.g. [ id:'test' ] + - cache: + type: file + description: | + path to VEP cache (optional) + ontologies: [] + - - meta3: type: map description: | Groovy Map containing fasta reference information @@ -71,19 +77,19 @@ output: type: map description: | Map with sample information - - "*.vcf.gz": + - ${prefix}.vcf.gz: type: file description: | annotated vcf (optional) pattern: "*.vcf.gz" ontologies: - - edam: http://edamontology.org/format_3989 # GZIP format + - edam: http://edamontology.org/format_3989 tbi: - - meta: type: map description: | Map with sample information - - "*.vcf.gz.tbi": + - ${prefix}.vcf.gz.tbi: type: file description: | annotated vcf index (optional) @@ -94,38 +100,116 @@ output: type: map description: | Map with sample information - - "*.tab.gz": + - ${prefix}.tab.gz: type: file description: | tab file with annotated variants (optional) pattern: "*.ann.tab.gz" ontologies: - - edam: http://edamontology.org/format_3989 # GZIP format + - edam: http://edamontology.org/format_3989 json: - - meta: type: map description: | Map with sample information - - "*.json.gz": + - ${prefix}.json.gz: type: file description: | json file with annotated variants (optional) pattern: "*.ann.json.gz" ontologies: - - edam: http://edamontology.org/format_3989 # GZIP format + - edam: http://edamontology.org/format_3989 report: - - 
"*.html": - type: file - description: VEP report file - pattern: "*.html" - ontologies: [] + - - meta: + type: map + description: | + Map with sample information + - ${task.process}: + type: string + description: The process + - ensemblvep: + type: string + description: The tool name + - "*.html": + type: file + description: VEP report file + pattern: "*.html" + ontologies: [] + versions_ensemblvep: + - - ${task.process}: + type: string + description: The process + - ensemblvep: + type: string + description: The tool name + - "vep --help | sed -n '/ensembl-vep/s/.*: //p'": + type: eval + description: The command used to generate the version of the tool + versions_tabix: + - - ${task.process}: + type: string + description: The process + - tabix: + type: string + description: The tool name + - tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+': + type: eval + description: The expression to obtain the version of the tool + versions_perlmathcdf: + - - ${task.process}: + type: string + description: The process + - perl-math-cdf: + type: string + description: The tool name + - perl -MMath::CDF -e 'print \\$Math::CDF::VERSION': + type: eval + description: The expression to obtain the version of the tool +topics: + multiqc_files: + - - meta: + type: string + description: | + Map with sample information + - ${task.process}: + type: string + description: The process + - ensemblvep: + type: string + description: The tool name + - "*.html": + type: file + description: VEP report file + pattern: "*.html" + ontologies: [] versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The process + - ensemblvep: + type: string + description: The tool name + - "vep --help | sed -n '/ensembl-vep/s/.*: //p'": + type: eval + description: The command used to generate the version of the tool + - - ${task.process}: + 
type: string + description: The process + - tabix: + type: string + description: The tool name + - tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+': + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The process + - perl-math-cdf: + type: string + description: The tool name + - perl -MMath::CDF -e 'print \\$Math::CDF::VERSION': + type: eval + description: The expression to obtain the version of the tool authors: - "@maxulysse" - "@matthdsm" diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test b/modules/nf-core/ensemblvep/vep/tests/main.nf.test index 4a62ffd5..63d91155 100644 --- a/modules/nf-core/ensemblvep/vep/tests/main.nf.test +++ b/modules/nf-core/ensemblvep/vep/tests/main.nf.test @@ -16,9 +16,12 @@ nextflow_process { when { process { """ - vep_cache = Channel.of(file('s3://annotation-cache/vep_cache/113_WBcel235/')).collect() + vep_cache = channel.of([ + [ id:'115_WBcel235' ], + file('s3://annotation-cache/vep_cache/115_WBcel235/') + ]).collect() - input[0] = Channel.of([ + input[0] = channel.of([ [ id:'test' ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), [] @@ -27,7 +30,7 @@ nextflow_process { input[2] = params.vep_species input[3] = params.vep_cache_version input[4] = vep_cache - input[5] = Channel.value([ + input[5] = channel.value([ [id:"fasta"], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]) @@ -40,9 +43,9 @@ nextflow_process { assert process.success assertAll( { assert snapshot( - process.out.versions, - path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, - file(process.out.tbi.get(0).get(1)).name + file(process.out.vcf[0][1]).name + ",variantsMD5:" + path(process.out.vcf[0][1]).vcf.variantsMD5, + file(process.out.tbi[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") } ).match() } ) } @@ -55,9 +58,12 @@ 
nextflow_process { when { process { """ - vep_cache = Channel.of(file('s3://annotation-cache/vep_cache/113_WBcel235/')).collect() + vep_cache = channel.of([ + [ id:'115_WBcel235' ], + file('s3://annotation-cache/vep_cache/115_WBcel235/') + ]).collect() - input[0] = Channel.of([ + input[0] = channel.of([ [ id:'test' ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), [] @@ -66,7 +72,7 @@ nextflow_process { input[2] = params.vep_species input[3] = params.vep_cache_version input[4] = vep_cache - input[5] = Channel.value([ + input[5] = channel.value([ [id:"fasta"], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]) @@ -78,8 +84,11 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out.versions).match() }, - { assert path(process.out.tab.get(0).get(1)).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v114.2") } + { assert snapshot( + file(process.out.tab[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") + }).match() }, + { assert path(process.out.tab[0][1]).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v115.2") } ) } } diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap b/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap index bd80e401..6f834d3e 100644 --- a/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap +++ b/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap @@ -1,28 +1,69 @@ { "test_ensemblvep_vep_fasta_tab_gz": { "content": [ - [ - "versions.yml:md5,01653f5a713b20d56ed2468a2dab959a" - ] + "test.tab.gz", + { + "versions_ensemblvep": [ + [ + "ENSEMBLVEP_VEP", + "ensemblvep", + "115.2" + ] + ], + "versions_perlmathcdf": [ + [ + "ENSEMBLVEP_VEP", + "perl-math-cdf", + "0.1" + ] + ], + "versions_tabix": [ + [ + "ENSEMBLVEP_VEP", + "tabix", + "1.21" + ] + ] + } ], + "timestamp": "2026-02-06T12:47:57.03116265", "meta": { - "nf-test": 
"0.9.2", - "nextflow": "25.04.6" - }, - "timestamp": "2025-08-21T13:16:46.760065318" + "nf-test": "0.9.3", + "nextflow": "25.10.3" + } }, "test_ensemblvep_vep_fasta_vcf - stub (not really but linting complains otherwise)": { "content": [ - [ - "versions.yml:md5,01653f5a713b20d56ed2468a2dab959a" - ], - "d41d8cd98f00b204e9800998ecf8427e", - "test.vcf.gz.tbi" + "test.vcf.gz,variantsMD5:d41d8cd98f00b204e9800998ecf8427e", + "test.vcf.gz.tbi", + { + "versions_ensemblvep": [ + [ + "ENSEMBLVEP_VEP", + "ensemblvep", + "115.2" + ] + ], + "versions_perlmathcdf": [ + [ + "ENSEMBLVEP_VEP", + "perl-math-cdf", + "0.1" + ] + ], + "versions_tabix": [ + [ + "ENSEMBLVEP_VEP", + "tabix", + "1.21" + ] + ] + } ], + "timestamp": "2026-02-06T12:47:34.421995264", "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" - }, - "timestamp": "2025-08-21T13:16:00.65871573" + "nf-test": "0.9.3", + "nextflow": "25.10.3" + } } } \ No newline at end of file diff --git a/modules/nf-core/ensemblvep/vep/tests/nextflow.config b/modules/nf-core/ensemblvep/vep/tests/nextflow.config index f1f66774..8e68fe24 100644 --- a/modules/nf-core/ensemblvep/vep/tests/nextflow.config +++ b/modules/nf-core/ensemblvep/vep/tests/nextflow.config @@ -1,5 +1,5 @@ params { - vep_cache_version = "113" + vep_cache_version = "115" vep_genome = "WBcel235" vep_species = "caenorhabditis_elegans" } diff --git a/modules/nf-core/longphase/haplotag/environment.yml b/modules/nf-core/longphase/haplotag/environment.yml index 65c58ba3..f436bdae 100644 --- a/modules/nf-core/longphase/haplotag/environment.yml +++ b/modules/nf-core/longphase/haplotag/environment.yml @@ -3,5 +3,7 @@ channels: - conda-forge - bioconda + dependencies: - - bioconda::longphase=1.7.3 + - bioconda::htslib=1.23.1 + - bioconda::longphase=2.0.1 diff --git a/modules/nf-core/longphase/haplotag/main.nf b/modules/nf-core/longphase/haplotag/main.nf index 86ba0592..7eb84669 100644 --- a/modules/nf-core/longphase/haplotag/main.nf +++ 
b/modules/nf-core/longphase/haplotag/main.nf @@ -4,8 +4,8 @@ process LONGPHASE_HAPLOTAG { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/longphase:1.7.3--hf5e1c6e_0': - 'biocontainers/longphase:1.7.3--hf5e1c6e_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/83/83fce1d397cf71705cc096fc0e0e52f7013bdd471ef68ee53ae765688e5c439c/data': + 'community.wave.seqera.io/library/longphase_samtools:8c61296cae7a5fc0' }" input: tuple val(meta), path(bam), path(bai), path(snps), path(svs), path(mods) @@ -16,7 +16,7 @@ process LONGPHASE_HAPLOTAG { output: tuple val(meta), path("*.{bam,cram}"), emit: bam tuple val(meta), path("*.log") , emit: log , optional: true - path "versions.yml" , emit: versions + tuple val("${task.process}"), val("longphase"), eval("longphase --version | head -n 1 | sed 's/Version: //'"), emit: versions_longphase, topic: versions when: task.ext.when == null || task.ext.when @@ -42,11 +42,6 @@ process LONGPHASE_HAPLOTAG { if [ -f "${prefix}.out" ]; then mv ${prefix}.out ${prefix}.log fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') - END_VERSIONS """ stub: @@ -57,10 +52,5 @@ process LONGPHASE_HAPLOTAG { """ touch ${prefix}.${suffix} ${log} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') - END_VERSIONS """ } diff --git a/modules/nf-core/longphase/haplotag/meta.yml b/modules/nf-core/longphase/haplotag/meta.yml index 63368b31..5c3ad844 100644 --- a/modules/nf-core/longphase/haplotag/meta.yml +++ b/modules/nf-core/longphase/haplotag/meta.yml @@ -1,7 +1,7 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "longphase_haplotag" -description: LongPhase is an 
ultra-fast program for simultaneously co-phasing SNPs, - small indels, large SVs, and (5mC) modifications for Nanopore and PacBio platforms. +description: LongPhase is an ultra-fast program for simultaneously co-phasing + SNPs, small indels, large SVs, and (5mC) modifications for Nanopore and PacBio + platforms. keywords: - haplotag - long-read @@ -15,9 +15,9 @@ tools: documentation: "https://github.com/twolinin/longphase" tool_dev_url: "https://github.com/twolinin/longphase" doi: "10.1093/bioinformatics/btac058" - licence: ["GPL v3"] + licence: + - "GPL v3" identifier: "" - input: - - meta: type: map @@ -92,13 +92,27 @@ output: description: Log file pattern: "*.log" ontologies: [] + versions_longphase: + - - ${task.process}: + type: string + description: The name of the process + - longphase: + type: string + description: The name of the tool + - "longphase --version | head -n 1 | sed 's/Version: //'": + type: eval + description: The expression to obtain the version of the tool +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The name of the process + - longphase: + type: string + description: The name of the tool + - "longphase --version | head -n 1 | sed 's/Version: //'": + type: eval + description: The expression to obtain the version of the tool authors: - "@fellen31" maintainers: diff --git a/modules/nf-core/longphase/haplotag/tests/main.nf.test b/modules/nf-core/longphase/haplotag/tests/main.nf.test index c80133c6..687f61e0 100644 --- a/modules/nf-core/longphase/haplotag/tests/main.nf.test +++ b/modules/nf-core/longphase/haplotag/tests/main.nf.test @@ -38,7 +38,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions")}, bam(process.out.bam.get(0).get(1), 
stringency: 'silent').getHeader(), bam(process.out.bam.get(0).get(1), stringency: 'silent').getReadsMD5(), ).match() } @@ -79,7 +79,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions")}, process.out.log, bam(process.out.bam.get(0).get(1), 'https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/genome/genome.fasta', stringency: 'silent').getHeader()[2..5], bam(process.out.bam.get(0).get(1), 'https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/genome/genome.fasta', stringency: 'silent').getReadsMD5(), @@ -118,7 +118,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions")}, bam(process.out.bam.get(0).get(1), stringency: 'silent').getHeader(), bam(process.out.bam.get(0).get(1), stringency: 'silent').getReadsMD5(), ).match() } @@ -157,7 +157,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } @@ -194,9 +194,9 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } } -} \ No newline at end of file +} diff --git a/modules/nf-core/longphase/haplotag/tests/main.nf.test.snap b/modules/nf-core/longphase/haplotag/tests/main.nf.test.snap index 3cf35dc3..23287721 100644 --- a/modules/nf-core/longphase/haplotag/tests/main.nf.test.snap +++ b/modules/nf-core/longphase/haplotag/tests/main.nf.test.snap @@ -2,25 +2,6 @@ "[ bam, bai, snps, [], [] ], fasta, fai - log & cram -stub": { "content": [ { - "0": [ - [ - { - "id": "test" - }, - "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test" - }, - 
"test.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" - ], "bam": [ [ { @@ -37,55 +18,51 @@ "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" + "versions_longphase": [ + [ + "LONGPHASE_HAPLOTAG", + "longphase", + "2.0.1 " + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2024-08-09T09:40:30.738831707" + "timestamp": "2026-03-20T10:13:11.968590854" }, "[ bam, bai, snps, [], [] ], fasta, fai": { "content": [ - [ - "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" - ], + { + "versions_longphase": [ + [ + "LONGPHASE_HAPLOTAG", + "longphase", + "2.0.1 " + ] + ] + }, [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:chr22\tLN:40001", "@RG\tID:test\tSM:test", "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -y -x map-ont --secondary=no -R @RG\\tID:test\\tSM:test -t 30 -a genome.mmi test.bam_other.fastq.gz", "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.19.2\tCL:samtools sort -@ 29 -o test.bam_other.fastq.gz.bam --write-index", - "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:1.7.3\tCL:longphase haplotag --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam " + "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:2.0.1\tCL:longphase haplotag --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam " ], "721264eb2824a3146b331f2532d10180" ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2024-07-25T09:12:34.848038423" + "timestamp": "2026-03-20T10:12:40.584213389" }, "[ bam, bai, snps, [], [] ], fasta, fai -stub": { "content": [ { - "0": [ - [ - { - "id": "test" - }, - "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - - ], - "2": [ - 
"versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" - ], "bam": [ [ { @@ -97,63 +74,79 @@ "log": [ ], - "versions": [ - "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" + "versions_longphase": [ + [ + "LONGPHASE_HAPLOTAG", + "longphase", + "2.0.1 " + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2024-08-09T09:40:20.836809553" + "timestamp": "2026-03-20T10:13:04.628910585" }, "[ bam, bai, snps, svs, [] ], fasta, fai": { "content": [ - [ - "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" - ], + { + "versions_longphase": [ + [ + "LONGPHASE_HAPLOTAG", + "longphase", + "2.0.1 " + ] + ] + }, [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:chr22\tLN:40001", "@RG\tID:test\tSM:test", "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -y -x map-ont --secondary=no -R @RG\\tID:test\\tSM:test -t 30 -a genome.mmi test.bam_other.fastq.gz", "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.19.2\tCL:samtools sort -@ 29 -o test.bam_other.fastq.gz.bam --write-index", - "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:1.7.3\tCL:longphase haplotag --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam --sv-file NA24385_sv.vcf.gz " + "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:2.0.1\tCL:longphase haplotag --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam --sv-file NA24385_sv.vcf.gz " ], "721264eb2824a3146b331f2532d10180" ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2024-10-04T13:39:33.50395694" + "timestamp": "2026-03-20T10:12:57.997252428" }, "[ bam, bai, snps, [], [] ], fasta, fai - log & cram": { "content": [ - [ - "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" - ], + { + "versions_longphase": [ + [ + "LONGPHASE_HAPLOTAG", + "longphase", + "2.0.1 " + ] + ] + }, [ [ { "id": "test" }, - 
"test.log:md5,6203f10696f4b0909f0d327c021df773" + "test.log:md5,4ca65dc87cc5c362e8a5fa26a47a896b" ] ], [ "@RG\tID:test\tSM:test", "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -y -x map-ont --secondary=no -R @RG\\tID:test\\tSM:test -t 30 -a genome.mmi test.bam_other.fastq.gz", "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.19.2\tCL:samtools sort -@ 29 -o test.bam_other.fastq.gz.bam --write-index", - "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:1.7.3\tCL:longphase haplotag --log --cram --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam " + "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:2.0.1\tCL:longphase haplotag --log --cram --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam " ], "721264eb2824a3146b331f2532d10180" ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2024-07-25T09:45:54.254102844" + "timestamp": "2026-03-20T10:12:49.73950987" } } \ No newline at end of file diff --git a/modules/nf-core/longphase/phase/environment.yml b/modules/nf-core/longphase/phase/environment.yml index 068a22cd..f436bdae 100644 --- a/modules/nf-core/longphase/phase/environment.yml +++ b/modules/nf-core/longphase/phase/environment.yml @@ -5,5 +5,5 @@ channels: - bioconda dependencies: - - bioconda::htslib=1.20 - - bioconda::longphase=1.7.3 + - bioconda::htslib=1.23.1 + - bioconda::longphase=2.0.1 diff --git a/modules/nf-core/longphase/phase/longphase-phase.diff b/modules/nf-core/longphase/phase/longphase-phase.diff index 1e9d2652..51a6018c 100644 --- a/modules/nf-core/longphase/phase/longphase-phase.diff +++ b/modules/nf-core/longphase/phase/longphase-phase.diff @@ -1,492 +1,74 @@ Changes in component 'nf-core/longphase/phase' -Changes in 'longphase/phase/meta.yml': ---- modules/nf-core/longphase/phase/meta.yml -+++ modules/nf-core/longphase/phase/meta.yml -@@ -34,7 +34,7 @@ - description: Index 
of sorted BAM/CRAM file(s) - pattern: "*.{bai,crai,csi}" - ontologies: [] -- - snvs: -+ - snps: - type: file - description: VCF file with SNPs (and INDELs) - pattern: "*.{vcf,vcf.gz}" -@@ -70,39 +70,15 @@ - pattern: "*.fai" - ontologies: [] - output: -- snv_vcf: -+ vcf: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` -- - "${prefix}.vcf.gz": -+ - "*.vcf.gz": - type: file -- description: Compressed VCF file with phased SNVs and indels -- pattern: "*.vcf.gz" -- ontologies: -- - edam: http://edamontology.org/format_3989 # GZIP format -- sv_vcf: -- - - meta: -- type: map -- description: | -- Groovy Map containing sample information -- e.g. `[ id:'sample1', single_end:false ]` -- - "${prefix}_SV.vcf.gz": -- type: file -- description: Compressed VCF file with phased SVs -- pattern: "*_SV.vcf.gz" -- ontologies: -- - edam: http://edamontology.org/format_3989 # GZIP format -- mod_vcf: -- - - meta: -- type: map -- description: | -- Groovy Map containing sample information -- e.g. `[ id:'sample1', single_end:false ]` -- - "${prefix}_mod.vcf.gz": -- type: file -- description: Compressed VCF file with phased modifications -+ description: Compressed VCF file with phased variants - pattern: "*.vcf.gz" - ontologies: - - edam: http://edamontology.org/format_3989 # GZIP format - +'modules/nf-core/longphase/phase/meta.yml' is unchanged Changes in 'longphase/phase/main.nf': --- modules/nf-core/longphase/phase/main.nf +++ modules/nf-core/longphase/phase/main.nf -@@ -4,20 +4,19 @@ - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
-- 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b0/b0184a9a36d8612fbae38bbaad7b52f03b815ad17673740e107cf1f267a1f15d/data': -- 'community.wave.seqera.io/library/htslib_longphase:3071e61356fc25a4' }" -+ 'https://depot.galaxyproject.org/singularity/mulled-v2-d626bb8ec5a659accfbd8490bc1ac4a940722258:682e8c0cc0ceebf9bd38371a58249aabce93b1b3-0': -+ 'biocontainers/mulled-v2-d626bb8ec5a659accfbd8490bc1ac4a940722258:682e8c0cc0ceebf9bd38371a58249aabce93b1b3-0' }" - - input: -- tuple val(meta), path(bam), path(bai), path(snvs), path(svs), path(mods) -+ tuple val(meta), path(bam), path(bai), path(snps), path(svs), path(mods) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) +@@ -14,11 +14,14 @@ output: - tuple val(meta), path("${prefix}.vcf.gz") , emit: snv_vcf - tuple val(meta), path("${prefix}_SV.vcf.gz") , emit: sv_vcf , optional: true - tuple val(meta), path("${prefix}_mod.vcf.gz"), emit: mod_vcf, optional: true -- path "versions.yml" , emit: versions -+ tuple val(meta), path("*.vcf.gz") , emit: vcf -+ tuple val(meta), path("*.vcf.gz.tbi") , emit: tbi -+ path "versions.yml" , emit: versions - ++ tuple val(meta), path("${prefix}.vcf.gz") , emit: snv_vcf ++ tuple val(meta), path("${prefix}.vcf.gz.tbi") , emit: snv_vcf_index ++ tuple val(meta), path("${prefix}_SV.vcf.gz") , emit: sv_vcf , optional: true ++ tuple val(meta), path("${prefix}_SV.vcf.gz.tbi") , emit: sv_vcf_index , optional: true ++ tuple val(meta), path("${prefix}_mod.vcf.gz") , emit: mod_vcf, optional: true ++ tuple val(meta), path("${prefix}_mod.vcf.gz.tbi"), emit: mod_vcf_index, optional: true + tuple val("${task.process}"), val("longphase"), eval("longphase --version | head -n 1 | sed 's/Version: //'"), emit: versions_longphase, topic: versions +- ++ when: task.ext.when == null || task.ext.when -@@ -25,7 +24,7 @@ - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' -- prefix = task.ext.prefix ?: "${meta.id}" -+ def prefix = task.ext.prefix ?: 
"${meta.id}" - def sv_file = svs ? "--sv-file ${svs}" : "" - def mod_file = mods ? "--mod-file ${mods}" : "" - def bams = bam.collectMany { file -> ["-b", file] }.join(" ") -@@ -36,7 +35,7 @@ - --threads $task.cpus \\ - -o ${prefix} \\ - --reference ${fasta} \\ -- --snp-file ${snvs} \\ -+ --snp-file ${snps} \\ - ${bams} \\ - ${sv_file} \\ - ${mod_file} \\ -@@ -44,7 +43,9 @@ - bgzip \\ - --threads $task.cpus \\ + +@@ -46,27 +49,28 @@ $args2 \\ -- ${prefix}*.vcf -+ ${prefix}.vcf -+ -+ tabix -p vcf ${prefix}.vcf.gz + ${prefix}*.vcf - cat <<-END_VERSIONS > versions.yml - "${task.process}": -@@ -54,15 +55,10 @@ +- cat <<-END_VERSIONS > versions.yml +- "${task.process}": +- longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') +- END_VERSIONS ++ tabix -p vcf ${prefix}.vcf.gz ++ ++ if [ -f ${prefix}_SV.vcf.gz ]; then ++ tabix -p vcf ${prefix}_SV.vcf.gz ++ fi ++ ++ if [ -f ${prefix}_mod.vcf.gz ]; then ++ tabix -p vcf ${prefix}_mod.vcf.gz ++ fi + """ stub: - def args = task.ext.args ?: '' -- prefix = task.ext.prefix ?: "${meta.id}" +- def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" - def sv_command = svs ? "echo '' | bgzip -c > ${prefix}_SV.vcf.gz" : "" - def mod_command = mods ? 
"echo '' | bgzip -c > ${prefix}_mod.vcf.gz" : "" -+ def prefix = task.ext.prefix ?: "${meta.id}" """ - echo $args - echo "" | bgzip -c > ${prefix}.vcf.gz -- +- echo "" | bgzip -c > ${prefix}.vcf.gz ++ tabix -p vcf ${prefix}.vcf.gz + - $sv_command - $mod_command -+ echo "" > ${prefix}.vcf.gz.tbi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - -Changes in 'longphase/phase/environment.yml': ---- modules/nf-core/longphase/phase/environment.yml -+++ modules/nf-core/longphase/phase/environment.yml -@@ -5,5 +5,5 @@ - - bioconda - - dependencies: -- - bioconda::htslib=1.22.1 -- - bioconda::longphase=2.0 -+ - bioconda::htslib=1.20 -+ - bioconda::longphase=1.7.3 - -Changes in 'longphase/phase/tests/main.nf.test': ---- modules/nf-core/longphase/phase/tests/main.nf.test -+++ modules/nf-core/longphase/phase/tests/main.nf.test -@@ -44,7 +44,7 @@ - - } - -- test("[ bam, bai, snps, svs, [] ], fasta, fai") { -+test("[ bam, bai, snps, svs, [] ], fasta, fai") { - - when { - process { -@@ -78,7 +78,7 @@ - - } - -- test("[ bam x2, bai x2, snps, svs, [] ], fasta, fai") { -+test("[ bam x2, bai x2, snps, svs, [] ], fasta, fai") { - - when { - process { -@@ -154,38 +154,4 @@ - - } - -- test("[ bam, bai, snps, svs, [] ], fasta, fai - stub") { -- options "-stub" -- -- when { -- process { -- """ -- input[0] = [ -- [ id:'test' ], -- file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), -- file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), -- file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), -- file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz', checkIfExists: true), -- [] -- ] -- input[1] = [ -- [ id:'reference' ], -- file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) -- ] -- 
input[2] = [ -- [ id:'reference' ], -- file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) -- ] -- """ -- } -- } -- -- then { -- assertAll( -- { assert process.success }, -- { assert snapshot(process.out).match() } -- ) -- } -- -- } ++ if [ -f ${prefix}_SV.vcf.gz ]; then ++ tabix -p vcf ${prefix}_SV.vcf.gz ++ fi + +- cat <<-END_VERSIONS > versions.yml +- "${task.process}": +- longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') +- END_VERSIONS ++ if [ -f ${prefix}_mod.vcf.gz ]; then ++ tabix -p vcf ${prefix}_mod.vcf.gz ++ fi + """ } -Changes in 'longphase/phase/tests/main.nf.test.snap': ---- modules/nf-core/longphase/phase/tests/main.nf.test.snap -+++ modules/nf-core/longphase/phase/tests/main.nf.test.snap -@@ -1,59 +1,4 @@ - { -- "[ bam, bai, snps, svs, [] ], fasta, fai - stub": { -- "content": [ -- { -- "0": [ -- [ -- { -- "id": "test" -- }, -- "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" -- ] -- ], -- "1": [ -- [ -- { -- "id": "test" -- }, -- "test_SV.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" -- ] -- ], -- "2": [ -- -- ], -- "3": [ -- "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" -- ], -- "mod_vcf": [ -- -- ], -- "snv_vcf": [ -- [ -- { -- "id": "test" -- }, -- "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" -- ] -- ], -- "sv_vcf": [ -- [ -- { -- "id": "test" -- }, -- "test_SV.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" -- ] -- ], -- "versions": [ -- "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" -- ] -- } -- ], -- "meta": { -- "nf-test": "0.9.2", -- "nextflow": "25.04.5" -- }, -- "timestamp": "2025-11-06T16:06:24.025191062" -- }, - "[ bam, bai, snps, [], [] ], fasta, fai": { - "content": [ - { -@@ -62,42 +7,30 @@ - { - "id": "test" - }, -- "test.vcf.gz:md5,77d7ca7d16c841d3f552681abef984dc" -+ "test.vcf.gz:md5,fd2d21056b2de4722f12d5e883d9cb0a" - ] - ], - "1": [ -- -+ "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" - ], -- "2": [ -- -- ], -- "3": [ -- 
"versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" -- ], -- "mod_vcf": [ -- -- ], -- "snv_vcf": [ -+ "vcf": [ - [ - { - "id": "test" - }, -- "test.vcf.gz:md5,77d7ca7d16c841d3f552681abef984dc" -+ "test.vcf.gz:md5,fd2d21056b2de4722f12d5e883d9cb0a" - ] - ], -- "sv_vcf": [ -- -- ], - "versions": [ -- "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" -+ "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" - ] - } - ], - "meta": { -- "nf-test": "0.9.2", -- "nextflow": "25.04.5" -+ "nf-test": "0.8.4", -+ "nextflow": "24.04.2" - }, -- "timestamp": "2025-11-06T16:05:57.029934447" -+ "timestamp": "2024-07-22T12:14:04.269956432" - }, - "[ bam, bai, snps, svs, [] ], fasta, fai": { - "content": [ -@@ -107,52 +40,30 @@ - { - "id": "test" - }, -- "test.vcf.gz:md5,f26bc442f6a1645bcfaabf989ab9483c" -+ "test.vcf.gz:md5,b0a3effd6e076edbe7e2f1f7cfff547c" - ] - ], - "1": [ -+ "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" -+ ], -+ "vcf": [ - [ - { - "id": "test" - }, -- "test_SV.vcf.gz:md5,e1b83c15a21bab57f2b228cc7c7d8be8" -- ] -- ], -- "2": [ -- -- ], -- "3": [ -- "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" -- ], -- "mod_vcf": [ -- -- ], -- "snv_vcf": [ -- [ -- { -- "id": "test" -- }, -- "test.vcf.gz:md5,f26bc442f6a1645bcfaabf989ab9483c" -- ] -- ], -- "sv_vcf": [ -- [ -- { -- "id": "test" -- }, -- "test_SV.vcf.gz:md5,e1b83c15a21bab57f2b228cc7c7d8be8" -+ "test.vcf.gz:md5,b0a3effd6e076edbe7e2f1f7cfff547c" - ] - ], - "versions": [ -- "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" -+ "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" - ] - } - ], - "meta": { -- "nf-test": "0.9.2", -- "nextflow": "25.04.5" -+ "nf-test": "0.9.0", -+ "nextflow": "24.04.4" - }, -- "timestamp": "2025-11-06T16:06:03.319855838" -+ "timestamp": "2024-10-04T13:37:16.921910004" - }, - "[ bam x2, bai x2, snps, svs, [] ], fasta, fai": { - "content": [ -@@ -162,52 +73,30 @@ - { - "id": "test" - }, -- "test.vcf.gz:md5,5333ba9fa14233d3fdbd8b9e1786b998" -+ 
"test.vcf.gz:md5,04905b6042998e592c9f3b887ae9e09c" - ] - ], - "1": [ -+ "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" -+ ], -+ "vcf": [ - [ - { - "id": "test" - }, -- "test_SV.vcf.gz:md5,434fd35ae3de2a9187e43932686bfd19" -- ] -- ], -- "2": [ -- -- ], -- "3": [ -- "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" -- ], -- "mod_vcf": [ -- -- ], -- "snv_vcf": [ -- [ -- { -- "id": "test" -- }, -- "test.vcf.gz:md5,5333ba9fa14233d3fdbd8b9e1786b998" -- ] -- ], -- "sv_vcf": [ -- [ -- { -- "id": "test" -- }, -- "test_SV.vcf.gz:md5,434fd35ae3de2a9187e43932686bfd19" -+ "test.vcf.gz:md5,04905b6042998e592c9f3b887ae9e09c" - ] - ], - "versions": [ -- "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" -+ "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" - ] - } - ], - "meta": { -- "nf-test": "0.9.2", -- "nextflow": "25.04.5" -+ "nf-test": "0.9.0", -+ "nextflow": "24.04.4" - }, -- "timestamp": "2025-11-06T16:06:10.867281359" -+ "timestamp": "2024-10-04T13:37:23.41768963" - }, - "[ bam, bai, snps, [], [] ], fasta, fai - stub": { - "content": [ -@@ -221,18 +110,9 @@ - ] - ], - "1": [ -- -+ "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" - ], -- "2": [ -- -- ], -- "3": [ -- "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" -- ], -- "mod_vcf": [ -- -- ], -- "snv_vcf": [ -+ "vcf": [ - [ - { - "id": "test" -@@ -240,18 +120,15 @@ - "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], -- "sv_vcf": [ -- -- ], - "versions": [ -- "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" -+ "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" - ] - } - ], - "meta": { -- "nf-test": "0.9.2", -- "nextflow": "25.04.5" -+ "nf-test": "0.8.4", -+ "nextflow": "24.04.2" - }, -- "timestamp": "2025-11-06T16:06:17.992733472" -+ "timestamp": "2024-07-22T12:15:40.296227382" - } - } +'modules/nf-core/longphase/phase/environment.yml' is unchanged +'modules/nf-core/longphase/phase/tests/main.nf.test' is unchanged +'modules/nf-core/longphase/phase/tests/main.nf.test.snap' is unchanged 
'modules/nf-core/longphase/phase/tests/nextflow.config' is unchanged ************************************************************ diff --git a/modules/nf-core/longphase/phase/main.nf b/modules/nf-core/longphase/phase/main.nf index f8351afd..63d119f7 100644 --- a/modules/nf-core/longphase/phase/main.nf +++ b/modules/nf-core/longphase/phase/main.nf @@ -4,27 +4,31 @@ process LONGPHASE_PHASE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-d626bb8ec5a659accfbd8490bc1ac4a940722258:682e8c0cc0ceebf9bd38371a58249aabce93b1b3-0': - 'biocontainers/mulled-v2-d626bb8ec5a659accfbd8490bc1ac4a940722258:682e8c0cc0ceebf9bd38371a58249aabce93b1b3-0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/83/83fce1d397cf71705cc096fc0e0e52f7013bdd471ef68ee53ae765688e5c439c/data': + 'community.wave.seqera.io/library/longphase_samtools:8c61296cae7a5fc0' }" input: - tuple val(meta), path(bam), path(bai), path(snps), path(svs), path(mods) + tuple val(meta), path(bam), path(bai), path(snvs), path(svs), path(mods) tuple val(meta2), path(fasta) tuple val(meta3), path(fai) output: - tuple val(meta), path("*.vcf.gz") , emit: vcf - tuple val(meta), path("*.vcf.gz.tbi") , emit: tbi - path "versions.yml" , emit: versions - + tuple val(meta), path("${prefix}.vcf.gz") , emit: snv_vcf + tuple val(meta), path("${prefix}.vcf.gz.tbi") , emit: snv_vcf_index + tuple val(meta), path("${prefix}_SV.vcf.gz") , emit: sv_vcf , optional: true + tuple val(meta), path("${prefix}_SV.vcf.gz.tbi") , emit: sv_vcf_index , optional: true + tuple val(meta), path("${prefix}_mod.vcf.gz") , emit: mod_vcf, optional: true + tuple val(meta), path("${prefix}_mod.vcf.gz.tbi"), emit: mod_vcf_index, optional: true + tuple val("${task.process}"), val("longphase"), eval("longphase --version | head -n 1 | sed 's/Version: //'"), emit: versions_longphase, topic: 
versions + when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" def sv_file = svs ? "--sv-file ${svs}" : "" def mod_file = mods ? "--mod-file ${mods}" : "" def bams = bam.collectMany { file -> ["-b", file] }.join(" ") @@ -35,7 +39,7 @@ process LONGPHASE_PHASE { --threads $task.cpus \\ -o ${prefix} \\ --reference ${fasta} \\ - --snp-file ${snps} \\ + --snp-file ${snvs} \\ ${bams} \\ ${sv_file} \\ ${mod_file} \\ @@ -43,26 +47,30 @@ process LONGPHASE_PHASE { bgzip \\ --threads $task.cpus \\ $args2 \\ - ${prefix}.vcf - + ${prefix}*.vcf + tabix -p vcf ${prefix}.vcf.gz - cat <<-END_VERSIONS > versions.yml - "${task.process}": - longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') - END_VERSIONS + if [ -f ${prefix}_SV.vcf.gz ]; then + tabix -p vcf ${prefix}_SV.vcf.gz + fi + + if [ -f ${prefix}_mod.vcf.gz ]; then + tabix -p vcf ${prefix}_mod.vcf.gz + fi """ stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" """ - echo "" | bgzip -c > ${prefix}.vcf.gz - echo "" > ${prefix}.vcf.gz.tbi + tabix -p vcf ${prefix}.vcf.gz + + if [ -f ${prefix}_SV.vcf.gz ]; then + tabix -p vcf ${prefix}_SV.vcf.gz + fi - cat <<-END_VERSIONS > versions.yml - "${task.process}": - longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') - END_VERSIONS + if [ -f ${prefix}_mod.vcf.gz ]; then + tabix -p vcf ${prefix}_mod.vcf.gz + fi """ } diff --git a/modules/nf-core/longphase/phase/meta.yml b/modules/nf-core/longphase/phase/meta.yml index 93963d58..266b878b 100644 --- a/modules/nf-core/longphase/phase/meta.yml +++ b/modules/nf-core/longphase/phase/meta.yml @@ -1,7 +1,7 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "longphase_phase" -description: LongPhase is an ultra-fast 
program for simultaneously co-phasing SNPs, - small indels, large SVs, and (5mC) modifications for Nanopore and PacBio platforms. +description: LongPhase is an ultra-fast program for simultaneously co-phasing + SNPs, small indels, large SVs, and (5mC) modifications for Nanopore and PacBio + platforms. keywords: - phase - long-read @@ -15,9 +15,9 @@ tools: documentation: "https://github.com/twolinin/longphase" tool_dev_url: "https://github.com/twolinin/longphase" doi: "10.1093/bioinformatics/btac058" - licence: ["GPL v3"] + licence: + - "GPL v3" identifier: "" - input: - - meta: type: map @@ -34,7 +34,7 @@ input: description: Index of sorted BAM/CRAM file(s) pattern: "*.{bai,crai,csi}" ontologies: [] - - snps: + - snvs: type: file description: VCF file with SNPs (and INDELs) pattern: "*.{vcf,vcf.gz}" @@ -70,25 +70,63 @@ input: pattern: "*.fai" ontologies: [] output: - vcf: + snv_vcf: - - meta: type: map description: | Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` - - "*.vcf.gz": + - ${prefix}.vcf.gz: type: file - description: Compressed VCF file with phased variants + description: Compressed VCF file with phased SNVs and indels pattern: "*.vcf.gz" ontologies: - - edam: http://edamontology.org/format_3989 # GZIP format + - edam: http://edamontology.org/format_3989 + sv_vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ${prefix}_SV.vcf.gz: + type: file + description: Compressed VCF file with phased SVs + pattern: "*_SV.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 + mod_vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - ${prefix}_mod.vcf.gz: + type: file + description: Compressed VCF file with phased modifications + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 + versions_longphase: + - - ${task.process}: + type: string + description: The name of the process + - longphase: + type: string + description: The name of the tool + - "longphase --version | head -n 1 | sed 's/Version: //'": + type: eval + description: The expression to obtain the version of the tool +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The name of the process + - longphase: + type: string + description: The name of the tool + - "longphase --version | head -n 1 | sed 's/Version: //'": + type: eval + description: The expression to obtain the version of the tool authors: - "@fellen31" maintainers: diff --git a/modules/nf-core/longphase/phase/tests/main.nf.test b/modules/nf-core/longphase/phase/tests/main.nf.test index 3e303312..30c666ba 100644 --- a/modules/nf-core/longphase/phase/tests/main.nf.test +++ b/modules/nf-core/longphase/phase/tests/main.nf.test @@ -38,13 +38,13 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } } -test("[ bam, bai, snps, svs, [] ], fasta, fai") { + test("[ bam, bai, snps, svs, [] ], fasta, fai") { when { process { @@ -72,13 +72,13 @@ test("[ bam, bai, snps, svs, [] ], fasta, fai") { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } } -test("[ bam x2, bai x2, snps, svs, [] ], fasta, fai") { + test("[ bam x2, bai x2, snps, svs, [] ], fasta, fai") { when { process { @@ -112,7 +112,7 @@ test("[ bam x2, 
bai x2, snps, svs, [] ], fasta, fai") { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } @@ -148,10 +148,44 @@ test("[ bam x2, bai x2, snps, svs, [] ], fasta, fai") { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } } + test("[ bam, bai, snps, svs, [] ], fasta, fai - stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + + } } diff --git a/modules/nf-core/longphase/phase/tests/main.nf.test.snap b/modules/nf-core/longphase/phase/tests/main.nf.test.snap index 8a38c1ca..c6a155f9 100644 --- a/modules/nf-core/longphase/phase/tests/main.nf.test.snap +++ b/modules/nf-core/longphase/phase/tests/main.nf.test.snap @@ -1,107 +1,154 @@ { - "[ bam, bai, snps, [], [] ], fasta, fai": { + "[ bam, bai, snps, svs, [] ], fasta, fai - stub": { "content": [ { - "0": [ + "mod_vcf": [ + + ], + "snv_vcf": [ [ { "id": "test" }, - 
"test.vcf.gz:md5,fd2d21056b2de4722f12d5e883d9cb0a" + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "1": [ - "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + "sv_vcf": [ + [ + { + "id": "test" + }, + "test_SV.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] ], - "vcf": [ + "versions_longphase": [ + [ + "LONGPHASE_PHASE", + "longphase", + "2.0.1 " + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-20T10:13:50.746589174" + }, + "[ bam, bai, snps, [], [] ], fasta, fai": { + "content": [ + { + "mod_vcf": [ + + ], + "snv_vcf": [ [ { "id": "test" }, - "test.vcf.gz:md5,fd2d21056b2de4722f12d5e883d9cb0a" + "test.vcf.gz:md5,73d5f51aea92e09b3d427837066f114c" ] ], - "versions": [ - "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + "sv_vcf": [ + + ], + "versions_longphase": [ + [ + "LONGPHASE_PHASE", + "longphase", + "2.0.1 " + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2024-07-22T12:14:04.269956432" + "timestamp": "2026-03-20T10:13:19.273322013" }, "[ bam, bai, snps, svs, [] ], fasta, fai": { "content": [ { - "0": [ + "mod_vcf": [ + + ], + "snv_vcf": [ [ { "id": "test" }, - "test.vcf.gz:md5,b0a3effd6e076edbe7e2f1f7cfff547c" + "test.vcf.gz:md5,af297491417a5727de21f893b553db37" ] ], - "1": [ - "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" - ], - "vcf": [ + "sv_vcf": [ [ { "id": "test" }, - "test.vcf.gz:md5,b0a3effd6e076edbe7e2f1f7cfff547c" + "test_SV.vcf.gz:md5,4636e0ac86a86565e5d04b5d1b6a00e7" ] ], - "versions": [ - "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + "versions_longphase": [ + [ + "LONGPHASE_PHASE", + "longphase", + "2.0.1 " + ] ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2024-10-04T13:37:16.921910004" + "timestamp": "2026-03-20T10:13:27.450110496" }, "[ bam x2, bai x2, snps, svs, [] ], fasta, fai": { "content": [ 
{ - "0": [ + "mod_vcf": [ + + ], + "snv_vcf": [ [ { "id": "test" }, - "test.vcf.gz:md5,04905b6042998e592c9f3b887ae9e09c" + "test.vcf.gz:md5,f688da3f046717765e879c061510e037" ] ], - "1": [ - "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" - ], - "vcf": [ + "sv_vcf": [ [ { "id": "test" }, - "test.vcf.gz:md5,04905b6042998e592c9f3b887ae9e09c" + "test_SV.vcf.gz:md5,5336fc5eb9d3421cef66fd18320a4cb8" ] ], - "versions": [ - "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + "versions_longphase": [ + [ + "LONGPHASE_PHASE", + "longphase", + "2.0.1 " + ] ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2024-10-04T13:37:23.41768963" + "timestamp": "2026-03-20T10:13:36.797768748" }, "[ bam, bai, snps, [], [] ], fasta, fai - stub": { "content": [ { - "0": [ + "mod_vcf": [ + + ], + "snv_vcf": [ [ { "id": "test" @@ -109,26 +156,22 @@ "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "1": [ - "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + "sv_vcf": [ + ], - "vcf": [ + "versions_longphase": [ [ - { - "id": "test" - }, - "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + "LONGPHASE_PHASE", + "longphase", + "2.0.1 " ] - ], - "versions": [ - "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2024-07-22T12:15:40.296227382" + "timestamp": "2026-03-20T10:13:44.86300696" } } \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf index 453d4566..df55071a 100644 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -20,7 +20,7 @@ process MINIMAP2_ALIGN { tuple val(meta), path("*.paf") , optional: true, emit: paf tuple val(meta), path("*.bam") , optional: true, emit: bam tuple val(meta), path("*.bam.${bam_index_extension}"), optional: true, emit: index - path 
"versions.yml" , emit: versions + tuple val("${task.process}"), val("minimap2"), eval("minimap2 --version"), topic: versions, emit: versions_minimap2 when: task.ext.when == null || task.ext.when @@ -38,25 +38,17 @@ process MINIMAP2_ALIGN { def bam_input = "${reads.extension}".matches('sam|bam|cram') def samtools_reset_fastq = bam_input ? "samtools reset --threads ${task.cpus-1} $args3 $reads | samtools fastq --threads ${task.cpus-1} $args4 |" : '' def query = bam_input ? "-" : reads - def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) - + def target = reference ?: (bam_input ? error("Error: minimap2/align BAM input mode requires reference") : reads) """ $samtools_reset_fastq \\ minimap2 \\ - $args \\ - -t $task.cpus \\ - $target \\ - $query \\ - $cigar_paf \\ - $set_cigar_bam \\ - $bam_output - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - minimap2: \$(minimap2 --version 2>&1) - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS + ${args} \\ + -t ${task.cpus} \\ + ${target} \\ + ${query} \\ + ${cigar_paf} \\ + ${set_cigar_bam} \\ + ${bam_output} """ stub: @@ -64,15 +56,11 @@ process MINIMAP2_ALIGN { def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" def bam_index = bam_index_extension ? "touch ${prefix}.bam.${bam_index_extension}" : "" def bam_input = "${reads.extension}".matches('sam|bam|cram') - def target = reference ?: (bam_input ? 
error("BAM input requires reference") : reads) - + if(bam_input && !reference) { + error("Error: minimap2/align BAM input mode requires reference!") + } """ touch $output_file ${bam_index} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - minimap2: \$(minimap2 --version 2>&1) - END_VERSIONS """ } diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml index b501526e..40bb20ad 100644 --- a/modules/nf-core/minimap2/align/meta.yml +++ b/modules/nf-core/minimap2/align/meta.yml @@ -85,13 +85,27 @@ output: description: BAM alignment index pattern: "*.bam.*" ontologies: [] + versions_minimap2: + - - ${task.process}: + type: string + description: The process name + - minimap2: + type: string + description: The tool name + - minimap2 --version: + type: eval + description: The tool version +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The process name + - minimap2: + type: string + description: The tool name + - minimap2 --version: + type: eval + description: The tool version authors: - "@heuermh" - "@sofstam" diff --git a/modules/nf-core/minimap2/align/minimap2-align.diff b/modules/nf-core/minimap2/align/minimap2-align.diff new file mode 100644 index 00000000..967bb654 --- /dev/null +++ b/modules/nf-core/minimap2/align/minimap2-align.diff @@ -0,0 +1,18 @@ +Changes in component 'nf-core/minimap2/align' +'modules/nf-core/minimap2/align/meta.yml' is unchanged +Changes in 'minimap2/align/main.nf': +--- modules/nf-core/minimap2/align/main.nf ++++ modules/nf-core/minimap2/align/main.nf +@@ -1,6 +1,6 @@ + process MINIMAP2_ALIGN { + tag "$meta.id" +- label 'process_high' ++ label 'process_very_high' + + // Note: the versions here need to match the versions used in the mulled container below and minimap2/index + conda 
"${moduleDir}/environment.yml" + +'modules/nf-core/minimap2/align/environment.yml' is unchanged +'modules/nf-core/minimap2/align/tests/main.nf.test' is unchanged +'modules/nf-core/minimap2/align/tests/main.nf.test.snap' is unchanged +************************************************************ diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test index 4072c171..34597d6f 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test +++ b/modules/nf-core/minimap2/align/tests/main.nf.test @@ -36,7 +36,7 @@ nextflow_process { { assert snapshot( bam(process.out.bam[0][1]).getHeader(), bam(process.out.bam[0][1]).getReadsMD5(), - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions_") } ).match() } ) } @@ -71,7 +71,7 @@ nextflow_process { bam(process.out.bam[0][1]).getHeader(), bam(process.out.bam[0][1]).getReadsMD5(), file(process.out.index[0][1]).name, - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions_") } ).match() } ) } @@ -108,7 +108,7 @@ nextflow_process { { assert snapshot( bam(process.out.bam[0][1]).getHeader(), bam(process.out.bam[0][1]).getReadsMD5(), - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions_") } ).match() } ) } @@ -142,7 +142,7 @@ nextflow_process { { assert snapshot( bam(process.out.bam[0][1]).getHeader(), bam(process.out.bam[0][1]).getReadsMD5(), - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions_") } ).match() } ) } @@ -176,7 +176,7 @@ nextflow_process { { assert snapshot( bam(process.out.bam[0][1]).getHeader(), bam(process.out.bam[0][1]).getReadsMD5(), - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions_") } ).match() } ) } @@ -211,7 +211,7 @@ nextflow_process { bam(process.out.bam[0][1]).getHeader(), bam(process.out.bam[0][1]).getReadsMD5(), file(process.out.index[0][1]).name, - process.out.versions + process.out.findAll { 
key, val -> key.startsWith("versions_") } ).match() } ) } @@ -438,4 +438,4 @@ nextflow_process { } -} \ No newline at end of file +} diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap index 89f20336..93e0eb3b 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test.snap +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -9,15 +9,21 @@ ], "5d426b9a5f5b2c54f1d7f1e4c238ae94", "test.bam.bai", - [ - "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" - ] + { + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:23.829797899" + "timestamp": "2026-01-22T15:02:10.851485367" }, "sarscov2 - bam, fasta, true, 'bai', false, false - stub": { "content": [ @@ -44,7 +50,11 @@ ] ], "3": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ], "bam": [ [ @@ -67,16 +77,20 @@ "paf": [ ], - "versions": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:54.665655242" + "timestamp": "2026-01-22T15:02:56.708796666" }, "sarscov2 - fastq, fasta, true, 'bai', false, false - stub": { "content": [ @@ -103,7 +117,11 @@ ] ], "3": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ], "bam": [ [ @@ -126,16 +144,20 @@ "paf": [ ], - "versions": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" 
}, - "timestamp": "2025-04-22T14:48:38.492212433" + "timestamp": "2026-01-22T15:02:32.614463827" }, "sarscov2 - fastq, fasta, false, [], false, false - stub": { "content": [ @@ -156,7 +178,11 @@ ], "3": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ], "bam": [ @@ -173,16 +199,20 @@ "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:43.879647142" + "timestamp": "2026-01-22T15:02:40.02163098" }, "sarscov2 - fastq, fasta, true, [], false, false - stub": { "content": [ @@ -203,7 +233,11 @@ ], "3": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ], "bam": [ [ @@ -220,16 +254,20 @@ "paf": [ ], - "versions": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:33.262333471" + "timestamp": "2026-01-22T15:02:25.102539679" }, "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { "content": [ @@ -240,15 +278,21 @@ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "1bc392244f228bf52cf0b5a8f6a654c9", - [ - "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" - ] + { + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:07.571731983" + "timestamp": "2026-01-22T15:01:46.456636022" }, "sarscov2 - fastq, fasta, 
true, [], false, false": { "content": [ @@ -259,15 +303,21 @@ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "f194745c0ccfcb2a9c0aee094a08750", - [ - "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" - ] + { + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:47:56.497792473" + "timestamp": "2026-01-22T15:01:30.525133177" }, "sarscov2 - fastq, fasta, true, 'bai', false, false": { "content": [ @@ -279,15 +329,21 @@ ], "f194745c0ccfcb2a9c0aee094a08750", "test.bam.bai", - [ - "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" - ] + { + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:01.888544427" + "timestamp": "2026-01-22T15:01:38.84829029" }, "sarscov2 - bam, fasta, true, [], false, false": { "content": [ @@ -298,15 +354,21 @@ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "5d426b9a5f5b2c54f1d7f1e4c238ae94", - [ - "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" - ] + { + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:18.376062313" + "timestamp": "2026-01-22T15:02:02.351060285" }, "sarscov2 - bam, fasta, true, [], false, false - stub": { "content": [ @@ -327,7 +389,11 @@ ], "3": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ], "bam": [ [ @@ -344,16 +410,20 @@ "paf": [ ], - "versions": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + "versions_minimap2": [ + [ + 
"MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:49.268693724" + "timestamp": "2026-01-22T15:02:47.579634041" }, "sarscov2 - fastq, [], true, false, false": { "content": [ @@ -463,14 +533,20 @@ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "16c1c651f8ec67383bcdee3c55aed94f", - [ - "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" - ] + { + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:12.942360555" + "timestamp": "2026-01-22T15:01:54.090788633" } } \ No newline at end of file diff --git a/modules/nf-core/minimap2/index/main.nf b/modules/nf-core/minimap2/index/main.nf index 56cb0efd..dd81eab0 100644 --- a/modules/nf-core/minimap2/index/main.nf +++ b/modules/nf-core/minimap2/index/main.nf @@ -12,7 +12,7 @@ process MINIMAP2_INDEX { output: tuple val(meta), path("*.mmi"), emit: index - path "versions.yml" , emit: versions + tuple val("${task.process}"), val("minimap2"), eval("minimap2 --version"), topic: versions, emit: versions_minimap2 when: task.ext.when == null || task.ext.when @@ -25,20 +25,10 @@ process MINIMAP2_INDEX { -d ${fasta.baseName}.mmi \\ $args \\ $fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - minimap2: \$(minimap2 --version 2>&1) - END_VERSIONS """ stub: """ touch ${fasta.baseName}.mmi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - minimap2: \$(minimap2 --version 2>&1) - END_VERSIONS """ } diff --git a/modules/nf-core/minimap2/index/meta.yml b/modules/nf-core/minimap2/index/meta.yml index 0d6a2d86..6985fb0c 100644 --- a/modules/nf-core/minimap2/index/meta.yml +++ b/modules/nf-core/minimap2/index/meta.yml @@ -10,7 +10,8 @@ tools: A versatile 
pairwise aligner for genomic and spliced nucleotide sequences. homepage: https://github.com/lh3/minimap2 documentation: https://github.com/lh3/minimap2#uguide - licence: ["MIT"] + licence: + - "MIT" identifier: "" input: - - meta: @@ -35,13 +36,27 @@ output: description: Minimap2 fasta index. pattern: "*.mmi" ontologies: [] + versions_minimap2: + - - ${task.process}: + type: string + description: The name of the process + - minimap2: + type: string + description: The name of the tool + - minimap2 --version: + type: eval + description: The expression to obtain the version of the tool +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The name of the process + - minimap2: + type: string + description: The name of the tool + - minimap2 --version: + type: eval + description: The expression to obtain the version of the tool authors: - "@yuukiiwa" - "@drpatelh" diff --git a/modules/nf-core/minimap2/index/tests/main.nf.test b/modules/nf-core/minimap2/index/tests/main.nf.test index 97840ff7..79b7cc55 100644 --- a/modules/nf-core/minimap2/index/tests/main.nf.test +++ b/modules/nf-core/minimap2/index/tests/main.nf.test @@ -29,4 +29,26 @@ nextflow_process { } -} \ No newline at end of file + test("minimap2 index - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/nf-core/minimap2/index/tests/main.nf.test.snap b/modules/nf-core/minimap2/index/tests/main.nf.test.snap index dbb32049..a3ec750b 100644 --- a/modules/nf-core/minimap2/index/tests/main.nf.test.snap +++ b/modules/nf-core/minimap2/index/tests/main.nf.test.snap 
@@ -11,7 +11,11 @@ ] ], "1": [ - "versions.yml:md5,2c3e19022653b28d77646b2e9cc9bdb3" + [ + "MINIMAP2_INDEX", + "minimap2", + "2.29-r1283" + ] ], "index": [ [ @@ -21,15 +25,60 @@ "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" ] ], - "versions": [ - "versions.yml:md5,2c3e19022653b28d77646b2e9cc9bdb3" + "versions_minimap2": [ + [ + "MINIMAP2_INDEX", + "minimap2", + "2.29-r1283" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-09T16:12:10.625322" + }, + "minimap2 index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.mmi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "MINIMAP2_INDEX", + "minimap2", + "2.29-r1283" + ] + ], + "index": [ + [ + { + "id": "test" + }, + "genome.mmi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_minimap2": [ + [ + "MINIMAP2_INDEX", + "minimap2", + "2.29-r1283" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2025-04-22T14:50:37.213379742" + "timestamp": "2026-02-09T16:12:15.244242" } } \ No newline at end of file diff --git a/modules/nf-core/modkit/pileup/environment.yml b/modules/nf-core/modkit/pileup/environment.yml index 7b7a0ca3..62b97863 100644 --- a/modules/nf-core/modkit/pileup/environment.yml +++ b/modules/nf-core/modkit/pileup/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - ont-modkit=0.4.4 + - ont-modkit=0.6.1 diff --git a/modules/nf-core/modkit/pileup/main.nf b/modules/nf-core/modkit/pileup/main.nf index df6ff59d..7487783d 100644 --- a/modules/nf-core/modkit/pileup/main.nf +++ b/modules/nf-core/modkit/pileup/main.nf @@ -4,19 +4,19 @@ process MODKIT_PILEUP { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ont-modkit:0.4.4--hcdda2d0_0': - 'biocontainers/ont-modkit:0.4.4--hcdda2d0_0' }" + 'https://depot.galaxyproject.org/singularity/ont-modkit:0.6.1--hcdda2d0_0': + 'biocontainers/ont-modkit:0.6.1--hcdda2d0_0' }" input: tuple val(meta), path(bam), path(bai) - tuple val(meta2), path(fasta), path(fai) - tuple val(meta3), path(bed) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(bed) output: - tuple val(meta), path("*.bed") , emit: bed , optional: true - tuple val(meta), path("*.bedgraph"), emit: bedgraph, optional: true + tuple val(meta), path("*.bed.gz") , emit: bedgz , optional: true tuple val(meta), path("*.log") , emit: log , optional: true - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('modkit'), eval("modkit --version | sed 's/modkit //'"), emit: versions_modkit, topic: versions when: task.ext.when == null || task.ext.when @@ -31,6 +31,8 @@ process MODKIT_PILEUP { modkit \\ pileup \\ $args \\ + --bgzf \\ + --bgzf-threads ${task.cpus} \\ --threads ${task.cpus} \\ --prefix ${prefix} \\ $reference \\ @@ -45,26 +47,17 @@ process MODKIT_PILEUP { fi done else - mv ${prefix}.tmp ${prefix}.bed + mv ${prefix}.tmp ${prefix}.bed.gz fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - modkit: \$( modkit --version | sed 's/mod_kit //' ) - END_VERSIONS """ stub: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.bed - touch ${prefix}.bedgraph - touch ${prefix}.log + echo $args - cat <<-END_VERSIONS > versions.yml - "${task.process}": - modkit: \$( modkit --version | sed 's/mod_kit //' ) - END_VERSIONS + echo | gzip > ${prefix}.bed.gz + touch ${prefix}.log """ } diff --git a/modules/nf-core/modkit/pileup/meta.yml b/modules/nf-core/modkit/pileup/meta.yml index 32aba6ad..262a1067 100644 --- a/modules/nf-core/modkit/pileup/meta.yml +++ b/modules/nf-core/modkit/pileup/meta.yml @@ -6,8 +6,8 @@ keywords: - long-read 
tools: - "modkit": - description: A bioinformatics tool for working with modified bases in Oxford Nanopore - sequencing data + description: A bioinformatics tool for working with modified bases in Oxford + Nanopore sequencing data homepage: https://github.com/nanoporetech/modkit documentation: https://github.com/nanoporetech/modkit tool_dev_url: https://github.com/nanoporetech/modkit @@ -36,8 +36,8 @@ input: e.g. `[ id:'hg38' ]` - fasta: type: file - description: Reference sequence in FASTA format. Required for motif (e.g. CpG) - filtering + description: Reference sequence in FASTA format. Required for motif (e.g. + CpG) filtering pattern: "*.fasta" ontologies: [] - fai: @@ -52,33 +52,22 @@ input: e.g. `[ id:'regions' ]` - bed: type: file - description: BED file that will restrict threshold estimation and pileup results - to positions overlapping intervals in the file + description: BED file that will restrict threshold estimation and pileup + results to positions overlapping intervals in the file pattern: "*.bed" ontologies: [] output: - bed: + bedgz: - - meta: type: map description: | Groovy Map containing sample information e.g. `[ id:'test', single_end:false ]` - - "*.bed": + - "*.bed.gz": type: file - description: bedMethyl output file(s) + description: bgzf-compressed bedMethyl output file(s) pattern: "*.bed" ontologies: [] - bedgraph: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
`[ id:'test', single_end:false ]` - - "*.bedgraph": - type: file - description: bedgraph output files - pattern: "*.bedgraph" - ontologies: [] log: - - meta: type: map @@ -90,15 +79,31 @@ output: description: File for debug logs to be written to pattern: "*.log" ontologies: [] + versions_modkit: + - - ${task.process}: + type: string + description: The name of the process + - modkit: + type: string + description: The name of the tool + - modkit --version | sed 's/modkit //': + type: eval + description: The expression to obtain the version of the tool + +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The name of the process + - modkit: + type: string + description: The name of the tool + - modkit --version | sed 's/modkit //': + type: eval + description: The expression to obtain the version of the tool authors: - "@Michal-Babins" - "@fellen31" maintainers: - "@fellen31" + - "@jkh00" diff --git a/modules/nf-core/modkit/pileup/modkit-pileup.diff b/modules/nf-core/modkit/pileup/modkit-pileup.diff new file mode 100644 index 00000000..d880bb11 --- /dev/null +++ b/modules/nf-core/modkit/pileup/modkit-pileup.diff @@ -0,0 +1,23 @@ +Changes in component 'nf-core/modkit/pileup' +'modules/nf-core/modkit/pileup/meta.yml' is unchanged +Changes in 'modkit/pileup/main.nf': +--- modules/nf-core/modkit/pileup/main.nf ++++ modules/nf-core/modkit/pileup/main.nf +@@ -9,8 +9,9 @@ + + input: + tuple val(meta), path(bam), path(bai) +- tuple val(meta2), path(fasta), path(fai) +- tuple val(meta3), path(bed) ++ tuple val(meta2), path(fasta) ++ tuple val(meta3), path(fai) ++ tuple val(meta4), path(bed) + + output: + tuple val(meta), path("*.bed.gz") , emit: bedgz , optional: true + +'modules/nf-core/modkit/pileup/environment.yml' is unchanged +'modules/nf-core/modkit/pileup/tests/main.nf.test' is 
unchanged +'modules/nf-core/modkit/pileup/tests/main.nf.test.snap' is unchanged +'modules/nf-core/modkit/pileup/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/modkit/pileup/tests/main.nf.test b/modules/nf-core/modkit/pileup/tests/main.nf.test index 6ce8dfe2..652b725a 100644 --- a/modules/nf-core/modkit/pileup/tests/main.nf.test +++ b/modules/nf-core/modkit/pileup/tests/main.nf.test @@ -7,10 +7,14 @@ nextflow_process { tag "modkit" tag "modkit/pileup" process "MODKIT_PILEUP" + config "./nextflow.config" test("[bam, bai], [], []") { when { + params { + module_args = '' + } process { """ input[0] = [ @@ -36,6 +40,9 @@ nextflow_process { test("[bam, bai], [fasta, fai], []") { when { + params { + module_args = '' + } process { """ input[0] = [ @@ -65,6 +72,9 @@ nextflow_process { test("[bam, bai], [fasta, fai], bed") { when { + params { + module_args = '--modified-bases 5mC 5hmC' + } process { """ input[0] = [ @@ -93,11 +103,12 @@ nextflow_process { } - test("[bam, bai], [fasta, fai], bed - traditional") { - - config "./nextflow.traditional.config" + test("[bam, bai], [fasta, fai], [], phased") { when { + params { + module_args = '--phased --modified-bases 5mC 5hmC' + } process { """ input[0] = [ @@ -110,42 +121,7 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] - input[2] = Channel.of('chr22\t0\t1000') - .collectFile(name: 'chr22.bed', newLine: true) - .map { file -> [ [ id:'chr22' ], file ] } - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("[bam, bai], [fasta, fai], bed - traditional, HP") { - - config "./nextflow.traditional_hp.config" - - when { - process { - """ - input[0] = [ - [ id: 'test' ], - 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam.bai', checkIfExists: true) - ] - input[1] = [ - [ id: 'test_ref' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) - ] - input[2] = Channel.of('chr22\t0\t1000') - .collectFile(name: 'chr22.bed', newLine: true) - .map { file -> [ [ id:'chr22' ], file ] } + input[2] = [[],[]] """ } } @@ -159,11 +135,12 @@ nextflow_process { } - test("[bam, bai], [fasta, fai], bed - traditional, HP, bedgraph") { - - config "./nextflow.traditional_hp_bedgraph.config" + test("[bam, bai], [fasta, fai], bed, phased") { when { + params { + module_args = '--phased --modified-bases 5mC 5hmC' + } process { """ input[0] = [ @@ -197,6 +174,9 @@ nextflow_process { options "-stub" when { + params { + module_args = '' + } process { """ input[0] = [ @@ -224,6 +204,9 @@ nextflow_process { options "-stub" when { + params { + module_args = '' + } process { """ input[0] = [ @@ -251,10 +234,12 @@ nextflow_process { } test("[bam, bai], [fasta, fai], bed - stub") { - options "-stub" when { + params { + module_args = '--modified-bases 5mC 5hmC' + } process { """ input[0] = [ @@ -283,12 +268,14 @@ nextflow_process { } - test("[bam, bai], [fasta, fai], bed - traditional - stub") { + test("[bam, bai], [fasta, fai], [], phased - stub") { - config "./nextflow.traditional.config" options "-stub" when { + params { + module_args = '--phased --modified-bases 5mC 5hmC' + } process { """ input[0] = [ @@ -301,43 +288,7 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] - input[2] = Channel.of('chr22\t0\t1000') - .collectFile(name: 'chr22.bed', newLine: true) - .map { file -> [ [ id:'chr22' ], file ] } - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("[bam, bai], [fasta, fai], bed - traditional, HP - stub") { - - config "./nextflow.traditional_hp.config" - options "-stub" - - when { - process { - """ - input[0] = [ - [ id: 'test' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam.bai', checkIfExists: true) - ] - input[1] = [ - [ id: 'test_ref' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) - ] - input[2] = Channel.of('chr22\t0\t1000') - .collectFile(name: 'chr22.bed', newLine: true) - .map { file -> [ [ id:'chr22' ], file ] } + input[2] = [[],[]] """ } } @@ -351,12 +302,14 @@ nextflow_process { } - test("[bam, bai], [fasta, fai], bed - traditional, HP, bedgraph - stub") { + test("[bam, bai], [fasta, fai], bed, phased - stub") { - config "./nextflow.traditional_hp_bedgraph.config" options "-stub" when { + params { + module_args = '--phased --modified-bases 5mC 5hmC' + } process { """ input[0] = [ @@ -385,4 +338,4 @@ nextflow_process { } -} \ No newline at end of file +} diff --git a/modules/nf-core/modkit/pileup/tests/main.nf.test.snap b/modules/nf-core/modkit/pileup/tests/main.nf.test.snap index af80c6ec..55ba8d79 100644 --- a/modules/nf-core/modkit/pileup/tests/main.nf.test.snap +++ b/modules/nf-core/modkit/pileup/tests/main.nf.test.snap @@ -7,42 +7,44 @@ { "id": "test" }, - "test.bed:md5,ac38ce3bed1f8aa770d1e1a9f332e170" 
+ "test.bed.gz:md5,e60263a72bb1cfdca8eb12028cfe1472" ] ], "1": [ ], "2": [ - - ], - "3": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" + [ + "MODKIT_PILEUP", + "modkit", + "0.6.1" + ] ], - "bed": [ + "bedgz": [ [ { "id": "test" }, - "test.bed:md5,ac38ce3bed1f8aa770d1e1a9f332e170" + "test.bed.gz:md5,e60263a72bb1cfdca8eb12028cfe1472" ] - ], - "bedgraph": [ - ], "log": [ ], - "versions": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" + "versions_modkit": [ + [ + "MODKIT_PILEUP", + "modkit", + "0.6.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.04.2" }, - "timestamp": "2025-04-15T15:01:47.27240322" + "timestamp": "2026-01-22T11:46:15.943030058" }, "[bam, bai], [fasta, fai], [] - stub": { "content": [ @@ -52,18 +54,10 @@ { "id": "test" }, - "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ - [ - { - "id": "test" - }, - "test.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ [ { "id": "test" @@ -71,23 +65,19 @@ "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" - ], - "bed": [ + "2": [ [ - { - "id": "test" - }, - "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + "MODKIT_PILEUP", + "modkit", + "0.6.1" ] ], - "bedgraph": [ + "bedgz": [ [ { "id": "test" }, - "test.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "log": [ @@ -98,69 +88,77 @@ "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" + "versions_modkit": [ + [ + "MODKIT_PILEUP", + "modkit", + "0.6.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.04.2" }, - "timestamp": "2025-04-15T15:02:14.786526107" + "timestamp": "2026-01-22T11:46:32.126118262" }, - "[bam, bai], [fasta, fai], bed - traditional, HP, bedgraph": 
{ + "[bam, bai], [fasta, fai], [], phased": { "content": [ { "0": [ - - ], - "1": [ [ { "id": "test" }, [ - "test_1_m_CG0_combined.bedgraph:md5,af55904c9acbafa17ff35ee3239152d0", - "test_2_m_CG0_combined.bedgraph:md5,13554927fb35b71a98c0e6bcdc3945c9" + "test_combined.bed.gz:md5,2ade8f1d8c163ff61698d828d0ef3fb0", + "test_hp1.bed.gz:md5,e8e8c00005127492fd5610a9fa0cd098", + "test_hp2.bed.gz:md5,b211935a2e88b0ebeee987abc3a622e2" ] ] ], - "2": [ + "1": [ ], - "3": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" - ], - "bed": [ - + "2": [ + [ + "MODKIT_PILEUP", + "modkit", + "0.6.1" + ] ], - "bedgraph": [ + "bedgz": [ [ { "id": "test" }, [ - "test_1_m_CG0_combined.bedgraph:md5,af55904c9acbafa17ff35ee3239152d0", - "test_2_m_CG0_combined.bedgraph:md5,13554927fb35b71a98c0e6bcdc3945c9" + "test_combined.bed.gz:md5,2ade8f1d8c163ff61698d828d0ef3fb0", + "test_hp1.bed.gz:md5,e8e8c00005127492fd5610a9fa0cd098", + "test_hp2.bed.gz:md5,b211935a2e88b0ebeee987abc3a622e2" ] ] ], "log": [ ], - "versions": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" + "versions_modkit": [ + [ + "MODKIT_PILEUP", + "modkit", + "0.6.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.04.2" }, - "timestamp": "2025-04-15T15:02:04.393035124" + "timestamp": "2026-01-22T11:46:20.026595076" }, - "[bam, bai], [fasta, fai], bed - traditional, HP": { + "[bam, bai], [fasta, fai], [], phased - stub": { "content": [ { "0": [ @@ -168,95 +166,56 @@ { "id": "test" }, - [ - "test_1.bed:md5,464a9870774c340753e79639aeaf76c2", - "test_2.bed:md5,118de4b653dd082d76faa8802df493eb" - ] + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ - - ], - "2": [ - - ], - "3": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" - ], - "bed": [ [ { "id": "test" }, - [ - "test_1.bed:md5,464a9870774c340753e79639aeaf76c2", - "test_2.bed:md5,118de4b653dd082d76faa8802df493eb" - ] + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "bedgraph": [ - - 
], - "log": [ - + "2": [ + [ + "MODKIT_PILEUP", + "modkit", + "0.6.1" + ] ], - "versions": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" - }, - "timestamp": "2025-04-15T15:01:58.76489375" - }, - "[bam, bai], [fasta, fai], []": { - "content": [ - { - "0": [ + "bedgz": [ [ { "id": "test" }, - "test.bed:md5,f973de342df883efc1656c82a3a3978d" + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "1": [ - - ], - "2": [ - - ], - "3": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" - ], - "bed": [ + "log": [ [ { "id": "test" }, - "test.bed:md5,f973de342df883efc1656c82a3a3978d" + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "bedgraph": [ - - ], - "log": [ - - ], - "versions": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" + "versions_modkit": [ + [ + "MODKIT_PILEUP", + "modkit", + "0.6.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.04.2" }, - "timestamp": "2025-04-15T15:01:41.559902964" + "timestamp": "2026-01-22T11:46:40.051078689" }, - "[bam, bai], [fasta, fai], bed - traditional, HP - stub": { + "[bam, bai], [fasta, fai], []": { "content": [ { "0": [ @@ -264,62 +223,44 @@ { "id": "test" }, - "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.bed.gz:md5,f973de342df883efc1656c82a3a3978d" ] ], "1": [ - [ - { - "id": "test" - }, - "test.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + ], "2": [ [ - { - "id": "test" - }, - "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" - ], - "bed": [ - [ - { - "id": "test" - }, - "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + "MODKIT_PILEUP", + "modkit", + "0.6.1" ] ], - "bedgraph": [ + "bedgz": [ [ { "id": "test" }, - "test.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.bed.gz:md5,f973de342df883efc1656c82a3a3978d" ] ], "log": [ + + ], + "versions_modkit": [ [ - { - "id": 
"test" - }, - "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + "MODKIT_PILEUP", + "modkit", + "0.6.1" ] - ], - "versions": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.04.2" }, - "timestamp": "2025-04-15T15:02:31.123304033" + "timestamp": "2026-01-22T11:46:11.978364755" }, "[bam, bai], [], []": { "content": [ @@ -329,44 +270,46 @@ { "id": "test" }, - "test.bed:md5,f973de342df883efc1656c82a3a3978d" + "test.bed.gz:md5,f973de342df883efc1656c82a3a3978d" ] ], "1": [ ], "2": [ - - ], - "3": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" + [ + "MODKIT_PILEUP", + "modkit", + "0.6.1" + ] ], - "bed": [ + "bedgz": [ [ { "id": "test" }, - "test.bed:md5,f973de342df883efc1656c82a3a3978d" + "test.bed.gz:md5,f973de342df883efc1656c82a3a3978d" ] - ], - "bedgraph": [ - ], "log": [ ], - "versions": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" + "versions_modkit": [ + [ + "MODKIT_PILEUP", + "modkit", + "0.6.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.04.2" }, - "timestamp": "2025-04-15T15:01:35.512266692" + "timestamp": "2026-01-22T11:46:07.765200083" }, - "[bam, bai], [fasta, fai], bed - traditional, HP, bedgraph - stub": { + "[bam, bai], [fasta, fai], bed, phased": { "content": [ { "0": [ @@ -374,64 +317,54 @@ { "id": "test" }, - "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + "test_combined.bed.gz:md5,e60263a72bb1cfdca8eb12028cfe1472", + "test_hp1.bed.gz:md5,8125fff82b626811d73c6a09c292ed31", + "test_hp2.bed.gz:md5,6438ea42b16a21db197ddf73ec665685" + ] ] ], "1": [ - [ - { - "id": "test" - }, - "test.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + ], "2": [ [ - { - "id": "test" - }, - "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + "MODKIT_PILEUP", + "modkit", + "0.6.1" ] ], - "3": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" - ], - "bed": [ + "bedgz": [ [ { 
"id": "test" }, - "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "bedgraph": [ - [ - { - "id": "test" - }, - "test.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + "test_combined.bed.gz:md5,e60263a72bb1cfdca8eb12028cfe1472", + "test_hp1.bed.gz:md5,8125fff82b626811d73c6a09c292ed31", + "test_hp2.bed.gz:md5,6438ea42b16a21db197ddf73ec665685" + ] ] ], "log": [ + + ], + "versions_modkit": [ [ - { - "id": "test" - }, - "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + "MODKIT_PILEUP", + "modkit", + "0.6.1" ] - ], - "versions": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.04.2" }, - "timestamp": "2025-04-15T15:02:37.027692372" + "timestamp": "2026-01-22T11:46:24.273720433" }, - "[bam, bai], [], [] - stub": { + "[bam, bai], [fasta, fai], bed, phased - stub": { "content": [ { "0": [ @@ -439,18 +372,10 @@ { "id": "test" }, - "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ - [ - { - "id": "test" - }, - "test.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ [ { "id": "test" @@ -458,23 +383,19 @@ "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" - ], - "bed": [ + "2": [ [ - { - "id": "test" - }, - "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + "MODKIT_PILEUP", + "modkit", + "0.6.1" ] ], - "bedgraph": [ + "bedgz": [ [ { "id": "test" }, - "test.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "log": [ @@ -485,18 +406,22 @@ "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" + "versions_modkit": [ + [ + "MODKIT_PILEUP", + "modkit", + "0.6.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.04.2" }, - "timestamp": 
"2025-04-15T15:02:09.515064105" + "timestamp": "2026-01-22T11:46:44.034301557" }, - "[bam, bai], [fasta, fai], bed - traditional - stub": { + "[bam, bai], [], [] - stub": { "content": [ { "0": [ @@ -504,18 +429,10 @@ { "id": "test" }, - "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ - [ - { - "id": "test" - }, - "test.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ [ { "id": "test" @@ -523,23 +440,19 @@ "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" - ], - "bed": [ + "2": [ [ - { - "id": "test" - }, - "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + "MODKIT_PILEUP", + "modkit", + "0.6.1" ] ], - "bedgraph": [ + "bedgz": [ [ { "id": "test" }, - "test.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "log": [ @@ -550,16 +463,20 @@ "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" + "versions_modkit": [ + [ + "MODKIT_PILEUP", + "modkit", + "0.6.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.04.2" }, - "timestamp": "2025-04-15T15:02:25.6273826" + "timestamp": "2026-01-22T11:46:28.153658113" }, "[bam, bai], [fasta, fai], bed - stub": { "content": [ @@ -569,18 +486,10 @@ { "id": "test" }, - "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ - [ - { - "id": "test" - }, - "test.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ [ { "id": "test" @@ -588,23 +497,19 @@ "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" - ], - "bed": [ + "2": [ [ - { - "id": "test" - }, - "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + "MODKIT_PILEUP", + "modkit", + "0.6.1" ] ], - "bedgraph": [ + "bedgz": [ [ { "id": 
"test" }, - "test.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "log": [ @@ -615,60 +520,19 @@ "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" - }, - "timestamp": "2025-04-15T15:02:20.328195604" - }, - "[bam, bai], [fasta, fai], bed - traditional": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.bed:md5,db6333f714a8ea4aa33902404a8d4812" - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" - ], - "bed": [ + "versions_modkit": [ [ - { - "id": "test" - }, - "test.bed:md5,db6333f714a8ea4aa33902404a8d4812" + "MODKIT_PILEUP", + "modkit", + "0.6.1" ] - ], - "bedgraph": [ - - ], - "log": [ - - ], - "versions": [ - "versions.yml:md5,8527c1177696459470aef23905cfb6af" ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.04.2" }, - "timestamp": "2025-04-15T15:01:52.925069493" + "timestamp": "2026-01-22T11:46:36.173040315" } } \ No newline at end of file diff --git a/modules/nf-core/modkit/pileup/tests/nextflow.config b/modules/nf-core/modkit/pileup/tests/nextflow.config new file mode 100644 index 00000000..62521d0b --- /dev/null +++ b/modules/nf-core/modkit/pileup/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'MODKIT_PILEUP' { + ext.args = params.module_args + } +} diff --git a/modules/nf-core/modkit/pileup/tests/nextflow.traditional.config b/modules/nf-core/modkit/pileup/tests/nextflow.traditional.config deleted file mode 100644 index 672598d5..00000000 --- a/modules/nf-core/modkit/pileup/tests/nextflow.traditional.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: 'MODKIT_PILEUP' { - ext.args = '--preset traditional' - } -} diff --git a/modules/nf-core/modkit/pileup/tests/nextflow.traditional_hp.config 
b/modules/nf-core/modkit/pileup/tests/nextflow.traditional_hp.config deleted file mode 100644 index c0e0d79a..00000000 --- a/modules/nf-core/modkit/pileup/tests/nextflow.traditional_hp.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: 'MODKIT_PILEUP' { - ext.args = '--preset traditional --partition-tag HP' - } -} diff --git a/modules/nf-core/modkit/pileup/tests/nextflow.traditional_hp_bedgraph.config b/modules/nf-core/modkit/pileup/tests/nextflow.traditional_hp_bedgraph.config deleted file mode 100644 index 4f456dcd..00000000 --- a/modules/nf-core/modkit/pileup/tests/nextflow.traditional_hp_bedgraph.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: 'MODKIT_PILEUP' { - ext.args = '--preset traditional --partition-tag HP --bedgraph' - } -} diff --git a/modules/nf-core/mosdepth/environment.yml b/modules/nf-core/mosdepth/environment.yml index f871e054..1c7f3ee8 100644 --- a/modules/nf-core/mosdepth/environment.yml +++ b/modules/nf-core/mosdepth/environment.yml @@ -5,4 +5,5 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/mosdepth - - mosdepth=0.3.10 + - htslib=1.22.1 + - mosdepth=0.3.11=h0ec343a_1 diff --git a/modules/nf-core/mosdepth/main.nf b/modules/nf-core/mosdepth/main.nf index 3bf945f9..63739bfa 100644 --- a/modules/nf-core/mosdepth/main.nf +++ b/modules/nf-core/mosdepth/main.nf @@ -4,8 +4,8 @@ process MOSDEPTH { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mosdepth:0.3.10--h4e814b3_1' : - 'biocontainers/mosdepth:0.3.10--h4e814b3_1'}" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/00/00d32b53160c26794959da7303ee6e2107afd4d292060c9f287b0af1fddbd847/data' : + 'community.wave.seqera.io/library/mosdepth_htslib:0f58993cb6d93294'}" input: tuple val(meta), path(bam), path(bai), path(bed) @@ -24,7 +24,7 @@ process MOSDEPTH { tuple val(meta), path('*.quantized.bed.gz.csi') , optional:true, emit: quantized_csi tuple val(meta), path('*.thresholds.bed.gz') , optional:true, emit: thresholds_bed tuple val(meta), path('*.thresholds.bed.gz.csi'), optional:true, emit: thresholds_csi - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('mosdepth'), eval("mosdepth --version | sed 's/mosdepth //g'"), topic: versions, emit: versions_mosdepth when: task.ext.when == null || task.ext.when @@ -34,11 +34,11 @@ process MOSDEPTH { def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--fasta ${fasta}" : "" def interval = bed ? "--by ${bed}" : "" - if (bed && args.contains("--by")) { + if (bed && (args.contains("--by") || args.contains("-b "))) { error "'--by' can only be specified once when running mosdepth! 
Either remove input BED file definition or remove '--by' from 'ext.args' definition" } - if (!bed && args.contains("--thresholds")) { - error "'--thresholds' can only be specified in conjunction with '--by'" + if (args.contains("--thresholds") && !(bed || args.contains("--by") || args.contains("-b "))) { + error "'--thresholds' can only be specified in conjunction with '--by' or an input bed file" } """ @@ -49,15 +49,17 @@ process MOSDEPTH { $args \\ $prefix \\ $bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//') - END_VERSIONS """ stub: + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + if (bed && (args.contains("--by") || args.contains("-b "))) { + error "'--by' can only be specified once when running mosdepth! Either remove input BED file definition or remove '--by' from 'ext.args' definition" + } + if (args.contains("--thresholds") && !(bed || args.contains("--by") || args.contains("-b "))) { + error "'--thresholds' can only be specified in conjunction with '--by' or an input bed file" + } """ touch ${prefix}.global.dist.txt touch ${prefix}.region.dist.txt @@ -71,10 +73,5 @@ process MOSDEPTH { touch ${prefix}.quantized.bed.gz.csi echo "" | gzip > ${prefix}.thresholds.bed.gz touch ${prefix}.thresholds.bed.gz.csi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/mosdepth/meta.yml b/modules/nf-core/mosdepth/meta.yml index af1ea44a..04c8bfe1 100644 --- a/modules/nf-core/mosdepth/meta.yml +++ b/modules/nf-core/mosdepth/meta.yml @@ -178,13 +178,28 @@ output: description: Index file for BED file with threshold coverage pattern: "*.{thresholds.bed.gz.csi}" ontologies: [] + versions_mosdepth: + - - ${task.process}: + type: string + description: The process the versions were collected from + - mosdepth: + type: string + 
description: The tool name + - "mosdepth --version | sed 's/mosdepth //g'": + type: string + description: The command used to generate the version of the tool + +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The process the versions were collected from + - mosdepth: + type: string + description: The tool name + - "mosdepth --version | sed 's/mosdepth //g'": + type: string + description: The command used to generate the version of the tool authors: - "@joseespinosa" - "@drpatelh" @@ -192,6 +207,5 @@ authors: - "@matthdsm" maintainers: - "@joseespinosa" - - "@drpatelh" - "@ramprasadn" - "@matthdsm" diff --git a/modules/nf-core/mosdepth/tests/main.nf.test b/modules/nf-core/mosdepth/tests/main.nf.test index 0b3c860d..b05dde5b 100644 --- a/modules/nf-core/mosdepth/tests/main.nf.test +++ b/modules/nf-core/mosdepth/tests/main.nf.test @@ -7,10 +7,14 @@ nextflow_process { tag "modules" tag "modules_nfcore" tag "mosdepth" + config "./nextflow.config" test("homo_sapiens - bam, bai, []") { when { + params { + module_args = "" + } process { """ input[0] = [ @@ -25,9 +29,9 @@ nextflow_process { } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(process.out).match()} ) } @@ -36,6 +40,9 @@ nextflow_process { test("homo_sapiens - bam, bai, bed") { when { + params { + module_args = "" + } process { """ input[0] = [ @@ -50,9 +57,9 @@ nextflow_process { } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(process.out).match()} ) } @@ -61,6 +68,9 @@ nextflow_process { test("homo_sapiens - cram, crai, []") { when { + params { + module_args = "" + } process { """ input[0] = [ @@ -78,9 +88,9 @@ nextflow_process { } then { + 
assert process.success assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(process.out).match()} ) } @@ -89,6 +99,9 @@ nextflow_process { test("homo_sapiens - cram, crai, bed") { when { + params { + module_args = "" + } process { """ input[0] = [ @@ -106,9 +119,9 @@ nextflow_process { } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(process.out).match()} ) } @@ -116,8 +129,10 @@ nextflow_process { test("homo_sapiens - bam, bai, [] - window") { - config "./window.config" when { + params { + module_args = "--by 100" + } process { """ input[0] = [ @@ -132,9 +147,9 @@ nextflow_process { } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(process.out).match()} ) } @@ -142,8 +157,10 @@ nextflow_process { test("homo_sapiens - bam, bai, [] - quantized") { - config "./quantized.config" when { + params { + module_args = "--quantize 0:1:4:100:200" + } process { """ input[0] = [ @@ -158,9 +175,9 @@ nextflow_process { } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(process.out).match()} ) } @@ -168,8 +185,10 @@ nextflow_process { test("homo_sapiens - bam, bai, bed - thresholds") { - config "./threshold.config" when { + params { + module_args = "--thresholds 1,10,20,30" + } process { """ input[0] = [ @@ -184,9 +203,9 @@ nextflow_process { } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(process.out).match()} ) } @@ -194,8 +213,10 @@ nextflow_process { test("homo_sapiens - bam, bai, bed - fail") { - config "./window.config" when { + params { + module_args = "--by 100" + } process { """ input[0] = [ @@ -210,9 +231,7 @@ nextflow_process { } then { - assertAll( - { assert 
process.failed } - ) + assert process.failed } } @@ -221,6 +240,9 @@ nextflow_process { options "-stub" when { + params { + module_args = "" + } process { """ input[0] = [ @@ -235,9 +257,9 @@ nextflow_process { } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(process.out).match()} ) } diff --git a/modules/nf-core/mosdepth/tests/main.nf.test.snap b/modules/nf-core/mosdepth/tests/main.nf.test.snap index 67e16562..c27fcc79 100644 --- a/modules/nf-core/mosdepth/tests/main.nf.test.snap +++ b/modules/nf-core/mosdepth/tests/main.nf.test.snap @@ -39,7 +39,11 @@ ] ], "12": [ - "versions.yml:md5,333368078626c18a32eeb12299080cc9" + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] ], "2": [ [ @@ -221,16 +225,20 @@ "test.thresholds.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,333368078626c18a32eeb12299080cc9" + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.04.7" }, - "timestamp": "2025-01-17T14:57:12.350279421" + "timestamp": "2025-09-23T13:06:13.219131" }, "homo_sapiens - cram, crai, bed": { "content": [ @@ -260,7 +268,11 @@ ], "12": [ - "versions.yml:md5,333368078626c18a32eeb12299080cc9" + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] ], "2": [ [ @@ -289,7 +301,7 @@ "id": "test", "single_end": true }, - "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" ] ], "6": [ @@ -307,7 +319,7 @@ "id": "test", "single_end": true }, - "test.regions.bed.gz.csi:md5,47669cfe41f3e222e74d81e1b1be191f" + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" ] ], "8": [ @@ -340,7 +352,7 @@ "id": "test", "single_end": true }, - "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" ] ], "per_base_d4": [ @@ 
-367,7 +379,7 @@ "id": "test", "single_end": true }, - "test.regions.bed.gz.csi:md5,47669cfe41f3e222e74d81e1b1be191f" + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" ] ], "regions_txt": [ @@ -394,16 +406,20 @@ "thresholds_csi": [ ], - "versions": [ - "versions.yml:md5,333368078626c18a32eeb12299080cc9" + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.04.7" }, - "timestamp": "2025-01-17T14:56:12.528228123" + "timestamp": "2025-09-23T13:22:14.011309" }, "homo_sapiens - bam, bai, [] - quantized": { "content": [ @@ -433,7 +449,11 @@ ], "12": [ - "versions.yml:md5,333368078626c18a32eeb12299080cc9" + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] ], "2": [ @@ -456,7 +476,7 @@ "id": "test", "single_end": true }, - "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" ] ], "6": [ @@ -480,7 +500,7 @@ "id": "test", "single_end": true }, - "test.quantized.bed.gz.csi:md5,be9617f551f19a33923f1e886eaefb93" + "test.quantized.bed.gz.csi:md5,c0a3176a59010639455a4aefb3f247ef" ] ], "global_txt": [ @@ -507,7 +527,7 @@ "id": "test", "single_end": true }, - "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" ] ], "per_base_d4": [ @@ -528,7 +548,7 @@ "id": "test", "single_end": true }, - "test.quantized.bed.gz.csi:md5,be9617f551f19a33923f1e886eaefb93" + "test.quantized.bed.gz.csi:md5,c0a3176a59010639455a4aefb3f247ef" ] ], "regions_bed": [ @@ -555,16 +575,20 @@ "thresholds_csi": [ ], - "versions": [ - "versions.yml:md5,333368078626c18a32eeb12299080cc9" + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.04.7" }, - "timestamp": "2025-01-17T14:56:38.422491251" + "timestamp": "2025-09-23T13:22:22.818082" }, "homo_sapiens - bam, 
bai, bed": { "content": [ @@ -594,7 +618,11 @@ ], "12": [ - "versions.yml:md5,333368078626c18a32eeb12299080cc9" + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] ], "2": [ [ @@ -623,7 +651,7 @@ "id": "test", "single_end": true }, - "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" ] ], "6": [ @@ -641,7 +669,7 @@ "id": "test", "single_end": true }, - "test.regions.bed.gz.csi:md5,47669cfe41f3e222e74d81e1b1be191f" + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" ] ], "8": [ @@ -674,7 +702,7 @@ "id": "test", "single_end": true }, - "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" ] ], "per_base_d4": [ @@ -701,7 +729,7 @@ "id": "test", "single_end": true }, - "test.regions.bed.gz.csi:md5,47669cfe41f3e222e74d81e1b1be191f" + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" ] ], "regions_txt": [ @@ -728,16 +756,20 @@ "thresholds_csi": [ ], - "versions": [ - "versions.yml:md5,333368078626c18a32eeb12299080cc9" + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.04.7" }, - "timestamp": "2025-01-17T14:55:43.01015749" + "timestamp": "2025-09-23T13:22:04.449943" }, "homo_sapiens - bam, bai, [] - window": { "content": [ @@ -767,7 +799,11 @@ ], "12": [ - "versions.yml:md5,333368078626c18a32eeb12299080cc9" + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] ], "2": [ [ @@ -796,7 +832,7 @@ "id": "test", "single_end": true }, - "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" ] ], "6": [ @@ -814,7 +850,7 @@ "id": "test", "single_end": true }, - "test.regions.bed.gz.csi:md5,257d67678136963d9dd904330079609d" + "test.regions.bed.gz.csi:md5,17a2cbe22a948d7c004b90a1f28347a1" ] ], "8": [ @@ -847,7 +883,7 @@ "id": "test", 
"single_end": true }, - "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" ] ], "per_base_d4": [ @@ -874,7 +910,7 @@ "id": "test", "single_end": true }, - "test.regions.bed.gz.csi:md5,257d67678136963d9dd904330079609d" + "test.regions.bed.gz.csi:md5,17a2cbe22a948d7c004b90a1f28347a1" ] ], "regions_txt": [ @@ -901,16 +937,20 @@ "thresholds_csi": [ ], - "versions": [ - "versions.yml:md5,333368078626c18a32eeb12299080cc9" + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.04.7" }, - "timestamp": "2025-01-17T14:56:27.10647246" + "timestamp": "2025-09-23T13:22:18.435089" }, "homo_sapiens - bam, bai, []": { "content": [ @@ -940,7 +980,11 @@ ], "12": [ - "versions.yml:md5,333368078626c18a32eeb12299080cc9" + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] ], "2": [ @@ -963,7 +1007,7 @@ "id": "test", "single_end": true }, - "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" ] ], "6": [ @@ -1002,7 +1046,7 @@ "id": "test", "single_end": true }, - "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" ] ], "per_base_d4": [ @@ -1038,16 +1082,20 @@ "thresholds_csi": [ ], - "versions": [ - "versions.yml:md5,333368078626c18a32eeb12299080cc9" + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.04.7" }, - "timestamp": "2025-01-17T14:55:30.449110281" + "timestamp": "2025-09-23T13:21:59.785829" }, "homo_sapiens - cram, crai, []": { "content": [ @@ -1077,7 +1125,11 @@ ], "12": [ - "versions.yml:md5,333368078626c18a32eeb12299080cc9" + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] ], "2": [ @@ -1100,7 +1152,7 @@ "id": "test", "single_end": true }, - 
"test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" ] ], "6": [ @@ -1139,7 +1191,7 @@ "id": "test", "single_end": true }, - "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" ] ], "per_base_d4": [ @@ -1175,16 +1227,20 @@ "thresholds_csi": [ ], - "versions": [ - "versions.yml:md5,333368078626c18a32eeb12299080cc9" + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.04.7" }, - "timestamp": "2025-01-17T14:55:55.244274402" + "timestamp": "2025-09-23T13:22:09.294766" }, "homo_sapiens - bam, bai, bed - thresholds": { "content": [ @@ -1222,11 +1278,15 @@ "id": "test", "single_end": true }, - "test.thresholds.bed.gz.csi:md5,912055ee9452229439df6fae95644196" + "test.thresholds.bed.gz.csi:md5,2c52ab89e7496af475de3cb2ca04c7b3" ] ], "12": [ - "versions.yml:md5,333368078626c18a32eeb12299080cc9" + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] ], "2": [ [ @@ -1255,7 +1315,7 @@ "id": "test", "single_end": true }, - "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" ] ], "6": [ @@ -1273,7 +1333,7 @@ "id": "test", "single_end": true }, - "test.regions.bed.gz.csi:md5,47669cfe41f3e222e74d81e1b1be191f" + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" ] ], "8": [ @@ -1306,7 +1366,7 @@ "id": "test", "single_end": true }, - "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" ] ], "per_base_d4": [ @@ -1333,7 +1393,7 @@ "id": "test", "single_end": true }, - "test.regions.bed.gz.csi:md5,47669cfe41f3e222e74d81e1b1be191f" + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" ] ], "regions_txt": [ @@ -1369,18 +1429,22 @@ "id": "test", "single_end": true }, - 
"test.thresholds.bed.gz.csi:md5,912055ee9452229439df6fae95644196" + "test.thresholds.bed.gz.csi:md5,2c52ab89e7496af475de3cb2ca04c7b3" ] ], - "versions": [ - "versions.yml:md5,333368078626c18a32eeb12299080cc9" + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.04.7" }, - "timestamp": "2025-01-17T14:56:49.888375978" + "timestamp": "2025-09-23T13:22:27.300204" } } \ No newline at end of file diff --git a/modules/nf-core/mosdepth/tests/nextflow.config b/modules/nf-core/mosdepth/tests/nextflow.config new file mode 100644 index 00000000..b21c05b5 --- /dev/null +++ b/modules/nf-core/mosdepth/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: "MOSDEPTH" { + ext.args = params.module_args + } +} diff --git a/modules/nf-core/mosdepth/tests/quantized.config b/modules/nf-core/mosdepth/tests/quantized.config deleted file mode 100644 index 63c55350..00000000 --- a/modules/nf-core/mosdepth/tests/quantized.config +++ /dev/null @@ -1,3 +0,0 @@ -process { - ext.args = "--quantize 0:1:4:100:200" -} \ No newline at end of file diff --git a/modules/nf-core/mosdepth/tests/threshold.config b/modules/nf-core/mosdepth/tests/threshold.config deleted file mode 100644 index 9b014ddf..00000000 --- a/modules/nf-core/mosdepth/tests/threshold.config +++ /dev/null @@ -1,3 +0,0 @@ -process { - ext.args = "--thresholds 1,10,20,30" -} \ No newline at end of file diff --git a/modules/nf-core/mosdepth/tests/window.config b/modules/nf-core/mosdepth/tests/window.config deleted file mode 100644 index 7a0f755c..00000000 --- a/modules/nf-core/mosdepth/tests/window.config +++ /dev/null @@ -1,3 +0,0 @@ -process { - ext.args = "--by 100" -} \ No newline at end of file diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index d02016a0..009874d4 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -4,4 +4,4 @@ 
channels: - conda-forge - bioconda dependencies: - - bioconda::multiqc=1.32 + - bioconda::multiqc=1.33 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index c1158fb0..5376aea1 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,24 +1,21 @@ process MULTIQC { + tag "${meta.id}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/8c/8c6c120d559d7ee04c7442b61ad7cf5a9e8970be5feefb37d68eeaa60c1034eb/data' : - 'community.wave.seqera.io/library/multiqc:1.32--d58f60e4deb769bf' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/34/34e733a9ae16a27e80fe00f863ea1479c96416017f24a907996126283e7ecd4d/data' + : 'community.wave.seqera.io/library/multiqc:1.33--ee7739d47738383b'}" input: - path multiqc_files, stageAs: "?/*" - path(multiqc_config) - path(extra_multiqc_config) - path(multiqc_logo) - path(replace_names) - path(sample_names) + tuple val(meta), path(multiqc_files, stageAs: "?/*"), path(multiqc_config, stageAs: "?/*"), path(multiqc_logo), path(replace_names), path(sample_names) output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions + tuple val(meta), path("*.html"), emit: report + tuple val(meta), path("*_data"), emit: data + tuple val(meta), path("*_plots"), emit: plots, optional: true + // MultiQC should not push its versions to the `versions` topic. 
Its input depends on the versions topic to be resolved thus outputting to the topic will let the pipeline hang forever + tuple val("${task.process}"), val('multiqc'), eval('multiqc --version | sed "s/.* //g"'), emit: versions when: task.ext.when == null || task.ext.when @@ -26,38 +23,28 @@ process MULTIQC { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' - def config = multiqc_config ? "--config $multiqc_config" : '' - def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def config = multiqc_config ? multiqc_config instanceof List ? "--config ${multiqc_config.join(' --config ')}" : "--config ${multiqc_config}" : "" def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' def replace = replace_names ? "--replace-names ${replace_names}" : '' def samples = sample_names ? "--sample-names ${sample_names}" : '' """ multiqc \\ --force \\ - $args \\ - $config \\ - $prefix \\ - $extra_config \\ - $logo \\ - $replace \\ - $samples \\ + ${args} \\ + ${config} \\ + ${prefix} \\ + ${logo} \\ + ${replace} \\ + ${samples} \\ . 
- - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS """ stub: """ mkdir multiqc_data + touch multiqc_data/.stub mkdir multiqc_plots + touch multiqc_plots/.stub touch multiqc_report.html - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS """ } diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index ce30eb73..ef434a9a 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,6 +1,6 @@ name: multiqc -description: Aggregate results from bioinformatics analyses across many samples into - a single report +description: Aggregate results from bioinformatics analyses across many samples + into a single report keywords: - QC - bioinformatics tools @@ -12,74 +12,91 @@ tools: It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ - licence: ["GPL-3.0-or-later"] + licence: + - "GPL-3.0-or-later" identifier: biotools:multiqc input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - ontologies: [] - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections - in multiqc_config. - pattern: "*.{yml,yaml}" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML - - multiqc_logo: - type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" - ontologies: [] - - replace_names: - type: file - description: | - Optional two-column sample renaming file. 
First column a set of - patterns, second column a set of corresponding replacements. Passed via - MultiQC's `--replace-names` option. - pattern: "*.{tsv}" - ontologies: - - edam: http://edamontology.org/format_3475 # TSV - - sample_names: - type: file - description: | - Optional TSV file with headers, passed to the MultiQC --sample_names - argument. - pattern: "*.{tsv}" - ontologies: - - edam: http://edamontology.org/format_3475 # TSV -output: - report: - - "*multiqc_report.html": + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - multiqc_files: type: file - description: MultiQC report file - pattern: "multiqc_report.html" + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC ontologies: [] - data: - - "*_data": - type: directory - description: MultiQC data dir - pattern: "multiqc_data" - plots: - - "*_plots": + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + ontologies: + - edam: http://edamontology.org/format_3750 + - multiqc_logo: type: file - description: Plots created by MultiQC - pattern: "*_data" + description: Optional logo file for MultiQC + pattern: "*.{png}" ontologies: [] - versions: - - versions.yml: + - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 + - sample_names: type: file - description: File containing software versions - pattern: "versions.yml" + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. 
+ pattern: "*.{tsv}" ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - edam: http://edamontology.org/format_3475 +output: + report: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - "*.html": + type: file + description: MultiQC report file + pattern: ".html" + ontologies: [] + data: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - "*_data": + type: directory + description: MultiQC data dir + pattern: "multiqc_data" + plots: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - "*_plots": + type: file + description: Plots created by MultiQC + pattern: "*_plots" + ontologies: [] + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - multiqc: + type: string + description: The tool name + - multiqc --version | sed "s/.* //g": + type: eval + description: The expression to obtain the version of the tool authors: - "@abhi18av" - "@bunop" @@ -90,3 +107,27 @@ maintainers: - "@bunop" - "@drpatelh" - "@jfy133" +containers: + conda: + linux/amd64: + lock_file: https://wave.seqera.io/v1alpha1/builds/bd-ee7739d47738383b_1/condalock + linux/arm64: + lock_file: https://wave.seqera.io/v1alpha1/builds/bd-58d7dee710ab3aa8_1/condalock + docker: + linux/amd64: + build_id: bd-ee7739d47738383b_1 + name: community.wave.seqera.io/library/multiqc:1.33--ee7739d47738383b + scanId: sc-6ddec592dcadd583_4 + linux/arm64: + build_id: bd-58d7dee710ab3aa8_1 + name: community.wave.seqera.io/library/multiqc:1.33--58d7dee710ab3aa8 + scanId: sc-a04c42273e34c55c_2 + singularity: + linux/amd64: + build_id: bd-e3576ddf588fa00d_1 + https: https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/34/34e733a9ae16a27e80fe00f863ea1479c96416017f24a907996126283e7ecd4d/data + name: 
oras://community.wave.seqera.io/library/multiqc:1.33--e3576ddf588fa00d + linux/arm64: + build_id: bd-2537ca5f8445e3c2_1 + https: https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/78/78b89e91d89e9cc99ad5ade5be311f347838cb2acbfb4f13bc343b170be09ce4/data + name: oras://community.wave.seqera.io/library/multiqc:1.33--2537ca5f8445e3c2 diff --git a/modules/nf-core/multiqc/tests/custom_prefix.config b/modules/nf-core/multiqc/tests/custom_prefix.config new file mode 100644 index 00000000..b30b1358 --- /dev/null +++ b/modules/nf-core/multiqc/tests/custom_prefix.config @@ -0,0 +1,5 @@ +process { + withName: 'MULTIQC' { + ext.prefix = "custom_prefix" + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index 33316a7d..0e422eaa 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -15,25 +15,58 @@ nextflow_process { when { process { """ - input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) - input[1] = [] - input[2] = [] - input[3] = [] - input[4] = [] - input[5] = [] + input[0] = channel.of([ + [ id: 'FASTQC' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true), + [], + [], + [], + [] + ]) """ } } then { + assert process.success assertAll( - { assert process.success }, - { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, - { assert process.out.data[0] ==~ ".*/multiqc_data" }, - { assert snapshot(process.out.versions).match("multiqc_versions_single") } + { assert snapshot( + file(process.out.report[0][1]).name, + file(process.out.data[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") + }).match() } ) } + } + + test("sarscov2 single-end [fastqc] - custom prefix") { + config "./custom_prefix.config" + + when { + process { + """ + input[0] = channel.of([ + [ id: 
'FASTQC' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true), + [], + [], + [], + [] + ]) + """ + } + } + then { + assert process.success + assertAll( + { assert snapshot( + file(process.out.report[0][1]).name, + file(process.out.data[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") + }).match() } + ) + } } test("sarscov2 single-end [fastqc] [config]") { @@ -41,22 +74,60 @@ nextflow_process { when { process { """ - input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) - input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) - input[2] = [] - input[3] = [] - input[4] = [] - input[5] = [] + input[0] = channel.of([ + [ id: 'FASTQC' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true), + file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true), + [], + [], + [] + ]) + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + file(process.out.report[0][1]).name, + file(process.out.data[0][1]).name, + file(process.out.plots[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") + }).match() } + ) + } + } + + test("sarscov2 single-end [fastqc] [multiple configs]") { + + when { + process { + """ + input[0] = channel.of([ + [ id: 'FASTQC' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true), + [ + file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true), + file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true) + ], + [], + [], + [] + ]) 
""" } } then { + assert process.success assertAll( - { assert process.success }, - { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, - { assert process.out.data[0] ==~ ".*/multiqc_data" }, - { assert snapshot(process.out.versions).match("multiqc_versions_config") } + { assert snapshot( + file(process.out.report[0][1]).name, + file(process.out.data[0][1]).name, + file(process.out.plots[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") + }).match() } ) } } @@ -68,25 +139,23 @@ nextflow_process { when { process { """ - input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) - input[1] = [] - input[2] = [] - input[3] = [] - input[4] = [] - input[5] = [] + input[0] = channel.of([ + [ id: 'FASTQC' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true), + [], + [], + [], + [] + ]) """ } } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(process.out.report.collect { file(it).getName() } + - process.out.data.collect { file(it).getName() } + - process.out.plots.collect { file(it).getName() } + - process.out.versions ).match("multiqc_stub") } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } - } } diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index a88bafd6..c022701f 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -1,41 +1,130 @@ { - "multiqc_versions_single": { + "sarscov2 single-end [fastqc] [multiple configs]": { "content": [ - [ - "versions.yml:md5,737bb2c7cad54ffc2ec020791dc48b8f" - ] + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + { + "versions": [ + [ + "MULTIQC", + "multiqc", + "1.33" + ] + ] + } ], "meta": { "nf-test": "0.9.3", - "nextflow": "24.10.4" + "nextflow": "25.10.4" }, - 
"timestamp": "2025-10-27T13:33:24.356715" + "timestamp": "2026-02-26T20:21:35.851707" }, - "multiqc_stub": { + "sarscov2 single-end [fastqc]": { "content": [ - [ - "multiqc_report.html", - "multiqc_data", - "multiqc_plots", - "versions.yml:md5,737bb2c7cad54ffc2ec020791dc48b8f" - ] + "multiqc_report.html", + "multiqc_data", + { + "versions": [ + [ + "MULTIQC", + "multiqc", + "1.33" + ] + ] + } ], "meta": { - "nf-test": "0.9.3", - "nextflow": "24.10.4" + "nf-test": "0.9.4", + "nextflow": "25.10.4" }, - "timestamp": "2025-10-27T13:34:11.103619" + "timestamp": "2026-02-26T15:10:36.019680076" }, - "multiqc_versions_config": { + "sarscov2 single-end [fastqc] - stub": { "content": [ - [ - "versions.yml:md5,737bb2c7cad54ffc2ec020791dc48b8f" - ] + { + "data": [ + [ + { + "id": "FASTQC" + }, + [ + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "plots": [ + [ + { + "id": "FASTQC" + }, + [ + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "report": [ + [ + { + "id": "FASTQC" + }, + "multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + [ + "MULTIQC", + "multiqc", + "1.33" + ] + ] + } ], "meta": { - "nf-test": "0.9.3", - "nextflow": "24.10.4" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-26T15:14:39.789193051" + }, + "sarscov2 single-end [fastqc] [config]": { + "content": [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + { + "versions": [ + [ + "MULTIQC", + "multiqc", + "1.33" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-26T15:21:29.116129274" + }, + "sarscov2 single-end [fastqc] - custom prefix": { + "content": [ + "custom_prefix.html", + "custom_prefix_data", + { + "versions": [ + [ + "MULTIQC", + "multiqc", + "1.33" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" }, - "timestamp": "2025-10-27T13:34:04.615233" + "timestamp": "2026-02-26T15:10:43.419877592" } } \ No newline at end of file diff 
--git a/modules/nf-core/nanoplot/environment.yml b/modules/nf-core/nanoplot/environment.yml new file mode 100644 index 00000000..c6b1c5bf --- /dev/null +++ b/modules/nf-core/nanoplot/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::nanoplot=1.46.1 diff --git a/modules/nf-core/nanoplot/main.nf b/modules/nf-core/nanoplot/main.nf new file mode 100644 index 00000000..e89c3941 --- /dev/null +++ b/modules/nf-core/nanoplot/main.nf @@ -0,0 +1,55 @@ +process NANOPLOT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/nanoplot:1.46.1--pyhdfd78af_0' : + 'biocontainers/nanoplot:1.46.1--pyhdfd78af_0' }" + + input: + tuple val(meta), path(ontfile) + + output: + tuple val(meta), path("*.html") , emit: html + tuple val(meta), path("*.png") , optional: true, emit: png + tuple val(meta), path("*.txt") , emit: txt + tuple val("${task.process}"), val('nanoplot'), eval("NanoPlot --version 2>&1 | sed 's/^.*NanoPlot //; s/ .*\$//'"), topic: versions, emit: versions_nanoplot + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def input_file = ("$ontfile".endsWith(".fastq.gz") || "$ontfile".endsWith(".fq.gz")) ? "--fastq ${ontfile}" : + ("$ontfile".endsWith(".txt")) ? "--summary ${ontfile}" : ("$ontfile".endsWith(".arrow")) ? 
"--arrow ${ontfile}" : '' + """ + NanoPlot \\ + $args \\ + -t $task.cpus \\ + $input_file + + for nanoplot_file in *.html *.png *.txt *.log + do + if [[ -s \$nanoplot_file ]] + then + mv \$nanoplot_file ${prefix}_\$nanoplot_file + fi + done + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_LengthvsQualityScatterPlot_dot.html + touch ${prefix}_LengthvsQualityScatterPlot_kde.html + touch ${prefix}_NanoPlot-report.html + touch ${prefix}_NanoStats.txt + touch ${prefix}_Non_weightedHistogramReadlength.html + touch ${prefix}_Non_weightedLogTransformed_HistogramReadlength.html + touch ${prefix}_WeightedHistogramReadlength.html + touch ${prefix}_WeightedLogTransformed_HistogramReadlength.html + touch ${prefix}_Yield_By_Length.html + """ +} diff --git a/modules/nf-core/nanoplot/meta.yml b/modules/nf-core/nanoplot/meta.yml new file mode 100644 index 00000000..3df7b98c --- /dev/null +++ b/modules/nf-core/nanoplot/meta.yml @@ -0,0 +1,74 @@ +name: nanoplot +description: Run NanoPlot on nanopore-sequenced reads +keywords: + - quality control + - qc + - fastq + - sequencing summary + - nanopore +tools: + - nanoplot: + description: | + NanoPlot is a tool for plotting long-read sequencing data and + alignment. + homepage: http://nanoplot.bioinf.be + documentation: https://github.com/wdecoster/NanoPlot + licence: ["GPL-3.0-or-later"] + identifier: biotools:nanoplot +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ontfile: + type: file + description: ONT file + ontologies: [] +output: + html: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: NanoPlot report + pattern: "*{.html}" + ontologies: [] + png: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.png": + type: file + description: Plots generated by NanoPlot + pattern: "*{.png}" + ontologies: [] + txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.txt": + type: file + description: Stats from NanoPlot + pattern: "*{.txt}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@drpatelh" + - "@yuukiiwa" +maintainers: + - "@drpatelh" + - "@yuukiiwa" diff --git a/modules/nf-core/nanoplot/tests/main.nf.test b/modules/nf-core/nanoplot/tests/main.nf.test new file mode 100644 index 00000000..f5163bab --- /dev/null +++ b/modules/nf-core/nanoplot/tests/main.nf.test @@ -0,0 +1,93 @@ +nextflow_process { + + name "Test Process NANOPLOT" + tag "modules_nfcore" + tag "modules" + tag "nanoplot" + script "../main.nf" + process "NANOPLOT" + + test("NanoPlot summary") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/sequencing_summary/test.sequencing_summary.txt', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.html[0][1].size() > 0 }, + { assert process.out.txt[0][1] }, + { assert file(process.out.txt[0][1]).readLines().size() > 1 }, + { assert snapshot( + process.out.html[0][1].collect { p -> file(p).name }, + file(process.out.txt[0][1]).name, + // file(process.out.png[0][1]).name, optional + // file(process.out.log[0][1]).name, name contains date + process.out.versions + ).match() + } + ) + } + } + + test("NanoPlot FASTQ") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) ] + ] + """ + } + 
} + + then { + assertAll( + { assert process.success }, + { assert process.out.html[0][1].size() > 0 }, + { assert process.out.txt[0][1] }, + { assert file(process.out.txt[0][1]).readLines().size() > 1 }, + { assert snapshot( + process.out.html[0][1].collect { p -> file(p).name }, + file(process.out.txt[0][1]).name, + // file(process.out.png[0][1]).name, optional + // file(process.out.log[0][1]).name, name contains date + process.out.versions + ).match() + } + ) + } + } + + test("NanoPlot - stub") { + + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/sequencing_summary/test.sequencing_summary.txt', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/nanoplot/tests/main.nf.test.snap b/modules/nf-core/nanoplot/tests/main.nf.test.snap new file mode 100644 index 00000000..527d6605 --- /dev/null +++ b/modules/nf-core/nanoplot/tests/main.nf.test.snap @@ -0,0 +1,127 @@ +{ + "NanoPlot - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "LengthvsQualityScatterPlot_dot.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "LengthvsQualityScatterPlot_kde.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "NanoPlot-report.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "Non_weightedHistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "Non_weightedLogTransformed_HistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "WeightedHistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "WeightedLogTransformed_HistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "Yield_By_Length.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "NanoStats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + 
"versions.yml:md5,1dcb7bfda282d447bdf7fff8f83e0076" + ], + "html": [ + [ + { + "id": "test" + }, + [ + "LengthvsQualityScatterPlot_dot.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "LengthvsQualityScatterPlot_kde.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "NanoPlot-report.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "Non_weightedHistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "Non_weightedLogTransformed_HistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "WeightedHistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "WeightedLogTransformed_HistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "Yield_By_Length.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "png": [ + + ], + "txt": [ + [ + { + "id": "test" + }, + "NanoStats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,1dcb7bfda282d447bdf7fff8f83e0076" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-29T11:06:10.291228562" + }, + "NanoPlot FASTQ": { + "content": [ + [ + "LengthvsQualityScatterPlot_dot.html", + "LengthvsQualityScatterPlot_kde.html", + "NanoPlot-report.html", + "Non_weightedHistogramReadlength.html", + "Non_weightedLogTransformed_HistogramReadlength.html", + "WeightedHistogramReadlength.html", + "WeightedLogTransformed_HistogramReadlength.html", + "Yield_By_Length.html" + ], + "NanoStats.txt", + [ + "versions.yml:md5,1dcb7bfda282d447bdf7fff8f83e0076" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-28T16:57:47.664858879" + }, + "NanoPlot summary": { + "content": [ + [ + "ActivePores_Over_Time.html", + "ActivityMap_ReadsPerChannel.html", + "CumulativeYieldPlot_Gigabases.html", + "CumulativeYieldPlot_NumberOfReads.html", + "NanoPlot-report.html", + "Non_weightedHistogramReadlength.html", + "Non_weightedLogTransformed_HistogramReadlength.html", + "NumberOfReads_Over_Time.html", + 
"TimeLengthViolinPlot.html", + "TimeQualityViolinPlot.html", + "TimeSequencingSpeed_ViolinPlot.html", + "WeightedHistogramReadlength.html", + "WeightedLogTransformed_HistogramReadlength.html", + "Yield_By_Length.html" + ], + "NanoStats.txt", + [ + "versions.yml:md5,1dcb7bfda282d447bdf7fff8f83e0076" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-28T16:57:35.686031695" + } +} \ No newline at end of file diff --git a/modules/nf-core/nanoplot/tests/tags.yaml b/modules/nf-core/nanoplot/tests/tags.yaml new file mode 100644 index 00000000..7c6ce3fa --- /dev/null +++ b/modules/nf-core/nanoplot/tests/tags.yaml @@ -0,0 +1,2 @@ +nanoplot: + - modules/nf-core/nanoplot/** diff --git a/modules/nf-core/pigz/uncompress/main.nf b/modules/nf-core/pigz/uncompress/main.nf index db712811..51162836 100644 --- a/modules/nf-core/pigz/uncompress/main.nf +++ b/modules/nf-core/pigz/uncompress/main.nf @@ -36,7 +36,6 @@ process PIGZ_UNCOMPRESS { """ stub: - def args = task.ext.args ?: '' uncompressed_filename = zip.toString() - '.gz' """ touch $uncompressed_filename diff --git a/modules/nf-core/pigz/uncompress/meta.yml b/modules/nf-core/pigz/uncompress/meta.yml index cddd0a3b..bbc6a945 100644 --- a/modules/nf-core/pigz/uncompress/meta.yml +++ b/modules/nf-core/pigz/uncompress/meta.yml @@ -10,7 +10,7 @@ tools: description: "Parallel implementation of the gzip algorithm." 
homepage: "https://zlib.net/pigz/" documentation: "https://zlib.net/pigz/pigz.pdf" - + licence: ["Zlib"] identifier: "" input: - - meta: diff --git a/modules/nf-core/pigz/uncompress/tests/main.nf.test b/modules/nf-core/pigz/uncompress/tests/main.nf.test index 9c3289bc..09eabe30 100644 --- a/modules/nf-core/pigz/uncompress/tests/main.nf.test +++ b/modules/nf-core/pigz/uncompress/tests/main.nf.test @@ -29,11 +29,11 @@ nextflow_process { } } - + test("Should run without failures - stub") { - + options "-stub" - + when { params { outdir = "$outputDir" @@ -55,4 +55,4 @@ nextflow_process { } } -} \ No newline at end of file +} diff --git a/modules/nf-core/samtools/cat/environment.yml b/modules/nf-core/samtools/cat/environment.yml index 62054fc9..89e12a64 100644 --- a/modules/nf-core/samtools/cat/environment.yml +++ b/modules/nf-core/samtools/cat/environment.yml @@ -4,5 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/cat/main.nf b/modules/nf-core/samtools/cat/main.nf index 0490b81d..6df7aa76 100644 --- a/modules/nf-core/samtools/cat/main.nf +++ b/modules/nf-core/samtools/cat/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_CAT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta), path(input_files, stageAs: "?/*") @@ -13,7 +13,7 @@ process SAMTOOLS_CAT { output: tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools when: @@ -29,11 +29,6 @@ process SAMTOOLS_CAT { $args \\ -o ${prefix}.${file_type} \\ $input_files - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ stub: @@ -41,10 +36,5 @@ process SAMTOOLS_CAT { def file_type = input_files instanceof List ? 
input_files[0].getExtension() : input_files.getExtension() """ touch ${prefix}.${file_type} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/samtools/cat/meta.yml b/modules/nf-core/samtools/cat/meta.yml index a2ac0e21..3c4e2fbb 100644 --- a/modules/nf-core/samtools/cat/meta.yml +++ b/modules/nf-core/samtools/cat/meta.yml @@ -51,13 +51,27 @@ output: description: Concatenated CRAM file pattern: "*.{cram}" ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool authors: - "@matthdsm" maintainers: diff --git a/modules/nf-core/samtools/cat/tests/main.nf.test b/modules/nf-core/samtools/cat/tests/main.nf.test index dad80b83..7910f65d 100644 --- a/modules/nf-core/samtools/cat/tests/main.nf.test +++ b/modules/nf-core/samtools/cat/tests/main.nf.test @@ -14,22 +14,18 @@ nextflow_process { when { process { """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map + input[0] = [ + [id: 'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.unaligned.bam', checkIfExists: true) ] - ]) 
+ ] """ } } then { - assertAll( - { assert process.success }, - { assert snapshot(file(process.out.bam[0][1]).name).match("bams_bam") }, - { assert snapshot(process.out.cram).match("bams_cram") }, - { assert snapshot(process.out.versions).match("bams_versions") } - ) + assert process.success + assert snapshot(process.out).match() } } @@ -40,22 +36,18 @@ nextflow_process { when { process { """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map + input[0] = [ + [id: 'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.unaligned.bam', checkIfExists: true) ] - ]) + ] """ } } then { - assertAll( - { assert process.success }, - { assert snapshot(file(process.out.bam[0][1]).name).match("bams_stub_bam") }, - { assert snapshot(process.out.cram).match("bams_stub_cram") }, - { assert snapshot(process.out.versions).match("bams_stub_versions") } - ) + assert process.success + assert snapshot(process.out).match() } } } diff --git a/modules/nf-core/samtools/cat/tests/main.nf.test.snap b/modules/nf-core/samtools/cat/tests/main.nf.test.snap index 9af1b19f..1b341c7b 100644 --- a/modules/nf-core/samtools/cat/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/cat/tests/main.nf.test.snap @@ -1,70 +1,96 @@ { - "bams_stub_cram": { + "bams_stub": { "content": [ - [ - - ] + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + [ + "SAMTOOLS_CAT", + "samtools", + "1.22.1" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_CAT", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-02-02T16:45:42.587418" + "timestamp": 
"2026-02-04T14:52:02.961050797" }, - "bams_stub_versions": { + "bams": { "content": [ - [ - "versions.yml:md5,cd29ae344fb0bf5635527e1cb7a7d95f" - ] + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,ceee7822994e89e29d42cad03be8d54b" + ] + ], + "1": [ + + ], + "2": [ + [ + "SAMTOOLS_CAT", + "samtools", + "1.22.1" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,ceee7822994e89e29d42cad03be8d54b" + ] + ], + "cram": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_CAT", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T07:47:51.511914861" - }, - "bams_bam": { - "content": [ - "test.bam" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-02T16:45:37.965199" - }, - "bams_cram": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-02T16:45:37.96805" - }, - "bams_stub_bam": { - "content": [ - "test.bam" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-02T16:45:42.583881" - }, - "bams_versions": { - "content": [ - [ - "versions.yml:md5,cd29ae344fb0bf5635527e1cb7a7d95f" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-16T08:47:50.783194958" + "timestamp": "2026-02-04T14:51:56.905803472" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml index 62054fc9..89e12a64 100644 --- a/modules/nf-core/samtools/faidx/environment.yml +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -4,5 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - 
bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index 6de0095d..97bfb578 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -4,12 +4,11 @@ process SAMTOOLS_FAIDX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: - tuple val(meta), path(fasta) - tuple val(meta2), path(fai) + tuple val(meta), path(fasta), path(fai) val get_sizes output: @@ -17,7 +16,7 @@ process SAMTOOLS_FAIDX { tuple val(meta), path ("*.sizes") , emit: sizes, optional: true tuple val(meta), path ("*.fai") , emit: fai, optional: true tuple val(meta), path ("*.gzi") , emit: gzi, optional: true - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools when: task.ext.when == null || task.ext.when @@ -32,11 +31,6 @@ process SAMTOOLS_FAIDX { $args ${get_sizes_command} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ stub: @@ -51,11 +45,5 @@ process SAMTOOLS_FAIDX { fi ${get_sizes_command} - - cat <<-END_VERSIONS > versions.yml - - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml index b7a2e0c1..80aae1da 100644 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -1,5 +1,6 @@ name: samtools_faidx 
-description: Index FASTA file, and optionally generate a file of chromosome sizes +description: Index FASTA file, and optionally generate a file of chromosome + sizes keywords: - index - fasta @@ -14,7 +15,8 @@ tools: homepage: http://www.htslib.org/ documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] + licence: + - "MIT" identifier: biotools:samtools input: - - meta: @@ -27,11 +29,6 @@ input: description: FASTA file pattern: "*.{fa,fasta}" ontologies: [] - - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - fai: type: file description: FASTA index file @@ -40,7 +37,6 @@ input: - get_sizes: type: boolean description: use cut to get the sizes of the index (true) or not (false) - output: fa: - - meta: @@ -86,13 +82,27 @@ output: description: Optional gzip index file for compressed inputs pattern: "*.gzi" ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: eval + description: The command used to generate the version of the tool +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: eval + description: The command used to generate the version of the tool authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test b/modules/nf-core/samtools/faidx/tests/main.nf.test index 64219b7d..9a86db86 100644 --- a/modules/nf-core/samtools/faidx/tests/main.nf.test +++ 
b/modules/nf-core/samtools/faidx/tests/main.nf.test @@ -8,24 +8,30 @@ nextflow_process { tag "modules_nfcore" tag "samtools" tag "samtools/faidx" + config "./nextflow.config" test("test_samtools_faidx") { when { + params { + module_args = '' + } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [[],[]] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = false """ } } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -33,89 +39,105 @@ nextflow_process { test("test_samtools_faidx_bgzip") { when { + params { + module_args = '' + } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)] - input[1] = [[],[]] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + [] + ] + input[1] = false """ } } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } test("test_samtools_faidx_fasta") { - config "./nextflow.config" - when { + params { + module_args = 'MT192765.1 -o extract.fa' + } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) 
] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = false """ } } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } test("test_samtools_faidx_stub_fasta") { - config "./nextflow2.config" - + options "-stub" when { + params { + module_args = '-o extract.fa' + } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = false """ } } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } test("test_samtools_faidx_stub_fai") { + options "-stub" when { + params { + module_args = '' + } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [[],[]] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = false """ } } then { + assert process.success assertAll( - { assert 
process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -123,22 +145,25 @@ nextflow_process { test("test_samtools_faidx_get_sizes") { when { + params { + module_args = '' + } process { """ - input[0] = Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ]) - input[1] = [[],[]] - input[2] = true + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = true """ } } then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -146,22 +171,25 @@ nextflow_process { test("test_samtools_faidx_get_sizes_bgzip") { when { + params { + module_args = '' + } process { """ - input[0] = Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) - ]) - input[1] = [[],[]] - input[2] = true + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + [] + ] + input[1] = true """ } } then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -171,22 +199,25 @@ nextflow_process { options "-stub" when { + params { + module_args = '' + } process { """ - input[0] = Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ]) - input[1] = [[],[]] - input[2] = true + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', 
checkIfExists: true), + [] + ] + input[1] = true """ } } then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -196,24 +227,27 @@ nextflow_process { options "-stub" when { + params { + module_args = '' + } process { """ - input[0] = Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) - ]) - input[1] = [[],[]] - input[2] = true + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + [] + ] + input[1] = true """ } } then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } -} \ No newline at end of file +} diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap index 73722414..41697444 100644 --- a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap @@ -2,35 +2,13 @@ "test_samtools_faidx": { "content": [ { - "0": [ - - ], - "1": [ - - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "3": [ - - ], - "4": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" - ], "fa": [ ], "fai": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" ] @@ -41,50 +19,24 @@ "sizes": [ ], - "versions": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - 
"timestamp": "2024-11-20T17:31:48.258623157" + "timestamp": "2026-02-10T15:39:12.541649151" }, "test_samtools_faidx_get_sizes_bgzip - stub": { "content": [ { - "0": [ - - ], - "1": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.fai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "4": [ - "versions.yml:md5,e3e4ba35a02020d173be8d1ee04eaebf" - ], "fa": [ ], @@ -112,45 +64,24 @@ "genome.fasta.gz.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,e3e4ba35a02020d173be8d1ee04eaebf" + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-11-20T17:32:41.122428188" + "timestamp": "2026-02-10T15:41:44.040426987" }, "test_samtools_faidx_get_sizes": { "content": [ { - "0": [ - - ], - "1": [ - [ - { - "id": "test" - }, - "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "3": [ - - ], - "4": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" - ], "fa": [ ], @@ -173,55 +104,31 @@ "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" ] ], - "versions": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-11-20T17:34:02.353546697" + "timestamp": "2026-02-10T15:47:03.653912015" }, "test_samtools_faidx_bgzip": { "content": [ { - "0": [ - - ], - "1": [ - - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - 
"genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" - ] - ], - "4": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" - ], "fa": [ ], "fai": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" ] @@ -229,8 +136,7 @@ "gzi": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" ] @@ -238,46 +144,28 @@ "sizes": [ ], - "versions": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-11-20T17:31:55.157487176" + "timestamp": "2026-02-10T15:50:04.023566795" }, "test_samtools_faidx_fasta": { "content": [ { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - - ], - "4": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" - ], "fa": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" ] @@ -291,45 +179,24 @@ "sizes": [ ], - "versions": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-11-20T17:32:02.149455586" + "timestamp": "2026-02-10T15:39:23.529404162" }, "test_samtools_faidx_get_sizes - stub": { "content": [ { - "0": [ - - ], - "1": [ - [ - { - "id": "test" - }, - "genome.fasta.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - { - "id": "test" - }, - 
"genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - - ], - "4": [ - "versions.yml:md5,e3e4ba35a02020d173be8d1ee04eaebf" - ], "fa": [ ], @@ -352,48 +219,30 @@ "genome.fasta.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,e3e4ba35a02020d173be8d1ee04eaebf" + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-11-20T17:32:34.29376776" + "timestamp": "2026-02-10T15:41:39.039834304" }, "test_samtools_faidx_stub_fasta": { "content": [ { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - - ], - "4": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" - ], "fa": [ [ { - "id": "test", - "single_end": false + "id": "test" }, - "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + "extract.fa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "fai": [ @@ -405,51 +254,33 @@ "sizes": [ ], - "versions": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-11-20T17:32:09.125065185" + "timestamp": "2026-02-10T15:39:28.961701609" }, "test_samtools_faidx_stub_fai": { "content": [ { - "0": [ - - ], - "1": [ - - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "3": [ - - ], - "4": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" - ], "fa": [ ], "fai": [ [ { - "id": "test", - "single_end": false + "id": "test" }, - "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "gzi": [ @@ -458,50 +289,24 @@ 
"sizes": [ ], - "versions": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-11-20T17:32:16.274287863" + "timestamp": "2026-02-10T15:39:34.471028474" }, "test_samtools_faidx_get_sizes_bgzip": { "content": [ { - "0": [ - - ], - "1": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "3": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" - ] - ], - "4": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" - ], "fa": [ ], @@ -529,15 +334,19 @@ "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" ] ], - "versions": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-11-20T17:32:28.117654855" + "timestamp": "2026-02-10T15:39:45.439016495" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/nextflow.config b/modules/nf-core/samtools/faidx/tests/nextflow.config index f76a3ba0..202c036e 100644 --- a/modules/nf-core/samtools/faidx/tests/nextflow.config +++ b/modules/nf-core/samtools/faidx/tests/nextflow.config @@ -1,7 +1,7 @@ process { withName: SAMTOOLS_FAIDX { - ext.args = 'MT192765.1 -o extract.fa' + ext.args = params.module_args } } diff --git a/modules/nf-core/samtools/faidx/tests/nextflow2.config b/modules/nf-core/samtools/faidx/tests/nextflow2.config deleted file mode 100644 index 33ebbd5d..00000000 --- a/modules/nf-core/samtools/faidx/tests/nextflow2.config +++ /dev/null @@ -1,6 +0,0 @@ -process { - - 
withName: SAMTOOLS_FAIDX { - ext.args = '-o extract.fa' - } -} diff --git a/modules/nf-core/samtools/flagstat/environment.yml b/modules/nf-core/samtools/flagstat/environment.yml index 62054fc9..89e12a64 100644 --- a/modules/nf-core/samtools/flagstat/environment.yml +++ b/modules/nf-core/samtools/flagstat/environment.yml @@ -4,5 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf index c23f3a5c..0cfb7e87 100644 --- a/modules/nf-core/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -4,15 +4,15 @@ process SAMTOOLS_FLAGSTAT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta), path(bam), path(bai) output: tuple val(meta), path("*.flagstat"), emit: flagstat - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), emit: versions_samtools, topic: versions when: task.ext.when == null || task.ext.when @@ -25,21 +25,23 @@ process SAMTOOLS_FLAGSTAT { --threads ${task.cpus} \\ $bam \\ > ${prefix}.flagstat - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.flagstat - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS + cat <<-END_FLAGSTAT > ${prefix}.flagstat + 1000000 + 0 in total (QC-passed reads + QC-failed reads) + 0 + 0 secondary + 0 + 0 supplementary + 0 + 0 duplicates + 900000 + 0 mapped (90.00% : N/A) + 1000000 + 0 paired in sequencing + 500000 + 0 read1 + 500000 + 0 read2 + 800000 + 0 properly paired (80.00% : N/A) + 850000 + 0 with mate mapped to a different chr + 50000 + 0 with mate mapped to a different chr (mapQ>=5) + END_FLAGSTAT """ } diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml index ebbc15f2..8caa1bcc 100644 --- a/modules/nf-core/samtools/flagstat/meta.yml +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -1,6 +1,6 @@ name: samtools_flagstat -description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG - type +description: Counts the number of alignments in a BAM/CRAM/SAM file for each + FLAG type keywords: - 
stats - mapping @@ -17,7 +17,8 @@ tools: homepage: http://www.htslib.org/ documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] + licence: + - "MIT" identifier: biotools:samtools input: - - meta: @@ -47,13 +48,27 @@ output: description: File containing samtools flagstat output pattern: "*.{flagstat}" ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool authors: - "@drpatelh" maintainers: diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap index 04c3852b..f5c882da 100644 --- a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap @@ -8,11 +8,15 @@ "id": "test", "single_end": false }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], "1": [ - "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" + [ + "SAMTOOLS_FLAGSTAT", + "samtools", + "1.22.1" + ] ], "flagstat": [ [ @@ -20,19 +24,23 @@ "id": "test", "single_end": false }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], - "versions": [ - "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" + 
"versions_samtools": [ + [ + "SAMTOOLS_FLAGSTAT", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:02:58.866491759" + "timestamp": "2026-02-03T11:14:30.820969684" }, "BAM": { "content": [ @@ -47,7 +55,11 @@ ] ], "1": [ - "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" + [ + "SAMTOOLS_FLAGSTAT", + "samtools", + "1.22.1" + ] ], "flagstat": [ [ @@ -58,15 +70,19 @@ "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" ] ], - "versions": [ - "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" + "versions_samtools": [ + [ + "SAMTOOLS_FLAGSTAT", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:02:47.383332837" + "timestamp": "2026-02-03T11:14:25.581619424" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/idxstats/environment.yml b/modules/nf-core/samtools/idxstats/environment.yml index 62054fc9..89e12a64 100644 --- a/modules/nf-core/samtools/idxstats/environment.yml +++ b/modules/nf-core/samtools/idxstats/environment.yml @@ -4,5 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf index 3d5bf4bd..d5b70a7f 100644 --- a/modules/nf-core/samtools/idxstats/main.nf +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -4,15 +4,15 @@ process SAMTOOLS_IDXSTATS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta), path(bam), path(bai) output: tuple val(meta), path("*.idxstats"), emit: idxstats - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), emit: versions_samtools, topic: versions when: task.ext.when == null || task.ext.when @@ -27,11 +27,6 @@ process SAMTOOLS_IDXSTATS { --threads ${task.cpus-1} \\ $bam \\ > ${prefix}.idxstats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ stub: @@ -39,10 +34,5 @@ process SAMTOOLS_IDXSTATS { """ touch ${prefix}.idxstats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml index 96d42746..fd153841 100644 --- a/modules/nf-core/samtools/idxstats/meta.yml +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -17,7 +17,8 @@ tools: homepage: http://www.htslib.org/ documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] + licence: + - "MIT" identifier: biotools:samtools input: - - meta: @@ -47,13 +48,27 @@ output: description: File containing samtools idxstats output pattern: "*.{idxstats}" ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool +topics: versions: - - 
versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool authors: - "@drpatelh" maintainers: diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test b/modules/nf-core/samtools/idxstats/tests/main.nf.test index 5fd1fc78..c990cd55 100644 --- a/modules/nf-core/samtools/idxstats/tests/main.nf.test +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test @@ -25,7 +25,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.idxstats, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } ) } } @@ -47,7 +50,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.idxstats, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } ) } }} diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap index 2cc89a3b..19a54c7c 100644 --- a/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap @@ -1,72 +1,56 @@ { "bam - stub": { "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - "versions.yml:md5,c8d7394830c3c1e5be150589571534fb" - ], - "idxstats": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - 
"test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + "SAMTOOLS_IDXSTATS", + "samtools", + "1.22.1" ] - ], - "versions": [ - "versions.yml:md5,c8d7394830c3c1e5be150589571534fb" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:11:56.466856235" + "timestamp": "2026-02-02T16:21:46.333090477" }, "bam": { "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" - ] - ], - "1": [ - "versions.yml:md5,c8d7394830c3c1e5be150589571534fb" - ], - "idxstats": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + "SAMTOOLS_IDXSTATS", + "samtools", + "1.22.1" ] - ], - "versions": [ - "versions.yml:md5,c8d7394830c3c1e5be150589571534fb" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:11:46.311550359" + "timestamp": "2026-02-02T16:21:41.063422521" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml index 62054fc9..89e12a64 100644 --- a/modules/nf-core/samtools/index/environment.yml +++ b/modules/nf-core/samtools/index/environment.yml @@ -4,5 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 7019a72e..e2a0e56d 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -4,8 +4,8 @@ 
process SAMTOOLS_INDEX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta), path(input) @@ -14,7 +14,7 @@ process SAMTOOLS_INDEX { tuple val(meta), path("*.bai") , optional:true, emit: bai tuple val(meta), path("*.csi") , optional:true, emit: csi tuple val(meta), path("*.crai"), optional:true, emit: crai - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), emit: versions_samtools, topic: versions when: task.ext.when == null || task.ext.when @@ -27,11 +27,6 @@ process SAMTOOLS_INDEX { -@ ${task.cpus} \\ $args \\ $input - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ stub: @@ -40,10 +35,5 @@ process SAMTOOLS_INDEX { "crai" : args.contains("-c") ? 
"csi" : "bai" """ touch ${input}.${extension} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml index 1bed6bca..c6d4ce25 100644 --- a/modules/nf-core/samtools/index/meta.yml +++ b/modules/nf-core/samtools/index/meta.yml @@ -14,7 +14,8 @@ tools: homepage: http://www.htslib.org/ documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] + licence: + - "MIT" identifier: biotools:samtools input: - - meta: @@ -60,13 +61,27 @@ output: description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test index ca34fb5c..c96cec86 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -23,7 +23,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.bai, + process.out.findAll { 
key, val -> key.startsWith('versions') } + ).match() } ) } } @@ -43,7 +46,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.crai, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } ) } } @@ -67,7 +73,7 @@ nextflow_process { { assert process.success }, { assert snapshot( file(process.out.csi[0][1]).name, - process.out.versions + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) } @@ -89,7 +95,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.bai, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } ) } } @@ -110,7 +119,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.crai, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } ) } } @@ -133,7 +145,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.csi, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } ) } } diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap index 72d65e81..afc8a1ff 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -1,250 +1,156 @@ { "csi - stub": { "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], { - "0": [ - - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - - ], - "3": [ - 
"versions.yml:md5,5e09a6fdf76de396728f877193d72315" - ], - "bai": [ - - ], - "crai": [ - - ], - "csi": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" ] - ], - "versions": [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-16T08:21:25.261127166" + "timestamp": "2026-01-28T17:52:10.030187" }, "crai - stub": { "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], { - "0": [ - - ], - "1": [ - - ], - "2": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" ] - ], - "3": [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" - ], - "bai": [ - - ], - "crai": [ - [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "csi": [ - - ], - "versions": [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-16T08:21:12.653194876" + "timestamp": "2026-01-28T17:51:59.125484" }, "bai - stub": { "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], { - "0": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - 
"versions.yml:md5,5e09a6fdf76de396728f877193d72315" - ], - "bai": [ - [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "crai": [ - - ], - "csi": [ - - ], - "versions": [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-16T08:21:01.854932651" + "timestamp": "2026-01-28T17:51:47.277042" }, "csi": { "content": [ "test.paired_end.sorted.bam.csi", - [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" - ] + { + "versions_samtools": [ + [ + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-16T08:20:51.485364222" + "timestamp": "2026-01-28T17:51:35.758735" }, "crai": { "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], { - "0": [ - - ], - "1": [ - - ], - "2": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" ] - ], - "3": [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" - ], - "bai": [ - - ], - "crai": [ - [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" - ] - ], - "csi": [ - - ], - "versions": [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-16T08:20:40.518873972" + "timestamp": "2026-01-28T17:51:26.561965" }, "bai": { "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" - ], - "bai": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" ] - ], - "crai": [ - - ], - "csi": [ - - ], - "versions": [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-16T08:20:21.184050361" + "timestamp": "2026-01-28T17:51:15.299035" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml index 62054fc9..89e12a64 100644 --- a/modules/nf-core/samtools/stats/environment.yml +++ b/modules/nf-core/samtools/stats/environment.yml @@ -4,5 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf index eac01eeb..40dbf661 100644 --- a/modules/nf-core/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_STATS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta), path(input), path(input_index) @@ -13,36 +13,28 @@ process SAMTOOLS_STATS { output: tuple val(meta), path("*.stats"), emit: stats - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('samtools'), eval('samtools version | sed "1!d;s/.* //"'), emit: versions_samtools, topic: versions when: task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--reference ${fasta}" : "" """ samtools \\ stats \\ + ${args} \\ --threads ${task.cpus} \\ ${reference} \\ ${input} \\ > ${prefix}.stats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.stats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml index 6dc51885..5c59cce4 100644 --- a/modules/nf-core/samtools/stats/meta.yml +++ b/modules/nf-core/samtools/stats/meta.yml @@ -55,13 +55,29 @@ output: description: File containing samtools stats output pattern: "*.{stats}" ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: Name of the process + - samtools: + type: string + description: Name of the tool + - samtools version | sed "1!d;s/.* //": + type: eval + description: The expression to obtain the version of the tool + +topics: versions: - - versions.yml: - type: file - 
description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: Name of the process + - samtools: + type: string + description: Name of the tool + - samtools version | sed "1!d;s/.* //": + type: eval + description: The expression to obtain the version of the tool + authors: - "@drpatelh" - "@FriederikeHanssen" diff --git a/modules/nf-core/samtools/stats/samtools-stats.diff b/modules/nf-core/samtools/stats/samtools-stats.diff new file mode 100644 index 00000000..f6a5b76c --- /dev/null +++ b/modules/nf-core/samtools/stats/samtools-stats.diff @@ -0,0 +1,18 @@ +Changes in component 'nf-core/samtools/stats' +'modules/nf-core/samtools/stats/meta.yml' is unchanged +Changes in 'samtools/stats/main.nf': +--- modules/nf-core/samtools/stats/main.nf ++++ modules/nf-core/samtools/stats/main.nf +@@ -1,6 +1,6 @@ + process SAMTOOLS_STATS { + tag "$meta.id" +- label 'process_single' ++ label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ +'modules/nf-core/samtools/stats/environment.yml' is unchanged +'modules/nf-core/samtools/stats/tests/main.nf.test' is unchanged +'modules/nf-core/samtools/stats/tests/main.nf.test.snap' is unchanged +************************************************************ diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/modules/nf-core/samtools/stats/tests/main.nf.test.snap index df507be7..94d981b2 100644 --- a/modules/nf-core/samtools/stats/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/stats/tests/main.nf.test.snap @@ -8,11 +8,15 @@ "id": "test", "single_end": false }, - "test.stats:md5,a27fe55e49a341f92379bb20a65c6a06" + "test.stats:md5,f4aec6c41b73d34ac2fc6b3253aa39ba" ] ], "1": [ - "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] ], "stats": [ [ @@ -20,19 +24,23 @@ "id": "test", "single_end": false }, - "test.stats:md5,a27fe55e49a341f92379bb20a65c6a06" + "test.stats:md5,f4aec6c41b73d34ac2fc6b3253aa39ba" ] ], - "versions": [ - "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + "versions_samtools": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-09-16T09:29:16.767396182" + "timestamp": "2025-11-01T02:27:18.460724" }, "bam - stub": { "content": [ @@ -47,7 +55,11 @@ ] ], "1": [ - "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] ], "stats": [ [ @@ -58,16 +70,20 @@ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + "versions_samtools": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-09-16T09:29:29.721580274" + "timestamp": "2025-11-01T02:27:30.245839" }, "cram - stub": { "content": [ 
@@ -82,7 +98,11 @@ ] ], "1": [ - "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] ], "stats": [ [ @@ -93,16 +113,20 @@ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + "versions_samtools": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-09-16T09:29:53.567964304" + "timestamp": "2025-11-01T02:27:39.041649" }, "bam": { "content": [ @@ -113,11 +137,15 @@ "id": "test", "single_end": false }, - "test.stats:md5,d53a2584376d78942839e9933a34d11b" + "test.stats:md5,41ba8ad30ddb598dadb177a54c222ab9" ] ], "1": [ - "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] ], "stats": [ [ @@ -125,18 +153,22 @@ "id": "test", "single_end": false }, - "test.stats:md5,d53a2584376d78942839e9933a34d11b" + "test.stats:md5,41ba8ad30ddb598dadb177a54c222ab9" ] ], - "versions": [ - "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + "versions_samtools": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-09-16T09:28:50.73610604" + "timestamp": "2025-11-01T02:26:55.988241" } } \ No newline at end of file diff --git a/modules/nf-core/severus/environment.yml b/modules/nf-core/severus/environment.yml index 6081c124..d657c525 100644 --- a/modules/nf-core/severus/environment.yml +++ b/modules/nf-core/severus/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::severus=1.3 + - bioconda::severus=1.6 diff --git a/modules/nf-core/severus/main.nf b/modules/nf-core/severus/main.nf index 7df7c4e5..95fe7912 100644 --- a/modules/nf-core/severus/main.nf +++ b/modules/nf-core/severus/main.nf @@ -12,24 +12,22 @@ 
process SEVERUS { tuple val(meta2), path(bed), path(pon_path) output: - tuple val(meta), path("severus.log") , emit: log - tuple val(meta), path("read_qual.txt") , emit: read_qual - tuple val(meta), path("breakpoints_double.csv") , emit: breakpoints_double - tuple val(meta), path("read_alignments") , emit: read_alignments , optional: true - tuple val(meta), path("read_ids.csv") , emit: read_ids , optional: true - tuple val(meta), path("severus_collaped_dup.bed") , emit: collapsed_dup , optional: true - tuple val(meta), path("severus_LOH.bed") , emit: loh , optional: true - tuple val(meta), path("all_SVs/severus_all.vcf.gz") , emit: all_vcf , optional: true - tuple val(meta), path("all_SVs/severus_all.vcf.gz.tbi") , emit: all_tbi , optional: true - tuple val(meta), path("all_SVs/breakpoint_clusters_list.tsv") , emit: all_breakpoints_clusters_list , optional: true - tuple val(meta), path("all_SVs/breakpoint_clusters.tsv") , emit: all_breakpoints_clusters , optional: true - tuple val(meta), path("all_SVs/plots/severus_*.html") , emit: all_plots , optional: true - tuple val(meta), path("somatic_SVs/severus_somatic.vcf.gz") , emit: somatic_vcf //, optional: true - tuple val(meta), path("somatic_SVs/severus_somatic.vcf.gz.tbi") , emit: somatic_tbi , optional: true - tuple val(meta), path("somatic_SVs/breakpoint_clusters_list.tsv") , emit: somatic_breakpoints_clusters_list, optional: true - tuple val(meta), path("somatic_SVs/breakpoint_clusters.tsv") , emit: somatic_breakpoints_clusters , optional: true - tuple val(meta), path("somatic_SVs/plots/severus_*.html") , emit: somatic_plots , optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}/severus.log") , emit: log + tuple val(meta), path("${prefix}/read_qual.txt") , emit: read_qual + tuple val(meta), path("${prefix}/breakpoints_double.csv") , emit: breakpoints_double + tuple val(meta), path("${prefix}/read_alignments") , emit: read_alignments , optional: true + tuple val(meta), 
path("${prefix}/read_ids.csv") , emit: read_ids , optional: true + tuple val(meta), path("${prefix}/severus_collaped_dup.bed") , emit: collapsed_dup , optional: true + tuple val(meta), path("${prefix}/severus_LOH.bed") , emit: loh , optional: true + tuple val(meta), path("${prefix}/all_SVs/severus_all.vcf.gz") , emit: all_vcf , optional: true + tuple val(meta), path("${prefix}/all_SVs/breakpoint_clusters_list.tsv") , emit: all_breakpoints_clusters_list , optional: true + tuple val(meta), path("${prefix}/all_SVs/breakpoint_clusters.tsv") , emit: all_breakpoints_clusters , optional: true + tuple val(meta), path("${prefix}/all_SVs/plots/severus_*.html") , emit: all_plots , optional: true + tuple val(meta), path("${prefix}/somatic_SVs/severus_somatic.vcf.gz") , emit: somatic_vcf , optional: true + tuple val(meta), path("${prefix}/somatic_SVs/breakpoint_clusters_list.tsv"), emit: somatic_breakpoints_clusters_list, optional: true + tuple val(meta), path("${prefix}/somatic_SVs/breakpoint_clusters.tsv") , emit: somatic_breakpoints_clusters , optional: true + tuple val(meta), path("${prefix}/somatic_SVs/plots/severus_*.html") , emit: somatic_plots , optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -42,6 +40,7 @@ process SEVERUS { def vntr_bed = bed ? "--vntr-bed ${bed}" : "" def phasing_vcf = vcf ? "--phasing-vcf ${vcf}" : "" def pon = pon_path && (!control_input) ? "--PON ${pon_path}" : "" + """ severus \\ $args \\ @@ -51,13 +50,12 @@ process SEVERUS { $pon \\ $control \\ $phasing_vcf \\ - --out-dir . 
- - bgzip somatic_SVs/severus_somatic.vcf - tabix -p vcf somatic_SVs/severus_somatic.vcf.gz - bgzip all_SVs/severus_all.vcf - tabix -p vcf all_SVs/severus_all.vcf.gz + --out-dir ${prefix} + bgzip ${prefix}/somatic_SVs/severus_somatic.vcf + tabix -p vcf ${prefix}/somatic_SVs/severus_somatic.vcf.gz + bgzip ${prefix}/all_SVs/severus_all.vcf + tabix -p vcf ${prefix}/all_SVs/severus_all.vcf.gz cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -66,32 +64,29 @@ process SEVERUS { """ stub: - def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" """ - mkdir -p all_SVs/plots - mkdir -p somatic_SVs/plots + mkdir -p ${prefix}/all_SVs/plots + mkdir -p ${prefix}/somatic_SVs/plots - touch severus_collaped_dup.bed - touch severus.log - touch severus_LOH.bed - touch read_alignments - touch read_ids.csv - touch read_qual.txt - touch breakpoints_double.csv - touch all_SVs/severus_all.vcf.gz - touch all_SVs/severus_all.vcf.gz.tbi - touch all_SVs/breakpoints_clusters_list.tsv - touch all_SVs/breakpoints_clusters.tsv - touch all_SVs/plots/severus_0.html - touch all_SVs/plots/severus_1.html - touch somatic_SVs/severus_somatic.vcf.gz - touch somatic_SVs/severus_somatic.vcf.gz.tbi - touch somatic_SVs/breakpoints_clusters_list.tsv - touch somatic_SVs/breakpoints_clusters.tsv - touch somatic_SVs/plots/severus_0.html - touch somatic_SVs/plots/severus_1.html + touch ${prefix}/severus_collaped_dup.bed + touch ${prefix}/severus.log + touch ${prefix}/severus_LOH.bed + touch ${prefix}/read_alignments + touch ${prefix}/read_ids.csv + touch ${prefix}/read_qual.txt + touch ${prefix}/breakpoints_double.csv + touch ${prefix}/all_SVs/severus_all.vcf + touch ${prefix}/all_SVs/breakpoints_clusters_list.tsv + touch ${prefix}/all_SVs/breakpoints_clusters.tsv + touch ${prefix}/all_SVs/plots/severus_0.html + touch ${prefix}/all_SVs/plots/severus_1.html + touch ${prefix}/somatic_SVs/severus_somatic.vcf + touch ${prefix}/somatic_SVs/breakpoints_clusters_list.tsv + touch 
${prefix}/somatic_SVs/breakpoints_clusters.tsv + touch ${prefix}/somatic_SVs/plots/severus_0.html + touch ${prefix}/somatic_SVs/plots/severus_1.html cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/severus/meta.yml b/modules/nf-core/severus/meta.yml index 8ada3548..b39df3fd 100644 --- a/modules/nf-core/severus/meta.yml +++ b/modules/nf-core/severus/meta.yml @@ -29,23 +29,28 @@ input: description: path to one or multiple target BAM/CRAM files (e.g. tumor, must be indexed) pattern: "*.{bam,cram}" + ontologies: [] - target_index: type: file description: path to one or multiple target BAM/CRAM index files pattern: "*.{bai,crai,csi}" + ontologies: [] - control_input: type: file description: path to the control BAM/CRAM file (e.g. normal, must be indexed) pattern: "*.{bam,cram}" + ontologies: [] - control_index: type: file description: path to the control BAM/CRAM file index pattern: "*.{bai,crai,csi}" + ontologies: [] - vcf: type: file description: path to vcf file used for phasing (if using haplotype specific SV calling pattern: "*.{vcf,vcf.gz}" + ontologies: [] - - meta2: type: map description: | @@ -55,162 +60,192 @@ input: type: file description: path to bed file for tandem repeat regions (must be ordered) pattern: "*.bed" + ontologies: [] output: - - log: - - meta: + log: + - - meta: type: map description: | Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` + pattern: "${prefix}/severus.log" - ${prefix}/severus.log: type: file - description: Severus log file + description: | + log file pattern: "${prefix}/severus.log" - - read_qual: - - meta: + ontologies: [] + read_qual: + - - meta: type: map description: | Groovy Map containing sample information - e.g. 
`[ id:'sample1', single_end:false ]` + pattern: "${prefix}/severus.log" - ${prefix}/read_qual.txt: type: file - description: Severus read_qual file + description: | + txt file containing read quality information pattern: "${prefix}/read_qual.txt" - - breakpoints_double: - - meta: + ontologies: [] + breakpoints_double: + - - meta: type: map description: | Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` + pattern: "${prefix}/severus.log" - ${prefix}/breakpoints_double.csv: type: file - description: Severus breakpoints_double file + description: | + Detailed info about the detected breakpoints for all samples in text format, intended for an advanced user. pattern: "${prefix}/breakpoints_double.csv" - - read_alignments: - - meta: + ontologies: [] + read_alignments: + - - meta: type: map description: | Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` + pattern: "${prefix}/severus.log" - ${prefix}/read_alignments: type: file - description: read alignments file + description: | pattern: "${prefix}/read_alignments" - - read_ids: - - meta: + ontologies: [] + read_ids: + - - meta: type: map description: | Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` + pattern: "${prefix}/severus.log" - ${prefix}/read_ids.csv: type: file - description: read IDs for support reads + description: | + Contains supporting read IDs for each SV pattern: "${prefix}/read_ids" - - collapsed_dup: - - meta: + ontologies: [] + collapsed_dup: + - - meta: type: map description: | Groovy Map containing sample information - e.g. 
`[ id:'sample1', single_end:false ]` + pattern: "${prefix}/severus.log" - ${prefix}/severus_collaped_dup.bed: type: file - description: a bed file with identified collapsed duplication regions + description: | pattern: "${prefix}/severus_collaped_dup" - - loh: - - meta: + ontologies: [] + loh: + - - meta: type: map description: | Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` + pattern: "${prefix}/severus.log" - ${prefix}/severus_LOH.bed: type: file - description: a bed file with predicted LOH regions + description: | + BED file containing loss of heterozygosity information pattern: "${prefix}/severus_LOH.bed" - - all_vcf: - - meta: + ontologies: [] + all_vcf: + - - meta: type: map description: | Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` + pattern: "${prefix}/severus.log" - ${prefix}/all_SVs/severus_all.vcf: - type: file - description: VCF file with all SVs (somatic + germline) + type: map + description: | + VCF file containing somatic and germline structural variants pattern: "${prefix}/all_SVs/severus_all.vcf" - - all_breakpoints_clusters_list: - - meta: + all_breakpoints_clusters_list: + - - meta: type: map description: | Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` + pattern: "${prefix}/severus.log" - ${prefix}/all_SVs/breakpoints_clusters_list.tsv: type: file - description: Severus breakpoints clusters list file + description: | + a TSV containing a list of all breakpoint clusters pattern: "${prefix}/all_SVs/breakpoints_clusters_list.tsv" - - all_breakpoints_clusters: - - meta: + ontologies: [] + all_breakpoints_clusters: + - - meta: type: map description: | Groovy Map containing sample information - e.g. 
`[ id:'sample1', single_end:false ]` + pattern: "${prefix}/severus.log" - ${prefix}/all_SVs/breakpoints_clusters.tsv: type: file - description: Severus breakpoints clusters file + description: | + TSV file listing meta information in breakpoint clusters pattern: "${prefix}/all_SVs/breakpoints_clusters.tsv" - - all_plots: - - meta: + ontologies: [] + all_plots: + - - meta: type: map description: | Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` + pattern: "${prefix}/severus.log" - ${prefix}/all_SVs/plots/severus_*.html: type: file - description: Severus plots + description: | + Plotly graph containing of somatic and germline breakpoint clusters pattern: "${prefix}/all_SVs/plots/*.html" - - somatic_vcf: - - meta: + ontologies: [] + somatic_vcf: + - - meta: type: map description: | Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - ${prefix}/somatic_SVs/severus_all.vcf: + pattern: "${prefix}/severus.log" + - ${prefix}/somatic_SVs/severus_somatic.vcf: type: file - description: VCF file with somatic SVs + description: | + VCF file containing somatic structural variants (SV) pattern: "${prefix}/somatic_SVs/severus_all.vcf" - - somatic_breakpoints_clusters_list: - - meta: + ontologies: [] + somatic_breakpoints_clusters_list: + - - meta: type: map description: | Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` + pattern: "${prefix}/severus.log" - ${prefix}/somatic_SVs/breakpoints_clusters_list.tsv: type: file - description: Severus somatic breakpoints clusters list file + description: | + TSV file containing full list of somatic breakpoint clusters pattern: "${prefix}/somatic_SVs/breakpoints_clusters_list.tsv" - - somatic_breakpoints_clusters: - - meta: + ontologies: [] + somatic_breakpoints_clusters: + - - meta: type: map description: | Groovy Map containing sample information - e.g. 
`[ id:'sample1', single_end:false ]` + pattern: "${prefix}/severus.log" - ${prefix}/somatic_SVs/breakpoints_clusters.tsv: type: file - description: Severus somatic breakpoints clusters file + description: | + TSV file containing meta information of somatic breakpoint clusters pattern: "${prefix}/somatic_SVs/breakpoints_clusters.tsv" - - somatic_plots: - - meta: + ontologies: [] + somatic_plots: + - - meta: type: map description: | Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` + pattern: "${prefix}/severus.log" - ${prefix}/somatic_SVs/plots/severus_*.html: type: file - description: Severus somatic plots + description: | + Plotly graph of somatic breakpoint clusters pattern: "${prefix}/somatic_SVs/plots/*.html" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@fellen31" maintainers: diff --git a/modules/nf-core/severus/severus.diff b/modules/nf-core/severus/severus.diff index d5e967c7..bf4cdb0a 100644 --- a/modules/nf-core/severus/severus.diff +++ b/modules/nf-core/severus/severus.diff @@ -3,14 +3,8 @@ Changes in component 'nf-core/severus' Changes in 'severus/main.nf': --- modules/nf-core/severus/main.nf +++ modules/nf-core/severus/main.nf -@@ -4,30 +4,32 @@ - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
-- 'https://depot.galaxyproject.org/singularity/severus:1.3--pyhdfd78af_0': -- 'biocontainers/severus:1.3--pyhdfd78af_0' }" -+ 'https://depot.galaxyproject.org/singularity/severus:1.6--pyhdfd78af_0': -+ 'biocontainers/severus:1.6--pyhdfd78af_0' }" +@@ -8,8 +8,8 @@ + 'biocontainers/severus:1.6--pyhdfd78af_0' }" input: - tuple val(meta), path(target_input), path(target_index), path(control_input), path(control_index), path(vcf) @@ -19,48 +13,33 @@ Changes in 'severus/main.nf': + tuple val(meta2), path(bed), path(pon_path) output: -- tuple val(meta), path("${prefix}/severus.log") , emit: log -- tuple val(meta), path("${prefix}/read_qual.txt") , emit: read_qual -- tuple val(meta), path("${prefix}/breakpoints_double.csv") , emit: breakpoints_double -- tuple val(meta), path("${prefix}/read_alignments") , emit: read_alignments , optional: true -- tuple val(meta), path("${prefix}/read_ids.csv") , emit: read_ids , optional: true -- tuple val(meta), path("${prefix}/severus_collaped_dup.bed") , emit: collapsed_dup , optional: true -- tuple val(meta), path("${prefix}/severus_LOH.bed") , emit: loh , optional: true + tuple val(meta), path("${prefix}/severus.log") , emit: log +@@ -19,13 +19,13 @@ + tuple val(meta), path("${prefix}/read_ids.csv") , emit: read_ids , optional: true + tuple val(meta), path("${prefix}/severus_collaped_dup.bed") , emit: collapsed_dup , optional: true + tuple val(meta), path("${prefix}/severus_LOH.bed") , emit: loh , optional: true - tuple val(meta), path("${prefix}/all_SVs/severus_all.vcf") , emit: all_vcf , optional: true - tuple val(meta), path("${prefix}/all_SVs/breakpoints_clusters_list.tsv") , emit: all_breakpoints_clusters_list , optional: true - tuple val(meta), path("${prefix}/all_SVs/breakpoints_clusters.tsv") , emit: all_breakpoints_clusters , optional: true -- tuple val(meta), path("${prefix}/all_SVs/plots/severus_*.html") , emit: all_plots , optional: true -- tuple val(meta), path("${prefix}/somatic_SVs/severus_all.vcf") , emit: somatic_vcf 
, optional: true ++ tuple val(meta), path("${prefix}/all_SVs/severus_all.vcf.gz") , emit: all_vcf , optional: true ++ tuple val(meta), path("${prefix}/all_SVs/breakpoint_clusters_list.tsv") , emit: all_breakpoints_clusters_list , optional: true ++ tuple val(meta), path("${prefix}/all_SVs/breakpoint_clusters.tsv") , emit: all_breakpoints_clusters , optional: true + tuple val(meta), path("${prefix}/all_SVs/plots/severus_*.html") , emit: all_plots , optional: true +- tuple val(meta), path("${prefix}/somatic_SVs/severus_somatic.vcf") , emit: somatic_vcf , optional: true - tuple val(meta), path("${prefix}/somatic_SVs/breakpoints_clusters_list.tsv"), emit: somatic_breakpoints_clusters_list, optional: true - tuple val(meta), path("${prefix}/somatic_SVs/breakpoints_clusters.tsv") , emit: somatic_breakpoints_clusters , optional: true -- tuple val(meta), path("${prefix}/somatic_SVs/plots/severus_*.html") , emit: somatic_plots , optional: true -- path "versions.yml" , emit: versions -+ tuple val(meta), path("severus.log") , emit: log -+ tuple val(meta), path("read_qual.txt") , emit: read_qual -+ tuple val(meta), path("breakpoints_double.csv") , emit: breakpoints_double -+ tuple val(meta), path("read_alignments") , emit: read_alignments , optional: true -+ tuple val(meta), path("read_ids.csv") , emit: read_ids , optional: true -+ tuple val(meta), path("severus_collaped_dup.bed") , emit: collapsed_dup , optional: true -+ tuple val(meta), path("severus_LOH.bed") , emit: loh , optional: true -+ tuple val(meta), path("all_SVs/severus_all.vcf.gz") , emit: all_vcf , optional: true -+ tuple val(meta), path("all_SVs/severus_all.vcf.gz.tbi") , emit: all_tbi , optional: true -+ tuple val(meta), path("all_SVs/breakpoint_clusters_list.tsv") , emit: all_breakpoints_clusters_list , optional: true -+ tuple val(meta), path("all_SVs/breakpoint_clusters.tsv") , emit: all_breakpoints_clusters , optional: true -+ tuple val(meta), path("all_SVs/plots/severus_*.html") , emit: all_plots , optional: 
true -+ tuple val(meta), path("somatic_SVs/severus_somatic.vcf.gz") , emit: somatic_vcf //, optional: true -+ tuple val(meta), path("somatic_SVs/severus_somatic.vcf.gz.tbi") , emit: somatic_tbi , optional: true -+ tuple val(meta), path("somatic_SVs/breakpoint_clusters_list.tsv") , emit: somatic_breakpoints_clusters_list, optional: true -+ tuple val(meta), path("somatic_SVs/breakpoint_clusters.tsv") , emit: somatic_breakpoints_clusters , optional: true -+ tuple val(meta), path("somatic_SVs/plots/severus_*.html") , emit: somatic_plots , optional: true -+ path "versions.yml" , emit: versions ++ tuple val(meta), path("${prefix}/somatic_SVs/severus_somatic.vcf.gz") , emit: somatic_vcf , optional: true ++ tuple val(meta), path("${prefix}/somatic_SVs/breakpoint_clusters_list.tsv"), emit: somatic_breakpoints_clusters_list, optional: true ++ tuple val(meta), path("${prefix}/somatic_SVs/breakpoint_clusters.tsv") , emit: somatic_breakpoints_clusters , optional: true + tuple val(meta), path("${prefix}/somatic_SVs/plots/severus_*.html") , emit: somatic_plots , optional: true + path "versions.yml" , emit: versions - when: - task.ext.when == null || task.ext.when -@@ -39,15 +41,23 @@ +@@ -39,15 +39,23 @@ def control = control_input ? "--control-bam ${control_input}" : "" def vntr_bed = bed ? "--vntr-bed ${bed}" : "" def phasing_vcf = vcf ? "--phasing-vcf ${vcf}" : "" + def pon = pon_path && (!control_input) ? "--PON ${pon_path}" : "" ++ """ severus \\ $args \\ @@ -70,68 +49,17 @@ Changes in 'severus/main.nf': + $pon \\ $control \\ $phasing_vcf \\ -- --out-dir ${prefix} -+ --out-dir . 
-+ -+ bgzip somatic_SVs/severus_somatic.vcf -+ tabix -p vcf somatic_SVs/severus_somatic.vcf.gz -+ bgzip all_SVs/severus_all.vcf -+ tabix -p vcf all_SVs/severus_all.vcf.gz + --out-dir ${prefix} + - - cat <<-END_VERSIONS > versions.yml - "${task.process}": -@@ -60,26 +70,28 @@ - prefix = task.ext.prefix ?: "${meta.id}" - - """ -- mkdir -p ${prefix}/all_SVs/plots -- mkdir -p ${prefix}/somatic_SVs/plots -+ mkdir -p all_SVs/plots -+ mkdir -p somatic_SVs/plots - -- touch ${prefix}/severus_collaped_dup.bed -- touch ${prefix}/severus.log -- touch ${prefix}/severus_LOH.bed -- touch ${prefix}/read_alignments -- touch ${prefix}/read_ids.csv -- touch ${prefix}/read_qual.txt -- touch ${prefix}/breakpoints_double.csv -- touch ${prefix}/all_SVs/severus_all.vcf -- touch ${prefix}/all_SVs/breakpoints_clusters_list.tsv -- touch ${prefix}/all_SVs/breakpoints_clusters.tsv -- touch ${prefix}/all_SVs/plots/severus_0.html -- touch ${prefix}/all_SVs/plots/severus_1.html -- touch ${prefix}/somatic_SVs/severus_somatic.vcf -- touch ${prefix}/somatic_SVs/breakpoints_clusters_list.tsv -- touch ${prefix}/somatic_SVs/breakpoints_clusters.tsv -- touch ${prefix}/somatic_SVs/plots/severus_0.html -- touch ${prefix}/somatic_SVs/plots/severus_1.html -+ touch severus_collaped_dup.bed -+ touch severus.log -+ touch severus_LOH.bed -+ touch read_alignments -+ touch read_ids.csv -+ touch read_qual.txt -+ touch breakpoints_double.csv -+ touch all_SVs/severus_all.vcf.gz -+ touch all_SVs/severus_all.vcf.gz.tbi -+ touch all_SVs/breakpoints_clusters_list.tsv -+ touch all_SVs/breakpoints_clusters.tsv -+ touch all_SVs/plots/severus_0.html -+ touch all_SVs/plots/severus_1.html -+ touch somatic_SVs/severus_somatic.vcf.gz -+ touch somatic_SVs/severus_somatic.vcf.gz.tbi -+ touch somatic_SVs/breakpoints_clusters_list.tsv -+ touch somatic_SVs/breakpoints_clusters.tsv -+ touch somatic_SVs/plots/severus_0.html -+ touch somatic_SVs/plots/severus_1.html ++ bgzip ${prefix}/somatic_SVs/severus_somatic.vcf ++ tabix -p vcf 
${prefix}/somatic_SVs/severus_somatic.vcf.gz ++ bgzip ${prefix}/all_SVs/severus_all.vcf ++ tabix -p vcf ${prefix}/all_SVs/severus_all.vcf.gz cat <<-END_VERSIONS > versions.yml "${task.process}": 'modules/nf-core/severus/environment.yml' is unchanged -'modules/nf-core/severus/tests/tags.yml' is unchanged 'modules/nf-core/severus/tests/main.nf.test' is unchanged 'modules/nf-core/severus/tests/main.nf.test.snap' is unchanged 'modules/nf-core/severus/tests/nextflow.config' is unchanged diff --git a/modules/nf-core/severus/tests/main.nf.test.snap b/modules/nf-core/severus/tests/main.nf.test.snap index 28754a33..a6b09ed1 100644 --- a/modules/nf-core/severus/tests/main.nf.test.snap +++ b/modules/nf-core/severus/tests/main.nf.test.snap @@ -7,7 +7,7 @@ { "id": "test" }, - "read_qual.txt:md5,d0428d5dc149f2b40e46e480809d7417" + "read_qual.txt:md5,93f74117a9c031a9f7f3e56fcfc4b02c" ] ], [ @@ -15,10 +15,10 @@ { "id": "test" }, - "breakpoints_double.csv:md5,a0fdb9d522587e7b088b3a61fce99843" + "breakpoints_double.csv:md5,7cb071eb787217599ce780b443d9f203" ] ], - "VcfFile [chromosomes=[], sampleCount=1, variantCount=0, phased=true, phasedAutodetect=true]", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=1, phased=false, phasedAutodetect=false]", [ ], @@ -26,14 +26,14 @@ ], [ - "versions.yml:md5,ba5c727bfdd7186a318f8fa6c2aca06b" + "versions.yml:md5,ad073554f79b24f01b7eeb44b7e70008" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.04.6" }, - "timestamp": "2025-01-22T16:18:01.020231375" + "timestamp": "2025-10-23T10:30:12.366674714" }, "homo_sapiens - [ bam, bai, bam, bai, vcf ], [[],[]]": { "content": [ @@ -43,7 +43,7 @@ { "id": "test" }, - "read_qual.txt:md5,799cd79452b0480c945e6663c572ea87" + "read_qual.txt:md5,c3964c3cb18fe94df02d8b5b1d46b0f8" ] ], [ @@ -51,11 +51,11 @@ { "id": "test" }, - "breakpoints_double.csv:md5,085eae5aaaed3f4970b65b80f5ded767" + "breakpoints_double.csv:md5,c3c4c8b8737458c898d2ab73cc726a73" ] ], "VcfFile 
[chromosomes=[chr22], sampleCount=2, variantCount=1, phased=false, phasedAutodetect=false]", - "7f6a6ca528a33bf3d41520971bcd00bb", + "467023c532e1c5a9df93d98a0ec92255", [ ], @@ -63,7 +63,12 @@ ], [ - + [ + { + "id": "test" + }, + "severus_somatic.vcf:md5,d977e35947a1294be9795880c5d7ef14" + ] ], [ @@ -72,14 +77,14 @@ ], [ - "versions.yml:md5,ba5c727bfdd7186a318f8fa6c2aca06b" + "versions.yml:md5,ad073554f79b24f01b7eeb44b7e70008" ] ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-01-22T16:11:13.354543848" + "timestamp": "2026-01-26T13:03:25.015885535" }, "homo_sapiens - [ bam, bai, [], [], [] ], [[],[]] - stub": { "content": [ @@ -112,7 +117,12 @@ ] ], "11": [ - + [ + { + "id": "test" + }, + "severus_somatic.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ], "12": [ [ @@ -142,7 +152,7 @@ ] ], "15": [ - "versions.yml:md5,ba5c727bfdd7186a318f8fa6c2aca06b" + "versions.yml:md5,ad073554f79b24f01b7eeb44b7e70008" ], "2": [ [ @@ -327,18 +337,23 @@ ] ], "somatic_vcf": [ - + [ + { + "id": "test" + }, + "severus_somatic.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ], "versions": [ - "versions.yml:md5,ba5c727bfdd7186a318f8fa6c2aca06b" + "versions.yml:md5,ad073554f79b24f01b7eeb44b7e70008" ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.04.6" }, - "timestamp": "2025-01-22T16:03:53.176564181" + "timestamp": "2025-10-23T10:30:28.305922677" }, "homo_sapiens - [ bam, bai, bam, bai, [] ], [[],[]]": { "content": [ @@ -348,7 +363,7 @@ { "id": "test" }, - "read_qual.txt:md5,799cd79452b0480c945e6663c572ea87" + "read_qual.txt:md5,c3964c3cb18fe94df02d8b5b1d46b0f8" ] ], [ @@ -356,11 +371,11 @@ { "id": "test" }, - "breakpoints_double.csv:md5,e2b55013bcb77a0b738cdba485520f68" + "breakpoints_double.csv:md5,820a478c24ca5a83e122a7e811f8414d" ] ], "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=1, phased=false, phasedAutodetect=false]", - "7f6a6ca528a33bf3d41520971bcd00bb", + 
"467023c532e1c5a9df93d98a0ec92255", [ ], @@ -368,7 +383,12 @@ ], [ - + [ + { + "id": "test" + }, + "severus_somatic.vcf:md5,7d69ee117bd032a31166cf2cf7a3ff8c" + ] ], [ @@ -377,14 +397,14 @@ ], [ - "versions.yml:md5,ba5c727bfdd7186a318f8fa6c2aca06b" + "versions.yml:md5,ad073554f79b24f01b7eeb44b7e70008" ] ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-01-22T16:11:01.967892199" + "timestamp": "2026-01-26T13:03:18.532828432" }, "homo_sapiens - [ bam, bai, bam, bai, vcf ], [ bed ]": { "content": [ @@ -394,7 +414,7 @@ { "id": "test" }, - "read_qual.txt:md5,8ef1a36618e7f2cad39c79c9aed3cd64" + "read_qual.txt:md5,8ea4c56f328b82b104702705c06f0731" ] ], [ @@ -402,11 +422,11 @@ { "id": "test" }, - "breakpoints_double.csv:md5,085eae5aaaed3f4970b65b80f5ded767" + "breakpoints_double.csv:md5,c3c4c8b8737458c898d2ab73cc726a73" ] ], "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=1, phased=false, phasedAutodetect=false]", - "7f6a6ca528a33bf3d41520971bcd00bb", + "467023c532e1c5a9df93d98a0ec92255", [ ], @@ -414,7 +434,12 @@ ], [ - + [ + { + "id": "test" + }, + "severus_somatic.vcf:md5,e8f4f5d77f5a17692ce82cde5cd7c833" + ] ], [ @@ -423,13 +448,13 @@ ], [ - "versions.yml:md5,ba5c727bfdd7186a318f8fa6c2aca06b" + "versions.yml:md5,ad073554f79b24f01b7eeb44b7e70008" ] ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-01-22T16:11:25.517633729" + "timestamp": "2026-01-26T13:03:31.514266729" } } \ No newline at end of file diff --git a/modules/nf-core/severus/tests/tags.yml b/modules/nf-core/severus/tests/tags.yml deleted file mode 100644 index d7e36658..00000000 --- a/modules/nf-core/severus/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -severus: - - "modules/nf-core/severus/**" diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index e712ebe6..b9c324da 100644 --- a/modules/nf-core/untar/main.nf +++ 
b/modules/nf-core/untar/main.nf @@ -12,7 +12,7 @@ process UNTAR { output: tuple val(meta), path("${prefix}"), emit: untar - path "versions.yml", emit: versions + tuple val("${task.process}"), val('untar'), eval('tar --version 2>&1 | head -1 | sed "s/tar (GNU tar) //; s/ Copyright.*//"'), emit: versions_untar, topic: versions when: task.ext.when == null || task.ext.when @@ -43,10 +43,6 @@ process UNTAR { ${args2} fi - cat <<-END_VERSIONS > versions.yml - "${task.process}": - untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') - END_VERSIONS """ stub: @@ -75,10 +71,5 @@ process UNTAR { fi done fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml index 1b6bf491..571d8078 100644 --- a/modules/nf-core/untar/meta.yml +++ b/modules/nf-core/untar/meta.yml @@ -1,5 +1,5 @@ name: untar -description: Extract files. +description: Extract files from tar, tar.gz, tar.bz2, tar.xz archives keywords: - untar - uncompress @@ -7,7 +7,7 @@ keywords: tools: - untar: description: | - Extract tar.gz files. + Extract tar, tar.gz, tar.bz2, tar.xz files. documentation: https://www.gnu.org/software/tar/manual/ licence: ["GPL-3.0-or-later"] identifier: "" @@ -19,8 +19,8 @@ input: e.g. [ id:'test', single_end:false ] - archive: type: file - description: File to be untar - pattern: "*.{tar}.{gz}" + description: File to be untarred + pattern: "*.{tar,tar.gz,tar.bz2,tar.xz}" ontologies: - edam: http://edamontology.org/format_3981 # TAR format - edam: http://edamontology.org/format_3989 # GZIP format @@ -38,13 +38,29 @@ output: Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] pattern: "*/" + versions_untar: + - - ${task.process}: + type: string + description: The name of the process + - untar: + type: string + description: The name of the tool + - tar --version 2>&1 | head -1 | sed "s/tar (GNU tar) //; s/ Copyright.*//": + type: eval + description: The expression to obtain the version of the tool + +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The name of the process + - untar: + type: string + description: The name of the tool + - tar --version 2>&1 | head -1 | sed "s/tar (GNU tar) //; s/ Copyright.*//": + type: eval + description: The expression to obtain the version of the tool + authors: - "@joseespinosa" - "@drpatelh" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test index c957517a..fde8db16 100644 --- a/modules/nf-core/untar/tests/main.nf.test +++ b/modules/nf-core/untar/tests/main.nf.test @@ -20,7 +20,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() }, + { assert snapshot( + process.out.untar, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() }, ) } } @@ -38,7 +41,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() }, + { assert snapshot( + process.out.untar, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() }, ) } } @@ -58,7 +64,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() }, + { assert snapshot( + process.out.untar, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() }, ) } } @@ -78,7 +87,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert 
snapshot(process.out).match() }, + { assert snapshot( + process.out.untar, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() }, ) } } diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap index ceb91b79..51a414dd 100644 --- a/modules/nf-core/untar/tests/main.nf.test.snap +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -1,158 +1,118 @@ { "test_untar_onlyfiles": { "content": [ - { - "0": [ + [ + [ + [ + + ], [ - [ - - ], - [ - "hello.txt:md5,e59ff97941044f85df5297e1c302d260" - ] + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" ] - ], - "1": [ - "versions.yml:md5,6063247258c56fd271d076bb04dd7536" - ], - "untar": [ + ] + ], + { + "versions_untar": [ [ - [ - - ], - [ - "hello.txt:md5,e59ff97941044f85df5297e1c302d260" - ] + "UNTAR", + "untar", + "1.34" ] - ], - "versions": [ - "versions.yml:md5,6063247258c56fd271d076bb04dd7536" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-07-10T12:04:28.231047" + "timestamp": "2026-01-28T17:49:32.000491" }, "test_untar_onlyfiles - stub": { "content": [ - { - "0": [ + [ + [ + [ + + ], [ - [ - - ], - [ - "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] - ], - "1": [ - "versions.yml:md5,6063247258c56fd271d076bb04dd7536" - ], - "untar": [ + ] + ], + { + "versions_untar": [ [ - [ - - ], - [ - "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + "UNTAR", + "untar", + "1.34" ] - ], - "versions": [ - "versions.yml:md5,6063247258c56fd271d076bb04dd7536" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-07-10T12:04:45.773103" + "timestamp": "2026-01-28T17:49:58.812479" }, "test_untar - stub": { "content": [ - { - "0": [ + [ + [ + [ + + ], [ - [ - - ], - [ - "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", - 
"opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", - "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" ] - ], - "1": [ - "versions.yml:md5,6063247258c56fd271d076bb04dd7536" - ], - "untar": [ + ] + ], + { + "versions_untar": [ [ - [ - - ], - [ - "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", - "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", - "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + "UNTAR", + "untar", + "1.34" ] - ], - "versions": [ - "versions.yml:md5,6063247258c56fd271d076bb04dd7536" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-07-10T12:04:36.777441" + "timestamp": "2026-01-28T17:49:48.119456" }, "test_untar": { "content": [ - { - "0": [ + [ + [ + [ + + ], [ - [ - - ], - [ - "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", - "opts.k2d:md5,a033d00cf6759407010b21700938f543", - "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" - ] + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" ] - ], - "1": [ - "versions.yml:md5,6063247258c56fd271d076bb04dd7536" - ], - "untar": [ + ] + ], + { + "versions_untar": [ [ - [ - - ], - [ - "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", - "opts.k2d:md5,a033d00cf6759407010b21700938f543", - "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" - ] + "UNTAR", + "untar", + "1.34" ] - ], - "versions": [ - "versions.yml:md5,6063247258c56fd271d076bb04dd7536" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-07-10T12:04:19.377674" + "timestamp": "2026-01-28T17:49:17.252494" } } \ No newline at end of file diff --git a/modules/nf-core/unzip/main.nf b/modules/nf-core/unzip/main.nf index a0c02109..b977ff6d 100644 --- 
a/modules/nf-core/unzip/main.nf +++ b/modules/nf-core/unzip/main.nf @@ -35,7 +35,6 @@ process UNZIP { """ stub: - def args = task.ext.args ?: '' if ( archive instanceof List && archive.name.size > 1 ) { error "[UNZIP] error: 7za only accepts a single archive as input. Please check module input." } prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName) """ diff --git a/modules/nf-core/whatshap/stats/environment.yml b/modules/nf-core/whatshap/stats/environment.yml new file mode 100644 index 00000000..389d6871 --- /dev/null +++ b/modules/nf-core/whatshap/stats/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::whatshap=2.8" diff --git a/modules/nf-core/whatshap/stats/main.nf b/modules/nf-core/whatshap/stats/main.nf new file mode 100644 index 00000000..54abecd0 --- /dev/null +++ b/modules/nf-core/whatshap/stats/main.nf @@ -0,0 +1,59 @@ + +process WHATSHAP_STATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/whatshap:2.8--py39h2de1943_0': + 'biocontainers/whatshap:2.8--py39h2de1943_0' }" + + input: + tuple val(meta), path(vcf) // channel: [ val(meta), path(vcf) ] + val(include_tsv_output) // value: [ true | false ] + val(include_gtf_output) // value: [ true | false ] + val(inlude_block_output) // value: [ true | false ] + + output: + tuple val(meta), path("${prefix}.tsv"), emit: tsv, optional: true + tuple val(meta), path("${prefix}.gtf"), emit: gtf, optional: true + tuple val(meta), path("${prefix}.txt"), emit: block, optional: true + tuple val(meta), path("${prefix}.log"), emit: log + tuple val("${task.process}"), val('whatshap'), eval("whatshap --version"), emit: versions_whatshap, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def output_tsv = include_tsv_output ? "--tsv ${prefix}.tsv" : '' + def output_gtf = include_gtf_output ? "--gtf ${prefix}.gtf" : '' + def output_block = inlude_block_output ? "--block-list ${prefix}.txt" : '' + """ + whatshap stats \\ + $args \\ + $output_tsv \\ + $output_gtf \\ + $output_block \\ + $vcf \\ + | tee ${prefix}.log + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def tsv_touch_cmd = include_tsv_output ? "touch ${prefix}.tsv" : '' + def gtf_touch_cmd = include_gtf_output ? "touch ${prefix}.gtf" : '' + def block_touch_cmd = inlude_block_output ? 
"touch ${prefix}.txt" : '' + def log_touch_cmd = "touch ${prefix}.log" + """ + echo $args + + $tsv_touch_cmd + $gtf_touch_cmd + $block_touch_cmd + $log_touch_cmd + """ +} diff --git a/modules/nf-core/whatshap/stats/meta.yml b/modules/nf-core/whatshap/stats/meta.yml new file mode 100644 index 00000000..5db5a176 --- /dev/null +++ b/modules/nf-core/whatshap/stats/meta.yml @@ -0,0 +1,109 @@ +name: "whatshap_stats" +description: Compute statistics from phased variant file using Whatshap +keywords: + - vcf + - whatshap + - stats + - phasing + - phase +tools: + - "whatshap": + description: Phase genomic variants using DNA sequencing reads (haplotype + assembly). + args_id: "$args" + homepage: "https://whatshap.readthedocs.io" + documentation: "https://whatshap.readthedocs.io" + tool_dev_url: "https://github.com/whatshap/whatshap" + doi: "10.1101/085050" + licence: ["MIT"] + identifier: biotools:whatshap +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - vcf: + type: file + description: Phased variant vcf file + pattern: "*.vcf" + ontologies: + - edam: "http://edamontology.org/format_3016" # VCF + - include_tsv_output: + type: boolean + description: Whether to include TSV output file + default: false + - include_gtf_output: + type: boolean + description: Whether to include GTF output file + default: false + - inlude_block_output: + type: boolean + description: Whether to include block list output file + default: false +output: + tsv: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - ${prefix}.tsv: + type: file + description: Whatshap stats output in TSV format + pattern: "*.tsv" + gtf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample1' ] + - ${prefix}.gtf: + type: file + description: Whatshap stats output in GTF format + pattern: "*.gtf" + block: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - ${prefix}.txt: + type: file + description: Whatshap stats block list output + pattern: "*.txt" + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - ${prefix}.log: + type: file + description: Whatshap stats output in TXT format + pattern: "*.log" + versions_whatshap: + - - ${task.process}: + type: string + description: The name of the process + - whatshap: + type: string + description: The name of the tool + - whatshap --version: + type: string + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - whatshap: + type: string + description: The name of the tool + - whatshap --version: + type: eval + description: The expression to obtain the version of the tool +authors: + - "@eliottBo" +maintainers: + - "@eliottBo" diff --git a/modules/nf-core/whatshap/stats/tests/main.nf.test b/modules/nf-core/whatshap/stats/tests/main.nf.test new file mode 100644 index 00000000..12cc2f57 --- /dev/null +++ b/modules/nf-core/whatshap/stats/tests/main.nf.test @@ -0,0 +1,105 @@ +nextflow_process { + + name "Test Process WHATSHAP_STATS" + script "../main.nf" + process "WHATSHAP_STATS" + + tag "modules" + tag "modules_nfcore" + tag "whatshap" + tag "whatshap/stats" + + test("homo_sapiens - all_output - vcf") { + + when { + process { + """ + + input[0] = [ + [ id:'test_sample' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr21.vcf.gz', checkIfExists: true), + ] + input[1] = true + input[2] = true + input[3] = true + + """ + } + } + + then { + + assertAll( + { assert process.success }, + { assert snapshot(process.out.findAll { key, val -> 
key.startsWith("versions")}, + process.out).match()} + + ) + + } + + } + test("homo_sapiens - tsv_output - vcf") { + + when { + process { + """ + + input[0] = [ + [ id:'test_sample' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr21.vcf.gz', checkIfExists: true), + ] + input[1] = true + input[2] = false + input[3] = false + + """ + } + } + + then { + + assertAll( + { assert process.success }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions")}, process.out).match() }, + + ) + + } + + } + + test("homo_sapiens - vcf -stub") { + + options "-stub" + + when { + process { + """ + + input[0] = [ + [ id:'test_sample' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr21.vcf.gz', checkIfExists: true), + ] + input[1] = true + input[2] = true + input[3] = true + """ + } + } + + then { + + assertAll( + { assert process.success }, + { assert snapshot( + process.out.findAll { key, val -> key.startsWith("versions")}, + process.out + ).match() }, + ) + + } + + } + +} diff --git a/modules/nf-core/whatshap/stats/tests/main.nf.test.snap b/modules/nf-core/whatshap/stats/tests/main.nf.test.snap new file mode 100644 index 00000000..fea3adca --- /dev/null +++ b/modules/nf-core/whatshap/stats/tests/main.nf.test.snap @@ -0,0 +1,276 @@ +{ + "homo_sapiens - tsv_output - vcf": { + "content": [ + { + "versions_whatshap": [ + [ + "WHATSHAP_STATS", + "whatshap", + "2.8" + ] + ] + }, + { + "0": [ + [ + { + "id": "test_sample" + }, + "test_sample.tsv:md5,647d19183ff8efb21e48bea633ca375c" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test_sample" + }, + "test_sample.log:md5,5f818f833f6f66d852b638e2327b7671" + ] + ], + "4": [ + [ + "WHATSHAP_STATS", + "whatshap", + "2.8" + ] + ], + "block": [ + + ], + "gtf": [ + + ], + "log": [ + [ + { + "id": "test_sample" + }, + "test_sample.log:md5,5f818f833f6f66d852b638e2327b7671" + ] + ], + "tsv": [ + [ + { + "id": "test_sample" + }, + 
"test_sample.tsv:md5,647d19183ff8efb21e48bea633ca375c" + ] + ], + "versions_whatshap": [ + [ + "WHATSHAP_STATS", + "whatshap", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-09T10:56:19.643449202" + }, + "homo_sapiens - all_output - vcf": { + "content": [ + { + "versions_whatshap": [ + [ + "WHATSHAP_STATS", + "whatshap", + "2.8" + ] + ] + }, + { + "0": [ + [ + { + "id": "test_sample" + }, + "test_sample.tsv:md5,647d19183ff8efb21e48bea633ca375c" + ] + ], + "1": [ + [ + { + "id": "test_sample" + }, + "test_sample.gtf:md5,4a2b521799cdccfc2a296f49df39e313" + ] + ], + "2": [ + [ + { + "id": "test_sample" + }, + "test_sample.txt:md5,aa64e268909459b49a82ebab3b8bde5f" + ] + ], + "3": [ + [ + { + "id": "test_sample" + }, + "test_sample.log:md5,5f818f833f6f66d852b638e2327b7671" + ] + ], + "4": [ + [ + "WHATSHAP_STATS", + "whatshap", + "2.8" + ] + ], + "block": [ + [ + { + "id": "test_sample" + }, + "test_sample.txt:md5,aa64e268909459b49a82ebab3b8bde5f" + ] + ], + "gtf": [ + [ + { + "id": "test_sample" + }, + "test_sample.gtf:md5,4a2b521799cdccfc2a296f49df39e313" + ] + ], + "log": [ + [ + { + "id": "test_sample" + }, + "test_sample.log:md5,5f818f833f6f66d852b638e2327b7671" + ] + ], + "tsv": [ + [ + { + "id": "test_sample" + }, + "test_sample.tsv:md5,647d19183ff8efb21e48bea633ca375c" + ] + ], + "versions_whatshap": [ + [ + "WHATSHAP_STATS", + "whatshap", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-09T10:56:07.409933392" + }, + "homo_sapiens - vcf -stub": { + "content": [ + { + "versions_whatshap": [ + [ + "WHATSHAP_STATS", + "whatshap", + "2.8" + ] + ] + }, + { + "0": [ + [ + { + "id": "test_sample" + }, + "test_sample.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test_sample" + }, + "test_sample.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test_sample" + }, + 
"test_sample.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test_sample" + }, + "test_sample.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + "WHATSHAP_STATS", + "whatshap", + "2.8" + ] + ], + "block": [ + [ + { + "id": "test_sample" + }, + "test_sample.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gtf": [ + [ + { + "id": "test_sample" + }, + "test_sample.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test_sample" + }, + "test_sample.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tsv": [ + [ + { + "id": "test_sample" + }, + "test_sample.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_whatshap": [ + [ + "WHATSHAP_STATS", + "whatshap", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-09T10:56:23.489785877" + } +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 8b7c724c..84c9f487 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,6 +13,20 @@ params { // Input options input = null + // Small variant calling options + germline_var_keep = ['deepvariant', 'clair'] + somatic_var_keep = ['deepsomatic', 'clair'] + germline_var_combine = 'all' + somatic_var_combine = 'all' + prioritize_caller_germline = 'deepvariant' + prioritize_caller_somatic = 'deepsomatic' + generate_gvcf = false + + // PON Options + clairsto_pon_vcfs = null + clairsto_pon_flags = null + deepsomatic_pon_vcfs = null + // References genome = null igenomes_base = 's3://ngi-igenomes/igenomes/' @@ -26,18 +40,22 @@ params { vep_custom = null vep_custom_tbi = null - - normal_fiber = true - // Skip options skip_qc = false skip_cramino = false + skip_nanoplot = false skip_mosdepth = false skip_bamstats = false skip_ascat = false skip_wakhan = false skip_fiber = false + skip_normalfiber = false + skip_m6a = false skip_vep = false + skip_modcall = false + skip_modkit = false + use_gpu = false + 
skip_whatshapstats = false // minimap2 options minimap2_ont_model = null @@ -45,7 +63,10 @@ params { save_secondary_alignment = true // Fibertools options - params.autocorrelation = null + autocorrelation = null + + // Severus options + severus_minsupport = 3 // ASCAT options ascat_ploidy = null @@ -55,6 +76,7 @@ params { ascat_penalty = 150 ascat_purity = null ascat_longread_bins = 2000 + ascat_pdf_plots = false ascat_allelecounter_flags = "-f 0" ascat_chroms = null // Only use if running on a subset of chromosomes (c(1:22, 'X', 'Y')) @@ -76,6 +98,17 @@ params { multiqc_methods_description = null // Boilerplate options + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = System.getenv('HOOK_URL') + help = false + help_full = false + show_hidden = false + version = false outdir = null publish_dir_mode = 'copy' email = null @@ -88,6 +121,11 @@ params { show_hidden = false version = false pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + + // Config options + config_profile_name = null + config_profile_description = null trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')// Config options config_profile_name = null config_profile_description = null @@ -141,7 +179,18 @@ profiles { apptainer.enabled = false docker.runOptions = '-u $(id -u):$(id -g)' } - arm { + arm64 { + process.arch = 'arm64' + // TODO https://github.com/nf-core/modules/issues/6694 + // For now if you're using arm64 you have to use wave for the sake of the maintainers + // wave profile + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' + } + emulate_amd64 { docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { @@ -198,18 +247,6 @@ profiles { wave.freeze 
= true wave.strategy = 'conda,container' } - gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB - process { - resourceLimits = [ - memory: 8.GB, - cpus : 4, - time : 1.h - ] - } - } gpu { docker.runOptions = '-u $(id -u):$(id -g) --gpus all' apptainer.runOptions = '--nv' @@ -344,24 +381,18 @@ manifest { mainScript = 'main.nf' defaultBranch = 'main' nextflowVersion = '!>=25.04.0' - version = '1.0.0' + version = '1.1.0dev' doi = '' } // Nextflow plugins plugins { - id 'nf-schema@2.4.2' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.5.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet } validation { defaultIgnoreParams = ["genomes"] monochromeLogs = params.monochrome_logs - help { - enabled = true - command = "nextflow run IntGenomicsLab/lrsomatic -profile --input samplesheet.csv --outdir " - fullParameter = "help_full" - showHiddenParameter = "show_hidden" - } } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index 12dabb38..a7cc1495 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -43,6 +43,74 @@ } } }, + "pon_options": { + "title": "Panel of Normals (PON) options", + "type": "object", + "description": "Options for panel of normals filtering", + "default": "", + "properties": { + "clairsto_pon_vcfs": { + "type": "string", + "description": "Path to panel of normals VCF file(s) for ClairS-TO somatic variant filtering" + }, + "clairsto_pon_flags": { + "type": "string", + "description": "Population allele matching flags for ClairS-TO PON VCFs (one per VCF, comma-separated)" + }, + "deepsomatic_pon_vcfs": { + "type": "string", + "description": "Path to panel of normals VCF file(s) for DeepSomatic --population_vcfs. 
If not set, uses container-bundled defaults in tumor-only mode, or no PON in paired mode" + } + } + }, + "small_variant_calling_options": { + "title": "options for small variant calling", + "type": "object", + "properties": { + "germline_var_keep": { + "type": "array", + "description": "List of germline variant callers to use. Must include at least one of [deepvariant, clair].", + "items": { + "type": "string", + "enum": ["deepvariant", "clair"] + }, + "minItems": 1 + }, + "somatic_var_keep": { + "type": "array", + "description": "List of somatic variant callers to use. Must include at least one of [deepsomatic, clair].", + "items": { + "type": "string", + "enum": ["deepsomatic", "clair"] + }, + "minItems": 1 + }, + "germline_var_combine": { + "type": "string", + "description": "When two germline callers are used, specifies how to combine them. 'consensus' keeps only variants called by both callers; 'all' keeps all variants from both callers.", + "default": "all", + "enum": ["consensus", "all"] + }, + "somatic_var_combine": { + "type": "string", + "description": "When two somatic callers are used, specifies how to combine them. 'consensus' keeps only variants called by both callers; 'all' keeps all variants from both callers.", + "default": "all", + "enum": ["consensus", "all"] + }, + "prioritize_caller_germline": { + "type": "string", + "description": "When both germline callers are used, specifies which caller's format to use for variants called by both. Must be [deepvariant, clair].", + "default": "deepvariant", + "enum": ["deepvariant", "clair"] + }, + "prioritize_caller_somatic": { + "type": "string", + "description": "When both somatic callers are used, specifies which caller's format to use for variants called by both. 
Must be [deepsomatic, clair].", + "default": "deepsomatic", + "enum": ["deepsomatic", "clair"] + } + } + }, "reference_genome_options": { "title": "Reference genome options", "type": "object", @@ -58,6 +126,7 @@ }, "igenomes_ignore": { "type": "boolean", + "default": false, "description": "Do not load the iGenomes reference config.", "fa_icon": "fas fa-ban", "hidden": true, @@ -70,23 +139,6 @@ "fa_icon": "fas fa-ban", "hidden": true, "default": "s3://ngi-igenomes/igenomes/" - }, - "vep_cache": { - "type": "string", - "description": "Path to VEP cache directory.", - "fa_icon": "fas fa-database", - "help_text": "Path to the directory containing the VEP cache. If you are using an iGenomes reference, this will be set automatically. Otherwise, you will need to provide this path yourself." - }, - "vep_cache_version": { - "type": "integer", - "description": "Version of the VEP cache to use.", - "fa_icon": "fas fa-hashtag", - "help_text": "The version of the VEP cache to use. This should match the version of VEP being used." - }, - "vep_args": { - "type": "string", - "description": "Additional command line arguments to pass to VEP.", - "fa_icon": "fas fa-terminal" } } }, @@ -111,6 +163,68 @@ } } }, + "fibertools_options": { + "title": "Fibertools options", + "type": "object", + "description": "", + "default": "", + "properties": { + "autocorrelation": { + "type": "string" + } + } + }, + "vep_options": { + "title": "VEP options", + "type": "object", + "description": "", + "default": "", + "properties": { + "vep_cache": { + "type": "string", + "description": "Path to VEP cache directory.", + "fa_icon": "fas fa-database", + "help_text": "Path to the directory containing the VEP cache. If you are using an iGenomes reference, this will be set automatically. 
Otherwise, you will need to provide this path yourself.", + "default": "s3://annotation-cache/vep_cache/" + }, + "vep_args": { + "type": "string", + "description": "Additional command line arguments to pass to VEP.", + "fa_icon": "fas fa-terminal", + "default": "--everything --filter_common --per_gene --total_length --offline --format vcf" + }, + "vep_cache_version": { + "type": "integer", + "description": "Version of the VEP cache to use.", + "fa_icon": "fas fa-hashtag", + "help_text": "The version of the VEP cache to use. This should match the version of VEP being used.", + "default": 113 + }, + "download_vep_cache": { + "type": "boolean", + "default": false, + "description": "Download the VEP cache if not already present" + }, + "vep_custom": { + "type": "string" + }, + "vep_custom_tbi": { + "type": "string" + } + } + }, + "severus_options": { + "title": "Severus options", + "type": "object", + "description": "", + "default": "", + "properties": { + "severus_minsupport": { + "type": "integer", + "default": 3 + } + } + }, "ascat_parameters": { "title": "ASCAT parameters", "type": "object", @@ -166,6 +280,22 @@ "ascat_rt_files": { "type": "string", "description": "path to (zip) of RT files" + }, + "ascat_pdf_plots": { + "type": "boolean", + "default": false, + "description": "Boolean for ASCAT production of pdf plots (entered as string)" + } + } + }, + "wakhan_options": { + "title": "Wakhan options", + "type": "object", + "description": "", + "default": "", + "properties": { + "wakhan_chroms": { + "type": "string" } } }, @@ -177,31 +307,73 @@ "properties": { "skip_qc": { "type": "boolean", + "default": false, "description": "Skips all QC steps" }, "skip_cramino": { "type": "boolean", + "default": false, "description": "Skips Cramino" }, "skip_mosdepth": { "type": "boolean", + "default": false, "description": "Skips Mosdepth" }, "skip_bamstats": { "type": "boolean", + "default": false, "description": "Skips samtools flagstat, stats, and idxstats" }, "skip_wakhan": 
{ "type": "boolean", + "default": false, "description": "Skips wakhan" }, "skip_fiber": { "type": "boolean", + "default": false, "description": "Skip Fibertools steps" }, "skip_ascat": { "type": "boolean", + "default": false, "description": "Skip ASCAT" + }, + "skip_m6a": { + "type": "boolean", + "default": false, + "description": "Skip m6a calling by Fibertools" + }, + "skip_vep": { + "type": "boolean", + "default": false, + "description": "Skip VEP annotation" + }, + "skip_normalfiber": { + "type": "boolean", + "default": false, + "description": "Skip Fibertools steps for the normal sample" + }, + "skip_nanoplot": { + "type": "boolean", + "default": false, + "description": "Skip Nanoplot" + }, + "skip_whatshapstats": { + "type": "boolean", + "default": false, + "description": "Skip WhatsHap stats" + }, + "skip_modcall": { + "type": "boolean", + "default": false, + "description": "Skip modification calling" + }, + "use_gpu": { + "type": "boolean", + "default": false, + "description": "Use GPU for supported tools (e.g. DeepVariant, DeepSomatic, Clair3)" } } }, @@ -262,6 +434,7 @@ "properties": { "version": { "type": "boolean", + "default": false, "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", "hidden": true @@ -285,6 +458,7 @@ }, "plaintext_email": { "type": "boolean", + "default": false, "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", "hidden": true @@ -299,6 +473,7 @@ }, "monochrome_logs": { "type": "boolean", + "default": false, "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", "hidden": true @@ -348,12 +523,20 @@ "description": "Suffix to add to the trace report filename. 
Default is the date and time in the format yyyy-MM-dd_HH-mm-ss.", "hidden": true }, - "normal_fiber": { + "help": { "type": "boolean", - "default": true, - "description": "do fiber-seq on normal samples", - "fa_icon": "fas fa-fiber", - "hidden": true + "default": false, + "description": "Display the help message." + }, + "help_full": { + "type": "boolean", + "default": false, + "description": "Display the full detailed help message." + }, + "show_hidden": { + "type": "boolean", + "default": false, + "description": "Display hidden parameters in the help message (only works when --help or --help_full are provided)." } } } @@ -362,15 +545,33 @@ { "$ref": "#/$defs/input_output_options" }, + { + "$ref": "#/$defs/pon_options" + }, + { + "$ref": "#/$defs/small_variant_calling_options" + }, { "$ref": "#/$defs/reference_genome_options" }, { "$ref": "#/$defs/minimap2_options" }, + { + "$ref": "#/$defs/fibertools_options" + }, + { + "$ref": "#/$defs/vep_options" + }, + { + "$ref": "#/$defs/severus_options" + }, { "$ref": "#/$defs/ascat_parameters" }, + { + "$ref": "#/$defs/wakhan_options" + }, { "$ref": "#/$defs/skip_options" }, diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 67aa24f4..e0ca7d28 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -21,9 +21,9 @@ { "@id": "./", "@type": "Dataset", - "creativeWorkStatus": "Stable", - "datePublished": "2025-11-28T12:51:54+00:00", - "description": "# IntGenomicsLab/lrsomatic\n\n[![GitHub Actions CI Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml)[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/IntGenomicsLab/lrsomatic)\n\n## Introduction\n\n**IntGenomicsLab/lrsomatic** is a robust bioinformatics pipeline designed for processing and analyzing **somatic DNA sequencing** data for long-read sequencing technologies from **Oxford Nanopore** and **PacBio**. 
It supports both canonical base DNA and modified base calling, including specialized applications such as **Fiber-seq**.\n\nThis **end-to-end pipeline** handles the entire workflow \u2014 **from raw read processing and alignment, to comprehensive somatic variant calling**, including single nucleotide variants, indels, structural variants, copy number alterations, and modified bases.\n\nIt can be run in both **matched tumour-normal** and **tumour-only mode**, offering flexibility depending on the users study design.\n\nDeveloped using **Nextflow DSL2**, it offers high portability and scalability across diverse computing environments. By leveraging Docker or Singularity containers, installation is streamlined and results are highly reproducible. Each process runs in an isolated container, simplifying dependency management and updates. Where applicable, pipeline components are sourced from **nf-core/modules**, promoting reuse, interoperability, and consistency within the broader Nextflow and nf-core ecosystems.\n\n## Pipeline summary\n\n**1) Pre-processing:**\n\na. Raw read QC ([`cramino`](https://github.com/wdecoster/cramino))\n\nb. Alignment to the reference genome ([`minimap2`](https://github.com/lh3/minimap2))\n\nc. Post alignment QC ([`cramino`](https://github.com/wdecoster/cramino), [`samtools idxstats`](https://github.com/samtools/samtools), [`samtools flagstats`](https://github.com/samtools/samtools), [`samtools stats`](https://github.com/samtools/samtools))\n\nd. Specific for calling modified base calling ([`Modkit`](https://github.com/nanoporetech/modkit), [`Fibertools`](https://github.com/fiberseq/fibertools-rs))\n\n**2i) Matched mode: small variant calling:**\n\na. Calling Germline SNPs ([`Clair3`](https://github.com/HKU-BAL/Clair3))\n\nb. Phasing and Haplotagging the SNPs in the normal and tumour BAM ([`LongPhase`](https://github.com/twolinin/longphase))\n\nc. 
Calling somatic SNVs ([`ClairS`](https://github.com/HKU-BAL/ClairS))\n\n**2ii) Tumour only mode: small variant calling:**\n\na. Calling Germline SNPs and somatic SNVs ([`ClairS-TO`](https://github.com/HKU-BAL/ClairS-TO))\n\nb. Phasing and Haplotagging germline SNPs in tumour BAM ([`LongPhase`](https://github.com/twolinin/longphase))\n\n**3) Large variant calling:**\n\na. Somatic structural variant calling ([`Severus`](https://github.com/KolmogorovLab/Severus))\n\nb. Copy number alterion calling; long read version of ([`ASCAT`](https://github.com/VanLoo-lab/ascat))\n\n**4) Annotation:**\n\na. Small variant annotation ([`VEP`](https://github.com/Ensembl/ensembl-vep))\n\nb. Structural variant annotation ([`VEP`](https://github.com/Ensembl/ensembl-vep))\n\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst prepare a samplesheet with your input data that looks as follows:\n\n```csv\nsample,bam_tumor,bam_normal,platform,sex,fiber\nsample1,tumour.bam,normal.bam,ont,female,n\nsample2,tumour.bam,,ont,female,y\nsample3,tumour.bam,,pb,male,n\nsample4,tumour.bam,normal.bam,pb,male,y\n```\n\nEach row represents a sample. The bam files should always be unaligned bam files. All fields except for `bam_normal` are required. If `bam_normal` is empty, the pipeline will run in tumour only mode. `platform` should be either `ont` or `pb` for Oxford Nanopore Sequencing or PacBio sequencing, respectively. `sex` refers to the biological sex of the sample and should be either `female` or `male`. 
Finally, `fiber` specifies whether your sample is Fiber-seq data or not and should have either `y` for Yes or `n` for No.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run IntGenomicsLab/lrsomatic \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\nMore detail is given in our [usage documentation](/docs/usage.md)\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\n## Credits\n\nIntGenomicsLab/lr_somatic was originally written by Luuk Harbers, Robert Forsyth, Alexandra Pan\u010d\u00edkov\u00e1, Marios Eftychiou, Ruben Cools, Laurens Lambrechts, and Jonas Demeulemeester.\n\n## Pipeline output\n\nThis pipeline produces a series of different output files. The main output is an aligned and phased tumour bam file. This bam file can be used by any typical downstream tool that uses bam files as input. Furthermore, we have sample-specific QC outputs from `cramino` (fastq), `cramino` (bam), `mosdepth`, `samtools` (stats/flagstat/idxstats), and optionally `fibertools`. Finally, we have a `multiqc` report from that combines the output from `mosdepth` and `samtools` into one html report.\n\nBesides QC and the aligned and phased bam file, we have output from (structural) variant and copy number callers, of which some are optional. The output from these variant callers can be found in their respective folders. For small and structural variant callers (`clairS`, `clairS-TO`, and `severus`) these will contain, among others, `vcf` files with called variants. 
For `ascat` these contain files with final copy number information and plots of the copy number profiles.\n\nExample output directory structure:\n\n```\n\u251c\u2500\u2500 Sample 1\n\u2502 \u251c\u2500\u2500 ascat\n\u2502 \u251c\u2500\u2500 bamfiles\n\u2502 \u251c\u2500\u2500 qc\n\u2502 \u2502 \u251c\u2500\u2500 tumor\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u251c\u2500\u2500 variants\n\u2502 \u2502 \u251c\u2500\u2500clairS-TO\n\u2502 \u2502 \u251c\u2500\u2500severus\n\u2502 \u251c\u2500\u2500 vep\n\u2502 \u2502 \u251c\u2500\u2500 germline\n\u2502 \u2502 \u251c\u2500\u2500 somatic\n\u2502 \u2502 \u251c\u2500\u2500 SVs\n\u2502\n\u251c\u2500\u2500 Sample 2\n\u2502 \u251c\u2500\u2500 ascat\n\u2502 \u251c\u2500\u2500 bamfiles\n\u2502 \u251c\u2500\u2500 qc\n\u2502 \u2502 \u251c\u2500\u2500 tumor\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u2502 \u251c\u2500\u2500 normal\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u251c\u2500\u2500 variants\n\u2502 \u2502 \u251c\u2500\u2500 clair3\n\u2502 \u2502 \u251c\u2500\u2500 clairS\n\u2502 \u2502 \u251c\u2500\u2500 severus\n\u2502 \u251c\u2500\u2500 vep\n\u2502 \u2502 \u251c\u2500\u2500 germline\n\u2502 \u2502 \u251c\u2500\u2500 somatic\n\u2502 \u2502 \u251c\u2500\u2500 SVs\n\u251c\u2500\u2500 pipeline_info\n```\n\nmore detail is given in our [output 
documentation](/docs/output.md)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "creativeWorkStatus": "InProgress", + "datePublished": "2025-12-23T12:58:53+00:00", + "description": "# IntGenomicsLab/lrsomatic\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/IntGenomicsLab/lrsomatic)\n[![GitHub Actions CI Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.17751829-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.17751829)\n[![GitHub Actions Linting Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml)[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.17751829-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.17751829)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/IntGenomicsLab/lrsomatic)\n\n## Introduction\n\n**IntGenomicsLab/lrsomatic** is a robust bioinformatics pipeline designed for processing and analyzing **somatic DNA sequencing** data for long-read sequencing technologies from **Oxford Nanopore** and **PacBio**. 
It supports both canonical base DNA and modified base calling, including specialized applications such as **Fiber-seq**.\n\nThis **end-to-end pipeline** handles the entire workflow \u2014 **from raw read processing and alignment, to comprehensive somatic variant calling**, including single nucleotide variants, indels, structural variants, copy number alterations, and modified bases.\n\nIt can be run in both **matched tumour-normal** and **tumour-only mode**, offering flexibility depending on the users study design.\n\nDeveloped using **Nextflow DSL2**, it offers high portability and scalability across diverse computing environments. By leveraging Docker or Singularity containers, installation is streamlined and results are highly reproducible. Each process runs in an isolated container, simplifying dependency management and updates. Where applicable, pipeline components are sourced from **nf-core/modules**, promoting reuse, interoperability, and consistency within the broader Nextflow and nf-core ecosystems.\n\nFor more information on how to run the pipeline, you can also go [here](https://intgenomicslab.github.io/lrsomatic).\n\n## Pipeline summary\n\n![image](./assets/lrsomatic_1.0.png)\n\n**1) Pre-processing:**\n\na. Raw read QC ([`cramino`](https://github.com/wdecoster/cramino))\n\nb. Alignment to the reference genome ([`minimap2`](https://github.com/lh3/minimap2))\n\nc. Post alignment QC ([`cramino`](https://github.com/wdecoster/cramino), [`samtools idxstats`](https://github.com/samtools/samtools), [`samtools flagstats`](https://github.com/samtools/samtools), [`samtools stats`](https://github.com/samtools/samtools))\n\nd. Specific for calling modified base calling ([`Modkit`](https://github.com/nanoporetech/modkit), [`Fibertools`](https://github.com/fiberseq/fibertools-rs))\n\n**2i) Matched mode: small variant calling:**\n\na. Calling Germline SNPs ([`Clair3`](https://github.com/HKU-BAL/Clair3))\n\nb. 
Phasing and Haplotagging the SNPs in the normal and tumour BAM ([`LongPhase`](https://github.com/twolinin/longphase))\n\nc. Calling somatic SNVs ([`ClairS`](https://github.com/HKU-BAL/ClairS))\n\n**2ii) Tumour only mode: small variant calling:**\n\na. Calling Germline SNPs and somatic SNVs ([`ClairS-TO`](https://github.com/HKU-BAL/ClairS-TO))\n\nb. Phasing and Haplotagging germline SNPs in tumour BAM ([`LongPhase`](https://github.com/twolinin/longphase))\n\n**3) Large variant calling:**\n\na. Somatic structural variant calling ([`Severus`](https://github.com/KolmogorovLab/Severus))\n\nb. Copy number alterion calling; long read version of ([`ASCAT`](https://github.com/VanLoo-lab/ascat))\n\n**4) Annotation:**\n\na. Small variant annotation ([`VEP`](https://github.com/Ensembl/ensembl-vep))\n\nb. Structural variant annotation ([`VEP`](https://github.com/Ensembl/ensembl-vep))\n\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst prepare a samplesheet with your input data that looks as follows:\n\n```csv\nsample,bam_tumor,bam_normal,platform,sex,fiber\nsample1,tumour.bam,normal.bam,ont,female,n\nsample2,tumour.bam,,ont,female,y\nsample3,tumour.bam,,pb,male,n\nsample4,tumour.bam,normal.bam,pb,male,y\n```\n\nEach row represents a sample. The bam files should always be unaligned bam files. All fields except for `bam_normal` are required. If `bam_normal` is empty, the pipeline will run in tumour only mode. `platform` should be either `ont` or `pb` for Oxford Nanopore Sequencing or PacBio sequencing, respectively. `sex` refers to the biological sex of the sample and should be either `female` or `male`. 
Finally, `fiber` specifies whether your sample is Fiber-seq data or not and should have either `y` for Yes or `n` for No.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run IntGenomicsLab/lrsomatic \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\nMore detail is given in our [usage documentation](/docs/usage.md)\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\n## Credits\n\nIntGenomicsLab/lr_somatic was originally written by Luuk Harbers, Robert Forsyth, Alexandra Pan\u010d\u00edkov\u00e1, Marios Eftychiou, Ruben Cools, Laurens Lambrechts, and Jonas Demeulemeester.\n\n## Pipeline output\n\nThis pipeline produces a series of different output files. The main output is an aligned and phased tumour bam file. This bam file can be used by any typical downstream tool that uses bam files as input. Furthermore, we have sample-specific QC outputs from `cramino` (fastq), `cramino` (bam), `mosdepth`, `samtools` (stats/flagstat/idxstats), and optionally `fibertools`. Finally, we have a `multiqc` report from that combines the output from `mosdepth` and `samtools` into one html report.\n\nBesides QC and the aligned and phased bam file, we have output from (structural) variant and copy number callers, of which some are optional. The output from these variant callers can be found in their respective folders. For small and structural variant callers (`clairS`, `clairS-TO`, and `severus`) these will contain, among others, `vcf` files with called variants. 
For `ascat` these contain files with final copy number information and plots of the copy number profiles.\n\nExample output directory structure:\n\n```\n\u251c\u2500\u2500 Sample 1\n\u2502 \u251c\u2500\u2500 ascat\n\u2502 \u251c\u2500\u2500 bamfiles\n\u2502 \u251c\u2500\u2500 qc\n\u2502 \u2502 \u251c\u2500\u2500 tumor\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u251c\u2500\u2500 variants\n\u2502 \u2502 \u251c\u2500\u2500clairS-TO\n\u2502 \u2502 \u251c\u2500\u2500severus\n\u2502 \u251c\u2500\u2500 vep\n\u2502 \u2502 \u251c\u2500\u2500 germline\n\u2502 \u2502 \u251c\u2500\u2500 somatic\n\u2502 \u2502 \u251c\u2500\u2500 SVs\n\u2502\n\u251c\u2500\u2500 Sample 2\n\u2502 \u251c\u2500\u2500 ascat\n\u2502 \u251c\u2500\u2500 bamfiles\n\u2502 \u251c\u2500\u2500 qc\n\u2502 \u2502 \u251c\u2500\u2500 tumor\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u2502 \u251c\u2500\u2500 normal\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u251c\u2500\u2500 variants\n\u2502 \u2502 \u251c\u2500\u2500 clair3\n\u2502 \u2502 \u251c\u2500\u2500 clairS\n\u2502 \u2502 \u251c\u2500\u2500 severus\n\u2502 \u251c\u2500\u2500 vep\n\u2502 \u2502 \u251c\u2500\u2500 germline\n\u2502 \u2502 \u251c\u2500\u2500 somatic\n\u2502 \u2502 \u251c\u2500\u2500 SVs\n\u251c\u2500\u2500 pipeline_info\n```\n\nmore detail is given in our [output 
documentation](/docs/output.md)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use `IntGenomicsLab/lrsomatic` for your analysis, please cite it using the following:\n\n> LRSomatic: a highly scalable and robust pipeline for somatic variant calling in long-read sequencing data\n>\n> Robert A. Forsyth*, Luuk Harbers*, Amber Verhasselt, Ana-Luc\u00eda Rocha Iraiz\u00f3s, Sidi Yang, Joris Vande Velde, Christopher Davies, Nischalan Pillay, Laurens Lambrechts, Jonas Demeulemeester\n>\n> bioRxiv 2026.02.26.707772; doi: https://doi.org/10.64898/2026.02.26.707772\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -96,7 +96,7 @@ }, "mentions": [ { - "@id": "#abec6cbc-500e-43f6-bc1f-0f2530f2956d" + "@id": "#37ea2685-1622-4ca2-a059-c12365c26c03" } ], "name": "IntGenomicsLab/lrsomatic" @@ -124,7 +124,7 @@ "ComputationalWorkflow" ], "dateCreated": "", - "dateModified": "2025-11-28T13:51:54Z", + "dateModified": "2025-12-23T13:58:53Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": [ "nf-core", @@ -144,10 +144,10 @@ }, "url": [ "https://github.com/IntGenomicsLab/lrsomatic", - "https://nf-co.re/IntGenomicsLab/lrsomatic/1.0.0/" + "https://nf-co.re/IntGenomicsLab/lrsomatic/dev/" ], "version": [ - "1.0.0" + "1.1.0dev" ] }, { @@ -163,11 +163,11 @@ "version": "!>=25.04.0" }, { - "@id": "#abec6cbc-500e-43f6-bc1f-0f2530f2956d", + "@id": "#37ea2685-1622-4ca2-a059-c12365c26c03", "@type": "TestSuite", "instance": [ { - "@id": "#7f53c2d3-49e7-4c7c-bec5-7e617642b3b6" + "@id": "#3407ebf2-d870-42e0-a3c2-7be89cb69e2b" } ], "mainEntity": { @@ -176,7 +176,7 @@ "name": "Test suite for IntGenomicsLab/lrsomatic" }, { - "@id": "#7f53c2d3-49e7-4c7c-bec5-7e617642b3b6", + "@id": "#3407ebf2-d870-42e0-a3c2-7be89cb69e2b", "@type": "TestInstance", "name": "GitHub Actions workflow for testing IntGenomicsLab/lrsomatic", "resource": "repos/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml", diff --git a/subworkflows/local/deepsomatic.nf b/subworkflows/local/deepsomatic.nf new file mode 100644 index 00000000..1bb5d86c --- /dev/null +++ b/subworkflows/local/deepsomatic.nf @@ -0,0 +1,69 @@ +include { DEEPSOMATIC_MAKEEXAMPLES } from '../../modules/local/deepsomatic/makeexamples/main' +include { DEEPSOMATIC_CALLVARIANTS } from '../../modules/local/deepsomatic/callvariants/main' +include { DEEPSOMATIC_POSTPROCESSVARIANTS } from '../../modules/local/deepsomatic/postprocessvariants/main' + +workflow DEEPSOMATIC { + take: + ch_input // [meta, 
normal_bam, normal_bai, tumor_bam, tumor_bai] + // normal_bam/bai may be [] for tumor-only mode + _ch_intervals // [[:], []] -- empty intervals (genome-wide calling) + ch_fasta // [[:], fasta] + ch_fai // [[:], fai] + ch_gzi // [[:], gzi] -- bgzipped FASTA index (empty if FASTA is not bgzipped) + ch_ds_pon // [[:], [pon_vcf_path, ...]] or [[:], []] + // user-supplied DeepSomatic PON VCFs as a separate tuple input (like fasta/fai/gzi) + // empty path list => process uses container-bundled defaults (tumor-only) or no PON (paired) + + main: + + // + // MODULE: DEEPSOMATIC_MAKEEXAMPLES (label: process_high) + // Input: [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] + // [[:], fasta] / [[:], fai] / [[:], gzi] / [[:], [pon_vcfs...]] + // Output: .examples -- [meta, [tfrecord shards...]] -- serialised pileup examples + // .gvcf -- [meta, [gvcf tfrecord shards...]] + // + DEEPSOMATIC_MAKEEXAMPLES(ch_input, ch_fasta, ch_fai, ch_gzi, ch_ds_pon) + + // + // MODULE: DEEPSOMATIC_CALLVARIANTS (label: process_gpu / process_high) + // Input: DEEPSOMATIC_MAKEEXAMPLES.out.examples -- [meta, [tfrecord shards...]] + // Output: .call_variants_tfrecords -- [meta, tfrecord] -- DNN variant call records + // + DEEPSOMATIC_CALLVARIANTS(DEEPSOMATIC_MAKEEXAMPLES.out.examples) + + // Join CALLVARIANTS output with MAKEEXAMPLES gVCF records only when generate_gvcf is true. + // ch_postproc_input: [meta, call_tfrecord, [gvcf_tfrecords...], [], []] + // trailing [] are for optional candidate positions and haplotype outputs (unused) + ch_postproc_input = params.generate_gvcf + ? 
DEEPSOMATIC_CALLVARIANTS.out.call_variants_tfrecords.join( + DEEPSOMATIC_MAKEEXAMPLES.out.gvcf, failOnMismatch: true + ).map { meta, call_tfrecord, gvcf_tfrecords -> + [meta, call_tfrecord, gvcf_tfrecords, [], []] + } + : DEEPSOMATIC_CALLVARIANTS.out.call_variants_tfrecords.map { meta, call_tfrecord -> + [meta, call_tfrecord, [], [], []] + } + + // + // MODULE: DEEPSOMATIC_POSTPROCESSVARIANTS (label: process_medium) + // Input: [meta, call_tfrecord, [gvcf_tfrecords...], [], []] + // Output: .vcf -- [meta, vcf] -- somatic variant calls (VCF) + // .vcf_index -- [meta, tbi] + // .gvcf -- [meta, gvcf] -- genome VCF (all sites) + // .gvcf_index-- [meta, tbi] + // + DEEPSOMATIC_POSTPROCESSVARIANTS( + ch_postproc_input, + ch_fasta, + ch_fai, + ch_gzi, + ch_ds_pon + ) + + emit: + vcf = DEEPSOMATIC_POSTPROCESSVARIANTS.out.vcf // [meta, vcf] + vcf_index = DEEPSOMATIC_POSTPROCESSVARIANTS.out.vcf_index // [meta, tbi] + gvcf = DEEPSOMATIC_POSTPROCESSVARIANTS.out.gvcf // [meta, gvcf] + gvcf_index = DEEPSOMATIC_POSTPROCESSVARIANTS.out.gvcf_index // [meta, tbi] +} diff --git a/subworkflows/local/paired/paired_smallvar_germline.nf b/subworkflows/local/paired/paired_smallvar_germline.nf new file mode 100644 index 00000000..bc641a7b --- /dev/null +++ b/subworkflows/local/paired/paired_smallvar_germline.nf @@ -0,0 +1,179 @@ +// IMPORT MODULES +include { CLAIR3 } from '../../../modules/local/clair3/main.nf' + +// IMPORT SUBWORKFLOWS +include { DEEPVARIANT } from '../../../subworkflows/nf-core/deepvariant/main.nf' +include { SMALL_VARIANT_CONSENSUS as GERMLINE_CONSENSUS } from '../../../subworkflows/local/small_variant_consensus.nf' + +workflow PAIRED_SMALLVAR_GERMLINE { + + take: + normal_bams // [meta, normal_bam, normal_bai] -- normal sample BAMs from T/N pairs + fasta // [[:], fasta] + fai // [[:], fai] + clair3_models // [meta(id=model_name), model_dir] -- downloaded Clair3 model directories + + main: + germline_vcf = channel.empty() + + // COMBINE NORMAL BAMS WITH DOWNLOADED 
CLAIR3 MODELS + // Clair3 requires the model directory path; models are keyed by model name (meta.id) + if(params.germline_var_keep.contains('clair')) { + + // Extract model name from meta.id for combine-by key + clair3_models + .map{ meta, file -> + def clair3_model_name = meta.id + return [meta, clair3_model_name, file] + } + .set{clair3_models} + // clair3_models: [meta(id=model_name), model_name_str, model_dir] + + // Emit [meta, clair3_model_name, bam, bai] to use model_name as the combine key + normal_bams + .map{ meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return [ new_meta, meta.clair3_model, bam, bai ] + } + .set { normal_bams_model } + // normal_bams_model: [meta, clair3_model_name, bam, bai] + // clair3_model_name is the join key used by .combine(clair3_models, by:1) + + // + // MODULE: CLAIR3 (label: process_high) + // Input: [meta, bam, bai, model_dir, platform_str] + // fasta / fai + // Output: .vcf -- [meta, vcf] -- germline SNVs/indels + // .tbi -- [meta, tbi] + // + normal_bams_model + .combine(clair3_models,by:1) // join on clair3_model_name + .map {_clair3_model, meta_bam, bam, bai, _meta_model, model -> + def platform = (meta_bam.platform == 'pb') ? 'hifi' : meta_bam.platform + return [meta_bam, bam, bai, model, platform] + } + .set{ clair3_input_ch } + // clair3_input_ch: [meta, bam, bai, model_dir, platform_str] + // platform_str: 'hifi' for PacBio ('pb' → 'hifi'), otherwise meta.platform (e.g. 
'ont') + + CLAIR3 ( + clair3_input_ch, + fasta, + fai + ) + + CLAIR3.out.vcf + .join(CLAIR3.out.tbi) + .map { meta, vcf , tbi -> + def new_meta = meta + [caller:'clair3'] + return [new_meta, vcf, tbi] + } + .set{clair3_ch} + // clair3_ch: [meta(+caller:'clair3'), vcf, tbi] + } + + // DEEPVARIANT + if(params.germline_var_keep.contains('deepvariant')) { + + // + // SUBWORKFLOW: DEEPVARIANT (nf-core) + // Input: [meta, bam, bai, []] -- [] is empty intervals (genome-wide) + // fasta / fai + // [[:],[]] x2 -- empty PAR/GFF interval files (not used for WGS) + // Output: .vcf -- [meta, vcf] + // .vcf_index -- [meta, tbi] + // + normal_bams + .map {meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + def intervals = [] + return [new_meta, bam, bai, intervals] + } + .set{deepvariant_input_ch} + // deepvariant_input_ch: [meta, bam, bai, []] + + DEEPVARIANT ( + deepvariant_input_ch, + fasta, + fai, + [[:],[]], // PAR regions (not used) + [[:],[]] // GFF annotation (not used) + ) + + DEEPVARIANT.out.vcf + .join(DEEPVARIANT.out.vcf_index) + .map{ meta, vcf, tbi -> + def new_meta = meta + [caller:'deepvariant'] + return [new_meta, vcf, tbi] + } + .set{deepvariant_ch} + // deepvariant_ch: [meta(+caller:'deepvariant'), vcf, tbi] + } + + // COMBINE GERMLINE VARIATION + // If both callers requested: run consensus subworkflow; otherwise pass through single-caller output + if (params.germline_var_keep.size() > 1) { + // Mix both caller VCFs into a single channel for GERMLINE_CONSENSUS + clair3_ch + .mix(deepvariant_ch) + .set{combined_germline_ch} + // combined_germline_ch: [meta(+caller), vcf, tbi] -- one item per caller per sample + + // SUBWORKFLOW: GERMLINE_CONSENSUS (SMALL_VARIANT_CONSENSUS alias) + // Normalise, annotate with caller ID, intersect, and combine per params + GERMLINE_CONSENSUS( + combined_germline_ch, + fasta, + fai, + 
params.prioritize_caller_germline, + params.germline_var_combine + ) + GERMLINE_CONSENSUS.out.vcf + .join(GERMLINE_CONSENSUS.out.tbi) + .set{ germline_vcf } + // germline_vcf: [meta(+caller from consensus), vcf, tbi] + } + else if (params.germline_var_keep == ['clair']) { + clair3_ch + .set{germline_vcf} + } + else if (params.germline_var_keep == ['deepvariant']) { + deepvariant_ch + .set{germline_vcf} + } + + // Strip 'caller' field from final germline VCF meta (not needed downstream) + germline_vcf + .map{ meta, vcf, tbi -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, vcf, tbi] + } + .set{germline_vcf} + + emit: + germline_vcf // [meta, vcf, tbi] -- final germline VCF (Clair3, DeepVariant, or consensus) +} diff --git a/subworkflows/local/paired/paired_smallvar_somatic.nf b/subworkflows/local/paired/paired_smallvar_somatic.nf new file mode 100644 index 00000000..7c2cf944 --- /dev/null +++ b/subworkflows/local/paired/paired_smallvar_somatic.nf @@ -0,0 +1,168 @@ +// IMPORT MODULES +include { CLAIRS } from '../../../modules/local/clairs/main.nf' +include { BCFTOOLS_CONCAT } from '../../../modules/nf-core/bcftools/concat' +include { BCFTOOLS_SORT } from '../../../modules/nf-core/bcftools/sort' + +// IMPORT SUBWORKFLOWS +include { DEEPSOMATIC } from '../../../subworkflows/local/deepsomatic.nf' +include { SMALL_VARIANT_CONSENSUS as SOMATIC_CONSENSUS } from '../../../subworkflows/local/small_variant_consensus.nf' + +workflow PAIRED_SMALLVAR_SOMATIC { + + take: + tumor_normal_bams // [meta, tumor_bam, tumor_bai, normal_bam, normal_bai] + fasta // [[:], fasta] + fai // [[:], fai] + ds_pon_channel // [ [pon_vcf_path, ...] 
] or [ [] ] + // user-supplied DeepSomatic PON VCFs; empty list => no --population_vcfs (paired mode) + + main: + somatic_vcf = channel.empty() + + // CLAIRS: somatic SNV/indel calling from T/N paired BAMs + if(params.somatic_var_keep.contains('clair')) { + // Append ClairS model name (from meta) as the last element for CLAIRS module + tumor_normal_bams + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + return[meta , tumor_bam, tumor_bai, normal_bam, normal_bai, meta.clairS_model] + } + .set { clairs_input } + // clairs_input: [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, clairS_model_str] + + // + // MODULE: CLAIRS (label: process_high) + // Input: [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, model_str] + // fasta / fai + // Output: .vcfs -- [meta, [snv_vcf, indel_vcf]] -- separate SNV and indel VCFs + // .tbi -- [meta, [snv_tbi, indel_tbi]] + // + CLAIRS ( + clairs_input, + fasta, + fai + ) + + // CONCAT CLAIRS INDEL AND SNV OUTPUT + // ClairS outputs separate SNV and indel VCFs; merge into a single sorted VCF + CLAIRS.out.vcfs + .join(CLAIRS.out.tbi) + .set{clairs_out} + // clairs_out: [meta, [snv_vcf, indel_vcf], [snv_tbi, indel_tbi]] + + // + // MODULE: BCFTOOLS_CONCAT (label: process_medium) + // Input: [meta, [vcf...], [tbi...]] + // Output: .vcf -- [meta, vcf] -- unsorted concatenated SNV+indel VCF + // + BCFTOOLS_CONCAT ( + clairs_out + ) + + // + // MODULE: BCFTOOLS_SORT (label: process_medium) + // Input: [meta, vcf] + // Output: .vcf -- [meta, vcf] -- coordinate-sorted VCF + // .tbi -- [meta, tbi] + // + BCFTOOLS_SORT ( + BCFTOOLS_CONCAT.out.vcf + ) + + BCFTOOLS_SORT.out.vcf + .join(BCFTOOLS_SORT.out.tbi) + .map { meta, vcf , tbi -> + def new_meta = meta + [caller:'clairs'] + return [new_meta, vcf, tbi] + } + .set{clairs_ch} + // clairs_ch: [meta(+caller:'clairs'), vcf, tbi] -- merged and sorted ClairS somatic VCF + } + + // DEEPSOMATIC: somatic variant calling using deep learning T/N model + 
if(params.somatic_var_keep.contains('deepsomatic')) { + + // DeepSomatic expects [normal, tumor] order (opposite of input tuple) + tumor_normal_bams + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + return [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] + } + .set{ deepsomatic_input } + // deepsomatic_input: [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] + + // + // SUBWORKFLOW: DEEPSOMATIC (local) + // Input: [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] + // [[:],[]] -- empty intervals + // fasta / fai / [[:],[]] -- empty GZI + // Output: .vcf -- [meta, vcf] + // .vcf_index -- [meta, tbi] + // + DEEPSOMATIC ( + deepsomatic_input, + [[:],[]], // intervals (empty = genome-wide) + fasta, + fai, + [[:],[]], // GZI (empty if FASTA is uncompressed) + ds_pon_channel + ) + + DEEPSOMATIC.out.vcf + .join(DEEPSOMATIC.out.vcf_index) + .map{ meta, vcf, tbi -> + def new_meta = meta + [caller:'deepsomatic'] + return [new_meta, vcf, tbi] + } + .set{deepsomatic_ch} + // deepsomatic_ch: [meta(+caller:'deepsomatic'), vcf, tbi] + } + + // COMBINE SOMATIC VARIATION + // If both callers requested: run consensus subworkflow; otherwise pass through single-caller output + if (params.somatic_var_keep.size() > 1) { + clairs_ch + .mix(deepsomatic_ch) + .set{combine_somatic_ch} + // combine_somatic_ch: [meta(+caller), vcf, tbi] -- one item per caller per sample + + // SUBWORKFLOW: SOMATIC_CONSENSUS (SMALL_VARIANT_CONSENSUS alias) + SOMATIC_CONSENSUS( + combine_somatic_ch, + fasta, + fai, + params.prioritize_caller_somatic, + params.somatic_var_combine + ) + + SOMATIC_CONSENSUS.out.vcf + .join(SOMATIC_CONSENSUS.out.tbi) + .set{ somatic_vcf } + // somatic_vcf: [meta(+caller from consensus), vcf, tbi] + } + else if (params.somatic_var_keep == ['clair']) { + clairs_ch + .set{somatic_vcf} + } + else if (params.somatic_var_keep == ['deepsomatic']) { + deepsomatic_ch + .set{somatic_vcf} + } + + // Strip 'caller' from meta before emitting + somatic_vcf + .map{ meta, 
vcf, tbi -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, vcf, tbi] + } + .set{somatic_vcf} + + emit: + somatic_vcf // [meta, vcf, tbi] -- final somatic VCF (ClairS, DeepSomatic, or consensus) +} diff --git a/subworkflows/local/phasing_haplotyping.nf b/subworkflows/local/phasing_haplotyping.nf new file mode 100644 index 00000000..32af3d42 --- /dev/null +++ b/subworkflows/local/phasing_haplotyping.nf @@ -0,0 +1,394 @@ +// Import modules +include { LONGPHASE_PHASE as LONGPHASE_PHASE_GERMLINE } from '../../modules/nf-core/longphase/phase/main.nf' +include { LONGPHASE_PHASE as LONGPHASE_PHASE_SOMATIC } from '../../modules/nf-core/longphase/phase/main.nf' +include { LONGPHASE_HAPLOTAG } from '../../modules/nf-core/longphase/haplotag/main.nf' +include { LONGPHASE_MODCALL as LONGPHASE_MODCALL_GERMLINE } from '../../modules/local/longphase/modcall/main.nf' +include { LONGPHASE_MODCALL as LONGPHASE_MODCALL_SOMATIC } from '../../modules/local/longphase/modcall/main.nf' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main.nf' +include { BCFTOOLS_CONCAT } from '../../modules/nf-core/bcftools/concat/main' +include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort/main' + + +workflow PHASING_HAPLOTYPING { + take: + tumor_normal_bams // [meta, bam, bai] -- all samples: tumor, normal, and tumor-only + germline_vcf // [meta, vcf, tbi] -- germline small variants (from PAIRED_SMALLVAR_GERMLINE or TUMORONLY_SMALLVAR) + somatic_vcf // [meta, vcf, tbi] -- somatic small variants (from PAIRED_SMALLVAR_SOMATIC or TUMORONLY_SMALLVAR) + fasta // [[:], fasta] + fai // [[:], fai] + + main: + + // SPLIT INTO PAIRED AND TUMOR ONLY + // paired_data is set to the matched sample ID for paired samples, null/false for tumor-only + tumor_normal_bams + .branch { meta, _bams, _bai -> + paired: meta.paired_data + tumor_only: !meta.paired_data + 
} + .set { branched_bams } + // branched_bams.paired: [meta, bam, bai] -- tumor + normal from paired runs + // branched_bams.tumor_only: [meta, bam, bai] -- tumor-only samples + + branched_bams.paired + .set{ paired_ch } + + // Strip 'type' from tumor-only meta (no type distinction needed in this stream) + branched_bams.tumor_only + .map { meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return [ new_meta, bam, bai ] + } + .set{ tumor_only_ch } + // tumor_only_ch: [meta (no type), bam, bai] + + // Split paired samples into normal and tumor streams for separate handling + paired_ch + .branch { meta, _bam, _bai -> + normal: meta.type == "normal" + tumor: meta.type == "tumor" + } + .set {paired_ch_branched} + // paired_ch_branched.normal: [meta, bam, bai] -- normal BAMs from T/N pairs + // paired_ch_branched.tumor: [meta, bam, bai] -- tumor BAMs from T/N pairs + + // Strip 'type' from paired normal/tumor meta to allow joining with tumor-only channel + paired_ch_branched.normal + .map { meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return [ new_meta, bam, bai ] + } + .set{ paired_normal_ch } + // paired_normal_ch: [meta (no type), bam, bai] + + paired_ch_branched.tumor + .map { meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return [ new_meta, bam, bai ] + } + .set{ paired_tumor_ch } + // paired_tumor_ch: [meta (no type), bam, bai] + + // Germline phasing uses normal BAMs (+ tumor-only BAMs used as their own "normal" proxy) + tumor_only_ch + .mix(paired_normal_ch) + .set { normal_bams_w_tumoronly_ch } + // normal_bams_w_tumoronly_ch: [meta, bam, bai] + // -- normal BAMs from T/N pairs + 
tumor-only BAMs (both phased with germline VCF) + + // Somatic phasing uses tumor BAMs (+ tumor-only BAMs) + tumor_only_ch + .mix(paired_tumor_ch) + .set{ tumor_bams_ch} + // tumor_bams_ch: [meta, bam, bai] -- tumor BAMs from T/N pairs + tumor-only BAMs + + // MODCALL: detect base modifications (e.g. 5mC) from aligned BAMs using Longphase + // Results are used as additional evidence during phasing + + if (!params.skip_modcall) { + + // + // MODULE: LONGPHASE_MODCALL_GERMLINE (label: process_high) + // Input: [meta, bam, bai] -- normal BAMs (+ tumor-only BAMs) + // fasta / fai + // Output: .mod_vcf -- [meta, vcf] -- base modification calls (e.g. CpG methylation) + // + LONGPHASE_MODCALL_GERMLINE ( + normal_bams_w_tumoronly_ch, + fasta, + fai + ) + + // + // MODULE: LONGPHASE_MODCALL_SOMATIC (label: process_high) + // Input: [meta, bam, bai] -- tumor BAMs (+ tumor-only BAMs) + // fasta / fai + // Output: .mod_vcf -- [meta, vcf] -- base modification calls for tumor + // + + LONGPHASE_MODCALL_SOMATIC ( + tumor_bams_ch, + fasta, + fai + ) + + } + + // Merge germline and somatic VCFs into a single file for somatic phasing + // Longphase requires all variant sites in one VCF to produce a consistent phase block + germline_vcf + .join(somatic_vcf) + .map { meta, germ_vcf, germ_tbi, som_vcf, som_tbi -> + def vcfs = [som_vcf, germ_vcf] // somatic first (higher priority in phasing) + def tbis = [som_tbi, germ_tbi] + return [ meta, vcfs, tbis] + } + .set{germline_somatic_vcfs} + // germline_somatic_vcfs (pre-concat): [meta, [somatic_vcf, germline_vcf], [somatic_tbi, germline_tbi]] + + // + // MODULE: BCFTOOLS_CONCAT (label: process_medium) + // Input: [meta, [vcfs...], [tbis...]] -- somatic + germline VCFs to concatenate + // Output: .vcf -- [meta, vcf] -- unsorted concatenated VCF + // + BCFTOOLS_CONCAT(germline_somatic_vcfs) + BCFTOOLS_CONCAT.out.vcf + .set{concat_out} + // concat_out: [meta, vcf] -- concatenated (unsorted) somatic+germline VCF + + // + // MODULE: 
BCFTOOLS_SORT (label: process_medium) + // Input: [meta, vcf] -- unsorted concatenated VCF + // Output: .vcf -- [meta, vcf] -- coordinate-sorted VCF + // .tbi -- [meta, tbi] + // + BCFTOOLS_SORT(concat_out) + BCFTOOLS_SORT.out.vcf + .set{germline_somatic_vcfs} + // germline_somatic_vcfs (final): [meta, vcf] -- sorted combined somatic+germline VCF for somatic phasing + + // PHASING: assign variants to haplotypes using Longphase + // - Germline phasing: uses normal BAMs + germline-only VCF (produces the phase blocks) + // - Somatic phasing: uses tumor BAMs + merged somatic+germline VCF (transfers germline phase to somatic sites) + if (!params.skip_modcall) { + // With modcall: include base-modification VCF as additional phasing evidence + normal_bams_w_tumoronly_ch + .join(germline_vcf) + .join(LONGPHASE_MODCALL_GERMLINE.out.mod_vcf) + .map { meta, bam, bai, vcf, _tbi, mods-> + def svs = [] // SVs for phasing are not used here + return [ meta, bam, bai, vcf, svs, mods ] + } + .set{ longphase_phase_germline_input_ch } + // longphase_phase_germline_input_ch: [meta, bam, bai, germline_vcf, [], mod_vcf] + + tumor_bams_ch + .join(germline_somatic_vcfs) + .join(LONGPHASE_MODCALL_SOMATIC.out.mod_vcf) + .map { meta, bam, bai, vcf, mods-> + def svs = [] + return [ meta, bam, bai, vcf, svs, mods ] + } + .set{ longphase_phase_somatic_input_ch } + // longphase_phase_somatic_input_ch: [meta, bam, bai, somatic+germline_vcf, [], mod_vcf] + } + else { + // Without modcall: empty lists for SVs and mods + normal_bams_w_tumoronly_ch + .join(germline_vcf) + .map { meta, bam, bai, vcf, _tbi -> + def svs = [] + def mods = [] + return [ meta, bam, bai, vcf, svs, mods ] + } + .set{ longphase_phase_germline_input_ch } + // longphase_phase_germline_input_ch: [meta, bam, bai, germline_vcf, [], []] + + tumor_bams_ch + .join(germline_somatic_vcfs) + .map { meta, bam, bai, vcf -> + def svs = [] + def mods = [] + return [ meta, bam, bai, vcf, svs, mods ] + } + .set{ 
longphase_phase_somatic_input_ch } + // longphase_phase_somatic_input_ch: [meta, bam, bai, somatic+germline_vcf, [], []] + } + + // + // MODULE: LONGPHASE_PHASE_GERMLINE (label: process_medium) + // Input: [meta, bam, bai, vcf, svs, mods] -- normal BAMs + germline VCF (± mod VCF) + // fasta / fai + // Output: .snv_vcf -- [meta, vcf] -- phased germline SNV VCF (PS tags added) + // .snv_vcf_index -- [meta, tbi] + // + LONGPHASE_PHASE_GERMLINE ( + longphase_phase_germline_input_ch, + fasta, + fai + ) + + LONGPHASE_PHASE_GERMLINE.out.snv_vcf + .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf_index) + .set{ phased_germline_vcf } + // phased_germline_vcf: [meta, vcf, tbi] -- Longphase-phased germline VCF + + // + // MODULE: LONGPHASE_PHASE_SOMATIC (label: process_medium) + // Input: [meta, bam, bai, combined_vcf, svs, mods] -- tumor BAMs + somatic+germline VCF (± mod VCF) + // fasta / fai + // Output: .snv_vcf -- [meta, vcf] -- phased somatic (+ germline) VCF + // .snv_vcf_index -- [meta, tbi] + // + LONGPHASE_PHASE_SOMATIC ( + longphase_phase_somatic_input_ch, + fasta, + fai + ) + + LONGPHASE_PHASE_SOMATIC.out.snv_vcf + .join(LONGPHASE_PHASE_SOMATIC.out.snv_vcf_index) + .set{ phased_somatic_vcf } + // phased_somatic_vcf: [meta, vcf, tbi] -- Longphase-phased somatic (+ germline) VCF + + // HAPLOTAGGING: tag each read in the BAM with its haplotype (HP tag) using the phased germline VCF + // All sample types (tumor, normal, tumor-only) are haplotagged using the germline phase blocks + // 'type' is re-added to meta here so downstream tools can distinguish tumor from normal in the output + + if(!params.skip_modcall) { + // Strip 'type' from modcall output meta to allow joining with other channels (which have no 'type') + LONGPHASE_MODCALL_GERMLINE.out.mod_vcf + .map { meta, mods -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return [ new_meta, mods ] + } + 
.set{modcall_vcf_ch} + // modcall_vcf_ch: [meta (no type), mod_vcf] -- base modification VCF from germline modcall + + // Build haplotag input for tumor-only samples (re-add type:"tumor") + tumor_only_ch + .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) + .join(modcall_vcf_ch) + .map { meta, bam, bai, vcf, mods -> + def new_meta = meta + [type : "tumor"] + def svs = [] + return [new_meta, bam, bai, vcf, svs, mods] + } + .set{ tumor_only_ch } + // tumor_only_ch (updated): [meta+type:tumor, bam, bai, phased_germline_vcf, [], mod_vcf] + + paired_tumor_ch + .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) + .join(modcall_vcf_ch) + .map { meta, bam, bai, vcf, mods -> + def new_meta = meta + [type : "tumor"] + def svs = [] + return [new_meta, bam, bai, vcf, svs, mods] + } + .set{ paired_tumor_ch } + // paired_tumor_ch (updated): [meta+type:tumor, bam, bai, phased_germline_vcf, [], mod_vcf] + + paired_normal_ch + .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) + .join(modcall_vcf_ch) + .map { meta, bam, bai, vcf, mods -> + def new_meta = meta + [type : "normal"] + def svs = [] + return [new_meta, bam, bai, vcf, svs, mods] + } + .set{ paired_normal_ch } + // paired_normal_ch (updated): [meta+type:normal, bam, bai, phased_germline_vcf, [], mod_vcf] + + } + else { + // Without modcall: empty lists for mods + tumor_only_ch + .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) + .map { meta, bam, bai, vcf -> + def new_meta = meta + [type : "tumor"] + def svs = [] + def mods = [] + return [new_meta, bam, bai, vcf, svs, mods] + } + .set{ tumor_only_ch } + // tumor_only_ch (updated): [meta+type:tumor, bam, bai, phased_germline_vcf, [], []] + + paired_tumor_ch + .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) + .map { meta, bam, bai, vcf -> + def new_meta = meta + [type : "tumor"] + def svs = [] + def mods = [] + return [new_meta, bam, bai, vcf, svs, mods] + } + .set{ paired_tumor_ch } + // paired_tumor_ch (updated): [meta+type:tumor, bam, bai, phased_germline_vcf, [], []] + + paired_normal_ch + 
.join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) + .map { meta, bam, bai, vcf -> + def new_meta = meta + [type : "normal"] + def svs = [] + def mods = [] + return [new_meta, bam, bai, vcf, svs, mods] + } + .set{ paired_normal_ch } + // paired_normal_ch (updated): [meta+type:normal, bam, bai, phased_germline_vcf, [], []] + + } + + // Merge all sample types for haplotagging in a single LONGPHASE_HAPLOTAG call + tumor_only_ch + .mix(paired_tumor_ch) + .mix(paired_normal_ch) + .set {longphase_haplotag_input_ch} + // longphase_haplotag_input_ch: [meta(+type), bam, bai, phased_germline_vcf, [], mod_vcf_or_[]] + // -- all samples (tumor-only, paired tumor, paired normal) + + // + // MODULE: LONGPHASE_HAPLOTAG (label: process_medium) + // Input: [meta, bam, bai, phased_vcf, svs, mods] -- BAM + phased germline VCF (± mod VCF) + // fasta / fai + // Output: .bam -- [meta, bam] -- BAM with HP (haplotype) and PS (phase set) tags added to reads + // + LONGPHASE_HAPLOTAG ( + longphase_haplotag_input_ch, + fasta, + fai + ) + + LONGPHASE_HAPLOTAG.out.bam + .set{ tumor_normal_hapbams_ch } + // tumor_normal_hapbams_ch (pre-index): [meta, bam] -- haplotagged BAM (no index yet) + + // + // MODULE: SAMTOOLS_INDEX (label: process_medium) + // Input: [meta, bam] -- haplotagged BAM + // Output: .bai -- [meta, bai] + // + SAMTOOLS_INDEX ( + tumor_normal_hapbams_ch + ) + tumor_normal_hapbams_ch + .join(SAMTOOLS_INDEX.out.bai) + .set{ tumor_normal_hapbams_ch } + // tumor_normal_hapbams_ch (final): [meta, bam, bai] -- haplotagged BAM with index + + + emit: + tumor_normal_hapbams_ch // [meta, bam, bai] -- haplotagged BAMs for all samples + phased_germline_vcf // [meta, vcf, tbi] -- phased germline VCF (used by SEVERUS + VEP) + phased_somatic_vcf // [meta, vcf, tbi] -- phased somatic VCF (used by VEP) +} diff --git a/subworkflows/local/prepare_annotation.nf b/subworkflows/local/prepare_annotation.nf index 76b5fdf5..f6b98e78 100644 --- a/subworkflows/local/prepare_annotation.nf +++ 
b/subworkflows/local/prepare_annotation.nf @@ -3,26 +3,41 @@ include {ENSEMBLVEP_DOWNLOAD } from '../../modules/nf-core/ensemblvep/download/m workflow PREPARE_ANNOTATION { take: - vep_cache - vep_cache_version - vep_genome - vep_args - vep_species - download_vep_cache + vep_cache // path: local VEP cache directory (or S3 annotation-cache URL) + vep_cache_version // int: VEP cache version (e.g. 110) + vep_genome // str: genome assembly string (e.g. "GRCh38") + vep_args // str: extra VEP CLI arguments (parsed to detect --merged / --refseq) + vep_species // str: species name (e.g. "homo_sapiens") + download_vep_cache // bool: if true, download cache via ENSEMBLVEP_DOWNLOAD instead of using local path main: - ch_versions = Channel.empty() - ensemblvep_cache = Channel.empty() + ch_versions = channel.empty() + ensemblvep_cache = channel.empty() + + // + // MODULE: ENSEMBLVEP_DOWNLOAD (label: process_medium) + // Only runs when params.download_vep_cache == true + // Input: vep_download_info -- [[:], vep_genome, vep_species, vep_cache_version] + // Output: .cache -- downloaded and extracted VEP cache directory + // if (download_vep_cache) { - vep_download_info = Channel.of([[],vep_genome, vep_species, vep_cache_version]) - ENSEMBLVEP_DOWNLOAD(vep_download_info) + // Build input tuple: empty meta + genome/species/version for ENSEMBLVEP_DOWNLOAD + vep_download_info = channel.of([[],vep_genome, vep_species, vep_cache_version]) + // vep_download_info: [[:], genome_str, species_str, cache_version_int] + + ENSEMBLVEP_DOWNLOAD ( + vep_download_info + ) + ensemblvep_cache = ENSEMBLVEP_DOWNLOAD.out.cache ch_versions = ch_versions.mix(ENSEMBLVEP_DOWNLOAD.out.versions) } else { + // Validate that the local cache directory exists and resolve the correct subdirectory + // The annotation-cache S3 bucket uses a version-prefixed path; local paths do not def vep_annotation_cache_key = (vep_cache == "s3://annotation-cache/vep_cache/") ? 
"${vep_cache_version}_${vep_genome}/" : "" def vep_species_suffix = vep_args.contains("--merged") ? '_merged' : (vep_args.contains("--refseq") ? '_refseq' : '') def vep_cache_dir = "${vep_annotation_cache_key}${vep_species}${vep_species_suffix}/${vep_cache_version}_${vep_genome}" @@ -35,18 +50,13 @@ workflow PREPARE_ANNOTATION { } } - ensemblvep_cache = Channel.fromPath(file("${vep_cache}/${vep_annotation_cache_key}"), checkIfExists: true).collect() + // Collect the resolved cache root as a channel value + ensemblvep_cache = channel.fromPath(file("${vep_cache}/${vep_annotation_cache_key}"), checkIfExists: true).collect() } - - - - // - // MODULE: ENSEMBLVEP_DOWNLOAD - // - + // ensemblvep_cache: path (or list-of-paths) to the VEP cache root directory emit: - vep_cache = ensemblvep_cache + vep_cache = ensemblvep_cache // path -- VEP cache directory (downloaded or validated local) versions = ch_versions } diff --git a/subworkflows/local/prepare_reference_files.nf b/subworkflows/local/prepare_reference_files.nf index 5df2b891..efc867d9 100644 --- a/subworkflows/local/prepare_reference_files.nf +++ b/subworkflows/local/prepare_reference_files.nf @@ -13,122 +13,150 @@ include { WGET } from '../../modules/nf-core/wget/main workflow PREPARE_REFERENCE_FILES { take: - fasta - ascat_alleles - ascat_loci - ascat_loci_gc - ascat_loci_rt - basecall_meta - clair3_modelMap + fasta // str: path to reference FASTA (may be .gz) + ascat_alleles // str: path to ASCAT allele files (directory or .zip), or null + ascat_loci // str: path to ASCAT loci files (directory or .zip), or null + ascat_loci_gc // str: path to ASCAT GC correction file (.zip or direct), or null + ascat_loci_rt // str: path to ASCAT RT correction file (.zip or direct), or null + basecall_meta // [meta, basecall_model_str, kinetics_str] -- from METAEXTRACT per sample + clair3_modelMap // Map -- used to resolve download URLs main: - ch_versions = Channel.empty() - ch_prepared_fasta = Channel.empty() - allele_files = 
Channel.empty() - loci_files = Channel.empty() - gc_file = Channel.empty() - rt_file = Channel.empty() - - // Check if fasta and gtf are zipped + ch_versions = channel.empty() + ch_prepared_fasta = channel.empty() + allele_files = channel.empty() + loci_files = channel.empty() + gc_file = channel.empty() + rt_file = channel.empty() + + // Decompress FASTA if gzipped; pass through as-is if already uncompressed if (fasta.endsWith('.gz')){ + // + // MODULE: UNZIP_FASTA (PIGZ_UNCOMPRESS alias; label: process_medium) + // Input: [[:], fasta.gz] + // Output: .file -- [[:], fasta] -- decompressed FASTA + // UNZIP_FASTA( [ [:], fasta ]) ch_prepared_fasta = UNZIP_FASTA.out.file ch_versions = ch_versions.mix(UNZIP_FASTA.out.versions) } else { - ch_prepared_fasta = [ [:], fasta ] + ch_prepared_fasta = channel.value([ [:], fasta ]) } + // ch_prepared_fasta: [[:], fasta_path] -- empty meta; uncompressed FASTA - - basecall_meta.map { meta, basecall_model_meta, kinetics_meta -> - def id_new = basecall_model_meta ?: meta.clair3_model + // Build Clair3 model download URLs from basecall metadata + // Priority: explicit meta.clair3_model param > auto-detected from BAM header via modelMap + // PacBio models from HKU mirror; ONT models from Oxford Nanopore CDN + basecall_meta.map { meta, basecall_model_meta, _kinetics_meta -> + def id_new = basecall_model_meta ? clair3_modelMap.get(basecall_model_meta) : basecall_model_meta def meta_new = [id: id_new] def model = (!meta.clair3_model || meta.clair3_model.toString().trim() in ['', '[]']) ? clair3_modelMap.get(basecall_model_meta) : meta.clair3_model def download_prefix = ( basecall_model_meta == 'hifi_revio' ? 
"https://www.bio8.cs.hku.hk/clair3/clair3_models/" : "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3" ) def url = "${download_prefix}/${model}.tar.gz" return [ meta_new, url ] } - .unique() - .set{ model_urls } + .unique() // deduplicate: multiple samples with the same basecall model share one download + .set{ clair3_model_urls } + // clair3_model_urls: [meta(id=clair3_model_name), download_url_str] + // one item per unique Clair3 model needed across all samples // - // MODULE: Download model + // MODULE: WGET (label: process_single) + // Input: [meta, url_str] -- model name (id) + download URL + // Output: .outfile -- [meta, tarball] -- downloaded .tar.gz model archive // - - WGET ( model_urls ) + WGET ( clair3_model_urls ) ch_versions = ch_versions.mix(WGET.out.versions) // - // MODULE: Untar model + // MODULE: UNTAR (label: process_single) + // Input: WGET.out.outfile -- [meta, tarball] + // Output: .untar -- [meta, model_dir] -- extracted Clair3 model directory // - UNTAR ( WGET.out.outfile ) - ch_versions = ch_versions.mix(UNTAR.out.versions) - - UNTAR.out.untar.set { downloaded_model_files } + UNTAR.out.untar.set { downloaded_clair3_models } + // downloaded_clair3_models: [meta(id=clair3_model_name), model_dir] // - // MODULE: Index the fasta + // MODULE: SAMTOOLS_FAIDX (label: process_single) + // Input: [[:], fasta, []] -- empty meta + empty regions file (index full FASTA) + // false -- do not write fai to stdout + // Output: .fai -- [[:], fai_path] // - SAMTOOLS_FAIDX ( - ch_prepared_fasta, - [ [:], [] ], + ch_prepared_fasta.map { meta, fa -> [meta, fa, []] }, false ) ch_prepared_fai = SAMTOOLS_FAIDX.out.fai - - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + // ch_prepared_fai: [[:], fai_path] -- empty meta // - // Prepare ASCAT files + // Prepare ASCAT reference files + // Each file set can be provided as a .zip archive or a plain directory/file path + // All ASCAT outputs are flat file collections (no meta tuple) for use 
with ASCAT module // - - // prepare ascat and controlfreec reference files if ( !params.skip_ascat ) { - if (!ascat_alleles) allele_files = Channel.empty() + // Allele files: per-chromosome SNP allele frequency files (used for LogR/BAF calculation) + if (!ascat_alleles) allele_files = channel.empty() else if (ascat_alleles.endsWith(".zip")) { - UNZIP_ALLELES(Channel.fromPath(file(ascat_alleles)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) - allele_files = UNZIP_ALLELES.out.unzipped_archive.flatMap { it[1].listFiles() }.collect() + // MODULE: UNZIP_ALLELES (UNZIP alias; label: process_single) + // Input: [meta(id=basename), [zip_file]] -- collected zip + // Output: .unzipped_archive -- [meta, dir] -- extracted directory; flatMap lists individual files + UNZIP_ALLELES(channel.fromPath(file(ascat_alleles)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) + allele_files = UNZIP_ALLELES.out.unzipped_archive.flatMap { it -> it[1].listFiles() }.collect() + // allele_files: [path, path, ...] -- all per-chromosome allele files collected ch_versions = ch_versions.mix(UNZIP_ALLELES.out.versions) - } else allele_files = Channel.fromPath(ascat_alleles).collect() + } else allele_files = channel.fromPath(ascat_alleles).collect() - if (!ascat_loci) loci_files = Channel.empty() + // Loci files: per-chromosome SNP loci positions + if (!ascat_loci) loci_files = channel.empty() else if (ascat_loci.endsWith(".zip")) { - UNZIP_LOCI(Channel.fromPath(file(ascat_loci)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) - loci_files = UNZIP_LOCI.out.unzipped_archive.flatMap { it[1].listFiles() }.collect() + // MODULE: UNZIP_LOCI (UNZIP alias; label: process_single) + UNZIP_LOCI(channel.fromPath(file(ascat_loci)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) + loci_files = UNZIP_LOCI.out.unzipped_archive.flatMap { it -> it[1].listFiles() }.collect() + // loci_files: [path, path, ...] 
-- all per-chromosome loci files collected ch_versions = ch_versions.mix(UNZIP_LOCI.out.versions) - } else loci_files = Channel.fromPath(ascat_loci).collect() + } else loci_files = channel.fromPath(ascat_loci).collect() - if (!ascat_loci_gc) gc_file = Channel.value([]) + // GC correction file: genome-wide GC content per locus (optional) + if (!ascat_loci_gc) gc_file = channel.value([]) else if ( ascat_loci_gc.endsWith(".zip") ) { - UNZIP_GC(Channel.fromPath(file(ascat_loci_gc)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) - gc_file = UNZIP_GC.out.unzipped_archive.flatMap { it[1].listFiles() }.collect() + // MODULE: UNZIP_GC (UNZIP alias; label: process_single) + UNZIP_GC(channel.fromPath(file(ascat_loci_gc)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) + gc_file = UNZIP_GC.out.unzipped_archive.flatMap { it -> it[1].listFiles() }.collect() + // gc_file: [path, ...] -- GC correction file(s) collected ch_versions = ch_versions.mix(UNZIP_GC.out.versions) - } else gc_file = Channel.fromPath(ascat_loci_gc).collect() + } else gc_file = channel.fromPath(ascat_loci_gc).collect() - if (!ascat_loci_rt) rt_file = Channel.value([]) + // Replication timing correction file: RT correction per locus (optional) + if (!ascat_loci_rt) rt_file = channel.value([]) else if (ascat_loci_rt.endsWith(".zip")) { - UNZIP_RT(Channel.fromPath(file(ascat_loci_rt)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) - rt_file = UNZIP_RT.out.unzipped_archive.flatMap { it[1].listFiles() }.collect() + // MODULE: UNZIP_RT (UNZIP alias; label: process_single) + UNZIP_RT(channel.fromPath(file(ascat_loci_rt)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) + rt_file = UNZIP_RT.out.unzipped_archive.flatMap { it -> it[1].listFiles() }.collect() + // rt_file: [path, ...] 
-- RT correction file(s) collected ch_versions = ch_versions.mix(UNZIP_RT.out.versions) - } else rt_file = Channel.fromPath(ascat_loci_rt).collect() + } else rt_file = channel.fromPath(ascat_loci_rt).collect() } emit: - prepped_fasta = ch_prepared_fasta - prepped_fai = ch_prepared_fai - - allele_files - loci_files - gc_file - rt_file - downloaded_model_files + prepped_fasta = ch_prepared_fasta // [[:], fasta_path] -- uncompressed reference FASTA + prepped_fai = ch_prepared_fai // [[:], fai_path] -- samtools FAI index + + // ASCAT reference files -- flat file collections (no meta tuple wrapper) + // Each is a list of paths collected into a single channel value + allele_files // [path, ...] -- per-chromosome allele frequency files + loci_files // [path, ...] -- per-chromosome loci position files + gc_file // [path, ...] -- GC correction file ([] if not provided) + rt_file // [path, ...] -- replication timing correction file ([] if not provided) + + downloaded_clair3_models // [meta(id=clair3_model_name), model_dir] versions = ch_versions } diff --git a/subworkflows/local/small_variant_consensus.nf b/subworkflows/local/small_variant_consensus.nf new file mode 100644 index 00000000..57acddfa --- /dev/null +++ b/subworkflows/local/small_variant_consensus.nf @@ -0,0 +1,260 @@ +include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm/main' +include { BCFTOOLS_ISEC } from '../../modules/nf-core/bcftools/isec/main' +include { BCFTOOLS_QUERY } from '../../modules/nf-core/bcftools/query/main' +include { BCFTOOLS_ANNOTATE } from '../../modules/nf-core/bcftools/annotate/main' +include { BCFTOOLS_ANNOTATE as STANDARDIZE_AF } from '../../modules/nf-core/bcftools/annotate/main' +include { BCFTOOLS_CONCAT } from '../../modules/nf-core/bcftools/concat/main' +include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort/main' +include { BCFTOOLS_SORT as SORT_POST_NORM } from '../../modules/nf-core/bcftools/sort/main' + + + +workflow SMALL_VARIANT_CONSENSUS { + take: 
+ mixed_vcfs // [meta(+caller field), vcf, tbi] -- one item per caller per sample + // meta.caller is one of: 'clair3', 'clairs-to', 'clairs', 'deepvariant', 'deepsomatic' + fasta // [[:], fasta] + _fai // [[:], fai] + prioritize_caller // str: which caller's calls take priority ('deepvariant'/'deepsomatic' or 'clair') + combine_method // str: 'consensus' (intersection only) or 'all' (intersection + private calls from priority caller) + + main: + + // + // MODULE: BCFTOOLS_NORM (label: process_medium) + // Left-align and normalise each per-caller VCF. bcftools norm does not require + // sorted input, and left-alignment can itself shift positions and create + // out-of-order records, so we sort AFTER normalisation rather than before. + // Input: [meta, vcf, tbi] -- per-caller VCF + // Output: .vcf -- [meta, vcf] -- left-aligned, normalised VCF (unsorted) + // + BCFTOOLS_NORM(mixed_vcfs, fasta) + + // + // MODULE: SORT_POST_NORM (BCFTOOLS_SORT alias, label: process_medium) + // Re-sort after normalisation to fix any coordinate disorder introduced by + // left-alignment, and write the tabix index inline. + // Input: [meta, vcf] + // Output: .vcf -- [meta, vcf.gz] + // .tbi -- [meta, tbi] + // + SORT_POST_NORM(BCFTOOLS_NORM.out.vcf) + + SORT_POST_NORM.out.vcf + .join(SORT_POST_NORM.out.tbi) + .set { normalized_vcfs } + // normalized_vcfs: [meta(+caller), vcf.gz, tbi] -- normalised, sorted per-caller VCF + + // + // MODULE: STANDARDIZE_AF (BCFTOOLS_ANNOTATE alias, label: process_low) + // Renames the allele frequency FORMAT field to match the priority caller's convention: + // FORMAT/AF -> FORMAT/VAF when prioritize_caller is 'deepvariant'/'deepsomatic' + // FORMAT/VAF -> FORMAT/AF when prioritize_caller is 'clair' + // This is a no-op for VCFs that already use the target field name. + // + if (combine_method == 'all') { + normalized_vcfs + .map { meta, vcf, tbi -> + def rename_to = prioritize_caller in ['deepvariant', 'deepsomatic'] ? 
'VAF' : 'AF' + def new_meta = meta + [rename_to: rename_to] + return [new_meta, vcf, tbi, [], [], [], [], []] + } + .set { standardize_input } + + STANDARDIZE_AF(standardize_input) + + STANDARDIZE_AF.out.vcf + .join(STANDARDIZE_AF.out.tbi) + .map { meta, vcf, tbi -> + def clean_meta = meta.findAll { k, _v -> k != 'rename_to' } + return [clean_meta, vcf, tbi] + } + .set { normalized_vcfs } + // normalized_vcfs: [meta(+caller), vcf, tbi] -- normalised, AF-standardized per-caller VCF + } + // In 'consensus' mode, normalized_vcfs comes from SORT_POST_NORM (post-BCFTOOLS_NORM re-sorting) + + // + // MODULE: BCFTOOLS_QUERY (label: process_single) + // Extract variant positions to build a caller-annotation file used by BCFTOOLS_ANNOTATE + // Input: [meta, vcf, tbi] -- normalised VCF + // Output: .output -- [meta, tsv] -- tab-separated annotation file (CHROM POS CALLER) + // .index -- [meta, tbi] + // + BCFTOOLS_QUERY(normalized_vcfs, [], [], []) + + // Prepare BCFTOOLS_ANNOTATE input: VCF + caller-name annotation file + normalized_vcfs + .join(BCFTOOLS_QUERY.out.output) + .join(BCFTOOLS_QUERY.out.index) + .map{ meta, vcf, tbi, annotations, annotations_index -> + def columns = [] // no extra column specs + def header_lines = [] // no extra header lines + def rename_chrs = [] // no chromosome renaming + return [ meta, vcf, tbi, annotations, annotations_index, columns, header_lines, rename_chrs ] + } + .set{annotate_input} + // annotate_input: [meta, vcf, tbi, annotations_tsv, annotations_tbi, [], [], []] + + // + // MODULE: BCFTOOLS_ANNOTATE (label: process_medium) + // Adds CALLER INFO field to each VCF record using the query-generated annotation file + // Input: [meta, vcf, tbi, annotations_tsv, annotations_tbi, [], [], []] + // Output: .vcf -- [meta, vcf] -- VCF with CALLER annotation added + // .tbi -- [meta, tbi] + // + BCFTOOLS_ANNOTATE(annotate_input) + + BCFTOOLS_ANNOTATE.out.vcf + .join(BCFTOOLS_ANNOTATE.out.tbi) + .set{annotated_vcfs} + // annotated_vcfs: 
[meta(+caller), vcf, tbi] -- VCF with CALLER INFO tag + + // Branch annotated VCFs by caller family for the intersection step + annotated_vcfs + .branch { meta, _vcfs, _tbi -> + deepvariant: meta.caller in [ 'deepvariant', 'deepsomatic' ] + clair: meta.caller in ['clair3','clairs-to','clairs'] + } + .set{annotated_vcfs_branched} + // annotated_vcfs_branched.deepvariant: [meta(caller=deepvariant/deepsomatic), vcf, tbi] + // annotated_vcfs_branched.clair: [meta(caller=clair3/clairs-to/clairs), vcf, tbi] + + clair_ch = annotated_vcfs_branched.clair + deepvariant_ch = annotated_vcfs_branched.deepvariant + + // Strip 'caller' field from meta before joining so both channels share the same key + clair_ch + .map {meta, vcfs, tbi -> + def new_meta = meta.subMap('id', + 'paired_data', + 'type', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return [ new_meta, vcfs, tbi] + } + .set{clair_ch} + // clair_ch: [meta (no caller), vcf, tbi] + + deepvariant_ch + .map {meta, vcfs, tbi -> + def new_meta = meta.subMap('id', + 'paired_data', + 'type', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return [ new_meta, vcfs, tbi] + } + .set{deepvariant_ch} + // deepvariant_ch: [meta (no caller), vcf, tbi] + + // Join DeepVariant and Clair VCFs per sample into a single tuple for BCFTOOLS_ISEC + deepvariant_ch + .join(clair_ch) + .map { meta, deepvar_vcf, deepvar_tbi, clair_vcf, clair_tbi -> + def vcfs = [deepvar_vcf, clair_vcf] + def tbis = [deepvar_tbi, clair_tbi] + return [ meta, vcfs, tbis] + } + .set{mixed_vcfs} + // mixed_vcfs (re-paired): [meta, [deepvar_vcf, clair_vcf], [deepvar_tbi, clair_tbi]] + + // Add empty optional fields required by BCFTOOLS_ISEC + mixed_vcfs + .map{ meta, vcfs, tbis -> + def file = [] // no regions file + def target = [] // no target sites + def regions = [] // no region string + return [meta, vcfs, tbis, file, target, regions] + } + 
.set{isec_input} + // isec_input: [meta, [deepvar_vcf, clair_vcf], [deepvar_tbi, clair_tbi], [], [], []] + + // + // MODULE: BCFTOOLS_ISEC (label: process_medium) + // Computes the intersection and private sets for the two callers + // Input: [meta, [vcf1, vcf2], [tbi1, tbi2], [], [], []] + // Output (custom nf-core module outputs): + // .deepvar_consensus_vcf -- [meta, vcf] -- variants called by both callers (DeepVariant record) + // .clair_consensus_vcf -- [meta, vcf] -- variants called by both callers (Clair record) + // .deepvar_private_vcf -- [meta, vcf] -- variants unique to DeepVariant + // .clair_private_vcf -- [meta, vcf] -- variants unique to Clair + // (+ corresponding .tbi outputs for each) + // + BCFTOOLS_ISEC(isec_input) + + if (combine_method == 'consensus') { + // Take only the intersection: variants called by BOTH callers + // Use the record from the prioritized caller + if (prioritize_caller in ['deepvariant', 'deepsomatic']) { + BCFTOOLS_ISEC.out.deepvar_consensus_vcf + .set{vcf} + BCFTOOLS_ISEC.out.deepvar_consensus_tbi + .set{tbi} + } + else if (prioritize_caller == 'clair') { + BCFTOOLS_ISEC.out.clair_consensus_vcf + .set{vcf} + BCFTOOLS_ISEC.out.clair_consensus_tbi + .set{tbi} + } + // vcf/tbi: [meta, vcf/tbi] -- consensus-only calls from the priority caller + } + + else if (combine_method == 'all') { + // Take the intersection PLUS the private calls from the prioritized caller + // (private calls from the non-priority caller are discarded) + if (prioritize_caller in ['deepvariant', 'deepsomatic']) { + // consensus (DeepVariant record) + DeepVariant-private variants + BCFTOOLS_ISEC.out.deepvar_consensus_vcf + .join(BCFTOOLS_ISEC.out.deepvar_consensus_tbi) + .join(BCFTOOLS_ISEC.out.clair_private_vcf) + .join(BCFTOOLS_ISEC.out.clair_private_tbi) + .map{ meta, deepvar_vcf, deepvar_tbi, clair_vcf, clair_tbi -> + return[meta, [deepvar_vcf, clair_vcf], [deepvar_tbi, clair_tbi]] + } + .set{concat_input} + // concat_input: [meta, [consensus_vcf, 
private_vcf], [consensus_tbi, private_tbi]] + BCFTOOLS_CONCAT(concat_input) + BCFTOOLS_CONCAT.out.vcf + .set{concat_out} + } + else if (prioritize_caller == 'clair') { + // consensus (Clair record) + Clair-private variants + BCFTOOLS_ISEC.out.deepvar_private_vcf + .join(BCFTOOLS_ISEC.out.deepvar_private_tbi) + .join(BCFTOOLS_ISEC.out.clair_consensus_vcf) + .join(BCFTOOLS_ISEC.out.clair_consensus_tbi) + .map{ meta, deepvar_vcf, deepvar_tbi, clair_vcf, clair_tbi -> + return[meta, [deepvar_vcf, clair_vcf], [deepvar_tbi, clair_tbi]] + } + .set{concat_input} + // concat_input: [meta, [private_vcf, consensus_vcf], [private_tbi, consensus_tbi]] + BCFTOOLS_CONCAT(concat_input) + BCFTOOLS_CONCAT.out.vcf + .set{concat_out} + } + // concat_out: [meta, vcf] -- unsorted concatenated VCF (consensus + priority-caller-private) + BCFTOOLS_SORT(concat_out) + BCFTOOLS_SORT.out.vcf + .set{vcf} + BCFTOOLS_SORT.out.tbi + .set{tbi} + // vcf/tbi: [meta, vcf/tbi] -- sorted combined VCF + } + + emit: + vcf // [meta, vcf] -- final consensus/combined VCF + tbi // [meta, tbi] + +} diff --git a/subworkflows/local/tumor_normal_happhase.nf b/subworkflows/local/tumor_normal_happhase.nf deleted file mode 100644 index 91c7014c..00000000 --- a/subworkflows/local/tumor_normal_happhase.nf +++ /dev/null @@ -1,314 +0,0 @@ -include { CLAIR3 } from '../../modules/local/clair3/main.nf' -include { LONGPHASE_PHASE } from '../../modules/nf-core/longphase/phase/main.nf' -include { LONGPHASE_HAPLOTAG } from '../../modules/nf-core/longphase/haplotag/main.nf' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main.nf' -include { CLAIRS } from '../../modules/local/clairs/main.nf' -include { BCFTOOLS_CONCAT } from '../../modules/nf-core/bcftools/concat' -include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort' - -workflow TUMOR_NORMAL_HAPPHASE { - take: - mixed_bams - fasta - fai - clair3_modelMap - clairs_modelMap - downloaded_model_files - - main: - - ch_versions = Channel.empty() - 
tumor_only_severus = Channel.empty() - somatic_vep = Channel.empty() - germline_vep = Channel.empty() - - // Branch input bams in normal and tumour - mixed_bams - .branch{ meta, bam, bai -> - normal: meta.type == "normal" - tumor: meta.type == "tumor" - } - .set{ mixed_bams } - - // Get normal bams and add platform/model info for Clair3 usage - // remove type from so that information can be merged easier later - - downloaded_model_files - .map{ meta, file -> - def basecall_model = meta.id - return [basecall_model, meta, file] - } - .set{downloaded_model_files} - - mixed_bams.normal - .map{ meta, bam, bai -> - def basecall_model = (!meta.clair3_model || meta.clair3_model.toString().trim() in ['', '[]']) ? meta.basecall_model : meta.clair3_model - def new_meta = [id: meta.id, - paired_data: meta.paired_data, - platform: meta.platform, - sex: meta.sex, - fiber: meta.fiber, - basecall_model: meta.basecall_model, - clairS_model: meta.clairS_model] - return [ basecall_model, new_meta, bam, bai ] - } - .set { normal_bams_model } - - normal_bams_model - .combine(downloaded_model_files,by:0) - .map{ basecall_model, meta, bam, bai, meta2, model -> - def platform = (meta.platform == "pb") ? 
"hifi" : "ont" - return [meta, bam, bai, model, platform] - } - .set{ normal_bams } - - // normal_bams -> meta: [id, paired_data, platform, sex, fiber, basecall_model] - // bam: list of concatenated aligned bams - // bai: indexes for bam files - // clair3_model: clair3 model name - // platform: name of sequencing platform - - - // Get tumour bams - // remove type from so that information can be merged easier later - mixed_bams.tumor - .map{ meta, bam, bai -> - def new_meta = [id: meta.id, - paired_data: meta.paired_data, - platform: meta.platform, - sex: meta.sex, - fiber: meta.fiber, - basecall_model: meta.basecall_model, - clairS_model: meta.clairS_model] - return[new_meta, bam, bai] - } - .set{ tumor_bams } - - // tumor_bams -> meta: [id, paired_data, platform, sex, fiber, basecall_model] - // bam: list of concatenated aligned bams - // bai: indexes for bam files - - // - // MODULE: CLAIR3 - // - // small germline variant calling - CLAIR3 ( - normal_bams, - fasta, - fai - ) - - ch_versions = ch_versions.mix(CLAIR3.out.versions) - - // Add germline vcf to normal bams - // remove clair3 model information - - normal_bams - .join(CLAIR3.out.vcf) - .map { meta, bam, bai, clair3_model, platform, vcf -> - def svs = [] - def mods = [] - return [meta, bam, bai, vcf, svs, mods] - } - .set{ normal_bams_germlinevcf } - - // normal_bams -> meta: [id, paired_data, platform, sex, type, fiber, basecall_model] - // bam: list of concatenated aligned bams - // bai: indexes for bam files - // vcf: normal small germline variant vcf - // svs: structural variant vcf (empty) - // mods: modcall-generated VCF with modifications (empty) - - CLAIR3.out.vcf - .map { meta, vcf -> - def extra = [] - return [meta, vcf, extra] - } - .set { germline_vep } - - // - // MODULE: LONGPHASE_PHASE - // - // Phase normals - - LONGPHASE_PHASE ( - normal_bams_germlinevcf, - fasta, - fai - ) - - ch_versions = ch_versions.mix(LONGPHASE_PHASE.out.versions) - - // Add phased vcf to normal bams - // Add type 
information back - // both are needed for mixing with the tumor bams - - normal_bams - .join(LONGPHASE_PHASE.out.vcf) - .map { meta, bam, bai, clair3_model, platform, vcf -> - def new_meta = meta + [type: "normal"] - def snvs = [] - def mods = [] - return[new_meta, bam, bai, vcf, snvs, mods] - } - .set{ normal_bams } - - // normal_bams -> meta: [id, paired_data, platform, sex, type, fiber, basecall_model] - // bam: list of concatenated aligned bams - // bai: indexes for bam files - // vcf: normal small germline variant vcf - // svs: structural variant vcf (empty) - // mods: modcall-generated VCF with modifications (empty) - - - // Add phased vcf to tumour bams and type information - // mix with the normal bams - tumor_bams - .join(LONGPHASE_PHASE.out.vcf) - .map { meta, bam, bai, vcf -> - def new_meta = meta + [type: "tumor"] - def snvs = [] - def mods = [] - return [new_meta, bam, bai, vcf, snvs, mods] - } - .mix(normal_bams) - .set{ mixed_bams_vcf } - - // mixed_bams_vcf -> meta: [id, paired_data, platform, sex, type, fiber, basecall_model] - // bam: list of concatenated aligned bams - // bai: indexes for bam files - // vcf: normal small germline variant vcf - // svs: structural variant vcf (empty) - // mods: modcall-generated VCF with modifications (empty) - - // - // MODULE: LONGPHASE_HAPLOTAG - // - // haplotag tumor and normal bams with normal vcf files for both - LONGPHASE_HAPLOTAG ( - mixed_bams_vcf, - fasta, - fai - ) - - ch_versions = ch_versions.mix(LONGPHASE_HAPLOTAG.out.versions) - - // Get final tagged bams - LONGPHASE_HAPLOTAG.out.bam - .set{ mixed_hapbams } - - // mixed_hapbams -> meta: [id, paired_data, platform, sex, type, fiber, basecall_model] - // bams: haplotagged aligned bams - - - // - // MODULE: SAMTOOLS_INDEX - // - // index the haplotaged bams - - SAMTOOLS_INDEX ( - mixed_hapbams - ) - - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) - - // Add index to channel - mixed_bams_vcf - .join(mixed_hapbams) - 
.join(SAMTOOLS_INDEX.out.bai) - .set{ mixed_hapbams } - - // mixed_hapbams -> meta: [id, paired_data, platform, sex, type, fiber, basecall_model] - // bams: haplotagged aligned bams - // bais: indexes for bam files - - // Group everything back together in one channel - mixed_hapbams - .map { meta, bam, bai, vcf, snvs, mods, hapbam, hapbai -> - def new_meta = [id: meta.id, - paired_data: meta.paired_data, - platform: meta.platform, - sex: meta.sex, - fiber: meta.fiber, - basecall_model: meta.basecall_model, - clairS_model: meta.clairS_model] - return[new_meta, [[type: meta.type], hapbam], [[type: meta.type], hapbai]] - } - .groupTuple(size: 2) - .map{ meta, bam, bai -> - def normal_bam = bam[0][0].type == "normal" ? bam[0][1] : bam[1][1] - def tumor_bam = bam[0][0].type == "tumor" ? bam[0][1] : bam[1][1] - def normal_bai = bai[0][0].type == "normal" ? bai[0][1] : bai[1][1] - def tumor_bai = bai[0][0].type == "tumor" ? bai[0][1] : bai[1][1] - // Return channel - return [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai ] - } - .join(LONGPHASE_PHASE.out.vcf) - .join(LONGPHASE_PHASE.out.tbi) - .set{tumor_normal_severus} - - // tumor_normal_severus -> meta: [id, paired_data, platform, sex, fiber, basecall_model] - // tumor_bam: haplotagged aligned bam for tumor - // tumor_bai: indexes for tumor bam files - // normal_bam: haplotagged aligned bam files for normal - // normal_bai: indexes for normal bam files - // phased_vcf: phased small variant vcf for normal - - // Get ClairS input channel - tumor_normal_severus - .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, vcf, tbi -> - def model = (!meta.clairS_model || meta.clairS_model.toString().trim() in ['', '[]']) ? 
clairs_modelMap.get(meta.basecall_model.toString().trim()) : meta.clairS_model - return[meta , tumor_bam, tumor_bai, normal_bam, normal_bai,model] - } - .set { clairs_input } - - // - // MODULE: CLAIRS - // - - CLAIRS ( - clairs_input, - fasta, - fai - ) - - CLAIRS.out.vcfs - .join(CLAIRS.out.tbi) - .set{clairs_out} - - // - // MODULE: BCFTOOLS_CONCAT - // - - BCFTOOLS_CONCAT( - clairs_out - ) - - ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions) - - // - // MODULE: BCFTOOLS_SORT - // - - BCFTOOLS_SORT( - BCFTOOLS_CONCAT.out.vcf - ) - - ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions) - - BCFTOOLS_SORT.out.vcf - .map { meta, vcf -> - def extra = [] - return [meta, vcf, extra] - } - .set { somatic_vep } - - ch_versions = ch_versions.mix(CLAIRS.out.versions) - - emit: - tumor_normal_severus - somatic_vep - germline_vep - versions = ch_versions - -} diff --git a/subworkflows/local/tumor_only/tumoronly_smallvar.nf b/subworkflows/local/tumor_only/tumoronly_smallvar.nf new file mode 100644 index 00000000..0476122e --- /dev/null +++ b/subworkflows/local/tumor_only/tumoronly_smallvar.nf @@ -0,0 +1,264 @@ +// IMPORT MODULES +include { CLAIRSTO } from '../../../modules/local/clairsto/main.nf' +include { VCFSPLIT } from '../../../modules/local/vcfsplit/main.nf' + +// IMPORT SUBWORKFLOWS +include { DEEPVARIANT } from '../../../subworkflows/nf-core/deepvariant/main.nf' +include { DEEPSOMATIC } from '../../../subworkflows/local/deepsomatic.nf' +include { SMALL_VARIANT_CONSENSUS as GERMLINE_CONSENSUS } from '../../../subworkflows/local/small_variant_consensus.nf' +include { SMALL_VARIANT_CONSENSUS as SOMATIC_CONSENSUS } from '../../../subworkflows/local/small_variant_consensus.nf' + + +workflow TUMORONLY_SMALLVAR { + + take: + tumor_bams // [meta, tumor_bam, tumor_bai] -- tumor-only aligned BAMs (no matched normal) + fasta // [[:], fasta] + fai // [[:], fai] + clairsto_pon_channel // [ [pon_vcf_path, ...], [is_population_allele_flag, ...] 
] + // used by ClairS-TO to filter germline variants with population allele databases + ds_pon_channel // [ [pon_vcf_path, ...] ] or [ [] ] + // user-supplied DeepSomatic PON VCFs; empty list => container defaults + + main: + + somatic_vcf = channel.empty() + germline_vcf = channel.empty() + + // CLAIRS-TO: somatic AND germline variant calling from tumor-only BAM + // ClairS-TO uses a panel-of-normals / population allele database to separate somatic from germline + // Runs if either somatic or germline clair calling is requested (produces both jointly) + + if(params.somatic_var_keep.contains('clair') || params.germline_var_keep.contains('clair')) { + // Append model name and PoN info to build the full CLAIRSTO input + tumor_bams + .map { meta, bam, bai -> + return [ meta, bam, bai, meta.clairSTO_model] + } + .combine(clairsto_pon_channel) + .set{ clairsto_input_ch} + // clairsto_input_ch: [meta, bam, bai, clairSTO_model_str, [pon_vcf_paths], [pon_flags]] + + // + // MODULE: CLAIRSTO (label: process_high) + // Input: [meta, bam, bai, model_str, [pon_vcfs], [pon_flags]] + // fasta / fai + // Output: .snv_vcf -- [meta, vcf] -- SNV calls (germline + somatic, unsplit) + // .indel_vcf -- [meta, vcf] -- indel calls (germline + somatic, unsplit) + // + CLAIRSTO ( + clairsto_input_ch, + fasta, + fai + ) + + // SPLIT CLAIRSTO GERMLINE AND SOMATIC VARIATION + // ClairS-TO outputs a combined VCF with FILTER tags indicating somatic/germline status; + // VCFSPLIT separates these into two VCFs + + CLAIRSTO.out.indel_vcf + .join(CLAIRSTO.out.snv_vcf) + .set{ clairsto_combined_vcf } + // clairsto_combined_vcf: [meta, indel_vcf, snv_vcf] + + // + // MODULE: VCFSPLIT (label: process_single) + // Input: [meta, indel_vcf, snv_vcf] -- combined ClairS-TO output + // Output: .germline_vcf -- [meta, vcf] -- germline variants only + // .germline_tbi -- [meta, tbi] + // .somatic_vcf -- [meta, vcf] -- somatic variants only + // .somatic_tbi -- [meta, tbi] + // + VCFSPLIT ( + 
clairsto_combined_vcf + ) + + VCFSPLIT.out.germline_vcf + .join(VCFSPLIT.out.germline_tbi) + .map { meta, vcf, tbi -> + def new_meta = meta + [caller:'clairs-to'] + return [ new_meta, vcf, tbi] + } + .set{clairsto_germline_ch} + // clairsto_germline_ch: [meta(+caller:'clairs-to'), vcf, tbi] -- germline variants + + VCFSPLIT.out.somatic_vcf + .join(VCFSPLIT.out.somatic_tbi) + .map { meta, vcf, tbi -> + def new_meta = meta + [caller:'clairs-to'] + return [ new_meta, vcf, tbi] + } + .set{clairsto_somatic_ch} + // clairsto_somatic_ch: [meta(+caller:'clairs-to'), vcf, tbi] -- somatic variants + } + + // DEEPVARIANT: germline-only variant calling (no somatic mode for tumor-only) + if(params.germline_var_keep.contains('deepvariant')) { + + // + // SUBWORKFLOW: DEEPVARIANT (nf-core) + // Input: [meta, bam, bai, []] -- [] = genome-wide (no interval list) + // fasta / fai / [[:],[]] x2 -- empty PAR/GFF + // Output: .vcf -- [meta, vcf] + // .vcf_index -- [meta, tbi] + // + tumor_bams + .map { meta, bam, bai -> + def intervals = [] + return [meta,bam,bai, intervals] + } + .set{deepvariant_input_ch} + // deepvariant_input_ch: [meta, bam, bai, []] + + DEEPVARIANT ( + deepvariant_input_ch, + fasta, + fai, + [[:],[]], // PAR regions (not used) + [[:],[]] // GFF annotation (not used) + ) + + DEEPVARIANT.out.vcf + .join(DEEPVARIANT.out.vcf_index) + .map{ meta, vcf, tbi -> + def new_meta = meta + [caller:'deepvariant'] + return [new_meta, vcf, tbi] + } + .set{deepvariant_ch} + // deepvariant_ch: [meta(+caller:'deepvariant'), vcf, tbi] + } + + // COMBINE GERMLINE VARIANTS + // If both callers requested: run consensus; otherwise pass through single-caller output + if (params.germline_var_keep.size() > 1) { + clairsto_germline_ch + .mix(deepvariant_ch) + .set{combined_germline_ch} + // combined_germline_ch: [meta(+caller), vcf, tbi] -- one item per caller per sample + + // SUBWORKFLOW: GERMLINE_CONSENSUS (SMALL_VARIANT_CONSENSUS alias) + GERMLINE_CONSENSUS( + combined_germline_ch, + 
fasta, + fai, + params.prioritize_caller_germline, + params.germline_var_combine + ) + GERMLINE_CONSENSUS.out.vcf + .join(GERMLINE_CONSENSUS.out.tbi) + .set{germline_vcf} + // germline_vcf: [meta(+caller from consensus), vcf, tbi] + } + else if (params.germline_var_keep == ['clair']) { + clairsto_germline_ch + .set{germline_vcf} + } + else if (params.germline_var_keep == ['deepvariant']) { + deepvariant_ch + .set{germline_vcf} + } + + // DEEPSOMATIC: somatic variant calling in tumor-only mode (no matched normal) + // Normal BAM/BAI are passed as empty lists; DeepSomatic uses the model's internal normal baseline + if(params.somatic_var_keep.contains('deepsomatic')) { + tumor_bams + .map { meta, tumor_bam, tumor_bai -> + def normal_bam = [] + def normal_bai = [] + return [meta,normal_bam,normal_bai,tumor_bam,tumor_bai] + } + .set{deepsomatic_input_ch} + // deepsomatic_input_ch: [meta, [], [], tumor_bam, tumor_bai] + // empty normal_bam/bai signals tumor-only mode to DEEPSOMATIC subworkflow + + // + // SUBWORKFLOW: DEEPSOMATIC (local) + // Input: [meta, [], [], tumor_bam, tumor_bai] -- tumor-only (no normal) + // [[:],[]] / fasta / fai / [[:],[]] + // Output: .vcf -- [meta, vcf] + // .vcf_index -- [meta, tbi] + // + DEEPSOMATIC ( + deepsomatic_input_ch, + [[:],[]], // intervals (empty = genome-wide) + fasta, + fai, + [[:],[]], // GZI (empty if FASTA is uncompressed) + ds_pon_channel + ) + DEEPSOMATIC.out.vcf + .join(DEEPSOMATIC.out.vcf_index) + .map{ meta, vcf, tbi -> + def new_meta = meta + [caller:'deepsomatic'] + return [new_meta, vcf, tbi] + } + .set{deepsomatic_ch} + // deepsomatic_ch: [meta(+caller:'deepsomatic'), vcf, tbi] + } + + // COMBINE SOMATIC VARIATION + if (params.somatic_var_keep.size() > 1) { + clairsto_somatic_ch + .mix(deepsomatic_ch) + .set{combined_somatic_ch} + // combined_somatic_ch: [meta(+caller), vcf, tbi] -- one item per caller per sample + + // SUBWORKFLOW: SOMATIC_CONSENSUS (SMALL_VARIANT_CONSENSUS alias) + SOMATIC_CONSENSUS( + 
combined_somatic_ch, + fasta, + fai, + params.prioritize_caller_somatic, + params.somatic_var_combine + ) + SOMATIC_CONSENSUS.out.vcf + .join(SOMATIC_CONSENSUS.out.tbi) + .set{somatic_vcf} + // somatic_vcf: [meta(+caller from consensus), vcf, tbi] + } + else if (params.somatic_var_keep == ['clair']) { + clairsto_somatic_ch + .set{somatic_vcf} + } + else if (params.somatic_var_keep == ['deepsomatic']) { + deepsomatic_ch + .set{somatic_vcf} + } + + // Strip 'caller' from meta before emitting both VCFs + somatic_vcf + .map{ meta, vcf, tbi -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, vcf, tbi] + } + .set{somatic_vcf} + + germline_vcf + .map{ meta, vcf, tbi -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, vcf, tbi] + } + .set{germline_vcf} + + emit: + somatic_vcf // [meta, vcf, tbi] -- final somatic VCF (ClairS-TO, DeepSomatic, or consensus) + germline_vcf // [meta, vcf, tbi] -- final germline VCF (ClairS-TO germline, DeepVariant, or consensus) + + +} diff --git a/subworkflows/local/tumor_only_happhase.nf b/subworkflows/local/tumor_only_happhase.nf deleted file mode 100644 index 220a8710..00000000 --- a/subworkflows/local/tumor_only_happhase.nf +++ /dev/null @@ -1,199 +0,0 @@ -include { CLAIRSTO } from '../../modules/local/clairsto/main.nf' -include { VCFSPLIT } from '../../modules/local/vcfsplit/main.nf' -include { LONGPHASE_PHASE } from '../../modules/nf-core/longphase/phase/main' -include { LONGPHASE_HAPLOTAG } from '../../modules/nf-core/longphase/haplotag/main.nf' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main.nf' - -workflow TUMOR_ONLY_HAPPHASE { - - take: - tumor_bams - fasta - fai - clairSTO_modelMap - dbsnp - colors - onekgenomes - gnomad - - main: - - ch_versions = 
Channel.empty() - tumor_only_severus = Channel.empty() - somatic_vep = Channel.empty() - germline_vep = Channel.empty() - - tumor_bams - .map{ meta, bam, bai -> - def clairSTO_model = (!meta.clairSTO_model || meta.clairSTO_model.toString().trim() in ['', '[]']) ? clairSTO_modelMap.get(meta.basecall_model.toString().trim()) : meta.clairSTO_model - return [meta, bam, bai, clairSTO_model] - } - .set{ tumor_bams } - - // - // MODULE: CLAIRSTO - // - // call somatic/non-somatic variants - // (* not called as germline * just non-somatic) - - CLAIRSTO ( - tumor_bams, - fasta, - fai, - dbsnp, - colors, - onekgenomes, - gnomad - ) - - ch_versions = ch_versions.mix(CLAIRSTO.out.versions) - - CLAIRSTO.out.indel_vcf - .join(CLAIRSTO.out.snv_vcf) - .set{ clairsto_vcf } - - ch_versions = ch_versions.mix(CLAIRSTO.out.versions) - // clairsto_vcf -> meta: [id, paired_data, platform, sex, type, fiber, basecall_model] - // indel_vcf: vcf for indels - // snv_vcf: vcf for snvs - - // - // MODULE: VCFSPLIT - // - // ClairSTO gives outputs in snv.vcf and indel.vcf - // reformats them to be in somatic.vcf and nonsomatic.vcf - - VCFSPLIT ( - clairsto_vcf - ) - ch_versions = ch_versions.mix(VCFSPLIT.out.versions) - - ch_versions = ch_versions.mix(VCFSPLIT.out.versions) - - // Add the nonsomatic vcf info - // remove model info - tumor_bams - .join(VCFSPLIT.out.germline_vcf) - .map{ meta, bam, bai, model, snps -> - def svs = [] - def mods = [] - return[meta, bam, bai, snps, svs, mods] - } - .set{ tumor_bams_germlinevcf } - - - VCFSPLIT.out.somatic_vcf - .map { meta, vcf -> - def extra = [] - return [meta,vcf, extra] - } - .set { somatic_vep } - - VCFSPLIT.out.germline_vcf - .map { meta, vcf -> - def extra = [] - return [meta,vcf, extra] - } - .set { germline_vep } - - // tumor_bams_germlinevcf -> meta: [id, paired_data, platform, sex, type, fiber, basecall_model] - // bam: list of concatenated aligned bams - // bai: indexes for bam files - // vcf: tumor small nonsomatic variant vcf - // svs: 
structural variant vcf (empty) - // mods: modcall-generated VCF with modifications (empty) - - // - // MODULES: LONGPHASE_PHASE - // - // Phase tumor bams on nonsomatic vcf - - LONGPHASE_PHASE ( - tumor_bams_germlinevcf, - fasta, - fai - ) - ch_versions = ch_versions.mix(LONGPHASE_PHASE.out.versions) - - ch_versions = ch_versions.mix(LONGPHASE_PHASE.out.versions) - - // Add phased nonsomatic vcf info - // remove model info - tumor_bams - .join(LONGPHASE_PHASE.out.vcf) - .map{ meta, bam, bai, model, snps -> - def svs = [] - def mods = [] - return [meta, bam, bai, snps, svs, mods] - } - .set{ tumor_bams_phasedvcf } - - // tumor_bams_germlinevcf -> meta: [id, paired_data, platform, sex, type, fiber, basecall_model] - // bam: list of concatenated aligned bams - // bai: indexes for bam files - // vcf: phased tumor small nonsomatic variant vcf - // svs: structural variant vcf (empty) - // mods: modcall-generated VCF with modifications (empty) - - // - // MODULES: LONGPHASE_HAPLOTAG - // - // Haplotag the tumor bams - - LONGPHASE_HAPLOTAG ( - tumor_bams_phasedvcf, - fasta, - fai - ) - ch_versions = ch_versions.mix(LONGPHASE_HAPLOTAG.out.versions) - - ch_versions = ch_versions.mix(LONGPHASE_HAPLOTAG.out.versions) - - // grab phased bams - LONGPHASE_HAPLOTAG.out.bam - .set{ haplotagged_bams } - - // haplotagged_bams -> meta: [id, paired_data, platform, sex, type, fiber, basecall_model] - // bams: list of concatenated aligned bams - - // - // MODULES: SAMTOOLS_INDEX - // - // index the haplotagged bams - SAMTOOLS_INDEX ( - haplotagged_bams - ) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) - - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) - - // join information and the phased VCF file - haplotagged_bams - .join(SAMTOOLS_INDEX.out.bai) - .join(LONGPHASE_PHASE.out.vcf) - .join(LONGPHASE_PHASE.out.tbi) - .map{ meta, hap_bam, hap_bai, vcf, tbi -> - def new_meta = [id: meta.id, - paired_data: meta.paired_data, - platform: meta.platform, - sex: 
meta.sex, - fiber: meta.fiber, - basecall_model: meta.basecall_model] - return [new_meta, hap_bam, hap_bai, [], [], vcf, tbi] - } - .set{ tumor_only_severus } - - // tumor_only_severus -> meta: [id, paired_data, platform, sex, fiber, basecall_model] - // hap_bam: haplotagged aligned bam for tumor - // hap_bai: indexes for tumor bam files - // normal_bam: haplotagged aligned bam files for normal (empty) - // normal_bai: indexes for normal bam files (empty) - // phased_vcf: phased small variant vcf - - emit: - tumor_only_severus - somatic_vep - germline_vep - versions = ch_versions - -} diff --git a/subworkflows/local/utils_nfcore_lrsomatic_pipeline/main.nf b/subworkflows/local/utils_nfcore_lrsomatic_pipeline/main.nf index d7d151a1..34db6e1f 100644 --- a/subworkflows/local/utils_nfcore_lrsomatic_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_lrsomatic_pipeline/main.nf @@ -29,10 +29,10 @@ workflow PIPELINE_INITIALISATION { take: version // boolean: Display version and exit validate_params // boolean: Boolean whether to validate parameters against the schema at runtime - monochrome_logs // boolean: Do not use coloured log outputs + _monochrome_logs // boolean: Do not use coloured log outputs nextflow_cli_args // array: List of positional nextflow CLI args outdir // string: The output directory where the results will be saved - input // string: Path to input samplesheet + _input // string: Path to input samplesheet help // boolean: Display help message and exit help_full // boolean: Show the full help message show_hidden // boolean: Show hidden parameters in the help message @@ -84,39 +84,87 @@ workflow PIPELINE_INITIALISATION { // Create channel from input file provided through params.input // + // Parse the input samplesheet CSV and build a per-sample BAM channel + // Each samplesheet row describes one tumor (+ optional normal) sample + // Columns: sample_id, bam_tumor, bam_normal, method, sex, fiber, + // clair3_model, clairSTO_model, clairS_model, 
tumor_replicate, normal_replicate channel .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) - .map { meta, bam_tumor, bam_normal, method, sex, fiber, clair3_model, clairSTO_model, clairS_model -> + // Step 1: build a combined meta map from the samplesheet columns + // paired_data = true if a normal BAM is present; false for tumor-only + .map { meta, bam_tumor, bam_normal, method, sex, fiber, clair3_model, clairSTO_model, clairS_model, tumor_replicate, normal_replicate -> def real_clair3_model = (clair3_model == null ) ? null : clair3_model def real_clairS_model = (clairS_model == null ) ? null : clairS_model def real_clairSTO_model = (clairSTO_model == null ) ? null : clairSTO_model def paired_data = bam_normal ? true : false - def meta_info = meta + [ paired_data: paired_data, platform: method, sex: sex, fiber: fiber, clair3_model: real_clair3_model, clairS_model : real_clairS_model, clairSTO_model: real_clairSTO_model] + def meta_info = meta + [ paired_data: paired_data, + platform: method, // 'ont' or 'pb' + sex: sex, // 'XX', 'XY', or null (for ASCAT) + fiber: fiber, // 'y' or 'n' (fiber-seq data flag) + clair3_model: real_clair3_model, + clairS_model: real_clairS_model, + clairSTO_model: real_clairSTO_model, + tumor_replicate: tumor_replicate, + normal_replicate: normal_replicate] return [ meta_info, [ bam_tumor ], [ bam_normal ?: [] ] ] } + // Flatten BAM lists (handles multi-run entries where bam_tumor/bam_normal are lists) .map { meta, bam_tumor, bam_normal -> [ meta, bam_tumor.flatten(), bam_normal.flatten() ] } + // Step 2: split each row into separate tumor and normal items + // flatMap emits 1 item (tumor-only) or 2 items (tumor + normal) per samplesheet row + // Each item gets type='tumor' or type='normal' and the appropriate replicate ID .flatMap { meta, tumor_bam, normal_bam -> def meta_tumor = meta.clone() meta_tumor.type = 'tumor' + meta_tumor.replicate = meta_tumor.tumor_replicate + meta_tumor = 
meta_tumor.subMap('id', + 'paired_data', + 'type', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'replicate') def result = [[meta_tumor, tumor_bam]] + // result so far: [[meta_tumor, [tumor_bam_path...]]] if (normal_bam) { def meta_normal = meta.clone() meta_normal.type = 'normal' + meta_normal.replicate = meta_normal.normal_replicate + meta_normal = meta_normal.subMap('id', + 'paired_data', + 'type', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'replicate') result << [meta_normal, normal_bam] + // result now: [[meta_tumor, [tumor_bams]], [meta_normal, [normal_bams]]] } return result } .set { ch_samplesheet } - - // ch_samplesheet -> meta: [id, paired_data, platform, sex, type] - // bam: unaligned bams + // ch_samplesheet: [meta, [bam...]] + // meta fields: id, paired_data, type ('tumor'|'normal'), platform ('ont'|'pb'), + // sex, fiber ('y'|'n'), clair3_model, clairS_model, clairSTO_model, replicate + // paired_data: true for both items in a T/N pair (same value for tumor AND normal rows) + // bam: list of paths (multiple runs for same sample remain as a list until SAMTOOLS_CAT) + // + // NOTE: tumor-only rows emit ONE item (type='tumor', paired_data=false) + // paired rows emit TWO items — tumor (paired_data=true) + normal (paired_data=true) + // Both share the same 'id' to allow downstream joins emit: - samplesheet = ch_samplesheet + samplesheet = ch_samplesheet // [meta, [bam...]] -- see channel structure above versions = ch_versions } diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf index 44d4c010..34e8fe10 100644 --- a/subworkflows/nf-core/bam_stats_samtools/main.nf +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -12,21 +12,14 @@ workflow BAM_STATS_SAMTOOLS { ch_fasta // channel: [ val(meta), path(fasta) ] main: - ch_versions = Channel.empty() - SAMTOOLS_STATS ( ch_bam_bai, ch_fasta ) - ch_versions 
= ch_versions.mix(SAMTOOLS_STATS.out.versions) SAMTOOLS_FLAGSTAT ( ch_bam_bai ) - ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) SAMTOOLS_IDXSTATS ( ch_bam_bai ) - ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) emit: stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), path(stats) ] flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), path(flagstat) ] idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), path(idxstats) ] - - versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test index 76e7a40a..2f329695 100644 --- a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test @@ -36,8 +36,7 @@ nextflow_workflow { { assert snapshot( workflow.out.flagstat, workflow.out.idxstats, - workflow.out.stats, - workflow.out.versions).match() } + workflow.out.stats).match() } ) } } @@ -66,8 +65,7 @@ nextflow_workflow { { assert snapshot( workflow.out.flagstat, workflow.out.idxstats, - workflow.out.stats, - workflow.out.versions).match() } + workflow.out.stats).match() } ) } } @@ -96,8 +94,7 @@ nextflow_workflow { { assert snapshot( workflow.out.flagstat, workflow.out.idxstats, - workflow.out.stats, - workflow.out.versions).match() } + workflow.out.stats).match() } ) } } diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap index 8ca22526..9c8ff1b5 100644 --- a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap @@ -17,7 +17,7 @@ "id": "test", "single_end": true }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], "2": [ @@ -29,18 +29,13 @@ 
"test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ - "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", - "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", - "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" - ], "flagstat": [ [ { "id": "test", "single_end": true }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], "idxstats": [ @@ -60,19 +55,14 @@ }, "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" ] - ], - "versions": [ - "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", - "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", - "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:08:35.660286921" + "timestamp": "2026-02-03T11:10:30.076183827" }, "test_bam_stats_samtools_single_end - stub": { "content": [ @@ -92,7 +82,7 @@ "id": "test", "single_end": true }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], "2": [ @@ -104,18 +94,13 @@ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ - "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", - "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", - "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" - ], "flagstat": [ [ { "id": "test", "single_end": true }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], "idxstats": [ @@ -135,19 +120,14 @@ }, "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" ] - ], - "versions": [ - "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", - "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", - "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:08:24.220305512" + "timestamp": 
"2026-02-03T11:10:24.379362883" }, "test_bam_stats_samtools_paired_end_cram - stub": { "content": [ @@ -167,7 +147,7 @@ "id": "test", "single_end": false }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], "2": [ @@ -179,18 +159,13 @@ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ - "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", - "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", - "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" - ], "flagstat": [ [ { "id": "test", "single_end": false }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], "idxstats": [ @@ -210,19 +185,14 @@ }, "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" ] - ], - "versions": [ - "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", - "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", - "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:08:54.206770141" + "timestamp": "2026-02-03T11:10:35.91658956" }, "test_bam_stats_samtools_single_end": { "content": [ @@ -250,20 +220,15 @@ "id": "test", "single_end": true }, - "test.stats:md5,291bb2393ec947140d12d42c2795b222" + "test.stats:md5,7a05a22bdb17e8df6e8c2d100ff09a31" ] - ], - [ - "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", - "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", - "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:07:49.731645858" + "timestamp": "2026-02-03T11:32:20.243663217" }, "test_bam_stats_samtools_paired_end": { "content": [ @@ -291,20 +256,15 @@ "id": "test", "single_end": true }, - "test.stats:md5,8140d69cdedd77570ca1d7618a744e16" + 
"test.stats:md5,a391612b5ef5b181e854ccaad8c8a068" ] - ], - [ - "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", - "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", - "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:08:01.421996172" + "timestamp": "2026-02-03T11:32:26.434187887" }, "test_bam_stats_samtools_paired_end_cram": { "content": [ @@ -332,19 +292,14 @@ "id": "test", "single_end": false }, - "test.stats:md5,1622856127bafd6cdbadee9cd64ec9b7" + "test.stats:md5,2b0e31ab01b867a6ff312023ae03838d" ] - ], - [ - "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", - "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", - "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:08:12.640915756" + "timestamp": "2026-02-03T11:32:32.441454186" } } \ No newline at end of file diff --git a/subworkflows/nf-core/deepvariant/README.md b/subworkflows/nf-core/deepvariant/README.md new file mode 100644 index 00000000..6f816c22 --- /dev/null +++ b/subworkflows/nf-core/deepvariant/README.md @@ -0,0 +1,8 @@ +# DeepVariant subworkflow + +Usage: the input channel should contain tuples of three elements: `meta`, an alignment file in bam or +cram format, and a corresponding index. + +It is very important that the input channel's `meta` is unique for all the input elements, because the subworkflow does a join on `meta`. + +Please note the important configuration items listed in the `deepvariant` module's README file. It is required to use the configuration to specify the input "channels" (data types to extract from bam file) for `DEEPVARIANT_MAKEEXAMPLES`, and the model to run for `DEEPVARIANT_CALLVARIANTS`. 
The correct arguments for a specific model (data type) can be determined by manually using the `run_deepvariant` command from the Docker / Singularity image with the `--dry_run` option. diff --git a/subworkflows/nf-core/deepvariant/deepvariant.diff b/subworkflows/nf-core/deepvariant/deepvariant.diff new file mode 100644 index 00000000..cb98a20e --- /dev/null +++ b/subworkflows/nf-core/deepvariant/deepvariant.diff @@ -0,0 +1,49 @@ +Changes in component 'nf-core/deepvariant' +'subworkflows/nf-core/deepvariant/README.md' is unchanged +'subworkflows/nf-core/deepvariant/meta.yml' is unchanged +Changes in 'deepvariant/main.nf': +--- subworkflows/nf-core/deepvariant/main.nf ++++ subworkflows/nf-core/deepvariant/main.nf +@@ -6,7 +6,7 @@ + take: + ch_input // channel: [ val(meta), path(input), path(index), path(intervals)] + ch_fasta // channel: [ val(meta2), path(fasta) ] +- ch_fai // channel: [ val(meta3), path(fail) ] ++ ch_fai // channel: [ val(meta3), path(fai) ] + ch_gzi // channel: [ val(meta4), path(gzi) ] + ch_par_bed // channel: [ val(meta5), path(par_bed) ] + +@@ -16,14 +16,19 @@ + + DEEPVARIANT_CALLVARIANTS(DEEPVARIANT_MAKEEXAMPLES.out.examples) + +- // Input to postprocessing step needs both the gvcfs from MAKEEXAMPLES and the variant +- // calls from CALLVARIANTS. Joining on meta, which is assumed to be unique. ++ // Input to postprocessing step needs the variant calls from CALLVARIANTS, ++ // and optionally the gvcfs from MAKEEXAMPLES (only when params.generate_gvcf is true). + ch_intervals = ch_input.map { meta, _input, _index, intervals -> [ meta, intervals ] } + +- ch_postproc_input = DEEPVARIANT_CALLVARIANTS.out.call_variants_tfrecords.join( +- DEEPVARIANT_MAKEEXAMPLES.out.gvcf, +- failOnMismatch: true +- ).join( ++ ch_call_and_gvcf = params.generate_gvcf ++ ? 
DEEPVARIANT_CALLVARIANTS.out.call_variants_tfrecords.join( ++ DEEPVARIANT_MAKEEXAMPLES.out.gvcf, failOnMismatch: true ++ ) ++ : DEEPVARIANT_CALLVARIANTS.out.call_variants_tfrecords.map { meta, tfrecord -> ++ [meta, tfrecord, []] ++ } ++ ++ ch_postproc_input = ch_call_and_gvcf.join( + DEEPVARIANT_MAKEEXAMPLES.out.small_model_calls, + failOnMismatch: true + ).join( + +'subworkflows/nf-core/deepvariant/tests/main.nf.test' is unchanged +'subworkflows/nf-core/deepvariant/tests/equality.nf.test' is unchanged +'subworkflows/nf-core/deepvariant/tests/disable-small-model.conf' is unchanged +'subworkflows/nf-core/deepvariant/tests/deepvariant-workflow-and-process-equality-tester.nf' is unchanged +'subworkflows/nf-core/deepvariant/tests/main.nf.test.snap' is unchanged +'subworkflows/nf-core/deepvariant/tests/nextflow.config' is unchanged +************************************************************ diff --git a/subworkflows/nf-core/deepvariant/main.nf b/subworkflows/nf-core/deepvariant/main.nf new file mode 100644 index 00000000..d00ecc95 --- /dev/null +++ b/subworkflows/nf-core/deepvariant/main.nf @@ -0,0 +1,51 @@ +include { DEEPVARIANT_MAKEEXAMPLES } from '../../../modules/nf-core/deepvariant/makeexamples/main' +include { DEEPVARIANT_CALLVARIANTS } from '../../../modules/nf-core/deepvariant/callvariants/main' +include { DEEPVARIANT_POSTPROCESSVARIANTS } from '../../../modules/nf-core/deepvariant/postprocessvariants/main' + +workflow DEEPVARIANT { + take: + ch_input // channel: [ val(meta), path(input), path(index), path(intervals)] + ch_fasta // channel: [ val(meta2), path(fasta) ] + ch_fai // channel: [ val(meta3), path(fai) ] + ch_gzi // channel: [ val(meta4), path(gzi) ] + ch_par_bed // channel: [ val(meta5), path(par_bed) ] + + main: + + DEEPVARIANT_MAKEEXAMPLES(ch_input, ch_fasta, ch_fai, ch_gzi, ch_par_bed) + + DEEPVARIANT_CALLVARIANTS(DEEPVARIANT_MAKEEXAMPLES.out.examples) + + // Input to postprocessing step needs the variant calls from CALLVARIANTS, + // and 
optionally the gvcfs from MAKEEXAMPLES (only when params.generate_gvcf is true). + ch_intervals = ch_input.map { meta, _input, _index, intervals -> [ meta, intervals ] } + + ch_call_and_gvcf = params.generate_gvcf + ? DEEPVARIANT_CALLVARIANTS.out.call_variants_tfrecords.join( + DEEPVARIANT_MAKEEXAMPLES.out.gvcf, failOnMismatch: true + ) + : DEEPVARIANT_CALLVARIANTS.out.call_variants_tfrecords.map { meta, tfrecord -> + [meta, tfrecord, []] + } + + ch_postproc_input = ch_call_and_gvcf.join( + DEEPVARIANT_MAKEEXAMPLES.out.small_model_calls, + failOnMismatch: true + ).join( + ch_intervals, + failOnMismatch: true + ) + + DEEPVARIANT_POSTPROCESSVARIANTS( + ch_postproc_input, + ch_fasta, + ch_fai, + ch_gzi + ) + + emit: + vcf = DEEPVARIANT_POSTPROCESSVARIANTS.out.vcf + vcf_index = DEEPVARIANT_POSTPROCESSVARIANTS.out.vcf_index + gvcf = DEEPVARIANT_POSTPROCESSVARIANTS.out.gvcf + gvcf_index = DEEPVARIANT_POSTPROCESSVARIANTS.out.gvcf_index +} diff --git a/subworkflows/nf-core/deepvariant/meta.yml b/subworkflows/nf-core/deepvariant/meta.yml new file mode 100644 index 00000000..bd459a62 --- /dev/null +++ b/subworkflows/nf-core/deepvariant/meta.yml @@ -0,0 +1,77 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: deepvariant +description: DeepVariant is an analysis pipeline that uses a deep neural network to call genetic variants from next-generation DNA sequencing data +keywords: + - variant calling + - machine learning + - neural network +components: + - deepvariant/makeexamples + - deepvariant/callvariants + - deepvariant/postprocessvariants +input: + - ch_input: + type: list + description: | + Input aligned reads in bam or cram format, with index, and optional intervals BED file + Structure: [ val(meta), path(bam_or_cram), path(bai_or_crai), path(intervals_bed) ] + - ch_fasta: + type: file + description: | + Reference genome + Structure: [ val(meta2), path(fasta) ] + - ch_fai: + type: string + 
description: | + Reference genome index in fai format + Structure: [ val(meta3), path(fai) ] + - ch_gzi: + type: string + description: | + Reference genome index in gzi format (either gzi or fai should be used) + Structure: [ val(meta4), val(gzi) ] + - ch_par_bed: + type: string + description: | + bed file of pseudoautosomal regions (optional) + Structure: [ val(meta5), val(par_bed) ] + pattern: "*.bed" +output: + - vcf: + type: file + description: | + Variant calls + Structure: [ val(meta), path(vcf) ] + pattern: "*.vcf.gz" + - vcf_tbi: + type: file + description: | + Index for variant call file + Structure: [ val(meta), path(vcf_tbi) ] + pattern: "*.tbi" + - gvcf: + type: file + description: | + Variant call file with genomic coverage information + Structure: [ val(meta), path(gvcf) ] + pattern: "*.g.vcf.gz" + - gvcf_tbi: + type: file + description: | + Index for the GVCF. + Structure: [ val(meta), path(gvcf_tbi) ] + pattern: "*.tbi" + - versions: + type: file + description: | + File containing software versions + Structure: path(versions.yml) + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@ramprasadn" + - "@fa2k" +maintainers: + - "@abhi18av" + - "@ramprasadn" + - "@fa2k" diff --git a/subworkflows/nf-core/deepvariant/tests/deepvariant-workflow-and-process-equality-tester.nf b/subworkflows/nf-core/deepvariant/tests/deepvariant-workflow-and-process-equality-tester.nf new file mode 100644 index 00000000..83a16d55 --- /dev/null +++ b/subworkflows/nf-core/deepvariant/tests/deepvariant-workflow-and-process-equality-tester.nf @@ -0,0 +1,22 @@ +include { DEEPVARIANT_RUNDEEPVARIANT } from '../../../../modules/nf-core/deepvariant/rundeepvariant/main' +include { DEEPVARIANT } from '../main' + +workflow DEEPVARIANT_WORKFLOW_AND_PROCESS_EQUALITY_TESTER { + take: + ch_input // channel: [ val(meta), path(input), path(index), path(intervals)] + ch_fasta // channel: [ val(meta2), path(fasta) ] + ch_fai // channel: [ val(meta3), path(fail) ] + ch_gzi // channel: [ 
val(meta4), path(gzi) ] + ch_par_bed // channel: [ val(meta5), path(par_bed) ] + + main: + + DEEPVARIANT(ch_input, ch_fasta, ch_fai, ch_gzi, ch_par_bed) + DEEPVARIANT_RUNDEEPVARIANT(ch_input, ch_fasta, ch_fai, ch_gzi, ch_par_bed) + + emit: + wf_vcf = DEEPVARIANT.out.vcf + pc_vcf = DEEPVARIANT_RUNDEEPVARIANT.out.vcf + wf_gvcf = DEEPVARIANT.out.gvcf + pc_gvcf = DEEPVARIANT_RUNDEEPVARIANT.out.gvcf +} diff --git a/subworkflows/nf-core/deepvariant/tests/disable-small-model.conf b/subworkflows/nf-core/deepvariant/tests/disable-small-model.conf new file mode 100644 index 00000000..eb1b53bc --- /dev/null +++ b/subworkflows/nf-core/deepvariant/tests/disable-small-model.conf @@ -0,0 +1,8 @@ +process { + withName: "DEEPVARIANT_MAKEEXAMPLES" { + ext.args = '--checkpoint "/opt/models/wgs"' + } + withName: "DEEPVARIANT_CALLVARIANTS" { + ext.args = '--checkpoint "/opt/models/wgs"' + } +} diff --git a/subworkflows/nf-core/deepvariant/tests/equality.nf.test b/subworkflows/nf-core/deepvariant/tests/equality.nf.test new file mode 100644 index 00000000..c4a2276e --- /dev/null +++ b/subworkflows/nf-core/deepvariant/tests/equality.nf.test @@ -0,0 +1,63 @@ + +nextflow_workflow { + + name "Compare subworkflow DEEPVARIANT to the process DEEPVARIANT_RUNDEEPVARIANT" + script "./deepvariant-workflow-and-process-equality-tester.nf" + config "./nextflow.config" + workflow "DEEPVARIANT_WORKFLOW_AND_PROCESS_EQUALITY_TESTER" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/deepvariant" + + tag "deepvariant" + tag "deepvariant/makeexamples" + tag "deepvariant/callvariants" + tag "deepvariant/postprocessvariants" + tag "deepvariant/rundeepvariant" + + test("ensure that the subworkflow and DEEPVARIANT_RUNDEEPVARIANT have the same output") { + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + 
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + [ id:'test2', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + ) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert path(workflow.out.wf_vcf[0][1]).vcf.variantsMD5 == path(workflow.out.pc_vcf[0][1]).vcf.variantsMD5 }, + { assert path(workflow.out.wf_gvcf[0][1]).vcf.variantsMD5 == path(workflow.out.pc_gvcf[0][1]).vcf.variantsMD5 }, + ) + } + } +} diff --git a/subworkflows/nf-core/deepvariant/tests/main.nf.test b/subworkflows/nf-core/deepvariant/tests/main.nf.test new file mode 100644 index 00000000..d2451980 --- /dev/null +++ b/subworkflows/nf-core/deepvariant/tests/main.nf.test @@ -0,0 +1,152 @@ +nextflow_workflow { + + name "Test Subworkflow DEEPVARIANT" + script "../main.nf" + config "./nextflow.config" + workflow "DEEPVARIANT" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/deepvariant" + + tag "deepvariant" + tag "deepvariant/makeexamples" + tag "deepvariant/callvariants" + tag "deepvariant/postprocessvariants" + + test("homo_sapiens - two inputs - bam - 
fasta - fai") { + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ], + [ + [ id:'test2', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ]) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("homo_sapiens - different samples and regions - cram - fasta - fai") { + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + [ id:'test2', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 
'/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + ) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("homo_sapiens - disable small model - cram - fasta - fai") { + + config "./disable-small-model.conf" + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + [ id:'test2', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + ) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', 
checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/deepvariant/tests/main.nf.test.snap b/subworkflows/nf-core/deepvariant/tests/main.nf.test.snap new file mode 100644 index 00000000..2d14299a --- /dev/null +++ b/subworkflows/nf-core/deepvariant/tests/main.nf.test.snap @@ -0,0 +1,419 @@ +{ + "homo_sapiens - disable small model - cram - fasta - fai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,0c57956b2f5a0cff8d09a19790ef94f6" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz:md5,5f3d98908d46297c7a658654d5bb3015" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,9ae649fed4de493a027697b339bfab36" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz.tbi:md5,d6a114149024aa8cd74dda2f1c559f5b" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,021f94de713efa7c83d0547f81412dbf" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz:md5,86f9c844a90351483c715e7bcc604841" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,3c690275c3d0b55bacb9469199b4d6d8" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz.tbi:md5,c65395b29f520cf2af04f211f9be2b36" + ] + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,021f94de713efa7c83d0547f81412dbf" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz:md5,86f9c844a90351483c715e7bcc604841" + ] + ], + "gvcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,3c690275c3d0b55bacb9469199b4d6d8" + ], + [ + { + "id": "test2", + "single_end": false + }, + 
"test2.g.vcf.gz.tbi:md5,c65395b29f520cf2af04f211f9be2b36" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,0c57956b2f5a0cff8d09a19790ef94f6" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz:md5,5f3d98908d46297c7a658654d5bb3015" + ] + ], + "vcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,9ae649fed4de493a027697b339bfab36" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz.tbi:md5,d6a114149024aa8cd74dda2f1c559f5b" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-29T00:09:09.621357638" + }, + "homo_sapiens - different samples and regions - cram - fasta - fai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz:md5,3176f86df96e50687db733c94d9c6689" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz.tbi:md5,ed7ca1a16bcff42bced0be77ee70662e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz:md5,875b521c835441277a527d41c950e4f5" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz.tbi:md5,ca6f9ca8d50d339f5d65e4ec4e9a6ea6" + ] + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz:md5,875b521c835441277a527d41c950e4f5" + ] + ], + "gvcf_index": [ + [ + { + "id": 
"test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz.tbi:md5,ca6f9ca8d50d339f5d65e4ec4e9a6ea6" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz:md5,3176f86df96e50687db733c94d9c6689" + ] + ], + "vcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz.tbi:md5,ed7ca1a16bcff42bced0be77ee70662e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-29T00:05:25.205895104" + }, + "homo_sapiens - two inputs - bam - fasta - fai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ] + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ], + [ + { + "id": 
"test2", + "single_end": false + }, + "test2.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ] + ], + "gvcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ] + ], + "vcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-29T00:01:12.430387646" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/deepvariant/tests/nextflow.config b/subworkflows/nf-core/deepvariant/tests/nextflow.config new file mode 100644 index 00000000..ad76d4ec --- /dev/null +++ b/subworkflows/nf-core/deepvariant/tests/nextflow.config @@ -0,0 +1,14 @@ +process { + withName: "DEEPVARIANT_MAKEEXAMPLES" { + ext.args = '--checkpoint "/opt/models/wgs" --call_small_model_examples --small_model_indel_gq_threshold "30" --small_model_snp_gq_threshold "25" --small_model_vaf_context_window_size "51" --trained_small_model_path "/opt/smallmodels/wgs"' + } + withName: "DEEPVARIANT_CALLVARIANTS" { + ext.args = '--checkpoint "/opt/models/wgs"' + } + + // This configures RUNDEEPVARIANT, which is used as a reference for the correct test output + withName: DEEPVARIANT_RUNDEEPVARIANT { + ext.args = '--model_type=WGS ' + ext.prefix = { "${meta.id}_out" } + } +} diff --git a/subworkflows/nf-core/utils_nfschema_plugin/main.nf b/subworkflows/nf-core/utils_nfschema_plugin/main.nf index ee4738c8..acb39724 
100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/main.nf +++ b/subworkflows/nf-core/utils_nfschema_plugin/main.nf @@ -71,4 +71,3 @@ workflow UTILS_NFSCHEMA_PLUGIN { emit: dummy_emit = true } - diff --git a/tests/.nftignore b/tests/.nftignore index 9651e78a..a1de7635 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -17,4 +17,13 @@ pipeline_info/*.{html,json,txt,yml} */variants/severus/read_ids.csv */variants/severus/severus.log */variants/severus/{all_SVs,somatic_SVs}/*.{vcf.gz,vcf.gz.tbi} -*/qc/{tumor,normal}/{cramino_ubam,cramino_aln}/*_cramino.txt +*/variants/phased/*.{vcf.gz,vcf.gz.tbi} +*/qc/{tumor,normal}/cramino_ubam_*/*_cramino.txt +*/qc/{tumor,normal}/cramino_aln/*_cramino.txt +*/qc/{tumor,normal}/nanoplot_ubam_*/*.html +*/qc/{tumor,normal}/nanoplot_aln/*.html +*/qc/{tumor,normal}/nanoplot_ubam_*/*.txt +*/qc/{tumor,normal}/nanoplot_aln/*.txt +*/qc/{tumor,normal}/mosdepth/*.txt +*/variants/deepsomatic/*.{vcf.gz,vcf.gz.tbi} +*/variants/deepvariant/*.{vcf.gz,vcf.gz.tbi} diff --git a/tests/default.nf.test b/tests/default.nf.test index d4daad19..a7a4281c 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -19,6 +19,43 @@ nextflow_pipeline { def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') assertAll( { assert workflow.success}, + { //files exist + assert file("$launchDir/output/sample1/variants/clair3/merge_output.vcf.gz").exists() + assert file("$launchDir/output/sample1/variants/clair3/merge_output.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample1/variants/clairs/indel.vcf.gz").exists() + assert file("$launchDir/output/sample1/variants/clairs/indel.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample1/variants/clairs/snvs.vcf.gz").exists() + assert file("$launchDir/output/sample1/variants/clairs/snvs.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample1/variants/severus/somatic_SVs/severus_somatic.vcf.gz").exists() + assert 
file("$launchDir/output/sample2/variants/clair3/merge_output.vcf.gz").exists() + assert file("$launchDir/output/sample2/variants/clair3/merge_output.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample2/variants/clairs/indel.vcf.gz").exists() + assert file("$launchDir/output/sample2/variants/clairs/indel.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample2/variants/clairs/snvs.vcf.gz").exists() + assert file("$launchDir/output/sample2/variants/clairs/snvs.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample2/variants/severus/somatic_SVs/severus_somatic.vcf.gz").exists() + assert file("$launchDir/output/sample1/bamfiles/sample1_normal.bam").exists() + assert file("$launchDir/output/sample1/bamfiles/sample1_tumor.bam").exists() + assert file("$launchDir/output/sample1/bamfiles/sample1_normal.bam.bai").exists() + assert file("$launchDir/output/sample1/bamfiles/sample1_tumor.bam.bai").exists() + assert file("$launchDir/output/sample3/variants/clairsto/indel.vcf.gz").exists() + assert file("$launchDir/output/sample3/variants/clairsto/snv.vcf.gz").exists() + assert file("$launchDir/output/sample3/variants/clairsto/somatic.vcf.gz").exists() + assert file("$launchDir/output/sample3/variants/clairsto/germline.vcf.gz").exists() + assert file("$launchDir/output/sample1/variants/deepvariant/sample1.vcf.gz").exists() + assert file("$launchDir/output/sample1/variants/deepvariant/sample1.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample2/variants/deepvariant/sample2.vcf.gz").exists() + assert file("$launchDir/output/sample2/variants/deepvariant/sample2.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample3/variants/deepvariant/sample3.vcf.gz").exists() + assert file("$launchDir/output/sample3/variants/deepvariant/sample3.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample1/variants/deepsomatic/sample1.vcf.gz").exists() + assert file("$launchDir/output/sample1/variants/deepsomatic/sample1.vcf.gz.tbi").exists() + assert 
file("$launchDir/output/sample2/variants/deepsomatic/sample2.vcf.gz").exists() + assert file("$launchDir/output/sample2/variants/deepsomatic/sample2.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample3/variants/deepsomatic/sample3.vcf.gz").exists() + assert file("$launchDir/output/sample3/variants/deepsomatic/sample3.vcf.gz.tbi").exists() + + }, { assert snapshot( // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions removeNextflowVersion("$outputDir/pipeline_info/lrsomatic_software_mqc_versions.yml"), diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index e8071e7c..fe5bf271 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -2,9 +2,21 @@ "-profile test": { "content": [ { + "BCFTOOLS_ANNOTATE": { + "bcftools": 1.22 + }, "BCFTOOLS_CONCAT": { "bcftools": 1.22 }, + "BCFTOOLS_ISEC": { + "bcftools": 1.22 + }, + "BCFTOOLS_NORM": { + "bcftools": 1.22 + }, + "BCFTOOLS_QUERY": { + "bcftools": 1.22 + }, "BCFTOOLS_SORT": { "bcftools": 1.22 }, @@ -12,61 +24,102 @@ "clair3": "1.2.0" }, "CLAIRS": { - "clairs": "0.4.1" + "clairs": "0.4.4" }, "CLAIRSTO": { - "clairsto": "0.4.0" + "clairsto": "0.4.2" }, "CRAMINO_POST": { - "cramino": "1.0.0" + "cramino": "1.3.0" }, "CRAMINO_PRE": { - "cramino": "1.0.0" + "cramino": "1.3.0" + }, + "DEEPSOMATIC_CALLVARIANTS": { + "deepsomatic": "1.7.0" + }, + "DEEPSOMATIC_MAKEEXAMPLES": { + "deepsomatic": "1.7.0" + }, + "DEEPSOMATIC_POSTPROCESSVARIANTS": { + "deepsomatic": "1.7.0" + }, + "DEEPVARIANT_CALLVARIANTS": { + "deepvariant": "1.9.0" + }, + "DEEPVARIANT_MAKEEXAMPLES": { + "deepvariant": "1.9.0" + }, + "DEEPVARIANT_POSTPROCESSVARIANTS": { + "deepvariant": "1.9.0" }, "GERMLINE_VEP": { - "ensemblvep": 114.2, + "ensemblvep": 115.2, + "perl-math-cdf": 0.1, "tabix": 1.21 }, "LONGPHASE_HAPLOTAG": { - "longphase": "1.7.3" + "longphase": "2.0.1" + }, + "LONGPHASE_MODCALL_GERMLINE": { + "longphase": "2.0.1" }, - 
"LONGPHASE_PHASE": { - "longphase": "1.7.3" + "LONGPHASE_MODCALL_SOMATIC": { + "longphase": "2.0.1" + }, + "LONGPHASE_PHASE_GERMLINE": { + "longphase": "2.0.1" + }, + "LONGPHASE_PHASE_SOMATIC": { + "longphase": "2.0.1" }, "METAEXTRACT": { "samtools": 1.21 }, "MINIMAP2_ALIGN": { - "minimap2": "2.29-r1283", - "samtools": 1.21 + "minimap2": "2.29-r1283" }, "MOSDEPTH": { - "mosdepth": "0.3.10" + "mosdepth": "0.3.11" + }, + "NANOPLOT_POST": { + "nanoplot": "1.46.1" + }, + "NANOPLOT_PRE": { + "nanoplot": "1.46.1" }, "SAMTOOLS_FAIDX": { - "samtools": 1.21 + "samtools": "1.22.1" }, "SAMTOOLS_FLAGSTAT": { - "samtools": 1.21 + "samtools": "1.22.1" }, "SAMTOOLS_IDXSTATS": { - "samtools": 1.21 + "samtools": "1.22.1" }, "SAMTOOLS_INDEX": { - "samtools": 1.21 + "samtools": "1.22.1" }, "SAMTOOLS_STATS": { - "samtools": 1.21 + "samtools": "1.22.1" }, "SEVERUS": { "severus": 1.6 }, "SOMATIC_VEP": { - "ensemblvep": 114.2, + "ensemblvep": 115.2, + "perl-math-cdf": 0.1, "tabix": 1.21 }, + "SORT_POST_NORM": { + "bcftools": 1.22 + }, + "STANDARDIZE_AF": { + "bcftools": 1.22 + }, "SV_VEP": { - "ensemblvep": 114.2, + "ensemblvep": 115.2, + "perl-math-cdf": 0.1, "tabix": 1.21 }, "UNTAR": { @@ -76,13 +129,16 @@ "pigz": 2.8 }, "VCFSPLIT": { - "vcfsplit": 1.2 + "bcftools": 1.2 }, "WGET": { "wget": "1.21.4" }, + "WHATSHAP_STATS": { + "whatshap": 2.8 + }, "Workflow": { - "IntGenomicsLab/lrsomatic": "v1.0.0" + "IntGenomicsLab/lrsomatic": "v1.1.0dev" } }, [ @@ -99,21 +155,73 @@ "multiqc/multiqc_data/multiqc_citations.txt", "multiqc/multiqc_data/multiqc_data.json", "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_flagstat.txt", + "multiqc/multiqc_data/multiqc_samtools_idxstats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", "multiqc/multiqc_data/multiqc_software_versions.txt", "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/multiqc_whatshap_phased_bp_plot.txt", + "multiqc/multiqc_data/multiqc_whatshap_stats.txt", + 
"multiqc/multiqc_data/samtools-flagstat-pct-table.txt", + "multiqc/multiqc_data/samtools-flagstat-table.txt", + "multiqc/multiqc_data/samtools-idxstats-mapped-reads-plot_Normalised_Counts.txt", + "multiqc/multiqc_data/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts.txt", + "multiqc/multiqc_data/samtools-idxstats-mapped-reads-plot_Raw_Counts.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/whatshap-stats-table.txt", "multiqc/multiqc_plots", "multiqc/multiqc_plots/pdf", "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/multiqc_whatshap_phased_bp_plot.pdf", + "multiqc/multiqc_plots/pdf/samtools-flagstat-pct-table.pdf", + "multiqc/multiqc_plots/pdf/samtools-flagstat-table.pdf", + "multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Normalised_Counts-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Normalised_Counts-log.pdf", + "multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts-log.pdf", + "multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Raw_Counts-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Raw_Counts-log.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/whatshap-stats-table.pdf", "multiqc/multiqc_plots/png", "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", 
"multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/multiqc_whatshap_phased_bp_plot.png", + "multiqc/multiqc_plots/png/samtools-flagstat-pct-table.png", + "multiqc/multiqc_plots/png/samtools-flagstat-table.png", + "multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Normalised_Counts-cnt.png", + "multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Normalised_Counts-log.png", + "multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts-cnt.png", + "multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts-log.png", + "multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Raw_Counts-cnt.png", + "multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Raw_Counts-log.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/whatshap-stats-table.png", "multiqc/multiqc_plots/svg", "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/multiqc_whatshap_phased_bp_plot.svg", + "multiqc/multiqc_plots/svg/samtools-flagstat-pct-table.svg", + "multiqc/multiqc_plots/svg/samtools-flagstat-table.svg", + "multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Normalised_Counts-cnt.svg", + "multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Normalised_Counts-log.svg", + "multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts-cnt.svg", + "multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts-log.svg", + "multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Raw_Counts-cnt.svg", + 
"multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Raw_Counts-log.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/whatshap-stats-table.svg", "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/lrsomatic_software_mqc_versions.yml", @@ -126,28 +234,70 @@ "sample1/qc", "sample1/qc/normal", "sample1/qc/normal/cramino_aln", - "sample1/qc/normal/cramino_aln/sample1_cramino.txt", - "sample1/qc/normal/cramino_ubam", - "sample1/qc/normal/cramino_ubam/sample1_cramino.txt", + "sample1/qc/normal/cramino_aln/sample1_normal_cramino.txt", + "sample1/qc/normal/cramino_ubam_rep1", + "sample1/qc/normal/cramino_ubam_rep1/sample1_normal_cramino.txt", "sample1/qc/normal/mosdepth", "sample1/qc/normal/mosdepth/sample1.mosdepth.global.dist.txt", "sample1/qc/normal/mosdepth/sample1.mosdepth.summary.txt", + "sample1/qc/normal/nanoplot_aln", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_NanoPlot-report.html", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_NanoStats.txt", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_Non_weightedHistogramReadlength.html", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_Non_weightedLogTransformed_HistogramReadlength.html", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_PercentIdentityHistogramDynamic_Histogram_percent_identity.html", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_PercentIdentityvsAlignedReadLength_dot.html", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_PercentIdentityvsAlignedReadLength_kde.html", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_WeightedHistogramReadlength.html", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_WeightedLogTransformed_HistogramReadlength.html", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_Yield_By_Length.html", + "sample1/qc/normal/nanoplot_ubam_rep1", + 
"sample1/qc/normal/nanoplot_ubam_rep1/sample1_normal_ubam_NanoPlot-report.html", + "sample1/qc/normal/nanoplot_ubam_rep1/sample1_normal_ubam_NanoStats.txt", + "sample1/qc/normal/nanoplot_ubam_rep1/sample1_normal_ubam_Non_weightedHistogramReadlength.html", + "sample1/qc/normal/nanoplot_ubam_rep1/sample1_normal_ubam_Non_weightedLogTransformed_HistogramReadlength.html", + "sample1/qc/normal/nanoplot_ubam_rep1/sample1_normal_ubam_WeightedHistogramReadlength.html", + "sample1/qc/normal/nanoplot_ubam_rep1/sample1_normal_ubam_WeightedLogTransformed_HistogramReadlength.html", + "sample1/qc/normal/nanoplot_ubam_rep1/sample1_normal_ubam_Yield_By_Length.html", "sample1/qc/normal/samtools", "sample1/qc/normal/samtools/sample1.flagstat", "sample1/qc/normal/samtools/sample1.idxstats", "sample1/qc/normal/samtools/sample1.stats", "sample1/qc/tumor", "sample1/qc/tumor/cramino_aln", - "sample1/qc/tumor/cramino_aln/sample1_cramino.txt", - "sample1/qc/tumor/cramino_ubam", - "sample1/qc/tumor/cramino_ubam/sample1_cramino.txt", + "sample1/qc/tumor/cramino_aln/sample1_tumor_cramino.txt", + "sample1/qc/tumor/cramino_ubam_rep1", + "sample1/qc/tumor/cramino_ubam_rep1/sample1_tumor_cramino.txt", "sample1/qc/tumor/mosdepth", "sample1/qc/tumor/mosdepth/sample1.mosdepth.global.dist.txt", "sample1/qc/tumor/mosdepth/sample1.mosdepth.summary.txt", + "sample1/qc/tumor/nanoplot_aln", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_NanoPlot-report.html", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_NanoStats.txt", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_Non_weightedHistogramReadlength.html", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_Non_weightedLogTransformed_HistogramReadlength.html", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_PercentIdentityHistogramDynamic_Histogram_percent_identity.html", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_PercentIdentityvsAlignedReadLength_dot.html", + 
"sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_PercentIdentityvsAlignedReadLength_kde.html", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_WeightedHistogramReadlength.html", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_WeightedLogTransformed_HistogramReadlength.html", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_Yield_By_Length.html", + "sample1/qc/tumor/nanoplot_ubam_rep1", + "sample1/qc/tumor/nanoplot_ubam_rep1/sample1_tumor_ubam_NanoPlot-report.html", + "sample1/qc/tumor/nanoplot_ubam_rep1/sample1_tumor_ubam_NanoStats.txt", + "sample1/qc/tumor/nanoplot_ubam_rep1/sample1_tumor_ubam_Non_weightedHistogramReadlength.html", + "sample1/qc/tumor/nanoplot_ubam_rep1/sample1_tumor_ubam_Non_weightedLogTransformed_HistogramReadlength.html", + "sample1/qc/tumor/nanoplot_ubam_rep1/sample1_tumor_ubam_WeightedHistogramReadlength.html", + "sample1/qc/tumor/nanoplot_ubam_rep1/sample1_tumor_ubam_WeightedLogTransformed_HistogramReadlength.html", + "sample1/qc/tumor/nanoplot_ubam_rep1/sample1_tumor_ubam_Yield_By_Length.html", "sample1/qc/tumor/samtools", "sample1/qc/tumor/samtools/sample1.flagstat", "sample1/qc/tumor/samtools/sample1.idxstats", "sample1/qc/tumor/samtools/sample1.stats", + "sample1/qc/whatshap_stats", + "sample1/qc/whatshap_stats/sample1_whatshap_stats.gtf", + "sample1/qc/whatshap_stats/sample1_whatshap_stats.log", + "sample1/qc/whatshap_stats/sample1_whatshap_stats.tsv", "sample1/variants", "sample1/variants/clair3", "sample1/variants/clair3/merge_output.vcf.gz", @@ -157,12 +307,26 @@ "sample1/variants/clairs/indel.vcf.gz.tbi", "sample1/variants/clairs/snvs.vcf.gz", "sample1/variants/clairs/snvs.vcf.gz.tbi", + "sample1/variants/deepsomatic", + "sample1/variants/deepsomatic/sample1.vcf.gz", + "sample1/variants/deepsomatic/sample1.vcf.gz.tbi", + "sample1/variants/deepvariant", + "sample1/variants/deepvariant/sample1.vcf.gz", + "sample1/variants/deepvariant/sample1.vcf.gz.tbi", + "sample1/variants/phased", + 
"sample1/variants/phased/germline_smallvariants.vcf.gz", + "sample1/variants/phased/germline_smallvariants.vcf.gz.tbi", + "sample1/variants/phased/germline_smallvariants_mod.vcf.gz", + "sample1/variants/phased/germline_smallvariants_mod.vcf.gz.tbi", + "sample1/variants/phased/somatic_smallvariants.vcf.gz", + "sample1/variants/phased/somatic_smallvariants.vcf.gz.tbi", + "sample1/variants/phased/somatic_smallvariants_mod.vcf.gz", + "sample1/variants/phased/somatic_smallvariants_mod.vcf.gz.tbi", "sample1/variants/severus", "sample1/variants/severus/all_SVs", "sample1/variants/severus/all_SVs/breakpoint_clusters.tsv", "sample1/variants/severus/all_SVs/breakpoint_clusters_list.tsv", "sample1/variants/severus/all_SVs/severus_all.vcf.gz", - "sample1/variants/severus/all_SVs/severus_all.vcf.gz.tbi", "sample1/variants/severus/breakpoints_double.csv", "sample1/variants/severus/read_ids.csv", "sample1/variants/severus/read_qual.txt", @@ -171,7 +335,6 @@ "sample1/variants/severus/somatic_SVs/breakpoint_clusters.tsv", "sample1/variants/severus/somatic_SVs/breakpoint_clusters_list.tsv", "sample1/variants/severus/somatic_SVs/severus_somatic.vcf.gz", - "sample1/variants/severus/somatic_SVs/severus_somatic.vcf.gz.tbi", "sample1/vep", "sample1/vep/SVs", "sample1/vep/SVs/sample1_SV_VEP.vcf.gz", @@ -194,28 +357,70 @@ "sample2/qc", "sample2/qc/normal", "sample2/qc/normal/cramino_aln", - "sample2/qc/normal/cramino_aln/sample2_cramino.txt", - "sample2/qc/normal/cramino_ubam", - "sample2/qc/normal/cramino_ubam/sample2_cramino.txt", + "sample2/qc/normal/cramino_aln/sample2_normal_cramino.txt", + "sample2/qc/normal/cramino_ubam_rep1", + "sample2/qc/normal/cramino_ubam_rep1/sample2_normal_cramino.txt", "sample2/qc/normal/mosdepth", "sample2/qc/normal/mosdepth/sample2.mosdepth.global.dist.txt", "sample2/qc/normal/mosdepth/sample2.mosdepth.summary.txt", + "sample2/qc/normal/nanoplot_aln", + "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_NanoPlot-report.html", + 
"sample2/qc/normal/nanoplot_aln/sample2_normal_aln_NanoStats.txt", + "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_Non_weightedHistogramReadlength.html", + "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_Non_weightedLogTransformed_HistogramReadlength.html", + "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_PercentIdentityHistogramDynamic_Histogram_percent_identity.html", + "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_PercentIdentityvsAlignedReadLength_dot.html", + "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_PercentIdentityvsAlignedReadLength_kde.html", + "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_WeightedHistogramReadlength.html", + "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_WeightedLogTransformed_HistogramReadlength.html", + "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_Yield_By_Length.html", + "sample2/qc/normal/nanoplot_ubam_rep1", + "sample2/qc/normal/nanoplot_ubam_rep1/sample2_normal_ubam_NanoPlot-report.html", + "sample2/qc/normal/nanoplot_ubam_rep1/sample2_normal_ubam_NanoStats.txt", + "sample2/qc/normal/nanoplot_ubam_rep1/sample2_normal_ubam_Non_weightedHistogramReadlength.html", + "sample2/qc/normal/nanoplot_ubam_rep1/sample2_normal_ubam_Non_weightedLogTransformed_HistogramReadlength.html", + "sample2/qc/normal/nanoplot_ubam_rep1/sample2_normal_ubam_WeightedHistogramReadlength.html", + "sample2/qc/normal/nanoplot_ubam_rep1/sample2_normal_ubam_WeightedLogTransformed_HistogramReadlength.html", + "sample2/qc/normal/nanoplot_ubam_rep1/sample2_normal_ubam_Yield_By_Length.html", "sample2/qc/normal/samtools", "sample2/qc/normal/samtools/sample2.flagstat", "sample2/qc/normal/samtools/sample2.idxstats", "sample2/qc/normal/samtools/sample2.stats", "sample2/qc/tumor", "sample2/qc/tumor/cramino_aln", - "sample2/qc/tumor/cramino_aln/sample2_cramino.txt", - "sample2/qc/tumor/cramino_ubam", - "sample2/qc/tumor/cramino_ubam/sample2_cramino.txt", + "sample2/qc/tumor/cramino_aln/sample2_tumor_cramino.txt", + 
"sample2/qc/tumor/cramino_ubam_rep1", + "sample2/qc/tumor/cramino_ubam_rep1/sample2_tumor_cramino.txt", "sample2/qc/tumor/mosdepth", "sample2/qc/tumor/mosdepth/sample2.mosdepth.global.dist.txt", "sample2/qc/tumor/mosdepth/sample2.mosdepth.summary.txt", + "sample2/qc/tumor/nanoplot_aln", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_NanoPlot-report.html", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_NanoStats.txt", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_Non_weightedHistogramReadlength.html", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_Non_weightedLogTransformed_HistogramReadlength.html", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_PercentIdentityHistogramDynamic_Histogram_percent_identity.html", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_PercentIdentityvsAlignedReadLength_dot.html", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_PercentIdentityvsAlignedReadLength_kde.html", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_WeightedHistogramReadlength.html", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_WeightedLogTransformed_HistogramReadlength.html", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_Yield_By_Length.html", + "sample2/qc/tumor/nanoplot_ubam_rep1", + "sample2/qc/tumor/nanoplot_ubam_rep1/sample2_tumor_ubam_NanoPlot-report.html", + "sample2/qc/tumor/nanoplot_ubam_rep1/sample2_tumor_ubam_NanoStats.txt", + "sample2/qc/tumor/nanoplot_ubam_rep1/sample2_tumor_ubam_Non_weightedHistogramReadlength.html", + "sample2/qc/tumor/nanoplot_ubam_rep1/sample2_tumor_ubam_Non_weightedLogTransformed_HistogramReadlength.html", + "sample2/qc/tumor/nanoplot_ubam_rep1/sample2_tumor_ubam_WeightedHistogramReadlength.html", + "sample2/qc/tumor/nanoplot_ubam_rep1/sample2_tumor_ubam_WeightedLogTransformed_HistogramReadlength.html", + "sample2/qc/tumor/nanoplot_ubam_rep1/sample2_tumor_ubam_Yield_By_Length.html", "sample2/qc/tumor/samtools", "sample2/qc/tumor/samtools/sample2.flagstat", 
"sample2/qc/tumor/samtools/sample2.idxstats", "sample2/qc/tumor/samtools/sample2.stats", + "sample2/qc/whatshap_stats", + "sample2/qc/whatshap_stats/sample2_whatshap_stats.gtf", + "sample2/qc/whatshap_stats/sample2_whatshap_stats.log", + "sample2/qc/whatshap_stats/sample2_whatshap_stats.tsv", "sample2/variants", "sample2/variants/clair3", "sample2/variants/clair3/merge_output.vcf.gz", @@ -225,12 +430,26 @@ "sample2/variants/clairs/indel.vcf.gz.tbi", "sample2/variants/clairs/snvs.vcf.gz", "sample2/variants/clairs/snvs.vcf.gz.tbi", + "sample2/variants/deepsomatic", + "sample2/variants/deepsomatic/sample2.vcf.gz", + "sample2/variants/deepsomatic/sample2.vcf.gz.tbi", + "sample2/variants/deepvariant", + "sample2/variants/deepvariant/sample2.vcf.gz", + "sample2/variants/deepvariant/sample2.vcf.gz.tbi", + "sample2/variants/phased", + "sample2/variants/phased/germline_smallvariants.vcf.gz", + "sample2/variants/phased/germline_smallvariants.vcf.gz.tbi", + "sample2/variants/phased/germline_smallvariants_mod.vcf.gz", + "sample2/variants/phased/germline_smallvariants_mod.vcf.gz.tbi", + "sample2/variants/phased/somatic_smallvariants.vcf.gz", + "sample2/variants/phased/somatic_smallvariants.vcf.gz.tbi", + "sample2/variants/phased/somatic_smallvariants_mod.vcf.gz", + "sample2/variants/phased/somatic_smallvariants_mod.vcf.gz.tbi", "sample2/variants/severus", "sample2/variants/severus/all_SVs", "sample2/variants/severus/all_SVs/breakpoint_clusters.tsv", "sample2/variants/severus/all_SVs/breakpoint_clusters_list.tsv", "sample2/variants/severus/all_SVs/severus_all.vcf.gz", - "sample2/variants/severus/all_SVs/severus_all.vcf.gz.tbi", "sample2/variants/severus/breakpoints_double.csv", "sample2/variants/severus/read_ids.csv", "sample2/variants/severus/read_qual.txt", @@ -239,7 +458,6 @@ "sample2/variants/severus/somatic_SVs/breakpoint_clusters.tsv", "sample2/variants/severus/somatic_SVs/breakpoint_clusters_list.tsv", "sample2/variants/severus/somatic_SVs/severus_somatic.vcf.gz", - 
"sample2/variants/severus/somatic_SVs/severus_somatic.vcf.gz.tbi", "sample2/vep", "sample2/vep/SVs", "sample2/vep/SVs/sample2_SV_VEP.vcf.gz", @@ -260,16 +478,39 @@ "sample3/qc", "sample3/qc/tumor", "sample3/qc/tumor/cramino_aln", - "sample3/qc/tumor/cramino_aln/sample3_cramino.txt", - "sample3/qc/tumor/cramino_ubam", - "sample3/qc/tumor/cramino_ubam/sample3_cramino.txt", + "sample3/qc/tumor/cramino_aln/sample3_tumor_cramino.txt", + "sample3/qc/tumor/cramino_ubam_rep1", + "sample3/qc/tumor/cramino_ubam_rep1/sample3_tumor_cramino.txt", "sample3/qc/tumor/mosdepth", "sample3/qc/tumor/mosdepth/sample3.mosdepth.global.dist.txt", "sample3/qc/tumor/mosdepth/sample3.mosdepth.summary.txt", + "sample3/qc/tumor/nanoplot_aln", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_NanoPlot-report.html", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_NanoStats.txt", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_Non_weightedHistogramReadlength.html", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_Non_weightedLogTransformed_HistogramReadlength.html", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_PercentIdentityHistogramDynamic_Histogram_percent_identity.html", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_PercentIdentityvsAlignedReadLength_dot.html", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_PercentIdentityvsAlignedReadLength_kde.html", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_WeightedHistogramReadlength.html", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_WeightedLogTransformed_HistogramReadlength.html", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_Yield_By_Length.html", + "sample3/qc/tumor/nanoplot_ubam_rep1", + "sample3/qc/tumor/nanoplot_ubam_rep1/sample3_tumor_ubam_NanoPlot-report.html", + "sample3/qc/tumor/nanoplot_ubam_rep1/sample3_tumor_ubam_NanoStats.txt", + "sample3/qc/tumor/nanoplot_ubam_rep1/sample3_tumor_ubam_Non_weightedHistogramReadlength.html", + 
"sample3/qc/tumor/nanoplot_ubam_rep1/sample3_tumor_ubam_Non_weightedLogTransformed_HistogramReadlength.html", + "sample3/qc/tumor/nanoplot_ubam_rep1/sample3_tumor_ubam_WeightedHistogramReadlength.html", + "sample3/qc/tumor/nanoplot_ubam_rep1/sample3_tumor_ubam_WeightedLogTransformed_HistogramReadlength.html", + "sample3/qc/tumor/nanoplot_ubam_rep1/sample3_tumor_ubam_Yield_By_Length.html", "sample3/qc/tumor/samtools", "sample3/qc/tumor/samtools/sample3.flagstat", "sample3/qc/tumor/samtools/sample3.idxstats", "sample3/qc/tumor/samtools/sample3.stats", + "sample3/qc/whatshap_stats", + "sample3/qc/whatshap_stats/sample3_whatshap_stats.gtf", + "sample3/qc/whatshap_stats/sample3_whatshap_stats.log", + "sample3/qc/whatshap_stats/sample3_whatshap_stats.tsv", "sample3/variants", "sample3/variants/clairsto", "sample3/variants/clairsto/germline.vcf.gz", @@ -280,12 +521,26 @@ "sample3/variants/clairsto/snv.vcf.gz.tbi", "sample3/variants/clairsto/somatic.vcf.gz", "sample3/variants/clairsto/somatic.vcf.gz.tbi", + "sample3/variants/deepsomatic", + "sample3/variants/deepsomatic/sample3.vcf.gz", + "sample3/variants/deepsomatic/sample3.vcf.gz.tbi", + "sample3/variants/deepvariant", + "sample3/variants/deepvariant/sample3.vcf.gz", + "sample3/variants/deepvariant/sample3.vcf.gz.tbi", + "sample3/variants/phased", + "sample3/variants/phased/germline_smallvariants.vcf.gz", + "sample3/variants/phased/germline_smallvariants.vcf.gz.tbi", + "sample3/variants/phased/germline_smallvariants_mod.vcf.gz", + "sample3/variants/phased/germline_smallvariants_mod.vcf.gz.tbi", + "sample3/variants/phased/somatic_smallvariants.vcf.gz", + "sample3/variants/phased/somatic_smallvariants.vcf.gz.tbi", + "sample3/variants/phased/somatic_smallvariants_mod.vcf.gz", + "sample3/variants/phased/somatic_smallvariants_mod.vcf.gz.tbi", "sample3/variants/severus", "sample3/variants/severus/all_SVs", "sample3/variants/severus/all_SVs/breakpoint_clusters.tsv", 
"sample3/variants/severus/all_SVs/breakpoint_clusters_list.tsv", "sample3/variants/severus/all_SVs/severus_all.vcf.gz", - "sample3/variants/severus/all_SVs/severus_all.vcf.gz.tbi", "sample3/variants/severus/breakpoints_double.csv", "sample3/variants/severus/read_ids.csv", "sample3/variants/severus/read_qual.txt", @@ -294,7 +549,6 @@ "sample3/variants/severus/somatic_SVs/breakpoint_clusters.tsv", "sample3/variants/severus/somatic_SVs/breakpoint_clusters_list.tsv", "sample3/variants/severus/somatic_SVs/severus_somatic.vcf.gz", - "sample3/variants/severus/somatic_SVs/severus_somatic.vcf.gz.tbi", "sample3/vep", "sample3/vep/SVs", "sample3/vep/SVs/sample3_SV_VEP.vcf.gz", @@ -310,65 +564,64 @@ "sample3/vep/somatic/sample3_SOMATIC_VEP.vcf.gz_summary.html" ], [ - "sample1_normal.bam:md5,7373f28eae1e18614bec4508e6647ff2", - "sample1_normal.bam.bai:md5,fc9dd46a2a04c098cee87650edec0f89", - "sample1_tumor.bam:md5,4aa78492fa890945efe2af47cbe76194", - "sample1_tumor.bam.bai:md5,75ed8b553427bbbd9bbc3c7b52982e85", - "sample1.mosdepth.global.dist.txt:md5,4e1c72f8465c18ffd854c42850eb7c5f", - "sample1.mosdepth.summary.txt:md5,cf13d4b24e5ebf31b629a1195a1fff41", - "sample1.flagstat:md5,815a5385bd57ef44847714130b80d630", - "sample1.idxstats:md5,19be02d7e966e4a291b66ab5b14742d3", - "sample1.stats:md5,f61e05f232d4b3174797d4b25bdd9457", - "sample1.mosdepth.global.dist.txt:md5,e04da37ef2b7cd587fa3158b9f36d2cb", - "sample1.mosdepth.summary.txt:md5,ef3aefa72ca2e9bbbe5acc91fc1ecde6", - "sample1.flagstat:md5,34e851b5504d961632f26991160ded5a", - "sample1.idxstats:md5,1d43b03114bcc9b70d4333e91498efbe", - "sample1.stats:md5,afdcefd9c5a69d5252fe0f9186d349fd", + "sample1_normal.bam:md5,3ce847c38eb619781e32a10c28e0c35c", + "sample1_normal.bam.bai:md5,8dd8c7fa037badc7097067d5a88672cd", + "sample1_tumor.bam:md5,ed5eb35b63d5e92fa8e461b9a1732b21", + "sample1_tumor.bam.bai:md5,21018d3f1f85be74fd7dc66873219b05", + "sample1.flagstat:md5,1c41ea9923945501eb7e41f83a90502d", + 
"sample1.idxstats:md5,902e503387799123ea59255e3fca172c", + "sample1.stats:md5,70fabbdc07dec0479b3fc7dcec344054", + "sample1.flagstat:md5,8ff32d733c62c4910bf185ef24bf27cf", + "sample1.idxstats:md5,2de140e61f9e86c9c10af20dd565cc93", + "sample1.stats:md5,5012c82d3d3ca60ffdd2fb970f772566", + "sample1_whatshap_stats.gtf:md5,428ca0e0f48dc2e3e1b978fa7cf720f3", + "sample1_whatshap_stats.log:md5,5c1f0f79a60a6879b75271fa94b620e8", + "sample1_whatshap_stats.tsv:md5,98582c7e0ff74a2a1978bf70ac9926ee", "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", - "breakpoints_double.csv:md5,244b62ee8500ddccf63bf1a8eb19d7bf", - "read_qual.txt:md5,1ad9d1900f8dcb291c97adc65c9d341c", + "breakpoints_double.csv:md5,27b409c73dd0d8bde316545f86ac7f15", + "read_qual.txt:md5,fbe6cd0b65cbfc1ca699e252e531ab72", "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", - "sample2_normal.bam:md5,c96d49ed1176c787752e7fcf23bd5ffb", - "sample2_normal.bam.bai:md5,87d97a08bdca08f6eaf2725dd114d3c3", - "sample2_tumor.bam:md5,4f375cb01fb7a7c8161474f2f789d8a4", - "sample2_tumor.bam.bai:md5,54967d76febdeb0abed1bd68d8aee337", - "sample2.mosdepth.global.dist.txt:md5,6cdc97a81a603db702cb5a113b8bc62a", - "sample2.mosdepth.summary.txt:md5,864370930ec1d695d942f4960bcf8fc6", - "sample2.flagstat:md5,cce0bb7ca79e14d8369ccc714adf4be3", - "sample2.idxstats:md5,e7de97b2362a8e944896dc4eca0b0bd8", - "sample2.stats:md5,9d98e3ec064b376880648a79c199b9b5", - "sample2.mosdepth.global.dist.txt:md5,eda3bf93b39e342e85e43931ce8b417e", - "sample2.mosdepth.summary.txt:md5,a68ca9504f5c9b73bf697d8ac22a1df0", - "sample2.flagstat:md5,83e7d7d922941691d2b023f0bd9655aa", - "sample2.idxstats:md5,fe8a5d1263481ea7902d575b4d95f655", - "sample2.stats:md5,2904de743414042e112d541ffc0f83ba", + "sample2_normal.bam:md5,2ecddb6279310240f2dc29b0cf7f8c84", + 
"sample2_normal.bam.bai:md5,9caebccee8882bcf95e65631c0ac6730", + "sample2_tumor.bam:md5,c8db7cc4b189dec3fadd2cce07d9fcae", + "sample2_tumor.bam.bai:md5,28827d78aa318e26f71b68581ed5c607", + "sample2.flagstat:md5,714d0cc0c213e2640e54a16f3d0e6e7e", + "sample2.idxstats:md5,72eb83bb11748dc863fef1a0a5497e4b", + "sample2.stats:md5,87cb6e9adf8a133244e8b331be43bb14", + "sample2.flagstat:md5,4344a8745efef9cc2a017024218d61c6", + "sample2.idxstats:md5,69467fc02c83a30084736aeea8b785fb", + "sample2.stats:md5,1e044857eeefb284fda88ee58ff7a04a", + "sample2_whatshap_stats.gtf:md5,a13f0ac1edd7abde4ad013bf2619fe0f", + "sample2_whatshap_stats.log:md5,1d5ed1faca328d3014e9b14a44d18a23", + "sample2_whatshap_stats.tsv:md5,a275209ef9e7885ee5ea3a4aa1c970fd", "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", - "breakpoints_double.csv:md5,6a1d4530feae8258a925d1f0641a63ff", - "read_qual.txt:md5,27edf87814aec6fa18546c8606aae4ed", + "breakpoints_double.csv:md5,7d2fe02046bd2ff7138b46d8f67fc755", + "read_qual.txt:md5,fe3f87458d7c0c6591c37e1fd70cecf2", "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", - "sample3_tumor.bam:md5,c80bbdcb0a6fcfe4164c6d96e6d9ad6a", - "sample3_tumor.bam.bai:md5,cdf45221635a6b03be6f28aa60b202bc", - "sample3.mosdepth.global.dist.txt:md5,e04da37ef2b7cd587fa3158b9f36d2cb", - "sample3.mosdepth.summary.txt:md5,ef3aefa72ca2e9bbbe5acc91fc1ecde6", - "sample3.flagstat:md5,34e851b5504d961632f26991160ded5a", - "sample3.idxstats:md5,1d43b03114bcc9b70d4333e91498efbe", - "sample3.stats:md5,48eb55e610bd2a6d13c5d38d3c61d29e", + "sample3_tumor.bam:md5,f5dfadf92345cd529de4c1919f82b632", + "sample3_tumor.bam.bai:md5,f8ca34c0584329c62cb70ce8fb942cb6", + "sample3.flagstat:md5,8ff32d733c62c4910bf185ef24bf27cf", + "sample3.idxstats:md5,2de140e61f9e86c9c10af20dd565cc93", + "sample3.stats:md5,d7a8552a8a41a217954a0c825d468a60", + 
"sample3_whatshap_stats.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample3_whatshap_stats.log:md5,ca067293878d1760638626a8c5a31432", + "sample3_whatshap_stats.tsv:md5,62beceb9731cafc620ce5c6eb07a9cc9", "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", - "breakpoints_double.csv:md5,298a01c868eb493baaaa90ced9a9f17e", - "read_qual.txt:md5,1b4392f3b9071533e9ea77ff9df6c813", + "breakpoints_double.csv:md5,a9a0e0a75975904952788c2a0bd3fa85", + "read_qual.txt:md5,25efaa43bb81a4592bfb8f5f08f84b34", "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50" ] ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.0" + "nextflow": "25.10.2" }, - "timestamp": "2025-11-28T14:26:44.508445086" + "timestamp": "2026-04-27T21:19:25.811843046" } } \ No newline at end of file diff --git a/workflows/lrsomatic.nf b/workflows/lrsomatic.nf index 2b84c973..72cb930a 100644 --- a/workflows/lrsomatic.nf +++ b/workflows/lrsomatic.nf @@ -13,33 +13,38 @@ include { getGenomeAttribute } from '../subworkflows/local/utils_nfcore_lrso // // IMPORT MODULES // -include { SAMTOOLS_CAT } from '../modules/nf-core/samtools/cat/main' -include { MINIMAP2_INDEX } from '../modules/nf-core/minimap2/index/main' -include { MINIMAP2_ALIGN } from '../modules/nf-core/minimap2/align/main' -include { CRAMINO as CRAMINO_PRE } from '../modules/local/cramino/main' -include { CRAMINO as CRAMINO_POST } from '../modules/local/cramino/main' -include { MOSDEPTH } from '../modules/nf-core/mosdepth/main' -include { ASCAT } from '../modules/nf-core/ascat/main' -include { SEVERUS } from '../modules/nf-core/severus/main.nf' -include { METAEXTRACT } from '../modules/local/metaextract/main' -include { WAKHAN } from '../modules/local/wakhan/main' -include { FIBERTOOLSRS_PREDICTM6A } from '../modules/local/fibertoolsrs/predictm6a' -include { FIBERTOOLSRS_FIRE } from 
'../modules/local/fibertoolsrs/fire' -include { FIBERTOOLSRS_NUCLEOSOMES } from '../modules/local/fibertoolsrs/nucleosomes' -include { FIBERTOOLSRS_QC } from '../modules/local/fibertoolsrs/qc' -include { ENSEMBLVEP_VEP as SOMATIC_VEP } from '../modules/nf-core/ensemblvep/vep/main.nf' -include { ENSEMBLVEP_VEP as GERMLINE_VEP } from '../modules/nf-core/ensemblvep/vep/main.nf' -include { ENSEMBLVEP_VEP as SV_VEP } from '../modules/nf-core/ensemblvep/vep/main.nf' +include { SAMTOOLS_CAT } from '../modules/nf-core/samtools/cat/main' +include { MINIMAP2_INDEX } from '../modules/nf-core/minimap2/index/main' +include { MINIMAP2_ALIGN } from '../modules/nf-core/minimap2/align/main' +include { CRAMINO as CRAMINO_PRE } from '../modules/local/cramino/main' +include { CRAMINO as CRAMINO_POST } from '../modules/local/cramino/main' +include { NANOPLOT as NANOPLOT_PRE } from '../modules/nf-core/nanoplot/main' +include { NANOPLOT as NANOPLOT_POST } from '../modules/nf-core/nanoplot/main' +include { MOSDEPTH } from '../modules/nf-core/mosdepth/main' +include { ASCAT } from '../modules/nf-core/ascat/main' +include { SEVERUS } from '../modules/nf-core/severus/main.nf' +include { METAEXTRACT } from '../modules/local/metaextract/main' +include { WAKHAN } from '../modules/local/wakhan/main' +include { FIBERTOOLSRS_PREDICTM6A } from '../modules/local/fibertoolsrs/predictm6a' +include { FIBERTOOLSRS_FIRE } from '../modules/local/fibertoolsrs/fire' +include { FIBERTOOLSRS_NUCLEOSOMES } from '../modules/local/fibertoolsrs/nucleosomes' +include { FIBERTOOLSRS_QC } from '../modules/local/fibertoolsrs/qc' +include { ENSEMBLVEP_VEP as SOMATIC_VEP } from '../modules/nf-core/ensemblvep/vep/main.nf' +include { ENSEMBLVEP_VEP as GERMLINE_VEP } from '../modules/nf-core/ensemblvep/vep/main.nf' +include { ENSEMBLVEP_VEP as SV_VEP } from '../modules/nf-core/ensemblvep/vep/main.nf' +include { WHATSHAP_STATS } from '../modules/nf-core/whatshap/stats/main' +include { MODKIT_PILEUP } from 
'../modules/nf-core/modkit/pileup/main' + // // IMPORT SUBWORKFLOWS // -include { PREPARE_REFERENCE_FILES } from '../subworkflows/local/prepare_reference_files' -include { PREPARE_ANNOTATION } from '../subworkflows/local/prepare_annotation' -include { BAM_STATS_SAMTOOLS } from '../subworkflows/nf-core/bam_stats_samtools/main' -include { TUMOR_NORMAL_HAPPHASE } from '../subworkflows/local/tumor_normal_happhase' -include { TUMOR_ONLY_HAPPHASE } from '../subworkflows/local/tumor_only_happhase' - - +include { PREPARE_REFERENCE_FILES } from '../subworkflows/local/prepare_reference_files' +include { PREPARE_ANNOTATION } from '../subworkflows/local/prepare_annotation' +include { BAM_STATS_SAMTOOLS } from '../subworkflows/nf-core/bam_stats_samtools/main' +include { TUMORONLY_SMALLVAR } from '../subworkflows/local/tumor_only/tumoronly_smallvar' +include { PAIRED_SMALLVAR_SOMATIC } from '../subworkflows/local/paired/paired_smallvar_somatic' +include { PAIRED_SMALLVAR_GERMLINE } from '../subworkflows/local/paired/paired_smallvar_germline' +include { PHASING_HAPLOTYPING } from '../subworkflows/local/phasing_haplotyping' @@ -65,7 +70,7 @@ workflow LRSOMATIC { 'dna_r10.4.1_e8.2_400bps_sup@v4.2.0': 'r1041_e82_400bps_sup_v420', 'dna_r10.4.1_e8.2_400bps_sup@v4.1.0': 'r1041_e82_400bps_sup_v410', 'dna_r10.4.1_e8.2_260bps_sup@v4.0.0': 'r1041_e82_260bps_sup_v400', - 'hifi_revio' : 'hifi_revio' + 'hifi_revio' : 'hifi' ] def clairs_modelMap = [ @@ -75,7 +80,7 @@ workflow LRSOMATIC { 'dna_r10.4.1_e8.2_400bps_sup@v4.3.0': 'ont_r10_dorado_sup_5khz_ssrs', 'dna_r10.4.1_e8.2_400bps_sup@v5.0.0': 'ont_r10_dorado_sup_5khz_ssrs', 'dna_r10.4.1_e8.2_400bps_sup@v5.2.0': 'ont_r10_dorado_sup_5khz_ssrs', - 'hifi_revio' : 'hifi_revio_ss' + 'hifi_revio' : 'hifi_revio_ssrs' ] @@ -84,109 +89,235 @@ workflow LRSOMATIC { params.genome_name = getGenomeAttribute('genome_name') params.ascat_allele_files = getGenomeAttribute('ascat_alleles') params.ascat_loci_files = getGenomeAttribute('ascat_loci') + 
params.ascat_gc_file = getGenomeAttribute('ascat_loci_gc') + params.ascat_rt_file = getGenomeAttribute('ascat_loci_rt') params.centromere_bed = getGenomeAttribute('centromere_bed') params.pon_file = getGenomeAttribute('pon_file') params.bed_file = getGenomeAttribute('bed_file') params.vep_genome = getGenomeAttribute('vep_genome') params.vep_species = getGenomeAttribute('vep_species') - params.dbsnp = getGenomeAttribute('dbsnp') - params.colors = getGenomeAttribute('colors') - params.onekgenomes = getGenomeAttribute('onekgenomes') - params.gnomad = getGenomeAttribute('gnomad') - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() + if (params.clairsto_pon_vcfs != null) { + pon_files = params.clairsto_pon_vcfs.split(',').collect { f -> file(f.trim()) } + if (params.clairsto_pon_flags != null) { + pon_flags = params.clairsto_pon_flags.split(',').collect { f -> f.trim() } + } else if (params.genome == 'GRCh38') { + pon_flags = ["True", "True", "False", "False"] + } else if (params.genome == 'CHM13') { + pon_flags = ["True", "True", "False", "False", "False"] + } else { + pon_flags = pon_files.collect { "False" } + } + } + else if (params.genome == 'GRCh38') { + pon_files = [ + getGenomeAttribute('gnomad'), + getGenomeAttribute('dbsnp'), + getGenomeAttribute('onekgenomes'), + getGenomeAttribute('colors'), + ] + pon_flags = [ + "True", + "True", + "False", + "False" + ] + } + else if (params.genome == 'CHM13') { + pon_files = [ + getGenomeAttribute('gnomad'), + getGenomeAttribute('dbsnp'), + getGenomeAttribute('onekgenomes'), + getGenomeAttribute('colors'), + getGenomeAttribute('asap') + ] + pon_flags = [ + "True", + "True", + "False", + "False", + "False" + ] + } + if (pon_files.size() != pon_flags.size()) { + error "PoN VCFs and allele flags must have same length" + } + channel + .of( tuple(pon_files, pon_flags) ) + .set { clairsto_pon_channel } + // clairsto_pon_channel: [ [pon_vcf_path, ...], [is_population_allele_flag, ...] 
] + // -- single tuple of parallel lists; each flag indicates whether the corresponding VCF + // is a population allele database (True) vs. a panel-of-normals artefact file (False) + + // DeepSomatic PON channel: user-supplied VCF paths, or empty list (process falls back to container defaults) + ds_pon_files = params.deepsomatic_pon_vcfs != null + ? params.deepsomatic_pon_vcfs.split(',').collect { f -> file(f.trim()) } + : params.genome == 'CHM13' + ? [ + getGenomeAttribute('gnomad'), + getGenomeAttribute('dbsnp'), + getGenomeAttribute('onekgenomes'), + getGenomeAttribute('colors'), + getGenomeAttribute('asap') + ] + : [] + // DeepSomatic requires no chromosome overlap across population VCFs. + // When multiple databases are provided (e.g., CHM13 gnomad + 1kgenomes + colors + dbsnp + asap), + // the merge is done inline inside DEEPSOMATIC_MAKEEXAMPLES and DEEPSOMATIC_POSTPROCESSVARIANTS + // so that both callers can start in parallel as soon as BAMs are ready. + channel.value( [[:], ds_pon_files] ).set { ds_pon_channel } + // ds_pon_channel: [[:], [vcf_path, ...]] or [[:], []] + // -- raw unmerged PON VCF paths (no .tbi required); merging happens inline in each DeepSomatic process + // -- GRCh38/other + no user PON: empty list => process uses container-bundled GRCh38 defaults (tumor-only) + + ch_versions = channel.empty() + ch_multiqc_files = channel.empty() // // MODULE: METAEXTRACT // // extracts the base calling model from the bam files + // MODULE: METAEXTRACT (label: process_single) + // Input: [meta, [bam...]] METAEXTRACT( ch_samplesheet ) - ch_versions = ch_versions.mix(METAEXTRACT.out.versions) basecall_meta = METAEXTRACT.out.meta_ext - // Adds the base calling model to meta.basecall_model + // basecall_meta: [meta, basecall_model_str, kinetics_str] + // basecall_model_str -- e.g. 
"dna_r10.4.1_e8.2_400bps_sup@v5.0.0" or "hifi_revio" + // kinetics_str -- "true" if PacBio kinetics tags present, else "false" ch_samplesheet .join(basecall_meta) .map { meta, bam, basecall_model_meta, kinetics_meta -> - def meta_new = meta + [ basecall_model: basecall_model_meta, kinetics: kinetics_meta] + def chosen_clair3_model = meta.clair3_model ?: clair3_modelMap.get(basecall_model_meta) + def chosen_clairSTO_model = meta.clairSTO_model ?: clairs_modelMap.get(basecall_model_meta) + def chosen_clairS_model = meta.clairS_model ?: clairs_modelMap.get(basecall_model_meta) + def meta_new =[ id: meta.id, + paired_data: meta.paired_data, + type: meta.type, + platform: meta.platform, + sex: meta.sex, + fiber: meta.fiber, + replicate: meta.replicate, + clair3_model: chosen_clair3_model, + clairS_model: chosen_clairS_model, + clairSTO_model: chosen_clairSTO_model, + kinetics: kinetics_meta] return[ meta_new, bam ] } - .groupTuple() - .map { meta, bam -> - [ meta, bam.flatten()] - } .set{ch_samplesheet} + // ch_samplesheet (updated): [meta, [bam...]] + // meta fields: id, paired_data, type, platform, sex, fiber, replicate, + // clair3_model, clairS_model, clairSTO_model, kinetics + // bams are grouped per sample (multiple runs merged into a list) + // + // SUBWORKFLOW: PREPARE_REFERENCE_FILES + // Decompresses the reference FASTA if needed, indexes it, downloads Clair3 models, + // and decompresses ASCAT reference files + // Input: params.fasta, ASCAT file paths, basecall_meta, clair3_modelMap + // Output: .prepped_fasta -- [[:], fasta] + // .prepped_fai -- [[:], fai] + // .downloaded_clair3_models-- [meta(id=model_name), model_dir] + // .allele_files / .loci_files / .gc_file / .rt_file -- flat file collections + // + PREPARE_REFERENCE_FILES ( + params.fasta, + params.ascat_allele_files, + params.ascat_loci_files, + params.ascat_gc_file, + params.ascat_rt_file, + basecall_meta, + clair3_modelMap + ) - // ch_samplesheet -> meta: [id, paired_data, platform, sex, type, 
fiber, basecall_model] - // bam: list of unaligned bams + downloaded_clair3_models = PREPARE_REFERENCE_FILES.out.downloaded_clair3_models + // downloaded_clair3_models: [meta(id=clair3_model_name), model_dir] - ch_split = ch_samplesheet - .branch { meta, bam -> - single: bam.size() == 1 - multiple: bam.size() > 1 - } + ch_nanoplot_pre_txt = channel.empty() - // - // MODULE: SAMTOOLS_CAT - // - // concatenates bam files from single sample + if (!params.skip_qc && !params.skip_cramino) { - SAMTOOLS_CAT ( ch_split.multiple ) - .bam - .mix ( ch_split.single ) - .set { ch_cat_ubams } + // + // MODULE: CRAMINO_PRE (label: process_medium) + // Input: [meta, [bam...]] -- pre-alignment unaligned BAMs + // Output: cramino_pre.out.arrow -- [meta, arrow_file] (feather format stats) + // + CRAMINO_PRE( ch_samplesheet ) - // ch_cat_ubams -> meta: [id, paired_data, platform, sex, type, fiber, basecall_model] - // bam: list of concatenated unaligned bams + if (!params.skip_nanoplot) { - ch_versions = ch_versions.mix(SAMTOOLS_CAT.out.versions) + // + // MODULE: NANOPLOT_PRE (label: process_medium) + // Input: CRAMINO_PRE.out.arrow -- [meta, arrow_file] + // Output: nanoplot HTML/txt reports + // - // - // MODULE: CRAMINO - // - // QC the unaligned bams - if (!params.skip_qc && !params.skip_cramino) { + NANOPLOT_PRE(CRAMINO_PRE.out.arrow) - CRAMINO_PRE ( ch_cat_ubams ) + } - ch_versions = ch_versions.mix(CRAMINO_PRE.out.versions) } + // Drop 'replicate' from meta before concatenation -- replicate info not needed downstream + // groupTuple merges per-replicate entries that share the same sample ID into one item + // (e.g. 
two B2194541 rows with replicate=1 and replicate=2 become one entry with [bam1, bam2]) + ch_samplesheet + .map{ meta, bam -> + def new_meta = meta.subMap('id', + 'paired_data', + 'type', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, bam] + } + .groupTuple() + .map { meta, bam -> + [ meta, bam.flatten() ] + } + .set{ch_samplesheet_no_rep} + // ch_samplesheet_no_rep: [meta, [bam...]] + // meta fields: id, paired_data, type, platform, sex, fiber, + // clair3_model, clairS_model, clairSTO_model, kinetics + // (replicate field removed; replicates for same sample merged into single BAM list) + + // Branch on number of input BAMs: samples with a single BAM skip concatenation + + ch_split = ch_samplesheet_no_rep + .branch { _meta, bam -> + single: bam.size() == 1 + multiple: bam.size() > 1 + } + // ch_split.single: [meta, [bam]] -- pass-through, no concatenation needed + // ch_split.multiple: [meta, [bam...]] -- need SAMTOOLS_CAT to merge // - // SUBWORKFLOW: PREPARE_REFERENCE_FILES + // MODULE: SAMTOOLS_CAT (label: process_single) + // Input: [meta, [bam...]] -- multiple unaligned BAMs for same sample + // Output: .bam -- [meta, bam] -- single merged unaligned BAM // - PREPARE_REFERENCE_FILES ( - params.fasta, - params.ascat_allele_files, - params.ascat_loci_files, - params.ascat_gc_files, - params.ascat_rt_files, - basecall_meta, - clair3_modelMap - ) + SAMTOOLS_CAT ( ch_split.multiple ) + .bam + .mix ( ch_split.single ) + .set { ch_cat_ubams } + // ch_cat_ubams: [meta, bam] -- single (possibly concatenated) unaligned BAM per sample - vep_cache = Channel.empty() + vep_cache = channel.empty() if (!params.skip_vep) { - Channel - .of([ - vep_cache: params.vep_cache, - vep_cache_version: params.vep_cache_version, - vep_genome: params.vep_genome, - vep_args: params.vep_args, - vep_species: params.vep_species, - download_vep_cache: params.download_vep_cache - ]) - + // SUBWORKFLOW: PREPARE_ANNOTATION 
+ // Validates or downloads the VEP cache directory + // Output: .vep_cache -- path to VEP cache root directory PREPARE_ANNOTATION ( params.vep_cache, params.vep_cache_version, @@ -196,21 +327,21 @@ workflow LRSOMATIC { params.download_vep_cache ) ch_versions = ch_versions.mix(PREPARE_ANNOTATION.out.versions) - vep_cache = PREPARE_ANNOTATION.out.vep_cache + // Wrap VEP cache path in a tuple with empty meta for use in ENSEMBLVEP_VEP + vep_cache = PREPARE_ANNOTATION.out.vep_cache.map {cache -> [[:], cache] } + // vep_cache: [[:], cache_dir_path] -- empty meta + VEP cache directory } ch_versions = ch_versions.mix(PREPARE_REFERENCE_FILES.out.versions) - ch_fasta = PREPARE_REFERENCE_FILES.out.prepped_fasta - ch_fai = PREPARE_REFERENCE_FILES.out.prepped_fai + ch_fasta = PREPARE_REFERENCE_FILES.out.prepped_fasta // [[:], fasta] + ch_fai = PREPARE_REFERENCE_FILES.out.prepped_fai // [[:], fai] - downloaded_model_files = PREPARE_REFERENCE_FILES.out.downloaded_model_files - - // ASCAT files - allele_files = PREPARE_REFERENCE_FILES.out.allele_files - loci_files = PREPARE_REFERENCE_FILES.out.loci_files - gc_file = PREPARE_REFERENCE_FILES.out.gc_file - rt_file = PREPARE_REFERENCE_FILES.out.rt_file + // ASCAT reference files -- flat path collections (no meta wrapper), passed directly to ASCAT module + allele_files = PREPARE_REFERENCE_FILES.out.allele_files // [path, ...] -- per-chromosome allele files + loci_files = PREPARE_REFERENCE_FILES.out.loci_files // [path, ...] -- per-chromosome loci files + gc_file = PREPARE_REFERENCE_FILES.out.gc_file // [path, ...] -- GC correction ([] if skipped) + rt_file = PREPARE_REFERENCE_FILES.out.rt_file // [path, ...] 
-- RT correction ([] if skipped) // // MODULE: FIBERTOOLSRS_PREDICTM6A @@ -218,105 +349,140 @@ workflow LRSOMATIC { // predict m6a in unaligned bam if (!params.skip_fiber) { - if(!params.normal_fiber){ + // Fiber-seq processing: predict m6A methylation, call nucleosomes and FIRE elements + // Only applicable to PacBio samples with fiber-seq data (meta.fiber == "y") + if (!params.skip_normalfiber){ + // Process all samples (including normals) for fiber-seq + ubams = ch_cat_ubams + } + else { + // Skip fiber-seq processing for normal samples; set aside normals to re-join later ch_cat_ubams - .branch { meta, bams -> + .branch { meta, _bams -> normal: meta.type == "normal" tumor: meta.type == "tumor" } .set { ch_cat_ubams_normal_branching } + // ch_cat_ubams_normal_branching.normal: [meta, bam] -- normal samples (held out) + // ch_cat_ubams_normal_branching.tumor: [meta, bam] -- tumor samples only normal_bams = ch_cat_ubams_normal_branching.normal ubams = ch_cat_ubams_normal_branching.tumor } - else { - ubams = ch_cat_ubams - } + // Branch by sequencing platform: PacBio needs m6A prediction, ONT does not ubams - .branch{ meta, bams -> + .branch{ meta, _bams -> pacBio: meta.platform == "pb" ont: meta.platform == "ont" } .set{ch_cat_ubams_pacbio_ont_branching} + // ch_cat_ubams_pacbio_ont_branching.pacBio: [meta, bam] -- PacBio samples + // ch_cat_ubams_pacbio_ont_branching.ont: [meta, bam] -- ONT samples (skip m6A) pacbio_bams = ch_cat_ubams_pacbio_ont_branching.pacBio + // Branch PacBio samples: only those with kinetics tags can have m6A predicted pacbio_bams - .branch{meta, bams -> + .branch{meta, _bams -> kinetics: meta.kinetics == "true" noKinetics: meta.kinetics == "false" } .set{pacbio_bams} + // pacbio_bams.kinetics: [meta, bam] -- PacBio with kinetics (mm/ml tags); m6A predictable + // pacbio_bams.noKinetics: [meta, bam] -- PacBio without kinetics; skip PREDICTM6A - FIBERTOOLSRS_PREDICTM6A ( - pacbio_bams.kinetics - ) - pacbio_bams.noKinetics - 
.mix(FIBERTOOLSRS_PREDICTM6A.out.bam) - .set{predicted_bams} - - ch_versions = ch_versions.mix(FIBERTOOLSRS_PREDICTM6A.out.versions) + if (!params.skip_m6a) { + // + // MODULE: FIBERTOOLSRS_PREDICTM6A (label: process_high) + // Input: [meta, bam] -- PacBio BAM with kinetics tags + // Output: .bam -- [meta, bam] -- BAM with m6A (MM/ML) tags added + // + FIBERTOOLSRS_PREDICTM6A ( + pacbio_bams.kinetics + ) + // Merge PacBio with and without kinetics: both now have (or skip) m6A tags + pacbio_bams.noKinetics + .mix(FIBERTOOLSRS_PREDICTM6A.out.bam) + .set{predicted_bams} + } + else { + pacbio_bams.noKinetics + .mix(pacbio_bams.kinetics) + .set{predicted_bams} + } + // predicted_bams: [meta, bam] -- all PacBio samples (m6A tags present where applicable) + // Re-merge ONT and PacBio before fiber-seq branching ch_cat_ubams_pacbio_ont_branching.ont .mix(predicted_bams) .set{fiber_branch} + // fiber_branch (pre-split): [meta, bam] -- all samples (ONT + PacBio, with m6A if applicable) + // Branch on fiber-seq flag: only fiber-seq samples get nucleosome/FIRE calling fiber_branch - .branch{ meta, bams -> + .branch{ meta, _bams -> fiber: meta.fiber == "y" nonFiber: meta.fiber == "n" } .set{fiber_branch} + // fiber_branch.fiber: [meta, bam] -- fiber-seq samples → nucleosome + FIRE calling + // fiber_branch.nonFiber: [meta, bam] -- non-fiber samples → passed through unchanged // - // MODULE: FIBERTOOLSRS_NUCLEOSOMES + // MODULE: FIBERTOOLSRS_NUCLEOSOMES (label: process_high) + // Input: [meta, bam] -- fiber-seq BAM (with m6A tags for PacBio) + // Output: .bam -- [meta, bam] -- BAM with nucleosome footprint tags added // FIBERTOOLSRS_NUCLEOSOMES ( fiber_branch.fiber ) - ch_versions = ch_versions.mix(FIBERTOOLSRS_NUCLEOSOMES.out.versions) - // - // MODULE: FIBERTOOLSRS_FIRE + // MODULE: FIBERTOOLSRS_FIRE (label: process_high) + // Input: FIBERTOOLSRS_NUCLEOSOMES.out.bam -- [meta, bam] -- BAM with nucleosome tags + // Output: .bam -- [meta, bam] -- BAM with FIRE (Fiber-seq Inferred 
Regulatory Elements) tags // FIBERTOOLSRS_FIRE ( FIBERTOOLSRS_NUCLEOSOMES.out.bam ) - ch_versions = ch_versions.mix(FIBERTOOLSRS_FIRE.out.versions) - - if(!params.normal_fiber){ + if (!params.skip_normalfiber){ + // Re-merge fiber and non-fiber samples after FIRE annotation fiber_branch.nonFiber - .mix(normal_bams) .mix(FIBERTOOLSRS_FIRE.out.bam) .set{ch_cat_ubams} - } else { + // Re-merge fiber, non-fiber, and held-out normal samples fiber_branch.nonFiber + .mix(normal_bams) .mix(FIBERTOOLSRS_FIRE.out.bam) .set{ch_cat_ubams} - } + // ch_cat_ubams (updated): [meta, bam] -- all samples; fiber-seq samples now carry + // nucleosome + FIRE tags in BAM; m6A tags present for PacBio fiber-seq if(!params.skip_qc) { // - // MODULE: FIBERTOOLSRS_QC + // MODULE: FIBERTOOLSRS_QC (label: process_medium) + // Input: FIBERTOOLSRS_FIRE.out.bam -- [meta, bam] -- annotated fiber-seq BAM + // Output: QC reports for fiber-seq signal (written to outdir) // + FIBERTOOLSRS_QC ( FIBERTOOLSRS_FIRE.out.bam ) - - ch_versions = ch_versions.mix(FIBERTOOLSRS_QC.out.versions) } - } // - // MODULE: MINIMAP2_ALIGN + // MODULE: MINIMAP2_ALIGN (label: process_high) + // Input: [meta, bam] -- unaligned BAM (may carry m6A/nucleosome/FIRE tags for fiber-seq) + // ch_fasta -- [[:], fasta] + // sort_bam=true, cigar_paf_format='bai', cigar_bam='', split_prefix='' + // Output: .bam -- [meta, bam] -- coordinate-sorted aligned BAM + // .index -- [meta, bai] -- BAM index // - // Aligns ubams MINIMAP2_ALIGN ( ch_cat_ubams, @@ -328,74 +494,198 @@ workflow LRSOMATIC { ) MINIMAP2_ALIGN.out.bam .set { ch_minimap_bam } + // ch_minimap_bam: [meta, bam] -- coordinate-sorted aligned BAM + // Join BAM with its index, then branch into paired-sample vs. 
tumor-only paths + ch_minimap_bam + .join(MINIMAP2_ALIGN.out.index) + .set {ch_index_minimap} + // ch_index_minimap: [meta, bam, bai] -- aligned BAM + index, all samples - // ch_minimap_bams -> meta: [id, paired_data, platform, sex, type, fiber,basecall_model] - // bam: list of concatenated aligned bams - - ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions) - + // + // MODULE: MODKIT_PILEUP + // - // ch_minimap_bams into tumor and paired to phase the paired ones on normal - // and add index + if (!params.skip_modkit) { + MODKIT_PILEUP(ch_index_minimap, ch_fasta, ch_fai, [[:],[]]) + } - ch_minimap_bam - .join(MINIMAP2_ALIGN.out.index) - .branch { meta, bams, bais -> - paired: meta.paired_data - tumor_only: !meta.paired_data + ch_index_minimap + .branch { meta, _bams, _bais -> + paired: meta.paired_data // meta.paired_data is the normal sample ID for tumors, or the tumor ID for normals + tumor_only: !meta.paired_data // meta.paired_data is null/false for tumor-only samples } .set { branched_minimap } + // branched_minimap.paired: [meta, bam, bai] -- tumor AND normal samples flow together here; + // each item is a single sample, joined downstream + // branched_minimap.tumor_only: [meta, bam, bai] -- tumor-only samples (no matched normal) - // branched_minimap -> meta: [id, paired_data, platform, sex, type, fiber, basecall_model] - // bam: list of concatenated aligned bams - // bais: indexes for bam files + // SUBWORKFLOW: TUMORONLY_SMALLVAR + // Input: branched_minimap.tumor_only -- [meta, bam, bai] + // Output: .somatic_vcf -- [meta, vcf, tbi] -- somatic SNVs/indels + // .germline_vcf -- [meta, vcf, tbi] -- germline SNVs/indels (ClairS-TO germline output) + TUMORONLY_SMALLVAR( + branched_minimap.tumor_only, + ch_fasta, + ch_fai, + clairsto_pon_channel, + ds_pon_channel + ) - // - // SUBWORFKLOW: TUMOR_NORMAL_HAPPHASE - // - // Phasing/haplotaging/small germline variant calling for tumor-normal samples + branched_minimap.paired + .set{paired_ch} - 
TUMOR_NORMAL_HAPPHASE ( - branched_minimap.paired, + // Split paired samples into tumor and normal streams for joining + paired_ch + .branch { meta, _bams, _bais -> + normal: meta.type == "normal" + tumor: meta.type == "tumor" + } + .set{branched_paired_ch} + // branched_paired_ch.normal: [meta, bam, bai] -- normal samples (meta.type == "normal") + // branched_paired_ch.tumor: [meta, bam, bai] -- tumor samples (meta.type == "tumor") + + // Strip 'type' field from normal meta before joining, so the key is just sample ID + branched_paired_ch.normal + .map{ meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, bam, bai] + } + .set{paired_normal_bams} + // paired_normal_bams: [meta (no type), normal_bam, normal_bai] + + // Join tumor and normal BAMs into a single channel for somatic variant calling + // Join key is meta (with 'type' stripped), so tumor meta.id must equal normal meta.id + branched_paired_ch.tumor + .map{ meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, bam, bai] + } + .join(paired_normal_bams) + .set { somatic_smallvar_input } + // somatic_smallvar_input: [meta, tumor_bam, tumor_bai, normal_bam, normal_bai] + + // SUBWORKFLOW: PAIRED_SMALLVAR_SOMATIC + // Input: somatic_smallvar_input -- [meta, tumor_bam, tumor_bai, normal_bam, normal_bai] + // Output: .somatic_vcf -- [meta, vcf, tbi] -- somatic SNVs/indels (ClairS and/or DeepSomatic consensus) + PAIRED_SMALLVAR_SOMATIC ( + somatic_smallvar_input, ch_fasta, ch_fai, - clair3_modelMap, - clairs_modelMap, - downloaded_model_files + ds_pon_channel ) - ch_versions = ch_versions.mix(TUMOR_NORMAL_HAPPHASE.out.versions) + // SUBWORKFLOW: PAIRED_SMALLVAR_GERMLINE + // Input: branched_paired_ch.normal -- [meta, bam, bai] -- normal 
sample BAMs only + // downloaded_clair3_models -- [meta(id=model_name), model_dir] + // Output: .germline_vcf -- [meta, vcf, tbi] -- germline SNVs/indels (Clair3 and/or DeepVariant consensus) + PAIRED_SMALLVAR_GERMLINE ( + branched_paired_ch.normal, + ch_fasta, + ch_fai, + downloaded_clair3_models + ) - // - // SUBWORKFLOW: TUMOR_ONLY_HAPPHASE - // - // Phasing/haplotagging for tumor only samples + // Merge germline VCFs from paired and tumor-only paths into a single channel + PAIRED_SMALLVAR_GERMLINE.out.germline_vcf + .mix(TUMORONLY_SMALLVAR.out.germline_vcf) + .set{ch_germline_vcf} + // ch_germline_vcf: [meta, vcf, tbi] -- germline variants for all samples (paired + tumor-only) + + // Merge somatic VCFs from tumor-only and paired T/N paths into a single channel + TUMORONLY_SMALLVAR.out.somatic_vcf + .mix(PAIRED_SMALLVAR_SOMATIC.out.somatic_vcf) + .set{ch_somatic_vcf} + // ch_somatic_vcf: [meta, vcf, tbi] -- somatic variants for all samples + + // SUBWORKFLOW: PHASING_HAPLOTYPING + // Input: ch_index_minimap -- [meta, bam, bai] -- all aligned BAMs (tumor + normal + tumor-only) + // ch_germline_vcf -- [meta, vcf, tbi] -- germline variants (used to phase reads) + // ch_somatic_vcf -- [meta, vcf, tbi] -- somatic variants (get phasing transferred) + // ch_fasta / ch_fai + // Output: .phased_germline_vcf -- [meta, vcf, tbi] -- phased germline VCF + // .phased_somatic_vcf -- [meta, vcf, tbi] -- phased somatic VCF + // .tumor_normal_hapbams_ch -- [meta, bam, bai] -- haplotagged BAMs (all samples) + PHASING_HAPLOTYPING ( + ch_index_minimap, + ch_germline_vcf, + ch_somatic_vcf, + ch_fasta, + ch_fai + ) - dbsnp = file(params.dbsnp) - colors = file(params.colors) - onekgenomes = file(params.onekgenomes) - gnomad = file(params.gnomad) + // Prepare phased VCFs for VEP: add empty 'extra' list required by ENSEMBLVEP_VEP + PHASING_HAPLOTYPING.out.phased_somatic_vcf + .map { meta, vcf, _tbi -> + def extra = [] + return [meta, vcf, extra] + } + .set { somatic_vep } + // 
somatic_vep: [meta, vcf, []] -- phased somatic VCF ready for VEP annotation + PHASING_HAPLOTYPING.out.phased_germline_vcf + .map { meta, vcf, _tbi -> + def extra = [] + return [meta, vcf, extra] + } + .set { germline_vep } + // germline_vep: [meta, vcf, []] -- phased germline VCF ready for VEP annotation - TUMOR_ONLY_HAPPHASE ( - branched_minimap.tumor_only, - ch_fasta, - ch_fai, - clairs_modelMap, - dbsnp, - colors, - onekgenomes, - gnomad - ) - germline_vep = TUMOR_NORMAL_HAPPHASE.out.germline_vep.mix(TUMOR_ONLY_HAPPHASE.out.germline_vep) - somatic_vep = TUMOR_NORMAL_HAPPHASE.out.somatic_vep.mix(TUMOR_ONLY_HAPPHASE.out.somatic_vep) + whatshap_stats_txt = channel.empty() + + if (!params.skip_qc && !params.skip_whatshapstats) { + + // Drop the empty 'extra' element added for VEP input + germline_vep + .map { meta, vcf, _extra -> + return [meta, vcf] } + .set { ch_whatshap_stats } + // ch_whatshap_stats: [meta, vcf] -- phased germline VCF for phasing QC + + // + // MODULE: WHATSHAP_STATS (label: process_single) + // Input: [meta, vcf] -- phased VCF (germline) + // gtf=true, sample=true, chr_lengths=false + // Output: .tsv -- [meta, tsv] -- per-chromosome phasing statistics + // + + WHATSHAP_STATS ( + ch_whatshap_stats, + true, + true, + false + ) + + whatshap_stats_txt = WHATSHAP_STATS.out.tsv + + } if (!params.skip_vep) { + // - // MODULE: GERMLINE_VEP + // MODULE: GERMLINE_VEP (ENSEMBLVEP_VEP alias; label: process_medium) + // Input: germline_vep -- [meta, vcf, []] -- phased germline VCF + // vep_cache -- [[:], cache_dir] + // ch_fasta -- [[:], fasta] + // Output: annotated germline VCF with consequence predictions // if (params.vep_custom != null) { vep_custom = file(params.vep_custom) @@ -419,10 +709,12 @@ workflow LRSOMATIC { vep_custom_tbi ) - ch_versions = ch_versions.mix(GERMLINE_VEP.out.versions) - // - // MODULE: SOMATIC_VEP + // MODULE: SOMATIC_VEP (ENSEMBLVEP_VEP alias; label: process_medium) + // Input: somatic_vep -- [meta, vcf, []] -- phased somatic 
VCF + // vep_cache -- [[:], cache_dir] + // ch_fasta -- [[:], fasta] + // Output: annotated somatic VCF with consequence predictions // SOMATIC_VEP ( @@ -436,30 +728,48 @@ workflow LRSOMATIC { vep_custom, vep_custom_tbi ) - - ch_versions = ch_versions.mix(SOMATIC_VEP.out.versions) } - - ch_versions = ch_versions.mix(TUMOR_ONLY_HAPPHASE.out.versions) - - // Get Severus input channel - TUMOR_NORMAL_HAPPHASE.out.tumor_normal_severus - .mix(TUMOR_ONLY_HAPPHASE.out.tumor_only_severus) - .set { severus_reformat } - // Format is [meta, tumor_hapbam, tumor_bai, normal_hapbam, normal_bai, vcf] + // Build SEVERUS input by combining tumor-only and T/N paired samples with phased germline VCFs + // Tumor-only samples get empty lists for normal BAM/BAI (SEVERUS runs in tumor-only mode) + branched_minimap.tumor_only + .map{ meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, bam, bai] + } + .map{meta, tumor_bam, tumor_bai-> + def normal_bam = [] + def normal_bai = [] + return [meta, tumor_bam, tumor_bai, normal_bam, normal_bai] + } + // Mix with paired T/N input (which already has normal BAM/BAI from somatic_smallvar_input) + .mix(somatic_smallvar_input) + // Attach phased germline VCF (used by SEVERUS for phased SV calling) + .join(PHASING_HAPLOTYPING.out.phased_germline_vcf) + .set{severus_input} + // severus_input: [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_germline_vcf, phased_germline_tbi] + // normal_bam/bai are empty lists [] for tumor-only samples // - // MODULE: SEVERUS + // MODULE: SEVERUS (label: process_high) + // Input: severus_input -- [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, vcf, tbi] + // [[:], bed_file, pon_file] -- optional target BED and panel-of-normals for SV filtering + // Output: .all_vcf -- [meta, vcf] -- all somatic SVs (sniffles2 format) // SEVERUS ( - severus_reformat, + severus_input, 
[[:], params.bed_file, params.pon_file] ) - - ch_versions = ch_versions.mix(SEVERUS.out.versions) SEVERUS.out.all_vcf @@ -468,8 +778,14 @@ workflow LRSOMATIC { return [meta, vcf, extra] } .set { sv_vep } + // sv_vep: [meta, severus_all_vcf, []] -- all SVs ready for VEP annotation if(!params.skip_vep) { + // + // MODULE: SV_VEP (ENSEMBLVEP_VEP alias; label: process_medium) + // Input: sv_vep -- [meta, vcf, []] -- SEVERUS SV VCF + // Output: annotated SV VCF with consequence predictions + // SV_VEP ( sv_vep, params.vep_genome, @@ -481,35 +797,59 @@ workflow LRSOMATIC { vep_custom, vep_custom_tbi ) - - ch_versions = ch_versions.mix(SV_VEP.out.versions) } - // - // MODULE: CRAMINO - // + + ch_nanoplot_post_txt = channel.empty() + if (!params.skip_qc && !params.skip_cramino) { + // + // MODULE: CRAMINO_POST (label: process_medium) + // Input: ch_minimap_bam -- [meta, bam] -- post-alignment coordinate-sorted BAM + // Output: .arrow -- [meta, arrow_file] -- alignment statistics in feather format + // + CRAMINO_POST ( ch_minimap_bam ) - ch_versions = ch_versions.mix(CRAMINO_POST.out.versions) + if (!params.skip_nanoplot) { + + // + // MODULE: NANOPLOT_POST (label: process_medium) + // Input: CRAMINO_POST.out.arrow -- [meta, arrow_file] + // Output: HTML/txt QC reports (post-alignment) + // + + NANOPLOT_POST(CRAMINO_POST.out.arrow) + + } + + } // // Module: MOSDEPTH // - ch_mosdepth_global = Channel.empty() - ch_mosdepth_summary = Channel.empty() + ch_mosdepth_global = channel.empty() + ch_mosdepth_summary = channel.empty() if (!params.skip_qc && !params.skip_mosdepth) { - // prepare mosdepth input channel: we need to specify compulsory path to bed as well + // MOSDEPTH requires a BED file argument; pass [] to compute genome-wide depth ch_minimap_bam.join(MINIMAP2_ALIGN.out.index) .map { meta, bam, bai -> [meta, bam, bai, []] } .set { ch_mosdepth_in } + // ch_mosdepth_in: [meta, bam, bai, []] -- [] is the optional BED (empty = genome-wide) + // + // MODULE: MOSDEPTH (label: 
process_medium) + // Input: [meta, bam, bai, bed] -- bed is [] for genome-wide coverage + // ch_fasta -- [[:], fasta] -- used for CRAM decoding (if applicable) + // Output: .global_txt -- [meta, txt] -- global depth summary + // .summary_txt -- [meta, txt] -- per-contig depth summary + // MOSDEPTH ( ch_mosdepth_in, ch_fasta @@ -517,41 +857,47 @@ workflow LRSOMATIC { ch_mosdepth_global = MOSDEPTH.out.global_txt ch_mosdepth_summary = MOSDEPTH.out.summary_txt - - ch_versions = ch_versions.mix(MOSDEPTH.out.versions) } // - // SUBWORKFLOW: BAM_STATS_SAMTOOLS + // SUBWORKFLOW: BAM_STATS_SAMTOOLS (nf-core subworkflow) + // Input: [meta, bam, bai] -- aligned BAM with index + // ch_fasta -- [[:], fasta] + // Output: .stats -- [meta, txt] -- samtools stats output + // .flagstat -- [meta, txt] -- samtools flagstat output + // .idxstats -- [meta, txt] -- samtools idxstats output // - ch_bam_stats = Channel.empty() - ch_bam_flagstat = Channel.empty() - ch_bam_idxstats = Channel.empty() + ch_bam_stats = channel.empty() + ch_bam_flagstat = channel.empty() + ch_bam_idxstats = channel.empty() if (!params.skip_qc && !params.skip_bamstats ) { BAM_STATS_SAMTOOLS ( - ch_minimap_bam.join(MINIMAP2_ALIGN.out.index), // Join bam channel with index channel + ch_minimap_bam.join(MINIMAP2_ALIGN.out.index), // [meta, bam, bai] ch_fasta ) - bam_stats_ch = BAM_STATS_SAMTOOLS.out.stats - bam_flagstat_ch = BAM_STATS_SAMTOOLS.out.flagstat - bam_idxstats_ch = BAM_STATS_SAMTOOLS.out.idxstats - - ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + ch_bam_stats = BAM_STATS_SAMTOOLS.out.stats + ch_bam_flagstat = BAM_STATS_SAMTOOLS.out.flagstat + ch_bam_idxstats = BAM_STATS_SAMTOOLS.out.idxstats } // - // MODULE: ASCAT + // MODULE: ASCAT (label: process_high) + // Input: [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] -- NOTE: normal before tumor (ASCAT convention) + // allele_files, loci_files, gc_file, rt_file -- ASCAT reference files + // Output: .png plots, .segments, .purity_ploidy 
-- copy number results // if (!params.skip_ascat) { - severus_reformat - .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, vcf, tbi -> + // ASCAT expects [normal, tumor] order; rearrange from severus_input [tumor, normal] order + severus_input + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, _vcf, _tbi -> return [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] } .set { ascat_ch } + // ascat_ch: [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] ASCAT ( ascat_ch, @@ -560,68 +906,94 @@ workflow LRSOMATIC { loci_files, [], [], - [], - [] + gc_file, + rt_file ) ch_versions = ch_versions.mix(ASCAT.out.versions) } // - // MODULE: WAKHAN + // MODULE: WAKHAN (label: process_medium) + // Haplotype-aware genome assembly and variant phasing visualisation + // Input: [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_germline_vcf, severus_all_vcf] + // ch_fasta -- [[:], fasta] + // centromere_bed -- BED file of centromere coordinates (for assembly anchoring) + // Output: WAKHAN assembly reports (written to outdir) // if (!params.skip_wakhan) { - // Prepare input channel for WAKHAN - severus_reformat - .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, vcf, tbi -> - return [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, vcf] - } + // Attach SEVERUS SV VCF to the severus_input channel (dropping the phased TBI) + severus_input .join(SEVERUS.out.all_vcf) + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_vcf, _phased_tbi, all_vcf -> + return [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_vcf, all_vcf] + } .set { wakhan_input } + // wakhan_input: [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_germline_vcf, severus_all_vcf] + // normal_bam/bai are [] for tumor-only samples WAKHAN ( wakhan_input, - ch_fasta + ch_fasta, + file(params.centromere_bed) ) - - ch_versions = ch_versions.mix(WAKHAN.out.versions) } // - // Collate and save software versions + // Collate software versions from two 
sources: + // 1. ch_versions (classic path): version YAML files emitted by modules + // 2. channel.topic("versions") (topic channel path): version tuples [process, tool, version] + // emitted directly by modules that use the topic-channel pattern // - softwareVersionsToYAML(ch_versions) + def topic_versions = channel.topic("versions") + .distinct() // deduplicate identical version entries across samples + .branch { entry -> + versions_file: entry instanceof Path // classic YAML file path + versions_tuple: true // [process, tool, version] tuple + } + + def topic_versions_string = topic_versions.versions_tuple + .map { process, tool, version -> + // Strip workflow prefix (everything before the last ':') from process name + [ process[process.lastIndexOf(':')+1..-1], " ${tool}: ${version}" ] + } + .groupTuple(by:0) // group tool versions by process name + .map { process, tool_versions -> + tool_versions.unique().sort() + "${process}:\n${tool_versions.join('\n')}" + } + // topic_versions_string: formatted YAML-like string per process, ready to write + + // Merge both version sources and write to versions YAML (consumed by MultiQC) + softwareVersionsToYAML(ch_versions.mix(topic_versions.versions_file)) + .mix(topic_versions_string) .collectFile( storeDir: "${params.outdir}/pipeline_info", name: 'lrsomatic_software_' + 'mqc_' + 'versions.yml', sort: true, newLine: true ).set { ch_collated_versions } + // ch_collated_versions: path -- merged software versions YAML for MultiQC // - // MODULE: MultiQC + // MODULE: MULTIQC (label: process_single) + // Aggregates QC reports from all modules into a single HTML report + // Input: [[id:'multiqc'], [qc_files...], [config_files...], [logo], [], []] + // Output: .report -- [meta, html] -- MultiQC HTML report // - ch_multiqc_config = Channel.fromPath( - "$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? 
- Channel.fromPath(params.multiqc_config, checkIfExists: true) : - Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? - Channel.fromPath(params.multiqc_logo, checkIfExists: true) : - Channel.empty() - - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_files = ch_multiqc_files.mix( + summary_params = paramsSummaryMap( + workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_files = ch_multiqc_files.mix( ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value( + ch_methods_description = channel.value( methodsDescriptionText(ch_multiqc_custom_methods_description)) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) @@ -632,26 +1004,37 @@ workflow LRSOMATIC { ) ) - // Collect MultiQC files - ch_multiqc_files = ch_multiqc_files.mix(ch_bam_stats.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_bam_flagstat.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_bam_idxstats.collect{it[1]}.ifEmpty([])) + // Collect QC outputs from all optional modules + // .collect{it -> it[1]} extracts the file from [meta, file] tuples; ifEmpty([]) handles skipped modules + ch_multiqc_files = ch_multiqc_files.mix(ch_bam_stats.collect{it -> it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_bam_flagstat.collect{it -> it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_bam_idxstats.collect{it -> it[1]}.ifEmpty([])) + + ch_multiqc_files = ch_multiqc_files.mix(ch_mosdepth_global.collect{it -> it[1]}.ifEmpty([])) + 
ch_multiqc_files = ch_multiqc_files.mix(ch_mosdepth_summary.collect{it -> it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_mosdepth_global.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_mosdepth_summary.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_nanoplot_pre_txt.collect{it -> it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_nanoplot_post_txt.collect{it -> it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(whatshap_stats_txt.collect{it -> it[1]}.ifEmpty([])) + // Build the final MULTIQC input tuple: all QC files + config files + logo MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList(), - [], - [] + ch_multiqc_files + .collect() + .map { files -> + def multiqc_config_files = [file("$projectDir/assets/multiqc_config.yml", checkIfExists: true)] + if (params.multiqc_config) { + multiqc_config_files += [file(params.multiqc_config, checkIfExists: true)] + } + def multiqc_logo_file = params.multiqc_logo ? [file(params.multiqc_logo, checkIfExists: true)] : [] + // MULTIQC input: [meta, [qc_files], [config_files], [logo], [], []] + [[id: 'multiqc'], files, multiqc_config_files, multiqc_logo_file, [], []] + } ) emit: - multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + multiqc_report = MULTIQC.out.report.map { _meta, report -> report } // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ]