From db8002b4a880143fc4ede750d01dc32238f355df Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Wed, 27 May 2026 16:00:26 +0900 Subject: [PATCH 01/20] Import samtools/merge module --- modules.json | 5 + .../nf-core/samtools/merge/environment.yml | 10 + modules/nf-core/samtools/merge/main.nf | 49 ++++ modules/nf-core/samtools/merge/meta.yml | 122 ++++++++++ .../nf-core/samtools/merge/tests/main.nf.test | 219 ++++++++++++++++++ .../samtools/merge/tests/main.nf.test.snap | 202 ++++++++++++++++ .../samtools/merge/tests/nextflow.config | 5 + 7 files changed, 612 insertions(+) create mode 100644 modules/nf-core/samtools/merge/environment.yml create mode 100644 modules/nf-core/samtools/merge/main.nf create mode 100644 modules/nf-core/samtools/merge/meta.yml create mode 100644 modules/nf-core/samtools/merge/tests/main.nf.test create mode 100644 modules/nf-core/samtools/merge/tests/main.nf.test.snap create mode 100644 modules/nf-core/samtools/merge/tests/nextflow.config diff --git a/modules.json b/modules.json index e531393..6d7ecbe 100644 --- a/modules.json +++ b/modules.json @@ -60,6 +60,11 @@ "git_sha": "9a48bce39a67e2cb34b8f125fc1d50f0ad98b616", "installed_by": ["modules"] }, + "samtools/merge": { + "branch": "master", + "git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120", + "installed_by": ["modules"] + }, "seqtk/cutn": { "branch": "master", "git_sha": "a46713779030a5f508117080cbf4b693dd4c6e33", diff --git a/modules/nf-core/samtools/merge/environment.yml b/modules/nf-core/samtools/merge/environment.yml new file mode 100644 index 0000000..946bb36 --- /dev/null +++ b/modules/nf-core/samtools/merge/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.23.1 + # renovate: datasource=conda depName=bioconda/samtools + - 
bioconda::samtools=1.23.1 diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf new file mode 100644 index 0000000..abb56f9 --- /dev/null +++ b/modules/nf-core/samtools/merge/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_MERGE { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/8c/8c5d2818c8b9f58e1fba77ce219fdaf32087ae53e857c4a496402978af26e78c/data' + : 'community.wave.seqera.io/library/htslib_samtools:1.23.1--5b6bb4ede7e612e5'}" + + input: + tuple val(meta), path(input_files, stageAs: "?/*"), path(index_files, stageAs: "?/*") + tuple val(meta2), path(fasta), path(fai), path(gzi) + + output: + tuple val(meta), path("${prefix}.bam"), optional: true, emit: bam + tuple val(meta), path("${prefix}.cram"), optional: true, emit: cram + tuple val(meta), path("*.{bai,crai,csi}"), optional: true, emit: index + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() + def reference = fasta ? "--reference ${fasta}" : "" + """ + # Note: --threads value represents *additional* CPUs to allocate (total CPUs = 1 + --threads). + samtools \\ + merge \\ + --threads ${task.cpus - 1} \\ + ${args} \\ + ${reference} \\ + ${prefix}.${file_type} \\ + ${input_files} + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def file_type = input_files instanceof List ? 
input_files[0].getExtension() : input_files.getExtension() + def index_type = file_type == "bam" ? "csi" : "crai" + def index = args.contains("--write-index") ? "touch ${prefix}.${index_type}" : "" + """ + touch ${prefix}.${file_type} + ${index} + """ +} diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml new file mode 100644 index 0000000..66e582e --- /dev/null +++ b/modules/nf-core/samtools/merge/meta.yml @@ -0,0 +1,122 @@ +name: samtools_merge +description: Merge BAM or CRAM file +keywords: + - merge + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_files: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - index_files: + type: file + description: BAI/CRAI/CSI index file + pattern: "*.{bai,crai,csi}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + ontologies: [] + - fai: + type: file + description: Index of the reference file the CRAM was created with (optional) + pattern: "*.fai" + ontologies: [] + - gzi: + type: file + description: Index of the compressed reference file the CRAM was created with + (optional) + pattern: "*.gzi" + ontologies: [] +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: BAM file + pattern: "*.{bam}" + ontologies: [] + cram: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: CRAM file + pattern: "*.{cram}" + ontologies: [] + index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{bai,crai,csi}": + type: file + description: BAM index file (optional) + pattern: "*.{bai,crai,csi}" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@yuukiiwa" + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@yuukiiwa" + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" + - "@matthdsm" diff --git 
a/modules/nf-core/samtools/merge/tests/main.nf.test b/modules/nf-core/samtools/merge/tests/main.nf.test new file mode 100644 index 0000000..b3caf86 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test @@ -0,0 +1,219 @@ +nextflow_process { + + name "Test Process SAMTOOLS_MERGE" + script "../main.nf" + process "SAMTOOLS_MERGE" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/merge" + config "./nextflow.config" + + test("bams") { + + when { + params { + module_args = '--write-index' + } + process { + """ + input[0] = [ + [ id:'test'], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ], + [] + ] + input[1] = [[],[],[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, + process.out.cram, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() + } + ) + } + } + + test("crams_fastq") { + + when { + params { + module_args = '--write-index --output-fmt cram,version=3.0' + } + process { + """ + input[0] = [ + [ id:'test' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram', checkIfExists: true) ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram.crai', checkIfExists: true) ] 
+ ] + input[1] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true), + [] + ] + """ + } + } + + then { + def fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/genome/genome.fasta' + assert process.success + assertAll( + { assert snapshot( + cram(process.out.cram[0][1], fasta).getReadsMD5(), + process.out.bam, + file(process.out.index[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() + } + ) + } + } + + test("crams_fastq_gz") { + + when { + params { + module_args = '--write-index' + } + process { + """ + input[0] = [ + [ id:'test' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram', checkIfExists: true) ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram.crai', checkIfExists: true) ] + ] + input[1] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.gz.fai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.gz.gzi', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + //nft-bam doesn't like the fasta.gz + file(process.out.cram[0][1]).name, + process.out.bam, + file(process.out.index[0][1]).name, + 
process.out.findAll { key, val -> key.startsWith("versions") } + ).match() + } + ) + } + } + + test("bam") { + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true) ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam.bai', checkIfExists: true) ] + ] + input[1] = [[],[],[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.index, + process.out.cram, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() + } + ) + } + } + + test("bams - stub") { + + options "-stub" + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ], + [] + ] + input[1] = [[],[],[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + } + + test("bams_no_index - stub") { + + options "-stub" + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test'], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', 
checkIfExists: true) ], + [] + ] + input[1] = [[],[],[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + } + +} diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test.snap b/modules/nf-core/samtools/merge/tests/main.nf.test.snap new file mode 100644 index 0000000..77ed449 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test.snap @@ -0,0 +1,202 @@ +{ + "bams_no_index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.23.1" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + + ], + "index": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.23.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-19T09:01:31.816184" + }, + "crams_fastq": { + "content": [ + "c4525b95f05075208347295e6a1fb232", + [ + + ], + "test.cram.crai", + { + "versions_samtools": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.23.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-19T09:01:05.719443" + }, + "bams - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.23.1" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + + ], + "index": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.23.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-19T09:01:24.795562" + }, + "bams": { + "content": [ + "47c9f174d8c8afc1a13c75ee4b5e5d43", + "test.bam.csi", + [ 
+ + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.23.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-19T09:00:59.536803" + }, + "crams_fastq_gz": { + "content": [ + "test.cram", + [ + + ], + "test.cram.crai", + { + "versions_samtools": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.23.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-19T09:01:11.614088" + }, + "bam": { + "content": [ + "8da8fc1099a955e3ceb198665350e766", + [ + + ], + [ + + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.23.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-19T09:01:17.736424" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/nextflow.config b/modules/nf-core/samtools/merge/tests/nextflow.config new file mode 100644 index 0000000..5e29c67 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: SAMTOOLS_MERGE { + ext.args = params.module_args + } +} From 0ab27ac8c1f35284922c41a261fb66eacdbe8b1b Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Thu, 28 May 2026 11:47:14 +0900 Subject: [PATCH 02/20] Add a multi_cram option. 
--- nextflow.config | 3 +++ nextflow_schema.json | 34 ++++++++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/nextflow.config b/nextflow.config index 3d19c53..657ba2a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -75,7 +75,10 @@ params { lastal_extr_args = '' last_split_mismap = '1e-05' lastal_params = null + + // Export option export_aln_to = 'no_export' + multi_cram = null // Schema validation default options validate_params = true diff --git a/nextflow_schema.json b/nextflow_schema.json index bac08c2..a2ab81c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -103,14 +103,6 @@ "description": "Arguments for the lastdb, last-train, lastal and last-split programs.", "default": "", "properties": { - "export_aln_to": { - "type": "string", - "default": "no_export", - "description": "Convert the final _one-to-one_ alignment to a different format than MAF.", - "pattern": "^((no_export|axt|bam|bed|blast|blasttab|blasttab+|chain|cram|gff|html|psl|sam|tab)?,?)*(? Date: Thu, 28 May 2026 11:49:46 +0900 Subject: [PATCH 03/20] Merge the fasta_bgzip_index_dict_samtools outputs in a single channel. 
--- .../fasta_bgzip_index_dict_samtools/main.nf | 13 ++++-- .../fasta_bgzip_index_dict_samtools/meta.yml | 42 ++++--------------- workflows/pairgenomealign.nf | 20 ++------- 3 files changed, 22 insertions(+), 53 deletions(-) diff --git a/subworkflows/local/fasta_bgzip_index_dict_samtools/main.nf b/subworkflows/local/fasta_bgzip_index_dict_samtools/main.nf index ff88cf6..4fce842 100644 --- a/subworkflows/local/fasta_bgzip_index_dict_samtools/main.nf +++ b/subworkflows/local/fasta_bgzip_index_dict_samtools/main.nf @@ -16,9 +16,14 @@ workflow FASTA_BGZIP_INDEX_DICT_SAMTOOLS { SAMTOOLS_DICT ( SAMTOOLS_BGZIP.out.fasta ) + ch_joined = SAMTOOLS_BGZIP.out.fasta + .join(SAMTOOLS_FAIDX.out.fai) + .join(SAMTOOLS_FAIDX.out.gzi) + .join(SAMTOOLS_DICT.out.dict) + .map { meta, fasta, fai, gzi, dict -> + [ meta, fasta, fai, gzi, dict ] + } + emit: - fasta_gz = SAMTOOLS_BGZIP.out.fasta // channel: [ val(meta), fasta.gz ] - fai = SAMTOOLS_FAIDX.out.fai // channel: [ val(meta), fai ] - gzi = SAMTOOLS_FAIDX.out.gzi // channel: [ val(meta), gzi ] - dict = SAMTOOLS_DICT .out.dict // channel: [ val(meta), dict ] + fasta_fai_gzi_dict = ch_joined // channel: [ val(meta), fasta.gz, fai, gzi, dict ] } diff --git a/subworkflows/local/fasta_bgzip_index_dict_samtools/meta.yml b/subworkflows/local/fasta_bgzip_index_dict_samtools/meta.yml index 58e9c18..6d10225 100644 --- a/subworkflows/local/fasta_bgzip_index_dict_samtools/meta.yml +++ b/subworkflows/local/fasta_bgzip_index_dict_samtools/meta.yml @@ -18,41 +18,17 @@ input: Structure: [ val(meta), path(fasta) ] pattern: "*.{fa,fa.gz,fa.bz2,fa.xz,fasta,fasta.gz,fasta.bz2,fasta.xz}" output: - - fasta_gz: + - fasta_fai_gzi_dict: type: file description: | - Channel containing FASTA filed compressed with the BGZF algorithm. - Original files are re-used when they were already BGZF-compressed. 
- Structure: [ val(meta), path(fasta_gz) ] - pattern: "*.{fa.gz,fasta.gz}" - - fai: - type: file - description: | - Channel containing FASTA index files - Structure: [ val(meta), path(fai) ] - pattern: "*.fai" - - gzi: - type: file - description: | - Channel containing bgzip index files - Structure: [ val(meta), path(gzi) ] - pattern: "*.gzi" - - dict: - type: file - description: | - Channel containing sequence dictionary files - Structure: [ val(meta), path(dict) ] - pattern: "*.dict" - - versions_samtools: - - - "${task.process}": - type: string - description: The name of the process - - samtools: - type: string - description: The name of the tool - - "samtools --version | head -n 1 | sed 's/^.*samtools //'": - type: eval - description: The expression to obtain the version of the tool + Channel containing: + - BGZF-compressed FASTA + - FASTA index (.fai) + - BGZF index (.gzi) + - sequence dictionary (.dict) + Structure: + [ val(meta), path(fasta_gz), path(fai), path(gzi), path(dict) ] + pattern: "*" topics: versions: - - "${task.process}": diff --git a/workflows/pairgenomealign.nf b/workflows/pairgenomealign.nf index d517cb0..38e0779 100644 --- a/workflows/pairgenomealign.nf +++ b/workflows/pairgenomealign.nf @@ -91,30 +91,18 @@ workflow PAIRGENOMEALIGN { pairalign_out = PAIRALIGN_M2M.out } - // If we export to CRAM we need a BGZIPped genome, indexed, and its sequence dictionary, - // if we export to SAM or BAM this is also nice to have, - // otherwise we need placeholders. 
- ch_targetgenome_faz = [[],[]] - ch_targetgenome_fai = [[],[]] - ch_targetgenome_gzi = [[],[]] - ch_targetgenome_dic = [[],[]] - export_formats = params.export_aln_to.tokenize(',') if (export_formats.contains('cram') | export_formats.contains('bam')) { FASTA_BGZIP_INDEX_DICT_SAMTOOLS( ch_targetgenome ) - ch_targetgenome_faz = FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.fasta_gz - ch_targetgenome_fai = FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.fai - ch_targetgenome_gzi = FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.gzi - ch_targetgenome_dic = FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.dict } if (!(params.export_aln_to == "no_export")) { ALIGNMENT_EXP( pairalign_out.o2o.combine(Channel.fromList(export_formats)), - ch_targetgenome_faz, - ch_targetgenome_fai, - ch_targetgenome_gzi, - ch_targetgenome_dic + FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.fasta_fai_gzi_dict.map { meta, fasta, fai, gzi, dict -> [meta, fasta] }, + FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.fasta_fai_gzi_dict.map { meta, fasta, fai, gzi, dict -> [meta, fai] }, + FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.fasta_fai_gzi_dict.map { meta, fasta, fai, gzi, dict -> [meta, gzi] }, + FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.fasta_fai_gzi_dict.map { meta, fasta, fai, gzi, dict -> [meta, dict] } ) } From 48fcd5a09b6a3f4417e5201308ddf92b00d1b1c9 Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Fri, 29 May 2026 14:13:15 +0900 Subject: [PATCH 04/20] Also output the dictionary file. 
--- conf/modules.config | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index f6fb2df..90019c2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -140,6 +140,8 @@ process { ] } + // FASTA_BGZIP_INDEX_DICT_SAMTOOLS subworkflow: + withName: 'SAMTOOLS_BGZIP' { publishDir = [ path: { "${params.outdir}/alignment" }, @@ -158,7 +160,8 @@ process { withName: 'SAMTOOLS_DICT' { ext.args = { "-u ./${fasta} -a ${meta.id}" } publishDir = [ - enabled: false + path: { "${params.outdir}/alignment" }, + mode: params.publish_dir_mode, ] } From f37dcf923885b3463c62900f1a0a75db4032c657 Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Fri, 29 May 2026 16:30:19 +0900 Subject: [PATCH 05/20] Patch samtools/merge to preserve local paths to the reference. --- modules.json | 3 ++- modules/nf-core/samtools/merge/main.nf | 4 +++- .../samtools/merge/samtools-merge.diff | 22 +++++++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 modules/nf-core/samtools/merge/samtools-merge.diff diff --git a/modules.json b/modules.json index 6d7ecbe..2930337 100644 --- a/modules.json +++ b/modules.json @@ -63,7 +63,8 @@ "samtools/merge": { "branch": "master", "git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/samtools/merge/samtools-merge.diff" }, "seqtk/cutn": { "branch": "master", diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf index abb56f9..03e4535 100644 --- a/modules/nf-core/samtools/merge/main.nf +++ b/modules/nf-core/samtools/merge/main.nf @@ -24,7 +24,9 @@ process SAMTOOLS_MERGE { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() - def reference = fasta ? 
"--reference ${fasta}" : "" + // In this pipeline we know that the input CRAM files have a correct relative path to the reference, and we want to keep it. + // Passing --reference transforms the link to an absolute path containing temporary folder path. + def reference = "" """ # Note: --threads value represents *additional* CPUs to allocate (total CPUs = 1 + --threads). samtools \\ diff --git a/modules/nf-core/samtools/merge/samtools-merge.diff b/modules/nf-core/samtools/merge/samtools-merge.diff new file mode 100644 index 0000000..a22f0db --- /dev/null +++ b/modules/nf-core/samtools/merge/samtools-merge.diff @@ -0,0 +1,22 @@ +Changes in component 'nf-core/samtools/merge' +'modules/nf-core/samtools/merge/environment.yml' is unchanged +Changes in 'samtools/merge/main.nf': +--- modules/nf-core/samtools/merge/main.nf ++++ modules/nf-core/samtools/merge/main.nf +@@ -24,7 +24,9 @@ + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() +- def reference = fasta ? "--reference ${fasta}" : "" ++ // In this pipeline we know that the input CRAM files have a correct relative path to the reference, and we want to keep it. ++ // Passing --reference transforms the link to an absolute path containing temporary folder path. ++ def reference = "" + """ + # Note: --threads value represents *additional* CPUs to allocate (total CPUs = 1 + --threads). 
+ samtools \\ + +'modules/nf-core/samtools/merge/meta.yml' is unchanged +'modules/nf-core/samtools/merge/tests/nextflow.config' is unchanged +'modules/nf-core/samtools/merge/tests/main.nf.test.snap' is unchanged +'modules/nf-core/samtools/merge/tests/main.nf.test' is unchanged +************************************************************ From dcfa35f843581f877a6e323193bcd7b026c14c82 Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Fri, 29 May 2026 16:30:57 +0900 Subject: [PATCH 06/20] Correct default value of params.multi_cram, for use in if statements. --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 657ba2a..bc03fae 100644 --- a/nextflow.config +++ b/nextflow.config @@ -78,7 +78,7 @@ params { // Export option export_aln_to = 'no_export' - multi_cram = null + multi_cram = false // Schema validation default options validate_params = true From e20e2ca279366a259fdd41abe4796c7a8b5b657f Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Fri, 29 May 2026 16:34:48 +0900 Subject: [PATCH 07/20] Properly handle the case when maf-convert does not need a genome sequence. 
--- workflows/pairgenomealign.nf | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/workflows/pairgenomealign.nf b/workflows/pairgenomealign.nf index 38e0779..37be897 100644 --- a/workflows/pairgenomealign.nf +++ b/workflows/pairgenomealign.nf @@ -94,15 +94,18 @@ workflow PAIRGENOMEALIGN { export_formats = params.export_aln_to.tokenize(',') if (export_formats.contains('cram') | export_formats.contains('bam')) { FASTA_BGZIP_INDEX_DICT_SAMTOOLS( ch_targetgenome ) + ch_genome_for_cram = FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.fasta_fai_gzi_dict.first() + } else { + ch_genome_for_cram = channel.value( [[:], [], [], [], []] ) } if (!(params.export_aln_to == "no_export")) { ALIGNMENT_EXP( pairalign_out.o2o.combine(Channel.fromList(export_formats)), - FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.fasta_fai_gzi_dict.map { meta, fasta, fai, gzi, dict -> [meta, fasta] }, - FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.fasta_fai_gzi_dict.map { meta, fasta, fai, gzi, dict -> [meta, fai] }, - FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.fasta_fai_gzi_dict.map { meta, fasta, fai, gzi, dict -> [meta, gzi] }, - FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.fasta_fai_gzi_dict.map { meta, fasta, fai, gzi, dict -> [meta, dict] } + ch_genome_for_cram.map { meta, fasta, fai, gzi, dict -> [meta, fasta] }, + ch_genome_for_cram.map { meta, fasta, fai, gzi, dict -> [meta, fai] }, + ch_genome_for_cram.map { meta, fasta, fai, gzi, dict -> [meta, gzi] }, + ch_genome_for_cram.map { meta, fasta, fai, gzi, dict -> [meta, dict] } ) } From 8f2fa02a413be84be210f547154b0e036c2c697d Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Fri, 29 May 2026 16:35:30 +0900 Subject: [PATCH 08/20] Produce a merged CRAM file with all the target-query alignments. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The merged CRAM file is neither a pangenome nor a multiple sequence alignment, but I find it very useful. Temporary CRAM files are produced but not exported.
Their header indicates only the name of the query genomes in the read group fields. The files are merged in a single CRAM file, where each read group represents one genome. Each target-query alignment is a one-to-one relationship so a base in the target is aligned at most once to each query. Care is taken to ensure that the path to the reference genome is relative to the current directory. The multi-query CRAM file is output in the same directory as its index and the BGZIPped genome, which is indexed too. Thus the multi-query CRAM file can be loaded and visualised in IGV. The coverage plot shows how many query genomes align to the target at a given location. Expanded track view allows visualising all the sequence differences. You can stabilise the order of the genomes, but IGV enforces alphanumeric sorting. You can work around this limitation by prefixing the sample IDs with numbers in the sample sheet. Custom scripts can be (and have been) written to slice pieces of the multi-query CRAM file and turn these pieces into real MSAs… --- conf/modules.config | 10 ++++++++++ workflows/pairgenomealign.nf | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 90019c2..19a7522 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -111,6 +111,16 @@ process { ext.prefix = { "${meta.id}.m2m_plt_filtered" } } + withName: ALIGNMENT_CRAM { + publishDir = [ + enabled: false + ] + } + + withName: ALIGNMENT_MERGE { + ext.args = { "--write-index" } + } + withName: 'MULTIQC' { ext.args = { params.multiqc_title ?
"--title \"$params.multiqc_title\"" : '' } publishDir = [ diff --git a/workflows/pairgenomealign.nf b/workflows/pairgenomealign.nf index 37be897..62f8522 100644 --- a/workflows/pairgenomealign.nf +++ b/workflows/pairgenomealign.nf @@ -5,7 +5,9 @@ */ include { ASSEMBLYSCAN } from '../modules/nf-core/assemblyscan/main' +include { LAST_MAFCONVERT as ALIGNMENT_CRAM} from '../modules/nf-core/last/mafconvert/main' include { LAST_MAFCONVERT as ALIGNMENT_EXP } from '../modules/nf-core/last/mafconvert/main' +include { SAMTOOLS_MERGE as ALIGNMENT_MERGE} from '../modules/nf-core/samtools/merge/main' include { LAST_DOTPLOT as MULTIQC_THUMBS } from '../modules/nf-core/last/dotplot/main' include { MULTIQC_THUMBS_HTML } from '../modules/local/multiqc_thumbs_html/main' include { MULTIQC_ASSEMBLYSCAN_PLOT_DATA } from '../modules/local/multiqc_assemblyscan_plot_data/main' @@ -92,7 +94,7 @@ workflow PAIRGENOMEALIGN { } export_formats = params.export_aln_to.tokenize(',') - if (export_formats.contains('cram') | export_formats.contains('bam')) { + if (params.multi_cram | export_formats.contains('cram') | export_formats.contains('bam')) { FASTA_BGZIP_INDEX_DICT_SAMTOOLS( ch_targetgenome ) ch_genome_for_cram = FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.fasta_fai_gzi_dict.first() } else { @@ -109,6 +111,35 @@ workflow PAIRGENOMEALIGN { ) } + if (params.multi_cram) { + // We want the read group IDs to be just the query genome name (which is already long enough). + o2o_alignments = pairalign_out.o2o.map { meta, alns -> + def newMeta = meta.clone() // Avoids unexpected propagation to pairalign_out.o2o's meta.id. 
+ newMeta.id = newMeta.id.replaceAll(/^.*___/, '') + [newMeta, alns] + } + ALIGNMENT_CRAM( + o2o_alignments.map {it + "cram"}, + ch_genome_for_cram.map { meta, fasta, fai, gzi, dict -> [meta, fasta] }, + ch_genome_for_cram.map { meta, fasta, fai, gzi, dict -> [meta, fai] }, + ch_genome_for_cram.map { meta, fasta, fai, gzi, dict -> [meta, gzi] }, + ch_genome_for_cram.map { meta, fasta, fai, gzi, dict -> [meta, dict] } + ) + // Collect all per-query CRAMs into a single merged CRAM per target genome + ch_merge_input = ALIGNMENT_CRAM.out.alignment + // Rename and use as grouping key + .map { meta, cram -> tuple(params.targetName, cram) } + // group all CRAMs + .groupTuple() + // convert to SAMTOOLS_MERGE input format + .map { id, crams -> tuple([id: id], crams, []) } + // Output a single CRAM file under the target genome name. + ALIGNMENT_MERGE( + ch_merge_input, + FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.fasta_fai_gzi_dict.map { meta, fasta, fai, gzi, dict -> [meta, fasta, fai, gzi] } + ) + } + if (params.multiqc_thumbs != 0) { MULTIQC_THUMBS( pairalign_out.o2o.map { x -> [x[0], x[1], []] }, From 4a19cd165e318941eae919bc0f3eef73e96d9ae9 Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Fri, 29 May 2026 17:04:40 +0900 Subject: [PATCH 09/20] Document the changes. --- CHANGELOG.md | 15 +++++++++++++++ docs/output.md | 2 ++ docs/usage.md | 1 + 3 files changed, 18 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d3b744..468c3fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- New `--multi_cram` option to produce a multi-query CRAM file combining all the alignments ([#60](https://github.com/nf-core/pairgenomealign/pull/60)). - New `--multiqc_thumbs` option to produce alignment thumbnails in the MultiQC report ([#93](https://github.com/nf-core/pairgenomealign/pull/93)). 
- New `--strand` option to index only one strand of the genome, which reduces memory usage at the expense of speed, and suppresses `-/+` alignments ([#97](https://github.com/nf-core/pairgenomealign/pull/97)). +### `Dependencies` + +| Dependency | Old version | New version | +| ---------------- | ----------- | ----------- | +| `SAMTOOLS_MERGE` | | 1.23.1 | + +### `Parameters` + +| Old parameter | New parameter | +| ------------- | ------------------ | +| | `--multi_cram` | +| | `--multiqc_thumbs` | +| | `----strand` | + ## [v2.2.3](https://github.com/nf-core/pairgenomealign/releases/tag/2.2.3) "Reitou mikan" - [May 20th 2026] ### `Fixed` diff --git a/docs/output.md b/docs/output.md index 9a40a0e..178aae9 100644 --- a/docs/output.md +++ b/docs/output.md @@ -44,6 +44,8 @@ Basic statistics on nucleotide content and contig length are collected for align - `*.o2o_aln.maf.gz` is the _**one-to-one**_ alignment between the _target_ and _query_ genomes. - `*.o2o_aln.tsv` reports nucleotide percent identity of the _**one-to-one**_ alignment for MultiQC. - For each _**one-to-one**_ alignment there will be an additional file in a format such as Axt, Chain, GFF or SAM/BAM/CRAM if you used the `--export_aln_to` parameter. These extra files are always compressed with gzip when their format is text-based. The SAM/BAM/CRAM files are always sorted. Their header features all sequences from the _target_ genome, including the ones that did not align to the _query_ so that alignment files can be merged without disturbing the sort order. + - The _target_ genome sequence, compressed with `bgzip` and indexed by `samtools` is also present when BAM or CRAM files are produced. + - A multi-_query_ CRAM sequence is present when `--multi_cram` is used, named like the _target_ genome but with the `cram` suffix. 
diff --git a/docs/usage.md b/docs/usage.md index 63ae37b..f317cbe 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -49,6 +49,7 @@ The parameters are described in details in the [online documentation](https://nf - `--m2m` enables the computation of the _many-to-many_ alignment, which reports alignments without enforcing uniqueness. This mode is required for self‑alignments and is useful for duplication or repeat analyses, but can exhaust computing resources on large or highly repetitive genomes. - The `--skip_dotplot_*` options disable dotplot visualisations. This is particularly useful when comparing very similar and repetitive genomes (for example, two vertebrate genomes from the same species), where dotplots other than the _one‑to‑one_ alignment can become extremely dense and difficult to interpret, without affecting the underlying alignments. - Users who need formats other than MAF can use the `--export_aln_to` parameter to generate additional coordinate‑based (PSL, GFF) or full alignment (SAM/BAM/CRAM) outputs for downstream analyses. Other formats like Axt or Chain are also supported. +- `--multi_cram` produces a single CRAM file that combines all alignments. It is neither a pangenome nor a multiple sequence alignment; however, once you make use of it—by loading it into the [Integrative Genomics Viewer](https://igv.org/), or extracting slices and converting them into multiple sequence alignments—it becomes a very powerful resource. ## Fixed arguments (taken from the [LAST cookbook][] and the [LAST tuning][] manual) From 0fcb2dcf732234c801def91fc8f0f7fa987f6be1 Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Fri, 29 May 2026 17:47:14 +0900 Subject: [PATCH 10/20] Also update the subworkflow's snapshot. 
--- .../tests/main.nf.test.snap | 70 +++---------------- 1 file changed, 11 insertions(+), 59 deletions(-) diff --git a/subworkflows/local/fasta_bgzip_index_dict_samtools/tests/main.nf.test.snap b/subworkflows/local/fasta_bgzip_index_dict_samtools/tests/main.nf.test.snap index 43b2132..bebb50a 100644 --- a/subworkflows/local/fasta_bgzip_index_dict_samtools/tests/main.nf.test.snap +++ b/subworkflows/local/fasta_bgzip_index_dict_samtools/tests/main.nf.test.snap @@ -8,78 +8,30 @@ "id": "test", "single_end": false }, - "test.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" - ] - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, + "test.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6", + "test.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5", + "test.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474", "test.fasta.gz.dict:md5,f953d89119d6d0ae5ceab0c81aac83f6" ] ], - "dict": [ + "fasta_fai_gzi_dict": [ [ { "id": "test", "single_end": false }, + "test.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6", + "test.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5", + "test.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474", "test.fasta.gz.dict:md5,f953d89119d6d0ae5ceab0c81aac83f6" ] - ], - "fai": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "fasta_gz": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" - ] - ], - "gzi": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" - ] ] } ], + "timestamp": "2026-05-29T17:44:46.008863777", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": 
"2026-01-20T17:24:55.166247381" + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } } } \ No newline at end of file From c870d97ccbc251f887b8076c1035c3a5a0e4df26 Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Fri, 29 May 2026 18:01:22 +0900 Subject: [PATCH 11/20] Fix changelog borken by merge --- CHANGELOG.md | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4923ef5..1b51d63 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | Old parameter | New parameter | | ------------- | ------------------ | +| | `--multi_cram` | | | `--multiqc_thumbs` | | | `--query` | | | `--queryName` | @@ -27,13 +28,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | ---------------- | ----------- | ----------- | | `SAMTOOLS_MERGE` | | 1.23.1 | -### `Parameters` - -| Old parameter | New parameter | -| ------------- | ------------------ | -| | `--multi_cram` | -| | `--multiqc_thumbs` | -| | `----strand` | ## [v2.2.3](https://github.com/nf-core/pairgenomealign/releases/tag/2.2.3) "Reitou mikan" - [May 20th 2026] From c6cbffed7a874bf7bcf72a7a05e7840593659e47 Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Fri, 29 May 2026 18:37:49 +0900 Subject: [PATCH 12/20] prek run --show-diff-on-failure --color=always --all-files --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b51d63..d574d66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | ---------------- | ----------- | ----------- | | `SAMTOOLS_MERGE` | | 1.23.1 | - ## [v2.2.3](https://github.com/nf-core/pairgenomealign/releases/tag/2.2.3) "Reitou mikan" - [May 20th 2026] ### `Fixed` From 9f136d43774656a5631a6a7934e8dfafbc11f5bd Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Sat, 30 May 2026 15:57:20 +0900 
Subject: [PATCH 13/20] Use CRAM 3.0 to be consistent with maf-convert. Will change to CRAM 3.1 in pairgenomealign 3.0.0. --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 19a7522..6045c28 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -118,7 +118,7 @@ process { } withName: ALIGNMENT_MERGE { - ext.args = { "--write-index" } + ext.args = { "-O cram,version=3.0 --write-index" } } withName: 'MULTIQC' { From cc1fd2683e003f6764999de910f89b0a048359e7 Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Tue, 2 Jun 2026 09:51:23 +0900 Subject: [PATCH 14/20] Generate 4 channels at once. Co-authored-by: Joon Klaps --- workflows/pairgenomealign.nf | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/workflows/pairgenomealign.nf b/workflows/pairgenomealign.nf index 62f8522..a99a61f 100644 --- a/workflows/pairgenomealign.nf +++ b/workflows/pairgenomealign.nf @@ -100,7 +100,14 @@ workflow PAIRGENOMEALIGN { } else { ch_genome_for_cram = channel.value( [[:], [], [], [], []] ) } - +ch_targetgenome = ch_genome_for_cram + .first() + .multiMap { meta, fasta, fai, gzi, dict -> + fasta: [meta,fasta] + fai: [meta,fai] + gzi: [meta,gzi] + dict: [meta,dict + } if (!(params.export_aln_to == "no_export")) { ALIGNMENT_EXP( pairalign_out.o2o.combine(Channel.fromList(export_formats)), From 220e3c21f182b14cf6dae2988a61b663745c2d47 Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Tue, 2 Jun 2026 09:53:55 +0900 Subject: [PATCH 15/20] Use the 4 channels generated with multiMap. 
Co-authored-by: Joon Klaps --- workflows/pairgenomealign.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/workflows/pairgenomealign.nf b/workflows/pairgenomealign.nf index a99a61f..40ee829 100644 --- a/workflows/pairgenomealign.nf +++ b/workflows/pairgenomealign.nf @@ -111,10 +111,10 @@ ch_targetgenome = ch_genome_for_cram if (!(params.export_aln_to == "no_export")) { ALIGNMENT_EXP( pairalign_out.o2o.combine(Channel.fromList(export_formats)), - ch_genome_for_cram.map { meta, fasta, fai, gzi, dict -> [meta, fasta] }, - ch_genome_for_cram.map { meta, fasta, fai, gzi, dict -> [meta, fai] }, - ch_genome_for_cram.map { meta, fasta, fai, gzi, dict -> [meta, gzi] }, - ch_genome_for_cram.map { meta, fasta, fai, gzi, dict -> [meta, dict] } + ch_targetgenome.fasta, + ch_targetgenome.fai, + ch_targetgenome.gzi, + ch_targetgenome.dict ) } From fbec92913617d38fe867031ad912169b85ca6ebf Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Tue, 2 Jun 2026 09:54:42 +0900 Subject: [PATCH 16/20] Use the 4 channels generated with multiMap. 
Co-authored-by: Joon Klaps --- workflows/pairgenomealign.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/workflows/pairgenomealign.nf b/workflows/pairgenomealign.nf index 40ee829..173d9a1 100644 --- a/workflows/pairgenomealign.nf +++ b/workflows/pairgenomealign.nf @@ -127,10 +127,10 @@ ch_targetgenome = ch_genome_for_cram } ALIGNMENT_CRAM( o2o_alignments.map {it + "cram"}, - ch_genome_for_cram.map { meta, fasta, fai, gzi, dict -> [meta, fasta] }, - ch_genome_for_cram.map { meta, fasta, fai, gzi, dict -> [meta, fai] }, - ch_genome_for_cram.map { meta, fasta, fai, gzi, dict -> [meta, gzi] }, - ch_genome_for_cram.map { meta, fasta, fai, gzi, dict -> [meta, dict] } + ch_targetgenome.fasta, + ch_targetgenome.fai, + ch_targetgenome.gzi, + ch_targetgenome.dict ) // Collect all per-query CRAMs into a single merged CRAM per target genome ch_merge_input = ALIGNMENT_CRAM.out.alignment From 2bfdc198efa583221a1cf9bc76f5042f75cb3456 Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Tue, 2 Jun 2026 09:57:02 +0900 Subject: [PATCH 17/20] Use the same bgzipped genome channel everywhere Co-authored-by: Joon Klaps --- workflows/pairgenomealign.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/pairgenomealign.nf b/workflows/pairgenomealign.nf index 173d9a1..ae56631 100644 --- a/workflows/pairgenomealign.nf +++ b/workflows/pairgenomealign.nf @@ -143,7 +143,7 @@ ch_targetgenome = ch_genome_for_cram // Output a single CRAM file under the target genome name. 
ALIGNMENT_MERGE( ch_merge_input, - FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.fasta_fai_gzi_dict.map { meta, fasta, fai, gzi, dict -> [meta, fasta, fai, gzi] } + ch_genome_for_cram.map { meta, fasta, fai, gzi, dict -> [meta, fasta, fai, gzi ] }, ) } From 1ad1902b714a2c24b8e5b9836a2891c865d4390b Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Tue, 2 Jun 2026 09:59:52 +0900 Subject: [PATCH 18/20] prek run --show-diff-on-failure --color=always --all-files --- workflows/pairgenomealign.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/pairgenomealign.nf b/workflows/pairgenomealign.nf index ae56631..c2c25cc 100644 --- a/workflows/pairgenomealign.nf +++ b/workflows/pairgenomealign.nf @@ -102,7 +102,7 @@ workflow PAIRGENOMEALIGN { } ch_targetgenome = ch_genome_for_cram .first() - .multiMap { meta, fasta, fai, gzi, dict -> + .multiMap { meta, fasta, fai, gzi, dict -> fasta: [meta,fasta] fai: [meta,fai] gzi: [meta,gzi] From 0e58188f8bcb19b8d4e5d397b516cbeb291e20cf Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Tue, 2 Jun 2026 10:10:11 +0900 Subject: [PATCH 19/20] Simplify one if/else statement in just one if. 
Co-authored-by: Joon Klaps --- workflows/pairgenomealign.nf | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/workflows/pairgenomealign.nf b/workflows/pairgenomealign.nf index c2c25cc..a31024d 100644 --- a/workflows/pairgenomealign.nf +++ b/workflows/pairgenomealign.nf @@ -93,21 +93,22 @@ workflow PAIRGENOMEALIGN { pairalign_out = PAIRALIGN_M2M.out } + ch_genome_for_cram = channel.value( [[:], [], [], [], []] ) export_formats = params.export_aln_to.tokenize(',') if (params.multi_cram | export_formats.contains('cram') | export_formats.contains('bam')) { FASTA_BGZIP_INDEX_DICT_SAMTOOLS( ch_targetgenome ) ch_genome_for_cram = FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.fasta_fai_gzi_dict.first() - } else { - ch_genome_for_cram = channel.value( [[:], [], [], [], []] ) } -ch_targetgenome = ch_genome_for_cram - .first() - .multiMap { meta, fasta, fai, gzi, dict -> - fasta: [meta,fasta] - fai: [meta,fai] - gzi: [meta,gzi] - dict: [meta,dict - } + + ch_targetgenome = ch_genome_for_cram + .first() + .multiMap { meta, fasta, fai, gzi, dict -> + fasta: [meta, fasta] + fai: [meta, fai] + gzi: [meta, gzi] + dict: [meta, dict] + } + if (!(params.export_aln_to == "no_export")) { ALIGNMENT_EXP( pairalign_out.o2o.combine(Channel.fromList(export_formats)), From 15df6b61a16d87d89796e49a916c1bbd37943f15 Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Tue, 2 Jun 2026 11:47:55 +0900 Subject: [PATCH 20/20] =?UTF-8?q?Use=20nf-core's=20version=20of=20FASTA=5F?= =?UTF-8?q?BGZIP=5FINDEX=5FDICT=5FSAMTOOLS=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …which I submitted recently based on the local version. 
--- CHANGELOG.md | 14 +- conf/modules.config | 2 +- modules.json | 23 +- .../bgziptabix}/environment.yml | 6 +- modules/nf-core/htslib/bgziptabix/main.nf | 88 +++ modules/nf-core/htslib/bgziptabix/meta.yml | 125 ++++ .../htslib/bgziptabix/tests/main.nf.test | 435 +++++++++++++ .../htslib/bgziptabix/tests/main.nf.test.snap | 574 ++++++++++++++++++ modules/nf-core/samtools/bgzip/main.nf | 50 -- modules/nf-core/samtools/bgzip/meta.yml | 68 --- .../nf-core/samtools/bgzip/tests/main.nf.test | 109 ---- .../samtools/bgzip/tests/main.nf.test.snap | 217 ------- modules/nf-core/samtools/dict/environment.yml | 4 +- modules/nf-core/samtools/dict/main.nf | 14 +- modules/nf-core/samtools/dict/meta.yml | 1 + .../samtools/dict/tests/main.nf.test.snap | 14 +- .../nf-core/samtools/faidx/environment.yml | 4 +- modules/nf-core/samtools/faidx/main.nf | 23 +- modules/nf-core/samtools/faidx/meta.yml | 18 +- .../nf-core/samtools/faidx/tests/main.nf.test | 118 ++-- .../samtools/faidx/tests/main.nf.test.snap | 329 +--------- .../samtools/faidx/tests/nextflow.config | 1 - .../fasta_bgzip_index_dict_samtools/main.nf | 29 - .../tests/main.nf.test.snap | 37 -- .../fasta_bgzip_index_dict_samtools/main.nf | 36 ++ .../fasta_bgzip_index_dict_samtools/meta.yml | 21 +- .../tests/main.nf.test | 24 +- .../tests/main.nf.test.snap | 37 ++ .../tests/nextflow.config | 7 + workflows/pairgenomealign.nf | 9 +- 30 files changed, 1488 insertions(+), 949 deletions(-) rename modules/nf-core/{samtools/bgzip => htslib/bgziptabix}/environment.yml (51%) create mode 100644 modules/nf-core/htslib/bgziptabix/main.nf create mode 100644 modules/nf-core/htslib/bgziptabix/meta.yml create mode 100644 modules/nf-core/htslib/bgziptabix/tests/main.nf.test create mode 100644 modules/nf-core/htslib/bgziptabix/tests/main.nf.test.snap delete mode 100644 modules/nf-core/samtools/bgzip/main.nf delete mode 100644 modules/nf-core/samtools/bgzip/meta.yml delete mode 100644 modules/nf-core/samtools/bgzip/tests/main.nf.test delete mode 
100644 modules/nf-core/samtools/bgzip/tests/main.nf.test.snap delete mode 100644 subworkflows/local/fasta_bgzip_index_dict_samtools/main.nf delete mode 100644 subworkflows/local/fasta_bgzip_index_dict_samtools/tests/main.nf.test.snap create mode 100644 subworkflows/nf-core/fasta_bgzip_index_dict_samtools/main.nf rename subworkflows/{local => nf-core}/fasta_bgzip_index_dict_samtools/meta.yml (67%) rename subworkflows/{local => nf-core}/fasta_bgzip_index_dict_samtools/tests/main.nf.test (54%) create mode 100644 subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/main.nf.test.snap create mode 100644 subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/nextflow.config diff --git a/CHANGELOG.md b/CHANGELOG.md index d574d66..afae5bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - New `--strand` option to index only one strand of the genome, which reduces memory usage at the expense of speed, and suppresses `-/+` alignments ([#97](https://github.com/nf-core/pairgenomealign/pull/97)). - New `--query` and `--queryName` convenience options to skip samplesheet creation when there is only one _query_ genome to align ([#112](https://github.com/nf-core/pairgenomealign/pull/112)). +### `Fixed` + +- Using the nf-core version of the `FASTA_BGZIP_INDEX_DICT_SAMTOOLS` subworkflow that we just contributed. 
+ ### `Parameters` | Old parameter | New parameter | @@ -24,9 +28,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Dependencies` -| Dependency | Old version | New version | -| ---------------- | ----------- | ----------- | -| `SAMTOOLS_MERGE` | | 1.23.1 | +| Dependency | Old version | New version | +| ------------------- | ----------- | ----------- | +| `SAMTOOLS_BGZIP` | 1.21 | | +| `SAMTOOLS_DICT` | 1.21 | 1.23.1 | +| `SAMTOOLS_FAIDX` | 1.21 | 1.23.1 | +| `SAMTOOLS_MERGE` | | 1.23.1 | +| `HTSLIB_BGZIPTABIX` | | 1.23.1 | ## [v2.2.3](https://github.com/nf-core/pairgenomealign/releases/tag/2.2.3) "Reitou mikan" - [May 20th 2026] diff --git a/conf/modules.config b/conf/modules.config index 6045c28..04c56d9 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -152,7 +152,7 @@ process { // FASTA_BGZIP_INDEX_DICT_SAMTOOLS subworkflow: - withName: 'SAMTOOLS_BGZIP' { + withName: 'HTSLIB_BGZIPTABIX' { publishDir = [ path: { "${params.outdir}/alignment" }, mode: params.publish_dir_mode, diff --git a/modules.json b/modules.json index 2930337..128bc39 100644 --- a/modules.json +++ b/modules.json @@ -10,6 +10,11 @@ "git_sha": "947e154c66b92eb041baca20d14d1d9ab0b47182", "installed_by": ["modules"] }, + "htslib/bgziptabix": { + "branch": "master", + "git_sha": "54e41f4ed3aead45054380a9befeb927612ffc91", + "installed_by": ["fasta_bgzip_index_dict_samtools"] + }, "last/dotplot": { "branch": "master", "git_sha": "1f966905b442fa623130663471eac3b9c5d35527", @@ -45,20 +50,15 @@ "git_sha": "008f9d3e61209bf995edac3ba531f54e269e1215", "installed_by": ["modules"] }, - "samtools/bgzip": { - "branch": "master", - "git_sha": "0d265a14027515eaa36a9d3a931655b918781145", - "installed_by": ["modules"] - }, "samtools/dict": { "branch": "master", - "git_sha": "9a48bce39a67e2cb34b8f125fc1d50f0ad98b616", - "installed_by": ["modules"] + "git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120", + "installed_by": ["fasta_bgzip_index_dict_samtools", 
"modules"] }, "samtools/faidx": { "branch": "master", - "git_sha": "9a48bce39a67e2cb34b8f125fc1d50f0ad98b616", - "installed_by": ["modules"] + "git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120", + "installed_by": ["fasta_bgzip_index_dict_samtools", "modules"] }, "samtools/merge": { "branch": "master", @@ -75,6 +75,11 @@ }, "subworkflows": { "nf-core": { + "fasta_bgzip_index_dict_samtools": { + "branch": "master", + "git_sha": "e340200ac2d101e0b5ce3e9a512f6e4af58ecb3b", + "installed_by": ["subworkflows"] + }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", diff --git a/modules/nf-core/samtools/bgzip/environment.yml b/modules/nf-core/htslib/bgziptabix/environment.yml similarity index 51% rename from modules/nf-core/samtools/bgzip/environment.yml rename to modules/nf-core/htslib/bgziptabix/environment.yml index 89e12a6..5736012 100644 --- a/modules/nf-core/samtools/bgzip/environment.yml +++ b/modules/nf-core/htslib/bgziptabix/environment.yml @@ -4,7 +4,5 @@ channels: - conda-forge - bioconda dependencies: - # renovate: datasource=conda depName=bioconda/htslib - - bioconda::htslib=1.22.1 - # renovate: datasource=conda depName=bioconda/samtools - - bioconda::samtools=1.22.1 + - bioconda::htslib=1.23.1 + - conda-forge::xz=5.8.3 diff --git a/modules/nf-core/htslib/bgziptabix/main.nf b/modules/nf-core/htslib/bgziptabix/main.nf new file mode 100644 index 0000000..9efe968 --- /dev/null +++ b/modules/nf-core/htslib/bgziptabix/main.nf @@ -0,0 +1,88 @@ +process HTSLIB_BGZIPTABIX { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/33/33a1f2c7f36ec58339e41cbea096d121f606918778a91cfbef944b40ba7ce48b/data' + : 'community.wave.seqera.io/library/htslib_xz:49c8c84af5c4b3b9'}" + + input: + tuple val(meta), path(infile), path(infile_tbi), path(regions) + val action + val make_index + val out_ext + + output: + tuple val(meta), path("${outfile}"), emit: output + tuple val(meta), path("${outfile}.{tbi,csi}"), emit: index, optional: true + // all htslib tools have the same version, we use bgzip + tuple val("${task.process}"), val('htslib'), eval("bgzip --version | sed '1! d; s/bgzip (htslib) //'"), topic: versions, emit: versions_htslib + tuple val("${task.process}"), val('xz'), eval("xz --version | sed '1! d; s/xz (XZ Utils) //'"), topic: versions, emit: versions_xz + + when: + task.ext.when == null || task.ext.when + + script: + def allowed_actions = ["compress", "decompress"] + if (action !in allowed_actions) { + error("htslib/bgziptabix: Invalid action: ${action}. Allowed actions are: ${allowed_actions.join(', ')}") + } + + if (action == "decompress" && make_index) { + log.warn("htslib/bgziptabix: Cannot create index when decompressing. Ignoring make_index option.") + } + + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + outfile = action == "compress" ? (out_ext ? "${prefix}.${out_ext}.gz" : "${prefix}.gz") : (out_ext ? "${prefix}.${out_ext}" : "${prefix}") + + def compress_cmd = action == "compress" ? "bgzip -c ${args} -@ ${task.cpus}" : "cat" + def bgzip_cmd = action == "compress" ? "[ '\$(basename ${infile})' != '\$(basename ${outfile})' ] && ln -s ${infile} ${outfile}" : "bgzip -c -d ${args} -@ ${task.cpus} ${infile} > ${outfile}" + + def regions_arg = regions ? "-R ${regions}" : "" + def tabix_cmd = (make_index && !infile_tbi) ? "tabix -@ ${task.cpus} ${regions_arg} ${args2} -f ${outfile}" : "" + def link_tabix_cmd = make_index && infile_tbi ? 
"ln -s ${infile_tbi} ${outfile}.${infile_tbi.extension}" : "" + def uncompressed_cmd = action == "compress" ? "${compress_cmd} ${infile} > ${outfile}" : (infile.getName() == outfile ? "" : "ln -s ${infile} ${outfile}") + """ + ${link_tabix_cmd} + + FILE_TYPE=\$(htsfile ${infile}) + + case "\$FILE_TYPE" in + *BGZF-compressed*) + ${bgzip_cmd} ;; + *gzip-compressed*) + [ "\$(basename ${infile})" == "\$(basename ${outfile})" ] && echo "Input and output names cannot be the same" && exit 1 + zcat ${infile} | ${compress_cmd} > ${outfile} ;; + *bzip2-compressed*) + bzcat ${infile} | ${compress_cmd} > ${outfile} ;; + *XZ-compressed*) + xzcat ${infile} | ${compress_cmd} > ${outfile} ;; + *) + ${uncompressed_cmd} ;; + esac + + ${tabix_cmd} + """ + + stub: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + outfile = action == "compress" ? (out_ext ? "${prefix}.${out_ext}.gz" : "${prefix}.gz") : (out_ext ? "${prefix}.${out_ext}" : "${prefix}") + + def touch_cmd = action == "compress" ? "echo | bgzip -c" : "echo" + def index_fmt = args2.contains('-C') ? 'csi' : 'tbi' + def tabix_cmd = make_index ? "touch ${outfile}.${index_fmt}" : "" + def link_tabix_cmd = make_index && infile_tbi ? "ln -s ${infile_tbi} ${outfile}.${infile_tbi.extension}" : "" + """ + echo ${args} + + ${touch_cmd} > ${outfile} + + ${tabix_cmd} + ${link_tabix_cmd} + """ +} diff --git a/modules/nf-core/htslib/bgziptabix/meta.yml b/modules/nf-core/htslib/bgziptabix/meta.yml new file mode 100644 index 0000000..4cdefd0 --- /dev/null +++ b/modules/nf-core/htslib/bgziptabix/meta.yml @@ -0,0 +1,125 @@ +name: "htslib_bgziptabix" +description: "Multi-purpose module to compress, decompress and index files using bgzip + and tabix." +keywords: + - compress + - decompress + - index + - bgzip + - tabix + - gzip + - bzip + - xz +tools: + - "htslib": + description: "C library for high-throughput sequencing data formats." 
+ homepage: "http://www.htslib.org/" + documentation: "http://www.htslib.org/doc/" + tool_dev_url: "https://github.com/samtools/htslib" + doi: "10.1093/gigascience/giab007" + licence: + - "MIT" + identifier: biotools:htslib +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - infile: + type: file + description: Input file to compress or decompress + pattern: "*" + ontologies: [] + - infile_tbi: + type: file + description: Optional tabix index for the input file. + pattern: "*.{tbi,csi}" + ontologies: + - edam: http://edamontology.org/format_3616 # tabix + - regions: + type: file + description: Optional file of regions to extract (BED or chr:start-end format). + Only used when creating an index for the output file. + pattern: "*.{bed,txt,tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + - edam: http://edamontology.org/format_3003 # BED + - action: + type: string + description: Action to perform, either `compress` or `decompress` + - make_index: + type: boolean + description: Whether to create a tabix index for the output file; only used + if `action` is `compress` + - out_ext: + type: string + description: Output file extension without `.gz` suffix (for example `vcf`) +output: + output: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - ${outfile}: + type: file + description: Compressed or decompressed output file + pattern: "*" + ontologies: [] + index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample1' ] + - ${outfile}.{tbi,csi}: + type: file + description: Tabix index file for the compressed output file + pattern: "*.{tbi,csi}" + ontologies: + - edam: http://edamontology.org/format_3616 # tabix + versions_htslib: + - - ${task.process}: + type: string + description: The name of the process + - htslib: + type: string + description: The name of the tool + - bgzip --version | sed '1! d; s/bgzip (htslib) //': + type: eval + description: The expression to obtain the version of the tool + versions_xz: + - - ${task.process}: + type: string + description: The name of the process + - xz: + type: string + description: The name of the tool + - xz --version | sed '1! d; s/xz (XZ Utils) //': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - htslib: + type: string + description: The name of the tool + - bgzip --version | sed '1! d; s/bgzip (htslib) //': + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The name of the process + - xz: + type: string + description: The name of the tool + - xz --version | sed '1! 
d; s/xz (XZ Utils) //': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@itrujnara" +maintainers: + - "@itrujnara" diff --git a/modules/nf-core/htslib/bgziptabix/tests/main.nf.test b/modules/nf-core/htslib/bgziptabix/tests/main.nf.test new file mode 100644 index 0000000..a734650 --- /dev/null +++ b/modules/nf-core/htslib/bgziptabix/tests/main.nf.test @@ -0,0 +1,435 @@ +nextflow_process { + + name "Test Process HTSLIB_BGZIPTABIX" + script "../main.nf" + process "HTSLIB_BGZIPTABIX" + + tag "modules" + tag "modules_nfcore" + tag "htslib" + tag "htslib/bgziptabix" + + test("sarscov2 - vcf - decompress") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [], + [] + ] + input[1] = 'decompress' // action + input[2] = false // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.vcf') }, + { assert process.out.index.size() == 0 } + ) + } + + } + + test("sarscov2 - vcf - compress - index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = true // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.vcf.gz') }, + { assert process.out.index.get(0).get(1).endsWith('.vcf.gz.tbi') } + ) + } + + } + + test("sarscov2 - vcf + regions - compress - index") { + when { + process { + """ + input[0] = [ + [ id:'example' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', 
checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file('https://raw.githubusercontent.com/luisas/test-datasets/refs/heads/add-bedgraph-subset-illumina/data/genomics/sarscov2/illumina/bed/test.bed', checkIfExists: true) + ] + input[1] = 'compress' // action + input[2] = true // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + sanitizeOutput(process.out), + path(process.out.output[0][1]).vcf.getVariantsMD5(), + ).match() } + ) + } + } + + test("sarscov2 - bgzip - decompress") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = 'decompress' // action + input[2] = false // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.vcf') }, + { assert process.out.index.size() == 0 } + ) + } + } + + test("sarscov2 - bgzip - compress - no index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = false // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.vcf.gz') }, + { assert process.out.index.size() == 0 } + ) + } + + } + + test("sarscov2 - gzip - decompress") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + [], + [] + 
] + input[1] = 'decompress' // action + input[2] = false // make_index + input[3] = 'fastq' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.fastq') }, + { assert process.out.index.size() == 0 } + ) + } + } + + test("sarscov2 - gzip - (re)compress - no index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = false // make_index + input[3] = 'fastq' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.fastq.gz') }, + { assert process.out.index.size() == 0 } + ) + } + } + + test("sarscov2 - gzip - name clash") { + + when { + process { + """ + input[0] = [ + [ id:'test_1' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = false // make_index + input[3] = 'fastq' // out_ext + """ + } + } + + then { + assert process.failed + assertAll( + { assert process.errorReport.contains("Input and output names cannot be the same") } + ) + } + } + + test("metagenome - bz2 - decompress") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/rgi/card-data.tar.bz2', checkIfExists: true), + [], + [] + ] + input[1] = 'decompress' // action + input[2] = false // make_index + input[3] = 'tar' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.tar') }, + { assert 
process.out.index.size() == 0 } + ) + } + } + + test("metagenome - bz2 - (re)compress - no index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/rgi/card-data.tar.bz2', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = false // make_index + input[3] = 'tar' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.tar.gz') }, + { assert process.out.index.size() == 0 } + ) + } + } + + test("metagenome - xz - decompress") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/misc/taxa_sqlite.xz', checkIfExists: true), + [], + [] + ] + input[1] = 'decompress' // action + input[2] = false // make_index + input[3] = '' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() }, + { assert process.out.output.get(0).get(1).endsWith('test') }, + { assert process.out.index.size() == 0 } + ) + } + } + + test("metagenome - xz - (re)compress - no index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/misc/taxa_sqlite.xz', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = false // make_index + input[3] = '' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.gz') }, + { assert process.out.index.size() == 0 } + ) + } + } + + test("sarscov2 - vcf - compress - index - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + 
[ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = true // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + + } + + test("sarscov2 - vcf - decompress - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = 'decompress' // action + input[2] = false // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + + } + + test("illegal action") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [], + [] + ] + input[1] = 'invalid_action' // action + input[2] = true // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.failed + assert process.errorReport.contains("Invalid action: invalid_action. 
Allowed actions are: compress, decompress") + } + + } + +} diff --git a/modules/nf-core/htslib/bgziptabix/tests/main.nf.test.snap b/modules/nf-core/htslib/bgziptabix/tests/main.nf.test.snap new file mode 100644 index 0000000..52cfc90 --- /dev/null +++ b/modules/nf-core/htslib/bgziptabix/tests/main.nf.test.snap @@ -0,0 +1,574 @@ +{ + "sarscov2 - gzip - (re)compress - no index": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:21:33.710007592", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "metagenome - xz - (re)compress - no index": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.gz:md5,b8d852a2b1ee52ed64d83046dcdb9de2" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:22:42.445692755", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "metagenome - bz2 - decompress": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.tar:md5,39e9e71fd16cfd09ceca12cd46e6abce" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:21:48.804507455", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - vcf - decompress - stub": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + 
"versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:23:14.567213835", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - gzip - decompress": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:21:27.607706101", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - vcf - compress - index - stub": { + "content": [ + { + "index": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:22:57.854824265", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - vcf - decompress": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:20:56.277612816", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "metagenome - bz2 - (re)compress - no index": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.tar.gz:md5,39e9e71fd16cfd09ceca12cd46e6abce" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + 
] + ] + } + ], + "timestamp": "2026-05-19T10:21:57.137689117", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - vcf - compress - index": { + "content": [ + { + "index": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,7f005943c935f2b55ba3f9d4802aa09f" + ] + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:21:02.256241871", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "metagenome - xz - decompress": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test:md5,b8d852a2b1ee52ed64d83046dcdb9de2" + ] + ], + "1": [ + + ], + "2": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "3": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ], + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test:md5,b8d852a2b1ee52ed64d83046dcdb9de2" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + }, + { + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-12T11:31:30.587093278", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - vcf - compress - no index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ] + } + ], + 
"timestamp": "2026-05-06T15:27:48.766118732", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - bgzip - compress - no index": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:21:21.172365408", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - vcf + regions - compress - index": { + "content": [ + { + "index": [ + [ + { + "id": "example" + }, + "example.vcf.gz.tbi:md5,d22e5b84e4fcd18792179f72e6da702e" + ] + ], + "output": [ + [ + { + "id": "example" + }, + "example.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + }, + "bc7bf3ee9e8430e064c539eb81e59bf9" + ], + "timestamp": "2026-05-19T10:34:19.00293386", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - bgzip - decompress": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:21:14.663326257", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/bgzip/main.nf b/modules/nf-core/samtools/bgzip/main.nf deleted file mode 100644 index 6d50e90..0000000 --- a/modules/nf-core/samtools/bgzip/main.nf +++ /dev/null @@ -1,50 +0,0 @@ -process SAMTOOLS_BGZIP { - tag "$fasta" - label 'process_low' - - conda "${moduleDir}/environment.yml" - 
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : - 'biocontainers/samtools:1.22.1--h96c455f_0' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path("${output}"), emit: fasta - // samtools-bgzip has no --version option so let's use lastal from the same suite - tuple val("${task.process}"), val('samtools'), eval("samtools --version | head -n 1 | sed 's/^.*samtools //'"), emit: versions_samtools, topic: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - output = "${prefix}.fasta.gz" - """ - FILE_TYPE=\$(htsfile $fasta) - case "\$FILE_TYPE" in - *BGZF-compressed*) - # Do nothing or just rename if the file was already compressed - [ "\$(basename $fasta)" != "\$(basename ${output})" ] && ln -s $fasta ${output} ;; - *gzip-compressed*) - [ "\$(basename $fasta)" == "\$(basename ${output})" ] && echo "Filename collision (\$basename $fasta)" && exit 1 - zcat $fasta | bgzip -c $args -@${task.cpus} > ${output} ;; - *bzip2-compressed*) - bzcat $fasta | bgzip -c $args -@${task.cpus} > ${output} ;; - *XZ-compressed*) - xzcat $fasta | bgzip -c $args -@${task.cpus} > ${output} ;; - *) - bgzip -c $args -@${task.cpus} $fasta > ${output} ;; - esac - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - output = "${prefix}.gz" - """ - [ "\$(basename $fasta)" == "\$(basename ${output})" ] && echo "Filename collision (\$basename $fasta)" && exit 1 - echo '' | bgzip > ${output} - """ -} diff --git a/modules/nf-core/samtools/bgzip/meta.yml b/modules/nf-core/samtools/bgzip/meta.yml deleted file mode 100644 index 4f3517e..0000000 --- a/modules/nf-core/samtools/bgzip/meta.yml +++ /dev/null @@ -1,68 +0,0 @@ -name: samtools_bgzip -description: Outputs a FASTA file compressed with the BGZF algorithm -keywords: - - fasta - - 
BGZF - - bgzip -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: http://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] - identifier: biotools:samtools -input: - - - meta: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - fasta: - type: file - description: FASTA file, compressed or not. - pattern: "*.{fa,fa.gz,fa.bz2,fa.xz,fasta,fasta.gz,fasta.bz2,fasta.xz}" - ontologies: [] -output: - fasta: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - pattern: "*.{fa.gz,fasta.gz}" - - ${output}: - type: map - description: | - A FASTA file compressed with the BGZF algorithm. It will be - the original file if it was already BGZF-compressed. 
- pattern: "*.{fa.gz,fasta.gz}" - versions_samtools: - - - "${task.process}": - type: string - description: The name of the process - - samtools: - type: string - description: The name of the tool - - "samtools --version | head -n 1 | sed 's/^.*samtools //'": - type: eval - description: The expression to obtain the version of the tool -topics: - versions: - - - "${task.process}": - type: string - description: The name of the process - - samtools: - type: string - description: The name of the tool - - "samtools --version | head -n 1 | sed 's/^.*samtools //'": - type: eval - description: The expression to obtain the version of the tool - -authors: - - "@charles-plessy" -maintainers: - - "@charles-plessy" diff --git a/modules/nf-core/samtools/bgzip/tests/main.nf.test b/modules/nf-core/samtools/bgzip/tests/main.nf.test deleted file mode 100644 index dd9d5d2..0000000 --- a/modules/nf-core/samtools/bgzip/tests/main.nf.test +++ /dev/null @@ -1,109 +0,0 @@ -nextflow_process { - - name "Test Process SAMTOOLS_BGZIP" - script "../main.nf" - process "SAMTOOLS_BGZIP" - - tag "modules" - tag "modules_nfcore" - tag "samtools" - tag "samtools/bgzip" - - test("test_samtools_bgzip - fasta") { - - when { - process { - """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("test_samtools_bgzip - fasta bgzipped") { - - when { - process { - """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("test_samtools_bgzip - fasta bgzipped same name") { - // This checks that the module avoids file name 
clashes when input is already bzipped. - - when { - process { - """ - input[0] = [ [ id:'genome', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("test_samtools_bgzip - proteome gzipped") { - // This file is not bgziped. It is used to check the re-zipping branch of the case statement in the module. - - when { - process { - """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("test_samtools_bgzip - fasta stub") { - - options "-stub" - when { - process { - """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } -} diff --git a/modules/nf-core/samtools/bgzip/tests/main.nf.test.snap b/modules/nf-core/samtools/bgzip/tests/main.nf.test.snap deleted file mode 100644 index a704c1b..0000000 --- a/modules/nf-core/samtools/bgzip/tests/main.nf.test.snap +++ /dev/null @@ -1,217 +0,0 @@ -{ - "test_samtools_bgzip - fasta bgzipped same name": { - "content": [ - { - "0": [ - [ - { - "id": "genome", - "single_end": false - }, - "genome.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" - ] - ], - "1": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ], - "fasta": [ - [ - { - "id": "genome", - "single_end": false - }, - "genome.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" - ] - ], - "versions_samtools": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ] - } - ], - 
"meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2026-01-20T11:07:40.706529206" - }, - "test_samtools_bgzip - fasta": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" - ] - ], - "1": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ], - "fasta": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" - ] - ], - "versions_samtools": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2026-01-20T11:07:25.35750234" - }, - "test_samtools_bgzip - fasta bgzipped": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" - ] - ], - "1": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ], - "fasta": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" - ] - ], - "versions_samtools": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2026-01-20T11:07:33.250478519" - }, - "test_samtools_bgzip - fasta stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "1": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ], - "fasta": [ - [ - { - "id": "test", - "single_end": false - }, - "test.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "versions_samtools": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2026-01-20T11:08:05.629367619" - }, - "test_samtools_bgzip - proteome gzipped": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - 
"test.fasta.gz:md5,db0ecd5dbce6bf9730685b94ec87854d" - ] - ], - "1": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ], - "fasta": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fasta.gz:md5,db0ecd5dbce6bf9730685b94ec87854d" - ] - ], - "versions_samtools": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2026-01-20T11:07:57.879134659" - } -} \ No newline at end of file diff --git a/modules/nf-core/samtools/dict/environment.yml b/modules/nf-core/samtools/dict/environment.yml index 89e12a6..946bb36 100644 --- a/modules/nf-core/samtools/dict/environment.yml +++ b/modules/nf-core/samtools/dict/environment.yml @@ -5,6 +5,6 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/htslib - - bioconda::htslib=1.22.1 + - bioconda::htslib=1.23.1 # renovate: datasource=conda depName=bioconda/samtools - - bioconda::samtools=1.22.1 + - bioconda::samtools=1.23.1 diff --git a/modules/nf-core/samtools/dict/main.nf b/modules/nf-core/samtools/dict/main.nf index dd1f1b5..5d26198 100644 --- a/modules/nf-core/samtools/dict/main.nf +++ b/modules/nf-core/samtools/dict/main.nf @@ -1,17 +1,17 @@ process SAMTOOLS_DICT { - tag "$fasta" + tag "${fasta}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : - 'biocontainers/samtools:1.22.1--h96c455f_0' }" + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/8c/8c5d2818c8b9f58e1fba77ce219fdaf32087ae53e857c4a496402978af26e78c/data' + : 'community.wave.seqera.io/library/htslib_samtools:1.23.1--5b6bb4ede7e612e5'}" input: tuple val(meta), path(fasta) output: - tuple val(meta), path ("*.dict"), emit: dict + tuple val(meta), path("*.dict"), emit: dict tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools when: @@ -22,8 +22,8 @@ process SAMTOOLS_DICT { """ samtools \\ dict \\ - $args \\ - $fasta \\ + ${args} \\ + ${fasta} \\ > ${fasta}.dict """ diff --git a/modules/nf-core/samtools/dict/meta.yml b/modules/nf-core/samtools/dict/meta.yml index 73c7551..2d73a5f 100644 --- a/modules/nf-core/samtools/dict/meta.yml +++ b/modules/nf-core/samtools/dict/meta.yml @@ -64,3 +64,4 @@ authors: - "@muffato" maintainers: - "@muffato" + - "@matthdsm" diff --git a/modules/nf-core/samtools/dict/tests/main.nf.test.snap b/modules/nf-core/samtools/dict/tests/main.nf.test.snap index b08f8fb..a1be1d6 100644 --- a/modules/nf-core/samtools/dict/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/dict/tests/main.nf.test.snap @@ -15,7 +15,7 @@ [ "SAMTOOLS_DICT", "samtools", - "1.22.1" + "1.23.1" ] ], "dict": [ @@ -31,16 +31,16 @@ [ "SAMTOOLS_DICT", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-22T09:04:24.522267802" + "timestamp": "2026-03-19T08:57:25.259692" }, "sarscov2 - fasta": { "content": [ @@ -50,15 +50,15 @@ [ "SAMTOOLS_DICT", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-21T15:33:11.354952955" + "timestamp": "2026-03-19T08:57:20.375172" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml index 
89e12a6..946bb36 100644 --- a/modules/nf-core/samtools/faidx/environment.yml +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -5,6 +5,6 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/htslib - - bioconda::htslib=1.22.1 + - bioconda::htslib=1.23.1 # renovate: datasource=conda depName=bioconda/samtools - - bioconda::samtools=1.22.1 + - bioconda::samtools=1.23.1 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index 57a0349..175a53a 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -1,22 +1,21 @@ process SAMTOOLS_FAIDX { - tag "$fasta" + tag "${fasta}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : - 'biocontainers/samtools:1.22.1--h96c455f_0' }" + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/8c/8c5d2818c8b9f58e1fba77ce219fdaf32087ae53e857c4a496402978af26e78c/data' + : 'community.wave.seqera.io/library/htslib_samtools:1.23.1--5b6bb4ede7e612e5'}" input: - tuple val(meta), path(fasta) - tuple val(meta2), path(fai) + tuple val(meta), path(fasta), path(fai) val get_sizes output: - tuple val(meta), path ("*.{fa,fasta}") , emit: fa, optional: true - tuple val(meta), path ("*.sizes") , emit: sizes, optional: true - tuple val(meta), path ("*.fai") , emit: fai, optional: true - tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + tuple val(meta), path("*.{fa,fasta}"), emit: fa, optional: true + tuple val(meta), path("*.sizes"), emit: sizes, optional: true + tuple val(meta), path("*.fai"), emit: fai, optional: true + tuple val(meta), path("*.gzi"), emit: gzi, optional: true tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools when: @@ -28,8 +27,8 @@ process SAMTOOLS_FAIDX { """ samtools \\ faidx \\ - $fasta \\ - $args + ${fasta} \\ + ${args} ${get_sizes_command} """ diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml index 163c301..529f7a2 100644 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -1,5 +1,6 @@ name: samtools_faidx -description: Index FASTA file, and optionally generate a file of chromosome sizes +description: Index FASTA file, and optionally generate a file of chromosome + sizes keywords: - index - fasta @@ -14,7 +15,8 @@ tools: homepage: http://www.htslib.org/ documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] + licence: + - "MIT" identifier: biotools:samtools input: - - meta: @@ -27,11 +29,6 @@ input: description: FASTA file pattern: "*.{fa,fasta}" ontologies: [] - - - meta2: - type: map - description: | - Groovy Map containing reference 
information - e.g. [ id:'test' ] - fai: type: file description: FASTA index file @@ -40,7 +37,6 @@ input: - get_sizes: type: boolean description: use cut to get the sizes of the index (true) or not (false) - output: fa: - - meta: @@ -94,9 +90,8 @@ output: type: string description: The tool name - "samtools version | sed '1!d;s/.* //'": - type: string + type: eval description: The command used to generate the version of the tool - topics: versions: - - ${task.process}: @@ -106,7 +101,7 @@ topics: type: string description: The tool name - "samtools version | sed '1!d;s/.* //'": - type: string + type: eval description: The command used to generate the version of the tool authors: - "@drpatelh" @@ -115,3 +110,4 @@ authors: maintainers: - "@maxulysse" - "@phue" + - "@matthdsm" diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test b/modules/nf-core/samtools/faidx/tests/main.nf.test index 02ba504..9a86db8 100644 --- a/modules/nf-core/samtools/faidx/tests/main.nf.test +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test @@ -18,10 +18,12 @@ nextflow_process { } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [[],[]] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = false """ } } @@ -29,7 +31,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match()} + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -42,10 +44,12 @@ nextflow_process { } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)] - input[1] = [[],[]] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + [] + ] + input[1] = false """ } } @@ -53,7 +57,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match()} + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -66,11 +70,12 @@ nextflow_process { } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = false """ } } @@ -78,7 +83,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match()} + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -92,11 +97,12 @@ nextflow_process { } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = false """ } } @@ -104,7 +110,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match()} + { assert 
snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -118,10 +124,12 @@ nextflow_process { } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [[],[]] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = false """ } } @@ -129,7 +137,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match()} + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -142,12 +150,12 @@ nextflow_process { } process { """ - input[0] = Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ]) - input[1] = [[],[]] - input[2] = true + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = true """ } } @@ -155,7 +163,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match()} + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -168,12 +176,12 @@ nextflow_process { } process { """ - input[0] = Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) - ]) - input[1] = [[],[]] - input[2] = true + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + [] + ] + input[1] = true """ } } @@ -181,7 +189,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match()} + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -196,12 +204,12 @@ nextflow_process { } process { """ - input[0] = 
Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ]) - input[1] = [[],[]] - input[2] = true + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = true """ } } @@ -209,7 +217,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match()} + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -224,12 +232,12 @@ nextflow_process { } process { """ - input[0] = Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) - ]) - input[1] = [[],[]] - input[2] = true + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + [] + ] + input[1] = true """ } } @@ -237,7 +245,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match()} + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap index 565d20e..e879d96 100644 --- a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap @@ -2,39 +2,13 @@ "test_samtools_faidx": { "content": [ { - "0": [ - - ], - "1": [ - - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "3": [ - - ], - "4": [ - [ - "SAMTOOLS_FAIDX", - "samtools", - "1.22.1" - ] - ], "fa": [ ], "fai": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" ] @@ -49,54 +23,20 @@ [ "SAMTOOLS_FAIDX", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": 
"0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-23T14:02:40.159309157" + "timestamp": "2026-03-19T08:57:29.747964" }, "test_samtools_faidx_get_sizes_bgzip - stub": { "content": [ { - "0": [ - - ], - "1": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.fai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "4": [ - [ - "SAMTOOLS_FAIDX", - "samtools", - "1.22.1" - ] - ], "fa": [ ], @@ -128,49 +68,20 @@ [ "SAMTOOLS_FAIDX", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-23T14:03:39.550619177" + "timestamp": "2026-03-19T08:58:10.04235" }, "test_samtools_faidx_get_sizes": { "content": [ { - "0": [ - - ], - "1": [ - [ - { - "id": "test" - }, - "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "3": [ - - ], - "4": [ - [ - "SAMTOOLS_FAIDX", - "samtools", - "1.22.1" - ] - ], "fa": [ ], @@ -197,59 +108,27 @@ [ "SAMTOOLS_FAIDX", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-23T14:03:16.844965756" + "timestamp": "2026-03-19T08:57:55.552612" }, "test_samtools_faidx_bgzip": { "content": [ { - "0": [ - - ], - "1": [ - - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" - ] - ], - "4": [ - [ - "SAMTOOLS_FAIDX", - "samtools", - "1.22.1" - ] - ], "fa": [ ], "fai": [ [ { - "id": "test", - "single_end": false + "id": 
"test" }, "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" ] @@ -257,8 +136,7 @@ "gzi": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" ] @@ -270,50 +148,24 @@ [ "SAMTOOLS_FAIDX", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-23T14:02:47.301476131" + "timestamp": "2026-03-19T08:57:34.346045" }, "test_samtools_faidx_fasta": { "content": [ { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - - ], - "4": [ - [ - "SAMTOOLS_FAIDX", - "samtools", - "1.22.1" - ] - ], "fa": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" ] @@ -331,49 +183,20 @@ [ "SAMTOOLS_FAIDX", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-23T09:44:40.559583279" + "timestamp": "2026-03-19T08:57:39.136814" }, "test_samtools_faidx_get_sizes - stub": { "content": [ { - "0": [ - - ], - "1": [ - [ - { - "id": "test" - }, - "genome.fasta.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - - ], - "4": [ - [ - "SAMTOOLS_FAIDX", - "samtools", - "1.22.1" - ] - ], "fa": [ ], @@ -400,50 +223,24 @@ [ "SAMTOOLS_FAIDX", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-23T14:03:31.989929281" + "timestamp": "2026-03-19T08:58:05.200994" }, "test_samtools_faidx_stub_fasta": { "content": [ { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "extract.fa:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - - ], 
- "4": [ - [ - "SAMTOOLS_FAIDX", - "samtools", - "1.22.1" - ] - ], "fa": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "extract.fa:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -461,53 +258,27 @@ [ "SAMTOOLS_FAIDX", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-23T09:44:48.295693103" + "timestamp": "2026-03-19T08:57:44.105654" }, "test_samtools_faidx_stub_fai": { "content": [ { - "0": [ - - ], - "1": [ - - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - - ], - "4": [ - [ - "SAMTOOLS_FAIDX", - "samtools", - "1.22.1" - ] - ], "fa": [ ], "fai": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -522,54 +293,20 @@ [ "SAMTOOLS_FAIDX", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-23T14:03:09.784289542" + "timestamp": "2026-03-19T08:57:50.839162" }, "test_samtools_faidx_get_sizes_bgzip": { "content": [ { - "0": [ - - ], - "1": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "3": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" - ] - ], - "4": [ - [ - "SAMTOOLS_FAIDX", - "samtools", - "1.22.1" - ] - ], "fa": [ ], @@ -601,15 +338,15 @@ [ "SAMTOOLS_FAIDX", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-23T14:03:24.814967939" + "timestamp": "2026-03-19T08:58:00.460031" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/nextflow.config 
b/modules/nf-core/samtools/faidx/tests/nextflow.config index 202c036..b3c4d0c 100644 --- a/modules/nf-core/samtools/faidx/tests/nextflow.config +++ b/modules/nf-core/samtools/faidx/tests/nextflow.config @@ -3,5 +3,4 @@ process { withName: SAMTOOLS_FAIDX { ext.args = params.module_args } - } diff --git a/subworkflows/local/fasta_bgzip_index_dict_samtools/main.nf b/subworkflows/local/fasta_bgzip_index_dict_samtools/main.nf deleted file mode 100644 index 4fce842..0000000 --- a/subworkflows/local/fasta_bgzip_index_dict_samtools/main.nf +++ /dev/null @@ -1,29 +0,0 @@ -include { SAMTOOLS_BGZIP } from '../../../modules/nf-core/samtools/bgzip/main' -include { SAMTOOLS_DICT } from '../../../modules/nf-core/samtools/dict/main' -include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' - -workflow FASTA_BGZIP_INDEX_DICT_SAMTOOLS { - - take: - ch_fasta // channel: [ val(meta), fasta ] - - main: - - // Guarantee BGZIP compression - SAMTOOLS_BGZIP ( ch_fasta ) - - SAMTOOLS_FAIDX ( SAMTOOLS_BGZIP.out.fasta, [[],[]], [[],[]] ) - - SAMTOOLS_DICT ( SAMTOOLS_BGZIP.out.fasta ) - - ch_joined = SAMTOOLS_BGZIP.out.fasta - .join(SAMTOOLS_FAIDX.out.fai) - .join(SAMTOOLS_FAIDX.out.gzi) - .join(SAMTOOLS_DICT.out.dict) - .map { meta, fasta, fai, gzi, dict -> - [ meta, fasta, fai, gzi, dict ] - } - - emit: - fasta_fai_gzi_dict = ch_joined // channel: [ val(meta), fasta.gz, fai, gzi, dict ] -} diff --git a/subworkflows/local/fasta_bgzip_index_dict_samtools/tests/main.nf.test.snap b/subworkflows/local/fasta_bgzip_index_dict_samtools/tests/main.nf.test.snap deleted file mode 100644 index bebb50a..0000000 --- a/subworkflows/local/fasta_bgzip_index_dict_samtools/tests/main.nf.test.snap +++ /dev/null @@ -1,37 +0,0 @@ -{ - "sarscov2 - fasta": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6", - "test.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5", - 
"test.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474", - "test.fasta.gz.dict:md5,f953d89119d6d0ae5ceab0c81aac83f6" - ] - ], - "fasta_fai_gzi_dict": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6", - "test.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5", - "test.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474", - "test.fasta.gz.dict:md5,f953d89119d6d0ae5ceab0c81aac83f6" - ] - ] - } - ], - "timestamp": "2026-05-29T17:44:46.008863777", - "meta": { - "nf-test": "0.9.5", - "nextflow": "25.10.4" - } - } -} \ No newline at end of file diff --git a/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/main.nf b/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/main.nf new file mode 100644 index 0000000..31aa683 --- /dev/null +++ b/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/main.nf @@ -0,0 +1,36 @@ +include { HTSLIB_BGZIPTABIX } from '../../../modules/nf-core/htslib/bgziptabix/main' +include { SAMTOOLS_DICT } from '../../../modules/nf-core/samtools/dict/main' +include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' + +workflow FASTA_BGZIP_INDEX_DICT_SAMTOOLS { + + take: + ch_fasta // channel: [ val(meta), fasta ] + + main: + + HTSLIB_BGZIPTABIX ( + ch_fasta.map { meta, fasta -> [meta, fasta, [], []] }, + 'compress', + [], + [] + ) + + SAMTOOLS_FAIDX ( + HTSLIB_BGZIPTABIX.out.output.map {meta, fasta -> [meta, fasta, []]}, + true + ) + + SAMTOOLS_DICT ( + HTSLIB_BGZIPTABIX.out.output + ) + + ch_joined = HTSLIB_BGZIPTABIX.out.output + .join(SAMTOOLS_FAIDX.out.fai) + .join(SAMTOOLS_FAIDX.out.gzi) + .join(SAMTOOLS_FAIDX.out.sizes) + .join(SAMTOOLS_DICT.out.dict) + + emit: + fasta_fai_gzi_dict = ch_joined // channel: [ val(meta), fasta.gz, fai, gzi, sizes, dict ] +} diff --git a/subworkflows/local/fasta_bgzip_index_dict_samtools/meta.yml b/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/meta.yml similarity index 67% rename from 
subworkflows/local/fasta_bgzip_index_dict_samtools/meta.yml rename to subworkflows/nf-core/fasta_bgzip_index_dict_samtools/meta.yml index 6d10225..f2281e8 100644 --- a/subworkflows/local/fasta_bgzip_index_dict_samtools/meta.yml +++ b/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/meta.yml @@ -1,15 +1,16 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json name: "fasta_bgzip_index_dict_samtools" -description: Sort SAM/BAM/CRAM file +description: Ensure BGZF compression, index, and produce a sequence size summary and a sequence dictionary for each FASTA file. keywords: - bgzip - faidx - dict - fasta components: - - samtools/bgzip + - htslib/bgziptabix - samtools/dict - samtools/faidx + input: - ch_fasta: type: file @@ -17,6 +18,7 @@ input: FASTA file, compressed or not. Structure: [ val(meta), path(fasta) ] pattern: "*.{fa,fa.gz,fa.bz2,fa.xz,fasta,fasta.gz,fasta.bz2,fasta.xz}" + output: - fasta_fai_gzi_dict: type: file @@ -25,21 +27,12 @@ output: - BGZF-compressed FASTA - FASTA index (.fai) - BGZF index (.gzi) + - sequence size summary (.sizes) - sequence dictionary (.dict) Structure: - [ val(meta), path(fasta_gz), path(fai), path(gzi), path(dict) ] + [ val(meta), path(fasta_gz), path(fai), path(gzi), path(sizes), path(dict) ] pattern: "*" -topics: - versions: - - - "${task.process}": - type: string - description: The name of the process - - samtools: - type: string - description: The name of the tool - - "samtools --version | head -n 1 | sed 's/^.*samtools //'": - type: eval - description: The expression to obtain the version of the tool + authors: - "@charles-plessy" maintainers: diff --git a/subworkflows/local/fasta_bgzip_index_dict_samtools/tests/main.nf.test b/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/main.nf.test similarity index 54% rename from subworkflows/local/fasta_bgzip_index_dict_samtools/tests/main.nf.test rename to 
subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/main.nf.test index 3cba7b3..4c3faf1 100644 --- a/subworkflows/local/fasta_bgzip_index_dict_samtools/tests/main.nf.test +++ b/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/main.nf.test @@ -5,30 +5,34 @@ nextflow_workflow { workflow "FASTA_BGZIP_INDEX_DICT_SAMTOOLS" tag "subworkflows" - tag "subworkflows_" + tag "subworkflows_nfcore" tag "subworkflows/fasta_bgzip_index_dict_samtools" + tag "htslib" + tag "htslib/bgziptabix" tag "samtools" - tag "samtools/bgzip" tag "samtools/dict" tag "samtools/faidx" - test("sarscov2 - fasta") { + config "./nextflow.config" + + test("sarscov2 - fasta - genomes") { when { workflow { """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), - ] + input[0] = channel.of([ + [ id:'genome_complete' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) """ } } - then { + assert workflow.success assertAll( - { assert workflow.success}, - { assert snapshot(workflow.out).match()} + { assert snapshot( + workflow.out + ).match() } ) } } diff --git a/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/main.nf.test.snap new file mode 100644 index 0000000..06dc06e --- /dev/null +++ b/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/main.nf.test.snap @@ -0,0 +1,37 @@ +{ + "sarscov2 - fasta - genomes": { + "content": [ + { + "0": [ + [ + { + "id": "genome_complete" + }, + "genome_complete.gz:md5,6e9fe4042a72f2345f644f239272b7e6", + "genome_complete.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5", + "genome_complete.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474", + "genome_complete.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c", + "genome_complete.gz.dict:md5,7259d9fba4f0029e294b70a7bf05af6a" + ] + ], +
"fasta_fai_gzi_dict": [ + [ + { + "id": "genome_complete" + }, + "genome_complete.gz:md5,6e9fe4042a72f2345f644f239272b7e6", + "genome_complete.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5", + "genome_complete.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474", + "genome_complete.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c", + "genome_complete.gz.dict:md5,7259d9fba4f0029e294b70a7bf05af6a" + ] + ] + } + ], + "timestamp": "2026-05-28T11:12:13.641917963", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/nextflow.config b/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/nextflow.config new file mode 100644 index 0000000..2ab54d9 --- /dev/null +++ b/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + withName: 'SAMTOOLS_DICT' { + // Set the URI (-u) to a relative path so that the snapshotted .dict file is deterministic: + // by default samtools dict records the absolute input path, which contains the temporary work directory.
+ ext.args = { "-u ./${fasta} -a ${meta.id}" } + } +} diff --git a/workflows/pairgenomealign.nf b/workflows/pairgenomealign.nf index a31024d..564e7c5 100644 --- a/workflows/pairgenomealign.nf +++ b/workflows/pairgenomealign.nf @@ -19,7 +19,7 @@ include { MULTIQC } from '../modules/nf-core/multiqc/ma include { paramsSummaryMap } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { FASTA_BGZIP_INDEX_DICT_SAMTOOLS } from '../subworkflows/local/fasta_bgzip_index_dict_samtools' +include { FASTA_BGZIP_INDEX_DICT_SAMTOOLS } from '../subworkflows/nf-core/fasta_bgzip_index_dict_samtools/main' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_pairgenomealign_pipeline' /* @@ -93,7 +93,7 @@ workflow PAIRGENOMEALIGN { pairalign_out = PAIRALIGN_M2M.out } - ch_genome_for_cram = channel.value( [[:], [], [], [], []] ) + ch_genome_for_cram = channel.value( [[:], [], [], [], [], []] ) export_formats = params.export_aln_to.tokenize(',') if (params.multi_cram | export_formats.contains('cram') | export_formats.contains('bam')) { FASTA_BGZIP_INDEX_DICT_SAMTOOLS( ch_targetgenome ) @@ -101,8 +101,7 @@ workflow PAIRGENOMEALIGN { } ch_targetgenome = ch_genome_for_cram - .first() - .multiMap { meta, fasta, fai, gzi, dict -> + .multiMap { meta, fasta, fai, gzi, _sizes, dict -> fasta: [meta, fasta] fai: [meta, fai] gzi: [meta, gzi] @@ -144,7 +143,7 @@ workflow PAIRGENOMEALIGN { // Output a single CRAM file under the target genome name. ALIGNMENT_MERGE( ch_merge_input, - ch_genome_for_cram.map { meta, fasta, fai, gzi, dict -> [meta, fasta, fai, gzi ] }, + ch_genome_for_cram.map { meta, fasta, fai, gzi, _sizes, _dict -> [meta, fasta, fai, gzi ] }, ) }