diff --git a/CHANGELOG.md b/CHANGELOG.md index 26c9c0d..afae5bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,19 +7,35 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- New `--multi_cram` option to produce a multi-query CRAM file combining all the alignments ([#60](https://github.com/nf-core/pairgenomealign/pull/60)). - New `--multiqc_thumbs` option to produce alignment thumbnails in the MultiQC report ([#93](https://github.com/nf-core/pairgenomealign/pull/93)). - New `--strand` option to index only one strand of the genome, which reduces memory usage at the expense of speed, and suppresses `-/+` alignments ([#97](https://github.com/nf-core/pairgenomealign/pull/97)). - New `--query` and `--queryName` convenience options to skip samplesheet creation when there is only one _query_ genome to align ([#112](https://github.com/nf-core/pairgenomealign/pull/112)). +### `Fixed` + +- Switched to the upstream nf-core version of the `FASTA_BGZIP_INDEX_DICT_SAMTOOLS` subworkflow, which was contributed from this pipeline. 
+ ### `Parameters` | Old parameter | New parameter | | ------------- | ------------------ | +| | `--multi_cram` | | | `--multiqc_thumbs` | | | `--query` | | | `--queryName` | | | `--strand` | +### `Dependencies` + +| Dependency | Old version | New version | +| ------------------- | ----------- | ----------- | +| `SAMTOOLS_BGZIP` | 1.21 | | +| `SAMTOOLS_DICT` | 1.21 | 1.23.1 | +| `SAMTOOLS_FAIDX` | 1.21 | 1.23.1 | +| `SAMTOOLS_MERGE` | | 1.23.1 | +| `HTSLIB_BGZIPTABIX` | | 1.23.1 | + ## [v2.2.3](https://github.com/nf-core/pairgenomealign/releases/tag/2.2.3) "Reitou mikan" - [May 20th 2026] ### `Fixed` diff --git a/conf/modules.config b/conf/modules.config index f6fb2df..04c56d9 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -111,6 +111,16 @@ process { ext.prefix = { "${meta.id}.m2m_plt_filtered" } } + withName: ALIGNMENT_CRAM { + publishDir = [ + enabled: false + ] + } + + withName: ALIGNMENT_MERGE { + ext.args = { "-O cram,version=3.0 --write-index" } + } + withName: 'MULTIQC' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ @@ -140,7 +150,9 @@ process { ] } - withName: 'SAMTOOLS_BGZIP' { + // FASTA_BGZIP_INDEX_DICT_SAMTOOLS subworkflow: + + withName: 'HTSLIB_BGZIPTABIX' { publishDir = [ path: { "${params.outdir}/alignment" }, mode: params.publish_dir_mode, @@ -158,7 +170,8 @@ process { withName: 'SAMTOOLS_DICT' { ext.args = { "-u ./${fasta} -a ${meta.id}" } publishDir = [ - enabled: false + path: { "${params.outdir}/alignment" }, + mode: params.publish_dir_mode, ] } diff --git a/docs/output.md b/docs/output.md index 9a40a0e..178aae9 100644 --- a/docs/output.md +++ b/docs/output.md @@ -44,6 +44,8 @@ Basic statistics on nucleotide content and contig length are collected for align - `*.o2o_aln.maf.gz` is the _**one-to-one**_ alignment between the _target_ and _query_ genomes. - `*.o2o_aln.tsv` reports nucleotide percent identity of the _**one-to-one**_ alignment for MultiQC. 
- For each _**one-to-one**_ alignment there will be an additional file in a format such as Axt, Chain, GFF or SAM/BAM/CRAM if you used the `--export_aln_to` parameter. These extra files are always compressed with gzip when their format is text-based. The SAM/BAM/CRAM files are always sorted. Their header features all sequences from the _target_ genome, including the ones that did not align to the _query_ so that alignment files can be merged without disturbing the sort order. + - The _target_ genome sequence, compressed with `bgzip` and indexed by `samtools`, is also present when BAM or CRAM files are produced. + - A multi-_query_ CRAM file is present when `--multi_cram` is used, named like the _target_ genome but with the `cram` suffix. diff --git a/docs/usage.md b/docs/usage.md index 4609c37..346c80a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -51,6 +51,7 @@ The parameters are described in details in the [online documentation](https://nf - `--m2m` enables the computation of the _many-to-many_ alignment, which reports alignments without enforcing uniqueness. This mode is required for self‑alignments and is useful for duplication or repeat analyses, but can exhaust computing resources on large or highly repetitive genomes. - The `--skip_dotplot_*` options disable dotplot visualisations. This is particularly useful when comparing very similar and repetitive genomes (for example, two vertebrate genomes from the same species), where dotplots other than the _one‑to‑one_ alignment can become extremely dense and difficult to interpret, without affecting the underlying alignments. - Users who need formats other than MAF can use the `--export_aln_to` parameter to generate additional coordinate‑based (PSL, GFF) or full alignment (SAM/BAM/CRAM) outputs for downstream analyses. Other formats like Axt or Chain are also supported. +- `--multi_cram` produces a single CRAM file that combines all alignments. 
It is neither a pangenome nor a multiple sequence alignment; however, once you make use of it—by loading it into the [Integrative Genomics Viewer](https://igv.org/), or extracting slices and converting them into multiple sequence alignments—it becomes a very powerful resource. ## Fixed arguments (taken from the [LAST cookbook][] and the [LAST tuning][] manual) diff --git a/modules.json b/modules.json index e531393..128bc39 100644 --- a/modules.json +++ b/modules.json @@ -10,6 +10,11 @@ "git_sha": "947e154c66b92eb041baca20d14d1d9ab0b47182", "installed_by": ["modules"] }, + "htslib/bgziptabix": { + "branch": "master", + "git_sha": "54e41f4ed3aead45054380a9befeb927612ffc91", + "installed_by": ["fasta_bgzip_index_dict_samtools"] + }, "last/dotplot": { "branch": "master", "git_sha": "1f966905b442fa623130663471eac3b9c5d35527", @@ -45,20 +50,21 @@ "git_sha": "008f9d3e61209bf995edac3ba531f54e269e1215", "installed_by": ["modules"] }, - "samtools/bgzip": { - "branch": "master", - "git_sha": "0d265a14027515eaa36a9d3a931655b918781145", - "installed_by": ["modules"] - }, "samtools/dict": { "branch": "master", - "git_sha": "9a48bce39a67e2cb34b8f125fc1d50f0ad98b616", - "installed_by": ["modules"] + "git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120", + "installed_by": ["fasta_bgzip_index_dict_samtools", "modules"] }, "samtools/faidx": { "branch": "master", - "git_sha": "9a48bce39a67e2cb34b8f125fc1d50f0ad98b616", - "installed_by": ["modules"] + "git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120", + "installed_by": ["fasta_bgzip_index_dict_samtools", "modules"] + }, + "samtools/merge": { + "branch": "master", + "git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120", + "installed_by": ["modules"], + "patch": "modules/nf-core/samtools/merge/samtools-merge.diff" }, "seqtk/cutn": { "branch": "master", @@ -69,6 +75,11 @@ }, "subworkflows": { "nf-core": { + "fasta_bgzip_index_dict_samtools": { + "branch": "master", + "git_sha": "e340200ac2d101e0b5ce3e9a512f6e4af58ecb3b", + 
"installed_by": ["subworkflows"] + }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", diff --git a/modules/nf-core/htslib/bgziptabix/environment.yml b/modules/nf-core/htslib/bgziptabix/environment.yml new file mode 100644 index 0000000..5736012 --- /dev/null +++ b/modules/nf-core/htslib/bgziptabix/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.23.1 + - conda-forge::xz=5.8.3 diff --git a/modules/nf-core/htslib/bgziptabix/main.nf b/modules/nf-core/htslib/bgziptabix/main.nf new file mode 100644 index 0000000..9efe968 --- /dev/null +++ b/modules/nf-core/htslib/bgziptabix/main.nf @@ -0,0 +1,88 @@ +process HTSLIB_BGZIPTABIX { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/33/33a1f2c7f36ec58339e41cbea096d121f606918778a91cfbef944b40ba7ce48b/data' + : 'community.wave.seqera.io/library/htslib_xz:49c8c84af5c4b3b9'}" + + input: + tuple val(meta), path(infile), path(infile_tbi), path(regions) + val action + val make_index + val out_ext + + output: + tuple val(meta), path("${outfile}"), emit: output + tuple val(meta), path("${outfile}.{tbi,csi}"), emit: index, optional: true + // all htslib tools have the same version, we use bgzip + tuple val("${task.process}"), val('htslib'), eval("bgzip --version | sed '1! d; s/bgzip (htslib) //'"), topic: versions, emit: versions_htslib + tuple val("${task.process}"), val('xz'), eval("xz --version | sed '1! 
d; s/xz (XZ Utils) //'"), topic: versions, emit: versions_xz + + when: + task.ext.when == null || task.ext.when + + script: + def allowed_actions = ["compress", "decompress"] + if (action !in allowed_actions) { + error("htslib/bgziptabix: Invalid action: ${action}. Allowed actions are: ${allowed_actions.join(', ')}") + } + + if (action == "decompress" && make_index) { + log.warn("htslib/bgziptabix: Cannot create index when decompressing. Ignoring make_index option.") + } + + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + outfile = action == "compress" ? (out_ext ? "${prefix}.${out_ext}.gz" : "${prefix}.gz") : (out_ext ? "${prefix}.${out_ext}" : "${prefix}") + + def compress_cmd = action == "compress" ? "bgzip -c ${args} -@ ${task.cpus}" : "cat" + def bgzip_cmd = action == "compress" ? "[ '\$(basename ${infile})' != '\$(basename ${outfile})' ] && ln -s ${infile} ${outfile}" : "bgzip -c -d ${args} -@ ${task.cpus} ${infile} > ${outfile}" + + def regions_arg = regions ? "-R ${regions}" : "" + def tabix_cmd = (make_index && !infile_tbi) ? "tabix -@ ${task.cpus} ${regions_arg} ${args2} -f ${outfile}" : "" + def link_tabix_cmd = make_index && infile_tbi ? "ln -s ${infile_tbi} ${outfile}.${infile_tbi.extension}" : "" + def uncompressed_cmd = action == "compress" ? "${compress_cmd} ${infile} > ${outfile}" : (infile.getName() == outfile ? 
"" : "ln -s ${infile} ${outfile}") + """ + ${link_tabix_cmd} + + FILE_TYPE=\$(htsfile ${infile}) + + case "\$FILE_TYPE" in + *BGZF-compressed*) + ${bgzip_cmd} ;; + *gzip-compressed*) + [ "\$(basename ${infile})" == "\$(basename ${outfile})" ] && echo "Input and output names cannot be the same" && exit 1 + zcat ${infile} | ${compress_cmd} > ${outfile} ;; + *bzip2-compressed*) + bzcat ${infile} | ${compress_cmd} > ${outfile} ;; + *XZ-compressed*) + xzcat ${infile} | ${compress_cmd} > ${outfile} ;; + *) + ${uncompressed_cmd} ;; + esac + + ${tabix_cmd} + """ + + stub: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + outfile = action == "compress" ? (out_ext ? "${prefix}.${out_ext}.gz" : "${prefix}.gz") : (out_ext ? "${prefix}.${out_ext}" : "${prefix}") + + def touch_cmd = action == "compress" ? "echo | bgzip -c" : "echo" + def index_fmt = args2.contains('-C') ? 'csi' : 'tbi' + def tabix_cmd = make_index ? "touch ${outfile}.${index_fmt}" : "" + def link_tabix_cmd = make_index && infile_tbi ? "ln -s ${infile_tbi} ${outfile}.${infile_tbi.extension}" : "" + """ + echo ${args} + + ${touch_cmd} > ${outfile} + + ${tabix_cmd} + ${link_tabix_cmd} + """ +} diff --git a/modules/nf-core/htslib/bgziptabix/meta.yml b/modules/nf-core/htslib/bgziptabix/meta.yml new file mode 100644 index 0000000..4cdefd0 --- /dev/null +++ b/modules/nf-core/htslib/bgziptabix/meta.yml @@ -0,0 +1,125 @@ +name: "htslib_bgziptabix" +description: "Multi-purpose module to compress, decompress and index files using bgzip + and tabix." +keywords: + - compress + - decompress + - index + - bgzip + - tabix + - gzip + - bzip + - xz +tools: + - "htslib": + description: "C library for high-throughput sequencing data formats." 
+ homepage: "http://www.htslib.org/" + documentation: "http://www.htslib.org/doc/" + tool_dev_url: "https://github.com/samtools/htslib" + doi: "10.1093/gigascience/giab007" + licence: + - "MIT" + identifier: biotools:htslib +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - infile: + type: file + description: Input file to compress or decompress + pattern: "*" + ontologies: [] + - infile_tbi: + type: file + description: Optional tabix index for the input file. + pattern: "*.{tbi,csi}" + ontologies: + - edam: http://edamontology.org/format_3616 # tabix + - regions: + type: file + description: Optional file of regions to extract (BED or chr:start-end format). + Only used when creating an index for the output file. + pattern: "*.{bed,txt,tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + - edam: http://edamontology.org/format_3003 # BED + - action: + type: string + description: Action to perform, either `compress` or `decompress` + - make_index: + type: boolean + description: Whether to create a tabix index for the output file; only used + if `action` is `compress` + - out_ext: + type: string + description: Output file extension without `.gz` suffix (for example `vcf`) +output: + output: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - ${outfile}: + type: file + description: Compressed or decompressed output file + pattern: "*" + ontologies: [] + index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample1' ] + - ${outfile}.{tbi,csi}: + type: file + description: Tabix index file for the compressed output file + pattern: "*.{tbi,csi}" + ontologies: + - edam: http://edamontology.org/format_3616 # tabix + versions_htslib: + - - ${task.process}: + type: string + description: The name of the process + - htslib: + type: string + description: The name of the tool + - bgzip --version | sed '1! d; s/bgzip (htslib) //': + type: eval + description: The expression to obtain the version of the tool + versions_xz: + - - ${task.process}: + type: string + description: The name of the process + - xz: + type: string + description: The name of the tool + - xz --version | sed '1! d; s/xz (XZ Utils) //': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - htslib: + type: string + description: The name of the tool + - bgzip --version | sed '1! d; s/bgzip (htslib) //': + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The name of the process + - xz: + type: string + description: The name of the tool + - xz --version | sed '1! 
d; s/xz (XZ Utils) //': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@itrujnara" +maintainers: + - "@itrujnara" diff --git a/modules/nf-core/htslib/bgziptabix/tests/main.nf.test b/modules/nf-core/htslib/bgziptabix/tests/main.nf.test new file mode 100644 index 0000000..a734650 --- /dev/null +++ b/modules/nf-core/htslib/bgziptabix/tests/main.nf.test @@ -0,0 +1,435 @@ +nextflow_process { + + name "Test Process HTSLIB_BGZIPTABIX" + script "../main.nf" + process "HTSLIB_BGZIPTABIX" + + tag "modules" + tag "modules_nfcore" + tag "htslib" + tag "htslib/bgziptabix" + + test("sarscov2 - vcf - decompress") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [], + [] + ] + input[1] = 'decompress' // action + input[2] = false // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.vcf') }, + { assert process.out.index.size() == 0 } + ) + } + + } + + test("sarscov2 - vcf - compress - index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = true // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.vcf.gz') }, + { assert process.out.index.get(0).get(1).endsWith('.vcf.gz.tbi') } + ) + } + + } + + test("sarscov2 - vcf + regions - compress - index") { + when { + process { + """ + input[0] = [ + [ id:'example' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', 
checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file('https://raw.githubusercontent.com/luisas/test-datasets/refs/heads/add-bedgraph-subset-illumina/data/genomics/sarscov2/illumina/bed/test.bed', checkIfExists: true) + ] + input[1] = 'compress' // action + input[2] = true // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + sanitizeOutput(process.out), + path(process.out.output[0][1]).vcf.getVariantsMD5(), + ).match() } + ) + } + } + + test("sarscov2 - bgzip - decompress") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = 'decompress' // action + input[2] = false // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.vcf') }, + { assert process.out.index.size() == 0 } + ) + } + } + + test("sarscov2 - bgzip - compress - no index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = false // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.vcf.gz') }, + { assert process.out.index.size() == 0 } + ) + } + + } + + test("sarscov2 - gzip - decompress") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + [], + [] + 
] + input[1] = 'decompress' // action + input[2] = false // make_index + input[3] = 'fastq' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.fastq') }, + { assert process.out.index.size() == 0 } + ) + } + } + + test("sarscov2 - gzip - (re)compress - no index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = false // make_index + input[3] = 'fastq' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.fastq.gz') }, + { assert process.out.index.size() == 0 } + ) + } + } + + test("sarscov2 - gzip - name clash") { + + when { + process { + """ + input[0] = [ + [ id:'test_1' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = false // make_index + input[3] = 'fastq' // out_ext + """ + } + } + + then { + assert process.failed + assertAll( + { assert process.errorReport.contains("Input and output names cannot be the same") } + ) + } + } + + test("metagenome - bz2 - decompress") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/rgi/card-data.tar.bz2', checkIfExists: true), + [], + [] + ] + input[1] = 'decompress' // action + input[2] = false // make_index + input[3] = 'tar' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.tar') }, + { assert 
process.out.index.size() == 0 } + ) + } + } + + test("metagenome - bz2 - (re)compress - no index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/rgi/card-data.tar.bz2', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = false // make_index + input[3] = 'tar' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.tar.gz') }, + { assert process.out.index.size() == 0 } + ) + } + } + + test("metagenome - xz - decompress") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/misc/taxa_sqlite.xz', checkIfExists: true), + [], + [] + ] + input[1] = 'decompress' // action + input[2] = false // make_index + input[3] = '' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() }, + { assert process.out.output.get(0).get(1).endsWith('test') }, + { assert process.out.index.size() == 0 } + ) + } + } + + test("metagenome - xz - (re)compress - no index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/misc/taxa_sqlite.xz', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = false // make_index + input[3] = '' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.gz') }, + { assert process.out.index.size() == 0 } + ) + } + } + + test("sarscov2 - vcf - compress - index - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + 
[ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = true // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + + } + + test("sarscov2 - vcf - decompress - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = 'decompress' // action + input[2] = false // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + + } + + test("illegal action") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [], + [] + ] + input[1] = 'invalid_action' // action + input[2] = true // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.failed + assert process.errorReport.contains("Invalid action: invalid_action. 
Allowed actions are: compress, decompress") + } + + } + +} diff --git a/modules/nf-core/htslib/bgziptabix/tests/main.nf.test.snap b/modules/nf-core/htslib/bgziptabix/tests/main.nf.test.snap new file mode 100644 index 0000000..52cfc90 --- /dev/null +++ b/modules/nf-core/htslib/bgziptabix/tests/main.nf.test.snap @@ -0,0 +1,574 @@ +{ + "sarscov2 - gzip - (re)compress - no index": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:21:33.710007592", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "metagenome - xz - (re)compress - no index": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.gz:md5,b8d852a2b1ee52ed64d83046dcdb9de2" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:22:42.445692755", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "metagenome - bz2 - decompress": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.tar:md5,39e9e71fd16cfd09ceca12cd46e6abce" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:21:48.804507455", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - vcf - decompress - stub": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + 
"versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:23:14.567213835", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - gzip - decompress": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:21:27.607706101", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - vcf - compress - index - stub": { + "content": [ + { + "index": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:22:57.854824265", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - vcf - decompress": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:20:56.277612816", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "metagenome - bz2 - (re)compress - no index": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.tar.gz:md5,39e9e71fd16cfd09ceca12cd46e6abce" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + 
] + ] + } + ], + "timestamp": "2026-05-19T10:21:57.137689117", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - vcf - compress - index": { + "content": [ + { + "index": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,7f005943c935f2b55ba3f9d4802aa09f" + ] + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:21:02.256241871", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "metagenome - xz - decompress": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test:md5,b8d852a2b1ee52ed64d83046dcdb9de2" + ] + ], + "1": [ + + ], + "2": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "3": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ], + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test:md5,b8d852a2b1ee52ed64d83046dcdb9de2" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + }, + { + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-12T11:31:30.587093278", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - vcf - compress - no index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ] + } + ], + 
"timestamp": "2026-05-06T15:27:48.766118732", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - bgzip - compress - no index": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:21:21.172365408", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - vcf + regions - compress - index": { + "content": [ + { + "index": [ + [ + { + "id": "example" + }, + "example.vcf.gz.tbi:md5,d22e5b84e4fcd18792179f72e6da702e" + ] + ], + "output": [ + [ + { + "id": "example" + }, + "example.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + }, + "bc7bf3ee9e8430e064c539eb81e59bf9" + ], + "timestamp": "2026-05-19T10:34:19.00293386", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - bgzip - decompress": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:21:14.663326257", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/bgzip/main.nf b/modules/nf-core/samtools/bgzip/main.nf deleted file mode 100644 index 6d50e90..0000000 --- a/modules/nf-core/samtools/bgzip/main.nf +++ /dev/null @@ -1,50 +0,0 @@ -process SAMTOOLS_BGZIP { - tag "$fasta" - label 'process_low' - - conda "${moduleDir}/environment.yml" - 
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : - 'biocontainers/samtools:1.22.1--h96c455f_0' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path("${output}"), emit: fasta - // samtools-bgzip has no --version option so let's use lastal from the same suite - tuple val("${task.process}"), val('samtools'), eval("samtools --version | head -n 1 | sed 's/^.*samtools //'"), emit: versions_samtools, topic: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - output = "${prefix}.fasta.gz" - """ - FILE_TYPE=\$(htsfile $fasta) - case "\$FILE_TYPE" in - *BGZF-compressed*) - # Do nothing or just rename if the file was already compressed - [ "\$(basename $fasta)" != "\$(basename ${output})" ] && ln -s $fasta ${output} ;; - *gzip-compressed*) - [ "\$(basename $fasta)" == "\$(basename ${output})" ] && echo "Filename collision (\$basename $fasta)" && exit 1 - zcat $fasta | bgzip -c $args -@${task.cpus} > ${output} ;; - *bzip2-compressed*) - bzcat $fasta | bgzip -c $args -@${task.cpus} > ${output} ;; - *XZ-compressed*) - xzcat $fasta | bgzip -c $args -@${task.cpus} > ${output} ;; - *) - bgzip -c $args -@${task.cpus} $fasta > ${output} ;; - esac - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - output = "${prefix}.gz" - """ - [ "\$(basename $fasta)" == "\$(basename ${output})" ] && echo "Filename collision (\$basename $fasta)" && exit 1 - echo '' | bgzip > ${output} - """ -} diff --git a/modules/nf-core/samtools/bgzip/meta.yml b/modules/nf-core/samtools/bgzip/meta.yml deleted file mode 100644 index 4f3517e..0000000 --- a/modules/nf-core/samtools/bgzip/meta.yml +++ /dev/null @@ -1,68 +0,0 @@ -name: samtools_bgzip -description: Outputs a FASTA file compressed with the BGZF algorithm -keywords: - - fasta - - 
BGZF - - bgzip -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: http://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] - identifier: biotools:samtools -input: - - - meta: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - fasta: - type: file - description: FASTA file, compressed or not. - pattern: "*.{fa,fa.gz,fa.bz2,fa.xz,fasta,fasta.gz,fasta.bz2,fasta.xz}" - ontologies: [] -output: - fasta: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - pattern: "*.{fa.gz,fasta.gz}" - - ${output}: - type: map - description: | - A FASTA file compressed with the BGZF algorithm. It will be - the original file if it was already BGZF-compressed. 
- pattern: "*.{fa.gz,fasta.gz}" - versions_samtools: - - - "${task.process}": - type: string - description: The name of the process - - samtools: - type: string - description: The name of the tool - - "samtools --version | head -n 1 | sed 's/^.*samtools //'": - type: eval - description: The expression to obtain the version of the tool -topics: - versions: - - - "${task.process}": - type: string - description: The name of the process - - samtools: - type: string - description: The name of the tool - - "samtools --version | head -n 1 | sed 's/^.*samtools //'": - type: eval - description: The expression to obtain the version of the tool - -authors: - - "@charles-plessy" -maintainers: - - "@charles-plessy" diff --git a/modules/nf-core/samtools/bgzip/tests/main.nf.test b/modules/nf-core/samtools/bgzip/tests/main.nf.test deleted file mode 100644 index dd9d5d2..0000000 --- a/modules/nf-core/samtools/bgzip/tests/main.nf.test +++ /dev/null @@ -1,109 +0,0 @@ -nextflow_process { - - name "Test Process SAMTOOLS_BGZIP" - script "../main.nf" - process "SAMTOOLS_BGZIP" - - tag "modules" - tag "modules_nfcore" - tag "samtools" - tag "samtools/bgzip" - - test("test_samtools_bgzip - fasta") { - - when { - process { - """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("test_samtools_bgzip - fasta bgzipped") { - - when { - process { - """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("test_samtools_bgzip - fasta bgzipped same name") { - // This checks that the module avoids file name 
clashes when input is already bzipped. - - when { - process { - """ - input[0] = [ [ id:'genome', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("test_samtools_bgzip - proteome gzipped") { - // This file is not bgziped. It is used to check the re-zipping branch of the case statement in the module. - - when { - process { - """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("test_samtools_bgzip - fasta stub") { - - options "-stub" - when { - process { - """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } -} diff --git a/modules/nf-core/samtools/bgzip/tests/main.nf.test.snap b/modules/nf-core/samtools/bgzip/tests/main.nf.test.snap deleted file mode 100644 index a704c1b..0000000 --- a/modules/nf-core/samtools/bgzip/tests/main.nf.test.snap +++ /dev/null @@ -1,217 +0,0 @@ -{ - "test_samtools_bgzip - fasta bgzipped same name": { - "content": [ - { - "0": [ - [ - { - "id": "genome", - "single_end": false - }, - "genome.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" - ] - ], - "1": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ], - "fasta": [ - [ - { - "id": "genome", - "single_end": false - }, - "genome.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" - ] - ], - "versions_samtools": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ] - } - ], - 
"meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2026-01-20T11:07:40.706529206" - }, - "test_samtools_bgzip - fasta": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" - ] - ], - "1": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ], - "fasta": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" - ] - ], - "versions_samtools": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2026-01-20T11:07:25.35750234" - }, - "test_samtools_bgzip - fasta bgzipped": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" - ] - ], - "1": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ], - "fasta": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" - ] - ], - "versions_samtools": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2026-01-20T11:07:33.250478519" - }, - "test_samtools_bgzip - fasta stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "1": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ], - "fasta": [ - [ - { - "id": "test", - "single_end": false - }, - "test.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "versions_samtools": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2026-01-20T11:08:05.629367619" - }, - "test_samtools_bgzip - proteome gzipped": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - 
"test.fasta.gz:md5,db0ecd5dbce6bf9730685b94ec87854d" - ] - ], - "1": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ], - "fasta": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fasta.gz:md5,db0ecd5dbce6bf9730685b94ec87854d" - ] - ], - "versions_samtools": [ - [ - "SAMTOOLS_BGZIP", - "samtools", - "1.22.1" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2026-01-20T11:07:57.879134659" - } -} \ No newline at end of file diff --git a/modules/nf-core/samtools/dict/environment.yml b/modules/nf-core/samtools/dict/environment.yml index 89e12a6..946bb36 100644 --- a/modules/nf-core/samtools/dict/environment.yml +++ b/modules/nf-core/samtools/dict/environment.yml @@ -5,6 +5,6 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/htslib - - bioconda::htslib=1.22.1 + - bioconda::htslib=1.23.1 # renovate: datasource=conda depName=bioconda/samtools - - bioconda::samtools=1.22.1 + - bioconda::samtools=1.23.1 diff --git a/modules/nf-core/samtools/dict/main.nf b/modules/nf-core/samtools/dict/main.nf index dd1f1b5..5d26198 100644 --- a/modules/nf-core/samtools/dict/main.nf +++ b/modules/nf-core/samtools/dict/main.nf @@ -1,17 +1,17 @@ process SAMTOOLS_DICT { - tag "$fasta" + tag "${fasta}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : - 'biocontainers/samtools:1.22.1--h96c455f_0' }" + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/8c/8c5d2818c8b9f58e1fba77ce219fdaf32087ae53e857c4a496402978af26e78c/data' + : 'community.wave.seqera.io/library/htslib_samtools:1.23.1--5b6bb4ede7e612e5'}" input: tuple val(meta), path(fasta) output: - tuple val(meta), path ("*.dict"), emit: dict + tuple val(meta), path("*.dict"), emit: dict tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools when: @@ -22,8 +22,8 @@ process SAMTOOLS_DICT { """ samtools \\ dict \\ - $args \\ - $fasta \\ + ${args} \\ + ${fasta} \\ > ${fasta}.dict """ diff --git a/modules/nf-core/samtools/dict/meta.yml b/modules/nf-core/samtools/dict/meta.yml index 73c7551..2d73a5f 100644 --- a/modules/nf-core/samtools/dict/meta.yml +++ b/modules/nf-core/samtools/dict/meta.yml @@ -64,3 +64,4 @@ authors: - "@muffato" maintainers: - "@muffato" + - "@matthdsm" diff --git a/modules/nf-core/samtools/dict/tests/main.nf.test.snap b/modules/nf-core/samtools/dict/tests/main.nf.test.snap index b08f8fb..a1be1d6 100644 --- a/modules/nf-core/samtools/dict/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/dict/tests/main.nf.test.snap @@ -15,7 +15,7 @@ [ "SAMTOOLS_DICT", "samtools", - "1.22.1" + "1.23.1" ] ], "dict": [ @@ -31,16 +31,16 @@ [ "SAMTOOLS_DICT", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-22T09:04:24.522267802" + "timestamp": "2026-03-19T08:57:25.259692" }, "sarscov2 - fasta": { "content": [ @@ -50,15 +50,15 @@ [ "SAMTOOLS_DICT", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-21T15:33:11.354952955" + "timestamp": "2026-03-19T08:57:20.375172" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml index 
89e12a6..946bb36 100644 --- a/modules/nf-core/samtools/faidx/environment.yml +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -5,6 +5,6 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/htslib - - bioconda::htslib=1.22.1 + - bioconda::htslib=1.23.1 # renovate: datasource=conda depName=bioconda/samtools - - bioconda::samtools=1.22.1 + - bioconda::samtools=1.23.1 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index 57a0349..175a53a 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -1,22 +1,21 @@ process SAMTOOLS_FAIDX { - tag "$fasta" + tag "${fasta}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : - 'biocontainers/samtools:1.22.1--h96c455f_0' }" + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/8c/8c5d2818c8b9f58e1fba77ce219fdaf32087ae53e857c4a496402978af26e78c/data' + : 'community.wave.seqera.io/library/htslib_samtools:1.23.1--5b6bb4ede7e612e5'}" input: - tuple val(meta), path(fasta) - tuple val(meta2), path(fai) + tuple val(meta), path(fasta), path(fai) val get_sizes output: - tuple val(meta), path ("*.{fa,fasta}") , emit: fa, optional: true - tuple val(meta), path ("*.sizes") , emit: sizes, optional: true - tuple val(meta), path ("*.fai") , emit: fai, optional: true - tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + tuple val(meta), path("*.{fa,fasta}"), emit: fa, optional: true + tuple val(meta), path("*.sizes"), emit: sizes, optional: true + tuple val(meta), path("*.fai"), emit: fai, optional: true + tuple val(meta), path("*.gzi"), emit: gzi, optional: true tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools when: @@ -28,8 +27,8 @@ process SAMTOOLS_FAIDX { """ samtools \\ faidx \\ - $fasta \\ - $args + ${fasta} \\ + ${args} ${get_sizes_command} """ diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml index 163c301..529f7a2 100644 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -1,5 +1,6 @@ name: samtools_faidx -description: Index FASTA file, and optionally generate a file of chromosome sizes +description: Index FASTA file, and optionally generate a file of chromosome + sizes keywords: - index - fasta @@ -14,7 +15,8 @@ tools: homepage: http://www.htslib.org/ documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] + licence: + - "MIT" identifier: biotools:samtools input: - - meta: @@ -27,11 +29,6 @@ input: description: FASTA file pattern: "*.{fa,fasta}" ontologies: [] - - - meta2: - type: map - description: | - Groovy Map containing reference 
information - e.g. [ id:'test' ] - fai: type: file description: FASTA index file @@ -40,7 +37,6 @@ input: - get_sizes: type: boolean description: use cut to get the sizes of the index (true) or not (false) - output: fa: - - meta: @@ -94,9 +90,8 @@ output: type: string description: The tool name - "samtools version | sed '1!d;s/.* //'": - type: string + type: eval description: The command used to generate the version of the tool - topics: versions: - - ${task.process}: @@ -106,7 +101,7 @@ topics: type: string description: The tool name - "samtools version | sed '1!d;s/.* //'": - type: string + type: eval description: The command used to generate the version of the tool authors: - "@drpatelh" @@ -115,3 +110,4 @@ authors: maintainers: - "@maxulysse" - "@phue" + - "@matthdsm" diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test b/modules/nf-core/samtools/faidx/tests/main.nf.test index 02ba504..9a86db8 100644 --- a/modules/nf-core/samtools/faidx/tests/main.nf.test +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test @@ -18,10 +18,12 @@ nextflow_process { } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [[],[]] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = false """ } } @@ -29,7 +31,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match()} + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -42,10 +44,12 @@ nextflow_process { } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)] - input[1] = [[],[]] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + [] + ] + input[1] = false """ } } @@ -53,7 +57,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match()} + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -66,11 +70,12 @@ nextflow_process { } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = false """ } } @@ -78,7 +83,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match()} + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -92,11 +97,12 @@ nextflow_process { } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = false """ } } @@ -104,7 +110,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match()} + { assert 
snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -118,10 +124,12 @@ nextflow_process { } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [[],[]] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = false """ } } @@ -129,7 +137,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match()} + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -142,12 +150,12 @@ nextflow_process { } process { """ - input[0] = Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ]) - input[1] = [[],[]] - input[2] = true + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = true """ } } @@ -155,7 +163,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match()} + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -168,12 +176,12 @@ nextflow_process { } process { """ - input[0] = Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) - ]) - input[1] = [[],[]] - input[2] = true + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + [] + ] + input[1] = true """ } } @@ -181,7 +189,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match()} + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -196,12 +204,12 @@ nextflow_process { } process { """ - input[0] = 
Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ]) - input[1] = [[],[]] - input[2] = true + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = true """ } } @@ -209,7 +217,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match()} + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -224,12 +232,12 @@ nextflow_process { } process { """ - input[0] = Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) - ]) - input[1] = [[],[]] - input[2] = true + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + [] + ] + input[1] = true """ } } @@ -237,7 +245,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match()} + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap index 565d20e..e879d96 100644 --- a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap @@ -2,39 +2,13 @@ "test_samtools_faidx": { "content": [ { - "0": [ - - ], - "1": [ - - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "3": [ - - ], - "4": [ - [ - "SAMTOOLS_FAIDX", - "samtools", - "1.22.1" - ] - ], "fa": [ ], "fai": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" ] @@ -49,54 +23,20 @@ [ "SAMTOOLS_FAIDX", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": 
"0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-23T14:02:40.159309157" + "timestamp": "2026-03-19T08:57:29.747964" }, "test_samtools_faidx_get_sizes_bgzip - stub": { "content": [ { - "0": [ - - ], - "1": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.fai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "4": [ - [ - "SAMTOOLS_FAIDX", - "samtools", - "1.22.1" - ] - ], "fa": [ ], @@ -128,49 +68,20 @@ [ "SAMTOOLS_FAIDX", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-23T14:03:39.550619177" + "timestamp": "2026-03-19T08:58:10.04235" }, "test_samtools_faidx_get_sizes": { "content": [ { - "0": [ - - ], - "1": [ - [ - { - "id": "test" - }, - "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "3": [ - - ], - "4": [ - [ - "SAMTOOLS_FAIDX", - "samtools", - "1.22.1" - ] - ], "fa": [ ], @@ -197,59 +108,27 @@ [ "SAMTOOLS_FAIDX", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-23T14:03:16.844965756" + "timestamp": "2026-03-19T08:57:55.552612" }, "test_samtools_faidx_bgzip": { "content": [ { - "0": [ - - ], - "1": [ - - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" - ] - ], - "4": [ - [ - "SAMTOOLS_FAIDX", - "samtools", - "1.22.1" - ] - ], "fa": [ ], "fai": [ [ { - "id": "test", - "single_end": false + "id": 
"test" }, "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" ] @@ -257,8 +136,7 @@ "gzi": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" ] @@ -270,50 +148,24 @@ [ "SAMTOOLS_FAIDX", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-23T14:02:47.301476131" + "timestamp": "2026-03-19T08:57:34.346045" }, "test_samtools_faidx_fasta": { "content": [ { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - - ], - "4": [ - [ - "SAMTOOLS_FAIDX", - "samtools", - "1.22.1" - ] - ], "fa": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" ] @@ -331,49 +183,20 @@ [ "SAMTOOLS_FAIDX", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-23T09:44:40.559583279" + "timestamp": "2026-03-19T08:57:39.136814" }, "test_samtools_faidx_get_sizes - stub": { "content": [ { - "0": [ - - ], - "1": [ - [ - { - "id": "test" - }, - "genome.fasta.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - - ], - "4": [ - [ - "SAMTOOLS_FAIDX", - "samtools", - "1.22.1" - ] - ], "fa": [ ], @@ -400,50 +223,24 @@ [ "SAMTOOLS_FAIDX", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-23T14:03:31.989929281" + "timestamp": "2026-03-19T08:58:05.200994" }, "test_samtools_faidx_stub_fasta": { "content": [ { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "extract.fa:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - - ], 
- "4": [ - [ - "SAMTOOLS_FAIDX", - "samtools", - "1.22.1" - ] - ], "fa": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "extract.fa:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -461,53 +258,27 @@ [ "SAMTOOLS_FAIDX", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-23T09:44:48.295693103" + "timestamp": "2026-03-19T08:57:44.105654" }, "test_samtools_faidx_stub_fai": { "content": [ { - "0": [ - - ], - "1": [ - - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - - ], - "4": [ - [ - "SAMTOOLS_FAIDX", - "samtools", - "1.22.1" - ] - ], "fa": [ ], "fai": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -522,54 +293,20 @@ [ "SAMTOOLS_FAIDX", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-23T14:03:09.784289542" + "timestamp": "2026-03-19T08:57:50.839162" }, "test_samtools_faidx_get_sizes_bgzip": { "content": [ { - "0": [ - - ], - "1": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "3": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" - ] - ], - "4": [ - [ - "SAMTOOLS_FAIDX", - "samtools", - "1.22.1" - ] - ], "fa": [ ], @@ -601,15 +338,15 @@ [ "SAMTOOLS_FAIDX", "samtools", - "1.22.1" + "1.23.1" ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2025-12-23T14:03:24.814967939" + "timestamp": "2026-03-19T08:58:00.460031" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/nextflow.config 
b/modules/nf-core/samtools/faidx/tests/nextflow.config index 202c036..b3c4d0c 100644 --- a/modules/nf-core/samtools/faidx/tests/nextflow.config +++ b/modules/nf-core/samtools/faidx/tests/nextflow.config @@ -3,5 +3,4 @@ process { withName: SAMTOOLS_FAIDX { ext.args = params.module_args } - } diff --git a/modules/nf-core/samtools/bgzip/environment.yml b/modules/nf-core/samtools/merge/environment.yml similarity index 83% rename from modules/nf-core/samtools/bgzip/environment.yml rename to modules/nf-core/samtools/merge/environment.yml index 89e12a6..946bb36 100644 --- a/modules/nf-core/samtools/bgzip/environment.yml +++ b/modules/nf-core/samtools/merge/environment.yml @@ -5,6 +5,6 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/htslib - - bioconda::htslib=1.22.1 + - bioconda::htslib=1.23.1 # renovate: datasource=conda depName=bioconda/samtools - - bioconda::samtools=1.22.1 + - bioconda::samtools=1.23.1 diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf new file mode 100644 index 0000000..03e4535 --- /dev/null +++ b/modules/nf-core/samtools/merge/main.nf @@ -0,0 +1,51 @@ +process SAMTOOLS_MERGE { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/8c/8c5d2818c8b9f58e1fba77ce219fdaf32087ae53e857c4a496402978af26e78c/data' + : 'community.wave.seqera.io/library/htslib_samtools:1.23.1--5b6bb4ede7e612e5'}" + + input: + tuple val(meta), path(input_files, stageAs: "?/*"), path(index_files, stageAs: "?/*") + tuple val(meta2), path(fasta), path(fai), path(gzi) + + output: + tuple val(meta), path("${prefix}.bam"), optional: true, emit: bam + tuple val(meta), path("${prefix}.cram"), optional: true, emit: cram + tuple val(meta), path("*.{bai,crai,csi}"), optional: true, emit: index + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() + // In this pipeline we know that the input CRAM files have a correct relative path to the reference, and we want to keep it. + // Passing --reference transforms the link to an absolute path containing temporary folder path. + def reference = "" + """ + # Note: --threads value represents *additional* CPUs to allocate (total CPUs = 1 + --threads). + samtools \\ + merge \\ + --threads ${task.cpus - 1} \\ + ${args} \\ + ${reference} \\ + ${prefix}.${file_type} \\ + ${input_files} + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() + def index_type = file_type == "bam" ? "csi" : "crai" + def index = args.contains("--write-index") ? 
"touch ${prefix}.${index_type}" : "" + """ + touch ${prefix}.${file_type} + ${index} + """ +} diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml new file mode 100644 index 0000000..66e582e --- /dev/null +++ b/modules/nf-core/samtools/merge/meta.yml @@ -0,0 +1,122 @@ +name: samtools_merge +description: Merge BAM or CRAM file +keywords: + - merge + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_files: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - index_files: + type: file + description: BAI/CRAI/CSI index file + pattern: "*.{bai,crai,csi}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + ontologies: [] + - fai: + type: file + description: Index of the reference file the CRAM was created with (optional) + pattern: "*.fai" + ontologies: [] + - gzi: + type: file + description: Index of the compressed reference file the CRAM was created with + (optional) + pattern: "*.gzi" + ontologies: [] +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: BAM file + pattern: "*.{bam}" + ontologies: [] + cram: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: CRAM file + pattern: "*.{cram}" + ontologies: [] + index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{bai,crai,csi}": + type: file + description: BAM index file (optional) + pattern: "*.{bai,crai,csi}" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@yuukiiwa" + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@yuukiiwa" + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" + - "@matthdsm" diff --git a/modules/nf-core/samtools/merge/samtools-merge.diff b/modules/nf-core/samtools/merge/samtools-merge.diff new file mode 100644 index 0000000..a22f0db --- /dev/null +++ b/modules/nf-core/samtools/merge/samtools-merge.diff @@ -0,0 +1,22 @@ +Changes in component 'nf-core/samtools/merge' +'modules/nf-core/samtools/merge/environment.yml' is unchanged +Changes in 'samtools/merge/main.nf': +--- modules/nf-core/samtools/merge/main.nf ++++ modules/nf-core/samtools/merge/main.nf +@@ -24,7 +24,9 @@ + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def 
file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() +- def reference = fasta ? "--reference ${fasta}" : "" ++ // In this pipeline we know that the input CRAM files have a correct relative path to the reference, and we want to keep it. ++ // Passing --reference transforms the link to an absolute path containing temporary folder path. ++ def reference = "" + """ + # Note: --threads value represents *additional* CPUs to allocate (total CPUs = 1 + --threads). + samtools \\ + +'modules/nf-core/samtools/merge/meta.yml' is unchanged +'modules/nf-core/samtools/merge/tests/nextflow.config' is unchanged +'modules/nf-core/samtools/merge/tests/main.nf.test.snap' is unchanged +'modules/nf-core/samtools/merge/tests/main.nf.test' is unchanged +************************************************************ diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test b/modules/nf-core/samtools/merge/tests/main.nf.test new file mode 100644 index 0000000..b3caf86 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test @@ -0,0 +1,219 @@ +nextflow_process { + + name "Test Process SAMTOOLS_MERGE" + script "../main.nf" + process "SAMTOOLS_MERGE" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/merge" + config "./nextflow.config" + + test("bams") { + + when { + params { + module_args = '--write-index' + } + process { + """ + input[0] = [ + [ id:'test'], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ], + [] + ] + input[1] = [[],[],[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + bam(process.out.bam[0][1]).getReadsMD5(), + 
file(process.out.index[0][1]).name, + process.out.cram, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() + } + ) + } + } + + test("crams_fastq") { + + when { + params { + module_args = '--write-index --output-fmt cram,version=3.0' + } + process { + """ + input[0] = [ + [ id:'test' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram', checkIfExists: true) ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram.crai', checkIfExists: true) ] + ] + input[1] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true), + [] + ] + """ + } + } + + then { + def fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/genome/genome.fasta' + assert process.success + assertAll( + { assert snapshot( + cram(process.out.cram[0][1], fasta).getReadsMD5(), + process.out.bam, + file(process.out.index[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() + } + ) + } + } + + test("crams_fastq_gz") { + + when { + params { + module_args = '--write-index' + } + process { + """ + input[0] = [ + [ id:'test' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram', checkIfExists: true) ], + [ 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram.crai', checkIfExists: true) ] + ] + input[1] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.gz.fai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.gz.gzi', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + //nft-bam doesn't like the fasta.gz + file(process.out.cram[0][1]).name, + process.out.bam, + file(process.out.index[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() + } + ) + } + } + + test("bam") { + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true) ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam.bai', checkIfExists: true) ] + ] + input[1] = [[],[],[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.index, + process.out.cram, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() + } + ) + } + } + + test("bams - stub") { + + options "-stub" + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ], + [] + ] + input[1] = [[],[],[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + } + + test("bams_no_index - stub") { + + options "-stub" + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test'], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ], + [] + ] + input[1] = [[],[],[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + } + +} diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test.snap b/modules/nf-core/samtools/merge/tests/main.nf.test.snap new file mode 100644 index 0000000..77ed449 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test.snap @@ -0,0 +1,202 @@ +{ + "bams_no_index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.23.1" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + + ], + "index": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.23.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-19T09:01:31.816184" + }, + "crams_fastq": { + "content": [ + "c4525b95f05075208347295e6a1fb232", + [ + + ], + 
"test.cram.crai", + { + "versions_samtools": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.23.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-19T09:01:05.719443" + }, + "bams - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.23.1" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + + ], + "index": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.23.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-19T09:01:24.795562" + }, + "bams": { + "content": [ + "47c9f174d8c8afc1a13c75ee4b5e5d43", + "test.bam.csi", + [ + + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.23.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-19T09:00:59.536803" + }, + "crams_fastq_gz": { + "content": [ + "test.cram", + [ + + ], + "test.cram.crai", + { + "versions_samtools": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.23.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-19T09:01:11.614088" + }, + "bam": { + "content": [ + "8da8fc1099a955e3ceb198665350e766", + [ + + ], + [ + + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.23.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-19T09:01:17.736424" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/nextflow.config b/modules/nf-core/samtools/merge/tests/nextflow.config new file mode 100644 index 0000000..5e29c67 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 
SAMTOOLS_MERGE { + ext.args = params.module_args + } +} diff --git a/nextflow.config b/nextflow.config index 547b02d..cd85098 100644 --- a/nextflow.config +++ b/nextflow.config @@ -77,7 +77,10 @@ params { lastal_extr_args = '' last_split_mismap = '1e-05' lastal_params = null + + // Export option export_aln_to = 'no_export' + multi_cram = false // Schema validation default options validate_params = true diff --git a/nextflow_schema.json b/nextflow_schema.json index 93c61b8..c436e83 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -118,14 +118,6 @@ "description": "Arguments for the lastdb, last-train, lastal and last-split programs.", "default": "", "properties": { - "export_aln_to": { - "type": "string", - "default": "no_export", - "description": "Convert the final _one-to-one_ alignment to a different format than MAF.", - "pattern": "^((no_export|axt|bam|bed|blast|blasttab|blasttab+|chain|cram|gff|html|psl|sam|tab)?,?)*(? [meta, fasta, [], []] }, + 'compress', + [], + [] + ) + + SAMTOOLS_FAIDX ( + HTSLIB_BGZIPTABIX.out.output.map {meta, fasta -> [meta, fasta, []]}, + true + ) + + SAMTOOLS_DICT ( + HTSLIB_BGZIPTABIX.out.output + ) + + ch_joined = HTSLIB_BGZIPTABIX.out.output + .join(SAMTOOLS_FAIDX.out.fai) + .join(SAMTOOLS_FAIDX.out.gzi) + .join(SAMTOOLS_FAIDX.out.sizes) + .join(SAMTOOLS_DICT.out.dict) + + emit: + fasta_fai_gzi_dict = ch_joined // channel: [ val(meta), fasta.gz, fai, gzi, sizes, dict ] +} diff --git a/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/meta.yml b/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/meta.yml new file mode 100644 index 0000000..f2281e8 --- /dev/null +++ b/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/meta.yml @@ -0,0 +1,39 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fasta_bgzip_index_dict_samtools" +description: Ensure BGZF compression, index, and produce a sequence size summary and a sequence dictionary for 
each FASTA file. +keywords: + - bgzip + - faidx + - dict + - fasta +components: + - htslib/bgziptabix + - samtools/dict + - samtools/faidx + +input: + - ch_fasta: + type: file + description: | + FASTA file, compressed or not. + Structure: [ val(meta), path(fasta) ] + pattern: "*.{fa,fa.gz,fa.bz2,fa.xz,fasta,fasta.gz,fasta.bz2,fasta.xz}" + +output: + - fasta_fai_gzi_dict: + type: file + description: | + Channel containing: + - BGZF-compressed FASTA + - FASTA index (.fai) + - BGZF index (.gzi) + - sequence size summary (.sizes) + - sequence dictionary (.dict) + Structure: + [ val(meta), path(fasta_gz), path(fai), path(gzi), path(sizes), path(dict) ] + pattern: "*" + +authors: + - "@charles-plessy" +maintainers: + - "@charles-plessy" diff --git a/subworkflows/local/fasta_bgzip_index_dict_samtools/tests/main.nf.test b/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/main.nf.test similarity index 54% rename from subworkflows/local/fasta_bgzip_index_dict_samtools/tests/main.nf.test rename to subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/main.nf.test index 3cba7b3..4c3faf1 100644 --- a/subworkflows/local/fasta_bgzip_index_dict_samtools/tests/main.nf.test +++ b/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/main.nf.test @@ -5,30 +5,34 @@ nextflow_workflow { workflow "FASTA_BGZIP_INDEX_DICT_SAMTOOLS" tag "subworkflows" - tag "subworkflows_" + tag "subworkflows_nfcore" tag "subworkflows/fasta_bgzip_index_dict_samtools" + tag "htslib" + tag "htslib/bgziptabix" tag "samtools" - tag "samtools/bgzip" tag "samtools/dict" tag "samtools/faidx" - test("sarscov2 - fasta") { + config "./nextflow.config" + + test("sarscov2 - fasta - genomes") { when { workflow { """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), - ] + input[0] = channel.of([ + [ id:'genome_complete' ], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) """ } } - then { + assert workflow.success assertAll( - { assert workflow.success}, - { assert snapshot(workflow.out).match()} + { assert snapshot( + workflow.out + ).match() } ) } } diff --git a/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/main.nf.test.snap new file mode 100644 index 0000000..06dc06e --- /dev/null +++ b/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/main.nf.test.snap @@ -0,0 +1,37 @@ +{ + "sarscov2 - fasta - genomes": { + "content": [ + { + "0": [ + [ + { + "id": "genome_complete" + }, + "genome_complete.gz:md5,6e9fe4042a72f2345f644f239272b7e6", + "genome_complete.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5", + "genome_complete.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474", + "genome_complete.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c", + "genome_complete.gz.dict:md5,7259d9fba4f0029e294b70a7bf05af6a" + ] + ], + "fasta_fai_gzi_dict": [ + [ + { + "id": "genome_complete" + }, + "genome_complete.gz:md5,6e9fe4042a72f2345f644f239272b7e6", + "genome_complete.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5", + "genome_complete.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474", + "genome_complete.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c", + "genome_complete.gz.dict:md5,7259d9fba4f0029e294b70a7bf05af6a" + ] + ] + } + ], + "timestamp": "2026-05-28T11:12:13.641917963", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/nextflow.config b/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/nextflow.config new file mode 100644 index 0000000..2ab54d9 --- /dev/null +++ b/subworkflows/nf-core/fasta_bgzip_index_dict_samtools/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + withName: 'SAMTOOLS_DICT' { + // This makes the output deterministic, + // otherwise you get temporary 
file folder names in the file path. + ext.args = { "-u ./${fasta} -a ${meta.id}" } + } +} diff --git a/workflows/pairgenomealign.nf b/workflows/pairgenomealign.nf index d517cb0..564e7c5 100644 --- a/workflows/pairgenomealign.nf +++ b/workflows/pairgenomealign.nf @@ -5,7 +5,9 @@ */ include { ASSEMBLYSCAN } from '../modules/nf-core/assemblyscan/main' +include { LAST_MAFCONVERT as ALIGNMENT_CRAM} from '../modules/nf-core/last/mafconvert/main' include { LAST_MAFCONVERT as ALIGNMENT_EXP } from '../modules/nf-core/last/mafconvert/main' +include { SAMTOOLS_MERGE as ALIGNMENT_MERGE} from '../modules/nf-core/samtools/merge/main' include { LAST_DOTPLOT as MULTIQC_THUMBS } from '../modules/nf-core/last/dotplot/main' include { MULTIQC_THUMBS_HTML } from '../modules/local/multiqc_thumbs_html/main' include { MULTIQC_ASSEMBLYSCAN_PLOT_DATA } from '../modules/local/multiqc_assemblyscan_plot_data/main' @@ -17,7 +19,7 @@ include { MULTIQC } from '../modules/nf-core/multiqc/ma include { paramsSummaryMap } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { FASTA_BGZIP_INDEX_DICT_SAMTOOLS } from '../subworkflows/local/fasta_bgzip_index_dict_samtools' +include { FASTA_BGZIP_INDEX_DICT_SAMTOOLS } from '../subworkflows/nf-core/fasta_bgzip_index_dict_samtools/main' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_pairgenomealign_pipeline' /* @@ -91,30 +93,57 @@ workflow PAIRGENOMEALIGN { pairalign_out = PAIRALIGN_M2M.out } - // If we export to CRAM we need a BGZIPped genome, indexed, and its sequence dictionary, - // if we export to SAM or BAM this is also nice to have, - // otherwise we need placeholders. 
- ch_targetgenome_faz = [[],[]] - ch_targetgenome_fai = [[],[]] - ch_targetgenome_gzi = [[],[]] - ch_targetgenome_dic = [[],[]] - + ch_genome_for_cram = channel.value( [[:], [], [], [], [], []] ) export_formats = params.export_aln_to.tokenize(',') - if (export_formats.contains('cram') | export_formats.contains('bam')) { + if (params.multi_cram | export_formats.contains('cram') | export_formats.contains('bam')) { FASTA_BGZIP_INDEX_DICT_SAMTOOLS( ch_targetgenome ) - ch_targetgenome_faz = FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.fasta_gz - ch_targetgenome_fai = FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.fai - ch_targetgenome_gzi = FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.gzi - ch_targetgenome_dic = FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.dict + ch_genome_for_cram = FASTA_BGZIP_INDEX_DICT_SAMTOOLS.out.fasta_fai_gzi_dict.first() + } + + ch_targetgenome = ch_genome_for_cram + .multiMap { meta, fasta, fai, gzi, _sizes, dict -> + fasta: [meta, fasta] + fai: [meta, fai] + gzi: [meta, gzi] + dict: [meta, dict] } if (!(params.export_aln_to == "no_export")) { ALIGNMENT_EXP( pairalign_out.o2o.combine(Channel.fromList(export_formats)), - ch_targetgenome_faz, - ch_targetgenome_fai, - ch_targetgenome_gzi, - ch_targetgenome_dic + ch_targetgenome.fasta, + ch_targetgenome.fai, + ch_targetgenome.gzi, + ch_targetgenome.dict + ) + } + + if (params.multi_cram) { + // We want the read group IDs to be just the query genome name (which is already long enough). + o2o_alignments = pairalign_out.o2o.map { meta, alns -> + def newMeta = meta.clone() // Avoids unexpected propagation to pairalign_out.o2o's meta.id. 
+ newMeta.id = newMeta.id.replaceAll(/^.*___/, '') + [newMeta, alns] + } + ALIGNMENT_CRAM( + o2o_alignments.map {it + "cram"}, + ch_targetgenome.fasta, + ch_targetgenome.fai, + ch_targetgenome.gzi, + ch_targetgenome.dict + ) + // Collect all per-query CRAMs into a single merged CRAM per target genome + ch_merge_input = ALIGNMENT_CRAM.out.alignment + // Rename and use as grouping key + .map { meta, cram -> tuple(params.targetName, cram) } + // group all CRAMs + .groupTuple() + // convert to SAMTOOLS_MERGE input format + .map { id, crams -> tuple([id: id], crams, []) } + // Output a single CRAM file under the target genome name. + ALIGNMENT_MERGE( + ch_merge_input, + ch_genome_for_cram.map { meta, fasta, fai, gzi, _sizes, _dict -> [meta, fasta, fai, gzi ] }, ) }