From d0262a629ba37603d6cc012afcb5e9bb65377421 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli Date: Thu, 13 Jun 2024 14:56:44 +0000 Subject: [PATCH] Start adding metamaps classifier --- assets/schema_database.json | 1 + conf/modules.config | 20 ++ modules.json | 10 + .../nf-core/metamaps/classify/environment.yml | 7 + modules/nf-core/metamaps/classify/main.nf | 63 ++++ modules/nf-core/metamaps/classify/meta.yml | 85 ++++++ .../metamaps/classify/tests/main.nf.test | 122 ++++++++ .../metamaps/classify/tests/main.nf.test.snap | 288 ++++++++++++++++++ .../metamaps/classify/tests/nextflow.config | 5 + .../nf-core/metamaps/classify/tests/tags.yml | 2 + .../metamaps/mapdirectly/environment.yml | 7 + modules/nf-core/metamaps/mapdirectly/main.nf | 57 ++++ modules/nf-core/metamaps/mapdirectly/meta.yml | 63 ++++ .../metamaps/mapdirectly/tests/main.nf.test | 93 ++++++ .../mapdirectly/tests/main.nf.test.snap | 180 +++++++++++ .../mapdirectly/tests/nextflow.config | 5 + .../metamaps/mapdirectly/tests/tags.yml | 2 + nextflow.config | 3 + nextflow_schema.json | 7 +- subworkflows/local/profiling.nf | 35 +++ .../local/standardisation_profiles.nf | 3 +- .../utils_nfcore_taxprofiler_pipeline/main.nf | 1 + 22 files changed, 1057 insertions(+), 2 deletions(-) create mode 100644 modules/nf-core/metamaps/classify/environment.yml create mode 100644 modules/nf-core/metamaps/classify/main.nf create mode 100644 modules/nf-core/metamaps/classify/meta.yml create mode 100644 modules/nf-core/metamaps/classify/tests/main.nf.test create mode 100644 modules/nf-core/metamaps/classify/tests/main.nf.test.snap create mode 100644 modules/nf-core/metamaps/classify/tests/nextflow.config create mode 100644 modules/nf-core/metamaps/classify/tests/tags.yml create mode 100644 modules/nf-core/metamaps/mapdirectly/environment.yml create mode 100644 modules/nf-core/metamaps/mapdirectly/main.nf create mode 100644 modules/nf-core/metamaps/mapdirectly/meta.yml create mode 100644 
modules/nf-core/metamaps/mapdirectly/tests/main.nf.test create mode 100644 modules/nf-core/metamaps/mapdirectly/tests/main.nf.test.snap create mode 100644 modules/nf-core/metamaps/mapdirectly/tests/nextflow.config create mode 100644 modules/nf-core/metamaps/mapdirectly/tests/tags.yml diff --git a/assets/schema_database.json b/assets/schema_database.json index e467b41d..2083a127 100644 --- a/assets/schema_database.json +++ b/assets/schema_database.json @@ -20,6 +20,7 @@ "kraken2", "krakenuniq", "malt", + "metamaps", "metaphlan", "motus" ], diff --git a/conf/modules.config b/conf/modules.config index 4b9ba64a..9f292c3e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -780,6 +780,26 @@ process { ] } + withName: METAMAPS_MAPDIRECTLY { + tag = {"${meta.db_name}|${meta.id}"} + ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}.metamaps_mapdirectly" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.metamaps_mapdirectly" } + publishDir = [ + path: { "${params.outdir}/metamaps/${meta.db_name}/" }, + mode: params.publish_dir_mode + ] + } + + withName: METAMAPS_CLASSIFY { + tag = {"${meta.db_name}|${meta.id}"} + ext.prefix = params.perform_runmerging ? 
{ "${meta.id}_${meta.db_name}.metamaps" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.metamaps" } + publishDir = [ + path: { "${params.outdir}/metamaps/${meta.db_name}/" }, + mode: params.publish_dir_mode, + pattern: '*.WIMP' + ] + } + + withName: TAXPASTA_MERGE { tag = { "${meta.tool}|${meta.id}" } ext.args = { diff --git a/modules.json b/modules.json index c4aea907..656644de 100644 --- a/modules.json +++ b/modules.json @@ -160,6 +160,16 @@ "git_sha": "dbce8951ff9a39ad08d87e563636bbcc6ef34032", "installed_by": ["modules"] }, + "metamaps/classify": { + "branch": "master", + "git_sha": "4272190cc308098e66e142812cbd554ea8d31857", + "installed_by": ["modules"] + }, + "metamaps/mapdirectly": { + "branch": "master", + "git_sha": "4272190cc308098e66e142812cbd554ea8d31857", + "installed_by": ["modules"] + }, "metaphlan/mergemetaphlantables": { "branch": "master", "git_sha": "efae1c431e539d6a6d323ee2e9223c4b81a152ce", diff --git a/modules/nf-core/metamaps/classify/environment.yml b/modules/nf-core/metamaps/classify/environment.yml new file mode 100644 index 00000000..3e66c559 --- /dev/null +++ b/modules/nf-core/metamaps/classify/environment.yml @@ -0,0 +1,7 @@ +name: "metamaps_classify" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::metamaps=0.1.633d2e0" diff --git a/modules/nf-core/metamaps/classify/main.nf b/modules/nf-core/metamaps/classify/main.nf new file mode 100644 index 00000000..94bbf274 --- /dev/null +++ b/modules/nf-core/metamaps/classify/main.nf @@ -0,0 +1,63 @@ +process METAMAPS_CLASSIFY { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/metamaps:0.1.633d2e0--h21ec9f0_0': + 'biocontainers/metamaps:0.1.633d2e0--h21ec9f0_0' }" + + input: + tuple val(meta), path(classification_res), path(meta_file), path(meta_unmappedreadsLengths), path(para_file) + path database_folder + + + output: + tuple val(meta), path("*classification_res.EM.WIMP") , emit: wimp + tuple val(meta), path("*classification_res.EM.evidenceUnknownSpecies") , emit: evidence_unknown_species + tuple val(meta), path("*classification_res.EM.reads2Taxon") , emit: reads2taxon + tuple val(meta), path("*classification_res.EM") , emit: em + tuple val(meta), path("*classification_res.EM.contigCoverage") , emit: contig_coverage + tuple val(meta), path("*classification_res.EM.lengthAndIdentitiesPerMappingUnit") , emit: length_and_id + tuple val(meta), path("*classification_res.EM.reads2Taxon.krona") , emit: krona + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + metamaps \\ + classify \\ + $args \\ + --mappings $classification_res \\ + --threads $task.cpus \\ + --DB $database_folder + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + metamaps: \$(metamaps | sed -n 2p | sed 's/^.*MetaMaps v //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_classification_res.EM.WIMP + touch ${prefix}_classification_res.EM.evidenceUnknownSpecies + touch ${prefix}_classification_res.EM.reads2Taxon + touch ${prefix}_classification_res.EM + touch ${prefix}_classification_res.EM.contigCoverage + touch ${prefix}_classification_res.EM.lengthAndIdentitiesPerMappingUnit + touch ${prefix}_classification_res.EM.reads2Taxon.krona + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + metamaps: \$(metamaps | sed -n 2p | sed 's/^.*MetaMaps v //') + END_VERSIONS + """ + +} diff --git 
a/modules/nf-core/metamaps/classify/meta.yml b/modules/nf-core/metamaps/classify/meta.yml new file mode 100644 index 00000000..32c3dd49 --- /dev/null +++ b/modules/nf-core/metamaps/classify/meta.yml @@ -0,0 +1,85 @@ +name: metamaps_classify +description: Strain-level metagenomic assignment +keywords: + - metamaps + - long reads + - metagenomics + - taxonomy +tools: + - metamaps: + description: MetaMaps is a tool for long-read metagenomic analysis + homepage: https://github.com/DiltheyLab/MetaMaps + documentation: https://github.com/DiltheyLab/MetaMaps/blob/master/README.md + tool_dev_url: https://github.com/DiltheyLab/MetaMaps + doi: "10.1038/s41467-019-10934-2" + licence: ["Public Domain"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - classification_res: + type: file + description: Coordinates where reads map + pattern: "*.{classification_res}" + - meta_file: + type: file + description: Statistics for mapping result + pattern: "*.{classification_res.meta}" + - meta_unmappedreadsLengths: + type: file + description: Statistics for length of unmapped reads + pattern: "*.{classification_res.meta.unmappedReadsLengths}" + - para_file: + type: file + description: Log with parameters + pattern: "*.{classification_res.parameters}" + - database_folder: + type: directory + description: Path to MetaMaps database + pattern: "*" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - wimp: + type: file + description: Sample composition at different taxonomic levels + pattern: "*.{classification_res.EM.WIMP}" + - evidence_unknown_species: + type: file + description: Statistics on read identities and zero-coverage regions + pattern: "*.{classification_res.EM.evidenceUnknownSpecies}" + - reads2taxon: + type: file + description: Taxon ID assignement of reads + pattern: "*.{classification_res.EM.reads2Taxon}" + - em: + type: file + description: The final and complete set of approximate read mappings + pattern: "*.{classification_res.EM}" + - contig_coverage: + type: file + description: Read coverage for contigs + pattern: "*.{classification_res.EM.contigCoverage}" + - length_and_id: + type: file + description: Read length and estimated identity for all reads + pattern: "*.{classification_res.EM.lengthAndIdentitiesPerMappingUnit}" + - krona: + type: file + description: Taxon ID assignement of reads in Krona format + pattern: "*.{classification_res.EM.reads2Taxon.krona}" + +authors: + - "@henningonsbring" + - "@sofstam" diff --git a/modules/nf-core/metamaps/classify/tests/main.nf.test b/modules/nf-core/metamaps/classify/tests/main.nf.test new file mode 100644 index 00000000..86175f4c --- /dev/null +++ b/modules/nf-core/metamaps/classify/tests/main.nf.test @@ -0,0 +1,122 @@ +nextflow_process { + + name "Test Process METAMAPS_CLASSIFY" + script "../main.nf" + process "METAMAPS_CLASSIFY" + + tag "modules" + tag "modules_nfcore" + tag "metamaps" + tag "metamaps/mapdirectly" + tag "metamaps/classify" + tag "untar" + + test("sarscov2_nanopore_classify") { + setup { + run("UNTAR") { + config "./nextflow.config" + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/metamaps_db.tar.gz', checkIfExists: true) + ] + """ + } + } + 
run("METAMAPS_MAPDIRECTLY") { + script "../../../metamaps/mapdirectly/main.nf" + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['nanopore']['test2_fastq_gz'], checkIfExists: true) + ] + input[1] = UNTAR.out.untar.map{ it[1] } + """ + } + } + } + + when { + process { + """ + classification_folder = METAMAPS_MAPDIRECTLY.out.classification_res + .join(METAMAPS_MAPDIRECTLY.out.meta_file) + .join(METAMAPS_MAPDIRECTLY.out.meta_unmappedreadsLengths) + .join(METAMAPS_MAPDIRECTLY.out.para_file) + input[0] = classification_folder + input[1] = UNTAR.out.untar.map{ it[1] } + """ + } + } + + + + then { + assertAll( + { assert process.success }, + { assert snapshot ( process.out ).match() } + ) + } + + } + + test("sarscov2_nanopore_classify - stub") { + + options "-stub" + + setup { + run("UNTAR") { + config "./nextflow.config" + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/metamaps_db.tar.gz', checkIfExists: true) + ] + """ + } + } + run("METAMAPS_MAPDIRECTLY") { + script "../../../metamaps/mapdirectly/main.nf" + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['nanopore']['test2_fastq_gz'], checkIfExists: true) + ] + input[1] = UNTAR.out.untar.map{ it[1] } + """ + } + } + } + + when { + process { + """ + classification_folder = METAMAPS_MAPDIRECTLY.out.classification_res + .join(METAMAPS_MAPDIRECTLY.out.meta_file) + .join(METAMAPS_MAPDIRECTLY.out.meta_unmappedreadsLengths) + .join(METAMAPS_MAPDIRECTLY.out.para_file) + input[0] = classification_folder + input[1] = UNTAR.out.untar.map{ it[1] } + """ + } + } + + + + then { + assertAll( + { assert process.success }, + { assert snapshot ( process.out ).match() } + ) + } + + } + + +} + + diff --git a/modules/nf-core/metamaps/classify/tests/main.nf.test.snap 
b/modules/nf-core/metamaps/classify/tests/main.nf.test.snap new file mode 100644 index 00000000..506ff286 --- /dev/null +++ b/modules/nf-core/metamaps/classify/tests/main.nf.test.snap @@ -0,0 +1,288 @@ +{ + "sarscov2_nanopore_classify": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.EM.WIMP:md5,b39a980519a382422ac6da49bea13de8" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.EM.evidenceUnknownSpecies:md5,a8577e31b3d9b3a598b43fc531075e81" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.EM.reads2Taxon:md5,f08d1d1865a3949a9c542b5b930fb37c" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.EM:md5,c6b1f5475e3fb9c6633aa0623456eebc" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.EM.contigCoverage:md5,131507f62806647f11b1dc10d546c73f" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.EM.lengthAndIdentitiesPerMappingUnit:md5,56be33596aa37bc360bbec47cebdc5e2" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.EM.reads2Taxon.krona:md5,9f644c743e2f4ed5617fe3d421758771" + ] + ], + "7": [ + "versions.yml:md5,60993f8b87f7a2e25367c7dc96c9b859" + ], + "contig_coverage": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.EM.contigCoverage:md5,131507f62806647f11b1dc10d546c73f" + ] + ], + "em": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.EM:md5,c6b1f5475e3fb9c6633aa0623456eebc" + ] + ], + "evidence_unknown_species": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.EM.evidenceUnknownSpecies:md5,a8577e31b3d9b3a598b43fc531075e81" + ] + ], + "krona": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.classification_res.EM.reads2Taxon.krona:md5,9f644c743e2f4ed5617fe3d421758771" + ] + ], + "length_and_id": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.EM.lengthAndIdentitiesPerMappingUnit:md5,56be33596aa37bc360bbec47cebdc5e2" + ] + ], + "reads2taxon": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.EM.reads2Taxon:md5,f08d1d1865a3949a9c542b5b930fb37c" + ] + ], + "versions": [ + "versions.yml:md5,60993f8b87f7a2e25367c7dc96c9b859" + ], + "wimp": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.EM.WIMP:md5,b39a980519a382422ac6da49bea13de8" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-06-12T10:56:21.646051" + }, + "sarscov2_nanopore_classify - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res.EM.WIMP:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res.EM.evidenceUnknownSpecies:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res.EM.reads2Taxon:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res.EM:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res.EM.contigCoverage:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res.EM.lengthAndIdentitiesPerMappingUnit:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res.EM.reads2Taxon.krona:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + "versions.yml:md5,60993f8b87f7a2e25367c7dc96c9b859" + ], + "contig_coverage": [ + [ + { + "id": 
"test", + "single_end": true + }, + "test_classification_res.EM.contigCoverage:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "em": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res.EM:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "evidence_unknown_species": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res.EM.evidenceUnknownSpecies:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "krona": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res.EM.reads2Taxon.krona:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "length_and_id": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res.EM.lengthAndIdentitiesPerMappingUnit:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads2taxon": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res.EM.reads2Taxon:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,60993f8b87f7a2e25367c7dc96c9b859" + ], + "wimp": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res.EM.WIMP:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-06-12T10:56:30.641353" + } +} \ No newline at end of file diff --git a/modules/nf-core/metamaps/classify/tests/nextflow.config b/modules/nf-core/metamaps/classify/tests/nextflow.config new file mode 100644 index 00000000..fffad1f3 --- /dev/null +++ b/modules/nf-core/metamaps/classify/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: UNTAR { + ext.args2 = "--no-same-owner" + } +} diff --git a/modules/nf-core/metamaps/classify/tests/tags.yml b/modules/nf-core/metamaps/classify/tests/tags.yml new file mode 100644 index 00000000..d9368795 --- /dev/null +++ b/modules/nf-core/metamaps/classify/tests/tags.yml @@ -0,0 +1,2 @@ +metamaps/classify: + - "modules/nf-core/metamaps/classify/**" diff --git 
a/modules/nf-core/metamaps/mapdirectly/environment.yml b/modules/nf-core/metamaps/mapdirectly/environment.yml new file mode 100644 index 00000000..19287be0 --- /dev/null +++ b/modules/nf-core/metamaps/mapdirectly/environment.yml @@ -0,0 +1,7 @@ +name: "metamaps_mapdirectly" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::metamaps=0.1.633d2e0" diff --git a/modules/nf-core/metamaps/mapdirectly/main.nf b/modules/nf-core/metamaps/mapdirectly/main.nf new file mode 100644 index 00000000..a398f541 --- /dev/null +++ b/modules/nf-core/metamaps/mapdirectly/main.nf @@ -0,0 +1,57 @@ +process METAMAPS_MAPDIRECTLY { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/metamaps:0.1.633d2e0--h21ec9f0_0': + 'biocontainers/metamaps:0.1.633d2e0--h21ec9f0_0' }" + + input: + tuple val(meta), path(reads) + path database + + output: + tuple val(meta), path("*classification_res") , emit: classification_res + tuple val(meta), path("*classification_res.meta") , emit: meta_file + tuple val(meta), path("*classification_res.meta.unmappedReadsLengths"), emit: meta_unmappedreadsLengths + tuple val(meta), path("*classification_res.parameters") , emit: para_file + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + db=`find -L ${database} -name "DB.fa"` + metamaps \\ + mapDirectly \\ + $args \\ + --all \\ + --reference \$db \\ + --threads $task.cpus \\ + --query $reads \\ + --output ${prefix}.classification_res + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + metamaps: \$(metamaps | sed -n 2p | sed 's/^.*MetaMaps v //') + END_VERSIONS + """ + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + 
touch ${prefix}_classification_res + touch ${prefix}_classification_res.meta + touch ${prefix}_classification_res.meta.unmappedReadsLengths + touch ${prefix}_classification_res.parameters + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + metamaps: \$(metamaps | sed -n 2p | sed 's/^.*MetaMaps v //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/metamaps/mapdirectly/meta.yml b/modules/nf-core/metamaps/mapdirectly/meta.yml new file mode 100644 index 00000000..4ac8e940 --- /dev/null +++ b/modules/nf-core/metamaps/mapdirectly/meta.yml @@ -0,0 +1,63 @@ +name: "metamaps_mapdirectly" +description: Maps long reads to a metamaps database +keywords: + - metamaps + - long reads + - metagenomics + - taxonomy +tools: + - metamaps: + description: MetaMaps is a tool for long-read metagenomic analysis + homepage: https://github.com/DiltheyLab/MetaMaps + documentation: https://github.com/DiltheyLab/MetaMaps/blob/master/README.md + tool_dev_url: https://github.com/DiltheyLab/MetaMaps + doi: "10.1038/s41467-019-10934-2" + licence: ["Public Domain"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: Input fastq file containing query sequences + pattern: "*.{fq,fastq,fastq.gz,fq.gz}" + - database: + type: file + description: Database file in fasta format + pattern: "*.{fa,fasta}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - classification_res: + type: file + description: Coordinates where reads map + pattern: "*.{classification_res}" + - meta_file: + type: file + description: Statistics for mapping result + pattern: "*.{classification_res.meta}" + - meta_unmappedreadsLengths: + type: file + description: Statistics for length of unmapped reads + pattern: "*.{classification_res.meta.unmappedReadsLengths}" + - para_file: + type: file + description: Log with parameters + pattern: "*.{classification_res.parameters}" + +authors: + - "@henningonsbring" + - "@sofstam" +maintainers: + - "@sofstam" diff --git a/modules/nf-core/metamaps/mapdirectly/tests/main.nf.test b/modules/nf-core/metamaps/mapdirectly/tests/main.nf.test new file mode 100644 index 00000000..f8a10104 --- /dev/null +++ b/modules/nf-core/metamaps/mapdirectly/tests/main.nf.test @@ -0,0 +1,93 @@ +nextflow_process { + + name "Test Process METAMAPS_MAPDIRECTLY" + script "../main.nf" + process "METAMAPS_MAPDIRECTLY" + + tag "modules" + tag "modules_nfcore" + tag "metamaps" + tag "metamaps/mapdirectly" + tag "untar" + + + test("sarscov2_nanopore_mapdirectly") { + setup { + run("UNTAR") { + config "./nextflow.config" + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/metamaps_db.tar.gz', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true) + ] + input[1] = UNTAR.out.untar.map{ it[1] } + """ + } + } + + + then { + assertAll( + { assert process.success }, + { assert snapshot ( process.out ).match() } + ) + } + + } + + test("sarscov2_nanopore_mapdirectly - stub ") { + + options "-stub" + + setup { + run("UNTAR") { + config "./nextflow.config" + 
script "../../../untar/main.nf" + process { + """ + input[0] = [ + [],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/metamaps_db.tar.gz', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true) + ] + input[1] = UNTAR.out.untar.map{ it[1] } + """ + } + } + + + then { + assertAll( + { assert process.success }, + { assert snapshot ( process.out ).match() } + ) + } + + } + +} + + diff --git a/modules/nf-core/metamaps/mapdirectly/tests/main.nf.test.snap b/modules/nf-core/metamaps/mapdirectly/tests/main.nf.test.snap new file mode 100644 index 00000000..e5524136 --- /dev/null +++ b/modules/nf-core/metamaps/mapdirectly/tests/main.nf.test.snap @@ -0,0 +1,180 @@ +{ + "sarscov2_nanopore_mapdirectly - stub ": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res.meta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res.meta.unmappedReadsLengths:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res.parameters:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,54af95dffcf4481d255c84d70cf851ef" + ], + "classification_res": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "meta_file": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res.meta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "meta_unmappedreadsLengths": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test_classification_res.meta.unmappedReadsLengths:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "para_file": [ + [ + { + "id": "test", + "single_end": true + }, + "test_classification_res.parameters:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54af95dffcf4481d255c84d70cf851ef" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-06-12T14:12:30.932597" + }, + "sarscov2_nanopore_mapdirectly": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.meta:md5,cf13c221c6d05329af73a0480466ddfb" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.meta.unmappedReadsLengths:md5,f636a6dc2372a5c93d472dd109a5e06d" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.parameters:md5,c0fedae5209f2ac9ff41b48bdd2a403b" + ] + ], + "4": [ + "versions.yml:md5,54af95dffcf4481d255c84d70cf851ef" + ], + "classification_res": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "meta_file": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.meta:md5,cf13c221c6d05329af73a0480466ddfb" + ] + ], + "meta_unmappedreadsLengths": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.meta.unmappedReadsLengths:md5,f636a6dc2372a5c93d472dd109a5e06d" + ] + ], + "para_file": [ + [ + { + "id": "test", + "single_end": true + }, + "test.classification_res.parameters:md5,c0fedae5209f2ac9ff41b48bdd2a403b" + ] + ], + "versions": [ + "versions.yml:md5,54af95dffcf4481d255c84d70cf851ef" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-06-12T14:12:22.645009" + } +} \ No newline at end of file 
diff --git a/modules/nf-core/metamaps/mapdirectly/tests/nextflow.config b/modules/nf-core/metamaps/mapdirectly/tests/nextflow.config new file mode 100644 index 00000000..fffad1f3 --- /dev/null +++ b/modules/nf-core/metamaps/mapdirectly/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: UNTAR { + ext.args2 = "--no-same-owner" + } +} diff --git a/modules/nf-core/metamaps/mapdirectly/tests/tags.yml b/modules/nf-core/metamaps/mapdirectly/tests/tags.yml new file mode 100644 index 00000000..a3ff0ec6 --- /dev/null +++ b/modules/nf-core/metamaps/mapdirectly/tests/tags.yml @@ -0,0 +1,2 @@ +metamaps/mapdirectly: + - "modules/nf-core/metamaps/mapdirectly/**" diff --git a/nextflow.config b/nextflow.config index 1a1333e2..350589fa 100644 --- a/nextflow.config +++ b/nextflow.config @@ -178,6 +178,9 @@ params { ganon_report_maxcount = 0 ganon_save_readclassifications = false + // metamaps + run_metamaps = false + // krona run_krona = false krona_taxonomy_directory = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 810629f4..841f919c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -370,7 +370,7 @@ "type": "boolean", "fa_icon": "fas fa-save", "description": "Save reads from samples that went through the run-merging step", - "help_text": "Save the run- and library-concatenated reads of a given sample in FASTQ format.\n\n> ⚠️ Only samples that went through the run-merging step of the pipeline will be stored in the resulting directory. \n\nIf you wish to save the files that go to the classification/profiling steps for samples that _did not_ go through run merging, you must supply the appropriate upstream `--save_` flag.\n\n" + "help_text": "Save the run- and library-concatenated reads of a given sample in FASTQ format.\n\n> \u26a0\ufe0f Only samples that went through the run-merging step of the pipeline will be stored in the resulting directory. 
\n\nIf you wish to save the files that go to the classification/profiling steps for samples that _did not_ go through run merging, you must supply the appropriate upstream `--save_` flag.\n\n" } }, "fa_icon": "fas fa-clipboard-check" @@ -602,6 +602,11 @@ "description": "Specify a maximum number of reads a hit must have to be retained in the ganon report.", "help_text": "Specify the maximum number of reads or percentage of counts a hit must have against a taxon to be retained. To specify a maximum percentage, specify between 0 and 1 (e.g. 0.9 for 90%), and more than 1 to specify a hard count cut off (e.g. 10000 for maximum of 10,000 reads).\n\n> Modifies tool parameter(s):\n- ganon report: `--max-count`", "fa_icon": "fas fa-filter" + }, + "run_metamaps": { + "type": "boolean", + "fa_icon": "fas fa-toggle-on", + "description": "Turn on profiling with metamaps. Requires database to be present in the CSV file passed to --databases." } }, "fa_icon": "fas fa-align-center" diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 09b8ac19..0615169b 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -19,6 +19,8 @@ include { KMCP_SEARCH } from '../../modules/nf include { KMCP_PROFILE } from '../../modules/nf-core/kmcp/profile/main' include { GANON_CLASSIFY } from '../../modules/nf-core/ganon/classify/main' include { GANON_REPORT } from '../../modules/nf-core/ganon/report/main' +include { METAMAPS_MAPDIRECTLY } from '../../modules/nf-core/metamaps/mapdirectly/main' +include { METAMAPS_CLASSIFY } from '../../modules/nf-core/metamaps/classify/main' // Custom Functions @@ -78,6 +80,7 @@ workflow PROFILING { motus: it[2]['tool'] == 'motus' kmcp: it[2]['tool'] == 'kmcp' ganon: it[2]['tool'] == 'ganon' + metamaps: it[2]['tool'] == 'metamaps' unknown: true } @@ -494,6 +497,38 @@ workflow PROFILING { } + if ( params.run_metamaps ) { + + ch_input_for_metamaps_mapdirectly = ch_input_for_profiling.metamaps + .filter { + meta,
reads, meta_db, db -> + if ( meta.instrument_platform == 'ILLUMINA' || meta.instrument_platform == "ION_TORRENT" ) log.warn "[nf-core/taxprofiler] MetaMaps is a tool for long-read classification. Skipping MetaMaps for sample ${meta.id}." + meta_db.tool == 'metamaps' && ( meta.instrument_platform != 'ILLUMINA' && meta.instrument_platform != "ION_TORRENT" ) // keep a sample only when its platform is neither ILLUMINA nor ION_TORRENT, matching the warning above + } + .multiMap { + it -> + reads: [ it[0] + it[2], it[1] ] + db: it[3] + } + + ch_input_for_metamaps_mapdirectly.reads + + METAMAPS_MAPDIRECTLY( ch_input_for_metamaps_mapdirectly.reads, ch_input_for_metamaps_mapdirectly.db ) + ch_versions = ch_versions.mix( METAMAPS_MAPDIRECTLY.out.versions.first() ) + + ch_database_for_metamaps_classify = databases + .filter { meta, db -> meta.tool == "metamaps" } + .map { meta, db -> [meta.db_name, meta, db] } + + ch_input_for_metamaps_classify = combineProfilesWithDatabase(METAMAPS_MAPDIRECTLY.out.classification_res, METAMAPS_MAPDIRECTLY.out.meta_file, METAMAPS_MAPDIRECTLY.out.meta_unmappedreadsLengths, METAMAPS_MAPDIRECTLY.out.para_file, ch_database_for_metamaps_classify) + + METAMAPS_CLASSIFY(ch_input_for_metamaps_classify.profile, ch_input_for_metamaps_classify.db) + ch_versions = ch_versions.mix( METAMAPS_CLASSIFY.out.versions.first() ) + ch_raw_profiles = ch_raw_profiles.mix( METAMAPS_CLASSIFY.out.wimp ) + + + } + emit: classifications = ch_raw_classifications profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index 95cd9d3f..baa2e703 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -10,7 +10,6 @@ include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_KRAKEN include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main' include { METAPHLAN_MERGEMETAPHLANTABLES
} from '../../modules/nf-core/metaphlan/mergemetaphlantables/main' include { MOTUS_MERGE } from '../../modules/nf-core/motus/merge/main' -include { GANON_TABLE } from '../../modules/nf-core/ganon/table/main' // Custom Functions @@ -212,6 +211,8 @@ workflow STANDARDISATION_PROFILES { ch_multiqc_files = ch_multiqc_files.mix( GANON_TABLE.out.txt ) ch_versions = ch_versions.mix( GANON_TABLE.out.versions ) +// Add metamaps to taxpasta + emit: taxpasta = TAXPASTA_MERGE.out.merged_profiles versions = ch_versions diff --git a/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf b/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf index eb143651..48eae786 100644 --- a/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf @@ -244,6 +244,7 @@ def toolCitationText() { params.run_motus ? "mOTUs (Ruscheweyh et al. 2022)," : "", params.run_ganon ? "ganon (Piro et al. 2020)" : "", params.run_kmcp ? "KMCP (Shen et al. 2023)" : "", + params.run_metamaps ? "MetaMaps (Dilthey et al. 2019)" : "", "." ].join(' ').trim()