diff --git a/conf/modules.config b/conf/modules.config index 1b82c9ee..4939702f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -858,7 +858,10 @@ process { publishDir = [ path: { "${params.outdir}/taxpasta/" }, mode: params.publish_dir_mode, - pattern: '*.{tsv,csv,arrow,parquet,biom}' + pattern: '*.{tsv,csv,arrow,parquet,biom}', + // saveAs must return a file name or path; returning null would stop the profile from being published at all. + // NOTE(review): a non-absolute result is resolved under 'path' above, so this assumes an absolute outdir - confirm. + saveAs: { filename -> params.generate_downstream_samplesheets && (params.generate_pipeline_samplesheets ?: '').toString().tokenize(',').contains('differentialabundance') ? "${params.outdir}/downstream_samplesheets/differentialabundance/${filename}" : filename } ] } @@ -877,7 +880,10 @@ process { publishDir = [ path: { "${params.outdir}/taxpasta/" }, mode: params.publish_dir_mode, - pattern: '*.{tsv,csv,arrow,parquet,biom}' + pattern: '*.{tsv,csv,arrow,parquet,biom}', + // saveAs must return a file name or path; returning null would stop the profile from being published at all. + // NOTE(review): a non-absolute result is resolved under 'path' above, so this assumes an absolute outdir - confirm. + saveAs: { filename -> params.generate_downstream_samplesheets && (params.generate_pipeline_samplesheets ?: '').toString().tokenize(',').contains('differentialabundance') ? "${params.outdir}/downstream_samplesheets/differentialabundance/${filename}" : filename } ] } diff --git a/conf/test.config b/conf/test.config index 4e457585..f122b8e0 100644 --- a/conf/test.config +++ b/conf/test.config @@ -48,6 +48,10 @@ params { kraken2_save_reads = true centrifuge_save_reads = true run_profile_standardisation = true + + // Generate downstream samplesheets + generate_downstream_samplesheets = true + generate_pipeline_samplesheets = 'differentialabundance' } process { diff --git a/conf/test_nothing.config b/conf/test_nothing.config index e8b87bc7..59f8fab9 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -41,6 +41,10 @@ params { run_motus = false run_kmcp = false run_ganon = false + + // Generate downstream samplesheets + generate_downstream_samplesheets = true + generate_pipeline_samplesheets = 'differentialabundance' } process { diff --git a/docs/output.md b/docs/output.md index 7cf96395..4bb1ef88 100644 --- a/docs/output.md +++ b/docs/output.md @@ -683,6 +683,9 @@ The following report files are used for the taxpasta step: Please aware the outputs of each tool's standardised profile 
_may not_ be directly comparable between each tool. Some may report raw read counts, whereas others may report abundance information. Please always refer to the list above, for which information is used for each tool. ::: +The pipeline can also generate downstream pipeline input samplesheets. +These are stored in `/downstream_samplesheets`. + ### MultiQC
@@ -744,3 +747,29 @@ For example, DIAMOND output does not have a dedicated section in the MultiQC HTM
[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. + +### Downstream samplesheets + +The pipeline can also generate input files for the following downstream +pipelines: + +- [nf-core/differentialabundance](https://nf-co.re/differentialabundance) + +
+Output files + +- `downstream_samplesheets/` + + - `samplesheet.csv`: A samplesheet for each classifier. + - `matrix.csv`: The output from taxpasta. + +
+ +To generate the downstream samplesheets for `nf-core/differentialabundance`, you need to turn on `--run_profile_standardisation`, `--generate_downstream_samplesheets` and `--generate_pipeline_samplesheets differentialabundance`. The pipeline only generates the `samplesheet` and `matrix` inputs. You will need to manually prepare the `contrast` table before running `nf-core/differentialabundance`. + +`samplesheet.csv` includes sample IDs from the `taxpasta` output for each classifier. You will need to specify the conditions (the groups you want to compare) as desired. + +:::warning +Any generated downstream samplesheet is provided as 'best effort' and is not guaranteed to work straight out of the box! +It may not be complete (e.g. some columns may need to be manually filled in). +::: diff --git a/nextflow.config b/nextflow.config index e8dd7723..b48c5858 100644 --- a/nextflow.config +++ b/nextflow.config @@ -196,6 +196,10 @@ params { taxpasta_add_ranklineage = false taxpasta_ignore_errors = false standardisation_motus_generatebiom = false + + // Generate downstream samplesheet + generate_downstream_samplesheets = false + generate_pipeline_samplesheets = null } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index 3ada1a56..77119e80 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -712,6 +712,25 @@ }, "fa_icon": "fas fa-chart-line" }, + "generate_samplesheet_options": { + "title": "Downstream pipeline samplesheet generation options", + "type": "object", + "fa_icon": "fas fa-align-justify", + "description": "Options for generating input samplesheets for complementary downstream pipelines.", + "properties": { + "generate_downstream_samplesheets": { + "type": "boolean", + "description": "Turn on generation of samplesheets for downstream pipelines.", + "fa_icon": "fas fa-toggle-on" + }, + "generate_pipeline_samplesheets": { + "type": "string", + "description": "Specify which pipeline to generate 
a samplesheet for.", + "fa_icon": "fas fa-toolbox", + "pattern": "^(differentialabundance)(?:,(differentialabundance)){0,1}$" + } + } + }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -972,6 +991,9 @@ { "$ref": "#/definitions/postprocessing_and_visualisation_options" }, + { + "$ref": "#/definitions/generate_samplesheet_options" + }, { "$ref": "#/definitions/institutional_config_options" }, diff --git a/subworkflows/local/generate_downstream_samplesheet/main.nf b/subworkflows/local/generate_downstream_samplesheet/main.nf new file mode 100644 index 00000000..975f8a7f --- /dev/null +++ b/subworkflows/local/generate_downstream_samplesheet/main.nf @@ -0,0 +1,50 @@ +// +// Subworkflow with functionality specific to the nf-core/taxprofiler pipeline +// + +workflow SAMPLESHEET_DIFFERENTIALABUNDANCE { + take: + ch_taxpasta // items are read as [ meta, taxpasta profile file ] + + main: + def format_sep = '\t' + def samplesheet_dir = "${params.outdir}/downstream_samplesheets/differentialabundance" + + // Write one samplesheet per taxpasta profile. Each file is assembled with plain Groovy inside a + // single collectFile closure, because channel factories and operators must not be called from + // within another operator's closure. + ch_taxpasta + .collectFile(storeDir: samplesheet_dir) { item -> + def profile = file(item[1]) + // Only the first (header) row is read, so data values never end up in the samplesheet + def header = profile.withReader { reader -> reader.readLine() } ?: '' + // Drop the leading column, presumably the taxon identifier - confirm against the taxpasta output + def sample_ids = (header.split(format_sep) as List).drop(1) + [ "samplesheet_${profile.getName()}", ['sample', *sample_ids].collect { line -> "${line}\n" }.join() ] + } +} + +workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { + take: + ch_taxpasta + + main: + // The parameter may be unset (null), so fall back to an empty string before tokenising the comma-separated list + def downstreampipeline_names = (params.generate_pipeline_samplesheets ?: '').toString().tokenize(',') + + if ( downstreampipeline_names.contains('differentialabundance') ) { + SAMPLESHEET_DIFFERENTIALABUNDANCE(ch_taxpasta) + } +} + +def channelToSamplesheet(ch_header, ch_list_for_samplesheet, outdir_subdir, samplesheet_name) { + // Constructs the header string and then the strings of each row, and + // finally concatenates for saving. 
Originally designed by @mahesh-panchal + ch_header + .concat(ch_list_for_samplesheet) + .collectFile( + name: "${params.outdir}/${outdir_subdir}/${samplesheet_name}", + newLine: true, + sort: false + ) +} diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 93eb55dd..7f96f7d6 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -63,15 +63,16 @@ if ( [params.taxpasta_add_name, params.taxpasta_add_rank, params.taxpasta_add_li // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing' -include { NONPAREIL } from '../subworkflows/local/nonpareil' -include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' -include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_hostremoval' -include { LONGREAD_HOSTREMOVAL } from '../subworkflows/local/longread_hostremoval' -include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering' -include { PROFILING } from '../subworkflows/local/profiling' -include { VISUALIZATION_KRONA } from '../subworkflows/local/visualization_krona' -include { STANDARDISATION_PROFILES } from '../subworkflows/local/standardisation_profiles' +include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing' +include { NONPAREIL } from '../subworkflows/local/nonpareil' +include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' +include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_hostremoval' +include { LONGREAD_HOSTREMOVAL } from '../subworkflows/local/longread_hostremoval' +include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering' +include { PROFILING } from '../subworkflows/local/profiling' +include { VISUALIZATION_KRONA } from '../subworkflows/local/visualization_krona' +include { STANDARDISATION_PROFILES } from 
'../subworkflows/local/standardisation_profiles' +include { GENERATE_DOWNSTREAM_SAMPLESHEETS } from '../subworkflows/local/generate_downstream_samplesheet/main.nf' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -324,6 +325,11 @@ workflow TAXPROFILER { ch_versions = ch_versions.mix( STANDARDISATION_PROFILES.out.versions ) } + // Samplesheets are built from the taxpasta output, which requires --run_profile_standardisation (see docs/output.md) + if ( params.run_profile_standardisation && params.generate_downstream_samplesheets ) { + GENERATE_DOWNSTREAM_SAMPLESHEETS ( STANDARDISATION_PROFILES.out.taxpasta ) + } + /* MODULE: MultiQC */