Skip to content
Merged
Show file tree
Hide file tree
Changes from 35 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
8a37c4f
add CREATE_ASSEMBLY_METADATA_CSV module, add versions tracking for al…
ochkalova Apr 24, 2026
4bb213f
add CREATE_GENOME_METADATA_TSV module
ochkalova Apr 24, 2026
6340b4b
refactor metadata files creating modules
ochkalova Apr 27, 2026
7cb2880
add find_concatenate module
ochkalova Apr 28, 2026
c6b7fbc
patch find/concatenate
ochkalova Apr 28, 2026
8304fbf
fix version statement for GENERATE_ASSEMBLY_MANIFEST module
ochkalova Apr 28, 2026
f217ac8
add FIND_CONCATENATE to assemblysubmit and update tests
ochkalova Apr 28, 2026
48abbfd
add FIND_CONCATENATE to genomesubmit and update tests
ochkalova Apr 28, 2026
badfb2b
add additional inputs instead of params usage subwf
ochkalova Apr 24, 2026
47b8698
add additional inputs instead of params usage in ENA_WEBIN_CLI_WRAPPER
ochkalova Apr 24, 2026
c410d5e
add additional inputs in GENOME_UPLOAD
ochkalova Apr 24, 2026
708cb1e
add additional inputs in GENERATE_ASSEMBLY_MANIFEST
ochkalova Apr 24, 2026
fd698fd
update tests
ochkalova Apr 24, 2026
689259a
fix checkm2_db definition
ochkalova Apr 24, 2026
74fa995
remove --upload_force param because it's unnecessary for a user
ochkalova Apr 24, 2026
1371dec
do not reference params in ASSEMBLYSUBMIT and GENOMESUBMIT
ochkalova Apr 27, 2026
93439b4
add test_upload parameter to REGISTERSTUDY and GENERATE_ASSEMBLY_MANI…
ochkalova Apr 27, 2026
ff2d46d
rename checkm2_db_zenodo_id to checkm2_db_download_id for consistency
ochkalova Apr 27, 2026
7a6030c
add missing REGISTERSTUDY input
ochkalova Apr 27, 2026
4cf9a0e
rename webincli_submit to webincli_mode
ochkalova Apr 28, 2026
4530c73
add CREATE_ASSEMBLY_METADATA_CSV module, add versions tracking for al…
ochkalova Apr 24, 2026
0b2d785
add CREATE_GENOME_METADATA_TSV module
ochkalova Apr 24, 2026
c7b2603
add FIND_CONCATENATE to assemblysubmit and update tests
ochkalova Apr 28, 2026
5e435bf
update snapshot
ochkalova Apr 28, 2026
555e4a1
Merge branch 'dev' into feat/metadata_modules
ochkalova Apr 28, 2026
aafeff6
update snapshot
ochkalova Apr 28, 2026
c2c7ba6
add citations
KateSakharova Apr 24, 2026
84b94b7
test custom data in multiqc. So far it works with files _mqc prefix
KateSakharova Apr 24, 2026
e48194f
add custom tables to multiqc
KateSakharova Apr 27, 2026
f828cd5
wip
KateSakharova Apr 28, 2026
e3b8072
wip
KateSakharova Apr 28, 2026
006d953
include topics
KateSakharova Apr 29, 2026
121a75b
Merge branch 'dev' into feature/multiqc-citations
KateSakharova Apr 29, 2026
7a560d8
fix links
KateSakharova Apr 29, 2026
d97a072
different fixes
KateSakharova Apr 29, 2026
689b785
fix bloody multiqc version
KateSakharova Apr 30, 2026
37e6886
lint
KateSakharova Apr 30, 2026
cf71ebf
fix wording and versions
KateSakharova May 1, 2026
4d6dfbe
update snaphots
KateSakharova May 6, 2026
bb7e9ac
revert multiqc container, die in hell!
KateSakharova May 6, 2026
80c4b08
snapshots
KateSakharova May 6, 2026
bbfcaf0
lint
KateSakharova May 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 152 additions & 1 deletion assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,156 @@ report_section_order:
order: -1002

export_plots: true

# Both built-in features below are switched off for this report.
disable_general_stats: true
disable_version_detection: true

# Ordering of the custom sections; every id listed here has a matching
# entry under `sp` and `custom_data` further down in this file.
custom_content:
  order:
    - assembly_metadata
    - genome_metadata
    - sample_registration
    - submission_results_assemblies
    - submission_results_genomes

# Search patterns: one entry per custom-content id, each giving the
# file-name regular expression (`fn_re`) of the file that feeds that section.
# Patterns are single-quoted so that `\.` reaches the regex engine unchanged
# and matches a literal dot before the file extension.
sp:
  assembly_metadata:
    fn_re: '.*assemblies_metadata\.csv$'
  genome_metadata:
    fn_re: '.*genomes_metadata\.tsv$'
  sample_registration:
    fn_re: '.*registered.*\.tsv$'
  submission_results_genomes:
    fn_re: 'genomes_accessions\.tsv$'
  submission_results_assemblies:
    fn_re: 'assemblies_accessions\.tsv$'

# Custom data configuration
#
# One entry per custom-content id. Every entry renders a table
# (`plot_type: "table"`) from a CSV/TSV file; `headers` maps a column name in
# that file to the title (and optional description) shown for the column.
# Each `pconfig.id` reuses the section id so that all ids are slug-like
# (no spaces) and follow one convention.
custom_data:
  assembly_metadata:
    id: "assembly_metadata"
    section_name: "Assembly metadata"
    description: "Metadata for submitted assemblies"
    file_format: "csv"
    plot_type: "table"
    pconfig:
      id: "assembly_metadata"
      title: "Assembly metadata table"
    headers:
      runs:
        title: "Runs"
      assembly_coverage:
        title: "Coverage"
      assembler:
        title: "Assembler"
      assembler_version:
        title: "Version"
      file_path:
        title: "Filepath"
      # Written as a mapping with `title`, like every other header entry.
      sample:
        title: "Sample"

  genome_metadata:
    id: "genome_metadata"
    section_name: "Genome metadata"
    description: "Metadata for submitted genomes"
    file_format: "tsv"
    plot_type: "table"
    pconfig:
      id: "genome_metadata"
      title: "Genome metadata table"
    headers:
      genome_name:
        title: "Genome name"
      genome_path:
        title: "Genome path"
      accessions:
        title: "Accession"
      assembly_software:
        title: "Assembly software"
      binning_software:
        title: "Binning software"
      binning_parameters:
        title: "Binning parameters"
      stats_generation_software:
        title: "Stats software"
      completeness:
        title: "Completeness (%)"
      contamination:
        title: "Contamination (%)"
      genome_coverage:
        title: "Coverage"
      metagenome:
        title: "Metagenome"
      co-assembly:
        title: "Co-assembly"
      broad_environment:
        title: "Broad environment"
      local_environment:
        title: "Local environment"
      environmental_medium:
        title: "Environmental medium"
      rRNA_presence:
        title: "rRNA presence"
      NCBI_lineage:
        title: "NCBI lineage"

  sample_registration:
    # `id` and `section_name` are spelled out like in the sibling sections.
    id: "sample_registration"
    section_name: "Sample registration"
    file_format: "tsv"
    plot_type: "table"
    pconfig:
      id: "sample_registration"
      title: "Sample registration assigned accessions"
    headers:
      alias:
        title: "ID"
        # NOTE(review): same wording as in the submission tables below;
        # confirm that `alias` here really names a FASTA file.
        description: "FASTA file for upload"
      accession:
        title: "ENA sample accession"
        description: "Assigned accession after submission"

  submission_results_assemblies:
    id: "submission_results_assemblies"
    section_name: "Submission results"
    description: >
      Accessions assigned during upload process.
      As all assemblies in ENA are submitted as ‘analyses’, for each assembly submission, Webin will report a unique accession number that starts with ERZ.
      For most assemblies, this accession number is for internal processing only and will not be visible in the browser.
      As a result, for most assemblies you will receive additional post-processing accession numbers starting with GCA_.
      Always make a note of any accessions you receive as these are the unique identifiers for each of your submissions to ENA.
      The ERZ accession can be used to access information on the progress of the internal processing of each assembly through the [Webin Portal](https://ena-docs.readthedocs.io/en/latest/submit/general-guide/submissions-portal.html).
      You can also use this service to see the assigned chromosome, contig, and scaffold accessions.
      Please follow the Webin Portal link to learn more about this.
      See individual submission guidelines for information on what accessions you will receive for each assembly type.
      More information about accessions can be found in the [ENA docs](https://ena-docs.readthedocs.io/en/latest/submit/assembly.html#accessions).
    file_format: "tsv"
    plot_type: "table"
    pconfig:
      # Slug id; the human-readable text lives in `title`.
      id: "submission_results_assemblies"
      title: "Assemblies assigned accessions"
      col1_header: "ID"
      col2_header: "ENA accession"
    headers:
      alias:
        title: "ID"
        description: "FASTA file for upload"
      accession:
        title: "ENA accession"
        description: "Assigned accession after submission"

  submission_results_genomes:
    id: "submission_results_genomes"
    section_name: "Submission results"
    description: "Accessions assigned during upload process."
    file_format: "tsv"
    plot_type: "table"
    pconfig:
      # Slug id; the human-readable text lives in `title`.
      id: "submission_results_genomes"
      title: "Genomes assigned accessions"
      col1_header: "ID"
      col2_header: "ENA accession"
    headers:
      alias:
        title: "ID"
        description: "FASTA file for upload"
      accession:
        title: "ENA accession"
        description: "Assigned accession after submission"
7 changes: 6 additions & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,12 @@ process {
//

withName: 'MULTIQC' {
ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
ext.args = {
[
params.multiqc_title ? "--title \"$params.multiqc_title\"" : '',
"-p"
Comment thread
KateSakharova marked this conversation as resolved.
].findAll().join(' ')
}
publishDir = [
path: { "${params.outdir}/multiqc" },
mode: params.publish_dir_mode,
Expand Down
6 changes: 6 additions & 0 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ When `--mode metagenomic_assemblies` is used, results are written under `metagen

Assembly study registration, manifest generation, and Webin-CLI submission are executed by the workflow, but their intermediate outputs are not currently published into `--outdir` by the pipeline.

> [!NOTE]
> As all assemblies in ENA are submitted as ‘analyses’, for each assembly submission, Webin will report a unique accession number that starts with ERZ.
> For most assemblies, this accession number is for **internal processing only** and will not be visible in the browser.
> As a result, for most assemblies you will receive additional post-processing accession numbers starting with GCA\_.
> More information about accessions can be found in ENA docs: https://ena-docs.readthedocs.io/en/latest/submit/assembly.html#accessions.

## Common outputs

### MultiQC
Expand Down
6 changes: 4 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ workflow NFCORE_SEQSUBMIT {
params.upload_tpa,
params.test_upload,
params.webin_cli_version,
params.webincli_mode
params.webincli_mode,
params.outdir
)
ch_multiqc_report = GENOMESUBMIT.out.multiqc_report
} else if (params.mode == "metagenomic_assemblies") {
Expand All @@ -66,7 +67,8 @@ workflow NFCORE_SEQSUBMIT {
params.upload_tpa,
params.test_upload,
params.webin_cli_version,
params.webincli_mode
params.webincli_mode,
params.outdir
)
ch_multiqc_report = ASSEMBLYSUBMIT.out.multiqc_report
}
Expand Down
6 changes: 0 additions & 6 deletions modules/local/ena_webin_cli_download/environment.yml

This file was deleted.

23 changes: 0 additions & 23 deletions modules/local/ena_webin_cli_download/main.nf

This file was deleted.

35 changes: 0 additions & 35 deletions modules/local/ena_webin_cli_download/meta.yml

This file was deleted.

51 changes: 0 additions & 51 deletions modules/local/ena_webin_cli_download/tests/main.nf.test

This file was deleted.

19 changes: 0 additions & 19 deletions modules/local/ena_webin_cli_download/tests/main.nf.test.snap

This file was deleted.

13 changes: 5 additions & 8 deletions modules/local/ena_webin_cli_wrapper/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ process ENA_WEBIN_CLI_WRAPPER {

label 'process_low'
tag "${meta.id}"
container "quay.io/microbiome-informatics/java_mgnify-pipelines-toolkit:1.4.21"
// ena-webin-cli 9.0.3 + mgnify-pipelines-toolkit 1.4.24
Comment thread
KateSakharova marked this conversation as resolved.
container "community.wave.seqera.io/library/ena-webin-cli_mgnify-pipelines-toolkit:0fd318932c5ba88e"
Comment thread
KateSakharova marked this conversation as resolved.
stageInMode 'copy'

input:
tuple val(meta), path(submission_item), path(manifest)
path(webin_cli_jar)
val test_upload
val webincli_mode

Expand All @@ -22,17 +22,14 @@ process ENA_WEBIN_CLI_WRAPPER {
def args = task.ext.args ?: ""
def prefix = task.ext.prefix ?: "${meta.id}"
def test_flag = test_upload ? "--test" : ""
def fasta_dir = submission_item.toRealPath().parent
Comment thread
mberacochea marked this conversation as resolved.

"""
# change FASTA path in manifest to current workdir
export ITEM_FULL_PATH=\$(readlink -f ${submission_item})
sed 's|^FASTA\t.*|FASTA\t'"\${ITEM_FULL_PATH}"'|g' ${manifest} > ${prefix}_updated_manifest.manifest

webin_cli_handler \\
-m ${prefix}_updated_manifest.manifest \\
-m ${manifest} \\
-o ${prefix}_accessions.tsv \\
--webin-cli-jar ${webin_cli_jar} \\
--mode ${webincli_mode} \\
--fasta-dir ${fasta_dir} \\
${test_flag} \\
${args}

Expand Down
5 changes: 0 additions & 5 deletions modules/local/ena_webin_cli_wrapper/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,6 @@ input:
type: file
description: |
Webin-CLI submission manifest file.
- - webin_cli_jar:
type: file
description: |
The Webin-CLI JAR file downloaded by ena_webin_cli_download.
pattern: "webin-cli-*.jar"
- - test_upload:
type: value
description: Whether to run ENA Webin-CLI in test mode.
Expand Down
Loading
Loading