Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 42 additions & 11 deletions bin/submit_study.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,21 @@ def submit_xml(
"Content-Type": "application/xml",
"Accept": "application/xml",
}
resp = requests.post(
url, data=xml_bytes,
headers=headers, auth=auth, timeout=120,
)
resp.raise_for_status()
return ET.fromstring(resp.content)
try:
resp = requests.post(
url, data=xml_bytes,
headers=headers, auth=auth, timeout=120,
)
resp.raise_for_status()
except requests.exceptions.RequestException as e:
logger.error(f"Network error: Could not reach ENA server. Details: {e}")
sys.exit(1)

try:
return ET.fromstring(resp.content)
except ET.ParseError:
logger.error("The ENA server returned an invalid response (not XML).")
sys.exit(1)


# -----------------------------------------------------------
Expand Down Expand Up @@ -251,6 +260,24 @@ def load_and_validate_input_file(
raises ValueError.
"""
ext = Path(filepath).suffix.lower()

if ext in [".csv", ".tsv"]:
delimiter = "," if ext == ".csv" else "\t"
with open(filepath, "r", encoding="utf-8") as f:
header_line = f.readline()
if not header_line:
raise ValueError(f"File {filepath} is empty.")

# Clean up headers (strip quotes and whitespace)
headers = [h.strip().replace('"', '') for h in header_line.split(delimiter)]
missing = [f for f in _REQUIRED_FIELDS if f not in headers]

if missing:
# Use the logger you set up earlier!
logger.error(f"Missing required columns in {ext} file: {', '.join(missing)}")
raise ValueError(f"Missing columns: {', '.join(missing)}")
# --------------------------------------------------------

if ext == ".json":
records = extract_records_from_json(filepath)
elif ext == ".csv":
Expand Down Expand Up @@ -619,13 +646,17 @@ def main(
write_results(results, output)

logger.info("=" * 60)
logger.info("SUBMISSION SUMMARY")
logger.info("SUBMISSION SUMMARY - COMPLETED SUCCESSFULLY")
logger.info("Environment: %s", env_label)
logger.info("Total Studies Processed: %d", len(studies))
logger.info(" Submitted (ADD): %d", len(results["submitted"]))

for submission in results["submitted"]:
alias = submission["alias"]
accession = submission["accession"]
external_accession = submission["external_accession"]
logger.info(f" {alias} -> {accession} ({external_accession})")
alias = submission.get("alias", "Unknown")
accession = submission.get("accession", "Pending")
# Using .get() here makes it robust against missing dictionary keys
logger.info(f" {alias} -> {accession}")

logger.info("=" * 60)


Expand Down
30 changes: 26 additions & 4 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ workflow NFCORE_SEQSUBMIT {
//
// WORKFLOW: Run pipeline
//
// Depending on the input type (mags/bins or metagenomic_assemblies), one or the another workflow will be triggered
if (params.mode == "mags" || params.mode == "bins") {
GENOMESUBMIT (
samplesheet,
Expand Down Expand Up @@ -77,12 +76,35 @@ workflow NFCORE_SEQSUBMIT {
params.webincli_mode
)
ch_multiqc_report = ASSEMBLYSUBMIT.out.multiqc_report
}

} else if (params.mode == "reads") {
GENOMESUBMIT (
samplesheet,
params.multiqc_config,
params.multiqc_logo,
params.multiqc_methods_description,
params.outdir,
params.mode,
params.submission_study,
params.study_metadata,
params.trna_limit,
params.rrna_limit,
params.checkm2_db,
params.checkm2_db_download_id,
params.cat_db,
params.cat_db_download_id,
params.centre_name,
params.upload_tpa,
params.test_upload,
params.webin_cli_version,
params.webincli_mode
)
ch_multiqc_report = GENOMESUBMIT.out.multiqc_report
} // ← closes the if/else chain

emit:
multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html
multiqc_report = ch_multiqc_report
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RUN MAIN WORKFLOW
Expand Down
45 changes: 8 additions & 37 deletions subworkflows/local/utils_nfcore_seqsubmit_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -55,30 +55,9 @@ workflow PIPELINE_INITIALISATION {
// Validate parameters and generate parameter summary to stdout
//

def before_text = ""
def after_text = ""
before_text = """
-\033[2m----------------------------------------------------\033[0m-
\033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m
\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m
\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m
\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m
\033[0;32m`._,._,\'\033[0m
\033[0;35m nf-core/seqsubmit ${workflow.manifest.version}\033[0m
-\033[2m----------------------------------------------------\033[0m-
"""
after_text = """${workflow.manifest.doi ? "\n* The pipeline\n" : ""}${workflow.manifest.doi.tokenize(",").collect { doi -> " https://doi.org/${doi.trim().replace('https://doi.org/','')}"}.join("\n")}${workflow.manifest.doi ? "\n" : ""}
* The nf-core framework
https://doi.org/10.1038/s41587-020-0439-x

* Software dependencies
https://github.com/nf-core/seqsubmit/blob/master/CITATIONS.md
"""
if (monochrome_logs) {
before_text = before_text.replaceAll(/\033\[[0-9;]*m/, '')
}

command = "nextflow run ${workflow.manifest.name} -profile <docker/singularity/.../institute> --input samplesheet.csv --outdir <OUTDIR>"
def before_text = "--- nf-core/seqsubmit ---"
def after_text = "-------------------------"
def command = "nextflow run nf-core/seqsubmit --input samplesheet.csv --outdir <OUTDIR> --mode <MODE>"

UTILS_NFSCHEMA_PLUGIN (
workflow,
Expand All @@ -92,32 +71,24 @@ workflow PIPELINE_INITIALISATION {
command
)

//
// Check config provided to the pipeline
//
UTILS_NFCORE_PIPELINE (
nextflow_cli_args
)

//
// Create channel from input file provided through params.input
//

if ( mode == "mags" || mode == "bins" ) {
ch_samplesheet = channel
.fromList(samplesheetToList(input, "${projectDir}/assets/schema_input_genome.json"))
ch_samplesheet = channel.fromList(samplesheetToList(input, "${projectDir}/assets/schema_input_genome.json"))
} else if ( mode == "metagenomic_assemblies" ) {
ch_samplesheet = channel
.fromList(samplesheetToList(input, "${projectDir}/assets/schema_input_assembly.json"))
ch_samplesheet = channel.fromList(samplesheetToList(input, "${projectDir}/assets/schema_input_assembly.json"))
} else if ( mode == "reads" ) {
ch_samplesheet = channel.fromList(samplesheetToList(input, "${projectDir}/assets/schema_input_genome.json"))
} else {
error("No input was found. Please, point to the location of your samplesheet using --input_genome or --input_assembly")
error("No input found")
}

emit:
samplesheet = ch_samplesheet
versions = ch_versions
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
SUBWORKFLOW FOR PIPELINE COMPLETION
Expand Down
Loading