diff --git a/bin/submit_study.py b/bin/submit_study.py index 28c1f9a..871db76 100755 --- a/bin/submit_study.py +++ b/bin/submit_study.py @@ -80,12 +80,21 @@ def submit_xml( "Content-Type": "application/xml", "Accept": "application/xml", } - resp = requests.post( - url, data=xml_bytes, - headers=headers, auth=auth, timeout=120, - ) - resp.raise_for_status() - return ET.fromstring(resp.content) + try: + resp = requests.post( + url, data=xml_bytes, + headers=headers, auth=auth, timeout=120, + ) + resp.raise_for_status() + except requests.exceptions.RequestException as e: + logger.error(f"Network error: Could not reach ENA server. Details: {e}") + sys.exit(1) + + try: + return ET.fromstring(resp.content) + except ET.ParseError: + logger.error("The ENA server returned an invalid response (not XML).") + sys.exit(1) # ----------------------------------------------------------- @@ -251,6 +260,24 @@ def load_and_validate_input_file( raises ValueError. """ ext = Path(filepath).suffix.lower() + + if ext in [".csv", ".tsv"]: + delimiter = "," if ext == ".csv" else "\t" + with open(filepath, "r", encoding="utf-8") as f: + header_line = f.readline() + if not header_line: + raise ValueError(f"File {filepath} is empty.") + + # Clean up headers (strip quotes and whitespace) + headers = [h.strip().replace('"', '') for h in header_line.split(delimiter)] + missing = [f for f in _REQUIRED_FIELDS if f not in headers] + + if missing: + # Use the logger you set up earlier! + logger.error(f"Missing required columns in {ext} file: {', '.join(missing)}") + raise ValueError(f"Missing columns: {', '.join(missing)}") + # -------------------------------------------------------- + if ext == ".json": records = extract_records_from_json(filepath) elif ext == ".csv": @@ -619,13 +646,17 @@ def main( write_results(results, output) logger.info("=" * 60) - logger.info("SUBMISSION SUMMARY") + logger.info("SUBMISSION SUMMARY - COMPLETED SUCCESSFULLY") + logger.info("Environment: %s", env_label) + logger.info("Total Studies Processed: %d", len(studies)) logger.info(" Submitted (ADD): %d", len(results["submitted"])) + for submission in results["submitted"]: - alias = submission["alias"] - accession = submission["accession"] - external_accession = submission["external_accession"] - logger.info(f" {alias} -> {accession} ({external_accession})") + alias = submission.get("alias", "Unknown") + accession = submission.get("accession", "Pending") + # Using .get() here makes it robust against missing dictionary keys + logger.info(f" {alias} -> {accession}") + logger.info("=" * 60) diff --git a/main.nf b/main.nf index 96a767b..a9d99f2 100644 --- a/main.nf +++ b/main.nf @@ -38,7 +38,6 @@ workflow NFCORE_SEQSUBMIT { // // WORKFLOW: Run pipeline // - // Depending on the input type (mags/bins or metagenomic_assemblies), one or the another workflow will be triggered if (params.mode == "mags" || params.mode == "bins") { GENOMESUBMIT ( samplesheet, @@ -77,12 +76,35 @@ workflow NFCORE_SEQSUBMIT { params.webincli_mode ) ch_multiqc_report = ASSEMBLYSUBMIT.out.multiqc_report - } - + } else if (params.mode == "reads") { + GENOMESUBMIT ( + samplesheet, + params.multiqc_config, + params.multiqc_logo, + params.multiqc_methods_description, + params.outdir, + params.mode, + params.submission_study, + params.study_metadata, + params.trna_limit, + params.rrna_limit, + params.checkm2_db, + params.checkm2_db_download_id, + params.cat_db, + params.cat_db_download_id, + params.centre_name, + params.upload_tpa, + params.test_upload, + params.webin_cli_version, + params.webincli_mode + ) + ch_multiqc_report = GENOMESUBMIT.out.multiqc_report + } // ← closes the if/else chain emit: - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html + multiqc_report = ch_multiqc_report } + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW diff --git a/subworkflows/local/utils_nfcore_seqsubmit_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqsubmit_pipeline/main.nf index 05d08e4..f6659ae 100644 --- a/subworkflows/local/utils_nfcore_seqsubmit_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqsubmit_pipeline/main.nf @@ -55,30 +55,9 @@ workflow PIPELINE_INITIALISATION { // Validate parameters and generate parameter summary to stdout // - def before_text = "" - def after_text = "" - before_text = """ --\033[2m----------------------------------------------------\033[0m- - \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m -\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m -\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m -\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m - \033[0;32m`._,._,\'\033[0m -\033[0;35m nf-core/seqsubmit ${workflow.manifest.version}\033[0m --\033[2m----------------------------------------------------\033[0m- -""" - after_text = """${workflow.manifest.doi ? "\n* The pipeline\n" : ""}${workflow.manifest.doi.tokenize(",").collect { doi -> " https://doi.org/${doi.trim().replace('https://doi.org/','')}"}.join("\n")}${workflow.manifest.doi ? "\n" : ""} -* The nf-core framework - https://doi.org/10.1038/s41587-020-0439-x - -* Software dependencies - https://github.com/nf-core/seqsubmit/blob/master/CITATIONS.md -""" - if (monochrome_logs) { - before_text = before_text.replaceAll(/\033\[[0-9;]*m/, '') - } - - command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + def before_text = "--- nf-core/seqsubmit ---" + def after_text = "-------------------------" + def command = "nextflow run nf-core/seqsubmit --input samplesheet.csv --outdir --mode " UTILS_NFSCHEMA_PLUGIN ( workflow, @@ -92,32 +71,24 @@ workflow PIPELINE_INITIALISATION { command ) - // - // Check config provided to the pipeline - // UTILS_NFCORE_PIPELINE ( nextflow_cli_args ) - // - // Create channel from input file provided through params.input - // - if ( mode == "mags" || mode == "bins" ) { - ch_samplesheet = channel - .fromList(samplesheetToList(input, "${projectDir}/assets/schema_input_genome.json")) + ch_samplesheet = channel.fromList(samplesheetToList(input, "${projectDir}/assets/schema_input_genome.json")) } else if ( mode == "metagenomic_assemblies" ) { - ch_samplesheet = channel - .fromList(samplesheetToList(input, "${projectDir}/assets/schema_input_assembly.json")) + ch_samplesheet = channel.fromList(samplesheetToList(input, "${projectDir}/assets/schema_input_assembly.json")) + } else if ( mode == "reads" ) { + ch_samplesheet = channel.fromList(samplesheetToList(input, "${projectDir}/assets/schema_input_genome.json")) } else { - error("No input was found. Please, point to the location of your samplesheet using --input_genome or --input_assembly") + error("No input found") } emit: samplesheet = ch_samplesheet versions = ch_versions } - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SUBWORKFLOW FOR PIPELINE COMPLETION