diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index a8b0008..3e1aeba 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -25,6 +25,7 @@ custom_content: - input - input_seeds - input_network + - network_node_degree_distribution - drugstone_link - overlap - jaccard_similarity diff --git a/assets/network_node_degree_distribution_header.yaml b/assets/network_node_degree_distribution_header.yaml new file mode 100644 index 0000000..809d911 --- /dev/null +++ b/assets/network_node_degree_distribution_header.yaml @@ -0,0 +1,16 @@ +parent_id: input +id: network_node_degree_distribution +parent_name: Input +section_name: Network node degree distribution +description: Node degree distributions of the input network(s). +plot_type: linegraph +pconfig: + id: network_node_degree_distribution_line_graph + title: Node Degree Distribution + xlab: Node Degree + data_labels: + - name: Counts + ylab: Counts + - name: Percentages + ylab: Percentage +data: diff --git a/bin/graph_tool_parser.py b/bin/graph_tool_parser.py index 588bc6e..ea1e2b7 100755 --- a/bin/graph_tool_parser.py +++ b/bin/graph_tool_parser.py @@ -5,11 +5,14 @@ import argparse import csv +import json import logging import sys import os import graph_tool.all as gt from pathlib import Path +from collections import Counter +import yaml logger = logging.getLogger() @@ -60,6 +63,42 @@ def save_multiqc(g, stem): ) +def save_node_degree_distribution(g, stem): + # Calculate degree for each vertex + degrees = [v.out_degree() for v in g.vertices()] + + # Count frequency of each degree + degree_counts = Counter(degrees) + + # Get total number of vertices for normalization + total_vertices = len(degrees) + + # Create absolute counts dictionary: {degree: count} + absolute_counts = [ + [degree, count] for degree, count in sorted(degree_counts.items()) + ] + + # Create relative frequencies dictionary: {degree: percentage} + relative_frequencies = [ + [degree, count / total_vertices * 100] + for 
degree, count in sorted(degree_counts.items()) + ] + # save node degree distribution as yaml + node_degree_distribution = { + "name": stem, + "absolute": absolute_counts, + "relative": relative_frequencies, + } + + with open(f"{stem}.node_degree_distribution.yaml", "w") as file: + yaml.safe_dump( + node_degree_distribution, + file, + sort_keys=False, + default_flow_style=None, # keeps list pairs as [x, y] + ) + + def save_diamond(g, stem): with open(f"{stem}.diamond.csv", "w") as file: writer = csv.writer(file, lineterminator="\n") @@ -108,6 +147,7 @@ def save(g, stem, format): if format == "gt": save_gt(g=g, stem=stem) save_multiqc(g=g, stem=stem) + save_node_degree_distribution(g=g, stem=stem) elif format == "diamond": save_diamond(g=g, stem=stem) elif format == "domino": @@ -170,7 +210,13 @@ def parse_args(argv=None): "-f", "--format", help="Output format (default gt). If format it gt, a summary file for multiqc will be generated as well.", - choices=("gt", "diamond", "domino", "robust", "rwr"), + choices=( + "gt", + "diamond", + "domino", + "robust", + "rwr", + ), default="gt", ) parser.add_argument( diff --git a/bin/multiqc_formatter.py b/bin/multiqc_formatter.py new file mode 100755 index 0000000..c6e8a11 --- /dev/null +++ b/bin/multiqc_formatter.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +import argparse +from pathlib import Path +import yaml +import sys + + +def parse_args(argv=None): + parser = argparse.ArgumentParser( + description="formats file for multiqc custom contents", + epilog="Example: python multiqc_formatter.py -i network.gt -f network_degree", + ) + parser.add_argument( + "-i", "--input", type=Path, nargs="*", required=True, help="Input files" + ) + parser.add_argument("-H", "--header", type=Path, required=True, help="Header file") + return parser.parse_args(argv) + + +def parse_input(input_files, header_file): + with open(header_file, "r") as header: + header_data = yaml.safe_load(header) + header_id = header_data.get("id", "") + + if 
header_id == "network_node_degree_distribution": + save_node_degree_distribution(input_files, header_file) + + +def save_node_degree_distribution(input_files, header_file): + with open(header_file, "r", encoding="utf-8") as header: + mqc_payload = yaml.safe_load(header) or {} + + absolute_data = {} + relative_data = {} + + for file in input_files: + with open(file, "r", encoding="utf-8") as distribution_file: + distribution = yaml.safe_load(distribution_file) or {} + + network_name = distribution.get("name") or file.stem + absolute = distribution.get("absolute") + relative = distribution.get("relative") + + if absolute is None or relative is None: + raise ValueError( + f"Invalid distribution YAML in {file}: expected keys 'absolute' and 'relative'" + ) + + absolute_data[network_name] = absolute + relative_data[network_name] = relative + + mqc_payload["data"] = [absolute_data, relative_data] + + with open("./node_degree_distribution_mqc.yaml", "w", encoding="utf-8") as file: + yaml.safe_dump(mqc_payload, file, sort_keys=False, default_flow_style=None) + + +def main(): + args = parse_args() + parse_input(args.input, args.header) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bin/network_annotation.py b/bin/network_annotation.py index f98b01e..77cb8f6 100755 --- a/bin/network_annotation.py +++ b/bin/network_annotation.py @@ -91,6 +91,9 @@ def run(args): # Assign component ID to each component of the subnetwork component_id = assign_component_ids(subnetwork) + # Add node degrees as vertex properties + add_node_degrees(subnetwork, name_to_degree_full, name_to_degree_sub) + # Save the network containing the annotations in graph-tool format subnetwork.save(args.output_file) @@ -140,5 +143,24 @@ def assign_component_ids(graph): return graph.vp["component_id"] +def add_node_degrees(subnetwork, name_to_degree_full, name_to_degree_sub): + """ + Adds the node degree from both the full network and the subnetwork as vertex properties. 
+ """ + # Add degree in the full network + subnetwork.vp["degree_in_full_network"] = subnetwork.new_vertex_property("int") + # Add degree in the subnetwork + subnetwork.vp["degree_in_module"] = subnetwork.new_vertex_property("int") + + for v in subnetwork.vertices(): + name = subnetwork.vp["name"][v] + full_degree = name_to_degree_full.get(name, 0) + sub_degree = name_to_degree_sub.get(name, 0) + subnetwork.vp["degree_in_full_network"][v] = full_degree + subnetwork.vp["degree_in_module"][v] = sub_degree + + return subnetwork.vp["degree_in_full_network"], subnetwork.vp["degree_in_module"] + + if __name__ == "__main__": main() diff --git a/bin/visualize_modules.py b/bin/visualize_modules.py index d682a85..337886a 100755 --- a/bin/visualize_modules.py +++ b/bin/visualize_modules.py @@ -104,6 +104,7 @@ def parse_args(argv=None): def main(argv=None): """Coordinate argument parsing and program execution.""" args = parse_args(argv) + logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") if not args.module.is_file(): logger.error(f"The given input file {args.file_in} was not found!") diff --git a/conf/base.config b/conf/base.config index 87ad5b7..de35ee6 100644 --- a/conf/base.config +++ b/conf/base.config @@ -11,7 +11,7 @@ process { // Set default container for python dependencies - container = 'ghcr.io/repo4eu/modulediscovery_python_dependencies:v0.1.0' + container = 'ghcr.io/repo4eu/modulediscovery_python_dependencies:v0.2.0' cpus = { 1 * task.attempt } memory = { 6.GB * task.attempt } diff --git a/conf/modules.config b/conf/modules.config index d407354..bfb66a9 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -21,6 +21,13 @@ process { ] } + withName: 'MULTIQCFORMATTER' { + publishDir = [ + path: { "${params.outdir}/mqc_summaries"}, + mode: params.publish_dir_mode, + saveAs: {filename -> filename.equals('versions.yml') ? 
null : filename} + ] + } // Input parsing withName: GRAPHTOOLPARSER { diff --git a/docs/output.md b/docs/output.md index 96e6545..e344136 100644 --- a/docs/output.md +++ b/docs/output.md @@ -51,7 +51,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d ### Prepare network -The [graph-tool](https://graph-tool.skewed.de/) library is used to parse the input network(s) into the [`.gt`](https://graph-tool.skewed.de/static/docs/stable/gt_format.html) format, the internal representation used for networks within the pipeline. Additionally, it is used to generate networks in the specific formats required by the various disease module inference methods. This step also gathers summary statistics for the MultiQC report, including the number of nodes and edges, the network [diameter](https://graph-tool.skewed.de/static/docs/stable/autosummary/graph_tool.topology.pseudo_diameter.html#graph_tool.topology.pseudo_diameter), the number of connected components, the size of the largest connected component, the count of self-loops (nodes with edges to themselves), and the number of duplicate edges (multiple edges connecting the same two nodes). +The [graph-tool](https://graph-tool.skewed.de/) library is used to parse the input network(s) into the [`.gt`](https://graph-tool.skewed.de/static/docs/stable/gt_format.html) format, the internal representation used for networks within the pipeline. Additionally, it is used to generate networks in the specific formats required by the various disease module inference methods. 
This step also gathers summary statistics for the MultiQC report, including the number of nodes and edges, the network [diameter](https://graph-tool.skewed.de/static/docs/stable/autosummary/graph_tool.topology.pseudo_diameter.html#graph_tool.topology.pseudo_diameter), the number of connected components, the size of the largest connected component, the distribution of node degrees, the count of self-loops (nodes with edges to themselves), and the number of duplicate edges (multiple edges connecting the same two nodes).
Output files @@ -63,6 +63,7 @@ The [graph-tool](https://graph-tool.skewed.de/) library is used to parse the inp - `.robust.tsv`: Input network in the format required for ROBUST or ROBUST (bias-aware). Only created if the methods are used. - `.rwr.csv`: Input network in the format required for RWR. Only created if the method is used. - `mqc_summaries/` + - ` node_degree_distribution_mqc.yaml`: Network node degree distribution for the MultiQC report. - ` input_network_mqc.tsv`: Network summary statistics for the MultiQC report.
@@ -84,7 +85,7 @@ The format of the input seed file(s) is validated, and any seed nodes not presen ## Disease module inference -The inferred disease modules are exported in multiple formats, including [`.gt`](https://graph-tool.skewed.de/static/docs/stable/gt_format.html), [`.graphml`](https://de.wikipedia.org/wiki/GraphML), and node and edge lists in `.tsv`. If a method returns only a node list rather than a full network, the connecting edges are extracted from the input network. Module nodes are annotated with their seed status (`is_seed`), their subnetwork participation degree ([`spd`](https://nedrex.net/tutorial/availableFunctions.html)), and a component identifier (`component_id`) to indicate which connected component they belong to. Additionally, tool-specific node properties are added, which are explained in the sections below. +The inferred disease modules are exported in multiple formats, including [`.gt`](https://graph-tool.skewed.de/static/docs/stable/gt_format.html), [`.graphml`](https://de.wikipedia.org/wiki/GraphML), and node and edge lists in `.tsv`. If a method returns only a node list rather than a full network, the connecting edges are extracted from the input network. Module nodes are annotated with their seed status (`is_seed`), their degree within both the whole network (`degree_in_full_network`) and the disease module (`degree_in_module`), their subnetwork participation degree ([`spd`](https://nedrex.net/tutorial/availableFunctions.html)), and a component identifier (`component_id`) to indicate which connected component they belong to. Additionally, tool-specific node properties are added, which are explained in the sections below. 
### Only seeds diff --git a/modules/local/graphtoolparser/main.nf b/modules/local/graphtoolparser/main.nf index 37b1c97..2f27e6d 100644 --- a/modules/local/graphtoolparser/main.nf +++ b/modules/local/graphtoolparser/main.nf @@ -7,9 +7,10 @@ process GRAPHTOOLPARSER { val format output: - tuple val(meta), path("*${format}*") , emit: network - tuple val(meta), path("input_network_multiqc.tsv") , emit: multiqc, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("*${format}*") , emit: network + tuple val(meta), path("input_network_multiqc.tsv") , emit: multiqc , optional: true + tuple val(meta), path("*node_degree_distribution.yaml") , emit: node_degree , optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/multiqcformatter/main.nf b/modules/local/multiqcformatter/main.nf new file mode 100644 index 0000000..f9ed777 --- /dev/null +++ b/modules/local/multiqcformatter/main.nf @@ -0,0 +1,21 @@ +process MULTIQCFORMATTER { + label 'process_single' + + input: + tuple path(header), path(inputFiles, stageAs: 'input/*') + output: + path("*mqc*") , emit : multiqc + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + script: + """ + multiqc_formatter.py -i $inputFiles -H $header + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + +} diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index fc00ca3..2a0ad7c 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -3,7 +3,7 @@ "content": [ { "BIOPAX_PARSER": { - "python": "3.12.7", + "python": "3.12.13", "graph-tool": "2.77 (commit 4ad25c62, )", "pybiopax": "0.1.4", "nedrex": "0.1.4" @@ -24,72 +24,75 @@ "biodigest": "0.2.16" }, "DRUGPREDICTIONS": { - "python": "3.12.7", + "python": "3.12.13", "pandas": "2.2.3", "drugstone": "0.4.5" }, "DRUGSTONEEXPORT": { - "python": "3.12.7", + 
"python": "3.12.13", "graph-tool": "2.77 (commit 4ad25c62, )" }, "GPROFILER2_GOST\"": { "r-ggplot2": "3.4.3", "r-gprofiler2": "0.2.2", - "gprofiler-data": "biomart: Ensembl\nbiomart_version: '114'\ndisplay_name: Human\ngenebuild: GRCh38.p14\ngprofiler_version: e114_eg62_p19_fa3a7d2c\norganism: hsapiens\nsources:\n CORUM:\n name: CORUM protein complexes\n version: 28.11.2022 Corum 4.1\n GO:BP:\n name: biological process\n version: |-\n annotations: BioMart\n classes: releases/2026-01-23\n GO:CC:\n name: cellular component\n version: |-\n annotations: BioMart\n classes: releases/2026-01-23\n GO:MF:\n name: molecular function\n version: |-\n annotations: BioMart\n classes: releases/2026-01-23\n HP:\n name: Human Phenotype Ontology\n version: |-\n annotations: 03.2026\n classes: None\n HPA:\n name: Human Protein Atlas\n version: |-\n annotations: HPA website: 25-11-06\n classes: script: 26-01-20\n KEGG:\n name: Kyoto Encyclopedia of Genes and Genomes\n version: KEGG FTP Release 2026-03-15\n MIRNA:\n name: miRTarBase\n version: Release 10.0\n REAC:\n name: Reactome\n version: |-\n annotations: BioMart\n classes: 2026-3-20\n TF:\n name: Transfac\n version: |-\n annotations: TRANSFAC Release 2025.2\n classes: v2\n WP:\n name: WikiPathways\n version: '20260310'\ntaxonomy_id: '9606'\n" + "gprofiler-data": "biomart: Ensembl\nbiomart_version: '114'\ndisplay_name: Human\ngenebuild: GRCh38.p14\ngprofiler_version: e114_eg62_p19_27110d83\norganism: hsapiens\nsources:\n CORUM:\n name: CORUM protein complexes\n version: 28.11.2022 Corum 4.1\n GO:BP:\n name: biological process\n version: |-\n annotations: BioMart\n classes: releases/2026-01-23\n GO:CC:\n name: cellular component\n version: |-\n annotations: BioMart\n classes: releases/2026-01-23\n GO:MF:\n name: molecular function\n version: |-\n annotations: BioMart\n classes: releases/2026-01-23\n HP:\n name: Human Phenotype Ontology\n version: |-\n annotations: 03.2026\n classes: None\n HPA:\n name: Human Protein Atlas\n version: 
|-\n annotations: HPA website: 25-11-06\n classes: script: 26-01-20\n KEGG:\n name: Kyoto Encyclopedia of Genes and Genomes\n version: KEGG FTP Release 2026-03-15\n MIRNA:\n name: miRTarBase\n version: Release 10.0\n REAC:\n name: Reactome\n version: |-\n annotations: BioMart\n classes: 2026-3-20\n TF:\n name: Transfac\n version: |-\n annotations: TRANSFAC Release 2025.2\n classes: v2\n WP:\n name: WikiPathways\n version: '20260310'\ntaxonomy_id: '9606'\n" }, "GRAPHTOOLPARSER": { - "python": "3.12.7", + "python": "3.12.13", "graph-tool": "2.77 (commit 4ad25c62, )" }, "MODULEPARSER": { - "python": "3.12.7", + "python": "3.12.13", "graph-tool": "2.77 (commit 4ad25c62, )" }, + "MULTIQCFORMATTER": { + "python": "3.12.13" + }, "NETWORKANNOTATION": { - "python": "3.12.7", + "python": "3.12.13", "graph-tool": "2.77 (commit 4ad25c62, )" }, "NETWORKPERTURBATION": { - "python": "3.12.7", + "python": "3.12.13", "graph-tool": "2.77 (commit 4ad25c62, )" }, "NETWORKPERTURBATIONEVALUATION": { - "python": "3.12.7" + "python": "3.12.13" }, "PROXIMITY": { - "python": "3.12.7", - "numpy": "2.1.2", + "python": "3.12.13", + "numpy": "2.1.3", "pandas": "2.2.3", "networkx": 3.3 }, "SAVEMODULES": { - "python": "3.12.7", + "python": "3.12.13", "graph-tool": "2.77 (commit 4ad25c62, )", "pandas": "2.2.3" }, "SEEDPERTURBATION": { - "python": "3.12.7" + "python": "3.12.13" }, "SEEDPERTURBATIONEVALUATION": { - "python": "3.12.7" + "python": "3.12.13" }, "SHORTEST_PATHS": { - "python": "3.12.7", + "python": "3.12.13", "networkx": 3.3 }, "TOPOLOGY": { - "python": "3.12.7", + "python": "3.12.13", "graph-tool": "2.77 (commit 4ad25c62, )" }, "VISUALIZEMODULES": { - "python": "3.12.7", + "python": "3.12.13", "graph-tool": "2.77 (commit 4ad25c62, )", "networkx": 3.3, "pyintergraph": "1.3.3", "pyvis": "0.3.1" }, "VISUALIZEMODULESDRUGS": { - "python": "3.12.7", + "python": "3.12.13", "graph-tool": "2.77 (commit 4ad25c62, )", "networkx": 3.3, "pyintergraph": "1.3.3", @@ -184,6 +187,7 @@ 
"evaluation/gprofiler/entrez_seeds_1.entrez_ppi.diamond/entrez_seeds_1.entrez_ppi.diamond.gprofiler2.gost_results.rds", "evaluation/gprofiler/entrez_seeds_1.entrez_ppi.diamond/entrez_seeds_1.entrez_ppi.diamond.gprofiler2.gostplot.html", "evaluation/gprofiler/entrez_seeds_1.entrez_ppi.diamond/entrez_seeds_1.entrez_ppi.diamond.gprofiler2.gostplot.png", + "evaluation/gprofiler/entrez_seeds_1.entrez_ppi.diamond/gprofiler_full_hsapiens.GO_WP_REAC_KEGG.ENSG_filtered.gmt", "evaluation/gprofiler/entrez_seeds_1.entrez_ppi.no_tool", "evaluation/gprofiler/entrez_seeds_1.entrez_ppi.no_tool/R_sessionInfo.log", "evaluation/gprofiler/entrez_seeds_1.entrez_ppi.no_tool/entrez_seeds_1.entrez_ppi.no_tool.gprofiler2.GO:CC.sub_enriched_pathways.png", @@ -196,6 +200,7 @@ "evaluation/gprofiler/entrez_seeds_1.entrez_ppi.no_tool/entrez_seeds_1.entrez_ppi.no_tool.gprofiler2.gost_results.rds", "evaluation/gprofiler/entrez_seeds_1.entrez_ppi.no_tool/entrez_seeds_1.entrez_ppi.no_tool.gprofiler2.gostplot.html", "evaluation/gprofiler/entrez_seeds_1.entrez_ppi.no_tool/entrez_seeds_1.entrez_ppi.no_tool.gprofiler2.gostplot.png", + "evaluation/gprofiler/entrez_seeds_1.entrez_ppi.no_tool/gprofiler_full_hsapiens.GO_WP_REAC_KEGG.ENSG_filtered.gmt", "evaluation/seed_perturbation", "evaluation/seed_perturbation/entrez_seeds_1.entrez_ppi.diamond", "evaluation/seed_perturbation/entrez_seeds_1.entrez_ppi.diamond/entrez_seeds_1.entrez_ppi.diamond.seed_perturbation_evaluation_detailed.tsv", @@ -210,6 +215,7 @@ "input/networks", "input/networks/entrez_ppi.diamond.csv", "input/networks/entrez_ppi.gt", + "input/networks/entrez_ppi.node_degree_distribution.yaml", "input/perturbed_networks", "input/perturbed_networks/entrez_ppi", "input/perturbed_networks/entrez_ppi/entrez_ppi.perm_0.gt", @@ -254,7 +260,7 @@ "main.nf:md5,c63ef7440278f889ab5c6d56bce23e06" ], [ - "main.nf:md5,561b4f67517eec047bef02f1df8a755e" + "main.nf:md5,b05cab49c2df502baebbffac0352fafe" ], [ "main.nf:md5,a936288c36f38445ad191ef3e0b9f4f9" @@ 
-268,6 +274,9 @@ [ "main.nf:md5,c22c94f52d7dfae36a4f8470c7818804" ], + [ + "main.nf:md5,5416d5bc6d14178c734672ab0f3a39d5" + ], [ "main.nf:md5,7a26946c4861bf5d704172f597d2b0ae" ], @@ -403,6 +412,7 @@ "mqc_summaries/jaccard_similarity_no_seeds_matrix_mqc.tsv", "mqc_summaries/network_perturbation_jaccard_mqc.yaml", "mqc_summaries/network_perturbation_mqc.tsv", + "mqc_summaries/node_degree_distribution_mqc.yaml", "mqc_summaries/seed_perturbation_jaccard_mqc.yaml", "mqc_summaries/seed_perturbation_mqc.tsv", "mqc_summaries/shared_nodes_matrix_mqc.tsv", @@ -423,6 +433,8 @@ "multiqc/multiqc_data/multiqc_input_seeds.txt", "multiqc/multiqc_data/multiqc_jaccard_similarity.txt", "multiqc/multiqc_data/multiqc_jaccard_similarity_no_seeds.txt", + "multiqc/multiqc_data/multiqc_network_node_degree_distribution_line_graph.txt", + "multiqc/multiqc_data/multiqc_network_node_degree_distribution_line_graph_1.txt", "multiqc/multiqc_data/multiqc_network_perturbation_jaccard_box_plot.txt", "multiqc/multiqc_data/multiqc_seed_perturbation_jaccard_box_plot.txt", "multiqc/multiqc_data/multiqc_shared_nodes.txt", @@ -439,6 +451,8 @@ "multiqc/multiqc_plots/pdf/input_seeds-pct.pdf", "multiqc/multiqc_plots/pdf/jaccard_similarity.pdf", "multiqc/multiqc_plots/pdf/jaccard_similarity_no_seeds.pdf", + "multiqc/multiqc_plots/pdf/network_node_degree_distribution_line_graph_Counts.pdf", + "multiqc/multiqc_plots/pdf/network_node_degree_distribution_line_graph_Percentages.pdf", "multiqc/multiqc_plots/pdf/network_perturbation_jaccard_box_plot.pdf", "multiqc/multiqc_plots/pdf/seed_perturbation_jaccard_box_plot.pdf", "multiqc/multiqc_plots/pdf/shared_nodes.pdf", @@ -452,6 +466,8 @@ "multiqc/multiqc_plots/png/input_seeds-pct.png", "multiqc/multiqc_plots/png/jaccard_similarity.png", "multiqc/multiqc_plots/png/jaccard_similarity_no_seeds.png", + "multiqc/multiqc_plots/png/network_node_degree_distribution_line_graph_Counts.png", + 
"multiqc/multiqc_plots/png/network_node_degree_distribution_line_graph_Percentages.png", "multiqc/multiqc_plots/png/network_perturbation_jaccard_box_plot.png", "multiqc/multiqc_plots/png/seed_perturbation_jaccard_box_plot.png", "multiqc/multiqc_plots/png/shared_nodes.png", @@ -465,6 +481,8 @@ "multiqc/multiqc_plots/svg/input_seeds-pct.svg", "multiqc/multiqc_plots/svg/jaccard_similarity.svg", "multiqc/multiqc_plots/svg/jaccard_similarity_no_seeds.svg", + "multiqc/multiqc_plots/svg/network_node_degree_distribution_line_graph_Counts.svg", + "multiqc/multiqc_plots/svg/network_node_degree_distribution_line_graph_Percentages.svg", "multiqc/multiqc_plots/svg/network_perturbation_jaccard_box_plot.svg", "multiqc/multiqc_plots/svg/seed_perturbation_jaccard_box_plot.svg", "multiqc/multiqc_plots/svg/shared_nodes.svg", @@ -478,17 +496,19 @@ [ "entrez_ppi.diamond.csv:md5,59c734eaa00f2265dd1bb896f2aee772", "entrez_ppi.gt:md5,c908f353a8a0e0cb04aa9cb67121521d", + "entrez_ppi.node_degree_distribution.yaml:md5,24eee40aaebf40772be9fc643e36c6b9", "entrez_seeds_1.entrez_ppi.no_tool.gt:md5,e19b2d65dfd9fd20f1a8f843d52c1235", "entrez_seeds_1.entrez_ppi.tsv:md5,1291b36e8add2d6ef0b8b72f7889016b", - "entrez_seeds_1.entrez_ppi.no_tool.graphml:md5,bfaed89376015c6ec3724a870fd86031", - "entrez_seeds_1.entrez_ppi.no_tool.gt:md5,5939c5390ea37756858e189b2f90462a", + "entrez_seeds_1.entrez_ppi.no_tool.graphml:md5,cc3311884751102ec002365dc513bb42", + "entrez_seeds_1.entrez_ppi.no_tool.gt:md5,d904aa651ab7267f077e75a6f0dc4f5c", "entrez_seeds_1.entrez_ppi.no_tool.edges.tsv:md5,99cbfff7b59f45c1de3e5e6d36692099", - "entrez_seeds_1.entrez_ppi.no_tool.nodes.tsv:md5,e711b826c2117dfde768e65f44cac358", + "entrez_seeds_1.entrez_ppi.no_tool.nodes.tsv:md5,6d067dc2849d56e45b7aadf4b5f02fbd", "input_network_mqc.tsv:md5,eedaf7f299c89a43429fa66dffb2bbc3", - "input_seeds_mqc.tsv:md5,269d96ba266e5481c37a550fe4f13521" + "input_seeds_mqc.tsv:md5,269d96ba266e5481c37a550fe4f13521", + 
"node_degree_distribution_mqc.yaml:md5,b4b1eab1a3622315ecfc98065a2760c6" ] ], - "timestamp": "2026-05-05T18:19:28.128987463", + "timestamp": "2026-06-23T15:30:30.075392251", "meta": { "nf-test": "0.9.5", "nextflow": "26.04.0" diff --git a/workflows/diseasemodulediscovery.nf b/workflows/diseasemodulediscovery.nf index f088fc5..177f743 100644 --- a/workflows/diseasemodulediscovery.nf +++ b/workflows/diseasemodulediscovery.nf @@ -9,6 +9,7 @@ // include { INPUTCHECK } from '../modules/local/inputcheck/main' include { GRAPHTOOLPARSER } from '../modules/local/graphtoolparser/main' +include { MULTIQCFORMATTER } from '../modules/local/multiqcformatter/main' include { NETWORKANNOTATION } from '../modules/local/networkannotation/main' include { SAVEMODULES } from '../modules/local/savemodules/main' include { VISUALIZEMODULES } from '../modules/local/visualizemodules/main' @@ -151,7 +152,6 @@ workflow DISEASEMODULEDISCOVERY { ch_multiqc_files = ch_multiqc_files.mix(ch_network_multiqc) ch_network_gt = GRAPHTOOLPARSER.out.network - // Check input // channel: [ val(meta[id,seeds_id,network_id]), path(seeds), path(network) ] ch_seeds_network = ch_seeds @@ -568,6 +568,16 @@ workflow DISEASEMODULEDISCOVERY { } + // Format complex MultiQC input files + MULTIQCFORMATTER( + GRAPHTOOLPARSER.out.node_degree.map{_meta, path -> path}.collect().map{networks -> + def header = new File("$projectDir/assets/network_node_degree_distribution_header.yaml").toPath() + [header, networks] + } + ) + ch_multiqc_files = ch_multiqc_files.mix(MULTIQCFORMATTER.out.multiqc) + ch_versions = ch_versions.mix(MULTIQCFORMATTER.out.versions) + // Collate and save software versions // def topic_versions = channel.topic("versions")