Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ custom_content:
- input
- input_seeds
- input_network
- network_node_degree_distribution
- drugstone_link
- overlap
- jaccard_similarity
Expand Down
16 changes: 16 additions & 0 deletions assets/network_node_degree_distribution_header.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
id: network_node_degree_distribution
parent_name: Input
section_name: network node degree distribution
description: node degree distributions of the network.
Comment thread
JohannesKersting marked this conversation as resolved.
Outdated
# Plot configuration for the node degree distribution section.
# The trailing `data` key is left empty here; bin/multiqc_formatter.py loads this
# header and sets `data` to a two-element list: [absolute counts, relative
# frequencies], each keyed by network name.
plot_type: linegraph
pconfig:
id: network_node_degree_distribution_line_graph
title: Node Degree Distribution
xlab: Node Degree
ylab: Relative Frequency
# One label per dataset, in the same order as the `data` list.
# NOTE(review): the second dataset holds fractions (count / number of vertices)
# as written by bin/graph_tool_parser.py, not values scaled to 100 -- confirm
# that the "Percentage(s)" labels are intended.
data_labels:
- name: Counts
ylab: Counts
- name: Percentages
ylab: Percentage
data:
51 changes: 49 additions & 2 deletions bin/graph_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@

import argparse
import csv
import json
import logging
import sys
import os
import graph_tool.all as gt
from pathlib import Path
from collections import Counter
import yaml

logger = logging.getLogger()

Expand All @@ -36,7 +39,7 @@ def save_multiqc(g, stem):
duplicate_edges.append(e) # Mark for removal
else:
seen_edges.add(edge_tuple)

with open("input_network_multiqc.tsv", "w") as file:
file.write(
"Network\t"
Expand All @@ -59,6 +62,43 @@ def save_multiqc(g, stem):
f"{len(duplicate_edges)}\n"
)

def save_node_degree_distribution(g, stem):
    """Write the node degree distribution of ``g`` as YAML.

    The output file ``<stem>.node_degree_distribution.yaml`` contains:

    * ``name``: the given ``stem``
    * ``absolute``: list of ``[degree, count]`` pairs, sorted by degree
    * ``relative``: list of ``[degree, count / number_of_vertices]`` pairs,
      sorted by degree

    Args:
        g: Graph whose vertices expose ``out_degree()``.
        stem: Output file prefix; also stored as the network name.
    """
    # NOTE(review): out_degree() is used as the node degree, which presumably
    # assumes an undirected graph -- confirm against the parsed input.
    degrees = [v.out_degree() for v in g.vertices()]

    # Total number of vertices, used to normalize counts to fractions.
    total_vertices = len(degrees)

    # Sort once by degree and reuse the ordering for both series.
    sorted_counts = sorted(Counter(degrees).items())

    # Both series are lists of [x, y] pairs (not dicts) so that they are
    # dumped in flow style as [degree, value].
    absolute_counts = [[degree, count] for degree, count in sorted_counts]

    # For a graph without vertices sorted_counts is empty, so no division by
    # zero can occur here.
    relative_frequencies = [
        [degree, count / total_vertices] for degree, count in sorted_counts
    ]

    node_degree_distribution = {
        "name": stem,
        "absolute": absolute_counts,
        "relative": relative_frequencies,
    }

    # Written as UTF-8 to match how multiqc_formatter.py reads the file back.
    with open(
        f"{stem}.node_degree_distribution.yaml", "w", encoding="utf-8"
    ) as file:
        yaml.safe_dump(
            node_degree_distribution,
            file,
            sort_keys=False,
            default_flow_style=None,  # keeps list pairs as [x, y]
        )




def save_diamond(g, stem):
with open(f"{stem}.diamond.csv", "w") as file:
Expand Down Expand Up @@ -108,6 +148,7 @@ def save(g, stem, format):
if format == "gt":
save_gt(g=g, stem=stem)
save_multiqc(g=g, stem=stem)
save_node_degree_distribution(g=g, stem=stem)
elif format == "diamond":
save_diamond(g=g, stem=stem)
elif format == "domino":
Expand Down Expand Up @@ -170,7 +211,13 @@ def parse_args(argv=None):
"-f",
"--format",
help="Output format (default gt). If format it gt, a summary file for multiqc will be generated as well.",
choices=("gt", "diamond", "domino", "robust", "rwr"),
choices=(
"gt",
"diamond",
"domino",
"robust",
"rwr",
),
default="gt",
)
parser.add_argument(
Expand Down
63 changes: 63 additions & 0 deletions bin/multiqc_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env python
import argparse
from pathlib import Path
import yaml
import sys


def parse_args(argv=None):
    """Parse the command-line arguments of the MultiQC formatter.

    Args:
        argv: Optional list of argument strings; ``None`` makes argparse read
            the process arguments.

    Returns:
        The parsed namespace with ``input`` (list of ``Path``) and ``header``
        (``Path``).
    """
    parser = argparse.ArgumentParser(
        description="formats file for multiqc custom contents",
        # The example must match the options defined below (-i / -H).
        epilog=(
            "Example: python multiqc_formatter.py "
            "-i network.node_degree_distribution.yaml "
            "-H network_node_degree_distribution_header.yaml"
        ),
    )
    parser.add_argument(
        "-i", "--input", type=Path, nargs="*", required=True, help="Input files"
    )
    parser.add_argument("-H", "--header", type=Path, required=True, help="Header file")
    return parser.parse_args(argv)


def parse_input(input_files, header_file):
    """Dispatch to the formatter that matches the ``id`` of the header file.

    Args:
        input_files: Paths of the files to merge into the MultiQC payload.
        header_file: Path of the YAML header; its ``id`` key selects the
            formatter.

    Raises:
        ValueError: If the header declares no ``id`` or an unsupported one.
    """
    # Parse the header as YAML instead of splitting its first line, so that a
    # leading comment, blank line or document marker does not break the lookup.
    with open(header_file, "r", encoding="utf-8") as header:
        header_content = yaml.safe_load(header) or {}

    header_id = (
        header_content.get("id") if isinstance(header_content, dict) else None
    )

    if header_id == "network_node_degree_distribution":
        save_node_degree_distribution(input_files, header_file)
    else:
        # Fail loudly rather than exiting successfully without any output.
        raise ValueError(
            f"Unsupported or missing header id {header_id!r} in {header_file}"
        )


def save_node_degree_distribution(input_files, header_file):
    """Merge per-network degree distributions into one MultiQC YAML file.

    The header YAML is loaded as the base payload. Its ``data`` entry is set to
    a two-element list: the ``absolute`` series and the ``relative`` series,
    each keyed by network name. The result is written to
    ``./node_degree_distribution_mqc.yaml``.

    Args:
        input_files: Paths of distribution YAML files providing ``absolute``
            and ``relative`` (and optionally ``name``).
        header_file: Path of the YAML header used as the base payload.

    Raises:
        ValueError: If an input file lacks ``absolute`` or ``relative``.
    """
    with open(header_file, "r", encoding="utf-8") as header_handle:
        payload = yaml.safe_load(header_handle) or {}

    counts_by_network = {}
    fractions_by_network = {}

    for path in input_files:
        with open(path, "r", encoding="utf-8") as handle:
            content = yaml.safe_load(handle) or {}

        # Fall back to the file stem when the file carries no (non-empty) name.
        label = content.get("name") or path.stem
        counts = content.get("absolute")
        fractions = content.get("relative")

        if counts is None or fractions is None:
            raise ValueError(
                f"Invalid distribution YAML in {path}: expected keys 'absolute' and 'relative'"
            )

        counts_by_network[label] = counts
        fractions_by_network[label] = fractions

    payload["data"] = [counts_by_network, fractions_by_network]

    with open("./node_degree_distribution_mqc.yaml", "w", encoding="utf-8") as out:
        yaml.safe_dump(payload, out, sort_keys=False, default_flow_style=None)


def main(argv=None):
    """Parse the command-line arguments and format the given input files.

    Args:
        argv: Optional list of argument strings forwarded to ``parse_args``;
            ``None`` keeps the previous behaviour of reading the process
            arguments.
    """
    args = parse_args(argv)
    parse_input(args.input, args.header)


if __name__ == "__main__":
sys.exit(main())
22 changes: 22 additions & 0 deletions bin/network_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ def run(args):
# Assign component ID to each component of the subnetwork
component_id = assign_component_ids(subnetwork)

# Add node degrees as vertex properties
add_node_degrees(subnetwork, name_to_degree_full, name_to_degree_sub)

# Save the network containing the annotations in graph-tool format
subnetwork.save(args.output_file)

Expand Down Expand Up @@ -140,5 +143,24 @@ def assign_component_ids(graph):
return graph.vp["component_id"]


def add_node_degrees(subnetwork, name_to_degree_full, name_to_degree_sub):
    """
    Stores two integer vertex properties on the subnetwork:
    "degree_in_full_network" and "degree_in_module".

    Each vertex is looked up by its "name" property in the two given mappings;
    names missing from a mapping get a degree of 0. Returns both property maps.
    """
    # Create both integer properties (full network first, then module).
    subnetwork.vp["degree_in_full_network"] = subnetwork.new_vertex_property("int")
    subnetwork.vp["degree_in_module"] = subnetwork.new_vertex_property("int")

    # Fetch the property maps once instead of on every iteration.
    names = subnetwork.vp["name"]
    full_degrees = subnetwork.vp["degree_in_full_network"]
    module_degrees = subnetwork.vp["degree_in_module"]

    for vertex in subnetwork.vertices():
        vertex_name = names[vertex]
        full_degrees[vertex] = name_to_degree_full.get(vertex_name, 0)
        module_degrees[vertex] = name_to_degree_sub.get(vertex_name, 0)

    return subnetwork.vp["degree_in_full_network"], subnetwork.vp["degree_in_module"]


if __name__ == "__main__":
main()
3 changes: 3 additions & 0 deletions bin/visualize_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ def parse_args(argv=None):
def main(argv=None):
"""Coordinate argument parsing and program execution."""
args = parse_args(argv)
import pyvis
print(f"pyvis version: {pyvis.__version__}")
Comment thread
JohannesKersting marked this conversation as resolved.
Outdated

logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s")
if not args.module.is_file():
logger.error(f"The given input file {args.file_in} was not found!")
Expand Down
7 changes: 7 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@ process {
]
}

withName: 'MULTIQCFORMATTER' {
    publishDir = [
        path: { "${params.outdir}/mqc_summaries" },
        mode: params.publish_dir_mode,
        // The option name is case-sensitive (`saveAs`); returning null keeps
        // versions.yml out of the published directory.
        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
    ]
}
Comment thread
JohannesKersting marked this conversation as resolved.
// Input parsing

withName: GRAPHTOOLPARSER {
Expand Down
2 changes: 1 addition & 1 deletion modules/local/graphtoolparser/main.nf
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
process GRAPHTOOLPARSER {
tag "$meta.id"
label 'process_single'

Comment thread
Schansiate marked this conversation as resolved.
input:
tuple val(meta), (path(network), stageAs: 'input/*')
val format

output:
tuple val(meta), path("*${format}*") , emit: network
tuple val(meta), path("input_network_multiqc.tsv") , emit: multiqc, optional: true
tuple val(meta), path("*node_degree_distribution.yaml") , emit: node_degree, optional: true
path "versions.yml" , emit: versions

when:
Expand Down
21 changes: 21 additions & 0 deletions modules/local/multiqcformatter/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Merges the given input files with a MultiQC custom-content header by running
// multiqc_formatter.py; emits the resulting *mqc* file(s) and versions.yml.
process MULTIQCFORMATTER {
    label 'process_single'

    input:
    tuple path(header), path(inputFiles)

    output:
    path("*mqc*")      , emit: multiqc
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // multiqc_formatter.py only imports PyYAML (not graph-tool), so that is the
    // library version recorded next to the Python version.
    """
    multiqc_formatter.py -i $inputFiles -H $header

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        python: \$(python --version | sed 's/Python //g')
        pyyaml: \$(python -c "import yaml; print(yaml.__version__)")
    END_VERSIONS
    """
}
11 changes: 10 additions & 1 deletion workflows/diseasemodulediscovery.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
//
include { INPUTCHECK } from '../modules/local/inputcheck/main'
include { GRAPHTOOLPARSER } from '../modules/local/graphtoolparser/main'
include { MULTIQCFORMATTER } from '../modules/local/multiqcformatter/main'
include { NETWORKANNOTATION } from '../modules/local/networkannotation/main'
include { SAVEMODULES } from '../modules/local/savemodules/main'
include { VISUALIZEMODULES } from '../modules/local/visualizemodules/main'
Expand Down Expand Up @@ -146,8 +147,16 @@ workflow DISEASEMODULEDISCOVERY {
)
ch_multiqc_files = ch_multiqc_files.mix(ch_network_multiqc)
ch_network_gt = GRAPHTOOLPARSER.out.network
ch_node_degree_distribution = GRAPHTOOLPARSER.out.node_degree


MULTIQCFORMATTER(
ch_node_degree_distribution.map{_meta, path -> path}.collect().map{networks ->
def header = new File("$projectDir/assets/network_node_degree_distribution_header.yaml").toPath()
[header, networks]
}
)
ch_multiqc_files = ch_multiqc_files.mix(MULTIQCFORMATTER.out.multiqc)
ch_versions = ch_versions.mix(MULTIQCFORMATTER.out.versions)
Comment thread
Schansiate marked this conversation as resolved.
Outdated
// Check input
// channel: [ val(meta[id,seeds_id,network_id]), path(seeds), path(network) ]
ch_seeds_network = ch_seeds
Expand Down
Loading