-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathadd_pathways_from_reactions.py
125 lines (103 loc) · 4.23 KB
/
add_pathways_from_reactions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import sys
import os
import cobra
import pandas as pd
from tqdm import tqdm
import memote
import libsbml
import helper_functions as hf
from bioservices.kegg import KEGG
'''
Usage: add_genes_from_kegg.py <path_input_sbml-file> <path_output_sbml-file>
<path_output_tsv-file_changes_pathways> <path_memote-report> <name_organism>
'''
def main(args):
# console access
if len(args) != 5:
print(main.__doc__)
sys.exit(1)
infile = args[1]
outfile = args[2]
outfile_tsv = args[3]
memote_report = args[4]
name_organism = args[5]
if not os.path.exists(infile):
print("[Error] %s : No such file." % infile)
sys.exit(1)
# create Readers and Writers
reader = libsbml.SBMLReader()
writer = libsbml.SBMLWriter()
kegg = KEGG()
# Read SBML File
doc = reader.readSBML(infile)
model = doc.getModel()
# find organism
req = kegg.lookfor_organism(name_organism)[0].split(' ')
org_id = req[1] # 'fma'
model_cobra = cobra.io.read_sbml_model(infile)
changes_pathways = pd.DataFrame(columns=["pos", "gene_id", "pathway"])
start = 0
# accessing previous progress
if os.path.exists(outfile_tsv):
changes_pathways_old = pd.read_csv(outfile_tsv, sep="\t", index_col=0)
start = changes_pathways_old.loc[len(changes_pathways_old.index)-1, "pos"]
# -- Pathway annotation via KEGG
reac_num = model.getNumReactions()
for i in tqdm(range(start, reac_num)):
pathways = dict()
if "kegg.reaction" in model_cobra.reactions[i].annotation:
reac_ids = model_cobra.reactions[i].annotation["kegg.reaction"]
if not isinstance(reac_ids, list):
reac_ids = [reac_ids]
for reac_id in reac_ids:
reaction = kegg.parse(kegg.get(reac_id))
if "PATHWAY" in reaction:
pathway = reaction["PATHWAY"]
pathways.update(pathway)
genes = model_cobra.reactions[i].genes
for gene in genes:
if "kegg.genes" in gene.annotation:
locus_tags = gene.annotation["kegg.genes"]
if not isinstance(locus_tags, list):
locus_tags = [locus_tags]
for locus_tag in locus_tags:
locus_tag = locus_tag.split(":")[1]
pathway = kegg.get_pathway_by_gene(locus_tag, org_id)
if pathway is not None:
pathways.update(pathway)
if pathways is not None:
changes_pathways.loc[len(changes_pathways.index)] = [i, model_cobra.reactions[i].id, pathways]
for pathway_id_kegg in pathways.keys():
lnk = f"https://identifiers.org/kegg.pathway/{pathway_id_kegg}"
model = hf.add_link_annotation_reaction(model, lnk, libsbml.BQB_OCCURS_IN, i)
if i % 100 == 99:
# Export model
doc.setModel(model)
writer.writeSBML(doc, outfile)
# Export progress
if os.path.exists(outfile_tsv):
changes_pathways_old = pd.read_csv(outfile_tsv, sep="\t", index_col=0)
changes_pathways = pd.concat([changes_pathways_old, changes_pathways])
changes_pathways.reset_index(drop=True, inplace=True)
changes_pathways_old = None
changes_pathways.to_csv(outfile_tsv, sep="\t")
changes_pathways = pd.DataFrame(columns=["pos", "gene_id", "pathway"])
# Export model
doc.setModel(model)
writer.writeSBML(doc, outfile)
# Export changes
if os.path.exists(outfile_tsv):
changes_pathways_old = pd.read_csv(outfile_tsv, sep="\t", index_col=0)
changes_pathways = pd.concat([changes_pathways_old, changes_pathways])
changes_pathways.reset_index(drop=True, inplace=True)
changes_pathways_old = None
changes_pathways.to_csv(outfile_tsv, sep="\t")
# Import model as cobra model for memote
model = cobra.io.read_sbml_model(outfile)
# Make memote report
result = memote.test_model(model, results=True)
report = memote.snapshot_report(result[1], config=None, html=True)
with open(memote_report, "w") as handle:
handle.write(report)
if __name__ == '__main__':
main(sys.argv)