-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbalance_analysis.py
104 lines (82 loc) · 3.91 KB
/
balance_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import sys
import os
import cobra
import re
import pandas as pd
from tqdm import tqdm
import helper_functions as hf
'''
Usage: balance_analysis.py <path_input_sbml-file> <path_output_tsv-file_imbalances>
The output is a .tsv-format table with the columns: "model_index", "reaction_name", "imbalances",
"reaction_string", "formulas", "frequent_compound"
The last row of the table contains a list of compounds that consistently produce the same problem.
'''
def main(args):
# console access
if len(args) != 3:
print(main.__doc__)
sys.exit(1)
infile = args[1]
outfile = args[2]
if not os.path.exists(infile):
print("[Error] %s : No such file." % infile)
sys.exit(1)
# create Readers and Writers
model = cobra.io.sbml.read_sbml_model(infile)
# check mass balance:
unbalanced_list = pd.DataFrame(columns=["model_index", "reaction_name", "imbalances", "reaction_string",
"formulas", "frequent_compound"])
compound_counter = dict()
for i in tqdm(range(len(model.reactions))):
reaction = model.reactions[i]
imbalances = reaction.check_mass_balance()
formulas = ""
charges = ""
# Makes human readable and interpretable lists, when imbalances exist
if len(imbalances) > 0 and re.search("EX_|sink_|Growth", reaction.id) is None:
reactants, operator, products = re.split("(-->|<=>|<--)", reaction.build_reaction_string())
metabolites = list(reaction.metabolites.keys())
for m in metabolites:
if m.id in compound_counter.keys():
compound_counter[m.id].append(i)
else:
compound_counter[m.id] = [i]
formulas = formulas + m.formula + f"({m.charge}),\n"
formulas = formulas[:-2]
reactants, operator, products = re.split("(-->|<=>|<--)", reaction.build_reaction_string())
reac_str = reactants + operator + "\n" + products
unbalanced_list.loc[len(unbalanced_list.index)] = [i, reaction.id, imbalances, reac_str, formulas, ""]
# Extract frequent compounds, that are present with the same imbalances in multiple cases
frequent_compounds = []
for cckey, ccvalues in compound_counter.items():
list_imbalances = []
for pos in ccvalues:
imbalances = list(unbalanced_list.loc[unbalanced_list.model_index == pos, "imbalances"])[0]
negative_imbalances = dict()
for k, v in imbalances.items():
negative_imbalances[k] = -v
if imbalances in list_imbalances or negative_imbalances in list_imbalances:
frequent_compounds.append(cckey)
unbalanced_list.loc[unbalanced_list.model_index == pos, "frequent_compound"] = \
unbalanced_list.loc[unbalanced_list.model_index == pos, "frequent_compound"] + cckey + ", "
list_imbalances.append(imbalances)
# Make table human-readable
frequent_compounds = hf.delete_doubles(frequent_compounds)
fc_str = ""
for i in range(len(frequent_compounds)):
if i % 5 == 4:
fc_str = fc_str + str(frequent_compounds[i]) + ",\n"
else:
fc_str = fc_str + str(frequent_compounds[i]) + ","
for i in range(len(unbalanced_list.index)):
imbalances_str = ""
for key in unbalanced_list.loc[i, "imbalances"].keys():
imbalances_str = imbalances_str + str(key) + ": " + str(unbalanced_list.loc[i, "imbalances"][key]) + "\n"
imbalances_str = imbalances_str[:-1]
unbalanced_list.loc[i, "imbalances"] = imbalances_str
unbalanced_list.loc[len(unbalanced_list.index)] = [-1, "Recurring imbalances:",
fc_str, len(frequent_compounds), "", ""]
# export list
unbalanced_list.to_csv(outfile, sep="\t")
if __name__ == '__main__':
main(sys.argv)