Skip to content

Commit 0227831

Browse files
committed
Changes to metaboreport and WBM modelling
1 parent f8e9ef7 commit 0227831

15 files changed

+215
-32
lines changed

src/analysis/wholeBody/PSCMToolbox/getRxnsFromGene.m

+12-4
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,8 @@
2424

2525
assoR = [];
2626
for i = 1 : length(model.grRules)
27-
cnt = 0;
27+
cnt = 0;
2828
if ~isempty(strfind(model.grRules{i},gene))
29-
3029
% case 1 - 1 gene
3130
if ~isempty(strmatch(model.grRules{i},gene,'exact')) % perfect match
3231
assoR(i,1)=1;
@@ -37,12 +36,18 @@
3736
assoR(i,1)=1;
3837
end
3938
elseif ~isempty(strfind(model.grRules{i},{' or '})) % consider cases of alt splices and ' or '
40-
39+
4140
[geneTok] = strtok(gene,'.');
41+
geneTok = regexprep(geneTok,'\(','');
42+
geneTok = regexprep(geneTok,'\)','');
43+
geneTok = regexprep(geneTok,' ','');
4244
if isempty(strfind(model.grRules{i},{' and '})) % only 'or's
4345
[c,d] = split(model.grRules{i},' or ');
4446
for j = 1 : length(c)
4547
cTok = strtok(c{j},'.');
48+
cTok = regexprep(cTok,'\(','');
49+
cTok = regexprep(cTok,'\)','');
50+
cTok = regexprep(cTok,' ','');
4651
if ~isempty(strmatch(geneTok,cTok,'exact')) || length(find(strmatch(geneTok,cTok,'exact')))>=1% perfect match
4752
cnt = cnt +1;
4853
end
@@ -51,7 +56,7 @@
5156
if cnt == length(c) % all genes in or are alt splice forms
5257
assoR(i,1)=1;
5358
end
54-
elseif cnt>0
59+
elseif cnt>0
5560
assoR(i,1)=1;
5661
end
5762
else % contains 'and'
@@ -61,6 +66,9 @@
6166
[a,b] = split(c{j},' and '); % split the 'and's
6267
for k = 1 : length(a)
6368
aTok = strtok(a{k},'.');
69+
aTok = regexprep(aTok,'\(','');
70+
aTok = regexprep(aTok,'\)','');
71+
aTok = regexprep(aTok,' ','');
6472
if ~isempty(strmatch(geneTok,aTok,'exact')) % perfect match
6573
cnt = cnt +1;
6674
end

src/analysis/wholeBody/PSCMToolbox/io/addReactionsHH.m

+26-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
function [model] = addReactionsHH(model, rxnAbbrs,rxnNames, reactions, gprs, subSystems,couplingFactor)
1+
function [model] = addReactionsHH(model, rxnAbbrs,rxnNames, reactions, gprs, subSystems,couplingFactor,rxnNotes,rxnReferences)
22
% This function adds reaction(s) to the whole-body metabolic model,
33
% including the required coupling constraint.
44
% This function is based on model = addReaction(model,'newRxn1','A -> B + 2 C')
@@ -13,15 +13,29 @@
1313
% gprs List of grRules
1414
% subSystems List of subSystems
1515
% couplingFactor Coupling factor to be added, default 20000
16+
% rxnNotes List of notes for the reactions (optional)
17+
% rxnReferences List of references for the reactions (optional)
1618
%
1719
% OUTPUT
1820
% model Updated model structure
1921
%
2022
% Ines Thiele 2018
23+
% IT - added gpr rules to be properly taken into account
2124

22-
if ~exist('couplingFactor','var')
25+
26+
if ~exist('couplingFactor','var') || isempty(couplingFactor) % fall back to the default only when no (or an empty) coupling factor was passed
2327
couplingFactor = 20000;
2428
end
29+
if ~exist('rxnNotes','var') || isempty(rxnNotes)
30+
rxnNotesPresent = 0;
31+
else
32+
rxnNotesPresent = 1;
33+
end
34+
if ~exist('rxnReferences','var') || isempty(rxnReferences)
35+
rxnRefPresent = 0;
36+
else
37+
rxnRefPresent = 1;
38+
end
2539

2640
for i = 1 : length(rxnAbbrs)
2741

@@ -31,8 +45,17 @@
3145
model = addReaction(model,rxnAbbrs{i},reactions{i});
3246
A = strmatch(rxnAbbrs(i),model.rxns,'exact');
3347
model.subSystems(A) = subSystems(i);
34-
model.grRules(A) = gprs(i);
48+
%model.grRules(A) = gprs(i);
49+
if ~isempty(gprs{i})
50+
model = changeGeneAssociation(model, rxnAbbrs{i}, gprs{i}, {}, {}, 0);
51+
end
3552
model.rxnNames(A) = rxnNames(i);
53+
if isfield(model,'rxnNotes') && rxnNotesPresent == 1
54+
model.rxnNotes(A) = rxnNotes(i);
55+
end
56+
if isfield(model,'rxnReferences') && rxnRefPresent == 1
57+
model.rxnReferences(A) = rxnReferences(i);
58+
end
3659
[token,rem] = strtok(rxnAbbrs{i},'_');
3760
% find organ biomass
3861
if strcmp(token,'sIEC')

src/analysis/wholeBody/PSCMToolbox/io/loadPSCMfile.m

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
% OUTPUT:
1313
% variable: matlab variable returned
1414

15-
global useSolveCobraLPCPLEX
16-
useSolveCobraLPCPLEX
15+
% global useSolveCobraLPCPLEX
16+
% useSolveCobraLPCPLEX
1717

1818
useReadCbModel = 0;
1919
switch fileName

src/analysis/wholeBody/PSCMToolbox/performSanityChecksonRecon.m

+1
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@
9898
model.lb(find(ismember(model.rxns,'biomass_reaction')))=0;
9999
model.lb(find(ismember(model.rxns,'biomass_maintenance_noTrTr')))=0;
100100
model.lb(find(ismember(model.rxns,'biomass_maintenance')))=0;
101+
model.lb(find(contains(model.rxns,'biomass')))=0;
101102

102103

103104
TestSolutionNameOpenSinks ='';

src/dataIntegration/metaboAnnotator/buildMetStruct/cleanUpMetabolite_structure.m

+2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121

2222

2323
for i = startSearch : endSearch
24+
i
25+
2426
% remove spaces in keggIds
2527
if isempty(find(isnan(metabolite_structure.(Mets{i}).keggId))) && ~isnumeric(metabolite_structure.(Mets{i}).keggId)
2628
metabolite_structure.(Mets{i}).keggId = regexprep(metabolite_structure.(Mets{i}).keggId,'\s','');

src/dataIntegration/metaboAnnotator/connect2resources/parseBridgeDb.m

+3-2
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,10 @@
7575
for i = startSearch : endSearch
7676
% use Kegg as query term
7777
% if ~isempty(metabolite_structure.(Mets{i}).keggId) && isempty(find(isnan(metabolite_structure.(Mets{i}).keggId),1))
78-
i
78+
progress = (i-startSearch+1)/(endSearch-startSearch+1); % fraction of the [startSearch, endSearch] window processed so far
79+
fprintf([num2str(progress*100) ' percent ... Retrieving Bridge DB data ... \n']);
7980
for z = 1 : size(mapping,1)
80-
if isfield(metabolite_structure.(Mets{i}),(mapping{z,1})) && ~isempty(metabolite_structure.(Mets{i}).(mapping{z,1})) && isempty(find(isnan(metabolite_structure.(Mets{i}).(mapping{z,1})),1))
81+
if isfield(metabolite_structure.(Mets{i}),(mapping{z,1})) && ~isempty(metabolite_structure.(Mets{i}).(mapping{z,1})) && isempty(find(isnan(metabolite_structure.(Mets{i}).(mapping{z,1})),1))
8182
% search for exact term
8283
try
8384
% check if the field contains a list, if not go ahead with

src/dataIntegration/metaboAnnotator/connect2resources/parseDBCollection.m

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
startSearch = 1;
2222
end
2323
if ~exist('endSearch','var')
24+
F = fieldnames(metabolite_structure);
2425
endSearch = length(F);
2526
end
2627

src/dataIntegration/metaboAnnotator/connect2resources/parseHmdbWebPage.m

+3
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@
4747
fields = fieldnames(metabolite_structure.(Mets{1}));
4848

4949
for i = startSearch : endSearch
50+
51+
progress = (i-startSearch+1)/(endSearch-startSearch+1); % fraction of the [startSearch, endSearch] window processed so far
52+
fprintf([num2str(progress*100) ' percent ... Retrieving HMDB data ... \n']);
5053
if ~isempty(metabolite_structure.(Mets{i}).hmdb) && isempty(find(isnan(metabolite_structure.(Mets{i}).hmdb),1))
5154
% check that smile or inchiKey does not exist
5255
% go to chebi and parse website for smile

src/dataIntegration/metaboAnnotator/connect2resources/parseKeggWebpage.m

+17
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,22 @@
11
function [metabolite_structure,IDsAdded] = parseKeggWebpage(metabolite_structure,startSearch,endSearch)
22

3+
% This function searches KEGG for identifiers, using the
4+
% kegg ids provided by the metabolite structure.
5+
%
6+
% INPUT
7+
% metabolite_structure metabolite structure
8+
% startSearch specify where the search should start in the
9+
% metabolite structure. Must be numeric (optional, default: all metabolites
10+
% in the structure will be searched)
11+
% endSearch specify where the search should end in the
12+
% metabolite structure. Must be numeric (optional, default: all metabolites
13+
% in the structure will be searched)
14+
%
15+
% OUTPUT
16+
% metabolite_structure updated metabolite structure
17+
%
18+
%
19+
% Ines Thiele, 09/2021
320

421
annotationSource = 'Kegg website';
522
annotationType = 'automatic';

src/reconstruction/metaboRePort/generateMetaboScore.m

+1-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@
9595
modelProp.Details.SinkRxns = [model.rxns(contains(model.rxns,'Sink_'));model.rxns(contains(model.rxns,'sink_'))];
9696
% biomass reactions
9797
modelProp.BiomassRxns = BioR;
98-
modelProp.Details.BiomassRxns = model.rxns(contains(lower(model.rxns,'biomass')));% exclude EX_biomass?
98+
modelProp.Details.BiomassRxns = model.rxns(contains(lower(model.rxns),'biomass'));% exclude EX_biomass?
9999

100100
modelProp.MetabolicRxns = MetR;
101101
modelProp.Details.MetabolicRxns =MetRxns';

src/reconstruction/metaboRePort/reportTemplate.html

+3-1
Original file line numberDiff line numberDiff line change
@@ -2305,6 +2305,8 @@ <h4>Overall score</h4>
23052305
</div>
23062306
</div>
23072307
</div>
2308+
<br>
2309+
<br>
23082310
</section>
23092311

23102312

@@ -2323,7 +2325,7 @@ <h4>Overall score</h4>
23232325
<p >Powered by the <a href="https://vmh.life" target="_blank" style="color: #FFFFFF">VMH</a> and the
23242326
<a href="https://opencobra.github.io/cobratoolbox/stable/" target="_blank" style="color: #FFFFFF"> COBRA Toolbox</a> </p>
23252327

2326-
<p >Copyright &copy; 2022 <a href="https://thielelab.eu" target="_blank" style="color: #FFFFFF">ThieleLab@Uni Galway, Ireland. </a><br></p>
2328+
<p >Copyright &copy; 2024 <a href="http://www.thielelab.eu" target="_blank" style="color: #FFFFFF">ThieleLab@Uni Galway, Ireland. </a><br></p>
23272329
</div>
23282330
<!-- <div class="col">
23292331
<p>

src/reconstruction/metaboRePort/tutorial_MetaboRePort.m

+32-17
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1-
%% MetaboRePort:
1+
%% MetaboRePort:
22

33
% Set path to the cobratoolbox
44
global CBTDIR
55

6+
currentDir = pwd;
67
% Set root directory
7-
root = '';
8+
root = ''; % USER INPUT: set to the directory that contains the refinedReconstructions folder (no machine-specific path committed)
89

910
% user defined path
1011
folder = [root filesep 'refinedReconstructions']; % Set path to folder with reconstructions
@@ -33,21 +34,25 @@
3334

3435
% Ensure that the name of the rBioNet metabolite structure is metabolite_structure_rBioNet
3536
metabolite_structure_rBioNet = load(metstructPath);
36-
metabolite_structure_rBioNet = metabolite_structure_rBioNet.(string(fieldnames(metabolite_structure_rBioNet)));
37+
metabolite_structure_rBioNet = metabolite_structure_rBioNet.metabolite_structure_rBioNet;
3738

3839
% Get reconstructions and reconstruction paths
3940
directory = what(folder);
4041
modelPaths = append(directory.path, filesep, directory.mat);
41-
modelList = getModelPaths(folder);
42+
modelList = modelPaths;
43+
modelList(~contains(modelList(:,1),'.mat'),:)=[];
44+
4245

4346
% Preallocate ScoresOverall table for speed
47+
if ~exist('ScoresOverall','var') % exist expects the variable NAME as a char vector; only preallocate when no table is in the workspace yet
4448
ScoresOverall = cell(length(modelList),2);
49+
end
4550

4651
tic;
47-
for i = 1 : length(modelList)
52+
for i = 1 : length(modelList) % process every reconstruction found in the folder
4853
disp(i)
4954
% Load model
50-
model = load(modelPaths(i));
55+
model = load(modelPaths{i});
5156
model = model.(string(fieldnames(model))); % ensure that the name of the loaded model is "model".
5257

5358
%[modelProp1,ScoresOverall1] = generateMemoteLikeScore(model);
@@ -63,10 +68,15 @@
6368

6469
[modelProp2,ScoresOverall2] = generateMetaboScore(modelUpdated);
6570

66-
modelProperties.(regexprep(modelList{i},'.mat','')).ScoresOverall = ScoresOverall2;
67-
modelProperties.(regexprep(modelList{i},'.mat','')).modelUpdated = modelUpdated;
68-
modelProperties.(regexprep(modelList{i},'.mat','')).modelProp2 = modelProp2;
69-
ScoresOverall{i,1} = regexprep(modelList{i},'.mat','');
71+
chdir(strcat(updatedReconstructPath, filesep));
72+
fileName = regexprep(modelList{i},folder,'');% replace folder name if present
73+
fileName = regexprep(fileName,'\/','');% replace folder name if present
74+
fileName = regexprep(fileName,'.mat','');
75+
modelName = strcat('model_',fileName);
76+
modelProperties.(modelName).ScoresOverall = ScoresOverall2;
77+
modelProperties.(modelName).modelUpdated = modelUpdated;
78+
modelProperties.(modelName).modelProp2 = modelProp2;
79+
ScoresOverall{i,1} = regexprep(fileName,'.mat','');
7080
ScoresOverall{i,2} = num2str(ScoresOverall2);
7181

7282
if mod(i,10) == 0 % Save every ten models (mod is zero only on multiples of ten)
@@ -75,20 +85,25 @@
7585

7686
% save updated mat file
7787
model = modelUpdated;
78-
save(strcat(updatedReconstructPath, filesep, modelList(i), '.mat'),'model');
88+
if ~contains(fileName,'.mat')
89+
save(strcat(fileName, '.mat'),'model');
90+
else
91+
save(fileName,'model');
92+
end
93+
chdir(currentDir)
7994

80-
%% generate sbml file
95+
% generate sbml file
8196
% remove description from model structure as this causes issues
8297
if any(contains(fieldnames(modelUpdated), {'description'}))
8398
modelUpdated = rmfield(modelUpdated,'description');
8499
end
85100

86101
% Set sbml path
87-
sbmlPath = char(strcat(annotatedSBMLreconstructions, filesep, 'Annotated_',modelList(i)));
102+
sbmlPath = char(strcat(annotatedSBMLreconstructions, filesep, 'Annotated_',fileName));
88103
% Save model
89104
outmodel = writeCbModel(modelUpdated, 'format','sbml', 'fileName', sbmlPath);
90-
end
91-
toc;
92-
105+
93106
% Generate a generateMetaboReport for each reconstruction
94-
evalc('generateMetaboReport(modelProperties,reportDir)');
107+
evalc('generateMetaboReport(modelProperties,reportDir)');
108+
end
109+
toc;

0 commit comments

Comments
 (0)