Skip to content

Commit 0eabab2

Browse files
authored
Merge pull request #2424 from ithiele/it_04_07_2024
Changes to metaboreport and WBM modelling
2 parents 4978a98 + 80847f4 commit 0eabab2

14 files changed

+272
-94
lines changed

src/analysis/wholeBody/PSCMToolbox/getRxnsFromGene.m

+12-4
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,8 @@
2424

2525
assoR = [];
2626
for i = 1 : length(model.grRules)
27-
cnt = 0;
27+
cnt = 0;
2828
if ~isempty(strfind(model.grRules{i},gene))
29-
3029
% case 1 - 1 gene
3130
if ~isempty(strmatch(model.grRules{i},gene,'exact')) % perfect match
3231
assoR(i,1)=1;
@@ -37,12 +36,18 @@
3736
assoR(i,1)=1;
3837
end
3938
elseif ~isempty(strfind(model.grRules{i},{' or '})) % consider cases of alt splices and ' or '
40-
39+
4140
[geneTok] = strtok(gene,'.');
41+
geneTok = regexprep(geneTok,'\(','');
42+
geneTok = regexprep(geneTok,'\)','');
43+
geneTok = regexprep(geneTok,' ','');
4244
if isempty(strfind(model.grRules{i},{' and '})) % only 'or's
4345
[c,d] = split(model.grRules{i},' or ');
4446
for j = 1 : length(c)
4547
cTok = strtok(c{j},'.');
48+
cTok = regexprep(cTok,'\(','');
49+
cTok = regexprep(cTok,'\)','');
50+
cTok = regexprep(cTok,' ','');
4651
if ~isempty(strmatch(geneTok,cTok,'exact')) || length(find(strmatch(geneTok,cTok,'exact')))>=1% perfect match
4752
cnt = cnt +1;
4853
end
@@ -51,7 +56,7 @@
5156
if cnt == length(c) % all genes in or are alt splice forms
5257
assoR(i,1)=1;
5358
end
54-
elseif cnt>0
59+
elseif cnt>0
5560
assoR(i,1)=1;
5661
end
5762
else % contains 'and'
@@ -61,6 +66,9 @@
6166
[a,b] = split(c{j},' and '); % split the 'and's
6267
for k = 1 : length(a)
6368
aTok = strtok(a{k},'.');
69+
aTok = regexprep(aTok,'\(','');
70+
aTok = regexprep(aTok,'\)','');
71+
aTok = regexprep(aTok,' ','');
6472
if ~isempty(strmatch(geneTok,aTok,'exact')) % perfect match
6573
cnt = cnt +1;
6674
end

src/analysis/wholeBody/PSCMToolbox/io/addReactionsHH.m

+26-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
function [model] = addReactionsHH(model, rxnAbbrs,rxnNames, reactions, gprs, subSystems,couplingFactor)
1+
function [model] = addReactionsHH(model, rxnAbbrs,rxnNames, reactions, gprs, subSystems,couplingFactor,rxnNotes,rxnReferences)
22
% This function adds reaction(s) to the whole-body metabolic model,
33
% including the required coupling constraint.
44
% This function is based on model = addReaction(model,'newRxn1','A -> B + 2 C')
@@ -13,15 +13,29 @@
1313
% gprs List of grRules
1414
% subSystems List of subSystems
1515
% couplingFactor Coupling factor to be added, default 20000
16+
% rxnNotes List of notes for the reactions (optional)
17+
% rxnReferences List of references for the reactions (optional)
1618
%
1719
% OUTPUT
1820
% model Updated model structure
1921
%
2022
% Ines Thiele 2018
23+
% IT - added gpr rules to be properly taken into account
2124

22-
if ~exist('couplingFactor','var')
25+
26+
% Fall back to the default coupling factor only when the argument is omitted or passed as empty ([]); a caller-supplied value is kept.
if ~exist('couplingFactor','var') || isempty(couplingFactor)
2327
couplingFactor = 20000;
2428
end
29+
if ~exist('rxnNotes','var') || isempty(rxnNotes)
30+
rxnNotesPresent = 0;
31+
else
32+
rxnNotesPresent = 1;
33+
end
34+
if ~exist('rxnReferences','var') || isempty(rxnReferences)
35+
rxnRefPresent = 0;
36+
else
37+
rxnRefPresent = 1;
38+
end
2539

2640
for i = 1 : length(rxnAbbrs)
2741

@@ -31,8 +45,17 @@
3145
model = addReaction(model,rxnAbbrs{i},reactions{i});
3246
A = strmatch(rxnAbbrs(i),model.rxns,'exact');
3347
model.subSystems(A) = subSystems(i);
34-
model.grRules(A) = gprs(i);
48+
%model.grRules(A) = gprs(i);
49+
if ~isempty(gprs{i})
50+
model = changeGeneAssociation(model, rxnAbbrs{i}, gprs{i}, {}, {}, 0);
51+
end
3552
model.rxnNames(A) = rxnNames(i);
53+
if isfield(model,'rxnNotes') && rxnNotesPresent == 1
54+
model.rxnNotes(A) = rxnNotes(i);
55+
end
56+
if isfield(model,'rxnReferences') && rxnRefPresent == 1
57+
model.rxnReferences(A) = rxnReferences(i);
58+
end
3659
[token,rem] = strtok(rxnAbbrs{i},'_');
3760
% find organ biomass
3861
if strcmp(token,'sIEC')

src/analysis/wholeBody/PSCMToolbox/io/loadPSCMfile.m

+60-65
Original file line numberDiff line numberDiff line change
@@ -131,71 +131,66 @@
131131
% Author:
132132
% - Tim Hensen, August 2024
133133

134-
if isempty(searchDirectory)
135-
searchDirectory = what('2020_WholeBodyModelling\Data').path;
136-
end
137-
138-
if nargin<3
139-
excludeVersion = '';
140-
end
141-
142-
% Find latest Harvey/Harvetta models
143-
WBMs = what(searchDirectory).mat;
144-
145-
% Check if any WBMs can be found
146-
if isempty(WBMs)
147-
error('No WBM .mat files found in folder.')
148-
end
149-
150-
% Remove .mat
151-
WBMs = erase(WBMs,'.mat');
152-
153-
% Exclude WBMs if needed
154-
if ~isempty(excludeVersion)
155-
WBMs(excludeVersion)=[];
156-
end
157-
158-
% Filter on Harvey or Harvetta
159-
WBMs(~contains(WBMs,filename))=[];
160-
161-
% Find version numbers in reconstruction names
162-
modelNumbers = regexp(WBMs,'[0-9]','match');
163-
164-
% Produce 2 column numerical array with the major version in the first
165-
% column and the minor version in the second column.
166-
modelNumbers = string(vertcat(modelNumbers{:}));
167-
modelNumbers = str2double(horzcat(modelNumbers(:,1), strcat(modelNumbers(:,2), modelNumbers(:,3))));
168-
169-
% Add the version letter
170-
letterVersions = string(regexp(WBMs,'(?<=\d)[a-zA-Z]','match'));
171-
% Convert letters to numbers using the ASCII table
172-
modelNumbers = [modelNumbers double(char(letterVersions))-96]; % https://www.asciitable.com/
173-
174-
% Find latest version
175-
checkLatest = @(x) max(x) == x;
176-
177-
% Find versions with the latest major release
178-
latestMajor = checkLatest(modelNumbers(:,1));
179-
if sum(latestMajor)==1
180-
% Select model if only one entry has the highest major release
181-
nameOfWBM = WBMs(latestMajor);
182-
else
183-
% Remove all entries without the latest major release
184-
modelNumbers(~latestMajor,:)=[];
185-
WBMs(~latestMajor)=[];
186-
% Find entries with the latest minor release
187-
latestMinor = checkLatest(modelNumbers(:,2));
188-
if sum(latestMinor)==1
189-
% Select model if only one entry has the highest minor release
190-
nameOfWBM = WBMs(latestMinor);
191-
else
192-
% Remove all entries without the latest minor release
193-
modelNumbers(~latestMinor,:)=[];
194-
WBMs(~latestMinor)=[];
195-
% Find entries with the latest letter release
196-
latestLetter = checkLatest(modelNumbers(:,3));
197-
if sum(latestLetter)==1
198-
nameOfWBM = WBMs(latestLetter);
134+
% global useSolveCobraLPCPLEX
135+
% useSolveCobraLPCPLEX
136+
137+
useReadCbModel = 0;
138+
switch fileName
139+
case 'Harvey'
140+
if useSolveCobraLPCPLEX
141+
%COBRA v2 format
142+
load Harvey_1_01c
143+
144+
male.subSystems(strmatch('Transport, endoplasmic reticular',male.subSystems,'exact'))={'Transport, endoplasmic reticulum'};
145+
male.subSystems(strmatch('Arginine and Proline Metabolism',male.subSystems,'exact'))={'Arginine and proline Metabolism'};
146+
male.subSystems(strmatch(' ',male.subSystems,'exact'))={'Miscellaneous'};
147+
148+
if 1
149+
%convert to v3 format except for coupling constraints
150+
male = convertOldStyleModel(male,0,0);
151+
end
152+
else
153+
if useReadCbModel
154+
male = readCbModel('Harvey_1_03c', 'fileType','Matlab', 'modelName', 'male');
155+
else
156+
%COBRA v3 format
157+
%load Harvey_1_02c
158+
try
159+
load Harvey_1_03d
160+
catch
161+
load Harvey_1_03c
162+
end
163+
end
164+
end
165+
if isfield(male,'gender')
166+
male.sex = male.gender;
167+
male = rmfield(male,'gender');
168+
else
169+
male.sex = 'male';
170+
end
171+
if isfield(male,'rxnGeneMat')
172+
male = rmfield(male,'rxnGeneMat');
173+
end
174+
variable = male;
175+
case 'Harvetta'
176+
if useSolveCobraLPCPLEX
177+
%COBRA v2 format
178+
try
179+
load Harvetta_1_01d
180+
catch
181+
load Harvetta_1_01c
182+
end
183+
female.subSystems(strmatch('Transport, endoplasmic reticular',female.subSystems,'exact'))={'Transport, endoplasmic reticulum'};
184+
female.subSystems(strmatch('Arginine and Proline Metabolism',female.subSystems,'exact'))={'Arginine and proline Metabolism'};
185+
female.subSystems(strmatch(' ',female.subSystems,'exact'))={'Miscellaneous'};
186+
187+
if 1
188+
%convert to v3 format except for coupling constraints
189+
female = convertOldStyleModel(female,0,0);
190+
end
191+
else
192+
if useReadCbModel
193+
female = readCbModel('Harvetta_1_03c', 'fileType','Matlab', 'modelName', 'male');
199194
else
200195
error('No single latest model could be found')
201196
end

src/analysis/wholeBody/PSCMToolbox/performSanityChecksonRecon.m

+1
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@
9898
model.lb(find(ismember(model.rxns,'biomass_reaction')))=0;
9999
model.lb(find(ismember(model.rxns,'biomass_maintenance_noTrTr')))=0;
100100
model.lb(find(ismember(model.rxns,'biomass_maintenance')))=0;
101+
model.lb(find(contains(model.rxns,'biomass')))=0;
101102

102103

103104
TestSolutionNameOpenSinks ='';

src/dataIntegration/metaboAnnotator/buildMetStruct/cleanUpMetabolite_structure.m

+2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121

2222

2323
for i = startSearch : endSearch
24+
i
25+
2426
% remove spaces in keggIds
2527
if isempty(find(isnan(metabolite_structure.(Mets{i}).keggId))) && ~isnumeric(metabolite_structure.(Mets{i}).keggId)
2628
metabolite_structure.(Mets{i}).keggId = regexprep(metabolite_structure.(Mets{i}).keggId,'\s','');

src/dataIntegration/metaboAnnotator/connect2resources/parseBridgeDb.m

+3-2
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,10 @@
7575
for i = startSearch : endSearch
7676
% use Kegg as query term
7777
% if ~isempty(metabolite_structure.(Mets{i}).keggId) && isempty(find(isnan(metabolite_structure.(Mets{i}).keggId),1))
78-
i
78+
progress = i/(endSearch-startSearch+1);
79+
fprintf([num2str(progress*100) ' percent ... Retrieving Bridge DB data ... \n']);
7980
for z = 1 : size(mapping,1)
80-
if isfield(metabolite_structure.(Mets{i}),(mapping{z,1})) && ~isempty(metabolite_structure.(Mets{i}).(mapping{z,1})) && isempty(find(isnan(metabolite_structure.(Mets{i}).(mapping{z,1})),1))
81+
if isfield(metabolite_structure.(Mets{i}),(mapping{z,1})) && ~isempty(metabolite_structure.(Mets{i}).(mapping{z,1})) && isempty(find(isnan(metabolite_structure.(Mets{i}).(mapping{z,1})),1))
8182
% search for exact term
8283
try
8384
% check if the field contains a list, if not go ahead with

src/dataIntegration/metaboAnnotator/connect2resources/parseDBCollection.m

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
startSearch = 1;
2222
end
2323
if ~exist('endSearch','var')
24+
F = fieldnames(metabolite_structure);
2425
endSearch = length(F);
2526
end
2627

src/dataIntegration/metaboAnnotator/connect2resources/parseHmdbWebPage.m

+3
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@
4747
fields = fieldnames(metabolite_structure.(Mets{1}));
4848

4949
for i = startSearch : endSearch
50+
51+
progress = i/(endSearch-startSearch+1);
52+
fprintf([num2str(progress*100) ' percent ... Retrieving HMDB data ... \n']);
5053
if ~isempty(metabolite_structure.(Mets{i}).hmdb) && isempty(find(isnan(metabolite_structure.(Mets{i}).hmdb),1))
5154
% check that smile or inchiKey does not exist
5255
% go to chebi and parse website for smile

src/dataIntegration/metaboAnnotator/connect2resources/parseKeggWebpage.m

+17
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,22 @@
11
function [metabolite_structure,IDsAdded] = parseKeggWebpage(metabolite_structure,startSearch,endSearch)
22

3+
% This function searches KEGG for identifiers. It uses the
4+
% KEGG ids provided by the metabolite structure.
5+
%
6+
% INPUT
7+
% metabolite_structure metabolite structure
8+
% startSearch specify where the search should start in the
9+
% metabolite structure. Must be numeric (optional, default: all metabolites
10+
% in the structure will be searched)
11+
% endSearch specify where the search should end in the
12+
% metabolite structure. Must be numeric (optional, default: all metabolites
13+
% in the structure will be searched)
14+
%
15+
% OUTPUT
16+
% metabolite_structure updated metabolite structure
17+
%
18+
%
19+
% Ines Thiele, 09/2021
320

421
annotationSource = 'Kegg website';
522
annotationType = 'automatic';

src/reconstruction/metaboRePort/reportTemplate.html

+3-1
Original file line numberDiff line numberDiff line change
@@ -2305,6 +2305,8 @@ <h4>Overall score</h4>
23052305
</div>
23062306
</div>
23072307
</div>
2308+
<br>
2309+
<br>
23082310
</section>
23092311

23102312

@@ -2323,7 +2325,7 @@ <h4>Overall score</h4>
23232325
<p >Powered by the <a href="https://vmh.life" target="_blank" style="color: #FFFFFF">VMH</a> and the
23242326
<a href="https://opencobra.github.io/cobratoolbox/stable/" target="_blank" style="color: #FFFFFF"> COBRA Toolbox</a> </p>
23252327

2326-
<p >Copyright &copy; 2022 <a href="https://thielelab.eu" target="_blank" style="color: #FFFFFF">ThieleLab@Uni Galway, Ireland. </a><br></p>
2328+
<p >Copyright &copy; 2024 <a href="http://www.thielelab.eu" target="_blank" style="color: #FFFFFF">ThieleLab@Uni Galway, Ireland. </a><br></p>
23272329
</div>
23282330
<!-- <div class="col">
23292331
<p>

0 commit comments

Comments
 (0)