Skip to content

Commit

Permalink
Changes to metaboreport and WBM modelling
Browse files Browse the repository at this point in the history
  • Loading branch information
ithiele committed Feb 7, 2025
1 parent f8e9ef7 commit 0227831
Show file tree
Hide file tree
Showing 15 changed files with 215 additions and 32 deletions.
16 changes: 12 additions & 4 deletions src/analysis/wholeBody/PSCMToolbox/getRxnsFromGene.m
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,8 @@

assoR = [];
for i = 1 : length(model.grRules)
cnt = 0;
cnt = 0;
if ~isempty(strfind(model.grRules{i},gene))

% case 1 - 1 gene
if ~isempty(strmatch(model.grRules{i},gene,'exact')) % perfect match
assoR(i,1)=1;
Expand All @@ -37,12 +36,18 @@
assoR(i,1)=1;
end
elseif ~isempty(strfind(model.grRules{i},{' or '})) % consider cases of alt splices and ' or '

[geneTok] = strtok(gene,'.');
geneTok = regexprep(geneTok,'\(','');
geneTok = regexprep(geneTok,'\)','');
geneTok = regexprep(geneTok,' ','');
if isempty(strfind(model.grRules{i},{' and '})) % only 'or's
[c,d] = split(model.grRules{i},' or ');
for j = 1 : length(c)
cTok = strtok(c{j},'.');
cTok = regexprep(cTok,'\(','');
cTok = regexprep(cTok,'\)','');
cTok = regexprep(cTok,' ','');
if ~isempty(strmatch(geneTok,cTok,'exact')) || length(find(strmatch(geneTok,cTok,'exact')))>=1% perfect match
cnt = cnt +1;
end
Expand All @@ -51,7 +56,7 @@
if cnt == length(c) % all genes in or are alt splice forms
assoR(i,1)=1;
end
elseif cnt>0
elseif cnt>0
assoR(i,1)=1;
end
else % contains 'and'
Expand All @@ -61,6 +66,9 @@
[a,b] = split(c{j},' and '); % split the 'and's
for k = 1 : length(a)
aTok = strtok(a{k},'.');
aTok = regexprep(aTok,'\(','');
aTok = regexprep(aTok,'\)','');
aTok = regexprep(aTok,' ','');
if ~isempty(strmatch(geneTok,aTok,'exact')) % perfect match
cnt = cnt +1;
end
Expand Down
29 changes: 26 additions & 3 deletions src/analysis/wholeBody/PSCMToolbox/io/addReactionsHH.m
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
function [model] = addReactionsHH(model, rxnAbbrs,rxnNames, reactions, gprs, subSystems,couplingFactor)
function [model] = addReactionsHH(model, rxnAbbrs,rxnNames, reactions, gprs, subSystems,couplingFactor,rxnNotes,rxnReferences)
% This function add reaction(s) to the whole-body metabolic model,
% including the required coupling constraint.
% This function is based on model = addReaction(model,'newRxn1','A -> B + 2 C')
Expand All @@ -13,15 +13,29 @@
% gprs List of grRules
% subSystems List of subSystems
% couplingFactor Coupling factor to be added, default 20000
% rxnNotes List of notes for the reactions (optional)
% rxnReferences List of references for the reactions (optional)
%
% OUTPUT
% model Updated model structure
%
% Ines Thiele 2018
% IT - added gpr rules to be properly taken into account

if ~exist('couplingFactor','var')

if ~exist('couplingFactor','var') || ~isempty(couplingFactor)
couplingFactor = 20000;
end
if ~exist('rxnNotes','var') || isempty(rxnNotes)
rxnNotesPresent = 0;
else
rxnNotesPresent = 1;
end
if ~exist('rxnReferences','var') || isempty(rxnReferences)
rxnRefPresent = 0;
else
rxnRefPresent = 1;
end

for i = 1 : length(rxnAbbrs)

Expand All @@ -31,8 +45,17 @@
model = addReaction(model,rxnAbbrs{i},reactions{i});
A = strmatch(rxnAbbrs(i),model.rxns,'exact');
model.subSystems(A) = subSystems(i);
model.grRules(A) = gprs(i);
%model.grRules(A) = gprs(i);
if ~isempty(gprs{i})
model = changeGeneAssociation(model, rxnAbbrs{i}, gprs{i}, {}, {}, 0);
end
model.rxnNames(A) = rxnNames(i);
if isfield(model,'rxnNotes') && rxnNotesPresent == 1
model.rxnNotes(A) = rxnNotes(i);
end
if isfield(model,'rxnReferences') && rxnRefPresent == 1
model.rxnReferences(A) = rxnReferences(i);
end
[token,rem] = strtok(rxnAbbrs{i},'_');
% find organ biomass
if strcmp(token,'sIEC')
Expand Down
4 changes: 2 additions & 2 deletions src/analysis/wholeBody/PSCMToolbox/io/loadPSCMfile.m
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
% OUTPUT:
% variable: matlab variable returned

global useSolveCobraLPCPLEX
useSolveCobraLPCPLEX
% global useSolveCobraLPCPLEX
% useSolveCobraLPCPLEX

useReadCbModel = 0;
switch fileName
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@
model.lb(find(ismember(model.rxns,'biomass_reaction')))=0;
model.lb(find(ismember(model.rxns,'biomass_maintenance_noTrTr')))=0;
model.lb(find(ismember(model.rxns,'biomass_maintenance')))=0;
model.lb(find(contains(model.rxns,'biomass')))=0;


TestSolutionNameOpenSinks ='';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@


for i = startSearch : endSearch
i

% remove spaces in keggIds
if isempty(find(isnan(metabolite_structure.(Mets{i}).keggId))) && ~isnumeric(metabolite_structure.(Mets{i}).keggId)
metabolite_structure.(Mets{i}).keggId = regexprep(metabolite_structure.(Mets{i}).keggId,'\s','');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,10 @@
for i = startSearch : endSearch
% use Kegg as query term
% if ~isempty(metabolite_structure.(Mets{i}).keggId) && isempty(find(isnan(metabolite_structure.(Mets{i}).keggId),1))
i
progress = i/(endSearch-startSearch+1);
fprintf([num2str(progress*100) ' percent ... Retrieving Bridge DB data ... \n']);
for z = 1 : size(mapping,1)
if isfield(metabolite_structure.(Mets{i}),(mapping{z,1})) && ~isempty(metabolite_structure.(Mets{i}).(mapping{z,1})) && isempty(find(isnan(metabolite_structure.(Mets{i}).(mapping{z,1})),1))
if isfield(metabolite_structure.(Mets{i}),(mapping{z,1})) && ~isempty(metabolite_structure.(Mets{i}).(mapping{z,1})) && isempty(find(isnan(metabolite_structure.(Mets{i}).(mapping{z,1})),1))
% search for exact term
try
% check if the field contains a list, if not go ahead with
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
startSearch = 1;
end
if ~exist('endSearch','var')
F = fieldnames(metabolite_structure);
endSearch = length(F);
end

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@
fields = fieldnames(metabolite_structure.(Mets{1}));

for i = startSearch : endSearch

progress = i/(endSearch-startSearch+1);
fprintf([num2str(progress*100) ' percent ... Retrieving HMDB data ... \n']);
if ~isempty(metabolite_structure.(Mets{i}).hmdb) && isempty(find(isnan(metabolite_structure.(Mets{i}).hmdb),1))
% check that smile or inchiKey does not exist
% go to chebi and parse website for smile
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,22 @@
function [metabolite_structure,IDsAdded] = parseKeggWebpage(metabolite_structure,startSearch,endSearch)

% This function searches kegg for identifiers. It will either use
% kegg ids provided by the metabolite structure.
%
% INPUT
% metabolite_structure metabolite structure
% startSearch specify where the search should start in the
% metabolite structure. Must be numeric (optional, default: all metabolites
% in the structure will be search for)
% endSearch specify where the search should end in the
% metabolite structure. Must be numeric (optional, default: all metabolites
% in the structure will be search for)
%
% OUTPUT
% metabolite_structure updated metabolite structure
%
%
% Ines Thiele, 09/2021

annotationSource = 'Kegg website';
annotationType = 'automatic';
Expand Down
2 changes: 1 addition & 1 deletion src/reconstruction/metaboRePort/generateMetaboScore.m
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@
modelProp.Details.SinkRxns = [model.rxns(contains(model.rxns,'Sink_'));model.rxns(contains(model.rxns,'sink_'))];
% biomass reactions
modelProp.BiomassRxns = BioR;
modelProp.Details.BiomassRxns = model.rxns(contains(lower(model.rxns,'biomass')));% exclude EX_biomass?
modelProp.Details.BiomassRxns = model.rxns(contains(lower(model.rxns),'biomass'));% exclude EX_biomass?

modelProp.MetabolicRxns = MetR;
modelProp.Details.MetabolicRxns =MetRxns';
Expand Down
4 changes: 3 additions & 1 deletion src/reconstruction/metaboRePort/reportTemplate.html
Original file line number Diff line number Diff line change
Expand Up @@ -2305,6 +2305,8 @@ <h4>Overall score</h4>
</div>
</div>
</div>
<br>
<br>
</section>


Expand All @@ -2323,7 +2325,7 @@ <h4>Overall score</h4>
<p >Powered by the <a href="https://vmh.life" target="_blank" style="color: #FFFFFF">VMH</a> and the
<a href="https://opencobra.github.io/cobratoolbox/stable/" target="_blank" style="color: #FFFFFF"> COBRA Toolbox</a> </p>

<p >Copyright &copy; 2022 <a href="https://thielelab.eu" target="_blank" style="color: #FFFFFF">ThieleLab@Uni Galway, Ireland. </a><br></p>
<p >Copyright &copy; 2024 <a href="http://www.thielelab.eu" target="_blank" style="color: #FFFFFF">ThieleLab@Uni Galway, Ireland. </a><br></p>
</div>
<!-- <div class="col">
<p>
Expand Down
49 changes: 32 additions & 17 deletions src/reconstruction/metaboRePort/tutorial_MetaboRePort.m
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
%% MetaboRePort:
%% MetaboRePort:

% Set path to the cobratoolbox
global CBTDIR

currentDir = pwd;
% Set root directory
root = '';
root = '/Users/ines/Dropbox/MY PAPERS/SUBMITTED/Submitted/150k/metaboReports/APOLLOreconstructions';

% user defined path
folder = [root filesep 'refinedReconstructions']; % Set path to folder with reconstructions
Expand Down Expand Up @@ -33,21 +34,25 @@

% Ensure that the name of the rBioNet metabolite structure is metabolite_structure_rBioNet
metabolite_structure_rBioNet = load(metstructPath);
metabolite_structure_rBioNet = metabolite_structure_rBioNet.(string(fieldnames(metabolite_structure_rBioNet)));
metabolite_structure_rBioNet = metabolite_structure_rBioNet.metabolite_structure_rBioNet;

% Get reconstructions and reconstruction paths
directory = what(folder);
modelPaths = append(directory.path, filesep, directory.mat);
modelList = getModelPaths(folder);
modelList = modelPaths;
modelList(~contains(modelList(:,1),'.mat'),:)=[];


% Preallocate ScoresOverall table for speed
if ~exist(ScoresOverall,'var')
ScoresOverall = cell(length(modelList),2);
end

tic;
for i = 1 : length(modelList)
for i = 1 : 100%length(modelList)
disp(i)
% Load model
model = load(modelPaths(i));
model = load(modelPaths{i});
model = model.(string(fieldnames(model))); % ensure that the name of the loaded model is "model".

%[modelProp1,ScoresOverall1] = generateMemoteLikeScore(model);
Expand All @@ -63,10 +68,15 @@

[modelProp2,ScoresOverall2] = generateMetaboScore(modelUpdated);

modelProperties.(regexprep(modelList{i},'.mat','')).ScoresOverall = ScoresOverall2;
modelProperties.(regexprep(modelList{i},'.mat','')).modelUpdated = modelUpdated;
modelProperties.(regexprep(modelList{i},'.mat','')).modelProp2 = modelProp2;
ScoresOverall{i,1} = regexprep(modelList{i},'.mat','');
chdir(strcat(updatedReconstructPath, filesep));
fileName = regexprep(modelList{i},folder,'');% replace folder name if present
fileName = regexprep(fileName,'\/','');% replace folder name if present
fileName = regexprep(fileName,'.mat','');
modelName = strcat('model_',fileName);
modelProperties.(modelName).ScoresOverall = ScoresOverall2;
modelProperties.(modelName).modelUpdated = modelUpdated;
modelProperties.(modelName).modelProp2 = modelProp2;
ScoresOverall{i,1} = regexprep(fileName,'.mat','');
ScoresOverall{i,2} = num2str(ScoresOverall2);

if mod(i,10) % Save every ten models
Expand All @@ -75,20 +85,25 @@

% save updated mat file
model = modelUpdated;
save(strcat(updatedReconstructPath, filesep, modelList(i), '.mat'),'model');
if ~contains(fileName,'.mat')
save(strcat(fileName, '.mat'),'model');
else
save(fileName,'model');
end
chdir(currentDir)

%% generate sbml file
% generate sbml file
% remove description from model structure as this causes issues
if any(contains(fieldnames(modelUpdated), {'description'}))
modelUpdated = rmfield(modelUpdated,'description');
end

% Set sbml path
sbmlPath = char(strcat(annotatedSBMLreconstructions, filesep, 'Annotated_',modelList(i)));
sbmlPath = char(strcat(annotatedSBMLreconstructions, filesep, 'Annotated_',fileName));
% Save model
outmodel = writeCbModel(modelUpdated, 'format','sbml', 'fileName', sbmlPath);
end
toc;


% Generate a generateMetaboReport for each reconstruction
evalc('generateMetaboReport(modelProperties,reportDir)');
evalc('generateMetaboReport(modelProperties,reportDir)');
end
toc;
Loading

0 comments on commit 0227831

Please sign in to comment.