From 02278318cf8d3260ada880fad3bc7316b7101903 Mon Sep 17 00:00:00 2001 From: Ines Thiele Date: Fri, 7 Feb 2025 09:07:58 +0000 Subject: [PATCH] Changes to metaboreport and WBM modelling --- .../wholeBody/PSCMToolbox/getRxnsFromGene.m | 16 ++- .../wholeBody/PSCMToolbox/io/addReactionsHH.m | 29 ++++- .../wholeBody/PSCMToolbox/io/loadPSCMfile.m | 4 +- .../PSCMToolbox/performSanityChecksonRecon.m | 1 + .../cleanUpMetabolite_structure.m | 2 + .../connect2resources/parseBridgeDb.m | 5 +- .../connect2resources/parseDBCollection.m | 1 + .../connect2resources/parseHmdbWebPage.m | 3 + .../connect2resources/parseKeggWebpage.m | 17 +++ .../metaboRePort/generateMetaboScore.m | 2 +- .../metaboRePort/reportTemplate.html | 4 +- .../metaboRePort/tutorial_MetaboRePort.m | 49 +++++--- .../tutorial_MetaboRePort_APOLLO.m | 109 ++++++++++++++++++ .../refinement/addSinkReactions.m | 2 +- .../refinement/changeGeneAssociation.m | 3 +- 15 files changed, 215 insertions(+), 32 deletions(-) create mode 100644 src/reconstruction/metaboRePort/tutorial_MetaboRePort_APOLLO.m diff --git a/src/analysis/wholeBody/PSCMToolbox/getRxnsFromGene.m b/src/analysis/wholeBody/PSCMToolbox/getRxnsFromGene.m index d64b4c1397..455368f7f5 100644 --- a/src/analysis/wholeBody/PSCMToolbox/getRxnsFromGene.m +++ b/src/analysis/wholeBody/PSCMToolbox/getRxnsFromGene.m @@ -24,9 +24,8 @@ assoR = []; for i = 1 : length(model.grRules) - cnt = 0; + cnt = 0; if ~isempty(strfind(model.grRules{i},gene)) - % case 1 - 1 gene if ~isempty(strmatch(model.grRules{i},gene,'exact')) % perfect match assoR(i,1)=1; @@ -37,12 +36,18 @@ assoR(i,1)=1; end elseif ~isempty(strfind(model.grRules{i},{' or '})) % consider cases of alt splices and ' or ' - + [geneTok] = strtok(gene,'.'); + geneTok = regexprep(geneTok,'\(',''); + geneTok = regexprep(geneTok,'\)',''); + geneTok = regexprep(geneTok,' ',''); if isempty(strfind(model.grRules{i},{' and '})) % only 'or's [c,d] = split(model.grRules{i},' or '); for j = 1 : length(c) cTok = strtok(c{j},'.'); + cTok = regexprep(cTok,'\(',''); + cTok = regexprep(cTok,'\)',''); + cTok = regexprep(cTok,' ',''); if ~isempty(strmatch(geneTok,cTok,'exact')) || length(find(strmatch(geneTok,cTok,'exact')))>=1% perfect match cnt = cnt +1; end @@ -51,7 +56,7 @@ if cnt == length(c) % all genes in or are alt splice forms assoR(i,1)=1; end - elseif cnt>0 + elseif cnt>0 assoR(i,1)=1; end else % contains 'and' @@ -61,6 +66,9 @@ [a,b] = split(c{j},' and '); % split the 'and's for k = 1 : length(a) aTok = strtok(a{k},'.'); + aTok = regexprep(aTok,'\(',''); + aTok = regexprep(aTok,'\)',''); + aTok = regexprep(aTok,' ',''); if ~isempty(strmatch(geneTok,aTok,'exact')) % perfect match cnt = cnt +1; end diff --git a/src/analysis/wholeBody/PSCMToolbox/io/addReactionsHH.m b/src/analysis/wholeBody/PSCMToolbox/io/addReactionsHH.m index a4d1ca3288..ce9af9475f 100644 --- a/src/analysis/wholeBody/PSCMToolbox/io/addReactionsHH.m +++ b/src/analysis/wholeBody/PSCMToolbox/io/addReactionsHH.m @@ -1,4 +1,4 @@ -function [model] = addReactionsHH(model, rxnAbbrs,rxnNames, reactions, gprs, subSystems,couplingFactor) +function [model] = addReactionsHH(model, rxnAbbrs,rxnNames, reactions, gprs, subSystems,couplingFactor,rxnNotes,rxnReferences) % This function add reaction(s) to the whole-body metabolic model, % including the required coupling constraint. % This function is based on model = addReaction(model,'newRxn1','A -> B + 2 C') @@ -13,15 +13,29 @@ % gprs List of grRules % subSystems List of subSystems % couplingFactor Coupling factor to be added, default 20000 +% rxnNotes List of notes for the reactions (optional) +% rxnReferences List of references for the reactions (optional) % % OUTPUT % model Updated model structure % % Ines Thiele 2018 +% IT - added gpr rules to be properly taken into account -if ~exist('couplingFactor','var') + +if ~exist('couplingFactor','var') || ~isempty(couplingFactor) couplingFactor = 20000; end +if ~exist('rxnNotes','var') || isempty(rxnNotes) + rxnNotesPresent = 0; +else + rxnNotesPresent = 1; +end +if ~exist('rxnReferences','var') || isempty(rxnReferences) + rxnRefPresent = 0; +else + rxnRefPresent = 1; +end for i = 1 : length(rxnAbbrs) @@ -31,8 +45,17 @@ model = addReaction(model,rxnAbbrs{i},reactions{i}); A = strmatch(rxnAbbrs(i),model.rxns,'exact'); model.subSystems(A) = subSystems(i); - model.grRules(A) = gprs(i); + %model.grRules(A) = gprs(i); + if ~isempty(gprs{i}) + model = changeGeneAssociation(model, rxnAbbrs{i}, gprs{i}, {}, {}, 0); + end model.rxnNames(A) = rxnNames(i); + if isfield(model,'rxnNotes') && rxnNotesPresent == 1 + model.rxnNotes(A) = rxnNotes(i); + end + if isfield(model,'rxnReferences') && rxnRefPresent == 1 + model.rxnReferences(A) = rxnReferences(i); + end [token,rem] = strtok(rxnAbbrs{i},'_'); % find organ biomass if strcmp(token,'sIEC') diff --git a/src/analysis/wholeBody/PSCMToolbox/io/loadPSCMfile.m b/src/analysis/wholeBody/PSCMToolbox/io/loadPSCMfile.m index 553f109f3d..02604965e4 100644 --- a/src/analysis/wholeBody/PSCMToolbox/io/loadPSCMfile.m +++ b/src/analysis/wholeBody/PSCMToolbox/io/loadPSCMfile.m @@ -12,8 +12,8 @@ % OUTPUT: % variable: matlab variable returned -global useSolveCobraLPCPLEX -useSolveCobraLPCPLEX +% global useSolveCobraLPCPLEX +% useSolveCobraLPCPLEX useReadCbModel = 0; switch fileName diff --git a/src/analysis/wholeBody/PSCMToolbox/performSanityChecksonRecon.m b/src/analysis/wholeBody/PSCMToolbox/performSanityChecksonRecon.m index 185ed22c84..6dfd9c3be4 100644 --- a/src/analysis/wholeBody/PSCMToolbox/performSanityChecksonRecon.m +++ b/src/analysis/wholeBody/PSCMToolbox/performSanityChecksonRecon.m @@ -98,6 +98,7 @@ model.lb(find(ismember(model.rxns,'biomass_reaction')))=0; model.lb(find(ismember(model.rxns,'biomass_maintenance_noTrTr')))=0; model.lb(find(ismember(model.rxns,'biomass_maintenance')))=0; +model.lb(find(contains(model.rxns,'biomass')))=0; TestSolutionNameOpenSinks =''; diff --git a/src/dataIntegration/metaboAnnotator/buildMetStruct/cleanUpMetabolite_structure.m b/src/dataIntegration/metaboAnnotator/buildMetStruct/cleanUpMetabolite_structure.m index fa6e65efdb..9094f76bd9 100644 --- a/src/dataIntegration/metaboAnnotator/buildMetStruct/cleanUpMetabolite_structure.m +++ b/src/dataIntegration/metaboAnnotator/buildMetStruct/cleanUpMetabolite_structure.m @@ -21,6 +21,8 @@ for i = startSearch : endSearch + i + % remove spaces in keggIds if isempty(find(isnan(metabolite_structure.(Mets{i}).keggId))) && ~isnumeric(metabolite_structure.(Mets{i}).keggId) metabolite_structure.(Mets{i}).keggId = regexprep(metabolite_structure.(Mets{i}).keggId,'\s',''); diff --git a/src/dataIntegration/metaboAnnotator/connect2resources/parseBridgeDb.m b/src/dataIntegration/metaboAnnotator/connect2resources/parseBridgeDb.m index c2bb1df524..25066a27ae 100644 --- a/src/dataIntegration/metaboAnnotator/connect2resources/parseBridgeDb.m +++ b/src/dataIntegration/metaboAnnotator/connect2resources/parseBridgeDb.m @@ -75,9 +75,10 @@ for i = startSearch : endSearch % use Kegg as query term % if ~isempty(metabolite_structure.(Mets{i}).keggId) && isempty(find(isnan(metabolite_structure.(Mets{i}).keggId),1)) - i + progress = i/(endSearch-startSearch+1); + fprintf([num2str(progress*100) ' percent ... Retrieving Bridge DB data ... \n']); for z = 1 : size(mapping,1) - if isfield(metabolite_structure.(Mets{i}),(mapping{z,1})) && ~isempty(metabolite_structure.(Mets{i}).(mapping{z,1})) && isempty(find(isnan(metabolite_structure.(Mets{i}).(mapping{z,1})),1)) + if isfield(metabolite_structure.(Mets{i}),(mapping{z,1})) && ~isempty(metabolite_structure.(Mets{i}).(mapping{z,1})) && isempty(find(isnan(metabolite_structure.(Mets{i}).(mapping{z,1})),1)) % search for exact term try % check if the field contains a list, if not go ahead with diff --git a/src/dataIntegration/metaboAnnotator/connect2resources/parseDBCollection.m b/src/dataIntegration/metaboAnnotator/connect2resources/parseDBCollection.m index aad0718a49..4bb07e4e56 100644 --- a/src/dataIntegration/metaboAnnotator/connect2resources/parseDBCollection.m +++ b/src/dataIntegration/metaboAnnotator/connect2resources/parseDBCollection.m @@ -21,6 +21,7 @@ startSearch = 1; end if ~exist('endSearch','var') + F = fieldnames(metabolite_structure); endSearch = length(F); end diff --git a/src/dataIntegration/metaboAnnotator/connect2resources/parseHmdbWebPage.m b/src/dataIntegration/metaboAnnotator/connect2resources/parseHmdbWebPage.m index 19ef49385b..1a8064ead2 100644 --- a/src/dataIntegration/metaboAnnotator/connect2resources/parseHmdbWebPage.m +++ b/src/dataIntegration/metaboAnnotator/connect2resources/parseHmdbWebPage.m @@ -47,6 +47,9 @@ fields = fieldnames(metabolite_structure.(Mets{1})); for i = startSearch : endSearch + + progress = i/(endSearch-startSearch+1); + fprintf([num2str(progress*100) ' percent ... Retrieving HMDB data ... \n']); if ~isempty(metabolite_structure.(Mets{i}).hmdb) && isempty(find(isnan(metabolite_structure.(Mets{i}).hmdb),1)) % check that smile or inchiKey does not exist % go to chebi and parse website for smile diff --git a/src/dataIntegration/metaboAnnotator/connect2resources/parseKeggWebpage.m b/src/dataIntegration/metaboAnnotator/connect2resources/parseKeggWebpage.m index f0af04753a..9aefb53441 100644 --- a/src/dataIntegration/metaboAnnotator/connect2resources/parseKeggWebpage.m +++ b/src/dataIntegration/metaboAnnotator/connect2resources/parseKeggWebpage.m @@ -1,5 +1,22 @@ function [metabolite_structure,IDsAdded] = parseKeggWebpage(metabolite_structure,startSearch,endSearch) +% This function searches kegg for identifiers. It will either use +% kegg ids provided by the metabolite structure. +% +% INPUT +% metabolite_structure metabolite structure +% startSearch specify where the search should start in the +% metabolite structure. Must be numeric (optional, default: all metabolites +% in the structure will be search for) +% endSearch specify where the search should end in the +% metabolite structure. Must be numeric (optional, default: all metabolites +% in the structure will be search for) +% +% OUTPUT +% metabolite_structure updated metabolite structure +% +% +% Ines Thiele, 09/2021 annotationSource = 'Kegg website'; annotationType = 'automatic'; diff --git a/src/reconstruction/metaboRePort/generateMetaboScore.m b/src/reconstruction/metaboRePort/generateMetaboScore.m index eb2e4494f7..c0be460f31 100644 --- a/src/reconstruction/metaboRePort/generateMetaboScore.m +++ b/src/reconstruction/metaboRePort/generateMetaboScore.m @@ -95,7 +95,7 @@ modelProp.Details.SinkRxns = [model.rxns(contains(model.rxns,'Sink_'));model.rxns(contains(model.rxns,'sink_'))]; % biomass reactions modelProp.BiomassRxns = BioR; -modelProp.Details.BiomassRxns = model.rxns(contains(lower(model.rxns,'biomass')));% exclude EX_biomass? +modelProp.Details.BiomassRxns = model.rxns(contains(lower(model.rxns),'biomass'));% exclude EX_biomass? modelProp.MetabolicRxns = MetR; modelProp.Details.MetabolicRxns =MetRxns'; diff --git a/src/reconstruction/metaboRePort/reportTemplate.html b/src/reconstruction/metaboRePort/reportTemplate.html index 8765ad21f5..60a4a5d320 100644 --- a/src/reconstruction/metaboRePort/reportTemplate.html +++ b/src/reconstruction/metaboRePort/reportTemplate.html @@ -2305,6 +2305,8 @@

Overall score

+
+
@@ -2323,7 +2325,7 @@

Overall score

Powered by the VMH and the COBRA Toolbox

-

Copyright © 2022 ThieleLab@Uni Galway, Ireland.

+

Copyright © 2024 ThieleLab@Uni Galway, Ireland.