Merge pull request #2424 from ithiele/it_04_07_2024

rmtfleming · web-flow · commit 0eabab23b035 · 2025-03-05T10:03:34.000Z
Changes to metaboreport and WBM modelling
diff --git a/src/analysis/wholeBody/PSCMToolbox/getRxnsFromGene.m b/src/analysis/wholeBody/PSCMToolbox/getRxnsFromGene.m
@@ -24,9 +24,8 @@
 
 assoR = [];
 for i = 1 : length(model.grRules)
-     cnt = 0;
+    cnt = 0;
     if ~isempty(strfind(model.grRules{i},gene))
-         
         % case 1 - 1 gene
         if  ~isempty(strmatch(model.grRules{i},gene,'exact')) % perfect match
             assoR(i,1)=1;
@@ -37,12 +36,18 @@
                 assoR(i,1)=1;
             end
         elseif ~isempty(strfind(model.grRules{i},{' or '})) % consider cases of alt splices and ' or '
-          
+            
             [geneTok] = strtok(gene,'.');
+            geneTok = regexprep(geneTok,'\(','');
+            geneTok = regexprep(geneTok,'\)','');
+            geneTok = regexprep(geneTok,' ','');
             if isempty(strfind(model.grRules{i},{' and '})) % only 'or's
                 [c,d] = split(model.grRules{i},' or ');
                 for j = 1 : length(c)
                     cTok = strtok(c{j},'.');
+                    cTok = regexprep(cTok,'\(','');
+                    cTok = regexprep(cTok,'\)','');
+                    cTok = regexprep(cTok,' ','');
                     if ~isempty(strmatch(geneTok,cTok,'exact')) || length(find(strmatch(geneTok,cTok,'exact')))>=1% perfect match
                         cnt = cnt +1;
                     end
@@ -51,7 +56,7 @@
                     if cnt == length(c) % all genes in or are alt splice forms
                         assoR(i,1)=1;
                     end
-                 elseif cnt>0
+                elseif cnt>0
                     assoR(i,1)=1;
                 end
             else % contains 'and'
@@ -61,6 +66,9 @@
                         [a,b] = split(c{j},' and '); % split the 'and's
                         for k = 1 : length(a)
                             aTok = strtok(a{k},'.');
+                            aTok = regexprep(aTok,'\(','');
+                            aTok = regexprep(aTok,'\)','');
+                            aTok = regexprep(aTok,' ','');
                             if ~isempty(strmatch(geneTok,aTok,'exact')) % perfect match
                                 cnt = cnt +1;
                             end
diff --git a/src/analysis/wholeBody/PSCMToolbox/io/addReactionsHH.m b/src/analysis/wholeBody/PSCMToolbox/io/addReactionsHH.m
@@ -1,4 +1,4 @@
-function [model] = addReactionsHH(model, rxnAbbrs,rxnNames, reactions, gprs, subSystems,couplingFactor)
+function [model] = addReactionsHH(model, rxnAbbrs,rxnNames, reactions, gprs, subSystems,couplingFactor,rxnNotes,rxnReferences)
 % This function add reaction(s) to the whole-body metabolic model,
 % including the required coupling constraint.
 % This function is based on model = addReaction(model,'newRxn1','A -> B + 2 C')
@@ -13,15 +13,29 @@
 % gprs              List of grRules
 % subSystems        List of subSystems
 % couplingFactor    Coupling factor to be added, default 20000
+% rxnNotes          List of notes for the reactions (optional)
+% rxnReferences     List of references for the reactions (optional)
 %
 % OUTPUT
 % model             Updated model structure
 %
 % Ines Thiele 2018
+% IT - added gpr rules to be properly taken into account
 
-if ~exist('couplingFactor','var')
+
+if ~exist('couplingFactor','var') || ~isempty(couplingFactor)
     couplingFactor = 20000;
 end
+if ~exist('rxnNotes','var') || isempty(rxnNotes)
+    rxnNotesPresent = 0;
+else
+        rxnNotesPresent = 1;
+end
+if ~exist('rxnReferences','var') || isempty(rxnReferences)
+    rxnRefPresent = 0;
+else
+    rxnRefPresent = 1;
+end
 
 for i = 1 : length(rxnAbbrs)
     
@@ -31,8 +45,17 @@
         model = addReaction(model,rxnAbbrs{i},reactions{i});
         A = strmatch(rxnAbbrs(i),model.rxns,'exact');
         model.subSystems(A) = subSystems(i);
-        model.grRules(A) = gprs(i);
+        %model.grRules(A) = gprs(i);
+        if ~isempty(gprs{i})
+            model = changeGeneAssociation(model, rxnAbbrs{i}, gprs{i}, {}, {}, 0);
+        end
         model.rxnNames(A) = rxnNames(i);
+        if isfield(model,'rxnNotes') && rxnNotesPresent == 1
+            model.rxnNotes(A) = rxnNotes(i);
+        end
+        if isfield(model,'rxnReferences') && rxnRefPresent == 1
+            model.rxnReferences(A) = rxnReferences(i);
+        end
         [token,rem] = strtok(rxnAbbrs{i},'_');
         % find organ biomass
         if strcmp(token,'sIEC')
diff --git a/src/analysis/wholeBody/PSCMToolbox/io/loadPSCMfile.m b/src/analysis/wholeBody/PSCMToolbox/io/loadPSCMfile.m
@@ -131,71 +131,66 @@
 % Author: 
 %         - Tim Hensen, August 2024
 
-if isempty(searchDirectory)
-    searchDirectory = what('2020_WholeBodyModelling\Data').path;
-end
-
-if nargin<3
-    excludeVersion = '';
-end
-
-% Find latest harvery/harvetta models
-WBMs = what(searchDirectory).mat;
-
-% Check if any WBMs can be found
-if isempty(WBMs)
-    error('No WBM .mat files found in folder.')
-end
-
-% Remove .mat
-WBMs = erase(WBMs,'.mat');
-
-% Exclude WBMs if needed
-if ~isempty(excludeVersion)
-    WBMs(excludeVersion)=[];
-end
-
-% Filter on Harvey or Harvetta
-WBMs(~contains(WBMs,filename))=[];
-
-% Find version numbers in reconstruction names
-modelNumbers = regexp(WBMs,'[0-9]','match');
-
-% Produce 2 column numerical array with the major version in the first
-% column and the minor version in the second column.
-modelNumbers = string(vertcat(modelNumbers{:}));
-modelNumbers = str2double(horzcat(modelNumbers(:,1), strcat(modelNumbers(:,2), modelNumbers(:,3))));
-
-% Add the version letter
-letterVersions = string(regexp(WBMs,'(?<=\d)[a-zA-Z]','match'));
-% Convert lettes to numbers using ascii table
-modelNumbers = [modelNumbers double(char(letterVersions))-96]; % https://www.asciitable.com/
-
-% Find latest version
-checkLatest = @(x) max(x) == x;
-
-% Find versions with the latest major release
-latestMajor = checkLatest(modelNumbers(:,1));
-if sum(latestMajor)==1
-    % Select model if only one entry has the highest major release
-    nameOfWBM = WBMs(latestMajor);
-else
-    % Remove all entries without the latest major release
-    modelNumbers(~latestMajor,:)=[];
-    WBMs(~latestMajor)=[];
-    % Find entries with the latest minor release
-    latestMinor = checkLatest(modelNumbers(:,2));
-    if sum(latestMinor)==1
-            % Select model if only one entry has the highest major release
-            nameOfWBM = WBMs(latestMinor);
-    else
-            % Remove all entries without the latest minor release
-            modelNumbers(~latestMinor,:)=[];
-            WBMs(~latestMinor)=[];
-            % Find entries with the latest letter release
-            latestLetter = checkLatest(modelNumbers(:,3));
-            if sum(latestLetter)==1
-                nameOfWBM = WBMs(latestLetter);
+% global useSolveCobraLPCPLEX
+% useSolveCobraLPCPLEX
+
+useReadCbModel = 0;
+switch fileName
+    case 'Harvey'
+        if useSolveCobraLPCPLEX
+            %COBRA v2 format
+            load Harvey_1_01c
+            
+            male.subSystems(strmatch('Transport, endoplasmic reticular',male.subSystems,'exact'))={'Transport, endoplasmic reticulum'};
+            male.subSystems(strmatch('Arginine and Proline Metabolism',male.subSystems,'exact'))={'Arginine and proline Metabolism'};
+            male.subSystems(strmatch(' ',male.subSystems,'exact'))={'Miscellaneous'};
+            
+            if 1
+                %convert to v3 format except for coupling constraints
+                male   = convertOldStyleModel(male,0,0);
+            end
+        else
+            if useReadCbModel
+                male = readCbModel('Harvey_1_03c', 'fileType','Matlab', 'modelName', 'male');
+            else
+                %COBRA v3 format
+                %load Harvey_1_02c
+                try
+                    load Harvey_1_03d
+                catch
+                    load Harvey_1_03c
+                end
+            end
+        end
+        if isfield(male,'gender')
+            male.sex = male.gender;
+            male = rmfield(male,'gender');
+        else
+            male.sex = 'male';
+        end
+        if isfield(male,'rxnGeneMat')
+            male = rmfield(male,'rxnGeneMat');
+        end
+        variable = male;
+    case 'Harvetta'
+        if useSolveCobraLPCPLEX
+            %COBRA v2 format
+            try
+                load Harvetta_1_01d
+            catch
+                load Harvetta_1_01c
+            end
+            female.subSystems(strmatch('Transport, endoplasmic reticular',female.subSystems,'exact'))={'Transport, endoplasmic reticulum'};
+            female.subSystems(strmatch('Arginine and Proline Metabolism',female.subSystems,'exact'))={'Arginine and proline Metabolism'};
+            female.subSystems(strmatch(' ',female.subSystems,'exact'))={'Miscellaneous'};
+            
+            if 1
+                %convert to v3 format except for coupling constraints
+                female = convertOldStyleModel(female,0,0);
+            end
+        else
+            if useReadCbModel
+                female = readCbModel('Harvetta_1_03c', 'fileType','Matlab', 'modelName', 'male');
             else
                 error('No single latest model could be found')
             end
diff --git a/src/analysis/wholeBody/PSCMToolbox/performSanityChecksonRecon.m b/src/analysis/wholeBody/PSCMToolbox/performSanityChecksonRecon.m
@@ -98,6 +98,7 @@
 model.lb(find(ismember(model.rxns,'biomass_reaction')))=0;
 model.lb(find(ismember(model.rxns,'biomass_maintenance_noTrTr')))=0;
 model.lb(find(ismember(model.rxns,'biomass_maintenance')))=0;
+model.lb(find(contains(model.rxns,'biomass')))=0;
 
 
 TestSolutionNameOpenSinks ='';
diff --git a/src/dataIntegration/metaboAnnotator/buildMetStruct/cleanUpMetabolite_structure.m b/src/dataIntegration/metaboAnnotator/buildMetStruct/cleanUpMetabolite_structure.m
@@ -21,6 +21,8 @@
 
 
 for i = startSearch : endSearch
+    i
+    
     % remove spaces in keggIds
     if isempty(find(isnan(metabolite_structure.(Mets{i}).keggId))) && ~isnumeric(metabolite_structure.(Mets{i}).keggId)
         metabolite_structure.(Mets{i}).keggId = regexprep(metabolite_structure.(Mets{i}).keggId,'\s','');
diff --git a/src/dataIntegration/metaboAnnotator/connect2resources/parseBridgeDb.m b/src/dataIntegration/metaboAnnotator/connect2resources/parseBridgeDb.m
@@ -75,9 +75,10 @@
 for i = startSearch : endSearch
     % use Kegg as query term
     % if ~isempty(metabolite_structure.(Mets{i}).keggId) && isempty(find(isnan(metabolite_structure.(Mets{i}).keggId),1))
-    i
+    progress = i/(endSearch-startSearch+1);
+    fprintf([num2str(progress*100) ' percent ... Retrieving Bridge DB data ... \n']);
     for z = 1 : size(mapping,1)
-        if isfield(metabolite_structure.(Mets{i}),(mapping{z,1})) && ~isempty(metabolite_structure.(Mets{i}).(mapping{z,1})) && isempty(find(isnan(metabolite_structure.(Mets{i}).(mapping{z,1})),1))
+        if isfield(metabolite_structure.(Mets{i}),(mapping{z,1})) && ~isempty(metabolite_structure.(Mets{i}).(mapping{z,1})) && isempty(find(isnan(metabolite_structure.(Mets{i}).(mapping{z,1})),1)) 
             %  search for exact term
             try
                 % check if the field contains a list, if not go ahead with
diff --git a/src/dataIntegration/metaboAnnotator/connect2resources/parseDBCollection.m b/src/dataIntegration/metaboAnnotator/connect2resources/parseDBCollection.m
@@ -21,6 +21,7 @@
     startSearch = 1;
 end
 if ~exist('endSearch','var')
+    F = fieldnames(metabolite_structure);
     endSearch = length(F);
 end
 
diff --git a/src/dataIntegration/metaboAnnotator/connect2resources/parseHmdbWebPage.m b/src/dataIntegration/metaboAnnotator/connect2resources/parseHmdbWebPage.m
@@ -47,6 +47,9 @@
 fields = fieldnames(metabolite_structure.(Mets{1}));
 
 for i = startSearch : endSearch
+    
+    progress = i/(endSearch-startSearch+1);
+    fprintf([num2str(progress*100) ' percent ... Retrieving HMDB data ... \n']);
     if ~isempty(metabolite_structure.(Mets{i}).hmdb) && isempty(find(isnan(metabolite_structure.(Mets{i}).hmdb),1))
         % check that smile or inchiKey does not exist
         % go to chebi and parse website for smile
diff --git a/src/dataIntegration/metaboAnnotator/connect2resources/parseKeggWebpage.m b/src/dataIntegration/metaboAnnotator/connect2resources/parseKeggWebpage.m
@@ -1,5 +1,22 @@
 function [metabolite_structure,IDsAdded] = parseKeggWebpage(metabolite_structure,startSearch,endSearch)
 
+% This function searches kegg for identifiers. It will either use
+% kegg ids provided by the metabolite structure.
+%
+% INPUT
+% metabolite_structure  metabolite structure
+% startSearch           specify where the search should start in the
+%                       metabolite structure. Must be numeric (optional, default: all metabolites
+%                       in the structure will be search for)
+% endSearch             specify where the search should end in the
+%                       metabolite structure. Must be numeric (optional, default: all metabolites
+%                       in the structure will be search for)
+%
+% OUTPUT
+% metabolite_structure  updated metabolite structure
+%
+%
+% Ines Thiele, 09/2021
 
 annotationSource = 'Kegg website';
 annotationType = 'automatic';
diff --git a/src/reconstruction/metaboRePort/reportTemplate.html b/src/reconstruction/metaboRePort/reportTemplate.html
@@ -2305,6 +2305,8 @@ <h4>Overall score</h4>
                         </div>
             </div>
          </div>
+         <br>
+         <br>
     </section>
   
   
@@ -2323,7 +2325,7 @@ <h4>Overall score</h4>
                 <p >Powered by the <a href="https://vmh.life" target="_blank" style="color: #FFFFFF">VMH</a> and the
                   <a href="https://opencobra.github.io/cobratoolbox/stable/" target="_blank" style="color: #FFFFFF"> COBRA Toolbox</a>  </p>
         
-                <p >Copyright &copy; 2022 <a href="https://thielelab.eu" target="_blank" style="color: #FFFFFF">ThieleLab@Uni Galway, Ireland. </a><br></p>
+                <p >Copyright &copy; 2024 <a href="http://www.thielelab.eu" target="_blank" style="color: #FFFFFF">ThieleLab@Uni Galway, Ireland. </a><br></p>
               </div>
              <!--      <div class="col">
                 <p>          
diff --git a/src/reconstruction/metaboRePort/tutorial_MetaboRePort.m b/src/reconstruction/metaboRePort/tutorial_MetaboRePort.m
diff --git a/src/reconstruction/metaboRePort/tutorial_MetaboRePort_APOLLO.m b/src/reconstruction/metaboRePort/tutorial_MetaboRePort_APOLLO.m
diff --git a/src/reconstruction/refinement/addSinkReactions.m b/src/reconstruction/refinement/addSinkReactions.m
diff --git a/src/reconstruction/refinement/changeGeneAssociation.m b/src/reconstruction/refinement/changeGeneAssociation.m