diff --git a/src/analysis/wholeBody/PSCMToolbox/getRxnsFromGene.m b/src/analysis/wholeBody/PSCMToolbox/getRxnsFromGene.m index d64b4c1397..455368f7f5 100644 --- a/src/analysis/wholeBody/PSCMToolbox/getRxnsFromGene.m +++ b/src/analysis/wholeBody/PSCMToolbox/getRxnsFromGene.m @@ -24,9 +24,8 @@ assoR = []; for i = 1 : length(model.grRules) - cnt = 0; + cnt = 0; if ~isempty(strfind(model.grRules{i},gene)) - % case 1 - 1 gene if ~isempty(strmatch(model.grRules{i},gene,'exact')) % perfect match assoR(i,1)=1; @@ -37,12 +36,18 @@ assoR(i,1)=1; end elseif ~isempty(strfind(model.grRules{i},{' or '})) % consider cases of alt splices and ' or ' - + [geneTok] = strtok(gene,'.'); + geneTok = regexprep(geneTok,'\(',''); + geneTok = regexprep(geneTok,'\)',''); + geneTok = regexprep(geneTok,' ',''); if isempty(strfind(model.grRules{i},{' and '})) % only 'or's [c,d] = split(model.grRules{i},' or '); for j = 1 : length(c) cTok = strtok(c{j},'.'); + cTok = regexprep(cTok,'\(',''); + cTok = regexprep(cTok,'\)',''); + cTok = regexprep(cTok,' ',''); if ~isempty(strmatch(geneTok,cTok,'exact')) || length(find(strmatch(geneTok,cTok,'exact')))>=1% perfect match cnt = cnt +1; end @@ -51,7 +56,7 @@ if cnt == length(c) % all genes in or are alt splice forms assoR(i,1)=1; end - elseif cnt>0 + elseif cnt>0 assoR(i,1)=1; end else % contains 'and' @@ -61,6 +66,9 @@ [a,b] = split(c{j},' and '); % split the 'and's for k = 1 : length(a) aTok = strtok(a{k},'.'); + aTok = regexprep(aTok,'\(',''); + aTok = regexprep(aTok,'\)',''); + aTok = regexprep(aTok,' ',''); if ~isempty(strmatch(geneTok,aTok,'exact')) % perfect match cnt = cnt +1; end diff --git a/src/analysis/wholeBody/PSCMToolbox/io/addReactionsHH.m b/src/analysis/wholeBody/PSCMToolbox/io/addReactionsHH.m index a4d1ca3288..ce9af9475f 100644 --- a/src/analysis/wholeBody/PSCMToolbox/io/addReactionsHH.m +++ b/src/analysis/wholeBody/PSCMToolbox/io/addReactionsHH.m @@ -1,4 +1,4 @@ -function [model] = addReactionsHH(model, rxnAbbrs,rxnNames, reactions, gprs, subSystems,couplingFactor) +function [model] = addReactionsHH(model, rxnAbbrs,rxnNames, reactions, gprs, subSystems,couplingFactor,rxnNotes,rxnReferences) % This function add reaction(s) to the whole-body metabolic model, % including the required coupling constraint. % This function is based on model = addReaction(model,'newRxn1','A -> B + 2 C') @@ -13,15 +13,29 @@ % gprs List of grRules % subSystems List of subSystems % couplingFactor Coupling factor to be added, default 20000 +% rxnNotes List of notes for the reactions (optional) +% rxnReferences List of references for the reactions (optional) % % OUTPUT % model Updated model structure % % Ines Thiele 2018 +% IT - added gpr rules to be properly taken into account -if ~exist('couplingFactor','var') + +if ~exist('couplingFactor','var') || ~isempty(couplingFactor) couplingFactor = 20000; end +if ~exist('rxnNotes','var') || isempty(rxnNotes) + rxnNotesPresent = 0; +else + rxnNotesPresent = 1; +end +if ~exist('rxnReferences','var') || isempty(rxnReferences) + rxnRefPresent = 0; +else + rxnRefPresent = 1; +end for i = 1 : length(rxnAbbrs) @@ -31,8 +45,17 @@ model = addReaction(model,rxnAbbrs{i},reactions{i}); A = strmatch(rxnAbbrs(i),model.rxns,'exact'); model.subSystems(A) = subSystems(i); - model.grRules(A) = gprs(i); + %model.grRules(A) = gprs(i); + if ~isempty(gprs{i}) + model = changeGeneAssociation(model, rxnAbbrs{i}, gprs{i}, {}, {}, 0); + end model.rxnNames(A) = rxnNames(i); + if isfield(model,'rxnNotes') && rxnNotesPresent == 1 + model.rxnNotes(A) = rxnNotes(i); + end + if isfield(model,'rxnReferences') && rxnRefPresent == 1 + model.rxnReferences(A) = rxnReferences(i); + end [token,rem] = strtok(rxnAbbrs{i},'_'); % find organ biomass if strcmp(token,'sIEC') diff --git a/src/analysis/wholeBody/PSCMToolbox/io/loadPSCMfile.m b/src/analysis/wholeBody/PSCMToolbox/io/loadPSCMfile.m index e5dec8a6de..8f6c75e6b6 100644 --- a/src/analysis/wholeBody/PSCMToolbox/io/loadPSCMfile.m +++ b/src/analysis/wholeBody/PSCMToolbox/io/loadPSCMfile.m @@ -131,71 +131,66 @@ % Author: % - Tim Hensen, August 2024 -if isempty(searchDirectory) - searchDirectory = what('2020_WholeBodyModelling\Data').path; -end - -if nargin<3 - excludeVersion = ''; -end - -% Find latest harvery/harvetta models -WBMs = what(searchDirectory).mat; - -% Check if any WBMs can be found -if isempty(WBMs) - error('No WBM .mat files found in folder.') -end - -% Remove .mat -WBMs = erase(WBMs,'.mat'); - -% Exclude WBMs if needed -if ~isempty(excludeVersion) - WBMs(excludeVersion)=[]; -end - -% Filter on Harvey or Harvetta -WBMs(~contains(WBMs,filename))=[]; - -% Find version numbers in reconstruction names -modelNumbers = regexp(WBMs,'[0-9]','match'); - -% Produce 2 column numerical array with the major version in the first -% column and the minor version in the second column. -modelNumbers = string(vertcat(modelNumbers{:})); -modelNumbers = str2double(horzcat(modelNumbers(:,1), strcat(modelNumbers(:,2), modelNumbers(:,3)))); - -% Add the version letter -letterVersions = string(regexp(WBMs,'(?<=\d)[a-zA-Z]','match')); -% Convert lettes to numbers using ascii table -modelNumbers = [modelNumbers double(char(letterVersions))-96]; % https://www.asciitable.com/ - -% Find latest version -checkLatest = @(x) max(x) == x; - -% Find versions with the latest major release -latestMajor = checkLatest(modelNumbers(:,1)); -if sum(latestMajor)==1 - % Select model if only one entry has the highest major release - nameOfWBM = WBMs(latestMajor); -else - % Remove all entries without the latest major release - modelNumbers(~latestMajor,:)=[]; - WBMs(~latestMajor)=[]; - % Find entries with the latest minor release - latestMinor = checkLatest(modelNumbers(:,2)); - if sum(latestMinor)==1 - % Select model if only one entry has the highest major release - nameOfWBM = WBMs(latestMinor); - else - % Remove all entries without the latest minor release - modelNumbers(~latestMinor,:)=[]; - WBMs(~latestMinor)=[]; - % Find entries with the latest letter release - latestLetter = checkLatest(modelNumbers(:,3)); - if sum(latestLetter)==1 - nameOfWBM = WBMs(latestLetter); +% global useSolveCobraLPCPLEX +% useSolveCobraLPCPLEX + +useReadCbModel = 0; +switch fileName + case 'Harvey' + if useSolveCobraLPCPLEX + %COBRA v2 format + load Harvey_1_01c + + male.subSystems(strmatch('Transport, endoplasmic reticular',male.subSystems,'exact'))={'Transport, endoplasmic reticulum'}; + male.subSystems(strmatch('Arginine and Proline Metabolism',male.subSystems,'exact'))={'Arginine and proline Metabolism'}; + male.subSystems(strmatch(' ',male.subSystems,'exact'))={'Miscellaneous'}; + + if 1 + %convert to v3 format except for coupling constraints + male = convertOldStyleModel(male,0,0); + end + else + if useReadCbModel + male = readCbModel('Harvey_1_03c', 'fileType','Matlab', 'modelName', 'male'); + else + %COBRA v3 format + %load Harvey_1_02c + try + load Harvey_1_03d + catch + load Harvey_1_03c + end + end + end + if isfield(male,'gender') + male.sex = male.gender; + male = rmfield(male,'gender'); + else + male.sex = 'male'; + end + if isfield(male,'rxnGeneMat') + male = rmfield(male,'rxnGeneMat'); + end + variable = male; + case 'Harvetta' + if useSolveCobraLPCPLEX + %COBRA v2 format + try + load Harvetta_1_01d + catch + load Harvetta_1_01c + end + female.subSystems(strmatch('Transport, endoplasmic reticular',female.subSystems,'exact'))={'Transport, endoplasmic reticulum'}; + female.subSystems(strmatch('Arginine and Proline Metabolism',female.subSystems,'exact'))={'Arginine and proline Metabolism'}; + female.subSystems(strmatch(' ',female.subSystems,'exact'))={'Miscellaneous'}; + + if 1 + %convert to v3 format except for coupling constraints + female = convertOldStyleModel(female,0,0); + end + else + if useReadCbModel + female = readCbModel('Harvetta_1_03c', 'fileType','Matlab', 'modelName', 'male'); else error('No single latest model could be found') end diff --git a/src/analysis/wholeBody/PSCMToolbox/performSanityChecksonRecon.m b/src/analysis/wholeBody/PSCMToolbox/performSanityChecksonRecon.m index 185ed22c84..6dfd9c3be4 100644 --- a/src/analysis/wholeBody/PSCMToolbox/performSanityChecksonRecon.m +++ b/src/analysis/wholeBody/PSCMToolbox/performSanityChecksonRecon.m @@ -98,6 +98,7 @@ model.lb(find(ismember(model.rxns,'biomass_reaction')))=0; model.lb(find(ismember(model.rxns,'biomass_maintenance_noTrTr')))=0; model.lb(find(ismember(model.rxns,'biomass_maintenance')))=0; +model.lb(find(contains(model.rxns,'biomass')))=0; TestSolutionNameOpenSinks =''; diff --git a/src/dataIntegration/metaboAnnotator/buildMetStruct/cleanUpMetabolite_structure.m b/src/dataIntegration/metaboAnnotator/buildMetStruct/cleanUpMetabolite_structure.m index fa6e65efdb..9094f76bd9 100644 --- a/src/dataIntegration/metaboAnnotator/buildMetStruct/cleanUpMetabolite_structure.m +++ b/src/dataIntegration/metaboAnnotator/buildMetStruct/cleanUpMetabolite_structure.m @@ -21,6 +21,8 @@ for i = startSearch : endSearch + i + % remove spaces in keggIds if isempty(find(isnan(metabolite_structure.(Mets{i}).keggId))) && ~isnumeric(metabolite_structure.(Mets{i}).keggId) metabolite_structure.(Mets{i}).keggId = regexprep(metabolite_structure.(Mets{i}).keggId,'\s',''); diff --git a/src/dataIntegration/metaboAnnotator/connect2resources/parseBridgeDb.m b/src/dataIntegration/metaboAnnotator/connect2resources/parseBridgeDb.m index c2bb1df524..25066a27ae 100644 --- a/src/dataIntegration/metaboAnnotator/connect2resources/parseBridgeDb.m +++ b/src/dataIntegration/metaboAnnotator/connect2resources/parseBridgeDb.m @@ -75,9 +75,10 @@ for i = startSearch : endSearch % use Kegg as query term % if ~isempty(metabolite_structure.(Mets{i}).keggId) && isempty(find(isnan(metabolite_structure.(Mets{i}).keggId),1)) - i + progress = i/(endSearch-startSearch+1); + fprintf([num2str(progress*100) ' percent ... Retrieving Bridge DB data ... \n']); for z = 1 : size(mapping,1) - if isfield(metabolite_structure.(Mets{i}),(mapping{z,1})) && ~isempty(metabolite_structure.(Mets{i}).(mapping{z,1})) && isempty(find(isnan(metabolite_structure.(Mets{i}).(mapping{z,1})),1)) + if isfield(metabolite_structure.(Mets{i}),(mapping{z,1})) && ~isempty(metabolite_structure.(Mets{i}).(mapping{z,1})) && isempty(find(isnan(metabolite_structure.(Mets{i}).(mapping{z,1})),1)) % search for exact term try % check if the field contains a list, if not go ahead with diff --git a/src/dataIntegration/metaboAnnotator/connect2resources/parseDBCollection.m b/src/dataIntegration/metaboAnnotator/connect2resources/parseDBCollection.m index aad0718a49..4bb07e4e56 100644 --- a/src/dataIntegration/metaboAnnotator/connect2resources/parseDBCollection.m +++ b/src/dataIntegration/metaboAnnotator/connect2resources/parseDBCollection.m @@ -21,6 +21,7 @@ startSearch = 1; end if ~exist('endSearch','var') + F = fieldnames(metabolite_structure); endSearch = length(F); end diff --git a/src/dataIntegration/metaboAnnotator/connect2resources/parseHmdbWebPage.m b/src/dataIntegration/metaboAnnotator/connect2resources/parseHmdbWebPage.m index 19ef49385b..1a8064ead2 100644 --- a/src/dataIntegration/metaboAnnotator/connect2resources/parseHmdbWebPage.m +++ b/src/dataIntegration/metaboAnnotator/connect2resources/parseHmdbWebPage.m @@ -47,6 +47,9 @@ fields = fieldnames(metabolite_structure.(Mets{1})); for i = startSearch : endSearch + + progress = i/(endSearch-startSearch+1); + fprintf([num2str(progress*100) ' percent ... Retrieving HMDB data ... \n']); if ~isempty(metabolite_structure.(Mets{i}).hmdb) && isempty(find(isnan(metabolite_structure.(Mets{i}).hmdb),1)) % check that smile or inchiKey does not exist % go to chebi and parse website for smile diff --git a/src/dataIntegration/metaboAnnotator/connect2resources/parseKeggWebpage.m b/src/dataIntegration/metaboAnnotator/connect2resources/parseKeggWebpage.m index f0af04753a..9aefb53441 100644 --- a/src/dataIntegration/metaboAnnotator/connect2resources/parseKeggWebpage.m +++ b/src/dataIntegration/metaboAnnotator/connect2resources/parseKeggWebpage.m @@ -1,5 +1,22 @@ function [metabolite_structure,IDsAdded] = parseKeggWebpage(metabolite_structure,startSearch,endSearch) +% This function searches kegg for identifiers. It will either use +% kegg ids provided by the metabolite structure. +% +% INPUT +% metabolite_structure metabolite structure +% startSearch specify where the search should start in the +% metabolite structure. Must be numeric (optional, default: all metabolites +% in the structure will be search for) +% endSearch specify where the search should end in the +% metabolite structure. Must be numeric (optional, default: all metabolites +% in the structure will be search for) +% +% OUTPUT +% metabolite_structure updated metabolite structure +% +% +% Ines Thiele, 09/2021 annotationSource = 'Kegg website'; annotationType = 'automatic'; diff --git a/src/reconstruction/metaboRePort/reportTemplate.html b/src/reconstruction/metaboRePort/reportTemplate.html index 8765ad21f5..60a4a5d320 100644 --- a/src/reconstruction/metaboRePort/reportTemplate.html +++ b/src/reconstruction/metaboRePort/reportTemplate.html @@ -2305,6 +2305,8 @@
Powered by the VMH and the COBRA Toolbox
-Copyright © 2022 ThieleLab@Uni Galway, Ireland.
Copyright © 2024 ThieleLab@Uni Galway, Ireland.