Skip to content

Commit

Permalink
Merge pull request #2424 from ithiele/it_04_07_2024
Browse files Browse the repository at this point in the history
Changes to metaboreport and WBM modelling
  • Loading branch information
rmtfleming authored Mar 5, 2025
2 parents 4978a98 + 80847f4 commit 0eabab2
Show file tree
Hide file tree
Showing 14 changed files with 272 additions and 94 deletions.
16 changes: 12 additions & 4 deletions src/analysis/wholeBody/PSCMToolbox/getRxnsFromGene.m
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,8 @@

assoR = [];
for i = 1 : length(model.grRules)
cnt = 0;
cnt = 0;
if ~isempty(strfind(model.grRules{i},gene))

% case 1 - 1 gene
if ~isempty(strmatch(model.grRules{i},gene,'exact')) % perfect match
assoR(i,1)=1;
Expand All @@ -37,12 +36,18 @@
assoR(i,1)=1;
end
elseif ~isempty(strfind(model.grRules{i},{' or '})) % consider cases of alt splices and ' or '

[geneTok] = strtok(gene,'.');
geneTok = regexprep(geneTok,'\(','');
geneTok = regexprep(geneTok,'\)','');
geneTok = regexprep(geneTok,' ','');
if isempty(strfind(model.grRules{i},{' and '})) % only 'or's
[c,d] = split(model.grRules{i},' or ');
for j = 1 : length(c)
cTok = strtok(c{j},'.');
cTok = regexprep(cTok,'\(','');
cTok = regexprep(cTok,'\)','');
cTok = regexprep(cTok,' ','');
if ~isempty(strmatch(geneTok,cTok,'exact')) || length(find(strmatch(geneTok,cTok,'exact')))>=1% perfect match
cnt = cnt +1;
end
Expand All @@ -51,7 +56,7 @@
if cnt == length(c) % all genes in or are alt splice forms
assoR(i,1)=1;
end
elseif cnt>0
elseif cnt>0
assoR(i,1)=1;
end
else % contains 'and'
Expand All @@ -61,6 +66,9 @@
[a,b] = split(c{j},' and '); % split the 'and's
for k = 1 : length(a)
aTok = strtok(a{k},'.');
aTok = regexprep(aTok,'\(','');
aTok = regexprep(aTok,'\)','');
aTok = regexprep(aTok,' ','');
if ~isempty(strmatch(geneTok,aTok,'exact')) % perfect match
cnt = cnt +1;
end
Expand Down
29 changes: 26 additions & 3 deletions src/analysis/wholeBody/PSCMToolbox/io/addReactionsHH.m
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
function [model] = addReactionsHH(model, rxnAbbrs,rxnNames, reactions, gprs, subSystems,couplingFactor)
function [model] = addReactionsHH(model, rxnAbbrs,rxnNames, reactions, gprs, subSystems,couplingFactor,rxnNotes,rxnReferences)
% This function add reaction(s) to the whole-body metabolic model,
% including the required coupling constraint.
% This function is based on model = addReaction(model,'newRxn1','A -> B + 2 C')
Expand All @@ -13,15 +13,29 @@
% gprs List of grRules
% subSystems List of subSystems
% couplingFactor Coupling factor to be added, default 20000
% rxnNotes List of notes for the reactions (optional)
% rxnReferences List of references for the reactions (optional)
%
% OUTPUT
% model Updated model structure
%
% Ines Thiele 2018
% IT - added gpr rules to be properly taken into account

if ~exist('couplingFactor','var')

if ~exist('couplingFactor','var') || ~isempty(couplingFactor)
couplingFactor = 20000;
end
if ~exist('rxnNotes','var') || isempty(rxnNotes)
rxnNotesPresent = 0;
else
rxnNotesPresent = 1;
end
if ~exist('rxnReferences','var') || isempty(rxnReferences)
rxnRefPresent = 0;
else
rxnRefPresent = 1;
end

for i = 1 : length(rxnAbbrs)

Expand All @@ -31,8 +45,17 @@
model = addReaction(model,rxnAbbrs{i},reactions{i});
A = strmatch(rxnAbbrs(i),model.rxns,'exact');
model.subSystems(A) = subSystems(i);
model.grRules(A) = gprs(i);
%model.grRules(A) = gprs(i);
if ~isempty(gprs{i})
model = changeGeneAssociation(model, rxnAbbrs{i}, gprs{i}, {}, {}, 0);
end
model.rxnNames(A) = rxnNames(i);
if isfield(model,'rxnNotes') && rxnNotesPresent == 1
model.rxnNotes(A) = rxnNotes(i);
end
if isfield(model,'rxnReferences') && rxnRefPresent == 1
model.rxnReferences(A) = rxnReferences(i);
end
[token,rem] = strtok(rxnAbbrs{i},'_');
% find organ biomass
if strcmp(token,'sIEC')
Expand Down
125 changes: 60 additions & 65 deletions src/analysis/wholeBody/PSCMToolbox/io/loadPSCMfile.m
Original file line number Diff line number Diff line change
Expand Up @@ -131,71 +131,66 @@
% Author:
% - Tim Hensen, August 2024

if isempty(searchDirectory)
searchDirectory = what('2020_WholeBodyModelling\Data').path;
end

if nargin<3
excludeVersion = '';
end

% Find latest harvery/harvetta models
WBMs = what(searchDirectory).mat;

% Check if any WBMs can be found
if isempty(WBMs)
error('No WBM .mat files found in folder.')
end

% Remove .mat
WBMs = erase(WBMs,'.mat');

% Exclude WBMs if needed
if ~isempty(excludeVersion)
WBMs(excludeVersion)=[];
end

% Filter on Harvey or Harvetta
WBMs(~contains(WBMs,filename))=[];

% Find version numbers in reconstruction names
modelNumbers = regexp(WBMs,'[0-9]','match');

% Produce 2 column numerical array with the major version in the first
% column and the minor version in the second column.
modelNumbers = string(vertcat(modelNumbers{:}));
modelNumbers = str2double(horzcat(modelNumbers(:,1), strcat(modelNumbers(:,2), modelNumbers(:,3))));

% Add the version letter
letterVersions = string(regexp(WBMs,'(?<=\d)[a-zA-Z]','match'));
% Convert lettes to numbers using ascii table
modelNumbers = [modelNumbers double(char(letterVersions))-96]; % https://www.asciitable.com/

% Find latest version
checkLatest = @(x) max(x) == x;

% Find versions with the latest major release
latestMajor = checkLatest(modelNumbers(:,1));
if sum(latestMajor)==1
% Select model if only one entry has the highest major release
nameOfWBM = WBMs(latestMajor);
else
% Remove all entries without the latest major release
modelNumbers(~latestMajor,:)=[];
WBMs(~latestMajor)=[];
% Find entries with the latest minor release
latestMinor = checkLatest(modelNumbers(:,2));
if sum(latestMinor)==1
% Select model if only one entry has the highest major release
nameOfWBM = WBMs(latestMinor);
else
% Remove all entries without the latest minor release
modelNumbers(~latestMinor,:)=[];
WBMs(~latestMinor)=[];
% Find entries with the latest letter release
latestLetter = checkLatest(modelNumbers(:,3));
if sum(latestLetter)==1
nameOfWBM = WBMs(latestLetter);
% global useSolveCobraLPCPLEX
% useSolveCobraLPCPLEX

useReadCbModel = 0;
switch fileName
case 'Harvey'
if useSolveCobraLPCPLEX
%COBRA v2 format
load Harvey_1_01c

male.subSystems(strmatch('Transport, endoplasmic reticular',male.subSystems,'exact'))={'Transport, endoplasmic reticulum'};
male.subSystems(strmatch('Arginine and Proline Metabolism',male.subSystems,'exact'))={'Arginine and proline Metabolism'};
male.subSystems(strmatch(' ',male.subSystems,'exact'))={'Miscellaneous'};

if 1
%convert to v3 format except for coupling constraints
male = convertOldStyleModel(male,0,0);
end
else
if useReadCbModel
male = readCbModel('Harvey_1_03c', 'fileType','Matlab', 'modelName', 'male');
else
%COBRA v3 format
%load Harvey_1_02c
try
load Harvey_1_03d
catch
load Harvey_1_03c
end
end
end
if isfield(male,'gender')
male.sex = male.gender;
male = rmfield(male,'gender');
else
male.sex = 'male';
end
if isfield(male,'rxnGeneMat')
male = rmfield(male,'rxnGeneMat');
end
variable = male;
case 'Harvetta'
if useSolveCobraLPCPLEX
%COBRA v2 format
try
load Harvetta_1_01d
catch
load Harvetta_1_01c
end
female.subSystems(strmatch('Transport, endoplasmic reticular',female.subSystems,'exact'))={'Transport, endoplasmic reticulum'};
female.subSystems(strmatch('Arginine and Proline Metabolism',female.subSystems,'exact'))={'Arginine and proline Metabolism'};
female.subSystems(strmatch(' ',female.subSystems,'exact'))={'Miscellaneous'};

if 1
%convert to v3 format except for coupling constraints
female = convertOldStyleModel(female,0,0);
end
else
if useReadCbModel
female = readCbModel('Harvetta_1_03c', 'fileType','Matlab', 'modelName', 'male');
else
error('No single latest model could be found')
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@
model.lb(find(ismember(model.rxns,'biomass_reaction')))=0;
model.lb(find(ismember(model.rxns,'biomass_maintenance_noTrTr')))=0;
model.lb(find(ismember(model.rxns,'biomass_maintenance')))=0;
model.lb(find(contains(model.rxns,'biomass')))=0;


TestSolutionNameOpenSinks ='';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@


for i = startSearch : endSearch
i

% remove spaces in keggIds
if isempty(find(isnan(metabolite_structure.(Mets{i}).keggId))) && ~isnumeric(metabolite_structure.(Mets{i}).keggId)
metabolite_structure.(Mets{i}).keggId = regexprep(metabolite_structure.(Mets{i}).keggId,'\s','');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,10 @@
for i = startSearch : endSearch
% use Kegg as query term
% if ~isempty(metabolite_structure.(Mets{i}).keggId) && isempty(find(isnan(metabolite_structure.(Mets{i}).keggId),1))
i
progress = i/(endSearch-startSearch+1);
fprintf([num2str(progress*100) ' percent ... Retrieving Bridge DB data ... \n']);
for z = 1 : size(mapping,1)
if isfield(metabolite_structure.(Mets{i}),(mapping{z,1})) && ~isempty(metabolite_structure.(Mets{i}).(mapping{z,1})) && isempty(find(isnan(metabolite_structure.(Mets{i}).(mapping{z,1})),1))
if isfield(metabolite_structure.(Mets{i}),(mapping{z,1})) && ~isempty(metabolite_structure.(Mets{i}).(mapping{z,1})) && isempty(find(isnan(metabolite_structure.(Mets{i}).(mapping{z,1})),1))
% search for exact term
try
% check if the field contains a list, if not go ahead with
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
startSearch = 1;
end
if ~exist('endSearch','var')
F = fieldnames(metabolite_structure);
endSearch = length(F);
end

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@
fields = fieldnames(metabolite_structure.(Mets{1}));

for i = startSearch : endSearch

progress = i/(endSearch-startSearch+1);
fprintf([num2str(progress*100) ' percent ... Retrieving HMDB data ... \n']);
if ~isempty(metabolite_structure.(Mets{i}).hmdb) && isempty(find(isnan(metabolite_structure.(Mets{i}).hmdb),1))
% check that smile or inchiKey does not exist
% go to chebi and parse website for smile
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,22 @@
function [metabolite_structure,IDsAdded] = parseKeggWebpage(metabolite_structure,startSearch,endSearch)

% This function searches kegg for identifiers. It will either use
% kegg ids provided by the metabolite structure.
%
% INPUT
% metabolite_structure metabolite structure
% startSearch specify where the search should start in the
% metabolite structure. Must be numeric (optional, default: all metabolites
% in the structure will be search for)
% endSearch specify where the search should end in the
% metabolite structure. Must be numeric (optional, default: all metabolites
% in the structure will be search for)
%
% OUTPUT
% metabolite_structure updated metabolite structure
%
%
% Ines Thiele, 09/2021

annotationSource = 'Kegg website';
annotationType = 'automatic';
Expand Down
4 changes: 3 additions & 1 deletion src/reconstruction/metaboRePort/reportTemplate.html
Original file line number Diff line number Diff line change
Expand Up @@ -2305,6 +2305,8 @@ <h4>Overall score</h4>
</div>
</div>
</div>
<br>
<br>
</section>


Expand All @@ -2323,7 +2325,7 @@ <h4>Overall score</h4>
<p >Powered by the <a href="https://vmh.life" target="_blank" style="color: #FFFFFF">VMH</a> and the
<a href="https://opencobra.github.io/cobratoolbox/stable/" target="_blank" style="color: #FFFFFF"> COBRA Toolbox</a> </p>

<p >Copyright &copy; 2022 <a href="https://thielelab.eu" target="_blank" style="color: #FFFFFF">ThieleLab@Uni Galway, Ireland. </a><br></p>
<p >Copyright &copy; 2024 <a href="http://www.thielelab.eu" target="_blank" style="color: #FFFFFF">ThieleLab@Uni Galway, Ireland. </a><br></p>
</div>
<!-- <div class="col">
<p>
Expand Down
Loading

0 comments on commit 0eabab2

Please sign in to comment.