Skip to content

Commit 4978a98

Browse files
authored
Merge pull request #2385 from Sami990/master
functions that calculate weights of cf and cr in eFBA
2 parents 0478293 + 3c0cb15 commit 4978a98

File tree

3 files changed

+345
-0
lines changed

3 files changed

+345
-0
lines changed
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
function geneWeight = calculateGeneWeight(model,Transcriptomic, Threshold)
% Compute one expression-derived weight per stoichiometrically consistent
% reaction by evaluating its gene-protein-reaction rule (grRules) against
% transcriptomic data.
%
% USAGE:
%    geneWeight = calculateGeneWeight(model, Transcriptomic, Threshold)
%
% INPUTS:
%    model:          A COBRA model with mandatory fields: grRules and
%                    SConsistentRxnBool.
%    Transcriptomic: A table with a variable named entrezID (numeric or
%                    text gene identifiers) and gene expression values.
%                    Expression is read from column 2; for genes prefixed
%                    with 'rec2_' it is read from column 3.
%    Threshold:      Threshold for transcriptomic data. An "and" group whose
%                    minimum expression is below it contributes 0.
%
% OUTPUT:
%    geneWeight:     Column vector with one entry per reaction selected by
%                    model.SConsistentRxnBool. Each entry is the sum over
%                    the "or" alternatives of the minimum expression within
%                    each "and" group. Reactions for which no gene could be
%                    matched receive the median of all reaction values.
%
% Note: geneWeight is a value that can be used in entropicFBA to assign a weight
% that corresponds to the gene expression value to internal reactions. If you
% want to use this value for this purpose, use the following formula:
% cr = cf = -log(geneWeight + 1e-8) + 1 - ci/g
% Default values: g = 2 , ci = 0
%
% Author : Samira Ranjbar 2024
% (The algorithm is explained here:
% https://doi.org/10.1016/j.isci.2023.106201)
%-------------------------------------------------

% Validate both mandatory fields before either of them is used.
if ~isfield(model,'grRules')
    error('grRules is missiming')
end
if ~isfield(model,'SConsistentRxnBool')
    error('SConsistentRxnBool is missing')
end
rule = model.grRules(model.SConsistentRxnBool);

% Convert the identifiers to space-free character vectors once, so that
% numeric and text identifiers are handled the same way in every branch.
entrezID = Transcriptomic.entrezID;
if isnumeric(entrezID)
    idStrings = cellstr(num2str(entrezID(:)));
else
    idStrings = cellstr(entrezID);
end
idStrings = strrep(idStrings, ' ', '');

nRxn = numel(rule);
rxnExpression = zeros(nRxn, 1);  % sum over the "or" alternatives
hasData = false(nRxn, 1);        % true when at least one gene was matched

for i = 1:nRxn
    % Divide the rule by "or" and iterate over each resulting subrule
    subrules = strsplit(strjoin(rule(i)), ' or ');
    orList = [];

    for subruleIndex = 1:numel(subrules)
        % Split each subrule by "and" to get a list of genes, then strip
        % parentheses and spaces from every gene name
        genes = strsplit(subrules{subruleIndex}, ' and ');
        genes = regexprep(genes, '[() ]', '');

        % Select the expression column. A prefix is only honoured when
        % every gene of the group carries it.
        if all(contains(genes, 'rec1_'))
            expressionColumn = 2;
            genes = strrep(genes, 'rec1_', '');
        elseif all(contains(genes, 'rec2_'))
            expressionColumn = 3;
            genes = strrep(genes, 'rec2_', '');
        else
            expressionColumn = 2;
        end

        % Evaluate the minimum expression value of the "and" group
        values = Transcriptomic{ismember(idStrings, genes), expressionColumn};
        value = min(values);

        % Apply the threshold (value is empty when no gene was matched)
        if ~isempty(value) && value < Threshold
            value = 0;
        end

        % Add the minimum to the list
        orList = [orList; value]; %#ok<AGROW>
    end

    hasData(i) = ~isempty(orList);
    rxnExpression(i) = sum(orList);
end

% Reactions without any matched gene get the median reaction value. The
% median is taken over all reactions, including the zeros of the unmatched
% ones, as in the original calculation. The rules were already restricted
% to the consistent reactions, so no further filtering is needed here.
medianValue = median(rxnExpression);
geneWeight = rxnExpression;
geneWeight(~hasData) = medianValue;
Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
function [substratesMass, productsMass] = calculateReactionMasses(model)
% Calculate, for every stoichiometrically consistent reaction, the total
% mass of its substrates and the total mass of its products. Molecular
% weights that cannot be computed from the formula (e.g. R-groups) are
% estimated from a linear fit of molecular weight against a non-negative
% left null space vector of the stoichiometric matrix. Useful to unbias
% flux through reactions with massive metabolites in entropicFBA.
%
% USAGE:
%    [substratesMass, productsMass] = calculateReactionMasses(model)
%
% INPUT:
%    model: COBRA model with fields S, mets, rxns, SConsistentMetBool and
%           SConsistentRxnBool. If metFormulas is absent it is copied from
%           'Recon3DModel_301_xomics_input.mat'. rxnFormulas is optional
%           and only used for reporting.
%
% OUTPUTS:
%    substratesMass: column vector, one entry per consistent reaction, sum
%                    of |stoichiometric coefficient| * molecular weight
%                    over the substrates.
%    productsMass:   same for the products.
%
% Side effects: prints a mass-imbalance report and the fitted line to the
% command window and opens a figure with the regression.
%
% Author: Samira Ranjbar 2024

%% Check if metFormulas field is provided in the model, if not add it using model0 (Recon3DModel_301_xomics_input)
if ~isfield(model,'metFormulas')
    model0 = readCbModel('Recon3DModel_301_xomics_input.mat');
    % Pre-allocate so that metFormulas has exactly one (possibly empty)
    % entry per metabolite even when some metabolites have no match.
    model.metFormulas = repmat({''}, numel(model.mets), 1);
    for i = 1:numel(model.mets)
        % Drop the ",rec1" / ",rec2" / ",comm1" / ",comm2" compartment
        % suffix to obtain the metabolite name of the generic model
        genericMet = regexprep(model.mets{i}, ',(rec[12]|comm[12])\]$', ']');
        index = find(ismember(model0.mets, genericMet), 1);

        % If a matching entry is found, update the formula
        if ~isempty(index)
            model.metFormulas{i,1} = model0.metFormulas{index};
        end
    end

    % Verify the validity of the updated model
    if verifyModel(model, 'simpleCheck', true)
        disp('Model successfully updated and saved.');
    else
        disp('The updated model is not valid.');
    end
end

%% There is a wrong formula for this metabolite in the generic model
model.metFormulas(contains(model.mets,'paps[')) = {'C10H11N5O13P2S'};

%% Calculate molecular weights using the computeMW function
[molecularWeights, ~] = computeMW(model, model.mets);

% Restrict everything to the stoichiometrically consistent part so that the
% row indices of sConsistent address the matching metabolite weights.
sConsistent = model.S(model.SConsistentMetBool, model.SConsistentRxnBool);
rxnConsistent = model.rxns(model.SConsistentRxnBool);
mwConsistent = molecularWeights(model.SConsistentMetBool);
mwConsistent = mwConsistent(:);

[Cf, Cr] = sumReactionMasses(sConsistent, mwConsistent);

%% Check if any mass imbalance happen
% Reactions whose substrate mass is NaN (unknown weight) are not reported.
% Kept as a row vector so that the for-loop below visits one index at a time.
indexmassimbalance = reshape(find(round(Cf, 2) ~= round(Cr, 2) & ~isnan(Cf)), 1, []);

if ~isempty(indexmassimbalance)
    dataTable = cell(numel(indexmassimbalance), 4);
    row = 0;
    for a = indexmassimbalance
        row = row + 1;
        if isfield(model,'rxnFormulas')
            Formula = model.rxnFormulas(findRxnIDs(model, rxnConsistent{a}));
        else
            Formula = printRxnFormula(model, rxnConsistent{a});
        end
        % Columns: index within the consistent reactions, formula,
        % product (reverse) mass, substrate (forward) mass
        dataTable(row, :) = [{a}, Formula, {Cr(a)}, {Cf(a)}];
    end

    variableNames = {'rxn number', 'Formula', 'Reverse Mass', 'Forward Mass'};
    resultTable = array2table(dataTable, 'VariableNames', variableNames);
    disp(resultTable);
else
    disp('All reactions that do not include an R-group are mass-balanced.')
end

%% Non-negative left null space vector using lsqnonneg
% NOTE(review): with a zero right-hand side, x = 0 is a trivial minimiser of
% this problem, so this call may return the zero vector; in that case the
% regression below is degenerate. Confirm the intended formulation.
A = sConsistent';
x = lsqnonneg(A, zeros(size(A, 1), 1));

% Display the result
if ~isempty(x) && all(x >= 0)
    LeftNullSpace_nonzero = nnz(x)
else
    disp('There is no strictly positive left- null space')
end

%% Set undefined molecular weights to zero
mwConsistent(isnan(mwConsistent)) = 0;

% Sort ascending by molecular weight, then by left null space value, so the
% metabolites without a known weight (weight 0) come first.
[~, order] = sortrows([mwConsistent, x], [1 2]);
mwSorted = mwConsistent(order);
nullSorted = x(order);

firstKnown = find(mwSorted ~= 0, 1);
if isempty(firstKnown)
    error('calculateReactionMasses:noMolecularWeights', ...
        'No metabolite has a known molecular weight, the regression cannot be fitted.');
end

%% Removing metabolites that contain R-group
y = mwSorted(firstKnown:end);
x = nullSorted(firstKnown:end);

%% Detect and remove outliers (distance from the median in units of std)
x_std = std(x);
y_std = std(y);

x_median = median(x);
y_median = median(y);

% Define a threshold for outliers (e.g., 7 times the standard deviation)
threshold = 7;

% Find indices of outliers
outliers_x = find(abs(x - x_median) > threshold * x_std);
outliers_y = find(abs(y - y_median) > threshold * y_std);

% Combine outlier indices
outliers_indices = unique([outliers_x; outliers_y]);

% Remove outliers from the data
x_no_outliers = x;
y_no_outliers = y;
x_no_outliers(outliers_indices) = [];
y_no_outliers(outliers_indices) = [];

% Perform polynomial regression on data without outliers. The degree must
% stay 1: the estimate further below uses slope and intercept only.
degree = 1;
coefficients_poly = polyfit(x_no_outliers, y_no_outliers, degree);

% Evaluate the polynomial at various x values for plotting
x_fit_poly = linspace(min(x_no_outliers), max(x_no_outliers), 100);
y_fit_poly = polyval(coefficients_poly, x_fit_poly);

%% Plot the original data and the fitted polynomial
figure('Renderer', 'painters', 'Position', [10 10 1600 800])
plot(x, y, 'o', 'DisplayName', 'Original Data');
hold on;

% Plot the data without outliers
plot(x_no_outliers, y_no_outliers, 'x', 'DisplayName', 'Data without Outliers');

% Plot the fitted polynomial
plot(x_fit_poly, y_fit_poly, '-', 'DisplayName', 'Fitted Line');
hold off
legend('Location', 'Best');
xlabel('left null-space');
ylabel('molecularWeights');
title('Polynomial Regression');

%% Display the coefficients
% Construct the polynomial equation as a string
degree = length(coefficients_poly) - 1;
equation_str = 'y = ';
for i = degree:-1:1
    equation_str = [equation_str num2str(coefficients_poly(degree - i + 1)) ' * x^' num2str(i) ' + ']; %#ok<AGROW>
end
equation_str = [equation_str num2str(coefficients_poly(end))];

% Display the polynomial equation
disp('Fitted Polynomial Equation:');
disp(equation_str);

%% Estimate the unknown weights from the fitted line
unknown = 1:firstKnown-1;
mwSorted(unknown) = coefficients_poly(1) * nullSorted(unknown) + coefficients_poly(2);

% Undo the sort so the weights line up with the rows of sConsistent again
mwFilled = zeros(size(mwConsistent));
mwFilled(order) = mwSorted;

%% Calculate mass again, now including metabolites that contain an R-group
[substratesMass, productsMass] = sumReactionMasses(sConsistent, mwFilled);

function [substrateTotals, productTotals] = sumReactionMasses(S, metWeights)
% Sum |coefficient| * weight over the substrates (negative coefficients)
% and over the products (positive coefficients) of every column of S.
% metWeights must have one entry per row of S. A reaction without
% substrates (or products) gets a total of 0 on that side.
nRxn = size(S, 2);
substrateTotals = zeros(nRxn, 1);
productTotals = zeros(nRxn, 1);
for k = 1:nRxn
    % Row indices and stoichiometric coefficients of the non-zero entries
    [rows, ~, coefficients] = find(S(:, k));
    isSubstrate = coefficients < 0;
    substrateTotals(k) = sum(abs(coefficients(isSubstrate)) .* metWeights(rows(isSubstrate)));
    productTotals(k) = sum(coefficients(~isSubstrate) .* metWeights(rows(~isSubstrate)));
end
1.86 MB
Binary file not shown.

0 commit comments

Comments
 (0)