Skip to content

Commit

Permalink
Merge pull request #2385 from Sami990/master
Browse files Browse the repository at this point in the history
functions that calculate weights of cf and cr in eFBA
  • Loading branch information
rmtfleming authored Mar 5, 2025
2 parents 0478293 + 3c0cb15 commit 4978a98
Show file tree
Hide file tree
Showing 3 changed files with 345 additions and 0 deletions.
92 changes: 92 additions & 0 deletions src/visualization/entropicFBA/calculateGeneWeight.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
function geneWeight = calculateGeneWeight(model,Transcriptomic, Threshold)
% Inputs:
% model - A COBRA model with mandatory fields: grRules and SConsistentRxnBool.
% Transcriptomic - A table with entrezID and geneExpression values.
% Threshold - Threshold for transcriptomic data.


% Note: geneWeight is a value that can be used in entropicFBA to assign a weight
% that corresponds to the gene expression value to internal reactions. If you
% want to use this value for this purpose, use the following formula:
% cr = cf = -log(geneWeight + 1e-8) + 1 - ci/g
% Default values: g = 2 , ci = 0

% Author : Samira Ranjbar 2024
% (The algoithm is explained here:
% https://doi.org/10.1016/j.isci.2023.106201)
%-------------------------------------------------
if isfield(model,'grRules')
rule = model.grRules(model.SConsistentRxnBool);
else
error('grRules is missiming')
end
if ~isfield(model,'SConsistentRxnBool')
error('grRules is missiming')
end

orList =[];
% Divide the rule by "or" and iterate over each resulting subrule
for i = 1:length(rule)
% if ~isempty(rule(i))
subrules = strsplit(strjoin(rule(i)), ' or ');

for subruleIndex = 1:length(subrules)
% Split each subrule by "and" to get a list of genes
genes = strsplit(subrules{subruleIndex}, ' and ');

g_vector = {};

% Process each gene
for geneIndex = 1:length(genes)
gene = strrep(genes{geneIndex}, '(', ''); % Remove "("
gene = strrep(gene, ')', ''); % Remove ")"
gene = strrep(gene, ' ', ''); % Remove spaces
g_vector{geneIndex} = [gene];
end
% g_table = cell2table(g_vector', 'VariableNames', {'GeneID'});

% Evaluate the minimum expression value
if contains(g_vector,'rec1_')
fpkmTable1 = Transcriptomic(:,[1,2]);
values = fpkmTable1{ismember(strrep(cellstr(num2str(fpkmTable1.entrezID)),' ',''), strrep(cellstr(g_vector),'rec1_','')), 2};
elseif contains(g_vector,'rec2_')
fpkmTable2 = Transcriptomic(:,[1,3]);
values = fpkmTable2{ismember(strrep(cellstr(num2str(fpkmTable2.entrezID)),' ',''), strrep(cellstr(g_vector),'rec2_','')), 2};
else
values = Transcriptomic{ismember(strrep(cellstr((Transcriptomic.entrezID)),' ',''), cellstr(g_vector)), 2};

end
value = min(values);

% Apply the threshold
if value < Threshold
value = 0;
end

% Add the minimum to the list
orList = vertcat(orList, value);

% end
end
expList{i,1} = orList';
orList = [];
expList{i,2} = sum(expList{i,1});
% Return the sum of the list
% result = sum(orList);
end


% Access the first and second columns and convert them to numeric arrays
firstColumn = expList(:, 1);
secondColumn = cell2mat(expList(:, 2));
medianValue = median(secondColumn);
% Find empty cells in the first column
isEmptyFirstColumn = cellfun('isempty', firstColumn);

% Replace empty cells in the first column with the corresponding row median from the second column
for i = 1:numel(firstColumn)
if isEmptyFirstColumn(i) & model.SConsistentRxnBool
secondColumn(i) = medianValue;
end
end
geneWeight = secondColumn;
253 changes: 253 additions & 0 deletions src/visualization/entropicFBA/calculateReactionMasses.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
function [substratesMass, productsMass] = calculateReactionMasses(model)
% This function calculates mass related to each reaction, products mass and
% substrates mass,using the left null space of stochiometric matrix,
% useful to unbiased flux through reactions with massive
% metabolites in entropicFBA

% Author: Samira Ranjbar 2024
%% % Check if metFormula field is provied in the model, if not add it using model0(Recon3DModel_301_xomics_input)

if ~isfield(model,'metFormulas')
model0 = readCbModel('Recon3DModel_301_xomics_input.mat');
% Loop through each entry in the first list
for i = 1:numel(model.mets)
% Find the corresponding entry in the second list
index = find(ismember(model0.mets, regexprep(model.mets{i}, ',(rec[12]|comm[12])\]$', ']')));

% If a matching entry is found, update the formula
if ~isempty(index)
model.metFormulas{i,1} = model0.metFormulas{index};
end
end

% Verify the validity of the updated model
if verifyModel(model, 'simpleCheck', true)
% Save the updated model
% writeCbModel(model, 'updated_model.mat');
disp('Model successfully updated and saved.');
else
disp('The updated model is not valid.');
end
end
%% There is a wrong formula for this a metabolite in the generic model
model.metFormulas(findMetIDs(model,model.mets(contains(model.mets,'paps[')))) = {'C10H11N5O13P2S'};
% model.metFormulas(findMetIDs(model,'paps[c,rec2]')) = {'C10H11N5O13P2S'};
% model.metFormulas(findMetIDs(model,'paps[g,rec1]')) = {'C10H11N5O13P2S'};
% model.metFormulas(findMetIDs(model,'paps[g,rec2]')) = {'C10H11N5O13P2S'};
% model.metFormulas(findMetIDs(model,'paps[c]')) = {'C10H11N5O13P2S'};
% model.metFormulas(findMetIDs(model,'paps[g]')) = {'C10H11N5O13P2S'};

%% Calculate molecular weights using the computeMW function
metList = model.mets;
[molecularWeights, ~] = computeMW(model, metList);
% model = readCbModel('scRecon3D_2-1.mat')
% Get the number of consistent reactions in the model
numReactions = numel(model.rxns(model.SConsistentRxnBool));
sConsistent = model.S(model.SConsistentMetBool,model.SConsistentRxnBool);
rxnConsistent = model.rxns(model.SConsistentRxnBool);
% Initialize variables to store results
cf = cell(numReactions, 1); % Cell array for substrate mass results
cr = cell(numReactions, 1); % Cell array for product mass results


% Iterate through all reactions
for i = 1:numReactions
% Get row indices and stoichiometric coefficients for the current reaction
[rowIndices, ~, stoichiometry] = find(sConsistent(:, i));

% Identify substrates and products
substrates = model.mets(rowIndices(stoichiometry < 0));
products = model.mets(rowIndices(stoichiometry > 0));

% Calculate mass of substrates and products using molecular weights
substrateCoefficients = stoichiometry(stoichiometry < 0);
substrateMass = abs(substrateCoefficients) .* molecularWeights(ismember(metList, substrates));

productMass = stoichiometry(stoichiometry > 0) .* molecularWeights(ismember(metList, products));

% Store results in cf and cr
cf{i} = struct('reaction', rxnConsistent{i}, 'substrates', substrates, 'mass', substrateMass);
cr{i} = struct('reaction', rxnConsistent{i}, 'products', products, 'mass', productMass);
end
for i= 1:length(cf)
Cf(i) = sum(cf{i}(1).mass);
Cr(i) = sum(cr{i}(1).mass);
end
CF = Cf';
CR = Cr';
%% Check if any mass imbalance happen
j=1;
indexmassimbalance =[];
for i = 1:length(CF)
if( round(CF(i), 2)~= round(CR(i), 2) & ~isnan(CF(i)))
indexmassimbalance(j) = i;
j = j + 1;
end
end
% if isempty(indexmassimbalance)
% disp('All reactions that do not include an R-group are mass-balanced.');
% else
% fprintf('%s is not mass-balance\n', cell2mat(model.rxns(indexmassimbalance)));
% end
if ~isempty(indexmassimbalance)

dataTable = [];

for a = indexmassimbalance%[95, 182, 384, 465, 3090, 3180, 3386, 3487, 4008]
if isfield(model,'rxnFormulas')
Formula = model.rxnFormulas(findRxnIDs(model, cf{a, 1}(1).reaction));
else
Formula = printRxnFormula(model, cf{a, 1}(1).reaction);
end
ForwardMass = sum(cf{a, 1}(1).mass);
ReverseMass = sum(cr{a, 1}(1).mass);

% Accumulate data
dataRow = [a, Formula, ReverseMass, ForwardMass];
dataTable = [dataTable; dataRow];
end

% Create a table after the loop
variableNames = {'rxn number', 'Formula', 'Reverse Mass', 'Forward Mass'};
resultTable = array2table(dataTable, 'VariableNames', variableNames);
disp(resultTable);
else
disp('All reactions that do not include an R-group are mass-balanced.')
end
%% linear programming using lsqnonneg method

N = model.S(model.SConsistentMetBool,model.SConsistentRxnBool);
A = N';
% Objective function: L2 regularization
objective = @(x) sum(x.^2);

% Nonlinear equality constraint: A*x = 0
nonlinearConstraint = @(x) A*x;

% Initial guess for x (make sure it satisfies A*x = 0 and x > 0)
x0 = ones(size(A, 2), 1);

% Non-negative least squares
x = lsqnonneg(A, zeros(size(A, 1), 1));

% Display the result
if(x >= 0)
LeftNullSpace_nonzero = nnz(x)
% figure('Renderer', 'painters', 'Position', [10 10 1600 800])
% bar(x,'FaceColor', [1, 0, 0], 'FaceAlpha',0.5)
% xlabel('met Index', FontSize=14, FontWeight='bold');
% ylabel('Left null space value', FontSize=14, FontWeight='bold');
else
disp('There is no strictly positive left- null space')
end
%% set undifined molecularweight to zero
for i = 1: length(molecularWeights)
if isnan(molecularWeights(i))
molecularWeights(i) = 0;
end
end
T = table((1:length(x))', model.mets, model.metFormulas, molecularWeights, x, 'VariableNames',...
{'ID','met', 'met formula', 'molecular weigth', 'left null space'});
[~, idx] = sortrows(T, {'molecular weigth', 'left null space'}, {'ascend', 'ascend'});
sortedTable = T(idx, :);
var4Values = sortedTable.("molecular weigth");
var5Values = sortedTable.("left null space");

nonzeroIndices = find(var4Values ~= 0);
%% Removing metabolites that contain R-group
y = var4Values(nonzeroIndices(1):end);
x = var5Values(nonzeroIndices(1):end);

%% Detect and remove outliers using IQR method
x_std = std(x);
y_std = std(y);

x_median = median(x);
y_median = median(y);

% Define a threshold for outliers (e.g., 7 times the standard deviation)
threshold = 7;

% Find indices of outliers
outliers_x = find(abs(x - x_median) > threshold * x_std);
outliers_y = find(abs(y - y_median) > threshold * y_std);

% Combine outlier indices
outliers_indices = unique([outliers_x; outliers_y]);

% Remove outliers from the data
x_no_outliers = x;
y_no_outliers = y;
x_no_outliers(outliers_indices) = [];
y_no_outliers(outliers_indices) = [];

% Perform polynomial regression on data without outliers
degree = 1; % Adjust the degree of the polynomial as needed
coefficients_poly = polyfit(x_no_outliers, y_no_outliers, degree);

% Evaluate the polynomial at various x values for plotting
x_fit_poly = linspace(min(x_no_outliers), max(x_no_outliers), 100);
y_fit_poly = polyval(coefficients_poly, x_fit_poly);

%% Plot the original data and the fitted polynomial
figure('Renderer', 'painters', 'Position', [10 10 1600 800])
plot(x, y, 'o', 'DisplayName', 'Original Data');
hold on;

% Plot the data without outliers
plot(x_no_outliers, y_no_outliers, 'x', 'DisplayName', 'Data without Outliers');

% Plot the fitted polynomial
plot(x_fit_poly, y_fit_poly, '-', 'DisplayName', 'Fitted Line');
hold off
legend('Location', 'Best');
xlabel('left null-space');
ylabel('molecularWeights');
title('Polynomial Regression');

%% Display the coefficients
% Construct the polynomial equation as a string
degree = length(coefficients_poly) - 1;
equation_str = 'y = ';
for i = degree:-1:1
equation_str = [equation_str num2str(coefficients_poly(degree - i + 1)) ' * x^' num2str(i) ' + '];
end
equation_str = [equation_str num2str(coefficients_poly(end))];

% Display the polynomial equation
disp('Fitted Polynomial Equation:');
disp(equation_str);

for i = 1:nonzeroIndices(1)-1
var4Values(i) = coefficients_poly(1) * var5Values(i) + coefficients_poly(2);
end
sortedTable.("molecular weigth") = var4Values;
ST=sortrows(sortedTable, {'ID'}, {'ascend'});
%% calculate mass again for metabolite contain R-group using left null space
molecularWeights = ST.("molecular weigth");
for i = 1:numReactions
% Get row indices and stoichiometric coefficients for the current reaction
[rowIndices, ~, stoichiometry] = find(sConsistent(:, i));

% Identify substrates and products
substrates = model.mets(rowIndices(stoichiometry < 0));
products = model.mets(rowIndices(stoichiometry > 0));

% Calculate mass of substrates and products using molecular weights
substrateCoefficients = stoichiometry(stoichiometry < 0);
substrateMass = abs(substrateCoefficients) .* molecularWeights(ismember(metList, substrates));

productMass = stoichiometry(stoichiometry > 0) .* molecularWeights(ismember(metList, products));

% Store results in cf and cr
cf{i} = struct('reaction', rxnConsistent{i}, 'substrates', substrates, 'mass', substrateMass);
cr{i} = struct('reaction', rxnConsistent{i}, 'products', products, 'mass', productMass);
end
for i= 1:length(cf)
Cf(i) = sum(cf{i}(1).mass);
Cr(i) = sum(cr{i}(1).mass);
end
CF = Cf';
CR = Cr';
substratesMass = CF;
productsMass = CR;
Binary file added src/visualization/entropicFBA/tutorial_eFBA.mlx
Binary file not shown.

0 comments on commit 4978a98

Please sign in to comment.