Merge pull request #2385 from Sami990/master

rmtfleming · web-flow · commit 4978a9854f0c · 2025-03-05T10:03:16.000Z
functions that calculate weights of cf and cr in eFBA
diff --git a/src/visualization/entropicFBA/calculateGeneWeight.m b/src/visualization/entropicFBA/calculateGeneWeight.m
@@ -0,0 +1,92 @@
+function geneWeight = calculateGeneWeight(model,Transcriptomic, Threshold)
+% Inputs:
+%   model              - A COBRA model with mandatory fields: grRules and SConsistentRxnBool.
+%   Transcriptomic     - A table with entrezID and geneExpression values.
+%   Threshold          - Threshold for transcriptomic data.
+
+
+% Note: geneWeight is a value that can be used in entropicFBA to assign a weight
+% that corresponds to the gene expression value to internal reactions. If you
+% want to use this value for this purpose, use the following formula: 
+% cr = cf = -log(geneWeight + 1e-8) + 1 - ci/g
+% Default values: g = 2 , ci = 0
+
+% Author : Samira Ranjbar 2024
+% (The algoithm is explained here:
+% https://doi.org/10.1016/j.isci.2023.106201)
+%-------------------------------------------------
+if isfield(model,'grRules')
+    rule = model.grRules(model.SConsistentRxnBool);
+else
+    error('grRules is missiming')
+end
+if ~isfield(model,'SConsistentRxnBool')
+    error('grRules is missiming')
+end
+
+orList =[];
+% Divide the rule by "or" and iterate over each resulting subrule
+for i = 1:length(rule)
+    % if ~isempty(rule(i))
+    subrules = strsplit(strjoin(rule(i)), ' or ');
+
+    for subruleIndex = 1:length(subrules)
+        % Split each subrule by "and" to get a list of genes
+        genes = strsplit(subrules{subruleIndex}, ' and ');
+
+        g_vector = {};
+
+        % Process each gene
+        for geneIndex = 1:length(genes)
+            gene = strrep(genes{geneIndex}, '(', '');  % Remove "("
+            gene = strrep(gene, ')', '');  % Remove ")"
+            gene = strrep(gene, ' ', '');  % Remove spaces
+            g_vector{geneIndex} = [gene];
+        end
+        % g_table = cell2table(g_vector', 'VariableNames', {'GeneID'});
+
+        % Evaluate the minimum expression value
+        if contains(g_vector,'rec1_')
+            fpkmTable1 = Transcriptomic(:,[1,2]);
+            values = fpkmTable1{ismember(strrep(cellstr(num2str(fpkmTable1.entrezID)),' ',''), strrep(cellstr(g_vector),'rec1_','')), 2};
+        elseif contains(g_vector,'rec2_')
+            fpkmTable2 = Transcriptomic(:,[1,3]);
+            values = fpkmTable2{ismember(strrep(cellstr(num2str(fpkmTable2.entrezID)),' ',''), strrep(cellstr(g_vector),'rec2_','')), 2};
+        else
+            values = Transcriptomic{ismember(strrep(cellstr((Transcriptomic.entrezID)),' ',''), cellstr(g_vector)), 2};
+
+        end
+        value = min(values);
+
+        % Apply the threshold
+        if value < Threshold
+            value = 0;
+        end
+
+        % Add the minimum to the list
+        orList = vertcat(orList, value);
+        
+    % end
+    end
+        expList{i,1} = orList';
+        orList = [];
+        expList{i,2} = sum(expList{i,1});
+        % Return the sum of the list
+        % result = sum(orList);
+end
+
+
+ % Access the first and second columns and convert them to numeric arrays
+firstColumn = expList(:, 1);
+secondColumn = cell2mat(expList(:, 2));
+medianValue = median(secondColumn);
+% Find empty cells in the first column
+isEmptyFirstColumn = cellfun('isempty', firstColumn);
+
+% Replace empty cells in the first column with the corresponding row median from the second column
+for i = 1:numel(firstColumn)
+    if isEmptyFirstColumn(i) & model.SConsistentRxnBool
+        secondColumn(i) = medianValue;
+    end
+end
+geneWeight = secondColumn;
diff --git a/src/visualization/entropicFBA/calculateReactionMasses.m b/src/visualization/entropicFBA/calculateReactionMasses.m
@@ -0,0 +1,253 @@
+function [substratesMass, productsMass] = calculateReactionMasses(model)
+% This function calculates mass related to each reaction, products mass and
+% substrates mass,using the left null space of stochiometric matrix,
+% useful to unbiased flux through reactions with massive
+% metabolites in entropicFBA
+
+% Author: Samira Ranjbar 2024
+%% % Check if metFormula field is provied in the model, if not add it using model0(Recon3DModel_301_xomics_input)
+
+if ~isfield(model,'metFormulas')
+    model0 = readCbModel('Recon3DModel_301_xomics_input.mat');
+    % Loop through each entry in the first list
+    for i = 1:numel(model.mets)      
+        % Find the corresponding entry in the second list
+        index = find(ismember(model0.mets, regexprep(model.mets{i}, ',(rec[12]|comm[12])\]$', ']')));
+    
+        % If a matching entry is found, update the formula
+        if ~isempty(index)
+            model.metFormulas{i,1} = model0.metFormulas{index};
+        end
+    end
+    
+    % Verify the validity of the updated model
+    if verifyModel(model, 'simpleCheck', true)
+        % Save the updated model
+        % writeCbModel(model, 'updated_model.mat');
+        disp('Model successfully updated and saved.');
+    else
+        disp('The updated model is not valid.');
+    end
+end
+%% There is a wrong formula for this a metabolite in the generic model
+model.metFormulas(findMetIDs(model,model.mets(contains(model.mets,'paps[')))) = {'C10H11N5O13P2S'};
+% model.metFormulas(findMetIDs(model,'paps[c,rec2]')) = {'C10H11N5O13P2S'};
+% model.metFormulas(findMetIDs(model,'paps[g,rec1]')) = {'C10H11N5O13P2S'};
+% model.metFormulas(findMetIDs(model,'paps[g,rec2]')) = {'C10H11N5O13P2S'};
+% model.metFormulas(findMetIDs(model,'paps[c]')) = {'C10H11N5O13P2S'};
+% model.metFormulas(findMetIDs(model,'paps[g]')) = {'C10H11N5O13P2S'};
+ 
+%% Calculate molecular weights using the computeMW function
+metList = model.mets;
+[molecularWeights, ~] = computeMW(model, metList);
+% model = readCbModel('scRecon3D_2-1.mat')
+% Get the number of consistent reactions in the model
+numReactions = numel(model.rxns(model.SConsistentRxnBool));
+sConsistent = model.S(model.SConsistentMetBool,model.SConsistentRxnBool);
+rxnConsistent = model.rxns(model.SConsistentRxnBool);
+% Initialize variables to store results
+cf = cell(numReactions, 1); % Cell array for substrate mass results
+cr = cell(numReactions, 1); % Cell array for product mass results
+
+
+% Iterate through all reactions
+for i = 1:numReactions
+    % Get row indices and stoichiometric coefficients for the current reaction
+    [rowIndices, ~, stoichiometry] = find(sConsistent(:, i));
+
+    % Identify substrates and products
+    substrates = model.mets(rowIndices(stoichiometry < 0));
+    products = model.mets(rowIndices(stoichiometry > 0));
+
+    % Calculate mass of substrates and products using molecular weights
+    substrateCoefficients = stoichiometry(stoichiometry < 0);
+    substrateMass = abs(substrateCoefficients) .* molecularWeights(ismember(metList, substrates));
+
+    productMass = stoichiometry(stoichiometry > 0) .* molecularWeights(ismember(metList, products));
+
+    % Store results in cf and cr
+    cf{i} = struct('reaction', rxnConsistent{i}, 'substrates', substrates, 'mass', substrateMass);
+    cr{i} = struct('reaction', rxnConsistent{i}, 'products', products, 'mass', productMass);
+end
+for i= 1:length(cf)
+    Cf(i) = sum(cf{i}(1).mass);
+    Cr(i) = sum(cr{i}(1).mass); 
+end
+CF = Cf';
+CR = Cr';
+%% Check if any mass imbalance happen
+j=1;
+indexmassimbalance =[];
+for i = 1:length(CF)
+    if( round(CF(i), 2)~= round(CR(i), 2) & ~isnan(CF(i)))
+        indexmassimbalance(j) = i;
+        j = j + 1;
+    end
+end
+% if isempty(indexmassimbalance)
+%     disp('All reactions that do not include an R-group are mass-balanced.');
+% else
+%     fprintf('%s is not mass-balance\n', cell2mat(model.rxns(indexmassimbalance)));
+% end
+if ~isempty(indexmassimbalance)
+    
+    dataTable = [];
+    
+    for a = indexmassimbalance%[95, 182, 384, 465, 3090, 3180, 3386, 3487, 4008]
+        if isfield(model,'rxnFormulas')
+            Formula = model.rxnFormulas(findRxnIDs(model, cf{a, 1}(1).reaction));
+        else
+            Formula = printRxnFormula(model, cf{a, 1}(1).reaction);
+        end
+        ForwardMass = sum(cf{a, 1}(1).mass);
+        ReverseMass = sum(cr{a, 1}(1).mass);
+    
+        % Accumulate data
+        dataRow = [a, Formula, ReverseMass, ForwardMass];
+        dataTable = [dataTable; dataRow];
+    end
+    
+    % Create a table after the loop
+    variableNames = {'rxn number', 'Formula', 'Reverse Mass', 'Forward Mass'};
+    resultTable = array2table(dataTable, 'VariableNames', variableNames);
+    disp(resultTable);
+else
+    disp('All reactions that do not include an R-group are mass-balanced.')
+end
+%% linear programming using lsqnonneg method
+
+N = model.S(model.SConsistentMetBool,model.SConsistentRxnBool);
+A = N';
+% Objective function: L2 regularization
+objective = @(x) sum(x.^2);
+
+% Nonlinear equality constraint: A*x = 0
+nonlinearConstraint = @(x) A*x;
+
+% Initial guess for x (make sure it satisfies A*x = 0 and x > 0)
+x0 = ones(size(A, 2), 1);
+
+% Non-negative least squares
+x = lsqnonneg(A, zeros(size(A, 1), 1));
+
+% Display the result
+if(x >= 0)
+    LeftNullSpace_nonzero = nnz(x)
+    % figure('Renderer', 'painters', 'Position', [10 10 1600 800])
+    % bar(x,'FaceColor', [1, 0, 0], 'FaceAlpha',0.5)
+    % xlabel('met Index', FontSize=14, FontWeight='bold');
+    % ylabel('Left null space value', FontSize=14, FontWeight='bold');
+else
+    disp('There is no strictly positive left- null space')
+end
+%% set undifined molecularweight to zero
+for i = 1: length(molecularWeights)
+    if isnan(molecularWeights(i))
+        molecularWeights(i) = 0;
+    end
+end
+T = table((1:length(x))', model.mets, model.metFormulas, molecularWeights, x, 'VariableNames',...
+{'ID','met',	'met formula',	'molecular weigth',	'left null space'});
+[~, idx] = sortrows(T, {'molecular weigth', 'left null space'}, {'ascend', 'ascend'});
+sortedTable = T(idx, :);
+var4Values = sortedTable.("molecular weigth");
+var5Values = sortedTable.("left null space");
+
+nonzeroIndices = find(var4Values ~= 0);
+%% Removing metabolites that contain R-group
+y = var4Values(nonzeroIndices(1):end);
+x = var5Values(nonzeroIndices(1):end);
+
+%% Detect and remove outliers using IQR method
+x_std = std(x);
+y_std = std(y);
+
+x_median = median(x);
+y_median = median(y);
+
+% Define a threshold for outliers (e.g., 7 times the standard deviation)
+threshold = 7;
+
+% Find indices of outliers
+outliers_x = find(abs(x - x_median) > threshold * x_std);
+outliers_y = find(abs(y - y_median) > threshold * y_std);
+
+% Combine outlier indices
+outliers_indices = unique([outliers_x; outliers_y]);
+
+% Remove outliers from the data
+x_no_outliers = x;
+y_no_outliers = y;
+x_no_outliers(outliers_indices) = [];
+y_no_outliers(outliers_indices) = [];
+
+% Perform polynomial regression on data without outliers
+degree = 1; % Adjust the degree of the polynomial as needed
+coefficients_poly = polyfit(x_no_outliers, y_no_outliers, degree);
+
+% Evaluate the polynomial at various x values for plotting
+x_fit_poly = linspace(min(x_no_outliers), max(x_no_outliers), 100);
+y_fit_poly = polyval(coefficients_poly, x_fit_poly);
+
+%% Plot the original data and the fitted polynomial
+figure('Renderer', 'painters', 'Position', [10 10 1600 800])
+plot(x, y, 'o', 'DisplayName', 'Original Data');
+hold on;
+
+% Plot the data without outliers
+plot(x_no_outliers, y_no_outliers, 'x', 'DisplayName', 'Data without Outliers');
+
+% Plot the fitted polynomial
+plot(x_fit_poly, y_fit_poly, '-', 'DisplayName', 'Fitted Line');
+hold off
+legend('Location', 'Best');
+xlabel('left null-space');
+ylabel('molecularWeights');
+title('Polynomial Regression');
+
+%% Display the coefficients
+% Construct the polynomial equation as a string
+degree = length(coefficients_poly) - 1;
+equation_str = 'y = ';
+for i = degree:-1:1
+    equation_str = [equation_str num2str(coefficients_poly(degree - i + 1)) ' * x^' num2str(i) ' + '];
+end
+equation_str = [equation_str num2str(coefficients_poly(end))];
+
+% Display the polynomial equation
+disp('Fitted Polynomial Equation:');
+disp(equation_str);
+
+for i = 1:nonzeroIndices(1)-1
+     var4Values(i) =  coefficients_poly(1) * var5Values(i) + coefficients_poly(2);
+end
+sortedTable.("molecular weigth") = var4Values;
+ST=sortrows(sortedTable, {'ID'}, {'ascend'});
+%% calculate mass again for metabolite contain R-group using left null space
+molecularWeights = ST.("molecular weigth");
+for i = 1:numReactions
+    % Get row indices and stoichiometric coefficients for the current reaction
+    [rowIndices, ~, stoichiometry] = find(sConsistent(:, i));
+
+    % Identify substrates and products
+    substrates = model.mets(rowIndices(stoichiometry < 0));
+    products = model.mets(rowIndices(stoichiometry > 0));
+
+    % Calculate mass of substrates and products using molecular weights
+    substrateCoefficients = stoichiometry(stoichiometry < 0);
+    substrateMass = abs(substrateCoefficients) .* molecularWeights(ismember(metList, substrates));
+
+    productMass = stoichiometry(stoichiometry > 0) .* molecularWeights(ismember(metList, products));
+
+    % Store results in cf and cr
+    cf{i} = struct('reaction', rxnConsistent{i}, 'substrates', substrates, 'mass', substrateMass);
+    cr{i} = struct('reaction', rxnConsistent{i}, 'products', products, 'mass', productMass);
+end
+for i= 1:length(cf)
+    Cf(i) = sum(cf{i}(1).mass);
+    Cr(i) = sum(cr{i}(1).mass); 
+end
+CF = Cf';
+CR = Cr';
+substratesMass = CF;
+productsMass = CR;
diff --git a/src/visualization/entropicFBA/tutorial_eFBA.mlx b/src/visualization/entropicFBA/tutorial_eFBA.mlx