|
| 1 | +"""Constrain yeast-GEM to anaerobic conditions (pure cobrapy). |
| 2 | +
|
| 3 | +This is a self-contained port of ``code/otherChanges/anaerobicModel.m`` (and |
| 4 | +the ``changeAminoAcidRatio`` / ``sumBioMass`` / ``rescalePseudoReaction`` |
| 5 | +helpers it relies on) to cobrapy. It has no dependency on the RAVEN toolbox |
| 6 | +or raven-python; the only third-party requirement is ``cobra``. |
| 7 | +
|
| 8 | +By default yeast-GEM represents aerobic metabolism. :func:`anaerobic_model` |
| 9 | +edits exchange reactions and a few intracellular reactions in place so the |
| 10 | +model reaches the exchange rates measured under anaerobic batch growth on |
| 11 | +minimal glucose media. |
| 12 | +
|
| 13 | +Usage:: |
| 14 | +
|
| 15 | + import cobra |
| 16 | + from anaerobic_model import anaerobic_model |
| 17 | +
|
| 18 | + model = cobra.io.read_sbml_model("model/yeast-GEM.xml") |
| 19 | + anaerobic_model(model) |
| 20 | +""" |
| 21 | +from __future__ import annotations |
| 22 | + |
| 23 | +import csv |
| 24 | +import re |
| 25 | +from pathlib import Path |
| 26 | + |
| 27 | +import cobra |
| 28 | + |
| 29 | +# Repo paths: this file lives in ``code/``; the repo root is its parent. |
| 30 | +_REPO_ROOT = Path(__file__).resolve().parent.parent |
| 31 | +_AA_TSV = _REPO_ROOT / "data" / "physiology" / "aminoacid_Bjorkeroth2020.tsv" |
| 32 | + |
| 33 | +# Metabolite/reaction IDs reused across helpers. |
| 34 | +_PROTON_ID = "s_0794" # H+ [cytoplasm] |
| 35 | +_PROTEIN_RXN = "r_4047" # protein pseudoreaction |
| 36 | +_PROTEIN_MET_NAME = "protein" # the protein pseudoreaction product (s_3717) |
| 37 | + |
| 38 | +# Atomic weights, identical to the table in ``sumBioMass.m`` |
| 39 | +# (``parseChemicalFormula``). ``R`` is the generic residue placeholder and |
| 40 | +# carries no mass, matching the MATLAB convention. |
| 41 | +_ELEMENT_WEIGHTS = { |
| 42 | + "C": 12.01, "H": 1.008, "N": 14.007, "O": 15.999, "P": 30.974, |
| 43 | + "S": 32.06, "R": 0.0, "Fe": 55.845, "K": 39.098, "Na": 22.99, |
| 44 | + "Cl": 35.45, "Mn": 54.938, "Zn": 65.38, "Ca": 40.078, "Mg": 24.305, |
| 45 | + "Cu": 63.546, |
| 46 | +} |
| 47 | + |
| 48 | +_FORMULA_TOKEN = re.compile(r"([A-Z][a-z]*)(\d*)") |
| 49 | + |
| 50 | + |
| 51 | +def anaerobic_model(model: cobra.Model) -> cobra.Model: |
| 52 | + """Constrain ``model`` to anaerobic conditions in place. |
| 53 | +
|
| 54 | + Parameters |
| 55 | + ---------- |
| 56 | + model : cobra.Model |
| 57 | + yeast-GEM model, aerobic by default. |
| 58 | +
|
| 59 | + Returns |
| 60 | + ------- |
| 61 | + cobra.Model |
| 62 | + The same model object, modified to match anaerobic conditions. |
| 63 | + """ |
| 64 | + # 1. Cofactor pseudoreaction (r_4598): heme a synthesis requires O2, so |
| 65 | + # remove heme a from the biomass cofactor pool and rebalance protons. |
| 66 | + cofactor_rxn = model.reactions.get_by_id("r_4598") |
| 67 | + _set_coefficient(cofactor_rxn, model.metabolites.get_by_id("s_3714"), 0.0) |
| 68 | + _rebalance_proton(model, cofactor_rxn) |
| 69 | + |
| 70 | + # 2. Switch the protein pseudoreaction to the anaerobic amino-acid ratio. |
| 71 | + _change_amino_acid_ratio(model, aerobic=False) |
| 72 | + |
| 73 | + # 3. Exchange reactions: block O2 uptake, and open the sterol / fatty-acid |
| 74 | + # / vitamin uptakes that are essential supplements for anaerobic growth. |
| 75 | + lower_bounds = { |
| 76 | + "r_1992": 0.0, # oxygen (block uptake) |
| 77 | + "r_1757": -1000.0, # ergosterol |
| 78 | + "r_1915": -1000.0, # lanosterol |
| 79 | + "r_2106": -1000.0, # zymosterol |
| 80 | + "r_2134": -1000.0, # 14-demethyllanosterol |
| 81 | + "r_1994": -1000.0, # palmitoleate |
| 82 | + "r_2189": -1000.0, # oleate |
| 83 | + "r_2137": 0.0, # ergosta-5,7,22,24(28)-tetraen-3beta-ol (block) |
| 84 | + "r_1967": -1000.0, # nicotinate |
| 85 | + "r_1548": -1000.0, # (R)-pantothenate |
| 86 | + } |
| 87 | + for rxn_id, lower_bound in lower_bounds.items(): |
| 88 | + model.reactions.get_by_id(rxn_id).lower_bound = lower_bound |
| 89 | + |
| 90 | + # 4. Block MDH2 (r_0714) and IDP2 (r_0659): repressed/undetected under |
| 91 | + # anaerobic glucose growth. |
| 92 | + for rxn_id in ("r_0714", "r_0659"): |
| 93 | + model.reactions.get_by_id(rxn_id).bounds = (0.0, 0.0) |
| 94 | + |
| 95 | + # 5. Fumarate reductase recycles FADH2 from Ero1-driven disulphide-bond |
| 96 | + # formation; add the net FADH2/FAD/H+ turnover to the biomass reaction. |
| 97 | + biomass_rxn = model.reactions.get_by_id("r_4041") |
| 98 | + fadh2_prod = 0.08 |
| 99 | + biomass_rxn.add_metabolites( |
| 100 | + { |
| 101 | + model.metabolites.get_by_id("s_0689"): fadh2_prod, # FADH2 |
| 102 | + model.metabolites.get_by_id("s_0687"): -fadh2_prod, # FAD |
| 103 | + model.metabolites.get_by_id("s_0794"): -2.0 * fadh2_prod, # H+ |
| 104 | + }, |
| 105 | + combine=True, |
| 106 | + ) |
| 107 | + return model |
| 108 | + |
| 109 | + |
| 110 | +# --- helpers ---------------------------------------------------------------- |
| 111 | + |
| 112 | +def _set_coefficient( |
| 113 | + reaction: cobra.Reaction, metabolite: cobra.Metabolite, value: float |
| 114 | +) -> None: |
| 115 | + """Set an *absolute* stoichiometric coefficient (the ``S(i,j) = x`` |
| 116 | + semantics of the MATLAB code). Setting ``value`` to 0 removes the |
| 117 | + metabolite from the reaction. |
| 118 | + """ |
| 119 | + current = reaction.metabolites.get(metabolite, 0.0) |
| 120 | + delta = float(value) - current |
| 121 | + if delta != 0.0: |
| 122 | + reaction.add_metabolites({metabolite: delta}, combine=True) |
| 123 | + |
| 124 | + |
| 125 | +def _rebalance_proton(model: cobra.Model, reaction: cobra.Reaction) -> None: |
| 126 | + """Set the H+ coefficient so ``reaction`` is charge balanced. |
| 127 | +
|
| 128 | + Mirrors the H+ correction repeated in ``anaerobicModel.m`` and |
| 129 | + ``rescalePseudoReaction.m``: zero the proton, then set it to the negative |
| 130 | + of the remaining charge imbalance. Missing charges (cobra ``None``) are |
| 131 | + treated as zero, matching MATLAB's ``'omitnan'``. |
| 132 | + """ |
| 133 | + proton = model.metabolites.get_by_id(_PROTON_ID) |
| 134 | + _set_coefficient(reaction, proton, 0.0) |
| 135 | + imbalance = sum( |
| 136 | + (met.charge or 0.0) * coeff for met, coeff in reaction.metabolites.items() |
| 137 | + ) |
| 138 | + _set_coefficient(reaction, proton, -imbalance) |
| 139 | + |
| 140 | + |
| 141 | +def _change_amino_acid_ratio(model: cobra.Model, *, aerobic: bool = True) -> None: |
| 142 | + """Rewrite the protein pseudoreaction's amino-acid ratios. |
| 143 | +
|
| 144 | + Port of ``changeAminoAcidRatio.m``. The protein mass is snapshotted, the |
| 145 | + tRNA stoichiometries in ``r_4047`` are replaced with the aerobic or |
| 146 | + anaerobic ratios from ``aminoacid_Bjorkeroth2020.tsv``, and the protein |
| 147 | + pseudoreaction is rescaled so the protein fraction returns to its |
| 148 | + pre-switch value. |
| 149 | + """ |
| 150 | + target_protein = _protein_mass(model) |
| 151 | + |
| 152 | + protein_rxn = model.reactions.get_by_id(_PROTEIN_RXN) |
| 153 | + # TSV columns (0-indexed): 0 aa, 1 substrate met, 2 product met, 3 MW, |
| 154 | + # 4 aerobic ratio, 5 anaerobic ratio. |
| 155 | + ratio_column = 4 if aerobic else 5 |
| 156 | + for row in _read_amino_acid_ratios(): |
| 157 | + ratio = float(row[ratio_column]) |
| 158 | + substrate = model.metabolites.get_by_id(row[1]) |
| 159 | + product = model.metabolites.get_by_id(row[2]) |
| 160 | + _set_coefficient(protein_rxn, substrate, -ratio) |
| 161 | + _set_coefficient(protein_rxn, product, ratio) |
| 162 | + |
| 163 | + factor = target_protein / _protein_mass(model) |
| 164 | + _rescale_protein(model, factor) |
| 165 | + |
| 166 | + |
| 167 | +def _protein_mass(model: cobra.Model) -> float: |
| 168 | + """Protein fraction [g/gDW], port of ``sumBioMass.m`` ``getFraction('P')``. |
| 169 | +
|
| 170 | + Sums the molecular weight of the protein pseudoreaction substrates, with |
| 171 | + two protons (2.016) removed from each charged-tRNA formula. |
| 172 | + """ |
| 173 | + protein_rxn = model.reactions.get_by_id(_PROTEIN_RXN) |
| 174 | + total = 0.0 |
| 175 | + for met, coeff in protein_rxn.metabolites.items(): |
| 176 | + if coeff < 0: # substrate |
| 177 | + total += -coeff * (_formula_weight(met) - 2.016) |
| 178 | + return total / 1000.0 |
| 179 | + |
| 180 | + |
| 181 | +def _rescale_protein(model: cobra.Model, factor: float) -> None: |
| 182 | + """Multiply every coefficient in the protein pseudoreaction by ``factor`` |
| 183 | + except the ``protein`` product, then rebalance H+. |
| 184 | +
|
| 185 | + Port of ``rescalePseudoReaction.m`` for the protein component. |
| 186 | + """ |
| 187 | + protein_rxn = model.reactions.get_by_id(_PROTEIN_RXN) |
| 188 | + deltas = { |
| 189 | + met: (factor - 1.0) * coeff |
| 190 | + for met, coeff in protein_rxn.metabolites.items() |
| 191 | + if met.name != _PROTEIN_MET_NAME |
| 192 | + } |
| 193 | + if deltas: |
| 194 | + protein_rxn.add_metabolites(deltas, combine=True) |
| 195 | + _rebalance_proton(model, protein_rxn) |
| 196 | + |
| 197 | + |
| 198 | +def _formula_weight(metabolite: cobra.Metabolite) -> float: |
| 199 | + """Molecular weight from a metabolite formula, using the same atomic |
| 200 | + weights as ``sumBioMass.m`` (so the generic residue ``R`` weighs 0).""" |
| 201 | + formula = metabolite.formula |
| 202 | + if not formula: |
| 203 | + raise ValueError( |
| 204 | + f"Biomass metabolite {metabolite.id} has an empty formula field." |
| 205 | + ) |
| 206 | + weight = 0.0 |
| 207 | + for element, count in _FORMULA_TOKEN.findall(formula): |
| 208 | + if not element: |
| 209 | + continue |
| 210 | + try: |
| 211 | + weight += (int(count) if count else 1) * _ELEMENT_WEIGHTS[element] |
| 212 | + except KeyError as exc: |
| 213 | + raise ValueError( |
| 214 | + f"Unknown element '{element}' in formula '{formula}'." |
| 215 | + ) from exc |
| 216 | + return weight |
| 217 | + |
| 218 | + |
| 219 | +def _read_amino_acid_ratios() -> list[list[str]]: |
| 220 | + """Read ``aminoacid_Bjorkeroth2020.tsv`` (one header line, tab separated). |
| 221 | +
|
| 222 | + Returns the data rows verbatim; columns are |
| 223 | + ``[aa, substrate_met, product_met, MW, aerobic, anaerobic]``. |
| 224 | + """ |
| 225 | + rows: list[list[str]] = [] |
| 226 | + with open(_AA_TSV, newline="", encoding="utf-8") as handle: |
| 227 | + reader = csv.reader(handle, delimiter="\t") |
| 228 | + next(reader) # skip header |
| 229 | + for record in reader: |
| 230 | + if record and record[0].strip(): |
| 231 | + rows.append(record) |
| 232 | + return rows |
0 commit comments