Skip to content

Commit 628105f

Browse files
committed
feat: python anaerobic_model script
1 parent cf4285a commit 628105f

10 files changed

Lines changed: 565 additions & 762 deletions

code/anaerobic_model.py

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
"""Constrain yeast-GEM to anaerobic conditions (pure cobrapy).
2+
3+
This is a self-contained port of ``code/otherChanges/anaerobicModel.m`` (and
4+
the ``changeAminoAcidRatio`` / ``sumBioMass`` / ``rescalePseudoReaction``
5+
helpers it relies on) to cobrapy. It has no dependency on the RAVEN toolbox
6+
or raven-python; the only third-party requirement is ``cobra``.
7+
8+
By default yeast-GEM represents aerobic metabolism. :func:`anaerobic_model`
9+
edits exchange reactions and a few intracellular reactions in place so the
10+
model reaches the exchange rates measured under anaerobic batch growth on
11+
minimal glucose media.
12+
13+
Usage::
14+
15+
import cobra
16+
from anaerobic_model import anaerobic_model
17+
18+
model = cobra.io.read_sbml_model("model/yeast-GEM.xml")
19+
anaerobic_model(model)
20+
"""
21+
from __future__ import annotations
22+
23+
import csv
24+
import re
25+
from pathlib import Path
26+
27+
import cobra
28+
29+
# Repo paths: this file lives in ``code/``; the repo root is its parent.
30+
_REPO_ROOT = Path(__file__).resolve().parent.parent
31+
_AA_TSV = _REPO_ROOT / "data" / "physiology" / "aminoacid_Bjorkeroth2020.tsv"
32+
33+
# Metabolite/reaction IDs reused across helpers.
34+
_PROTON_ID = "s_0794" # H+ [cytoplasm]
35+
_PROTEIN_RXN = "r_4047" # protein pseudoreaction
36+
_PROTEIN_MET_NAME = "protein" # the protein pseudoreaction product (s_3717)
37+
38+
# Atomic weights, identical to the table in ``sumBioMass.m``
39+
# (``parseChemicalFormula``). ``R`` is the generic residue placeholder and
40+
# carries no mass, matching the MATLAB convention.
41+
_ELEMENT_WEIGHTS = {
42+
"C": 12.01, "H": 1.008, "N": 14.007, "O": 15.999, "P": 30.974,
43+
"S": 32.06, "R": 0.0, "Fe": 55.845, "K": 39.098, "Na": 22.99,
44+
"Cl": 35.45, "Mn": 54.938, "Zn": 65.38, "Ca": 40.078, "Mg": 24.305,
45+
"Cu": 63.546,
46+
}
47+
48+
_FORMULA_TOKEN = re.compile(r"([A-Z][a-z]*)(\d*)")
49+
50+
51+
def anaerobic_model(model: cobra.Model) -> cobra.Model:
52+
"""Constrain ``model`` to anaerobic conditions in place.
53+
54+
Parameters
55+
----------
56+
model : cobra.Model
57+
yeast-GEM model, aerobic by default.
58+
59+
Returns
60+
-------
61+
cobra.Model
62+
The same model object, modified to match anaerobic conditions.
63+
"""
64+
# 1. Cofactor pseudoreaction (r_4598): heme a synthesis requires O2, so
65+
# remove heme a from the biomass cofactor pool and rebalance protons.
66+
cofactor_rxn = model.reactions.get_by_id("r_4598")
67+
_set_coefficient(cofactor_rxn, model.metabolites.get_by_id("s_3714"), 0.0)
68+
_rebalance_proton(model, cofactor_rxn)
69+
70+
# 2. Switch the protein pseudoreaction to the anaerobic amino-acid ratio.
71+
_change_amino_acid_ratio(model, aerobic=False)
72+
73+
# 3. Exchange reactions: block O2 uptake, and open the sterol / fatty-acid
74+
# / vitamin uptakes that are essential supplements for anaerobic growth.
75+
lower_bounds = {
76+
"r_1992": 0.0, # oxygen (block uptake)
77+
"r_1757": -1000.0, # ergosterol
78+
"r_1915": -1000.0, # lanosterol
79+
"r_2106": -1000.0, # zymosterol
80+
"r_2134": -1000.0, # 14-demethyllanosterol
81+
"r_1994": -1000.0, # palmitoleate
82+
"r_2189": -1000.0, # oleate
83+
"r_2137": 0.0, # ergosta-5,7,22,24(28)-tetraen-3beta-ol (block)
84+
"r_1967": -1000.0, # nicotinate
85+
"r_1548": -1000.0, # (R)-pantothenate
86+
}
87+
for rxn_id, lower_bound in lower_bounds.items():
88+
model.reactions.get_by_id(rxn_id).lower_bound = lower_bound
89+
90+
# 4. Block MDH2 (r_0714) and IDP2 (r_0659): repressed/undetected under
91+
# anaerobic glucose growth.
92+
for rxn_id in ("r_0714", "r_0659"):
93+
model.reactions.get_by_id(rxn_id).bounds = (0.0, 0.0)
94+
95+
# 5. Fumarate reductase recycles FADH2 from Ero1-driven disulphide-bond
96+
# formation; add the net FADH2/FAD/H+ turnover to the biomass reaction.
97+
biomass_rxn = model.reactions.get_by_id("r_4041")
98+
fadh2_prod = 0.08
99+
biomass_rxn.add_metabolites(
100+
{
101+
model.metabolites.get_by_id("s_0689"): fadh2_prod, # FADH2
102+
model.metabolites.get_by_id("s_0687"): -fadh2_prod, # FAD
103+
model.metabolites.get_by_id("s_0794"): -2.0 * fadh2_prod, # H+
104+
},
105+
combine=True,
106+
)
107+
return model
108+
109+
110+
# --- helpers ----------------------------------------------------------------
111+
112+
def _set_coefficient(
113+
reaction: cobra.Reaction, metabolite: cobra.Metabolite, value: float
114+
) -> None:
115+
"""Set an *absolute* stoichiometric coefficient (the ``S(i,j) = x``
116+
semantics of the MATLAB code). Setting ``value`` to 0 removes the
117+
metabolite from the reaction.
118+
"""
119+
current = reaction.metabolites.get(metabolite, 0.0)
120+
delta = float(value) - current
121+
if delta != 0.0:
122+
reaction.add_metabolites({metabolite: delta}, combine=True)
123+
124+
125+
def _rebalance_proton(model: cobra.Model, reaction: cobra.Reaction) -> None:
126+
"""Set the H+ coefficient so ``reaction`` is charge balanced.
127+
128+
Mirrors the H+ correction repeated in ``anaerobicModel.m`` and
129+
``rescalePseudoReaction.m``: zero the proton, then set it to the negative
130+
of the remaining charge imbalance. Missing charges (cobra ``None``) are
131+
treated as zero, matching MATLAB's ``'omitnan'``.
132+
"""
133+
proton = model.metabolites.get_by_id(_PROTON_ID)
134+
_set_coefficient(reaction, proton, 0.0)
135+
imbalance = sum(
136+
(met.charge or 0.0) * coeff for met, coeff in reaction.metabolites.items()
137+
)
138+
_set_coefficient(reaction, proton, -imbalance)
139+
140+
141+
def _change_amino_acid_ratio(model: cobra.Model, *, aerobic: bool = True) -> None:
142+
"""Rewrite the protein pseudoreaction's amino-acid ratios.
143+
144+
Port of ``changeAminoAcidRatio.m``. The protein mass is snapshotted, the
145+
tRNA stoichiometries in ``r_4047`` are replaced with the aerobic or
146+
anaerobic ratios from ``aminoacid_Bjorkeroth2020.tsv``, and the protein
147+
pseudoreaction is rescaled so the protein fraction returns to its
148+
pre-switch value.
149+
"""
150+
target_protein = _protein_mass(model)
151+
152+
protein_rxn = model.reactions.get_by_id(_PROTEIN_RXN)
153+
# TSV columns (0-indexed): 0 aa, 1 substrate met, 2 product met, 3 MW,
154+
# 4 aerobic ratio, 5 anaerobic ratio.
155+
ratio_column = 4 if aerobic else 5
156+
for row in _read_amino_acid_ratios():
157+
ratio = float(row[ratio_column])
158+
substrate = model.metabolites.get_by_id(row[1])
159+
product = model.metabolites.get_by_id(row[2])
160+
_set_coefficient(protein_rxn, substrate, -ratio)
161+
_set_coefficient(protein_rxn, product, ratio)
162+
163+
factor = target_protein / _protein_mass(model)
164+
_rescale_protein(model, factor)
165+
166+
167+
def _protein_mass(model: cobra.Model) -> float:
168+
"""Protein fraction [g/gDW], port of ``sumBioMass.m`` ``getFraction('P')``.
169+
170+
Sums the molecular weight of the protein pseudoreaction substrates, with
171+
two protons (2.016) removed from each charged-tRNA formula.
172+
"""
173+
protein_rxn = model.reactions.get_by_id(_PROTEIN_RXN)
174+
total = 0.0
175+
for met, coeff in protein_rxn.metabolites.items():
176+
if coeff < 0: # substrate
177+
total += -coeff * (_formula_weight(met) - 2.016)
178+
return total / 1000.0
179+
180+
181+
def _rescale_protein(model: cobra.Model, factor: float) -> None:
182+
"""Multiply every coefficient in the protein pseudoreaction by ``factor``
183+
except the ``protein`` product, then rebalance H+.
184+
185+
Port of ``rescalePseudoReaction.m`` for the protein component.
186+
"""
187+
protein_rxn = model.reactions.get_by_id(_PROTEIN_RXN)
188+
deltas = {
189+
met: (factor - 1.0) * coeff
190+
for met, coeff in protein_rxn.metabolites.items()
191+
if met.name != _PROTEIN_MET_NAME
192+
}
193+
if deltas:
194+
protein_rxn.add_metabolites(deltas, combine=True)
195+
_rebalance_proton(model, protein_rxn)
196+
197+
198+
def _formula_weight(metabolite: cobra.Metabolite) -> float:
199+
"""Molecular weight from a metabolite formula, using the same atomic
200+
weights as ``sumBioMass.m`` (so the generic residue ``R`` weighs 0)."""
201+
formula = metabolite.formula
202+
if not formula:
203+
raise ValueError(
204+
f"Biomass metabolite {metabolite.id} has an empty formula field."
205+
)
206+
weight = 0.0
207+
for element, count in _FORMULA_TOKEN.findall(formula):
208+
if not element:
209+
continue
210+
try:
211+
weight += (int(count) if count else 1) * _ELEMENT_WEIGHTS[element]
212+
except KeyError as exc:
213+
raise ValueError(
214+
f"Unknown element '{element}' in formula '{formula}'."
215+
) from exc
216+
return weight
217+
218+
219+
def _read_amino_acid_ratios() -> list[list[str]]:
220+
"""Read ``aminoacid_Bjorkeroth2020.tsv`` (one header line, tab separated).
221+
222+
Returns the data rows verbatim; columns are
223+
``[aa, substrate_met, product_met, MW, aerobic, anaerobic]``.
224+
"""
225+
rows: list[list[str]] = []
226+
with open(_AA_TSV, newline="", encoding="utf-8") as handle:
227+
reader = csv.reader(handle, delimiter="\t")
228+
next(reader) # skip header
229+
for record in reader:
230+
if record and record[0].strip():
231+
rows.append(record)
232+
return rows

code/modelTests/anaerobic_flux_predictions.m

Lines changed: 0 additions & 64 deletions
This file was deleted.

0 commit comments

Comments
 (0)