Commit 6b1892ec authored by eladnoor's avatar eladnoor

supporting units and alternative DBs in pathway SBtab

parent 703c961b
Pipeline #46898819 (#117) passed with stage
in 19 minutes and 9 seconds
......@@ -34,7 +34,7 @@ from numpy import log
from equilibrator_api.settings import (
default_conc_lb, default_conc_ub, standard_concentration)
from . import Compound, ccache, ureg
from . import Q_, Compound, ccache, ureg
class InvalidBounds(Exception):
......@@ -242,41 +242,68 @@ class Bounds(BaseBounds):
bounds._check_bounds()
return bounds
def to_dataframe(self) -> pd.DataFrame:
data = []
for c in self.lower_bounds.keys():
data.append((str(c), self.GetLowerBound(c), self.GetUpperBound(c)))
compound_df = pd.DataFrame(data=data, columns=['compound', 'lb', 'ub'])
return compound_df
@classmethod
@ureg.check(None, None, '[concentration]', '[concentration]',
@ureg.check(None, None, None, '[concentration]',
'[concentration]')
def from_dataframe(cls,
df: pd.DataFrame,
concentration_unit: ureg.Unit,
default_lb: float = default_conc_lb,
default_ub: float = default_conc_ub) -> object:
def from_dataframe(
cls,
df: pd.DataFrame,
bounds_unit: str = None,
default_lb: float = default_conc_lb,
default_ub: float = default_conc_ub
) -> Tuple[object, Dict[Compound, str]]:
"""Read bounds from a pandas.DataFrame
Args:
df: a pandas.DataFrame with the bounds
default_lb: default lower bound to if no specific one is provided
default_lb: default upper bound to if no specific one is provided
"""
lbs: Dict[Compound, float] = dict()
ubs: Dict[Compound, float] = dict()
for idx in df.index:
row = df.loc[idx]
cid = row['Compound:Identifiers:kegg.compound']
compound = ccache.get_compound_from_registry('kegg', cid)
lbs[compound] = ureg(row['Concentration:Min']) * concentration_unit
ubs[compound] = ureg(row['Concentration:Max']) * concentration_unit
bounds = Bounds(lbs, ubs, default_lb, default_ub)
# The column "Compound" contains the keynames of the metabolites used in
# the SBtab (e.g. in the reaction definitions). We generate a
# dictionary from these keynames to Compound objects
df.set_index("Compound", inplace=True)
if "Compound:Identifiers:kegg.compound" in df.columns:
df["Compound:Identifiers"] = "KEGG:" + df[
"Compound:Identifiers:kegg.compound"]
if "Compound:Identifiers" not in df.columns:
raise KeyError("Could not find a column of Compound:Identifiers "
"in the ConcentrationConstraints table")
df["Compound"] = df["Compound:Identifiers"].apply(ccache.get_compound)
if pd.isnull(df.Compound).any():
accessions_not_found = df.loc[pd.isnull(df.Compound),
"Compound:Identifiers"]
error_msg = str(accessions_not_found.to_dict())
raise KeyError(f"Some compounds not found in equilibrator-cache: "
f"{error_msg}")
name_to_compound = df.Compound.to_dict()
df.set_index("Compound", inplace=True)
if bounds_unit is not None:
lbs = df['Concentration:Min'].apply(lambda x: Q_(float(x),
bounds_unit))
ubs = df['Concentration:Max'].apply(lambda x: Q_(float(x),
bounds_unit))
else:
lbs = df['Concentration:Min'].apply(Q_)
ubs = df['Concentration:Max'].apply(Q_)
bounds = Bounds(lbs.to_dict(), ubs.to_dict(), default_lb, default_ub)
bounds._check_bounds()
return bounds
def to_dataframe(self) -> pd.DataFrame:
data = []
for c in self.lower_bounds.keys():
data.append((str(c), self.GetLowerBound(c), self.GetUpperBound(c)))
compound_df = pd.DataFrame(data=data, columns=['compound', 'lb', 'ub'])
return compound_df
return bounds, name_to_compound
def _check_bounds(self) -> None:
if self.default_lb > self.default_ub:
......
......@@ -65,7 +65,8 @@ class Pathway(object):
Designed for checking input prior to converting to a stoichiometric model.
"""
@ureg.check(None, None, None, "[energy]/[substance]", "[energy]/[substance]", None)
@ureg.check(None, None, None, "[energy]/[substance]",
"[energy]/[substance]", None)
def __init__(self,
reactions: List[PhasedReaction],
fluxes: np.array,
......@@ -244,61 +245,60 @@ class Pathway(object):
sbtab = sbtabdoc.get_sbtab_by_id(table_id)
if sbtab is None:
tables = ', '.join(map(lambda s: s.table_id, sbtabdoc.sbtabs))
logging.error('%s contains the following TableIDs: %s' %
(file_name, tables))
raise PathwayParseError('The SBtabDocument must have a table '
'with the following TableID: %s'
% table_id)
raise PathwayParseError(f"The SBtabDocument must have a table "
f"with the following ID: {table_id}, "
f"however, only these tables were "
f"found: {tables}")
dfs.append(sbtab.to_data_frame())
bounds_df, reaction_df, flux_df, param_df = dfs
# TODO support also other identifiers (i.e. parse the column name and
# use the last string as the registry name)
bounds_df.index = bounds_df['Compound:Identifiers:kegg.compound'].apply(
lambda kegg_id: ccache.get_compound_from_registry('kegg', kegg_id))
compound_to_name = bounds_df.Compound.to_dict()
name_to_compound = dict(map(reversed, compound_to_name.items()))
bounds_unit_str = sbtabdoc.get_sbtab_by_id(
'ConcentrationConstraint').get_attribute('Unit')
bounds_unit = ureg(bounds_unit_str)
assert bounds_unit.check('[concentration]'), "Bounds must be in " \
"concentration units, " \
f"not '{bounds_unit_str}'"
bounds = Bounds.from_dataframe(bounds_df, bounds_unit)
try:
bounds_sbtab = sbtabdoc.get_sbtab_by_id('ConcentrationConstraint')
bounds_unit_str = bounds_sbtab.get_attribute('Unit')
bounds, name_to_compound = Bounds.from_dataframe(bounds_df,
bounds_unit_str)
except SBtab.SBtabError:
# if the unit is not defined in the header, we assume it is given
# next to each bound individually
bounds, name_to_compound = Bounds.from_dataframe(bounds_df)
reactions = []
for _, row in reaction_df.iterrows():
rxn = PhasedReaction.parse_formula(row['ReactionFormula'],
row['ID'],
reaction_ids = []
for row in reaction_df.itertuples():
rxn = PhasedReaction.parse_formula(row.ReactionFormula,
row.ID,
name_to_compound.get)
if not rxn.is_balanced(fix_protons=True,
fix_water=True,
raise_exception=False):
logging.warning(f"Reaction {rxn.rid} is not balanced")
reactions.append(rxn)
if row.ID in reaction_ids:
raise KeyError(f"Reaction IDs must be unique, but you have "
f"{row.ID} twice")
reaction_ids.append(row.ID)
reaction_ids = reaction_df['ID']
flux_df = flux_df[flux_df['QuantityType'] == 'flux'
].set_index('Reaction')['Value'].apply(float)
v = np.array(flux_df.loc[reaction_ids].tolist()) * Q_("dimensionless")
flux_df = flux_df[flux_df['QuantityType'] == 'flux']
flux_df.set_index('Reaction', inplace=True)
flux_df.Value = flux_df.Value.apply(float)
v = flux_df.loc[reaction_ids, 'Value'].values * Q_("dimensionless")
# grab rows containing keqs and calculate the standard Gibbs
# reaction energies from the equilibrium constants, in units of RT
keq_df = param_df[param_df['QuantityType'] == 'equilibrium constant'
].set_index('Reaction')['Value'].apply(float)
keq = np.array(keq_df.loc[reaction_ids].tolist()) * Q_("dimensionless")
# reaction energies from the equilibrium constants
keq_df = param_df[param_df['QuantityType'] == 'equilibrium constant']
keq_df.set_index('Reaction', inplace=True)
keq_df.Value = keq_df.Value.apply(float)
keq = keq_df.loc[reaction_ids, 'Value'].values
standard_dg_primes = -np.log(keq) * R * default_T
dg_uncertainties = np.zeros((len(reactions), len(reactions))) * Q_("kJ/mol")
dg_uncertainties = np.zeros((len(reactions), len(reactions))) * Q_(
"kJ/mol")
pp = Pathway(reactions, v, standard_dg_primes,
dg_uncertainties, bounds=bounds)
pp = Pathway(reactions, v, standard_dg_primes, dg_uncertainties,
bounds=bounds)
# override the compound names with the ones in the SBtab
compound_to_name = dict(map(reversed, name_to_compound.items()))
pp.set_compound_names(compound_to_name.get)
return pp
!!SBtab TableID='Reaction' TableType='Reaction' Document='Pathway Model' SBtabVersion='1.0'
!ID !ReactionFormula !Identifiers:kegg.reaction
glucokinas_R00299 d_glucose + atp <=> glucose_6_phosphate + adp R00299
phosphohex_R00771 glucose_6_phosphate <=> d_fructose_6_phosphoric_acid R00771
6_phosphof_R04779 atp + d_fructose_6_phosphoric_acid <=> adp + fructose_16_bisphosphate R04779
fructose_b_R01070 fructose_16_bisphosphate <=> glycerone_phosphate + d_glyceraldehyde_3_phosphate R01070
triose_pho_R01015 glycerone_phosphate <=> d_glyceraldehyde_3_phosphate R01015
glyceralde_R01061 orthophosphate + nad + d_glyceraldehyde_3_phosphate <=> nadh + 3_phospho_d_glyceroyl_phosphate R01061
phosphogly_R01512 adp + 3_phospho_d_glyceroyl_phosphate <=> atp + 3_phospho_d_glycerate R01512
phosphogly_R01518 3_phospho_d_glycerate <=> 2_phospho_d_glycerate R01518
phosphopyr_R00658 2_phospho_d_glycerate <=> phosphoenolpyruvate + h2o R00658
pyruvate_k_R00200 adp + phosphoenolpyruvate <=> pyruvate + atp R00200
pyruvate_d_R00224 pyruvate <=> acetaldehyde + co2 R00224
retinal_re_R00754 acetaldehyde + nadh <=> ethanol + nad R00754
!!SBtab TableID='RelativeFlux' TableType='Quantity' Document='Pathway Model' SBtabVersion='1.0'
!QuantityType !Reaction !Reaction:Identifiers:kegg.reaction !Value
flux glucokinas_R00299 R00299 1
flux phosphohex_R00771 R00771 1
flux 6_phosphof_R04779 R04779 1
flux fructose_b_R01070 R01070 1
flux triose_pho_R01015 R01015 1
flux glyceralde_R01061 R01061 2
flux phosphogly_R01512 R01512 2
flux phosphogly_R01518 R01518 2
flux phosphopyr_R00658 R00658 2
flux pyruvate_k_R00200 R00200 2
flux pyruvate_d_R00224 R00224 2
flux retinal_re_R00754 R00754 2
!!SBtab TableID='Parameter' TableType='Quantity' Document='Pathway Model' SBtabVersion='1.0' pH='7.00' 'IonicStrength='0.10' IonicStrengthUnit='M'
!QuantityType !Reaction !Value !Unit !Reaction:Identifiers:kegg.reaction !ID
equilibrium constant glucokinas_R00299 1061.24772470112 dimensionless R00299 kEQ_R0
equilibrium constant phosphohex_R00771 0.360889245415876 dimensionless R00771 kEQ_R1
equilibrium constant 6_phosphof_R04779 190.14472323307 dimensionless R04779 kEQ_R2
equilibrium constant fructose_b_R01070 0.000745159162699 dimensionless R01070 kEQ_R3
equilibrium constant triose_pho_R01015 0.109662570962434 dimensionless R01015 kEQ_R4
equilibrium constant glyceralde_R01061 0.043290979452048 dimensionless R01061 kEQ_R5
equilibrium constant phosphogly_R01512 1716.16163695823 dimensionless R01512 kEQ_R6
equilibrium constant phosphogly_R01518 0.182159355264605 dimensionless R01518 kEQ_R7
equilibrium constant phosphopyr_R00658 5.19572002201854 dimensionless R00658 kEQ_R8
equilibrium constant pyruvate_k_R00200 71137.9254008532 dimensionless R00200 kEQ_R9
equilibrium constant pyruvate_d_R00224 1620.6623703988 dimensionless R00224 kEQ_R10
equilibrium constant retinal_re_R00754 6320.79405106526 dimensionless R00754 kEQ_R11
!!SBtab TableID='ConcentrationConstraint' TableType='Quantity' Document='Pathway Model' SBtabVersion='1.0' Unit='M'
!QuantityType !Compound !Compound:Identifiers:kegg.compound !Concentration:Min !Concentration:Max
concentration co2 C00011 1E-05 1E-05
concentration d_fructose_6_phosphoric_acid C00085 1E-06 0.01
concentration nadh C00004 0.0001 0.0001
concentration glycerone_phosphate C00111 1E-06 0.01
concentration 2_phospho_d_glycerate C00631 1E-06 0.01
concentration orthophosphate C00009 0.01 0.01
concentration adp C00008 0.0005 0.0005
concentration 3_phospho_d_glycerate C00197 1E-06 0.01
concentration d_glucose C00031 1E-06 0.01
concentration nad C00003 0.001 0.001
concentration atp C00002 0.005 0.005
concentration phosphoenolpyruvate C00074 1E-06 0.01
concentration d_glyceraldehyde_3_phosphate C00118 1E-06 0.01
concentration acetaldehyde C00084 1E-06 0.01
concentration ethanol C00469 1E-06 0.01
concentration h2o C00001 1 1
concentration pyruvate C00022 1E-06 0.01
concentration glucose_6_phosphate C00092 1E-06 0.01
concentration fructose_16_bisphosphate C05378 1E-06 0.01
concentration 3_phospho_d_glyceroyl_phosphate C00236 1E-06 0.01
!!SBtab TableID='Reaction' TableType='Reaction' Document='Pathway Model' SBtabVersion='1.0'
!ID !ReactionFormula
glucokinase glucose + atp <=> g6p + adp
pgi g6p <=> f6p
pfk atp + f6p <=> adp + fbp
ald fbp <=> dhap + gap
tim dhap <=> gap
gapdh pi + nad + gap <=> nadh + bpg
pgk adp + bpg <=> atp + 3pg
pgm 3pg <=> 2pg
eno 2pg <=> pep + h2o
pdh adp + pep <=> pyr + atp
pyruvate_decarboxylase pyr <=> acetaldehyde + co2
adh acetaldehyde + nadh <=> ethanol + nad
!!SBtab TableID='RelativeFlux' TableType='Quantity' Document='Pathway Model' SBtabVersion='1.0'
!QuantityType !Reaction !Value
flux glucokinase 1
flux pgi 1
flux pfk 1
flux ald 1
flux tim 1
flux gapdh 2
flux pgk 2
flux pgm 2
flux eno 2
flux pdh 2
flux pyruvate_decarboxylase 2
flux adh 2
!!SBtab TableID='Parameter' TableType='Quantity' Document='Pathway Model' SBtabVersion='1.0' pH='7.00' 'IonicStrength='100 mM'
!QuantityType !Reaction !Value !ID
equilibrium constant glucokinase 1061.24772470112 kEQ_R0
equilibrium constant pgi 0.360889245415876 kEQ_R1
equilibrium constant pfk 190.14472323307 kEQ_R2
equilibrium constant ald 0.000745159162699 kEQ_R3
equilibrium constant tim 0.109662570962434 kEQ_R4
equilibrium constant gapdh 0.043290979452048 kEQ_R5
equilibrium constant pgk 1716.16163695823 kEQ_R6
equilibrium constant pgm 0.182159355264605 kEQ_R7
equilibrium constant eno 5.19572002201854 kEQ_R8
equilibrium constant pdh 71137.9254008532 kEQ_R9
equilibrium constant pyruvate_decarboxylase 1620.6623703988 kEQ_R10
equilibrium constant adh 6320.79405106526 kEQ_R11
!!SBtab TableID='ConcentrationConstraint' TableType='Quantity' Document='Pathway Model' SBtabVersion='1.0'
!QuantityType !Compound !Compound:Identifiers !Concentration:Min !Concentration:Max
concentration co2 bigg.metabolite:co2 10 uM 10 uM
concentration f6p bigg.metabolite:f6p 1 uM 10 mM
concentration nadh bigg.metabolite:nadh 0.1 mM 0.1 mM
concentration dhap bigg.metabolite:dhap 1 uM 10 mM
concentration 2pg bigg.metabolite:2pg 1 uM 10 mM
concentration pi bigg.metabolite:pi 10 mM 10 mM
concentration adp bigg.metabolite:adp 0.5 mM 0.5 mM
concentration 3pg bigg.metabolite:3pg 1 uM 10 mM
concentration glucose bigg.metabolite:glc__D 1 uM 10 mM
concentration nad bigg.metabolite:nad 1 mM 1 mM
concentration atp bigg.metabolite:atp 5 mM 5 mM
concentration pep bigg.metabolite:pep 1 uM 10 mM
concentration gap bigg.metabolite:g3p 1 uM 10 mM
concentration acetaldehyde bigg.metabolite:acald 1 uM 10 mM
concentration ethanol bigg.metabolite:etoh 1 uM 10 mM
concentration h2o bigg.metabolite:h2o 1 M 1 M
concentration pyr bigg.metabolite:pyr 1 uM 10 mM
concentration g6p bigg.metabolite:g6p 1 uM 10 mM
concentration fbp bigg.metabolite:fdp 1 uM 10 mM
concentration bpg bigg.metabolite:13dpg 1 uM 10 mM
......@@ -58,10 +58,10 @@ def test_mdf():
shadow_prices = pp_mdf_data.reaction_df.set_index(
'reaction_id').shadow_price
assert shadow_prices['glucokinas_R00299'] == pytest.approx(0.0, abs=1e-2)
assert shadow_prices['fructose_b_R01070'] == pytest.approx(0.25, abs=1e-2)
assert shadow_prices['triose_pho_R01015'] == pytest.approx(0.25, abs=1e-2)
assert shadow_prices['glyceralde_R01061'] == pytest.approx(0.50, abs=1e-2)
assert shadow_prices['glucokinase'] == pytest.approx(0.0, abs=1e-2)
assert shadow_prices['ald'] == pytest.approx(0.25, abs=1e-2)
assert shadow_prices['tim'] == pytest.approx(0.25, abs=1e-2)
assert shadow_prices['gapdh'] == pytest.approx(0.50, abs=1e-2)
compound_prices = pp_mdf_data.compound_df.set_index('compound').shadow_price
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment