Commit 6d870095 authored by Elad Noor's avatar Elad Noor

Merge branch 'uncertainty_representation' into 'develop'

implementing uncertainty_representation flag for standard_dg_prime_multi

See merge request !15
parents 01cfd19b 43f4865f
Pipeline #200767686 passed with stages
in 8 minutes and 46 seconds
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE. # THE SOFTWARE.
import logging import logging
import warnings
from typing import Dict, Iterable, List, Optional, Tuple, Union from typing import Dict, Iterable, List, Optional, Tuple, Union
import component_contribution import component_contribution
...@@ -43,6 +44,7 @@ from equilibrator_cache import ( ...@@ -43,6 +44,7 @@ from equilibrator_cache import (
create_compound_cache_from_quilt, create_compound_cache_from_quilt,
) )
from equilibrator_cache.api import DEFAULT_QUILT_PKG, DEFAULT_QUILT_VERSION from equilibrator_cache.api import DEFAULT_QUILT_PKG, DEFAULT_QUILT_VERSION
from equilibrator_cache.exceptions import MissingDissociationConstantsException
from equilibrator_cache.reaction import ( from equilibrator_cache.reaction import (
create_stoichiometric_matrix_from_reactions, create_stoichiometric_matrix_from_reactions,
) )
...@@ -481,8 +483,8 @@ class ComponentContribution(object): ...@@ -481,8 +483,8 @@ class ComponentContribution(object):
Returns Returns
------- -------
standard_dg : Quantity standard_dg : Quantity
the CC estimation of the major microspecies' standard formation the estimated standard reaction Gibbs energies based on the the
energy major microspecies
dg_uncertainty : Quantity dg_uncertainty : Quantity
the uncertainty matrix (in either 'cov', 'sqrt' or 'fullrank' the uncertainty matrix (in either 'cov', 'sqrt' or 'fullrank'
format) format)
...@@ -553,21 +555,31 @@ class ComponentContribution(object): ...@@ -553,21 +555,31 @@ class ComponentContribution(object):
) )
def standard_dg_prime_multi( def standard_dg_prime_multi(
self, reactions: List[PhasedReaction] self,
reactions: List[PhasedReaction],
uncertainty_representation: str = "cov",
) -> Tuple[np.array, np.array]: ) -> Tuple[np.array, np.array]:
"""Calculate the transformed reaction energies of a list of reactions. """Calculate the transformed reaction energies of a list of reactions.
Parameters
----------
reactions : List[PhasedReaction]
a list of PhasedReaction objects to estimate
uncertainty_representation : str
which representation to use for the uncertainties. 'cov' would
return a full covariance matrix. 'sqrt' would return a sqaure root
of the covariance, based on the uncertainty vectors.
'fullrank' would return a full-rank square root of the covariance
which is a compressed form of the 'sqrt' result.
(Default value: 'cov')
Returns Returns
------- -------
standard_dg_primes : ndarray standard_dg : Quantity
a 1D NumPy array containing the CC estimates for the reactions' the CC estimation of the reactions' standard transformed energies
transformed dG0 dg_uncertainty : Quantity
dg_sigma : ndarray the uncertainty co-variance matrix
the second is a 2D numpy array containing the covariance matrix (in either 'cov', 'sqrt' or 'fullrank' format)
of the standard errors of the estimates. one can use the
eigenvectors of the matrix to define a confidence high-dimensional
space, or use dg_sigma as the covariance of a Gaussian used for
sampling (where 'standard_dg_primes' is the mean of that Gaussian).
""" """
rxn_dg_pairs = map( rxn_dg_pairs = map(
lambda r: r.separate_stored_dg_prime( lambda r: r.separate_stored_dg_prime(
...@@ -581,14 +593,41 @@ class ComponentContribution(object): ...@@ -581,14 +593,41 @@ class ComponentContribution(object):
residual_reactions, stored_dg_primes = zip(*rxn_dg_pairs) residual_reactions, stored_dg_primes = zip(*rxn_dg_pairs)
stored_dg_primes = np.array(stored_dg_primes) stored_dg_primes = np.array(stored_dg_primes)
(standard_dg_prime, dg_sigma) = self.predictor.standard_dg_prime_multi( mu, sigma, residual = self.predictor.get_reaction_prediction_multi(
residual_reactions, residual_reactions
p_h=self.p_h,
p_mg=self.p_mg,
ionic_strength=self.ionic_strength,
temperature=self.temperature,
) )
return standard_dg_prime + stored_dg_primes, dg_sigma standard_dg_prime = Q_(stored_dg_primes + mu, "kJ/mol")
for i, r in enumerate(residual_reactions):
try:
standard_dg_prime[i] += r.transform(
p_h=self.p_h,
p_mg=self.p_mg,
ionic_strength=self.ionic_strength,
temperature=self.temperature,
)
except MissingDissociationConstantsException as e:
warnings.warn(
f"Cannot calculate Legendre transform for reaction #{i}: "
+ str(e)
)
cov_sqrt = np.hstack([sigma, residual.T])
if uncertainty_representation == "cov":
dg_uncertainty = Q_(cov_sqrt @ cov_sqrt.T, "(kJ/mol)**2")
elif uncertainty_representation == "sqrt":
dg_uncertainty = Q_(cov_sqrt, "kJ/mol")
elif uncertainty_representation == "fullrank":
dg_uncertainty = Q_(
LINALG.qr_rank_deficient(cov_sqrt.T).T, "kJ/mol"
)
else:
return ValueError(
"uncertainty_representation must be 'cov', 'sqrt' or 'fullrank'"
)
return standard_dg_prime, dg_uncertainty
def physiological_dg_prime( def physiological_dg_prime(
self, reaction: PhasedReaction self, reaction: PhasedReaction
......
...@@ -58,10 +58,11 @@ def comp_contribution() -> ComponentContribution: ...@@ -58,10 +58,11 @@ def comp_contribution() -> ComponentContribution:
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def reaction_dict(comp_contribution) -> Dict[str, Reaction]: def reaction_dict(comp_contribution) -> Dict[str, Reaction]:
"""Create a ATP hydrolysis reaction.""" """Create a list of reaction objects for testing."""
formulas = [ formulas = [
("missing_water", "kegg:C00002 = kegg:C00008 + kegg:C00009"), ("missing_water", "kegg:C00002 = kegg:C00008 + kegg:C00009"),
("atpase", "kegg:C00002 + kegg:C00001 = kegg:C00008 + kegg:C00009"), ("atpase", "kegg:C00002 + kegg:C00001 = kegg:C00008 + kegg:C00009"),
("adenylate_kinase", "2 kegg:C00008 = kegg:C00002 + kegg:C00020"),
("fermentation_gas", "kegg:C00031 = 2 kegg:C00469 + 2 kegg:C00011"), ("fermentation_gas", "kegg:C00031 = 2 kegg:C00469 + 2 kegg:C00011"),
("pyruvate_decarboxylase", "kegg:C00022 = kegg:C00084 + kegg:C00011"), ("pyruvate_decarboxylase", "kegg:C00022 = kegg:C00084 + kegg:C00011"),
("oxaloacetate_half", "kegg:C00036 = kegg:C00149"), ("oxaloacetate_half", "kegg:C00036 = kegg:C00149"),
...@@ -70,6 +71,10 @@ def reaction_dict(comp_contribution) -> Dict[str, Reaction]: ...@@ -70,6 +71,10 @@ def reaction_dict(comp_contribution) -> Dict[str, Reaction]:
"kegg:C09844 + kegg:C00003 + kegg:C00001 => " "kegg:C09844 + kegg:C00003 + kegg:C00001 => "
"kegg:C03092 + kegg:C00004", "kegg:C03092 + kegg:C00004",
), ),
(
"nucleoside_diphosphase",
"kegg:C00454 + kegg:C00001 = kegg:C00215 + kegg:C00009",
), # this reaction includes generic compounds
] ]
return { return {
name: comp_contribution.parse_reaction_formula(formula) name: comp_contribution.parse_reaction_formula(formula)
......
...@@ -29,32 +29,50 @@ ...@@ -29,32 +29,50 @@
import pytest import pytest
def test_uncertainty_vectors(comp_contribution): def test_uncertainty_vectors(comp_contribution, reaction_dict):
"""Test how reactions are balanced by oxidation (with Mg2+). """Test the different formats of the uncertainty matrix."""
reactions = [reaction_dict["atpase"], reaction_dict["atpase"]]
Include Mg2+ effects in the training procedure. _, Ucov = comp_contribution.standard_dg_prime_multi(
""" reactions, uncertainty_representation="cov"
rxn1 = comp_contribution.parse_reaction_formula(
"kegg:C00002 + kegg:C00001 = kegg:C00008 + kegg:C00009"
) )
rxn2 = comp_contribution.parse_reaction_formula( _, Usqrt = comp_contribution.standard_dg_prime_multi(
"kegg:C00002 + kegg:C00001 = kegg:C00008 + kegg:C00009" reactions, uncertainty_representation="sqrt"
) )
_, Ufullrank = comp_contribution.standard_dg_prime_multi(
reactions, uncertainty_representation="fullrank"
)
assert Ucov.shape == (2, 2)
assert Usqrt.shape == (2, 669)
assert Ufullrank.shape == (2, 1)
assert Ucov.m_as("(kJ/mol)**2") == pytest.approx(0.0926, rel=1e-2)
assert Ucov - Usqrt @ Usqrt.T == pytest.approx(0, abs=1e-3)
assert Ucov - Ufullrank @ Ufullrank.T == pytest.approx(0, abs=1e-3)
def test_uncertainty_residuals(comp_contribution, reaction_dict):
"""Test that uncertainty due to generic compounds is handeled correctly."""
reactions = [
reaction_dict["nucleoside_diphosphase"],
reaction_dict["adenylate_kinase"],
reaction_dict["atpase"],
]
_, Ucov = comp_contribution.standard_dg_multi( _, Ucov = comp_contribution.standard_dg_multi(
[rxn1, rxn2], uncertainty_representation="cov" reactions, uncertainty_representation="cov"
) )
_, Usqrt = comp_contribution.standard_dg_multi( _, Usqrt = comp_contribution.standard_dg_multi(
[rxn1, rxn2], uncertainty_representation="sqrt" reactions, uncertainty_representation="sqrt"
) )
_, Ufullrank = comp_contribution.standard_dg_multi( _, Ufullrank = comp_contribution.standard_dg_multi(
[rxn1, rxn2], uncertainty_representation="fullrank" reactions, uncertainty_representation="fullrank"
) )
assert Ucov.shape == (2, 2) assert Ucov.shape == (3, 3)
assert Usqrt.shape == (2, 669) assert Usqrt.shape == (3, 671)
assert Ufullrank.shape == (2, 1) assert Ufullrank.shape == (3, 3)
assert Ucov.m_as("(kJ/mol)**2") == pytest.approx(0.0926, rel=1e-2)
assert Ucov - Usqrt @ Usqrt.T == pytest.approx(0, abs=1e-3) assert Ucov - Usqrt @ Usqrt.T == pytest.approx(0, abs=1e-3)
assert Ucov - Ufullrank @ Ufullrank.T == pytest.approx(0, abs=1e-3) assert Ucov - Ufullrank @ Ufullrank.T == pytest.approx(0, abs=1e-3)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment