Commit 6d870095 authored by Elad Noor's avatar Elad Noor

Merge branch 'uncertainty_representation' into 'develop'

implementing uncertainty_representation flag for standard_dg_prime_multi

See merge request !15
parents 01cfd19b 43f4865f
Pipeline #200767686 passed with stages
in 8 minutes and 46 seconds
......@@ -25,6 +25,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import logging
import warnings
from typing import Dict, Iterable, List, Optional, Tuple, Union
import component_contribution
......@@ -43,6 +44,7 @@ from equilibrator_cache import (
create_compound_cache_from_quilt,
)
from equilibrator_cache.api import DEFAULT_QUILT_PKG, DEFAULT_QUILT_VERSION
from equilibrator_cache.exceptions import MissingDissociationConstantsException
from equilibrator_cache.reaction import (
create_stoichiometric_matrix_from_reactions,
)
......@@ -481,8 +483,8 @@ class ComponentContribution(object):
Returns
-------
standard_dg : Quantity
the CC estimation of the major microspecies' standard formation
energy
the estimated standard reaction Gibbs energies based on the the
major microspecies
dg_uncertainty : Quantity
the uncertainty matrix (in either 'cov', 'sqrt' or 'fullrank'
format)
......@@ -553,21 +555,31 @@ class ComponentContribution(object):
)
def standard_dg_prime_multi(
self, reactions: List[PhasedReaction]
self,
reactions: List[PhasedReaction],
uncertainty_representation: str = "cov",
) -> Tuple[np.array, np.array]:
"""Calculate the transformed reaction energies of a list of reactions.
Parameters
----------
reactions : List[PhasedReaction]
a list of PhasedReaction objects to estimate
uncertainty_representation : str
which representation to use for the uncertainties. 'cov' would
return a full covariance matrix. 'sqrt' would return a sqaure root
of the covariance, based on the uncertainty vectors.
'fullrank' would return a full-rank square root of the covariance
which is a compressed form of the 'sqrt' result.
(Default value: 'cov')
Returns
-------
standard_dg_primes : ndarray
a 1D NumPy array containing the CC estimates for the reactions'
transformed dG0
dg_sigma : ndarray
the second is a 2D numpy array containing the covariance matrix
of the standard errors of the estimates. one can use the
eigenvectors of the matrix to define a confidence high-dimensional
space, or use dg_sigma as the covariance of a Gaussian used for
sampling (where 'standard_dg_primes' is the mean of that Gaussian).
standard_dg : Quantity
the CC estimation of the reactions' standard transformed energies
dg_uncertainty : Quantity
the uncertainty co-variance matrix
(in either 'cov', 'sqrt' or 'fullrank' format)
"""
rxn_dg_pairs = map(
lambda r: r.separate_stored_dg_prime(
......@@ -581,14 +593,41 @@ class ComponentContribution(object):
residual_reactions, stored_dg_primes = zip(*rxn_dg_pairs)
stored_dg_primes = np.array(stored_dg_primes)
(standard_dg_prime, dg_sigma) = self.predictor.standard_dg_prime_multi(
residual_reactions,
p_h=self.p_h,
p_mg=self.p_mg,
ionic_strength=self.ionic_strength,
temperature=self.temperature,
mu, sigma, residual = self.predictor.get_reaction_prediction_multi(
residual_reactions
)
return standard_dg_prime + stored_dg_primes, dg_sigma
standard_dg_prime = Q_(stored_dg_primes + mu, "kJ/mol")
for i, r in enumerate(residual_reactions):
try:
standard_dg_prime[i] += r.transform(
p_h=self.p_h,
p_mg=self.p_mg,
ionic_strength=self.ionic_strength,
temperature=self.temperature,
)
except MissingDissociationConstantsException as e:
warnings.warn(
f"Cannot calculate Legendre transform for reaction #{i}: "
+ str(e)
)
cov_sqrt = np.hstack([sigma, residual.T])
if uncertainty_representation == "cov":
dg_uncertainty = Q_(cov_sqrt @ cov_sqrt.T, "(kJ/mol)**2")
elif uncertainty_representation == "sqrt":
dg_uncertainty = Q_(cov_sqrt, "kJ/mol")
elif uncertainty_representation == "fullrank":
dg_uncertainty = Q_(
LINALG.qr_rank_deficient(cov_sqrt.T).T, "kJ/mol"
)
else:
return ValueError(
"uncertainty_representation must be 'cov', 'sqrt' or 'fullrank'"
)
return standard_dg_prime, dg_uncertainty
def physiological_dg_prime(
self, reaction: PhasedReaction
......
......@@ -58,10 +58,11 @@ def comp_contribution() -> ComponentContribution:
@pytest.fixture(scope="module")
def reaction_dict(comp_contribution) -> Dict[str, Reaction]:
"""Create a ATP hydrolysis reaction."""
"""Create a list of reaction objects for testing."""
formulas = [
("missing_water", "kegg:C00002 = kegg:C00008 + kegg:C00009"),
("atpase", "kegg:C00002 + kegg:C00001 = kegg:C00008 + kegg:C00009"),
("adenylate_kinase", "2 kegg:C00008 = kegg:C00002 + kegg:C00020"),
("fermentation_gas", "kegg:C00031 = 2 kegg:C00469 + 2 kegg:C00011"),
("pyruvate_decarboxylase", "kegg:C00022 = kegg:C00084 + kegg:C00011"),
("oxaloacetate_half", "kegg:C00036 = kegg:C00149"),
......@@ -70,6 +71,10 @@ def reaction_dict(comp_contribution) -> Dict[str, Reaction]:
"kegg:C09844 + kegg:C00003 + kegg:C00001 => "
"kegg:C03092 + kegg:C00004",
),
(
"nucleoside_diphosphase",
"kegg:C00454 + kegg:C00001 = kegg:C00215 + kegg:C00009",
), # this reaction includes generic compounds
]
return {
name: comp_contribution.parse_reaction_formula(formula)
......
......@@ -29,32 +29,50 @@
import pytest
def test_uncertainty_vectors(comp_contribution):
"""Test how reactions are balanced by oxidation (with Mg2+).
def test_uncertainty_vectors(comp_contribution, reaction_dict):
"""Test the different formats of the uncertainty matrix."""
reactions = [reaction_dict["atpase"], reaction_dict["atpase"]]
Include Mg2+ effects in the training procedure.
"""
rxn1 = comp_contribution.parse_reaction_formula(
"kegg:C00002 + kegg:C00001 = kegg:C00008 + kegg:C00009"
_, Ucov = comp_contribution.standard_dg_prime_multi(
reactions, uncertainty_representation="cov"
)
rxn2 = comp_contribution.parse_reaction_formula(
"kegg:C00002 + kegg:C00001 = kegg:C00008 + kegg:C00009"
_, Usqrt = comp_contribution.standard_dg_prime_multi(
reactions, uncertainty_representation="sqrt"
)
_, Ufullrank = comp_contribution.standard_dg_prime_multi(
reactions, uncertainty_representation="fullrank"
)
assert Ucov.shape == (2, 2)
assert Usqrt.shape == (2, 669)
assert Ufullrank.shape == (2, 1)
assert Ucov.m_as("(kJ/mol)**2") == pytest.approx(0.0926, rel=1e-2)
assert Ucov - Usqrt @ Usqrt.T == pytest.approx(0, abs=1e-3)
assert Ucov - Ufullrank @ Ufullrank.T == pytest.approx(0, abs=1e-3)
def test_uncertainty_residuals(comp_contribution, reaction_dict):
"""Test that uncertainty due to generic compounds is handeled correctly."""
reactions = [
reaction_dict["nucleoside_diphosphase"],
reaction_dict["adenylate_kinase"],
reaction_dict["atpase"],
]
_, Ucov = comp_contribution.standard_dg_multi(
[rxn1, rxn2], uncertainty_representation="cov"
reactions, uncertainty_representation="cov"
)
_, Usqrt = comp_contribution.standard_dg_multi(
[rxn1, rxn2], uncertainty_representation="sqrt"
reactions, uncertainty_representation="sqrt"
)
_, Ufullrank = comp_contribution.standard_dg_multi(
[rxn1, rxn2], uncertainty_representation="fullrank"
reactions, uncertainty_representation="fullrank"
)
assert Ucov.shape == (2, 2)
assert Usqrt.shape == (2, 669)
assert Ufullrank.shape == (2, 1)
assert Ucov.shape == (3, 3)
assert Usqrt.shape == (3, 671)
assert Ufullrank.shape == (3, 3)
assert Ucov.m_as("(kJ/mol)**2") == pytest.approx(0.0926, rel=1e-2)
assert Ucov - Usqrt @ Usqrt.T == pytest.approx(0, abs=1e-3)
assert Ucov - Ufullrank @ Ufullrank.T == pytest.approx(0, abs=1e-3)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment