Commit 01cfd19b authored by Elad Noor's avatar Elad Noor

adding optional uncertainty matrix representation formats

parent 3396e37b
Pipeline #192382168 passed with stage
in 7 minutes and 32 seconds
......@@ -15,6 +15,7 @@ classifiers =
License :: OSI Approved :: MIT License
Natural Language :: English
Programming Language :: Python :: 3
Programming Language :: Python :: 3.6
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
license = MIT License
......@@ -29,11 +30,11 @@ keywords =
[options]
zip_safe = True
install_requires =
pyparsing>=2.4.7
python-slugify>=4.0.1
pyparsing~=2.4
python-slugify~=4.0
equilibrator-cache==0.3.2
component-contribution==0.3.2
python_requires = >=3.7
component-contribution==0.4.0b1
python_requires = >=3.6
tests_require =
tox
packages = find:
......
......@@ -27,15 +27,10 @@
import logging
from typing import Dict, Iterable, List, Optional, Tuple, Union
import component_contribution
import numpy as np
import pandas as pd
from component_contribution import DEFAULT_QUILT_PKG as DEFAULT_QUILT_PKG_CC
from component_contribution import (
DEFAULT_QUILT_VERSION as DEFAULT_QUILT_VERSION_CC,
)
from component_contribution import (
LEGACY_QUILT_VERSION as LEGACY_QUILT_VERSION_CC,
)
from component_contribution.linalg import LINALG
from component_contribution.predict import (
CCModelParameters,
GibbsEnergyPredictor,
......@@ -47,10 +42,7 @@ from equilibrator_cache import (
CompoundMicrospecies,
create_compound_cache_from_quilt,
)
from equilibrator_cache.api import DEFAULT_QUILT_PKG as DEFAULT_QUILT_PKG_CACHE
from equilibrator_cache.api import (
DEFAULT_QUILT_VERSION as DEFAULT_QUILT_VERSION_CACHE,
)
from equilibrator_cache.api import DEFAULT_QUILT_PKG, DEFAULT_QUILT_VERSION
from equilibrator_cache.reaction import (
create_stoichiometric_matrix_from_reactions,
)
......@@ -150,8 +142,8 @@ class ComponentContribution(object):
self.ccache = ccache or create_compound_cache_from_quilt()
if predictor is None:
parameters = CCModelParameters.from_quilt(
version=DEFAULT_QUILT_VERSION_CC, force=True
parameters = CCModelParameters.from_zenodo(
zenodo_doi=component_contribution.ZENODO_DOI_PARAMETERS
)
self.predictor = GibbsEnergyPredictor(
parameters=parameters, rmse_inf=rmse_inf
......@@ -222,7 +214,7 @@ class ComponentContribution(object):
A ComponentContribution object
"""
cc = ComponentContribution.initialize_custom_version(
quilt_version_cc=LEGACY_QUILT_VERSION_CC
zenodo_doi_cc=component_contribution.ZENODO_DOI_PARAMETERS_LEGACY
)
cc.p_mg = Q_(14) # i.e. set [Mg2+] to (essentially) zero
return cc
......@@ -230,14 +222,11 @@ class ComponentContribution(object):
@staticmethod
def initialize_custom_version(
rmse_inf: Q_ = default_rmse_inf,
quilt_package_cache: str = DEFAULT_QUILT_PKG_CACHE,
quilt_version_cache: Optional[str] = DEFAULT_QUILT_VERSION_CACHE,
quilt_package_cache: str = DEFAULT_QUILT_PKG,
quilt_version_cache: Optional[str] = DEFAULT_QUILT_VERSION,
quilt_tag_cache: Optional[str] = None,
quilt_hash_cache: Optional[str] = None,
quilt_package_cc: str = DEFAULT_QUILT_PKG_CC,
quilt_version_cc: Optional[str] = DEFAULT_QUILT_VERSION_CC,
quilt_tag_cc: Optional[str] = None,
quilt_hash_cc: Optional[str] = None,
zenodo_doi_cc: str = component_contribution.ZENODO_DOI_PARAMETERS,
) -> "ComponentContribution":
"""Initialize a ComponentContribution object with custom quilt versions.
......@@ -254,12 +243,8 @@ class ComponentContribution(object):
(Default value: "0.2.10")
quilt_tag_cache : str, optional
quilt_hash_cache: str, optional
quilt_package_cc : str, optional
(Default value: "equilibrator/component_contribution")
quilt_version_cc : str, optional
(Default value: "0.3.0")
quilt_tag_cc : str, optional
quilt_hash_cc: str, optional
zenodo_doi_cc : str, optional
(Default value: "10.5281/zenodo.4013789")
Returns
-------
......@@ -273,13 +258,7 @@ class ComponentContribution(object):
version=quilt_version_cache,
force=True,
)
parameters = CCModelParameters.from_quilt(
package=quilt_package_cc,
hash=quilt_hash_cc,
tag=quilt_tag_cc,
version=quilt_version_cc,
force=True,
)
parameters = CCModelParameters.from_zenodo(zenodo_doi_cc)
predictor = GibbsEnergyPredictor(
parameters=parameters, rmse_inf=rmse_inf
)
......@@ -479,28 +458,59 @@ class ComponentContribution(object):
return standard_dg + stored_dg
def standard_dg_multi(
self, reactions: List[PhasedReaction]
self,
reactions: List[PhasedReaction],
uncertainty_representation: str = "cov",
) -> Tuple[np.ndarray, np.ndarray]:
"""Calculate the chemical reaction energies of a list of reactions.
Using the major microspecies of each of the reactants.
Parameters
----------
reactions : List[PhasedReaction]
a list of PhasedReaction objects to estimate
uncertainty_representation : str
which representation to use for the uncertainties. 'cov' would
return a full covariance matrix. 'sqrt' would return a sqaure root
of the covariance, based on the uncertainty vectors.
'fullrank' would return a full-rank square root of the covariance
which is a compressed form of the 'sqrt' result.
(Default value: 'cov')
Returns
-------
standard_dg : ndarray
standard_dg : Quantity
the CC estimation of the major microspecies' standard formation
energy
dg_sigma : ndarray
the uncertainty matrix.
dg_uncertainty : Quantity
the uncertainty matrix (in either 'cov', 'sqrt' or 'fullrank'
format)
"""
rxn_dg_pairs = map(lambda r: r.separate_stored_dg(), reactions)
residual_reactions, stored_dg = zip(*rxn_dg_pairs)
stored_dg = np.array(stored_dg)
(standard_dg, dg_sigma) = self.predictor.standard_dg_multi(
mu, sigma, residual = self.predictor.get_reaction_prediction_multi(
residual_reactions
)
return standard_dg + stored_dg, dg_sigma
standard_dg = Q_(stored_dg + mu, "kJ/mol")
cov_sqrt = np.hstack([sigma, residual.T])
if uncertainty_representation == "cov":
dg_uncertainty = Q_(cov_sqrt @ cov_sqrt.T, "(kJ/mol)**2")
elif uncertainty_representation == "sqrt":
dg_uncertainty = Q_(cov_sqrt, "kJ/mol")
elif uncertainty_representation == "fullrank":
dg_uncertainty = Q_(
LINALG.qr_rank_deficient(cov_sqrt.T).T, "kJ/mol"
)
else:
return ValueError(
"uncertainty_representation must be 'cov', 'sqrt' or 'fullrank'"
)
return standard_dg, dg_uncertainty
def standard_dg_prime(self, reaction: PhasedReaction) -> ureg.Measurement:
"""Calculate the transformed reaction energies of a reaction.
......
"""unit test for uncertainties."""
# The MIT License (MIT)
#
# Copyright (c) 2013 Weizmann Institute of Science
# Copyright (c) 2018 Institute for Molecular Systems Biology,
# ETH Zurich
# Copyright (c) 2018 Novo Nordisk Foundation Center for Biosustainability,
# Technical University of Denmark
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import pytest
def test_uncertainty_vectors(comp_contribution):
"""Test how reactions are balanced by oxidation (with Mg2+).
Include Mg2+ effects in the training procedure.
"""
rxn1 = comp_contribution.parse_reaction_formula(
"kegg:C00002 + kegg:C00001 = kegg:C00008 + kegg:C00009"
)
rxn2 = comp_contribution.parse_reaction_formula(
"kegg:C00002 + kegg:C00001 = kegg:C00008 + kegg:C00009"
)
_, Ucov = comp_contribution.standard_dg_multi(
[rxn1, rxn2], uncertainty_representation="cov"
)
_, Usqrt = comp_contribution.standard_dg_multi(
[rxn1, rxn2], uncertainty_representation="sqrt"
)
_, Ufullrank = comp_contribution.standard_dg_multi(
[rxn1, rxn2], uncertainty_representation="fullrank"
)
assert Ucov.shape == (2, 2)
assert Usqrt.shape == (2, 669)
assert Ufullrank.shape == (2, 1)
assert Ucov.m_as("(kJ/mol)**2") == pytest.approx(0.0926, rel=1e-2)
assert Ucov - Usqrt @ Usqrt.T == pytest.approx(0, abs=1e-3)
assert Ucov - Ufullrank @ Ufullrank.T == pytest.approx(0, abs=1e-3)
[tox]
envlist = isort, black, flake8, safety, py{37,38}
envlist = isort, black, flake8, safety, py{38}
[testenv]
setenv =
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment