Commit c6ee1731 authored by Elad Noor

using Git LFS and gzip to make things smaller and faster

parent b0a5e530
Pipeline #39006372 failed with stage
in 2 minutes and 6 seconds
*.json.gz filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
......@@ -50,7 +50,7 @@ where = src
equilibrator_api.data =
*.csv
*.tsv
*.json
*.json.gz
*.npz
[wheel]
......
This diff is collapsed.
import json
import logging
import re
from gzip import decompress
import pandas as pd
import pkg_resources
......@@ -14,8 +15,8 @@ from equilibrator_api.compound import Compound
class Reaction(object):
# The compound table ships gzip-compressed inside the package data, so the
# raw bytes are decompressed before being decoded as UTF-8 JSON.
_json_str = pkg_resources.resource_stream('equilibrator_api',
                                          'data/cc_compounds.json.gz')
COMPOUND_JSON = json.loads(decompress(_json_str.read()).decode('utf8'))
# load formation energies from the JSON file
COMPOUND_DICT = {}
......@@ -32,13 +33,13 @@ class Reaction(object):
self.kegg_id_to_compound = {}
for kegg_id in self.kegg_id_to_coeff.keys():
self.get_compound(kegg_id, create_if_missing=True)
if rid is not None:
self.reaction_id = rid
else:
self.reaction_id = 'R%05d' % Reaction.REACTION_COUNTER
Reaction.REACTION_COUNTER += 1
@property
def kegg_ids(self):
return self.kegg_id_to_coeff.keys()
......@@ -79,27 +80,27 @@ class Reaction(object):
return None
dG0_r_prime += coeff * dG0_f_prime
return dG0_r_prime
def dG_correction(self, kegg_id_to_conc):
    """
        Calculate the concentration adjustment in the dG' of reaction.

        Arguments:
            kegg_id_to_conc - a dictionary mapping KEGG compound ID
                              to concentration in M (default is 1M)

        Returns:
            settings.RT multiplied by the sum, over the non-water compounds
            that have an entry in kegg_id_to_conc, of the stoichiometric
            coefficient times log(concentration). Compounds that are missing
            from kegg_id_to_conc contribute nothing to the sum.
    """
    # Only compounds that both take part in the reaction (water excluded)
    # and have an explicit concentration enter the correction term.
    kegg_ids = set(self.kegg_ids_without_water).intersection(
        kegg_id_to_conc.keys())
    dG_correction = sum(self.get_coeff(c) * log(kegg_id_to_conc[c])
                        for c in kegg_ids)
    return settings.RT * dG_correction
def dG_prime(self, kegg_id_to_conc,
def dG_prime(self, kegg_id_to_conc,
pH=settings.DEFAULT_PH, pMg=settings.DEFAULT_PMG,
ionic_strength=settings.DEFAULT_IONIC_STRENGTH):
"""
Calculate the concentration-adjusted dG' of the reaction.
Arguments:
kegg_id_to_conc - a dictionary mapping KEGG compound ID
kegg_id_to_conc - a dictionary mapping KEGG compound ID
to concentration in M (default is 1M)
pH, pMg, ionic_strength - aqueous conditions
"""
......@@ -124,7 +125,7 @@ class Reaction(object):
"""
abs_sum_coeff = sum(map(lambda cid: abs(self.get_coeff(cid)),
self.kegg_ids_without_water))
return abs_sum_coeff
return abs_sum_coeff
def dGm_correction(self):
"""
......@@ -138,7 +139,8 @@ class Reaction(object):
"""
return self.dG0_prime() + self.dGm_correction()
def reversibility_index(self, pH=settings.DEFAULT_PH, pMg=settings.DEFAULT_PMG,
def reversibility_index(self, pH=settings.DEFAULT_PH,
pMg=settings.DEFAULT_PMG,
ionic_strength=settings.DEFAULT_IONIC_STRENGTH):
"""
Calculates the reversibility index according to Noor et al. 2012:
......@@ -179,7 +181,7 @@ class Reaction(object):
return {}
compound_bag = {}
for member in re.split('\s+\+\s+', s):
for member in re.split(r'\s+\+\s+', s):
tokens = member.split(None, 1) # check for stoichiometric coeff
if len(tokens) == 0:
continue
......@@ -201,7 +203,7 @@ class Reaction(object):
def parse_formula(formula, name_to_cid=None, rid=None):
"""
Parses a two-sided formula such as: 2 C00001 = C00002 + C00003
Args:
formula - a string representation of the chemical formula
name_to_cid - (optional) a dictionary mapping names to KEGG IDs
......@@ -239,10 +241,10 @@ class Reaction(object):
sparse_reaction = \
dict(zip(map(name_to_cid.get, sparse_reaction.keys()),
sparse_reaction.values()))
if 'C00080' in sparse_reaction:
sparse_reaction.pop('C00080')
return Reaction(sparse_reaction, rid=rid)
@staticmethod
......@@ -266,7 +268,7 @@ class Reaction(object):
def __str__(self):
return self.write_formula()
def __repr__(self):
return '<Reaction %s at 0x%x>' % (self.reaction_id, id(self))
......@@ -319,7 +321,7 @@ class Reaction(object):
atom_balance_dict = self._get_reaction_atom_balance()
if atom_balance_dict is None:
return None
n_e = atom_balance_dict.pop('e-', 0)
if not self._check_balancing(atom_balance_dict):
return None
......@@ -377,61 +379,64 @@ class Reaction(object):
for atom, coeff in atom_bag.items():
S.at[atom, kegg_id] = coeff
S.fillna(0, inplace=True)
balancing_atoms = S.index
atom_bag = self._get_reaction_atom_balance()
if atom_bag is None:
logging.warning('Cannot balance this reaction due to'
' missing chemical formulas')
return self
atom_vector = array(list(map(lambda a: atom_bag.get(a, 0),
balancing_atoms)), ndmin=2).T
balancing_atoms)), ndmin=2).T
other_atoms = set(atom_bag.keys()).difference(balancing_atoms)
if other_atoms:
raise ValueError('Cannot oxidize compounds with these atoms: '
'%s\nFormula is %s' %
(str(other_atoms), self.write_formula()))
imbalance = inv(S) @ atom_vector
for kegg_id, coeff in zip(balancing_ids, imbalance.flat):
self.kegg_id_to_coeff[kegg_id] = \
self.kegg_id_to_coeff.get(kegg_id, 0) - coeff
return self
@staticmethod
def get_oxidation_reaction(kegg_id):
    """
        Build the Reaction describing the oxidation of a single compound
        by O2.

        Nitrogen atoms, if present, must end up as NH3 (and not N2) so that
        the reaction represents biological processes. Atoms other than
        C, N, H, and O will raise an exception.

        Arguments:
            kegg_id - the KEGG ID of the compound to oxidize
    """
    # Start from the compound alone, with coefficient -1, and let
    # balance_by_oxidation() fill in the remaining compounds.
    substrate_only = Reaction({kegg_id: -1})
    return substrate_only.balance_by_oxidation()
if __name__ == '__main__':
    import sys

    # Demo: build oxidation reactions for a few compounds and print their
    # formulas and energies.
    r = Reaction.get_oxidation_reaction('C00031')
    print(r.write_formula())
    print('standard oxidation energy of glucose: %.2f kJ/mol' % r.dG0_prime())

    # NOTE(review): KEGG C00064 appears to be L-glutamine rather than
    # acetate (C00033) -- confirm whether the ID or the label is intended.
    r = Reaction.get_oxidation_reaction('C00064')
    print(r.write_formula())
    print('standard oxidation energy of acetate: %.2f kJ/mol' % r.dG0_prime())

    # Same glucose reaction, this time built from a one-sided formula.
    r = Reaction.parse_formula('C00031 = ').balance_by_oxidation()
    print(r.write_formula())
    print('standard oxidation energy of glucose: %.2f kJ/mol' % r.dG0_prime())
    print('oxidation energy of 1 mM glucose: %.2f kJ/mol' %
          r.dG_prime({'C00031': 1e-3}))

    print('\nNow, trying to use a compound with an unspecific formula:')
    # Flush so the announcement above is emitted before the calls below run.
    sys.stdout.flush()
    r = Reaction.get_oxidation_reaction('C04619')
    print(r.write_formula())
    # %s (not %.2f) is used here, unlike the prints above.
    print('standard oxidation energy of '
          '(3R)-3-Hydroxydecanoyl-[acyl-carrier protein]: %s kJ/mol'
          % r.dG0_prime())
......@@ -29,6 +29,7 @@ import os
import warnings
import pytest
from equilibrator_api import (
ComponentContribution, Pathway, Reaction, ReactionMatcher)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment