Commit 17ff48f0 authored by Mark Abraham's avatar Mark Abraham
Browse files

Use better data structure to implement ResidueType

Now insertions and lookups occur in constant time, rather than linear
time. This greatly improves the many lookups needed in grompp to
build the ndx file.

Renamed entry field as entries, which works better for naming a
container.
parent 612a3f43
......@@ -330,7 +330,7 @@ void write_pdbfile_indexed(FILE* out,
fprintf(out, "MODEL %8d\n", model_nr > 0 ? model_nr : 1);
ResidueType rt;
ResidueTypeMap rt;
for (int ii = 0; ii < nindex; ii++)
{
int i = index[ii];
......
......@@ -47,15 +47,15 @@
#include "gromacs/topology/topology.h"
#include "gromacs/utility/fatalerror.h"
std::vector<t_dlist> mk_dlist(FILE* log,
const t_atoms* atoms,
gmx_bool bPhi,
gmx_bool bPsi,
gmx_bool bChi,
gmx_bool bHChi,
int maxchi,
int r0,
ResidueType* rt)
std::vector<t_dlist> mk_dlist(FILE* log,
const t_atoms* atoms,
gmx_bool bPhi,
gmx_bool bPsi,
gmx_bool bChi,
gmx_bool bHChi,
int maxchi,
int r0,
ResidueTypeMap* rt)
{
int i, j, ii;
t_dihatms atm, prev;
......@@ -231,7 +231,7 @@ std::vector<t_dlist> mk_dlist(FILE* log,
/* Prevent use of unknown residues. If one adds a custom residue to
* residuetypes.dat but somehow loses it, changes it, or does analysis on
* another machine, the residue type will be unknown. */
if (!rt->nameIndexedInResidueTypes(thisres))
if (!rt->nameIndexedInResidueTypeMap(thisres))
{
gmx_fatal(FARGS,
"Unknown residue %s when searching for residue type.\n"
......
......@@ -521,7 +521,7 @@ static void histogramming(FILE* log,
// Build a list of unique residue names found in the dihedral
// list, so we can loop over those unique names conveniently. The
// names are the same as the residue names found in rt in the
// caller, but ResidueType doesn't yet have a way to loop over its
// caller, but ResidueTypeMap doesn't yet have a way to loop over its
// contents.
std::unordered_set<std::string> uniqueResidueNames;
for (const auto& dihedral : dlist)
......@@ -1530,7 +1530,7 @@ int gmx_chi(int argc, char* argv[])
}
fprintf(log, "Title: %s\n", name);
ResidueType rt;
ResidueTypeMap rt;
std::vector<t_dlist> dlist = mk_dlist(log, &atoms, bPhi, bPsi, bChi, bHChi, maxchi, r0, &rt);
fprintf(stderr, "%zu residues with dihedrals found\n", dlist.size());
......
......@@ -44,7 +44,7 @@
#include "gromacs/topology/index.h"
struct gmx_output_env_t;
class ResidueType;
class ResidueTypeMap;
/* must correspond with 'leg' g_chi.c:727 */
enum
......@@ -247,15 +247,15 @@ void do_pp2shifts(FILE* fp, int nframes, gmx::ArrayRef<const t_dlist> dlist, rea
gmx_bool has_dihedral(int Dih, const t_dlist& dlist);
std::vector<t_dlist> mk_dlist(FILE* log,
const t_atoms* atoms,
gmx_bool bPhi,
gmx_bool bPsi,
gmx_bool bChi,
gmx_bool bHChi,
int maxchi,
int r0,
ResidueType* rt);
std::vector<t_dlist> mk_dlist(FILE* log,
const t_atoms* atoms,
gmx_bool bPhi,
gmx_bool bPsi,
gmx_bool bChi,
gmx_bool bHChi,
int maxchi,
int r0,
ResidueTypeMap* rt);
void pr_dlist(FILE* fp,
gmx::ArrayRef<const t_dlist> dlist,
......
......@@ -578,7 +578,7 @@ print_bonds(FILE* fp, int o2n[], int nrHatoms, const int Hatoms[], int Heavy, in
fprintf(fp, "\n");
}
static int get_atype(int atom, t_atoms* at, gmx::ArrayRef<const PreprocessResidue> rtpFFDB, ResidueType* rt)
static int get_atype(int atom, t_atoms* at, gmx::ArrayRef<const PreprocessResidue> rtpFFDB, ResidueTypeMap* rt)
{
int type;
bool bNterm;
......@@ -610,7 +610,7 @@ static int vsite_nm2type(const char* name, PreprocessingAtomTypes* atype)
return *tp;
}
static real get_amass(int atom, t_atoms* at, gmx::ArrayRef<const PreprocessResidue> rtpFFDB, ResidueType* rt)
static real get_amass(int atom, t_atoms* at, gmx::ArrayRef<const PreprocessResidue> rtpFFDB, ResidueTypeMap* rt)
{
real mass;
bool bNterm;
......@@ -1799,7 +1799,7 @@ void do_vsites(gmx::ArrayRef<const PreprocessResidue> rtpFFDB,
/* make index to tell which residues were already processed */
std::vector<bool> bResProcessed(at->nres);
ResidueType rt;
ResidueTypeMap rt;
/* generate vsite constructions */
/* loop over all atoms */
......
......@@ -657,7 +657,7 @@ int read_pdball(const char* inf,
matrix box,
bool bRemoveH,
t_symtab* symtab,
ResidueType* rt,
ResidueTypeMap* rt,
const char* watres,
AtomProperties* aps,
bool bVerbose)
......@@ -984,7 +984,7 @@ int remove_duplicate_atoms(t_atoms* pdba, gmx::ArrayRef<gmx::RVec> x, bool bVerb
return pdba->nr;
}
void checkResidueTypeSanity(t_atoms* pdba, int r0, int r1, ResidueType* rt)
void checkResidueTypeSanity(t_atoms* pdba, int r0, int r1, ResidueTypeMap* rt)
{
std::string startResidueString =
gmx::formatString("%s%d", *pdba->resinfo[r0].name, pdba->resinfo[r0].nr);
......@@ -1058,7 +1058,7 @@ void checkResidueTypeSanity(t_atoms* pdba, int r0, int r1, ResidueType* rt)
}
}
void find_nc_ter(t_atoms* pdba, int r0, int r1, int* r_start, int* r_end, ResidueType* rt, const gmx::MDLogger& logger)
void find_nc_ter(t_atoms* pdba, int r0, int r1, int* r_start, int* r_end, ResidueTypeMap* rt, const gmx::MDLogger& logger)
{
int i;
std::optional<std::string> startrestype;
......@@ -2014,7 +2014,7 @@ int pdb2gmx::run()
open_symtab(&symtab);
/* Residue type database */
ResidueType rt;
ResidueTypeMap rt;
/* Read residue renaming database(s), if present */
std::vector<std::string> rrn = fflib_search_file_end(ffdir_, ".r2b", FALSE);
......
......@@ -462,7 +462,7 @@ void choose_watermodel(const char* wmsel, const char* ffdir, char** watermodel,
static int name2type(t_atoms* at,
int** cgnr,
gmx::ArrayRef<const PreprocessResidue> usedPpResidues,
ResidueType* rt,
ResidueTypeMap* rt,
const gmx::MDLogger& logger)
{
int i, j, prevresind, i0, prevcg, cg, curcg;
......@@ -1563,7 +1563,7 @@ void pdb2top(FILE* top_file,
int i, nmissat;
gmx::EnumerationArray<BondedTypes, int> bts;
ResidueType rt;
ResidueTypeMap rt;
/* Make bonds */
at2bonds(&(plist[F_BONDS]), globalPatches, atoms, *x, long_bond_dist, short_bond_dist, cyclicBondsIndex, logger);
......
......@@ -4,7 +4,7 @@
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
* Copyright (c) 2013,2014,2015,2017,2018 by the GROMACS development team.
* Copyright (c) 2019,2020, by the GROMACS development team, led by
* Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
......@@ -148,7 +148,7 @@ void rename_atoms(const char* xlfile,
t_symtab* symtab,
gmx::ArrayRef<const PreprocessResidue> localPpResidue,
bool bResname,
ResidueType* rt,
ResidueTypeMap* rt,
bool bReorderNum,
bool bVerbose)
{
......
......@@ -3,7 +3,7 @@
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
* Copyright (c) 2013,2014,2015,2018,2019,2020, by the GROMACS development team, led by
* Copyright (c) 2013,2014,2015,2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
......@@ -38,7 +38,7 @@
#ifndef GMX_GMXPREPROCESS_XLATE_H
#define GMX_GMXPREPROCESS_XLATE_H
class ResidueType;
class ResidueTypeMap;
struct t_atoms;
struct PreprocessResidue;
struct t_symtab;
......@@ -58,7 +58,7 @@ void rename_atoms(const char* xlfile,
t_symtab* symtab,
gmx::ArrayRef<const PreprocessResidue> restp,
bool bResname,
ResidueType* rt,
ResidueTypeMap* rt,
bool bReorderNum,
bool bVerbose);
......
......@@ -109,7 +109,7 @@ public:
//! The different atom properties.
AtomProperty prop[epropNR];
//! The residue types.
ResidueType restype;
ResidueTypeMap residueTypeMap;
};
/*! \brief
......@@ -151,21 +151,21 @@ static int compareToDatabase(const std::string& search, const std::string& datab
* Finds the index for the property being searched.
*
* \param[in] ap Property to search for.
* \param[in] restype Residuetypes in database.
* \param[in] residueTypeMap Residuetypes in database.
* \param[in] residueName The name of the residue to look for.
* \param[in] atomName The name of the atom to look for.
* \param[in] bExact Do we have the correct match.
* \returns The index for the property.
*/
static int findPropertyIndex(AtomProperty* ap,
ResidueType* restype,
ResidueTypeMap* residueTypeMap,
const std::string& residueName,
const std::string& atomName,
gmx_bool* bExact)
{
int j = NOTFOUND;
bool bProtein = restype->namedResidueHasType(residueName, "Protein");
bool bProtein = residueTypeMap->namedResidueHasType(residueName, "Protein");
bool bProtWild = residueName == "AAA";
int malen = NOTFOUND;
int mrlen = NOTFOUND;
......@@ -221,21 +221,21 @@ static int findPropertyIndex(AtomProperty* ap,
* Add new property to list.
*
* \param[in] ap Atomproperty to add.
* \param[in] restype Residue type database to use.
* \param[in] residueTypeMap Residue type database to use.
* \param[in] residueName Name of the residue.
* \param[in] atomName Name of the atom.
* \param[in] propValue Value of property.
* \param[in] line Where to add property.
*/
static void addProperty(AtomProperty* ap,
ResidueType* restype,
ResidueTypeMap* residueTypeMap,
const std::string& residueName,
const std::string& atomName,
real propValue,
int line)
{
bool bExact = false;
int j = findPropertyIndex(ap, restype, residueName, atomName, &bExact);
int j = findPropertyIndex(ap, residueTypeMap, residueName, atomName, &bExact);
if (!bExact)
{
......@@ -281,10 +281,10 @@ static void addProperty(AtomProperty* ap,
* Read property value into structure.
*
* \param[in] ap Atomproperty to be read in.
* \param[in] restype Library of residue types.
* \param[in] residueTypeMap Library of residue types.
* \param[in] factor Scaling factor for property.
*/
static void readProperty(AtomProperty* ap, ResidueType* restype, double factor)
static void readProperty(AtomProperty* ap, ResidueTypeMap* residueTypeMap, double factor)
{
char line[STRLEN], resnm[32], atomnm[32];
......@@ -297,7 +297,7 @@ static void readProperty(AtomProperty* ap, ResidueType* restype, double factor)
if (sscanf(line, "%31s %31s %20lf", resnm, atomnm, &pp) == 3)
{
pp *= factor;
addProperty(ap, restype, resnm, atomnm, pp, line_no);
addProperty(ap, residueTypeMap, resnm, atomnm, pp, line_no);
}
else
{
......@@ -311,12 +311,12 @@ static void readProperty(AtomProperty* ap, ResidueType* restype, double factor)
* Set value for properties.
*
* \param[in] ap Atomproperty to set.
* \param[in] restype Library of residue types.
* \param[in] residueTypeMap Library of residue types.
* \param[in] eprop Which property to set.
* \param[in] haveBeenWarned If we already set a warning before
* \returns True of warning should be printed.
*/
static bool setProperties(AtomProperty* ap, ResidueType* restype, int eprop, bool haveBeenWarned)
static bool setProperties(AtomProperty* ap, ResidueTypeMap* residueTypeMap, int eprop, bool haveBeenWarned)
{
const char* fns[epropNR] = {
"atommass.dat", "vdwradii.dat", "dgsolv.dat", "electroneg.dat", "elements.dat"
......@@ -329,7 +329,7 @@ static bool setProperties(AtomProperty* ap, ResidueType* restype, int eprop, boo
{
ap->db = fns[eprop];
ap->def = def[eprop];
readProperty(ap, restype, fac[eprop]);
readProperty(ap, residueTypeMap, fac[eprop]);
if (debug)
{
......@@ -353,9 +353,9 @@ AtomProperty* AtomProperties::prop(int eprop)
return &impl_->prop[eprop];
}
ResidueType* AtomProperties::restype()
ResidueTypeMap* AtomProperties::residueTypeMap()
{
return &impl_->restype;
return &impl_->residueTypeMap;
}
//! Print warning that vdW radii and masses are guessed.
......@@ -391,7 +391,7 @@ bool AtomProperties::setAtomProperty(int eprop,
std::string tmpAtomName, tmpResidueName;
bool bExact = false;
if (setProperties(prop(eprop), restype(), eprop, impl_->bWarned))
if (setProperties(prop(eprop), residueTypeMap(), eprop, impl_->bWarned))
{
printWarning();
impl_->bWarned = true;
......@@ -407,7 +407,7 @@ bool AtomProperties::setAtomProperty(int eprop,
tmpAtomName = atomName;
}
const int j = findPropertyIndex(
&(impl_->prop[eprop]), &impl_->restype, residueName, tmpAtomName, &bExact);
&(impl_->prop[eprop]), &impl_->residueTypeMap, residueName, tmpAtomName, &bExact);
if (eprop == epropVDW && !impl_->bWarnVDW)
{
......@@ -429,7 +429,7 @@ bool AtomProperties::setAtomProperty(int eprop,
std::string AtomProperties::elementFromAtomNumber(int atomNumber)
{
if (setProperties(prop(epropElement), restype(), epropElement, impl_->bWarned))
if (setProperties(prop(epropElement), residueTypeMap(), epropElement, impl_->bWarned))
{
printWarning();
impl_->bWarned = true;
......@@ -446,7 +446,7 @@ std::string AtomProperties::elementFromAtomNumber(int atomNumber)
int AtomProperties::atomNumberFromElement(const char* element)
{
if (setProperties(prop(epropElement), restype(), epropElement, impl_->bWarned))
if (setProperties(prop(epropElement), residueTypeMap(), epropElement, impl_->bWarned))
{
printWarning();
impl_->bWarned = true;
......
......@@ -54,7 +54,7 @@ enum
};
struct AtomProperty;
class ResidueType;
class ResidueTypeMap;
/*! \brief
* Holds all the atom property information loaded.
*/
......@@ -108,10 +108,11 @@ public:
* \returns Pointer to property entry.
*/
AtomProperty* prop(int eprop);
//! Get handle to residuetype library.
ResidueType* restype();
private:
//! Get handle to residuetype library.
ResidueTypeMap* residueTypeMap();
//! Implementation pointer.
class Impl;
......
......@@ -562,7 +562,7 @@ void analyse(const t_atoms* atoms, t_blocka* gb, char*** gn, gmx_bool bASK, gmx_
add_grp(gb, gn, aid, "System");
/* For every residue, get a pointer to the residue type name */
ResidueType rt;
ResidueTypeMap rt;
std::vector<std::string> restype;
std::vector<std::string> previousTypename;
......
......@@ -43,6 +43,7 @@
#include <algorithm>
#include <optional>
#include <string>
#include <unordered_map>
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/cstringutil.h"
......@@ -52,31 +53,27 @@
#include "gromacs/utility/strdb.h"
//! Definition for residue type that is not known.
const std::string c_undefinedResidueType = "Other";
const ResidueType c_undefinedResidueType = "Other";
//! Single ResidueType entry object
struct ResidueTypeEntry
//! Function object for comparisons used in std::unordered_map
class EqualCaseInsensitive
{
//! Default constructor creates complete object.
ResidueTypeEntry(const std::string& rName, const std::string& rType) :
residueName(rName), residueType(rType)
public:
bool operator()(const ResidueName& lhs, const ResidueName& rhs) const
{
return gmx::equalCaseInsensitive(lhs, rhs);
}
//! Name of the residue in the entry.
std::string residueName;
//! Type of the residue in the entry.
std::string residueType;
};
//! Implementation detail for ResidueTypes
class ResidueType::Impl
//! Implementation detail for ResidueTypeMap
class ResidueTypeMap::Impl
{
public:
//! Storage object for entries.
std::vector<ResidueTypeEntry> entry;
std::unordered_map<ResidueName, ResidueType, std::hash<ResidueName>, EqualCaseInsensitive> entries;
};
ResidueType::ResidueType() : impl_(new Impl)
ResidueTypeMap::ResidueTypeMap() : impl_(new Impl)
{
char line[STRLEN];
char resname[STRLEN], restype[STRLEN], dum[STRLEN];
......@@ -100,64 +97,53 @@ ResidueType::ResidueType() : impl_(new Impl)
}
}
ResidueType::~ResidueType() {}
ResidueTypeMap::~ResidueTypeMap() = default;
/*! \brief
* Return an optional const iterator to a residue entry that matches the given name.
*
* \param[in] entries Currently registered residue entries in the database.
* \param[in] residueName Name of a residue to compare to database.
* \returns An optional iterator to the residue entry that was found.
*/
static std::optional<gmx::ArrayRef<const ResidueTypeEntry>::const_iterator>
findResidueEntryWithName(gmx::ArrayRef<const ResidueTypeEntry> entries, const std::string& residueName)
bool ResidueTypeMap::nameIndexedInResidueTypeMap(const ResidueName& residueName)
{
auto foundIt =
std::find_if(entries.begin(), entries.end(), [&residueName](const ResidueTypeEntry& old) {
return gmx::equalCaseInsensitive(residueName, old.residueName);
});
return (foundIt != entries.end()) ? std::make_optional(foundIt) : std::nullopt;
return impl_->entries.find(residueName) != impl_->entries.end();
}
bool ResidueType::nameIndexedInResidueTypes(const std::string& residueName)
void ResidueTypeMap::addResidue(const ResidueName& residueName, const ResidueType& residueType)
{
return findResidueEntryWithName(impl_->entry, residueName).has_value();
}
void ResidueType::addResidue(const std::string& residueName, const std::string& residueType)
{
if (auto foundIt = findResidueEntryWithName(impl_->entry, residueName))
if (auto [foundIt, insertionTookPlace] = impl_->entries.insert({ residueName, residueType });
!insertionTookPlace)
{
if (!gmx::equalCaseInsensitive((*foundIt)->residueType, residueType))
if (!gmx::equalCaseInsensitive(foundIt->second, residueType))
{
fprintf(stderr,
"Warning: Residue '%s' already present with type '%s' in database, ignoring "
"new type '%s'.\n",
residueName.c_str(),
(*foundIt)->residueType.c_str(),
foundIt->second.c_str(),
residueType.c_str());
}
}
else
{
impl_->entry.emplace_back(residueName, residueType);
}
}
bool ResidueType::namedResidueHasType(const std::string& residueName, const std::string& residueType)
bool ResidueTypeMap::namedResidueHasType(const ResidueName& residueName, const ResidueType& residueType)
{
auto foundIt = findResidueEntryWithName(impl_->entry, residueName);
return foundIt ? gmx::equalCaseInsensitive(residueType, (*foundIt)->residueType) : false;
if (auto foundIt = impl_->entries.find(residueName); foundIt != impl_->entries.end())
{
return gmx::equalCaseInsensitive(residueType, foundIt->second);
}
return false;
}
std::string ResidueType::typeOfNamedDatabaseResidue(const std::string& residueName)
ResidueType ResidueTypeMap::typeOfNamedDatabaseResidue(const ResidueName& residueName)
{
auto foundIt = findResidueEntryWithName(impl_->entry, residueName);
return foundIt ? (*foundIt)->residueType : c_undefinedResidueType;
if (auto foundIt = impl_->entries.find(residueName); foundIt != impl_->entries.end())
{
return foundIt->second;
}
return c_undefinedResidueType;
}
std::optional<std::string> ResidueType::optionalTypeOfNamedDatabaseResidue(const std::string& residueName)
std::optional<ResidueType> ResidueTypeMap::optionalTypeOfNamedDatabaseResidue(const ResidueName& residueName)
{
auto foundIt = findResidueEntryWithName(impl_->entry, residueName);
return foundIt ? std::make_optional((*foundIt)->residueType) : std::nullopt;
if (auto foundIt = impl_->entries.find(residueName); foundIt != impl_->entries.end())
{
return std::make_optional(foundIt->second);
}
return std::nullopt;
}
......@@ -41,18 +41,22 @@
#include "gromacs/utility/basedefinitions.h"
struct ResidueTypeEntry;
/*! \brief Convenience type aliases
*
* These are not as useful as strong types, but they will
* help clarify usage to humans in some cases. */
//! \{
using ResidueName = std::string;
using ResidueType = std::string;
//! \}
class ResidueType
class ResidueTypeMap
{
public:
//! Default constructor.
ResidueType();
ResidueTypeMap();
//! Default destructor.
~ResidueType();
//! Get handle to underlying residue type data.
ResidueTypeEntry* ResidueTypes();
~ResidueTypeMap();
/*! \brief
* Return true if residue \p residueName is found or false otherwise.
......@@ -60,14 +64,14 @@ public:
* \param[in] residueName Residue name to search database for.
* \returns true if successful.
*/
bool nameIndexedInResidueTypes(const std::string& residueName);
bool nameIndexedInResidueTypeMap(const ResidueName& residueName);
/*! \brief
* Add entry to ResidueTypes if unique.
* Add entry to ResidueTypeMap if unique.
*
* \param[in] residueName Name of new residue.
* \param[in] residueType Type of new residue.
*/
void addResidue(const std::string& residueName, const std::string& residueType);
void addResidue(const ResidueName& residueName, const ResidueType& residueType);
/*! \brief
* Checks if the indicated \p residueName if of \p residueType.
*
......@@ -75,21 +79,21 @@ public:
* \param[in] residueType Which ResidueType the residue should have.
* \returns If the check was successful.
*/
bool namedResidueHasType(const std::string& residueName, const std::string& residueType);
bool namedResidueHasType(const ResidueName& residueName, const ResidueType& residueType);
/*! \brief
* Return the residue type if a residue with that name exists, or "Other"
*
* \param[in] residueName Name of the residue to search for.