Skip to content
Snippets Groups Projects
Commit 1b756fee authored by Vedran Miletić's avatar Vedran Miletić
Browse files

Ported sdrmsd and sdtether to Python 3 and Open Babel 3

Cleaned up inconsistent use of tabs and spaces and replaced old style
string formatting (%) with new style (.format()). Used autopep8 for
further PEP 8-compliant style fixes.

Open Babel 3 puts pybabel module inside openbabel, so import has been
changed to try that first and fall back to 2.x style import if the first
import fails.
parent 566b230f
No related branches found
No related tags found
No related merge requests found
#! /usr/bin/env python
#
# Calculate SMART RMSD with or without molecular superposition (FIT or NOFIT)
# Calculate SMART RMSD with or without molecular superposition (FIT or NOFIT)
# Script distributed under GNU LGPL 3.0 along rDock software.
#
#
# This algorithm takes into account molecular automorphism. That is, it identifies
# molecules which are the same but might have atom orders changed and still be able to
# molecules which are the same but might have atom orders changed and still be able to
# match the pairs and correctly calculate the RMSD.
#
# Author: Daniel Alvarez-Garcia
# Date: 08-11-2013
import math
import pybel
import numpy as npy
import optparse
try:
# Open Babel 3.0
from openbabel import pybel
except ImportError:
# Open Babel 2.x
import pybel
def superpose3D(ref, target, weights=None,refmask=None,targetmask=None,returnRotMat=False):
def superpose3D(ref, target, weights=None, refmask=None, targetmask=None, returnRotMat=False):
"""superpose3D performs 3d superposition using a weighted Kabsch algorithm : http://dx.doi.org/10.1107%2FS0567739476001873 & doi: 10.1529/biophysj.105.066654
definition : superpose3D(ref, target, weights,refmask,targetmask)
@parameter 1 : ref - xyz coordinates of the reference structure (the ligand for instance)
......@@ -30,36 +36,38 @@ def superpose3D(ref, target, weights=None,refmask=None,targetmask=None,returnRot
Note ref and target positions must have the same dimensions -> n*3 numpy arrays where n is the number of points (or atoms)
Returns a set of new coordinates, aligned to the target state as well as the rmsd
"""
if weights == None :
weights=1.0
if refmask == None :
refmask=npy.ones(len(ref),"bool")
if targetmask == None :
targetmask=npy.ones(len(target),"bool")
#first get the centroid of both states
ref_centroid = npy.mean(ref[refmask]*weights,axis=0)
#print ref_centroid
refCenteredCoords=ref-ref_centroid
#print refCenteredCoords
target_centroid=npy.mean(target[targetmask]*weights,axis=0)
targetCenteredCoords=target-target_centroid
#print targetCenteredCoords
#the following steps come from : http://www.pymolwiki.org/index.php/OptAlign#The_Code and http://en.wikipedia.org/wiki/Kabsch_algorithm
if weights == None:
weights = 1.0
if refmask == None:
refmask = npy.ones(len(ref), "bool")
if targetmask == None:
targetmask = npy.ones(len(target), "bool")
# first get the centroid of both states
ref_centroid = npy.mean(ref[refmask]*weights, axis=0)
# print ref_centroid
refCenteredCoords = ref-ref_centroid
# print refCenteredCoords
target_centroid = npy.mean(target[targetmask]*weights, axis=0)
targetCenteredCoords = target-target_centroid
# print targetCenteredCoords
# the following steps come from : http://www.pymolwiki.org/index.php/OptAlign#The_Code and http://en.wikipedia.org/wiki/Kabsch_algorithm
# Initial residual, see Kabsch.
E0 = npy.sum( npy.sum(refCenteredCoords[refmask] * refCenteredCoords[refmask]*weights,axis=0),axis=0) + npy.sum( npy.sum(targetCenteredCoords[targetmask] * targetCenteredCoords[targetmask]*weights,axis=0),axis=0)
reftmp=npy.copy(refCenteredCoords[refmask])
targettmp=npy.copy(targetCenteredCoords[targetmask])
#print refCenteredCoords[refmask]
#single value decomposition of the dotProduct of both position vectors
E0 = npy.sum(npy.sum(refCenteredCoords[refmask] * refCenteredCoords[refmask]*weights, axis=0), axis=0) + npy.sum(
npy.sum(targetCenteredCoords[targetmask] * targetCenteredCoords[targetmask]*weights, axis=0), axis=0)
reftmp = npy.copy(refCenteredCoords[refmask])
targettmp = npy.copy(targetCenteredCoords[targetmask])
# print refCenteredCoords[refmask]
# single value decomposition of the dotProduct of both position vectors
try:
dotProd = npy.dot( npy.transpose(reftmp), targettmp* weights)
V, S, Wt = npy.linalg.svd(dotProd )
dotProd = npy.dot(npy.transpose(reftmp), targettmp * weights)
V, S, Wt = npy.linalg.svd(dotProd)
except Exception:
try:
dotProd = npy.dot( npy.transpose(reftmp), targettmp)
V, S, Wt = npy.linalg.svd(dotProd )
dotProd = npy.dot(npy.transpose(reftmp), targettmp)
V, S, Wt = npy.linalg.svd(dotProd)
except Exception:
print >> sys.stderr,"Couldn't perform the Single Value Decomposition, skipping alignment"
print(
"Couldn't perform the Single Value Decomposition, skipping alignment", file=sys.stderr)
return ref, 0
# we already have our solution, in the results from SVD.
# we just need to check for reflections and then produce
......@@ -68,73 +76,81 @@ def superpose3D(ref, target, weights=None,refmask=None,targetmask=None,returnRot
reflect = float(str(float(npy.linalg.det(V) * npy.linalg.det(Wt))))
if reflect == -1.0:
S[-1] = -S[-1]
V[:,-1] = -V[:,-1]
V[:, -1] = -V[:, -1]
rmsd = E0 - (2.0 * sum(S))
rmsd = npy.sqrt(abs(rmsd / len(ref[refmask]))) #get the rmsd
#U is simply V*Wt
U = npy.dot(V, Wt) #get the rotation matrix
rmsd = npy.sqrt(abs(rmsd / len(ref[refmask]))) # get the rmsd
# U is simply V*Wt
U = npy.dot(V, Wt) # get the rotation matrix
# rotate and translate the molecule
new_coords = npy.dot((refCenteredCoords), U)+ target_centroid #translate & rotate
new_coords = npy.dot((refCenteredCoords), U) + \
target_centroid # translate & rotate
#new_coords=(refCenteredCoords + target_centroid)
#print U
if returnRotMat :
return new_coords,rmsd, U
return new_coords,rmsd
# print U
if returnRotMat:
return new_coords, rmsd, U
return new_coords, rmsd
def squared_distance(coordsA, coordsB):
"""Find the squared distance between two 3-tuples"""
sqrdist = sum( (a-b)**2 for a, b in zip(coordsA, coordsB) )
sqrdist = sum((a-b)**2 for a, b in zip(coordsA, coordsB))
return sqrdist
def rmsd(allcoordsA, allcoordsB):
"""Find the RMSD between two lists of 3-tuples"""
deviation = sum(squared_distance(atomA, atomB) for
(atomA, atomB) in zip(allcoordsA, allcoordsB))
return math.sqrt(deviation / float(len(allcoordsA)))
def mapToCrystal(xtal, pose):
"""Some docking programs might alter the order of the atoms in the output (like Autodock Vina does...)
this will mess up the rmsd calculation with OpenBabel"""
query = pybel.ob.CompileMoleculeQuery(xtal.OBMol)
mapper=pybel.ob.OBIsomorphismMapper.GetInstance(query)
query = pybel.ob.CompileMoleculeQuery(xtal.OBMol)
mapper = pybel.ob.OBIsomorphismMapper.GetInstance(query)
mappingpose = pybel.ob.vvpairUIntUInt()
exit=mapper.MapUnique(pose.OBMol,mappingpose)
exit = mapper.MapUnique(pose.OBMol, mappingpose)
return mappingpose[0]
def parseArguments():
optparse.OptionParser.format_epilog = lambda self, formatter: self.epilog
epilog = """Args:
reference.sdf SDF file with the reference molecule.
input.sdf SDF file with the molecules to be compared to reference.\n"""
parser = optparse.OptionParser("usage: %prog [options] reference.sdf input.sdf", epilog=epilog)
parser.add_option("-f", "--fit",dest="fit", action="store_true", default=False,
help="Superpose molecules before RMSD calculation")
parser.add_option("--threshold","-t",dest="threshold", action="store", nargs=1,
help="Discard poses with RMSD < THRESHOLD with respect previous poses which where not rejected based on same principle. A Population SDField will be added to output SD with the population number.", type=float)
parser.add_option("-o","--out", dest="outfilename", metavar="FILE", default=False,
help="If declared, write an output SDF file with the input molecules with a new sdfield <RMSD>. If molecule was fitted, the fitted molecule coordinates will be saved.")
(options, args) = parser.parse_args()
#Check we have two arguments
if len(args) < 2:
parser.error("Incorrect number of arguments. Use -h or --help options to print help.")
return options, args
optparse.OptionParser.format_epilog = lambda self, formatter: self.epilog
epilog = """Args:
reference.sdf SDF file with the reference molecule.
input.sdf SDF file with the molecules to be compared to reference.\n"""
parser = optparse.OptionParser(
"usage: %prog [options] reference.sdf input.sdf", epilog=epilog)
parser.add_option("-f", "--fit", dest="fit", action="store_true", default=False,
help="Superpose molecules before RMSD calculation")
parser.add_option("-t", "--threshold", dest="threshold", action="store", nargs=1,
help="Discard poses with RMSD < THRESHOLD with respect previous poses which where not rejected based on same principle. A Population SDField will be added to output SD with the population number.", type=float)
parser.add_option("-o", "--out", dest="outfilename", metavar="FILE", default=False,
help="If declared, write an output SDF file with the input molecules with a new sdfield <RMSD>. If molecule was fitted, the fitted molecule coordinates will be saved.")
(options, args) = parser.parse_args()
# Check we have two arguments
if len(args) < 2:
parser.error(
"Incorrect number of arguments. Use -h or --help options to print help.")
return options, args
def updateCoords(obmol, newcoords):
"Update OBMol coordinates. newcoords is a numpy array"
for i,atom in enumerate(obmol):
for i, atom in enumerate(obmol):
atom.OBAtom.SetVector(*newcoords[i])
def getAutomorphRMSD(target, molec, fit=False):
"""
Use Automorphism to reorder target coordinates to match ref coordinates atom order
for correct RMSD comparison. Only the lowest RMSD will be returned.
Returns:
If fit=False: bestRMSD (float) RMSD of the best matching mapping.
If fit=True: (bestRMSD, molecCoordinates) (float, npy.array) RMSD of best match and its molecule fitted coordinates.
If fit=False: bestRMSD (float) RMSD of the best matching mapping.
If fit=True: (bestRMSD, molecCoordinates) (float, npy.array) RMSD of best match and its molecule fitted coordinates.
"""
mappings = pybel.ob.vvpairUIntUInt()
bitvec = pybel.ob.OBBitVec()
......@@ -144,53 +160,57 @@ def getAutomorphRMSD(target, molec, fit=False):
success = pybel.ob.FindAutomorphisms(target.OBMol, mappings)
targetcoords = [atom.coords for atom in target]
mappose = npy.array(mapToCrystal(target, molec))
mappose = mappose[npy.argsort(mappose[:,0])][:,1]
mappose = mappose[npy.argsort(mappose[:, 0])][:, 1]
posecoords = npy.array([atom.coords for atom in molec])[mappose]
resultrmsd = 999999999999
for mapping in mappings:
automorph_coords = [None] * len(targetcoords)
for x, y in mapping:
automorph_coords[lookup.index(x)] = targetcoords[lookup.index(y)]
mapping_rmsd = rmsd(posecoords, automorph_coords)
if mapping_rmsd < resultrmsd:
resultrmsd = mapping_rmsd
fitted_result = False
if fit:
fitted_pose, fitted_rmsd = superpose3D(npy.array(automorph_coords), npy.array(posecoords))
if fitted_rmsd < resultrmsd:
resultrmsd = fitted_rmsd
fitted_result = fitted_pose
automorph_coords = [None] * len(targetcoords)
for x, y in mapping:
automorph_coords[lookup.index(x)] = targetcoords[lookup.index(y)]
mapping_rmsd = rmsd(posecoords, automorph_coords)
if mapping_rmsd < resultrmsd:
resultrmsd = mapping_rmsd
fitted_result = False
if fit:
fitted_pose, fitted_rmsd = superpose3D(
npy.array(automorph_coords), npy.array(posecoords))
if fitted_rmsd < resultrmsd:
resultrmsd = fitted_rmsd
fitted_result = fitted_pose
if fit:
return (resultrmsd, fitted_pose)
return (resultrmsd, fitted_pose)
else:
return resultrmsd
return resultrmsd
def saveMolecWithRMSD(outsdf, molec, rmsd, population=False):
newData = pybel.ob.OBPairData()
newData = pybel.ob.OBPairData()
newData.SetAttribute("RMSD")
newData.SetValue('%.3f'%rmsd)
newData.SetValue('{:.3f}'.format(rmsd))
if population:
popData = pybel.ob.OBPairData()
popData.SetAttribute("Population")
popData.SetValue('%i'%population)
molec.OBMol.CloneData(popData)
popData = pybel.ob.OBPairData()
popData.SetAttribute("Population")
popData.SetValue('{:d}'.format(population))
molec.OBMol.CloneData(popData)
molec.OBMol.CloneData(newData) # Add new data
outsdf.write(molec)
if __name__ == "__main__":
import sys, os
(opts, args) = parseArguments()
import sys
import os
(opts, args) = parseArguments()
xtal = args[0]
poses = args[1]
if not os.path.exists(xtal) or not os.path.exists(poses):
sys.exit("Input files not found. Please check the path given is correct.")
sys.exit("Input files not found. Please check the path given is correct.")
fit = opts.fit
outfname = opts.outfilename
threshold = opts.threshold
......@@ -200,73 +220,83 @@ if __name__ == "__main__":
crystal.removeh()
crystalnumatoms = len(crystal.atoms)
#If outfname is defined, prepare an output SDF sink to write molecules
# If outfname is defined, prepare an output SDF sink to write molecules
if outfname:
outsdf = pybel.Outputfile('sdf', outfname, overwrite=True)
outsdf = pybel.Outputfile('sdf', outfname, overwrite=True)
# Find the RMSD between the crystal pose and each docked pose
dockedposes = pybel.readfile("sdf", poses)
if fit: print "POSE\tRMSD_FIT"
else: print "POSE\tRMSD_NOFIT"
if fit:
print("POSE\tRMSD_FIT")
else:
print("POSE\tRMSD_NOFIT")
skipped = []
moleclist = {} # Save all poses with their dockid
population = {} # Poses to be written
moleclist = {} # Save all poses with their dockid
population = {} # Poses to be written
outlist = {}
for docki, dockedpose in enumerate(dockedposes):
dockedpose.removeh()
natoms = len(dockedpose.atoms)
if natoms != crystalnumatoms:
skipped.append(docki+1)
continue
if fit:
resultrmsd, fitted_result = getAutomorphRMSD(crystal, dockedpose, fit=True)
updateCoords(dockedpose, fitted_result)
else:
resultrmsd = getAutomorphRMSD(crystal, dockedpose, fit=False)
if threshold:
# Calculate RMSD between all previous poses
# Discard if rmsd < FILTER threshold
if moleclist:
match = None
bestmatchrmsd = 999999
for did,prevmol in moleclist.iteritems():
tmprmsd = getAutomorphRMSD(prevmol, dockedpose)
if tmprmsd < threshold:
if tmprmsd < bestmatchrmsd:
bestmatchrmsd = tmprmsd
match = did
if match != None:
# Do not write this one
# sum one up to the matching previous molecule id
print >> sys.stderr, "Pose %i matches pose %i with %.3f RMSD"%(docki+1, match+1, bestmatchrmsd)
population[match] += 1
else:
# There's no match. Print info for this one and write to outsdf if needed
# Save this one!
if outfname: outlist[docki] = (dockedpose, resultrmsd)
print "%d\t%.2f"%((docki+1),resultrmsd)
moleclist[docki] = dockedpose
population[docki] = 1
else:
# First molecule in list. Append for sure
moleclist[docki] = dockedpose
population[docki] = 1
if outfname: outlist[docki] = (dockedpose, resultrmsd)
else:
# Just write best rmsd found and the molecule to outsdf if demanded
if outfname: saveMolecWithRMSD(outsdf, dockedpose, resultrmsd)
print "%d\t%.2f"%((docki+1),resultrmsd)
natoms = len(dockedpose.atoms)
if natoms != crystalnumatoms:
skipped.append(docki+1)
continue
if fit:
resultrmsd, fitted_result = getAutomorphRMSD(
crystal, dockedpose, fit=True)
updateCoords(dockedpose, fitted_result)
else:
resultrmsd = getAutomorphRMSD(crystal, dockedpose, fit=False)
if threshold:
# Calculate RMSD between all previous poses
# Discard if rmsd < FILTER threshold
if moleclist:
match = None
bestmatchrmsd = 999999
for did, prevmol in moleclist.iteritems():
tmprmsd = getAutomorphRMSD(prevmol, dockedpose)
if tmprmsd < threshold:
if tmprmsd < bestmatchrmsd:
bestmatchrmsd = tmprmsd
match = did
if match != None:
# Do not write this one
# sum one up to the matching previous molecule id
print("Pose {:d} matches pose {:d} with {:.3f} RMSD".format(
docki+1, match+1, bestmatchrmsd), file=sys.stderr)
population[match] += 1
else:
# There's no match. Print info for this one and write to outsdf if needed
# Save this one!
if outfname:
outlist[docki] = (dockedpose, resultrmsd)
print("{:d}\t{:.2f}".format(docki+1, resultrmsd))
moleclist[docki] = dockedpose
population[docki] = 1
else:
# First molecule in list. Append for sure
moleclist[docki] = dockedpose
population[docki] = 1
if outfname:
outlist[docki] = (dockedpose, resultrmsd)
else:
# Just write best rmsd found and the molecule to outsdf if demanded
if outfname:
saveMolecWithRMSD(outsdf, dockedpose, resultrmsd)
print("{:d}\t{:.2f}".format(docki+1, resultrmsd))
if outlist:
# Threshold applied and outlist need to be written
for docki in sorted(outlist.iterkeys()):
molrmsd = outlist[docki]
# Get number of matchs in thresholding operation
pop = population.get(docki)
if not pop: pop = 1
# Save molecule
saveMolecWithRMSD(outsdf, molrmsd[0], molrmsd[1], pop)
if skipped: print >> sys.stderr, "SKIPPED input molecules due to number of atom missmatch: %s"%skipped
# Threshold applied and outlist need to be written
for docki in sorted(outlist.iterkeys()):
molrmsd = outlist[docki]
# Get number of matchs in thresholding operation
pop = population.get(docki)
if not pop:
pop = 1
# Save molecule
saveMolecWithRMSD(outsdf, molrmsd[0], molrmsd[1], pop)
if skipped:
print("SKIPPED input molecules due to number of atom missmatch: {}".format(
skipped), file=sys.stderr)
#! /usr/bin/env python
#
# Substitute for rbtether of rDock. Will align input molecules to a reference fragment defined by a smarts string,
# it will add a TETHERED ATOM property field to the output SDF that is correctly understood by rDock
# Substitute for rbtether of rDock. Will align input molecules to a reference fragment defined by a smarts string,
# it will add a TETHERED ATOM property field to the output SDF that is correctly understood by rDock
# rDock will restrain the matching atom positions to the reference molecule coordinates.
#
# Initially implemented with a conformational search algorithm to better match target coordinates.
# But had problems with OBabel FF generating non-sense conformers. So in this version the conformer search is commented out.
# Now if the input molecule do not have a good conformation, might not align well with the target. This effect will be
# Now if the input molecule do not have a good conformation, might not align well with the target. This effect will be
# dimished or even vanish if the SMARTS string is defined for a rigid region (like a ring).
# I'm still trying to incorporate somehow this conformational search.
#
# Script distributed under GNU LGPL 3.0 along rDock software.
#
#
# Author: Daniel Alvarez-Garcia
# Date: 08-11-2013
import math
import pybel
import numpy as npy
try:
# Open Babel 3.0
from openbabel import pybel
except ImportError:
# Open Babel 2.x
import pybel
def superpose3D(ref, target, weights=None,refmask=None,targetmask=None,returnRotMat=False):
def superpose3D(ref, target, weights=None, refmask=None, targetmask=None, returnRotMat=False):
"""superpose3D performs 3d superposition using a weighted Kabsch algorithm : http://dx.doi.org/10.1107%2FS0567739476001873 & doi: 10.1529/biophysj.105.066654
definition : superpose3D(ref, target, weights,refmask,targetmask)
@parameter 1 : ref - xyz coordinates of the reference structure (the ligand for instance)
......@@ -34,36 +40,38 @@ def superpose3D(ref, target, weights=None,refmask=None,targetmask=None,returnRot
Note ref and target positions must have the same dimensions -> n*3 numpy arrays where n is the number of points (or atoms)
Returns a set of new coordinates, aligned to the target state as well as the rmsd
"""
if weights == None :
weights=1.0
if refmask == None :
refmask=npy.ones(len(ref),"bool")
if targetmask == None :
targetmask=npy.ones(len(target),"bool")
#first get the centroid of both states
ref_centroid = npy.mean(ref[refmask]*weights,axis=0)
#print ref_centroid
refCenteredCoords=ref-ref_centroid
#print refCenteredCoords
target_centroid=npy.mean(target[targetmask]*weights,axis=0)
targetCenteredCoords=target-target_centroid
#print targetCenteredCoords
#the following steps come from : http://www.pymolwiki.org/index.php/OptAlign#The_Code and http://en.wikipedia.org/wiki/Kabsch_algorithm
if weights == None:
weights = 1.0
if refmask == None:
refmask = npy.ones(len(ref), "bool")
if targetmask == None:
targetmask = npy.ones(len(target), "bool")
# first get the centroid of both states
ref_centroid = npy.mean(ref[refmask]*weights, axis=0)
# print ref_centroid
refCenteredCoords = ref-ref_centroid
# print refCenteredCoords
target_centroid = npy.mean(target[targetmask]*weights, axis=0)
targetCenteredCoords = target-target_centroid
# print targetCenteredCoords
# the following steps come from : http://www.pymolwiki.org/index.php/OptAlign#The_Code and http://en.wikipedia.org/wiki/Kabsch_algorithm
# Initial residual, see Kabsch.
E0 = npy.sum( npy.sum(refCenteredCoords[refmask] * refCenteredCoords[refmask]*weights,axis=0),axis=0) + npy.sum( npy.sum(targetCenteredCoords[targetmask] * targetCenteredCoords[targetmask]*weights,axis=0),axis=0)
reftmp=npy.copy(refCenteredCoords[refmask])
targettmp=npy.copy(targetCenteredCoords[targetmask])
#print refCenteredCoords[refmask]
#single value decomposition of the dotProduct of both position vectors
E0 = npy.sum(npy.sum(refCenteredCoords[refmask] * refCenteredCoords[refmask]*weights, axis=0), axis=0) + npy.sum(
npy.sum(targetCenteredCoords[targetmask] * targetCenteredCoords[targetmask]*weights, axis=0), axis=0)
reftmp = npy.copy(refCenteredCoords[refmask])
targettmp = npy.copy(targetCenteredCoords[targetmask])
# print refCenteredCoords[refmask]
# single value decomposition of the dotProduct of both position vectors
try:
dotProd = npy.dot( npy.transpose(reftmp), targettmp* weights)
V, S, Wt = npy.linalg.svd(dotProd )
dotProd = npy.dot(npy.transpose(reftmp), targettmp * weights)
V, S, Wt = npy.linalg.svd(dotProd)
except Exception:
try:
dotProd = npy.dot( npy.transpose(reftmp), targettmp)
V, S, Wt = npy.linalg.svd(dotProd )
dotProd = npy.dot(npy.transpose(reftmp), targettmp)
V, S, Wt = npy.linalg.svd(dotProd)
except Exception:
print >> sys.stderr,"Couldn't perform the Single Value Decomposition, skipping alignment"
print(
"Couldn't perform the Single Value Decomposition, skipping alignment", file=sys.stderr)
return ref, 0
# we already have our solution, in the results from SVD.
# we just need to check for reflections and then produce
......@@ -72,66 +80,76 @@ def superpose3D(ref, target, weights=None,refmask=None,targetmask=None,returnRot
reflect = float(str(float(npy.linalg.det(V) * npy.linalg.det(Wt))))
if reflect == -1.0:
S[-1] = -S[-1]
V[:,-1] = -V[:,-1]
V[:, -1] = -V[:, -1]
rmsd = E0 - (2.0 * sum(S))
rmsd = npy.sqrt(abs(rmsd / len(ref[refmask]))) #get the rmsd
#U is simply V*Wt
U = npy.dot(V, Wt) #get the rotation matrix
rmsd = npy.sqrt(abs(rmsd / len(ref[refmask]))) # get the rmsd
# U is simply V*Wt
U = npy.dot(V, Wt) # get the rotation matrix
# rotate and translate the molecule
new_coords = npy.dot((refCenteredCoords), U)+ target_centroid #translate & rotate
new_coords = npy.dot((refCenteredCoords), U) + \
target_centroid # translate & rotate
#new_coords=(refCenteredCoords + target_centroid)
#print U
if returnRotMat :
# print U
if returnRotMat:
return U, ref_centroid, target_centroid, rmsd
return new_coords,rmsd
return new_coords, rmsd
def squared_distance(coordsA, coordsB):
"""Find the squared distance between two 3-tuples"""
sqrdist = sum( (a-b)**2 for a, b in zip(coordsA, coordsB) )
sqrdist = sum((a-b)**2 for a, b in zip(coordsA, coordsB))
return sqrdist
def rmsd(allcoordsA, allcoordsB):
"""Find the RMSD between two lists of 3-tuples"""
deviation = sum(squared_distance(atomA, atomB) for
(atomA, atomB) in zip(allcoordsA, allcoordsB))
return math.sqrt(deviation / float(len(allcoordsA)))
def mapToCrystal(xtal, pose):
"""Some docking programs might alter the order of the atoms in the output (like Autodock Vina does...)
this will mess up the rmsd calculation with OpenBabel"""
query = pybel.ob.CompileMoleculeQuery(xtal.OBMol)
mapper=pybel.ob.OBIsomorphismMapper.GetInstance(query)
query = pybel.ob.CompileMoleculeQuery(xtal.OBMol)
mapper = pybel.ob.OBIsomorphismMapper.GetInstance(query)
mappingpose = pybel.ob.vvpairUIntUInt()
exit=mapper.MapUnique(pose.OBMol,mappingpose)
exit = mapper.MapUnique(pose.OBMol, mappingpose)
return mappingpose[0]
def takeCoords(obmol):
"""Take coordinates of an OBMol as a npy array"""
return npy.array([atom.coords for atom in obmol])
def updateCoords(obmol, newcoords):
"Update OBMol coordinates. newcoords is a numpy array"
for i,atom in enumerate(obmol):
for i, atom in enumerate(obmol):
atom.OBAtom.SetVector(*newcoords[i])
def prepareAtomString(idlist):
s = ""
n = len(idlist)
for i, id in enumerate(idlist):
s += "%i"%id
if (i+1) == n: s+="\n"
elif (i+1)%35 == 0: s+=",\n"
else: s+=","
s += "{:d}".format(id)
if (i+1) == n:
s += "\n"
elif (i+1) % 35 == 0:
s += ",\n"
else:
s += ","
return s
if __name__ == "__main__":
import sys
if len(sys.argv) != 5:
sys.exit("USAGE: %s reference.sdf input.sdf output.sdf 'SMARTS'"%sys.argv[0])
sys.exit(
"USAGE: {} reference.sdf input.sdf output.sdf 'SMARTS'".format(sys.argv[0]))
refsdf = sys.argv[1]
molsdf = sys.argv[2]
outsdf = sys.argv[3]
......@@ -144,120 +162,128 @@ if __name__ == "__main__":
numRefMatchs = len(refMatchIds)
if not numRefMatchs:
sys.exit("No match found in the reference structure and the SMARTS string given. Please check it.")
sys.exit(
"No match found in the reference structure and the SMARTS string given. Please check it.")
if numRefMatchs > 1:
print "More than one match in the reference molecule for the SMARTS string given. Will tether each input molecule all possible ways."
if numRefMatchs > 1:
print("More than one match in the reference molecule for the SMARTS string given. Will tether each input molecule all possible ways.")
refIndxPerMatch = [npy.array(rmi) - 1 for rmi in refMatchIds]
# Take coordinates for the reference matched atoms
refCoords = takeCoords(ref)
refMatchCoords = [npy.take(refCoords, refIndx, axis=0) for refIndx in refIndxPerMatch]
refMatchCoords = [npy.take(refCoords, refIndx, axis=0)
for refIndx in refIndxPerMatch]
# Do the same for molecule in molsdf
out=pybel.Outputfile('sdf', outsdf, overwrite=True)
out = pybel.Outputfile('sdf', outsdf, overwrite=True)
molSupp = pybel.readfile("sdf", molsdf)
ff = pybel.ob.OBForceField_FindForceField('MMFF94')
for i,mol in enumerate(molSupp):
print "## Molecule %i"%(i+1),
mol.OBMol.DeleteNonPolarHydrogens()
molMatchAllIds = smarts.findall(mol)
numMatchs = len(molMatchAllIds)
if numMatchs == 0:
print "No_Match",
continue
elif numMatchs ==1:
print "Match",
elif numMatchs > 1:
print "Multiple_Match SMART Matches for this molecule (%d)"%numMatchs,
# If more than one match, write an output of the same molecule for each match
# Start a default bestcoord and rmsd for later looping for each pose
bestCoordPerMatch = [[0 for i in range(numMatchs)] for i in range(numRefMatchs)]
bestRMSPerMatch = [[999 for i in range(numMatchs)] for i in range(numRefMatchs)]
for i, mol in enumerate(molSupp):
print("## Molecule {:d}".format(i+1), end=" ")
mol.OBMol.DeleteNonPolarHydrogens()
molMatchAllIds = smarts.findall(mol)
numMatchs = len(molMatchAllIds)
if numMatchs == 0:
print("No_Match", end=" ")
continue
elif numMatchs == 1:
print("Match", end=" ")
elif numMatchs > 1:
print("Multiple_Match SMART Matches for this molecule {:d}".format(
numMatchs), end=" ")
# If more than one match, write an output of the same molecule for each match
# Start a default bestcoord and rmsd for later looping for each pose
bestCoordPerMatch = [
[0 for i in range(numMatchs)] for i in range(numRefMatchs)]
bestRMSPerMatch = [
[999 for i in range(numMatchs)] for i in range(numRefMatchs)]
# Will do a randomrotorsearch to find conformer with the lower rmsd when superposing
# At least 20 when possible
#ff.Setup(mol.OBMol)
#numats = mol.OBMol.NumAtoms()
#numrot = mol.OBMol.NumRotors()
#print "Atoms: %i, Rotors: %i"%(numats, numrot)
#geomopt = 300
#genconf = 100
# increase iterations if bigger molecule or bigger number of rotatable bonds
# for allowing better sampling
#if numats > 40 and numrot > 5:
# geomopt = 300
# genconf = 150
#if numats > 55 and numrot > 7:
# genconf = 100
# geomopt = 500
#print "\tDoing conformational search with WeightedRotorSearch (%i, %i)..."%(genconf, geomopt),
#ff.SteepestDescent(500, 1.0e-4)
#ff.WeightedRotorSearch(genconf,geomopt)
#ff.ConjugateGradients(500, 1.0e-6)
#ff.GetConformers(mol.OBMol)
#numconf = mol.OBMol.NumConformers()
numconf = 1
#print "%i conformers generated"%numconf
# ff.Setup(mol.OBMol)
#numats = mol.OBMol.NumAtoms()
#numrot = mol.OBMol.NumRotors()
# print "Atoms: %i, Rotors: %i"%(numats, numrot)
#geomopt = 300
#genconf = 100
# increase iterations if bigger molecule or bigger number of rotatable bonds
# for allowing better sampling
# if numats > 40 and numrot > 5:
# geomopt = 300
# genconf = 150
# if numats > 55 and numrot > 7:
# genconf = 100
# geomopt = 500
# print "\tDoing conformational search with WeightedRotorSearch (%i, %i)..."%(genconf, geomopt),
#ff.SteepestDescent(500, 1.0e-4)
# ff.WeightedRotorSearch(genconf,geomopt)
#ff.ConjugateGradients(500, 1.0e-6)
# ff.GetConformers(mol.OBMol)
#numconf = mol.OBMol.NumConformers()
numconf = 1
# print "%i conformers generated"%numconf
if numconf > 1:
# Doing conf search
#for i in range(numconf):
# for i in range(numconf):
# mol.OBMol.SetConformer(i)
# confCoords = takeCoords(mol)
# print 'coord:',confCoords[0,:]
#
# print 'coord:',confCoords[0,:]
#
# for imatch, molMatchIds in enumerate(molMatchAllIds):
# molMatchIndx = npy.array(molMatchIds) - 1
# confMatchCoords = npy.take(confCoords, molMatchIndx, axis=0)
#
#
# # Align: Get rotation matrix between the two sets of coords
# # Apply rotation to the whole target molecule
# rotMat, targetCentroid, refCentroid, rmsd = superpose3D(confMatchCoords, refMatchCoords, returnRotMat=True)
# if rmsd < bestRMSPerMatch[imatch]:
# if rmsd < bestRMSPerMatch[imatch]:
# newcoords = npy.dot((confCoords - targetCentroid), rotMat) + refCentroid
# bestRMSPerMatch[imatch] = rmsd
# bestCoordPerMatch[imatch] = newcoords
# #if bestrms < 0.01: break
pass
# #if bestrms < 0.01: break
pass
else:
molCoords = takeCoords(mol)
for imatch, molMatchIds in enumerate(molMatchAllIds):
# loop in each matching way for the input molecule
molMatchIndx = npy.array(molMatchIds) - 1
molMatchCoords = npy.take(molCoords, molMatchIndx, axis=0)
# Loop over the reference matches
# Align: Get rotation matrix between the two sets of coords
# Apply rotation to the whole target molecule
for ir, refMatchCoord in enumerate(refMatchCoords):
rotMat, targetCentroid, refCentroid, rmsd = superpose3D(molMatchCoords, refMatchCoord, returnRotMat=True)
if rmsd < bestRMSPerMatch[ir][imatch]:
newcoords = npy.dot((molCoords - targetCentroid), rotMat) + refCentroid
bestRMSPerMatch[ir][imatch] = rmsd
bestCoordPerMatch[ir][imatch] = newcoords
# loop in each matching way for the input molecule
molMatchIndx = npy.array(molMatchIds) - 1
molMatchCoords = npy.take(molCoords, molMatchIndx, axis=0)
# Loop over the reference matches
# Align: Get rotation matrix between the two sets of coords
# Apply rotation to the whole target molecule
for ir, refMatchCoord in enumerate(refMatchCoords):
rotMat, targetCentroid, refCentroid, rmsd = superpose3D(
molMatchCoords, refMatchCoord, returnRotMat=True)
if rmsd < bestRMSPerMatch[ir][imatch]:
newcoords = npy.dot(
(molCoords - targetCentroid), rotMat) + refCentroid
bestRMSPerMatch[ir][imatch] = rmsd
bestCoordPerMatch[ir][imatch] = newcoords
# Finally update molecule coordinates with the best matching coordinates found
# change molecule coordinates, set TETHERED ATOMS property and save
for imatch in range(numMatchs):
for irefmatch in range(numRefMatchs):
bestCoord = bestCoordPerMatch[irefmatch][imatch]
bestRMS = bestRMSPerMatch[irefmatch][imatch]
print "\tBest RMSD reached (match %d, refmatch %d): %s"%(imatch, irefmatch, bestRMS)
molMatchID = molMatchAllIds[imatch]
updateCoords(mol, bestCoord)
newData = pybel.ob.OBPairData()
newData.SetAttribute("TETHERED ATOMS")
newData.SetValue(prepareAtomString(molMatchID))
mol.OBMol.DeleteData("TETHERED ATOMS") # Remove Previous DATA
mol.OBMol.CloneData(newData) # Add new data
out.write(mol)
# change molecule coordinates, set TETHERED ATOMS property and save
for imatch in range(numMatchs):
for irefmatch in range(numRefMatchs):
bestCoord = bestCoordPerMatch[irefmatch][imatch]
bestRMS = bestRMSPerMatch[irefmatch][imatch]
print("\tBest RMSD reached (match {:d}, refmatch {:d}): {}".format(
imatch, irefmatch, bestRMS))
molMatchID = molMatchAllIds[imatch]
updateCoords(mol, bestCoord)
newData = pybel.ob.OBPairData()
newData.SetAttribute("TETHERED ATOMS")
newData.SetValue(prepareAtomString(molMatchID))
mol.OBMol.DeleteData("TETHERED ATOMS") # Remove Previous DATA
mol.OBMol.CloneData(newData) # Add new data
out.write(mol)
out.close()
print "DONE"
print("DONE")
sys.stdout.close()
sys.stderr.close()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment