1
|
|
"""functions to manipulate, read and write OpenEye and Psi4 molecules"""
|
2
|
|
|
3
|
4
|
try:
|
4
|
4
|
from openeye import oechem, oeomega, oeiupac, oedepict, oequacpac, oeszybki, oegrapheme, oegraphsim
|
5
|
0
|
except ImportError:
|
6
|
0
|
raise Warning("Need license for OpenEye!")
|
7
|
|
|
8
|
4
|
import cmiles
|
9
|
4
|
from .utils import logger, BOHR_2_ANGSTROM, carboxylic_acid_hack
|
10
|
|
|
11
|
4
|
import os
|
12
|
4
|
import numpy as np
|
13
|
4
|
import itertools
|
14
|
4
|
import warnings
|
15
|
4
|
import copy
|
16
|
4
|
from math import radians
|
17
|
|
|
18
|
|
"""
|
19
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
20
|
|
Functions to charge, generate conformers and
|
21
|
|
manipulate OpenEye molecules
|
22
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
23
|
|
"""
|
24
|
|
|
25
|
|
|
26
|
4
|
def get_charges(molecule, max_confs=800, strict_stereo=True,
|
27
|
|
normalize=True, keep_confs=None, legacy=True, **kwargs):
|
28
|
|
"""Generate charges for an OpenEye OEMol molecule.
|
29
|
|
Parameters
|
30
|
|
----------
|
31
|
|
molecule : OEMol
|
32
|
|
Molecule for which to generate conformers.
|
33
|
|
Omega will be used to generate max_confs conformations.
|
34
|
|
max_confs : int, optional, default=800
|
35
|
|
Max number of conformers to generate
|
36
|
|
strictStereo : bool, optional, default=True
|
37
|
|
If False, permits smiles strings with unspecified stereochemistry.
|
38
|
|
See https://docs.eyesopen.com/omega/usage.html
|
39
|
|
normalize : bool, optional, default=True
|
40
|
|
If True, normalize the molecule by checking aromaticity, adding
|
41
|
|
explicit hydrogens, and renaming by IUPAC name.
|
42
|
|
keep_confs : int, optional, default=None
|
43
|
|
If None, apply the charges to the provided conformation and return
|
44
|
|
this conformation, unless no conformation is present.
|
45
|
|
Otherwise, return some or all of the generated
|
46
|
|
conformations. If -1, all generated conformations are returned.
|
47
|
|
Otherwise, keep_confs = N will return an OEMol with up to N
|
48
|
|
generated conformations. Multiple conformations are still used to
|
49
|
|
*determine* the charges.
|
50
|
|
legacy : bool, default=True
|
51
|
|
If False, uses the new OpenEye charging engine.
|
52
|
|
See https://docs.eyesopen.com/toolkits/python/quacpactk/OEProtonFunctions/OEAssignCharges.html#
|
53
|
|
Returns
|
54
|
|
-------
|
55
|
|
charged_copy : OEMol
|
56
|
|
A molecule with OpenEye's recommended AM1BCC charge selection scheme.
|
57
|
|
Notes
|
58
|
|
-----
|
59
|
|
Roughly follows
|
60
|
|
http://docs.eyesopen.com/toolkits/cookbook/python/modeling/am1-bcc.html
|
61
|
|
"""
|
62
|
|
|
63
|
|
# If there is no geometry, return at least one conformation.
|
64
|
4
|
if molecule.GetConfs() == 0:
|
65
|
0
|
keep_confs = 1
|
66
|
|
|
67
|
4
|
if not oechem.OEChemIsLicensed(): raise(ImportError("Need License for OEChem!"))
|
68
|
4
|
if not oequacpac.OEQuacPacIsLicensed(): raise(ImportError("Need License for oequacpac!"))
|
69
|
|
|
70
|
4
|
if normalize:
|
71
|
4
|
molecule = normalize_molecule(molecule, molecule.GetTitle())
|
72
|
|
else:
|
73
|
4
|
molecule = oechem.OEMol(molecule)
|
74
|
|
|
75
|
4
|
charged_copy = generate_conformers(molecule, max_confs=max_confs, strict_stereo=strict_stereo, **kwargs) # Generate up to max_confs conformers
|
76
|
|
|
77
|
|
# fix issue that causes carboxylic acid to fail charging
|
78
|
4
|
carboxylic_acid_hack(charged_copy)
|
79
|
|
|
80
|
4
|
if not legacy:
|
81
|
|
# 2017.2.1 OEToolkits new charging function
|
82
|
0
|
status = oequacpac.OEAssignCharges(charged_copy, oequacpac.OEAM1BCCCharges())
|
83
|
0
|
if not status: raise(RuntimeError("OEAssignCharges failed."))
|
84
|
|
else:
|
85
|
|
# AM1BCCSym recommended by Chris Bayly to KAB+JDC, Oct. 20 2014.
|
86
|
4
|
status = oequacpac.OEAssignPartialCharges(charged_copy, oequacpac.OECharges_AM1BCCSym)
|
87
|
4
|
if not status: raise(RuntimeError("OEAssignPartialCharges returned error code %d" % status))
|
88
|
|
|
89
|
|
#Determine conformations to return
|
90
|
4
|
if keep_confs is None:
|
91
|
|
#If returning original conformation
|
92
|
4
|
original = molecule.GetCoords()
|
93
|
|
#Delete conformers over 1
|
94
|
4
|
for k, conf in enumerate( charged_copy.GetConfs() ):
|
95
|
4
|
if k > 0:
|
96
|
4
|
charged_copy.DeleteConf(conf)
|
97
|
|
#Copy coordinates to single conformer
|
98
|
4
|
charged_copy.SetCoords( original )
|
99
|
4
|
elif keep_confs > 0:
|
100
|
4
|
logger().debug("keep_confs was set to %s. Molecule positions will be reset." % keep_confs)
|
101
|
|
|
102
|
|
#Otherwise if a number is provided, return this many confs if available
|
103
|
4
|
for k, conf in enumerate( charged_copy.GetConfs() ):
|
104
|
4
|
if k > keep_confs - 1:
|
105
|
4
|
charged_copy.DeleteConf(conf)
|
106
|
4
|
elif keep_confs == -1:
|
107
|
|
#If we want all conformations, continue
|
108
|
4
|
pass
|
109
|
|
else:
|
110
|
|
#Not a valid option to keep_confs
|
111
|
4
|
raise(ValueError('Not a valid option to keep_confs in get_charges.'))
|
112
|
|
|
113
|
4
|
return charged_copy
|
114
|
|
|
115
|
|
|
116
|
4
|
def generate_conformers(molecule, max_confs=800, dense=False, strict_stereo=True, ewindow=15.0, rms_threshold=1.0, strict_types=True,
|
117
|
|
can_order=True, copy=True, timeout=100):
|
118
|
|
"""Generate conformations for the supplied molecule
|
119
|
|
Parameters
|
120
|
|
----------
|
121
|
|
molecule : OEMol
|
122
|
|
Molecule for which to generate conformers
|
123
|
|
max_confs : int, optional, default=800
|
124
|
|
Max number of conformers to generate. If None, use default OE Value.
|
125
|
|
strict_stereo : bool, optional, default=True
|
126
|
|
If False, permits smiles strings with unspecified stereochemistry.
|
127
|
|
strict_types : bool, optional, default=True
|
128
|
|
If True, requires that Omega have exact MMFF types for atoms in molecule; otherwise, allows the closest atom
|
129
|
|
type of the same element to be used.
|
130
|
|
Returns
|
131
|
|
-------
|
132
|
|
molcopy : OEMol
|
133
|
|
A multi-conformer molecule with up to max_confs conformers.
|
134
|
|
Notes
|
135
|
|
-----
|
136
|
|
Roughly follows
|
137
|
|
http://docs.eyesopen.com/toolkits/cookbook/python/modeling/am1-bcc.html
|
138
|
|
"""
|
139
|
4
|
if copy:
|
140
|
4
|
molcopy = oechem.OEMol(molecule)
|
141
|
|
else:
|
142
|
0
|
molcopy = molecule
|
143
|
|
|
144
|
4
|
if dense:
|
145
|
0
|
omega_opts = oeomega.OEOmegaOptions(oeomega.OEOmegaSampling_Dense)
|
146
|
0
|
omega = oeomega.OEOmega(omega_opts)
|
147
|
|
else:
|
148
|
4
|
omega = oeomega.OEOmega()
|
149
|
|
|
150
|
4
|
atom_map = False
|
151
|
4
|
if cmiles.utils.has_atom_map(molcopy):
|
152
|
4
|
atom_map = True
|
153
|
4
|
cmiles.utils.remove_atom_map(molcopy)
|
154
|
|
|
155
|
|
# These parameters were chosen to match http://docs.eyesopen.com/toolkits/cookbook/python/modeling/am1-bcc.html
|
156
|
4
|
omega.SetMaxConfs(max_confs)
|
157
|
4
|
omega.SetIncludeInput(True)
|
158
|
4
|
omega.SetCanonOrder(can_order)
|
159
|
|
|
160
|
4
|
omega.SetSampleHydrogens(True) # Word to the wise: skipping this step can lead to significantly different charges!
|
161
|
4
|
omega.SetEnergyWindow(ewindow)
|
162
|
4
|
omega.SetRMSThreshold(rms_threshold) # Word to the wise: skipping this step can lead to significantly different charges!
|
163
|
|
|
164
|
4
|
omega.SetStrictStereo(strict_stereo)
|
165
|
4
|
omega.SetStrictAtomTypes(strict_types)
|
166
|
|
|
167
|
4
|
omega.SetIncludeInput(False) # don't include input
|
168
|
4
|
omega.SetMaxSearchTime(timeout)
|
169
|
4
|
if max_confs is not None:
|
170
|
4
|
omega.SetMaxConfs(max_confs)
|
171
|
|
|
172
|
4
|
status = omega(molcopy) # generate conformation
|
173
|
4
|
if not status:
|
174
|
0
|
raise(RuntimeError("omega returned error code %d" % status))
|
175
|
|
|
176
|
4
|
if atom_map:
|
177
|
4
|
cmiles.utils.restore_atom_map(molcopy)
|
178
|
|
|
179
|
4
|
return molcopy
|
180
|
|
|
181
|
|
|
182
|
4
|
def generate_grid_conformers(molecule, dihedrals, intervals, max_rotation=360, copy_mol=True, mapped=True, **kwargs):
|
183
|
|
"""
|
184
|
|
Generate conformers using torsion angle grids.
|
185
|
|
|
186
|
|
Parameters
|
187
|
|
----------
|
188
|
|
molecule: OEMol
|
189
|
|
dihedrals: list of tuples
|
190
|
|
list of dihedrals to rotate
|
191
|
|
intervals: list of ints
|
192
|
|
number in degree of dihedral intervals
|
193
|
|
max_rotation: int, optional, defalult 360
|
194
|
|
maximum rotation in degrees
|
195
|
|
copy_mol: bool, optional, default True
|
196
|
|
If True, return a copy of molecule and do not change incoming molecule
|
197
|
|
mapped: bool, optional, default True
|
198
|
|
If True, will proceed without checking if map indices are missing. Only use this option if you know what you are doing.
|
199
|
|
It is needed when new fragments are generated and are missing map indices on the hydrogens that were used in capping
|
200
|
|
|
201
|
|
Returns
|
202
|
|
-------
|
203
|
|
conf_mol: OEMol that includes conformers
|
204
|
|
|
205
|
|
"""
|
206
|
|
# molecule must be mapped
|
207
|
4
|
if copy_mol:
|
208
|
4
|
molecule = copy.deepcopy(molecule)
|
209
|
4
|
if mapped:
|
210
|
|
# Check for missing atom maps
|
211
|
4
|
if cmiles.utils.is_missing_atom_map(molecule):
|
212
|
4
|
raise ValueError("Molecule must have atom indices")
|
213
|
|
|
214
|
|
# Check length of dihedrals match length of intervals
|
215
|
|
|
216
|
4
|
conf_mol = generate_conformers(molecule, max_confs=1, **kwargs)
|
217
|
4
|
conf = conf_mol.GetConfs().next()
|
218
|
4
|
coords = oechem.OEFloatArray(conf.GetMaxAtomIdx()*3)
|
219
|
4
|
conf.GetCoords(coords)
|
220
|
|
|
221
|
4
|
torsions = [[conf_mol.GetAtom(oechem.OEHasMapIdx(i+1)) for i in dih] for dih in dihedrals]
|
222
|
|
|
223
|
4
|
for i, tor in enumerate(torsions):
|
224
|
4
|
copy_conf_mol = copy.deepcopy(conf_mol)
|
225
|
4
|
conf_mol.DeleteConfs()
|
226
|
4
|
for conf in copy_conf_mol.GetConfs():
|
227
|
4
|
coords = oechem.OEFloatArray(conf.GetMaxAtomIdx()*3)
|
228
|
4
|
conf.GetCoords(coords)
|
229
|
4
|
for angle in range(5, max_rotation+5, intervals[i]):
|
230
|
4
|
newconf = conf_mol.NewConf(coords)
|
231
|
4
|
oechem.OESetTorsion(newconf, tor[0], tor[1], tor[2], tor[3], radians(angle))
|
232
|
|
|
233
|
4
|
cmiles.utils.restore_atom_map(conf_mol)
|
234
|
4
|
return conf_mol
|
235
|
|
|
236
|
|
|
237
|
4
|
def resolve_clashes(mol):
|
238
|
|
"""
|
239
|
|
Taken from quanformer (https://github.com/MobleyLab/quanformer/blob/master/quanformer/initialize_confs.py#L54
|
240
|
|
Minimize conformers with severe steric interaction.
|
241
|
|
Parameters
|
242
|
|
----------
|
243
|
|
mol : single OEChem molecule (single conformer)
|
244
|
|
clashfile : string
|
245
|
|
name of file to write output
|
246
|
|
Returns
|
247
|
|
-------
|
248
|
|
boolean
|
249
|
|
True if completed successfully, False otherwise.
|
250
|
|
"""
|
251
|
|
|
252
|
|
# set general energy options along with the single-point specification
|
253
|
0
|
spSzybki = oeszybki.OESzybkiOptions()
|
254
|
0
|
spSzybki.SetForceFieldType(oeszybki.OEForceFieldType_MMFF94S)
|
255
|
0
|
spSzybki.SetSolventModel(oeszybki.OESolventModel_Sheffield)
|
256
|
0
|
spSzybki.SetRunType(oeszybki.OERunType_SinglePoint)
|
257
|
|
|
258
|
|
# generate the szybki MMFF94 engine for single points
|
259
|
0
|
szSP = oeszybki.OESzybki(spSzybki)
|
260
|
|
|
261
|
|
# construct minimiz options from single-points options to get general optns
|
262
|
0
|
optSzybki = oeszybki.OESzybkiOptions(spSzybki)
|
263
|
|
|
264
|
|
# now reset the option for minimization
|
265
|
0
|
optSzybki.SetRunType(oeszybki.OERunType_CartesiansOpt)
|
266
|
|
|
267
|
|
# generate szybki MMFF94 engine for minimization
|
268
|
0
|
szOpt = oeszybki.OESzybki(optSzybki)
|
269
|
|
# add strong harmonic restraints to nonHs
|
270
|
|
|
271
|
0
|
szOpt.SetHarmonicConstraints(10.0)
|
272
|
|
# construct a results object to contain the results of a szybki calculation
|
273
|
|
|
274
|
0
|
szResults = oeszybki.OESzybkiResults()
|
275
|
|
# work on a copy of the molecule
|
276
|
0
|
tmpmol = oechem.OEMol(mol)
|
277
|
0
|
if not szSP(tmpmol, szResults):
|
278
|
0
|
print('szybki run failed for %s' % tmpmol.GetTitle())
|
279
|
0
|
return False
|
280
|
0
|
Etotsp = szResults.GetTotalEnergy()
|
281
|
0
|
Evdwsp = szResults.GetEnergyTerm(oeszybki.OEPotentialTerms_MMFFVdW)
|
282
|
0
|
if Evdwsp > 35:
|
283
|
0
|
if not szOpt(tmpmol, szResults):
|
284
|
0
|
print('szybki run failed for %s' % tmpmol.GetTitle())
|
285
|
0
|
return False
|
286
|
0
|
Etot = szResults.GetTotalEnergy()
|
287
|
0
|
Evdw = szResults.GetEnergyTerm(oeszybki.OEPotentialTerms_MMFFVdW)
|
288
|
|
# wfile = open(clashfile, 'a')
|
289
|
|
# wfile.write(
|
290
|
|
# '%s resolved bad clash: initial vdW: %.4f ; '
|
291
|
|
# 'resolved EvdW: %.4f\n' % (tmpmol.GetTitle(), Evdwsp, Evdw))
|
292
|
|
# wfile.close()
|
293
|
0
|
mol.SetCoords(tmpmol.GetCoords())
|
294
|
0
|
oechem.OESetSDData(mol, oechem.OESDDataPair('MM Szybki Single Point Energy', "%.12f" % szResults.GetTotalEnergy()))
|
295
|
0
|
return True
|
296
|
|
|
297
|
|
|
298
|
4
|
def remove_clash(multi_conformer, in_place=True):
|
299
|
|
# resolve clashes
|
300
|
0
|
if not in_place:
|
301
|
0
|
multi_conformer = copy.deepcopy(multi_conformer)
|
302
|
0
|
confs_to_remove = []
|
303
|
0
|
for conf in multi_conformer.GetConfs():
|
304
|
0
|
if not resolve_clashes(conf):
|
305
|
0
|
confs_to_remove.append(conf)
|
306
|
0
|
for i in confs_to_remove:
|
307
|
0
|
multi_conformer.DeleteConf(i)
|
308
|
|
|
309
|
0
|
if not in_place:
|
310
|
0
|
return multi_conformer
|
311
|
|
|
312
|
4
|
def has_conformer(molecule, check_two_dimension=False):
|
313
|
|
"""
|
314
|
|
Check if conformer exists for molecule. Return True or False
|
315
|
|
Parameters
|
316
|
|
----------
|
317
|
|
molecule
|
318
|
|
check_two_dimension: bool, optional. Default False
|
319
|
|
If True, will also check if conformation is a 2D conformation (all z coordinates are zero) and return False if
|
320
|
|
conformation is 2D
|
321
|
|
|
322
|
|
Returns
|
323
|
|
-------
|
324
|
|
|
325
|
|
"""
|
326
|
4
|
conformer_bool = True
|
327
|
4
|
try:
|
328
|
4
|
if molecule.NumConfs() <= 1:
|
329
|
|
# Check if xyz coordinates are not zero
|
330
|
4
|
for conf in molecule.GetConfs():
|
331
|
|
# print(conf.GetCoords().__len__())
|
332
|
|
# coords = molecule.GetCoords()
|
333
|
|
# values = np.asarray(list(coords.values()))
|
334
|
|
# print(values)
|
335
|
|
# print(values.all())
|
336
|
|
# if not values.all():
|
337
|
|
# conformer_bool = False
|
338
|
|
#for i in range(conf.GetCoords().__len__()):
|
339
|
4
|
values = np.asarray([conf.GetCoords().__getitem__(i) == (0.0, 0.0, 0.0) for i in
|
340
|
|
conf.GetCoords()])
|
341
|
4
|
if values.all():
|
342
|
4
|
conformer_bool = False
|
343
|
0
|
except AttributeError:
|
344
|
0
|
conformer_bool = False
|
345
|
|
|
346
|
4
|
if conformer_bool and check_two_dimension:
|
347
|
4
|
for conf in molecule.GetConfs():
|
348
|
4
|
values = np.asarray([conf.GetCoords().__getitem__(i)[-1] == 0.0 for i in conf.GetCoords()])
|
349
|
4
|
if values.all():
|
350
|
0
|
conformer_bool = False
|
351
|
4
|
return conformer_bool
|
352
|
|
|
353
|
|
|
354
|
4
|
def get_charge(molecule):
|
355
|
|
"""
|
356
|
|
Get total charge of molecules
|
357
|
|
|
358
|
|
Parameters
|
359
|
|
----------
|
360
|
|
molecule: OEMol
|
361
|
|
|
362
|
|
Returns
|
363
|
|
-------
|
364
|
|
charge: int
|
365
|
|
total charge of molecule
|
366
|
|
"""
|
367
|
|
|
368
|
4
|
charge = 0
|
369
|
4
|
for atom in molecule.GetAtoms():
|
370
|
4
|
charge += atom.GetFormalCharge()
|
371
|
4
|
return charge
|
372
|
|
|
373
|
|
|
374
|
|
"""
|
375
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
376
|
|
Functions for bond orders (Psi4 and OpenEye)
|
377
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
378
|
|
"""
|
379
|
|
|
380
|
|
|
381
|
4
|
def bond_order_from_psi4_raw_output(psi_output):
|
382
|
|
"""
|
383
|
|
Extract Wiberg and Mayer bond order from raw psi4 output
|
384
|
|
|
385
|
|
Parameters
|
386
|
|
----------
|
387
|
|
psi_output: str
|
388
|
|
psi4 raw output. This can be extracted from JSON_data['raw_output'] or by reading entire psi4 output
|
389
|
|
file
|
390
|
|
|
391
|
|
Returns
|
392
|
|
-------
|
393
|
|
bond_order_arrays: dict of numpy arrays
|
394
|
|
{Wiberg_psi4: np.array, Mayer_psi4: np.array}
|
395
|
|
N x N array. N is the number of atoms in the molecule. Indices in this array corresponds to tag in
|
396
|
|
the molecule given by `tagged_smiles` in QC_JSON spec
|
397
|
|
|
398
|
|
"""
|
399
|
0
|
size = None
|
400
|
0
|
Mayer = []
|
401
|
0
|
Wiberg = []
|
402
|
0
|
FLAG = None
|
403
|
0
|
for line in psi_output.split('\n'):
|
404
|
0
|
if not line:
|
405
|
0
|
continue
|
406
|
0
|
if 'Wiberg' in line:
|
407
|
0
|
FLAG = 'Wiberg'
|
408
|
0
|
if 'Mayer' in line:
|
409
|
0
|
FLAG = 'Mayer'
|
410
|
0
|
if 'Size' in line:
|
411
|
0
|
size = line.split()
|
412
|
0
|
size = int(size[3]), int(size[-1])
|
413
|
0
|
if FLAG is 'Mayer':
|
414
|
0
|
Mayer.append(line.split())
|
415
|
0
|
if FLAG is 'Wiberg':
|
416
|
0
|
Wiberg.append(line.split())
|
417
|
0
|
if not size:
|
418
|
0
|
raise Warning("Wiberg and Mayer bond orders were not found")
|
419
|
0
|
Wiberg_array = np.zeros(size)
|
420
|
0
|
Mayer_array = np.zeros(size)
|
421
|
|
|
422
|
0
|
for i, lines in enumerate(zip(Wiberg[2:], Mayer[2:])):
|
423
|
0
|
line_w = lines[0]
|
424
|
0
|
line_m = lines[1]
|
425
|
0
|
if i == 0:
|
426
|
0
|
elements = line_w
|
427
|
0
|
continue
|
428
|
0
|
if not i%float(size[0]+1) and i<((float(size[0]+1))*float((size[0]/5))):
|
429
|
0
|
if len(line_w) !=5:
|
430
|
0
|
if str(size[0]) in line_w:
|
431
|
0
|
elements = line_w
|
432
|
0
|
continue
|
433
|
0
|
elements = line_w
|
434
|
0
|
continue
|
435
|
0
|
j = line_w[0]
|
436
|
0
|
for k, bo in enumerate(zip(line_w[1:], line_m[1:])):
|
437
|
0
|
bo_w = bo[0]
|
438
|
0
|
bo_m = bo[1]
|
439
|
0
|
try:
|
440
|
0
|
Wiberg_array[int(elements[k])-1][int(j)-1] = bo_w
|
441
|
0
|
Mayer_array[int(elements[k])-1][int(j)-1] = bo_m
|
442
|
|
|
443
|
0
|
except (ValueError, IndexError):
|
444
|
0
|
pass
|
445
|
|
|
446
|
0
|
return {'Wiberg_psi4': Wiberg_array, 'Mayer_psi4': Mayer_array}
|
447
|
|
|
448
|
|
|
449
|
4
|
def bond_order_to_bond_graph(bond_order, threshold=0.8, hydrogen_bond=True, molecule=None, atom_map=None):
|
450
|
|
"""
|
451
|
|
Get bond graph from bond orders. This function returns a set of bonds where the bond order is above a threshold
|
452
|
|
Parameters
|
453
|
|
----------
|
454
|
|
bond_order: np array
|
455
|
|
threshold: int
|
456
|
|
|
457
|
|
Returns
|
458
|
|
-------
|
459
|
|
bonds: set
|
460
|
|
|
461
|
|
"""
|
462
|
0
|
bonds = set()
|
463
|
0
|
for i in range(bond_order.shape[0]):
|
464
|
0
|
for j in range(bond_order.shape[1]):
|
465
|
0
|
if bond_order[i, j] >= threshold:
|
466
|
0
|
if not hydrogen_bond:
|
467
|
0
|
idx_1 = atom_map[i+1]
|
468
|
0
|
idx_2 = atom_map[j+1]
|
469
|
0
|
atom_1 = molecule.GetAtom(oechem.OEHasMapIdx(idx_1))
|
470
|
0
|
atom_2 = molecule.GetAtom(oechem.OEHasAtomIdx(idx_2))
|
471
|
0
|
if atom_1.IsHydrogen() or atom_2.IsHydrogen():
|
472
|
0
|
continue
|
473
|
0
|
if (j+1, i+1) in bonds:
|
474
|
0
|
continue
|
475
|
0
|
bonds.add((i+1, j+1))
|
476
|
0
|
return bonds
|
477
|
|
|
478
|
|
|
479
|
4
|
def boltzman_average_bond_order(bond_orders):
|
480
|
|
"""
|
481
|
|
Calculate the Boltzmann weighted bond order average.
|
482
|
|
|
483
|
|
Parameters
|
484
|
|
----------
|
485
|
|
bond_orders: Dictionary of bond orders. The key is the energy of the molecule.
|
486
|
|
|
487
|
|
Returns
|
488
|
|
-------
|
489
|
|
bond_order_arrays: Dictionary of Boltzmann weighted bond orders.
|
490
|
|
|
491
|
|
"""
|
492
|
0
|
energies = np.asarray(list(bond_orders.keys()))
|
493
|
0
|
weights = np.exp(-energies/298.15)
|
494
|
0
|
denominator = weights.sum()
|
495
|
0
|
weights = weights/denominator
|
496
|
|
|
497
|
0
|
Wiberg = np.zeros((tuple([len(energies)]) + bond_orders[energies[0]]['Wiberg_psi4'].shape))
|
498
|
0
|
Mayer = np.zeros((tuple([len(energies)]) + bond_orders[energies[0]]['Wiberg_psi4'].shape))
|
499
|
0
|
for i, energy in enumerate(energies):
|
500
|
0
|
Wiberg[i] = bond_orders[energy]['Wiberg_psi4']
|
501
|
0
|
Mayer[i] = bond_orders[energy]['Mayer_psi4']
|
502
|
|
|
503
|
0
|
average_Wiberg =( weights[:, np.newaxis, np.newaxis] * Wiberg).sum(axis=0)
|
504
|
0
|
average_Mayer = (weights[:, np.newaxis, np.newaxis] * Mayer).sum(axis=0)
|
505
|
0
|
bond_order_arrays = {'Wiberg_psi4': average_Wiberg, 'Mayer_psi4': average_Mayer}
|
506
|
0
|
return bond_order_arrays
|
507
|
|
|
508
|
|
|
509
|
|
"""
|
510
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
511
|
|
Functions to read and write molecules and SMILES
|
512
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
513
|
|
"""
|
514
|
|
|
515
|
4
|
def smiles_to_oemol(smiles, normalize=True, **kwargs):
|
516
|
|
"""Create a OEMolBuilder from a smiles string.
|
517
|
|
Parameters
|
518
|
|
----------
|
519
|
|
smiles : str
|
520
|
|
SMILES representation of desired molecule.
|
521
|
|
Returns
|
522
|
|
-------
|
523
|
|
molecule : OEMol
|
524
|
|
A normalized molecule with desired smiles string.
|
525
|
|
"""
|
526
|
|
|
527
|
4
|
molecule = oechem.OEMol()
|
528
|
4
|
if not oechem.OESmilesToMol(molecule, smiles):
|
529
|
0
|
raise ValueError("The supplied SMILES '%s' could not be parsed." % smiles)
|
530
|
|
|
531
|
4
|
if normalize:
|
532
|
4
|
molecule = normalize_molecule(molecule, **kwargs)
|
533
|
|
|
534
|
4
|
return molecule
|
535
|
|
|
536
|
4
|
def normalize_molecule(molecule, name='', add_atom_map=False):
|
537
|
|
"""Normalize a copy of the molecule by checking aromaticity, adding explicit hydrogens and renaming by IUPAC name
|
538
|
|
or given title
|
539
|
|
|
540
|
|
Parameters
|
541
|
|
----------
|
542
|
|
molecule: OEMol
|
543
|
|
The molecule to be normalized:
|
544
|
|
title: str
|
545
|
|
Name of molecule. If the string is empty, will use IUPAC name
|
546
|
|
|
547
|
|
Returns
|
548
|
|
-------
|
549
|
|
molcopy: OEMol
|
550
|
|
A (copied) version of the normalized molecule
|
551
|
|
|
552
|
|
"""
|
553
|
4
|
molcopy = oechem.OEMol(molecule)
|
554
|
|
|
555
|
|
# Assign aromaticity.
|
556
|
4
|
oechem.OEAssignAromaticFlags(molcopy, oechem.OEAroModelOpenEye)
|
557
|
|
|
558
|
|
# Add hydrogens.
|
559
|
4
|
oechem.OEAddExplicitHydrogens(molcopy)
|
560
|
|
|
561
|
|
# Set title to IUPAC name.
|
562
|
4
|
title = name
|
563
|
4
|
if not title:
|
564
|
4
|
title = oeiupac.OECreateIUPACName(molcopy)
|
565
|
4
|
molcopy.SetTitle(title)
|
566
|
|
|
567
|
|
# Check for any missing atom names, if found reassign all of them.
|
568
|
4
|
if any([atom.GetName() == '' for atom in molcopy.GetAtoms()]):
|
569
|
4
|
oechem.OETriposAtomNames(molcopy)
|
570
|
|
|
571
|
|
# Add canonical ordered atom maps
|
572
|
4
|
if add_atom_map:
|
573
|
4
|
cmiles.utils.add_atom_map(molcopy)
|
574
|
4
|
return molcopy
|
575
|
|
|
576
|
4
|
def smiles_to_smi(smiles, filename, return_fname=False):
|
577
|
|
"""
|
578
|
|
This function writes out an .smi file for a list of SMILES
|
579
|
|
Parameters
|
580
|
|
----------
|
581
|
|
smiles: list of SMILES.
|
582
|
|
The list can also contain strings that include name for SMILES separated by a space. ("SMILES Name")
|
583
|
|
filename: str
|
584
|
|
name of output file
|
585
|
|
return_fname: bool, optional, default=False
|
586
|
|
If True, returns absolute path to filename.
|
587
|
|
|
588
|
|
"""
|
589
|
|
|
590
|
4
|
smiles_list = map(lambda x: x+"\n", list(smiles))
|
591
|
4
|
with open(filename, 'w') as outf:
|
592
|
4
|
outf.writelines(smiles_list)
|
593
|
|
|
594
|
4
|
if return_fname:
|
595
|
0
|
filename = os.path.join(os.getcwd(), filename)
|
596
|
0
|
return filename
|
597
|
|
|
598
|
|
|
599
|
4
|
def file_to_oemols(filename, title=True, verbose=False):
|
600
|
|
"""Create OEMol from file. If more than one mol in file, return list of OEMols.
|
601
|
|
|
602
|
|
Parameters
|
603
|
|
----------
|
604
|
|
filename: str
|
605
|
|
absolute path to
|
606
|
|
title: str, title
|
607
|
|
title for molecule. If None, IUPAC name will be given as title.
|
608
|
|
|
609
|
|
Returns
|
610
|
|
-------
|
611
|
|
mollist: list
|
612
|
|
list of OEMol for multiple molecules. OEMol if file only has one molecule.
|
613
|
|
"""
|
614
|
|
|
615
|
4
|
if not os.path.exists(filename):
|
616
|
0
|
raise Exception("File {} not found".format(filename))
|
617
|
4
|
if verbose:
|
618
|
0
|
logger().info("Loading molecules from {}".format(filename))
|
619
|
|
|
620
|
4
|
ifs = oechem.oemolistream(filename)
|
621
|
4
|
mollist = []
|
622
|
|
|
623
|
4
|
molecule = oechem.OEMol()
|
624
|
4
|
while oechem.OEReadMolecule(ifs, molecule):
|
625
|
4
|
molecule_copy = oechem.OEMol(molecule)
|
626
|
4
|
oechem.OEPerceiveChiral(molecule)
|
627
|
4
|
oechem.OE3DToAtomStereo(molecule)
|
628
|
4
|
oechem.OE3DToBondStereo(molecule)
|
629
|
4
|
if title:
|
630
|
4
|
title = molecule_copy.GetTitle()
|
631
|
4
|
if verbose:
|
632
|
0
|
logger().info("Reading molecule {}".format(title))
|
633
|
|
|
634
|
4
|
mollist.append(normalize_molecule(molecule_copy, title))
|
635
|
4
|
ifs.close()
|
636
|
|
|
637
|
4
|
return mollist
|
638
|
|
|
639
|
|
|
640
|
4
|
def smifile_to_rdmols(filename):
|
641
|
|
"""
|
642
|
|
Read SMILES file and return list of RDmols
|
643
|
|
|
644
|
|
Parameters
|
645
|
|
----------
|
646
|
|
filename: str. Path to file
|
647
|
|
|
648
|
|
Returns
|
649
|
|
-------
|
650
|
|
rd_mols: list
|
651
|
|
list of RDKit molecules
|
652
|
|
|
653
|
|
"""
|
654
|
0
|
from rdkit import Chem
|
655
|
0
|
smiles_txt = open(filename, 'r').read()
|
656
|
|
# Check first line
|
657
|
0
|
first_line = smiles_txt.split('\n')[0]
|
658
|
0
|
if first_line != 'SMILES':
|
659
|
0
|
smiles_txt = 'SMILES\n' + smiles_txt
|
660
|
|
|
661
|
0
|
rd_mol_supp = Chem.SmilesMolSupplierFromText(smiles_txt)
|
662
|
0
|
rd_mols = [x for x in rd_mol_supp]
|
663
|
|
|
664
|
|
# Check for failure to parse
|
665
|
0
|
nones = []
|
666
|
0
|
for i, mol in enumerate(rd_mols):
|
667
|
0
|
if mol is None:
|
668
|
0
|
nones.append(i)
|
669
|
|
|
670
|
0
|
if len(nones) > 0:
|
671
|
|
# Find SMILES that did not parse
|
672
|
0
|
smiles_list = smiles_txt.split('\n')[1:]
|
673
|
0
|
missing_mols = [smiles_list[none] for none in nones]
|
674
|
0
|
lines = [int(none) + 1 for none in nones]
|
675
|
0
|
error = RuntimeError("Not all SMILES were parsed properly. {} indices are None in the rd_mols list. The corresponding"
|
676
|
|
"SMILES are {}. They are on lines {} in the file ".format(nones, missing_mols, lines))
|
677
|
0
|
error.results = rd_mols
|
678
|
0
|
raise error
|
679
|
|
|
680
|
0
|
return rd_mols
|
681
|
|
|
682
|
|
|
683
|
4
|
def oemols_to_smiles_list(OEMols, strict_stereo=False, **kwargs):
|
684
|
|
"""
|
685
|
|
Write oemols to SMILES list
|
686
|
|
Parameters
|
687
|
|
----------
|
688
|
|
OEMols : list of OEMols
|
689
|
|
strict_stereo : bool, optional, False
|
690
|
|
If True, will raise ValueError is oemol is missing stereo
|
691
|
|
kwargs : arguments passed to `cmiles.utils.mol_to_smiles`
|
692
|
|
|
693
|
|
Returns
|
694
|
|
-------
|
695
|
|
SMILES: list of SMILES
|
696
|
|
|
697
|
|
"""
|
698
|
|
|
699
|
4
|
if not isinstance(OEMols, list):
|
700
|
0
|
OEMols = [OEMols]
|
701
|
|
|
702
|
4
|
SMILES = []
|
703
|
4
|
for mol in OEMols:
|
704
|
4
|
try:
|
705
|
4
|
SMILES.append(cmiles.utils.mol_to_smiles(mol, **kwargs))
|
706
|
0
|
except ValueError:
|
707
|
0
|
if strict_stereo:
|
708
|
0
|
raise ValueError("SMILES does not have stereo defined")
|
709
|
0
|
s = oechem.OEMolToSmiles(mol)
|
710
|
0
|
SMILES.append(s)
|
711
|
0
|
warnings.warn("SMILES will be missing steroe. {}".format(s))
|
712
|
|
|
713
|
4
|
return SMILES
|
714
|
|
|
715
|
|
|
716
|
4
|
def file_to_smiles_list(filename, return_titles=True, **kwargs):
|
717
|
|
"""
|
718
|
|
Read file of molecule to list of SMILES
|
719
|
|
Parameters
|
720
|
|
----------
|
721
|
|
filename: str
|
722
|
|
name of file to read. Any file format that OpenEye reads
|
723
|
|
return_titles: bool, optional, default True
|
724
|
|
If True, also return list of molecule names
|
725
|
|
**kwargs: parameters passed to `cmiles.utils.mol_to_smiles`
|
726
|
|
Returns
|
727
|
|
-------
|
728
|
|
list of SMILES
|
729
|
|
"""
|
730
|
|
|
731
|
4
|
oemols = file_to_oemols(filename)
|
732
|
|
# Check if oemols have names
|
733
|
4
|
if return_titles:
|
734
|
0
|
names = []
|
735
|
0
|
for mol in oemols:
|
736
|
0
|
title = mol.GetTitle()
|
737
|
0
|
if not title:
|
738
|
0
|
logger().warning("an oemol does not have a name. Adding an empty str to the titles list")
|
739
|
0
|
names.append(title)
|
740
|
4
|
smiles_list = oemols_to_smiles_list(oemols, **kwargs)
|
741
|
|
|
742
|
4
|
if return_titles:
|
743
|
0
|
return smiles_list, names
|
744
|
4
|
return smiles_list
|
745
|
|
|
746
|
|
|
747
|
|
"""
|
748
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
749
|
|
Functions to work with mapped SMILES
|
750
|
|
These functions are used to keep the orders atoms consistent across different molecule graphs
|
751
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
752
|
|
"""
|
753
|
|
|
754
|
4
|
def to_mapped_xyz(molecule, atom_map=None, conformer=None, xyz_format=True, filename=None):
|
755
|
|
"""
|
756
|
|
Generate xyz coordinates for molecule in the order given by the atom_map. atom_map is a dictionary that maps the
|
757
|
|
tag on the SMILES to the atom idex in OEMol.
|
758
|
|
Parameters
|
759
|
|
----------
|
760
|
|
molecule: OEMol with conformers
|
761
|
|
atom_map: dict
|
762
|
|
maps tag in SMILES to atom index
|
763
|
|
conformer: int
|
764
|
|
Which conformer to write xyz file for. If None, write out all conformers. Default is None
|
765
|
|
xyz_format: bool
|
766
|
|
If True, will write out number of atoms and molecule name. If false, will only write out elements and coordinates
|
767
|
|
filename: str
|
768
|
|
Name of file to save to. If None, only returns a string.
|
769
|
|
|
770
|
|
Returns
|
771
|
|
-------
|
772
|
|
str: elements and xyz coordinates (in angstroms) in order of tagged SMILES
|
773
|
|
|
774
|
|
"""
|
775
|
4
|
if not atom_map and not cmiles.utils.has_atom_map(molecule):
|
776
|
4
|
raise ValueError("If molecule does not have atom map, you must provide an atom map")
|
777
|
4
|
if not has_conformer(molecule, check_two_dimension=True):
|
778
|
4
|
raise ValueError("Molecule must have conformers")
|
779
|
4
|
xyz = ""
|
780
|
4
|
for k, mol in enumerate(molecule.GetConfs()):
|
781
|
4
|
if k == conformer or conformer is None:
|
782
|
4
|
if xyz_format:
|
783
|
4
|
xyz += "{}\n".format(mol.GetMaxAtomIdx())
|
784
|
4
|
xyz += "{}\n".format(mol.GetTitle())
|
785
|
4
|
coords = oechem.OEFloatArray(mol.GetMaxAtomIdx() * 3)
|
786
|
4
|
mol.GetCoords(coords)
|
787
|
4
|
if k != 0 and not xyz_format:
|
788
|
0
|
xyz += "*"
|
789
|
|
|
790
|
4
|
for mapping in range(1, molecule.NumAtoms()+1):
|
791
|
4
|
if not atom_map:
|
792
|
4
|
atom = molecule.GetAtom(oechem.OEHasMapIdx(mapping))
|
793
|
4
|
idx = atom.GetIdx()
|
794
|
|
else:
|
795
|
4
|
idx = atom_map[mapping]
|
796
|
4
|
atom = mol.GetAtom(oechem.OEHasAtomIdx(idx))
|
797
|
4
|
syb = oechem.OEGetAtomicSymbol(atom.GetAtomicNum())
|
798
|
4
|
xyz += " {} {:05.3f} {:05.3f} {:05.3f}\n".format(syb,
|
799
|
|
coords[idx * 3],
|
800
|
|
coords[idx * 3 + 1],
|
801
|
|
coords[idx * 3 + 2])
|
802
|
|
|
803
|
4
|
if filename:
|
804
|
0
|
file = open("{}.xyz".format(filename), 'w')
|
805
|
0
|
file.write(xyz)
|
806
|
0
|
file.close()
|
807
|
|
else:
|
808
|
4
|
return xyz
|
809
|
|
|
810
|
|
|
811
|
4
|
def from_mapped_xyz_to_mol_idx_order(mapped_coords, atom_map):
|
812
|
|
"""
|
813
|
|
Reorder xyz coordinates from the mapped order to the order in the molecule atom map is from
|
814
|
|
"""
|
815
|
|
# reshape
|
816
|
0
|
mapped_coords = np.array(mapped_coords, dtype=float).reshape(int(len(mapped_coords)/3), 3)
|
817
|
0
|
coords = np.zeros((mapped_coords.shape))
|
818
|
0
|
for m in atom_map:
|
819
|
0
|
coords[atom_map[m]] = mapped_coords[m-1]
|
820
|
|
|
821
|
|
# flatten
|
822
|
0
|
coords = coords.flatten()
|
823
|
0
|
return coords
|
824
|
|
|
825
|
|
|
826
|
4
|
def qcschema_to_xyz_format(qcschema, name=None, filename=None):
|
827
|
|
"""
|
828
|
|
Write qcschema molecule to xyz format
|
829
|
|
Parameters
|
830
|
|
----------
|
831
|
|
qcschema: dict
|
832
|
|
qcschema molecule. Must have symbols and geometry
|
833
|
|
name: str, optional, default None
|
834
|
|
name of molecule
|
835
|
|
filename: str, optional, default None
|
836
|
|
If filename given, write out file to disk. If not, will return xyz string
|
837
|
|
|
838
|
|
Returns
|
839
|
|
-------
|
840
|
|
xyz: str
|
841
|
|
qcschema molecule in xyz format
|
842
|
|
|
843
|
|
"""
|
844
|
0
|
if not isinstance(qcschema, list):
|
845
|
0
|
qcschema = [qcschema]
|
846
|
0
|
xyz = ""
|
847
|
0
|
for qcmol in qcschema:
|
848
|
0
|
symbols = qcmol['symbols']
|
849
|
0
|
coords = qcmol['geometry']
|
850
|
0
|
coords = np.asarray(coords)*BOHR_2_ANGSTROM
|
851
|
0
|
xyz += "{}\n".format(len(symbols))
|
852
|
0
|
xyz += "{}\n".format(name)
|
853
|
0
|
for i, s in enumerate(symbols):
|
854
|
0
|
xyz += " {} {:05.3f} {:05.3f} {:05.3f}\n".format(s,
|
855
|
|
coords[i * 3],
|
856
|
|
coords[i * 3 + 1],
|
857
|
|
coords[i * 3 + 2])
|
858
|
0
|
if filename:
|
859
|
0
|
with open(filename, 'w') as f:
|
860
|
0
|
f.write(xyz)
|
861
|
|
else:
|
862
|
0
|
return xyz
|
863
|
|
|
864
|
|
|
865
|
4
|
def qcschema_to_xyz_traj(final_molecule_grid, filename=None):
|
866
|
|
"""
|
867
|
|
Generate an xyz trajectory from QCArchive final molecule output from torsion drive.
|
868
|
|
The input should be the grid for one torsiondrive job. Remember to deserialize the output from QCArchive
|
869
|
|
This function assumes a 1D torsion scan
|
870
|
|
Parameters
|
871
|
|
----------
|
872
|
|
final_molecule_grid: dict
|
873
|
|
maps grid id to qcschema molecule
|
874
|
|
filename: str, optional, default None
|
875
|
|
If a name is given, an xyz trajectory will be written to file. If not, xyz string will be returned
|
876
|
|
"""
|
877
|
0
|
xyz = ""
|
878
|
0
|
angles = sorted(list(final_molecule_grid.keys()))
|
879
|
0
|
for angle in angles:
|
880
|
0
|
molecule = final_molecule_grid[angle]
|
881
|
0
|
name = str(angle)
|
882
|
0
|
xyz += qcschema_to_xyz_format(molecule, name)
|
883
|
0
|
if filename:
|
884
|
0
|
with open(filename, 'w') as f:
|
885
|
0
|
f.write(xyz)
|
886
|
|
else:
|
887
|
0
|
return xyz
|
888
|
|
|
889
|
|
"""
|
890
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
891
|
|
Functions for molecule visualization
|
892
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
893
|
|
"""
|
894
|
|
|
895
|
4
|
_KELLYS_COLORS = ['#ebce2b', '#702c8c', '#db6917', '#96cde6', '#ba1c30', '#c0bd7f', '#7f7e80',
|
896
|
|
'#5fa641', '#d485b2', '#4277b6', '#df8461', '#463397', '#e1a11a', '#91218c', '#e8e948', '#7e1510',
|
897
|
|
'#92ae31', '#6f340d', '#d32b1e', '#2b3514']
|
898
|
|
|
899
|
|
|
900
|
4
|
def tag_conjugated_bond(mol, tautomers=None, tag=None, threshold=1.05):
|
901
|
|
"""
|
902
|
|
Add conjugated bond data tag. If the bond order is above the threshold, this tag will be True, otherwise it will be
|
903
|
|
False
|
904
|
|
Parameters
|
905
|
|
----------
|
906
|
|
mol: OEMol
|
907
|
|
tautomers: list of OEMols, optional, Default None
|
908
|
|
If a list is provided, the conjugation tag will be true if the bond in any of the set of molecules is double.
|
909
|
|
The list should consist of resonance structures of the molecules. You can get that from oequacpac.EnumerateTautomres
|
910
|
|
tag: str, optional, Default None
|
911
|
|
If provided, will use that bond order. Options are WibergBondOrder, Wiberg_psi4, Mayer_psi4
|
912
|
|
threshold: int, optional, Default is 1.05
|
913
|
|
The fractional bond order threshold above which the bond will be considered conjugated.
|
914
|
|
|
915
|
|
Returns
|
916
|
|
-------
|
917
|
|
atom_indices: list of atom indices in conjugated bonds.
|
918
|
|
|
919
|
|
"""
|
920
|
0
|
atom_indices = []
|
921
|
0
|
for bond in mol.GetBonds():
|
922
|
0
|
resonance = False
|
923
|
0
|
if tautomers is not None:
|
924
|
0
|
for tmol in tautomers:
|
925
|
0
|
t_bond = tmol.GetBond(oechem.OEHasBondIdx(bond.GetIdx()))
|
926
|
0
|
if t_bond.GetOrder() > 1:
|
927
|
0
|
resonance = True
|
928
|
0
|
break
|
929
|
0
|
elif bond.GetData()[tag] >= threshold:
|
930
|
0
|
resonance = True
|
931
|
|
# Add tag to bond
|
932
|
0
|
conj_tag = oechem.OEGetTag("conjugated")
|
933
|
0
|
bond.SetData(conj_tag, resonance)
|
934
|
0
|
if resonance:
|
935
|
0
|
a1 = bond.GetBgnIdx()
|
936
|
0
|
a2 = bond.GetEndIdx()
|
937
|
0
|
atom_indices.extend([a1, a2])
|
938
|
0
|
return atom_indices
|
939
|
|
|
940
|
|
|
941
|
4
|
def highlight_bond_by_map_idx(mol, fname, bond_map_idx=None, width=600, height=400, wbo=None, map_idx=False, label_scale=2.0, scale_bondwidth=True,
|
942
|
|
color=None):
|
943
|
|
"""
|
944
|
|
|
945
|
|
Parameters
|
946
|
|
----------
|
947
|
|
mol
|
948
|
|
bonds: list of tuples
|
949
|
|
tuples should be map idx of atoms in bond
|
950
|
|
fname
|
951
|
|
|
952
|
|
Returns
|
953
|
|
-------
|
954
|
|
|
955
|
|
"""
|
956
|
0
|
mol = cmiles.utils.load_molecule(mol, toolkit='openeye')
|
957
|
|
# make copy
|
958
|
0
|
mol = oechem.OEMol(mol)
|
959
|
|
# make sure mol has atom map
|
960
|
0
|
if not cmiles.utils.has_atom_map(mol):
|
961
|
0
|
raise ValueError("molecule needs atom map")
|
962
|
0
|
atom_bond_sets = []
|
963
|
|
#atom_bond_set = oechem.OEAtomBondSet()
|
964
|
0
|
if not isinstance(bond_map_idx, list):
|
965
|
0
|
bond_map_idx = [bond_map_idx]
|
966
|
0
|
b = None
|
967
|
0
|
for bond in bond_map_idx:
|
968
|
0
|
if not bond:
|
969
|
0
|
break
|
970
|
0
|
atom_bond_set = oechem.OEAtomBondSet()
|
971
|
0
|
a1 = mol.GetAtom(oechem.OEHasMapIdx(bond[0]))
|
972
|
0
|
a2 = mol.GetAtom(oechem.OEHasMapIdx(bond[1]))
|
973
|
0
|
b = mol.GetBond(a1, a2)
|
974
|
0
|
if wbo:
|
975
|
0
|
b.SetData('WibergBondOrder', wbo)
|
976
|
0
|
if not b:
|
977
|
0
|
raise RuntimeError("{} is not connected in molecule".format(bond))
|
978
|
0
|
atom_bond_set.AddAtom(a1)
|
979
|
0
|
atom_bond_set.AddAtom(a2)
|
980
|
0
|
atom_bond_set.AddBond(b)
|
981
|
0
|
atom_bond_sets.append(atom_bond_set)
|
982
|
|
|
983
|
0
|
dopt = oedepict.OEPrepareDepictionOptions()
|
984
|
0
|
dopt.SetSuppressHydrogens(True)
|
985
|
0
|
oedepict.OEPrepareDepiction(mol, dopt)
|
986
|
|
|
987
|
0
|
opts = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale)
|
988
|
0
|
opts.SetTitleLocation(oedepict.OETitleLocation_Hidden)
|
989
|
|
|
990
|
0
|
if wbo is not None:
|
991
|
0
|
opts.SetBondPropertyFunctor(LabelWibergBondOrder())
|
992
|
0
|
if map_idx:
|
993
|
0
|
opts.SetAtomPropertyFunctor(oedepict.OEDisplayAtomMapIdx())
|
994
|
0
|
opts.SetAtomPropLabelFont(oedepict.OEFont(oechem.OEBlack))
|
995
|
0
|
opts.SetAtomPropLabelFontScale(label_scale)
|
996
|
0
|
opts.SetBondWidthScaling(scale_bondwidth)
|
997
|
|
#opts.SetPropertyOffset(1.0)
|
998
|
|
|
999
|
0
|
disp = oedepict.OE2DMolDisplay(mol, opts)
|
1000
|
0
|
if color:
|
1001
|
|
|
1002
|
0
|
hstyle = oedepict.OEHighlightStyle_BallAndStick
|
1003
|
0
|
hcolor = oechem.OEColor(color)
|
1004
|
0
|
oedepict.OEAddHighlighting(disp, hcolor, hstyle, atom_bond_set)
|
1005
|
|
else:
|
1006
|
0
|
highlight = oedepict.OEHighlightOverlayByBallAndStick(oechem.OEGetContrastColors())
|
1007
|
0
|
oedepict.OEAddHighlightOverlay(disp, highlight, atom_bond_sets)
|
1008
|
|
|
1009
|
0
|
return oedepict.OERenderMolecule(fname, disp)
|
1010
|
|
|
1011
|
|
|
1012
|
4
|
def highlight_bonds_with_label(mol_copy, fname, conjugation=True, rotor=False, width=600, height=400, label=None, scale=2.0):
|
1013
|
|
"""
|
1014
|
|
Generate image of molecule with highlighted bonds. The bonds can either be highlighted with a conjugation tag
|
1015
|
|
or if it is rotatable.
|
1016
|
|
|
1017
|
|
Parameters
|
1018
|
|
----------
|
1019
|
|
mol_copy: OEMol
|
1020
|
|
fname: str
|
1021
|
|
Name of image file
|
1022
|
|
conjugation: Bool, optional, Default is True
|
1023
|
|
If True, the bonds with conjugation tag set to True will be highlighted
|
1024
|
|
rotor: Bool, optional, Default is False
|
1025
|
|
If True, the rotatable bonds will be highlighted.
|
1026
|
|
width: int
|
1027
|
|
height: int
|
1028
|
|
label: string. Optional, Default is None
|
1029
|
|
The bond order label. The options are WibergBondOrder, Wiberg_psi4, Mayer_psi4.
|
1030
|
|
|
1031
|
|
"""
|
1032
|
0
|
mol = oechem.OEMol(mol_copy)
|
1033
|
0
|
bond_index_list = []
|
1034
|
0
|
for bond in mol.GetBonds():
|
1035
|
0
|
if conjugation:
|
1036
|
0
|
try:
|
1037
|
0
|
if bond.GetData('conjugated'):
|
1038
|
0
|
bond_index_list.append(bond.GetIdx())
|
1039
|
0
|
except ValueError:
|
1040
|
0
|
pass
|
1041
|
0
|
if rotor:
|
1042
|
0
|
if bond.IsRotor():
|
1043
|
0
|
bond_index_list.append(bond.GetIdx())
|
1044
|
|
|
1045
|
0
|
atomBondSet = oechem.OEAtomBondSet()
|
1046
|
0
|
for bond in mol.GetBonds():
|
1047
|
0
|
if bond.GetIdx() in bond_index_list:
|
1048
|
0
|
atomBondSet.AddBond(bond)
|
1049
|
0
|
atomBondSet.AddAtom(bond.GetBgn())
|
1050
|
0
|
atomBondSet.AddAtom(bond.GetEnd())
|
1051
|
|
|
1052
|
0
|
dopt = oedepict.OEPrepareDepictionOptions()
|
1053
|
0
|
dopt.SetSuppressHydrogens(True)
|
1054
|
0
|
oedepict.OEPrepareDepiction(mol, dopt)
|
1055
|
|
|
1056
|
0
|
opts = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale)
|
1057
|
0
|
opts.SetTitleLocation(oedepict.OETitleLocation_Hidden)
|
1058
|
|
|
1059
|
0
|
if label is not None:
|
1060
|
0
|
bond_label = {'WibergBondOrder': LabelWibergBondOrder, 'Wiberg_psi4': LabelWibergPsiBondOrder,
|
1061
|
|
'Mayer_psi4': LabelMayerPsiBondOrder}
|
1062
|
|
|
1063
|
0
|
bondlabel = bond_label[label]
|
1064
|
0
|
opts.SetBondPropertyFunctor(bondlabel(oedepict.OE2DPoint(50, 50)))
|
1065
|
0
|
opts.SetBondPropLabelFontScale(scale)
|
1066
|
|
|
1067
|
0
|
disp = oedepict.OE2DMolDisplay(mol, opts)
|
1068
|
|
|
1069
|
0
|
aroStyle = oedepict.OEHighlightStyle_Color
|
1070
|
0
|
aroColor = oechem.OEColor(oechem.OEBlack)
|
1071
|
0
|
oedepict.OEAddHighlighting(disp, aroColor, aroStyle,
|
1072
|
|
oechem.OEIsAromaticAtom(), oechem.OEIsAromaticBond() )
|
1073
|
0
|
hstyle = oedepict.OEHighlightStyle_BallAndStick
|
1074
|
0
|
hcolor = oechem.OEColor(oechem.OELightBlue)
|
1075
|
0
|
oedepict.OEAddHighlighting(disp, hcolor, hstyle, atomBondSet)
|
1076
|
|
|
1077
|
0
|
return oedepict.OERenderMolecule(fname, disp)
|
1078
|
|
|
1079
|
|
|
1080
|
4
|
def highltigh_torsion_by_cluster(mapped_molecule, clustered_dihedrals, fname, width=600, height=400):
|
1081
|
|
"""
|
1082
|
|
Highlight torsion by cluster. This is used to visualize clustering output.
|
1083
|
|
|
1084
|
|
Parameters
|
1085
|
|
----------
|
1086
|
|
mapped_molecule: oemol with map indices
|
1087
|
|
clustered_dihedrals
|
1088
|
|
fname
|
1089
|
|
width
|
1090
|
|
height
|
1091
|
|
|
1092
|
|
Returns
|
1093
|
|
-------
|
1094
|
|
|
1095
|
|
"""
|
1096
|
0
|
mol = oechem.OEMol(mapped_molecule)
|
1097
|
0
|
atom_bond_sets = []
|
1098
|
|
|
1099
|
0
|
for cluster in clustered_dihedrals:
|
1100
|
0
|
atom_bond_set = oechem.OEAtomBondSet()
|
1101
|
0
|
for dihedral in clustered_dihedrals[cluster]:
|
1102
|
0
|
a = mol.GetAtom(oechem.OEHasMapIdx(dihedral[0]+1))
|
1103
|
0
|
atom_bond_set.AddAtom(a)
|
1104
|
0
|
for idx in dihedral[1:]:
|
1105
|
0
|
a2 = mol.GetAtom(oechem.OEHasMapIdx(idx+1))
|
1106
|
0
|
atom_bond_set.AddAtom(a2)
|
1107
|
0
|
bond = mol.GetBond(a, a2)
|
1108
|
0
|
atom_bond_set.AddBond(bond)
|
1109
|
0
|
a=a2
|
1110
|
0
|
atom_bond_sets.append(atom_bond_set)
|
1111
|
|
|
1112
|
0
|
dopt = oedepict.OEPrepareDepictionOptions()
|
1113
|
0
|
dopt.SetSuppressHydrogens(False)
|
1114
|
0
|
oedepict.OEPrepareDepiction(mol, dopt)
|
1115
|
|
|
1116
|
0
|
opts = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale)
|
1117
|
0
|
opts.SetTitleLocation(oedepict.OETitleLocation_Hidden)
|
1118
|
|
|
1119
|
0
|
disp = oedepict.OE2DMolDisplay(mol, opts)
|
1120
|
|
|
1121
|
0
|
aroStyle = oedepict.OEHighlightStyle_Color
|
1122
|
0
|
aroColor = oechem.OEColor(oechem.OEBlack)
|
1123
|
0
|
oedepict.OEAddHighlighting(disp, aroColor, aroStyle,
|
1124
|
|
oechem.OEIsAromaticAtom(), oechem.OEIsAromaticBond() )
|
1125
|
|
# hstyle = oedepict.OEHighlightStyle_BallAndStick
|
1126
|
|
|
1127
|
|
# if color:
|
1128
|
|
# highlight = oechem.OEColor(color)
|
1129
|
|
# # combine all atom_bond_sets
|
1130
|
|
# atom_bond_set = oechem.OEAtomBondSet()
|
1131
|
|
# for ab_set in atom_bond_sets:
|
1132
|
|
# for a in ab_set.GetAtoms():
|
1133
|
|
# atom_bond_set.AddAtom(a)
|
1134
|
|
# for b in ab_set.GetBonds():
|
1135
|
|
# atom_bond_set.AddBond(b)
|
1136
|
|
# oedepict.OEAddHighlighting(disp, highlight, hstyle, atom_bond_set)
|
1137
|
|
# else:
|
1138
|
0
|
highlight = oedepict.OEHighlightOverlayByBallAndStick(oechem.OEGetContrastColors())
|
1139
|
0
|
oedepict.OEAddHighlightOverlay(disp, highlight, atom_bond_sets)
|
1140
|
|
#hcolor = oechem.OEColor(oechem.OELightBlue)
|
1141
|
|
#oedepict.OEAddHighlighting(disp, hcolor, hstyle, atom_bond_sets)
|
1142
|
|
|
1143
|
0
|
return oedepict.OERenderMolecule(fname, disp)
|
1144
|
|
|
1145
|
|
|
1146
|
4
|
def highlight_torsion(mapped_molecule, dihedrals, fname, width=600, height=400, combine_central_bond=True, color=None):
|
1147
|
|
|
1148
|
0
|
mol = oechem.OEMol(mapped_molecule)
|
1149
|
|
|
1150
|
0
|
atom_bond_sets = []
|
1151
|
|
|
1152
|
0
|
if combine_central_bond:
|
1153
|
0
|
central_bonds = [(tor[1], tor[2]) for tor in dihedrals]
|
1154
|
0
|
eq_torsions = {cb : [tor for tor in dihedrals if cb == (tor[1], tor[2]) or cb ==(tor[2], tor[1])] for cb in central_bonds}
|
1155
|
|
|
1156
|
0
|
for cb in eq_torsions:
|
1157
|
0
|
atom_bond_set = oechem.OEAtomBondSet()
|
1158
|
0
|
for dihedral in eq_torsions[cb]:
|
1159
|
0
|
a = mol.GetAtom(oechem.OEHasMapIdx(dihedral[0]+1))
|
1160
|
0
|
atom_bond_set.AddAtom(a)
|
1161
|
|
|
1162
|
0
|
for idx in dihedral[1:]:
|
1163
|
0
|
a2 = mol.GetAtom(oechem.OEHasMapIdx(idx+1))
|
1164
|
0
|
atom_bond_set.AddAtom((a2))
|
1165
|
0
|
bond = mol.GetBond(a, a2)
|
1166
|
0
|
atom_bond_set.AddBond(bond)
|
1167
|
0
|
a = a2
|
1168
|
0
|
atom_bond_sets.append(atom_bond_set)
|
1169
|
|
|
1170
|
0
|
if not combine_central_bond:
|
1171
|
0
|
for dihedral in dihedrals:
|
1172
|
0
|
atom_bond_set = oechem.OEAtomBondSet()
|
1173
|
0
|
a = mol.GetAtom(oechem.OEHasMapIdx(dihedral[0]+1))
|
1174
|
0
|
atom_bond_set.AddAtom(a)
|
1175
|
|
|
1176
|
0
|
for idx in dihedral[1:]:
|
1177
|
0
|
a2 = mol.GetAtom(oechem.OEHasMapIdx(idx+1))
|
1178
|
0
|
atom_bond_set.AddAtom((a2))
|
1179
|
0
|
bond = mol.GetBond(a, a2)
|
1180
|
0
|
atom_bond_set.AddBond(bond)
|
1181
|
0
|
a = a2
|
1182
|
0
|
atom_bond_sets.append(atom_bond_set)
|
1183
|
|
|
1184
|
0
|
dopt = oedepict.OEPrepareDepictionOptions()
|
1185
|
0
|
dopt.SetSuppressHydrogens(False)
|
1186
|
0
|
oedepict.OEPrepareDepiction(mol, dopt)
|
1187
|
|
|
1188
|
0
|
opts = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale)
|
1189
|
0
|
opts.SetTitleLocation(oedepict.OETitleLocation_Hidden)
|
1190
|
|
|
1191
|
0
|
disp = oedepict.OE2DMolDisplay(mol, opts)
|
1192
|
|
|
1193
|
0
|
aroStyle = oedepict.OEHighlightStyle_Color
|
1194
|
0
|
aroColor = oechem.OEColor(oechem.OEBlack)
|
1195
|
0
|
oedepict.OEAddHighlighting(disp, aroColor, aroStyle,
|
1196
|
|
oechem.OEIsAromaticAtom(), oechem.OEIsAromaticBond() )
|
1197
|
0
|
hstyle = oedepict.OEHighlightStyle_BallAndStick
|
1198
|
|
|
1199
|
0
|
if color:
|
1200
|
0
|
highlight = oechem.OEColor(color)
|
1201
|
|
# combine all atom_bond_sets
|
1202
|
0
|
atom_bond_set = oechem.OEAtomBondSet()
|
1203
|
0
|
for ab_set in atom_bond_sets:
|
1204
|
0
|
for a in ab_set.GetAtoms():
|
1205
|
0
|
atom_bond_set.AddAtom(a)
|
1206
|
0
|
for b in ab_set.GetBonds():
|
1207
|
0
|
atom_bond_set.AddBond(b)
|
1208
|
0
|
oedepict.OEAddHighlighting(disp, highlight, hstyle, atom_bond_set)
|
1209
|
|
else:
|
1210
|
0
|
highlight = oedepict.OEHighlightOverlayByBallAndStick(oechem.OEGetContrastColors())
|
1211
|
0
|
oedepict.OEAddHighlightOverlay(disp, highlight, atom_bond_sets)
|
1212
|
|
#hcolor = oechem.OEColor(oechem.OELightBlue)
|
1213
|
|
#oedepict.OEAddHighlighting(disp, hcolor, hstyle, atom_bond_sets)
|
1214
|
|
|
1215
|
0
|
return oedepict.OERenderMolecule(fname, disp)
|
1216
|
|
|
1217
|
|
|
1218
|
4
|
def bond_order_tag(molecule, atom_map, bond_order_array):
|
1219
|
|
"""
|
1220
|
|
Add psi bond order to bond in molecule. This function adds a tag to the GetData dictionary
|
1221
|
|
in bond.GetData()
|
1222
|
|
|
1223
|
|
Parameters
|
1224
|
|
----------
|
1225
|
|
molecule: OEMol
|
1226
|
|
This molecule must have tags that corresponds to the atom_map
|
1227
|
|
atom_map: dict
|
1228
|
|
dictionary that maps atom tag to atom index
|
1229
|
|
bond_order_array: dict
|
1230
|
|
maps Wiberg and Meyer bond indices to N x N numpy arrays.
|
1231
|
|
N - atoms in molecule. This array contains the bond order for bond(i,j) where i,j correspond to
|
1232
|
|
tag on atom and index in bond_order_array
|
1233
|
|
"""
|
1234
|
0
|
wiberg_bond_order = bond_order_array['Wiberg_psi4']
|
1235
|
0
|
mayer_bond_order = bond_order_array['Mayer_psi4']
|
1236
|
|
# Sanity check, both arrays are same shape
|
1237
|
0
|
for i, j in itertools.combinations(range(wiberg_bond_order.shape[0]), 2):
|
1238
|
0
|
idx_1 = atom_map[i+1]
|
1239
|
0
|
idx_2 = atom_map[j+1]
|
1240
|
0
|
atom_1 = molecule.GetAtom(oechem.OEHasAtomIdx(idx_1))
|
1241
|
0
|
atom_2 = molecule.GetAtom(oechem.OEHasAtomIdx(idx_2))
|
1242
|
0
|
bond = molecule.GetBond(atom_1, atom_2)
|
1243
|
0
|
if bond:
|
1244
|
0
|
wbo = wiberg_bond_order[i][j]
|
1245
|
0
|
mbo = mayer_bond_order[i][j]
|
1246
|
0
|
tag = oechem.OEGetTag('Wiberg_psi4')
|
1247
|
0
|
bond.SetData(tag, wbo)
|
1248
|
0
|
tag = oechem.OEGetTag('Mayer_psi4')
|
1249
|
0
|
bond.SetData(tag, mbo)
|
1250
|
|
|
1251
|
|
|
1252
|
4
|
def mol_to_image(mol, fname, width=600, height=400, scale_bondwidth=True):
|
1253
|
0
|
oedepict.OEPrepareDepiction(mol)
|
1254
|
0
|
opts = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale)
|
1255
|
0
|
opts.SetBondWidthScaling(scale_bondwidth)
|
1256
|
|
|
1257
|
0
|
disp = oedepict.OE2DMolDisplay(mol, opts)
|
1258
|
0
|
return oedepict.OERenderMolecule(fname, disp)
|
1259
|
|
|
1260
|
|
|
1261
|
4
|
def mol_to_image_atoms_label(mol, fname, map_idx=True, width=600, height=400, label_scale=2.0, scale_bondwidth=True):
|
1262
|
|
|
1263
|
|
"""Write out png file of molecule with atoms labeled with their map index.
|
1264
|
|
|
1265
|
|
Parameters
|
1266
|
|
----------
|
1267
|
|
smiles: str
|
1268
|
|
SMILES
|
1269
|
|
fname: str
|
1270
|
|
absolute path and filename for png
|
1271
|
|
map_idx: bool
|
1272
|
|
If True, lable atoms with map index instead of atom index. If set to True, input SMILES must have map indices.
|
1273
|
|
|
1274
|
|
"""
|
1275
|
0
|
if not isinstance(mol, oechem.OEMol) and isinstance(mol, str):
|
1276
|
|
# Try converting smiles to mol
|
1277
|
0
|
mol = cmiles.utils.load_molecule(mol, toolkit='openeye')
|
1278
|
|
|
1279
|
0
|
mol = oechem.OEMol(mol)
|
1280
|
0
|
oedepict.OEPrepareDepiction(mol)
|
1281
|
0
|
opts = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale)
|
1282
|
|
|
1283
|
0
|
if map_idx:
|
1284
|
|
# check if molecule has map
|
1285
|
0
|
if not cmiles.utils.has_atom_map(mol):
|
1286
|
0
|
raise ValueError("Input SMILES must have atom maps to display map indices in image")
|
1287
|
0
|
opts.SetAtomPropertyFunctor(oedepict.OEDisplayAtomMapIdx())
|
1288
|
0
|
opts.SetAtomPropertyFunctor(oedepict.OEDisplayAtomMapIdx())
|
1289
|
0
|
if not map_idx:
|
1290
|
0
|
opts.SetAtomPropertyFunctor(oedepict.OEDisplayAtomIdx())
|
1291
|
|
|
1292
|
0
|
opts.SetAtomPropLabelFont(oedepict.OEFont(oechem.OEDarkGreen))
|
1293
|
0
|
opts.SetAtomPropLabelFontScale(label_scale)
|
1294
|
0
|
opts.SetBondWidthScaling(scale_bondwidth)
|
1295
|
|
|
1296
|
0
|
disp = oedepict.OE2DMolDisplay(mol, opts)
|
1297
|
0
|
return oedepict.OERenderMolecule(fname, disp)
|
1298
|
|
|
1299
|
|
|
1300
|
4
|
def mol_to_image_bond_labels(mol, fname, width=600, height=400, label='WibergBondOrder', supress_h=False):
|
1301
|
|
"""
|
1302
|
|
Generate png figure of molecule. Bonds should include bond order defined in label
|
1303
|
|
|
1304
|
|
Parameters
|
1305
|
|
----------
|
1306
|
|
mol: OpenEye OEMol
|
1307
|
|
fname: str
|
1308
|
|
filename for png
|
1309
|
|
width: int
|
1310
|
|
height: int
|
1311
|
|
label: str
|
1312
|
|
Which label to print. Options are WibergBondOrder, Wiberg_psi4 and Mayer_psi4
|
1313
|
|
|
1314
|
|
Returns
|
1315
|
|
-------
|
1316
|
|
bool:
|
1317
|
|
"""
|
1318
|
|
# copy mole
|
1319
|
0
|
mol = oechem.OEMol(mol)
|
1320
|
0
|
oedepict.OEPrepareDepiction(mol, False, supress_h)
|
1321
|
|
|
1322
|
|
|
1323
|
0
|
opts = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale)
|
1324
|
0
|
opts.SetHydrogenStyle(oedepict.OEHydrogenStyle_ExplicitAll|oedepict.OEHydrogenStyle_ExplicitHetero)
|
1325
|
|
# opts.SetAtomPropertyFunctor(oedepict.OEDisplayAtomIdx())
|
1326
|
|
# opts.SetAtomPropLabelFont(oedepict.OEFont(oechem.OEDarkGreen))
|
1327
|
0
|
for b in mol.GetBonds():
|
1328
|
0
|
if not label in b.GetData():
|
1329
|
0
|
raise KeyError("Missing BO")
|
1330
|
0
|
bond_label = {'WibergBondOrder': LabelWibergBondOrder, 'Wiberg_psi4': LabelWibergPsiBondOrder,
|
1331
|
|
'Mayer_psi4': LabelMayerPsiBondOrder}
|
1332
|
0
|
bondlabel = bond_label[label]
|
1333
|
0
|
opts.SetBondPropertyFunctor(bondlabel())
|
1334
|
|
|
1335
|
0
|
disp = oedepict.OE2DMolDisplay(mol, opts)
|
1336
|
0
|
return oedepict.OERenderMolecule(fname, disp)
|
1337
|
|
|
1338
|
4
|
def png_bond_idx(mol, fname, width=600, height=400):
|
1339
|
|
"""
|
1340
|
|
Generate png figure of molecule. Bonds should include bond order defined in label
|
1341
|
|
|
1342
|
|
Parameters
|
1343
|
|
----------
|
1344
|
|
mol: OpenEye OEMol
|
1345
|
|
fname: str
|
1346
|
|
filename for png
|
1347
|
|
width: int
|
1348
|
|
height: int
|
1349
|
|
label: str
|
1350
|
|
Which label to print. Options are WibergBondOrder, Wiberg_psi4 and Mayer_psi4
|
1351
|
|
|
1352
|
|
Returns
|
1353
|
|
-------
|
1354
|
|
bool:
|
1355
|
|
"""
|
1356
|
|
|
1357
|
0
|
oedepict.OEPrepareDepiction(mol)
|
1358
|
|
|
1359
|
|
|
1360
|
0
|
opts = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale)
|
1361
|
|
# opts.SetAtomPropertyFunctor(oedepict.OEDisplayAtomIdx())
|
1362
|
|
# opts.SetAtomPropLabelFont(oedepict.OEFont(oechem.OEDarkGreen))
|
1363
|
|
|
1364
|
0
|
opts.SetBondPropertyFunctor(oedepict.OEDisplayBondIdx())
|
1365
|
|
|
1366
|
0
|
disp = oedepict.OE2DMolDisplay(mol, opts)
|
1367
|
0
|
return oedepict.OERenderMolecule(fname, disp)
|
1368
|
|
|
1369
|
|
|
1370
|
4
|
class ColorAtomByFragmentIndex(oegrapheme.OEAtomGlyphBase):
|
1371
|
|
"""
|
1372
|
|
This class was taken from OpeneEye cookbook
|
1373
|
|
https://docs.eyesopen.com/toolkits/cookbook/python/depiction/enumfrags.html
|
1374
|
|
"""
|
1375
|
4
|
def __init__(self, colorlist, tag):
|
1376
|
4
|
oegrapheme.OEAtomGlyphBase.__init__(self)
|
1377
|
4
|
self.colorlist = colorlist
|
1378
|
4
|
self.tag = tag
|
1379
|
|
|
1380
|
4
|
def RenderGlyph(self, disp, atom):
|
1381
|
|
|
1382
|
4
|
a_disp = disp.GetAtomDisplay(atom)
|
1383
|
4
|
if a_disp is None or not a_disp.IsVisible():
|
1384
|
0
|
return False
|
1385
|
|
|
1386
|
4
|
if not atom.HasData(self.tag):
|
1387
|
0
|
return False
|
1388
|
|
|
1389
|
4
|
linewidth = disp.GetScale() / 1.5
|
1390
|
4
|
color = self.colorlist[atom.GetData(self.tag)]
|
1391
|
4
|
radius = disp.GetScale() / 4.8
|
1392
|
4
|
pen = oedepict.OEPen(color, color, oedepict.OEFill_Off, linewidth)
|
1393
|
|
|
1394
|
4
|
layer = disp.GetLayer(oedepict.OELayerPosition_Below)
|
1395
|
4
|
oegrapheme.OEDrawCircle(layer, oegrapheme.OECircleStyle_Default, a_disp.GetCoords(), radius, pen)
|
1396
|
|
|
1397
|
4
|
return True
|
1398
|
|
|
1399
|
4
|
def ColorAtomByFragmentIndex(self):
|
1400
|
0
|
return ColorAtomByFragmentIndex(self.colorlist, self.tag).__disown__()
|
1401
|
|
|
1402
|
|
|
1403
|
4
|
class LabelFragBondOrder(oedepict.OEDisplayBondPropBase):
|
1404
|
4
|
def __init__(self):
|
1405
|
4
|
oedepict.OEDisplayBondPropBase.__init__(self)
|
1406
|
|
|
1407
|
4
|
def __call__(self, bond):
|
1408
|
4
|
if 'WibergBondOrder_frag' in bond.GetData():
|
1409
|
0
|
bondOrder = bond.GetData('WibergBondOrder_frag')
|
1410
|
0
|
label = "{:.2f}".format(bondOrder)
|
1411
|
0
|
return label
|
1412
|
4
|
return ' '
|
1413
|
|
|
1414
|
4
|
def CreateCopy(self):
|
1415
|
4
|
copy = LabelFragBondOrder()
|
1416
|
4
|
return copy.__disown__()
|
1417
|
|
|
1418
|
4
|
class LabelWibergBondOrder(oedepict.OEDisplayBondPropBase):
|
1419
|
4
|
def __init__(self):
|
1420
|
4
|
oedepict.OEDisplayBondPropBase.__init__(self)
|
1421
|
|
|
1422
|
4
|
def __call__(self, bond):
|
1423
|
4
|
if 'WibergBondOrder' in bond.GetData():
|
1424
|
4
|
bondOrder = bond.GetData('WibergBondOrder')
|
1425
|
4
|
label = "{:.2f}".format(bondOrder)
|
1426
|
4
|
return label
|
1427
|
4
|
return ' '
|
1428
|
|
|
1429
|
4
|
def CreateCopy(self):
|
1430
|
4
|
copy = LabelWibergBondOrder()
|
1431
|
4
|
return copy.__disown__()
|
1432
|
|
|
1433
|
|
|
1434
|
4
|
class LabelWibergPsiBondOrder(oedepict.OEDisplayBondPropBase):
|
1435
|
4
|
def __init__(self):
|
1436
|
0
|
oedepict.OEDisplayBondPropBase.__init__(self)
|
1437
|
|
|
1438
|
4
|
def __call__(self, bond):
|
1439
|
0
|
bondOrder = bond.GetData('Wiberg_psi4')
|
1440
|
0
|
label = "{:.2f}".format(bondOrder)
|
1441
|
0
|
return label
|
1442
|
|
|
1443
|
4
|
def CreateCopy(self):
|
1444
|
0
|
copy = LabelWibergPsiBondOrder()
|
1445
|
0
|
return copy.__disown__()
|
1446
|
|
|
1447
|
|
|
1448
|
4
|
class LabelMayerPsiBondOrder(oedepict.OEDisplayBondPropBase):
|
1449
|
4
|
def __init__(self):
|
1450
|
0
|
oedepict.OEDisplayBondPropBase.__init__(self)
|
1451
|
|
|
1452
|
4
|
def __call__(self, bond):
|
1453
|
0
|
bondOrder = bond.GetData('Mayer_psi4')
|
1454
|
0
|
label = "{:.2f}".format(bondOrder)
|
1455
|
0
|
return label
|
1456
|
|
|
1457
|
4
|
def CreateCopy(self):
|
1458
|
0
|
copy = LabelMayerPsiBondOrder()
|
1459
|
|
|
1460
|
0
|
return copy.__disown__()
|
1461
|
|
|
1462
|
4
|
def to_pdf(molecules, fname, rows=5, cols=3, bond_map_idx=None, bo=False, align=False, supress_h=True, color=None, names=None):
|
1463
|
|
"""
|
1464
|
|
Generate PDF of list of oemols or SMILES
|
1465
|
|
|
1466
|
|
Parameters
|
1467
|
|
----------
|
1468
|
|
molecules : list of OEMols or SMILES
|
1469
|
|
fname : str
|
1470
|
|
Name of PDF
|
1471
|
|
rows : int
|
1472
|
|
How many rows of molecules per page
|
1473
|
|
cols : int
|
1474
|
|
How many columns of molecule per page
|
1475
|
|
bond_map_idx :
|
1476
|
|
bo :
|
1477
|
|
supress_h :
|
1478
|
|
color : tuple or list of ints, optional, default None
|
1479
|
|
If tuple of ints all bonds selected will be highlighted with that color
|
1480
|
|
If list of OEColors, the list needs to be the same length as the incoming molecules
|
1481
|
|
names :
|
1482
|
|
|
1483
|
|
Returns
|
1484
|
|
-------
|
1485
|
|
|
1486
|
|
"""
|
1487
|
0
|
itf = oechem.OEInterface()
|
1488
|
|
#PageByPage = True
|
1489
|
|
|
1490
|
0
|
ropts = oedepict.OEReportOptions(rows, cols)
|
1491
|
0
|
ropts.SetHeaderHeight(25)
|
1492
|
0
|
ropts.SetFooterHeight(25)
|
1493
|
0
|
ropts.SetCellGap(2)
|
1494
|
0
|
ropts.SetPageMargins(10)
|
1495
|
0
|
report = oedepict.OEReport(ropts)
|
1496
|
|
|
1497
|
0
|
cellwidth, cellheight = report.GetCellWidth(), report.GetCellHeight()
|
1498
|
0
|
opts = oedepict.OE2DMolDisplayOptions(cellwidth, cellheight, oedepict.OEScale_AutoScale)
|
1499
|
0
|
oedepict.OESetup2DMolDisplayOptions(opts, itf)
|
1500
|
|
|
1501
|
0
|
if align:
|
1502
|
0
|
if isinstance(align, str):
|
1503
|
0
|
ref_mol = oechem.OEGraphMol()
|
1504
|
0
|
oechem.OESmilesToMol(ref_mol, align)
|
1505
|
0
|
elif isinstance(align, (oechem.OEMol, oechem.OEMolBase, oechem.OEGraphMol)):
|
1506
|
0
|
ref_mol = align
|
1507
|
0
|
oedepict.OEPrepareDepiction(ref_mol)
|
1508
|
|
|
1509
|
0
|
b = None
|
1510
|
0
|
for i, mol in enumerate(molecules):
|
1511
|
0
|
cell = report.NewCell()
|
1512
|
0
|
if isinstance(mol, str):
|
1513
|
0
|
m = oechem.OEMol()
|
1514
|
0
|
oechem.OESmilesToMol(m, mol)
|
1515
|
0
|
mol = m
|
1516
|
0
|
if names is not None:
|
1517
|
0
|
mol.SetTitle(names[i])
|
1518
|
0
|
mol_copy = oechem.OEMol(mol)
|
1519
|
0
|
oedepict.OEPrepareDepiction(mol_copy, False, supress_h)
|
1520
|
|
# if bo:
|
1521
|
|
# b.SetData('WibergBondOrder', bo[i])
|
1522
|
|
# opts.SetBondPropertyFunctor(LabelWibergBondOrder())
|
1523
|
|
|
1524
|
0
|
if isinstance(bond_map_idx, tuple) and len(bond_map_idx) == 2:
|
1525
|
0
|
atom_bond_set = oechem.OEAtomBondSet()
|
1526
|
0
|
a1 = mol_copy.GetAtom(oechem.OEHasMapIdx(bond_map_idx[0]))
|
1527
|
0
|
a2 = mol_copy.GetAtom(oechem.OEHasMapIdx(bond_map_idx[1]))
|
1528
|
0
|
b = mol_copy.GetBond(a1, a2)
|
1529
|
0
|
if bo:
|
1530
|
0
|
b.SetData('WibergBondOrder', bo[i])
|
1531
|
0
|
opts.SetBondPropertyFunctor(LabelWibergBondOrder())
|
1532
|
0
|
atom_bond_set.AddAtom(a1)
|
1533
|
0
|
atom_bond_set.AddAtom(a2)
|
1534
|
0
|
atom_bond_set.AddBond(b)
|
1535
|
0
|
hstyle = oedepict.OEHighlightStyle_BallAndStick
|
1536
|
0
|
if isinstance(color, list):
|
1537
|
0
|
hcolor = oechem.OEColor(color[i])
|
1538
|
0
|
elif color is not None:
|
1539
|
0
|
hcolor = oechem.OEColor(color)
|
1540
|
|
else:
|
1541
|
0
|
hcolor = oechem.OEColor(oechem.OELightBlue)
|
1542
|
0
|
if align:
|
1543
|
0
|
overlaps = oegraphsim.OEGetFPOverlap(ref_mol, mol_copy, oegraphsim.OEGetFPType(oegraphsim.OEFPType_Tree))
|
1544
|
0
|
oedepict.OEPrepareMultiAlignedDepiction(mol_copy, ref_mol, overlaps)
|
1545
|
0
|
disp = oedepict.OE2DMolDisplay(mol_copy, opts)
|
1546
|
0
|
oedepict.OEAddHighlighting(disp, hcolor, hstyle, atom_bond_set)
|
1547
|
0
|
elif isinstance(bond_map_idx, tuple) and len(bond_map_idx) != 2:
|
1548
|
0
|
atom_bond_set = oechem.OEAtomBondSet()
|
1549
|
|
# Find all atoms and bonds between them
|
1550
|
0
|
for m1, m2 in itertools.combinations(bond_map_idx, 2):
|
1551
|
0
|
a1 = mol_copy.GetAtom(oechem.OEHasMapIdx(m1))
|
1552
|
0
|
a2 = mol_copy.GetAtom(oechem.OEHasMapIdx(m2))
|
1553
|
0
|
atom_bond_set.AddAtom(a1)
|
1554
|
0
|
atom_bond_set.AddAtom(a2)
|
1555
|
0
|
b = mol_copy.GetBond(a1, a2)
|
1556
|
0
|
if b:
|
1557
|
0
|
atom_bond_set.AddBond(b)
|
1558
|
0
|
hstyle = oedepict.OEHighlightStyle_BallAndStick
|
1559
|
0
|
if color is not None:
|
1560
|
0
|
hcolor = oechem.OEColor(color)
|
1561
|
|
else:
|
1562
|
0
|
hcolor = oechem.OEColor(oechem.OELightBlue)
|
1563
|
0
|
if align:
|
1564
|
0
|
overlaps = oegraphsim.OEGetFPOverlap(ref_mol, mol_copy, oegraphsim.OEGetFPType(oegraphsim.OEFPType_Tree))
|
1565
|
0
|
oedepict.OEPrepareMultiAlignedDepiction(mol_copy, ref_mol, overlaps)
|
1566
|
0
|
disp = oedepict.OE2DMolDisplay(mol_copy, opts)
|
1567
|
0
|
oedepict.OEAddHighlighting(disp, hcolor, hstyle, atom_bond_set)
|
1568
|
|
|
1569
|
0
|
elif isinstance(bond_map_idx, list):
|
1570
|
0
|
atom_bond_set = oechem.OEAtomBondSet()
|
1571
|
0
|
if any(isinstance(el,list) for el in bond_map_idx):
|
1572
|
|
# More than one bond is being highlighted
|
1573
|
0
|
for bm_idx in bond_map_idx[i]:
|
1574
|
0
|
a1 = mol_copy.GetAtom(oechem.OEHasMapIdx(bm_idx[0]))
|
1575
|
0
|
a2 = mol_copy.GetAtom(oechem.OEHasMapIdx(bm_idx[1]))
|
1576
|
0
|
b = mol_copy.GetBond(a1, a2)
|
1577
|
0
|
atom_bond_set.AddAtom(a1)
|
1578
|
0
|
atom_bond_set.AddAtom(a2)
|
1579
|
0
|
atom_bond_set.AddBond(b)
|
1580
|
|
else:
|
1581
|
0
|
a1 = mol_copy.GetAtom(oechem.OEHasMapIdx(bond_map_idx[i][0]))
|
1582
|
0
|
a2 = mol_copy.GetAtom(oechem.OEHasMapIdx(bond_map_idx[i][1]))
|
1583
|
0
|
b = mol_copy.GetBond(a1, a2)
|
1584
|
0
|
if bo:
|
1585
|
0
|
b.SetData('WibergBondOrder', bo[i])
|
1586
|
0
|
opts.SetBondPropertyFunctor(LabelWibergBondOrder())
|
1587
|
0
|
atom_bond_set.AddAtom(a1)
|
1588
|
0
|
atom_bond_set.AddAtom(a2)
|
1589
|
0
|
atom_bond_set.AddBond(b)
|
1590
|
0
|
hstyle = oedepict.OEHighlightStyle_BallAndStick
|
1591
|
0
|
if color is not None:
|
1592
|
0
|
hcolor = oechem.OEColor(color)
|
1593
|
|
else:
|
1594
|
0
|
hcolor = oechem.OEColor(oechem.OELightBlue)
|
1595
|
0
|
if align:
|
1596
|
0
|
overlaps = oegraphsim.OEGetFPOverlap(ref_mol, mol_copy, oegraphsim.OEGetFPType(oegraphsim.OEFPType_Tree))
|
1597
|
0
|
oedepict.OEPrepareMultiAlignedDepiction(mol_copy, ref_mol, overlaps)
|
1598
|
|
# if not align_res:
|
1599
|
|
# print('Could not align')
|
1600
|
0
|
disp = oedepict.OE2DMolDisplay(mol_copy, opts)
|
1601
|
0
|
oedepict.OEAddHighlighting(disp, hcolor, hstyle, atom_bond_set)
|
1602
|
|
else:
|
1603
|
0
|
if align:
|
1604
|
0
|
overlaps = oegraphsim.OEGetFPOverlap(ref_mol, mol_copy, oegraphsim.OEGetFPType(oegraphsim.OEFPType_Tree))
|
1605
|
0
|
oedepict.OEPrepareMultiAlignedDepiction(mol_copy, ref_mol, overlaps)
|
1606
|
0
|
disp = oedepict.OE2DMolDisplay(mol_copy, opts)
|
1607
|
|
|
1608
|
|
#if align:
|
1609
|
|
# if align_res.IsValid()
|
1610
|
|
|
1611
|
0
|
if not b and bond_map_idx:
|
1612
|
0
|
warnings.warn("{} is not connected in molecule".format(bond_map_idx))
|
1613
|
|
#raise RuntimeError("{} is not connected in molecule".format(bond_map_idx))
|
1614
|
|
|
1615
|
|
|
1616
|
0
|
oedepict.OERenderMolecule(cell, disp)
|
1617
|
0
|
oedepict.OEDrawCurvedBorder(cell, oedepict.OELightGreyPen, 10.0)
|
1618
|
|
|
1619
|
0
|
oedepict.OEWriteReport(fname, report)
|