1
"""
2
cp_openeye.py
3

4
Cheminformatics tools using OpenEye Toolkits
5

6
The classes provided here follow the structure in adapters.
7
This is a wrapper allowing our actual package to use openeye toolkits
8

9
AUTHORS:
10

11
Caitlin C. Bannan <bannanc@uci.edu>, Mobley Group, University of California Irvine
12
"""
13

14 3
from chemper.mol_toolkits.adapters import MolAdapter, AtomAdapter, BondAdapter
15 3
from openeye import oechem
16

17

18
# =======================================
19
# Molecule Class
20
# =======================================
21

22 3
class Mol(MolAdapter):
    """
    chemper Mol implemented on top of an OpenEye molecule.

    The wrapped OpenEye object is kept on the ``mol`` attribute.
    """
    def __init__(self, mol):
        """
        Parameters
        ----------
        mol: openeye OEMol object
            openeye molecule to convert to chemper Mol object,
            must be an instance of oechem.OEMolBase

        Raises
        ------
        TypeError
            if mol is not an oechem.OEMolBase
        """
        if isinstance(mol, oechem.OEMolBase):
            self.mol = mol
        else:
            raise TypeError("Expecting an OEMol object instead of %s" % type(mol))

    def __str__(self):
        # a molecule prints as its SMILES string
        return self.get_smiles()

    def set_aromaticity_mdl(self):
        """
        Re-perceives aromaticity on this molecule with the MDL model.

        Existing aromatic flags are cleared first, then the MDL model is
        applied and hybridization is reassigned.
        """
        oemol = self.mol
        oechem.OEClearAromaticFlags(oemol)
        oechem.OEAssignAromaticFlags(oemol, oechem.OEAroModel_MDL)
        oechem.OEAssignHybridization(oemol)

    def get_atoms(self):
        """
        Returns
        -------
        atom_list: list of chemper Atoms
            every atom in the molecule, each wrapped as a chemper Atom
        """
        return [Atom(oe_atom) for oe_atom in self.mol.GetAtoms()]

    def get_atom_by_index(self, idx):
        """
        Parameters
        ----------
        idx: int
            atom index

        Returns
        -------
        atom: chemper Atom object
            atom with index idx
        """
        has_idx = oechem.OEHasAtomIdx(idx)
        oe_atom = self.mol.GetAtom(has_idx)
        return Atom(oe_atom)

    def get_bonds(self):
        """
        Returns
        -------
        bond_list: list of chemper Bonds
            every bond in the molecule, each wrapped as a chemper Bond
        """
        return [Bond(oe_bond) for oe_bond in self.mol.GetBonds()]

    def get_bond_by_index(self, idx):
        """
        Parameters
        ----------
        idx: int
            bond index

        Returns
        -------
        bond: chemper Bond object
            bond with index idx
        """
        has_idx = oechem.OEHasBondIdx(idx)
        oe_bond = self.mol.GetBond(has_idx)
        return Bond(oe_bond)

    def get_bond_by_atoms(self, atom1, atom2):
        """
        Finds the bond between two atoms, if there is one

        Parameters
        ----------
        atom1: chemper Atom object
        atom2: chemper Atom object

        Returns
        -------
        bond: chemper Bond object or None
            the bond joining the two atoms, or None if they are not connected
        """
        if atom1.is_connected_to(atom2):
            return Bond(self.mol.GetBond(atom1.atom, atom2.atom))
        return None

    def smirks_search(self, smirks):
        """
        Performs a substructure search on the molecule with the provided
        SMIRKS pattern. Note - this function expects SMIRKS patterns with
        indexed atoms, that is with :n for at least some atoms.

        Parameters
        ----------
        smirks: str
            SMIRKS pattern with indexed atoms (:n)

        Returns
        -------
        matches: list of dictionaries
            one dictionary per match with the form
            {smirks index: chemper Atom from this molecule}

        Raises
        ------
        ValueError
            if the SMIRKS pattern cannot be parsed
        """
        # the search runs on a copy of the wrapped molecule
        target = oechem.OEMol(self.mol)

        found = []

        query = oechem.OESubSearch()
        if not query.Init(smirks):
            raise ValueError("Error parsing SMIRKS %s" % smirks)

        # set maximum matches in substructure search to infinite (0 in API)
        query.SetMaxMatches(0)
        # NOTE(review): the False argument is presumably the unique-match
        # flag of OESubSearch.Match - confirm against the OpenEye docs
        for hit in query.Match(target, False):
            tagged = {}
            for pair in hit.GetAtoms():
                map_idx = pair.pattern.GetMapIdx()
                # a map index of 0 means the atom isn't "tagged" in the SMIRKS
                if map_idx == 0:
                    continue
                # translate the matched atom in the copy back to this
                # molecule through its atom index
                tagged[map_idx] = self.get_atom_by_index(pair.target.GetIdx())

            found.append(tagged)

        return found

    def get_smiles(self):
        """
        Returns
        -------
        smiles: str
            SMILES string for the molecule
        """
        return oechem.OEMolToSmiles(self.mol)
156

157 3
class MolFromSmiles(Mol):
    """
    chemper Mol built directly from a SMILES string.

    Explicit hydrogens are added to the parsed molecule before it is wrapped.
    """
    def __init__(self, smiles):
        """
        Parameters
        ----------
        smiles: str
            SMILES string for a molecule

        Raises
        ------
        ValueError
            if the SMILES string cannot be parsed
        """
        oemol = oechem.OEMol()
        parsed = oechem.OESmilesToMol(oemol, smiles)
        if not parsed:
            raise ValueError('Could not parse SMILES %s' % smiles)

        oechem.OEAddExplicitHydrogens(oemol)
        # hand the finished OpenEye molecule to the regular Mol wrapper
        Mol.__init__(self, oemol)
174

175
# =======================================
176
# Atom Class
177
# =======================================
178

179 3
class Atom(AtomAdapter):
    """
    chemper Atom implemented on top of an OpenEye OEAtomBase.

    The wrapped atom is kept on ``atom`` and its index is cached on ``_idx``
    when the wrapper is created.
    """
    def __init__(self, atom):
        """
        Parameters
        ----------
        atom: OEAtomBase
            Atom object from an OpenEye molecule

        Raises
        ------
        TypeError
            if atom is not an oechem.OEAtomBase
        """
        if isinstance(atom, oechem.OEAtomBase):
            self.atom = atom
            self._idx = self.atom.GetIdx()
        else:
            raise TypeError("Expecting an OEAtomBase object instead of %s" % type(atom))

    def __str__(self):
        # printed as the atom index followed by the element symbol
        symbol = oechem.OEGetAtomicSymbol(self.atomic_number())
        return "%i%s" % (self._idx, symbol)

    def atomic_number(self):
        """
        Returns
        -------
        atomic_number: int
            atomic number for the atom
        """
        return self.atom.GetAtomicNum()

    def degree(self):
        """
        Returns
        -------
        degree: int
            degree or number of explicit bonds around the atom
        """
        return self.atom.GetDegree()

    def connectivity(self):
        """
        Returns
        -------
        connectivity: int
            connectivity, counted as the number of bonds OpenEye
            reports for this atom
        """
        return len(list(self.atom.GetBonds()))

    def valence(self):
        """
        Returns
        -------
        valence: int
            the atom's valence
        """
        return self.atom.GetValence()

    def formal_charge(self):
        """
        Returns
        -------
        formal_charge: int
            the atom's formal charge
        """
        return self.atom.GetFormalCharge()

    def hydrogen_count(self):
        """
        Returns
        -------
        H_count: int
            total number of hydrogen atoms connected to this Atom
        """
        return self.atom.GetTotalHCount()

    def ring_connectivity(self):
        """
        Returns
        -------
        ring_connectivity: int
            number of bonds on the atom that are a part of a ring
        """
        in_ring = oechem.OEBondIsInRing()
        return len(list(self.atom.GetBonds(in_ring)))

    def min_ring_size(self):
        """
        Returns
        -------
        min_ring_size: int
            size of the smallest ring this atom is a part of
        """
        return oechem.OEAtomGetSmallestRingSize(self.atom)

    def is_aromatic(self):
        """
        Returns
        -------
        is_aromatic: boolean
            True if the atom is aromatic otherwise False
        """
        return self.atom.IsAromatic()

    def get_index(self):
        """
        Returns
        -------
        index: int
            atom index in its molecule, as cached when the Atom was created
        """
        return self._idx

    def is_connected_to(self, atom2):
        """
        Parameters
        ----------
        atom2: chemper Atom object
            atom to check if it is connected to this atom

        Returns
        -------
        connected: boolean
            True if atom2 is a direct neighbor of this atom; False if it is
            not, or if atom2 does not wrap an OpenEye atom
        """
        other = atom2.atom
        if isinstance(other, oechem.OEAtomBase):
            return self.atom.IsConnected(other)
        return False

    def get_neighbors(self):
        """
        Returns
        -------
        neighbors: list of chemper Atoms
            atoms that are one bond away from this atom
        """
        return [Atom(neighbor) for neighbor in self.atom.GetAtoms()]

    def get_bonds(self):
        """
        Returns
        -------
        bonds: list of chemper Bonds
            bonds connected to this atom
        """
        return [Bond(oe_bond) for oe_bond in self.atom.GetBonds()]

    def get_molecule(self):
        """
        Extracts the parent molecule this atom is in

        The parent is copied into a new OEMol, and this Atom is re-pointed
        at the atom with the same index inside that copy.

        Returns
        -------
        mol: chemper Mol
            molecule this atom is stored in
        """
        parent_copy = oechem.OEMol(self.atom.GetParent())
        # keep this wrapper attached to the molecule that is being returned
        self.atom = parent_copy.GetAtom(oechem.OEHasAtomIdx(self._idx))
        return Mol(parent_copy)
334

335
# =======================================
336
# Bond Class
337
# =======================================
338

339

340 3
class Bond(BondAdapter):
    """
    chemper Bond implemented on top of an OpenEye OEBondBase.

    Index, order, order symbol and the two end atoms are read once when the
    wrapper is created and cached on the instance.
    """
    # symbol used when printing a bond of the given order;
    # any other order is printed as '~'
    _ORDER_SYMBOLS = {1: '-', 2: '=', 3: '#', 1.5: ':'}

    def __init__(self, bond):
        """
        Parameters
        ----------
        bond: OEBondBase
            Bond object from an OpenEye molecule

        Raises
        ------
        TypeError
            if bond is not an oechem.OEBondBase
        """
        if isinstance(bond, oechem.OEBondBase):
            self.bond = bond
        else:
            raise TypeError("Expecting an OEBondBase object instead of %s" % type(bond))

        # save index
        self._idx = self.bond.GetIdx()

        # store order information, aromatic bonds are reported as 1.5
        oe_order = self.bond.GetOrder()
        self._order = 1.5 if self.is_aromatic() else oe_order
        self._order_symbol = self._ORDER_SYMBOLS.get(self._order, '~')

        # save atoms in bond
        self.beginning = Atom(self.bond.GetBgn())
        self.end = Atom(self.bond.GetEnd())

    def __str__(self):
        # printed as "<bond index> <begin atom><order symbol><end atom>"
        pieces = (self.get_index(), self.beginning, self._order_symbol, self.end)
        return "%i %s%s%s" % pieces

    def get_order(self):
        """
        Returns
        -------
        order: int or float
            This is the absolute order, returns 1.5 if bond is aromatic
        """
        return self._order

    def get_atoms(self):
        """
        Returns
        -------
        atoms: list of chemper Atoms
            the two atoms connected by this bond, beginning atom first
        """
        return [self.beginning, self.end]

    def is_ring(self):
        """
        Returns
        -------
        is_ring: boolean
            True if bond is a part of a ring, otherwise False
        """
        return self.bond.IsInRing()

    def is_aromatic(self):
        """
        Returns
        -------
        is_aromatic: boolean
            True if it is an aromatic bond
        """
        return self.bond.IsAromatic()

    def is_single(self):
        """
        Returns
        -------
        is_single: boolean
            True if the stored order is 1 (an aromatic bond is stored as 1.5)
        """
        return self._order == 1

    def is_double(self):
        """
        Returns
        -------
        is_double: boolean
            True if the stored order is 2 (an aromatic bond is stored as 1.5)
        """
        return self._order == 2

    def is_triple(self):
        """
        Returns
        -------
        is_triple: boolean
            True if the stored order is 3
        """
        return self._order == 3

    def get_molecule(self):
        """
        Extracts the parent molecule this bond is in

        The parent is copied into a new OEMol, and this Bond is re-pointed
        at the bond with the same index inside that copy.

        Returns
        -------
        mol: chemper Mol
            molecule this bond is stored in
        """
        parent_copy = oechem.OEMol(self.bond.GetParent())
        # keep this wrapper attached to the molecule that is being returned
        self.bond = parent_copy.GetBond(oechem.OEHasBondIdx(self._idx))
        return Mol(parent_copy)

    def get_index(self):
        """
        Returns
        -------
        index: int
            index of this bond in its parent molecule,
            as cached when the Bond was created
        """
        return self._idx
458

459
# =====================================================================
460
# functions for importing molecules from files
461
# =====================================================================
462

463 3
def mols_from_mol2(mol2_file):
    """
    Parses a mol2 file into chemper molecules.

    This simply forwards to mols_from_file.

    Parameters
    ----------
    mol2_file: str
        path to the mol2 file, handled the same way as in mols_from_file

    Returns
    -------
    mols: list of chemper Mols
        list of molecules in the mol2 file as chemper Mols
    """
    molecules = mols_from_file(mol2_file)
    return molecules
465

466 3
def mols_from_file(mol_file):
    """
    Parses a standard molecule file into chemper molecules using OpenEye toolkits

    Parameters
    ----------
    mol_file: str
              relative or full path to the molecule file. If no file exists
              at that path, the name is looked up in chemper's 'molecules'
              data directory instead.

    Returns
    -------
    mols: list of chemper Mols
          list of molecules in the file as chemper Mols

    Raises
    ------
    IOError
        if the file is found neither locally nor in chemper/data/molecules
    """
    import os
    if not os.path.exists(mol_file):
        # fall back to the molecule files shipped with chemper
        from chemper.chemper_utils import get_data_path
        mol_path = get_data_path(os.path.join('molecules', mol_file))

        if not os.path.exists(mol_path):
            raise IOError("File '%s' not found locally or in chemper/data/molecules." % mol_file)
        mol_file = mol_path

    molecules = list()

    # make Openeye input file stream
    ifs = oechem.oemolistream(mol_file)
    try:
        oemol = oechem.OECreateOEGraphMol()
        while oechem.OEReadMolecule(ifs, oemol):
            # if an SD file, the molecule name may be in the SD tags
            if oemol.GetTitle() == '':
                name = oechem.OEGetSDData(oemol, 'name').strip()
                oemol.SetTitle(name)
            # the reader reuses oemol on every iteration, so store a copy
            molecules.append(Mol(oechem.OEMol(oemol)))
    finally:
        # always release the stream, even if reading or wrapping fails
        ifs.close()

    return molecules
507

Read our documentation on viewing source code .

Loading