1
"""
2
cp_rdk.py
3

4
Cheminformatics tools using RDKit
5

6
The classes provided here follow the structure in adapters.
7
This is a wrapper allowing our actual package to use RDKit
8
"""
9

10 71
from chemper.mol_toolkits.adapters import MolAdapter, AtomAdapter, BondAdapter
11 71
from rdkit import Chem
12

13

14
# =======================================
15
# Molecule Class
16
# =======================================
17

18 71
class Mol(MolAdapter):
19 71
    def __init__(self, mol):
20
        """
21
        Create a ChemPer Mol from an RDMol
22

23
        Parameters
24
        ----------
25
        mol : openeye RDKMol object
26
              openeye molecule to convert to ChemPer Mol object
27
        """
28 71
        if not isinstance(mol, Chem.rdchem.Mol):
29 71
            raise TypeError("Expecting an rdchem.Mol instead of %s" % type(mol))
30 71
        self.mol = mol
31

32 71
    def __str__(self): return self.get_smiles()
33

34 71
    @classmethod
35
    def from_smiles(cls, smiles):
36 71
        mol = Chem.MolFromSmiles(smiles)
37 71
        if mol is None:
38 71
            raise ValueError('Could not parse SMILES %s' % smiles)
39 71
        return cls(Chem.AddHs(mol))
40

41 71
    def set_aromaticity_mdl(self):
42 57
        Chem.SanitizeMol(self.mol, Chem.SANITIZE_ALL^Chem.SANITIZE_SETAROMATICITY)
43 57
        Chem.SetAromaticity(self.mol, Chem.AromaticityModel.AROMATICITY_MDL)
44

45 71
    def get_atoms(self):
46 71
        return [Atom(a) for a in self.mol.GetAtoms()]
47

48 71
    def get_atom_by_index(self, idx):
49 71
        return Atom(self.mol.GetAtomWithIdx(idx))
50

51 71
    def get_bonds(self):
52 71
        return [Bond(b) for b in self.mol.GetBonds()]
53

54 71
    def get_bond_by_index(self, idx):
55 71
        return Bond(self.mol.GetBondWithIdx(idx))
56

57 71
    def get_bond_by_atoms(self, atom1, atom2):
58 57
        if not atom1.is_connected_to(atom2):
59 57
            return None
60 57
        return Bond(self.mol.GetBondBetweenAtoms(atom1.get_index(), atom2.get_index()))
61

62 71
    def smirks_search(self, smirks):
63 71
        cmol = Chem.Mol(self.mol)
64

65 71
        matches = list()
66

67 71
        ss = Chem.MolFromSmarts(smirks)
68 71
        if ss is None:
69 71
            raise ValueError("Error parsing SMIRKS %s" % smirks)
70

71
        # get atoms in query mol with smirks index
72 71
        maps = dict()
73 71
        for qatom in ss.GetAtoms():
74 71
            smirks_idx = qatom.GetAtomMapNum()
75 71
            if smirks_idx != 0:
76 71
                maps[smirks_idx] = qatom.GetIdx()
77

78 71
        for match in cmol.GetSubstructMatches(ss, False):
79 71
            d = {k:self.get_atom_by_index(match[e]) for k,e in maps.items()}
80 71
            matches.append(d)
81

82 71
        return matches
83

84 71
    def get_smiles(self):
85 71
        smiles = Chem.MolToSmiles(Chem.RemoveHs(self.mol))
86 71
        return smiles
87

88

89
# =======================================
90
# Atom Class
91
# =======================================
92

93

94 71
class Atom(AtomAdapter):
95 71
    def __init__(self, atom):
96
        """
97
        Create a ChemPer Atom from an RDAtom
98

99
        Parameters
100
        ----------
101
        atom : RDKit Atom
102
               Atom object from an RDK molecule
103
        """
104 71
        if not isinstance(atom, Chem.rdchem.Atom):
105 71
            raise TypeError("Expecting a rdchem.Atom instead of %s" % type(atom))
106 71
        self.atom = atom
107 71
        self._idx = self.atom.GetIdx()
108

109 71
    def __str__(self): return '%i%s' % (self._idx, self.atom.GetSymbol())
110

111 71
    def atomic_number(self): return self.atom.GetAtomicNum()
112

113 71
    def degree(self): return self.atom.GetDegree()
114

115 71
    def connectivity(self): return self.atom.GetTotalDegree()
116

117 71
    def valence(self): return self.atom.GetTotalValence()
118

119 71
    def formal_charge(self): return self.atom.GetFormalCharge()
120

121 71
    def hydrogen_count(self):
122 71
        return self.atom.GetTotalNumHs(includeNeighbors=True)
123

124 71
    def ring_connectivity(self):
125 71
        return len([b for b in self.atom.GetBonds() if b.IsInRing()])
126

127 71
    def min_ring_size(self):
128 71
        if not self.atom.IsInRing():
129 71
            return 0
130 57
        for i in range(10000):
131 57
            if self.atom.IsInRingSize(i):
132 57
                return i
133
        # TODO: raise exception instead?
134 0
        return 10000
135

136 71
    def is_aromatic(self): return self.atom.GetIsAromatic()
137

138 71
    def get_index(self): return self._idx
139

140 71
    def is_connected_to(self, atom2):
141 71
        if not isinstance(atom2.atom, Chem.rdchem.Atom):
142 0
            return False
143 71
        neighbors = [a.GetIdx() for a in self.atom.GetNeighbors()]
144 71
        return atom2.get_index() in neighbors
145

146 71
    def get_neighbors(self):
147 71
        return [Atom(a) for a in self.atom.GetNeighbors()]
148

149 71
    def get_bonds(self):
150 71
        return [Bond(b) for b in self.atom.GetBonds()]
151

152 71
    def get_molecule(self):
153 71
        mol = Chem.Mol(self.atom.GetOwningMol())
154 71
        return Mol(mol)
155

156
# =======================================
157
# Bond Class
158
# =======================================
159

160

161 71
class Bond(BondAdapter):
162 71
    def __init__(self, bond):
163
        """
164
        Creates a ChemPer Bond from an RDK Bond
165

166
        Parameters
167
        ----------
168
        bond : RDK Bond
169
               Bond object from an RDK molecule
170
        """
171 71
        if not isinstance(bond, Chem.rdchem.Bond):
172 71
            raise TypeError("Expecting an rdchem.Bond instead of %s" % type(bond))
173 71
        self.bond = bond
174

175
        # save index
176 71
        self._idx = self.bond.GetIdx()
177

178
        # save order information
179 71
        self._order = self.bond.GetBondTypeAsDouble()
180 71
        orders = {1:'-', 2:'=', 3:'#', 1.5:':'}
181 71
        self._order_symbol = orders.get(self._order, '~')
182

183
        # save atoms in bond
184 71
        self._beginning = Atom(self.bond.GetBeginAtom())
185 71
        self._end = Atom(self.bond.GetEndAtom())
186

187 71
    def __str__(self):
188 0
        return "%i %s%s%s" % (self.get_index(), self._beginning,
189
                              self._order_symbol, self._end)
190

191 71
    def get_order(self): return self._order
192

193 71
    def get_atoms(self): return [self._beginning, self._end]
194

195 71
    def is_ring(self): return self.bond.IsInRing()
196

197 71
    def is_aromatic(self): return self.bond.GetIsAromatic()
198

199 71
    def is_single(self): return self._order == 1
200

201 71
    def is_double(self): return self._order == 2
202

203 71
    def is_triple(self): return self._order == 3
204

205 71
    def get_molecule(self):
206 71
        mol = Chem.Mol(self.bond.GetOwningMol())
207 71
        return Mol(mol)
208

209 71
    def get_index(self): return self._idx
210

211
# =====================================================================
212
# functions for importing molecules from files
213
# =====================================================================
214

215 71
def mols_from_mol2(mol2_file):
216
    """
217
    Parses a mol2 file into ChemPer molecules using RDKit
218

219
    This is a hack for separating mol2 files taken from a Source Forge discussion here:
220
    https://www.mail-archive.com/rdkit-discuss@lists.sourceforge.net/msg01510.html
221
    It splits up a mol2 file into blocks and then uses RDKit to parse those blocks
222

223
    Parameters
224
    ----------
225
    mol2_file : str
226
                relative or absolute path to a mol2 file you want to parse
227
                accessible form the current directory
228

229
    Returns
230
    -------
231
    mols : list[ChemPer Mol]
232
           list of molecules in the mol2 file as ChemPer molecules
233
    """
234
    # TODO: check that this works with mol2 files with a single molecule
235
    # TODO: figure out if @<TRIPOS>MOLECULE is the only delimiter acceptable in this file type
236 71
    import os
237

238 71
    if not os.path.exists(mol2_file):
239 71
        from chemper.chemper_utils import get_data_path
240 71
        mol_path = get_data_path(os.path.join('molecules', mol2_file))
241

242 71
        if not os.path.exists(mol_path):
243 0
            raise IOError("File '%s' not found locally or in chemper/data/molecules." % mol2_file)
244
        else:
245 71
            mol2_file = mol_path
246

247 71
    delimiter="@<TRIPOS>MOLECULE"
248

249 71
    if mol2_file.split('.')[-1] != "mol2":
250 0
        raise IOError("File '%s' is not a mol2 file" % mol2_file)
251

252 71
    if not os.path.exists(mol2_file):
253 0
        raise IOError("File '%s' not found." % mol2_file)
254

255 71
    molecules = list()
256 71
    mol2_block = list()
257

258 71
    file_open = open(mol2_file)
259

260 71
    for line in file_open:
261 71
        if line.startswith(delimiter) and mol2_block:
262 71
            rdmol = Chem.MolFromMol2Block("".join(mol2_block))
263 71
            if rdmol is not None:
264 71
                molecules.append(Mol(rdmol))
265 71
            mol2_block = []
266 71
        mol2_block.append(line)
267 71
    if mol2_block:
268 71
        rdmol = Chem.MolFromMol2Block("".join(mol2_block))
269 71
        if rdmol is not None:
270 71
            molecules.append(Mol(rdmol))
271

272 71
    file_open.close()
273 71
    return molecules
274

Read our documentation on viewing source code .

Loading