MobleyLab / chemper
1
"""
2
cp_rdk.py
3

4
Cheminformatics tools using RDKit
5

6
The classes provided here follow the structure in adapters.
7
This is a wrapper allowing our actual package to use RDKit
8

9
AUTHORS:
10

11
Caitlin C. Bannan <bannanc@uci.edu>, Mobley Group, University of California Irvine
12
"""
13

14 0
from chemper.mol_toolkits.adapters import MolAdapter, AtomAdapter, BondAdapter
15 0
from rdkit import Chem
16

17

18
# =======================================
19
# Molecule Class
20
# =======================================
21

22 0
class Mol(MolAdapter):
23 0
    def __init__(self, mol):
24
        """
25
        Create a ChemPer Mol from an RDMol
26

27
        Parameters
28
        ----------
29
        mol : openeye RDKMol object
30
              openeye molecule to convert to ChemPer Mol object
31
        """
32 0
        if not isinstance(mol, Chem.rdchem.Mol):
33 0
            raise TypeError("Expecting an rdchem.Mol instead of %s" % type(mol))
34 0
        self.mol = mol
35

36 0
    def __str__(self): return self.get_smiles()
37

38 0
    @classmethod
39
    def from_smiles(cls, smiles):
40 0
        mol = Chem.MolFromSmiles(smiles)
41 0
        if mol is None:
42 0
            raise ValueError('Could not parse SMILES %s' % smiles)
43 0
        cls(Chem.AddHs(mol))
44

45 0
    def set_aromaticity_mdl(self):
46 0
        Chem.SanitizeMol(self.mol, Chem.SANITIZE_ALL^Chem.SANITIZE_SETAROMATICITY)
47 0
        Chem.SetAromaticity(self.mol, Chem.AromaticityModel.AROMATICITY_MDL)
48

49 0
    def get_atoms(self):
50 0
        return [Atom(a) for a in self.mol.GetAtoms()]
51

52 0
    def get_atom_by_index(self, idx):
53 0
        return Atom(self.mol.GetAtomWithIdx(idx))
54

55 0
    def get_bonds(self):
56 0
        return [Bond(b) for b in self.mol.GetBonds()]
57

58 0
    def get_bond_by_index(self, idx):
59 0
        return Bond(self.mol.GetBondWithIdx(idx))
60

61 0
    def get_bond_by_atoms(self, atom1, atom2):
62 0
        if not atom1.is_connected_to(atom2):
63 0
            return None
64 0
        return Bond(self.mol.GetBondBetweenAtoms(atom1.get_index(), atom2.get_index()))
65

66 0
    def smirks_search(self, smirks):
67 0
        cmol = Chem.Mol(self.mol)
68

69 0
        matches = list()
70

71 0
        ss = Chem.MolFromSmarts(smirks)
72 0
        if ss is None:
73 0
            raise ValueError("Error parsing SMIRKS %s" % smirks)
74

75
        # get atoms in query mol with smirks index
76 0
        maps = dict()
77 0
        for qatom in ss.GetAtoms():
78 0
            smirks_idx = qatom.GetAtomMapNum()
79 0
            if smirks_idx != 0:
80 0
                maps[smirks_idx] = qatom.GetIdx()
81

82 0
        for match in cmol.GetSubstructMatches(ss, False):
83 0
            d = {k:self.get_atom_by_index(match[e]) for k,e in maps.items()}
84 0
            matches.append(d)
85

86 0
        return matches
87

88 0
    def get_smiles(self):
89 0
        smiles = Chem.MolToSmiles(Chem.RemoveHs(self.mol))
90 0
        return smiles
91

92

93
# =======================================
94
# Atom Class
95
# =======================================
96

97

98 0
class Atom(AtomAdapter):
99 0
    def __init__(self, atom):
100
        """
101
        Create a ChemPer Atom from an RDAtom
102

103
        Parameters
104
        ----------
105
        atom : RDKit Atom
106
               Atom object from an RDK molecule
107
        """
108 0
        if not isinstance(atom, Chem.rdchem.Atom):
109 0
            raise TypeError("Expecting a rdchem.Atom instead of %s" % type(atom))
110 0
        self.atom = atom
111 0
        self._idx = self.atom.GetIdx()
112

113 0
    def __str__(self): return '%i%s' % (self._idx, self.atom.GetSymbol())
114

115 0
    def atomic_number(self): return self.atom.GetAtomicNum()
116

117 0
    def degree(self): return self.atom.GetDegree()
118

119 0
    def connectivity(self): return self.atom.GetTotalDegree()
120

121 0
    def valence(self): return self.atom.GetTotalValence()
122

123 0
    def formal_charge(self): return self.atom.GetFormalCharge()
124

125 0
    def hydrogen_count(self):
126 0
        return self.atom.GetTotalNumHs(includeNeighbors=True)
127

128 0
    def ring_connectivity(self):
129 0
        return len([b for b in self.atom.GetBonds() if b.IsInRing()])
130

131 0
    def min_ring_size(self):
132 0
        if not self.atom.IsInRing():
133 0
            return 0
134 0
        for i in range(10000):
135 0
            if self.atom.IsInRingSize(i):
136 0
                return i
137
        # TODO: raise exception instead?
138 0
        return 10000
139

140 0
    def is_aromatic(self): return self.atom.GetIsAromatic()
141

142 0
    def get_index(self): return self._idx
143

144 0
    def is_connected_to(self, atom2):
145 0
        if not isinstance(atom2.atom, Chem.rdchem.Atom):
146 0
            return False
147 0
        neighbors = [a.GetIdx() for a in self.atom.GetNeighbors()]
148 0
        return atom2.get_index() in neighbors
149

150 0
    def get_neighbors(self):
151 0
        return [Atom(a) for a in self.atom.GetNeighbors()]
152

153 0
    def get_bonds(self):
154 0
        return [Bond(b) for b in self.atom.GetBonds()]
155

156 0
    def get_molecule(self):
157 0
        mol = Chem.Mol(self.atom.GetOwningMol())
158 0
        return Mol(mol)
159

160
# =======================================
161
# Bond Class
162
# =======================================
163

164

165 0
class Bond(BondAdapter):
166 0
    def __init__(self, bond):
167
        """
168
        Creates a ChemPer Bond from an RDK Bond
169

170
        Parameters
171
        ----------
172
        bond : RDK Bond
173
               Bond object from an RDK molecule
174
        """
175 0
        if not isinstance(bond, Chem.rdchem.Bond):
176 0
            raise TypeError("Expecting an rdchem.Bond instead of %s" % type(bond))
177 0
        self.bond = bond
178

179
        # save index
180 0
        self._idx = self.bond.GetIdx()
181

182
        # save order information
183 0
        self._order = self.bond.GetBondTypeAsDouble()
184 0
        orders = {1:'-', 2:'=', 3:'#', 1.5:':'}
185 0
        self._order_symbol = orders.get(self._order, '~')
186

187
        # save atoms in bond
188 0
        self._beginning = Atom(self.bond.GetBeginAtom())
189 0
        self._end = Atom(self.bond.GetEndAtom())
190

191 0
    def __str__(self):
192 0
        return "%i %s%s%s" % (self.get_index(), self._beginning,
193
                              self._order_symbol, self._end)
194

195 0
    def get_order(self): return self._order
196

197 0
    def get_atoms(self): return [self._beginning, self._end]
198

199 0
    def is_ring(self): return self.bond.IsInRing()
200

201 0
    def is_aromatic(self): return self.bond.GetIsAromatic()
202

203 0
    def is_single(self): return self._order == 1
204

205 0
    def is_double(self): return self._order == 2
206

207 0
    def is_triple(self): return self._order == 3
208

209 0
    def get_molecule(self):
210 0
        mol = Chem.Mol(self.bond.GetOwningMol())
211 0
        return Mol(mol)
212

213 0
    def get_index(self): return self._idx
214

215
# =====================================================================
216
# functions for importing molecules from files
217
# =====================================================================
218

219 0
def mols_from_mol2(mol2_file):
220
    """
221
    Parses a mol2 file into ChemPer molecules using RDKit
222

223
    This is a hack for separating mol2 files taken from a Source Forge discussion here:
224
    https://www.mail-archive.com/rdkit-discuss@lists.sourceforge.net/msg01510.html
225
    It splits up a mol2 file into blocks and then uses RDKit to parse those blocks
226

227
    Parameters
228
    ----------
229
    mol2_file : str
230
                relative or absolute path to a mol2 file you want to parse
231
                accessible form the current directory
232

233
    Returns
234
    -------
235
    mols : list[ChemPer Mol]
236
           list of molecules in the mol2 file as ChemPer molecules
237
    """
238
    # TODO: check that this works with mol2 files with a single molecule
239
    # TODO: figure out if @<TRIPOS>MOLECULE is the only delimiter acceptable in this file type
240 0
    import os
241

242 0
    if not os.path.exists(mol2_file):
243 0
        from chemper.chemper_utils import get_data_path
244 0
        mol_path = get_data_path(os.path.join('molecules', mol2_file))
245

246 0
        if not os.path.exists(mol_path):
247 0
            raise IOError("File '%s' not found locally or in chemper/data/molecules." % mol2_file)
248
        else:
249 0
            mol2_file = mol_path
250

251 0
    delimiter="@<TRIPOS>MOLECULE"
252

253 0
    if mol2_file.split('.')[-1] != "mol2":
254 0
        raise IOError("File '%s' is not a mol2 file" % mol2_file)
255

256 0
    if not os.path.exists(mol2_file):
257 0
        raise IOError("File '%s' not found." % mol2_file)
258

259 0
    molecules = list()
260 0
    mol2_block = list()
261

262 0
    file_open = open(mol2_file)
263

264 0
    for line in file_open:
265 0
        if line.startswith(delimiter) and mol2_block:
266 0
            rdmol = Chem.MolFromMol2Block("".join(mol2_block))
267 0
            if rdmol is not None:
268 0
                molecules.append(Mol(rdmol))
269 0
            mol2_block = []
270 0
        mol2_block.append(line)
271 0
    if mol2_block:
272 0
        rdmol = Chem.MolFromMol2Block("".join(mol2_block))
273 0
        if rdmol is not None:
274 0
            molecules.append(Mol(rdmol))
275

276 0
    file_open.close()
277 0
    return molecules
278

Read our documentation on viewing source code .

Loading