MobleyLab / chemper
1
"""
2
single_graph.py
3

4
ChemPerGraph is a class for storing smirks decorators for a molecular fragment.
5
These can be used to convert a molecular sub-graph or an entire molecule into a SMIRKS
6
pattern with all decorators specified.
7

8
For example, imagine you want a SMIRKS for the carbon in methane, it would become:
9

10
"[#6AH4X4x0!r+0:1]"
11

12
with decorators:
13
#6: atomic number 6 for carbon
14
A: aliphatic (a would be aromatic)
15
H4: a total hydrogen count of 4, 4 neighbors are hydrogen
16
X4: connectivity of 4, that is number of neighbors, not valence or sum of bond orders
17
x0: ring connectivity of 0, no ring bonds
18
!r: not in a ring, for atoms in a ring this decorator is `rn` where n is the size of the smallest ring
19
+0: 0 formal charge
20

21
To the best of the authors' knowledge, this is the first open source tool capable
22
of converting a molecule (or sub-graph) into a detailed SMIRKS pattern.
23
"""
24

25 151
import networkx as nx
26 151
from functools import total_ordering
27 151
from chemper.mol_toolkits import mol_toolkit
28

29

30 151
@total_ordering
class SingleGraph:
    """
    SingleGraph is a graph based class for storing atom and bond information
    for one molecular fragment. Nodes are AtomStorage objects and every edge
    carries a BondStorage object under the 'bond' key.
    It uses the chemper.mol_toolkits Atoms, Bonds, and Mols

    Graphs are ordered and compared by the SMIRKS pattern they generate.
    """
    @total_ordering
    class AtomStorage:
        """
        AtomStorage tracks information about an atom

        Equality and ordering are based on the SMIRKS pattern and label,
        but hashing is based on identity so that two atoms which look
        the same can still be separate nodes in the graph.
        """
        def __init__(self, atom=None, label=None):
            """
            Initializes AtomStorage based on a provided atom

            Parameters
            ----------
            atom : chemper Atom object
                if None, every decorator is stored as None and
                the atom is written as a wildcard
            label : int
                integer for labeling this atom in a SMIRKS
                or if negative number just used to track the atom locally
            """
            self.atom = atom

            if atom is None:
                self.atomic_number = None
                self.aromatic = None
                self.charge = None
                self.hydrogen_count = None
                self.connectivity = None
                self.ring_connectivity = None
                self.min_ring_size = None
                self.atom_index = None

            else:
                self.atomic_number = atom.atomic_number()
                self.aromatic = atom.is_aromatic()
                self.charge = atom.formal_charge()
                self.hydrogen_count = atom.hydrogen_count()
                self.connectivity = atom.connectivity()
                self.ring_connectivity = atom.ring_connectivity()
                self.min_ring_size = atom.min_ring_size()
                self.atom_index = atom.get_index()

            self.label = label

        def __lt__(self, other):
            """
            Overrides the default implementation
            This method was primarily written for making SMIRKS patterns predictable.
            If atoms are sortable, then the SMIRKS patterns are always the same making
            tests easier to write. However, the specific sorting was created to also make SMIRKS
            output as human readable as possible, that is to at least make it easier for a
            human to see how the indexed atoms are related to each other.
            It is typically easier for humans to read SMILES/SMARTS/SMIRKS with less branching (indicated with ()).

            For example in:
            [C:1]([H])([H])~[N:2]([C])~[O:3]
            it is easier to see that the atoms C~N~O are connected in a "line" instead of:
            [C:1]([N:2]([O:3])[C])([H])[H]
            which is equivalent, but with all the () it is hard for a human to read the branching

            Parameters
            ----------
            other : AtomStorage

            Returns
            -------
            is_less_than : boolean
                self is less than other
            """
            # if either smirks index is None, then you can't directly compare
            # make a temporary index that is negative if it was None
            self_index = -1000 if self.label is None else self.label
            other_index = -1000 if other.label is None else other.label

            # if either index is greater than 0, the one that is largest
            # should go at the end of the list
            if self_index > 0 or other_index > 0:
                return self_index < other_index

            # Neither atom has a positive SMIRKS index so compare the SMIRKS patterns instead
            return self.as_smirks() < other.as_smirks()

        def __eq__(self, other):
            """
            Atoms are equal if they have the same SMIRKS pattern and label
            """
            return self.as_smirks() == other.as_smirks() and self.label == other.label

        def __hash__(self):
            # identity hash: atoms with the same decorators
            # must remain distinct nodes in the graph
            return id(self)

        def __str__(self):
            return self.as_smirks()

        def as_smirks(self, compress=False):
            """
            Parameters
            -----------
            compress : bool
                Creates a compressed version of the SMIRKS with only
                the atomic number and atom index no other decorators

            Returns
            -------
            smirks : str
                how this atom would be represented in a SMIRKS string
            """
            if self.atom is None:
                # no atom was stored so this matches any atom
                base_smirks = '*'
            elif compress:
                base_smirks = '#%i' % self.atomic_number
            else:
                aromatic = 'a' if self.aromatic else 'A'
                # a minimum ring size of 0 is how "not in a ring" is stored
                if self.min_ring_size == 0:
                    ring = '!r'
                else:
                    ring = 'r%i' % self.min_ring_size
                # '%+i' always writes the sign, so neutral atoms become '+0'
                charge = '%+i' % self.charge

                base_smirks = '#%i%sH%iX%ix%i%s%s' % (self.atomic_number,
                                                      aromatic,
                                                      self.hydrogen_count,
                                                      self.connectivity,
                                                      self.ring_connectivity,
                                                      ring,
                                                      charge)

            # only positive labels are SMIRKS indices,
            # None, zero, and negative labels are for internal tracking only
            if self.label is None or self.label <= 0:
                return '[%s]' % base_smirks

            return '[%s:%i]' % (base_smirks, self.label)

    @total_ordering
    class BondStorage:
        """
        BondStorage tracks information about a bond

        Equality and ordering are based on the SMIRKS pattern and label,
        hashing is based on identity.
        """
        # SMIRKS symbol for each explicitly supported bond order,
        # any other order (including None) is written as '~' (any bond)
        _ORDER_SYMBOLS = {1: '-', 1.5: ':', 2: '=', 3: '#'}

        def __init__(self, bond=None, label=None):
            """
            Parameters
            ----------
            bond : chemper Bond object
                if None, the order and ring membership are stored as None
            label : int or float
                Bonds don't have SMIRKS indices so this is only used for internal
                tracking of the object.
            """
            if bond is None:
                self.order = None
                self.ring = None
                self.bond_index = None
            else:
                self.order = bond.get_order()
                self.ring = bond.is_ring()
                self.bond_index = bond.get_index()

            self._bond = bond
            self.label = label

        def __str__(self):
            return self.as_smirks()

        def __lt__(self, other):
            """
            Bonds are sorted by SMIRKS pattern first and by label for ties.
            An unlabeled bond (label is None) sorts before a labeled one.
            """
            self_smirks = self.as_smirks()
            other_smirks = other.as_smirks()
            if self_smirks != other_smirks:
                return self_smirks < other_smirks

            # None cannot be ordered against numbers, so handle it explicitly
            if self.label is None or other.label is None:
                return self.label is None and other.label is not None
            return self.label < other.label

        def __eq__(self, other):
            """
            Bonds are equal if they have the same label and SMIRKS pattern
            """
            return self.label == other.label and self.as_smirks() == other.as_smirks()

        def __hash__(self):
            # identity hash, see AtomStorage.__hash__
            return id(self)

        def as_smirks(self):
            """
            Returns
            -------
            SMIRKS : str
                how this bond should appear in a SMIRKS string
            """
            if self.ring is None:
                ring = ''
            elif self.ring:
                ring = '@'
            else:
                ring = '!@'

            # fall back to the "any bond" symbol rather than failing
            # on an order that has no dedicated SMIRKS symbol
            order = self._ORDER_SYMBOLS.get(self.order, '~')

            return order + ring

    def __init__(self, mol=None, smirks_atoms=None, layers=0):
        """
        Parameters
        ----------
        mol : Mol
            this can be a chemper mol or a molecule from any supported toolkit
            (currently OpenEye or RDKit)
        smirks_atoms : tuple of integers
            This is a tuple of the atom indices which will have SMIRKS indices.
            For example, if (1,2) is provided then the atom in molecule with indices
            1 and 2 will be used to create a SMIRKS with two indexed atoms.
        layers : int or 'all'
            how many atoms out from the smirks indexed atoms do you wish save (default=0)
            'all' will lead to all atoms in the molecule being specified

        Raises
        ------
        TypeError
            if only one of mol and smirks_atoms is provided
        """
        self._graph = nx.Graph()
        self.atom_by_label = dict() # stores a dictionary of atoms by label
        self.bond_by_label = dict() # stores a dictionary of bonds by label
        self.atom_by_index = dict() # stores a dictionary of atoms by index in the molecule

        if mol is None:
            self.mol = None
            if smirks_atoms is not None:
                raise TypeError("Must provide a molecule if smirks_atoms are specified.")
            # an empty graph, atoms can be added later with add_atom
            return

        self.mol = mol_toolkit.Mol(mol)
        if smirks_atoms is None:
            raise TypeError("Must provide smirks_atoms when a molecule is given")

        self._add_smirks_atoms(smirks_atoms)
        # loop over indexed atoms and then add layers to each
        # note: the atoms must be pulled out first because the
        #       atom_by_label dictionary is updated when layers are added
        for atom_storage in list(self.atom_by_label.values()):
            self._add_layers(atom_storage, layers)

    def __str__(self):
        # as_smirks returns None for an empty graph, but __str__ must return a string
        smirks = self.as_smirks()
        return '' if smirks is None else smirks

    def __lt__(self, other):
        # an empty graph (SMIRKS of None) sorts like an empty string
        return (self.as_smirks() or '') < (other.as_smirks() or '')

    def __eq__(self, other):
        # graphs are equal if they produce the same SMIRKS pattern
        return self.as_smirks() == other.as_smirks()

    def __hash__(self):
        return id(self)

    @staticmethod
    def _has_smirks_index(atom):
        """
        private function, an atom is "indexed" if its label is a positive number
        """
        return atom.label is not None and atom.label > 0

    def as_smirks(self, compress=False):
        """
        Parameters
        ----------
        compress : boolean
            returns the shorter version of atom SMIRKS patterns
            that is the atoms only include atomic numbers rather
            than the full list of decorators

        Returns
        -------
        SMIRKS : str or None
            a SMIRKS string matching the exact atom and bond information stored
            None is returned if no atoms have been added to the graph
        """
        # If no atoms have been added
        if len(self._graph.nodes()) == 0:
            return None

        if self.atom_by_label:
            # sometimes we use negative numbers for internal indexing
            # the first atom in a smirks pattern should be based on
            # actual smirks indices (positive) whenever there is one
            smirks_indices = [k for k in self.atom_by_label if k > 0]
            if smirks_indices:
                first_label = min(smirks_indices)
            else:
                first_label = min(self.atom_by_label)
            init_atom = self.atom_by_label[first_label]
        else:
            init_atom = self.get_atoms()[0]

        # sort neighboring atoms to keep output consistent
        neighbors = sorted(self.get_neighbors(init_atom))
        return self._as_smirks(init_atom, neighbors, compress)

    def _as_smirks(self, init_atom, neighbors, compress=False):
        """
        This is an internal/private method used to add all AtomStorage to the SMIRKS pattern

        Parameters
        ----------
        init_atom : AtomStorage object
            current atom
        neighbors : list of AtomStorage objects
            list of neighbor atoms you wanted added to the SMIRKS pattern
        compress : boolean
            passed on to AtomStorage.as_smirks for every atom

        Returns
        -------
        SMIRKS : str
            This graph as a SMIRKS string
        """
        smirks = init_atom.as_smirks(compress)
        last_idx = len(neighbors) - 1
        for idx, neighbor in enumerate(neighbors):
            bond_smirks = self.get_connecting_bond(init_atom, neighbor).as_smirks()

            # continue away from the current atom, the atom we came from is
            # excluded by identity since atoms compare equal by SMIRKS and
            # label, a value comparison could drop a look-alike atom instead
            new_neighbors = [n for n in sorted(self.get_neighbors(neighbor))
                             if n is not init_atom]

            branch_smirks = bond_smirks + self._as_smirks(neighbor, new_neighbors, compress)

            # every neighbor except the last is a branch and goes in parentheses
            if idx < last_idx:
                smirks += '(' + branch_smirks + ')'
            else:
                smirks += branch_smirks

        return smirks

    def get_atoms(self):
        """
        Returns
        -------
        atoms : list of AtomStorages
            all atoms stored in the graph
        """
        return list(self._graph.nodes())

    def get_connecting_bond(self, atom1, atom2):
        """
        Parameters
        ----------
        atom1 : AtomStorage
        atom2 : AtomStorage

        Returns
        -------
        bond : BondStorage or None
            bond between the two given atoms or None if not connected
        """
        edge_data = self._graph.get_edge_data(atom1, atom2)
        if edge_data is None:
            return None
        return edge_data['bond']

    def get_bonds(self):
        """
        Returns
        -------
        bonds : list of BondStorages
            all bonds stored as edges in this graph
        """
        return [data['bond'] for _, _, data in self._graph.edges(data=True)]

    def get_neighbors(self, atom):
        """
        Parameters
        ----------
        atom : AtomStorage

        Returns
        -------
        atoms: list of AtomStorages
            list of atoms one bond (edge) away from the given atom
        """
        return list(self._graph.neighbors(atom))

    def remove_atom(self, atom):
        """
        Removes the provided atom and all connected atoms.
        Indexed atoms and atoms not in the current graph
        cannot be removed

        Parameters
        -----------
        atom : AtomStorage

        Returns
        --------
        removed : bool
            True if the atom was successfully removed.
            False if not, meaning the graph is unchanged.
        """
        # if atom isn't in the graph, it can't be removed
        if atom not in self._graph.nodes():
            return False
        # if atom is "indexed" that is has a SMIRKS index > 0 it can't be removed
        if self._has_smirks_index(atom):
            return False
        # remove specified atom
        # NOTE: atom_by_label, bond_by_label and atom_by_index are not updated here
        self._graph.remove_node(atom)

        # find atoms on that "branch" of the molecule
        # we do this by looking for atoms that are no longer connected to
        # the base of the graph, where we consider the base a positively indexed atom
        indexed_atoms = [n for n in self._graph.nodes if self._has_smirks_index(n)]
        if not indexed_atoms:
            # without an indexed atom there is no base to define a branch
            # so only the requested atom is removed
            return True
        ref_atom = indexed_atoms[0]

        disconnected = [n for n in self._graph.nodes
                        if not nx.has_path(self._graph, n, ref_atom)]
        # remove the disconnected atoms
        self._graph.remove_nodes_from(disconnected)
        return True

    def add_atom(self, new_atom, new_bond=None, bond_to_atom=None,
                 new_label=None, new_bond_label=None):
        """
        Expand the graph by adding one new atom including relevant bond

        Parameters
        ----------
        new_atom : ChemPer Atom
        new_bond : ChemPer Bond
        bond_to_atom : SingleGraph AtomStorage
            This is where you want to connect the new atom, required if the graph isn't empty
        new_label : int
            (optional) index for SMIRKS or internal storage if less than zero
        new_bond_label : anything hashable
            (optional) label used to track BondStorage in graph

        Returns
        -------
        AtomStorage : AtomStorage object or None
            If the atom was successfully added then the AtomStorage object is returned
            None is returned if the atom wasn't able to be added
        """
        if bond_to_atom is None:
            # only the very first atom can be added without a connection
            if len(self.get_atoms()) > 0:
                return None
        elif bond_to_atom not in self._graph.nodes():
            # connecting to an atom outside of this graph would
            # silently add that atom as a new node
            return None

        new_atom_storage = self.AtomStorage(new_atom, label=new_label)
        self._graph.add_node(new_atom_storage)
        if new_label is not None:
            self.atom_by_label[new_label] = new_atom_storage

        # This is the first atom added to the graph
        if bond_to_atom is None:
            return new_atom_storage

        new_bond_storage = self.BondStorage(new_bond, new_bond_label)
        # like atoms, only labeled bonds are tracked by label
        if new_bond_label is not None:
            self.bond_by_label[new_bond_label] = new_bond_storage

        self._graph.add_edge(bond_to_atom, new_atom_storage, bond=new_bond_storage)
        return new_atom_storage

    def _add_smirks_atoms(self, smirks_atoms):
        """
        private function for adding atoms to the graph

        Atoms are labeled 1, 2, 3, ... in the order they are provided.
        A bond between two of these atoms is labeled with
        the larger of the two atom labels minus one.

        Parameters
        ----------
        smirks_atoms : tuple of integers
            This is a tuple of the atom indices which will have SMIRKS indices.
        """
        # add all smirks atoms to the graph
        for key, atom_index in enumerate(smirks_atoms, 1):
            atom1 = self.mol.get_atom_by_index(atom_index)
            new_atom_storage = self.AtomStorage(atom1, key)
            self._graph.add_node(new_atom_storage)
            self.atom_by_label[key] = new_atom_storage
            self.atom_by_index[atom_index] = new_atom_storage

            # Check for bonded atoms already in the graph
            for neighbor_key, neighbor_index in enumerate(smirks_atoms, 1):
                if neighbor_key not in self.atom_by_label:
                    continue

                # check if atoms are already connected on the graph
                # (an atom always has a path to itself, so it is skipped here too)
                neighbor_storage = self.atom_by_label[neighbor_key]
                if nx.has_path(self._graph, new_atom_storage, neighbor_storage):
                    continue

                # check if atoms are connected in the molecule
                atom2 = self.mol.get_atom_by_index(neighbor_index)
                bond = self.mol.get_bond_by_atoms(atom1, atom2)
                if bond is None:
                    continue

                # Atoms are connected add edge
                bond_index = max(neighbor_key, key) - 1
                bond_storage = self.BondStorage(bond, bond_index)
                self.bond_by_label[bond_index] = bond_storage
                self._graph.add_edge(new_atom_storage,
                                     neighbor_storage,
                                     bond=bond_storage)

    def _add_layers(self, atom_storage, add_layer):
        """
        private function for expanding beyond the initial SMIRKS atoms.
        For now this is recursive so the input is:

        Parameters
        ----------
        atom_storage : AtomStorage object
            atom whose neighbors you currently need to add
        add_layer : int or 'all'
            how many more layers need to be added
        """
        if add_layer == 0:
            return

        # atoms bonded to an indexed atom are labeled 0,
        # each layer further out is labeled one lower (-1, -2, ...)
        new_label = min(1, atom_storage.label) - 1

        for new_atom in atom_storage.atom.get_neighbors():
            new_index = new_atom.get_index()
            # skip atoms which are already in the graph
            if new_index in self.atom_by_index:
                continue

            new_bond = self.mol.get_bond_by_atoms(atom_storage.atom, new_atom)
            new_storage = self.add_atom(new_atom, new_bond, atom_storage,
                                        new_label, new_label)
            self.atom_by_index[new_index] = new_storage

            if add_layer == 'all':
                # keep going until every atom in the molecule is in the graph
                self._add_layers(new_storage, add_layer)
            elif add_layer > 1:
                self._add_layers(new_storage, add_layer - 1)

Read our documentation on viewing source code .

Loading