"""
single_graph.py

SingleGraph is a class for storing SMIRKS decorators for a molecular fragment.
It can be used to convert a molecular sub-graph or an entire molecule into a SMIRKS
pattern with all decorators specified.

For example, imagine you want a SMIRKS for the carbon in methane, it would become:

"[#6AH4X4x0!r+0:1]"

with decorators:
#6: atomic number 6 for carbon
A: aliphatic (a would be aromatic)
H4: a total hydrogen count of 4, that is 4 neighbors are hydrogen
X4: connectivity of 4, that is the number of neighbors, not valence or sum of bond orders
x0: ring connectivity of 0, no ring bonds
!r: not in a ring, for atoms in a ring this decorator is `rn` where n is the size of the smallest ring
+0: 0 formal charge

To the best of the authors' knowledge, this is the first open source tool capable
of converting a molecule (or sub-graph) into a detailed SMIRKS pattern.
"""
24

25 100
import networkx as nx
26 100
from functools import total_ordering
27 100
from chemper.mol_toolkits import mol_toolkit
28

29

30 100
@total_ordering
class SingleGraph:
    """
    Graph based storage of the atom and bond decorators of one molecular fragment.

    Nodes of the underlying networkx graph are AtomStorage objects and every
    edge carries a BondStorage object under the 'bond' key. The atoms and bonds
    are read through the chemper.mol_toolkits Atom, Bond, and Mol wrappers.

    Attributes
    ----------
    mol : chemper Mol or None
        molecule the graph was built from (None for an empty graph)
    atom_by_label : dict
        AtomStorage objects by label; positive labels are SMIRKS indices,
        other labels are only used for internal tracking
    bond_by_label : dict
        BondStorage objects by label
    atom_by_index : dict
        AtomStorage objects by the atom's index in the molecule
    """

    @total_ordering
    class AtomStorage:
        """
        AtomStorage tracks information about an atom
        """

        def __init__(self, atom=None, label=None):
            """
            Initializes AtomStorage based on a provided atom

            Parameters
            ----------
            atom : chemper Atom object
                if None, every decorator is stored as None and the atom
                is written as the wildcard '*'
            label : int
                integer for labeling this atom in a SMIRKS
                or if negative number just used to track the atom locally
            """
            self.atom = atom

            if atom is None:
                self.atomic_number = None
                self.aromatic = None
                self.charge = None
                self.hydrogen_count = None
                self.connectivity = None
                self.ring_connectivity = None
                self.min_ring_size = None
                self.atom_index = None
            else:
                self.atomic_number = atom.atomic_number()
                self.aromatic = atom.is_aromatic()
                self.charge = atom.formal_charge()
                self.hydrogen_count = atom.hydrogen_count()
                self.connectivity = atom.connectivity()
                self.ring_connectivity = atom.ring_connectivity()
                self.min_ring_size = atom.min_ring_size()
                self.atom_index = atom.get_index()

            self.label = label

        def __lt__(self, other):
            """
            Orders atoms so that generated SMIRKS patterns are predictable.

            Sortable atoms make the SMIRKS output reproducible, which keeps tests
            simple. The ordering also pushes the atom with the largest SMIRKS
            index to the end of a neighbor list, where it is written without
            parentheses. That keeps indexed atoms in a "line", for example
            [C:1]([H])([H])~[N:2]([C])~[O:3]
            is easier for a human to read than the equivalent
            [C:1]([N:2]([O:3])[C])([H])[H]

            Parameters
            ----------
            other : AtomStorage

            Returns
            -------
            is_less_than : boolean
                self is less than other
            """
            # a label of None cannot be compared directly,
            # so it is replaced with a very negative placeholder
            self_index = self.label if self.label is not None else -1000
            other_index = other.label if other.label is not None else -1000

            # as soon as one atom has a SMIRKS index (> 0) the labels decide
            if self_index > 0 or other_index > 0:
                return self_index < other_index

            # neither atom has a SMIRKS index so fall back to the SMIRKS strings
            return self.as_smirks() < other.as_smirks()

        def __eq__(self, other):
            """Atoms are equal when both their SMIRKS string and label agree."""
            return self.as_smirks() == other.as_smirks() and self.label == other.label

        def __hash__(self):
            # hashing stays identity based so that two atoms with identical
            # decorators can still be distinct nodes in the graph
            return id(self)

        def __str__(self):
            return self.as_smirks()

        def as_smirks(self, compress=False):
            """
            Parameters
            -----------
            compress : bool
                Creates a compressed version of the SMIRKS with only
                the atomic number and atom index no other decorators

            Returns
            -------
            smirks : str
                how this atom would be represented in a SMIRKS string
            """
            # only positive labels are written as SMIRKS indices
            indexed = self.label is not None and self.label > 0

            if self.atom is None:
                if indexed:
                    return '[*:%i]' % self.label
                return '[*]'

            if compress:
                base_smirks = "#%i" % self.atomic_number
            else:
                aromatic = 'a' if self.aromatic else 'A'
                # non-negative charges need an explicit '+', negatives carry their own sign
                if self.charge >= 0:
                    charge = '+%i' % self.charge
                else:
                    charge = '%i' % self.charge
                # a minimum ring size of 0 is how "not in a ring" is stored
                if self.min_ring_size == 0:
                    ring = '!r'
                else:
                    ring = 'r%i' % self.min_ring_size

                base_smirks = '#%i%sH%iX%ix%i%s%s' % (self.atomic_number,
                                                      aromatic,
                                                      self.hydrogen_count,
                                                      self.connectivity,
                                                      self.ring_connectivity,
                                                      ring,
                                                      charge)

            if indexed:
                return '[%s:%i]' % (base_smirks, self.label)
            return '[%s]' % base_smirks

    @total_ordering
    class BondStorage:
        """
        BondStorage tracks information about a bond
        """

        # SMIRKS symbol for each stored bond order; None means the order is unknown
        _ORDER_SYMBOLS = {1: '-', 1.5: ':', 2: '=', 3: '#', None: '~'}

        def __init__(self, bond=None, label=None):
            """
            Parameters
            ----------
            bond : chemper Bond object
                if None, the order and ring membership are stored as None
                and the bond is written as '~' (any bond)
            label : int or float
                Bonds don't have SMIRKS indices so this is only used for internal
                tracking of the object.
            """
            if bond is None:
                self.order = None
                self.ring = None
                self.bond_index = None
            else:
                self.order = bond.get_order()
                self.ring = bond.is_ring()
                self.bond_index = bond.get_index()

            self._bond = bond
            self.label = label

        def __str__(self):
            return self.as_smirks()

        def __lt__(self, other):
            """
            Bonds are ordered by SMIRKS string and then by label.
            When the SMIRKS strings tie, a label of None sorts before any other label.
            """
            mine, theirs = self.as_smirks(), other.as_smirks()
            if mine != theirs:
                return mine < theirs
            # None cannot be compared with numbers, so it is handled explicitly
            if self.label is None or other.label is None:
                return self.label is None and other.label is not None
            return self.label < other.label

        def __eq__(self, other):
            """Bonds are equal when both their label and SMIRKS string agree."""
            return self.label == other.label and self.as_smirks() == other.as_smirks()

        def __hash__(self):
            # identity based so equal looking bonds stay distinct objects
            return id(self)

        def as_smirks(self):
            """
            Returns
            -------
            SMIRKS : str
                how this bond should appear in a SMIRKS string
            """
            if self.ring is None:
                ring = ''
            elif self.ring:
                ring = '@'
            else:
                ring = '!@'

            # a bond order without a known symbol is written as '~' (any bond)
            # rather than failing on string concatenation
            order = self._ORDER_SYMBOLS.get(self.order, '~')

            return order + ring

    def __init__(self, mol=None, smirks_atoms=None, layers=0):
        """
        Parameters
        ----------
        mol : Mol
            this can be a chemper mol or a molecule from any supported toolkit
            (currently OpenEye or RDKit)
        smirks_atoms : tuple of integers
            This is a tuple of the atom indices which will have SMIRKS indices.
            For example, if (1,2) is provided then the atom in molecule with indices
            1 and 2 will be used to create a SMIRKS with two indexed atoms.
        layers : int or 'all'
            how many atoms out from the smirks indexed atoms do you wish save (default=0)
            'all' will lead to all atoms in the molecule being specified

        Raises
        ------
        TypeError
            if only one of mol and smirks_atoms is provided
        """
        self._graph = nx.Graph()
        self.atom_by_label = dict()  # stores a dictionary of atoms by label
        self.bond_by_label = dict()  # stores a dictionary of bonds by label
        self.atom_by_index = dict()  # stores a dictionary of atoms by molecule index

        if mol is None:
            self.mol = None
            if smirks_atoms is not None:
                raise TypeError("Must provide a molecule if smirks_atoms are specified/")
            return

        self.mol = mol_toolkit.Mol(mol)
        if smirks_atoms is None:
            raise TypeError("Must provide smirks_atoms when a molecule is given")

        self._add_smirks_atoms(smirks_atoms)
        # loop over indexed atoms and then add layers to each
        # note: the keys must be pulled out first because the
        #       atom_by_label dictionary is updated when layers are added
        for smirks_key in list(self.atom_by_label):
            self._add_layers(self.atom_by_label[smirks_key], layers)

    def __str__(self):
        return self.as_smirks()

    def __lt__(self, other):
        return self.as_smirks() < other.as_smirks()

    def __eq__(self, other):
        """Graphs are equal when they produce the same SMIRKS string."""
        if not isinstance(other, SingleGraph):
            return NotImplemented
        return self.as_smirks() == other.as_smirks()

    def __hash__(self):
        return id(self)

    def as_smirks(self, compress=False):
        """
        Parameters
        ----------
        compress : boolean
            returns the shorter version of atom SMIRKS patterns
            that is the atoms only include atomic numbers rather
            than the full list of decorators

        Returns
        -------
        SMIRKS : str
            a SMIRKS string matching the exact atom and bond information stored
            or None if no atoms have been added
        """
        # If no atoms have been added
        if len(self._graph.nodes()) == 0:
            return None

        if self.atom_by_label:
            # sometimes we use negative numbers for internal indexing
            # the first atom in a smirks pattern should be based on actual smirks indices (positive)
            smirks_indices = [k for k in self.atom_by_label if k > 0]
            if smirks_indices:
                min_smirks = min(smirks_indices)
            else:
                min_smirks = min(self.atom_by_label)
            init_atom = self.atom_by_label[min_smirks]
        else:
            init_atom = self.get_atoms()[0]

        # sort neighboring atoms to keep consistent output
        neighbors = sorted(self.get_neighbors(init_atom))
        return self._as_smirks(init_atom, neighbors, compress)

    def _as_smirks(self, init_atom, neighbors, compress=False):
        """
        This is an internal/private method used to add all AtomStorage to the SMIRKS pattern

        Parameters
        ----------
        init_atom : AtomStorage object
            current atom
        neighbors : list of AtomStorage objects
            list of neighbor atoms you wanted added to the SMIRKS pattern
        compress : boolean
            passed on to AtomStorage.as_smirks for every atom

        Returns
        -------
        SMIRKS : str
            This graph as a SMIRKS string
        """
        pieces = [init_atom.as_smirks(compress)]
        last = len(neighbors) - 1
        for idx, neighbor in enumerate(neighbors):
            bond = self.get_connecting_bond(init_atom, neighbor)

            # continue outward from the neighbor without walking back to init_atom;
            # the parent is excluded by identity because atoms with the same
            # decorators and label compare equal, so an equality based removal
            # could drop a sibling instead and recurse forever
            onward = [a for a in sorted(self.get_neighbors(neighbor))
                      if a is not init_atom]

            branch = bond.as_smirks() + self._as_smirks(neighbor, onward, compress)

            # every neighbor except the last is a branch and needs parentheses
            if idx < last:
                branch = '(' + branch + ')'
            pieces.append(branch)

        return ''.join(pieces)

    def get_atoms(self):
        """
        Returns
        -------
        atoms : list of AtomStorages
            all atoms stored in the graph
        """
        return list(self._graph.nodes())

    def get_connecting_bond(self, atom1, atom2):
        """
        Parameters
        ----------
        atom1 : AtomStorage
        atom2 : AtomStorage

        Returns
        -------
        bond : BondStorage or None
            bond between the two given atoms or None if not connected
        """
        edge_data = self._graph.get_edge_data(atom1, atom2)
        if edge_data is None:
            return None
        return edge_data['bond']

    def get_bonds(self):
        """
        Returns
        -------
        bonds : list of BondStorages
            all bonds stored as edges in this graph
        """
        return [data['bond'] for _, _, data in self._graph.edges(data=True)]

    def get_neighbors(self, atom):
        """
        Parameters
        ----------
        atom : AtomStorage

        Returns
        -------
        atoms: list of AtomStorages
            list of atoms one bond (edge) away from the given atom
        """
        return list(self._graph.neighbors(atom))

    def remove_atom(self, atom):
        """
        Removes the provided atom and every atom that is left without a
        connection to an indexed atom once it is gone.
        Indexed atoms and atoms not in the current graph
        cannot be removed.
        If the graph has no indexed atom, only the provided atom is removed.

        Parameters
        -----------
        atom : AtomStorage

        Returns
        --------
        removed : bool
            True if the atom was successfully removed.
            False if not, meaning the graph is unchanged.
        """
        # if atom isn't in the graph, it can't be removed
        if atom not in self._graph.nodes():
            return False
        # if atom is "indexed" that is has a SMIRKS index > 0 it can't be removed
        if atom.label is not None and atom.label > 0:
            return False

        # bonds have to be collected before the node (and its edges) disappear
        dropped_bonds = self._incident_bonds([atom])
        self._graph.remove_node(atom)

        # find atoms on that "branch" of the molecule:
        # anything that can no longer reach a positively indexed atom
        anchors = [n for n in self._graph.nodes
                   if n.label is not None and n.label > 0]
        stranded = []
        if anchors:
            reachable = set()
            for anchor in anchors:
                if anchor not in reachable:
                    reachable |= nx.node_connected_component(self._graph, anchor)
            stranded = [n for n in self._graph.nodes if n not in reachable]
            dropped_bonds += self._incident_bonds(stranded)
            # remove the disconnected atoms
            self._graph.remove_nodes_from(stranded)

        self._forget([atom] + stranded, dropped_bonds)
        return True

    def _incident_bonds(self, atoms):
        """Return the BondStorage objects on every edge touching the given atoms."""
        return [data['bond']
                for _, _, data in self._graph.edges(atoms, data=True)
                if 'bond' in data]

    def _forget(self, atoms, bonds):
        """Drop the given atom and bond storages from the lookup dictionaries."""
        # matched by identity since storages with equal decorators compare equal
        gone_atoms = {id(a) for a in atoms}
        gone_bonds = {id(b) for b in bonds}
        for lookup, gone in ((self.atom_by_label, gone_atoms),
                             (self.atom_by_index, gone_atoms),
                             (self.bond_by_label, gone_bonds)):
            stale = [key for key, stored in lookup.items() if id(stored) in gone]
            for key in stale:
                del lookup[key]

    def add_atom(self, new_atom, new_bond=None, bond_to_atom=None,
                 new_label=None, new_bond_label=None):
        """
        Expand the graph by adding one new atom including relevant bond

        Parameters
        ----------
        new_atom : ChemPer Atom
        new_bond : ChemPer Bond
        bond_to_atom : SingleGraph AtomStorage
            This is where you want to connect the new atom, required if the graph isn't empty
        new_label : int
            (optional) index for SMIRKS or internal storage if less than zero
        new_bond_label : anything hashable
            (optional) label used to track BondStorage in graph

        Returns
        -------
        AtomStorage : AtomStorage object or None
            If the atom was successfully added then the AtomStorage object is returned
            None is returned if the atom wasn't able to be added
        """
        # only the very first atom may be added without a connection point
        if bond_to_atom is None and len(self.get_atoms()) > 0:
            return None

        new_atom_storage = self.AtomStorage(new_atom, label=new_label)
        self._graph.add_node(new_atom_storage)
        if new_label is not None:
            self.atom_by_label[new_label] = new_atom_storage

        # This is the first atom added to the graph
        if bond_to_atom is None:
            return new_atom_storage

        new_bond_storage = self.BondStorage(new_bond, new_bond_label)
        self.bond_by_label[new_bond_label] = new_bond_storage

        self._graph.add_edge(bond_to_atom, new_atom_storage, bond=new_bond_storage)
        return new_atom_storage

    def _add_smirks_atoms(self, smirks_atoms):
        """
        private function for adding atoms to the graph

        Parameters
        ----------
        smirks_atoms : tuple of integers
            This is a tuple of the atom indices which will have SMIRKS indices.
        """
        # add all smirks atoms to the graph, SMIRKS indices start at 1
        for key, atom_index in enumerate(smirks_atoms, 1):
            atom1 = self.mol.get_atom_by_index(atom_index)
            new_atom_storage = self.AtomStorage(atom1, key)
            self._graph.add_node(new_atom_storage)
            self.atom_by_label[key] = new_atom_storage
            self.atom_by_index[atom_index] = new_atom_storage

            # Check for bonded atoms already in the graph
            for neighbor_key, neighbor_index in enumerate(smirks_atoms, 1):
                if neighbor_key not in self.atom_by_label:
                    continue

                # check if atoms are already connected on the graph
                # (this also skips the new atom itself)
                neighbor_storage = self.atom_by_label[neighbor_key]
                if nx.has_path(self._graph, new_atom_storage, neighbor_storage):
                    continue

                # check if atoms are connected in the molecule
                atom2 = self.mol.get_atom_by_index(neighbor_index)
                bond = self.mol.get_bond_by_atoms(atom1, atom2)

                if bond is not None:  # Atoms are connected add edge
                    bond_index = max(neighbor_key, key) - 1
                    bond_storage = self.BondStorage(bond, bond_index)
                    self.bond_by_label[bond_index] = bond_storage
                    self._graph.add_edge(new_atom_storage,
                                         neighbor_storage,
                                         bond=bond_storage)

    def _add_layers(self, atom_storage, add_layer):
        """
        private function for expanding beyond the initial SMIRKS atoms.
        This is recursive: each call adds the direct neighbors of one atom
        and then continues from every atom it added.

        Parameters
        ----------
        atom_storage : AtomStorage object
            atom whose neighbors you currently need to add
        add_layer : int or 'all'
            how many more layers need to be added
        """
        if add_layer == 0:
            return

        # labels outside the SMIRKS atoms count down with each layer:
        # 0 for the first layer, -1 for the second and so on
        new_label = min(1, atom_storage.label) - 1

        for new_atom in atom_storage.atom.get_neighbors():
            # atoms already in the graph are not added a second time
            if new_atom.get_index() in self.atom_by_index:
                continue

            new_bond = self.mol.get_bond_by_atoms(atom_storage.atom, new_atom)
            new_storage = self.add_atom(new_atom, new_bond, atom_storage,
                                        new_label, new_label)
            self.atom_by_index[new_atom.get_index()] = new_storage
            if add_layer == 'all':
                self._add_layers(new_storage, add_layer)
            elif add_layer > 1:
                self._add_layers(new_storage, add_layer - 1)

Read our documentation on viewing source code .

Loading