1
#!/usr/bin/env python
2

3
#=============================================================================================
4
# MODULE DOCSTRING
5
#=============================================================================================
6

7 100
"""
8
smirksify.py
9

10
In this script, we start with a set of clustered molecular fragments with specified
11
indexed atoms as those you would use to build a ClusterGraph.
12
We then build cluster Graphs to create the initial SMIRKS patterns and check
13
that the generated SMIRKS patterns retain the typing from the input cluster.
14
Next we run a series of iterations removing SMIRKS decorators.
15
If this "move" doesn't change the way the molecules are typed then the change is accepted.
16

17
This class takes inspiration from the tool SMIRKY previously published by the
18
Open Force Field Initiative:
19
github.com/openforcefield/smarty
20

21
In theory, it is possible this process of removing decorators could be more
22
systematic/deterministic, however this is a first approach to see
23
if extracted SMIRKS patterns can do better than SMIRKY.
24
Also, this approach will be more general since the input clusters do not
25
rely on a reference force field.
26
"""
27
#=============================================================================================
28
# GLOBAL IMPORTS
29
#=============================================================================================
30

31 100
import copy
32

33 100
from chemper.graphs.environment import ChemicalEnvironment as CE
34 100
from chemper.mol_toolkits import mol_toolkit
35 100
from chemper.chemper_utils import ImproperDict, ValenceDict, \
36
    get_typed_molecules, match_reference
37 100
from chemper.graphs.cluster_graph import ClusterGraph
38

39 100
import numpy as np
40

41

42
# =============================================================================================
43
# private subroutines
44
# =============================================================================================
45

46 100
def print_smirks(smirks_list):
47
    """
48
    Prints out the provided smirks list
49
    in a table like format with label | SMIRKS
50

51
    Parameters
52
    -----------
53
    smirks_list : list of tuples
54
        list in the form [ (label, SMIRKS), ...]
55
    """
56 100
    str_form = " {0:<20} | {1:} "
57

58 100
    print()
59 100
    print(str_form.format("Label", "SMIRKS"))
60 100
    print('='*80)
61 100
    for label, smirks in smirks_list:
62 100
        print(str_form.format(label, smirks))
63 100
        print('-'*80)
64 100
    print()
65

66

67 100
class ClusteringError(Exception):
68
    """
69
    Exception for when the SMIRKSifier is unable to create
70
    a list of SMIRKS to maintain the input clusters.
71
    """
72 100
    def __init__(self, msg):
73 100
        Exception.__init__(self, msg)
74 100
        self.msg = msg
75

76

77
# =============================================================================================
78
# SMIRKSifier
79
# =============================================================================================
80

81 100
class SMIRKSifier(object):
82
    """
83
    Generates complex SMIRKS for a given cluster of substructures
84
    and then reduces the decorators in those smirks
85
    """
86 100
    def __init__(self, molecules, cluster_list,
87
                 max_layers=5, verbose=True, strict_smirks=True):
88
        """
89
        Parameters
90
        ----------
91
        molecules : list of Mols
92
            These can be chemper Mols or molecules from any supported toolkit
93
            (currently OpenEye or RDKit)
94

95
        cluster_list : list of labels and smirks_atom_lists
96
            For each label the user should provide a list tuples for atom indices
97
            in each molecule you want included in that cluster.
98

99
            For example, if you wanted all atoms with indices (0,1) and (1,2) to be in cluster 'c1'
100
            and atoms (2,3) in cluster 'c2' for each of two molecules then cluster_list would be
101

102
            [ ('c1', [ (0,1), (1,2) ], [ (0,1), (1,2) ]),
103
              ('c2', [ (2,3)        ], [ (2,3)        ]) ]
104

105
            To see an example of this in action checkout
106
            https://github.com/MobleyLab/chemper/tree/master/examples
107

108
        max_layers : int (optional)
109
            default = 5
110
            how many atoms away from the indexed atoms should
111
            we consider at the maximum
112

113
        verbose : boolean (optional)
114
            default = True
115
            If true information is printed to the command line during reducing
116

117
        strict_smirks : boolean (optional)
118
            default = True
119
            If False it will not raise an error when incapable of making SMIRKS
120
            This setting is not recommended unless you are a master user
121
            or developer trying to test current behavior.
122
            The variable SMIRKSifier.checks will tell you if the SMIRKS
123
            generation failed when strict_smirks = False
124
        """
125 100
        self.molecules = [mol_toolkit.Mol(m) for m in molecules]
126 100
        self.intermediate_smirks = dict()
127 100
        self.cluster_list = cluster_list
128 100
        self.verbose = verbose
129 100
        self.max_layers = max_layers
130 100
        self.strict_smirks = strict_smirks
131

132
        # determine the type of SMIRKS for symmetry in indices purposes
133
        # This is done by making a test SMIRKS
134 100
        graph = ClusterGraph(self.molecules, cluster_list[0][1], 0)
135 100
        test_smirks = graph.as_smirks(compress=True)
136 100
        env = CE(test_smirks)
137 100
        if env.get_type() is None:
138
            # corresponds to an unknown chemical pattern
139 0
            self.dict_type = dict
140 100
        elif env.get_type().lower() == 'impropertorsion':
141 0
            self.dict_type = ImproperDict
142
        else:
143 100
            self.dict_type = ValenceDict
144

145
        # Convert input "smirks_atom_list" into a dictionary with the form:
146
        # {mol_idx: {(atom indices): label, ...}, ... }
147 100
        self.cluster_dict = dict()
148 100
        self.ref_labels = set()
149 100
        self.total = 0
150
        # form of cluster_list is [(label, [for each mol [ (tuples of atom indices)] ) ]
151 100
        for label, mol_list in self.cluster_list:
152 100
            self.ref_labels.add(label)
153
            # [for each mol [ (tuples of atom indices)]
154 100
            for mol_idx, atom_indice_tuples in enumerate(mol_list):
155 100
                if mol_idx not in self.cluster_dict:
156 100
                    self.cluster_dict[mol_idx] = self.dict_type()
157 100
                for atom_tuple in atom_indice_tuples:
158 100
                    self.total += 1
159 100
                    self.cluster_dict[mol_idx][atom_tuple] = label
160

161
        # make SMIRKS patterns for input clusters
162 100
        self.current_smirks, self.layers = self.make_smirks()
163 100
        if self.verbose: print_smirks(self.current_smirks)
164
        # check SMIRKS and save the matches to input clusters
165 100
        self.type_matches, self.checks = self.types_match_reference()
166

167 100
        if not self.checks:
168 100
            msg = """
169
                      SMIRKSifier was not able to create SMIRKS for the provided
170
                      clusters with %i layers. Try increasing the number of layers
171
                      or changing your clusters
172
                      """ % self.max_layers
173 100
            if self.strict_smirks:
174 100
                raise ClusteringError(msg)
175
            else:
176 0
                print("WARNING!", msg)
177

178 100
    def make_smirks(self):
179
        """
180
        Create a list of SMIRKS patterns for the input clusters.
181
        This includes a determining how far away from the indexed atom should
182
        be included in the SMIRKS (or the number of max_layers is reached)
183

184
        Returns
185
        -------
186
        smirks_list : list of tuples
187
            list of tuples in the form (label, smirks)
188
        layers : int
189
            number of layers actually used to specify the set clusters
190
        """
191 100
        layers = 0
192
        # try generating smirks with no layers
193 100
        smirks_list = self._make_cluster_graphs(layers)
194
        # store intermediate smirks and check them
195 100
        self.intermediate_smirks[layers] = smirks_list
196 100
        _, checks = self.types_match_reference(current_types=smirks_list)
197

198 100
        while not checks and (layers < self.max_layers):
199 100
            layers += 1
200 100
            smirks_list = self._make_cluster_graphs(layers)
201
            # store intermediate smirks
202 100
            self.intermediate_smirks[layers] = smirks_list
203
            # check current smirks patterns
204 100
            _, checks = self.types_match_reference(current_types=smirks_list)
205

206 100
        return smirks_list, layers
207

208 100
    def _make_cluster_graphs(self, layers):
209
        """
210
        Creates a list of SMIRKS using the stored
211
        molecules and clusters with the specified number
212
        of layers (atoms away from the indexed atoms)
213

214
        Parameters
215
        -----------
216
        layers : int
217
            number of layers (atoms away from indexed atoms) to
218
            include in this round of graphs
219

220
        Returns
221
        --------
222
        smirks_list : list of two tuples
223
            SMIRKS list in the form [ (label: SMIRKS), ...]
224
        """
225 100
        smirks_list = list()
226

227
        # loop through the list of fragment clusters
228 100
        for label, smirks_atom_list in self.cluster_list:
229
            # make a ClusterGraph for that label
230 100
            graph = ClusterGraph(self.molecules, smirks_atom_list, layers)
231

232
            # extract and save the SMIRKS for the cluster
233 100
            smirks = graph.as_smirks(compress=True)
234 100
            smirks_list.append(('zz_'+str(label), smirks))
235

236 100
        return smirks_list
237

238 100
    def types_match_reference(self, current_types=None):
239
        """
240
        Determine best match for each parameter with reference types
241

242
        Parameters
243
        ----------
244
        current_types : list of tuples with form [ (label, smirks), ]
245

246
        Returns
247
        -------
248
        type_matches : list of tuples (current_label, reference_label, counts)
249
            pair of current and reference labels with the number of fragments that match it
250
        """
251 100
        if current_types is None:
252 100
            current_types = self.current_smirks
253

254 100
        if len(current_types) != len(self.ref_labels):
255 0
            return set(), False
256

257 100
        current_assignments = get_typed_molecules(current_types, self.molecules)
258

259 100
        type_matches, checks_pass = match_reference(current_assignments, self.cluster_dict)
260 100
        return type_matches, checks_pass
261

262 100
    def reduce(self, max_its=1000, verbose=None):
263
        """
264
        Reduce the SMIRKS decorators for a number of iterations
265

266
        Parameters
267
        ----------
268
        max_its : int
269
            default = 1000
270
            The specified number of iterations
271
        verbose : boolean
272
            default = None
273
            will set the verboseness while running
274
            (if None, the current verbose variable will be used)
275

276
        Returns
277
        ----------
278
        final_smirks : list of tuples
279
            list of final smirks patterns after reducing in the form
280
            [(label, smirks)]
281
        """
282 100
        if not self.checks:
283 0
            print("Cannot reduce since unable to create reliable SMIRKS")
284 0
            return self.current_smirks
285

286 100
        red = Reducer(self.current_smirks, self.molecules, verbose)
287 100
        self.current_smirks = red.run(max_its)
288 100
        return self.current_smirks
289

290

291
# ==================================================================================
292
# Reducer class
293
# ==================================================================================
294

295 100
class Reducer():
296
    """
297
    Reducer starts with any list of SMIRKS and removes unnecessary decorators
298
    while maintaining typing on input molecules.
299
    This was created to be used as a part of the SMIRKSifier.reduce function.
300
    However, if you have complex SMIRKS and a list of molecules you can
301
    also reduce those patterns independently.
302

303
    Attributes
304
    ----------
305
    current_smirks : list of tuples
306
                    current SMIRKS patterns in the form (label, smirks)
307
    mols : list of chemper molecules
308
          molecules being used to reduce the input SMIRKS
309
    cluster_dict : dictionary
310
                  Dictionary specifying typing using current SMIRKS in the form:
311
                  {mol_idx:
312
                        { (tuple of atom indices): label } }
313
    """
314 100
    def __init__(self, smirks_list, mols, verbose=False):
315
        """
316
        Parameters
317
        ----------
318
        smirks_list : list of tuples
319
            set of SMIRKS patterns in the form (label, smirks)
320
        mols : list of molecules
321
            Any chemper compatible molecules accepted (ChemPer, OpenEye, or RDKit)
322
        """
323 100
        self.verbose = verbose
324 100
        self.current_smirks = copy.deepcopy(smirks_list)
325

326
        # make reference clusters
327 100
        ref_smirks = [('ref_%s' % l, smirks) for l, smirks in smirks_list]
328 100
        self.molecules = [mol_toolkit.Mol(m) for m in mols]
329 100
        self.cluster_dict = get_typed_molecules(ref_smirks, self.molecules)
330

331 100
    def remove_one_sub_dec(self, input_ors, ref_idx):
332
        """
333
        Remove one OR decorator from the specified index
334
        # i.e. [(#6, [X4, +0]), (#7, [X3]) ] --> [(#6, [+0]), (#7, [X3]) ]
335

336
        Parameters
337
        -----------
338
        input_ors : list of two tuples
339
            OR decorators in the form from ChemicalEnvironments
340
            that is [ (base, [decorators, ]), ... ]
341
        ref_idx : int
342
            The index from this list to use when removing one sub-decorator
343

344
        Returns
345
        --------
346
        new_ors : list of two tuples
347
            New OR decorators
348
        """
349 100
        new_ors = copy.deepcopy(input_ors)
350
        # remove one or decorator from a single type
351 100
        ref_or = new_ors[ref_idx]
352 100
        decs = ref_or[1]
353 100
        decs.remove(np.random.choice(decs))
354 100
        new_ors[ref_idx] = (ref_or[0], decs)
355 100
        return new_ors
356

357 100
    def remove_ref_sub_decs(self, input_ors, ref_idx):
358
        """
359
        Remove all of the ORdecorators at the specified index
360
        i.e. [(#6, [X4, +0]), (#7, [X3]) ] --> [(#6, []), (#7, [X3]) ]
361

362
        Parameters
363
        -----------
364
        input_ors : list of two tuples
365
            OR decorators in the form from ChemicalEnvironments
366
            that is [ (base, [decorators, ]), ... ]
367
        ref_idx : int
368
            The index from this list to use when removing one set of sub-decorators
369

370
        Returns
371
        --------
372
        new_ors : list of two tuples
373
            New OR decorators
374
        """
375 100
        new_ors = copy.deepcopy(input_ors)
376
        # remove all decorators on one OR type
377 100
        ref_or = new_ors[ref_idx]
378 100
        new_ors[ref_idx] = (ref_or[0], list())
379 100
        return new_ors
380

381 100
    def remove_ref(self, input_ors, ref_idx):
382
        """
383
        Remove the decorator at the referenced index
384
        i.e. [(#6, [X4, +0]), (#7, [X3]) ] --> [(#7, [X3])]
385

386
        Parameters
387
        -----------
388
        input_ors : list of two tuples
389
            OR decorators in the form from ChemicalEnvironments
390
            that is [ (base, [decorators, ]), ... ]
391
        ref_idx : int
392
            The OR decorators at ref_idx will be removed entirely
393

394
        Returns
395
        --------
396
        new_ors : list of two tuples
397
            New OR decorators
398
        """
399 100
        new_ors = copy.deepcopy(input_ors)
400
        # remove the single OR type at or_idx
401 100
        ref = new_ors[ref_idx]
402 100
        new_ors.remove(ref)
403 100
        return new_ors
404

405 100
    def remove_all_bases(self, input_ors):
406
        """
407
        convert all bases to [*]
408
        i.e. [(#6, [X4, +0]), (#7, [X3]) ] --> [(*, [X4, +0]), (*, [X3]) ]
409

410
        Parameters
411
        -----------
412
        input_ors : list of two tuples
413
            OR decorators in the form from ChemicalEnvironments
414
            that is [ (base, [decorators, ]), ... ]
415

416
        Returns
417
        --------
418
        new_ors : list of two tuples
419
            New OR decorators
420
        """
421 100
        new_all_ors = [('*', d) for b, d in input_ors]
422 100
        return new_all_ors
423

424 100
    def remove_all_dec_type(self, input_ors):
425
        """
426
        remove all decorators of the same type, like all 'X' decorators
427
        i.e. [(#6, [X4, +0]), (#7, [X3]) ] --> [(#6, [+0]), (#7, []) ]
428

429
        Parameters
430
        -----------
431
        input_ors : list of two tuples
432
            OR decorators in the form from ChemicalEnvironments
433
            that is [ (base, [decorators, ]), ... ]
434

435
        Returns
436
        --------
437
        new_ors : list of two tuples
438
            New OR decorators
439
        """
440 100
        all_decs = set([d for b,decs in input_ors for d in decs])
441 100
        remove_dec = np.random.choice(list(all_decs))
442

443
        # we start by defining a criteria for when the decorator
444
        # should not be removed.
445 100
        def criteria(x): return remove_dec[0] not in x
446

447 100
        if '+' in remove_dec or '-' in remove_dec:
448 0
            def criteria(x): return not ('+' in x or '-' in x)
449 100
        elif remove_dec in ['a', 'A']:
450 0
            def criteria(x): return x.lower() != 'a'
451 100
        elif 'r' in remove_dec:
452 96
            def criteria(x): return 'r' not in x
453

454 100
        new_ors = list()
455 100
        for b, decs in input_ors:
456 100
            new_decs = [d for d in decs if criteria(d)]
457 100
            new_ors.append((b, new_decs))
458 100
        return new_ors
459

460 100
    def remove_or_atom(self, input_all_ors, or_idx):
461
        """
462
        makes specific types of changes based on atom OR decorators
463

464
        Parameters
465
        ----------
466
        input_all_ors: list of OR decorators
467
            [ (base, [decs]), ...]
468
        or_idx: index that should be used to guide changes
469

470
        Returns
471
        --------
472
        new_ors : list of two tuples
473
            new or decorators
474
        """
475 100
        ref_or = input_all_ors[or_idx]
476

477
        # Start by checking what removal choices are available with the
478
        # current list of OR decorators
479
        # ---------------------------------------------------------------------
480
        # one choice is to remove all decorators (0)
481
        # it is always an option
482 100
        choices = ['all']
483
        # are there non-generic bases? If so removing all bases is an option
484 100
        if len(set([b for b,ds in input_all_ors])) > 1:
485 87
            choices.append('gen_base')
486
        # there are two options if the ref type has OR decorators
487 100
        if len(ref_or[1]) > 0:
488
            # remove all OR decorators (#6, [X4, +0]) --> (#6, [])
489 100
            choices.append('all_ref_decs')
490 100
            if len(ref_or[1]) > 1:
491
                # remove one OR decorator (#6, [X4, +0]) --> (#6, [+0])
492 100
                choices.append('one_dec')
493
        # if there are more than one or type
494 100
        if len(input_all_ors) > 1:
495
            # you can remove just one ORtype (the reference)
496 100
            choices.append('remove_ref')
497
            # check that there are decorators
498 100
            all_decs = set([d for b, decs in input_all_ors for d in decs])
499 100
            if len(all_decs) > 0:
500
                # you can remove 1 type of decorator, i.e. all 'Xn' decorators
501 100
                choices.append('all_one_dec')
502
        # ---------------------------------------------------------------------
503

504
        # Make a random choice from the available change options
505 100
        change = np.random.choice(choices)
506

507
        # Based on the option chosen call the appropriate method
508 100
        if change == 'all':
509
            # remove ALL OR decorators
510
            # i.e.[(  # 6, [X4, +0]), (#7, [X3]) ] --> []
511 100
            return list()
512

513 100
        if change == 'remove_ref':
514 100
            return self.remove_ref(input_all_ors, or_idx)
515

516 100
        if change == 'all_ref_decs':
517 100
            return self.remove_ref_sub_decs(input_all_ors, or_idx)
518

519 100
        if change == 'one_dec':
520 100
            return self.remove_one_sub_dec(input_all_ors, or_idx)
521

522 100
        if change == 'gen_base':
523 40
            return self.remove_all_bases(input_all_ors)
524

525
        # change = 'all_one_dec'
526 100
        return self.remove_all_dec_type(input_all_ors)
527

528 100
    def remove_or(self, input_all_ors, is_bond=False):
529
        """
530
        Changes the OR decorators by removing some of them
531

532
        Parameters
533
        -----------
534
        input_all_ors : list of tuples
535
            these are the OR decorators for an atom or bond
536
            from a ChemicalEnvironment
537
        is_bond : boolean
538
            are these decorators from from a bond (False for atom)
539

540
        Returns
541
        --------
542
        new_ors : list of two tuples
543
            new OR decorators
544
        """
545 100
        if len(input_all_ors) == 0:
546 100
            return input_all_ors, False
547

548
        # chose a set of or decorators to change
549
        # these come in the form [base, (decorators)]
550 100
        all_ors = copy.deepcopy(input_all_ors)
551 100
        or_idx = np.random.randint(len(all_ors))
552

553
        # atoms have more OR decorators and therefore more options for
554
        # how they can be removed
555 100
        if not is_bond:
556 100
            return self.remove_or_atom(all_ors, or_idx), True
557

558
        # For a bond, either one OR type is removed
559
        # or they are all removed.
560 100
        if np.random.rand() > 0.5:
561
            # remove just one or type
562 96
            return self.remove_ref(all_ors, or_idx), True
563
        # remove all ors
564 100
        return list(), True
565

566 100
    def remove_and(self, input_all_ands):
567
        """
568
        removes a decorator that is AND'd in the original SMIRKS
569

570
        Parameters
571
        -----------
572
        input_all_ands : list
573
            List of AND decorators
574

575
        Returns
576
        --------
577
        new_ands : list
578
            List of new AND decorators
579
        """
580
        # if there are no ands return with no changes
581 100
        if len(input_all_ands) == 0:
582 100
            return input_all_ands, False
583

584 100
        if np.random.rand() > 0.5:
585
            # otherwise randomly remove an AND decorator
586 100
            all_ands = copy.deepcopy(input_all_ands)
587 100
            all_ands.remove(np.random.choice(all_ands))
588
        else:
589 100
            all_ands = list()
590 100
        return all_ands, True
591

592 100
    def _get_item_and_remove_options(self, env):
593
        """
594
        This function chooses and Atom or Bond
595
        which will then have some of its decorators removed.
596
        It also determines what part of the component can be
597
        removed: OR decorators(0), AND decorators(1), the whole atom(2)
598

599
        Parameters
600
        ----------
601
        env: ChemicalEnvironment
602

603
        Returns
604
        -------
605
        item: an Atom or Bond from the Chemical Environment
606
        dec_opts: list
607
            0 = has OR decorators to remove
608
            1 = has AND decorators to remove
609
            2 = is an atom that could be removed
610
        """
611 100
        items = list()
612 100
        odds = list()
613 100
        for a_b in env.get_atoms() + env.get_bonds():
614 100
            count = len(a_b.and_types)
615 100
            for o in a_b.or_types:
616 100
                if o[0] != '*':
617 100
                    count += 1
618 100
                count += len(o[1])
619

620
            # a wild card, atom with no index should be considered ([*])
621
            # should also be on this list so it can be removed
622 100
            if isinstance(a_b, CE.Atom) and not isinstance(a_b, CE.Bond) and env.is_unindexed(a_b):
623 100
                count += 1
624

625 100
            items.append(a_b)
626 100
            odds.append(count)
627

628
        # normalize odds to weights
629 100
        odds = np.array(odds)
630

631
        # if the odds for changing all atoms and bonds is zero return None
632 100
        if odds.sum() == 0:
633 100
            return None, list()
634

635 100
        weights = odds / odds.sum()
636

637
        # choose an atom or bond with the probabilities:
638 100
        item = np.random.choice(items, p=weights)
639 100
        dec_opts = list()
640 100
        if len(item.or_types) > 0:
641 100
            dec_opts.append('remove_ors')
642 100
        if len(item.and_types) > 0:
643 100
            dec_opts.append('remove_ands')
644

645 100
        if not isinstance(item, CE.Bond): # then it is an atom
646 100
            if env.get_valence(item) == 1 and env.is_unindexed(item):
647 100
                dec_opts.append('remove_atom')
648

649 100
        return item, dec_opts
650

651 100
    def remove_decorator(self, smirks):
652
        """
653
        Chose an atom or bond in the input smirks pattern
654
        and then remove one decorator from it.
655

656
        Parameters
657
        -----------
658
        smirks : str
659
            A SMIRKS string which should be reduced
660

661
        Returns
662
        --------
663
        new_smirks : str
664
            A new SMIRKS pattern
665
        is_changed : bool
666
            True if some of the decorators were successfully removed
667
        """
668 100
        env = CE(smirks)
669 100
        sub, dec_opts = self._get_item_and_remove_options(env)
670

671
        # note this should be impossible
672 100
        if sub is None or len(dec_opts) == 0:
673 100
            return smirks, False
674

675 100
        change = np.random.choice(dec_opts)
676 100
        if change == 'remove_ors':
677 100
            new_or_types, changed = self.remove_or(sub.or_types, isinstance(sub, CE.Bond))
678 100
            if not changed:
679 0
                return smirks, False
680 100
            sub.or_types = new_or_types
681 100
        elif change == 'remove_ands':
682 100
            new_and_types, changed = self.remove_and(sub.and_types)
683 100
            if not changed:
684 0
                return smirks, False
685 100
            sub.and_types = new_and_types
686
        else: # change == 'remove_atom'
687 100
            remove = env.remove_atom(sub)
688 100
            if not remove:
689 0
                return smirks, False
690

691 100
        return env.as_smirks(), True
692

693 100
    def run(self, max_its=1000, verbose=None):
694
        """
695
        Reduce the SMIRKS decorators for a number of iterations
696

697
        Parameters
698
        ----------
699
        max_its : int
700
            The specified number of iterations
701
        verbose : boolean
702
            will set the verboseness while running
703
            (if None, the current verbose variable will be used)
704

705
        Returns
706
        ----------
707
        final_smirks : list of tuples
708
            list of final smirks patterns after reducing in the form
709
            [(label, smirks)]
710
        """
711 100
        temp_verbose = self.verbose
712 100
        if verbose is not None:
713 0
            self.verbose = verbose
714

715 100
        for it in range(max_its):
716 100
            if self.verbose: print("Iteration: ", it)
717

718 100
            proposed_smirks = copy.deepcopy(self.current_smirks)
719
            # chose a SMIRKS to change
720 100
            change_idx = np.random.randint(len(proposed_smirks))
721 100
            change_entry = proposed_smirks[change_idx]
722

723
            # generate new smirks
724 100
            new_smirks, changed = self.remove_decorator(change_entry[1])
725 100
            if self.verbose: print("Attempting to change SMIRKS #%i\n%s  -->  %s"\
726
                                   % (change_idx, change_entry[1], new_smirks))
727 100
            if not changed:
728 100
                if self.verbose:
729 0
                    print("Rejected!\nNo change made to SMIRKS\n%s" % change_entry[1])
730 0
                    print('-'*90)
731 0
                continue
732

733
            # update proposed list and check if it still matches the reference
734 100
            proposed_smirks[change_idx] = (change_entry[0], new_smirks)
735

736 100
            current_assignments = get_typed_molecules(proposed_smirks, self.molecules)
737 100
            _, proposed_checks = match_reference(current_assignments, self.cluster_dict)
738

739 100
            if proposed_checks:
740 100
                if self.verbose: print("Accepted! ")
741 100
                self.current_smirks = copy.deepcopy(proposed_smirks)
742
            else:
743 100
                if self.verbose: print("Rejected!\n proposed SMIRKS changed the way fragments are clustered")
744 100
            if self.verbose: print('-'*90)
745

746 100
        if self.verbose: print_smirks(self.current_smirks)
747 100
        self.verbose = temp_verbose
748

749 100
        return self.current_smirks
750

751

752

753

Read our documentation on viewing source code .

Loading