1
#!/usr/bin/env python
2

3
# =============================================================================================
4
# MODULE DOCSTRING
5
# =============================================================================================
6 8
"""
7
Class definitions to represent a molecular system and its chemical components
8

9
.. todo::
10

11
   * Create MoleculeImage, ParticleImage, AtomImage, VirtualSiteImage here. (Or ``MoleculeInstance``?)
12
   * Create ``MoleculeGraph`` to represent fozen set of atom elements and bonds that can used as a key for compression
13
   * Add hierarchical way of traversing Topology (chains, residues)
14
   * Make all classes hashable and serializable.
15
   * JSON/BSON representations of objects?
16
   * Use `attrs <http://www.attrs.org/>`_ for object setter boilerplate?
17

18
"""
19

20
# =============================================================================================
21
# GLOBAL IMPORTS
22
# =============================================================================================
23

24 8
import itertools
25 8
from collections import OrderedDict
26 8
from collections.abc import MutableMapping
27

28 8
import numpy as np
29 8
from simtk import unit
30 8
from simtk.openmm import app
31

32 8
from openforcefield.typing.chemistry import ChemicalEnvironment
33 8
from openforcefield.utils import MessageException
34 8
from openforcefield.utils.serialization import Serializable
35 8
from openforcefield.utils.toolkits import (
36
    ALLOWED_AROMATICITY_MODELS,
37
    ALLOWED_CHARGE_MODELS,
38
    ALLOWED_FRACTIONAL_BOND_ORDER_MODELS,
39
    DEFAULT_AROMATICITY_MODEL,
40
    GLOBAL_TOOLKIT_REGISTRY,
41
)
42

43
# =============================================================================================
44
# Exceptions
45
# =============================================================================================
46

47

48 8
class DuplicateUniqueMoleculeError(MessageException):
49
    """
50
    Exception for when the user provides indistinguishable unique molecules when trying to identify atoms from a PDB
51
    """
52

53 8
    pass
54

55

56 8
class NotBondedError(MessageException):
57
    """
58
    Exception for when a function requires a bond between two atoms, but none is present
59
    """
60

61 8
    pass
62

63

64 8
class InvalidBoxVectorsError(MessageException):
65
    """
66
    Exception for setting invalid box vectors
67
    """
68

69

70
# =============================================================================================
71
# PRIVATE SUBROUTINES
72
# =============================================================================================
73

74

75 8
class _TransformedDict(MutableMapping):
76
    """A dictionary that transform and sort keys.
77

78
    The function __keytransform__ can be inherited to apply an arbitrary
79
    key-altering function before accessing the keys.
80

81
    The function __sortfunc__ can be inherited to specify a particular
82
    order over which to iterate over the dictionary.
83

84
    """
85

86 8
    def __init__(self, *args, **kwargs):
87 8
        self.store = OrderedDict()
88 8
        self.update(dict(*args, **kwargs))  # use the free update to set keys
89

90 8
    def __getitem__(self, key):
91 8
        return self.store[self.__keytransform__(key)]
92

93 8
    def __setitem__(self, key, value):
94 8
        self.store[self.__keytransform__(key)] = value
95

96 8
    def __delitem__(self, key):
97 0
        del self.store[self.__keytransform__(key)]
98

99 8
    def __iter__(self):
100 8
        return iter(sorted(self.store, key=self.__sortfunc__))
101

102 8
    def __len__(self):
103 8
        return len(self.store)
104

105 8
    def __keytransform__(self, key):
106 0
        return key
107

108 8
    @staticmethod
109
    def __sortfunc__(key):
110 8
        return key
111

112

113
# TODO: Encapsulate this atom ordering logic directly into Atom/Bond/Angle/Torsion classes?
114 8
class ValenceDict(_TransformedDict):
115
    """Enforce uniqueness in atom indices."""
116

117 8
    def __keytransform__(self, key):
118
        """Reverse tuple if first element is larger than last element."""
119
        # Ensure key is a tuple.
120 8
        key = tuple(key)
121
        # Reverse the key if the first element is bigger than the last.
122 8
        if key[0] > key[-1]:
123 8
            key = tuple(reversed(key))
124 8
        return key
125

126

127 8
class SortedDict(_TransformedDict):
128
    """Enforce uniqueness of atom index tuples, without any restrictions on atom reordering."""
129

130 8
    def __keytransform__(self, key):
131
        """Sort tuple from lowest to highest."""
132
        # Ensure key is a tuple.
133 8
        key = tuple(sorted(key))
134
        # Reverse the key if the first element is bigger than the last.
135 8
        return key
136

137

138 8
class ImproperDict(_TransformedDict):
139
    """Symmetrize improper torsions."""
140

141 8
    def __keytransform__(self, key):
142
        """Reorder tuple in numerical order except for element[1] which is the central atom; it retains its position."""
143
        # Ensure key is a tuple
144 8
        key = tuple(key)
145
        # Retrieve connected atoms
146 8
        connectedatoms = [key[0], key[2], key[3]]
147
        # Sort connected atoms
148 8
        connectedatoms.sort()
149
        # Re-store connected atoms
150 8
        key = tuple([connectedatoms[0], key[1], connectedatoms[1], connectedatoms[2]])
151 8
        return key
152

153

154
# =============================================================================================
155
# TOPOLOGY OBJECTS
156
# =============================================================================================
157

158
# =============================================================================================
159
# TopologyAtom
160
# =============================================================================================
161

162

163 8
class TopologyAtom(Serializable):
164
    """
165
    A TopologyAtom is a lightweight data structure that represents a single openforcefield.topology.molecule.Atom in
166
    a Topology. A TopologyAtom consists of two references -- One to its fully detailed "atom", an
167
    openforcefield.topology.molecule.Atom, and another to its parent "topology_molecule", which occupies a spot in
168
    the parent Topology's TopologyMolecule list.
169

170
    As some systems can be very large, there is no always-existing representation of a TopologyAtom. They are created on
171
    demand as the user requests them.
172

173
    .. warning :: This API is experimental and subject to change.
174

175
    """
176

177 8
    def __init__(self, atom, topology_molecule):
178
        """
179
        Create a new TopologyAtom.
180

181
        Parameters
182
        ----------
183
        atom : An openforcefield.topology.molecule.Atom
184
            The reference atom
185
        topology_molecule : An openforcefield.topology.TopologyMolecule
186
            The TopologyMolecule that this TopologyAtom belongs to
187
        """
188
        # TODO: Type checks
189 8
        self._atom = atom
190 8
        self._topology_molecule = topology_molecule
191

192 8
    @property
193
    def atom(self):
194
        """
195
        Get the reference Atom for this TopologyAtom.
196

197
        Returns
198
        -------
199
        an openforcefield.topology.molecule.Atom
200
        """
201 8
        return self._atom
202

203 8
    @property
204
    def atomic_number(self):
205
        """
206
        Get the atomic number of this atom
207

208
        Returns
209
        -------
210
        int
211
        """
212 8
        return self._atom.atomic_number
213

214 8
    @property
215
    def topology_molecule(self):
216
        """
217
        Get the TopologyMolecule that this TopologyAtom belongs to.
218

219
        Returns
220
        -------
221
        openforcefield.topology.TopologyMolecule
222
        """
223 8
        return self._topology_molecule
224

225 8
    @property
226
    def molecule(self):
227
        """
228
        Get the reference Molecule that this TopologyAtom belongs to.
229

230
        Returns
231
        -------
232
        openforcefield.topology.molecule.Molecule
233
        """
234 0
        return self._topology_molecule.molecule
235

236 8
    @property
237
    def topology_atom_index(self):
238
        """
239
        Get the index of this atom in its parent Topology.
240

241
        Returns
242
        -------
243
        int
244
            The index of this atom in its parent topology.
245
        """
246 8
        mapped_molecule_atom_index = self._topology_molecule._ref_to_top_index[
247
            self._atom.molecule_atom_index
248
        ]
249 8
        return (
250
            self._topology_molecule.atom_start_topology_index
251
            + mapped_molecule_atom_index
252
        )
253

254 8
    @property
255
    def topology_particle_index(self):
256
        """
257
        Get the index of this particle in its parent Topology.
258

259
        Returns
260
        -------
261
        int
262
            The index of this atom in its parent topology.
263
        """
264
        # This assumes that the particles in a topology are listed with all atoms from all TopologyMolecules
265
        # first, followed by all VirtualSites from all TopologyMolecules second
266 8
        return self.topology_atom_index
267

268 8
    @property
269
    def topology_bonds(self):
270
        """
271
        Get the TopologyBonds connected to this TopologyAtom.
272

273
        Returns
274
        -------
275
        iterator of openforcefield.topology.TopologyBonds
276
        """
277

278 8
        for bond in self._atom.bonds:
279 8
            reference_mol_bond_index = bond.molecule_bond_index
280 8
            yield self._topology_molecule.bond(reference_mol_bond_index)
281

282 8
    def __eq__(self, other):
283 8
        return (self._atom == other._atom) and (
284
            self._topology_molecule == other._topology_molecule
285
        )
286

287 8
    def __repr__(self):
288 8
        return "TopologyAtom {} with reference atom {} and parent TopologyMolecule {}".format(
289
            self.topology_atom_index, self._atom, self._topology_molecule
290
        )
291

292 8
    def to_dict(self):
293
        """Convert to dictionary representation."""
294
        # Implement abstract method Serializable.to_dict()
295 0
        raise NotImplementedError()  # TODO
296

297 8
    @classmethod
298
    def from_dict(cls, d):
299
        """Static constructor from dictionary representation."""
300
        # Implement abstract method Serializable.to_dict()
301 0
        raise NotImplementedError()  # TODO
302

303
    # @property
304
    # def bonds(self):
305
    #    """
306
    #    Get the Bonds connected to this TopologyAtom.
307
    #
308
    #    Returns
309
    #    -------
310
    #    iterator of openforcefield.topology.molecule.Bonds
311
    #    """
312
    #    for bond in self._atom.bonds:
313
    #        yield bond
314

315
    # TODO: Add all atom properties here? Or just make people use TopologyAtom.atom for that?
316

317

318
# =============================================================================================
319
# TopologyBond
320
# =============================================================================================
321

322

323 8
class TopologyBond(Serializable):
324
    """
325
    A TopologyBond is a lightweight data structure that represents a single openforcefield.topology.molecule.Bond in
326
    a Topology. A TopologyBond consists of two references -- One to its fully detailed "bond", an
327
    openforcefield.topology.molecule.Bond, and another to its parent "topology_molecule", which occupies a spot in
328
    the parent Topology's TopologyMolecule list.
329

330
    As some systems can be very large, there is no always-existing representation of a TopologyBond. They are created on
331
    demand as the user requests them.
332

333
    .. warning :: This API is experimental and subject to change.
334

335
    """
336

337 8
    def __init__(self, bond, topology_molecule):
338
        """
339

340
        Parameters
341
        ----------
342
        bond : An openforcefield.topology.molecule.Bond
343
            The reference bond.
344
        topology_molecule : An openforcefield.topology.TopologyMolecule
345
            The TopologyMolecule that this TopologyBond belongs to.
346
        """
347
        # TODO: Type checks
348 8
        self._bond = bond
349 8
        self._topology_molecule = topology_molecule
350

351 8
    @property
352
    def bond(self):
353
        """
354
        Get the reference Bond for this TopologyBond.
355

356
        Returns
357
        -------
358
        an openforcefield.topology.molecule.Bond
359
        """
360 8
        return self._bond
361

362 8
    @property
363
    def topology_molecule(self):
364
        """
365
        Get the TopologyMolecule that this TopologyBond belongs to.
366

367
        Returns
368
        -------
369
        openforcefield.topology.TopologyMolecule
370
        """
371 0
        return self._topology_molecule
372

373 8
    @property
374
    def topology_bond_index(self):
375
        """
376
        Get the index of this bond in its parent Topology.
377

378
        Returns
379
        -------
380
        int
381
            The index of this bond in its parent topology.
382
        """
383 0
        return (
384
            self._topology_molecule.bond_start_topology_index
385
            + self._bond.molecule_bond_index
386
        )
387

388 8
    @property
389
    def molecule(self):
390
        """
391
        Get the reference Molecule that this TopologyBond belongs to.
392

393
        Returns
394
        -------
395
        openforcefield.topology.molecule.Molecule
396
        """
397 0
        return self._topology_molecule.molecule
398

399 8
    @property
400
    def bond_order(self):
401
        """
402
        Get the order of this TopologyBond.
403

404
        Returns
405
        -------
406
        int : bond order
407
        """
408 8
        return self._bond.bond_order
409

410 8
    @property
411
    def atoms(self):
412
        """
413
        Get the TopologyAtoms connected to this TopologyBond.
414

415
        Returns
416
        -------
417
        iterator of openforcefield.topology.TopologyAtom
418
        """
419 8
        for ref_atom in self._bond.atoms:
420 8
            yield TopologyAtom(ref_atom, self._topology_molecule)
421

422 8
    def to_dict(self):
423
        """Convert to dictionary representation."""
424
        # Implement abstract method Serializable.to_dict()
425 0
        raise NotImplementedError()  # TODO
426

427 8
    @classmethod
428
    def from_dict(cls, d):
429
        """Static constructor from dictionary representation."""
430
        # Implement abstract method Serializable.to_dict()
431 0
        raise NotImplementedError()  # TODO
432

433

434
# =============================================================================================
435
# TopologyVirtualSite
436
# =============================================================================================
437

438

439 8
class TopologyVirtualSite(Serializable):
440
    """
441
    A TopologyVirtualSite is a lightweight data structure that represents a single
442
    openforcefield.topology.molecule.VirtualSite in a Topology. A TopologyVirtualSite consists of two references --
443
    One to its fully detailed "VirtualSite", an openforcefield.topology.molecule.VirtualSite, and another to its parent
444
    "topology_molecule", which occupies a spot in the parent Topology's TopologyMolecule list.
445

446
    As some systems can be very large, there is no always-existing representation of a TopologyVirtualSite. They are
447
    created on demand as the user requests them.
448

449
    .. warning :: This API is experimental and subject to change.
450

451
    """
452

453 8
    def __init__(self, virtual_site, topology_molecule):
454
        """
455

456
        Parameters
457
        ----------
458
        virtual_site : An openforcefield.topology.molecule.VirtualSite
459
            The reference virtual site
460
        topology_molecule : An openforcefield.topology.TopologyMolecule
461
            The TopologyMolecule that this TopologyVirtualSite belongs to
462
        """
463
        # TODO: Type checks
464 8
        self._virtual_site = virtual_site
465 8
        self._topology_molecule = topology_molecule
466

467 8
    def atom(self, index):
468
        """
469
        Get the atom at a specific index in this TopologyVirtualSite
470

471
        Parameters
472
        ----------
473
        index : int
474
            The index of the atom in the reference VirtualSite to retrieve
475

476
        Returns
477
        -------
478
        TopologyAtom
479

480
        """
481 8
        return TopologyAtom(self._virtual_site.atoms[index], self.topology_molecule)
482

483 8
    @property
484
    def atoms(self):
485
        """
486
        Get the TopologyAtoms involved in this TopologyVirtualSite.
487

488
        Returns
489
        -------
490
        iterator of openforcefield.topology.TopologyAtom
491
        """
492 8
        for ref_atom in self._virtual_site.atoms:
493 8
            yield TopologyAtom(ref_atom, self._topology_molecule)
494

495 8
    @property
496
    def virtual_site(self):
497
        """
498
        Get the reference VirtualSite for this TopologyVirtualSite.
499

500
        Returns
501
        -------
502
        an openforcefield.topology.molecule.VirtualSite
503
        """
504 0
        return self._virtual_site
505

506 8
    @property
507
    def topology_molecule(self):
508
        """
509
        Get the TopologyMolecule that this TopologyVirtualSite belongs to.
510

511
        Returns
512
        -------
513
        openforcefield.topology.TopologyMolecule
514
        """
515 8
        return self._topology_molecule
516

517 8
    @property
518
    def topology_virtual_site_index(self):
519
        """
520
        Get the index of this virtual site in its parent Topology.
521

522
        Returns
523
        -------
524
        int
525
            The index of this virtual site in its parent topology.
526
        """
527 0
        return (
528
            self._topology_molecule.virtual_site_start_topology_index
529
            + self._virtual_site.molecule_virtual_site_index
530
        )
531

532 8
    @property
533
    def topology_particle_index(self):
534
        """
535
        Get the index of this particle in its parent Topology.
536

537
        Returns
538
        -------
539
        int
540
            The index of this particle in its parent topology.
541
        """
542
        # This assumes that the particles in a topology are listed with all atoms from all TopologyMolecules
543
        # first, followed by all VirtualSites from all TopologyMolecules second
544 0
        return self.topology.n_topology_atoms + self.topology_virtual_site_index
545

546
        # return self._topology_molecule.particle_start_topology_index + self._virtual_site.molecule_particle_index
547

548 8
    @property
549
    def molecule(self):
550
        """
551
        Get the reference Molecule that this TopologyVirtualSite belongs to.
552

553
        Returns
554
        -------
555
        openforcefield.topology.molecule.Molecule
556
        """
557 0
        return self._topology_molecule.molecule
558

559 8
    @property
560
    def type(self):
561
        """
562
        Get the type of this virtual site
563

564
        Returns
565
        -------
566
        str : The class name of this virtual site
567
        """
568 8
        return self._virtual_site.type
569

570 8
    def __eq__(self, other):
571 0
        return (self._virtual_site == other._virtual_site) and (
572
            self._topology_molecule == other._topology_molecule
573
        )
574

575 8
    def to_dict(self):
576
        """Convert to dictionary representation."""
577
        # Implement abstract method Serializable.to_dict()
578 0
        raise NotImplementedError()  # TODO
579

580 8
    @classmethod
581
    def from_dict(cls, d):
582
        """Static constructor from dictionary representation."""
583
        # Implement abstract method Serializable.to_dict()
584 0
        raise NotImplementedError()  # TODO
585

586

587
# =============================================================================================
588
# TopologyMolecule
589
# =============================================================================================
590

591

592 8
class TopologyMolecule:
593
    """
594
    TopologyMolecules are built to be an efficient way to store large numbers of copies of the same molecule for
595
    parameterization and system preparation.
596

597
    .. warning :: This API is experimental and subject to change.
598

599
    """
600

601 8
    def __init__(
602
        self, reference_molecule, topology, local_topology_to_reference_index=None
603
    ):
604
        """
605
        Create a new TopologyMolecule.
606

607
        Parameters
608
        ----------
609
        reference_molecule : an openforcefield.topology.molecule.Molecule
610
            The reference molecule, with details like formal charges, partial charges, bond orders, partial bond orders,
611
            and atomic symbols.
612
        topology : an openforcefield.topology.Topology
613
            The topology that this TopologyMolecule belongs to
614
        local_topology_to_reference_index : dict, optional, default=None
615
            Dictionary of {TopologyMolecule_atom_index : Molecule_atom_index} for the TopologyMolecule that will be built
616
        """
617
        # TODO: Type checks
618 8
        self._reference_molecule = reference_molecule
619 8
        self._topology = topology
620 8
        if local_topology_to_reference_index is None:
621 4
            local_topology_to_reference_index = dict(
622
                [(i, i) for i in range(reference_molecule.n_atoms)]
623
            )
624 8
        self._top_to_ref_index = local_topology_to_reference_index
625 8
        self._ref_to_top_index = dict(
626
            (k, j) for j, k in local_topology_to_reference_index.items()
627
        )
628

629
        # Initialize cached data
630 8
        self._atom_start_topology_index = None
631 8
        self._bond_start_topology_index = None
632 8
        self._virtual_site_start_topology_index = None
633

634 8
    def _invalidate_cached_data(self):
635
        """Unset all cached data, in response to an appropriate change"""
636 0
        self._atom_start_topology_index = None
637 0
        self._bond_start_topology_index = None
638 0
        self._virtual_site_start_topology_index = None
639

640 8
    @property
641
    def topology(self):
642
        """
643
        Get the topology that this TopologyMolecule belongs to
644

645
        Returns
646
        -------
647
        an openforcefield.topology.Topology
648
        """
649 8
        return self._topology
650

651 8
    @property
652
    def reference_molecule(self):
653
        """
654
        Get the reference molecule for this TopologyMolecule
655

656
        Returns
657
        -------
658
        an openforcefield.topology.molecule.Molecule
659
        """
660 8
        return self._reference_molecule
661

662 8
    @property
663
    def n_atoms(self):
664
        """
665
        The number of atoms in this topology.
666

667
        Returns
668
        -------
669
        int
670
        """
671 8
        return self._reference_molecule.n_atoms
672

673 8
    def atom(self, index):
674
        """
675
        Get the TopologyAtom with a given topology atom index in this TopologyMolecule.
676

677
        Parameters
678
        ----------
679
        index : int
680
            Index of the TopologyAtom within this TopologyMolecule to retrieve
681

682
        Returns
683
        -------
684
        an openforcefield.topology.TopologyAtom
685
        """
686 8
        ref_mol_atom_index = self._top_to_ref_index[index]
687 8
        return TopologyAtom(self._reference_molecule.atoms[ref_mol_atom_index], self)
688

689 8
    @property
690
    def atoms(self):
691
        """
692
        Return an iterator of all the TopologyAtoms in this TopologyMolecule
693

694
        Returns
695
        -------
696
        an iterator of openforcefield.topology.TopologyAtoms
697
        """
698 8
        iterate_order = list(self._top_to_ref_index.items())
699
        # Sort by topology index
700 8
        iterate_order.sort(key=lambda x: x[0])
701 8
        for top_index, ref_index in iterate_order:
702
            # self._reference_molecule.atoms:
703 8
            yield TopologyAtom(self._reference_molecule.atoms[ref_index], self)
704

705 8
    @property
706
    def atom_start_topology_index(self):
707
        """
708
        Get the topology index of the first atom in this TopologyMolecule
709

710
        """
711
        # If cached value is not available, generate it.
712 8
        if self._atom_start_topology_index is None:
713 8
            atom_start_topology_index = 0
714 8
            for topology_molecule in self._topology.topology_molecules:
715 8
                if self == topology_molecule:
716 8
                    self._atom_start_topology_index = atom_start_topology_index
717 8
                    break
718 8
                atom_start_topology_index += topology_molecule.n_atoms
719

720
        # Return cached value
721 8
        return self._atom_start_topology_index
722

723 8
    def bond(self, index):
724
        """
725
        Get the TopologyBond with a given reference molecule index in this TopologyMolecule
726

727
        Parameters
728
        ----------
729
        index : int
730
            Index of the TopologyBond within this TopologyMolecule to retrieve
731

732
        Returns
733
        -------
734
        an openforcefield.topology.TopologyBond
735
        """
736 8
        return TopologyBond(self.reference_molecule.bonds[index], self)
737

738 8
    @property
739
    def bonds(self):
740
        """
741
        Return an iterator of all the TopologyBonds in this TopologyMolecule
742

743
        Returns
744
        -------
745
        an iterator of openforcefield.topology.TopologyBonds
746
        """
747 8
        for bond in self._reference_molecule.bonds:
748 8
            yield TopologyBond(bond, self)
749

750 8
    @property
751
    def n_bonds(self):
752
        """Get the number of bonds in this TopologyMolecule
753

754
        Returns
755
        -------
756
        int : number of bonds
757
        """
758 8
        return self._reference_molecule.n_bonds
759

760 8
    @property
761
    def bond_start_topology_index(self):
762
        """Get the topology index of the first bond in this TopologyMolecule"""
763
        # If cached value is not available, generate it.
764 0
        if self._bond_start_topology_index is None:
765 0
            bond_start_topology_index = 0
766 0
            for topology_molecule in self._topology.topology_molecules:
767 0
                if self == topology_molecule:
768 0
                    self._bond_start_topology_index = bond_start_topology_index
769 0
                    break
770 0
                bond_start_topology_index += topology_molecule.n_bonds
771

772
        # Return cached value
773 0
        return self._bond_start_topology_index
774

775 8
    def particle(self, index):
776
        """
777
        Get the TopologyParticle with a given reference molecule index in this TopologyMolecule
778

779
        Parameters
780
        ----------
781
        index : int
782
            Index of the TopologyParticle within this TopologyMolecule to retrieve
783

784
        Returns
785
        -------
786
        an openforcefield.topology.TopologyParticle
787
        """
788 0
        return TopologyParticle(self.reference_molecule.particles[index], self)
789

790 8
    @property
791
    def particles(self):
792
        """
793
        Return an iterator of all the TopologyParticles in this TopologyMolecules
794

795
        Returns
796
        -------
797
        an iterator of openforcefield.topology.TopologyParticle
798
        """
799
        # Note: This assumes that particles are all Atoms (in topology map order), and then virtualsites
800 8
        yield_order = list(self._top_to_ref_index.items())
801
        # Sort by topology atom index
802 8
        yield_order.sort(key=lambda x: x[0])
803 8
        for top_atom_index, ref_mol_atom_index in yield_order:
804 8
            ref_atom = self._reference_molecule.atoms[ref_mol_atom_index]
805 8
            yield TopologyAtom(ref_atom, self)
806

807
        # TODO: Add ordering scheme here
808 8
        for vsite in self.reference_molecule.virtual_sites:
809 0
            yield TopologyVirtualSite(vsite, self)
810

811 8
    @property
812
    def n_particles(self):
813
        """Get the number of particles in this TopologyMolecule
814

815
        Returns
816
        -------
817
        int : The number of particles
818
        """
819 0
        return self._reference_molecule.n_particles
820

821 8
    def virtual_site(self, index):
822
        """
823
        Get the TopologyVirtualSite with a given reference molecule index in this TopologyMolecule
824

825
        Parameters
826
        ----------
827
        index : int
828
            Index of the TopologyVirtualSite within this TopologyMolecule to retrieve
829

830
        Returns
831
        -------
832
        an openforcefield.topology.TopologyVirtualSite
833
        """
834 8
        return TopologyVirtualSite(self.reference_molecule.virtual_sites[index], self)
835

836 8
    @property
837
    def virtual_sites(self):
838
        """
839
        Return an iterator of all the TopologyVirtualSites in this TopologyMolecules
840

841
        Returns
842
        -------
843
        an iterator of openforcefield.topology.TopologyVirtualSite
844
        """
845 8
        for vs in self._reference_molecule.virtual_sites:
846 8
            yield TopologyVirtualSite(vs, self)
847

848 8
    @property
849
    def n_virtual_sites(self):
850
        """Get the number of virtual sites in this TopologyMolecule
851

852
        Returns
853
        -------
854
        int
855
        """
856 8
        return self._reference_molecule.n_virtual_sites
857

858 8
    @property
859
    def angles(self):
860
        """Iterable of Tuple[TopologyAtom]: iterator over the angles in this Topology."""
861 8
        return self._convert_to_topology_atom_tuples(self._reference_molecule.angles)
862

863 8
    @property
864
    def n_angles(self):
865
        """int: number of angles in this Topology."""
866 8
        return self._reference_molecule.n_angles
867

868 8
    @property
869
    def propers(self):
870
        """Iterable of Tuple[TopologyAtom]: iterator over the proper torsions in this Topology."""
871 8
        return self._convert_to_topology_atom_tuples(self._reference_molecule.propers)
872

873 8
    @property
874
    def n_propers(self):
875
        """int: number of proper torsions in this Topology."""
876 8
        return self._reference_molecule.n_propers
877

878 8
    @property
879
    def impropers(self):
880
        """Iterable of Tuple[TopologyAtom]: iterator over the improper torsions in this Topology."""
881 8
        return self._convert_to_topology_atom_tuples(self._reference_molecule.impropers)
882

883 8
    @property
884
    def n_impropers(self):
885
        """int: number of proper torsions in this Topology."""
886 8
        return self._reference_molecule.n_impropers
887

888 8
    @property
889
    def virtual_site_start_topology_index(self):
890
        """Get the topology index of the first virtual site in this TopologyMolecule"""
891
        # If the cached value is not available, generate it
892 0
        if self._virtual_site_start_topology_index is None:
893 0
            virtual_site_start_topology_index = 0
894 0
            for topology_molecule in self._topology.topology_molecules:
895 0
                if self == topology_molecule:
896 0
                    self._virtual_site_start_topology_index = (
897
                        virtual_site_start_topology_index
898
                    )
899 0
                virtual_site_start_topology_index += topology_molecule.n_virtual_sites
900
        # Return cached value
901 0
        return self._virtual_site_start_topology_index
902

903 8
    def to_dict(self):
904
        """Convert to dictionary representation."""
905
        # Implement abstract method Serializable.to_dict()
906 0
        raise NotImplementedError()  # TODO
907

908 8
    @classmethod
909
    def from_dict(cls, d):
910
        """Static constructor from dictionary representation."""
911
        # Implement abstract method Serializable.to_dict()
912 0
        raise NotImplementedError()  # TODO
913

914 8
    def _convert_to_topology_atom_tuples(self, molecule_atom_tuples):
915 8
        for atom_tuple in molecule_atom_tuples:
916 8
            mol_atom_indices = (a.molecule_atom_index for a in atom_tuple)
917 8
            top_mol_atom_indices = (
918
                self._ref_to_top_index[mol_idx] for mol_idx in mol_atom_indices
919
            )
920 8
            yield tuple(self.atom(i) for i in top_mol_atom_indices)
921

922

923
# TODO: pick back up figuring out how we want TopologyMolecules to know their starting TopologyParticle indices
924

925
# =============================================================================================
926
# Topology
927
# =============================================================================================
928

929
# TODO: Revise documentation and remove chains
930

931

932 8
class Topology(Serializable):
933
    """
934
    A Topology is a chemical representation of a system containing one or more molecules appearing in a specified order.
935

936
    As of the 0.7.0 release, the Topology particle indexing system puts all atoms before all virtualsites.
937
    This ensures that atoms keep the same Topology particle index value, even if the Topology
938
    is modified during system creation by the addition of virtual sites.
939

940
    .. warning :: This API is experimental and subject to change.
941

942
    Examples
943
    --------
944

945
    Import some utilities
946

947
    >>> from simtk.openmm import app
948
    >>> from openforcefield.tests.utils import get_data_file_path, get_packmol_pdb_file_path
949
    >>> pdb_filepath = get_packmol_pdb_file_path('cyclohexane_ethanol_0.4_0.6')
950
    >>> monomer_names = ('cyclohexane', 'ethanol')
951

952
    Create a Topology object from a PDB file and sdf files defining the molecular contents
953

954
    >>> from openforcefield.topology import Molecule, Topology
955
    >>> pdbfile = app.PDBFile(pdb_filepath)
956
    >>> sdf_filepaths = [get_data_file_path(f'systems/monomers/{name}.sdf') for name in monomer_names]
957
    >>> unique_molecules = [Molecule.from_file(sdf_filepath) for sdf_filepath in sdf_filepaths]
958
    >>> topology = Topology.from_openmm(pdbfile.topology, unique_molecules=unique_molecules)
959

960
    Create a Topology object from a PDB file and IUPAC names of the molecular contents
961

962
    >>> pdbfile = app.PDBFile(pdb_filepath)
963
    >>> unique_molecules = [Molecule.from_iupac(name) for name in monomer_names]
964
    >>> topology = Topology.from_openmm(pdbfile.topology, unique_molecules=unique_molecules)
965

966
    Create an empty Topology object and add a few copies of a single benzene molecule
967

968
    >>> topology = Topology()
969
    >>> molecule = Molecule.from_iupac('benzene')
970
    >>> molecule_topology_indices = [topology.add_molecule(molecule) for index in range(10)]
971

972
    """
973

974 8
    def __init__(self, other=None):
975
        """
976
        Create a new Topology.
977

978
        Parameters
979
        ----------
980
        other : optional, default=None
981
            If specified, attempt to construct a copy of the Topology from the specified object.
982
            This might be a Topology object, or a file that can be used to construct a Topology object
983
            or serialized Topology object.
984

985
        """
986 8
        from openforcefield.topology.molecule import FrozenMolecule
987

988
        # Assign cheminformatics models
989 8
        model = DEFAULT_AROMATICITY_MODEL
990 8
        self._aromaticity_model = model
991
        # self._fractional_bond_order_model = DEFAULT_FRACTIONAL_BOND_ORDER_MODEL
992
        # self._charge_model = DEFAULT_CHARGE_MODEL
993

994
        # Initialize storage
995 8
        self._initialize()
996

997
        # TODO: Try to construct Topology copy from `other` if specified
998 8
        if isinstance(other, Topology):
999 0
            self.copy_initializer(other)
1000 8
        elif isinstance(other, FrozenMolecule):
1001 0
            self.from_molecules([other])
1002 8
        elif isinstance(other, OrderedDict):
1003 0
            self._initialize_from_dict(other)
1004

1005 8
    def _initialize(self):
1006
        """
1007
        Initializes a blank Topology.
1008
        """
1009 8
        self._aromaticity_model = DEFAULT_AROMATICITY_MODEL
1010 8
        self._constrained_atom_pairs = dict()
1011 8
        self._box_vectors = None
1012
        # self._is_periodic = False
1013
        # self._reference_molecule_dicts = set()
1014
        # TODO: Look into weakref and what it does. Having multiple topologies might cause a memory leak.
1015 8
        self._reference_molecule_to_topology_molecules = OrderedDict()
1016 8
        self._topology_molecules = list()
1017

1018 8
    @property
1019
    def reference_molecules(self):
1020
        """
1021
        Get an iterator of reference molecules in this Topology.
1022

1023
        Returns
1024
        -------
1025
        iterable of openforcefield.topology.Molecule
1026
        """
1027 8
        for ref_mol in self._reference_molecule_to_topology_molecules.keys():
1028 8
            yield ref_mol
1029

1030 8
    @classmethod
1031
    def from_molecules(cls, molecules):
1032
        """
1033
        Create a new Topology object containing one copy of each of the specified molecule(s).
1034

1035
        Parameters
1036
        ----------
1037
        molecules : Molecule or iterable of Molecules
1038
            One or more molecules to be added to the Topology
1039

1040
        Returns
1041
        -------
1042
        topology : Topology
1043
            The Topology created from the specified molecule(s)
1044
        """
1045
        # Ensure that we are working with an iterable
1046 8
        try:
1047 8
            iter(molecules)
1048 8
        except TypeError as te:
1049
            # Make iterable object
1050 8
            molecules = [molecules]
1051

1052
        # Create Topology and populate it with specified molecules
1053 8
        topology = cls()
1054 8
        for molecule in molecules:
1055 8
            topology.add_molecule(molecule)
1056

1057 8
        return topology
1058

1059 8
    def assert_bonded(self, atom1, atom2):
1060
        """
1061
        Raise an exception if the specified atoms are not bonded in the topology.
1062

1063
        Parameters
1064
        ----------
1065
        atom1, atom2 : openforcefield.topology.Atom or int
1066
            The atoms or atom topology indices to check to ensure they are bonded
1067

1068
        """
1069 8
        if (type(atom1) is int) and (type(atom2) is int):
1070 8
            atom1 = self.atom(atom1)
1071 8
            atom2 = self.atom(atom2)
1072

1073
        # else:
1074 8
        if not (self.is_bonded(atom1, atom2)):
1075
            # TODO: Raise more specific exception.
1076 8
            raise Exception(
1077
                "Atoms {} and {} are not bonded in topology".format(atom1, atom2)
1078
            )
1079

1080 8
    @property
1081
    def aromaticity_model(self):
1082
        """
1083
        Get the aromaticity model applied to all molecules in the topology.
1084

1085
        Returns
1086
        -------
1087
        aromaticity_model : str
1088
            Aromaticity model in use.
1089
        """
1090 0
        return self._aromaticity_model
1091

1092 8
    @aromaticity_model.setter
1093
    def aromaticity_model(self, aromaticity_model):
1094
        """
1095
        Set the aromaticity model applied to all molecules in the topology.
1096

1097
        Parameters
1098
        ----------
1099
        aromaticity_model : str
1100
            Aromaticity model to use. One of: ['OEAroModel_MDL']
1101
        """
1102

1103 0
        if not aromaticity_model in ALLOWED_AROMATICITY_MODELS:
1104 0
            msg = "Aromaticity model must be one of {}; specified '{}'".format(
1105
                ALLOWED_AROMATICITY_MODELS, aromaticity_model
1106
            )
1107 0
            raise ValueError(msg)
1108 0
        self._aromaticity_model = aromaticity_model
1109

1110 8
    @property
1111
    def box_vectors(self):
1112
        """Return the box vectors of the topology, if specified
1113
        Returns
1114
        -------
1115
        box_vectors : simtk.unit.Quantity wrapped numpy array of shape (3, 3)
1116
            The unit-wrapped box vectors of this topology
1117
        """
1118 8
        return self._box_vectors
1119

1120 8
    @box_vectors.setter
1121
    def box_vectors(self, box_vectors):
1122
        """
1123
        Sets the box vectors to be used for this topology.
1124

1125
        Parameters
1126
        ----------
1127
        box_vectors : simtk.unit.Quantity wrapped numpy array of shape (3, 3)
1128
            The unit-wrapped box vectors
1129

1130
        """
1131 8
        if box_vectors is None:
1132 8
            self._box_vectors = None
1133 8
            return
1134 8
        if not hasattr(box_vectors, "unit"):
1135 8
            raise InvalidBoxVectorsError("Given unitless box vectors")
1136 8
        if not (unit.angstrom.is_compatible(box_vectors.unit)):
1137 8
            raise InvalidBoxVectorsError(
1138
                "Attempting to set box vectors in units that are incompatible with simtk.unit.Angstrom"
1139
            )
1140

1141 8
        if hasattr(box_vectors, "shape"):
1142 8
            if box_vectors.shape == (3,):
1143 8
                box_vectors *= np.eye(3)
1144 8
            if box_vectors.shape != (3, 3):
1145 8
                raise InvalidBoxVectorsError(
1146
                    f"Box vectors must be shape (3, 3). Found shape {box_vectors.shape}"
1147
                )
1148
        else:
1149 8
            assert len(box_vectors) == 3
1150 8
        self._box_vectors = box_vectors
1151

1152 8
    @property
1153
    def charge_model(self):
1154
        """
1155
        Get the partial charge model applied to all molecules in the topology.
1156

1157
        Returns
1158
        -------
1159
        charge_model : str
1160
            Charge model used for all molecules in the Topology.
1161

1162
        """
1163 0
        return self._charge_model
1164

1165 8
    @charge_model.setter
1166
    def charge_model(self, charge_model):
1167
        """
1168
        Set the partial charge model used for all molecules in the topology.
1169

1170
        Parameters
1171
        ----------
1172
        charge_model : str
1173
            Charge model to use for all molecules in the Topology.
1174
            Allowed values: ['AM1-BCC']
1175
            * ``AM1-BCC``: Canonical AM1-BCC scheme
1176
        """
1177 0
        if not charge_model in ALLOWED_CHARGE_MODELS:
1178 0
            raise ValueError(
1179
                "Charge model must be one of {}; specified '{}'".format(
1180
                    ALLOWED_CHARGE_MODELS, charge_model
1181
                )
1182
            )
1183 0
        self._charge_model = charge_model
1184

1185 8
    @property
1186
    def constrained_atom_pairs(self):
1187
        """Returns the constrained atom pairs of the Topology
1188

1189
        Returns
1190
        -------
1191
        constrained_atom_pairs : dict
1192
             dictionary of the form d[(atom1_topology_index, atom2_topology_index)] = distance (float)
1193
        """
1194 8
        return self._constrained_atom_pairs
1195

1196 8
    @property
1197
    def fractional_bond_order_model(self):
1198
        """
1199
        Get the fractional bond order model for the Topology.
1200

1201
        Returns
1202
        -------
1203
        fractional_bond_order_model : str
1204
            Fractional bond order model in use.
1205

1206
        """
1207 0
        return self._fractional_bond_order_model
1208

1209 8
    @fractional_bond_order_model.setter
1210
    def fractional_bond_order_model(self, fractional_bond_order_model):
1211
        """
1212
        Set the fractional bond order model applied to all molecules in the topology.
1213

1214
        Parameters
1215
        ----------
1216
        fractional_bond_order_model : str
1217
            Fractional bond order model to use. One of: ['Wiberg']
1218

1219
        """
1220 0
        if not fractional_bond_order_model in ALLOWED_FRACTIONAL_BOND_ORDER_MODELS:
1221 0
            raise ValueError(
1222
                "Fractional bond order model must be one of {}; specified '{}'".format(
1223
                    ALLOWED_FRACTIONAL_BOND_ORDER_MODELS, fractional_bond_order_model
1224
                )
1225
            )
1226 0
        self._fractional_bond_order_model = fractional_bond_order_model
1227

1228 8
    @property
1229
    def n_reference_molecules(self):
1230
        """
1231
        Returns the number of reference (unique) molecules in in this Topology.
1232

1233
        Returns
1234
        -------
1235
        n_reference_molecules : int
1236
        """
1237 8
        count = 0
1238 8
        for i in self.reference_molecules:
1239 8
            count += 1
1240 8
        return count
1241

1242 8
    @property
1243
    def n_topology_molecules(self):
1244
        """
1245
        Returns the number of topology molecules in in this Topology.
1246

1247
        Returns
1248
        -------
1249
        n_topology_molecules : int
1250
        """
1251 8
        return len(self._topology_molecules)
1252

1253 8
    @property
1254
    def topology_molecules(self):
1255
        """Returns an iterator over all the TopologyMolecules in this Topology
1256

1257
        Returns
1258
        -------
1259
        topology_molecules : Iterable of TopologyMolecule
1260
        """
1261 8
        return self._topology_molecules
1262

1263 8
    @property
1264
    def n_topology_atoms(self):
1265
        """
1266
        Returns the number of topology atoms in in this Topology.
1267

1268
        Returns
1269
        -------
1270
        n_topology_atoms : int
1271
        """
1272 8
        n_atoms = 0
1273 8
        for reference_molecule in self.reference_molecules:
1274 8
            n_atoms_per_topology_molecule = reference_molecule.n_atoms
1275 8
            n_instances_of_topology_molecule = len(
1276
                self._reference_molecule_to_topology_molecules[reference_molecule]
1277
            )
1278 8
            n_atoms += n_atoms_per_topology_molecule * n_instances_of_topology_molecule
1279 8
        return n_atoms
1280

1281 8
    @property
1282
    def topology_atoms(self):
1283
        """Returns an iterator over the atoms in this Topology. These will be in ascending order of topology index (Note
1284
        that this is not necessarily the same as the reference molecule index)
1285

1286
        Returns
1287
        -------
1288
        topology_atoms : Iterable of TopologyAtom
1289
        """
1290 8
        for topology_molecule in self._topology_molecules:
1291 8
            for atom in topology_molecule.atoms:
1292 8
                yield atom
1293

1294 8
    @property
1295
    def n_topology_bonds(self):
1296
        """
1297
        Returns the number of TopologyBonds in in this Topology.
1298

1299
        Returns
1300
        -------
1301
        n_bonds : int
1302
        """
1303 8
        n_bonds = 0
1304 8
        for reference_molecule in self.reference_molecules:
1305 8
            n_bonds_per_topology_molecule = reference_molecule.n_bonds
1306 8
            n_instances_of_topology_molecule = len(
1307
                self._reference_molecule_to_topology_molecules[reference_molecule]
1308
            )
1309 8
            n_bonds += n_bonds_per_topology_molecule * n_instances_of_topology_molecule
1310 8
        return n_bonds
1311

1312 8
    @property
1313
    def topology_bonds(self):
1314
        """Returns an iterator over the bonds in this Topology
1315

1316
        Returns
1317
        -------
1318
        topology_bonds : Iterable of TopologyBond
1319
        """
1320 8
        for topology_molecule in self._topology_molecules:
1321 8
            for bond in topology_molecule.bonds:
1322 8
                yield bond
1323

1324 8
    @property
1325
    def n_topology_particles(self):
1326
        """
1327
        Returns the number of topology particles (TopologyAtoms and TopologyVirtualSites) in in this Topology.
1328

1329
        Returns
1330
        -------
1331
        n_topology_particles : int
1332
        """
1333 8
        n_particles = 0
1334 8
        for reference_molecule in self.reference_molecules:
1335 8
            n_particles_per_topology_molecule = reference_molecule.n_particles
1336 8
            n_instances_of_topology_molecule = len(
1337
                self._reference_molecule_to_topology_molecules[reference_molecule]
1338
            )
1339 8
            n_particles += (
1340
                n_particles_per_topology_molecule * n_instances_of_topology_molecule
1341
            )
1342 8
        return n_particles
1343

1344 8
    @property
1345
    def topology_particles(self):
1346
        """Returns an iterator over the particles (TopologyAtoms and TopologyVirtualSites) in this Topology. The
1347
        TopologyAtoms will be in order of ascending Topology index (Note that this may differ from the
1348
        order of atoms in the reference molecule index).
1349

1350
        Returns
1351
        --------
1352
        topology_particles : Iterable of TopologyAtom and TopologyVirtualSite
1353
        """
1354 8
        for topology_molecule in self._topology_molecules:
1355 8
            for atom in topology_molecule.atoms:
1356 8
                yield atom
1357 8
        for topology_molecule in self._topology_molecules:
1358 8
            for vs in topology_molecule.virtual_sites:
1359 8
                yield vs
1360

1361 8
    @property
1362
    def n_topology_virtual_sites(self):
1363
        """
1364
        Returns the number of TopologyVirtualSites in in this Topology.
1365

1366
        Returns
1367
        -------
1368
        n_virtual_sites : iterable of TopologyVirtualSites
1369
        """
1370 8
        n_virtual_sites = 0
1371 8
        for reference_molecule in self.reference_molecules:
1372 8
            n_virtual_sites_per_topology_molecule = reference_molecule.n_virtual_sites
1373 8
            n_instances_of_topology_molecule = len(
1374
                self._reference_molecule_to_topology_molecules[reference_molecule]
1375
            )
1376 8
            n_virtual_sites += (
1377
                n_virtual_sites_per_topology_molecule * n_instances_of_topology_molecule
1378
            )
1379 8
        return n_virtual_sites
1380

1381 8
    @property
1382
    def topology_virtual_sites(self):
1383
        """Get an iterator over the virtual sites in this Topology
1384

1385
        Returns
1386
        -------
1387
        topology_virtual_sites : Iterable of TopologyVirtualSite
1388
        """
1389 8
        for topology_molecule in self._topology_molecules:
1390 8
            for virtual_site in topology_molecule.virtual_sites:
1391 8
                yield virtual_site
1392

1393 8
    @property
1394
    def n_angles(self):
1395
        """int: number of angles in this Topology."""
1396 8
        return sum(mol.n_angles for mol in self._topology_molecules)
1397

1398 8
    @property
1399
    def angles(self):
1400
        """Iterable of Tuple[TopologyAtom]: iterator over the angles in this Topology."""
1401 8
        for topology_molecule in self._topology_molecules:
1402 8
            for angle in topology_molecule.angles:
1403 8
                yield angle
1404

1405 8
    @property
1406
    def n_propers(self):
1407
        """int: number of proper torsions in this Topology."""
1408 8
        return sum(mol.n_propers for mol in self._topology_molecules)
1409

1410 8
    @property
1411
    def propers(self):
1412
        """Iterable of Tuple[TopologyAtom]: iterator over the proper torsions in this Topology."""
1413 8
        for topology_molecule in self._topology_molecules:
1414 8
            for proper in topology_molecule.propers:
1415 8
                yield proper
1416

1417 8
    @property
1418
    def n_impropers(self):
1419
        """int: number of improper torsions in this Topology."""
1420 8
        return sum(mol.n_impropers for mol in self._topology_molecules)
1421

1422 8
    @property
1423
    def impropers(self):
1424
        """Iterable of Tuple[TopologyAtom]: iterator over the improper torsions in this Topology."""
1425 8
        for topology_molecule in self._topology_molecules:
1426 8
            for improper in topology_molecule.impropers:
1427 8
                yield improper
1428

1429 8
    class _ChemicalEnvironmentMatch:
1430
        """Represents the match of a given chemical environment query, storing
1431
        both the matched topology atom indices and the indices of the corresponding
1432
        reference molecule atoms, as well as a reference to the reference molecule.
1433
        """
1434

1435 8
        @property
1436
        def reference_atom_indices(self):
1437
            """tuple of int: The indices of the corresponding reference molecule atoms."""
1438 8
            return self._reference_atom_indices
1439

1440 8
        @property
1441
        def reference_molecule(self):
1442
            """topology.molecule.Molecule: The corresponding reference molecule."""
1443 8
            return self._reference_molecule
1444

1445 8
        @property
1446
        def topology_atom_indices(self):
1447
            """tuple of int: The matched topology atom indices."""
1448 8
            return self._topology_atom_indices
1449

1450 8
        def __init__(
1451
            self, reference_atom_indices, reference_molecule, topology_atom_indices
1452
        ):
1453
            """Constructs a new _ChemicalEnvironmentMatch object
1454

1455
            Parameters
1456
            ----------
1457
            reference_atom_indices: tuple of int
1458
                The indices of the corresponding reference molecule atoms.
1459
            reference_molecule: topology.molecule.Molecule
1460
                The corresponding reference molecule.
1461
            topology_atom_indices: tuple of int
1462
                The matched topology atom indices.
1463
            """
1464

1465 8
            assert len(reference_atom_indices) == len(topology_atom_indices)
1466

1467 8
            self._reference_atom_indices = reference_atom_indices
1468 8
            self._reference_molecule = reference_molecule
1469

1470 8
            self._topology_atom_indices = topology_atom_indices
1471

1472 8
    def chemical_environment_matches(
1473
        self, query, aromaticity_model="MDL", toolkit_registry=GLOBAL_TOOLKIT_REGISTRY
1474
    ):
1475
        """
1476
        Retrieve all matches for a given chemical environment query.
1477

1478
        TODO:
1479
        * Do we want to generalize this to other kinds of queries too, like mdtraj DSL, pymol selections, atom index slices, etc?
1480
          We could just call it topology.matches(query)
1481

1482
        Parameters
1483
        ----------
1484
        query : str or ChemicalEnvironment
1485
            SMARTS string (with one or more tagged atoms) or ``ChemicalEnvironment`` query
1486
            Query will internally be resolved to SMARTS using ``query.as_smarts()`` if it has an ``.as_smarts`` method.
1487
        aromaticity_model : str
1488
            Override the default aromaticity model for this topology and use the specified aromaticity model instead.
1489
            Allowed values: ['MDL']
1490

1491
        Returns
1492
        -------
1493
        matches : list of Topology._ChemicalEnvironmentMatch
1494
            A list of tuples, containing the topology indices of the matching atoms.
1495

1496
        """
1497

1498
        # Render the query to a SMARTS string
1499 8
        if type(query) is str:
1500 8
            smarts = query
1501 0
        elif type(query) is ChemicalEnvironment:
1502 0
            smarts = query.as_smarts()
1503
        else:
1504 0
            raise ValueError(
1505
                f"Don't know how to convert query '{query}' into SMARTS string"
1506
            )
1507

1508
        # Perform matching on each unique molecule, unrolling the matches to all matching copies
1509
        # of that molecule in the Topology object.
1510 8
        matches = list()
1511

1512 8
        for ref_mol in self.reference_molecules:
1513

1514
            # Find all atomsets that match this definition in the reference molecule
1515
            # This will automatically attempt to match chemically identical atoms in
1516
            # a canonical order within the Topology
1517 8
            ref_mol_matches = ref_mol.chemical_environment_matches(
1518
                smarts, toolkit_registry=toolkit_registry
1519
            )
1520

1521 8
            if len(ref_mol_matches) == 0:
1522 8
                continue
1523

1524
            # Unroll corresponding atom indices over all instances of this molecule.
1525 8
            for topology_molecule in self._reference_molecule_to_topology_molecules[
1526
                ref_mol
1527
            ]:
1528

1529
                # topology_molecule_start_index = topology_molecule.atom_start_topology_index
1530

1531
                # Loop over matches
1532 8
                for reference_match in ref_mol_matches:
1533

1534
                    # Collect indices of matching TopologyAtoms.
1535 8
                    topology_atom_indices = []
1536 8
                    for reference_molecule_atom_index in reference_match:
1537 8
                        reference_atom = topology_molecule.reference_molecule.atoms[
1538
                            reference_molecule_atom_index
1539
                        ]
1540 8
                        topology_atom = TopologyAtom(reference_atom, topology_molecule)
1541 8
                        topology_atom_indices.append(
1542
                            topology_atom.topology_particle_index
1543
                        )
1544

1545 8
                    environment_match = Topology._ChemicalEnvironmentMatch(
1546
                        tuple(reference_match), ref_mol, tuple(topology_atom_indices)
1547
                    )
1548

1549 8
                    matches.append(environment_match)
1550 8
        return matches
1551

1552 8
    def to_dict(self):
1553
        """Convert to dictionary representation."""
1554
        # Implement abstract method Serializable.to_dict()
1555 0
        raise NotImplementedError()  # TODO
1556

1557 8
    @classmethod
1558
    def from_dict(cls, d):
1559
        """Static constructor from dictionary representation."""
1560
        # Implement abstract method Serializable.to_dict()
1561 0
        raise NotImplementedError()  # TODO
1562

1563
    # TODO: Merge this into Molecule.from_networkx if/when we implement that.
1564
    # TODO: can we now remove this as we have the ability to do this in the Molecule class?
1565 8
    @staticmethod
1566
    def _networkx_to_hill_formula(mol_graph):
1567
        """
1568
        Convert a networkX representation of a molecule to a molecule formula. Used in printing out
1569
        informative error messages when a molecule from an openmm topology can't be matched.
1570

1571
        Parameters
1572
        ----------
1573
        mol_graph : a networkX graph
1574
            The graph representation of a molecule
1575

1576
        Returns
1577
        -------
1578
        formula : str
1579
            The molecular formula of the graph molecule
1580
        """
1581 0
        from simtk.openmm.app import Element
1582

1583
        # Make a flat list of all atomic numbers in the molecule
1584 0
        atom_nums = []
1585 0
        for idx in mol_graph.nodes:
1586 0
            atom_nums.append(mol_graph.nodes[idx]["atomic_number"])
1587

1588
        # Count the number of instances of each atomic number
1589 0
        at_num_to_counts = dict([(unq, atom_nums.count(unq)) for unq in atom_nums])
1590

1591 0
        symbol_to_counts = {}
1592
        # Check for C and H first, to make a correct hill formula (remember dicts in python 3.6+ are ordered)
1593 0
        if 6 in at_num_to_counts:
1594 0
            symbol_to_counts["C"] = at_num_to_counts[6]
1595 0
            del at_num_to_counts[6]
1596

1597 0
        if 1 in at_num_to_counts:
1598 0
            symbol_to_counts["H"] = at_num_to_counts[1]
1599 0
            del at_num_to_counts[1]
1600

1601
        # Now count instances of all elements other than C and H, in order of ascending atomic number
1602 0
        sorted_atom_nums = sorted(at_num_to_counts.keys())
1603 0
        for atom_num in sorted_atom_nums:
1604 0
            symbol_to_counts[
1605
                Element.getByAtomicNumber(atom_num).symbol
1606
            ] = at_num_to_counts[atom_num]
1607

1608
        # Finally format the formula as string
1609 0
        formula = ""
1610 0
        for ele, count in symbol_to_counts.items():
1611 0
            formula += f"{ele}{count}"
1612 0
        return formula
1613

1614 8
    @classmethod
1615 8
    def from_openmm(cls, openmm_topology, unique_molecules=None):
1616
        """
1617
        Construct an openforcefield Topology object from an OpenMM Topology object.
1618

1619
        Parameters
1620
        ----------
1621
        openmm_topology : simtk.openmm.app.Topology
1622
            An OpenMM Topology object
1623
        unique_molecules : iterable of objects that can be used to construct unique Molecule objects
1624
            All unique molecules must be provided, in any order, though multiple copies of each molecule are allowed.
1625
            The atomic elements and bond connectivity will be used to match the reference molecules
1626
            to molecule graphs appearing in the OpenMM ``Topology``. If bond orders are present in the
1627
            OpenMM ``Topology``, these will be used in matching as well.
1628
            If all bonds have bond orders assigned in ``mdtraj_topology``, these bond orders will be used to attempt to construct
1629
            the list of unique Molecules if the ``unique_molecules`` argument is omitted.
1630

1631
        Returns
1632
        -------
1633
        topology : openforcefield.topology.Topology
1634
            An openforcefield Topology object
1635
        """
1636 8
        import networkx as nx
1637

1638 8
        from openforcefield.topology.molecule import Molecule
1639

1640
        # Check to see if the openMM system has defined bond orders, by looping over all Bonds in the Topology.
1641 8
        omm_has_bond_orders = True
1642 8
        for omm_bond in openmm_topology.bonds():
1643 8
            if omm_bond.order is None:
1644 8
                omm_has_bond_orders = False
1645

1646
        # Convert all unique mols to graphs
1647 8
        topology = cls()
1648 8
        graph_to_unq_mol = {}
1649 8
        for unq_mol in unique_molecules:
1650 8
            unq_mol_graph = unq_mol.to_networkx()
1651 8
            for existing_graph in graph_to_unq_mol.keys():
1652 8
                if Molecule.are_isomorphic(
1653
                    existing_graph,
1654
                    unq_mol_graph,
1655
                    return_atom_map=False,
1656
                    aromatic_matching=False,
1657
                    formal_charge_matching=False,
1658
                    bond_order_matching=omm_has_bond_orders,
1659
                    atom_stereochemistry_matching=False,
1660
                    bond_stereochemistry_matching=False,
1661
                )[0]:
1662 4
                    msg = (
1663
                        "Error: Two unique molecules have indistinguishable "
1664
                        "graphs: {} and {}".format(
1665
                            unq_mol, graph_to_unq_mol[existing_graph]
1666
                        )
1667
                    )
1668 4
                    raise DuplicateUniqueMoleculeError(msg)
1669 8
            graph_to_unq_mol[unq_mol_graph] = unq_mol
1670

1671
        # Convert all openMM mols to graphs
1672 8
        omm_topology_G = nx.Graph()
1673 8
        for atom in openmm_topology.atoms():
1674 8
            omm_topology_G.add_node(
1675
                atom.index, atomic_number=atom.element.atomic_number
1676
            )
1677 8
        for bond in openmm_topology.bonds():
1678 8
            omm_topology_G.add_edge(
1679
                bond.atom1.index, bond.atom2.index, bond_order=bond.order
1680
            )
1681

1682
        # For each connected subgraph (molecule) in the topology, find its match in unique_molecules
1683 8
        topology_molecules_to_add = list()
1684 8
        for omm_mol_G in (
1685
            omm_topology_G.subgraph(c).copy()
1686
            for c in nx.connected_components(omm_topology_G)
1687
        ):
1688 8
            match_found = False
1689 8
            for unq_mol_G in graph_to_unq_mol.keys():
1690 8
                isomorphic, mapping = Molecule.are_isomorphic(
1691
                    omm_mol_G,
1692
                    unq_mol_G,
1693
                    return_atom_map=True,
1694
                    aromatic_matching=False,
1695
                    formal_charge_matching=False,
1696
                    bond_order_matching=omm_has_bond_orders,
1697
                    atom_stereochemistry_matching=False,
1698
                    bond_stereochemistry_matching=False,
1699
                )
1700 8
                if isomorphic:
1701
                    # Take the first valid atom indexing map
1702 8
                    first_topology_atom_index = min(mapping.keys())
1703 8
                    topology_molecules_to_add.append(
1704
                        (first_topology_atom_index, unq_mol_G, mapping.items())
1705
                    )
1706 8
                    match_found = True
1707 8
                    break
1708 8
            if match_found is False:
1709 8
                hill_formula = Molecule.to_hill_formula(omm_mol_G)
1710 8
                msg = f"No match found for molecule {hill_formula}. "
1711 8
                probably_missing_conect = [
1712
                    "C",
1713
                    "H",
1714
                    "O",
1715
                    "N",
1716
                    "P",
1717
                    "S",
1718
                    "F",
1719
                    "Cl",
1720
                    "Br",
1721
                ]
1722 8
                if hill_formula in probably_missing_conect:
1723 8
                    msg += (
1724
                        "This would be a very unusual molecule to try and parameterize, "
1725
                        "and it is likely that the data source it was read from does not "
1726
                        "contain connectivity information. If this molecule is coming from "
1727
                        "PDB, please ensure that the file contains CONECT records. The PDB "
1728
                        "format documentation (https://www.wwpdb.org/documentation/"
1729
                        'file-format-content/format33/sect10.html) states "CONECT records '
1730
                        "are mandatory for HET groups (excluding water) and for other bonds "
1731
                        'not specified in the standard residue connectivity table."'
1732
                    )
1733 8
                raise ValueError(msg)
1734

1735
        # The connected_component_subgraph function above may have scrambled the molecule order, so sort molecules
1736
        # by their first atom's topology index
1737 8
        topology_molecules_to_add.sort(key=lambda x: x[0])
1738 8
        for first_index, unq_mol_G, top_to_ref_index in topology_molecules_to_add:
1739 8
            local_top_to_ref_index = dict(
1740
                [
1741
                    (top_index - first_index, ref_index)
1742
                    for top_index, ref_index in top_to_ref_index
1743
                ]
1744
            )
1745 8
            topology.add_molecule(
1746
                graph_to_unq_mol[unq_mol_G],
1747
                local_topology_to_reference_index=local_top_to_ref_index,
1748
            )
1749

1750 8
        topology.box_vectors = openmm_topology.getPeriodicBoxVectors()
1751
        # TODO: How can we preserve metadata from the openMM topology when creating the OFF topology?
1752 8
        return topology
1753

1754 8
    def to_openmm(self, ensure_unique_atom_names=True):
1755
        """
1756
        Create an OpenMM Topology object.
1757

1758
        The OpenMM ``Topology`` object will have one residue per topology
1759
        molecule. Currently, the number of chains depends on how many copies
1760
        of the same molecule are in the ``Topology``. Molecules with more
1761
        than 5 copies are all assigned to a single chain, otherwise one
1762
        chain is created for each molecule. This behavior may change in
1763
        the future.
1764

1765
        Parameters
1766
        ----------
1767
        openmm_topology : simtk.openmm.app.Topology
1768
            An OpenMM Topology object
1769
        ensure_unique_atom_names : bool, optional. Default=True
1770
            Whether to check that the molecules in each molecule have
1771
            unique atom names, and regenerate them if not. Note that this
1772
            looks only at molecules, and does not guarantee uniqueness in
1773
            the entire Topology.
1774
        """
1775 8
        from simtk.openmm.app import Aromatic, Double, Single
1776 8
        from simtk.openmm.app import Topology as OMMTopology
1777 8
        from simtk.openmm.app import Triple
1778 8
        from simtk.openmm.app.element import Element as OMMElement
1779

1780 8
        omm_topology = OMMTopology()
1781

1782
        # Create unique atom names
1783 8
        if ensure_unique_atom_names:
1784 8
            for ref_mol in self.reference_molecules:
1785 8
                if not ref_mol.has_unique_atom_names:
1786 8
                    ref_mol.generate_unique_atom_names()
1787

1788
        # Keep track of which chains and residues have been added.
1789 8
        mol_to_chains = {}
1790 8
        mol_to_residues = {}
1791

1792
        # Go through atoms in OpenFF to preserve the order.
1793 8
        omm_atoms = []
1794
        # We need to iterate over the topology molecules if we want to
1795
        # keep track of chains/residues as Atom.topology_molecule is
1796
        # instantiated every time and can't be used as a key.
1797 8
        for topology_molecule in self.topology_molecules:
1798 8
            for atom in topology_molecule.atoms:
1799 8
                reference_molecule = topology_molecule.reference_molecule
1800 8
                n_molecules = len(
1801
                    self._reference_molecule_to_topology_molecules[reference_molecule]
1802
                )
1803

1804
                # Add 1 chain per molecule unless there are more than 5 copies,
1805
                # in which case we add a single chain for all of them.
1806 8
                if n_molecules <= 5:
1807
                    # We associate a chain to each molecule.
1808 8
                    key_molecule = topology_molecule
1809
                else:
1810
                    # We associate a chain to all the topology molecule.
1811 0
                    key_molecule = reference_molecule
1812

1813
                # Create a new chain if it doesn't exit.
1814 8
                try:
1815 8
                    chain = mol_to_chains[key_molecule]
1816 8
                except KeyError:
1817 8
                    chain = omm_topology.addChain()
1818 8
                    mol_to_chains[key_molecule] = chain
1819

1820
                # Add one molecule for each topology molecule.
1821 8
                try:
1822 8
                    residue = mol_to_residues[topology_molecule]
1823 8
                except KeyError:
1824 8
                    residue = omm_topology.addResidue(reference_molecule.name, chain)
1825 8
                    mol_to_residues[topology_molecule] = residue
1826

1827
                # Add atom.
1828 8
                element = OMMElement.getByAtomicNumber(atom.atomic_number)
1829 8
                omm_atom = omm_topology.addAtom(atom.atom.name, element, residue)
1830

1831
                # Make sure that OpenFF and OpenMM Topology atoms have the same indices.
1832 8
                assert atom.topology_atom_index == int(omm_atom.id) - 1
1833 8
                omm_atoms.append(omm_atom)
1834

1835
        # Add all bonds.
1836 8
        bond_types = {1: Single, 2: Double, 3: Triple}
1837 8
        for bond in self.topology_bonds:
1838 8
            atom1, atom2 = bond.atoms
1839 8
            atom1_idx, atom2_idx = atom1.topology_atom_index, atom2.topology_atom_index
1840 8
            bond_type = (
1841
                Aromatic if bond.bond.is_aromatic else bond_types[bond.bond_order]
1842
            )
1843 8
            omm_topology.addBond(
1844
                omm_atoms[atom1_idx],
1845
                omm_atoms[atom2_idx],
1846
                type=bond_type,
1847
                order=bond.bond_order,
1848
            )
1849

1850 8
        if self.box_vectors is not None:
1851 0
            omm_topology.setPeriodicBoxVectors(self.box_vectors)
1852 8
        return omm_topology
1853

1854 8
    @staticmethod
1855 8
    def from_mdtraj(mdtraj_topology, unique_molecules=None):
1856
        """
1857
        Construct an openforcefield Topology object from an MDTraj Topology object.
1858

1859
        Parameters
1860
        ----------
1861
        mdtraj_topology : mdtraj.Topology
1862
            An MDTraj Topology object
1863
        unique_molecules : iterable of objects that can be used to construct unique Molecule objects
1864
            All unique molecules mult be provided, in any order, though multiple copies of each molecule are allowed.
1865
            The atomic elements and bond connectivity will be used to match the reference molecules
1866
            to molecule graphs appearing in the MDTraj ``Topology``. If bond orders are present in the
1867
            MDTraj ``Topology``, these will be used in matching as well.
1868
            If all bonds have bond orders assigned in ``mdtraj_topology``, these bond orders will be used to attempt to construct
1869
            the list of unique Molecules if the ``unique_molecules`` argument is omitted.
1870

1871
        Returns
1872
        -------
1873
        topology : openforcefield.Topology
1874
            An openforcefield Topology object
1875
        """
1876 0
        return Topology.from_openmm(
1877
            mdtraj_topology.to_openmm(), unique_molecules=unique_molecules
1878
        )
1879

1880
    # TODO: Jeff prepended an underscore on this before 0.2.0 release to remove it from the API.
1881
    #       Before exposing this, we should look carefully at the information that is preserved/lost during this
1882
    #       conversion, and make it clear what would happen to this information in a round trip. For example,
1883
    #       we should know what would happen to formal and partial bond orders and charges, stereochemistry, and
1884
    #       multi-conformer information. It will be important to document these risks to users, as all of these
1885
    #       factors could lead to unintended behavior during system parameterization.
1886 8
    def _to_mdtraj(self):
1887
        """
1888
        Create an MDTraj Topology object.
1889

1890
        Returns
1891
        ----------
1892
        mdtraj_topology : mdtraj.Topology
1893
            An MDTraj Topology object
1894
        #"""
1895 0
        import mdtraj as md
1896

1897 0
        return md.Topology.from_openmm(self.to_openmm())
1898

1899 8
    @staticmethod
1900 8
    def from_parmed(parmed_structure, unique_molecules=None):
1901
        """
1902
        .. warning:: This functionality will be implemented in a future toolkit release.
1903

1904
        Construct an openforcefield Topology object from a ParmEd Structure object.
1905

1906
        Parameters
1907
        ----------
1908
        parmed_structure : parmed.Structure
1909
            A ParmEd structure object
1910
        unique_molecules : iterable of objects that can be used to construct unique Molecule objects
1911
            All unique molecules mult be provided, in any order, though multiple copies of each molecule are allowed.
1912
            The atomic elements and bond connectivity will be used to match the reference molecules
1913
            to molecule graphs appearing in the structure's ``topology`` object. If bond orders are present in the
1914
            structure's ``topology`` object, these will be used in matching as well.
1915
            If all bonds have bond orders assigned in the structure's ``topology`` object,
1916
            these bond orders will be used to attempt to construct
1917
            the list of unique Molecules if the ``unique_molecules`` argument is omitted.
1918

1919
        Returns
1920
        -------
1921
        topology : openforcefield.Topology
1922
            An openforcefield Topology object
1923
        """
1924 0
        import parmed
1925

1926
        # TODO: Implement functionality
1927 0
        raise NotImplementedError
1928

1929 8
    def to_parmed(self):
1930
        """
1931

1932
        .. warning:: This functionality will be implemented in a future toolkit release.
1933

1934
        Create a ParmEd Structure object.
1935

1936
        Returns
1937
        ----------
1938
        parmed_structure : parmed.Structure
1939
            A ParmEd Structure objecft
1940
        """
1941 0
        import parmed
1942

1943
        # TODO: Implement functionality
1944 0
        raise NotImplementedError
1945

1946
    # TODO: Jeff prepended an underscore on this before 0.2.0 release to remove it from the API.
1947
    #       This function is deprecated and expects the OpenEye toolkit. We need to discuss what
1948
    #       to do with this functionality in light of our move to the ToolkitWrapper architecture.
1949
    #       Also, as written, this function implies several things about our toolkit's ability to
1950
    #       handle biopolymers. We need to discuss how much of this functionality we will expose
1951
    #       and how we can make our toolkit's current scope clear to users..
1952 8
    @staticmethod
1953
    def _from_openeye(oemol):
1954
        """
1955
        Create a Molecule from an OpenEye molecule.
1956

1957
        Requires the OpenEye toolkit to be installed.
1958

1959
        Parameters
1960
        ----------
1961
        oemol : openeye.oechem.OEMol
1962
            An OpenEye molecule
1963

1964
        Returns
1965
        -------
1966
        molecule : openforcefield.Molecule
1967
            An openforcefield molecule
1968

1969
        """
1970
        # TODO: Convert this to cls.from_molecules(Molecule.from_openeye())?
1971
        # OE Hierarchical molecule view
1972 0
        hv = oechem.OEHierView(
1973
            oemol,
1974
            oechem.OEAssumption_BondedResidue
1975
            + oechem.OEAssumption_ResPerceived
1976
            + oechem.OEAssumption_PDBOrder,
1977
        )
1978

1979
        # Create empty OpenMM Topology
1980 0
        topology = app.Topology()
1981
        # Dictionary used to map oe atoms to openmm atoms
1982 0
        oe_atom_to_openmm_at = {}
1983

1984 0
        for chain in hv.GetChains():
1985
            # TODO: Fail if hv contains more than one molecule.
1986

1987
            # Create empty OpenMM Chain
1988 0
            openmm_chain = topology.addChain(chain.GetChainID())
1989

1990 0
            for frag in chain.GetFragments():
1991

1992 0
                for hres in frag.GetResidues():
1993

1994
                    # Get OE residue
1995 0
                    oe_res = hres.GetOEResidue()
1996
                    # Create OpenMM residue
1997 0
                    openmm_res = topology.addResidue(oe_res.GetName(), openmm_chain)
1998

1999 0
                    for oe_at in hres.GetAtoms():
2000
                        # Select atom element based on the atomic number
2001 0
                        element = app.element.Element.getByAtomicNumber(
2002
                            oe_at.GetAtomicNum()
2003
                        )
2004
                        # Add atom OpenMM atom to the topology
2005 0
                        openmm_at = topology.addAtom(
2006
                            oe_at.GetName(), element, openmm_res
2007
                        )
2008 0
                        openmm_at.index = oe_at.GetIdx()
2009
                        # Add atom to the mapping dictionary
2010 0
                        oe_atom_to_openmm_at[oe_at] = openmm_at
2011

2012 0
        if topology.getNumAtoms() != mol.NumAtoms():
2013 0
            oechem.OEThrow.Error(
2014
                "OpenMM topology and OEMol number of atoms mismatching: "
2015
                "OpenMM = {} vs OEMol  = {}".format(
2016
                    topology.getNumAtoms(), mol.NumAtoms()
2017
                )
2018
            )
2019

2020
        # Count the number of bonds in the openmm topology
2021 0
        omm_bond_count = 0
2022

2023 0
        def IsAmideBond(oe_bond):
2024
            # TODO: Can this be replaced by a SMARTS query?
2025

2026
            # This supporting function checks if the passed bond is an amide bond or not.
2027
            # Our definition of amide bond C-N between a Carbon and a Nitrogen atom is:
2028
            #          O
2029
            #          ║
2030
            #  CA or O-C-N-
2031
            #            |
2032

2033
            # The amide bond C-N is a single bond
2034 0
            if oe_bond.GetOrder() != 1:
2035 0
                return False
2036

2037 0
            atomB = oe_bond.GetBgn()
2038 0
            atomE = oe_bond.GetEnd()
2039

2040
            # The amide bond is made by Carbon and Nitrogen atoms
2041 0
            if not (
2042
                atomB.IsCarbon()
2043
                and atomE.IsNitrogen()
2044
                or (atomB.IsNitrogen() and atomE.IsCarbon())
2045
            ):
2046 0
                return False
2047

2048
            # Select Carbon and Nitrogen atoms
2049 0
            if atomB.IsCarbon():
2050 0
                C_atom = atomB
2051 0
                N_atom = atomE
2052
            else:
2053 0
                C_atom = atomE
2054 0
                N_atom = atomB
2055

2056
            # Carbon and Nitrogen atoms must have 3 neighbour atoms
2057 0
            if not (C_atom.GetDegree() == 3 and N_atom.GetDegree() == 3):
2058 0
                return False
2059

2060 0
            double_bonds = 0
2061 0
            single_bonds = 0
2062

2063 0
            for bond in C_atom.GetBonds():
2064
                # The C-O bond can be single or double.
2065 0
                if (bond.GetBgn() == C_atom and bond.GetEnd().IsOxygen()) or (
2066
                    bond.GetBgn().IsOxygen() and bond.GetEnd() == C_atom
2067
                ):
2068 0
                    if bond.GetOrder() == 2:
2069 0
                        double_bonds += 1
2070 0
                    if bond.GetOrder() == 1:
2071 0
                        single_bonds += 1
2072
                # The CA-C bond is single
2073 0
                if (bond.GetBgn() == C_atom and bond.GetEnd().IsCarbon()) or (
2074
                    bond.GetBgn().IsCarbon() and bond.GetEnd() == C_atom
2075
                ):
2076 0
                    if bond.GetOrder() == 1:
2077 0
                        single_bonds += 1
2078
            # Just one double and one single bonds are connected to C
2079
            # In this case the bond is an amide bond
2080 0
            if double_bonds == 1 and single_bonds == 1:
2081 0
                return True
2082
            else:
2083 0
                return False
2084

2085
        # Creating bonds
2086 0
        for oe_bond in mol.GetBonds():
2087
            # Set the bond type
2088 0
            if oe_bond.GetType() is not "":
2089 0
                if oe_bond.GetType() in [
2090
                    "Single",
2091
                    "Double",
2092
                    "Triple",
2093
                    "Aromatic",
2094
                    "Amide",
2095
                ]:
2096 0
                    off_bondtype = oe_bond.GetType()
2097
                else:
2098 0
                    off_bondtype = None
2099
            else:
2100 0
                if oe_bond.IsAromatic():
2101 0
                    oe_bond.SetType("Aromatic")
2102 0
                    off_bondtype = "Aromatic"
2103 0
                elif oe_bond.GetOrder() == 2:
2104 0
                    oe_bond.SetType("Double")
2105 0
                    off_bondtype = "Double"
2106 0
                elif oe_bond.GetOrder() == 3:
2107 0
                    oe_bond.SetType("Triple")
2108 0
                    off_bondtype = "Triple"
2109 0
                elif IsAmideBond(oe_bond):
2110 0
                    oe_bond.SetType("Amide")
2111 0
                    off_bondtype = "Amide"
2112 0
                elif oe_bond.GetOrder() == 1:
2113 0
                    oe_bond.SetType("Single")
2114 0
                    off_bondtype = "Single"
2115
                else:
2116 0
                    off_bondtype = None
2117

2118 0
            molecule.add_bond(
2119
                oe_atom_to_openmm_at[oe_bond.GetBgn()],
2120
                oe_atom_to_openmm_at[oe_bond.GetEnd()],
2121
                type=off_bondtype,
2122
                order=oe_bond.GetOrder(),
2123
            )
2124

2125 0
        if molecule.n_bondsphe != mol.NumBonds():
2126 0
            oechem.OEThrow.Error(
2127
                "OpenMM topology and OEMol number of bonds mismatching: "
2128
                "OpenMM = {} vs OEMol  = {}".format(omm_bond_count, mol.NumBonds())
2129
            )
2130

2131 0
        dic = mol.GetCoords()
2132 0
        positions = [Vec3(v[0], v[1], v[2]) for k, v in dic.items()] * unit.angstrom
2133

2134 0
        return topology, positions
2135

2136
    # TODO: Jeff prepended an underscore on this before 0.2.0 release to remove it from the API.
2137
    #       This function is deprecated and expects the OpenEye toolkit. We need to discuss what
2138
    #       to do with this functionality in light of our move to the ToolkitWrapper architecture.
2139
    #       It also expects Topology to be organized by chain, which is not currently the case.
2140
    #       Bringing this function back would require non-trivial engineering and testing, and we
2141
    #       would want to discuss what "guarantee" of correctness it could offer.
2142 8
    def _to_openeye(self, positions=None, aromaticity_model=DEFAULT_AROMATICITY_MODEL):
2143
        """
2144
        Create an OpenEye OEMol from the topology
2145

2146
        Requires the OpenEye toolkit to be installed.
2147

2148
        Returns
2149
        -------
2150
        oemol : openeye.oechem.OEMol
2151
            An OpenEye molecule
2152
        positions : simtk.unit.Quantity with shape [nparticles,3], optional, default=None
2153
            Positions to use in constructing OEMol.
2154
            If virtual sites are present in the Topology, these indices will be skipped.
2155

2156
        NOTE: This comes from https://github.com/oess/oeommtools/blob/master/oeommtools/utils.py
2157

2158
        """
2159 0
        oe_mol = oechem.OEMol()
2160

2161
        # Python set used to identify atoms that are not in protein residues
2162 0
        keep = set(proteinResidues).union(dnaResidues).union(rnaResidues)
2163

2164 0
        for chain in topology.chains():
2165 0
            for res in chain.residues():
2166
                # Create an OEResidue
2167 0
                oe_res = oechem.OEResidue()
2168
                # Set OEResidue name
2169 0
                oe_res.SetName(res.name)
2170
                # If the atom is not a protein atom then set its heteroatom
2171
                # flag to True
2172 0
                if res.name not in keep:
2173 0
                    oe_res.SetFragmentNumber(chain.index + 1)
2174 0
                    oe_res.SetHetAtom(True)
2175
                # Set OEResidue Chain ID
2176 0
                oe_res.SetChainID(chain.id)
2177
                # res_idx = int(res.id) - chain.index * len(chain._residues)
2178
                # Set OEResidue number
2179 0
                oe_res.SetResidueNumber(int(res.id))
2180

2181 0
                for openmm_at in res.atoms():
2182
                    # Create an OEAtom  based on the atomic number
2183 0
                    oe_atom = oe_mol.NewAtom(openmm_at.element._atomic_number)
2184
                    # Set atom name
2185 0
                    oe_atom.SetName(openmm_at.name)
2186
                    # Set Symbol
2187 0
                    oe_atom.SetType(openmm_at.element.symbol)
2188
                    # Set Atom index
2189 0
                    oe_res.SetSerialNumber(openmm_at.index + 1)
2190
                    # Commit the changes
2191 0
                    oechem.OEAtomSetResidue(oe_atom, oe_res)
2192
                    # Update the dictionary OpenMM to OE
2193 0
                    openmm_atom_to_oe_atom[openmm_at] = oe_atom
2194

2195 0
        if self.n_atoms != oe_mol.NumAtoms():
2196 0
            raise Exception(
2197
                "OEMol has an unexpected number of atoms: "
2198
                "Molecule has {} atoms, while OEMol has {} atoms".format(
2199
                    topology.n_atom, oe_mol.NumAtoms()
2200
                )
2201
            )
2202

2203
        # Create bonds
2204 0
        for off_bond in self.topology_bonds():
2205 0
            oe_mol.NewBond(oe_atoms[bond.atom1], oe_atoms[bond.atom2], bond.bond_order)
2206 0
            if off_bond.type:
2207 0
                if off_bond.type == "Aromatic":
2208 0
                    oe_atom0.SetAromatic(True)
2209 0
                    oe_atom1.SetAromatic(True)
2210 0
                    oe_bond.SetAromatic(True)
2211 0
                    oe_bond.SetType("Aromatic")
2212 0
                elif off_bond.type in ["Single", "Double", "Triple", "Amide"]:
2213 0
                    oe_bond.SetType(omm_bond.type)
2214
                else:
2215 0
                    oe_bond.SetType("")
2216

2217 0
        if self.n_bonds != oe_mol.NumBonds():
2218 0
            oechem.OEThrow.Erorr(
2219
                "OEMol has an unexpected number of bonds:: "
2220
                "Molecule has {} bonds, while OEMol has {} bonds".format(
2221
                    self.n_bond, oe_mol.NumBonds()
2222
                )
2223
            )
2224

2225 0
        if positions is not None:
2226
            # Set the OEMol positions
2227 0
            particle_indices = [
2228
                atom.particle_index for atom in self.topology_atoms
2229
            ]  # get particle indices
2230 0
            pos = positions[particle_indices].value_in_units_of(unit.angstrom)
2231 0
            pos = list(itertools.chain.from_iterable(pos))
2232 0
            oe_mol.SetCoords(pos)
2233 0
            oechem.OESetDimensionFromCoords(oe_mol)
2234

2235 0
        return oe_mol
2236

2237 8
    def get_bond_between(self, i, j):
2238
        """Returns the bond between two atoms
2239

2240
        Parameters
2241
        ----------
2242
        i, j : int or TopologyAtom
2243
            Atoms or atom indices to check
2244

2245
        Returns
2246
        -------
2247
        bond : TopologyBond
2248
            The bond between i and j.
2249

2250
        """
2251 8
        if (type(i) is int) and (type(j) is int):
2252 0
            atomi = self.atom(i)
2253 0
            atomj = self.atom(j)
2254 8
        elif (type(i) is TopologyAtom) and (type(j) is TopologyAtom):
2255 8
            atomi = i
2256 8
            atomj = j
2257
        else:
2258 0
            raise Exception(
2259
                "Invalid input passed to is_bonded(). Expected ints or TopologyAtoms, "
2260
                "got {} and {}".format(i, j)
2261
            )
2262

2263 8
        for top_bond in atomi.topology_bonds:
2264 8
            for top_atom in top_bond.atoms:
2265 8
                if top_atom == atomi:
2266 8
                    continue
2267 8
                if top_atom == atomj:
2268 8
                    return top_bond
2269

2270 8
        raise NotBondedError("No bond between atom {} and {}".format(i, j))
2271

2272 8
    def is_bonded(self, i, j):
2273
        """Returns True if the two atoms are bonded
2274

2275
        Parameters
2276
        ----------
2277
        i, j : int or TopologyAtom
2278
            Atoms or atom indices to check
2279

2280
        Returns
2281
        -------
2282
        is_bonded : bool
2283
            True if atoms are bonded, False otherwise.
2284

2285
        """
2286 8
        try:
2287 8
            self.get_bond_between(i, j)
2288 8
            return True
2289 8
        except NotBondedError:
2290 8
            return False
2291

2292 8
    def atom(self, atom_topology_index):
2293
        """
2294
        Get the TopologyAtom at a given Topology atom index.
2295

2296
        Parameters
2297
        ----------
2298
        atom_topology_index : int
2299
             The index of the TopologyAtom in this Topology
2300

2301
        Returns
2302
        -------
2303
        An openforcefield.topology.TopologyAtom
2304
        """
2305 8
        assert type(atom_topology_index) is int
2306 8
        assert 0 <= atom_topology_index < self.n_topology_atoms
2307 8
        this_molecule_start_index = 0
2308 8
        next_molecule_start_index = 0
2309 8
        for topology_molecule in self._topology_molecules:
2310 8
            next_molecule_start_index += topology_molecule.n_atoms
2311 8
            if next_molecule_start_index > atom_topology_index:
2312 8
                atom_molecule_index = atom_topology_index - this_molecule_start_index
2313
                # NOTE: the index here should still be in the topology index order, NOT the reference molecule's
2314 8
                return topology_molecule.atom(atom_molecule_index)
2315 8
            this_molecule_start_index += topology_molecule.n_atoms
2316

2317
        # Potentially more computationally efficient lookup ( O(largest_molecule_natoms)? )
2318
        # start_index_2_top_mol is an ordered dict of [starting_atom_index] --> [topology_molecule]
2319
        # search_range = range(atom_topology_index - largest_molecule_natoms, atom_topology_index)
2320
        # search_index = atom_topology_index
2321
        # while not(search_index in start_index_2_top_mol.keys()): # Only efficient if start_index_2_top_mol.keys() is a set (constant time lookups)
2322
        #     search_index -= 1
2323
        # topology_molecule = start_index_2_top_mol(search_index)
2324
        # atom_molecule_index = atom_topology_index - search_index
2325
        # return topology_molecule.atom(atom_molecule_index)
2326

2327 8
    def virtual_site(self, vsite_topology_index):
2328
        """
2329
        Get the TopologyAtom at a given Topology atom index.
2330

2331
        Parameters
2332
        ----------
2333
        vsite_topology_index : int
2334
             The index of the TopologyVirtualSite in this Topology
2335

2336
        Returns
2337
        -------
2338
        An openforcefield.topology.TopologyVirtualSite
2339

2340
        """
2341 8
        assert type(vsite_topology_index) is int
2342 8
        assert 0 <= vsite_topology_index < self.n_topology_virtual_sites
2343 8
        this_molecule_start_index = 0
2344 8
        next_molecule_start_index = 0
2345 8
        for topology_molecule in self._topology_molecules:
2346 8
            next_molecule_start_index += topology_molecule.n_virtual_sites
2347 8
            if next_molecule_start_index > vsite_topology_index:
2348 8
                vsite_molecule_index = vsite_topology_index - this_molecule_start_index
2349 8
                return topology_molecule.virtual_site(vsite_molecule_index)
2350 8
            this_molecule_start_index += topology_molecule.n_virtual_sites
2351

2352 8
    def bond(self, bond_topology_index):
2353
        """
2354
        Get the TopologyBond at a given Topology bond index.
2355

2356
        Parameters
2357
        ----------
2358
        bond_topology_index : int
2359
             The index of the TopologyBond in this Topology
2360

2361
        Returns
2362
        -------
2363
        An openforcefield.topology.TopologyBond
2364
        """
2365 8
        assert type(bond_topology_index) is int
2366 8
        assert 0 <= bond_topology_index < self.n_topology_bonds
2367 8
        this_molecule_start_index = 0
2368 8
        next_molecule_start_index = 0
2369 8
        for topology_molecule in self._topology_molecules:
2370 8
            next_molecule_start_index += topology_molecule.n_bonds
2371 8
            if next_molecule_start_index > bond_topology_index:
2372 8
                bond_molecule_index = bond_topology_index - this_molecule_start_index
2373 8
                return topology_molecule.bond(bond_molecule_index)
2374 8
            this_molecule_start_index += topology_molecule.n_bonds
2375

2376 8
    def add_particle(self, particle):
2377
        """Add a Particle to the Topology.
2378

2379
        Parameters
2380
        ----------
2381
        particle : Particle
2382
            The Particle to be added.
2383
            The Topology will take ownership of the Particle.
2384

2385
        """
2386 0
        pass
2387

2388 8
    def add_molecule(self, molecule, local_topology_to_reference_index=None):
2389
        """Add a Molecule to the Topology. You can optionally request that the atoms be added to the Topology in
2390
        a different order than they appear in the Molecule.
2391

2392
        Parameters
2393
        ----------
2394
        molecule : Molecule
2395
            The Molecule to be added.
2396
        local_topology_to_reference_index: dict, optional, default = None
2397
            Dictionary of {TopologyMolecule_atom_index : Molecule_atom_index} for the TopologyMolecule that will be
2398
            built. If None, this function will add the atoms to the Topology in the order that they appear in the
2399
            reference molecule.
2400

2401
        Returns
2402
        -------
2403
        index : int
2404
            The index of this molecule in the topology
2405
        """
2406 8
        from openforcefield.topology.molecule import FrozenMolecule, Molecule
2407

2408 8
        if local_topology_to_reference_index is None:
2409 8
            local_topology_to_reference_index = dict(
2410
                (i, i) for i in range(molecule.n_atoms)
2411
            )
2412

2413 8
        mol_smiles = molecule.to_smiles()
2414 8
        reference_molecule = None
2415 8
        for potential_ref_mol in self._reference_molecule_to_topology_molecules.keys():
2416 8
            if mol_smiles == potential_ref_mol.to_smiles():
2417
                # If the molecule is already in the Topology.reference_molecules, add another reference to it in
2418
                # Topology.molecules
2419 8
                reference_molecule = potential_ref_mol
2420

2421
                # Graph-match this molecule to see if it's in the same order
2422
                # Default settings use full matching
2423 8
                _, atom_map = Molecule.are_isomorphic(
2424
                    molecule, reference_molecule, return_atom_map=True
2425
                )
2426 8
                if atom_map is None:
2427 0
                    raise Exception(1)
2428 8
                new_mapping = {}
2429 8
                for local_top_idx, ref_idx in local_topology_to_reference_index.items():
2430 8
                    new_mapping[local_top_idx] = atom_map[ref_idx]
2431 8
                local_topology_to_reference_index = new_mapping
2432
                # raise Exception(local_topology_to_reference_index)
2433 8
                break
2434 8
        if reference_molecule is None:
2435
            # If it's a new unique molecule, make and store an immutable copy of it
2436 8
            reference_molecule = FrozenMolecule(molecule)
2437 8
            self._reference_molecule_to_topology_molecules[reference_molecule] = list()
2438

2439 8
        topology_molecule = TopologyMolecule(
2440
            reference_molecule, self, local_topology_to_reference_index
2441
        )
2442 8
        self._topology_molecules.append(topology_molecule)
2443 8
        self._reference_molecule_to_topology_molecules[reference_molecule].append(
2444
            self._topology_molecules[-1]
2445
        )
2446

2447 8
        index = len(self._topology_molecules) - 1
2448 8
        return index
2449

2450 8
    def add_constraint(self, iatom, jatom, distance=True):
2451
        """
2452
        Mark a pair of atoms as constrained.
2453

2454
        Constraints between atoms that are not bonded (e.g., rigid waters) are permissible.
2455

2456
        Parameters
2457
        ----------
2458
        iatom, jatom : Atom
2459
            Atoms to mark as constrained
2460
            These atoms may be bonded or not in the Topology
2461
        distance : simtk.unit.Quantity, optional, default=True
2462
            Constraint distance
2463
            ``True`` if distance has yet to be determined
2464
            ``False`` if constraint is to be removed
2465

2466
        """
2467
        # Check that constraint hasn't already been specified.
2468 8
        if (iatom, jatom) in self._constrained_atom_pairs:
2469 8
            existing_distance = self._constrained_atom_pairs[(iatom, jatom)]
2470 8
            if unit.is_quantity(existing_distance) and (distance is True):
2471 0
                raise Exception(
2472
                    f"Atoms ({iatom},{jatom}) already constrained with distance {existing_distance} but attempting to override with unspecified distance"
2473
                )
2474 8
            if (existing_distance is True) and (distance is True):
2475 0
                raise Exception(
2476
                    f"Atoms ({iatom},{jatom}) already constrained with unspecified distance but attempting to override with unspecified distance"
2477
                )
2478 8
            if distance is False:
2479 0
                del self._constrained_atom_pairs[(iatom, jatom)]
2480 0
                del self._constrained_atom_pairs[(jatom, iatom)]
2481 0
                return
2482

2483 8
        self._constrained_atom_pairs[(iatom, jatom)] = distance
2484 8
        self._constrained_atom_pairs[(jatom, iatom)] = distance
2485

2486 8
    def is_constrained(self, iatom, jatom):
2487
        """
2488
        Check if a pair of atoms are marked as constrained.
2489

2490
        Parameters
2491
        ----------
2492
        iatom, jatom : int
2493
            Indices of atoms to mark as constrained.
2494

2495
        Returns
2496
        -------
2497
        distance : simtk.unit.Quantity or bool
2498
            True if constrained but constraints have not yet been applied
2499
            Distance if constraint has already been added to System
2500

2501
        """
2502 8
        if (iatom, jatom) in self._constrained_atom_pairs:
2503 8
            return self._constrained_atom_pairs[(iatom, jatom)]
2504
        else:
2505 8
            return False

Read our documentation on viewing source code .

Loading