Showing 10 of 32 files from the diff.

@@ -18,11 +18,15 @@
Loading
18 18
import os
19 19
from chemper.chemper_utils import get_data_path
20 20
from chemper.mol_toolkits.adapters import MolAdapter, BondAdapter, AtomAdapter
21 -
# define default_toolkit:
21 +
22 +
# identify which toolkits are available
23 +
HAS_OE = False
24 +
HAS_RDK = False
22 25
try:
23 26
    from openeye import oechem
24 -
    from chemper.mol_toolkits import cp_openeye
25 -
    HAS_OE = True
27 +
    if oechem.OEChemIsLicensed():
28 +
        from chemper.mol_toolkits import cp_openeye
29 +
        HAS_OE = True
26 30
except ImportError:
27 31
    HAS_OE = False
28 32
@@ -33,101 +37,219 @@
Loading
33 37
except ImportError:
34 38
    HAS_RDK = False
35 39
36 -
if not HAS_OE and not HAS_RDK:
37 -
    raise ImportWarning("No Cheminformatics toolkit was found."\
38 -
                        " currently chemper supports OpenEye and RDKit")
39 -
40 -
def Mol(mol):
41 -
    """
42 -
43 -
    Parameters
44 -
    ----------
45 -
    mol - a molecule object from any supported toolkit
46 -
47 -
    Returns
48 -
    -------
49 -
    mol - a chemper Mol
50 -
51 -
    """
52 -
    # if it is already a chemper molecule return as is
53 -
    if isinstance(mol, MolAdapter):
54 -
        return mol
55 -
56 -
    # check if this is an Openeye molecule
57 -
    if HAS_OE and isinstance(mol, oechem.OEMolBase):
58 -
        return cp_openeye.Mol(mol)
59 -
60 -
    # check if it is an RDK molecule
61 -
    if HAS_RDK and isinstance(mol, Chem.rdchem.Mol):
62 -
        return cp_rdk.Mol(mol)
63 -
64 -
    err_msg = """
65 -
    Your molecule has the type %s.
66 -
    Currently chemper only supports OpenEye and RDKit.
67 -
    To add support to a new toolkit submit an issue on GitHub at
68 -
    github.com/MobleyLab/chemper
69 -
    """
70 -
    raise TypeError(err_msg % type(mol))
71 -
72 -
73 -
def Atom(atom):
74 -
    """
75 -
76 -
    Parameters
77 -
    ----------
78 -
    atom - Atom object from any supported toolkit
79 40
80 -
    Returns
81 -
    -------
82 -
    atom - a chemper Atom object
83 -
84 -
    """
85 -
    if isinstance(atom, AtomAdapter):
86 -
        return atom
87 -
88 -
    if HAS_OE and isinstance(atom, oechem.OEAtomBase):
89 -
        return cp_openeye.Atom(atom)
90 -
91 -
    if HAS_RDK and isinstance(atom, Chem.rdchem.Atom):
92 -
        return cp_rdk.Atom(atom)
93 -
94 -
    err_msg = """
95 -
    Your atom has the type %s.
96 -
    Currently chemper only supports OpenEye and RDKit.
97 -
    To add support to a new toolkit submit an issue on GitHub at
98 -
    github.com/MobleyLab/chemper
99 -
    """
100 -
    raise TypeError(err_msg % type(atom))
101 -
102 -
103 -
def Bond(bond):
104 -
    """
41 +
# ======================================================================
42 +
# Find super Mol/Atom/Bond classes
43 +
# ======================================================================
105 44
106 -
    Parameters
107 -
    ----------
108 -
    bond - Bond object from any supported toolkit
109 -
110 -
    Returns
111 -
    -------
112 -
    bond - a chemper Bond object
113 -
114 -
    """
115 -
    if isinstance(bond, BondAdapter):
116 -
        return bond
117 -
118 -
    if HAS_OE and isinstance(bond, oechem.OEBondBase):
119 -
        return cp_openeye.Bond(bond)
120 -
121 -
    if HAS_RDK and isinstance(bond, Chem.rdchem.Bond):
122 -
        return cp_rdk.Bond(bond)
45 +
if not HAS_OE and not HAS_RDK:
46 +
    raise ImportWarning("No Cheminformatics toolkit was found." \
47 +
                        " currently ChemPer supports OpenEye and RDKit")
48 +
49 +
50 +
class Mol(MolAdapter):
    """
    Switchboard that turns a supported molecule into the matching
    toolkit-specific ChemPer Mol.

    Instantiating this class re-types the new object as ``cp_openeye.Mol``
    or ``cp_rdk.Mol`` depending on the input. Every other instance method
    defined here is a placeholder that raises NotImplementedError.
    """
    # TODO: This is a really interesting implementation. It basically says
    #  "I inherit from MolAdapter, and -- trust me -- whatever you get out of
    #  this will have all the MolAdapter functionality implemented". The class
    #  is basically just a switchboard to figure out which OTHER MolAdapter
    #  subclass is appropriate to the input. This design pattern makes me
    #  nervous because it would seem to break a contract with the user, where
    #  they call one class's __init__ function, but receive an object of a
    #  different class.
    #  Alternatives to this would include
    #       * Having each MolAdapter subclass have a `from_object` method,
    #         similar to OFFTK. Then, the MolAdapter constructor could iterate
    #         over all subclasses (using something like an `all_subclasses`
    #         method) and try creating each class from the input, until one
    #         succeeds.
    #       * Just use the OFF Molecule class, which avoids this trouble by
    #         copying the data OUT of the OEMol or RDMol into a
    #         toolkit-independent format (OFFMol). Then, when manipulation
    #         operations occur, the OFFMol is converted back into the
    #         appropriate toolkit molecule, and the operation happens natively
    #         there. I think that smirks_search is the only function that
    #         would need to be ported over if that was done.

    def __init__(self, mol):
        """
        Parameters
        ----------
        mol : ChemPer Mol, OpenEye molecule (OEMolBase) or RDKit Mol
              molecule to wrap

        Raises
        ------
        TypeError
            if mol does not belong to an available, supported toolkit
        """
        # OpenEye is checked first, then RDKit; within each toolkit an
        # existing ChemPer wrapper is checked before the raw toolkit object
        if HAS_OE:
            if isinstance(mol, cp_openeye.Mol):
                # already wrapped: reuse the underlying OpenEye molecule
                self.mol = mol.mol
                self.__class__ = cp_openeye.Mol
                return
            if isinstance(mol, oechem.OEMolBase):
                self.__class__ = cp_openeye.Mol
                cp_openeye.Mol.__init__(self, mol)
                return

        if HAS_RDK:
            if isinstance(mol, cp_rdk.Mol):
                # already wrapped: reuse the underlying RDKit molecule
                self.mol = mol.mol
                self.__class__ = cp_rdk.Mol
                return
            if isinstance(mol, Chem.rdchem.Mol):
                self.__class__ = cp_rdk.Mol
                cp_rdk.Mol.__init__(self, mol)
                return

        # nothing matched: report the offending type
        err_msg = """
            Your molecule has the type %s.
            Currently ChemPer only supports OpenEye and RDKit.
            To add support to a new toolkit submit an issue on GitHub at
            github.com/MobleyLab/chemper
            """
        raise TypeError(err_msg % type(mol))

    @staticmethod
    def from_smiles(smiles):
        """
        Creates a ChemPer Mol from a SMILES string, using OpenEye when it
        is available and RDKit otherwise.

        Parameters
        ----------
        smiles : str
                 SMILES string for a molecule

        Returns
        -------
        mol : ChemPer Mol
        """
        toolkit_mol = cp_openeye.Mol if HAS_OE else cp_rdk.Mol
        return toolkit_mol.from_smiles(smiles)

    def set_aromaticity_mdl(self):
        """
        Sets the aromaticity flags in this molecule to use the MDL model
        """
        raise NotImplementedError()

    def get_atoms(self):
        """
        Returns
        -------
        atom_list : list[ChemPer Atoms]
            list of all atoms in the molecule
        """
        raise NotImplementedError()

    def get_atom_by_index(self, idx):
        """
        Parameters
        ----------
        idx : int
            atom index

        Returns
        -------
        atom : ChemPer Atom
            atom with index idx
        """
        raise NotImplementedError()

    def get_bonds(self):
        """
        Returns
        -------
        bond_list : list[ChemPer Bonds]
            list of all bonds in the molecule
        """
        raise NotImplementedError()

    def get_bond_by_index(self, idx):
        """
        Parameters
        ----------
        idx : int
            bond index

        Returns
        -------
        bond : ChemPer Bond
            bond with index idx
        """
        raise NotImplementedError()

    def get_bond_by_atoms(self, atom1, atom2):
        """
        Finds a bond between two atoms

        Parameters
        ----------
        atom1 : ChemPer Atom
        atom2 : ChemPer Atom

        Returns
        -------
        bond : ChemPer Bond or None
            If atoms are connected returns bond otherwise None
        """
        raise NotImplementedError()

    def smirks_search(self, smirks):
        """
        Performs a substructure search on the molecule with the provided
        SMIRKS pattern. Note - this function expects SMIRKS patterns with
        indexed atoms, that is with :n for at least some atoms.

        Parameters
        ----------
        smirks : str
            SMIRKS pattern with indexed atoms (:n)

        Returns
        -------
        matches : list[match dictionary]
            match dictionaries have the form {smirks index: atom index}
        """
        raise NotImplementedError()

    def get_smiles(self):
        """
        Returns
        -------
        smiles : str
            SMILES string for the molecule
        """
        raise NotImplementedError()
196 +
197 +
class Atom:
    """
    Switchboard that turns a supported atom into the matching
    toolkit-specific ChemPer Atom.

    Calling ``Atom(obj)`` re-types the new instance as ``cp_openeye.Atom``
    or ``cp_rdk.Atom`` depending on the type of ``obj``.
    """
    def __init__(self, atom):
        """
        Parameters
        ----------
        atom : ChemPer Atom, OpenEye atom (OEAtomBase) or RDKit Atom
               object to wrap

        Raises
        ------
        TypeError
            if atom does not belong to an available, supported toolkit
        """
        if HAS_OE and isinstance(atom, cp_openeye.Atom):
            # Already a ChemPer wrapper: share its complete instance state.
            # Copying only `.atom` would drop anything else the toolkit
            # class stores in its own __init__ (e.g. a cached index), and
            # the new object would later fail with an AttributeError.
            self.__dict__.update(vars(atom))
            self.__class__ = cp_openeye.Atom

        elif HAS_OE and isinstance(atom, oechem.OEAtomBase):
            self.__class__ = cp_openeye.Atom
            self.__class__.__init__(self, atom)

        elif HAS_RDK and isinstance(atom, cp_rdk.Atom):
            # same reasoning as the OpenEye wrapper branch above
            self.__dict__.update(vars(atom))
            self.__class__ = cp_rdk.Atom

        elif HAS_RDK and isinstance(atom, Chem.rdchem.Atom):
            self.__class__ = cp_rdk.Atom
            self.__class__.__init__(self, atom)

        else:
            err_msg = """
            Your atom has the type %s.
            Currently ChemPer only supports OpenEye and RDKit.
            To add support to a new toolkit submit an issue on GitHub at
            github.com/MobleyLab/chemper
            """
            raise TypeError(err_msg % type(atom))
224 +
225 +
226 +
class Bond:
    """
    Switchboard that turns a supported bond into the matching
    toolkit-specific ChemPer Bond.

    Calling ``Bond(obj)`` re-types the new instance as ``cp_openeye.Bond``
    or ``cp_rdk.Bond`` depending on the type of ``obj``.
    """
    def __init__(self, bond):
        """
        Parameters
        ----------
        bond : ChemPer Bond, OpenEye bond (OEBondBase) or RDKit Bond
               object to wrap

        Raises
        ------
        TypeError
            if bond does not belong to an available, supported toolkit
        """
        if HAS_OE and isinstance(bond, cp_openeye.Bond):
            # Already a ChemPer wrapper: share its complete instance state.
            # Copying only `.bond` would drop anything else the toolkit
            # class stores in its own __init__ (e.g. cached order, index
            # or end atoms), and the new object would later fail with an
            # AttributeError.
            self.__dict__.update(vars(bond))
            self.__class__ = cp_openeye.Bond

        elif HAS_OE and isinstance(bond, oechem.OEBondBase):
            self.__class__ = cp_openeye.Bond
            self.__class__.__init__(self, bond)

        elif HAS_RDK and isinstance(bond, cp_rdk.Bond):
            # same reasoning as the OpenEye wrapper branch above
            self.__dict__.update(vars(bond))
            self.__class__ = cp_rdk.Bond

        elif HAS_RDK and isinstance(bond, Chem.rdchem.Bond):
            self.__class__ = cp_rdk.Bond
            self.__class__.__init__(self, bond)

        else:
            err_msg = """
            Your bond has the type %s.
            Currently ChemPer only supports OpenEye and RDKit.
            To add support to a new toolkit submit an issue on GitHub at
            github.com/MobleyLab/chemper
            """
            raise TypeError(err_msg % type(bond))
123 252
124 -
    err_msg = """
125 -
    Your bond has the type %s.
126 -
    Currently chemper only supports OpenEye and RDKit.
127 -
    To add support to a new toolkit submit an issue on GitHub at
128 -
    github.com/MobleyLab/chemper
129 -
    """
130 -
    raise TypeError(err_msg % type(bond))
131 253
132 254
# =======================================
133 255
# check user specifications
@@ -138,16 +260,16 @@
Loading
138 260
139 261
    Parameters
140 262
    ----------
141 -
    toolkit - str
142 -
        'openeye', 'rdkit', or None
143 -
        if None then the toolkit will be picked automatically
263 +
    toolkit : str or None
264 +
              'openeye', 'rdkit', or None
265 +
              if None then the toolkit will be picked automatically
144 266
145 267
    Returns
146 268
    -------
147 -
    toolkit - str
148 -
    returns the name of the toolkit to be used.
149 -
    If the package isn't available for the specified toolkit
150 -
    then an error is raised instead
269 +
    toolkit : str
270 +
              returns the name of the toolkit to be used.
271 +
              If the package isn't available for the specified toolkit
272 +
              then an error is raised instead
151 273
    """
152 274
    # check for a stable
153 275
    if toolkit is None:
@@ -166,23 +288,24 @@
Loading
166 288
        raise ImportError("Toolkit (%s) was not importable" % toolkit)
167 289
168 290
    else:
169 -
        raise ImportError("The provided toolkit (%s) isn't supported,"\
291 +
        raise ImportError("The provided toolkit (%s) is not supported,"\
170 292
                          " ChemPer only supports 'openeye' and 'rdkit'" \
171 293
                          % toolkit)
172 294
295 +
173 296
def check_mol_file(file_name):
174 297
    """
175 298
176 299
    Parameters
177 300
    ----------
178 -
    file_name - str
179 -
        path to a molecule file
301 +
    file_name : str
302 +
                path to a molecule file
180 303
181 304
    Returns
182 305
    -------
183 -
    path - str
184 -
        absolute path to a molecule file
185 -
        raises error if file isn't available
306 +
    path : str
307 +
           absolute path to a molecule file
308 +
           raises error if file isn't available
186 309
    """
187 310
    # is it a local file?
188 311
    if os.path.exists(file_name):
@@ -196,53 +319,29 @@
Loading
196 319
    return path
197 320
198 321
199 -
# ================================================================
200 -
# get molecule from SMILES
201 -
# ================================================================
202 -
203 -
def MolFromSmiles(smiles, toolkit=None):
204 -
    """
205 -
206 -
    Parameters
207 -
    ----------
208 -
    smiles - str
209 -
        SMILES string
210 -
    toolkit - str or None
211 -
        'openeye' or 'rdkit' or None to let chemper pick
212 -
213 -
    Returns
214 -
    -------
215 -
    mol - ChemPer Mol
216 -
    """
217 -
    toolkit = check_toolkit(toolkit)
218 -
    if toolkit.lower() == 'openeye':
219 -
        return cp_openeye.MolFromSmiles(smiles)
220 -
221 -
    return cp_rdk.MolFromSmiles(smiles)
222 -
223 322
# =======================================
224 323
# get molecules from files
225 324
# =======================================
226 325
227 326
def mols_from_mol2(mol2_file, toolkit=None):
228 327
    """
229 -
    Creates a list of chemper Mols from the provided mol2 file
328 +
    Creates a list of ChemPer Mols from the provided mol2 file
230 329
    using a specified or default toolkit
231 330
232 331
    Parameters
233 332
    ----------
234 -
    mol2_file: str
235 -
               path to mol2 file, this can be a relative or absolute path locally
236 -
               or the path to a molecules file stored in chemper at chemper/data/molecules/
237 -
    toolkit: None or str
238 -
             'openeye' or 'rdkit' are the two supported toolkits
239 -
             if None then the first package available (in the order listed here)
240 -
             will be used
333 +
    mol2_file : str
334 +
                path to mol2 file, this can be a relative or absolute path locally
335 +
                or the path to a molecules file stored in ChemPer at chemper/data/molecules/
336 +
    toolkit : None or str
337 +
              'openeye' or 'rdkit' are the two supported toolkits
338 +
              if None then the first package available (in the order listed here)
339 +
              will be used
241 340
242 341
    Returns
243 342
    -------
244 -
    mol2s: list of chemper Mol
245 -
           list of molecules in the provided mol2 file
343 +
    mol2s : list[ChemPer Mol]
344 +
            List of molecules in the provided mol2 file
246 345
    """
247 346
    toolkit = check_toolkit(toolkit)
248 347
    mol2_path = check_mol_file(mol2_file)
@@ -251,6 +350,3 @@
Loading
251 350
        return cp_openeye.mols_from_mol2(mol2_path)
252 351
253 352
    return cp_rdk.mols_from_mol2(mol2_path)
254 -
255 -
256 -

@@ -5,10 +5,6 @@
Loading
5 5
6 6
The classes provided here follow the structure in adapters.
7 7
This is a wrapper allowing our actual package to use RDKit
8 -
9 -
AUTHORS:
10 -
11 -
Caitlin C. Bannan <bannanc@uci.edu>, Mobley Group, University of California Irvine
12 8
"""
13 9
14 10
from chemper.mol_toolkits.adapters import MolAdapter, AtomAdapter, BondAdapter
@@ -20,108 +16,50 @@
Loading
20 16
# =======================================
21 17
22 18
class Mol(MolAdapter):
23 -
    """
24 -
    Wrapper for RDKMol to create a chemper Mol
25 -
    """
26 19
    def __init__(self, mol):
27 20
        """
21 +
        Create a ChemPer Mol from an RDMol
22 +
28 23
        Parameters
29 24
        ----------
30 -
        mol: openeye RDKMol object
31 -
            openeye molecule to convert to chemper Mol object
25 +
        mol : RDKit Mol object
26 +
              RDKit molecule to convert to ChemPer Mol object
32 27
        """
33 -
        if type(mol) != Chem.rdchem.Mol:
34 -
            raise Exception("Expecting an rdchem.Mol instead of %s" % type(mol))
28 +
        if not isinstance(mol, Chem.rdchem.Mol):
29 +
            raise TypeError("Expecting an rdchem.Mol instead of %s" % type(mol))
35 30
        self.mol = mol
36 31
37 32
    def __str__(self): return self.get_smiles()
38 33
34 +
    @classmethod
    def from_smiles(cls, smiles):
        """
        Creates a ChemPer Mol from a SMILES string, adding explicit
        hydrogens to the parsed molecule.

        Parameters
        ----------
        smiles : str
                 SMILES string for a molecule

        Returns
        -------
        mol : ChemPer Mol

        Raises
        ------
        ValueError
            if RDKit cannot parse the SMILES string
        """
        parsed = Chem.MolFromSmiles(smiles)
        # Chem.MolFromSmiles signals a parse failure by returning None
        if parsed is None:
            raise ValueError('Could not parse SMILES %s' % smiles)
        with_hydrogens = Chem.AddHs(parsed)
        return cls(with_hydrogens)
40 +
39 41
    def set_aromaticity_mdl(self):
40 -
        """
41 -
        Sets the aromaticity flags in this molecule to use the MDL model
42 -
        """
43 42
        Chem.SanitizeMol(self.mol, Chem.SANITIZE_ALL^Chem.SANITIZE_SETAROMATICITY)
44 43
        Chem.SetAromaticity(self.mol, Chem.AromaticityModel.AROMATICITY_MDL)
45 44
46 45
    def get_atoms(self):
47 -
        """
48 -
        Returns
49 -
        -------
50 -
        atom_list: list of chemper Atoms
51 -
            list of all atoms in the molecule
52 -
        """
53 46
        return [Atom(a) for a in self.mol.GetAtoms()]
54 47
55 48
    def get_atom_by_index(self, idx):
56 -
        """
57 -
        Parameters
58 -
        ----------
59 -
        idx: int
60 -
            atom index
61 -
62 -
        Returns
63 -
        -------
64 -
        atom: chemper Atom object
65 -
            atom with index idx
66 -
        """
67 49
        return Atom(self.mol.GetAtomWithIdx(idx))
68 50
69 51
    def get_bonds(self):
70 -
        """
71 -
        Returns
72 -
        -------
73 -
        bond_list: list of chemper Bonds
74 -
            list of all bonds in the molecule
75 -
        """
76 52
        return [Bond(b) for b in self.mol.GetBonds()]
77 53
78 54
    def get_bond_by_index(self, idx):
79 -
        """
80 -
        Parameters
81 -
        ----------
82 -
        idx: ing
83 -
            bond index
84 -
85 -
        Returns
86 -
        -------
87 -
        bond: chemper Bond object
88 -
            bond with index idx
89 -
        """
90 55
        return Bond(self.mol.GetBondWithIdx(idx))
91 56
92 57
    def get_bond_by_atoms(self, atom1, atom2):
93 -
        """
94 -
        Finds a bond between two atoms
95 -
        Parameters
96 -
        ----------
97 -
        atom1: chemper Atom object
98 -
        atom2: chemper Atom object
99 -
100 -
        Returns
101 -
        -------
102 -
        bond: chemper Bond object or None
103 -
            if atoms are connected returns bond otherwise None
104 -
        """
105 58
        if not atom1.is_connected_to(atom2):
106 59
            return None
107 60
        return Bond(self.mol.GetBondBetweenAtoms(atom1.get_index(), atom2.get_index()))
108 61
109 62
    def smirks_search(self, smirks):
110 -
        """
111 -
        Performs a substructure search on the molecule with the provided
112 -
        SMIRKS pattern. Note - this function expects SMIRKS patterns with indexed atoms
113 -
        that is with :n for at least some atoms.
114 -
115 -
        Parameters
116 -
        ----------
117 -
        smirks: str
118 -
            SMIRKS pattern with indexed atoms (:n)
119 -
120 -
        Returns
121 -
        -------
122 -
        matches: list of dictionaries
123 -
            dictionary for each match with the form {smirks index: atom index}
124 -
        """
125 63
        cmol = Chem.Mol(self.mol)
126 64
127 65
        matches = list()
@@ -144,31 +82,9 @@
Loading
144 82
        return matches
145 83
146 84
    def get_smiles(self):
147 -
        """
148 -
        Returns
149 -
        -------
150 -
        smiles: str
151 -
            SMILES string for the molecule
152 -
        """
153 85
        smiles = Chem.MolToSmiles(Chem.RemoveHs(self.mol))
154 86
        return smiles
155 87
156 -
class MolFromSmiles(Mol):
157 -
    """
158 -
    Creates a chemper Mol from a smiles string
159 -
    It automatically adds explicit hydrogens.
160 -
    """
161 -
    def __init__(self, smiles):
162 -
        """
163 -
        Parameters
164 -
        ----------
165 -
        smiles: str
166 -
            SMILES string for a molecule
167 -
        """
168 -
        mol = Chem.MolFromSmiles(smiles)
169 -
        if mol is None:
170 -
            raise ValueError('Could not parse SMILES %s' % smiles)
171 -
        Mol.__init__(self, Chem.AddHs(mol))
172 88
173 89
# =======================================
174 90
# Atom Class
@@ -176,90 +92,39 @@
Loading
176 92
177 93
178 94
class Atom(AtomAdapter):
179 -
    """
180 -
    Wrapper for RDKAtom to create a chemper Atom
181 -
    """
182 95
    def __init__(self, atom):
183 96
        """
97 +
        Create a ChemPer Atom from an RDAtom
98 +
184 99
        Parameters
185 100
        ----------
186 -
        atom: RDKAtom
187 -
            Atom object from an RDK molecule
101 +
        atom : RDKit Atom
102 +
               Atom object from an RDK molecule
188 103
        """
189 -
        if type(atom) != Chem.rdchem.Atom:
190 -
            raise Exception("Expecting an rdchem.Atom instead of %s" % type(atom))
104 +
        if not isinstance(atom, Chem.rdchem.Atom):
105 +
            raise TypeError("Expecting a rdchem.Atom instead of %s" % type(atom))
191 106
        self.atom = atom
107 +
        self._idx = self.atom.GetIdx()
192 108
193 -
    def atomic_number(self):
194 -
        """
195 -
        Returns
196 -
        -------
197 -
        atomic_number: int
198 -
            atomic number for the atom
199 -
        """
200 -
        return self.atom.GetAtomicNum()
109 +
    def __str__(self): return '%i%s' % (self._idx, self.atom.GetSymbol())
201 110
202 -
    def degree(self):
203 -
        """
204 -
        Returns
205 -
        -------
206 -
        degree: int
207 -
            degree or number of explicit bonds around the atom
208 -
        """
209 -
        return self.atom.GetDegree()
111 +
    def atomic_number(self): return self.atom.GetAtomicNum()
210 112
211 -
    def connectivity(self):
212 -
        """
213 -
        Returns
214 -
        -------
215 -
        connectivity: int
216 -
            connectivity or total number of bonds around the atom
217 -
        """
218 -
        return self.atom.GetTotalDegree()
113 +
    def degree(self): return self.atom.GetDegree()
219 114
220 -
    def valence(self):
221 -
        """
222 -
        Returns
223 -
        -------
224 -
        valence: int
225 -
            the atoms valence
226 -
        """
227 -
        return self.atom.GetTotalValence()
115 +
    def connectivity(self): return self.atom.GetTotalDegree()
228 116
229 -
    def formal_charge(self):
230 -
        """
231 -
        Returns
232 -
        -------
233 -
        formal_charge: int
234 -
            the atom's formal charge
235 -
        """
236 -
        return self.atom.GetFormalCharge()
117 +
    def valence(self): return self.atom.GetTotalValence()
118 +
119 +
    def formal_charge(self): return self.atom.GetFormalCharge()
237 120
238 121
    def hydrogen_count(self):
239 -
        """
240 -
        Returns
241 -
        -------
242 -
        H_count: int
243 -
            total number of hydrogen atoms connected to this Atom
244 -
        """
245 122
        return self.atom.GetTotalNumHs(includeNeighbors=True)
246 123
247 124
    def ring_connectivity(self):
248 -
        """
249 -
        Returns
250 -
        -------
251 -
        ring_connectivity: int
252 -
            number of bonds on the atom that are a part of a ring
253 -
        """
254 125
        return len([b for b in self.atom.GetBonds() if b.IsInRing()])
255 126
256 127
    def min_ring_size(self):
257 -
        """
258 -
        Returns
259 -
        -------
260 -
        min_ring_size: int
261 -
            size of the smallest ring this atom is a part of
262 -
        """
263 128
        if not self.atom.IsInRing():
264 129
            return 0
265 130
        for i in range(10000):
@@ -268,69 +133,23 @@
Loading
268 133
        # TODO: raise exception instead?
269 134
        return 10000
270 135
271 -
    def is_aromatic(self):
272 -
        """
273 -
        Returns
274 -
        -------
275 -
        is_aromatic: boolean
276 -
            True if the atom is aromatic otherwise False
277 -
        """
278 -
        return self.atom.GetIsAromatic()
136 +
    def is_aromatic(self): return self.atom.GetIsAromatic()
279 137
280 -
    def get_index(self):
281 -
        """
282 -
        Returns
283 -
        -------
284 -
        index: int
285 -
            atom index in its molecule
286 -
        """
287 -
        return self.atom.GetIdx()
138 +
    def get_index(self): return self._idx
288 139
289 140
    def is_connected_to(self, atom2):
290 -
        """
291 -
        Parameters
292 -
        ----------
293 -
        atom2: chemper Atom object
294 -
            atom to check if it is connected to this atom
295 -
296 -
        Returns
297 -
        -------
298 -
        connected: boolean
299 -
            True if atom2 is a direct neighbor or atom1
300 -
        """
301 -
        if not type(atom2.atom) is Chem.rdchem.Atom:
302 -
            # TODO: raise exception/return something else?
141 +
        if not isinstance(atom2.atom, Chem.rdchem.Atom):
303 142
            return False
304 143
        neighbors = [a.GetIdx() for a in self.atom.GetNeighbors()]
305 144
        return atom2.get_index() in neighbors
306 145
307 146
    def get_neighbors(self):
308 -
        """
309 -
        Returns
310 -
        -------
311 -
        neighbors: list of chemper Atoms
312 -
            atoms that are one bond away from this atom
313 -
        """
314 147
        return [Atom(a) for a in self.atom.GetNeighbors()]
315 148
316 149
    def get_bonds(self):
317 -
        """
318 -
        Returns
319 -
        -------
320 -
        bonds: list of chemper Bonds
321 -
            bonds connected to this atom
322 -
        """
323 150
        return [Bond(b) for b in self.atom.GetBonds()]
324 151
325 152
    def get_molecule(self):
326 -
        """
327 -
        Extracts the parent molecule this atom is in
328 -
329 -
        Returns
330 -
        -------
331 -
        mol: chemper Mol
332 -
            molecule this atom is stored in
333 -
        """
334 153
        mol = Chem.Mol(self.atom.GetOwningMol())
335 154
        return Mol(mol)
336 155
@@ -340,110 +159,62 @@
Loading
340 159
341 160
342 161
class Bond(BondAdapter):
    """
    ChemPer Bond backed by an RDKit bond.
    """
    def __init__(self, bond):
        """
        Creates a ChemPer Bond from an RDK Bond

        Parameters
        ----------
        bond : RDK Bond
               Bond object from an RDK molecule

        Raises
        ------
        TypeError
            if bond is not an instance of rdchem.Bond
        """
        if not isinstance(bond, Chem.rdchem.Bond):
            raise TypeError("Expecting an rdchem.Bond instead of %s" % type(bond))
        self.bond = bond

        # index, order and end atoms are read once here and cached
        self._idx = bond.GetIdx()
        self._order = bond.GetBondTypeAsDouble()

        # symbol used by __str__; orders not listed here fall back to '~'
        symbol_by_order = {1: '-', 2: '=', 3: '#', 1.5: ':'}
        self._order_symbol = symbol_by_order.get(self._order, '~')

        self._beginning = Atom(bond.GetBeginAtom())
        self._end = Atom(bond.GetEndAtom())

    def __str__(self):
        """
        Returns the bond index followed by begin atom, order symbol
        and end atom.
        """
        parts = (self.get_index(), self._beginning,
                 self._order_symbol, self._end)
        return "%i %s%s%s" % parts

    def get_order(self):
        """
        Returns
        -------
        order : int or float
            This is the absolute order, returns 1.5 if bond is aromatic
        """
        return self._order

    def get_atoms(self):
        """
        Returns
        -------
        atoms : list[ChemPer Atom]
            the two atoms connected by this bond
        """
        return [self._beginning, self._end]

    def is_ring(self):
        """
        Returns
        -------
        is_ring : boolean
            True if bond is a part of a ring, otherwise False
        """
        return self.bond.IsInRing()

    def is_aromatic(self):
        """
        Returns
        -------
        is_aromatic : boolean
            True if it is an aromatic bond
        """
        return self.bond.GetIsAromatic()

    def is_single(self):
        """
        Returns
        -------
        is_single : boolean
            True if it is a single bond
        """
        return self._order == 1

    def is_double(self):
        """
        Returns
        -------
        is_double : boolean
            True if it is a double bond
        """
        return self._order == 2

    def is_triple(self):
        """
        Returns
        -------
        is_triple : boolean
            True if it is a triple bond
        """
        return self._order == 3

    def get_molecule(self):
        """
        Extracts the parent molecule this bond is in

        Returns
        -------
        mol : ChemPer Mol
            molecule this bond is stored in
        """
        owner = Chem.Mol(self.bond.GetOwningMol())
        return Mol(owner)

    def get_index(self):
        """
        Returns
        -------
        index : int
            index of this bond in its parent molecule
        """
        return self._idx
210 +
211 +
# =====================================================================
212 +
# functions for importing molecules from files
213 +
# =====================================================================
443 214
444 215
def mols_from_mol2(mol2_file):
445 216
    """
446 -
    Parses a mol2 file into chemper molecules using RDKit
217 +
    Parses a mol2 file into ChemPer molecules using RDKit
447 218
448 219
    This is a hack for separating mol2 files taken from a Source Forge discussion here:
449 220
    https://www.mail-archive.com/rdkit-discuss@lists.sourceforge.net/msg01510.html
@@ -451,14 +222,14 @@
Loading
451 222
452 223
    Parameters
453 224
    ----------
454 -
    mol2_file: str
455 -
               relative or absolute path to a mol2 file you want to parse
456 -
               accessible form the current directory
225 +
    mol2_file : str
226 +
                relative or absolute path to a mol2 file you want to parse
227 +
                accessible from the current directory
457 228
458 229
    Returns
459 230
    -------
460 -
    mols: list of chemper Mols
461 -
          list of molecules in the mol2 file as chemper molecules
231 +
    mols : list[ChemPer Mol]
232 +
           list of molecules in the mol2 file as ChemPer molecules
462 233
    """
463 234
    # TODO: check that this works with mol2 files with a single molecule
464 235
    # TODO: figure out if @<TRIPOS>MOLECULE is the only delimiter acceptable in this file type
@@ -469,7 +240,7 @@
Loading
469 240
        mol_path = get_data_path(os.path.join('molecules', mol2_file))
470 241
471 242
        if not os.path.exists(mol_path):
472 -
            raise IOError("File '%s' not found locally or in chemper/data/molecules." % mol_file)
243 +
            raise IOError("File '%s' not found locally or in chemper/data/molecules." % mol2_file)
473 244
        else:
474 245
            mol2_file = mol_path
475 246
@@ -484,7 +255,7 @@
Loading
484 255
    molecules = list()
485 256
    mol2_block = list()
486 257
487 -
    file_open = open(mol2_file, 'r')
258 +
    file_open = open(mol2_file)
488 259
489 260
    for line in file_open:
490 261
        if line.startswith(delimiter) and mol2_block:

@@ -1,18 +1,14 @@
Loading
1 1
"""
2 2
adapters.py
3 3
4 -
This script contains adapters or the structure for atoms, molecules and
5 -
substructure searches.
4 +
This script contains adapters or the structure for
5 +
molecules, atoms, and bonds.
6 6
Our chemical perception code is designed to be independent of the user's
7 7
cheminformatics packages. For each cheminformatics package we support we
8 8
will provide classes following the structure in these adapters.
9 -
10 -
AUTHORS:
11 -
12 -
Caitlin C. Bannan <bannanc@uci.edu>, Mobley Group, University of California Irvine
13 9
"""
14 10
15 -
from abc import ABC, abstractmethod
11 +
from abc import ABC, abstractmethod, abstractclassmethod
16 12
17 13
18 14
# =======================================
@@ -21,12 +17,32 @@
Loading
21 17
22 18
class MolAdapter(ABC):
23 19
    """
24 -
    Template class for wrapping a molecule object
25 -
    from a given cheminformatics package into a chemper Mol.
26 -
    chemper `Mol` are initiated from the reference package molecule object.
27 -
    The class MolFromSmiles initiates a chemper Mol from a SMILES string.
28 -
    Currently we support OpenEye toolkits and RDKit
20 +
    This is a ChemPer wrapper for a molecule from
21 +
    one of the cheminformatics toolkits.
22 +
    ChemPer molecules are initiated from the reference package molecule object.
23 +
    Currently we support OpenEye and RDKit toolkits.
24 +
25 +
    Attributes
26 +
    ----------
27 +
    mol : toolkit Mol
28 +
          Mol object from the reference cheminformatics toolkit
29 29
    """
30 +
31 +
    @classmethod
32 +
    @abstractmethod
33 +
    def from_smiles(cls, smiles):
34 +
        """
35 +
        Creates a ChemPer Mol from a SMILES string
36 +
37 +
        Parameters
38 +
        ----------
39 +
        smiles : str
40 +
                 SMILES used to create molecule with wrapped toolkit
41 +
42 +
        Returns
43 +
        -------
44 +
        Mol : ChemPer Mol
45 +
        """
30 46
    @abstractmethod
31 47
    def set_aromaticity_mdl(self):
32 48
        """
@@ -39,7 +55,7 @@
Loading
39 55
        """
40 56
        Returns
41 57
        -------
42 -
        atom_list: list of chemper Atoms
58 +
        atom_list : list[ChemPer Atoms]
43 59
            list of all atoms in the molecule
44 60
        """
45 61
        pass
@@ -49,12 +65,12 @@
Loading
49 65
        """
50 66
        Parameters
51 67
        ----------
52 -
        idx: int
68 +
        idx : int
53 69
            atom index
54 70
55 71
        Returns
56 72
        -------
57 -
        atom: chemper Atom object
73 +
        atom : ChemPer Atom
58 74
            atom with index idx
59 75
        """
60 76
        pass
@@ -64,7 +80,7 @@
Loading
64 80
        """
65 81
        Returns
66 82
        -------
67 -
        bond_list: list of chemper Bonds
83 +
        bond_list : list[ChemPer Bonds]
68 84
            list of all bonds in the molecule
69 85
        """
70 86
        pass
@@ -74,12 +90,12 @@
Loading
74 90
        """
75 91
        Parameters
76 92
        ----------
77 -
        idx: ing
93 +
        idx: int
78 94
            bond index
79 95
80 96
        Returns
81 97
        -------
82 -
        bond: chemper Bond object
98 +
        bond: ChemPer Bond
83 99
            bond with index idx
84 100
        """
85 101
        pass
@@ -88,15 +104,16 @@
Loading
88 104
    def get_bond_by_atoms(self, atom1, atom2):
89 105
        """
90 106
        Finds a bond between two atoms
107 +
91 108
        Parameters
92 109
        ----------
93 -
        atom1: chemper Atom object
94 -
        atom2: chemper Atom object
110 +
        atom1 : ChemPer Atom
111 +
        atom2 : ChemPer Atom
95 112
96 113
        Returns
97 114
        -------
98 -
        bond: chemper Bond object or None
99 -
            if atoms are connected returns bond otherwise None
115 +
        bond : ChemPer Bond or None
116 +
            If atoms are connected returns bond otherwise None
100 117
        """
101 118
        pass
102 119
@@ -109,13 +126,13 @@
Loading
109 126
110 127
        Parameters
111 128
        ----------
112 -
        smirks: str
129 +
        smirks : str
113 130
            SMIRKS pattern with indexed atoms (:n)
114 131
115 132
        Returns
116 133
        -------
117 -
        matches: list of dictionaries
118 -
            dictionary for each match with the form {smirks index: atom index}
134 +
        matches : list[match dictionary]
135 +
            match dictionaries have the form {smirks index: atom index}
119 136
        """
120 137
        pass
121 138
@@ -136,18 +153,21 @@
Loading
136 153
137 154
class AtomAdapter(ABC):
138 155
    """
139 -
    Template class for wrapping an atom object
140 -
    from a given cheminformatics package into a chemper Atom.
141 -
    These are always initiated from a reference package Atom object.
142 -
    Currently we support OpenEye toolkits and RDKit
156 +
    This is a ChemPer wrapper for an atom from
157 +
    one of the cheminformatics toolkits.
158 +
    ChemPer Atoms are initiated from the reference package object.
159 +
    Currently we support OpenEye and RDKit toolkits.
160 +
161 +
    Attributes
162 +
    ----------
163 +
    atom : Atom from reference toolkit
143 164
    """
144 -
145 165
    @abstractmethod
146 166
    def atomic_number(self):
147 167
        """
148 168
        Returns
149 169
        -------
150 -
        atomic_number: int
170 +
        atomic_number : int
151 171
            atomic number for the atom
152 172
        """
153 173
        pass
@@ -157,8 +177,8 @@
Loading
157 177
        """
158 178
        Returns
159 179
        -------
160 -
        degree: int
161 -
            degree or number of explicit bonds around the atom
180 +
        degree : int
181 +
            degree or number of explicit bond orders around the atom
162 182
        """
163 183
        pass
164 184
@@ -167,8 +187,8 @@
Loading
167 187
        """
168 188
        Returns
169 189
        -------
170 -
        connectivity: int
171 -
            connectivity or total number of bonds around the atom
190 +
        connectivity : int
191 +
            connectivity or total number of bonds (regardless of order) around the atom
172 192
        """
173 193
        pass
174 194
@@ -177,8 +197,8 @@
Loading
177 197
        """
178 198
        Returns
179 199
        -------
180 -
        valence: int
181 -
            the atoms valence
200 +
        valence : int
201 +
            the atom's valence (equivalent to degree when all bonds are explicit)
182 202
        """
183 203
        pass
184 204
@@ -187,7 +207,7 @@
Loading
187 207
        """
188 208
        Returns
189 209
        -------
190 -
        formal_charge: int
210 +
        formal_charge : int
191 211
            the atom's formal charge
192 212
        """
193 213
        pass
@@ -197,7 +217,7 @@
Loading
197 217
        """
198 218
        Returns
199 219
        -------
200 -
        H_count: int
220 +
        H_count : int
201 221
            total number of hydrogen atoms connected to this Atom
202 222
        """
203 223
        pass
@@ -207,7 +227,7 @@
Loading
207 227
        """
208 228
        Returns
209 229
        -------
210 -
        min_ring_size: int
230 +
        min_ring_size : int
211 231
            size of the smallest ring this atom is a part of
212 232
        """
213 233
        pass
@@ -217,7 +237,7 @@
Loading
217 237
        """
218 238
        Returns
219 239
        -------
220 -
        ring_connectivity: int
240 +
        ring_connectivity : int
221 241
            number of bonds on the atom that are a part of a ring
222 242
        """
223 243
        pass
@@ -227,7 +247,7 @@
Loading
227 247
        """
228 248
        Returns
229 249
        -------
230 -
        is_aromatic: boolean
250 +
        is_aromatic : boolean
231 251
            True if the atom is aromatic otherwise False
232 252
        """
233 253
        pass
@@ -237,7 +257,7 @@
Loading
237 257
        """
238 258
        Returns
239 259
        -------
240 -
        index: int
260 +
        index : int
241 261
            atom index in its molecule
242 262
        """
243 263
        pass
@@ -247,13 +267,13 @@
Loading
247 267
        """
248 268
        Parameters
249 269
        ----------
250 -
        atom2: chemper Atom object
251 -
            atom to check if it is connected to this atom
270 +
        atom2 : ChemPer Atom
271 +
            Atom to check if it is bonded to this atom
252 272
253 273
        Returns
254 274
        -------
255 -
        connected: boolean
256 -
            True if atom2 is a direct neighbor or atom1
275 +
        connected : boolean
276 +
            True if atom2 is bonded to this atom
257 277
        """
258 278
        pass
259 279
@@ -262,8 +282,8 @@
Loading
262 282
        """
263 283
        Returns
264 284
        -------
265 -
        neighbors: list of chemper Atoms
266 -
            atoms that are one bond away from this atom
285 +
        neighbors : list[ChemPer Atoms]
286 +
            Atoms that are one bond away from this atom
267 287
        """
268 288
        pass
269 289
@@ -272,20 +292,20 @@
Loading
272 292
        """
273 293
        Returns
274 294
        -------
275 -
        bonds: list of chemper Bonds
276 -
            bonds connected to this atom
295 +
        bonds : list[ChemPer Bonds]
296 +
            Bonds connected to this atom
277 297
        """
278 298
        pass
279 299
280 300
    @abstractmethod
281 301
    def get_molecule(self):
282 302
        """
283 -
        Extracts the parent molecule this atom is in
303 +
        Extracts the parent molecule this atom is from.
284 304
285 305
        Returns
286 306
        -------
287 -
        mol: chemper Mol
288 -
            molecule this atom is stored in
307 +
        mol : ChemPer Mol
308 +
            Molecule this atom is stored in
289 309
        """
290 310
        pass
291 311
@@ -296,18 +316,21 @@
Loading
296 316
297 317
class BondAdapter(ABC):
298 318
    """
299 -
    Template class for wrapping a bond object
300 -
    from a given cheminformatics package into a chemper Bond.
301 -
    These are always initiated from a reference package Bond object.
302 -
    Currently we support OpenEye toolkits and RDKit
319 +
    This is a ChemPer wrapper for a bond from
320 +
    one of the cheminformatics toolkits.
321 +
    ChemPer Bonds are initiated from the reference package object.
322 +
    Currently we support OpenEye and RDKit toolkits.
323 +
324 +
    Attributes
325 +
    ----------
326 +
    bond : Bond from reference toolkit
303 327
    """
304 -
305 328
    @abstractmethod
306 329
    def get_order(self):
307 330
        """
308 331
        Returns
309 332
        -------
310 -
        order: int or float
333 +
        order : int or float
311 334
            This is the absolute order, returns 1.5 if bond is aromatic
312 335
        """
313 336
        pass
@@ -317,8 +340,8 @@
Loading
317 340
        """
318 341
        Returns
319 342
        -------
320 -
        atoms: list of chemper Atoms
321 -
            the two atoms connected by this bond
343 +
        atoms : list[ChemPer Atoms]
344 +
            The two atoms connected by this bond
322 345
        """
323 346
        pass
324 347
@@ -327,7 +350,7 @@
Loading
327 350
        """
328 351
        Returns
329 352
        -------
330 -
        is_ring: boolean
353 +
        is_ring : boolean
331 354
            True if bond is a part of a ring, otherwise False
332 355
        """
333 356
        pass
@@ -337,7 +360,7 @@
Loading
337 360
        """
338 361
        Returns
339 362
        -------
340 -
        is_aromatic: boolean
363 +
        is_aromatic : boolean
341 364
            True if it is an aromatic bond
342 365
        """
343 366
        pass
@@ -347,7 +370,7 @@
Loading
347 370
        """
348 371
        Returns
349 372
        -------
350 -
        is_single: boolean
373 +
        is_single : boolean
351 374
            True if it is a single bond
352 375
        """
353 376
        pass
@@ -357,7 +380,7 @@
Loading
357 380
        """
358 381
        Returns
359 382
        -------
360 -
        is_double: boolean
383 +
        is_double : boolean
361 384
            True if it is a double bond
362 385
        """
363 386
        pass
@@ -367,7 +390,7 @@
Loading
367 390
        """
368 391
        Returns
369 392
        -------
370 -
        is_triple: boolean
393 +
        is_triple : boolean
371 394
            True if it is a triple bond
372 395
        """
373 396
        pass
@@ -375,12 +398,12 @@
Loading
375 398
    @abstractmethod
376 399
    def get_molecule(self):
377 400
        """
378 -
        Extracts the parent molecule this bond is in
401 +
        Extracts the parent molecule this bond is from
379 402
380 403
        Returns
381 404
        -------
382 -
        mol: chemper Mol
383 -
            molecule this bond is stored in
405 +
        mol : ChemPer Mol
406 +
            Molecule this bond is stored in
384 407
        """
385 408
        pass
386 409
@@ -389,7 +412,7 @@
Loading
389 412
        """
390 413
        Returns
391 414
        -------
392 -
        index: int
415 +
        index : int
393 416
            index of this bond in its parent molecule
394 417
        """
395 418
        pass

@@ -23,10 +23,6 @@
Loading
23 23
if extracted SMIRKS patterns can do better than SMIRKY.
24 24
Also, this approach will be more general since the input clusters do not
25 25
rely on a reference force field.
26 -
27 -
AUTHORS
28 -
29 -
Caitlin Bannan <bannanc@uci.edu>, UC Irvine, Mobley Group
30 26
"""
31 27
#=============================================================================================
32 28
# GLOBAL IMPORTS
@@ -49,8 +45,13 @@
Loading
49 45
50 46
def print_smirks(smirks_list):
51 47
    """
52 -
    Prints out the current or provided smirks list
53 -
    in a table like format
48 +
    Prints out the provided smirks list
49 +
    in a table like format with label | SMIRKS
50 +
51 +
    Parameters
52 +
    -----------
53 +
    smirks_list : list of tuples
54 +
        list in the form [ (label, SMIRKS), ...]
54 55
    """
55 56
    str_form = " {0:<20} | {1:} "
56 57
@@ -65,13 +66,14 @@
Loading
65 66
66 67
class ClusteringError(Exception):
67 68
    """
68 -
    Exception for cases where smirks are inappropriate
69 -
    for the environment type they are being parsed into
69 +
    Exception for when the SMIRKSifier is unable to create
70 +
    a list of SMIRKS to maintain the input clusters.
70 71
    """
71 72
    def __init__(self, msg):
72 73
        Exception.__init__(self, msg)
73 74
        self.msg = msg
74 75
76 +
75 77
# =============================================================================================
76 78
# SMIRKSifier
77 79
# =============================================================================================
@@ -86,11 +88,11 @@
Loading
86 88
        """
87 89
        Parameters
88 90
        ----------
89 -
        molecules: list of Mols
91 +
        molecules : list of Mols
90 92
            These can be chemper Mols or molecules from any supported toolkit
91 93
            (currently OpenEye or RDKit)
92 94
93 -
        cluster_list: list of labels and smirks_atom_lists
95 +
        cluster_list : list of labels and smirks_atom_lists
94 96
            For each label the user should provide a list tuples for atom indices
95 97
            in each molecule you want included in that cluster.
96 98
@@ -103,22 +105,28 @@
Loading
103 105
            To see an example of this in action checkout
104 106
            https://github.com/MobleyLab/chemper/tree/master/examples
105 107
106 -
        max_layers: int (optional)
107 -
            how many atoms away from the indexed atoms should we consider at the maximum
108 +
        max_layers : int (optional)
108 109
            default = 5
110 +
            how many atoms away from the indexed atoms should
111 +
            we consider at the maximum
109 112
110 -
        verbose: boolean (optional)
111 -
            If true information is printed to the command line during reducing
113 +
        verbose : boolean (optional)
112 114
            default = True
115 +
            If true information is printed to the command line during reducing
113 116
114 -
        strict_smirks: boolean (optional)
115 -
            If False it will not raise an error when incapable of making SMIRKS letting
116 -
            a master user trouble shoot
117 +
        strict_smirks : boolean (optional)
118 +
            default = True
119 +
            If False it will not raise an error when incapable of making SMIRKS
120 +
            This setting is not recommended unless you are a master user
121 +
            or developer trying to test current behavior.
122 +
            The variable SMIRKSifier.checks will tell you if the SMIRKS
123 +
            generation failed when strict_smirks = False
117 124
        """
118 125
        self.molecules = [mol_toolkit.Mol(m) for m in molecules]
119 126
        self.intermediate_smirks = dict()
120 127
        self.cluster_list = cluster_list
121 128
        self.verbose = verbose
129 +
        self.max_layers = max_layers
122 130
        self.strict_smirks = strict_smirks
123 131
124 132
        # determine the type of SMIRKS for symmetry in indices purposes
@@ -126,10 +134,10 @@
Loading
126 134
        graph = ClusterGraph(self.molecules, cluster_list[0][1], 0)
127 135
        test_smirks = graph.as_smirks(compress=True)
128 136
        env = CE(test_smirks)
129 -
        if env.getType() is None:
137 +
        if env.get_type() is None:
130 138
            # corresponds to an unknown chemical pattern
131 139
            self.dict_type = dict
132 -
        elif env.getType().lower() == 'impropertorsion':
140 +
        elif env.get_type().lower() == 'impropertorsion':
133 141
            self.dict_type = ImproperDict
134 142
        else:
135 143
            self.dict_type = ValenceDict
@@ -151,7 +159,7 @@
Loading
151 159
                    self.cluster_dict[mol_idx][atom_tuple] = label
152 160
153 161
        # make SMIRKS patterns for input clusters
154 -
        self.current_smirks, self.layers = self.make_smirks(max_layers)
162 +
        self.current_smirks, self.layers = self.make_smirks()
155 163
        if self.verbose: print_smirks(self.current_smirks)
156 164
        # check SMIRKS and save the matches to input clusters
157 165
        self.type_matches, self.checks = self.types_match_reference()
@@ -161,28 +169,23 @@
Loading
161 169
                      SMIRKSifier was not able to create SMIRKS for the provided
162 170
                      clusters with %i layers. Try increasing the number of layers
163 171
                      or changing your clusters
164 -
                      """ % max_layers
172 +
                      """ % self.max_layers
165 173
            if self.strict_smirks:
166 174
                raise ClusteringError(msg)
167 175
            else:
168 176
                print("WARNING!", msg)
169 177
170 -
    def make_smirks(self, max_layers):
178 +
    def make_smirks(self):
171 179
        """
172 180
        Create a list of SMIRKS patterns for the input clusters.
173 181
        This includes determining how far away from the indexed atom should
174 -
        be included in the SMIRKS (or the number of layers
175 -
176 -
        Parameters
177 -
        ----------
178 -
        max_layers: int
179 -
            maximum number of bonds away from the indexed atoms should be included
182 +
        be included in the SMIRKS (or until max_layers is reached)
180 183
181 184
        Returns
182 185
        -------
183 -
        smirks_list: list of tuples
186 +
        smirks_list : list of tuples
184 187
            list of tuples in the form (label, smirks)
185 -
        layers: int
188 +
        layers : int
186 189
            number of layers actually used to specify the set clusters
187 190
        """
188 191
        layers = 0
@@ -192,7 +195,7 @@
Loading
192 195
        self.intermediate_smirks[layers] = smirks_list
193 196
        _, checks = self.types_match_reference(current_types=smirks_list)
194 197
195 -
        while not checks and (layers < max_layers):
198 +
        while not checks and (layers < self.max_layers):
196 199
            layers += 1
197 200
            smirks_list = self._make_cluster_graphs(layers)
198 201
            # store intermediate smirks
@@ -204,9 +207,20 @@
Loading
204 207
205 208
    def _make_cluster_graphs(self, layers):
206 209
        """
207 -
        Creates a list of SMIRKS with the form
208 -
        [ (label: SMIRKS), ]
209 -
        using the stored molecules and cluster_list
210 +
        Creates a list of SMIRKS using the stored
211 +
        molecules and clusters with the specified number
212 +
        of layers (atoms away from the indexed atoms)
213 +
214 +
        Parameters
215 +
        -----------
216 +
        layers : int
217 +
            number of layers (atoms away from indexed atoms) to
218 +
            include in this round of graphs
219 +
220 +
        Returns
221 +
        --------
222 +
        smirks_list : list of two tuples
223 +
            SMIRKS list in the form [ (label, SMIRKS), ...]
210 224
        """
211 225
        smirks_list = list()
212 226
@@ -252,14 +266,16 @@
Loading
252 266
        Parameters
253 267
        ----------
254 268
        max_its : int
269 +
            default = 1000
255 270
            The specified number of iterations
256 -
        verbose: boolean
271 +
        verbose : boolean
272 +
            default = None
257 273
            will set the verboseness while running
258 274
            (if None, the current verbose variable will be used)
259 275
260 276
        Returns
261 277
        ----------
262 -
        final_smirks: list of tuples
278 +
        final_smirks : list of tuples
263 279
            list of final smirks patterns after reducing in the form
264 280
            [(label, smirks)]
265 281
        """
@@ -278,16 +294,31 @@
Loading
278 294
279 295
class Reducer():
280 296
    """
281 -
    # TODO: add a description
297 +
    Reducer starts with any list of SMIRKS and removes unnecessary decorators
298 +
    while maintaining typing on input molecules.
299 +
    This was created to be used as a part of the SMIRKSifier.reduce function.
300 +
    However, if you have complex SMIRKS and a list of molecules you can
301 +
    also reduce those patterns independently.
302 +
303 +
    Attributes
304 +
    ----------
305 +
    current_smirks : list of tuples
306 +
                    current SMIRKS patterns in the form (label, smirks)
307 +
    mols : list of chemper molecules
308 +
          molecules being used to reduce the input SMIRKS
309 +
    cluster_dict : dictionary
310 +
                  Dictionary specifying typing using current SMIRKS in the form:
311 +
                  {mol_idx:
312 +
                        { (tuple of atom indices): label } }
282 313
    """
283 314
    def __init__(self, smirks_list, mols, verbose=False):
284 315
        """
285 316
        Parameters
286 317
        ----------
287 -
        smirks_list: list of tuples
318 +
        smirks_list : list of tuples
288 319
            set of SMIRKS patterns in the form (label, smirks)
289 -
        mols: list of molecules
290 -
            Any chemper compatible molecules accepted
320 +
        mols : list of molecules
321 +
            Any chemper compatible molecules accepted (ChemPer, OpenEye, or RDKit)
291 322
        """
292 323
        self.verbose = verbose
293 324
        self.current_smirks = copy.deepcopy(smirks_list)
@@ -299,43 +330,93 @@
Loading
299 330
300 331
    def remove_one_sub_dec(self, input_ors, ref_idx):
301 332
        """
302 -
        Remove one ORdecorator from the specified index
333 +
        Remove one OR decorator from the specified index
303 334
        # i.e. [(#6, [X4, +0]), (#7, [X3]) ] --> [(#6, [+0]), (#7, [X3]) ]
335 +
336 +
        Parameters
337 +
        -----------
338 +
        input_ors : list of two tuples
339 +
            OR decorators in the form from ChemicalEnvironments
340 +
            that is [ (base, [decorators, ]), ... ]
341 +
        ref_idx : int
342 +
            The index from this list to use when removing one sub-decorator
343 +
344 +
        Returns
345 +
        --------
346 +
        new_ors : list of two tuples
347 +
            New OR decorators
304 348
        """
305 -
        ors = copy.deepcopy(input_ors)
349 +
        new_ors = copy.deepcopy(input_ors)
306 350
        # remove one or decorator from a single type
307 -
        ref_or = ors[ref_idx]
351 +
        ref_or = new_ors[ref_idx]
308 352
        decs = ref_or[1]
309 353
        decs.remove(np.random.choice(decs))
310 -
        ors[ref_idx] = (ref_or[0], decs)
311 -
        return ors
354 +
        new_ors[ref_idx] = (ref_or[0], decs)
355 +
        return new_ors
312 356
313 357
    def remove_ref_sub_decs(self, input_ors, ref_idx):
314 358
        """
315 359
        Remove all of the OR decorators at the specified index
316 360
        i.e. [(#6, [X4, +0]), (#7, [X3]) ] --> [(#6, []), (#7, [X3]) ]
361 +
362 +
        Parameters
363 +
        -----------
364 +
        input_ors : list of two tuples
365 +
            OR decorators in the form from ChemicalEnvironments
366 +
            that is [ (base, [decorators, ]), ... ]
367 +
        ref_idx : int
368 +
            The index from this list to use when removing one set of sub-decorators
369 +
370 +
        Returns
371 +
        --------
372 +
        new_ors : list of two tuples
373 +
            New OR decorators
317 374
        """
318 -
        ors = copy.deepcopy(input_ors)
375 +
        new_ors = copy.deepcopy(input_ors)
319 376
        # remove all decorators on one OR type
320 -
        ref_or = ors[ref_idx]
321 -
        ors[ref_idx] = (ref_or[0], list())
322 -
        return ors
377 +
        ref_or = new_ors[ref_idx]
378 +
        new_ors[ref_idx] = (ref_or[0], list())
379 +
        return new_ors
323 380
324 -
    def remove_ref(self, input_decs, ref_idx):
381 +
    def remove_ref(self, input_ors, ref_idx):
325 382
        """
326 383
        Remove the decorator at the referenced index
327 384
        i.e. [(#6, [X4, +0]), (#7, [X3]) ] --> [(#7, [X3])]
385 +
386 +
        Parameters
387 +
        -----------
388 +
        input_ors : list of two tuples
389 +
            OR decorators in the form from ChemicalEnvironments
390 +
            that is [ (base, [decorators, ]), ... ]
391 +
        ref_idx : int
392 +
            The OR decorators at ref_idx will be removed entirely
393 +
394 +
        Returns
395 +
        --------
396 +
        new_ors : list of two tuples
397 +
            New OR decorators
328 398
        """
329 -
        decs = copy.deepcopy(input_decs)
399 +
        new_ors = copy.deepcopy(input_ors)
330 400
        # remove the single OR type at ref_idx
331 -
        ref = decs[ref_idx]
332 -
        decs.remove(ref)
333 -
        return decs
401 +
        ref = new_ors[ref_idx]
402 +
        new_ors.remove(ref)
403 +
        return new_ors
334 404
335 405
    def remove_all_bases(self, input_ors):
336 406
        """
337 407
        convert all bases to [*]
338 408
        i.e. [(#6, [X4, +0]), (#7, [X3]) ] --> [(*, [X4, +0]), (*, [X3]) ]
409 +
410 +
        Parameters
411 +
        -----------
412 +
        input_ors : list of two tuples
413 +
            OR decorators in the form from ChemicalEnvironments
414 +
            that is [ (base, [decorators, ]), ... ]
415 +
416 +
        Returns
417 +
        --------
418 +
        new_ors : list of two tuples
419 +
            New OR decorators
339 420
        """
340 421
        new_all_ors = [('*', d) for b, d in input_ors]
341 422
        return new_all_ors
@@ -344,6 +425,17 @@
Loading
344 425
        """
345 426
        remove all decorators of the same type, like all 'X' decorators
346 427
        i.e. [(#6, [X4, +0]), (#7, [X3]) ] --> [(#6, [+0]), (#7, []) ]
428 +
429 +
        Parameters
430 +
        -----------
431 +
        input_ors : list of two tuples
432 +
            OR decorators in the form from ChemicalEnvironments
433 +
            that is [ (base, [decorators, ]), ... ]
434 +
435 +
        Returns
436 +
        --------
437 +
        new_ors : list of two tuples
438 +
            New OR decorators
347 439
        """
348 440
        all_decs = set([d for b,decs in input_ors for d in decs])
349 441
        remove_dec = np.random.choice(list(all_decs))
@@ -374,8 +466,17 @@
Loading
374 466
        input_all_ors: list of OR decorators
375 467
            [ (base, [decs]), ...]
376 468
        or_idx: index that should be used to guide changes
469 +
470 +
        Returns
471 +
        --------
472 +
        new_ors : list of two tuples
473 +
            new or decorators
377 474
        """
378 475
        ref_or = input_all_ors[or_idx]
476 +
477 +
        # Start by checking what removal choices are available with the
478 +
        # current list of OR decorators
479 +
        # ---------------------------------------------------------------------
379 480
        # one choice is to remove all decorators (0)
380 481
        # it is always an option
381 482
        choices = ['all']
@@ -398,9 +499,12 @@
Loading
398 499
            if len(all_decs) > 0:
399 500
                # you can remove 1 type of decorator, i.e. all 'Xn' decorators
400 501
                choices.append('all_one_dec')
502 +
        # ---------------------------------------------------------------------
401 503
504 +
        # Make a random choice from the available change options
402 505
        change = np.random.choice(choices)
403 506
507 +
        # Based on the option chosen call the appropriate method
404 508
        if change == 'all':
405 509
            # remove ALL OR decorators
406 510
            # i.e. [(#6, [X4, +0]), (#7, [X3]) ] --> []
@@ -427,10 +531,16 @@
Loading
427 531
428 532
        Parameters
429 533
        -----------
430 -
        input_all_ors: list of tuples
431 -
            these are the ORdecorators from a ChemicalEnvironment
432 -
        is_bond: boolean
534 +
        input_all_ors : list of tuples
535 +
            these are the OR decorators for an atom or bond
536 +
            from a ChemicalEnvironment
537 +
        is_bond : boolean
433 538
            are these decorators from a bond (False for atom)
539 +
540 +
        Returns
541 +
        --------
542 +
        new_ors : list of two tuples
543 +
            new OR decorators
434 544
        """
435 545
        if len(input_all_ors) == 0:
436 546
            return input_all_ors, False
@@ -440,12 +550,12 @@
Loading
440 550
        all_ors = copy.deepcopy(input_all_ors)
441 551
        or_idx = np.random.randint(len(all_ors))
442 552
443 -
        # atoms have more ORdecorators and therefore more options for
553 +
        # atoms have more OR decorators and therefore more options for
444 554
        # how they can be removed
445 555
        if not is_bond:
446 556
            return self.remove_or_atom(all_ors, or_idx), True
447 557
448 -
        # For a bond, either one ORtype is removed
558 +
        # For a bond, either one OR type is removed
449 559
        # or they are all removed.
450 560
        if np.random.rand() > 0.5:
451 561
            # remove just one or type
@@ -455,7 +565,17 @@
Loading
455 565
456 566
    def remove_and(self, input_all_ands):
457 567
        """
458 -
        removes a decorated that is AND'd in the original SMIRKS
568 +
        removes a decorator that is AND'd in the original SMIRKS
569 +
570 +
        Parameters
571 +
        -----------
572 +
        input_all_ands : list
573 +
            List of AND decorators
574 +
575 +
        Returns
576 +
        --------
577 +
        new_ands : list
578 +
            List of new AND decorators
459 579
        """
460 580
        # if there are no ands return with no changes
461 581
        if len(input_all_ands) == 0:
@@ -471,6 +591,11 @@
Loading
471 591
472 592
    def _get_item_and_remove_options(self, env):
473 593
        """
594 +
        This function chooses an Atom or Bond
595 +
        which will then have some of its decorators removed.
596 +
        It also determines what part of the component can be
597 +
        removed: OR decorators(0), AND decorators(1), the whole atom(2)
598 +
474 599
        Parameters
475 600
        ----------
476 601
        env: ChemicalEnvironment
@@ -485,16 +610,16 @@
Loading
485 610
        """
486 611
        items = list()
487 612
        odds = list()
488 -
        for a_b in env.getAtoms() + env.getBonds():
489 -
            count = len(a_b.getANDtypes())
490 -
            for o in a_b.getORtypes():
613 +
        for a_b in env.get_atoms() + env.get_bonds():
614 +
            count = len(a_b.and_types)
615 +
            for o in a_b.or_types:
491 616
                if o[0] != '*':
492 617
                    count += 1
493 618
                count += len(o[1])
494 619
495 620
            # a wild card, atom with no index should be considered ([*])
496 621
            # should also be on this list so it can be removed
497 -
            if isinstance(a_b, CE.Atom) and not isinstance(a_b, CE.Bond) and env.isUnindexed(a_b):
622 +
            if isinstance(a_b, CE.Atom) and not isinstance(a_b, CE.Bond) and env.is_unindexed(a_b):
498 623
                count += 1
499 624
500 625
            items.append(a_b)
@@ -512,13 +637,13 @@
Loading
512 637
        # choose an atom or bond with the probabilities:
513 638
        item = np.random.choice(items, p=weights)
514 639
        dec_opts = list()
515 -
        if len(item.getORtypes()) > 0:
640 +
        if len(item.or_types) > 0:
516 641
            dec_opts.append('remove_ors')
517 -
        if len(item.getANDtypes()) > 0:
642 +
        if len(item.and_types) > 0:
518 643
            dec_opts.append('remove_ands')
519 644
520 645
        if not isinstance(item, CE.Bond): # then it is an atom
521 -
            if env.getValence(item) == 1 and env.isUnindexed(item):
646 +
            if env.get_valence(item) == 1 and env.is_unindexed(item):
522 647
                dec_opts.append('remove_atom')
523 648
524 649
        return item, dec_opts
@@ -527,6 +652,18 @@
Loading
527 652
        """
528 653
        Chose an atom or bond in the input smirks pattern
529 654
        and then remove one decorator from it.
655 +
656 +
        Parameters
657 +
        -----------
658 +
        smirks : str
659 +
            A SMIRKS string which should be reduced
660 +
661 +
        Returns
662 +
        --------
663 +
        new_smirks : str
664 +
            A new SMIRKS pattern
665 +
        is_changed : bool
666 +
            True if some of the decorators were successfully removed
530 667
        """
531 668
        env = CE(smirks)
532 669
        sub, dec_opts = self._get_item_and_remove_options(env)
@@ -537,21 +674,21 @@
Loading
537 674
538 675
        change = np.random.choice(dec_opts)
539 676
        if change == 'remove_ors':
540 -
            new_or_types, changed = self.remove_or(sub.getORtypes(), isinstance(sub, CE.Bond))
677 +
            new_or_types, changed = self.remove_or(sub.or_types, isinstance(sub, CE.Bond))
541 678
            if not changed:
542 679
                return smirks, False
543 -
            sub.setORtypes(new_or_types)
680 +
            sub.or_types = new_or_types
544 681
        elif change == 'remove_ands':
545 -
            new_and_types, changed = self.remove_and(sub.getANDtypes())
682 +
            new_and_types, changed = self.remove_and(sub.and_types)
546 683
            if not changed:
547 684
                return smirks, False
548 -
            sub.setANDtypes(new_and_types)
685 +
            sub.and_types = new_and_types
549 686
        else: # change == 'remove_atom'
550 -
            remove = env.removeAtom(sub)
687 +
            remove = env.remove_atom(sub)
551 688
            if not remove:
552 689
                return smirks, False
553 690
554 -
        return env.asSMIRKS(), True
691 +
        return env.as_smirks(), True
555 692
556 693
    def run(self, max_its=1000, verbose=None):
557 694
        """
@@ -561,13 +698,13 @@
Loading
561 698
        ----------
562 699
        max_its : int
563 700
            The specified number of iterations
564 -
        verbose: boolean
701 +
        verbose : boolean
565 702
            will set the verboseness while running
566 703
            (if None, the current verbose variable will be used)
567 704
568 705
        Returns
569 706
        ----------
570 -
        final_smirks: list of tuples
707 +
        final_smirks : list of tuples
571 708
            list of final smirks patterns after reducing in the form
572 709
            [(label, smirks)]
573 710
        """

@@ -5,123 +5,66 @@
Loading
5 5
6 6
The classes provided here follow the structure in adapters.
7 7
This is a wrapper allowing our actual package to use openeye toolkits
8 -
9 -
AUTHORS:
10 -
11 -
Caitlin C. Bannan <bannanc@uci.edu>, Mobley Group, University of California Irvine
12 8
"""
13 9
14 10
from chemper.mol_toolkits.adapters import MolAdapter, AtomAdapter, BondAdapter
15 11
from openeye import oechem
16 12
17 13
14 +
# Note - doc strings on these functions are inherited from
15 +
#        their Adapters. To read these strings see adapters.py.
16 +
18 17
# =======================================
19 18
# Molecule Class
20 19
# =======================================
21 20
22 21
class Mol(MolAdapter):
23 -
    """
24 -
    Wrapper for OEMol to create a chemper Mol
25 -
    """
26 22
    def __init__(self, mol):
27 23
        """
24 +
        ChemPer Mol created from an OEMol
25 +
28 26
        Parameters
29 27
        ----------
30 -
        mol: openeye OEMol object
31 -
            openeye molecule to convert to chemper Mol object
28 +
        mol : openeye OEMol object
29 +
            openeye molecule to convert to ChemPer Mol object
32 30
        """
33 31
        if not isinstance(mol, oechem.OEMolBase):
34 -
            raise Exception("Expecting an OEMol object instead of %s" % type(mol))
32 +
            raise TypeError("Expecting an OEMol object instead of %s" % type(mol))
35 33
        self.mol = mol
36 34
37 35
    def __str__(self): return self.get_smiles()
38 36
37 +
    @classmethod
38 +
    def from_smiles(cls, smiles):
39 +
        mol = oechem.OEMol()
40 +
        if not oechem.OESmilesToMol(mol, smiles):
41 +
            raise ValueError('Could not parse SMILES %s' % smiles)
42 +
        oechem.OEAddExplicitHydrogens(mol)
43 +
        return cls(mol)
44 +
39 45
    def set_aromaticity_mdl(self):
40 -
        """
41 -
        Sets the aromaticity flags in this molecule to use the MDL model
42 -
        """
43 46
        oechem.OEClearAromaticFlags(self.mol)
44 47
        oechem.OEAssignAromaticFlags(self.mol, oechem.OEAroModel_MDL)
45 48
        oechem.OEAssignHybridization(self.mol)
46 49
47 50
    def get_atoms(self):
48 -
        """
49 -
        Returns
50 -
        -------
51 -
        atom_list: list of chemper Atoms
52 -
            list of all atoms in the molecule
53 -
        """
54 51
        return [Atom(a) for a in self.mol.GetAtoms()]
55 52
56 53
    def get_atom_by_index(self, idx):
57 -
        """
58 -
        Parameters
59 -
        ----------
60 -
        idx: int
61 -
            atom index
62 -
63 -
        Returns
64 -
        -------
65 -
        atom: chemper Atom object
66 -
            atom with index idx
67 -
        """
68 54
        return Atom(self.mol.GetAtom(oechem.OEHasAtomIdx(idx)))
69 55
70 56
    def get_bonds(self):
71 -
        """
72 -
        Returns
73 -
        -------
74 -
        bond_list: list of chemper Bonds
75 -
            list of all bonds in the molecule
76 -
        """
77 57
        return [Bond(b) for b in self.mol.GetBonds()]
78 58
79 59
    def get_bond_by_index(self, idx):
80 -
        """
81 -
        Parameters
82 -
        ----------
83 -
        idx: ing
84 -
            bond index
85 -
86 -
        Returns
87 -
        -------
88 -
        bond: chemper Bond object
89 -
            bond with index idx
90 -
        """
91 60
        return Bond(self.mol.GetBond(oechem.OEHasBondIdx(idx)))
92 61
93 62
    def get_bond_by_atoms(self, atom1, atom2):
94 -
        """
95 -
        Finds a bond between two atoms
96 -
        Parameters
97 -
        ----------
98 -
        atom1: chemper Atom object
99 -
        atom2: chemper Atom object
100 -
101 -
        Returns
102 -
        -------
103 -
        bond: chemper Bond object or None
104 -
            if atoms are connected returns bond otherwise None
105 -
        """
106 63
        if not atom1.is_connected_to(atom2):
107 64
            return None
108 65
        return Bond(self.mol.GetBond(atom1.atom, atom2.atom))
109 66
110 67
    def smirks_search(self, smirks):
111 -
        """
112 -
        Performs a substructure search on the molecule with the provided
113 -
        SMIRKS pattern. Note - this function expects SMIRKS patterns with indexed atoms
114 -
        that is with :n for at least some atoms.
115 -
116 -
        Parameters
117 -
        ----------
118 -
        smirks: str
119 -
            SMIRKS pattern with indexed atoms (:n)
120 -
        Returns
121 -
        -------
122 -
        matches: list of dictionaries
123 -
            dictionary for each match with the form {smirks index: atom index}
124 -
        """
125 68
        cmol = oechem.OEMol(self.mol)
126 69
127 70
        matches = list()
@@ -145,186 +88,68 @@
Loading
145 88
        return matches
146 89
147 90
    def get_smiles(self):
148 -
        """
149 -
        Returns
150 -
        -------
151 -
        smiles: str
152 -
            SMILES string for the molecule
153 -
        """
154 91
        smiles = oechem.OEMolToSmiles(self.mol)
155 92
        return smiles
156 93
157 -
class MolFromSmiles(Mol):
158 -
    """
159 -
    Creates a chemper Mol from a smiles string
160 -
    It automatically adds explicit hydrogens.
161 -
    """
162 -
    def __init__(self, smiles):
163 -
        """
164 -
        Parameters
165 -
        ----------
166 -
        smiles: str
167 -
            SMILES string for a molecule
168 -
        """
169 -
        mol = oechem.OEMol()
170 -
        if not oechem.OESmilesToMol(mol, smiles):
171 -
            raise ValueError('Could not parse SMILES %s' % smiles)
172 -
        oechem.OEAddExplicitHydrogens(mol)
173 -
        Mol.__init__(self, mol)
174 -
175 94
# =======================================
176 95
# Atom Class
177 96
# =======================================
178 97
179 98
class Atom(AtomAdapter):
180 -
    """
181 -
    Wrapper for OEAtomBase to create a chemper Atom
182 -
    """
183 99
    def __init__(self, atom):
184 100
        """
101 +
        ChemPer Atom created from an OEAtom
102 +
185 103
        Parameters
186 104
        ----------
187 105
        atom: OEAtomBase
188 106
            Atom object from an OpenEye molecule
189 107
        """
190 108
        if not isinstance(atom, oechem.OEAtomBase):
191 -
            raise Exception("Expecting an OEAtomBase object instead of %s" % type(atom))
109 +
            raise TypeError("Expecting an OEAtomBase object instead of %s" % type(atom))
192 110
        self.atom = atom
193 -
        self._index = self.atom.GetIdx()
111 +
        self._idx = self.atom.GetIdx()
194 112
195 -
    def atomic_number(self):
196 -
        """
197 -
        Returns
198 -
        -------
199 -
        atomic_number: int
200 -
            atomic number for the atom
201 -
        """
202 -
        return self.atom.GetAtomicNum()
113 +
    def __str__(self): return "%i%s" % (self._idx,
114 +
                                        oechem.OEGetAtomicSymbol(self.atomic_number()))
203 115
204 -
    def degree(self):
205 -
        """
206 -
        Returns
207 -
        -------
208 -
        degree: int
209 -
            degree or number of explicit bonds around the atom
210 -
        """
211 -
        return self.atom.GetDegree()
116 +
    def atomic_number(self): return self.atom.GetAtomicNum()
117 +
118 +
    def degree(self): return self.atom.GetDegree()
212 119
213 120
    def connectivity(self):
214 -
        """
215 -
        Returns
216 -
        -------
217 -
        connectivity: int
218 -
            connectivity or total number of bonds around the atom
219 -
        """
220 121
        return len([b for b in self.atom.GetBonds()])
221 122
222 -
    def valence(self):
223 -
        """
224 -
        Returns
225 -
        -------
226 -
        valence: int
227 -
            the atoms valence
228 -
        """
229 -
        return self.atom.GetValence()
123 +
    def valence(self): return self.atom.GetValence()
230 124
231 -
    def formal_charge(self):
232 -
        """
233 -
        Returns
234 -
        -------
235 -
        formal_charge: int
236 -
            the atom's formal charge
237 -
        """
238 -
        return self.atom.GetFormalCharge()
125 +
    def formal_charge(self): return self.atom.GetFormalCharge()
239 126
240 -
    def hydrogen_count(self):
241 -
        """
242 -
        Returns
243 -
        -------
244 -
        H_count: int
245 -
            total number of hydrogen atoms connected to this Atom
246 -
        """
247 -
        return self.atom.GetTotalHCount()
127 +
    def hydrogen_count(self): return self.atom.GetTotalHCount()
248 128
249 129
    def ring_connectivity(self):
250 -
        """
251 -
        Returns
252 -
        -------
253 -
        ring_connectivity: int
254 -
            number of bonds on the atom that are a part of a ring
255 -
        """
256 130
        return len([b for b in self.atom.GetBonds(oechem.OEBondIsInRing())])
257 131
258 132
    def min_ring_size(self):
259 -
        """
260 -
        Returns
261 -
        -------
262 -
        min_ring_size: int
263 -
            size of the smallest ring this atom is a part of
264 -
        """
265 133
        return oechem.OEAtomGetSmallestRingSize(self.atom)
266 134
267 -
    def is_aromatic(self):
268 -
        """
269 -
        Returns
270 -
        -------
271 -
        is_aromatic: boolean
272 -
            True if the atom is aromatic otherwise False
273 -
        """
274 -
        return self.atom.IsAromatic()
135 +
    def is_aromatic(self): return self.atom.IsAromatic()
275 136
276 -
    def get_index(self):
277 -
        """
278 -
        Returns
279 -
        -------
280 -
        index: int
281 -
            atom index in its molecule
282 -
        """
283 -
        return self._index
137 +
    def get_index(self): return self._idx
284 138
285 139
    def is_connected_to(self, atom2):
286 -
        """
287 -
        Parameters
288 -
        ----------
289 -
        atom2: chemper Atom object
290 -
            atom to check if it is connected to this atom
291 -
292 -
        Returns
293 -
        -------
294 -
        connected: boolean
295 -
            True if atom2 is a direct neighbor or atom1
296 -
        """
140 +
        if not isinstance(atom2.atom, oechem.OEAtomBase):
141 +
            return False
297 142
        return self.atom.IsConnected(atom2.atom)
298 143
299 144
    def get_neighbors(self):
300 -
        """
301 -
        Returns
302 -
        -------
303 -
        neighbors: list of chemper Atoms
304 -
            atoms that are one bond away from this atom
305 -
        """
306 145
        return [Atom(a) for a in self.atom.GetAtoms()]
307 146
308 147
    def get_bonds(self):
309 -
        """
310 -
        Returns
311 -
        -------
312 -
        bonds: list of chemper Bonds
313 -
            bonds connected to this atom
314 -
        """
315 148
        return [Bond(b) for b in self.atom.GetBonds()]
316 149
317 150
    def get_molecule(self):
318 -
        """
319 -
        Extracts the parent molecule this atom is in
320 -
321 -
        Returns
322 -
        -------
323 -
        mol: chemper Mol
324 -
            molecule this atom is stored in
325 -
        """
326 151
        mol = oechem.OEMol(self.atom.GetParent())
327 -
        self.atom = mol.GetAtom(oechem.OEHasAtomIdx(self._index))
152 +
        self.atom = mol.GetAtom(oechem.OEHasAtomIdx(self._idx))
328 153
        return Mol(mol)
329 154
330 155
# =======================================
@@ -333,111 +158,58 @@
Loading
333 158
334 159
335 160
class Bond(BondAdapter):
336 -
    """
337 -
    Wrapper for OEBondBase to create a chemper Bond
338 -
    """
339 161
    def __init__(self, bond):
340 162
        """
163 +
        ChemPer Bond created from an OEBond
164 +
341 165
        Parameters
342 166
        ----------
343 167
        bond: OEBondBase
344 168
            Bond object from an OpenEye molecule
345 169
        """
346 170
        if not isinstance(bond, oechem.OEBondBase):
347 -
            raise Exception("Expecting an OEBondBase object instead of %s" % type(bond))
171 +
            raise TypeError("Expecting an OEBondBase object instead of %s" % type(bond))
348 172
        self.bond = bond
173 +
174 +
        # save index
175 +
        self._idx = self.bond.GetIdx()
176 +
177 +
        # store order information
349 178
        self._order = self.bond.GetOrder()
350 179
        if self.is_aromatic():
351 180
            self._order = 1.5
352 181
353 -
        self._idx = self.bond.GetIdx()
182 +
        orders = {1:'-', 2:'=', 3:'#', 1.5:':'}
183 +
        self._order_symbol = orders.get(self._order, '~')
354 184
355 -
    def get_order(self):
356 -
        """
357 -
        Returns
358 -
        -------
359 -
        order: int or float
360 -
            This is the absolute order, returns 1.5 if bond is aromatic
361 -
        """
362 -
        return self._order
185 +
        # save atoms in bond
186 +
        self._beginning = Atom(self.bond.GetBgn())
187 +
        self._end = Atom(self.bond.GetEnd())
363 188
364 -
    def get_atoms(self):
365 -
        """
366 -
        Returns
367 -
        -------
368 -
        atoms: list of chemper Atoms
369 -
            the two atoms connected by this bond
370 -
        """
371 -
        beginning = Atom(self.bond.GetBgn())
372 -
        end = Atom(self.bond.GetEnd())
373 -
        return [beginning, end]
189 +
    def __str__(self):
190 +
        return "%i %s%s%s" % (self.get_index(), self._beginning,
191 +
                              self._order_symbol, self._end)
374 192
375 -
    def is_ring(self):
376 -
        """
377 -
        Returns
378 -
        -------
379 -
        is_ring: boolean
380 -
            True if bond is a part of a ring, otherwise False
381 -
        """
382 -
        return self.bond.IsInRing()
193 +
    def get_order(self): return self._order
383 194
384 -
    def is_aromatic(self):
385 -
        """
386 -
        Returns
387 -
        -------
388 -
        is_aromatic: boolean
389 -
            True if it is an aromatic bond
390 -
        """
391 -
        return self.bond.IsAromatic()
195 +
    def get_atoms(self): return [self._beginning, self._end]
392 196
393 -
    def is_single(self):
394 -
        """
395 -
        Returns
396 -
        -------
397 -
        is_single: boolean
398 -
            True if it is a single bond
399 -
        """
400 -
        return self._order == 1
197 +
    def is_ring(self): return self.bond.IsInRing()
401 198
402 -
    def is_double(self):
403 -
        """
404 -
        Returns
405 -
        -------
406 -
        is_double: boolean
407 -
            True if it is a double bond
408 -
        """
409 -
        return self._order == 2
199 +
    def is_aromatic(self): return self.bond.IsAromatic()
410 200
411 -
    def is_triple(self):
412 -
        """
413 -
        Returns
414 -
        -------
415 -
        is_triple: boolean
416 -
            True if it is a triple bond
417 -
        """
418 -
        return self._order == 3
201 +
    def is_single(self): return self._order == 1
419 202
420 -
    def get_molecule(self):
421 -
        """
422 -
        Extracts the parent molecule this bond is in
203 +
    def is_double(self): return self._order == 2
423 204
424 -
        Returns
425 -
        -------
426 -
        mol: chemper Mol
427 -
            molecule this bond is stored in
428 -
        """
205 +
    def is_triple(self): return self._order == 3
206 +
207 +
    def get_molecule(self):
429 208
        mol = oechem.OEMol(self.bond.GetParent())
430 209
        self.bond = mol.GetBond(oechem.OEHasBondIdx(self._idx))
431 210
        return Mol(mol)
432 211
433 -
    def get_index(self):
434 -
        """
435 -
        Returns
436 -
        -------
437 -
        index: int
438 -
            index of this bond in its parent molecule
439 -
        """
440 -
        return self._idx
212 +
    def get_index(self): return self._idx
441 213
442 214
# =====================================================================
443 215
# functions for importing molecules from files
@@ -448,7 +220,7 @@
Loading
448 220
449 221
def mols_from_file(mol_file):
450 222
    """
451 -
    Parses a standard molecule file into chemper molecules using OpenEye toolkits
223 +
    Parses a standard molecule file into ChemPer molecules using OpenEye toolkits
452 224
453 225
    Parameters
454 226
    ----------
@@ -458,8 +230,8 @@
Loading
458 230
459 231
    Returns
460 232
    -------
461 -
    mols: list of chemper Mols
462 -
          list of molecules in the mol2 file as chemper Mols
233 +
    mols: list[ChemPer Mol]
234 +
          list of molecules in the mol2 file as ChemPer Mols
463 235
    """
464 236
    import os
465 237
    if not os.path.exists(mol_file):

@@ -8,22 +8,17 @@
Loading
8 8
environment.py
9 9
this is adapted from the openforcefield ChemicalEnvironments class.
10 10
11 -
When the openforcefield package fully supports RDKit and OpenEye I will think about
12 -
switching to that as a dependency, although it would mean we're locked into RDKit
13 -
or OpenEye and not other toolkits
14 -
This script includes the base ChemicalEnvironment class with isValid adapted
15 -
to use chemper's is_smirks_valid.
16 -
17 -
If we decide we really need our own version of this code I will do a full update here.
18 -
If, as I suspect will happen, we make openforcefield a dependency
19 -
I will put in a PR to that repo with this code updated to be match pep8.
20 -
21 -
AUTHORS
22 -
23 -
Caitlin Bannan <bannanc@uci.edu>, Mobley Lab, University of California Irvine,
24 -
with contributions from John Chodera, Memorial Sloan Kettering Cancer Center
25 -
and David Mobley, UC Irvine.
26 -
11 +
There have been some ongoing debates over where environment.py should live,
12 +
here or in the base openforcefield toolkit. Due to the need to update
13 +
this module for use in chemper amidst the openforcefield API overhaul,
14 +
this environment.py has been updated independently in this repository.
15 +
These updates have been fairly substantial, specifically the getter
16 +
and setter functions for decorators were removed and replaced with more
17 +
pythonic @property and @[property].setter functions instead.
18 +
All methods have also been renamed to use snake case.
19 +
20 +
The only function of real use in openforcefield.py is the `get_type` function,
21 +
which has also been updated here.
27 22
"""
28 23
29 24
#==============================================================================
@@ -46,13 +41,28 @@
Loading
46 41
    Finds the substring surrounded by the in_char and out_char
47 42
    intended use is to identify embedded bracketed sequences
48 43
49 -
    string - a string you want separated
50 -
    in_char - regular expression for the character you're looking for '\(' for '('
51 -
    out_char - regular expression for the closing character such as '\)' for ')'
52 -
44 +
    For example, if you have the input
53 45
    string = "[#1$(*-C(-[#7,#8,F,#16,Cl,Br])-[#7,#8,F,#16,Cl,Br]):1]"
54 46
    sub_string, in_idx, out_idx = _find_embedded_brackets(string, '\(','\)')
55 47
    # sub_string = (*-C(-[#7,#8,F,#16,Cl,Br])-[#7,#8,F,#16,Cl,Br])  in_idx = 4, out_idx = 50
48 +
49 +
    Parameters
50 +
    -----------
51 +
    string : str
52 +
        a string you want separated
53 +
    in_char : str
54 +
        regular expression for the character you're looking for '\(' for '('
55 +
    out_char : str
56 +
        regular expression for the closing character such as '\)' for ')'
57 +
58 +
    Returns
59 +
    --------
60 +
    substring : str
61 +
        string between the first occurrences of the in_char and out_char
62 +
    in_idx : int
63 +
        index from initial string with the first in_char
64 +
    out_idx : int
65 +
        index from initial string with the first out_char
56 66
    """
57 67
    in_list = [m.start() for m in re.finditer(in_char, string)]
58 68
    out_list = [m.start() for m in re.finditer(out_char, string)]
@@ -73,25 +83,34 @@
Loading
73 83
    out_idx = out_list[list_idx]
74 84
    return string[in_idx:out_idx+1], in_idx, out_idx
75 85
76 -
def _convert_embedded_SMIRKS(smirks):
86 +
87 +
def _convert_embedded_smirks(smirks):
77 88
    """
78 89
    Converts a SMIRKS string with the $(...) in an atom to the
79 90
    form expected by the environment parser
80 91
81 -
    smirks = any smirks string, if no $(...) then the original smirks is returned
92 +
    For example, if you provide initial_smirks = "[#1$(*~[#6]):1]"
93 +
    then new_smirks = _convert_embedded_smirks(initial_smirks)
94 +
    will return new_smirks = [#1:1]~[#6]
95 +
96 +
    Parameters
97 +
    -----------
98 +
    smirks : str
99 +
        any smirks string, if no $(...) then the original smirks is returned
82 100
83 -
    initial_smirks = "[#1$(*~[#6]):1]"
84 -
    new_smirks = _convert_embedded_SMIRKS(initial_smirks)
85 -
    # new_smirks = [#1:1]~[#6]
101 +
    Returns
102 +
    --------
103 +
    updated_smirks: str
104 +
        smirks string with no recursive smirks
86 105
    """
87 106
    a_out = 0
88 107
    while smirks.find('$(') != -1:
89 108
        # Find first atom
90 -
        atom, a_in, a_out = _find_embedded_brackets(smirks, '\[', '\]')
109 +
        atom, a_in, a_out = _find_embedded_brackets(smirks, r'\[', r'\]')
91 110
        d = atom.find('$(')
92 111
        # Find atom with the $ string embedded
93 112
        while d == -1:
94 -
            atom, temp_in, temp_out = _find_embedded_brackets(smirks[a_out+1:], '\[', '\]')
113 +
            atom, temp_in, temp_out = _find_embedded_brackets(smirks[a_out+1:], r'\[', r'\]')
95 114
            a_in = a_out + temp_in + 1
96 115
            a_out += temp_out + 1
97 116
            d = atom.find('$(')
@@ -109,12 +128,12 @@
Loading
109 128
        else:
110 129
            ring_out = ''
111 130
112 -
        embedded, p_in, p_out = _find_embedded_brackets(atom, '\(', '\)')
131 +
        embedded, p_in, p_out = _find_embedded_brackets(atom, r'\(', r'\)')
113 132
        # two forms of embedded strings $(*~stuff) or $([..]~stuff)
114 133
        # in the latter case the first atom refers the current atom
115 134
        if embedded[1] == '[':
116 -
            first, f_in, f_out = _find_embedded_brackets(embedded, '\[','\]')
117 -
            first = _convert_embedded_SMIRKS(first)
135 +
            first, f_in, f_out = _find_embedded_brackets(embedded, r'\[',r'\]')
136 +
            first = _convert_embedded_smirks(first)
118 137
            new_atom = atom[:d]+first[1:-1]+atom[p_out+1:]
119 138
            embedded = embedded[f_out+1:]
120 139
            # if embedded is empty between brackets, remove it
@@ -136,7 +155,7 @@
Loading
136 155
            else:
137 156
                new_atom = atom[:d]+atom[p_out+1]
138 157
139 -
        # Look for ring insided embedded SMIRKS "[#6$(*1CCC1)]"
158 +
        # Look for ring inside embedded SMIRKS "[#6$(*1CCC1)]"
140 159
        match = re.match(r'(\d+)', embedded)
141 160
        if match is not None: # embedded starts with an int
142 161
            ring_in = re.findall(r'(\d+)', embedded)[0]
@@ -151,10 +170,27 @@
Loading
151 170
152 171
    return smirks
153 172
173 +
154 174
def _remove_blanks_repeats(init_list, remove_list = ['']):
155 175
    """
156 176
    Returns the input list 'init_list'
157 177
    without any repeating entries or blank strings ''
178 +
179 +
    Parameters
180 +
    -----------
181 +
    init_list : list
182 +
        This is a list of anything, but intended for decorator strings
183 +
    remove_list : list
184 +
        List of things you want removed from the init_list
185 +
        For decorators we don't need empty strings
186 +
187 +
    Returns
188 +
    --------
189 +
    final_list : list
190 +
        The init_list with no duplicates and nothing from the remove_list
191 +
192 +
    TODO: this changes the order of inputs potentially so this function
193 +
          will need to be updated if order of init_list is important.
158 194
    """
159 195
    final_list = [item for item in init_list if item not in remove_list]
160 196
    return list( set(final_list) )
@@ -169,6 +205,7 @@
Loading
169 205
        super(SMIRKSMismatchError, self).__init__(self,msg)
170 206
        self.msg = msg
171 207
208 +
172 209
class SMIRKSParsingError(Exception):
173 210
    """
174 211
    Exception for when SMIRKS are not parseable for any environment
@@ -177,217 +214,219 @@
Loading
177 214
        super(SMIRKSParsingError, self).__init__(self, msg)
178 215
        self.msg = msg
179 216
217 +
180 218
class ChemicalEnvironment(object):
181 219
    """Chemical environment abstract base class that matches an atom, bond, angle, etc.
182 220
    """
183 221
    class Atom(object):
184 -
        """Atom representation, which may have some ORtypes and ANDtypes properties.
222 +
        """Atom representation, which may have some or_types and and_types properties.
185 223
186 224
        Attributes
187 225
        ----------
188 -
        ORtypes : list of tuples in the form (base, [list of decorators])
226 +
        or_types : list of tuples in the form (base, [list of decorators])
189 227
            where bases and decorators are both strings
190 228
            The descriptor types that will be combined with logical OR
191 -
        ANDtypes : list of string
229 +
        and_types : list of string
192 230
            The descriptor types  that will be combined with logical AND
193 231
        """
194 -
        def __init__(self, ORtypes = None, ANDtypes = None, index = None, ring = None):
232 +
        def __init__(self, or_types = None, and_types = None, index = 0, ring = None):
195 233
            """Initialize an Atom object with optional descriptors.
196 234
197 235
            Parameters
198 236
            -----------
199 -
            ORtypes: list of tuples for ORbases and ORdecorators,
237 +
            or_types : list of tuples for ORbases and ORdecorators,
200 238
                in the form (base, [list of decorators])
201 239
                optional, default = []
202 -
            ANDtypes: list of str,
240 +
            and_types : list of str
203 241
                strings that will be AND'd together in a SMARTS
204 242
                optional, default = None
205 -
            index : int, optional, default=None
206 -
                If not None, the specified index will be attached as a SMIRKS index (e.g. '[#6:1]')
243 +
            index : int
244 +
                If greater than zero,
245 +
                the specified index will be attached as a SMIRKS index (e.g. '[#6:1]')
246 +
                otherwise, it is only used for accessing atom information
207 247
            ring : int, optional, default = None
208 248
                If not None, the specified ring index will be attached at the end of the atom i.e. '[#6:1]1'
209 249
            """
210 250
            # List of 2 tuples in the form [ (ORbase, ORdecorator), ...]
211 -
            if ORtypes is not None:
212 -
                self.ORtypes = copy.deepcopy(ORtypes)
251 +
            if or_types is not None:
252 +
                self._or_types = copy.deepcopy(or_types)
213 253
            else:
214 -
                self.ORtypes = list()
254 +
                self._or_types = list()
215 255
216 256
            # Set of strings that will be AND'd to the end
217 -
            if ANDtypes is not None:
218 -
                self.ANDtypes = list(copy.deepcopy(ANDtypes))
257 +
            if and_types is not None:
258 +
                self._and_types = list(copy.deepcopy(and_types))
219 259
            else:
220 -
                self.ANDtypes = list()
260 +
                self._and_types = list()
221 261
222 262
            self.index = index
223 263
            self.ring = ring
224 -
            self._atom = True
264 +
            self.is_atom = True
225 265
226 266
        def is_generic(self):
227 267
            """
228 268
            returns True if there are no decorators on this atom
229 269
            (IMPORTANT: this is newly added and in chemper only as of 8/9/18)
230 270
            """
231 -
            if not self.ORtypes:
232 -
                if not self.ANDtypes:
271 +
            if not self._or_types:
272 +
                if not self._and_types:
233 273
                    return True
234 274
            return False
235 275
236 -
        def asSMARTS(self):
276 +
        def as_smarts(self):
237 277
            """Return the atom representation as SMARTS.
238 278
239 279
            Returns
240 280
            --------
241 281
            smarts : str
242 -
            The SMARTS string for this atom
282 +
            The SMARTS string for this atom, meaning it has no :n index
243 283
            """
244 284
245 285
            smarts = '['
246 286
247 287
            # Add the OR'd features
248 -
            if self.ORtypes:
249 -
                ORList = list()
250 -
                for (base, ORdecorators) in self.ORtypes:
288 +
            if self._or_types:
289 +
                or_list = list()
290 +
                for (base, or_decorators) in self._or_types:
251 291
                    if len(base) > 0 and base[0] == '$':
252 292
                        # after a $base an explicit '&' is necessary
253 -
                        if ORdecorators:
254 -
                            OR = base+'&'+''.join(ORdecorators)
293 +
                        if or_decorators:
294 +
                            or_bit = base + '&' + ''.join(or_decorators)
255 295
                        else:
256 -
                            OR = base
296 +
                            or_bit = base
257 297
                    else: # base doesn't start with $
258 -
                        OR = base+''.join(ORdecorators)
259 -
                    ORList.append(OR)
260 -
                smarts += ','.join(ORList)
298 +
                        or_bit = base + ''.join(or_decorators)
299 +
                    or_list.append(or_bit)
300 +
                smarts += ','.join(or_list)
261 301
            else:
262 302
                smarts += '*'
263 303
264 -
            if len(self.ANDtypes) > 0:
265 -
                smarts += ';' + ';'.join(self.ANDtypes)
304 +
            if len(self._and_types) > 0:
305 +
                smarts += ';' + ';'.join(self._and_types)
266 306
267 307
            if self.ring is not None:
268 308
                return smarts + ']' + str(self.ring)
269 309
            else:
270 310
                return smarts + ']'
271 311
272 -
        def asSMIRKS(self):
312 +
        def as_smirks(self):
273 313
            """Return the atom representation as SMIRKS.
274 314
275 315
            Returns
276 316
            --------
277 317
            smirks : str
278 -
            The SMIRKS string for this atom
318 +
            The SMIRKS string for this atom, same as SMARTS, but with :n index
279 319
            """
280 -
            smirks = self.asSMARTS()
320 +
            smirks = self.as_smarts()
281 321
282 322
            # No index specified so SMIRKS = SMARTS
283 -
            if self.index is None:
323 +
            if self.index <= 0:
284 324
                return smirks
285 325
286 326
            # Add label to the end of SMARTS
287 -
            else:
288 -
                sub_string, start, end = _find_embedded_brackets(smirks, '\[','\]')
289 -
                if self.ring is not None:
290 -
                    return sub_string[:-1] + ':' + str(self.index) + ']'+str(self.ring)
291 -
                else:
292 -
                    return sub_string[:-1] + ':' + str(self.index) + ']'
327 +
            sub_string, start, end = _find_embedded_brackets(smirks, r'\[', r'\]')
328 +
            end_string = smirks[end:]
329 +
            return sub_string[:-1] + ':' + str(self.index) + end_string
293 330
294 -
        def addORtype(self, ORbase, ORdecorators):
331 +
        def add_or_type(self, or_base, or_decorators):
295 332
            """
296 333
            Adds ORtype to the set for this atom.
297 334
298 335
            Parameters
299 -
            --------
300 -
            ORbase: string, such as '#6'
301 -
            ORdecorators: list of strings, such as ['X4','+0']
336 +
            -----------
337 +
            or_base : string, such as '#6'
338 +
            or_decorators : list of strings, such as ['X4','+0']
302 339
            """
303 -
            ORdecorators = _remove_blanks_repeats(ORdecorators, ['',ORbase])
304 -
            self.ORtypes.append((ORbase, ORdecorators))
340 +
            or_decorators = _remove_blanks_repeats(or_decorators, ['', or_base])
341 +
            self._or_types.append((or_base, or_decorators))
305 342
306 -
        def addANDtype(self, ANDtype):
343 +
        def add_and_type(self, and_type):
307 344
            """
308 345
            Adds ANDtype to the set for this atom.
309 346
310 347
            Parameters
311 348
            --------
312 -
            ANDtype: string
313 -
                added to the list of ANDtypes for this atom
349 +
            and_type : string
350 +
                added to the list of and_types for this atom
314 351
            """
315 -
            self.ANDtypes.append(ANDtype)
316 -
            self.ANDtypes = _remove_blanks_repeats(self.ANDtypes)
352 +
            self._and_types.append(and_type)
353 +
            self._and_types = _remove_blanks_repeats(self._and_types)
317 354
318 -
        def getORtypes(self):
319 -
            """
320 -
            returns a copy of the dictionary of ORtypes for this atom
321 -
            """
322 -
            return copy.deepcopy(self.ORtypes)
355 +
        @property
356 +
        def or_types(self):
357 +
            """Provides the or_types in this atom"""
358 +
            return self._or_types
323 359
324 -
        def setORtypes(self, newORtypes):
360 +
        @or_types.setter
361 +
        def or_types(self, new_or_types):
325 362
            """
326 -
            sets new ORtypes for this atom
363 +
            sets new or_types for this atom
327 364
328 365
            Parameters
329 366
            ----------
330 -
            newORtypes: list of tuples in the form (base, [ORdecorators])
331 -
                for example: ('#6', ['X4','H0','+0']) --> '#6X4H0+0'
367 +
            new_or_types : list of tuples in the form (base, [ORdecorators])
368 +
                for example : ('#6', ['X4','H0','+0']) --> '#6X4H0+0'
332 369
            """
333 -
            self.ORtypes = list()
334 -
            if newORtypes is not None:
335 -
                for (base, decs) in newORtypes:
370 +
            self._or_types = list()
371 +
            if new_or_types is not None:
372 +
                for (base, decs) in new_or_types:
336 373
                    adjusted_decs = _remove_blanks_repeats(decs, ['', base])
337 -
                    self.ORtypes.append( (base, adjusted_decs) )
374 +
                    self._or_types.append((base, adjusted_decs))
338 375
339 -
        def getANDtypes(self):
376 +
        @property
377 +
        def and_types(self):
340 378
            """
341 -
            returns a copy of the list of ANDtypes for this atom
379 +
            returns a copy of the list of and_types for this atom
342 380
            """
343 -
            return list(copy.deepcopy(self.ANDtypes))
381 +
            return list(copy.deepcopy(self._and_types))
344 382
345 -
        def setANDtypes(self, newANDtypes):
383 +
        @and_types.setter
384 +
        def and_types(self, new_and_types):
346 385
            """
347 -
            sets new ANDtypes for this atom
386 +
            sets new and_types for this atom
348 387
349 388
            Parameters
350 389
            ----------
351 -
            newANDtypes: list of strings
390 +
            new_and_types : list of strings
352 391
                strings that will be AND'd together in a SMARTS
353 392
            """
354 -
            if newANDtypes is None:
355 -
                self.ANDtypes = list()
393 +
            if new_and_types is None:
394 +
                self._and_types = list()
356 395
            else:
357 -
                self.ANDtypes = _remove_blanks_repeats(newANDtypes)
396 +
                self._and_types = _remove_blanks_repeats(new_and_types)
358 397
359 398
    class Bond(Atom):
360 399
        """Bond representation, which may have ORtype and ANDtype descriptors.
361 400
362 401
        Attributes
363 402
        ----------
364 -
        ORtypes : list of tuples of ORbases and ORdecorators
403 +
        or_types : list of tuples of ORbases and ORdecorators
365 404
            in form (base: [list of decorators])
366 405
            The ORtype types that will be combined with logical OR
367 -
        ANDtypes : list of string
368 -
            The ANDtypes that will be combined with logical AND
406 +
        and_types : list of string
407 +
            The and_types that will be combined with logical AND
369 408
370 409
        """
371 410
        # Implementation identical to atoms apart from what is put in the as_smarts/as_smirks strings
372 411
373 -
        def __init__(self, ORtypes = None, ANDtypes = None):
412 +
        def __init__(self, or_types = None, and_types = None, index = 0):
374 413
            """
375 414
            Parameters
376 415
            -----------
377 -
            ORtypes: list of tuples, optional, default = None
416 +
            or_types : list of tuples, optional, default = None
378 417
                tuples have form (base, [ORdecorators])
379 418
                bond descriptors that will be OR'd together in a SMARTS
380 -
            ANDtypes: list of str, optional, default = None
419 +
            and_types : list of str, optional, default = None
381 420
                strings that will be AND'd together in a SMARTS
382 -
            index: integer, default = None
421 +
            index : integer, default = 0
383 422
                This is for book keeping inside environments and will not be shown in SMARTS or SMIRKS
384 423
                example: bond1 in a Bond is the bond between atom1 and atom2
385 424
            """
386 -
            super(ChemicalEnvironment.Bond,self).__init__(ORtypes, ANDtypes, None, None)
387 -
            self._atom = False
425 +
            super(ChemicalEnvironment.Bond,self).__init__(or_types, and_types, index)
426 +
            self.is_atom = False
388 427
            return
389 428
390 -
        def asSMARTS(self):
429 +
        def as_smarts(self):
391 430
            """Return the bond representation as SMARTS.
392 431
393 432
            Returns
@@ -395,82 +434,90 @@
Loading
395 434
            smarts : str
396 435
                The SMARTS string for just this bond
397 436
            """
398 -
            if self.ORtypes:
399 -
                ORcombos = list()
400 -
                for (ORbase, ORdecorators) in self.ORtypes:
401 -
                    ORcombos.append(ORbase+''.join(ORdecorators))
402 -
                smarts = ','.join(ORcombos)
437 +
            if self._or_types:
438 +
                or_combos = list()
439 +
                for (OR_base, OR_decorators) in self._or_types:
440 +
                    or_combos.append(OR_base + ''.join(OR_decorators))
441 +
                smarts = ','.join(or_combos)
403 442
            else:
404 443
                smarts = '~'
405 444
406 -
            if len(self.ANDtypes) > 0:
407 -
                smarts += ';' + ';'.join(self.ANDtypes)
445 +
            if len(self._and_types) > 0:
446 +
                smarts += ';' + ';'.join(self._and_types)
408 447
409 448
            return smarts
410 449
411 -
        def asSMIRKS(self):
450 +
        def as_smirks(self):
412 451
            """
413 452
            Returns
414 453
            --------
415 454
            smarts : str
416 455
                The SMIRKS string for just this bond
417 456
            """
418 -
            #the same as asSMARTS()
419 -
            #    for consistency asSMARTS() or asSMIRKS() can be called
457 +
            # the same as as_smarts()
458 +
            #    for consistency as_smarts() or as_smirks() can be called
420 459
            #    for all environment objects
421 -
            return self.asSMARTS()
460 +
            return self.as_smarts()
422 461
423 -
        def getOrder(self):
462 +
        def get_order(self):
424 463
            """
425 464
            Returns a float for the order of this bond
426 -
            for multiple ORtypes or ~ it returns the minimum possible order
465 +
            for multiple or_types or ~ it returns the minimum possible order
427 466
            the intended application is for checking valence around a given atom
467 +
468 +
            Returns
469 +
            --------
470 +
            min_order : float
471 +
                minimum order for this bond (i.e. 1 for a '-' decorator)
428 472
            """
429 -
            # Minimum order for empty ORtypes is 1:
430 -
            if not self.ORtypes:
473 +
            # Minimum order for empty or_types is 1:
474 +
            if not self._or_types:
431 475
                return 1
432 476
433 -
            orderDict = {'~':1.,
477 +
            order_dict = {'~':1.,
434 478
                    '-':1., ':': 1.5, '=':2., '#':3.,
435 479
                    '!-':1.5, '!:':1., '!=':1., '!#':1.}
436 -
            orderList = [orderDict[base] for (base, decor) in self.ORtypes]
437 -
            return min(orderList)
480 +
            order_list = [order_dict.get(base,1) for (base, decor) in self._or_types]
481 +
            return min(order_list)
438 482
439 483
    def __init__(self, smirks = None, label = None, replacements = None):
440 484
        """Initialize a chemical environment abstract base class.
441 485
442 -
        smirks = string, optional
486 +
        Parameters
487 +
        -----------
488 +
        smirks : string, optional
443 489
            if smirks is not None, a chemical environment is built
444 490
            from the provided SMIRKS string
445 -
        label = anything, optional
491 +
        label : anything, optional
446 492
            intended to be used to label this chemical environment
447 493
            could be a string, int, or float, or anything
448 -
        replacements = list of lists, optional,
494 +
        replacements : list of lists, optional,
449 495
            [substitution, smarts] form for parsing SMIRKS
450 496
        """
451 497
        # Define the regular expressions used for all SMIRKS decorators
452 498
        # There are a limited number of descriptors for smirks string they are:
453 499
        # That is a # followed by one or more ints w/ or w/o a ! in front '!#16'
454 -
        element_num = "!?[#]\d+"
500 +
        element_num = r"!?[#]\d+"
455 501
        # covers element symbols, i.e. N,C,O,Br not followed by a number
456 502
        element_sym = "!?[A-Z][a-z]?"
457 503
        # covers element symbols that are aromatic:
458 504
        aro_sym = "!?[cnops]"
459 505
        # replacement strings
460 -
        replace_str = "\$\w+"
506 +
        replace_str = r"\$\w+"
461 507
        # a or A w/ or w/o a ! in front 'A'
462 508
        aro_ali = "!?[aA]"
463 509
        # the decorators (D,j,V,X,^) followed by one or more integers
464 -
        needs_int = "!?[DjVX^]\d+"
510 +
        needs_int = r"!?[DjVX^]\d+"
465 511
        # R(x), +, - do not need to be followed by an integer w/ or w/o a ! 'R2'
466 -
        optional_int = "!?[RHhrx+-]\d*"
512 +
        optional_int = r"!?[RHhrx+-]\d*"
467 513
        # chirality options, "@", "@@", "@int" w/ or w/o a ! in front
468 -
        chirality = "!?[@]\d+|!?[@]@?"
514 +
        chirality = r"!?[@]\d+|!?[@]@?"
469 515
470 516
        # Generate RegEx string for decorators:
471 517
        self.no_bracket_atom_reg = r'('+'|'.join([element_sym, aro_sym, replace_str])+')'
472 518
        self.atom_reg = '|'.join([element_num, aro_ali, needs_int,
473 -
            optional_int, chirality, replace_str, element_sym, aro_sym])
519 +
                                  optional_int, chirality, replace_str,
520 +
                                  element_sym, aro_sym])
474 521
        self.atom_reg = r'('+self.atom_reg+')'
475 522
476 523
        # Define bond regular expression options below in order:
@@ -488,7 +535,7 @@
Loading
488 535
489 536
        if smirks is not None:
490 537
            # Check that it is a valid SMIRKS
491 -
            if not self.isValid(smirks):
538 +
            if not self.is_valid(smirks):
492 539
                raise SMIRKSParsingError("Error Provided SMIRKS ('%s') was \
493 540
not parseable with current toolkit" % smirks)
494 541
@@ -510,13 +557,28 @@
Loading
510 557
                        into a ChemicalEnvironment" % smirks)
511 558
512 559
        # Check that the created Environment is valid
513 -
        if not self.isValid():
560 +
        if not self.is_valid():
514 561
            raise SMIRKSParsingError("Input SMIRKS (%s), converted to %s \
515 -
                    is now invalid" % (smirks, self.asSMIRKS()))
562 +
                    is now invalid" % (smirks, self.as_smirks()))
516 563
517 564
        return
518 565
519 566
    def _graph_remove_node(self, node):
567 +
        """
568 +
        removes a node from the graph, kept separate from other
569 +
        functions so if (when) networkx has an API change we only
570 +
        have to change one place.
571 +
572 +
        Parameters
573 +
        -----------
574 +
        node : node in self._graph
575 +
576 +
        Returns
577 +
        --------
578 +
        node_removed : bool
579 +
        """
580 +
        if node not in self._graph:
581 +
            return False
520 582
        self._graph.remove_node(node)
521 583
        return True
522 584
@@ -524,6 +586,19 @@
Loading
524 586
        """
525 587
        When data is False returns a list of nodes in graph
526 588
        otherwise returns a dictionary in the form {node: data}
589 +
590 +
        Parameters
591 +
        -----------
592 +
        data : bool
593 +
            include data for each node
594 +
595 +
        Returns
596 +
        --------
597 +
        nodes : list or dict
598 +
            if data is False, returns a list in the form:
599 +
                [node1, node2, ...]
600 +
            if data is True, returns a dictionary in the form:
601 +
                {node: {data_key: data, ...}, ... }
527 602
        """
528 603
        if data:
529 604
            return dict(self._graph.nodes(data=True))
@@ -531,10 +606,27 @@
Loading
531 606
532 607
    def _graph_edges(self, data=False, node=None):
533 608
        """
534 -
        returns a list of tuples,
535 -
        If data is False it has the form [(node1, node2)]
536 -
        Otherwise it includes the data [(node1, node2, data_dictionary)]
537 -
        If node is not None then the list includes only edges connected to that node
609 +
        Returns all edges (node=None) or edges associated
610 +
        with a specific node. We use a custom internal function
611 +
        so that if (when) networkx changes their API we only
612 +
        have to change one place in the script.
613 +
614 +
        Parameters
615 +
        -----------
616 +
        data : bool
617 +
            include data on edges (bonds)?
618 +
        node : graph node
619 +
            get only edges connected to that edge
620 +
621 +
        Returns
622 +
        --------
623 +
        edges : list of edges
624 +
            Returns all edges (node=None)
625 +
            or edges connected to the specified node.
626 +
            If data is False then the list has the form:
627 +
                [ (node1, node2), ... ]
628 +
            otherwise, if data is True it has the form:
629 +
                [ (node1, node2, {dictionary of data}), ...]
538 630
        """
539 631
        if node is None:
540 632
            return list(self._graph.edges(data=data))
@@ -542,24 +634,56 @@
Loading
542 634
543 635
    def _graph_neighbors(self, node):
544 636
        """
545 -
        returns a list of neighbors for the given node
637 +
        Returns a list of neighbors for the given node.
638 +
        This is done in a custom function so we have only
639 +
        one place to change if (when) networkx changes the API.
640 +
641 +
        Parameters
642 +
        -----------
643 +
        node : graph node
644 +
645 +
        Returns
646 +
        --------
647 +
        neighbors : list of neighboring nodes
546 648
        """
547 649
        return list(self._graph.neighbors(node))
548 650
549 651
    def _graph_get_edge_data(self, node1, node2):
550 652
        """
551 -
        returns a dictionary for the data at the edged connecting
552 -
        node1 and node2 in graph
653 +
        Returns a dictionary for the data at the edge connecting
654 +
        node1 and node2 in graph. We set this in a custom function
655 +
        so we only have to change one place if (when) networkx
656 +
        changes their API.
657 +
658 +
        Parameters
659 +
        -----------
660 +
        node1 : graph node
661 +
        node2 : a different graph node
662 +
663 +
        Returns
664 +
        --------
665 +
        data_dict : dict
666 +
            dictionary of the data stored on the edge between node1 and node2
553 667
        """
554 668
        return self._graph.get_edge_data(node1, node2)
555 669
556 -
    def isValid(self, smirks = None):
670 +
    def is_valid(self, smirks=None):
557 671
        """
558 -
        Returns if the environment is valid, that is if it
559 -
        creates a parseable SMIRKS string.
672 +
        Checks if the provided SMIRKS or the one created
673 +
        by the environment is valid according to ChemPer rules.
674 +
675 +
        Parameters
676 +
        -----------
677 +
        smirks : str or None
678 +
            if None then we call self.as_smirks()
679 +
680 +
        Returns
681 +
        --------
682 +
        is_valid : bool
683 +
            True if this is a valid ChemPer SMIRKS
560 684
        """
561 685
        if smirks is None:
562 -
            smirks = self._asSMIRKS()
686 +
            smirks = self._as_smirks()
563 687
        from chemper.chemper_utils import is_valid_smirks
564 688
        return is_valid_smirks(smirks)
565 689
@@ -567,13 +691,13 @@
Loading
567 691
        """
568 692
        This function converts a smirks string to a Chemical Environment
569 693
        """
570 -
        smirks = _convert_embedded_SMIRKS(input_smirks)
694 +
        smirks = _convert_embedded_smirks(input_smirks)
571 695
        atoms = dict() # store created atom
572 696
        idx = 1 # current atom being created
573 697
        store = list() # to store indices while branching
574 -
        bondingTo = idx # which atom are we going to bond to
698 +
        bonding_to = idx # which atom are we going to bond to
575 699
576 -
        atom_string, start, end = _find_embedded_brackets(smirks, '\[', '\]')
700 +
        atom_string, start, end = _find_embedded_brackets(smirks, r'\[', r'\]')
577 701
578 702
        if start != 0: # first atom is not in square brackets
579 703
            if start != -1:
@@ -598,34 +722,33 @@
Loading
598 722
599 723
        # Check for ring index, i.e. the 1s in "[#6:1]1-CCCCC1"
600 724
        match = re.match(r'(\d+)',leftover)
601 -
        if match is not None: # leftover starts with int
725 +
        if match is not None:  # leftover starts with int
602 726
            ring = re.findall(r'(\d+)',leftover)[0]
603 727
            leftover = leftover[match.end():]
604 728
        else:
605 729
            ring = None
606 730
607 731
        # Get atom information and create first atom
608 -
        OR, AND, index = self._getAtomInfo(atom_string)
609 -
        new_atom = self.addAtom(None, newORtypes = OR, newANDtypes = AND,
610 -
                newAtomIndex = index, newAtomRing = ring, beyondBeta = True)
732 +
        ors, ands, index = self._get_atom_info(atom_string)
733 +
        new_atom = self.add_atom(None, new_or_types= ors, new_and_types= ands,
734 +
                                 new_atom_index= index, new_atom_ring= ring, beyond_beta= True)
611 735
        atoms[idx] = new_atom
612 736
613 737
        while len(leftover) > 0:
614 738
            idx += 1
615 -
616 739
            # Check for branching
617 740
            if leftover[0] == ')':
618 -
                bondingTo = store.pop()
741 +
                bonding_to = store.pop()
619 742
                leftover = leftover[1:]
620 743
                continue
621 744
622 745
            if leftover[0] == '(':
623 -
                store.append(bondingTo)
746 +
                store.append(bonding_to)
624 747
                leftover = leftover[1:]
625 748
                continue
626 749
627 750
            # find beginning and end of next [atom]
628 -
            atom_string, start, end = _find_embedded_brackets(leftover, '\[', '\]')
751 +
            atom_string, start, end = _find_embedded_brackets(leftover, r'\[', r'\]')
629 752
630 753
            if start != -1: # no more square brackets
631 754
                bond_string = leftover[:start]
@@ -649,8 +772,8 @@
Loading
649 772
                leftover = leftover[end+1:]
650 773
651 774
            # Get bond and atom info
652 -
            bOR, bAND = self._getBondInfo(bond_string)
653 -
            aOR, aAND, index = self._getAtomInfo(atom_string[1:-1])
775 +
            b_or, b_and = self._get_bond_info(bond_string)
776 +
            a_or, a_and, index = self._get_atom_info(atom_string[1:-1])
654 777
655 778
            # Check for ring index, i.e. the 1s in "[#6:1]1-CCCCC1"
656 779
            match = re.match(r'(\d+)',leftover)
@@ -661,18 +784,30 @@
Loading
661 784
                ring = None
662 785
663 786
            # create new atom
664 -
            new_atom = self.addAtom(atoms[bondingTo], bondORtypes=bOR,
665 -
                    bondANDtypes=bAND, newORtypes=aOR, newANDtypes=aAND,
666 -
                    newAtomIndex=index, newAtomRing=ring, beyondBeta=True)
787 +
            new_atom = self.add_atom(atoms[bonding_to], bond_or_types=b_or,
788 +
                                     bond_and_types=b_and, new_or_types=a_or, new_and_types=a_and,
789 +
                                     new_atom_index=index, new_atom_ring=ring, beyond_beta=True)
667 790
668 791
            # update state
669 792
            atoms[idx] = new_atom
670 -
            bondingTo = idx
793 +
            bonding_to = idx
671 794
        return
672 795
673 -
    def _getAtomInfo(self, atom):
796 +
    def _get_atom_info(self, atom):
674 797
        """
675 -
        given atom string, returns ORtypes, ANDtypes, and index
798 +
        Parses string for one atom
799 +
800 +
        Parameters
801 +
        -----------
802 +
        atom : str
803 +
            string for one atom (the part between brackets)
804 +
805 +
        Returns
806 +
        --------
807 +
        or_types : list of tuples
808 +
            OR decorators are in the form [ (base, [decorators]), ...]
809 +
        and_types : list
810 +
        index : int
676 811
        """
677 812
        # Find atom index
678 813
        colon = atom.find(':')
@@ -684,37 +819,49 @@
Loading
684 819
685 820
        split = atom.split(';')
686 821
687 -
        # Get ANDtypes (and split them if they don't use ;)
688 -
        ANDtypes = list()
822 +
        # Get and_types (and split them if they don't use ;)
823 +
        and_types = list()
689 824
        for a in split[1:]:
690 -
            ANDtypes += re.findall(self.atom_reg, a)
825 +
            and_types += re.findall(self.atom_reg, a)
691 826
692 -
        # Get ORtypes
693 -
        ORList = split[0].split(',')
694 -
        ORtypes = list()
695 -
        # Separate ORtypes into bases and decorators
696 -
        for OR in ORList:
697 -
            ORbase, ORdecors = self._separateORtypes(OR)
698 -
            if ORbase is not None:
699 -
                ORtypes.append( (ORbase, ORdecors) )
827 +
        # Get or_types
828 +
        or_list = split[0].split(',')
829 +
        or_types = list()
830 +
        # Separate or_types into bases and decorators
831 +
        for OR in or_list:
832 +
            or_base, or_decors = self._separate_or_types(OR)
833 +
            if or_base is not None:
834 +
                or_types.append((or_base, or_decors))
700 835
701 -
        return ORtypes, ANDtypes, index
836 +
        return or_types, and_types, index
702 837
703 -
    def _separateORtypes(self, ORtype):
838 +
    def _separate_or_types(self, or_type):
704 839
        """
705 840
        Separates or_type (e.g. "#6X4R+0") into
706 841
        a base and decorators (e.g. '#6', ['X4','R','+0'] )
842 +
843 +
        Parameters
844 +
        -----------
845 +
        or_type : str
846 +
            string for one or_type
847 +
848 +
        Returns
849 +
        --------
850 +
        base : str
851 +
            #n, element symbol, or *
852 +
        decs : list
853 +
            list of decorators
707 854
        """
708 855
        # special case 1: wild card
709 -
        if ORtype == '*':
856 +
        if or_type == '*':
710 857
            return None, []
711 858
712 -
        # if ORbase is a wildcard
713 -
        if ORtype[0] == '*':
714 -
            return '*', re.findall(self.atom_reg, ORtype[1:])
859 +
        # if the OR base is a wildcard followed by decorators
860 +
        if or_type[0] == '*':
861 +
            return '*', re.findall(self.atom_reg, or_type[1:])
715 862
716 863
        # Split up decorators by RegEx strings for atoms
717 -
        split = re.findall(self.atom_reg, ORtype)
864 +
        split = re.findall(self.atom_reg, or_type)
718 865
        if len(split) == 0:
719 866
            return None, []
720 867
@@ -722,124 +869,152 @@
Loading
722 869
        decs = _remove_blanks_repeats(split[1:], ['',base])
723 870
        return base, decs
724 871
725 -
    def _getBondInfo(self, bond):
872 +
    def _get_bond_info(self, bond):
726 873
        """
727 -
        given bond strings returns ORtypes and ANDtypes
874 +
        Given a bond string, returns its or_types and and_types
875 +
876 +
        Parameters
877 +
        -----------
878 +
        bond : str
879 +
            string for one bond (e.g. '-,:;!@')
880 +
881 +
        Returns
882 +
        --------
883 +
        or_types : list
884 +
            list of or_type decorators, following atom tuple format
885 +
            in the '-,:;!@' example you get [ ('-', []), (':', []) ]
886 +
        and_types : list
887 +
            list of and_type decorators
888 +
            in this example you get ['!@']
728 889
        """
729 890
        # blank bond string is single or aromatic
730 -
        # empty ORtypes in Chemical Environments are treated as ~ bonds
891 +
        # empty or_types in Chemical Environments are treated as ~ bonds
731 892
        if bond == "":
732 -
            ANDtypes = list()
733 -
            ORtypes = [ ('-', []), (':', []) ]
734 -
            return ORtypes, ANDtypes
893 +
            and_types = list()
894 +
            or_types = [('-', []), (':', [])]
895 +
            return or_types, and_types
735 896
736 897
        # AND types indicated by ; at the end
737 898
        split = bond.split(';')
738 -
        ANDtypes = list()
899 +
        and_types = list()
739 900
        for a in split[1:]:
740 -
            ANDtypes += re.findall(self.bond_regs, a)
901 +
            and_types += re.findall(self.bond_regs, a)
741 902
742 -
        # ORtypes are divided by ,
743 -
        ORList = split[0].split(',')
744 -
        ORtypes = list()
745 -
        for OR in ORList:
903 +
        # or_types are divided by ,
904 +
        or_list = split[0].split(',')
905 +
        or_types = list()
906 +
        for OR in or_list:
746 907
            if OR == '~':
747 908
                continue
748 909
            or_divide = re.findall(self.bond_regs, OR)
749 910
            if len(or_divide) > 0:
750 -
                ORtypes.append( (or_divide[0], or_divide[1:]))
911 +
                or_types.append((or_divide[0], or_divide[1:]))
751 912
752 -
        return ORtypes, ANDtypes
913 +
        return or_types, and_types
753 914
754 -
    def asSMIRKS(self, smarts = False):
915 +
    def as_smirks(self, smarts = False):
755 916
        """
756 917
        Returns a SMIRKS representation of the chemical environment
757 918
758 919
        Parameters
759 920
        -----------
760 -
        smarts: optional, boolean
921 +
        smarts : bool, optional
761 922
            if True, returns a SMARTS instead of SMIRKS without index labels
923 +
924 +
        Returns
925 +
        --------
926 +
        smirks : str
927 +
            SMIRKS string for this environment
762 928
        """
763 -
        init_atom = self.selectAtom(1)
764 -
        return self._asSMIRKS(init_atom, None, smarts)
929 +
        init_atom = self.select_atom(1)
930 +
        return self._as_smirks(init_atom, None, smarts)
765 931
766 -
    def _asSMIRKS(self, initialAtom = None, neighbors = None, smarts = False):
767 -
        """Return a SMIRKS representation of the chemical environment.
932 +
    def _as_smirks(self, initial_atom = None, neighbors = None, smarts = False):
933 +
        """
934 +
        Return a SMIRKS representation of the chemical environment.
935 +
        This uses a recursive structure to combine SMIRKS for every
936 +
        atom in this environment.
937 +
        TODO: figure out if this can be done with a while loop instead
768