Showing 19 of 66 files from the diff.
Other files ignored by Codecov
docs/conf.py has changed.
versioneer.py has changed.
setup.py has changed.
setup.cfg has changed.
README.md has changed.

@@ -0,0 +1,52 @@
+"""
+The base schema.
+"""
+from typing import Union
+
+import numpy as np
+from pydantic import BaseModel
+
+
+class SchemaBase(BaseModel):
+    """
+    This is the schema base class which is adapted by the other schemas as required.
+    """
+
+    # set any enum fields here to make sure json and yaml work
+    _enum_fields = []
+
+    class Config:
+        extra = "forbid"
+        allow_mutation = True
+        validate_assignment = True
+        json_encoders = {
+            np.ndarray: lambda v: v.flatten().tolist(),
+        }
+
+    def dict(
+        self,
+        *,
+        include: Union["AbstractSetIntStr", "MappingIntStrAny"] = None,
+        exclude: Union["AbstractSetIntStr", "MappingIntStrAny"] = None,
+        by_alias: bool = False,
+        skip_defaults: bool = None,
+        exclude_unset: bool = False,
+        exclude_defaults: bool = False,
+        exclude_none: bool = False,
+    ) -> "DictStrAny":
+
+        # correct the enum dict rep
+        data = super().dict(
+            include=include,
+            exclude=exclude,
+            by_alias=by_alias,
+            skip_defaults=skip_defaults,
+            exclude_unset=exclude_unset,
+            exclude_defaults=exclude_defaults,
+            exclude_none=exclude_none,
+        )
+        exclude = exclude or []
+        for field in self._enum_fields:
+            if field not in exclude:
+                data[field] = getattr(self, field).value
+        return data
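The `dict()` override above exists because pydantic's default serialization leaves enum members as enum objects, which breaks JSON/YAML round-tripping. A minimal sketch of how a subclass is expected to use `_enum_fields` (the `Phase`/`ExampleSchema` names are hypothetical; `SchemaBase` is imported from the new module added in this diff):

```python
from enum import Enum

from openff.bespokefit.schema.schema import SchemaBase


class Phase(Enum):
    READY = "ready"
    DONE = "done"


class ExampleSchema(SchemaBase):
    phase: Phase = Phase.READY
    _enum_fields = ["phase"]


schema = ExampleSchema()
assert schema.dict()["phase"] == "ready"  # the enum is collapsed to its raw value
```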
similarity index 75%
rename from bespokefit/schema/smirks.py
rename to openff/bespokefit/schema/smirks.py

@@ -21,9 +21,24 @@
     def __init__(self, forcefield_name: str):
         """
         Gather the forcefield ready for manipulation.
+
+        Parameters
+        ----------
+        forcefield_name: str
+            The string of the target forcefield path.
+
+        Notes
+        -----
+            This will always try to strip the constraints parameter handler as the FF should be unconstrained for fitting.
         """
         self.forcefield = ForceField(forcefield_name, allow_cosmetic_attributes=True)
 
+        # try and strip a constraint handler
+        try:
+            del self.forcefield._parameter_handlers["Constraints"]
+        except KeyError:
+            pass
+
     def add_smirks(
         self,
         smirks: List[Union[AtomSmirks, AngleSmirks, BondSmirks, TorsionSmirks]],
@@ -39,6 +54,12 @@
             SmirksType.ProperTorsions: ProperTorsionHandler.ProperTorsionType,
             SmirksType.Vdw: vdWHandler.vdWType,
         }
+        _smirks_ids = {
+            SmirksType.Bonds.value: "b",
+            SmirksType.Angles.value: "a",
+            SmirksType.ProperTorsions.value: "t",
+            SmirksType.Vdw.value: "n",
+        }
         new_params = {}
 
         for smirk in smirks:
@@ -52,8 +73,8 @@
                 smirk_type
             ).parameters
             no_params = len(current_params)
-            for i, parameter in enumerate(parameters):
-                parameter["id"] = f"t{no_params + i}"
+            for i, parameter in enumerate(parameters, start=1):
+                parameter["id"] = _smirks_ids[smirk_type] + str(no_params + i)
             current_params.insert(-1, _smirks_conversion[smirk_type](**parameter))
 
     def label_molecule(self, molecule: off.Molecule) -> Dict[str, str]:
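The `_smirks_ids` table above restores per-handler id prefixes that the old hard-coded `f"t{...}"` lost, and `enumerate(..., start=1)` makes the ids one-based. A quick illustration of the ids it now produces (the enum values are assumed to match the handler names):

```python
# Assumed SmirksType values; only the prefix logic is taken from the diff above.
_smirks_ids = {"Bonds": "b", "Angles": "a", "ProperTorsions": "t", "vdW": "n"}

no_params = 4            # parameters already present for this handler
parameters = [{}, {}]    # two new parameters to append
for i, parameter in enumerate(parameters, start=1):
    parameter["id"] = _smirks_ids["Bonds"] + str(no_params + i)
# -> ids "b5" and "b6", instead of the old "t4"/"t5" for every handler type
```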
similarity index 100%
rename from bespokefit/optimizers/__init__.py
rename to openff/bespokefit/optimizers/__init__.py

@@ -23,7 +23,7 @@
 
 class TargetRegisterError(BespokeFitException):
     """
-    The registering the target raised an error.
+    Registering the target raised an error.
     """
 
     error_type = "target_register_error"
@@ -73,3 +73,48 @@
 
     error_type = "workflow_update_error"
     header = "Workflow Update Error"
+
+
+class TargetNotSetError(BespokeFitException):
+    """
+    Raised when the target is referenced but not set.
+    """
+
+    error_type = "target_not_set_error"
+    header = "Target Not Set Error"
+
+
+class ForceFieldError(BespokeFitException):
+    """
+    Raised if the forcefield has an incorrect format or cannot be loaded.
+    """
+
+    error_type = "force_field_error"
+    header = "Force Field Error"
+
+
+class SMIRKSTypeError(BespokeFitException):
+    """
+    Raised when an incorrect SMIRKS pattern is used to make a Smirks schema, e.g. tagging only one atom in a bond smirks.
+    """
+
+    error_type = "smirks_type_error"
+    header = "SMIRKS Type Error"
+
+
+class MissingWorkflowError(BespokeFitException):
+    """
+    Raised when we try to complete a fitting entry that has no workflow set.
+    """
+
+    error_type = "missing_workflow_error"
+    header = "Missing Workflow Error"
+
+
+class MoleculeMissMatchError(BespokeFitException):
+    """
+    Raised when two molecules do not match in a results update.
+    """
+
+    error_type = "molecule_miss_match_error"
+    header = "Molecule Miss Match Error"

@@ -0,0 +1,109 @@
+"""
+Here we register all optimizers with bespokefit.
+"""
+from typing import Dict, List, Union
+
+from ..exceptions import OptimizerError
+from .forcebalance import ForceBalanceOptimizer
+from .model import Optimizer
+
+optimizers: Dict[str, Optimizer] = {}
+
+
+def register_optimizer(optimizer: Optimizer, replace: bool = False) -> None:
+    """
+    Register a new valid optimizer with bespokefit.
+
+    Parameters
+    ----------
+    optimizer: Optimizer
+        The optimizer class that should be registered.
+    replace: bool
+        If the optimizer should replace another optimizer registered with the same name.
+
+    Raises
+    ------
+    OptimizerError
+        If the optimizer is already registered or if the optimizer object is not compatible.
+    """
+
+    if isinstance(optimizer, Optimizer):
+        optimizer_name = optimizer.optimizer_name.lower()
+        if optimizer_name not in optimizers or (optimizer_name in optimizers and replace):
+            optimizers[optimizer_name] = optimizer
+        else:
+            raise OptimizerError(
+                f"An optimizer is already registered under the name {optimizer.optimizer_name}; to replace it please use the `replace=True` flag."
+            )
+    else:
+        raise OptimizerError(
+            f"The optimizer {optimizer} could not be registered; it must be a subclass of openff.bespokefit.optimizers.Optimizer"
+        )
+
+
+def deregister_optimizer(optimizer: Union[Optimizer, str]) -> None:
+    """
+    Remove an optimizer from the list of valid optimizers.
+
+    Parameters
+    ----------
+    optimizer: Union[Optimizer, str]
+        The optimizer class or name of the class that should be removed.
+    """
+
+    if isinstance(optimizer, Optimizer):
+        optimizer_name = optimizer.optimizer_name.lower()
+    else:
+        optimizer_name = optimizer.lower()
+
+    opt = optimizers.pop(optimizer_name, None)
+    if opt is None:
+        raise OptimizerError(
+            f"The optimizer {optimizer} was not registered with bespokefit."
+        )
+
+
+def get_optimizer(optimizer_name: str, **kwargs) -> Optimizer:
+    """
+    Get the optimizer class from the list of registered optimizers in bespokefit by name.
+
+    Parameters
+    ----------
+    optimizer_name: str
+        The `optimizer_name` attribute of the optimizer that should be fetched.
+    kwargs: dict
+        Any kwargs that should be passed into the optimizer.
+
+    Returns
+    -------
+    Optimizer
+        The requested optimizer matching the given optimizer name.
+    """
+
+    opt = optimizers.get(optimizer_name.lower(), None)
+    if opt is None:
+        raise OptimizerError(
+            f"The optimizer {optimizer_name} was not registered with bespokefit."
+        )
+
+    if kwargs:
+        return opt.parse_obj(kwargs)
+    else:
+        return opt
+
+
+def list_optimizers() -> List[str]:
+    """
+    Get the list of optimizers registered with bespokefit.
+
+    Returns
+    -------
+    List[str]
+        A list of the optimizer classes registered.
+    """
+
+    return list(optimizers.keys())
+
+
+# register the built in optimizers
+register_optimizer(ForceBalanceOptimizer())
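A hedged usage sketch for the registry above, assuming the built-in optimizer's `optimizer_name` attribute is "ForceBalanceOptimizer"; the registered key is the lower-cased name, and kwargs are applied through `parse_obj`:

```python
from openff.bespokefit.optimizers import get_optimizer, list_optimizers

print(list_optimizers())  # e.g. ["forcebalanceoptimizer"] once the built-in is registered
opt = get_optimizer("ForceBalanceOptimizer")  # lookup is case-insensitive
# passing kwargs builds a configured instance via parse_obj; the kwarg name
# here is hypothetical and depends on the Optimizer model's fields
opt = get_optimizer("ForceBalanceOptimizer", max_iterations=10)
```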
similarity index 88%
rename from bespokefit/optimizers/forcebalance.py
rename to openff/bespokefit/optimizers/forcebalance.py

@@ -5,16 +5,11 @@
 from typing import Any, Dict, List, Tuple
 
 from openforcefield import topology as off
-from simtk import unit
-
-from bespokefit.collection_workflows import (
-    CollectionMethod,
-    TorsiondriveWorkflow,
-    WorkflowStage,
-)
 from qcsubmit.results import SingleResult
 from qcsubmit.serializers import serialize
+from simtk import unit
 
+from ..collection_workflows import CollectionMethod, TorsiondriveWorkflow, WorkflowStage
 from ..common_structures import FragmentData, ProperTorsionSettings
 from ..exceptions import FragmenterError, MissingReferenceError
 from ..schema.fitting import FittingEntry, TargetSchema
@@ -35,7 +30,10 @@
     parameter_targets: List[ProperTorsionSettings] = [ProperTorsionSettings()]
     collection_workflow: List[WorkflowStage] = TorsiondriveWorkflow
     fragmentation: bool = True  # should we fragment the molecule
-    torsion_selection: TorsionSelection = TorsionSelection.All  # which bonds should be fit
+    weight: float = 1.0
+    torsion_selection: TorsionSelection = (
+        TorsionSelection.All
+    )  # which bonds should be fit
     fit_gradient: bool = False
     # torsiondrive settings
     grid_spacings: List[int] = [15]
@@ -51,10 +49,14 @@
         """
         Take a molecule and fragment it using WBOfragmenter across all rotatable bonds.
 
-        Parameters:
-            off_molecule: The openforcefield molecule that should be fragmented.
+        Parameters
+        ----------
+        off_molecule: off.Molecule
+            The openforcefield molecule that should be fragmented.
 
-        Returns:
+        Returns
+        -------
+        List[FragmentData]
             A list of FragmentData classes which hold the relations between the parent molecule and the fragment.
         """
 
@@ -84,7 +86,7 @@
                 # get the fragment parent mapping
                 frag_dihedral = data["dihedral"][0][1:3]
 
-                # in some cases we get one fragment back which is th parent molecule
+                # in some cases we get one fragment back which is the parent molecule
                 # we should not work out a mapping
                 if not off_molecule.is_isomorphic_with(off_frag):
                     mapping = self._get_fragment_parent_mapping(
@@ -136,14 +138,19 @@
             )
 
     def generate_fitting_schema(
-        self, molecule: off.Molecule, conformers: int = 5,
+        self,
+        molecule: off.Molecule,
+        conformers: int = 5,
     ) -> TargetSchema:
         """
         This method will consume a molecule and produce a fitting schema related to that molecule for this target.
 
-        Parameters:
-            molecule: The molecule that the fitting schema should be created for.
-            conformers: The number of input conformers to supply.
+        Parameters
+        ----------
+        molecule: off.Molecule
+            The molecule that the fitting schema should be created for.
+        conformers: int, default=5
+            The number of input conformers to supply for the torsiondrive.
         """
         # the provenance here captures the settings used in the target including the priors.
         target_schema = TargetSchema(target_name=self.name, provenance=self.dict())
@@ -156,7 +163,10 @@
                 torsions = self.get_all_torsions(
                     fragment.fragment_torsion, fragment.fragment_molecule
                 )
-                fragment.fragment_molecule.generate_conformers(n_conformers=conformers)
+                if fragment.fragment_molecule.n_conformers < conformers:
+                    fragment.fragment_molecule.generate_conformers(
+                        n_conformers=conformers
+                    )
                 # make the fitting entry with metadata
                 fitting_entry = FittingEntry(
                     name=fragment.fragment_molecule.to_smiles(explicit_hydrogens=False),
@@ -164,7 +174,11 @@
                     collection_workflow=self.collection_workflow,
                     qc_spec=self.qc_spec,
                     input_conformers=fragment.fragment_molecule.conformers,
-                    extras={"dihedrals": [torsions[0],]},
+                    extras={
+                        "dihedrals": [
+                            torsions[0],
+                        ]
+                    },
                     provenance=self.provenance(),
                 )
                 # for each torsion make a new smirks
@@ -206,7 +220,8 @@
             # for each rotatable bond we should generate a torsiondrive
             attributes = get_molecule_cmiles(molecule)
             rotatable_bonds = self.select_rotatable_bonds(molecule)
-            molecule.generate_conformers(n_conformers=conformers)
+            if molecule.n_conformers < conformers:
+                molecule.generate_conformers(n_conformers=conformers)
             while rotatable_bonds:
                 # get a bond
                 bond = rotatable_bonds.pop()
@@ -218,7 +233,11 @@
                     collection_workflow=self.collection_workflow,
                     qc_spec=self.qc_spec,
                     input_conformers=molecule.conformers,
-                    extras={"dihedrals": [torsions[0],]},
+                    extras={
+                        "dihedrals": [
+                            torsions[0],
+                        ]
+                    },
                     provenance=self.provenance(),
                 )
                 # make a new smirks pattern for each dihedral
@@ -226,7 +245,8 @@
                     smirks = TorsionSmirks(
                         atoms={torsion},
                         smirks=self._get_new_single_graph_smirks(
-                            atoms=torsion, molecule=molecule,
+                            atoms=torsion,
+                            molecule=molecule,
                         ),
                     )
                     smirks.parameterize = self.parameter_targets[0].k_values
@@ -267,6 +287,16 @@
     def select_rotatable_bonds(self, molecule: off.Molecule) -> List[Tuple[int, int]]:
         """
         Gather a list of rotatable bonds based on the chosen torsion selection method.
+
+        Parameters
+        ----------
+        molecule: off.Molecule
+            The molecule whose rotatable bonds we want to find.
+
+        Returns
+        -------
+        List[Tuple[int, int]]
+            A list of central bond atom index tuples.
         """
 
         if self.torsion_selection == TorsionSelection.NonTerminal:
@@ -287,15 +317,19 @@
         self, bond: Tuple[int, int], molecule: off.Molecule
     ) -> List[Tuple[int, int, int, int]]:
         """
-        Get all torsions that pass through the central bond to generate smirks paterns.
-
-        Parameters:
-            bond: The bond which we want all torsions for.
-            molecule: The molecule which the bond corresponds to.
-
-        Returns:
-            A list of all of the torsion tuples passing through this central bond ordered atomic weight with
-            the heaviest terminal atom first.
+        Get all torsions that pass through the central bond to generate smirks patterns.
+
+        Parameters
+        ----------
+        bond: Tuple[int, int]
+            The bond which we want all torsions for.
+        molecule: off.Molecule
+            The molecule which the bond corresponds to.
+
+        Returns
+        -------
+        List[Tuple[int, int, int, int]]
+            A list of all of the torsion tuples passing through this central bond.
         """
 
         torsions = []
@@ -360,10 +394,9 @@
 
     name = "AbInitio_SMIRNOFF"
     description = "Static single point energy and gradient fitting."
-    weight: float = 1.0
     keywords: Dict[str, Any] = {
         "writelevel": 1,
-        "mol2": "molecule.sdf",
+        "mol2": "molecule.mol2",
         "pdb": "molecule.pdb",
         "coords": "scan.xyz",
     }
@@ -406,13 +439,13 @@
             )
             # remove the conformers
             molecule._conformers = []
-            for result in entry.reference_data:
+            for result in entry.get_reference_data():
                 geometry = unit.Quantity(result.molecule.geometry, unit=unit.bohrs)
                 molecule.add_conformer(geometry)
             molecule.to_file("scan.xyz", "xyz")
            # now make the qdata file
             self.create_qdata(
-                entry.reference_data,
+                entry.get_reference_data(),
                 fit_gradient=fitting_target.provenance.get("fit_gradient", False),
             )
             # move back to the home dir
@@ -475,7 +508,7 @@
             "energy_upper_limit": self.energy_upper_limit,
             "attributes": entry.attributes,
             "torsion_grid_ids": [
                data.extras["dihedral_angle"] for data in entry.get_reference_data()
             ],
         }
         # now write to file
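Both conformer hunks above replace unconditional generation with a guard so user-supplied conformers are kept and only topped up when too few are present. A minimal standalone sketch of the pattern:

```python
from openforcefield import topology as off

molecule = off.Molecule.from_smiles("CCO")
conformers = 5
# only generate when the molecule does not already carry enough conformers
if molecule.n_conformers < conformers:
    molecule.generate_conformers(n_conformers=conformers)
```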
similarity index 100%
rename from bespokefit/tests/__init__.py
rename to openff/bespokefit/tests/__init__.py

@@ -1,6 +1,7 @@
 import abc
 from typing import Dict, Optional, Set, Tuple, Union
 
+from chemper.graphs.environment import ChemicalEnvironment
 from openforcefield.typing.engines.smirnoff import (
     AngleHandler,
     BondHandler,
@@ -12,6 +13,7 @@
 from simtk import unit
 
 from ..common_structures import SmirksType
+from ..exceptions import SMIRKSTypeError
 from ..utils import compare_smirks_graphs
 from .schema import SchemaBase
 
@@ -20,25 +22,49 @@
     """
     Take a length value and return the validated version if the unit is missing.
     """
+    if isinstance(length, str):
+        length = length.split()[0]
     return f"{length} * angstrom"
 
 
 def _to_degrees(angle: float) -> str:
+    if isinstance(angle, str):
+        angle = angle.split()[0]
     return f"{angle} * degree"
 
 
 def _to_bond_force(force: float) -> str:
-    return f"{force} * angstroms**-2 * mole**-1 * kilocalrie"
+    if isinstance(force, str):
+        force = force.split()[0]
+    return f"{force} * angstrom**-2 * mole**-1 * kilocalorie"
 
 
 def _to_angle_force(force: float) -> str:
+    if isinstance(force, str):
+        force = force.split()[0]
     return f"{force} * mole**-1 * radian**-2 * kilocalorie"
 
 
 def _to_kcals_mol(force: float) -> str:
+    if isinstance(force, str):
+        force = force.split()[0]
     return f"{force} * mole**-1 * kilocalorie"
 
+
+def _validate_smirks(smirks: str, expected_tags: int) -> str:
+    """
+    Make sure the supplied smirks has the correct number of tagged atoms.
+    """
+    smirk = ChemicalEnvironment(smirks=smirks)
+    tagged_atoms = len(smirk.get_indexed_atoms())
+    if tagged_atoms != expected_tags:
+        raise SMIRKSTypeError(
+            f"The smirks pattern ({smirks}) has {tagged_atoms} tagged atoms, but should have {expected_tags}."
+        )
+    else:
+        return smirks
+
+
 class SmirksSchema(SchemaBase):
     """
     This schema identifies new smirks patterns and the corresponding atoms they should be applied to.
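A quick illustration of the new `_validate_smirks` helper (module-internal, requires chemper): the number of index-tagged atoms must match the parameter type, so a bond pattern needs exactly two tags:

```python
_validate_smirks(smirks="[#6:1]-[#6:2]", expected_tags=2)  # ok: two tagged atoms, returns the pattern
_validate_smirks(smirks="[#6:1]-[#6]", expected_tags=2)    # raises SMIRKSTypeError: only one tag
```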
@@ -47,7 +73,8 @@
     atoms: Set[Tuple[int, ...]]
     smirks: str
     type: SmirksType
-    parameterize: Set[str] = {}
+    parameterize: Set[str] = set()
+    _enum_fields = ["type"]
 
     def __eq__(self, other: "SmirksSchema"):
         """
@@ -64,7 +91,7 @@
         Construct a dictionary that can be made into an OpenFF parameter.
         """
 
-        data = self.dict(exclude={"atoms", "type", "parameterize"})
+        data = self.dict(exclude={"atoms", "type", "parameterize", "identifier"})
         # now we have to format parameterize
         parameterize = ", ".join(self.parameterize)
         data["parameterize"] = parameterize
@@ -73,17 +100,17 @@
 
 
 class ValidatedSmirks(SmirksSchema, abc.ABC):
-    @validator("parameterize", each_item=True)
-    def validate_parameterize(cls, parameter: str) -> str:
+    @validator("parameterize")
+    def _validate_parameterize(cls, parameters: Set[str]) -> Set[str]:
         """
         Make sure that the fields are valid for the molecule.
         """
-        if parameter.lower() in cls.__fields__:
-            return parameter.lower()
-        else:
-            raise ValueError(
-                f"This smirks does not correspond to the parameter attribute  {parameter}"
-            )
+        for parameter in parameters:
+            if parameter.lower() not in cls.__fields__:
+                raise ValueError(
+                    f"This smirks does not have a parameter attribute {parameter}"
+                )
+        return parameters
 
     @abc.abstractmethod
     def update_parameters(self, off_smirk) -> None:
@@ -112,6 +139,10 @@
     _validate_epsilion = validator("epsilon", allow_reuse=True)(_to_kcals_mol)
     _validate_sigma = validator("rmin_half", allow_reuse=True)(_to_angstrom)
 
+    @validator("smirks")
+    def _validate_smirks(cls, smirks: str) -> str:
+        return _validate_smirks(smirks=smirks, expected_tags=1)
+
     def update_parameters(self, off_smirk: vdWHandler.vdWType) -> None:
         """
         Update the Atom smirks parameter handler using the corresponding openforcefield parameter handler.
@@ -134,6 +165,10 @@
     _validate_force = validator("k", allow_reuse=True)(_to_bond_force)
     _validate_length = validator("length", allow_reuse=True)(_to_angstrom)
 
+    @validator("smirks")
+    def _validate_smirks(cls, smirks: str) -> str:
+        return _validate_smirks(smirks=smirks, expected_tags=2)
+
     def update_parameters(self, off_smirk: BondHandler.BondType) -> None:
         """
         Update the Bond smirks parameter handler using the corresponding openforcefield parameter handler.
@@ -146,7 +181,7 @@
         self.k = off_smirk.k.value_in_unit(
             unit=unit.kilocalories_per_mole / unit.angstrom ** 2
         )
-        self.length = off_smirk.value_in_unit(unit=unit.angstrom)
+        self.length = off_smirk.length.value_in_unit(unit=unit.angstrom)
 
 
 class AngleSmirks(ValidatedSmirks):
@@ -158,6 +193,10 @@
     _validate_force = validator("k", allow_reuse=True)(_to_angle_force)
     _validate_angle = validator("angle", allow_reuse=True)(_to_degrees)
 
+    @validator("smirks")
+    def _validate_smirks(cls, smirks: str) -> str:
+        return _validate_smirks(smirks=smirks, expected_tags=3)
+
     def update_parameters(self, off_smirk: AngleHandler.AngleType) -> None:
         """
         Update the Angle smirks parameter handler using the corresponding openforcefield parameter handler.
@@ -193,7 +232,7 @@
         self,
         periodicity: str,
         phase: Optional[float] = None,
-        force: float = 1e-5,
+        k: float = 1e-5,
         idivf: float = 1.0,
     ):
         """
@@ -206,7 +245,7 @@
             else:
                 phase = 180
 
-        data = {"periodicity": periodicity, "phase": phase, "k": force, "idivf": idivf}
+        data = {"periodicity": periodicity, "phase": phase, "k": k, "idivf": idivf}
         super().__init__(**data)
 
 
@@ -215,6 +254,24 @@
     type: SmirksType = SmirksType.ProperTorsions
     terms: Dict[str, TorsionTerm] = {}
 
+    @validator("smirks")
+    def _validate_smirks(cls, smirks: str) -> str:
+        return _validate_smirks(smirks=smirks, expected_tags=4)
+
+    @validator("parameterize")
+    def _validate_ks(cls, parameters: Set[str]) -> Set[str]:
+        "Make sure K values are specified"
+        allowed_values = ["k1", "k2", "k3", "k4", "k5", "k6"]
+        for parameter in parameters:
+            if parameter not in allowed_values:
+                raise ValueError(
+                    f"The parameter {parameter} is not supported for parametrization."
+                )
+        return parameters
+
+    def __eq__(self, other):
+        return super(TorsionSmirks, self).__eq__(other=other)
+
     def update_parameters(
         self,
         off_smirk: Union[
@@ -228,14 +285,16 @@
         Parameters
         ----------
         off_smirk: Union[ProperTorsionHandler.ProperTorsionType, ImproperTorsionHandler.ImproperTorsionType]
-            The Torsion parameter type that the parameters should be extrated from.
+            The Torsion parameter type that the parameters should be extracted from.
         """
+        # clear out the current terms as the number of k could change
+        self.terms = {}
         for i, p in enumerate(off_smirk.periodicity):
             new_term = TorsionTerm(
                 periodicity=p,
                 phase=off_smirk.phase[i].value_in_unit(unit=unit.degree),
                 idivf=off_smirk.idivf[i],
-                force=off_smirk.k[i].value_in_unit(unit=unit.kilocalorie_per_mole),
+                k=off_smirk.k[i].value_in_unit(unit=unit.kilocalorie_per_mole),
             )
             self.add_torsion_term(term=new_term)
 
@@ -249,17 +308,6 @@
 
         self.terms[new_term.periodicity] = new_term
 
-    @validator("parameterize", each_item=True, pre=True)
-    def validate_ks(cls, parameter) -> str:
-        "Make sure K values are specified"
-        allowed_values = ["k1", "k2", "k3", "k4", "k5", "k6"]
-        if parameter in allowed_values:
-            return parameter
-        else:
-            raise ValueError(
-                f"The parameter {parameter} is not supported for parametrization."
-            )
-
    def to_off_smirks(self) -> Dict[str, str]:
         """
         Construct a dictionary that can be converted into an OpenFF parameter type.
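A hedged sketch of the reworked set-based validators on `TorsionSmirks` (assuming the renamed module path from this diff is importable): with `validate_assignment = True` on the base Config, assigning `parameterize` re-runs the k1..k6 check.

```python
from openff.bespokefit.schema.smirks import TorsionSmirks

smirks = TorsionSmirks(
    atoms={(0, 1, 2, 3)},
    smirks="[*:1]~[*:2]-[*:3]~[*:4]",  # four tagged atoms, as the smirks validator requires
)
smirks.parameterize = {"k1", "k2"}  # accepted
# smirks.parameterize = {"k7"}      # would raise a ValueError: only k1-k6 are supported
```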
similarity index 100%
rename from bespokefit/targets/__init__.py
rename to openff/bespokefit/targets/__init__.py
similarity index 100%
rename from bespokefit/targets/atom_selection.py
rename to openff/bespokefit/targets/atom_selection.py
similarity index 83%
rename from bespokefit/targets/model.py
rename to openff/bespokefit/targets/model.py

@@ -7,6 +7,7 @@
 from pydantic import PositiveFloat, PositiveInt
 
 from ..common_structures import Status
+from ..exceptions import TargetNotSetError
 from ..forcefield_tools import ForceFieldEditor
 from ..schema.fitting import WorkflowSchema
 from ..targets import AbInitio_SMIRNOFF, TorsionProfile_SMIRNOFF
@@ -66,14 +67,17 @@
         This is the main optimization method, which will consume a workflow stage composed of targets and molecules; it will prep them all for fitting,
         optimize, collect the results and return the completed task.
 
-        Parameters:
-            workflow: The workflow schema that should be executed, which contains the targets ready for fitting.
-            initial_forcefield: The name of the initial force field to be used as the optimization starting point.
+        Parameters
+        ----------
+        workflow: WorkflowSchema
+            The workflow schema that should be executed, which contains the targets ready for fitting.
+        initial_forcefield: str
+            The name of the initial force field to be used as the optimization starting point.
         """
         # check that the correct optimizer workflow has been supplied
         priors = {}
         fitting_targets = {}
-        if workflow.optimizer_name == self.optimizer_name:
+        if workflow.optimizer_name.lower() == self.optimizer_name.lower():
             # this will set up the file structure and return us back to the current working dir after
             with forcebalance_setup(workflow.job_id):
                 # now for each target we need to prep the folders
@@ -164,7 +168,11 @@
         # now we need the path to the last forcefield file
         forcefield_dir = os.path.join("result", "optimize")
         files = os.listdir(forcefield_dir)
-        files.remove("bespoke.offxml")
+        try:
+            files.remove("bespoke.offxml")
+        except ValueError:
+            pass
+
         forcefields = [
             (int(re.search("[0-9]+", file_name).group()), file_name)
             for file_name in files
@@ -187,9 +195,17 @@
         fitting_targets: Dict[str, List[str]]
             A dictionary containing the fitting target names sorted by forcebalance target.
 
-        Note:
+        Notes
+        -----
            This function can be used to generate many optimize.in files so that many force balance jobs can be run simultaneously.
         """
+        # check that all of the fitting targets have been set
+        target_names = [target.name.lower() for target in self.optimization_targets]
+        for target_name in fitting_targets.keys():
+            if target_name.lower() not in target_names:
+                raise TargetNotSetError(
+                    f"The target {target_name} is not set up for this optimizer and is required; please add it with runtime options using `set_optimization_target`."
+                )
 
         # grab the template file
         template_file = get_data(os.path.join("templates", "optimize.txt"))
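For context, a small sketch of the numbered-file handling above: after `bespoke.offxml` is stripped, the remaining ForceBalance result files carry a number in their name, and the diff extracts it with a regex so the final force field can be picked numerically rather than lexically (file names here are illustrative):

```python
import re

files = ["force_field_0.offxml", "force_field_1.offxml", "force_field_12.offxml"]
forcefields = [(int(re.search("[0-9]+", name).group()), name) for name in files]
# comparing on the extracted integer avoids picking "..._9" over "..._12"
final_forcefield = max(forcefields)[1]  # -> "force_field_12.offxml"
```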
similarity index 73%
rename from bespokefit/optimizers/model.py
rename to openff/bespokefit/optimizers/model.py

@@ -1,9 +1,8 @@
 from enum import Enum
 from typing import Any, Dict, List, Optional
 
-from simtk import unit
-
 from qcsubmit.results import SingleResult
+from simtk import unit
 
 from .common_structures import Status
 from .schema.schema import SchemaBase
@@ -39,6 +38,8 @@
     retries: int = 0
     job_id: str = ""
 
+    _enum_fields = ["precedence", "status", "method"]
+
     def get_result_geometries(self) -> List[unit.Quantity]:
         """
         For each result in the workflow stage extract the geometries useful for hessian workflows.
similarity index 100%
rename from bespokefit/common_structures.py
rename to openff/bespokefit/common_structures.py

@@ -1,16 +1,15 @@
 import hashlib
-from typing import Any, Dict, List, Optional, Union
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Type, Union
 
 import numpy as np
 from openforcefield import topology as off
 from openforcefield.typing.engines.smirnoff import ForceField
-from pydantic import validator
+from pydantic import Protocol, validator
 from qcelemental.models.types import Array
-from simtk import unit
-
-from bespokefit.collection_workflows import CollectionMethod, Precedence, WorkflowStage
 from qcsubmit.common_structures import QCSpec
 from qcsubmit.datasets import BasicDataset, OptimizationDataset, TorsiondriveDataset
+from qcsubmit.procedures import GeometricProcedure
 from qcsubmit.results import (
     BasicCollectionResult,
     BasicResult,
@@ -20,14 +19,30 @@
     TorsionDriveCollectionResult,
     TorsionDriveResult,
 )
+from qcsubmit.serializers import deserialize, serialize
 from qcsubmit.validators import cmiles_validator
+from simtk import unit
 
+from ..collection_workflows import CollectionMethod, Precedence, WorkflowStage
 from ..common_structures import Status, Task
-from ..exceptions import DihedralSelectionError, OptimizerError, WorkflowUpdateError
-from ..forcefield_tools import ForceFieldEditor
+from ..exceptions import (
+    DihedralSelectionError,
+    MissingReferenceError,
+    MissingWorkflowError,
+    MoleculeMissMatchError,
+    OptimizerError,
+    WorkflowUpdateError,
+)
 from ..utils import schema_to_datasets
 from .schema import SchemaBase
-from .smirks import AngleSmirks, AtomSmirks, BondSmirks, SmirksSchema, TorsionSmirks
+from .smirks import (
+    AngleSmirks,
+    AtomSmirks,
+    BondSmirks,
+    SmirksSchema,
+    TorsionSmirks,
+    ValidatedSmirks,
+)
 
 
 class FittingEntry(SchemaBase):
@@ -38,13 +53,49 @@
     name: str
     attributes: Dict[str, str]
     collection_workflow: List[WorkflowStage] = []
-    target_smirks: List[Union[AtomSmirks, AngleSmirks, BondSmirks, TorsionSmirks]] = []
+    target_smirks: List[Union[SmirksSchema, ValidatedSmirks]] = []
     qc_spec: QCSpec = QCSpec()
     provenance: Dict[str, Any] = {}
     extras: Dict[str, Any] = {}
     input_conformers: List[Array[np.ndarray]] = []
     _validate_attributes = validator("attributes", allow_reuse=True)(cmiles_validator)
 
+    @validator("target_smirks", pre=True)
+    def _check_target_smirks(cls, smirks):
+        """
+        A helper method to correctly handle the union of types.
+        """
+        new_smirks = []
+        _type_conversion = {
+            "vdW": AtomSmirks,
+            "Bonds": BondSmirks,
+            "Angles": AngleSmirks,
+            "ProperTorsions": TorsionSmirks,
+        }
+        for smirk in smirks:
+            if isinstance(smirk, dict):
+                # if it is a dict from importing unpack here
+                new_smirk = _type_conversion[smirk["type"]](**smirk)
+                new_smirks.append(new_smirk)
+            else:
+                new_smirks.append(smirk)
+        return new_smirks
+
+    @validator("input_conformers")
+    def _check_conformers(
+        cls, conformers: List[Array[np.array]]
+    ) -> List[Array[np.array]]:
+        """
+        Take the list of input conformers which will be flat and reshape them.
+        """
+        reshaped_conformers = []
+        for conformer in conformers:
+            if conformer.shape[-1] != 3:
+                reshaped_conformers.append(conformer.reshape((-1, 3)))
+            else:
+                reshaped_conformers.append(conformer)
+        return reshaped_conformers
+
     @validator("extras")
     def _validate_extras(cls, extras: Dict[str, Any]) -> Dict[str, Any]:
         """
@@ -125,7 +176,7 @@
                 tasks.append(stage)
                 # if the task can be done in parallel return the other tasks
                 if stage.precedence == Precedence.Parallel:
-                    for parallel_stage in self.collection_workflow[i:]:
+                    for parallel_stage in self.collection_workflow[i + 1 :]:
                         if parallel_stage.precedence == Precedence.Parallel:
                             job_id = self.get_task_hash(parallel_stage)
                             parallel_stage.job_id = job_id
@@ -146,7 +197,7 @@
         """
         Collect the general hash data.
         """
-        inchi = self.initial_molecule.to_inchi(
+        inchi = self.initial_molecule.to_inchikey(
             fixed_hydrogens=True
         )  # non-standard inchi key
         hash_string = (
@@ -191,12 +242,22 @@
         """
         return self.get_hash() == other.get_hash()
 
-    @property
-    def reference_data(self) -> List[SingleResult]:
+    def get_reference_data(self) -> List[SingleResult]:
         """
-        Return the final result of the collection workflow.
+        Return the final result of the collection workflow if it is set, else raise an error.
         """
-        return self.collection_workflow[-1].result
+        if self.collection_workflow:
+            result = self.collection_workflow[-1].result
+            if result is not None:
+                return result
+            else:
+                raise MissingReferenceError(
+                    "The workflow has not collected any results yet."
+                )
+        else:
+            raise MissingWorkflowError(
+                "The Entry has no collection workflow to hold results."
+            )
 
     @property
     def ready_for_fitting(self) -> bool:
@@ -207,9 +268,12 @@
             `True` if all information is present else `False`.
         """
         # check the last stage is ready for fitting
-        stage = self.collection_workflow[-1]
-        if stage.status == Status.Complete and stage.result is not None:
-            return True
+        if self.collection_workflow:
+            stage = self.collection_workflow[-1]
+            if stage.status == Status.Complete and stage.result is not None:
+                return True
+
+        return False
 
     def update_with_results(
         self, results: Union[BasicResult, TorsionDriveResult, OptimizationEntryResult]
@@ -276,11 +340,11 @@
                             stage.status = Status.Complete
 
                         else:
-                            raise RuntimeError(
+                            raise DihedralSelectionError(
                                 "Molecules are the same but do not target the same dihedral."
                             )
                     else:
-                        raise RuntimeError(
+                        raise MoleculeMissMatchError(
                             "Molecules are not isomorphic and the results cannot be transferred."
                         )
 
@@ -410,7 +474,7 @@
                         ):
                             try:
                                 entry.update_with_results(td_result)
-                            except RuntimeError:
+                            except (DihedralSelectionError, MoleculeMissMatchError):
                                 continue
                         else:
                             continue
@@ -483,6 +547,7 @@
         Parameters:
             initial_forcefield: The name of the initial Forcefield we will be starting at.
         """
+        from ..forcefield_tools import ForceFieldEditor
 
         # get all of the new target smirks
         target_smirks = [
@@ -655,6 +720,19 @@
     optimizer_settings: Dict[str, Dict[str, Any]] = {}
     molecules: List[MoleculeSchema] = []
 
+    @classmethod
+    def parse_file(
+        cls: Type["Model"],
+        path: Union[str, Path],
+        *,
+        content_type: str = None,
+        encoding: str = "utf8",
+        proto: Protocol = None,
+        allow_pickle: bool = False,
+    ) -> "Model":
+        data = deserialize(file_name=path)
+        return cls(**data)
+
     def add_optimizer(self, optimizer: "Optimizer") -> None:
         """
         Add a valid optimizer to the fitting schema.
@@ -667,7 +745,7 @@
                     optimizer.optimizer_name.lower()
                 ] = optimizer.dict(exclude={"optimization_targets"})
         else:
-            raise KeyError(
+            raise OptimizerError(
                 f"The given optimizer {optimizer.optimizer_name} has not been registered with bespokefit, please register first."
             )
 
@@ -714,14 +792,11 @@
         """
         Export the fitting schema to file.
         """
-        file_type = file_name.split(".")[-1]
-        if file_type.lower() == "json":
-            with open(file_name, "w") as output:
-                output.write(self.json(indent=2))
+
+        if "json" in file_name:
+            serialize(self, file_name=file_name)
         else:
-            raise RuntimeError(
-                f"The given file type: {file_type} is not supported please used json."
-            )
+            raise RuntimeError("The given file type is not supported, please use json.")
 
     @property
     def n_molecules(self) -> int:
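Together, the `parse_file` override and the reworked `export_schema` give a round trip through qcsubmit's serializers; a hedged usage sketch:

```python
from openff.bespokefit.schema.fitting import FittingSchema


def round_trip(schema: FittingSchema) -> FittingSchema:
    # ".json.xz" passes the simple "json" check in export_schema, and
    # qcsubmit's serialize/deserialize are assumed to handle the xz
    # compression (as the executor's "final_results.json.xz" export suggests)
    schema.export_schema("fitting_schema.json.xz")
    return FittingSchema.parse_file("fitting_schema.json.xz")
```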
@@ -772,7 +847,7 @@
             molecule.update_with_results(results)
 
     def generate_qcsubmit_datasets(
-        self,
+        self, geometric_settings: Optional[GeometricProcedure] = None
     ) -> List[Union[BasicDataset, OptimizationDataset, TorsiondriveDataset]]:
         """
         Generate a set of qcsubmit datasets containing all of the tasks required to compute the QM data.
@@ -788,4 +863,5 @@
             singlepoint_name=self.singlepoint_dataset_name,
             optimization_name=self.optimization_dataset_name,
             torsiondrive_name=self.torsiondrive_dataset_name,
+            geometric_options=geometric_settings,
         )

@@ -0,0 +1,545 @@
Loading
1 +
import time
2 +
from multiprocessing import Process, Queue
3 +
from typing import Dict, List, Optional, Tuple, Union
4 +
5 +
from qcfractal.interface import FractalClient
6 +
from qcfractal.interface.models.records import OptimizationRecord, ResultRecord
7 +
from qcfractal.interface.models.torsiondrive import TorsionDriveRecord
8 +
from qcsubmit.common_structures import QCSpec
9 +
from qcsubmit.datasets import BasicDataset, OptimizationDataset, TorsiondriveDataset
10 +
from qcsubmit.procedures import GeometricProcedure
11 +
from qcsubmit.results import (
12 +
    BasicCollectionResult,
13 +
    OptimizationCollectionResult,
14 +
    TorsionDriveCollectionResult,
15 +
)
16 +
17 +
from .common_structures import Status
18 +
from .schema.fitting import FittingSchema, MoleculeSchema
19 +
from .utils import schema_to_datasets
20 +
21 +
22 +
class Executor:
23 +
    """
24 +
    This class executes a Fitting schema object, this involves working out what tasks to execute to collect reference data and what optimizations
25 +
    should be ran. While running QCArchive tasks this class will also handle basic error handling in the form of restarts.
26 +
    """
27 +
28 +
    def __init__(self, max_workers: int = 4, max_retries: int = 3) -> None:
29 +
        """
30 +
        Set up the executor data with dataset names and initial tasks which should be submitted.
31 +
        """
32 +
        self.client = None
33 +
        self.server = None
34 +
        self.max_workers = max_workers
35 +
        # activate the client and server
36 +
        self.fitting_schema = None
37 +
        self._torsion_dataset: Optional[str] = None
38 +
        self._optimization_dataset: Optional[str] = None
39 +
        self._energy_dataset: Optional[str] = None
40 +
        self._gradient_dataset: Optional[str] = None
41 +
        self._hessian_dataset: Optional[str] = None
42 +
        self._optimizer_settings = None
43 +
        # keep track of the total number of molecules to be fit
44 +
        self.total_tasks = None
45 +
        # maybe let users set this for error cycling?
46 +
        self.max_retires: int = max_retries
47 +
        # this is the error cycling queue
48 +
        self.collection_queue = Queue()
49 +
        # this is a queue for jobs ready to be optimized
50 +
        self.opt_queue = Queue()
51 +
        self.finished_tasks = Queue()
52 +
        self.task_map: Dict[str, Tuple[str, str, str]] = {}
53 +
        # bump the maxiter for ani optimizations to help convergence
54 +
        self.geometric_settings: Optional[GeometricProcedure] = GeometricProcedure(
55 +
            maxiter=1000
56 +
        )
57 +
58 +
    def execute(
59 +
        self, fitting_schema: FittingSchema, client: Optional[FractalClient] = None
60 +
    ) -> FittingSchema:
61 +
        """
62 +
        Execute a fitting schema. This involves generating QCSubmit datasets and error cycling them and then launching the forcebalance optimizations.
63 +
64 +
65 +
        Parameters:
66 +
            fitting_schema: The fitting schema that should be executed
67 +
            client: Optional fractal client already connected if None we attempt to make a client using the details in the fitting schema.
68 +
69 +
        Returns:
70 +
            The completed fitting schema this will contain any collected results including errors.
71 +
        """
72 +
        # activate the client and server
73 +
        self.activate_client(
74 +
            client=client or fitting_schema.client, workers=self.max_workers
75 +
        )
76 +
        # set up the dataset names for error cycling
77 +
        self._torsion_dataset: str = fitting_schema.torsiondrive_dataset_name
78 +
        self._optimization_dataset: str = fitting_schema.optimization_dataset_name
79 +
        self._energy_dataset: str = fitting_schema.singlepoint_dataset_name + " energy"
80 +
        self._gradient_dataset: str = (
81 +
            fitting_schema.singlepoint_dataset_name + " gradient"
82 +
        )
83 +
        self._hessian_dataset = fitting_schema.singlepoint_dataset_name + " hessain"
84 +
        self._optimizer_settings = fitting_schema.optimizer_settings
85 +
        self.fitting_schema = fitting_schema
86 +
87 +
        # keep track of the total number of molecules to be fit
88 +
        self.total_tasks = len(fitting_schema.molecules)
89 +
        # get the input datasets
90 +
        print("making qcsubmit datasets")
91 +
        input_datasets = fitting_schema.generate_qcsubmit_datasets(
92 +
            geometric_settings=self.geometric_settings
93 +
        )
94 +
        # now generate a mapping between the hash and the dataset entry
95 +
        print("generating task map")
96 +
        self.generate_dataset_task_map(datasets=input_datasets)
97 +
        print("generating collection task queue ...")
98 +
        # generate the initial task queue of collection tasks
99 +
        self.create_input_task_queue(fitting_schema=fitting_schema)
100 +
        print(f"task queue now contains tasks.")
101 +
        if client is None:
102 +
            # if the client is live some tasks might already be present so dont submit the dataset
103 +
            print("starting job submission")
104 +
            responses = self.submit_datasets(datasets=input_datasets)
105 +
            print("client response")
106 +
            print(responses)
107 +
108 +
        return self._execute(fitting_schema=fitting_schema)
109 +
110 +
    def _execute(self, fitting_schema: FittingSchema) -> FittingSchema:
111 +
        print("starting main executor ...")
112 +
        # start the error cycle process
113 +
        error_p = Process(target=self.error_cycle)
114 +
        error_p.start()
115 +
        optimizer_p = Process(target=self.optimizer)
116 +
        optimizer_p.start()
117 +
118 +
        # join them
119 +
        error_p.join()
120 +
        optimizer_p.join()
121 +
        while True:
122 +
            # here we need to watch for results on the parent process
123 +
            task = self.finished_tasks.get()
124 +
            fitting_schema = self.update_fitting_schema(
125 +
                task=task, fitting_schema=fitting_schema
126 +
            )
127 +
            self.total_tasks -= 1
128 +
            if self.total_tasks == 0:
129 +
                break
130 +
        print("all tasks done exporting to file.")
131 +
        fitting_schema.export_schema("final_results.json.xz")
132 +
        return fitting_schema
133 +
134 +
    def activate_client(
135 +
        self, client: Union[str, FractalClient], workers: int = 4
136 +
    ) -> None:
137 +
        """
138 +
        Activate the connection to the chosen qcarchive instance.
139 +
140 +
        Parameters
141 +
        ----------
142 +
        client: str
143 +
            A string of the client name for example snowflake will launch a local snowflake instance. This can
144 +
                be the file name which contains login details which can be passed to FractalClient.
145 +
        workers: int
146 +
            If this is a snowflake worker this will be the number of workers used.
147 +
148 +
        Notes
149 +
        -----
150 +
            This can be a snowflake server or a local qcarchive instance error cycling should still work.
151 +
        """
152 +
        from qcfractal import FractalSnowflake, FractalSnowflakeHandler
153 +
        from qcfractal.interface import FractalClient
154 +
155 +
        if isinstance(client, FractalClient):
156 +
            self.client = client
157 +
            self.server = client.server_information()
158 +
        elif client.lower() == "snowflake_notebook":
159 +
            self.server = FractalSnowflakeHandler()
160 +
            self.client = self.server.client()
161 +
        elif client.lower() == "snowflake":
162 +
            self.server = FractalSnowflake(max_workers=workers)
163 +
            self.client = self.server.client()
164 +
        else:
165 +
            self.client = FractalClient.from_file(client)
166 +
            self.server = self.client.server_information()
167 +
168 +
    def generate_dataset_task_map(
169 +
        self,
170 +
        datasets: List[Union[BasicDataset, OptimizationDataset, TorsiondriveDataset]],
171 +
    ) -> None:
172 +
        """
173 +
        Generate mapping between all of the current tasks in the datasets and their entries updates self.
174 +
175 +
        Parameters:
176 +
            datasets: A list of the qcsubmit datasets which contain tasks to be computed.
177 +
        """
178 +
179 +
        for dataset in datasets:
180 +
            for entry in dataset.dataset.values():
181 +
                self.task_map[entry.attributes["task_hash"]] = (
182 +
                    entry.index,
183 +
                    dataset.dataset_type.lower(),
184 +
                    dataset.dataset_name,
185 +
                )
186 +
187 +
    def create_input_task_queue(self, fitting_schema: FittingSchema) -> None:
188 +
        """
189 +
        Create a task for each molecule in fitting schema and enter them into the collection queue.
190 +
        """
191 +
        for molecule in fitting_schema.molecules:
192 +
            self.collection_queue.put(molecule)
193 +
194 +
    def submit_datasets(
195 +
        self,
196 +
        datasets: List[Union[BasicDataset, OptimizationDataset, TorsiondriveDataset]],
197 +
    ) -> Dict[str, Dict[str, int]]:
198 +
        """
199 +
        Submit the initial datasets to the qcarchive instance and return the response from the server.
200 +
201 +
        Parameters:
202 +
            datasets: The QCSubmit style datasets which are to be submitted.
203 +
        """
204 +
205 +
        responses = {}
206 +
        for dataset in datasets:
207 +
            # make sure there is a molecule in the dataset
208 +
            if dataset.n_molecules > 0:
209 +
                # make sure the metadata is complete
210 +
                dataset.metadata.long_description_url = (
211 +
                    "https://github.com/openforcefield/bespoke-fit"
212 +
                )
213 +
                response = dataset.submit(client=self.client)
214 +
                responses[dataset.dataset_name] = response
215 +
216 +
        return responses
217 +
218 +
    def _get_record_and_index(self, dataset, spec: QCSpec, record_name: str):
219 +
        """
220 +
        Find a record and its dataset index used for result collection.
221 +
        """
222 +
        try:
223 +
            record = dataset.get_record(record_name, spec.spec_name)
224 +
            # loop over the index
225 +
            for td_index, td_entry in dataset.data.records.items():
226 +
                if td_entry.name == record_name:
227 +
                    return record, td_index
228 +
        except KeyError:
229 +
            pass
230 +
231 +
        return None, None
232 +
233 +
    def _update_status(self, collection_tasks, record, task):
        """
        Update the collection tasks status with the record progress and the overall task progress.
        """
        # error cycle
        if record.status.value == "ERROR":
            if collection_tasks[0].collection_stage.retries < self.max_retries:
                # we should restart the task here
                print("restarting the record")
                self.restart_archive_record(record)
                # now increment the restart counter
                for collection_task in collection_tasks:
                    collection_task.collection_stage.retries += 1
                    collection_task.collection_stage.status = Status.Collecting
            else:
                for collection_task in collection_tasks:
                    collection_task.collection_stage.status = Status.Error
        # normal execution
        elif record.status.value == "RUNNING":
            for collection_task in collection_tasks:
                collection_task.collection_stage.status = Status.Collecting

        elif record.status.value == "COMPLETE":
            # now we need to save the results
            for collection_task in collection_tasks:
                collection_task.collection_stage.status = Status.Complete

        # now update the opt stage; the loop variable is renamed so it does not
        # shadow the task argument
        for opt_stage in task.workflow:
            for target in opt_stage.targets:
                for entry in target.entries:
                    for entry_task in entry.current_tasks():
                        if entry_task.status == Status.Error:
                            opt_stage.status = Status.Error

    def _error_cycle_task(self, task) -> None:
        """
        Specific error cycling for a given task.
        """

        print("task molecule name ", task.molecule)

        # first we have to get all of the tasks for this molecule
        task_map = task.get_task_map()

        to_collect = {
            "dataset": {},
            "optimizationdataset": {},
            "torsiondrivedataset": {},
        }
        # now for each one we want to query the archive for its status
        for task_hash, collection_tasks in task_map.items():
            spec = collection_tasks[0].entry.qc_spec
            entry_id, dataset_type, dataset_name = self.task_map[task_hash]
            print("looking for ", entry_id, dataset_type, dataset_name)
            dataset = self.client.get_collection(dataset_type, dataset_name)
            # get the record and the dataframe index
            record, td_id = self._get_record_and_index(
                dataset=dataset, spec=spec, record_name=entry_id
            )

            # if the record is not found the job has not been generated yet
            if record is not None:
                print("updating the status")
                self._update_status(collection_tasks, record, task)
                if collection_tasks[0].collection_stage.status == Status.Complete:
                    try:
                        to_collect[dataset_type][dataset_name].setdefault(
                            spec.spec_name, []
                        ).append(td_id)
                    except KeyError:
                        to_collect[dataset_type][dataset_name] = {
                            spec.spec_name: [
                                td_id,
                            ]
                        }

        # if we have values to collect update the task here
        if any(to_collect.values()):
            print("collecting results for ", to_collect)
            self._collect_task_results(task, to_collect)

        # now we should look for new tasks to submit
        print("looking for new tasks ...")
        if task.get_task_map():
            response = self.submit_new_tasks(task)
            print("response of new tasks ... ", response)

        print("checking for optimizations to run ...")
        opt = task.get_next_optimization_stage()
        if opt is None:
            # the molecule is done; pass to the opt queue to be removed
            self.opt_queue.put(task)
        elif opt.ready_for_fitting:
            print("found optimization, submitting for task", task.molecule)
            self.opt_queue.put(task)
        elif opt.status == Status.Error:
            # one of the collection entries has failed so pass to opt which will fail
            self.opt_queue.put(task)
        else:
            print("task not finished, putting back into the queue.")
            # the molecule is not finished and not ready for opt; error cycle again
            self.collection_queue.put(task)

    def error_cycle(self) -> None:
        """
        For each MoleculeSchema pulled from the collection queue check that all collection tasks are running and error cycle the jobs.
        This will also generate new collection tasks as needed; for example, hessian tasks are created when optimizations finish.
        """

        while True:
            print("pulling task from collection queue")
            task = self.collection_queue.get()
            if isinstance(task, str):
                # this is the kill message so kill the worker
                break
            else:
                self._error_cycle_task(task=task)
                time.sleep(20)

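The loop above is a standard sentinel-terminated queue worker; a minimal standalone sketch of the same pattern (the process function is a hypothetical stand-in for _error_cycle_task):

import queue
import threading

work_queue = queue.Queue()

def worker() -> None:
    while True:
        item = work_queue.get()
        if isinstance(item, str):
            # any string acts as the kill message, mirroring the "END" sentinel used here
            break
        process(item)  # hypothetical per-task handler

thread = threading.Thread(target=worker)
thread.start()
work_queue.put("END")  # terminates the worker once the real tasks are drained
thread.join()
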
    def _collect_task_results(self, task: MoleculeSchema, collection_dict: Dict):
        """
        Gather the results in the collection dict and update the task with them.
        """
        results = self.collect_results(record_map=collection_dict)
        task.update_with_results(results=results)

    def update_fitting_schema(
        self, task: MoleculeSchema, fitting_schema: FittingSchema
    ) -> FittingSchema:
        """
        Update the given task back into the fitting schema so we can keep track of progress.
        Call this after any result or optimization update.
        """
        for i, molecule_task in enumerate(fitting_schema.molecules):
            if task == molecule_task:
                print("updating task")
                # update the schema and break
                fitting_schema.molecules[i] = task
                return fitting_schema

    def submit_new_tasks(self, task: MoleculeSchema) -> Dict[str, Dict[str, int]]:
        """
        Query the given molecule schema for new tasks to submit and either add them to qcarchive or put them in the
        local task queue.
        """
        datasets = schema_to_datasets(
            [
                task,
            ],
            singlepoint_name=self.fitting_schema.singlepoint_dataset_name,
            optimization_name=self.fitting_schema.optimization_dataset_name,
            torsiondrive_name=self.fitting_schema.torsiondrive_dataset_name,
            geometric_options=self.geometric_settings,
        )
        # all tasks, including those already running, have been put into the datasets;
        # remove any hash that has been seen before and add new tasks to the hash record
        for dataset in datasets:
            to_remove = []
            for task_id, entry in dataset.dataset.items():
                task_hash = entry.attributes["task_hash"]
                if task_hash in self.task_map:
                    to_remove.append(task_id)
                else:
                    self.task_map[task_hash] = (
                        entry.index,
                        dataset.dataset_type.lower(),
                        dataset.dataset_name,
                    )
            # now remove the duplicated records
            if to_remove:
                for entry_id in to_remove:
                    del dataset.dataset[entry_id]

        # now submit the datasets
        return self.submit_datasets(datasets)

    def collect_results(
        self, record_map: Dict[str, Dict[str, Dict[str, List[str]]]]
    ) -> List[
        Union[
            BasicCollectionResult,
            OptimizationCollectionResult,
            TorsionDriveCollectionResult,
        ]
    ]:
        """
        For the given mapping of record ids per dataset type collect all of the results.
        """
        dataset_types = {
            "dataset": BasicCollectionResult,
            "optimizationdataset": OptimizationCollectionResult,
            "torsiondrivedataset": TorsionDriveCollectionResult,
        }
        results = []
        for dataset_type, collection in record_map.items():
            # get the result class
            result_type = dataset_types[dataset_type.lower()]
            for dataset_name, spec_data in collection.items():
                for spec, records in spec_data.items():
                    result = result_type.from_server(
                        client=self.client,
                        dataset_name=dataset_name,
                        subset=records,
                        spec_name=spec,
                    )
                    results.append(result)

        return results

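For clarity, the nested record_map layout collect_results expects, sketched with illustrative values:

record_map = {
    "torsiondrivedataset": {                    # dataset type
        "Bespokefit torsiondrives": {           # dataset name
            "default": ["entry-1", "entry-2"],  # spec name -> dataset record indices
        }
    }
}
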
    def restart_archive_record(
        self, task: Union[ResultRecord, OptimizationRecord, TorsionDriveRecord]
    ) -> None:
        """
        Take a record and dispatch the type of restart to be done.
        """
        if isinstance(task, ResultRecord):
            print("restarting basic ...")
            self.restart_basic(
                [
                    task.id,
                ]
            )
        elif isinstance(task, OptimizationRecord):
            print("restarting optimizations ...")
            self.restart_optimizations(
                [
                    task.id,
                ]
            )
        else:
            print("restarting torsiondrives and optimizations ...")
            # we need the optimization ids first
            td_opts = []
            for optimizations in task.optimization_history.values():
                td_opts.extend(optimizations)
            # now query the optimizations
            opt_records = self.client.query_procedures(td_opts)
            restart_opts = [opt.id for opt in opt_records if opt.status == "ERROR"]
            # restart the opts then the torsiondrives
            self.restart_optimizations(restart_opts)
            self.restart_torsiondrives(
                [
                    task.id,
                ]
            )

    def restart_torsiondrives(self, torsiondrive_ids: List[int]) -> None:
        """
        Restart all torsiondrive records.
        """
        for td in torsiondrive_ids:
            self.client.modify_services("restart", procedure_id=td)

    def restart_optimizations(self, optimization_ids: List[int]) -> None:
        """
        Restart all optimizations.
        """
        for opt in optimization_ids:
            self.client.modify_tasks(operation="restart", base_result=opt)

    def restart_basic(self, basic_ids: List[int]) -> None:
        """
        Restart all basic single point tasks.
        """
        # TODO: restarting basic single point tasks is not implemented yet
        pass

    def optimizer(self) -> None:
        """
        Monitor the optimizer queue and run any tasks that arrive in the list.
        """
        sent_tasks = 0
        while True:
            print("looking for task in queue")
            task = self.opt_queue.get()
            print("found optimizer task for ", task.molecule)
            # now get the opt
            opt = task.get_next_optimization_stage()
            # make sure it is ready
            if opt is not None and opt.ready_for_fitting:
                # now we need to set up the optimizer
                optimizer = self.fitting_schema.get_optimizer(opt.optimizer_name)
                # remove any stale targets
                optimizer.clear_optimization_targets()
                result = optimizer.optimize(
                    workflow=opt, initial_forcefield=task.initial_forcefield
                )
                # now we need to update the workflow stage with the result
                print("applying results to the current task workflow ...")
                task.update_optimization_stage(result)
                # check for running QM tasks
                if task.get_task_map():
                    # submit to the collection queue again
                    self.collection_queue.put(task)
                elif task.get_next_optimization_stage() is not None:
                    # we have another stage to optimize so put back into the optimization queue
                    self.opt_queue.put(task)
                else:
                    # the task is finished so send it back
                    self.finished_tasks.put(task)
                    sent_tasks += 1
            else:
                # the task has an error so fail it
                self.finished_tasks.put(task)
                sent_tasks += 1
            # kill condition
            if sent_tasks == self.total_tasks:
                self.collection_queue.put("END")
                break
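
A rough sketch of how the two workers above could be driven together; the executor variable and thread wiring are illustrative, not part of the API shown in this diff:

import threading

# executor is assumed to be an instance of the class above with its queues prepared
collection_worker = threading.Thread(target=executor.error_cycle)
optimizer_worker = threading.Thread(target=executor.optimizer)
collection_worker.start()
optimizer_worker.start()
# optimizer() pushes the "END" sentinel once sent_tasks == total_tasks,
# which in turn stops error_cycle()
optimizer_worker.join()
collection_worker.join()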
similarity index 79%
rename from bespokefit/forcefield_tools.py
rename to openff/bespokefit/forcefield_tools.py

@@ -52,7 +52,7 @@
                self.get_optimization_target(target, **kwargs)
            )
-        elif target.name in self._get_registered_targets().keys():
+        elif target.name.lower() in self._get_registered_targets().keys():
            self.optimization_targets.append(target)

        else:
@@ -78,29 +78,43 @@
        targets = self._get_registered_targets()
        for name, target in targets.items():
            if name.lower() == target_name.lower():
-                return target.parse_obj(kwargs)
+                if kwargs:
+                    return target.parse_obj(kwargs)
+                else:
+                    return target
+        raise TargetRegisterError(
+            f"No target is registered to this optimizer under the name {target_name.lower()}"
+        )

    @classmethod
-    def register_target(cls, target: Target, overwrite: bool = False) -> None:
+    def register_target(cls, target: Target, replace: bool = False) -> None:
        """
        Take a target and register it with the optimizer under an alias name which is used to call the target.

-        Parameters:
-            target: The target class which is to be registered with the optimizer.
-            overwrite: If the alias is already registered overwrite with the new target data with no exception.
+        Parameters
+        ----------
+        target: Target
+            The target class which is to be registered with the optimizer.
+        replace: bool
+            If the alias is already registered, replace it with the new target data without raising an exception.
+
+        Raises
+        ------
+        TargetRegisterError
+            If the target has already been registered.
        """

        current_targets = cls._get_registered_targets()
-        if (target.name not in current_targets) or (
-            target.name in current_targets and overwrite
+        if (target.name.lower() not in current_targets) or (
+            target.name.lower() in current_targets and replace
        ):
            try:
-                cls._all_targets[cls.__name__][target.name] = target
+                cls._all_targets[cls.__name__][target.name.lower()] = target
            except KeyError:
-                cls._all_targets[cls.__name__] = {target.name: target}
+                cls._all_targets[cls.__name__] = {target.name.lower(): target}
        else:
            raise TargetRegisterError(
-                f"The alias {target.name} has already been registered with this optimizer; to update use overwrite = `True`."
+                f"The alias {target.name.lower()} has already been registered with this optimizer; to replace it use replace=`True`."
            )

    @classmethod
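
A hedged sketch of how the now case-insensitive register behaves; MyOptimizer and MyTarget are placeholders, not names from this diff:

MyOptimizer.register_target(MyTarget(), replace=False)
# registering the same alias again without replace=True raises TargetRegisterError
MyOptimizer.register_target(MyTarget(), replace=True)
target = MyOptimizer().get_optimization_target("mytarget")  # name look-up ignores case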
@@ -108,13 +122,24 @@
        """
        Remove a registered target from the optimizer.

-        Parameters:
-            target_name: The name of the target that should be removed.
+        Parameters
+        ----------
+        target_name: str
+            The name of the target that should be removed.
+
+        Raises
+        ------
+        TargetRegisterError
+            If no target is registered under the name to be removed.
        """

        current_targets = cls._get_registered_targets()
-        if target_name in current_targets:
-            del cls._all_targets[cls.__name__][target_name]
+        if target_name.lower() in current_targets:
+            del cls._all_targets[cls.__name__][target_name.lower()]
+        else:
+            raise TargetRegisterError(
+                f"No target with the name {target_name.lower()} was registered."
+            )

    @classmethod
    def _get_registered_targets(cls) -> Dict[str, Target]:
similarity index 100%
rename from bespokefit/schema/__init__.py
rename to openff/bespokefit/schema/__init__.py
similarity index 88%
rename from bespokefit/schema/fitting.py
rename to openff/bespokefit/schema/fitting.py

@@ -5,10 +5,11 @@
from typing import List, Union

from openforcefield import topology as off
-from pydantic import BaseModel
-
+from openforcefield.typing.engines.smirnoff import get_available_force_fields
+from pydantic import BaseModel, validator
from qcsubmit.serializers import deserialize, serialize

+from .exceptions import ForceFieldError, OptimizerError
from .optimizers import get_optimizer, list_optimizers
from .optimizers.model import Optimizer
from .schema.fitting import FittingSchema, MoleculeSchema, WorkflowSchema
@@ -24,7 +25,9 @@
    client: str = "snowflake"  # the type of client that should be used
    torsiondrive_dataset_name: str = "Bespokefit torsiondrives"
    optimization_dataset_name: str = "Bespokefit optimizations"
-    singlepoint_dataset_name: str = "Bespokefit single points"  # the driver type will be appended to the name
+    singlepoint_dataset_name: str = (
+        "Bespokefit single points"  # the driver type will be appended to the name
+    )
    optimization_workflow: List[Optimizer] = []

    class Config:
@@ -32,6 +35,19 @@
        allow_mutation = True
        arbitrary_types_allowed = True

+    @validator("initial_forcefield")
+    def check_forcefield(cls, forcefield: str) -> str:
+        """
+        Check that the forcefield is available via the toolkit.
+        """
+        openff_forcefields = get_available_force_fields()
+        if forcefield not in openff_forcefields:
+            raise ForceFieldError(
+                f"The force field {forcefield} is not installed; please choose a force field from the following: {openff_forcefields}"
+            )
+        else:
+            return forcefield
+
    @classmethod
    def parse_file(
        cls,
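
Between hunks, a quick sketch of what the new validator buys; WorkflowFactory is assumed to be the class these attributes belong to:

from openforcefield.typing.engines.smirnoff import get_available_force_fields

print(get_available_force_fields())  # e.g. ["openff-1.0.0.offxml", ...]

# a known force field validates cleanly at construction time ...
workflow = WorkflowFactory(initial_forcefield="openff-1.0.0.offxml")
# ... while a typo such as "openff-1.0.offxml" now raises ForceFieldError immediately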
@@ -48,7 +64,7 @@
        data = deserialize(file_name=path)
        optimization_workflow = data.pop("optimization_workflow")
        workflow = cls.parse_obj(data)
-        # now we need to re initi the optimizer and the targets
+        # now we need to re-init the optimizers and the targets
        for optimizer in optimization_workflow:
            opt_targets = optimizer.pop("optimization_targets")
            opt_engine = get_optimizer(**optimizer)
@@ -62,6 +78,11 @@
    def add_optimization_stage(self, optimizer: Union[str, Optimizer]) -> None:
        """
        Add an optimization stage to the workflow that will be executed in order.
+
+        Parameters
+        ----------
+        optimizer: Union[str, Optimizer]
+            The optimizer that should be added to the workflow; targets should also be added before creating the fitting schema.
        """

        if isinstance(optimizer, str):
@@ -73,17 +94,49 @@
                opt_engine = optimizer

            else:
-                raise KeyError(
+                raise OptimizerError(
                    f"The requested optimizer {optimizer} was not registered with bespokefit."
                )

        self.optimization_workflow.append(opt_engine)

+    def remove_optimization_stage(self, optimizer: Union[str, Optimizer]) -> None:
+        """
+        Remove an optimizer from the list of optimization stages.
+
+        Parameters
+        ----------
+        optimizer: Union[str, Optimizer]
+            The optimizer that should be removed from the workflow.
+        """
+        # remove by name
+        if isinstance(optimizer, Optimizer):
+            opt_name = optimizer.optimizer_name.lower()
+        else:
+            opt_name = optimizer.lower()
+
+        stage_to_remove = None
+        # find the optimizer with this name and remove it
+        for opt in self.optimization_workflow:
+            if opt.optimizer_name.lower() == opt_name:
+                stage_to_remove = opt
+                break
+
+        if stage_to_remove is not None:
+            self.optimization_workflow.remove(stage_to_remove)
+        else:
+            raise OptimizerError(
+                f"No optimizer could be found in the workflow with the name {opt_name}."
+            )
+
    def export_workflow(self, file_name: str) -> None:
        """
-        Export the workflow to file.
-        Parameters:
-            file_name: The name of the file the workflow should be exported to, the type is determined from the name.
+        Export the workflow to a yaml or json file.
+
+        Parameters
+        ----------
+        file_name: str
+            The name of the file the workflow should be exported to; the type is determined from the name.
        """

        serialize(serializable=self.dict(), file_name=file_name)
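
A short usage sketch tying the new stage-management methods together; the factory name and optimizer alias are assumptions:

workflow = WorkflowFactory()                        # assumed owning class
workflow.add_optimization_stage("forcebalance")     # resolve a registered optimizer by name
workflow.remove_optimization_stage("forcebalance")  # raises OptimizerError if absent
workflow.export_workflow("workflow.json")           # serializer infers json from the extension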
@@ -92,13 +145,28 @@
        self, molecules: Union[off.Molecule, List[off.Molecule], str, List[str]]
    ) -> FittingSchema:
        """
-        This is the main function of the workflow which takes the general fitting metatemplate and generates a specific one for the set of molecules that are passed.
+        This is the main function of the workflow which takes the general fitting metatemplate and generates a specific
+        one for the set of molecules that are passed.

-        Here for each molecule for each target we should generate a collection job.
+        #TODO Expand to accept the QCSubmit results datasets directly to create the fitting schema and fill the tasks.

-        Parameters:
-            molecules: The molecule or list of molecules which should be processed by the schema to generate the fitting schema.
+        Parameters
+        ----------
+        molecules: Union[off.Molecule, List[off.Molecule]]
+            The molecule or list of molecules which should be processed by the schema to generate the fitting schema.
        """
+        # check we have an optimizer in the pipeline
+        if not self.optimization_workflow:
+            raise OptimizerError(
+                "There are no optimization stages in the optimization workflow; first add an optimizer and targets."
+            )
+
+        # now check we have targets in each optimizer
+        for opt in self.optimization_workflow:
+            if not opt.optimization_targets:
+                raise OptimizerError(
+                    f"There are no optimization targets for the optimizer {opt.optimizer_name} in the optimization workflow."
+                )

        # create a deduplicated list of molecules first.
        deduplicated_molecules = deduplicated_list(molecules=molecules)
@@ -116,7 +184,8 @@
            # for each molecule make the fitting schema
            mol_name = molecule.to_smiles(mapped=True)
            molecule_schema = MoleculeSchema(
-                molecule=mol_name, initial_forcefield=self.initial_forcefield,
+                molecule=mol_name,
+                initial_forcefield=self.initial_forcefield,
            )
            # add each optimizer
            # TODO fix job id name for other optimizers
@@ -126,7 +195,9 @@
                )
                # now add all the targets associated with the optimizer
                for target in optimizer.optimization_targets:
-                    target_entry = target.generate_fitting_schema(molecule=molecule,)
+                    target_entry = target.generate_fitting_schema(
+                        molecule=molecule,
+                    )
                    workflow_stage.targets.append(target_entry)
                molecule_schema.workflow.append(workflow_stage)
            fitting_schema.add_molecule_schema(molecule_schema)

@@ -7,10 +7,9 @@

import openforcefield.topology as off
from pydantic import BaseModel
-
-from bespokefit.collection_workflows import CollectionMethod, WorkflowStage
from qcsubmit.common_structures import QCSpec

+from ..collection_workflows import CollectionMethod, WorkflowStage
from ..common_structures import ParameterSettings

@@ -88,18 +87,21 @@
            New targets only need to add the name of the dependency to the _extra_dependencies list to have it included.
        """
        import importlib
+
        import openforcefield
        import openforcefields
+        import rdkit

        provenance = {
            "openforcefield": openforcefield.__version__,
            "openforcefields": openforcefields.__version__,
+            "rdkit": rdkit.__version__,
            "target": self.name,
        }
        # now loop over the extra dependencies
-        for dependencie in self._extra_dependencies:
-            dep = importlib.import_module(dependencie)
-            provenance[dependencie] = dep.__version__
+        for dependency in self._extra_dependencies:
+            dep = importlib.import_module(dependency)
+            provenance[dependency] = dep.__version__

        return provenance
@@ -141,12 +143,18 @@
        """
        Generate a new smirks pattern for the selected atoms of the given molecule.

-        Parameters:
-            atoms: The indices of the atoms that require a new smirks pattern.
-            molecule: The molecule that that patten should be made for.
-            layers: The number of layers that should be included in the pattern, default to all to make it molecule specific.
-
-        Returns:
+        Parameters
+        ----------
+        atoms: Tuple[int]
+            The indices of the atoms that require a new smirks pattern.
+        molecule: off.Molecule
+            The molecule that the pattern should be made for.
+        layers: Union[str, int]
+            The number of layers that should be included in the pattern; defaults to all to make the pattern molecule specific.
+
+        Returns
+        -------
+        str
            A single smirks string encapsulating the atoms requested in the given molecule.
        """
@@ -162,11 +170,18 @@
        """
        Generate a new smirks pattern which matches the requested atoms in all of the molecules.

-        Parameters:
-            atoms: A list of the atom indices that require a smirks pattern in the order of the molecules.
-            molecules: A list of the molecules in the same order as the atom indices.
-
-        Returns:
+        Parameters
+        ----------
+        atoms: List[List[Tuple[int]]]
+            A list of the atom indices that require a smirks pattern in the order of the molecules.
+        molecules: List[off.Molecule]
+            A list of the molecules in the same order as the atom indices.
+        layers: int
+            The number of layers to be considered when making the pattern.
+
+        Returns
+        -------
+        str
            A single smirks string which matches all of the atoms requested in each of the molecules.
        """
@@ -186,14 +201,20 @@
        """
        Get a mapping between two molecules of different sizes, i.e. a fragment to a parent.

-        Parameters:
-            fragment: The fragment molecule that we want to map on to the parent.
-            parent: The parent molecule the fragment was made from.
+        Parameters
+        ----------
+        fragment: off.Molecule
+            The fragment molecule that we want to map on to the parent.
+        parent: off.Molecule
+            The parent molecule the fragment was made from.

-        Note:
+        Notes
+        -----
            As the MCS is used to create the mapping it will not be complete; that is, some fragment atoms have no relation to the parent.

-        Returns:
+        Returns
+        -------
+        Dict[int, int]
            A mapping between the fragment and the parent molecule.
        """

@@ -207,7 +228,7 @@
            bond_stereochemistry_matching=False,
            atom_stereochemistry_matching=False,
        )
-        if atom_map:
+        if atom_map is not None:
            return atom_map

        else:
@@ -222,8 +243,8 @@
        Use the rdkit MCS function to find the maximum mapping between the fragment and parent molecule.
        """

-        from rdkit.Chem import rdFMCS
        from rdkit import Chem
+        from rdkit.Chem import rdFMCS

        parent_rdkit = parent.to_rdkit()
        fragment_rdkit = fragment.to_rdkit()
similarity index 89%
rename from bespokefit/targets/torsions.py
rename to openff/bespokefit/targets/torsions.py

@@ -1,13 +1,13 @@
import contextlib
import os
import shutil
-from typing import Dict, List, Tuple, Union
+from typing import Dict, List, Optional, Tuple, Union

import networkx as nx
+import numpy as np
+from chemper.graphs.environment import ChemicalEnvironment
from openforcefield import topology as off
from pkg_resources import resource_filename
-
-from chemper.graphs.environment import ChemicalEnvironment
from qcsubmit.datasets import (
    BasicDataset,
    ComponentResult,
@@ -15,10 +15,36 @@
    TorsiondriveDataset,
)
from qcsubmit.factories import BasicDatasetFactory, TorsiondriveDatasetFactory
+from qcsubmit.procedures import GeometricProcedure

from .collection_workflows import CollectionMethod

+def read_qdata(qdata_file: str) -> Tuple[List[np.array], List[float], List[np.array]]:
+    """
+    Read a torsiondrive/forcebalance qdata file and return the geometries, energies and gradients.
+
+    Parameters
+    ----------
+    qdata_file: str
+        The file path to the torsiondrive/forcebalance qdata file.
+    """
+
+    coords, energies, gradients = [], [], []
+    with open(qdata_file) as qdata:
+        for line in qdata.readlines():
+            if "COORDS" in line:
+                geom = np.array(line.split()[1:])
+                coords.append(geom)
+            elif "ENERGY" in line:
+                energies.append(float(line.split()[-1]))
+            elif "GRADIENT" in line:
+                grad = np.array(line.split()[1:])
+                gradients.append(grad)
+
+    return coords, energies, gradients
+
+
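A hedged sketch of the qdata.txt layout read_qdata parses, with made-up numbers:

# qdata.txt stores one frame per block, e.g.:
#   COORDS   0.000 0.000 0.117 0.000 0.755 -0.471
#   ENERGY   -76.384729
#   GRADIENT 0.001 -0.003 0.002 0.000 0.004 -0.001
coords, energies, gradients = read_qdata(qdata_file="qdata.txt")
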
def compare_smirks_graphs(smirks1: str, smirks2: str):
    """
    Compare two smirks schema based on the types of smirks they cover.
@@ -88,11 +114,15 @@
        smirks_type = len(env1.get_indexed_atoms())

    # define the general node match
-    def node_match(x, y):
+    def general_match(x, y):
        is_equal = x["_or_types"] == y["_or_types"]
        is_equal &= x["_and_types"] == y["_and_types"]
        is_equal &= x["ring"] == y["ring"]
        is_equal &= x["is_atom"] == y["is_atom"]
+        return is_equal
+
+    def node_match(x, y):
+        is_equal = general_match(x, y)
        is_equal &= environments[smirks_type](x, y)
        return is_equal

@@ -100,7 +130,7 @@
    env1_graph = make_smirks_attribute_graph(env1)
    env2_graph = make_smirks_attribute_graph(env2)
    gm = nx.algorithms.isomorphism.GraphMatcher(
-        env1_graph, env2_graph, node_match=node_match
+        env1_graph, env2_graph, node_match=node_match, edge_match=general_match
    )
    return gm.is_isomorphic()
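
For readers unfamiliar with the networkx matcher used above, a minimal self-contained example of attribute-aware isomorphism checking:

import networkx as nx

g1 = nx.Graph()
g1.add_nodes_from([(1, {"element": "C"}), (2, {"element": "H"})])
g1.add_edge(1, 2, order=1)

g2 = nx.Graph()
g2.add_nodes_from([(10, {"element": "C"}), (20, {"element": "H"})])
g2.add_edge(10, 20, order=1)

gm = nx.algorithms.isomorphism.GraphMatcher(
    g1,
    g2,
    node_match=lambda x, y: x["element"] == y["element"],
    edge_match=lambda x, y: x["order"] == y["order"],
)
print(gm.is_isomorphic())  # True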
@@ -110,9 +140,12 @@
    Make a new nx.Graph from the environment with attributes.
    """
    new_graph = nx.Graph()
+    bonds = chem_env._graph_edges(data=True)
    nodes = list(chem_env._graph.nodes())
    new_graph.add_nodes_from([(node, node.__dict__) for node in nodes])
-    new_graph.add_edges_from(list(chem_env._graph.edges()))
+    new_graph.add_edges_from(
+        [(bond[0], bond[1], bond[-1]["bond"].__dict__) for bond in bonds]
+    )
    return new_graph

@@ -171,7 +204,7 @@
        relative_path: The relative path to the data
    """

-    fn = resource_filename("bespokefit", os.path.join("data", relative_path))
+    fn = resource_filename("openff.bespokefit", os.path.join("data", relative_path))

    if not os.path.exists(fn):
        raise ValueError(
@@ -197,28 +230,12 @@
        shutil.rmtree(folder_name)


-def string_to_tuple(string: str) -> Tuple:
-    """
-    Convert a n length string to a n length tuple.
-    """
-
-    return tuple(int(x) for x in string.split("-"))
-
-
-def tuple_to_string(data: Tuple) -> str:
-    """
-    Convert a n length tuple into a string of numbers separated by -.
-    """
-    str_data = [str(x) for x in data]
-
-    return "-".join(str_data)
-
-
def schema_to_datasets(
    schema: List["MoleculeSchema"],
    singlepoint_name: str = "Bespokefit single points",
    optimization_name: str = "Bespokefit optimizations",
    torsiondrive_name: str = "Bespokefit torsiondrives",
+    geometric_options: Optional[GeometricProcedure] = None,
) -> List[Union[BasicDataset, OptimizationDataset, TorsiondriveDataset]]:
    """
    Generate a set of qcsubmit datasets containing all of the tasks required to compute the QM data.
@@ -228,6 +245,7 @@
        singlepoint_name: The common name of the single point datasets used for hessian, energy and gradients
        optimization_name: The name of the optimization dataset
        torsiondrive_name: The name of the torsiondrive dataset
+        geometric_options: The geometric optimization settings that should be used.

    Note:
        Local custom tasks not possible in QCArchive are not included and will be run when the fitting queue is started.
@@ -256,11 +274,18 @@
        description=description,
    )
    opt_dataset = OptimizationDataset(
-        qc_specifications={}, dataset_name=optimization_name, description=description,
+        qc_specifications={},
+        dataset_name=optimization_name,
+        description=description,
    )
    torsion_dataset = TorsiondriveDataset(
-        qc_specifications={}, dataset_name=torsiondrive_name, description=description,
+        qc_specifications={},
+        dataset_name=torsiondrive_name,
+        description=description,
    )
+    if geometric_options is not None:
+        opt_dataset.optimization_procedure = geometric_options
+        torsion_dataset.optimization_procedure = geometric_options

    method_to_dataset = {
        CollectionMethod.Optimization: opt_dataset,
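
A hedged sketch of threading custom geometric settings through the new argument; the GeometricProcedure field name shown is an assumption based on geomeTRIC's options, not verified against qcsubmit:

from qcsubmit.procedures import GeometricProcedure

procedure = GeometricProcedure(maxiter=300)  # field name assumed
datasets = schema_to_datasets(
    [molecule_schema],            # a prepared MoleculeSchema
    geometric_options=procedure,  # applied to both the optimization and torsiondrive datasets
)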
@@ -295,7 +320,9 @@
                        index=index,
                        molecule=molecule,
                        attributes=attributes,
-                        dihedrals=[dihedrals,],
+                        dihedrals=[
+                            dihedrals,
+                        ],
                    )
                    hashes.add(job_hash)
                    # is this how we want to handle different specs for different jobs
@@ -312,12 +339,16 @@
                    attributes = task.entry.attributes
                    attributes["task_hash"] = job_hash
                    index = molecule.to_smiles(
-                        isomeric=True, mapped=False, explicit_hydrogens=False,
+                        isomeric=True,
+                        mapped=False,
+                        explicit_hydrogens=False,
                    )
                    # get the specific dataset type
                    dataset = method_to_dataset[task.collection_stage.method]
                    dataset.add_molecule(
-                        index=index, molecule=molecule, attributes=attributes,
+                        index=index,
+                        molecule=molecule,
+                        attributes=attributes,
                    )
                    hashes.add(job_hash)
                    if task.entry.qc_spec not in dataset.qc_specifications.values():
similarity index 56%
rename from bespokefit/workflow.py
rename to openff/bespokefit/workflow.py
Files                        Coverage
openff/bespokefit            96.14%
Project Totals (19 files)    96.14%