@@ -8,7 +8,11 @@
Loading
8 8
from openff.evaluator.datasets import PropertyPhase
9 9
from openff.evaluator.forcefield import ForceFieldSource
10 10
from openff.evaluator.storage.attributes import QueryAttribute
11 -
from openff.evaluator.storage.data import ForceFieldData, StoredSimulationData
11 +
from openff.evaluator.storage.data import (
12 +
    ForceFieldData,
13 +
    StoredFreeEnergyData,
14 +
    StoredSimulationData,
15 +
)
12 16
from openff.evaluator.substances import ExactAmount, Substance
13 17
from openff.evaluator.thermodynamics import ThermodynamicState
14 18
@@ -152,16 +156,11 @@
Loading
152 156
    )
153 157
154 158
155 -
class SimulationDataQuery(BaseDataQuery):
156 -
    """A class used to query a `StorageBackend` for
157 -
    `StoredSimulationData` which meet the specified set
158 -
    of criteria.
159 +
class BaseSimulationDataQuery(BaseDataQuery, abc.ABC):
160 +
    """The base class for queries which will retrieve ``BaseSimulationData`` derived
161 +
    data.
159 162
    """
160 163
161 -
    @classmethod
162 -
    def data_class(cls):
163 -
        return StoredSimulationData
164 -
165 164
    substance = QueryAttribute(
166 165
        docstring="The substance which the data should have been collected "
167 166
        "for. Data for a subset of this substance can be queried for by "
@@ -201,12 +200,6 @@
Loading
201 200
        optional=True,
202 201
    )
203 202
204 -
    number_of_molecules = QueryAttribute(
205 -
        docstring="The total number of molecules in the system.",
206 -
        type_hint=int,
207 -
        optional=True,
208 -
    )
209 -
210 203
    def _match_substance(self, data_object):
211 204
        """Attempt to match the substance (or a subset of it).
212 205
@@ -272,7 +265,7 @@
Loading
272 265
        if self.substance != UNDEFINED:
273 266
            matches.append(self._match_substance(data_object))
274 267
275 -
        base_matches = super(SimulationDataQuery, self).apply(data_object)
268 +
        base_matches = super(BaseSimulationDataQuery, self).apply(data_object)
276 269
        base_matches = [None] if base_matches is None else base_matches
277 270
278 271
        matches = [*matches, *base_matches]
@@ -284,7 +277,7 @@
Loading
284 277
285 278
    def validate(self, attribute_type=None):
286 279
287 -
        super(SimulationDataQuery, self).validate(attribute_type)
280 +
        super(BaseSimulationDataQuery, self).validate(attribute_type)
288 281
289 282
        if self.substance_query != UNDEFINED and self.substance == UNDEFINED:
290 283
@@ -292,3 +285,29 @@
Loading
292 285
                "The `substance_query` can only be used when the "
293 286
                "`substance` attribute is set."
294 287
            )
288 +
289 +
290 +
class SimulationDataQuery(BaseSimulationDataQuery):
291 +
    """A class used to query a ``StorageBackend`` for ``StoredSimulationData`` objects
292 +
    which meet the specified set of criteria.
293 +
    """
294 +
295 +
    @classmethod
296 +
    def data_class(cls):
297 +
        return StoredSimulationData
298 +
299 +
    number_of_molecules = QueryAttribute(
300 +
        docstring="The total number of molecules in the system.",
301 +
        type_hint=int,
302 +
        optional=True,
303 +
    )
304 +
305 +
306 +
class FreeEnergyDataQuery(BaseSimulationDataQuery):
307 +
    """A class used to query a ``StorageBackend`` for ``StoredFreeEnergyData`` which
308 +
    meet the specified set of criteria.
309 +
    """
310 +
311 +
    @classmethod
312 +
    def data_class(cls):
313 +
        return StoredFreeEnergyData

@@ -2,6 +2,7 @@
Loading
2 2
A collection of classes representing data stored by a storage backend.
3 3
"""
4 4
import abc
5 +
from typing import Optional
5 6
6 7
from openff.evaluator.attributes import AttributeClass
7 8
from openff.evaluator.datasets import PropertyPhase
@@ -9,7 +10,7 @@
Loading
9 10
from openff.evaluator.storage.attributes import FilePath, StorageAttribute
10 11
from openff.evaluator.substances import Substance
11 12
from openff.evaluator.thermodynamics import ThermodynamicState
12 -
from openff.evaluator.utils.observables import ObservableFrame
13 +
from openff.evaluator.utils.observables import Observable, ObservableFrame
13 14
14 15
15 16
class BaseStoredData(AttributeClass, abc.ABC):
@@ -161,22 +162,8 @@
Loading
161 162
        return stored_data_1
162 163
163 164
164 -
class StoredSimulationData(ReplaceableData):
165 -
    """A representation of data which has been cached
166 -
    from a single previous simulation.
167 -
168 -
    Notes
169 -
    -----
170 -
    The ancillary directory which stores larger information such
171 -
    as trajectories should be of the form:
172 -
173 -
    .. code-block::
174 -
175 -
        |--- data_object.json
176 -
        |--- data_directory
177 -
             |--- coordinate_file_name.pdb
178 -
             |--- trajectory_file_name.dcd
179 -
    """
165 +
class BaseSimulationData(ReplaceableData, abc.ABC):
166 +
    """A base class for classes which will store the outputs of a molecular simulation"""
180 167
181 168
    substance = StorageAttribute(
182 169
        docstring="A description of the composition of the stored system.",
@@ -201,6 +188,27 @@
Loading
201 188
        type_hint=str,
202 189
    )
203 190
191 +
    @classmethod
192 +
    def has_ancillary_data(cls):
193 +
        return True
194 +
195 +
196 +
class StoredSimulationData(BaseSimulationData):
197 +
    """A representation of data which has been cached from a single previous simulation.
198 +
199 +
    Notes
200 +
    -----
201 +
    The ancillary directory which stores larger information such as trajectories should
202 +
    be of the form:
203 +
204 +
    .. code-block::
205 +
206 +
        |--- data_object.json
207 +
        |--- data_directory
208 +
             |--- coordinate_file_name.pdb
209 +
             |--- trajectory_file_name.dcd
210 +
    """
211 +
204 212
    coordinate_file_name = StorageAttribute(
205 213
        docstring="The name of a coordinate file which encodes the "
206 214
        "topology information of the system.",
@@ -227,10 +235,6 @@
Loading
227 235
        type_hint=int,
228 236
    )
229 237
230 -
    @classmethod
231 -
    def has_ancillary_data(cls):
232 -
        return True
233 -
234 238
    @classmethod
235 239
    def most_information(cls, stored_data_1, stored_data_2):
236 240
        """Returns the data object with the lowest
@@ -273,3 +277,75 @@
Loading
273 277
        from .query import SimulationDataQuery
274 278
275 279
        return SimulationDataQuery.from_data_object(self)
280 +
281 +
282 +
class StoredFreeEnergyData(BaseSimulationData):
283 +
    """A representation of data which has been cached from a free energy calculation
284 +
    which computed the free energy difference between a start and end state.
285 +
286 +
    Notes
287 +
    -----
288 +
    The ancillary directory which stores larger information such as trajectories should
289 +
    be of the form:
290 +
291 +
    .. code-block::
292 +
293 +
        |--- data_object.json
294 +
        |--- data_directory
295 +
             |--- topology_file_name.pdb
296 +
             |--- start_state_trajectory.dcd
297 +
             |--- end_state_trajectory.dcd
298 +
    """
299 +
300 +
    free_energy_difference = StorageAttribute(
301 +
        docstring="The free energy difference between the end state "
302 +
        "and the start state.",
303 +
        type_hint=Observable,
304 +
    )
305 +
306 +
    topology_file_name = StorageAttribute(
307 +
        docstring="The name of a coordinate file which encodes the topology of the "
308 +
        "system.",
309 +
        type_hint=FilePath,
310 +
    )
311 +
312 +
    start_state_trajectory = StorageAttribute(
313 +
        docstring="The name of a .dcd trajectory file containing configurations "
314 +
        "generated by the simulation of the start state of the system.",
315 +
        type_hint=FilePath,
316 +
    )
317 +
    end_state_trajectory = StorageAttribute(
318 +
        docstring="The name of a .dcd trajectory file containing configurations "
319 +
        "generated by the simulation of the end state of the system.",
320 +
        type_hint=FilePath,
321 +
    )
322 +
323 +
    @classmethod
324 +
    def most_information(
325 +
        cls,
326 +
        stored_data_1: "StoredFreeEnergyData",
327 +
        stored_data_2: "StoredFreeEnergyData",
328 +
    ) -> Optional["StoredFreeEnergyData"]:
329 +
        """A comparison function which will always retain both pieces of free energy
330 +
        data. At this time no situation can be envisaged that the same free energy data
331 +
        from exactly the same calculation will be stored.
332 +
333 +
        Parameters
334 +
        ----------
335 +
        stored_data_1
336 +
            The first piece of data to compare.
337 +
        stored_data_2
338 +
            The second piece of data to compare.
339 +
        """
340 +
        return None
341 +
342 +
    def to_storage_query(self):
343 +
        """
344 +
        Returns
345 +
        -------
346 +
        FreeEnergyDataQuery
347 +
            The storage query which would match this data object.
348 +
        """
349 +
        from .query import FreeEnergyDataQuery
350 +
351 +
        return FreeEnergyDataQuery.from_data_object(self)
Files Coverage
openff/evaluator 87.28%
Project Totals (92 files) 87.28%
1
# Codecov configuration to make it a bit less noisy
2
coverage:
3
  status:
4
    patch: false
5
    project:
6
      default:
7
        threshold: 50%
8
comment:
9
  layout: "header"
10
  require_changes: false
11
  branches: null
12
  behavior: default
13
  flags: null
14
  paths: null
Sunburst
The innermost circle is the entire project; moving away from the center are folders and then, finally, individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project, followed by folders and, finally, individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block represent the number of statements and the coverage, respectively.
Loading