import datetime

from sqlalchemy import (
    JSON,
    Boolean,
    Column,
    DateTime,
    Enum,
    ForeignKey,
    Index,
    Integer,
    String,
    UniqueConstraint,
    func,
    select,
)
from sqlalchemy.dialects.postgresql import JSONB, aggregate_order_by
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.ext.orderinglist import ordering_list
from sqlalchemy.orm import column_property, relationship

from qcfractal.interface.models.records import DriverEnum, RecordStatusEnum
from qcfractal.storage_sockets.models.sql_base import Base, MsgpackExt
from qcfractal.storage_sockets.models.sql_models import KeywordsORM, KVStoreORM, MoleculeORM


class BaseResultORM(Base):
    """
    Abstract Base class for ResultORMs and ProcedureORMs

    Maps the ``base_result`` table. The ``result_type`` column is the
    polymorphic discriminator used to pick the concrete mapped subclass.
    """

    __tablename__ = "base_result"

    # for SQL
    result_type = Column(String)  # for inheritance

    # Base identification
    id = Column(Integer, primary_key=True)
    # ondelete="SET NULL": when the manager is deleted, set this field to None
    manager_name = Column(String, ForeignKey("queue_manager.name", ondelete="SET NULL"), nullable=True)

    hash_index = Column(String)  # TODO
    procedure = Column(String(100))  # TODO: may remove
    # program = Column(String(100))  # moved to subclasses
    version = Column(Integer)
    protocols = Column(JSONB)

    # Extra fields
    extras = Column(MsgpackExt)

    # stdout / stderr / error each hold the id of a kv_store row. The matching
    # *_obj relationships are never loaded implicitly (lazy="noload") and own
    # the referenced row (delete-orphan cascade, single parent).
    stdout = Column(Integer, ForeignKey("kv_store.id"))
    stdout_obj = relationship(
        KVStoreORM, lazy="noload", foreign_keys=stdout, cascade="all, delete-orphan", single_parent=True
    )

    stderr = Column(Integer, ForeignKey("kv_store.id"))
    stderr_obj = relationship(
        KVStoreORM, lazy="noload", foreign_keys=stderr, cascade="all, delete-orphan", single_parent=True
    )

    error = Column(Integer, ForeignKey("kv_store.id"))
    error_obj = relationship(
        KVStoreORM, lazy="noload", foreign_keys=error, cascade="all, delete-orphan", single_parent=True
    )

    # Compute status
    status = Column(Enum(RecordStatusEnum), nullable=False, default=RecordStatusEnum.incomplete)

    created_on = Column(DateTime, default=datetime.datetime.utcnow)
    # NOTE(review): only an insert-time default is configured here (no
    # ``onupdate``), so later modifications must set this value explicitly.
    modified_on = Column(DateTime, default=datetime.datetime.utcnow)

    # Carry-ons
    provenance = Column(JSON)

    __table_args__ = (
        Index("ix_base_result_status", "status"),
        Index("ix_base_result_type", "result_type"),  # todo: needed?
    )

    __mapper_args__ = {"polymorphic_on": "result_type"}
80

81

82
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
83

84

85 8
class WavefunctionStoreORM(Base):
    """
    Storage for the wavefunction data attached to a result

    Maps the ``wavefunction_store`` table. Only ``basis`` and ``restricted``
    are required; every other quantity is an optional column.
    """

    __tablename__ = "wavefunction_store"

    id = Column(Integer, primary_key=True)

    # Sparsity is very cheap
    basis = Column(MsgpackExt, nullable=False)
    restricted = Column(Boolean, nullable=False)

    # Core Hamiltonian
    h_core_a = Column(MsgpackExt, nullable=True)
    h_core_b = Column(MsgpackExt, nullable=True)
    h_effective_a = Column(MsgpackExt, nullable=True)
    h_effective_b = Column(MsgpackExt, nullable=True)

    # SCF Results
    scf_orbitals_a = Column(MsgpackExt, nullable=True)
    scf_orbitals_b = Column(MsgpackExt, nullable=True)
    scf_density_a = Column(MsgpackExt, nullable=True)
    scf_density_b = Column(MsgpackExt, nullable=True)
    scf_fock_a = Column(MsgpackExt, nullable=True)
    scf_fock_b = Column(MsgpackExt, nullable=True)
    scf_eigenvalues_a = Column(MsgpackExt, nullable=True)
    scf_eigenvalues_b = Column(MsgpackExt, nullable=True)
    scf_occupations_a = Column(MsgpackExt, nullable=True)
    scf_occupations_b = Column(MsgpackExt, nullable=True)

    # Extras
    extras = Column(MsgpackExt, nullable=True)


class ResultORM(BaseResultORM):
    """
    Hold the result of an atomic single calculation

    Rows live in the ``result`` table, whose primary key is also a foreign
    key to ``base_result.id`` (deleted together with the base row).
    """

    __tablename__ = "result"

    id = Column(Integer, ForeignKey("base_result.id", ondelete="CASCADE"), primary_key=True)

    # uniquely identifying a result
    program = Column(String(100), nullable=False)  # example "rdkit", is it the same as program in keywords?
    # NOTE(review): two type objects (String(100) and Enum(DriverEnum)) are
    # passed positionally here; confirm which one SQLAlchemy actually applies
    # before changing it, since that would alter the existing schema.
    driver = Column(String(100), Enum(DriverEnum), nullable=False)
    method = Column(String(100), nullable=False)  # example "uff"
    basis = Column(String(100))
    molecule = Column(Integer, ForeignKey("molecule.id"))
    molecule_obj = relationship(MoleculeORM, lazy="select")

    # This is a special case where KeywordsORM are denormalized intentionally as they are part of the
    # lookup for a single result and querying a result will not often request the keywords (LazyReference)
    keywords = Column(Integer, ForeignKey("keywords.id"))
    keywords_obj = relationship(KeywordsORM, lazy="select")

    # Primary Result output
    return_result = Column(MsgpackExt)
    properties = Column(JSON)  # TODO: may use JSONB in the future

    # Wavefunction data
    wavefunction = Column(JSONB, nullable=True)
    wavefunction_data_id = Column(Integer, ForeignKey("wavefunction_store.id"), nullable=True)
    # Never loaded implicitly (lazy="noload"); the result owns its wavefunction
    # row (delete-orphan cascade, single parent).
    wavefunction_data_obj = relationship(
        WavefunctionStoreORM,
        lazy="noload",
        foreign_keys=wavefunction_data_id,
        cascade="all, delete-orphan",
        single_parent=True,
    )

    __table_args__ = (
        # TODO: optimize indexes
        # A multicolumn GIN index can be used with query conditions that involve any subset of
        # the index's columns. Unlike B-tree or GiST, index search effectiveness is the same
        # regardless of which index column(s) the query conditions use.
        # Index('ix_result_combined', "program", "driver", "method", "basis",
        #       "keywords", postgresql_using='gin'),  # gin index
        # Index('ix_results_molecule', 'molecule'),  # b-tree index
        UniqueConstraint("program", "driver", "method", "basis", "keywords", "molecule", name="uix_results_keys"),
    )

    __mapper_args__ = {
        "polymorphic_identity": "result",
        # to have separate select when querying BaseResultsORM
        "polymorphic_load": "selectin",
    }
170

171

172
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
173

174

175 8
class ProcedureMixin:
    """
    A procedure mixin to be used by specific procedure types

    Contributes the ``program`` (required), ``keywords`` and ``qc_spec``
    columns to every procedure table that inherits from it.
    """

    program = Column(String(100), nullable=False)
    keywords = Column(JSON)
    qc_spec = Column(JSON)


# ================== Types of ProcedureORMs ================== #


class Trajectory(Base):
    """Association table for many to many

    Links an optimization procedure to its results. The composite primary
    key is (opt_id, result_id, position); both foreign keys cascade on delete.
    """

    __tablename__ = "opt_result_association"

    opt_id = Column(Integer, ForeignKey("optimization_procedure.id", ondelete="cascade"), primary_key=True)
    result_id = Column(Integer, ForeignKey("result.id", ondelete="cascade"), primary_key=True)
    position = Column(Integer, primary_key=True)
    # Index('opt_id', 'result_id', unique=True)

    # trajectory_obj = relationship(ResultORM, lazy="noload")


class OptimizationProcedureORM(ProcedureMixin, BaseResultORM):
    """
    An Optimization  procedure

    Rows live in ``optimization_procedure``; the primary key is also a
    foreign key to ``base_result.id``.
    """

    __tablename__ = "optimization_procedure"

    id = Column(Integer, ForeignKey("base_result.id", ondelete="cascade"), primary_key=True)

    def __init__(self, **kwargs):
        """Create the procedure, defaulting ``version`` to 1 and ``procedure`` to "optimization"."""
        kwargs.setdefault("version", 1)
        # Assigned before the base constructor runs, so an explicit
        # ``procedure`` keyword argument still takes precedence.
        self.procedure = "optimization"
        super().__init__(**kwargs)

    schema_version = Column(Integer, default=1)
    initial_molecule = Column(Integer, ForeignKey("molecule.id"))
    initial_molecule_obj = relationship(MoleculeORM, lazy="select", foreign_keys=initial_molecule)

    # # Results
    energies = Column(JSON)  # Column(ARRAY(Float))
    final_molecule = Column(Integer, ForeignKey("molecule.id"))
    final_molecule_obj = relationship(MoleculeORM, lazy="select", foreign_keys=final_molecule)

    # ids, calculated not stored in this table
    # NOTE: this won't work in SQLite since it returns ARRAYS, aggregate_order_by
    trajectory = column_property(
        select([func.array_agg(aggregate_order_by(Trajectory.result_id, Trajectory.position))]).where(
            Trajectory.opt_id == id
        )
    )

    # array of association objects, kept ordered by their ``position`` column
    trajectory_obj = relationship(
        Trajectory,
        cascade="all, delete-orphan",
        # backref="optimization_procedure",
        order_by=Trajectory.position,
        collection_class=ordering_list("position"),
    )

    __mapper_args__ = {
        "polymorphic_identity": "optimization_procedure",
        # to have separate select when querying BaseResultsORM
        "polymorphic_load": "selectin",
    }

    __table_args__ = (Index("ix_optimization_program", "program"),)  # todo: needed for procedures?

    def update_relations(self, trajectory=None, **kwarg):
        """Replace the trajectory associations of this optimization.

        Parameters
        ----------
        trajectory : iterable of result ids, optional
            Ids of the results making up the trajectory, in order. ``None``
            or an empty value clears the existing associations.
        **kwarg
            Ignored; accepted so callers can pass extra fields.
        """

        # update optimization_results relations
        # self._update_many_to_many(opt_result_association, 'opt_id', 'result_id',
        #                 self.id, trajectory, self.trajectory)

        self.trajectory_obj = []
        for result_id in trajectory or []:
            traj = Trajectory(opt_id=int(self.id), result_id=int(result_id))
            self.trajectory_obj.append(traj)

    # def add_relations(self, trajectory):
    #     session = object_session(self)
    #     # add many to many relation with results if ids are given not objects
    #     if trajectory:
    #         session.execute(
    #             opt_result_association
    #                 .insert()  # or update
    #                 .values([(self.id, i) for i in trajectory])
    #         )
    #     session.commit()
271

272

273
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
274

275

276 8
class GridOptimizationAssociation(Base):
    """Association table for many to many

    Maps a (grid optimization, key) pair to one optimization procedure. The
    composite primary key is (grid_opt_id, key).
    """

    __tablename__ = "grid_optimization_association"

    grid_opt_id = Column(Integer, ForeignKey("grid_optimization_procedure.id", ondelete="cascade"), primary_key=True)
    key = Column(String, nullable=False, primary_key=True)

    # not primary key
    opt_id = Column(Integer, ForeignKey("optimization_procedure.id", ondelete="cascade"))

    # Index('grid_opt_id', 'key', unique=True)

    # optimization_obj = relationship(OptimizationProcedureORM, lazy="joined")


class GridOptimizationProcedureORM(ProcedureMixin, BaseResultORM):
    """
    A grid optimization procedure

    Rows live in ``grid_optimization_procedure``; the primary key is also a
    foreign key to ``base_result.id``.
    """

    __tablename__ = "grid_optimization_procedure"

    id = Column(Integer, ForeignKey("base_result.id", ondelete="cascade"), primary_key=True)

    def __init__(self, **kwargs):
        """Create the procedure, filling in default ``version``, ``procedure`` and ``program``."""
        kwargs.setdefault("version", 1)
        kwargs.setdefault("procedure", "gridoptimization")
        kwargs.setdefault("program", "qcfractal")
        super().__init__(**kwargs)

    # Input data
    initial_molecule = Column(Integer, ForeignKey("molecule.id"))
    initial_molecule_obj = relationship(MoleculeORM, lazy="select", foreign_keys=initial_molecule)

    optimization_spec = Column(JSON)

    # Output data
    starting_molecule = Column(Integer, ForeignKey("molecule.id"))
    # Must join through ``starting_molecule``; joining through
    # ``initial_molecule`` would return the initial molecule instead.
    starting_molecule_obj = relationship(MoleculeORM, lazy="select", foreign_keys=starting_molecule)

    final_energy_dict = Column(JSON)  # Dict[str, float]
    starting_grid = Column(JSON)  # tuple

    grid_optimizations_obj = relationship(
        GridOptimizationAssociation,
        lazy="selectin",
        cascade="all, delete-orphan",
        backref="grid_optimization_procedure",
    )

    @hybrid_property
    def grid_optimizations(self):
        """calculated property when accessed, not saved in the DB
        A view of the many to many relation in the form of a dict"""

        return self._grid_optimizations(self.grid_optimizations_obj)

    @staticmethod
    def _grid_optimizations(grid_optimizations_obj):
        """Build a ``{key: str(opt_id)}`` dict from association object(s).

        Accepts a list of association objects or a single object. Returns an
        empty dict for a falsy input.
        """

        if not grid_optimizations_obj:
            return {}

        if not isinstance(grid_optimizations_obj, list):
            grid_optimizations_obj = [grid_optimizations_obj]

        ret = {}
        try:
            for obj in grid_optimizations_obj:
                ret[obj.key] = str(obj.opt_id)

        except Exception:
            # Deliberately best-effort: this raises on first access, in which
            # case whatever was collected so far is returned.
            pass

        return ret

    @grid_optimizations.setter
    def grid_optimizations(self, dict_values):
        """Accept an assigned value without storing it.

        The data lives in ``grid_optimizations_obj``; use ``update_relations``
        to change it.
        """

        return dict_values

    # The index name (including its spelling) is part of the database schema.
    __table_args__ = (Index("ix_grid_optmization_program", "program"),)  # todo: needed for procedures?

    __mapper_args__ = {
        "polymorphic_identity": "grid_optimization_procedure",
        # to have separate select when querying BaseResultsORM
        "polymorphic_load": "selectin",
    }

    def update_relations(self, grid_optimizations=None, **kwarg):
        """Replace the grid-point associations of this procedure.

        Parameters
        ----------
        grid_optimizations : dict, optional
            Mapping of grid key to optimization id. ``None`` or an empty
            mapping clears the existing associations.
        **kwarg
            Ignored; accepted so callers can pass extra fields.
        """

        self.grid_optimizations_obj = []
        # Guard the default: calling .items() on None would raise.
        for key, opt_id in (grid_optimizations or {}).items():
            obj = GridOptimizationAssociation(grid_opt_id=int(self.id), opt_id=int(opt_id), key=key)
            self.grid_optimizations_obj.append(obj)
371

372

373
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
374

375

376 8
class OptimizationHistory(Base):
    """Association table for many to many

    Links a torsion drive to its optimization procedures. The composite
    primary key is (torsion_id, opt_id, key, position).
    """

    __tablename__ = "optimization_history"

    torsion_id = Column(Integer, ForeignKey("torsiondrive_procedure.id", ondelete="cascade"), primary_key=True)
    opt_id = Column(Integer, ForeignKey("optimization_procedure.id", ondelete="cascade"), primary_key=True)
    key = Column(String, nullable=False, primary_key=True)
    position = Column(Integer, primary_key=True)
    # Index('torsion_id', 'key', unique=True)

    # optimization_obj = relationship(OptimizationProcedureORM, lazy="joined")


class TorsionInitMol(Base):
    """
    Association table for many to many relation

    Links a torsion drive to its initial molecules. The composite primary
    key is (torsion_id, molecule_id); both foreign keys cascade on delete.
    """

    __tablename__ = "torsion_init_mol_association"

    torsion_id = Column(
        "torsion_id", Integer, ForeignKey("torsiondrive_procedure.id", ondelete="cascade"), primary_key=True
    )
    molecule_id = Column("molecule_id", Integer, ForeignKey("molecule.id", ondelete="cascade"), primary_key=True)


class TorsionDriveProcedureORM(ProcedureMixin, BaseResultORM):
    """
    A torsion drive  procedure

    Rows live in ``torsiondrive_procedure``; the primary key is also a
    foreign key to ``base_result.id``.
    """

    __tablename__ = "torsiondrive_procedure"

    id = Column(Integer, ForeignKey("base_result.id", ondelete="cascade"), primary_key=True)

    def __init__(self, **kwargs):
        """Create the procedure, defaulting ``version`` to 1 and both
        ``procedure`` and ``program`` to "torsiondrive"."""
        kwargs.setdefault("version", 1)
        # Assigned before the base constructor runs, so explicit keyword
        # arguments still take precedence.
        self.procedure = "torsiondrive"
        self.program = "torsiondrive"
        super().__init__(**kwargs)

    # input data (along with the mixin)

    # ids of the many to many relation
    initial_molecule = column_property(
        select([func.array_agg(TorsionInitMol.molecule_id)]).where(TorsionInitMol.torsion_id == id)
    )
    # actual objects relation M2M, never loaded here
    initial_molecule_obj = relationship(MoleculeORM, secondary=TorsionInitMol.__table__, uselist=True, lazy="noload")

    optimization_spec = Column(JSON)

    # Output data
    final_energy_dict = Column(JSON)
    minimum_positions = Column(JSON)

    # association objects, kept ordered by their ``position`` column
    optimization_history_obj = relationship(
        OptimizationHistory,
        cascade="all, delete-orphan",  # backref="torsiondrive_procedure",
        order_by=OptimizationHistory.position,
        collection_class=ordering_list("position"),
        lazy="selectin",
    )

    @hybrid_property
    def optimization_history(self):
        """calculated property when accessed, not saved in the DB
        A view of the many to many relation in the form of a dict"""

        return self._optimization_history(self.optimization_history_obj)

    @staticmethod
    def _optimization_history(optimization_history_obj):
        """Build a ``{key: [str(opt_id), ...]}`` dict from association object(s).

        Accepts a list of association objects or a single object. Ids are
        grouped per key in iteration order. Returns an empty dict for a falsy
        input.
        """

        if not optimization_history_obj:
            return {}

        if not isinstance(optimization_history_obj, list):
            optimization_history_obj = [optimization_history_obj]

        ret = {}
        try:
            for opt_history in optimization_history_obj:
                ret.setdefault(opt_history.key, []).append(str(opt_history.opt_id))

        except Exception:
            # Deliberately best-effort: this raises on first access, in which
            # case whatever was collected so far is returned.
            pass

        return ret

    @optimization_history.setter
    def optimization_history(self, dict_values):
        """A private copy of the opt history as a dict
        Key: list of optimization procedures

        The value is not stored; the data lives in
        ``optimization_history_obj`` (see ``update_relations``).
        """

        return dict_values

    __table_args__ = (Index("ix_torsion_drive_program", "program"),)  # todo: needed for procedures?

    __mapper_args__ = {
        "polymorphic_identity": "torsiondrive_procedure",
        # to have separate select when querying BaseResultsORM
        "polymorphic_load": "selectin",
    }

    def update_relations(self, initial_molecule=None, optimization_history=None, **kwarg):
        """Replace the initial-molecule and optimization-history relations.

        Parameters
        ----------
        initial_molecule : optional
            New initial molecule ids, handed to ``_update_many_to_many``
            together with the currently stored ids.
        optimization_history : dict, optional
            Mapping of key to a sequence of optimization ids. ``None`` or an
            empty mapping clears the existing history.
        **kwarg
            Ignored; accepted so callers can pass extra fields.
        """

        # update torsion molecule relation
        self._update_many_to_many(
            TorsionInitMol.__table__, "torsion_id", "molecule_id", self.id, initial_molecule, self.initial_molecule
        )

        self.optimization_history_obj = []
        # Guard the default: iterating None would raise.
        for key, opt_ids in (optimization_history or {}).items():
            for opt_id in opt_ids:
                opt_history = OptimizationHistory(torsion_id=int(self.id), opt_id=int(opt_id), key=key)
                self.optimization_history_obj.append(opt_history)

        # No need for the following because the session is committed with parent save
        # session.add_all(self.optimization_history_obj)
        # session.add(self)
        # session.commit()
504

505

506
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Read our documentation on viewing source code .

Loading