MolSSI / QCFractal
1
"""
2
Basic ORM models of the QCFractal database
3

4
Note: avoid circular import here by including the name of the class
5
in relations and foreign keys are a string (see TaskQueueORM.base_result_obj)
6
"""
7

8 8
import datetime
9

10
# from sqlalchemy.ext.declarative import declarative_base
11 8
from sqlalchemy import (
12
    JSON,
13
    BigInteger,
14
    Boolean,
15
    Column,
16
    DateTime,
17
    Enum,
18
    Float,
19
    ForeignKey,
20
    Index,
21
    Integer,
22
    LargeBinary,
23
    String,
24
)
25 8
from sqlalchemy.ext.hybrid import hybrid_property
26 8
from sqlalchemy.orm import relationship
27 8
from sqlalchemy.sql import text
28

29 8
from qcfractal.interface.models.task_models import ManagerStatusEnum, PriorityEnum, TaskStatusEnum
30 8
from qcfractal.interface.models.common_models import CompressionEnum
31 8
from qcfractal.storage_sockets.models.sql_base import Base, MsgpackExt
32

33

34 8
class AccessLogORM(Base):
35 8
    __tablename__ = "access_log"
36

37 8
    id = Column(Integer, primary_key=True)
38 8
    access_date = Column(DateTime, default=datetime.datetime.utcnow)
39 8
    access_method = Column(String, nullable=False)
40 8
    access_type = Column(String, nullable=False)
41

42
    # Note: no performance difference between varchar and text in postgres
43
    # will mostly have a serialized JSON, but not stored as JSON for speed
44 8
    extra_params = Column(String)
45

46
    # user info
47 8
    ip_address = Column(String)
48 8
    user_agent = Column(String)
49

50
    # extra computed geo data
51 8
    city = Column(String)
52 8
    country = Column(String)
53 8
    country_code = Column(String)
54 8
    ip_lat = Column(String)
55 8
    ip_long = Column(String)
56 8
    postal_code = Column(String)
57 8
    subdivision = Column(String)
58

59 8
    __table_args__ = (Index("access_type", "access_date"),)
60

61

62 8
class ServerStatsLogORM(Base):
63 8
    __tablename__ = "server_stats_log"
64

65 8
    id = Column(Integer, primary_key=True)
66 8
    timestamp = Column(DateTime, default=datetime.datetime.utcnow, nullable=False)
67

68
    # Raw counts
69 8
    collection_count = Column(Integer)
70 8
    molecule_count = Column(Integer)
71 8
    result_count = Column(Integer)
72 8
    kvstore_count = Column(Integer)
73 8
    access_count = Column(Integer)
74

75
    # States
76 8
    result_states = Column(JSON)
77

78
    # Database
79 8
    db_total_size = Column(BigInteger)
80 8
    db_table_size = Column(BigInteger)
81 8
    db_index_size = Column(BigInteger)
82 8
    db_table_information = Column(JSON)
83

84 8
    __table_args__ = (Index("ix_server_stats_log_timestamp", "timestamp"),)
85

86

87 8
class VersionsORM(Base):
88 8
    __tablename__ = "versions"
89

90 8
    id = Column(Integer, primary_key=True)
91 8
    created_on = Column(DateTime, default=datetime.datetime.utcnow)
92 8
    elemental_version = Column(String, nullable=False)
93 8
    fractal_version = Column(String, nullable=False)
94 8
    engine_version = Column(String)
95

96

97 8
class KVStoreORM(Base):
98
    """TODO: rename to """
99

100 8
    __tablename__ = "kv_store"
101

102 8
    id = Column(Integer, primary_key=True)
103 8
    compression = Column(Enum(CompressionEnum), nullable=True)
104 8
    compression_level = Column(Integer, nullable=True)
105 8
    value = Column(JSON, nullable=True)
106 8
    data = Column(LargeBinary, nullable=True)
107

108

109 8
class MoleculeORM(Base):
110
    """
111
    The molecule DB collection is managed by pymongo, so far
112
    """
113

114 8
    __tablename__ = "molecule"
115

116 8
    id = Column(Integer, primary_key=True)
117 8
    molecular_formula = Column(String)
118 8
    molecule_hash = Column(String)
119

120
    # Required data
121 8
    schema_name = Column(String)
122 8
    schema_version = Column(Integer, default=2)
123 8
    symbols = Column(MsgpackExt, nullable=False)
124 8
    geometry = Column(MsgpackExt, nullable=False)
125

126
    # Molecule data
127 8
    name = Column(String, default="")
128 8
    identifiers = Column(JSON)
129 8
    comment = Column(String)
130 8
    molecular_charge = Column(Float, default=0)
131 8
    molecular_multiplicity = Column(Integer, default=1)
132

133
    # Atom data
134 8
    masses = Column(MsgpackExt)
135 8
    real = Column(MsgpackExt)
136 8
    atom_labels = Column(MsgpackExt)
137 8
    atomic_numbers = Column(MsgpackExt)
138 8
    mass_numbers = Column(MsgpackExt)
139

140
    # Fragment and connection data
141 8
    connectivity = Column(JSON)
142 8
    fragments = Column(MsgpackExt)
143 8
    fragment_charges = Column(JSON)  # Column(ARRAY(Float))
144 8
    fragment_multiplicities = Column(JSON)  # Column(ARRAY(Integer))
145

146
    # Orientation
147 8
    fix_com = Column(Boolean, default=False)
148 8
    fix_orientation = Column(Boolean, default=False)
149 8
    fix_symmetry = Column(String)
150

151
    # Extra
152 8
    provenance = Column(JSON)
153 8
    extras = Column(JSON)
154

155
    # def __str__(self):
156
    #     return str(self.id)
157

158 8
    __table_args__ = (
159
        Index("ix_molecule_hash", "molecule_hash", unique=False),  # dafault index is B-tree
160
        # TODO: no index on molecule_formula
161
    )
162

163
    # meta = {
164
    #
165
    #     'indexes': [
166
    #         {
167
    #             'fields': ('molecule_hash', ),
168
    #             'unique': False
169
    #         },  # should almost be unique
170
    #         {
171
    #             'fields': ('molecular_formula', ),
172
    #             'unique': False
173
    #         }
174
    #     ]
175
    # }
176

177

178
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
179

180

181 8
class KeywordsORM(Base):
182
    """
183
    KeywordsORM are unique for a specific program and name
184
    """
185

186 8
    __tablename__ = "keywords"
187

188 8
    id = Column(Integer, primary_key=True)
189 8
    hash_index = Column(String, nullable=False)
190 8
    values = Column(JSON)
191

192 8
    lowercase = Column(Boolean, default=True)
193 8
    exact_floats = Column(Boolean, default=False)
194 8
    comments = Column(String)
195

196 8
    __table_args__ = (Index("ix_keywords_hash_index", "hash_index", unique=True),)
197
    # meta = {'indexes': [{'fields': ('hash_index', ), 'unique': True}]}
198

199

200
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
201

202

203 8
class TaskQueueORM(Base):
204
    """A queue of tasks corresponding to a procedure
205

206
    Notes: don't sort query results without having the index sorted
207
           will impact the performance
208
    """
209

210 8
    __tablename__ = "task_queue"
211

212 8
    id = Column(Integer, primary_key=True)
213

214 8
    spec = Column(MsgpackExt, nullable=False)
215

216
    # others
217 8
    tag = Column(String, default=None)
218 8
    parser = Column(String, default="")
219 8
    program = Column(String)
220 8
    procedure = Column(String)
221 8
    status = Column(Enum(TaskStatusEnum), default=TaskStatusEnum.waiting)
222 8
    priority = Column(Integer, default=int(PriorityEnum.NORMAL))
223 8
    manager = Column(String, ForeignKey("queue_manager.name", ondelete="SET NULL"), default=None)
224

225 8
    created_on = Column(DateTime, default=datetime.datetime.utcnow)
226 8
    modified_on = Column(DateTime, default=datetime.datetime.utcnow)
227

228
    # TODO: for back-compatibility with mongo, tobe removed
229
    # (requries modifying pydantic model)
230 8
    @hybrid_property
231 3
    def base_result(self):
232 8
        return self.base_result_id
233

234 8
    @base_result.setter
235 3
    def base_result(self, val):
236 8
        self.base_result_id = int(val)
237

238
    # can reference ResultORMs or any ProcedureORM
239 8
    base_result_id = Column(Integer, ForeignKey("base_result.id", ondelete="cascade"), unique=True)
240 8
    base_result_obj = relationship("BaseResultORM", lazy="select")  # or lazy='joined'
241

242
    # An important special case is ORDER BY in combination with LIMIT n: an
243
    # explicit sort will have to process all the data to identify the first n
244
    # rows, but if there is an index matching the ORDER BY, the first n rows
245
    # can be retrieved directly, without scanning the remainder at all.
246 8
    __table_args__ = (
247
        Index("ix_task_queue_created_on", "created_on"),
248
        Index("ix_task_queue_keys", "status", "program", "procedure", "tag"),
249
        Index("ix_task_queue_manager", "manager"),
250
        Index("ix_task_queue_base_result_id", "base_result_id"),
251
        Index("ix_task_waiting_sort", text("priority desc,  created_on")),
252
    )
253

254

255
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
256

257

258 8
class ServiceQueueORM(Base):
259

260 8
    __tablename__ = "service_queue"
261

262 8
    id = Column(Integer, primary_key=True)
263

264 8
    status = Column(Enum(TaskStatusEnum), default=TaskStatusEnum.waiting)
265 8
    tag = Column(String, default=None)
266 8
    hash_index = Column(String, nullable=False)
267

268 8
    procedure_id = Column(Integer, ForeignKey("base_result.id"), unique=True)
269 8
    procedure_obj = relationship("BaseResultORM", lazy="joined")
270

271 8
    priority = Column(Integer, default=int(PriorityEnum.NORMAL))
272 8
    created_on = Column(DateTime, default=datetime.datetime.utcnow)
273 8
    modified_on = Column(DateTime, default=datetime.datetime.utcnow)
274

275 8
    extra = Column(MsgpackExt)
276

277 8
    __table_args__ = (
278
        Index("ix_service_queue_status", "status"),
279
        Index("ix_service_queue_priority", "priority"),
280
        Index("ix_service_queue_modified_on", "modified_on"),
281
        Index("ix_service_queue_status_tag_hash", "status", "tag"),
282
        Index("ix_service_queue_hash_index", "hash_index"),
283
    )
284

285

286
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
287

288

289 8
class UserORM(Base):
290

291 8
    __tablename__ = "user"
292

293 8
    id = Column(Integer, primary_key=True)
294

295 8
    username = Column(String, nullable=False, unique=True)  # indexed and unique
296 8
    password = Column(LargeBinary, nullable=False)
297 8
    permissions = Column(JSON)  # Column(ARRAY(String))
298

299

300 8
class QueueManagerLogORM(Base):
301

302 8
    __tablename__ = "queue_manager_logs"
303

304 8
    id = Column(Integer, primary_key=True)
305 8
    manager_id = Column(Integer, ForeignKey("queue_manager.id"), nullable=False)
306

307 8
    timestamp = Column(DateTime, default=datetime.datetime.utcnow, nullable=False)
308

309 8
    completed = Column(Integer, nullable=True)
310 8
    submitted = Column(Integer, nullable=True)
311 8
    failures = Column(Integer, nullable=True)
312

313 8
    total_worker_walltime = Column(Float, nullable=True)
314 8
    total_task_walltime = Column(Float, nullable=True)
315 8
    active_tasks = Column(Integer, nullable=True)
316 8
    active_cores = Column(Integer, nullable=True)
317 8
    active_memory = Column(Float, nullable=True)
318

319 8
    __table_args__ = (Index("ix_queue_manager_log_timestamp", "timestamp"),)
320

321

322 8
class QueueManagerORM(Base):
323
    """"""
324

325 8
    __tablename__ = "queue_manager"
326

327 8
    id = Column(Integer, primary_key=True)
328

329 8
    name = Column(String, unique=True)
330 8
    cluster = Column(String)
331 8
    hostname = Column(String)
332 8
    username = Column(String)
333 8
    uuid = Column(String)
334 8
    tag = Column(String)
335

336
    # Count at current time
337 8
    completed = Column(Integer, default=0)
338 8
    submitted = Column(Integer, default=0)
339 8
    failures = Column(Integer, default=0)
340 8
    returned = Column(Integer, default=0)
341

342 8
    total_worker_walltime = Column(Float, nullable=True)
343 8
    total_task_walltime = Column(Float, nullable=True)
344 8
    active_tasks = Column(Integer, nullable=True)
345 8
    active_cores = Column(Integer, nullable=True)
346 8
    active_memory = Column(Float, nullable=True)
347

348
    # Adapter Information
349 8
    configuration = Column(JSON, nullable=True)
350

351 8
    status = Column(Enum(ManagerStatusEnum), default=ManagerStatusEnum.inactive)
352

353 8
    created_on = Column(DateTime, default=datetime.datetime.utcnow)
354 8
    modified_on = Column(DateTime, default=datetime.datetime.utcnow)
355

356 8
    qcengine_version = Column(String)
357 8
    manager_version = Column(String)
358 8
    programs = Column(JSON)
359 8
    procedures = Column(JSON)
360

361 8
    __table_args__ = (Index("ix_queue_manager_status", "status"), Index("ix_queue_manager_modified_on", "modified_on"))

Read our documentation on viewing source code .

Loading