1
|
|
#!/usr/bin/env python
|
2
|
60
|
"""
|
3
|
|
reader.py
|
4
|
|
|
5
|
|
Functions to parse or read in files or OEMols.
|
6
|
|
|
7
|
|
Version: Apr 2 2019
|
8
|
|
By: Victoria T. Lim
|
9
|
|
|
10
|
|
"""
|
11
|
|
|
12
|
60
|
import openeye.oechem as oechem
|
13
|
60
|
import collections # ordered dictionary
|
14
|
60
|
import copy
|
15
|
|
|
16
|
|
|
17
|
60
|
def read_mols(infile, mol_slice=None):
    """
    Open a molecule file and return molecules and conformers as OEMols.

    Parameters
    ----------
    infile : string
        name of input file with molecules
    mol_slice : list
        [start, stop, step] indices selecting a subset of the molecules in
        the file, with the same meaning as range(start, stop, step).
        Must satisfy start < stop and step >= 1. A negative start is
        treated as 0. If None, no slicing is done.

    Returns
    -------
    mols : OEMols
        If mol_slice is None, the molecule iterator returned by
        ifs.GetOEMols(). Otherwise a list holding copies of the selected
        molecules.

    Raises
    ------
    FileNotFoundError
        if infile cannot be opened by the OpenEye input stream
    ValueError
        if mol_slice does not have length 3, or start >= stop, or step <= 0

    """
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(infile):
        raise FileNotFoundError(f"Unable to open {infile} for reading")
    mols = ifs.GetOEMols()

    if mol_slice is None:
        return mols

    if len(mol_slice) != 3 or mol_slice[0] >= mol_slice[1] or mol_slice[2] <= 0:
        raise ValueError("Check input to mol_slice. Should have len 3, "
                         "start value < stop value, step >= 1.")
    start, stop, step = mol_slice

    # The molecule stream is walked exactly once and each wanted molecule is
    # copied while the iterator is positioned on it. Selection is driven by
    # the true position in the file (idx) so that stop is honored for any
    # step; a separately maintained counter previously fell behind the real
    # index whenever step > 1 and returned molecules past stop.
    selected = []
    wanted = max(start, 0)
    for idx, mol in enumerate(mols):
        if idx >= stop:
            break
        if idx != wanted:
            continue
        # important to append copy else still linked to orig generator
        selected.append(copy.copy(mol))
        wanted += step
        if wanted >= stop:
            # nothing left to pick; avoid reading more of the file
            break

    return selected
|
80
|
|
|
81
|
|
|
82
|
60
|
def read_text_input(infile, reffile=None, ref_index=None):
    """
    Read input file into an ordered dictionary.
    http://stackoverflow.com/questions/25924244/creating-2d-dictionary-in-python

    Lines starting with '#' are treated as comments and skipped, as are
    blank (empty or whitespace-only) lines. Every other line is split on
    commas and its first four fields, stripped of surrounding whitespace,
    are stored as 'theory', 'fname', 'tagkey', and 'label'. Any additional
    fields on the line are ignored.

    Parameters
    ----------
    infile : string
        name of the comma-separated text file to read
    reffile : optional
        not used by this function
    ref_index : optional
        not used by this function

    Returns
    -------
    wholedict : collections.OrderedDict
        maps the 0-based count of each data line (comments and blank lines
        not counted) to a dict with keys 'theory', 'fname', 'tagkey', 'label'

    Raises
    ------
    IndexError
        if a data line has fewer than four comma-separated fields

    """
    wholedict = collections.OrderedDict()
    with open(infile) as f:
        for line in f:
            # skip comments, and blank lines which would otherwise fail
            # with an IndexError when the fields are looked up below
            if line.startswith('#') or not line.strip():
                continue
            dataline = [x.strip() for x in line.split(',')]
            # key is the running number of data lines stored so far
            wholedict[len(wholedict)] = {
                'theory': dataline[0],
                'fname': dataline[1],
                'tagkey': dataline[2],
                'label': dataline[3],
            }

    return wholedict
|
108
|
|
|
109
|
60
|
def separated_theory(theory):
    """
    Split a level of theory string of the form 'method/basis'.

    Parameters
    ----------
    theory : string
        level of theory with method and basis set separated by '/'

    Returns
    -------
    qmethod : string
        text before the first '/', stripped of surrounding whitespace
    qbasis : string
        text between the first and second '/' (or to the end of the string
        if there is only one '/'), stripped of surrounding whitespace

    """
    pieces = theory.split('/')
    return pieces[0].strip(), pieces[1].strip()
|
113
|
|
|