1
|
|
#!/usr/bin/env python
|
2
|
60
|
"""
|
3
|
|
pipeline.py
|
4
|
|
|
5
|
|
Main interface to Quanformer pipeline.
|
6
|
|
|
7
|
|
Version: Mar 28 2019
|
8
|
|
By: Victoria T. Lim
|
9
|
|
|
10
|
|
"""
|
11
|
|
|
12
|
60
|
import os
|
13
|
|
|
14
|
60
|
import quanformer.initialize_confs as initialize_confs
|
15
|
60
|
import quanformer.filter_confs as filter_confs
|
16
|
60
|
import quanformer.confs_to_psi as confs_to_psi
|
17
|
60
|
import quanformer.get_psi_results as get_psi_results
|
18
|
|
|
19
|
|
|
20
|
60
|
def name_manager(infile):
    """
    File-checking and parsing for internal use.

    Parameters
    ----------
    infile : str
        Ex. "/path/to/file.sdf"

    Returns
    -------
    curr_dir : str
        Current working directory of the process (``os.getcwd()``). This is
        NOT necessarily the directory that contains infile.
        Ex. "/directory/where/python/was/started"
    checked_infile : str
        Absolute path of infile.
        Ex. "/path/to/file.sdf"
    prefix : str
        Base name of infile without extension, cut at the first '-' or '#'.
        Ex. "file" for either "file.sdf" or "file-200.sdf"
    ext : str
        Ex. ".sdf"
    no_path_infile : str
        Ex. "file.sdf"

    Raises
    ------
    FileNotFoundError
        If infile does not exist.

    """
    curr_dir = os.getcwd()

    if not os.path.exists(infile):
        raise FileNotFoundError("No such file: {}".format(infile))

    # if infile does not contain full path, then its path is curr dir
    checked_infile = os.path.abspath(infile)

    # file name with extension but without any directory component
    no_path_infile = os.path.basename(checked_infile)

    # separate the extension (.sdf, .smi, etc.) from the rest of the name
    stem, ext = os.path.splitext(no_path_infile)

    # replace - with # and split by # to get basename without suffix/extension
    prefix = stem.replace('-', '#').split('#')[0]

    return curr_dir, checked_infile, prefix, ext, no_path_infile
|
61
|
|
|
62
|
|
|
63
|
60
|
def setup_conformers(infile):
    """
    Build conformers from a list of SMILES strings, then filter them.
    The filtered conformers end up in an SDF file.

    Parameters
    ----------
    infile : str
        filename of the SMILES input data; must have the .smi extension

    Raises
    ------
    ValueError
        If the extension of infile is not ".smi".

    """
    curr_dir, checked_infile, prefix, ext, _ = name_manager(infile)

    # guard: this stage only accepts SMILES input
    if ext != '.smi':
        raise ValueError("Input should be a SMILES file with extension of .smi")
    print("\nGenerating and filtering conformers for {}".format(infile))

    # conformer generation followed by MM optimization
    initialize_confs.initialize_confs(checked_infile)

    # value appended to the filename after MM opt/filtering
    filtered_suffix = '200'

    # both files live in the working directory and share the input's prefix
    # NOTE(review): assumes initialize_confs wrote <prefix>.sdf there -- confirm
    unfiltered_sdf = os.path.join(curr_dir, prefix + '.sdf')
    filtered_name = "{}-{}.sdf".format(prefix, filtered_suffix)
    filtered_sdf = os.path.join(curr_dir, filtered_name)

    # filter conformers on the MM energy tag
    filter_confs.filter_confs(
        unfiltered_sdf, "MM Szybki SD Energy", filtered_sdf)
|
95
|
|
|
96
|
|
|
97
|
60
|
def setup_calculations(infile, method, basisset, calctype='opt', mem='5.0 Gb'):
    """
    Generate the Psi4 input files for a set of molecules.

    Parameters
    ----------
    infile : str
        filename of the SDF input molecules
    method : str
        name of QM method
    basisset : str
        name of QM basis set
    calctype : str
        one of the following (default in pipeline is 'opt'):
        'opt' for geometry optimizations,
        'spe' for single point energy calculations,
        'hess' for Hessian calculation
    mem : str
        allotted memory for each Psi4 calculation, default in pipeline is 5 Gb

    Raises
    ------
    ValueError
        If calctype is not one of 'opt', 'spe', 'hess'.

    """
    # only the absolute path and the prefix are needed here
    _, checked_infile, prefix, _, _ = name_manager(infile)

    # reject unknown calculation types before any file is written
    supported_types = {'opt', 'spe', 'hess'}
    if calctype not in supported_types:
        raise ValueError("Specify a valid calculation type.")

    print("\nCreating Psi4 input files for %s..." % prefix)
    confs_to_psi.confs_to_psi(checked_infile, method, basisset, calctype, mem)
|
128
|
|
|
129
|
|
|
130
|
60
|
def process_results(infile, calctype='opt', suffix=None, psiout='output.dat', timeout='timer.dat'):
    """
    Process Psi4 output files and filter conformers.

    Parameters
    ----------
    infile : str
        filename of the SDF input molecules
    calctype : str
        type of calculation whose results are processed; conformers are
        only filtered when this is 'opt'. default is 'opt'
    suffix : list
        suffix of filename if not following numbering convention of pipeline
        ('200','210','220','221','222'). this list should contain one string
        for appending to filename with extracted QM results. if calctype is
        'opt', the list should also contain a second string for extracted
        and filtered QM results. Ex. ['qm','qmfilt']
        default of None (or an empty list) uses the pipeline convention:
        input '-200.sdf' --> results '-210.sdf', filtered '-220.sdf';
        input '-220.sdf' --> results '-221.sdf', filtered '-222.sdf'
    psiout : str
        passed through to get_psi_results, default 'output.dat'
        (presumably the name of each Psi4 output file -- confirm there)
    timeout : str
        passed through to get_psi_results, default 'timer.dat'
        (presumably the name of each Psi4 timer file -- confirm there)

    Raises
    ------
    ValueError
        If no suffix is given and infile does not carry a '-200.sdf' or
        '-220.sdf' suffix, or if results of an 'opt' calculation are ready
        to be filtered but suffix does not name the filtered output file.

    """
    curr_dir, checked_infile, prefix, ext, no_path_infile = name_manager(
        infile)

    # None replaces a mutable [] default; both mean "use pipeline convention"
    if suffix is None:
        suffix = []

    # stays None when the caller gave no name for the filtered output
    out_filter = None

    # default of pipeline goes '200' --> '210'/'220' --> '221'/'222'
    if len(suffix) > 0:
        out_results = os.path.join(
            curr_dir, "{}-{}.sdf".format(prefix, suffix[0]))
        if len(suffix) == 2:
            out_filter = os.path.join(
                curr_dir, "{}-{}.sdf".format(prefix, suffix[1]))
    elif '-200.sdf' in no_path_infile:
        out_results = os.path.join(curr_dir, prefix + '-210.sdf')
        out_filter = os.path.join(curr_dir, prefix + '-220.sdf')
    elif '-220.sdf' in no_path_infile:
        out_results = os.path.join(curr_dir, prefix + '-221.sdf')
        out_filter = os.path.join(curr_dir, prefix + '-222.sdf')
    else:
        raise ValueError(
            "ERROR: Input file does not have usual 200-series "
            "suffixes (see README).\nPlease specify suffix(es) in a "
            "list in accordance with documentation.")

    # get psi4 results
    print("Getting Psi4 results for %s ..." % (checked_infile))
    method, basisset = get_psi_results.get_psi_results(
        checked_infile, out_results, calctype=calctype, psiout=psiout, timeout=timeout)

    # only filter structures after opts; spe/hess should not change geoms
    if calctype == 'opt' and None not in [method, basisset]:
        # previously this case failed with an UnboundLocalError on out_filter
        if out_filter is None:
            raise ValueError(
                "ERROR: Results were extracted to {} but cannot be "
                "filtered.\nFor calctype 'opt', suffix must contain a second "
                "string for the filtered output file. "
                "Ex. ['qm','qmfilt']".format(out_results))
        filter_results(out_results, out_filter, method, basisset)
|
177
|
|
|
178
|
60
|
def filter_results(infile, outfile, method, basisset):
    """
    Filter conformers on their Psi4 final optimized energy.
    May be called directly to filter without extracting any data.

    Parameters
    ----------
    infile : str
        filename of the SDF file to be filtered
    outfile : str
        filename handed to filter_confs as the output file
    method : str
        name of QM method, used to build the energy tag
    basisset : str
        name of QM basis set, used to build the energy tag

    """
    # tag identifying which energy filter_confs should work with
    energy_tag = "QM Psi4 Final Opt. Energy (Har) %s/%s" % (method, basisset)

    print("Filtering Psi4 results for %s ..." % (outfile))
    filter_confs.filter_confs(infile, energy_tag, outfile)
|
184
|
|
|