1
#!/usr/bin/env python
2 76
"""
pipeline.py

Main interface to Quanformer pipeline.

Version:    Mar 28 2019
By:         Victoria T. Lim

"""
11

12 76
import os
13

14 76
import quanformer.initialize_confs as initialize_confs
15 76
import quanformer.filter_confs     as filter_confs
16 76
import quanformer.confs_to_psi     as confs_to_psi
17 76
import quanformer.get_psi_results  as get_psi_results
18

19

20 76
def name_manager(infile):
    """
    File-checking and parsing for internal use.

    Parameters
    ----------
    infile : str
        Path to an existing file, absolute or relative to the current
        working directory. Ex. "/path/to/file.sdf"

    Returns
    -------
    curr_dir : str
        Current working directory of the process (``os.getcwd()``).
        Note that this is NOT necessarily the directory holding infile.
    checked_infile : str
        Absolute path of infile. Ex. "/path/to/file.sdf"
    prefix : str
        File name without extension, cut at the first '-' or '#'.
        Ex. "file" for either "file.sdf" or "file-200.sdf"
    ext : str
        Extension including the leading dot. Ex. ".sdf"
    no_path_infile : str
        File name without any directory part. Ex. "file.sdf"

    Raises
    ------
    FileNotFoundError
        If infile does not exist.

    """
    curr_dir = os.getcwd()

    if not os.path.exists(infile):
        raise FileNotFoundError("No such file: {}".format(infile))

    # if infile does not contain full path, it is resolved against curr dir
    checked_infile = os.path.abspath(infile)

    # file name with the directory part stripped
    no_path_infile = os.path.basename(checked_infile)

    # separate the extension (.sdf, .smi, etc.) from the rest of the name
    stem, ext = os.path.splitext(no_path_infile)

    # treat '-' and '#' alike and keep only what precedes the first one,
    # so that pipeline suffixes such as "-200" are dropped from the prefix
    prefix = stem.replace('-', '#').split('#')[0]

    return curr_dir, checked_infile, prefix, ext, no_path_infile
61

62

63 76
def setup_conformers(infile):
    """
    Generate and filter conformers for input list of SMILES strings.
    Output is saved in SDF file.

    The filtered conformers are written to "<prefix>-200.sdf" in the
    current working directory, where <prefix> comes from name_manager.

    Parameters
    ----------
    infile : str
        filename of the SMILES input data; must have the extension .smi

    Raises
    ------
    ValueError
        If the extension of infile is not ".smi".

    """
    # value appended to the filename after MM opt/filtering
    mm_suffix = '200'

    curr_dir, checked_infile, prefix, ext, _ = name_manager(infile)

    if ext != '.smi':
        raise ValueError("Input should be a SMILES file with extension of .smi")
    print("\nGenerating and filtering conformers for {}".format(infile))

    # generate conformers and MM opt
    initialize_confs.initialize_confs(checked_infile)

    # filter conformers; the unfiltered file is looked up as <prefix>.sdf in
    # the working directory (presumably written there by initialize_confs
    # -- confirm against that module)
    unfiltered_sdf = os.path.join(curr_dir, prefix + '.sdf')
    filtered_sdf = os.path.join(
        curr_dir, "{}-{}.sdf".format(prefix, mm_suffix))
    filter_confs.filter_confs(
        unfiltered_sdf, "MM Szybki SD Energy", filtered_sdf)
95

96

97 76
def setup_calculations(infile, method, basisset, calctype='opt', mem='5.0 Gb'):
    """
    Write input files for Psi4 calculations.

    Parameters
    ----------
    infile : str
        filename of the SDF input molecules
    method : str
        name of QM method
    basisset : str
        name of QM basis set
    calctype : str
        'opt' for geometry optimizations,
        'spe' for single point energy calculations,
        'hess' for Hessian calculation
        default in pipeline is 'opt'
    mem : str
        allotted memory for each Psi4 calculation, default in pipeline is 5 Gb

    Raises
    ------
    ValueError
        If calctype is not one of 'opt', 'spe', 'hess'.

    """
    # only the absolute path and the prefix are needed from the parsed name
    checked_infile, prefix = name_manager(infile)[1:3]

    # check that specified calctype is valid
    allowed_calctypes = {'opt', 'spe', 'hess'}
    if not (calctype in allowed_calctypes):
        raise ValueError("Specify a valid calculation type.")

    print("\nCreating Psi4 input files for %s..." % prefix)
    confs_to_psi.confs_to_psi(
        checked_infile, method, basisset, calctype, mem)
128

129

130 76
def process_results(infile, calctype='opt', suffix=None, psiout='output.dat', timeout='timer.dat'):
    """
    Process Psi4 output files and filter conformers.

    Parameters
    ----------
    infile : str
        filename of the SDF input molecules
    calctype : str
        type of calculation whose results are processed; passed on to
        get_psi_results. Filtering is only performed when this is 'opt'.
        default in pipeline is 'opt'
    suffix : list of str, optional
        suffix of filename if not following numbering convention of pipeline
        ('200','210','220','221','222'). this list should contain one string
        for appending to filename with extracted QM results. if calctype is
        'opt', the list should also contain a second string for extracted
        and filtered QM results. Ex. ['qm','qmfilt']
        A single bare string is treated as a one-element list. If omitted
        or empty, output names are derived from the '-200.sdf' / '-220.sdf'
        ending of infile.
    psiout : str
        passed unchanged to get_psi_results (presumably the name of the
        Psi4 output file to read -- confirm against get_psi_results)
    timeout : str
        passed unchanged to get_psi_results (presumably the name of the
        Psi4 timer file to read -- confirm against get_psi_results)

    Raises
    ------
    ValueError
        If no suffix is given and infile does not carry a '-200.sdf' or
        '-220.sdf' ending, or if filtering is required (calctype 'opt')
        but suffix does not provide a second entry for the filtered file.

    """
    curr_dir, checked_infile, prefix, _, no_path_infile = name_manager(
        infile)

    # None stands in for the former mutable [] default; a lone string is
    # wrapped so that it is not indexed character by character
    if suffix is None:
        suffix = []
    elif isinstance(suffix, str):
        suffix = [suffix]

    # stays None when the caller supplied no name for the filtered output
    out_filter = None

    # default of pipeline goes '200' --> '210'/'220' --> '221/'222'
    if len(suffix) > 0:
        out_results = os.path.join(
            curr_dir, "{}-{}.sdf".format(prefix, suffix[0]))
        if len(suffix) >= 2:
            out_filter = os.path.join(
                curr_dir, "{}-{}.sdf".format(prefix, suffix[1]))
    elif '-200.sdf' in no_path_infile:
        out_results = os.path.join(curr_dir, prefix + '-210.sdf')
        out_filter = os.path.join(curr_dir, prefix + '-220.sdf')
    elif '-220.sdf' in no_path_infile:
        out_results = os.path.join(curr_dir, prefix + '-221.sdf')
        out_filter = os.path.join(curr_dir, prefix + '-222.sdf')
    else:
        raise ValueError(
            "ERROR: Input file does not have usual 200-series "
            "suffixes (see README).\nPlease specify suffix(es) in a "
            "list in accordance with documentation.")

    # get psi4 results
    print("Getting Psi4 results for %s ..." % (checked_infile))
    method, basisset = get_psi_results.get_psi_results(
        checked_infile, out_results, calctype=calctype, psiout=psiout,
        timeout=timeout)

    # only filter structures after opts; spe/hess should not change geoms
    if calctype == 'opt' and None not in [method, basisset]:
        if out_filter is None:
            # previously surfaced as an UnboundLocalError on out_filter
            raise ValueError(
                "ERROR: calctype 'opt' requires a second suffix for the "
                "filtered output file. Ex. ['qm','qmfilt']")
        filter_results(out_results, out_filter, method, basisset)
177

178 76
def filter_results(infile, outfile, method, basisset):
    """
    Filter conformers by their Psi4 final optimization energy tag.

    May be called directly to filter without extracting data first.

    Parameters
    ----------
    infile : str
        filename of the SDF molecules to be filtered
    outfile : str
        filename to which the filtered molecules are written
    method : str
        name of QM method, used to build the SD tag name
    basisset : str
        name of QM basis set, used to build the SD tag name

    """
    print("Filtering Psi4 results for %s ..." % outfile)

    # SD tag under which the optimized energy is looked up
    energy_tag = "QM Psi4 Final Opt. Energy (Har) %s/%s" % (method, basisset)
    filter_confs.filter_confs(infile, energy_tag, outfile)
184

Read our documentation on viewing source code .

Loading