1
#!/usr/bin/env python
2 100
"""
3
reader.py
4

5
Functions to parse or read in files or OEMols.
6

7
Version:    Apr 2 2019
8
By:         Victoria T. Lim
9

10
"""
11

12 100
import openeye.oechem as oechem
13 100
import collections  # ordered dictionary
14 100
import copy
15

16

17 100
def _slice_mols(mols, start, stop, step):
    """
    Copy every `step`-th item of an iterator whose position lies in
    [start, stop), i.e. the same positions as ``range(start, stop, step)``.

    Parameters
    ----------
    mols : iterator
        iterator yielding molecules; consumed up to (at most) position `stop`
    start : int
        position of the first item to keep; negative values are treated as 0
    stop : int
        position at which to stop (exclusive)
    step : int
        spacing between kept positions, >= 1

    Returns
    -------
    list
        copies (via copy.copy) of the selected items, in input order

    """
    first = max(start, 0)
    picked = []
    for idx, mol in enumerate(mols):
        if idx >= stop:
            break
        if idx >= first and (idx - first) % step == 0:
            # Copy while this item is the current one: the original code notes
            # that an uncopied molecule stays linked to the source generator.
            picked.append(copy.copy(mol))
    return picked


def read_mols(infile, mol_slice=None):
    """
    Open a molecule file and return molecules and conformers as OEMols.

    Parameters
    ----------
    infile : string
        name of input file with molecules
    mol_slice : list
        list of indices from which to slice mols generator for read_mols
        [start, stop, step]

    Returns
    -------
    mols : OEMols
        If mol_slice is None, the generator returned by
        oemolistream.GetOEMols(). Otherwise a list with copies of the
        molecules at positions start, start+step, ... below stop.

    Raises
    ------
    ValueError
        if mol_slice does not have length 3, start >= stop, or step <= 0
    FileNotFoundError
        if infile cannot be opened for reading

    """
    # Validate the request before opening anything, so bad input fails fast
    # and does not leave an open stream behind.
    if mol_slice is not None:
        if len(mol_slice) != 3 or mol_slice[0] >= mol_slice[1] or mol_slice[2] <= 0:
            raise ValueError("Check input to mol_slice. Should have len 3, "
                "start value < stop value, step >= 1.")

    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(infile):
        raise FileNotFoundError(f"Unable to open {infile} for reading")
    mols = ifs.GetOEMols()

    if mol_slice is None:
        # Lazy generator; the caller iterates it, so the stream stays open.
        return mols

    start, stop, step = mol_slice
    try:
        return _slice_mols(mols, start, stop, step)
    finally:
        # Selected molecules are independent copies, so the stream is done.
        ifs.close()
80

81

82 100
def read_text_input(infile, reffile=None, ref_index=None):
    """
    Read input file into an ordered dictionary.
    http://stackoverflow.com/questions/25924244/creating-2d-dictionary-in-python

    Each data line holds at least four comma-separated fields, in the order
    theory, fname, tagkey, label. Surrounding whitespace of every field is
    removed. Blank lines and lines whose first non-blank character is '#'
    are skipped.

    Parameters
    ----------
    infile : string
        name of the text file to read
    reffile : optional
        not used by this function
    ref_index : optional
        not used by this function

    Returns
    -------
    wholedict : collections.OrderedDict
        maps the 0-based index of each data line (skipped lines are not
        counted) to a dict with keys 'theory', 'fname', 'tagkey', 'label'

    Raises
    ------
    IndexError
        if a data line has fewer than four comma-separated fields

    """
    wholedict = collections.OrderedDict()
    with open(infile) as f:
        for line in f:
            stripped = line.strip()
            # Skip comments and empty lines (e.g. a trailing blank line),
            # which would otherwise fail on the field lookups below.
            if not stripped or stripped.startswith('#'):
                continue
            dataline = [x.strip() for x in stripped.split(',')]
            # Key is the running count of data lines stored so far.
            wholedict[len(wholedict)] = {
                'theory': dataline[0],
                'fname': dataline[1],
                'tagkey': dataline[2],
                'label': dataline[3],
            }

    return wholedict
108

109 100
def separated_theory(theory):
    """
    Split a 'method/basis' string into its two parts.

    Parameters
    ----------
    theory : string
        text with the method and the basis set separated by '/'

    Returns
    -------
    qmethod : string
        text before the first '/', surrounding whitespace removed
    qbasis : string
        text between the first and second '/' (or up to the end of the
        string if there is only one), surrounding whitespace removed

    Raises
    ------
    IndexError
        if theory contains no '/'

    """
    # Split once and reuse the pieces for both return values.
    parts = theory.split('/')
    qmethod = parts[0].strip()
    qbasis = parts[1].strip()
    return qmethod, qbasis
113

Read our documentation on viewing source code.

Loading