1
#!/usr/bin/env python
2 60
"""
3
reader.py
4

5
Functions to parse or read in files or OEMols.
6

7
Version:    Apr 2 2019
8
By:         Victoria T. Lim
9

10
"""
11

12 60
import openeye.oechem as oechem
13 60
import collections  # ordered dictionary
14 60
import copy
15

16

17 60
def read_mols(infile, mol_slice=None):
    """
    Open a molecule file and return molecules and conformers as OEMols.

    Parameters
    ----------
    infile : string
        name of input file with molecules
    mol_slice : list
        list of indices from which to slice mols generator for read_mols
        [start, stop, step]. Molecules at 0-based positions
        start, start+step, start+2*step, ... that are below stop are kept,
        i.e., the same convention as Python slicing. A negative start is
        treated as 0.

    Returns
    -------
    mols : OEMols
        the generator obtained from the input stream if mol_slice is None,
        otherwise a list holding a copy of each selected molecule

    Raises
    ------
    FileNotFoundError
        if infile cannot be opened for reading
    ValueError
        if mol_slice does not have length 3, start >= stop, or step < 1

    """
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(infile):
        raise FileNotFoundError(f"Unable to open {infile} for reading")
    mols = ifs.GetOEMols()

    if mol_slice is None:
        return mols

    if len(mol_slice) != 3 or mol_slice[0] >= mol_slice[1] or mol_slice[2] <= 0:
        raise ValueError("Check input to mol_slice. Should have len 3, "
            "start value < stop value, step >= 1.")

    start, stop, step = mol_slice
    # a negative start selects from the first molecule in the file
    start = max(start, 0)

    # TODO more efficient. can't itertools.islice bc lost mol info (name, SD)
    # after next(); copy/deepcopy doesn't work on generator objects, and it
    # also doesn't work to convert generator to list then slice list
    mlist = []
    for index, mol in enumerate(mols):
        # index is the true position in the file, so stop is honored for
        # any step size
        if index >= stop:
            break
        if index < start or (index - start) % step != 0:
            continue
        # important to append copy else still linked to orig generator
        mlist.append(copy.copy(mol))

    return mlist
80

81

82 60
def read_text_input(infile, reffile=None, ref_index=None):
    """
    Read input file into an ordered dictionary.
    http://stackoverflow.com/questions/25924244/creating-2d-dictionary-in-python

    Each data line is expected to hold at least four comma-separated fields
    in the order: theory, fname, tagkey, label. Whitespace around each field
    is stripped and any fields past the fourth are ignored. Blank lines and
    lines whose first non-whitespace character is '#' are skipped.

    Parameters
    ----------
    infile : string
        name of the text file to parse
    reffile : optional
        not used by this function
    ref_index : optional
        not used by this function

    Returns
    -------
    wholedict : collections.OrderedDict
        maps the 0-based index of each data line (skipped lines are not
        counted) to a dict with keys 'theory', 'fname', 'tagkey', 'label'

    Raises
    ------
    IndexError
        if a data line has fewer than four comma-separated fields

    """
    wholedict = collections.OrderedDict()
    with open(infile) as f:
        for line in f:
            stripped = line.strip()
            # skip blank lines (e.g., trailing newline at end of file) and
            # comment lines, including comments that are indented
            if not stripped or stripped.startswith('#'):
                continue
            dataline = [x.strip() for x in stripped.split(',')]
            if len(dataline) < 4:
                raise IndexError(
                    f"Expected 4 comma-separated fields in {infile} "
                    f"but found {len(dataline)}: {stripped!r}")
            # the next key is the number of entries stored so far
            wholedict[len(wholedict)] = {
                'theory': dataline[0],
                'fname': dataline[1],
                'tagkey': dataline[2],
                'label': dataline[3],
            }

    return wholedict
108

109 60
def separated_theory(theory):
    """
    Split a 'method/basis' string into its two parts.

    Parameters
    ----------
    theory : string
        text with the method and the basis set separated by '/'

    Returns
    -------
    qmethod : string
        text before the first '/', with surrounding whitespace removed
    qbasis : string
        text between the first and second '/' (or up to the end if there
        is only one '/'), with surrounding whitespace removed

    Raises
    ------
    IndexError
        if theory contains no '/'

    """
    # split once and reuse the pieces for both return values
    parts = theory.split('/')
    qmethod = parts[0].strip()
    qbasis = parts[1].strip()
    return qmethod, qbasis
113

Read our documentation on viewing source code .

Loading