1 12
module ExcelReaders
2

3
using PyCall, DataValues, Dates
4

5
export openxl, readxl, readxlsheet, ExcelErrorCell, ExcelFile, readxlnames, readxlrange
6

7
const xlrd  = PyNULL()
8

9
include("package_documentation.jl")
10

11
function __init__()
12 15
    copy!(xlrd, pyimport_conda("xlrd", "xlrd"))
13
end
14

15
"""
16
    ExcelFile
17

18
A handle to an open Excel file.
19

20
You can create an instance of an ``ExcelFile`` by calling ``openxl``.
21
"""
22
mutable struct ExcelFile
23 15
    workbook::PyObject
24
    filename::AbstractString
25
end
26

27
"""
28
    ExcelErrorCell
29

30
An Excel cell that has an Excel error.
31

32
You cannot create ``ExcelErrorCell`` objects, they are returned if a cell in an
33
Excel file has an Excel error.
34
"""
35
mutable struct ExcelErrorCell
36 15
    errorcode::Int
37
end
38

39
function Base.show(io::IO, o::ExcelFile)
40 15
    print(io, "ExcelFile <$(o.filename)>")
41
end
42

43
function Base.show(io::IO, o::ExcelErrorCell)
44 15
    print(io, xlrd.error_text_from_code[o.errorcode])
45
end
46

47
"""
48
    openxl(filename)
49

50
Open the Excel file ``filename`` and return an ``ExcelFile`` handle.
51

52
The returned ``ExcelFile`` handle can later be passed as the first argument to
53
``readxl`` or ``readxslsheet`` to read from that file. If you will call either
54
of those functions more than once, performance will be better if you open the
55
file only once with ``openxl``.
56

57
# Example
58
````julia
59
f = openxl("filename.xlsx")
60
data = readxl(f, "Sheet1!A1:C4")
61
````
62
"""
63
function openxl(filename::AbstractString)
64 15
    wb = xlrd.open_workbook(filename)
65 15
    return ExcelFile(wb, basename(filename))
66
end
67

68
function readxlsheet(filename::AbstractString, sheetindex::Int; args...)
69 15
    file = openxl(filename)
70 15
    return readxlsheet(file, sheetindex; args...)
71
end
72

73
function readxlsheet(file::ExcelFile, sheetindex::Int; args...)
74 15
    sheetnames = file.workbook.sheet_names()
75 15
    return readxlsheet(file, sheetnames[sheetindex]; args...)
76
end
77

78
function readxlsheet(filename::AbstractString, sheetname::AbstractString; args...)
79 15
    file = openxl(filename)
80 15
    return readxlsheet(file, sheetname; args...)
81
end
82

83
function readxlsheet(file::ExcelFile, sheetname::AbstractString; args...)
84 15
    sheet = file.workbook.sheet_by_name(sheetname)
85 15
    startrow, startcol, endrow, endcol = convert_args_to_row_col(sheet; args...)
86

87 15
    data = readxl_internal(file, sheetname, startrow, startcol, endrow, endcol)
88

89 15
    return data
90
end
91

92
# Function converts "relative" range like skip rows/cols and size of range to "absolute" from row/col to row/col
93
function convert_args_to_row_col(sheet;skipstartrows::Union{Int,Symbol} = :blanks, skipstartcols::Union{Int,Symbol} = :blanks, nrows::Union{Int,Symbol} = :all, ncols::Union{Int,Symbol} = :all)
94 15
    isa(skipstartrows, Symbol) && skipstartrows != :blanks && error("Only :blank or an integer is a valid argument for skipstartrows")
95 15
    isa(skipstartrows, Int) && skipstartrows < 0 && error("Can't skip a negative number of rows")
96 15
    isa(skipstartcols, Symbol) && skipstartcols != :blanks && error("Only :blank or an integer is a valid argument for skipstartcols")
97 15
    isa(skipstartcols, Int) && skipstartcols < 0 && error("Can't skip a negative number of columns")
98 15
    isa(nrows, Symbol) && nrows != :all && error("Only :all or an integer is a valid argument for nrows")
99 15
    isa(nrows, Int) && nrows < 0 && error("nrows should be :all or positive")
100 15
    isa(ncols, Symbol) && ncols != :all && error("Only :all or an integer is a valid argument for ncols")
101 15
    isa(ncols, Int) && ncols < 0 && error("ncols should be :all or positive")
102 12
    sheet_rows = sheet.nrows
103 12
    sheet_cols = sheet.ncols
104

105 12
    cell_value = sheet.cell_value
106

107 15
    if skipstartrows == :blanks
108
        startrow = -1
109 15
        for cur_row in 1:sheet_rows, cur_col in 1:sheet_cols
110 15
            cellval = cell_value(cur_row - 1, cur_col - 1)
111 15
            if cellval != ""
112
                startrow = cur_row
113 15
                break
114
            end
115
        end
116 15
        if startrow == -1
117 15
            error("Sheet has no data")
118
        else
119 15
            skipstartrows = startrow - 1
120
        end
121
    else
122 12
        startrow = 1 + skipstartrows
123
    end
124

125 15
    if skipstartcols == :blanks
126
        startcol = -1
127 15
        for cur_col in 1:sheet_cols, cur_row in 1:sheet_rows
128 15
            cellval = cell_value(cur_row - 1, cur_col - 1)
129 15
            if cellval != ""
130
                startcol = cur_col
131 15
                break
132
            end
133
        end
134 15
        if startcol == -1
135 0
            error("Sheet has no data")
136
        else
137 15
            skipstartcols = startcol - 1
138
        end
139
    else
140 12
        startcol = 1 + skipstartcols
141
    end
142

143 15
    if nrows == :all
144 15
        endrow = sheet_rows
145
    else
146 12
        endrow = nrows + skipstartrows
147
    end
148

149 15
    if ncols == :all
150 15
        endcol = sheet_cols
151
    else
152 12
        endcol = ncols + skipstartcols
153
    end
154

155 15
    return startrow, startcol, endrow, endcol
156
end
157

158
function colnum(col::AbstractString)
159 15
    cl = uppercase(col)
160
    r = 0
161 15
    for c in cl
162 15
        r = (r * 26) + (c - 'A' + 1)
163
    end
164 15
    return r
165
end
166

167
function convert_ref_to_sheet_row_col(range::AbstractString)
168 15
    r = r"('?[^']+'?|[^!]+)!([A-Za-z]*)(\d*)(:([A-Za-z]*)(\d*))?"
169 15
    m = match(r, range)
170 15
    m == nothing && error("Invalid Excel range specified.")
171 15
    sheetname = String(m.captures[1])
172 15
    startrow = parse(Int, m.captures[3])
173 15
    startcol = colnum(m.captures[2])
174 15
    if m.captures[4] == nothing
175
        endrow = startrow
176 0
        endcol = startcol
177
    else
178 15
        endrow = parse(Int, m.captures[6])
179 15
        endcol = colnum(m.captures[5])
180
    end
181 15
    if (startrow > endrow ) || (startcol > endcol)
182 15
        error("Please provide rectangular region from top left to bottom right corner")
183
    end
184 15
    return sheetname, startrow, startcol, endrow, endcol
185
end
186

187
function readxl(filename::AbstractString, range::AbstractString)
188 15
    excelfile = openxl(filename)
189

190 15
    readxl(excelfile, range)
191
end
192

193
function readxl(file::ExcelFile, range::AbstractString)
194 15
    sheetname, startrow, startcol, endrow, endcol = convert_ref_to_sheet_row_col(range)
195 15
    readxl_internal(file, sheetname, startrow, startcol, endrow, endcol)
196
end
197

198
function get_cell_value(ws, row, col, wb)
199 15
    cellval = ws.cell_value(row - 1, col - 1)
200 15
    if cellval == ""
201 15
        return NA
202
    else
203 15
        celltype = ws.cell_type(row - 1, col - 1)
204 15
        if celltype == xlrd.XL_CELL_TEXT
205 15
            return convert(String, cellval)
206 15
        elseif celltype == xlrd.XL_CELL_NUMBER
207 15
            return convert(Float64, cellval)
208 15
        elseif celltype == xlrd.XL_CELL_DATE
209 15
            date_year, date_month, date_day, date_hour, date_minute, date_sec = xlrd.xldate_as_tuple(cellval, wb.datemode)
210 15
            if date_month == 0
211 15
                return Time(date_hour, date_minute, date_sec)
212
            else
213 15
                return DateTime(date_year, date_month, date_day, date_hour, date_minute, date_sec)
214
            end
215 15
        elseif celltype == xlrd.XL_CELL_BOOLEAN
216 15
            return convert(Bool, cellval)
217 15
        elseif celltype == xlrd.XL_CELL_ERROR
218 15
            return ExcelErrorCell(cellval)
219
        else
220 0
            error("Unknown cell type")
221
        end
222
    end
223
end
224

225
function readxl_internal(file::ExcelFile, sheetname::AbstractString, startrow::Integer, startcol::Integer, endrow::Integer, endcol::Integer)
226 15
    wb = file.workbook
227 15
    ws = wb.sheet_by_name(sheetname)
228

229 15
    if startrow == endrow && startcol == endcol
230 0
        return get_cell_value(ws, startrow, startcol, wb)
231
    else
232

233 12
        data = Array{Any}(undef, endrow - startrow + 1, endcol - startcol + 1)
234

235 15
        for row in startrow:endrow
236 15
            for col in startcol:endcol
237 15
                data[row - startrow + 1, col - startcol + 1] = get_cell_value(ws, row, col, wb)
238
            end
239
        end
240

241 15
        return data
242
    end
243
end
244

245
function readxlnames(f::ExcelFile)
246 0
    return [lowercase(i.name) for i in f.workbook.name_obj_list if i.hidden == 0]
247
end
248

249
function readxlrange(f::ExcelFile, range::AbstractString)
250 0
    name = f.workbook.name_map[lowercase(range)]
251 0
    if length(name) != 1
252 0
        error("More than one reference per name, this case is not yet handled by ExcelReaders.")
253
    end
254

255 0
    formula_text = name[1].formula_text
256 0
    formula_text = replace(formula_text, "\$" => "")
257 0
    formula_text = replace(formula_text, "'" => "")
258

259 0
    return readxl(f, formula_text)
260
end
261

262
end # module

Read our documentation on viewing source code .

Loading