1 25
module ExcelReaders
2

3
using PyCall, DataValues, Dates
4

5
export openxl, readxl, readxlsheet, ExcelErrorCell, ExcelFile, readxlnames, readxlrange
6

7
const xlrd  = PyNULL()
8

9
include("package_documentation.jl")
10

11
function __init__()
12 30
    copy!(xlrd, pyimport_conda("xlrd", "xlrd"))
13
end
14

15
"""
16
    ExcelFile
17

18
A handle to an open Excel file.
19

20
You can create an instance of an ``ExcelFile`` by calling ``openxl``.
21
"""
22
mutable struct ExcelFile
23 30
    workbook::PyObject
24
    filename::AbstractString
25
end
26

27
"""
28
    ExcelErrorCell
29

30
An Excel cell that has an Excel error.
31

32
You cannot create ``ExcelErrorCell`` objects, they are returned if a cell in an
33
Excel file has an Excel error.
34
"""
35
mutable struct ExcelErrorCell
36 30
    errorcode::Int
37
end
38

39
function Base.show(io::IO, o::ExcelFile)
40 30
    print(io, "ExcelFile <$(o.filename)>")
41
end
42

43
function Base.show(io::IO, o::ExcelErrorCell)
44 30
    print(io, xlrd.error_text_from_code[o.errorcode])
45
end
46

47
"""
48
    openxl(filename)
49

50
Open the Excel file ``filename`` and return an ``ExcelFile`` handle.
51

52
The returned ``ExcelFile`` handle can later be passed as the first argument to
53
``readxl`` or ``readxslsheet`` to read from that file. If you will call either
54
of those functions more than once, performance will be better if you open the
55
file only once with ``openxl``.
56

57
# Example
58
````julia
59
f = openxl("filename.xlsx")
60
data = readxl(f, "Sheet1!A1:C4")
61
````
62
"""
63
function openxl(filename::AbstractString)
64 30
    wb = xlrd.open_workbook(filename)
65 30
    return ExcelFile(wb, basename(filename))
66
end
67

68
function readxlsheet(filename::AbstractString, sheetindex::Int; args...)
69 30
    file = openxl(filename)
70 30
    return readxlsheet(file, sheetindex; args...)
71
end
72

73
function readxlsheet(file::ExcelFile, sheetindex::Int; args...)
74 30
    sheetnames = file.workbook.sheet_names()
75 30
    return readxlsheet(file, sheetnames[sheetindex]; args...)
76
end
77

78
function readxlsheet(filename::AbstractString, sheetname::AbstractString; args...)
79 30
    file = openxl(filename)
80 30
    return readxlsheet(file, sheetname; args...)
81
end
82

83
function readxlsheet(file::ExcelFile, sheetname::AbstractString; args...)
84 30
    sheet = file.workbook.sheet_by_name(sheetname)
85 30
    startrow, startcol, endrow, endcol = convert_args_to_row_col(sheet; args...)
86

87 30
    data = readxl_internal(file, sheetname, startrow, startcol, endrow, endcol)
88

89 30
    return data
90
end
91

92
# Function converts "relative" range like skip rows/cols and size of range to "absolute" from row/col to row/col
93
function convert_args_to_row_col(sheet;skipstartrows::Union{Int,Symbol} = :blanks, skipstartcols::Union{Int,Symbol} = :blanks, nrows::Union{Int,Symbol} = :all, ncols::Union{Int,Symbol} = :all)
94 30
    isa(skipstartrows, Symbol) && skipstartrows != :blanks && error("Only :blank or an integer is a valid argument for skipstartrows")
95 30
    isa(skipstartrows, Int) && skipstartrows < 0 && error("Can't skip a negative number of rows")
96 30
    isa(skipstartcols, Symbol) && skipstartcols != :blanks && error("Only :blank or an integer is a valid argument for skipstartcols")
97 30
    isa(skipstartcols, Int) && skipstartcols < 0 && error("Can't skip a negative number of columns")
98 30
    isa(nrows, Symbol) && nrows != :all && error("Only :all or an integer is a valid argument for nrows")
99 30
    isa(nrows, Int) && nrows < 0 && error("nrows should be :all or positive")
100 30
    isa(ncols, Symbol) && ncols != :all && error("Only :all or an integer is a valid argument for ncols")
101 30
    isa(ncols, Int) && ncols < 0 && error("ncols should be :all or positive")
102 25
    sheet_rows = sheet.nrows
103 25
    sheet_cols = sheet.ncols
104

105 25
    cell_value = sheet.cell_value
106

107 30
    if skipstartrows == :blanks
108 5
        startrow = -1
109 30
        for cur_row in 1:sheet_rows, cur_col in 1:sheet_cols
110 30
            cellval = cell_value(cur_row - 1, cur_col - 1)
111 30
            if cellval != ""
112 5
                startrow = cur_row
113 30
                break
114
            end
115
        end
116 30
        if startrow == -1
117 30
            error("Sheet has no data")
118
        else
119 30
            skipstartrows = startrow - 1
120
        end
121
    else
122 25
        startrow = 1 + skipstartrows
123
    end
124

125 30
    if skipstartcols == :blanks
126 5
        startcol = -1
127 30
        for cur_col in 1:sheet_cols, cur_row in 1:sheet_rows
128 30
            cellval = cell_value(cur_row - 1, cur_col - 1)
129 30
            if cellval != ""
130 5
                startcol = cur_col
131 30
                break
132
            end
133
        end
134 30
        if startcol == -1
135 0
            error("Sheet has no data")
136
        else
137 30
            skipstartcols = startcol - 1
138
        end
139
    else
140 25
        startcol = 1 + skipstartcols
141
    end
142

143 30
    if nrows == :all
144 30
        endrow = sheet_rows
145
    else
146 25
        endrow = nrows + skipstartrows
147
    end
148

149 30
    if ncols == :all
150 30
        endcol = sheet_cols
151
    else
152 25
        endcol = ncols + skipstartcols
153
    end
154

155 30
    return startrow, startcol, endrow, endcol
156
end
157

158
function colnum(col::AbstractString)
159 30
    cl = uppercase(col)
160 5
    r = 0
161 30
    for c in cl
162 30
        r = (r * 26) + (c - 'A' + 1)
163
    end
164 30
    return r
165
end
166

167
function convert_ref_to_sheet_row_col(range::AbstractString)
168 30
    r = r"('?[^']+'?|[^!]+)!([A-Za-z]*)(\d*)(:([A-Za-z]*)(\d*))?"
169 30
    m = match(r, range)
170 30
    m == nothing && error("Invalid Excel range specified.")
171 30
    sheetname = String(m.captures[1])
172 30
    startrow = parse(Int, m.captures[3])
173 30
    startcol = colnum(m.captures[2])
174 30
    if m.captures[4] == nothing
175 0
        endrow = startrow
176 0
        endcol = startcol
177
    else
178 30
        endrow = parse(Int, m.captures[6])
179 30
        endcol = colnum(m.captures[5])
180
    end
181 30
    if (startrow > endrow ) || (startcol > endcol)
182 30
        error("Please provide rectangular region from top left to bottom right corner")
183
    end
184 30
    return sheetname, startrow, startcol, endrow, endcol
185
end
186

187
function readxl(filename::AbstractString, range::AbstractString)
188 30
    excelfile = openxl(filename)
189

190 30
    readxl(excelfile, range)
191
end
192

193
function readxl(file::ExcelFile, range::AbstractString)
194 30
    sheetname, startrow, startcol, endrow, endcol = convert_ref_to_sheet_row_col(range)
195 30
    readxl_internal(file, sheetname, startrow, startcol, endrow, endcol)
196
end
197

198
function get_cell_value(ws, row, col, wb)
199 30
    cellval = ws.cell_value(row - 1, col - 1)
200 30
    if cellval == ""
201 30
        return NA
202
    else
203 30
        celltype = ws.cell_type(row - 1, col - 1)
204 30
        if celltype == xlrd.XL_CELL_TEXT
205 30
            return convert(String, cellval)
206 30
        elseif celltype == xlrd.XL_CELL_NUMBER
207 30
            return convert(Float64, cellval)
208 30
        elseif celltype == xlrd.XL_CELL_DATE
209 30
            date_year, date_month, date_day, date_hour, date_minute, date_sec = xlrd.xldate_as_tuple(cellval, wb.datemode)
210 30
            if date_month == 0
211 30
                return Time(date_hour, date_minute, date_sec)
212
            else
213 30
                return DateTime(date_year, date_month, date_day, date_hour, date_minute, date_sec)
214
            end
215 30
        elseif celltype == xlrd.XL_CELL_BOOLEAN
216 30
            return convert(Bool, cellval)
217 30
        elseif celltype == xlrd.XL_CELL_ERROR
218 30
            return ExcelErrorCell(cellval)
219
        else
220 0
            error("Unknown cell type")
221
        end
222
    end
223
end
224

225
function readxl_internal(file::ExcelFile, sheetname::AbstractString, startrow::Integer, startcol::Integer, endrow::Integer, endcol::Integer)
226 30
    wb = file.workbook
227 30
    ws = wb.sheet_by_name(sheetname)
228

229 30
    if startrow == endrow && startcol == endcol
230 0
        return get_cell_value(ws, startrow, startcol, wb)
231
    else
232

233 25
        data = Array{Any}(undef, endrow - startrow + 1, endcol - startcol + 1)
234

235 30
        for row in startrow:endrow
236 30
            for col in startcol:endcol
237 30
                data[row - startrow + 1, col - startcol + 1] = get_cell_value(ws, row, col, wb)
238
            end
239
        end
240

241 30
        return data
242
    end
243
end
244

245 0
function readxlnames(f::ExcelFile)
246 0
    return [lowercase(i.name) for i in f.workbook.name_obj_list if i.hidden == 0]
247
end
248

249 0
function readxlrange(f::ExcelFile, range::AbstractString)
250 0
    name = f.workbook.name_map[lowercase(range)]
251 0
    if length(name) != 1
252 0
        error("More than one reference per name, this case is not yet handled by ExcelReaders.")
253
    end
254

255 0
    formula_text = name[1].formula_text
256 0
    formula_text = replace(formula_text, "\$" => "")
257 0
    formula_text = replace(formula_text, "'" => "")
258

259 0
    return readxl(f, formula_text)
260
end
261

262
end # module

Read our documentation on viewing source code .

Loading