1
module DBFTables
2

3
using Printf, Tables, WeakRefStrings
4

5
"Field/column descriptor, part of the Header"
6
struct FieldDescriptor
7 11
    name::Symbol
8
    type::Type
9
    length::UInt8
10
    ndec::UInt8
11
end
12

13
"DBF header, which also holds all field definitions"
14
struct Header
15 11
    version::UInt8
16
    last_update::String
17
    records::UInt32
18
    hsize::UInt16
19
    rsize::UInt16
20
    incomplete::Bool
21
    encrypted::Bool
22
    mdx::Bool
23
    lang_id::UInt8
24
    fields::Vector{FieldDescriptor}
25
    fieldcolumns::Dict{Symbol, Int}
26
end
27

28
"Struct representing the DBF Table"
29
struct Table
30 7
    header::Header
31
    data::Vector{UInt8}  # WeakRefString references this
32
    strings::StringArray{String,2}
33
end
34

35
"Struct representing a single row or record of the DBF Table"
36
struct Row
37 11
    table::Table
38
    row::Int
39
end
40

41
"Convert DBF data type characters to Julia types"
42
function typemap(fld::Char, ndec::UInt8)
43
    # https://www.clicketyclick.dk/databases/xbase/format/data_types.html
44 7
    rt = Nothing
45 11
    if fld == 'C'
46 11
        rt = String
47 11
    elseif fld == 'D'
48 11
        rt = String
49 11
    elseif fld == 'N'
50 11
        if ndec > 0
51 11
            rt = Float64
52
        else
53
            # TODO do we want this?
54 11
            rt = Int
55
        end
56 11
    elseif fld == 'F' || fld == 'O'
57 11
        rt = Float64
58 11
    elseif fld == 'I' || fld == '+'
59 0
        rt = Int
60 11
    elseif fld == 'L'
61 11
        rt = Bool
62
    else
63 0
        throw(ArgumentError("Unknown record type $fld"))
64
    end
65 11
    return rt
66
end
67

68
"Read a field descriptor from the stream, and create a FieldDescriptor struct"
69 3
function read_dbf_field(io::IO)
70 11
    field_name_raw = String(read!(io, Vector{UInt8}(undef, 11)))
71 7
    field_name = Symbol(strip(replace(field_name_raw, '\0' => ' ')))
72 11
    field_type = read(io, Char)
73 7
    skip(io, 4)  # skip
74 7
    field_len = read(io, UInt8)
75 7
    field_dec = read(io, UInt8)
76 7
    skip(io, 14)  # reserved
77 7
    jltype = typemap(field_type, field_dec)
78 11
    return FieldDescriptor(field_name, jltype, field_len, field_dec)
79
end
80

81
"Read a DBF header from a stream"
82 3
function Header(io::IO)
83 11
    ver = read(io, UInt8)
84 7
    date1 = read(io, UInt8)
85 7
    date2 = read(io, UInt8)
86 7
    date3 = read(io, UInt8)
87 11
    last_update = @sprintf("%4d%02d%02d", date1 + 1900, date2, date3)
88 7
    records = read(io, UInt32)
89 7
    hsize = read(io, UInt16)
90 7
    rsize = read(io, UInt16)
91 7
    skip(io, 2)  # reserved
92 11
    incomplete = Bool(read(io, UInt8))
93 11
    encrypted = Bool(read(io, UInt8))
94 7
    skip(io, 12)  # reserved
95 11
    mdx = Bool(read(io, UInt8))
96 7
    lang_id = read(io, UInt8)
97 7
    skip(io, 2)  # reserved
98 7
    fields = FieldDescriptor[]
99

100
    # use Dict for quicker column index lookup
101 11
    fieldcolumns = Dict{Symbol, Int}()
102 7
    col = 1
103 11
    while !eof(io)
104 11
        field = read_dbf_field(io)
105 11
        fieldcolumns[field.name] = col
106 7
        push!(fields, field)
107 7
        col += 1
108

109
        # peek if we are at the end
110 7
        mark(io)
111 7
        trm = read(io, UInt8)
112 11
        if trm == 0xD
113 11
            break
114
        else
115 11
            reset(io)
116
        end
117
    end
118

119 11
    return Header(
120
        ver,
121
        last_update,
122
        records,
123
        hsize,
124
        rsize,
125
        incomplete,
126
        encrypted,
127
        mdx,
128
        lang_id,
129
        fields,
130
        fieldcolumns,
131
    )
132
end
133

134 11
miss(x) = ifelse(x === nothing, missing, x)
135

136
"Concert a DBF entry string to a Julia value"
137 3
function dbf_value(T::Type{Bool}, str::AbstractString)
138 11
    char = first(str)
139 11
    if char in "YyTt"
140 11
        true
141 11
    elseif char in "NnFf"
142 11
        false
143 11
    elseif char == '?'
144 11
        missing
145
    else
146 0
        throw(ArgumentError("Unknown logical $char"))
147
    end
148
end
149

150 11
dbf_value(T::Union{Type{Int},Type{Float64}}, str::AbstractString) =
151
    miss(tryparse(T, str))
152
# String to avoid returning SubString{String}
153 3
function dbf_value(T::Type{String}, str::AbstractString)
154 7
    stripped = rstrip(str)
155 11
    if isempty(stripped)
156
        # return missing rather than ""
157 11
        return missing
158
    else
159 11
        return String(stripped)
160
    end
161
end
162 0
dbf_value(T::Type{Nothing}, str::AbstractString) = missing
163

164
# define get functions using getfield since we overload getproperty
165
"Access the header of a DBF Table"
166 11
getheader(dbf::Table) = getfield(dbf, :header)
167 7
getfields(dbf::Table) = getheader(dbf).fields
168 11
getstrings(dbf::Table) = getfield(dbf, :strings)
169 11
getrow(row::Row) = getfield(row, :row)
170 11
gettable(row::Row) = getfield(row, :table)
171

172 8
Base.length(dbf::Table) = Int(getheader(dbf).records)
173 11
Base.size(dbf::Table) = (length(dbf), length(getfields(dbf)))
174 11
Base.size(dbf::Table, i) = size(dbf)[i]
175

176
"""
177
	DBFTables.Table(source) => DBFTables.Table
178

179
Read a source, a path to a file or an opened stream, to a DBFTables.Table.
180
This type conforms to the Tables interface, so it can be easily converted
181
to other formats. It is possible to iterate through the rows of this object,
182
or to retrieve columns like `dbf.fieldname`.
183
"""
184 3
function Table(io::IO)
185 7
    header = Header(io)
186
    # consider using mmap here for big dbf files
187 3
    data = Vector{UInt8}(undef, header.rsize * header.records)
188 3
    read!(io, data)
189 7
    strings = _create_stringarray(header, data)
190 7
    Table(header, data, strings)
191
end
192

193 3
function Table(path::AbstractString)
194 11
    open(path) do io
195 7
        Table(io)
196
    end
197
end
198

199
"Collect all the offsets and lenghts from the header to create a StringArray"
200 3
function _create_stringarray(header::Header, data::AbstractVector)
201
    # first make the lengths and offsets for a single record
202 11
    lengths_record = UInt32.(getfield.(header.fields, :length))
203 7
    offsets_record = vcat(0, cumsum(lengths_record)[1:end-1]) .+ 1
204

205
    # the lengths are equal for each record
206 11
    lengths = repeat(lengths_record, 1, header.records)
207
    # the offsets accumulate over records with the record size
208 7
    row_offsets = range(0; length = header.records, step = header.rsize)
209 11
    offsets = repeat(offsets_record, 1, header.records)
210 11
    offsets .+= reshape(row_offsets, 1, :)
211

212 11
    StringArray{String,2}(data, offsets, lengths)
213
end
214

215
"Create a NamedTuple representing a single row"
216 3
function Base.NamedTuple(row::Row)
217 11
    dbf = gettable(row)
218 7
    str = getstrings(dbf)
219 7
    fields = getfields(dbf)
220 7
    ncol = length(fields)
221 7
    rowidx = getrow(row)
222 7
    @inbounds record = @view str[:, rowidx]
223 11
    @inbounds prs = (fields[col].name => dbf_value(
224
        fields[col].type,
225
        record[col],
226
    ) for col = 1:ncol)
227 11
    return (; prs...)
228
end
229

230 3
function Base.show(io::IO, row::Row)
231 11
    show(io, NamedTuple(row))
232
end
233

234 3
function Base.show(io::IO, dbf::Table)
235 11
    nr = length(dbf)
236 7
    nc = length(getfields(dbf))
237 7
    println(io, "DBFTables.Table with $nr rows and $nc columns")
238 11
    println(io, Tables.schema(dbf))
239
end
240

241 0
Base.isempty(dbf::Table) = getheader(dbf).records == 0
242

243
"Get a BitVector which is true for rows that are marked as deleted"
244 3
function isdeleted(dbf::Table)
245 11
    data = getfield(dbf, :data)
246 7
    rsize = getheader(dbf).rsize
247 7
    nrow = getheader(dbf).records
248 7
    idx = range(1, step = rsize, length = nrow)
249 11
    data[idx] .== 0x2a
250
end
251

252
"Check if the row is marked as deleted"
253 3
function isdeleted(dbf::Table, row::Integer)
254 11
    data = getfield(dbf, :data)
255 7
    i = (row - 1) * getheader(dbf).rsize + 1
256 11
    data[i] == 0x2a
257
end
258

259
"Iterate over the rows of a DBF Table, yielding a DBFTables.Row for each row"
260 3
function Base.iterate(dbf::Table, st = 1)
261 11
    st > length(dbf) && return nothing
262 11
    return Row(dbf, st), st + 1
263
end
264

265 3
function Base.getproperty(row::Row, name::Symbol)
266 11
    dbf = gettable(row)
267 7
    header = getheader(dbf)
268 7
    str = getstrings(dbf)
269 11
    colidx = get(header.fieldcolumns, name, nothing)
270 11
    colidx === nothing && throw(ArgumentError("Column not present: $name"))
271 11
    type = @inbounds getfields(dbf)[colidx].type
272 7
    rowidx = getrow(row)
273 11
    return @inbounds dbf_value(type, str[colidx, rowidx])
274
end
275

276 7
Tables.istable(::Type{Table}) = true
277 0
Tables.rowaccess(::Type{Table}) = true
278 0
Tables.columnaccess(::Type{Table}) = true
279 0
Tables.rows(dbf::Table) = dbf
280 7
Tables.columns(dbf::Table) = dbf
281

282
"Get the Tables.Schema of a DBF Table"
283 3
function Tables.schema(dbf::Table)
284 11
    names = Tuple(field.name for field in getfields(dbf))
285
    # since missing is always supported, add it to the schema types
286 11
    types = Tuple(Union{field.type,Missing} for field in getfields(dbf))
287 11
    Tables.Schema(names, types)
288
end
289

290
"List all available DBF column names"
291 11
Base.propertynames(dbf::Table) = getfield.(getfield(dbf, :header).fields, :name)
292 11
Base.propertynames(row::Row) = propertynames(gettable(row))
293

294
"Create a copy of an entire DBF column as a Vector. Usage: `dbf.myfield`"
295 3
function Base.getproperty(dbf::Table, name::Symbol)
296 11
    header = getheader(dbf)
297 11
    col = get(header.fieldcolumns, name, nothing)
298 11
    col === nothing && throw(ArgumentError("Column not present: $name"))
299 7
    nrow = header.records
300 11
    @inbounds type = getfields(dbf)[col].type
301 7
    str = getstrings(dbf)
302 11
    @inbounds colarr = [dbf_value(type, str[col, i]) for i = 1:nrow]
303 11
    return colarr
304
end
305

306
end # module

Read our documentation on viewing source code .

Loading