scheinerman / Counters.jl
1
module Counters
2

3
export Counter, counter, clean!, incr!, mean, csv_print
4

5
import Base: show, length, getindex, sum, keys, (+), (==), hash
6
import Base:  setindex!, collect
7
import Base: iterate
8
#import Base: start, done, next, iterate
9

10
using SparseArrays, Statistics
11
#import SparseArrays: nnz
12
#import Statistics: mean
13

14
"""
15
A `Counter` is a device for keeping a count of how often we observe
16
various objects. It is created by giving a type such as
17
`c=Counter{String}()`.
18

19
Counts are retrieved with square brackets like a dictionary: `c["hello"]`.
20
It is safe to retrieve the count of an object never encountered, e.g.,
21
`c["goodbye"]`; in this case `0` is returned.
22

23
Counts may be assigned with `c[key]=amount`, but the more likely use
24
case is using `c[key]+=1` to count each time `key` is encountered.
25
"""
26
struct Counter{T<:Any} <: AbstractDict{T,Int}
27
  data::Dict{T,Int}
28
  function Counter{T}() where T
29 1
    d = Dict{T,Int}()
30 1
    C = new(d)
31
  end
32
end
33

34 0
Counter() = Counter{Any}()
35

36
# These items enable this to satisfy the Associative properties
37
#
38
# start(c::Counter) = start(c.data)
39
# done(c::Counter,s) = done(c.data,s)
40
# next(c::Counter,s) = next(c.data,s)
41

42 0
iterate(C::Counter{T}) where T = iterate(keys(C.data))
43 0
iterate(C::Counter{T}, s::Int) where T = iterate(keys(C.data), s)
44

45

46

47
"""
48
`length(c::Counter)` gives the number of entries monitored
49
by the Counter. Conceivably, some may have value `0`.
50

51
See also: `nnz`.
52
"""
53 1
length(c::Counter) = length(c.data)
54

55
function show(io::IO, c::Counter{T}) where T
56 0
  n = length(c.data)
57 0
  word = ifelse(n==1, "entry", "entries")
58 0
  msg = "with $n $word"
59 0
  print(io,"Counter{$T} $msg")
60
end
61

62 0
show(c::Counter{T}) where T = show(stdout,c)
63

64
import Base.Multimedia.display
65

66
function display(c::Counter{T}) where T
67 0
  println("Counter{$T} with these nonzero values:")
68 0
  klist = collect(keys(c))
69 0
  try
70 0
    sort!(klist)
71
  catch
72 0
    1+1 # no action required if fail to sort
73
  end
74

75 0
  for k in klist
76 0
    if c[k] != 0
77 0
      println("$k ==> $(c.data[k])")
78
    end
79
  end
80
end
81

82

83

84
function getindex(c::Counter{T}, x::T) where T
85 1
  return get(c.data,x,0)
86
end
87

88
"""
89
`keys(c::Counter)` returns an interator for the things counted by `c`.
90
"""
91 0
keys(c::Counter) = keys(c.data)
92

93

94
"""
95
`sum(c::Counter)` gives the total of the counts for all things
96
in `c`.
97
"""
98 1
sum(c::Counter) = sum(values(c.data))
99

100
"""
101
`nnz(c::Counter)` gives the number of keys in
102
the `Counter` with nonzero value.
103

104
See also: `length`.
105
"""
106
function SparseArrays.nnz(c::Counter)
107 1
  amt::Int = 0
108 1
  for k in keys(c)
109 1
    if c.data[k] != 0
110 1
      amt += 1
111
    end
112
  end
113 1
  return amt
114
end
115

116 1
setindex!(c::Counter{T}, val::Int, k::T) where T = c.data[k] = val>0 ? val : 0
117

118
function ==(c::Counter{T}, d::Counter{T}) where T
119 1
  for k in keys(c)
120 1
    if c[k] != d[k]
121 1
      return false
122
    end
123
  end
124

125 1
  for k in keys(d)
126 1
    if c[k] != d[k]
127 1
      return false
128
    end
129
  end
130

131 1
  return true
132
end
133

134 0
isequal(c::Counter{T},d::Counter{T}) where T = c==d
135

136
"""
137
`clean!(c)` removes all keys from `c` whose value is `0`.
138
Generally, it's not necessary to invoke this unless one
139
suspects that `c` contains *a lot* of keys associated with
140
a zero value.
141
"""
142
function clean!(c::Counter{T}) where T
143 1
  for k in keys(c)
144 1
    if c[k] == 0
145 1
      delete!(c.data,k)
146
    end
147
  end
148 1
  nothing
149
end
150

151
"""
152
`incr!(c,x)` increments the count for `x` by 1. This is equivalent to
153
`c[x]+=1`.
154

155
`incr!(c,items)` is more useful. Here `items` is an iterable collection
156
of keys and we increment the count for each element in `items`.
157

158
`incr!(c,d)` where `c` and `d` are counters will increment `c` by
159
the amounts held in `d`.
160
"""
161 1
incr!(c::Counter{T}, x::T) where T = c[x] += 1
162

163
function incr!(c::Counter, items)
164 0
  for x in items
165 0
    c[x] += 1
166
  end
167
end
168

169
function incr!(c::Counter{T},d::Counter{T}) where T
170 1
  for k in keys(d)
171 1
    c[k] += d[k]
172
  end
173
end
174

175

176
"""
177
If `c` and `d` are `Counter`s, then `c+d` creates a new `Counter`
178
in which the count associated with an object `x` is `c[x]+d[x]`.
179
"""
180
function (+)(c::Counter{T}, d::Counter{T}) where T
181 1
  result = deepcopy(c)
182 1
  incr!(result,d)
183 1
  return result
184
end
185

186
"""
187
`collect(C)` for a `Counter` returns an array containing the elements of `C`
188
each repeated according to its multiplicty.
189
"""
190
function collect(c::Counter{T}) where T
191 0
  result = Vector{T}(undef,sum(c))
192 0
  idx = 0
193 0
  for k in keys(c)
194 0
    m = c[k]
195 0
    for j=1:m
196 0
      idx += 1
197 0
      result[idx] = k
198
    end
199
  end
200 0
  return result
201
end
202

203

204

205
"""
206
`mean(C::Counter)` computes the weighted average of the objects in `C`.
207
Of course, the counted objects must be a `Number`; their multiplicity
208
(weight) in the average is determined by their `C`-value.
209
"""
210
function mean(C::Counter{T}) where T<:Number
211 0
  total = zero(T)
212 0
  for k in keys(C)
213 0
    total += k * C[k]
214
  end
215 0
  return total / sum(C)
216
end
217

218
"""
219
`csv_print(C::Counter)` prints out `C` in a manner suitable for import into
220
a spreadsheet.
221
"""
222
function csv_print(C::Counter)
223 0
  klist = collect(keys(C))
224 0
  try
225 0
    sort!(klist)
226
  catch
227
  end
228 0
  for k in klist
229 0
    println("$k, $(C[k])")
230
  end
231 0
  nothing
232
end
233

234
"""
235
`counter(list)` creates a `Counter` whose elements are the
236
members of `list` with the appropriate multiplicities.
237
This may also be used if `list` is a `Set` or an `IntSet`
238
(in which case multiplicities will all be 1).
239
"""
240
function counter(list::AbstractArray)
241 1
  T = eltype(list)
242
  C = Counter{T}()
243 1
  for x in list
244 1
    incr!(C,x)
245
  end
246 1
  return C
247
end
248

249 1
counter(S::Base.AbstractSet) = counter(collect(S))
250

251

252
"""
253
Performing `hash` on a `Counter` will first apply `clean!` to the
254
`Counter` in order that equal `Counter` objects hash the same.
255
"""
256
function hash(C::Counter, h::UInt64 = UInt64(0))
257 0
    clean!(C)
258 0
    return hash(C.data,h)
259
end
260

261
end # module

Read our documentation on viewing source code .

Loading