R/main.functions.R
changed.
R/internal.functions.R
changed.
R/plot.functions.R
changed.
Other files ignored by Codecov
_pkgdown.yml
is new.
152 | 152 | ||
153 | 153 | benford.dist <- generate.benford.distribution(benford.digits) |
|
154 | 154 | ||
155 | - | empirical.distribution <- generate.empirical.distribution(data, number.of.digits,sign, second.order = FALSE, benford.digits) |
|
155 | + | empirical.distribution <- generate.empirical.distribution(data, number.of.digits, sign = sign, second.order = FALSE, benford.digits = benford.digits) |
|
156 | 156 | ||
157 | 157 | n <- length(empirical.distribution$data) |
|
158 | 158 | ||
159 | - | second.order <- generate.empirical.distribution(data, number.of.digits,sign, second.order = TRUE, benford.digits, discrete = discrete, round = round) |
|
159 | + | second.order <- generate.empirical.distribution(data, number.of.digits, sign = sign, second.order = TRUE, benford.digits = benford.digits, discrete = discrete, round = round) |
|
160 | 160 | ||
161 | 161 | n.second.order <- length(second.order$data) |
|
162 | 162 | ||
163 | + | last.two.digits <- generate.empirical.distribution(data, sign = sign, last.two.dgts = TRUE, benford.digits = benford.digits) |
|
164 | + | ||
163 | 165 | benford.dist.freq <- benford.dist*n |
|
164 | 166 | ||
165 | 167 | ## calculating useful summaries and differences |
226 | 228 | s.o.data = data.table(second.order = second.order$data, |
|
227 | 229 | data.second.order.digits = second.order$data.digits), |
|
228 | 230 | ||
231 | + | last.two.digits = data.table(last.two.digits = 0:99, |
|
232 | + | data.dist.freq = last.two.digits$dist.freq), |
|
233 | + | ||
229 | 234 | bfd = data.table(digits = benford.digits, |
|
230 | 235 | data.dist = empirical.distribution$dist, |
|
231 | 236 | data.second.order.dist = second.order$dist, |
192 | 192 | if (sign == "both") positives <- abs(data[data != 0 & !is.na(data)]) |
|
193 | 193 | ||
194 | 194 | digits.as.str <- as.character(positives) |
|
195 | - | digits.as.str <- gsub("\\.", "", digits.as.str) |
|
196 | - | ltd <- as.integer(substr(digits.as.str, nchar(digits.as.str) - 1, nchar(digits.as.str))) |
|
197 | - | ltd[ltd < 10] <- ltd[ltd < 10]*10 |
|
195 | + | nozeros <- grepl("\\.", digits.as.str) |
|
196 | + | digits.as.str.nz <- digits.as.str[nozeros] |
|
197 | + | dgts.after.dot <- sub("^[0-9]*\\.", "", digits.as.str.nz) |
|
198 | + | ltd.dgts.after.dot <- substr(dgts.after.dot, nchar(dgts.after.dot)-1, nchar(dgts.after.dot)) |
|
199 | + | which.dgts.mult.10 <- grepl("^0", ltd.dgts.after.dot) |
|
200 | + | dgts.mult.10 <- as.character(as.numeric(ltd.dgts.after.dot[which.dgts.mult.10])*10) |
|
201 | + | ltd.dgts.after.dot[which.dgts.mult.10] <- dgts.mult.10 |
|
202 | + | digits.as.str[!nozeros] <- 0 |
|
203 | + | digits.as.str[nozeros] <- ltd.dgts.after.dot |
|
204 | + | ltd <- as.numeric(digits.as.str) |
|
198 | 205 | ||
199 | 206 | results <- data.frame(data = positives, |
|
200 | 207 | data.digits = ltd) |
270 | 277 | return(benford.dist) |
|
271 | 278 | } |
|
272 | 279 | ||
273 | - | generate.empirical.distribution <- function(data, number.of.digits,sign, second.order = FALSE, benford.digits, discrete = TRUE, round = 3){ |
|
280 | + | generate.empirical.distribution <- function(data, number.of.digits, sign, last.two.dgts = FALSE, second.order = FALSE, benford.digits, discrete = TRUE, round = 3){ |
|
274 | 281 | x <- NULL |
|
275 | 282 | v <- NULL |
|
276 | - | data.frame <- extract.digits(data, number.of.digits, sign, second.order, discrete = discrete, round = round) |
|
283 | + | if(last.two.dgts){ |
|
284 | + | data.frame <- last.two.digits(data, sign) |
|
285 | + | DF <- data.table(x = c(data.frame$data.digits, 0:99), |
|
286 | + | v = c(data.frame$data.digits, 0:99)) |
|
287 | + | }else{ |
|
288 | + | data.frame <- extract.digits(data, number.of.digits, sign, second.order, discrete = discrete, round = round) |
|
289 | + | DF <- data.table(x = c(data.frame$data.digits, benford.digits), |
|
290 | + | v = c(data.frame$data.digits, benford.digits) ) |
|
291 | + | } |
|
277 | 292 | n <- length(data.frame$data.digits) |
|
278 | - | DF <- data.table(x = c(data.frame$data.digits, benford.digits), |
|
279 | - | v = c(data.frame$data.digits, benford.digits) ) |
|
280 | 293 | DFcount <- DF[ ,length(x) - 1, by = v][order(v)] |
|
281 | 294 | dist.freq <- DFcount$V1 |
|
282 | 295 | dist <- dist.freq/n |
124 | 124 | } |
|
125 | 125 | ||
126 | 126 | # Separate plots -------------------------------------------------------------------------------------- |
|
127 | - | #utils::globalVariables(c("x", "err.bounds", "alpha", "exp.benford")) |
|
128 | 127 | ||
129 | 128 | histogram.Benford <- function(x, |
|
130 | 129 | obs.freq = "digits", |
183 | 182 | } |
|
184 | 183 | ||
185 | 184 | if(is.null(xlab)){ |
|
186 | - | ord <- c("","-two","-tree", "-order")[ifelse(x$info$number.of.digits <= 3, x$info$number.of.digits, 4)] |
|
187 | - | xlab <- paste0("First", ord, " Digits") |
|
185 | + | xlab <- c("First Digit", "First-Two Digits", "First-Three Digits", "First-Order Digits")[ifelse(x$info$number.of.digits <= 3, x$info$number.of.digits, 4)] |
|
188 | 186 | } |
|
189 | 187 | ||
190 | 188 | if(is.null(ylab)){ |
277 | 275 | } |
|
278 | 276 | ||
279 | 277 | if(is.null(xlab)){ |
|
280 | - | ord <- c("","-two","-tree", "-order")[ifelse(x$info$number.of.digits <= 3, x$info$number.of.digits, 4)] |
|
281 | - | xlab <- paste0("First", ord, " Digits") |
|
278 | + | xlab <- c("First Digit", "First-Two Digits", "First-Three Digits", "First-Order Digits")[ifelse(x$info$number.of.digits <= 3, x$info$number.of.digits, 4)] |
|
282 | 279 | } |
|
283 | 280 | ||
284 | 281 | if(is.null(ylab)){ |
341 | 338 | ) |
|
342 | 339 | ||
343 | 340 | if(is.null(xlab)){ |
|
344 | - | ord <- c("","-two","-tree", "-order")[ifelse(x$info$number.of.digits <= 3, x$info$number.of.digits, 4)] |
|
345 | - | xlab <- paste0("First", ord, " Digits") |
|
341 | + | xlab <- c("First Digit", "First-Two Digits", "First-Three Digits", "First-Order Digits")[ifelse(x$info$number.of.digits <= 3, x$info$number.of.digits, 4)] |
|
346 | 342 | } |
|
347 | 343 | ||
344 | + | ||
348 | 345 | if(is.null(ylab)){ |
|
349 | 346 | ylab <- "Frequency" |
|
350 | 347 | } |
487 | 484 | } |
|
488 | 485 | } |
|
489 | 486 | ||
490 | - | # histogram.Benford(x, obs.freq = "digits", freq = T, main = NULL, xlab = NULL, ylab = NULL, grid = TRUE, col.bar = "lightblue", err.bounds = FALSE, alpha = 0.05, exp.benford = TRUE, ...) |
|
491 | - | # |
|
492 | - | # rootogram.Benford(x,obs.freq = "digits", freq = TRUE,main = NULL,xlab = NULL,ylab = NULL,grid = TRUE,col.bar = "lightblue",err.bounds = FALSE,alpha = 0.05, exp.benford = TRUE, ...) |
|
493 | - | # |
|
494 | - | # needle.Benford(x,discrepancy = "abs diff",main = NULL,xlab = NULL,ylab = NULL,grid = TRUE,col = "blue", ...) |
|
495 | - | # |
|
496 | - | # xyplot.Berford(x,obs.freq = "digits",freq = TRUE,main = "Expected vs observed frequencies",xlab = "Observed Frequency",ylab = "Expected Frequency",grid = TRUE,col = "blue",...) |
|
497 | - | # |
|
498 | - | # histogram.Benford(x, obs.freq = "digits", freq, main = NULL, xlab = NULL, ylab = NULL, grid, col.bar, err.bounds, alpha) |
|
499 | - | ||
500 | 487 | plot.switch <- function(plot_this, x, col.bar, grid, err.bounds, alpha, freq){ |
|
501 | 488 | switch(plot_this, |
|
502 | 489 | "digits" = histogram.Benford(x, obs.freq = "digits", main = "Digits distribution", xlab = NULL, ylab = NULL, grid, col.bar, err.bounds, alpha, freq), |
Files | Coverage |
---|---|
R | 100.00% |
Project Totals (4 files) | 100.00% |