No flags found
Use flags to group coverage reports by test type, project and/or folders.
Then setup custom commit statuses and notifications for each flag.
e.g., #unittest #integration
#production #enterprise
#frontend #backend
357f30c
... +3 ...
1a375bf
Use flags to group coverage reports by test type, project and/or folders.
Then setup custom commit statuses and notifications for each flag.
e.g., #unittest #integration
#production #enterprise
#frontend #backend
1 | - | #### Benford #### |
|
1 | + | ### TODO |
|
2 | + | # - last-two digits |
|
3 | + | # - verificar se os números do last-two são inteiros (resto da divisão por 1) |
|
4 | + | # - possível solução: se todos forem inteiros, "arrendodar" e não pegar "depois do ponto" |
|
5 | + | # - emitir warning falando que todos numeros parecem ser inteiros, então last-two não pegou decimal |
|
6 | + | # - tem que ter uma opção para definir quantos decimais existem no número (inteiro = zero, dinheiro = 2) |
|
7 | + | # - argumento "round", "digits". |
|
8 | + | ||
9 | + | ||
10 | + | ||
11 | + | ||
12 | + | ||
13 | + | # Tests ------------------------------------------------------------------- |
|
14 | + | ||
15 | + | ||
2 | 16 | ||
3 | 17 | DF <- function(data){ |
|
4 | 18 | data <- data[data >= 10] |
109 | 123 | return(out) |
|
110 | 124 | } |
|
111 | 125 | ||
126 | + | ||
127 | + | ||
128 | + | ||
129 | + | # Extraction functions ---------------------------------------------------- |
|
130 | + | ||
131 | + | ||
132 | + | ||
133 | + | ||
134 | + | ||
112 | 135 | #' @title Extracts the leading digits from the data |
|
113 | 136 | #' @description It extracts the leading digits from the data. |
|
114 | 137 | #' |
168 | 191 | return(results) |
|
169 | 192 | } |
|
170 | 193 | ||
194 | + | #' @title Trunc decimal values. |
|
195 | + | #' @description It trunc decimal values from the data. |
|
196 | + | #' |
|
197 | + | #'This function is used by the main function of the package \code{\link{benford}} |
|
198 | + | #' to trunc the values in its ndec argument to the specified number of decimal places. |
|
199 | + | #' |
|
200 | + | #' @param x a numeric vector. |
|
201 | + | #' @param n.dec integer indicating the number of decimal places (trunc) to be used. |
|
202 | + | #' @return |
|
203 | + | #' @export |
|
204 | + | ||
205 | + | truncDec <- function(x, n.dec = 2){ |
|
206 | + | int <- floor(x) |
|
207 | + | isDec <- grepl("\\.", as.character(x)) |
|
208 | + | dec <- sub("^[0-9]*\\.", "", as.character(x)) |
|
209 | + | truncateDec <- substr(dec, 1, n.dec) |
|
210 | + | truncatedNumber <- vector("character", length = length(dec)) |
|
211 | + | truncatedNumber[isDec] <- paste0(int[isDec], ".", dec[isDec]) |
|
212 | + | truncatedNumber[!isDec] <- as.character(int[!isDec]) |
|
213 | + | truncatedNumber <- as.numeric(truncatedNumber) |
|
214 | + | return(truncatedNumber) |
|
215 | + | } |
|
216 | + | ||
217 | + | ||
171 | 218 | #' @title Extracts the last two digits from the data |
|
172 | 219 | #' @description It extracts the last two digits from the data. |
|
173 | 220 | #' |
|
174 | 221 | #'This function is used by the main function of the package \code{\link{benford}} to extract the |
|
175 | 222 | #'ast two digits of the data. |
|
176 | 223 | #' |
|
177 | 224 | #' @param data a numeric vector. |
|
178 | - | #' @param sign The default value for sign is "positive" and it analyzes only data greater than zero. |
|
225 | + | #' @param ndec it specifies the number of decimals to be used (default 2). |
|
226 | + | #' @param sign The default value for sign is "positive" and it analyzes only data greater than zero. |
|
179 | 227 | #' There are also the options "negative" and "both" that will analyze only negative values or both positive and negative values of the data, |
|
180 | 228 | #' respectively. For large datasets with both positive and negative numbers, |
|
181 | 229 | #' it is usually recommended to perform a separate analysis for each group, |
|
182 | 230 | #' for the incentives to manipulate the numbers are usually different. |
|
183 | 231 | #' @return A data.frame with the data and the last digits. |
|
184 | 232 | #' @export |
|
185 | - | last.two.digits <- function(data, sign="positive") { |
|
233 | + | ||
234 | + | last.two.digits <- function(data, sign="positive", ndec = 2) { |
|
186 | 235 | ||
187 | 236 | if (!is.numeric(data)) stop("Data must be a numeric vector") |
|
188 | 237 |
191 | 240 | if (sign == "negative") positives <- data[data < 0 & !is.na(data)]*(-1) |
|
192 | 241 | if (sign == "both") positives <- abs(data[data != 0 & !is.na(data)]) |
|
193 | 242 | ||
194 | - | digits.as.str <- as.character(positives) |
|
195 | - | nozeros <- grepl("\\.", digits.as.str) |
|
196 | - | digits.as.str.nz <- digits.as.str[nozeros] |
|
197 | - | dgts.after.dot <- sub("^[0-9]*\\.", "", digits.as.str.nz) |
|
198 | - | ltd.dgts.after.dot <- substr(dgts.after.dot, nchar(dgts.after.dot)-1, nchar(dgts.after.dot)) |
|
199 | - | which.dgts.mult.10 <- grepl("^0", ltd.dgts.after.dot) |
|
200 | - | dgts.mult.10 <- as.character(as.numeric(ltd.dgts.after.dot[which.dgts.mult.10])*10) |
|
201 | - | ltd.dgts.after.dot[which.dgts.mult.10] <- dgts.mult.10 |
|
202 | - | digits.as.str[!nozeros] <- 0 |
|
203 | - | digits.as.str[nozeros] <- ltd.dgts.after.dot |
|
204 | - | ltd <- as.numeric(digits.as.str) |
|
243 | + | remainder <- positives %% 1 |
|
244 | + | if (all(remainder == 0)){ |
|
245 | + | warning("Data appears to be integers, so the argument 'ndec' is set to zero.") |
|
246 | + | ndec <- 0 |
|
247 | + | } |
|
205 | 248 | ||
206 | - | results <- data.frame(data = positives, |
|
249 | + | truncated.values <- truncDec(positives, ndec) |
|
250 | + | values.as.str <- as.character(truncated.values) |
|
251 | + | if (ndec == 0){ |
|
252 | + | nchar.values <- nchar(truncated.values) |
|
253 | + | ltd <- substr(truncated.values, nchar.values - 1, nchar.values) |
|
254 | + | ltd <- as.numeric(ltd) |
|
255 | + | }else{ |
|
256 | + | nozeros <- grepl("\\.", values.as.str) |
|
257 | + | values.as.str.nz <- values.as.str[nozeros] |
|
258 | + | dgts.after.dot <- sub("^[0-9]*\\.", "", values.as.str.nz) |
|
259 | + | ltd.dgts.after.dot <- substr(dgts.after.dot, nchar(dgts.after.dot) - 1, nchar(dgts.after.dot)) |
|
260 | + | which.dgts.mult.10 <- nchar(ltd.dgts.after.dot) == 1 |
|
261 | + | dgts.mult.10 <- as.character(as.numeric(ltd.dgts.after.dot[which.dgts.mult.10])*10) |
|
262 | + | ltd.dgts.after.dot[which.dgts.mult.10] <- dgts.mult.10 |
|
263 | + | values.as.str[!nozeros] <- 0 |
|
264 | + | values.as.str[nozeros] <- ltd.dgts.after.dot |
|
265 | + | ltd <- as.numeric(values.as.str) |
|
266 | + | } |
|
267 | + | ||
268 | + | results <- data.frame(data = truncated.values, |
|
207 | 269 | data.digits = ltd) |
|
208 | 270 | return(results) |
|
209 | 271 | } |
|
210 | 272 | ||
273 | + | ||
274 | + | extract.mantissa <- function(positives) { |
|
275 | + | log <- log10(positives) |
|
276 | + | log[log < 0] <- log[log < 0] + as.integer(log[log < 0])*(-1) + 1 |
|
277 | + | mantissa <- log - trunc(log) |
|
278 | + | return(mantissa) |
|
279 | + | } |
|
280 | + | ||
281 | + | ||
282 | + | # Digits probability ------------------------------------------------------ |
|
283 | + | ||
284 | + | ||
285 | + | ||
211 | 286 | #' @title Probability of a digit sequence |
|
212 | 287 | #' @description It calculates the probability of a digit sequence "d". |
|
213 | 288 | #' @usage |
229 | 304 | } |
|
230 | 305 | ||
231 | 306 | ||
307 | + | ||
308 | + | ||
309 | + | ||
310 | + | ||
311 | + | ||
232 | 312 | #' @title Probability of a digit at the nth position |
|
233 | 313 | #' @description It calculates the probability of digit "d" at the "n"th position. |
|
234 | 314 | #' @usage |
264 | 344 | return(sum) |
|
265 | 345 | } |
|
266 | 346 | ||
347 | + | ||
348 | + | ||
349 | + | # Generating functions ---------------------------------------------------- |
|
350 | + | ||
351 | + | ||
352 | + | ||
267 | 353 | generate.benford.digits <- function(number.of.digits) { |
|
268 | 354 | number.of.digits <- as.integer(number.of.digits) |
|
269 | 355 | begin <- 10^(number.of.digits - 1) |
300 | 386 | return(results) |
|
301 | 387 | } |
|
302 | 388 | ||
303 | - | extract.mantissa <- function(positives) { |
|
304 | - | log <- log10(positives) |
|
305 | - | log[log < 0] <- log[log < 0] + as.integer(log[log < 0])*(-1) + 1 |
|
306 | - | mantissa <- log - trunc(log) |
|
307 | - | return(mantissa) |
|
308 | - | } |
|
389 | + | ||
309 | 390 | ||
310 | 391 | generate.summation <- function(benford.digits, data, data.digits) { |
|
311 | 392 | x <- NULL |
328 | 409 | ||
329 | 410 | #### Basic Calculations #### |
|
330 | 411 | ||
331 | - | excess.kurtosis <- function(x) |
|
412 | + | excess.kurtosis <- function(x){ |
|
332 | 413 | (mean((x - mean(x))^4)/(mean((x - mean(x))^2)^2)) - 3 |
|
414 | + | } |
|
333 | 415 | ||
334 | 416 | ||
335 | - | skewness <- function(x) (mean((x - mean(x))^3)/(mean((x - mean(x))^2)^(3/2))) |
|
417 | + | skewness <- function(x) { |
|
418 | + | (mean((x - mean(x))^3)/(mean((x - mean(x))^2)^(3/2))) |
|
419 | + | } |
|
336 | 420 |
33 | 33 | select = c("digits", "second order", "summation", "chi squared", "ex summation"), |
|
34 | 34 | except = NULL, |
|
35 | 35 | multiple = TRUE, |
|
36 | - | col.bar = "lightblue", |
|
36 | + | col.bar = "turquoise3", |
|
37 | 37 | err.bounds = FALSE, |
|
38 | 38 | alpha = 0.05, |
|
39 | 39 | grid = TRUE, |
115 | 115 | #par(mfrow = c(rows, cols)) |
|
116 | 116 | nslots <- rows*cols |
|
117 | 117 | plot_this <- plots |
|
118 | - | lg_size <- ifelse(rows > 1, 1, ifelse(err.bounds, 0.4, 0.7))/rows |
|
118 | + | lg_size <- ifelse(rows > 1, 1, 0.7)/rows |
|
119 | 119 | ||
120 | 120 | for (i in 1:length(plot_this)) { |
|
121 | 121 | plot.switch(plot_this[i], x, col.bar, grid, err.bounds, alpha, exp.benford, freq) |
128 | 128 | old.par <- par(no.readonly = TRUE) |
|
129 | 129 | #on.exit(par(old.par)) |
|
130 | 130 | plot_this <- plots |
|
131 | - | lg_size <- ifelse(err.bounds, 0.4, 0.7) |
|
131 | + | lg_size <- 0.7 |
|
132 | 132 | ||
133 | 133 | for (i in 1:length(plot_this)) { |
|
134 | 134 | plot.switch(plot_this[i], x, col.bar, grid, err.bounds, alpha, exp.benford, freq) |
142 | 142 | # Separate plots -------------------------------------------------------------------------------------- |
|
143 | 143 | ||
144 | 144 | histogram.Benford <- function(x, |
|
145 | - | obs.freq = "digits", |
|
146 | - | main = NULL, |
|
147 | - | xlab = NULL, |
|
148 | - | ylab = NULL, |
|
149 | - | grid = TRUE, |
|
150 | - | col.bar = "lightblue", |
|
151 | - | err.bounds = FALSE, |
|
152 | - | alpha = 0.05, |
|
153 | - | exp.benford = TRUE, |
|
154 | - | freq = TRUE, ...){ |
|
145 | + | obs.freq = "digits", |
|
146 | + | main = NULL, |
|
147 | + | xlab = NULL, |
|
148 | + | ylab = NULL, |
|
149 | + | grid = TRUE, |
|
150 | + | col.bar = "lightblue", |
|
151 | + | err.bounds = FALSE, |
|
152 | + | alpha = 0.05, |
|
153 | + | exp.benford = TRUE, |
|
154 | + | freq = TRUE, ...){ |
|
155 | 155 | ||
156 | 156 | digits <- x[["bfd"]]$digits |
|
157 | 157 | obs.freq <- tolower(obs.freq) |
255 | 255 | ||
256 | 256 | ||
257 | 257 | rootogram.Benford <- function(x, |
|
258 | - | obs.freq = "digits", |
|
259 | - | main = NULL, |
|
260 | - | xlab = NULL, |
|
261 | - | ylab = NULL, |
|
262 | - | grid = TRUE, |
|
263 | - | col.bar = "lightblue", |
|
264 | - | err.bounds = FALSE, |
|
265 | - | alpha = 0.05, |
|
266 | - | exp.benford = TRUE, |
|
267 | - | freq = TRUE, ...){ |
|
258 | + | obs.freq = "digits", |
|
259 | + | main = NULL, |
|
260 | + | xlab = NULL, |
|
261 | + | ylab = NULL, |
|
262 | + | grid = TRUE, |
|
263 | + | col.bar = "lightblue", |
|
264 | + | err.bounds = FALSE, |
|
265 | + | alpha = 0.05, |
|
266 | + | exp.benford = TRUE, |
|
267 | + | freq = TRUE, ...){ |
|
268 | 268 | digits <- x[["bfd"]]$digits |
|
269 | 269 | obs.freq <- tolower(obs.freq) |
|
270 | 270 |
301 | 301 | ylim <- c(min(exp_freq - obs_freq)*1.1, max(abs(exp_freq - obs_freq)*0.5, exp_freq)*1.1) |
|
302 | 302 | } |
|
303 | 303 | ||
304 | - | if(is.null(xlab)){ |
|
305 | - | xlab <- c("First Digit", "First-Two Digits", "First-Three Digits", "First-Order Digits")[ifelse(x$info$number.of.digits <= 3, x$info$number.of.digits, 4)] |
|
304 | + | if (is.null(xlab)) { |
|
305 | + | xlab.options <- c("First Digit", "First-Two Digits", "First-Three Digits", "First-Order Digits") |
|
306 | + | lab.picker <- ifelse(x$info$number.of.digits <= 3, x$info$number.of.digits, 4) |
|
307 | + | xlab <- xlab.options[lab.picker] |
|
306 | 308 | } |
|
307 | 309 | ||
308 | - | if(is.null(ylab)){ |
|
310 | + | if (is.null(ylab)) { |
|
309 | 311 | ylab <- "Frequency" |
|
310 | 312 | } |
|
311 | 313 |
317 | 319 | ylim = ylim, |
|
318 | 320 | yaxs = 'i', xaxs = 'i', xaxt = "n", type = 'n', |
|
319 | 321 | panel.first = { |
|
320 | - | if(grid){ |
|
322 | + | if (grid) { |
|
321 | 323 | grid(nx = NA, ny = NULL, lty = 1, col = "gray90") |
|
322 | 324 | axis(1, at = xmarks[seq(1, length(xmarks), ifelse(length(digits) <= 90, 1, 10))], tck = 1, col.ticks = "gray90", labels = F) |
|
323 | 325 | } |
488 | 490 | col = plot_colors, |
|
489 | 491 | cex = size, |
|
490 | 492 | lwd = c(rep(2, 2), 2), |
|
491 | - | lty = c(rep(1, 2), 2), |
|
492 | - | horiz = TRUE) |
|
493 | + | lty = c(rep(1, 2), 2)) |
|
493 | 494 | }else{ |
|
494 | 495 | plot_colors <- c("lightblue","red") |
|
495 | 496 | legend(x = "topright", |
499 | 500 | col = plot_colors, |
|
500 | 501 | cex = size, |
|
501 | 502 | lwd = 2, |
|
502 | - | lty = rep(1, 2), |
|
503 | - | horiz = TRUE) |
|
503 | + | lty = rep(1, 2)) |
|
504 | 504 | } |
|
505 | 505 | } |
|
506 | 506 |
Files | Coverage |
---|---|
R | 0.01% 99.51% |
Project Totals (4 files) | 99.51% |
#38
1a375bf
23b1341
3fac2a0
b34595f
#38
357f30c