ropensci / visdat

@@ -1,3 +1,4 @@
Loading
1 +
1 2
#' Take the fingerprint of a data.frame - find the class or return NA
2 3
#'
3 4
#' `fingerprint` is an internal function that takes the "fingerprint" of a
@@ -11,13 +12,24 @@
Loading
11 12
fingerprint <- function(x){
12 13
13 14
  # is the data missing?
14 -
  ifelse(is.na(x),
15 -
         # yes? Leave as is NA
16 -
         yes = NA,
17 -
         # no? make that value no equal to the class of this cell.
18 -
         no = glue::glue_collapse(class(x),
19 -
                                  sep = "\n")
20 -
  )
15 +
  if (!is.list(x)) {
16 +
    ifelse(is.na(x),
17 +
           # yes? Leave as is NA
18 +
           yes = NA,
19 +
           # no? make that value equal to the class of this cell.
20 +
           no = glue::glue_collapse(class(x),
21 +
                                    sep = "\n")
22 +
    )
23 +
  } else {
24 +
    ifelse(purrr::map_lgl(x,~length(.x)==0),
25 +
           # yes? Leave as is NA
26 +
           yes = NA,
27 +
           # no? make that value equal to the class of this cell.
28 +
           no = glue::glue_collapse(class(x),
29 +
                                    sep = "\n")
30 +
    )
31 +
32 +
  }
21 33
} # end function
22 34
23 35

@@ -79,7 +79,11 @@
Loading
79 79
80 80
  # make a TRUE/FALSE matrix of the data.
81 81
  # This tells us whether it is missing (true) or not (false)
82 -
  x.na <- is.na(x)
82 +
  x.fingerprinted <- x %>%
83 +
    purrr::map_df(fingerprint)
84 +
85 +
  x.na <- x %>%
86 +
    purrr::map_df(~fingerprint(.x) %>% is.na)
83 87
84 88
  # switch for creating the missing clustering
85 89
  if (cluster){
@@ -101,7 +105,7 @@
Loading
101 105
    # code inspired from https://r-forge.r-project.org/scm/viewvc.php/ ...
102 106
    # pkg/R/missing.pattern.plot.R?view=markup&root=mi-dev
103 107
    # get the order of columns with highest missingness
104 -
    na_sort <- order(colSums(is.na(x)), decreasing = TRUE)
108 +
    na_sort <- order(colSums(x.na), decreasing = TRUE)
105 109
106 110
    # get the names of those columns
107 111
    col_order_index <- names(x)[na_sort]
@@ -129,7 +133,7 @@
Loading
129 133
130 134
  if (show_perc) {
131 135
132 -
    temp <- miss_guide_label(x)
136 +
    temp <- miss_guide_label(x.fingerprinted)
133 137
134 138
    p_miss_lab <- temp$p_miss_lab
135 139
@@ -168,7 +172,7 @@
Loading
168 172
      vis_miss_plot +
169 173
        ggplot2::scale_x_discrete(position = "top",
170 174
                                  labels = label_col_missing_pct(
171 -
                                    x,
175 +
                                    x.fingerprinted,
172 176
                                    col_order_index)
173 177
        )
174 178
      # )
@@ -192,7 +196,7 @@
Loading
192 196
      ggplot2::scale_x_discrete(position = "top",
193 197
                                limits = col_order_index,
194 198
                                labels = label_col_missing_pct(
195 -
                                  x,
199 +
                                  x.fingerprinted,
196 200
                                  col_order_index)
197 201
      )
198 202

@@ -100,7 +100,7 @@
Loading
100 100
  # of about 3. This is faster, for the moment.
101 101
102 102
  output <- character(length(x))
103 -
  nas <- is.na(x)
103 +
  nas <- (x %>% fingerprint() %>% is.na() | is.na(x))
104 104
105 105
  output[!nas] <- vapply(FUN = readr::guess_parser,
106 106
                         X = x[!nas],
Files Coverage
R 98.91%
Project Totals (9 files) 98.91%
1
comment: false
2

3
coverage:
4
  status:
5
    project:
6
      default:
7
        target: auto
8
        threshold: 1%
9
        informational: true
10
    patch:
11
      default:
12
        target: auto
13
        threshold: 1%
14
        informational: true
Sunburst
The innermost circle is the entire project; moving away from the center are folders and then, finally, individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project, followed by folders and finally individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block represent the number of statements and the coverage, respectively.
Loading