rsquaredacademy / descriptr
1
#' Screen data
#'
#' Screens a data set and reports, for every variable, its name, class,
#' factor levels and missing values. \code{plot.ds_screener()} draws a bar
#' plot of the percentage of missing observations in each variable of the
#' data set.
#'
#' @param data A \code{tibble} or a \code{data.frame}.
#' @param x An object of class \code{ds_screener}.
#' @param ... Further arguments to be passed to or from methods.
#'
#' @return \code{ds_screener()} returns an object of class \code{"ds_screener"}.
#' An object of class \code{"ds_screener"} is a list containing the
#' following components:
#'
#' \item{Rows}{Number of rows in the data frame.}
#' \item{Columns}{Number of columns in the data frame.}
#' \item{Variables}{Names of the variables in the data frame.}
#' \item{Types}{Class of the variables in the data frame.}
#' \item{Count}{Length of the variables in the data frame.}
#' \item{nlevels}{Number of levels of a factor variable.}
#' \item{levels}{Levels of factor variables in the data frame.}
#' \item{Missing}{Number of missing observations in each variable.}
#' \item{MissingPer}{Percent of missing observations in each variable.}
#' \item{MissingTotal}{Total number of missing observations in the data frame.}
#' \item{MissingTotPer}{Total percent of missing observations in the data frame.}
#' \item{MissingRows}{Total number of rows with missing observations in the
#' data frame.}
#' \item{MissingCols}{Total number of columns with missing observations in the
#' data frame.}
#'
#' @examples
#' # screen data
#' ds_screener(mtcarz)
#' ds_screener(airquality)
#'
#' # plot
#' x <- ds_screener(airquality)
#' plot(x)
#'
#' @export
#'
ds_screener <- function(data) {
  # S3 generic: dispatch on the class of `data`.
  UseMethod("ds_screener")
}
43

44
#' @export
#'
ds_screener.default <- function(data) {

  # check_df() is defined elsewhere in the package; presumably it rejects
  # inputs that are not data frames -- confirm against its definition.
  check_df(data)

  n_rows    <- nrow(data)
  n_cols    <- ncol(data)
  var_names <- names(data)

  # class() may return several strings for one column, so the simplified
  # sapply() result is kept unchanged to preserve the shape of `Types`.
  var_types <- sapply(data, class)

  # vapply() guarantees integer vectors even when `data` has no columns;
  # sapply() returns an empty list in that case, which breaks the
  # percentage arithmetic below.
  n_obs <- vapply(data, length, integer(1))
  n_na  <- vapply(data, function(z) sum(is.na(z)), integer(1))

  n_levels <- lapply(data, nlevels)

  # Variables without levels report NA rather than NULL.
  var_levels <- lapply(data, function(z) {
    lv <- levels(z)
    if (is.null(lv)) NA else lv
  })

  na_pct       <- round((n_na / n_obs) * 100, 2)
  na_total     <- sum(is.na(data))
  na_total_pct <- round((na_total / sum(n_obs)) * 100, 2)

  # complete.cases() errors when there are no columns to inspect.
  na_rows <- if (n_cols > 0L) sum(!complete.cases(data)) else 0L
  na_cols <- sum(n_na != 0)

  structure(
    list(
      Rows          = n_rows,
      Columns       = n_cols,
      Variables     = var_names,
      Types         = var_types,
      Count         = n_obs,
      nlevels       = n_levels,
      levels        = var_levels,
      Missing       = n_na,
      MissingPer    = na_pct,
      MissingTotal  = na_total,
      MissingTotPer = na_total_pct,
      MissingRows   = na_rows,
      MissingCols   = na_cols
    ),
    class = "ds_screener"
  )
}
89

90
#' @export
print.ds_screener <- function(x, ...) {
  # Rendering is delegated to print_screen(), defined elsewhere in the package.
  print_screen(x)

  # Print methods return their argument invisibly so the object can still be
  # assigned or piped after printing.
  invisible(x)
}
94

95
#' @rdname ds_screener
#' @export
#'
plot.ds_screener <- function(x, ...) {

  # Local binding for the non-syntactic column name referenced inside aes();
  # presumably present to silence the "no visible binding" check note.
  `% Missing` <- NULL

  # MissingPer is stored on a 0-100 scale; the y axis is formatted from
  # proportions, so rescale to 0-1.
  na_share <- x$MissingPer / 100

  plot_data <- data.frame(
    x           = names(x$MissingPer),
    y           = na_share,
    `% Missing` = ifelse(na_share >= 0.1, ">= 10%", "< 10%"),
    check.names = FALSE
  )

  # Colours are keyed by category name. With unnamed values the first colour
  # goes to the first category present, so a data set in which every
  # variable has >= 10% missing values would be drawn in green.
  fill_colours <- c("< 10%" = "green", ">= 10%" = "red")

  ggplot(plot_data) +
    geom_col(aes(x = reorder(x, y), y = y, fill = `% Missing`)) +
    scale_y_continuous(labels = scales::percent_format()) +
    xlab("Column") + ylab("Percentage") +
    ggtitle("Missing Values (%)") +
    scale_fill_manual(values = fill_colours)

}

Read our documentation on viewing source code .

Loading