rsquaredacademy / descriptr
1
#' Two way table
2
#'
3
#' Creates two way tables of categorical variables. The tables created can be
4
#' visualized as bar plots and mosaic plots.
5
#'
6
#' @param data A \code{data.frame} or a \code{tibble}.
7
#' @param var_1 First categorical variable.
8
#' @param var_2 Second categorical variable.
9
#' @param x An object of class \code{cross_table}.
10
#' @param stacked If \code{FALSE}, the columns of height are portrayed
11
#' as stacked bars, and if \code{TRUE} the columns are portrayed as juxtaposed bars.
12
#' @param proportional If \code{TRUE}, the height of the bars is proportional.
13
#' @param print_plot logical; if \code{TRUE}, prints the plot else returns a plot object.
14
#' @param ... Further arguments to be passed to or from methods.
15
#'
16
#' @examples
17
#' # cross table
18
#' k <- ds_cross_table(mtcarz, cyl, gear)
19
#' k
20
#'
21
#' # bar plot
22
#' plot(k)
23
#'
24
#' # stacked bar plot
25
#' plot(k, stacked = TRUE)
26
#'
27
#' # proportional bar plot
28
#' plot(k, proportional = TRUE)
29
#'
30
#' # returns tibble
31
#' ds_twoway_table(mtcarz, cyl, gear)
32
#'
33
#' @export
34
#'
35 1
ds_cross_table <- function(data, var_1, var_2) UseMethod("ds_cross_table")
36

37
#' @export
38
ds_cross_table.default <- function(data, var_1, var_2) {
39

40 1
  check_df(data)
41 1
  var1_name <- deparse(substitute(var_1))
42 1
  var2_name <- deparse(substitute(var_2))
43 1
  var_1 <- rlang::enquo(var_1)
44 1
  var_2 <- rlang::enquo(var_2)
45 1
  check_factor(data, !! var_1, var1_name)
46 1
  check_factor(data, !! var_2, var2_name)
47

48 1
  var_names <-
49 1
    data %>%
50 1
    dplyr::select(!! var_1, !! var_2) %>%
51 1
    names()
52

53 1
  varone   <- dplyr::pull(data, !! var_1)
54 1
  vartwo   <- dplyr::pull(data, !! var_2)
55 1
  row_name <- get_names(varone)
56 1
  col_name <- get_names(vartwo)
57

58 1
  x <- as.matrix(table(varone, vartwo))
59 1
  rownames(x) <- NULL
60

61 1
  n        <- sum(x)
62 1
  per_mat  <- round(x / n, 3)
63 1
  row_pct  <- apply(per_mat, 1, sum)
64 1
  col_pct  <- apply(per_mat, 2, sum)
65 1
  rowtotal <- apply(x, 1, sum)
66 1
  coltotal <- apply(x, 2, sum)
67 1
  finalmat <- prep_per_mat(per_mat, row_pct, col_pct)
68 1
  rcent    <- prep_rcent(x, rowtotal, row_pct)
69 1
  ccent    <- prep_ccent(x, coltotal)
70 1
  finaltab <- prep_table(x, rowtotal, row_name)
71

72 1
  utility <- list(obs            = n,
73 1
                  var2_levels    = col_name,
74 1
                  var1_levels    = row_name,
75 1
                  varnames       = var_names,
76 1
                  twowaytable    = finaltab,
77 1
                  percent_table  = finalmat,
78 1
                  row_percent    = rcent,
79 1
                  column_percent = ccent,
80 1
                  column_totals  = coltotal,
81 1
                  percent_column = col_pct,
82 1
                  data           = data)
83

84 1
  ftab <- table(varone, vartwo)
85 1
  names(dimnames(ftab)) <- c(var_names[1], var_names[2])
86

87 1
  result <- list(ftable  = ftab,
88 1
                 utility = utility)
89

90 1
  class(result) <- "ds_cross_table"
91 1
  return(result)
92
}
93

94
#' @export
95
print.ds_cross_table <- function(x, ...) {
96 0
  print_cross(x)
97
}
98

99
#' @export
100
#' @rdname ds_cross_table
101
#'
102
plot.ds_cross_table <- function(x, stacked = FALSE, proportional = FALSE,
103
                                print_plot = TRUE,...) {
104

105 1
  x_lab <-
106 1
    x %>%
107 1
    use_series(utility) %>%
108 1
    use_series(varnames) %>%
109 1
    extract(1)
110

111 1
  y_lab <-
112 1
    x %>%
113 1
    use_series(utility) %>%
114 1
    use_series(varnames) %>%
115 1
    extract(2)
116

117 1
  k <- string_to_name(x)
118 1
  j <- string_to_name(x, 2)
119

120 1
  if (proportional) {
121 1
    p <-
122 1
      x %>%
123 1
      use_series(utility) %>%
124 1
      use_series(data) %>%
125 1
      dplyr::select(x = !! k, y = !! j) %>%
126 1
      table() %>%
127 1
      tibble::as_tibble() %>%
128 1
      ggplot(aes(x = x, y = n, fill = y)) +
129 1
      geom_bar(stat = "identity", position = "fill") +
130 1
      scale_y_continuous(labels = scales::percent_format()) +
131 1
      xlab(x_lab) + ggtitle(paste(x_lab, "vs", y_lab)) +
132 1
      labs(fill = y_lab)
133
  } else {
134 1
    if (stacked) {
135 1
      p <-
136 1
        x %>%
137 1
        use_series(utility) %>%
138 1
        use_series(data) %>%
139 1
        dplyr::select(x = !! k, y = !! j) %>%
140 1
        ggplot() +
141 1
        geom_bar(aes(x, fill = y), position = "stack") +
142 1
        xlab(x_lab) + ggtitle(paste(x_lab, "vs", y_lab)) +
143 1
        labs(fill = y_lab)
144
    } else {
145 1
      p <-
146 1
        x %>%
147 1
        use_series(utility) %>%
148 1
        use_series(data) %>%
149 1
        dplyr::select(x = !! k, y = !! j) %>%
150 1
        ggplot() +
151 1
        geom_bar(aes(x, fill = y), position = "dodge") +
152 1
        xlab(x_lab) + ggtitle(paste(x_lab, "vs", y_lab)) +
153 1
        labs(fill = y_lab)
154
    }
155
  }
156

157 1
  if (print_plot) {
158 1
    print(p)
159
  } else {
160 0
    return(p)
161
  }
162

163
}
164

165
#' @rdname ds_cross_table
166
#' @export
167
#'
168
ds_twoway_table <- function(data, var_1, var_2) {
169

170 1
  check_df(data)
171 1
  var1_name <- deparse(substitute(var_1))
172 1
  var2_name <- deparse(substitute(var_2))
173

174 1
  var_1 <- rlang::enquo(var_1)
175 1
  var_2 <- rlang::enquo(var_2)
176 1
  check_factor(data, !! var_1, var1_name)
177 1
  check_factor(data, !! var_2, var2_name)
178

179 1
  group <-
180 1
    data %>%
181 1
    dplyr::select(!! var_1, !! var_2) %>%
182 1
    na.omit() %>%
183 1
    dplyr::group_by(!! var_1, !! var_2) %>%
184 1
    dplyr::summarise(count = dplyr::n())
185

186 1
  total <-
187 1
    group %>%
188 1
    dplyr::pull(count) %>%
189 1
    sum()
190

191 1
  div_by <-
192 1
    data %>%
193 1
    dplyr::group_by(!! var_2) %>%
194 1
    na.omit() %>%
195 1
    dplyr::tally() %>%
196 1
    dplyr::pull(n)
197

198

199 1
  group2 <-
200 1
    data %>%
201 1
    dplyr::select(!! var_1, !! var_2) %>%
202 1
    na.omit() %>%
203 1
    dplyr::group_by(!! var_2, !! var_1) %>%
204 1
    dplyr::summarise(count = dplyr::n()) %>%
205 1
    dplyr::mutate(
206 1
      col_percent = count / sum(count)
207
    ) %>%
208 1
    dplyr::ungroup()
209

210 1
  group %<>%
211 1
    dplyr::mutate(
212 1
      percent     = count / total,
213 1
      row_percent = count / sum(count)
214
    ) %>%
215 1
    dplyr::ungroup()
216

217 1
  result <- dplyr::inner_join(group, group2)
218 1
  return(result)
219

220
}
221

222
get_names <- function(x) {
223

224 1
  if (is.factor(x)) {
225 1
    varname <- levels(x)
226
  } else {
227 0
    varname <- unique(sort(x))
228
  }
229

230 1
  return(varname)
231

232
}
233

234
prep_table <- function(x, rowtotal, row_name) {
235

236 1
  x1 <- cbind(x, rowtotal)
237 1
  cbind(unname(row_name), x1)
238

239
}
240

241
prep_per_mat <- function(per_mat, row_pct, col_pct) {
242

243 1
  per_mat_1             <- cbind(per_mat, row_pct)
244 1
  per_mat_2             <- suppressWarnings(rbind(per_mat_1, col_pct))
245 1
  d                     <- dim(per_mat_2)
246 1
  per_mat_2[d[1], d[2]] <- 1
247 1
  return(per_mat_2)
248

249
}
250

251
prep_rcent <- function(x, rowtotal, row_pct) {
252

253 1
  rcent_1 <- row_pct(x, rowtotal)
254 1
  rcent_2 <- cbind(rcent_1, row_pct)
255 1
  apply(rcent_2, c(1, 2), rounda)
256

257
}
258

259
prep_ccent <- function(x, coltotal) {
260

261 1
  ccent_1 <- col_pct(x, coltotal)
262 1
  apply(ccent_1, c(1, 2), rounda)
263

264
}

Read our documentation on viewing source code .

Loading