1 ```#' Two way table ``` 2 ```#' ``` 3 ```#' Creates two way tables of categorical variables. The tables created can be ``` 4 ```#' visualized as bar plots and mosaic plots. ``` 5 ```#' ``` 6 ```#' @param data A \code{data.frame} or a \code{tibble}. ``` 7 ```#' @param var_1 First categorical variable. ``` 8 ```#' @param var_2 Second categorical variable. ``` 9 ```#' @param x An object of class \code{cross_table}. ``` 10 ```#' @param stacked If \code{FALSE}, the columns of height are portrayed ``` 11 ```#' as stacked bars, and if \code{TRUE} the columns are portrayed as juxtaposed bars. ``` 12 ```#' @param proportional If \code{TRUE}, the height of the bars is proportional. ``` 13 ```#' @param print_plot logical; if \code{TRUE}, prints the plot else returns a plot object. ``` 14 ```#' @param ... Further arguments to be passed to or from methods. ``` 15 ```#' ``` 16 ```#' @examples ``` 17 ```#' # cross table ``` 18 ```#' k <- ds_cross_table(mtcarz, cyl, gear) ``` 19 ```#' k ``` 20 ```#' ``` 21 ```#' # bar plot ``` 22 ```#' plot(k) ``` 23 ```#' ``` 24 ```#' # stacked bar plot ``` 25 ```#' plot(k, stacked = TRUE) ``` 26 ```#' ``` 27 ```#' # proportional bar plot ``` 28 ```#' plot(k, proportional = TRUE) ``` 29 ```#' ``` 30 ```#' # returns tibble ``` 31 ```#' ds_twoway_table(mtcarz, cyl, gear) ``` 32 ```#' ``` 33 ```#' @export ``` 34 ```#' ``` 35 1 ```ds_cross_table <- function(data, var_1, var_2) UseMethod("ds_cross_table") ``` 36 37 ```#' @export ``` 38 ```ds_cross_table.default <- function(data, var_1, var_2) { ``` 39 40 1 ``` check_df(data) ``` 41 1 ``` var1_name <- deparse(substitute(var_1)) ``` 42 1 ``` var2_name <- deparse(substitute(var_2)) ``` 43 1 ``` var_1 <- rlang::enquo(var_1) ``` 44 1 ``` var_2 <- rlang::enquo(var_2) ``` 45 1 ``` check_factor(data, !! var_1, var1_name) ``` 46 1 ``` check_factor(data, !! var_2, var2_name) ``` 47 48 1 ``` var_names <- ``` 49 1 ``` data %>% ``` 50 1 ``` dplyr::select(!! var_1, !! var_2) %>% ``` 51 1 ``` names() ``` 52 53 1 ``` varone <- dplyr::pull(data, !! var_1) ``` 54 1 ``` vartwo <- dplyr::pull(data, !! var_2) ``` 55 1 ``` row_name <- get_names(varone) ``` 56 1 ``` col_name <- get_names(vartwo) ``` 57 58 1 ``` x <- as.matrix(table(varone, vartwo)) ``` 59 1 ``` rownames(x) <- NULL ``` 60 61 1 ``` n <- sum(x) ``` 62 1 ``` per_mat <- round(x / n, 3) ``` 63 1 ``` row_pct <- apply(per_mat, 1, sum) ``` 64 1 ``` col_pct <- apply(per_mat, 2, sum) ``` 65 1 ``` rowtotal <- apply(x, 1, sum) ``` 66 1 ``` coltotal <- apply(x, 2, sum) ``` 67 1 ``` finalmat <- prep_per_mat(per_mat, row_pct, col_pct) ``` 68 1 ``` rcent <- prep_rcent(x, rowtotal, row_pct) ``` 69 1 ``` ccent <- prep_ccent(x, coltotal) ``` 70 1 ``` finaltab <- prep_table(x, rowtotal, row_name) ``` 71 72 1 ``` utility <- list(obs = n, ``` 73 1 ``` var2_levels = col_name, ``` 74 1 ``` var1_levels = row_name, ``` 75 1 ``` varnames = var_names, ``` 76 1 ``` twowaytable = finaltab, ``` 77 1 ``` percent_table = finalmat, ``` 78 1 ``` row_percent = rcent, ``` 79 1 ``` column_percent = ccent, ``` 80 1 ``` column_totals = coltotal, ``` 81 1 ``` percent_column = col_pct, ``` 82 1 ``` data = data) ``` 83 84 1 ``` ftab <- table(varone, vartwo) ``` 85 1 ``` names(dimnames(ftab)) <- c(var_names[1], var_names[2]) ``` 86 87 1 ``` result <- list(ftable = ftab, ``` 88 1 ``` utility = utility) ``` 89 90 1 ``` class(result) <- "ds_cross_table" ``` 91 1 ``` return(result) ``` 92 ```} ``` 93 94 ```#' @export ``` 95 ```print.ds_cross_table <- function(x, ...) { ``` 96 0 ``` print_cross(x) ``` 97 ```} ``` 98 99 ```#' @export ``` 100 ```#' @rdname ds_cross_table ``` 101 ```#' ``` 102 ```plot.ds_cross_table <- function(x, stacked = FALSE, proportional = FALSE, ``` 103 ``` print_plot = TRUE,...) { ``` 104 105 1 ``` x_lab <- ``` 106 1 ``` x %>% ``` 107 1 ``` use_series(utility) %>% ``` 108 1 ``` use_series(varnames) %>% ``` 109 1 ``` extract(1) ``` 110 111 1 ``` y_lab <- ``` 112 1 ``` x %>% ``` 113 1 ``` use_series(utility) %>% ``` 114 1 ``` use_series(varnames) %>% ``` 115 1 ``` extract(2) ``` 116 117 1 ``` k <- string_to_name(x) ``` 118 1 ``` j <- string_to_name(x, 2) ``` 119 120 1 ``` if (proportional) { ``` 121 1 ``` p <- ``` 122 1 ``` x %>% ``` 123 1 ``` use_series(utility) %>% ``` 124 1 ``` use_series(data) %>% ``` 125 1 ``` dplyr::select(x = !! k, y = !! j) %>% ``` 126 1 ``` table() %>% ``` 127 1 ``` tibble::as_tibble() %>% ``` 128 1 ``` ggplot(aes(x = x, y = n, fill = y)) + ``` 129 1 ``` geom_bar(stat = "identity", position = "fill") + ``` 130 1 ``` scale_y_continuous(labels = scales::percent_format()) + ``` 131 1 ``` xlab(x_lab) + ggtitle(paste(x_lab, "vs", y_lab)) + ``` 132 1 ``` labs(fill = y_lab) ``` 133 ``` } else { ``` 134 1 ``` if (stacked) { ``` 135 1 ``` p <- ``` 136 1 ``` x %>% ``` 137 1 ``` use_series(utility) %>% ``` 138 1 ``` use_series(data) %>% ``` 139 1 ``` dplyr::select(x = !! k, y = !! j) %>% ``` 140 1 ``` ggplot() + ``` 141 1 ``` geom_bar(aes(x, fill = y), position = "stack") + ``` 142 1 ``` xlab(x_lab) + ggtitle(paste(x_lab, "vs", y_lab)) + ``` 143 1 ``` labs(fill = y_lab) ``` 144 ``` } else { ``` 145 1 ``` p <- ``` 146 1 ``` x %>% ``` 147 1 ``` use_series(utility) %>% ``` 148 1 ``` use_series(data) %>% ``` 149 1 ``` dplyr::select(x = !! k, y = !! j) %>% ``` 150 1 ``` ggplot() + ``` 151 1 ``` geom_bar(aes(x, fill = y), position = "dodge") + ``` 152 1 ``` xlab(x_lab) + ggtitle(paste(x_lab, "vs", y_lab)) + ``` 153 1 ``` labs(fill = y_lab) ``` 154 ``` } ``` 155 ``` } ``` 156 157 1 ``` if (print_plot) { ``` 158 1 ``` print(p) ``` 159 ``` } else { ``` 160 0 ``` return(p) ``` 161 ``` } ``` 162 163 ```} ``` 164 165 ```#' @rdname ds_cross_table ``` 166 ```#' @export ``` 167 ```#' ``` 168 ```ds_twoway_table <- function(data, var_1, var_2) { ``` 169 170 1 ``` check_df(data) ``` 171 1 ``` var1_name <- deparse(substitute(var_1)) ``` 172 1 ``` var2_name <- deparse(substitute(var_2)) ``` 173 174 1 ``` var_1 <- rlang::enquo(var_1) ``` 175 1 ``` var_2 <- rlang::enquo(var_2) ``` 176 1 ``` check_factor(data, !! var_1, var1_name) ``` 177 1 ``` check_factor(data, !! var_2, var2_name) ``` 178 179 1 ``` group <- ``` 180 1 ``` data %>% ``` 181 1 ``` dplyr::select(!! var_1, !! var_2) %>% ``` 182 1 ``` na.omit() %>% ``` 183 1 ``` dplyr::group_by(!! var_1, !! var_2) %>% ``` 184 1 ``` dplyr::summarise(count = dplyr::n()) ``` 185 186 1 ``` total <- ``` 187 1 ``` group %>% ``` 188 1 ``` dplyr::pull(count) %>% ``` 189 1 ``` sum() ``` 190 191 1 ``` div_by <- ``` 192 1 ``` data %>% ``` 193 1 ``` dplyr::group_by(!! var_2) %>% ``` 194 1 ``` na.omit() %>% ``` 195 1 ``` dplyr::tally() %>% ``` 196 1 ``` dplyr::pull(n) ``` 197 198 199 1 ``` group2 <- ``` 200 1 ``` data %>% ``` 201 1 ``` dplyr::select(!! var_1, !! var_2) %>% ``` 202 1 ``` na.omit() %>% ``` 203 1 ``` dplyr::group_by(!! var_2, !! var_1) %>% ``` 204 1 ``` dplyr::summarise(count = dplyr::n()) %>% ``` 205 1 ``` dplyr::mutate( ``` 206 1 ``` col_percent = count / sum(count) ``` 207 ``` ) %>% ``` 208 1 ``` dplyr::ungroup() ``` 209 210 1 ``` group %<>% ``` 211 1 ``` dplyr::mutate( ``` 212 1 ``` percent = count / total, ``` 213 1 ``` row_percent = count / sum(count) ``` 214 ``` ) %>% ``` 215 1 ``` dplyr::ungroup() ``` 216 217 1 ``` result <- dplyr::inner_join(group, group2) ``` 218 1 ``` return(result) ``` 219 220 ```} ``` 221 222 ```get_names <- function(x) { ``` 223 224 1 ``` if (is.factor(x)) { ``` 225 1 ``` varname <- levels(x) ``` 226 ``` } else { ``` 227 0 ``` varname <- unique(sort(x)) ``` 228 ``` } ``` 229 230 1 ``` return(varname) ``` 231 232 ```} ``` 233 234 ```prep_table <- function(x, rowtotal, row_name) { ``` 235 236 1 ``` x1 <- cbind(x, rowtotal) ``` 237 1 ``` cbind(unname(row_name), x1) ``` 238 239 ```} ``` 240 241 ```prep_per_mat <- function(per_mat, row_pct, col_pct) { ``` 242 243 1 ``` per_mat_1 <- cbind(per_mat, row_pct) ``` 244 1 ``` per_mat_2 <- suppressWarnings(rbind(per_mat_1, col_pct)) ``` 245 1 ``` d <- dim(per_mat_2) ``` 246 1 ``` per_mat_2[d[1], d[2]] <- 1 ``` 247 1 ``` return(per_mat_2) ``` 248 249 ```} ``` 250 251 ```prep_rcent <- function(x, rowtotal, row_pct) { ``` 252 253 1 ``` rcent_1 <- row_pct(x, rowtotal) ``` 254 1 ``` rcent_2 <- cbind(rcent_1, row_pct) ``` 255 1 ``` apply(rcent_2, c(1, 2), rounda) ``` 256 257 ```} ``` 258 259 ```prep_ccent <- function(x, coltotal) { ``` 260 261 1 ``` ccent_1 <- col_pct(x, coltotal) ``` 262 1 ``` apply(ccent_1, c(1, 2), rounda) ``` 263 264 ```} ```