rsquaredacademy / descriptr
1
#' Generate scatter plots
2
#'
3
#' Creates scatter plots if the data has continuous variables.
4
#'
5
#' @param data A \code{data.frame} or \code{tibble}.
6
#' @param print_plot logical; if \code{TRUE}, prints the plot else returns a plot object.
7
#' @param ... Column(s) in \code{data}.
8
#'
9
#' @examples
10
#' # plot select variables
11
#' ds_plot_scatter(mtcarz, mpg, disp)
12
#'
13
#' # plot all variables
14
#' ds_plot_scatter(mtcarz)
15
#'
16
#' @importFrom rlang sym
17
#' @importFrom utils combn
18
#'
19
#' @export
20
#'
21
ds_plot_scatter <- function(data, ..., print_plot = TRUE) {

  # Builds one scatter plot per ordered pair of numeric columns in `data`
  # (or in the columns named in `...`). Returns the list of ggplot objects
  # when `print_plot` is FALSE, otherwise a gridExtra 2 x 2 page layout.
  check_df(data)
  var <- rlang::quos(...)

  # Narrow to the requested columns only when some were supplied.
  if (length(var) > 0) {
    data <- dplyr::select(data, !!! var)
  }

  # vapply() always yields a logical vector, even for zero-column input.
  is_num <- vapply(data, is.numeric, logical(1))

  if (!any(is_num)) {
    stop("Data has no continuous variables.", call. = FALSE)
  }

  # Count the numeric columns, not the selected columns: a selection such as
  # one numeric plus one factor column must be rejected here, otherwise
  # combn() below fails with an opaque "n < m" error. The same guard now also
  # protects the "all columns" path.
  if (sum(is_num) < 2) {
    stop("Scatter plot requires 2 continuous variables.", call. = FALSE)
  }

  plot_data <- data[is_num]
  num_var   <- names(plot_data)

  # All unordered pairs, followed by the same pairs with the axes swapped, so
  # every variable appears on both the x and the y axis.
  num_start <- utils::combn(num_var, 2)
  num_num   <- cbind(num_start, rbind(num_start[2, ], num_start[1, ]))
  n         <- ncol(num_num)

  myplots <- vector("list", n)
  labels  <- character(n)

  for (i in seq_len(n)) {
    x <- num_num[1, i]
    y <- num_num[2, i]
    myplots[[i]] <-
      ggplot(data = plot_data) +
      geom_point(aes(x = !! rlang::sym(x), y = !! rlang::sym(y)))
    labels[i] <- paste(y, "v", x)
  }
  names(myplots) <- labels

  if (!print_plot) {
    return(myplots)
  }

  check_suggests('gridExtra')
  gridExtra::marrangeGrob(myplots, nrow = 2, ncol = 2)

}
74

75
#' Generate histograms
76
#'
77
#' Creates histograms if the data has continuous variables.
78
#'
79
#' @param data A \code{data.frame} or \code{tibble}.
80
#' @param ... Column(s) in \code{data}.
81
#' @param bins Number of bins in the histogram.
82
#' @param fill Color of the histogram.
83
#' @param print_plot logical; if \code{TRUE}, prints the plot else returns a plot object.
84
#'
85
#' @examples
86
#' # plot single variable
87
#' ds_plot_histogram(mtcarz, mpg)
88
#'
89
#' # plot multiple variables
90
#' ds_plot_histogram(mtcarz, mpg, disp, hp)
91
#'
92
#' # plot all variables
93
#' ds_plot_histogram(mtcarz)
94
#'
95
#' @export
96
#'
97
ds_plot_histogram <- function(data, ..., bins = 5, fill = 'blue',
                              print_plot = TRUE) {

  # One histogram per numeric column of `data`, or of the columns named in
  # `...` when any are given.
  check_df(data)
  selected <- rlang::quos(...)

  if (length(selected) >= 1) {
    data <- dplyr::select(data, !!! selected)
  }

  numeric_cols <- vapply(data, is.numeric, logical(1))
  if (!any(numeric_cols)) {
    stop("Data has no continuous variables.", call. = FALSE)
  }

  plot_data <- data[numeric_cols]
  col_names <- names(plot_data)

  # Preallocate the result and name it up front; each slot is filled below.
  myplots        <- vector("list", length(col_names))
  names(myplots) <- col_names

  for (k in seq_along(col_names)) {
    myplots[[k]] <-
      ggplot(data = plot_data) +
      geom_histogram(
        aes(x = !! rlang::sym(col_names[k])),
        bins = bins,
        fill = fill
      )
  }

  # Hand back the raw list unless a page layout was requested.
  if (!print_plot) {
    return(myplots)
  }

  check_suggests('gridExtra')
  gridExtra::marrangeGrob(myplots, nrow = 2, ncol = 2)

}
145

146

147
#' Generate density plots
148
#'
149
#' Creates density plots if the data has continuous variables.
150
#'
151
#' @param data A \code{data.frame} or \code{tibble}.
152
#' @param ... Column(s) in \code{data}.
153
#' @param color Color of the plot.
154
#' @param print_plot logical; if \code{TRUE}, prints the plot else returns a plot object.
155
#'
156
#' @examples
157
#' # plot single variable
158
#' ds_plot_density(mtcarz, mpg)
159
#'
160
#' # plot multiple variables
161
#' ds_plot_density(mtcarz, mpg, disp, hp)
162
#'
163
#' # plot all variables
164
#' ds_plot_density(mtcarz)
165
#'
166
#' @export
167
#'
168
ds_plot_density <- function(data, ..., color = 'blue', print_plot = TRUE) {

  # One density plot per numeric column of `data`, or of the columns named in
  # `...` when any are given.
  check_df(data)
  picked <- rlang::quos(...)

  use_all <- length(picked) < 1
  if (!use_all) {
    data <- dplyr::select(data, !!! picked)
  }

  keep <- vapply(data, is.numeric, logical(1))
  if (!any(keep)) {
    stop("Data has no continuous variables.", call. = FALSE)
  }

  plot_data <- data[keep]
  vars      <- names(plot_data)
  myplots   <- vector("list", length(vars))

  for (idx in seq_along(vars)) {
    current <- rlang::sym(vars[idx])
    myplots[[idx]] <-
      ggplot(data = plot_data) +
      geom_density(aes(x = !! current), color = color)
  }
  names(myplots) <- vars

  # Either the list of ggplot objects or a 2 x 2 gridExtra arrangement.
  if (!print_plot) {
    return(myplots)
  }

  check_suggests('gridExtra')
  gridExtra::marrangeGrob(myplots, nrow = 2, ncol = 2)

}
214

215
#' Generate bar plots
216
#'
217
#' Creates bar plots if the data has categorical variables.
218
#'
219
#' @param data A \code{data.frame} or \code{tibble}.
220
#' @param ... Column(s) in \code{data}.
221
#' @param fill Color of the bars.
222
#' @param print_plot logical; if \code{TRUE}, prints the plot else returns a plot object.
223
#'
224
#' @examples
225
#' # plot single variable
226
#' ds_plot_bar(mtcarz, cyl)
227
#'
228
#' # plot multiple variables
229
#' ds_plot_bar(mtcarz, cyl, gear)
230
#'
231
#' # plot all variables
232
#' ds_plot_bar(mtcarz)
233
#'
234
#' @export
235
#'
236
ds_plot_bar <- function(data, ..., fill = 'blue', print_plot = TRUE) {

  # One bar plot per factor column of `data`, or of the columns named in
  # `...` when any are given.
  check_df(data)
  requested <- rlang::quos(...)

  if (length(requested) > 0) {
    data <- dplyr::select(data, !!! requested)
  }

  factor_cols <- vapply(data, is.factor, logical(1))
  if (!any(factor_cols)) {
    stop("Data has no categorical variables.", call. = FALSE)
  }

  plot_data  <- data[factor_cols]
  factor_var <- names(plot_data)
  myplots    <- vector("list", length(factor_var))

  for (j in seq_along(factor_var)) {
    column <- factor_var[j]
    myplots[[j]] <-
      ggplot(data = plot_data) +
      geom_bar(aes(x = !! rlang::sym(column)), fill = fill)
  }
  names(myplots) <- factor_var

  # Return the plot list directly when no page layout is wanted.
  if (!print_plot) {
    return(myplots)
  }

  check_suggests('gridExtra')
  gridExtra::marrangeGrob(myplots, nrow = 2, ncol = 2)

}
282

283

284
#' Generate box plots
285
#'
286
#' Creates box plots if the data has continuous variables.
287
#'
288
#' @param data A \code{data.frame} or \code{tibble}.
289
#' @param ... Column(s) in \code{data}.
290
#' @param print_plot logical; if \code{TRUE}, prints the plot else returns a plot object.
291
#'
292
#' @examples
293
#' # plot single variable
294
#' ds_plot_box_single(mtcarz, mpg)
295
#'
296
#' # plot multiple variables
297
#' ds_plot_box_single(mtcarz, mpg, disp, hp)
298
#'
299
#' # plot all variables
300
#' ds_plot_box_single(mtcarz)
301
#'
302
#' @export
303
#'
304
ds_plot_box_single <- function(data, ..., print_plot = TRUE) {

  # One stand-alone box plot per numeric column of `data`, or of the columns
  # named in `...` when any are given.
  check_df(data)
  cols <- rlang::quos(...)

  if (length(cols) >= 1) {
    data <- dplyr::select(data, !!! cols)
  }

  numeric_mask <- vapply(data, is.numeric, logical(1))
  if (!any(numeric_mask)) {
    stop("Data has no continuous variables.", call. = FALSE)
  }

  plot_data <- data[numeric_mask]
  measures  <- names(plot_data)

  myplots        <- vector("list", length(measures))
  names(myplots) <- measures

  for (pos in seq_along(measures)) {
    # A constant factor on the x axis gives a single box; its axis label is
    # then blanked out.
    box <- geom_boxplot(aes(x = factor(1), y = !! rlang::sym(measures[pos])))
    myplots[[pos]] <- ggplot(data = plot_data) + box + labs(x = ' ')
  }

  if (!print_plot) {
    return(myplots)
  }

  check_suggests('gridExtra')
  gridExtra::marrangeGrob(myplots, nrow = 2, ncol = 2)

}
351

352

353
#' Generate stacked bar plots
354
#'
355
#' Creates stacked bar plots if the data has categorical variables.
356
#'
357
#' @param data A \code{data.frame} or \code{tibble}.
358
#' @param ... Column(s) in \code{data}.
359
#' @param print_plot logical; if \code{TRUE}, prints the plot else returns a plot object.
360
#'
361
#' @examples
362
#' # subset data
363
#' mt <- dplyr::select(mtcarz, cyl, gear, am)
364
#'
365
#' # stacked bar plot
366
#' ds_plot_bar_stacked(mtcarz, cyl, gear)
367
#'
368
#' # plot all variables
369
#' ds_plot_bar_stacked(mt)
370
#'
371
#' @export
372
#'
373
ds_plot_bar_stacked <- function(data, ..., print_plot = TRUE) {

  # Builds one stacked bar plot per ordered pair of factor columns in `data`
  # (or in the columns named in `...`). Returns the list of ggplot objects
  # when `print_plot` is FALSE, otherwise a gridExtra 2 x 2 page layout.
  check_df(data)
  var <- rlang::quos(...)

  # Narrow to the requested columns only when some were supplied.
  if (length(var) > 0) {
    data <- dplyr::select(data, !!! var)
  }

  # vapply() always yields a logical vector, even for zero-column input.
  is_factor <- vapply(data, is.factor, logical(1))

  if (!any(is_factor)) {
    stop("Data has no categorical variables.", call. = FALSE)
  }

  # Count the factor columns, not the selected columns: one factor plus one
  # numeric column must be rejected here, otherwise combn() below fails with
  # an opaque "n < m" error. The guard now also covers the "all columns" path.
  if (sum(is_factor) < 2) {
    stop("Stacked bar plot requires 2 categorical variables.", call. = FALSE)
  }

  plot_data  <- data[is_factor]
  factor_var <- names(plot_data)

  # All unordered pairs, followed by the same pairs with the roles swapped,
  # so each variable is used both on the x axis and as the fill.
  factor_start <- utils::combn(factor_var, 2)
  fact_fact    <- cbind(
    factor_start,
    rbind(factor_start[2, ], factor_start[1, ])
  )
  n <- ncol(fact_fact)

  myplots <- vector("list", n)
  labels  <- character(n)

  for (i in seq_len(n)) {
    x <- fact_fact[1, i]
    y <- fact_fact[2, i]
    myplots[[i]] <-
      ggplot(data = plot_data) +
      geom_bar(aes(x = !! rlang::sym(x), fill = !! rlang::sym(y)))
    labels[i] <- paste(y, "v", x)
  }
  names(myplots) <- labels

  if (!print_plot) {
    return(myplots)
  }

  check_suggests('gridExtra')
  gridExtra::marrangeGrob(myplots, nrow = 2, ncol = 2)

}
426

427
#' Generate grouped bar plots
428
#'
429
#' Creates grouped bar plots if the data has categorical variables.
430
#'
431
#' @param data A \code{data.frame} or \code{tibble}.
432
#' @param ... Column(s) in \code{data}.
433
#' @param print_plot logical; if \code{TRUE}, prints the plot else returns a plot object.
434
#'
435
#' @examples
436
#' # subset data
437
#' mt <- dplyr::select(mtcarz, cyl, gear, am)
438
#'
439
#' # grouped bar plot
440
#' ds_plot_bar_grouped(mtcarz, cyl, gear)
441
#'
442
#' # plot all variables
443
#' ds_plot_bar_grouped(mt)
444
#'
445
#' @export
446
#'
447
ds_plot_bar_grouped <- function(data, ..., print_plot = TRUE) {

  # Builds one grouped (dodged) bar plot per ordered pair of factor columns
  # in `data` (or in the columns named in `...`). Returns the list of ggplot
  # objects when `print_plot` is FALSE, otherwise a gridExtra 2 x 2 layout.
  check_df(data)
  var <- rlang::quos(...)

  # Narrow to the requested columns only when some were supplied.
  if (length(var) > 0) {
    data <- dplyr::select(data, !!! var)
  }

  # vapply() always yields a logical vector, even for zero-column input.
  is_factor <- vapply(data, is.factor, logical(1))

  if (!any(is_factor)) {
    stop("Data has no categorical variables.", call. = FALSE)
  }

  # Count the factor columns, not the selected columns: one factor plus one
  # numeric column must be rejected here, otherwise combn() below fails with
  # an opaque "n < m" error. The guard now also covers the "all columns" path.
  if (sum(is_factor) < 2) {
    stop("Grouped bar plot requires 2 categorical variables.", call. = FALSE)
  }

  plot_data  <- data[is_factor]
  factor_var <- names(plot_data)

  # All unordered pairs, followed by the same pairs with the roles swapped,
  # so each variable is used both on the x axis and as the fill.
  factor_start <- utils::combn(factor_var, 2)
  fact_fact    <- cbind(
    factor_start,
    rbind(factor_start[2, ], factor_start[1, ])
  )
  n <- ncol(fact_fact)

  myplots <- vector("list", n)
  labels  <- character(n)

  for (i in seq_len(n)) {
    x <- fact_fact[1, i]
    y <- fact_fact[2, i]
    myplots[[i]] <-
      ggplot(data = plot_data) +
      geom_bar(
        aes(x = !! rlang::sym(x), fill = !! rlang::sym(y)),
        position = 'dodge'
      )
    labels[i] <- paste(y, "v", x)
  }
  names(myplots) <- labels

  if (!print_plot) {
    return(myplots)
  }

  check_suggests('gridExtra')
  gridExtra::marrangeGrob(myplots, nrow = 2, ncol = 2)

}
501

502

503
#' Compare distributions
504
#'
505
#' Creates box plots if the data has both categorical & continuous variables.
506
#'
507
#' @param data A \code{data.frame} or \code{tibble}.
508
#' @param ... Column(s) in \code{data}.
509
#' @param print_plot logical; if \code{TRUE}, prints the plot else returns a plot object.
510
#'
511
#' @examples
512
#' # subset data
513
#' mt <- dplyr::select(mtcarz, cyl, disp, mpg)
514
#'
515
#' # plot select variables
516
#' ds_plot_box_group(mtcarz, cyl, gear, mpg)
517
#'
518
#' # plot all variables
519
#' ds_plot_box_group(mt)
520
#'
521
#' @export
522
#'
523
ds_plot_box_group <- function(data, ..., print_plot = TRUE) {

  # Builds one box plot for every (factor column, numeric column) pair in
  # `data` (or in the columns named in `...`). Returns the list of ggplot
  # objects when `print_plot` is FALSE, otherwise a gridExtra 2 x 2 layout.
  check_df(data)
  var <- rlang::quos(...)
  has_selection <- length(var) > 0

  if (has_selection) {
    data <- dplyr::select(data, !!! var)
  }

  # Column-type masks are computed once, on the data actually being plotted.
  is_num    <- vapply(data, is.numeric, logical(1))
  is_factor <- vapply(data, is.factor, logical(1))

  # The order of the two checks is kept as it was per path, so the message
  # raised when both kinds are missing does not change: an explicit selection
  # reports the continuous variables first, the all-columns path reports the
  # categorical variables first.
  if (has_selection) {
    if (!any(is_num)) {
      stop("Data has no continuous variables.", call. = FALSE)
    }
    if (!any(is_factor)) {
      stop("Data has no categorical variables.", call. = FALSE)
    }
  } else {
    if (!any(is_factor)) {
      stop("Data has no categorical variables.", call. = FALSE)
    }
    if (!any(is_num)) {
      stop("Data has no continuous variables.", call. = FALSE)
    }
  }

  # The former `ncol(data) < 1` guard is dropped: it inspected `data` rather
  # than the plotting subset and could never trigger after the checks above.

  fact_var <- names(data)[is_factor]
  num_var  <- names(data)[is_num]

  # Factor columns first, then numeric columns.
  plot_data <- data[c(which(is_factor), which(is_num))]

  # Every factor/numeric pairing; strings are kept as plain characters so the
  # names can be turned into symbols directly.
  combs <- expand.grid(fact_var, num_var, stringsAsFactors = FALSE)
  n     <- nrow(combs)

  myplots <- vector("list", n)
  labels  <- character(n)

  for (i in seq_len(n)) {
    x <- combs[i, 1]
    y <- combs[i, 2]
    myplots[[i]] <-
      ggplot(data = plot_data) +
      geom_boxplot(aes(x = !! rlang::sym(x), y = !! rlang::sym(y)))
    labels[i] <- paste(y, "v", x)
  }
  names(myplots) <- labels

  if (!print_plot) {
    return(myplots)
  }

  check_suggests('gridExtra')
  gridExtra::marrangeGrob(myplots, nrow = 2, ncol = 2)

}

Read our documentation on viewing source code.

Loading