rsquaredacademy / inferr
Showing 17 of 52 files from the diff.

@@ -1,3 +1,4 @@
Loading
1 +
#' @importFrom stats qnorm
1 2
#' @importFrom magrittr %>%
2 3
#' @title McNemar Test
3 4
#' @description Test if the proportions of two dichotomous variables are
@@ -33,9 +34,11 @@
Loading
33 34
#' @seealso \code{\link[stats]{mcnemar.test}}
34 35
#' @examples
35 36
#' # using variables from data
36 -
#' hb <- hsb
37 -
#' hb$himath <- ifelse(hsb$math > 60, 1, 0)
38 -
#' hb$hiread <- ifelse(hsb$read > 60, 1, 0)
37 +
#' library(dplyr)
38 +
#' hb <- mutate(hsb,
39 +
#'         himath = if_else(math > 60, 1, 0),
40 +
#'         hiread = if_else(read > 60, 1, 0)
41 +
#'     )
39 42
#' infer_mcnemar_test(hb, himath, hiread)
40 43
#'
41 44
#' # test if the proportion of students in himath and hiread group is same
@@ -52,185 +55,42 @@
Loading
52 55
#' @export
53 56
#'
54 57
infer_mcnemar_test.default <- function(data, x = NULL, y = NULL) {
55 -
56 58
  if (is.matrix(data) | is.table(data)) {
57 59
    dat <- mcdata(data)
58 60
  } else {
61 +
    x1 <- enquo(x)
62 +
    y1 <- enquo(y)
59 63
60 -
    x1  <- deparse(substitute(x))
61 -
    y1  <- deparse(substitute(y))
62 -
    dat <- table(data[c(x1, y1)])
63 -
64 +
    dat <-
65 +
      data %>%
66 +
      select(!! x1, !! y1) %>%
67 +
      table()
64 68
  }
65 69
66 70
  k <- mccomp(dat)
67 71
68 -
  result <-
69 -
    list(cases     = k$cases,
70 -
         controls  = k$controls,
71 -
         cpvalue   = k$cpvalue,
72 -
         cstat     = k$cstat,
73 -
         df        = k$df,
74 -
         exactp    = k$exactp,
75 -
         kappa     = k$kappa,
76 -
         kappa_cil = k$kappa_cil,
77 -
         kappa_ciu = k$kappa_ciu,
78 -
         odratio   = k$odratio,
79 -
         pvalue    = k$pvalue,
80 -
         ratio     = k$ratio,
81 -
         statistic = k$statistic,
82 -
         std_err   = k$std_err,
83 -
         tbl       = dat)
72 +
  result <- list(
73 +
    statistic = k$statistic, df = k$df, pvalue = k$pvalue,
74 +
    exactp = k$exactp, cstat = k$cstat, cpvalue = k$cpvalue, kappa = k$kappa,
75 +
    std_err = k$std_err, kappa_cil = k$kappa_cil, kappa_ciu = k$kappa_ciu,
76 +
    cases = k$cases, controls = k$controls, ratio = k$ratio,
77 +
    odratio = k$odratio, tbl = dat
78 +
  )
84 79
85 80
  class(result) <- "infer_mcnemar_test"
86 81
  return(result)
87 82
}
88 83
89 84
#' @export
85 +
#' @rdname infer_mcnemar_test
86 +
#' @usage NULL
90 87
#'
91 -
print.infer_mcnemar_test <- function(x, ...) {
92 -
  print_mcnemar_test(x)
93 -
}
94 -
95 -
mcdata <- function(x, y) {
96 -
  if (!is.matrix(x)) {
97 -
    stop("x must be either a table or a matrix")
98 -
  }
99 -
100 -
  if (is.matrix(x)) {
101 -
    if (length(x) != 4) {
102 -
      stop("x must be a 2 x 2 matrix")
103 -
    }
104 -
  }
105 -
106 -
  dat <- x
107 -
  return(dat)
108 -
}
109 -
110 -
111 -
mctestp <- function(dat) {
112 -
  retrieve <- matrix(c(1, 2, 2, 1), nrow = 2)
113 -
  dat[retrieve]
114 -
}
115 -
116 -
tetat <- function(p) {
117 -
  ((p[1] - p[2]) ^ 2) / sum(p)
118 -
}
119 -
120 -
mcpval <- function(test_stat, df) {
121 -
  1 - stats::pchisq(test_stat, df)
122 -
}
123 -
124 -
mcpex <- function(dat) {
125 -
  2 * min(stats::pbinom(dat[2], sum(dat[2], dat[3]), 0.5), stats::pbinom(dat[3], sum(dat[2], dat[3]), 0.5))
126 -
}
127 -
128 -
mcstat <- function(p) {
129 -
  ((abs(p[1] - p[2]) - 1) ^ 2) / sum(p)
130 -
}
131 -
132 -
mccpval <- function(cstat, df) {
133 -
  1 - stats::pchisq(cstat, df)
134 -
}
135 -
136 -
mckappa <- function(dat) {
137 -
138 -
  agreement <- sum(diag(dat)) / sum(dat)
139 -
  expected  <- sum(rowSums(dat) * colSums(dat)) / (sum(dat) ^ 2)
140 -
  (agreement - expected) / (1 - expected)
141 -
142 -
}
143 -
144 -
mcserr <- function(dat, kappa) {
145 -
  expected <- sum(rowSums(dat) * colSums(dat)) / (sum(dat) ^ 2)
146 -
  serr(dat, kappa, expected)
88 +
mcnemar_test <- function(x, y = NULL) {
89 +
  .Deprecated("infer_mcnemar_test()")
147 90
}
148 91
149 -
mcconf <- function(std_err, kappa) {
150 -
151 -
  alpha    <- 0.05
152 -
  interval <- stats::qnorm(1 - (alpha / 2)) * std_err
153 -
  ci_lower <- kappa - interval
154 -
  ci_upper <- kappa + interval
155 -
156 -
  list(ci_lower = ci_lower, ci_upper = ci_upper)
157 -
158 -
}
159 -
160 -
prop_fact <- function(dat, p) {
161 -
162 -
  dat_per    <- dat / sum(dat)
163 -
  row_sum    <- rowSums(dat_per)
164 -
  col_sum    <- colSums(dat_per)
165 -
  controls   <- 1 - col_sum[2]
166 -
  cases      <- 1 - row_sum[2]
167 -
  ratio      <- cases / controls
168 -
  odds_ratio <- p[1] / p[2]
169 -
170 -
  list(cases      = cases,
171 -
       controls   = controls,
172 -
       odds_ratio = odds_ratio,
173 -
       ratio      = ratio
174 -
175 -
  )
176 -
177 -
}
178 -
179 -
serr <- function(dat, kappa, expected) {
180 -
181 -
  dat_per    <- dat / sum(dat)
182 -
  row_sum    <- rowSums(dat_per)
183 -
  row_sum[3] <- sum(row_sum)
184 -
  col_sum    <- colSums(dat_per)
185 -
  dat_per    <- rbind(dat_per, col_sum)
186 -
  dat_per    <- cbind(dat_per, row_sum)
187 -
  d1         <- dim(dat_per)
188 -
189 -
  dat_per[d1[1], d1[2]] <- 1.0
190 -
  diagonal <- diag(dat_per)
191 -
192 -
  a <- diagonal[1] * (1 - (row_sum[1] + col_sum[1]) * (1 - kappa)) ^ 2 +
193 -
    diagonal[2] * (1 - (row_sum[2] + col_sum[2]) * (1 - kappa)) ^ 2
194 -
195 -
  x1 <- dat_per[lower.tri(dat_per)][1]
196 -
  x2 <- dat_per[upper.tri(dat_per)][1]
197 -
198 -
  b <- ((1 - kappa) ^ 2) * ((x1 * (row_sum[1] + col_sum[2]) ^ 2) +
199 -
    (x2 * (row_sum[2] + col_sum[1]) ^ 2))
200 -
201 -
  c <- ((kappa) - expected * (1 - kappa)) ^ 2
202 -
  variance <- ((a + b - c) / ((1 - expected) ^ 2)) / sum(dat)
203 -
204 -
  sqrt(variance)
205 -
}
206 -
207 -
mccomp <- function(dat) {
208 -
209 -
  p         <- mctestp(dat)
210 -
  test_stat <- tetat(p)
211 -
  df        <- nrow(dat) - 1
212 -
  pvalue    <- mcpval(test_stat, df)
213 -
  exactp    <- mcpex(dat)
214 -
  cstat     <- mcstat(p)
215 -
  cpvalue   <- mccpval(cstat, df)
216 -
  kappa     <- mckappa(dat)
217 -
  std_err   <- mcserr(dat, kappa)
218 -
  clu       <- mcconf(std_err, kappa)
219 -
  k         <- prop_fact(dat, p)
220 -
221 -
  list(cases     = round(k$cases, 4),
222 -
       controls  = round(k$controls, 4),
223 -
       cpvalue   = cpvalue,
224 -
       cstat     = cstat,
225 -
       df        = df,
226 -
       exactp    = round(exactp, 4),
227 -
       kappa     = round(kappa, 4),
228 -
       kappa_cil = round(clu$ci_lower, 4),
229 -
       kappa_ciu = round(clu$ci_upper, 4),
230 -
       odratio   = round(k$odds_ratio, 4),
231 -
       pvalue    = round(pvalue, 4),
232 -
       ratio     = round(k$ratio, 4),
233 -
       statistic = round(test_stat, 4),
234 -
       std_err   = round(std_err, 4))
235 -
236 -
}
92 +
#' @export
93 +
#'
94 +
print.infer_mcnemar_test <- function(x, ...) {
95 +
  print_mcnemar_test(x)
96 +
}

@@ -1,3 +1,5 @@
Loading
1 +
#' @importFrom stats pchisq
2 +
#' @importFrom dplyr pull
1 3
#' @title Chi Square Test of Association
2 4
#' @description Chi Square test of association to examine if there is a
3 5
#' relationship between two categorical variables.
@@ -9,20 +11,19 @@
Loading
9 11
#' \code{"infer_chisq_assoc_test"} is a list containing the
10 12
#' following components:
11 13
#'
12 -
#' \item{chisquare}{chi square}
13 -
#' \item{chisquare_lr}{likelihood ratio chi square}
14 -
#' \item{chisquare_mantel_haenszel}{mantel haenszel chi square}
15 -
#' \item{chisquare_adjusted}{continuity adjusted chi square}
16 -
#' \item{contingency_coefficient}{contingency coefficient}
17 -
#' \item{cramers_v}{cramer's v}
18 -
#' \item{df}{degrees of freedom}
14 +
#' \item{chi}{chi square}
15 +
#' \item{chilr}{likelihood ratio chi square}
16 +
#' \item{chimh}{mantel haenszel chi square}
17 +
#' \item{chiy}{continuity adjusted chi square}
18 +
#' \item{sig}{p-value of chi square}
19 +
#' \item{siglr}{p-value of likelihood ratio chi square}
20 +
#' \item{sigmh}{p-value of mantel haenszel chi square}
21 +
#' \item{sigy}{p-value of continuity adjusted chi square}
22 +
#' \item{phi}{phi coefficient}
23 +
#' \item{cc}{contingency coefficient}
24 +
#' \item{cv}{cramer's v}
19 25
#' \item{ds}{product of dimensions of the table of \code{x} and \code{y}}
20 -
#' \item{phi_coefficient}{phi coefficient}
21 -
#' \item{pval_chisquare}{p-value of chi square}
22 -
#' \item{pval_chisquare_adjusted}{p-value of continuity adjusted chi square}
23 -
#' \item{pval_chisquare_lr}{p-value of likelihood ratio chi square}
24 -
#' \item{pval_chisquare_mantel_haenszel}{p-value of mantel haenszel chi square}
25 -
#'
26 +
#' \item{df}{degrees of freedom}
26 27
#' @section Deprecated Function:
27 28
#' \code{chisq_test()} has been deprecated. Instead use
28 29
#' \code{infer_chisq_assoc_test()}.
@@ -40,12 +41,16 @@
Loading
40 41
41 42
#' @export
42 43
infer_chisq_assoc_test.default <- function(data, x, y) {
44 +
  x1 <- enquo(x)
45 +
  y1 <- enquo(y)
43 46
44 -
  x1 <- deparse(substitute(x))
45 -
  y1 <- deparse(substitute(y))
47 +
  xone <-
48 +
    data %>%
49 +
    pull(!! x1)
46 50
47 -
  xone <- data[[x1]]
48 -
  yone <- data[[y1]]
51 +
  yone <-
52 +
    data %>%
53 +
    pull(!! y1)
49 54
50 55
  if (!is.factor(xone)) {
51 56
    stop("x must be a categorical variable")
@@ -56,7 +61,7 @@
Loading
56 61
  }
57 62
58 63
  # dimensions
59 -
  k  <- table(xone, yone)
64 +
  k <- table(xone, yone)
60 65
  dk <- dim(k)
61 66
  ds <- prod(dk)
62 67
  nr <- dk[1]
@@ -67,47 +72,30 @@
Loading
67 72
    twoway <- matrix(table(xone, yone), nrow = 2)
68 73
    df <- df_chi(twoway)
69 74
    ef <- efmat(twoway)
70 -
    k  <- pear_chsq(twoway, df, ef)
71 -
    m  <- lr_chsq(twoway, df, ef)
72 -
    n  <- yates_chsq(twoway)
73 -
    p  <- mh_chsq(twoway, n$total, n$prod_totals)
75 +
    k <- pear_chsq(twoway, df, ef)
76 +
    m <- lr_chsq(twoway, df, ef)
77 +
    n <- yates_chsq(twoway)
78 +
    p <- mh_chsq(twoway, n$total, n$prod_totals)
74 79
  } else {
75 80
    twoway <- matrix(table(xone, yone), nrow = dk[1])
76 81
    ef <- efm(twoway, dk)
77 82
    df <- df_chi(twoway)
78 -
    k  <- pear_chi(twoway, df, ef)
79 -
    m  <- lr_chsq2(twoway, df, ef, ds)
83 +
    k <- pear_chi(twoway, df, ef)
84 +
    m <- lr_chsq2(twoway, df, ef, ds)
80 85
  }
81 86
82 87
  j <- chigf(xone, yone, k$chi)
83 88
84 89
  result <- if (ds == 4) {
85 90
    list(
86 -
      chisquare                      = k$chi,
87 -
      chisquare_adjusted             = n$chi_y,
88 -
      chisquare_lr                   = m$chilr,
89 -
      chisquare_mantel_haenszel      = p$chimh,
90 -
      contingency_coefficient        = j$cc,
91 -
      cramers_v                      = j$cv,
92 -
      df                             = df,
93 -
      ds                             = ds,
94 -
      phi_coefficient                = j$phi,
95 -
      pval_chisquare                 = k$sig,
96 -
      pval_chisquare_adjusted        = n$sig_y,
97 -
      pval_chisquare_lr              = m$sig_lr,
98 -
      pval_chisquare_mantel_haenszel = p$sig_mh
91 +
      chi = k$chi, chilr = m$chilr, chimh = p$chimh, chiy = n$chi_y,
92 +
      sig = k$sig, siglr = m$sig_lr, sigy = n$sig_y, sigmh = p$sig_mh,
93 +
      phi = j$phi, cc = j$cc, cv = j$cv, ds = ds, df = df
99 94
    )
100 95
  } else {
101 96
    list(
102 -
      chisquare               = k$chi,
103 -
      chisquare_lr            = m$chilr,
104 -
      contingency_coefficient = j$cc,
105 -
      cramers_v               = j$cv,
106 -
      df                      = df,
107 -
      ds                      = ds,
108 -
      phi_coefficient         = j$phi,
109 -
      pval_chisquare          = k$sig,
110 -
      pval_chisquare_lr       = m$sig_lr
97 +
      df = df, chi = k$chi, chilr = m$chilr, sig = k$sig, siglr = m$sig_lr,
98 +
      phi = j$phi, cc = j$cc, cv = j$cv, ds = ds
111 99
    )
112 100
  }
113 101
@@ -116,87 +104,14 @@
Loading
116 104
}
117 105
118 106
#' @export
119 -
print.infer_chisq_assoc_test <- function(x, ...) {
120 -
  print_chisq_test(x)
121 -
}
122 -
123 -
# chi square association
124 -
df_chi <- function(twoway) {
125 -
  (nrow(twoway) - 1) * (ncol(twoway) - 1)
126 -
}
127 -
128 -
efmat <- function(twoway) {
129 -
  mat1 <- matrix(rowSums(twoway) / sum(twoway), nrow = 2)
130 -
  mat2 <- matrix(colSums(twoway), nrow = 1)
131 -
132 -
  mat1 %*% mat2
133 -
}
134 -
135 -
pear_chsq <- function(twoway, df, ef) {
136 -
  chi <- round(sum(((twoway - ef) ^ 2) / ef), 4)
137 -
  sig <- round(stats::pchisq(chi, df, lower.tail = F), 4)
138 -
139 -
  list(chi = chi, sig = sig)
140 -
}
141 -
142 -
lr_chsq <- function(twoway, df, ef) {
143 -
  chilr  <- round(2 * sum(matrix(log(twoway / ef), nrow = 1) %*% matrix(twoway, nrow = 4)), 4)
144 -
  sig_lr <- round(stats::pchisq(chilr, df, lower.tail = F), 4)
145 -
146 -
  list(chilr = chilr, sig_lr = sig_lr)
147 -
}
148 -
149 -
lr_chsq2 <- function(twoway, df, ef, ds) {
150 -
  chilr  <- round(2 * sum(matrix(twoway, ncol = ds) %*% matrix(log(twoway / ef), nrow = ds)), 4)
151 -
  sig_lr <- round(stats::pchisq(chilr, df, lower.tail = F), 4)
152 -
153 -
  list(chilr = chilr, sig_lr = sig_lr)
154 -
}
155 -
156 -
yates_chsq <- function(twoway) {
157 -
  way2        <- twoway[, c(2, 1)]
158 -
  total       <- sum(twoway)
159 -
  prods       <- prod(diag(twoway)) - prod(diag(way2))
160 -
  prod_totals <- prod(rowSums(twoway)) * prod(colSums(twoway))
161 -
  chi_y       <- round((total * (abs(prods) - (total / 2)) ^ 2) / prod_totals, 4)
162 -
  sig_y       <- round(stats::pchisq(chi_y, 1, lower.tail = F), 4)
163 -
164 -
  list(chi_y = chi_y, sig_y = sig_y, total = total, prod_totals = prod_totals)
165 -
}
166 -
167 -
mh_chsq <- function(twoway, total, prod_totals) {
168 -
  num    <- twoway[1] - ((rowSums(twoway)[1] * colSums(twoway)[1]) / total)
169 -
  den    <- prod_totals / ((total ^ 3) - (total ^ 2))
170 -
  chimh  <- round((num ^ 2) / den, 4)
171 -
  sig_mh <- round(stats::pchisq(chimh, 1, lower.tail = F), 4)
172 -
173 -
  list(chimh = chimh, sig_mh = sig_mh)
174 -
}
175 -
176 -
efm <- function(twoway, dk) {
177 -
  mat1 <- matrix(rowSums(twoway) / sum(twoway), nrow = dk[1])
178 -
  mat2 <- matrix(colSums(twoway), ncol = dk[2])
179 -
180 -
  mat1 %*% mat2
181 -
}
182 -
183 -
pear_chi <- function(twoway, df, ef) {
184 -
  chi <- round(sum(((twoway - ef) ^ 2) / ef), 4)
185 -
  sig <- round(stats::pchisq(chi, df, lower.tail = F), 4)
186 -
187 -
  list(chi = chi, sig = sig)
107 +
#' @rdname infer_chisq_assoc_test
108 +
#' @usage NULL
109 +
#'
110 +
chisq_test <- function(x, y) {
111 +
  .Deprecated("infer_chisq_assoc_test()")
188 112
}
189 113
190 -
chigf <- function(x, y, chi) {
191 -
  twoway <- matrix(table(x, y),
192 -
    nrow = nlevels(as.factor(x)),
193 -
    ncol = nlevels(as.factor(y))
194 -
  )
195 -
  total <- sum(twoway)
196 -
  phi   <- round(sqrt(chi / total), 4)
197 -
  cc    <- round(sqrt(chi / (chi + total)), 4)
198 -
  q     <- min(nrow(twoway), ncol(twoway))
199 -
  cv    <- round(sqrt(chi / (total * (q - 1))), 4)
200 -
201 -
  list(phi = phi, cc = cc, cv = cv)
202 -
}
114 +
#' @export
115 +
print.infer_chisq_assoc_test <- function(x, ...) {
116 +
  print_chisq_test(x)
117 +
}

@@ -1,3 +1,6 @@
Loading
1 +
#' @importFrom stats anova model.frame formula
2 +
#' @importFrom purrr map_int
3 +
#' @importFrom rlang quo_is_null
1 4
#' @title Levene's test for equality of variances
2 5
#' @description  \code{infer_levene_test} reports Levene's robust test statistic
3 6
#' for the equality of variances and the
@@ -52,32 +55,37 @@
Loading
52 55
#' @rdname infer_levene_test
53 56
infer_levene_test.default <- function(data, ..., group_var = NULL,
54 57
                                      trim_mean = 0.1) {
58 +
  groupvar <- enquo(group_var)
55 59
56 -
  groupvar  <- deparse(substitute(group_var))
57 -
  varyables <- vapply(substitute(...()), deparse, NA_character_)
58 -
  fdata     <- data[varyables]
60 +
  varyables <- quos(...)
59 61
60 -
  if (groupvar == "NULL") {
61 -
    z  <- as.list(fdata)
62 -
    ln <- unlist(lapply(z, length))
62 +
  fdata <-
63 +
    data %>%
64 +
    select(!!! varyables)
65 +
66 +
  if (quo_is_null(groupvar)) {
67 +
    z <- as.list(fdata)
68 +
    ln <- z %>% map_int(length)
63 69
    ly <- seq_len(length(z))
64 70
65 71
    if (length(z) < 2) {
66 72
      stop("Please specify at least two variables.", call. = FALSE)
67 73
    }
68 74
69 -
    out   <- gvar(ln, ly)
75 +
    out <- gvar(ln, ly)
70 76
    fdata <- unlist(z)
71 -
72 77
    groupvars <-
73 78
      out %>%
74 79
      unlist() %>%
75 80
      as.factor()
76 -
77 81
  } else {
82 +
    fdata <-
83 +
      fdata %>%
84 +
      pull(1)
78 85
79 -
    fdata <- fdata[[1]]
80 -
    groupvars <- data[[groupvar]]
86 +
    groupvars <-
87 +
      data %>%
88 +
      pull(!! groupvar)
81 89
82 90
    if (length(fdata) != length(groupvars)) {
83 91
      stop("Length of variable and group_var do not match.", call. = FALSE)
@@ -86,78 +94,28 @@
Loading
86 94
87 95
  k <- lev_comp(fdata, groupvars, trim_mean)
88 96
89 -
  out <-
90 -
    list(avg   = k$avg,
91 -
         avgs  = k$avgs,
92 -
         bf    = k$bf,
93 -
         bft   = k$bft,
94 -
         d_df  = k$d_df,
95 -
         lens  = k$lens,
96 -
         lev   = k$lev,
97 -
         levs  = k$levs,
98 -
         n     = k$n,
99 -
         n_df  = k$n_df,
100 -
         p_bf  = k$p_bf,
101 -
         p_bft = k$p_bft,
102 -
         p_lev = k$p_lev,
103 -
         sd    = k$sd,
104 -
         sds   = k$sds)
97 +
  out <- list(
98 +
    bf = k$bf, p_bf = k$p_bf, lev = k$lev, p_lev = k$p_lev,
99 +
    bft = k$bft, p_bft = k$p_bft, avgs = k$avgs, sds = k$sds,
100 +
    avg = k$avg, sd = k$sd, n = k$n, levs = k$levs, n_df = k$n_df,
101 +
    d_df = k$d_df, lens = k$lens
102 +
  )
105 103
106 104
  class(out) <- "infer_levene_test"
107 105
  return(out)
108 106
}
109 107
110 108
#' @export
109 +
#' @rdname infer_levene_test
110 +
#' @usage NULL
111 111
#'
112 -
print.infer_levene_test <- function(x, ...) {
113 -
  print_levene_test(x)
112 +
levene_test <- function(variable, ..., group_var = NULL,
113 +
                        trim.mean = 0.1) {
114 +
  .Deprecated("infer_levene_test()")
114 115
}
115 116
116 -
lev_metric <- function(cvar, gvar, loc, ...) {
117 -
118 -
  metric <- tapply(cvar, gvar, loc, ...)
119 -
  y      <- abs(cvar - metric[gvar])
120 -
  result <- stats::anova(stats::lm(y ~ gvar))
121 -
122 -
  list(
123 -
    fstat = result$`F value`[1],
124 -
    p     = result$`Pr(>F)`[1]
125 -
  )
126 -
127 -
}
128 -
129 -
lev_comp <- function(variable, group_var, trim.mean) {
130 -
131 -
  comp <- stats::complete.cases(variable, group_var)
132 -
  n    <- length(comp)
133 -
  k    <- nlevels(group_var)
134 -
135 -
  cvar <- variable[comp]
136 -
  gvar <- group_var[comp]
137 -
138 -
  lens <- tapply(cvar, gvar, length)
139 -
  avgs <- tapply(cvar, gvar, mean)
140 -
  sds  <- tapply(cvar, gvar, stats::sd)
141 -
142 -
  bf   <- lev_metric(cvar, gvar, mean)
143 -
  lev  <- lev_metric(cvar, gvar, stats::median)
144 -
  bft  <- lev_metric(cvar, gvar, mean, trim = trim.mean)
145 -
146 -
  list(
147 -
    avg   = round(mean(cvar), 2),
148 -
    avgs  = round(avgs, 2),
149 -
    bf    = round(bf$fstat, 4),
150 -
    bft   = round(bft$fstat, 4),
151 -
    d_df  = (n - k),
152 -
    lens  = lens,
153 -
    lev   = round(lev$fstat, 4),
154 -
    levs  = levels(gvar),
155 -
    n     = n,
156 -
    n_df  = (k - 1),
157 -
    p_bf  = round(bf$p, 4),
158 -
    p_bft = round(bft$p, 4),
159 -
    p_lev = round(lev$p, 4),
160 -
    sd    = round(stats::sd(cvar), 2),
161 -
    sds   = round(sds, 2))
162 -
163 -
}
117 +
#' @export
118 +
#'
119 +
print.infer_levene_test <- function(x, ...) {
120 +
  print_levene_test(x)
121 +
}

@@ -1,3 +1,4 @@
Loading
1 +
#' @importFrom stats qchisq
1 2
#' @title One Sample Variance Comparison Test
2 3
#' @description  \code{infer_os_var_test} performs tests on the equality of standard
3 4
#' deviations (variances).It tests that the standard deviation of a sample is
@@ -55,9 +56,11 @@
Loading
55 56
#'
56 57
infer_os_var_test.default <- function(data, x, sd, confint = 0.95,
57 58
                                      alternative = c("both", "less", "greater", "all"), ...) {
59 +
  x1 <- enquo(x)
58 60
59 -
  x1   <- deparse(substitute(x))
60 -
  xone <- data[[x1]]
61 +
  xone <-
62 +
    data %>%
63 +
    pull(!! x1)
61 64
62 65
  if (!is.numeric(xone)) {
63 66
    stop("x must be numeric")
@@ -72,73 +75,37 @@
Loading
72 75
  }
73 76
74 77
  type <- match.arg(alternative)
75 -
  varname <- names(data[x1])
78 +
79 +
  varname <-
80 +
    data %>%
81 +
    select(!! x1) %>%
82 +
    names()
83 +
76 84
  k <- osvar_comp(xone, sd, confint)
77 85
78 -
  result <-
79 -
    list(chi      = round(k$chi, 4),
80 -
         c_lwr    = k$c_lwr,
81 -
         conf     = k$conf,
82 -
         c_upr    = k$c_upr,
83 -
         df       = k$df,
84 -
         n        = k$n,
85 -
         p_lower  = k$p_lower,
86 -
         p_two    = k$p_two,
87 -
         p_upper  = k$p_upper,
88 -
         sd       = k$sd,
89 -
         se       = round(k$se, 4),
90 -
         sigma    = round(k$sigma, 4),
91 -
         type     = type,
92 -
         var_name = varname,
93 -
         xbar     = round(k$xbar, 4))
86 +
  result <- list(
87 +
    n = k$n, sd = k$sd, sigma = round(k$sigma, 4), se = round(k$se, 4),
88 +
    chi = round(k$chi, 4),
89 +
    df = k$df, p_lower = k$p_lower, p_upper = k$p_upper, p_two = k$p_two,
90 +
    xbar = round(k$xbar, 4), c_lwr = k$c_lwr, c_upr = k$c_upr, var_name = varname,
91 +
    conf = k$conf, type = type
92 +
  )
94 93
95 94
  class(result) <- "infer_os_var_test"
96 95
  return(result)
97 96
}
98 97
99 98
#' @export
99 +
#' @rdname infer_os_var_test
100 +
#' @usage NULL
100 101
#'
101 -
print.infer_os_var_test <- function(x, ...) {
102 -
  print_os_vartest(x)
102 +
os_vartest <- function(x, sd, confint = 0.95,
103 +
                       alternative = c("both", "less", "greater", "all"), ...) {
104 +
  .Deprecated("infer_os_var_test()")
103 105
}
104 106
105 -
osvar_comp <- function(x, sd, confint) {
106 -
107 -
  n     <- length(x)
108 -
  df    <- n - 1
109 -
  xbar  <- mean(x)
110 -
  sigma <- stats::sd(x)
111 -
  se    <- sigma / sqrt(n)
112 -
  chi   <- df * ((sigma / sd) ^ 2)
113 -
114 -
  p_lower <- stats::pchisq(chi, df)
115 -
  p_upper <- stats::pchisq(chi, df, lower.tail = F)
116 -
117 -
  if (p_lower < 0.5) {
118 -
    p_two <- stats::pchisq(chi, df) * 2
119 -
  } else {
120 -
    p_two <- stats::pchisq(chi, df, lower.tail = F) * 2
121 -
  }
122 -
123 -
  conf  <- confint
124 -
  a     <- (1 - conf) / 2
125 -
  al    <- 1 - a
126 -
  tv    <- df * sigma
127 -
  c_lwr <- round(tv / stats::qchisq(al, df), 4)
128 -
  c_upr <- round(tv / stats::qchisq(a, df), 4)
129 -
130 -
  list(chi     = chi,
131 -
       c_lwr   = c_lwr,
132 -
       conf    = conf,
133 -
       c_upr   = c_upr,
134 -
       df      = df,
135 -
       n       = n,
136 -
       p_lower = p_lower,
137 -
       p_two   = p_two,
138 -
       p_upper = p_upper,
139 -
       sd      = sd,
140 -
       se      = se,
141 -
       sigma   = sigma,
142 -
       xbar    = xbar)
143 -
144 -
}
107 +
#' @export
108 +
#'
109 +
print.infer_os_var_test <- function(x, ...) {
110 +
  print_os_vartest(x)
111 +
}

@@ -2,26 +2,26 @@
Loading
2 2
3 3
  # width
4 4
  w1 <- nchar("Between Groups")
5 -
  w2 <- max(nchar("Squares"), nchar(data$ss_between), nchar(data$ss_within), nchar(data$ss_total))
5 +
  w2 <- max(nchar("Squares"), nchar(data$between), nchar(data$within), nchar(data$total))
6 6
  w3 <- max(nchar("DF"), nchar(data$df_btw), nchar(data$df_btw), nchar(data$df_within), nchar(data$df_total))
7 7
  w4 <- max(nchar("Mean Square"), nchar(data$ms_btw), nchar(data$ms_within))
8 -
  w5 <- max(nchar("F"), nchar(data$fstat))
9 -
  w6 <- max(nchar("Sig."), nchar(format(data$pval, nsmall = 4)))
8 +
  w5 <- max(nchar("F"), nchar(data$f))
9 +
  w6 <- max(nchar("Sig."), nchar(format(data$sig, nsmall = 4)))
10 10
  w <- sum(w1, w2, w3, w4, w5, w6, 21)
11 -
  w7 <- nchar(data$rmse)
11 +
  w7 <- nchar(data$sigma)
12 12
13 -
  dc <- as.vector(data$group_stats[, 1])
13 +
  dc <- as.vector(data$tab[, 1])
14 14
15 15
  w8 <- max(nchar("Category"), max(nchar(dc)))
16 -
  w9 <- max(nchar("N"), max(nchar(data$group_stats[[2]])))
17 -
  w10 <- max(nchar("Mean"), max(nchar(format(data$group_stats[[3]], nsmall = 3))))
18 -
  w11 <- max(nchar("Std. Dev."), max(nchar(format(data$group_stats[[4]], nsmall = 3))))
16 +
  w9 <- max(nchar("N"), max(nchar(data$tab[[2]])))
17 +
  w10 <- max(nchar("Mean"), max(nchar(format(data$tab[[3]], nsmall = 3))))
18 +
  w11 <- max(nchar("Std. Dev."), max(nchar(format(data$tab[[4]], nsmall = 3))))
19 19
  wr <- sum(w8, w9, w10, w11, 13)
20 20
21 21
22 -
  p <- format(data$pval, nsmall = 4)
23 -
  q <- nrow(data$group_stats)
24 -
  s <- length(data$group_stats)
22 +
  p <- format(data$p, nsmall = 4)
23 +
  q <- nrow(data$tab)
24 +
  s <- length(data$tab)
25 25
26 26
27 27
  cat(fg("ANOVA", w), "\n")
@@ -29,9 +29,9 @@
Loading
29 29
  cat(fg("", w1), fs(), fg("Sum of", w2), fs(), fg("", w3), fs(), fg("", w4), fs(), fg("", w5), fs(), fg("", w6), "\n")
30 30
  cat(fg("", w1), fs(), fg("Squares", w2), fs(), fg("DF", w3), fs(), fg("Mean Square", w4), fs(), fg("F", w5), fs(), fg("Sig.", w6), "\n")
31 31
  cat(rep("-", w), sep = "", "\n")
32 -
  cat(fl("Between Groups", w1), fs(), fg(data$ss_between, w2), fs(), fg(data$df_btw, w3), fs(), fg(data$ms_btw, w4), fs(), fg(data$fstat, w5), fs(), fg(data$pval, w6), "\n")
33 -
  cat(fl("Within Groups", w1), fs(), fg(data$ss_within, w2), fs(), fg(data$df_within, w3), fs(), fg(data$ms_within, w4), fs(), fg("", w5), fs(), fg("", w6), "\n")
34 -
  cat(fl("Total", w1), fs(), fg(data$ss_total, w2), fs(), fg(data$df_total, w3), fs(), fg("", w4), fs(), fg("", w5), fs(), fg("", w6), "\n")
32 +
  cat(fl("Between Groups", w1), fs(), fg(data$between, w2), fs(), fg(data$df_btw, w3), fs(), fg(data$ms_btw, w4), fs(), fg(data$f, w5), fs(), fg(p, w6), "\n")
33 +
  cat(fl("Within Groups", w1), fs(), fg(data$within, w2), fs(), fg(data$df_within, w3), fs(), fg(data$ms_within, w4), fs(), fg("", w5), fs(), fg("", w6), "\n")
34 +
  cat(fl("Total", w1), fs(), fg(data$total, w2), fs(), fg(data$df_total, w3), fs(), fg("", w4), fs(), fg("", w5), fs(), fg("", w6), "\n")
35 35
  cat(rep("-", w), sep = "", "\n\n")
36 36
37 37
  cat(fg("Report", wr), "\n")
@@ -40,14 +40,14 @@
Loading
40 40
  cat(rep("-", wr), sep = "", "\n")
41 41
  for (i in seq_len(q)) {
42 42
    cat(
43 -
      fc(data$group_stats[[i, 1]], w8), fs(), fg(data$group_stats[[i, 2]], w9), fs(), fk(format(round(data$group_stats[[i, 3]], 3), nsmall = 3), w10),
44 -
      fs(), fk(format(round(data$group_stats[[i, 4]], 3), nsmall = 3), w11), "\n"
43 +
      fc(data$tab[[i, 1]], w8), fs(), fg(data$tab[[i, 2]], w9), fs(), fk(format(round(data$tab[[i, 3]], 3), nsmall = 3), w10),
44 +
      fs(), fk(format(round(data$tab[[i, 4]], 3), nsmall = 3), w11), "\n"
45 45
    )
46 46
  }
47 47
  cat(rep("-", wr), sep = "", "\n\n")
48 48
49 49
  cat(fl("Number of obs", 13), "=", fl(data$obs, w7), fs(), fl("R-squared", 13), "=", data$r2, "\n")
50 -
  cat(fl("Root MSE", 13), "=", data$rmse, fs(), fl("Adj R-squared", 13), "=", data$adjusted_r2, "\n\n")
50 +
  cat(fl("Root MSE", 13), "=", data$sigma, fs(), fl("Adj R-squared", 13), "=", data$ar2, "\n\n")
51 51
}
52 52
53 53
@@ -108,13 +108,19 @@
Loading
108 108
    cat(" ", rep("-", w11), sep = "", "\n")
109 109
    cat(
110 110
      " ", format("Lower", width = w6, justify = "left"), fs(), format(paste0("Pr(k <= ", data$k, ")"), width = w8, justify = "centre"), fs(),
111 -
      format(as.character(data$pval_lower), width = w9, justify = "centre"), "\n"
111 +
      format(as.character(data$lower), width = w9, justify = "centre"), "\n"
112 112
    )
113 113
    cat(
114 114
      " ", format("Upper", width = w6, justify = "left"), fs(), format(paste0("Pr(k >= ", data$k, ")"), width = w8, justify = "centre"), fs(),
115 -
      format(as.character(data$pval_upper), width = w9, justify = "centre"), "\n"
116 -
    )
117 -
    
115 +
      format(as.character(data$upper), width = w9, justify = "centre"), "\n"
116 +
    )
117 +
    #       if (data$ik < 0) {
118 +
    #         cat(" ", format('Two', width = w6, justify = 'left'), fs(), format(paste0('Pr(k >= ', data$ik, ')'), width = w8, justify = 'left'), fs(),
119 +
    #         format(data$two_tail, width = w9, justify = 'centre'),'\n')
120 +
    #       } else {
121 +
    #         cat(" ", format('Two', width = w6, justify = 'left'), fs(), format(paste0('Pr(k <= ', data$k, ' or k >= ', data$ik, ')'), width = w8, justify = 'left'), fs(),
122 +
    #         format(data$two_tail, width = w9, justify = 'centre'),'\n')
123 +
    #       }
118 124
    cat(" ", rep("-", w11), sep = "", "\n")
119 125
  } else {
120 126
    cat("\n\n", format("Test Summary", width = w10, justify = "centre"), "\n")
@@ -126,13 +132,19 @@
Loading
126 132
    cat(" ", rep("-", w10), sep = "", "\n")
127 133
    cat(
128 134
      " ", format("Lower", width = w6, justify = "left"), fs(), format(paste0("Pr(k <= ", data$k, ")"), width = w7, justify = "centre"), fs(),
129 -
      format(as.character(data$pval_lower), width = w9, justify = "centre"), "\n"
135 +
      format(as.character(data$lower), width = w9, justify = "centre"), "\n"
130 136
    )
131 137
    cat(
132 138
      " ", format("Upper", width = w6, justify = "left"), fs(), format(paste0("Pr(k >= ", data$k, ")"), width = w7, justify = "centre"), fs(),
133 -
      format(as.character(data$pval_upper), width = w9, justify = "centre"), "\n"
134 -
    )
135 -
   
139 +
      format(as.character(data$upper), width = w9, justify = "centre"), "\n"
140 +
    )
141 +
    #         if (data$ik < 0) {
142 +
    #         cat(" ", format('Two', width = w6, justify = 'left'), fs(), format(paste0('Pr(k >= ', data$k, ')'), width = w7, justify = 'left'), fs(),
143 +
    #         format(data$two_tail, width = w9, justify = 'centre'),'\n')
144 +
    #         } else {
145 +
    #           cat(" ", format('Two', width = w6, justify = 'left'), fs(), format(paste0('Pr(k <= ', data$ik, ' or k >= ', data$k, ')'), width = w7, justify = 'left'), fs(),
146 +
    #           format(data$two_tail, width = w9, justify = 'centre'),'\n')
147 +
    #         }
136 148
    cat(" ", rep("-", w10), sep = "", "\n")
137 149
  }
138 150
}
@@ -872,8 +884,8 @@
Loading
872 884
  width1 <- nchar("Likelihood Ratio Chi-Square")
873 885
  width2 <- max(nchar(x$df))
874 886
  width3 <- max(
875 -
    nchar(x$chisquare), nchar(x$chisquare_lr), nchar(x$chisquare_mantel_haenszel), nchar(x$chisquare_adjusted), nchar(x$phi_coefficient),
876 -
    nchar(x$contingency_coefficient), nchar(x$cramers_v)
887 +
    nchar(x$chi), nchar(x$chilr), nchar(x$chimh), nchar(x$chiy), nchar(x$phi),
888 +
    nchar(x$cc), nchar(x$cv)
877 889
  )
878 890
  width4 <- 6
879 891
  widthn <- sum(width1, width2, width3, width4, 12)
@@ -887,31 +899,31 @@
Loading
887 899
    cat(rep("-", widthn), sep = "", "\n")
888 900
    cat(
889 901
      format("Chi-Square", width = width1, justify = "left"), formats(), format(x$df, width = width2, justify = "centre"), formats(),
890 -
      format(x$chisquare, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$pval_chisquare, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = ""
902 +
      format(x$chi, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$sig, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = ""
891 903
    )
892 904
    cat(
893 905
      format("Likelihood Ratio Chi-Square", width = width1, justify = "left"), formats(), format(x$df, width = width2, justify = "centre"), formats(),
894 -
      format(x$chisquare_lr, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$pval_chisquare_lr, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = ""
906 +
      format(x$chilr, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$siglr, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = ""
895 907
    )
896 908
    cat(
897 909
      format("Continuity Adj. Chi-Square", width = width1, justify = "left"), formats(), format(x$df, width = width2, justify = "right"), formats(),
898 -
      format(x$chisquare_adjusted, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$pval_chisquare_adjusted, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = ""
910 +
      format(x$chiy, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$sigy, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = ""
899 911
    )
900 912
    cat(
901 913
      format("Mantel-Haenszel Chi-Square", width = width1, justify = "left"), formats(), format(x$df, width = width2, justify = "right"), formats(),
902 -
      format(x$chisquare_mantel_haenszel, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$pval_chisquare_mantel_haenszel, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = ""
914 +
      format(x$chimh, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$sigmh, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = ""
903 915
    )
904 916
    cat(
905 917
      format("Phi Coefficient", width = width1, justify = "left"), formats(), format(" ", width = width2, justify = "right"), formats(),
906 -
      format(x$phi_coefficient, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = ""
918 +
      format(x$phi, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = ""
907 919
    )
908 920
    cat(
909 921
      format("Contingency Coefficient", width = width1, justify = "left"), formats(), format(" ", width = width2, justify = "right"), formats(),
910 -
      format(x$contingency_coefficient, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = ""
922 +
      format(x$cc, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = ""
911 923
    )
912 924
    cat(
913 925
      format("Cramer's V", width = width1, justify = "left"), formats(), format(" ", width = width2, justify = "right"), formats(),
914 -
      format(x$cramers_v, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = ""
926 +
      format(x$cv, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = ""
915 927
    )
916 928
    cat(rep("-", widthn), sep = "", "\n")
917 929
  } else {
@@ -923,23 +935,23 @@
Loading
923 935
    cat(rep("-", widthn), sep = "", "\n")
924 936
    cat(
925 937
      format("Chi-Square", width = width1, justify = "left"), formats(), format(x$df, width = width2, justify = "centre"), formats(),
926 -
      format(x$chisquare, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$pval_chisquare, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = ""
938 +
      format(x$chi, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$sig, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = ""
927 939
    )
928 940
    cat(
929 941
      format("Likelihood Ratio Chi-Square", width = width1, justify = "left"), formats(), format(x$df, width = width2, justify = "centre"), formats(),
930 -
      format(x$chisquare_lr, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$pval_chisquare_lr, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = ""
942 +
      format(x$chilr, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$siglr, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = ""
931 943
    )
932 944
    cat(
933 945
      format("Phi Coefficient", width = width1, justify = "left"), formats(), format(" ", width = width2, justify = "right"), formats(),
934 -
      format(x$phi_coefficient, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = ""
946 +
      format(x$phi, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = ""
935 947
    )
936 948
    cat(
937 949
      format("Contingency Coefficient", width = width1, justify = "left"), formats(), format(" ", width = width2, justify = "right"), formats(),
938 -
      format(x$contingency_coefficient, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = ""
950 +
      format(x$cc, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = ""
939 951
    )
940 952
    cat(
941 953
      format("Cramer's V", width = width1, justify = "left"), formats(), format(" ", width = width2, justify = "right"), formats(),
942 -
      format(x$cramers_v, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = ""
954 +
      format(x$cv, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = ""
943 955
    )
944 956
    cat(rep("-", widthn), sep = "", "\n")
945 957
  }
@@ -948,37 +960,68 @@
Loading
948 960
949 961
print_chisq_gof <- function(data) {
950 962
  cwidth <- max(nchar("Chi-Square"), nchar("DF"), nchar("Pr > Chi Sq"), nchar("Sample Size"))
951 -
  nwidth <- max(nchar(data$chisquare), nchar(data$degrees_of_freedom), nchar(data$pvalue), 
952 -
    nchar(data$sample_size))
963 +
  nwidth <- max(nchar(data$chisquare), nchar(data$df), nchar(data$pvalue), nchar(data$ssize))
953 964
  w1 <- sum(cwidth, nwidth, 6)
954 -
  lw <- max(nchar("Variable"), nchar(data$categories))
955 -
  ow <- max(nchar("Observed"), nchar(data$observed_frequency))
956 -
  ew <- max(nchar("Expected"), nchar(data$expected_frequency))
965 +
  lw <- max(nchar("Variable"), nchar(data$names))
966 +
  ow <- max(nchar("Observed"), nchar(data$obs))
967 +
  ew <- max(nchar("Expected"), nchar(data$exp))
957 968
  dw <- max(nchar("% Deviation"), nchar(data$deviation))
958 -
  rw <- max(nchar("Std. Residuals"), nchar(data$std_residuals))
969 +
  rw <- max(nchar("Std. Residuals"), nchar(data$std))
959 970
  w <- sum(lw, ow, ew, dw, rw, 16)
960 971
961 972
962 973
  cat(format("Test Statistics", width = w1, justify = "centre"), "\n")
963 974
  cat(rep("-", w1), sep = "", "\n")
964 975
  cat(format("Chi-Square", width = cwidth, justify = "left"), formats(), format(data$chisquare, width = nwidth, justify = "right"), "\n")
965 -
  cat(format("DF", width = cwidth, justify = "left"), formats(), format(data$degrees_of_freedom, width = nwidth, justify = "right"), "\n")
976 +
  cat(format("DF", width = cwidth, justify = "left"), formats(), format(data$df, width = nwidth, justify = "right"), "\n")
966 977
  cat(format("Pr > Chi Sq", width = cwidth, justify = "left"), formats(), format(data$pvalue, width = nwidth, justify = "right"), "\n")
967 -
  cat(format("Sample Size", width = cwidth, justify = "left"), formats(), format(data$sample_size, width = nwidth, justify = "right"), "\n\n")
978 +
  cat(format("Sample Size", width = cwidth, justify = "left"), formats(), format(data$ssize, width = nwidth, justify = "right"), "\n\n")
968 979
  cat(format(paste("Variable:", data$varname), width = w, justify = "centre"), "\n")
969 980
  cat(rep("-", w), sep = "", "\n")
970 981
  cat(fg("Category", lw), fs(), fg("Observed", ow), fs(), fg("Expected", ew), fs(), fg("% Deviation", dw), fs(), fg("Std. Residuals", rw), "\n")
971 982
  cat(rep("-", w), sep = "", "\n")
972 -
  for (i in seq_len(data$n_levels)) {
983 +
  for (i in seq_len(data$level)) {
973 984
    cat(
974 -
      fg(data$categories[i], lw), fs(), fg(data$observed_frequency[i], ow), fs(), 
975 -
      fg(data$expected_frequency[i], ew), fs(), fg(data$deviation[i], dw), fs(), 
976 -
      fg(data$std_residuals[i], rw), "\n"
985 +
      fg(data$names[i], lw), fs(), fg(data$obs[i], ow), fs(), fg(data$exp[i], ew), fs(),
986 +
      fg(data$deviation[i], dw), fs(), fg(data$std[i], rw), "\n"
977 987
    )
978 988
  }
979 989
  cat(rep("-", w), sep = "", "\n")
980 990
}
981 991
992 +
993 +
# print_os_chisqgof <- function(data) {
994 +
995 +
#   cwidth <- max(nchar('Chi-Square'), nchar('DF'), nchar('Pr > Chi Sq'), nchar('Sample Size'))
996 +
#   nwidth <- max(nchar(data$chisquare), nchar(data$df), nchar(data$pvalue), nchar(data$ssize))
997 +
#   w1 <- sum(cwidth, nwidth, 6)
998 +
#   lw <- max(nchar('Variable'), nchar(data$names))
999 +
#   ow <- max(nchar('Observed'), nchar(data$obs))
1000 +
#   ew <- max(nchar('Expected'), nchar(data$exp))
1001 +
#   dw <- max(nchar('% Deviation'), nchar(data$deviation))
1002 +
#   rw <- max(nchar('Std. Residuals'), nchar(data$std))
1003 +
#   w <- sum(lw, ow, ew, dw, rw, 16)
1004 +
1005 +
1006 +
#   cat(format("Test Statistics", width = w1, justify = "centre"), "\n")
1007 +
#   cat(rep("-", w1), sep = "", '\n')
1008 +
#   cat(format('Chi-Square', width = cwidth, justify = 'left'), formats(), format(data$chisquare, width = nwidth, justify = 'right'), '\n')
1009 +
#   cat(format('DF', width = cwidth, justify = 'left'), formats(), format(data$df, width = nwidth, justify = 'right'), '\n')
1010 +
#   cat(format('Pr > Chi Sq', width = cwidth, justify = 'left'), formats(), format(data$pvalue, width = nwidth, justify = 'right'), '\n')
1011 +
#   cat(format('Sample Size', width = cwidth, justify = 'left'), formats(), format(data$ssize, width = nwidth, justify = 'right'), '\n\n')
1012 +
#   cat(format(paste('Variable:', data$varname), width = w, justify = 'centre'), '\n')
1013 +
#   cat(rep("-", w), sep = "", '\n')
1014 +
#   cat(fg('Category', lw), fs(), fg('Observed', ow), fs(), fg('Expected', ew), fs(), fg('% Deviation', dw), fs(), fg('Std. Residuals', rw), '\n')
1015 +
#   cat(rep("-", w), sep = "", '\n')
1016 +
#   for (i in seq_len(data$level)) {
1017 +
#     cat(fg(data$names[i], lw), fs(), fg(data$obs[i], ow), fs(), fg(data$exp[i], ew), fs(),
1018 +
#       fg(data$deviation[i], dw), fs(), fg(data$std[i], rw), '\n')
1019 +
#   }
1020 +
#   cat(rep("-", w), sep = "", '\n')
1021 +
1022 +
# }
1023 +
1024 +
982 1025
print_runs_test <- function(x) {
983 1026
  cat(
984 1027
    "Runs Test\n",
@@ -1301,4 +1344,4 @@
Loading
1301 1344
    )
1302 1345
    cat("\n", rep("-", all_width), sep = "")
1303 1346
  }
1304 -
}
1347 +
}

@@ -1,3 +1,4 @@
Loading
1 +
#' @importFrom stats pnorm
1 2
#' @title One Sample Test of Proportion
2 3
#' @description  \code{infer_os_prop_test} compares proportion in one group to a
3 4
#' specified population proportion.
@@ -45,26 +46,29 @@
Loading
45 46
infer_os_prop_test.default <- function(data, variable = NULL, prob = 0.5, phat = 0.5,
46 47
                                       alternative = c("both", "less", "greater", "all")) {
47 48
  if (is.numeric(data)) {
48 -
49 49
    method <- match.arg(alternative)
50 50
    k <- prop_comp(
51 51
      data, prob = prob, phat = phat,
52 52
      alternative = method
53 53
    )
54 -
55 54
  } else {
55 +
    varyables <- enquo(variable)
56 +
57 +
    fdata <-
58 +
      data %>%
59 +
      pull(!! varyables)
56 60
57 -
    varyables <- deparse(substitute(variable))
58 -
    fdata     <- data[[varyables]]
59 -
    n1        <- length(fdata)
61 +
    n1 <- length(fdata)
60 62
61 63
    n2 <-
62 64
      fdata %>%
63 65
      table() %>%
64 66
      `[[`(2)
65 67
66 -
    phat   <- round(n2 / n1, 4)
67 -
    prob   <- prob
68 +
    phat <- round(n2 / n1, 4)
69 +
70 +
    prob <- prob
71 +
68 72
    method <- match.arg(alternative)
69 73
70 74
    k <- prop_comp(
@@ -73,68 +77,29 @@
Loading
73 77
    )
74 78
  }
75 79
76 -
  result <-
77 -
    list(alt       = k$alt,
78 -
         deviation = k$deviation,
79 -
         exp       = k$exp,
80 -
         n         = k$n,
81 -
         obs       = k$obs,
82 -
         p         = k$p,
83 -
         phat      = k$phat,
84 -
         sig       = k$sig,
85 -
         std       = k$std,
86 -
         z         = k$z)
80 +
  result <- list(
81 +
    n = k$n, phat = k$phat, p = k$p, z = k$z, sig = k$sig,
82 +
    alt = k$alt, obs = k$obs, exp = k$exp,
83 +
    deviation = k$deviation,
84 +
    std = k$std
85 +
  )
87 86
88 87
  class(result) <- "infer_os_prop_test"
89 88
  return(result)
90 89
}
91 90
91 +
92 92
#' @export
93 +
#' @rdname infer_os_prop_test
94 +
#' @usage NULL
93 95
#'
94 -
print.infer_os_prop_test <- function(x, ...) {
95 -
  print_prop_test(x)
96 +
prop_test <- function(n, prob = 0.5,
97 +
                      alternative = c("both", "less", "greater", "all"), phat, ...) {
98 +
  .Deprecated("infer_os_prop_test()")
96 99
}
97 100
98 -
prop_comp <- function(n, prob, alternative, phat) {
99 -
100 -
  n    <- n
101 -
  phat <- phat
102 -
  p    <- prob
103 -
  q    <- 1 - p
104 -
  obs  <- c(n * (1 - phat), n * phat)
105 -
  exp  <- n * c(q, p)
106 -
  dif  <- obs - exp
107 -
  dev  <- round((dif / exp) * 100, 2)
108 -
  std  <- round(dif / sqrt(exp), 2)
109 -
  num  <- phat - prob
110 -
  den  <- sqrt((p * q) / n)
111 -
  z    <- round(num / den, 4)
112 -
  lt   <- round(stats::pnorm(z), 4)
113 -
  ut   <- round(1 - stats::pnorm(z), 4)
114 -
  tt   <- round((1 - stats::pnorm(abs(z))) * 2, 4)
115 -
  alt  <- alternative
116 -
117 -
  if (alt == "all") {
118 -
    sig <- c("two-both" = tt, "less" = lt, "greater" = ut)
119 -
  } else if (alt == "greater") {
120 -
    sig <- ut
121 -
  } else if (alt == "less") {
122 -
    sig <- lt
123 -
  } else {
124 -
    sig <- tt
125 -
  }
126 -
127 -
  out <-
128 -
    list(alt       = alt,
129 -
         deviation = format(dev, nsmall = 2),
130 -
         exp       = exp,
131 -
         n         = n,
132 -
         obs       = obs,
133 -
         p         = prob,
134 -
         phat      = phat,
135 -
         sig       = sig,
136 -
         std       = format(std, nsmall = 2),
137 -
         z         = z)
138 -
139 -
  return(out)
140 -
}
101 +
#' @export
102 +
#'
103 +
print.infer_os_prop_test <- function(x, ...) {
104 +
  print_prop_test(x)
105 +
}

@@ -1,29 +1,32 @@
Loading
1 +
#' @importFrom stats as.formula lm pf
2 +
#' @importFrom rlang enquo !!
1 3
#' @title One Way ANOVA
2 4
#' @description One way analysis of variance
3 5
#' @param data a \code{data.frame} or a \code{tibble}
4 6
#' @param x numeric; column in \code{data}
5 7
#' @param y factor; column in \code{data}
6 8
#' @param ... additional arguments passed to or from other methods
7 -
#' @return \code{infer_oneway_anova} returns an object of class \code{"infer_oneway_anova"}.
8 -
#' An object of class \code{"infer_oneway_anova"} is a list containing the
9 +
#' @return \code{owanova} returns an object of class \code{"owanova"}.
10 +
#' An object of class \code{"owanova"} is a list containing the
9 11
#' following components:
10 12
#'
11 -
#' \item{adjusted_r2}{adjusted r squared value}
13 +
#' \item{between}{between group sum of squares}
14 +
#' \item{within}{within group sum of squares}
15 +
#' \item{total}{total sum of squares}
12 16
#' \item{df_btw}{between groups degress of freedom}
13 17
#' \item{df_within}{within groups degress of freedom}
14 18
#' \item{df_total}{total degress of freedom}
15 -
#' \item{fstat}{f value}
16 -
#' \item{group_stats}{group statistics}
17 19
#' \item{ms_btw}{between groups mean square}
18 20
#' \item{ms_within}{within groups mean square}
19 -
#' \item{obs}{number of observations}
20 -
#' \item{pval}{p value}
21 +
#' \item{f}{f value}
22 +
#' \item{p}{p value}
21 23
#' \item{r2}{r squared value}
22 -
#' \item{rmse}{root mean squared error}
23 -
#' \item{ss_between}{between group sum of squares}
24 -
#' \item{ss_within}{within group sum of squares}
25 -
#' \item{ss_total}{total sum of squares}
26 -
#'
24 +
#' \item{ar2}{adjusted r squared value}
25 +
#' \item{sigma}{root mean squared error}
26 +
#' \item{obs}{number of observations}
27 +
#' \item{tab}{group statistics}
28 +
#' @section Deprecated Functions:
29 +
#' \code{owanova()} has been deprecated. Instead use \code{infer_oneway_anova()}.
27 30
#' @references Kutner, M. H., Nachtsheim, C., Neter, J., & Li, W. (2005).
28 31
#' Applied linear statistical models. Boston: McGraw-Hill Irwin.
29 32
#'
@@ -37,105 +40,42 @@
Loading
37 40
38 41
#' @export
39 42
infer_oneway_anova.default <- function(data, x, y, ...) {
40 -
41 -
  x1 <- deparse(substitute(x))
42 -
  y1 <- deparse(substitute(y))
43 -
44 -
  fdata        <- data[c(x1, y1)]
45 -
  sample_mean  <- anova_avg(fdata, x1)
46 -
  sample_stats <- anova_split(fdata, x1, y1, sample_mean)
47 -
  k            <- anova_calc(fdata, sample_stats, x1, y1)
48 -
49 -
50 -
  result <-
51 -
    list(
52 -
      adjusted_r2 = round(k$reg$adj.r.squared, 4),
53 -
      df_btw      = k$df_sstr,
54 -
      df_total    = k$df_sst,
55 -
      df_within   = k$df_sse,
56 -
      fstat       = k$f,
57 -
      group_stats = sample_stats[, c(1, 2, 3, 5)],
58 -
      ms_btw      = k$mstr,
59 -
      ms_within   = k$mse,
60 -
      obs         = k$obs,
61 -
      pval        = k$sig,
62 -
      r2          = round(k$reg$r.squared, 4),
63 -
      rmse        = round(k$reg$sigma, 4),
64 -
      ss_between  = k$sstr,
65 -
      ss_total    = k$total,
66 -
      ss_within   = k$ssee)
43 +
  x1 <- enquo(x)
44 +
  y1 <- enquo(y)
45 +
46 +
  fdata <-
47 +
    data %>%
48 +
    select(!! x1, !! y1)
49 +
50 +
  sample_mean <- anova_avg(fdata, !! x1)
51 +
  sample_stats <- anova_split(fdata, !! x1, !! y1, sample_mean)
52 +
  k <- anova_calc(fdata, sample_stats, !! x1, !! y1)
53 +
54 +
55 +
  result <- list(
56 +
    between = k$sstr, within = k$ssee, total = k$total,
57 +
    df_btw = k$df_sstr, df_within = k$df_sse,
58 +
    df_total = k$df_sst, ms_btw = k$mstr, ms_within = k$mse,
59 +
    f = k$f, p = k$sig, r2 = round(k$reg$r.squared, 4),
60 +
    ar2 = round(k$reg$adj.r.squared, 4),
61 +
    sigma = round(k$reg$sigma, 4), obs = k$obs,
62 +
    tab = sample_stats[, c(1, 2, 3, 5)]
63 +
  )
67 64
68 65
  class(result) <- "infer_oneway_anova"
69 66
  return(result)
70 67
}
71 68
72 69
#' @export
73 -
print.infer_oneway_anova <- function(x, ...) {
74 -
  print_owanova(x)
75 -
}
76 -
77 -
#' @import magrittr
78 -
#' @importFrom data.table data.table := setDF
79 -
anova_split <- function(data, x, y, sample_mean) {
80 -
81 -
  dat <- data[c(y, x)]
82 -
  dm  <- data.table(dat)
83 -
84 -
  by_factor <- dm[, .(length = length(get(x)),
85 -
                     mean = mean(get(x)),
86 -
                     var = stats::var(get(x)),
87 -
                     sd = stats::sd(get(x))),
88 -
                  by = y]
89 -
90 -
  by_factor[, ':='(sst = length * ((mean - sample_mean) ^ 2),
91 -
                   sse = (length - 1) * var)]
92 -
93 -
  setDF(by_factor)
94 -
  by_factor <- by_factor[order(by_factor[, 1]),]
95 -
96 -
  return(by_factor)
97 -
}
98 -
99 -
anova_avg <- function(data, y) {
100 -
101 -
  mean(data[[y]])
102 -
70 +
#' @rdname infer_oneway_anova
71 +
#' @usage NULL
72 +
#'
73 +
owanova <- function(data, x, y, ...) {
74 +
  .Deprecated("infer_oneway_anova()")
75 +
  infer_oneway_anova(data, x, y, ...)
103 76
}
104 77
105 -
anova_calc <- function(data, sample_stats, x, y) {
106 -
107 -
  var_names <- names(data[c(x, y)])
108 -
109 -
  sstr <-
110 -
    sample_stats %>%
111 -
    magrittr::use_series(sst) %>%
112 -
    sum() %>%
113 -
    round(3)
114 -
115 -
  ssee <-
116 -
    sample_stats %>%
117 -
    magrittr::use_series(sse) %>%
118 -
    sum() %>%
119 -
    round(3)
120 -
121 -
  total   <- round(sstr + ssee, 3)
122 -
  df_sstr <- nrow(sample_stats) - 1
123 -
  df_sse  <- nrow(data) - nrow(sample_stats)
124 -
  df_sst  <- nrow(data) - 1
125 -
  mstr    <- round(sstr / df_sstr, 3)
126 -
  mse     <- round(ssee / df_sse, 3)
127 -
  f       <- round(mstr / mse, 3)
128 -
  sig     <- round(1 - stats::pf(f, df_sstr, df_sse), 3)
129 -
  obs     <- nrow(data)
130 -
  regs    <- paste(var_names[1], "~ as.factor(", var_names[2], ")")
131 -
  model   <- stats::lm(stats::as.formula(regs), data = data)
132 -
  reg     <- summary(model)
133 -
134 -
  out <- list(
135 -
    sstr = sstr, ssee = ssee, total = total, df_sstr = df_sstr,
136 -
    df_sse = df_sse, df_sst = df_sst, mstr = mstr, mse = mse, f = f,
137 -
    sig = sig, obs = obs, model = model, reg = reg
138 -
  )
139 -
140 -
  return(out)
141 -
}
78 +
#' @export
79 +
print.infer_oneway_anova <- function(x, ...) {
80 +
  print_owanova(x)
81 +
}

@@ -1,3 +1,790 @@
Loading
1 +
#' @importFrom dplyr group_by summarise_all funs mutate
2 +
#' @importFrom magrittr %>% use_series
3 +
#' @importFrom stats var sd
4 +
#' @importFrom tibble tibble as_data_frame
5 +
anova_split <- function(data, x, y, sample_mean) {
6 +
  x1 <- enquo(x)
7 +
  y1 <- enquo(y)
8 +
9 +
  by_factor <-
10 +
    data %>%
11 +
    group_by(!! y1) %>%
12 +
    select(!! y1, !! x1) %>%
13 +
    summarise_all(funs(length, mean, var, sd)) %>%
14 +
    as_data_frame() %>%
15 +
    mutate(
16 +
      sst = length * ((mean - sample_mean) ^ 2),
17 +
      sse = (length - 1) * var
18 +
    )
19 +
20 +
  return(by_factor)
21 +
}
22 +
23 +
anova_avg <- function(data, y) {
24 +
  y1 <- enquo(y)
25 +
26 +
  avg <-
27 +
    data %>%
28 +
    select(!! y1) %>%
29 +
    summarise_all(funs(mean))
30 +
31 +
  return(unlist(avg, use.names = FALSE))
32 +
}
33 +
34 +
anova_calc <- function(data, sample_stats, x, y) {
35 +
  x1 <- enquo(x)
36 +
  y1 <- enquo(y)
37 +
38 +
  var_names <-
39 +
    data %>%
40 +
    select(!! x1, !! y1) %>%
41 +
    names()
42 +
43 +
  sstr <-
44 +
    sample_stats %>%
45 +
    use_series(sst) %>%
46 +
    sum() %>%
47 +
    round(3)
48 +
49 +
  ssee <-
50 +
    sample_stats %>%
51 +
    use_series(sse) %>%
52 +
    sum() %>%
53 +
    round(3)
54 +
55 +
  total <- round(sstr + ssee, 3)
56 +
  df_sstr <- nrow(sample_stats) - 1
57 +
  df_sse <- nrow(data) - nrow(sample_stats)
58 +
  df_sst <- nrow(data) - 1
59 +
  mstr <- round(sstr / df_sstr, 3)
60 +
  mse <- round(ssee / df_sse, 3)
61 +
  f <- round(mstr / mse, 3)
62 +
  sig <- round(1 - pf(f, df_sstr, df_sse), 3)
63 +
  obs <- nrow(data)
64 +
  regs <- paste(var_names[1], "~ as.factor(", var_names[2], ")")
65 +
  model <- lm(as.formula(regs), data = data)
66 +
  reg <- summary(model)
67 +
  out <- list(
68 +
    sstr = sstr, ssee = ssee, total = total, df_sstr = df_sstr,
69 +
    df_sse = df_sse, df_sst = df_sst, mstr = mstr, mse = mse, f = f,
70 +
    sig = sig, obs = obs, model = model, reg = reg
71 +
  )
72 +
  return(out)
73 +
}
74 +
75 +
binom_comp <- function(n, success, prob) {
76 +
  n <- n
77 +
  k <- success
78 +
  obs_p <- k / n
79 +
  exp_k <- round(n * prob)
80 +
  lt <- pbinom(k, n, prob, lower.tail = T)
81 +
  ut <- pbinom(k - 1, n, prob, lower.tail = F)
82 +
  p_opp <- round(dbinom(k, n, prob), 9)
83 +
  i_p <- dbinom(exp_k, n, prob)
84 +
  i_k <- exp_k
85 +
86 +
  if (k < exp_k) {
87 +
    while (i_p > p_opp) {
88 +
      i_k <- i_k + 1
89 +
      i_p <- round(dbinom(i_k, n, prob), 9)
90 +
      if (round(i_p) == p_opp) {
91 +
        break
92 +
      }
93 +
    }
94 +
95 +
    ttf <- pbinom(k, n, prob, lower.tail = T) +
96 +
      pbinom(i_k - 1, n, prob, lower.tail = F)
97 +
  } else {
98 +
    while (p_opp <= i_p) {
99 +
      i_k <- i_k - 1
100 +
      i_p <- dbinom(i_k, n, prob)
101 +
      if (round(i_p) == p_opp) {
102 +
        break
103 +
      }
104 +
    }
105 +
106 +
    i_k <- i_k
107 +
108 +
    tt <- pbinom(i_k, n, prob, lower.tail = T) +
109 +
      pbinom(k - 1, n, prob, lower.tail = F)
110 +
111 +
    ttf <- ifelse(tt <= 1, tt, 1)
112 +
  }
113 +
  out <- list(
114 +
    n = n, k = k, exp_k = exp_k, obs_p = obs_p, exp_p = prob, ik = i_k,
115 +
    lower = round(lt, 6), upper = round(ut, 6), two_tail = round(ttf, 6)
116 +
  )
117 +
  return(out)
118 +
}
119 +
120 +
# chi square association
121 +
df_chi <- function(twoway) {
122 +
  (nrow(twoway) - 1) * (ncol(twoway) - 1)
123 +
}
124 +
125 +
efmat <- function(twoway) {
126 +
  mat1 <- matrix(rowSums(twoway) / sum(twoway), nrow = 2)
127 +
  mat2 <- matrix(colSums(twoway), nrow = 1)
128 +
  ef <- mat1 %*% mat2
129 +
  return(ef)
130 +
}
131 +
132 +
pear_chsq <- function(twoway, df, ef) {
133 +
  chi <- round(sum(((twoway - ef) ^ 2) / ef), 4)
134 +
  sig <- round(pchisq(chi, df, lower.tail = F), 4)
135 +
  out <- list(chi = chi, sig = sig)
136 +
  return(out)
137 +
}
138 +
139 +
lr_chsq <- function(twoway, df, ef) {
140 +
  chilr <- round(2 * sum(matrix(log(twoway / ef), nrow = 1) %*% matrix(twoway, nrow = 4)), 4)
141 +
  sig_lr <- round(pchisq(chilr, df, lower.tail = F), 4)
142 +
  out <- list(chilr = chilr, sig_lr = sig_lr)
143 +
  return(out)
144 +
}
145 +
146 +
lr_chsq2 <- function(twoway, df, ef, ds) {
147 +
  chilr <- round(2 * sum(matrix(twoway, ncol = ds) %*% matrix(log(twoway / ef), nrow = ds)), 4)
148 +
  sig_lr <- round(pchisq(chilr, df, lower.tail = F), 4)
149 +
  out <- list(chilr = chilr, sig_lr = sig_lr)
150 +
  return(out)
151 +
}
152 +
153 +
yates_chsq <- function(twoway) {
154 +
  way2 <- twoway[, c(2, 1)]
155 +
  total <- sum(twoway)
156 +
  prods <- prod(diag(twoway)) - prod(diag(way2))
157 +
  prod_totals <- prod(rowSums(twoway)) * prod(colSums(twoway))
158 +
  chi_y <- round((total * (abs(prods) - (total / 2)) ^ 2) / prod_totals, 4)
159 +
  sig_y <- round(pchisq(chi_y, 1, lower.tail = F), 4)
160 +
  out <- list(chi_y = chi_y, sig_y = sig_y, total = total, prod_totals = prod_totals)
161 +
  return(out)
162 +
}
163 +
164 +
mh_chsq <- function(twoway, total, prod_totals) {
165 +
  num <- twoway[1] - ((rowSums(twoway)[1] * colSums(twoway)[1]) / total)
166 +
  den <- prod_totals / ((total ^ 3) - (total ^ 2))
167 +
  chimh <- round((num ^ 2) / den, 4)
168 +
  sig_mh <- round(pchisq(chimh, 1, lower.tail = F), 4)
169 +
  out <- list(chimh = chimh, sig_mh = sig_mh)
170 +
  return(out)
171 +
}
172 +
173 +
efm <- function(twoway, dk) {
174 +
  mat1 <- matrix(rowSums(twoway) / sum(twoway), nrow = dk[1])
175 +
  mat2 <- matrix(colSums(twoway), ncol = dk[2])
176 +
  ef <- mat1 %*% mat2
177 +
  return(ef)
178 +
}
179 +
180 +
pear_chi <- function(twoway, df, ef) {
181 +
  chi <- round(sum(((twoway - ef) ^ 2) / ef), 4)
182 +
  sig <- round(pchisq(chi, df, lower.tail = F), 4)
183 +
  out <- list(chi = chi, sig = sig)
184 +
  return(out)
185 +
}
186 +
187 +
chigf <- function(x, y, chi) {
188 +
  twoway <- matrix(
189 +
    table(x, y), nrow = nlevels(as.factor(x)),
190 +
    ncol = nlevels(as.factor(y))
191 +
  )
192 +
  total <- sum(twoway)
193 +
  phi <- round(sqrt(chi / total), 4)
194 +
  cc <- round(sqrt(chi / (chi + total)), 4)
195 +
  q <- min(nrow(twoway), ncol(twoway))
196 +
  cv <- round(sqrt(chi / (total * (q - 1))), 4)
197 +
  out <- list(phi = phi, cc = cc, cv = cv)
198 +
  return(out)
199 +
}
200 +
201 +
# chi square goodness of fit
202 +
chi_cort <- function(x, y) {
203 +
  diff <- x - y - 0.5
204 +
  dif <- abs(x - y) - 0.5
205 +
  dif2 <- dif ^ 2
206 +
  dev <- round((diff / y) * 100, 2)
207 +
  std <- round(diff / sqrt(y), 2)
208 +
  chi <- round(sum(dif2 / y), 4)
209 +
  out <- list(dev = dev, std = std, chi = chi)
210 +
  return(out)
211 +
}
212 +
213 +
chigof <- function(x, y) {
214 +
  dif <- x - y
215 +
  dif2 <- dif ^ 2
216 +
  dev <- round((dif / y) * 100, 2)
217 +
  std <- round(dif / sqrt(y), 2)
218 +
  chi <- round(sum(dif2 / y), 4)
219 +
  out <- list(dev = dev, std = std, chi = chi)
220 +
  return(out)
221 +
}
222 +
223 +
# cochran's q test
224 +
coch_data <- function(x, ...) {
225 +
  if (is.data.frame(x)) {
226 +
    data <- x %>%
227 +
      lapply(as.numeric) %>%
228 +
      as.data.frame() %>%
229 +
      `-`(1)
230 +
  } else {
231 +
    data <- cbind(x, ...) %>%
232 +
      apply(2, as.numeric) %>%
233 +
      `-`(1) %>%
234 +
      as.data.frame()
235 +
  }
236 +
237 +
  return(data)
238 +
}
239 +
240 +
#' @importFrom purrr map_df
241 +
#' @importFrom magrittr subtract
242 +
cochran_comp <- function(data) {
243 +
  n <- nrow(data)
244 +
  k <- ncol(data)
245 +
  df <- k - 1
246 +
247 +
  cs <-
248 +
    data %>%
249 +
    map_df(.f = as.numeric) %>%
250 +
    subtract(1) %>%
251 +
    sums()
252 +
253 +
  q <- coch(k, cs$cls_sum, cs$cl, cs$g, cs$gs_sum)
254 +
255 +
  pvalue <- 1 - pchisq(q, df)
256 +
257 +
  out <- list(
258 +
    n = n,
259 +
    df = df,
260 +
    q = q,
261 +
    pvalue = round(pvalue, 4)
262 +
  )
263 +
264 +
  return(out)
265 +
}
266 +
267 +
268 +
# levene test
269 +
lev_metric <- function(cvar, gvar, loc, ...) {
270 +
  metric <- tapply(cvar, gvar, loc, ...)
271 +
  y <- abs(cvar - metric[gvar])
272 +
  result <- anova(lm(y ~ gvar))
273 +
  out <- list(
274 +
    fstat = result$`F value`[1],
275 +
    p = result$`Pr(>F)`[1]
276 +
  )
277 +
  return(out)
278 +
}
279 +
280 +
lev_comp <- function(variable, group_var, trim.mean) {
281 +
  comp <- complete.cases(variable, group_var)
282 +
  n <- length(comp)
283 +
  k <- nlevels(group_var)
284 +
  cvar <- variable[comp]
285 +
  gvar <- group_var[comp]
286 +
  lens <- tapply(cvar, gvar, length)
287 +
  avgs <- tapply(cvar, gvar, mean)
288 +
  sds <- tapply(cvar, gvar, sd)
289 +
290 +
  bf <- lev_metric(cvar, gvar, mean)
291 +
  lev <- lev_metric(cvar, gvar, median)
292 +
  bft <- lev_metric(cvar, gvar, mean, trim = trim.mean)
293 +
  out <- list(
294 +
    bf = round(bf$fstat, 4),
295 +
    p_bf = round(bf$p, 4),
296 +
    lev = round(lev$fstat, 4),
297 +
    p_lev = round(lev$p, 4),
298 +
    bft = round(bft$fstat, 4),
299 +
    p_bft = round(bft$p, 4),
300 +
    avgs = round(avgs, 2),
301 +
    sds = round(sds, 2),
302 +
    avg = round(mean(cvar), 2),
303 +
    sd = round(sd(cvar), 2),
304 +
    n = n,
305 +
    levs = levels(gvar),
306 +
    n_df = (k - 1),
307 +
    d_df = (n - k),
308 +
    lens = lens
309 +
  )
310 +
  return(out)
311 +
}
312 +
313 +
# mcnemar test
314 +
mcdata <- function(x, y) {
315 +
  if (!is.matrix(x)) {
316 +
    stop("x must be either a table or a matrix")
317 +
  }
318 +
319 +
  if (is.matrix(x)) {
320 +
    if (length(x) != 4) {
321 +
      stop("x must be a 2 x 2 matrix")
322 +
    }
323 +
  }
324 +
325 +
  dat <- x
326 +
  return(dat)
327 +
}
328 +
329 +
330 +
mctestp <- function(dat) {
331 +
  retrieve <- matrix(c(1, 2, 2, 1), nrow = 2)
332 +
  p <- dat[retrieve]
333 +
  return(p)
334 +
}
335 +
336 +
tetat <- function(p) {
337 +
  out <- ((p[1] - p[2]) ^ 2) / sum(p)
338 +
  return(out)
339 +
}
340 +
341 +
mcpval <- function(test_stat, df) {
342 +
  out <- 1 - pchisq(test_stat, df)
343 +
  return(out)
344 +
}
345 +
346 +
mcpex <- function(dat) {
347 +
  out <- 2 * min(pbinom(dat[2], sum(dat[2], dat[3]), 0.5), pbinom(dat[3], sum(dat[2], dat[3]), 0.5))
348 +
  return(out)
349 +
}
350 +
351 +
mcstat <- function(p) {
352 +
  out <- ((abs(p[1] - p[2]) - 1) ^ 2) / sum(p)
353 +
  return(out)
354 +
}
355 +
356 +
mccpval <- function(cstat, df) {
357 +
  out <- 1 - pchisq(cstat, df)
358 +
  return(out)
359 +
}
360 +
361 +
mckappa <- function(dat) {
362 +
  agreement <- sum(diag(dat)) / sum(dat)
363 +
  expected <- sum(rowSums(dat) * colSums(dat)) / (sum(dat) ^ 2)
364 +
  kappa <- (agreement - expected) / (1 - expected)
365 +
  return(kappa)
366 +
}
367 +
368 +
mcserr <- function(dat, kappa) {
369 +
  expected <- sum(rowSums(dat) * colSums(dat)) / (sum(dat) ^ 2)
370 +
  out <- serr(dat, kappa, expected)
371 +
}
372 +
373 +
mcconf <- function(std_err, kappa) {
374 +
  alpha <- 0.05
375 +
  interval <- qnorm(1 - (alpha / 2)) * std_err
376 +
  ci_lower <- kappa - interval
377 +
  ci_upper <- kappa + interval
378 +
  out <- list(ci_lower = ci_lower, ci_upper = ci_upper)
379 +
  return(out)
380 +
}
381 +
382 +
prop_fact <- function(dat, p) {
383 +
  dat_per <- dat / sum(dat)
384 +
  row_sum <- rowSums(dat_per)
385 +
  col_sum <- colSums(dat_per)
386 +
  controls <- 1 - col_sum[2]
387 +
  cases <- 1 - row_sum[2]
388 +
  ratio <- cases / controls
389 +
  odds_ratio <- p[1] / p[2]
390 +
  out <- list(
391 +
    cases = cases, controls = controls, ratio = ratio,
392 +
    odds_ratio = odds_ratio
393 +
  )
394 +
  return(out)
395 +
}
396 +
397 +
serr <- function(dat, kappa, expected) {
398 +
  dat_per <- dat / sum(dat)
399 +
  row_sum <- rowSums(dat_per)
400 +
  row_sum[3] <- sum(row_sum)
401 +
  col_sum <- colSums(dat_per)
402 +
  dat_per <- rbind(dat_per, col_sum)
403 +
  dat_per <- cbind(dat_per, row_sum)
404 +
  d1 <- dim(dat_per)
405 +
  dat_per[d1[1], d1[2]] <- 1.0
406 +
  diagonal <- diag(dat_per)
407 +
  a <- diagonal[1] * (1 - (row_sum[1] + col_sum[1]) * (1 - kappa)) ^ 2 +
408 +
    diagonal[2] * (1 - (row_sum[2] + col_sum[2]) * (1 - kappa)) ^ 2
409 +
410 +
  x1 <- dat_per[lower.tri(dat_per)][1]
411 +
  x2 <- dat_per[upper.tri(dat_per)][1]
412 +
  b <- ((1 - kappa) ^ 2) * ((x1 * (row_sum[1] + col_sum[2]) ^ 2) +
413 +
    (x2 * (row_sum[2] + col_sum[1]) ^ 2))
414 +
415 +
  c <- ((kappa) - expected * (1 - kappa)) ^ 2
416 +
  variance <- ((a + b - c) / ((1 - expected) ^ 2)) / sum(dat)
417 +
418 +
  return(sqrt(variance))
419 +
}
420 +
421 +
mccomp <- function(dat) {
422 +
  p <- mctestp(dat)
423 +
  test_stat <- tetat(p)
424 +
  df <- nrow(dat) - 1
425 +
  pvalue <- mcpval(test_stat, df)
426 +
  exactp <- mcpex(dat)
427 +
  cstat <- mcstat(p)
428 +
  cpvalue <- mccpval(cstat, df)
429 +
  kappa <- mckappa(dat)
430 +
  std_err <- mcserr(dat, kappa)
431 +
  clu <- mcconf(std_err, kappa)
432 +
  k <- prop_fact(dat, p)
433 +
434 +
  out <- list(
435 +
    statistic = round(test_stat, 4), df = df,
436 +
    pvalue = round(pvalue, 4), exactp = round(exactp, 4),
437 +
    cstat = cstat, cpvalue = cpvalue, kappa = round(kappa, 4),
438 +
    std_err = round(std_err, 4), kappa_cil = round(clu$ci_lower, 4),
439 +
    kappa_ciu = round(clu$ci_upper, 4), cases = round(k$cases, 4),
440 +
    controls = round(k$controls, 4), ratio = round(k$ratio, 4),
441 +
    odratio = round(k$odds_ratio, 4)
442 +
  )
443 +
  return(out)
444 +
}
445 +
446 +
# one sample proportion test
447 +
prop_comp <- function(n, prob, alternative, phat) {
448 +
  n <- n
449 +
  phat <- phat
450 +
  p <- prob
451 +
  q <- 1 - p
452 +
  obs <- c(n * (1 - phat), n * phat)
453 +
  exp <- n * c(q, p)
454 +
  dif <- obs - exp
455 +
  dev <- round((dif / exp) * 100, 2)
456 +
  std <- round(dif / sqrt(exp), 2)
457 +
  num <- phat - prob
458 +
  den <- sqrt((p * q) / n)
459 +
  z <- round(num / den, 4)
460 +
  lt <- round(pnorm(z), 4)
461 +
  ut <- round(1 - pnorm(z), 4)
462 +
  tt <- round((1 - pnorm(abs(z))) * 2, 4)
463 +
  alt <- alternative
464 +
465 +
  if (alt == "all") {
466 +
    sig <- c("two-both" = tt, "less" = lt, "greater" = ut)
467 +
  } else if (alt == "greater") {
468 +
    sig <- ut
469 +
  } else if (alt == "less") {
470 +
    sig <- lt
471 +
  } else {
472 +
    sig <- tt
473 +
  }
474 +
475 +
  out <- list(
476 +
    n = n, phat = phat, p = prob, z = z, sig = sig, alt = alt,
477 +
    obs = obs, exp = exp, deviation = format(dev, nsmall = 2),
478 +
    std = format(std, nsmall = 2)
479 +
  )
480 +
481 +
  return(out)
482 +
}
483 +
484 +
# one sample variance test
485 +
osvar_comp <- function(x, sd, confint) {
486 +
  n <- length(x)
487 +
  df <- n - 1
488 +
  xbar <- mean(x)
489 +
  sigma <- sd(x)
490 +
  se <- sigma / sqrt(n)
491 +
  chi <- df * ((sigma / sd) ^ 2)
492 +
493 +
  p_lower <- pchisq(chi, df)
494 +
  p_upper <- pchisq(chi, df, lower.tail = F)
495 +
  if (p_lower < 0.5) {
496 +
    p_two <- pchisq(chi, df) * 2
497 +
  } else {
498 +
    p_two <- pchisq(chi, df, lower.tail = F) * 2
499 +
  }
500 +
501 +
502 +
  conf <- confint
503 +
  a <- (1 - conf) / 2
504 +
  al <- 1 - a
505 +
  tv <- df * sigma
506 +
  c_lwr <- round(tv / qchisq(al, df), 4)
507 +
  c_upr <- round(tv / qchisq(a, df), 4)
508 +
509 +
  out <- list(
510 +
    n = n, sd = sd, sigma = sigma, se = se, chi = chi, df = df,
511 +
    p_lower = p_lower, p_upper = p_upper, p_two = p_two, xbar = xbar,
512 +
    c_lwr = c_lwr, c_upr = c_upr, conf = conf
513 +
  )
514 +
515 +
  return(out)
516 +
}
517 +
518 +
# two sample variance test
519 +
var_comp <- function(variable, group_var) {
520 +
  comp <- complete.cases(variable, group_var)
521 +
  cvar <- variable[comp]
522 +
  gvar <- group_var[comp]
523 +
524 +
  d <- tibble(cvar, gvar)
525 +
  vals <- tibble_stats(d, "cvar", "gvar")
526 +
  lass <- tbl_stats(d, "cvar")
527 +
528 +
  lens <- vals[[2]] %>% map_int(1)
529 +
  vars <- vals[[4]] %>% map_dbl(1)
530 +
531 +
  f <- vars[1] / vars[2]
532 +
  n1 <- lens[1] - 1
533 +
  n2 <- lens[2] - 1
534 +
  lower <- pf(f, n1, n2)
535 +
  upper <- pf(f, n1, n2, lower.tail = FALSE)
536 +
537 +
  out <- list(
538 +
    f = round(f, 4), lower = round(lower, 4),
539 +
    upper = round(upper, 4),
540 +
    vars = round(vars, 2),
541 +
    avgs = round((vals[[3]] %>% map_dbl(1)), 2),
542 +
    sds = round((vals[[5]] %>% map_dbl(1)), 2),
543 +
    ses = round((vals[[6]] %>% map_dbl(1)), 2),
544 +
    avg = round(lass[2], 2),
545 +
    sd = round(lass[3], 2),
546 +
    se = round(lass[4], 2),
547 +
    n1 = n1,
548 +
    n2 = n2,
549 +
    lens = lens,
550 +
    len = lass[1]
551 +
  )
552 +
553 +
  return(out)
554 +
}
555 +
556 +
# two sample proportion test
557 +
prop_comp2 <- function(var1, var2, alt) {
558 +
  n1 <- length(var1)
559 +
  n2 <- length(var2)
560 +
  y1 <- table(var1)[[2]]
561 +
  y2 <- table(var2)[[2]]
562 +
  phat1 <- round(y1 / n1, 4)
563 +
  phat2 <- round(y2 / n2, 4)
564 +
  phat <- sum(y1, y2) / sum(n1, n2)
565 +
566 +
  # test statistic
567 +
  num <- (phat1 - phat2)
568 +
  den1 <- phat * (1 - phat)
569 +
  den2 <- (1 / n1) + (1 / n2)
570 +
  den <- sqrt(den1 * den2)
571 +
  z <- round(num / den, 4)
572 +
573 +
  lt <- round(pnorm(z), 4)
574 +
  ut <- round(pnorm(z, lower.tail = FALSE), 4)
575 +
  tt <- round(pnorm(abs(z), lower.tail = FALSE) * 2, 4)
576 +
577 +
578 +
579 +
  if (alt == "all") {
580 +
    sig <- c("two-tail" = tt, "lower-tail" = lt, "upper-tail" = ut)
581 +
  } else if (alt == "greater") {
582 +
    sig <- ut
583 +
  } else if (alt == "less") {
584 +
    sig <- lt
585 +
  } else {
586 +
    sig <- tt
587 +
  }
588 +
589 +
  # result
590 +
  out <- list(
591 +
    n1 = n1, n2 = n2, phat1 = phat1, phat2 = phat2, z = round(z, 3),
592 +
    sig = round(sig, 3)
593 +
  )
594 +
595 +
  return(out)
596 +
}
597 +
598 +
# one sample t test
599 +
ttest_comp <- function(x, mu, alpha, type) {
600 +
  n <- length(x)
601 +
  a <- (alpha / 2)
602 +
  df <- n - 1
603 +
  conf <- 1 - alpha
604 +
  Mean <- round(mean(x), 4)
605 +
  stddev <- round(sd(x), 4)
606 +
  std_err <- round(stddev / sqrt(n), 4)
607 +
  test_stat <- round((Mean - mu) / std_err, 3)
608 +
609 +
  if (type == "less") {
610 +
    cint <- c(-Inf, test_stat + qt(1 - alpha, df))
611 +
  } else if (type == "greater") {
612 +
    cint <- c(test_stat - qt(1 - alpha, df), Inf)
613 +
  } else {
614 +
    cint <- qt(1 - a, df)
615 +
    cint <- test_stat + c(-cint, cint)
616 +
  }
617 +
618 +
  confint <- round(mu + cint * std_err, 4)
619 +
  mean_diff <- round((Mean - mu), 4)
620 +
  mean_diff_l <- confint[1] - mu
621 +
  mean_diff_u <- confint[2] - mu
622 +
  p_l <- pt(test_stat, df)
623 +
  p_u <- pt(test_stat, df, lower.tail = FALSE)
624 +
625 +
  if (p_l < 0.5) {
626 +
    p <- p_l * 2
627 +
  } else {
628 +
    p <- p_u * 2
629 +
  }
630 +
631 +
632 +
  out <- list(
633 +
    mu = mu, n = n, df = df, Mean = Mean, stddev = stddev, std_err = std_err,
634 +
    test_stat = test_stat, confint = confint, mean_diff = mean_diff, mean_diff_l = mean_diff_l,
635 +
    mean_diff_u = mean_diff_u, p_l = p_l, p_u = p_u, p = p, conf = conf
636 +
  )
637 +
638 +
  return(out)
639 +
}
640 +
641 +
# paired sample t test
642 +
paired_comp <- function(x, y, confint, var_names) {
643 +
  n <- length(x)
644 +
  df <- (n - 1)
645 +
  xy <- paste(var_names[1], "-", var_names[2])
646 +
  data_prep <- paired_data(x, y)
647 +
  b <- paired_stats(data_prep, "key", "value")
648 +
  corr <- round(cor(x, y), 4)
649 +
  corsig <- cor_sig(corr, n)
650 +
  alpha <- 1 - confint
651 +
  confint1 <- conf_int_t(b[[1, 1]], b[[1, 2]], n, alpha = alpha) %>% round(2)
652 +
  confint2 <- conf_int_t(b[[2, 1]], b[[2, 2]], n, alpha = alpha) %>% round(2)
653 +
  confint3 <- conf_int_t(b[[3, 1]], b[[3, 2]], n, alpha = alpha) %>% round(2)
654 +
  t <- round(b[[3, 1]] / b[[3, 3]], 4)
655 +
  p_l <- pt(t, df)
656 +
  p_u <- pt(t, df, lower.tail = FALSE)
657 +
  p <- pt(abs(t), df, lower.tail = FALSE) * 2
658 +
659 +
  out <- list(
660 +
    Obs = n, b = b, conf_int1 = confint1, conf_int2 = confint2,
661 +
    conf_int_diff = confint3, corr = round(corr, 2), corsig = round(corsig, 2),
662 +
    tstat = t, p_lower = p_l, p_upper = p_u, p_two_tail = p, xy = xy, df = df
663 +
  )
664 +
665 +
  return(out)
666 +
}
667 +
668 +
# independent sample t test
669 +
indth <- function(data, x, y, a) {
670 +
  x1 <- enquo(x)
671 +
  y1 <- enquo(y)
672 +
673 +
  h <- data_split(data, !! x1, !! y1)
674 +
  h$df <- h$length - 1
675 +
  h$error <- qt(a, h$df) * -1
676 +
  h$lower <- h$mean_t - (h$error * h$std_err)
677 +
  h$upper <- h$mean_t + (h$error * h$std_err)
678 +
  return(h)
679 +
}
680 +
681 +
indcomb <- function(data, y, a) {
682 +
  y1 <- enquo(y)
683 +
684 +
  comb <- da(data, !! y1)
685 +
  comb$df <- comb$length - 1
686 +
  comb$error <- qt(a, comb$df) * -1
687 +
  comb$lower <- round(comb$mean_t - (comb$error * comb$std_err), 5)
688 +
  comb$upper <- round(comb$mean_t + (comb$error * comb$std_err), 5)
689 +
  names(comb) <- NULL
690 +
  return(comb)
691 +
}
692 +
693 +
indcomp <- function(grp_stat, alpha) {
694 +
  n1 <- grp_stat[1, 2]
695 +
  n2 <- grp_stat[2, 2]
696 +
  n <- n1 + n2
697 +
  means <- grp_stat[, 3]
698 +
  mean_diff <- means[1] - means[2]
699 +
  sd1 <- grp_stat[1, 4]
700 +
  sd2 <- grp_stat[2, 4]
701 +
  s1 <- grp_stat[1, 4] ^ 2
702 +
  s2 <- grp_stat[2, 4] ^ 2
703 +
  sd_dif <- sd_diff(n1, n2, s1, s2)
704 +
  se_dif <- se_diff(n1, n2, s1, s2)
705 +
  conf_diff <- conf_int_p(mean_diff, se_dif, alpha = alpha)
706 +
  out <- list(
707 +
    n1 = n1, n2 = n2, n = n, mean_diff = mean_diff, sd1 = sd1,
708 +
    sd2 = sd2, s1 = s1, s2 = s2, sd_dif = sd_dif, se_dif = se_dif,
709 +
    conf_diff = conf_diff
710 +
  )
711 +
  return(out)
712 +
}
713 +
714 +
indsig <- function(n1, n2, s1, s2, mean_diff) {
715 +
  d_f <- as.vector(df(n1, n2, s1, s2))
716 +
  t <- mean_diff / (((s1 / n1) + (s2 / n2)) ^ 0.5)
717 +
  sig_l <- round(pt(t, d_f), 4)
718 +
  sig_u <- round(pt(t, d_f, lower.tail = FALSE), 4)
719 +
  if (sig_l < 0.5) {
720 +
    sig <- round(pt(t, d_f) * 2, 4)
721 +
  } else {
722 +
    sig <- round(pt(t, d_f, lower.tail = FALSE) * 2, 4)
723 +
  }
724 +
  out <- list(d_f = d_f, t = t, sig_l = sig_l, sig_u = sig_u, sig = sig)
725 +
  return(out)
726 +
}
727 +
728 +
fsig <- function(s1, s2, n1, n2) {
729 +
  out <- round(min(
730 +
    pf((s1 / s2), (n1 - 1), (n2 - 1)),
731 +
    pf((s1 / s2), (n1 - 1), (n2 - 1),
732 +
      lower.tail = FALSE
733 +
    )
734 +
  ) * 2, 4)
735 +
  return(out)
736 +
}
737 +
738 +
739 +
indpool <- function(n1, n2, mean_diff, se_dif) {
740 +
  df_pooled <- (n1 + n2) - 2
741 +
  t_pooled <- mean_diff / se_dif
742 +
  sig_pooled_l <- round(pt(t_pooled, df_pooled), 4)
743 +
  sig_pooled_u <- round(pt(t_pooled, df_pooled, lower.tail = FALSE), 4)
744 +
  if (sig_pooled_l < 0.5) {
745 +
    sig_pooled <- round(pt(t_pooled, df_pooled) * 2, 4)
746 +
  } else {
747 +
    sig_pooled <- round(pt(t_pooled, df_pooled, lower.tail = FALSE) * 2, 4)
748 +
  }
749 +
  out <- list(
750 +
    df_pooled = df_pooled, t_pooled = t_pooled,
751 +
    sig_pooled_l = sig_pooled_l, sig_pooled_u = sig_pooled_u,
752 +
    sig_pooled = sig_pooled
753 +
  )
754 +
  return(out)
755 +
}
756 +
757 +
#' @importFrom rlang sym
758 +
tibble_stats <- function(data, x, y) {
759 +
760 +
  by_factor <- data %>%
761 +
    group_by(!! sym(y)) %>%
762 +
    select(!! sym(y), !! sym(x)) %>%
763 +
    summarise_all(funs(length, mean, var, sd)) %>%
764 +
    as_data_frame() %>%
765 +
    mutate(
766 +
      ses = sd / sqrt(length)
767 +
    )
768 +
769 +
  return(by_factor)
770 +
771 +
}
772 +
773 +
tbl_stats <- function(data, y) {
774 +
775 +
  avg <- data %>%
776 +
    select(y) %>%
777 +
    summarise_all(funs(length, mean, sd)) %>%
778 +
    as_data_frame() %>%
779 +
    mutate(
780 +
      se = sd / sqrt(length)
781 +
    )
782 +
783 +
  return(unlist(avg, use.names = FALSE))
784 +
785 +
}
786 +
787 +
1 788
fg <- function(x, w) {
2 789
  x %>%
3 790
    as.character() %>%
@@ -5,7 +792,8 @@
Loading
5 792
}
6 793
7 794
fk <- function(x, w) {
8 -
  format(x, width = w, justify = "centre", nsmall = 3)
795 +
  x %>%
796 +
    format(width = w, justify = "centre", nsmall = 3)
9 797
}
10 798
11 799
@@ -47,13 +835,149 @@
Loading
47 835
  rep("  ")
48 836
}
49 837
838 +
l <- function(x) {
839 +
  x <- as.character(x)
840 +
  k <- grep("\\$", x)
841 +
  if (length(k) == 1) {
842 +
    temp <- strsplit(x, "\\$")
843 +
    out <- temp[[1]][2]
844 +
  } else {
845 +
    out <- x
846 +
  }
847 +
  return(out)
848 +
}
849 +
850 +
#' @importFrom tidyr gather
851 +
paired_data <- function(x, y) {
852 +
  d <- tibble(x = x, y = y) %>%
853 +
    mutate(z = x - y) %>%
854 +
    gather()
855 +
  return(d)
856 +
}
857 +
858 +
#' @importFrom dplyr select
859 +
paired_stats <- function(data, key, value) {
860 +
861 +
  d <- data %>%
862 +
    group_by(key) %>%
863 +
    select(value, key) %>%
864 +
    summarise_all(funs(length, mean, sd)) %>%
865 +
    as_data_frame() %>%
866 +
    mutate(
867 +
      se = sd / sqrt(length)
868 +
    ) %>%
869 +
    select(-(key:length))
870 +
871 +
  return(d)
872 +
}
873 +
874 +
875 +
cor_sig <- function(corr, n) {
876 +
  t <- corr / ((1 - (corr ^ 2)) / (n - 2)) ^ 0.5
877 +
  df <- n - 2
878 +
  sig <- (1 - pt(t, df)) * 2
879 +
  return(round(sig, 4))
880 +
}
881 +
882 +
samp_err <- function(sigma, n) {
883 +
  sigma / (n ^ 0.5)
884 +
}
885 +
886 +
conf_int_t <- function(u, s, n, alpha = 0.05) {
887 +
  a <- alpha / 2
888 +
  df <- n - 1
889 +
  error <- round(qt(a, df), 3) * -1
890 +
  lower <- u - (error * samp_err(s, n))
891 +
  upper <- u + (error * samp_err(s, n))
892 +
  result <- c(lower, upper)
893 +
  return(result)
894 +
}
895 +
50 896
formatter_pair <- function(x, w) {
51 -
  x1  <- format(x, nsmall = 2)
52 -
  x2  <- as.character(x1)
897 +
  x1 <- format(x, nsmall = 2)
898 +
  x2 <- as.character(x1)
53 899
  ret <- format(x2, width = w, justify = "centre")
54 900
  return(ret)
55 901
}
56 902
903 +
mean_t <- function(x) {
904 +
  return(round(mean(x), 3))
905 +
}
906 +
907 +
sd_t <- function(x) {
908 +
  s <- sd(x)
909 +
  return(round(s, 3))
910 +
}
911 +
912 +
std_err <- function(x) {
913 +
  se <- sd(x) / sqrt(length(x))
914 +
  return(round(se, 3))
915 +
}
916 +
917 +
data_split <- function(data, x, y) {
918 +
  x1 <- enquo(x)
919 +
  y1 <- enquo(y)
920 +
921 +
  by_gender <-
922 +
    data %>%
923 +
    group_by(!! x1) %>%
924 +
    select(!! x1, !! y1) %>%
925 +
    summarise_all(funs(length, mean_t, sd_t, std_err)) %>%
926 +
    as.data.frame()
927 +
928 +
  return(by_gender)
929 +
}
930 +
931 +
da <- function(data, y) {
932 +
  y1 <- enquo(y)
933 +
934 +
  dat <-
935 +
    data %>%
936 +
    select(!! y1) %>%
937 +
    summarise_all(funs(length, mean_t, sd_t, std_err)) %>%
938 +
    as.data.frame()
939 +
940 +
  return(dat)
941 +
}
942 +
943 +
sd_diff <- function(n1, n2, s1, s2) {
944 +
  n1 <- n1 - 1
945 +
  n2 <- n2 - 1
946 +
  n <- (n1 + n2) - 2
947 +
  return(((n1 * s1 + n2 * s2) / n) ^ 0.5)
948 +
}
949 +
950 +
se_diff <- function(n1, n2, s1, s2) {
951 +
  df <- n1 + n2 - 2
952 +
  n_1 <- n1 - 1
953 +
  n_2 <- n2 - 1
954 +
  v <- (n_1 * s1 + n_2 * s2) / df
955 +
  return(sqrt(v * (1 / n1 + 1 / n2)))
956 +
}
957 +
958 +
se_sw <- function(s1, s2, n1, n2) {
959 +
  return(((s1 / n1) + (s2 / n2)) ^ 0.5)
960 +
}
961 +
962 +
df <- function(n1, n2, s1, s2) {
963 +
  sn1 <- s1 / n1
964 +
  sn2 <- s2 / n2
965 +
  m1 <- 1 / (n1 - 1)
966 +
  m2 <- 1 / (n2 - 1)
967 +
  num <- (sn1 + sn2) ^ 2
968 +
  den <- (m1 * (sn1 ^ 2)) + (m2 * (sn2 ^ 2))
969 +
  return(round(num / den))
970 +
}
971 +
972 +
conf_int_p <- function(u, se, alpha = 0.05) {
973 +
  a <- alpha / 2
974 +
  error <- round(qnorm(a), 3) * -1
975 +
  lower <- u - (error * se)
976 +
  upper <- u + (error * se)
977 +
  result <- c(lower, upper)
978 +
  return(result)
979 +
}
980 +
57 981
fw <- function(x, w) {
58 982
  x %>%
59 983
    as.character() %>%
@@ -70,25 +994,64 @@
Loading
70 994
  rep("    ")
71 995
}
72 996
73 -
#' @importFrom utils packageVersion menu install.packages
74 -
check_suggests <- function(pkg) {
75 -
  
76 -
  pkg_flag <- tryCatch(utils::packageVersion(pkg), error = function(e) NA)
77 -
  
78 -
  if (is.na(pkg_flag)) {
79 -
    
80 -
    msg <- message(paste0('\n', pkg, ' must be installed for this functionality.'))
81 -
    
82 -
    if (interactive()) {
83 -
      message(msg, "\nWould you like to install it?")
84 -
      if (utils::menu(c("Yes", "No")) == 1) {
85 -
        utils::install.packages(pkg)
86 -
      } else {
87 -
        stop(msg, call. = FALSE)
88 -
      }
89 -
    } else {
90 -
      stop(msg, call. = FALSE)
91 -
    } 
997 +
sums <- function(data) {
998 +
  cl <- colSums(data)
999 +
  cls_sum <- sum(cl ^ 2)
1000 +
  g <- rowSums(data)
1001 +
  gs_sum <- sum(g ^ 2)
1002 +
  result <- list(cl = cl, cls_sum = cls_sum, g = g, gs_sum = gs_sum)
1003 +
}
1004 +
1005 +
coch <- function(k, cls_sum, cl, g, gs_sum) {
1006 +
  out <- ((k - 1) * ((k * cls_sum) - (sum(cl) ^ 2))) / ((k * sum(g)) - gs_sum)
1007 +
  return(out)
1008 +
}
1009 +
1010 +
# function for binary coding
1011 +
nruns <- function(data, value) {
1012 +
  if (data > value) {
1013 +
    return(1)
1014 +
  } else if (data < value) {
1015 +
    return(0)
1016 +
  } else {
1017 +
    return(NA)
92 1018
  }
1019 +
}
1020 +
1021 +
nruns2 <- function(data, value) {
1022 +
  if (data <= value) {
1023 +
    return(0)
1024 +
  } else {
1025 +
    return(1)
1026 +
  }
1027 +
}
1028 +
1029 +
# expected runs
1030 +
expruns <- function(n0, n1) {
1031 +
  N <- n0 + n1
1032 +
  return(((2 * n0 * n1) / N) + 1)
1033 +
}
1034 +
1035 +
# standard deviation of runs
1036 +
sdruns <- function(n0, n1) {
1037 +
  N <- n0 + n1
1038 +
  n <- 2 * n0 * n1
1039 +
  return(((n * (n - N)) / ((N ^ 2) * (N - 1))))
1040 +
}
1041 +
1042 +
check_level <- function(data, x) {
1043 +
  x1 <- enquo(x)
1044 +
1045 +
  data %>%
1046 +
    pull(!! x1) %>%
1047 +
    nlevels()
1048 +
}
1049 +
1050 +
check_x <- function(data, x) {
1051 +
  x1 <- enquo(x)
93 1052
1053 +
  data %>%
1054 +
    pull(!! x1) %>%
1055 +
    (is.factor) %>%
1056 +
    `!`()
94 1057
}

@@ -1,3 +1,4 @@
Loading
1 +
#' @importFrom stats pbinom dbinom
1 2
#' @title Binomial Test
2 3
#' @description Test whether the proportion of successes on a two-level
3 4
#' categorical dependent variable significantly differs from a hypothesized value.
@@ -7,18 +8,17 @@
Loading
7 8
#' @param data a \code{data.frame} or a \code{tibble}
8 9
#' @param variable factor; column in \code{data}
9 10
#' @param ... additional arguments passed to or from other methods
10 -
#'
11 11
#' @return \code{binom_test} returns an object of class \code{"binom_test"}.
12 12
#' An object of class \code{"binom_test"} is a list containing the
13 13
#' following components:
14 14
#'
15 -
#' \item{exp_k}{expected number of successes}
16 -
#' \item{exp_p}{expected probability of success}
17 -
#' \item{k}{number of successes}
18 15
#' \item{n}{number of observations}
16 +
#' \item{k}{number of successes}
17 +
#' \item{exp_k}{expected number of successes}
19 18
#' \item{obs_p}{assumed probability of success}
20 -
#' \item{pval_lower}{lower one sided p value}
21 -
#' \item{pval_upper}{upper one sided p value}
19 +
#' \item{exp_p}{expected probability of success}
20 +
#' \item{lower}{lower one sided p value}
21 +
#' \item{upper}{upper one sided p value}
22 22
#' @section Deprecated Functions:
23 23
#' \code{binom_calc()} and \code{binom_test()} have been deprecated. Instead use
24 24
#' \code{infer_binom_cal()} and \code{infer_binom_test()}.
@@ -38,7 +38,6 @@
Loading
38 38
39 39
#' @export
40 40
infer_binom_calc.default <- function(n, success, prob = 0.5, ...) {
41 -
42 41
  if (!is.numeric(n)) {
43 42
    stop("n must be an integer")
44 43
  }
@@ -57,21 +56,24 @@
Loading
57 56
58 57
  k <- binom_comp(n, success, prob)
59 58
60 -
  out <-
61 -
    list(
62 -
      exp_k      = k$exp_k,
63 -
      exp_p      = k$exp_p,
64 -
      k          = k$k,
65 -
      n          = n,
66 -
      obs_p      = k$obs_p,
67 -
      pval_lower = k$lower,
68 -
      pval_upper = k$upper
59 +
  out <- list(
60 +
    n = n, k = k$k, exp_k = k$exp_k, obs_p = k$obs_p,
61 +
    exp_p = k$exp_p, lower = k$lower, upper = k$upper
69 62
  )
70 63
71 64
  class(out) <- "infer_binom_calc"
72 65
  return(out)
73 66
}
74 67
68 +
#' @export
69 +
#' @rdname infer_binom_calc
70 +
#' @usage NULL
71 +
#'
72 +
binom_calc <- function(n, success, prob = 0.5, ...) {
73 +
  .Deprecated("infer_binom_calc()")
74 +
  infer_binom_calc(n, success, prob = 0.5, ...)
75 +
}
76 +
75 77
#' @export
76 78
print.infer_binom_calc <- function(x, ...) {
77 79
  print_binom(x)
@@ -80,9 +82,11 @@
Loading
80 82
#' @export
81 83
#' @rdname infer_binom_calc
82 84
infer_binom_test <- function(data, variable, prob = 0.5) {
85 +
  varyable <- enquo(variable)
83 86
84 -
  varyable <- deparse(substitute(variable))
85 -
  fdata    <- data[[varyable]]
87 +
  fdata <-
88 +
    data %>%
89 +
    pull(!! varyable)
86 90
87 91
  if (!is.factor(fdata)) {
88 92
    stop("variable must be of type factor", call. = FALSE)
@@ -105,49 +109,10 @@
Loading
105 109
  infer_binom_calc.default(n, k, prob)
106 110
}
107 111
108 -
binom_comp <- function(n, success, prob) {
109 -
110 -
  n     <- n
111 -
  k     <- success
112 -
  obs_p <- k / n
113 -
  exp_k <- round(n * prob)
114 -
  lt    <- stats::pbinom(k, n, prob, lower.tail = T)
115 -
  ut    <- stats::pbinom(k - 1, n, prob, lower.tail = F)
116 -
  p_opp <- round(stats::dbinom(k, n, prob), 9)
117 -
  i_p   <- stats::dbinom(exp_k, n, prob)
118 -
  i_k   <- exp_k
119 -
120 -
  if (k < exp_k) {
121 -
    while (i_p > p_opp) {
122 -
      i_k <- i_k + 1
123 -
      i_p <- round(stats::dbinom(i_k, n, prob), 9)
124 -
      if (round(i_p) == p_opp) {
125 -
        break
126 -
      }
127 -
    }
128 -
129 -
    ttf <- stats::pbinom(k, n, prob, lower.tail = T) +
130 -
      stats::pbinom(i_k - 1, n, prob, lower.tail = F)
131 -
  } else {
132 -
    while (p_opp <= i_p) {
133 -
      i_k <- i_k - 1
134 -
      i_p <- stats::dbinom(i_k, n, prob)
135 -
      if (round(i_p) == p_opp) {
136 -
        break
137 -
      }
138 -
    }
139 -
140 -
    i_k <- i_k
141 -
142 -
    tt <- stats::pbinom(i_k, n, prob, lower.tail = T) +
143 -
      stats::pbinom(k - 1, n, prob, lower.tail = F)
144 -
145 -
    ttf <- ifelse(tt <= 1, tt, 1)
146 -
  }
147 -
148 -
  list(
149 -
    n = n, k = k, exp_k = exp_k, obs_p = obs_p, exp_p = prob, ik = i_k,
150 -
    lower = round(lt, 6), upper = round(ut, 6), two_tail = round(ttf, 6)
151 -
  )
152 -
153 -
}
112 +
#' @export
113 +
#' @rdname infer_binom_calc
114 +
#' @usage NULL
115 +
#'
116 +
binom_test <- function(data, prob = 0.5) {
117 +
  .Deprecated("infer_binom_test()")
118 +
}

@@ -1,3 +1,4 @@
Loading
1 +
#' @importFrom rlang quos !!!
1 2
#' @title Cochran Q Test
2 3
#' @description Test if the proportions of 3 or more dichotomous variables are
3 4
#' equal in the same population.
@@ -7,11 +8,10 @@
Loading
7 8
#' \code{"infer_cochran_qtest"}. An object of class \code{"infer_cochran_qtest"}
8 9
#' is a list containing the following components:
9 10
#'
10 -
#' \item{df}{degrees of freedom}
11 11
#' \item{n}{number of observations}
12 -
#' \item{pvalue}{p value}
12 +
#' \item{df}{degrees of freedom}
13 13
#' \item{q}{cochran's q statistic}
14 -
#'
14 +
#' \item{pvalue}{p-value}
15 15
#' @section Deprecated Function:
16 16
#' \code{cochran_test()} has been deprecated. Instead use
17 17
#' \code{infer_cochran_qtest()}.
@@ -26,9 +26,10 @@
Loading
26 26
27 27
#' @export
28 28
infer_cochran_qtest.default <- function(data, ...) {
29 +
  vars <- quos(...)
29 30
30 -
  vars  <- vapply(substitute(...()), deparse, NA_character_)
31 -
  fdata <- data[vars]
31 +
  fdata <- data %>%
32 +
    select(!!! vars)
32 33
33 34
  if (ncol(fdata) < 3) {
34 35
    stop("Please specify at least 3 variables.")
@@ -39,82 +40,21 @@
Loading
39 40
  }
40 41
41 42
  k <- cochran_comp(fdata)
42 -
43 -
  result <-
44 -
    list(
45 -
      df     = k$df,
46 -
      n      = k$n,
47 -
      pvalue = k$pvalue,
48 -
      q      = k$q)
49 -
43 +
  result <- list(n = k$n, df = k$df, q = k$q, pvalue = k$pvalue)
50 44
  class(result) <- "infer_cochran_qtest"
51 45
  return(result)
52 46
}
53 47
54 48
#' @export
49 +
#' @rdname infer_cochran_qtest
50 +
#' @usage NULL
55 51
#'
56 -
print.infer_cochran_qtest <- function(x, ...) {
57 -
  print_cochran_test(x)
58 -
}
59 -
60 -
coch_data <- function(x, ...) {
61 -
62 -
  if (is.data.frame(x)) {
63 -
    data <- x %>%
64 -
      lapply(as.numeric) %>%
65 -
      as.data.frame() %>%
66 -
      `-`(1)
67 -
  } else {
68 -
    data <- cbind(x, ...) %>%
69 -
      apply(2, as.numeric) %>%
70 -
      `-`(1) %>%
71 -
      as.data.frame()
72 -
  }
73 -
74 -
  return(data)
75 -
}
76 -
77 -
cochran_comp <- function(data) {
78 -
79 -
  n  <- nrow(data)
80 -
  k  <- ncol(data)
81 -
  df <- k - 1
82 -
83 -
  cs <-
84 -
    data %>%
85 -
    lapply(as.numeric) %>%
86 -
    as.data.frame() %>%
87 -
    magrittr::subtract(1) %>%
88 -
    sums()
89 -
90 -
  q <- coch(k, cs$cls_sum, cs$cl, cs$g, cs$gs_sum)
91 -
92 -
  pvalue <- 1 - stats::pchisq(q, df)
93 -
94 -
  list(
95 -
    df = df,
96 -
    n = n,
97 -
    pvalue = round(pvalue, 4),
98 -
    q = q)
99 -
100 -
}
101 -
102 -
sums <- function(data) {
103 -
104 -
  cl      <- colSums(data)
105 -
  cls_sum <- sum(cl ^ 2)
106 -
  g       <- rowSums(data)
107 -
  gs_sum  <- sum(g ^ 2)
108 -
109 -
  list(
110 -
    cl      = cl,
111 -
    cls_sum = cls_sum,
112 -
    g       = g,
113 -
    gs_sum  = gs_sum)
114 -
115 -
}
116 -
117 -
coch <- function(k, cls_sum, cl, g, gs_sum) {
118 -
  ((k - 1) * ((k * cls_sum) - (sum(cl) ^ 2))) / ((k * sum(g)) - gs_sum)
52 +
cochran_test <- function(x, ...) {
53 +
  .Deprecated("infer_cochran_qtest()")
119 54
}
120 55
56 +
#' @export
57 +
#'
58 +
print.infer_cochran_qtest <- function(x, ...) {
59 +
  print_cochran_test(x)
60 +
}

@@ -1,3 +1,4 @@
Loading
1 +
#' @importFrom stats qt pt pf
1 2
#' @title Two Independent Sample t Test
2 3
#' @description \code{infer_ts_ind_ttest} compares the means of two independent groups in order to determine whether
3 4
#' there is statistical evidence that the associated population means are significantly different.
@@ -70,291 +71,66 @@
Loading
70 71
#'
71 72
infer_ts_ind_ttest.default <- function(data, x, y, confint = 0.95,
72 73
                                       alternative = c("both", "less", "greater", "all"), ...) {
74 +
  x1 <- enquo(x)
75 +
  y1 <- enquo(y)
73 76
74 -
  x1   <- deparse(substitute(x))
75 -
  y1   <- deparse(substitute(y))
76 -
  yone <- names(data[y1])
77 +
  yone <-
78 +
    data %>%
79 +
    select(!! y1) %>%
80 +
    names()
77 81
78 -
  if (check_x(data, x1)) {
82 +
  if (check_x(data, !! x1)) {
79 83
    stop("x must be a binary factor variable", call. = FALSE)
80 84
  }
81 85
82 -
  if (check_level(data, x1) > 2) {
86 +
  if (check_level(data, !! x1) > 2) {
83 87
    stop("x must be a binary factor variable", call. = FALSE)
84 88
  }
85 89
86 -
  method   <- match.arg(alternative)
87 -
  var_y    <- yone
88 -
  alpha    <- 1 - confint
89 -
  a        <- alpha / 2
90 -
  h        <- indth(data, x1, y1, a)
91 -
  grp_stat <- h
92 -
  g_stat   <- as.matrix(h)
93 -
  comb     <- indcomb(data, y1, a)
94 -
  k        <- indcomp(grp_stat, alpha)
95 -
  j        <- indsig(k$n1, k$n2, k$s1, k$s2, k$mean_diff)
96 -
  m        <- indpool(k$n1, k$n2, k$mean_diff, k$se_dif)
90 +
  method <- match.arg(alternative)
91 +
  var_y <- yone
92 +
  alpha <- 1 - confint
93 +
  a <- alpha / 2
97 94
98 -
  result <- list(alternative      = method,
99 -
                 combined         = comb,
100 -
                 confint          = confint,
101 -
                 conf_diff        = round(k$conf_diff, 5),
102 -
                 den_df           = k$n2 - 1,
103 -
                 df_pooled        = m$df_pooled,
104 -
                 df_satterthwaite = j$d_f,
105 -
                 f                = round(k$s1 / k$s2, 4),
106 -
                 f_sig            = fsig(k$s1, k$s2, k$n1, k$n2),
107 -
                 levels           = g_stat[, 1],
108 -
                 lower            = g_stat[, 8],
109 -
                 mean             = g_stat[, 3],
110 -
                 mean_diff        = round(k$mean_diff, 3),
111 -
                 n                = k$n,
112 -
                 num_df           = k$n1 - 1,
113 -
                 obs              = g_stat[, 2],
114 -
                 sd               = g_stat[, 4],
115 -
                 sd_dif           = round(k$sd_dif, 3),
116 -
                 se               = g_stat[, 5],
117 -
                 se_dif           = round(k$se_dif, 3),
118 -
                 sig              = j$sig,
119 -
                 sig_l            = j$sig_l,
120 -
                 sig_pooled_l     = m$sig_pooled_l,
121 -
                 sig_pooled_u     = m$sig_pooled_u,
122 -
                 sig_pooled       = m$sig_pooled,
123 -
                 sig_u            = j$sig_u,
124 -
                 t_pooled         = round(m$t_pooled, 4),
125 -
                 t_satterthwaite  = round(j$t, 4),
126 -
                 upper            = g_stat[, 9],
127 -
                 var_y            = var_y)
95 +
  h <- indth(data, !! x1, !! y1, a)
96 +
  grp_stat <- h
97 +
  g_stat <- as.matrix(h)
98 +
  comb <- indcomb(data, !! y1, a)
99 +
  k <- indcomp(grp_stat, alpha)
100 +
  j <- indsig(k$n1, k$n2, k$s1, k$s2, k$mean_diff)
101 +
  m <- indpool(k$n1, k$n2, k$mean_diff, k$se_dif)
102 +
103 +
  result <- list(
104 +
    levels = g_stat[, 1], obs = g_stat[, 2], n = k$n,
105 +
    mean = g_stat[, 3], sd = g_stat[, 4], se = g_stat[, 5],
106 +
    lower = g_stat[, 8], upper = g_stat[, 9], combined = comb,
107 +
    mean_diff = round(k$mean_diff, 3), sd_dif = round(k$sd_dif, 3),
108 +
    se_dif = round(k$se_dif, 3),
109 +
    conf_diff = round(k$conf_diff, 5), df_pooled = m$df_pooled,
110 +
    df_satterthwaite = j$d_f, t_pooled = round(m$t_pooled, 4),
111 +
    t_satterthwaite = round(j$t, 4),
112 +
    sig_pooled_l = m$sig_pooled_l, sig_pooled_u = m$sig_pooled_u,
113 +
    sig_pooled = m$sig_pooled, sig = j$sig, sig_l = j$sig_l, sig_u = j$sig_u,
114 +
    num_df = k$n1 - 1, den_df = k$n2 - 1, f = round(k$s1 / k$s2, 4),
115 +
    f_sig = fsig(k$s1, k$s2, k$n1, k$n2), var_y = var_y, confint = confint,
116 +
    alternative = method
117 +
  )
128 118
129 119
  class(result) <- "infer_ts_ind_ttest"
130 120
  return(result)
121 +
}
131 122
123 +
#' @export
124 +
#' @rdname infer_ts_ind_ttest
125 +
#' @usage NULL
126 +
#'
127 +
ind_ttest <- function(data, x, y, confint = 0.95,
128 +
                      alternative = c("both", "less", "greater", "all"), ...) {
129 +
  .Deprecated("infer_ts_ind_ttest()")
132 130
}
133 131
134 132
#' @export
135 133
#'
136 134
print.infer_ts_ind_ttest <- function(x, ...) {
137 135
  print_two_ttest(x)
138 -
}
139 -
140 -
indth <- function(data, x, y, a) {
141 -
142 -
  h       <- data_split(data, x, y)
143 -
  h$df    <- h$length - 1
144 -
  h$error <- stats::qt(a, h$df) * -1
145 -
  h$lower <- h$mean_t - (h$error * h$std_err)
146 -
  h$upper <- h$mean_t + (h$error * h$std_err)
147 -
148 -
  return(h)
149 -
}
150 -
151 -
data_split <- function(data, x, y) {
152 -
153 -
  dat <- data.table(data[c(x, y)])
154 -
  out <- dat[, .(length = length(get(y)),
155 -
                 mean_t = mean_t(get(y)),
156 -
                 sd_t = sd_t(get(y)),
157 -
                 std_err = std_err(get(y))),
158 -
            by = x]
159 -
160 -
  setDF(out)
161 -
162 -
}
163 -
164 -
indcomb <- function(data, y, a) {
165 -
166 -
  comb        <- da(data, y)
167 -
  comb$df     <- comb$length - 1
168 -
  comb$error  <- stats::qt(a, comb$df) * -1
169 -
  comb$lower  <- round(comb$mean_t - (comb$error * comb$std_err), 5)
170 -
  comb$upper  <- round(comb$mean_t + (comb$error * comb$std_err), 5)
171 -
  names(comb) <- NULL
172 -
173 -
  return(comb)
174 -
175 -
}
176 -
177 -
da <- function(data, y) {
178 -
179 -
  dat <- data[[y]]
180 -
  data.frame(length  = length(dat),
181 -
             mean_t  = mean_t(dat),
182 -
             sd_t    = sd_t(dat),
183 -
             std_err = std_err(dat))
184 -
185 -
}
186 -
187 -
mean_t <- function(x) {
188 -
189 -
  x %>%
190 -
    mean() %>%
191 -
    round(3)
192 -
193 -
}
194 -
195 -
sd_t <- function(x) {
196 -
197 -
  x %>%
198 -
    stats::sd() %>%
199 -
    round(3)
200 -
201 -
}
202 -
203 -
std_err <- function(x) {
204 -
205 -
  x %>%
206 -
    stats::sd() %>%
207 -
    divide_by(x %>%
208 -
                length() %>%
209 -
                sqrt()) %>%
210 -
    round(3)
211 -
212 -
}
213 -
214 -
indcomp <- function(grp_stat, alpha) {
215 -
216 -
  n1        <- grp_stat[1, 2]
217 -
  n2        <- grp_stat[2, 2]
218 -
  n         <- n1 + n2
219 -
  means     <- grp_stat[, 3]
220 -
  mean_diff <- means[1] - means[2]
221 -
  sd1       <- grp_stat[1, 4]
222 -
  sd2       <- grp_stat[2, 4]
223 -
  s1        <- grp_stat[1, 4] ^ 2
224 -
  s2        <- grp_stat[2, 4] ^ 2
225 -
  sd_dif    <- sd_diff(n1, n2, s1, s2)
226 -
  se_dif    <- se_diff(n1, n2, s1, s2)
227 -
  conf_diff <- conf_int_p(mean_diff, se_dif, alpha = alpha)
228 -
229 -
  list(conf_diff = conf_diff,
230 -
       mean_diff = mean_diff,
231 -
       n         = n,
232 -
       n1        = n1,
233 -
       n2        = n2,
234 -
       s1        = s1,
235 -
       s2        = s2,
236 -
       sd1       = sd1,
237 -
       sd2       = sd2,
238 -
       sd_dif    = sd_dif,
239 -
       se_dif    = se_dif)
240 -
241 -
}
242 -
243 -
sd_diff <- function(n1, n2, s1, s2) {
244 -
245 -
  n1 <- n1 - 1
246 -
  n2 <- n2 - 1
247 -
  n  <- (n1 + n2) - 2
248 -
249 -
  (n1 * s1) %>%
250 -
    add(n2 * s2) %>%
251 -
    divide_by(n) %>%
252 -
    raise_to_power(0.5)
253 -
254 -
}
255 -
256 -
se_diff <- function(n1, n2, s1, s2) {
257 -
258 -
  df  <- n1 + n2 - 2
259 -
  n_1 <- n1 - 1
260 -
  n_2 <- n2 - 1
261 -
262 -
  (n_1 * s1) %>%
263 -
    add(n_2 * s2) %>%
264 -
    divide_by(df) -> v
265 -
266 -
  (1 / n1) %>%
267 -
    add(1 / n2) %>%
268 -
    multiply_by(v) %>%
269 -
    sqrt()
270 -
271 -
}
272 -
273 -
conf_int_p <- function(u, se, alpha = 0.05) {
274 -
275 -
  a     <- alpha / 2
276 -
  error <- round(stats::qnorm(a), 3) * -1
277 -
  lower <- u - (error * se)
278 -
  upper <- u + (error * se)
279 -
  c(lower, upper)
280 -
281 -
}
282 -
283 -
indsig <- function(n1, n2, s1, s2, mean_diff) {
284 -
285 -
  d_f   <- as.vector(df(n1, n2, s1, s2))
286 -
  t     <- mean_diff / (((s1 / n1) + (s2 / n2)) ^ 0.5)
287 -
  sig_l <- round(stats::pt(t, d_f), 4)
288 -
  sig_u <- round(stats::pt(t, d_f, lower.tail = FALSE), 4)
289 -
290 -
  if (sig_l < 0.5) {
291 -
    sig <- round(stats::pt(t, d_f) * 2, 4)
292 -
  } else {
293 -
    sig <- round(stats::pt(t, d_f, lower.tail = FALSE) * 2, 4)
294 -
  }
295 -
296 -
  list(d_f   = d_f,
297 -
       sig_l = sig_l,
298 -
       sig_u = sig_u,
299 -
       sig   = sig,
300 -
       t     = t)
301 -
302 -
}
303 -
304 -
df <- function(n1, n2, s1, s2) {
305 -
306 -
  sn1 <- s1 / n1
307 -
  sn2 <- s2 / n2
308 -
  m1  <- 1 / (n1 - 1)
309 -
  m2  <- 1 / (n2 - 1)
310 -
  num <- (sn1 + sn2) ^ 2
311 -
  den <- (m1 * (sn1 ^ 2)) + (m2 * (sn2 ^ 2))
312 -
313 -
  round(num / den)
314 -
315 -
}
316 -
317 -
fsig <- function(s1, s2, n1, n2) {
318 -
319 -
  round(min(
320 -
    stats::pf((s1 / s2), (n1 - 1), (n2 - 1)),
321 -
    stats::pf((s1 / s2), (n1 - 1), (n2 - 1),
322 -
      lower.tail = FALSE
323 -
    )
324 -
  ) * 2, 4)
325 -
326 -
}
327 -
328 -
329 -
indpool <- function(n1, n2, mean_diff, se_dif) {
330 -
331 -
  df_pooled    <- (n1 + n2) - 2
332 -
  t_pooled     <- mean_diff / se_dif
333 -
  sig_pooled_l <- round(stats::pt(t_pooled, df_pooled), 4)
334 -
  sig_pooled_u <- round(stats::pt(t_pooled, df_pooled, lower.tail = FALSE), 4)
335 -
336 -
  if (sig_pooled_l < 0.5) {
337 -
    sig_pooled <- round(stats::pt(t_pooled, df_pooled) * 2, 4)
338 -
  } else {
339 -
    sig_pooled <- round(stats::pt(t_pooled, df_pooled, lower.tail = FALSE) * 2, 4)
340 -
  }
341 -
342 -
  list(df_pooled    = df_pooled,
343 -
       sig_pooled_l = sig_pooled_l,
344 -
       sig_pooled_u = sig_pooled_u,
345 -
       sig_pooled   = sig_pooled,
346 -
       t_pooled     = t_pooled)
347 -
348 -
}
349 -
350 -
check_x <- function(data, x) {
351 -
352 -
  !is.factor(data[[x]])
353 -
354 -
}
355 -
356 -
check_level <- function(data, x) {
357 -
358 -
  nlevels(data[[x]])
359 -
360 -
}
136 +
}

@@ -57,9 +57,11 @@
Loading
57 57
#'
58 58
infer_os_t_test.default <- function(data, x, mu = 0, alpha = 0.05,
59 59
                                    alternative = c("both", "less", "greater", "all"), ...) {
60 +
  x1 <- enquo(x)
60 61
61 -
  x1   <- deparse(substitute(x))
62 -
  xone <- data[[x1]]
62 +
  xone <-
63 +
    data %>%
64 +
    pull(!! x1)
63 65
64 66
  if (!is.numeric(xone)) {
65 67
    stop("x must be numeric")
@@ -72,88 +74,38 @@
Loading
72 74
  }
73 75
74 76
  type <- match.arg(alternative)
75 -
  var_name <- names(data[x1])
77 +
78 +
  var_name <-
79 +
    data %>%
80 +
    select(!! x1) %>%
81 +
    names()
82 +
76 83
  k <- ttest_comp(xone, mu, alpha, type)
77 84
78 -
  result <-
79 -
    list(conf        = k$conf,
80 -
         confint     = k$confint,
81 -
         df          = k$df,
82 -
         Mean        = k$Mean,
83 -
         mean_diff   = k$mean_diff,
84 -
         mean_diff_l = k$mean_diff_l,
85 -
         mean_diff_u = k$mean_diff_u,
86 -
         mu          = k$mu,
87 -
         n           = k$n,
88 -
         p           = k$p,
89 -
         p_l         = k$p_l,
90 -
         p_u         = k$p_u,
91 -
         stddev      = k$stddev,
92 -
         std_err     = k$std_err,
93 -
         test_stat   = k$test_stat,
94 -
         type        = type,
95 -
         var_name    = var_name)
85 +
  result <- list(
86 +
    mu = k$mu, n = k$n, df = k$df, Mean = k$Mean,
87 +
    stddev = k$stddev, std_err = k$std_err,
88 +
    test_stat = k$test_stat, confint = k$confint,
89 +
    mean_diff = k$mean_diff, mean_diff_l = k$mean_diff_l,
90 +
    mean_diff_u = k$mean_diff_u, p_l = k$p_l, p_u = k$p_u,
91 +
    p = k$p, conf = k$conf, type = type, var_name = var_name
92 +
  )
96 93
97 94
  class(result) <- "infer_os_t_test"
98 95
  return(result)
99 96
}
100 97
101 98
#' @export
99 +
#' @rdname infer_os_t_test
100 +
#' @usage NULL
102 101
#'
103 -
print.infer_os_t_test <- function(x, ...) {
104 -
  print_ttest(x)
102 +
ttest <- function(x, mu = 0, alpha = 0.05,
103 +
                  type = c("both", "less", "greater", "all"), ...) {
104 +
  .Deprecated("infer_os_t_test()")
105 105
}
106 106
107 -
ttest_comp <- function(x, mu, alpha, type) {
108 -
109 -
  n         <- length(x)
110 -
  a         <- (alpha / 2)
111 -
  df        <- n - 1
112 -
  conf      <- 1 - alpha
113 -
  Mean      <- round(mean(x), 4)
114 -
  stddev    <- round(stats::sd(x), 4)
115 -
  std_err   <- round(stddev / sqrt(n), 4)
116 -
  test_stat <- round((Mean - mu) / std_err, 3)
117 -
118 -
  if (type == "less") {
119 -
    cint <- c(-Inf, test_stat + stats::qt(1 - alpha, df))
120 -
  } else if (type == "greater") {
121 -
    cint <- c(test_stat - stats::qt(1 - alpha, df), Inf)
122 -
  } else {
123 -
    cint <- stats::qt(1 - a, df)
124 -
    cint <- test_stat + c(-cint, cint)
125 -
  }
126 -
127 -
  confint     <- round(mu + cint * std_err, 4)
128 -
  mean_diff   <- round((Mean - mu), 4)
129 -
  mean_diff_l <- confint[1] - mu
130 -
  mean_diff_u <- confint[2] - mu
131 -
  p_l         <- stats::pt(test_stat, df)
132 -
  p_u         <- stats::pt(test_stat, df, lower.tail = FALSE)
133 -
134 -
  if (p_l < 0.5) {
135 -
    p <- p_l * 2
136 -
  } else {
137 -
    p <- p_u * 2
138 -
  }
139 -
140 -
141 -
  out <-
142 -
    list(conf        = conf,
143 -
         confint     = confint,
144 -
         df          = df,
145 -
         Mean        = Mean,
146 -
         mean_diff   = mean_diff,
147 -
         mean_diff_l = mean_diff_l,
148 -
         mean_diff_u = mean_diff_u,
149 -
         mu          = mu,
150 -
         n           = n,
151 -
         p           = p,
152 -
         p_l         = p_l,
153 -
         p_u         = p_u,
154 -
         stddev      = stddev,
155 -
         std_err     = std_err,
156 -
         test_stat   = test_stat)
157 -
158 -
  return(out)
159 -
}
107 +
#' @export
108 +
#'
109 +
print.infer_os_t_test <- function(x, ...) {
110 +
  print_ttest(x)
111 +
}

@@ -1,3 +1,5 @@
Loading
1 +
#' @importFrom stats complete.cases
2 +
#' @importFrom purrr map_dbl
1 3
#' @title Two Sample Variance Comparison Test
2 4
#' @description  \code{infer_ts_var_test} performs tests on the equality of standard
3 5
#' deviations (variances).
@@ -49,21 +51,25 @@
Loading
49 51
#'
50 52
infer_ts_var_test.default <- function(data, ..., group_var = NULL,
51 53
                                      alternative = c("less", "greater", "all")) {
54 +
  groupvar <- enquo(group_var)
52 55
53 -
  groupvar  <- deparse(substitute(group_var))
54 -
  varyables <- vapply(substitute(...()), deparse, NA_character_)
55 -
  fdata     <- data[varyables]
56 +
  varyables <- quos(...)
56 57
57 -
  if (groupvar == "NULL") {
58 -
    z  <- as.list(fdata)
59 -
    ln <- unlist(lapply(z, length))
58 +
  fdata <-
59 +
    data %>%
60 +
    select(!!! varyables)
61 +
62 +
  if (quo_is_null(groupvar)) {
63 +
    z <- as.list(fdata)
64 +
    ln <- z %>% map_int(length)
60 65
    ly <- seq_len(length(z))
61 66
62 67
    if (length(z) < 2) {
63 68
      stop("Please specify at least two variables.", call. = FALSE)
64 69
    }
65 70
66 -
    out   <- gvar(ln, ly)
71 +
    out <- gvar(ln, ly)
72 +
67 73
    fdata <- unlist(z)
68 74
69 75
    groupvars <-
@@ -71,13 +77,20 @@
Loading
71 77
      unlist() %>%
72 78
      as.factor()
73 79
74 -
    lev <- names(data[varyables])
75 -
80 +
    lev <-
81 +
      data %>%
82 +
      select(!!! varyables) %>%
83 +
      names()
76 84
  } else {
85 +
    fdata <-
86 +
      fdata %>%
87 +
      pull(1)
77 88
78 -
    fdata     <- fdata[[1]]
79 -
    groupvars <- data[[groupvar]]
80 -
    lev       <- levels(groupvars)
89 +
    groupvars <-
90 +
      data %>%
91 +
      pull(!! groupvar)
92 +
93 +
    lev <- levels(groupvars)
81 94
82 95
    if (length(fdata) != length(groupvars)) {
83 96
      stop("Length of variable and group_var do not match.", call. = FALSE)
@@ -86,94 +99,30 @@
Loading
86 99
87 100
88 101
  type <- match.arg(alternative)
89 -
  k    <- var_comp(fdata, groupvars)
90 -
91 -
  out <- list(avg   = k$avg,
92 -
              avgs  = k$avgs,
93 -
              f     = k$f,
94 -
              len   = k$len,
95 -
              lens  = k$lens,
96 -
              lev   = lev,
97 -
              lower = k$lower,
98 -
              n1    = k$n1,
99 -
              n2    = k$n2,
100 -
              sd    = k$sd,
101 -
              sds   = k$sds,
102 -
              se    = k$se,
103 -
              ses   = k$ses,
104 -
              type  = type,
105 -
              upper = k$upper,
106 -
              vars  = k$vars)
102 +
  k <- var_comp(fdata, groupvars)
103 +
104 +
  out <- list(
105 +
    f = k$f, lower = k$lower, upper = k$upper, vars = k$vars,
106 +
    avgs = k$avgs, sds = k$sds, ses = k$ses, avg = k$avg, sd = k$sd,
107 +
    se = k$se, n1 = k$n1, n2 = k$n2, lens = k$lens, len = k$len,
108 +
    lev = lev, type = type
109 +
  )
107 110
108 111
  class(out) <- "infer_ts_var_test"
109 112
  return(out)
110 113
}
111 114
112 115
#' @export
116 +
#' @rdname infer_ts_var_test
117 +
#' @usage NULL
113 118
#'
114 -
print.infer_ts_var_test <- function(x, ...) {
115 -
  print_var_test(x)
116 -
}
117 -
118 -
var_comp <- function(variable, group_var) {
119 -
120 -
  comp  <- stats::complete.cases(variable, group_var)
121 -
  cvar  <- variable[comp]
122 -
  gvar  <- group_var[comp]
123 -
124 -
  d     <- data.frame(cvar, gvar)
125 -
  vals  <- tibble_stats(d, "cvar", "gvar")
126 -
  lass  <- tbl_stats(d, "cvar")
127 -
128 -
  # lens  <- vals[[2]] %>% purrr::map_int(1)
129 -
  # vars  <- vals[[4]] %>% purrr::map_dbl(1)
130 -
131 -
  lens  <- vals[[2]]
132 -
  vars  <- vals[[4]]
133 -
134 -
  f     <- vars[1] / vars[2]
135 -
  n1    <- lens[1] - 1
136 -
  n2    <- lens[2] - 1
137 -
  lower <- stats::pf(f, n1, n2)
138 -
  upper <- stats::pf(f, n1, n2, lower.tail = FALSE)
139 -
140 -
  list(avg   = round(lass[2], 2),
141 -
       avgs  = round(vals[[3]], 2),
142 -
       f     = round(f, 4),
143 -
       len   = lass[1],
144 -
       lens  = lens,
145 -
       lower = round(lower, 4),
146 -
       n1    = n1,
147 -
       n2    = n2,
148 -
       sd    = round(lass[3], 2),
149 -
       sds   = round(vals[[5]], 2),
150 -
       se    = round(lass[4], 2),
151 -
       ses   = round(vals[[6]], 2),
152 -
       upper = round(upper, 4),
153 -
       vars  = round(vars, 2))
154 -
155 -
}
156 -
157 -
tibble_stats <- function(data, x, y) {
158 -
159 -
  dat <- data.table(data[c(x, y)])
160 -
161 -
  out <- dat[, .(length = length(get(x)),
162 -
                     mean = mean(get(x)),
163 -
                     var = stats::var(get(x)),
164 -
                     sd = stats::sd(get(x))),
165 -
                  by = y]
166 -
167 -
  out[, ':='(ses = sd / sqrt(length))]
168 -
  setDF(out)
169 -
  out <- out[order(out[, 1]),]
170 -
  return(out)
171 -
119 +
var_test <- function(variable, ..., group_var = NA,
120 +
                     alternative = c("less", "greater", "all")) {
121 +
  .Deprecated("infer_ts_var_test()")
172 122
}
173 123
174 -
tbl_stats <- function(data, y) {
175 -
176 -
  dat <- data[[y]]
177 -
  c(length(dat), mean(dat), sd(dat), (sd(dat) / sqrt(length(dat))))
178 -
             
179 -
}
124 +
#' @export
125 +
#'
126 +
print.infer_ts_var_test <- function(x, ...) {
127 +
  print_var_test(x)
128 +
}

@@ -1,5 +1,7 @@
Loading
1 1
#' @useDynLib inferr
2 2
#' @importFrom Rcpp sourceCpp
3 +
#' @importFrom stats median
4 +
#' @importFrom purrr map
3 5
#' @title Test for Random Order
4 6
#' @description runtest tests whether the observations of \code{x} are serially
5 7
#' independent i.e. whether they occur in a random order, by counting
@@ -57,10 +59,16 @@
Loading
57 59
infer_runs_test.default <- function(data, x, drop = FALSE,
58 60
                                    split = FALSE, mean = FALSE,
59 61
                                    threshold = NA) {
62 +
  x1 <- enquo(x)
60 63
61 -
  x1   <- deparse(substitute(x))
62 -
  xone <- data[[x1]]
63 -
  n    <- length(xone)
64 +
  xone <-
65 +
    data %>%
66 +
    pull(!! x1)
67 +
68 +
  n <- length(xone)
69 +
70 +
  # if (!(is.numeric(x) || is.integer(x)))
71 +
  #     stop("x must be numeric or integer")
64 72
65 73
  if (is.na(threshold)) {
66 74
    y <- unique(xone)
@@ -69,75 +77,65 @@
Loading
69 77
    }
70 78
  }
71 79
80 +
  # compute threshold
72 81
  if (!(is.na(threshold))) {
73 82
    thresh <- threshold
74 -
  } else if (mean) {
83 +
  } else if (mean == TRUE) {
75 84
    thresh <- mean(xone)
76 85
  } else {
77 -
    thresh <- stats::median(xone, na.rm = TRUE)
86 +
    thresh <- median(xone, na.rm = TRUE)
78 87
  }
79 88
80 -
  if (drop) {
89 +
  # drop values equal to threshold if drop == TRUE
90 +
  if (drop == TRUE) {
81 91
    xone <- xone[xone != thresh]
82 92
  }
83 93
84 -
  if (split) {
94 +
  # binary coding the data based on the threshold
95 +
  if (split == TRUE) {
85 96
    x_binary <- ifelse(xone > thresh, 1, 0)
86 97
  } else {
87 -
88 98
    x_binary <-
89 99
      xone %>%
90 -
      lapply(nruns2, thresh) %>%
100 +
      map(nruns2, thresh) %>%
91 101
      unlist(use.names = FALSE)
92 102
  }
93 103
94 -
  n_runs   <- nsignC(x_binary)
95 -
  n1       <- sum(x_binary)
96 -
  n0       <- length(x_binary) - n1
104 +
  # compute the number of runs
105 +
  n_runs <- nsignC(x_binary)
106 +
  n1 <- sum(x_binary)
107 +
  n0 <- length(x_binary) - n1
108 +
109 +
  # compute expected runs and sd of runs
97 110
  exp_runs <- expruns(n0, n1)
98 -
  sd_runs  <- sdruns(n0, n1)
111 +
  sd_runs <- sdruns(n0, n1)
99 112
113 +
  # compute the test statistic
100 114
  test_stat <- (n_runs - exp_runs) / (sd_runs ^ 0.5)
101 -
  sig <- 2 * (1 - stats::pnorm(abs(test_stat), lower.tail = TRUE))
115 +
  sig <- 2 * (1 - pnorm(abs(test_stat), lower.tail = TRUE))
102 116
103 -
  result <-
104 -
    list(mean      = exp_runs,
105 -
         n         = n,
106 -
         n_above   = n1,
107 -
         n_below   = n0,
108 -
         n_runs    = n_runs,
109 -
         p         = sig,
110 -
         threshold = thresh,
111 -
         var       = sd_runs,
112 -
         z         = test_stat)
117 +
  # result
118 +
  result <- list(
119 +
    n = n, threshold = thresh, n_below = n0, n_above = n1,
120 +
    mean = exp_runs, var = sd_runs, n_runs = n_runs, z = test_stat,
121 +
    p = sig
122 +
  )
113 123
114 124
  class(result) <- "infer_runs_test"
115 125
  return(result)
116 126
}
117 127
118 128
#' @export
129 +
#' @rdname infer_runs_test
130 +
#' @usage NULL
119 131
#'
120 -
print.infer_runs_test <- function(x, ...) {
121 -
  print_runs_test(x)
122 -
}
123 -
124 -
# expected runs
125 -
expruns <- function(n0, n1) {
126 -
  N <- n0 + n1
127 -
  return(((2 * n0 * n1) / N) + 1)
132 +
runs_test <- function(x, drop = FALSE, split = FALSE, mean = FALSE,
133 +
                      threshold = NA) {
134 +
  .Deprecated("infer_runs_test()")
128 135
}
129 136
130 -
# standard deviation of runs
131 -
sdruns <- function(n0, n1) {
132 -
  N <- n0 + n1
133 -
  n <- 2 * n0 * n1
134 -
  return(((n * (n - N)) / ((N ^ 2) * (N - 1))))
135 -
}
136 -
137 -
nruns2 <- function(data, value) {
138 -
  if (data <= value) {
139 -
    return(0)
140 -
  } else {
141 -
    return(1)
142 -
  }
143 -
}
137 +
#' @export
138 +
#'
139 +
print.infer_runs_test <- function(x, ...) {
140 +
  print_runs_test(x)
141 +
}

@@ -9,18 +9,17 @@
Loading
9 9
#' \code{"infer_chisq_gof_test"}. An object of class \code{"infer_chisq_gof_test"}
10 10
#' is a list containing the following components:
11 11
#'
12 -
#' \item{categories}{levels of \code{x}}
13 12
#' \item{chisquare}{chi square statistic}
14 -
#' \item{deviation}{deviation of observed from frequency}
15 -
#' \item{degrees_of_freedom}{chi square degrees of freedom}
16 -
#' \item{expected_frequency}{expected frequency/proportion}
17 -
#' \item{n_levels}{number of levels of \code{x}}
18 -
#' \item{observed_frequency}{observed frequency/proportion}
19 13
#' \item{pvalue}{p-value}
20 -
#' \item{sample_size}{number of observations}
21 -
#' \item{std_residuals}{standardized residuals}
14 +
#' \item{df}{chi square degrees of freedom}
15 +
#' \item{ssize}{number of observations}
16 +
#' \item{names}{levels of \code{x}}
17 +
#' \item{level}{number of levels of \code{x}}
18 +
#' \item{obs}{observed frequency/proportion}
19 +
#' \item{exp}{expected frequency/proportion}
20 +
#' \item{deviation}{deviation of observed from frequency}
21 +
#' \item{std}{standardized residuals}
22 22
#' \item{varname}{name of categorical variable}
23 -
#'
24 23
#' @section Deprecated Function:
25 24
#' \code{chisq_gof()} has been deprecated. Instead use
26 25
#' \code{infer_chisq_gof_test()}
@@ -39,11 +38,22 @@
Loading
39 38
40 39
#' @export
41 40
infer_chisq_gof_test.default <- function(data, x, y, correct = FALSE) {
41 +
  x1 <- enquo(x)
42 42
43 -
  x1     <- deparse(substitute(x))
44 -
  xcheck <- data[[x1]]
45 -
  xlen   <- length(data[[x1]])
46 -
  xone   <- as.vector(table(data[[x1]]))
43 +
  xcheck <-
44 +
    data %>%
45 +
    pull(!! x1)
46 +
47 +
  xlen <-
48 +
    data %>%
49 +
    pull(!! x1) %>%
50 +
    length()
51 +
52 +
  xone <-
53 +
    data %>%
54 +
    pull(!! x1) %>%
55 +
    table() %>%
56 +
    as.vector()
47 57
48 58
  if (!is.factor(xcheck)) {
49 59
    stop("x must be an object of class factor")
@@ -57,7 +67,12 @@
Loading
57 67
    stop("correct must be either TRUE or FALSE")
58 68
  }
59 69
60 -
  varname <- names(data[x1])
70 +
71 +
  varname <-
72 +
    data %>%
73 +
    select(!! x1) %>%
74 +
    names()
75 +
61 76
  n <- length(xone)
62 77
63 78
  if (length(y) != n) {
@@ -76,21 +91,13 @@
Loading
76 91
    k <- chigof(xone, y)
77 92
  }
78 93
79 -
  sig <- round(stats::pchisq(k$chi, df, lower.tail = FALSE), 4)
80 -
81 -
  result <-
82 -
    list(
83 -
      categories         = levels(xcheck),
84 -
      chisquare          = k$chi,
85 -
      deviation          = format(k$dev, nsmall = 2),
86 -
      degrees_of_freedom = df,
87 -
      expected_frequency = y,
88 -
      n_levels           = nlevels(xcheck),
89 -
      observed_frequency = xone,
90 -
      pvalue             = sig,
91 -
      sample_size        = length(xcheck),
92 -
      std_residuals      = format(k$std, nsmall = 2),
93 -
      varname            = varname
94 +