1
#' @title TableSubgroupGLM: Sub-group analysis table for GLM.
2
#' @description Sub-group analysis table for GLM.
3
#' @param formula formula with survival analysis.
4
#' @param var_subgroup 1 sub-group variable for analysis, Default: NULL
5
#' @param var_cov Variables for additional adjust, Default: NULL
6
#' @param data Data or svydesign in survey package.
7
#' @param family family, "gaussian" or "binomial"
8
#' @param decimal.estimate Decimal for estimate, Default: 2
9
#' @param decimal.percent Decimal for percent, Default: 1
10
#' @param decimal.pvalue Decimal for pvalue, Default: 3
11
#' @return Sub-group analysis table. 
12
#' @details This result is used to make forestplot.
13
#' @examples
14
#' library(survival);library(dplyr)
15
#' lung %>% 
16
#'   mutate(status = as.integer(status == 1),
17
#'          sex = factor(sex),
18
#'          kk = factor(as.integer(pat.karno >= 70))) -> lung
19
#' TableSubgroupGLM(status ~ sex, data = lung, family = "binomial")
20
#' TableSubgroupGLM(status ~ sex, var_subgroup = "kk", data = lung, family = "binomial")
21
#' 
22
#' ## survey design
23
#' library(survey)
24
#' data.design <- svydesign(id = ~1, data = lung)
25
#' TableSubgroupGLM(status ~ sex, data = data.design, family = "binomial")
26
#' TableSubgroupGLM(status ~ sex, var_subgroup = "kk", data = data.design, family = "binomial")
27
#' @seealso 
28
#'  \code{\link[purrr]{safely}},\code{\link[purrr]{map}},\code{\link[purrr]{map2}}
29
#'  \code{\link[stats]{glm}}
30
#'  \code{\link[survey]{svyglm}}
31
#'  \code{\link[stats]{confint}}
32
#' @rdname TableSubgroupGLM
33
#' @export 
34
#' @importFrom purrr possibly map_dbl map map2
35
#' @importFrom dplyr group_split select filter mutate bind_cols
36
#' @importFrom magrittr %>%
37
#' @importFrom survey svyglm
38
#' @importFrom stats glm confint coefficients anova gaussian quasibinomial
39
#' @importFrom utils tail
40

41
TableSubgroupGLM <- function(formula, var_subgroup = NULL, var_cov = NULL, data, family = "binomial", decimal.estimate = 2, decimal.percent = 1, decimal.pvalue = 3){
42
  
43 1
  . <- NULL
44
  
45 0
  if (length(formula[[3]]) > 1) stop("Formula must contains only 1 independent variable")
46 1
  if (any(class(data) == "survey.design" & !is.null(var_subgroup))){
47 0
    if (is.numeric(data$variables[[var_subgroup]])) stop("var_subgroup must categorical.")
48 0
    if (length(levels(data$variables[[as.character(formula[[3]])]])) != 2) stop("Independent variable must have 2 levels.")
49 1
  } else if(any(class(data) == "data.frame" & !is.null(var_subgroup))){
50 0
    if (is.numeric(data[[var_subgroup]])) stop("var_subgroup must categorical.")
51 0
    if (length(levels(data[[as.character(formula[[3]])]])) != 2) stop("Independent variable must have 2 levels.")
52
  }
53
  
54
  
55
  ## functions with error
56 1
  possible_table <- purrr::possibly(table, NA)
57 0
  possible_prop.table <- purrr::possibly(function(x){prop.table(x, 1)[2, ] * 100}, NA) 
58 1
  possible_pv <- purrr::possibly(function(x){summary(x)[["coefficients"]][2, ] %>% tail(1)}, NA)
59 1
  possible_glm <- purrr::possibly(stats::glm, NA)
60 1
  possible_svyglm <- purrr::possibly(survey::svyglm, NA)
61 1
  possible_confint <- purrr::possibly(stats::confint, NA)
62 0
  possible_modely <- purrr::possibly(function(x){purrr::map_dbl(x, .[["y"]], 1)}, NA)
63 1
  possible_rowone <- purrr::possibly(function(x){x[2, ]}, NA)
64
  
65

66 1
  var_cov <- setdiff(var_cov, c(as.character(formula[[3]]), var_subgroup))
67 1
  family.svyglm <- gaussian()
68 1
  if (family  == "binomial") family.svyglm <- quasibinomial()
69
  
70 1
  if (is.null(var_subgroup)){
71 1
    if (!is.null(var_cov)){
72 0
      formula <- as.formula(paste0(deparse(formula), " + ", paste(var_cov, collapse = "+")))
73
    }
74
    
75 1
    if (any(class(data) == "survey.design")){
76 1
      model <- survey::svyglm(formula, design = data, x= T, family = family.svyglm)
77 0
      if (!is.null(model$xlevels) & length(model$xlevels[[1]]) != 2) stop("Categorical independent variable must have 2 levels.")
78
    } else{
79 1
      model <- stats::glm(formula, data = data, x= TRUE, family = family)
80 0
      if (!is.null(model$xlevels) & length(model$xlevels[[1]]) != 2) stop("Categorical independent variable must have 2 levels.")
81
    }
82
    
83
    
84 1
    Point.Estimate <- round(coef(model), decimal.estimate)[2]
85 1
    CI <- round(confint(model)[2, ], decimal.estimate)
86 1
    if(family == "binomial"){
87 1
      Point.Estimate <- round(exp(coef(model)), decimal.estimate)[2]
88 1
      CI <- round(exp(confint(model)[2, ]), decimal.estimate)
89
    }
90
    
91
    
92
    
93
    #if (length(Point.Estimate) > 1){
94
    #  stop("Formula must contain 1 independent variable only.")
95
    #}
96
    
97
    
98
    
99
    #event <- model$y
100
    #prop <- round(prop.table(table(event, model$x[, 1]), 2)[2, ] * 100, decimal.percent)
101 1
    pv <- round(tail(summary(model)$coefficients[2, ], 1), decimal.pvalue)
102
    
103 1
    data.frame(Variable = "Overall", Count = length(model$y), Percent = 100, `Point Estimate` = Point.Estimate, Lower = CI[1], Upper = CI[2]) %>% 
104 1
    dplyr::mutate(`P value` = ifelse(pv >= 0.001, pv, "<0.001"), `P for interaction` = NA) -> out
105
    
106 1
    if (family == "binomial"){
107 1
      names(out)[4] <- "OR"
108
    }
109
    
110 1
    return(out)
111 1
  } else if (length(var_subgroup) > 1 | any(grepl(var_subgroup, formula))){
112 0
    stop("Please input correct subgroup variable.")
113
  } else{
114 1
    if (!is.null(var_cov)){
115 0
      formula <- as.formula(paste0(deparse(formula), " + ", paste(var_cov, collapse = "+")))
116
    }
117 1
    if (any(class(data) == "survey.design")){
118 1
      data$variables[[var_subgroup]] %>% table %>% names -> label_val
119 1
      label_val %>% purrr::map(~possible_svyglm(formula, design = subset(data, get(var_subgroup) == .), x = TRUE, family = family.svyglm)) -> model
120 1
      xlev <- survey::svyglm(formula, design = data)$xlevels
121 1
      xlabel <- names(attr(model[[which(!is.na(model))[1]]]$x, "contrast"))[1]
122 1
      pvs_int <- possible_svyglm(as.formula(gsub(xlabel, paste(xlabel, "*", var_subgroup, sep=""), deparse(formula))), design = data, family = family.svyglm) %>% summary %>% coefficients
123 1
      pv_int <- round(pvs_int[nrow(pvs_int), ncol(pvs_int)], decimal.pvalue)
124 0
      if (!is.null(xlev) & length(xlev[[1]]) != 2) stop("Categorical independent variable must have 2 levels.")
125
      
126
      
127 1
      if (length(label_val) > 2){
128 0
        model.int <- survey::svyglm(as.formula(gsub(xlabel, paste(xlabel, "*", var_subgroup, sep=""), deparse(formula))), design = data, family = family.svyglm)
129 0
        pv_anova <- anova(model.int, method = "Wald")
130 0
        pv_int <- pv_anova[[length(pv_anova)]][[7]]
131
      }
132 1
      Count <- as.vector(table(data$variables[[var_subgroup]]))
133
      
134
    } else{
135 1
      data %>% filter(!is.na(get(var_subgroup))) %>% group_split(get(var_subgroup)) %>% purrr::map(~possible_glm(formula, data = ., x= T, family = family)) -> model
136 1
      data %>% filter(!is.na(get(var_subgroup))) %>% select(var_subgroup) %>% table %>% names -> label_val
137 1
      xlev <- stats::glm(formula, data = data)$xlevels
138 1
      xlabel <- names(attr(model[[which(!is.na(model))[1]]]$x, "contrast"))[1]
139 1
      model.int <- possible_glm(as.formula(gsub(xlabel, paste(xlabel, "*", var_subgroup, sep=""), deparse(formula))), data = data, family = family)  
140 1
      pvs_int <- model.int %>% summary %>% coefficients
141 1
      pv_int <- round(pvs_int[nrow(pvs_int), ncol(pvs_int)], decimal.pvalue)
142 0
      if (!is.null(xlev) & length(xlev[[1]]) != 2) stop("Categorical independent variable must have 2 levels.")
143
      
144 1
      if (length(label_val) > 2){
145 0
        pv_anova <- anova(model.int, test = "Chisq")
146 0
        pv_int <- round(pv_anova[nrow(pv_anova), 5], decimal.pvalue)
147
      }
148
      
149 1
      Count <- as.vector(table(data[[var_subgroup]]))
150
    }
151
    
152
    
153
    
154 1
    Estimate <- model %>% purrr::map("coefficients", .default = NA) %>% purrr::map_dbl(2, .default = NA)
155 1
    CI0 <- model %>% purrr::map(possible_confint) %>% purrr::map(possible_rowone) %>% Reduce(rbind, .)
156 1
    Point.Estimate <- round(Estimate, decimal.estimate)
157 1
    CI <- round(CI0, decimal.estimate)
158 1
    if (family == "binomial"){
159 1
      Point.Estimate <- round(exp(Estimate), decimal.estimate)
160 1
      CI <- round(exp(CI0), decimal.estimate)
161
    }
162
    
163 1
    model %>% purrr::map(possible_pv) %>% purrr::map_dbl(~round(., decimal.pvalue)) -> pv
164
    
165
    
166 1
    data.frame(Variable = paste("  ", label_val) , Count = Count, Percent = round(Count/sum(Count) * 100, decimal.percent), "Point Estimate" = Point.Estimate, Lower = CI[, 1], Upper = CI[, 2]) %>%
167 1
      dplyr::mutate(`P value` = ifelse(pv >= 0.001, pv, "<0.001"), `P for interaction` = NA) -> out
168 1
    if (family == "binomial"){
169 1
      names(out)[4] <- "OR"
170
    }
171
    
172 1
    return(rbind(c(var_subgroup, rep(NA, ncol(out) - 2), ifelse(pv_int >= 0.001, pv_int, "<0.001")), out))
173
  }
174
}
175

176

177

178
#' @title TableSubgroupMultiGLM: Multiple sub-group analysis table for GLM.
179
#' @description Multiple sub-group analysis table for GLM.
180
#' @param formula formula with survival analysis.
181
#' @param var_subgroups Multiple sub-group variables for analysis, Default: NULL
182
#' @param var_cov Variables for additional adjust, Default: NULL
183
#' @param data Data or svydesign in survey package.
184
#' @param family family, "gaussian" or "binomial"
185
#' @param decimal.estimate Decimal for estimate, Default: 2
186
#' @param decimal.percent Decimal for percent, Default: 1
187
#' @param decimal.pvalue Decimal for pvalue, Default: 3
188
#' @param line Include new-line between sub-group variables, Default: F
189
#' @return Multiple sub-group analysis table. 
190
#' @details This result is used to make forestplot.
191
#' @examples 
192
#' library(survival);library(dplyr)
193
#' lung %>% 
194
#'   mutate(status = as.integer(status == 1),
195
#'          sex = factor(sex),
196
#'          kk = factor(as.integer(pat.karno >= 70)),
197
#'          kk1 = factor(as.integer(pat.karno >= 60))) -> lung
198
#' TableSubgroupMultiGLM(status ~ sex, var_subgroups = c("kk", "kk1"), 
199
#'                       data=lung, line = TRUE, family = "binomial")
200
#' 
201
#' ## survey design
202
#' library(survey)
203
#' data.design <- svydesign(id = ~1, data = lung)
204
#' TableSubgroupMultiGLM(status ~ sex, var_subgroups = c("kk", "kk1"), 
205
#'                       data = data.design, family = "binomial")
206
#' @seealso 
207
#'  \code{\link[purrr]{map}}
208
#'  \code{\link[dplyr]{bind}}
209
#' @rdname TableSubgroupMultiGLM
210
#' @export 
211
#' @importFrom purrr map
212
#' @importFrom magrittr %>%
213
#' @importFrom dplyr bind_rows
214

215

216
TableSubgroupMultiGLM <- function(formula, var_subgroups = NULL, var_cov = NULL, data, family = "binomial", decimal.estimate = 2, decimal.percent = 1, decimal.pvalue = 3, line = F){
217
  
218 1
  . <- NULL
219 1
  out.all <- TableSubgroupGLM(formula, var_subgroup = NULL, var_cov = var_cov, data = data, family = family, decimal.estimate = decimal.estimate, decimal.percent = decimal.percent, decimal.pvalue = decimal.pvalue)
220
  
221 1
  if (is.null(var_subgroups)){
222 1
    return(out.all)
223
  } else {
224 1
    out.list <- purrr::map(var_subgroups, ~TableSubgroupGLM(formula, var_subgroup = ., var_cov = var_cov, data = data, family = family, decimal.estimate = decimal.estimate, decimal.percent = decimal.percent, decimal.pvalue = decimal.pvalue))
225 1
    if (line){
226 1
      out.newline <- out.list %>% purrr::map(~rbind(NA, .))
227 1
      return(rbind(out.all, out.newline %>% dplyr::bind_rows()))
228
    } else{
229 1
      return(rbind(out.all, out.list %>% dplyr::bind_rows()))
230
    }
231
  } 
232
}
233

234

235

Read our documentation on viewing source code .

Loading