slu-openGIS / qualmap

@@ -0,0 +1,46 @@
Loading
1 +
#' Verify Previously Saved Cluster Data
#'
#' @description Users may wish to save long-form combined cluster data as a \code{.csv} file
#'     or similar after combining individual clusters with \code{\link{qm_combine}}. Unless
#'     data are saved as an \code{.rda} file, the class definition of \code{"qm_cluster"}
#'     will be lost. The \code{qm_verify} function allows users to import data from any
#'     file type readable by \code{R}, verify that it has the column names needed for
#'     \code{\link{qm_summarize}}, and re-add the \code{"qm_cluster"} class.
#'
#' @details Verification stops with an error if \code{clusters} is not a data frame,
#'     if it has fewer than four columns, or if any of the columns \code{RID}, \code{CID},
#'     or \code{CAT} is absent. Column contents are not inspected.
#'
#' @usage qm_verify(clusters)
#'
#' @param clusters An object created by \code{qm_combine} with two or more clusters' worth of data
#'    that has been previously saved and requires verification before summarization.
#'
#' @return A tibble stored with a custom class of \code{qm_cluster} to facilitate data validation.
#'
#' @export
qm_verify <- function(clusters){

  # reject non-tabular input up front; otherwise ncol() returns NULL and the
  # column-count check below fails with an unhelpful "argument is of length zero"
  if (!is.data.frame(clusters)){
    stop("The object given for 'clusters' must be a data frame or tibble.")
  }

  # verify columns
  if (ncol(clusters) < 4){
    stop("The object given for 'clusters' has fewer than the minimum 4 columns.")
  }

  # each required identifier column is checked in turn so that the error names
  # the first one that is missing
  for (required in c("RID", "CID", "CAT")){
    if (!(required %in% names(clusters))){
      stop(paste0("The variable '", required, "' is missing and is required for verification."))
    }
  }

  # ensure clusters is a tibble
  clusters <- dplyr::as_tibble(clusters)

  # add new class, keeping it last in the class vector; skip if it is already
  # present so the class is never listed twice
  if (!inherits(clusters, "qm_cluster")){
    class(clusters) <- append(class(clusters), "qm_cluster")
  }

  # return output
  return(clusters)

}

@@ -150,7 +150,7 @@
Loading
150 150
    dplyr::mutate(RID = as.integer(rid)) %>%
151 151
    dplyr::mutate(CID = as.integer(cid)) %>%
152 152
    dplyr::mutate(CAT = category) %>%
153 -
    dplyr::select(RID, CID, CAT, !!keyVarQ, COUNT, ...) -> result
153 +
    dplyr::select(RID, CID, CAT, !!keyVarQ, ...) -> result
154 154
155 155
  # remove geometry
156 156
  sf::st_geometry(result) <- NULL

@@ -4,12 +4,15 @@
Loading
4 4
#' in the key variable. For each feature, a count corresponding to the number of times that feature is
5 5
#' identified in a cluster for the given category is also provided.
6 6
#'
7 -
#' @usage qm_summarize(ref, key, clusters, category, geometry = TRUE, use.na = FALSE)
7 +
#' @usage qm_summarize(ref, key, clusters, category, count, geometry = TRUE, use.na = FALSE)
8 8
#'
9 9
#' @param ref An \code{sf} object that serves as a master list of features
10 10
#' @param key Name of geographic id variable in the \code{ref} object to match input values to
11 11
#' @param clusters A tibble created by \code{qm_combine} with two or more clusters worth of data
12 12
#' @param category Value of the \code{CAT} variable to be analyzed
13 +
#' @param count How should clusters be summarized: by counting each time a feature is included
14 +
#'     in a cluster (\code{"clusters"}) or by counting the number of respondents
15 +
#'     (\code{"respondents"}) who associated a feature with the given category.
13 16
#' @param geometry A logical scalar that returns the full geometry and attributes of \code{ref}
14 17
#'     when \code{TRUE} (default). If \code{FALSE}, only the \code{key} and count of features is
15 18
#'     returned after validation.
@@ -45,22 +48,28 @@
Loading
45 48
#' clusters <- qm_combine(cluster_obj1, cluster_obj2)
46 49
#'
47 50
#' # summarize cluster objects
48 -
#' positive1 <- qm_summarize(ref = stl, key = TRACTCE, clusters = clusters, category = "positive")
51 +
#' positive1 <- qm_summarize(ref = stl, key = TRACTCE, clusters = clusters, category = "positive",
52 +
#'     count = "clusters")
49 53
#' class(positive1)
50 54
#' mean(positive1$positive)
51 55
#'
52 56
#' # summarize cluster objects with NA's instead of 0's
53 57
#' positive2 <- qm_summarize(ref = stl, key = TRACTCE, clusters = clusters, category = "positive",
54 -
#'     use.na = TRUE)
58 +
#'     count = "clusters", use.na = TRUE)
55 59
#' class(positive2)
56 60
#' mean(positive2$positive, na.rm = TRUE)
57 61
#'
58 62
#' # return tibble of valid features only
59 63
#' positive3 <- qm_summarize(ref = stl, key = TRACTCE, clusters = clusters, category = "positive",
60 -
#'     geometry = FALSE)
64 +
#'     count = "clusters", geometry = FALSE)
61 65
#' class(positive3)
62 66
#' mean(positive3$positive)
63 67
#'
68 +
#' # count respondents instead of clusters
69 +
#' positive4 <- qm_summarize(ref = stl, key = TRACTCE, clusters = clusters, category = "positive",
70 +
#'     count = "respondents")
71 +
#' mean(positive4$positive)
72 +
#'
64 73
#' @import sf
65 74
#' @importFrom dplyr filter
66 75
#' @importFrom dplyr group_by
@@ -73,10 +82,10 @@
Loading
73 82
#' @importFrom rlang :=
74 83
#'
75 84
#' @export
76 -
qm_summarize <- function(ref, key, clusters, category, geometry = TRUE, use.na = FALSE){
85 +
qm_summarize <- function(ref, key, clusters, category, count, geometry = TRUE, use.na = FALSE){
77 86
78 87
  # define undefined global variables as NULL
79 -
  CAT = COUNT = NULL
88 +
  RID = CAT = COUNT = NULL
80 89
81 90
  # save parameters to list
82 91
  paramList <- as.list(match.call())
@@ -116,6 +125,16 @@
Loading
116 125
    stop('A category from the cluster object must be specified.')
117 126
  }
118 127
128 +
  # check for missing parameters - count
129 +
  if (missing(count)) {
130 +
    stop("A method for producing counts, either 'clusters' or 'respondents', must be specified.")
131 +
  }
132 +
133 +
  # check for incorrect parameters - count
134 +
  if (count %in% c("clusters", "respondents") == FALSE){
135 +
    stop("Counts only accepts 'clusters' or 'respondents' as arguments.")
136 +
  }
137 +
119 138
  # check for missing parameters - geometry
120 139
  if (missing(geometry)) {
121 140
    geometry <- TRUE
@@ -146,20 +165,12 @@
Loading
146 165
  keyVarQ <- rlang::quo_name(rlang::enquo(key))
147 166
148 167
  # check to see if key exists in ref data
149 -
  refCols <- colnames(ref)
150 -
151 -
  keyVarQ %in% refCols -> keyExists
152 -
153 -
  if (keyExists == FALSE){
168 +
  if (keyVarQ %in% colnames(ref) == FALSE){
154 169
    stop(glue('The specified key {keyVarQ} cannot be found in the reference data.'))
155 170
  }
156 171
157 172
  # check to see if key exists in clusters data
158 -
  clusterCols <- colnames(clusters)
159 -
160 -
  keyVarQ %in% clusterCols -> keyExistsC
161 -
162 -
  if (keyExistsC == FALSE){
173 +
  if (keyVarQ %in% colnames(clusters) == FALSE){
163 174
    stop(glue('The specified key {keyVarQ} cannot be found in the clusters data.'))
164 175
  }
165 176
@@ -173,31 +184,42 @@
Loading
173 184
  categoryVarQ <- rlang::quo_name(rlang::enquo(category))
174 185
175 186
  # check to see if category exists in clusters data
176 -
  categoryVarQ %in% clusters$CAT -> catExists
177 -
178 -
  if (catExists == FALSE){
187 +
  if (categoryVarQ %in% clusters$CAT == FALSE){
179 188
    stop(glue('The specified category {categoryVarQ} cannot be found in the clusters data.'))
180 189
  }
181 190
182 -
  # filter, group, and summarize
183 -
  clusters %>%
184 -
    dplyr::filter(CAT == category) %>%
185 -
    dplyr::group_by(!!keyVar) %>%
186 -
    dplyr::summarize(COUNT := n()) -> result
191 +
  # subset
192 +
  clusters <- dplyr::filter(clusters, CAT == category)
193 +
194 +
  # summarize
195 +
  if (count == "clusters"){
196 +
197 +
    # will return the number of clusters that included each feature for the given category
198 +
    clusters <- dplyr::group_by(clusters, !!keyVar)
199 +
    clusters <- dplyr::summarize(clusters, !!categoryVarQ := n())
200 +
    # clusters <- dplyr::summarize(clusters, COUNT := n())
201 +
    # clusters <- dplyr::rename(clusters, !!categoryVarQ := COUNT)
187 202
188 -
  result <- dplyr::rename(result, !!categoryVarQ := COUNT)
203 +
  } else if (count == "respondents"){
204 +
205 +
    # will return the number of respondents that associated a feature with the category
206 +
    clusters <- dplyr::distinct(clusters, RID, CAT, !!keyVar)
207 +
    clusters <- dplyr::group_by(clusters, !!keyVar)
208 +
    clusters <- dplyr::summarize(clusters, !!categoryVarQ := n())
209 +
210 +
  }
189 211
190 212
  # add geometry
191 213
  if (geometry == TRUE) {
192 -
    result <- dplyr::left_join(ref, result, by = keyVarQ)
214 +
    clusters <- dplyr::left_join(ref, clusters, by = keyVarQ)
193 215
  }
194 216
195 217
  # replace zeros with missing
196 218
  if (use.na == FALSE) {
197 -
    result <- dplyr::mutate(result, !!categoryVarQ := ifelse(is.na(!!categoryVar) == TRUE, 0, !!categoryVar))
219 +
    clusters <- dplyr::mutate(clusters, !!categoryVarQ := ifelse(is.na(!!categoryVar) == TRUE, 0, !!categoryVar))
198 220
  }
199 221
200 222
  # return result
201 -
  return(result)
223 +
  return(clusters)
202 224
203 225
}
Files Coverage
R 92.82%
Project Totals (8 files) 92.82%
Sunburst
The inner-most circle is the entire project, moving away from the center are folders then, finally, a single file. The size and color of each slice is representing the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project. Proceeding with folders and finally individual files. The size and color of each slice is representing the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block is represented by the number of statements and the coverage, respectively.
Loading