jacobkap / fastDummies

@@ -66,6 +66,10 @@
Loading
66 66
         to proceed.")
67 67
  }
68 68
69 +
  if (is.vector(.data)) {
70 +
    .data <- data.frame(x = .data)
71 +
  }
72 +
69 73
  data_type <- check_type(.data)
70 74
71 75
  if (!data.table::is.data.table(.data)) {
@@ -74,9 +78,9 @@
Loading
74 78
75 79
  # Grabs column names that are character or factor class -------------------
76 80
  if (!is.null(select_columns)) {
77 -
    char_cols <- select_columns
81 +
    char_cols        <- select_columns
78 82
    cols_not_in_data <- char_cols[!char_cols %in% names(.data)]
79 -
    char_cols <- char_cols[!char_cols %in% cols_not_in_data]
83 +
    char_cols        <- char_cols[!char_cols %in% cols_not_in_data]
80 84
    if (length(char_cols) == 0) {
81 85
      stop("select_columns is/are not in data. Please check data and spelling.")
82 86
    }
@@ -131,13 +135,41 @@
Loading
131 135
      vals <- as.character(.data[[col_name]])
132 136
      vals <- data.frame(sort(table(vals), decreasing = TRUE),
133 137
                         stringsAsFactors = FALSE)
134 -
      if (vals$Freq[1] > vals$Freq[2]) {
135 -
        vals <- as.character(vals$vals[2:nrow(vals)])
136 -
        unique_vals <- unique_vals[which(unique_vals %in% vals)]
137 -
        unique_vals <- vals[order(match(vals, unique_vals))]
138 +
      # If there is an actual most frequent value, drop that value. Else,
139 +
      # if there is a tie, drop the one that's first alphabetically.
140 +
      top_vals <- vals[vals$Freq %in% max(vals$Freq), ]
141 +
      other_vals <- vals$vals[!vals$Freq %in% max(vals$Freq)]
142 +
      other_vals <- as.character(other_vals)
143 +
      top_vals <- top_vals[stringr::str_order(top_vals$vals,
144 +
                                              na_last = TRUE,
145 +
                                              locale = "en_US",
146 +
                                              numeric = TRUE), ]
147 +
      if (nrow(top_vals) == 1) {
148 +
        top_vals <- NULL
138 149
      } else {
139 -
        remove_first_dummy <- TRUE
150 +
        top_vals <- as.character(top_vals$vals[2:nrow(top_vals)])
140 151
      }
152 +
153 +
      unique_vals <- c(top_vals, other_vals)
154 +
      unique_vals <- stringr::str_sort(unique_vals,
155 +
                                       na_last = TRUE,
156 +
                                       locale = "en_US",
157 +
                                       numeric = TRUE)
158 +
      #    unique_vals <- vals[order(match(vals, unique_vals))]
159 +
      # if (vals$Freq[1] > vals$Freq[2]) {
160 +
      #   vals <- as.character(vals$vals[2:nrow(vals)])
161 +
      #   unique_vals <- unique_vals[which(unique_vals %in% vals)]
162 +
      #   unique_vals <- vals[order(match(vals, unique_vals))]
163 +
      # } else {
164 +
      #   vals <- vals[vals$Freq %in% max(vals$Freq), ]
165 +
      #   vals <- vals[stringr::str_order(vals$vals,
166 +
      #                                   na_last = TRUE,
167 +
      #                                   locale = "en_US",
168 +
      #                                   numeric = TRUE)]
169 +
      #   vals <- as.character(vals$vals[2:nrow(vals)])
170 +
      #   unique_vals <- unique_vals[which(unique_vals %in% vals)]
171 +
      #   unique_vals <- vals[order(match(vals, unique_vals))]
172 +
      # }
141 173
    }
142 174
143 175
    if (remove_first_dummy) {
@@ -145,7 +177,8 @@
Loading
145 177
    }
146 178
147 179
    data.table::alloc.col(.data, ncol(.data) + length(unique_vals))
148 -
    data.table::set(.data, j = paste0(col_name, "_", unique_vals), value = 0L)
180 +
    #   data.table::set(.data, j = paste0(col_name, "_", unique_vals), value = 0L)
181 +
    .data[, paste0(col_name, "_", unique_vals)] <- 0L
149 182
    for (unique_value in unique_vals) {
150 183
      data.table::set(.data, i =
151 184
                        which(data.table::chmatch(
Files Coverage
R 100.00%
Project Totals (3 files) 100.00%
Sunburst
The innermost circle is the entire project; moving away from the center are folders and then, finally, individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project, followed by folders and finally individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block represent the number of statements and the coverage, respectively.
Loading