Showing 7 of 54 files from the diff.
Other files ignored by Codecov
NAMESPACE has changed.
README.Rmd has changed.
DESCRIPTION has changed.

@@ -40,28 +40,24 @@
Loading
40 40
  customer_id <- NULL
41 41
  segment <- NULL
42 42
43 -
  rfm_score_table <-
44 -
    data %>%
45 -
    use_series(rfm) %>%
46 -
    dplyr::mutate(segment = 1)
43 +
  rfm_score_table <- data$rfm
44 +
  rfm_score_table$segment <- 1
47 45
48 46
  n_segments <- length(segment_names)
49 47
50 48
  for (i in seq_len(n_segments)) {
51 49
    rfm_score_table$segment[(
52 -
      (rfm_score_table$recency_score %>% dplyr::between(recency_lower[i], recency_upper[i])) &
53 -
        (rfm_score_table$frequency_score %>% dplyr::between(frequency_lower[i], frequency_upper[i])) &
54 -
        (rfm_score_table$monetary_score %>% dplyr::between(monetary_lower[i], monetary_upper[i])) &
50 +
      (rfm_score_table$recency_score %>% between(recency_lower[i], recency_upper[i])) &
51 +
        (rfm_score_table$frequency_score %>% between(frequency_lower[i], frequency_upper[i])) &
52 +
        (rfm_score_table$monetary_score %>% between(monetary_lower[i], monetary_upper[i])) &
55 53
        !rfm_score_table$segment %in% segment_names)] <- segment_names[i]
56 54
  }
57 55
58 56
  rfm_score_table$segment[is.na(rfm_score_table$segment)] <- "Others"
59 57
  rfm_score_table$segment[rfm_score_table$segment == 1]   <- "Others"
60 58
61 -
  rfm_score_table %>%
62 -
    dplyr::select(customer_id, segment, rfm_score, transaction_count, recency_days,
63 -
           amount, recency_score, frequency_score,
64 -
           monetary_score)
59 +
  rfm_score_table[c("customer_id", "segment", "rfm_score", "transaction_count", "recency_days",
60 +
                    "amount", "recency_score", "frequency_score", "monetary_score")]
65 61
66 62
67 63
}
@@ -97,14 +93,19 @@
Loading
97 93
#' @export
98 94
#'
99 95
rfm_segment_summary <- function(segments) {
100 -
  segments %>%
101 -
    dplyr::group_by(segment) %>%
102 -
    dplyr::summarise(
103 -
      customers = dplyr::n(),
104 -
      orders = sum(transaction_count),
105 -
      revenue = sum(amount),
106 -
      aov = revenue / orders
107 -
    )
96 +
97 +
  result <- 
98 +
    segments %>% 
99 +
    data.table() %>% 
100 +
    .[, .(customers = .N,
101 +
          orders = sum(transaction_count),
102 +
          revenue = sum(amount)),
103 +
      by = segment] %>% 
104 +
    setDF()
105 +
106 +
  result$aov <- result$revenue / result$orders
107 +
  return(result)
108 +
108 109
}
109 110
110 111
#' Visulaize segment summary
@@ -155,23 +156,23 @@
Loading
155 156
    for (i in 2:n_plots) {
156 157
      j <- i - 1
157 158
      var <- vars[i]
158 -
      data <- dplyr::select(x, segment, !!sym(var))
159 +
      data <- x[c("segment", var)]
159 160
      if (sort) {
160 161
        if (ascending) {
161 162
          if (flip) {
162 -
            p <- ggplot(data, aes(x = stats::reorder(segment, -!!sym(var), sum), y = !!sym(var)))
163 +
            p <- ggplot(data, aes_string(x = paste0("reorder(segment, -", var, ", sum)"), y = var))
163 164
          } else {
164 -
            p <- ggplot(data, aes(x = stats::reorder(segment, !!sym(var), sum), y = !!sym(var)))
165 +
            p <- ggplot(data, aes_string(x = paste0("reorder(segment, ", var, ", sum)"), y = var))
165 166
          }
166 167
        } else {
167 168
          if (flip) {
168 -
            p <- ggplot(data, aes(x = stats::reorder(segment, !!sym(var), sum), y = !!sym(var)))
169 +
            p <- ggplot(data, aes_string(x = paste0("reorder(segment, ", var, ", sum)"), y = var))
169 170
          } else {
170 -
            p <- ggplot(data, aes(x = stats::reorder(segment, -!!sym(var), sum), y = !!sym(var)))
171 +
            p <- ggplot(data, aes_string(x = paste0("reorder(segment, -", var, ", sum)"), y = var))
171 172
          }
172 173
        }
173 174
      } else {
174 -
        p <- ggplot(data, aes(x = segment, y = !!sym(var)))
175 +
        p <- ggplot(data, aes_string(x = "segment", y = var))
175 176
      }
176 177
177 178
      p <-
@@ -217,6 +218,7 @@
Loading
217 218
#' @param flip logical; if \code{TRUE}, creates horizontal bar plot.
218 219
#' @param print_plot logical; if \code{TRUE}, prints the plot else returns a plot object.
219 220
#'
221 +
#' @examples
220 222
#' analysis_date <- as.Date('2006-12-31')
221 223
#' rfm_result <- rfm_table_order(rfm_data_orders, customer_id, order_date,
222 224
#' revenue, analysis_date)
@@ -289,15 +291,23 @@
Loading
289 291
290 292
rfm_prep_revenue_dist <- function(x) {
291 293
292 -
  data <-
293 -
    x %>%
294 -
    dplyr::mutate(customer_share = customers / sum(customers),
295 -
                  revenue_share = revenue / sum(revenue)) %>%
296 -
    dplyr::select(segment, customer_share, revenue_share) %>%
297 -
    tidyr::pivot_longer(!segment, names_to = "category", values_to = "share")
294 +
  x$customer_share <- x$customers / sum(x$customers)
295 +
  x$revenue_share <- x$revenue / sum(x$revenue)
296 +
  data <- x[c("segment", "customer_share", "revenue_share")]
298 297
299 -
  data$category <- factor(data$category, levels = c("revenue_share", "customer_share"))
300 -
  return(data)
298 +
  n_row    <- nrow(data)
299 +
  segment  <- rep(data$segment, each = 2)
300 +
  category <- rep(c("customer_share", "revenue_share"), times = n_row)
301 +
302 +
  share <- c()
303 +
  for (i in seq_len(n_row)) {
304 +
    y <- as.numeric(data[i, c(2, 3)])
305 +
    share <- c(share, y)
306 +
  }
307 +
308 +
  result <- data.frame(segment, category, share)
309 +
  result$category <- factor(result$category, levels = c("revenue_share", "customer_share"))
310 +
  return(result)
301 311
}
302 312
303 313
#' Segmentation plots
@@ -389,16 +399,17 @@
Loading
389 399
390 400
rfm_prep_median <- function(rfm_segment_table, metric) {
391 401
392 -
  met <- rlang::enquo(metric)
402 +
  met <- deparse(substitute(metric))
393 403
394 -
  result <-
395 -
    rfm_segment_table %>%
396 -
    dplyr::group_by(segment) %>%
397 -
    dplyr::select(segment, !!met) %>%
398 -
    dplyr::summarise(mem = stats::median(!!met)) %>%
399 -
    dplyr::arrange(mem)
404 +
  result <- 
405 +
    rfm_segment_table %>% 
406 +
    data.table() %>% 
407 +
    .[, .(segment, met = get(met))] %>% 
408 +
    .[, .(mem = median(met)), by = segment] %>% 
409 +
    .[order(mem)] %>% 
410 +
    setnames(old = "mem", new = met) %>%
411 +
    setDF()
400 412
401 -
  colnames(result) <- c("segment", as_label(met))
402 413
  return(result)
403 414
404 415
}
@@ -416,15 +427,15 @@
Loading
416 427
  if (sort) {
417 428
    if (ascending) {
418 429
      if (flip) {
419 -
        p <- ggplot(data, aes(x = stats::reorder(!!as.symbol(cnames[1]), -!!as.symbol(cnames[2]), sum), y = !!as.symbol(cnames[2])))
430 +
        p <- ggplot(data, aes_string(x = paste0("reorder(", cnames[1], ", -", cnames[2], ", sum)"), y = cnames[2]))
420 431
      } else {
421 -
        p <- ggplot(data, aes(x = stats::reorder(!!as.symbol(cnames[1]), !!as.symbol(cnames[2]), sum), y = !!as.symbol(cnames[2])))
432 +
        p <- ggplot(data, aes_string(x = paste0("reorder(", cnames[1], ", ", cnames[2], ", sum)"), y = cnames[2]))
422 433
      }
423 434
    } else {
424 435
      if (flip) {
425 -
        p <- ggplot(data, aes(x = stats::reorder(!!as.symbol(cnames[1]), !!as.symbol(cnames[2]), sum), y = !!as.symbol(cnames[2])))
436 +
        p <- ggplot(data, aes_string(x = paste0("reorder(", cnames[1], ", ", cnames[2], ", sum)"), y = cnames[2]))
426 437
      } else {
427 -
        p <- ggplot(data, aes(x = stats::reorder(!!as.symbol(cnames[1]), -!!as.symbol(cnames[2]), sum), y = !!as.symbol(cnames[2])))
438 +
        p <- ggplot(data, aes_string(x = paste0("reorder(", cnames[1], ", -", cnames[2], ", sum)"), y = cnames[2]))
428 439
      }
429 440
    }
430 441
  } else {

@@ -25,19 +25,21 @@
Loading
25 25
#'
26 26
rfm_heatmap_data <- function(rfm_table) {
27 27
28 -
  result <-
29 -
    rfm_table %>%
30 -
    magrittr::use_series(rfm) %>%
31 -
    dplyr::group_by(frequency_score, recency_score) %>%
32 -
    dplyr::select(frequency_score, recency_score, amount) %>%
33 -
    dplyr::summarise(monetary = mean(amount))
28 +
  result <- 
29 +
    rfm_table %>% 
30 +
    use_series(rfm) %>% 
31 +
    data.table() %>% 
32 +
    .[, .(frequency_score, recency_score, amount)] %>% 
33 +
    .[, .(monetary = mean(amount)), 
34 +
      keyby = .(frequency_score, recency_score)] %>% 
35 +
    setDF()
34 36
35 37
  l_frequency      <- check_levels(result, frequency_score)
36 38
  l_recency        <- check_levels(result, recency_score)
37 39
  levels_frequency <- check_levels(result, frequency_score) %>% length()
38 40
  levels_recency   <- check_levels(result, recency_score) %>% length()
39 -
  f_frequency      <- magrittr::use_series(rfm_table, frequency_bins)
40 -
  r_recency        <- magrittr::use_series(rfm_table, recency_bins)
41 +
  f_frequency      <- use_series(rfm_table, frequency_bins)
42 +
  r_recency        <- use_series(rfm_table, recency_bins)
41 43
42 44
  if (levels_frequency != f_frequency) {
43 45
    result %<>%
@@ -80,10 +82,11 @@
Loading
80 82
#'
81 83
rfm_hist_data <- function(rfm_table) {
82 84
83 -
  rfm_table %>%
84 -
    magrittr::use_series(rfm) %>%
85 -
    dplyr::select(recency_days, transaction_count, amount) %>%
86 -
    tidyr::gather(rfm, score)
85 +
  cnames <- c("recency_days", "transaction_count", "amount")
86 +
  data   <- rfm_table$rfm[, cnames]
87 +
  rfm    <- rep(cnames, each = nrow(data))
88 +
  score  <- c(data$recency_days, data$transaction_count, data$amount)
89 +
  data.frame(rfm, score)
87 90
88 91
}
89 92
@@ -116,11 +119,11 @@
Loading
116 119
117 120
  rlevels <-
118 121
    rfm_table %>%
119 -
    magrittr::use_series(recency_bins) %>%
122 +
    use_series(recency_bins) %>%
120 123
    seq_len(.) %>%
121 124
    rev()
122 125
123 -
  data <- magrittr::use_series(rfm_table, rfm)
126 +
  data <- use_series(rfm_table, rfm)
124 127
  data$recency_score <- factor(data$recency_score, levels = rlevels)
125 128
126 129
  return(data)

@@ -19,7 +19,7 @@
Loading
19 19
#' \item{frequency_bins}{Number of bins used for frequency score.}
20 20
#' \item{recency_bins}{Number of bins used for recency score.}
21 21
#' \item{monetary_bins}{Number of bins used for monetary score.}
22 -
#' \item{threshold}{tibble with thresholds used for generating RFM scores.}
22 +
#' \item{threshold}{thresholds used for generating RFM scores.}
23 23
#'
24 24
#' @examples
25 25
#' analysis_date <- as.Date('2006-12-31')
@@ -46,26 +46,41 @@
Loading
46 46
                              revenue = NULL, analysis_date = NULL, recency_bins = 5,
47 47
                              frequency_bins = 5, monetary_bins = 5, ...) {
48 48
49 -
  cust_id  <- rlang::enquo(customer_id)
50 -
  odate    <- rlang::enquo(order_date)
51 -
  revenues <- rlang::enquo(revenue)
52 -
53 -
  result <-
54 -
    data %>%
55 -
    dplyr::select(!! cust_id, !! odate, !! revenues) %>%
56 -
    dplyr::group_by(!! cust_id) %>%
57 -
    dplyr::summarise(
58 -
      date_most_recent = max(!! odate), amount = sum(!! revenues),
59 -
      transaction_count = dplyr::n()
60 -
    ) %>%
61 -
    dplyr::mutate(
62 -
      recency_days = (analysis_date - date_most_recent) / lubridate::ddays()
63 -
    ) %>%
64 -
    dplyr::select(
65 -
      !! cust_id, date_most_recent, recency_days, transaction_count,
66 -
      amount
67 -
    ) %>%
68 -
    set_names(c("customer_id", "date_most_recent", "recency_days", "transaction_count", "amount"))
49 +
  cust_id <- deparse(substitute(customer_id))
50 +
  odate   <- deparse(substitute(order_date))
51 +
  reven   <- deparse(substitute(revenue))                                  
52 +
53 +
  result <- rfm_prep_table_data(data, cust_id, odate, reven, analysis_date)
54 +
  out    <- rfm_prep_bins(result, recency_bins, frequency_bins, monetary_bins, analysis_date)
55 +
56 +
  class(out) <- c("rfm_table_order", "tibble", "data.frame")
57 +
  return(out)
58 +
59 +
}
60 +
61 +
#' @export
62 +
#'
63 +
print.rfm_table_order <- function(x, ...) {
64 +
  print(x$rfm)
65 +
}
66 +
67 +
rfm_prep_table_data <- function(data, customer_id, order_date, revenue, analysis_date) {
68 +
69 +
  result <- 
70 +
    data %>% 
71 +
    data.table() %>% 
72 +
    .[, .(date_most_recent = max(order_date),
73 +
          amount = sum(revenue),
74 +
          transaction_count = .N), 
75 +
      by = customer_id] %>% 
76 +
    .[, ':='(recency_days = as.numeric(analysis_date - date_most_recent, units = "days"))] %>% 
77 +
    .[, .(customer_id, recency_days, transaction_count, amount)] %>% 
78 +
    setDF()
79 +
80 +
  return(result)
81 +
}
82 +
83 +
rfm_prep_bins <- function(result, recency_bins, frequency_bins, monetary_bins, analysis_date) {
69 84
70 85
  result$recency_score   <- NA
71 86
  result$frequency_score <- NA
@@ -135,46 +150,18 @@
Loading
135 150
      result$amount < upper_monetary[i]] <- i
136 151
  }
137 152
138 -
  result %<>%
139 -
    dplyr::mutate(
140 -
      rfm_score = recency_score * 100 + frequency_score * 10 + monetary_score
141 -
    ) %>%
142 -
    dplyr::select(
143 -
      customer_id, date_most_recent, recency_days, transaction_count, amount,
144 -
      recency_score, frequency_score, monetary_score, rfm_score
145 -
    )
153 +
  result$rfm_score <- result$recency_score * 100 + result$frequency_score * 10 + result$monetary_score
146 154
147 155
  result$transaction_count <- as.numeric(result$transaction_count)
148 156
149 -
  threshold <- tibble::tibble(recency_lower   = lower_recency,
150 -
                              recency_upper   = upper_recency,
151 -
                              frequency_lower = lower_frequency,
152 -
                              frequency_upper = upper_frequency,
153 -
                              monetary_lower  = lower_monetary,
154 -
                              monetary_upper  = upper_monetary)
155 -
156 -
  out <- list(
157 -
    rfm            = result,
158 -
    analysis_date  = analysis_date,
159 -
    frequency_bins = frequency_bins,
160 -
    recency_bins   = recency_bins,
161 -
    monetary_bins  = monetary_bins,
162 -
    threshold      = threshold
163 -
  )
157 +
  threshold <- data.frame(recency_lower   = lower_recency,
158 +
                          recency_upper   = upper_recency,
159 +
                          frequency_lower = lower_frequency,
160 +
                          frequency_upper = upper_frequency,
161 +
                          monetary_lower  = lower_monetary,
162 +
                          monetary_upper  = upper_monetary)
164 163
165 -
  class(out) <- c("rfm_table_order", "tibble", "data.frame")
166 -
  return(out)
164 +
  list(rfm = result, analysis_date = analysis_date, frequency_bins = frequency_bins,
165 +
       recency_bins = recency_bins, monetary_bins = monetary_bins, threshold = threshold)
167 166
168 167
}
169 -
170 -
171 -
172 -
#' @export
173 -
#'
174 -
print.rfm_table_order <- function(x, ...) {
175 -
  print(x$rfm)
176 -
}
177 -
178 -
179 -
180 -

@@ -64,19 +64,17 @@
Loading
64 64
    use_series(frequency_score) %>%
65 65
    max()
66 66
67 -
  guide_breaks <-
68 -
    seq(llm, ulm, length.out = bins) %>%
69 -
    round()
70 -
67 +
  guide_breaks <- round(seq(llm, ulm, length.out = bins))
68 +
    
71 69
  p <-
72 70
    ggplot(data = mapdata) +
73 71
    geom_tile(aes(x = frequency_score, y = recency_score, fill = monetary)) +
74 -
    scale_fill_gradientn(limits = c(llm, ulm),
75 -
                         colours = RColorBrewer::brewer.pal(n = brewer_n, name = brewer_name),
76 -
                         name = legend_title) +
77 72
    ggtitle(plot_title) +
78 73
    xlab(xaxis_title) +
79 74
    ylab(yaxis_title) +
75 +
    scale_fill_gradientn(limits = c(llm, ulm),
76 +
                         colours = RColorBrewer::brewer.pal(n = brewer_n, name = brewer_name),
77 +
                         name = legend_title) +
80 78
    theme(plot.title = element_text(hjust = plot_title_justify))
81 79
82 80
  if (print_plot) {
@@ -136,7 +134,9 @@
Loading
136 134
    rfm_hist_data(rfm_table) %>%
137 135
    ggplot(aes(score)) +
138 136
    geom_histogram(bins = hist_bins, fill = hist_color) +
139 -
    ylab(yaxis_title) + ggtitle(plot_title) + xlab(xaxis_title) +
137 +
    xlab(xaxis_title) +
138 +
    ylab(yaxis_title) + 
139 +
    ggtitle(plot_title) + 
140 140
    facet_grid(. ~ rfm, scales = "free_x",
141 141
      labeller = labeller(
142 142
        rfm = c(amount = hist_m_label, recency_days = hist_r_label,
@@ -189,16 +189,14 @@
Loading
189 189
    rfm_barchart_data(rfm_table) %>%
190 190
    ggplot() +
191 191
    geom_bar(aes(x = monetary_score), fill = bar_color) +
192 -
    facet_grid(recency_score ~ frequency_score) +
192 +
    xlab(xaxis_title) + 
193 +
    ylab(" ") + 
194 +
    ggtitle(sec_xaxis_title) +
193 195
    scale_y_continuous(sec.axis = sec_axis(~ ., name = sec_yaxis_title)) +
194 -
    xlab(xaxis_title) + ylab(" ") + ggtitle(sec_xaxis_title) +
195 -
    theme(
196 -
      plot.title = element_text(
197 -
        face = "plain", size = 11, hjust = 0.5
198 -
      ),
199 -
      axis.text.y = element_blank(),
200 -
      axis.ticks.y = element_blank()
201 -
    )
196 +
    facet_grid(recency_score ~ frequency_score) +
197 +
    theme(plot.title = element_text(face = "plain", size = 11, hjust = 0.5),
198 +
          axis.text.y = element_blank(),
199 +
          axis.ticks.y = element_blank())
202 200
203 201
  if (print_plot) {
204 202
    print(p)
@@ -248,14 +246,16 @@
Loading
248 246
                           plot_title = 'Customers by Orders',
249 247
                           plot_title_justify = 0.5, print_plot = TRUE) {
250 248
251 -
  data <-
252 -
    rfm_table %>%
253 -
    use_series(rfm) %>%
254 -
    dplyr::count(transaction_count)
249 +
  data <- 
250 +
    rfm_table %>% 
251 +
    use_series(rfm) %>% 
252 +
    data.table() %>% 
253 +
    .[, .(n = .N), by = transaction_count] %>% 
254 +
    setDF()
255 255
256 256
  ylim_max <-
257 257
    data %>%
258 -
    dplyr::pull(n) %>%
258 +
    use_series(n) %>% 
259 259
    max() %>%
260 260
    multiply_by(1.1) %>%
261 261
    ceiling(.)
@@ -264,11 +264,11 @@
Loading
264 264
    data %>%
265 265
    ggplot(aes(x = transaction_count, y = n)) +
266 266
    geom_bar(stat = "identity", fill = bar_color) +
267 -
    xlab(xaxis_title) + ylab(yaxis_title) + ylim(0, ylim_max) +
267 +
    xlab(xaxis_title) + 
268 +
    ylab(yaxis_title) + 
269 +
    ylim(0, ylim_max) +
268 270
    ggtitle(plot_title) +
269 -
    geom_text(
270 -
      aes(label = n, y = n + 3), position = position_dodge(0.9), vjust = 0
271 -
    ) +
271 +
    geom_text(aes(label = n, y = n + 3), position = position_dodge(0.9), vjust = 0) +
272 272
    theme(plot.title = element_text(hjust = 0.5))
273 273
274 274
  if (print_plot) {

@@ -20,7 +20,7 @@
Loading
20 20
#' \item{frequency_bins}{Number of bins used for frequency score.}
21 21
#' \item{recency_bins}{Number of bins used for recency score.}
22 22
#' \item{monetary_bins}{Number of bins used for monetary score.}
23 -
#' \item{threshold}{tibble with thresholds used for generating RFM scores.}
23 +
#' \item{threshold}{thresholds used for generating RFM scores.}
24 24
#'
25 25
#' @examples
26 26
#' analysis_date <- as.Date('2007-01-01')
@@ -49,108 +49,20 @@
Loading
49 49
                                       latest_visit_date = NULL, total_revenue = NULL, analysis_date = NULL,
50 50
                                       recency_bins = 5, frequency_bins = 5, monetary_bins = 5, ...) {
51 51
52 -
  cust_id      <- rlang::enquo(customer_id)
53 -
  order_count  <- rlang::enquo(n_transactions)
54 -
  recent_visit <- rlang::enquo(latest_visit_date)
55 -
  revenues     <- rlang::enquo(total_revenue)
56 -
57 -
  result <-
58 -
    data %>%
59 -
    dplyr::mutate(
60 -
      recency_days = (analysis_date - !! recent_visit) / lubridate::ddays()
61 -
    ) %>%
62 -
    dplyr::select(!! cust_id, recency_days, !! order_count, !! revenues) %>%
63 -
    set_names(c("customer_id", "recency_days", "transaction_count", "amount"))
64 -
65 -
  result$recency_score   <- NA
66 -
  result$frequency_score <- NA
67 -
  result$monetary_score  <- NA
68 -
69 -
  if (length(recency_bins) == 1) {
70 -
    rscore <- rev(seq_len(recency_bins))
71 -
  } else {
72 -
    rscore <- rev(seq_len((length(recency_bins) + 1)))
73 -
  }
74 -
75 -
  if (length(recency_bins) == 1) {
76 -
    bins_recency <- bins(result, recency_days, recency_bins)
77 -
  } else {
78 -
    bins_recency <- recency_bins
79 -
  }
80 -
  lower_recency <- bins_lower(result, recency_days, bins_recency)
81 -
  upper_recency <- bins_upper(result, recency_days, bins_recency)
82 -
83 -
  rscore_len <- length(rscore)
84 -
85 -
  for (i in seq_len(rscore_len)) {
86 -
    result$recency_score[result$recency_days >= lower_recency[i] &
87 -
                           result$recency_days < upper_recency[i]] <- rscore[i]
88 -
  }
89 -
90 -
  if (length(frequency_bins) == 1) {
91 -
    fscore <- rev(seq_len(frequency_bins))
92 -
  } else {
93 -
    fscore <- rev(seq_len((length(frequency_bins) + 1)))
94 -
  }
95 -
96 -
  if (length(frequency_bins) == 1) {
97 -
    bins_frequency <- bins(result, transaction_count, frequency_bins)
98 -
  } else {
99 -
    bins_frequency <- frequency_bins
100 -
  }
101 -
  lower_frequency <- bins_lower(result, transaction_count, bins_frequency)
102 -
  upper_frequency <- bins_upper(result, transaction_count, bins_frequency)
103 -
104 -
  fscore_len <- length(fscore)
105 -
106 -
  for (i in seq_len(fscore_len)) {
107 -
    result$frequency_score[result$transaction_count >= lower_frequency[i] &
108 -
                             result$transaction_count < upper_frequency[i]] <- i
109 -
  }
110 -
111 -
  if (length(monetary_bins) == 1) {
112 -
    mscore <- rev(seq_len(monetary_bins))
113 -
  } else {
114 -
    mscore <- rev(seq_len((length(monetary_bins) + 1)))
115 -
  }
116 -
117 -
  if (length(monetary_bins) == 1) {
118 -
    bins_monetary <- bins(result, amount, monetary_bins)
119 -
  } else {
120 -
    bins_monetary <- monetary_bins
121 -
  }
122 -
  lower_monetary <- bins_lower(result, amount, bins_monetary)
123 -
  upper_monetary <- bins_upper(result, amount, bins_monetary)
124 -
125 -
  mscore_len <- length(mscore)
126 -
127 -
  for (i in seq_len(mscore_len)) {
128 -
    result$monetary_score[result$amount >= lower_monetary[i] &
129 -
                            result$amount < upper_monetary[i]] <- i
130 -
  }
131 -
132 -
  result %<>%
133 -
    dplyr::mutate(
134 -
      rfm_score = recency_score * 100 + frequency_score * 10 + monetary_score
135 -
    )
136 -
137 -
  result$transaction_count <- as.numeric(result$transaction_count)
138 -
139 -
  threshold <- tibble::tibble(recency_lower   = lower_recency,
140 -
                              recency_upper   = upper_recency,
141 -
                              frequency_lower = lower_frequency,
142 -
                              frequency_upper = upper_frequency,
143 -
                              monetary_lower  = lower_monetary,
144 -
                              monetary_upper  = upper_monetary)
145 -
146 -
  out <- list(
147 -
    rfm            = result,
148 -
    analysis_date  = analysis_date,
149 -
    frequency_bins = frequency_bins,
150 -
    recency_bins   = recency_bins,
151 -
    monetary_bins  = monetary_bins,
152 -
    threshold      = threshold
153 -
  )
52 +
  cust_id      <- deparse(substitute(customer_id))
53 +
  order_count  <- deparse(substitute(n_transactions))
54 +
  recent_visit <- deparse(substitute(latest_visit_date))
55 +
  reven        <- deparse(substitute(total_revenue))
56 +
57 +
  result <- 
58 +
    data %>% 
59 +
    data.table() %>% 
60 +
    .[, ':='(recency_days = as.numeric(analysis_date - get(recent_visit), units = "days"))] %>% 
61 +
    .[, .(get(cust_id), recency_days, get(order_count), get(reven))] %>% 
62 +
    setDF()
63 +
  
64 +
  colnames(result) <- c("customer_id", "recency_days", "transaction_count", "amount")
65 +
  out <- rfm_prep_bins(result, recency_bins, frequency_bins, monetary_bins, analysis_date)
154 66
155 67
  class(out) <- c("rfm_table_customer_2", "tibble", "data.frame")
156 68
  return(out)

@@ -1,14 +1,14 @@
Loading
1 -
#' @importFrom magrittr %>% %<>% use_series set_names extract extract2 add multiply_by
1 +
#' @import magrittr 
2 2
#' @import ggplot2
3 -
#' @importFrom stats median runif quantile
3 +
#' @import data.table
4 +
#' @importFrom stats median runif quantile reorder
4 5
#' @importFrom utils available.packages menu update.packages packageVersion install.packages
5 6
bins <- function(data, value, n_bins) {
6 7
7 -
  my_value   <- enquo(value)
8 +
  my_value   <- deparse(substitute(value))
8 9
  length_out <- n_bins + 1
9 10
10 -
  data %>%
11 -
    dplyr::pull(!! my_value) %>%
11 +
  data[[my_value]] %>%
12 12
    quantile(probs = seq(0, 1, length.out = length_out)) %>%
13 13
    unname() %>%
14 14
    extract(c(-1, -length_out)) %>%
@@ -18,10 +18,9 @@
Loading
18 18
19 19
bins_lower <- function(data, value, bins) {
20 20
21 -
  my_value <- enquo(value)
21 +
  my_value   <- deparse(substitute(value))
22 22
23 -
  data %>%
24 -
    dplyr::pull(!! my_value) %>%
23 +
  data[[my_value]] %>%
25 24
    min() %>%
26 25
    append(bins)
27 26
@@ -29,11 +28,10 @@
Loading
29 28
30 29
bins_upper <- function(data, value, bins) {
31 30
32 -
  my_value <- enquo(value)
31 +
  my_value <- deparse(substitute(value))
33 32
34 33
  data_max <-
35 -
    data %>%
36 -
    dplyr::pull(!! my_value) %>%
34 +
    data[[my_value]] %>%
37 35
    max() %>%
38 36
    add(1)
39 37
@@ -43,10 +41,9 @@
Loading
43 41
44 42
check_levels <- function(rfm_heatmap_data, column) {
45 43
46 -
  my_column <- enquo(column)
44 +
  my_column <- deparse(substitute(column))
47 45
48 -
  rfm_heatmap_data %>%
49 -
    dplyr::pull(!! my_column) %>%
46 +
  rfm_heatmap_data[[my_column]] %>%
50 47
    as.factor() %>%
51 48
    levels() %>%
52 49
    as.vector() %>%
@@ -60,8 +57,7 @@
Loading
60 57
  missing2          <- seq_len(n_bins)[missing]
61 58
  extra_data        <- expand.grid(missing2, seq_len(n_bins), 0)
62 59
  names(extra_data) <- names(rfm_heatmap_data)
63 -
64 -
  dplyr::bind_rows(rfm_heatmap_data, extra_data)
60 +
  rbind(rfm_heatmap_data, extra_data)
65 61
66 62
}
67 63

@@ -20,7 +20,7 @@
Loading
20 20
#' \item{frequency_bins}{Number of bins used for frequency score.}
21 21
#' \item{recency_bins}{Number of bins used for recency score.}
22 22
#' \item{monetary_bins}{Number of bins used for monetary score.}
23 -
#' \item{threshold}{tibble with thresholds used for generating RFM scores.}
23 +
#' \item{threshold}{thresholds used for generating RFM scores.}
24 24
#'
25 25
#' @examples
26 26
#' analysis_date <- as.Date('2007-01-01')
@@ -49,105 +49,19 @@
Loading
49 49
                                       recency_days = NULL, total_revenue = NULL, analysis_date = NULL, recency_bins = 5,
50 50
                                       frequency_bins = 5, monetary_bins = 5, ...) {
51 51
52 -
  cust_id     <- rlang::enquo(customer_id)
53 -
  order_count <- rlang::enquo(n_transactions)
54 -
  n_recency   <- rlang::enquo(recency_days)
55 -
  revenues    <- rlang::enquo(total_revenue)
56 -
57 -
  result <-
58 -
    data %>%
59 -
    dplyr::select(!! cust_id, !! n_recency, !! order_count, !! revenues) %>%
60 -
    set_names(c("customer_id", "recency_days", "transaction_count", "amount"))
61 -
62 -
  result$recency_score   <- NA
63 -
  result$frequency_score <- NA
64 -
  result$monetary_score  <- NA
65 -
66 -
  if (length(recency_bins) == 1) {
67 -
    rscore <- rev(seq_len(recency_bins))
68 -
  } else {
69 -
    rscore <- rev(seq_len((length(recency_bins) + 1)))
70 -
  }
71 -
72 -
  if (length(recency_bins) == 1) {
73 -
    bins_recency <- bins(result, recency_days, recency_bins)
74 -
  } else {
75 -
    bins_recency <- recency_bins
76 -
  }
77 -
  lower_recency <- bins_lower(result, recency_days, bins_recency)
78 -
  upper_recency <- bins_upper(result, recency_days, bins_recency)
79 -
80 -
  rscore_len <- length(rscore)
81 -
82 -
  for (i in seq_len(rscore_len)) {
83 -
    result$recency_score[result$recency_days >= lower_recency[i] &
84 -
                           result$recency_days < upper_recency[i]] <- rscore[i]
85 -
  }
86 -
87 -
  if (length(frequency_bins) == 1) {
88 -
    fscore <- rev(seq_len(frequency_bins))
89 -
  } else {
90 -
    fscore <- rev(seq_len((length(frequency_bins) + 1)))
91 -
  }
92 -
93 -
  if (length(frequency_bins) == 1) {
94 -
    bins_frequency <- bins(result, transaction_count, frequency_bins)
95 -
  } else {
96 -
    bins_frequency <- frequency_bins
97 -
  }
98 -
  lower_frequency <- bins_lower(result, transaction_count, bins_frequency)
99 -
  upper_frequency <- bins_upper(result, transaction_count, bins_frequency)
100 -
101 -
  fscore_len <- length(fscore)
102 -
103 -
  for (i in seq_len(fscore_len)) {
104 -
    result$frequency_score[result$transaction_count >= lower_frequency[i] &
105 -
                             result$transaction_count < upper_frequency[i]] <- i
106 -
  }
107 -
108 -
  if (length(monetary_bins) == 1) {
109 -
    mscore <- rev(seq_len(monetary_bins))
110 -
  } else {
111 -
    mscore <- rev(seq_len((length(monetary_bins) + 1)))
112 -
  }
113 -
114 -
  if (length(monetary_bins) == 1) {
115 -
    bins_monetary <- bins(result, amount, monetary_bins)
116 -
  } else {
117 -
    bins_monetary <- monetary_bins
118 -
  }
119 -
  lower_monetary <- bins_lower(result, amount, bins_monetary)
120 -
  upper_monetary <- bins_upper(result, amount, bins_monetary)
121 -
122 -
  mscore_len <- length(mscore)
123 -
124 -
  for (i in seq_len(mscore_len)) {
125 -
    result$monetary_score[result$amount >= lower_monetary[i] &
126 -
                            result$amount < upper_monetary[i]] <- i
127 -
  }
128 -
129 -
  result %<>%
130 -
    dplyr::mutate(
131 -
      rfm_score = recency_score * 100 + frequency_score * 10 + monetary_score
132 -
    )
133 -
134 -
  result$transaction_count <- as.numeric(result$transaction_count)
135 -
136 -
  threshold <- tibble::tibble(recency_lower   = lower_recency,
137 -
                              recency_upper   = upper_recency,
138 -
                              frequency_lower = lower_frequency,
139 -
                              frequency_upper = upper_frequency,
140 -
                              monetary_lower  = lower_monetary,
141 -
                              monetary_upper  = upper_monetary)
142 -
143 -
  out <- list(
144 -
    rfm            = result,
145 -
    analysis_date  = analysis_date,
146 -
    frequency_bins = frequency_bins,
147 -
    recency_bins   = recency_bins,
148 -
    monetary_bins  = monetary_bins,
149 -
    threshold      = threshold
150 -
  )
52 +
  cust_id     <- deparse(substitute(customer_id))
53 +
  order_count <- deparse(substitute(n_transactions))
54 +
  n_recency   <- deparse(substitute(recency_days))
55 +
  revenues    <- deparse(substitute(total_revenue))
56 +
57 +
  result <- 
58 +
    data %>% 
59 +
    data.table() %>% 
60 +
    .[, .(get(cust_id), get(n_recency), get(order_count), get(revenues))] %>% 
61 +
    setDF()
62 +
    
63 +
  colnames(result) <- c("customer_id", "recency_days", "transaction_count", "amount")
64 +
  out <- rfm_prep_bins(result, recency_bins, frequency_bins, monetary_bins, analysis_date)
151 65
152 66
  class(out) <- c("rfm_table_customer", "tibble", "data.frame")
153 67
  return(out)
Files Coverage
R 87.40%
Project Totals (9 files) 87.40%
1
comment: false
2

3
coverage:
4
  status:
5
    project:
6
      default:
7
        target: auto
8
        threshold: 1%
9
        informational: true
10
    patch:
11
      default:
12
        target: auto
13
        threshold: 1%
14
        informational: true
Sunburst
The inner-most circle is the entire project, moving away from the center are folders then, finally, a single file. The size and color of each slice is representing the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project. Proceeding with folders and finally individual files. The size and color of each slice is representing the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block is represented by the number of statements and the coverage, respectively.
Loading