No flags found
Use flags to group coverage reports by test type, project and/or folders.
Then setup custom commit statuses and notifications for each flag.
e.g., #unittest #integration
#production #enterprise
#frontend #backend
ee925e4
... +0 ...
13b3b3c
Use flags to group coverage reports by test type, project and/or folders.
Then setup custom commit statuses and notifications for each flag.
e.g., #unittest #integration
#production #enterprise
#frontend #backend
1 | + | #' Cluster pairs of items into groups using hierarchical clustering |
|
2 | + | #' |
|
3 | + | #' Reshape a table that represents pairwise distances into hierarchical clusters, |
|
4 | + | #' returning a table with \code{item} and \code{cluster} columns. |
|
5 | + | #' |
|
6 | + | #' @param tbl Table |
|
7 | + | #' @param item1 First item |
|
8 | + | #' @param item2 Second item |
|
9 | + | #' @param distance Distance column |
|
10 | + | #' @param k The desired number of groups |
|
11 | + | #' @param h Height at which to cut the hierarchically clustered tree |
|
12 | + | #' |
|
13 | + | #' @examples |
|
14 | + | #' |
|
15 | + | #' library(gapminder) |
|
16 | + | #' library(dplyr) |
|
17 | + | #' |
|
18 | + | #' # Construct Euclidean distances between countries based on life |
|
19 | + | #' # expectancy over time |
|
20 | + | #' country_distances <- gapminder %>% |
|
21 | + | #' pairwise_dist(country, year, lifeExp) |
|
22 | + | #' |
|
23 | + | #' country_distances |
|
24 | + | #' |
|
25 | + | #' # Turn this into 5 hierarchical clusters |
|
26 | + | #' clusters <- country_distances %>% |
|
27 | + | #' widely_hclust(item1, item2, distance, k = 8) |
|
28 | + | #' |
|
29 | + | #' # Examine a few such clusters |
|
30 | + | #' clusters %>% filter(cluster == 1) |
|
31 | + | #' clusters %>% filter(cluster == 2) |
|
32 | + | #' |
|
33 | + | #' @seealso \link{cutree} |
|
34 | + | #' |
|
35 | + | #' @export |
|
36 | + | widely_hclust <- function(tbl, item1, item2, distance, k = NULL, h = NULL) { |
|
37 | + | col1_str <- as.character(substitute(item1)) |
|
38 | + | col2_str <- as.character(substitute(item2)) |
|
39 | + | dist_str <- as.character(substitute(distance)) |
|
40 | + | ||
41 | + | unique_items <- unique(c(as.character(tbl[[col1_str]]), as.character(tbl[[col2_str]]))) |
|
42 | + | ||
43 | + | form <- stats::as.formula(paste(col1_str, "~", col2_str)) |
|
44 | + | ||
45 | + | max_distance <- max(tbl[[dist_str]]) |
|
46 | + | ||
47 | + | tibble(item1 = match(tbl[[col1_str]], unique_items), |
|
48 | + | item2 = match(tbl[[col2_str]], unique_items), |
|
49 | + | distance = tbl[[dist_str]]) %>% |
|
50 | + | reshape2::acast(item1 ~ item2, value.var = "distance", fill = max_distance) %>% |
|
51 | + | stats::as.dist() %>% |
|
52 | + | stats::hclust() %>% |
|
53 | + | stats::cutree(k = k, h = h) %>% |
|
54 | + | tibble::enframe("item", "cluster") %>% |
|
55 | + | dplyr::mutate(item = unique_items[as.integer(item)], |
|
56 | + | cluster = factor(cluster)) %>% |
|
57 | + | dplyr::arrange(cluster) |
|
58 | + | } |
1 | + | #' Cluster items based on k-means across features |
|
2 | + | #' |
|
3 | + | #' Given a tidy table of features describing each item, perform k-means |
|
4 | + | #' clustering using \code{\link{kmeans}} and retidy the data into |
|
5 | + | #' one-row-per-cluster. |
|
6 | + | #' |
|
7 | + | #' @param tbl Table |
|
8 | + | #' @param item Item to cluster (as a bare column name) |
|
9 | + | #' @param feature Feature column (dimension in clustering) |
|
10 | + | #' @param value Value column |
|
11 | + | #' @param k Number of clusters |
|
12 | + | #' @param fill What to fill in for missing values |
|
13 | + | #' @param ... Other arguments passed on to \code{\link{kmeans}} |
|
14 | + | #' |
|
15 | + | #' @seealso \code{\link{widely_hclust}} |
|
16 | + | #' |
|
17 | + | #' @importFrom rlang := |
|
18 | + | #' |
|
19 | + | #' @examples |
|
20 | + | #' |
|
21 | + | #' library(gapminder) |
|
22 | + | #' library(dplyr) |
|
23 | + | #' |
|
24 | + | #' clusters <- gapminder %>% |
|
25 | + | #' widely_kmeans(country, year, lifeExp, k = 5) |
|
26 | + | #' |
|
27 | + | #' clusters |
|
28 | + | #' |
|
29 | + | #' clusters %>% |
|
30 | + | #' count(cluster) |
|
31 | + | #' |
|
32 | + | #' # Examine a few clusters |
|
33 | + | #' clusters %>% filter(cluster == 1) |
|
34 | + | #' clusters %>% filter(cluster == 2) |
|
35 | + | #' |
|
36 | + | #' @export |
|
37 | + | widely_kmeans <- function(tbl, item, feature, value, k, fill = 0, ...) { |
|
38 | + | item_str <- as.character(substitute(item)) |
|
39 | + | feature_str <- as.character(substitute(feature)) |
|
40 | + | value_str <- as.character(substitute(value)) |
|
41 | + | ||
42 | + | form <- stats::as.formula(paste(item_str, "~", feature_str)) |
|
43 | + | ||
44 | + | m <- tbl %>% |
|
45 | + | reshape2::acast(form, value.var = value_str, fill = fill) |
|
46 | + | ||
47 | + | clustered <- stats::kmeans(m, k, ...) |
|
48 | + | ||
49 | + | # Add the clusters to the original table |
|
50 | + | i <- match(rownames(m), as.character(tbl[[item_str]])) |
|
51 | + | tibble::tibble(!!sym(item_str) := tbl[[item_str]][i], |
|
52 | + | cluster = factor(clustered$cluster)) %>% |
|
53 | + | dplyr::arrange(cluster) |
|
54 | + | } |
Learn more Showing 2 files with coverage changes found.
R/widely_hclust.R
R/widely_kmeans.R
Files | Coverage |
---|---|
Project Totals (13 files) | 57.92% |
13b3b3c
ee925e4