From 10c63beccbdfb934f6d9d1d4e385079720ace370 Mon Sep 17 00:00:00 2001 From: "Kelly N. Bodwin" Date: Thu, 12 May 2022 23:51:54 -0700 Subject: [PATCH 01/10] the hungarian algorithm is cool. --- R/reconcile_clusterings.R | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 R/reconcile_clusterings.R diff --git a/R/reconcile_clusterings.R b/R/reconcile_clusterings.R new file mode 100644 index 00000000..076efe03 --- /dev/null +++ b/R/reconcile_clusterings.R @@ -0,0 +1,29 @@ +library(RcppHungarian) + +reconcile_clusterings <- function(clusters_1, clusters_2) { + + clusters_1 <- factor(clusters_1) + clusters_2 <- factor(clusters_2) + + cost <- as.matrix(table(clusters_1, clusters_2)) + matches <- HungarianSolver(-cost) + + c1_names <- unique(clusters_1) + c2_new <- factor(clusters_2, + levels = levels(clusters_2)[matches$pairs[,2]], + labels = levels(clusters_1)) + + res <- tibble::tibble( + clusters_1 = clusters_1, + clusters_2 = clusters_2, + clusters_2_renamed = c2_new + ) + + res + +} + +clusters_1 <- c("C1", "C1", "C2", "C3", "C4", "C4") +clusters_2 <- c("C3", "C2", "C2", "C4", "C1", "C1") + +reconcile_clusterings(clusters_1, clusters_2) From 139ec027d4125acaecefb4facd5960563cdd2458 Mon Sep 17 00:00:00 2001 From: "Kelly N. Bodwin" Date: Thu, 12 May 2022 23:54:08 -0700 Subject: [PATCH 02/10] test code for clust reconcile --- R/reconcile_clusterings.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/reconcile_clusterings.R b/R/reconcile_clusterings.R index 076efe03..2871c1ed 100644 --- a/R/reconcile_clusterings.R +++ b/R/reconcile_clusterings.R @@ -27,3 +27,9 @@ clusters_1 <- c("C1", "C1", "C2", "C3", "C4", "C4") clusters_2 <- c("C3", "C2", "C2", "C4", "C1", "C1") reconcile_clusterings(clusters_1, clusters_2) + +km_1 <- kmeans(ir, 10) +km_2 <- kmeans(ir, 10) + +bob <- reconcile_clusterings(km_1$cluster, km_2$cluster) +sum(bob$clusters_1 == bob$clusters_2_renamed) From 23bff327ff1a89fb7d424ee5f287f2e56ba8fd27 Mon Sep 17 00:00:00 2001 From: "Kelly N. Bodwin" Date: Fri, 13 May 2022 09:33:02 -0700 Subject: [PATCH 03/10] testing --- R/reconcile_clusterings.R | 4 ++++ dev/cross_val_kmeans.R | 32 ++++++++++++++++++++------------ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/R/reconcile_clusterings.R b/R/reconcile_clusterings.R index 2871c1ed..ce3aedd7 100644 --- a/R/reconcile_clusterings.R +++ b/R/reconcile_clusterings.R @@ -33,3 +33,7 @@ km_2 <- kmeans(ir, 10) bob <- reconcile_clusterings(km_1$cluster, km_2$cluster) sum(bob$clusters_1 == bob$clusters_2_renamed) + + +### what happens when clusters2 and clusters1 don't share names, even permuted? +### or not the same number of unique values? diff --git a/dev/cross_val_kmeans.R b/dev/cross_val_kmeans.R index 3ecc6cd1..370d29a1 100644 --- a/dev/cross_val_kmeans.R +++ b/dev/cross_val_kmeans.R @@ -1,5 +1,6 @@ library(tidymodels) library(tidyverse) +library(celery) ## "Cross-validation" for kmeans @@ -54,13 +55,13 @@ res %>% ## This needs "predict" ## Doesn't really make sense yet -cvs <- vfold_cv(ir, v = 5) +cvs <- vfold_cv(ir, v = 10) res <- data.frame( k = NA, i = NA, acc = NA, - roc_auc = NA + f1 = NA ) for (k in 2:10) { @@ -68,27 +69,29 @@ for (k in 2:10) { km <- k_means(k = k) %>% set_engine_celery("stats") - full_fit <- km %>% fit(~., data = ir) %>% - extract_cluster_assignment() + full_fit <- km %>% fit(~., data = ir) - for (i in 1:5) { + for (i in 1:10) { tmp_train <- training(cvs$splits[[i]]) tmp_test <- testing(cvs$splits[[i]]) km_fit <- km %>% fit(~., data = tmp_train) - dat <- tmp_train %>% + dat <- tmp_test %>% mutate( - truth = full_fit$.cluster[as.numeric(rownames(tmp_train))], - estimate = extract_cluster_assignment(km_fit)$.cluster + truth = predict(full_fit, tmp_test)$.pred_cluster, + estimate = predict(km_fit, tmp_test)$.pred_cluster ) - acc <- accuracy(dat, truth, estimate) + thing <- reconcile_clusterings(dat$truth, dat$estimate) + + acc <- accuracy(thing, clusters_1, clusters_2) + f1 <- f_meas(thing, clusters_1, clusters_2) res <- rbind(res, - c(k = k, i = i, acc = acc$.estimate[1], roc_auc = NA)) + c(k = k, i = i, acc = acc$.estimate[1], f1 = f1$.estimate)) } @@ -96,5 +99,10 @@ for (k in 2:10) { res %>% - ggplot(aes(x = factor(k), y = acc)) + - geom_boxplot() + ggplot(aes(x = factor(k), y = f1)) + + geom_point() + + +### use orders from reconciling to order centers and check center similarity? +### or to get "raw probabilities" - what does that mean though? +### to do predict = raw From c31db8fb793bd161c5215faf8e831db9b6532161 Mon Sep 17 00:00:00 2001 From: Kelly Bodwin Date: Thu, 2 Jun 2022 16:40:36 -0700 Subject: [PATCH 04/10] making reconciliation more flexible --- DESCRIPTION | 2 + R/reconcile_clusterings.R | 114 +++++++++++++++++++++++++++++++------- 2 files changed, 95 insertions(+), 21 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c2d94e92..d3b3dc13 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,6 +17,7 @@ Imports: ClusterR, dials, dplyr, + forcats, foreach, generics, glue, @@ -25,6 +26,7 @@ Imports: parsnip, prettyunits, purrr, + RcppHungarian, rlang, rsample, stats, diff --git a/R/reconcile_clusterings.R b/R/reconcile_clusterings.R index ce3aedd7..a246348e 100644 --- a/R/reconcile_clusterings.R +++ b/R/reconcile_clusterings.R @@ -1,39 +1,111 @@ -library(RcppHungarian) +#' Relabels clusters to match another cluster assignment +#' +#' Retains the cluster labels of the primary assignment, and relabel the alternate assignment +#' to match as closely as possible. The user must decide whether clusters are forced to be +#' "one-to-one"; that is, are we allowed to assign multiple labels from the alternate assignment +#' to the same primary label? +#' +#' The user can opt to prioritize: +#' * "accuracy": Each alt label will be assigned to the primary label with which +#' it shares the highest raw member count. +#' * "precision": Each alt label will be assigned to the primary label that captures +#' the highest percentage of its members. +#' * "recall": Each alt label will be assigned +#' +#' @param primary_cluster_assignment A vector containing cluster labels, to be matched +#' @param alt_cluster_assignment Another vector containing cluster labels, to be changed +#' @param one_to_one Boolean; should each alt cluster match only one primary cluster? +#' @param optimize One of "precision", "recall" or "accuracy"; see description. +#' +#' @return A vector with the new cluster labels +#' +#' @importFrom forcats fct_inorder -reconcile_clusterings <- function(clusters_1, clusters_2) { +reconcile_clusterings <- function(primary_cluster_assignment, + alt_cluster_assignment, + one_to_one = TRUE, + optimize = ) { - clusters_1 <- factor(clusters_1) - clusters_2 <- factor(clusters_2) - cost <- as.matrix(table(clusters_1, clusters_2)) - matches <- HungarianSolver(-cost) + clusters_1 <- fct_inorder(as.character(primary_cluster_assignment)) + clusters_2 <- fct_inorder(as.character(alt_cluster_assignment)) - c1_names <- unique(clusters_1) - c2_new <- factor(clusters_2, - levels = levels(clusters_2)[matches$pairs[,2]], + nclust_1 <- length(levels(clusters_1)) + nclust_2 <- length(levels(clusters_2)) + + + if (nclust_1 > nclust_2) { + stop("Primary clustering must have equal or fewer clusters to alternate clustering.") + } + + ## Use standard names in order for both + + clusters_1_f <- factor(clusters_1, labels = paste0("Cluster_", 1:nclust_1)) + clusters_2_f <- factor(clusters_2, labels = paste0("Cluster_", 1:nclust_2)) + clusters_1_f <- factor(clusters_1_f, levels = levels(clusters_2_f)) + + + ## Get counts + cost <- as.matrix(table(clusters_1_f, clusters_2_f)) + cost <- t(t(cost)/colSums(cost)) + + ## If they have the same size, it's easy + ## If there are more clusters in the alt clustering, try all combos + + if (nclust_1 == nclust_2) { + + matches <- RcppHungarian::HungarianSolver(-cost) + reord <- matches$pairs[,2] + + } else { + + + n_combos <- choose(nclust_2, nclust_1) + + } + + + ## Reorder new clusters and then use original labels + + c2_new <- factor(clusters_2_f, + levels = levels(clusters_2_f)[], labels = levels(clusters_1)) - res <- tibble::tibble( - clusters_1 = clusters_1, - clusters_2 = clusters_2, - clusters_2_renamed = c2_new - ) - res + + return(as.character(c2_new)) } -clusters_1 <- c("C1", "C1", "C2", "C3", "C4", "C4") -clusters_2 <- c("C3", "C2", "C2", "C4", "C1", "C1") +primary_cluster_assignment <- c("C4", "C4", "C3", "C3", "C1", "C1") +alt_cluster_assignment <- c("C3", "C2", "C2", "C4", "C1", "C1") reconcile_clusterings(clusters_1, clusters_2) -km_1 <- kmeans(ir, 10) -km_2 <- kmeans(ir, 10) +## test it with numbers + +primary_cluster_assignment <- sample(1:10, 100, replace = TRUE) +alt_cluster_assignment <- sample(1:10, 100, replace = TRUE) + +thing <- tibble( + a = primary_cluster_assignment, + b= alt_cluster_assignment, + c= reconcile_clusterings(primary_cluster_assignment, alt_cluster_assignment) +) + +table(thing$a, thing$c) + + +## test it with too many in c2 -bob <- reconcile_clusterings(km_1$cluster, km_2$cluster) -sum(bob$clusters_1 == bob$clusters_2_renamed) +clusters_1 <- sample(1:3, 10, replace = TRUE) +clusters_2 <- sample(1:4, 10, replace = TRUE) +cbind( + clusters_1, + clusters_2, + reconcile_clusterings(clusters_1, clusters_2) +) ### what happens when clusters2 and clusters1 don't share names, even permuted? ### or not the same number of unique values? From 42c741b1398d07712f06e7d43192a6c93f80bb91 Mon Sep 17 00:00:00 2001 From: "Kelly N. Bodwin" Date: Thu, 9 Jun 2022 20:19:58 -0700 Subject: [PATCH 05/10] Update k_means.R I think the `keras` was a holdover from the template. :) --- R/k_means.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/k_means.R b/R/k_means.R index 4711c5a0..dd607a42 100644 --- a/R/k_means.R +++ b/R/k_means.R @@ -146,7 +146,9 @@ translate_celery.k_means <- function(x, engine = x$engine, ...) { #' The higher this value is, the far appart from each other the centroids are. #' @param seed integer value for random number generator (RNG) #' -#' @return A `keras` model object. +#' @return a list with the following attributes: clusters, fuzzy_clusters (if +#' fuzzy = TRUE), centroids, total_SSE, best_initialization, WCSS_per_cluster, +#' obs_per_cluster, between.SS_DIV_total.SS #' @keywords internal #' @export ClusterR_kmeans_fit <- function(data, clusters, num_init = 1, max_iters = 100, From 5c85ca1c0eeadc453c4da9d88a6c0f73171d0aa3 Mon Sep 17 00:00:00 2001 From: "Kelly N. Bodwin" Date: Fri, 10 Jun 2022 01:54:21 -0700 Subject: [PATCH 06/10] rename kmeans diagnostics --- NAMESPACE | 1 + ...ans_diagnostics.R => diagnostic_metrics.R} | 0 R/reconcile_clusterings.R | 96 +++++++------------ man/avg_silhouette.Rd | 2 +- man/enrichment.Rd | 2 +- man/get_centroid_dists.Rd | 2 +- man/prep_data_dist.Rd | 2 +- man/silhouettes.Rd | 2 +- man/sse_ratio.Rd | 2 +- man/tot_sse.Rd | 2 +- man/tot_wss.Rd | 2 +- man/within_cluster_sse.Rd | 2 +- 12 files changed, 45 insertions(+), 70 deletions(-) rename R/{kmeans_diagnostics.R => diagnostic_metrics.R} (100%) diff --git a/NAMESPACE b/NAMESPACE index 3b5eec1c..d6a7c3cd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -68,6 +68,7 @@ export(predict_cluster.cluster_fit) export(predict_raw) export(predict_raw.cluster_fit) export(prepare_data) +export(reconcile_clusterings) export(required_pkgs) export(set_dependency_celery) export(set_encoding_celery) diff --git a/R/kmeans_diagnostics.R b/R/diagnostic_metrics.R similarity index 100% rename from R/kmeans_diagnostics.R rename to R/diagnostic_metrics.R diff --git a/R/reconcile_clusterings.R b/R/reconcile_clusterings.R index a246348e..992258a7 100644 --- a/R/reconcile_clusterings.R +++ b/R/reconcile_clusterings.R @@ -5,37 +5,38 @@ #' "one-to-one"; that is, are we allowed to assign multiple labels from the alternate assignment #' to the same primary label? #' -#' The user can opt to prioritize: -#' * "accuracy": Each alt label will be assigned to the primary label with which -#' it shares the highest raw member count. -#' * "precision": Each alt label will be assigned to the primary label that captures -#' the highest percentage of its members. -#' * "recall": Each alt label will be assigned +#' When forcing one-to-one, the user needs to decide what to prioritize: +#' * "accuracy": optimize raw count of all observations with the same label +#' across the two assignments +#' * "precision": optimize the average percent of each alt cluster that matches +#' the corresponding primary cluster #' #' @param primary_cluster_assignment A vector containing cluster labels, to be matched #' @param alt_cluster_assignment Another vector containing cluster labels, to be changed #' @param one_to_one Boolean; should each alt cluster match only one primary cluster? -#' @param optimize One of "precision", "recall" or "accuracy"; see description. +#' @param optimize One of "accuracy" or "precision"; see description. #' #' @return A vector with the new cluster labels -#' -#' @importFrom forcats fct_inorder +#' @export reconcile_clusterings <- function(primary_cluster_assignment, alt_cluster_assignment, one_to_one = TRUE, - optimize = ) { + optimize = "accuracy") { - clusters_1 <- fct_inorder(as.character(primary_cluster_assignment)) - clusters_2 <- fct_inorder(as.character(alt_cluster_assignment)) + clusters_1 <- forcats::fct_inorder(as.character(primary_cluster_assignment)) + clusters_2 <- forcats::fct_inorder(as.character(alt_cluster_assignment)) nclust_1 <- length(levels(clusters_1)) nclust_2 <- length(levels(clusters_2)) - - if (nclust_1 > nclust_2) { - stop("Primary clustering must have equal or fewer clusters to alternate clustering.") + if (one_to_one && nclust_1 != nclust_2) { + stop("For one-to-one matching, must have the same number of clusters in + primary and alt.") + } else if (nclust_1 > nclust_2) { + stop("Primary clustering must have equal or fewer clusters to alternate + clustering.") } ## Use standard names in order for both @@ -46,66 +47,39 @@ reconcile_clusterings <- function(primary_cluster_assignment, ## Get counts - cost <- as.matrix(table(clusters_1_f, clusters_2_f)) - cost <- t(t(cost)/colSums(cost)) + cross_counts <- table(clusters_1_f, clusters_2_f) + cross_counts <- matrix(cross_counts, + ncol = ncol(cross_counts), + dimnames = dimnames(cross_counts)) - ## If they have the same size, it's easy - ## If there are more clusters in the alt clustering, try all combos - if (nclust_1 == nclust_2) { + ## one-to-one and accuracy = hungarian on counts + ## one-to-one and precision = hungarian on col-stdized - matches <- RcppHungarian::HungarianSolver(-cost) - reord <- matches$pairs[,2] + if (one_to_one) { - } else { + ## Hungarian solver guarantees max diagonal sum + cost <- cross_counts + if (optimize == "precision") { + cost <- t(t(cost)/colSums(cost)) + } - n_combos <- choose(nclust_2, nclust_1) + matches <- RcppHungarian::HungarianSolver(-cost) + reord <- matches$pairs[,2] + } else { + reord <- c(apply(cross_counts, 2, which.max)) } ## Reorder new clusters and then use original labels - c2_new <- factor(clusters_2_f, - levels = levels(clusters_2_f)[], - labels = levels(clusters_1)) - + recode_vec <- levels(clusters_2) + names(recode_vec) <- levels(clusters_1)[reord] + c2_new <- forcats::fct_recode(clusters_2, !!!recode_vec) return(as.character(c2_new)) } - -primary_cluster_assignment <- c("C4", "C4", "C3", "C3", "C1", "C1") -alt_cluster_assignment <- c("C3", "C2", "C2", "C4", "C1", "C1") - -reconcile_clusterings(clusters_1, clusters_2) - -## test it with numbers - -primary_cluster_assignment <- sample(1:10, 100, replace = TRUE) -alt_cluster_assignment <- sample(1:10, 100, replace = TRUE) - -thing <- tibble( - a = primary_cluster_assignment, - b= alt_cluster_assignment, - c= reconcile_clusterings(primary_cluster_assignment, alt_cluster_assignment) -) - -table(thing$a, thing$c) - - -## test it with too many in c2 - -clusters_1 <- sample(1:3, 10, replace = TRUE) -clusters_2 <- sample(1:4, 10, replace = TRUE) - -cbind( - clusters_1, - clusters_2, - reconcile_clusterings(clusters_1, clusters_2) -) - -### what happens when clusters2 and clusters1 don't share names, even permuted? -### or not the same number of unique values? diff --git a/man/avg_silhouette.Rd b/man/avg_silhouette.Rd index 06494198..1cc1c8e7 100644 --- a/man/avg_silhouette.Rd +++ b/man/avg_silhouette.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/kmeans_diagnostics.R +% Please edit documentation in R/diagnostic_metrics.R \name{avg_silhouette} \alias{avg_silhouette} \title{Measures average silhouette across all observations} diff --git a/man/enrichment.Rd b/man/enrichment.Rd index e8f2c869..5aca859e 100644 --- a/man/enrichment.Rd +++ b/man/enrichment.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/kmeans_diagnostics.R +% Please edit documentation in R/diagnostic_metrics.R \name{enrichment} \alias{enrichment} \title{Measures relationship between cluster assignments and another categorical variable.} diff --git a/man/get_centroid_dists.Rd b/man/get_centroid_dists.Rd index 4f79475a..5b4751ff 100644 --- a/man/get_centroid_dists.Rd +++ b/man/get_centroid_dists.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/kmeans_diagnostics.R +% Please edit documentation in R/diagnostic_metrics.R \name{get_centroid_dists} \alias{get_centroid_dists} \title{Computes distance from observations to centroids} diff --git a/man/prep_data_dist.Rd b/man/prep_data_dist.Rd index b7deccb0..73072fb6 100644 --- a/man/prep_data_dist.Rd +++ b/man/prep_data_dist.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/kmeans_diagnostics.R +% Please edit documentation in R/diagnostic_metrics.R \name{prep_data_dist} \alias{prep_data_dist} \title{Prepares data and distance matrices for metric calculation} diff --git a/man/silhouettes.Rd b/man/silhouettes.Rd index 1f5cc979..cdc738a7 100644 --- a/man/silhouettes.Rd +++ b/man/silhouettes.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/kmeans_diagnostics.R +% Please edit documentation in R/diagnostic_metrics.R \name{silhouettes} \alias{silhouettes} \title{Measures silhouettes between clusters} diff --git a/man/sse_ratio.Rd b/man/sse_ratio.Rd index 3766ac56..9662476a 100644 --- a/man/sse_ratio.Rd +++ b/man/sse_ratio.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/kmeans_diagnostics.R +% Please edit documentation in R/diagnostic_metrics.R \name{sse_ratio} \alias{sse_ratio} \title{Compute the ratio of the WSS to the total SSE} diff --git a/man/tot_sse.Rd b/man/tot_sse.Rd index 40a3e8d3..57984a75 100644 --- a/man/tot_sse.Rd +++ b/man/tot_sse.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/kmeans_diagnostics.R +% Please edit documentation in R/diagnostic_metrics.R \name{tot_sse} \alias{tot_sse} \title{Compute the total sum of squares} diff --git a/man/tot_wss.Rd b/man/tot_wss.Rd index 8dedd273..acadd082 100644 --- a/man/tot_wss.Rd +++ b/man/tot_wss.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/kmeans_diagnostics.R +% Please edit documentation in R/diagnostic_metrics.R \name{tot_wss} \alias{tot_wss} \title{Compute the sum of within-cluster SSE} diff --git a/man/within_cluster_sse.Rd b/man/within_cluster_sse.Rd index 410b196a..ebb08445 100644 --- a/man/within_cluster_sse.Rd +++ b/man/within_cluster_sse.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/kmeans_diagnostics.R +% Please edit documentation in R/diagnostic_metrics.R \name{within_cluster_sse} \alias{within_cluster_sse} \title{Calculates Sum of Squared Error in each cluster} From 5143041cb0d31cded93bca1b140344282d820c7d Mon Sep 17 00:00:00 2001 From: "Kelly N. Bodwin" Date: Fri, 10 Jun 2022 01:54:31 -0700 Subject: [PATCH 07/10] reconcile function works now --- man/reconcile_clusterings.Rd | 40 +++++++++++++++++++++ tests/testthat/test-reconcile_clusterings.R | 23 ++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 man/reconcile_clusterings.Rd create mode 100644 tests/testthat/test-reconcile_clusterings.R diff --git a/man/reconcile_clusterings.Rd b/man/reconcile_clusterings.Rd new file mode 100644 index 00000000..7bd9d96e --- /dev/null +++ b/man/reconcile_clusterings.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reconcile_clusterings.R +\name{reconcile_clusterings} +\alias{reconcile_clusterings} +\title{Relabels clusters to match another cluster assignment} +\usage{ +reconcile_clusterings( + primary_cluster_assignment, + alt_cluster_assignment, + one_to_one = TRUE, + optimize = "accuracy" +) +} +\arguments{ +\item{primary_cluster_assignment}{A vector containing cluster labels, to be matched} + +\item{alt_cluster_assignment}{Another vector containing cluster labels, to be changed} + +\item{one_to_one}{Boolean; should each alt cluster match only one primary cluster?} + +\item{optimize}{One of "accuracy" or "precision"; see description.} +} +\value{ +A vector with the new cluster labels +} +\description{ +Retains the cluster labels of the primary assignment, and relabel the alternate assignment +to match as closely as possible. The user must decide whether clusters are forced to be +"one-to-one"; that is, are we allowed to assign multiple labels from the alternate assignment +to the same primary label? +} +\details{ +When forcing one-to-one, the user needs to decide what to prioritize: +\itemize{ +\item "accuracy": optimize raw count of all observations with the same label +across the two assignments +\item "precision": optimize the average percent of each alt cluster that matches +the corresponding primary cluster +} +} diff --git a/tests/testthat/test-reconcile_clusterings.R b/tests/testthat/test-reconcile_clusterings.R new file mode 100644 index 00000000..ec9d2143 --- /dev/null +++ b/tests/testthat/test-reconcile_clusterings.R @@ -0,0 +1,23 @@ +test_that("reconciliation works with one-to-one", { + + primary_cluster_assignment <- c("Apple", "Apple", "Carrot", "Carrot", "Banana", "Banana") + alt_cluster_assignment <- c("Dog", "Dog", "Cat", "Dog", "Fish", "Fish") + + + res <- reconcile_clusterings(primary_cluster_assignment, alt_cluster_assignment) + + expect_equal(res, c("Apple", "Apple", "Carrot", "Apple", "Banana", "Banana")) +}) + + + +test_that("reconciliation works with uneven numbers", { + + primary_cluster_assignment <- c("Apple", "Apple", "Carrot", "Carrot", "Banana", "Banana") + alt_cluster_assignment <- c("Dog", "Dog", "Cat", "Dog", "Parrot", "Fish") + + + res <- reconcile_clusterings(primary_cluster_assignment, alt_cluster_assignment, one_to_one = FALSE) + + expect_equal(res, c("Apple", "Apple", "Carrot", "Apple", "Banana", "Banana")) +}) From dcb5b50028066bb39408fc91b3789c7cb2781db0 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 17 Jun 2022 13:41:29 -0700 Subject: [PATCH 08/10] reknit --- DESCRIPTION | 2 +- man/ClusterR_kmeans_fit.Rd | 4 +++- man/k.Rd | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d3b3dc13..76b41491 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -54,4 +54,4 @@ Config/Needs/website: pkgdown, tidymodels, tidyverse, palmerpenguins Config/testthat/edition: 3 Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.0.9000 diff --git a/man/ClusterR_kmeans_fit.Rd b/man/ClusterR_kmeans_fit.Rd index 7281976c..e6e86b2f 100644 --- a/man/ClusterR_kmeans_fit.Rd +++ b/man/ClusterR_kmeans_fit.Rd @@ -51,7 +51,9 @@ The higher this value is, the far appart from each other the centroids are.} \item{seed}{integer value for random number generator (RNG)} } \value{ -A \code{keras} model object. +a list with the following attributes: clusters, fuzzy_clusters (if +fuzzy = TRUE), centroids, total_SSE, best_initialization, WCSS_per_cluster, +obs_per_cluster, between.SS_DIV_total.SS } \description{ This wrapper runs \code{ClusterR::KMeans_rcpp} and adds column names to the diff --git a/man/k.Rd b/man/k.Rd index f32da4de..182ac74b 100644 --- a/man/k.Rd +++ b/man/k.Rd @@ -8,7 +8,8 @@ k(range = c(1L, 10L), trans = NULL) } \arguments{ \item{range}{A two-element vector holding the \emph{defaults} for the smallest and -largest possible values, respectively.} +largest possible values, respectively. If a transformation is specified, +these values should be in the \emph{transformed units}.} \item{trans}{A \code{trans} object from the \code{scales} package, such as \code{scales::log10_trans()} or \code{scales::reciprocal_trans()}. If not provided, From fbfda65cdcda39e87cc0343319a4158accddffa2 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 17 Jun 2022 14:10:36 -0700 Subject: [PATCH 09/10] rename package to tidyclust --- .Rbuildignore | 2 +- DESCRIPTION | 6 +- LICENSE.md | 2 +- NAMESPACE | 54 ++-- R/aaa_models.R | 256 +++++++++--------- R/cluster_spec.R | 6 +- R/colors.R | 8 +- R/control.R | 10 +- R/diagnostic_metrics.R | 24 +- R/engines.R | 12 +- R/extract_assignment.R | 2 +- R/extract_characterization.R | 2 +- R/extract_summary.R | 2 +- R/fit.R | 34 +-- R/fit_helpers.R | 10 +- R/k_means.R | 4 +- R/k_means_data.R | 38 +-- R/load_ns.R | 2 +- R/predict.R | 4 +- R/print.R | 2 +- R/tidy.R | 6 +- R/{celery-package.R => tidyclust-package.R} | 0 R/translate.R | 34 +-- R/tunable.R | 8 +- R/tune_cluster.R | 10 +- R/tune_helpers.R | 30 +- R/zzz.R | 10 +- README.Rmd | 20 +- README.md | 22 +- dev/cross_val_kmeans.R | 6 +- dev/kmeans.Rmd | 8 +- man/add_on_exports.Rd | 2 +- man/avg_silhouette.Rd | 4 +- ...ry.Rd => check_empty_ellipse_tidyclust.Rd} | 6 +- ...control_celery.Rd => control_tidyclust.Rd} | 8 +- man/convert_helpers.Rd | 2 +- man/extract_centroids.Rd | 2 +- man/extract_cluster_assignment.Rd | 2 +- man/extract_fit_summary.Rd | 2 +- man/fit.Rd | 24 +- ...v_celery.Rd => get_model_env_tidyclust.Rd} | 18 +- man/glance.cluster_fit.Rd | 2 +- ...es_celery.Rd => make_classes_tidyclust.Rd} | 6 +- man/predict.cluster_fit.Rd | 2 +- ...gine_celery.Rd => set_engine_tidyclust.Rd} | 12 +- ...l_celery.Rd => set_new_model_tidyclust.Rd} | 82 +++--- man/silhouettes.Rd | 4 +- man/sse_ratio.Rd | 4 +- man/tidy.cluster_fit.Rd | 4 +- ...celery-package.Rd => tidyclust-package.Rd} | 14 +- man/tot_sse.Rd | 4 +- man/tot_wss.Rd | 4 +- ...slate_celery.Rd => translate_tidyclust.Rd} | 20 +- man/tune_cluster.Rd | 4 +- man/within_cluster_sse.Rd | 4 +- tests/testthat.R | 4 +- tests/testthat/_snaps/k_means.md | 6 +- tests/testthat/_snaps/registration.md | 86 +++--- ...y-package.R => helper-tidyclust-package.R} | 2 +- tests/testthat/test-control.R | 4 +- tests/testthat/test-extract_summary.R | 4 +- tests/testthat/test-k_means.R | 16 +- tests/testthat/test-kmeans_diagnostics.R | 14 +- tests/testthat/test-predict_formats.R | 4 +- tests/testthat/test-registration.R | 158 +++++------ tests/testthat/test-tune_cluster.R | 26 +- celery.Rproj => tidyclust.Rproj | 0 vignettes/articles/kmeans.Rmd | 12 +- vignettes/articles/kmeans_metrics.Rmd | 8 +- 69 files changed, 607 insertions(+), 607 deletions(-) rename R/{celery-package.R => tidyclust-package.R} (100%) rename man/{check_empty_ellipse_celery.Rd => check_empty_ellipse_tidyclust.Rd} (75%) rename man/{control_celery.Rd => control_tidyclust.Rd} (84%) rename man/{get_model_env_celery.Rd => get_model_env_tidyclust.Rd} (66%) rename man/{make_classes_celery.Rd => make_classes_tidyclust.Rd} (77%) rename man/{set_engine_celery.Rd => set_engine_tidyclust.Rd} (77%) rename man/{set_new_model_celery.Rd => set_new_model_tidyclust.Rd} (72%) rename man/{celery-package.Rd => tidyclust-package.Rd} (58%) rename man/{translate_celery.Rd => translate_tidyclust.Rd} (69%) rename tests/testthat/{helper-celery-package.R => helper-tidyclust-package.R} (94%) rename celery.Rproj => tidyclust.Rproj (100%) diff --git a/.Rbuildignore b/.Rbuildignore index 02203b29..c9408518 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,4 +1,4 @@ -^celery\.Rproj$ +^tidyclust\.Rproj$ ^\.Rproj\.user$ ^isc-proposal.pdf$ ^LICENSE\.md$ diff --git a/DESCRIPTION b/DESCRIPTION index 76b41491..c35b316b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,4 +1,4 @@ -Package: celery +Package: tidyclust Title: What the Package Does (One Line, Title Case) Version: 0.0.0.9000 Authors@R: c( @@ -9,8 +9,8 @@ Authors@R: c( ) Description: What the package does (one paragraph). License: MIT + file LICENSE -URL: https://github.com/EmilHvitfeldt/celery -BugReports: https://github.com/EmilHvitfeldt/celery/issues +URL: https://github.com/EmilHvitfeldt/tidyclust +BugReports: https://github.com/EmilHvitfeldt/tidyclust/issues Imports: cli, cluster, diff --git a/LICENSE.md b/LICENSE.md index f2054a7a..adc18850 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ # MIT License -Copyright (c) 2022 celery authors +Copyright (c) 2022 tidyclust authors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/NAMESPACE b/NAMESPACE index d6a7c3cd..d46e981d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,11 +18,11 @@ S3method(predict_cluster,cluster_fit) S3method(predict_raw,cluster_fit) S3method(print,cluster_fit) S3method(print,cluster_spec) -S3method(print,control_celery) +S3method(print,control_tidyclust) S3method(print,k_means) S3method(tidy,cluster_fit) -S3method(translate_celery,default) -S3method(translate_celery,k_means) +S3method(translate_tidyclust,default) +S3method(translate_tidyclust,k_means) S3method(tune_cluster,cluster_spec) S3method(tune_cluster,default) S3method(tune_cluster,workflow) @@ -33,10 +33,10 @@ export(.convert_x_to_form_fit) export(.convert_x_to_form_new) export(ClusterR_kmeans_fit) export(avg_silhouette) -export(check_empty_ellipse_celery) -export(check_model_doesnt_exist_celery) -export(check_model_exists_celery) -export(control_celery) +export(check_empty_ellipse_tidyclust) +export(check_model_doesnt_exist_tidyclust) +export(check_model_exists_tidyclust) +export(control_tidyclust) export(enrichment) export(extract_centroids) export(extract_cluster_assignment) @@ -49,17 +49,17 @@ export(fit) export(fit.cluster_spec) export(fit_xy) export(fit_xy.cluster_spec) -export(get_dependency_celery) -export(get_encoding_celery) -export(get_fit_celery) -export(get_from_env_celery) -export(get_model_env_celery) -export(get_pred_type_celery) +export(get_dependency_tidyclust) +export(get_encoding_tidyclust) +export(get_fit_tidyclust) +export(get_from_env_tidyclust) +export(get_model_env_tidyclust) +export(get_pred_type_tidyclust) export(glance) export(k) export(k_means) export(load_pkgs) -export(make_classes_celery) +export(make_classes_tidyclust) export(min_grid) export(new_cluster_spec) export(predict.cluster_fit) @@ -70,24 +70,24 @@ export(predict_raw.cluster_fit) export(prepare_data) export(reconcile_clusterings) export(required_pkgs) -export(set_dependency_celery) -export(set_encoding_celery) -export(set_engine_celery) -export(set_env_val_celery) -export(set_fit_celery) -export(set_model_arg_celery) -export(set_model_engine_celery) -export(set_model_mode_celery) -export(set_new_model_celery) -export(set_pred_celery) -export(show_model_info_celery) +export(set_dependency_tidyclust) +export(set_encoding_tidyclust) +export(set_engine_tidyclust) +export(set_env_val_tidyclust) +export(set_fit_tidyclust) +export(set_model_arg_tidyclust) +export(set_model_engine_tidyclust) +export(set_model_mode_tidyclust) +export(set_new_model_tidyclust) +export(set_pred_tidyclust) +export(show_model_info_tidyclust) export(silhouettes) export(sse_ratio) export(tidy) export(tot_sse) export(tot_wss) -export(translate_celery) -export(translate_celery.default) +export(translate_tidyclust) +export(translate_tidyclust.default) export(tune) export(tune_cluster) export(within_cluster_sse) diff --git a/R/aaa_models.R b/R/aaa_models.R index 428d9f57..1e68b438 100644 --- a/R/aaa_models.R +++ b/R/aaa_models.R @@ -31,20 +31,20 @@ pred_types <- c("cluster", "raw") # ------------------------------------------------------------------------------ -celery <- rlang::new_environment() -celery$models <- NULL -celery$modes <- c(all_modes, "unknown") +tidyclust <- rlang::new_environment() +tidyclust$models <- NULL +tidyclust$modes <- c(all_modes, "unknown") # check if class and mode and engine are compatible check_spec_mode_engine_val <- function(cls, eng, mode) { - all_modes <- get_from_env_celery(paste0(cls, "_modes")) + all_modes <- get_from_env_tidyclust(paste0(cls, "_modes")) if (!(mode %in% all_modes)) { rlang::abort(paste0("'", mode, "' is not a known mode for model `", cls, "()`.")) } - model_info <- rlang::env_get(get_model_env_celery(), cls) + model_info <- rlang::env_get(get_model_env_tidyclust(), cls) - # Cases where the model definition is in celery but all of the engines + # Cases where the model definition is in tidyclust but all of the engines # are contained in a different package if (nrow(model_info) == 0) { check_mode_with_no_engine(cls, mode) @@ -95,7 +95,7 @@ check_spec_mode_engine_val <- function(cls, eng, mode) { invisible(NULL) } -#' Working with the celery model environment +#' Working with the tidyclust model environment #' #' These functions read and write to the environment where the package stores #' information about model specifications. @@ -107,31 +107,31 @@ check_spec_mode_engine_val <- function(cls, eng, mode) { #' @keywords internal #' @examples #' # Access the model data: -#' current_code <- get_model_env_celery() +#' current_code <- get_model_env_tidyclust() #' ls(envir = current_code) #' @keywords internal #' @export -get_model_env_celery <- function() { - current <- utils::getFromNamespace("celery", ns = "celery") +get_model_env_tidyclust <- function() { + current <- utils::getFromNamespace("tidyclust", ns = "tidyclust") current } -#' @rdname get_model_env_celery +#' @rdname get_model_env_tidyclust #' @keywords internal #' @export -get_from_env_celery <- function(items) { - mod_env <- get_model_env_celery() +get_from_env_tidyclust <- function(items) { + mod_env <- get_model_env_tidyclust() rlang::env_get(mod_env, items, default = NULL) } -#' @rdname get_model_env_celery +#' @rdname get_model_env_tidyclust #' @keywords internal #' @export -set_env_val_celery <- function(name, value) { +set_env_val_tidyclust <- function(name, value) { if (length(name) != 1 || !is.character(name)) { rlang::abort("`name` should be a single character value.") } - mod_env <- get_model_env_celery() + mod_env <- get_model_env_tidyclust() x <- list(value) names(x) <- name rlang::env_bind(mod_env, !!!x) @@ -161,13 +161,13 @@ set_env_val_celery <- function(name, value) { #' package's `predict` method. #' @param fit_obj A list with elements `interface`, `protect`, #' `func` and `defaults`. See the package vignette "Making a -#' `celery` model from scratch". +#' `tidyclust` model from scratch". #' @param pred_obj A list with elements `pre`, `post`, `func`, and `args`. #' @param type A single character value for the type of prediction. Possible #' values are: `cluster` and `raw`. #' @param pkg An options character string for a package name. -#' @param celery A single character string for the "harmonized" argument name -#' that `celery` exposes. +#' @param tidyclust A single character string for the "harmonized" argument name +#' that `tidyclust` exposes. #' @param original A single character string for the argument name that #' underlying model function uses. #' @param value A list that conforms to the `fit_obj` or `pred_obj` description @@ -181,21 +181,21 @@ set_env_val_celery <- function(name, value) { #' @keywords internal #' @details These functions are available for users to add their own models or #' engines (in a package or otherwise) so that they can be accessed using -#' `celery`. +#' `tidyclust`. #' -#' In short, `celery` stores an environment object that contains all of the +#' In short, `tidyclust` stores an environment object that contains all of the #' information and code about how models are used (e.g. fitting, predicting, #' etc). These functions can be used to add models to that environment as well #' as helper functions that can be used to makes sure that the model data is #' in the right format. #' -#' `check_model_exists_celery()` checks the model value and ensures that the -#' model has already been registered. `check_model_doesnt_exist_celery()` +#' `check_model_exists_tidyclust()` checks the model value and ensures that the +#' model has already been registered. `check_model_doesnt_exist_tidyclust()` #' checks the model value and also checks to see if it is novel in the #' environment. #' #' The options for engine-specific encodings dictate how the predictors should -#' be handled. These options ensure that the data that `celery` gives to the +#' be handled. These options ensure that the data that `tidyclust` gives to the #' underlying model allows for a model fit that is as similar as possible to #' what it would have produced directly. #' @@ -229,38 +229,38 @@ set_env_val_celery <- function(name, value) { #' and tuning. #' #' @examples -#' # set_new_model_celery("shallow_learning_model") +#' # set_new_model_tidyclust("shallow_learning_model") #' #' # Show the information about a model: -#' show_model_info_celery("k_means") +#' show_model_info_tidyclust("k_means") #' @keywords internal #' @export -set_new_model_celery <- function(model) { - check_model_doesnt_exist_celery(model) +set_new_model_tidyclust <- function(model) { + check_model_doesnt_exist_tidyclust(model) - current <- get_model_env_celery() + current <- get_model_env_tidyclust() - set_env_val_celery("models", c(current$models, model)) - set_env_val_celery( + set_env_val_tidyclust("models", c(current$models, model)) + set_env_val_tidyclust( model, dplyr::tibble(engine = character(0), mode = character(0)) ) - set_env_val_celery( + set_env_val_tidyclust( paste0(model, "_pkgs"), dplyr::tibble(engine = character(0), pkg = list(), mode = character(0)) ) - set_env_val_celery(paste0(model, "_modes"), "unknown") - set_env_val_celery( + set_env_val_tidyclust(paste0(model, "_modes"), "unknown") + set_env_val_tidyclust( paste0(model, "_args"), dplyr::tibble( engine = character(0), - celery = character(0), + tidyclust = character(0), original = character(0), func = list(), has_submodel = logical(0) ) ) - set_env_val_celery( + set_env_val_tidyclust( paste0(model, "_fit"), dplyr::tibble( engine = character(0), @@ -268,7 +268,7 @@ set_new_model_celery <- function(model) { value = list() ) ) - set_env_val_celery( + set_env_val_tidyclust( paste0(model, "_predict"), dplyr::tibble( engine = character(0), @@ -281,14 +281,14 @@ set_new_model_celery <- function(model) { invisible(NULL) } -#' @rdname set_new_model_celery +#' @rdname set_new_model_tidyclust #' @export -check_model_doesnt_exist_celery <- function(model) { +check_model_doesnt_exist_tidyclust <- function(model) { if (rlang::is_missing(model) || length(model) != 1 || !is.character(model)) { rlang::abort("Please supply a character string for a model name (e.g. `'k_means'`)") } - current <- get_model_env_celery() + current <- get_model_env_tidyclust() if (any(current$models == model)) { rlang::abort(glue::glue("Model `{model}` already exists")) @@ -297,34 +297,34 @@ check_model_doesnt_exist_celery <- function(model) { invisible(NULL) } -#' @rdname set_new_model_celery +#' @rdname set_new_model_tidyclust #' @keywords internal #' @export -set_model_mode_celery <- function(model, mode) { - check_model_exists_celery(model) +set_model_mode_tidyclust <- function(model, mode) { + check_model_exists_tidyclust(model) check_mode_val(mode) - current <- get_model_env_celery() + current <- get_model_env_tidyclust() if (!any(current$modes == mode)) { current$modes <- unique(c(current$modes, mode)) } - set_env_val_celery( + set_env_val_tidyclust( paste0(model, "_modes"), - unique(c(get_from_env_celery(paste0(model, "_modes")), mode)) + unique(c(get_from_env_tidyclust(paste0(model, "_modes")), mode)) ) invisible(NULL) } -#' @rdname set_new_model_celery +#' @rdname set_new_model_tidyclust #' @export -check_model_exists_celery <- function(model) { +check_model_exists_tidyclust <- function(model) { if (rlang::is_missing(model) || length(model) != 1 || !is.character(model)) { rlang::abort("Please supply a character string for a model name (e.g. `'k_means'`)") } - current <- get_model_env_celery() + current <- get_model_env_tidyclust() if (!any(current$models == model)) { rlang::abort(glue::glue("Model `{model}` has not been registered.")) @@ -342,26 +342,26 @@ check_mode_val <- function(mode) { # ------------------------------------------------------------------------------ -#' @rdname set_new_model_celery +#' @rdname set_new_model_tidyclust #' @keywords internal #' @export -set_model_engine_celery <- function(model, mode, eng) { - check_model_exists_celery(model) +set_model_engine_tidyclust <- function(model, mode, eng) { + check_model_exists_tidyclust(model) check_mode_val(mode) check_eng_val(eng) check_mode_val(eng) check_mode_for_new_engine(model, eng, mode) new_eng <- dplyr::tibble(engine = eng, mode = mode) - old_eng <- get_from_env_celery(model) + old_eng <- get_from_env_tidyclust(model) engs <- old_eng %>% dplyr::bind_rows(new_eng) %>% dplyr::distinct() - set_env_val_celery(model, engs) - set_model_mode_celery(model, mode) + set_env_val_tidyclust(model, engs) + set_model_mode_tidyclust(model, mode) invisible(NULL) } @@ -373,23 +373,23 @@ check_eng_val <- function(eng) { } check_mode_for_new_engine <- function(cls, eng, mode) { - all_modes <- get_from_env_celery(paste0(cls, "_modes")) + all_modes <- get_from_env_tidyclust(paste0(cls, "_modes")) if (!(mode %in% all_modes)) { rlang::abort(paste0("'", mode, "' is not a known mode for model `", cls, "()`.")) } invisible(NULL) } -#' @rdname set_new_model_celery +#' @rdname set_new_model_tidyclust #' @keywords internal #' @export -set_dependency_celery <- function(model, eng, pkg = "celery", mode = NULL) { - check_model_exists_celery(model) +set_dependency_tidyclust <- function(model, eng, pkg = "tidyclust", mode = NULL) { + check_model_exists_tidyclust(model) check_eng_val(eng) check_pkg_val(pkg) - model_info <- get_from_env_celery(model) - pkg_info <- get_from_env_celery(paste0(model, "_pkgs")) + model_info <- get_from_env_tidyclust(model) + pkg_info <- get_from_env_tidyclust(paste0(model, "_pkgs")) # ---------------------------------------------------------------------------- # Check engine @@ -443,21 +443,21 @@ set_dependency_celery <- function(model, eng, pkg = "celery", mode = NULL) { dplyr::bind_rows(eng_pkgs) %>% dplyr::arrange(engine, mode) - set_env_val_celery(paste0(model, "_pkgs"), pkg_info) + set_env_val_tidyclust(paste0(model, "_pkgs"), pkg_info) invisible(NULL) } -#' @rdname set_new_model_celery +#' @rdname set_new_model_tidyclust #' @keywords internal #' @export -get_dependency_celery <- function(model) { - check_model_exists_celery(model) +get_dependency_tidyclust <- function(model) { + check_model_exists_tidyclust(model) pkg_name <- paste0(model, "_pkgs") - if (!any(pkg_name != rlang::env_names(get_model_env_celery()))) { - rlang::abort(glue::glue("`{model}` does not have a dependency list in celery.")) + if (!any(pkg_name != rlang::env_names(get_model_env_tidyclust()))) { + rlang::abort(glue::glue("`{model}` does not have a dependency list in tidyclust.")) } - rlang::env_get(get_model_env_celery(), pkg_name) + rlang::env_get(get_model_env_tidyclust(), pkg_name) } check_pkg_val <- function(pkg) { @@ -467,17 +467,17 @@ check_pkg_val <- function(pkg) { invisible(NULL) } -#' @rdname set_new_model_celery +#' @rdname set_new_model_tidyclust #' @keywords internal #' @export -set_fit_celery <- function(model, mode, eng, value) { - check_model_exists_celery(model) +set_fit_tidyclust <- function(model, mode, eng, value) { + check_model_exists_tidyclust(model) check_eng_val(eng) check_spec_mode_engine_val(model, eng, mode) check_fit_info(value) - model_info <- get_from_env_celery(model) - old_fits <- get_from_env_celery(paste0(model, "_fit")) + model_info <- get_from_env_tidyclust(model) + old_fits <- get_from_env_tidyclust(paste0(model, "_fit")) has_engine <- model_info %>% @@ -514,7 +514,7 @@ set_fit_celery <- function(model, mode, eng, value) { rlang::abort("An error occured when adding the new fit module.") } - set_env_val_celery( + set_env_val_tidyclust( paste0(model, "_fit"), updated ) @@ -522,16 +522,16 @@ set_fit_celery <- function(model, mode, eng, value) { invisible(NULL) } -#' @rdname set_new_model_celery +#' @rdname set_new_model_tidyclust #' @keywords internal #' @export -get_fit_celery <- function(model) { - check_model_exists_celery(model) +get_fit_tidyclust <- function(model) { + check_model_exists_tidyclust(model) fit_name <- paste0(model, "_fit") - if (!any(fit_name != rlang::env_names(get_model_env_celery()))) { - rlang::abort(glue::glue("`{model}` does not have a `fit` method in celery.")) + if (!any(fit_name != rlang::env_names(get_model_env_tidyclust()))) { + rlang::abort(glue::glue("`{model}` does not have a `fit` method in tidyclust.")) } - rlang::env_get(get_model_env_celery(), fit_name) + rlang::env_get(get_model_env_tidyclust(), fit_name) } check_fit_info <- function(fit_obj) { @@ -635,17 +635,17 @@ check_func_val <- function(func) { invisible(NULL) } -#' @rdname set_new_model_celery +#' @rdname set_new_model_tidyclust #' @keywords internal #' @export -get_encoding_celery <- function(model) { - check_model_exists_celery(model) +get_encoding_tidyclust <- function(model) { + check_model_exists_tidyclust(model) nm <- paste0(model, "_encoding") - res <- try(get_from_env_celery(nm), silent = TRUE) + res <- try(get_from_env_tidyclust(nm), silent = TRUE) if (inherits(res, "try-error")) { - # for objects made before encodings were specified in celery + # for objects made before encodings were specified in tidyclust res <- - get_from_env_celery(model) %>% + get_from_env_tidyclust(model) %>% dplyr::mutate( model = model, predictor_indicators = "traditional", @@ -662,10 +662,10 @@ get_encoding_celery <- function(model) { } #' @export -#' @rdname set_new_model_celery +#' @rdname set_new_model_tidyclust #' @keywords internal -set_encoding_celery <- function(model, mode, eng, options) { - check_model_exists_celery(model) +set_encoding_tidyclust <- function(model, mode, eng, options) { + check_model_exists_tidyclust(model) check_eng_val(eng) check_mode_val(mode) check_encodings(options) @@ -675,10 +675,10 @@ set_encoding_celery <- function(model, mode, eng, options) { new_values <- dplyr::bind_cols(keys, options) - current_db_list <- ls(envir = get_model_env_celery()) + current_db_list <- ls(envir = get_model_env_tidyclust()) nm <- paste(model, "encoding", sep = "_") if (any(current_db_list == nm)) { - current <- get_from_env_celery(nm) + current <- get_from_env_tidyclust(nm) dup_check <- current %>% dplyr::inner_join( @@ -693,7 +693,7 @@ set_encoding_celery <- function(model, mode, eng, options) { } db_values <- dplyr::bind_rows(current, new_values) - set_env_val_celery(nm, db_values) + set_env_val_tidyclust(nm, db_values) invisible(NULL) } @@ -713,7 +713,7 @@ check_encodings <- function(x) { if (length(missing_args) > 0) { rlang::abort( glue::glue( - "The values passed to `set_encoding_celery()` are missing arguments: ", + "The values passed to `set_encoding_tidyclust()` are missing arguments: ", paste0("'", missing_args, "'", collapse = ", ") ) ) @@ -722,7 +722,7 @@ check_encodings <- function(x) { if (length(extra_args) > 0) { rlang::abort( glue::glue( - "The values passed to `set_encoding_celery()` had extra arguments: ", + "The values passed to `set_encoding_tidyclust()` had extra arguments: ", paste0("'", extra_args, "'", collapse = ", ") ) ) @@ -731,23 +731,23 @@ check_encodings <- function(x) { } # ------------------------------------------------------------------------------ -#' @rdname set_new_model_celery +#' @rdname set_new_model_tidyclust #' @keywords internal #' @export -set_model_arg_celery <- function(model, eng, celery, original, func, has_submodel) { - check_model_exists_celery(model) +set_model_arg_tidyclust <- function(model, eng, tidyclust, original, func, has_submodel) { + check_model_exists_tidyclust(model) check_eng_val(eng) - check_arg_val(celery) + check_arg_val(tidyclust) check_arg_val(original) check_func_val(func) check_submodels_val(has_submodel) - old_args <- get_from_env_celery(paste0(model, "_args")) + old_args <- get_from_env_tidyclust(paste0(model, "_args")) new_arg <- dplyr::tibble( engine = eng, - celery = celery, + tidyclust = tidyclust, original = original, func = list(func), has_submodel = has_submodel @@ -759,7 +759,7 @@ set_model_arg_celery <- function(model, eng, celery, original, func, has_submode } updated <- vctrs::vec_unique(updated) - set_env_val_celery(paste0(model, "_args"), updated) + set_env_val_tidyclust(paste0(model, "_args"), updated) invisible(NULL) } @@ -779,7 +779,7 @@ check_submodels_val <- function(has_submodel) { } check_mode_with_no_engine <- function(cls, mode) { - spec_modes <- get_from_env_celery(paste0(cls, "_modes")) + spec_modes <- get_from_env_tidyclust(paste0(cls, "_modes")) if (!(mode %in% spec_modes)) { stop_incompatible_mode(spec_modes, cls = cls) } @@ -816,21 +816,21 @@ stop_incompatible_engine <- function(spec_engs, mode) { # ------------------------------------------------------------------------------ -#' @rdname set_new_model_celery +#' @rdname set_new_model_tidyclust #' @keywords internal #' @export -show_model_info_celery <- function(model) { - check_model_exists_celery(model) +show_model_info_tidyclust <- function(model) { + check_model_exists_tidyclust(model) cat("Information for `", model, "`\n", sep = "") cat( " modes:", - paste0(get_from_env_celery(paste0(model, "_modes")), collapse = ", "), + paste0(get_from_env_tidyclust(paste0(model, "_modes")), collapse = ", "), "\n\n" ) - engines <- get_from_env_celery(model) + engines <- get_from_env_tidyclust(model) if (nrow(engines) > 0) { cat(" engines: \n") engines %>% @@ -852,21 +852,21 @@ show_model_info_celery <- function(model) { cat(" no registered engines.\n\n") } - args <- get_from_env_celery(paste0(model, "_args")) + args <- get_from_env_tidyclust(paste0(model, "_args")) if (nrow(args) > 0) { cat(" arguments: \n") args %>% - dplyr::select(engine, celery, original) %>% + dplyr::select(engine, tidyclust, original) %>% dplyr::distinct() %>% dplyr::mutate( engine = format(paste0(" ", engine, ": ")), - celery = paste0(" ", format(celery), " --> ", original, "\n") + tidyclust = paste0(" ", format(tidyclust), " --> ", original, "\n") ) %>% dplyr::group_by(engine) %>% dplyr::mutate( engine2 = ifelse(dplyr::row_number() == 1, engine, ""), - celery = ifelse(dplyr::row_number() == 1, paste0("\n", celery), celery), - lab = paste0(engine2, celery) + tidyclust = ifelse(dplyr::row_number() == 1, paste0("\n", tidyclust), tidyclust), + lab = paste0(engine2, tidyclust) ) %>% dplyr::ungroup() %>% dplyr::pull(lab) %>% @@ -876,7 +876,7 @@ show_model_info_celery <- function(model) { cat(" no registered arguments.\n\n") } - fits <- get_from_env_celery(paste0(model, "_fit")) + fits <- get_from_env_tidyclust(paste0(model, "_fit")) if (nrow(fits) > 0) { cat(" fit modules:\n") fits %>% @@ -889,7 +889,7 @@ show_model_info_celery <- function(model) { cat(" no registered fit modules.\n\n") } - preds <- get_from_env_celery(paste0(model, "_predict")) + preds <- get_from_env_tidyclust(paste0(model, "_predict")) if (nrow(preds) > 0) { cat(" prediction modules:\n") preds %>% @@ -908,11 +908,11 @@ show_model_info_celery <- function(model) { # ------------------------------------------------------------------------------ -#' @rdname set_new_model_celery +#' @rdname set_new_model_tidyclust #' @keywords internal #' @export -set_pred_celery <- function(model, mode, eng, type, value) { - check_model_exists_celery(model) +set_pred_tidyclust <- function(model, mode, eng, type, value) { + check_model_exists_tidyclust(model) check_eng_val(eng) check_spec_mode_engine_val(model, eng, mode) check_pred_info(value, type) @@ -931,29 +931,29 @@ set_pred_celery <- function(model, mode, eng, type, value) { return(invisible(NULL)) } - old_pred <- get_from_env_celery(paste0(model, "_predict")) + old_pred <- get_from_env_tidyclust(paste0(model, "_predict")) updated <- try(dplyr::bind_rows(old_pred, new_pred), silent = TRUE) if (inherits(updated, "try-error")) { rlang::abort("An error occured when adding the new fit module.") } - set_env_val_celery(paste0(model, "_predict"), updated) + set_env_val_tidyclust(paste0(model, "_predict"), updated) invisible(NULL) } -#' @rdname set_new_model_celery +#' @rdname set_new_model_tidyclust #' @keywords internal #' @export -get_pred_type_celery <- function(model, type) { - check_model_exists_celery(model) +get_pred_type_tidyclust <- function(model, type) { + check_model_exists_tidyclust(model) pred_name <- paste0(model, "_predict") - if (!any(pred_name != rlang::env_names(get_model_env_celery()))) { - rlang::abort(glue::glue("`{model}` does not have any `pred` methods in celery.")) + if (!any(pred_name != rlang::env_names(get_model_env_tidyclust()))) { + rlang::abort(glue::glue("`{model}` does not have any `pred` methods in tidyclust.")) } - all_preds <- rlang::env_get(get_model_env_celery(), pred_name) + all_preds <- rlang::env_get(get_model_env_tidyclust(), pred_name) if (!any(all_preds$type == type)) { - rlang::abort(glue::glue("`{model}` does not have any prediction methods incelery.")) + rlang::abort(glue::glue("`{model}` does not have any prediction methods intidyclust.")) } dplyr::filter(all_preds, type == !!type) } @@ -996,7 +996,7 @@ check_pred_info <- function(pred_obj, type) { } check_unregistered <- function(model, mode, eng) { - model_info <- get_from_env_celery(model) + model_info <- get_from_env_tidyclust(model) has_engine <- model_info %>% dplyr::filter(engine == eng & mode == !!mode) %>% @@ -1017,7 +1017,7 @@ check_unregistered <- function(model, mode, eng) { # new information is different, fail with a message. See issue parsnip/#653 is_discordant_info <- function(model, mode, eng, candidate, pred_type = NULL, component = "fit") { - current <- get_from_env_celery(paste0(model, "_", component)) + current <- get_from_env_tidyclust(paste0(model, "_", component)) # For older versions of parsnip before set_encoding() new_encoding <- is.null(current) & component == "encoding" diff --git a/R/cluster_spec.R b/R/cluster_spec.R index 62eac80a..0b7f44f8 100644 --- a/R/cluster_spec.R +++ b/R/cluster_spec.R @@ -1,4 +1,4 @@ -#' Functions required for celery-adjacent packages +#' Functions required for tidyclust-adjacent packages #' #' These functions are helpful when creating new packages that will register #' new cluster specifications. @@ -13,7 +13,7 @@ new_cluster_spec <- function(cls, args, eng_args, mode, method, engine) { args = args, eng_args = eng_args, mode = mode, method = method, engine = engine ) - class(out) <- make_classes_celery(cls) + class(out) <- make_classes_tidyclust(cls) out } @@ -25,6 +25,6 @@ new_cluster_spec <- function(cls, args, eng_args, mode, method, engine) { #' @return A character vector. #' @keywords internal #' @export -make_classes_celery <- function(prefix) { +make_classes_tidyclust <- function(prefix) { c(prefix, "cluster_spec") } diff --git a/R/colors.R b/R/colors.R index ee7ff6ce..2f49d5bb 100644 --- a/R/colors.R +++ b/R/colors.R @@ -15,9 +15,9 @@ bold <- function(...) as.character(cli::style_bold(...)) # ------------------------------------------------------------------------------ -# For use in setting the `celery_color` active binding in `.onLoad()` +# For use in setting the `tidyclust_color` active binding in `.onLoad()` -celery_color_dark <- list( +tidyclust_color_dark <- list( symbol = list( "warning" = yellow, "go" = white, @@ -34,7 +34,7 @@ celery_color_dark <- list( ) ) -celery_color_light <- list( +tidyclust_color_light <- list( symbol = list( "warning" = yellow, "go" = black, @@ -52,4 +52,4 @@ celery_color_light <- list( ) -get_celery_colors <- function() celery_color +get_tidyclust_colors <- function() tidyclust_color diff --git a/R/control.R b/R/control.R index 1cc3fb4e..27644d51 100644 --- a/R/control.R +++ b/R/control.R @@ -12,13 +12,13 @@ #' @param catch A logical where a value of `TRUE` will evaluate the model inside #' of `try(, silent = TRUE)`. If the model fails, an object is still returned #' (without an error) that inherits the class "try-error". -#' @return An S3 object with class "control_celery" that is a named list with +#' @return An S3 object with class "control_tidyclust" that is a named list with #' the results of the function call #' @export -control_celery <- function(verbosity = 1L, catch = FALSE) { +control_tidyclust <- function(verbosity = 1L, catch = FALSE) { res <- list(verbosity = verbosity, catch = catch) res <- check_control(res) - class(res) <- c("control_celery", "control_parsnip") + class(res) <- c("control_tidyclust", "control_parsnip") res } @@ -41,8 +41,8 @@ check_control <- function(x) { } #' @export -print.control_celery <- function(x, ...) { - cat("celery control object\n") +print.control_tidyclust <- function(x, ...) { + cat("tidyclust control object\n") if (x$verbosity > 1) { cat(" - verbose level", x$verbosity, "\n") } diff --git a/R/diagnostic_metrics.R b/R/diagnostic_metrics.R index 120b526f..1803837f 100644 --- a/R/diagnostic_metrics.R +++ b/R/diagnostic_metrics.R @@ -2,7 +2,7 @@ #' Calculates Sum of Squared Error in each cluster #' -#' @param object A fitted kmeans celery model +#' @param object A fitted kmeans tidyclust model #' @param new_data A dataset to predict on. If `NULL`, uses trained clustering. #' @param dist_fun A function for calculating distances to centroids. Defaults #' to Euclidean distance on processed data. @@ -12,7 +12,7 @@ #' #' @examples #' kmeans_spec <- k_means(k = 5) %>% -#' set_engine_celery("stats") +#' set_engine_tidyclust("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) #' @@ -62,7 +62,7 @@ within_cluster_sse <- function(object, new_data = NULL, #' Compute the sum of within-cluster SSE #' -#' @param object A fitted kmeans celery model +#' @param object A fitted kmeans tidyclust model #' @param new_data A dataset to predict on. If `NULL`, uses trained clustering. #' @param dist_fun A function for calculating distances to centroids. Defaults #' to Euclidean distance on processed data. @@ -71,7 +71,7 @@ within_cluster_sse <- function(object, new_data = NULL, #' #' @examples #' kmeans_spec <- k_means(k = 5) %>% -#' set_engine_celery("stats") +#' set_engine_tidyclust("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) #' @@ -84,7 +84,7 @@ tot_wss <- function(object, new_data = NULL, dist_fun = Rfast::dista, ...) { #' Compute the total sum of squares #' -#' @param object A fitted kmeans celery model +#' @param object A fitted kmeans tidyclust model #' @param new_data A dataset to predict on. If `NULL`, uses trained clustering. #' @param dist_fun A function for calculating distances to centroids. Defaults #' to Euclidean distance on processed data. @@ -93,7 +93,7 @@ tot_wss <- function(object, new_data = NULL, dist_fun = Rfast::dista, ...) { #' #' @examples #' kmeans_spec <- k_means(k = 5) %>% -#' set_engine_celery("stats") +#' set_engine_tidyclust("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) #' @@ -126,7 +126,7 @@ tot_sse <- function(object, new_data = NULL, dist_fun = Rfast::dista, ...) { #' Compute the ratio of the WSS to the total SSE #' -#' @param object A fitted kmeans celery model +#' @param object A fitted kmeans tidyclust model #' @param new_data A dataset to predict on. If `NULL`, uses trained clustering. #' @param dist_fun A function for calculating distances to centroids. Defaults #' to Euclidean distance on processed data. @@ -134,7 +134,7 @@ tot_sse <- function(object, new_data = NULL, dist_fun = Rfast::dista, ...) { #' #' @examples #' kmeans_spec <- k_means(k = 5) %>% -#' set_engine_celery("stats") +#' set_engine_tidyclust("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) #' @@ -151,7 +151,7 @@ sse_ratio <- function(object, new_data = NULL, dist_fun = Rfast::dista, ...) { #' Measures silhouettes between clusters #' -#' @param object A fitted kmeans celery model +#' @param object A fitted kmeans tidyclust model #' @param new_data A dataset to predict on. If `NULL`, uses trained clustering. #' @param dists A distance matrix. Used if `new_data` is `NULL`. #' @param dist_fun A function for calculating distances between observations. Defaults @@ -161,7 +161,7 @@ sse_ratio <- function(object, new_data = NULL, dist_fun = Rfast::dista, ...) { #' #' @examples #' kmeans_spec <- k_means(k = 5) %>% -#' set_engine_celery("stats") +#' set_engine_tidyclust("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) #' @@ -192,7 +192,7 @@ silhouettes <- function(object, new_data = NULL, #' Measures average silhouette across all observations -#' @param object A fitted kmeans celery model +#' @param object A fitted kmeans tidyclust model #' @param new_data A dataset to predict on. If `NULL`, uses trained clustering. #' @param dists A distance matrix. Used if `new_data` is `NULL`. #' @param dist_fun A function for calculating distances between observations. Defaults @@ -203,7 +203,7 @@ silhouettes <- function(object, new_data = NULL, #' #' @examples #' kmeans_spec <- k_means(k = 5) %>% -#' set_engine_celery("stats") +#' set_engine_tidyclust("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) #' diff --git a/R/engines.R b/R/engines.R index 2185bf5b..74369385 100644 --- a/R/engines.R +++ b/R/engines.R @@ -1,6 +1,6 @@ #' Declare a computational engine and specific arguments #' -#' `set_engine_celery()` is used to specify which package or system will be used +#' `set_engine_tidyclust()` is used to specify which package or system will be used #' to fit the model, along with any arguments specific to that software. #' #' @section Engines: @@ -18,11 +18,11 @@ #' # First, set general arguments using the standardized names #' mod <- k_means(k = 10) %>% #' # now say how you want to fit the model and another other options -#' set_engine_celery("stats", iter.max = 15) +#' set_engine_tidyclust("stats", iter.max = 15) #' -#' translate_celery(mod, engine = "stats") +#' translate_tidyclust(mod, engine = "stats") #' @export -set_engine_celery <- function(object, engine, ...) { +set_engine_tidyclust <- function(object, engine, ...) { mod_type <- class(object)[1] if (!inherits(object, "cluster_spec")) { rlang::abort("`object` should have class 'cluster_spec'.") @@ -46,7 +46,7 @@ set_engine_celery <- function(object, engine, ...) { stop_missing_engine <- function(cls) { info <- - get_from_env_celery(cls) %>% + get_from_env_tidyclust(cls) %>% dplyr::group_by(mode) %>% dplyr::summarize( msg = paste0( @@ -81,7 +81,7 @@ specific_model <- function(x) { } possible_engines <- function(object, ...) { - m_env <- get_model_env_celery() + m_env <- get_model_env_tidyclust() engs <- rlang::env_get(m_env, specific_model(object)) unique(engs$engine) } diff --git a/R/extract_assignment.R b/R/extract_assignment.R index 292ce496..6afb11da 100644 --- a/R/extract_assignment.R +++ b/R/extract_assignment.R @@ -5,7 +5,7 @@ #' #' @examples #' kmeans_spec <- k_means(k = 5) %>% -#' set_engine_celery("stats") +#' set_engine_tidyclust("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) #' diff --git a/R/extract_characterization.R b/R/extract_characterization.R index 5174e020..d09f0473 100644 --- a/R/extract_characterization.R +++ b/R/extract_characterization.R @@ -6,7 +6,7 @@ #' @examples #' set.seed(1234) #' kmeans_spec <- k_means(k = 5) %>% -#' set_engine_celery("stats") +#' set_engine_tidyclust("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) #' diff --git a/R/extract_summary.R b/R/extract_summary.R index 9952153c..47a23e8e 100644 --- a/R/extract_summary.R +++ b/R/extract_summary.R @@ -8,7 +8,7 @@ #' #' @examples #' kmeans_spec <- k_means(k = 5) %>% -#' set_engine_celery("stats") +#' set_engine_tidyclust("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) #' diff --git a/R/fit.R b/R/fit.R index 4695756d..1b0b536b 100644 --- a/R/fit.R +++ b/R/fit.R @@ -1,25 +1,25 @@ #' Fit a Model Specification to a Data Set #' -#' `fit()` and `fit_xy()` take a model specification, translate_celery the +#' `fit()` and `fit_xy()` take a model specification, translate_tidyclust the #' required code by substituting arguments, and execute the model fit routine. #' #' @param object An object of class `cluster_spec` that has a chosen engine (via -#' [set_engine_celery()]). +#' [set_engine_tidyclust()]). #' @param formula An object of class `formula` (or one that can be coerced to #' that class): a symbolic description of the model to be fitted. #' @param data Optional, depending on the interface (see Details below). A data #' frame containing all relevant variables (e.g. outcome(s), predictors, case #' weights, etc). Note: when needed, a \emph{named argument} should be used. #' @param control A named list with elements `verbosity` and `catch`. See -#' [control_celery()]. +#' [control_tidyclust()]. #' @param ... Not currently used; values passed here will be ignored. Other #' options required to fit the model should be passed using -#' `set_engine_celery()`. +#' `set_engine_tidyclust()`. #' @details `fit()` and `fit_xy()` substitute the current arguments in the #' model specification into the computational engine's code, check them for #' validity, then fit the model using the data and the engine-specific code. #' Different model functions have different interfaces (e.g. formula or -#' `x`/`y`) and these functions translate_celery between the interface used +#' `x`/`y`) and these functions translate_tidyclust between the interface used #' when `fit()` or `fit_xy()` was invoked and the one required by the #' underlying model. #' @@ -33,7 +33,7 @@ #' #' If the model engine has not been set, the model's default engine will be #' used (as discussed on each model page). If the `verbosity` option of -#' [control_celery()] is greater than zero, a warning will be produced. +#' [control_tidyclust()] is greater than zero, a warning will be produced. #' #' If you would like to use an alternative method for generating contrasts #' when supplying a formula to `fit()`, set the global option `contrasts` to @@ -47,12 +47,12 @@ #' #' using_formula <- #' kmeans_mod %>% -#' set_engine_celery("stats") %>% +#' set_engine_tidyclust("stats") %>% #' fit(~., data = mtcars) #' #' using_x <- #' kmeans_mod %>% -#' set_engine_celery("stats") %>% +#' set_engine_tidyclust("stats") %>% #' fit_xy(x = mtcars) #' #' using_formula @@ -73,11 +73,11 @@ #' The return value will also have a class related to the fitted model (e.g. #' `"_kmeans"`) before the base class of `"cluster_fit"`. #' -#' @seealso [set_engine_celery()], [control_celery()], `cluster_spec`, +#' @seealso [set_engine_tidyclust()], [control_tidyclust()], `cluster_spec`, #' `cluster_fit` #' @param x A matrix, sparse matrix, or data frame of predictors. Only some #' models have support for sparse matrix input. See -#' `celery::get_encoding_celery()` for details. `x` should have column names. +#' `tidyclust::get_encoding_tidyclust()` for details. `x` should have column names. #' @param case_weights An optional classed vector of numeric case weights. This #' must return `TRUE` when [hardhat::is_case_weights()] is run on it. See #' [hardhat::frequency_weights()] and [hardhat::importance_weights()] for @@ -88,13 +88,13 @@ fit.cluster_spec <- function(object, formula, data, - control = control_celery(), + control = control_tidyclust(), ...) { if (object$mode == "unknown") { rlang::abort("Please set the mode in the model specification.") } - # if (!inherits(control, "control_celery")) { - # rlang::abort("The 'control' argument should have class 'control_celery'.") + # if (!inherits(control, "control_tidyclust")) { + # rlang::abort("The 'control' argument should have class 'control_tidyclust'.") # } dots <- quos(...) if (is.null(object$engine)) { @@ -228,9 +228,9 @@ eval_mod <- function(e, capture = FALSE, catch = FALSE, ...) { #' @export #' @export fit_xy.cluster_spec fit_xy.cluster_spec <- - function(object, x, case_weights = NULL, control = control_celery(), ...) { - # if (!inherits(control, "control_celery")) { - # rlang::abort("The 'control' argument should have class 'control_celery'.") + function(object, x, case_weights = NULL, control = control_tidyclust(), ...) { + # if (!inherits(control, "control_tidyclust")) { + # rlang::abort("The 'control' argument should have class 'control_tidyclust'.") # } if (is.null(colnames(x))) { rlang::abort("'x' should have column names.") @@ -331,6 +331,6 @@ check_x_interface <- function(x, cl, model) { } allow_sparse <- function(x) { - res <- get_from_env_celery(paste0(class(x)[1], "_encoding")) + res <- get_from_env_tidyclust(paste0(class(x)[1], "_encoding")) all(res$allow_sparse_x[res$engine == x$engine]) } diff --git a/R/fit_helpers.R b/R/fit_helpers.R index 4557495e..a643be07 100644 --- a/R/fit_helpers.R +++ b/R/fit_helpers.R @@ -4,7 +4,7 @@ form_form <- function(object, control, env, ...) { object <- check_args(object) # sub in arguments to actual syntax for corresponding engine - object <- translate_celery(object, engine = object$engine) + object <- translate_tidyclust(object, engine = object$engine) fit_call <- make_form_call(object, env = env) @@ -39,7 +39,7 @@ form_form <- function(object, control, env, ...) { form_x <- function(object, control, env, target = "none", ...) { encoding_info <- - get_encoding_celery(class(object)[1]) %>% + get_encoding_tidyclust(class(object)[1]) %>% dplyr::filter(mode == object$mode, engine == object$engine) indicators <- encoding_info %>% dplyr::pull(predictor_indicators) @@ -69,7 +69,7 @@ form_x <- function(object, control, env, target = "none", ...) { x_x <- function(object, env, control, target = "none", y = NULL, ...) { encoding_info <- - get_encoding_celery(class(object)[1]) %>% + get_encoding_tidyclust(class(object)[1]) %>% dplyr::filter(mode == object$mode, engine == object$engine) remove_intercept <- encoding_info %>% dplyr::pull(remove_intercept) @@ -81,7 +81,7 @@ x_x <- function(object, env, control, target = "none", y = NULL, ...) { object <- check_args(object) # sub in arguments to actual syntax for corresponding engine - object <- translate_celery(object, engine = object$engine) + object <- translate_tidyclust(object, engine = object$engine) fit_call <- make_x_call(object, target) @@ -115,7 +115,7 @@ x_x <- function(object, env, control, target = "none", y = NULL, ...) { x_form <- function(object, env, control, ...) { encoding_info <- - get_encoding_celery(class(object)[1]) %>% + get_encoding_tidyclust(class(object)[1]) %>% dplyr::filter(mode == object$mode, engine == object$engine) remove_intercept <- encoding_info %>% dplyr::pull(remove_intercept) diff --git a/R/k_means.R b/R/k_means.R index dd607a42..1faf7666 100644 --- a/R/k_means.R +++ b/R/k_means.R @@ -49,8 +49,8 @@ print.k_means <- function(x, ...) { } #' @export -translate_celery.k_means <- function(x, engine = x$engine, ...) { - x <- translate_celery.default(x, engine, ...) +translate_tidyclust.k_means <- function(x, engine = x$engine, ...) { + x <- translate_tidyclust.default(x, engine, ...) x } diff --git a/R/k_means_data.R b/R/k_means_data.R index 6223768e..dc23f43e 100644 --- a/R/k_means_data.R +++ b/R/k_means_data.R @@ -1,13 +1,13 @@ -set_new_model_celery("k_means") +set_new_model_tidyclust("k_means") -set_model_mode_celery("k_means", "partition") +set_model_mode_tidyclust("k_means", "partition") # ------------------------------------------------------------------------------ -set_model_engine_celery("k_means", "partition", "stats") -set_dependency_celery("k_means", "stats", "stats") +set_model_engine_tidyclust("k_means", "partition", "stats") +set_dependency_tidyclust("k_means", "stats", "stats") -set_fit_celery( +set_fit_tidyclust( model = "k_means", eng = "stats", mode = "partition", @@ -19,7 +19,7 @@ set_fit_celery( ) ) -set_encoding_celery( +set_encoding_tidyclust( model = "k_means", eng = "stats", mode = "partition", @@ -31,16 +31,16 @@ set_encoding_celery( ) ) -set_model_arg_celery( +set_model_arg_tidyclust( model = "k_means", eng = "stats", - celery = "k", + tidyclust = "k", original = "centers", - func = list(pkg = "celery", fun = "k"), + func = list(pkg = "tidyclust", fun = "k"), has_submodel = TRUE ) -set_pred_celery( +set_pred_tidyclust( model = "k_means", eng = "stats", mode = "partition", @@ -59,10 +59,10 @@ set_pred_celery( # ------------------------------------------------------------------------------ -set_model_engine_celery("k_means", "partition", "ClusterR") -set_dependency_celery("k_means", "ClusterR", "ClusterR") +set_model_engine_tidyclust("k_means", "partition", "ClusterR") +set_dependency_tidyclust("k_means", "ClusterR", "ClusterR") -set_fit_celery( +set_fit_tidyclust( model = "k_means", eng = "ClusterR", mode = "partition", @@ -70,12 +70,12 @@ set_fit_celery( interface = "matrix", data = c(x = "data"), protect = c("data", "clusters"), - func = c(pkg = "celery", fun = "ClusterR_kmeans_fit"), + func = c(pkg = "tidyclust", fun = "ClusterR_kmeans_fit"), defaults = list() ) ) -set_encoding_celery( +set_encoding_tidyclust( model = "k_means", eng = "ClusterR", mode = "partition", @@ -87,16 +87,16 @@ set_encoding_celery( ) ) -set_model_arg_celery( +set_model_arg_tidyclust( model = "k_means", eng = "ClusterR", - celery = "k", + tidyclust = "k", original = "clusters", - func = list(pkg = "celery", fun = "k"), + func = list(pkg = "tidyclust", fun = "k"), has_submodel = TRUE ) -set_pred_celery( +set_pred_tidyclust( model = "k_means", eng = "ClusterR", mode = "partition", diff --git a/R/load_ns.R b/R/load_ns.R index fe30512c..c05904e5 100644 --- a/R/load_ns.R +++ b/R/load_ns.R @@ -28,6 +28,6 @@ load_namespace <- function(x) { } infra_pkgs <- c( - "tune", "recipes", "celery", "yardstick", "purrr", "dplyr", "tibble", + "tune", "recipes", "tidyclust", "yardstick", "purrr", "dplyr", "tibble", "dials", "rsample", "workflows", "tidyr", "rlang", "vctrs" ) diff --git a/R/predict.R b/R/predict.R index e92626e9..5d8fa4d3 100644 --- a/R/predict.R +++ b/R/predict.R @@ -41,7 +41,7 @@ #' #' @examples #' kmeans_spec <- k_means(k = 5) %>% -#' set_engine_celery("stats") +#' set_engine_tidyclust("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) #' @@ -119,7 +119,7 @@ prepare_data <- function(object, new_data) { } remove_intercept <- - get_encoding_celery(class(object$spec)[1]) %>% + get_encoding_tidyclust(class(object$spec)[1]) %>% dplyr::filter(mode == object$spec$mode, engine == object$spec$engine) %>% dplyr::pull(remove_intercept) if (remove_intercept & any(grepl("Intercept", names(new_data)))) { diff --git a/R/print.R b/R/print.R index b43e6da2..65de3127 100644 --- a/R/print.R +++ b/R/print.R @@ -1,6 +1,6 @@ #' @export print.cluster_fit <- function(x, ...) { - cat("celery cluster object\n\n") + cat("tidyclust cluster object\n\n") if (!is.na(x$elapsed[["elapsed"]])) { cat( "Fit time: ", prettyunits::pretty_sec(x$elapsed[["elapsed"]]), diff --git a/R/tidy.R b/R/tidy.R index a9b9b619..c5439070 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -1,6 +1,6 @@ -#' Turn a celery model object into a tidy tibble +#' Turn a tidyclust model object into a tidy tibble #' -#' This method tidies the model in a celery model object, if it exists. +#' This method tidies the model in a tidyclust model object, if it exists. #' #' @inheritParams generics::tidy #' @@ -11,7 +11,7 @@ tidy.cluster_fit <- function(x, ...) generics::tidy(x$fit, ...) #' Construct a single row summary "glance" of a model, fit, or other object #' -#' This method glances the model in a celery model object, if it exists. +#' This method glances the model in a tidyclust model object, if it exists. #' #' @inheritParams generics::glance #' diff --git a/R/celery-package.R b/R/tidyclust-package.R similarity index 100% rename from R/celery-package.R rename to R/tidyclust-package.R diff --git a/R/translate.R b/R/translate.R index 29d1772e..ff7751c0 100644 --- a/R/translate.R +++ b/R/translate.R @@ -1,14 +1,14 @@ #' Resolve a Model Specification for a Computational Engine #' -#' `translate_celery()` will translate_celery a model specification into a code +#' `translate_tidyclust()` will translate_tidyclust a model specification into a code #' object that is specific to a particular engine (e.g. R package). -#' It translate_celerys generic parameters to their counterparts. +#' It translate_tidyclusts generic parameters to their counterparts. #' #' @param x A model specification. -#' @param engine The computational engine for the model (see `?set_engine_celery`). +#' @param engine The computational engine for the model (see `?set_engine_tidyclust`). #' @param ... Not currently used. #' @details -#' `translate_celery()` produces a _template_ call that lacks the specific +#' `translate_tidyclust()` produces a _template_ call that lacks the specific #' argument values (such as `data`, etc). These are filled in once #' `fit()` is called with the specifics of the data for the model. #' The call may also include `tune()` arguments if these are in @@ -20,7 +20,7 @@ #' the model fitting function/engine. #' #' This function can be useful when you need to understand how -#' `celery` goes from a generic model specific to a model fitting +#' `tidyclust` goes from a generic model specific to a model fitting #' function. #' #' **Note**: this function is used internally and users should only use it @@ -28,15 +28,15 @@ #' to modify the cluster specification. #' #' @export -translate_celery <- function(x, ...) { - UseMethod("translate_celery") +translate_tidyclust <- function(x, ...) { + UseMethod("translate_tidyclust") } -#' @rdname translate_celery +#' @rdname translate_tidyclust #' @export -#' @export translate_celery.default -translate_celery.default <- function(x, engine = x$engine, ...) { - check_empty_ellipse_celery(...) +#' @export translate_tidyclust.default +translate_tidyclust.default <- function(x, engine = x$engine, ...) { + check_empty_ellipse_tidyclust(...) if (is.null(engine)) { rlang::abort("Please set an engine.") } @@ -87,7 +87,7 @@ translate_celery.default <- function(x, engine = x$engine, ...) { # new code for revised model data structures get_cluster_spec <- function(model, mode, engine) { - m_env <- get_model_env_celery() + m_env <- get_model_env_tidyclust() env_obj <- rlang::env_names(m_env) env_obj <- grep(model, env_obj, value = TRUE) @@ -116,7 +116,7 @@ get_cluster_spec <- function(model, mode, engine) { } get_args <- function(model, engine) { - m_env <- get_model_env_celery() + m_env <- get_model_env_tidyclust() rlang::env_get(m_env, paste0(model, "_args")) %>% dplyr::filter(engine == !!engine) %>% dplyr::select(-engine) @@ -127,9 +127,9 @@ deharmonize <- function(args, key) { if (length(args) == 0) { return(args) } - parsn <- tibble::tibble(celery = names(args), order = seq_along(args)) + parsn <- tibble::tibble(tidyclust = names(args), order = seq_along(args)) merged <- - dplyr::left_join(parsn, key, by = "celery") %>% + dplyr::left_join(parsn, key, by = "tidyclust") %>% dplyr::arrange(order) # TODO correct for bad merge? @@ -142,10 +142,10 @@ deharmonize <- function(args, key) { #' @return If an error is not thrown (from non-empty ellipses), a NULL list. #' @keywords internal #' @export -check_empty_ellipse_celery <- function(...) { +check_empty_ellipse_tidyclust <- function(...) { terms <- quos(...) if (!rlang::is_empty(terms)) { - rlang::abort("Please pass other arguments to the model function via `set_engine_celery()`.") + rlang::abort("Please pass other arguments to the model function via `set_engine_tidyclust()`.") } terms } diff --git a/R/tunable.R b/R/tunable.R index 23d6d532..b42c4615 100644 --- a/R/tunable.R +++ b/R/tunable.R @@ -2,7 +2,7 @@ # Unit tests are in extratests # nocov start tunable_cluster_spec <- function(x, ...) { - mod_env <- rlang::ns_env("celery")$celery + mod_env <- rlang::ns_env("tidyclust")$tidyclust if (is.null(x$engine)) { stop("Please declare an engine first using `set_engine()`.", call. = FALSE) @@ -10,7 +10,7 @@ tunable_cluster_spec <- function(x, ...) { arg_name <- paste0(mod_type(x), "_args") if (!(any(arg_name == names(mod_env)))) { - stop("The `celery` model database doesn't know about the arguments for ", + stop("The `tidyclust` model database doesn't know about the arguments for ", "model `", mod_type(x), "`. Was it registered?", sep = "", call. = FALSE ) @@ -19,7 +19,7 @@ tunable_cluster_spec <- function(x, ...) { arg_vals <- mod_env[[arg_name]] %>% dplyr::filter(engine == x$engine) %>% - dplyr::select(name = celery, call_info = func) %>% + dplyr::select(name = tidyclust, call_info = func) %>% dplyr::full_join( tibble::tibble(name = c(names(x$args), names(x$eng_args))), by = "name" @@ -67,7 +67,7 @@ stats_k_means_engine_args <- "centers" ), call_info = list( - list(pkg = "celery", fun = "k") + list(pkg = "tidyclust", fun = "k") ), source = "cluster_spec", component = "k_means", diff --git a/R/tune_cluster.R b/R/tune_cluster.R index 623e6e82..b63d12b6 100644 --- a/R/tune_cluster.R +++ b/R/tune_cluster.R @@ -4,7 +4,7 @@ #' for a pre-defined set of tuning parameters that correspond to a model or #' recipe across one or more resamples of the data. #' -#' @param object A `celery` model specification or a [workflows::workflow()]. +#' @param object A `tidyclust` model specification or a [workflows::workflow()]. #' @param preprocessor A traditional model formula or a recipe created using #' [recipes::recipe()]. #' @param resamples An `rset()` object. @@ -44,7 +44,7 @@ tune_cluster.default <- function(object, ...) { tune_cluster.cluster_spec <- function(object, preprocessor, resamples, ..., param_info = NULL, grid = 10, metrics = NULL, - control = control_celery()) { + control = control_tidyclust()) { if (rlang::is_missing(preprocessor) || !tune::is_preprocessor(preprocessor)) { rlang::abort(paste( "To tune a model spec, you must preprocess", @@ -189,7 +189,7 @@ tune_cluster_loop <- function(resamples, grid, workflow, metrics, control, rng) # Extract internal function from tune namespace tune_cluster_loop_iter_safely <- utils::getFromNamespace( x = "tune_cluster_loop_iter_safely", - ns = "celery" + ns = "tidyclust" ) tune_cluster_loop_iter_safely( @@ -219,10 +219,10 @@ tune_cluster_loop <- function(resamples, grid, workflow, metrics, control, rng) .errorhandling = "pass", .combine = iter_combine ) %op% { - # Extract internal function from celery namespace + # Extract internal function from tidyclust namespace tune_grid_loop_iter_safely <- utils::getFromNamespace( x = "tune_cluster_loop_iter_safely", - ns = "celery" + ns = "tidyclust" ) grid_info_row <- vctrs::vec_slice(grid_info, row) diff --git a/R/tune_helpers.R b/R/tune_helpers.R index 5fdca2e1..9a5b4a6f 100644 --- a/R/tune_helpers.R +++ b/R/tune_helpers.R @@ -45,7 +45,7 @@ set_workflow <- function(workflow, control) { format(w_size, units = "Mb", digits = 2), " in memory. If this was not intentional, please set the control ", "setting `save_workflow = FALSE`." ) - cols <- get_celery_colors() + cols <- get_tidyclust_colors() msg <- strwrap(msg, prefix = paste0( cols$symbol$info(cli::symbol$info), " " @@ -109,14 +109,14 @@ required_pkgs.cluster_fit <- function(x, infra = TRUE, ...) { get_pkgs <- function(x, infra) { cls <- class(x)[1] pkgs <- - get_from_env_celery(paste0(cls, "_pkgs")) %>% + get_from_env_tidyclust(paste0(cls, "_pkgs")) %>% dplyr::filter(engine == x$engine) res <- pkgs$pkg[[1]] if (length(res) == 0) { res <- character(0) } if (infra) { - infra_pkgs <- c("celery") + infra_pkgs <- c("tidyclust") res <- c(infra_pkgs, res) } res <- unique(res) @@ -264,23 +264,23 @@ catcher <- function(expr) { } siren <- function(x, type = "info") { - celery_color <- get_celery_colors() - types <- names(celery_color$message) + tidyclust_color <- get_tidyclust_colors() + types <- names(tidyclust_color$message) type <- match.arg(type, types) msg <- glue::glue(x) symb <- dplyr::case_when( - type == "warning" ~ celery_color$symbol$warning("!"), - type == "go" ~ celery_color$symbol$go(cli::symbol$pointer), - type == "danger" ~ celery_color$symbol$danger("x"), type == - "success" ~ celery_color$symbol$success(celery_symbol$success), - type == "info" ~ celery_color$symbol$info("i") + type == "warning" ~ tidyclust_color$symbol$warning("!"), + type == "go" ~ tidyclust_color$symbol$go(cli::symbol$pointer), + type == "danger" ~ tidyclust_color$symbol$danger("x"), type == + "success" ~ tidyclust_color$symbol$success(tidyclust_symbol$success), + type == "info" ~ tidyclust_color$symbol$info("i") ) msg <- dplyr::case_when( - type == "warning" ~ celery_color$message$warning(msg), - type == "go" ~ celery_color$message$go(msg), type == "danger" ~ - celery_color$message$danger(msg), type == "success" ~ - celery_color$message$success(msg), type == "info" ~ - celery_color$message$info(msg) + type == "warning" ~ tidyclust_color$message$warning(msg), + type == "go" ~ tidyclust_color$message$go(msg), type == "danger" ~ + tidyclust_color$message$danger(msg), type == "success" ~ + tidyclust_color$message$success(msg), type == "info" ~ + tidyclust_color$message$info(msg) ) if (inherits(msg, "character")) { msg <- as.character(msg) diff --git a/R/zzz.R b/R/zzz.R index b3e715bc..08b70f8c 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -29,21 +29,21 @@ vctrs::s3_register("generics::tunable", "k_means", tunable_k_means) } - ns <- rlang::ns_env("celery") + ns <- rlang::ns_env("tidyclust") makeActiveBinding( - "celery_color", + "tidyclust_color", function() { opt <- getOption("tidymodels.dark", NULL) if (!is.null(opt)) { if (isTRUE(opt)) { - return(celery_color_dark) + return(tidyclust_color_dark) } else { - return(celery_color_light) + return(tidyclust_color_light) } } - celery_color_light + tidyclust_color_light }, ns ) diff --git a/README.Rmd b/README.Rmd index b94859cd..5726a9d2 100644 --- a/README.Rmd +++ b/README.Rmd @@ -13,35 +13,35 @@ knitr::opts_chunk$set( ) ``` -# celery +# tidyclust -[![Codecov test coverage](https://codecov.io/gh/EmilHvitfeldt/celery/branch/main/graph/badge.svg)](https://app.codecov.io/gh/EmilHvitfeldt/celery?branch=main) -[![R-CMD-check](https://github.com/EmilHvitfeldt/celery/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/EmilHvitfeldt/celery/actions/workflows/R-CMD-check.yaml) +[![Codecov test coverage](https://codecov.io/gh/EmilHvitfeldt/tidyclust/branch/main/graph/badge.svg)](https://app.codecov.io/gh/EmilHvitfeldt/tidyclust?branch=main) +[![R-CMD-check](https://github.com/EmilHvitfeldt/tidyclust/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/EmilHvitfeldt/tidyclust/actions/workflows/R-CMD-check.yaml) -The goal of celery is to provide a tidy, unified interface to clustering models. The packages is closely modeled after the [parsnip](https://parsnip.tidymodels.org/) package. +The goal of tidyclust is to provide a tidy, unified interface to clustering models. The packages is closely modeled after the [parsnip](https://parsnip.tidymodels.org/) package. ## Installation -You can install the development version of celery from [GitHub](https://github.com/) with: +You can install the development version of tidyclust from [GitHub](https://github.com/) with: ``` r # install.packages("devtools") -devtools::install_github("EmilHvitfeldt/celery") +devtools::install_github("EmilHvitfeldt/tidyclust") ``` -Please note that this package currently requires a [branch of the workflows](https://github.com/tidymodels/workflows/tree/celery) package to work. Use with caution. +Please note that this package currently requires a [branch of the workflows](https://github.com/tidymodels/workflows/tree/tidyclust) package to work. Use with caution. ## Example The first thing you do is to create a `cluster specification`. For this example we are creating a K-means model, using the `stats` engine. ```{r} -library(celery) +library(tidyclust) kmeans_spec <- k_means(k = 3) %>% - set_engine_celery("stats") + set_engine_tidyclust("stats") kmeans_spec ``` @@ -54,7 +54,7 @@ kmeans_spec_fit <- kmeans_spec %>% kmeans_spec_fit ``` -Once you have a fitted celery object, you can do a number of things. `predict()` returns the cluster a new observation belongs to +Once you have a fitted tidyclust object, you can do a number of things. `predict()` returns the cluster a new observation belongs to ```{r} predict(kmeans_spec_fit, mtcars[1:4, ]) diff --git a/README.md b/README.md index 6a8e9787..d6122da2 100644 --- a/README.md +++ b/README.md @@ -1,31 +1,31 @@ -# celery +# tidyclust [![Codecov test -coverage](https://codecov.io/gh/EmilHvitfeldt/celery/branch/main/graph/badge.svg)](https://app.codecov.io/gh/EmilHvitfeldt/celery?branch=main) -[![R-CMD-check](https://github.com/EmilHvitfeldt/celery/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/EmilHvitfeldt/celery/actions/workflows/R-CMD-check.yaml) +coverage](https://codecov.io/gh/EmilHvitfeldt/tidyclust/branch/main/graph/badge.svg)](https://app.codecov.io/gh/EmilHvitfeldt/tidyclust?branch=main) +[![R-CMD-check](https://github.com/EmilHvitfeldt/tidyclust/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/EmilHvitfeldt/tidyclust/actions/workflows/R-CMD-check.yaml) -The goal of celery is to provide a tidy, unified interface to clustering +The goal of tidyclust is to provide a tidy, unified interface to clustering models. The packages is closely modeled after the [parsnip](https://parsnip.tidymodels.org/) package. ## Installation -You can install the development version of celery from +You can install the development version of tidyclust from [GitHub](https://github.com/) with: ``` r # install.packages("devtools") -devtools::install_github("EmilHvitfeldt/celery") +devtools::install_github("EmilHvitfeldt/tidyclust") ``` Please note that this package currently requires a [branch of the -workflows](https://github.com/tidymodels/workflows/tree/celery) package +workflows](https://github.com/tidymodels/workflows/tree/tidyclust) package to work. Use with caution. ## Example @@ -34,10 +34,10 @@ The first thing you do is to create a `cluster specification`. For this example we are creating a K-means model, using the `stats` engine. ``` r -library(celery) +library(tidyclust) kmeans_spec <- k_means(k = 3) %>% - set_engine_celery("stats") + set_engine_tidyclust("stats") kmeans_spec #> K Means Cluster Specification (partition) @@ -54,7 +54,7 @@ This specification can then be fit using data. kmeans_spec_fit <- kmeans_spec %>% fit(~., data = mtcars) kmeans_spec_fit -#> celery cluster object +#> tidyclust cluster object #> #> K-means clustering with 3 clusters of sizes 14, 11, 7 #> @@ -96,7 +96,7 @@ kmeans_spec_fit #> [6] "betweenss" "size" "iter" "ifault" ``` -Once you have a fitted celery object, you can do a number of things. +Once you have a fitted tidyclust object, you can do a number of things. `predict()` returns the cluster a new observation belongs to ``` r diff --git a/dev/cross_val_kmeans.R b/dev/cross_val_kmeans.R index 370d29a1..07a0a5cc 100644 --- a/dev/cross_val_kmeans.R +++ b/dev/cross_val_kmeans.R @@ -1,6 +1,6 @@ library(tidymodels) library(tidyverse) -library(celery) +library(tidyclust) ## "Cross-validation" for kmeans @@ -19,7 +19,7 @@ res <- data.frame( for (k in 2:10) { km <- k_means(k = k) %>% - set_engine_celery("stats") + set_engine_tidyclust("stats") for (i in 1:5) { @@ -67,7 +67,7 @@ res <- data.frame( for (k in 2:10) { km <- k_means(k = k) %>% - set_engine_celery("stats") + set_engine_tidyclust("stats") full_fit <- km %>% fit(~., data = ir) diff --git a/dev/kmeans.Rmd b/dev/kmeans.Rmd index 92421a4c..cc5d8249 100644 --- a/dev/kmeans.Rmd +++ b/dev/kmeans.Rmd @@ -15,7 +15,7 @@ knitr::opts_chunk$set( ``` ```{r setup} -library(celery) +library(tidyclust) library(palmerpenguins) library(tidymodels) ``` @@ -24,7 +24,7 @@ library(tidymodels) ```{r} kmeans_spec <- k_means(k = 5) %>% - set_engine_celery("stats") + set_engine_tidyclust("stats") penguins_rec_1 <- recipe(~ ., data = penguins) %>% update_role(species, island, new_role = "demographic") %>% @@ -66,7 +66,7 @@ kmeans_fit %>% * Missing values should probably return an NA prediction. Or for k-means, imputation isn't crazy... -* We want a consistent return, and k-means is randomized. We should agree to a consistent default ordering throughout celery - maybe by size (number of members) or something? +* We want a consistent return, and k-means is randomized. We should agree to a consistent default ordering throughout tidyclust - maybe by size (number of members) or something? ```{r} @@ -98,7 +98,7 @@ something with indices ```{r} recipe( ~ demo1 + predictor1) %>% - step_celery(kmeans_fit) # doesn't quite make sense + step_tidyclust(kmeans_fit) # doesn't quite make sense ``` diff --git a/man/add_on_exports.Rd b/man/add_on_exports.Rd index 7d4b1017..fe705e1f 100644 --- a/man/add_on_exports.Rd +++ b/man/add_on_exports.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/cluster_spec.R \name{new_cluster_spec} \alias{new_cluster_spec} -\title{Functions required for celery-adjacent packages} +\title{Functions required for tidyclust-adjacent packages} \usage{ new_cluster_spec(cls, args, eng_args, mode, method, engine) } diff --git a/man/avg_silhouette.Rd b/man/avg_silhouette.Rd index 1cc1c8e7..2b74cf1f 100644 --- a/man/avg_silhouette.Rd +++ b/man/avg_silhouette.Rd @@ -13,7 +13,7 @@ avg_silhouette( ) } \arguments{ -\item{object}{A fitted kmeans celery model} +\item{object}{A fitted kmeans tidyclust model} \item{new_data}{A dataset to predict on. If \code{NULL}, uses trained clustering.} @@ -32,7 +32,7 @@ Measures average silhouette across all observations } \examples{ kmeans_spec <- k_means(k = 5) \%>\% - set_engine_celery("stats") + set_engine_tidyclust("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/check_empty_ellipse_celery.Rd b/man/check_empty_ellipse_tidyclust.Rd similarity index 75% rename from man/check_empty_ellipse_celery.Rd rename to man/check_empty_ellipse_tidyclust.Rd index ec2748d0..dc92408d 100644 --- a/man/check_empty_ellipse_celery.Rd +++ b/man/check_empty_ellipse_tidyclust.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/translate.R -\name{check_empty_ellipse_celery} -\alias{check_empty_ellipse_celery} +\name{check_empty_ellipse_tidyclust} +\alias{check_empty_ellipse_tidyclust} \title{Check to ensure that ellipses are empty} \usage{ -check_empty_ellipse_celery(...) +check_empty_ellipse_tidyclust(...) } \arguments{ \item{...}{Extra arguments.} diff --git a/man/control_celery.Rd b/man/control_tidyclust.Rd similarity index 84% rename from man/control_celery.Rd rename to man/control_tidyclust.Rd index efd1b3e8..c3cb31e4 100644 --- a/man/control_celery.Rd +++ b/man/control_tidyclust.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/control.R -\name{control_celery} -\alias{control_celery} +\name{control_tidyclust} +\alias{control_tidyclust} \title{Control the fit function} \usage{ -control_celery(verbosity = 1L, catch = FALSE) +control_tidyclust(verbosity = 1L, catch = FALSE) } \arguments{ \item{verbosity}{An integer where a value of zero indicates that no messages @@ -19,7 +19,7 @@ of \code{try(, silent = TRUE)}. If the model fails, an object is still returned (without an error) that inherits the class "try-error".} } \value{ -An S3 object with class "control_celery" that is a named list with +An S3 object with class "control_tidyclust" that is a named list with the results of the function call } \description{ diff --git a/man/convert_helpers.Rd b/man/convert_helpers.Rd index a28e4a63..613663d9 100644 --- a/man/convert_helpers.Rd +++ b/man/convert_helpers.Rd @@ -52,7 +52,7 @@ column after \code{model.matrix()} is finished.} \item{x}{A matrix, sparse matrix, or data frame of predictors. Only some models have support for sparse matrix input. See -\code{celery::get_encoding_celery()} for details. \code{x} should have column names.} +\code{tidyclust::get_encoding_tidyclust()} for details. \code{x} should have column names.} \item{weights}{A numeric vector containing the weights.} diff --git a/man/extract_centroids.Rd b/man/extract_centroids.Rd index 8b58c1e5..0386b666 100644 --- a/man/extract_centroids.Rd +++ b/man/extract_centroids.Rd @@ -17,7 +17,7 @@ Extract clusters from model \examples{ set.seed(1234) kmeans_spec <- k_means(k = 5) \%>\% - set_engine_celery("stats") + set_engine_tidyclust("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/extract_cluster_assignment.Rd b/man/extract_cluster_assignment.Rd index 6df77e01..803fc1d2 100644 --- a/man/extract_cluster_assignment.Rd +++ b/man/extract_cluster_assignment.Rd @@ -16,7 +16,7 @@ Extract cluster assignments from model } \examples{ kmeans_spec <- k_means(k = 5) \%>\% - set_engine_celery("stats") + set_engine_tidyclust("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/extract_fit_summary.Rd b/man/extract_fit_summary.Rd index bfefbb20..8de621ca 100644 --- a/man/extract_fit_summary.Rd +++ b/man/extract_fit_summary.Rd @@ -19,7 +19,7 @@ S3 method to get fitted model summary info depending on engine } \examples{ kmeans_spec <- k_means(k = 5) \%>\% - set_engine_celery("stats") + set_engine_tidyclust("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/fit.Rd b/man/fit.Rd index 1b13f583..58ca701b 100644 --- a/man/fit.Rd +++ b/man/fit.Rd @@ -5,13 +5,13 @@ \alias{fit_xy.cluster_spec} \title{Fit a Model Specification to a Data Set} \usage{ -\method{fit}{cluster_spec}(object, formula, data, control = control_celery(), ...) +\method{fit}{cluster_spec}(object, formula, data, control = control_tidyclust(), ...) -\method{fit_xy}{cluster_spec}(object, x, case_weights = NULL, control = control_celery(), ...) +\method{fit_xy}{cluster_spec}(object, x, case_weights = NULL, control = control_tidyclust(), ...) } \arguments{ \item{object}{An object of class \code{cluster_spec} that has a chosen engine (via -\code{\link[=set_engine_celery]{set_engine_celery()}}).} +\code{\link[=set_engine_tidyclust]{set_engine_tidyclust()}}).} \item{formula}{An object of class \code{formula} (or one that can be coerced to that class): a symbolic description of the model to be fitted.} @@ -21,15 +21,15 @@ frame containing all relevant variables (e.g. outcome(s), predictors, case weights, etc). Note: when needed, a \emph{named argument} should be used.} \item{control}{A named list with elements \code{verbosity} and \code{catch}. See -\code{\link[=control_celery]{control_celery()}}.} +\code{\link[=control_tidyclust]{control_tidyclust()}}.} \item{...}{Not currently used; values passed here will be ignored. Other options required to fit the model should be passed using -\code{set_engine_celery()}.} +\code{set_engine_tidyclust()}.} \item{x}{A matrix, sparse matrix, or data frame of predictors. Only some models have support for sparse matrix input. See -\code{celery::get_encoding_celery()} for details. \code{x} should have column names.} +\code{tidyclust::get_encoding_tidyclust()} for details. \code{x} should have column names.} \item{case_weights}{An optional classed vector of numeric case weights. This must return \code{TRUE} when \code{\link[hardhat:is_case_weights]{hardhat::is_case_weights()}} is run on it. See @@ -54,7 +54,7 @@ The return value will also have a class related to the fitted model (e.g. \code{"_kmeans"}) before the base class of \code{"cluster_fit"}. } \description{ -\code{fit()} and \code{fit_xy()} take a model specification, translate_celery the +\code{fit()} and \code{fit_xy()} take a model specification, translate_tidyclust the required code by substituting arguments, and execute the model fit routine. } \details{ @@ -62,7 +62,7 @@ required code by substituting arguments, and execute the model fit routine. model specification into the computational engine's code, check them for validity, then fit the model using the data and the engine-specific code. Different model functions have different interfaces (e.g. formula or -\code{x}/\code{y}) and these functions translate_celery between the interface used +\code{x}/\code{y}) and these functions translate_tidyclust between the interface used when \code{fit()} or \code{fit_xy()} was invoked and the one required by the underlying model. @@ -76,7 +76,7 @@ used to fit the model. If the model engine has not been set, the model's default engine will be used (as discussed on each model page). If the \code{verbosity} option of -\code{\link[=control_celery]{control_celery()}} is greater than zero, a warning will be produced. +\code{\link[=control_tidyclust]{control_tidyclust()}} is greater than zero, a warning will be produced. If you would like to use an alternative method for generating contrasts when supplying a formula to \code{fit()}, set the global option \code{contrasts} to @@ -90,18 +90,18 @@ kmeans_mod <- k_means(k = 5) using_formula <- kmeans_mod \%>\% - set_engine_celery("stats") \%>\% + set_engine_tidyclust("stats") \%>\% fit(~., data = mtcars) using_x <- kmeans_mod \%>\% - set_engine_celery("stats") \%>\% + set_engine_tidyclust("stats") \%>\% fit_xy(x = mtcars) using_formula using_x } \seealso{ -\code{\link[=set_engine_celery]{set_engine_celery()}}, \code{\link[=control_celery]{control_celery()}}, \code{cluster_spec}, +\code{\link[=set_engine_tidyclust]{set_engine_tidyclust()}}, \code{\link[=control_tidyclust]{control_tidyclust()}}, \code{cluster_spec}, \code{cluster_fit} } diff --git a/man/get_model_env_celery.Rd b/man/get_model_env_tidyclust.Rd similarity index 66% rename from man/get_model_env_celery.Rd rename to man/get_model_env_tidyclust.Rd index 1e1bd0f6..3107d44e 100644 --- a/man/get_model_env_celery.Rd +++ b/man/get_model_env_tidyclust.Rd @@ -1,16 +1,16 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/aaa_models.R -\name{get_model_env_celery} -\alias{get_model_env_celery} -\alias{get_from_env_celery} -\alias{set_env_val_celery} -\title{Working with the celery model environment} +\name{get_model_env_tidyclust} +\alias{get_model_env_tidyclust} +\alias{get_from_env_tidyclust} +\alias{set_env_val_tidyclust} +\title{Working with the tidyclust model environment} \usage{ -get_model_env_celery() +get_model_env_tidyclust() -get_from_env_celery(items) +get_from_env_tidyclust(items) -set_env_val_celery(name, value) +set_env_val_tidyclust(name, value) } \arguments{ \item{items}{A character string of objects in the model environment.} @@ -27,7 +27,7 @@ information about model specifications. } \examples{ # Access the model data: -current_code <- get_model_env_celery() +current_code <- get_model_env_tidyclust() ls(envir = current_code) } \keyword{internal} diff --git a/man/glance.cluster_fit.Rd b/man/glance.cluster_fit.Rd index a27dfab0..bf9d9f67 100644 --- a/man/glance.cluster_fit.Rd +++ b/man/glance.cluster_fit.Rd @@ -15,5 +15,5 @@ a tibble } \description{ -This method glances the model in a celery model object, if it exists. +This method glances the model in a tidyclust model object, if it exists. } diff --git a/man/make_classes_celery.Rd b/man/make_classes_tidyclust.Rd similarity index 77% rename from man/make_classes_celery.Rd rename to man/make_classes_tidyclust.Rd index a992ce3c..20104e89 100644 --- a/man/make_classes_celery.Rd +++ b/man/make_classes_tidyclust.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cluster_spec.R -\name{make_classes_celery} -\alias{make_classes_celery} +\name{make_classes_tidyclust} +\alias{make_classes_tidyclust} \title{Prepend a new class} \usage{ -make_classes_celery(prefix) +make_classes_tidyclust(prefix) } \arguments{ \item{prefix}{A character string for a class.} diff --git a/man/predict.cluster_fit.Rd b/man/predict.cluster_fit.Rd index 99e345b9..6672a761 100644 --- a/man/predict.cluster_fit.Rd +++ b/man/predict.cluster_fit.Rd @@ -60,7 +60,7 @@ section below) in a tibble output format. } \examples{ kmeans_spec <- k_means(k = 5) \%>\% - set_engine_celery("stats") + set_engine_tidyclust("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/set_engine_celery.Rd b/man/set_engine_tidyclust.Rd similarity index 77% rename from man/set_engine_celery.Rd rename to man/set_engine_tidyclust.Rd index ce0e154b..5c4ce363 100644 --- a/man/set_engine_celery.Rd +++ b/man/set_engine_tidyclust.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/engines.R -\name{set_engine_celery} -\alias{set_engine_celery} +\name{set_engine_tidyclust} +\alias{set_engine_tidyclust} \title{Declare a computational engine and specific arguments} \usage{ -set_engine_celery(object, engine, ...) +set_engine_tidyclust(object, engine, ...) } \arguments{ \item{object}{A model specification.} @@ -20,7 +20,7 @@ engine. These are captured as quosures and can be \code{tune()}.} An updated model specification. } \description{ -\code{set_engine_celery()} is used to specify which package or system will be used +\code{set_engine_tidyclust()} is used to specify which package or system will be used to fit the model, along with any arguments specific to that software. } \section{Engines}{ @@ -33,7 +33,7 @@ engines available to each model specification. # First, set general arguments using the standardized names mod <- k_means(k = 10) \%>\% # now say how you want to fit the model and another other options - set_engine_celery("stats", iter.max = 15) + set_engine_tidyclust("stats", iter.max = 15) -translate_celery(mod, engine = "stats") +translate_tidyclust(mod, engine = "stats") } diff --git a/man/set_new_model_celery.Rd b/man/set_new_model_tidyclust.Rd similarity index 72% rename from man/set_new_model_celery.Rd rename to man/set_new_model_tidyclust.Rd index 50cc8bfb..87ee9f39 100644 --- a/man/set_new_model_celery.Rd +++ b/man/set_new_model_tidyclust.Rd @@ -1,52 +1,52 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/aaa_models.R -\name{set_new_model_celery} -\alias{set_new_model_celery} -\alias{check_model_doesnt_exist_celery} -\alias{set_model_mode_celery} -\alias{check_model_exists_celery} -\alias{set_model_engine_celery} -\alias{set_dependency_celery} -\alias{get_dependency_celery} -\alias{set_fit_celery} -\alias{get_fit_celery} -\alias{get_encoding_celery} -\alias{set_encoding_celery} -\alias{set_model_arg_celery} -\alias{show_model_info_celery} -\alias{set_pred_celery} -\alias{get_pred_type_celery} +\name{set_new_model_tidyclust} +\alias{set_new_model_tidyclust} +\alias{check_model_doesnt_exist_tidyclust} +\alias{set_model_mode_tidyclust} +\alias{check_model_exists_tidyclust} +\alias{set_model_engine_tidyclust} +\alias{set_dependency_tidyclust} +\alias{get_dependency_tidyclust} +\alias{set_fit_tidyclust} +\alias{get_fit_tidyclust} +\alias{get_encoding_tidyclust} +\alias{set_encoding_tidyclust} +\alias{set_model_arg_tidyclust} +\alias{show_model_info_tidyclust} +\alias{set_pred_tidyclust} +\alias{get_pred_type_tidyclust} \title{Tools to Register Models} \usage{ -set_new_model_celery(model) +set_new_model_tidyclust(model) -check_model_doesnt_exist_celery(model) +check_model_doesnt_exist_tidyclust(model) -set_model_mode_celery(model, mode) +set_model_mode_tidyclust(model, mode) -check_model_exists_celery(model) +check_model_exists_tidyclust(model) -set_model_engine_celery(model, mode, eng) +set_model_engine_tidyclust(model, mode, eng) -set_dependency_celery(model, eng, pkg = "celery", mode = NULL) +set_dependency_tidyclust(model, eng, pkg = "tidyclust", mode = NULL) -get_dependency_celery(model) +get_dependency_tidyclust(model) -set_fit_celery(model, mode, eng, value) +set_fit_tidyclust(model, mode, eng, value) -get_fit_celery(model) +get_fit_tidyclust(model) -get_encoding_celery(model) +get_encoding_tidyclust(model) -set_encoding_celery(model, mode, eng, options) +set_encoding_tidyclust(model, mode, eng, options) -set_model_arg_celery(model, eng, celery, original, func, has_submodel) +set_model_arg_tidyclust(model, eng, tidyclust, original, func, has_submodel) -show_model_info_celery(model) +show_model_info_tidyclust(model) -set_pred_celery(model, mode, eng, type, value) +set_pred_tidyclust(model, mode, eng, type, value) -get_pred_type_celery(model, type) +get_pred_type_tidyclust(model, type) } \arguments{ \item{model}{A single character string for the model type (e.g. @@ -64,8 +64,8 @@ below, depending on context.} \item{options}{A list of options for engine-specific preprocessing encodings. See Details below.} -\item{celery}{A single character string for the "harmonized" argument name -that \code{celery} exposes.} +\item{tidyclust}{A single character string for the "harmonized" argument name +that \code{tidyclust} exposes.} \item{original}{A single character string for the argument name that underlying model function uses.} @@ -88,7 +88,7 @@ values are: \code{cluster} and \code{raw}.} \item{fit_obj}{A list with elements \code{interface}, \code{protect}, \code{func} and \code{defaults}. See the package vignette "Making a -\code{celery} model from scratch".} +\code{tidyclust} model from scratch".} \item{pred_obj}{A list with elements \code{pre}, \code{post}, \code{func}, and \code{args}.} @@ -106,21 +106,21 @@ package. \details{ These functions are available for users to add their own models or engines (in a package or otherwise) so that they can be accessed using -\code{celery}. +\code{tidyclust}. -In short, \code{celery} stores an environment object that contains all of the +In short, \code{tidyclust} stores an environment object that contains all of the information and code about how models are used (e.g. fitting, predicting, etc). These functions can be used to add models to that environment as well as helper functions that can be used to makes sure that the model data is in the right format. -\code{check_model_exists_celery()} checks the model value and ensures that the -model has already been registered. \code{check_model_doesnt_exist_celery()} +\code{check_model_exists_tidyclust()} checks the model value and ensures that the +model has already been registered. \code{check_model_doesnt_exist_tidyclust()} checks the model value and also checks to see if it is novel in the environment. The options for engine-specific encodings dictate how the predictors should -be handled. These options ensure that the data that \code{celery} gives to the +be handled. These options ensure that the data that \code{tidyclust} gives to the underlying model allows for a model fit that is as similar as possible to what it would have produced directly. @@ -154,9 +154,9 @@ accommodate a sparse matrix representation for predictors during fitting and tuning. } \examples{ -# set_new_model_celery("shallow_learning_model") +# set_new_model_tidyclust("shallow_learning_model") # Show the information about a model: -show_model_info_celery("k_means") +show_model_info_tidyclust("k_means") } \keyword{internal} diff --git a/man/silhouettes.Rd b/man/silhouettes.Rd index cdc738a7..e48c980e 100644 --- a/man/silhouettes.Rd +++ b/man/silhouettes.Rd @@ -7,7 +7,7 @@ silhouettes(object, new_data = NULL, dists = NULL, dist_fun = Rfast::Dist) } \arguments{ -\item{object}{A fitted kmeans celery model} +\item{object}{A fitted kmeans tidyclust model} \item{new_data}{A dataset to predict on. If \code{NULL}, uses trained clustering.} @@ -24,7 +24,7 @@ Measures silhouettes between clusters } \examples{ kmeans_spec <- k_means(k = 5) \%>\% - set_engine_celery("stats") + set_engine_tidyclust("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/sse_ratio.Rd b/man/sse_ratio.Rd index 9662476a..209a2618 100644 --- a/man/sse_ratio.Rd +++ b/man/sse_ratio.Rd @@ -7,7 +7,7 @@ sse_ratio(object, new_data = NULL, dist_fun = Rfast::dista, ...) } \arguments{ -\item{object}{A fitted kmeans celery model} +\item{object}{A fitted kmeans tidyclust model} \item{new_data}{A dataset to predict on. If \code{NULL}, uses trained clustering.} @@ -21,7 +21,7 @@ Compute the ratio of the WSS to the total SSE } \examples{ kmeans_spec <- k_means(k = 5) \%>\% - set_engine_celery("stats") + set_engine_tidyclust("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/tidy.cluster_fit.Rd b/man/tidy.cluster_fit.Rd index a2e76274..2a282f22 100644 --- a/man/tidy.cluster_fit.Rd +++ b/man/tidy.cluster_fit.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/tidy.R \name{tidy.cluster_fit} \alias{tidy.cluster_fit} -\title{Turn a celery model object into a tidy tibble} +\title{Turn a tidyclust model object into a tidy tibble} \usage{ \method{tidy}{cluster_fit}(x, ...) } @@ -15,5 +15,5 @@ a tibble } \description{ -This method tidies the model in a celery model object, if it exists. +This method tidies the model in a tidyclust model object, if it exists. } diff --git a/man/celery-package.Rd b/man/tidyclust-package.Rd similarity index 58% rename from man/celery-package.Rd rename to man/tidyclust-package.Rd index b0252df8..8f5fce43 100644 --- a/man/celery-package.Rd +++ b/man/tidyclust-package.Rd @@ -1,18 +1,18 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/celery-package.R +% Please edit documentation in R/tidyclust-package.R \docType{package} -\name{celery-package} -\alias{celery} -\alias{celery-package} -\title{celery: What the Package Does (One Line, Title Case)} +\name{tidyclust-package} +\alias{tidyclust} +\alias{tidyclust-package} +\title{tidyclust: What the Package Does (One Line, Title Case)} \description{ What the package does (one paragraph). } \seealso{ Useful links: \itemize{ - \item \url{https://github.com/EmilHvitfeldt/celery} - \item Report bugs at \url{https://github.com/EmilHvitfeldt/celery/issues} + \item \url{https://github.com/EmilHvitfeldt/tidyclust} + \item Report bugs at \url{https://github.com/EmilHvitfeldt/tidyclust/issues} } } diff --git a/man/tot_sse.Rd b/man/tot_sse.Rd index 57984a75..dd87f318 100644 --- a/man/tot_sse.Rd +++ b/man/tot_sse.Rd @@ -7,7 +7,7 @@ tot_sse(object, new_data = NULL, dist_fun = Rfast::dista, ...) } \arguments{ -\item{object}{A fitted kmeans celery model} +\item{object}{A fitted kmeans tidyclust model} \item{new_data}{A dataset to predict on. If \code{NULL}, uses trained clustering.} @@ -21,7 +21,7 @@ Compute the total sum of squares } \examples{ kmeans_spec <- k_means(k = 5) \%>\% - set_engine_celery("stats") + set_engine_tidyclust("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/tot_wss.Rd b/man/tot_wss.Rd index acadd082..8ea6940b 100644 --- a/man/tot_wss.Rd +++ b/man/tot_wss.Rd @@ -7,7 +7,7 @@ tot_wss(object, new_data = NULL, dist_fun = Rfast::dista, ...) } \arguments{ -\item{object}{A fitted kmeans celery model} +\item{object}{A fitted kmeans tidyclust model} \item{new_data}{A dataset to predict on. If \code{NULL}, uses trained clustering.} @@ -21,7 +21,7 @@ Compute the sum of within-cluster SSE } \examples{ kmeans_spec <- k_means(k = 5) \%>\% - set_engine_celery("stats") + set_engine_tidyclust("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/translate_celery.Rd b/man/translate_tidyclust.Rd similarity index 69% rename from man/translate_celery.Rd rename to man/translate_tidyclust.Rd index ce9d754c..ddf59773 100644 --- a/man/translate_celery.Rd +++ b/man/translate_tidyclust.Rd @@ -1,28 +1,28 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/translate.R -\name{translate_celery} -\alias{translate_celery} -\alias{translate_celery.default} +\name{translate_tidyclust} +\alias{translate_tidyclust} +\alias{translate_tidyclust.default} \title{Resolve a Model Specification for a Computational Engine} \usage{ -translate_celery(x, ...) +translate_tidyclust(x, ...) -\method{translate_celery}{default}(x, engine = x$engine, ...) +\method{translate_tidyclust}{default}(x, engine = x$engine, ...) } \arguments{ \item{x}{A model specification.} \item{...}{Not currently used.} -\item{engine}{The computational engine for the model (see \code{?set_engine_celery}).} +\item{engine}{The computational engine for the model (see \code{?set_engine_tidyclust}).} } \description{ -\code{translate_celery()} will translate_celery a model specification into a code +\code{translate_tidyclust()} will translate_tidyclust a model specification into a code object that is specific to a particular engine (e.g. R package). -It translate_celerys generic parameters to their counterparts. +It translate_tidyclusts generic parameters to their counterparts. } \details{ -\code{translate_celery()} produces a \emph{template} call that lacks the specific +\code{translate_tidyclust()} produces a \emph{template} call that lacks the specific argument values (such as \code{data}, etc). These are filled in once \code{fit()} is called with the specifics of the data for the model. The call may also include \code{tune()} arguments if these are in @@ -34,7 +34,7 @@ It does contain the resolved argument names that are specific to the model fitting function/engine. This function can be useful when you need to understand how -\code{celery} goes from a generic model specific to a model fitting +\code{tidyclust} goes from a generic model specific to a model fitting function. \strong{Note}: this function is used internally and users should only use it diff --git a/man/tune_cluster.Rd b/man/tune_cluster.Rd index 7c569545..c753a237 100644 --- a/man/tune_cluster.Rd +++ b/man/tune_cluster.Rd @@ -16,7 +16,7 @@ tune_cluster(object, ...) param_info = NULL, grid = 10, metrics = NULL, - control = control_celery() + control = control_tidyclust() ) \method{tune_cluster}{workflow}( @@ -30,7 +30,7 @@ tune_cluster(object, ...) ) } \arguments{ -\item{object}{A \code{celery} model specification or a \code{\link[workflows:workflow]{workflows::workflow()}}.} +\item{object}{A \code{tidyclust} model specification or a \code{\link[workflows:workflow]{workflows::workflow()}}.} \item{...}{Not currently used.} diff --git a/man/within_cluster_sse.Rd b/man/within_cluster_sse.Rd index ebb08445..41e0dc4b 100644 --- a/man/within_cluster_sse.Rd +++ b/man/within_cluster_sse.Rd @@ -7,7 +7,7 @@ within_cluster_sse(object, new_data = NULL, dist_fun = Rfast::dista) } \arguments{ -\item{object}{A fitted kmeans celery model} +\item{object}{A fitted kmeans tidyclust model} \item{new_data}{A dataset to predict on. If \code{NULL}, uses trained clustering.} @@ -23,7 +23,7 @@ Calculates Sum of Squared Error in each cluster } \examples{ kmeans_spec <- k_means(k = 5) \%>\% - set_engine_celery("stats") + set_engine_tidyclust("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/tests/testthat.R b/tests/testthat.R index 726dc8b1..c393d896 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,4 +1,4 @@ library(testthat) -library(celery) +library(tidyclust) -test_check("celery") +test_check("tidyclust") diff --git a/tests/testthat/_snaps/k_means.md b/tests/testthat/_snaps/k_means.md index cd6da455..9bada046 100644 --- a/tests/testthat/_snaps/k_means.md +++ b/tests/testthat/_snaps/k_means.md @@ -8,7 +8,7 @@ --- Code - bt <- k_means(k = -1) %>% set_engine_celery("stats") + bt <- k_means(k = -1) %>% set_engine_tidyclust("stats") fit(bt, mpg ~ ., mtcars) Error invalid 'size' argument @@ -16,14 +16,14 @@ --- Code - translate_celery(k_means(), engine = NULL) + translate_tidyclust(k_means(), engine = NULL) Error Please set an engine. --- Code - translate_celery(k_means(formula = ~x)) + translate_tidyclust(k_means(formula = ~x)) Error unused argument (formula = ~x) diff --git a/tests/testthat/_snaps/registration.md b/tests/testthat/_snaps/registration.md index 77d95dfc..54608284 100644 --- a/tests/testthat/_snaps/registration.md +++ b/tests/testthat/_snaps/registration.md @@ -1,77 +1,77 @@ # adding a new model Code - set_new_model_celery() + set_new_model_tidyclust() Error Please supply a character string for a model name (e.g. `'k_means'`) --- Code - set_new_model_celery(2) + set_new_model_tidyclust(2) Error Please supply a character string for a model name (e.g. `'k_means'`) --- Code - set_new_model_celery(letters[1:2]) + set_new_model_tidyclust(letters[1:2]) Error Please supply a character string for a model name (e.g. `'k_means'`) # adding a new mode Code - set_model_mode_celery("sponge") + set_model_mode_tidyclust("sponge") Error Please supply a character string for a mode (e.g. `'partition'`). # adding a new engine Code - set_model_engine_celery("sponge", eng = "gum") + set_model_engine_tidyclust("sponge", eng = "gum") Error Please supply a character string for a mode (e.g. `'partition'`). --- Code - set_model_engine_celery("sponge", mode = "partition") + set_model_engine_tidyclust("sponge", mode = "partition") Error Please supply a character string for an engine name (e.g. `'stats'`) --- Code - set_model_engine_celery("sponge", mode = "regression", eng = "gum") + set_model_engine_tidyclust("sponge", mode = "regression", eng = "gum") Error 'regression' is not a known mode for model `sponge()`. # adding a new package Code - set_dependency_celery("sponge", "gum", letters[1:2]) + set_dependency_tidyclust("sponge", "gum", letters[1:2]) Error Please supply a single character value for the package name. --- Code - set_dependency_celery("sponge", "gummies", "trident") + set_dependency_tidyclust("sponge", "gummies", "trident") Error The engine 'gummies' has not been registered for model 'sponge'. --- Code - set_dependency_celery("sponge", "gum", "trident", mode = "regression") + set_dependency_tidyclust("sponge", "gum", "trident", mode = "regression") Error mode 'regression' is not a valid mode for 'sponge' # adding a new argument Code - set_model_arg_celery(model = "lunchroom", eng = "gum", celery = "modeling", + set_model_arg_tidyclust(model = "lunchroom", eng = "gum", tidyclust = "modeling", original = "modelling", func = list(pkg = "foo", fun = "bar"), has_submodel = FALSE) Error Model `lunchroom` has not been registered. @@ -79,15 +79,15 @@ --- Code - set_model_arg_celery(model = "sponge", eng = "gum", celery = "modeling", func = list( - pkg = "foo", fun = "bar"), has_submodel = FALSE) + set_model_arg_tidyclust(model = "sponge", eng = "gum", tidyclust = "modeling", + func = list(pkg = "foo", fun = "bar"), has_submodel = FALSE) Error Please supply a character string for the argument. --- Code - set_model_arg_celery(model = "sponge", eng = "gum", original = "modelling", + set_model_arg_tidyclust(model = "sponge", eng = "gum", original = "modelling", func = list(pkg = "foo", fun = "bar"), has_submodel = FALSE) Error Please supply a character string for the argument. @@ -95,7 +95,7 @@ --- Code - set_model_arg_celery(model = "sponge", eng = "gum", celery = "modeling", + set_model_arg_tidyclust(model = "sponge", eng = "gum", tidyclust = "modeling", original = "modelling", func = "foo::bar", has_submodel = FALSE) Error `func` should be a named vector with element 'fun' and the optional elements 'pkg', 'range', 'trans', and 'values'. `func` and 'pkg' should both be single character strings. @@ -103,7 +103,7 @@ --- Code - set_model_arg_celery(model = "sponge", eng = "gum", celery = "modeling", + set_model_arg_tidyclust(model = "sponge", eng = "gum", tidyclust = "modeling", original = "modelling", func = list(pkg = "foo", fun = "bar"), has_submodel = 2) Error The `submodels` argument should be a single logical. @@ -111,7 +111,7 @@ --- Code - set_model_arg_celery(model = "sponge", eng = "gum", celery = "modeling", + set_model_arg_tidyclust(model = "sponge", eng = "gum", tidyclust = "modeling", original = "modelling", func = list(pkg = "foo", fun = "bar")) Error argument "has_submodel" is missing, with no default @@ -119,7 +119,7 @@ --- Code - set_model_arg_celery(model = "sponge", eng = "gum", celery = "yodeling", + set_model_arg_tidyclust(model = "sponge", eng = "gum", tidyclust = "yodeling", original = "yodelling", func = c(foo = "a", bar = "b"), has_submodel = FALSE) Error `func` should be a named vector with element 'fun' and the optional elements 'pkg', 'range', 'trans', and 'values'. `func` and 'pkg' should both be single character strings. @@ -127,7 +127,7 @@ --- Code - set_model_arg_celery(model = "sponge", eng = "gum", celery = "yodeling", + set_model_arg_tidyclust(model = "sponge", eng = "gum", tidyclust = "yodeling", original = "yodelling", func = c(foo = "a"), has_submodel = FALSE) Error `func` should be a named vector with element 'fun' and the optional elements 'pkg', 'range', 'trans', and 'values'. `func` and 'pkg' should both be single character strings. @@ -135,7 +135,7 @@ --- Code - set_model_arg_celery(model = "sponge", eng = "gum", celery = "yodeling", + set_model_arg_tidyclust(model = "sponge", eng = "gum", tidyclust = "yodeling", original = "yodelling", func = c(fun = 2, pkg = 1), has_submodel = FALSE) Error `func` should be a named vector with element 'fun' and the optional elements 'pkg', 'range', 'trans', and 'values'. `func` and 'pkg' should both be single character strings. @@ -143,28 +143,28 @@ # adding a new fit Code - set_fit_celery(model = "cactus", eng = "gum", mode = "partition", value = fit_vals) + set_fit_tidyclust(model = "cactus", eng = "gum", mode = "partition", value = fit_vals) Error Model `cactus` has not been registered. --- Code - set_fit_celery(model = "sponge", eng = "nose", mode = "partition", value = fit_vals) + set_fit_tidyclust(model = "sponge", eng = "nose", mode = "partition", value = fit_vals) Error Engine 'nose' is not supported for `sponge()`. See `show_engines('sponge')`. --- Code - set_fit_celery(model = "sponge", eng = "gum", mode = "frog", value = fit_vals) + set_fit_tidyclust(model = "sponge", eng = "gum", mode = "frog", value = fit_vals) Error 'frog' is not a known mode for model `sponge()`. --- Code - set_fit_celery(model = "sponge", eng = "gum", mode = "partition", value = fit_vals[ + set_fit_tidyclust(model = "sponge", eng = "gum", mode = "partition", value = fit_vals[ -i]) Error The `fit` module should have elements: `defaults`, `func`, `interface`, `protect` @@ -172,7 +172,7 @@ --- Code - set_fit_celery(model = "sponge", eng = "gum", mode = "partition", value = fit_vals[ + set_fit_tidyclust(model = "sponge", eng = "gum", mode = "partition", value = fit_vals[ -i]) Error The `fit` module should have elements: `defaults`, `func`, `interface`, `protect` @@ -180,7 +180,7 @@ --- Code - set_fit_celery(model = "sponge", eng = "gum", mode = "partition", value = fit_vals[ + set_fit_tidyclust(model = "sponge", eng = "gum", mode = "partition", value = fit_vals[ -i]) Error The `fit` module should have elements: `defaults`, `func`, `interface`, `protect` @@ -188,7 +188,7 @@ --- Code - set_fit_celery(model = "sponge", eng = "gum", mode = "partition", value = fit_vals[ + set_fit_tidyclust(model = "sponge", eng = "gum", mode = "partition", value = fit_vals[ -i]) Error The `fit` module should have elements: `defaults`, `func`, `interface`, `protect` @@ -196,35 +196,35 @@ --- Code - set_fit_celery(model = "sponge", eng = "gum", mode = "partition", value = fit_vals_0) + set_fit_tidyclust(model = "sponge", eng = "gum", mode = "partition", value = fit_vals_0) Error The `interface` element should have a single value of: `data.frame`, `formula`, `matrix` --- Code - set_fit_celery(model = "sponge", eng = "gum", mode = "partition", value = fit_vals_1) + set_fit_tidyclust(model = "sponge", eng = "gum", mode = "partition", value = fit_vals_1) Error The `defaults` element should be a list: --- Code - set_fit_celery(model = "sponge", eng = "gum", mode = "partition", value = fit_vals_2) + set_fit_tidyclust(model = "sponge", eng = "gum", mode = "partition", value = fit_vals_2) Error `func` should be a named vector with element 'fun' and the optional elements 'pkg', 'range', 'trans', and 'values'. `func` and 'pkg' should both be single character strings. --- Code - set_fit_celery(model = "sponge", eng = "gum", mode = "partition", value = fit_vals_3) + set_fit_tidyclust(model = "sponge", eng = "gum", mode = "partition", value = fit_vals_3) Error The `interface` element should have a single value of: `data.frame`, `formula`, `matrix` # adding a new predict method Code - set_pred_celery(model = "cactus", eng = "gum", mode = "partition", type = "cluster", + set_pred_tidyclust(model = "cactus", eng = "gum", mode = "partition", type = "cluster", value = cluster_vals) Error Model `cactus` has not been registered. @@ -232,7 +232,7 @@ --- Code - set_pred_celery(model = "sponge", eng = "nose", mode = "partition", type = "cluster", + set_pred_tidyclust(model = "sponge", eng = "nose", mode = "partition", type = "cluster", value = cluster_vals) Error Engine 'nose' is not supported for `sponge()`. See `show_engines('sponge')`. @@ -240,7 +240,7 @@ --- Code - set_pred_celery(model = "sponge", eng = "gum", mode = "partition", type = "eggs", + set_pred_tidyclust(model = "sponge", eng = "gum", mode = "partition", type = "eggs", value = cluster_vals) Error The prediction type should be one of: 'cluster', 'raw' @@ -248,7 +248,7 @@ --- Code - set_pred_celery(model = "sponge", eng = "gum", mode = "frog", type = "cluster", + set_pred_tidyclust(model = "sponge", eng = "gum", mode = "frog", type = "cluster", value = cluster_vals) Error 'frog' is not a known mode for model `sponge()`. @@ -256,7 +256,7 @@ --- Code - set_pred_celery(model = "sponge", eng = "gum", mode = "partition", type = "cluster", + set_pred_tidyclust(model = "sponge", eng = "gum", mode = "partition", type = "cluster", value = cluster_vals[-i]) Error The `predict` module should have elements: `args`, `func`, `post`, `pre` @@ -264,7 +264,7 @@ --- Code - set_pred_celery(model = "sponge", eng = "gum", mode = "partition", type = "cluster", + set_pred_tidyclust(model = "sponge", eng = "gum", mode = "partition", type = "cluster", value = cluster_vals[-i]) Error The `predict` module should have elements: `args`, `func`, `post`, `pre` @@ -272,7 +272,7 @@ --- Code - set_pred_celery(model = "sponge", eng = "gum", mode = "partition", type = "cluster", + set_pred_tidyclust(model = "sponge", eng = "gum", mode = "partition", type = "cluster", value = cluster_vals[-i]) Error The `predict` module should have elements: `args`, `func`, `post`, `pre` @@ -280,7 +280,7 @@ --- Code - set_pred_celery(model = "sponge", eng = "gum", mode = "partition", type = "cluster", + set_pred_tidyclust(model = "sponge", eng = "gum", mode = "partition", type = "cluster", value = cluster_vals[-i]) Error The `predict` module should have elements: `args`, `func`, `post`, `pre` @@ -288,7 +288,7 @@ --- Code - set_pred_celery(model = "sponge", eng = "gum", mode = "partition", type = "cluster", + set_pred_tidyclust(model = "sponge", eng = "gum", mode = "partition", type = "cluster", value = cluster_vals_0) Error The `pre` module should be null or a function: @@ -296,7 +296,7 @@ --- Code - set_pred_celery(model = "sponge", eng = "gum", mode = "partition", type = "cluster", + set_pred_tidyclust(model = "sponge", eng = "gum", mode = "partition", type = "cluster", value = cluster_vals_1) Error The `post` module should be null or a function: @@ -304,7 +304,7 @@ --- Code - set_pred_celery(model = "sponge", eng = "gum", mode = "partition", type = "cluster", + set_pred_tidyclust(model = "sponge", eng = "gum", mode = "partition", type = "cluster", value = cluster_vals_2) Error `func` should be a named vector with element 'fun' and the optional elements 'pkg', 'range', 'trans', and 'values'. `func` and 'pkg' should both be single character strings. @@ -312,7 +312,7 @@ # showing model info Code - show_model_info_celery("k_means") + show_model_info_tidyclust("k_means") Output Information for `k_means` modes: unknown, partition diff --git a/tests/testthat/helper-celery-package.R b/tests/testthat/helper-tidyclust-package.R similarity index 94% rename from tests/testthat/helper-celery-package.R rename to tests/testthat/helper-tidyclust-package.R index a1aac0d5..a7a354cf 100644 --- a/tests/testthat/helper-celery-package.R +++ b/tests/testthat/helper-tidyclust-package.R @@ -1,6 +1,6 @@ new_rng_snapshots <- utils::compareVersion("3.6.0", as.character(getRversion())) > 0 -helper_objects_celery <- function() { +helper_objects_tidyclust <- function() { rec_tune_1 <- recipes::recipe(mpg ~ ., data = mtcars) %>% recipes::step_normalize(recipes::all_predictors()) %>% diff --git a/tests/testthat/test-control.R b/tests/testthat/test-control.R index 3c684373..e2484389 100644 --- a/tests/testthat/test-control.R +++ b/tests/testthat/test-control.R @@ -1,7 +1,7 @@ test_that("control class", { skip("waiting for workflow PR") - x <- k_means(k = 5) %>% set_engine_celery("stats") - ctrl <- control_celery() + x <- k_means(k = 5) %>% set_engine_tidyclust("stats") + ctrl <- control_tidyclust() class(ctrl) <- c("potato", "chair") expect_snapshot( error = TRUE, diff --git a/tests/testthat/test-extract_summary.R b/tests/testthat/test-extract_summary.R index 5ec07434..d81973c4 100644 --- a/tests/testthat/test-extract_summary.R +++ b/tests/testthat/test-extract_summary.R @@ -1,10 +1,10 @@ test_that("extract summary works for kmeans", { obj1 <- k_means(k = mtcars[1:3, ]) %>% - set_engine_celery("stats", algorithm = "MacQueen") %>% + set_engine_tidyclust("stats", algorithm = "MacQueen") %>% fit(~., mtcars) obj2 <- k_means(k = 3) %>% - set_engine_celery("ClusterR", CENTROIDS = as.matrix(mtcars[1:3, ])) %>% + set_engine_tidyclust("ClusterR", CENTROIDS = as.matrix(mtcars[1:3, ])) %>% fit(~., mtcars) summ1 <- extract_fit_summary(obj1) diff --git a/tests/testthat/test-k_means.R b/tests/testthat/test-k_means.R index 36e6fe3f..7393b482 100644 --- a/tests/testthat/test-k_means.R +++ b/tests/testthat/test-k_means.R @@ -1,6 +1,6 @@ test_that("primary arguments", { basic <- k_means(mode = "partition") - basic_stats <- translate_celery(basic %>% set_engine_celery("stats")) + basic_stats <- translate_tidyclust(basic %>% set_engine_tidyclust("stats")) expect_equal( basic_stats$method$fit$args, list( @@ -10,7 +10,7 @@ test_that("primary arguments", { ) k <- k_means(k = 15, mode = "partition") - k_stats <- translate_celery(k %>% set_engine_celery("stats")) + k_stats <- translate_tidyclust(k %>% set_engine_tidyclust("stats")) expect_equal( k_stats$method$fit$args, list( @@ -24,9 +24,9 @@ test_that("primary arguments", { test_that("engine arguments", { stats_print <- k_means(mode = "partition") expect_equal( - translate_celery( + translate_tidyclust( stats_print %>% - set_engine_celery("stats", nstart = 1L) + set_engine_tidyclust("stats", nstart = 1L) )$method$fit$args, list( x = expr(missing_arg()), @@ -39,17 +39,17 @@ test_that("engine arguments", { test_that("bad input", { expect_snapshot(error = TRUE, k_means(mode = "bogus")) expect_snapshot(error = TRUE, { - bt <- k_means(k = -1) %>% set_engine_celery("stats") + bt <- k_means(k = -1) %>% set_engine_tidyclust("stats") fit(bt, mpg ~ ., mtcars) }) - expect_snapshot(error = TRUE, translate_celery(k_means(), engine = NULL)) - expect_snapshot(error = TRUE, translate_celery(k_means(formula = ~x))) + expect_snapshot(error = TRUE, translate_tidyclust(k_means(), engine = NULL)) + expect_snapshot(error = TRUE, translate_tidyclust(k_means(formula = ~x))) }) test_that("predictions", { set.seed(1234) kmeans_fit <- k_means(k = 4) %>% - set_engine_celery("stats") %>% + set_engine_tidyclust("stats") %>% fit(~., mtcars) set.seed(1234) diff --git a/tests/testthat/test-kmeans_diagnostics.R b/tests/testthat/test-kmeans_diagnostics.R index f029453d..07fb8e4f 100644 --- a/tests/testthat/test-kmeans_diagnostics.R +++ b/tests/testthat/test-kmeans_diagnostics.R @@ -1,10 +1,10 @@ test_that("kmeans sse metrics work", { kmeans_fit_stats <- k_means(k = mtcars[1:3, ]) %>% - set_engine_celery("stats", algorithm = "MacQueen") %>% + set_engine_tidyclust("stats", algorithm = "MacQueen") %>% fit(~., mtcars) kmeans_fit_ClusterR <- k_means(k = 3) %>% - set_engine_celery("ClusterR", CENTROIDS = as.matrix(mtcars[1:3, ])) %>% + set_engine_tidyclust("ClusterR", CENTROIDS = as.matrix(mtcars[1:3, ])) %>% fit(~., mtcars) km_orig <- kmeans(mtcars, centers = mtcars[1:3, ], algorithm = "MacQueen") @@ -35,7 +35,7 @@ test_that("kmeans sse metrics work", { test_that("kmeans sse metrics work on new data", { kmeans_fit_stats <- k_means(k = mtcars[1:3, ]) %>% - set_engine_celery("stats", algorithm = "MacQueen") %>% + set_engine_tidyclust("stats", algorithm = "MacQueen") %>% fit(~., mtcars) new_data <- mtcars[1:4, ] @@ -52,11 +52,11 @@ test_that("kmeans sse metrics work on new data", { test_that("kmeans sihouette metrics work", { kmeans_fit_stats <- k_means(k = mtcars[1:3, ]) %>% - set_engine_celery("stats", algorithm = "MacQueen") %>% + set_engine_tidyclust("stats", algorithm = "MacQueen") %>% fit(~., mtcars) kmeans_fit_ClusterR <- k_means(k = 3) %>% - set_engine_celery("ClusterR", CENTROIDS = as.matrix(mtcars[1:3, ])) %>% + set_engine_tidyclust("ClusterR", CENTROIDS = as.matrix(mtcars[1:3, ])) %>% fit(~., mtcars) new_data <- mtcars[1:4, ] @@ -80,11 +80,11 @@ test_that("kmeans sihouette metrics work", { test_that("kmeans sihouette metrics work with new data", { kmeans_fit_stats <- k_means(k = mtcars[1:3, ]) %>% - set_engine_celery("stats", algorithm = "MacQueen") %>% + set_engine_tidyclust("stats", algorithm = "MacQueen") %>% fit(~., mtcars) kmeans_fit_ClusterR <- k_means(k = 3) %>% - set_engine_celery("ClusterR", CENTROIDS = as.matrix(mtcars[1:3, ])) %>% + set_engine_tidyclust("ClusterR", CENTROIDS = as.matrix(mtcars[1:3, ])) %>% fit(~., mtcars) new_data <- mtcars[1:4, ] diff --git a/tests/testthat/test-predict_formats.R b/tests/testthat/test-predict_formats.R index cb0d25f5..8d03c377 100644 --- a/tests/testthat/test-predict_formats.R +++ b/tests/testthat/test-predict_formats.R @@ -1,10 +1,10 @@ test_that("partition predictions", { kmeans_fit <- k_means(k = 3, mode = "partition") %>% - set_engine_celery("stats") %>% + set_engine_tidyclust("stats") %>% fit(~., data = mtcars) expect_true(tibble::is_tibble(predict(kmeans_fit, new_data = mtcars))) - expect_true(is.factor(celery:::predict_cluster.cluster_fit(kmeans_fit, new_data = mtcars))) + expect_true(is.factor(tidyclust:::predict_cluster.cluster_fit(kmeans_fit, new_data = mtcars))) expect_equal(names(predict(kmeans_fit, new_data = mtcars)), ".pred_cluster") }) diff --git a/tests/testthat/test-registration.R b/tests/testthat/test-registration.R index cdaa6ae2..e8f46faa 100644 --- a/tests/testthat/test-registration.R +++ b/tests/testthat/test-registration.R @@ -1,7 +1,7 @@ test_that("adding a new model", { - set_new_model_celery("sponge") + set_new_model_tidyclust("sponge") - mod_items <- get_model_env_celery() %>% rlang::env_names() + mod_items <- get_model_env_tidyclust() %>% rlang::env_names() sponges <- grep("sponge", mod_items, value = TRUE) exp_obj <- c( "sponge_modes", "sponge_fit", "sponge_args", @@ -10,95 +10,95 @@ test_that("adding a new model", { expect_identical(sort(sponges), sort(exp_obj)) expect_identical( - get_from_env_celery("sponge"), + get_from_env_tidyclust("sponge"), tibble(engine = character(0), mode = character(0)) ) expect_identical( - get_from_env_celery("sponge_pkgs"), + get_from_env_tidyclust("sponge_pkgs"), tibble(engine = character(0), pkg = list(), mode = character(0)) ) expect_identical( - get_from_env_celery("sponge_modes"), "unknown" + get_from_env_tidyclust("sponge_modes"), "unknown" ) expect_identical( - get_from_env_celery("sponge_args"), + get_from_env_tidyclust("sponge_args"), dplyr::tibble( - engine = character(0), celery = character(0), + engine = character(0), tidyclust = character(0), original = character(0), func = vector("list"), has_submodel = logical(0) ) ) expect_identical( - get_from_env_celery("sponge_fit"), + get_from_env_tidyclust("sponge_fit"), tibble(engine = character(0), mode = character(0), value = vector("list")) ) expect_identical( - get_from_env_celery("sponge_predict"), + get_from_env_tidyclust("sponge_predict"), tibble( engine = character(0), mode = character(0), type = character(0), value = vector("list") ) ) - expect_snapshot(error = TRUE, set_new_model_celery()) - expect_snapshot(error = TRUE, set_new_model_celery(2)) - expect_snapshot(error = TRUE, set_new_model_celery(letters[1:2])) + expect_snapshot(error = TRUE, set_new_model_tidyclust()) + expect_snapshot(error = TRUE, set_new_model_tidyclust(2)) + expect_snapshot(error = TRUE, set_new_model_tidyclust(letters[1:2])) }) test_that("adding a new mode", { - set_model_mode_celery("sponge", "partition") + set_model_mode_tidyclust("sponge", "partition") - expect_equal(get_from_env_celery("sponge_modes"), c("unknown", "partition")) + expect_equal(get_from_env_tidyclust("sponge_modes"), c("unknown", "partition")) - expect_snapshot(error = TRUE, set_model_mode_celery("sponge")) + expect_snapshot(error = TRUE, set_model_mode_tidyclust("sponge")) }) test_that("adding a new engine", { - set_model_engine_celery("sponge", mode = "partition", eng = "gum") + set_model_engine_tidyclust("sponge", mode = "partition", eng = "gum") expect_identical( - get_from_env_celery("sponge"), + get_from_env_tidyclust("sponge"), tibble(engine = "gum", mode = "partition") ) - expect_equal(get_from_env_celery("sponge_modes"), c("unknown", "partition")) + expect_equal(get_from_env_tidyclust("sponge_modes"), c("unknown", "partition")) - expect_snapshot(error = TRUE, set_model_engine_celery("sponge", eng = "gum")) + expect_snapshot(error = TRUE, set_model_engine_tidyclust("sponge", eng = "gum")) expect_snapshot(error = TRUE, - set_model_engine_celery("sponge", mode = "partition") + set_model_engine_tidyclust("sponge", mode = "partition") ) expect_snapshot( error = TRUE, - set_model_engine_celery("sponge", mode = "regression", eng = "gum") + set_model_engine_tidyclust("sponge", mode = "regression", eng = "gum") ) }) test_that("adding a new package", { - set_dependency_celery("sponge", "gum", "trident") + set_dependency_tidyclust("sponge", "gum", "trident") expect_snapshot(error = TRUE, - set_dependency_celery("sponge", "gum", letters[1:2]) + set_dependency_tidyclust("sponge", "gum", letters[1:2]) ) expect_snapshot(error = TRUE, - set_dependency_celery("sponge", "gummies", "trident") + set_dependency_tidyclust("sponge", "gummies", "trident") ) expect_snapshot(error = TRUE, - set_dependency_celery("sponge", "gum", "trident", mode = "regression") + set_dependency_tidyclust("sponge", "gum", "trident", mode = "regression") ) expect_identical( - get_from_env_celery("sponge_pkgs"), + get_from_env_tidyclust("sponge_pkgs"), tibble(engine = "gum", pkg = list("trident"), mode = "partition") ) - set_dependency_celery("sponge", "gum", "juicy-fruit", mode = "partition") + set_dependency_tidyclust("sponge", "gum", "juicy-fruit", mode = "partition") expect_identical( - get_from_env_celery("sponge_pkgs"), + get_from_env_tidyclust("sponge_pkgs"), tibble( engine = "gum", pkg = list(c("trident", "juicy-fruit")), @@ -107,7 +107,7 @@ test_that("adding a new package", { ) expect_identical( - get_dependency_celery("sponge"), + get_dependency_tidyclust("sponge"), tibble( engine = "gum", pkg = list(c("trident", "juicy-fruit")), @@ -117,31 +117,31 @@ test_that("adding a new package", { }) test_that("adding a new argument", { - set_model_arg_celery( + set_model_arg_tidyclust( model = "sponge", eng = "gum", - celery = "modeling", + tidyclust = "modeling", original = "modelling", func = list(pkg = "foo", fun = "bar"), has_submodel = FALSE ) - set_model_arg_celery( + set_model_arg_tidyclust( model = "sponge", eng = "gum", - celery = "modeling", + tidyclust = "modeling", original = "modelling", func = list(pkg = "foo", fun = "bar"), has_submodel = FALSE ) - args <- get_from_env_celery("sponge_args") - expect_equal(sum(args$celery == "modeling"), 1) + args <- get_from_env_tidyclust("sponge_args") + expect_equal(sum(args$tidyclust == "modeling"), 1) expect_identical( - get_from_env_celery("sponge_args"), + get_from_env_tidyclust("sponge_args"), tibble( - engine = "gum", celery = "modeling", original = "modelling", + engine = "gum", tidyclust = "modeling", original = "modelling", func = list(list(pkg = "foo", fun = "bar")), has_submodel = FALSE ) @@ -149,10 +149,10 @@ test_that("adding a new argument", { expect_snapshot( error = TRUE, - set_model_arg_celery( + set_model_arg_tidyclust( model = "lunchroom", eng = "gum", - celery = "modeling", + tidyclust = "modeling", original = "modelling", func = list(pkg = "foo", fun = "bar"), has_submodel = FALSE @@ -161,10 +161,10 @@ test_that("adding a new argument", { expect_snapshot( error = TRUE, - set_model_arg_celery( + set_model_arg_tidyclust( model = "sponge", eng = "gum", - celery = "modeling", + tidyclust = "modeling", func = list(pkg = "foo", fun = "bar"), has_submodel = FALSE ) @@ -172,7 +172,7 @@ test_that("adding a new argument", { expect_snapshot( error = TRUE, - set_model_arg_celery( + set_model_arg_tidyclust( model = "sponge", eng = "gum", original = "modelling", @@ -183,10 +183,10 @@ test_that("adding a new argument", { expect_snapshot( error = TRUE, - set_model_arg_celery( + set_model_arg_tidyclust( model = "sponge", eng = "gum", - celery = "modeling", + tidyclust = "modeling", original = "modelling", func = "foo::bar", has_submodel = FALSE @@ -195,10 +195,10 @@ test_that("adding a new argument", { expect_snapshot( error = TRUE, - set_model_arg_celery( + set_model_arg_tidyclust( model = "sponge", eng = "gum", - celery = "modeling", + tidyclust = "modeling", original = "modelling", func = list(pkg = "foo", fun = "bar"), has_submodel = 2 @@ -207,10 +207,10 @@ test_that("adding a new argument", { expect_snapshot( error = TRUE, - set_model_arg_celery( + set_model_arg_tidyclust( model = "sponge", eng = "gum", - celery = "modeling", + tidyclust = "modeling", original = "modelling", func = list(pkg = "foo", fun = "bar") ) @@ -218,10 +218,10 @@ test_that("adding a new argument", { expect_snapshot( error = TRUE, - set_model_arg_celery( + set_model_arg_tidyclust( model = "sponge", eng = "gum", - celery = "yodeling", + tidyclust = "yodeling", original = "yodelling", func = c(foo = "a", bar = "b"), has_submodel = FALSE @@ -230,10 +230,10 @@ test_that("adding a new argument", { expect_snapshot( error = TRUE, - set_model_arg_celery( + set_model_arg_tidyclust( model = "sponge", eng = "gum", - celery = "yodeling", + tidyclust = "yodeling", original = "yodelling", func = c(foo = "a"), has_submodel = FALSE @@ -242,10 +242,10 @@ test_that("adding a new argument", { expect_snapshot( error = TRUE, - set_model_arg_celery( + set_model_arg_tidyclust( model = "sponge", eng = "gum", - celery = "yodeling", + tidyclust = "yodeling", original = "yodelling", func = c(fun = 2, pkg = 1), has_submodel = FALSE @@ -262,14 +262,14 @@ test_that("adding a new fit", { defaults = list() ) - set_fit_celery( + set_fit_tidyclust( model = "sponge", eng = "gum", mode = "partition", value = fit_vals ) - fit_env_data <- get_from_env_celery("sponge_fit") + fit_env_data <- get_from_env_tidyclust("sponge_fit") expect_identical( fit_env_data[1:2], tibble(engine = "gum", mode = "partition") @@ -282,7 +282,7 @@ test_that("adding a new fit", { expect_snapshot( error = TRUE, - set_fit_celery( + set_fit_tidyclust( model = "cactus", eng = "gum", mode = "partition", @@ -292,7 +292,7 @@ test_that("adding a new fit", { expect_snapshot( error = TRUE, - set_fit_celery( + set_fit_tidyclust( model = "sponge", eng = "nose", mode = "partition", @@ -302,7 +302,7 @@ test_that("adding a new fit", { expect_snapshot( error = TRUE, - set_fit_celery( + set_fit_tidyclust( model = "sponge", eng = "gum", mode = "frog", @@ -313,7 +313,7 @@ test_that("adding a new fit", { for (i in seq_along(fit_vals)) { expect_snapshot( error = TRUE, - set_fit_celery( + set_fit_tidyclust( model = "sponge", eng = "gum", mode = "partition", @@ -326,7 +326,7 @@ test_that("adding a new fit", { fit_vals_0$interface <- "loaf" expect_snapshot( error = TRUE, - set_fit_celery( + set_fit_tidyclust( model = "sponge", eng = "gum", mode = "partition", @@ -338,7 +338,7 @@ test_that("adding a new fit", { fit_vals_1$defaults <- 2 expect_snapshot( error = TRUE, - set_fit_celery( + set_fit_tidyclust( model = "sponge", eng = "gum", mode = "partition", @@ -350,7 +350,7 @@ test_that("adding a new fit", { fit_vals_2$func <- "foo:bar" expect_snapshot( error = TRUE, - set_fit_celery( + set_fit_tidyclust( model = "sponge", eng = "gum", mode = "partition", @@ -362,7 +362,7 @@ test_that("adding a new fit", { fit_vals_3$interface <- letters expect_snapshot( error = TRUE, - set_fit_celery( + set_fit_tidyclust( model = "sponge", eng = "gum", mode = "partition", @@ -371,12 +371,12 @@ test_that("adding a new fit", { ) expect_identical( - get_fit_celery("sponge")[, 1:2], + get_fit_tidyclust("sponge")[, 1:2], tibble(engine = "gum", mode = "partition") ) expect_equal( - get_fit_celery("sponge")$value[[1]], + get_fit_tidyclust("sponge")$value[[1]], fit_vals ) }) @@ -390,7 +390,7 @@ test_that("adding a new predict method", { args = list(x = quote(2)) ) - set_pred_celery( + set_pred_tidyclust( model = "sponge", eng = "gum", mode = "partition", @@ -398,7 +398,7 @@ test_that("adding a new predict method", { value = cluster_vals ) - pred_env_data <- get_from_env_celery("sponge_predict") + pred_env_data <- get_from_env_tidyclust("sponge_predict") expect_identical( pred_env_data[1:3], tibble(engine = "gum", mode = "partition", type = "cluster") @@ -410,18 +410,18 @@ test_that("adding a new predict method", { ) expect_identical( - get_pred_type_celery("sponge", "cluster")[1:3], + get_pred_type_tidyclust("sponge", "cluster")[1:3], tibble(engine = "gum", mode = "partition", type = "cluster") ) expect_equal( - get_pred_type_celery("sponge", "cluster")$value[[1]], + get_pred_type_tidyclust("sponge", "cluster")$value[[1]], cluster_vals ) expect_snapshot( error = TRUE, - set_pred_celery( + set_pred_tidyclust( model = "cactus", eng = "gum", mode = "partition", @@ -432,7 +432,7 @@ test_that("adding a new predict method", { expect_snapshot( error = TRUE, - set_pred_celery( + set_pred_tidyclust( model = "sponge", eng = "nose", mode = "partition", @@ -444,7 +444,7 @@ test_that("adding a new predict method", { expect_snapshot( error = TRUE, - set_pred_celery( + set_pred_tidyclust( model = "sponge", eng = "gum", mode = "partition", @@ -455,7 +455,7 @@ test_that("adding a new predict method", { expect_snapshot( error = TRUE, - set_pred_celery( + set_pred_tidyclust( model = "sponge", eng = "gum", mode = "frog", @@ -467,7 +467,7 @@ test_that("adding a new predict method", { for (i in seq_along(cluster_vals)) { expect_snapshot( error = TRUE, - set_pred_celery( + set_pred_tidyclust( model = "sponge", eng = "gum", mode = "partition", @@ -481,7 +481,7 @@ test_that("adding a new predict method", { cluster_vals_0$pre <- "I" expect_snapshot( error = TRUE, - set_pred_celery( + set_pred_tidyclust( model = "sponge", eng = "gum", mode = "partition", @@ -494,7 +494,7 @@ test_that("adding a new predict method", { cluster_vals_1$post <- "I" expect_snapshot( error = TRUE, - set_pred_celery( + set_pred_tidyclust( model = "sponge", eng = "gum", mode = "partition", @@ -507,7 +507,7 @@ test_that("adding a new predict method", { cluster_vals_2$func <- "foo:bar" expect_snapshot( error = TRUE, - set_pred_celery( + set_pred_tidyclust( model = "sponge", eng = "gum", mode = "partition", @@ -519,6 +519,6 @@ test_that("adding a new predict method", { test_that("showing model info", { expect_snapshot( - show_model_info_celery("k_means") + show_model_info_tidyclust("k_means") ) }) diff --git a/tests/testthat/test-tune_cluster.R b/tests/testthat/test-tune_cluster.R index a67156aa..3afd254c 100644 --- a/tests/testthat/test-tune_cluster.R +++ b/tests/testthat/test-tune_cluster.R @@ -1,5 +1,5 @@ test_that("tune recipe only", { - helper_objects <- helper_objects_celery() + helper_objects <- helper_objects_tidyclust() set.seed(4400) wflow <- workflows::workflow() %>% @@ -35,7 +35,7 @@ test_that("tune recipe only", { }) test_that("tune model only (with recipe)", { - helper_objects <- helper_objects_celery() + helper_objects <- helper_objects_tidyclust() set.seed(4400) wflow <- workflows::workflow() %>% @@ -71,7 +71,7 @@ test_that("tune model only (with recipe)", { }) test_that("tune model only (with variables)", { - helper_objects <- helper_objects_celery() + helper_objects <- helper_objects_tidyclust() set.seed(4400) @@ -99,7 +99,7 @@ test_that("tune model only (with variables)", { }) test_that("tune model and recipe", { - helper_objects <- helper_objects_celery() + helper_objects <- helper_objects_tidyclust() set.seed(4400) wflow <- workflows::workflow() %>% @@ -144,7 +144,7 @@ test_that("tune model and recipe", { }) test_that('tune model and recipe (parallel_over = "everything")', { - helper_objects <- helper_objects_celery() + helper_objects <- helper_objects_tidyclust() set.seed(4400) wflow <- workflows::workflow() %>% @@ -179,7 +179,7 @@ test_that('tune model and recipe (parallel_over = "everything")', { test_that("tune recipe only - failure in recipe is caught elegantly", { skip("wait for parameter checking") - helper_objects <- helper_objects_celery() + helper_objects <- helper_objects_tidyclust() set.seed(7898) data_folds <- rsample::vfold_cv(mtcars, v = 2) @@ -230,7 +230,7 @@ test_that("tune recipe only - failure in recipe is caught elegantly", { test_that("tune model only - failure in recipe is caught elegantly", { skip("wait for parameter checking") - helper_objects <- helper_objects_celery() + helper_objects <- helper_objects_tidyclust() set.seed(7898) data_folds <- rsample::vfold_cv(mtcars, v = 2) @@ -267,7 +267,7 @@ test_that("tune model only - failure in recipe is caught elegantly", { }) test_that("tune model only - failure in formula is caught elegantly", { - helper_objects <- helper_objects_celery() + helper_objects <- helper_objects_tidyclust() set.seed(7898) data_folds <- rsample::vfold_cv(mtcars, v = 2) @@ -302,7 +302,7 @@ test_that("tune model only - failure in formula is caught elegantly", { test_that("tune model and recipe - failure in recipe is caught elegantly", { skip("wait for parameter checking") - helper_objects <- helper_objects_celery() + helper_objects <- helper_objects_tidyclust() set.seed(7898) data_folds <- rsample::vfold_cv(mtcars, v = 2) @@ -346,7 +346,7 @@ test_that("tune model and recipe - failure in recipe is caught elegantly", { }) test_that("argument order gives errors for recipes", { - helper_objects <- helper_objects_celery() + helper_objects <- helper_objects_tidyclust() expect_snapshot(error = TRUE, { tune_cluster( @@ -358,7 +358,7 @@ test_that("argument order gives errors for recipes", { }) test_that("argument order gives errors for formula", { - helper_objects <- helper_objects_celery() + helper_objects <- helper_objects_tidyclust() expect_snapshot(error = TRUE, { tune_cluster( @@ -370,7 +370,7 @@ test_that("argument order gives errors for formula", { }) test_that("ellipses with tune_cluster", { - helper_objects <- helper_objects_celery() + helper_objects <- helper_objects_tidyclust() wflow <- workflows::workflow() %>% workflows::add_recipe(helper_objects$rec_tune_1) %>% @@ -393,7 +393,7 @@ test_that("determining the grid type", { test_that("retain extra attributes", { skip("wait for parameter checking") - helper_objects <- helper_objects_celery() + helper_objects <- helper_objects_tidyclust() set.seed(4400) wflow <- workflows::workflow() %>% diff --git a/celery.Rproj b/tidyclust.Rproj similarity index 100% rename from celery.Rproj rename to tidyclust.Rproj diff --git a/vignettes/articles/kmeans.Rmd b/vignettes/articles/kmeans.Rmd index e21e4dd5..89d5385a 100644 --- a/vignettes/articles/kmeans.Rmd +++ b/vignettes/articles/kmeans.Rmd @@ -15,7 +15,7 @@ knitr::opts_chunk$set( ``` ```{r setup} -library(celery) +library(tidyclust) library(tidyverse) library(tidymodels) set.seed(838383) @@ -86,14 +86,14 @@ one cluster regarding the selection of the observations or the values of the variables. -## k-means specification in {celery} +## k-means specification in {tidyclust} -To specify a k-means model in `celery`, simply choose a value of $k$ and an +To specify a k-means model in `tidyclust`, simply choose a value of $k$ and an engine: ```{r} kmeans_spec <- k_means(k = 3) %>% - set_engine_celery("stats") + set_engine_tidyclust("stats") kmeans_spec ``` @@ -127,7 +127,7 @@ kmeans_spec_fit %>% extract_cluster_assignment() ``` -Note that this function renames clusters in accordance with the standard `celery` +Note that this function renames clusters in accordance with the standard `tidyclust` naming convention and ordering: clusters are named "Cluster_1", "Cluster_2", etc. and are numbered by the order they appear in the rows of the training dataset. @@ -180,7 +180,7 @@ penguins_recipe_2 <- recipe(species ~ bill_length_mm + bill_depth_mm, data = penguins) # wflow_1 <- workflow() %>% -# add_celery_model(kmeans_spec) %>% +# add_tidyclust_model(kmeans_spec) %>% # add_recipe(penguins_recipe_1) ``` diff --git a/vignettes/articles/kmeans_metrics.Rmd b/vignettes/articles/kmeans_metrics.Rmd index 525c4502..278ee61c 100644 --- a/vignettes/articles/kmeans_metrics.Rmd +++ b/vignettes/articles/kmeans_metrics.Rmd @@ -15,7 +15,7 @@ knitr::opts_chunk$set( ``` ```{r setup} -library(celery) +library(tidyclust) library(tidyverse) library(tidymodels) @@ -28,7 +28,7 @@ penguins <- penguins %>% drop_na() ```{r} kmeans_spec <- k_means(k = 3) %>% - set_engine_celery("stats") + set_engine_tidyclust("stats") kmeans_fit <- kmeans_spec %>% fit(~ bill_length_mm + bill_depth_mm, @@ -95,7 +95,7 @@ results_ratio <- c() for (k in 2:10) { kmeans_spec_k <- k_means(k = k) %>% - set_engine_celery("stats") + set_engine_tidyclust("stats") kmeans_fit_k <- kmeans_spec_k %>% fit(~ bill_length_mm + bill_depth_mm, @@ -123,7 +123,7 @@ results_species <- c() for (k in 2:10) { kmeans_spec_k <- k_means(k = k) %>% - set_engine_celery("stats") + set_engine_tidyclust("stats") kmeans_fit_k <- kmeans_spec_k %>% fit(~ bill_length_mm + bill_depth_mm, From b6bec59f1e43ecc413697d5fe21cba9cbdde565e Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 17 Jun 2022 14:11:14 -0700 Subject: [PATCH 10/10] reknit readme --- README.md | 90 +++++++++++++++++++++++++++---------------------------- 1 file changed, 44 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index d6122da2..a248e1d7 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,8 @@ coverage](https://codecov.io/gh/EmilHvitfeldt/tidyclust/branch/main/graph/badge. [![R-CMD-check](https://github.com/EmilHvitfeldt/tidyclust/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/EmilHvitfeldt/tidyclust/actions/workflows/R-CMD-check.yaml) -The goal of tidyclust is to provide a tidy, unified interface to clustering -models. The packages is closely modeled after the +The goal of tidyclust is to provide a tidy, unified interface to +clustering models. The packages is closely modeled after the [parsnip](https://parsnip.tidymodels.org/) package. ## Installation @@ -25,8 +25,8 @@ devtools::install_github("EmilHvitfeldt/tidyclust") ``` Please note that this package currently requires a [branch of the -workflows](https://github.com/tidymodels/workflows/tree/tidyclust) package -to work. Use with caution. +workflows](https://github.com/tidymodels/workflows/tree/tidyclust) +package to work. Use with caution. ## Example @@ -56,39 +56,39 @@ kmeans_spec_fit <- kmeans_spec %>% kmeans_spec_fit #> tidyclust cluster object #> -#> K-means clustering with 3 clusters of sizes 14, 11, 7 +#> K-means clustering with 3 clusters of sizes 7, 16, 9 #> #> Cluster means: -#> mpg cyl disp hp drat wt qsec vs -#> 1 15.10000 8 353.1000 209.21429 3.229286 3.999214 16.77214 0.0000000 -#> 2 26.66364 4 105.1364 82.63636 4.070909 2.285727 19.13727 0.9090909 -#> 3 19.74286 6 183.3143 122.28571 3.585714 3.117143 17.97714 0.5714286 +#> mpg cyl disp hp drat wt qsec vs +#> 1 17.01429 7.428571 276.0571 150.7143 2.994286 3.601429 18.11857 0.2857143 +#> 2 24.50000 4.625000 122.2937 96.8750 4.002500 2.518000 18.54312 0.7500000 +#> 3 14.64444 8.000000 388.2222 232.1111 3.343333 4.161556 16.40444 0.0000000 #> am gear carb -#> 1 0.1428571 3.285714 3.500000 -#> 2 0.7272727 4.090909 1.545455 -#> 3 0.4285714 3.857143 3.428571 +#> 1 0.0000000 3.000000 2.142857 +#> 2 0.6875000 4.125000 2.437500 +#> 3 0.2222222 3.444444 4.000000 #> #> Clustering vector: #> Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive -#> 3 3 2 3 +#> 2 2 2 1 #> Hornet Sportabout Valiant Duster 360 Merc 240D -#> 1 3 1 2 +#> 3 1 3 2 #> Merc 230 Merc 280 Merc 280C Merc 450SE -#> 2 3 3 1 +#> 2 2 2 1 #> Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental -#> 1 1 1 1 +#> 1 1 3 3 #> Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla -#> 1 2 2 2 +#> 3 2 2 2 #> Toyota Corona Dodge Challenger AMC Javelin Camaro Z28 -#> 2 1 1 1 +#> 2 1 1 3 #> Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa -#> 1 2 2 2 +#> 3 2 2 2 #> Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E -#> 1 3 1 2 +#> 3 2 3 2 #> #> Within cluster sum of squares by cluster: -#> [1] 93643.90 11848.37 13954.34 -#> (between_SS / total_SS = 80.8 %) +#> [1] 11846.09 32838.00 46659.32 +#> (between_SS / total_SS = 85.3 %) #> #> Available components: #> @@ -104,10 +104,10 @@ predict(kmeans_spec_fit, mtcars[1:4, ]) #> # A tibble: 4 × 1 #> .pred_cluster #> -#> 1 3 -#> 2 3 -#> 3 2 -#> 4 3 +#> 1 Cluster_1 +#> 2 Cluster_1 +#> 3 Cluster_1 +#> 4 Cluster_2 ``` `extract_cluster_assignment()` returns the cluster assignments of the @@ -116,18 +116,18 @@ training observations ``` r extract_cluster_assignment(kmeans_spec_fit) #> # A tibble: 32 × 1 -#> .cluster -#> -#> 1 C1 -#> 2 C1 -#> 3 C2 -#> 4 C1 -#> 5 C3 -#> 6 C1 -#> 7 C3 -#> 8 C2 -#> 9 C2 -#> 10 C1 +#> .cluster +#> +#> 1 Cluster_1 +#> 2 Cluster_1 +#> 3 Cluster_1 +#> 4 Cluster_2 +#> 5 Cluster_3 +#> 6 Cluster_2 +#> 7 Cluster_3 +#> 8 Cluster_1 +#> 9 Cluster_1 +#> 10 Cluster_1 #> # … with 22 more rows ``` @@ -135,12 +135,10 @@ and `extract_clusters()` returns the locations of the clusters ``` r extract_centroids(kmeans_spec_fit) -#> New names: -#> • `` -> `...2` -#> # A tibble: 3 × 2 -#> .cluster ...2 -#> -#> 1 Cluster_1 19.7 -#> 2 Cluster_2 26.7 -#> 3 Cluster_3 15.1 +#> # A tibble: 3 × 12 +#> .cluster mpg cyl disp hp drat wt qsec vs am gear carb +#> +#> 1 Cluster_1 24.5 4.62 122. 96.9 4.00 2.52 18.5 0.75 0.688 4.12 2.44 +#> 2 Cluster_2 17.0 7.43 276. 151. 2.99 3.60 18.1 0.286 0 3 2.14 +#> 3 Cluster_3 14.6 8 388. 232. 3.34 4.16 16.4 0 0.222 3.44 4 ```