From b9994036697365caf36a93c64f28f85551573086 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 19 Jul 2022 13:51:58 -0700 Subject: [PATCH] change k -> num_clusters --- NAMESPACE | 2 +- R/augment.R | 2 +- R/dials.R | 6 +-- R/extract_assignment.R | 2 +- R/extract_characterization.R | 2 +- R/extract_summary.R | 2 +- R/finalize.R | 4 +- R/fit.R | 2 +- R/k_means.R | 12 ++--- R/k_means_data.R | 8 +-- R/metric-silhouette.R | 4 +- R/metric-sse.R | 8 +-- R/predict.R | 2 +- R/tunable.R | 2 +- R/update.R | 8 +-- README.Rmd | 2 +- README.md | 60 +++++++++++------------ _pkgdown.yml | 2 +- man/augment.Rd | 2 +- man/avg_silhouette.Rd | 2 +- man/extract_centroids.Rd | 2 +- man/extract_cluster_assignment.Rd | 2 +- man/extract_fit_summary.Rd | 2 +- man/finalize_model_tidyclust.Rd | 4 +- man/fit.Rd | 2 +- man/k_means.Rd | 4 +- man/{k.Rd => num_clusters.Rd} | 8 +-- man/predict.cluster_fit.Rd | 2 +- man/silhouettes.Rd | 2 +- man/sse_ratio.Rd | 2 +- man/tidyclust_update.Rd | 12 ++--- man/tot_sse.Rd | 2 +- man/tot_wss.Rd | 2 +- man/within_cluster_sse.Rd | 2 +- tests/testthat/_snaps/k_means.md | 10 ++-- tests/testthat/_snaps/registration.md | 4 +- tests/testthat/helper-tidyclust-package.R | 4 +- tests/testthat/test-arguments.R | 14 +++--- tests/testthat/test-augment.R | 2 +- tests/testthat/test-cluster_metric_set.R | 2 +- tests/testthat/test-control.R | 2 +- tests/testthat/test-extract_summary.R | 10 ++-- tests/testthat/test-k_means.R | 12 ++--- tests/testthat/test-kmeans_diagnostics.R | 14 +++--- tests/testthat/test-predict_formats.R | 2 +- tests/testthat/test-tune_cluster.R | 33 ++++++------- vignettes/articles/kmeans.Rmd | 2 +- vignettes/articles/kmeans_metrics.Rmd | 6 +-- 48 files changed, 149 insertions(+), 150 deletions(-) rename man/{k.Rd => num_clusters.Rd} (86%) diff --git a/NAMESPACE b/NAMESPACE index f9c5695c..38229b7f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -76,13 +76,13 @@ export(get_from_env_tidyclust) export(get_model_env_tidyclust) export(get_pred_type_tidyclust) export(glance) -export(k) export(k_means) export(load_pkgs) export(make_classes_tidyclust) export(min_grid) export(new_cluster_metric) export(new_cluster_spec) +export(num_clusters) export(predict.cluster_fit) export(predict_cluster) export(predict_cluster.cluster_fit) diff --git a/R/augment.R b/R/augment.R index 8e47a3f5..c8d7299d 100644 --- a/R/augment.R +++ b/R/augment.R @@ -11,7 +11,7 @@ #' @rdname augment #' @export #' @examples -#' kmeans_spec <- k_means(k = 5) %>% +#' kmeans_spec <- k_means(num_clusters = 5) %>% #' set_engine("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/R/dials.R b/R/dials.R index 5fab1c94..9b4baf42 100644 --- a/R/dials.R +++ b/R/dials.R @@ -2,15 +2,15 @@ #' #' @inheritParams dials::Laplace #' @examples -#' k() +#' num_clusters() #' @export -k <- function(range = c(1L, 10L), trans = NULL) { +num_clusters <- function(range = c(1L, 10L), trans = NULL) { dials::new_quant_param( type = "integer", range = range, inclusive = c(TRUE, TRUE), trans = trans, - label = c(k = "# Clusters"), + label = c(num_clusters = "# Clusters"), finalize = NULL ) } diff --git a/R/extract_assignment.R b/R/extract_assignment.R index ba6abfc8..cd4b3f61 100644 --- a/R/extract_assignment.R +++ b/R/extract_assignment.R @@ -4,7 +4,7 @@ #' @param ... Other arguments passed to methods. #' #' @examples -#' kmeans_spec <- k_means(k = 5) %>% +#' kmeans_spec <- k_means(num_clusters = 5) %>% #' set_engine("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/R/extract_characterization.R b/R/extract_characterization.R index cf363a2c..20a216f6 100644 --- a/R/extract_characterization.R +++ b/R/extract_characterization.R @@ -5,7 +5,7 @@ #' #' @examples #' set.seed(1234) -#' kmeans_spec <- k_means(k = 5) %>% +#' kmeans_spec <- k_means(num_clusters = 5) %>% #' set_engine("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/R/extract_summary.R b/R/extract_summary.R index 1c1dad7b..cf516525 100644 --- a/R/extract_summary.R +++ b/R/extract_summary.R @@ -7,7 +7,7 @@ #' @return A list with various summary elements #' #' @examples -#' kmeans_spec <- k_means(k = 5) %>% +#' kmeans_spec <- k_means(num_clusters = 5) %>% #' set_engine("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/R/finalize.R b/R/finalize.R index 2fc9a6fb..8da34a4e 100644 --- a/R/finalize.R +++ b/R/finalize.R @@ -11,9 +11,9 @@ #' @return An updated version of `x`. #' @export #' @examples -#' kmeans_spec <- k_means(k = tune()) +#' kmeans_spec <- k_means(num_clusters = tune()) #' -#' best_params <- data.frame(k = 5) +#' best_params <- data.frame(num_clusters = 5) #' best_params #' #' kmeans_spec diff --git a/R/fit.R b/R/fit.R index 1fd10d29..f31f69f0 100644 --- a/R/fit.R +++ b/R/fit.R @@ -43,7 +43,7 @@ #' @examples #' library(dplyr) #' -#' kmeans_mod <- k_means(k = 5) +#' kmeans_mod <- k_means(num_clusters = 5) #' #' using_formula <- #' kmeans_mod %>% diff --git a/R/k_means.R b/R/k_means.R index 7e87724b..abdc2261 100644 --- a/R/k_means.R +++ b/R/k_means.R @@ -10,7 +10,7 @@ #' @param engine A single character string specifying what computational engine #' to use for fitting. Possible engines are listed below. The default for this #' model is `"stats"`. -#' @param k Positive integer, number of clusters in model. +#' @param num_clusters Positive integer, number of clusters in model. #' #' @examples #' # show_engines("k_means") @@ -20,9 +20,9 @@ k_means <- function(mode = "partition", engine = "stats", - k = NULL) { + num_clusters = NULL) { args <- list( - k = enquo(k) + num_clusters = enquo(num_clusters) ) new_cluster_spec( @@ -61,7 +61,7 @@ translate_tidyclust.k_means <- function(x, engine = x$engine, ...) { #' @export update.k_means <- function(object, parameters = NULL, - k = NULL, + num_clusters = NULL, fresh = FALSE, ...) { eng_args <- parsnip::update_engine_parameters(object$eng_args, ...) @@ -70,7 +70,7 @@ update.k_means <- function(object, parameters <- parsnip::check_final_param(parameters) } args <- list( - k = enquo(k) + num_clusters = enquo(num_clusters) ) args <- parsnip::update_main_parameters(args, parameters) @@ -104,7 +104,7 @@ check_args.k_means <- function(object) { args <- lapply(object$args, rlang::eval_tidy) - if (all(is.numeric(args$k)) && any(args$k < 0)) + if (all(is.numeric(args$num_clusters)) && any(args$num_clusters < 0)) rlang::abort("The number of centers should be >= 0.") invisible(object) diff --git a/R/k_means_data.R b/R/k_means_data.R index dc23f43e..4b43d1c5 100644 --- a/R/k_means_data.R +++ b/R/k_means_data.R @@ -34,9 +34,9 @@ set_encoding_tidyclust( set_model_arg_tidyclust( model = "k_means", eng = "stats", - tidyclust = "k", + tidyclust = "num_clusters", original = "centers", - func = list(pkg = "tidyclust", fun = "k"), + func = list(pkg = "tidyclust", fun = "num_clusters"), has_submodel = TRUE ) @@ -90,9 +90,9 @@ set_encoding_tidyclust( set_model_arg_tidyclust( model = "k_means", eng = "ClusterR", - tidyclust = "k", + tidyclust = "num_clusters", original = "clusters", - func = list(pkg = "tidyclust", fun = "k"), + func = list(pkg = "tidyclust", fun = "num_clusters"), has_submodel = TRUE ) diff --git a/R/metric-silhouette.R b/R/metric-silhouette.R index 5a0bfb0f..904778ec 100644 --- a/R/metric-silhouette.R +++ b/R/metric-silhouette.R @@ -9,7 +9,7 @@ #' @return A tibble giving the silhouettes for each observation. #' #' @examples -#' kmeans_spec <- k_means(k = 5) %>% +#' kmeans_spec <- k_means(num_clusters = 5) %>% #' set_engine("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) @@ -50,7 +50,7 @@ silhouettes <- function(object, new_data = NULL, dists = NULL, #' @return A double; the average silhouette. #' #' @examples -#' kmeans_spec <- k_means(k = 5) %>% +#' kmeans_spec <- k_means(num_clusters = 5) %>% #' set_engine("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/R/metric-sse.R b/R/metric-sse.R index a797c0cc..e741f894 100644 --- a/R/metric-sse.R +++ b/R/metric-sse.R @@ -9,7 +9,7 @@ #' cluster. #' #' @examples -#' kmeans_spec <- k_means(k = 5) %>% +#' kmeans_spec <- k_means(num_clusters = 5) %>% #' set_engine("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) @@ -66,7 +66,7 @@ within_cluster_sse <- function(object, new_data = NULL, #' #' #' @examples -#' kmeans_spec <- k_means(k = 5) %>% +#' kmeans_spec <- k_means(num_clusters = 5) %>% #' set_engine("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) @@ -130,7 +130,7 @@ tot_wss_impl <- function(object, new_data = NULL, #' #' #' @examples -#' kmeans_spec <- k_means(k = 5) %>% +#' kmeans_spec <- k_means(num_clusters = 5) %>% #' set_engine("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) @@ -206,7 +206,7 @@ tot_sse_impl <- function(object, new_data = NULL, dist_fun = Rfast::dista, ...) #' @param ... Other arguments passed to methods. #' #' @examples -#' kmeans_spec <- k_means(k = 5) %>% +#' kmeans_spec <- k_means(num_clusters = 5) %>% #' set_engine("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/R/predict.R b/R/predict.R index 83648f29..a8cf691d 100644 --- a/R/predict.R +++ b/R/predict.R @@ -39,7 +39,7 @@ #' multivariate models. #' #' @examples -#' kmeans_spec <- k_means(k = 5) %>% +#' kmeans_spec <- k_means(num_clusters = 5) %>% #' set_engine("stats") #' #' kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/R/tunable.R b/R/tunable.R index 17f99adf..6b7a9335 100644 --- a/R/tunable.R +++ b/R/tunable.R @@ -71,7 +71,7 @@ stats_k_means_engine_args <- "centers" ), call_info = list( - list(pkg = "tidyclust", fun = "k") + list(pkg = "tidyclust", fun = "num_clusters") ), source = "cluster_spec", component = "k_means", diff --git a/R/update.R b/R/update.R index 0f388acc..f343035e 100644 --- a/R/update.R +++ b/R/update.R @@ -17,12 +17,12 @@ #' @return An updated cluster specification. #' @name tidyclust_update #' @examples -#' kmeans_spec <- k_means(k = 5) +#' kmeans_spec <- k_means(num_clusters = 5) #' kmeans_spec -#' update(kmeans_spec, k = 1) -#' update(kmeans_spec, k = 1, fresh = TRUE) +#' update(kmeans_spec, num_clusters = 1) +#' update(kmeans_spec, num_clusters = 1, fresh = TRUE) #' -#' param_values <- tibble::tibble(k = 10) +#' param_values <- tibble::tibble(num_clusters = 10) #' #' kmeans_spec %>% update(param_values) NULL diff --git a/README.Rmd b/README.Rmd index aca3ed12..43afb13b 100644 --- a/README.Rmd +++ b/README.Rmd @@ -40,7 +40,7 @@ The first thing you do is to create a `cluster specification`. For this example ```{r} library(tidyclust) -kmeans_spec <- k_means(k = 3) %>% +kmeans_spec <- k_means(num_clusters = 3) %>% set_engine("stats") kmeans_spec diff --git a/README.md b/README.md index 2fe39dc6..70766233 100644 --- a/README.md +++ b/README.md @@ -36,14 +36,14 @@ example we are creating a K-means model, using the `stats` engine. ``` r library(tidyclust) -kmeans_spec <- k_means(k = 3) %>% +kmeans_spec <- k_means(num_clusters = 3) %>% set_engine("stats") kmeans_spec #> K Means Cluster Specification (partition) #> #> Main Arguments: -#> k = 3 +#> num_clusters = 3 #> #> Computational engine: stats ``` @@ -56,39 +56,39 @@ kmeans_spec_fit <- kmeans_spec %>% kmeans_spec_fit #> tidyclust cluster object #> -#> K-means clustering with 3 clusters of sizes 11, 7, 14 +#> K-means clustering with 3 clusters of sizes 9, 7, 16 #> #> Cluster means: -#> mpg cyl disp hp drat wt qsec vs -#> 1 26.66364 4 105.1364 82.63636 4.070909 2.285727 19.13727 0.9090909 -#> 2 19.74286 6 183.3143 122.28571 3.585714 3.117143 17.97714 0.5714286 -#> 3 15.10000 8 353.1000 209.21429 3.229286 3.999214 16.77214 0.0000000 +#> mpg cyl disp hp drat wt qsec vs +#> 1 14.64444 8.000000 388.2222 232.1111 3.343333 4.161556 16.40444 0.0000000 +#> 2 17.01429 7.428571 276.0571 150.7143 2.994286 3.601429 18.11857 0.2857143 +#> 3 24.50000 4.625000 122.2937 96.8750 4.002500 2.518000 18.54312 0.7500000 #> am gear carb -#> 1 0.7272727 4.090909 1.545455 -#> 2 0.4285714 3.857143 3.428571 -#> 3 0.1428571 3.285714 3.500000 +#> 1 0.2222222 3.444444 4.000000 +#> 2 0.0000000 3.000000 2.142857 +#> 3 0.6875000 4.125000 2.437500 #> #> Clustering vector: #> Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive -#> 2 2 1 2 +#> 3 3 3 2 #> Hornet Sportabout Valiant Duster 360 Merc 240D -#> 3 2 3 1 +#> 1 2 1 3 #> Merc 230 Merc 280 Merc 280C Merc 450SE -#> 1 2 2 3 +#> 3 3 3 2 #> Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental -#> 3 3 3 3 +#> 2 2 1 1 #> Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla -#> 3 1 1 1 -#> Toyota Corona Dodge Challenger AMC Javelin Camaro Z28 #> 1 3 3 3 +#> Toyota Corona Dodge Challenger AMC Javelin Camaro Z28 +#> 3 2 2 1 #> Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa -#> 3 1 1 1 +#> 1 3 3 3 #> Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E -#> 3 2 3 1 +#> 1 3 1 3 #> #> Within cluster sum of squares by cluster: -#> [1] 11848.37 13954.34 93643.90 -#> (between_SS / total_SS = 80.8 %) +#> [1] 46659.32 11846.09 32838.00 +#> (between_SS / total_SS = 85.3 %) #> #> Available components: #> @@ -106,8 +106,8 @@ predict(kmeans_spec_fit, mtcars[1:4, ]) #> #> 1 Cluster_1 #> 2 Cluster_1 -#> 3 Cluster_2 -#> 4 Cluster_1 +#> 3 Cluster_1 +#> 4 Cluster_2 ``` `extract_cluster_assignment()` returns the cluster assignments of the @@ -120,13 +120,13 @@ extract_cluster_assignment(kmeans_spec_fit) #> #> 1 Cluster_1 #> 2 Cluster_1 -#> 3 Cluster_2 -#> 4 Cluster_1 +#> 3 Cluster_1 +#> 4 Cluster_2 #> 5 Cluster_3 -#> 6 Cluster_1 +#> 6 Cluster_2 #> 7 Cluster_3 -#> 8 Cluster_2 -#> 9 Cluster_2 +#> 8 Cluster_1 +#> 9 Cluster_1 #> 10 Cluster_1 #> # … with 22 more rows #> # ℹ Use `print(n = ...)` to see more rows @@ -139,7 +139,7 @@ extract_centroids(kmeans_spec_fit) #> # A tibble: 3 × 12 #> .cluster mpg cyl disp hp drat wt qsec vs am gear carb #> -#> 1 Cluster_1 19.7 6 183. 122. 3.59 3.12 18.0 0.571 0.429 3.86 3.43 -#> 2 Cluster_2 26.7 4 105. 82.6 4.07 2.29 19.1 0.909 0.727 4.09 1.55 -#> 3 Cluster_3 15.1 8 353. 209. 3.23 4.00 16.8 0 0.143 3.29 3.5 +#> 1 Cluster_1 24.5 4.62 122. 96.9 4.00 2.52 18.5 0.75 0.688 4.12 2.44 +#> 2 Cluster_2 17.0 7.43 276. 151. 2.99 3.60 18.1 0.286 0 3 2.14 +#> 3 Cluster_3 14.6 8 388. 232. 3.34 4.16 16.4 0 0.222 3.44 4 ``` diff --git a/_pkgdown.yml b/_pkgdown.yml index 4e39ab44..09616d33 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -33,7 +33,7 @@ reference: Parameter objects for tuning. Similar to [parameter objects from dials package](https://dials.tidymodels.org/reference/index.html#parameter-objects) contents: - - k + - num_clusters - title: Model based performance metrics desc: > These metrics use the fitted clustering model to extract values denoting how diff --git a/man/augment.Rd b/man/augment.Rd index b5c7799c..4c27a1d6 100644 --- a/man/augment.Rd +++ b/man/augment.Rd @@ -21,7 +21,7 @@ For partition models, a \code{.pred_cluster} column is added. } \examples{ -kmeans_spec <- k_means(k = 5) \%>\% +kmeans_spec <- k_means(num_clusters = 5) \%>\% set_engine("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/avg_silhouette.Rd b/man/avg_silhouette.Rd index 93b6f840..f044428e 100644 --- a/man/avg_silhouette.Rd +++ b/man/avg_silhouette.Rd @@ -40,7 +40,7 @@ A double; the average silhouette. Measures average silhouette across all observations } \examples{ -kmeans_spec <- k_means(k = 5) \%>\% +kmeans_spec <- k_means(num_clusters = 5) \%>\% set_engine("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/extract_centroids.Rd b/man/extract_centroids.Rd index 0e581641..330f0909 100644 --- a/man/extract_centroids.Rd +++ b/man/extract_centroids.Rd @@ -16,7 +16,7 @@ Extract clusters from model } \examples{ set.seed(1234) -kmeans_spec <- k_means(k = 5) \%>\% +kmeans_spec <- k_means(num_clusters = 5) \%>\% set_engine("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/extract_cluster_assignment.Rd b/man/extract_cluster_assignment.Rd index 671f12b2..1d8eccbd 100644 --- a/man/extract_cluster_assignment.Rd +++ b/man/extract_cluster_assignment.Rd @@ -15,7 +15,7 @@ extract_cluster_assignment(object, ...) Extract cluster assignments from model } \examples{ -kmeans_spec <- k_means(k = 5) \%>\% +kmeans_spec <- k_means(num_clusters = 5) \%>\% set_engine("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/extract_fit_summary.Rd b/man/extract_fit_summary.Rd index 33d8caac..56788562 100644 --- a/man/extract_fit_summary.Rd +++ b/man/extract_fit_summary.Rd @@ -18,7 +18,7 @@ A list with various summary elements S3 method to get fitted model summary info depending on engine } \examples{ -kmeans_spec <- k_means(k = 5) \%>\% +kmeans_spec <- k_means(num_clusters = 5) \%>\% set_engine("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/finalize_model_tidyclust.Rd b/man/finalize_model_tidyclust.Rd index 7a754598..443f9e6f 100644 --- a/man/finalize_model_tidyclust.Rd +++ b/man/finalize_model_tidyclust.Rd @@ -25,9 +25,9 @@ The \verb{finalize_*} functions take a list or tibble of tuning parameter values update objects with those values. } \examples{ -kmeans_spec <- k_means(k = tune()) +kmeans_spec <- k_means(num_clusters = tune()) -best_params <- data.frame(k = 5) +best_params <- data.frame(num_clusters = 5) best_params kmeans_spec diff --git a/man/fit.Rd b/man/fit.Rd index 64fdf335..3437b3b2 100644 --- a/man/fit.Rd +++ b/man/fit.Rd @@ -84,7 +84,7 @@ page for \code{\link[stats:contrast]{stats::contr.treatment()}} for more possibl \examples{ library(dplyr) -kmeans_mod <- k_means(k = 5) +kmeans_mod <- k_means(num_clusters = 5) using_formula <- kmeans_mod \%>\% diff --git a/man/k_means.Rd b/man/k_means.Rd index 805c8894..8ccfae5c 100644 --- a/man/k_means.Rd +++ b/man/k_means.Rd @@ -4,7 +4,7 @@ \alias{k_means} \title{K-Means} \usage{ -k_means(mode = "partition", engine = "stats", k = NULL) +k_means(mode = "partition", engine = "stats", num_clusters = NULL) } \arguments{ \item{mode}{A single character string for the type of model. @@ -14,7 +14,7 @@ The only possible value for this model is "partition".} to use for fitting. Possible engines are listed below. The default for this model is \code{"stats"}.} -\item{k}{Positive integer, number of clusters in model.} +\item{num_clusters}{Positive integer, number of clusters in model.} } \description{ \code{k_means()} defines a model that fits clusters based on distances to a number diff --git a/man/k.Rd b/man/num_clusters.Rd similarity index 86% rename from man/k.Rd rename to man/num_clusters.Rd index 182ac74b..c8129e2c 100644 --- a/man/k.Rd +++ b/man/num_clusters.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dials.R -\name{k} -\alias{k} +\name{num_clusters} +\alias{num_clusters} \title{Number of Clusters} \usage{ -k(range = c(1L, 10L), trans = NULL) +num_clusters(range = c(1L, 10L), trans = NULL) } \arguments{ \item{range}{A two-element vector holding the \emph{defaults} for the smallest and @@ -20,5 +20,5 @@ transformation, \code{NULL}.} Number of Clusters } \examples{ -k() +num_clusters() } diff --git a/man/predict.cluster_fit.Rd b/man/predict.cluster_fit.Rd index 6f8b7969..dd8ff763 100644 --- a/man/predict.cluster_fit.Rd +++ b/man/predict.cluster_fit.Rd @@ -58,7 +58,7 @@ is made: section below) in a tibble output format. } \examples{ -kmeans_spec <- k_means(k = 5) \%>\% +kmeans_spec <- k_means(num_clusters = 5) \%>\% set_engine("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/silhouettes.Rd b/man/silhouettes.Rd index d93d96c1..efb52056 100644 --- a/man/silhouettes.Rd +++ b/man/silhouettes.Rd @@ -23,7 +23,7 @@ A tibble giving the silhouettes for each observation. Measures silhouettes between clusters } \examples{ -kmeans_spec <- k_means(k = 5) \%>\% +kmeans_spec <- k_means(num_clusters = 5) \%>\% set_engine("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/sse_ratio.Rd b/man/sse_ratio.Rd index 4308bec9..d3df4a89 100644 --- a/man/sse_ratio.Rd +++ b/man/sse_ratio.Rd @@ -29,7 +29,7 @@ to Euclidean distance on processed data.} Compute the ratio of the WSS to the total SSE } \examples{ -kmeans_spec <- k_means(k = 5) \%>\% +kmeans_spec <- k_means(num_clusters = 5) \%>\% set_engine("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/tidyclust_update.Rd b/man/tidyclust_update.Rd index fadb4081..6b635ac7 100644 --- a/man/tidyclust_update.Rd +++ b/man/tidyclust_update.Rd @@ -5,7 +5,7 @@ \alias{tidyclust_update} \title{Update a cluster specification} \usage{ -\method{update}{k_means}(object, parameters = NULL, k = NULL, fresh = FALSE, ...) +\method{update}{k_means}(object, parameters = NULL, num_clusters = NULL, fresh = FALSE, ...) } \arguments{ \item{object}{A cluster specification.} @@ -16,7 +16,7 @@ directly when updating. If the main arguments are used, these will supersede the values in \code{parameters}. Also, using engine arguments in this object will result in an error.} -\item{k}{Positive integer, number of clusters in model.} +\item{num_clusters}{Positive integer, number of clusters in model.} \item{fresh}{A logical for whether the arguments should be modified in-place or replaced wholesale.} @@ -31,12 +31,12 @@ If parameters of a cluster specification need to be modified, \code{update()} ca be used in lieu of recreating the object from scratch. } \examples{ -kmeans_spec <- k_means(k = 5) +kmeans_spec <- k_means(num_clusters = 5) kmeans_spec -update(kmeans_spec, k = 1) -update(kmeans_spec, k = 1, fresh = TRUE) +update(kmeans_spec, num_clusters = 1) +update(kmeans_spec, num_clusters = 1, fresh = TRUE) -param_values <- tibble::tibble(k = 10) +param_values <- tibble::tibble(num_clusters = 10) kmeans_spec \%>\% update(param_values) } diff --git a/man/tot_sse.Rd b/man/tot_sse.Rd index 31e0ae1b..1adf5983 100644 --- a/man/tot_sse.Rd +++ b/man/tot_sse.Rd @@ -29,7 +29,7 @@ to Euclidean distance on processed data.} Compute the total sum of squares } \examples{ -kmeans_spec <- k_means(k = 5) \%>\% +kmeans_spec <- k_means(num_clusters = 5) \%>\% set_engine("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/tot_wss.Rd b/man/tot_wss.Rd index eae1e188..4dcb80de 100644 --- a/man/tot_wss.Rd +++ b/man/tot_wss.Rd @@ -29,7 +29,7 @@ to Euclidean distance on processed data.} Compute the sum of within-cluster SSE } \examples{ -kmeans_spec <- k_means(k = 5) \%>\% +kmeans_spec <- k_means(num_clusters = 5) \%>\% set_engine("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/man/within_cluster_sse.Rd b/man/within_cluster_sse.Rd index 5ceee7ea..1e425a4e 100644 --- a/man/within_cluster_sse.Rd +++ b/man/within_cluster_sse.Rd @@ -22,7 +22,7 @@ cluster. Calculates Sum of Squared Error in each cluster } \examples{ -kmeans_spec <- k_means(k = 5) \%>\% +kmeans_spec <- k_means(num_clusters = 5) \%>\% set_engine("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/tests/testthat/_snaps/k_means.md b/tests/testthat/_snaps/k_means.md index ef47dd79..ed62ffcc 100644 --- a/tests/testthat/_snaps/k_means.md +++ b/tests/testthat/_snaps/k_means.md @@ -8,7 +8,7 @@ --- Code - bt <- k_means(k = -1) %>% set_engine("stats") + bt <- k_means(num_clusters = -1) %>% set_engine("stats") fit(bt, mpg ~ ., mtcars) Error The number of centers should be >= 0. @@ -40,12 +40,12 @@ --- Code - k_means(k = 10) + k_means(num_clusters = 10) Output K Means Cluster Specification (partition) Main Arguments: - k = 10 + num_clusters = 10 Computational engine: stats @@ -53,12 +53,12 @@ # updating Code - k_means(k = 5) %>% update(k = tune()) + k_means(num_clusters = 5) %>% update(num_clusters = tune()) Output K Means Cluster Specification (partition) Main Arguments: - k = tune() + num_clusters = tune() Computational engine: stats diff --git a/tests/testthat/_snaps/registration.md b/tests/testthat/_snaps/registration.md index 54608284..9b20dcec 100644 --- a/tests/testthat/_snaps/registration.md +++ b/tests/testthat/_snaps/registration.md @@ -322,9 +322,9 @@ arguments: stats: - k --> centers + num_clusters --> centers ClusterR: - k --> clusters + num_clusters --> clusters fit modules: engine mode diff --git a/tests/testthat/helper-tidyclust-package.R b/tests/testthat/helper-tidyclust-package.R index 115e3880..cfd0721b 100644 --- a/tests/testthat/helper-tidyclust-package.R +++ b/tests/testthat/helper-tidyclust-package.R @@ -10,9 +10,9 @@ helper_objects_tidyclust <- function() { recipes::recipe(~ ., data = mtcars) %>% recipes::step_normalize(recipes::all_predictors()) - kmeans_mod_no_tune <- k_means(k = 2) + kmeans_mod_no_tune <- k_means(num_clusters = 2) - kmeans_mod <- k_means(k = tune()) + kmeans_mod <- k_means(num_clusters = tune()) list( rec_tune_1 = rec_tune_1, diff --git a/tests/testthat/test-arguments.R b/tests/testthat/test-arguments.R index 6eaa6f33..e9c1a38f 100644 --- a/tests/testthat/test-arguments.R +++ b/tests/testthat/test-arguments.R @@ -1,26 +1,26 @@ test_that('pipe arguments', { mod_1 <- k_means() %>% - set_args(k = 1) + set_args(num_clusters = 1) expect_equal( - rlang::quo_get_expr(mod_1$args$k), + rlang::quo_get_expr(mod_1$args$num_clusters), 1 ) expect_equal( - rlang::quo_get_env(mod_1$args$k), + rlang::quo_get_env(mod_1$args$num_clusters), rlang::empty_env() ) - mod_2 <- k_means(k = 2) %>% - set_args(k = 1) + mod_2 <- k_means(num_clusters = 2) %>% + set_args(num_clusters = 1) var_env <- rlang::current_env() expect_equal( - rlang::quo_get_expr(mod_2$args$k), + rlang::quo_get_expr(mod_2$args$num_clusters), 1 ) expect_equal( - rlang::quo_get_env(mod_2$args$k), + rlang::quo_get_env(mod_2$args$num_clusters), rlang::empty_env() ) diff --git a/tests/testthat/test-augment.R b/tests/testthat/test-augment.R index 6cc0d992..e973e288 100644 --- a/tests/testthat/test-augment.R +++ b/tests/testthat/test-augment.R @@ -1,5 +1,5 @@ test_that('partition models', { - x <- k_means(k = 2) + x <- k_means(num_clusters = 2) set.seed(1234) reg_form <- x %>% fit(~ ., data = mtcars) diff --git a/tests/testthat/test-cluster_metric_set.R b/tests/testthat/test-cluster_metric_set.R index 966fc9d9..14038214 100644 --- a/tests/testthat/test-cluster_metric_set.R +++ b/tests/testthat/test-cluster_metric_set.R @@ -1,5 +1,5 @@ test_that("cluster_metric_set works", { - kmeans_spec <- k_means(k = 5) %>% + kmeans_spec <- k_means(num_clusters = 5) %>% set_engine("stats") kmeans_fit <- fit(kmeans_spec, ~., mtcars) diff --git a/tests/testthat/test-control.R b/tests/testthat/test-control.R index 0ecf2466..2203dfea 100644 --- a/tests/testthat/test-control.R +++ b/tests/testthat/test-control.R @@ -1,6 +1,6 @@ test_that("control class", { skip("waiting for workflow PR") - x <- k_means(k = 5) %>% set_engine("stats") + x <- k_means(num_clusters = 5) %>% set_engine("stats") ctrl <- control_cluster() class(ctrl) <- c("potato", "chair") expect_snapshot( diff --git a/tests/testthat/test-extract_summary.R b/tests/testthat/test-extract_summary.R index 66d90259..060205e4 100644 --- a/tests/testthat/test-extract_summary.R +++ b/tests/testthat/test-extract_summary.R @@ -1,9 +1,9 @@ test_that("extract summary works for kmeans", { - obj1 <- k_means(k = mtcars[1:3, ]) %>% + obj1 <- k_means(num_clusters = mtcars[1:3, ]) %>% set_engine("stats", algorithm = "MacQueen") %>% fit(~., mtcars) - obj2 <- k_means(k = 3) %>% + obj2 <- k_means(num_clusters = 3) %>% set_engine("ClusterR", CENTROIDS = as.matrix(mtcars[1:3, ])) %>% fit(~., mtcars) @@ -16,12 +16,12 @@ test_that("extract summary works for kmeans", { expect_equal(summ1$n_members, c(17, 11, 4)) }) -test_that("extract summary works for kmeans when k = 1", { - obj1 <- k_means(k = 1) %>% +test_that("extract summary works for kmeans when num_clusters = 1", { + obj1 <- k_means(num_clusters = 1) %>% set_engine("stats") %>% fit(~., mtcars) - obj2 <- k_means(k = 1) %>% + obj2 <- k_means(num_clusters = 1) %>% set_engine("ClusterR") %>% fit(~., mtcars) diff --git a/tests/testthat/test-k_means.R b/tests/testthat/test-k_means.R index 2baee51a..4913215e 100644 --- a/tests/testthat/test-k_means.R +++ b/tests/testthat/test-k_means.R @@ -9,7 +9,7 @@ test_that("primary arguments", { ) ) - k <- k_means(k = 15, mode = "partition") + k <- k_means(num_clusters = 15, mode = "partition") k_stats <- translate_tidyclust(k %>% set_engine("stats")) expect_equal( k_stats$method$fit$args, @@ -39,7 +39,7 @@ test_that("engine arguments", { test_that("bad input", { expect_snapshot(error = TRUE, k_means(mode = "bogus")) expect_snapshot(error = TRUE, { - bt <- k_means(k = -1) %>% set_engine("stats") + bt <- k_means(num_clusters = -1) %>% set_engine("stats") fit(bt, mpg ~ ., mtcars) }) expect_snapshot(error = TRUE, translate_tidyclust(k_means(), engine = NULL)) @@ -48,7 +48,7 @@ test_that("bad input", { test_that("predictions", { set.seed(1234) - kmeans_fit <- k_means(k = 4) %>% + kmeans_fit <- k_means(num_clusters = 4) %>% set_engine("stats") %>% fit(~., mtcars) @@ -89,13 +89,13 @@ test_that("printing", { k_means() ) expect_snapshot( - k_means(k = 10) + k_means(num_clusters = 10) ) }) test_that('updating', { expect_snapshot( - k_means(k = 5) %>% - update(k = tune()) + k_means(num_clusters = 5) %>% + update(num_clusters = tune()) ) }) diff --git a/tests/testthat/test-kmeans_diagnostics.R b/tests/testthat/test-kmeans_diagnostics.R index 8d5fc298..344ea278 100644 --- a/tests/testthat/test-kmeans_diagnostics.R +++ b/tests/testthat/test-kmeans_diagnostics.R @@ -1,9 +1,9 @@ test_that("kmeans sse metrics work", { - kmeans_fit_stats <- k_means(k = mtcars[1:3, ]) %>% + kmeans_fit_stats <- k_means(num_clusters = mtcars[1:3, ]) %>% set_engine("stats", algorithm = "MacQueen") %>% fit(~., mtcars) - kmeans_fit_ClusterR <- k_means(k = 3) %>% + kmeans_fit_ClusterR <- k_means(num_clusters = 3) %>% set_engine("ClusterR", CENTROIDS = as.matrix(mtcars[1:3, ])) %>% fit(~., mtcars) @@ -58,7 +58,7 @@ test_that("kmeans sse metrics work", { }) test_that("kmeans sse metrics work on new data", { - kmeans_fit_stats <- k_means(k = mtcars[1:3, ]) %>% + kmeans_fit_stats <- k_means(num_clusters = mtcars[1:3, ]) %>% set_engine("stats", algorithm = "MacQueen") %>% fit(~., mtcars) @@ -87,11 +87,11 @@ test_that("kmeans sse metrics work on new data", { }) test_that("kmeans sihouette metrics work", { - kmeans_fit_stats <- k_means(k = mtcars[1:3, ]) %>% + kmeans_fit_stats <- k_means(num_clusters = mtcars[1:3, ]) %>% set_engine("stats", algorithm = "MacQueen") %>% fit(~., mtcars) - kmeans_fit_ClusterR <- k_means(k = 3) %>% + kmeans_fit_ClusterR <- k_means(num_clusters = 3) %>% set_engine("ClusterR", CENTROIDS = as.matrix(mtcars[1:3, ])) %>% fit(~., mtcars) @@ -119,11 +119,11 @@ test_that("kmeans sihouette metrics work", { }) test_that("kmeans sihouette metrics work with new data", { - kmeans_fit_stats <- k_means(k = mtcars[1:3, ]) %>% + kmeans_fit_stats <- k_means(num_clusters = mtcars[1:3, ]) %>% set_engine("stats", algorithm = "MacQueen") %>% fit(~., mtcars) - kmeans_fit_ClusterR <- k_means(k = 3) %>% + kmeans_fit_ClusterR <- k_means(num_clusters = 3) %>% set_engine("ClusterR", CENTROIDS = as.matrix(mtcars[1:3, ])) %>% fit(~., mtcars) diff --git a/tests/testthat/test-predict_formats.R b/tests/testthat/test-predict_formats.R index 917a24c9..0660a759 100644 --- a/tests/testthat/test-predict_formats.R +++ b/tests/testthat/test-predict_formats.R @@ -1,6 +1,6 @@ test_that("partition predictions", { kmeans_fit <- - k_means(k = 3, mode = "partition") %>% + k_means(num_clusters = 3, mode = "partition") %>% set_engine("stats") %>% fit(~., data = mtcars) diff --git a/tests/testthat/test-tune_cluster.R b/tests/testthat/test-tune_cluster.R index 7a3d9e88..8b958f73 100644 --- a/tests/testthat/test-tune_cluster.R +++ b/tests/testthat/test-tune_cluster.R @@ -43,7 +43,7 @@ test_that("tune model only (with recipe)", { workflows::add_model(helper_objects$kmeans_mod) pset <- hardhat::extract_parameter_set_dials(wflow) grid <- dials::grid_regular(pset, levels = 3) - grid$k <- grid$k + 1 + grid$num_clusters <- grid$num_clusters + 1 folds <- rsample::vfold_cv(mtcars) control <- tune::control_grid(extract = identity) metrics <- cluster_metric_set(tot_wss, tot_sse) @@ -59,15 +59,15 @@ test_that("tune model only (with recipe)", { res_workflow <- res$.extracts[[1]]$.extracts[[1]] # Ensure tunable parameters in spec are finalized - k_quo <- res_workflow$fit$fit$spec$args$k - k <- rlang::quo_get_expr(k_quo) + num_clusters_quo <- res_workflow$fit$fit$spec$args$num_clusters + num_clusters <- rlang::quo_get_expr(num_clusters_quo) expect_equal(res$id, folds$id) expect_equal(nrow(res_est), nrow(grid) * 2) expect_equal(sum(res_est$.metric == "tot_sse"), nrow(grid)) expect_equal(sum(res_est$.metric == "tot_wss"), nrow(grid)) expect_equal(res_est$n, rep(10, nrow(grid) * 2)) - expect_false(identical(k, expr(tune()))) + expect_false(identical(num_clusters, expr(tune()))) expect_true(res_workflow$trained) }) @@ -81,7 +81,7 @@ test_that("tune model and recipe", { pset <- hardhat::extract_parameter_set_dials(wflow) %>% update(num_comp = dials::num_comp(c(1, 3))) grid <- dials::grid_regular(pset, levels = 3) - grid$k <- grid$k + 1 + grid$num_clusters <- grid$num_clusters + 1 folds <- rsample::vfold_cv(mtcars) control <- tune::control_grid(extract = identity) metrics <- cluster_metric_set(tot_wss, tot_sse) @@ -97,8 +97,8 @@ test_that("tune model and recipe", { res_workflow <- res$.extracts[[1]]$.extracts[[1]] # Ensure tunable parameters in spec are finalized - k_quo <- res_workflow$fit$fit$spec$args$k - k <- rlang::quo_get_expr(k_quo) + num_clusters_quo <- res_workflow$fit$fit$spec$args$num_clusters + num_clusters <- rlang::quo_get_expr(num_clusters_quo) # Ensure tunable parameters in recipe are finalized num_comp <- res_workflow$pre$actions$recipe$recipe$steps[[2]]$num_comp @@ -106,13 +106,13 @@ test_that("tune model and recipe", { expect_equal(res$id, folds$id) expect_equal( colnames(res$.metrics[[1]]), - c("k", "num_comp", ".metric", ".estimator", ".estimate", ".config") + c("num_clusters", "num_comp", ".metric", ".estimator", ".estimate", ".config") ) expect_equal(nrow(res_est), nrow(grid) * 2) expect_equal(sum(res_est$.metric == "tot_sse"), nrow(grid)) expect_equal(sum(res_est$.metric == "tot_wss"), nrow(grid)) expect_equal(res_est$n, rep(10, nrow(grid) * 2)) - expect_false(identical(k, expr(tune()))) + expect_false(identical(num_clusters, expr(tune()))) expect_false(identical(num_comp, expr(tune()))) expect_true(res_workflow$trained) }) @@ -127,7 +127,7 @@ test_that('tune model and recipe (parallel_over = "everything")', { pset <- hardhat::extract_parameter_set_dials(wflow) %>% update(num_comp = dials::num_comp(c(1, 3))) grid <- dials::grid_regular(pset, levels = 3) - grid$k <- grid$k + 1 + grid$num_clusters <- grid$num_clusters + 1 folds <- rsample::vfold_cv(mtcars) control <- tune::control_grid(extract = identity, parallel_over = "everything") metrics <- cluster_metric_set(tot_wss, tot_sse) @@ -144,7 +144,7 @@ test_that('tune model and recipe (parallel_over = "everything")', { expect_equal(res$id, folds$id) expect_equal( colnames(res$.metrics[[1]]), - c("k", "num_comp", ".metric", ".estimator", ".estimate", ".config") + c("num_clusters", "num_comp", ".metric", ".estimator", ".estimate", ".config") ) expect_equal(nrow(res_est), nrow(grid) * 2) expect_equal(sum(res_est$.metric == "tot_sse"), nrow(grid)) @@ -158,7 +158,7 @@ test_that("tune model only - failure in formula is caught elegantly", { set.seed(7898) data_folds <- rsample::vfold_cv(mtcars, v = 2) - cars_grid <- tibble::tibble(k = 2) + cars_grid <- tibble::tibble(num_clusters = 2) # these terms don't exist! expect_snapshot( @@ -310,7 +310,7 @@ test_that("retain extra attributes", { workflows::add_model(helper_objects$kmeans_mod) pset <- hardhat::extract_parameter_set_dials(wflow) grid <- dials::grid_regular(pset, levels = 3) - grid$k <- grid$k + 1 + grid$num_clusters <- grid$num_clusters + 1 folds <- rsample::vfold_cv(mtcars) metrics <- cluster_metric_set(tot_wss, tot_sse) res <- tune_cluster(wflow, resamples = folds, grid = grid, metrics = metrics) @@ -333,7 +333,7 @@ test_that("select_best() and show_best() works", { workflows::add_model(helper_objects$kmeans_mod) pset <- hardhat::extract_parameter_set_dials(wflow) grid <- dials::grid_regular(pset, levels = 10) - grid$k <- grid$k + 1 + grid$num_clusters <- grid$num_clusters + 1 folds <- rsample::vfold_cv(mtcars) control <- tune::control_grid(extract = identity) metrics <- cluster_metric_set(tot_wss, tot_sse) @@ -369,7 +369,7 @@ test_that("select_best() and show_best() works", { tune::collect_metrics(res) %>% dplyr::filter(.metric == "tot_wss") %>% dplyr::slice_min(mean, n = 1, with_ties = FALSE) %>% - dplyr::select(k, .config) + dplyr::select(num_clusters, .config) ) expect_equal( @@ -377,8 +377,7 @@ test_that("select_best() and show_best() works", { tune::collect_metrics(res) %>% dplyr::filter(.metric == "tot_sse") %>% dplyr::slice_min(mean, n = 1, with_ties = FALSE) %>% - dplyr::select(k, .config) + dplyr::select(num_clusters, .config) ) - }) diff --git a/vignettes/articles/kmeans.Rmd b/vignettes/articles/kmeans.Rmd index 89c89f85..47859720 100644 --- a/vignettes/articles/kmeans.Rmd +++ b/vignettes/articles/kmeans.Rmd @@ -92,7 +92,7 @@ To specify a k-means model in `tidyclust`, simply choose a value of $k$ and an engine: ```{r} -kmeans_spec <- k_means(k = 3) %>% +kmeans_spec <- k_means(num_clusters = 3) %>% set_engine("stats") kmeans_spec diff --git a/vignettes/articles/kmeans_metrics.Rmd b/vignettes/articles/kmeans_metrics.Rmd index 9abad3f0..efe75042 100644 --- a/vignettes/articles/kmeans_metrics.Rmd +++ b/vignettes/articles/kmeans_metrics.Rmd @@ -27,7 +27,7 @@ penguins <- penguins %>% drop_na() ## Setup ```{r} -kmeans_spec <- k_means(k = 3) %>% +kmeans_spec <- k_means(num_clusters = 3) %>% set_engine("stats") kmeans_fit <- kmeans_spec %>% @@ -94,7 +94,7 @@ results_ratio <- c() for (k in 2:10) { - kmeans_spec_k <- k_means(k = k) %>% + kmeans_spec_k <- k_means(num_clusters = k) %>% set_engine("stats") kmeans_fit_k <- kmeans_spec_k %>% @@ -122,7 +122,7 @@ results_species <- c() for (k in 2:10) { - kmeans_spec_k <- k_means(k = k) %>% + kmeans_spec_k <- k_means(num_clusters = k) %>% set_engine("stats") kmeans_fit_k <- kmeans_spec_k %>%