Merge pull request #59 from EmilHvitfeldt/no-more-k

Finally change k -> num_clusters
tidymodels · Jul 19, 2022 · cea4c44 · cea4c44
2 parents d4b77b2 + b999403
commit cea4c44
Show file tree

Hide file tree

Showing 48 changed files with 149 additions and 150 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -76,13 +76,13 @@ export(get_from_env_tidyclust)
 export(get_model_env_tidyclust)
 export(get_pred_type_tidyclust)
 export(glance)
-export(k)
 export(k_means)
 export(load_pkgs)
 export(make_classes_tidyclust)
 export(min_grid)
 export(new_cluster_metric)
 export(new_cluster_spec)
+export(num_clusters)
 export(predict.cluster_fit)
 export(predict_cluster)
 export(predict_cluster.cluster_fit)

diff --git a/R/augment.R b/R/augment.R
@@ -11,7 +11,7 @@
 #' @rdname augment
 #' @export
 #' @examples
-#' kmeans_spec <- k_means(k = 5) %>%
+#' kmeans_spec <- k_means(num_clusters = 5) %>%
 #'   set_engine("stats")
 #'
 #' kmeans_fit <- fit(kmeans_spec, ~., mtcars)

diff --git a/R/dials.R b/R/dials.R
@@ -2,15 +2,15 @@
 #'
 #' @inheritParams dials::Laplace
 #' @examples
-#' k()
+#' num_clusters()
 #' @export
-k <- function(range = c(1L, 10L), trans = NULL) {
+num_clusters <- function(range = c(1L, 10L), trans = NULL) {
   dials::new_quant_param(
     type = "integer",
     range = range,
     inclusive = c(TRUE, TRUE),
     trans = trans,
-    label = c(k = "# Clusters"),
+    label = c(num_clusters = "# Clusters"),
     finalize = NULL
   )
 }
diff --git a/R/extract_assignment.R b/R/extract_assignment.R
@@ -4,7 +4,7 @@
 #' @param ... Other arguments passed to methods.
 #'
 #' @examples
-#' kmeans_spec <- k_means(k = 5) %>%
+#' kmeans_spec <- k_means(num_clusters = 5) %>%
 #'   set_engine("stats")
 #'
 #' kmeans_fit <- fit(kmeans_spec, ~., mtcars)

diff --git a/R/extract_characterization.R b/R/extract_characterization.R
@@ -5,7 +5,7 @@
 #'
 #' @examples
 #' set.seed(1234)
-#' kmeans_spec <- k_means(k = 5) %>%
+#' kmeans_spec <- k_means(num_clusters = 5) %>%
 #'   set_engine("stats")
 #'
 #' kmeans_fit <- fit(kmeans_spec, ~., mtcars)

diff --git a/R/extract_summary.R b/R/extract_summary.R
@@ -7,7 +7,7 @@
 #' @return A list with various summary elements
 #'
 #' @examples
-#' kmeans_spec <- k_means(k = 5) %>%
+#' kmeans_spec <- k_means(num_clusters = 5) %>%
 #'   set_engine("stats")
 #'
 #' kmeans_fit <- fit(kmeans_spec, ~., mtcars)

diff --git a/R/finalize.R b/R/finalize.R
@@ -11,9 +11,9 @@
 #' @return An updated version of `x`.
 #' @export
 #' @examples
-#' kmeans_spec <- k_means(k = tune())
+#' kmeans_spec <- k_means(num_clusters = tune())
 #'
-#' best_params <- data.frame(k = 5)
+#' best_params <- data.frame(num_clusters = 5)
 #' best_params
 #'
 #' kmeans_spec

diff --git a/R/fit.R b/R/fit.R
@@ -43,7 +43,7 @@
 #' @examples
 #' library(dplyr)
 #'
-#' kmeans_mod <- k_means(k = 5)
+#' kmeans_mod <- k_means(num_clusters = 5)
 #'
 #' using_formula <-
 #'   kmeans_mod %>%

diff --git a/R/k_means.R b/R/k_means.R
@@ -10,7 +10,7 @@
 #' @param engine A single character string specifying what computational engine
 #'  to use for fitting. Possible engines are listed below. The default for this
 #'  model is `"stats"`.
-#' @param k Positive integer, number of clusters in model.
+#' @param num_clusters Positive integer, number of clusters in model.
 #'
 #' @examples
 #' # show_engines("k_means")
@@ -20,9 +20,9 @@
 k_means <-
   function(mode = "partition",
            engine = "stats",
-           k = NULL) {
+           num_clusters = NULL) {
     args <- list(
-      k = enquo(k)
+      num_clusters = enquo(num_clusters)
     )
 
     new_cluster_spec(
@@ -61,7 +61,7 @@ translate_tidyclust.k_means <- function(x, engine = x$engine, ...) {
 #' @export
 update.k_means <- function(object,
                            parameters = NULL,
-                           k = NULL,
+                           num_clusters = NULL,
                            fresh = FALSE, ...) {
 
   eng_args <- parsnip::update_engine_parameters(object$eng_args, ...)
@@ -70,7 +70,7 @@ update.k_means <- function(object,
     parameters <- parsnip::check_final_param(parameters)
   }
   args <- list(
-    k = enquo(k)
+    num_clusters = enquo(num_clusters)
   )
 
   args <- parsnip::update_main_parameters(args, parameters)
@@ -104,7 +104,7 @@ check_args.k_means <- function(object) {
 
   args <- lapply(object$args, rlang::eval_tidy)
 
-  if (all(is.numeric(args$k)) && any(args$k < 0))
+  if (all(is.numeric(args$num_clusters)) && any(args$num_clusters < 0))
     rlang::abort("The number of centers should be >= 0.")
 
   invisible(object)

diff --git a/R/k_means_data.R b/R/k_means_data.R
@@ -34,9 +34,9 @@ set_encoding_tidyclust(
 set_model_arg_tidyclust(
   model = "k_means",
   eng = "stats",
-  tidyclust = "k",
+  tidyclust = "num_clusters",
   original = "centers",
-  func = list(pkg = "tidyclust", fun = "k"),
+  func = list(pkg = "tidyclust", fun = "num_clusters"),
   has_submodel = TRUE
 )
 
@@ -90,9 +90,9 @@ set_encoding_tidyclust(
 set_model_arg_tidyclust(
   model = "k_means",
   eng = "ClusterR",
-  tidyclust = "k",
+  tidyclust = "num_clusters",
   original = "clusters",
-  func = list(pkg = "tidyclust", fun = "k"),
+  func = list(pkg = "tidyclust", fun = "num_clusters"),
   has_submodel = TRUE
 )
 

diff --git a/R/metric-silhouette.R b/R/metric-silhouette.R
@@ -9,7 +9,7 @@
 #' @return A tibble giving the silhouettes for each observation.
 #'
 #' @examples
-#' kmeans_spec <- k_means(k = 5) %>%
+#' kmeans_spec <- k_means(num_clusters = 5) %>%
 #'   set_engine("stats")
 #'
 #' kmeans_fit <- fit(kmeans_spec, ~., mtcars)
@@ -50,7 +50,7 @@ silhouettes <- function(object, new_data = NULL, dists = NULL,
 #' @return A double; the average silhouette.
 #'
 #' @examples
-#' kmeans_spec <- k_means(k = 5) %>%
+#' kmeans_spec <- k_means(num_clusters = 5) %>%
 #'   set_engine("stats")
 #'
 #' kmeans_fit <- fit(kmeans_spec, ~., mtcars)

diff --git a/R/metric-sse.R b/R/metric-sse.R
@@ -9,7 +9,7 @@
 #' cluster.
 #'
 #' @examples
-#' kmeans_spec <- k_means(k = 5) %>%
+#' kmeans_spec <- k_means(num_clusters = 5) %>%
 #'   set_engine("stats")
 #'
 #' kmeans_fit <- fit(kmeans_spec, ~., mtcars)
@@ -66,7 +66,7 @@ within_cluster_sse <- function(object, new_data = NULL,
 #'
 #'
 #' @examples
-#' kmeans_spec <- k_means(k = 5) %>%
+#' kmeans_spec <- k_means(num_clusters = 5) %>%
 #'   set_engine("stats")
 #'
 #' kmeans_fit <- fit(kmeans_spec, ~., mtcars)
@@ -130,7 +130,7 @@ tot_wss_impl <- function(object, new_data = NULL,
 #'
 #'
 #' @examples
-#' kmeans_spec <- k_means(k = 5) %>%
+#' kmeans_spec <- k_means(num_clusters = 5) %>%
 #'   set_engine("stats")
 #'
 #' kmeans_fit <- fit(kmeans_spec, ~., mtcars)
@@ -206,7 +206,7 @@ tot_sse_impl <- function(object, new_data = NULL, dist_fun = Rfast::dista, ...)
 #' @param ... Other arguments passed to methods.
 #'
 #' @examples
-#' kmeans_spec <- k_means(k = 5) %>%
+#' kmeans_spec <- k_means(num_clusters = 5) %>%
 #'   set_engine("stats")
 #'
 #' kmeans_fit <- fit(kmeans_spec, ~., mtcars)

diff --git a/R/predict.R b/R/predict.R
@@ -39,7 +39,7 @@
 #'  multivariate models.
 #'
 #' @examples
-#' kmeans_spec <- k_means(k = 5) %>%
+#' kmeans_spec <- k_means(num_clusters = 5) %>%
 #'   set_engine("stats")
 #'
 #' kmeans_fit <- fit(kmeans_spec, ~., mtcars)

diff --git a/R/tunable.R b/R/tunable.R
@@ -71,7 +71,7 @@ stats_k_means_engine_args <-
       "centers"
     ),
     call_info = list(
-      list(pkg = "tidyclust", fun = "k")
+      list(pkg = "tidyclust", fun = "num_clusters")
     ),
     source = "cluster_spec",
     component = "k_means",

diff --git a/R/update.R b/R/update.R
@@ -17,12 +17,12 @@
 #' @return An updated cluster specification.
 #' @name tidyclust_update
 #' @examples
-#' kmeans_spec <- k_means(k = 5)
+#' kmeans_spec <- k_means(num_clusters = 5)
 #' kmeans_spec
-#' update(kmeans_spec, k = 1)
-#' update(kmeans_spec, k = 1, fresh = TRUE)
+#' update(kmeans_spec, num_clusters = 1)
+#' update(kmeans_spec, num_clusters = 1, fresh = TRUE)
 #'
-#' param_values <- tibble::tibble(k = 10)
+#' param_values <- tibble::tibble(num_clusters = 10)
 #'
 #' kmeans_spec %>% update(param_values)
 NULL
diff --git a/README.Rmd b/README.Rmd
@@ -40,7 +40,7 @@ The first thing you do is to create a `cluster specification`. For this example
 ```{r}
 library(tidyclust)
 
-kmeans_spec <- k_means(k = 3) %>%
+kmeans_spec <- k_means(num_clusters = 3) %>%
   set_engine("stats") 
 
 kmeans_spec

diff --git a/README.md b/README.md
@@ -36,14 +36,14 @@ example we are creating a K-means model, using the `stats` engine.
 ``` r
 library(tidyclust)
 
-kmeans_spec <- k_means(k = 3) %>%
+kmeans_spec <- k_means(num_clusters = 3) %>%
   set_engine("stats") 
 
 kmeans_spec
 #> K Means Cluster Specification (partition)
 #> 
 #> Main Arguments:
-#>   k = 3
+#>   num_clusters = 3
 #> 
 #> Computational engine: stats
 ```
@@ -56,39 +56,39 @@ kmeans_spec_fit <- kmeans_spec %>%
 kmeans_spec_fit
 #> tidyclust cluster object
 #> 
-#> K-means clustering with 3 clusters of sizes 11, 7, 14
+#> K-means clustering with 3 clusters of sizes 9, 7, 16
 #> 
 #> Cluster means:
-#>        mpg cyl     disp        hp     drat       wt     qsec        vs
-#> 1 26.66364   4 105.1364  82.63636 4.070909 2.285727 19.13727 0.9090909
-#> 2 19.74286   6 183.3143 122.28571 3.585714 3.117143 17.97714 0.5714286
-#> 3 15.10000   8 353.1000 209.21429 3.229286 3.999214 16.77214 0.0000000
+#>        mpg      cyl     disp       hp     drat       wt     qsec        vs
+#> 1 14.64444 8.000000 388.2222 232.1111 3.343333 4.161556 16.40444 0.0000000
+#> 2 17.01429 7.428571 276.0571 150.7143 2.994286 3.601429 18.11857 0.2857143
+#> 3 24.50000 4.625000 122.2937  96.8750 4.002500 2.518000 18.54312 0.7500000
 #>          am     gear     carb
-#> 1 0.7272727 4.090909 1.545455
-#> 2 0.4285714 3.857143 3.428571
-#> 3 0.1428571 3.285714 3.500000
+#> 1 0.2222222 3.444444 4.000000
+#> 2 0.0000000 3.000000 2.142857
+#> 3 0.6875000 4.125000 2.437500
 #> 
 #> Clustering vector:
 #>           Mazda RX4       Mazda RX4 Wag          Datsun 710      Hornet 4 Drive 
-#>                   2                   2                   1                   2 
+#>                   3                   3                   3                   2 
 #>   Hornet Sportabout             Valiant          Duster 360           Merc 240D 
-#>                   3                   2                   3                   1 
+#>                   1                   2                   1                   3 
 #>            Merc 230            Merc 280           Merc 280C          Merc 450SE 
-#>                   1                   2                   2                   3 
+#>                   3                   3                   3                   2 
 #>          Merc 450SL         Merc 450SLC  Cadillac Fleetwood Lincoln Continental 
-#>                   3                   3                   3                   3 
+#>                   2                   2                   1                   1 
 #>   Chrysler Imperial            Fiat 128         Honda Civic      Toyota Corolla 
-#>                   3                   1                   1                   1 
-#>       Toyota Corona    Dodge Challenger         AMC Javelin          Camaro Z28 
 #>                   1                   3                   3                   3 
+#>       Toyota Corona    Dodge Challenger         AMC Javelin          Camaro Z28 
+#>                   3                   2                   2                   1 
 #>    Pontiac Firebird           Fiat X1-9       Porsche 914-2        Lotus Europa 
-#>                   3                   1                   1                   1 
+#>                   1                   3                   3                   3 
 #>      Ford Pantera L        Ferrari Dino       Maserati Bora          Volvo 142E 
-#>                   3                   2                   3                   1 
+#>                   1                   3                   1                   3 
 #> 
 #> Within cluster sum of squares by cluster:
-#> [1] 11848.37 13954.34 93643.90
-#>  (between_SS / total_SS =  80.8 %)
+#> [1] 46659.32 11846.09 32838.00
+#>  (between_SS / total_SS =  85.3 %)
 #> 
 #> Available components:
 #> 
@@ -106,8 +106,8 @@ predict(kmeans_spec_fit, mtcars[1:4, ])
 #>   <fct>        
 #> 1 Cluster_1    
 #> 2 Cluster_1    
-#> 3 Cluster_2    
-#> 4 Cluster_1
+#> 3 Cluster_1    
+#> 4 Cluster_2
 ```
 
 `extract_cluster_assignment()` returns the cluster assignments of the
@@ -120,13 +120,13 @@ extract_cluster_assignment(kmeans_spec_fit)
 #>    <fct>    
 #>  1 Cluster_1
 #>  2 Cluster_1
-#>  3 Cluster_2
-#>  4 Cluster_1
+#>  3 Cluster_1
+#>  4 Cluster_2
 #>  5 Cluster_3
-#>  6 Cluster_1
+#>  6 Cluster_2
 #>  7 Cluster_3
-#>  8 Cluster_2
-#>  9 Cluster_2
+#>  8 Cluster_1
+#>  9 Cluster_1
 #> 10 Cluster_1
 #> # … with 22 more rows
 #> # ℹ Use `print(n = ...)` to see more rows
@@ -139,7 +139,7 @@ extract_centroids(kmeans_spec_fit)
 #> # A tibble: 3 × 12
 #>   .cluster    mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
 #>   <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
-#> 1 Cluster_1  19.7     6  183. 122.   3.59  3.12  18.0 0.571 0.429  3.86  3.43
-#> 2 Cluster_2  26.7     4  105.  82.6  4.07  2.29  19.1 0.909 0.727  4.09  1.55
-#> 3 Cluster_3  15.1     8  353. 209.   3.23  4.00  16.8 0     0.143  3.29  3.5
+#> 1 Cluster_1  24.5  4.62  122.  96.9  4.00  2.52  18.5 0.75  0.688  4.12  2.44
+#> 2 Cluster_2  17.0  7.43  276. 151.   2.99  3.60  18.1 0.286 0      3     2.14
+#> 3 Cluster_3  14.6  8     388. 232.   3.34  4.16  16.4 0     0.222  3.44  4
 ```
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -33,7 +33,7 @@ reference:
     Parameter objects for tuning. Similar to
     [parameter objects from dials package](https://dials.tidymodels.org/reference/index.html#parameter-objects)
   contents:
-    - k
+    - num_clusters
 - title: Model based performance metrics
   desc: >
     These metrics use the fitted clustering model to extract values denoting how

diff --git a/man/augment.Rd b/man/augment.Rd