Skip to content

Commit

Permalink
Merge pull request #59 from EmilHvitfeldt/no-more-k
Browse files Browse the repository at this point in the history
Finally change k -> num_clusters
  • Loading branch information
EmilHvitfeldt authored Jul 19, 2022
2 parents d4b77b2 + b999403 commit cea4c44
Show file tree
Hide file tree
Showing 48 changed files with 149 additions and 150 deletions.
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,13 @@ export(get_from_env_tidyclust)
export(get_model_env_tidyclust)
export(get_pred_type_tidyclust)
export(glance)
export(k)
export(k_means)
export(load_pkgs)
export(make_classes_tidyclust)
export(min_grid)
export(new_cluster_metric)
export(new_cluster_spec)
export(num_clusters)
export(predict.cluster_fit)
export(predict_cluster)
export(predict_cluster.cluster_fit)
Expand Down
2 changes: 1 addition & 1 deletion R/augment.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#' @rdname augment
#' @export
#' @examples
#' kmeans_spec <- k_means(k = 5) %>%
#' kmeans_spec <- k_means(num_clusters = 5) %>%
#' set_engine("stats")
#'
#' kmeans_fit <- fit(kmeans_spec, ~., mtcars)
Expand Down
6 changes: 3 additions & 3 deletions R/dials.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
#'
#' @inheritParams dials::Laplace
#' @examples
#' k()
#' num_clusters()
#' @export
k <- function(range = c(1L, 10L), trans = NULL) {
num_clusters <- function(range = c(1L, 10L), trans = NULL) {
dials::new_quant_param(
type = "integer",
range = range,
inclusive = c(TRUE, TRUE),
trans = trans,
label = c(k = "# Clusters"),
label = c(num_clusters = "# Clusters"),
finalize = NULL
)
}
2 changes: 1 addition & 1 deletion R/extract_assignment.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#' @param ... Other arguments passed to methods.
#'
#' @examples
#' kmeans_spec <- k_means(k = 5) %>%
#' kmeans_spec <- k_means(num_clusters = 5) %>%
#' set_engine("stats")
#'
#' kmeans_fit <- fit(kmeans_spec, ~., mtcars)
Expand Down
2 changes: 1 addition & 1 deletion R/extract_characterization.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#'
#' @examples
#' set.seed(1234)
#' kmeans_spec <- k_means(k = 5) %>%
#' kmeans_spec <- k_means(num_clusters = 5) %>%
#' set_engine("stats")
#'
#' kmeans_fit <- fit(kmeans_spec, ~., mtcars)
Expand Down
2 changes: 1 addition & 1 deletion R/extract_summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#' @return A list with various summary elements
#'
#' @examples
#' kmeans_spec <- k_means(k = 5) %>%
#' kmeans_spec <- k_means(num_clusters = 5) %>%
#' set_engine("stats")
#'
#' kmeans_fit <- fit(kmeans_spec, ~., mtcars)
Expand Down
4 changes: 2 additions & 2 deletions R/finalize.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
#' @return An updated version of `x`.
#' @export
#' @examples
#' kmeans_spec <- k_means(k = tune())
#' kmeans_spec <- k_means(num_clusters = tune())
#'
#' best_params <- data.frame(k = 5)
#' best_params <- data.frame(num_clusters = 5)
#' best_params
#'
#' kmeans_spec
Expand Down
2 changes: 1 addition & 1 deletion R/fit.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
#' @examples
#' library(dplyr)
#'
#' kmeans_mod <- k_means(k = 5)
#' kmeans_mod <- k_means(num_clusters = 5)
#'
#' using_formula <-
#' kmeans_mod %>%
Expand Down
12 changes: 6 additions & 6 deletions R/k_means.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#' @param engine A single character string specifying what computational engine
#' to use for fitting. Possible engines are listed below. The default for this
#' model is `"stats"`.
#' @param k Positive integer, number of clusters in model.
#' @param num_clusters Positive integer, number of clusters in model.
#'
#' @examples
#' # show_engines("k_means")
Expand All @@ -20,9 +20,9 @@
k_means <-
function(mode = "partition",
engine = "stats",
k = NULL) {
num_clusters = NULL) {
args <- list(
k = enquo(k)
num_clusters = enquo(num_clusters)
)

new_cluster_spec(
Expand Down Expand Up @@ -61,7 +61,7 @@ translate_tidyclust.k_means <- function(x, engine = x$engine, ...) {
#' @export
update.k_means <- function(object,
parameters = NULL,
k = NULL,
num_clusters = NULL,
fresh = FALSE, ...) {

eng_args <- parsnip::update_engine_parameters(object$eng_args, ...)
Expand All @@ -70,7 +70,7 @@ update.k_means <- function(object,
parameters <- parsnip::check_final_param(parameters)
}
args <- list(
k = enquo(k)
num_clusters = enquo(num_clusters)
)

args <- parsnip::update_main_parameters(args, parameters)
Expand Down Expand Up @@ -104,7 +104,7 @@ check_args.k_means <- function(object) {

args <- lapply(object$args, rlang::eval_tidy)

if (all(is.numeric(args$k)) && any(args$k < 0))
if (all(is.numeric(args$num_clusters)) && any(args$num_clusters < 0))
rlang::abort("The number of centers should be >= 0.")

invisible(object)
Expand Down
8 changes: 4 additions & 4 deletions R/k_means_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ set_encoding_tidyclust(
set_model_arg_tidyclust(
model = "k_means",
eng = "stats",
tidyclust = "k",
tidyclust = "num_clusters",
original = "centers",
func = list(pkg = "tidyclust", fun = "k"),
func = list(pkg = "tidyclust", fun = "num_clusters"),
has_submodel = TRUE
)

Expand Down Expand Up @@ -90,9 +90,9 @@ set_encoding_tidyclust(
set_model_arg_tidyclust(
model = "k_means",
eng = "ClusterR",
tidyclust = "k",
tidyclust = "num_clusters",
original = "clusters",
func = list(pkg = "tidyclust", fun = "k"),
func = list(pkg = "tidyclust", fun = "num_clusters"),
has_submodel = TRUE
)

Expand Down
4 changes: 2 additions & 2 deletions R/metric-silhouette.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#' @return A tibble giving the silhouettes for each observation.
#'
#' @examples
#' kmeans_spec <- k_means(k = 5) %>%
#' kmeans_spec <- k_means(num_clusters = 5) %>%
#' set_engine("stats")
#'
#' kmeans_fit <- fit(kmeans_spec, ~., mtcars)
Expand Down Expand Up @@ -50,7 +50,7 @@ silhouettes <- function(object, new_data = NULL, dists = NULL,
#' @return A double; the average silhouette.
#'
#' @examples
#' kmeans_spec <- k_means(k = 5) %>%
#' kmeans_spec <- k_means(num_clusters = 5) %>%
#' set_engine("stats")
#'
#' kmeans_fit <- fit(kmeans_spec, ~., mtcars)
Expand Down
8 changes: 4 additions & 4 deletions R/metric-sse.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#' cluster.
#'
#' @examples
#' kmeans_spec <- k_means(k = 5) %>%
#' kmeans_spec <- k_means(num_clusters = 5) %>%
#' set_engine("stats")
#'
#' kmeans_fit <- fit(kmeans_spec, ~., mtcars)
Expand Down Expand Up @@ -66,7 +66,7 @@ within_cluster_sse <- function(object, new_data = NULL,
#'
#'
#' @examples
#' kmeans_spec <- k_means(k = 5) %>%
#' kmeans_spec <- k_means(num_clusters = 5) %>%
#' set_engine("stats")
#'
#' kmeans_fit <- fit(kmeans_spec, ~., mtcars)
Expand Down Expand Up @@ -130,7 +130,7 @@ tot_wss_impl <- function(object, new_data = NULL,
#'
#'
#' @examples
#' kmeans_spec <- k_means(k = 5) %>%
#' kmeans_spec <- k_means(num_clusters = 5) %>%
#' set_engine("stats")
#'
#' kmeans_fit <- fit(kmeans_spec, ~., mtcars)
Expand Down Expand Up @@ -206,7 +206,7 @@ tot_sse_impl <- function(object, new_data = NULL, dist_fun = Rfast::dista, ...)
#' @param ... Other arguments passed to methods.
#'
#' @examples
#' kmeans_spec <- k_means(k = 5) %>%
#' kmeans_spec <- k_means(num_clusters = 5) %>%
#' set_engine("stats")
#'
#' kmeans_fit <- fit(kmeans_spec, ~., mtcars)
Expand Down
2 changes: 1 addition & 1 deletion R/predict.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
#' multivariate models.
#'
#' @examples
#' kmeans_spec <- k_means(k = 5) %>%
#' kmeans_spec <- k_means(num_clusters = 5) %>%
#' set_engine("stats")
#'
#' kmeans_fit <- fit(kmeans_spec, ~., mtcars)
Expand Down
2 changes: 1 addition & 1 deletion R/tunable.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ stats_k_means_engine_args <-
"centers"
),
call_info = list(
list(pkg = "tidyclust", fun = "k")
list(pkg = "tidyclust", fun = "num_clusters")
),
source = "cluster_spec",
component = "k_means",
Expand Down
8 changes: 4 additions & 4 deletions R/update.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@
#' @return An updated cluster specification.
#' @name tidyclust_update
#' @examples
#' kmeans_spec <- k_means(k = 5)
#' kmeans_spec <- k_means(num_clusters = 5)
#' kmeans_spec
#' update(kmeans_spec, k = 1)
#' update(kmeans_spec, k = 1, fresh = TRUE)
#' update(kmeans_spec, num_clusters = 1)
#' update(kmeans_spec, num_clusters = 1, fresh = TRUE)
#'
#' param_values <- tibble::tibble(k = 10)
#' param_values <- tibble::tibble(num_clusters = 10)
#'
#' kmeans_spec %>% update(param_values)
NULL
2 changes: 1 addition & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ The first thing you do is to create a `cluster specification`. For this example
```{r}
library(tidyclust)
kmeans_spec <- k_means(k = 3) %>%
kmeans_spec <- k_means(num_clusters = 3) %>%
set_engine("stats")
kmeans_spec
Expand Down
60 changes: 30 additions & 30 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ example we are creating a K-means model, using the `stats` engine.
``` r
library(tidyclust)

kmeans_spec <- k_means(k = 3) %>%
kmeans_spec <- k_means(num_clusters = 3) %>%
set_engine("stats")

kmeans_spec
#> K Means Cluster Specification (partition)
#>
#> Main Arguments:
#> k = 3
#> num_clusters = 3
#>
#> Computational engine: stats
```
Expand All @@ -56,39 +56,39 @@ kmeans_spec_fit <- kmeans_spec %>%
kmeans_spec_fit
#> tidyclust cluster object
#>
#> K-means clustering with 3 clusters of sizes 11, 7, 14
#> K-means clustering with 3 clusters of sizes 9, 7, 16
#>
#> Cluster means:
#> mpg cyl disp hp drat wt qsec vs
#> 1 26.66364 4 105.1364 82.63636 4.070909 2.285727 19.13727 0.9090909
#> 2 19.74286 6 183.3143 122.28571 3.585714 3.117143 17.97714 0.5714286
#> 3 15.10000 8 353.1000 209.21429 3.229286 3.999214 16.77214 0.0000000
#> mpg cyl disp hp drat wt qsec vs
#> 1 14.64444 8.000000 388.2222 232.1111 3.343333 4.161556 16.40444 0.0000000
#> 2 17.01429 7.428571 276.0571 150.7143 2.994286 3.601429 18.11857 0.2857143
#> 3 24.50000 4.625000 122.2937 96.8750 4.002500 2.518000 18.54312 0.7500000
#> am gear carb
#> 1 0.7272727 4.090909 1.545455
#> 2 0.4285714 3.857143 3.428571
#> 3 0.1428571 3.285714 3.500000
#> 1 0.2222222 3.444444 4.000000
#> 2 0.0000000 3.000000 2.142857
#> 3 0.6875000 4.125000 2.437500
#>
#> Clustering vector:
#> Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
#> 2 2 1 2
#> 3 3 3 2
#> Hornet Sportabout Valiant Duster 360 Merc 240D
#> 3 2 3 1
#> 1 2 1 3
#> Merc 230 Merc 280 Merc 280C Merc 450SE
#> 1 2 2 3
#> 3 3 3 2
#> Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
#> 3 3 3 3
#> 2 2 1 1
#> Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
#> 3 1 1 1
#> Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
#> 1 3 3 3
#> Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
#> 3 2 2 1
#> Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
#> 3 1 1 1
#> 1 3 3 3
#> Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
#> 3 2 3 1
#> 1 3 1 3
#>
#> Within cluster sum of squares by cluster:
#> [1] 11848.37 13954.34 93643.90
#> (between_SS / total_SS = 80.8 %)
#> [1] 46659.32 11846.09 32838.00
#> (between_SS / total_SS = 85.3 %)
#>
#> Available components:
#>
Expand All @@ -106,8 +106,8 @@ predict(kmeans_spec_fit, mtcars[1:4, ])
#> <fct>
#> 1 Cluster_1
#> 2 Cluster_1
#> 3 Cluster_2
#> 4 Cluster_1
#> 3 Cluster_1
#> 4 Cluster_2
```

`extract_cluster_assignment()` returns the cluster assignments of the
Expand All @@ -120,13 +120,13 @@ extract_cluster_assignment(kmeans_spec_fit)
#> <fct>
#> 1 Cluster_1
#> 2 Cluster_1
#> 3 Cluster_2
#> 4 Cluster_1
#> 3 Cluster_1
#> 4 Cluster_2
#> 5 Cluster_3
#> 6 Cluster_1
#> 6 Cluster_2
#> 7 Cluster_3
#> 8 Cluster_2
#> 9 Cluster_2
#> 8 Cluster_1
#> 9 Cluster_1
#> 10 Cluster_1
#> # … with 22 more rows
#> # ℹ Use `print(n = ...)` to see more rows
Expand All @@ -139,7 +139,7 @@ extract_centroids(kmeans_spec_fit)
#> # A tibble: 3 × 12
#> .cluster mpg cyl disp hp drat wt qsec vs am gear carb
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Cluster_1 19.7 6 183. 122. 3.59 3.12 18.0 0.571 0.429 3.86 3.43
#> 2 Cluster_2 26.7 4 105. 82.6 4.07 2.29 19.1 0.909 0.727 4.09 1.55
#> 3 Cluster_3 15.1 8 353. 209. 3.23 4.00 16.8 0 0.143 3.29 3.5
#> 1 Cluster_1 24.5 4.62 122. 96.9 4.00 2.52 18.5 0.75 0.688 4.12 2.44
#> 2 Cluster_2 17.0 7.43 276. 151. 2.99 3.60 18.1 0.286 0 3 2.14
#> 3 Cluster_3 14.6 8 388. 232. 3.34 4.16 16.4 0 0.222 3.44 4
```
2 changes: 1 addition & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ reference:
Parameter objects for tuning. Similar to the
[parameter objects from the dials package](https://dials.tidymodels.org/reference/index.html#parameter-objects)
contents:
- k
- num_clusters
- title: Model based performance metrics
desc: >
These metrics use the fitted clustering model to extract values denoting how
Expand Down
2 changes: 1 addition & 1 deletion man/augment.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit cea4c44

Please sign in to comment.