Skip to content

Commit a60c023

Browse files
Merge pull request #92 from tidymodels/RC0.1.0
Rc0.1.0
2 parents 521132e + ff75197 commit a60c023

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+463
-430
lines changed

.Rbuildignore

+2
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,5 @@
1010
^\.github$
1111
^dev$
1212
^vignettes/articles$
13+
^cran-comments\.md$
14+
^CRAN-SUBMISSION$

DESCRIPTION

+3-5
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
Package: tidyclust
22
Title: A Common API to Clustering
3-
Version: 0.0.0.9000
3+
Version: 0.1.0
44
Authors@R: c(
55
person("Emil", "Hvitfeldt", , "[email protected]", role = c("aut", "cre"),
66
comment = c(ORCID = "0000-0002-0679-1945")),
77
person("Kelly", "Bodwin", , "[email protected]", role = "aut"),
8-
person("RStudio", role = c("cph", "fnd"))
8+
person("Posit Software, PBC.", role = c("cph", "fnd"))
99
)
1010
Description: A common interface to specifying clustering models, in the
11-
same style as `parsnip`. Creates unified interface across different
11+
same style as 'parsnip'. Creates unified interface across different
1212
functions and computational engines.
1313
License: MIT + file LICENSE
1414
URL: https://github.com/tidymodels/tidyclust
@@ -45,8 +45,6 @@ Suggests:
4545
rmarkdown,
4646
testthat (>= 3.0.0),
4747
workflows (>= 1.1.2)
48-
VignetteBuilder:
49-
knitr
5048
Config/Needs/website: pkgdown, tidymodels, tidyverse, palmerpenguins,
5149
patchwork, ggforce
5250
Config/testthat/edition: 3

NAMESPACE

-5
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,8 @@ S3method(tune_cluster,workflow)
4949
S3method(update,hier_clust)
5050
S3method(update,k_means)
5151
export("%>%")
52-
export(.convert_form_to_x_fit)
53-
export(.convert_form_to_x_new)
54-
export(.convert_x_to_form_fit)
55-
export(.convert_x_to_form_new)
5652
export(ClusterR_kmeans_fit)
5753
export(augment)
58-
export(check_empty_ellipse_tidyclust)
5954
export(cluster_metric_set)
6055
export(control_cluster)
6156
export(extract_centroids)

R/arguments.R

+8
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ make_form_call <- function(object, env = NULL) {
6565
fit_call
6666
}
6767

68+
#' Change arguments of a cluster specification
69+
#'
70+
#' @inheritParams parsnip::set_args
71+
#' @return An updated `cluster_spec` object.
6872
#' @export
6973
set_args.cluster_spec <- function(object, ...) {
7074
the_dots <- enquos(...)
@@ -90,6 +94,10 @@ set_args.cluster_spec <- function(object, ...) {
9094
)
9195
}
9296

97+
#' Change mode of a cluster specification
98+
#'
99+
#' @inheritParams parsnip::set_mode
100+
#' @return An updated `cluster_spec` object.
93101
#' @export
94102
set_mode.cluster_spec <- function(object, mode) {
95103
cls <- class(object)[1]

R/augment.R

+2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
#' @param new_data A data frame or matrix.
1010
#' @param ... Not currently used.
1111
#' @rdname augment
12+
#' @return A `tibble::tibble()` containing `new_data` with columns added
13+
#' depending on the mode of the model.
1214
#' @examples
1315
#' kmeans_spec <- k_means(num_clusters = 5) %>%
1416
#' set_engine("stats")

R/cluster_spec.R

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,10 @@
33
#' These functions are helpful when creating new packages that will register new
44
#' cluster specifications.
55
#'
6+
#' @return A `cluster_spec` object made to work with tidyclust.
7+
#'
68
#' @export
79
#' @keywords internal
8-
#' @rdname add_on_exports
910
new_cluster_spec <- function(cls, args, eng_args, mode, method, engine) {
1011
modelenv::check_spec_mode_engine_val(model = cls, mode = mode, eng = engine)
1112

R/convert_data.R

-5
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
#' @inheritParams fit.cluster_spec
3131
#' @rdname convert_helpers
3232
#' @keywords internal
33-
#' @export
3433
.convert_form_to_x_fit <- function(formula,
3534
data,
3635
...,
@@ -149,10 +148,8 @@ local_one_hot_contrasts <- function(frame = rlang::caller_env()) {
149148
#' @param weights A numeric vector containing the weights.
150149
#' @inheritParams fit.cluster_spec
151150
#' @inheritParams .convert_form_to_x_fit
152-
#'
153151
#' @rdname convert_helpers
154152
#' @keywords internal
155-
#' @export
156153
.convert_x_to_form_fit <- function(x,
157154
weights = NULL,
158155
remove_intercept = TRUE) {
@@ -210,7 +207,6 @@ make_formula <- function(x, short = TRUE) {
210207
#' @inheritParams predict.cluster_fit
211208
#' @rdname convert_helpers
212209
#' @keywords internal
213-
#' @export
214210
.convert_form_to_x_new <- function(object,
215211
new_data,
216212
na.action = stats::na.pass,
@@ -262,7 +258,6 @@ make_formula <- function(x, short = TRUE) {
262258

263259
#' @rdname convert_helpers
264260
#' @keywords internal
265-
#' @export
266261
.convert_x_to_form_new <- function(object, new_data) {
267262
new_data <- new_data[, object$x_var, drop = FALSE]
268263
if (!is.data.frame(new_data)) {

R/engines.R

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
#' Change engine of a cluster specification
2+
#'
3+
#' @inheritParams parsnip::set_engine
4+
#' @return An updated `cluster_spec` object.
15
#' @export
26
set_engine.cluster_spec <- function(object, engine, ...) {
37
mod_type <- class(object)[1]

R/extract.R

+47
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,50 @@
1+
#' Extract elements of a tidyclust model object
2+
#'
3+
#' @description
4+
#' These functions extract various elements from a clustering object. If they do
5+
#' not exist yet, an error is thrown.
6+
#'
7+
#' - `extract_fit_engine()` returns the engine specific fit embedded within
8+
#' a tidyclust model fit. For example, when using [tidyclust::k_means()]
9+
#' with the `"stats"` engine, this returns the underlying `kmeans` object.
10+
#'
11+
#' - `extract_parameter_set_dials()` returns a set of dials parameter objects.
12+
#'
13+
#' @param x A `cluster_fit` object or a `cluster_spec` object.
14+
#' @param ... Not currently used.
15+
#' @details
16+
#' Extracting the underlying engine fit can be helpful for describing the
17+
#' model (via `print()`, `summary()`, `plot()`, etc.) or for variable
18+
#' importance/explainers.
19+
#'
20+
#' However, users should not invoke the `predict()` method on an extracted
21+
#' model. There may be preprocessing operations that `tidyclust` has executed
22+
#' on the data prior to giving it to the model. Bypassing these can lead to
23+
#' errors or silently generating incorrect predictions.
24+
#'
25+
#' **Good**:
26+
#' ```r
27+
#' tidyclust_fit %>% predict(new_data)
28+
#' ```
29+
#'
30+
#' **Bad**:
31+
#' ```r
32+
#' tidyclust_fit %>% extract_fit_engine() %>% predict(new_data)
33+
#' ```
34+
#' @return
35+
#' The extracted value from the tidyclust object, `x`, as described in the
36+
#' description section.
37+
#'
38+
#' @name extract-tidyclust
39+
#' @examples
40+
#' kmeans_spec <- k_means(num_clusters = 2)
41+
#' kmeans_fit <- fit(kmeans_spec, ~ ., data = mtcars)
42+
#'
43+
#' extract_fit_engine(kmeans_fit)
44+
NULL
45+
46+
47+
#' @rdname extract-tidyclust
148
#' @export
249
extract_fit_engine.cluster_fit <- function (x, ...) {
350
if (any(names(x) == "fit")) {

R/extract_assignment.R

+2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
#' @param object A cluster_spec object.
44
#' @param ... Other arguments passed to methods.
55
#'
6+
#' @return A `tibble::tibble()` with 1 column `.cluster`.
7+
#'
68
#' @examples
79
#' kmeans_spec <- k_means(num_clusters = 5) %>%
810
#' set_engine("stats")

R/extract_characterization.R

+2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
#' @param object A cluster_spec object.
44
#' @param ... Other arguments passed to methods.
55
#'
6+
#' @return A `tibble::tibble()` with 1 row for each centroid and their position.
7+
#'
68
#' @examples
79
#' set.seed(1234)
810
#' kmeans_spec <- k_means(num_clusters = 5) %>%

R/extract_parameter_set_dials.R

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#' @rdname extract-tidyclust
12
#' @export
23
extract_parameter_set_dials.cluster_spec <- function(x, ...) {
34
all_args <- generics::tunable(x)

R/fit.R

+1
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
#' [hardhat::frequency_weights()] and [hardhat::importance_weights()] for
8080
#' examples.
8181
#' @rdname fit
82+
#' @return A fitted `cluster_fit` object.
8283
#' @export
8384
#' @export fit.cluster_spec
8485
fit.cluster_spec <- function(object,

R/hier_clust.R

+2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
#' `"complete"`, `"average"` (= UPGMA), `"mcquitty"` (= WPGMA), `"median"` (=
1919
#' WPGMC) or `"centroid"` (= UPGMC).
2020
#'
21+
#' @return A `hier_clust` cluster specification.
22+
#'
2123
#' @examples
2224
#' # Show all engines
2325
#' modelenv::get_from_env("hier_clust")

R/k_means.R

+2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
#' model is `"stats"`.
1313
#' @param num_clusters Positive integer, number of clusters in model.
1414
#'
15+
#' @return A `k_means` cluster specification.
16+
#'
1517
#' @examples
1618
#' # Show all engines
1719
#' modelenv::get_from_env("k_means")

R/load_ns.R

+9
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
#' Quietly load package namespace
2+
#'
3+
#' For one or more packages, load the namespace. This is used during parallel
4+
#' processing since the different parallel backends handle the package
5+
#' environments differently.
6+
#' @param x A character vector of packages.
7+
#' @param infra Should base tidymodels packages be loaded as well?
8+
#' @return An invisible NULL.
9+
#' @keywords internal
110
#' @export
211
load_pkgs.cluster_spec <- function(x, infra = TRUE, ...) {
312
pkgs <- required_pkgs(x)

R/metric-aaa.R

+7-2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
#'
88
#' @param fn A function.
99
#'
10+
#' @return A `cluster_metric` object.
11+
#'
1012
#' @param direction A string. One of:
1113
#' - `"maximize"`
1214
#' - `"minimize"`
@@ -34,13 +36,16 @@ new_cluster_metric <- function(fn, direction) {
3436

3537
#' Combine metric functions
3638
#'
37-
#' `metric_set()` allows you to combine multiple metric functions together into
38-
#' a new function that calculates all of them at once.
39+
#' `cluster_metric_set()` allows you to combine multiple metric functions
40+
#' together into a new function that calculates all of them at once.
3941
#'
4042
#' @param ... The bare names of the functions to be included in the metric set.
4143
#' These functions must be cluster metrics such as [sse_total()],
4244
#' [sse_ratio()], or [silhouette_avg()].
4345
#'
46+
#' @return A `cluster_metric_set()` object, combining the use of all input
47+
#' metrics.
48+
#'
4449
#' @details All functions must be:
4550
#' - Only cluster metrics
4651
#' @export

R/metric-sse.R

+6
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ sse_within <- function(object, new_data = NULL,
6969
#' @details Not to be confused with [sse_within()] that returns a tibble
7070
#' with within-cluster SSE, one row for each cluster.
7171
#'
72+
#' @return A tibble with 3 columns; `.metric`, `.estimator`, and `.estimate`.
73+
#'
7274
#' @family cluster metric
7375
#'
7476
#' @examples
@@ -131,6 +133,8 @@ sse_within_total_impl <- function(object, new_data = NULL,
131133
#' to Euclidean distance on processed data.
132134
#' @param ... Other arguments passed to methods.
133135
#'
136+
#' @return A tibble with 3 columns; `.metric`, `.estimator`, and `.estimate`.
137+
#'
134138
#' @family cluster metric
135139
#'
136140
#' @examples
@@ -209,6 +213,8 @@ sse_total_impl <- function(object,
209213
#' to Euclidean distance on processed data.
210214
#' @param ... Other arguments passed to methods.
211215
#'
216+
#' @return A tibble with 3 columns; `.metric`, `.estimator`, and `.estimate`.
217+
#'
212218
#' @family cluster metric
213219
#'
214220
#' @examples

R/predict_cluster.R

+3
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
#'
33
#' These are internal functions not meant to be directly called by the user.
44
#'
5+
#' @return A `tibble::tibble()`.
6+
#'
57
#' @keywords internal
68
#' @rdname other_predict
79
#' @inheritParams predict_cluster.cluster_fit
@@ -11,6 +13,7 @@ predict_cluster <- function(object, ...) {
1113
}
1214

1315
#' @keywords internal
16+
#' @return A `tibble::tibble()`.
1417
#' @rdname other_predict
1518
#' @inheritParams predict.cluster_fit
1619
#' @method predict_cluster cluster_fit

R/predict_helpers.R

+1-3
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,7 @@ stats_kmeans_predict <- function(object, new_data, prefix = "Cluster_") {
77
}
88

99
clusterR_kmeans_predict <- function(object, new_data, prefix = "Cluster_") {
10-
res <- object$centroids[unique(object$clusters), , drop = FALSE]
11-
res <- flexclust::dist2(res, new_data)
12-
res <- apply(res, 2, which.min)
10+
res <- predict(object, new_data)
1311
res <- paste0(prefix, res)
1412
factor(res)
1513
}

R/symbol.R

+1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ is_windows <- function() {
8888
#' Get colors for tidyclust text.
8989
#'
9090
#' @keywords internal
91+
#' @return A list of `cli` functions.
9192
#' @export
9293
#' @rdname empty_ellipses
9394
get_tidyclust_colors <- function() tidyclust_color

R/translate.R

+3-6
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#'
33
#' `translate_tidyclust()` will translate a model specification into a
44
#' code object that is specific to a particular engine (e.g. R package). It
5-
#' translate_tidyclusts generic parameters to their counterparts.
5+
#' translates generic parameters to their counterparts.
66
#'
77
#' @param x A model specification.
88
#' @param engine The computational engine for the model (see `?set_engine`).
@@ -25,6 +25,8 @@
2525
#' to understand what the underlying syntax would be. It should not be used to
2626
#' modify the cluster specification.
2727
#'
28+
#' @return Prints translated code.
29+
#'
2830
#' @export
2931
translate_tidyclust <- function(x, ...) {
3032
UseMethod("translate_tidyclust")
@@ -142,11 +144,6 @@ deharmonize <- function(args, key) {
142144
args[!is.na(merged$original)]
143145
}
144146

145-
#' Check to ensure that ellipses are empty
146-
#' @param ... Extra arguments.
147-
#' @return If an error is not thrown (from non-empty ellipses), a NULL list.
148-
#' @keywords internal
149-
#' @export
150147
check_empty_ellipse_tidyclust <- function(...) {
151148
terms <- quos(...)
152149
if (!rlang::is_empty(terms)) {

R/tune_cluster.R

+1-10
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
#' grid <- tibble(num_clusters = 1:3)
4444
#'
4545
#' set.seed(4400)
46-
#' folds <- vfold_cv(mtcars)
46+
#' folds <- vfold_cv(mtcars, v = 2)
4747
#'
4848
#' res <- tune_cluster(
4949
#' wflow,
@@ -326,15 +326,6 @@ tune_cluster_loop_iter <- function(split,
326326
load_pkgs(workflow)
327327
load_namespace(control$pkgs)
328328

329-
# After package loading to avoid potential package RNG manipulation
330-
if (!is.null(seed)) {
331-
# `assign()`-ing the random seed alters the `kind` type to L'Ecuyer-CMRG,
332-
# so we have to ensure it is restored on exit
333-
old_kind <- RNGkind()[[1]]
334-
assign(".Random.seed", seed, envir = globalenv())
335-
on.exit(RNGkind(kind = old_kind), add = TRUE)
336-
}
337-
338329
control_parsnip <- parsnip::control_parsnip(verbosity = 0, catch = TRUE)
339330
control_workflow <- workflows::control_workflow(control_parsnip)
340331

0 commit comments

Comments
 (0)