Skip to content

Commit ca02908

Browse files
Merge pull request #165 from tidymodels/engine-documentation
2 parents 6598746 + 6dc5388 commit ca02908

38 files changed

+1423
-4
lines changed

NAMESPACE

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ export(.k_means_fit_stats)
7171
export(augment)
7272
export(cluster_metric_set)
7373
export(control_cluster)
74+
export(cut_height)
7475
export(extract_centroids)
7576
export(extract_cluster_assignment)
7677
export(extract_fit_engine)
@@ -89,6 +90,9 @@ export(get_tidyclust_colors)
8990
export(glance)
9091
export(hier_clust)
9192
export(k_means)
93+
export(knit_engine_docs)
94+
export(linkage_method)
95+
export(list_md_problems)
9296
export(load_pkgs)
9397
export(make_classes_tidyclust)
9498
export(min_grid)
@@ -119,6 +123,7 @@ export(translate_tidyclust)
119123
export(translate_tidyclust.default)
120124
export(tune)
121125
export(tune_cluster)
126+
export(values_linkage_method)
122127
importFrom(dplyr,"%>%")
123128
importFrom(dplyr,bind_cols)
124129
importFrom(generics,augment)

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020

2121
* Cluster reordering is now done at the fitting time, not the extraction and prediction time. (#154)
2222

23+
* Engine-specific documentation has been added for all models and engines. (#159)
24+
2325
# tidyclust 0.1.2
2426

2527
* The cluster specification methods for `generics::tune_args()` and `generics::tunable()` are now registered unconditionally (#115).

R/aaa.R

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@ utils::globalVariables(
1313
)
1414
)
1515

16+
17+
# Extra reminder(s) for the package release checklist, returned as a character
# vector with one element per bullet.
# NOTE(review): presumably picked up by `usethis::use_release_issue()` --
# confirm against the release workflow.
release_bullets <- function() {
  "Run `knit_engine_docs()` and `devtools::document()` to update docs"
}
22+
1623
# nocov end
1724

1825
# ------------------------------------------------------------------------------

R/dials-params.R

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#' Cut Height
#'
#' Used in most `tidyclust::hier_clust()` models.
#'
#' @inheritParams dials::Laplace
#' @return A quantitative parameter object created by
#'   [dials::new_quant_param()]. The upper end of the default range is
#'   [dials::unknown()] and has to be supplied before the parameter can be
#'   used to generate values.
#' @examples
#' cut_height()
#' @export
cut_height <- function(range = c(0, dials::unknown()), trans = NULL) {
  dials::new_quant_param(
    # The height at which a dendrogram is cut is a continuous quantity, so the
    # parameter must be "double"; "integer" would restrict it to whole numbers.
    type = "double",
    range = range,
    inclusive = c(TRUE, TRUE),
    trans = trans,
    label = c(cut_height = "Cut Height"),
    finalize = NULL
  )
}
19+
20+
#' The agglomeration Linkage method
21+
#'
22+
#' @param values A character string of possible values. See `linkage_methods`
23+
#' in examples below.
24+
#'
25+
#' @details
26+
#' This parameter is used in `tidyclust` models for `hier_clust()`.
27+
#' @examples
28+
#' values_linkage_method
29+
#' linkage_method()
30+
#' @export
31+
linkage_method <- function(values = values_linkage_method) {
32+
dials::new_qual_param(
33+
type = "character",
34+
values = values,
35+
label = c(activation = "Linkage Method"),
36+
finalize = NULL
37+
)
38+
}
39+
40+
# Default candidate values offered by `linkage_method()`.
# NOTE(review): these look like the `method` choices of `stats::hclust()` --
# confirm before relying on that correspondence.
#' @rdname linkage_method
#' @export
values_linkage_method <- c(
  "ward.D",
  "ward.D2",
  "single",
  "complete",
  "average",
  "mcquitty",
  "median",
  "centroid"
)

R/engine_docs.R

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# https://github.com/tidymodels/parsnip/blob/main/R/engine_docs.R
2+
3+
#' Knit engine-specific documentation
#' @param pattern A regular expression to specify which files to knit. The
#' default knits all engine documentation files.
#' @param ... Options passed to [knitr::knit()].
#' @return A tibble with column `file` for the file name and `result` (a
#' character vector that echoes the output file name or, when there is
#' a failure, the error message).
#' @keywords internal
#' @export
knit_engine_docs <- function(pattern = NULL, ...) {
  # Anchor the extension so that only files ending in ".Rmd" are picked up.
  rmd_files <- list.files("man/rmd", pattern = "\\.Rmd$", full.names = TRUE)

  if (is.null(pattern)) {
    # Default: every file except templates and setup/helper documents.
    is_helper <- grepl("(template-)|(setup\\.)|(aaa\\.)", rmd_files)
    files <- rmd_files[!is_helper]
  } else {
    files <- rmd_files[grepl(pattern, rmd_files)]
  }
  outputs <- gsub("Rmd$", "md", files)

  # Knit each file inside `try()` so that a failure in one document does not
  # prevent the remaining ones from being knit. Extra arguments in `...` are
  # forwarded to `knitr::knit()`, as documented.
  knit_one <- function(input, output, ...) {
    try(knitr::knit(input, output, ...), silent = TRUE)
  }
  res <- map2(files, outputs, knit_one, ...)
  is_error <- map_lgl(res, ~ inherits(.x, "try-error"))

  if (any(is_error)) {
    # In some cases where there are issues, the md file is empty.
    errors <- res[which(is_error)]
    error_nms <- basename(files)[which(is_error)]
    errors <-
      map_chr(errors, ~ cli::ansi_strip(as.character(.x))) %>%
      map2_chr(error_nms, ~ paste0(.y, ": ", .x)) %>%
      map_chr(~ gsub("Error in .f(.x[[i]], ...) :", "", .x, fixed = TRUE))
    cat("There were failures during knitting:\n\n")
    cat(errors)
    cat("\n\n")
  }

  # Successful knits yield the output file name; failures yield the message.
  res <- map_chr(res, as.character)

  # Errors/warnings raised inside chunks end up in the rendered md files
  # rather than failing the knit, so report those as well.
  issues <- list_md_problems()
  if (nrow(issues) > 0) {
    cat("There are some issues with the help files:\n")
    print(issues)
  }

  invisible(tibble::tibble(file = basename(files), result = res))
}
49+
50+
#' Locate and show errors/warnings in engine-specific documentation
#' @return A tibble with column `file` for the file name, `line` indicating
#' the line where the error/warning occurred, and `problem` showing the
#' error/warning message.
#' @keywords internal
#' @export
list_md_problems <- function() {
  md_files <- list.files("man/rmd", pattern = "\\.md$", full.names = TRUE)

  # One row per line of `file` that contains "## Error" or "## Warning".
  get_errors <- function(file) {
    lines <- readLines(file)
    line <- grep("## (Error|Warning)", lines)
    # Name every column explicitly so the result matches the documented
    # `file` / `line` / `problem` layout.
    tibble::tibble(file = basename(file), line = line, problem = lines[line])
  }

  problems <- map(md_files, get_errors)

  # Used as the prototype so that the documented columns are present even
  # when there are no md files or no problems at all.
  empty <- tibble::tibble(
    file = character(),
    line = integer(),
    problem = character()
  )

  # Bind with each per-file tibble as its own input to `vec_rbind()`.
  do.call(vctrs::vec_rbind, c(problems, list(.ptype = empty)))
}

R/hier_clust.R

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@
55
#' `hier_clust()` defines a model that fits clusters based on a distance-based
66
#' dendrogram
77
#'
8+
#' There are different ways to fit this model, and the method of estimation is
9+
#' chosen by setting the model engine. The engine-specific pages for this model
10+
#' are listed below.
11+
#'
12+
#' - \link[=details_hier_clust_stats]{stats}
13+
#'
814
#' @param mode A single character string for the type of model. The only
915
#' possible value for this model is "partition".
1016
#' @param engine A single character string specifying what computational engine
@@ -23,7 +29,8 @@
2329
#' ## What does it mean to predict?
2430
#'
2531
#' To predict the cluster assignment for a new observation, we find the closest
26-
#' cluster. How we measure “closeness” is dependent on the specified type of linkage in the model:
32+
#' cluster. How we measure “closeness” is dependent on the specified type of
33+
#' linkage in the model:
2734
#'
2835
#' - *single linkage*: The new observation is assigned to the same cluster as
2936
#' its nearest observation from the training data.

R/hier_clust_stats.R

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#' Hierarchical (Agglomerative) Clustering via stats
2+
#'
3+
#' [hier_clust()] creates a Hierarchical (Agglomerative) Clustering model.
4+
#'
5+
#' @includeRmd man/rmd/hier_clust_stats.md details
6+
#'
7+
#' @name details_hier_clust_stats
8+
#' @keywords internal
9+
NULL
10+
11+
# See inst/README-DOCS.md for a description of how these files are processed

R/k_means.R

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,16 @@
44
#'
55
#' `k_means()` defines a model that fits clusters based on distances to a number
66
#' of centers. This definition doesn't just include K-means, but includes
7-
#' models like K-prototypes
7+
#' models like K-prototypes.
8+
#'
9+
#' There are different ways to fit this model, and the method of estimation is
10+
#' chosen by setting the model engine. The engine-specific pages for this model
11+
#' are listed below.
12+
#'
13+
#' - \link[=details_k_means_stats]{stats}: Classical K-means
14+
#' - \link[=details_k_means_ClusterR]{ClusterR}: Classical K-means
15+
#' - \link[=details_k_means_klaR]{klaR}: K-Modes
16+
#' - \link[=details_k_means_clustMixType]{clustMixType}: K-prototypes
817
#'
918
#' @param mode A single character string for the type of model. The only
1019
#' possible value for this model is "partition".

R/k_means_ClusterR.R

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#' K-means via ClusterR
2+
#'
3+
#' [k_means()] creates a K-means model. This engine uses the classical
4+
#' definition of a K-means model, which only takes numeric predictors.
5+
#'
6+
#' @includeRmd man/rmd/k_means_ClusterR.md details
7+
#'
8+
#' @name details_k_means_ClusterR
9+
#' @keywords internal
10+
NULL
11+
12+
# See inst/README-DOCS.md for a description of how these files are processed

R/k_means_clustMixType.R

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#' K-means via clustMixType
2+
#'
3+
#' [k_means()] creates a K-prototypes model. A K-prototypes model is the
4+
#' middle ground between a K-means and K-modes model, in the sense that it can
5+
#' be used with data that contains both numeric and categorical predictors.
6+
#'
7+
#' Both numeric and categorical predictors are required for this engine.
8+
#'
9+
#' @includeRmd man/rmd/k_means_clustMixType.md details
10+
#'
11+
#' @name details_k_means_clustMixType
12+
#' @keywords internal
13+
NULL
14+
15+
# See inst/README-DOCS.md for a description of how these files are processed

0 commit comments

Comments
 (0)