diff --git a/dev/articles/hier_clust.html b/dev/articles/hier_clust.html
index b19db8d..4f2bdcb 100644
--- a/dev/articles/hier_clust.html
+++ b/dev/articles/hier_clust.html
@@ -117,7 +117,7 @@
Value
diff --git a/dev/search.json b/dev/search.json
index 0e2ae08..eaa6777 100644
--- a/dev/search.json
+++ b/dev/search.json
@@ -1 +1 @@
-[{"path":[]},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"our-pledge","dir":"","previous_headings":"","what":"Our Pledge","title":"Contributor Covenant Code of Conduct","text":"members, contributors, leaders pledge make participation community harassment-free experience everyone, regardless age, body size, visible invisible disability, ethnicity, sex characteristics, gender identity expression, level experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, sexual identity orientation. pledge act interact ways contribute open, welcoming, diverse, inclusive, healthy community.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"our-standards","dir":"","previous_headings":"","what":"Our Standards","title":"Contributor Covenant Code of Conduct","text":"Examples behavior contributes positive environment community include: Demonstrating empathy kindness toward people respectful differing opinions, viewpoints, experiences Giving gracefully accepting constructive feedback Accepting responsibility apologizing affected mistakes, learning experience Focusing best just us individuals, overall community Examples unacceptable behavior include: use sexualized language imagery, sexual attention advances kind Trolling, insulting derogatory comments, personal political attacks Public private harassment Publishing others’ private information, physical email address, without explicit permission conduct reasonably considered inappropriate professional setting","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-responsibilities","dir":"","previous_headings":"","what":"Enforcement Responsibilities","title":"Contributor Covenant Code of Conduct","text":"Community leaders responsible clarifying enforcing standards acceptable behavior take appropriate fair corrective action response behavior deem inappropriate, threatening, offensive, harmful. Community leaders right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct, communicate reasons moderation decisions appropriate.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Contributor Covenant Code of Conduct","text":"Code Conduct applies within community spaces, also applies individual officially representing community public spaces. Examples representing community include using official e-mail address, posting via official social media account, acting appointed representative online offline event.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement","dir":"","previous_headings":"","what":"Enforcement","title":"Contributor Covenant Code of Conduct","text":"Instances abusive, harassing, otherwise unacceptable behavior may reported community leaders responsible enforcement codeofconduct@posit.co. complaints reviewed investigated promptly fairly. community leaders obligated respect privacy security reporter incident.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-guidelines","dir":"","previous_headings":"","what":"Enforcement Guidelines","title":"Contributor Covenant Code of Conduct","text":"Community leaders follow Community Impact Guidelines determining consequences action deem violation Code Conduct:","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_1-correction","dir":"","previous_headings":"Enforcement Guidelines","what":"1. Correction","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Use inappropriate language behavior deemed unprofessional unwelcome community. Consequence: private, written warning community leaders, providing clarity around nature violation explanation behavior inappropriate. public apology may requested.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_2-warning","dir":"","previous_headings":"Enforcement Guidelines","what":"2. Warning","title":"Contributor Covenant Code of Conduct","text":"Community Impact: violation single incident series actions. Consequence: warning consequences continued behavior. interaction people involved, including unsolicited interaction enforcing Code Conduct, specified period time. includes avoiding interactions community spaces well external channels like social media. Violating terms may lead temporary permanent ban.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_3-temporary-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"3. Temporary Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: serious violation community standards, including sustained inappropriate behavior. Consequence: temporary ban sort interaction public communication community specified period time. public private interaction people involved, including unsolicited interaction enforcing Code Conduct, allowed period. Violating terms may lead permanent ban.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_4-permanent-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"4. Permanent Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Demonstrating pattern violation community standards, including sustained inappropriate behavior, harassment individual, aggression toward disparagement classes individuals. Consequence: permanent ban sort public interaction within community.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"attribution","dir":"","previous_headings":"","what":"Attribution","title":"Contributor Covenant Code of Conduct","text":"Code Conduct adapted Contributor Covenant, version 2.1, available https://www.contributor-covenant.org/version/2/1/code_of_conduct.html. Community Impact Guidelines inspired [Mozilla’s code conduct enforcement ladder][https://github.com/mozilla/inclusion]. answers common questions code conduct, see FAQ https://www.contributor-covenant.org/faq. Translations available https://www.contributor-covenant.org/translations.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2023 tidyclust authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/articles/hier_clust.html","id":"setup","dir":"Articles","previous_headings":"","what":"Setup","title":"Hierarchical Clustering","text":"Load libraries: Load clean dataset: yet read k_means vignette, recommend reading first; functions used vignette explained detail .","code":"library(workflows) library(parsnip) library(tidyclust) #> #> Attaching package: 'tidyclust' #> The following objects are masked from 'package:parsnip': #> #> knit_engine_docs, list_md_problems library(tidyverse) #> ── Attaching core tidyverse packages ────────────────── tidyverse 2.0.0 ── #> ✔ dplyr 1.1.4 ✔ readr 2.1.5 #> ✔ forcats 1.0.0 ✔ stringr 1.5.1 #> ✔ ggplot2 3.5.1 ✔ tibble 3.2.1 #> ✔ lubridate 1.9.4 ✔ tidyr 1.3.1 #> ✔ purrr 1.0.2 #> ── Conflicts ──────────────────────────────────── tidyverse_conflicts() ── #> ✖ dplyr::filter() masks stats::filter() #> ✖ dplyr::lag() masks stats::lag() #> ℹ Use the conflicted package (
) to force all conflicts to become errors library(tidymodels) #> ── Attaching packages ──────────────────────────────── tidymodels 1.2.0 ── #> ✔ broom 1.0.7 ✔ rsample 1.2.1 #> ✔ dials 1.3.0 ✔ tune 1.2.1 #> ✔ infer 1.0.7 ✔ workflowsets 1.1.0 #> ✔ modeldata 1.4.0 ✔ yardstick 1.3.2 #> ✔ recipes 1.1.0 #> ── Conflicts ─────────────────────────────────── tidymodels_conflicts() ── #> ✖ scales::discard() masks purrr::discard() #> ✖ dplyr::filter() masks stats::filter() #> ✖ recipes::fixed() masks stringr::fixed() #> ✖ tidyclust::knit_engine_docs() masks parsnip::knit_engine_docs() #> ✖ dplyr::lag() masks stats::lag() #> ✖ tidyclust::list_md_problems() masks parsnip::list_md_problems() #> ✖ yardstick::spec() masks readr::spec() #> ✖ recipes::step() masks stats::step() #> • Use suppressPackageStartupMessages() to eliminate package startup messages set.seed(838383) data(\"penguins\", package = \"modeldata\") penguins <- penguins %>% select(bill_length_mm, bill_depth_mm) %>% drop_na() # shuffle rows penguins <- penguins %>% sample_n(nrow(penguins))"},{"path":"https://tidyclust.tidymodels.org/dev/articles/hier_clust.html","id":"a-brief-introduction-to-hierarchical-clustering","dir":"Articles","previous_headings":"","what":"A brief introduction to hierarchical clustering","title":"Hierarchical Clustering","text":"Hierarchical Clustering, sometimes called Agglomerative Clustering, method unsupervised learning produces dendrogram, can used partition observations clusters. hierarchical clustering process begins observation ’s cluster; .e., n clusters n observations. closest two observations joined together single cluster. process continues, closest two clusters joined (“aggolermated”) step. result process dendrogram, shows joining clusters tree form:","code":"#> Warning in dist(fake_dat): NAs introduced by coercion"},{"path":"https://tidyclust.tidymodels.org/dev/articles/hier_clust.html","id":"clusters-from-dendrogram","dir":"Articles","previous_headings":"A brief introduction to hierarchical clustering","what":"Clusters from dendrogram","title":"Hierarchical Clustering","text":"produce partition-style cluster assignment dendrogram, one must “cut” tree chosen height: observations remain joined dendrogram cut height considered cluster together:","code":"#> # A tibble: 5 × 2 #> observation cluster_assignment #>
#> 1 a 1 #> 2 b 2 #> 3 c 2 #> 4 d 3 #> 5 e 3"},{"path":"https://tidyclust.tidymodels.org/dev/articles/hier_clust.html","id":"methods-of-aggolmeration","dir":"Articles","previous_headings":"A brief introduction to hierarchical clustering","what":"Methods of aggolmeration","title":"Hierarchical Clustering","text":"every step agglomeration, measure distances current clusters. cluster containing (possibly) multiple points, mean measure distance? four common approaches cluster-cluster distancing, aka “linkage”: single linkage: distance two clusters distance two closest observations. average linkage: distance two clusters average distances observations one cluster observations . complete linkage: distance two clusters distance two furthest observations. centroid method: distance two clusters distance centroids (geometric mean median). Ward’s method: distance two clusters proportional increase error sum squares (ESS) result joining . ESS computed sum squared distances observations cluster, centroid cluster. also worth mentioning McQuitty method, retains information previously joined clusters measure future linkage distance. method currently supported model fitting, prediction, tidyclust.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/articles/hier_clust.html","id":"hier_clust-specification-in-tidyclust","dir":"Articles","previous_headings":"","what":"hier_clust specification in {tidyclust}","title":"Hierarchical Clustering","text":"specify hierarchical clustering model tidyclust, simply choose value num_clusters (optionally) linkage method: Currently, supported engine stats::hclust(). default linkage","code":"hc_spec <- hier_clust( num_clusters = 3, linkage_method = \"average\" ) hc_spec #> Hierarchical Clustering Specification (partition) #> #> Main Arguments: #> num_clusters = 3 #> linkage_method = average #> #> Computational engine: stats"},{"path":"https://tidyclust.tidymodels.org/dev/articles/hier_clust.html","id":"fitting-hier_clust-models","dir":"Articles","previous_headings":"","what":"Fitting hier_clust models","title":"Hierarchical Clustering","text":"fit model data usual way: produce dendrogram plot, access engine fit: (Although see , dendrograms often informative moderate large size datasets.) can also extract standard tidyclust summary list: Note , although hierarchical clustering algorithm focused cluster centroids way kk-means , still able compute geometric mean predictors cluster:","code":"hc_fit <- hc_spec %>% fit(~ bill_length_mm + bill_depth_mm, data = penguins ) hc_fit %>% summary() #> Length Class Mode #> spec 4 hier_clust list #> fit 7 hclust list #> elapsed 1 -none- list #> preproc 4 -none- list hc_fit$fit %>% plot() hc_summary <- hc_fit %>% extract_fit_summary() hc_summary %>% str() #> List of 7 #> $ cluster_names : Factor w/ 3 levels \"Cluster_1\",\"Cluster_2\",..: 1 2 3 #> $ centroids : tibble [3 × 2] (S3: tbl_df/tbl/data.frame) #> ..$ bill_length_mm: num [1:3] 38.8 47.9 56.6 #> ..$ bill_depth_mm : num [1:3] 18.3 16.2 16.7 #> $ n_members : int [1:3] 153 184 5 #> $ sse_within_total_total: num [1:3] 378.4 573.9 9.7 #> $ sse_total : num 1803 #> $ orig_labels : NULL #> $ cluster_assignments : Factor w/ 3 levels \"Cluster_1\",\"Cluster_2\",..: 1 1 1 2 2 2 2 2 1 2 ... hc_fit %>% extract_centroids() #> # A tibble: 3 × 3 #> .cluster bill_length_mm bill_depth_mm #> #> 1 Cluster_1 38.8 18.3 #> 2 Cluster_2 47.9 16.2 #> 3 Cluster_3 56.6 16.7"},{"path":"https://tidyclust.tidymodels.org/dev/articles/hier_clust.html","id":"prediction","dir":"Articles","previous_headings":"","what":"Prediction","title":"Hierarchical Clustering","text":"predict cluster assignment new observation, find closest cluster. measure “closeness” dependent specified type linkage model: single linkage: new observation assigned cluster nearest observation training data. complete linkage: new observation assigned cluster smallest maximum distances training observations new observation. average linkage: new observation assigned cluster smallest average distances training observations new observation. centroid method: new observation assigned cluster closest centroid, prediction k_means. Ward’s method: new observation assigned cluster smallest increase error sum squares (ESS) due new addition. ESS computed sum squared distances observations cluster, centroid cluster. ’s important note guarantee predict() training data produce results extract_cluster_assignments(). process clusters created aggolmerations results particular partition; training observation treated new data, predicted manner truly new information.","code":"hc_preds <- hc_fit %>% predict(penguins) hc_preds #> # A tibble: 342 × 1 #> .pred_cluster #> #> 1 Cluster_1 #> 2 Cluster_1 #> 3 Cluster_1 #> 4 Cluster_2 #> 5 Cluster_3 #> 6 Cluster_3 #> 7 Cluster_2 #> 8 Cluster_2 #> 9 Cluster_1 #> 10 Cluster_2 #> # ℹ 332 more rows bind_cols( hc_preds, extract_cluster_assignment(hc_fit) ) #> # A tibble: 342 × 2 #> .pred_cluster .cluster #> #> 1 Cluster_1 Cluster_1 #> 2 Cluster_1 Cluster_1 #> 3 Cluster_1 Cluster_1 #> 4 Cluster_2 Cluster_2 #> 5 Cluster_3 Cluster_2 #> 6 Cluster_3 Cluster_2 #> 7 Cluster_2 Cluster_2 #> 8 Cluster_2 Cluster_2 #> 9 Cluster_1 Cluster_1 #> 10 Cluster_2 Cluster_2 #> # ℹ 332 more rows"},{"path":"https://tidyclust.tidymodels.org/dev/articles/hier_clust.html","id":"reconciling-partitions","dir":"Articles","previous_headings":"","what":"Reconciling partitions","title":"Hierarchical Clustering","text":"Suppose produced cluster assignments two models: hierarchical clustering model three clusters () kk-means clustering model five clusters (). can combine assignments? notice three-cluster assignments hier_clust line perfectly five-cluster assignments k_means. However, fully unrelated assignments. example, KM_2 kk-means assignment fell inside HC_1 hierarchical assignments. goal relabel five kk-means clusters match three cluster names hierarchical output. can accomplished reconcile_clusterings_mapping(). function expects two vectors cluster labels input. first label matched, second label recoded first. trying simply match names across two -size clusterings, option one_to_one must set FALSE. example, can see KM_1, KM_2, KM_5 matched HC_1; KM_3 KM_4 matched HC_2. Notice clusters KM set matched HC_3; evidently, small cluster manifest clearly kk-means clustering.","code":"km_spec <- k_means(num_clusters = 5) km_fit <- km_spec %>% fit(~., data = penguins) km_preds <- predict(km_fit, penguins, prefix = \"KM_\") hc_preds <- predict(hc_fit, penguins, prefix = \"HC_\") tibble( hc = hc_preds$.pred_cluster, km = km_preds$.pred_cluster ) %>% count(hc, km) #> # A tibble: 8 × 3 #> hc km n #> #> 1 HC_1 KM_1 80 #> 2 HC_1 KM_2 72 #> 3 HC_1 KM_3 3 #> 4 HC_1 KM_5 1 #> 5 HC_2 KM_3 28 #> 6 HC_2 KM_4 64 #> 7 HC_2 KM_5 76 #> 8 HC_3 KM_4 18 reconcile_clusterings_mapping( primary = hc_preds$.pred_cluster, alternative = km_preds$.pred_cluster, one_to_one = FALSE ) #> # A tibble: 342 × 3 #> primary alt alt_recoded #> #> 1 HC_1 KM_1 HC_1 #> 2 HC_1 KM_2 HC_1 #> 3 HC_1 KM_2 HC_1 #> 4 HC_2 KM_3 HC_2 #> 5 HC_3 KM_4 HC_2 #> 6 HC_3 KM_4 HC_2 #> 7 HC_2 KM_3 HC_2 #> 8 HC_2 KM_5 HC_2 #> 9 HC_1 KM_2 HC_1 #> 10 HC_2 KM_4 HC_2 #> # ℹ 332 more rows"},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"setup","dir":"Articles","previous_headings":"","what":"Setup","title":"k-means","text":"Load libraries: Load clean dataset: end vignette, find brief overview k-means algorithm, well algorithmic variant details, like reference.","code":"library(workflows) library(parsnip) library(tidyclust) library(tidyverse) library(tidymodels) data(\"penguins\", package = \"modeldata\") penguins <- penguins %>% select(bill_length_mm, bill_depth_mm) %>% drop_na() # shuffle rows penguins <- penguins %>% sample_n(nrow(penguins))"},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"k-means-specification-in-tidyclust","dir":"Articles","previous_headings":"","what":"k-means specification in {tidyclust}","title":"k-means","text":"specify k-means model tidyclust, simply choose value num_clusters: currently two engines: stats::kmeans (default) ClusterR::KMeans_rcpp. also possible change algorithmic details implementation, changing engine /using corresponding arguments engine functions: Note stats::kmeans ClusterR::KMeans_rcpp implementations different default settings algorithmic details, recommended deliberate explicit choosing options. (See end document detail algorithmic options defaults.)","code":"kmeans_spec <- k_means(num_clusters = 3) kmeans_spec #> K Means Cluster Specification (partition) #> #> Main Arguments: #> num_clusters = 3 #> #> Computational engine: stats kmeans_spec_lloyd <- k_means(num_clusters = 3) %>% parsnip::set_engine(\"stats\", algorithm = \"Lloyd\") kmeans_spec_cr <- k_means(num_clusters = 3) %>% parsnip::set_engine(\"ClusterR\", initializer = \"random\")"},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"fitting-k-means-models","dir":"Articles","previous_headings":"","what":"Fitting k-means models","title":"k-means","text":"specified, model may “fit” dataset providing formula data frame manner tidymodels model fit. Note unlike supervised modeling, formula include response variable. access results produced engine - case, stats::kmeans - simply extract fit fitted model object: tidyclust also provides function, extract_fit_summary(), produce list model summary information format consistent across cluster model specifications engines","code":"kmeans_fit <- kmeans_spec %>% fit(~ bill_length_mm + bill_depth_mm, data = penguins ) kmeans_fit %>% summary() #> Length Class Mode #> spec 4 k_means list #> fit 9 kmeans list #> elapsed 1 -none- list #> preproc 4 -none- list kmeans_fit$fit #> K-means clustering with 3 clusters of sizes 141, 116, 85 #> #> Cluster means: #> bill_length_mm bill_depth_mm #> 2 38.40355 18.27943 #> 3 45.51379 15.64397 #> 1 50.90353 17.33647 #> #> Clustering vector: #> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 #> 1 1 1 2 3 3 2 2 1 3 3 1 1 1 2 1 3 2 #> 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 #> 2 1 1 1 3 1 3 3 2 2 2 2 2 3 2 2 2 3 #> 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 #> 3 1 2 3 2 3 2 2 2 3 1 1 1 3 1 1 2 1 #> 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 #> 3 2 2 1 2 1 2 2 1 3 3 1 1 1 3 3 1 1 #> 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 #> 1 1 3 2 3 1 3 1 1 1 1 3 3 2 1 2 3 1 #> 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 #> 1 2 1 2 3 1 1 2 2 2 2 2 1 1 2 1 1 2 #> 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 #> 1 2 1 1 1 2 3 2 2 3 2 2 1 1 1 1 3 2 #> 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 #> 1 1 2 1 3 1 1 3 1 2 3 2 1 1 2 2 3 2 #> 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 #> 2 2 3 3 1 3 2 1 2 3 3 1 3 2 1 2 1 2 #> 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 #> 1 2 3 2 2 2 1 2 1 2 1 1 1 2 3 3 1 2 #> 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 #> 1 1 2 2 1 2 1 1 2 1 2 3 3 1 1 3 1 1 #> 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 #> 2 2 1 3 2 1 1 1 1 2 3 3 1 1 1 2 1 3 #> 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 #> 3 1 1 1 3 3 1 1 2 1 1 2 3 1 1 2 3 1 #> 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 #> 1 3 1 1 2 3 2 2 1 3 3 3 2 1 1 3 1 1 #> 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 #> 2 3 1 2 2 3 1 2 3 3 2 1 3 3 2 2 1 1 #> 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 #> 2 1 1 1 1 1 2 1 1 3 3 3 2 1 2 1 1 1 #> 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 #> 2 1 2 1 1 3 2 2 2 3 2 1 3 1 1 1 3 1 #> 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 #> 2 2 2 1 3 2 3 3 2 1 2 2 1 3 2 2 2 2 #> 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 #> 3 2 2 2 3 1 3 1 2 1 3 3 3 1 3 1 2 2 #> #> Within cluster sum of squares by cluster: #> [1] 944.4986 754.7437 617.9859 #> (between_SS / total_SS = 79.8 %) #> #> Available components: #> #> [1] \"cluster\" \"centers\" \"totss\" \"withinss\" #> [5] \"tot.withinss\" \"betweenss\" \"size\" \"iter\" #> [9] \"ifault\" kmeans_summary <- kmeans_fit %>% extract_fit_summary() kmeans_summary %>% str() #> List of 7 #> $ cluster_names : Factor w/ 3 levels \"Cluster_1\",\"Cluster_2\",..: 1 2 3 #> $ centroids : tibble [3 × 2] (S3: tbl_df/tbl/data.frame) #> ..$ bill_length_mm: num [1:3] 38.4 45.5 50.9 #> ..$ bill_depth_mm : num [1:3] 18.3 15.6 17.3 #> $ n_members : int [1:3] 141 116 85 #> $ sse_within_total_total: num [1:3] 944 755 618 #> $ sse_total : num 11494 #> $ orig_labels : int [1:342] 1 1 1 2 3 3 2 2 1 3 ... #> $ cluster_assignments : Factor w/ 3 levels \"Cluster_1\",\"Cluster_2\",..: 1 1 1 2 3 3 2 2 1 3 ..."},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"cluster-assignments-and-centers","dir":"Articles","previous_headings":"","what":"Cluster assignments and centers","title":"k-means","text":"primary objective fitting clustering model typically assign observations clusters. access , use extract_cluster_assignment() function: Note function renames clusters accordance standard tidyclust naming convention ordering: clusters named “Cluster_1”, “Cluster_2”, etc. numbered order appear rows training dataset. reconcile standardized cluster labels engine output, refer back full model fit summary: example, see cluster labelled “3” stats::kmeans engine function - label assigned randomly implementation - first appear training data, converted “Cluster_1” standardized labels.","code":"kmeans_fit %>% extract_cluster_assignment() #> # A tibble: 342 × 1 #> .cluster #> #> 1 Cluster_1 #> 2 Cluster_1 #> 3 Cluster_1 #> 4 Cluster_2 #> 5 Cluster_3 #> 6 Cluster_3 #> 7 Cluster_2 #> 8 Cluster_2 #> 9 Cluster_1 #> 10 Cluster_3 #> # ℹ 332 more rows tibble( orig_labels = kmeans_summary$orig_labels, standard_labels = kmeans_summary$cluster_assignments ) #> # A tibble: 342 × 2 #> orig_labels standard_labels #> #> 1 1 Cluster_1 #> 2 1 Cluster_1 #> 3 1 Cluster_1 #> 4 2 Cluster_2 #> 5 3 Cluster_3 #> 6 3 Cluster_3 #> 7 2 Cluster_2 #> 8 2 Cluster_2 #> 9 1 Cluster_1 #> 10 3 Cluster_3 #> # ℹ 332 more rows"},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"centroids","dir":"Articles","previous_headings":"Cluster assignments and centers","what":"Centroids","title":"k-means","text":"secondary output interest often characterization clusters; .e., data feature trends cluster seem represent? commonly, clusters characterized mean values predictor space, .k.. centroids. can accessed full summary: can also accessed directly fitted model : Based output, might say Cluster_1 penguins smaller bill lengths, Cluster_2 smaller bill depths, Cluster_3 penguins large bills dimensions.","code":"kmeans_summary$centroids #> # A tibble: 3 × 2 #> bill_length_mm bill_depth_mm #> #> 1 38.4 18.3 #> 2 45.5 15.6 #> 3 50.9 17.3 kmeans_fit %>% extract_centroids() #> # A tibble: 3 × 3 #> .cluster bill_length_mm bill_depth_mm #> #> 1 Cluster_1 38.4 18.3 #> 2 Cluster_2 45.5 15.6 #> 3 Cluster_3 50.9 17.3"},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"prediction","dir":"Articles","previous_headings":"","what":"Prediction","title":"k-means","text":"Since kk-means algorithm ultimately assigns training observations cluster closest centroid, natural “predict” test observations also belong closest centroid cluster. predict() function behaves expected, producing cluster assignment predictions new data based distance fitted model centroids. attach predictions dataset column, use augment():","code":"new_penguin <- tibble( bill_length_mm = 42, bill_depth_mm = 17 ) kmeans_fit %>% predict(new_penguin) #> # A tibble: 1 × 1 #> .pred_cluster #> #> 1 Cluster_2 kmeans_fit %>% augment(penguins) #> # A tibble: 342 × 3 #> bill_length_mm bill_depth_mm .pred_cluster #> #> 1 39.6 20.7 Cluster_1 #> 2 36.2 17.3 Cluster_1 #> 3 32.1 15.5 Cluster_1 #> 4 47.6 18.3 Cluster_2 #> 5 52 18.1 Cluster_3 #> 6 52.7 19.8 Cluster_3 #> 7 45.2 16.4 Cluster_2 #> 8 46.6 14.2 Cluster_2 #> 9 34.4 18.4 Cluster_1 #> 10 49.8 15.9 Cluster_3 #> # ℹ 332 more rows"},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"metrics","dir":"Articles","previous_headings":"","what":"Metrics","title":"k-means","text":"Since clustering unsupervised method, target/outcome variable, objective notion predictive success. However, many common approaches exist quantifying quality particular cluster partition structure.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"sum-of-squared-error","dir":"Articles","previous_headings":"Metrics","what":"Sum of squared error","title":"k-means","text":"One simple metric within cluster sum--squared error (WSS), measures sum distances observations cluster center. sometimes scaled total sum--squared error (TSS), distance observations global centroid; particular, ratio WSS/TSS often computed. principle, small values WSS WSS/TSS ratio suggest observations within clusters closer (similar) clusters. WSS TSS come “free” model fit summary, can accessed directly model fit: can also see within sum--squares cluster, rather totalled, sse_within():","code":"kmeans_summary$sse_within_total_total #> [1] 944.4986 754.7437 617.9859 kmeans_summary$sse_total #> [1] 11494.04 kmeans_fit %>% sse_within_total() #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 sse_within_total standard 2317. kmeans_fit %>% sse_total() #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 sse_total standard 11494. kmeans_fit %>% sse_ratio() #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 sse_ratio standard 0.202 kmeans_fit %>% sse_within() #> # A tibble: 3 × 3 #> .cluster wss n_members #> #> 1 Cluster_1 944. 141 #> 2 Cluster_2 755. 116 #> 3 Cluster_3 618. 85"},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"silhouette","dir":"Articles","previous_headings":"Metrics","what":"Silhouette","title":"k-means","text":"Another common measure cluster structure called silhouette. silhouette single observation proportional average distance observation within-cluster observations minus average distance outside-cluster observations; normalized greater two average. principle, large silhouette (close 1) suggests observation similar within cluster outside cluster. can average silhouettes get metric full clustering fit. Beause computation silhouette depends original observation values, dataset must also supplied function.","code":"kmeans_fit %>% silhouette_avg(penguins) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 silhouette_avg standard 0.488"},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"changing-distance-measures","dir":"Articles","previous_headings":"Metrics","what":"Changing distance measures","title":"k-means","text":"metrics depend measuring distance points /centroids. default, ordinary Euclidean distance used. However, possible select different distance function. sum squares metrics, distance function supplied must take two arguments (.e., observation locations centroid locations). sihouette metric, distance function must find pairwise distances single matrix (.e., pairwise distances observations). using metrics cluster model selection, see Tuning vignette.","code":"my_dist_1 <- function(x) { philentropy::distance(x, method = \"manhattan\") } my_dist_2 <- function(x, y) { philentropy::dist_many_many(x, y, method = \"manhattan\") } kmeans_fit %>% sse_ratio(dist_fun = my_dist_2) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 sse_ratio standard 0.202 kmeans_fit %>% silhouette_avg(penguins, dist_fun = my_dist_1) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 silhouette_avg standard 0.494"},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"workflows","dir":"Articles","previous_headings":"","what":"Workflows","title":"k-means","text":"workflow structure tidymodels also usable tidyclust objects. following example, try two recipes clustering penguins bill dimensions. second recipe, log-scale predictors clustering.","code":"penguins_recipe_1 <- recipe(~ bill_length_mm + bill_depth_mm, data = penguins ) penguins_recipe_2 <- recipe(~ bill_length_mm + bill_depth_mm, data = penguins ) %>% step_log(all_numeric_predictors()) wflow_1 <- workflow() %>% add_model(kmeans_spec) %>% add_recipe(penguins_recipe_1) wflow_2 <- workflow() %>% add_model(kmeans_spec) %>% add_recipe(penguins_recipe_2) wflow_1 %>% fit(penguins) %>% extract_centroids() #> # A tibble: 3 × 3 #> .cluster bill_length_mm bill_depth_mm #> #> 1 Cluster_1 38.4 18.3 #> 2 Cluster_2 45.5 15.6 #> 3 Cluster_3 50.9 17.3 wflow_2 %>% fit(penguins) %>% extract_centroids() #> # A tibble: 3 × 3 #> .cluster bill_length_mm bill_depth_mm #> #> 1 Cluster_1 3.65 2.90 #> 2 Cluster_2 3.90 2.92 #> 3 Cluster_3 3.85 2.70"},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"a-brief-introduction-to-the-k-means-algorithm","dir":"Articles","previous_headings":"","what":"A brief introduction to the k-means algorithm","title":"k-means","text":"k-means method unsupervised learning produces partitioning observations k unique clusters. goal k-means minimize sum squared Euclidian distances observations cluster centroid, geometric mean, cluster. k-means clustering, observed variables (columns) considered locations axes multidimensional space. example, plot , point represents observation one penguin, location 2-dimensional space determined bill length bill depth penguin. k-means cluster assignment achieved iterating convergence random initial conditions. algorithm typically proceeds follows: Choose k random observations dataset. locations space declared initial centroids. Assign observation nearest centroid. Compute new centroids cluster. Repeat steps 2 3 centroids change.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"iteration-of-centroids","dir":"Articles","previous_headings":"A brief introduction to the k-means algorithm","what":"Iteration of centroids","title":"k-means","text":"also variation implementations update process takes place. example, shown common implementation known Lloyd Forgy method. update steps : Assign observations closest centroid. Recalculate centroids. Repeat convergence. One variant approach MacQueen method, updates centroids continually: Assign one observation closest centroid. Recalculate centroids. Repeat observations reassigned . Repeat convergence. third common variant Hartigan-Wong method, assigns observations based overall sum squared errors rather simply closest cluster: Temporarily assign one observation one cluster. Recalculate centroid. Find distances observations cluster center (SSE). Repeat cluster. Permanently assign observation cluster resulted lowest SSE. Repeat observations. Repeat convergence. many interactive algorithms, choice methods choice complexity versus accuracy. Hartigan-Wong method generally results consistent human-verified clusterings, default setting stats::kmeans implementation k-means clustering; although three algorithms available options engine. Lloyd/Forgy method simple ubiquitous; method available ClusterR package implementation. Source","code":""},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"initialization-of-the-k-means-algorithm","dir":"Articles","previous_headings":"A brief introduction to the k-means algorithm","what":"Initialization of the k-means algorithm","title":"k-means","text":"k-means algorithm depends choosing initial set cluster centers. three common methods selecting initial centers: Random observations: example , chosen three random observations act initial centers. commonly used approach, implemented Forgy, Lloyd, MacQueen methods. Random partition: observations assigned cluster uniformly random. centroid cluster computed, used initial centers. approach implemented Hartigan-Wong method. k-means++: Beginning one random set observations, observations sampled via probability-weighted sampling kk clusters formed. centroids clusters used initial centers. (detail ) initial conditions based random selection approaches, k-means algorithm determinitistic. , running clustering twice data may result cluster assignments. common perform k-means clustering algorithm multiple times, different random initial conditions, combine results end. option controlled nstart argument stats::kmeans implementation, num_init argument ClusterR::KMeans_rcpp implementation.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/articles/tuning_and_metrics.html","id":"setup","dir":"Articles","previous_headings":"","what":"Setup","title":"Tuning Cluster Models","text":"Load libraries: Load clean dataset:","code":"library(tidyclust) library(tidyverse) library(tidymodels) set.seed(838383) data(\"penguins\", package = \"modeldata\") penguins <- penguins %>% drop_na()"},{"path":"https://tidyclust.tidymodels.org/dev/articles/tuning_and_metrics.html","id":"tuning-in-unsupervised-settings","dir":"Articles","previous_headings":"","what":"Tuning in unsupervised settings","title":"Tuning Cluster Models","text":"supervised modeling scenarios, observe values target (“response”) variable, measure success model based well predicts future response values. select hyperparameter values, tune , trying many possible values measuring well performs predicting target values test data. unsupervised modeling setting tidyclust, objective measure success. Clustering analyses typically exploratory rather testable. Nonetheless, core tuning principle varying inputs quantifying results still applicable.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/articles/tuning_and_metrics.html","id":"specify-and-fit-a-model","dir":"Articles","previous_headings":"","what":"Specify and fit a model","title":"Tuning Cluster Models","text":"example, fit kk-means cluster model palmerpenguins dataset, using bill length bill depth penguins predictors. (Please refer k-means vignette -depth discussion model specification.) goal select appropriate number clusters model based metrics. First, set cross-validation samples data: Next, specify model tuning parameter, make workflow, establish range possible values num_clusters try: , can use tune_cluster() compute metrics cross-validation split, possible choice number clusters.","code":"penguins_cv <- vfold_cv(penguins, v = 5) kmeans_spec <- k_means(num_clusters = tune()) penguins_rec <- recipe(~ bill_length_mm + bill_depth_mm, data = penguins ) kmeans_wflow <- workflow(penguins_rec, kmeans_spec) clust_num_grid <- grid_regular(num_clusters(), levels = 10 ) clust_num_grid #> # A tibble: 10 × 1 #> num_clusters #> #> 1 1 #> 2 2 #> 3 3 #> 4 4 #> 5 5 #> 6 6 #> 7 7 #> 8 8 #> 9 9 #> 10 10 res <- tune_cluster( kmeans_wflow, resamples = penguins_cv, grid = clust_num_grid, control = control_grid(save_pred = TRUE, extract = identity), metrics = cluster_metric_set(sse_within_total, sse_total, sse_ratio) ) res #> # Tuning results #> # 5-fold cross-validation #> # A tibble: 5 × 6 #> splits id .metrics .notes .extracts .predictions #> #> 1 Fold1 #> 2 Fold2 #> 3 Fold3 #> 4 Fold4 #> 5 Fold5 res_metrics <- res %>% collect_metrics() res_metrics #> # A tibble: 30 × 7 #> num_clusters .metric .estimator mean n std_err .config #> #> 1 1 sse_ratio standard 1 5 0 Prepro… #> 2 1 sse_total standard 8971. 5 1.19e+2 Prepro… #> 3 1 sse_within_total standard 8971. 5 1.19e+2 Prepro… #> 4 2 sse_ratio standard 0.321 5 1.41e-3 Prepro… #> 5 2 sse_total standard 8971. 5 1.19e+2 Prepro… #> 6 2 sse_within_total standard 2885. 5 4.73e+1 Prepro… #> 7 3 sse_ratio standard 0.202 5 2.21e-3 Prepro… #> 8 3 sse_total standard 8971. 5 1.19e+2 Prepro… #> 9 3 sse_within_total standard 1809. 5 3.47e+1 Prepro… #> 10 4 sse_ratio standard 0.160 5 5.79e-3 Prepro… #> # ℹ 20 more rows"},{"path":"https://tidyclust.tidymodels.org/dev/articles/tuning_and_metrics.html","id":"choosing-hyperparameters","dir":"Articles","previous_headings":"Specify and fit a model","what":"Choosing hyperparameters","title":"Tuning Cluster Models","text":"supervised learning, choose model best value target metric. However, clustering models general local maxima minima. clusters model, always expect within sum--squares smaller. common approach choosing number clusters look “elbow”, notable bend, plot WSS/TSS ratio cluster number: increase number clusters, WSS/TSS ratio decreases, amount decrease getting smaller number clusters grows. might argue drop two clusters three, three four, bit extreme subsequent drops, probably choose three four clusters.","code":"res_metrics %>% filter(.metric == \"sse_ratio\") %>% ggplot(aes(x = num_clusters, y = mean)) + geom_point() + geom_line() + theme_minimal() + ylab(\"mean WSS/TSS ratio, over 5 folds\") + xlab(\"Number of clusters\") + scale_x_continuous(breaks = 1:10)"},{"path":"https://tidyclust.tidymodels.org/dev/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Emil Hvitfeldt. Author, maintainer. Kelly Bodwin. Author. . Copyright holder, funder.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Hvitfeldt E, Bodwin K (2025). tidyclust: Common API Clustering. R package version 0.2.3.9000, https://tidyclust.tidymodels.org/, https://github.com/tidymodels/tidyclust.","code":"@Manual{, title = {tidyclust: A Common API to Clustering}, author = {Emil Hvitfeldt and Kelly Bodwin}, year = {2025}, note = {R package version 0.2.3.9000, https://tidyclust.tidymodels.org/}, url = {https://github.com/tidymodels/tidyclust}, }"},{"path":"https://tidyclust.tidymodels.org/dev/index.html","id":"tidyclust-","dir":"","previous_headings":"","what":"A Common API to Clustering","title":"A Common API to Clustering","text":"goal tidyclust provide tidy, unified interface clustering models. packages closely modeled parsnip package.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"A Common API to Clustering","text":"can install released version tidyclust CRAN : development version tidyclust GitHub :","code":"install.packages(\"tidyclust\") # install.packages(\"pak\") pak::pak(\"tidymodels/tidyclust\")"},{"path":"https://tidyclust.tidymodels.org/dev/index.html","id":"example","dir":"","previous_headings":"","what":"Example","title":"A Common API to Clustering","text":"first thing create cluster specification. example creating K-means model, using stats engine. specification can fit using data. fitted tidyclust object, can number things. predict() returns cluster new observation belongs extract_cluster_assignment() returns cluster assignments training observations extract_centroids() returns locations clusters","code":"library(tidyclust) set.seed(1234) kmeans_spec <- k_means(num_clusters = 3) %>% set_engine(\"stats\") kmeans_spec #> K Means Cluster Specification (partition) #> #> Main Arguments: #> num_clusters = 3 #> #> Computational engine: stats kmeans_spec_fit <- kmeans_spec %>% fit(~., data = mtcars) kmeans_spec_fit #> tidyclust cluster object #> #> K-means clustering with 3 clusters of sizes 7, 11, 14 #> #> Cluster means: #> mpg cyl disp hp drat wt qsec vs #> 1 19.74286 6 183.3143 122.28571 3.585714 3.117143 17.97714 0.5714286 #> 3 26.66364 4 105.1364 82.63636 4.070909 2.285727 19.13727 0.9090909 #> 2 15.10000 8 353.1000 209.21429 3.229286 3.999214 16.77214 0.0000000 #> am gear carb #> 1 0.4285714 3.857143 3.428571 #> 3 0.7272727 4.090909 1.545455 #> 2 0.1428571 3.285714 3.500000 #> #> Clustering vector: #> Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive #> 1 1 2 1 #> Hornet Sportabout Valiant Duster 360 Merc 240D #> 3 1 3 2 #> Merc 230 Merc 280 Merc 280C Merc 450SE #> 2 1 1 3 #> Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental #> 3 3 3 3 #> Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla #> 3 2 2 2 #> Toyota Corona Dodge Challenger AMC Javelin Camaro Z28 #> 2 3 3 3 #> Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa #> 3 2 2 2 #> Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E #> 3 1 3 2 #> #> Within cluster sum of squares by cluster: #> [1] 13954.34 11848.37 93643.90 #> (between_SS / total_SS = 80.8 %) #> #> Available components: #> #> [1] \"cluster\" \"centers\" \"totss\" \"withinss\" \"tot.withinss\" #> [6] \"betweenss\" \"size\" \"iter\" \"ifault\" predict(kmeans_spec_fit, mtcars[1:4, ]) #> # A tibble: 4 × 1 #> .pred_cluster #> #> 1 Cluster_1 #> 2 Cluster_1 #> 3 Cluster_2 #> 4 Cluster_1 extract_cluster_assignment(kmeans_spec_fit) #> # A tibble: 32 × 1 #> .cluster #> #> 1 Cluster_1 #> 2 Cluster_1 #> 3 Cluster_2 #> 4 Cluster_1 #> 5 Cluster_3 #> 6 Cluster_1 #> 7 Cluster_3 #> 8 Cluster_2 #> 9 Cluster_2 #> 10 Cluster_1 #> # ℹ 22 more rows extract_centroids(kmeans_spec_fit) #> # A tibble: 3 × 12 #> .cluster mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 Cluster_1 19.7 6 183. 122. 3.59 3.12 18.0 0.571 0.429 3.86 3.43 #> 2 Cluster_2 26.7 4 105. 82.6 4.07 2.29 19.1 0.909 0.727 4.09 1.55 #> 3 Cluster_3 15.1 8 353. 209. 3.23 4.00 16.8 0 0.143 3.29 3.5"},{"path":"https://tidyclust.tidymodels.org/dev/index.html","id":"visual-comparison-of-clustering-methods","dir":"","previous_headings":"","what":"Visual comparison of clustering methods","title":"A Common API to Clustering","text":"visualization available models compare using 2 dimensional toy data sets.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/index.html","id":"contributing","dir":"","previous_headings":"","what":"Contributing","title":"A Common API to Clustering","text":"project released Contributor Code Conduct. contributing project, agree abide terms. questions discussions tidymodels packages, modeling, machine learning, please post RStudio Community. think encountered bug, please submit issue. Either way, learn create share reprex (minimal, reproducible example), clearly communicate code. Check details contributing guidelines tidymodels packages get help. Footer","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/augment.html","id":null,"dir":"Reference","previous_headings":"","what":"Augment data with predictions — augment.cluster_fit","title":"Augment data with predictions — augment.cluster_fit","text":"augment() add column(s) predictions given data.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/augment.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Augment data with predictions — augment.cluster_fit","text":"","code":"# S3 method for class 'cluster_fit' augment(x, new_data, ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/augment.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Augment data with predictions — augment.cluster_fit","text":"x cluster_fit object produced fit.cluster_spec() fit_xy.cluster_spec() . new_data data frame matrix. ... currently used.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/augment.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Augment data with predictions — augment.cluster_fit","text":"tibble::tibble() containing new_data columns added depending mode model.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/augment.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Augment data with predictions — augment.cluster_fit","text":"partition models, .pred_cluster column added.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/augment.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Augment data with predictions — augment.cluster_fit","text":"","code":"kmeans_spec <- k_means(num_clusters = 5) %>% set_engine(\"stats\") kmeans_fit <- fit(kmeans_spec, ~., mtcars) kmeans_fit %>% augment(new_data = mtcars) #> # A tibble: 32 × 12 #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows #> # ℹ 1 more variable: .pred_cluster "},{"path":"https://tidyclust.tidymodels.org/dev/reference/cluster_fit.html","id":null,"dir":"Reference","previous_headings":"","what":"Model Fit Object Information — cluster_fit","title":"Model Fit Object Information — cluster_fit","text":"object class \"cluster_fit\" container information model fit data.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/cluster_fit.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Model Fit Object Information — cluster_fit","text":"following model types implemented tidyclust: K-Means k_means() Hierarchical (Agglomerative) Clustering hier_clust() main elements object : spec: cluster_spec object. fit: object produced fitting function. preproc: contains data-specific information required process new sample point prediction. example, underlying model function requires arguments x user passed formula fit, preproc object contain items terms object . information required, NA. discussed documentation cluster_spec, original arguments specification saved quosures. evaluated cluster_fit object prior fitting. resulting model object prints call, user-defined options shown call preceded tilde (see example ). result use quosures specification. class structure basis tidyclust stores model objects seeing data applying model.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/cluster_metric_set.html","id":null,"dir":"Reference","previous_headings":"","what":"Combine metric functions — cluster_metric_set","title":"Combine metric functions — cluster_metric_set","text":"cluster_metric_set() allows combine multiple metric functions together new function calculates .","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/cluster_metric_set.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Combine metric functions — cluster_metric_set","text":"","code":"cluster_metric_set(...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/cluster_metric_set.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Combine metric functions — cluster_metric_set","text":"... bare names functions included metric set. functions must cluster metrics sse_total(), sse_ratio(), silhouette_avg().","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/cluster_metric_set.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Combine metric functions — cluster_metric_set","text":"cluster_metric_set() object, combining use input metrics.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/cluster_metric_set.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Combine metric functions — cluster_metric_set","text":"functions must : cluster metrics","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/cluster_spec.html","id":null,"dir":"Reference","previous_headings":"","what":"Model Specification Information — cluster_spec","title":"Model Specification Information — cluster_spec","text":"object class \"cluster_spec\" container information model fit.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/cluster_spec.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Model Specification Information — cluster_spec","text":"following model types implemented tidyclust: K-Means k_means() Hierarchical (Agglomerative) Clustering hier_clust() main elements object : args: vector main arguments model. names arguments may different counterparts n underlying model function. example, k_means() model, argument name number clusters called \"num_clusters\" instead \"k\" make general usable across different types models (specific particular model function). elements args can tune() use tune_cluster(). information see https://www.tidymodels.org/start/tuning/. left defaults (NULL), arguments use underlying model functions default value. discussed , arguments args captured quosures immediately executed. ...: Optional model-function-specific parameters. args, quosures can tune(). mode: type model, \"partition\". modes added package adds functionality. method: slot filled later model's constructor function. generally contains lists information used create fit prediction code well required packages similar data. engine: character string declares exactly software used. can package name technology type. class structure basis tidyclust stores model objects prior seeing data.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/cluster_spec.html","id":"argument-details","dir":"Reference","previous_headings":"","what":"Argument Details","title":"Model Specification Information — cluster_spec","text":"important detail understand creating model specifications intended functionally independent data. true tuning parameters data dependent, model specification interact data . example, R functions immediately evaluate arguments. example, calling mean(dat_vec), object dat_vec immediately evaluated inside function. tidyclust model functions . example, using execute ncol(mtcars) / 5 creating specification. can seen output: model functions save argument expressions associated environments (.k.. quosure) evaluated later either fit.cluster_spec() fit_xy.cluster_spec() called actual data. consequence strategy data required get parameter values must available model fit. two main ways can fail : data modified creation model specification model fit function invoked. model specification saved loaded new session data objects exist. best way avoid issues reference data objects global environment use data descriptors .cols(). Another way writing previous specification dependent specific data object evaluated immediately model fitting process begins. One less advantageous approach solving issue use quasiquotation. insert actual R object model specification might best idea data object small. example, using work (reproducible sessions) embeds entire mtcars data set num_clusters expression: However, object number columns , bad: information quosures quasiquotation can found https://adv-r.hadley.nz/quasiquotation.html.","code":"k_means(num_clusters = ncol(mtcars) / 5) > k_means(num_clusters = ncol(mtcars) / 5) K Means Cluster Specification (partition) Main Arguments: num_clusters = ncol(mtcars)/5 Computational engine: stats k_means(num_clusters = .cols() / 5) k_means(num_clusters = ncol(!!mtcars) - 1) > k_means(num_clusters = ncol(!!mtcars) / 5) K Means Cluster Specification (partition) Main Arguments: num_clusters = ncol(structure(list(mpg = c(21, 21, 22.8, 21.4, 18.7, Computational engine: stats > num_clusters_val <- ncol(mtcars) / 5 > num_clusters_val [1] 10 > k_means(num_clusters = !!num_clusters_val) K Means Cluster Specification (partition) Main Arguments: num_clusters = 2.2"},{"path":"https://tidyclust.tidymodels.org/dev/reference/control_cluster.html","id":null,"dir":"Reference","previous_headings":"","what":"Control the fit function — control_cluster","title":"Control the fit function — control_cluster","text":"Options can passed fit.cluster_spec() function control output computations.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/control_cluster.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Control the fit function — control_cluster","text":"","code":"control_cluster(verbosity = 1L, catch = FALSE)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/control_cluster.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Control the fit function — control_cluster","text":"verbosity integer value zero indicates messages output shown packages loaded model fit. value 1 means package loading quiet model fits can produce output screen (depending contain verbose-type argument). value 2 indicates output seen. catch logical value TRUE evaluate model inside try(, silent = TRUE). model fails, object still returned (without error) inherits class \"try-error\".","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/control_cluster.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Control the fit function — control_cluster","text":"S3 object class \"control_cluster\" named list results function call","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/control_cluster.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Control the fit function — control_cluster","text":"","code":"control_cluster() #> tidyclust control object control_cluster(catch = TRUE) #> tidyclust control object #> - fit errors will be caught"},{"path":"https://tidyclust.tidymodels.org/dev/reference/convert_helpers.html","id":null,"dir":"Reference","previous_headings":"","what":"Helper functions to convert between formula and matrix interface — .convert_form_to_x_fit","title":"Helper functions to convert between formula and matrix interface — .convert_form_to_x_fit","text":"Functions take formula interface get resulting objects (y, x, weights, etc) back way around. functions intended developer use. part, emulates internals lm() (also see notes https://developer.r-project.org/model-fitting-functions.html). .convert_form_to_x_fit() .convert_x_to_form_fit() data created modeling. .convert_form_to_x_fit() saves data objects well objects needed new data predicted (e.g. terms, etc.). .convert_form_to_x_new() .convert_x_to_form_new() used new samples predicted require predictors available.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/convert_helpers.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Helper functions to convert between formula and matrix interface — .convert_form_to_x_fit","text":"","code":".convert_form_to_x_fit( formula, data, ..., na.action = na.omit, indicators = \"traditional\", composition = \"data.frame\", remove_intercept = TRUE ) .convert_x_to_form_fit(x, weights = NULL, remove_intercept = TRUE) .convert_form_to_x_new( object, new_data, na.action = stats::na.pass, composition = \"data.frame\" ) .convert_x_to_form_new(object, new_data)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/convert_helpers.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Helper functions to convert between formula and matrix interface — .convert_form_to_x_fit","text":"formula object class formula (one can coerced class): symbolic description model fitted. data data frame containing relevant variables (e.g. predictors, case weights, etc). ... Additional arguments passed stats::model.frame(). na.action function indicates happen data contain NAs. indicators string describing whether create indicator/dummy variables factor predictors. Possible options \"none\", \"traditional\", \"one_hot\". composition string describing whether resulting x y returned \"matrix\" \"data.frame\". remove_intercept logical indicating whether remove intercept column model.matrix() finished. x matrix, sparse matrix, data frame predictors. models support sparse matrix input. See modelenv::get_encoding() details. x column names. weights numeric vector containing weights. object object class cluster_fit. new_data rectangular data object, data frame.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/cut_height.html","id":null,"dir":"Reference","previous_headings":"","what":"Cut Height — cut_height","title":"Cut Height — cut_height","text":"Used tidyclust::hier_clust() models.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/cut_height.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Cut Height — cut_height","text":"","code":"cut_height(range = c(0, dials::unknown()), trans = NULL)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/cut_height.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Cut Height — cut_height","text":"range two-element vector holding defaults smallest largest possible values, respectively. transformation specified, values transformed units. trans trans object scales package, scales::transform_log10() scales::transform_reciprocal(). provided, default used matches units used range. transformation, NULL.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/cut_height.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Cut Height — cut_height","text":"","code":"cut_height() #> Cut Height (quantitative) #> Range: [0, ?]"},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_hier_clust_stats.html","id":null,"dir":"Reference","previous_headings":"","what":"Hierarchical (Agglomerative) Clustering via stats — details_hier_clust_stats","title":"Hierarchical (Agglomerative) Clustering via stats — details_hier_clust_stats","text":"hier_clust() creates Hierarchical (Agglomerative) Clustering model.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_hier_clust_stats.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Hierarchical (Agglomerative) Clustering via stats — details_hier_clust_stats","text":"engine, single mode: partition","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_hier_clust_stats.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"Hierarchical (Agglomerative) Clustering via stats — details_hier_clust_stats","text":"model 1 tuning parameters: num_clusters: # Clusters (type: integer, default: default)","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_hier_clust_stats.html","id":"translation-from-tidyclust-to-the-original-package-partition-","dir":"Reference","previous_headings":"","what":"Translation from tidyclust to the original package (partition)","title":"Hierarchical (Agglomerative) Clustering via stats — details_hier_clust_stats","text":"","code":"hier_clust(num_clusters = integer(1)) %>% set_engine(\"stats\") %>% set_mode(\"partition\") %>% translate_tidyclust() ## Hierarchical Clustering Specification (partition) ## ## Main Arguments: ## num_clusters = integer(1) ## linkage_method = complete ## ## Computational engine: stats ## ## Model fit template: ## tidyclust::.hier_clust_fit_stats(data = missing_arg(), num_clusters = integer(1), ## linkage_method = \"complete\")"},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_hier_clust_stats.html","id":"preprocessing-requirements","dir":"Reference","previous_headings":"","what":"Preprocessing requirements","title":"Hierarchical (Agglomerative) Clustering via stats — details_hier_clust_stats","text":"Factor/categorical predictors need converted numeric values (e.g., dummy indicator variables) engine. using formula method via fit(), tidyclust convert factor columns indicators.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_hier_clust_stats.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Hierarchical (Agglomerative) Clustering via stats — details_hier_clust_stats","text":"Becker, R. ., Chambers, J. M. Wilks, . R. (1988). New S Language. Wadsworth & Brooks/Cole. (S version.) Everitt, B. (1974). Cluster Analysis. London: Heinemann Educ. Books. Hartigan, J.. (1975). Clustering Algorithms. New York: Wiley. Sneath, P. H. . R. R. Sokal (1973). Numerical Taxonomy. San Francisco: Freeman. Anderberg, M. R. (1973). Cluster Analysis Applications. Academic Press: New York. Gordon, . D. (1999). Classification. Second Edition. London: Chapman Hall / CRC Murtagh, F. (1985). “Multidimensional Clustering Algorithms”, COMPSTAT Lectures 4. Wuerzburg: Physica-Verlag (algorithmic details algorithms used). McQuitty, L.L. (1966). Similarity Analysis Reciprocal Pairs Discrete Continuous Data. Educational Psychological Measurement, 26, 825–831. doi:10.1177/001316446602600402. Legendre, P. L. Legendre (2012). Numerical Ecology, 3rd English ed. Amsterdam: Elsevier Science BV. Murtagh, Fionn Legendre, Pierre (2014). Ward’s hierarchical agglomerative clustering method: algorithms implement Ward’s criterion? Journal Classification, 31, 274–295. doi:10.1007/s00357-014-9161-z.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_ClusterR.html","id":null,"dir":"Reference","previous_headings":"","what":"K-means via ClusterR — details_k_means_ClusterR","title":"K-means via ClusterR — details_k_means_ClusterR","text":"k_means() creates K-means model. engine uses classical definition K-means model, takes numeric predictors.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_ClusterR.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"K-means via ClusterR — details_k_means_ClusterR","text":"engine, single mode: partition","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_ClusterR.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"K-means via ClusterR — details_k_means_ClusterR","text":"model 1 tuning parameters: num_clusters: # Clusters (type: integer, default: default)","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_ClusterR.html","id":"translation-from-tidyclust-to-the-original-package-partition-","dir":"Reference","previous_headings":"","what":"Translation from tidyclust to the original package (partition)","title":"K-means via ClusterR — details_k_means_ClusterR","text":"","code":"k_means(num_clusters = integer(1)) %>% set_engine(\"ClusterR\") %>% set_mode(\"partition\") %>% translate_tidyclust() ## K Means Cluster Specification (partition) ## ## Main Arguments: ## num_clusters = integer(1) ## ## Computational engine: ClusterR ## ## Model fit template: ## tidyclust::.k_means_fit_ClusterR(data = missing_arg(), clusters = missing_arg(), ## clusters = integer(1))"},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_ClusterR.html","id":"preprocessing-requirements","dir":"Reference","previous_headings":"","what":"Preprocessing requirements","title":"K-means via ClusterR — details_k_means_ClusterR","text":"Factor/categorical predictors need converted numeric values (e.g., dummy indicator variables) engine. using formula method via fit(), tidyclust convert factor columns indicators. Predictors scale. One way achieve center scale predictor mean zero variance one.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_ClusterR.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"K-means via ClusterR — details_k_means_ClusterR","text":"Forgy, E. W. (1965). Cluster analysis multivariate data: efficiency vs interpretability classifications. Biometrics, 21, 768–769. Hartigan, J. . Wong, M. . (1979). Algorithm 136: K-means clustering algorithm. Applied Statistics, 28, 100–108. doi:10.2307/2346830. Lloyd, S. P. (1957, 1982). Least squares quantization PCM. Technical Note, Bell Laboratories. Published 1982 IEEE Transactions Information Theory, 28, 128–137. MacQueen, J. (1967). methods classification analysis multivariate observations. Proceedings Fifth Berkeley Symposium Mathematical Statistics Probability, eds L. M. Le Cam & J. Neyman, 1, pp. 281–297. Berkeley, CA: University California Press.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_clustMixType.html","id":null,"dir":"Reference","previous_headings":"","what":"K-means via clustMixType — details_k_means_clustMixType","title":"K-means via clustMixType — details_k_means_clustMixType","text":"k_means() creates K-prototypes model. K-prototypes middle ground K-means K-modes model, sense can used data contains numeric categorical predictors.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_clustMixType.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"K-means via clustMixType — details_k_means_clustMixType","text":"numeric categorical predictors requires engine. engine, single mode: partition","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_clustMixType.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"K-means via clustMixType — details_k_means_clustMixType","text":"model 1 tuning parameters: num_clusters: # Clusters (type: integer, default: default)","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_clustMixType.html","id":"translation-from-tidyclust-to-the-original-package-partition-","dir":"Reference","previous_headings":"","what":"Translation from tidyclust to the original package (partition)","title":"K-means via clustMixType — details_k_means_clustMixType","text":"","code":"k_means(num_clusters = integer(1)) %>% set_engine(\"clustMixType\") %>% set_mode(\"partition\") %>% translate_tidyclust() ## K Means Cluster Specification (partition) ## ## Main Arguments: ## num_clusters = integer(1) ## ## Computational engine: clustMixType ## ## Model fit template: ## tidyclust::.k_means_fit_clustMixType(x = missing_arg(), k = missing_arg(), ## keep.data = missing_arg(), k = integer(1), keep.data = TRUE, ## verbose = FALSE)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_clustMixType.html","id":"preprocessing-requirements","dir":"Reference","previous_headings":"","what":"Preprocessing requirements","title":"K-means via clustMixType — details_k_means_clustMixType","text":"categorical numeric predictors required.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_clustMixType.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"K-means via clustMixType — details_k_means_clustMixType","text":"Szepannek, G. (2018): clustMixType: User-Friendly Clustering Mixed-Type Data R, R Journal 10/2, 200-208, doi:10.32614/RJ-2018-048. Aschenbruck, R., Szepannek, G., Wilhelm, . (2022): Imputation Strategies Clustering Mixed‑Type Data Missing Values, Journal Classification, doi:10.1007/s00357-022-09422-y. Z.Huang (1998): Extensions k-Means Algorithm Clustering Large Data Sets Categorical Variables, Data Mining Knowledge Discovery 2, 283-304.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_klaR.html","id":null,"dir":"Reference","previous_headings":"","what":"K-means via klaR — details_k_means_klaR","title":"K-means via klaR — details_k_means_klaR","text":"k_means() creates K-Modes model. model intended used categorical predictors. Although accept numeric predictors contain number unique values. numeric predictors treated like categorical.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_klaR.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"K-means via klaR — details_k_means_klaR","text":"engine, single mode: partition","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_klaR.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"K-means via klaR — details_k_means_klaR","text":"model 1 tuning parameters: num_clusters: # Clusters (type: integer, default: default)","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_klaR.html","id":"translation-from-tidyclust-to-the-original-package-partition-","dir":"Reference","previous_headings":"","what":"Translation from tidyclust to the original package (partition)","title":"K-means via klaR — details_k_means_klaR","text":"","code":"k_means(num_clusters = integer(1)) %>% set_engine(\"klaR\") %>% set_mode(\"partition\") %>% translate_tidyclust() ## K Means Cluster Specification (partition) ## ## Main Arguments: ## num_clusters = integer(1) ## ## Computational engine: klaR ## ## Model fit template: ## tidyclust::.k_means_fit_klaR(data = missing_arg(), modes = missing_arg(), ## modes = integer(1))"},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_klaR.html","id":"preprocessing-requirements","dir":"Reference","previous_headings":"","what":"Preprocessing requirements","title":"K-means via klaR — details_k_means_klaR","text":"categorical variables accepted, along numerics unique values.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_klaR.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"K-means via klaR — details_k_means_klaR","text":"Huang, Z. (1997) Fast Clustering Algorithm Cluster Large Categorical Data Sets Data Mining. KDD: Techniques Applications (H. Lu, H. Motoda H. Luu, Eds.), pp. 21-34, World Scientific, Singapore. MacQueen, J. (1967) methods classification analysis multivariate observations. Proceedings Fifth Berkeley Symposium Mathematical Statistics Probability, eds L. M. Le Cam & J. Neyman, 1, pp. 281-297. Berkeley, CA: University California Press.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_stats.html","id":null,"dir":"Reference","previous_headings":"","what":"K-means via stats — details_k_means_stats","title":"K-means via stats — details_k_means_stats","text":"k_means() creates K-means model. engine uses classical definition K-means model, takes numeric predictors.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_stats.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"K-means via stats — details_k_means_stats","text":"engine, single mode: partition","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_stats.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"K-means via stats — details_k_means_stats","text":"model 1 tuning parameters: num_clusters: # Clusters (type: integer, default: default)","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_stats.html","id":"translation-from-tidyclust-to-the-original-package-partition-","dir":"Reference","previous_headings":"","what":"Translation from tidyclust to the original package (partition)","title":"K-means via stats — details_k_means_stats","text":"","code":"k_means(num_clusters = integer(1)) %>% set_engine(\"stats\") %>% set_mode(\"partition\") %>% translate_tidyclust() ## K Means Cluster Specification (partition) ## ## Main Arguments: ## num_clusters = integer(1) ## ## Computational engine: stats ## ## Model fit template: ## tidyclust::.k_means_fit_stats(x = missing_arg(), centers = missing_arg(), ## centers = integer(1))"},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_stats.html","id":"preprocessing-requirements","dir":"Reference","previous_headings":"","what":"Preprocessing requirements","title":"K-means via stats — details_k_means_stats","text":"Factor/categorical predictors need converted numeric values (e.g., dummy indicator variables) engine. using formula method via fit(), tidyclust convert factor columns indicators. Predictors scale. One way achieve center scale predictor mean zero variance one.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/details_k_means_stats.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"K-means via stats — details_k_means_stats","text":"Forgy, E. W. (1965). Cluster analysis multivariate data: efficiency vs interpretability classifications. Biometrics, 21, 768–769. Hartigan, J. . Wong, M. . (1979). Algorithm 136: K-means clustering algorithm. Applied Statistics, 28, 100–108. doi:10.2307/2346830. Lloyd, S. P. (1957, 1982). Least squares quantization PCM. Technical Note, Bell Laboratories. Published 1982 IEEE Transactions Information Theory, 28, 128–137. MacQueen, J. (1967). methods classification analysis multivariate observations. Proceedings Fifth Berkeley Symposium Mathematical Statistics Probability, eds L. M. Le Cam & J. Neyman, 1, pp. 281–297. Berkeley, CA: University California Press.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-hier_clust_fit_stats.html","id":null,"dir":"Reference","previous_headings":"","what":"Simple Wrapper around hclust function — .hier_clust_fit_stats","title":"Simple Wrapper around hclust function — .hier_clust_fit_stats","text":"wrapper prepares data distance matrix send stats::hclust retains parameters num_clusters h attribute.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-hier_clust_fit_stats.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Simple Wrapper around hclust function — .hier_clust_fit_stats","text":"","code":".hier_clust_fit_stats( x, num_clusters = NULL, cut_height = NULL, linkage_method = NULL, dist_fun = philentropy::distance )"},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-hier_clust_fit_stats.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Simple Wrapper around hclust function — .hier_clust_fit_stats","text":"x matrix data frame num_clusters number clusters cut_height height cut dendrogram linkage_method agglomeration method used. (unambiguous abbreviation ) one \"ward.D\", \"ward.D2\", \"single\", \"complete\", \"average\" (= UPGMA), \"mcquitty\" (= WPGMA), \"median\" (= WPGMC) \"centroid\" (= UPGMC). dist_fun distance function use","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-hier_clust_fit_stats.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Simple Wrapper around hclust function — .hier_clust_fit_stats","text":"dendrogram","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-k_means_fit_ClusterR.html","id":null,"dir":"Reference","previous_headings":"","what":"Simple Wrapper around ClusterR kmeans — .k_means_fit_ClusterR","title":"Simple Wrapper around ClusterR kmeans — .k_means_fit_ClusterR","text":"wrapper runs ClusterR::KMeans_rcpp() adds column names centroids field. reorders clusters.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-k_means_fit_ClusterR.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Simple Wrapper around ClusterR kmeans — .k_means_fit_ClusterR","text":"","code":".k_means_fit_ClusterR( data, clusters, num_init = 1, max_iters = 100, initializer = \"kmeans++\", fuzzy = FALSE, verbose = FALSE, CENTROIDS = NULL, tol = 1e-04, tol_optimal_init = 0.3, seed = 1 )"},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-k_means_fit_ClusterR.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Simple Wrapper around ClusterR kmeans — .k_means_fit_ClusterR","text":"data matrix data frame clusters number clusters num_init number times algorithm run different centroid seeds max_iters maximum number clustering iterations initializer method initialization. One , optimal_init, quantile_init, kmeans++ random. See details information fuzzy either TRUE FALSE. TRUE, prediction probabilities calculated using distance observations centroids verbose either TRUE FALSE, indicating whether progress printed clustering. CENTROIDS matrix initial cluster centroids. rows CENTROIDS matrix equal number clusters columns equal columns data. tol float number. , case iteration (iteration > 1 iteration < max_iters) 'tol' greater squared norm centroids, kmeans converged tol_optimal_init tolerance value 'optimal_init' initializer. higher value , far appart centroids . seed integer value random number generator (RNG)","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-k_means_fit_ClusterR.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Simple Wrapper around ClusterR kmeans — .k_means_fit_ClusterR","text":"list following attributes: clusters, fuzzy_clusters (fuzzy = TRUE), centroids, total_SSE, best_initialization, WCSS_per_cluster, obs_per_cluster, .SS_DIV_total.SS","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-k_means_fit_clustMixType.html","id":null,"dir":"Reference","previous_headings":"","what":"Simple Wrapper around clustMixType kmeans — .k_means_fit_clustMixType","title":"Simple Wrapper around clustMixType kmeans — .k_means_fit_clustMixType","text":"wrapper runs clustMixType::kproto() reorders clusters.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-k_means_fit_clustMixType.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Simple Wrapper around clustMixType kmeans — .k_means_fit_clustMixType","text":"","code":".k_means_fit_clustMixType(x, k, ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-k_means_fit_clustMixType.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Simple Wrapper around clustMixType kmeans — .k_means_fit_clustMixType","text":"x Data frame numerics factors (also ordered factors possible). k Either number clusters, vector specifying indices initial prototypes, data frame prototypes columns x. ... arguments passed clustMixType::kproto()","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-k_means_fit_clustMixType.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Simple Wrapper around clustMixType kmeans — .k_means_fit_clustMixType","text":"Result clustMixType::kproto()","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-k_means_fit_klaR.html","id":null,"dir":"Reference","previous_headings":"","what":"Simple Wrapper around klaR kmeans — .k_means_fit_klaR","title":"Simple Wrapper around klaR kmeans — .k_means_fit_klaR","text":"wrapper runs klaR::kmodes() reorders clusters.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-k_means_fit_klaR.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Simple Wrapper around klaR kmeans — .k_means_fit_klaR","text":"","code":".k_means_fit_klaR(data, modes, ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-k_means_fit_klaR.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Simple Wrapper around klaR kmeans — .k_means_fit_klaR","text":"data matrix data frame categorical data. Objects rows, variables columns. modes Either number modes set initial (distinct) cluster modes. number, random set (distinct) rows data chosen initial modes. ... arguments passed klaR::kmodes()","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-k_means_fit_klaR.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Simple Wrapper around klaR kmeans — .k_means_fit_klaR","text":"Result klaR::kmodes()","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-k_means_fit_stats.html","id":null,"dir":"Reference","previous_headings":"","what":"Simple Wrapper around stats kmeans — .k_means_fit_stats","title":"Simple Wrapper around stats kmeans — .k_means_fit_stats","text":"wrapper runs stats::kmeans() adds check centers specified. reorders clusters.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-k_means_fit_stats.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Simple Wrapper around stats kmeans — .k_means_fit_stats","text":"","code":".k_means_fit_stats(data, centers = NULL, ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-k_means_fit_stats.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Simple Wrapper around stats kmeans — .k_means_fit_stats","text":"centers either number clusters, say \\(k\\), set initial (distinct) cluster centres. number, random set (distinct) rows x chosen initial centres. ... arguments passed stats::kmeans()","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/dot-k_means_fit_stats.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Simple Wrapper around stats kmeans — .k_means_fit_stats","text":"Result stats::kmeans()","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/empty_ellipses.html","id":null,"dir":"Reference","previous_headings":"","what":"Get colors for tidyclust text. — get_tidyclust_colors","title":"Get colors for tidyclust text. — get_tidyclust_colors","text":"Get colors tidyclust text.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/empty_ellipses.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get colors for tidyclust text. — get_tidyclust_colors","text":"","code":"get_tidyclust_colors()"},{"path":"https://tidyclust.tidymodels.org/dev/reference/empty_ellipses.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get colors for tidyclust text. — get_tidyclust_colors","text":"list cli functions.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract-tidyclust.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract elements of a tidyclust model object — extract-tidyclust","title":"Extract elements of a tidyclust model object — extract-tidyclust","text":"functions extract various elements clustering object. exist yet, error thrown. extract_fit_engine() returns engine specific fit embedded within tidyclust model fit. example, using k_means() \"lm\" engine, returns underlying kmeans object. extract_parameter_set_dials() returns set dials parameter objects.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract-tidyclust.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract elements of a tidyclust model object — extract-tidyclust","text":"","code":"# S3 method for class 'cluster_fit' extract_fit_engine(x, ...) # S3 method for class 'cluster_spec' extract_parameter_set_dials(x, ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract-tidyclust.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract elements of a tidyclust model object — extract-tidyclust","text":"x cluster_fit object cluster_spec object. ... currently used.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract-tidyclust.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract elements of a tidyclust model object — extract-tidyclust","text":"extracted value tidyclust object, x, described description section.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract-tidyclust.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Extract elements of a tidyclust model object — extract-tidyclust","text":"Extracting underlying engine fit can helpful describing model (via print(), summary(), plot(), etc.) variable importance/explainers. However, users invoke predict() method extracted model. may preprocessing operations tidyclust executed data prior giving model. Bypassing can lead errors silently generating incorrect predictions. Good: Bad:","code":"tidyclust_fit %>% predict(new_data) tidyclust_fit %>% extract_fit_engine() %>% predict(new_data)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract-tidyclust.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract elements of a tidyclust model object — extract-tidyclust","text":"","code":"kmeans_spec <- k_means(num_clusters = 2) kmeans_fit <- fit(kmeans_spec, ~., data = mtcars) extract_fit_engine(kmeans_fit) #> K-means clustering with 2 clusters of sizes 18, 14 #> #> Cluster means: #> mpg cyl disp hp drat wt qsec #> 2 23.97222 4.777778 135.5389 98.05556 3.882222 2.609056 18.68611 #> 1 15.10000 8.000000 353.1000 209.21429 3.229286 3.999214 16.77214 #> vs am gear carb #> 2 0.7777778 0.6111111 4.000000 2.277778 #> 1 0.0000000 0.1428571 3.285714 3.500000 #> #> Clustering vector: #> Mazda RX4 Mazda RX4 Wag Datsun 710 #> 1 1 1 #> Hornet 4 Drive Hornet Sportabout Valiant #> 1 2 1 #> Duster 360 Merc 240D Merc 230 #> 2 1 1 #> Merc 280 Merc 280C Merc 450SE #> 1 1 2 #> Merc 450SL Merc 450SLC Cadillac Fleetwood #> 2 2 2 #> Lincoln Continental Chrysler Imperial Fiat 128 #> 2 2 1 #> Honda Civic Toyota Corolla Toyota Corona #> 1 1 1 #> Dodge Challenger AMC Javelin Camaro Z28 #> 2 2 2 #> Pontiac Firebird Fiat X1-9 Porsche 914-2 #> 2 1 1 #> Lotus Europa Ford Pantera L Ferrari Dino #> 1 2 1 #> Maserati Bora Volvo 142E #> 2 1 #> #> Within cluster sum of squares by cluster: #> [1] 58920.54 93643.90 #> (between_SS / total_SS = 75.5 %) #> #> Available components: #> #> [1] \"cluster\" \"centers\" \"totss\" \"withinss\" #> [5] \"tot.withinss\" \"betweenss\" \"size\" \"iter\" #> [9] \"ifault\""},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_centroids.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract clusters from model — extract_centroids","title":"Extract clusters from model — extract_centroids","text":"applied fitted cluster specification, returns tibble cluster location. locations make sense model, mean location used.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_centroids.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract clusters from model — extract_centroids","text":"","code":"extract_centroids(object, ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_centroids.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract clusters from model — extract_centroids","text":"object fitted cluster_spec object. ... arguments passed methods. Using prefix allows change prefix levels factor levels.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_centroids.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract clusters from model — extract_centroids","text":"tibble::tibble() 1 row centroid position. .cluster denotes cluster name centroid. remaining variables match variables passed model.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_centroids.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Extract clusters from model — extract_centroids","text":"model types K-means seen k_means() stores centroid object . leading use function act simple extract. model types Hierarchical (Agglomerative) Clustering seen hier_clust(), fit way number clusters can determined time fit. Setting num_clusters cut_height function used determine clustering reported. , models like hier_clust(), notion \"centroids\". mean observation within cluster assignment returned centroid. ordering clusters first observation training data set cluster 1, next observation belong cluster 1 cluster 2, forth. ordering clustering matter, done avoid identical sets clustering different labels fit multiple times.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_centroids.html","id":"related-functions","dir":"Reference","previous_headings":"","what":"Related functions","title":"Extract clusters from model — extract_centroids","text":"extract_centroids() part trio functions similar things: extract_cluster_assignment() returns cluster assignments training observations extract_centroids() returns location centroids predict() returns cluster new observation belongs ","code":""},{"path":[]},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_centroids.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract clusters from model — extract_centroids","text":"","code":"set.seed(1234) kmeans_spec <- k_means(num_clusters = 5) %>% set_engine(\"stats\") kmeans_fit <- fit(kmeans_spec, ~., mtcars) kmeans_fit %>% extract_centroids() #> # A tibble: 5 × 12 #> .cluster mpg cyl disp hp drat wt qsec vs am gear #> #> 1 Cluster_1 19.9 5.71 167. 120. 3.71 3.11 18.5 0.571 0.429 4 #> 2 Cluster_2 27.0 4 102. 81.4 4.09 2.20 18.8 0.9 0.8 4.1 #> 3 Cluster_3 17.1 7.71 295. 161. 3.05 3.60 17.7 0.143 0 3 #> 4 Cluster_4 14.6 8 340. 272. 3.68 3.54 15.1 0 0.5 4 #> 5 Cluster_5 13.7 8 443 206. 3.06 4.97 17.6 0 0 3 #> # ℹ 1 more variable: carb # Some models such as `hier_clust()` fits in such a way that you can specify # the number of clusters after the model is fit. # A Hierarchical (Agglomerative) Clustering method doesn't technically have # clusters, so the center of the observation within each cluster is returned # instead. hclust_spec <- hier_clust() %>% set_engine(\"stats\") hclust_fit <- fit(hclust_spec, ~., mtcars) hclust_fit %>% extract_centroids(num_clusters = 2) #> # A tibble: 2 × 12 #> .cluster mpg cyl disp hp drat wt qsec vs am gear #> #> 1 Cluster_1 22.2 5.48 169. 113. 3.70 2.85 18.4 0.609 0.478 3.78 #> 2 Cluster_2 14.6 8 388. 232. 3.34 4.16 16.4 0 0.222 3.44 #> # ℹ 1 more variable: carb hclust_fit %>% extract_centroids(cut_height = 250) #> # A tibble: 3 × 12 #> .cluster mpg cyl disp hp drat wt qsec vs am gear #> #> 1 Cluster_1 24.5 4.62 122. 96.9 4.00 2.52 18.5 0.75 0.688 4.12 #> 2 Cluster_2 17.0 7.43 276. 151. 2.99 3.60 18.1 0.286 0 3 #> 3 Cluster_3 14.6 8 388. 232. 3.34 4.16 16.4 0 0.222 3.44 #> # ℹ 1 more variable: carb "},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_cluster_assignment.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract cluster assignments from model — extract_cluster_assignment","title":"Extract cluster assignments from model — extract_cluster_assignment","text":"applied fitted cluster specification, returns tibble cluster assignments data used train model.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_cluster_assignment.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract cluster assignments from model — extract_cluster_assignment","text":"","code":"extract_cluster_assignment(object, ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_cluster_assignment.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract cluster assignments from model — extract_cluster_assignment","text":"object fitted cluster_spec object. ... arguments passed methods. Using prefix allows change prefix levels factor levels.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_cluster_assignment.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract cluster assignments from model — extract_cluster_assignment","text":"tibble::tibble() 1 column named .cluster. tibble correspond training data set.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_cluster_assignment.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Extract cluster assignments from model — extract_cluster_assignment","text":"model types K-means seen k_means() stores cluster assignments object . leading use function act simple extract. model types Hierarchical (Agglomerative) Clustering seen hier_clust(), fit way number clusters can determined time fit. Setting num_clusters cut_height function used determine clustering reported. ordering clusters first observation training data set cluster 1, next observation belong cluster 1 cluster 2, forth. ordering clustering matter, done avoid identical sets clustering different labels fit multiple times.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_cluster_assignment.html","id":"related-functions","dir":"Reference","previous_headings":"","what":"Related functions","title":"Extract cluster assignments from model — extract_cluster_assignment","text":"extract_cluster_assignment() part trio functions similar things: extract_cluster_assignment() returns cluster assignments training observations extract_centroids() returns location centroids predict() returns cluster new observation belongs ","code":""},{"path":[]},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_cluster_assignment.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract cluster assignments from model — extract_cluster_assignment","text":"","code":"kmeans_spec <- k_means(num_clusters = 5) %>% set_engine(\"stats\") kmeans_fit <- fit(kmeans_spec, ~., mtcars) kmeans_fit %>% extract_cluster_assignment() #> # A tibble: 32 × 1 #> .cluster #> #> 1 Cluster_1 #> 2 Cluster_1 #> 3 Cluster_1 #> 4 Cluster_2 #> 5 Cluster_3 #> 6 Cluster_2 #> 7 Cluster_4 #> 8 Cluster_1 #> 9 Cluster_1 #> 10 Cluster_1 #> # ℹ 22 more rows kmeans_fit %>% extract_cluster_assignment(prefix = \"C_\") #> # A tibble: 32 × 1 #> .cluster #> #> 1 C_1 #> 2 C_1 #> 3 C_1 #> 4 C_2 #> 5 C_3 #> 6 C_2 #> 7 C_4 #> 8 C_1 #> 9 C_1 #> 10 C_1 #> # ℹ 22 more rows # Some models such as `hier_clust()` fits in such a way that you can specify # the number of clusters after the model is fit hclust_spec <- hier_clust() %>% set_engine(\"stats\") hclust_fit <- fit(hclust_spec, ~., mtcars) hclust_fit %>% extract_cluster_assignment(num_clusters = 2) #> # A tibble: 32 × 1 #> .cluster #> #> 1 Cluster_1 #> 2 Cluster_1 #> 3 Cluster_1 #> 4 Cluster_1 #> 5 Cluster_2 #> 6 Cluster_1 #> 7 Cluster_2 #> 8 Cluster_1 #> 9 Cluster_1 #> 10 Cluster_1 #> # ℹ 22 more rows hclust_fit %>% extract_cluster_assignment(cut_height = 250) #> # A tibble: 32 × 1 #> .cluster #> #> 1 Cluster_1 #> 2 Cluster_1 #> 3 Cluster_1 #> 4 Cluster_2 #> 5 Cluster_3 #> 6 Cluster_2 #> 7 Cluster_3 #> 8 Cluster_1 #> 9 Cluster_1 #> 10 Cluster_1 #> # ℹ 22 more rows"},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_fit_summary.html","id":null,"dir":"Reference","previous_headings":"","what":"S3 method to get fitted model summary info depending on engine — extract_fit_summary","title":"S3 method to get fitted model summary info depending on engine — extract_fit_summary","text":"S3 method get fitted model summary info depending engine","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_fit_summary.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"S3 method to get fitted model summary info depending on engine — extract_fit_summary","text":"","code":"extract_fit_summary(object, ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_fit_summary.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"S3 method to get fitted model summary info depending on engine — extract_fit_summary","text":"object fitted cluster_spec object ... arguments passed methods","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_fit_summary.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"S3 method to get fitted model summary info depending on engine — extract_fit_summary","text":"list various summary elements","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_fit_summary.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"S3 method to get fitted model summary info depending on engine — extract_fit_summary","text":"elements cluster_names cluster_assignments factors.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/extract_fit_summary.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"S3 method to get fitted model summary info depending on engine — extract_fit_summary","text":"","code":"kmeans_spec <- k_means(num_clusters = 5) %>% set_engine(\"stats\") kmeans_fit <- fit(kmeans_spec, ~., mtcars) kmeans_fit %>% extract_fit_summary() #> $cluster_names #> [1] Cluster_1 Cluster_2 Cluster_3 Cluster_4 Cluster_5 #> Levels: Cluster_1 Cluster_2 Cluster_3 Cluster_4 Cluster_5 #> #> $centroids #> # A tibble: 5 × 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 24.5 4.62 122. 96.9 4.00 2.52 18.5 0.75 0.688 4.12 2.44 #> 2 19.8 6 242. 108. 2.92 3.34 19.8 1 0 3 1 #> 3 16.4 8 302. 169. 3.04 3.66 17.4 0 0 3 2.5 #> 4 14.6 8 340. 272. 3.68 3.54 15.1 0 0.5 4 5 #> 5 13.7 8 443 206. 3.06 4.97 17.6 0 0 3 3.5 #> #> $n_members #> [1] 16 2 6 4 4 #> #> $sse_within_total_total #> [1] 32837.9972 562.8304 6815.5541 7654.1463 4665.0415 #> #> $sse_total #> [1] 623387.5 #> #> $orig_labels #> [1] 1 1 1 2 3 2 4 1 1 1 1 3 3 3 5 5 5 1 1 1 1 3 3 4 5 1 1 1 4 1 4 1 #> #> $cluster_assignments #> [1] Cluster_1 Cluster_1 Cluster_1 Cluster_2 Cluster_3 Cluster_2 Cluster_4 #> [8] Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_3 Cluster_3 Cluster_3 #> [15] Cluster_5 Cluster_5 Cluster_5 Cluster_1 Cluster_1 Cluster_1 Cluster_1 #> [22] Cluster_3 Cluster_3 Cluster_4 Cluster_5 Cluster_1 Cluster_1 Cluster_1 #> [29] Cluster_4 Cluster_1 Cluster_4 Cluster_1 #> Levels: Cluster_1 Cluster_2 Cluster_3 Cluster_4 Cluster_5 #>"},{"path":"https://tidyclust.tidymodels.org/dev/reference/finalize_model_tidyclust.html","id":null,"dir":"Reference","previous_headings":"","what":"Splice final parameters into objects — finalize_model_tidyclust","title":"Splice final parameters into objects — finalize_model_tidyclust","text":"finalize_* functions take list tibble tuning parameter values update objects values.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/finalize_model_tidyclust.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Splice final parameters into objects — finalize_model_tidyclust","text":"","code":"finalize_model_tidyclust(x, parameters) finalize_workflow_tidyclust(x, parameters)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/finalize_model_tidyclust.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Splice final parameters into objects — finalize_model_tidyclust","text":"x recipe, parsnip model specification, workflow. parameters list 1-row tibble parameter values. Note column names tibble id fields attached tune(). example, Examples section , model tune(\"K\"). case, parameter tibble \"K\" \"neighbors\".","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/finalize_model_tidyclust.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Splice final parameters into objects — finalize_model_tidyclust","text":"updated version x.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/finalize_model_tidyclust.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Splice final parameters into objects — finalize_model_tidyclust","text":"","code":"kmeans_spec <- k_means(num_clusters = tune()) kmeans_spec #> K Means Cluster Specification (partition) #> #> Main Arguments: #> num_clusters = tune() #> #> Computational engine: stats #> best_params <- data.frame(num_clusters = 5) best_params #> num_clusters #> 1 5 finalize_model_tidyclust(kmeans_spec, best_params) #> K Means Cluster Specification (partition) #> #> Main Arguments: #> num_clusters = 5 #> #> Computational engine: stats #>"},{"path":"https://tidyclust.tidymodels.org/dev/reference/fit.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit a Model Specification to a Data Set — fit.cluster_spec","title":"Fit a Model Specification to a Data Set — fit.cluster_spec","text":"fit() fit_xy() take model specification, translate_tidyclust required code substituting arguments, execute model fit routine.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/fit.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit a Model Specification to a Data Set — fit.cluster_spec","text":"","code":"# S3 method for class 'cluster_spec' fit(object, formula, data, control = control_cluster(), ...) # S3 method for class 'cluster_spec' fit_xy(object, x, case_weights = NULL, control = control_cluster(), ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/fit.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit a Model Specification to a Data Set — fit.cluster_spec","text":"object object class cluster_spec chosen engine (via set_engine()). formula object class formula (one can coerced class): symbolic description model fitted. data Optional, depending interface (see Details ). data frame containing relevant variables (e.g. predictors, case weights, etc). Note: needed, named argument used. control named list elements verbosity catch. See control_cluster(). ... currently used; values passed ignored. options required fit model passed using set_engine(). x matrix, sparse matrix, data frame predictors. models support sparse matrix input. See modelenv::get_encoding() details. x column names. case_weights optional classed vector numeric case weights. must return TRUE hardhat::is_case_weights() run . See hardhat::frequency_weights() hardhat::importance_weights() examples.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/fit.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit a Model Specification to a Data Set — fit.cluster_spec","text":"cluster_fit object contains several elements: spec: model specification object (object call fit) fit: model executed without error, model object. Otherwise, try-error object error message. preproc: objects needed convert formula non-formula interface (terms object) return value also class related fitted model (e.g. \"_kmeans\") base class \"cluster_fit\". fitted cluster_fit object.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/fit.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit a Model Specification to a Data Set — fit.cluster_spec","text":"fit() fit_xy() substitute current arguments model specification computational engine's code, check validity, fit model using data engine-specific code. Different model functions different interfaces (e.g. formula x/y) functions translate_tidyclust interface used fit() fit_xy() invoked one required underlying model. possible, functions attempt avoid making copies data. example, underlying model uses formula fit() invoked, original data references model fit. However, underlying model uses something else, x/y, formula evaluated data converted required format. case, calls resulting model objects reference temporary objects used fit model. model engine set, model's default engine used (discussed model page). verbosity option control_cluster() greater zero, warning produced. like use alternative method generating contrasts supplying formula fit(), set global option contrasts preferred method. example, might set : options(contrasts = c(unordered = \"contr.helmert\", ordered = \"contr.poly\")). See help page stats::contr.treatment() possible contrast types.","code":""},{"path":[]},{"path":"https://tidyclust.tidymodels.org/dev/reference/fit.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit a Model Specification to a Data Set — fit.cluster_spec","text":"","code":"library(dplyr) #> #> Attaching package: ‘dplyr’ #> The following objects are masked from ‘package:stats’: #> #> filter, lag #> The following objects are masked from ‘package:base’: #> #> intersect, setdiff, setequal, union kmeans_mod <- k_means(num_clusters = 5) using_formula <- kmeans_mod %>% set_engine(\"stats\") %>% fit(~., data = mtcars) using_x <- kmeans_mod %>% set_engine(\"stats\") %>% fit_xy(x = mtcars) using_formula #> tidyclust cluster object #> #> K-means clustering with 5 clusters of sizes 7, 7, 10, 4, 4 #> #> Cluster means: #> mpg cyl disp hp drat wt qsec vs #> 2 19.74286 6 183.3143 122.28571 3.585714 3.117143 17.97714 0.5714286 #> 1 24.18571 4 121.7143 94.28571 3.924286 2.508286 19.10286 0.8571429 #> 4 15.67000 8 317.1400 210.40000 3.297000 3.612500 16.45400 0.0000000 #> 3 13.67500 8 443.0000 206.25000 3.060000 4.966000 17.56750 0.0000000 #> 5 31.00000 4 76.1250 62.25000 4.327500 1.896250 19.19750 1.0000000 #> am gear carb #> 2 0.4285714 3.857143 3.428571 #> 1 0.5714286 4.142857 1.714286 #> 4 0.2000000 3.400000 3.500000 #> 3 0.0000000 3.000000 3.500000 #> 5 1.0000000 4.000000 1.250000 #> #> Clustering vector: #> Mazda RX4 Mazda RX4 Wag Datsun 710 #> 1 1 2 #> Hornet 4 Drive Hornet Sportabout Valiant #> 1 3 1 #> Duster 360 Merc 240D Merc 230 #> 3 2 2 #> Merc 280 Merc 280C Merc 450SE #> 1 1 3 #> Merc 450SL Merc 450SLC Cadillac Fleetwood #> 3 3 4 #> Lincoln Continental Chrysler Imperial Fiat 128 #> 4 4 5 #> Honda Civic Toyota Corolla Toyota Corona #> 5 5 2 #> Dodge Challenger AMC Javelin Camaro Z28 #> 3 3 3 #> Pontiac Firebird Fiat X1-9 Porsche 914-2 #> 4 5 2 #> Lotus Europa Ford Pantera L Ferrari Dino #> 2 3 1 #> Maserati Bora Volvo 142E #> 3 2 #> #> Within cluster sum of squares by cluster: #> [1] 13954.3363 3616.8297 43649.5192 4665.0415 208.0365 #> (between_SS / total_SS = 89.4 %) #> #> Available components: #> #> [1] \"cluster\" \"centers\" \"totss\" \"withinss\" #> [5] \"tot.withinss\" \"betweenss\" \"size\" \"iter\" #> [9] \"ifault\" using_x #> tidyclust cluster object #> #> K-means clustering with 5 clusters of sizes 16, 2, 6, 4, 4 #> #> Cluster means: #> mpg cyl disp hp drat wt qsec vs am #> 2 24.50000 4.625 122.2937 96.8750 4.0025 2.5180 18.54312 0.75 0.6875 #> 4 19.75000 6.000 241.5000 107.5000 2.9200 3.3375 19.83000 1.00 0.0000 #> 1 16.38333 8.000 301.5667 169.1667 3.0450 3.6625 17.36500 0.00 0.0000 #> 5 14.60000 8.000 340.5000 272.2500 3.6750 3.5375 15.08750 0.00 0.5000 #> 3 13.67500 8.000 443.0000 206.2500 3.0600 4.9660 17.56750 0.00 0.0000 #> gear carb #> 2 4.125 2.4375 #> 4 3.000 1.0000 #> 1 3.000 2.5000 #> 5 4.000 5.0000 #> 3 3.000 3.5000 #> #> Clustering vector: #> Mazda RX4 Mazda RX4 Wag Datsun 710 #> 1 1 1 #> Hornet 4 Drive Hornet Sportabout Valiant #> 2 3 2 #> Duster 360 Merc 240D Merc 230 #> 4 1 1 #> Merc 280 Merc 280C Merc 450SE #> 1 1 3 #> Merc 450SL Merc 450SLC Cadillac Fleetwood #> 3 3 5 #> Lincoln Continental Chrysler Imperial Fiat 128 #> 5 5 1 #> Honda Civic Toyota Corolla Toyota Corona #> 1 1 1 #> Dodge Challenger AMC Javelin Camaro Z28 #> 3 3 4 #> Pontiac Firebird Fiat X1-9 Porsche 914-2 #> 5 1 1 #> Lotus Europa Ford Pantera L Ferrari Dino #> 1 4 1 #> Maserati Bora Volvo 142E #> 4 1 #> #> Within cluster sum of squares by cluster: #> [1] 32837.9972 562.8304 6815.5541 7654.1463 4665.0415 #> (between_SS / total_SS = 91.6 %) #> #> Available components: #> #> [1] \"cluster\" \"centers\" \"totss\" \"withinss\" #> [5] \"tot.withinss\" \"betweenss\" \"size\" \"iter\" #> [9] \"ifault\""},{"path":"https://tidyclust.tidymodels.org/dev/reference/get_centroid_dists.html","id":null,"dir":"Reference","previous_headings":"","what":"Computes distance from observations to centroids — get_centroid_dists","title":"Computes distance from observations to centroids — get_centroid_dists","text":"Computes distance observations centroids","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/get_centroid_dists.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Computes distance from observations to centroids — get_centroid_dists","text":"","code":"get_centroid_dists( new_data, centroids, dist_fun = function(x, y) { philentropy::dist_many_many(x, y, method = \"euclidean\") } )"},{"path":"https://tidyclust.tidymodels.org/dev/reference/get_centroid_dists.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Computes distance from observations to centroids — get_centroid_dists","text":"new_data data frame centroids data frame row centroid. dist_fun function computing matrix--matrix distances. Defaults function(x, y) philentropy::dist_many_many(x, y, method = \"euclidean\").","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/glance.cluster_fit.html","id":null,"dir":"Reference","previous_headings":"","what":"Construct a single row summary ","title":"Construct a single row summary ","text":"method glances model tidyclust model object, exists.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/glance.cluster_fit.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Construct a single row summary ","text":"","code":"# S3 method for class 'cluster_fit' glance(x, ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/glance.cluster_fit.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Construct a single row summary ","text":"x model R object convert single-row data frame ... arguments passed methods","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/glance.cluster_fit.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Construct a single row summary ","text":"tibble","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/hier_clust.html","id":null,"dir":"Reference","previous_headings":"","what":"Hierarchical (Agglomerative) Clustering — hier_clust","title":"Hierarchical (Agglomerative) Clustering — hier_clust","text":"hier_clust() defines model fits clusters based distance-based dendrogram different ways fit model, method estimation chosen setting model engine. engine-specific pages model listed . stats","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/hier_clust.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Hierarchical (Agglomerative) Clustering — hier_clust","text":"","code":"hier_clust( mode = \"partition\", engine = \"stats\", num_clusters = NULL, cut_height = NULL, linkage_method = \"complete\" )"},{"path":"https://tidyclust.tidymodels.org/dev/reference/hier_clust.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Hierarchical (Agglomerative) Clustering — hier_clust","text":"mode single character string type model. possible value model \"partition\". engine single character string specifying computational engine use fitting. Possible engines listed . default model \"stats\". num_clusters Positive integer, number clusters model (optional). cut_height Positive double, height cut dendrogram obtain cluster assignments (used num_clusters NULL) linkage_method agglomeration method used. (unambiguous abbreviation ) one \"ward.D\", \"ward.D2\", \"single\", \"complete\", \"average\" (= UPGMA), \"mcquitty\" (= WPGMA), \"median\" (= WPGMC) \"centroid\" (= UPGMC).","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/hier_clust.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Hierarchical (Agglomerative) Clustering — hier_clust","text":"hier_clust cluster specification.","code":""},{"path":[]},{"path":"https://tidyclust.tidymodels.org/dev/reference/hier_clust.html","id":"what-does-it-mean-to-predict-","dir":"Reference","previous_headings":"","what":"What does it mean to predict?","title":"Hierarchical (Agglomerative) Clustering — hier_clust","text":"predict cluster assignment new observation, find closest cluster. measure “closeness” dependent specified type linkage model: single linkage: new observation assigned cluster nearest observation training data. complete linkage: new observation assigned cluster smallest maximum distances training observations new observation. average linkage: new observation assigned cluster smallest average distances training observations new observation. centroid method: new observation assigned cluster closest centroid, prediction k_means. Ward’s method: new observation assigned cluster smallest increase error sum squares (ESS) due new addition. ESS computed sum squared distances observations cluster, centroid cluster.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/hier_clust.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Hierarchical (Agglomerative) Clustering — hier_clust","text":"","code":"# Show all engines modelenv::get_from_env(\"hier_clust\") #> # A tibble: 1 × 2 #> engine mode #> #> 1 stats partition hier_clust() #> Hierarchical Clustering Specification (partition) #> #> Main Arguments: #> linkage_method = complete #> #> Computational engine: stats #>"},{"path":"https://tidyclust.tidymodels.org/dev/reference/k_means.html","id":null,"dir":"Reference","previous_headings":"","what":"K-Means — k_means","title":"K-Means — k_means","text":"k_means() defines model fits clusters based distances number centers. definition just include K-means, includes models like K-prototypes. different ways fit model, method estimation chosen setting model engine. engine-specific pages model listed . stats: Classical K-means ClusterR: Classical K-means klaR: K-Modes clustMixType: K-prototypes","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/k_means.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"K-Means — k_means","text":"","code":"k_means(mode = \"partition\", engine = \"stats\", num_clusters = NULL)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/k_means.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"K-Means — k_means","text":"mode single character string type model. possible value model \"partition\". engine single character string specifying computational engine use fitting. Possible engines listed . default model \"stats\". num_clusters Positive integer, number clusters model.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/k_means.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"K-Means — k_means","text":"k_means cluster specification.","code":""},{"path":[]},{"path":"https://tidyclust.tidymodels.org/dev/reference/k_means.html","id":"what-does-it-mean-to-predict-","dir":"Reference","previous_headings":"","what":"What does it mean to predict?","title":"K-Means — k_means","text":"K-means model, cluster defined location predictor space. Therefore, prediction tidyclust defined calculating cluster centroid observation closest .","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/k_means.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"K-Means — k_means","text":"","code":"# Show all engines modelenv::get_from_env(\"k_means\") #> # A tibble: 4 × 2 #> engine mode #> #> 1 stats partition #> 2 ClusterR partition #> 3 clustMixType partition #> 4 klaR partition k_means() #> K Means Cluster Specification (partition) #> #> Computational engine: stats #>"},{"path":"https://tidyclust.tidymodels.org/dev/reference/knit_engine_docs.html","id":null,"dir":"Reference","previous_headings":"","what":"Knit engine-specific documentation — knit_engine_docs","title":"Knit engine-specific documentation — knit_engine_docs","text":"Knit engine-specific documentation","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/knit_engine_docs.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Knit engine-specific documentation — knit_engine_docs","text":"","code":"knit_engine_docs(pattern = NULL)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/knit_engine_docs.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Knit engine-specific documentation — knit_engine_docs","text":"pattern regular expression specify files knit. default knits engine documentation files.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/knit_engine_docs.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Knit engine-specific documentation — knit_engine_docs","text":"tibble column file file name result (character vector echos output file name , failure, error message).","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/linkage_method.html","id":null,"dir":"Reference","previous_headings":"","what":"The agglomeration Linkage method — linkage_method","title":"The agglomeration Linkage method — linkage_method","text":"agglomeration Linkage method","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/linkage_method.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"The agglomeration Linkage method — linkage_method","text":"","code":"linkage_method(values = values_linkage_method) values_linkage_method"},{"path":"https://tidyclust.tidymodels.org/dev/reference/linkage_method.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"The agglomeration Linkage method — linkage_method","text":"object class character length 8.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/linkage_method.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"The agglomeration Linkage method — linkage_method","text":"values character string possible values. See linkage_methods examples .","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/linkage_method.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"The agglomeration Linkage method — linkage_method","text":"parameter used tidyclust models hier_clust().","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/linkage_method.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"The agglomeration Linkage method — linkage_method","text":"","code":"values_linkage_method #> [1] \"ward.D\" \"ward.D2\" \"single\" \"complete\" \"average\" \"mcquitty\" #> [7] \"median\" \"centroid\" linkage_method() #> Linkage Method (qualitative) #> 8 possible values include: #> 'ward.D', 'ward.D2', 'single', 'complete', 'average', 'mcquitty', 'medi..."},{"path":"https://tidyclust.tidymodels.org/dev/reference/list_md_problems.html","id":null,"dir":"Reference","previous_headings":"","what":"Locate and show errors/warnings in engine-specific documentation — list_md_problems","title":"Locate and show errors/warnings in engine-specific documentation — list_md_problems","text":"Locate show errors/warnings engine-specific documentation","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/list_md_problems.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Locate and show errors/warnings in engine-specific documentation — list_md_problems","text":"","code":"list_md_problems()"},{"path":"https://tidyclust.tidymodels.org/dev/reference/list_md_problems.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Locate and show errors/warnings in engine-specific documentation — list_md_problems","text":"tibble column file file name, line indicating line error/warning occurred, problem showing error/warning message.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/load_pkgs.cluster_spec.html","id":null,"dir":"Reference","previous_headings":"","what":"Quietly load package namespace — load_pkgs.cluster_spec","title":"Quietly load package namespace — load_pkgs.cluster_spec","text":"one packages, load namespace. used parallel processing since different parallel backends handle package environments differently.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/load_pkgs.cluster_spec.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Quietly load package namespace — load_pkgs.cluster_spec","text":"","code":"# S3 method for class 'cluster_spec' load_pkgs(x, infra = TRUE, ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/load_pkgs.cluster_spec.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Quietly load package namespace — load_pkgs.cluster_spec","text":"x character vector packages. infra base tidymodels packages loaded well?","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/load_pkgs.cluster_spec.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Quietly load package namespace — load_pkgs.cluster_spec","text":"invisible NULL.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/make_classes_tidyclust.html","id":null,"dir":"Reference","previous_headings":"","what":"Prepend a new class — make_classes_tidyclust","title":"Prepend a new class — make_classes_tidyclust","text":"adds extra class base class \"cluster_spec\".","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/make_classes_tidyclust.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Prepend a new class — make_classes_tidyclust","text":"","code":"make_classes_tidyclust(prefix)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/make_classes_tidyclust.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Prepend a new class — make_classes_tidyclust","text":"prefix character string class.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/make_classes_tidyclust.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Prepend a new class — make_classes_tidyclust","text":"character vector.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/min_grid.cluster_spec.html","id":null,"dir":"Reference","previous_headings":"","what":"Determine the minimum set of model fits — min_grid.cluster_spec","title":"Determine the minimum set of model fits — min_grid.cluster_spec","text":"Determine minimum set model fits","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/min_grid.cluster_spec.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Determine the minimum set of model fits — min_grid.cluster_spec","text":"","code":"# S3 method for class 'cluster_spec' min_grid(x, grid, ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/min_grid.cluster_spec.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Determine the minimum set of model fits — min_grid.cluster_spec","text":"x cluster specification. grid tibble tuning parameter combinations. ... currently used.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/min_grid.cluster_spec.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Determine the minimum set of model fits — min_grid.cluster_spec","text":"tibble minimum tuning parameters fit additional list column parameter combinations used prediction.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/new_cluster_metric.html","id":null,"dir":"Reference","previous_headings":"","what":"Construct a new clustering metric function — new_cluster_metric","title":"Construct a new clustering metric function — new_cluster_metric","text":"functions provide convenient wrappers create one type metric functions celrry: clustering metrics. add metric-specific class fn. features used cluster_metric_set() tune_cluster() tuning.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/new_cluster_metric.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Construct a new clustering metric function — new_cluster_metric","text":"","code":"new_cluster_metric(fn, direction)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/new_cluster_metric.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Construct a new clustering metric function — new_cluster_metric","text":"fn function. direction string. One : \"maximize\" \"minimize\" \"zero\"","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/new_cluster_metric.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Construct a new clustering metric function — new_cluster_metric","text":"cluster_metric object.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/new_cluster_spec.html","id":null,"dir":"Reference","previous_headings":"","what":"Functions required for tidyclust-adjacent packages — new_cluster_spec","title":"Functions required for tidyclust-adjacent packages — new_cluster_spec","text":"functions helpful creating new packages register new cluster specifications.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/new_cluster_spec.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Functions required for tidyclust-adjacent packages — new_cluster_spec","text":"","code":"new_cluster_spec(cls, args, eng_args, mode, method, engine)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/new_cluster_spec.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Functions required for tidyclust-adjacent packages — new_cluster_spec","text":"cluster_spec object made work tidyclust.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/other_predict.html","id":null,"dir":"Reference","previous_headings":"","what":"Other predict methods. — predict_cluster","title":"Other predict methods. — predict_cluster","text":"internal functions meant directly called user.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/other_predict.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Other predict methods. — predict_cluster","text":"","code":"predict_cluster(object, ...) # S3 method for class 'cluster_fit' predict_cluster(object, new_data, ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/other_predict.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Other predict methods. — predict_cluster","text":"object object class cluster_fit. ... Arguments underlying model's prediction function passed (see opts). new_data rectangular data object, data frame.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/other_predict.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Other predict methods. — predict_cluster","text":"tibble::tibble(). tibble::tibble().","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/predict.cluster_fit.html","id":null,"dir":"Reference","previous_headings":"","what":"Model predictions — predict.cluster_fit","title":"Model predictions — predict.cluster_fit","text":"Apply model create different types predictions. predict() can used types models uses \"type\" argument specificity.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/predict.cluster_fit.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model predictions — predict.cluster_fit","text":"","code":"# S3 method for class 'cluster_fit' predict(object, new_data, type = NULL, opts = list(), ...) # S3 method for class 'cluster_fit' predict_raw(object, new_data, opts = list(), ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/predict.cluster_fit.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model predictions — predict.cluster_fit","text":"object object class cluster_fit. new_data rectangular data object, data frame. type single character value NULL. Possible values \"cluster\", \"raw\". NULL, predict() choose appropriate value based model's mode. opts list optional arguments underlying predict function used type = \"raw\". list include options model object new data predicted. ... Arguments underlying model's prediction function passed (see opts).","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/predict.cluster_fit.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model predictions — predict.cluster_fit","text":"exception type = \"raw\", results predict.cluster_fit() tibble many rows output rows new_data column names predictable. clustering results tibble .pred_cluster column. Using type = \"raw\" predict.cluster_fit() return unadulterated results prediction function. model fit failed error captured, predict() function return structure filled missing values. currently work multivariate models.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/predict.cluster_fit.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Model predictions — predict.cluster_fit","text":"\"type\" supplied predict(), choice made: type = \"cluster\" clustering models predict() designed provide tidy result (see \"Value\" section ) tibble output format. ordering clusters first observation training data set cluster 1, next observation belong cluster 1 cluster 2, forth. ordering clustering matter, done avoid identical sets clustering different labels fit multiple times.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/predict.cluster_fit.html","id":"what-does-it-mean-to-predict-","dir":"Reference","previous_headings":"","what":"What does it mean to predict?","title":"Model predictions — predict.cluster_fit","text":"Prediction always formally defined clustering models. Therefore, cluster_spec method section \"prediction\" interpreted, done implemented.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/predict.cluster_fit.html","id":"related-functions","dir":"Reference","previous_headings":"","what":"Related functions","title":"Model predictions — predict.cluster_fit","text":"predict() used tidyclust objects part trio functions similar things: extract_cluster_assignment() returns cluster assignments training observations extract_centroids() returns location centroids predict() returns cluster new observation belongs ","code":""},{"path":[]},{"path":"https://tidyclust.tidymodels.org/dev/reference/predict.cluster_fit.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Model predictions — predict.cluster_fit","text":"","code":"kmeans_spec <- k_means(num_clusters = 5) %>% set_engine(\"stats\") kmeans_fit <- fit(kmeans_spec, ~., mtcars) kmeans_fit %>% predict(new_data = mtcars) #> # A tibble: 32 × 1 #> .pred_cluster #> #> 1 Cluster_1 #> 2 Cluster_1 #> 3 Cluster_2 #> 4 Cluster_3 #> 5 Cluster_4 #> 6 Cluster_1 #> 7 Cluster_4 #> 8 Cluster_2 #> 9 Cluster_2 #> 10 Cluster_1 #> # ℹ 22 more rows # Some models such as `hier_clust()` fits in such a way that you can specify # the number of clusters after the model is fit hclust_spec <- hier_clust() %>% set_engine(\"stats\") hclust_fit <- fit(hclust_spec, ~., mtcars) hclust_fit %>% predict(new_data = mtcars[4:6, ], num_clusters = 2) #> # A tibble: 3 × 1 #> .pred_cluster #> #> 1 Cluster_1 #> 2 Cluster_2 #> 3 Cluster_1 hclust_fit %>% predict(new_data = mtcars[4:6, ], cut_height = 250) #> # A tibble: 3 × 1 #> .pred_cluster #> #> 1 Cluster_2 #> 2 Cluster_2 #> 3 Cluster_2"},{"path":"https://tidyclust.tidymodels.org/dev/reference/prep_data_dist.html","id":null,"dir":"Reference","previous_headings":"","what":"Prepares data and distance matrices for metric calculation — prep_data_dist","title":"Prepares data and distance matrices for metric calculation — prep_data_dist","text":"Prepares data distance matrices metric calculation","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/prep_data_dist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Prepares data and distance matrices for metric calculation — prep_data_dist","text":"","code":"prep_data_dist( object, new_data = NULL, dists = NULL, dist_fun = philentropy::distance )"},{"path":"https://tidyclust.tidymodels.org/dev/reference/prep_data_dist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Prepares data and distance matrices for metric calculation — prep_data_dist","text":"object fitted cluster_spec object. new_data dataset calculate predictions . NULL, trained cluster assignments fitted object used. dists distance matrix data. NULL, distance computed new_data using stats::dist() function. dist_fun custom distance functions.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/prep_data_dist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Prepares data and distance matrices for metric calculation — prep_data_dist","text":"list","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/reconcile_clusterings_mapping.html","id":null,"dir":"Reference","previous_headings":"","what":"Relabels clusters to match another cluster assignment — reconcile_clusterings_mapping","title":"Relabels clusters to match another cluster assignment — reconcile_clusterings_mapping","text":"forcing one--one, user needs decide prioritize: \"accuracy\": optimize raw count observations label across two assignments \"precision\": optimize average percent alt cluster matches corresponding primary cluster","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/reconcile_clusterings_mapping.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Relabels clusters to match another cluster assignment — reconcile_clusterings_mapping","text":"","code":"reconcile_clusterings_mapping( primary, alternative, one_to_one = TRUE, optimize = \"accuracy\" )"},{"path":"https://tidyclust.tidymodels.org/dev/reference/reconcile_clusterings_mapping.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Relabels clusters to match another cluster assignment — reconcile_clusterings_mapping","text":"primary vector containing cluster labels, matched alternative Another vector containing cluster labels, changed one_to_one Boolean; alt cluster match one primary cluster? optimize One \"accuracy\" \"precision\"; see description.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/reconcile_clusterings_mapping.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Relabels clusters to match another cluster assignment — reconcile_clusterings_mapping","text":"tibble 3 columns; primary, alt, alt_recoded","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/reconcile_clusterings_mapping.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Relabels clusters to match another cluster assignment — reconcile_clusterings_mapping","text":"Retains cluster labels primary assignment, relabel alternate assignment match closely possible. user must decide whether clusters forced \"one--one\"; , allowed assign multiple labels alternate assignment primary label?","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/reconcile_clusterings_mapping.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Relabels clusters to match another cluster assignment — reconcile_clusterings_mapping","text":"","code":"factor1 <- c(\"Apple\", \"Apple\", \"Carrot\", \"Carrot\", \"Banana\", \"Banana\") factor2 <- c(\"Dog\", \"Dog\", \"Cat\", \"Dog\", \"Fish\", \"Fish\") reconcile_clusterings_mapping(factor1, factor2) #> # A tibble: 6 × 3 #> primary alt alt_recoded #> #> 1 Apple Dog Carrot #> 2 Apple Dog Carrot #> 3 Carrot Cat Banana #> 4 Carrot Dog Carrot #> 5 Banana Fish Apple #> 6 Banana Fish Apple factor1 <- c(\"Apple\", \"Apple\", \"Carrot\", \"Carrot\", \"Banana\", \"Banana\") factor2 <- c(\"Dog\", \"Dog\", \"Cat\", \"Dog\", \"Fish\", \"Parrot\") reconcile_clusterings_mapping(factor1, factor2, one_to_one = FALSE) #> # A tibble: 6 × 3 #> primary alt alt_recoded #> #> 1 Apple Dog Apple #> 2 Apple Dog Apple #> 3 Carrot Cat Carrot #> 4 Carrot Dog Apple #> 5 Banana Fish Banana #> 6 Banana Parrot Banana"},{"path":"https://tidyclust.tidymodels.org/dev/reference/reexports.html","id":null,"dir":"Reference","previous_headings":"","what":"Objects exported from other packages — reexports","title":"Objects exported from other packages — reexports","text":"objects imported packages. Follow links see documentation. dplyr %>% generics augment, fit, fit_xy, glance, min_grid, required_pkgs, tidy hardhat extract_fit_engine, extract_fit_parsnip, extract_parameter_set_dials, extract_preprocessor, extract_spec_parsnip, tune parsnip predict_raw, set_args, set_engine, set_mode tune load_pkgs","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/set_args.cluster_spec.html","id":null,"dir":"Reference","previous_headings":"","what":"Change arguments of a cluster specification — set_args.cluster_spec","title":"Change arguments of a cluster specification — set_args.cluster_spec","text":"Change arguments cluster specification","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/set_args.cluster_spec.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Change arguments of a cluster specification — set_args.cluster_spec","text":"","code":"# S3 method for class 'cluster_spec' set_args(object, ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/set_args.cluster_spec.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Change arguments of a cluster specification — set_args.cluster_spec","text":"object model specification. ... One named model arguments.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/set_args.cluster_spec.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Change arguments of a cluster specification — set_args.cluster_spec","text":"updated cluster_spec object.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/set_engine.cluster_spec.html","id":null,"dir":"Reference","previous_headings":"","what":"Change engine of a cluster specification — set_engine.cluster_spec","title":"Change engine of a cluster specification — set_engine.cluster_spec","text":"Change engine cluster specification","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/set_engine.cluster_spec.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Change engine of a cluster specification — set_engine.cluster_spec","text":"","code":"# S3 method for class 'cluster_spec' set_engine(object, engine, ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/set_engine.cluster_spec.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Change engine of a cluster specification — set_engine.cluster_spec","text":"object model specification. engine character string software used fit model. highly dependent type model (e.g. linear regression, random forest, etc.). ... optional arguments associated chosen computational engine. captured quosures can tuned tune().","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/set_engine.cluster_spec.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Change engine of a cluster specification — set_engine.cluster_spec","text":"updated cluster_spec object.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/set_mode.cluster_spec.html","id":null,"dir":"Reference","previous_headings":"","what":"Change mode of a cluster specification — set_mode.cluster_spec","title":"Change mode of a cluster specification — set_mode.cluster_spec","text":"Change mode cluster specification","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/set_mode.cluster_spec.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Change mode of a cluster specification — set_mode.cluster_spec","text":"","code":"# S3 method for class 'cluster_spec' set_mode(object, mode)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/set_mode.cluster_spec.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Change mode of a cluster specification — set_mode.cluster_spec","text":"object model specification. mode character string model type (e.g. \"classification\" \"regression\")","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/set_mode.cluster_spec.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Change mode of a cluster specification — set_mode.cluster_spec","text":"updated cluster_spec object.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/silhouette.html","id":null,"dir":"Reference","previous_headings":"","what":"Measures silhouette between clusters — silhouette","title":"Measures silhouette between clusters — silhouette","text":"Measures silhouette clusters","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/silhouette.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Measures silhouette between clusters — silhouette","text":"","code":"silhouette( object, new_data = NULL, dists = NULL, dist_fun = philentropy::distance )"},{"path":"https://tidyclust.tidymodels.org/dev/reference/silhouette.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Measures silhouette between clusters — silhouette","text":"object fitted tidyclust model new_data dataset predict . NULL, uses trained clustering. dists distance matrix. Used new_data NULL. dist_fun function calculating distances observations. Defaults Euclidean distance processed data.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/silhouette.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Measures silhouette between clusters — silhouette","text":"tibble giving silhouette observation.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/silhouette.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Measures silhouette between clusters — silhouette","text":"silhouette_avg() corresponding cluster metric function returns average values given silhouette().","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/silhouette.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Measures silhouette between clusters — silhouette","text":"","code":"kmeans_spec <- k_means(num_clusters = 5) %>% set_engine(\"stats\") kmeans_fit <- fit(kmeans_spec, ~., mtcars) dists <- mtcars %>% as.matrix() %>% dist() silhouette(kmeans_fit, dists = dists) #> # A tibble: 32 × 3 #> cluster neighbor sil_width #> #> 1 Cluster_1 Cluster_2 0.572 #> 2 Cluster_1 Cluster_2 0.572 #> 3 Cluster_1 Cluster_2 0.752 #> 4 Cluster_2 Cluster_1 0.540 #> 5 Cluster_3 Cluster_4 0.149 #> 6 Cluster_2 Cluster_1 0.224 #> 7 Cluster_3 Cluster_4 0.649 #> 8 Cluster_1 Cluster_2 0.613 #> 9 Cluster_1 Cluster_2 0.692 #> 10 Cluster_1 Cluster_2 0.460 #> # ℹ 22 more rows"},{"path":"https://tidyclust.tidymodels.org/dev/reference/silhouette_avg.html","id":null,"dir":"Reference","previous_headings":"","what":"Measures average silhouette across all observations — silhouette_avg","title":"Measures average silhouette across all observations — silhouette_avg","text":"Measures average silhouette across observations","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/silhouette_avg.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Measures average silhouette across all observations — silhouette_avg","text":"","code":"silhouette_avg(object, ...) # S3 method for class 'cluster_spec' silhouette_avg(object, ...) # S3 method for class 'cluster_fit' silhouette_avg(object, new_data = NULL, dists = NULL, dist_fun = NULL, ...) # S3 method for class 'workflow' silhouette_avg(object, new_data = NULL, dists = NULL, dist_fun = NULL, ...) silhouette_avg_vec( object, new_data = NULL, dists = NULL, dist_fun = philentropy::distance, ... )"},{"path":"https://tidyclust.tidymodels.org/dev/reference/silhouette_avg.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Measures average silhouette across all observations — silhouette_avg","text":"object fitted kmeans tidyclust model ... arguments passed methods. new_data dataset predict . NULL, uses trained clustering. dists distance matrix. Used new_data NULL. dist_fun function calculating distances observations. Defaults Euclidean distance processed data.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/silhouette_avg.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Measures average silhouette across all observations — silhouette_avg","text":"double; average silhouette.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/silhouette_avg.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Measures average silhouette across all observations — silhouette_avg","text":"confused silhouette() returns tibble silhouette observation.","code":""},{"path":[]},{"path":"https://tidyclust.tidymodels.org/dev/reference/silhouette_avg.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Measures average silhouette across all observations — silhouette_avg","text":"","code":"kmeans_spec <- k_means(num_clusters = 5) %>% set_engine(\"stats\") kmeans_fit <- fit(kmeans_spec, ~., mtcars) dists <- mtcars %>% as.matrix() %>% dist() silhouette_avg(kmeans_fit, dists = dists) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 silhouette_avg standard 0.345 silhouette_avg_vec(kmeans_fit, dists = dists) #> [1] 0.3450963"},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_ratio.html","id":null,"dir":"Reference","previous_headings":"","what":"Compute the ratio of the WSS to the total SSE — sse_ratio","title":"Compute the ratio of the WSS to the total SSE — sse_ratio","text":"Compute ratio WSS total SSE","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_ratio.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Compute the ratio of the WSS to the total SSE — sse_ratio","text":"","code":"sse_ratio(object, ...) # S3 method for class 'cluster_spec' sse_ratio(object, ...) # S3 method for class 'cluster_fit' sse_ratio(object, new_data = NULL, dist_fun = NULL, ...) # S3 method for class 'workflow' sse_ratio(object, new_data = NULL, dist_fun = NULL, ...) sse_ratio_vec( object, new_data = NULL, dist_fun = function(x, y) { philentropy::dist_many_many(x, y, method = \"euclidean\") }, ... )"},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_ratio.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Compute the ratio of the WSS to the total SSE — sse_ratio","text":"object fitted kmeans tidyclust model ... arguments passed methods. new_data dataset predict . NULL, uses trained clustering. dist_fun function calculating distances centroids. Defaults Euclidean distance processed data.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_ratio.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Compute the ratio of the WSS to the total SSE — sse_ratio","text":"tibble 3 columns; .metric, .estimator, .estimate.","code":""},{"path":[]},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_ratio.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Compute the ratio of the WSS to the total SSE — sse_ratio","text":"","code":"kmeans_spec <- k_means(num_clusters = 5) %>% set_engine(\"stats\") kmeans_fit <- fit(kmeans_spec, ~., mtcars) sse_ratio(kmeans_fit) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 sse_ratio standard 0.0687 sse_ratio_vec(kmeans_fit) #> [1] 0.06873637"},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_total.html","id":null,"dir":"Reference","previous_headings":"","what":"Compute the total sum of squares — sse_total","title":"Compute the total sum of squares — sse_total","text":"Compute total sum squares","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_total.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Compute the total sum of squares — sse_total","text":"","code":"sse_total(object, ...) # S3 method for class 'cluster_spec' sse_total(object, ...) # S3 method for class 'cluster_fit' sse_total(object, new_data = NULL, dist_fun = NULL, ...) # S3 method for class 'workflow' sse_total(object, new_data = NULL, dist_fun = NULL, ...) sse_total_vec( object, new_data = NULL, dist_fun = function(x, y) { philentropy::dist_many_many(x, y, method = \"euclidean\") }, ... )"},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_total.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Compute the total sum of squares — sse_total","text":"object fitted kmeans tidyclust model ... arguments passed methods. new_data dataset predict . NULL, uses trained clustering. dist_fun function calculating distances centroids. Defaults Euclidean distance processed data.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_total.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Compute the total sum of squares — sse_total","text":"tibble 3 columns; .metric, .estimator, .estimate.","code":""},{"path":[]},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_total.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Compute the total sum of squares — sse_total","text":"","code":"kmeans_spec <- k_means(num_clusters = 5) %>% set_engine(\"stats\") kmeans_fit <- fit(kmeans_spec, ~., mtcars) sse_total(kmeans_fit) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 sse_total standard 623387. sse_total_vec(kmeans_fit) #> [1] 623387.5"},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_within.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculates Sum of Squared Error in each cluster — sse_within","title":"Calculates Sum of Squared Error in each cluster — sse_within","text":"Calculates Sum Squared Error cluster","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_within.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculates Sum of Squared Error in each cluster — sse_within","text":"","code":"sse_within( object, new_data = NULL, dist_fun = function(x, y) { philentropy::dist_many_many(x, y, method = \"euclidean\") } )"},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_within.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculates Sum of Squared Error in each cluster — sse_within","text":"object fitted kmeans tidyclust model new_data dataset predict . NULL, uses trained clustering. dist_fun function calculating distances centroids. Defaults Euclidean distance processed data.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_within.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculates Sum of Squared Error in each cluster — sse_within","text":"tibble two columns, cluster name SSE within cluster.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_within.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Calculates Sum of Squared Error in each cluster — sse_within","text":"sse_within_total() corresponding cluster metric function returns sum values given sse_within().","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_within.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculates Sum of Squared Error in each cluster — sse_within","text":"","code":"kmeans_spec <- k_means(num_clusters = 5) %>% set_engine(\"stats\") kmeans_fit <- fit(kmeans_spec, ~., mtcars) sse_within(kmeans_fit) #> # A tibble: 5 × 3 #> .cluster wss n_members #> #> 1 Cluster_1 7256. 6 #> 2 Cluster_2 3617. 7 #> 3 Cluster_3 6356. 6 #> 4 Cluster_4 46659. 9 #> 5 Cluster_5 208. 4"},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_within_total.html","id":null,"dir":"Reference","previous_headings":"","what":"Compute the sum of within-cluster SSE — sse_within_total","title":"Compute the sum of within-cluster SSE — sse_within_total","text":"Compute sum within-cluster SSE","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_within_total.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Compute the sum of within-cluster SSE — sse_within_total","text":"","code":"sse_within_total(object, ...) # S3 method for class 'cluster_spec' sse_within_total(object, ...) # S3 method for class 'cluster_fit' sse_within_total(object, new_data = NULL, dist_fun = NULL, ...) # S3 method for class 'workflow' sse_within_total(object, new_data = NULL, dist_fun = NULL, ...) sse_within_total_vec( object, new_data = NULL, dist_fun = function(x, y) { philentropy::dist_many_many(x, y, method = \"euclidean\") }, ... )"},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_within_total.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Compute the sum of within-cluster SSE — sse_within_total","text":"object fitted kmeans tidyclust model ... arguments passed methods. new_data dataset predict . NULL, uses trained clustering. dist_fun function calculating distances centroids. Defaults Euclidean distance processed data.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_within_total.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Compute the sum of within-cluster SSE — sse_within_total","text":"tibble 3 columns; .metric, .estimator, .estimate.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_within_total.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Compute the sum of within-cluster SSE — sse_within_total","text":"confused sse_within() returns tibble within-cluster SSE, one row cluster.","code":""},{"path":[]},{"path":"https://tidyclust.tidymodels.org/dev/reference/sse_within_total.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Compute the sum of within-cluster SSE — sse_within_total","text":"","code":"kmeans_spec <- k_means(num_clusters = 5) %>% set_engine(\"stats\") kmeans_fit <- fit(kmeans_spec, ~., mtcars) sse_within_total(kmeans_fit) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 sse_within_total standard 53990. sse_within_total_vec(kmeans_fit) #> [1] 53990.34"},{"path":"https://tidyclust.tidymodels.org/dev/reference/tidy.cluster_fit.html","id":null,"dir":"Reference","previous_headings":"","what":"Turn a tidyclust model object into a tidy tibble — tidy.cluster_fit","title":"Turn a tidyclust model object into a tidy tibble — tidy.cluster_fit","text":"method tidies model tidyclust model object, exists.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/tidy.cluster_fit.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Turn a tidyclust model object into a tidy tibble — tidy.cluster_fit","text":"","code":"# S3 method for class 'cluster_fit' tidy(x, ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/tidy.cluster_fit.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Turn a tidyclust model object into a tidy tibble — tidy.cluster_fit","text":"x object converted tidy tibble::tibble(). ... Additional arguments tidying method.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/tidy.cluster_fit.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Turn a tidyclust model object into a tidy tibble — tidy.cluster_fit","text":"tibble","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/tidyclust-package.html","id":null,"dir":"Reference","previous_headings":"","what":"tidyclust: A Common API to Clustering — tidyclust-package","title":"tidyclust: A Common API to Clustering — tidyclust-package","text":"common interface specifying clustering models, style 'parsnip'. Creates unified interface across different functions computational engines.","code":""},{"path":[]},{"path":"https://tidyclust.tidymodels.org/dev/reference/tidyclust-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"tidyclust: A Common API to Clustering — tidyclust-package","text":"Maintainer: Emil Hvitfeldt emil.hvitfeldt@posit.co (ORCID) Authors: Kelly Bodwin kelly@bodwin.us contributors: Posit Software, PBC [copyright holder, funder]","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/tidyclust_update.html","id":null,"dir":"Reference","previous_headings":"","what":"Update a cluster specification — update.hier_clust","title":"Update a cluster specification — update.hier_clust","text":"parameters cluster specification need modified, update() can used lieu recreating object scratch.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/tidyclust_update.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Update a cluster specification — update.hier_clust","text":"","code":"# S3 method for class 'hier_clust' update( object, parameters = NULL, num_clusters = NULL, cut_height = NULL, linkage_method = NULL, fresh = FALSE, ... ) # S3 method for class 'k_means' update(object, parameters = NULL, num_clusters = NULL, fresh = FALSE, ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/tidyclust_update.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Update a cluster specification — update.hier_clust","text":"object cluster specification. parameters 1-row tibble named list main parameters update. Use either parameters main arguments directly updating. main arguments used, supersede values parameters. Also, using engine arguments object result error. num_clusters Positive integer, number clusters model. cut_height Positive double, height cut dendrogram obtain cluster assignments (used num_clusters NULL) linkage_method agglomeration method used. (unambiguous abbreviation ) one \"ward.D\", \"ward.D2\", \"single\", \"complete\", \"average\" (= UPGMA), \"mcquitty\" (= WPGMA), \"median\" (= WPGMC) \"centroid\" (= UPGMC). fresh logical whether arguments modified -place replaced wholesale. ... used update().","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/tidyclust_update.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Update a cluster specification — update.hier_clust","text":"updated cluster specification.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/tidyclust_update.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Update a cluster specification — update.hier_clust","text":"","code":"kmeans_spec <- k_means(num_clusters = 5) kmeans_spec #> K Means Cluster Specification (partition) #> #> Main Arguments: #> num_clusters = 5 #> #> Computational engine: stats #> update(kmeans_spec, num_clusters = 1) #> K Means Cluster Specification (partition) #> #> Main Arguments: #> num_clusters = 1 #> #> Computational engine: stats #> update(kmeans_spec, num_clusters = 1, fresh = TRUE) #> K Means Cluster Specification (partition) #> #> Main Arguments: #> num_clusters = 1 #> #> Computational engine: stats #> param_values <- tibble::tibble(num_clusters = 10) kmeans_spec %>% update(param_values) #> K Means Cluster Specification (partition) #> #> Main Arguments: #> num_clusters = 10 #> #> Computational engine: stats #>"},{"path":"https://tidyclust.tidymodels.org/dev/reference/translate_tidyclust.html","id":null,"dir":"Reference","previous_headings":"","what":"Resolve a Model Specification for a Computational Engine — translate_tidyclust","title":"Resolve a Model Specification for a Computational Engine — translate_tidyclust","text":"translate_tidyclust() translate_tidyclust model specification code object specific particular engine (e.g. R package). translate tidyclust generic parameters counterparts.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/translate_tidyclust.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Resolve a Model Specification for a Computational Engine — translate_tidyclust","text":"","code":"translate_tidyclust(x, ...) # Default S3 method translate_tidyclust(x, engine = x$engine, ...)"},{"path":"https://tidyclust.tidymodels.org/dev/reference/translate_tidyclust.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Resolve a Model Specification for a Computational Engine — translate_tidyclust","text":"x model specification. ... currently used. engine computational engine model (see ?set_engine).","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/translate_tidyclust.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Resolve a Model Specification for a Computational Engine — translate_tidyclust","text":"Prints translated code.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/translate_tidyclust.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Resolve a Model Specification for a Computational Engine — translate_tidyclust","text":"translate_tidyclust() produces template call lacks specific argument values (data, etc). filled fit() called specifics data model. call may also include tune() arguments specification. handle tune() arguments, need use tune package. information see https://www.tidymodels.org/start/tuning/ contain resolved argument names specific model fitting function/engine. function can useful need understand tidyclust goes generic model specific model fitting function. Note: function used internally users use understand underlying syntax . used modify cluster specification.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/tune_cluster.html","id":null,"dir":"Reference","previous_headings":"","what":"Model tuning via grid search — tune_cluster","title":"Model tuning via grid search — tune_cluster","text":"tune_cluster() computes set performance metrics (e.g. accuracy RMSE) pre-defined set tuning parameters correspond model recipe across one resamples data.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/tune_cluster.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model tuning via grid search — tune_cluster","text":"","code":"tune_cluster(object, ...) # S3 method for class 'cluster_spec' tune_cluster( object, preprocessor, resamples, ..., param_info = NULL, grid = 10, metrics = NULL, control = tune::control_grid() ) # S3 method for class 'workflow' tune_cluster( object, resamples, ..., param_info = NULL, grid = 10, metrics = NULL, control = tune::control_grid() )"},{"path":"https://tidyclust.tidymodels.org/dev/reference/tune_cluster.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model tuning via grid search — tune_cluster","text":"object tidyclust model specification workflows::workflow(). ... currently used. preprocessor traditional model formula recipe created using recipes::recipe(). resamples rset() object. param_info dials::parameters() object NULL. none given, parameters set derived arguments. Passing argument can useful parameter ranges need customized. grid data frame tuning combinations positive integer. data frame columns parameter tuned rows tuning parameter candidates. integer denotes number candidate parameter sets created automatically. metrics cluster_metric_set() NULL. control object used modify tuning process. Defaults tune::control_grid().","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/tune_cluster.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model tuning via grid search — tune_cluster","text":"updated version resamples extra list columns .metrics .notes (optional columns .predictions .extracts). .notes contains warnings errors occur execution.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/reference/tune_cluster.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Model tuning via grid search — tune_cluster","text":"","code":"library(recipes) #> #> Attaching package: ‘recipes’ #> The following object is masked from ‘package:stats’: #> #> step library(rsample) library(workflows) library(tune) rec_spec <- recipe(~., data = mtcars) %>% step_normalize(all_numeric_predictors()) %>% step_pca(all_numeric_predictors()) kmeans_spec <- k_means(num_clusters = tune()) wflow <- workflow() %>% add_recipe(rec_spec) %>% add_model(kmeans_spec) grid <- tibble(num_clusters = 1:3) set.seed(4400) folds <- vfold_cv(mtcars, v = 2) res <- tune_cluster( wflow, resamples = folds, grid = grid ) res #> # Tuning results #> # 2-fold cross-validation #> # A tibble: 2 × 4 #> splits id .metrics .notes #> #> 1 Fold1 #> 2 Fold2 collect_metrics(res) #> # A tibble: 6 × 7 #> num_clusters .metric .estimator mean n std_err .config #> #> 1 1 sse_total standard 160. 2 0.346 Preprocess… #> 2 1 sse_within_total standard 160. 2 0.346 Preprocess… #> 3 2 sse_total standard 160. 2 0.346 Preprocess… #> 4 2 sse_within_total standard 80.3 2 3.63 Preprocess… #> 5 3 sse_total standard 160. 2 0.346 Preprocess… #> 6 3 sse_within_total standard 54.3 2 7.15 Preprocess…"},{"path":"https://tidyclust.tidymodels.org/dev/news/index.html","id":"tidyclust-development-version","dir":"Changelog","previous_headings":"","what":"tidyclust (development version)","title":"tidyclust (development version)","text":"philentropy package now used calculate distances rather Rfast. (#199)","code":""},{"path":"https://tidyclust.tidymodels.org/dev/news/index.html","id":"tidyclust-023","dir":"Changelog","previous_headings":"","what":"tidyclust 0.2.3","title":"tidyclust 0.2.3","text":"CRAN release: 2024-07-02 Update fix revdep issue clustMixType. (#190)","code":""},{"path":"https://tidyclust.tidymodels.org/dev/news/index.html","id":"tidyclust-022","dir":"Changelog","previous_headings":"","what":"tidyclust 0.2.2","title":"tidyclust 0.2.2","text":"CRAN release: 2024-06-17 Update fix revdep issue ClusterR. (#186)","code":""},{"path":"https://tidyclust.tidymodels.org/dev/news/index.html","id":"tidyclust-021","dir":"Changelog","previous_headings":"","what":"tidyclust 0.2.1","title":"tidyclust 0.2.1","text":"CRAN release: 2024-02-28 Small change let tune package easy CRAN release. (#178)","code":""},{"path":"https://tidyclust.tidymodels.org/dev/news/index.html","id":"tidyclust-020","dir":"Changelog","previous_headings":"","what":"tidyclust 0.2.0","title":"tidyclust 0.2.0","text":"CRAN release: 2023-09-25","code":""},{"path":"https://tidyclust.tidymodels.org/dev/news/index.html","id":"new-engines-0-2-0","dir":"Changelog","previous_headings":"","what":"New Engines","title":"tidyclust 0.2.0","text":"clustMixType engine added k_means(). engine allows fitting k-prototype models. (#63) klaR engine added k_means(). engine allows fitting k-modes models. (#63)","code":""},{"path":"https://tidyclust.tidymodels.org/dev/news/index.html","id":"improvements-0-2-0","dir":"Changelog","previous_headings":"","what":"Improvements","title":"tidyclust 0.2.0","text":"Engine specific documentation added models engines. (#159)","code":""},{"path":"https://tidyclust.tidymodels.org/dev/news/index.html","id":"bug-fixes-0-2-0","dir":"Changelog","previous_headings":"","what":"Bug Fixes","title":"tidyclust 0.2.0","text":"Fixed bug engine specific arguments passed along k_means() engine ClusterR. (#142) Fixed bug prefix argument wouldn’t correctly passed extract_cluster_assignment(), extract_centroids(), predict() (#145) Metric functions now error informatively used unfit cluster specifications. (#146) Fixed bug caused cluster ordering extract_fit_summary(). (#136) Using extract_cluster_assignment(), extract_centroids() predict() fitted hier_clust() model without specifying num_clust cut_height now gives informative error message. (#147) k_means() now errors informatively fit() without num_clust specified. (#134) Fixed bug levels didn’t match number clusters prediction fewer number observations. (#158) Fixed bug tune_cluster() error used recipe contained non-predictor variables id variables. (#124)","code":""},{"path":"https://tidyclust.tidymodels.org/dev/news/index.html","id":"breaking-changes-0-2-0","dir":"Changelog","previous_headings":"","what":"Breaking Changes","title":"tidyclust 0.2.0","text":"Exported internal functions ClusterR_kmeans_fit(), stats_kmeans_fit(), hclust_fit() renamed .k_means_fit_ClusterR(), .k_means_fit_stats(), .hier_clust_fit_stats() reduce visibility users. Cluster reordering now done fitting time, extraction prediction time. (#154)","code":""},{"path":"https://tidyclust.tidymodels.org/dev/news/index.html","id":"tidyclust-012","dir":"Changelog","previous_headings":"","what":"tidyclust 0.1.2","title":"tidyclust 0.1.2","text":"CRAN release: 2023-02-23 cluster specification methods generics::tune_args() generics::tunable() now registered unconditionally (#115).","code":""},{"path":"https://tidyclust.tidymodels.org/dev/news/index.html","id":"tidyclust-011","dir":"Changelog","previous_headings":"","what":"tidyclust 0.1.1","title":"tidyclust 0.1.1","text":"CRAN release: 2022-12-20 Fixed bug extract_cluster_assignment() predict() sometimes didn’t agreement clusters. (#94) silhouette() silhouette_avg() now return NAs instead erroring applied clustering object 1 cluster. (#104) Fixed bug extract_cluster_assignment() doesn’t work hier_clust() models workflows num_clusters specified extract_cluster_assignment().","code":""},{"path":"https://tidyclust.tidymodels.org/dev/news/index.html","id":"tidyclust-010","dir":"Changelog","previous_headings":"","what":"tidyclust 0.1.0","title":"tidyclust 0.1.0","text":"CRAN release: 2022-11-24 Added NEWS.md file track changes package.","code":""}]
+[{"path":[]},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"our-pledge","dir":"","previous_headings":"","what":"Our Pledge","title":"Contributor Covenant Code of Conduct","text":"members, contributors, leaders pledge make participation community harassment-free experience everyone, regardless age, body size, visible invisible disability, ethnicity, sex characteristics, gender identity expression, level experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, sexual identity orientation. pledge act interact ways contribute open, welcoming, diverse, inclusive, healthy community.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"our-standards","dir":"","previous_headings":"","what":"Our Standards","title":"Contributor Covenant Code of Conduct","text":"Examples behavior contributes positive environment community include: Demonstrating empathy kindness toward people respectful differing opinions, viewpoints, experiences Giving gracefully accepting constructive feedback Accepting responsibility apologizing affected mistakes, learning experience Focusing best just us individuals, overall community Examples unacceptable behavior include: use sexualized language imagery, sexual attention advances kind Trolling, insulting derogatory comments, personal political attacks Public private harassment Publishing others’ private information, physical email address, without explicit permission conduct reasonably considered inappropriate professional setting","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-responsibilities","dir":"","previous_headings":"","what":"Enforcement Responsibilities","title":"Contributor Covenant Code of Conduct","text":"Community leaders responsible clarifying enforcing standards acceptable behavior take appropriate fair corrective action response behavior deem inappropriate, threatening, offensive, harmful. Community leaders right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct, communicate reasons moderation decisions appropriate.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Contributor Covenant Code of Conduct","text":"Code Conduct applies within community spaces, also applies individual officially representing community public spaces. Examples representing community include using official e-mail address, posting via official social media account, acting appointed representative online offline event.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement","dir":"","previous_headings":"","what":"Enforcement","title":"Contributor Covenant Code of Conduct","text":"Instances abusive, harassing, otherwise unacceptable behavior may reported community leaders responsible enforcement codeofconduct@posit.co. complaints reviewed investigated promptly fairly. community leaders obligated respect privacy security reporter incident.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-guidelines","dir":"","previous_headings":"","what":"Enforcement Guidelines","title":"Contributor Covenant Code of Conduct","text":"Community leaders follow Community Impact Guidelines determining consequences action deem violation Code Conduct:","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_1-correction","dir":"","previous_headings":"Enforcement Guidelines","what":"1. Correction","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Use inappropriate language behavior deemed unprofessional unwelcome community. Consequence: private, written warning community leaders, providing clarity around nature violation explanation behavior inappropriate. public apology may requested.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_2-warning","dir":"","previous_headings":"Enforcement Guidelines","what":"2. Warning","title":"Contributor Covenant Code of Conduct","text":"Community Impact: violation single incident series actions. Consequence: warning consequences continued behavior. interaction people involved, including unsolicited interaction enforcing Code Conduct, specified period time. includes avoiding interactions community spaces well external channels like social media. Violating terms may lead temporary permanent ban.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_3-temporary-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"3. Temporary Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: serious violation community standards, including sustained inappropriate behavior. Consequence: temporary ban sort interaction public communication community specified period time. public private interaction people involved, including unsolicited interaction enforcing Code Conduct, allowed period. Violating terms may lead permanent ban.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_4-permanent-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"4. Permanent Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Demonstrating pattern violation community standards, including sustained inappropriate behavior, harassment individual, aggression toward disparagement classes individuals. Consequence: permanent ban sort public interaction within community.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"attribution","dir":"","previous_headings":"","what":"Attribution","title":"Contributor Covenant Code of Conduct","text":"Code Conduct adapted Contributor Covenant, version 2.1, available https://www.contributor-covenant.org/version/2/1/code_of_conduct.html. Community Impact Guidelines inspired [Mozilla’s code conduct enforcement ladder][https://github.com/mozilla/inclusion]. answers common questions code conduct, see FAQ https://www.contributor-covenant.org/faq. Translations available https://www.contributor-covenant.org/translations.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2023 tidyclust authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/articles/hier_clust.html","id":"setup","dir":"Articles","previous_headings":"","what":"Setup","title":"Hierarchical Clustering","text":"Load libraries: Load clean dataset: yet read k_means vignette, recommend reading first; functions used vignette explained detail .","code":"library(workflows) library(parsnip) library(tidyclust) #> #> Attaching package: 'tidyclust' #> The following objects are masked from 'package:parsnip': #> #> knit_engine_docs, list_md_problems library(tidyverse) #> ── Attaching core tidyverse packages ────────────────── tidyverse 2.0.0 ── #> ✔ dplyr 1.1.4 ✔ readr 2.1.5 #> ✔ forcats 1.0.0 ✔ stringr 1.5.1 #> ✔ ggplot2 3.5.1 ✔ tibble 3.2.1 #> ✔ lubridate 1.9.4 ✔ tidyr 1.3.1 #> ✔ purrr 1.0.2 #> ── Conflicts ──────────────────────────────────── tidyverse_conflicts() ── #> ✖ dplyr::filter() masks stats::filter() #> ✖ dplyr::lag() masks stats::lag() #> ℹ Use the conflicted package () to force all conflicts to become errors library(tidymodels) #> ── Attaching packages ──────────────────────────────── tidymodels 1.2.0 ── #> ✔ broom 1.0.7 ✔ rsample 1.2.1 #> ✔ dials 1.3.0 ✔ tune 1.2.1 #> ✔ infer 1.0.7 ✔ workflowsets 1.1.0 #> ✔ modeldata 1.4.0 ✔ yardstick 1.3.2 #> ✔ recipes 1.1.0 #> ── Conflicts ─────────────────────────────────── tidymodels_conflicts() ── #> ✖ scales::discard() masks purrr::discard() #> ✖ dplyr::filter() masks stats::filter() #> ✖ recipes::fixed() masks stringr::fixed() #> ✖ tidyclust::knit_engine_docs() masks parsnip::knit_engine_docs() #> ✖ dplyr::lag() masks stats::lag() #> ✖ tidyclust::list_md_problems() masks parsnip::list_md_problems() #> ✖ yardstick::spec() masks readr::spec() #> ✖ recipes::step() masks stats::step() #> • Search for functions across packages at https://www.tidymodels.org/find/ set.seed(838383) data(\"penguins\", package = \"modeldata\") penguins <- penguins %>% select(bill_length_mm, bill_depth_mm) %>% drop_na() # shuffle rows penguins <- penguins %>% sample_n(nrow(penguins))"},{"path":"https://tidyclust.tidymodels.org/dev/articles/hier_clust.html","id":"a-brief-introduction-to-hierarchical-clustering","dir":"Articles","previous_headings":"","what":"A brief introduction to hierarchical clustering","title":"Hierarchical Clustering","text":"Hierarchical Clustering, sometimes called Agglomerative Clustering, method unsupervised learning produces dendrogram, can used partition observations clusters. hierarchical clustering process begins observation ’s cluster; .e., n clusters n observations. closest two observations joined together single cluster. process continues, closest two clusters joined (“aggolermated”) step. result process dendrogram, shows joining clusters tree form:","code":"#> Warning in dist(fake_dat): NAs introduced by coercion"},{"path":"https://tidyclust.tidymodels.org/dev/articles/hier_clust.html","id":"clusters-from-dendrogram","dir":"Articles","previous_headings":"A brief introduction to hierarchical clustering","what":"Clusters from dendrogram","title":"Hierarchical Clustering","text":"produce partition-style cluster assignment dendrogram, one must “cut” tree chosen height: observations remain joined dendrogram cut height considered cluster together:","code":"#> # A tibble: 5 × 2 #> observation cluster_assignment #> #> 1 a 1 #> 2 b 2 #> 3 c 2 #> 4 d 3 #> 5 e 3"},{"path":"https://tidyclust.tidymodels.org/dev/articles/hier_clust.html","id":"methods-of-aggolmeration","dir":"Articles","previous_headings":"A brief introduction to hierarchical clustering","what":"Methods of aggolmeration","title":"Hierarchical Clustering","text":"every step agglomeration, measure distances current clusters. cluster containing (possibly) multiple points, mean measure distance? four common approaches cluster-cluster distancing, aka “linkage”: single linkage: distance two clusters distance two closest observations. average linkage: distance two clusters average distances observations one cluster observations . complete linkage: distance two clusters distance two furthest observations. centroid method: distance two clusters distance centroids (geometric mean median). Ward’s method: distance two clusters proportional increase error sum squares (ESS) result joining . ESS computed sum squared distances observations cluster, centroid cluster. also worth mentioning McQuitty method, retains information previously joined clusters measure future linkage distance. method currently supported model fitting, prediction, tidyclust.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/articles/hier_clust.html","id":"hier_clust-specification-in-tidyclust","dir":"Articles","previous_headings":"","what":"hier_clust specification in {tidyclust}","title":"Hierarchical Clustering","text":"specify hierarchical clustering model tidyclust, simply choose value num_clusters (optionally) linkage method: Currently, supported engine stats::hclust(). default linkage","code":"hc_spec <- hier_clust( num_clusters = 3, linkage_method = \"average\" ) hc_spec #> Hierarchical Clustering Specification (partition) #> #> Main Arguments: #> num_clusters = 3 #> linkage_method = average #> #> Computational engine: stats"},{"path":"https://tidyclust.tidymodels.org/dev/articles/hier_clust.html","id":"fitting-hier_clust-models","dir":"Articles","previous_headings":"","what":"Fitting hier_clust models","title":"Hierarchical Clustering","text":"fit model data usual way: produce dendrogram plot, access engine fit: (Although see , dendrograms often informative moderate large size datasets.) can also extract standard tidyclust summary list: Note , although hierarchical clustering algorithm focused cluster centroids way kk-means , still able compute geometric mean predictors cluster:","code":"hc_fit <- hc_spec %>% fit(~ bill_length_mm + bill_depth_mm, data = penguins ) hc_fit %>% summary() #> Length Class Mode #> spec 4 hier_clust list #> fit 7 hclust list #> elapsed 1 -none- list #> preproc 4 -none- list hc_fit$fit %>% plot() hc_summary <- hc_fit %>% extract_fit_summary() hc_summary %>% str() #> List of 7 #> $ cluster_names : Factor w/ 3 levels \"Cluster_1\",\"Cluster_2\",..: 1 2 3 #> $ centroids : tibble [3 × 2] (S3: tbl_df/tbl/data.frame) #> ..$ bill_length_mm: num [1:3] 38.8 47.9 56.6 #> ..$ bill_depth_mm : num [1:3] 18.3 16.2 16.7 #> $ n_members : int [1:3] 153 184 5 #> $ sse_within_total_total: num [1:3] 378.4 573.9 9.7 #> $ sse_total : num 1803 #> $ orig_labels : NULL #> $ cluster_assignments : Factor w/ 3 levels \"Cluster_1\",\"Cluster_2\",..: 1 1 1 2 2 2 2 2 1 2 ... hc_fit %>% extract_centroids() #> # A tibble: 3 × 3 #> .cluster bill_length_mm bill_depth_mm #> #> 1 Cluster_1 38.8 18.3 #> 2 Cluster_2 47.9 16.2 #> 3 Cluster_3 56.6 16.7"},{"path":"https://tidyclust.tidymodels.org/dev/articles/hier_clust.html","id":"prediction","dir":"Articles","previous_headings":"","what":"Prediction","title":"Hierarchical Clustering","text":"predict cluster assignment new observation, find closest cluster. measure “closeness” dependent specified type linkage model: single linkage: new observation assigned cluster nearest observation training data. complete linkage: new observation assigned cluster smallest maximum distances training observations new observation. average linkage: new observation assigned cluster smallest average distances training observations new observation. centroid method: new observation assigned cluster closest centroid, prediction k_means. Ward’s method: new observation assigned cluster smallest increase error sum squares (ESS) due new addition. ESS computed sum squared distances observations cluster, centroid cluster. ’s important note guarantee predict() training data produce results extract_cluster_assignments(). process clusters created aggolmerations results particular partition; training observation treated new data, predicted manner truly new information.","code":"hc_preds <- hc_fit %>% predict(penguins) hc_preds #> # A tibble: 342 × 1 #> .pred_cluster #> #> 1 Cluster_1 #> 2 Cluster_1 #> 3 Cluster_1 #> 4 Cluster_2 #> 5 Cluster_3 #> 6 Cluster_3 #> 7 Cluster_2 #> 8 Cluster_2 #> 9 Cluster_1 #> 10 Cluster_2 #> # ℹ 332 more rows bind_cols( hc_preds, extract_cluster_assignment(hc_fit) ) #> # A tibble: 342 × 2 #> .pred_cluster .cluster #> #> 1 Cluster_1 Cluster_1 #> 2 Cluster_1 Cluster_1 #> 3 Cluster_1 Cluster_1 #> 4 Cluster_2 Cluster_2 #> 5 Cluster_3 Cluster_2 #> 6 Cluster_3 Cluster_2 #> 7 Cluster_2 Cluster_2 #> 8 Cluster_2 Cluster_2 #> 9 Cluster_1 Cluster_1 #> 10 Cluster_2 Cluster_2 #> # ℹ 332 more rows"},{"path":"https://tidyclust.tidymodels.org/dev/articles/hier_clust.html","id":"reconciling-partitions","dir":"Articles","previous_headings":"","what":"Reconciling partitions","title":"Hierarchical Clustering","text":"Suppose produced cluster assignments two models: hierarchical clustering model three clusters () kk-means clustering model five clusters (). can combine assignments? notice three-cluster assignments hier_clust line perfectly five-cluster assignments k_means. However, fully unrelated assignments. example, KM_2 kk-means assignment fell inside HC_1 hierarchical assignments. goal relabel five kk-means clusters match three cluster names hierarchical output. can accomplished reconcile_clusterings_mapping(). function expects two vectors cluster labels input. first label matched, second label recoded first. trying simply match names across two -size clusterings, option one_to_one must set FALSE. example, can see KM_1, KM_2, KM_5 matched HC_1; KM_3 KM_4 matched HC_2. Notice clusters KM set matched HC_3; evidently, small cluster manifest clearly kk-means clustering.","code":"km_spec <- k_means(num_clusters = 5) km_fit <- km_spec %>% fit(~., data = penguins) km_preds <- predict(km_fit, penguins, prefix = \"KM_\") hc_preds <- predict(hc_fit, penguins, prefix = \"HC_\") tibble( hc = hc_preds$.pred_cluster, km = km_preds$.pred_cluster ) %>% count(hc, km) #> # A tibble: 8 × 3 #> hc km n #> #> 1 HC_1 KM_1 80 #> 2 HC_1 KM_2 72 #> 3 HC_1 KM_3 3 #> 4 HC_1 KM_5 1 #> 5 HC_2 KM_3 28 #> 6 HC_2 KM_4 64 #> 7 HC_2 KM_5 76 #> 8 HC_3 KM_4 18 reconcile_clusterings_mapping( primary = hc_preds$.pred_cluster, alternative = km_preds$.pred_cluster, one_to_one = FALSE ) #> # A tibble: 342 × 3 #> primary alt alt_recoded #> #> 1 HC_1 KM_1 HC_1 #> 2 HC_1 KM_2 HC_1 #> 3 HC_1 KM_2 HC_1 #> 4 HC_2 KM_3 HC_2 #> 5 HC_3 KM_4 HC_2 #> 6 HC_3 KM_4 HC_2 #> 7 HC_2 KM_3 HC_2 #> 8 HC_2 KM_5 HC_2 #> 9 HC_1 KM_2 HC_1 #> 10 HC_2 KM_4 HC_2 #> # ℹ 332 more rows"},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"setup","dir":"Articles","previous_headings":"","what":"Setup","title":"k-means","text":"Load libraries: Load clean dataset: end vignette, find brief overview k-means algorithm, well algorithmic variant details, like reference.","code":"library(workflows) library(parsnip) library(tidyclust) library(tidyverse) library(tidymodels) data(\"penguins\", package = \"modeldata\") penguins <- penguins %>% select(bill_length_mm, bill_depth_mm) %>% drop_na() # shuffle rows penguins <- penguins %>% sample_n(nrow(penguins))"},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"k-means-specification-in-tidyclust","dir":"Articles","previous_headings":"","what":"k-means specification in {tidyclust}","title":"k-means","text":"specify k-means model tidyclust, simply choose value num_clusters: currently two engines: stats::kmeans (default) ClusterR::KMeans_rcpp. also possible change algorithmic details implementation, changing engine /using corresponding arguments engine functions: Note stats::kmeans ClusterR::KMeans_rcpp implementations different default settings algorithmic details, recommended deliberate explicit choosing options. (See end document detail algorithmic options defaults.)","code":"kmeans_spec <- k_means(num_clusters = 3) kmeans_spec #> K Means Cluster Specification (partition) #> #> Main Arguments: #> num_clusters = 3 #> #> Computational engine: stats kmeans_spec_lloyd <- k_means(num_clusters = 3) %>% parsnip::set_engine(\"stats\", algorithm = \"Lloyd\") kmeans_spec_cr <- k_means(num_clusters = 3) %>% parsnip::set_engine(\"ClusterR\", initializer = \"random\")"},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"fitting-k-means-models","dir":"Articles","previous_headings":"","what":"Fitting k-means models","title":"k-means","text":"specified, model may “fit” dataset providing formula data frame manner tidymodels model fit. Note unlike supervised modeling, formula include response variable. access results produced engine - case, stats::kmeans - simply extract fit fitted model object: tidyclust also provides function, extract_fit_summary(), produce list model summary information format consistent across cluster model specifications engines","code":"kmeans_fit <- kmeans_spec %>% fit(~ bill_length_mm + bill_depth_mm, data = penguins ) kmeans_fit %>% summary() #> Length Class Mode #> spec 4 k_means list #> fit 9 kmeans list #> elapsed 1 -none- list #> preproc 4 -none- list kmeans_fit$fit #> K-means clustering with 3 clusters of sizes 141, 116, 85 #> #> Cluster means: #> bill_length_mm bill_depth_mm #> 2 38.40355 18.27943 #> 3 45.51379 15.64397 #> 1 50.90353 17.33647 #> #> Clustering vector: #> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 #> 1 1 1 2 3 3 2 2 1 3 3 1 1 1 2 1 3 2 #> 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 #> 2 1 1 1 3 1 3 3 2 2 2 2 2 3 2 2 2 3 #> 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 #> 3 1 2 3 2 3 2 2 2 3 1 1 1 3 1 1 2 1 #> 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 #> 3 2 2 1 2 1 2 2 1 3 3 1 1 1 3 3 1 1 #> 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 #> 1 1 3 2 3 1 3 1 1 1 1 3 3 2 1 2 3 1 #> 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 #> 1 2 1 2 3 1 1 2 2 2 2 2 1 1 2 1 1 2 #> 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 #> 1 2 1 1 1 2 3 2 2 3 2 2 1 1 1 1 3 2 #> 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 #> 1 1 2 1 3 1 1 3 1 2 3 2 1 1 2 2 3 2 #> 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 #> 2 2 3 3 1 3 2 1 2 3 3 1 3 2 1 2 1 2 #> 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 #> 1 2 3 2 2 2 1 2 1 2 1 1 1 2 3 3 1 2 #> 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 #> 1 1 2 2 1 2 1 1 2 1 2 3 3 1 1 3 1 1 #> 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 #> 2 2 1 3 2 1 1 1 1 2 3 3 1 1 1 2 1 3 #> 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 #> 3 1 1 1 3 3 1 1 2 1 1 2 3 1 1 2 3 1 #> 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 #> 1 3 1 1 2 3 2 2 1 3 3 3 2 1 1 3 1 1 #> 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 #> 2 3 1 2 2 3 1 2 3 3 2 1 3 3 2 2 1 1 #> 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 #> 2 1 1 1 1 1 2 1 1 3 3 3 2 1 2 1 1 1 #> 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 #> 2 1 2 1 1 3 2 2 2 3 2 1 3 1 1 1 3 1 #> 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 #> 2 2 2 1 3 2 3 3 2 1 2 2 1 3 2 2 2 2 #> 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 #> 3 2 2 2 3 1 3 1 2 1 3 3 3 1 3 1 2 2 #> #> Within cluster sum of squares by cluster: #> [1] 944.4986 754.7437 617.9859 #> (between_SS / total_SS = 79.8 %) #> #> Available components: #> #> [1] \"cluster\" \"centers\" \"totss\" \"withinss\" #> [5] \"tot.withinss\" \"betweenss\" \"size\" \"iter\" #> [9] \"ifault\" kmeans_summary <- kmeans_fit %>% extract_fit_summary() kmeans_summary %>% str() #> List of 7 #> $ cluster_names : Factor w/ 3 levels \"Cluster_1\",\"Cluster_2\",..: 1 2 3 #> $ centroids : tibble [3 × 2] (S3: tbl_df/tbl/data.frame) #> ..$ bill_length_mm: num [1:3] 38.4 45.5 50.9 #> ..$ bill_depth_mm : num [1:3] 18.3 15.6 17.3 #> $ n_members : int [1:3] 141 116 85 #> $ sse_within_total_total: num [1:3] 944 755 618 #> $ sse_total : num 11494 #> $ orig_labels : int [1:342] 1 1 1 2 3 3 2 2 1 3 ... #> $ cluster_assignments : Factor w/ 3 levels \"Cluster_1\",\"Cluster_2\",..: 1 1 1 2 3 3 2 2 1 3 ..."},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"cluster-assignments-and-centers","dir":"Articles","previous_headings":"","what":"Cluster assignments and centers","title":"k-means","text":"primary objective fitting clustering model typically assign observations clusters. access , use extract_cluster_assignment() function: Note function renames clusters accordance standard tidyclust naming convention ordering: clusters named “Cluster_1”, “Cluster_2”, etc. numbered order appear rows training dataset. reconcile standardized cluster labels engine output, refer back full model fit summary: example, see cluster labelled “3” stats::kmeans engine function - label assigned randomly implementation - first appear training data, converted “Cluster_1” standardized labels.","code":"kmeans_fit %>% extract_cluster_assignment() #> # A tibble: 342 × 1 #> .cluster #> #> 1 Cluster_1 #> 2 Cluster_1 #> 3 Cluster_1 #> 4 Cluster_2 #> 5 Cluster_3 #> 6 Cluster_3 #> 7 Cluster_2 #> 8 Cluster_2 #> 9 Cluster_1 #> 10 Cluster_3 #> # ℹ 332 more rows tibble( orig_labels = kmeans_summary$orig_labels, standard_labels = kmeans_summary$cluster_assignments ) #> # A tibble: 342 × 2 #> orig_labels standard_labels #> #> 1 1 Cluster_1 #> 2 1 Cluster_1 #> 3 1 Cluster_1 #> 4 2 Cluster_2 #> 5 3 Cluster_3 #> 6 3 Cluster_3 #> 7 2 Cluster_2 #> 8 2 Cluster_2 #> 9 1 Cluster_1 #> 10 3 Cluster_3 #> # ℹ 332 more rows"},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"centroids","dir":"Articles","previous_headings":"Cluster assignments and centers","what":"Centroids","title":"k-means","text":"secondary output interest often characterization clusters; .e., data feature trends cluster seem represent? commonly, clusters characterized mean values predictor space, .k.. centroids. can accessed full summary: can also accessed directly fitted model : Based output, might say Cluster_1 penguins smaller bill lengths, Cluster_2 smaller bill depths, Cluster_3 penguins large bills dimensions.","code":"kmeans_summary$centroids #> # A tibble: 3 × 2 #> bill_length_mm bill_depth_mm #> #> 1 38.4 18.3 #> 2 45.5 15.6 #> 3 50.9 17.3 kmeans_fit %>% extract_centroids() #> # A tibble: 3 × 3 #> .cluster bill_length_mm bill_depth_mm #> #> 1 Cluster_1 38.4 18.3 #> 2 Cluster_2 45.5 15.6 #> 3 Cluster_3 50.9 17.3"},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"prediction","dir":"Articles","previous_headings":"","what":"Prediction","title":"k-means","text":"Since kk-means algorithm ultimately assigns training observations cluster closest centroid, natural “predict” test observations also belong closest centroid cluster. predict() function behaves expected, producing cluster assignment predictions new data based distance fitted model centroids. attach predictions dataset column, use augment():","code":"new_penguin <- tibble( bill_length_mm = 42, bill_depth_mm = 17 ) kmeans_fit %>% predict(new_penguin) #> # A tibble: 1 × 1 #> .pred_cluster #> #> 1 Cluster_2 kmeans_fit %>% augment(penguins) #> # A tibble: 342 × 3 #> bill_length_mm bill_depth_mm .pred_cluster #> #> 1 39.6 20.7 Cluster_1 #> 2 36.2 17.3 Cluster_1 #> 3 32.1 15.5 Cluster_1 #> 4 47.6 18.3 Cluster_2 #> 5 52 18.1 Cluster_3 #> 6 52.7 19.8 Cluster_3 #> 7 45.2 16.4 Cluster_2 #> 8 46.6 14.2 Cluster_2 #> 9 34.4 18.4 Cluster_1 #> 10 49.8 15.9 Cluster_3 #> # ℹ 332 more rows"},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"metrics","dir":"Articles","previous_headings":"","what":"Metrics","title":"k-means","text":"Since clustering unsupervised method, target/outcome variable, objective notion predictive success. However, many common approaches exist quantifying quality particular cluster partition structure.","code":""},{"path":"https://tidyclust.tidymodels.org/dev/articles/k_means.html","id":"sum-of-squared-error","dir":"Articles","previous_headings":"Metrics","what":"Sum of squared error","title":"k-means","text":"One simple metric within cluster sum--squared error (WSS), measures sum distances observations cluster center. sometimes scaled total sum--squared error (TSS), distance observations global centroid; particular, ratio WSS/TSS often computed. principle, small values WSS WSS/TSS ratio suggest observations within clusters closer (similar) clusters. WSS TSS come “free” model fit summary, can accessed directly model fit: can also see within sum--squares cluster, rather totalled, sse_within():","code":"kmeans_summary$sse_within_total_total #> [1] 944.4986 754.7437 617.9859 kmeans_summary$sse_total #> [1] 11494.04 kmeans_fit %>% sse_within_total() #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 sse_within_total standard 2317. kmeans_fit %>% sse_total() #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 sse_total standard 11494. kmeans_fit %>% sse_ratio() #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 sse_ratio standard 0.202 kmeans_fit %>% sse_within() #> # A tibble: 3 × 3 #> .cluster wss n_members #>