Merge pull request #168 from tidymodels/fix124

EmilHvitfeldt · web-flow · commit 21019d9b6ab3 · 2023-08-31T14:34:04.000-07:00
diff --git a/NEWS.md b/NEWS.md
@@ -24,6 +24,8 @@
 
 * Fixed bug where levels didn't match number of clusters if prediction on fewer number of observations. (#158)
 
+* Fixed bug where `tune_cluster()` would error if used with a recipe that contained non-predictor variables such as id variables. (#124)
+
 # tidyclust 0.1.2
 
 * The cluster specification methods for `generics::tune_args()` and `generics::tunable()` are now registered unconditionally (#115).
diff --git a/R/metric-aaa.R b/R/metric-aaa.R
@@ -234,7 +234,7 @@ extract_post_preprocessor <- function(object, new_data) {
   } else if (inherits(preprocessor, "recipe")) {
     new_data <- object %>%
       hardhat::extract_recipe() %>%
-      recipes::bake(new_data)
+      recipes::bake(new_data, recipes::all_predictors())
   }
   new_data
 }
diff --git a/tests/testthat/test-tune_cluster.R b/tests/testthat/test-tune_cluster.R
@@ -471,3 +471,42 @@ test_that("select_best() and show_best() works", {
       dplyr::select(num_clusters, .config)
   )
 })
+
+test_that("doesn't error if recipes uses id variables", {
+  helper_objects <- helper_objects_tidyclust()
+
+  mtcars_id <- mtcars %>%
+    tibble::rownames_to_column(var = "model")
+
+  rec_id <- recipes::recipe(~., data = mtcars_id) %>%
+    recipes::update_role(model, new_role = "id variable") %>%
+    recipes::step_normalize(recipes::all_numeric_predictors())
+
+  set.seed(4400)
+  wflow <- workflows::workflow() %>%
+    workflows::add_recipe(rec_id) %>%
+    workflows::add_model(helper_objects$kmeans_mod)
+  pset <- hardhat::extract_parameter_set_dials(wflow) %>%
+    update(num_clusters = dials::num_clusters(c(1, 3)))
+  grid <- dials::grid_regular(pset, levels = 3)
+  folds <- rsample::vfold_cv(mtcars_id, v = 2)
+  control <- tune::control_grid(extract = identity)
+  metrics <- cluster_metric_set(sse_within_total, sse_total)
+
+  res <- tune_cluster(
+    wflow,
+    resamples = folds,
+    grid = grid,
+    control = control,
+    metrics = metrics
+  )
+  res_est <- tune::collect_metrics(res)
+  res_workflow <- res$.extracts[[1]]$.extracts[[1]]
+
+  expect_equal(res$id, folds$id)
+  expect_equal(nrow(res_est), nrow(grid) * 2)
+  expect_equal(sum(res_est$.metric == "sse_total"), nrow(grid))
+  expect_equal(sum(res_est$.metric == "sse_within_total"), nrow(grid))
+  expect_equal(res_est$n, rep(2, nrow(grid) * 2))
+  expect_true(res_workflow$trained)
+})

Original file line number	Diff line number	Diff line change
`@@ -234,7 +234,7 @@ extract_post_preprocessor <- function(object, new_data) {`
`234`	`234`	`} else if (inherits(preprocessor, "recipe")) {`
`235`	`235`	`new_data <- object %>%`
`236`	`236`	`hardhat::extract_recipe() %>%`
`237`		`- recipes::bake(new_data)`
	`237`	`+ recipes::bake(new_data, recipes::all_predictors())`
`238`	`238`	`}`
`239`	`239`	`new_data`
`240`	`240`	`}`