From bb6ecaba6b9eed93c583aedea047a4f93be28c0a Mon Sep 17 00:00:00 2001
From: Emil Hvitfeldt <emilhhvitfeldt@gmail.com>
Date: Wed, 15 Jan 2025 14:56:58 -0800
Subject: [PATCH 01/13] add `toggle_sparsity()`

---
 R/fit.R         |  2 ++
 R/sparsevctrs.R | 76 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+)

diff --git a/R/fit.R b/R/fit.R
index 13d1315..8c42940 100644
--- a/R/fit.R
+++ b/R/fit.R
@@ -71,6 +71,8 @@ fit.workflow <- function(object, data, ..., calibration = NULL, control = contro
     )
   }
 
+  object <- toggle_sparsity(object, data)
+
   workflow <- object
   workflow <- .fit_pre(workflow, data)
   workflow <- .fit_model(workflow, control)
diff --git a/R/sparsevctrs.R b/R/sparsevctrs.R
index bbb865a..b272d42 100644
--- a/R/sparsevctrs.R
+++ b/R/sparsevctrs.R
@@ -1,3 +1,79 @@
 is_sparse_matrix <- function(x) {
   methods::is(x, "sparseMatrix")
 }
+
+toggle_sparsity <- function(object, data) {
+  toggle_sparse <- "no"
+
+  if (allow_sparse(object$fit$actions$model$spec)) {
+    if ("recipe" %in% names(object$pre$actions)) {
+      est_sparsity <- recipes::.recipes_estimate_sparsity(
+        object$pre$actions$recipe$recipe
+      )
+    } else {
+      est_sparsity <- sparsevctrs::sparsity(data, 1000)
+    }
+
+    pred_log_fold <- pred_log_fold(
+      est_sparsity,
+      object$fit$actions$model$spec$engine,
+      nrow(data)
+    )
+    if (pred_log_fold > 0) {
+      toggle_sparse <- "yes"
+    }
+  }
+
+  object$pre$actions$recipe$recipe <- recipes::.recipes_toggle_sparse_args(
+    object$pre$actions$recipe$recipe,
+    choice = toggle_sparse
+  )
+  object
+}
+
+allow_sparse <- function(x) {
+  if (inherits(x, "model_fit")) {
+    x <- x$spec
+  }
+  res <- parsnip::get_from_env(paste0(class(x)[1], "_encoding"))
+  all(res$allow_sparse_x[res$engine == x$engine])
+}
+
+pred_log_fold <- function(sparsity, model, n_rows) {
+  if (is.null(model) || model == "ranger") {
+    return("no")
+  }
+
+  log_fold <- -0.599333138645995 +
+    ifelse(sparsity < 0.836601307189543, 0.836601307189543 - sparsity, 0) *
+      -0.541581853008009 +
+    ifelse(n_rows < 16000, 16000 - n_rows, 0) * 3.23980908942813e-05 +
+    ifelse(n_rows > 16000, n_rows - 16000, 0) * -2.81001152147355e-06 +
+    ifelse(sparsity > 0.836601307189543, sparsity - 0.836601307189543, 0) *
+      9.82444255114058 +
+    ifelse(sparsity > 0.836601307189543, sparsity - 0.836601307189543, 0) *
+      ifelse(n_rows > 8000, n_rows - 8000, 0) *
+      7.27456967763306e-05 +
+    ifelse(sparsity > 0.836601307189543, sparsity - 0.836601307189543, 0) *
+      ifelse(n_rows < 8000, 8000 - n_rows, 0) *
+      -0.000798307404212627
+
+  if (model == "xgboost") {
+    log_fold <- log_fold +
+      ifelse(sparsity < 0.984615384615385, 0.984615384615385 - sparsity, 0) *
+        0.113098025073806 +
+      ifelse(n_rows < 8000, 8000 - n_rows, 0) * -9.77914237255269e-05 +
+      ifelse(n_rows > 8000, n_rows - 8000, 0) * 3.22657666511869e-06 +
+      ifelse(sparsity > 0.984615384615385, sparsity - 0.984615384615385, 0) *
+        41.5180348086939 +
+      0.913457808326756
+  }
+
+  if (model == "LiblineaR") {
+    log_fold <- log_fold +
+      ifelse(sparsity > 0.836601307189543, sparsity - 0.836601307189543, 0) *
+        -5.39592564852111
+  }
+
+  log_fold
+}

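For reference, a minimal sketch (not part of the patch) of the lookup that `allow_sparse()` performs, assuming parsnip is installed and using a `boost_tree()`/xgboost spec purely as an illustration:

    spec <- parsnip::boost_tree(mode = "regression", engine = "xgboost")

    # allow_sparse() reads the encoding metadata that parsnip registers for each
    # model type and checks the allow_sparse_x flag for the spec's engine
    enc <- parsnip::get_from_env(paste0(class(spec)[1], "_encoding"))
    all(enc$allow_sparse_x[enc$engine == spec$engine])
    # expected: TRUE, since xgboost accepts sparse matrices
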
From f68285e6acdceb8b70228a5740c0b8dbfb5f245b Mon Sep 17 00:00:00 2001
From: Emil Hvitfeldt <emilhhvitfeldt@gmail.com>
Date: Thu, 16 Jan 2025 12:14:57 -0800
Subject: [PATCH 02/13] move recipes to imports

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 181d507..af08bff 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -25,6 +25,7 @@ Imports:
     lifecycle (>= 1.0.3),
     modelenv (>= 0.1.0),
     parsnip (>= 1.2.1.9000),
+    recipes (>= 1.0.10.9000),
     rlang (>= 1.1.0),
     tidyselect (>= 1.2.0),
     sparsevctrs (>= 0.1.0.9002),
@@ -42,7 +43,6 @@ Suggests:
     methods,
     modeldata (>= 1.0.0),
     probably,
-    recipes (>= 1.0.10.9000),
     rmarkdown,
     testthat (>= 3.0.0)
 VignetteBuilder: 

From 39943be6f92ebe0fb3c877b953494dfe1f7db639 Mon Sep 17 00:00:00 2001
From: Emil Hvitfeldt <emil.hvitfeldt@posit.co>
Date: Fri, 17 Jan 2025 08:50:29 -0800
Subject: [PATCH 03/13] Apply suggestions from code review

Co-authored-by: Simon P. Couch <simonpatrickcouch@gmail.com>
---
 R/sparsevctrs.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/sparsevctrs.R b/R/sparsevctrs.R
index b272d42..1c970ed 100644
--- a/R/sparsevctrs.R
+++ b/R/sparsevctrs.R
@@ -5,18 +5,18 @@ is_sparse_matrix <- function(x) {
 toggle_sparsity <- function(object, data) {
   toggle_sparse <- "no"
 
-  if (allow_sparse(object$fit$actions$model$spec)) {
-    if ("recipe" %in% names(object$pre$actions)) {
+  if (allow_sparse(expect_spec_parsnip(object))) {
+    if (has_preprocessor_recipe(object)) {
       est_sparsity <- recipes::.recipes_estimate_sparsity(
-        object$pre$actions$recipe$recipe
+        extract_preprocessor(object)
       )
     } else {
-      est_sparsity <- sparsevctrs::sparsity(data, 1000)
+      est_sparsity <- sparsevctrs::sparsity(data, sample = 1000)
     }
 
     pred_log_fold <- pred_log_fold(
       est_sparsity,
-      object$fit$actions$model$spec$engine,
+      extract_spec_parsnip(object)$engine,
       nrow(data)
     )
     if (pred_log_fold > 0) {

From 56304cdd941e48fe3c090e337b2df15732e99742 Mon Sep 17 00:00:00 2001
From: Emil Hvitfeldt <emilhhvitfeldt@gmail.com>
Date: Fri, 17 Jan 2025 08:58:02 -0800
Subject: [PATCH 04/13] up sparsevctrs version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index af08bff..abff23e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -28,7 +28,7 @@ Imports:
     recipes (>= 1.0.10.9000),
     rlang (>= 1.1.0),
     tidyselect (>= 1.2.0),
-    sparsevctrs (>= 0.1.0.9002),
+    sparsevctrs (>= 0.1.0.9003),
     vctrs (>= 0.4.1),
     withr
 Suggests: 

From 7ffc1f9d122ee425417eb263431578d8f0ddd85a Mon Sep 17 00:00:00 2001
From: Emil Hvitfeldt <emilhhvitfeldt@gmail.com>
Date: Fri, 17 Jan 2025 09:19:44 -0800
Subject: [PATCH 05/13] pred_log_fold() returns -Inf instead of "no" to align with the numeric check

---
 R/sparsevctrs.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/sparsevctrs.R b/R/sparsevctrs.R
index 1c970ed..ce0ec1e 100644
--- a/R/sparsevctrs.R
+++ b/R/sparsevctrs.R
@@ -41,7 +41,7 @@ allow_sparse <- function(x) {
 
 pred_log_fold <- function(sparsity, model, n_rows) {
   if (is.null(model) || model == "ranger") {
-    return("no")
+    return(-Inf)
   }
 
   log_fold <- -0.599333138645995 +

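With this change `pred_log_fold()` stays numeric for every engine, so the `pred_log_fold > 0` check in `toggle_sparsity()` remains a plain numeric comparison; a one-line illustration:

    -Inf > 0
    # FALSE, so ranger (or a missing engine) still resolves to toggle_sparse = "no"
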
From c13d98e9e6918ff210ff76c23ec646437ffc96a2 Mon Sep 17 00:00:00 2001
From: Emil Hvitfeldt <emilhhvitfeldt@gmail.com>
Date: Fri, 17 Jan 2025 09:42:29 -0800
Subject: [PATCH 06/13] only toggle sparse arguments in toggle_sparsity() if there is a recipe

---
 R/sparsevctrs.R | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/R/sparsevctrs.R b/R/sparsevctrs.R
index ce0ec1e..e091a4e 100644
--- a/R/sparsevctrs.R
+++ b/R/sparsevctrs.R
@@ -3,31 +3,32 @@ is_sparse_matrix <- function(x) {
 }
 
 toggle_sparsity <- function(object, data) {
-  toggle_sparse <- "no"
-
-  if (allow_sparse(expect_spec_parsnip(object))) {
-    if (has_preprocessor_recipe(object)) {
-      est_sparsity <- recipes::.recipes_estimate_sparsity(
-        extract_preprocessor(object)
-      )
-    } else {
-      est_sparsity <- sparsevctrs::sparsity(data, sample = 1000)
-    }
+  if (
+    allow_sparse(object$fit$actions$model$spec) &&
+      has_preprocessor_recipe(object)
+  ) {
+    est_sparsity <- recipes::.recipes_estimate_sparsity(
+      extract_preprocessor(object)
+    )
 
     pred_log_fold <- pred_log_fold(
       est_sparsity,
       extract_spec_parsnip(object)$engine,
       nrow(data)
     )
+
+    toggle_sparse <- "no"
+
     if (pred_log_fold > 0) {
       toggle_sparse <- "yes"
     }
+
+    object$pre$actions$recipe$recipe <- recipes::.recipes_toggle_sparse_args(
+      object$pre$actions$recipe$recipe,
+      choice = toggle_sparse
+    )
   }
 
-  object$pre$actions$recipe$recipe <- recipes::.recipes_toggle_sparse_args(
-    object$pre$actions$recipe$recipe,
-    choice = toggle_sparse
-  )
   object
 }
 

From 96e134971e00a4c6976d1830bce9bd71008ddf8f Mon Sep 17 00:00:00 2001
From: Emil Hvitfeldt <emilhhvitfeldt@gmail.com>
Date: Fri, 17 Jan 2025 09:50:42 -0800
Subject: [PATCH 07/13] rename pred_log_fold() to should_use_sparsity()

---
 R/sparsevctrs.R | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/R/sparsevctrs.R b/R/sparsevctrs.R
index e091a4e..6eb9384 100644
--- a/R/sparsevctrs.R
+++ b/R/sparsevctrs.R
@@ -11,18 +11,12 @@ toggle_sparsity <- function(object, data) {
       extract_preprocessor(object)
     )
 
-    pred_log_fold <- pred_log_fold(
+    toggle_sparse <- should_use_sparsity(
       est_sparsity,
       extract_spec_parsnip(object)$engine,
       nrow(data)
     )
 
-    toggle_sparse <- "no"
-
-    if (pred_log_fold > 0) {
-      toggle_sparse <- "yes"
-    }
-
     object$pre$actions$recipe$recipe <- recipes::.recipes_toggle_sparse_args(
       object$pre$actions$recipe$recipe,
       choice = toggle_sparse
@@ -40,9 +34,9 @@ allow_sparse <- function(x) {
   all(res$allow_sparse_x[res$engine == x$engine])
 }
 
-pred_log_fold <- function(sparsity, model, n_rows) {
+should_use_sparsity <- function(sparsity, model, n_rows) {
   if (is.null(model) || model == "ranger") {
-    return(-Inf)
+    return("no")
   }
 
   log_fold <- -0.599333138645995 +
@@ -76,5 +70,5 @@ pred_log_fold <- function(sparsity, model, n_rows) {
         -5.39592564852111
   }
 
-  log_fold
+  ifelse(log_fold > 0, "yes", "no")
 }

From 3836124cab958c84380f284945b5ce04b01ac964 Mon Sep 17 00:00:00 2001
From: Emil Hvitfeldt <emilhhvitfeldt@gmail.com>
Date: Fri, 17 Jan 2025 10:01:35 -0800
Subject: [PATCH 08/13] document should_use_sparsity()

---
 R/sparsevctrs.R | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/R/sparsevctrs.R b/R/sparsevctrs.R
index 6eb9384..d75b5c1 100644
--- a/R/sparsevctrs.R
+++ b/R/sparsevctrs.R
@@ -34,8 +34,27 @@ allow_sparse <- function(x) {
   all(res$allow_sparse_x[res$engine == x$engine])
 }
 
-should_use_sparsity <- function(sparsity, model, n_rows) {
-  if (is.null(model) || model == "ranger") {
+# This function was created from the output of a MARS model fit on the
+# simulation data generated in `analysis/time_analysis.R`
+# https://github.com/tidymodels/benchmark-sparsity-threshold
+#
+# The model was extracted using {tidypredict} and hand-tuned for speed.
+#
+# The model was fit on `sparsity`, `engine`, and `n_rows`, and the outcome was
+# `log_fold`, which is defined as
+# `log(time to fit with dense data / time to fit with sparse data)`.
+# This means that values above 0 reflect longer fit times for dense data,
+# hence we want to use sparse data.
+#
+# At this time the only engines that support sparse data are glmnet, LiblineaR,
+# ranger, and xgboost, which is why they are the only ones listed here.
+# This is fine, as this code will only run if `allow_sparse()` returns `TRUE`,
+# which only happens for these engines.
+# 
+# Ranger is hard-coded to always return "no" since it appears to use the same
+# algorithm for sparse and dense data, resulting in identical fit times.
+should_use_sparsity <- function(sparsity, engine, n_rows) {
+  if (is.null(engine) || engine == "ranger") {
     return("no")
   }
 
@@ -53,7 +72,7 @@ should_use_sparsity <- function(sparsity, model, n_rows) {
       ifelse(n_rows < 8000, 8000 - n_rows, 0) *
       -0.000798307404212627
 
-  if (model == "xgboost") {
+  if (engine == "xgboost") {
     log_fold <- log_fold +
       ifelse(sparsity < 0.984615384615385, 0.984615384615385 - sparsity, 0) *
         0.113098025073806 +
@@ -64,7 +83,7 @@ should_use_sparsity <- function(sparsity, model, n_rows) {
       0.913457808326756
   }
 
-  if (model == "LiblineaR") {
+  if (engine == "LiblineaR") {
     log_fold <- log_fold +
       ifelse(sparsity > 0.836601307189543, sparsity - 0.836601307189543, 0) *
         -5.39592564852111

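The piecewise terms in `should_use_sparsity()` are MARS hinge functions; a minimal sketch (illustrative only, with made-up inputs) of how the final decision falls out of the fitted linear predictor:

    # each ifelse(x < knot, knot - x, 0) / ifelse(x > knot, x - knot, 0) term is a
    # hinge function h(knot - x) / h(x - knot), with h(u) = max(u, 0)
    h <- function(u) ifelse(u > 0, u, 0)

    # log_fold estimates log(dense fit time / sparse fit time), so any positive
    # value means the dense fit is slower and sparse data should be used
    decide <- function(log_fold) ifelse(log_fold > 0, "yes", "no")
    decide(c(0.4, -1.2))
    # expected: "yes" "no"
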
From 2162736337eefb4689d80d0700f4635ad599482c Mon Sep 17 00:00:00 2001
From: Emil Hvitfeldt <emilhhvitfeldt@gmail.com>
Date: Fri, 17 Jan 2025 10:21:12 -0800
Subject: [PATCH 09/13] document toggle_sparsity()

---
 R/sparsevctrs.R | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/R/sparsevctrs.R b/R/sparsevctrs.R
index d75b5c1..2111dea 100644
--- a/R/sparsevctrs.R
+++ b/R/sparsevctrs.R
@@ -2,6 +2,12 @@ is_sparse_matrix <- function(x) {
   methods::is(x, "sparseMatrix")
 }
 
+# This function takes a workflow and its data. If the model supports sparse data
+# and there is a recipe, then it uses `should_use_sparsity()` to determine
+# whether all the `sparse = "auto"` arguments in the recipe should be turned to
+# `"yes"` or `"no"`.
+#
+# Done using flow chart in https://github.com/tidymodels/workflows/issues/271
 toggle_sparsity <- function(object, data) {
   if (
     allow_sparse(object$fit$actions$model$spec) &&

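A minimal sketch (illustrative only, on a toy data frame) of the flip that `toggle_sparsity()` delegates to `recipes::.recipes_toggle_sparse_args()`; it assumes the development recipes version required in DESCRIPTION, where steps such as `step_dummy()` carry a `sparse` argument defaulting to `"auto"`:

    dat <- data.frame(y = 1:4, x = factor(c("a", "b", "a", "b")))

    rec <- recipes::recipe(y ~ x, data = dat) |>
      recipes::step_dummy(x)        # sparse is left at its "auto" default

    # toggle_sparsity() hands the recipe to this helper with choice = "yes" or
    # "no"; steps still set to "auto" are switched, explicit settings are kept
    rec <- recipes::.recipes_toggle_sparse_args(rec, choice = "yes")
    rec$steps[[1]]$sparse
    # expected: "yes"
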
From 843de34ba1c83869ef6ad123dbb26f8393ec7e90 Mon Sep 17 00:00:00 2001
From: Emil Hvitfeldt <emilhhvitfeldt@gmail.com>
Date: Fri, 17 Jan 2025 10:48:51 -0800
Subject: [PATCH 10/13] test toggle_sparsity directly

---
 tests/testthat/test-sparsevctrs.R | 86 +++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)

diff --git a/tests/testthat/test-sparsevctrs.R b/tests/testthat/test-sparsevctrs.R
index 1116118..18e8d3c 100644
--- a/tests/testthat/test-sparsevctrs.R
+++ b/tests/testthat/test-sparsevctrs.R
@@ -191,3 +191,89 @@ test_that("fit() errors if sparse matrix has no colnames", {
     fit(wf_spec, hotel_data)
   )
 })
+
+test_that("toggle_sparsity changes auto to yes", {
+  skip_if_not_installed("glmnet")
+  skip_if_not_installed("modeldata")
+
+  data("ames", package = "modeldata")
+
+  tree_spec <- parsnip::boost_tree("regression", "xgboost")
+
+  rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>%
+    recipes::step_dummy(recipes::all_nominal_predictors())
+
+  wf_spec <- workflow(rec_spec, tree_spec)
+
+  res <- toggle_sparsity(wf_spec, ames)
+
+  expect_identical(
+    extract_preprocessor(res)$steps[[1]]$sparse,
+    "yes"
+  )
+})
+
+test_that("toggle_sparsity doesn't change no", {
+  skip_if_not_installed("glmnet")
+  skip_if_not_installed("modeldata")
+
+  data("ames", package = "modeldata")
+
+  tree_spec <- parsnip::boost_tree("regression", "xgboost")
+
+  rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>%
+    recipes::step_dummy(recipes::all_nominal_predictors(), sparse = "no")
+
+  wf_spec <- workflow(rec_spec, tree_spec)
+
+  res <- toggle_sparsity(wf_spec, ames)
+
+  expect_identical(
+    extract_preprocessor(res)$steps[[1]]$sparse,
+    "no"
+  )
+})
+
+test_that("toggle_sparsity changes auto to no", {
+  skip_if_not_installed("glmnet")
+  skip_if_not_installed("modeldata")
+
+  data("ames", package = "modeldata")
+
+  tree_spec <- parsnip::boost_tree("regression", "xgboost")
+
+  # if we only dummy 1 variable it doesn't make the data sparse enough
+  rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>%
+    recipes::step_dummy(MS_Zoning)
+
+  wf_spec <- workflow(rec_spec, tree_spec)
+
+  res <- toggle_sparsity(wf_spec, ames)
+
+  expect_identical(
+    extract_preprocessor(res)$steps[[1]]$sparse,
+    "no"
+  )
+})
+
+test_that("toggle_sparsity doesn't change yes", {
+  skip_if_not_installed("glmnet")
+  skip_if_not_installed("modeldata")
+
+  data("ames", package = "modeldata")
+
+  tree_spec <- parsnip::boost_tree("regression", "xgboost")
+
+  # if we only dummy 1 variable it doesn't make the data sparse enough
+  rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>%
+    recipes::step_dummy(MS_Zoning, sparse = "yes")
+
+  wf_spec <- workflow(rec_spec, tree_spec)
+
+  res <- toggle_sparsity(wf_spec, ames)
+
+  expect_identical(
+    extract_preprocessor(res)$steps[[1]]$sparse,
+    "yes"
+  )
+})

From 138da2909b1164d9f25de3ee4e73643bfaf8f361 Mon Sep 17 00:00:00 2001
From: Emil Hvitfeldt <emilhhvitfeldt@gmail.com>
Date: Fri, 17 Jan 2025 11:02:06 -0800
Subject: [PATCH 11/13] test toggle_sparsity inside fit

---
 tests/testthat/test-sparsevctrs.R | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tests/testthat/test-sparsevctrs.R b/tests/testthat/test-sparsevctrs.R
index 18e8d3c..b14b4df 100644
--- a/tests/testthat/test-sparsevctrs.R
+++ b/tests/testthat/test-sparsevctrs.R
@@ -277,3 +277,22 @@ test_that("toggle_sparsity doesn't change yes", {
     "yes"
   )
 })
+
+test_that("toggle_sparsity doesn't break fit", {
+  skip_if_not_installed("glmnet")
+  skip_if_not_installed("modeldata")
+
+  data("ames", package = "modeldata")
+
+  tree_spec <- parsnip::boost_tree("regression", "xgboost")
+
+  rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>%
+    recipes::step_dummy(recipes::all_nominal_predictors())
+
+  wf_spec <- workflow(rec_spec, tree_spec)
+
+  expect_no_error(
+    fit(wf_spec, ames)
+  )
+})
+

From 2a41454b1095c6cc2be0e01d0c0340b367baf380 Mon Sep 17 00:00:00 2001
From: Emil Hvitfeldt <emilhhvitfeldt@gmail.com>
Date: Fri, 17 Jan 2025 11:29:53 -0800
Subject: [PATCH 12/13] use glmnet instead of xgboost

---
 tests/testthat/test-sparsevctrs.R | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/tests/testthat/test-sparsevctrs.R b/tests/testthat/test-sparsevctrs.R
index b14b4df..3468abf 100644
--- a/tests/testthat/test-sparsevctrs.R
+++ b/tests/testthat/test-sparsevctrs.R
@@ -197,8 +197,10 @@ test_that("toggle_sparsity changes auto to yes", {
   skip_if_not_installed("modeldata")
 
   data("ames", package = "modeldata")
+  ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor))
+  ames <- ames[1:100, ]
 
-  tree_spec <- parsnip::boost_tree("regression", "xgboost")
+  tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0)
 
   rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>%
     recipes::step_dummy(recipes::all_nominal_predictors())
@@ -218,8 +220,10 @@ test_that("toggle_sparsity doesn't change no", {
   skip_if_not_installed("modeldata")
 
   data("ames", package = "modeldata")
+  ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor))
+  ames <- ames[1:100, ]
 
-  tree_spec <- parsnip::boost_tree("regression", "xgboost")
+  tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0)
 
   rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>%
     recipes::step_dummy(recipes::all_nominal_predictors(), sparse = "no")
@@ -239,8 +243,10 @@ test_that("toggle_sparsity changes auto to no", {
   skip_if_not_installed("modeldata")
 
   data("ames", package = "modeldata")
+  ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor))
+  ames <- ames[1:100, ]
 
-  tree_spec <- parsnip::boost_tree("regression", "xgboost")
+  tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0)
 
   # if we only dummy 1 variable it doesn't make the data sparse enough
   rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>%
@@ -261,8 +267,10 @@ test_that("toggle_sparsity doesn't change yes", {
   skip_if_not_installed("modeldata")
 
   data("ames", package = "modeldata")
+  ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor))
+  ames <- ames[1:100, ]
 
-  tree_spec <- parsnip::boost_tree("regression", "xgboost")
+  tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0)
 
   # if we only dummy 1 variable it doesn't make the data sparse enough
   rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>%
@@ -283,8 +291,10 @@ test_that("toggle_sparsity doesn't break fit", {
   skip_if_not_installed("modeldata")
 
   data("ames", package = "modeldata")
+  ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor))
+  ames <- ames[1:100, ]
 
-  tree_spec <- parsnip::boost_tree("regression", "xgboost")
+  tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0)
 
   rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>%
     recipes::step_dummy(recipes::all_nominal_predictors())

From 5d1c4f48288f9cac764d9eaf73e058f05fc65bfb Mon Sep 17 00:00:00 2001
From: Emil Hvitfeldt <emilhhvitfeldt@gmail.com>
Date: Fri, 17 Jan 2025 12:07:07 -0800
Subject: [PATCH 13/13] don't use dplyr

---
 tests/testthat/test-sparsevctrs.R | 45 +++++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/tests/testthat/test-sparsevctrs.R b/tests/testthat/test-sparsevctrs.R
index 3468abf..c627fbb 100644
--- a/tests/testthat/test-sparsevctrs.R
+++ b/tests/testthat/test-sparsevctrs.R
@@ -197,7 +197,14 @@ test_that("toggle_sparsity changes auto to yes", {
   skip_if_not_installed("modeldata")
 
   data("ames", package = "modeldata")
-  ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor))
+  fcts <- c(
+    1L, 2L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 
+    17L, 20L, 21L, 22L, 23L, 24L, 26L, 27L, 28L, 29L, 30L, 32L, 36L, 
+    37L, 38L, 39L, 50L, 52L, 53L, 56L, 57L, 64L, 65L, 66L, 70L, 71L
+  )
+  outcome <- 72
+
+  ames <- ames[c(fcts, outcome)]
   ames <- ames[1:100, ]
 
   tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0)
@@ -220,7 +227,14 @@ test_that("toggle_sparsity doesn't change no", {
   skip_if_not_installed("modeldata")
 
   data("ames", package = "modeldata")
-  ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor))
+  fcts <- c(
+    1L, 2L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 
+    17L, 20L, 21L, 22L, 23L, 24L, 26L, 27L, 28L, 29L, 30L, 32L, 36L, 
+    37L, 38L, 39L, 50L, 52L, 53L, 56L, 57L, 64L, 65L, 66L, 70L, 71L
+  )
+  outcome <- 72
+
+  ames <- ames[c(fcts, outcome)]
   ames <- ames[1:100, ]
 
   tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0)
@@ -243,7 +257,14 @@ test_that("toggle_sparsity changes auto to no", {
   skip_if_not_installed("modeldata")
 
   data("ames", package = "modeldata")
-  ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor))
+  fcts <- c(
+    1L, 2L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 
+    17L, 20L, 21L, 22L, 23L, 24L, 26L, 27L, 28L, 29L, 30L, 32L, 36L, 
+    37L, 38L, 39L, 50L, 52L, 53L, 56L, 57L, 64L, 65L, 66L, 70L, 71L
+  )
+  outcome <- 72
+
+  ames <- ames[c(fcts, outcome)]
   ames <- ames[1:100, ]
 
   tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0)
@@ -267,7 +288,14 @@ test_that("toggle_sparsity doesn't change yes", {
   skip_if_not_installed("modeldata")
 
   data("ames", package = "modeldata")
-  ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor))
+  fcts <- c(
+    1L, 2L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 
+    17L, 20L, 21L, 22L, 23L, 24L, 26L, 27L, 28L, 29L, 30L, 32L, 36L, 
+    37L, 38L, 39L, 50L, 52L, 53L, 56L, 57L, 64L, 65L, 66L, 70L, 71L
+  )
+  outcome <- 72
+
+  ames <- ames[c(fcts, outcome)]
   ames <- ames[1:100, ]
 
   tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0)
@@ -291,7 +319,14 @@ test_that("toggle_sparsity doesn't break fit", {
   skip_if_not_installed("modeldata")
 
   data("ames", package = "modeldata")
-  ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor))
+  fcts <- c(
+    1L, 2L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 
+    17L, 20L, 21L, 22L, 23L, 24L, 26L, 27L, 28L, 29L, 30L, 32L, 36L, 
+    37L, 38L, 39L, 50L, 52L, 53L, 56L, 57L, 64L, 65L, 66L, 70L, 71L
+  )
+  outcome <- 72
+
+  ames <- ames[c(fcts, outcome)]
   ames <- ames[1:100, ]
 
   tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0)
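The hard-coded `fcts` and `outcome` indices above replace the earlier `dplyr::select(ames, Sale_Price, dplyr::where(is.factor))`; a base-R sketch (illustrative, assuming the modeldata version used here) that derives the same selection without pinning positions:

    data("ames", package = "modeldata")

    # positions of the factor columns plus the outcome, computed rather than
    # hard-coded; the patch pins them as 40 integer indices plus 72 for Sale_Price
    fct_cols <- which(vapply(ames, is.factor, logical(1)))
    outcome_col <- which(names(ames) == "Sale_Price")

    ames <- ames[c(unname(fct_cols), outcome_col)]
    ames <- ames[1:100, ]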