Skip to content

Commit

Permalink
resolve lightgbm aliases for main boost_tree() arguments (#55)
Browse files Browse the repository at this point in the history
  • Loading branch information
simonpcouch authored Nov 29, 2022
1 parent 10aee24 commit 04d1b46
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 5 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ BugReports: https://github.com/tidymodels/bonsai/issues
Depends:
parsnip (>= 1.0.1)
Imports:
cli,
dials,
dplyr,
glue,
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ Note that, when tuning hyperparameters with the tune package, detection of paral

* The `boost_tree` argument `stop_iter` now maps to the `lightgbm::lgb.train()` argument `early_stopping_round` rather than its alias `early_stopping_rounds`. This does not affect parsnip's interface to lightgbm (i.e. via `boost_tree() %>% set_engine("lightgbm")`), though it will introduce errors for code that uses the `train_lightgbm()` wrapper directly and sets the `lightgbm::lgb.train()` argument `early_stopping_round` by its alias `early_stopping_rounds` via `train_lightgbm()`'s `...`.

* Disallowed passing main model arguments as engine arguments to `set_engine("lightgbm", ...)` via aliases. That is, if a main argument is marked for tuning and a lightgbm alias is supplied as an engine argument, bonsai will now error, rather than supplying both to lightgbm and allowing the package to handle aliases. Users can still interface with non-main `boost_tree()` arguments via their lightgbm aliases (#53).

# bonsai 0.2.0

* Enabled bagging with lightgbm via the `sample_size` argument to `boost_tree`
Expand Down
54 changes: 54 additions & 0 deletions R/lightgbm.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ train_lightgbm <- function(x, y, max_depth = -1, num_iterations = 100, learning_
process_mtry(feature_fraction_bynode = feature_fraction_bynode,
counts = counts, x = x, is_missing = missing(feature_fraction_bynode))

check_lightgbm_aliases(...)

args <- list(
param = list(
num_iterations = num_iterations,
Expand Down Expand Up @@ -418,3 +420,55 @@ categorical_features_to_int <- function(x, cat_indices){
}
x
}

# Abort when an argument passed through `...` is a lightgbm alias for one of
# the main `boost_tree()` arguments.
#
# The names of the arguments in `...` are looked up in the `alias` column of
# `lightgbm_aliases`. The first name (in the order supplied) that is a known
# alias triggers an error that names the offending argument and the parsnip
# argument that should be used instead. The error is reported as coming from
# a `fit` call rather than from this helper.
#
# @param ... Arguments to check; only their names are inspected.
# @return `TRUE`, invisibly, when none of the names is a known alias.
check_lightgbm_aliases <- function(...) {
  dots <- rlang::list2(...)

  # For every supplied name: its row in the alias table, or NA when the name
  # is not an alias. Unnamed (or empty) dots yield no non-NA entries.
  alias_rows <- match(names(dots), lightgbm_aliases$alias)
  first_hit <- which(!is.na(alias_rows))[1]

  if (!is.na(first_hit)) {
    # `param` and `parsnip` are interpolated by name in the message below.
    param <- names(dots)[first_hit]
    parsnip <- lightgbm_aliases$parsnip[alias_rows[first_hit]]

    # Continuation lines are kept flush-left so the message literals stay
    # unchanged.
    cli::cli_abort(
      c(
        "!" = "The {.var {param}} argument passed to \\
{.help [`set_engine()`](parsnip::set_engine)} is an alias for \\
a main model argument.",
        "i" = "Please instead pass this argument via the {.var {parsnip}} \\
argument to {.help [`boost_tree()`](parsnip::boost_tree)}."
      ),
      call = rlang::call2("fit")
    )
  }

  invisible(TRUE)
}

# Lookup table of lightgbm parameter aliases for the main `boost_tree()`
# arguments. One row per alias, with columns:
#   parsnip  - the parsnip main argument
#   lightgbm - the lightgbm parameter that the parsnip argument maps to
#   alias    - an alternative lightgbm name for that parameter
lightgbm_aliases <- local({
  # Each entry: parsnip argument, lightgbm parameter, and all of its aliases.
  # note that "tree_depth" -> "max_depth" has no aliases
  alias_groups <- list(
    list(
      parsnip = "trees",
      lightgbm = "num_iterations",
      alias = c(
        "num_iteration", "n_iter", "num_tree", "num_trees", "num_round",
        "num_rounds", "nrounds", "num_boost_round", "n_estimators", "max_iter"
      )
    ),
    list(
      parsnip = "learn_rate",
      lightgbm = "learning_rate",
      alias = c("shrinkage_rate", "eta")
    ),
    list(
      parsnip = "mtry",
      lightgbm = "feature_fraction_bynode",
      alias = c("sub_feature_bynode", "colsample_bynode")
    ),
    list(
      parsnip = "min_n",
      lightgbm = "min_data_in_leaf",
      alias = c(
        "min_data_per_leaf", "min_data", "min_child_samples",
        "min_samples_leaf"
      )
    ),
    list(
      parsnip = "loss_reduction",
      lightgbm = "min_gain_to_split",
      alias = "min_split_gain"
    ),
    list(
      parsnip = "sample_size",
      lightgbm = "bagging_fraction",
      alias = c("sub_row", "subsample", "bagging")
    ),
    list(
      parsnip = "stop_iter",
      lightgbm = "early_stopping_round",
      alias = c("early_stopping_rounds", "early_stopping", "n_iter_no_change")
    )
  )

  # Number of alias rows contributed by each group, used to repeat the
  # parsnip / lightgbm names once per alias.
  group_sizes <- vapply(
    alias_groups,
    function(group) length(group$alias),
    integer(1)
  )
  parsnip_names <- vapply(
    alias_groups,
    function(group) group$parsnip,
    character(1)
  )
  lightgbm_names <- vapply(
    alias_groups,
    function(group) group$lightgbm,
    character(1)
  )

  tibble::tibble(
    parsnip = rep(parsnip_names, times = group_sizes),
    lightgbm = rep(lightgbm_names, times = group_sizes),
    alias = unlist(lapply(alias_groups, function(group) group$alias))
  )
})
34 changes: 31 additions & 3 deletions tests/testthat/_snaps/lightgbm.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,39 @@
See `?train_lightgbm` for more details.

# tuning mtry vs mtry_prop
# training wrapper warns on protected arguments

Code
boost_tree(mtry = tune::tune()) %>% set_engine("lightgbm") %>% set_mode(
boost_tree() %>% set_engine("lightgbm", n_iter = 10) %>% set_mode("regression") %>%
fit(bill_length_mm ~ ., data = penguins)
Error <rlang_error>
! The `n_iter` argument passed to `set_engine()` (`?parsnip::set_engine()`) is an alias for a main model argument.
i Please instead pass this argument via the `trees` argument to `boost_tree()` (`?parsnip::boost_tree()`).

---

Code
boost_tree() %>% set_engine("lightgbm", num_tree = 10) %>% set_mode(
"regression") %>% fit(bill_length_mm ~ ., data = penguins)
Error <rlang_error>
! The `num_tree` argument passed to `set_engine()` (`?parsnip::set_engine()`) is an alias for a main model argument.
i Please instead pass this argument via the `trees` argument to `boost_tree()` (`?parsnip::boost_tree()`).

---

Code
boost_tree() %>% set_engine("lightgbm", min_split_gain = 2) %>% set_mode(
"regression") %>% fit(bill_length_mm ~ ., data = penguins)
Error <rlang_error>
The supplied `mtry` parameter is a call to `tune`. Did you forget to optimize hyperparameters with a tuning function like `tune::tune_grid`?
! The `min_split_gain` argument passed to `set_engine()` (`?parsnip::set_engine()`) is an alias for a main model argument.
i Please instead pass this argument via the `loss_reduction` argument to `boost_tree()` (`?parsnip::boost_tree()`).

---

Code
boost_tree() %>% set_engine("lightgbm", min_split_gain = 2, lambda_l2 = 0.5) %>%
set_mode("regression") %>% fit(bill_length_mm ~ ., data = penguins)
Error <rlang_error>
! The `min_split_gain` argument passed to `set_engine()` (`?parsnip::set_engine()`) is an alias for a main model argument.
i Please instead pass this argument via the `loss_reduction` argument to `boost_tree()` (`?parsnip::boost_tree()`).

35 changes: 33 additions & 2 deletions tests/testthat/test-lightgbm.R
Original file line number Diff line number Diff line change
Expand Up @@ -477,13 +477,44 @@ test_that("training wrapper warns on protected arguments", {
boost_tree() %>%
set_engine(
"lightgbm",
colnames = paste0("X", 1:ncol(penguins)),
nrounds = 50
colnames = paste0("X", 1:ncol(penguins))
) %>%
set_mode("regression") %>%
fit(bill_length_mm ~ ., data = penguins),
"guarded by bonsai.*colnames"
)

expect_snapshot(
error = TRUE,
boost_tree() %>%
set_engine("lightgbm", n_iter = 10) %>%
set_mode("regression") %>%
fit(bill_length_mm ~ ., data = penguins)
)

expect_snapshot(
error = TRUE,
boost_tree() %>%
set_engine("lightgbm", num_tree = 10) %>%
set_mode("regression") %>%
fit(bill_length_mm ~ ., data = penguins)
)

expect_snapshot(
error = TRUE,
boost_tree() %>%
set_engine("lightgbm", min_split_gain = 2) %>%
set_mode("regression") %>%
fit(bill_length_mm ~ ., data = penguins)
)

expect_snapshot(
error = TRUE,
boost_tree() %>%
set_engine("lightgbm", min_split_gain = 2, lambda_l2 = .5) %>%
set_mode("regression") %>%
fit(bill_length_mm ~ ., data = penguins)
)
})

test_that("training wrapper passes stop_iter correctly", {
Expand Down

0 comments on commit 04d1b46

Please sign in to comment.