add brier score to default classification metrics (#859)

* add brier score to default metrics
* update internal objects and add a skip due to new ggplot version

---------

Co-authored-by: ‘topepo’ <‘[email protected]’>
tidymodels · Feb 28, 2024 · 31e3f64 · 31e3f64
1 parent a8f0772
commit 31e3f64
Show file tree

Hide file tree

Showing 22 changed files with 374 additions and 150 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: tune
 Title: Tidy Tuning Tools
-Version: 1.1.2.9020
+Version: 1.1.2.9021
 Authors@R: c(
     person("Max", "Kuhn", , "[email protected]", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0003-2402-136X")),

diff --git a/NAMESPACE b/NAMESPACE
@@ -356,6 +356,7 @@ importFrom(tibble,size_sum)
 importFrom(tidyr,nest)
 importFrom(tidyr,unnest)
 importFrom(yardstick,accuracy)
+importFrom(yardstick,brier_class)
 importFrom(yardstick,brier_survival)
 importFrom(yardstick,rmse)
 importFrom(yardstick,roc_auc)

diff --git a/NEWS.md b/NEWS.md
@@ -8,6 +8,8 @@
 
 * A method for rsample's `int_pctl()` function that will compute percentile confidence intervals on performance metrics for objects produced by `fit_resamples()`, `tune_*()`, and `last_fit()`. 
 
+* The Brier score is now part of the default metric set for classification models.
+
 
 ## Bug Fixes
 

diff --git a/R/0_imports.R b/R/0_imports.R
@@ -10,7 +10,7 @@
 #' @importFrom dials is_unknown encode_unit
 #' @importFrom stats sd qt qnorm dnorm pnorm predict model.matrix setNames
 #' @importFrom stats model.matrix model.response model.frame update median
-#' @importFrom yardstick rsq rmse accuracy roc_auc brier_survival
+#' @importFrom yardstick rsq rmse accuracy roc_auc brier_survival brier_class
 #' @importFrom tidyr unnest nest
 #' @importFrom GPfit GP_fit
 #' @importFrom parsnip get_from_env required_pkgs

diff --git a/R/checks.R b/R/checks.R
@@ -348,7 +348,7 @@ check_metrics <- function(x, object) {
              x <- yardstick::metric_set(rmse, rsq)
            },
            classification = {
-             x <- yardstick::metric_set(roc_auc, accuracy)
+             x <- yardstick::metric_set(roc_auc, accuracy, brier_class)
            },
            'censored regression' = {
              x <- yardstick::metric_set(brier_survival)

diff --git a/R/metric-selection.R b/R/metric-selection.R
@@ -262,7 +262,7 @@ check_metrics_arg <- function(mtr_set, wflow, call = rlang::caller_env()) {
              mtr_set <- yardstick::metric_set(rmse, rsq)
            },
            classification = {
-             mtr_set <- yardstick::metric_set(roc_auc, accuracy)
+             mtr_set <- yardstick::metric_set(roc_auc, accuracy, brier_class)
            },
            'censored regression' = {
              mtr_set <- yardstick::metric_set(brier_survival)

diff --git a/inst/test_objects.Rout b/inst/test_objects.Rout
diff --git a/revdep/README.md b/revdep/README.md
@@ -1,29 +1,103 @@
 # Platform
 
-|field    |value                        |
-|:--------|:----------------------------|
-|version  |R version 4.0.3 (2020-10-10) |
-|os       |macOS Catalina 10.15.7       |
-|system   |x86_64, darwin17.0           |
-|ui       |RStudio                      |
-|language |(EN)                         |
-|collate  |en_US.UTF-8                  |
-|ctype    |en_US.UTF-8                  |
-|tz       |America/New_York             |
-|date     |2021-02-27                   |
+|field    |value                               |
+|:--------|:-----------------------------------|
+|version  |R version 4.3.2 (2023-10-31)        |
+|os       |macOS Sonoma 14.3.1                 |
+|system   |aarch64, darwin20                   |
+|ui       |RStudio                             |
+|language |(EN)                                |
+|collate  |en_US.UTF-8                         |
+|ctype    |en_US.UTF-8                         |
+|tz       |America/New_York                    |
+|date     |2024-02-27                          |
+|rstudio  |2023.12.1+402 Ocean Storm (desktop) |
+|pandoc   |3.1.11 @ /opt/homebrew/bin/pandoc   |
 
 # Dependencies
 
-|package    |old   |new        |Δ  |
-|:----------|:-----|:----------|:--|
-|tune       |0.1.2 |0.1.3      |*  |
-|cli        |NA    |2.3.1.9000 |*  |
-|colorspace |NA    |2.0-0      |*  |
-|DiceDesign |NA    |1.9        |*  |
-|rprojroot  |NA    |2.0.2      |*  |
-|rstudioapi |NA    |0.13       |*  |
-|testthat   |NA    |3.0.2      |*  |
-|waldo      |NA    |0.2.4      |*  |
+|package      |old        |new        |Δ  |
+|:------------|:----------|:----------|:--|
+|tune         |1.1.2      |1.1.2.9020 |*  |
+|cli          |3.6.2      |3.6.2      |   |
+|clock        |0.7.0      |0.7.0      |   |
+|colorspace   |2.1-0      |2.1-0      |   |
+|cpp11        |0.4.7      |0.4.7      |   |
+|data.table   |1.15.0     |1.15.0     |   |
+|diagram      |1.6.5      |1.6.5      |   |
+|dials        |1.2.1      |1.2.1      |   |
+|DiceDesign   |1.10       |1.10       |   |
+|digest       |0.6.34     |0.6.34     |   |
+|dplyr        |1.1.4      |1.1.4      |   |
+|ellipsis     |0.3.2      |0.3.2      |   |
+|fansi        |1.0.6      |1.0.6      |   |
+|farver       |2.1.1      |2.1.1      |   |
+|foreach      |1.5.2      |1.5.2      |   |
+|furrr        |0.3.1      |0.3.1      |   |
+|future       |1.33.1     |1.33.1     |   |
+|future.apply |1.11.1     |1.11.1     |   |
+|generics     |0.1.3      |0.1.3      |   |
+|ggplot2      |3.5.0      |3.5.0      |   |
+|globals      |0.16.2     |0.16.2     |   |
+|glue         |1.7.0      |1.7.0      |   |
+|gower        |1.0.1      |1.0.1      |   |
+|GPfit        |1.0-8      |1.0-8      |   |
+|gtable       |0.3.4      |0.3.4      |   |
+|hardhat      |1.3.1      |1.3.1      |   |
+|ipred        |0.9-14     |0.9-14     |   |
+|isoband      |0.2.7      |0.2.7      |   |
+|iterators    |1.0.14     |1.0.14     |   |
+|labeling     |0.4.3      |0.4.3      |   |
+|lava         |1.7.3      |1.7.3      |   |
+|lhs          |1.1.6      |1.1.6      |   |
+|lifecycle    |1.0.4      |1.0.4      |   |
+|listenv      |0.9.1      |0.9.1      |   |
+|lubridate    |1.9.3      |1.9.3      |   |
+|magrittr     |2.0.3      |2.0.3      |   |
+|modelenv     |0.1.1      |0.1.1      |   |
+|munsell      |0.5.0      |0.5.0      |   |
+|numDeriv     |2016.8-1.1 |2016.8-1.1 |   |
+|parallelly   |1.37.0     |1.37.0     |   |
+|parsnip      |1.2.0      |1.2.0      |   |
+|pillar       |1.9.0      |1.9.0      |   |
+|pkgconfig    |2.0.3      |2.0.3      |   |
+|prettyunits  |1.2.0      |1.2.0      |   |
+|prodlim      |2023.08.28 |2023.08.28 |   |
+|progressr    |0.14.0     |0.14.0     |   |
+|purrr        |1.0.2      |1.0.2      |   |
+|R6           |2.5.1      |2.5.1      |   |
+|RColorBrewer |1.1-3      |1.1-3      |   |
+|Rcpp         |1.0.12     |1.0.12     |   |
+|recipes      |1.0.10     |1.0.10     |   |
+|rlang        |1.1.3      |1.1.3      |   |
+|rsample      |1.2.0      |1.2.0      |   |
+|scales       |1.3.0      |1.3.0      |   |
+|shape        |1.4.6.1    |1.4.6.1    |   |
+|slider       |0.3.1      |0.3.1      |   |
+|SQUAREM      |2021.1     |2021.1     |   |
+|stringi      |1.8.3      |1.8.3      |   |
+|stringr      |1.5.1      |1.5.1      |   |
+|tibble       |3.2.1      |3.2.1      |   |
+|tidyr        |1.3.1      |1.3.1      |   |
+|tidyselect   |1.2.0      |1.2.0      |   |
+|timechange   |0.3.0      |0.3.0      |   |
+|timeDate     |4032.109   |4032.109   |   |
+|tzdb         |0.4.0      |0.4.0      |   |
+|utf8         |1.2.4      |1.2.4      |   |
+|vctrs        |0.6.5      |0.6.5      |   |
+|viridisLite  |0.4.2      |0.4.2      |   |
+|warp         |0.2.1      |0.2.1      |   |
+|withr        |3.0.0      |3.0.0      |   |
+|workflows    |1.1.4      |1.1.4      |   |
+|yardstick    |1.3.0      |1.3.0      |   |
 
 # Revdeps
 
+## New problems (3)
+
+|package   |version |error  |warning |note |
+|:---------|:-------|:------|:-------|:----|
+|[finetune](problems.md#finetune)|1.1.0   |__+1__ |        |     |
+|[tidyclust](problems.md#tidyclust)|0.2.0   |__+1__ |        |     |
+|[tidysdm](problems.md#tidysdm)|0.9.3   |__+2__ |        |     |
+
diff --git a/revdep/problems.md b/revdep/problems.md
@@ -1 +1,146 @@
-*Wow, no problems at all. :)*
+# finetune
+
+<details>
+
+* Version: 1.1.0
+* GitHub: https://github.com/tidymodels/finetune
+* Source code: https://github.com/cran/finetune
+* Date/Publication: 2023-04-19 07:40:02 UTC
+* Number of recursive dependencies: 171
+
+Run `revdepcheck::revdep_details(, "finetune")` for more info
+
+</details>
+
+## Newly broken
+
+*   checking tests ...
+    ```
+      Running ‘spelling.R’
+      Running ‘testthat.R’
+     ERROR
+    Running the tests in ‘tests/testthat.R’ failed.
+    Last 13 lines of output:
+      ══ Failed tests ════════════════════════════════════════════════════════════════
+      ── Error ('test-sa-decision.R:16:5'): simulated annealing decisions ────────────
+      Error in `tune:::new_tune_results(., parameters = cart_param, outcomes = cart_outcomes, 
+          metrics = cart_metrics, rset_info = cart_rset_info)`: argument "eval_time" is missing, with no default
+      Backtrace:
+    ...
+          ▆
+       1. ├─cart_search %>% filter(.iter == iter_val) %>% ... at test-sa-decision.R:16:5
+       2. └─tune:::new_tune_results(...)
+       3.   └─tune:::new_bare_tibble(...)
+       4.     └─tibble::new_tibble(x, nrow = nrow(x), ..., class = class)
+       5.       └─rlang::pairlist2(...)
+      
+      [ FAIL 1 | WARN 0 | SKIP 20 | PASS 107 ]
+      Error: Test failures
+      Execution halted
+    ```
+
+# tidyclust
+
+<details>
+
+* Version: 0.2.0
+* GitHub: https://github.com/tidymodels/tidyclust
+* Source code: https://github.com/cran/tidyclust
+* Date/Publication: 2023-09-25 18:20:06 UTC
+* Number of recursive dependencies: 168
+
+Run `revdepcheck::revdep_details(, "tidyclust")` for more info
+
+</details>
+
+## Newly broken
+
+*   checking tests ...
+    ```
+      Running ‘testthat.R’
+     ERROR
+    Running the tests in ‘tests/testthat.R’ failed.
+    Last 13 lines of output:
+      Error in `if (metric_info$direction == "maximize") {
+          summary_res <- summary_res %>% dplyr::arrange(dplyr::desc(mean))
+      } else if (metric_info$direction == "minimize") {
+          summary_res <- summary_res %>% dplyr::arrange(mean)
+      } else if (metric_info$direction == "zero") {
+          summary_res <- summary_res %>% dplyr::arrange(abs(mean))
+      }`: argument is of length zero
+      Backtrace:
+          ▆
+       1. └─testthat::expect_snapshot(tmp <- tune::show_best(res)) at test-tune_cluster.R:440:3
+       2.   └─rlang::cnd_signal(state$error)
+      
+      [ FAIL 1 | WARN 0 | SKIP 47 | PASS 177 ]
+      Error: Test failures
+      Execution halted
+    ```
+
+# tidysdm
+
+<details>
+
+* Version: 0.9.3
+* GitHub: https://github.com/EvolEcolGroup/tidysdm
+* Source code: https://github.com/cran/tidysdm
+* Date/Publication: 2024-01-17 20:50:02 UTC
+* Number of recursive dependencies: 166
+
+Run `revdepcheck::revdep_details(, "tidysdm")` for more info
+
+</details>
+
+## Newly broken
+
+*   checking examples ... ERROR
+    ```
+    Running examples in ‘tidysdm-Ex.R’ failed
+    The error most likely occurred in:
+    
+    > ### Name: autoplot.simple_ensemble
+    > ### Title: Plot the results of a simple ensemble
+    > ### Aliases: autoplot.simple_ensemble
+    > 
+    > ### ** Examples
+    > 
+    > # we use the two_class_example from `workflowsets`
+    ...
+    2.26658549056941, 1.55070434710052, 2.61077822145814, 0.9766272632684,
+    1.86637107997978, 0.675410781588896, 1.27855056115075, 1.39156478397907,
+    1.03940782815086, 1.69628613309882, 1.32644435902627, 3.31674522338439,
+    2.79572399056405, 3.37417921047442, 1.14087715338524, 1.56584397770581,
+    1.74205067903299, 2.71615811524246, 1.97133695928657, [... truncated]
+    Warning: Unknown or uninitialised column: `metric`.
+    Error in if (!any(mtr_info$metric == metric)) { : 
+      missing value where TRUE/FALSE needed
+    Calls: %>% ... add_member.tune_results -> <Anonymous> -> check_metric_in_tune_results
+    Execution halted
+    ```
+
+*   checking tests ...
+    ```
+      Running ‘spelling.R’
+      Running ‘testthat.R’
+     ERROR
+    Running the tests in ‘tests/testthat.R’ failed.
+    Last 13 lines of output:
+        1. ├─testthat::expect_warning(test_ens <- simple_ensemble() %>% add_member(none_mars)) at test_simple_ensemble.R:14:3
+        2. │ └─testthat:::expect_condition_matching(...)
+        3. │   └─testthat:::quasi_capture(...)
+        4. │     ├─testthat (local) .capture(...)
+        5. │     │ └─base::withCallingHandlers(...)
+    ...
+        6. │     └─rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
+        7. ├─simple_ensemble() %>% add_member(none_mars)
+        8. ├─tidysdm::add_member(., none_mars)
+        9. └─tidysdm:::add_member.tune_results(., none_mars)
+       10.   └─(utils::getFromNamespace("choose_metric", "tune"))(metric, member)
+       11.     └─tune::check_metric_in_tune_results(mtr_info, metric, call = call)
+      
+      [ FAIL 6 | WARN 10 | SKIP 0 | PASS 153 ]
+      Error: Test failures
+      Execution halted
+    ```
+
diff --git a/tests/testthat/_snaps/metric-args.md b/tests/testthat/_snaps/metric-args.md
@@ -101,8 +101,9 @@
       check_metrics_arg(NULL, wflow)
     Output
       A metric set, consisting of:
-      - `roc_auc()`, a probability metric | direction: maximize
-      - `accuracy()`, a class metric      | direction: maximize
+      - `roc_auc()`, a probability metric     | direction: maximize
+      - `accuracy()`, a class metric          | direction: maximize
+      - `brier_class()`, a probability metric | direction: minimize
 
 ---
 

diff --git a/tests/testthat/data/knn_gp.rds b/tests/testthat/data/knn_gp.rds
diff --git a/tests/testthat/data/knn_grid.rds b/tests/testthat/data/knn_grid.rds
diff --git a/tests/testthat/data/knn_results.rds b/tests/testthat/data/knn_results.rds
diff --git a/tests/testthat/data/knn_set.rds b/tests/testthat/data/knn_set.rds
diff --git a/tests/testthat/data/lm_bayes.rds b/tests/testthat/data/lm_bayes.rds
diff --git a/tests/testthat/data/lm_resamples.rds b/tests/testthat/data/lm_resamples.rds
diff --git a/tests/testthat/data/rcv_results.rds b/tests/testthat/data/rcv_results.rds
diff --git a/tests/testthat/data/surv_boost_tree_res.rds b/tests/testthat/data/surv_boost_tree_res.rds
diff --git a/tests/testthat/data/svm_reg_results.rds b/tests/testthat/data/svm_reg_results.rds
diff --git a/tests/testthat/data/svm_results.rds b/tests/testthat/data/svm_results.rds
diff --git a/tests/testthat/data/test_objects.RData b/tests/testthat/data/test_objects.RData
diff --git a/tests/testthat/test-autoplot.R b/tests/testthat/test-autoplot.R
@@ -328,6 +328,7 @@ test_that("plot_perf_vs_iter with fairness metrics (#773)", {
 })
 
 test_that("regular grid plot", {
+  skip_if_not_installed("ggplot2", minimum_version = "3.5.0")
   set.seed(1)
   res <-
     parsnip::svm_rbf(cost = tune()) %>%
Original file line number	Diff line number	Diff line change
Expand Up		@@ -8,6 +8,8 @@

		* A method for rsample's `int_pctl()` function that will compute percentile confidence intervals on performance metrics for objects produced by `fit_resamples()`, `tune_*()`, and `last_fit()`.

		* The Brier score is now part of the default metric set for classification models.


		## Bug Fixes

Expand Down