tidymodels · topepo · Apr 4, 2024 · Apr 3, 2024 · Apr 4, 2024 · Apr 4, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -59,4 +59,4 @@ Config/testthat/edition: 3
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
diff --git a/NAMESPACE b/NAMESPACE
@@ -127,6 +127,7 @@ export(as.factor)
 export(as.ordered)
 export(as_class_pred)
 export(augment)
+export(bound_prediction)
 export(cal_apply)
 export(cal_estimate_beta)
 export(cal_estimate_isotonic)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,7 @@
 # probably (development version)
 
+* A new function `bound_prediction()` is available to constrain the values of a numeric prediction.
+
 # probably 1.0.3
 
 * Fixed a bug where the grouping for calibration methods was sensitive to the type of the grouping variables (#127).

diff --git a/R/bound_prediction.R b/R/bound_prediction.R
@@ -0,0 +1,38 @@
+#' Truncate a numeric prediction column
+#'
+#' For user-defined `lower_limit` and/or `upper_limit` bounds, ensure that the
+#' values in the `.pred` column are constrained to lie within these bounds.
+#'
+#' @param x A data frame that contains a numeric column named `.pred`.
+#' @param lower_limit,upper_limit Single numerics (or `NA`) that define
+#' constraints on `.pred`. A limit that is `NA` or not numeric is ignored.
+#' @param call The call to be displayed in warnings or errors.
+#' @return `x` with potentially adjusted values in `.pred`.
+#' @examples
+#' data(solubility_test, package = "yardstick")
+#'
+#' names(solubility_test) <- c("solubility", ".pred")
+#'
+#' bound_prediction(solubility_test, lower_limit = -1)
+#' @export
+bound_prediction <- function(x, lower_limit = -Inf, upper_limit = Inf,
+                             call = rlang::caller_env()) {
+  if (!any(names(x) == ".pred")) {
+    cli::cli_abort("The argument {.arg x} should have a column named {.code .pred}",
+                   call = call)
+  }
+  if (!is.numeric(x$.pred)) {
+    cli::cli_abort("Column {.code .pred} should be numeric.",
+                   call = call)
+  }
+
+  # pmax()/pmin() keep `.pred` numeric when it is empty or all-NA; ifelse() does not
+  if (is.numeric(lower_limit) && !is.na(lower_limit)) {
+    x$.pred <- pmax(x$.pred, lower_limit)
+  }
+  if (is.numeric(upper_limit) && !is.na(upper_limit)) {
+    x$.pred <- pmin(x$.pred, upper_limit)
+  }
+  x
+}
+
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -37,6 +37,7 @@ reference:
   - starts_with("int_")
   - starts_with("control_conformal_")
   - starts_with("predict.int")
+  - bound_prediction
 
 - title: Data
   contents:

diff --git a/man/bound_prediction.Rd b/man/bound_prediction.Rd
diff --git a/man/probably-package.Rd b/man/probably-package.Rd
diff --git a/tests/testthat/Rplots.pdf b/tests/testthat/Rplots.pdf
diff --git a/tests/testthat/_snaps/bound-prediction.md b/tests/testthat/_snaps/bound-prediction.md
@@ -0,0 +1,34 @@
+# lower_limit bounds for numeric predictions
+
+    Code
+      bound_prediction(solubility_test, lower_limit = 2)
+    Condition
+      Error:
+      ! The argument `x` should have a column named `.pred`
+
+---
+
+    Code
+      solubility_test %>% mutate(.pred = format(prediction)) %>% bound_prediction(
+        lower_limit = 2)
+    Condition
+      Error:
+      ! Column `.pred` should be numeric.
+
+# upper_limit bounds for numeric predictions
+
+    Code
+      bound_prediction(solubility_test, lower_limit = 2)
+    Condition
+      Error:
+      ! The argument `x` should have a column named `.pred`
+
+---
+
+    Code
+      solubility_test %>% mutate(.pred = format(prediction)) %>% bound_prediction(
+        lower_limit = 2)
+    Condition
+      Error:
+      ! Column `.pred` should be numeric.
+
diff --git a/tests/testthat/test-bound-prediction.R b/tests/testthat/test-bound-prediction.R
@@ -0,0 +1,55 @@
+test_that("lower_limit bounds for numeric predictions", {
+  skip_if_not_installed("modeldata")
+  library(dplyr)
+  library(rlang)
+  data("solubility_test", package = "modeldata")
+  tune2 <- function() call("tune", "test")
+
+  # Input checks: no `.pred` column, then a non-numeric `.pred` column
+  expect_snapshot(bound_prediction(solubility_test, lower_limit = 2), error = TRUE)
+  expect_snapshot(
+    solubility_test %>%
+      mutate(.pred = format(prediction)) %>%
+      bound_prediction(lower_limit = 2),
+    error = TRUE)
+
+  sol <- solubility_test %>% set_names(c("solubility", ".pred"))
+  # Default and `NA` limits leave the data untouched
+  expect_equal(bound_prediction(sol), sol)
+  expect_equal(bound_prediction(sol, lower_limit = NA), sol)
+
+  # Index by the original values; indexing by the bounded result is vacuous
+  res_1 <- bound_prediction(sol, lower_limit = -1)
+  expect_true(all(res_1$.pred[sol$.pred < -1] == -1))
+  expect_equal(res_1$.pred[sol$.pred >= -1], sol$.pred[sol$.pred >= -1])
+  # A non-numeric limit (here an unevaluated call) is ignored
+  expect_equal(bound_prediction(sol, lower_limit = tune2()), sol)
+})
+
+test_that("upper_limit bounds for numeric predictions", {
+  skip_if_not_installed("modeldata")
+  library(dplyr)
+  library(rlang)
+  data("solubility_test", package = "modeldata")
+  tune2 <- function() call("tune", "test")
+
+  # Input checks; calls mirror the recorded snapshot (hence `lower_limit`)
+  expect_snapshot(bound_prediction(solubility_test, lower_limit = 2), error = TRUE)
+  expect_snapshot(
+    solubility_test %>%
+      mutate(.pred = format(prediction)) %>%
+      bound_prediction(lower_limit = 2),
+    error = TRUE)
+
+  sol <- solubility_test %>% set_names(c("solubility", ".pred"))
+  # Default and `NA` limits leave the data untouched
+  expect_equal(bound_prediction(sol), sol)
+  expect_equal(bound_prediction(sol, upper_limit = NA), sol)
+
+  # Index by the original values; indexing by the bounded result is vacuous
+  res_1 <- bound_prediction(sol, upper_limit = -1)
+  expect_true(all(res_1$.pred[sol$.pred > -1] == -1))
+  expect_equal(res_1$.pred[sol$.pred <= -1], sol$.pred[sol$.pred <= -1])
+  # A non-numeric limit (here an unevaluated call) is ignored
+  expect_equal(bound_prediction(sol, upper_limit = tune2()), sol)
+})