Commit a80278c

Feat: validation and internal tuning (#229)
* support internal tuning and validation
* add eval_freq parameter to torch learner
* add stage on_valid_end and support termination of training through callbacks
* some other fixes
1 parent d41d116 commit a80278c

43 files changed (+480, -575 lines)

DESCRIPTION (+5 -4)

@@ -40,7 +40,6 @@ Depends:
 Imports:
     backports,
     checkmate (>= 2.2.0),
-    coro,
     data.table,
     lgr,
     methods,
@@ -56,6 +55,7 @@ Suggests:
     jsonlite,
     knitr,
     magick,
+    mlr3tuning,
     progress,
     rmarkdown,
     rpart,
@@ -64,8 +64,9 @@ Suggests:
     torchvision,
     waldo
 Remotes:
-    mlr-org/mlr3,
-    mlr-org/mlr3pipelines,
+    mlr-org/mlr3@feat/inner_valid,
+    mlr-org/mlr3pipelines@feat/inner_valid,
+    mlr-org/mlr3tuning@internal_tuning,
     mlverse/torchvision
 Config/testthat/edition: 3
 NeedsCompilation: no
@@ -79,6 +80,7 @@ Collate:
     'zzz.R'
     'TorchCallback.R'
     'CallbackSetCheckpoint.R'
+    'CallbackSetEarlyStopping.R'
     'CallbackSetHistory.R'
     'CallbackSetProgress.R'
     'ContextTorch.R'
@@ -116,7 +118,6 @@ Collate:
     'PipeOpTorchOptimizer.R'
     'PipeOpTorchReshape.R'
     'PipeOpTorchSoftmax.R'
-    'ResamplingRowRoles.R'
    'TaskClassif_lazy_iris.R'
    'TaskClassif_mnist.R'
    'TaskClassif_tiny_imagenet.R'
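The Remotes entries above now point at feature branches. A minimal installation sketch for development, assuming the remotes package (the branch refs are copied verbatim from the diff):

# Hedged sketch, not part of the commit: install the branch dependencies listed under Remotes.
install.packages("remotes")
remotes::install_github("mlr-org/mlr3@feat/inner_valid")
remotes::install_github("mlr-org/mlr3pipelines@feat/inner_valid")
remotes::install_github("mlr-org/mlr3tuning@internal_tuning")
remotes::install_github("mlverse/torchvision")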

NAMESPACE (-1)

@@ -131,7 +131,6 @@ export(PipeOpTorchTanh)
 export(PipeOpTorchTanhShrink)
 export(PipeOpTorchThreshold)
 export(PipeOpTorchUnsqueeze)
-export(ResamplingRowRoles)
 export(TorchCallback)
 export(TorchDescriptor)
 export(TorchIngressToken)

R/CallbackSet.R (+10 -2)

@@ -35,15 +35,21 @@
 #' @section Stages:
 #' * `begin` :: Run before the training loop begins.
 #' * `epoch_begin` :: Run he beginning of each epoch.
-#' * `before_validation` :: Run before each validation loop.
 #' * `batch_begin` :: Run before the forward call.
 #' * `after_backward` :: Run after the backward call.
 #' * `batch_end` :: Run after the optimizer step.
 #' * `batch_valid_begin` :: Run before the forward call in the validation loop.
 #' * `batch_valid_end` :: Run after the forward call in the validation loop.
+#' * `valid_end` :: Run at the end of validation.
 #' * `epoch_end` :: Run at the end of each epoch.
 #' * `end` :: Run after last epoch.
 #' * `exit` :: Run at last, using `on.exit()`.
+#'
+#' @section Terminate Training:
+#' If training is to be stopped, it is possible to set the field `$terminate` of [`ContextTorch`].
+#' At the end of every epoch this field is checked and if it is `TRUE`, training stops.
+#' This can for example be used to implement custom early stopping.
+#'
 #' @family Callback
 #' @export
 CallbackSet = R6Class("CallbackSet",
@@ -119,7 +125,7 @@ CallbackSet = R6Class("CallbackSet",
 #'
 #' @param classname (`character(1)`)\cr
 #'   The class name.
-#' @param on_begin,on_end,on_epoch_begin,on_before_valid,on_epoch_end,on_batch_begin,on_batch_end,on_after_backward,on_batch_valid_begin,on_batch_valid_end,on_exit (`function`)\cr
+#' @param on_begin,on_end,on_epoch_begin,on_before_valid,on_epoch_end,on_batch_begin,on_batch_end,on_after_backward,on_batch_valid_begin,on_batch_valid_end,on_valid_end,on_exit (`function`)\cr
 #'   Function to execute at the given stage, see section *Stages*.
 #' @param initialize (`function()`)\cr
 #'   The initialization method of the callback.
@@ -159,6 +165,7 @@ callback_set = function(
   # validation
   on_batch_valid_begin = NULL,
   on_batch_valid_end = NULL,
+  on_valid_end = NULL,
   # other methods
   state_dict = NULL,
   load_state_dict = NULL,
@@ -181,6 +188,7 @@ callback_set = function(
   on_after_backward = assert_function(on_after_backward, nargs = 0, null.ok = TRUE),
   on_batch_valid_begin = assert_function(on_batch_valid_begin, nargs = 0, null.ok = TRUE),
   on_batch_valid_end = assert_function(on_batch_valid_end, nargs = 0, null.ok = TRUE),
+  on_valid_end = assert_function(on_valid_end, nargs = 0, null.ok = TRUE),
   on_exit = assert_function(on_exit, nargs = 0, null.ok = TRUE)
 )
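To illustrate the new `valid_end` stage and the Terminate Training mechanism documented above, here is a minimal sketch of a custom callback built with `callback_set()`. The class name and the 0.95 threshold are invented for the example; the stage function reads the training state through `self$ctx` as described in the Stages section.

library(mlr3torch)

# Sketch only: stop training once the first validation score exceeds a hypothetical threshold.
CallbackSetStopAtThreshold = callback_set("CallbackSetStopAtThreshold",
  on_valid_end = function() {
    scores = self$ctx$last_scores_valid
    if (length(scores) && !is.na(scores[[1L]]) && scores[[1L]] > 0.95) {
      # the terminate flag is checked at the end of the epoch and ends the training loop
      self$ctx$terminate = TRUE
    }
  }
)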

R/CallbackSetEarlyStopping.R (+38, new file)

@@ -0,0 +1,38 @@
+CallbackSetEarlyStopping = R6Class("CallbackSetEarlyStopping",
+  inherit = CallbackSet,
+  lock_objects = FALSE,
+  public = list(
+    initialize = function(patience, min_delta) {
+      self$patience = assert_int(patience, lower = 1L)
+      self$min_delta = assert_double(min_delta, lower = 0, len = 1L, any.missing = FALSE)
+      self$stagnation = 0L
+    },
+    on_valid_end = function() {
+      if (is.null(self$prev_valid_scores)) {
+        self$prev_valid_scores = self$ctx$last_scores_valid
+        return(NULL)
+      }
+      if (is.null(self$ctx$last_scores_valid)) {
+        return(NULL)
+      }
+      multiplier = if (self$ctx$measures_valid[[1L]]$minimize) -1 else 1
+      improvement = multiplier * (self$ctx$last_scores_valid[[1L]] - self$prev_valid_scores[[1L]])
+
+      if (is.na(improvement)) {
+        lg$warn("Learner %s in epoch %s: Difference between subsequent validation performances is NA",
+          self$ctx$learner$id, self$ctx$epoch)
+        return(NULL)
+      }
+
+      if (improvement < self$min_delta) {
+        self$stagnation = self$stagnation + 1L
+        if (self$stagnation == self$patience) {
+          self$ctx$terminate = TRUE
+        }
+      } else {
+        self$stagnation = 0
+      }
+      self$prev_valid_scores = self$ctx$last_scores_valid
+    }
+  )
+)
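The core of the callback above is a stagnation counter driven by the sign-adjusted difference of consecutive validation scores. The helper below restates that arithmetic outside the R6 class purely for illustration; the function name and signature are not package API.

# Illustrative restatement of the early-stopping arithmetic (not package API).
# `minimize` flips the sign so that `improvement` always means "better by this much".
update_stagnation = function(prev, current, minimize, min_delta, stagnation) {
  multiplier = if (minimize) -1 else 1
  improvement = multiplier * (current - prev)
  if (is.na(improvement)) {
    return(stagnation)  # NA difference: leave the counter unchanged, as the callback does
  }
  if (improvement < min_delta) stagnation + 1L else 0L
}

# Example with a measure that is minimized (e.g. classification error):
# going from 0.30 to 0.29 is an improvement of 0.01 >= min_delta, so the counter resets.
update_stagnation(prev = 0.30, current = 0.29, minimize = TRUE, min_delta = 0.001, stagnation = 2L)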

R/CallbackSetHistory.R (+5 -3)

@@ -1,4 +1,5 @@
 #' @title History Callback
+
 #'
 #' @name mlr_callback_set.history
 #'
@@ -22,17 +23,18 @@ CallbackSetHistory = R6Class("CallbackSetHistory",
     #' @description
     #' Converts the lists to data.tables.
     state_dict = function() {
-      structure(list(
+      list(
         train = rbindlist(self$train, fill = TRUE),
         valid = rbindlist(self$valid, fill = TRUE)
-      ), class = "callback_state_history")
+      )
     },
     #' @description
     #' Sets the field `$train` and `$valid` to those contained in the state dict.
     #' @param state_dict (`callback_state_history`)\cr
     #'   The state dict as retrieved via `$state_dict()`.
     load_state_dict = function(state_dict) {
-      assert_class(state_dict, "callback_state_history")
+      assert_list(state_dict, "data.table")
+      assert_permutation(names(state_dict), c("train", "valid"))
       self$train = state_dict$train
       self$valid = state_dict$valid
     },
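After this change the history state is a plain named list of two data.tables rather than a classed object, which is what `load_state_dict()` now asserts. A small sketch of the expected shape; the epoch and classif.acc columns are invented for the example.

library(data.table)
library(checkmate)

# Hedged sketch of the new state-dict shape.
state_dict = list(
  train = data.table(epoch = 1:2, classif.acc = c(0.71, 0.78)),
  valid = data.table(epoch = 1:2, classif.acc = c(0.69, 0.74))
)
assert_list(state_dict, types = "data.table")             # every element is a data.table
assert_set_equal(names(state_dict), c("train", "valid"))  # same check in spirit as assert_permutation() above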

R/CallbackSetProgress.R (-6)

@@ -62,12 +62,6 @@ CallbackSetProgress = R6Class("CallbackSetProgress",
        cat(paste(output, collapse = ""))
      }
    }
-    },
-    #' @description
-    #' Deletes the progess bar objects.
-    on_end = function() {
-      self$pb_train = NULL
-      self$pb_valid = NULL
    }
  )
 )

R/ContextTorch.R (+19 -6)

@@ -38,8 +38,11 @@ ContextTorch = R6Class("ContextTorch",
     #'   The total number of epochs the learner is trained for.
     #' @param prediction_encoder (`function()`)\cr
     #'   The learner's prediction encoder.
+    #' @param eval_freq (`integer(1)`)\cr
+    #'   The evaluation frequency.
     initialize = function(learner, task_train, task_valid = NULL, loader_train, loader_valid = NULL,
-      measures_train = NULL, measures_valid = NULL, network, optimizer, loss_fn, total_epochs, prediction_encoder) {
+      measures_train = NULL, measures_valid = NULL, network, optimizer, loss_fn, total_epochs, prediction_encoder,
+      eval_freq = 1L) {
       self$learner = assert_r6(learner, "Learner")
       self$task_train = assert_r6(task_train, "Task")
       self$task_valid = assert_r6(task_valid, "Task", null.ok = TRUE)
@@ -56,8 +59,8 @@ ContextTorch = R6Class("ContextTorch",
       self$last_scores_train = structure(list(), names = character(0))
       self$last_scores_valid = structure(list(), names = character(0))
       self$prediction_encoder = assert_function(prediction_encoder, args = c("predict_tensor", "task"))
-      self$epoch = 0
-      self$batch = 0
+      self$eval_freq = assert_int(eval_freq, lower = 1L)
+      self$terminate = FALSE
     },
     #' @field learner ([`Learner`])\cr
     #'   The torch learner.
@@ -92,11 +95,15 @@ ContextTorch = R6Class("ContextTorch",
     #' @field total_epochs (`integer(1)`)\cr
     #'   The total number of epochs the learner is trained for.
     total_epochs = NULL,
-    #' @field last_scores_train (named `list()`)\cr
-    #'   The scores from the last training batch. Names are the ids of the training measures.
+    #' @field last_scores_train (named `list()` or `NULL`)\cr
+    #'   The scores from the last training batch. Names are the ids of the training measures.
+    #'   If [`LearnerTorch`] sets `eval_freq` different from `1`, this is `NULL` in all epochs
+    #'   that don't evaluate the model.
     last_scores_train = NULL,
     #' @field last_scores_valid (`list()`)\cr
     #'   The scores from the last validation batch. Names are the ids of the validation measures.
+    #'   If [`LearnerTorch`] sets `eval_freq` different from `1`, this is `NULL` in all epochs
+    #'   that don't evaluate the model.
     last_scores_valid = NULL,
     #' @field epoch (`integer(1)`)\cr
     #'   The current epoch.
@@ -106,6 +113,12 @@ ContextTorch = R6Class("ContextTorch",
     step = NULL,
     #' @field prediction_encoder (`function()`)\cr
     #'   The learner's prediction encoder.
-    prediction_encoder = NULL
+    prediction_encoder = NULL,
+    #' @field batch (named `list()` of `torch_tensor`s)\cr
+    #'   The current batch.
+    batch = NULL,
+    #' @field terminate (`logical(1)`)\cr
+    #'   If this field is set to `TRUE` at the end of an epoch, training stops.
+    terminate = NULL
   )
 )
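A toy sketch, not the package's actual training loop, of how the new `eval_freq` and `terminate` fields interact: scores are only produced on evaluation epochs, and the terminate flag is checked once per epoch. All objects below are stand-ins for illustration.

# Toy stand-ins; in the package, ctx is the ContextTorch instance.
total_epochs = 10L
eval_freq = 2L
ctx = new.env()
ctx$terminate = FALSE
ctx$last_scores_valid = NULL

for (epoch in seq_len(total_epochs)) {
  # ... one epoch of training would run here ...
  if (epoch %% eval_freq == 0) {
    # validation only runs on these epochs and fills the scores
    ctx$last_scores_valid = list(classif.acc = min(1, 0.6 + 0.05 * epoch))
    # a callback (e.g. early stopping) may now decide to set the flag
    if (ctx$last_scores_valid$classif.acc >= 0.9) ctx$terminate = TRUE
  } else {
    ctx$last_scores_valid = NULL  # nothing was evaluated this epoch
  }
  if (ctx$terminate) break  # the flag is checked once at the end of every epoch
}
epoch  # 6 in this toy run: the first evaluation epoch reaching 0.9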

R/DataBackendLazy.R (+7 -6)

@@ -173,7 +173,7 @@ DataBackendLazy = R6Class("DataBackendLazy",
     backend = function(rhs) {
       assert_ro_binding(rhs)
       if (is.null(private$.backend)) {
-        private$.backend = assert_backend(private$.constructor(self))
+        backend = assert_backend(private$.constructor(self))

         f = function(test, x, y, var_name) {
           if (!test(x, y)) {
@@ -185,12 +185,13 @@ DataBackendLazy = R6Class("DataBackendLazy",
           }
         }

-        f(identical, private$.backend$primary_key, self$primary_key, "primary key")
-        f(test_permutation, private$.backend$rownames, self$rownames, "row identifiers")
-        f(test_permutation, private$.backend$colnames, private$.colnames, "column names")
-        f(test_equal_col_info, col_info(private$.backend), private$.col_info, "column information")
+        f(identical, backend$primary_key, self$primary_key, "primary key")
+        f(test_permutation, backend$rownames, self$rownames, "row identifiers")
+        f(test_permutation, backend$colnames, private$.colnames, "column names")
+        f(test_equal_col_info, col_info(backend), private$.col_info, "column information")
         # need to reverse the order for correct error message
-        f(function(x, y) test_subset(y, x), private$.backend$data_formats, self$data_formats, "data formats")
+        f(function(x, y) test_subset(y, x), backend$data_formats, self$data_formats, "data formats")
+        private$.backend = backend
       }
       private$.backend
     },

R/LearnerTorch.R (+46 -10)

@@ -41,6 +41,7 @@
 #' * `loss_fn` :: The `$state_dict()` of the [loss][torch::nn_module] used to train the network.
 #' * `callbacks` :: The [callbacks][mlr3torch::mlr_callback_set] used to train the network.
 #' * `seed` :: The seed that was / is used for training and prediction.
+#' * `epochs` :: How many epochs the model was trained for (early stopping).
 #' * `task_col_info` :: A `data.table()` containing information about the train-task.
 #'
 #' @template paramset_torchlearner
@@ -141,7 +142,7 @@ LearnerTorch = R6Class("LearnerTorch",


       assert_subset(properties, mlr_reflections$learner_properties[[task_type]])
-      properties = union(properties, "marshal")
+      properties = union(properties, c("marshal", "validation", "internal_tuning"))
       assert_subset(predict_types, names(mlr_reflections$learner_predict_types[[task_type]]))
       if (any(grepl("^(loss\\.|opt\\.|cb\\.)", param_set$ids()))) {
         stopf("Prefixes 'loss.', 'opt.', and 'cb.' are reserved for dynamically constructed parameters.")
@@ -210,6 +211,30 @@ LearnerTorch = R6Class("LearnerTorch",
     }
   ),
   active = list(
+    #' @field validate
+    #' How to construct the internal validation data. This parameter can be either `NULL`,
+    #' a ratio in $(0, 1)$, `"test"`, or `"predefined"`.
+    validate = function(rhs) {
+      if (!missing(rhs)) {
+        private$.validate = assert_validate(rhs)
+      }
+      private$.validate
+    },
+
+    #' @field internal_valid_scores
+    #' Retrieves the internal validation scores as a named `list()`.
+    #' Specify the `$validate` field and the `measures_valid` parameter to configure this.
+    #' Returns `NULL` if learner is not trained yet.
+    internal_valid_scores = function() {
+      self$state$internal_valid_scores
+    },
+    #' @field internal_tuned_values
+    #' When early stopping is activate, this returns a named list with the early-stopped epochs,
+    #' otherwise an empty list is returned.
+    #' Returns `NULL` if learner is not trained yet.
+    internal_tuned_values = function() {
+      self$state$internal_tuned_values
+    },
     #' @field marshaled (`logical(1)`)\cr
     #'   Whether the learner is marshaled.
     marshaled = function(rhs) {
@@ -257,6 +282,21 @@ LearnerTorch = R6Class("LearnerTorch",
     }
   ),
   private = list(
+    .extract_internal_tuned_values = function() {
+      if (self$state$param_vals$patience == 0) {
+        named_list()
+      } else {
+        list(epochs = self$model$epochs)
+      }
+    },
+    .extract_internal_valid_scores = function() {
+      if (is.null(self$model$internal_valid_scores)) {
+        named_list()
+      } else {
+        self$model$internal_valid_scores
+      }
+    },
+    .validate = NULL,
     .additional_phash_input = function() {
       if (is.null(self$initialize)) return(NULL)
       initformals = names(formals(args(self$initialize)))
@@ -372,20 +412,16 @@ LearnerTorch = R6Class("LearnerTorch",
       model = value$model
       value["model"] = list(NULL)
       value = super$deep_clone(name, value)
-      value[["model"]] = set_class(list(
-        network = model$network$clone(deep = TRUE),
-        loss_fn = clone_recurse(model$loss_fn),
-        optimizer = clone_recurse(model$optimizer),
-        callbacks = map(model$callbacks, function(x) {
+      model$network = model$network$clone(deep = TRUE)
+      model$loss_fn = clone_recurse(model$loss_fn)
+      model$callbacks = map(model$callbacks, function(x) {
        if (is.R6(x)) {
          x$clone(deep = TRUE)
        } else {
          x
        }
-      }),
-      seed = model$seed,
-      task_col_info = copy(model$task_col_info)
-      ), c("learner_torch_model", "list"))
+      })
+      value$model = model
     }
     return(value)
   } else if (name == ".param_set") {
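Putting the pieces together, here is a hedged end-to-end sketch of how a torch learner might be configured after this commit. The learner id, task, and parameter values are assumptions for illustration; `validate`, `measures_valid`, `eval_freq`, `patience`, `$internal_valid_scores`, and `$internal_tuned_values` are the pieces introduced or documented in the diffs above.

library(mlr3)
library(mlr3torch)

# Sketch under assumptions (learner id, task, and parameter values are examples).
learner = lrn("classif.mlp",
  epochs = 100,                       # upper bound; early stopping may end training sooner
  batch_size = 32,
  eval_freq = 2,                      # evaluate the measures every second epoch
  patience = 5,                       # evaluations without improvement before stopping
  measures_valid = msr("classif.acc")
)
learner$validate = 0.2                # ratio in (0, 1): hold out 20% as internal validation data

learner$train(tsk("sonar"))

learner$internal_valid_scores         # named list with the final validation score(s)
learner$internal_tuned_values         # list(epochs = ...) when early stopping was active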
