From 2380913c38cb4660ebf32e607954eb92d56ee646 Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Tue, 10 Sep 2024 18:39:43 +0200 Subject: [PATCH 01/37] TODO: write tests --- DESCRIPTION | 10 +++- R/CallbackSetTFLog.R | 70 ++++++++++++++++++++++++++ tests/testthat/test_CallbackSetTFLog.R | 8 +++ 3 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 R/CallbackSetTFLog.R create mode 100644 tests/testthat/test_CallbackSetTFLog.R diff --git a/DESCRIPTION b/DESCRIPTION index 18aa85173..a9231e485 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,7 +25,14 @@ Authors@R: family = "Pfisterer", role = "ctb", email = "pfistererf@googlemail.com", - comment = c(ORCID = "0000-0001-8867-762X"))) + comment = c(ORCID = "0000-0001-8867-762X")), + person(given = "Carson", + family = "Zhang", + role = "ctb", + email = "carsonzhang4@gmail.com") + ), + + Description: Deep Learning library that extends the mlr3 framework by building upon the 'torch' package. It allows to conveniently build, train, and evaluate deep learning models without having to worry about low level @@ -64,6 +71,7 @@ Suggests: viridis, visNetwork, testthat (>= 3.0.0), + tfevents, torchvision (>= 0.6.0), waldo Config/testthat/edition: 3 diff --git a/R/CallbackSetTFLog.R b/R/CallbackSetTFLog.R new file mode 100644 index 000000000..d3005105b --- /dev/null +++ b/R/CallbackSetTFLog.R @@ -0,0 +1,70 @@ +#' @title TensorFlow Logging Callback +#' +#' @name mlr_callback_set.tflog +#' +#' @description +#' Logs the training and validation measures for tracking via TensorBoard. +#' @details +#' TODO: add +#' +#' @param path (`character(1)`)\cr +#' The path to a folder where the events are logged. +#' Point TensorBoard to this folder to view them. +#' @family Callback +#' @export +#' @include CallbackSet.R +CallbackSetTFLog = R6Class("CallbackSetTFLog", + inherit = CallbackSet, + lock_objects = TRUE, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function(path = get_default_logdir()) { + self$path = assert_path_for_output(path) + set_default_logdir(path) + }, + #' @description + #' Logs the training measures as TensorFlow events. + #' Meaningful changes happen at the end of each batch, + #' since this is when the gradient step occurs. + on_batch_end = function() { + log_train_score = function(measure_name) { + train_score = list(self$ctx$last_scores_train[[measure_name]]) + names(train_score) = paste0("train.", measure_name) + do.call(log_event, train_score) + } + + if (length(self$ctx$last_scores_train)) { + map(names(self$ctx$measures_train), log_train_score) + } + }, + #' @description + #' Logs the validation measures as TensorFlow events. + #' Meaningful changes happen at the end of each epoch. 
+ #' Notably NOT on_batch_valid_end, since there are no gradient steps between validation batches, + #' and therefore differences are due to randomness + on_epoch_end = function() { + log_valid_score = function(measure_name) { + valid_score = list(self$ctx$last_scores_valid[[measure_name]]) + names(valid_score) = paste0("valid.", measure_name) + do.call(log_event, valid_score) + } + + if (length(self$ctx$last_scores_valid)) { + map(names(self$ctx$measure_valid), log_valid_score) + } + } + ) +) + +mlr3torch_callbacks$add("tflog", function() { + TorchCallback$new( + callback_generator = CallbackSetCheckpoint, + param_set = ps( + path = p_uty(tags = c("train", "required")) + ), + id = "tflog", + label = "TFLog", + man = "mlr3torch::mlr_callback_set.tflog" + ) +}) diff --git a/tests/testthat/test_CallbackSetTFLog.R b/tests/testthat/test_CallbackSetTFLog.R new file mode 100644 index 000000000..63ac409a9 --- /dev/null +++ b/tests/testthat/test_CallbackSetTFLog.R @@ -0,0 +1,8 @@ +test_that("autotest", { + cb = t_clbk() + expect_torch_callback(cb) +}) + +test_that("", { + +}) \ No newline at end of file From 86f87c805d0155f2c61ac55db57fd9560c36bf0e Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Sun, 22 Sep 2024 19:43:09 +0200 Subject: [PATCH 02/37] name -> TB. began refactoring based on last meeting with Sebastian --- R/{CallbackSetTFLog.R => CallbackSetTB.R} | 70 +++++++++++++++-------- tests/testthat/test_CallbackSetTB.R | 21 +++++++ tests/testthat/test_CallbackSetTFLog.R | 8 --- 3 files changed, 67 insertions(+), 32 deletions(-) rename R/{CallbackSetTFLog.R => CallbackSetTB.R} (52%) create mode 100644 tests/testthat/test_CallbackSetTB.R delete mode 100644 tests/testthat/test_CallbackSetTFLog.R diff --git a/R/CallbackSetTFLog.R b/R/CallbackSetTB.R similarity index 52% rename from R/CallbackSetTFLog.R rename to R/CallbackSetTB.R index d3005105b..c699a97cc 100644 --- a/R/CallbackSetTFLog.R +++ b/R/CallbackSetTB.R @@ -1,6 +1,6 @@ -#' @title TensorFlow Logging Callback +#' @title TensorBoard Logging Callback #' -#' @name mlr_callback_set.tflog +#' @name mlr_callback_set.tb #' #' @description #' Logs the training and validation measures for tracking via TensorBoard. @@ -13,41 +13,57 @@ #' @family Callback #' @export #' @include CallbackSet.R -CallbackSetTFLog = R6Class("CallbackSetTFLog", +CallbackSetTB = R6Class("CallbackSetTB", inherit = CallbackSet, lock_objects = TRUE, public = list( #' @description #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function(path = get_default_logdir()) { + initialize = function(path = tempfile()) { self$path = assert_path_for_output(path) - set_default_logdir(path) }, - #' @description - #' Logs the training measures as TensorFlow events. - #' Meaningful changes happen at the end of each batch, - #' since this is when the gradient step occurs. - on_batch_end = function() { - log_train_score = function(measure_name) { - train_score = list(self$ctx$last_scores_train[[measure_name]]) - names(train_score) = paste0("train.", measure_name) - do.call(log_event, train_score) - } + # #' @description + # #' Logs the training measures as TensorFlow events. + # #' Meaningful changes happen at the end of each batch, + # #' since this is when the gradient step occurs. 
+ # # TODO: change this to log last_loss + # on_batch_end = function() { + # # TODO: determine whether you can refactor this and the + # # validation one into a single function + # # need to be able to access self$ctx - if (length(self$ctx$last_scores_train)) { - map(names(self$ctx$measures_train), log_train_score) - } - }, + # # TODO: pass in the appropriate step from the context + # log_event(last_loss = self$ctx$last_loss) + # }, #' @description #' Logs the validation measures as TensorFlow events. #' Meaningful changes happen at the end of each epoch. #' Notably NOT on_batch_valid_end, since there are no gradient steps between validation batches, #' and therefore differences are due to randomness + # TODO: log last_scores_train here + # TODO: display the appropriate x axis with its label in TensorBoard + # relevant when we log different scores at different times on_epoch_end = function() { log_valid_score = function(measure_name) { valid_score = list(self$ctx$last_scores_valid[[measure_name]]) names(valid_score) = paste0("valid.", measure_name) - do.call(log_event, valid_score) + with_logdir(temp, { + do.call(log_event, valid_score) + }) + } + + log_train_score = function(measure_name) { + # TODO: change this to use last_loss. I don't recall why we wanted to do that. + train_score = list(self$ctx$last_scores_train[[measure_name]]) + names(train_score) = paste0("train.", measure_name) + with_logdir(temp, { + do.call(log_event, valid_score) + }) + } + + if (length(self$ctx$last_scores_train)) { + # TODO: decide whether we should put the temporary logdir modification here instead. + map(names(self$ctx$measures_train), log_train_score) } if (length(self$ctx$last_scores_valid)) { @@ -55,16 +71,22 @@ CallbackSetTFLog = R6Class("CallbackSetTFLog", } } ) + # private = list( + # log_score = function(prefix, measure_name, score) { + + # } + # ) ) -mlr3torch_callbacks$add("tflog", function() { + +mlr3torch_callbacks$add("tb", function() { TorchCallback$new( callback_generator = CallbackSetCheckpoint, param_set = ps( path = p_uty(tags = c("train", "required")) ), - id = "tflog", - label = "TFLog", - man = "mlr3torch::mlr_callback_set.tflog" + id = "tb", + label = "TensorBoard", + man = "mlr3torch::mlr_callback_set.tb" ) }) diff --git a/tests/testthat/test_CallbackSetTB.R b/tests/testthat/test_CallbackSetTB.R new file mode 100644 index 000000000..f7387f185 --- /dev/null +++ b/tests/testthat/test_CallbackSetTB.R @@ -0,0 +1,21 @@ +test_that("autotest", { + cb = t_clbk("tb") + expect_torch_callback(cb) +}) + +# TODO: investigate what's happening when there is only a single epoch (why don't we log anything?) 
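+# Rough sketch of the usage these tests should exercise (the learner id and
+# parameters below are placeholders at this point, not fixed API):
+#   cb = t_clbk("tb")
+#   mlp = lrn("classif.mlp", callbacks = cb, epochs = 10, batch_size = 50,
+#     validate = 0.2, measures_valid = msrs("classif.acc"))
+#   mlp$train(tsk("iris"))
+#   tfevents::collect_events(logdir)  # logdir: wherever the callback logged
+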
+test_that("a simple example works", { + # using a temp dir + + # check that directory doesn't exist + + # check that directory was created + + # check that default logging directory is the directory name we passed in + + # check that the correct training measure name was logged at the correct time (correct epoch) + + # check that the correct validation measure name was logged + + # check that logging happens at the same frequency as eval_freq +}) \ No newline at end of file diff --git a/tests/testthat/test_CallbackSetTFLog.R b/tests/testthat/test_CallbackSetTFLog.R deleted file mode 100644 index 63ac409a9..000000000 --- a/tests/testthat/test_CallbackSetTFLog.R +++ /dev/null @@ -1,8 +0,0 @@ -test_that("autotest", { - cb = t_clbk() - expect_torch_callback(cb) -}) - -test_that("", { - -}) \ No newline at end of file From 400ed74e926e3ffc1ae886b73ed6342b74db7919 Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Wed, 2 Oct 2024 19:39:24 +0200 Subject: [PATCH 03/37] slight description change --- R/CallbackSetTB.R | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/R/CallbackSetTB.R b/R/CallbackSetTB.R index c699a97cc..658d40944 100644 --- a/R/CallbackSetTB.R +++ b/R/CallbackSetTB.R @@ -3,7 +3,7 @@ #' @name mlr_callback_set.tb #' #' @description -#' Logs the training and validation measures for tracking via TensorBoard. +#' Logs training loss and validation measures as events that can be tracked using TensorBoard. #' @details #' TODO: add #' @@ -47,17 +47,21 @@ CallbackSetTB = R6Class("CallbackSetTB", log_valid_score = function(measure_name) { valid_score = list(self$ctx$last_scores_valid[[measure_name]]) names(valid_score) = paste0("valid.", measure_name) - with_logdir(temp, { + with_logdir(self$path, { do.call(log_event, valid_score) }) } log_train_score = function(measure_name) { - # TODO: change this to use last_loss. I don't recall why we wanted to do that. - train_score = list(self$ctx$last_scores_train[[measure_name]]) - names(train_score) = paste0("train.", measure_name) - with_logdir(temp, { - do.call(log_event, valid_score) + # OLD: previously logged the elements in last_scores_train + # train_score = list(self$ctx$last_scores_train[[measure_name]]) + # names(train_score) = paste0("train.", measure_name) + # with_logdir(temp, { + # do.call(log_event, train_score) + # }) + # TODO: figure out what self$ctx$last_loss looks like when there are multiple train measures + with_logdir(self$path, { + log_event(train.loss = self$ctx$last_loss) }) } From 9e6acd8301e1fb67157cdf915c79ef89de3b712a Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Wed, 2 Oct 2024 19:53:46 +0200 Subject: [PATCH 04/37] removed extraneous comments --- R/CallbackSetTB.R | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/R/CallbackSetTB.R b/R/CallbackSetTB.R index 658d40944..4543cd26e 100644 --- a/R/CallbackSetTB.R +++ b/R/CallbackSetTB.R @@ -22,25 +22,11 @@ CallbackSetTB = R6Class("CallbackSetTB", initialize = function(path = tempfile()) { self$path = assert_path_for_output(path) }, - # #' @description - # #' Logs the training measures as TensorFlow events. - # #' Meaningful changes happen at the end of each batch, - # #' since this is when the gradient step occurs. 
- # # TODO: change this to log last_loss - # on_batch_end = function() { - # # TODO: determine whether you can refactor this and the - # # validation one into a single function - # # need to be able to access self$ctx - - # # TODO: pass in the appropriate step from the context - # log_event(last_loss = self$ctx$last_loss) - # }, #' @description - #' Logs the validation measures as TensorFlow events. + #' Logs the training loss and validation measures as TensorFlow events. #' Meaningful changes happen at the end of each epoch. #' Notably NOT on_batch_valid_end, since there are no gradient steps between validation batches, #' and therefore differences are due to randomness - # TODO: log last_scores_train here # TODO: display the appropriate x axis with its label in TensorBoard # relevant when we log different scores at different times on_epoch_end = function() { @@ -53,13 +39,8 @@ CallbackSetTB = R6Class("CallbackSetTB", } log_train_score = function(measure_name) { - # OLD: previously logged the elements in last_scores_train - # train_score = list(self$ctx$last_scores_train[[measure_name]]) - # names(train_score) = paste0("train.", measure_name) - # with_logdir(temp, { - # do.call(log_event, train_score) - # }) # TODO: figure out what self$ctx$last_loss looks like when there are multiple train measures + # TODO: remind ourselves why we wanted to display last_loss and not last_scores_train with_logdir(self$path, { log_event(train.loss = self$ctx$last_loss) }) From fc4f2faab84c4cf58b3a8903ffa837f7f60dff63 Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Wed, 2 Oct 2024 21:24:48 +0200 Subject: [PATCH 05/37] added n_last_loss frequency test --- tests/testthat/test_CallbackSetTB.R | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test_CallbackSetTB.R b/tests/testthat/test_CallbackSetTB.R index f7387f185..4e11e3d08 100644 --- a/tests/testthat/test_CallbackSetTB.R +++ b/tests/testthat/test_CallbackSetTB.R @@ -6,16 +6,35 @@ test_that("autotest", { # TODO: investigate what's happening when there is only a single epoch (why don't we log anything?) 
test_that("a simple example works", { # using a temp dir + cb = t_clbk("tb") # check that directory doesn't exist + expect_false(dir.exists(cb$path)) - # check that directory was created + # check that the correct training measure name was logged at the correct time (correct epoch) + task = tsk("iris") - # check that default logging directory is the directory name we passed in + n_epochs = 10 + batch_size = 50 + neurons = 200 + mlp = lrn("classif.mlp", + callbacks = cb, + epochs = n_epochs, batch_size = batch_size, neurons = neurons, + validate = 0.2, + measures_valid = msrs(c("classif.acc", "classif.ce")), + measures_train = msrs(c("classif.acc", "classif.ce")) + ) - # check that the correct training measure name was logged at the correct time (correct epoch) + mlp$train(task) - # check that the correct validation measure name was logged + events = collect_events(cb$path)$summary %>% + mlr3misc::map(unlist) + # TODO: this but for the validation measures + n_last_loss = mlr3misc::map(\(x) x["tag"] == "last_loss") %>% + unlist() %>% + sum() + expect_equal(n_last_loss, n_epochs) + # check that logging happens at the same frequency as eval_freq }) \ No newline at end of file From 81d1dedc717516d97f5991eec461788baaf89334 Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Thu, 10 Oct 2024 23:06:26 +0200 Subject: [PATCH 06/37] in progress --- DESCRIPTION | 2 -- R/CallbackSetTB.R | 14 +++++++++----- tests/testthat/test_CallbackSetTB.R | 23 +++++++++++------------ 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a9231e485..cb1acb1b6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,8 +31,6 @@ Authors@R: role = "ctb", email = "carsonzhang4@gmail.com") ), - - Description: Deep Learning library that extends the mlr3 framework by building upon the 'torch' package. It allows to conveniently build, train, and evaluate deep learning models without having to worry about low level diff --git a/R/CallbackSetTB.R b/R/CallbackSetTB.R index 4543cd26e..8eeadf0d2 100644 --- a/R/CallbackSetTB.R +++ b/R/CallbackSetTB.R @@ -8,19 +8,23 @@ #' TODO: add #' #' @param path (`character(1)`)\cr -#' The path to a folder where the events are logged. +#' The path to a folder where the events are logged. #' Point TensorBoard to this folder to view them. #' @family Callback #' @export #' @include CallbackSet.R CallbackSetTB = R6Class("CallbackSetTB", inherit = CallbackSet, - lock_objects = TRUE, + lock_objects = FALSE, public = list( + path = NULL, #' @description #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function(path = tempfile()) { - self$path = assert_path_for_output(path) + initialize = function(path) { + self$path = assert_path_for_output(path) + if (!dir.exists(path)) { + dir.create(path, recursive = TRUE) + } }, #' @description #' Logs the training loss and validation measures as TensorFlow events. @@ -58,7 +62,7 @@ CallbackSetTB = R6Class("CallbackSetTB", ) # private = list( # log_score = function(prefix, measure_name, score) { - + # } # ) ) diff --git a/tests/testthat/test_CallbackSetTB.R b/tests/testthat/test_CallbackSetTB.R index 4e11e3d08..72e389ec1 100644 --- a/tests/testthat/test_CallbackSetTB.R +++ b/tests/testthat/test_CallbackSetTB.R @@ -1,6 +1,6 @@ -test_that("autotest", { - cb = t_clbk("tb") - expect_torch_callback(cb) +test_that("basic", { + cb = t_clbk("tb", path = tempfile()) + expect_torch_callback(cb, check_man = FALSE) }) # TODO: investigate what's happening when there is only a single epoch (why don't we log anything?) 
@@ -11,30 +11,29 @@ test_that("a simple example works", { # check that directory doesn't exist expect_false(dir.exists(cb$path)) - # check that the correct training measure name was logged at the correct time (correct epoch) task = tsk("iris") - n_epochs = 10 batch_size = 50 neurons = 200 - mlp = lrn("classif.mlp", + mlp = lrn("classif.mlp", callbacks = cb, epochs = n_epochs, batch_size = batch_size, neurons = neurons, - validate = 0.2, - measures_valid = msrs(c("classif.acc", "classif.ce")), + validate = 0.2, + measures_valid = msrs(c("classif.acc", "classif.ce")), measures_train = msrs(c("classif.acc", "classif.ce")) ) - mlp$train(task) events = collect_events(cb$path)$summary %>% mlr3misc::map(unlist) - # TODO: this but for the validation measures n_last_loss = mlr3misc::map(\(x) x["tag"] == "last_loss") %>% unlist() %>% sum() expect_equal(n_last_loss, n_epochs) - + + # TODO: check that the correct training measure name was logged at the correct time (correct epoch) + # TODO: check that the correct validation measure name was logged at the correct time (correct epoch) + # check that logging happens at the same frequency as eval_freq -}) \ No newline at end of file +}) From cb03eb3719c55e90599ed9fc43ef0bfccd0f8e58 Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Thu, 10 Oct 2024 23:17:18 +0200 Subject: [PATCH 07/37] autotest working, accidentally used the wrong callback_generator --- R/CallbackSetTB.R | 3 +-- tests/testthat/test_CallbackSetTB.R | 11 ++++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/R/CallbackSetTB.R b/R/CallbackSetTB.R index 8eeadf0d2..fd016c2c4 100644 --- a/R/CallbackSetTB.R +++ b/R/CallbackSetTB.R @@ -17,7 +17,6 @@ CallbackSetTB = R6Class("CallbackSetTB", inherit = CallbackSet, lock_objects = FALSE, public = list( - path = NULL, #' @description #' Creates a new instance of this [R6][R6::R6Class] class. 
initialize = function(path) { @@ -70,7 +69,7 @@ CallbackSetTB = R6Class("CallbackSetTB", mlr3torch_callbacks$add("tb", function() { TorchCallback$new( - callback_generator = CallbackSetCheckpoint, + callback_generator = CallbackSetTB, param_set = ps( path = p_uty(tags = c("train", "required")) ), diff --git a/tests/testthat/test_CallbackSetTB.R b/tests/testthat/test_CallbackSetTB.R index 72e389ec1..616841537 100644 --- a/tests/testthat/test_CallbackSetTB.R +++ b/tests/testthat/test_CallbackSetTB.R @@ -8,13 +8,13 @@ test_that("a simple example works", { # using a temp dir cb = t_clbk("tb") - # check that directory doesn't exist - expect_false(dir.exists(cb$path)) - task = tsk("iris") n_epochs = 10 batch_size = 50 neurons = 200 + + pth0 = tempfile() + mlp = lrn("classif.mlp", callbacks = cb, epochs = n_epochs, batch_size = batch_size, neurons = neurons, @@ -22,6 +22,11 @@ test_that("a simple example works", { measures_valid = msrs(c("classif.acc", "classif.ce")), measures_train = msrs(c("classif.acc", "classif.ce")) ) + mlp$param_set$set_values(cb.tb.path = pth0) + + # check that directory doesn't exist + expect_false(dir.exists(mlp$param_set$get_values(path))) + mlp$train(task) events = collect_events(cb$path)$summary %>% From 78b95a5f6567b85e0132891138043c97126d7dc3 Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Fri, 11 Oct 2024 08:16:37 +0200 Subject: [PATCH 08/37] simple and eval_freq tests pass --- R/CallbackSetTB.R | 9 ++-- tests/testthat/test_CallbackSetTB.R | 75 +++++++++++++++++++++++++---- 2 files changed, 69 insertions(+), 15 deletions(-) diff --git a/R/CallbackSetTB.R b/R/CallbackSetTB.R index fd016c2c4..0efe792d6 100644 --- a/R/CallbackSetTB.R +++ b/R/CallbackSetTB.R @@ -41,7 +41,7 @@ CallbackSetTB = R6Class("CallbackSetTB", }) } - log_train_score = function(measure_name) { + log_train_score = function() { # TODO: figure out what self$ctx$last_loss looks like when there are multiple train measures # TODO: remind ourselves why we wanted to display last_loss and not last_scores_train with_logdir(self$path, { @@ -49,13 +49,10 @@ CallbackSetTB = R6Class("CallbackSetTB", }) } - if (length(self$ctx$last_scores_train)) { - # TODO: decide whether we should put the temporary logdir modification here instead. 
- map(names(self$ctx$measures_train), log_train_score) - } + log_train_score() if (length(self$ctx$last_scores_valid)) { - map(names(self$ctx$measure_valid), log_valid_score) + map(names(self$ctx$measures_valid), log_valid_score) } } ) diff --git a/tests/testthat/test_CallbackSetTB.R b/tests/testthat/test_CallbackSetTB.R index 616841537..4cbeabf58 100644 --- a/tests/testthat/test_CallbackSetTB.R +++ b/tests/testthat/test_CallbackSetTB.R @@ -1,3 +1,5 @@ +library(tfevents) + test_that("basic", { cb = t_clbk("tb", path = tempfile()) expect_torch_callback(cb, check_man = FALSE) @@ -24,21 +26,76 @@ test_that("a simple example works", { ) mlp$param_set$set_values(cb.tb.path = pth0) - # check that directory doesn't exist - expect_false(dir.exists(mlp$param_set$get_values(path))) - mlp$train(task) - events = collect_events(cb$path)$summary %>% + events = collect_events(pth0)$summary %>% mlr3misc::map(unlist) - n_last_loss = mlr3misc::map(\(x) x["tag"] == "last_loss") %>% + n_last_loss_events = mlr3misc::map(events, \(x) x["tag"] == "train.loss") %>% unlist() %>% sum() - expect_equal(n_last_loss, n_epochs) - # TODO: check that the correct training measure name was logged at the correct time (correct epoch) - # TODO: check that the correct validation measure name was logged at the correct time (correct epoch) + n_valid_acc_events = mlr3misc::map(events, \(x) x["tag"] == "valid.classif.acc") %>% + unlist() %>% + sum() + + n_valid_ce_events = mlr3misc::map(events, \(x) x["tag"] == "valid.classif.ce") %>% + unlist() %>% + sum() + + # TODO: refactor to expect a specific ordering of the events list + expect_equal(n_last_loss_events, n_epochs) + expect_equal(n_valid_acc_events, n_epochs) + expect_equal(n_valid_ce_events, n_epochs) +}) + +test_that("eval_freq works", { + # using a temp dir + cb = t_clbk("tb") + + task = tsk("iris") + n_epochs = 9 + batch_size = 50 + neurons = 200 + eval_freq = 4 + + pth0 = tempfile() + + mlp = lrn("classif.mlp", + callbacks = cb, + epochs = n_epochs, batch_size = batch_size, neurons = neurons, + validate = 0.2, + measures_valid = msrs(c("classif.acc", "classif.ce")), + measures_train = msrs(c("classif.acc", "classif.ce")), + eval_freq = eval_freq + ) + mlp$param_set$set_values(cb.tb.path = pth0) + + mlp$train(task) + + events = collect_events(pth0)$summary %>% + mlr3misc::map(unlist) + + n_last_loss_events = mlr3misc::map(events, \(x) x["tag"] == "train.loss") %>% + unlist() %>% + sum() + + n_valid_acc_events = mlr3misc::map(events, \(x) x["tag"] == "valid.classif.acc") %>% + unlist() %>% + sum() + + n_valid_ce_events = mlr3misc::map(events, \(x) x["tag"] == "valid.classif.ce") %>% + unlist() %>% + sum() + + expect_equal(n_last_loss_events, n_epochs) + expect_equal(n_valid_acc_events, ceiling(n_epochs / 4)) + expect_equal(n_valid_ce_events, ceiling(n_epochs / 4)) +}) - # check that logging happens at the same frequency as eval_freq +test_that("throws an error when using existing directory", { + path = tempfile() + dir.create(path) + cb = t_clbk("tb", path = path) + expect_error(cb$generate(), "already exists") }) From a365757ee3bc63d9da2908441fb13f9e8327f762 Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Fri, 11 Oct 2024 08:31:01 +0200 Subject: [PATCH 09/37] changed logging methods to private --- R/CallbackSetTB.R | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/R/CallbackSetTB.R b/R/CallbackSetTB.R index 0efe792d6..10a7bc339 100644 --- a/R/CallbackSetTB.R +++ b/R/CallbackSetTB.R @@ -33,34 +33,33 @@ 
CallbackSetTB = R6Class("CallbackSetTB", # TODO: display the appropriate x axis with its label in TensorBoard # relevant when we log different scores at different times on_epoch_end = function() { - log_valid_score = function(measure_name) { - valid_score = list(self$ctx$last_scores_valid[[measure_name]]) - names(valid_score) = paste0("valid.", measure_name) - with_logdir(self$path, { - do.call(log_event, valid_score) - }) - } - - log_train_score = function() { - # TODO: figure out what self$ctx$last_loss looks like when there are multiple train measures - # TODO: remind ourselves why we wanted to display last_loss and not last_scores_train - with_logdir(self$path, { - log_event(train.loss = self$ctx$last_loss) - }) - } - - log_train_score() + private$log_train_score() if (length(self$ctx$last_scores_valid)) { - map(names(self$ctx$measures_valid), log_valid_score) + map(names(self$ctx$measures_valid), private$log_valid_score) } } + ), + private = list( + # TODO: refactor into a single function with the following signature + # log_score = function(prefix, measure_name, score) { + # + # }, + log_valid_score = function(measure_name) { + valid_score = list(self$ctx$last_scores_valid[[measure_name]]) + names(valid_score) = paste0("valid.", measure_name) + with_logdir(self$path, { + do.call(log_event, valid_score) + }) + }, + log_train_score = function() { + # TODO: figure out what self$ctx$last_loss looks like when there are multiple train measures + # TODO: remind ourselves why we wanted to display last_loss and not last_scores_train + with_logdir(self$path, { + log_event(train.loss = self$ctx$last_loss) + }) + } ) - # private = list( - # log_score = function(prefix, measure_name, score) { - - # } - # ) ) From 43a8ffb4d9f48177600fed1df28846aaea46acb2 Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Fri, 11 Oct 2024 08:38:33 +0200 Subject: [PATCH 10/37] removed magrittr pipe from tests --- R/CallbackSetTB.R | 108 ++++++++++++++-------------- tests/testthat/test_CallbackSetTB.R | 92 ++++++++++-------------- 2 files changed, 93 insertions(+), 107 deletions(-) diff --git a/R/CallbackSetTB.R b/R/CallbackSetTB.R index 10a7bc339..07593a50c 100644 --- a/R/CallbackSetTB.R +++ b/R/CallbackSetTB.R @@ -14,63 +14,63 @@ #' @export #' @include CallbackSet.R CallbackSetTB = R6Class("CallbackSetTB", - inherit = CallbackSet, - lock_objects = FALSE, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function(path) { - self$path = assert_path_for_output(path) - if (!dir.exists(path)) { - dir.create(path, recursive = TRUE) - } - }, - #' @description - #' Logs the training loss and validation measures as TensorFlow events. - #' Meaningful changes happen at the end of each epoch. - #' Notably NOT on_batch_valid_end, since there are no gradient steps between validation batches, - #' and therefore differences are due to randomness - # TODO: display the appropriate x axis with its label in TensorBoard - # relevant when we log different scores at different times - on_epoch_end = function() { - private$log_train_score() + inherit = CallbackSet, + lock_objects = FALSE, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function(path) { + self$path = assert_path_for_output(path) + if (!dir.exists(path)) { + dir.create(path, recursive = TRUE) + } + }, + #' @description + #' Logs the training loss and validation measures as TensorFlow events. + #' Meaningful changes happen at the end of each epoch. 
+ #' Notably NOT on_batch_valid_end, since there are no gradient steps between validation batches, + #' and therefore differences are due to randomness + # TODO: display the appropriate x axis with its label in TensorBoard + # relevant when we log different scores at different times + on_epoch_end = function() { + private$log_train_score() - if (length(self$ctx$last_scores_valid)) { - map(names(self$ctx$measures_valid), private$log_valid_score) - } - } - ), - private = list( - # TODO: refactor into a single function with the following signature - # log_score = function(prefix, measure_name, score) { - # - # }, - log_valid_score = function(measure_name) { - valid_score = list(self$ctx$last_scores_valid[[measure_name]]) - names(valid_score) = paste0("valid.", measure_name) - with_logdir(self$path, { - do.call(log_event, valid_score) - }) - }, - log_train_score = function() { - # TODO: figure out what self$ctx$last_loss looks like when there are multiple train measures - # TODO: remind ourselves why we wanted to display last_loss and not last_scores_train - with_logdir(self$path, { - log_event(train.loss = self$ctx$last_loss) - }) - } - ) + if (length(self$ctx$last_scores_valid)) { + map(names(self$ctx$measures_valid), private$log_valid_score) + } + } + ), + private = list( + # TODO: refactor into a single function with the following signature + # log_score = function(prefix, measure_name, score) { + # + # }, + log_valid_score = function(measure_name) { + valid_score = list(self$ctx$last_scores_valid[[measure_name]]) + names(valid_score) = paste0("valid.", measure_name) + with_logdir(self$path, { + do.call(log_event, valid_score) + }) + }, + log_train_score = function() { + # TODO: figure out what self$ctx$last_loss looks like when there are multiple train measures + # TODO: remind ourselves why we wanted to display last_loss and not last_scores_train + with_logdir(self$path, { + log_event(train.loss = self$ctx$last_loss) + }) + } + ) ) mlr3torch_callbacks$add("tb", function() { - TorchCallback$new( - callback_generator = CallbackSetTB, - param_set = ps( - path = p_uty(tags = c("train", "required")) - ), - id = "tb", - label = "TensorBoard", - man = "mlr3torch::mlr_callback_set.tb" - ) + TorchCallback$new( + callback_generator = CallbackSetTB, + param_set = ps( + path = p_uty(tags = c("train", "required")) + ), + id = "tb", + label = "TensorBoard", + man = "mlr3torch::mlr_callback_set.tb" + ) }) diff --git a/tests/testthat/test_CallbackSetTB.R b/tests/testthat/test_CallbackSetTB.R index 4cbeabf58..8ae879f1c 100644 --- a/tests/testthat/test_CallbackSetTB.R +++ b/tests/testthat/test_CallbackSetTB.R @@ -1,52 +1,45 @@ library(tfevents) test_that("basic", { - cb = t_clbk("tb", path = tempfile()) - expect_torch_callback(cb, check_man = FALSE) + cb = t_clbk("tb", path = tempfile()) + expect_torch_callback(cb, check_man = FALSE) }) # TODO: investigate what's happening when there is only a single epoch (why don't we log anything?) 
test_that("a simple example works", { - # using a temp dir - cb = t_clbk("tb") - - task = tsk("iris") - n_epochs = 10 - batch_size = 50 - neurons = 200 - - pth0 = tempfile() - - mlp = lrn("classif.mlp", - callbacks = cb, - epochs = n_epochs, batch_size = batch_size, neurons = neurons, - validate = 0.2, - measures_valid = msrs(c("classif.acc", "classif.ce")), - measures_train = msrs(c("classif.acc", "classif.ce")) - ) - mlp$param_set$set_values(cb.tb.path = pth0) - - mlp$train(task) - - events = collect_events(pth0)$summary %>% - mlr3misc::map(unlist) - - n_last_loss_events = mlr3misc::map(events, \(x) x["tag"] == "train.loss") %>% - unlist() %>% - sum() - - n_valid_acc_events = mlr3misc::map(events, \(x) x["tag"] == "valid.classif.acc") %>% - unlist() %>% - sum() - - n_valid_ce_events = mlr3misc::map(events, \(x) x["tag"] == "valid.classif.ce") %>% - unlist() %>% - sum() - - # TODO: refactor to expect a specific ordering of the events list - expect_equal(n_last_loss_events, n_epochs) - expect_equal(n_valid_acc_events, n_epochs) - expect_equal(n_valid_ce_events, n_epochs) + # using a temp dir + cb = t_clbk("tb") + + task = tsk("iris") + n_epochs = 10 + batch_size = 50 + neurons = 200 + + pth0 = tempfile() + + mlp = lrn("classif.mlp", + callbacks = cb, + epochs = n_epochs, batch_size = batch_size, neurons = neurons, + validate = 0.2, + measures_valid = msrs(c("classif.acc", "classif.ce")), + measures_train = msrs(c("classif.acc", "classif.ce")) + ) + mlp$param_set$set_values(cb.tb.path = pth0) + + mlp$train(task) + + events = mlr3misc::map(collect_events(pth0)$summary, unlist) + + n_last_loss_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "train.loss"))) + + n_valid_acc_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "valid.classif.acc"))) + + n_valid_ce_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "valid.classif.ce"))) + + # TODO: refactor to expect a specific ordering of the events list + expect_equal(n_last_loss_events, n_epochs) + expect_equal(n_valid_acc_events, n_epochs) + expect_equal(n_valid_ce_events, n_epochs) }) test_that("eval_freq works", { @@ -73,20 +66,13 @@ test_that("eval_freq works", { mlp$train(task) - events = collect_events(pth0)$summary %>% - mlr3misc::map(unlist) + events = mlr3misc::map(collect_events(pth0)$summary, unlist) - n_last_loss_events = mlr3misc::map(events, \(x) x["tag"] == "train.loss") %>% - unlist() %>% - sum() + n_last_loss_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "train.loss"))) - n_valid_acc_events = mlr3misc::map(events, \(x) x["tag"] == "valid.classif.acc") %>% - unlist() %>% - sum() + n_valid_acc_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "valid.classif.acc"))) - n_valid_ce_events = mlr3misc::map(events, \(x) x["tag"] == "valid.classif.ce") %>% - unlist() %>% - sum() + n_valid_ce_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "valid.classif.ce"))) expect_equal(n_last_loss_events, n_epochs) expect_equal(n_valid_acc_events, ceiling(n_epochs / 4)) From 6b9a8453d1fa3f295c580203d905e31efe5f9616 Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Fri, 11 Oct 2024 08:40:46 +0200 Subject: [PATCH 11/37] added details for callback class --- R/CallbackSetTB.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/CallbackSetTB.R b/R/CallbackSetTB.R index 07593a50c..4bdbf4912 100644 --- a/R/CallbackSetTB.R +++ b/R/CallbackSetTB.R @@ -5,7 +5,7 @@ #' @description #' Logs training loss and validation measures as events that can be tracked using TensorBoard. 
#' @details -#' TODO: add +#' Logs at most every epoch. #' #' @param path (`character(1)`)\cr #' The path to a folder where the events are logged. @@ -53,7 +53,6 @@ CallbackSetTB = R6Class("CallbackSetTB", }) }, log_train_score = function() { - # TODO: figure out what self$ctx$last_loss looks like when there are multiple train measures # TODO: remind ourselves why we wanted to display last_loss and not last_scores_train with_logdir(self$path, { log_event(train.loss = self$ctx$last_loss) From d354b2c09e91c2242be6bc19f2ba60eb6bd25c54 Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Fri, 11 Oct 2024 08:42:25 +0200 Subject: [PATCH 12/37] formatting --- tests/testthat/test_CallbackSetTB.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/testthat/test_CallbackSetTB.R b/tests/testthat/test_CallbackSetTB.R index 8ae879f1c..440d8ccf6 100644 --- a/tests/testthat/test_CallbackSetTB.R +++ b/tests/testthat/test_CallbackSetTB.R @@ -31,9 +31,7 @@ test_that("a simple example works", { events = mlr3misc::map(collect_events(pth0)$summary, unlist) n_last_loss_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "train.loss"))) - n_valid_acc_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "valid.classif.acc"))) - n_valid_ce_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "valid.classif.ce"))) # TODO: refactor to expect a specific ordering of the events list @@ -69,9 +67,7 @@ test_that("eval_freq works", { events = mlr3misc::map(collect_events(pth0)$summary, unlist) n_last_loss_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "train.loss"))) - n_valid_acc_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "valid.classif.acc"))) - n_valid_ce_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "valid.classif.ce"))) expect_equal(n_last_loss_events, n_epochs) From b5b27b13f86386bf90f2f6074fecd11f0de6bcf5 Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Fri, 11 Oct 2024 10:59:14 +0200 Subject: [PATCH 13/37] built docs --- DESCRIPTION | 3 +- NAMESPACE | 1 + man/TorchCallback.Rd | 1 + man/as_torch_callback.Rd | 1 + man/as_torch_callbacks.Rd | 1 + man/callback_set.Rd | 1 + man/mlr3torch-package.Rd | 1 + man/mlr3torch_callbacks.Rd | 1 + man/mlr_callback_set.Rd | 1 + man/mlr_callback_set.checkpoint.Rd | 1 + man/mlr_callback_set.progress.Rd | 1 + man/mlr_callback_set.tb.Rd | 97 +++++++++++++++++++++++++++++ man/mlr_context_torch.Rd | 1 + man/mlr_learners.torchvision.Rd | 6 +- man/t_clbk.Rd | 1 + man/torch_callback.Rd | 1 + tests/testthat/test_CallbackSetTB.R | 2 +- 17 files changed, 116 insertions(+), 5 deletions(-) create mode 100644 man/mlr_callback_set.tb.Rd diff --git a/DESCRIPTION b/DESCRIPTION index cb1acb1b6..cd08ca4a2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,7 +30,7 @@ Authors@R: family = "Zhang", role = "ctb", email = "carsonzhang4@gmail.com") - ), + ) Description: Deep Learning library that extends the mlr3 framework by building upon the 'torch' package. 
It allows to conveniently build, train, and evaluate deep learning models without having to worry about low level @@ -86,6 +86,7 @@ Collate: 'CallbackSetEarlyStopping.R' 'CallbackSetHistory.R' 'CallbackSetProgress.R' + 'CallbackSetTB.R' 'ContextTorch.R' 'DataBackendLazy.R' 'utils.R' diff --git a/NAMESPACE b/NAMESPACE index ef61f4e3e..d3f3593c0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -63,6 +63,7 @@ export(CallbackSet) export(CallbackSetCheckpoint) export(CallbackSetHistory) export(CallbackSetProgress) +export(CallbackSetTB) export(ContextTorch) export(DataBackendLazy) export(DataDescriptor) diff --git a/man/TorchCallback.Rd b/man/TorchCallback.Rd index 32d30ecc6..d2d5bc9e4 100644 --- a/man/TorchCallback.Rd +++ b/man/TorchCallback.Rd @@ -61,6 +61,7 @@ Other Callback: \code{\link{mlr_callback_set}}, \code{\link{mlr_callback_set.checkpoint}}, \code{\link{mlr_callback_set.progress}}, +\code{\link{mlr_callback_set.tb}}, \code{\link{mlr_context_torch}}, \code{\link{t_clbk}()}, \code{\link{torch_callback}()} diff --git a/man/as_torch_callback.Rd b/man/as_torch_callback.Rd index db5feeb82..51416f7a6 100644 --- a/man/as_torch_callback.Rd +++ b/man/as_torch_callback.Rd @@ -31,6 +31,7 @@ Other Callback: \code{\link{mlr_callback_set}}, \code{\link{mlr_callback_set.checkpoint}}, \code{\link{mlr_callback_set.progress}}, +\code{\link{mlr_callback_set.tb}}, \code{\link{mlr_context_torch}}, \code{\link{t_clbk}()}, \code{\link{torch_callback}()} diff --git a/man/as_torch_callbacks.Rd b/man/as_torch_callbacks.Rd index 563a4251c..e3fb8442e 100644 --- a/man/as_torch_callbacks.Rd +++ b/man/as_torch_callbacks.Rd @@ -31,6 +31,7 @@ Other Callback: \code{\link{mlr_callback_set}}, \code{\link{mlr_callback_set.checkpoint}}, \code{\link{mlr_callback_set.progress}}, +\code{\link{mlr_callback_set.tb}}, \code{\link{mlr_context_torch}}, \code{\link{t_clbk}()}, \code{\link{torch_callback}()} diff --git a/man/callback_set.Rd b/man/callback_set.Rd index 4ad98f06f..4fb2d46b2 100644 --- a/man/callback_set.Rd +++ b/man/callback_set.Rd @@ -81,6 +81,7 @@ Other Callback: \code{\link{mlr_callback_set}}, \code{\link{mlr_callback_set.checkpoint}}, \code{\link{mlr_callback_set.progress}}, +\code{\link{mlr_callback_set.tb}}, \code{\link{mlr_context_torch}}, \code{\link{t_clbk}()}, \code{\link{torch_callback}()} diff --git a/man/mlr3torch-package.Rd b/man/mlr3torch-package.Rd index 77aeb3c04..31cf3ec22 100644 --- a/man/mlr3torch-package.Rd +++ b/man/mlr3torch-package.Rd @@ -39,6 +39,7 @@ Other contributors: \item Bernd Bischl \email{bernd_bischl@gmx.net} (\href{https://orcid.org/0000-0001-6002-6980}{ORCID}) [contributor] \item Lukas Burk \email{github@quantenbrot.de} (\href{https://orcid.org/0000-0001-7528-3795}{ORCID}) [contributor] \item Florian Pfisterer \email{pfistererf@googlemail.com} (\href{https://orcid.org/0000-0001-8867-762X}{ORCID}) [contributor] + \item Carson Zhang \email{carsonzhang4@gmail.com} [contributor] } } diff --git a/man/mlr3torch_callbacks.Rd b/man/mlr3torch_callbacks.Rd index 3fec81d64..ef9239154 100644 --- a/man/mlr3torch_callbacks.Rd +++ b/man/mlr3torch_callbacks.Rd @@ -34,6 +34,7 @@ Other Callback: \code{\link{mlr_callback_set}}, \code{\link{mlr_callback_set.checkpoint}}, \code{\link{mlr_callback_set.progress}}, +\code{\link{mlr_callback_set.tb}}, \code{\link{mlr_context_torch}}, \code{\link{t_clbk}()}, \code{\link{torch_callback}()} diff --git a/man/mlr_callback_set.Rd b/man/mlr_callback_set.Rd index 54afcbe2a..9b5e6be07 100644 --- a/man/mlr_callback_set.Rd +++ b/man/mlr_callback_set.Rd @@ -68,6 +68,7 @@ 
Other Callback: \code{\link{mlr3torch_callbacks}}, \code{\link{mlr_callback_set.checkpoint}}, \code{\link{mlr_callback_set.progress}}, +\code{\link{mlr_callback_set.tb}}, \code{\link{mlr_context_torch}}, \code{\link{t_clbk}()}, \code{\link{torch_callback}()} diff --git a/man/mlr_callback_set.checkpoint.Rd b/man/mlr_callback_set.checkpoint.Rd index 92da34cee..fcb846acf 100644 --- a/man/mlr_callback_set.checkpoint.Rd +++ b/man/mlr_callback_set.checkpoint.Rd @@ -21,6 +21,7 @@ Other Callback: \code{\link{mlr3torch_callbacks}}, \code{\link{mlr_callback_set}}, \code{\link{mlr_callback_set.progress}}, +\code{\link{mlr_callback_set.tb}}, \code{\link{mlr_context_torch}}, \code{\link{t_clbk}()}, \code{\link{torch_callback}()} diff --git a/man/mlr_callback_set.progress.Rd b/man/mlr_callback_set.progress.Rd index 93b6af7c9..8927f6543 100644 --- a/man/mlr_callback_set.progress.Rd +++ b/man/mlr_callback_set.progress.Rd @@ -16,6 +16,7 @@ Other Callback: \code{\link{mlr3torch_callbacks}}, \code{\link{mlr_callback_set}}, \code{\link{mlr_callback_set.checkpoint}}, +\code{\link{mlr_callback_set.tb}}, \code{\link{mlr_context_torch}}, \code{\link{t_clbk}()}, \code{\link{torch_callback}()} diff --git a/man/mlr_callback_set.tb.Rd b/man/mlr_callback_set.tb.Rd new file mode 100644 index 000000000..cfa1cbb41 --- /dev/null +++ b/man/mlr_callback_set.tb.Rd @@ -0,0 +1,97 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CallbackSetTB.R +\name{mlr_callback_set.tb} +\alias{mlr_callback_set.tb} +\alias{CallbackSetTB} +\title{TensorBoard Logging Callback} +\description{ +Logs training loss and validation measures as events that can be tracked using TensorBoard. +} +\details{ +Logs at most every epoch. +} +\seealso{ +Other Callback: +\code{\link{TorchCallback}}, +\code{\link{as_torch_callback}()}, +\code{\link{as_torch_callbacks}()}, +\code{\link{callback_set}()}, +\code{\link{mlr3torch_callbacks}}, +\code{\link{mlr_callback_set}}, +\code{\link{mlr_callback_set.checkpoint}}, +\code{\link{mlr_callback_set.progress}}, +\code{\link{mlr_context_torch}}, +\code{\link{t_clbk}()}, +\code{\link{torch_callback}()} +} +\concept{Callback} +\section{Super class}{ +\code{\link[mlr3torch:CallbackSet]{mlr3torch::CallbackSet}} -> \code{CallbackSetTB} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-CallbackSetTB-new}{\code{CallbackSetTB$new()}} +\item \href{#method-CallbackSetTB-on_epoch_end}{\code{CallbackSetTB$on_epoch_end()}} +\item \href{#method-CallbackSetTB-clone}{\code{CallbackSetTB$clone()}} +} +} +\if{html}{\out{ +
<details open><summary>Inherited methods</summary>
<ul>
<li><span class="pkg-link" data-pkg="mlr3torch" data-topic="CallbackSet" data-id="print"><a href='../../mlr3torch/html/CallbackSet.html#method-CallbackSet-print'><code>mlr3torch::CallbackSet$print()</code></a></span></li>
</ul>
</details>
}}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-CallbackSetTB-new"></a>}}
\if{latex}{\out{\hypertarget{method-CallbackSetTB-new}{}}}
\subsection{Method \code{new()}}{
Creates a new instance of this \link[R6:R6Class]{R6} class.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{CallbackSetTB$new(path)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{path}}{(\code{character(1)})\cr
The path to a folder where the events are logged.
Point TensorBoard to this folder to view them.}
}
\if{html}{\out{</div>}}
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-CallbackSetTB-on_epoch_end"></a>}}
\if{latex}{\out{\hypertarget{method-CallbackSetTB-on_epoch_end}{}}}
\subsection{Method \code{on_epoch_end()}}{
Logs the training loss and validation measures as TensorFlow events.
Meaningful changes happen at the end of each epoch.
Notably NOT on_batch_valid_end, since there are no gradient steps between validation batches,
and therefore differences are due to randomness.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{CallbackSetTB$on_epoch_end()}\if{html}{\out{</div>}}
}

}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-CallbackSetTB-clone"></a>}}
\if{latex}{\out{\hypertarget{method-CallbackSetTB-clone}{}}}
\subsection{Method \code{clone()}}{
The objects of this class are cloneable with this method.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{CallbackSetTB$clone(deep = FALSE)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{deep}}{Whether to make a deep clone.}
}
\if{html}{\out{</div>
}} +} +} +} diff --git a/man/mlr_context_torch.Rd b/man/mlr_context_torch.Rd index c0913f203..474a4fff2 100644 --- a/man/mlr_context_torch.Rd +++ b/man/mlr_context_torch.Rd @@ -19,6 +19,7 @@ Other Callback: \code{\link{mlr_callback_set}}, \code{\link{mlr_callback_set.checkpoint}}, \code{\link{mlr_callback_set.progress}}, +\code{\link{mlr_callback_set.tb}}, \code{\link{t_clbk}()}, \code{\link{torch_callback}()} } diff --git a/man/mlr_learners.torchvision.Rd b/man/mlr_learners.torchvision.Rd index dd0123ada..e1f4f9ba3 100644 --- a/man/mlr_learners.torchvision.Rd +++ b/man/mlr_learners.torchvision.Rd @@ -89,9 +89,9 @@ Krizhevsky, Alex, Sutskever, Ilya, Hinton, E. G (2017). Sandler, Mark, Howard, Andrew, Zhu, Menglong, Zhmoginov, Andrey, Chen, Liang-Chieh (2018). \dQuote{Mobilenetv2: Inverted residuals and linear bottlenecks.} In \emph{Proceedings of the IEEE conference on computer vision and pattern recognition}, 4510--4520. -He, Kaiming, Zhang, Xiangyu, Ren, Shaoqing, Sun, Jian (2016 ). -\dQuote{Deep residual learning for image recognition .} -In \emph{Proceedings of the IEEE conference on computer vision and pattern recognition }, 770--778 . +He, Kaiming, Zhang, Xiangyu, Ren, Shaoqing, Sun, Jian (2016). +\dQuote{Deep residual learning for image recognition.} +In \emph{Proceedings of the IEEE conference on computer vision and pattern recognition}, 770--778. Simonyan, Karen, Zisserman, Andrew (2014). \dQuote{Very deep convolutional networks for large-scale image recognition.} \emph{arXiv preprint arXiv:1409.1556}.} diff --git a/man/t_clbk.Rd b/man/t_clbk.Rd index c329ab468..fdac6ea24 100644 --- a/man/t_clbk.Rd +++ b/man/t_clbk.Rd @@ -43,6 +43,7 @@ Other Callback: \code{\link{mlr_callback_set}}, \code{\link{mlr_callback_set.checkpoint}}, \code{\link{mlr_callback_set.progress}}, +\code{\link{mlr_callback_set.tb}}, \code{\link{mlr_context_torch}}, \code{\link{torch_callback}()} diff --git a/man/torch_callback.Rd b/man/torch_callback.Rd index ae54583a7..2d5568e3b 100644 --- a/man/torch_callback.Rd +++ b/man/torch_callback.Rd @@ -149,6 +149,7 @@ Other Callback: \code{\link{mlr_callback_set}}, \code{\link{mlr_callback_set.checkpoint}}, \code{\link{mlr_callback_set.progress}}, +\code{\link{mlr_callback_set.tb}}, \code{\link{mlr_context_torch}}, \code{\link{t_clbk}()} } diff --git a/tests/testthat/test_CallbackSetTB.R b/tests/testthat/test_CallbackSetTB.R index 440d8ccf6..cbf031cba 100644 --- a/tests/testthat/test_CallbackSetTB.R +++ b/tests/testthat/test_CallbackSetTB.R @@ -34,7 +34,7 @@ test_that("a simple example works", { n_valid_acc_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "valid.classif.acc"))) n_valid_ce_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "valid.classif.ce"))) - # TODO: refactor to expect a specific ordering of the events list + # TODO: refactor to expect a specific ordering of the events list, not just the right counts expect_equal(n_last_loss_events, n_epochs) expect_equal(n_valid_acc_events, n_epochs) expect_equal(n_valid_ce_events, n_epochs) From 7c9f431d3e368d838ec081d0fa37d15dd33831fb Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Fri, 11 Oct 2024 17:37:06 +0200 Subject: [PATCH 14/37] all tests pass, I think this is parity with the previous broken commit. 
still need to incorporate the step logging --- R/CallbackSetTB.R | 35 ++++++++---- man/mlr_callback_set.tb.Rd | 5 +- man/mlr_learners.mlp.Rd | 1 - man/mlr_learners.tab_resnet.Rd | 1 - man/mlr_learners.torch_featureless.Rd | 1 - man/mlr_learners.torchvision.Rd | 1 - man/mlr_learners_torch.Rd | 1 - man/mlr_learners_torch_image.Rd | 1 - man/mlr_learners_torch_model.Rd | 1 - tests/testthat/test_CallbackSetTB.R | 82 ++++++++++++++++++++++----- 10 files changed, 96 insertions(+), 33 deletions(-) diff --git a/R/CallbackSetTB.R b/R/CallbackSetTB.R index 4bdbf4912..7424ba7df 100644 --- a/R/CallbackSetTB.R +++ b/R/CallbackSetTB.R @@ -10,6 +10,8 @@ #' @param path (`character(1)`)\cr #' The path to a folder where the events are logged. #' Point TensorBoard to this folder to view them. +#' @param log_train_loss (`logical(1)`)\cr +#' Whether we log the training loss. #' @family Callback #' @export #' @include CallbackSet.R @@ -19,11 +21,12 @@ CallbackSetTB = R6Class("CallbackSetTB", public = list( #' @description #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function(path) { + initialize = function(path, log_train_loss) { self$path = assert_path_for_output(path) if (!dir.exists(path)) { dir.create(path, recursive = TRUE) } + self$log_train_loss = assert_logical(log_train_loss) }, #' @description #' Logs the training loss and validation measures as TensorFlow events. @@ -33,26 +36,35 @@ CallbackSetTB = R6Class("CallbackSetTB", # TODO: display the appropriate x axis with its label in TensorBoard # relevant when we log different scores at different times on_epoch_end = function() { - private$log_train_score() + if (self$log_train_loss) { + private$.log_train_loss() + } + + if (length(self$ctx$last_scores_train)) { + map(names(self$ctx$measures_train), private$.log_train_score) + } if (length(self$ctx$last_scores_valid)) { - map(names(self$ctx$measures_valid), private$log_valid_score) + map(names(self$ctx$measures_valid), private$.log_valid_score) } } ), private = list( - # TODO: refactor into a single function with the following signature - # log_score = function(prefix, measure_name, score) { - # - # }, - log_valid_score = function(measure_name) { + .log_valid_score = function(measure_name) { valid_score = list(self$ctx$last_scores_valid[[measure_name]]) names(valid_score) = paste0("valid.", measure_name) with_logdir(self$path, { do.call(log_event, valid_score) }) }, - log_train_score = function() { + .log_train_score = function(measure_name) { + train_score = list(self$ctx$last_scores_train[[measure_name]]) + names(train_score) = paste0("train.", measure_name) + with_logdir(self$path, { + do.call(log_event, train_score) + }) + }, + .log_train_loss = function() { # TODO: remind ourselves why we wanted to display last_loss and not last_scores_train with_logdir(self$path, { log_event(train.loss = self$ctx$last_loss) @@ -61,12 +73,13 @@ CallbackSetTB = R6Class("CallbackSetTB", ) ) - +#' @include TorchCallback.R mlr3torch_callbacks$add("tb", function() { TorchCallback$new( callback_generator = CallbackSetTB, param_set = ps( - path = p_uty(tags = c("train", "required")) + path = p_uty(tags = c("train", "required")), + log_train_loss = p_lgl(tags = c("train", "required")) ), id = "tb", label = "TensorBoard", diff --git a/man/mlr_callback_set.tb.Rd b/man/mlr_callback_set.tb.Rd index cfa1cbb41..a5f226a5e 100644 --- a/man/mlr_callback_set.tb.Rd +++ b/man/mlr_callback_set.tb.Rd @@ -51,7 +51,7 @@ Other Callback: \subsection{Method \code{new()}}{ Creates a new instance of this 
\link[R6:R6Class]{R6} class. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{CallbackSetTB$new(path)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{CallbackSetTB$new(path, log_train_loss)}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -60,6 +60,9 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. \item{\code{path}}{(\code{character(1)})\cr The path to a folder where the events are logged. Point TensorBoard to this folder to view them.} + +\item{\code{log_train_loss}}{(\code{logical(1)})\cr +Whether we log the training loss.} } \if{html}{\out{}} } diff --git a/man/mlr_learners.mlp.Rd b/man/mlr_learners.mlp.Rd index 6eb586aaa..e6809199e 100644 --- a/man/mlr_learners.mlp.Rd +++ b/man/mlr_learners.mlp.Rd @@ -100,7 +100,6 @@ Other Learner:
Inherited methods
  • mlr3::Learner$base_learner()
- • mlr3::Learner$encapsulate()
  • mlr3::Learner$help()
  • mlr3::Learner$predict()
  • mlr3::Learner$predict_newdata()
  • …
diff --git a/man/mlr_learners.tab_resnet.Rd b/man/mlr_learners.tab_resnet.Rd
index 1d8b9e8df..b78be86c1 100644
--- a/man/mlr_learners.tab_resnet.Rd
+++ b/man/mlr_learners.tab_resnet.Rd
@@ -102,7 +102,6 @@ Other Learner:
Inherited methods
  • mlr3::Learner$base_learner()
- • mlr3::Learner$encapsulate()
  • mlr3::Learner$help()
  • mlr3::Learner$predict()
  • mlr3::Learner$predict_newdata()
  • …
diff --git a/man/mlr_learners.torch_featureless.Rd b/man/mlr_learners.torch_featureless.Rd
index 1fb6274ab..58f316e37 100644
--- a/man/mlr_learners.torch_featureless.Rd
+++ b/man/mlr_learners.torch_featureless.Rd
@@ -86,7 +86,6 @@ Other Learner:
Inherited methods
  • mlr3::Learner$base_learner()
- • mlr3::Learner$encapsulate()
  • mlr3::Learner$help()
  • mlr3::Learner$predict()
  • mlr3::Learner$predict_newdata()
  • …
diff --git a/man/mlr_learners.torchvision.Rd b/man/mlr_learners.torchvision.Rd
index 87883dd94..e1f4f9ba3 100644
--- a/man/mlr_learners.torchvision.Rd
+++ b/man/mlr_learners.torchvision.Rd
@@ -42,7 +42,6 @@ number of classes inferred from the \code{\link[mlr3:Task]{Task}}.
Inherited methods
  • mlr3::Learner$base_learner()
- • mlr3::Learner$encapsulate()
  • mlr3::Learner$help()
  • mlr3::Learner$predict()
  • mlr3::Learner$predict_newdata()
  • …
diff --git a/man/mlr_learners_torch.Rd b/man/mlr_learners_torch.Rd
index 5797f2468..748b1b063 100644
--- a/man/mlr_learners_torch.Rd
+++ b/man/mlr_learners_torch.Rd
@@ -265,7 +265,6 @@ which are varied systematically during tuning (parameter values).}
Inherited methods
  • mlr3::Learner$base_learner()
- • mlr3::Learner$encapsulate()
  • mlr3::Learner$help()
  • mlr3::Learner$predict()
  • mlr3::Learner$predict_newdata()
  • …
diff --git a/man/mlr_learners_torch_image.Rd b/man/mlr_learners_torch_image.Rd
index af2b854de..723a4e223 100644
--- a/man/mlr_learners_torch_image.Rd
+++ b/man/mlr_learners_torch_image.Rd
@@ -36,7 +36,6 @@ Other Learner:
Inherited methods
  • mlr3::Learner$base_learner()
- • mlr3::Learner$encapsulate()
  • mlr3::Learner$help()
  • mlr3::Learner$predict()
  • mlr3::Learner$predict_newdata()
  • …
diff --git a/man/mlr_learners_torch_model.Rd b/man/mlr_learners_torch_model.Rd
index da6fa0086..505b8fbd8 100644
--- a/man/mlr_learners_torch_model.Rd
+++ b/man/mlr_learners_torch_model.Rd
@@ -92,7 +92,6 @@ The ingress tokens. Must be non-\code{NULL} when calling \verb{$train()}.}
Inherited methods
  • mlr3::Learner$base_learner()
- • mlr3::Learner$encapsulate()
  • mlr3::Learner$help()
  • mlr3::Learner$predict()
  • mlr3::Learner$predict_newdata()
diff --git a/tests/testthat/test_CallbackSetTB.R b/tests/testthat/test_CallbackSetTB.R
index cbf031cba..f2c3818f2 100644
--- a/tests/testthat/test_CallbackSetTB.R
+++ b/tests/testthat/test_CallbackSetTB.R
@@ -1,8 +1,8 @@
 library(tfevents)
 
-test_that("basic", {
-  cb = t_clbk("tb", path = tempfile())
-  expect_torch_callback(cb, check_man = FALSE)
+test_that("autotest", {
+  cb = t_clbk("tb", path = tempfile(), log_train_loss = TRUE)
+  expect_torch_callback(cb, check_man = TRUE)
 })
 
 # TODO: investigate what's happening when there is only a single epoch (why don't we log anything?)
@@ -17,6 +17,8 @@ test_that("a simple example works", {
 
   pth0 = tempfile()
 
+  log_train_loss = TRUE
+
   mlp = lrn("classif.mlp",
     callbacks = cb,
     epochs = n_epochs, batch_size = batch_size, neurons = neurons,
@@ -26,16 +28,26 @@ test_that("a simple example works", {
   )
   mlp$param_set$set_values(cb.tb.path = pth0)
 
+  mlp$param_set$set_values(cb.tb.log_train_loss = log_train_loss)
+
   mlp$train(task)
 
   events = mlr3misc::map(collect_events(pth0)$summary, unlist)
 
-  n_last_loss_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "train.loss")))
-  n_valid_acc_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "valid.classif.acc")))
-  n_valid_ce_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "valid.classif.ce")))
+  event_tag_is = function(event, tag_name) {
+    ifelse(is.null(event), FALSE, event["tag"] == tag_name)
+  }
+
+  n_train_loss_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.loss")))
+  n_train_acc_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.classif.acc")))
+  n_train_ce_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.classif.ce")))
+  n_valid_acc_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "valid.classif.acc")))
+  n_valid_ce_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "valid.classif.ce")))
 
   # TODO: refactor to expect a specific ordering of the events list, not just the right counts
-  expect_equal(n_last_loss_events, n_epochs)
+  expect_equal(n_train_loss_events, n_epochs)
+  expect_equal(n_train_acc_events, n_epochs)
+  expect_equal(n_train_ce_events, n_epochs)
   expect_equal(n_valid_acc_events, n_epochs)
   expect_equal(n_valid_ce_events, n_epochs)
 })
@@ -52,6 +64,8 @@ test_that("eval_freq works", {
 
   pth0 = tempfile()
 
+  log_train_loss = TRUE
+
   mlp = lrn("classif.mlp",
     callbacks = cb,
     epochs = n_epochs, batch_size = batch_size, neurons = neurons,
@@ -61,23 +75,63 @@ test_that("eval_freq works", {
     eval_freq = eval_freq
   )
   mlp$param_set$set_values(cb.tb.path = pth0)
+  mlp$param_set$set_values(cb.tb.log_train_loss = log_train_loss)
+
+  mlp$train(task)
+
+  events = mlr3misc::map(collect_events(pth0)$summary, unlist)
+
+  event_tag_is = function(event, tag_name) {
+    ifelse(is.null(event), FALSE, event["tag"] == tag_name)
+  }
+
+  n_train_loss_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.loss")))
+  n_train_acc_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.classif.acc")))
+  n_train_ce_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.classif.ce")))
+  n_valid_acc_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "valid.classif.acc")))
+  n_valid_ce_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "valid.classif.ce")))
+
+  expect_equal(n_train_loss_events, n_epochs)
+  expect_equal(n_train_acc_events, ceiling(n_epochs / eval_freq))
+  expect_equal(n_train_ce_events, ceiling(n_epochs / eval_freq))
+  expect_equal(n_valid_acc_events, ceiling(n_epochs / eval_freq))
+  expect_equal(n_valid_ce_events, ceiling(n_epochs / eval_freq))
+})
+
+test_that("the flag for tracking the train loss works", {
+  cb = t_clbk("tb")
+
+  task = tsk("iris")
+  n_epochs = 10
+  batch_size = 50
+  neurons = 200
+
+  log_train_loss = FALSE
+
+  pth0 = tempfile()
+
+  mlp = lrn("classif.mlp",
+    callbacks = cb,
+    epochs = n_epochs, batch_size = batch_size, neurons = neurons,
+    validate = 0.2,
+    measures_valid = msrs(c("classif.acc", "classif.ce")),
+    measures_train = msrs(c("classif.acc", "classif.ce"))
+  )
+  mlp$param_set$set_values(cb.tb.path = pth0)
+  mlp$param_set$set_values(cb.tb.log_train_loss = log_train_loss)
 
   mlp$train(task)
 
   events = mlr3misc::map(collect_events(pth0)$summary, unlist)
 
-  n_last_loss_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "train.loss")))
-  n_valid_acc_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "valid.classif.acc")))
-  n_valid_ce_events = sum(unlist(mlr3misc::map(events, \(x) x["tag"] == "valid.classif.ce")))
+  n_train_loss_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.loss")))
 
-  expect_equal(n_last_loss_events, n_epochs)
-  expect_equal(n_valid_acc_events, ceiling(n_epochs / 4))
-  expect_equal(n_valid_ce_events, ceiling(n_epochs / 4))
+  expect_equal(n_train_loss_events, 0)
 })
 
 test_that("throws an error when using existing directory", {
   path = tempfile()
   dir.create(path)
-  cb = t_clbk("tb", path = path)
+  cb = t_clbk("tb", path = path, log_train_loss = TRUE)
   expect_error(cb$generate(), "already exists")
 })

From c6c93336d42b794abc36a974b6657352a3f3d514 Mon Sep 17 00:00:00 2001
From: Carson Zhang
Date: Fri, 11 Oct 2024 17:57:22 +0200
Subject: [PATCH 15/37] implemented step logging

---
 R/CallbackSetTB.R                   | 23 +++++++++++------------
 tests/testthat/test_CallbackSetTB.R |  4 ++++
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/R/CallbackSetTB.R b/R/CallbackSetTB.R
index 7424ba7df..55bbe7cbd 100644
--- a/R/CallbackSetTB.R
+++ b/R/CallbackSetTB.R
@@ -30,9 +30,6 @@ CallbackSetTB = R6Class("CallbackSetTB",
     },
     #' @description
     #' Logs the training loss and validation measures as TensorFlow events.
-    #' Meaningful changes happen at the end of each epoch.
-    #' Notably NOT on_batch_valid_end, since there are no gradient steps between validation batches,
-    #' and therefore differences are due to randomness
     # TODO: display the appropriate x axis with its label in TensorBoard
     # relevant when we log different scores at different times
     on_epoch_end = function() {
@@ -50,19 +47,21 @@ CallbackSetTB = R6Class("CallbackSetTB",
     }
   ),
   private = list(
-    .log_valid_score = function(measure_name) {
-      valid_score = list(self$ctx$last_scores_valid[[measure_name]])
-      names(valid_score) = paste0("valid.", measure_name)
+    .log_score = function(prefix, measure_name, score) {
+      event_list = list(score, self$ctx$epoch)
+      names(event_list) = c(paste0(prefix, measure_name), "step")
+
       with_logdir(self$path, {
-        do.call(log_event, valid_score)
+        do.call(log_event, event_list)
       })
     },
+    .log_valid_score = function(measure_name) {
+      valid_score = self$ctx$last_scores_valid[[measure_name]]
+      private$.log_score("valid.", measure_name, valid_score)
+    },
     .log_train_score = function(measure_name) {
-      train_score = list(self$ctx$last_scores_train[[measure_name]])
-      names(train_score) = paste0("train.", measure_name)
-      with_logdir(self$path, {
-        do.call(log_event, train_score)
-      })
+      train_score = self$ctx$last_scores_train[[measure_name]]
+      private$.log_score("train.", measure_name, train_score)
     },
     .log_train_loss = function() {
       # TODO: remind ourselves why we wanted to display last_loss and not last_scores_train
diff --git a/tests/testthat/test_CallbackSetTB.R b/tests/testthat/test_CallbackSetTB.R
index f2c3818f2..5f7b56fb8 100644
--- a/tests/testthat/test_CallbackSetTB.R
+++ b/tests/testthat/test_CallbackSetTB.R
@@ -124,6 +124,10 @@ test_that("the flag for tracking the train loss works", {
 
   events = mlr3misc::map(collect_events(pth0)$summary, unlist)
 
+  event_tag_is = function(event, tag_name) {
+    ifelse(is.null(event), FALSE, event["tag"] == tag_name)
+  }
+
   n_train_loss_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.loss")))
 
   expect_equal(n_train_loss_events, 0)
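[Aside, not part of the patch series: a minimal sketch of the mechanism this patch relies on, using only `tfevents`. It assumes, as the patched `.log_score()` helper does, that `log_event()` accepts a `step` argument that pins each scalar to an epoch on the TensorBoard x-axis; the tag names below are illustrative.]

library(tfevents)

logdir = tempfile()
with_logdir(logdir, {
  for (epoch in 1:3) {
    # equivalent to the patch's do.call(log_event, event_list) with
    # names(event_list) = c("valid.classif.acc", "step")
    log_event(valid.classif.acc = 0.8 + epoch / 100, step = epoch)
  }
})
collect_events(logdir)$summary  # one event per epoch, each carrying its step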
- #' Notably NOT on_batch_valid_end, since there are no gradient steps between validation batches, - #' and therefore differences are due to randomness # TODO: display the appropriate x axis with its label in TensorBoard # relevant when we log different scores at different times on_epoch_end = function() { @@ -50,19 +47,21 @@ CallbackSetTB = R6Class("CallbackSetTB", } ), private = list( - .log_valid_score = function(measure_name) { - valid_score = list(self$ctx$last_scores_valid[[measure_name]]) - names(valid_score) = paste0("valid.", measure_name) + .log_score = function(prefix, measure_name, score) { + event_list = list(score, self$ctx$epoch) + names(event_list) = c(paste0(prefix, measure_name), "step") + with_logdir(self$path, { - do.call(log_event, valid_score) + do.call(log_event, event_list) }) }, + .log_valid_score = function(measure_name) { + valid_score = self$ctx$last_scores_valid[[measure_name]] + private$.log_score("valid.", measure_name, valid_score) + }, .log_train_score = function(measure_name) { - train_score = list(self$ctx$last_scores_train[[measure_name]]) - names(train_score) = paste0("train.", measure_name) - with_logdir(self$path, { - do.call(log_event, train_score) - }) + train_score = self$ctx$last_scores_train[[measure_name]] + private$.log_score("train.", measure_name, train_score) }, .log_train_loss = function() { # TODO: remind ourselves why we wanted to display last_loss and not last_scores_train diff --git a/tests/testthat/test_CallbackSetTB.R b/tests/testthat/test_CallbackSetTB.R index f2c3818f2..5f7b56fb8 100644 --- a/tests/testthat/test_CallbackSetTB.R +++ b/tests/testthat/test_CallbackSetTB.R @@ -124,6 +124,10 @@ test_that("the flag for tracking the train loss works", { events = mlr3misc::map(collect_events(pth0)$summary, unlist) + event_tag_is = function(event, tag_name) { + ifelse(is.null(event), FALSE, event["tag"] == tag_name) + } + n_train_loss_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.loss"))) expect_equal(n_train_loss_events, 0) From 43e7396e08820e3c9c9ee8e5417d8deabab8aa4f Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Fri, 11 Oct 2024 18:01:46 +0200 Subject: [PATCH 16/37] removed extraneous comments --- NEWS.md | 1 + R/CallbackSetTB.R | 3 --- tests/testthat/test_CallbackSetTB.R | 2 -- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index b072d9590..9bf2927e4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # mlr3torch dev +* Added `CallbackSetTB`, which allows logging that can be viewed by TensorBoard. * Don't use deprecated `data_formats` anymore # mlr3torch 0.1.1 diff --git a/R/CallbackSetTB.R b/R/CallbackSetTB.R index 55bbe7cbd..b50b0a354 100644 --- a/R/CallbackSetTB.R +++ b/R/CallbackSetTB.R @@ -30,8 +30,6 @@ CallbackSetTB = R6Class("CallbackSetTB", }, #' @description #' Logs the training loss and validation measures as TensorFlow events. 
- # TODO: display the appropriate x axis with its label in TensorBoard - # relevant when we log different scores at different times on_epoch_end = function() { if (self$log_train_loss) { private$.log_train_loss() @@ -64,7 +62,6 @@ CallbackSetTB = R6Class("CallbackSetTB", private$.log_score("train.", measure_name, train_score) }, .log_train_loss = function() { - # TODO: remind ourselves why we wanted to display last_loss and not last_scores_train with_logdir(self$path, { log_event(train.loss = self$ctx$last_loss) }) diff --git a/tests/testthat/test_CallbackSetTB.R b/tests/testthat/test_CallbackSetTB.R index 5f7b56fb8..c89b07e2d 100644 --- a/tests/testthat/test_CallbackSetTB.R +++ b/tests/testthat/test_CallbackSetTB.R @@ -7,7 +7,6 @@ test_that("autotest", { # TODO: investigate what's happening when there is only a single epoch (why don't we log anything?) test_that("a simple example works", { - # using a temp dir cb = t_clbk("tb") task = tsk("iris") @@ -53,7 +52,6 @@ test_that("a simple example works", { }) test_that("eval_freq works", { - # using a temp dir cb = t_clbk("tb") task = tsk("iris") From ec5d8fc8bc2ddcaa5d25742bb4f0d4fff7e9849d Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Fri, 11 Oct 2024 18:07:49 +0200 Subject: [PATCH 17/37] added tensorboard instructions --- R/CallbackSetTB.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/CallbackSetTB.R b/R/CallbackSetTB.R index b50b0a354..8c2a8351b 100644 --- a/R/CallbackSetTB.R +++ b/R/CallbackSetTB.R @@ -3,9 +3,10 @@ #' @name mlr_callback_set.tb #' #' @description -#' Logs training loss and validation measures as events that can be tracked using TensorBoard. +#' Logs training loss, training measures, and validation measures as events. +#' To view them, use TensorBoard with `tensorflow::tensorboard()` (requires `tensorflow`) or the CLI. #' @details -#' Logs at most every epoch. +#' Logs events at most every epoch. #' #' @param path (`character(1)`)\cr #' The path to a folder where the events are logged. @@ -29,7 +30,7 @@ CallbackSetTB = R6Class("CallbackSetTB", self$log_train_loss = assert_logical(log_train_loss) }, #' @description - #' Logs the training loss and validation measures as TensorFlow events. + #' Logs the training loss, training measures, and validation measures as TensorFlow events. on_epoch_end = function() { if (self$log_train_loss) { private$.log_train_loss() From f26a2544065b6bf52d83265d359d2e1c384d47aa Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Fri, 11 Oct 2024 18:40:54 +0200 Subject: [PATCH 18/37] passes R CMD Check, minimally addresses every comment in the previous PR --- man/mlr_callback_set.tb.Rd | 10 ++++------ man/mlr_learners.mlp.Rd | 1 + man/mlr_learners.tab_resnet.Rd | 1 + man/mlr_learners.torch_featureless.Rd | 1 + man/mlr_learners.torchvision.Rd | 1 + man/mlr_learners_torch.Rd | 1 + man/mlr_learners_torch_image.Rd | 1 + man/mlr_learners_torch_model.Rd | 1 + tests/testthat/test_CallbackSetTB.R | 16 ++++------------ 9 files changed, 15 insertions(+), 18 deletions(-) diff --git a/man/mlr_callback_set.tb.Rd b/man/mlr_callback_set.tb.Rd index a5f226a5e..ab0fb6aef 100644 --- a/man/mlr_callback_set.tb.Rd +++ b/man/mlr_callback_set.tb.Rd @@ -5,10 +5,11 @@ \alias{CallbackSetTB} \title{TensorBoard Logging Callback} \description{ -Logs training loss and validation measures as events that can be tracked using TensorBoard. +Logs training loss, training measures, and validation measures as events. 
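[Aside, not part of the patch series: a hedged usage sketch of the callback as it stands after this patch, mirroring the package's own test setup. The learner settings are illustrative, and the TensorBoard invocation is one of the two options named in the new docs.]

library(mlr3)
library(mlr3torch)

logdir = tempfile()
learner = lrn("classif.mlp",
  callbacks = t_clbk("tb", path = logdir, log_train_loss = TRUE),
  epochs = 10, batch_size = 50, neurons = 200,
  validate = 0.2,
  measures_valid = msrs(c("classif.acc", "classif.ce"))
)
learner$train(tsk("iris"))

# view the events, e.g. via the Python CLI: tensorboard --logdir <logdir>
# or from R (requires the tensorflow package):
# tensorflow::tensorboard(log_dir = logdir)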
From f26a2544065b6bf52d83265d359d2e1c384d47aa Mon Sep 17 00:00:00 2001
From: Carson Zhang
Date: Fri, 11 Oct 2024 18:40:54 +0200
Subject: [PATCH 18/37] passes R CMD Check, minimally addresses every comment
 in the previous PR

---
 man/mlr_callback_set.tb.Rd            | 10 ++++------
 man/mlr_learners.mlp.Rd               |  1 +
 man/mlr_learners.tab_resnet.Rd        |  1 +
 man/mlr_learners.torch_featureless.Rd |  1 +
 man/mlr_learners.torchvision.Rd       |  1 +
 man/mlr_learners_torch.Rd             |  1 +
 man/mlr_learners_torch_image.Rd       |  1 +
 man/mlr_learners_torch_model.Rd       |  1 +
 tests/testthat/test_CallbackSetTB.R   | 16 ++++------------
 9 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/man/mlr_callback_set.tb.Rd b/man/mlr_callback_set.tb.Rd
index a5f226a5e..ab0fb6aef 100644
--- a/man/mlr_callback_set.tb.Rd
+++ b/man/mlr_callback_set.tb.Rd
@@ -5,10 +5,11 @@
 \alias{CallbackSetTB}
 \title{TensorBoard Logging Callback}
 \description{
-Logs training loss and validation measures as events that can be tracked using TensorBoard.
+Logs training loss, training measures, and validation measures as events.
+To view them, use TensorBoard with \code{tensorflow::tensorboard()} (requires \code{tensorflow}) or the CLI.
 }
 \details{
-Logs at most every epoch.
+Logs events at most every epoch.
 }
 \seealso{
 Other Callback:
@@ -71,10 +72,7 @@ Whether we log the training loss.}
 \if{html}{\out{<hr>}}
 \if{latex}{\out{\hypertarget{method-CallbackSetTB-on_epoch_end}{}}}
 \subsection{Method \code{on_epoch_end()}}{
-Logs the training loss and validation measures as TensorFlow events.
-Meaningful changes happen at the end of each epoch.
-Notably NOT on_batch_valid_end, since there are no gradient steps between validation batches,
-and therefore differences are due to randomness
+Logs the training loss, training measures, and validation measures as TensorFlow events.
 \subsection{Usage}{
 \if{html}{\out{<div class="r">}}\preformatted{CallbackSetTB$on_epoch_end()}\if{html}{\out{</div>}}
 }
diff --git a/man/mlr_learners.mlp.Rd b/man/mlr_learners.mlp.Rd
index e6809199e..6eb586aaa 100644
--- a/man/mlr_learners.mlp.Rd
+++ b/man/mlr_learners.mlp.Rd
@@ -100,6 +100,7 @@ Other Learner:
 [rendered HTML listing of inherited methods; this hunk adds a link for mlr3::Learner$encapsulate() after mlr3::Learner$base_learner()]
diff --git a/man/mlr_learners.tab_resnet.Rd b/man/mlr_learners.tab_resnet.Rd
index b78be86c1..1d8b9e8df 100644
--- a/man/mlr_learners.tab_resnet.Rd
+++ b/man/mlr_learners.tab_resnet.Rd
@@ -102,6 +102,7 @@ Other Learner:
 [rendered HTML listing of inherited methods; this hunk adds a link for mlr3::Learner$encapsulate() after mlr3::Learner$base_learner()]
diff --git a/man/mlr_learners.torch_featureless.Rd b/man/mlr_learners.torch_featureless.Rd
index 58f316e37..1fb6274ab 100644
--- a/man/mlr_learners.torch_featureless.Rd
+++ b/man/mlr_learners.torch_featureless.Rd
@@ -86,6 +86,7 @@ Other Learner:
 [rendered HTML listing of inherited methods; this hunk adds a link for mlr3::Learner$encapsulate() after mlr3::Learner$base_learner()]
diff --git a/man/mlr_learners.torchvision.Rd b/man/mlr_learners.torchvision.Rd
index e1f4f9ba3..87883dd94 100644
--- a/man/mlr_learners.torchvision.Rd
+++ b/man/mlr_learners.torchvision.Rd
@@ -42,6 +42,7 @@ number of classes inferred from the \code{\link[mlr3:Task]{Task}}.
 [rendered HTML listing of inherited methods; this hunk adds a link for mlr3::Learner$encapsulate() after mlr3::Learner$base_learner()]
diff --git a/man/mlr_learners_torch.Rd b/man/mlr_learners_torch.Rd
index 748b1b063..5797f2468 100644
--- a/man/mlr_learners_torch.Rd
+++ b/man/mlr_learners_torch.Rd
@@ -265,6 +265,7 @@ which are varied systematically during tuning (parameter values).}
 [rendered HTML listing of inherited methods; this hunk adds a link for mlr3::Learner$encapsulate() after mlr3::Learner$base_learner()]
diff --git a/man/mlr_learners_torch_image.Rd b/man/mlr_learners_torch_image.Rd
index 723a4e223..af2b854de 100644
--- a/man/mlr_learners_torch_image.Rd
+++ b/man/mlr_learners_torch_image.Rd
@@ -36,6 +36,7 @@ Other Learner:
 [rendered HTML listing of inherited methods; this hunk adds a link for mlr3::Learner$encapsulate() after mlr3::Learner$base_learner()]
diff --git a/man/mlr_learners_torch_model.Rd b/man/mlr_learners_torch_model.Rd
index 505b8fbd8..da6fa0086 100644
--- a/man/mlr_learners_torch_model.Rd
+++ b/man/mlr_learners_torch_model.Rd
@@ -92,6 +92,7 @@ The ingress tokens. Must be non-\code{NULL} when calling \verb{$train()}.}
 [rendered HTML listing of inherited methods; this hunk adds a link for mlr3::Learner$encapsulate() after mlr3::Learner$base_learner()]
diff --git a/tests/testthat/test_CallbackSetTB.R b/tests/testthat/test_CallbackSetTB.R
index c89b07e2d..8a894ec46 100644
--- a/tests/testthat/test_CallbackSetTB.R
+++ b/tests/testthat/test_CallbackSetTB.R
@@ -1,5 +1,9 @@
 library(tfevents)
 
+event_tag_is = function(event, tag_name) {
+  ifelse(is.null(event), FALSE, event["tag"] == tag_name)
+}
+
 test_that("autotest", {
   cb = t_clbk("tb", path = tempfile(), log_train_loss = TRUE)
   expect_torch_callback(cb, check_man = TRUE)
@@ -33,10 +37,6 @@ test_that("a simple example works", {
 
   events = mlr3misc::map(collect_events(pth0)$summary, unlist)
 
-  event_tag_is = function(event, tag_name) {
-    ifelse(is.null(event), FALSE, event["tag"] == tag_name)
-  }
-
   n_train_loss_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.loss")))
   n_train_acc_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.classif.acc")))
   n_train_ce_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.classif.ce")))
@@ -79,10 +79,6 @@ test_that("eval_freq works", {
 
   events = mlr3misc::map(collect_events(pth0)$summary, unlist)
 
-  event_tag_is = function(event, tag_name) {
-    ifelse(is.null(event), FALSE, event["tag"] == tag_name)
-  }
-
   n_train_loss_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.loss")))
   n_train_acc_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.classif.acc")))
   n_train_ce_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.classif.ce")))
@@ -122,10 +118,6 @@ test_that("the flag for tracking the train loss works", {
 
   events = mlr3misc::map(collect_events(pth0)$summary, unlist)
 
-  event_tag_is = function(event, tag_name) {
-    ifelse(is.null(event), FALSE, event["tag"] == tag_name)
-  }
-
   n_train_loss_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.loss")))
 
   expect_equal(n_train_loss_events, 0)

From a86c9461569716b550076b0518f858c81e311639 Mon Sep 17 00:00:00 2001
From: Carson Zhang
Date: Sun, 13 Oct 2024 21:37:26 +0200
Subject: [PATCH 19/37] moved newest news to bottom

---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 9bf2927e4..f2d363487 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,7 +1,7 @@
 # mlr3torch dev
 
-* Added `CallbackSetTB`, which allows logging that can be viewed by TensorBoard.
 * Don't use deprecated `data_formats` anymore
+* Added `CallbackSetTB`, which allows logging that can be viewed by TensorBoard.
 
 # mlr3torch 0.1.1
From 3652fe680ee7ede31fe3ba31971d11801bb564bd Mon Sep 17 00:00:00 2001
From: Carson Zhang
Date: Tue, 15 Oct 2024 14:28:28 +0200
Subject: [PATCH 20/37] init

---
 .gitignore                             |   1 +
 benchmarks/rf_use_case/get_data.R      |  28 +++++++
 benchmarks/rf_use_case/run_benchmark.R | 105 +++++++++++++++++++++++++
 benchmarks/rf_use_case/view_results.R  |   8 ++
 4 files changed, 142 insertions(+)
 create mode 100644 benchmarks/rf_use_case/get_data.R
 create mode 100644 benchmarks/rf_use_case/run_benchmark.R
 create mode 100644 benchmarks/rf_use_case/view_results.R

diff --git a/.gitignore b/.gitignore
index 652d7aaa7..f3d95835a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,4 @@ inst/doc
 /doc/
 /Meta/
 CRAN-SUBMISSION
+benchmarks/data
\ No newline at end of file
diff --git a/benchmarks/rf_use_case/get_data.R b/benchmarks/rf_use_case/get_data.R
new file mode 100644
index 000000000..478295b06
--- /dev/null
+++ b/benchmarks/rf_use_case/get_data.R
@@ -0,0 +1,28 @@
+library(here)
+
+library(mlr3oml)
+library(data.table)
+library(tidytable)
+
+cc18_collection = ocl(99)
+
+cc18_simple = list_oml_data(data_id = cc18_collection$data_ids,
+  number_classes = 2,
+  number_missing_values = 0)
+
+cc18_small = cc18_simple |>
+  filter(NumberOfSymbolicFeatures == 1) |>
+  select(data_id, name, NumberOfFeatures, NumberOfInstances) |>
+  filter(name %in% c("qsar-biodeg", "madelon", "kc1", "blood-transfusion-service-center", "climate-model-simulation-crashes"))
+
+# kc1_1067 = odt(1067)
+
+
+# save the data locally
+mlr3misc::pmap(cc18_small, function(data_id, name, NumberOfFeatures, NumberOfInstances) {
+  dt = odt(data_id)$data
+  dt_name = here("data", "oml", paste0(name, "_", data_id, ".csv"))
+  fwrite(dt, file = dt_name)
+})
+
+fwrite(cc18_small, here("data", "oml", "cc18_small.csv"))
diff --git a/benchmarks/rf_use_case/run_benchmark.R b/benchmarks/rf_use_case/run_benchmark.R
new file mode 100644
index 000000000..bc9b3f5c7
--- /dev/null
+++ b/benchmarks/rf_use_case/run_benchmark.R
@@ -0,0 +1,105 @@
+library(mlr3)
+library(data.table)
+library(mlr3torch)
+library(paradox)
+
+library(here)
+
+# define the tasks
+cc18_small = fread(here("data", "oml", "cc18_small.csv"))
+cc18_small_datasets = mlr3misc::pmap(cc18_small, function(data_id, name, NumberOfFeatures, NumberOfInstances) {
+  dt_name = here("data", "oml", paste0(name, "_", data_id, ".csv"))
+  fread(dt_name)
+})
+# cc18_small_datasets
+
+# cc18_small_datasets[[1]]
+
+# TODO: determine whether we can use OML tasks "directly"
+# didn't do this at first because they come with resamplings and we want to use our own resamplings
+kc1_1067 = as_task_classif(cc18_small_datasets[[1]], target = "defects")
+blood_1464 = as_task_classif(cc18_small_datasets[[2]], target = "Class")
+
+tasks = list(kc1_1067, blood_1464)
+
+# define the learners
+mlp = lrn("classif.mlp",
+  activation = nn_relu,
+  neurons = to_tune(
+    c(
+      10, 20,
+      c(10, 10), c(10, 20), c(20, 10), c(20, 20)
+    )
+  ),
+  batch_size = to_tune(16, 32, 64),
+  p = to_tune(0.1, 0.9),
+  epochs = to_tune(upper = 1000L, internal = TRUE),
+  validate = 0.3,
+  measures_valid = msr("classif.acc"),
+  patience = 10,
+  device = "cpu"
+)
+
+# define an AutoTuner that wraps the classif.mlp
+at = auto_tuner(
+  learner = mlp,
+  tuner = tnr("grid_search"),
+  resampling = rsmp("cv"),
+  measure = msr("clasif.acc"),
+  term_evals = 10
+)
+
+future::plan("multisession")
+
+design = benchmark_grid(
+  tasks,
+  learners = list(at, lrn("classif.ranger"),
+  resampling = rsmp("cv", folds = 10))
+)
+
+bmr = benchmark(design)
+
+bmrdt = as.data.table(bmr)
+
+fwrite(bmrdt, here("R", "rf_Use_case", "results", "bmrdt.csv"))
+
+  # define an optimization strategy: grid search
+
+  # define a search space: the parameters to tune over
+
+  # neurons
+
+  # batch size
+
+  # dropout rate
+
+  # epochs
+
+  # use something standard (e.g. accuracy) as the tuning measure
+
+  # use k-fold cross validation
+
+  # set a number of evaluations for the tuner
+
+# TODO: set up the tuning space for the neurons and layers
+
+# layers_search_space <- 1:5
+# neurons_search_space <- seq(10, 50, by = 10)
+
+# generate_permutations <- function(layers_search_space, neurons_search_space) {
+#   result <- list()
+
+#   for (layers in layers_search_space) {
+#     # Generate all permutations with replacement
+#     perms <- expand.grid(replicate(layers, list(neurons_search_space), simplify = FALSE))
+
+#     # Convert each row to a vector and add to the result
+#     result <- c(result, apply(perms, 1, as.numeric))
+#   }
+
+#   return(result)
+# }
+
+# permutations <- generate_permutations(layers_search_space, neurons_search_space)
+
+# head(permutations)
diff --git a/benchmarks/rf_use_case/view_results.R b/benchmarks/rf_use_case/view_results.R
new file mode 100644
index 000000000..b179b9d33
--- /dev/null
+++ b/benchmarks/rf_use_case/view_results.R
@@ -0,0 +1,8 @@
+library(data.table)
+library(mlr3)
+
+library(here)
+
+bmrdt = fread(here("R", "rf_Use_case", "results", "bmrdt.csv"))
+
+bmrdt
\ No newline at end of file

From 92b4ffcbb0a2c67af4561bc4dd47b0159fb9906c Mon Sep 17 00:00:00 2001
From: cxzhang4
Date: Tue, 15 Oct 2024 14:40:42 +0200
Subject: [PATCH 21/37] Update benchmarks/rf_use_case/run_benchmark.R

---
 benchmarks/rf_use_case/run_benchmark.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/rf_use_case/run_benchmark.R b/benchmarks/rf_use_case/run_benchmark.R
index bc9b3f5c7..05b74f6aa 100644
--- a/benchmarks/rf_use_case/run_benchmark.R
+++ b/benchmarks/rf_use_case/run_benchmark.R
@@ -45,7 +45,7 @@ at = auto_tuner(
   learner = mlp,
   tuner = tnr("grid_search"),
   resampling = rsmp("cv"),
-  measure = msr("clasif.acc"),
+  measure = msr("classif.acc"),
   term_evals = 10
 )

From f821e0953ea3fd2d69665fa4f5f882540b5f32de Mon Sep 17 00:00:00 2001
From: Carson Zhang
Date: Thu, 17 Oct 2024 22:42:52 +0200
Subject: [PATCH 22/37] use mlr3oml cache

---
 benchmarks/rf_use_case/get_data.R      | 21 +++++++-------------
 benchmarks/rf_use_case/run_benchmark.R | 24 ++++++++----------------
 2 files changed, 15 insertions(+), 30 deletions(-)

diff --git a/benchmarks/rf_use_case/get_data.R b/benchmarks/rf_use_case/get_data.R
index 478295b06..77eba492d 100644
--- a/benchmarks/rf_use_case/get_data.R
+++ b/benchmarks/rf_use_case/get_data.R
@@ -1,28 +1,21 @@
 library(here)
-
 library(mlr3oml)
-library(data.table)
 library(tidytable)
 
 cc18_collection = ocl(99)
 
-cc18_simple = list_oml_data(data_id = cc18_collection$data_ids, 
+cc18_simple = list_oml_data(data_id = cc18_collection$data_ids,
   number_classes = 2,
   number_missing_values = 0)
 
 cc18_small = cc18_simple |>
-  filter(NumberOfSymbolicFeatures == 1) |>
+  filter(NumberOfSymbolicFeatures == 1) |> # the target class is a symbolic feature
   select(data_id, name, NumberOfFeatures, NumberOfInstances) |>
   filter(name %in% c("qsar-biodeg", "madelon", "kc1", "blood-transfusion-service-center", "climate-model-simulation-crashes"))
 
-# kc1_1067 = odt(1067)
-
-
-# save the data locally
-mlr3misc::pmap(cc18_small, function(data_id, name, NumberOfFeatures, NumberOfInstances) {
-  dt = odt(data_id)$data
-  dt_name = here("data", "oml", paste0(name, "_", data_id, ".csv"))
-  fwrite(dt, file = dt_name)
-})
+cache_dir = here("benchmarks", "data", "oml")
+options(mlr3oml.cache = here("benchmarks", "data", "oml"))
+mlr3misc::pwalk(cc18_small, function(data_id, name, NumberOfFeatures, NumberOfInstances) odt(data_id))
 
-fwrite(cc18_small, here("data", "oml", "cc18_small.csv"))
+dir.create(here("benchmarks", "data", "oml", "collections"))
+fwrite(cc18_small, here("benchmarks", "data", "oml", "collections", "cc18_small.csv"))
diff --git a/benchmarks/rf_use_case/run_benchmark.R b/benchmarks/rf_use_case/run_benchmark.R
index 05b74f6aa..1ff31750f 100644
--- a/benchmarks/rf_use_case/run_benchmark.R
+++ b/benchmarks/rf_use_case/run_benchmark.R
@@ -5,22 +5,14 @@ library(paradox)
 
 library(here)
 
+options(mlr3oml.cache = here("benchmarks", "data", "oml"))
+
 # define the tasks
-cc18_small = fread(here("data", "oml", "cc18_small.csv"))
-cc18_small_datasets = mlr3misc::pmap(cc18_small, function(data_id, name, NumberOfFeatures, NumberOfInstances) {
-  dt_name = here("data", "oml", paste0(name, "_", data_id, ".csv"))
-  fread(dt_name)
-})
-# cc18_small_datasets
+cc18_small = fread(here(getOption("mlr3oml.cache"), "collections", "cc18_small.csv"))
 
-# cc18_small_datasets[[1]]
+task_list = mlr3misc::pmap(cc18_small, function(data_id, name, NumberOfFeatures, NumberOfInstances) tsk("oml", data_id = data_id))
 
-# TODO: determine whether we can use OML tasks "directly"
-# didn't do this at first because they come with resamplings and we want to use our own resamplings
-kc1_1067 = as_task_classif(cc18_small_datasets[[1]], target = "defects")
-blood_1464 = as_task_classif(cc18_small_datasets[[2]], target = "Class")
-
-tasks = list(kc1_1067, blood_1464)
+task_list
 
 # define the learners
 mlp = lrn("classif.mlp",
@@ -88,15 +80,15 @@ fwrite(bmrdt, here("R", "rf_Use_case", "results", "bmrdt.csv"))
 
 # generate_permutations <- function(layers_search_space, neurons_search_space) {
 #   result <- list()
-  
+
 #   for (layers in layers_search_space) {
 #     # Generate all permutations with replacement
 #     perms <- expand.grid(replicate(layers, list(neurons_search_space), simplify = FALSE))
-  
+
 #     # Convert each row to a vector and add to the result
 #     result <- c(result, apply(perms, 1, as.numeric))
 #   }
-  
+
 #   return(result)
 # }
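[Aside, not part of the patch series: what the switch to the `mlr3oml` cache buys. With the `mlr3oml.cache` option pointing at a directory, `odt()` and `tsk("oml", ...)` download an object once and serve later calls from disk. The data id and path below are hypothetical, chosen only for illustration.]

library(mlr3oml)

options(mlr3oml.cache = "~/.cache/mlr3oml-demo")  # hypothetical cache location
dataset = odt(31)  # first call downloads the dataset and writes it to the cache
dataset = odt(31)  # subsequent calls read the cached copy instead of re-downloading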
data_id, ".csv")) - fwrite(dt, file = dt_name) -}) +cache_dir = here("benchmarks", "data", "oml") +options(mlr3oml.cache = here("benchmarks", "data", "oml")) +mlr3misc::pwalk(cc18_small, function(data_id, name, NumberOfFeatures, NumberOfInstances) odt(data_id)) -fwrite(cc18_small, here("data", "oml", "cc18_small.csv")) +dir.create(here("benchmarks", "data", "oml", "collections")) +fwrite(cc18_small, here("benchmarks", "data", "oml", "collections", "cc18_small.csv")) diff --git a/benchmarks/rf_use_case/run_benchmark.R b/benchmarks/rf_use_case/run_benchmark.R index 05b74f6aa..1ff31750f 100644 --- a/benchmarks/rf_use_case/run_benchmark.R +++ b/benchmarks/rf_use_case/run_benchmark.R @@ -5,22 +5,14 @@ library(paradox) library(here) -# define the tasks -cc18_small = fread(here("data", "oml", "cc18_small.csv")) -cc18_small_datasets = mlr3misc::pmap(cc18_small, function(data_id, name, NumberOfFeatures, NumberOfInstances) { - dt_name = here("data", "oml", paste0(name, "_", data_id, ".csv")) - fread(dt_name) -}) -# cc18_small_datasets +options(mlr3oml.cache = here("benchmarks", "data", "oml")) -# cc18_small_datasets[[1]] +# define the tasks +cc18_small = fread(here(getOption("mlr3oml.cache"), "collections", "cc18_small.csv")) -# TODO: determine whether we can use OML tasks "directly" -# didn't do this at first because they come with resamplings and we want to use our own resamplings -kc1_1067 = as_task_classif(cc18_small_datasets[[1]], target = "defects") -blood_1464 = as_task_classif(cc18_small_datasets[[2]], target = "Class") +task_list = mlr3misc::pmap(cc18_small, function(data_id, name, NumberOfFeatures, NumberOfInstances) tsk("oml", data_id = data_id)) -tasks = list(kc1_1067, blood_1464) +task_list # define the learners mlp = lrn("classif.mlp", @@ -88,15 +80,15 @@ fwrite(bmrdt, here("R", "rf_Use_case", "results", "bmrdt.csv")) # generate_permutations <- function(layers_search_space, neurons_search_space) { # result <- list() - + # for (layers in layers_search_space) { # # Generate all permutations with replacement # perms <- expand.grid(replicate(layers, list(neurons_search_space), simplify = FALSE)) - + # # Convert each row to a vector and add to the result # result <- c(result, apply(perms, 1, as.numeric)) # } - + # return(result) # } From 59030015981a1c11ffbffb1df556359ea4a4a7d0 Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Thu, 17 Oct 2024 22:46:57 +0200 Subject: [PATCH 23/37] Copied in Sebastian's solution for tuning the neurons as a paramset --- benchmarks/rf_use_case/run_benchmark.R | 53 +++----------------------- 1 file changed, 6 insertions(+), 47 deletions(-) diff --git a/benchmarks/rf_use_case/run_benchmark.R b/benchmarks/rf_use_case/run_benchmark.R index 1ff31750f..85de80378 100644 --- a/benchmarks/rf_use_case/run_benchmark.R +++ b/benchmarks/rf_use_case/run_benchmark.R @@ -17,15 +17,15 @@ task_list # define the learners mlp = lrn("classif.mlp", activation = nn_relu, - neurons = to_tune( - c( - 10, 20, - c(10, 10), c(10, 20), c(20, 10), c(20, 20) - ) + neurons = to_tune(ps( + n_layers = p_int(lower = 1, upper = 10), latent = p_int(10, 500), + .extra_trafo = function(x, param_set) { + list(neurons = rep(x$latent, x$n_layers)) + }) ), batch_size = to_tune(16, 32, 64), p = to_tune(0.1, 0.9), - epochs = to_tune(upper = 1000L, internal = TRUE), + epochs = to_tune(upper = 100, internal = TRUE), validate = 0.3, measures_valid = msr("classif.acc"), patience = 10, @@ -54,44 +54,3 @@ bmr = benchmark(design) bmrdt = as.data.table(bmr) fwrite(bmrdt, here("R", "rf_Use_case", "results", 
"bmrdt.csv")) - - # define an optimization strategy: grid search - - # define a search space: the parameters to tune over - - # neurons - - # batch size - - # dropout rate - - # epochs - - # use something standard (e.g. accuracy) as the tuning measure - - # use k-fold cross validation - - # set a number of evaluations for the tuner - -# TODO: set up the tuning space for the neurons and layers - -# layers_search_space <- 1:5 -# neurons_search_space <- seq(10, 50, by = 10) - -# generate_permutations <- function(layers_search_space, neurons_search_space) { -# result <- list() - -# for (layers in layers_search_space) { -# # Generate all permutations with replacement -# perms <- expand.grid(replicate(layers, list(neurons_search_space), simplify = FALSE)) - -# # Convert each row to a vector and add to the result -# result <- c(result, apply(perms, 1, as.numeric)) -# } - -# return(result) -# } - -# permutations <- generate_permutations(layers_search_space, neurons_search_space) - -# head(permutations) From 869aba2b45d7f8b7f72a64c418466968dfc1c8cd Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Thu, 17 Oct 2024 23:16:51 +0200 Subject: [PATCH 24/37] looks like benchmark code working --- benchmarks/rf_use_case/run_benchmark.R | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/benchmarks/rf_use_case/run_benchmark.R b/benchmarks/rf_use_case/run_benchmark.R index 85de80378..361870d45 100644 --- a/benchmarks/rf_use_case/run_benchmark.R +++ b/benchmarks/rf_use_case/run_benchmark.R @@ -1,8 +1,13 @@ library(mlr3) -library(data.table) +library(mlr3learners) +library(mlr3oml) library(mlr3torch) +library(mlr3tuning) + library(paradox) +library(data.table) + library(here) options(mlr3oml.cache = here("benchmarks", "data", "oml")) @@ -23,7 +28,7 @@ mlp = lrn("classif.mlp", list(neurons = rep(x$latent, x$n_layers)) }) ), - batch_size = to_tune(16, 32, 64), + batch_size = to_tune(c(16, 32, 64)), p = to_tune(0.1, 0.9), epochs = to_tune(upper = 100, internal = TRUE), validate = 0.3, @@ -43,14 +48,16 @@ at = auto_tuner( future::plan("multisession") +lrn_rf = lrn("classif.ranger") design = benchmark_grid( - tasks, - learners = list(at, lrn("classif.ranger"), + task_list, + learners = list(at, lrn_rf), resampling = rsmp("cv", folds = 10)) -) -bmr = benchmark(design) +bench::system_time( + bmr <- benchmark(design) +) bmrdt = as.data.table(bmr) -fwrite(bmrdt, here("R", "rf_Use_case", "results", "bmrdt.csv")) +fwrite(bmrdt, here("R", "rf_use_case", "results", "bmrdt.csv")) From ab3bedf5dcd82b16f189df28170e78b54057ef7d Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Fri, 18 Oct 2024 00:07:01 +0200 Subject: [PATCH 25/37] Error: Inner tuning and parameter transformations are currently not supported. 
--- benchmarks/rf_use_case/run_benchmark.R | 31 ++++++++++++++++++-------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/benchmarks/rf_use_case/run_benchmark.R b/benchmarks/rf_use_case/run_benchmark.R index 361870d45..8dbad412b 100644 --- a/benchmarks/rf_use_case/run_benchmark.R +++ b/benchmarks/rf_use_case/run_benchmark.R @@ -3,11 +3,11 @@ library(mlr3learners) library(mlr3oml) library(mlr3torch) library(mlr3tuning) +library(mlr3mbo) +library(bbotk) -library(paradox) - +library(bench) library(data.table) - library(here) options(mlr3oml.cache = here("benchmarks", "data", "oml")) @@ -28,25 +28,37 @@ mlp = lrn("classif.mlp", list(neurons = rep(x$latent, x$n_layers)) }) ), - batch_size = to_tune(c(16, 32, 64)), + batch_size = to_tune(c(16, 32, 64, 128, 256)), p = to_tune(0.1, 0.9), - epochs = to_tune(upper = 100, internal = TRUE), + epochs = to_tune(upper = 1000L, internal = TRUE), validate = 0.3, measures_valid = msr("classif.acc"), patience = 10, device = "cpu" ) +# define the optimizatio nstrategy +bayesopt_ego = mlr_loop_functions$get("bayesopt_ego") +surrogate = srlrn(lrn("regr.km", covtype = "matern5_2", + optim.method = "BFGS", control = list(trace = FALSE))) +acq_function = acqf("ei") +acq_optimizer = acqo(opt("nloptr", algorithm = "NLOPT_GN_ORIG_DIRECT"), + terminator = trm("stagnation", iters = 100, threshold = 1e-5)) + # define an AutoTuner that wraps the classif.mlp at = auto_tuner( learner = mlp, - tuner = tnr("grid_search"), + tuner = tnr("mbo", + loop_function = bayesopt_ego, + surrogate = surrogate, + acq_function = acq_function, + acq_optimizer = acq_optimizer), resampling = rsmp("cv"), measure = msr("classif.acc"), - term_evals = 10 + term_evals = 100 ) -future::plan("multisession") +future::plan("multisession", workers = 8) lrn_rf = lrn("classif.ranger") design = benchmark_grid( @@ -54,10 +66,11 @@ design = benchmark_grid( learners = list(at, lrn_rf), resampling = rsmp("cv", folds = 10)) -bench::system_time( +time = bench::system_time( bmr <- benchmark(design) ) bmrdt = as.data.table(bmr) fwrite(bmrdt, here("R", "rf_use_case", "results", "bmrdt.csv")) +fwrite(time, here("R", "rf_use_case", "results", "time.csv")) From 31b396406afbbcdfbfb8197cfc7a4045a32cf68d Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Fri, 18 Oct 2024 00:08:45 +0200 Subject: [PATCH 26/37] changed to grid search --- benchmarks/rf_use_case/run_benchmark.R | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/benchmarks/rf_use_case/run_benchmark.R b/benchmarks/rf_use_case/run_benchmark.R index 8dbad412b..a5dd05328 100644 --- a/benchmarks/rf_use_case/run_benchmark.R +++ b/benchmarks/rf_use_case/run_benchmark.R @@ -45,17 +45,19 @@ acq_function = acqf("ei") acq_optimizer = acqo(opt("nloptr", algorithm = "NLOPT_GN_ORIG_DIRECT"), terminator = trm("stagnation", iters = 100, threshold = 1e-5)) -# define an AutoTuner that wraps the classif.mlp -at = auto_tuner( - learner = mlp, - tuner = tnr("mbo", +tnr_mbo = tnr("mbo", loop_function = bayesopt_ego, surrogate = surrogate, acq_function = acq_function, - acq_optimizer = acq_optimizer), + acq_optimizer = acq_optimizer) + +# define an AutoTuner that wraps the classif.mlp +at = auto_tuner( + learner = mlp, + tuner = tnr("grid_search"), resampling = rsmp("cv"), measure = msr("classif.acc"), - term_evals = 100 + term_evals = 1000 ) future::plan("multisession", workers = 8) From a4898971c1e933f71d1f17f33122b0cf7599b2fe Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Fri, 18 Oct 2024 01:07:01 +0200 Subject: [PATCH 27/37] 
LLM-generated fn for neuron search space --- benchmarks/rf_use_case/run_benchmark.R | 39 ++++++++++++++++++-------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/benchmarks/rf_use_case/run_benchmark.R b/benchmarks/rf_use_case/run_benchmark.R index a5dd05328..068234e35 100644 --- a/benchmarks/rf_use_case/run_benchmark.R +++ b/benchmarks/rf_use_case/run_benchmark.R @@ -20,14 +20,28 @@ task_list = mlr3misc::pmap(cc18_small, function(data_id, name, NumberOfFeatures, task_list # define the learners +neurons = function(n_layers, latent_dim) { + rep(latent_dim, n_layers) +} + +n_layers_values <- 1:10 +latent_dim_values <- seq(10, 500, by = 10) +neurons_search_space <- mapply( + neurons, + expand.grid(n_layers = n_layers_values, latent_dim = latent_dim_values)$n_layers, + expand.grid(n_layers = n_layers_values, latent_dim = latent_dim_values)$latent_dim, + SIMPLIFY = FALSE +) + mlp = lrn("classif.mlp", activation = nn_relu, - neurons = to_tune(ps( - n_layers = p_int(lower = 1, upper = 10), latent = p_int(10, 500), - .extra_trafo = function(x, param_set) { - list(neurons = rep(x$latent, x$n_layers)) - }) - ), + # neurons = to_tune(ps( + # n_layers = p_int(lower = 1, upper = 10), latent = p_int(10, 500), + # .extra_trafo = function(x, param_set) { + # list(neurons = rep(x$latent, x$n_layers)) + # }) + # ), + neurons = to_tune(neurons_search_space), batch_size = to_tune(c(16, 32, 64, 128, 256)), p = to_tune(0.1, 0.9), epochs = to_tune(upper = 1000L, internal = TRUE), @@ -37,7 +51,7 @@ mlp = lrn("classif.mlp", device = "cpu" ) -# define the optimizatio nstrategy +# define the optimization strategy bayesopt_ego = mlr_loop_functions$get("bayesopt_ego") surrogate = srlrn(lrn("regr.km", covtype = "matern5_2", optim.method = "BFGS", control = list(trace = FALSE))) @@ -54,19 +68,20 @@ tnr_mbo = tnr("mbo", # define an AutoTuner that wraps the classif.mlp at = auto_tuner( learner = mlp, - tuner = tnr("grid_search"), + tuner = tnr_mbo, resampling = rsmp("cv"), measure = msr("classif.acc"), - term_evals = 1000 + term_evals = 10 ) -future::plan("multisession", workers = 8) +future::plan("multisession", workers = 64) lrn_rf = lrn("classif.ranger") design = benchmark_grid( task_list, learners = list(at, lrn_rf), - resampling = rsmp("cv", folds = 10)) + resampling = rsmp("cv", folds = 10) +) time = bench::system_time( bmr <- benchmark(design) @@ -75,4 +90,4 @@ time = bench::system_time( bmrdt = as.data.table(bmr) fwrite(bmrdt, here("R", "rf_use_case", "results", "bmrdt.csv")) -fwrite(time, here("R", "rf_use_case", "results", "time.csv")) +fwrite(time, here("R", "rf_use_case", "results", "time.csv")) \ No newline at end of file From 0073dccf9cc6f0216dbcf3ab8bbd9918f2d471ec Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Fri, 18 Oct 2024 01:57:22 +0200 Subject: [PATCH 28/37] should work, test this on another machine --- benchmarks/rf_use_case/run_benchmark.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/benchmarks/rf_use_case/run_benchmark.R b/benchmarks/rf_use_case/run_benchmark.R index 068234e35..0c6ae7fb5 100644 --- a/benchmarks/rf_use_case/run_benchmark.R +++ b/benchmarks/rf_use_case/run_benchmark.R @@ -12,6 +12,8 @@ library(here) options(mlr3oml.cache = here("benchmarks", "data", "oml")) +# when working on the GPU server, don't forget to activate the mamba environment with the torch installation + # define the tasks cc18_small = fread(here(getOption("mlr3oml.cache"), "collections", "cc18_small.csv")) @@ -71,10 +73,10 @@ at = auto_tuner( tuner = tnr_mbo, resampling = 
rsmp("cv"), measure = msr("classif.acc"), - term_evals = 10 + term_evals = 1000 ) -future::plan("multisession", workers = 64) +future::plan("multisession", workers = 8) lrn_rf = lrn("classif.ranger") design = benchmark_grid( From 10f344843f4f4744fad1b704b8a10b8db6000427 Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Fri, 18 Oct 2024 11:11:45 +0200 Subject: [PATCH 29/37] fjwoie --- benchmarks/rf_use_case/run_benchmark.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/benchmarks/rf_use_case/run_benchmark.R b/benchmarks/rf_use_case/run_benchmark.R index 0c6ae7fb5..8483db776 100644 --- a/benchmarks/rf_use_case/run_benchmark.R +++ b/benchmarks/rf_use_case/run_benchmark.R @@ -1,6 +1,7 @@ library(mlr3) library(mlr3learners) library(mlr3oml) +# pak::pkg_install("mlr-org/mlr3torch") library(mlr3torch) library(mlr3tuning) library(mlr3mbo) @@ -26,8 +27,8 @@ neurons = function(n_layers, latent_dim) { rep(latent_dim, n_layers) } -n_layers_values <- 1:10 -latent_dim_values <- seq(10, 500, by = 10) +n_layers_values <- 1:5 +latent_dim_values <- seq(10, 200, by = 20) neurons_search_space <- mapply( neurons, expand.grid(n_layers = n_layers_values, latent_dim = latent_dim_values)$n_layers, @@ -44,7 +45,7 @@ mlp = lrn("classif.mlp", # }) # ), neurons = to_tune(neurons_search_space), - batch_size = to_tune(c(16, 32, 64, 128, 256)), + batch_size = to_tune(c(16, 32)), p = to_tune(0.1, 0.9), epochs = to_tune(upper = 1000L, internal = TRUE), validate = 0.3, @@ -82,7 +83,7 @@ lrn_rf = lrn("classif.ranger") design = benchmark_grid( task_list, learners = list(at, lrn_rf), - resampling = rsmp("cv", folds = 10) + resampling = rsmp("cv", folds = 3) ) time = bench::system_time( From b81c23b63eb2a287178e5df74fac7eca8bc6e994 Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Fri, 18 Oct 2024 13:38:15 +0200 Subject: [PATCH 30/37] encapsulated the learner for parallelization --- benchmarks/rf_use_case/run_benchmark.R | 39 +++++++++++++------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/benchmarks/rf_use_case/run_benchmark.R b/benchmarks/rf_use_case/run_benchmark.R index 8483db776..c5d2ba6dd 100644 --- a/benchmarks/rf_use_case/run_benchmark.R +++ b/benchmarks/rf_use_case/run_benchmark.R @@ -45,47 +45,48 @@ mlp = lrn("classif.mlp", # }) # ), neurons = to_tune(neurons_search_space), - batch_size = to_tune(c(16, 32)), - p = to_tune(0.1, 0.9), + batch_size = to_tune(c(64, 128, 256)), + p = to_tune(0.1, 0.7), epochs = to_tune(upper = 1000L, internal = TRUE), - validate = 0.3, + validate = "test", measures_valid = msr("classif.acc"), patience = 10, device = "cpu" ) -# define the optimization strategy -bayesopt_ego = mlr_loop_functions$get("bayesopt_ego") -surrogate = srlrn(lrn("regr.km", covtype = "matern5_2", - optim.method = "BFGS", control = list(trace = FALSE))) -acq_function = acqf("ei") -acq_optimizer = acqo(opt("nloptr", algorithm = "NLOPT_GN_ORIG_DIRECT"), - terminator = trm("stagnation", iters = 100, threshold = 1e-5)) - -tnr_mbo = tnr("mbo", - loop_function = bayesopt_ego, - surrogate = surrogate, - acq_function = acq_function, - acq_optimizer = acq_optimizer) +mlp$encapsulate("callr", lrn("classif.featureless")) # define an AutoTuner that wraps the classif.mlp at = auto_tuner( learner = mlp, - tuner = tnr_mbo, - resampling = rsmp("cv"), + tuner = tnr("mbo"), + resampling = rsmp("cv", folds = 5), measure = msr("classif.acc"), - term_evals = 1000 + term_evals = 10 ) +# two ways to parallelize: +# 1: inner resampling by the tuner +# outer resampling by the benchmark +# 8 
"learners whose final performance will be compared" are evalua +# each task, learner, resampling fold are independent +# some parallelization frameworks will wait for all 8 in the first "batch" to finish before working on the next 8 +# TODO: change this to parallelize both inner and outer resamplings future::plan("multisession", workers = 8) lrn_rf = lrn("classif.ranger") + +options(mlr3.exec_random = FALSE) + +# ensure that first the autotuner runs design = benchmark_grid( task_list, learners = list(at, lrn_rf), resampling = rsmp("cv", folds = 3) ) +design = design[order(mlr3misc::ids(learner)), ] + time = bench::system_time( bmr <- benchmark(design) ) From 00b272fece09b1fb21e05aab1b83d0be460eac71 Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Fri, 18 Oct 2024 13:39:23 +0200 Subject: [PATCH 31/37] comments --- benchmarks/rf_use_case/run_benchmark.R | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/benchmarks/rf_use_case/run_benchmark.R b/benchmarks/rf_use_case/run_benchmark.R index c5d2ba6dd..7bd48f8b9 100644 --- a/benchmarks/rf_use_case/run_benchmark.R +++ b/benchmarks/rf_use_case/run_benchmark.R @@ -13,8 +13,6 @@ library(here) options(mlr3oml.cache = here("benchmarks", "data", "oml")) -# when working on the GPU server, don't forget to activate the mamba environment with the torch installation - # define the tasks cc18_small = fread(here(getOption("mlr3oml.cache"), "collections", "cc18_small.csv")) @@ -65,26 +63,17 @@ at = auto_tuner( term_evals = 10 ) -# two ways to parallelize: -# 1: inner resampling by the tuner -# outer resampling by the benchmark -# 8 "learners whose final performance will be compared" are evalua -# each task, learner, resampling fold are independent -# some parallelization frameworks will wait for all 8 in the first "batch" to finish before working on the next 8 -# TODO: change this to parallelize both inner and outer resamplings future::plan("multisession", workers = 8) lrn_rf = lrn("classif.ranger") options(mlr3.exec_random = FALSE) -# ensure that first the autotuner runs design = benchmark_grid( task_list, learners = list(at, lrn_rf), resampling = rsmp("cv", folds = 3) ) - design = design[order(mlr3misc::ids(learner)), ] time = bench::system_time( From 89a72f126ebcc35274bc1ea913493af3fb6e07fd Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Sun, 20 Oct 2024 20:53:40 +0200 Subject: [PATCH 32/37] added install script --- benchmarks/rf_use_case/Rplots.pdf | Bin benchmarks/rf_use_case/install_packages.R | 11 +++ benchmarks/rf_use_case/run_benchmark.R | 30 ++------ benchmarks/rf_use_case/single_task.R | 85 ++++++++++++++++++++++ 4 files changed, 102 insertions(+), 24 deletions(-) create mode 100644 benchmarks/rf_use_case/Rplots.pdf create mode 100644 benchmarks/rf_use_case/install_packages.R create mode 100644 benchmarks/rf_use_case/single_task.R diff --git a/benchmarks/rf_use_case/Rplots.pdf b/benchmarks/rf_use_case/Rplots.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/benchmarks/rf_use_case/install_packages.R b/benchmarks/rf_use_case/install_packages.R new file mode 100644 index 000000000..9262c2a77 --- /dev/null +++ b/benchmarks/rf_use_case/install_packages.R @@ -0,0 +1,11 @@ +devtools::install_github("mlr-org/mlr3torch") +devtools::install_github("mlr-org/mlr3tuning@fix/int-tune-trafo") + +# Package names +packages = c("here", "mlr3oml", "tidytable", "mlr3", "mlr3learners", "mlr3tuning", "mlr3mbo", "bbotk", "bench", "data.table") + +# Install packages not yet installed 
From 00b272fece09b1fb21e05aab1b83d0be460eac71 Mon Sep 17 00:00:00 2001
From: Carson Zhang
Date: Fri, 18 Oct 2024 13:39:23 +0200
Subject: [PATCH 31/37] comments

---
 benchmarks/rf_use_case/run_benchmark.R | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/benchmarks/rf_use_case/run_benchmark.R b/benchmarks/rf_use_case/run_benchmark.R
index c5d2ba6dd..7bd48f8b9 100644
--- a/benchmarks/rf_use_case/run_benchmark.R
+++ b/benchmarks/rf_use_case/run_benchmark.R
@@ -13,8 +13,6 @@ library(here)
 
 options(mlr3oml.cache = here("benchmarks", "data", "oml"))
 
-# when working on the GPU server, don't forget to activate the mamba environment with the torch installation
-
 # define the tasks
 cc18_small = fread(here(getOption("mlr3oml.cache"), "collections", "cc18_small.csv"))
 
@@ -65,26 +63,17 @@ at = auto_tuner(
   term_evals = 10
 )
 
-# two ways to parallelize:
-# 1: inner resampling by the tuner
-# outer resampling by the benchmark
-# 8 "learners whose final performance will be compared" are evaluated
-# each task, learner, resampling fold are independent
-# some parallelization frameworks will wait for all 8 in the first "batch" to finish before working on the next 8
-# TODO: change this to parallelize both inner and outer resamplings
 future::plan("multisession", workers = 8)
 
 lrn_rf = lrn("classif.ranger")
 
 options(mlr3.exec_random = FALSE)
 
-# ensure that first the autotuner runs
 design = benchmark_grid(
   task_list,
   learners = list(at, lrn_rf),
   resampling = rsmp("cv", folds = 3)
 )

From 89a72f126ebcc35274bc1ea913493af3fb6e07fd Mon Sep 17 00:00:00 2001
From: Carson Zhang
Date: Sun, 20 Oct 2024 20:53:40 +0200
Subject: [PATCH 32/37] added install script

---
 benchmarks/rf_use_case/Rplots.pdf         | Bin
 benchmarks/rf_use_case/install_packages.R | 11 +++
 benchmarks/rf_use_case/run_benchmark.R    | 30 ++------
 benchmarks/rf_use_case/single_task.R      | 85 ++++++++++++++++++++++
 4 files changed, 102 insertions(+), 24 deletions(-)
 create mode 100644 benchmarks/rf_use_case/Rplots.pdf
 create mode 100644 benchmarks/rf_use_case/install_packages.R
 create mode 100644 benchmarks/rf_use_case/single_task.R

diff --git a/benchmarks/rf_use_case/Rplots.pdf b/benchmarks/rf_use_case/Rplots.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/benchmarks/rf_use_case/install_packages.R b/benchmarks/rf_use_case/install_packages.R
new file mode 100644
index 000000000..9262c2a77
--- /dev/null
+++ b/benchmarks/rf_use_case/install_packages.R
@@ -0,0 +1,11 @@
+devtools::install_github("mlr-org/mlr3torch")
+devtools::install_github("mlr-org/mlr3tuning@fix/int-tune-trafo")
+
+# Package names
+packages = c("here", "mlr3oml", "tidytable", "mlr3", "mlr3learners", "mlr3tuning", "mlr3mbo", "bbotk", "bench", "data.table")
+
+# Install packages not yet installed
+installed_packages = packages %in% rownames(installed.packages())
+if (any(installed_packages == FALSE)) {
+  install.packages(packages[!installed_packages], repos = "https://ftp.fau.de/cran/")
+}
\ No newline at end of file
diff --git a/benchmarks/rf_use_case/run_benchmark.R b/benchmarks/rf_use_case/run_benchmark.R
index 7bd48f8b9..42280d929 100644
--- a/benchmarks/rf_use_case/run_benchmark.R
+++ b/benchmarks/rf_use_case/run_benchmark.R
@@ -1,7 +1,6 @@
 library(mlr3)
 library(mlr3learners)
 library(mlr3oml)
-# pak::pkg_install("mlr-org/mlr3torch")
 library(mlr3torch)
 library(mlr3tuning)
 library(mlr3mbo)
@@ -18,31 +17,14 @@ cc18_small = fread(here(getOption("mlr3oml.cache"), "collections", "cc18_small.csv"))
 
 task_list = mlr3misc::pmap(cc18_small, function(data_id, name, NumberOfFeatures, NumberOfInstances) tsk("oml", data_id = data_id))
 
-task_list
-
-# define the learners
-neurons = function(n_layers, latent_dim) {
-  rep(latent_dim, n_layers)
-}
-
-n_layers_values <- 1:5
-latent_dim_values <- seq(10, 200, by = 20)
-neurons_search_space <- mapply(
-  neurons,
-  expand.grid(n_layers = n_layers_values, latent_dim = latent_dim_values)$n_layers,
-  expand.grid(n_layers = n_layers_values, latent_dim = latent_dim_values)$latent_dim,
-  SIMPLIFY = FALSE
-)
-
 mlp = lrn("classif.mlp",
   activation = nn_relu,
-  # neurons = to_tune(ps(
-  #   n_layers = p_int(lower = 1, upper = 10), latent = p_int(10, 500),
-  #   .extra_trafo = function(x, param_set) {
-  #     list(neurons = rep(x$latent, x$n_layers))
-  #   })
-  # ),
-  neurons = to_tune(neurons_search_space),
+  neurons = to_tune(ps(
+    n_layers = p_int(lower = 1, upper = 10), latent = p_int(10, 500),
+    .extra_trafo = function(x, param_set) {
+      list(neurons = rep(x$latent, x$n_layers))
+    })
+  ),
   batch_size = to_tune(c(64, 128, 256)),
   p = to_tune(0.1, 0.7),
   epochs = to_tune(upper = 1000L, internal = TRUE),
diff --git a/benchmarks/rf_use_case/single_task.R b/benchmarks/rf_use_case/single_task.R
new file mode 100644
index 000000000..5dd830924
--- /dev/null
+++ b/benchmarks/rf_use_case/single_task.R
@@ -0,0 +1,85 @@
+library(mlr3)
+library(mlr3learners)
+library(mlr3oml)
+library(mlr3torch)
+library(mlr3tuning)
+library(mlr3mbo)
+library(bbotk)
+
+library(bench)
+library(data.table)
+library(here)
+
+options(mlr3oml.cache = here("benchmarks", "data", "oml"))
+
+# define the tasks
+cc18_small = fread(here(getOption("mlr3oml.cache"), "collections", "cc18_small.csv"))
+
+task_list = mlr3misc::pmap(cc18_small, function(data_id, name, NumberOfFeatures, NumberOfInstances) tsk("oml", data_id = data_id))
+
+task_list
+
+# define the learners
+# neurons = function(n_layers, latent_dim) {
+#   rep(latent_dim, n_layers)
+# }
+
+# n_layers_values <- 1:5
+# latent_dim_values <- seq(10, 200, by = 20)
+# neurons_search_space <- mapply(
+#   neurons,
+#   expand.grid(n_layers = n_layers_values, latent_dim = latent_dim_values)$n_layers,
+#   expand.grid(n_layers = n_layers_values, latent_dim = latent_dim_values)$latent_dim,
+#   SIMPLIFY = FALSE
+# )
+
+mlp = lrn("classif.mlp",
+  activation = nn_relu,
+  neurons = to_tune(ps(
+    n_layers = p_int(lower = 1, upper = 10), latent = p_int(10, 500),
+    .extra_trafo = function(x, param_set) {
+      list(neurons = rep(x$latent, x$n_layers))
+    })
+  ),
+  # neurons = to_tune(neurons_search_space),
+  batch_size = to_tune(c(64, 128, 256)),
+  p = to_tune(0.1, 0.7),
+  epochs = to_tune(upper = 1000L, internal = TRUE),
+  validate = "test",
+  measures_valid = msr("classif.acc"),
+  patience = 10,
+  device = "cpu"
+)
+
+mlp$encapsulate("callr", lrn("classif.featureless"))
+
+# define an AutoTuner that wraps the classif.mlp
+at = auto_tuner(
+  learner = mlp,
+  tuner = tnr("mbo"),
+  resampling = rsmp("cv", folds = 5),
+  measure = msr("classif.acc"),
+  term_evals = 10
+)
+
+future::plan("multisession", workers = 8)
+
+lrn_rf = lrn("classif.ranger")
+
+options(mlr3.exec_random = FALSE)
+
+design = benchmark_grid(
+  task_list[[1]],
+  learners = list(at, lrn_rf),
+  resampling = rsmp("cv", folds = 3)
+)
+design = design[order(mlr3misc::ids(learner)), ]
+
+time = bench::system_time(
+  bmr <- benchmark(design)
+)
+
+bmrdt = as.data.table(bmr)
+
+fwrite(bmrdt, here("R", "rf_use_case", "results", "bmrdt.csv"))
+fwrite(time, here("R", "rf_use_case", "results", "time.csv"))
\ No newline at end of file
From 52af8ed61b2c6447ea32a1a848ac8d214803e6e1 Mon Sep 17 00:00:00 2001
From: Carson Zhang
Date: Tue, 22 Oct 2024 08:50:50 +0200
Subject: [PATCH 33/37] looks ready to run. 100 evals of mbo

---
 benchmarks/rf_use_case/Rplots.pdf      | Bin 0 -> 3611 bytes
 benchmarks/rf_use_case/get_data.R      |   8 ++++++--
 benchmarks/rf_use_case/run_benchmark.R |  19 +++++++++++++------
 benchmarks/rf_use_case/view_results.R  |   8 ++++++--
 4 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/benchmarks/rf_use_case/Rplots.pdf b/benchmarks/rf_use_case/Rplots.pdf
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..243ae234c229f0c1970ea3a5c8ad776e41425478 100644
GIT binary patch
literal 3611
[base85-encoded binary data for the accidentally committed Rplots.pdf omitted]

literal 0
HcmV?d00001

diff --git a/benchmarks/rf_use_case/get_data.R b/benchmarks/rf_use_case/get_data.R
index 77eba492d..5d39c67c1 100644
--- a/benchmarks/rf_use_case/get_data.R
+++ b/benchmarks/rf_use_case/get_data.R
@@ -13,8 +13,12 @@ cc18_small = cc18_simple |>
   select(data_id, name, NumberOfFeatures, NumberOfInstances) |>
   filter(name %in% c("qsar-biodeg", "madelon", "kc1", "blood-transfusion-service-center", "climate-model-simulation-crashes"))
 
-cache_dir = here("benchmarks", "data", "oml")
-options(mlr3oml.cache = here("benchmarks", "data", "oml"))
+data_dir = here("benchmarks", "data")
+if (!dir.exists(data_dir)) {
+  dir.create(data_dir)
+}
+
+options(mlr3oml.cache = here(data_dir, "oml"))
 mlr3misc::pwalk(cc18_small, function(data_id, name, NumberOfFeatures, NumberOfInstances) odt(data_id))
 
 dir.create(here("benchmarks", "data", "oml", "collections"))
diff --git a/benchmarks/rf_use_case/run_benchmark.R b/benchmarks/rf_use_case/run_benchmark.R
index 42280d929..76ac11349 100644
--- a/benchmarks/rf_use_case/run_benchmark.R
+++ b/benchmarks/rf_use_case/run_benchmark.R
@@ -27,10 +27,10 @@ mlp = lrn("classif.mlp",
   ),
   batch_size = to_tune(c(64, 128, 256)),
   p = to_tune(0.1, 0.7),
-  epochs = to_tune(upper = 1000L, internal = TRUE),
+  epochs = to_tune(lower = 1, upper = 500L, internal = TRUE),
   validate = "test",
   measures_valid = msr("classif.acc"),
-  patience = 10,
+  patience = 5,
   device = "cpu"
 )
@@ -42,7 +42,7 @@ at = auto_tuner(
   tuner = tnr("mbo"),
   resampling = rsmp("cv", folds = 5),
   measure = msr("classif.acc"),
-  term_evals = 10
+  term_evals = 100
 )
@@ -53,7 +53,8 @@ options(mlr3.exec_random = FALSE)
 
 design = benchmark_grid(
   task_list,
-  learners = list(at, lrn_rf),
+  # learners = list(at, lrn_rf),
+  learners = lrn_rf,
   resampling = rsmp("cv", folds = 3)
 )
 design = design[order(mlr3misc::ids(learner)), ]
@@ -64,5 +65,11 @@ time = bench::system_time(
 
 bmrdt = as.data.table(bmr)
 
-fwrite(bmrdt, here("R", "rf_use_case", "results", "bmrdt.csv"))
-fwrite(time, here("R", "rf_use_case", "results", "time.csv"))
\ No newline at end of file
+bmr$aggregate()[, .(task_id, learner_id, classif.ce)]
+
+results_dir = here("benchmarks", "rf_use_case", "results")
+if (!dir.exists(results_dir)) {
+  dir.create(results_dir)
+}
+fwrite(bmr$aggregate()[, .(task_id, learner_id, classif.ce)], here(results_dir, "bmr_ce.csv"))
+fwrite(as.data.table(as.list(time)), here(results_dir, "time.csv"))
diff --git a/benchmarks/rf_use_case/view_results.R b/benchmarks/rf_use_case/view_results.R
index b179b9d33..8b3da5f4b 100644
--- a/benchmarks/rf_use_case/view_results.R
+++ b/benchmarks/rf_use_case/view_results.R
@@ -3,6 +3,10 @@ library(mlr3)
 
 library(here)
 
-bmrdt = fread(here("R", "rf_Use_case", "results", "bmrdt.csv"))
+bmr_ce = fread(here("benchmarks", "rf_use_case", "results", "bmr_ce.csv"))
 
-bmrdt
\ No newline at end of file
+bmr_ce
+
+time = fread(here("benchmarks", "rf_use_case", "results", "time.csv"))
+
+time

From 95f0a456db5ca9d7b436ca2a6a08a16abceb8bc3 Mon Sep 17 00:00:00 2001
From: Carson Zhang
Date: Tue, 22 Oct 2024 09:08:42 +0200
Subject: [PATCH 34/37] addoed surrogate learner for mbo

---
 benchmarks/rf_use_case/run_benchmark.R | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/benchmarks/rf_use_case/run_benchmark.R b/benchmarks/rf_use_case/run_benchmark.R
index 76ac11349..02295156f 100644
--- a/benchmarks/rf_use_case/run_benchmark.R
+++ b/benchmarks/rf_use_case/run_benchmark.R
@@ -36,10 +36,15 @@ mlp = lrn("classif.mlp",
 
 mlp$encapsulate("callr", lrn("classif.featureless"))
 
+surrogate = srlrn(as_learner(po("imputesample", affect_columns = selector_type("logical")) %>>%
+  po("imputeoor", multiplier = 3, affect_columns = selector_type(c("integer", "numeric", "character", "factor", "ordered"))) %>>%
+  po("colapply", applicator = as.factor, affect_columns = selector_type("character")) %>>%
+  lrn("regr.ranger")), catch_errors = TRUE)
+
 # define an AutoTuner that wraps the classif.mlp
 at = auto_tuner(
   learner = mlp,
-  tuner = tnr("mbo"),
+  tuner = tnr("mbo", surrogate = surrogate),
   resampling = rsmp("cv", folds = 5),
   measure = msr("classif.acc"),
   term_evals = 100
 )
@@ -53,8 +58,7 @@ options(mlr3.exec_random = FALSE)
 
 design = benchmark_grid(
   task_list,
-  # learners = list(at, lrn_rf),
-  learners = lrn_rf,
+  learners = list(at, lrn_rf),
   resampling = rsmp("cv", folds = 3)
 )
 design = design[order(mlr3misc::ids(learner)), ]
From 5c0a4473eae012fa2ab67dccf34800c8e1e9de34 Mon Sep 17 00:00:00 2001
From: Sebastian Fischer
Date: Thu, 6 Feb 2025 16:53:37 +0100
Subject: [PATCH 35/37] Delete R/CallbackSetTB.R

---
 R/CallbackSetTB.R | 85 -----------------------------------------------
 1 file changed, 85 deletions(-)
 delete mode 100644 R/CallbackSetTB.R

diff --git a/R/CallbackSetTB.R b/R/CallbackSetTB.R
deleted file mode 100644
index 8c2a8351b..000000000
--- a/R/CallbackSetTB.R
+++ /dev/null
@@ -1,85 +0,0 @@
-#' @title TensorBoard Logging Callback
-#'
-#' @name mlr_callback_set.tb
-#'
-#' @description
-#' Logs training loss, training measures, and validation measures as events.
-#' To view them, use TensorBoard with `tensorflow::tensorboard()` (requires `tensorflow`) or the CLI.
-#' @details
-#' Events are logged at most once per epoch.
-#'
-#' @param path (`character(1)`)\cr
-#'   The path to a folder where the events are logged.
-#'   Point TensorBoard to this folder to view them.
-#' @param log_train_loss (`logical(1)`)\cr
-#'   Whether to log the training loss.
-#' @family Callback
-#' @export
-#' @include CallbackSet.R
-CallbackSetTB = R6Class("CallbackSetTB",
-  inherit = CallbackSet,
-  lock_objects = FALSE,
-  public = list(
-    #' @description
-    #' Creates a new instance of this [R6][R6::R6Class] class.
-    initialize = function(path, log_train_loss) {
-      self$path = assert_path_for_output(path)
-      if (!dir.exists(path)) {
-        dir.create(path, recursive = TRUE)
-      }
-      self$log_train_loss = assert_logical(log_train_loss)
-    },
-    #' @description
-    #' Logs the training loss, training measures, and validation measures as TensorFlow events.
-    on_epoch_end = function() {
-      if (self$log_train_loss) {
-        private$.log_train_loss()
-      }
-
-      if (length(self$ctx$last_scores_train)) {
-        map(names(self$ctx$measures_train), private$.log_train_score)
-      }
-
-      if (length(self$ctx$last_scores_valid)) {
-        map(names(self$ctx$measures_valid), private$.log_valid_score)
-      }
-    }
-  ),
-  private = list(
-    .log_score = function(prefix, measure_name, score) {
-      event_list = list(score, self$ctx$epoch)
-      names(event_list) = c(paste0(prefix, measure_name), "step")
-
-      with_logdir(self$path, {
-        do.call(log_event, event_list)
-      })
-    },
-    .log_valid_score = function(measure_name) {
-      valid_score = self$ctx$last_scores_valid[[measure_name]]
-      private$.log_score("valid.", measure_name, valid_score)
-    },
-    .log_train_score = function(measure_name) {
-      train_score = self$ctx$last_scores_train[[measure_name]]
-      private$.log_score("train.", measure_name, train_score)
-    },
-    .log_train_loss = function() {
-      with_logdir(self$path, {
-        log_event(train.loss = self$ctx$last_loss)
-      })
-    }
-  )
-)
-
-#' @include TorchCallback.R
-mlr3torch_callbacks$add("tb", function() {
-  TorchCallback$new(
-    callback_generator = CallbackSetTB,
-    param_set = ps(
-      path = p_uty(tags = c("train", "required")),
-      log_train_loss = p_lgl(tags = c("train", "required"))
-    ),
-    id = "tb",
-    label = "TensorBoard",
-    man = "mlr3torch::mlr_callback_set.tb"
-  )
-})
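The deleted callback reduces to a small tfevents pattern: write named scalars under an explicit step inside `with_logdir()`. A standalone sketch of that pattern (the logdir and the logged values are placeholders):

library(tfevents)

logdir = tempfile("events-")
dir.create(logdir, recursive = TRUE)

with_logdir(logdir, {
  for (epoch in 1:5) {
    # mirrors .log_score(): one named scalar plus the epoch as "step"
    log_event(valid.classif.acc = 0.7 + epoch / 50, step = epoch)
  }
})

collect_events(logdir)  # the same data TensorBoard renders from this folder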
From c384529eeaff79d8a4043bca9f186f9004009a46 Mon Sep 17 00:00:00 2001
From: Sebastian Fischer
Date: Thu, 6 Feb 2025 16:54:13 +0100
Subject: [PATCH 36/37] Delete tests/testthat/test_CallbackSetTB.R

---
 tests/testthat/test_CallbackSetTB.R | 131 ----------------------------
 1 file changed, 131 deletions(-)
 delete mode 100644 tests/testthat/test_CallbackSetTB.R

diff --git a/tests/testthat/test_CallbackSetTB.R b/tests/testthat/test_CallbackSetTB.R
deleted file mode 100644
index 8a894ec46..000000000
--- a/tests/testthat/test_CallbackSetTB.R
+++ /dev/null
@@ -1,131 +0,0 @@
-library(tfevents)
-
-event_tag_is = function(event, tag_name) {
-  ifelse(is.null(event), FALSE, event["tag"] == tag_name)
-}
-
-test_that("autotest", {
-  cb = t_clbk("tb", path = tempfile(), log_train_loss = TRUE)
-  expect_torch_callback(cb, check_man = TRUE)
-})
-
-# TODO: investigate what's happening when there is only a single epoch (why don't we log anything?)
-test_that("a simple example works", {
-  cb = t_clbk("tb")
-
-  task = tsk("iris")
-  n_epochs = 10
-  batch_size = 50
-  neurons = 200
-
-  pth0 = tempfile()
-
-  log_train_loss = TRUE
-
-  mlp = lrn("classif.mlp",
-    callbacks = cb,
-    epochs = n_epochs, batch_size = batch_size, neurons = neurons,
-    validate = 0.2,
-    measures_valid = msrs(c("classif.acc", "classif.ce")),
-    measures_train = msrs(c("classif.acc", "classif.ce"))
-  )
-  mlp$param_set$set_values(cb.tb.path = pth0)
-
-  mlp$param_set$set_values(cb.tb.log_train_loss = log_train_loss)
-
-  mlp$train(task)
-
-  events = mlr3misc::map(collect_events(pth0)$summary, unlist)
-
-  n_train_loss_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.loss")))
-  n_train_acc_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.classif.acc")))
-  n_train_ce_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.classif.ce")))
-  n_valid_acc_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "valid.classif.acc")))
-  n_valid_ce_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "valid.classif.ce")))
-
-  # TODO: refactor to expect a specific ordering of the events list, not just the right counts
-  expect_equal(n_train_loss_events, n_epochs)
-  expect_equal(n_train_acc_events, n_epochs)
-  expect_equal(n_train_ce_events, n_epochs)
-  expect_equal(n_valid_acc_events, n_epochs)
-  expect_equal(n_valid_ce_events, n_epochs)
-})
-
-test_that("eval_freq works", {
-  cb = t_clbk("tb")
-
-  task = tsk("iris")
-  n_epochs = 9
-  batch_size = 50
-  neurons = 200
-  eval_freq = 4
-
-  pth0 = tempfile()
-
-  log_train_loss = TRUE
-
-  mlp = lrn("classif.mlp",
-    callbacks = cb,
-    epochs = n_epochs, batch_size = batch_size, neurons = neurons,
-    validate = 0.2,
-    measures_valid = msrs(c("classif.acc", "classif.ce")),
-    measures_train = msrs(c("classif.acc", "classif.ce")),
-    eval_freq = eval_freq
-  )
-  mlp$param_set$set_values(cb.tb.path = pth0)
-  mlp$param_set$set_values(cb.tb.log_train_loss = log_train_loss)
-
-  mlp$train(task)
-
-  events = mlr3misc::map(collect_events(pth0)$summary, unlist)
-
-  n_train_loss_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.loss")))
-  n_train_acc_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.classif.acc")))
-  n_train_ce_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.classif.ce")))
-  n_valid_acc_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "valid.classif.acc")))
-  n_valid_ce_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "valid.classif.ce")))
-
-  expect_equal(n_train_loss_events, n_epochs)
-  expect_equal(n_train_acc_events, ceiling(n_epochs / eval_freq))
-  expect_equal(n_train_ce_events, ceiling(n_epochs / eval_freq))
-  expect_equal(n_valid_acc_events, ceiling(n_epochs / eval_freq))
-  expect_equal(n_valid_ce_events, ceiling(n_epochs / eval_freq))
-})
-
-test_that("the flag for tracking the train loss works", {
-  cb = t_clbk("tb")
-
-  task = tsk("iris")
-  n_epochs = 10
-  batch_size = 50
-  neurons = 200
-
-  log_train_loss = FALSE
-
-  pth0 = tempfile()
-
-  mlp = lrn("classif.mlp",
-    callbacks = cb,
-    epochs = n_epochs, batch_size = batch_size, neurons = neurons,
-    validate = 0.2,
-    measures_valid = msrs(c("classif.acc", "classif.ce")),
-    measures_train = msrs(c("classif.acc", "classif.ce"))
-  )
-  mlp$param_set$set_values(cb.tb.path = pth0)
-  mlp$param_set$set_values(cb.tb.log_train_loss = log_train_loss)
-
-  mlp$train(task)
-
-  events = mlr3misc::map(collect_events(pth0)$summary, unlist)
-
-  n_train_loss_events = sum(unlist(mlr3misc::map(events, event_tag_is, tag_name = "train.loss")))
-
-  expect_equal(n_train_loss_events, 0)
-})
-
-test_that("throws an error when using existing directory", {
-  path = tempfile()
-  dir.create(path)
-  cb = t_clbk("tb", path = path, log_train_loss = TRUE)
-  expect_error(cb$generate(), "already exists")
-})
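The deleted tests pinned down the logging cadence: the training loss is written every epoch, while train and validation scores follow `eval_freq`, giving `ceiling(n_epochs / eval_freq)` events per measure. A toy check of that arithmetic, under the (assumed) reading that scores are computed on every `eval_freq`-th epoch plus the final one:

n_epochs = 9
eval_freq = 4

# epochs on which scores would be computed: 4, 8, and the final epoch 9
score_epochs = unique(c(seq(eval_freq, n_epochs, by = eval_freq), n_epochs))
length(score_epochs) == ceiling(n_epochs / eval_freq)  # TRUE: 3 events per measure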
From c01c531d7faa0e5a676a23981b805e2e634ddd31 Mon Sep 17 00:00:00 2001
From: Sebastian Fischer
Date: Thu, 6 Feb 2025 17:15:24 +0100
Subject: [PATCH 37/37] update benchmark

---
 benchmarks/rf_use_case/run_benchmark.R | 67 ++++++++++----------
 1 file changed, 25 insertions(+), 42 deletions(-)

diff --git a/benchmarks/rf_use_case/run_benchmark.R b/benchmarks/rf_use_case/run_benchmark.R
index 02295156f..cc2c71cd2 100644
--- a/benchmarks/rf_use_case/run_benchmark.R
+++ b/benchmarks/rf_use_case/run_benchmark.R
@@ -1,37 +1,25 @@
-library(mlr3)
-library(mlr3learners)
+library(mlr3verse)
 library(mlr3oml)
 library(mlr3torch)
-library(mlr3tuning)
+library(mlr3batchmark)
 library(mlr3mbo)
-library(bbotk)
-
-library(bench)
-library(data.table)
-library(here)
-
-options(mlr3oml.cache = here("benchmarks", "data", "oml"))
-
-# define the tasks
-cc18_small = fread(here(getOption("mlr3oml.cache"), "collections", "cc18_small.csv"))
+library(mlr3tuning)
 
-task_list = mlr3misc::pmap(cc18_small, function(data_id, name, NumberOfFeatures, NumberOfInstances) tsk("oml", data_id = data_id))
+ids = c(1067, 1464, 1485, 1494, 40994)
+task_list = lapply(ids, function(id) tsk("oml", data_id = id))
 
 mlp = lrn("classif.mlp",
   activation = nn_relu,
-  neurons = to_tune(ps(
-    n_layers = p_int(lower = 1, upper = 10), latent = p_int(10, 500),
-    .extra_trafo = function(x, param_set) {
-      list(neurons = rep(x$latent, x$n_layers))
-    })
-  ),
+  n_layers = to_tune(lower = 1, upper = 10),
+  neurons = to_tune(p_int(lower = 10, upper = 1000)),
   batch_size = to_tune(c(64, 128, 256)),
-  p = to_tune(0.1, 0.7),
-  epochs = to_tune(lower = 1, upper = 500L, internal = TRUE),
+  p = to_tune(0.1, 0.9),
+  epochs = to_tune(lower = 1, upper = 1000L, internal = TRUE),
   validate = "test",
-  measures_valid = msr("classif.acc"),
-  patience = 5,
-  device = "cpu"
+  measures_valid = msr("classif.logloss"),
+  patience = 10,
+  device = "auto",
+  predict_type = "prob"
 )
 
 mlp$encapsulate("callr", lrn("classif.featureless"))
 
 surrogate = srlrn(as_learner(po("imputesample", affect_columns = selector_type("logical")) %>>%
   po("imputeoor", multiplier = 3, affect_columns = selector_type(c("integer", "numeric", "character", "factor", "ordered"))) %>>%
   po("colapply", applicator = as.factor, affect_columns = selector_type("character")) %>>%
   lrn("regr.ranger")), catch_errors = TRUE)
 
 # define an AutoTuner that wraps the classif.mlp
 at = auto_tuner(
   learner = mlp,
   tuner = tnr("mbo", surrogate = surrogate),
   resampling = rsmp("cv", folds = 5),
-  measure = msr("classif.acc"),
-  term_evals = 100
+  measure = msr("internal_valid_score", minimize = TRUE),
+  term_evals = 1
 )
 
-future::plan("multisession", workers = 8)
-
 lrn_rf = lrn("classif.ranger")
 
-options(mlr3.exec_random = FALSE)
-
 design = benchmark_grid(
   task_list,
   learners = list(at, lrn_rf),
   resampling = rsmp("cv", folds = 3)
 )
-design = design[order(mlr3misc::ids(learner)), ]
 
-time = bench::system_time(
-  bmr <- benchmark(design)
+design1 = benchmark_grid(
+  task_list[[1]],
+  learners = list(at, lrn_rf),
+  resampling = rsmp("holdout")
 )
 
-bmrdt = as.data.table(bmr)
+benchmark(design1)
 
-bmr$aggregate()[, .(task_id, learner_id, classif.ce)]
+reg = batchtools::makeExperimentRegistry(
+  file.dir = here::here("benchmarks", "rf_use_case", "reg"),
+  packages = c("mlr3verse", "mlr3oml", "mlr3torch", "mlr3batchmark")
+)
 
-results_dir = here("benchmarks", "rf_use_case", "results")
-if (!dir.exists(results_dir)) {
-  dir.create(results_dir)
-}
-fwrite(bmr$aggregate()[, .(task_id, learner_id, classif.ce)], file.path(results_dir, "bmr_ce.csv"))
-fwrite(as.data.table(as.list(time)), file.path(results_dir, "time.csv"))
+batchmark(design)
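`batchmark(design)` above only populates the experiment registry; the jobs still have to be submitted and collected. A sketch of the usual follow-up, assuming the `reg` created in the script (batchtools and mlr3batchmark APIs, not part of this patch):

library(batchtools)
library(mlr3batchmark)

submitJobs(reg = reg)   # run the experiments, locally or on a cluster backend
waitForJobs(reg = reg)

# collect the finished jobs back into an mlr3 BenchmarkResult
bmr = reduceResultsBatchmark(reg = reg)
bmr$aggregate(msr("classif.ce"))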