Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
2380913
TODO: write tests
cxzhang4 Sep 10, 2024
86f87c8
name -> TB. began refactoring based on last meeting with Sebastian
cxzhang4 Sep 22, 2024
400ed74
slight description change
cxzhang4 Oct 2, 2024
9e6acd8
removed extraneous comments
cxzhang4 Oct 2, 2024
fc4f2fa
added n_last_loss frequency test
cxzhang4 Oct 2, 2024
81d1ded
in progress
cxzhang4 Oct 10, 2024
cb03eb3
autotest working, accidentally used the wrong callback_generator
cxzhang4 Oct 10, 2024
78b95a5
simple and eval_freq tests pass
cxzhang4 Oct 11, 2024
a365757
changed logging methods to private
cxzhang4 Oct 11, 2024
43a8ffb
removed magrittr pipe from tests
cxzhang4 Oct 11, 2024
6b9a845
added details for callback class
cxzhang4 Oct 11, 2024
d354b2c
formatting
cxzhang4 Oct 11, 2024
b5b27b1
built docs
cxzhang4 Oct 11, 2024
565456b
Merge branch 'main' into feat/tflog-callback
cxzhang4 Oct 11, 2024
7c9f431
all tests pass, I think this is parity with the previous broken commi…
cxzhang4 Oct 11, 2024
c6c9333
implemented step logging
cxzhang4 Oct 11, 2024
43e7396
removed extraneous comments
cxzhang4 Oct 11, 2024
ec5d8fc
added tensorboard instructions
cxzhang4 Oct 11, 2024
f26a254
passes R CMD Check, minimally addresses every comment in the previous PR
cxzhang4 Oct 11, 2024
a86c946
moved newest news to bottom
cxzhang4 Oct 13, 2024
3652fe6
init
cxzhang4 Oct 15, 2024
92b4ffc
Update benchmarks/rf_use_case/run_benchmark.R
cxzhang4 Oct 15, 2024
f821e09
use mlr3oml cache
cxzhang4 Oct 17, 2024
5903001
Copied in Sebastian's solution for tuning the neurons as a paramset
cxzhang4 Oct 17, 2024
869aba2
looks like benchmark code working
cxzhang4 Oct 17, 2024
ab3bedf
Error: Inner tuning and parameter transformations are currently not s…
cxzhang4 Oct 17, 2024
31b3964
changed to grid search
cxzhang4 Oct 17, 2024
a489897
LLM-generated fn for neuron search space
cxzhang4 Oct 17, 2024
0073dcc
should work, test this on another machine
cxzhang4 Oct 17, 2024
10f3448
fjwoie
cxzhang4 Oct 18, 2024
b81c23b
encapsulated the learner for parallelization
cxzhang4 Oct 18, 2024
00b272f
comments
cxzhang4 Oct 18, 2024
89a72f1
added install script
cxzhang4 Oct 20, 2024
52af8ed
looks ready to run. 100 evals of mbo
cxzhang4 Oct 22, 2024
95f0a45
addoed surrogate learner for mbo
cxzhang4 Oct 22, 2024
5c0a447
Delete R/CallbackSetTB.R
sebffischer Feb 6, 2025
c384529
Delete tests/testthat/test_CallbackSetTB.R
sebffischer Feb 6, 2025
ee3f51d
merge main
sebffischer Feb 6, 2025
c01c531
update benchmark
sebffischer Feb 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ inst/doc
/doc/
/Meta/
CRAN-SUBMISSION
benchmarks/data
paper/data
.idea/
.vsc/
paper/data
paper/data
Binary file added benchmarks/rf_use_case/Rplots.pdf
Binary file not shown.
25 changes: 25 additions & 0 deletions benchmarks/rf_use_case/get_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
library(here)
library(mlr3oml)
library(tidytable)

# Enable the mlr3oml cache before the first OpenML request, so that the
# collection and listing queries below are stored on disk instead of being
# fetched again on every run. The directory is the same one this script uses
# for the data sets further down.
options(mlr3oml.cache = here("benchmarks", "data", "oml"))

# OpenML collection with id 99; its `data_ids` are used below.
cc18_collection = ocl(99)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
cc18_collection = ocl(99)
options(mlr3oml.cache = TRUE)
cc18_collection = ocl(99)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can also add this to your .Rprofile


# List the data sets of the collection that have exactly two classes and no
# missing values.
cc18_simple = list_oml_data(
  data_id = cc18_collection$data_ids,
  number_classes = 2,
  number_missing_values = 0
)

# Reduce the listing to the data sets named below and to the columns that are
# written to disk at the end of this script.
cc18_small = cc18_simple |>
  # the target class is a symbolic feature
  filter(NumberOfSymbolicFeatures == 1) |>
  select(data_id, name, NumberOfFeatures, NumberOfInstances) |>
  filter(
    name %in% c(
      "qsar-biodeg",
      "madelon",
      "kc1",
      "blood-transfusion-service-center",
      "climate-model-simulation-crashes"
    )
  )

# Directory below the project root that holds all downloaded benchmark data.
data_dir = here("benchmarks", "data")
# recursive + showWarnings = FALSE: succeeds whether or not the directory (or
# its parents) already exist, so the script can be re-run safely.
dir.create(data_dir, recursive = TRUE, showWarnings = FALSE)

# `data_dir` is already an absolute path, so extend it with file.path()
# instead of passing it through here() a second time.
options(mlr3oml.cache = file.path(data_dir, "oml"))

# Download every selected data set into the cache.
# NOTE(review): odt() appears to only construct a lazy OMLData object; the
# `$data` field is accessed here to force the actual download - confirm
# against the installed mlr3oml version.
mlr3misc::pwalk(
  cc18_small,
  function(data_id, name, NumberOfFeatures, NumberOfInstances) {
    odt(data_id)$data
  }
)

# Store the table of selected data sets next to the cached data; it is read
# back by single_task.R.
collections_dir = file.path(data_dir, "oml", "collections")
dir.create(collections_dir, recursive = TRUE, showWarnings = FALSE)
fwrite(cc18_small, file.path(collections_dir, "cc18_small.csv"))
11 changes: 11 additions & 0 deletions benchmarks/rf_use_case/install_packages.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# CRAN mirror used for every CRAN installation in this script.
cran_repo = "https://ftp.fau.de/cran/"

# devtools is required for the GitHub installations below, so make sure it is
# available before it is called.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools", repos = cran_repo)
}

# Development versions that are installed from GitHub.
devtools::install_github("mlr-org/mlr3torch")
devtools::install_github("mlr-org/mlr3tuning@fix/int-tune-trafo")

# Package names
packages = c(
  "here", "mlr3oml", "tidytable", "mlr3", "mlr3learners", "mlr3tuning",
  "mlr3mbo", "bbotk", "bench", "data.table",
  # also used by the benchmark scripts: attached via library(), needed for
  # the ranger learners, the "callr" encapsulation and future::plan()
  "mlr3verse", "mlr3batchmark", "ranger", "callr", "future"
)

# Install packages not yet installed
installed_packages = packages %in% rownames(installed.packages())
if (!all(installed_packages)) {
  install.packages(packages[!installed_packages], repos = cran_repo)
}
62 changes: 62 additions & 0 deletions benchmarks/rf_use_case/run_benchmark.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
library(mlr3verse)
library(mlr3oml)
library(mlr3torch)
library(mlr3batchmark)
library(mlr3mbo)
library(mlr3tuning)

# OpenML data ids of the data sets used in the benchmark.
ids = c(1067, 1464, 1485, 1494, 40994)

# One "oml" task per data id, in the order of `ids`.
task_list = lapply(ids, function(data_id) {
  tsk("oml", data_id = data_id)
})

# MLP classifier. Parameters wrapped in to_tune() are left to the AutoTuner
# defined further down; `epochs` is marked for internal tuning, with the
# validation data, validation measure and patience configured here.
mlp = lrn("classif.mlp",
  activation = nn_relu,
  n_layers = to_tune(lower = 1, upper = 10),
  neurons = to_tune(p_int(lower = 10, upper = 1000)),
  batch_size = to_tune(c(64, 128, 256)),
  p = to_tune(0.1, 0.9),
  epochs = to_tune(lower = 1, upper = 1000L, internal = TRUE),
  validate = "test",
  measures_valid = msr("classif.logloss"),
  patience = 10,
  device = "auto",
  predict_type = "prob"
)

# Run the learner with "callr" encapsulation and a featureless fallback.
# The fallback is given the same predict type as the MLP ("prob"); a fallback
# left at its default predict type would not match the MLP's predictions.
mlp$encapsulate("callr", lrn("classif.featureless", predict_type = "prob"))

# Surrogate learner handed to the "mbo" tuner below: a pipeline that applies
# "imputesample" to logical columns, "imputeoor" to the remaining listed
# column types, converts character columns with as.factor, and ends in a
# ranger regression learner. Wrapped with catch_errors = TRUE.
surrogate = srlrn(
  as_learner(
    po("imputesample", affect_columns = selector_type("logical")) %>>%
      po(
        "imputeoor",
        multiplier = 3,
        affect_columns = selector_type(
          c("integer", "numeric", "character", "factor", "ordered")
        )
      ) %>>%
      po(
        "colapply",
        applicator = as.factor,
        affect_columns = selector_type("character")
      ) %>>%
      lrn("regr.ranger")
  ),
  catch_errors = TRUE
)

# AutoTuner around the classif.mlp learner: "mbo" tuner with the surrogate
# defined above, 5-fold CV as inner resampling, and the (minimized) internal
# validation score as tuning measure. The budget is a single evaluation.
at = auto_tuner(
  tuner = tnr("mbo", surrogate = surrogate),
  learner = mlp,
  resampling = rsmp("cv", folds = 5),
  measure = msr("internal_valid_score", minimize = TRUE),
  term_evals = 1
)

# Random forest used as the comparison learner.
lrn_rf = lrn("classif.ranger")

# Full design: every task x {tuned MLP, random forest} with 3-fold CV.
# The argument is spelled out as `resamplings` so the call does not depend on
# partial argument matching.
design = benchmark_grid(
  tasks = task_list,
  learners = list(at, lrn_rf),
  resamplings = rsmp("cv", folds = 3)
)

# Reduced design (first task only, holdout) that is run locally right away.
design1 = benchmark_grid(
  tasks = task_list[[1]],
  learners = list(at, lrn_rf),
  resamplings = rsmp("holdout")
)

benchmark(design1)

# Registry for the full benchmark.
# - here::here() is namespace-qualified because this script does not attach
#   the here package.
# - `packages` lists "mlr3batchmark" (the package attached at the top of this
#   script); a package called "batchmark" is not among the attached packages.
reg = makeExperimentRegistry(
  file.dir = here::here("benchmarks", "rf_use_case", "reg"),
  packages = c("mlr3verse", "mlr3oml", "mlr3torch", "mlr3batchmark")
)

# Add the experiments of the full design to the registry.
batchmark(design)
85 changes: 85 additions & 0 deletions benchmarks/rf_use_case/single_task.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
library(mlr3)
library(mlr3learners)
library(mlr3oml)
library(mlr3torch)
library(mlr3tuning)
library(mlr3mbo)
library(bbotk)

library(bench)
library(data.table)
library(here)

# Use the project-local OpenML cache.
options(mlr3oml.cache = here("benchmarks", "data", "oml"))

# define the tasks
# The cache option already holds an absolute path (set on the line above), so
# it is extended with file.path() rather than passed through here() again.
cc18_small = fread(
  file.path(getOption("mlr3oml.cache"), "collections", "cc18_small.csv")
)

# One "oml" task per row of the table; only `data_id` is used.
task_list = mlr3misc::pmap(
  cc18_small,
  function(data_id, name, NumberOfFeatures, NumberOfInstances) {
    tsk("oml", data_id = data_id)
  }
)

task_list

# define the learners
# neurons = function(n_layers, latent_dim) {
# rep(latent_dim, n_layers)
# }

# n_layers_values <- 1:5
# latent_dim_values <- seq(10, 200, by = 20)
# neurons_search_space <- mapply(
# neurons,
# expand.grid(n_layers = n_layers_values, latent_dim = latent_dim_values)$n_layers,
# expand.grid(n_layers = n_layers_values, latent_dim = latent_dim_values)$latent_dim,
# SIMPLIFY = FALSE
# )

# MLP classifier. `neurons` is tuned through a two-parameter search space
# (number of layers and a shared layer width) that the trafo expands into a
# vector repeating `latent` `n_layers` times. `epochs` is marked for internal
# tuning, with the validation data, validation measure and patience set here.
mlp = lrn("classif.mlp",
  activation = nn_relu,
  neurons = to_tune(
    ps(
      n_layers = p_int(lower = 1, upper = 10),
      latent = p_int(lower = 10, upper = 500),
      .extra_trafo = function(x, param_set) {
        list(neurons = rep(x$latent, x$n_layers))
      }
    )
  ),
  batch_size = to_tune(c(64, 128, 256)),
  p = to_tune(0.1, 0.7),
  epochs = to_tune(upper = 1000L, internal = TRUE),
  validate = "test",
  measures_valid = msr("classif.acc"),
  patience = 10,
  device = "cpu"
)

# Run the learner with "callr" encapsulation and a featureless fallback.
mlp$encapsulate("callr", lrn("classif.featureless"))

# AutoTuner around the classif.mlp learner: "mbo" tuner, 5-fold CV as inner
# resampling, accuracy as tuning measure, and a budget of 10 evaluations.
at = auto_tuner(
  tuner = tnr("mbo"),
  learner = mlp,
  resampling = rsmp("cv", folds = 5),
  measure = msr("classif.acc"),
  term_evals = 10
)

# Parallelize with 8 background R sessions.
future::plan("multisession", workers = 8)

# Random forest used as the comparison learner.
lrn_rf = lrn("classif.ranger")

# NOTE(review): presumably disables mlr3's random execution order so that the
# ordering of `design` below is respected - confirm.
options(mlr3.exec_random = FALSE)

# First task only, {tuned MLP, random forest}, 3-fold CV.
# The argument is spelled out as `resamplings` so the call does not depend on
# partial argument matching.
design = benchmark_grid(
  tasks = task_list[[1]],
  learners = list(at, lrn_rf),
  resamplings = rsmp("cv", folds = 3)
)
# Order the rows of the design by learner id.
design = design[order(mlr3misc::ids(learner)), ]

# Run the benchmark and record how long it takes.
time = bench::system_time(
  bmr <- benchmark(design)
)

# Full result table, kept for interactive inspection. It contains list
# columns holding objects, so it is not written to CSV.
bmrdt = as.data.table(bmr)

# Results go to the directory (and file names) that view_results.R reads
# from; create it first so that fwrite() cannot fail on a missing folder.
results_dir = here("benchmarks", "rf_use_case", "results")
dir.create(results_dir, recursive = TRUE, showWarnings = FALSE)

# Aggregated classification error per task / learner. Only atomic columns are
# written, because fwrite() cannot serialize list columns of objects.
bmr_ce = bmr$aggregate(msr("classif.ce"))
atomic_cols = names(bmr_ce)[vapply(bmr_ce, is.atomic, logical(1L))]
fwrite(bmr_ce[, atomic_cols, with = FALSE], file.path(results_dir, "bmr_ce.csv"))

# `time` is a named vector, not a table; turn it into a one-row table with
# one column per element before writing.
# NOTE(review): values are presumably in seconds - confirm against bench.
fwrite(as.data.table(as.list(unclass(time))), file.path(results_dir, "time.csv"))
12 changes: 12 additions & 0 deletions benchmarks/rf_use_case/view_results.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
library(data.table)
library(mlr3)

library(here)

# Read the stored benchmark results and show them.
bmr_ce = fread(
  file.path(here("benchmarks", "rf_use_case", "results"), "bmr_ce.csv")
)

bmr_ce

# Read the stored timing of the benchmark run and show it.
time = fread(
  file.path(here("benchmarks", "rf_use_case", "results"), "time.csv")
)

time
Loading