Skip to content

Commit 3652fe6

Browse files
committed
init
1 parent a86c946 commit 3652fe6

File tree

4 files changed

+142
-0
lines changed

4 files changed

+142
-0
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@ inst/doc
1414
/doc/
1515
/Meta/
1616
CRAN-SUBMISSION
17+
benchmarks/data

benchmarks/rf_use_case/get_data.R

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
library(here)
2+
3+
library(mlr3oml)
4+
library(data.table)
5+
library(tidytable)
6+
7+
# Fetch OpenML collection 99 (referred to as "cc18" in this script) and list
# those of its datasets that have two classes and no missing values.
cc18_collection = ocl(99)
cc18_simple = list_oml_data(
  data_id = cc18_collection$data_ids,
  number_classes = 2,
  number_missing_values = 0
)

# Keep only the hand-picked datasets that have exactly one symbolic feature,
# and reduce the table to the columns needed to download and describe them.
cc18_small = cc18_simple |>
  filter(
    NumberOfSymbolicFeatures == 1,
    name %in% c(
      "qsar-biodeg",
      "madelon",
      "kc1",
      "blood-transfusion-service-center",
      "climate-model-simulation-crashes"
    )
  ) |>
  select(data_id, name, NumberOfFeatures, NumberOfInstances)
17+
18+
# kc1_1067 = odt(1067)
19+
20+
21+
# save the data locally
22+
# Target directory for the exported CSV files. fwrite() does not create
# missing directories, so make sure it exists before writing anything.
# NOTE(review): .gitignore excludes "benchmarks/data", but this writes to
# "data/oml" under the project root found by here() -- confirm the location.
oml_dir = here("data", "oml")
dir.create(oml_dir, recursive = TRUE, showWarnings = FALSE)

# Download every selected dataset and store it as "<name>_<data_id>.csv".
# The function must accept all columns of cc18_small because pmap() passes
# each column as a named argument; only data_id and name are used.
# invisible() keeps the list of return values from being auto-printed.
invisible(mlr3misc::pmap(cc18_small, function(data_id, name, NumberOfFeatures, NumberOfInstances) {
  dt = odt(data_id)$data
  dt_name = file.path(oml_dir, paste0(name, "_", data_id, ".csv"))
  fwrite(dt, file = dt_name)
}))

# Store the index table as well so downstream scripts can locate the files.
fwrite(cc18_small, file.path(oml_dir, "cc18_small.csv"))
+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
library(mlr3)
2+
library(data.table)
3+
library(mlr3torch)
4+
library(paradox)
5+
6+
library(here)
7+
8+
# define the tasks
9+
# Read the dataset index, then load one CSV per index row. Files are named
# "<name>_<data_id>.csv" and live next to the index file.
cc18_small = fread(here("data", "oml", "cc18_small.csv"))
cc18_small_datasets = lapply(seq_len(nrow(cc18_small)), function(row) {
  csv_name = paste0(cc18_small$name[row], "_", cc18_small$data_id[row], ".csv")
  fread(here("data", "oml", csv_name))
})
14+
# cc18_small_datasets
15+
16+
# cc18_small_datasets[[1]]
17+
18+
# TODO: determine whether we can use OML tasks "directly"
19+
# didn't do this at first because they come with resamplings and we want to use our own resamplings
20+
# Return the loaded table that belongs to the given OpenML data id.
# Looking the table up by id (instead of by list position) keeps the task
# definitions correct even if the row order of the index file changes.
dataset_for = function(id) {
  pos = match(id, cc18_small$data_id)
  if (is.na(pos)) {
    stop("No dataset with data_id ", id, " in cc18_small", call. = FALSE)
  }
  cc18_small_datasets[[pos]]
}

# Explicit task ids: otherwise the id defaults to the deparsed expression
# passed as the data argument, which is unreadable in the benchmark results.
kc1_1067 = as_task_classif(dataset_for(1067), target = "defects", id = "kc1_1067")
blood_1464 = as_task_classif(dataset_for(1464), target = "Class", id = "blood_1464")

tasks = list(kc1_1067, blood_1464)
24+
25+
# define the learners
26+
# MLP classifier with a tuning space attached to its hyperparameters.
mlp = lrn("classif.mlp",
  activation = nn_relu,
  # Candidate layer layouts. This must be a list: c() would flatten the
  # nested vectors into one numeric vector and lose the multi-layer options.
  neurons = to_tune(
    list(
      10, 20,
      c(10, 10), c(10, 20), c(20, 10), c(20, 20)
    )
  ),
  # A discrete set of levels is passed as a single vector; three separate
  # arguments would be interpreted as (lower, upper, logscale).
  batch_size = to_tune(c(16, 32, 64)),
  # Continuous range (presumably the dropout rate -- see the notes below).
  p = to_tune(0.1, 0.9),
  # Tuned internally by the learner, capped at 1000 epochs.
  epochs = to_tune(upper = 1000L, internal = TRUE),
  # 30% of the training data is held out for validation, scored by accuracy.
  validate = 0.3,
  measures_valid = msr("classif.acc"),
  patience = 10,
  device = "cpu"
)
42+
43+
# define an AutoTuner that wraps the classif.mlp
44+
# Wrap the MLP in an AutoTuner: grid search over the search space declared on
# the learner, evaluated with cross-validation and stopped after 10
# evaluations.
# NOTE(review): auto_tuner() and tnr() are presumably provided by mlr3tuning,
# which this script does not attach -- confirm it is loaded.
at = auto_tuner(
  learner = mlp,
  tuner = tnr("grid_search"),
  resampling = rsmp("cv"),
  # Fixed typo: the measure key is "classif.acc", not "clasif.acc".
  measure = msr("classif.acc"),
  term_evals = 10
)
51+
52+
# Run the resampling iterations in parallel background R sessions.
future::plan("multisession")

# Benchmark design: every task x every learner, each with 10-fold CV.
# The resampling used to sit inside the learner list because of a misplaced
# parenthesis; it is now passed as its own argument.
# NOTE(review): "classif.ranger" is presumably registered by mlr3learners,
# which this script does not attach -- confirm it is loaded.
design = benchmark_grid(
  tasks = tasks,
  learners = list(at, lrn("classif.ranger")),
  resamplings = rsmp("cv", folds = 10)
)
59+
60+
# Execute the benchmark.
bmr = benchmark(design)

# Per-iteration accuracy of every task/learner combination.
# as.data.table(bmr) cannot be written to CSV because its list columns hold
# R objects rather than atomic vectors, so the scores are exported instead
# and any remaining list columns are dropped.
bmrdt = bmr$score(msr("classif.acc"))
flat_cols = names(bmrdt)[!vapply(bmrdt, is.list, logical(1))]
bmrdt = bmrdt[, flat_cols, with = FALSE]

# fwrite() does not create missing directories.
# NOTE(review): this path ("R/rf_Use_case") differs from the script location
# ("benchmarks/rf_use_case") -- confirm the intended results directory.
results_dir = here("R", "rf_Use_case", "results")
dir.create(results_dir, recursive = TRUE, showWarnings = FALSE)
fwrite(bmrdt, file.path(results_dir, "bmrdt.csv"))
65+
66+
# define an optimization strategy: grid search
67+
68+
# define a search space: the parameters to tune over
69+
70+
# neurons
71+
72+
# batch size
73+
74+
# dropout rate
75+
76+
# epochs
77+
78+
# use something standard (e.g. accuracy) as the tuning measure
79+
80+
# use k-fold cross validation
81+
82+
# set a number of evaluations for the tuner
83+
84+
# TODO: set up the tuning space for the neurons and layers
85+
86+
# layers_search_space <- 1:5
87+
# neurons_search_space <- seq(10, 50, by = 10)
88+
89+
# generate_permutations <- function(layers_search_space, neurons_search_space) {
90+
# result <- list()
91+
92+
# for (layers in layers_search_space) {
93+
# # Generate all permutations with replacement
94+
# perms <- expand.grid(replicate(layers, list(neurons_search_space), simplify = FALSE))
95+
96+
# # Convert each row to a vector and add to the result
97+
# result <- c(result, apply(perms, 1, as.numeric))
98+
# }
99+
100+
# return(result)
101+
# }
102+
103+
# permutations <- generate_permutations(layers_search_space, neurons_search_space)
104+
105+
# head(permutations)

benchmarks/rf_use_case/view_results.R

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
library(data.table)
2+
library(mlr3)
3+
4+
library(here)
5+
6+
# Load the stored benchmark results table and display it.
results_file = here("R", "rf_Use_case", "results", "bmrdt.csv")
bmrdt = fread(results_file)

bmrdt

0 commit comments

Comments
 (0)