diff --git a/.github/workflows/run_singularity_versions.yml b/.github/workflows/run_singularity_versions.yml index fe576a30..c7862636 100644 --- a/.github/workflows/run_singularity_versions.yml +++ b/.github/workflows/run_singularity_versions.yml @@ -1,6 +1,16 @@ name: Test Support for different Singularity Versions -on: [push] +on: + pull_request: + types: [ready_for_review] + + pull_request_review: + types: [submitted] + + push: + branches: + - 'main' + - 'development' jobs: Tests: @@ -10,25 +20,25 @@ jobs: matrix: include: - python-version: 3.7 - DISPLAY_NAME: "Singularity Container Examples with S3.5" + DISPLAY_NAME: "Singularity Container Examples with S3.7" RUN_CONTAINER_EXAMPLES: true USE_SINGULARITY: false - SINGULARITY_VERSION: "3.5" + SINGULARITY_VERSION: "3.7" - python-version: 3.7 - DISPLAY_NAME: "Singularity Container Examples with S3.6" + DISPLAY_NAME: "Singularity Container Examples with S3.8" RUN_CONTAINER_EXAMPLES: true USE_SINGULARITY: false - SINGULARITY_VERSION: "3.6" + SINGULARITY_VERSION: "3.8" - python-version: 3.7 - DISPLAY_NAME: "Singularity Container Examples with S3.7" + DISPLAY_NAME: "Singularity Container Examples with S3.9" RUN_CONTAINER_EXAMPLES: true USE_SINGULARITY: false - SINGULARITY_VERSION: "3.7" + SINGULARITY_VERSION: "3.9" - python-version: 3.7 - DISPLAY_NAME: "Singularity Container Examples with S3.8" + DISPLAY_NAME: "Singularity Container Examples with S3.10" RUN_CONTAINER_EXAMPLES: true USE_SINGULARITY: false - SINGULARITY_VERSION: "3.8" + SINGULARITY_VERSION: "3.10" fail-fast: false diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 322812b2..4a5d2277 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -2,7 +2,7 @@ name: Test Pull Requests -on: [push, pull_request] +on: [push] jobs: Tests: @@ -11,34 +11,46 @@ jobs: strategy: matrix: include: - - python-version: 3.6 - DISPLAY_NAME: "Singularity Tests" - RUN_TESTS: true - USE_SINGULARITY: true - - python-version: 3.7 + - python-version: "3.7" DISPLAY_NAME: "Singularity Tests + CODECOV" RUN_TESTS: true USE_SINGULARITY: true + SINGULARITY_VERSION: "3.8" RUN_CODECOV: true - - python-version: 3.7 + + - python-version: "3.7" DISPLAY_NAME: "Codestyle" RUN_CODESTYLE: true - - python-version: 3.7 + USE_SINGULARITY: false + + - python-version: "3.7" DISPLAY_NAME: "Singularity Container Examples" RUN_CONTAINER_EXAMPLES: true USE_SINGULARITY: true - - python-version: 3.7 + SINGULARITY_VERSION: "3.8" + + - python-version: "3.7" DISPLAY_NAME: "Local Examples" RUN_LOCAL_EXAMPLES: true USE_SINGULARITY: false - - python-version: 3.8 + + - python-version: "3.8" DISPLAY_NAME: "Singularity Tests" RUN_TESTS: true USE_SINGULARITY: true - - python-version: 3.9 + SINGULARITY_VERSION: "3.8" + + - python-version: "3.9" DISPLAY_NAME: "Singularity Tests" RUN_TESTS: true USE_SINGULARITY: true + SINGULARITY_VERSION: "3.8" + + - python-version: "3.10" + DISPLAY_NAME: "Singularity Tests" + RUN_TESTS: true + USE_SINGULARITY: true + SINGULARITY_VERSION: "3.8" fail-fast: false name: Tests ${{ matrix.python-version }} ${{ matrix.DISPLAY_NAME }} @@ -46,6 +58,7 @@ jobs: env: RUN_TESTS: ${{ matrix.RUN_TESTS }} USE_SINGULARITY: ${{ matrix.USE_SINGULARITY }} + SINGULARITY_VERSION: ${{ matrix.SINGULARITY_VERSION }} RUN_CODECOV: ${{ matrix.RUN_CODECOV }} RUN_CODESTYLE: ${{ matrix.RUN_CODESTYLE }} RUN_CONTAINER_EXAMPLES: ${{ matrix.RUN_CONTAINER_EXAMPLES }} @@ -56,12 +69,16 @@ jobs: - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: - 
python-version: ${{ matrix.python-version }} + python-version: "${{ matrix.python-version }}" - name: Set up Go for Singularity if: matrix.USE_SINGULARITY == true uses: actions/setup-go@v2 with: go-version: '1.14.15' # The Go version to download (if necessary) and use. + - name: Set up Singularity + if: matrix.USE_SINGULARITY == true + run: | + chmod +x ci_scripts/install_singularity.sh && source ./ci_scripts/install_singularity.sh - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/README.md b/README.md index 683f9486..745962ed 100644 --- a/README.md +++ b/README.md @@ -56,12 +56,12 @@ cd HPOBench pip install . ``` -**Note:** This does not install *singularity (version 3.6)*. Please follow the steps described here: [user-guide](https://sylabs.io/guides/3.6/user-guide/quick_start.html#quick-installation-steps). +**Note:** This does not install *singularity (version 3.8)*. Please follow the steps described here: [user-guide](https://sylabs.io/guides/3.8/user-guide/quick_start.html#quick-installation-steps). If you run into problems, using the most recent singularity version might help: [here](https://singularity.hpcng.org/admin-docs/master/installation.html) ## Containerized Benchmarks -We provide all benchmarks as containerized versions to (i) isolate their dependencies and (ii) keep them reproducible. Our containerized benchmarks do not rely on external dependencies and thus do not change over time. For this, we rely on [Singularity (version 3.6)](https://sylabs.io/guides/3.6/user-guide/) and for now upload all containers to a [gitlab registry](https://gitlab.tf.uni-freiburg.de/muelleph/hpobench-registry/container_registry) +We provide all benchmarks as containerized versions to (i) isolate their dependencies and (ii) keep them reproducible. Our containerized benchmarks do not rely on external dependencies and thus do not change over time. For this, we rely on [Singularity (version 3.6)](https://sylabs.io/guides/3.6/user-guide/) and for now upload all containers to a [gitlab registry](https://gitlab.tf.uni-freiburg.de:5050/mallik/hpo-bench-singularity-gitlab-ci/container_registry) The only other requirements are: [ConfigSpace](https://github.com/automl/ConfigSpace), *scipy* and *numpy* @@ -72,7 +72,7 @@ Each benchmark can also be run locally, but the dependencies must be installed m A simple example is the XGBoost benchmark which can be installed with `pip install .[xgboost]` ```python -from hpobench.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark +from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark b = XGBoostBenchmark(task_id=167149) config = b.get_configuration_space(seed=1).sample_configuration() @@ -83,7 +83,7 @@ result_dict = b.objective_function(configuration=config, ### How to Build a Container Locally -With singularity installed run the following to built the, e.g. 
xgboost container +With singularity installed run the following to build (for example,) the xgboost container ```bash cd hpobench/container/recipes/ml @@ -97,7 +97,7 @@ from hpobench.container.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark b = XGBoostBenchmark(task_id=167149, container_name="xgboost_benchmark", container_source='./') # path to hpobench/container/recipes/ml config = b.get_configuration_space(seed=1).sample_configuration() -result_dict = b.objective_function(config, fidelity={"n_estimators": 128, "dataset_fraction": 0.5}) +result_dict = b.objective_function(config, fidelity={"n_estimators": 128, "subsample": 0.5}) ``` ## Configure HPOBench diff --git a/changelog.md b/changelog.md index 18b3b9fd..298903ac 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,7 @@ +# 0.0.11 + * Drop Support for 3.6: + Although most of the functionality should still work, we drop the official support for 3.6. + # 0.0.10 * Cartpole Benchmark Version 0.0.4: Fix: Pass the hp `entropy_regularization` to the PPO Agent. diff --git a/ci_scripts/install.sh b/ci_scripts/install.sh index b68a1b88..97612bd0 100644 --- a/ci_scripts/install.sh +++ b/ci_scripts/install.sh @@ -4,14 +4,24 @@ install_packages="" if [[ "$RUN_TESTS" == "true" ]]; then echo "Install tools for testing" - install_packages="${install_packages}xgboost,pytest,test_paramnet,test_tabular_datamanager," + install_packages="${install_packages}pytest,test_tabular_datamanager," pip install codecov - # The param net benchmark does not work with a scikit-learn version != 0.23.2. (See notes in the benchmark) - # To make sure that no newer version is installed, we install it before the other requirements. - # Since we are not using a "--upgrade" option later on, pip skips to install another scikit-learn version. - echo "Install the right scikit-learn function for the param net tests." - pip install --upgrade scikit-learn==0.23.2 + PYVERSION=$(python -V 2>&1 | sed 's/.* \([0-9]\).\([0-9]*\).*/\1\2/') + if [[ "${PYVERSION}" != "310" ]]; then + # The param net benchmark does not work with a scikit-learn version != 0.23.2. (See notes in the benchmark) + # To make sure that no newer version is installed, we install it before the other requirements. + # Since we are not using a "--upgrade" option later on, pip skips to install another scikit-learn version. + echo "Install the right scikit-learn function for the param net tests." + pip install --upgrade scikit-learn==0.23.2 + install_packages="${install_packages}xgboost,test_paramnet," + else + echo "Skip installing the extra paramnet tests." + # For 3.10, we need a different pandas version - this comes as a requirement for the old xgboost benchmark. + # building pandas<=1.5.0 does not work with 3.10 anymore. -> install a different version. 
+ install_packages="${install_packages}xgboost_310," + fi + else echo "Skip installing tools for testing" fi @@ -35,42 +45,25 @@ if [[ "$RUN_LOCAL_EXAMPLES" == "true" ]]; then echo "Install packages for local examples" echo "Install swig" sudo apt-get update && sudo apt-get install -y build-essential swig - install_packages="${install_packages}xgboost," -else - echo "Skip installing packages for local examples" -fi - -if [[ "$USE_SINGULARITY" == "true" ]]; then - echo "Install Singularity" - sudo apt-get update && sudo apt-get install -y \ - build-essential \ - libssl-dev \ - uuid-dev \ - libgpgme11-dev \ - squashfs-tools \ - libseccomp-dev \ - wget \ - pkg-config \ - git \ - cryptsetup + PYVERSION=$(python -V 2>&1 | sed 's/.* \([0-9]\).\([0-9]*\).*/\1\2/') + if [[ "${PYVERSION}" != "310" ]]; then + # For 3.10, we need a different pandas version - this comes as a requirement for the old xgboost benchmark. + # building pandas<=1.5.0 does not work with 3.10 anymore. -> install a different version. + install_packages="${install_packages}xgboost," + else + install_packages="${install_packages}xgboost_310," + fi - export VERSION=3.5.3 && # adjust this as necessary \ - wget https://github.com/sylabs/singularity/archive/refs/tags/v${VERSION}.tar.gz && \ - tar -xzf v${VERSION}.tar.gz && \ - cd singularity-${VERSION} - - ./mconfig && \ - make -C builddir && \ - sudo make -C builddir install - - cd .. - install_packages="${install_packages}placeholder," else - echo "Skip installing Singularity" + echo "Skip installing packages for local examples" fi +# We add a placeholder / No-OP operator. When running the container examples, we don't install any +# additional packages. That causes an error, since `pip install .[]` does not work. +install_packages="${install_packages}NOP," + # remove the trailing comma install_packages="$(echo ${install_packages} | sed 's/,*\r*$//')" echo "Install HPOBench with options: ${install_packages}" -pip install .["${install_packages}"] +pip install .["${install_packages}"] \ No newline at end of file diff --git a/ci_scripts/install_singularity.sh b/ci_scripts/install_singularity.sh index 292df85b..98aecc0f 100644 --- a/ci_scripts/install_singularity.sh +++ b/ci_scripts/install_singularity.sh @@ -1,6 +1,6 @@ #!/usr/bin/env sh -echo "Install Singularity" +echo "Inside Singularity Installation Script" sudo apt-get update && sudo apt-get install -y \ build-essential \ @@ -14,24 +14,36 @@ sudo apt-get update && sudo apt-get install -y \ git \ cryptsetup -if [[ "$SINGULARITY_VERSION" == "3.5" ]]; then - export VERSION=3.5.3 -elif [[ "$SINGULARITY_VERSION" == "3.6" ]]; then - export VERSION=3.6.4 -elif [[ "$SINGULARITY_VERSION" == "3.7" ]]; then +if [[ "$SINGULARITY_VERSION" == "3.7" ]]; then export VERSION=3.7.3 + export FILENAME=singularity-"${VERSION}" + export EXTRACTED_FILENAME=singularity + elif [[ "$SINGULARITY_VERSION" == "3.8" ]]; then - export VERSION=3.8.0 + export VERSION=3.8.4 + export FILENAME=singularity-ce-"${VERSION}" + export EXTRACTED_FILENAME=singularity-ce-"${VERSION}" + +elif [[ "$SINGULARITY_VERSION" == "3.9" ]]; then + export VERSION=3.9.3 + export FILENAME=singularity-ce-"${VERSION}" + export EXTRACTED_FILENAME=singularity-ce-"${VERSION}" + +elif [[ "$SINGULARITY_VERSION" == "3.10" ]]; then + export VERSION=3.10.0 + export FILENAME=singularity-ce-"${VERSION}" + export EXTRACTED_FILENAME=singularity-ce-"${VERSION}" + else echo "Skip installing Singularity" fi -wget https://github.com/sylabs/singularity/archive/refs/tags/v${VERSION}.tar.gz && \ -tar -xzf 
v${VERSION}.tar.gz && \ -cd singularity-${VERSION} && \ +wget https://github.com/sylabs/singularity/releases/download/v"${VERSION}"/"${FILENAME}".tar.gz && \ +tar -xzf "${FILENAME}".tar.gz && \ +cd "${EXTRACTED_FILENAME}" && \ ./mconfig && \ make -C builddir && \ sudo make -C builddir install cd .. -pip install . +pip install . \ No newline at end of file diff --git a/examples/container/create_connection_to_benchmark.py b/examples/container/create_connection_to_benchmark.py index 559c6d31..3f4fc7be 100644 --- a/examples/container/create_connection_to_benchmark.py +++ b/examples/container/create_connection_to_benchmark.py @@ -20,6 +20,7 @@ import argparse +from hpobench import config_file from hpobench.container.benchmarks.nas.tabular_benchmarks import SliceLocalizationBenchmark as TabBenchmarkContainer @@ -27,7 +28,7 @@ def run_experiment(on_travis=False): # First, we start the benchmark. This generates the unix-socket (address) where the benchmark is reachable. benchmark = TabBenchmarkContainer(container_name='tabular_benchmarks', - container_source='library://phmueller/automl', + container_source=config_file.container_source, rng=1) print(benchmark.socket_id) diff --git a/examples/container/tabular_benchmark_example.py b/examples/container/tabular_benchmark_example.py index 1b9ddc52..6eb9fae0 100644 --- a/examples/container/tabular_benchmark_example.py +++ b/examples/container/tabular_benchmark_example.py @@ -17,13 +17,14 @@ import argparse +from hpobench import config_file from hpobench.container.benchmarks.nas.tabular_benchmarks import SliceLocalizationBenchmark as TabBenchmarkContainer def run_experiment(on_travis=False): benchmark = TabBenchmarkContainer(container_name='tabular_benchmarks', - container_source='library://phmueller/automl', + container_source=config_file.container_source, rng=1) cs = benchmark.get_configuration_space(seed=1) diff --git a/examples/container/xgboost_with_container.py b/examples/container/xgboost_with_container.py index d1d170ee..083d9a60 100644 --- a/examples/container/xgboost_with_container.py +++ b/examples/container/xgboost_with_container.py @@ -19,6 +19,7 @@ import logging from time import time +from hpobench import config_file from hpobench.container.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark as Benchmark from hpobench.util.openml_data_manager import get_openmlcc18_taskids @@ -38,7 +39,7 @@ def run_experiment(on_travis: bool = False): b = Benchmark(task_id=task_id, container_name='xgboost_benchmark', - container_source='library://phmueller/automl') + container_source=config_file.container_source) cs = b.get_configuration_space() start = time() @@ -48,7 +49,7 @@ def run_experiment(on_travis: bool = False): print(configuration) for n_estimator in [8, 64]: for subsample in [0.4, 1]: - fidelity = {'n_estimators': n_estimator, 'dataset_fraction': subsample} + fidelity = {'n_estimators': n_estimator, 'subsample': subsample} result_dict = b.objective_function(configuration.get_dictionary(), fidelity=fidelity) valid_loss = result_dict['function_value'] diff --git a/examples/local/xgboost_local.py b/examples/local/xgboost_local.py index 4f3b3ad3..6773a45d 100644 --- a/examples/local/xgboost_local.py +++ b/examples/local/xgboost_local.py @@ -4,18 +4,20 @@ This example executes the xgboost benchmark locally with random configurations on the CC18 openml tasks. 
To run this example please install the necessary dependencies via: -``pip3 install .[xgboost_example]`` +``pip install .[xgboost_example]`` """ import argparse from time import time -from hpobench.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark as Benchmark -from hpobench.util.openml_data_manager import get_openmlcc18_taskids +from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark as Benchmark def run_experiment(on_travis: bool = False): - task_ids = get_openmlcc18_taskids() + task_ids = [ + 10101,53,146818,146821,9952,146822,31,3917,168912,3,167119,12,146212,168911, + 9981,168329,167120,14965,146606,168330 + ] for task_no, task_id in enumerate(task_ids): if on_travis and task_no == 5: @@ -32,9 +34,9 @@ def run_experiment(on_travis: bool = False): for i in range(num_configs): configuration = cs.sample_configuration() print(configuration) - for n_estimator in [8, 64]: - for subsample in [0.4, 1]: - fidelity = {'n_estimators': n_estimator, 'dataset_fraction': subsample} + for n_estimator in [50, 75]: + for subsample in [0.5, 0.75]: + fidelity = {'n_estimators': n_estimator, 'subsample': subsample} result_dict = b.objective_function(configuration.get_dictionary(), fidelity=fidelity) valid_loss = result_dict['function_value'] diff --git a/examples/w_optimizer/cartpole_bohb.py b/examples/w_optimizer/cartpole_bohb.py index 43aa60b5..0b544fea 100644 --- a/examples/w_optimizer/cartpole_bohb.py +++ b/examples/w_optimizer/cartpole_bohb.py @@ -20,6 +20,7 @@ from hpbandster.core.worker import Worker from hpbandster.optimizers import BOHB +from hpobench import config_file from hpobench.container.benchmarks.rl.cartpole import CartpoleReduced as Benchmark from hpobench.util.example_utils import get_travis_settings, set_env_variables_to_use_only_one_core from hpobench.util.rng_helper import get_rng @@ -105,7 +106,7 @@ def run_experiment(out_path, on_travis): f'with Performance: {inc_value:.2f}') if not on_travis: - benchmark = Benchmark(container_source='library://phmueller/automl') + benchmark = Benchmark(container_source=config_file.container_source) incumbent_result = benchmark.objective_function_test(configuration=inc_cfg, fidelity={"budget": settings['max_budget']}) print(incumbent_result) diff --git a/extra_requirements/examples.json b/extra_requirements/examples.json index 803c7d33..af1d60e0 100644 --- a/extra_requirements/examples.json +++ b/extra_requirements/examples.json @@ -1,5 +1,5 @@ { - "xgboost_example": ["xgboost==0.90","json_tricks==3.14.0","openml==0.10.2"], + "xgboost_example": ["pyarrow==5.0.0", "fastparquet==0.8.1", "xgboost==0.90","json_tricks==3.14.0","openml==0.10.2"], "cartpole_example": ["tensorflow==1.13.2","gym==0.10.9","tensorforce==0.4.3","scikit-learn==0.22.0", "smac==0.12.2","hpbandster==0.7.4"], "nasbench_101_example": ["torch>=1.2.0,<=1.5.1","torchvision>=0.4.0", diff --git a/extra_requirements/outlier_detection.json b/extra_requirements/outlier_detection.json index 7b256fbd..9f4c9ee5 100644 --- a/extra_requirements/outlier_detection.json +++ b/extra_requirements/outlier_detection.json @@ -1,3 +1,5 @@ { - "outlier_detection": ["torch==1.9.0", "pytorch_lightning==1.3.8", "scikit-learn==0.24.2"] -} \ No newline at end of file + "outlier_detection": [ + "pandas==1.2.4", "torchmetrics==0.6.0", "torch==1.13.1", "pytorch_lightning==1.3.8", "scikit-learn==0.24.2" + ] +} diff --git a/extra_requirements/paramnet.json b/extra_requirements/paramnet.json index 422d41d7..71048cf7 100644 --- a/extra_requirements/paramnet.json +++ 
b/extra_requirements/paramnet.json @@ -1,3 +1,3 @@ { - "paramnet": ["tqdm","scikit-learn==0.23.2"] + "paramnet": ["tqdm", "scikit-learn==0.23.2"] } \ No newline at end of file diff --git a/extra_requirements/tests.json b/extra_requirements/tests.json index 6c27be97..396e618d 100644 --- a/extra_requirements/tests.json +++ b/extra_requirements/tests.json @@ -1,6 +1,6 @@ { - "codestyle": ["pycodestyle","flake8","pylint"], - "pytest": ["pytest>=4.6","pytest-cov"], + "codestyle": ["pycodestyle", "flake8", "pylint"], + "pytest": ["pytest>=4.6", "pytest-cov"], "test_paramnet": ["tqdm", "scikit-learn==0.23.2"], - "test_tabular_datamanager": ["pyarrow", "fastparquet"] + "test_tabular_datamanager": ["tqdm", "pyarrow", "fastparquet"] } \ No newline at end of file diff --git a/extra_requirements/xgboost.json b/extra_requirements/xgboost.json index 2789d2ef..eefc920c 100644 --- a/extra_requirements/xgboost.json +++ b/extra_requirements/xgboost.json @@ -1,3 +1,4 @@ { - "xgboost": ["xgboost==0.90","pandas>=1.0.0,<1.1.5","openml==0.10.2","scikit-learn>=0.18.1"] + "xgboost": ["xgboost==0.90","pandas>=1.0.0,<1.1.5","openml==0.10.2","scikit-learn>=0.18.1"], + "xgboost_310": ["xgboost","pandas","openml==0.10.2","scikit-learn>=0.18.1"] } \ No newline at end of file diff --git a/hpobench/benchmarks/ml/__init__.py b/hpobench/benchmarks/ml/__init__.py index 64e399cd..1ac0c52f 100644 --- a/hpobench/benchmarks/ml/__init__.py +++ b/hpobench/benchmarks/ml/__init__.py @@ -1,4 +1,3 @@ -from hpobench.benchmarks.ml.histgb_benchmark import HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF from hpobench.benchmarks.ml.lr_benchmark import LRBenchmark, LRBenchmarkBB, LRBenchmarkMF from hpobench.benchmarks.ml.nn_benchmark import NNBenchmark, NNBenchmarkBB, NNBenchmarkMF from hpobench.benchmarks.ml.rf_benchmark import RandomForestBenchmark, RandomForestBenchmarkBB, \ @@ -6,17 +5,25 @@ from hpobench.benchmarks.ml.svm_benchmark import SVMBenchmark, SVMBenchmarkBB, SVMBenchmarkMF from hpobench.benchmarks.ml.tabular_benchmark import TabularBenchmark + try: + # `xgboost` is from https://xgboost.readthedocs.io/en/latest/install.html#conda + # and not part of the scikit-learn bundle and not a strict requirement for running HPOBench + # for other spaces and also for tabular benchmarks from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF -except ImportError: - pass - - -__all__ = ['HistGBBenchmark', 'HistGBBenchmarkBB', 'HistGBBenchmarkMF', - 'LRBenchmark', 'LRBenchmarkBB', 'LRBenchmarkMF', - 'NNBenchmark', 'NNBenchmarkBB', 'NNBenchmarkMF', - 'RandomForestBenchmark', 'RandomForestBenchmarkBB', 'RandomForestBenchmarkMF', - 'SVMBenchmark', 'SVMBenchmarkBB', 'SVMBenchmarkMF', - 'TabularBenchmark', - 'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF', - ] + __all__ = [ + 'LRBenchmark', 'LRBenchmarkBB', 'LRBenchmarkMF', + 'NNBenchmark', 'NNBenchmarkBB', 'NNBenchmarkMF', + 'RandomForestBenchmark', 'RandomForestBenchmarkBB', 'RandomForestBenchmarkMF', + 'SVMBenchmark', 'SVMBenchmarkBB', 'SVMBenchmarkMF', + 'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF', + 'TabularBenchmark', + ] +except (ImportError, AttributeError): + __all__ = [ + 'LRBenchmark', 'LRBenchmarkBB', 'LRBenchmarkMF', + 'NNBenchmark', 'NNBenchmarkBB', 'NNBenchmarkMF', + 'RandomForestBenchmark', 'RandomForestBenchmarkBB', 'RandomForestBenchmarkMF', + 'SVMBenchmark', 'SVMBenchmarkBB', 'SVMBenchmarkMF', + 'TabularBenchmark', + ] diff --git a/hpobench/benchmarks/ml/lr_benchmark.py 
b/hpobench/benchmarks/ml/lr_benchmark.py index 8c317111..aa7aa162 100644 --- a/hpobench/benchmarks/ml/lr_benchmark.py +++ b/hpobench/benchmarks/ml/lr_benchmark.py @@ -4,30 +4,38 @@ 0.0.1: * First implementation of the LR Benchmarks. +0.0.2: +* Restructuring for consistency and to match ML Benchmark Template updates. +0.0.3: +* Adding Learning Curve support. """ - +import time from typing import Union, Tuple, Dict import ConfigSpace as CS import numpy as np +import pandas as pd from ConfigSpace.hyperparameters import Hyperparameter from sklearn.linear_model import SGDClassifier +from hpobench.util.rng_helper import get_rng from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -__version__ = '0.0.1' +__version__ = '0.0.3' class LRBenchmark(MLBenchmark): - def __init__(self, - task_id: int, - rng: Union[np.random.RandomState, int, None] = None, - valid_size: float = 0.33, - data_path: Union[str, None] = None): - - super(LRBenchmark, self).__init__(task_id, rng, valid_size, data_path) - self.cache_size = 500 + """ Multi-multi-fidelity Logisitic Regression Benchmark + """ + def __init__( + self, + task_id: int, + valid_size: float = 0.33, + rng: Union[np.random.RandomState, int, None] = None, + data_path: Union[str, None] = None + ): + super(LRBenchmark, self).__init__(task_id, valid_size, rng, data_path) @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -44,7 +52,8 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp ]) return cs - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # gray-box setting (multi-multi-fidelity) - iterations + data subsample @@ -53,17 +62,11 @@ def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationS return fidelity_space @staticmethod - def _get_fidelity_choices(iter_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: + def _get_fidelity_choices( + iter_choice: str, subsample_choice: str + ) -> Tuple[Hyperparameter, Hyperparameter]: """Fidelity space available --- specifies the fidelity dimensions - - For SVM, only a single fidelity exists, i.e., subsample fraction. 
- if fidelity_choice == 0 - uses the entire data (subsample=1), reflecting the black-box setup - else - parameterizes the fraction of data to subsample - """ - assert iter_choice in ['fixed', 'variable'] assert subsample_choice in ['fixed', 'variable'] @@ -79,14 +82,16 @@ def _get_fidelity_choices(iter_choice: str, subsample_choice: str) -> Tuple[Hype 'subsample', lower=0.1, upper=1.0, default_value=1.0, log=False ) ) - iter = fidelity1[iter_choice] subsample = fidelity2[subsample_choice] return iter, subsample - def init_model(self, config: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - rng: Union[int, np.random.RandomState, None] = None): + def init_model( + self, + config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[int, np.random.RandomState, None] = None + ): # initializing model rng = self.rng if rng is None else rng @@ -103,13 +108,185 @@ def init_model(self, config: Union[CS.Configuration, Dict], learning_rate="adaptive", tol=None, random_state=rng, - ) return model + def get_model_size(self, model: SGDClassifier = None) -> float: + """ Returns the dimensionality as a proxy for the number of model parameters + + Logistic Regression models have a fixed number of parameters given a dataset. Model size is + being approximated as the number of beta parameters required as the model support plus the + intercept. This depends on the dataset and not on the trained model. + + Parameters + ---------- + model : SGDClassifier + Trained LR model. This parameter is required to maintain function signature. + + Returns + ------- + float + """ + ndims = self.train_X.shape[1] + # accounting for the intercept + ndims += 1 + return ndims + + def _train_objective( + self, + config: Dict, + fidelity: Dict, + shuffle: bool, + rng: Union[np.random.RandomState, int, None] = None, + evaluation: Union[str, None] = "valid", + record_stats: bool = False, + get_learning_curve: bool = False, + lc_every_k: int = 1, + **kwargs + ): + """Function that instantiates a 'config' on a 'fidelity' and trains it + + The ML model is instantiated and trained on the training split. Optionally, the model is + evaluated on the training set. Optionally, the learning curves are collected. + + Parameters + ---------- + config : CS.Configuration, Dict + The hyperparameter configuration. + fidelity : CS.Configuration, Dict + The fidelity configuration. + shuffle : bool (optional) + If True, shuffles the training split before fitting the ML model. + rng : np.random.RandomState, int (optional) + The random seed passed to the ML model and if applicable, used for shuffling the data + and subsampling the dataset fraction. + evaluation : str (optional) + If "valid", the ML model is trained on the training set alone. + If "test", the ML model is trained on the training + validation sets. + record_stats : bool (optional) + If True, records the evaluation metrics of the trained ML model on the training set. + This is set to False by default to reduce overall compute time. + get_learning_curve : bool (optional) + If True, records the learning curve using partial_fit or warm starting, if applicable. + This is set to False by default to reduce overall compute time. + Enabling True, implies that the for each iteration, the model will be evaluated on both + the validation and test sets, optionally on the training set also. + lc_every_k : int (optional) + If True, records the learning curve after every k iterations. 
+ """ + if rng is not None: + rng = get_rng(rng, self.rng) + + # initializing model + model = self.init_model(config, fidelity, rng) + + # preparing data + if evaluation == "valid": + train_X = self.train_X + train_y = self.train_y + elif evaluation == "test": + train_X = np.vstack((self.train_X, self.valid_X)) + train_y = pd.concat((self.train_y, self.valid_y)) + else: + raise ValueError("{} not in ['valid', 'test']".format(evaluation)) + train_idx = np.arange(len(train_X)) if self.train_idx is None else self.train_idx + + # shuffling data + if shuffle: + train_idx = self.shuffle_data_idx(train_idx, rng) + if isinstance(train_idx, np.ndarray): + train_X = train_X[train_idx] + else: + train_X = train_X.iloc[train_idx] + train_y = train_y.iloc[train_idx] + + # subsample here: + # application of the other fidelity to the dataset that the model interfaces + # carried over from previous HPOBench code that borrowed from FABOLAS' SVM + lower_bound_lim = 1.0 / 512.0 + if self.lower_bound_train_size is None: + self.lower_bound_train_size = (10 * self.n_classes) / self.train_X.shape[0] + self.lower_bound_train_size = np.max((lower_bound_lim, self.lower_bound_train_size)) + subsample = np.max((fidelity['subsample'], self.lower_bound_train_size)) + train_idx = self.rng.choice( + np.arange(len(train_X)), size=int( + subsample * len(train_X) + ) + ) + # fitting the model with subsampled data + if get_learning_curve: + # IMPORTANT to allow partial_fit + model.warm_start = True + lc_time = 0.0 + model_fit_time = 0.0 + learning_curves = dict(train=[], valid=[], test=[]) + lc_spacings = self._get_lc_spacing(model.max_iter, lc_every_k) + iter_start = 0 + for i in range(len(lc_spacings)): + iter_end = lc_spacings[i] + start = time.time() + # trains model for k steps + for j in range(iter_end - iter_start): + model.partial_fit( + train_X[train_idx], + train_y.iloc[train_idx], + np.unique(train_y.iloc[train_idx]) + ) + # adding all partial fit times + model_fit_time += time.time() - start + iter_start = iter_end + lc_start = time.time() + if record_stats: + train_pred = model.predict(train_X) + train_loss = 1 - self.scorers['acc']( + train_y, train_pred, **self.scorer_args['acc'] + ) + learning_curves['train'].append(train_loss) + val_pred = model.predict(self.valid_X) + val_loss = 1 - self.scorers['acc']( + self.valid_y, val_pred, **self.scorer_args['acc'] + ) + learning_curves['valid'].append(val_loss) + test_pred = model.predict(self.test_X) + test_loss = 1 - self.scorers['acc']( + self.test_y, test_pred, **self.scorer_args['acc'] + ) + learning_curves['test'].append(test_loss) + # sums the time taken to evaluate and collect data for the learning curves + lc_time += time.time() - lc_start + else: + # default training as per the base benchmark template + learning_curves = None + lc_time = None + start = time.time() + model.fit(train_X[train_idx], train_y.iloc[train_idx]) + model_fit_time = time.time() - start + # model inference + inference_time = 0.0 + # can optionally not record evaluation metrics on training set to save compute + if record_stats: + start = time.time() + pred_train = model.predict(train_X) + inference_time = time.time() - start + # computing statistics on training data + scores = dict() + score_cost = dict() + for k, v in self.scorers.items(): + scores[k] = 0.0 + score_cost[k] = 0.0 + _start = time.time() + if record_stats: + scores[k] = v(train_y, pred_train, **self.scorer_args[k]) + score_cost[k] = time.time() - _start + inference_time + train_loss = 1 - scores["acc"] + return model, 
model_fit_time, train_loss, scores, score_cost, learning_curves, lc_time + class LRBenchmarkBB(LRBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Black-box version of the LRBenchmark + """ + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # black-box setting (full fidelity) @@ -119,7 +296,10 @@ def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationS class LRBenchmarkMF(LRBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Multi-fidelity version of the LRBenchmark + """ + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # gray-box setting (multi-fidelity) - iterations diff --git a/hpobench/benchmarks/ml/nn_benchmark.py b/hpobench/benchmarks/ml/nn_benchmark.py index 06634661..4263278f 100644 --- a/hpobench/benchmarks/ml/nn_benchmark.py +++ b/hpobench/benchmarks/ml/nn_benchmark.py @@ -4,28 +4,39 @@ 0.0.1: * First implementation of the NN Benchmarks. +0.0.2: +* Restructuring for consistency and to match ML Benchmark Template updates. +0.0.3: +* Adding Learning Curve support. """ +import time from copy import deepcopy from typing import Union, Tuple, Dict import ConfigSpace as CS import numpy as np +import pandas as pd from ConfigSpace.hyperparameters import Hyperparameter from sklearn.neural_network import MLPClassifier +from hpobench.util.rng_helper import get_rng from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -__version__ = '0.0.1' +__version__ = '0.0.3' class NNBenchmark(MLBenchmark): - def __init__(self, - task_id: int, - rng: Union[np.random.RandomState, int, None] = None, - valid_size: float = 0.33, - data_path: Union[str, None] = None): - super(NNBenchmark, self).__init__(task_id, rng, valid_size, data_path) + """ Multi-multi-fidelity Multi-Layer Perceptron Benchmark + """ + def __init__( + self, + task_id: int, + valid_size: float = 0.33, + rng: Union[np.random.RandomState, int, None] = None, + data_path: Union[str, None] = None + ): + super(NNBenchmark, self).__init__(task_id, valid_size, rng, data_path) @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -63,8 +74,11 @@ def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: return fidelity_space @staticmethod - def _get_fidelity_choices(iter_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: - + def _get_fidelity_choices( + iter_choice: str, subsample_choice: str + ) -> Tuple[Hyperparameter, Hyperparameter]: + """Fidelity space available --- specifies the fidelity dimensions + """ fidelity1 = dict( fixed=CS.Constant('iter', value=243), variable=CS.UniformIntegerHyperparameter( @@ -81,11 +95,13 @@ def _get_fidelity_choices(iter_choice: str, subsample_choice: str) -> Tuple[Hype subsample = fidelity2[subsample_choice] return iter, subsample - def init_model(self, config: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - rng: Union[int, np.random.RandomState, None] = None): - """ Function that returns the model initialized based on the configuration and fidelity - """ + def init_model( + self, + config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] 
= None, + rng: Union[int, np.random.RandomState, None] = None + ): + # initializing model rng = self.rng if rng is None else rng if isinstance(config, CS.Configuration): @@ -99,6 +115,7 @@ def init_model(self, config: Union[CS.Configuration, Dict], config.pop("depth") config.pop("width") hidden_layers = [width] * depth + # TODO: check for iteration length and edit n_iter_no_change maybe model = MLPClassifier( **config, hidden_layer_sizes=hidden_layers, @@ -109,9 +126,175 @@ def init_model(self, config: Union[CS.Configuration, Dict], ) return model + def get_model_size(self, model: MLPClassifier) -> float: + """ Returns the total number of trained parameters in the MLP model + + Parameters + ---------- + model : MLPClassifier + Trained MLP model. + + Returns + ------- + float + """ + nparams = 0 + for layer in model.coefs_: + nparams += layer.shape[0] * layer.shape[1] + for layer in model.intercepts_: + nparams += layer.shape[0] + return nparams + + def _train_objective( + self, + config: Dict, + fidelity: Dict, + shuffle: bool, + rng: Union[np.random.RandomState, int, None] = None, + evaluation: Union[str, None] = "valid", + record_stats: bool = False, + get_learning_curve: bool = False, + lc_every_k: int = 1, + **kwargs + ): + """Function that instantiates a 'config' on a 'fidelity' and trains it + + The ML model is instantiated and trained on the training split. Optionally, the model is + evaluated on the training set. Optionally, the learning curves are collected. + + Parameters + ---------- + config : CS.Configuration, Dict + The hyperparameter configuration. + fidelity : CS.Configuration, Dict + The fidelity configuration. + shuffle : bool (optional) + If True, shuffles the training split before fitting the ML model. + rng : np.random.RandomState, int (optional) + The random seed passed to the ML model and if applicable, used for shuffling the data + and subsampling the dataset fraction. + evaluation : str (optional) + If "valid", the ML model is trained on the training set alone. + If "test", the ML model is trained on the training + validation sets. + record_stats : bool (optional) + If True, records the evaluation metrics of the trained ML model on the training set. + This is set to False by default to reduce overall compute time. + get_learning_curve : bool (optional) + If True, records the learning curve using partial_fit or warm starting, if applicable. + This is set to False by default to reduce overall compute time. + Enabling True, implies that the for each iteration, the model will be evaluated on both + the validation and test sets, optionally on the training set also. + lc_every_k : int (optional) + If True, records the learning curve after every k iterations. 
+ """ + if rng is not None: + rng = get_rng(rng, self.rng) + + # initializing model + model = self.init_model(config, fidelity, rng) + + # preparing data + if evaluation == "valid": + train_X = self.train_X + train_y = self.train_y + elif evaluation == "test": + train_X = np.vstack((self.train_X, self.valid_X)) + train_y = pd.concat((self.train_y, self.valid_y)) + else: + raise ValueError("{} not in ['valid', 'test']".format(evaluation)) + train_idx = np.arange(len(train_X)) if self.train_idx is None else self.train_idx + + # shuffling data + if shuffle: + train_idx = self.shuffle_data_idx(train_idx, rng) + if isinstance(train_idx, np.ndarray): + train_X = train_X[train_idx] + else: + train_X = train_X.iloc[train_idx] + train_y = train_y.iloc[train_idx] + + # subsample here: + # application of the other fidelity to the dataset that the model interfaces + if self.lower_bound_train_size is None: + self.lower_bound_train_size = (10 * self.n_classes) / self.train_X.shape[0] + self.lower_bound_train_size = np.max((1 / 512, self.lower_bound_train_size)) + subsample = np.max((fidelity['subsample'], self.lower_bound_train_size)) + train_idx = self.rng.choice( + np.arange(len(train_X)), size=int( + subsample * len(train_X) + ) + ) + # fitting the model with subsampled data + if get_learning_curve: + # IMPORTANT to allow partial_fit + model.warm_start = True + lc_time = 0.0 + model_fit_time = 0.0 + learning_curves = dict(train=[], valid=[], test=[]) + lc_spacings = self._get_lc_spacing(model.max_iter, lc_every_k) + iter_start = 0 + for i in range(len(lc_spacings)): + iter_end = lc_spacings[i] + start = time.time() + # trains model for k steps + for j in range(iter_end - iter_start): + model.partial_fit( + train_X[train_idx], + train_y.iloc[train_idx], + np.unique(train_y.iloc[train_idx]) + ) + # adding all partial fit times + model_fit_time += time.time() - start + lc_start = time.time() + if record_stats: + train_pred = model.predict(train_X) + train_loss = 1 - self.scorers['acc']( + train_y, train_pred, **self.scorer_args['acc'] + ) + learning_curves['train'].append(train_loss) + val_pred = model.predict(self.valid_X) + val_loss = 1 - self.scorers['acc']( + self.valid_y, val_pred, **self.scorer_args['acc'] + ) + learning_curves['valid'].append(val_loss) + test_pred = model.predict(self.test_X) + test_loss = 1 - self.scorers['acc']( + self.test_y, test_pred, **self.scorer_args['acc'] + ) + learning_curves['test'].append(test_loss) + lc_time += time.time() - lc_start + else: + learning_curves = None + lc_time = None + start = time.time() + model.fit(train_X[train_idx], train_y.iloc[train_idx]) + model_fit_time = time.time() - start + # model inference + inference_time = 0.0 + # can optionally not record evaluation metrics on training set to save compute + if record_stats: + start = time.time() + pred_train = model.predict(train_X) + inference_time = time.time() - start + # computing statistics on training data + scores = dict() + score_cost = dict() + for k, v in self.scorers.items(): + scores[k] = 0.0 + score_cost[k] = 0.0 + _start = time.time() + if record_stats: + scores[k] = v(train_y, pred_train, **self.scorer_args[k]) + score_cost[k] = time.time() - _start + inference_time + train_loss = 1 - scores["acc"] + return model, model_fit_time, train_loss, scores, score_cost, learning_curves, lc_time + class NNBenchmarkBB(NNBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Black-box version of the NNBenchmark + """ + @staticmethod + def 
get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # black-box setting (full fidelity) @@ -121,7 +304,10 @@ def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationS class NNBenchmarkMF(NNBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Multi-fidelity version of the NNBenchmark + """ + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # gray-box setting (multi-fidelity) - iterations diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index 596f03b6..b6874788 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -4,28 +4,39 @@ 0.0.1: * First implementation of the RF Benchmarks. +0.0.2: +* Restructuring for consistency and to match ML Benchmark Template updates. +0.0.3: +* Adding Learning Curve support. """ +import time from copy import deepcopy from typing import Union, Tuple, Dict import ConfigSpace as CS import numpy as np +import pandas as pd from ConfigSpace.hyperparameters import Hyperparameter from sklearn.ensemble import RandomForestClassifier +from hpobench.util.rng_helper import get_rng from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -__version__ = '0.0.1' +__version__ = '0.0.3' class RandomForestBenchmark(MLBenchmark): - def __init__(self, - task_id: int, - rng: Union[np.random.RandomState, int, None] = None, - valid_size: float = 0.33, - data_path: Union[str, None] = None): - super(RandomForestBenchmark, self).__init__(task_id, rng, valid_size, data_path) + """ Multi-multi-fidelity Random Forest Benchmark + """ + def __init__( + self, + task_id: int, + valid_size: float = 0.33, + rng: Union[np.random.RandomState, int, None] = None, + data_path: Union[str, None] = None + ): + super(RandomForestBenchmark, self).__init__(task_id, valid_size, rng, data_path) @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -54,12 +65,16 @@ def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # gray-box setting (multi-multi-fidelity) - ntrees + data subsample - RandomForestBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='variable') + RandomForestBenchmark._get_fidelity_choices( + n_estimators_choice='variable', subsample_choice='variable' + ) ) return fidelity_space @staticmethod - def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: + def _get_fidelity_choices( + n_estimators_choice: str, subsample_choice: str + ) -> Tuple[Hyperparameter, Hyperparameter]: assert n_estimators_choice in ['fixed', 'variable'] assert subsample_choice in ['fixed', 'variable'] @@ -70,7 +85,6 @@ def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tu 'n_estimators', lower=16, upper=512, default_value=512, log=False ) ) - fidelity2 = dict( fixed=CS.Constant('subsample', value=1), variable=CS.UniformFloatHyperparameter( @@ -81,11 +95,13 @@ def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tu subsample = fidelity2[subsample_choice] return n_estimators, subsample - def init_model(self, config: 
Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - rng: Union[int, np.random.RandomState, None] = None): - """ Function that returns the model initialized based on the configuration and fidelity - """ + def init_model( + self, + config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[int, np.random.RandomState, None] = None + ): + # initializing model rng = self.rng if rng is None else rng if isinstance(config, CS.Configuration): config = config.get_dictionary() @@ -103,23 +119,194 @@ def init_model(self, config: Union[CS.Configuration, Dict], ) return model + def get_model_size(self, model: RandomForestClassifier) -> float: + """ Returns the total number of decision nodes in the entire Random Forest model + + Parameters + ---------- + model : RandomForestClassifier + Trained RF model. + + Returns + ------- + float + """ + nodes = 0 + for tree in model.estimators_: + # total number of nodes in the tree (internal + leaf) + nodes += tree.tree_.node_count + return nodes + + def _train_objective( + self, + config: Dict, + fidelity: Dict, + shuffle: bool, + rng: Union[np.random.RandomState, int, None] = None, + evaluation: Union[str, None] = "valid", + record_stats: bool = False, + get_learning_curve: bool = False, + lc_every_k: int = 1, + **kwargs + ): + """Function that instantiates a 'config' on a 'fidelity' and trains it + + The ML model is instantiated and trained on the training split. Optionally, the model is + evaluated on the training set. Optionally, the learning curves are collected. + + Parameters + ---------- + config : CS.Configuration, Dict + The hyperparameter configuration. + fidelity : CS.Configuration, Dict + The fidelity configuration. + shuffle : bool (optional) + If True, shuffles the training split before fitting the ML model. + rng : np.random.RandomState, int (optional) + The random seed passed to the ML model and if applicable, used for shuffling the data + and subsampling the dataset fraction. + evaluation : str (optional) + If "valid", the ML model is trained on the training set alone. + If "test", the ML model is trained on the training + validation sets. + record_stats : bool (optional) + If True, records the evaluation metrics of the trained ML model on the training set. + This is set to False by default to reduce overall compute time. + get_learning_curve : bool (optional) + If True, records the learning curve using partial_fit or warm starting, if applicable. + This is set to False by default to reduce overall compute time. + Enabling True, implies that the for each iteration, the model will be evaluated on both + the validation and test sets, optionally on the training set also. + lc_every_k : int (optional) + If True, records the learning curve after every k iterations. 
+ """ + if rng is not None: + rng = get_rng(rng, self.rng) + + # initializing model + model = self.init_model(config, fidelity, rng) + + # preparing data + if evaluation == "valid": + train_X = self.train_X + train_y = self.train_y + elif evaluation == "test": + train_X = np.vstack((self.train_X, self.valid_X)) + train_y = pd.concat((self.train_y, self.valid_y)) + else: + raise ValueError("{} not in ['valid', 'test']".format(evaluation)) + train_idx = np.arange(len(train_X)) if self.train_idx is None else self.train_idx + + # shuffling data + if shuffle: + train_idx = self.shuffle_data_idx(train_idx, rng) + if isinstance(train_idx, np.ndarray): + train_X = train_X[train_idx] + else: + train_X = train_X.iloc[train_idx] + train_y = train_y.iloc[train_idx] + + # subsample here: + # application of the other fidelity to the dataset that the model interfaces + # carried over from previous HPOBench code that borrowed from FABOLAS' SVM + lower_bound_lim = 1.0 / 512.0 + if self.lower_bound_train_size is None: + self.lower_bound_train_size = (10 * self.n_classes) / self.train_X.shape[0] + self.lower_bound_train_size = np.max((lower_bound_lim, self.lower_bound_train_size)) + subsample = np.max((fidelity['subsample'], self.lower_bound_train_size)) + train_idx = self.rng.choice( + np.arange(len(train_X)), size=int( + subsample * len(train_X) + ) + ) + # fitting the model with subsampled data + if get_learning_curve: + lc_spacings = self._get_lc_spacing(model.n_estimators, lc_every_k) + # IMPORTANT to allow refitting with more estimators + model.warm_start = True + model.n_estimators = 0 + lc_time = 0.0 + model_fit_time = 0.0 + learning_curves = dict(train=[], valid=[], test=[]) + iter_start = 0 + # for i in range(fidelity['n_estimators']): + for i in range(len(lc_spacings)): + iter_end = lc_spacings[i] + start = time.time() + # adds k new estimators to the model for training + model.n_estimators += iter_end - iter_start + model.fit(train_X[train_idx], train_y.iloc[train_idx]) + model_fit_time += time.time() - start + lc_start = time.time() + if record_stats: + train_pred = model.predict(train_X) + train_loss = 1 - self.scorers['acc']( + train_y, train_pred, **self.scorer_args['acc'] + ) + learning_curves['train'].append(train_loss) + val_pred = model.predict(self.valid_X) + val_loss = 1 - self.scorers['acc']( + self.valid_y, val_pred, **self.scorer_args['acc'] + ) + learning_curves['valid'].append(val_loss) + test_pred = model.predict(self.test_X) + test_loss = 1 - self.scorers['acc']( + self.test_y, test_pred, **self.scorer_args['acc'] + ) + learning_curves['test'].append(test_loss) + lc_time += time.time() - lc_start + else: + learning_curves = None + lc_time = None + start = time.time() + model.fit(train_X[train_idx], train_y.iloc[train_idx]) + model_fit_time = time.time() - start + # model inference + inference_time = 0.0 + # can optionally not record evaluation metrics on training set to save compute + if record_stats: + start = time.time() + pred_train = model.predict(train_X) + inference_time = time.time() - start + # computing statistics on training data + scores = dict() + score_cost = dict() + for k, v in self.scorers.items(): + scores[k] = 0.0 + score_cost[k] = 0.0 + _start = time.time() + if record_stats: + scores[k] = v(train_y, pred_train, **self.scorer_args[k]) + score_cost[k] = time.time() - _start + inference_time + train_loss = 1 - scores["acc"] + return model, model_fit_time, train_loss, scores, score_cost, learning_curves, lc_time + class 
RandomForestBenchmarkBB(RandomForestBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Black-box version of the RandomForestBenchmark + """ + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # black-box setting (full fidelity) - RandomForestBenchmark._get_fidelity_choices(n_estimators_choice='fixed', subsample_choice='fixed') + RandomForestBenchmark._get_fidelity_choices( + n_estimators_choice='fixed', subsample_choice='fixed' + ) ) return fidelity_space class RandomForestBenchmarkMF(RandomForestBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Multi-fidelity version of the RandomForestBenchmark + """ + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # gray-box setting (multi-fidelity) - ntrees - RandomForestBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='fixed') + RandomForestBenchmark._get_fidelity_choices( + n_estimators_choice='variable', subsample_choice='fixed' + ) ) return fidelity_space diff --git a/hpobench/benchmarks/ml/svm_benchmark.py b/hpobench/benchmarks/ml/svm_benchmark.py index 9462442f..c7b6a816 100644 --- a/hpobench/benchmarks/ml/svm_benchmark.py +++ b/hpobench/benchmarks/ml/svm_benchmark.py @@ -4,6 +4,10 @@ 0.0.1: * First implementation of the new SVM Benchmarks. +0.0.2: +* Restructuring for consistency and to match ML Benchmark Template updates. +0.0.3: +* Adding Learning Curve support. """ from typing import Union, Dict @@ -15,18 +19,21 @@ from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -__version__ = '0.0.1' +__version__ = '0.0.3' class SVMBenchmark(MLBenchmark): - def __init__(self, - task_id: int, - rng: Union[np.random.RandomState, int, None] = None, - valid_size: float = 0.33, - data_path: Union[str, None] = None): - super(SVMBenchmark, self).__init__(task_id, rng, valid_size, data_path) - - self.cache_size = 200 + """ Multi-multi-fidelity SVM Benchmark + """ + def __init__( + self, + task_id: int, + valid_size: float = 0.33, + rng: Union[np.random.RandomState, int, None] = None, + data_path: Union[str, None] = None + ): + super(SVMBenchmark, self).__init__(task_id, valid_size, rng, data_path) + self.cache_size = 1024 # in MB @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -54,7 +61,8 @@ def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @staticmethod def _get_fidelity_choices(subsample_choice: str) -> Hyperparameter: - + """Fidelity space available --- specifies the fidelity dimensions + """ assert subsample_choice in ['fixed', 'variable'] fidelity = dict( @@ -64,12 +72,14 @@ def _get_fidelity_choices(subsample_choice: str) -> Hyperparameter: ) ) subsample = fidelity[subsample_choice] - return subsample - def init_model(self, config: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - rng: Union[int, np.random.RandomState, None] = None): + def init_model( + self, + config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[int, np.random.RandomState, None] = None + ): # initializing model rng = self.rng if rng is None else rng if isinstance(config, CS.Configuration): @@ 
-81,9 +91,27 @@ def init_model(self, config: Union[CS.Configuration, Dict], ) return model + def get_model_size(self, model: SVC) -> float: + """ Returns the number of support vectors in the SVM model + + Parameters + ---------- + model : SVC + Trained SVM model. + + Returns + ------- + float + """ + nsupport = model.support_.shape[0] + return nsupport + class SVMBenchmarkBB(SVMBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Black-box version of the SVMBenchmark + """ + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameter( # uses the entire data (subsample=1), reflecting the black-box setup diff --git a/hpobench/benchmarks/ml/svm_benchmark_old.py b/hpobench/benchmarks/ml/svm_benchmark_old.py deleted file mode 100644 index 9aad5e44..00000000 --- a/hpobench/benchmarks/ml/svm_benchmark_old.py +++ /dev/null @@ -1,354 +0,0 @@ -""" - -Changelog: -========== -0.0.3 -* New container release due to a general change in the communication between container and HPOBench. - Works with HPOBench >= v0.0.8 - -0.0.2: -* Standardize the structure of the meta information - -0.0.1: -* First implementation - -""" - -import logging -import time -from typing import Union, Tuple, Dict, List - -import ConfigSpace as CS -import numpy as np -from scipy import sparse -from sklearn import pipeline -from sklearn import svm -from sklearn.compose import ColumnTransformer -from sklearn.impute import SimpleImputer -from sklearn.metrics import accuracy_score, make_scorer -from sklearn.preprocessing import OneHotEncoder, MinMaxScaler - -import hpobench.util.rng_helper as rng_helper -from hpobench.abstract_benchmark import AbstractBenchmark -from hpobench.util.openml_data_manager import OpenMLHoldoutDataManager - -__version__ = '0.0.3' - -logger = logging.getLogger('SVMBenchmark') - - -class SupportVectorMachine(AbstractBenchmark): - """ - Hyperparameter optimization task to optimize the regularization - parameter C and the kernel parameter gamma of a support vector machine. - Both hyperparameters are optimized on a log scale in [-10, 10]. - The X_test data set is only used for a final offline evaluation of - a configuration. For that the validation and training data is - concatenated to form the whole training data set. - """ - - def __init__(self, task_id: Union[int, None] = None, - rng: Union[np.random.RandomState, int, None] = None): - """ - Parameters - ---------- - task_id : int, None - rng : np.random.RandomState, int, None - """ - super(SupportVectorMachine, self).__init__(rng=rng) - - self.task_id = task_id - self.cache_size = 200 # Cache for the SVC in MB - self.accuracy_scorer = make_scorer(accuracy_score) - - self.x_train, self.y_train, self.x_valid, self.y_valid, self.x_test, self.y_test, variable_types = \ - self.get_data() - self.categorical_data = np.array([var_type == 'categorical' for var_type in variable_types]) - - # Sort data (Categorical + numerical) so that categorical and continous are not mixed. 
- categorical_idx = np.argwhere(self.categorical_data) - continuous_idx = np.argwhere(~self.categorical_data) - sorting = np.concatenate([categorical_idx, continuous_idx]).squeeze() - self.categorical_data = self.categorical_data[sorting] - self.x_train = self.x_train[:, sorting] - self.x_valid = self.x_valid[:, sorting] - self.x_test = self.x_test[:, sorting] - - nan_columns = np.all(np.isnan(self.x_train), axis=0) - self.categorical_data = self.categorical_data[~nan_columns] - self.x_train, self.x_valid, self.x_test, self.categories = \ - OpenMLHoldoutDataManager.replace_nans_in_cat_columns(self.x_train, self.x_valid, self.x_test, - is_categorical=self.categorical_data) - - self.train_idx = self.rng.choice(a=np.arange(len(self.x_train)), - size=len(self.x_train), - replace=False) - - # Similar to [Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets] - # (https://arxiv.org/pdf/1605.07079.pdf), - # use 10 time the number of classes as lower bound for the dataset fraction - n_classes = np.unique(self.y_train).shape[0] - self.lower_bound_train_size = (10 * n_classes) / self.x_train.shape[0] - - def get_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, List]: - """ Loads the data given a task or another source. """ - - assert self.task_id is not None, NotImplementedError('No task-id given. Please either specify a task-id or ' - 'overwrite the get_data method.') - - data_manager = OpenMLHoldoutDataManager(openml_task_id=self.task_id, rng=self.rng) - x_train, y_train, x_val, y_val, x_test, y_test = data_manager.load() - - return x_train, y_train, x_val, y_val, x_test, y_test, data_manager.variable_types - - def shuffle_data(self, rng=None): - """ Reshuffle the training data. If 'rng' is None, the training idx are shuffled according to the - class-random-state""" - random_state = rng_helper.get_rng(rng, self.rng) - random_state.shuffle(self.train_idx) - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: - """ - Trains a SVM model given a hyperparameter configuration and - evaluates the model on the validation set. - - Parameters - ---------- - configuration : Dict, CS.Configuration - Configuration for the SVM model - fidelity: Dict, None - Fidelity parameters for the SVM model, check get_fidelity_space(). Uses default (max) value if None. - shuffle : bool - If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. - Defaults to ``False``. - rng : np.random.RandomState, int, None, - Random seed for benchmark. By default the class level random seed. - - To prevent overfitting on a single seed, it is possible to pass a - parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. - If this parameter is not given, the default random state is used. 
- kwargs - - Returns - ------- - Dict - - function_value : validation loss - cost : time to train and evaluate the model - info : Dict - train_loss : training loss - fidelity : used fidelities in this evaluation - """ - start_time = time.time() - - self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) - - if shuffle: - self.shuffle_data(self.rng) - - # Split of dataset subset - if self.lower_bound_train_size > fidelity['dataset_fraction']: - train_size = self.lower_bound_train_size - logger.warning(f'The given data set fraction is lower than the lower bound (10 * number of classes.) ' - f'Increase the fidelity from {fidelity["dataset_fraction"]:.8f} to ' - f'{self.lower_bound_train_size:.8f}') - else: - train_size = fidelity['dataset_fraction'] - - train_size = int(train_size * len(self.train_idx)) - train_idx = self.train_idx[:train_size] - - # Transform hyperparameters to linear scale - hp_c = np.exp(float(configuration['C'])) - hp_gamma = np.exp(float(configuration['gamma'])) - - # Train support vector machine - model = self.get_pipeline(hp_c, hp_gamma) - model.fit(self.x_train[train_idx], self.y_train[train_idx]) - - # Compute validation error - train_loss = 1 - self.accuracy_scorer(model, self.x_train[train_idx], self.y_train[train_idx]) - val_loss = 1 - self.accuracy_scorer(model, self.x_valid, self.y_valid) - - cost = time.time() - start_time - - return {'function_value': float(val_loss), - "cost": cost, - 'info': {'train_loss': float(train_loss), - 'fidelity': fidelity}} - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function_test(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: - """ - Trains a SVM model with a given configuration on both the X_train - and validation data set and evaluates the model on the X_test data set. - - Parameters - ---------- - configuration : Dict, CS.Configuration - Configuration for the SVM Model - fidelity: Dict, None - Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. - shuffle : bool - If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. - Defaults to ``False``. - rng : np.random.RandomState, int, None, - Random seed for benchmark. By default the class level random seed. - To prevent overfitting on a single seed, it is possible to pass a - parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. - If this parameter is not given, the default random state is used. 
- kwargs - - Returns - ------- - Dict - - function_value : X_test loss - cost : time to X_train and evaluate the model - info : Dict - train_valid_loss: Loss on the train+valid data set - fidelity : used fidelities in this evaluation - """ - assert np.isclose(fidelity['dataset_fraction'], 1), \ - f'Data set fraction must be 1 but was {fidelity["dataset_fraction"]}' - - self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) - - if shuffle: - self.shuffle_data(self.rng) - - start_time = time.time() - - # Concatenate training and validation dataset - if isinstance(self.x_train, sparse.csr.csr_matrix) or isinstance(self.x_valid, sparse.csr.csr_matrix): - data = sparse.vstack((self.x_train, self.x_valid)) - else: - data = np.concatenate((self.x_train, self.x_valid)) - targets = np.concatenate((self.y_train, self.y_valid)) - - # Transform hyperparameters to linear scale - hp_c = np.exp(float(configuration['C'])) - hp_gamma = np.exp(float(configuration['gamma'])) - - model = self.get_pipeline(hp_c, hp_gamma) - model.fit(data, targets) - - # Compute validation error - train_valid_loss = 1 - self.accuracy_scorer(model, data, targets) - - # Compute test error - test_loss = 1 - self.accuracy_scorer(model, self.x_test, self.y_test) - - cost = time.time() - start_time - - return {'function_value': float(test_loss), - "cost": cost, - 'info': {'train_valid_loss': float(train_valid_loss), - 'fidelity': fidelity}} - - def get_pipeline(self, C: float, gamma: float) -> pipeline.Pipeline: - """ Create the scikit-learn (training-)pipeline """ - - model = pipeline.Pipeline([ - ('preprocess_impute', - ColumnTransformer([ - ("categorical", "passthrough", self.categorical_data), - ("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])), - ('preprocess_one_hot', - ColumnTransformer([ - ("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data), - ("continuous", MinMaxScaler(feature_range=(0, 1)), ~self.categorical_data)])), - ('svm', - svm.SVC(gamma=gamma, C=C, random_state=self.rng, cache_size=self.cache_size)) - ]) - return model - - @staticmethod - def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """ - Creates a ConfigSpace.ConfigurationSpace containing all parameters for - the SVM Model - - For a detailed explanation of the hyperparameters: - https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html - - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace - - Returns - ------- - ConfigSpace.ConfigurationSpace - """ - - seed = seed if seed is not None else np.random.randint(1, 100000) - cs = CS.ConfigurationSpace(seed=seed) - - cs.add_hyperparameters([ - CS.UniformFloatHyperparameter('C', lower=-10., upper=10., default_value=0., log=False), - CS.UniformFloatHyperparameter('gamma', lower=-10., upper=10., default_value=1., log=False), - ]) - # cs.generate_all_continuous_from_bounds(SupportVectorMachine.get_meta_information()['bounds']) - return cs - - @staticmethod - def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """ - Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for - the SupportVector Benchmark - - Fidelities - ---------- - dataset_fraction: float - [0.1, 1] - fraction of training data set to use - - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace - - Returns - ------- - ConfigSpace.ConfigurationSpace - """ - seed = seed if seed is not None 
else np.random.randint(1, 100000) - fidel_space = CS.ConfigurationSpace(seed=seed) - - fidel_space.add_hyperparameters([ - CS.UniformFloatHyperparameter("dataset_fraction", lower=0.0, upper=1.0, default_value=1.0, log=False), - ]) - return fidel_space - - def get_meta_information(self): - """ Returns the meta information for the benchmark """ - return {'name': 'Support Vector Machine', - 'references': ["@InProceedings{pmlr-v54-klein17a", - "author = {Aaron Klein and Stefan Falkner and Simon Bartels and Philipp Hennig and " - "Frank Hutter}, " - "title = {{Fast Bayesian Optimization of Machine Learning Hyperparameters on " - "Large Datasets}}" - "pages = {528--536}, year = {2017}," - "editor = {Aarti Singh and Jerry Zhu}," - "volume = {54}," - "series = {Proceedings of Machine Learning Research}," - "address = {Fort Lauderdale, FL, USA}," - "month = {20--22 Apr}," - "publisher = {PMLR}," - "pdf = {http://proceedings.mlr.press/v54/klein17a/klein17a.pdf}, " - "url = {http://proceedings.mlr.press/v54/klein17a.html}, " - ], - 'code': 'https://github.com/automl/HPOlib1.5/blob/container/hpolib/benchmarks/ml/svm_benchmark.py', - 'shape of train data': self.x_train.shape, - 'shape of test data': self.x_test.shape, - 'shape of valid data': self.x_valid.shape, - 'initial random seed': self.rng, - 'task_id': self.task_id - } diff --git a/hpobench/benchmarks/ml/tabular_benchmark.py b/hpobench/benchmarks/ml/tabular_benchmark.py index 72e5fb31..342766b4 100644 --- a/hpobench/benchmarks/ml/tabular_benchmark.py +++ b/hpobench/benchmarks/ml/tabular_benchmark.py @@ -4,6 +4,10 @@ 0.0.1: * First implementation of the Tabular Benchmark. +0.0.2: +* Restructuring for consistency and to match ML Benchmark Template updates. +0.0.3: +* Adding Learning Curve support. """ from pathlib import Path @@ -17,7 +21,7 @@ from hpobench.dependencies.ml.ml_benchmark_template import metrics from hpobench.util.data_manager import TabularDataManager -__version__ = '0.0.1' +__version__ = '0.0.3' class TabularBenchmark(AbstractBenchmark): @@ -145,8 +149,8 @@ def _search_dataframe(self, row_dict, df): for i, param in enumerate(df.drop("result", axis=1).columns): mask *= df[param].values == row_dict[param] idx = np.where(mask) - assert len(idx) == 1, 'The query has resulted into mulitple matches. This should not happen. ' \ - f'The Query was {row_dict}' + assert len(idx) == 1, 'The query has resulted into mulitple matches. ' \ + 'This should not happen. The Query was {row_dict}' idx = idx[0][0] result = df.iloc[idx]["result"] return result @@ -163,7 +167,7 @@ def _objective( metric_str = ', '.join(list(metrics.keys())) assert metric in list(metrics.keys()), f"metric not found among: {metric_str}" score_key = f"{evaluation}_scores" - cost_key = f"{evaluation}_scores" + cost_key = f"{evaluation}_costs" key_path = dict() for name in self.configuration_space.get_hyperparameter_names(): diff --git a/hpobench/benchmarks/ml/xgboost_benchmark.py b/hpobench/benchmarks/ml/xgboost_benchmark.py index ae554628..234c2cee 100644 --- a/hpobench/benchmarks/ml/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml/xgboost_benchmark.py @@ -4,7 +4,12 @@ 0.0.1: * First implementation of the new XGB Benchmarks. +0.0.2: +* Restructuring for consistency and to match ML Benchmark Template updates. +0.0.3: +* Adding Learning Curve support. 
""" + from typing import Union, Tuple, Dict import ConfigSpace as CS @@ -12,18 +17,23 @@ import xgboost as xgb from ConfigSpace.hyperparameters import Hyperparameter +from hpobench.util.rng_helper import get_rng from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -__version__ = '0.0.1' +__version__ = '0.0.3' class XGBoostBenchmark(MLBenchmark): - def __init__(self, - task_id: int, - rng: Union[np.random.RandomState, int, None] = None, - valid_size: float = 0.33, - data_path: Union[str, None] = None): - super(XGBoostBenchmark, self).__init__(task_id, rng, valid_size, data_path) + """ Multi-multi-fidelity XGBoost Benchmark + """ + def __init__( + self, + task_id: int, + valid_size: float = 0.33, + rng: Union[np.random.RandomState, int, None] = None, + data_path: Union[str, None] = None + ): + super(XGBoostBenchmark, self).__init__(task_id, valid_size, rng, data_path) @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -52,12 +62,16 @@ def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # gray-box setting (multi-multi-fidelity) - ntrees + data subsample - XGBoostBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='variable') + XGBoostBenchmark._get_fidelity_choices( + n_estimators_choice='variable', subsample_choice='variable' + ) ) return fidelity_space @staticmethod - def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: + def _get_fidelity_choices( + n_estimators_choice: str, subsample_choice: str + ) -> Tuple[Hyperparameter, Hyperparameter]: assert n_estimators_choice in ['fixed', 'variable'] assert subsample_choice in ['fixed', 'variable'] @@ -74,28 +88,31 @@ def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tu 'subsample', lower=0.1, upper=1, default_value=1, log=False ) ) - n_estimators = fidelity1[n_estimators_choice] subsample = fidelity2[subsample_choice] return n_estimators, subsample - def init_model(self, - config: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - rng: Union[int, np.random.RandomState, None] = None): - """ Function that returns the model initialized based on the configuration and fidelity - """ + def init_model( + self, + config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[int, np.random.RandomState, None] = None + ): + # initializing model + rng = self.rng if rng is None else get_rng(rng) + # xgb.XGBClassifier when trainied using the scikit-learn API of `fit`, requires + # random_state to be an integer and doesn't accept a RandomState + seed = rng.randint(1, 10**6) + if isinstance(config, CS.Configuration): config = config.get_dictionary() if isinstance(fidelity, CS.Configuration): fidelity = fidelity.get_dictionary() - - rng = rng if (rng is None or isinstance(rng, int)) else self.seed extra_args = dict( booster="gbtree", n_estimators=fidelity['n_estimators'], objective="binary:logistic", - random_state=rng, + random_state=seed, subsample=1 ) if self.n_classes > 2: @@ -108,23 +125,48 @@ def init_model(self, ) return model + def get_model_size(self, model: xgb.XGBClassifier) -> float: + """ Returns the total number of decision nodes in the sequence of Gradient Boosted trees + + Parameters + ---------- + model : xgb.XGBClassifier + Trained XGB model. 
+ + Returns + ------- + float + """ + nodes = model.get_booster().trees_to_dataframe().shape[0] + return nodes + class XGBoostBenchmarkBB(XGBoostBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Black-box version of the XGBoostBenchmark + """ + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # black-box setting (full fidelity) - XGBoostBenchmark._get_fidelity_choices(n_estimators_choice='fixed', subsample_choice='fixed') + XGBoostBenchmark._get_fidelity_choices( + n_estimators_choice='fixed', subsample_choice='fixed' + ) ) return fidelity_space class XGBoostBenchmarkMF(XGBoostBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Multi-fidelity version of the XGBoostBenchmark + """ + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # gray-box setting (multi-fidelity) - ntrees - XGBoostBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='fixed') + XGBoostBenchmark._get_fidelity_choices( + n_estimators_choice='variable', subsample_choice='fixed' + ) ) return fidelity_space diff --git a/hpobench/benchmarks/ml/xgboost_benchmark_old.py b/hpobench/benchmarks/ml/xgboost_benchmark_old.py deleted file mode 100644 index f8730f52..00000000 --- a/hpobench/benchmarks/ml/xgboost_benchmark_old.py +++ /dev/null @@ -1,430 +0,0 @@ -""" - -Changelog: -========== -0.0.3 -* New container release due to a general change in the communication between container and HPOBench. - Works with HPOBench >= v0.0.8 - -0.0.2: -* Change the search space definiton to match the paper: (https://arxiv.org/pdf/1802.09596.pdf) - eta: [1e-5, 1] (def: 0.3) -> [2**-10, 1] (def: 0.3) - min_child_weight: [0,05, 10] (def: 1) -> [1, 2**7] (def: 1) - colsample_bytree: [0,05, 1] (def: 1) -> [0.01, 1] (def: 1) - colsample_bylevel: [0,05, 1] (def: 1) -> [0.01, 1] (def: 1) - reg_lambda: [1e-5, 2] (def: 1) -> [2**-10, 2**10] (def: 1) - reg_alpha: [1e-5, 2] (def: 1e-5) -> [2**-10, 2**10] (def: 1) - max_depth: - -> [1, 15] (def: 6) - subsample_per_it: - -> [0.01, 1] (def: 1) - [booster: - -> [gbtree, gblinear, dart] (def: gbtree)] *) - - *) This parameter is only in the XGBoostExtendedBenchmark. Not in the XGBoostBenchmark class. - -* Increase the fidelity `n_estimators` - n_estimators [2, 128] (def: 128) -> [1, 256] (def: 256) - -* Add class to optimize also the used booster method: (gbtree, gblinear or dart) - We have introduced a new class, which adds the used booster as parameter to the configuration space. To read more - about booster, please take a look in the official XGBoost-documentation (https://xgboost.readthedocs.io/en/latest). - - -0.0.1: -* First implementation of a XGBoost Benchmark. 
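Editorial aside, not part of the patch: the `get_model_size` method added to `XGBoostBenchmark` above measures model size as the total number of nodes across the boosted trees, via `Booster.trees_to_dataframe()`. A minimal standalone sketch of the same computation, using a synthetic dataset and placeholder hyperparameters rather than the benchmark's OpenML task data:

```python
import xgboost as xgb
from sklearn.datasets import make_classification

# Synthetic stand-in for the OpenML task data the benchmark would load.
X, y = make_classification(n_samples=200, n_features=10, random_state=1)

model = xgb.XGBClassifier(n_estimators=10, max_depth=3, random_state=1)
model.fit(X, y)

# trees_to_dataframe() returns one row per node over all boosted trees, so the
# row count is the quantity the new get_model_size(model) returns.
n_nodes = model.get_booster().trees_to_dataframe().shape[0]
print(n_nodes)
```

Note also that the BB/MF subclasses above now declare `get_fidelity_space` as a `staticmethod`, so the fixed- and variable-fidelity spaces can be inspected directly on the class, without first instantiating a benchmark.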
- - -""" - -import logging -import time -from typing import Union, Tuple, Dict, List - -import ConfigSpace as CS -import numpy as np -import xgboost as xgb -from sklearn import pipeline -from sklearn.compose import ColumnTransformer -from sklearn.impute import SimpleImputer -from sklearn.metrics import accuracy_score, make_scorer -from sklearn.preprocessing import OneHotEncoder - -import hpobench.util.rng_helper as rng_helper -from hpobench.abstract_benchmark import AbstractBenchmark -from hpobench.util.openml_data_manager import OpenMLHoldoutDataManager - -__version__ = '0.0.3' - -logger = logging.getLogger('XGBBenchmark') - - -class XGBoostBenchmark(AbstractBenchmark): - - def __init__(self, task_id: Union[int, None] = None, n_threads: int = 1, - rng: Union[np.random.RandomState, int, None] = None): - """ - - Parameters - ---------- - task_id : int, None - n_threads : int, None - rng : np.random.RandomState, int, None - """ - - super(XGBoostBenchmark, self).__init__(rng=rng) - self.n_threads = n_threads - self.task_id = task_id - self.accuracy_scorer = make_scorer(accuracy_score) - - self.x_train, self.y_train, self.x_valid, self.y_valid, self.x_test, self.y_test, variable_types = \ - self.get_data() - self.categorical_data = np.array([var_type == 'categorical' for var_type in variable_types]) - - # XGB needs sorted data. Data should be (Categorical + numerical) not mixed. - categorical_idx = np.argwhere(self.categorical_data) - continuous_idx = np.argwhere(~self.categorical_data) - sorting = np.concatenate([categorical_idx, continuous_idx]).squeeze() - self.categorical_data = self.categorical_data[sorting] - self.x_train = self.x_train[:, sorting] - self.x_valid = self.x_valid[:, sorting] - self.x_test = self.x_test[:, sorting] - - nan_columns = np.all(np.isnan(self.x_train), axis=0) - self.categorical_data = self.categorical_data[~nan_columns] - - self.x_train, self.x_valid, self.x_test, self.categories = \ - OpenMLHoldoutDataManager.replace_nans_in_cat_columns(self.x_train, self.x_valid, self.x_test, - is_categorical=self.categorical_data) - - # Determine the number of categories in the labels. - # In case of binary classification ``self.num_class`` has to be 1 for xgboost. - self.num_class = len(np.unique(np.concatenate([self.y_train, self.y_test, self.y_valid]))) - self.num_class = 1 if self.num_class == 2 else self.num_class - - self.train_idx = self.rng.choice(a=np.arange(len(self.x_train)), - size=len(self.x_train), - replace=False) - - # Similar to [Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets] - # (https://arxiv.org/pdf/1605.07079.pdf), - # use 10 time the number of classes as lower bound for the dataset fraction - n_classes = np.unique(self.y_train).shape[0] - self.lower_bound_train_size = (10 * n_classes) / self.x_train.shape[0] - - def get_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, List]: - """ Loads the data given a task or another source. """ - - assert self.task_id is not None, NotImplementedError('No task-id given. Please either specify a task-id or ' - 'overwrite the get_data method.') - - data_manager = OpenMLHoldoutDataManager(openml_task_id=self.task_id, rng=self.rng) - x_train, y_train, x_val, y_val, x_test, y_test = data_manager.load() - - return x_train, y_train, x_val, y_val, x_test, y_test, data_manager.variable_types - - def shuffle_data(self, rng=None): - """ Reshuffle the training data. 
If 'rng' is None, the training idx are shuffled according to the - class-random-state""" - random_state = rng_helper.get_rng(rng, self.rng) - random_state.shuffle(self.train_idx) - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: - """ - Trains a XGBoost model given a hyperparameter configuration and - evaluates the model on the validation set. - - Parameters - ---------- - configuration : Dict, CS.Configuration - Configuration for the XGBoost model - fidelity: Dict, None - Fidelity parameters for the XGBoost model, check get_fidelity_space(). Uses default (max) value if None. - shuffle : bool - If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. - Defaults to ``False``. - rng : np.random.RandomState, int, None, - Random seed for benchmark. By default the class level random seed. - - To prevent overfitting on a single seed, it is possible to pass a - parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. - If this parameter is not given, the default random state is used. - kwargs - - Returns - ------- - Dict - - function_value : validation loss - cost : time to train and evaluate the model - info : Dict - train_loss : trainings loss - fidelity : used fidelities in this evaluation - """ - self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) - - if shuffle: - self.shuffle_data(self.rng) - - start = time.time() - - if self.lower_bound_train_size > fidelity['dataset_fraction']: - train_data_fraction = self.lower_bound_train_size - logger.warning(f'The given data set fraction is lower than the lower bound (10 * number of classes.) ' - f'Increase the fidelity from {fidelity["dataset_fraction"]:.8f} to ' - f'{self.lower_bound_train_size:.8f}') - else: - train_data_fraction = fidelity['dataset_fraction'] - - train_idx = self.train_idx[:int(len(self.train_idx) * train_data_fraction)] - - model = self._get_pipeline(n_estimators=fidelity["n_estimators"], **configuration) - model.fit(X=self.x_train[train_idx], y=self.y_train[train_idx]) - - train_loss = 1 - self.accuracy_scorer(model, self.x_train[train_idx], self.y_train[train_idx]) - val_loss = 1 - self.accuracy_scorer(model, self.x_valid, self.y_valid) - cost = time.time() - start - - return {'function_value': float(val_loss), - 'cost': cost, - 'info': {'train_loss': float(train_loss), - 'fidelity': fidelity} - } - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function_test(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: - """ - Trains a XGBoost model with a given configuration on both the train - and validation data set and evaluates the model on the test data set. - - Parameters - ---------- - configuration : Dict, CS.Configuration - Configuration for the XGBoost Model - fidelity: Dict, None - Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. - shuffle : bool - If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. - Defaults to ``False``. - rng : np.random.RandomState, int, None, - Random seed for benchmark. 
By default the class level random seed. - To prevent overfitting on a single seed, it is possible to pass a - parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. - If this parameter is not given, the default random state is used. - kwargs - - Returns - ------- - Dict - - function_value : test loss - cost : time to train and evaluate the model - info : Dict - fidelity : used fidelities in this evaluation - """ - default_dataset_fraction = self.get_fidelity_space().get_hyperparameter('dataset_fraction').default_value - if fidelity['dataset_fraction'] != default_dataset_fraction: - raise NotImplementedError(f'Test error can not be computed for dataset_fraction <= ' - f'{default_dataset_fraction}') - - self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) - - if shuffle: - self.shuffle_data(self.rng) - - start = time.time() - - # Impute potential nan values with the feature- - data = np.concatenate((self.x_train, self.x_valid)) - targets = np.concatenate((self.y_train, self.y_valid)) - - model = self._get_pipeline(n_estimators=fidelity['n_estimators'], **configuration) - model.fit(X=data, y=targets) - - test_loss = 1 - self.accuracy_scorer(model, self.x_test, self.y_test) - cost = time.time() - start - - return {'function_value': float(test_loss), - 'cost': cost, - 'info': {'fidelity': fidelity}} - - @staticmethod - def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """ - Creates a ConfigSpace.ConfigurationSpace containing all parameters for - the XGBoost Model - - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace - - Returns - ------- - ConfigSpace.ConfigurationSpace - """ - seed = seed if seed is not None else np.random.randint(1, 100000) - cs = CS.ConfigurationSpace(seed=seed) - - cs.add_hyperparameters([ - CS.UniformFloatHyperparameter('eta', lower=2**-10, upper=1., default_value=0.3, log=True), - CS.UniformIntegerHyperparameter('max_depth', lower=1, upper=15, default_value=6, log=False), - CS.UniformFloatHyperparameter('min_child_weight', lower=1., upper=2**7., default_value=1., log=True), - CS.UniformFloatHyperparameter('colsample_bytree', lower=0.01, upper=1., default_value=1.), - CS.UniformFloatHyperparameter('colsample_bylevel', lower=0.01, upper=1., default_value=1.), - CS.UniformFloatHyperparameter('reg_lambda', lower=2**-10, upper=2**10, default_value=1, log=True), - CS.UniformFloatHyperparameter('reg_alpha', lower=2**-10, upper=2**10, default_value=1, log=True), - CS.UniformFloatHyperparameter('subsample_per_it', lower=0.1, upper=1, default_value=1, log=False) - ]) - - return cs - - @staticmethod - def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """ - Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for - the XGBoost Benchmark - - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace - - Returns - ------- - ConfigSpace.ConfigurationSpace - """ - seed = seed if seed is not None else np.random.randint(1, 100000) - fidel_space = CS.ConfigurationSpace(seed=seed) - - fidel_space.add_hyperparameters([ - CS.UniformFloatHyperparameter("dataset_fraction", lower=0.0, upper=1.0, default_value=1.0, log=False), - CS.UniformIntegerHyperparameter("n_estimators", lower=1, upper=256, default_value=256, log=False) - ]) - - return fidel_space - - def get_meta_information(self) -> Dict: - """ Returns the meta information for the benchmark """ - return {'name': 'XGBoost', - 'references': 
['@article{probst2019tunability,' - 'title={Tunability: Importance of hyperparameters of machine learning algorithms.},' - 'author={Probst, Philipp and Boulesteix, Anne-Laure and Bischl, Bernd},' - 'journal={J. Mach. Learn. Res.},' - 'volume={20},' - 'number={53},' - 'pages={1--32},' - 'year={2019}' - '}'], - 'code': 'https://github.com/automl/HPOlib1.5/blob/development/hpolib/benchmarks/ml/' - 'xgboost_benchmark_old.py', - 'shape of train data': self.x_train.shape, - 'shape of test data': self.x_test.shape, - 'shape of valid data': self.x_valid.shape, - 'initial random seed': self.rng, - 'task_id': self.task_id - } - - def _get_pipeline(self, max_depth: int, eta: float, min_child_weight: int, - colsample_bytree: float, colsample_bylevel: float, reg_lambda: int, reg_alpha: int, - n_estimators: int, subsample_per_it: float) \ - -> pipeline.Pipeline: - """ Create the scikit-learn (training-)pipeline """ - objective = 'binary:logistic' if self.num_class <= 2 else 'multi:softmax' - - clf = pipeline.Pipeline([ - ('preprocess_impute', - ColumnTransformer([ - ("categorical", "passthrough", self.categorical_data), - ("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])), - ('preprocess_one_hot', - ColumnTransformer([ - ("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data), - ("continuous", "passthrough", ~self.categorical_data)])), - ('xgb', - xgb.XGBClassifier( - max_depth=max_depth, - learning_rate=eta, - min_child_weight=min_child_weight, - colsample_bytree=colsample_bytree, - colsample_bylevel=colsample_bylevel, - reg_alpha=reg_alpha, - reg_lambda=reg_lambda, - n_estimators=n_estimators, - objective=objective, - n_jobs=self.n_threads, - random_state=self.rng.randint(1, 100000), - num_class=self.num_class, - subsample=subsample_per_it)) - ]) - return clf - - -class XGBoostExtendedBenchmark(XGBoostBenchmark): - """ - Similar to XGBoostBenchmark but enables also the optimization of the used booster. - """ - - def __init__(self, task_id: Union[int, None] = None, n_threads: int = 1, - rng: Union[np.random.RandomState, int, None] = None): - super(XGBoostExtendedBenchmark, self).__init__(task_id=task_id, n_threads=n_threads, rng=rng) - - @staticmethod - def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - cs = XGBoostBenchmark.get_configuration_space(seed) - hp_booster = CS.CategoricalHyperparameter('booster', choices=['gbtree', 'gblinear', 'dart'], - default_value='gbtree') - cs.add_hyperparameter(hp_booster) - - # XGBoost with 'gblinear' can not use some - # parameters. Exclude them from the configuration space by introducing a condition. - hps = ['colsample_bylevel', 'colsample_bytree', 'max_depth', 'min_child_weight', 'subsample_per_it'] - - # The NotEqualsCondition means: "Make parameter X active if hp_booster is not equal to gblinear." 
- conditions = [CS.NotEqualsCondition(cs.get_hyperparameter(hp), hp_booster, 'gblinear') for hp in hps] - cs.add_conditions(conditions) - return cs - - # noinspection PyMethodOverriding - # pylint: disable=arguments-differ - def _get_pipeline(self, n_estimators: int, booster: str, reg_lambda: int, reg_alpha: int, eta: float, - min_child_weight: int = None, max_depth: int = None, colsample_bytree: float = None, - colsample_bylevel: float = None, subsample_per_it: float = None) \ - -> pipeline.Pipeline: - """ Create the scikit-learn (training-)pipeline """ - objective = 'binary:logistic' if self.num_class <= 2 else 'multi:softmax' - - configuration = dict(booster=booster, - max_depth=max_depth, - learning_rate=eta, - min_child_weight=min_child_weight, - colsample_bytree=colsample_bytree, - colsample_bylevel=colsample_bylevel, - reg_alpha=reg_alpha, - reg_lambda=reg_lambda, - n_estimators=n_estimators, - objective=objective, - n_jobs=self.n_threads, - random_state=self.rng.randint(1, 100000), - num_class=self.num_class, - subsample=subsample_per_it) - - configuration = {k: v for k, v in configuration.items() if v is not None} - - clf = pipeline.Pipeline([ - ('preprocess_impute', - ColumnTransformer([ - ("categorical", "passthrough", self.categorical_data), - ("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])), - ('preprocess_one_hot', - ColumnTransformer([ - ("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data), - ("continuous", "passthrough", ~self.categorical_data)])), - ('xgb', - xgb.XGBClassifier(**configuration)) - ]) - return clf diff --git a/hpobench/config.py b/hpobench/config.py index df16a2cf..bfed1be6 100644 --- a/hpobench/config.py +++ b/hpobench/config.py @@ -10,6 +10,9 @@ root_logger = logging.getLogger("hpobench") +CONTAINER_SOURCE = "oras://gitlab.tf.uni-freiburg.de:5050/mallik/hpo-bench-singularity-gitlab-ci" + + class HPOBenchConfig: def __init__(self): @@ -64,8 +67,18 @@ def __init__(self): # Options for the singularity container self.socket_dir = Path(self.socket_dir).expanduser().absolute() + + # os.getuid is only for posix os. 
Make it compatible with windows + # https://stackoverflow.com/questions/842059/is-there-a-portable-way-to-get-the-current-username-in-python + if os.name == 'nt': + import getpass + user_name = getpass.getuser() + else: + user_name = os.getuid() + + self.container_dir = self.cache_dir / f'hpobench-{user_name}' self.container_dir = self.cache_dir / f'hpobench-{os.getuid()}' - self.container_source = 'oras://gitlab.tf.uni-freiburg.de:5050/muelleph/hpobench-registry' + self.container_source = CONTAINER_SOURCE self.pyro_connect_max_wait = 400 # Read in the hpobenchrc file and set the default values if not specified diff --git a/hpobench/container/benchmarks/ml/__init__.py b/hpobench/container/benchmarks/ml/__init__.py index ed2ce40f..30b264a9 100644 --- a/hpobench/container/benchmarks/ml/__init__.py +++ b/hpobench/container/benchmarks/ml/__init__.py @@ -1,4 +1,3 @@ -from hpobench.container.benchmarks.ml.histgb_benchmark import HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF from hpobench.container.benchmarks.ml.lr_benchmark import LRBenchmark, LRBenchmarkBB, LRBenchmarkMF from hpobench.container.benchmarks.ml.nn_benchmark import NNBenchmark, NNBenchmarkBB, NNBenchmarkMF from hpobench.container.benchmarks.ml.rf_benchmark import RandomForestBenchmark, RandomForestBenchmarkBB, \ @@ -8,10 +7,11 @@ from hpobench.container.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF -__all__ = ['HistGBBenchmark', 'HistGBBenchmarkBB', 'HistGBBenchmarkMF', - 'LRBenchmark', 'LRBenchmarkBB', 'LRBenchmarkMF', - 'NNBenchmark', 'NNBenchmarkBB', 'NNBenchmarkMF', - 'RandomForestBenchmark', 'RandomForestBenchmarkBB', 'RandomForestBenchmarkMF', - 'SVMBenchmark', 'SVMBenchmarkBB', 'SVMBenchmarkMF', - 'TabularBenchmark', - 'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF'] +__all__ = [ + 'LRBenchmark', 'LRBenchmarkBB', 'LRBenchmarkMF', + 'NNBenchmark', 'NNBenchmarkBB', 'NNBenchmarkMF', + 'RandomForestBenchmark', 'RandomForestBenchmarkBB', 'RandomForestBenchmarkMF', + 'SVMBenchmark', 'SVMBenchmarkBB', 'SVMBenchmarkMF', + 'TabularBenchmark', + 'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF' +] diff --git a/hpobench/container/benchmarks/ml/histgb_benchmark.py b/hpobench/container/benchmarks/ml/histgb_benchmark.py index dc7af088..bf2ab690 100644 --- a/hpobench/container/benchmarks/ml/histgb_benchmark.py +++ b/hpobench/container/benchmarks/ml/histgb_benchmark.py @@ -4,29 +4,35 @@ """ Benchmark for the HistGB Benchmarks from hpobench/benchmarks/ml_mmfb/histgb_benchmark.py """ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +from hpobench.util.container_utils import get_container_version + + +container_name = "ml_mmfb" +container_version = get_container_version(container_name) + class HistGBBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'HistGBBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(HistGBBenchmark, self).__init__(**kwargs) class HistGBBenchmarkBB(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'HistGBBenchmarkBB') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') 
+ kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(HistGBBenchmarkBB, self).__init__(**kwargs) class HistGBBenchmarkMF(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'HistGBBenchmarkMF') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(HistGBBenchmarkMF, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/ml/lr_benchmark.py b/hpobench/container/benchmarks/ml/lr_benchmark.py index 979cda3e..0279763e 100644 --- a/hpobench/container/benchmarks/ml/lr_benchmark.py +++ b/hpobench/container/benchmarks/ml/lr_benchmark.py @@ -4,29 +4,34 @@ """ Benchmark for the learning rate Benchmarks from hpobench/benchmarks/ml_mmfb/lr_benchmarks.py """ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +from hpobench.util.container_utils import get_container_version + + +container_name = "ml_mmfb" +container_version = get_container_version(container_name) class LRBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'LRBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(LRBenchmark, self).__init__(**kwargs) class LRBenchmarkBB(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'LRBenchmarkBB') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(LRBenchmarkBB, self).__init__(**kwargs) class LRBenchmarkMF(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'LRBenchmarkMF') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(LRBenchmarkMF, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/ml/nn_benchmark.py b/hpobench/container/benchmarks/ml/nn_benchmark.py index 04955e82..062850d2 100644 --- a/hpobench/container/benchmarks/ml/nn_benchmark.py +++ b/hpobench/container/benchmarks/ml/nn_benchmark.py @@ -4,29 +4,34 @@ """ Benchmark for the Neural Network Benchmarks from hpobench/benchmarks/ml_mmfb/nn_benchmark.py """ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +from hpobench.util.container_utils import get_container_version + + +container_name = "ml_mmfb" +container_version = get_container_version(container_name) class NNBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NNBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + 
kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(NNBenchmark, self).__init__(**kwargs) class NNBenchmarkBB(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NNBenchmarkBB') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(NNBenchmarkBB, self).__init__(**kwargs) class NNBenchmarkMF(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NNBenchmarkMF') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(NNBenchmarkMF, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/ml/pybnn.py b/hpobench/container/benchmarks/ml/pybnn.py index d9490e7f..618c0cee 100644 --- a/hpobench/container/benchmarks/ml/pybnn.py +++ b/hpobench/container/benchmarks/ml/pybnn.py @@ -4,35 +4,40 @@ """ Benchmark for the pybnn Benchmark from hpobench/benchmarks/ml/pybnn.py """ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +from hpobench.util.container_utils import get_container_version + + +container_name = "pybnn" +container_version = get_container_version(container_name) class BNNOnToyFunction(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'BNNOnToyFunction') - kwargs['container_name'] = kwargs.get('container_name', 'pybnn') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(BNNOnToyFunction, self).__init__(**kwargs) class BNNOnBostonHousing(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'BNNOnBostonHousing') - kwargs['container_name'] = kwargs.get('container_name', 'pybnn') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(BNNOnBostonHousing, self).__init__(**kwargs) class BNNOnProteinStructure(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'BNNOnProteinStructure') - kwargs['container_name'] = kwargs.get('container_name', 'pybnn') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(BNNOnProteinStructure, self).__init__(**kwargs) class BNNOnYearPrediction(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'BNNOnYearPrediction') - kwargs['container_name'] = kwargs.get('container_name', 'pybnn') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) 
super(BNNOnYearPrediction, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/ml/rf_benchmark.py b/hpobench/container/benchmarks/ml/rf_benchmark.py index a414349d..747d85c7 100644 --- a/hpobench/container/benchmarks/ml/rf_benchmark.py +++ b/hpobench/container/benchmarks/ml/rf_benchmark.py @@ -4,29 +4,34 @@ """ Benchmark for the Random Forest Benchmarks from hpobench/benchmarks/ml_mmfb/rf_benchmark.py """ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +from hpobench.util.container_utils import get_container_version + + +container_name = "ml_mmfb" +container_version = get_container_version(container_name) class RandomForestBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'RandomForestBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(RandomForestBenchmark, self).__init__(**kwargs) class RandomForestBenchmarkBB(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'RandomForestBenchmarkBB') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(RandomForestBenchmarkBB, self).__init__(**kwargs) class RandomForestBenchmarkMF(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'RandomForestBenchmarkMF') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(RandomForestBenchmarkMF, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/ml/svm_benchmark.py b/hpobench/container/benchmarks/ml/svm_benchmark.py index 7547a81a..204278cf 100644 --- a/hpobench/container/benchmarks/ml/svm_benchmark.py +++ b/hpobench/container/benchmarks/ml/svm_benchmark.py @@ -4,29 +4,33 @@ """ Benchmark for the SVM Benchmarks from hpobench/benchmarks/ml_mmfb/svm_benchmark.py """ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +from hpobench.util.container_utils import get_container_version +container_name = "ml_mmfb" +container_version = get_container_version(container_name) + class SVMBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(SVMBenchmark, self).__init__(**kwargs) class SVMBenchmarkMF(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMBenchmarkMF') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) 
+ kwargs['latest'] = kwargs.get('container_tag', container_version) super(SVMBenchmarkMF, self).__init__(**kwargs) class SVMBenchmarkBB(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMBenchmarkBB') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(SVMBenchmarkBB, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/ml/svm_benchmark_old.py b/hpobench/container/benchmarks/ml/svm_benchmark_old.py deleted file mode 100644 index 4955f057..00000000 --- a/hpobench/container/benchmarks/ml/svm_benchmark_old.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/python3 -# -*- coding: utf-8 -*- - -""" Benchmark for the XGBoost Benchmark from hpobench/benchmarks/ml/xgboost_benchmark """ - -from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient - - -class SupportVectorMachine(AbstractBenchmarkClient): - def __init__(self, task_id: int, **kwargs): - kwargs['task_id'] = task_id - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SupportVectorMachine') - kwargs['container_name'] = kwargs.get('container_name', 'svm_benchmark') - kwargs['latest'] = kwargs.get('container_tag', '0.0.3') - super(SupportVectorMachine, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/ml/tabular_benchmark.py b/hpobench/container/benchmarks/ml/tabular_benchmark.py index 6d19953b..0428acc3 100644 --- a/hpobench/container/benchmarks/ml/tabular_benchmark.py +++ b/hpobench/container/benchmarks/ml/tabular_benchmark.py @@ -4,13 +4,18 @@ """ Benchmark for the Tabular Benchmarks from hpobench/benchmarks/ml_mmfb/tabular_benchmark.py """ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +from hpobench.util.container_utils import get_container_version + + +container_name = "ml_tabular_benchmarks" +container_version = get_container_version(container_name) class TabularBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'TabularBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_tabular_benchmarks') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(TabularBenchmark, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/ml/xgboost_benchmark.py b/hpobench/container/benchmarks/ml/xgboost_benchmark.py index c82ea606..43115f1c 100644 --- a/hpobench/container/benchmarks/ml/xgboost_benchmark.py +++ b/hpobench/container/benchmarks/ml/xgboost_benchmark.py @@ -4,38 +4,45 @@ """ Benchmark for the XGB Benchmarks from hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py """ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +from hpobench.util.container_utils import get_container_version + + +container_name = "ml_mmfb" +container_version = get_container_version(container_name) class XGBoostBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = 
kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(XGBoostBenchmark, self).__init__(**kwargs) class XGBoostBenchmarkBB(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostBenchmarkBB') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(XGBoostBenchmarkBB, self).__init__(**kwargs) class XGBoostBenchmarkMF(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostBenchmarkMF') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(XGBoostBenchmarkMF, self).__init__(**kwargs) class XGBoostSearchSpace3Benchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostSearchSpace3Benchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(XGBoostSearchSpace3Benchmark, self).__init__(**kwargs) -__all__ = ['XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF'] +__all__ = [ + 'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF', 'XGBoostSearchSpace3Benchmark' +] diff --git a/hpobench/container/benchmarks/ml/xgboost_benchmark_old.py b/hpobench/container/benchmarks/ml/xgboost_benchmark_old.py deleted file mode 100644 index df475748..00000000 --- a/hpobench/container/benchmarks/ml/xgboost_benchmark_old.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/python3 -# -*- coding: utf-8 -*- - -""" Benchmark for the XGBoost Benchmark from hpobench/benchmarks/ml/xgboost_benchmark """ - -from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient - - -class XGBoostBenchmark(AbstractBenchmarkClient): - def __init__(self, task_id: int, **kwargs): - kwargs['task_id'] = task_id - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'xgboost_benchmark') - kwargs['latest'] = kwargs.get('container_tag', '0.0.3') - super(XGBoostBenchmark, self).__init__(**kwargs) - - -class XGBoostExtendedBenchmark(AbstractBenchmarkClient): - def __init__(self, task_id: int, **kwargs): - kwargs['task_id'] = task_id - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostExtendedBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'xgboost_benchmark') - kwargs['latest'] = kwargs.get('container_tag', '0.0.3') - super(XGBoostExtendedBenchmark, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/nas/nasbench_101.py b/hpobench/container/benchmarks/nas/nasbench_101.py index 7984d786..f68fc4fd 100644 --- a/hpobench/container/benchmarks/nas/nasbench_101.py +++ b/hpobench/container/benchmarks/nas/nasbench_101.py @@ -4,27 +4,32 @@ """ Benchmark for the Tabular Benchmark from hpobench/benchmarks/nas/nasbench_101.py """ from hpobench.container.client_abstract_benchmark import 
AbstractBenchmarkClient +from hpobench.util.container_utils import get_container_version + + +container_name = "nasbench_101" +container_version = get_container_version(container_name) class NASCifar10ABenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NASCifar10ABenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'nasbench_101') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(NASCifar10ABenchmark, self).__init__(**kwargs) class NASCifar10BBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NASCifar10BBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'nasbench_101') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(NASCifar10BBenchmark, self).__init__(**kwargs) class NASCifar10CBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NASCifar10CBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'nasbench_101') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(NASCifar10CBenchmark, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/nas/nasbench_1shot1.py b/hpobench/container/benchmarks/nas/nasbench_1shot1.py index a88dcf9a..bd1cd675 100644 --- a/hpobench/container/benchmarks/nas/nasbench_1shot1.py +++ b/hpobench/container/benchmarks/nas/nasbench_1shot1.py @@ -4,27 +4,32 @@ """ Benchmark for the nasbench 1shot1 benchmarks from hpobench/benchmarks/nas/nasbench_1shot1.py """ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +from hpobench.util.container_utils import get_container_version + + +container_name = "nasbench_1shot1" +container_version = get_container_version(container_name) class NASBench1shot1SearchSpace1Benchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NASBench1shot1SearchSpace1Benchmark') - kwargs['container_name'] = kwargs.get('container_name', 'nasbench_1shot1') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(NASBench1shot1SearchSpace1Benchmark, self).__init__(**kwargs) class NASBench1shot1SearchSpace2Benchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NASBench1shot1SearchSpace2Benchmark') - kwargs['container_name'] = kwargs.get('container_name', 'nasbench_1shot1') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(NASBench1shot1SearchSpace2Benchmark, self).__init__(**kwargs) class NASBench1shot1SearchSpace3Benchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 
'NASBench1shot1SearchSpace3Benchmark') - kwargs['container_name'] = kwargs.get('container_name', 'nasbench_1shot1') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(NASBench1shot1SearchSpace3Benchmark, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/nas/nasbench_201.py b/hpobench/container/benchmarks/nas/nasbench_201.py index 5eb9c68f..6fb849b0 100644 --- a/hpobench/container/benchmarks/nas/nasbench_201.py +++ b/hpobench/container/benchmarks/nas/nasbench_201.py @@ -4,53 +4,58 @@ """ Benchmark for the NasBench201 Benchmark from hpobench/benchmarks/nas/nasbench_201.py """ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +from hpobench.util.container_utils import get_container_version + + +container_name = "nasbench_201" +container_version = get_container_version(container_name) class Cifar10ValidNasBench201Benchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'Cifar10ValidNasBench201Benchmark') - kwargs['container_name'] = kwargs.get('container_name', 'nasbench_201') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(Cifar10ValidNasBench201Benchmark, self).__init__(**kwargs) class Cifar100NasBench201Benchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'Cifar100NasBench201Benchmark') - kwargs['container_name'] = kwargs.get('container_name', 'nasbench_201') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(Cifar100NasBench201Benchmark, self).__init__(**kwargs) class ImageNetNasBench201Benchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ImageNetNasBench201Benchmark') - kwargs['container_name'] = kwargs.get('container_name', 'nasbench_201') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ImageNetNasBench201Benchmark, self).__init__(**kwargs) class Cifar10ValidNasBench201BenchmarkOriginal(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'Cifar10ValidNasBench201BenchmarkOriginal') - kwargs['container_name'] = kwargs.get('container_name', 'nasbench_201') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(Cifar10ValidNasBench201BenchmarkOriginal, self).__init__(**kwargs) class Cifar100NasBench201BenchmarkOriginal(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'Cifar100NasBench201BenchmarkOriginal') - kwargs['container_name'] = kwargs.get('container_name', 'nasbench_201') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', 
container_version) super(Cifar100NasBench201BenchmarkOriginal, self).__init__(**kwargs) class ImageNetNasBench201BenchmarkOriginal(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ImageNetNasBench201BenchmarkOriginal') - kwargs['container_name'] = kwargs.get('container_name', 'nasbench_201') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ImageNetNasBench201BenchmarkOriginal, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/nas/tabular_benchmarks.py b/hpobench/container/benchmarks/nas/tabular_benchmarks.py index c213c249..41da8ecc 100644 --- a/hpobench/container/benchmarks/nas/tabular_benchmarks.py +++ b/hpobench/container/benchmarks/nas/tabular_benchmarks.py @@ -4,14 +4,19 @@ """ Benchmark for the Tabular Benchmark from hpobench/benchmarks/nas/tabular_benchmarks.py """ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +from hpobench.util.container_utils import get_container_version + + +container_name = "tabular_benchmarks" +container_version = get_container_version(container_name) class SliceLocalizationBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['data_path'] = '/home/fcnet_tabular_benchmarks' kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SliceLocalizationBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'tabular_benchmarks') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(SliceLocalizationBenchmark, self).__init__(**kwargs) @@ -19,8 +24,8 @@ class ProteinStructureBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['data_path'] = '/home/fcnet_tabular_benchmarks' kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ProteinStructureBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'tabular_benchmarks') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ProteinStructureBenchmark, self).__init__(**kwargs) @@ -28,8 +33,8 @@ class NavalPropulsionBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['data_path'] = '/home/fcnet_tabular_benchmarks' kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NavalPropulsionBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'tabular_benchmarks') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(NavalPropulsionBenchmark, self).__init__(**kwargs) @@ -37,8 +42,8 @@ class ParkinsonsTelemonitoringBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['data_path'] = '/home/fcnet_tabular_benchmarks' kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParkinsonsTelemonitoringBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'tabular_benchmarks') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) 
super(ParkinsonsTelemonitoringBenchmark, self).__init__(**kwargs) @@ -46,8 +51,8 @@ class SliceLocalizationBenchmarkOriginal(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['data_path'] = '/home/fcnet_tabular_benchmarks' kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SliceLocalizationBenchmarkOriginal') - kwargs['container_name'] = kwargs.get('container_name', 'tabular_benchmarks') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(SliceLocalizationBenchmarkOriginal, self).__init__(**kwargs) @@ -55,8 +60,8 @@ class ProteinStructureBenchmarkOriginal(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['data_path'] = '/home/fcnet_tabular_benchmarks' kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ProteinStructureBenchmarkOriginal') - kwargs['container_name'] = kwargs.get('container_name', 'tabular_benchmarks') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ProteinStructureBenchmarkOriginal, self).__init__(**kwargs) @@ -64,8 +69,8 @@ class NavalPropulsionBenchmarkOriginal(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['data_path'] = '/home/fcnet_tabular_benchmarks' kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NavalPropulsionBenchmarkOriginal') - kwargs['container_name'] = kwargs.get('container_name', 'tabular_benchmarks') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(NavalPropulsionBenchmarkOriginal, self).__init__(**kwargs) @@ -73,8 +78,8 @@ class ParkinsonsTelemonitoringBenchmarkOriginal(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['data_path'] = '/home/fcnet_tabular_benchmarks' kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParkinsonsTelemonitoringBenchmarkOriginal') - kwargs['container_name'] = kwargs.get('container_name', 'tabular_benchmarks') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParkinsonsTelemonitoringBenchmarkOriginal, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/od/od_benchmarks.py b/hpobench/container/benchmarks/od/od_benchmarks.py index e5633af2..1ddbdf6f 100644 --- a/hpobench/container/benchmarks/od/od_benchmarks.py +++ b/hpobench/container/benchmarks/od/od_benchmarks.py @@ -4,14 +4,19 @@ """ Benchmark for OCSVM and outlier detection """ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +from hpobench.util.container_utils import get_container_version + + +container_name = "outlier_detection" +container_version = get_container_version(container_name) class ODAutoencoder(AbstractBenchmarkClient): def __init__(self, dataset_name: str, **kwargs): kwargs['dataset_name'] = dataset_name kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ODAutoencoder') - kwargs['container_name'] = kwargs.get('container_name', 'outlier_detection') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = 
kwargs.get('container_tag', container_version) super(ODAutoencoder, self).__init__(**kwargs) @@ -19,8 +24,8 @@ class ODKernelDensityEstimation(AbstractBenchmarkClient): def __init__(self, dataset_name: str, **kwargs): kwargs['dataset_name'] = dataset_name kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ODKernelDensityEstimation') - kwargs['container_name'] = kwargs.get('container_name', 'outlier_detection') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ODKernelDensityEstimation, self).__init__(**kwargs) @@ -28,6 +33,6 @@ class ODOneClassSupportVectorMachine(AbstractBenchmarkClient): def __init__(self, dataset_name: str, **kwargs): kwargs['dataset_name'] = dataset_name kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ODOneClassSupportVectorMachine') - kwargs['container_name'] = kwargs.get('container_name', 'outlier_detection') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ODOneClassSupportVectorMachine, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/rl/cartpole.py b/hpobench/container/benchmarks/rl/cartpole.py index 8998c129..a5650af8 100644 --- a/hpobench/container/benchmarks/rl/cartpole.py +++ b/hpobench/container/benchmarks/rl/cartpole.py @@ -4,19 +4,24 @@ """ Benchmark for the Cartpole Benchmark from hpobench/benchmarks/rl/cartpole.py """ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +from hpobench.util.container_utils import get_container_version + + +container_name = "cartpole" +container_version = get_container_version(container_name) class CartpoleReduced(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'CartpoleReduced') - kwargs['container_name'] = kwargs.get('container_name', 'cartpole') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(CartpoleReduced, self).__init__(**kwargs) class CartpoleFull(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'CartpoleFull') - kwargs['container_name'] = kwargs.get('container_name', 'cartpole') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(CartpoleFull, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/rl/learna_benchmark.py b/hpobench/container/benchmarks/rl/learna_benchmark.py index e074a291..f91c04fc 100644 --- a/hpobench/container/benchmarks/rl/learna_benchmark.py +++ b/hpobench/container/benchmarks/rl/learna_benchmark.py @@ -4,14 +4,19 @@ """ Benchmark for the learna benchmark from hpobench/benchmarks/rl/learna_benchmarks.py """ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +from hpobench.util.container_utils import get_container_version + + +container_name = "learna_benchmark" +container_version = get_container_version(container_name) class Learna(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['data_path'] = '/home/learna/data' kwargs['benchmark_name'] 
= kwargs.get('benchmark_name', 'Learna') - kwargs['container_name'] = kwargs.get('container_name', 'learna_benchmark') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(Learna, self).__init__(**kwargs) @@ -19,6 +24,6 @@ class MetaLearna(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['data_path'] = '/home/learna/data' kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'MetaLearna') - kwargs['container_name'] = kwargs.get('container_name', 'learna_benchmark') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(MetaLearna, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/surrogates/paramnet_benchmark.py b/hpobench/container/benchmarks/surrogates/paramnet_benchmark.py index 24c04b30..cf8ab7b8 100644 --- a/hpobench/container/benchmarks/surrogates/paramnet_benchmark.py +++ b/hpobench/container/benchmarks/surrogates/paramnet_benchmark.py @@ -5,195 +5,200 @@ """ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +from hpobench.util.container_utils import get_container_version + + +container_name = "paramnet" +container_version = get_container_version(container_name) class ParamNetAdultOnStepsBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetAdultOnStepsBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetAdultOnStepsBenchmark, self).__init__(**kwargs) class ParamNetAdultOnTimeBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetAdultOnTimeBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetAdultOnTimeBenchmark, self).__init__(**kwargs) class ParamNetReducedAdultOnStepsBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetReducedAdultOnStepsBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetReducedAdultOnStepsBenchmark, self).__init__(**kwargs) class ParamNetReducedAdultOnTimeBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetReducedAdultOnTimeBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetReducedAdultOnTimeBenchmark, self).__init__(**kwargs) class 
ParamNetHiggsOnStepsBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetHiggsOnStepsBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetHiggsOnStepsBenchmark, self).__init__(**kwargs) class ParamNetHiggsOnTimeBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetHiggsOnTimeBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetHiggsOnTimeBenchmark, self).__init__(**kwargs) class ParamNetReducedHiggsOnStepsBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetReducedHiggsOnStepsBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetReducedHiggsOnStepsBenchmark, self).__init__(**kwargs) class ParamNetReducedHiggsOnTimeBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetReducedHiggsOnTimeBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetReducedHiggsOnTimeBenchmark, self).__init__(**kwargs) class ParamNetLetterOnStepsBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetLetterOnStepsBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetLetterOnStepsBenchmark, self).__init__(**kwargs) class ParamNetLetterOnTimeBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetLetterOnTimeBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetLetterOnTimeBenchmark, self).__init__(**kwargs) class ParamNetReducedLetterOnStepsBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetReducedLetterOnStepsBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = 
kwargs.get('container_tag', container_version) super(ParamNetReducedLetterOnStepsBenchmark, self).__init__(**kwargs) class ParamNetReducedLetterOnTimeBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetReducedLetterOnTimeBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetReducedLetterOnTimeBenchmark, self).__init__(**kwargs) class ParamNetMnistOnStepsBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetMnistOnStepsBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetMnistOnStepsBenchmark, self).__init__(**kwargs) class ParamNetMnistOnTimeBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetMnistOnTimeBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetMnistOnTimeBenchmark, self).__init__(**kwargs) class ParamNetReducedMnistOnStepsBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetReducedMnistOnStepsBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetReducedMnistOnStepsBenchmark, self).__init__(**kwargs) class ParamNetReducedMnistOnTimeBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetReducedMnistOnTimeBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetReducedMnistOnTimeBenchmark, self).__init__(**kwargs) class ParamNetOptdigitsOnStepsBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetOptdigitsOnStepsBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetOptdigitsOnStepsBenchmark, self).__init__(**kwargs) class ParamNetOptdigitsOnTimeBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetOptdigitsOnTimeBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = 
kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetOptdigitsOnTimeBenchmark, self).__init__(**kwargs) class ParamNetReducedOptdigitsOnStepsBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetReducedOptdigitsOnStepsBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetReducedOptdigitsOnStepsBenchmark, self).__init__(**kwargs) class ParamNetReducedOptdigitsOnTimeBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetReducedOptdigitsOnTimeBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetReducedOptdigitsOnTimeBenchmark, self).__init__(**kwargs) class ParamNetPokerOnStepsBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetPokerOnStepsBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetPokerOnStepsBenchmark, self).__init__(**kwargs) class ParamNetPokerOnTimeBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetPokerOnTimeBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetPokerOnTimeBenchmark, self).__init__(**kwargs) class ParamNetReducedPokerOnStepsBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetReducedPokerOnStepsBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetReducedPokerOnStepsBenchmark, self).__init__(**kwargs) class ParamNetReducedPokerOnTimeBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ParamNetReducedPokerOnTimeBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'paramnet') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(ParamNetReducedPokerOnTimeBenchmark, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/surrogates/svm_benchmark.py b/hpobench/container/benchmarks/surrogates/svm_benchmark.py 
index 0b0300b4..715ad2e6 100644 --- a/hpobench/container/benchmarks/surrogates/svm_benchmark.py +++ b/hpobench/container/benchmarks/surrogates/svm_benchmark.py @@ -5,11 +5,16 @@ """ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +from hpobench.util.container_utils import get_container_version + + +container_name = "surrogate_svm" +container_version = get_container_version(container_name) class SurrogateSVMBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SurrogateSVMBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'surrogate_svm') - kwargs['latest'] = kwargs.get('container_tag', '0.0.2') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(SurrogateSVMBenchmark, self).__init__(**kwargs) diff --git a/hpobench/container/client_abstract_benchmark.py b/hpobench/container/client_abstract_benchmark.py index 30c068a9..edc950b1 100644 --- a/hpobench/container/client_abstract_benchmark.py +++ b/hpobench/container/client_abstract_benchmark.py @@ -35,6 +35,7 @@ from oslo_concurrency import lockutils import hpobench.config +from hpobench.config import config_file, CONTAINER_SOURCE from hpobench import __version__ from hpobench.util.container_utils import BenchmarkEncoder, BenchmarkDecoder @@ -181,8 +182,9 @@ def load_benchmark(self, benchmark_name: str, container_name: str, container_sou self.container_source = container_source or self.config.container_source self.container_dir = Path(self.config.container_dir) - if (self.container_source.startswith('oras://gitlab.tf.uni-freiburg.de:5050/muelleph/hpobench-registry') - and container_tag == 'latest'): + if ( + self.container_source.startswith(CONTAINER_SOURCE) and container_tag == 'latest' + ): assert 'latest' in kwargs, 'If the container is hosted on the gitlab registry, make sure that in the ' \ 'container init, the field \'latest\' is set.' @@ -223,7 +225,7 @@ def download_container(container_dir, container_name, container_source, containe # Currently, we can't see the available container tags on gitlab. Therefore, we create for each # "tag" a new entry in the registry. This might change in the future. But as long as we don't have # a fix for this, we need to map the container tag differently. 
- if container_source.startswith('oras://gitlab.tf.uni-freiburg.de:5050/muelleph/hpobench-registry'): + if container_source.startswith(CONTAINER_SOURCE): cmd += f'{container_source}/{container_name.lower()}/{container_tag}:latest' else: cmd += f'{container_source}/{container_name.lower()}:{container_tag}' diff --git a/hpobench/container/recipes/Singularity.template b/hpobench/container/recipes/Singularity.template index 5880d688..614870fd 100644 --- a/hpobench/container/recipes/Singularity.template +++ b/hpobench/container/recipes/Singularity.template @@ -22,7 +22,7 @@ VERSION v0.0.1 && git clone https://github.com/automl/HPOBench.git \ && cd HPOBench \ && echo "Please never push a recipe that checks out any other branch than development or master" \ - && git checkout development \ + && git checkout develop \ && echo "Here you can install extra requirements additional to singularity" \ && pip install .[] \ && echo "Please don't touch the following lines" diff --git a/hpobench/container/recipes/ml/Singularity.PyBNN b/hpobench/container/recipes/ml/Singularity.PyBNN index 00db5a62..f86bc44e 100644 --- a/hpobench/container/recipes/ml/Singularity.PyBNN +++ b/hpobench/container/recipes/ml/Singularity.PyBNN @@ -2,7 +2,7 @@ Bootstrap: docker From: python:3.7-slim %labels -MAINTAINER muelleph@cs.uni-freiburg.de +MAINTAINER mallik@cs.uni-freiburg.de VERSION v0.0.1 %post @@ -11,7 +11,7 @@ VERSION v0.0.1 && cd /home \ && git clone https://github.com/automl/HPOBench.git \ && cd HPOBench \ - && git checkout master \ + && git checkout develop \ && pip install .[pybnn] \ && cd / \ && mkdir /var/lib/hpobench/ \ diff --git a/hpobench/container/recipes/ml/Singularity.SupportVectorMachine b/hpobench/container/recipes/ml/Singularity.SupportVectorMachine index 7e1be914..72d6c991 100644 --- a/hpobench/container/recipes/ml/Singularity.SupportVectorMachine +++ b/hpobench/container/recipes/ml/Singularity.SupportVectorMachine @@ -2,7 +2,7 @@ Bootstrap: docker From: python:3.7-slim %labels -MAINTAINER muelleph@cs.uni-freiburg.de +MAINTAINER mallik@cs.uni-freiburg.de VERSION v0.0.1 %post @@ -11,7 +11,7 @@ VERSION v0.0.1 && cd /home \ && git clone https://github.com/automl/HPOBench.git \ && cd HPOBench \ - && git checkout master \ + && git checkout develop \ && pip install .[svm] \ && cd / \ && mkdir /var/lib/hpobench/ \ diff --git a/hpobench/container/recipes/ml/Singularity.XGBoostBenchmark b/hpobench/container/recipes/ml/Singularity.XGBoostBenchmark index 86df6fb8..b00c74c9 100644 --- a/hpobench/container/recipes/ml/Singularity.XGBoostBenchmark +++ b/hpobench/container/recipes/ml/Singularity.XGBoostBenchmark @@ -2,7 +2,7 @@ Bootstrap: docker From: python:3.7-slim %labels -MAINTAINER muelleph@cs.uni-freiburg.de +MAINTAINER mallik@cs.uni-freiburg.de VERSION v0.0.1 %post @@ -11,7 +11,7 @@ VERSION v0.0.1 && cd /home \ && git clone https://github.com/automl/HPOBench.git \ && cd HPOBench \ - && git checkout master \ + && git checkout develop \ && pip install .[xgboost] \ && cd / \ && mkdir /var/lib/hpobench/ \ diff --git a/hpobench/container/recipes/ml/Singularity.ml_mmfb b/hpobench/container/recipes/ml/Singularity.ml_mmfb index cd8b3e6e..aa475250 100644 --- a/hpobench/container/recipes/ml/Singularity.ml_mmfb +++ b/hpobench/container/recipes/ml/Singularity.ml_mmfb @@ -2,7 +2,7 @@ Bootstrap: docker From: python:3.8-slim %labels -MAINTAINER muelleph@cs.uni-freiburg.de +MAINTAINER mallik@cs.uni-freiburg.de VERSION v0.0.1 %post @@ -12,7 +12,7 @@ VERSION v0.0.1 cd /home \ && git clone https://github.com/automl/HPOBench.git \ && 
cd HPOBench \ - && git checkout development \ + && git checkout develop \ && pip install ".[ml_mfbb]" \ && cd / \ && mkdir /var/lib/hpobench/ \ diff --git a/hpobench/container/recipes/ml/Singularity.ml_tabular_benchmark b/hpobench/container/recipes/ml/Singularity.ml_tabular_benchmark index 16f92de8..b94d0aad 100644 --- a/hpobench/container/recipes/ml/Singularity.ml_tabular_benchmark +++ b/hpobench/container/recipes/ml/Singularity.ml_tabular_benchmark @@ -2,7 +2,7 @@ Bootstrap: docker From: python:3.8-slim %labels -MAINTAINER muelleph@cs.uni-freiburg.de +MAINTAINER mallik@cs.uni-freiburg.de VERSION v0.0.1 %post @@ -12,7 +12,7 @@ VERSION v0.0.1 cd /home \ && git clone https://github.com/automl/HPOBench.git \ && cd HPOBench \ - && git checkout development \ + && git checkout develop \ && pip install ".[ml_tabular_benchmarks]" \ && cd / \ && mkdir /var/lib/hpobench/ \ diff --git a/hpobench/container/recipes/nas/Singularity.TabularBenchmarks b/hpobench/container/recipes/nas/Singularity.TabularBenchmarks index 72bfdd5c..16577ea5 100644 --- a/hpobench/container/recipes/nas/Singularity.TabularBenchmarks +++ b/hpobench/container/recipes/nas/Singularity.TabularBenchmarks @@ -2,7 +2,7 @@ Bootstrap: docker From: python:3.7-slim %labels -MAINTAINER muelleph@cs.uni-freiburg.de +MAINTAINER mallik@cs.uni-freiburg.de VERSION v0.0.1 %post @@ -18,7 +18,7 @@ VERSION v0.0.1 && pip install git+https://github.com/automl/nas_benchmarks.git \ && git clone https://github.com/automl/HPOBench.git \ && cd HPOBench \ - && git checkout master \ + && git checkout develop \ && pip install .[tabular_benchmarks] \ && cd / \ && mkdir /var/lib/hpobench/ \ diff --git a/hpobench/container/recipes/nas/Singularity.nasbench_101 b/hpobench/container/recipes/nas/Singularity.nasbench_101 index 11341851..d460462e 100644 --- a/hpobench/container/recipes/nas/Singularity.nasbench_101 +++ b/hpobench/container/recipes/nas/Singularity.nasbench_101 @@ -2,7 +2,7 @@ Bootstrap: docker From: python:3.7-slim %labels -MAINTAINER muelleph@cs.uni-freiburg.de +MAINTAINER mallik@cs.uni-freiburg.de VERSION v0.0.1 %post @@ -14,7 +14,7 @@ VERSION v0.0.1 && pip install git+https://github.com/automl/nas_benchmarks.git \ && git clone https://github.com/automl/HPOBench.git \ && cd HPOBench \ - && git checkout master \ + && git checkout develop \ && pip install .[nasbench_101] \ && cd / \ && mkdir /var/lib/hpobench/ \ diff --git a/hpobench/container/recipes/nas/Singularity.nasbench_1shot1 b/hpobench/container/recipes/nas/Singularity.nasbench_1shot1 index 04be18f6..2ccf3d0b 100644 --- a/hpobench/container/recipes/nas/Singularity.nasbench_1shot1 +++ b/hpobench/container/recipes/nas/Singularity.nasbench_1shot1 @@ -2,7 +2,7 @@ Bootstrap: docker From: python:3.7-slim %labels -MAINTAINER muelleph@cs.uni-freiburg.de +MAINTAINER mallik@cs.uni-freiburg.de VERSION v0.0.1 %environment @@ -18,7 +18,7 @@ VERSION v0.0.1 && git clone https://github.com/automl/nasbench-1shot1.git \ && git clone https://github.com/automl/HPOBench.git \ && cd HPOBench \ - && git checkout master \ + && git checkout develop \ && pip install .[nasbench_1shot1] \ && cd / \ && mkdir /var/lib/hpobench/ \ diff --git a/hpobench/container/recipes/nas/Singularity.nasbench_201 b/hpobench/container/recipes/nas/Singularity.nasbench_201 index 4f4cdcf2..e117df59 100644 --- a/hpobench/container/recipes/nas/Singularity.nasbench_201 +++ b/hpobench/container/recipes/nas/Singularity.nasbench_201 @@ -2,7 +2,7 @@ Bootstrap: docker From: python:3.7-slim %labels -MAINTAINER muelleph@cs.uni-freiburg.de +MAINTAINER 
mallik@cs.uni-freiburg.de VERSION v0.0.1 %post @@ -12,7 +12,7 @@ VERSION v0.0.1 cd /home \ && git clone https://github.com/automl/HPOBench.git \ && cd HPOBench \ - && git checkout master \ + && git checkout develop \ && pip install . \ && cd / \ && mkdir /var/lib/hpobench/ \ diff --git a/hpobench/container/recipes/od/Singularity.ODBenchmarks b/hpobench/container/recipes/od/Singularity.ODBenchmarks index 016b2611..a2c7fb3a 100644 --- a/hpobench/container/recipes/od/Singularity.ODBenchmarks +++ b/hpobench/container/recipes/od/Singularity.ODBenchmarks @@ -1,8 +1,8 @@ Bootstrap: docker -From: python:3.7-slim +From: python:3.10-slim %labels -MAINTAINER sass@tnt.uni-hannover.de +MAINTAINER mallik@cs.uni-freiburg.de VERSION v0.0.1 %post @@ -11,7 +11,7 @@ VERSION v0.0.1 && cd /home \ && git clone https://github.com/automl/HPOBench.git \ && cd HPOBench \ - && git checkout development \ + && git checkout develop \ && pip install .[outlier_detection] \ && cd / \ && mkdir /var/lib/hpobench/ \ diff --git a/hpobench/container/recipes/od/Singularity.ODKernelDensityEstimation b/hpobench/container/recipes/od/Singularity.ODKernelDensityEstimation index a71e61fc..0cee7418 100644 --- a/hpobench/container/recipes/od/Singularity.ODKernelDensityEstimation +++ b/hpobench/container/recipes/od/Singularity.ODKernelDensityEstimation @@ -11,7 +11,7 @@ VERSION v0.0.1 && cd /home \ && git clone https://github.com/automl/HPOBench.git \ && cd HPOBench \ - && git checkout development \ + && git checkout develop \ && pip install .[outlier_detection] \ && cd / \ && mkdir /var/lib/hpobench/ \ diff --git a/hpobench/container/recipes/rl/Singularity.Cartpole b/hpobench/container/recipes/rl/Singularity.Cartpole index e4e2cffb..0c659978 100644 --- a/hpobench/container/recipes/rl/Singularity.Cartpole +++ b/hpobench/container/recipes/rl/Singularity.Cartpole @@ -2,7 +2,7 @@ Bootstrap: docker From: python:3.7-slim %labels -MAINTAINER muelleph@cs.uni-freiburg.de +MAINTAINER mallik@cs.uni-freiburg.de VERSION v0.0.1 %post @@ -13,7 +13,7 @@ VERSION v0.0.1 cd /home \ && git clone https://github.com/automl/HPOBench.git \ && cd HPOBench \ - && git checkout master \ + && git checkout develop \ && pip install .[cartpole] \ && cd / \ && mkdir /var/lib/hpobench/ \ diff --git a/hpobench/container/recipes/rl/Singularity.learnaBenchmark b/hpobench/container/recipes/rl/Singularity.learnaBenchmark index e52a26c2..d43592c3 100644 --- a/hpobench/container/recipes/rl/Singularity.learnaBenchmark +++ b/hpobench/container/recipes/rl/Singularity.learnaBenchmark @@ -2,7 +2,7 @@ Bootstrap: docker From: python:3.7-slim %labels -MAINTAINER muelleph@cs.uni-freiburg.de +MAINTAINER mallik@cs.uni-freiburg.de VERSION v0.0.1 %post @@ -28,7 +28,7 @@ VERSION v0.0.1 && cd /home \ && git clone https://github.com/automl/HPOBench.git \ && cd HPOBench \ - && git checkout master \ + && git checkout develop \ && ../learna/thirdparty/miniconda/miniconda/envs/learna/bin/python -m pip install . 
\ && cd / \ && mkdir /var/lib/hpobench/ \ diff --git a/hpobench/container/recipes/surrogates/Singularity.ParamnetBenchmark b/hpobench/container/recipes/surrogates/Singularity.ParamnetBenchmark index 026c663c..5937abf4 100644 --- a/hpobench/container/recipes/surrogates/Singularity.ParamnetBenchmark +++ b/hpobench/container/recipes/surrogates/Singularity.ParamnetBenchmark @@ -2,7 +2,7 @@ Bootstrap: docker From: python:3.7-slim %labels -MAINTAINER muelleph@cs.uni-freiburg.de +MAINTAINER mallik@cs.uni-freiburg.de VERSION v0.0.1 %post @@ -12,7 +12,7 @@ VERSION v0.0.1 cd /home \ && git clone https://github.com/automl/HPOBench.git \ && cd HPOBench \ - && git checkout master \ + && git checkout develop \ && pip install --upgrade .[paramnet] \ && cd / \ && mkdir /var/lib/hpobench/ \ diff --git a/hpobench/container/recipes/surrogates/Singularity.SupportVectorMachine b/hpobench/container/recipes/surrogates/Singularity.SupportVectorMachine index 2b2a0d86..2dd45d8b 100644 --- a/hpobench/container/recipes/surrogates/Singularity.SupportVectorMachine +++ b/hpobench/container/recipes/surrogates/Singularity.SupportVectorMachine @@ -2,7 +2,7 @@ Bootstrap: docker From: python:3.7-slim %labels -MAINTAINER muelleph@cs.uni-freiburg.de +MAINTAINER mallik@cs.uni-freiburg.de VERSION v0.0.1 %post @@ -14,7 +14,7 @@ VERSION v0.0.1 cd /home \ && git clone https://github.com/automl/HPOBench.git \ && cd HPOBench \ - && git checkout master \ + && git checkout develop \ && pip install --upgrade .[paramnet] \ && cd / \ && mkdir /var/lib/hpobench/ \ diff --git a/hpobench/dependencies/ml/ml_benchmark_template.py b/hpobench/dependencies/ml/ml_benchmark_template.py index 3c6fcdaf..94aadcf8 100644 --- a/hpobench/dependencies/ml/ml_benchmark_template.py +++ b/hpobench/dependencies/ml/ml_benchmark_template.py @@ -33,25 +33,34 @@ class MLBenchmark(AbstractBenchmark): def __init__( self, task_id: int, - rng: Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, + rng: Union[np.random.RandomState, int, None] = None, data_path: Union[str, Path, None] = None, global_seed: int = 1 ): + """ Base template for the ML multi-fidelity benchmarks. + + Parameters + ---------- + task_id : int + A valid OpenML Task ID. + valid_size : float + The fraction of training set to be used as validation split. + rng : np.random.RandomState, int (optional) + The random seed that will be passed to the ML model if not explicitly passed. + data_path : str, Path (optional) + The path from where the training-validation-testing splits may be loaded. + global_seed : int + The fixed global seed that is used for creating validation splits if not available. 
+ """ super(MLBenchmark, self).__init__(rng=rng) - if isinstance(rng, int): - self.seed = rng - else: - self.seed = self.rng.randint(1, 10**6) - self.global_seed = global_seed # used for fixed training-validation splits self.task_id = task_id self.valid_size = valid_size - self.scorers = dict() - for k, v in metrics.items(): - self.scorers[k] = make_scorer(v, **metrics_kwargs[k]) + self.scorers = metrics + self.scorer_args = metrics_kwargs if data_path is None: from hpobench import config_file @@ -59,7 +68,7 @@ def __init__( self.data_path = Path(data_path) - dm = OpenMLDataManager(task_id, valid_size, data_path, global_seed) + dm = OpenMLDataManager(self.task_id, self.valid_size, self.data_path, self.global_seed) dm.load() # Data variables @@ -77,10 +86,6 @@ def __init__( self.lower_bound_train_size = dm.lower_bound_train_size self.n_classes = dm.n_classes - # Observation and fidelity spaces - self.fidelity_space = self.get_fidelity_space(self.seed) - self.configuration_space = self.get_configuration_space(self.seed) - @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: """Parameter space to be optimized --- contains the hyperparameters @@ -90,32 +95,32 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp @staticmethod def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: """Fidelity space available --- specifies the fidelity dimensions - - If fidelity_choice is 0 - Fidelity space is the maximal fidelity, akin to a black-box function - If fidelity_choice is 1 - Fidelity space is a single fidelity, in this case the number of trees (n_estimators) - If fidelity_choice is 2 - Fidelity space is a single fidelity, in this case the fraction of dataset (subsample) - If fidelity_choice is >2 - Fidelity space is multi-multi fidelity, all possible fidelities """ raise NotImplementedError() def get_meta_information(self): - """ Returns the meta information for the benchmark """ + """ Returns the meta information for the benchmark + """ return { 'name': 'Support Vector Machine', 'shape of train data': self.train_X.shape, 'shape of test data': self.test_X.shape, 'shape of valid data': self.valid_X.shape, - 'initial random seed': self.seed, + 'initial random seed': self.rng, 'task_id': self.task_id } - def init_model(self, config: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - rng: Union[int, np.random.RandomState, None] = None): + def get_model_size(self, model): + """ Returns a custom model size specific to the ML model, if applicable + """ + raise NotImplementedError + + def init_model( + self, + config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[int, np.random.RandomState, None] = None + ): """ Function that returns the model initialized based on the configuration and fidelity """ raise NotImplementedError() @@ -134,18 +139,74 @@ def get_fidelity(self, size: Union[int, None] = None): return self.fidelity_space.sample_configuration() return [self.fidelity_space.sample_configuration() for i in range(size)] - def shuffle_data_idx(self, train_idx: Iterable = None, rng: Union[np.random.RandomState, None] = None) -> Iterable: + def shuffle_data_idx( + self, train_idx: Iterable = None, rng: Union[np.random.RandomState, None] = None + ) -> Iterable: rng = self.rng if rng is None else rng train_idx = self.train_idx if train_idx is None else train_idx rng.shuffle(train_idx) return train_idx - def 
_train_objective(self, - config: Dict, - fidelity: Dict, - shuffle: bool, - rng: Union[np.random.RandomState, int, None] = None, - evaluation: Union[str, None] = "valid"): + def _get_lc_spacing(self, max_iter, k): + """ Creates an integer sequence to record Learning Curves for every k iteration. + + Designed to include the maximum iteration. A k-spaced iteration sequence may not include + the endpoint implicitly. + """ + assert k > 0, "Spacing needs to be at >=1" + assert k < max_iter, "Spacing should be in {1, 2, ..., max_iter-1}" + spacing = np.arange(0, max_iter + 1, step=k).tolist() + spacing = spacing[1:] # eliminating 0 + if spacing[-1] != max_iter: + spacing.append(max_iter) + return spacing + + def _train_objective( + self, + config: Dict, + fidelity: Dict, + shuffle: bool, + rng: Union[np.random.RandomState, int, None] = None, + evaluation: Union[str, None] = "valid", + record_stats: bool = False, + get_learning_curve: bool = False, + lc_every_k: int = 1, + **kwargs + ): + """Function that instantiates a 'config' on a 'fidelity' and trains it + + The ML model is instantiated and trained on the training split. Optionally, the model is + evaluated on the training set. + + Parameters + ---------- + config : CS.Configuration, Dict + The hyperparameter configuration. + fidelity : CS.Configuration, Dict + The fidelity configuration. + shuffle : bool (optional) + If True, shuffles the training split before fitting the ML model. + rng : np.random.RandomState, int (optional) + The random seed passed to the ML model and if applicable, used for shuffling the data + and subsampling the dataset fraction. + evaluation : str (optional) + If "valid", the ML model is trained on the training set alone. + If "test", the ML model is trained on the training + validation sets. + record_stats : bool (optional) + If True, records the evaluation metrics of the trained ML model on the training set. + This is set to False by default to reduce overall compute time. + get_learning_curve : bool (optional) + If True, records the learning curve using partial_fit or warm starting, if applicable. + This is set to False by default to reduce overall compute time. + lc_every_k : int (optional) + If True, records the learning curve after every k iterations. 
+ """ + if get_learning_curve: + raise NotImplementedError( + "Need to implement partial or intermediate training to record Learning curves" + ) + learning_curves = None + lc_time = None if rng is not None: rng = get_rng(rng, self.rng) @@ -154,26 +215,32 @@ def _train_objective(self, model = self.init_model(config, fidelity, rng) # preparing data - if eval == "valid": + if evaluation == "valid": train_X = self.train_X train_y = self.train_y - train_idx = self.train_idx - else: + elif evaluation == "test": train_X = np.vstack((self.train_X, self.valid_X)) train_y = pd.concat((self.train_y, self.valid_y)) - train_idx = np.arange(len(train_X)) + else: + raise ValueError("{} not in ['valid', 'test']".format(evaluation)) + train_idx = np.arange(len(train_X)) if self.train_idx is None else self.train_idx # shuffling data if shuffle: train_idx = self.shuffle_data_idx(train_idx, rng) - train_X = train_X.iloc[train_idx] + if isinstance(train_idx, np.ndarray): + train_X = train_X[train_idx] + else: + train_X = train_X.iloc[train_idx] train_y = train_y.iloc[train_idx] # subsample here: # application of the other fidelity to the dataset that the model interfaces + # carried over from previous HPOBench code that borrowed from FABOLAS' SVM + lower_bound_lim = 1.0 / 512.0 if self.lower_bound_train_size is None: self.lower_bound_train_size = (10 * self.n_classes) / self.train_X.shape[0] - self.lower_bound_train_size = np.max((1 / 512, self.lower_bound_train_size)) + self.lower_bound_train_size = np.max((lower_bound_lim, self.lower_bound_train_size)) subsample = np.max((fidelity['subsample'], self.lower_bound_train_size)) train_idx = self.rng.choice( np.arange(len(train_X)), size=int( @@ -184,102 +251,209 @@ def _train_objective(self, start = time.time() model.fit(train_X[train_idx], train_y.iloc[train_idx]) model_fit_time = time.time() - start + # model inference + inference_time = 0.0 + # can optionally not record evaluation metrics on training set to save compute + if record_stats: + start = time.time() + pred_train = model.predict(train_X) + inference_time = time.time() - start # computing statistics on training data scores = dict() score_cost = dict() for k, v in self.scorers.items(): scores[k] = 0.0 score_cost[k] = 0.0 - if evaluation == "test": - _start = time.time() - scores[k] = v(model, train_X, train_y) - score_cost[k] = time.time() - _start + _start = time.time() + if record_stats: + scores[k] = v(train_y, pred_train, **self.scorer_args[k]) + score_cost[k] = time.time() - _start + inference_time train_loss = 1 - scores["acc"] - return model, model_fit_time, train_loss, scores, score_cost + return model, model_fit_time, train_loss, scores, score_cost, learning_curves, lc_time # pylint: disable=arguments-differ @AbstractBenchmark.check_parameters - def objective_function(self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs) -> Dict: + def objective_function( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + record_train: bool = False, + get_learning_curve: bool = False, + lc_every_k: int = 1, + **kwargs + ) -> Dict: """Function that evaluates a 'config' on a 'fidelity' on the validation set + + The ML model is trained on the training split, and evaluated on the valid and test splits. 
+ + Parameters + ---------- + configuration : CS.Configuration, Dict + The hyperparameter configuration. + fidelity : CS.Configuration, Dict + The fidelity configuration. + shuffle : bool (optional) + If True, shuffles the training split before fitting the ML model. + rng : np.random.RandomState, int (optional) + The random seed passed to the ML model and if applicable, used for shuffling the data + and subsampling the dataset fraction. + record_train : bool (optional) + If True, records the evaluation metrics of the trained ML model on the training set. + This is set to False by default to reduce overall compute time. + get_learning_curve : bool (optional) + If True, records the learning curve using partial_fit or warm starting, if applicable. + This is set to False by default to reduce overall compute time. + lc_every_k : int (optional) + If True, records the learning curve after every k iterations. """ - model, model_fit_time, train_loss, train_scores, train_score_cost = self._train_objective( - configuration, fidelity, shuffle, rng, evaluation="val" - ) + # obtaining model and training statistics + model, model_fit_time, train_loss, train_scores, train_score_cost, lcs, lc_time = \ + self._train_objective( + configuration, fidelity, shuffle, rng, + evaluation="valid", record_stats=record_train, + get_learning_curve=get_learning_curve, lc_every_k=lc_every_k + ) + model_size = self.get_model_size(model) + + # model inference on validation set + start = time.time() + pred_val = model.predict(self.valid_X) + val_inference_time = time.time() - start val_scores = dict() val_score_cost = dict() for k, v in self.scorers.items(): + val_scores[k] = 0.0 + val_score_cost[k] = 0.0 _start = time.time() - val_scores[k] = v(model, self.valid_X, self.valid_y) - val_score_cost[k] = time.time() - _start + val_scores[k] = v(self.valid_y, pred_val, **self.scorer_args[k]) + val_score_cost[k] = time.time() - _start + val_inference_time val_loss = 1 - val_scores["acc"] + # model inference on test set + start = time.time() + pred_test = model.predict(self.test_X) + test_inference_time = time.time() - start test_scores = dict() test_score_cost = dict() for k, v in self.scorers.items(): + test_scores[k] = 0.0 + test_score_cost[k] = 0.0 _start = time.time() - test_scores[k] = v(model, self.test_X, self.test_y) - test_score_cost[k] = time.time() - _start + test_scores[k] = v(self.test_y, pred_test, **self.scorer_args[k]) + test_score_cost[k] = time.time() - _start + test_inference_time test_loss = 1 - test_scores["acc"] + fidelity = fidelity.get_dictionary() if isinstance(fidelity, CS.Configuration) else fidelity + configuration = configuration.get_dictionary() \ + if isinstance(configuration, CS.Configuration) else configuration + info = { 'train_loss': train_loss, 'val_loss': val_loss, 'test_loss': test_loss, 'model_cost': model_fit_time, + 'model_size': model_size, 'train_scores': train_scores, 'train_costs': train_score_cost, 'val_scores': val_scores, 'val_costs': val_score_cost, 'test_scores': test_scores, 'test_costs': test_score_cost, + 'learning_curves': lcs, + 'learning_curves_cost': lc_time, + 'learning_curves_spacing': lc_every_k, # storing as dictionary and not ConfigSpace saves tremendous memory 'fidelity': fidelity, 'config': configuration, } return { - 'function_value': info['val_loss'], - 'cost': model_fit_time + info['val_costs']['acc'], + 'function_value': float(info['val_loss']), + 'cost': float(model_fit_time + info['val_costs']['acc']), 'info': info } # pylint: disable=arguments-differ 
@AbstractBenchmark.check_parameters - def objective_function_test(self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs) -> Dict: + def objective_function_test( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + record_train: bool = False, + get_learning_curve: bool = False, + lc_every_k: int = 1, + **kwargs + ) -> Dict: """Function that evaluates a 'config' on a 'fidelity' on the test set + + The ML model is trained on the training+valid split, and evaluated on the test split. + + Parameters + ---------- + configuration : CS.Configuration, Dict + The hyperparameter configuration. + fidelity : CS.Configuration, Dict + The fidelity configuration. + shuffle : bool (optional) + If True, shuffles the training split before fitting the ML model. + rng : np.random.RandomState, int (optional) + The random seed passed to the ML model and if applicable, used for shuffling the data + and subsampling the dataset fraction. + record_train : bool (optional) + If True, records the evaluation metrics of the trained ML model on the training set. + This is set to False by default to reduce overall compute time. + get_learning_curve : bool (optional) + If True, records the learning curve using partial_fit or warm starting, if applicable. + This is set to False by default to reduce overall compute time. + lc_every_k : int (optional) + If True, records the learning curve after every k iterations. """ - model, model_fit_time, train_loss, train_scores, train_score_cost = self._train_objective( - configuration, fidelity, shuffle, rng, evaluation="test" - ) + # obtaining model and training statistics + model, model_fit_time, train_loss, train_scores, train_score_cost, lcs, lc_time = \ + self._train_objective( + configuration, fidelity, shuffle, rng, + evaluation="test", record_stats=record_train, + get_learning_curve=get_learning_curve, lc_every_k=lc_every_k + ) + model_size = self.get_model_size(model) + + # model inference on test set + start = time.time() + pred_test = model.predict(self.test_X) + test_inference_time = time.time() - start test_scores = dict() test_score_cost = dict() for k, v in self.scorers.items(): + test_scores[k] = 0.0 + test_score_cost[k] = 0.0 _start = time.time() - test_scores[k] = v(model, self.test_X, self.test_y) - test_score_cost[k] = time.time() - _start + test_scores[k] = v(self.test_y, pred_test, **self.scorer_args[k]) + test_score_cost[k] = time.time() - _start + test_inference_time test_loss = 1 - test_scores["acc"] + fidelity = fidelity.get_dictionary() if isinstance(fidelity, CS.Configuration) else fidelity + configuration = configuration.get_dictionary() \ + if isinstance(configuration, CS.Configuration) else configuration + info = { 'train_loss': train_loss, 'val_loss': None, 'test_loss': test_loss, 'model_cost': model_fit_time, + 'model_size': model_size, 'train_scores': train_scores, 'train_costs': train_score_cost, - 'val_scores': dict(), - 'val_costs': dict(), + 'val_scores': None, + 'val_costs': None, 'test_scores': test_scores, 'test_costs': test_score_cost, + 'learning_curves': lcs, + 'learning_curves_cost': lc_time, + 'learning_curves_spacing': lc_every_k, # storing as dictionary and not ConfigSpace saves tremendous memory 'fidelity': fidelity, 'config': configuration, diff --git 
a/hpobench/util/container_map.yaml b/hpobench/util/container_map.yaml new file mode 100644 index 00000000..b66fd44f --- /dev/null +++ b/hpobench/util/container_map.yaml @@ -0,0 +1,45 @@ +# ML Benchmarks +ml_tabular_benchmarks: + recipe: Singularity.ml_tabular_benchmark + version: 0.0.4 +pybnn: + recipe: Singularity.PyBNN + version: 0.0.4 +ml_mmfb: + recipe: Singularity.ml_mmfb + version: 0.0.4 + +# RL Benchmarks +cartpole: + recipe: Singularity.Cartpole + version: 0.0.4 +learna_benchmark: + recipe: Singularity.learnaBenchmark + version: 0.0.4 + +# OD Benchmark +outlier_detection: + recipe: Singularity.ODBenchmarks + version: 0.0.1 + +# NAS Benchmarks +nasbench_101: + recipe: Singularity.nasbench_101 + version: 0.0.4 +nasbench_201: + recipe: Singularity.nasbench_201 + version: 0.0.5 +nasbench_1shot1: + recipe: Singularity.nasbench_1shot1 + version: 0.0.4 +tabular_benchmarks: + recipe: Singularity.TabularBenchmarks + version: 0.0.5 + +# Surrogate Benchmarks +paramnet: + recipe: Singularity.ParamnetBenchmark + version: 0.0.4 +surrogate_svm: + recipe: Singularity.SupportVectorMachine + version: 0.0.2 \ No newline at end of file diff --git a/hpobench/util/container_utils.py b/hpobench/util/container_utils.py index 7fee19e9..987c25f2 100644 --- a/hpobench/util/container_utils.py +++ b/hpobench/util/container_utils.py @@ -1,14 +1,27 @@ -import os +import enum import importlib import json import numpy as np -import enum +from pathlib import Path +import os +import yaml from typing import Any, Union from hpobench.util.rng_helper import serialize_random_state, deserialize_random_state +CONTAINER_MAP_PATH = Path(__file__).parent / "container_map.yaml" + + +def get_container_version(bench_name: str) -> str: + with open(CONTAINER_MAP_PATH, "r") as f: + container_map = yaml.safe_load(f) + assert bench_name in container_map.keys(), f"{bench_name} not in version map!" + version = container_map.get(bench_name).get("version") + return version + + class BenchmarkEncoder(json.JSONEncoder): """ Json Encoder to save tuple and or numpy arrays | numpy floats / integer. from: https://stackoverflow.com/questions/15721363/preserve-python-tuples-with-json diff --git a/hpobench/util/data_manager.py b/hpobench/util/data_manager.py index a2e33121..9c78145d 100644 --- a/hpobench/util/data_manager.py +++ b/hpobench/util/data_manager.py @@ -41,6 +41,15 @@ import hpobench +tabular_multi_fidelity_urls = dict( + xgb="https://figshare.com/ndownloader/files/35414756", + svm="https://figshare.com/ndownloader/files/35414447", + lr="https://figshare.com/ndownloader/files/35412425", + rf="https://figshare.com/ndownloader/files/35414801", + nn="https://figshare.com/ndownloader/files/35414996" +) + + class DataManager(abc.ABC, metaclass=abc.ABCMeta): """ Base Class for loading and managing the data. 
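# --- Illustrative usage sketch (editor's note, not part of the patch) -----------------
# Resolving a benchmark's container version through the get_container_version() helper
# added above; the key 'ml_mmfb' is one of the entries shipped in
# hpobench/util/container_map.yaml.
from hpobench.util.container_utils import get_container_version

version = get_container_version("ml_mmfb")
print(version)  # -> "0.0.4" according to container_map.yaml
# ---------------------------------------------------------------------------------------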
@@ -929,21 +938,13 @@ def _load(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndar class TabularDataManager(DataManager): def __init__(self, model: str, task_id: [int, str], data_dir: [str, Path, None] = None): super(TabularDataManager, self).__init__() + self.model = model + self.task_id = str(task_id) - url_dict = dict( - xgb="https://ndownloader.figshare.com/files/30469920", - svm="https://ndownloader.figshare.com/files/30379359", - lr="https://ndownloader.figshare.com/files/30379038", - rf="https://ndownloader.figshare.com/files/30469089", - nn="https://ndownloader.figshare.com/files/30379005" - ) - + url_dict = tabular_multi_fidelity_urls assert model in url_dict.keys(), \ f'Model has to be one of {list(url_dict.keys())} but was {model}' - self.model = model - self.task_id = str(task_id) - self.url_to_use = url_dict.get(model) if data_dir is None: diff --git a/hpobench/util/test_utils.py b/hpobench/util/test_utils.py new file mode 100644 index 00000000..b2683135 --- /dev/null +++ b/hpobench/util/test_utils.py @@ -0,0 +1,24 @@ +import os + +CONST_RUN_ALL_TESTS_ENV_VAR = 'HPOBENCH_RUN_EXPENSIVE_TESTS' +DEFAULT_SKIP_MSG = 'Skip this test due to time limitations' + + +def check_run_all_tests(): + """ Helper function: Check if all tests should run. """ + return os.environ.get(CONST_RUN_ALL_TESTS_ENV_VAR, 'false').lower() == 'true' + + +def enable_all_tests(): + """ + Some tests are quite expensive. We control if all runs should be executed by this + environment variable. + """ + os.environ[CONST_RUN_ALL_TESTS_ENV_VAR] = 'true' + + +def disable_all_tests(): + """ + This function disables the evaluation of all test functions. + """ + os.environ[CONST_RUN_ALL_TESTS_ENV_VAR] = 'false' diff --git a/requirements.txt b/requirements.txt index 73ae9818..aad54f85 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -scipy>=1.4.1 numpy>=1.18.1 ConfigSpace>=0.4.12 Pyro4==4.80 diff --git a/setup.py b/setup.py index 4c53ecb0..2b04515e 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ def read_file(file_name): version=read_file('hpobench/__version__.py').split()[-1].strip('\''), packages=setuptools.find_packages(exclude=['*.tests', '*.tests.*', 'tests.*', 'tests'],), - python_requires='>=3.6, <=3.10', + python_requires='>=3.6', install_requires=read_file('./requirements.txt').split('\n'), extras_require=get_extra_requirements(), test_suite='pytest', @@ -60,4 +60,4 @@ def read_file(file_name): 'Topic :: Scientific/Engineering', 'Topic :: Software Development', ] -) +) \ No newline at end of file diff --git a/tests/test_container_availbable.py b/tests/test_container_available.py similarity index 81% rename from tests/test_container_availbable.py rename to tests/test_container_available.py index c48de2e6..99a99565 100644 --- a/tests/test_container_availbable.py +++ b/tests/test_container_available.py @@ -1,11 +1,12 @@ +import re import subprocess import logging -# from hpobench import config_file +from hpobench import config_file # Currently, the gitlab registry does not easily support the search functionality. # The container are still available on sylabs (old registry), in case the gitlab registry is somehow not reachable. # TODO: Write a search functionality for the gitlab registry. 
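# --- Illustrative usage sketch (editor's note, not part of the patch) -----------------
# The intended pattern for the new hpobench/util/test_utils.py helpers: expensive tests
# are gated behind the HPOBENCH_RUN_EXPENSIVE_TESTS environment variable instead of
# being skipped permanently (this mirrors the skipif markers applied in the test modules
# below). The test function name here is hypothetical.
import pytest

from hpobench.util.test_utils import DEFAULT_SKIP_MSG, check_run_all_tests


@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG)
def test_something_expensive():
    # runs only when HPOBENCH_RUN_EXPENSIVE_TESTS is set to 'true',
    # e.g. via hpobench.util.test_utils.enable_all_tests()
    ...
# ---------------------------------------------------------------------------------------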
-library = 'library://phmueller/automl' # config_file.container_source +library = config_file.container_source def search_container(container_name): @@ -13,7 +14,9 @@ def search_container(container_name): logging.debug(out) out = out.split('\n\n') - container_available = any((f'{library}/{container_name}' in line for line in out)) + pattern = f"library:.*/{container_name}:latest.*" + container_available = any((re.match(pattern, line.strip()) is not None for line in out)) + return container_available @@ -27,7 +30,7 @@ def test_availability(): 'nasbench_1shot1', 'tabular_benchmarks', 'cartpole', - 'learna_benchmark' + 'learna_benchmark', ] all_available = True @@ -40,4 +43,4 @@ def test_availability(): all_available = False not_available.append(container) - assert all_available, f'Some containers are not online available. {not_available}' \ No newline at end of file + assert all_available, f'Some containers are not online available. {not_available}' diff --git a/tests/test_data_manager.py b/tests/test_data_manager.py index 7e32ce84..43b49b1f 100644 --- a/tests/test_data_manager.py +++ b/tests/test_data_manager.py @@ -1,14 +1,13 @@ import shutil -from multiprocessing import Pool - import pytest +from multiprocessing import Pool import hpobench from hpobench.util.data_manager import NASBench_201Data, YearPredictionMSDData, ProteinStructureData, BostonHousingData -skip_message = 'We currently skip this test because it takes too much time.' +from hpobench.util.test_utils import DEFAULT_SKIP_MSG, check_run_all_tests -@pytest.mark.skip(reason=skip_message) +@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG) def test_nasbench_201_load_thread_safe(): shutil.rmtree(hpobench.config_file.data_dir / "nasbench_201", ignore_errors=True) function = lambda: NASBench_201Data(dataset='cifar100').load() @@ -16,7 +15,7 @@ def test_nasbench_201_load_thread_safe(): pool.map(function, []) -@pytest.mark.skip(reason=skip_message) +@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG) def test_nasbench_201_init(): data_manager = NASBench_201Data(dataset='cifar100') @@ -30,7 +29,7 @@ def test_nasbench_201_init(): assert data_manager._save_dir.exists() -@pytest.mark.skip(reason=skip_message) +@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG) def test_nasbench_201_load(): shutil.rmtree(hpobench.config_file.data_dir / "nasbench_201", ignore_errors=True) @@ -111,4 +110,4 @@ def test_tabular_datamanager(): assert (hpobench.config_file.data_dir / "TabularData" / 'lr' / str(3) / f'lr_3_data.parquet.gzip').exists() assert (hpobench.config_file.data_dir / "TabularData" / 'lr' / str(3) / f'lr_3_metadata.json').exists() - table_2, meta_data_2 = dm.load() + table_2, meta_data_2 = dm.load() \ No newline at end of file diff --git a/tests/test_nasbench_201.py b/tests/test_nasbench_201.py index 925ac911..7f9fde76 100644 --- a/tests/test_nasbench_201.py +++ b/tests/test_nasbench_201.py @@ -1,14 +1,9 @@ -import logging -logging.basicConfig(level=logging.DEBUG) - import pytest from hpobench.benchmarks.nas.nasbench_201 import ImageNetNasBench201Benchmark, Cifar100NasBench201Benchmark, \ Cifar10ValidNasBench201Benchmark - from hpobench.util.container_utils import disable_container_debug, enable_container_debug - -skip_message = 'We currently skip this test because it takes too much time.' 
+from hpobench.util.test_utils import DEFAULT_SKIP_MSG, check_run_all_tests @pytest.fixture(scope='module') @@ -18,59 +13,78 @@ def enable_debug(): disable_container_debug() -@pytest.mark.skip(reason=skip_message) +@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG) def test_nasbench201_cifar10valid(enable_debug): b = Cifar10ValidNasBench201Benchmark(rng=0) - cs = b.get_configuration_space(seed=0) - config = cs.sample_configuration() - fidelity = {'epoch': 199} - - result = b.objective_function(configuration=config, fidelity=fidelity, data_seed=(777, 888, 999)) - - assert result['function_value'] == pytest.approx(0.411, abs=0.1) - assert result['cost'] == pytest.approx(6650.88, abs=0.1) - assert result['info']['train_precision'] == result['function_value'] - assert result['info']['train_cost'] == result['cost'] - - result = b.objective_function_test(configuration=config, fidelity=fidelity, data_seed=(777, 888, 999)) + cs_1 = b.get_configuration_space(seed=0) + config_1 = cs_1.sample_configuration() + cs_2 = b.get_configuration_space(seed=0) + config_2 = cs_2.sample_configuration() + assert config_1 == config_2 + + config = { + '1<-0': 'nor_conv_1x1', + '2<-0': 'nor_conv_3x3', + '2<-1': 'nor_conv_3x3', + '3<-0': 'nor_conv_1x1', + '3<-1': 'nor_conv_1x1', + '3<-2': 'nor_conv_3x3' + } + result = b.objective_function(configuration=config, fidelity={'epoch': 199}, data_seed=(777, 888, 999)) + assert result['function_value'] == pytest.approx(0.0978, abs=0.1) + assert result['cost'] == pytest.approx(11973.20, abs=0.1) + assert result['info']['valid_misclassification_rate'] == result['function_value'] + assert result['info']['valid_cost'] == result['cost'] + + result = b.objective_function_test(configuration=config, fidelity={'epoch': 200}) + assert result['function_value'] == pytest.approx(0.0970, abs=0.1) + assert result['cost'] == pytest.approx(10426.33, abs=0.2) + assert result['info']['test_misclassification_rate'] == result['function_value'] + assert result['info']['test_cost'] == result['cost'] with pytest.raises(AssertionError): result = b.objective_function_test(configuration=config, fidelity={'epoch': 10}) -@pytest.mark.skip(reason=skip_message) + +@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG) def test_nasbench201_cifar100(enable_debug): b = Cifar100NasBench201Benchmark(rng=0) - cs = b.get_configuration_space(seed=0) - config = cs.sample_configuration() + config = {'1<-0': 'nor_conv_1x1', + '2<-0': 'nor_conv_3x3', + '2<-1': 'nor_conv_3x3', + '3<-0': 'nor_conv_1x1', + '3<-1': 'nor_conv_1x1', + '3<-2': 'nor_conv_3x3'} fidelity = {'epoch': 199} result = b.objective_function(configuration=config, fidelity=fidelity, data_seed=(777, 888, 999)) - assert result is not None - assert result['function_value'] == pytest.approx(7.8259, abs=0.1) - assert result['cost'] == pytest.approx(13301.76, abs=0.1) - assert result['info']['train_precision'] == result['function_value'] - assert result['info']['train_cost'] == result['cost'] + assert result['function_value'] == pytest.approx(0.295233, abs=0.1) + assert result['cost'] == pytest.approx(19681.70, abs=0.1) + assert result['info']['valid_misclassification_rate'] == result['function_value'] + assert result['info']['valid_cost'] == result['cost'] -@pytest.mark.skip(reason=skip_message) +@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG) def test_nasbench201_Image(enable_debug): b = ImageNetNasBench201Benchmark(rng=0) - - cs = b.get_configuration_space(seed=0) - config = 
cs.sample_configuration() + config = {'1<-0': 'nor_conv_1x1', + '2<-0': 'nor_conv_3x3', + '2<-1': 'nor_conv_3x3', + '3<-0': 'nor_conv_1x1', + '3<-1': 'nor_conv_1x1', + '3<-2': 'nor_conv_3x3'} fidelity = {'epoch': 199} result = b.objective_function(configuration=config, fidelity=fidelity, data_seed=(777, 888, 999)) - assert result is not None - assert result['function_value'] == pytest.approx(62.858, abs=0.1) - assert result['cost'] == pytest.approx(40357.56, abs=0.1) - assert result['info']['train_precision'] == result['function_value'] - assert result['info']['train_cost'] == result['cost'] + assert result['function_value'] == pytest.approx(0.552167, abs=0.1) + assert result['cost'] == pytest.approx(57119.22, abs=0.1) + assert result['info']['valid_misclassification_rate'] == result['function_value'] + assert result['info']['valid_cost'] == result['cost'] def test_nasbench201_fidelity_space(): @@ -79,15 +93,17 @@ def test_nasbench201_fidelity_space(): def test_nasbench201_config(): + cs = Cifar10ValidNasBench201Benchmark.get_configuration_space(seed=0) c = cs.sample_configuration() + func = Cifar10ValidNasBench201Benchmark.config_to_structure_func(4) struct = func(c) - - assert struct.__repr__() == '_Structure(4 nodes with |avg_pool_3x3~0|+|none~0|nor_conv_3x3~1|+' \ - '|nor_conv_3x3~0|nor_conv_3x3~1|skip_connect~2|)' + assert struct.__repr__() == '_Structure(4 nodes with |nor_conv_1x1~0|+|nor_conv_3x3~0|nor_conv_3x3~1|+' \ + '|nor_conv_1x1~0|nor_conv_1x1~1|nor_conv_3x3~2|)' assert len(struct) == 4 - assert struct[0] == (('avg_pool_3x3', 0),) + assert struct[0] == (('nor_conv_1x1', 0),) struct_str = struct.tostr() - assert struct_str == '|avg_pool_3x3~0|+|none~0|nor_conv_3x3~1|+|nor_conv_3x3~0|nor_conv_3x3~1|skip_connect~2|' + assert struct_str == '|nor_conv_1x1~0|+|nor_conv_3x3~0|nor_conv_3x3~1|+' \ + '|nor_conv_1x1~0|nor_conv_1x1~1|nor_conv_3x3~2|' \ No newline at end of file diff --git a/tests/test_od.py b/tests/test_od.py index f6ca038f..26074e21 100644 --- a/tests/test_od.py +++ b/tests/test_od.py @@ -26,15 +26,17 @@ def test_kde(): assert config['bandwidth'] == pytest.approx(15.2274, abs=0.001) assert result['function_value'] == pytest.approx(0.14409, abs=0.0001) - def test_ae(): from hpobench.container.benchmarks.od.od_benchmarks import ODAutoencoder - seed = 6 + seed = 100 benchmark = ODAutoencoder("cardio", rng=seed) config = benchmark.get_configuration_space(seed=seed).sample_configuration() + print(config) result = benchmark.objective_function(configuration=config, rng=seed) + assert config["dropout"], "Dropout not True" print(config['dropout_rate'], result['function_value']) - assert config['dropout_rate'] == pytest.approx(0.69512, abs=0.00001) - assert result['function_value'] == pytest.approx(0.2833, abs=0.0001) + assert config['dropout_rate'] == pytest.approx(0.23821, abs=0.00001) + assert result['function_value'] == pytest.approx(0.22132, abs=0.0001) + diff --git a/tests/test_pybnn.py b/tests/test_pybnn.py index 0e749457..957f248b 100644 --- a/tests/test_pybnn.py +++ b/tests/test_pybnn.py @@ -1,14 +1,19 @@ +import sys import pytest from hpobench.container.benchmarks.ml.pybnn import BNNOnToyFunction, BNNOnBostonHousing, BNNOnProteinStructure, \ BNNOnYearPrediction -import logging -logging.basicConfig(level=logging.DEBUG) from hpobench.util.container_utils import enable_container_debug +from hpobench.util.test_utils import check_run_all_tests, DEFAULT_SKIP_MSG + enable_container_debug() +MSG = 'Skip this test for new (>3.9) python versions. 
' \
+      'The paramnet benchmarks require a specific old scikit-learn version. This version, however, does not work under ' \
+      'Python 3.10. Therefore we skip this test. The containerized version does still work under 3.10.'
+@pytest.mark.skipif(sys.version_info >= (3, 10), reason=MSG)
 def test_bnn_init():
     benchmark = BNNOnToyFunction(rng=1)
diff --git a/tests/test_server.py b/tests/test_server.py
index d78cb0cc..e7f806de 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -2,6 +2,8 @@
 import logging
 import os
 
+from hpobench import config
+
 
 def set_log_level(debug):
     os.environ['HPOBENCH_DEBUG'] = 'true' if debug else 'false'
@@ -24,14 +26,17 @@ def test_debug_container():
     set_log_level(True)
 
-    from hpobench.container.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark as Benchmark
+    from hpobench.container.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark as Benchmark
     from hpobench.util.openml_data_manager import get_openmlcc18_taskids
 
     task_id = get_openmlcc18_taskids()[0]
-    b = Benchmark(task_id=task_id,
-                  container_name='xgboost_benchmark',
-                  container_source='library://phmueller/automl')
+    b = Benchmark(
+        task_id=task_id,
+        container_name='ml_mmfb',
+        container_source=config.config_file.container_source,
+    )
+
     cs = b.get_configuration_space()
     assert cs is not None
diff --git a/tests/test_svm.py b/tests/test_svm.py
index c3acf007..3f442ba7 100644
--- a/tests/test_svm.py
+++ b/tests/test_svm.py
@@ -1,36 +1,41 @@
 import pytest
+from hpobench.container.benchmarks.ml.svm_benchmark import SVMBenchmark
+import logging
 
-from hpobench.container.benchmarks.ml.svm_benchmark_old import SupportVectorMachine
-from hpobench.util.openml_data_manager import get_openmlcc18_taskids
-
-task_ids = get_openmlcc18_taskids()
-import logging
 logging.basicConfig(level=logging.DEBUG)
 
+task_ids = [
+    10101,53,146818,146821,9952,146822,31,3917,168912,3,167119,12,146212,168911,
+    9981,168329,167120,14965,146606,168330
+]
+
 
 def test_svm_init():
-    benchmark = SupportVectorMachine(task_id=task_ids[0])
+    task_id = 146818
+    assert task_id in task_ids
+    benchmark = SVMBenchmark(task_id=task_id)
 
     fs = benchmark.get_fidelity_space(seed=0)
     fidelity = fs.sample_configuration().get_dictionary()
-    assert fidelity['dataset_fraction'] == pytest.approx(0.54881, abs=0.001)
+    assert fidelity['subsample'] == pytest.approx(0.59393, abs=0.001)
 
     meta = benchmark.get_meta_information()
     assert meta is not None
 
     cs = benchmark.get_configuration_space(seed=0)
     config = cs.sample_configuration().get_dictionary()
-    assert config['C'] == pytest.approx(0.9762, abs=0.001)
-    assert config['gamma'] == pytest.approx(4.3037, abs=0.001)
+    assert config['C'] == pytest.approx(1.9673, abs=0.001)
+    assert config['gamma'] == pytest.approx(19.7501, abs=0.001)
 
     result = benchmark.objective_function(configuration=config, fidelity=fidelity)
-    assert result['function_value'] == pytest.approx(0.4837, abs=0.1)
+    assert result['function_value'] == pytest.approx(0.4439, abs=0.1)
     assert result['cost'] is not None
 
-    with pytest.raises(AssertionError):
-        result = benchmark.objective_function_test(configuration=config, fidelity=fidelity)
+    result = benchmark.objective_function_test(configuration=config, fidelity=fidelity)
+    assert result['function_value'] == pytest.approx(0.4493, abs=0.1)
+    assert result['cost'] is not None
 
     result = benchmark.objective_function_test(configuration=config)
-    assert result['function_value'] == pytest.approx(0.4648, abs=0.1)
+    assert result['function_value'] == pytest.approx(0.4493, abs=0.1)
     assert result['cost'] is not
None diff --git a/tests/test_tabular_benchmarks.py b/tests/test_tabular_benchmarks.py index 59d8dd45..573a2822 100644 --- a/tests/test_tabular_benchmarks.py +++ b/tests/test_tabular_benchmarks.py @@ -134,7 +134,7 @@ def test_parkinson_benchmark(self): benchmark.objective_function_test(default_config, fidelity=dict(budget=1, )) result = benchmark.objective_function_test(configuration=default_config, fidelity=dict(budget=100)) - assert pytest.approx(0.15010187, result['function_value'], abs=0.001) + assert result['function_value'] == pytest.approx(0.15010187, abs=0.001) runtime = 62.7268 assert result['cost'] == pytest.approx(runtime, abs=0.0001) diff --git a/tests/test_utils.py b/tests/test_utils.py index 9bc5ff3b..51ca4307 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -64,7 +64,7 @@ def test_rng_serialization(): def test_rng_serialization_xgb(): import json from hpobench.util.container_utils import BenchmarkEncoder, BenchmarkDecoder - from hpobench.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark + from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark b = XGBoostBenchmark(task_id=167149, rng=0) meta = b.get_meta_information() @@ -105,3 +105,15 @@ def test_debug_level(): disable_container_debug() assert os.environ['HPOBENCH_DEBUG'] == 'false' + + +def test_test_utils(): + from hpobench.util.test_utils import DEFAULT_SKIP_MSG, enable_all_tests, disable_all_tests, check_run_all_tests + + assert isinstance(DEFAULT_SKIP_MSG, str) + + enable_all_tests() + assert check_run_all_tests() + + disable_all_tests() + assert not check_run_all_tests() \ No newline at end of file diff --git a/tests/test_whitebox.py b/tests/test_whitebox.py index 35a9a940..cf37f3d8 100644 --- a/tests/test_whitebox.py +++ b/tests/test_whitebox.py @@ -14,55 +14,58 @@ def test_whitebox_without_container_xgb(): - from hpobench.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark as Benchmark - b = Benchmark(task_id=167199, rng=0) + from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark as Benchmark + b = Benchmark(task_id=146818, rng=0) cs = b.get_configuration_space(seed=0) configuration = cs.get_default_configuration() - assert configuration['colsample_bylevel'] == 1.0 - assert len(configuration.keys()) == 8 + assert configuration['colsample_bytree'] == 1.0 + assert len(configuration.keys()) == 4 - n_estimator = 32 + n_estimator = 100 subsample = 1 - result_dict = b.objective_function(configuration, fidelity=dict(n_estimators=n_estimator, dataset_fraction=subsample), - rng=0) + result_dict = b.objective_function( + configuration, fidelity=dict(n_estimators=n_estimator, subsample=subsample), rng=0 + ) valid_loss = result_dict['function_value'] train_loss = result_dict['info']['train_loss'] - result_dict = b.objective_function_test(configuration, fidelity=dict(n_estimators=n_estimator), rng=0) + result_dict = b.objective_function_test( + configuration, fidelity=dict(n_estimators=n_estimator), rng=0 + ) test_loss = result_dict['function_value'] - assert np.isclose(train_loss, 0.02678, atol=0.001) - assert np.isclose(valid_loss, 0.49549, atol=0.001) - assert np.isclose(test_loss, 0.43636, atol=0.001) + assert train_loss == pytest.approx(1.0, abs=0.001) + assert valid_loss == pytest.approx(0.166, abs=0.001) + assert test_loss == pytest.approx(0.087, abs=0.001) @pytest.mark.skipif(skip_container_test, reason="Requires singularity and flask") def test_whitebox_with_container(): - from hpobench.container.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark as 
Benchmark - b = Benchmark(container_name='xgboost_benchmark', - task_id=167199, - rng=0) + from hpobench.container.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark as Benchmark + b = Benchmark(task_id=146818, rng=0) #, container_name='ml_mmfb',) cs = b.get_configuration_space() configuration = cs.get_default_configuration() - assert configuration['colsample_bylevel'] == 1.0 - assert len(configuration.keys()) == 8 + assert configuration['colsample_bytree'] == 1.0 + assert len(configuration.keys()) == 4 - n_estimator = 32 + n_estimator = 100 subsample = 1 - result_dict = b.objective_function(configuration, fidelity=dict(n_estimators=n_estimator, - dataset_fraction=subsample)) + result_dict = b.objective_function( + configuration, fidelity=dict(n_estimators=n_estimator, subsample=subsample) + ) valid_loss = result_dict['function_value'] train_loss = result_dict['info']['train_loss'] result_dict = b.objective_function_test(configuration, fidelity=dict(n_estimators=n_estimator)) test_loss = result_dict['function_value'] - - assert np.isclose(train_loss, 0.02232, atol=0.001) - assert np.isclose(valid_loss, 0.4234, atol=0.001) - assert np.isclose(test_loss, 0.43636, atol=0.001) + + assert train_loss == pytest.approx(1.0, abs=0.001) + assert valid_loss == pytest.approx(0.1512, abs=0.001) + assert test_loss == pytest.approx(0.1014, abs=0.001) +@pytest.mark.skipif(skip_container_test, reason="Requires singularity and flask") def test_cartpole(): from hpobench.container.benchmarks.rl.cartpole import CartpoleReduced as Benchmark b = Benchmark(container_name='cartpole',