diff --git a/docs/contributing.md b/docs/contributing.md
new file mode 100644
index 000000000..c18de3ccc
--- /dev/null
+++ b/docs/contributing.md
@@ -0,0 +1,24 @@
+# Contributing
+
+Contributions to the OpenML package are highly appreciated in all forms.
+In particular, a few ways to contribute to openml-python are:
+
+- A direct contribution to the package, by means of improving the
+  code, documentation or examples. To get started, see [this
+  file](https://github.com/openml/openml-python/blob/main/CONTRIBUTING.md)
+  with details on how to set up your environment to develop for
+  openml-python.
+- A contribution to an openml-python extension. An extension package
+  allows OpenML to interface with a machine learning package (such
+  as scikit-learn or keras). These extensions are hosted in separate
+  repositories and may have their own guidelines. For more
+  information, see also [extensions](extensions.md).
+- Bug reports. If something doesn't work for you or is cumbersome,
+  please open a new issue to let us know about the problem. See
+  [this
+  section](https://github.com/openml/openml-python/blob/main/CONTRIBUTING.md).
+- [Cite OpenML](https://www.openml.org/cite) if you use it in a
+  scientific publication.
+- Visit one of our [hackathons](https://www.openml.org/meet).
+- Contribute to another OpenML project, such as [the main OpenML
+  project](https://github.com/openml/OpenML/blob/master/CONTRIBUTING.md).
diff --git a/docs/extensions.md b/docs/extensions.md
new file mode 100644
index 000000000..f2aa230f5
--- /dev/null
+++ b/docs/extensions.md
@@ -0,0 +1,179 @@
+# Extensions
+
+OpenML-Python provides an extension interface to connect machine
+learning libraries other than scikit-learn to OpenML. Please check the
+extension API reference and use the scikit-learn extension
+(`openml.extensions.sklearn.SklearnExtension`) as a starting point.
+
+## List of extensions
+
+Here is a list of currently maintained OpenML extensions:
+
+- `openml.extensions.sklearn.SklearnExtension`
+- [openml-keras](https://github.com/openml/openml-keras)
+- [openml-pytorch](https://github.com/openml/openml-pytorch)
+- [openml-tensorflow (for tensorflow
+  2+)](https://github.com/openml/openml-tensorflow)
+
+## Connecting new machine learning libraries
+
+### Content of the Library
+
+To leverage support from the community and to tap into the potential of
+OpenML, interfacing with popular machine learning libraries is
+essential. The OpenML-Python package is capable of downloading meta-data
+and results (data, flows, runs), regardless of the library that was used
+to upload them. However, in order to simplify the process of uploading
+flows and runs from a specific library, an additional interface can be
+built. The OpenML-Python team does not have the capacity to develop and
+maintain such interfaces on its own. For this reason, we have built an
+extension interface to allow others to contribute back. Building a
+suitable extension therefore requires an understanding of the current
+OpenML-Python support.
+
+The ["Flows and Runs"](../examples/20_basic/simple_flows_and_runs_tutorial)
+tutorial shows how scikit-learn currently works with OpenML-Python as an
+extension. The *sklearn* extension packaged with the
+[openml-python](https://github.com/openml/openml-python) repository can
+be used as a template/benchmark to build the new extension.
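+
+To get a first impression of what an extension does, the short sketch
+below uses the existing scikit-learn extension to serialize a model into
+an OpenML flow and deserialize it back again. The snippet is only
+illustrative (the choice of `RandomForestClassifier` is arbitrary, and
+the exact flow contents depend on the installed scikit-learn version):
+
+```python
+from openml.extensions.sklearn import SklearnExtension
+from sklearn import ensemble
+
+extension = SklearnExtension()
+
+# Serialize a scikit-learn model into an OpenML flow ...
+clf = ensemble.RandomForestClassifier(n_estimators=10)
+flow = extension.model_to_flow(clf)
+print(flow.name)
+
+# ... and deserialize the flow back into an (unfitted) model.
+clf_roundtrip = extension.flow_to_model(flow)
+print(clf_roundtrip)
+```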
+
+#### API
+
+- The extension scripts must import the `openml` package and be able
+  to interface with any function from the OpenML-Python API.
+- The extension has to be defined as a Python class and must inherit
+  from `openml.extensions.Extension`.
+- This class needs to override all functions of the `Extension` class
+  as required.
+- The overridden functions should have adequate and appropriate
+  docstrings. The scikit-learn extension API
+  (`openml.extensions.sklearn.SklearnExtension`) is a good example to
+  follow.
+
+#### Interfacing with OpenML-Python
+
+Once the new extension class has been defined,
+`openml.extensions.register_extension` must be called so that
+OpenML-Python can interface with the new extension (see the sketch at
+the end of this page).
+
+The following methods should be implemented. Although the documentation
+of the `Extension` interface is always leading, here we list some
+additional information and best practices. The scikit-learn extension
+API (`openml.extensions.sklearn.SklearnExtension`) is a good example to
+follow. Note that most methods are relatively simple and can be
+implemented in a few lines of code.
+
+- General setup (required)
+    - `can_handle_flow`: Takes an OpenML flow as argument and checks
+      whether it can be handled by the current extension. The OpenML
+      database consists of many flows, from various workbenches (e.g.,
+      scikit-learn, Weka, mlr). This method is called before a model is
+      deserialized. Typically, the flow's dependency field is used to
+      check that the specific library is present and that no unknown
+      libraries are required.
+    - `can_handle_model`: Similar to `can_handle_flow`, except that in
+      this case a Python object is given. As such, in many cases, this
+      method can be implemented by checking whether the object adheres
+      to a certain base class.
+- Serialization and De-serialization (required)
+    - `flow_to_model`: Deserializes an OpenML flow into a model (if the
+      library can indeed handle the flow). This method has an important
+      interplay with `model_to_flow`: running these two methods in
+      succession should result in exactly the same model (or flow).
+      This property can be used for unit testing (e.g., build a model
+      with hyperparameters, make predictions on a task, serialize it to
+      a flow, deserialize it back, make it predict on the same task,
+      and check whether the predictions are exactly the same). The
+      example in the scikit-learn interface might seem daunting, but
+      note that some complicated design choices were made there which
+      allow for all sorts of interesting research questions. It is
+      probably good practice to start easy.
+    - `model_to_flow`: The inverse of `flow_to_model`. Serializes a
+      model into an OpenML flow. The flow should preserve the class,
+      the library version, and the tunable hyperparameters.
+    - `get_version_information`: Returns a tuple with the version
+      information of the important libraries.
+    - `create_setup_string`: No longer used, and will be deprecated
+      soon.
+- Performing runs (required)
+    - `is_estimator`: Gets a class as input and checks whether it has
+      the status of an estimator in the library (typically, whether it
+      has a train method and a predict method).
+    - `seed_model`: Sets a random seed on the model.
+    - `_run_model_on_fold`: One of the main requirements for a library
+      to generate run objects for the OpenML server. It obtains a train
+      split (with labels) and a test split (without labels), and the
+      goal is to train a model on the train split and return the
+      predictions on the test split. On top of the actual predictions,
+      the class probabilities should also be determined. For
+      classifiers that do not return class probabilities, this can just
+      be the one-hot-encoded predicted label. The predictions will be
+      evaluated on the OpenML server. Additional information can also
+      be returned, for example, user-defined measures (such as runtime
+      information, as this cannot be inferred on the server).
+      Additionally, information about a hyperparameter optimization
+      trace can be provided.
+    - `obtain_parameter_values`: Obtains the hyperparameters of a given
+      model and their current values. Please note that in the case of a
+      hyperparameter optimization procedure (e.g., random search), you
+      should only return the hyperparameters of this procedure (e.g.,
+      the hyperparameter grid, budget, etc.) and that the chosen model
+      will be inferred from the optimization trace.
+    - `check_if_model_fitted`: Checks whether the train method of the
+      model has been called (and, as such, whether the predict method
+      can be used).
+- Hyperparameter optimization (optional)
+    - `instantiate_model_from_hpo_class`: If a given run has recorded
+      the hyperparameter optimization trace, then this method can be
+      used to reinstantiate the model with the hyperparameters of a
+      given hyperparameter optimization iteration. It has some
+      similarities with `flow_to_model` (as this method also sets the
+      hyperparameters of a model). Note that although this method must
+      be defined, it is not necessary to implement any logic if
+      hyperparameter optimization is not supported; simply raise a
+      `NotImplementedError` in that case.
+
+### Hosting the library
+
+Each extension should be a stand-alone repository, compatible with the
+[OpenML-Python repository](https://github.com/openml/openml-python). The
+extension repository should work off-the-shelf with *OpenML-Python*
+installed.
+
+Create a [public GitHub
+repo](https://docs.github.com/en/github/getting-started-with-github/create-a-repo)
+with the following directory structure:
+
+    | [repo name]
+    | |-- [extension name]
+    | | |-- __init__.py
+    | | |-- extension.py
+    | | |-- config.py (optional)
+
+### Recommended
+
+- Test cases to keep the extension up to date with upstream
+  `openml-python` changes.
+- Documentation of the extension API, especially if any new
+  functionality is added to OpenML-Python's extension design.
+- Examples that show how the new extension interfaces and works with
+  OpenML-Python.
+- A PR to add the new extension to the OpenML-Python API
+  documentation.
+
+Happy contributing!
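+
+### A minimal extension skeleton
+
+As a reference, the sketch below outlines what a new extension could
+look like. It is only a hypothetical starting point: `mylibrary`,
+`MyLibraryExtension` and the abridged method signatures are
+placeholders, and every method described in the API section above still
+needs a real implementation.
+
+```python
+# extension.py -- hypothetical skeleton of a new OpenML-Python extension.
+from openml.extensions import Extension, register_extension
+
+
+class MyLibraryExtension(Extension):
+    """Connect the (hypothetical) mylibrary package to OpenML."""
+
+    @classmethod
+    def can_handle_flow(cls, flow):
+        # Typically inspects the flow's dependency field.
+        return "mylibrary" in (flow.dependencies or "")
+
+    @classmethod
+    def can_handle_model(cls, model):
+        # Typically checks for a library-specific base class or module.
+        return type(model).__module__.startswith("mylibrary")
+
+    def flow_to_model(self, flow, **kwargs):  # signature abridged
+        raise NotImplementedError("Deserialize an OpenML flow into a mylibrary model.")
+
+    def model_to_flow(self, model):
+        raise NotImplementedError("Serialize a mylibrary model into an OpenML flow.")
+
+    # ... plus is_estimator, seed_model, _run_model_on_fold,
+    # obtain_parameter_values, check_if_model_fitted,
+    # get_version_information, instantiate_model_from_hpo_class, etc.
+
+
+# Make OpenML-Python aware of the new extension.
+register_extension(MyLibraryExtension)
+```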
diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 000000000..cda5bcb4b --- /dev/null +++ b/docs/index.md @@ -0,0 +1,89 @@ +# OpenML + +**Collaborative Machine Learning in Python** + +Welcome to the documentation of the OpenML Python API, a connector to +the collaborative machine learning platform +[OpenML.org](https://www.openml.org). The OpenML Python package allows +to use datasets and tasks from OpenML together with scikit-learn and +share the results online. + +## Example + +```python +import openml +from sklearn import impute, tree, pipeline + +# Define a scikit-learn classifier or pipeline +clf = pipeline.Pipeline( + steps=[ + ('imputer', impute.SimpleImputer()), + ('estimator', tree.DecisionTreeClassifier()) + ] +) +# Download the OpenML task for the pendigits dataset with 10-fold +# cross-validation. +task = openml.tasks.get_task(32) +# Run the scikit-learn model on the task. +run = openml.runs.run_model_on_task(clf, task) +# Publish the experiment on OpenML (optional, requires an API key. +# You can get your own API key by signing up to OpenML.org) +run.publish() +print(f'View the run online: {run.openml_url}') +``` + +Find more examples in the sidebar on the left. + +## How to get OpenML for python + +You can install the OpenML package via `pip` (we recommend using a virtual environment): + +```bash +python -m pip install openml +``` + +For more advanced installation information, please see the +["Introduction"](../examples/20_basic/introduction_tutorial.py) example. + + +## Further information + +- [OpenML documentation](https://docs.openml.org/) +- [OpenML client APIs](https://docs.openml.org/APIs/) +- [OpenML developer guide](https://docs.openml.org/Contributing/) +- [Contact information](https://www.openml.org/contact) +- [Citation request](https://www.openml.org/cite) +- [OpenML blog](https://medium.com/open-machine-learning) +- [OpenML twitter account](https://twitter.com/open_ml) + +## Contributing + +Contribution to the OpenML package is highly appreciated. Please see the +["Contributing"][contributing] page for more information. + +## Citing OpenML-Python + +If you use OpenML-Python in a scientific publication, we would +appreciate a reference to our JMLR-MLOSS paper +["OpenML-Python: an extensible Python API for OpenML"](https://www.jmlr.org/papers/v22/19-920.html): + +=== "Bibtex" + + ```bibtex + @article{JMLR:v22:19-920, + author = {Matthias Feurer and Jan N. van Rijn and Arlind Kadra and Pieter Gijsbers and Neeratyoy Mallik and Sahithya Ravi and Andreas Müller and Joaquin Vanschoren and Frank Hutter}, + title = {OpenML-Python: an extensible Python API for OpenML}, + journal = {Journal of Machine Learning Research}, + year = {2021}, + volume = {22}, + number = {100}, + pages = {1--5}, + url = {http://jmlr.org/papers/v22/19-920.html} + } + ``` + +=== "MLA" + + Feurer, Matthias, et al. + "OpenML-Python: an extensible Python API for OpenML." + _Journal of Machine Learning Research_ 22.100 (2021):1−5. diff --git a/docs/progress.md b/docs/progress.md new file mode 100644 index 000000000..c2923576b --- /dev/null +++ b/docs/progress.md @@ -0,0 +1,489 @@ +# Changelog {#progress} + +## next + +> - MAINT #1340: Add Numpy 2.0 support. Update tests to work with +> scikit-learn \<= 1.5. +> - ADD #1342: Add HTTP header to requests to indicate they are from +> openml-python. + +## 0.14.2 + +> - MAINT #1280: Use the server-provided `parquet_url` instead of +> `minio_url` to determine the location of the parquet file. 
+> - ADD #716: add documentation for remaining attributes of classes +> and functions. +> - ADD #1261: more annotations for type hints. +> - MAINT #1294: update tests to new tag specification. +> - FIX #1314: Update fetching a bucket from MinIO. +> - FIX #1315: Make class label retrieval more lenient. +> - ADD #1316: add feature descriptions ontologies support. +> - MAINT #1310/#1307: switch to ruff and resolve all mypy errors. + +## 0.14.1 + +> - FIX: Fallback on downloading ARFF when failing to download parquet +> from MinIO due to a ServerError. + +## 0.14.0 + +**IMPORTANT:** This release paves the way towards a breaking update of +OpenML-Python. From version 0.15, functions that had the option to +return a pandas DataFrame will return a pandas DataFrame by default. +This version (0.14) emits a warning if you still use the old access +functionality. More concretely: + +- In 0.15 we will drop the ability to return dictionaries in listing + calls and only provide pandas DataFrames. To disable warnings in + 0.14 you have to request a pandas DataFrame (using + `output_format="dataframe"`). +- In 0.15 we will drop the ability to return datasets as numpy arrays + and only provide pandas DataFrames. To disable warnings in 0.14 you + have to request a pandas DataFrame (using + `dataset_format="dataframe"`). + +Furthermore, from version 0.15, OpenML-Python will no longer download +datasets and dataset metadata by default. This version (0.14) emits a +warning if you don\'t explicitly specifiy the desired behavior. + +Please see the pull requests #1258 and #1260 for further information. + +- ADD #1081: New flag that allows disabling downloading dataset + features. +- ADD #1132: New flag that forces a redownload of cached data. +- FIX #1244: Fixes a rare bug where task listing could fail when the + server returned invalid data. +- DOC #1229: Fixes a comment string for the main example. +- DOC #1241: Fixes a comment in an example. +- MAINT #1124: Improve naming of helper functions that govern the + cache directories. +- MAINT #1223, #1250: Update tools used in pre-commit to the latest + versions (`black==23.30`, `mypy==1.3.0`, `flake8==6.0.0`). +- MAINT #1253: Update the citation request to the JMLR paper. +- MAINT #1246: Add a warning that warns the user that checking for + duplicate runs on the server cannot be done without an API key. + +## 0.13.1 + +- ADD #1081 #1132: Add additional options for (not) downloading + datasets `openml.datasets.get_dataset` and cache management. +- ADD #1028: Add functions to delete runs, flows, datasets, and tasks + (e.g., `openml.datasets.delete_dataset`). +- ADD #1144: Add locally computed results to the `OpenMLRun` object\'s + representation if the run was created locally and not downloaded + from the server. +- ADD #1180: Improve the error message when the checksum of a + downloaded dataset does not match the checksum provided by the API. +- ADD #1201: Make `OpenMLTraceIteration` a dataclass. +- DOC #1069: Add argument documentation for the `OpenMLRun` class. +- DOC #1241 #1229 #1231: Minor documentation fixes and resolve + documentation examples not working. +- FIX #1197 #559 #1131: Fix the order of ground truth and predictions + in the `OpenMLRun` object and in `format_prediction`. +- FIX #1198: Support numpy 1.24 and higher. +- FIX #1216: Allow unknown task types on the server. This is only + relevant when new task types are added to the test server. +- FIX #1223: Fix mypy errors for implicit optional typing. 
+- MAINT #1155: Add dependabot github action to automatically update + other github actions. +- MAINT #1199: Obtain pre-commit\'s flake8 from github.com instead of + gitlab.com. +- MAINT #1215: Support latest numpy version. +- MAINT #1218: Test Python3.6 on Ubuntu 20.04 instead of the latest + Ubuntu (which is 22.04). +- MAINT #1221 #1212 #1206 #1211: Update github actions to the latest + versions. + +## 0.13.0 + +> - FIX #1030: `pre-commit` hooks now no longer should issue a +> warning. +> - FIX #1058, #1100: Avoid `NoneType` error when printing task +> without `class_labels` attribute. +> - FIX #1110: Make arguments to `create_study` and `create_suite` +> that are defined as optional by the OpenML XSD actually optional. +> - FIX #1147: `openml.flow.flow_exists` no longer requires an API +> key. +> - FIX #1184: Automatically resolve proxies when downloading from +> minio. Turn this off by setting environment variable +> `no_proxy="*"`. +> - MAINT #1088: Do CI for Windows on Github Actions instead of +> Appveyor. +> - MAINT #1104: Fix outdated docstring for `list_task`. +> - MAINT #1146: Update the pre-commit dependencies. +> - ADD #1103: Add a `predictions` property to OpenMLRun for easy +> accessibility of prediction data. +> - ADD #1188: EXPERIMENTAL. Allow downloading all files from a minio +> bucket with `download_all_files=True` for `get_dataset`. + +## 0.12.2 + +- ADD #1065: Add a `retry_policy` configuration option that determines + the frequency and number of times to attempt to retry server + requests. +- ADD #1075: A docker image is now automatically built on a push to + develop. It can be used to build docs or run tests in an isolated + environment. +- ADD: You can now avoid downloading \'qualities\' meta-data when + downloading a task with the `download_qualities` parameter of + `openml.tasks.get_task[s]` functions. +- DOC: Fixes a few broken links in the documentation. +- DOC #1061: Improve examples to always show a warning when they + switch to the test server. +- DOC #1067: Improve documentation on the scikit-learn extension + interface. +- DOC #1068: Create dedicated extensions page. +- FIX #1075: Correctly convert [y]{.title-ref} to a pandas series when + downloading sparse data. +- MAINT: Rename [master]{.title-ref} brach to [ main]{.title-ref} + branch. +- MAINT/DOC: Automatically check for broken external links when + building the documentation. +- MAINT/DOC: Fail documentation building on warnings. This will make + the documentation building fail if a reference cannot be found (i.e. + an internal link is broken). + +## 0.12.1 + +- ADD #895/#1038: Measure runtimes of scikit-learn runs also for + models which are parallelized via the joblib. +- DOC #1050: Refer to the webpage instead of the XML file in the main + example. +- DOC #1051: Document existing extensions to OpenML-Python besides the + shipped scikit-learn extension. +- FIX #1035: Render class attributes and methods again. +- ADD #1049: Add a command line tool for configuration openml-python. +- FIX #1042: Fixes a rare concurrency issue with OpenML-Python and + joblib which caused the joblib worker pool to fail. +- FIX #1053: Fixes a bug which could prevent importing the package in + a docker container. + +## 0.12.0 + +- ADD #964: Validate `ignore_attribute`, `default_target_attribute`, + `row_id_attribute` are set to attributes that exist on the dataset + when calling `create_dataset`. +- ADD #979: Dataset features and qualities are now also cached in + pickle format. 
+- ADD #982: Add helper functions for column transformers. +- ADD #989: `run_model_on_task` will now warn the user the the model + passed has already been fitted. +- ADD #1009 : Give possibility to not download the dataset qualities. + The cached version is used even so download attribute is false. +- ADD #1016: Add scikit-learn 0.24 support. +- ADD #1020: Add option to parallelize evaluation of tasks with + joblib. +- ADD #1022: Allow minimum version of dependencies to be listed for a + flow, use more accurate minimum versions for scikit-learn + dependencies. +- ADD #1023: Add admin-only calls for adding topics to datasets. +- ADD #1029: Add support for fetching dataset from a minio server in + parquet format. +- ADD #1031: Generally improve runtime measurements, add them for some + previously unsupported flows (e.g. BaseSearchCV derived flows). +- DOC #973 : Change the task used in the welcome page example so it no + longer fails using numerical dataset. +- MAINT #671: Improved the performance of `check_datasets_active` by + only querying the given list of datasets in contrast to querying all + datasets. Modified the corresponding unit test. +- MAINT #891: Changed the way that numerical features are stored. + Numerical features that range from 0 to 255 are now stored as uint8, + which reduces the storage space required as well as storing and + loading times. +- MAINT #975, #988: Add CI through Github Actions. +- MAINT #977: Allow `short` and `long` scenarios for unit tests. + Reduce the workload for some unit tests. +- MAINT #985, #1000: Improve unit test stability and output + readability, and adds load balancing. +- MAINT #1018: Refactor data loading and storage. Data is now + compressed on the first call to [get_data]{.title-ref}. +- MAINT #1024: Remove flaky decorator for study unit test. +- FIX #883 #884 #906 #972: Various improvements to the caching system. +- FIX #980: Speed up `check_datasets_active`. +- FIX #984: Add a retry mechanism when the server encounters a + database issue. +- FIX #1004: Fixed an issue that prevented installation on some + systems (e.g. Ubuntu). +- FIX #1013: Fixes a bug where `OpenMLRun.setup_string` was not + uploaded to the server, prepares for `run_details` being sent from + the server. +- FIX #1021: Fixes an issue that could occur when running unit tests + and openml-python was not in PATH. +- FIX #1037: Fixes a bug where a dataset could not be loaded if a + categorical value had listed nan-like as a possible category. + +## 0.11.0 + +- ADD #753: Allows uploading custom flows to OpenML via OpenML-Python. +- ADD #777: Allows running a flow on pandas dataframes (in addition to + numpy arrays). +- ADD #888: Allow passing a [task_id]{.title-ref} to + [run_model_on_task]{.title-ref}. +- ADD #894: Support caching of datasets using feather format as an + option. +- ADD #929: Add `edit_dataset` and `fork_dataset` to allow editing and + forking of uploaded datasets. +- ADD #866, #943: Add support for scikit-learn\'s + [passthrough]{.title-ref} and [drop]{.title-ref} when uploading + flows to OpenML. +- ADD #879: Add support for scikit-learn\'s MLP hyperparameter + [layer_sizes]{.title-ref}. +- ADD #894: Support caching of datasets using feather format as an + option. +- ADD #945: PEP 561 compliance for distributing Type information. +- DOC #660: Remove nonexistent argument from docstring. +- DOC #901: The API reference now documents the config file and its + options. +- DOC #912: API reference now shows [create_task]{.title-ref}. 
+- DOC #954: Remove TODO text from documentation. +- DOC #960: document how to upload multiple ignore attributes. +- FIX #873: Fixes an issue which resulted in incorrect URLs when + printing OpenML objects after switching the server. +- FIX #885: Logger no longer registered by default. Added utility + functions to easily register logging to console and file. +- FIX #890: Correct the scaling of data in the SVM example. +- MAINT #371: `list_evaluations` default `size` changed from `None` to + `10_000`. +- MAINT #767: Source distribution installation is now unit-tested. +- MAINT #781: Add pre-commit and automated code formatting with black. +- MAINT #804: Rename arguments of list_evaluations to indicate they + expect lists of ids. +- MAINT #836: OpenML supports only pandas version 1.0.0 or above. +- MAINT #865: OpenML no longer bundles test files in the source + distribution. +- MAINT #881: Improve the error message for too-long URIs. +- MAINT #897: Dropping support for Python 3.5. +- MAINT #916: Adding support for Python 3.8. +- MAINT #920: Improve error messages for dataset upload. +- MAINT #921: Improve hangling of the OpenML server URL in the config + file. +- MAINT #925: Improve error handling and error message when loading + datasets. +- MAINT #928: Restructures the contributing documentation. +- MAINT #936: Adding support for scikit-learn 0.23.X. +- MAINT #945: Make OpenML-Python PEP562 compliant. +- MAINT #951: Converts TaskType class to a TaskType enum. + +## 0.10.2 + +- ADD #857: Adds task type ID to list_runs +- DOC #862: Added license BSD 3-Clause to each of the source files. + +## 0.10.1 + +- ADD #175: Automatically adds the docstring of scikit-learn objects + to flow and its parameters. +- ADD #737: New evaluation listing call that includes the + hyperparameter settings. +- ADD #744: It is now possible to only issue a warning and not raise + an exception if the package versions for a flow are not met when + deserializing it. +- ADD #783: The URL to download the predictions for a run is now + stored in the run object. +- ADD #790: Adds the uploader name and id as new filtering options for + `list_evaluations`. +- ADD #792: New convenience function `openml.flow.get_flow_id`. +- ADD #861: Debug-level log information now being written to a file in + the cache directory (at most 2 MB). +- DOC #778: Introduces instructions on how to publish an extension to + support other libraries than scikit-learn. +- DOC #785: The examples section is completely restructured into + simple simple examples, advanced examples and examples showcasing + the use of OpenML-Python to reproduce papers which were done with + OpenML-Python. +- DOC #788: New example on manually iterating through the split of a + task. +- DOC #789: Improve the usage of dataframes in the examples. +- DOC #791: New example for the paper *Efficient and Robust Automated + Machine Learning* by Feurer et al. (2015). +- DOC #803: New example for the paper *Don't Rule Out Simple Models + Prematurely: A Large Scale Benchmark Comparing Linear and Non-linear + Classifiers in OpenML* by Benjamin Strang et al. (2018). +- DOC #808: New example demonstrating basic use cases of a dataset. +- DOC #810: New example demonstrating the use of benchmarking studies + and suites. +- DOC #832: New example for the paper *Scalable Hyperparameter + Transfer Learning* by Valerio Perrone et al. (2019) +- DOC #834: New example showing how to plot the loss surface for a + support vector machine. 
+- FIX #305: Do not require the external version in the flow XML when + loading an object. +- FIX #734: Better handling of *\"old\"* flows. +- FIX #736: Attach a StreamHandler to the openml logger instead of the + root logger. +- FIX #758: Fixes an error which made the client API crash when + loading a sparse data with categorical variables. +- FIX #779: Do not fail on corrupt pickle +- FIX #782: Assign the study id to the correct class attribute. +- FIX #819: Automatically convert column names to type string when + uploading a dataset. +- FIX #820: Make `__repr__` work for datasets which do not have an id. +- MAINT #796: Rename an argument to make the function + `list_evaluations` more consistent. +- MAINT #811: Print the full error message given by the server. +- MAINT #828: Create base class for OpenML entity classes. +- MAINT #829: Reduce the number of data conversion warnings. +- MAINT #831: Warn if there\'s an empty flow description when + publishing a flow. +- MAINT #837: Also print the flow XML if a flow fails to validate. +- FIX #838: Fix list_evaluations_setups to work when evaluations are + not a 100 multiple. +- FIX #847: Fixes an issue where the client API would crash when + trying to download a dataset when there are no qualities available + on the server. +- MAINT #849: Move logic of most different `publish` functions into + the base class. +- MAINt #850: Remove outdated test code. + +## 0.10.0 + +- ADD #737: Add list_evaluations_setups to return hyperparameters + along with list of evaluations. +- FIX #261: Test server is cleared of all files uploaded during unit + testing. +- FIX #447: All files created by unit tests no longer persist in + local. +- FIX #608: Fixing dataset_id referenced before assignment error in + get_run function. +- FIX #447: All files created by unit tests are deleted after the + completion of all unit tests. +- FIX #589: Fixing a bug that did not successfully upload the columns + to ignore when creating and publishing a dataset. +- FIX #608: Fixing dataset_id referenced before assignment error in + get_run function. +- DOC #639: More descriptive documention for function to convert array + format. +- DOC #719: Add documentation on uploading tasks. +- ADD #687: Adds a function to retrieve the list of evaluation + measures available. +- ADD #695: A function to retrieve all the data quality measures + available. +- ADD #412: Add a function to trim flow names for scikit-learn flows. +- ADD #715: [list_evaluations]{.title-ref} now has an option to sort + evaluations by score (value). +- ADD #722: Automatic reinstantiation of flow in + [run_model_on_task]{.title-ref}. Clearer errors if that\'s not + possible. +- ADD #412: The scikit-learn extension populates the short name field + for flows. +- MAINT #726: Update examples to remove deprecation warnings from + scikit-learn +- MAINT #752: Update OpenML-Python to be compatible with sklearn 0.21 +- ADD #790: Add user ID and name to list_evaluations + +## 0.9.0 + +- ADD #560: OpenML-Python can now handle regression tasks as well. +- ADD #620, #628, #632, #649, #682: Full support for studies and + distinguishes suites from studies. +- ADD #607: Tasks can now be created and uploaded. +- ADD #647, #673: Introduced the extension interface. This provides an + easy way to create a hook for machine learning packages to perform + e.g. automated runs. +- ADD #548, #646, #676: Support for Pandas DataFrame and + SparseDataFrame +- ADD #662: Results of listing functions can now be returned as + pandas.DataFrame. 
+- ADD #59: Datasets can now also be retrieved by name. +- ADD #672: Add timing measurements for runs, when possible. +- ADD #661: Upload time and error messages now displayed with + [list_runs]{.title-ref}. +- ADD #644: Datasets can now be downloaded \'lazily\', retrieving only + metadata at first, and the full dataset only when necessary. +- ADD #659: Lazy loading of task splits. +- ADD #516: [run_flow_on_task]{.title-ref} flow uploading is now + optional. +- ADD #680: Adds + [openml.config.start_using_configuration_for_example]{.title-ref} + (and resp. stop) to easily connect to the test server. +- ADD #75, #653: Adds a pretty print for objects of the top-level + classes. +- FIX #642: [check_datasets_active]{.title-ref} now correctly also + returns active status of deactivated datasets. +- FIX #304, #636: Allow serialization of numpy datatypes and list of + lists of more types (e.g. bools, ints) for flows. +- FIX #651: Fixed a bug that would prevent openml-python from finding + the user\'s config file. +- FIX #693: OpenML-Python uses liac-arff instead of scipy.io for + loading task splits now. +- DOC #678: Better color scheme for code examples in documentation. +- DOC #681: Small improvements and removing list of missing functions. +- DOC #684: Add notice to examples that connect to the test server. +- DOC #688: Add new example on retrieving evaluations. +- DOC #691: Update contributing guidelines to use Github draft feature + instead of tags in title. +- DOC #692: All functions are documented now. +- MAINT #184: Dropping Python2 support. +- MAINT #596: Fewer dependencies for regular pip install. +- MAINT #652: Numpy and Scipy are no longer required before + installation. +- MAINT #655: Lazy loading is now preferred in unit tests. +- MAINT #667: Different tag functions now share code. +- MAINT #666: More descriptive error message for + [TypeError]{.title-ref} in [list_runs]{.title-ref}. +- MAINT #668: Fix some type hints. +- MAINT #677: [dataset.get_data]{.title-ref} now has consistent + behavior in its return type. +- MAINT #686: Adds ignore directives for several [mypy]{.title-ref} + folders. +- MAINT #629, #630: Code now adheres to single PEP8 standard. + +## 0.8.0 + +- ADD #440: Improved dataset upload. +- ADD #545, #583: Allow uploading a dataset from a pandas DataFrame. +- ADD #528: New functions to update the status of a dataset. +- ADD #523: Support for scikit-learn 0.20\'s new ColumnTransformer. +- ADD #459: Enhanced support to store runs on disk prior to uploading + them to OpenML. +- ADD #564: New helpers to access the structure of a flow (and find + its subflows). +- ADD #618: The software will from now on retry to connect to the + server if a connection failed. The number of retries can be + configured. +- FIX #538: Support loading clustering tasks. +- FIX #464: Fixes a bug related to listing functions (returns correct + listing size). +- FIX #580: Listing function now works properly when there are less + results than requested. +- FIX #571: Fixes an issue where tasks could not be downloaded in + parallel. +- FIX #536: Flows can now be printed when the flow name is None. +- FIX #504: Better support for hierarchical hyperparameters when + uploading scikit-learn\'s grid and random search. +- FIX #569: Less strict checking of flow dependencies when loading + flows. +- FIX #431: Pickle of task splits are no longer cached. +- DOC #540: More examples for dataset uploading. +- DOC #554: Remove the doubled progress entry from the docs. 
+- MAINT #613: Utilize the latest updates in OpenML evaluation + listings. +- MAINT #482: Cleaner interface for handling search traces. +- MAINT #557: Continuous integration works for scikit-learn 0.18-0.20. +- MAINT #542: Continuous integration now runs python3.7 as well. +- MAINT #535: Continuous integration now enforces PEP8 compliance for + new code. +- MAINT #527: Replace deprecated nose by pytest. +- MAINT #510: Documentation is now built by travis-ci instead of + circle-ci. +- MAINT: Completely re-designed documentation built on sphinx gallery. +- MAINT #462: Appveyor CI support. +- MAINT #477: Improve error handling for issue + [#479](https://github.com/openml/openml-python/pull/479): the OpenML + connector fails earlier and with a better error message when failing + to create a flow from the OpenML description. +- MAINT #561: Improve documentation on running specific unit tests. + +## 0.4.-0.7 + +There is no changelog for these versions. + +## 0.3.0 + +- Add this changelog +- 2nd example notebook PyOpenML.ipynb +- Pagination support for list datasets and list tasks + +## Prior + +There is no changelog for prior versions. diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 000000000..d0c4f79d8 --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,3 @@ +.jp-InputArea-prompt, .jp-InputPrompt { + display: none !important; +} diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 000000000..7c733fedc --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,155 @@ +# User Guide + +This document will guide you through the most important use cases, +functions and classes in the OpenML Python API. Throughout this +document, we will use [pandas](https://pandas.pydata.org/) to format and +filter tables. + +## Installation + +The OpenML Python package is a connector to +[OpenML](https://www.openml.org/). It allows you to use and share +datasets and tasks, run machine learning algorithms on them and then +share the results online. + +The ["intruduction tutorial and setup"][intro] tutorial gives a short introduction on how to install and +set up the OpenML Python connector, followed up by a simple example. + +## Configuration + +The configuration file resides in a directory `.config/openml` in the +home directory of the user and is called config (More specifically, it +resides in the [configuration directory specified by the XDGB Base +Directory +Specification](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html)). +It consists of `key = value` pairs which are separated by newlines. The +following keys are defined: + +- apikey: required to access the server. The [introduction tutorial][intro] describes how to obtain an API key. +- server: the server to connect to (default: `http://www.openml.org`). + For connection to the test server, set this to `test.openml.org`. +- cachedir: the root folder where the cache file directories should be created. + If not given, will default to `~/.openml/cache` +- avoid_duplicate_runs: if set to `True` (default), when `run_flow_on_task` or similar methods + are called a lookup is performed to see if there already + exists such a run on the server. If so, download those + results instead. +- retry_policy: Defines how to react when the server is unavailable or + experiencing high load. It determines both how often to + attempt to reconnect and how quickly to do so. 
Please don't use `human` in an automated script that you run more than
+  one instance of, as it might increase the time to complete your jobs
+  and those of others. One of:
+    - human (default): For people running openml in an interactive
+      fashion. Try only a few times, but in quick succession.
+    - robot: For people using openml in an automated fashion. Keep
+      trying to reconnect for a longer time, quickly increasing the
+      time between retries.
+
+- connection_n_retries: number of times to retry a request if it fails.
+  The default depends on the retry_policy (5 for `human`, 50 for
+  `robot`).
+- verbosity: the level of output:
+    - 0: normal output
+    - 1: info output
+    - 2: debug output
+
+This file can easily be configured via the `openml` command line
+interface. To see where the file is stored and what its values are, run
+`openml configure none`.
+
+## Docker
+
+It is also possible to try out the latest development version of
+`openml-python` with docker:
+
+``` bash
+docker run -it openml/openml-python
+```
+
+See the [openml-python docker
+documentation](https://github.com/openml/openml-python/blob/main/docker/readme.md)
+for more information.
+
+## Key concepts
+
+OpenML contains several key concepts which it needs to make machine
+learning research shareable. A machine learning experiment consists of
+one or several **runs**, which describe the performance of an algorithm
+(called a **flow** in OpenML) with its hyperparameter settings (called a
+**setup**) on a **task**. A **task** is the combination of a
+**dataset**, a split and an evaluation metric. In this user guide we
+will go from listing and exploring existing **tasks** to actually
+running machine learning algorithms on them. In a further user guide we
+will examine how to search through **datasets** in order to curate a
+list of **tasks**.
+
+A further explanation is given in the [OpenML user
+guide](https://openml.github.io/OpenML/#concepts).
+
+## Working with tasks
+
+You can think of a task as an experimentation protocol, describing how
+to apply a machine learning model to a dataset in a way that is
+comparable with the results of others (more on how to do that further
+down). Tasks are containers, defining which dataset to use, what kind of
+task we're solving (regression, classification, clustering, etc.) and
+which column to predict. Furthermore, a task also describes how to split
+the dataset into a train and test set, whether to use several disjoint
+train and test splits (cross-validation) and whether this should be
+repeated several times. Finally, the task defines a target metric for
+which a flow should be optimized.
+
+If you want to know more about tasks, see the ["Task
+tutorial"](../examples/30_extended/tasks_tutorial).
+
+## Running machine learning algorithms and uploading results
+
+In order to upload and share results of running a machine learning
+algorithm on a task, we need to create an
+[openml.runs.OpenMLRun][]. A run object can be created by running an
+[openml.flows.OpenMLFlow][] or a scikit-learn compatible model on a
+task. We will focus on the simpler example of running a scikit-learn
+model.
+
+Flows are descriptions of something runnable which does the machine
+learning. A flow contains all information to set up the necessary
+machine learning library and its dependencies as well as all possible
+parameters.
+
+A run is the outcome of running a flow on a task. It contains all
+parameter settings for the flow, a setup string (most likely a command
+line call) and all predictions of that run.
When a run is uploaded to +the server, the server automatically calculates several metrics which +can be used to compare the performance of different flows to each other. + +So far, the OpenML Python connector works only with estimator objects +following the [scikit-learn estimator +API](https://scikit-learn.org/stable/developers/develop.html#apis-of-scikit-learn-objects). +Those can be directly run on a task, and a flow will automatically be +created or downloaded from the server if it already exists. + +See ["Simple Flows and Runs"](../examples/20_basic/simple_flows_and_runs_tutorial) for a tutorial covers how to train different machine learning models, +how to run machine learning models on OpenML data and how to share the +results. + +## Datasets + +OpenML provides a large collection of datasets and the benchmark +[OpenML100](https://docs.openml.org/benchmark/) which consists of a +curated list of datasets. + +You can find the dataset that best fits your requirements by making use +of the available metadata. The tutorial ["extended datasets"](../examples/30_extended/datasets_tutorial) which follows explains how to +get a list of datasets, how to filter the list to find the dataset that +suits your requirements and how to download a dataset. + +OpenML is about sharing machine learning results and the datasets they +were obtained on. Learn how to share your datasets in the following +tutorial ["Upload"](../examples/30_extended/create_upload_tutorial) tutorial. + +# Extending OpenML-Python + +OpenML-Python provides an extension interface to connect machine +learning libraries directly to the API and ships a `scikit-learn` +extension. Read more about them in the ["Extensions"](extensions.md) section. + +[intro]: examples/20_basic/introduction_tutorial/ + diff --git a/examples/20_basic/introduction_tutorial.py b/examples/20_basic/introduction_tutorial.py index 26d3143dd..4bf5fd45d 100644 --- a/examples/20_basic/introduction_tutorial.py +++ b/examples/20_basic/introduction_tutorial.py @@ -1,10 +1,8 @@ -""" -Introduction tutorial & Setup -============================= +# %% [markdown] +# # Introduction tutorial & Setup +# An example how to set up OpenML-Python followed up by a simple example. -An example how to set up OpenML-Python followed up by a simple example. -""" -############################################################################ +# %% [markdown] # OpenML is an online collaboration platform for machine learning which allows # you to: # @@ -16,22 +14,16 @@ # * Large scale benchmarking, compare to state of the art # -############################################################################ -# Installation -# ^^^^^^^^^^^^ +# %% [markdown] +# # Installation # Installation is done via ``pip``: # -# .. code:: bash -# -# pip install openml -# -# For further information, please check out the installation guide at -# :ref:`installation`. -# +# ```bash +# pip install openml +# ``` -############################################################################ -# Authentication -# ^^^^^^^^^^^^^^ +# %% [markdown] +# # Authentication # # The OpenML server can only be accessed by users who have signed up on the # OpenML platform. If you don’t have an account yet, sign up now. @@ -55,28 +47,28 @@ # you authenticate for the duration of the python process. 
-############################################################################ - -# License: BSD 3-Clause +# %% import openml from sklearn import neighbors -############################################################################ +# %% [markdown] # .. warning:: # .. include:: ../../test_server_usage_warning.txt + +# %% openml.config.start_using_configuration_for_example() -############################################################################ +# %% [markdown] # When using the main server instead, make sure your apikey is configured. # This can be done with the following line of code (uncomment it!). # Never share your apikey with others. +# %% # openml.config.apikey = 'YOURKEY' -############################################################################ -# Caching -# ^^^^^^^ +# %% [markdown] +# # Caching # When downloading datasets, tasks, runs and flows, they will be cached to # retrieve them without calling the server later. As with the API key, # the cache directory can be either specified through the config file or @@ -87,14 +79,16 @@ # will use **~/.openml/cache** as the cache directory. # * Run the code below, replacing 'YOURDIR' with the path to the cache directory. +# %% # Uncomment and set your OpenML cache directory # import os # openml.config.cache_directory = os.path.expanduser('YOURDIR') -############################################################################ -# Simple Example -# ^^^^^^^^^^^^^^ +# %% [markdown] +# # Simple Example # Download the OpenML task for the eeg-eye-state. + +# %% task = openml.tasks.get_task(403) data = openml.datasets.get_dataset(task.dataset_id) clf = neighbors.KNeighborsClassifier(n_neighbors=5) @@ -105,5 +99,6 @@ myrun = run.publish() print(f"kNN on {data.name}: {myrun.openml_url}") -############################################################################ +# %% openml.config.stop_using_configuration_for_example() +# License: BSD 3-Clause diff --git a/examples/20_basic/simple_datasets_tutorial.py b/examples/20_basic/simple_datasets_tutorial.py index c525a3ef9..5bdbca14b 100644 --- a/examples/20_basic/simple_datasets_tutorial.py +++ b/examples/20_basic/simple_datasets_tutorial.py @@ -1,31 +1,27 @@ -""" -======== -Datasets -======== - -A basic tutorial on how to list, load and visualize datasets. -""" -############################################################################ +# %% [markdown] +# # Datasets +# A basic tutorial on how to list, load and visualize datasets. +# # In general, we recommend working with tasks, so that the results can # be easily reproduced. Furthermore, the results can be compared to existing results # at OpenML. However, for the purposes of this tutorial, we are going to work with # the datasets directly. 
-# License: BSD 3-Clause +# %% import openml -############################################################################ -# List datasets -# ============= +# %% [markdown] +# ## List datasets +# %% datasets_df = openml.datasets.list_datasets(output_format="dataframe") print(datasets_df.head(n=10)) -############################################################################ -# Download a dataset -# ================== +# %% [markdown] +# ## Download a dataset +# %% # Iris dataset https://www.openml.org/d/61 dataset = openml.datasets.get_dataset(61) @@ -37,23 +33,27 @@ print(f"URL: {dataset.url}") print(dataset.description[:500]) -############################################################################ -# Load a dataset -# ============== - +# %% [markdown] +# ## Load a dataset # X - An array/dataframe where each row represents one example with # the corresponding feature values. +# # y - the classes for each example +# # categorical_indicator - an array that indicates which feature is categorical +# # attribute_names - the names of the features for the examples (X) and # target feature (y) + +# %% X, y, categorical_indicator, attribute_names = dataset.get_data( dataset_format="dataframe", target=dataset.default_target_attribute ) -############################################################################ + +# %% [markdown] # Visualize the dataset -# ===================== +# %% import pandas as pd import seaborn as sns import matplotlib.pyplot as plt @@ -71,3 +71,5 @@ def hide_current_axis(*args, **kwds): iris_plot = sns.pairplot(combined_data, hue="class") iris_plot.map_upper(hide_current_axis) plt.show() + +# License: BSD 3-Clause diff --git a/examples/20_basic/simple_flows_and_runs_tutorial.py b/examples/20_basic/simple_flows_and_runs_tutorial.py index 0176328b6..0de16d74c 100644 --- a/examples/20_basic/simple_flows_and_runs_tutorial.py +++ b/examples/20_basic/simple_flows_and_runs_tutorial.py @@ -1,26 +1,25 @@ -""" -Flows and Runs -============== - -A simple tutorial on how to train/run a model and how to upload the results. -""" - -# License: BSD 3-Clause +# %% [markdown] +# # Flows and Runs +# A simple tutorial on how to train/run a model and how to upload the results. +# %% import openml from sklearn import ensemble, neighbors -############################################################################ +# %% [markdown] # .. warning:: # .. 
include:: ../../test_server_usage_warning.txt + +# %% openml.config.start_using_configuration_for_example() -############################################################################ -# Train a machine learning model -# ============================== +# %% [markdown] +# ## Train a machine learning model # NOTE: We are using dataset 20 from the test server: https://test.openml.org/d/20 + +# %% dataset = openml.datasets.get_dataset(20) X, y, categorical_indicator, attribute_names = dataset.get_data( target=dataset.default_target_attribute @@ -28,22 +27,23 @@ clf = neighbors.KNeighborsClassifier(n_neighbors=3) clf.fit(X, y) -############################################################################ -# Running a model on a task -# ========================= +# %% [markdown] +# ## Running a model on a task +# %% task = openml.tasks.get_task(119) clf = ensemble.RandomForestClassifier() run = openml.runs.run_model_on_task(clf, task) print(run) -############################################################################ -# Publishing the run -# ================== +# %% [markdown] +# ## Publishing the run +# %% myrun = run.publish() print(f"Run was uploaded to {myrun.openml_url}") print(f"The flow can be found at {myrun.flow.openml_url}") -############################################################################ +# %% openml.config.stop_using_configuration_for_example() +# License: BSD 3-Clause diff --git a/examples/20_basic/simple_suites_tutorial.py b/examples/20_basic/simple_suites_tutorial.py index 92dfb3c04..5a1b429b1 100644 --- a/examples/20_basic/simple_suites_tutorial.py +++ b/examples/20_basic/simple_suites_tutorial.py @@ -1,19 +1,14 @@ -""" -================ -Benchmark suites -================ - -This is a brief showcase of OpenML benchmark suites, which were introduced by -`Bischl et al. (2019) `_. Benchmark suites standardize the -datasets and splits to be used in an experiment or paper. They are fully integrated into OpenML -and simplify both the sharing of the setup and the results. -""" - -# License: BSD 3-Clause +# %% [markdown] +# # Benchmark suites +# This is a brief showcase of OpenML benchmark suites, which were introduced by +# [Bischl et al. (2019)](https://arxiv.org/abs/1708.03731v2). Benchmark suites standardize the +# datasets and splits to be used in an experiment or paper. They are fully integrated into OpenML +# and simplify both the sharing of the setup and the results. +# %% import openml -#################################################################################################### +# %% [markdown] # OpenML-CC18 # =========== # @@ -30,38 +25,43 @@ # imbalanced datasets which require special treatment for both algorithms and evaluation # measures). # -# A full description can be found in the `OpenML benchmarking docs -# `_. +# A full description can be found in the +# [OpenML benchmarking docs](https://docs.openml.org/benchmark/#openml-cc18). # # In this example we'll focus on how to use benchmark suites in practice. -#################################################################################################### +# %% [markdown] # Downloading benchmark suites # ============================ +# %% suite = openml.study.get_suite(99) print(suite) -#################################################################################################### +# %% [markdown] # The benchmark suite does not download the included tasks and datasets itself, but only contains # a list of which tasks constitute the study. 
# # Tasks can then be accessed via +# %% tasks = suite.tasks print(tasks) -#################################################################################################### +# %% [markdown] # and iterated over for benchmarking. For speed reasons we only iterate over the first three tasks: +# %% for task_id in tasks[:3]: task = openml.tasks.get_task(task_id) print(task) -#################################################################################################### +# %% [markdown] # Further examples # ================ # -# * :ref:`sphx_glr_examples_30_extended_suites_tutorial.py` -# * :ref:`sphx_glr_examples_30_extended_study_tutorial.py` -# * :ref:`sphx_glr_examples_40_paper_2018_ida_strang_example.py` +# * [Suites Tutorial](../../30_extended/suites_tutorial) +# * [Study Tutoral](../../30_extended/study_tutorial) +# * [Paper example: Strang et al.](../../40_paper/2018_ida_strang_example.py) + +# License: BSD 3-Clause diff --git a/examples/30_extended/configure_logging.py b/examples/30_extended/configure_logging.py index 3d33f1546..bc7bbc722 100644 --- a/examples/30_extended/configure_logging.py +++ b/examples/30_extended/configure_logging.py @@ -1,31 +1,26 @@ -""" -======== -Logging -======== - -Explains openml-python logging, and shows how to configure it. -""" -################################################################################## -# Openml-python uses the `Python logging module `_ +# %% [markdown] +# # Logging +# This tutorial explains openml-python logging, and shows how to configure it. +# Openml-python uses the [Python logging module](https://docs.python.org/3/library/logging.html) # to provide users with log messages. Each log message is assigned a level of importance, see # the table in Python's logging tutorial -# `here `_. +# [here](https://docs.python.org/3/howto/logging.html#when-to-use-logging). # # By default, openml-python will print log messages of level `WARNING` and above to console. # All log messages (including `DEBUG` and `INFO`) are also saved in a file, which can be # found in your cache directory (see also the -# :ref:`sphx_glr_examples_20_basic_introduction_tutorial.py`). +# [introduction tutorial](../20_basic/introduction_tutorial). # These file logs are automatically deleted if needed, and use at most 2MB of space. # # It is possible to configure what log levels to send to console and file. # When downloading a dataset from OpenML, a `DEBUG`-level message is written: -# License: BSD 3-Clause - +# %% import openml openml.datasets.get_dataset("iris") +# %% [markdown] # With default configuration, the above example will show no output to console. # However, in your cache directory you should find a file named 'openml_python.log', # which has a DEBUG message written to it. It should be either like @@ -35,12 +30,14 @@ # , depending on whether or not you had downloaded iris before. # The processed log levels can be configured programmatically: +# %% import logging openml.config.set_console_log_level(logging.DEBUG) openml.config.set_file_log_level(logging.WARNING) openml.datasets.get_dataset("iris") +# %% [markdown] # Now the log level that was previously written to file should also be shown in the console. # The message is now no longer written to file as the `file_log` was set to level `WARNING`. # @@ -52,3 +49,5 @@ # * 0: `logging.WARNING` and up. # * 1: `logging.INFO` and up. # * 2: `logging.DEBUG` and up (i.e. all messages). 
+# +# License: BSD 3-Clause diff --git a/examples/30_extended/create_upload_tutorial.py b/examples/30_extended/create_upload_tutorial.py index 7825d8cf7..2b010401c 100644 --- a/examples/30_extended/create_upload_tutorial.py +++ b/examples/30_extended/create_upload_tutorial.py @@ -1,12 +1,8 @@ -""" -Dataset upload tutorial -======================= - -A tutorial on how to create and upload a dataset to OpenML. -""" - -# License: BSD 3-Clause +# %% [markdown] +# # Dataset upload tutorial +# A tutorial on how to create and upload a dataset to OpenML. +# %% import numpy as np import pandas as pd import sklearn.datasets @@ -15,14 +11,14 @@ import openml from openml.datasets.functions import create_dataset -############################################################################ +# %% [markdown] # .. warning:: # .. include:: ../../test_server_usage_warning.txt +# %% openml.config.start_using_configuration_for_example() -############################################################################ -############################################################################ +# %% [markdown] # Below we will cover the following cases of the dataset object: # # * A numpy array @@ -31,17 +27,17 @@ # * A sparse matrix # * A pandas sparse dataframe -############################################################################ +# %% [markdown] # Dataset is a numpy array # ======================== # A numpy array can contain lists in the case of dense data or it can contain # OrderedDicts in the case of sparse data. # -# Prepare dataset -# ^^^^^^^^^^^^^^^ +# # Prepare dataset # Load an example dataset from scikit-learn which we will upload to OpenML.org # via the API. +# %% diabetes = sklearn.datasets.load_diabetes() name = "Diabetes(scikit-learn)" X = diabetes.data @@ -49,13 +45,14 @@ attribute_names = diabetes.feature_names description = diabetes.DESCR -############################################################################ +# %% [markdown] # OpenML does not distinguish between the attributes and targets on the data # level and stores all data in a single matrix. # # The target feature is indicated as meta-data of the dataset (and tasks on # that data). +# %% data = np.concatenate((X, y.reshape((-1, 1))), axis=1) attribute_names = list(attribute_names) attributes = [(attribute_name, "REAL") for attribute_name in attribute_names] + [ @@ -68,14 +65,14 @@ ) paper_url = "https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf" -############################################################################ -# Create the dataset object -# ^^^^^^^^^^^^^^^^^^^^^^^^^ +# %% [markdown] +# # Create the dataset object # The definition of all fields can be found in the XSD files describing the # expected format: # # https://github.com/openml/OpenML/blob/master/openml_OS/views/pages/api_new/v1/xsd/openml.data.upload.xsd +# %% diabetes_dataset = create_dataset( # The name of the dataset (needs to be unique). # Must not be longer than 128 characters and only contain @@ -113,20 +110,20 @@ paper_url=paper_url, ) -############################################################################ +# %% diabetes_dataset.publish() print(f"URL for dataset: {diabetes_dataset.openml_url}") -############################################################################ -# Dataset is a list -# ================= +# %% [markdown] +# ## Dataset is a list # A list can contain lists in the case of dense data or it can contain # OrderedDicts in the case of sparse data. 
# # Weather dataset: # https://storm.cis.fordham.edu/~gweiss/data-mining/datasets.html +# %% data = [ ["sunny", 85, 85, "FALSE", "no"], ["sunny", 80, 90, "TRUE", "no"], @@ -186,14 +183,13 @@ version_label="example", ) -############################################################################ +# %% weather_dataset.publish() print(f"URL for dataset: {weather_dataset.openml_url}") -############################################################################ -# Dataset is a pandas DataFrame -# ============================= +# %% [markdown] +# ## Dataset is a pandas DataFrame # It might happen that your dataset is made of heterogeneous data which can usually # be stored as a Pandas DataFrame. DataFrames offer the advantage of # storing the type of data for each column as well as the attribute names. @@ -202,20 +198,23 @@ # function :func:`openml.datasets.create_dataset`. In this regard, you only # need to pass ``'auto'`` to the ``attributes`` parameter. +# %% df = pd.DataFrame(data, columns=[col_name for col_name, _ in attribute_names]) + # enforce the categorical column to have a categorical dtype df["outlook"] = df["outlook"].astype("category") df["windy"] = df["windy"].astype("bool") df["play"] = df["play"].astype("category") print(df.info()) -############################################################################ +# %% [markdown] # We enforce the column 'outlook' and 'play' to be a categorical # dtype while the column 'windy' is kept as a boolean column. 'temperature' # and 'humidity' are kept as numeric columns. Then, we can # call :func:`openml.datasets.create_dataset` by passing the dataframe and # fixing the parameter ``attributes`` to ``'auto'``. +# %% weather_dataset = create_dataset( name="Weather", description=description, @@ -233,15 +232,15 @@ version_label="example", ) -############################################################################ - +# %% weather_dataset.publish() print(f"URL for dataset: {weather_dataset.openml_url}") -############################################################################ +# %% [markdown] # Dataset is a sparse matrix # ========================== +# %% sparse_data = coo_matrix( ([0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], ([0, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 2, 0, 1])) ) @@ -269,15 +268,14 @@ version_label="example", ) -############################################################################ +# %% xor_dataset.publish() print(f"URL for dataset: {xor_dataset.openml_url}") -############################################################################ -# Dataset is a pandas dataframe with sparse columns -# ================================================= +# %% [markdown] +# ## Dataset is a pandas dataframe with sparse columns sparse_data = coo_matrix( ([1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0], ([0, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 2, 0, 1])) @@ -303,11 +301,11 @@ version_label="example", ) -############################################################################ +# %% xor_dataset.publish() print(f"URL for dataset: {xor_dataset.openml_url}") - -############################################################################ +# %% openml.config.stop_using_configuration_for_example() +# License: BSD 3-Clause diff --git a/examples/30_extended/custom_flow_.py b/examples/30_extended/custom_flow_.py index 241f3e6eb..15ec0e1fb 100644 --- a/examples/30_extended/custom_flow_.py +++ b/examples/30_extended/custom_flow_.py @@ -1,20 +1,18 @@ -""" -================================ -Creating and Using a Custom Flow -================================ +# %% 
[markdown] +# # Creating and Using a Custom Flow -The most convenient way to create a flow for your machine learning workflow is to generate it -automatically as described in the :ref:`sphx_glr_examples_30_extended_flow_id_tutorial.py` tutorial. -However, there are scenarios where this is not possible, such -as when the flow uses a framework without an extension or when the flow is described by a script. +# The most convenient way to create a flow for your machine learning workflow is to generate it +# automatically as described in the +# ["Obtaining Flow IDs"](../../30_extended/flow_id_tutorial) tutorial. +# However, there are scenarios where this is not possible, such +# as when the flow uses a framework without an extension or when the flow is described by a script. -In those cases you can still create a custom flow by following the steps of this tutorial. -As an example we will use the flows generated for the `AutoML Benchmark `_, -and also show how to link runs to the custom flow. -""" - -# License: BSD 3-Clause +# In those cases you can still create a custom flow by following the steps of this tutorial. +# As an example we will use the flows generated for the +# [AutoML Benchmark](https://openml.github.io/automlbenchmark/), +# and also show how to link runs to the custom flow. +# %% from collections import OrderedDict import numpy as np @@ -22,14 +20,15 @@ from openml import OpenMLClassificationTask from openml.runs.functions import format_prediction -#################################################################################################### +# %% [markdown] # .. warning:: # .. include:: ../../test_server_usage_warning.txt + +# %% openml.config.start_using_configuration_for_example() -#################################################################################################### -# 1. Defining the flow -# ==================== +# %% [markdown] +# ## 1. Defining the flow # The first step is to define all the hyperparameters of your flow. # The API pages feature a descriptions of each variable of the :class:`openml.flows.OpenMLFlow`. # Note that `external version` and `name` together uniquely identify a flow. @@ -43,6 +42,7 @@ # Make sure to leave enough information so others can determine exactly which # version of the package/script is used. Use tags so users can find your flow easily. +# %% general = dict( name="automlbenchmark_autosklearn", description=( @@ -55,12 +55,13 @@ dependencies="amlb==0.9", ) -#################################################################################################### +# %% [markdown] # Next we define the flow hyperparameters. We define their name and default value in `parameters`, # and provide meta-data for each hyperparameter through `parameters_meta_info`. # Note that even though the argument name is `parameters` they describe the hyperparameters. # The use of ordered dicts is required. +# %% flow_hyperparameters = dict( parameters=OrderedDict(time="240", memory="32", cores="8"), parameters_meta_info=OrderedDict( @@ -70,7 +71,7 @@ ), ) -#################################################################################################### +# %% [markdown] # It is possible to build a flow which uses other flows. # For example, the Random Forest Classifier is a flow, but you could also construct a flow # which uses a Random Forest Classifier in a ML pipeline. When constructing the pipeline flow, @@ -86,6 +87,7 @@ # Note: flow 9313 is not actually the right flow on the test server, # but that does not matter for this demonstration. 
+# %% autosklearn_flow = openml.flows.get_flow(9313) # auto-sklearn 0.5.1 subflow = dict( components=OrderedDict(automl_tool=autosklearn_flow), @@ -93,7 +95,7 @@ # components=OrderedDict(), ) -#################################################################################################### +# %% [markdown] # With all parameters of the flow defined, we can now initialize the OpenMLFlow and publish. # Because we provided all the details already, we do not need to provide a `model` to the flow. # @@ -103,6 +105,7 @@ # So whether you have a model with no extension or no model at all, explicitly set # the model of the flow to `None`. +# %% autosklearn_amlb_flow = openml.flows.OpenMLFlow( **general, **flow_hyperparameters, @@ -112,14 +115,14 @@ autosklearn_amlb_flow.publish() print(f"autosklearn flow created: {autosklearn_amlb_flow.flow_id}") -#################################################################################################### -# 2. Using the flow -# ==================== +# %% [markdown] +# ## 2. Using the flow # This Section will show how to upload run data for your custom flow. # Take care to change the values of parameters as well as the task id, # to reflect the actual run. # Task and parameter values in the example are fictional. +# %% flow_id = autosklearn_amlb_flow.flow_id parameters = [ @@ -133,7 +136,7 @@ dataset_id = task.get_dataset().dataset_id -#################################################################################################### +# %% [markdown] # The last bit of information for the run we need are the predicted values. # The exact format of the predictions will depend on the task. # @@ -158,6 +161,8 @@ # You can ignore this code, or use it to better understand the formatting of the predictions. # # Find the repeats/folds for this task: + +# %% n_repeats, n_folds, _ = task.get_split_dimensions() all_test_indices = [ (repeat, fold, index) @@ -193,10 +198,11 @@ ) predictions.append(prediction) -#################################################################################################### +# %% [markdown] # Finally we can create the OpenMLRun object and upload. # We use the argument setup_string because the used flow was a script. +# %% benchmark_command = f"python3 runbenchmark.py auto-sklearn medium -m aws -t 119" my_run = openml.runs.OpenMLRun( task_id=task_id, @@ -211,4 +217,6 @@ my_run.publish() print("run created:", my_run.run_id) +# %% openml.config.stop_using_configuration_for_example() +# License: BSD 3-Clause diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py index 764cb8f36..8c1b8a675 100644 --- a/examples/30_extended/datasets_tutorial.py +++ b/examples/30_extended/datasets_tutorial.py @@ -1,55 +1,48 @@ -""" -======== -Datasets -======== - -How to list and download datasets. -""" - -# License: BSD 3-Clauses +# %% [markdown] +# # Datasets +# How to list and download datasets. +# %% import openml import pandas as pd from openml.datasets import edit_dataset, fork_dataset, get_dataset -############################################################################ -# Exercise 0 -# ********** +# %% [markdown] +# ## Exercise 0 # # * List datasets -# -# * Use the output_format parameter to select output type -# * Default gives 'dict' (other option: 'dataframe', see below) -# -# Note: list_datasets will return a pandas dataframe by default from 0.15. When using -# openml-python 0.14, `list_datasets` will warn you to use output_format='dataframe'. 
-datalist = openml.datasets.list_datasets(output_format="dataframe") + +# %% +datalist = openml.datasets.list_datasets() datalist = datalist[["did", "name", "NumberOfInstances", "NumberOfFeatures", "NumberOfClasses"]] print(f"First 10 of {len(datalist)} datasets...") datalist.head(n=10) # The same can be done with lesser lines of code -openml_df = openml.datasets.list_datasets(output_format="dataframe") +openml_df = openml.datasets.list_datasets() openml_df.head(n=10) -############################################################################ -# Exercise 1 -# ********** +# %% [markdown] +# ## Exercise 1 # # * Find datasets with more than 10000 examples. # * Find a dataset called 'eeg_eye_state'. # * Find all datasets with more than 50 classes. + +# %% datalist[datalist.NumberOfInstances > 10000].sort_values(["NumberOfInstances"]).head(n=20) -"" + +# %% datalist.query('name == "eeg-eye-state"') -"" + +# %% datalist.query("NumberOfClasses > 50") -############################################################################ -# Download datasets -# ================= +# %% [markdown] +# ## Download datasets +# %% # This is done based on the dataset ID. dataset = openml.datasets.get_dataset(1471) @@ -61,24 +54,28 @@ print(f"URL: {dataset.url}") print(dataset.description[:500]) -############################################################################ +# %% [markdown] # Get the actual data. # # openml-python returns data as pandas dataframes (stored in the `eeg` variable below), # and also some additional metadata that we don't care about right now. + +# %% eeg, *_ = dataset.get_data() -############################################################################ +# %% [markdown] # You can optionally choose to have openml separate out a column from the # dataset. In particular, many datasets for supervised problems have a set # `default_target_attribute` which may help identify the target variable. + +# %% X, y, categorical_indicator, attribute_names = dataset.get_data( target=dataset.default_target_attribute ) print(X.head()) print(X.info()) -############################################################################ +# %% [markdown] # Sometimes you only need access to a dataset's metadata. # In those cases, you can download the dataset without downloading the # data file. The dataset object can be used as normal. @@ -87,12 +84,15 @@ # Starting from 0.15, not downloading data will be the default behavior instead. # The data will be downloading automatically when you try to access it through # openml objects, e.g., using `dataset.features`. -dataset = openml.datasets.get_dataset(1471, download_data=False) -############################################################################ -# Exercise 2 -# ********** +# %% +dataset = openml.datasets.get_dataset(1471) + +# %% [markdown] +# ## Exercise 2 # * Explore the data visually. + +# %% eegs = eeg.sample(n=1000) _ = pd.plotting.scatter_matrix( X.iloc[:100, :4], @@ -105,18 +105,21 @@ ) -############################################################################ -# Edit a created dataset -# ====================== +# %% [markdown] +# ## Edit a created dataset # This example uses the test server, to avoid editing a dataset on the main server. # # .. warning:: # .. 
include:: ../../test_server_usage_warning.txt + +# %% openml.config.start_using_configuration_for_example() -############################################################################ +# %% [markdown] # Edit non-critical fields, allowed for all authorized users: # description, creator, contributor, collection_date, language, citation, # original_data_url, paper_url + +# %% desc = ( "This data sets consists of 3 different types of irises' " "(Setosa, Versicolour, and Virginica) petal and sepal length," @@ -135,29 +138,33 @@ print(f"Edited dataset ID: {data_id}") -############################################################################ +# %% [markdown] # Editing critical fields (default_target_attribute, row_id_attribute, ignore_attribute) is allowed # only for the dataset owner. Further, critical fields cannot be edited if the dataset has any # tasks associated with it. To edit critical fields of a dataset (without tasks) owned by you, # configure the API key: # openml.config.apikey = 'FILL_IN_OPENML_API_KEY' # This example here only shows a failure when trying to work on a dataset not owned by you: + +# %% try: data_id = edit_dataset(1, default_target_attribute="shape") except openml.exceptions.OpenMLServerException as e: print(e) -############################################################################ -# Fork dataset -# ============ +# %% [markdown] +# ## Fork dataset # Used to create a copy of the dataset with you as the owner. # Use this API only if you are unable to edit the critical fields (default_target_attribute, # ignore_attribute, row_id_attribute) of a dataset through the edit_dataset API. # After the dataset is forked, you can edit the new version of the dataset using edit_dataset. +# %% data_id = fork_dataset(1) print(data_id) data_id = edit_dataset(data_id, default_target_attribute="shape") print(f"Forked dataset ID: {data_id}") +# %% openml.config.stop_using_configuration_for_example() +# License: BSD 3-Clauses diff --git a/examples/30_extended/fetch_evaluations_tutorial.py b/examples/30_extended/fetch_evaluations_tutorial.py index 86302e2d1..7df63847c 100644 --- a/examples/30_extended/fetch_evaluations_tutorial.py +++ b/examples/30_extended/fetch_evaluations_tutorial.py @@ -1,66 +1,61 @@ -""" -==================== -Fetching Evaluations -==================== - -Evaluations contain a concise summary of the results of all runs made. Each evaluation -provides information on the dataset used, the flow applied, the setup used, the metric -evaluated, and the result obtained on the metric, for each such run made. These collection -of results can be used for efficient benchmarking of an algorithm and also allow transparent -reuse of results from previous experiments on similar parameters. - -In this example, we shall do the following: - -* Retrieve evaluations based on different metrics -* Fetch evaluations pertaining to a specific task -* Sort the obtained results in descending order of the metric -* Plot a cumulative distribution function for the evaluations -* Compare the top 10 performing flows based on the evaluation performance -* Retrieve evaluations with hyperparameter settings -""" - -############################################################################ - -# License: BSD 3-Clause - +# %% [markdown] +# # Fetching Evaluations + +# Evaluations contain a concise summary of the results of all runs made. 
Each evaluation +# provides information on the dataset used, the flow applied, the setup used, the metric +# evaluated, and the result obtained on the metric, for each such run made. These collection +# of results can be used for efficient benchmarking of an algorithm and also allow transparent +# reuse of results from previous experiments on similar parameters. +# +# In this example, we shall do the following: +# +# * Retrieve evaluations based on different metrics +# * Fetch evaluations pertaining to a specific task +# * Sort the obtained results in descending order of the metric +# * Plot a cumulative distribution function for the evaluations +# * Compare the top 10 performing flows based on the evaluation performance +# * Retrieve evaluations with hyperparameter settings + +# %% import openml -############################################################################ -# Listing evaluations -# ******************* +# %% [markdown] +# ## Listing evaluations # Evaluations can be retrieved from the database in the chosen output format. # Required filters can be applied to retrieve results from runs as required. # We shall retrieve a small set (only 10 entries) to test the listing function for evaluations + +# %% openml.evaluations.list_evaluations( - function="predictive_accuracy", size=10, output_format="dataframe" + function="predictive_accuracy", size=10 ) # Using other evaluation metrics, 'precision' in this case evals = openml.evaluations.list_evaluations( - function="precision", size=10, output_format="dataframe" + function="precision", size=10 ) # Querying the returned results for precision above 0.98 print(evals[evals.value > 0.98]) -############################################################################# -# Viewing a sample task -# ===================== +# %% [markdown] +# ## Viewing a sample task # Over here we shall briefly take a look at the details of the task. - # We will start by displaying a simple *supervised classification* task: + +# %% task_id = 167140 # https://www.openml.org/t/167140 task = openml.tasks.get_task(task_id) print(task) -############################################################################# -# Obtaining all the evaluations for the task -# ========================================== +# %% [markdown] +# ## Obtaining all the evaluations for the task # We'll now obtain all the evaluations that were uploaded for the task # we displayed previously. # Note that we now filter the evaluations based on another parameter 'task'. +# %% metric = "predictive_accuracy" evals = openml.evaluations.list_evaluations( function=metric, tasks=[task_id], output_format="dataframe" @@ -72,13 +67,13 @@ print("\nDisplaying head of sorted dataframe: ") print(evals.head()) -############################################################################# -# Obtaining CDF of metric for chosen task -# *************************************** +# %% [markdown] +# ## Obtaining CDF of metric for chosen task # We shall now analyse how the performance of various flows have been on this task, # by seeing the likelihood of the accuracy obtained across all runs. # We shall now plot a cumulative distributive function (CDF) for the accuracies obtained. 
+# %% from matplotlib import pyplot as plt @@ -99,16 +94,18 @@ def plot_cdf(values, metric="predictive_accuracy"): plot_cdf(evals.value, metric) + +# %% [markdown] # This CDF plot shows that for the given task, based on the results of the # runs uploaded, it is almost certain to achieve an accuracy above 52%, i.e., # with non-zero probability. While the maximum accuracy seen till now is 96.5%. -############################################################################# -# Comparing top 10 performing flows -# ********************************* +# %% [markdown] +# ## Comparing top 10 performing flows # Let us now try to see which flows generally performed the best for this task. # For this, we shall compare the top performing flows. +# %% import numpy as np import pandas as pd @@ -141,6 +138,8 @@ def plot_flow_compare(evaluations, top_n=10, metric="predictive_accuracy"): plot_flow_compare(evals, metric=metric, top_n=10) + +# %% [markdown] # The boxplots below show how the flows perform across multiple runs on the chosen # task. The green horizontal lines represent the median accuracy of all the runs for # that flow (number of runs denoted at the bottom of the boxplots). The higher the @@ -148,35 +147,38 @@ def plot_flow_compare(evaluations, top_n=10, metric="predictive_accuracy"): # are in the descending order of the higest accuracy value seen under that flow. # Printing the corresponding flow names for the top 10 performing flow IDs + +# %% top_n = 10 flow_ids = evals.flow_id.unique()[:top_n] flow_names = evals.flow_name.unique()[:top_n] for i in range(top_n): print((flow_ids[i], flow_names[i])) -############################################################################# -# Obtaining evaluations with hyperparameter settings -# ================================================== +# %% [markdown] +# ## Obtaining evaluations with hyperparameter settings # We'll now obtain the evaluations of a task and a flow with the hyperparameters # List evaluations in descending order based on predictive_accuracy with # hyperparameters + +# %% evals_setups = openml.evaluations.list_evaluations_setups( function="predictive_accuracy", tasks=[31], size=100, sort_order="desc" ) -"" print(evals_setups.head()) -"" +# %% [markdown] # Return evaluations for flow_id in descending order based on predictive_accuracy # with hyperparameters. parameters_in_separate_columns returns parameters in # separate columns + +# %% evals_setups = openml.evaluations.list_evaluations_setups( function="predictive_accuracy", flows=[6767], size=100, parameters_in_separate_columns=True ) -"" print(evals_setups.head(10)) -"" +# License: BSD 3-Clause diff --git a/examples/30_extended/fetch_runtimes_tutorial.py b/examples/30_extended/fetch_runtimes_tutorial.py index 107adee79..c4111a324 100644 --- a/examples/30_extended/fetch_runtimes_tutorial.py +++ b/examples/30_extended/fetch_runtimes_tutorial.py @@ -1,51 +1,43 @@ -""" - -========================================== -Measuring runtimes for Scikit-learn models -========================================== - -The runtime of machine learning models on specific datasets can be a deciding -factor on the choice of algorithms, especially for benchmarking and comparison -purposes. OpenML's scikit-learn extension provides runtime data from runs of -model fit and prediction on tasks or datasets, for both the CPU-clock as well -as the actual wallclock-time incurred. 
The objective of this example is to
-illustrate how to retrieve such timing measures, and also offer some potential
-means of usage and interpretation of the same.
-
-It should be noted that there are multiple levels at which parallelism can occur.
-
-* At the outermost level, OpenML tasks contain fixed data splits, on which the
-  defined model/flow is executed. Thus, a model can be fit on each OpenML dataset fold
-  in parallel using the `n_jobs` parameter to `run_model_on_task` or `run_flow_on_task`
-  (illustrated under Case 2 & 3 below).
-
-* The model/flow specified can also include scikit-learn models that perform their own
-  parallelization. For instance, by specifying `n_jobs` in a Random Forest model definition
-  (covered under Case 2 below).
-
-* The sklearn model can further be an HPO estimator and contain it's own parallelization.
-  If the base estimator used also supports `parallelization`, then there's at least a 2-level nested
-  definition for parallelization possible (covered under Case 3 below).
-
-We shall cover these 5 representative scenarios for:
-
-* (Case 1) Retrieving runtimes for Random Forest training and prediction on each of the
-  cross-validation folds
-
-* (Case 2) Testing the above setting in a parallel setup and monitor the difference using
-  runtimes retrieved
-
-* (Case 3) Comparing RandomSearchCV and GridSearchCV on the above task based on runtimes
-
-* (Case 4) Running models that don't run in parallel or models which scikit-learn doesn't
-  parallelize
-
-* (Case 5) Running models that do not release the Python Global Interpreter Lock (GIL)
-"""
-
-############################################################################
-
-# License: BSD 3-Clause
+# %% [markdown]
+# # Measuring runtimes for Scikit-learn models
+#
+# The runtime of machine learning models on specific datasets can be a deciding
+# factor on the choice of algorithms, especially for benchmarking and comparison
+# purposes. OpenML's scikit-learn extension provides runtime data from runs of
+# model fit and prediction on tasks or datasets, for both the CPU-clock as well
+# as the actual wallclock-time incurred. The objective of this example is to
+# illustrate how to retrieve such timing measures, and also offer some potential
+# means of usage and interpretation of the same.
+#
+# It should be noted that there are multiple levels at which parallelism can occur.
+#
+# * At the outermost level, OpenML tasks contain fixed data splits, on which the
+#   defined model/flow is executed. Thus, a model can be fit on each OpenML dataset fold
+#   in parallel using the `n_jobs` parameter to `run_model_on_task` or `run_flow_on_task`
+#   (illustrated under Case 2 & 3 below).
+#
+# * The model/flow specified can also include scikit-learn models that perform their own
+#   parallelization. For instance, by specifying `n_jobs` in a Random Forest model definition
+#   (covered under Case 2 below).
+#
+# * The sklearn model can further be an HPO estimator and contain its own parallelization.
+#   If the base estimator used also supports `parallelization`, then there's at least a 2-level nested
+#   definition for parallelization possible (covered under Case 3 below).
+# +# We shall cover these 5 representative scenarios for: +# +# * (Case 1) Retrieving runtimes for Random Forest training and prediction on each of the +# cross-validation folds +# +# * (Case 2) Testing the above setting in a parallel setup and monitor the difference using +# runtimes retrieved +# +# * (Case 3) Comparing RandomSearchCV and GridSearchCV on the above task based on runtimes +# +# * (Case 4) Running models that don't run in parallel or models which scikit-learn doesn't +# parallelize +# +# * (Case 5) Running models that do not release the Python Global Interpreter Lock (GIL) import openml import numpy as np @@ -59,10 +51,10 @@ from sklearn.model_selection import GridSearchCV, RandomizedSearchCV -############################################################################ -# Preparing tasks and scikit-learn models -# *************************************** +# %% [markdown] +# # Preparing tasks and scikit-learn models +# %% task_id = 167119 task = openml.tasks.get_task(task_id) @@ -91,13 +83,13 @@ def print_compare_runtimes(measures): ) -############################################################################ -# Case 1: Running a Random Forest model on an OpenML task -# ******************************************************* +# %% [markdown] +# # Case 1: Running a Random Forest model on an OpenML task # We'll run a Random Forest model and obtain an OpenML run object. We can # see the evaluations recorded per fold for the dataset and the information # available for this run. +# %% clf = RandomForestClassifier(n_estimators=10) run1 = openml.runs.run_model_on_task( @@ -122,7 +114,7 @@ def print_compare_runtimes(measures): print("Repeat #{}-Fold #{}: {:.4f}".format(repeat, fold, val2)) print() -################################################################################ +# %% [markdown] # The remaining entries recorded in `measures` are the runtime records # related as: # @@ -138,13 +130,15 @@ def print_compare_runtimes(measures): # follows the same procedure but for time taken for the `.predict()` procedure. # Comparing the CPU and wall-clock training times of the Random Forest model + +# %% print_compare_runtimes(measures) -###################################################################### -# Case 2: Running Scikit-learn model on an OpenML task in parallel -# **************************************************************** +# %% [markdown] +# ## Case 2: Running Scikit-learn model on an OpenML task in parallel # Redefining the model to allow parallelism with `n_jobs=2` (2 cores) +# %% clf = RandomForestClassifier(n_estimators=10, n_jobs=2) run2 = openml.runs.run_model_on_task( @@ -154,9 +148,10 @@ def print_compare_runtimes(measures): # The wall-clock time recorded per fold should be lesser than Case 1 above print_compare_runtimes(measures) -#################################################################################### +# %% [markdown] # Running a Random Forest model on an OpenML task in parallel (all cores available): +# %% # Redefining the model to use all available cores with `n_jobs=-1` clf = RandomForestClassifier(n_estimators=10, n_jobs=-1) @@ -164,24 +159,27 @@ def print_compare_runtimes(measures): model=clf, task=task, upload_flow=False, avoid_duplicate_runs=False ) measures = run3.fold_evaluations + +# %% [markdown] # The wall-clock time recorded per fold should be lesser than the case above, # if more than 2 CPU cores are available. The speed-up is more pronounced for # larger datasets. 
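+
+# %%
+# Compare the CPU and wall-clock training times of the run that used all available cores: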
print_compare_runtimes(measures) -#################################################################################### +# %% [markdown] # We can now observe that the ratio of CPU time to wallclock time is lower # than in case 1. This happens because joblib by default spawns subprocesses # for the workloads for which CPU time cannot be tracked. Therefore, interpreting # the reported CPU and wallclock time requires knowledge of the parallelization # applied at runtime. -#################################################################################### +# %% [markdown] # Running the same task with a different parallel backend. Joblib provides multiple # backends: {`loky` (default), `multiprocessing`, `dask`, `threading`, `sequential`}. # The backend can be explicitly set using a joblib context manager. The behaviour of # the job distribution can change and therefore the scale of runtimes recorded too. +# %% with parallel_backend(backend="multiprocessing", n_jobs=-1): run3_ = openml.runs.run_model_on_task( model=clf, task=task, upload_flow=False, avoid_duplicate_runs=False @@ -189,7 +187,7 @@ def print_compare_runtimes(measures): measures = run3_.fold_evaluations print_compare_runtimes(measures) -#################################################################################### +# %% [markdown] # The CPU time interpretation becomes ambiguous when jobs are distributed over an # unknown number of cores or when subprocesses are spawned for which the CPU time # cannot be tracked, as in the examples above. It is impossible for OpenML-Python @@ -198,9 +196,8 @@ def print_compare_runtimes(measures): # cases that can arise as demonstrated in the rest of the example. Therefore, # the final interpretation of the runtimes is left to the `user`. -##################################################################### -# Case 3: Running and benchmarking HPO algorithms with their runtimes -# ******************************************************************* +# %% [markdown] +# ## Case 3: Running and benchmarking HPO algorithms with their runtimes # We shall now optimize a similar RandomForest model for the same task using # scikit-learn's HPO support by using GridSearchCV to optimize our earlier # RandomForest model's hyperparameter `n_estimators`. Scikit-learn also provides a @@ -208,9 +205,9 @@ def print_compare_runtimes(measures): # and evaluating the model on the best found parameter setting. This is # included in the `wall_clock_time_millis_training` measure recorded. +# %% from sklearn.model_selection import GridSearchCV - clf = RandomForestClassifier(n_estimators=10, n_jobs=2) # GridSearchCV model @@ -228,7 +225,7 @@ def print_compare_runtimes(measures): measures = run4.fold_evaluations print_compare_runtimes(measures) -################################################################################## +# %% [markdown] # Like any optimisation problem, scikit-learn's HPO estimators also generate # a sequence of configurations which are evaluated, using which the best found # configuration is tracked throughout the trace. @@ -241,17 +238,19 @@ def print_compare_runtimes(measures): # is for the entire `fit()` procedure of GridSearchCV thus subsuming the runtimes of # the 2-fold (inner) CV search performed. 
+# %% # We earlier extracted the number of repeats and folds for this task: print("# repeats: {}\n# folds: {}".format(n_repeats, n_folds)) # To extract the training runtime of the first repeat, first fold: print(run4.fold_evaluations["wall_clock_time_millis_training"][0][0]) -################################################################################## +# %% [markdown] # To extract the training runtime of the 1-st repeat, 4-th (outer) fold and also # to fetch the parameters and performance of the evaluations made during # the 1-st repeat, 4-th fold evaluation by the Grid Search model. +# %% _repeat = 0 _fold = 3 print( @@ -268,7 +267,7 @@ def print_compare_runtimes(measures): ) ) -################################################################################## +# %% [markdown] # Scikit-learn's HPO estimators also come with an argument `refit=True` as a default. # In our previous model definition it was set to True by default, which meant that the best # found hyperparameter configuration was used to refit or retrain the model without any inner @@ -283,6 +282,8 @@ def print_compare_runtimes(measures): # This refit time can therefore be explicitly extracted in this manner: +# %% + def extract_refit_time(run, repeat, fold): refit_time = ( run.fold_evaluations["wall_clock_time_millis"][repeat][fold] @@ -300,12 +301,13 @@ def extract_refit_time(run, repeat, fold): ) ) -############################################################################ +# %% [markdown] # Along with the GridSearchCV already used above, we demonstrate how such # optimisation traces can be retrieved by showing an application of these # traces - comparing the speed of finding the best configuration using # RandomizedSearchCV and GridSearchCV available with scikit-learn. +# %% # RandomizedSearchCV model rs_pipe = RandomizedSearchCV( estimator=clf, @@ -320,7 +322,7 @@ def extract_refit_time(run, repeat, fold): model=rs_pipe, task=task, upload_flow=False, avoid_duplicate_runs=False, n_jobs=2 ) -################################################################################ +# %% [markdown] # Since for the call to ``openml.runs.run_model_on_task`` the parameter # ``n_jobs`` is set to its default ``None``, the evaluations across the OpenML folds # are not parallelized. Hence, the time recorded is agnostic to the ``n_jobs`` @@ -334,6 +336,7 @@ def extract_refit_time(run, repeat, fold): # the runtimes per fold can be cumulatively added to plot the trace against time. +# %% def extract_trace_data(run, n_repeats, n_folds, n_iter, key=None): key = "wall_clock_time_millis_training" if key is None else key data = {"score": [], "runtime": []} @@ -376,9 +379,8 @@ def get_incumbent_trace(trace): plt.legend() plt.show() -################################################################################ -# Case 4: Running models that scikit-learn doesn't parallelize -# ************************************************************* +# %% [markdown] +# ## Case 4: Running models that scikit-learn doesn't parallelize # Both scikit-learn and OpenML depend on parallelism implemented through `joblib`. # However, there can be cases where either models cannot be parallelized or don't # depend on joblib for its parallelism. 2 such cases are illustrated below. @@ -386,6 +388,7 @@ def get_incumbent_trace(trace): # Running a Decision Tree model that doesn't support parallelism implicitly, but # using OpenML to parallelize evaluations for the outer-cross validation folds. 
+# %%
dt = DecisionTreeClassifier()

run6 = openml.runs.run_model_on_task(
@@ -394,11 +397,12 @@ def get_incumbent_trace(trace):
measures = run6.fold_evaluations
print_compare_runtimes(measures)

-################################################################################
+# %% [markdown]
# Although the decision tree does not run in parallel, it can release the
# `Python GIL <https://docs.python.org/dev/glossary.html#term-global-interpreter-lock>`_.
# This can result in surprising runtime measures as demonstrated below:

+# %%
with parallel_backend("threading", n_jobs=-1):
    run7 = openml.runs.run_model_on_task(
        model=dt, task=task, upload_flow=False, avoid_duplicate_runs=False
    )
measures = run7.fold_evaluations
print_compare_runtimes(measures)

-################################################################################
+# %% [markdown]
# Running a Neural Network from scikit-learn that uses scikit-learn independent
-# parallelism using libraries such as `MKL, OpenBLAS or BLIS
-# <https://scikit-learn.org/stable/computing/parallelism.html#parallel-numpy-and-scipy-routines-from-numerical-libraries>`_.
+# parallelism using libraries such as
+# [MKL, OpenBLAS or BLIS](https://scikit-learn.org/stable/computing/parallelism.html#parallel-numpy-and-scipy-routines-from-numerical-libraries).

+# %%
mlp = MLPClassifier(max_iter=10)

run8 = openml.runs.run_model_on_task(
@@ -419,15 +424,15 @@ def get_incumbent_trace(trace):
measures = run8.fold_evaluations
print_compare_runtimes(measures)

-################################################################################
-# Case 5: Running Scikit-learn models that don't release GIL
-# **********************************************************
-# Certain Scikit-learn models do not release the `Python GIL
-# <https://docs.python.org/dev/glossary.html#term-global-interpreter-lock>`_ and
+# %% [markdown]
+# ## Case 5: Running Scikit-learn models that don't release GIL
+# Certain Scikit-learn models do not release the
+# [Python GIL](https://docs.python.org/dev/glossary.html#term-global-interpreter-lock) and
# are also not executed in parallel via a BLAS library. In such cases, the
# CPU times and wallclock times are most likely trustworthy. Note however
# that only very few models such as naive Bayes models are of this kind.

+# %%
clf = GaussianNB()

with parallel_backend("multiprocessing", n_jobs=-1):
@@ -437,9 +442,8 @@ def get_incumbent_trace(trace):
measures = run9.fold_evaluations
print_compare_runtimes(measures)

-################################################################################
-# Summmary
-# *********
+# %% [markdown]
+# ## Summary
# The scikit-learn extension for OpenML-Python records model runtimes for the
# CPU-clock and the wall-clock times. The above examples illustrated how these
# recorded runtimes can be extracted when using a scikit-learn model and under
@@ -484,3 +488,4 @@ def get_incumbent_trace(trace):
#
# Because of all the cases mentioned above it is crucial to understand which case is triggered
# when reporting runtimes for scikit-learn models measured with OpenML-Python!
+# License: BSD 3-Clause
diff --git a/examples/30_extended/flow_id_tutorial.py b/examples/30_extended/flow_id_tutorial.py
index 137f8d14e..e0874d893 100644
--- a/examples/30_extended/flow_id_tutorial.py
+++ b/examples/30_extended/flow_id_tutorial.py
@@ -1,41 +1,35 @@
-"""
-==================
-Obtaining Flow IDs
-==================
+# %% [markdown]
+# # Obtaining Flow IDs
+# This tutorial discusses different ways to obtain the ID of a flow in order to perform further
+# analysis.

-This tutorial discusses different ways to obtain the ID of a flow in order to perform further
-analysis.
-""" - -#################################################################################################### - -# License: BSD 3-Clause +# %% import sklearn.tree import openml -############################################################################ +# %% [markdown] # .. warning:: # .. include:: ../../test_server_usage_warning.txt -openml.config.start_using_configuration_for_example() +# %% +openml.config.start_using_configuration_for_example() -############################################################################ +# %% # Defining a classifier clf = sklearn.tree.DecisionTreeClassifier() -#################################################################################################### -# 1. Obtaining a flow given a classifier -# ====================================== -# +# %% [markdown] +# ## 1. Obtaining a flow given a classifier +# %% flow = openml.extensions.get_extension_by_model(clf).model_to_flow(clf).publish() flow_id = flow.flow_id print(flow_id) -#################################################################################################### +# %% [markdown] # This piece of code is rather involved. First, it retrieves a # :class:`~openml.extensions.Extension` which is registered and can handle the given model, # in our case it is :class:`openml.extensions.sklearn.SklearnExtension`. Second, the extension @@ -46,38 +40,46 @@ # # To simplify the usage we have created a helper function which automates all these steps: +# %% flow_id = openml.flows.get_flow_id(model=clf) print(flow_id) -#################################################################################################### -# 2. Obtaining a flow given its name -# ================================== -# The schema of a flow is given in XSD (`here -# `_). # noqa E501 +# %% [markdown] +# ## 2. Obtaining a flow given its name +# The schema of a flow is given in XSD ( +# [here](https://github.com/openml/OpenML/blob/master/openml_OS/views/pages/api_new/v1/xsd/openml.implementation.upload.xsd)). # noqa E501 # Only two fields are required, a unique name, and an external version. While it should be pretty # obvious why we need a name, the need for the additional external version information might not # be immediately clear. However, this information is very important as it allows to have multiple # flows with the same name for different versions of a software. This might be necessary if an # algorithm or implementation introduces, renames or drop hyperparameters over time. +# %% print(flow.name, flow.external_version) -#################################################################################################### +# %% [markdown] # The name and external version are automatically added to a flow when constructing it from a # model. 
We can then use them to retrieve the flow id as follows: +# %% flow_id = openml.flows.flow_exists(name=flow.name, external_version=flow.external_version) print(flow_id) -#################################################################################################### +# %% [markdown] # We can also retrieve all flows for a given name: + +# %% flow_ids = openml.flows.get_flow_id(name=flow.name) print(flow_ids) -#################################################################################################### +# %% [markdown] # This also works with the actual model (generalizing the first part of this example): + +# %% flow_ids = openml.flows.get_flow_id(model=clf, exact_version=False) print(flow_ids) -# Deactivating test server +# %% +# Deactivating test configuration openml.config.stop_using_configuration_for_example() +# License: BSD 3-Clause diff --git a/examples/30_extended/flows_and_runs_tutorial.py b/examples/30_extended/flows_and_runs_tutorial.py index 38b0d23cf..0b60c9b14 100644 --- a/examples/30_extended/flows_and_runs_tutorial.py +++ b/examples/30_extended/flows_and_runs_tutorial.py @@ -1,29 +1,27 @@ -""" -Flows and Runs -============== - -How to train/run a model and how to upload the results. -""" - -# License: BSD 3-Clause +# %% [markdown] +# #Flows and Runs +# This tutorial covers how to train/run a model and how to upload the results. +# %% import openml from sklearn import compose, ensemble, impute, neighbors, preprocessing, pipeline, tree -############################################################################ +# %% [markdown] # We'll use the test server for the rest of this tutorial. # # .. warning:: # .. include:: ../../test_server_usage_warning.txt + +# %% openml.config.start_using_configuration_for_example() -############################################################################ -# Train machine learning models -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# %% [markdown] +# ## Train machine learning models # # Train a scikit-learn model on the data manually. +# %% # NOTE: We are using dataset 68 from the test server: https://test.openml.org/d/68 dataset = openml.datasets.get_dataset(68) X, y, categorical_indicator, attribute_names = dataset.get_data( @@ -32,10 +30,12 @@ clf = neighbors.KNeighborsClassifier(n_neighbors=1) clf.fit(X, y) -############################################################################ +# %% [markdown] # You can also ask for meta-data to automatically preprocess the data. # # * e.g. categorical features -> do feature encoding + +# %% dataset = openml.datasets.get_dataset(17) X, y, categorical_indicator, attribute_names = dataset.get_data( target=dataset.default_target_attribute @@ -47,11 +47,11 @@ X = transformer.fit_transform(X) clf.fit(X, y) -############################################################################ -# Runs: Easily explore models -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# %% [markdown] +# ## Runs: Easily explore models # We can run (many) scikit-learn algorithms on (many) OpenML tasks. +# %% # Get a task task = openml.tasks.get_task(403) @@ -63,31 +63,34 @@ print(run) -############################################################################ +# %% [markdown] # Share the run on the OpenML server # # So far the run is only available locally. By calling the publish function, # the run is sent to the OpenML server: +# %% myrun = run.publish() # For this tutorial, our configuration publishes to the test server # as to not pollute the main server. 
print(f"Uploaded to {myrun.openml_url}") -############################################################################ +# %% [markdown] # We can now also inspect the flow object which was automatically created: +# %% flow = openml.flows.get_flow(run.flow_id) print(flow) -############################################################################ -# It also works with pipelines -# ############################ +# %% [markdown] +# ## It also works with pipelines # # When you need to handle 'dirty' data, build pipelines to model then automatically. # To demonstrate this using the dataset `credit-a `_ via # `task `_ as it contains both numerical and categorical # variables and missing values in both. + +# %% task = openml.tasks.get_task(96) # OpenML helper functions for sklearn can be plugged in directly for complicated pipelines @@ -121,10 +124,12 @@ print(f"Uploaded to {myrun.openml_url}") +# %% [markdown] # The above pipeline works with the helper functions that internally deal with pandas DataFrame. # In the case, pandas is not available, or a NumPy based data processing is the requirement, the # above pipeline is presented below to work with NumPy. +# %% # Extracting the indices of the categorical columns features = task.get_dataset().features categorical_feature_indices = [] @@ -164,14 +169,15 @@ myrun = run.publish() print(f"Uploaded to {myrun.openml_url}") -############################################################################### -# Running flows on tasks offline for later upload -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# %% [markdown] +# ## Running flows on tasks offline for later upload # For those scenarios where there is no access to internet, it is possible to run # a model on a task without uploading results or flows to the server immediately. # To perform the following line offline, it is required to have been called before # such that the task is cached on the local openml cache directory: + +# %% task = openml.tasks.get_task(96) # The following lines can then be executed offline: @@ -192,9 +198,10 @@ # Publishing the run will automatically upload the related flow if # it does not yet exist on the server. -############################################################################ +# %% [markdown] # Alternatively, one can also directly run flows. +# %% # Get a task task = openml.tasks.get_task(403) @@ -208,9 +215,8 @@ run = openml.runs.run_flow_on_task(flow, task) -############################################################################ -# Challenge -# ^^^^^^^^^ +# %% [markdown] +# ## Challenge # # Try to build the best possible models on several OpenML tasks, # compare your results with the rest of the class and learn from @@ -227,6 +233,7 @@ # * Higgs (Physics): data_id:`23512 `_, # task_id:`52950 `_, 100k instances, missing values. +# %% # Easy benchmarking: for task_id in [115]: # Add further tasks. 
Disclaimer: they might take some time task = openml.tasks.get_task(task_id) @@ -238,5 +245,6 @@ print(f"kNN on {data.name}: {myrun.openml_url}") -############################################################################ +# %% openml.config.stop_using_configuration_for_example() +# License: BSD 3-Clause diff --git a/examples/30_extended/plot_svm_hyperparameters_tutorial.py b/examples/30_extended/plot_svm_hyperparameters_tutorial.py index e366c56df..bd44adadf 100644 --- a/examples/30_extended/plot_svm_hyperparameters_tutorial.py +++ b/examples/30_extended/plot_svm_hyperparameters_tutorial.py @@ -1,23 +1,20 @@ -""" -================================ -Plotting hyperparameter surfaces -================================ -""" - -# License: BSD 3-Clause +# %% [markdown] +# # Plotting hyperparameter surfaces +# %% import openml import numpy as np -#################################################################################################### -# First step - obtaining the data -# =============================== +# %% [markdown] +# # First step - obtaining the data # First, we need to choose an SVM flow, for example 8353, and a task. Finding the IDs of them are # not part of this tutorial, this could for example be done via the website. # # For this we use the function ``list_evaluations_setup`` which can automatically join # evaluations conducted by the server with the hyperparameter settings extracted from the # uploaded runs (called *setup*). + +# %% df = openml.evaluations.list_evaluations_setups( function="predictive_accuracy", flows=[8353], @@ -29,21 +26,25 @@ ) print(df.head(n=10)) -#################################################################################################### +# %% [markdown] # We can see all the hyperparameter names in the columns of the dataframe: + +# %% for name in df.columns: print(name) -#################################################################################################### +# %% [markdown] # Next, we cast and transform the hyperparameters of interest (``C`` and ``gamma``) so that we # can nicely plot them. + +# %% hyperparameters = ["sklearn.svm.classes.SVC(16)_C", "sklearn.svm.classes.SVC(16)_gamma"] df[hyperparameters] = df[hyperparameters].astype(float).apply(np.log10) -#################################################################################################### -# Option 1 - plotting via the pandas helper functions -# =================================================== -# +# %% [markdown] +# ## Option 1 - plotting via the pandas helper functions + +# %% df.plot.hexbin( x="sklearn.svm.classes.SVC(16)_C", y="sklearn.svm.classes.SVC(16)_gamma", @@ -53,10 +54,10 @@ title="SVM performance landscape", ) -#################################################################################################### -# Option 2 - plotting via matplotlib -# ================================== -# +# %% [markdown] +# ## Option 2 - plotting via matplotlib + +# %% import matplotlib.pyplot as plt fig, ax = plt.subplots() @@ -79,3 +80,4 @@ ylabel="gamma (log10)", ) ax.set_title("SVM performance landscape") +# License: BSD 3-Clause diff --git a/examples/30_extended/run_setup_tutorial.py b/examples/30_extended/run_setup_tutorial.py index a2bc3a4df..e0aad78f4 100644 --- a/examples/30_extended/run_setup_tutorial.py +++ b/examples/30_extended/run_setup_tutorial.py @@ -1,32 +1,26 @@ -""" -========= -Run Setup -========= - -By: Jan N. 
van Rijn
-
-One of the key features of the openml-python library is that is allows to
-reinstantiate flows with hyperparameter settings that were uploaded before.
-This tutorial uses the concept of setups. Although setups are not extensively
-described in the OpenML documentation (because most users will not directly
-use them), they form a important concept within OpenML distinguishing between
-hyperparameter configurations.
-A setup is the combination of a flow with all its hyperparameters set.
-
-A key requirement for reinstantiating a flow is to have the same scikit-learn
-version as the flow that was uploaded. However, this tutorial will upload the
-flow (that will later be reinstantiated) itself, so it can be ran with any
-scikit-learn version that is supported by this library. In this case, the
-requirement of the corresponding scikit-learn versions is automatically met.
-
-In this tutorial we will
-    1) Create a flow and use it to solve a task;
-    2) Download the flow, reinstantiate the model with same hyperparameters,
-       and solve the same task again;
-    3) We will verify that the obtained results are exactly the same.
-"""
-
-# License: BSD 3-Clause
+# %% [markdown]
+# # Run Setup
+# One of the key features of the openml-python library is that it allows you to
+# reinstantiate flows with hyperparameter settings that were uploaded before.
+# This tutorial uses the concept of setups. Although setups are not extensively
+# described in the OpenML documentation (because most users will not directly
+# use them), they form an important concept within OpenML distinguishing between
+# hyperparameter configurations.
+# A setup is the combination of a flow with all its hyperparameters set.
+#
+# A key requirement for reinstantiating a flow is to have the same scikit-learn
+# version as the flow that was uploaded. However, this tutorial will upload the
+# flow (that will later be reinstantiated) itself, so it can be run with any
+# scikit-learn version that is supported by this library. In this case, the
+# requirement of the corresponding scikit-learn versions is automatically met.
+#
+# In this tutorial we will
+# 1) Create a flow and use it to solve a task;
+# 2) Download the flow, reinstantiate the model with same hyperparameters,
+# and solve the same task again;
+# 3) We will verify that the obtained results are exactly the same.

+# %%
import numpy as np
import openml
@@ -39,24 +33,28 @@
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import TruncatedSVD

-############################################################################
+# %% [markdown]
# .. warning::
#    .. include:: ../../test_server_usage_warning.txt

+# %%
openml.config.start_using_configuration_for_example()

-###############################################################################
+# %% [markdown]
# 1) Create a flow and use it to solve a task

-###############################################################################
-# first, let's download the task that we are interested in
-task = openml.tasks.get_task(6)

+# First, let's download the task that we are interested in

+# %%
+task = openml.tasks.get_task(6)

+# %% [markdown]
# we will create a fairly complex model, with many preprocessing components and
# many potential hyperparameters. Of course, the model can be as complex and as
# easy as you want it to be

+# %%
cat_imp = make_pipeline(
    OneHotEncoder(handle_unknown="ignore", sparse=False),
    TruncatedSVD(),
@@ -70,10 +68,13 @@
    ]
)

+# %% [markdown]
# Let's change some hyperparameters.
Of course, in any good application we # would tune them using, e.g., Random Search or Bayesian Optimization, but for # the purpose of this tutorial we set them to some specific values that might # or might not be optimal + +# %% hyperparameters_original = { "estimator__criterion": "gini", "estimator__n_estimators": 50, @@ -86,10 +87,10 @@ run = openml.runs.run_model_on_task(model_original, task, avoid_duplicate_runs=False) run_original = run.publish() # this implicitly uploads the flow -############################################################################### -# 2) Download the flow and solve the same task again. -############################################################################### +# %% [markdown] +# ## 2) Download the flow and solve the same task again. +# %% # obtain setup id (note that the setup id is assigned by the OpenML server - # therefore it was not yet available in our local copy of the run) run_downloaded = openml.runs.get_run(run_original.run_id) @@ -103,13 +104,16 @@ run_duplicate = openml.runs.run_model_on_task(model_duplicate, task, avoid_duplicate_runs=False) -############################################################################### -# 3) We will verify that the obtained results are exactly the same. -############################################################################### +# %% [markdown] +# ## 3) We will verify that the obtained results are exactly the same. +# %% # the run has stored all predictions in the field data content np.testing.assert_array_equal(run_original.data_content, run_duplicate.data_content) -############################################################################### +# %% openml.config.stop_using_configuration_for_example() + +# By: Jan N. van Rijn +# License: BSD 3-Clause diff --git a/examples/30_extended/study_tutorial.py b/examples/30_extended/study_tutorial.py index d5bfcd88a..f8f0b0b76 100644 --- a/examples/30_extended/study_tutorial.py +++ b/examples/30_extended/study_tutorial.py @@ -1,16 +1,12 @@ -""" -================= -Benchmark studies -================= -How to list, download and upload benchmark studies. -In contrast to `benchmark suites `_ which -hold a list of tasks, studies hold a list of runs. As runs contain all information on flows and -tasks, all required information about a study can be retrieved. -""" -############################################################################ - -# License: BSD 3-Clause - +# %% [markdown] +# # Benchmark studies +# How to list, download and upload benchmark studies. +# In contrast to +# [benchmark suites](https://docs.openml.org/benchmark/#benchmarking-suites) which +# hold a list of tasks, studies hold a list of runs. As runs contain all information on flows and +# tasks, all required information about a study can be retrieved. 
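# %% [markdown]
# To make that concrete, here is a minimal, self-contained illustrative sketch
# (assuming study 123, which is also used later in this tutorial): every run
# recorded in a study links back to the flow and task it was produced with, so
# both can be recovered from the run alone.

# %%
import openml

study = openml.study.get_study(123)
first_run = openml.runs.get_run(study.runs[0])  # study.runs is a list of run IDs
flow = openml.flows.get_flow(first_run.flow_id)  # the flow (model) behind that run
task = openml.tasks.get_task(first_run.task_id)  # the task the run was evaluated on
print(flow.name, task.task_id)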
+ +# %% import uuid from sklearn.ensemble import RandomForestClassifier @@ -18,38 +14,45 @@ import openml -############################################################################ -# Listing studies -# *************** +# %% [markdown] +# ## Listing studies # # * Use the output_format parameter to select output type # * Default gives ``dict``, but we'll use ``dataframe`` to obtain an # easier-to-work-with data structure +# %% studies = openml.study.list_studies(output_format="dataframe", status="all") print(studies.head(n=10)) -############################################################################ -# Downloading studies -# =================== +# %% [markdown] +# ## Downloading studies -############################################################################ +# %% [markdown] # This is done based on the study ID. + +# %% study = openml.study.get_study(123) print(study) -############################################################################ +# %% [markdown] # Studies also features a description: + +# %% print(study.description) -############################################################################ +# %% [markdown] # Studies are a container for runs: + +# %% print(study.runs) -############################################################################ +# %% [markdown] # And we can use the evaluation listing functionality to learn more about # the evaluations available for the conducted runs: + +# %% evaluations = openml.evaluations.list_evaluations( function="predictive_accuracy", output_format="dataframe", @@ -57,21 +60,23 @@ ) print(evaluations.head()) -############################################################################ +# %% [markdown] # We'll use the test server for the rest of this tutorial. # # .. warning:: # .. include:: ../../test_server_usage_warning.txt + +# %% openml.config.start_using_configuration_for_example() -############################################################################ -# Uploading studies -# ================= +# %% [markdown] +# ## Uploading studies # # Creating a study is as simple as creating any kind of other OpenML entity. # In this examples we'll create a few runs for the OpenML-100 benchmark # suite which is available on the OpenML test server. +# %% # Model to be used clf = RandomForestClassifier() @@ -104,5 +109,6 @@ print(new_study) -############################################################################ +# %% openml.config.stop_using_configuration_for_example() +# License: BSD 3-Clause diff --git a/examples/30_extended/suites_tutorial.py b/examples/30_extended/suites_tutorial.py index ff9902356..a92c1cdb5 100644 --- a/examples/30_extended/suites_tutorial.py +++ b/examples/30_extended/suites_tutorial.py @@ -1,18 +1,13 @@ -""" -================ -Benchmark suites -================ - -How to list, download and upload benchmark suites. - -If you want to learn more about benchmark suites, check out our -brief introductory tutorial :ref:`sphx_glr_examples_20_basic_simple_suites_tutorial.py` or the -`OpenML benchmark docs `_. -""" -############################################################################ - -# License: BSD 3-Clause +# %% [markdown] +# # Benchmark suites +# +# How to list, download and upload benchmark suites. +# +# If you want to learn more about benchmark suites, check out our +# brief introductory tutorial ["Simple suites tutorial"](../20_basic/simple_suites_tutorial) or the +# [OpenML benchmark docs](https://docs.openml.org/benchmark/#benchmarking-suites). 
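# %% [markdown]
# As a minimal illustrative sketch (assuming the OpenML-CC18 suite with ID 99,
# which is also downloaded below): since a suite is just a curated list of task
# IDs, iterating over it is a plain loop over ``suite.tasks``. A full benchmark
# would call ``openml.runs.run_model_on_task`` inside the loop instead of only
# printing the dataset name.

# %%
import openml

suite = openml.study.get_suite(99)  # OpenML-CC18
print(f"{suite.name}: {len(suite.tasks)} tasks")
for task_id in suite.tasks[:3]:  # only a few tasks to keep the sketch cheap
    task = openml.tasks.get_task(task_id)
    print(task_id, task.get_dataset().name)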
+# %% import uuid import numpy as np @@ -20,54 +15,65 @@ import openml -############################################################################ -# Listing suites -# ************** +# %% [markdown] +# ## Listing suites # # * Use the output_format parameter to select output type # * Default gives ``dict``, but we'll use ``dataframe`` to obtain an # easier-to-work-with data structure +# %% suites = openml.study.list_suites(output_format="dataframe", status="all") print(suites.head(n=10)) -############################################################################ -# Downloading suites -# ================== +# %% [markdown] +# ## Downloading suites -############################################################################ +# %% [markdown] # This is done based on the dataset ID. + +# %% suite = openml.study.get_suite(99) print(suite) -############################################################################ +# %% [markdown] # Suites also feature a description: + +# %% print(suite.description) -############################################################################ +# %% [markdown] # Suites are a container for tasks: + +# %% print(suite.tasks) -############################################################################ +# %% [markdown] # And we can use the task listing functionality to learn more about them: + +# %% tasks = openml.tasks.list_tasks(output_format="dataframe") -# Using ``@`` in `pd.DataFrame.query < -# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.query.html>`_ +# %% [markdown] +# Using ``@`` in +# [pd.DataFrame.query](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.query.html) # accesses variables outside of the current dataframe. + +# %% tasks = tasks.query("tid in @suite.tasks") print(tasks.describe().transpose()) -############################################################################ +# %% [markdown] # We'll use the test server for the rest of this tutorial. # # .. warning:: # .. include:: ../../test_server_usage_warning.txt + +# %% openml.config.start_using_configuration_for_example() -############################################################################ -# Uploading suites -# ================ +# %% [markdown] +# ## Uploading suites # # Uploading suites is as simple as uploading any kind of other OpenML # entity - the only reason why we need so much code in this example is @@ -75,6 +81,8 @@ # We'll take a random subset of at least ten tasks of all available tasks on # the test server: + +# %% all_tasks = list(openml.tasks.list_tasks(output_format="dataframe")["tid"]) task_ids_for_suite = sorted(np.random.choice(all_tasks, replace=False, size=20)) @@ -92,6 +100,6 @@ new_suite.publish() print(new_suite) - -############################################################################ +# %% openml.config.stop_using_configuration_for_example() +# License: BSD 3-Clause diff --git a/examples/30_extended/task_manual_iteration_tutorial.py b/examples/30_extended/task_manual_iteration_tutorial.py index 676a742a1..00e81d823 100644 --- a/examples/30_extended/task_manual_iteration_tutorial.py +++ b/examples/30_extended/task_manual_iteration_tutorial.py @@ -1,47 +1,49 @@ -""" -Tasks: retrieving splits -======================== - -Tasks define a target and a train/test split. Normally, they are the input to the function -``openml.runs.run_model_on_task`` which automatically runs the model on all splits of the task. 
-However, sometimes it is necessary to manually split a dataset to perform experiments outside of -the functions provided by OpenML. One such example is in the benchmark library -`HPOBench `_ which extensively uses data from OpenML, -but not OpenML's functionality to conduct runs. -""" +# %% [markdown] +# # Tasks: retrieving splits + +# Tasks define a target and a train/test split. Normally, they are the input to the function +# ``openml.runs.run_model_on_task`` which automatically runs the model on all splits of the task. +# However, sometimes it is necessary to manually split a dataset to perform experiments outside of +# the functions provided by OpenML. One such example is in the benchmark library +# [HPOBench](https://github.com/automl/HPOBench) which extensively uses data from OpenML, +# but not OpenML's functionality to conduct runs. -# License: BSD 3-Clause +# %% import openml -#################################################################################################### +# %% [markdown] # For this tutorial we will use the famous King+Rook versus King+Pawn on A7 dataset, which has -# the dataset ID 3 (`dataset on OpenML `_), and for which there exist +# the dataset ID 3 ([dataset on OpenML](https://www.openml.org/d/3)), and for which there exist # tasks with all important estimation procedures. It is small enough (less than 5000 samples) to # efficiently use it in an example. # -# We will first start with (`task 233 `_), which is a task with a +# We will first start with ([task 233](https://www.openml.org/t/233)), which is a task with a # holdout estimation procedure. + +# %% task_id = 233 task = openml.tasks.get_task(task_id) -#################################################################################################### +# %% [markdown] # Now that we have a task object we can obtain the number of repetitions, folds and samples as # defined by the task: +# %% n_repeats, n_folds, n_samples = task.get_split_dimensions() -#################################################################################################### +# %% [markdown] # * ``n_repeats``: Number of times the model quality estimation is performed # * ``n_folds``: Number of folds per repeat # * ``n_samples``: How many data points to use. This is only relevant for learning curve tasks # # A list of all available estimation procedures is available -# `here `_. +# [here](https://www.openml.org/search?q=%2520measure_type%3Aestimation_procedure&type=measure). # # Task ``233`` is a simple task using the holdout estimation procedure and therefore has only a # single repeat, a single fold and a single sample size: +# %% print( "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( task_id, @@ -51,11 +53,12 @@ ) ) -#################################################################################################### +# %% [markdown] # We can now retrieve the train/test split for this combination of repeats, folds and number of # samples (indexing is zero-based). Usually, one would loop over all repeats, folds and sample # sizes, but we can neglect this here as there is only a single repetition. 
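# %% [markdown]
# Once retrieved (as done in the next cell), such indices can be handed straight
# to any estimator. A minimal illustrative sketch, assuming scikit-learn is
# installed; the dataset behind task 233 is purely categorical, so the features
# are one-hot encoded before fitting.

# %%
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeClassifier

X, y = task.get_X_and_y(dataset_format="dataframe")
train_idx, test_idx = task.get_train_test_split_indices(repeat=0, fold=0, sample=0)
clf = make_pipeline(OneHotEncoder(handle_unknown="ignore"), DecisionTreeClassifier(random_state=0))
clf.fit(X.iloc[train_idx], y.iloc[train_idx])
print("holdout accuracy:", accuracy_score(y.iloc[test_idx], clf.predict(X.iloc[test_idx])))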
+# %% train_indices, test_indices = task.get_train_test_split_indices( repeat=0, fold=0, @@ -65,9 +68,10 @@ print(train_indices.shape, train_indices.dtype) print(test_indices.shape, test_indices.dtype) -#################################################################################################### +# %% [markdown] # And then split the data based on this: +# %% X, y = task.get_X_and_y(dataset_format="dataframe") X_train = X.iloc[train_indices] y_train = y.iloc[train_indices] @@ -83,9 +87,10 @@ ) ) -#################################################################################################### +# %% [markdown] # Obviously, we can also retrieve cross-validation versions of the dataset used in task ``233``: +# %% task_id = 3 task = openml.tasks.get_task(task_id) X, y = task.get_X_and_y(dataset_format="dataframe") @@ -99,8 +104,10 @@ ) ) -#################################################################################################### +# %% [markdown] # And then perform the aforementioned iteration over all splits: + +# %% for repeat_idx in range(n_repeats): for fold_idx in range(n_folds): for sample_idx in range(n_samples): @@ -127,9 +134,10 @@ ) ) -#################################################################################################### +# %% [markdown] # And also versions with multiple repeats: +# %% task_id = 1767 task = openml.tasks.get_task(task_id) X, y = task.get_X_and_y(dataset_format="dataframe") @@ -143,8 +151,10 @@ ) ) -#################################################################################################### +# %% [markdown] # And then again perform the aforementioned iteration over all splits: + +# %% for repeat_idx in range(n_repeats): for fold_idx in range(n_folds): for sample_idx in range(n_samples): @@ -171,9 +181,10 @@ ) ) -#################################################################################################### +# %% [markdown] # And finally a task based on learning curves: +# %% task_id = 1702 task = openml.tasks.get_task(task_id) X, y = task.get_X_and_y(dataset_format="dataframe") @@ -187,8 +198,10 @@ ) ) -#################################################################################################### +# %% [markdown] # And then again perform the aforementioned iteration over all splits: + +# %% for repeat_idx in range(n_repeats): for fold_idx in range(n_folds): for sample_idx in range(n_samples): @@ -214,3 +227,4 @@ y_test.shape, ) ) +# License: BSD 3-Clause diff --git a/examples/30_extended/tasks_tutorial.py b/examples/30_extended/tasks_tutorial.py index 19a7e542c..e6c79d79d 100644 --- a/examples/30_extended/tasks_tutorial.py +++ b/examples/30_extended/tasks_tutorial.py @@ -1,17 +1,13 @@ -""" -Tasks -===== - -A tutorial on how to list and download tasks. -""" - -# License: BSD 3-Clause +# %% [markdown] +# # Tasks +# A tutorial on how to list and download tasks. +# %% import openml from openml.tasks import TaskType import pandas as pd -############################################################################ +# %% [markdown] # # Tasks are identified by IDs and can be accessed in two different ways: # @@ -25,16 +21,17 @@ # metric, the splits and an iterator which can be used to access the # splits in a useful manner. -############################################################################ -# Listing tasks -# ^^^^^^^^^^^^^ +# %% [markdown] +# ## Listing tasks # # We will start by simply listing only *supervised classification* tasks. 
+# # **openml.tasks.list_tasks()** returns a dictionary of dictionaries by default, but we # request a -# `pandas dataframe `_ +# [pandas dataframe](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html) # instead to have better visualization capabilities and easier access: +# %% tasks = openml.tasks.list_tasks( task_type=TaskType.SUPERVISED_CLASSIFICATION, output_format="dataframe" ) @@ -42,53 +39,57 @@ print(f"First 5 of {len(tasks)} tasks:") print(tasks.head()) -############################################################################ +# %% [markdown] # We can filter the list of tasks to only contain datasets with more than # 500 samples, but less than 1000 samples: +# %% filtered_tasks = tasks.query("NumberOfInstances > 500 and NumberOfInstances < 1000") print(list(filtered_tasks.index)) -############################################################################ +# %% # Number of tasks print(len(filtered_tasks)) -############################################################################ +# %% [markdown] # Then, we can further restrict the tasks to all have the same resampling strategy: +# %% filtered_tasks = filtered_tasks.query('estimation_procedure == "10-fold Crossvalidation"') print(list(filtered_tasks.index)) -############################################################################ - +# %% # Number of tasks print(len(filtered_tasks)) -############################################################################ +# %% [markdown] # Resampling strategies can be found on the -# `OpenML Website `_. +# [OpenML Website](https://www.openml.org/search?type=measure&q=estimation%20procedure). # # Similar to listing tasks by task type, we can list tasks by tags: +# %% tasks = openml.tasks.list_tasks(tag="OpenML100", output_format="dataframe") print(f"First 5 of {len(tasks)} tasks:") print(tasks.head()) -############################################################################ +# %% [markdown] # Furthermore, we can list tasks based on the dataset id: +# %% tasks = openml.tasks.list_tasks(data_id=1471, output_format="dataframe") print(f"First 5 of {len(tasks)} tasks:") print(tasks.head()) -############################################################################ +# %% [markdown] # In addition, a size limit and an offset can be applied both separately and simultaneously: +# %% tasks = openml.tasks.list_tasks(size=10, offset=50, output_format="dataframe") print(tasks) -############################################################################ +# %% [markdown] # # **OpenML 100** # is a curated list of 100 tasks to start using OpenML. They are all @@ -96,48 +97,46 @@ # instances per task. To make things easier, the tasks do not contain highly # unbalanced data and sparse data. However, the tasks include missing values and # categorical features. You can find out more about the *OpenML 100* on -# `the OpenML benchmarking page `_. +# [the OpenML benchmarking page](https://docs.openml.org/benchmark/). # # Finally, it is also possible to list all tasks on OpenML with: -############################################################################ +# %% tasks = openml.tasks.list_tasks(output_format="dataframe") print(len(tasks)) -############################################################################ -# Exercise -# ######## +# %% [markdown] +# ## Exercise # # Search for the tasks on the 'eeg-eye-state' dataset. 
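# %% [markdown]
# As a small illustrative aside, several conditions can be combined in a single
# ``query`` call, for example restricting the full listing above by dataset
# size and resampling strategy at the same time:

# %%
large_cv_tasks = tasks.query(
    'NumberOfInstances > 10000 and estimation_procedure == "10-fold Crossvalidation"'
)
print(len(large_cv_tasks))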
+# %% tasks.query('name=="eeg-eye-state"') -############################################################################ -# Downloading tasks -# ^^^^^^^^^^^^^^^^^ +# %% [markdown] +# ## Downloading tasks # # We provide two functions to download tasks, one which downloads only a # single task by its ID, and one which takes a list of IDs and downloads # all of these tasks: +# %% task_id = 31 task = openml.tasks.get_task(task_id) -############################################################################ +# %% # Properties of the task are stored as member variables: - print(task) -############################################################################ +# %% # And: ids = [2, 1891, 31, 9983] tasks = openml.tasks.get_tasks(ids) print(tasks[0]) -############################################################################ -# Creating tasks -# ^^^^^^^^^^^^^^ +# %% [markdown] +# ## Creating tasks # # You can also create new tasks. Take the following into account: # @@ -163,16 +162,16 @@ # necessary (e.g. when other measure make no sense), since it will create a new task, which # scatters results across tasks. -############################################################################ +# %% [markdown] # We'll use the test server for the rest of this tutorial. # # .. warning:: # .. include:: ../../test_server_usage_warning.txt +# %% openml.config.start_using_configuration_for_example() -############################################################################ -# Example -# ####### +# %% [markdown] +# ## Example # # Let's create a classification task on a dataset. In this example we will do this on the # Iris dataset (ID=128 (on test server)). We'll use 10-fold cross-validation (ID=1), @@ -181,7 +180,7 @@ # If such a task doesn't exist, a task will be created and the corresponding task_id # will be returned. - +# %% try: my_task = openml.tasks.create_task( task_type=TaskType.SUPERVISED_CLASSIFICATION, @@ -204,12 +203,14 @@ task_id = tasks.loc[:, "tid"].values[0] print("Task already exists. Task ID is", task_id) +# %% # reverting to prod server openml.config.stop_using_configuration_for_example() -############################################################################ -# * `Complete list of task types `_. -# * `Complete list of model estimation procedures `_. -# * `Complete list of evaluation measures `_. +# %% [markdown] +# * [Complete list of task types](https://www.openml.org/search?type=task_type). +# * [Complete list of model estimation procedures](https://www.openml.org/search?q=%2520measure_type%3Aestimation_procedure&type=measure). +# * [Complete list of evaluation measures](https://www.openml.org/search?q=measure_type%3Aevaluation_measure&type=measure). # +# License: BSD 3-Clause diff --git a/examples/40_paper/2015_neurips_feurer_example.py b/examples/40_paper/2015_neurips_feurer_example.py index 3960c3852..1900c7204 100644 --- a/examples/40_paper/2015_neurips_feurer_example.py +++ b/examples/40_paper/2015_neurips_feurer_example.py @@ -1,30 +1,27 @@ -""" -Feurer et al. (2015) -==================== +# %% [markdown] +# # Feurer et al. (2015) -A tutorial on how to get the datasets used in the paper introducing *Auto-sklearn* by Feurer et al.. 
- -Auto-sklearn website: https://automl.github.io/auto-sklearn/ - -Publication -~~~~~~~~~~~ - -| Efficient and Robust Automated Machine Learning -| Matthias Feurer, Aaron Klein, Katharina Eggensperger, Jost Springenberg, Manuel Blum and Frank Hutter -| In *Advances in Neural Information Processing Systems 28*, 2015 -| Available at https://papers.nips.cc/paper/5872-efficient-and-robust-automated-machine-learning.pdf -""" # noqa F401 - -# License: BSD 3-Clause +# A tutorial on how to get the datasets used in the paper introducing *Auto-sklearn* by Feurer et al.. +# +# Auto-sklearn website: https://automl.github.io/auto-sklearn/ +# +# ## Publication +# +# | Efficient and Robust Automated Machine Learning +# | Matthias Feurer, Aaron Klein, Katharina Eggensperger, Jost Springenberg, Manuel Blum and Frank Hutter +# | In *Advances in Neural Information Processing Systems 28*, 2015 +# | Available at https://papers.nips.cc/paper/5872-efficient-and-robust-automated-machine-learning.pdf +# %% import pandas as pd import openml -#################################################################################################### +# %% [markdown] # List of dataset IDs given in the supplementary material of Feurer et al.: # https://papers.nips.cc/paper/5872-efficient-and-robust-automated-machine-learning-supplemental.zip -# fmt: off + +# %% dataset_ids = [ 3, 6, 12, 14, 16, 18, 21, 22, 23, 24, 26, 28, 30, 31, 32, 36, 38, 44, 46, 57, 60, 179, 180, 181, 182, 184, 185, 273, 293, 300, 351, 354, 357, 389, @@ -37,9 +34,8 @@ 1056, 1067, 1068, 1069, 1111, 1112, 1114, 1116, 1119, 1120, 1128, 1130, 1134, 1138, 1139, 1142, 1146, 1161, 1166, ] -# fmt: on -#################################################################################################### +# %% [markdown] # The dataset IDs could be used directly to load the dataset and split the data into a training set # and a test set. However, to be reproducible, we will first obtain the respective tasks from # OpenML, which define both the target feature and the train/test split. @@ -52,11 +48,13 @@ # Please check the `OpenML documentation of tasks `_ if you # want to learn more about them. -#################################################################################################### +# %% [markdown] # This lists both active and inactive tasks (because of ``status='all'``). Unfortunately, # this is necessary as some of the datasets contain issues found after the publication and became # deactivated, which also deactivated the tasks on them. More information on active or inactive -# datasets can be found in the `online docs `_. +# datasets can be found in the [online docs](https://docs.openml.org/#dataset-status). + +# %% tasks = openml.tasks.list_tasks( task_type=openml.tasks.TaskType.SUPERVISED_CLASSIFICATION, status="all", @@ -91,3 +89,5 @@ # These are the tasks to work with: print(task_ids) + +# License: BSD 3-Clause diff --git a/examples/40_paper/2018_ida_strang_example.py b/examples/40_paper/2018_ida_strang_example.py index 8b225125b..e55008dd9 100644 --- a/examples/40_paper/2018_ida_strang_example.py +++ b/examples/40_paper/2018_ida_strang_example.py @@ -1,26 +1,22 @@ -""" -Strang et al. (2018) -==================== - -A tutorial on how to reproduce the analysis conducted for *Don't Rule Out Simple Models -Prematurely: A Large Scale Benchmark Comparing Linear and Non-linear Classifiers in OpenML*. 
- -Publication -~~~~~~~~~~~ - -| Don't Rule Out Simple Models Prematurely: A Large Scale Benchmark Comparing Linear and Non-linear Classifiers in OpenML -| Benjamin Strang, Peter van der Putten, Jan N. van Rijn and Frank Hutter -| In *Advances in Intelligent Data Analysis XVII 17th International Symposium*, 2018 -| Available at https://link.springer.com/chapter/10.1007%2F978-3-030-01768-2_25 -""" - -# License: BSD 3-Clause +# %% [markdown] +# # Strang et al. (2018) +# +# A tutorial on how to reproduce the analysis conducted for *Don't Rule Out Simple Models +# Prematurely: A Large Scale Benchmark Comparing Linear and Non-linear Classifiers in OpenML*. +# +# ## Publication +# +# | Don't Rule Out Simple Models Prematurely: A Large Scale Benchmark Comparing Linear and Non-linear Classifiers in OpenML +# | Benjamin Strang, Peter van der Putten, Jan N. van Rijn and Frank Hutter +# | In *Advances in Intelligent Data Analysis XVII 17th International Symposium*, 2018 +# | Available at https://link.springer.com/chapter/10.1007%2F978-3-030-01768-2_25 +# %% import matplotlib.pyplot as plt import openml import pandas as pd -############################################################################## +# %% [markdown] # A basic step for each data-mining or machine learning task is to determine # which model to choose based on the problem and the data at hand. In this # work we investigate when non-linear classifiers outperform linear @@ -35,6 +31,7 @@ # more effort to distinguish the same flow with different hyperparameter # values. +# %% study_id = 123 # for comparing svms: flow_ids = [7754, 7756] # for comparing nns: flow_ids = [7722, 7729] @@ -65,10 +62,10 @@ # adds column that indicates the difference between the two classifiers evaluations["diff"] = evaluations[flow_ids[0]] - evaluations[flow_ids[1]] - -############################################################################## +# %% [markdown] # makes the s-plot +# %% fig_splot, ax_splot = plt.subplots() ax_splot.plot(range(len(evaluations)), sorted(evaluations["diff"])) ax_splot.set_title(classifier_family) @@ -78,11 +75,12 @@ plt.show() -############################################################################## +# %% [markdown] # adds column that indicates the difference between the two classifiers, # needed for the scatter plot +# %% def determine_class(val_lin, val_nonlin): if val_lin < val_nonlin: return class_values[0] @@ -109,10 +107,11 @@ def determine_class(val_lin, val_nonlin): ax_scatter.set_yscale("log") plt.show() -############################################################################## +# %% [markdown] # makes a scatter plot where each data point represents the performance of the # two algorithms on various axis (not in the paper) +# %% fig_diagplot, ax_diagplot = plt.subplots() ax_diagplot.grid(linestyle="--") ax_diagplot.plot([0, 1], ls="-", color="black") @@ -122,3 +121,4 @@ def determine_class(val_lin, val_nonlin): ax_diagplot.set_xlabel(measure) ax_diagplot.set_ylabel(measure) plt.show() +# License: BSD 3-Clause diff --git a/examples/40_paper/2018_kdd_rijn_example.py b/examples/40_paper/2018_kdd_rijn_example.py index d3ce59f35..2929f6429 100644 --- a/examples/40_paper/2018_kdd_rijn_example.py +++ b/examples/40_paper/2018_kdd_rijn_example.py @@ -1,22 +1,17 @@ -""" -van Rijn and Hutter (2018) -========================== - -A tutorial on how to reproduce the paper *Hyperparameter Importance Across Datasets*. 
- -This is a Unix-only tutorial, as the requirements can not be satisfied on a Windows machine (Untested on other -systems). - -Publication -~~~~~~~~~~~ - -| Hyperparameter importance across datasets -| Jan N. van Rijn and Frank Hutter -| In *Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining*, 2018 -| Available at https://dl.acm.org/doi/10.1145/3219819.3220058 -""" - -# License: BSD 3-Clause +# %% [markdown] +# # van Rijn and Hutter (2018) +# +# A tutorial on how to reproduce the paper *Hyperparameter Importance Across Datasets*. +# +# This is a Unix-only tutorial, as the requirements can not be satisfied on a Windows machine (Untested on other +# systems). +# +# ## Publication +# +# | Hyperparameter importance across datasets +# | Jan N. van Rijn and Frank Hutter +# | In *Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining*, 2018 +# | Available at https://dl.acm.org/doi/10.1145/3219819.3220058 import sys @@ -35,7 +30,7 @@ import openml -############################################################################## +# %% [markdown] # With the advent of automated machine learning, automated hyperparameter # optimization methods are by now routinely used in data mining. However, this # progress is not yet matched by equal progress on automatic analyses that @@ -67,6 +62,7 @@ # this, please see: # https://github.com/janvanrijn/openml-pimp/blob/d0a14f3eb480f2a90008889f00041bdccc7b9265/examples/plot/plot_fanova_aggregates.py # noqa F401 +# %% suite = openml.study.get_suite("OpenML100") flow_id = 7707 parameter_filters = {"sklearn.svm.classes.SVC(17)_kernel": "sigmoid"} @@ -159,11 +155,13 @@ # transform ``fanova_results`` from a list of dicts into a DataFrame fanova_results = pd.DataFrame(fanova_results) -############################################################################## +# %% [markdown] # make the boxplot of the variance contribution. Obviously, we can also use # this data to make the Nemenyi plot, but this relies on the rather complex # ``Orange`` dependency (``pip install Orange3``). For the complete example, # the reader is referred to the more elaborate script (referred to earlier) + +# %% fig, ax = plt.subplots() sns.boxplot(x="hyperparameter", y="fanova", data=fanova_results, ax=ax) ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right") @@ -171,3 +169,4 @@ ax.set_xlabel(None) plt.tight_layout() plt.show() +# License: BSD 3-Clause diff --git a/examples/40_paper/2018_neurips_perrone_example.py b/examples/40_paper/2018_neurips_perrone_example.py index 0d72846ac..2bfc84938 100644 --- a/examples/40_paper/2018_neurips_perrone_example.py +++ b/examples/40_paper/2018_neurips_perrone_example.py @@ -1,32 +1,27 @@ -""" -Perrone et al. (2018) -===================== - -A tutorial on how to build a surrogate model based on OpenML data as done for *Scalable -Hyperparameter Transfer Learning* by Perrone et al.. - -Publication -~~~~~~~~~~~ - -| Scalable Hyperparameter Transfer Learning -| Valerio Perrone and Rodolphe Jenatton and Matthias Seeger and Cedric Archambeau -| In *Advances in Neural Information Processing Systems 31*, 2018 -| Available at https://papers.nips.cc/paper/7917-scalable-hyperparameter-transfer-learning.pdf - -This example demonstrates how OpenML runs can be used to construct a surrogate model. - -In the following section, we shall do the following: - -* Retrieve tasks and flows as used in the experiments by Perrone et al. (2018). 
-* Build a tabular data by fetching the evaluations uploaded to OpenML. -* Impute missing values and handle categorical data before building a Random Forest model that - maps hyperparameter values to the area under curve score. -""" - -############################################################################ +# %% [markdown] +# # Perrone et al. (2018) +# +# A tutorial on how to build a surrogate model based on OpenML data as done for *Scalable +# Hyperparameter Transfer Learning* by Perrone et al.. +# +# ## Publication +# +# | Scalable Hyperparameter Transfer Learning +# | Valerio Perrone and Rodolphe Jenatton and Matthias Seeger and Cedric Archambeau +# | In *Advances in Neural Information Processing Systems 31*, 2018 +# | Available at https://papers.nips.cc/paper/7917-scalable-hyperparameter-transfer-learning.pdf +# +# This example demonstrates how OpenML runs can be used to construct a surrogate model. +# +# In the following section, we shall do the following: +# +# * Retrieve tasks and flows as used in the experiments by Perrone et al. (2018). +# * Build a tabular data by fetching the evaluations uploaded to OpenML. +# * Impute missing values and handle categorical data before building a Random Forest model that +# maps hyperparameter values to the area under curve score. -# License: BSD 3-Clause +# %% import openml import numpy as np import pandas as pd @@ -39,11 +34,13 @@ from sklearn.ensemble import RandomForestRegressor flow_type = "svm" # this example will use the smaller svm flow evaluations -############################################################################ + +# %% [markdown] # The subsequent functions are defined to fetch tasks, flows, evaluations and preprocess them into # a tabular format that can be used to build models. +# %% def fetch_evaluations(run_full=False, flow_type="svm", metric="area_under_roc_curve"): """ Fetch a list of evaluations based on the flows and tasks used in the experiments. @@ -154,25 +151,26 @@ def list_categorical_attributes(flow_type="svm"): return ["booster"] -############################################################################# +# %% [markdown] # Fetching the data from OpenML # ***************************** # Now, we read all the tasks and evaluations for them and collate into a table. # Here, we are reading all the tasks and evaluations for the SVM flow and # pre-processing all retrieved evaluations. +# %% eval_df, task_ids, flow_id = fetch_evaluations(run_full=False, flow_type=flow_type) X, y = create_table_from_evaluations(eval_df, flow_type=flow_type) print(X.head()) print("Y : ", y[:5]) -############################################################################# -# Creating pre-processing and modelling pipelines -# *********************************************** +# %% [markdown] +# ## Creating pre-processing and modelling pipelines # The two primary tasks are to impute the missing values, that is, account for the hyperparameters # that are not available with the runs from OpenML. And secondly, to handle categorical variables # using One-hot encoding prior to modelling. 
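# %% [markdown]
# A generic sketch of that pattern (not necessarily the exact transformer built
# in this script; the column names are the SVM hyperparameters used later in
# this example): categorical columns are imputed with a placeholder value and
# one-hot encoded, while numeric columns are imputed with a constant.

# %%
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

categorical_columns = ["kernel"]
numeric_columns = ["cost", "degree", "gamma"]
preprocessor = ColumnTransformer(
    transformers=[
        (
            "categorical",
            Pipeline(
                [
                    ("impute", SimpleImputer(strategy="constant", fill_value="missing")),
                    ("onehot", OneHotEncoder(handle_unknown="ignore")),
                ]
            ),
            categorical_columns,
        ),
        ("numeric", SimpleImputer(strategy="constant", fill_value=-1), numeric_columns),
    ]
)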
+# %% # Separating data into categorical and non-categorical (numeric for this example) columns cat_cols = list_categorical_attributes(flow_type=flow_type) num_cols = list(set(X.columns) - set(cat_cols)) @@ -191,13 +189,13 @@ def list_categorical_attributes(flow_type="svm"): model = Pipeline(steps=[("preprocess", ct), ("surrogate", clf)]) -############################################################################# -# Building a surrogate model on a task's evaluation -# ************************************************* +# %% [markdown] +# ## Building a surrogate model on a task's evaluation # The same set of functions can be used for a single task to retrieve a singular table which can # be used for the surrogate model construction. We shall use the SVM flow here to keep execution # time simple and quick. +# %% # Selecting a task for the surrogate task_id = task_ids[-1] print("Task ID : ", task_id) @@ -208,10 +206,8 @@ def list_categorical_attributes(flow_type="svm"): print("Training RMSE : {:.5}".format(mean_squared_error(y, y_pred))) - -############################################################################# -# Evaluating the surrogate model -# ****************************** +# %% [markdown] +# ## Evaluating the surrogate model # The surrogate model built from a task's evaluations fetched from OpenML will be put into # trivial action here, where we shall randomly sample configurations and observe the trajectory # of the area under curve (auc) we can obtain from the surrogate we've built. @@ -219,6 +215,7 @@ def list_categorical_attributes(flow_type="svm"): # NOTE: This section is written exclusively for the SVM flow +# %% # Sampling random configurations def random_sample_configurations(num_samples=100): colnames = ["cost", "degree", "gamma", "kernel"] @@ -241,7 +238,7 @@ def random_sample_configurations(num_samples=100): configs = random_sample_configurations(num_samples=1000) print(configs) -############################################################################# +# %% preds = model.predict(configs) # tracking the maximum AUC obtained over the functions evaluations @@ -254,3 +251,4 @@ def random_sample_configurations(num_samples=100): plt.title("AUC regret for Random Search on surrogate") plt.xlabel("Numbe of function evaluations") plt.ylabel("Regret") +# License: BSD 3-Clause diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 000000000..7cdfa7df9 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,44 @@ +site_name: openml-python +theme: + name: material + features: + - content.code.copy + palette: + - scheme: default + +extra_css: + - stylesheets/extra.css + +nav: + - index.md + - Code Reference: reference/ + - Examples: examples/ + - Usage: usage.md + - Contributing: contributing.md + - Extensions: extensions.md + - Changelog: progress.md + +markdown_extensions: + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.superfences + - attr_list + - pymdownx.tabbed: + alternate_style: true + +plugins: + - search + - autorefs + - section-index + - mkdocstrings: + handlers: + python: + options: + docstring_style: numpy + - gen-files: + scripts: + - scripts/gen_ref_pages.py + - literate-nav: + nav_file: SUMMARY.md + - mkdocs-jupyter: + theme: light diff --git a/pyproject.toml b/pyproject.toml index 99ff2b804..cfd190f86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,6 +88,13 @@ docs=[ "sphinx-gallery", "sphinx_bootstrap_theme", "numpydoc", + "mkdocs-material", + "mkdocs-autorefs", + "mkdocstrings[python]", + "mkdocs-gen-files", + "mkdocs-literate-nav", + 
"mkdocs-section-index", + "mkdocs-jupyter", ] [project.urls] diff --git a/scripts/gen_ref_pages.py b/scripts/gen_ref_pages.py new file mode 100644 index 000000000..730a98024 --- /dev/null +++ b/scripts/gen_ref_pages.py @@ -0,0 +1,55 @@ +"""Generate the code reference pages. + +based on https://github.com/mkdocstrings/mkdocstrings/blob/33aa573efb17b13e7b9da77e29aeccb3fbddd8e8/docs/recipes.md +but modified for lack of "src/" file structure. + +""" + +from pathlib import Path +import shutil + +import mkdocs_gen_files + +nav = mkdocs_gen_files.Nav() + +root = Path(__file__).parent.parent +src = root / "openml" + +for path in sorted(src.rglob("*.py")): + module_path = path.relative_to(root).with_suffix("") + doc_path = path.relative_to(src).with_suffix(".md") + full_doc_path = Path("reference", doc_path) + + parts = tuple(module_path.parts) + + if parts[-1] == "__init__": + parts = parts[:-1] + doc_path = doc_path.with_name("index.md") + full_doc_path = full_doc_path.with_name("index.md") + elif parts[-1] == "__main__": + continue + + nav[parts] = doc_path.as_posix() + + with mkdocs_gen_files.open(full_doc_path, "w") as fd: + identifier = ".".join(parts) + print("::: " + identifier, file=fd) + + mkdocs_gen_files.set_edit_path(full_doc_path, path.relative_to(root)) + + with mkdocs_gen_files.open("reference/SUMMARY.md", "w") as nav_file: + nav_file.writelines(nav.build_literate_nav()) + +nav = mkdocs_gen_files.Nav() +examples_dir = root / "examples" +examples_doc_dir = root / "docs" / "examples" +for path in sorted(examples_dir.rglob("*.py")): + dest_path = Path("examples") / path.relative_to(examples_dir) + with mkdocs_gen_files.open(dest_path, "w") as dest_file: + print(path.read_text(), file=dest_file) + + new_relative_location = Path("../") / dest_path + nav[new_relative_location.parts[2:]] = new_relative_location.as_posix() + + with mkdocs_gen_files.open("examples/SUMMARY.md", "w") as nav_file: + nav_file.writelines(nav.build_literate_nav())