diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8a6a9728..d6792ad6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,7 @@
### Features
- Support password recovery using SMTP
+- Support Dask from Jupyter notebook
### Bug fixes
- Use specific chart versions and image tags for MLFlow, Ambassador, and PostgresQL
diff --git a/docker/openmlops-notebook/Dockerfile b/docker/openmlops-notebook/Dockerfile
new file mode 100644
index 00000000..424581f7
--- /dev/null
+++ b/docker/openmlops-notebook/Dockerfile
@@ -0,0 +1,8 @@
+FROM jupyter/scipy-notebook:hub-1.4.2
+RUN set -ex \
+ && pip install \
+ 'prefect==0.14.12' \
+ 'mlflow==1.14.1' \
+ 'seldon-core==1.6.0' \
+ 'dask==2021.7.0' \
+ 'dask-kubernetes==2021.3.1'
\ No newline at end of file
diff --git a/main.tf b/main.tf
index 10d2fb48..9ef7eb14 100644
--- a/main.tf
+++ b/main.tf
@@ -1,34 +1,68 @@
-resource "kubernetes_namespace" "jupyterhub_namespace" {
+resource "kubernetes_namespace" "daskhub_namespace" {
metadata {
- name = var.jupyterhub_namespace
+ name = "daskhub"
}
}
-module "jupyterhub" {
- count = var.install_jupyterhub ? 1 : 0
- source = "./modules/jupyterhub"
- namespace = kubernetes_namespace.jupyterhub_namespace.metadata[0].name
+module "dask-jupyterhub" {
+ source = "./modules/dask-jupyterhub"
+ namespace = kubernetes_namespace.daskhub_namespace.metadata[0].name
+}
- # Proxy settings
- proxy_secret_token = var.jhub_proxy_secret_token
- proxy_https_enabled = var.jhub_proxy_https_enabled
- proxy_https_hosts = var.jhub_proxy_https_hosts
- proxy_https_letsencrypt_contact_email = var.jhub_proxy_https_letsencrypt_contact_email
- proxy_service_type = var.jhub_proxy_service_type
+resource "kubernetes_service_account" "daskhub-sa" {
+ metadata {
+ name = "daskhub-sa"
+ namespace = kubernetes_namespace.daskhub_namespace.metadata[0].name
+ }
+}
- # Authentication settings
- # Following values should be `null` if oauth_github is disabled. However we need to pass submodule's defaults here
- # explicitly because of this Terraform bug: https://github.com/hashicorp/terraform/issues/21702
- authentication_type = var.oauth_github_enable ? "github" : "dummy"
- authentication_config = merge(
- local.jhub_auth_config,
- {JupyterHub = {authenticator_class = var.oauth_github_enable ? "github" : "dummy"}}
- )
+resource "kubernetes_role" "daskhub-role" {
+ metadata {
+ name = "daskhub-role"
+ namespace = kubernetes_namespace.daskhub_namespace.metadata[0].name
+ }
- # Profile list configuration
- singleuser_profile_list = var.singleuser_profile_list
+ rule {
+ api_groups = [""]
+ resources = ["pods"]
+ verbs = ["get", "list", "watch", "create", "delete"]
+ }
+
+ rule {
+ api_groups = [""]
+    resources  = ["pods/log"]
+ verbs = ["get", "list"]
+ }
+
+ rule {
+ api_groups = [""]
+ resources = ["services"]
+ verbs = ["get", "list", "watch", "create", "delete"]
+ }
+
+ rule {
+ api_groups = ["policy"]
+ resources = ["poddisruptionbudgets"]
+ verbs = ["get", "list", "watch", "create", "delete"]
+ }
}
+resource "kubernetes_role_binding" "daskhub-rb" {
+  metadata {
+    name      = "daskhub-rb"
+    namespace = kubernetes_namespace.daskhub_namespace.metadata[0].name
+  }
+
+  role_ref {
+    api_group = "rbac.authorization.k8s.io"
+    kind      = "Role"
+    name      = kubernetes_role.daskhub-role.metadata[0].name
+  }
+
+  subject {
+    kind      = "ServiceAccount"
+    name      = kubernetes_service_account.daskhub-sa.metadata[0].name
+    namespace = kubernetes_namespace.daskhub_namespace.metadata[0].name
+  }
+}
resource "kubernetes_namespace" "mlflow_namespace" {
metadata {
@@ -90,7 +124,6 @@ resource "kubernetes_namespace" "dask_namespace" {
}
module "dask" {
-
source = "./modules/dask"
namespace = kubernetes_namespace.dask_namespace.metadata[0].name
@@ -102,7 +135,7 @@ module "dask" {
worker_environment_variables = [
{
name = "EXTRA_PIP_PACKAGES"
- value = "prefect==0.14.1 --upgrade"
+ value = "prefect==0.14.1 aiohttp --upgrade"
}
]
}
diff --git a/modules/ambassador/values.yaml b/modules/ambassador/values.yaml
index 9c0a38b9..594421b9 100644
--- a/modules/ambassador/values.yaml
+++ b/modules/ambassador/values.yaml
@@ -57,7 +57,7 @@ service:
apiVersion: getambassador.io/v2
kind: Mapping
name: jhub_mapping
- service: proxy-public.jhub:80
+ service: proxy-public.daskhub:80
prefix: /
host: jupyter.${hostname}
allow_upgrade:
diff --git a/modules/dask-jupyterhub/main.tf b/modules/dask-jupyterhub/main.tf
new file mode 100644
index 00000000..c4728a80
--- /dev/null
+++ b/modules/dask-jupyterhub/main.tf
@@ -0,0 +1,23 @@
+resource "helm_release" "dask-jupyterhub" {
+ name = "daskhub"
+ namespace = var.namespace
+
+ repository = "https://helm.dask.org"
+ chart = "daskhub"
+ version = "2021.7.2"
+
+ // It takes some time to pull all the necessary images.
+ timeout = 15 * 60
+
+ values = [templatefile("${path.module}/values.yaml", {
+ jupyterhub_secret = var.jupyterhub_secret
+ daskgateway_secret = var.daskgateway_secret
+ singleuser_profile_list = var.singleuser_profile_list
+ singleuser_image_pull_secrets = var.singleuser_image_pull_secrets
+ singleuser_image_pull_policy = var.singleuser_image_pull_policy
+ singleuser_memory_guarantee = var.singleuser_memory_guarantee
+ singleuser_storage_capacity = var.singleuser_storage_capacity
+ singleuser_storage_mount_path = var.singleuser_storage_mount_path
+ hub_allow_named_servers = var.hub_allow_named_servers
+ })]
+}
\ No newline at end of file
diff --git a/modules/dask-jupyterhub/values.yaml b/modules/dask-jupyterhub/values.yaml
new file mode 100644
index 00000000..382de852
--- /dev/null
+++ b/modules/dask-jupyterhub/values.yaml
@@ -0,0 +1,32 @@
+jupyterhub:
+ proxy:
+ service:
+ type: "ClusterIP"
+ hub:
+ allowNamedServers: ${ hub_allow_named_servers }
+ singleuser:
+ serviceAccountName: "daskhub-sa"
+ profileList:
+%{for profile in singleuser_profile_list ~}
+    - display_name: "${ profile.display_name }"
+ description: "${ profile.description }"
+ default: ${ profile.default }
+ kubespawner_override:
+ image: ${ profile.kubespawner_override.image }
+%{ endfor ~}
+ memory:
+ guarantee: ${ singleuser_memory_guarantee }
+ storage:
+ capacity: ${ singleuser_storage_capacity }
+ homeMountPath: ${ singleuser_storage_mount_path }
+ extraEnv:
+ TZ: "Europe/Berlin"
+
+dask-gateway:
+ enabled: false
+ gateway:
+ auth:
+ type: null
+
+dask-kubernetes:
+ enabled: true
\ No newline at end of file
diff --git a/modules/dask-jupyterhub/variables.tf b/modules/dask-jupyterhub/variables.tf
new file mode 100644
index 00000000..c19c58df
--- /dev/null
+++ b/modules/dask-jupyterhub/variables.tf
@@ -0,0 +1,71 @@
+variable "namespace" {
+ description = "Namespace name to deploy the application"
+ default = "default"
+}
+
+variable "jupyterhub_secret" {
+  type    = string
+  default = "4301bf5a2aa1fbade157046863ac64ec46df03e3da39ec3bf345a2f8caa81e02" # published placeholder — override in production, e.g. `openssl rand -hex 32`
+}
+
+variable "daskgateway_secret" {
+  type    = string
+  default = "ca7de235a4ae54103d49f5004a11690004c66fe14810f35dc476103573e56ff1" # published placeholder — override in production, e.g. `openssl rand -hex 32`
+}
+
+variable "singleuser_image_pull_secrets" {
+ type = list(
+ object({
+ name = string
+ })
+ )
+ default = []
+}
+
+variable "singleuser_image_pull_policy" {
+ default = "Always"
+}
+
+variable "singleuser_default_url" {
+  description = "Default URL users land on after spawning a server (note: not currently referenced in values.yaml)"
+  default     = "/lab"
+}
+
+variable "singleuser_profile_list" {
+ description = "List of images which the user can select to spawn a server"
+ type = list(
+ object({
+ display_name = string
+ description = string
+ default = bool
+ kubespawner_override = object({
+ image = string
+ })
+ }))
+
+ default = [{
+ display_name = "OpenMLOps client environment"
+ description = "Notebook with OpenMLOps required client libraries installed.
Image: drtools/openmlops-notebook:v1.4"
+ default = true
+ kubespawner_override = {
+ image = "drtools/openmlops-notebook:v1.4"
+ }
+ }]
+}
+
+variable "singleuser_memory_guarantee" {
+ default = "1G"
+}
+
+variable "singleuser_storage_capacity" {
+ default = "1G"
+}
+
+variable "singleuser_storage_mount_path" {
+ default = "/home/jovyan/persistent"
+}
+
+variable "hub_allow_named_servers" {
+ description = "Configures if a user can spawn multiple servers"
+ default = false
+}
\ No newline at end of file
diff --git a/modules/dask-jupyterhub/worker.yaml b/modules/dask-jupyterhub/worker.yaml
new file mode 100644
index 00000000..5d760434
--- /dev/null
+++ b/modules/dask-jupyterhub/worker.yaml
@@ -0,0 +1,18 @@
+kind: Pod
+spec:
+ restartPolicy: Never
+ containers:
+ - image: daskdev/dask:2021.3.1
+ imagePullPolicy: IfNotPresent
+      args: [dask-worker, --nthreads, '2', --no-dashboard, --memory-limit, 1GB, --death-timeout, '60']
+ name: dask
+ env:
+ - name: EXTRA_PIP_PACKAGES
+ value: git+https://github.com/dask/distributed
+ resources:
+ limits:
+ cpu: "0.5"
+ memory: 1G
+ requests:
+ cpu: "0.5"
+ memory: 1G
\ No newline at end of file
diff --git a/modules/dask/main.tf b/modules/dask/main.tf
index c2a04e1f..c4ffe852 100644
--- a/modules/dask/main.tf
+++ b/modules/dask/main.tf
@@ -7,67 +7,17 @@ resource "helm_release" "dask" {
chart = "dask"
version = "2021.7.0"
- set {
- name = "worker.name"
- value = var.worker_name
- }
-
- set {
- name = "worker.replicas"
- value = var.worker_replicas
- }
-
- set {
- name = "worker.image.repository"
- value = var.worker_image_repository
- }
-
- set {
- name = "worker.image.tag"
- value = var.worker_image_tag
- }
-
- set {
- name = "worker.image.pullPolicy"
- value = var.worker_image_pull_policy
- }
-
- set {
- name = "worker.image.dask_worker"
- value = var.worker_image_dask_worker_command
- }
-
- set {
- name = "scheduler.image.repository"
- value = var.scheduler_image_repository
- }
-
- set {
- name = "scheduler.image.tag"
- value = var.scheduler_image_tag
- }
-
- set {
- name = "scheduler.image.pullPolicy"
- value = var.scheduler_image_pull_policy
- }
-
-
- values = [
- yamlencode({
- "worker" = {
- "env" = var.worker_environment_variables
- }
- }),
- yamlencode({
- "worker" = {
- "image" = {
- "pullSecrets" = var.worker_image_pull_secret
- }
- }
- })
- ]
-
-
-
+ values = [templatefile("${path.module}/values.yaml", {
+ worker_name = var.worker_name
+ worker_replicas = var.worker_replicas
+ worker_environment_variables = var.worker_environment_variables
+ worker_image_repository = var.worker_image_repository
+ worker_image_tag = var.worker_image_tag
+ worker_image_pull_policy = var.worker_image_pull_policy
+ worker_image_pull_secret = var.worker_image_pull_secret
+ worker_image_dask_worker_command = var.worker_image_dask_worker_command
+ scheduler_image_repository = var.scheduler_image_repository
+ scheduler_image_tag = var.scheduler_image_tag
+ scheduler_image_pull_policy = var.scheduler_image_pull_policy
+ })]
}
\ No newline at end of file
diff --git a/modules/dask/values.yaml b/modules/dask/values.yaml
index 4146709d..7760e9d2 100644
--- a/modules/dask/values.yaml
+++ b/modules/dask/values.yaml
@@ -1,97 +1,27 @@
----
-# nameOverride: dask
-# fullnameOverride: dask
-
-scheduler:
- name: scheduler # Dask scheduler name.
+worker:
+ name: ${ worker_name }
+ replicas: ${ worker_replicas }
+%{ if length(worker_environment_variables) > 0 }
+ env:
+%{ for env in worker_environment_variables ~}
+ - name: ${ env.name }
+      value: "${ env.value }"
+%{ endfor ~}
+%{ endif }
image:
- repository: "daskdev/dask" # Container image repository.
- tag: 2021.7.0 # Container image tag.
- pullPolicy: IfNotPresent # Container image pull policy.
- pullSecrets: # Container image [pull secrets](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/).
- # - name: regcred
- replicas: 1 # Number of schedulers (should always be 1).
- serviceType: "ClusterIP" # Scheduler service type. Set to `LoadBalancer` to expose outside of your cluster.
- # serviceType: "NodePort"
- # serviceType: "LoadBalancer"
- loadBalancerIP: null # Some cloud providers allow you to specify the loadBalancerIP when using the `LoadBalancer` service type. If your cloud does not support it this option will be ignored.
- servicePort: 8786 # Scheduler service internal port.
- serviceAnnotations: {} # Scheduler service annotations.
- extraArgs: [] # Extra CLI arguments to be passed to the scheduler
- # - --preload
- # - scheduler-setup.py
- resources: {} # Scheduler pod resources. See `values.yaml` for example values.
- # limits:
- # cpu: 1.8
- # memory: 6G
- # requests:
- # cpu: 1.8
- # memory: 6G
- tolerations: [] # Tolerations.
- affinity: {} # Container affinity.
- nodeSelector: {} # Node Selector.
- securityContext: {} # Security Context.
- # serviceAccountName: ""
+ repository: ${ worker_image_repository }
+ tag: ${ worker_image_tag }
+ pullPolicy: ${ worker_image_pull_policy }
+%{ if length(worker_image_pull_secret) > 0 }
+ pullSecrets:
+%{ for env in worker_image_pull_secret ~}
+ - name: ${ env.name }
+%{ endfor ~}
+%{ endif }
+ dask_worker: ${ worker_image_dask_worker_command }
-webUI:
- name: webui # Dask webui name.
- servicePort: 80 # webui service internal port.
- ingress:
- enabled: false # Enable ingress.
- tls: false # Ingress should use TLS.
- # secretName: dask-scheduler-tls
- hostname: dask-ui.example.com # Ingress hostname.
- annotations: # Ingress annotations. See `values.yaml` for example values.
- # kubernetes.io/ingress.class: "nginx"
- # secretName: my-tls-cert
- # kubernetes.io/tls-acme: "true"
-
-worker:
- name: worker # Dask worker name.
+scheduler:
image:
- repository: "daskdev/dask" # Container image repository.
- tag: 2021.7.0 # Container image tag.
- pullPolicy: IfNotPresent # Container image pull policy.
- dask_worker: "dask-worker" # Dask worker command. E.g `dask-cuda-worker` for GPU worker.
- pullSecrets: # Container image [pull secrets](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/).
- # - name: regcred
- replicas: 3 # Number of workers.
- default_resources: # overwritten by resource limits if they exist
- cpu: 1 # Default CPU (DEPRECATED use `resources`).
- memory: "4GiB" # Default memory (DEPRECATED use `resources`).
- env: # Environment variables. See `values.yaml` for example values.
- # - name: EXTRA_APT_PACKAGES
- # value: build-essential openssl
- - name: EXTRA_CONDA_PACKAGES
- value: python==3.7 -c conda-forge
- - name: EXTRA_PIP_PACKAGES
- value: prefect==0.14.1 --upgrade
- extraArgs: [] # Extra CLI arguments to be passed to the worker
- # - --preload
- # - worker-setup.py
- resources: {} # Worker pod resources. See `values.yaml` for example values.
- # limits:
- # cpu: 1
- # memory: 3G
- # nvidia.com/gpu: 1
- # requests:
- # cpu: 1
- # memory: 3G
- # nvidia.com/gpu: 1
- mounts: {} # Worker Pod volumes and volume mounts, mounts.volumes follows kuberentes api v1 Volumes spec. mounts.volumeMounts follows kubernetesapi v1 VolumeMount spec
- # volumes:
- # - name: data
- # emptyDir: {}
- # volumeMounts:
- # - name: data
- # mountPath: /data
- annotations: {} # Annotations
- tolerations: [] # Tolerations.
- affinity: {} # Container affinity.
- nodeSelector: {} # Node Selector.
- securityContext: {} # Security Context.
- # serviceAccountName: ""
- # port: ""
- # this option overrides "--nthreads" on workers, which defaults to resources.limits.cpu / default_resources.limits.cpu
- # use it if you need to limit the amount of threads used by multicore workers, or to make workers with non-whole-number cpu limits
- # threads_per_worker: 1
\ No newline at end of file
+ repository: ${ scheduler_image_repository }
+ tag: ${ scheduler_image_tag }
+ pullPolicy: ${ scheduler_image_pull_policy }
\ No newline at end of file
diff --git a/tutorials/README.md b/tutorials/README.md
index 3bef034c..858077bd 100644
--- a/tutorials/README.md
+++ b/tutorials/README.md
@@ -2,6 +2,7 @@
Here is a set of guides to get you started with Open MLOps.
+* [Set up your local minikube machine learning architecture](./set-up-minikube-cluster.md) shows how to set up and configure the infrastructure on your local minikube.
* [Set up your production machine learning architecture](./set-up-open-source-production-mlops-architecture-aws.md) shows how to set up and configure the infrastructure in your AWS account.
* [Basic Usage of Jupyter, MLFlow, and Prefect](./basic-usage-of-jupyter-mlflow-and-prefect.md) shows you how to train your first model.
* [Deploying a model to production with Prefect and Seldon](./deploy-model-seldon.md) shows you how to deploy the model you trained the previous tutorial behind a REST API.
diff --git a/tutorials/install-dependencies.md b/tutorials/install-dependencies.md
index d387c8ce..c1650bbe 100644
--- a/tutorials/install-dependencies.md
+++ b/tutorials/install-dependencies.md
@@ -1,4 +1,4 @@
-To set up the Open MLOps architecture, you need several dependencies on your local or client machine. If you've used docker, terraform ,and kubectl before, you'll likely hav everything you need. If not, below are instructions for installing all the client-side dependencies on a fresh install of Ubuntu 20.04.
+To set up the Open MLOps architecture, you need several dependencies on your local or client machine. If you've used Docker, Terraform, and Kubectl before, you'll likely have everything you need. If not, below are instructions for installing all the client-side dependencies on a fresh install of Ubuntu 20.04.
## Install Git
diff --git a/variables.tf b/variables.tf
index fc7ed2df..656b4d80 100644
--- a/variables.tf
+++ b/variables.tf
@@ -142,39 +142,6 @@ variable "oauth_github_allowed_organizations" {
default = [""]
}
-variable "singleuser_profile_list" {
- description = "List of images which the user can select to spawn a server"
- type = list(
- object({
- display_name = string
- description = string
- default = bool
- kubespawner_override = object({
- image = string
- })
- }))
-
- default = [
- {
- display_name = "OpenMLOps client environment"
- description = "Notebook with OpenMLOps required client libraries installed.
Image: drtools/openmlops-notebook:v1.2"
- default = true
- kubespawner_override = {
- image = "drtools/openmlops-notebook:v1.2"
- }
- },
-
- {
- display_name = "Data Science environment"
- description = "Default data science environment"
- default = false
- kubespawner_override = {
- image = "jupyter/datascience-notebook:2343e33dec46"
- }
- }
- ]
-}
-
locals {
jhub_auth_config = {
diff --git a/versions.tf b/versions.tf
index de526622..455fa0cf 100644
--- a/versions.tf
+++ b/versions.tf
@@ -6,7 +6,7 @@ terraform {
}
helm = {
source = "hashicorp/helm"
- version = "~> 2.0.1"
+ version = "~> 2.2.0"
}
kubernetes = {
source = "hashicorp/kubernetes"