Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DPE-3104;DPE-3344] Jupyterlab Image to be integrated in KubeFlow #72

Merged
merged 3 commits into from
Feb 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ jobs:
- name: Build image
run: sudo make build

# Second build pass: runs the same `make build` target with FLAVOUR=jupyter
# so that the JupyterLab variant of the image is built as well.
- name: Build image (Jupyter)
run: sudo make build FLAVOUR=jupyter

- name: Get Artifact Name
id: artifact
run: |
Expand Down
11 changes: 11 additions & 0 deletions .github/workflows/integration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,14 @@ jobs:
-o $(find .make_cache -name "*.tag")

sg microk8s -c "make tests"

# Imports the jupyter-flavoured image into MicroK8s as
# ghcr.io/canonical/test-<name>, using the version from rockcraft.yaml as TAG,
# then runs the integration tests with FLAVOUR=jupyter.
# NOTE(review): `-o <files>` passes every *.tag file found under .make_cache to
# make's "old file" option — presumably to stop make from rebuilding the
# already-built images; confirm against the Makefile.
- name: Run tests (Jupyter)
run: |
# Import artifact into docker with new tag
sudo make import \
FLAVOUR=jupyter TARGET=microk8s \
TAG=$(yq .version rockcraft.yaml) \
REPOSITORY=ghcr.io/canonical/ PREFIX=test- \
-o $(find .make_cache -name "*.tag")

sg microk8s -c "make tests FLAVOUR=jupyter"
47 changes: 37 additions & 10 deletions .github/workflows/publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,22 +86,49 @@ jobs:
RISK=${{ needs.release_checks.outputs.risk }}
TRACK=${{ needs.release_checks.outputs.track }}
if [ ! -z "$RISK" ] && [ "${RISK}" != "no-risk" ]; then TAG=${TRACK}_${RISK}; else TAG=${TRACK}; fi


IMAGE_NAME=$(make help REPOSITORY=${REPOSITORY} TAG=${TAG} help | grep "Image\:" | cut -d ":" -f2 | xargs)

# Import artifact into docker with new tag
sudo make import TARGET=docker REPOSITORY=${REPOSITORY} TAG=${TAG}\
-o ${{ steps.artifact.outputs.name }}

IMAGE_NAME="${REPOSITORY}charmed-spark"

echo "Publishing ${IMAGE_NAME}:${TAG}"
docker push ${IMAGE_NAME}:${TAG}

if [[ "$RISK" == "edge" ]]; then
TAG="${{ needs.release_checks.outputs.version }}-${{ needs.release_checks.outputs.base }}_edge"
VERSION_TAG="${{ needs.release_checks.outputs.version }}-${{ needs.release_checks.outputs.base }}_edge"

sudo make import TARGET=docker REPOSITORY=${REPOSITORY} TAG=${TAG}\
-o ${{ steps.artifact.outputs.name }}

echo "Publishing ${IMAGE_NAME}:${TAG}"
docker push ${IMAGE_NAME}:${TAG}
fi
docker tag ${IMAGE_NAME}:${TAG} ${IMAGE_NAME}:${VERSION_TAG}

echo "Publishing ${IMAGE_NAME}:${VERSION_TAG}"
docker push ${IMAGE_NAME}:${VERSION_TAG}
fi

# Publishes the JupyterLab flavour of the image under ghcr.io/canonical/.
# The channel tag is "<track>_<risk>", or just "<track>" when the risk is empty
# or "no-risk". The image name is read from the "Image:" line printed by
# `make help FLAVOUR=jupyter`.
# When the risk is "edge", the same image is additionally tagged and pushed as
# "<Tag line of `make help FLAVOUR=jupyter`>-<base>_edge".
# NOTE(review): `-o $(find .make_cache -name "*.tag")` hands every tag file
# found to make's "old file" option — presumably to prevent rebuilding images
# that are already built; confirm against the Makefile.
- name: Publish JupyterLab Image to Channel
run: |

REPOSITORY="ghcr.io/canonical/"
RISK=${{ needs.release_checks.outputs.risk }}
TRACK=${{ needs.release_checks.outputs.track }}
if [ ! -z "$RISK" ] && [ "${RISK}" != "no-risk" ]; then TAG=${TRACK}_${RISK}; else TAG=${TRACK}; fi

# Import artifact into docker with new tag
sudo make import TARGET=docker FLAVOUR=jupyter \
REPOSITORY=${REPOSITORY} TAG=${TAG}\
-o $(find .make_cache -name "*.tag")

IMAGE_NAME=$(make help FLAVOUR=jupyter REPOSITORY=${REPOSITORY} TAG=${TAG} help | grep "Image\:" | cut -d ":" -f2 | xargs)

echo "Publishing ${IMAGE_NAME}:${TAG}"
docker push ${IMAGE_NAME}:${TAG}

if [[ "$RISK" == "edge" ]]; then
VERSION_LONG=$(make help FLAVOUR=jupyter | grep "Tag\:" | cut -d ":" -f2 | xargs)
VERSION_TAG="${VERSION_LONG}-${{ needs.release_checks.outputs.base }}_edge"

docker tag ${IMAGE_NAME}:${TAG} ${IMAGE_NAME}:${VERSION_TAG}

echo "Publishing ${IMAGE_NAME}:${VERSION_TAG}"
docker push ${IMAGE_NAME}:${VERSION_TAG}
fi
79 changes: 56 additions & 23 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ REPOSITORY :=
PREFIX :=
TARGET := docker
PLATFORM := amd64
FLAVOUR := "spark"

# ======================
# INTERNAL VARIABLES
Expand All @@ -27,26 +28,44 @@ K8S_TAG := $(_MAKE_DIR)/.k8s_tag
AWS_TAG := $(_MAKE_DIR)/.aws_tag

IMAGE_NAME := $(shell yq .name rockcraft.yaml)

VERSION := $(shell yq .version rockcraft.yaml)

TAG := $(VERSION)
VERSION_FLAVOUR=$(shell grep "version:$(FLAVOUR)" rockcraft.yaml | sed "s/^#//" | cut -d ":" -f3)

_ROCK_OCI=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).rock

CHARMED_OCI_FULL_NAME=$(REPOSITORY)$(PREFIX)$(IMAGE_NAME)
CHARMED_OCI_JUPYTER=$(CHARMED_OCI_FULL_NAME)-jupyterlab

ifeq ($(FLAVOUR), jupyter)
NAME=$(CHARMED_OCI_JUPYTER)
TAG=$(VERSION)-$(VERSION_FLAVOUR)
BASE_NAME=$(IMAGE_NAME)-jupyterlab_$(VERSION)_$(PLATFORM).tar
else
NAME=$(CHARMED_OCI_FULL_NAME)
TAG=$(VERSION)
BASE_NAME=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).tar
endif

_ROCK_OCI=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).rock
FTAG=$(_MAKE_DIR)/$(NAME)/$(TAG)

_TMP_OCI_NAME := stage-$(IMAGE_NAME)
_TMP_OCI_TAG := $(_MAKE_DIR)/$(_TMP_OCI_NAME)/$(TAG).tag
CHARMED_OCI_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG)
CHARMED_OCI_JUPYTER_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)/$(TAG)

CHARMED_OCI_FULL_NAME=$(REPOSITORY)$(PREFIX)$(IMAGE_NAME)
CHARMED_OCI_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG).tag
_TMP_OCI_NAME := stage-$(IMAGE_NAME)
_TMP_OCI_TAG := $(_MAKE_DIR)/$(_TMP_OCI_NAME)/$(TAG)

help:
@echo "---------------HELP-----------------"
@echo "Image: $(IMAGE_NAME)"
@echo "Name: $(IMAGE_NAME)"
@echo "Version: $(VERSION)"
@echo "Platform: $(PLATFORM)"
@echo " "
@echo "Flavour: $(FLAVOUR)"
@echo " "
@echo "Image: $(NAME)"
@echo "Tag: $(TAG)"
@echo "Artifact: $(BASE_NAME)"
@echo " "
@echo "Type 'make' followed by one of these keywords:"
Expand All @@ -62,18 +81,13 @@ $(_ROCK_OCI): rockcraft.yaml
@echo "=== Building Charmed Image ==="
rockcraft pack

$(_TMP_OCI_TAG): $(_ROCK_OCI)
$(_TMP_OCI_TAG).tag: $(_ROCK_OCI)
skopeo --insecure-policy \
copy \
oci-archive:"$(_ROCK_OCI)" \
docker-daemon:"$(_TMP_OCI_NAME):$(TAG)"
if [ ! -d "$(_MAKE_DIR)/$(_TMP_OCI_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(_TMP_OCI_NAME)"; fi
touch $(_TMP_OCI_TAG)

$(CHARMED_OCI_TAG): $(_TMP_OCI_TAG)
docker build -t "$(CHARMED_OCI_FULL_NAME):$(TAG)" --build-arg BASE_IMAGE="$(_TMP_OCI_NAME):$(TAG)" -f Dockerfile .
if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)"; fi
touch $(CHARMED_OCI_TAG)
touch $(_TMP_OCI_TAG).tag

$(K8S_TAG):
@echo "=== Setting up and configure local Microk8s cluster ==="
Expand All @@ -88,33 +102,52 @@ $(AWS_TAG): $(K8S_TAG)

microk8s: $(K8S_TAG)

$(CHARMED_OCI_TAG).tag: $(_TMP_OCI_TAG).tag build/Dockerfile
docker build -t "$(CHARMED_OCI_FULL_NAME):$(TAG)" \
--build-arg BASE_IMAGE="$(_TMP_OCI_NAME):$(TAG)" \
-f build/Dockerfile .
if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)"; fi
touch $(CHARMED_OCI_TAG).tag

$(CHARMED_OCI_JUPYTER_TAG).tag: $(CHARMED_OCI_TAG).tag build/Dockerfile.jupyter files/jupyter
docker build -t "$(CHARMED_OCI_JUPYTER):$(TAG)" \
--build-arg BASE_IMAGE="$(CHARMED_OCI_FULL_NAME):$(TAG)" \
--build-arg JUPYTERLAB_VERSION="$(VERSION_FLAVOUR)" \
-f build/Dockerfile.jupyter .
if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)"; fi
touch $(CHARMED_OCI_JUPYTER_TAG).tag

$(_MAKE_DIR)/%/$(TAG).tar: $(_MAKE_DIR)/%/$(TAG).tag
docker save $*:$(TAG) -o $(_MAKE_DIR)/$*/$(TAG).tar

$(BASE_NAME): $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG).tar
@echo "=== Creating $(BASE_NAME) OCI archive ==="
cp $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG).tar $(BASE_NAME)
$(BASE_NAME): $(FTAG).tar
@echo "=== Creating $(BASE_NAME) OCI archive (flavour: $(FLAVOUR)) ==="
cp $(FTAG).tar $(BASE_NAME)

build: $(BASE_NAME)

ifeq ($(TARGET), docker)
import: build
@echo "=== Importing image $(CHARMED_OCI_FULL_NAME):$(TAG) into docker ==="
@echo "=== Importing image $(NAME):$(TAG) into docker ==="
$(eval IMAGE := $(shell docker load -i $(BASE_NAME)))
docker tag $(lastword $(IMAGE)) $(CHARMED_OCI_FULL_NAME):$(TAG)
if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)"; fi
touch $(CHARMED_OCI_TAG)
docker tag $(lastword $(IMAGE)) $(NAME):$(TAG)
if [ ! -d "$(_MAKE_DIR)/$(NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(NAME)"; fi
touch $(FTAG).tag
endif

ifeq ($(TARGET), microk8s)
import: $(K8S_TAG) build
@echo "=== Importing image $(CHARMED_OCI_FULL_NAME):$(TAG) into Microk8s container registry ==="
microk8s ctr images import --base-name $(CHARMED_OCI_FULL_NAME):$(TAG) $(BASE_NAME)
@echo "=== Importing image $(NAME):$(TAG) into Microk8s container registry ==="
microk8s ctr images import --base-name $(NAME):$(TAG) $(BASE_NAME)
endif

tests: $(K8S_TAG) $(AWS_TAG)
@echo "=== Running Integration Tests ==="
ifeq ($(FLAVOUR), jupyter)
/bin/bash ./tests/integration/integration-tests-jupyter.sh
else
/bin/bash ./tests/integration/integration-tests.sh
endif

clean:
@echo "=== Cleaning environment ==="
Expand Down
31 changes: 27 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,15 @@ https://github.com/canonical/charmed-spark-rock/pkgs/container/charmed-spark
The image can be used straight away when running Spark on Kubernetes by setting the appropriate configuration property:

```shell
spark.kubernetes.container.image=ghcr.io/canonical/charmed-spark:3.4.2-22.04_edge
spark.kubernetes.container.image=ghcr.io/canonical/charmed-spark:3.4-22.04_edge
```

### Using `spark8t` CLI

The `spark8t` CLI tooling interacts with the K8s API to create, manage and delete K8s resources representing the Spark service account.
Make sure that the kube config file is correctly loaded into the container, e.g.
```shell
docker run --name charmed-spark -v /path/to/kube/config:/var/lib/spark/.kube/config ghcr.io/canonical/charmed-spark:3.4.2-22.04_edge
docker run --name charmed-spark -v /path/to/kube/config:/var/lib/spark/.kube/config ghcr.io/canonical/charmed-spark:3.4-22.04_edge
```

Note that this will start the image and a long-living service, allowing you to exec commands:
Expand All @@ -59,7 +59,7 @@ docker exec charmed-spark spark-client.service-account-registry list

If you prefer to run one-shot commands, without having the Charmed Spark image running, use `\; exec` prefix, e.g.
```shell
docker run -v ... ghcr.io/canonical/charmed-spark:3.4.2-22.04_edge \; exec spark-client.service-account-registry list
docker run -v ... ghcr.io/canonical/charmed-spark:3.4-22.04_edge \; exec spark-client.service-account-registry list
```

For more information about spark-client API and `spark8t` tooling, please refer to [here](https://discourse.charmhub.io/t/spark-client-snap-how-to-manage-spark-accounts/8959).
Expand All @@ -71,9 +71,32 @@ Charmed Spark Rock Image is delivered with Pebble already included in order to m
#### Starting History Server

```shell
docker run ghcr.io/canonical/charmed-spark:3.4.2-22.04_edge \; start history-server
docker run ghcr.io/canonical/charmed-spark:3.4-22.04_edge \; start history-server
```

### Running Jupyter Lab

In the Charmed Spark bundle we also provide the `charmed-spark-jupyter` image,
specifically built for running a JupyterLab server integrated with Spark, where every notebook
starts its own dedicated executors and has a SparkSession and/or SparkContext injected into it.

To start a JupyterLab server using the `charmed-spark-jupyter` image, use

```shell
docker run \
-v /path/to/kube/config:/var/lib/spark/.kube/config \
-p <port>:8888 \
ghcr.io/canonical/charmed-spark-jupyter:3.4-22.04_edge \
--username <spark-service-account> --namespace <spark-namespace>
```

Make sure to have created the `<spark-service-account>` in the `<spark-namespace>` with the `spark8t` CLI beforehand.
You should be able to access the jupyter server at `http://0.0.0.0:<port>`.

You can pass extra arguments to further configure the Spark executors by appending more `spark8t`
options to the command. Mounting the local `kubeconfig` file is required so that the
JupyterLab server can act as a Spark driver and request resources on the K8s cluster.

## Developers and Contributing

Please see the [CONTRIBUTING.md](https://github.com/canonical/charmed-spark-rock/blob/3.4-22.04/edge/CONTRIBUTING.md) for guidelines and for developer guidance.
Expand Down
File renamed without changes.
18 changes: 18 additions & 0 deletions build/Dockerfile.jupyter
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Builds the JupyterLab image on top of an existing Charmed Spark image.
#
# Build arguments (override with --build-arg):
#   BASE_IMAGE          image to extend
#   JUPYTERLAB_VERSION  exact jupyterlab release installed with pip
ARG BASE_IMAGE=base-charmed-spark:latest
ARG JUPYTERLAB_VERSION=4.0.0
FROM $BASE_IMAGE

# An ARG declared before FROM is out of scope after it; re-declare it so the
# value is available to the RUN instruction below.
ARG JUPYTERLAB_VERSION
# Switch to root for the filesystem changes and package installation below.
USER root

# Drop the Pebble layers and scripts inherited from the base image so that the
# Jupyter layer copied in below is the only one left in these directories.
RUN rm /var/lib/pebble/default/layers/*.yaml /opt/pebble/*.sh

# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
RUN python3 -m pip install --no-cache-dir "jupyterlab==$JUPYTERLAB_VERSION"
COPY ./files/jupyter/pebble/layers.yaml /var/lib/pebble/default/layers/001-charmed-jupyter.yaml
COPY ./files/jupyter/bin/jupyterlab-server.sh /opt/pebble/jupyterlab-server.sh
RUN chown _daemon_:_daemon_ /opt/pebble/jupyterlab-server.sh

# Return to the unprivileged user for runtime.
USER _daemon_

# Provide Default Entrypoint for Pebble
# NOTE(review): `--args jupyter` presumably forwards the container's arguments
# to the `jupyter` service command — confirm against the Pebble documentation.
ENTRYPOINT [ "/bin/pebble", "enter", "--verbose", "--args", "jupyter" ]
16 changes: 16 additions & 0 deletions files/jupyter/bin/jupyterlab-server.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Launches JupyterLab through spark-client.pyspark.
#
# PYSPARK_DRIVER_PYTHON / PYSPARK_DRIVER_PYTHON_OPTS are set so that the
# pyspark launcher runs `jupyter lab ...` as its driver front-end.
# Every argument given to this script is forwarded to spark-client.pyspark.

# NOTE(review): the reason for this delay is not visible in this script;
# confirm what it waits for before changing or removing it.
sleep 5

export PYSPARK_DRIVER_PYTHON=jupyter

# Start from a known-empty value so that a stray NB_PREFIX_ARG inherited from
# the environment never ends up in the JupyterLab options.
NB_PREFIX_ARG=""

# This variable is injected when running a notebook from Kubeflow.
if [ -n "${NB_PREFIX}" ]; then
  NB_PREFIX_ARG="--NotebookApp.base_url '${NB_PREFIX}'"
fi

export PYSPARK_DRIVER_PYTHON_OPTS="lab --no-browser --port=8888 ${NB_PREFIX_ARG} --ip=0.0.0.0 --NotebookApp.token='' --notebook-dir=/var/lib/spark/notebook"

echo "PYSPARK_DRIVER_PYTHON_OPTS: ${PYSPARK_DRIVER_PYTHON_OPTS}"

# "$@" (rather than unquoted $*) hands each argument over exactly as received,
# so values containing spaces or glob characters are not re-split or expanded.
spark-client.pyspark "$@"
6 changes: 6 additions & 0 deletions files/jupyter/pebble/layers.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Pebble layer declaring the single service shipped in the JupyterLab image.
services:
  jupyter:
    command: "/opt/pebble/jupyterlab-server.sh"
    summary: "This is the Spark-powered Jupyter service"
    # Replace, rather than merge with, a same-named service from a lower layer.
    override: replace
    # Start this service automatically when Pebble starts.
    startup: enabled
7 changes: 5 additions & 2 deletions rockcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ description: Spark ROCK
license: Apache-2.0

version: "3.4.2"
# version:spark:3.4.2
# version:jupyter:4.0.11

base: [email protected]

platforms:
amd64:

Expand All @@ -17,7 +21,6 @@ environment:
PYTHONPATH: /opt/spark/python:/opt/spark8t/python/dist:/usr/lib/python3.10/site-packages
PATH: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark:/opt/spark/bin:/opt/spark/python/bin:/opt/spark-client/python/bin
HOME: /var/lib/spark
KUBECONFIG: /var/lib/spark/.kube/config
SPARK_USER_DATA: /var/lib/spark
SPARK_LOG_DIR: /var/log/spark

Expand Down Expand Up @@ -144,7 +147,7 @@ parts:
- python3-pip
overlay-script: |
mkdir -p $CRAFT_PART_INSTALL/opt/spark8t/python/dist
pip install --target=${CRAFT_PART_INSTALL}/opt/spark8t/python/dist https://github.com/canonical/spark-k8s-toolkit-py/releases/download/v0.0.2/spark8t-0.0.2-py3-none-any.whl
pip install --target=${CRAFT_PART_INSTALL}/opt/spark8t/python/dist https://github.com/canonical/spark-k8s-toolkit-py/releases/download/v0.0.3/spark8t-0.0.3-py3-none-any.whl
rm usr/bin/pip*
stage:
- opt/spark8t/python/dist
Expand Down
Loading
Loading