diff --git a/Dockerfile.infer b/Dockerfile.infer
index 9c9b020..626a7e3 100644
--- a/Dockerfile.infer
+++ b/Dockerfile.infer
@@ -1,7 +1,16 @@
+FROM python:3.9-slim
 WORKDIR /app
 COPY requirements.txt .
-COPY server.py .
-FROM python:3.9-slim
-CMD ["python", "server.py"]
 RUN pip install --no-cache-dir -r requirements.txt
+# server.py is copied after the dependency install so code edits reuse the cached pip layer
+COPY server.py .
+CMD ["python", "server.py"]
 EXPOSE 8080
+
+# Instructions reordered: FROM must come first, and CMD follows the dependency install
+
+# To build:
+# docker build -t sklearn_serve:v1 -f Dockerfile.infer .
+
+# run command:
+# docker run --rm -p 8080:8080/tcp -v ./app/models:/app/models sklearn_serve:v1
\ No newline at end of file
diff --git a/Dockerfile.train b/Dockerfile.train
index 06ffaf9..9bfb474 100644
--- a/Dockerfile.train
+++ b/Dockerfile.train
@@ -1,12 +1,23 @@
-FROM
+FROM python:3.9-slim
 
-# TODO: Set a working directory
+WORKDIR /app
 
-# TODO: Copy the requirements.txt file to the working directory
+COPY requirements.txt .
 
-# TODO: Install the Python dependencies
+# Dependencies are installed before train.py is copied so script edits reuse the cached pip layer
+RUN pip install --no-cache-dir -r requirements.txt
 
-# TODO: Copy the training script (train.py) to the working directory
+COPY train.py .
 
-# TODO: Run the training script that generates the model
-CMD [...]
+# Create the models directory world-writable (777) - NOTE(review): consider a dedicated USER with --chown instead
+RUN mkdir -p /app/models && chmod 777 /app/models
+
+EXPOSE 8080
+
+CMD ["python", "train.py"]
+
+# build command:
+# docker build -t sklearn_train:v1 -f Dockerfile.train .
+
+# run command:
+# docker run --rm -p 8080:8080/tcp -v ./app/models:/app/models sklearn_train:v1
\ No newline at end of file
diff --git a/Project_steps.md b/Project_steps.md
new file mode 100644
index 0000000..632a120
--- /dev/null
+++ b/Project_steps.md
@@ -0,0 +1,64 @@
+# Project Docker Microcredential
+
+## Git
+
+The original repository was forked and cloned.
+Changes to README.md are performed directly on the main branch.
+Changes to Dockerfiles are performed in their own branch.
+
+## Containerize training the machine learning model
+
+Original Dockerfile.train worked on in branch `containerize_train`.
+Completed the TODOs in the file and added the build and run commands to Dockerfile.train as comments.
+
+## Containerize serving of the machine learning model
+
+Original Dockerfile.infer worked on in branch `containerize_serve`.
+Reordered the instructions in Dockerfile.infer and added the build and run commands as comments.
+
+## Train and run the machine learning model using Docker
+
+The commands for building the image and running the container are in Dockerfile.train.
+Running the container produces the `iris_model.pkl` file, which is used in the next step.
+
+## Run the Docker container serving the machine learning model
+
+The commands for building the image and running the container are in Dockerfile.infer.
+The message `Welcome to Docker Lab` can be found at http://localhost:8080.
+
+## Store the Docker images on your personal account on Docker Hub
+- Log in with
+`docker login`
+- Tagging images
+`docker tag sklearn_train:v1 rabuono/skelearn_train:v1`
+`docker tag sklearn_serve:v1 rabuono/skelearn_serve:v1`
+- Pushing images
+`docker push rabuono/skelearn_train:v1`
+`docker push rabuono/skelearn_serve:v1`
+
+## Building Docker images for multiple platforms
+
+Version v2 of the images is built with a command of the following form, so that the images support multiple platforms:
+`docker buildx build --platform linux/amd64,linux/arm64 -t mydockerimage:latest .`
+
+This leads to the following two commands, so that the resulting images can be used on linux/amd64 and linux/arm64 platforms:
+`docker buildx build --platform linux/amd64,linux/arm64 -t sklearn_train:v2 -f Dockerfile.train .`
+`docker buildx build --platform linux/amd64,linux/arm64 -t sklearn_serve:v2 -f Dockerfile.infer .`
+
+## Store multi-platform Docker images on your personal account on Docker Hub
+- Log in with
+`docker login`
+- Tagging images
+`docker tag sklearn_train:v2 rabuono/skelearn_train:v2`
+`docker tag sklearn_serve:v2 rabuono/skelearn_serve:v2`
+- Pushing images
+`docker push rabuono/skelearn_train:v2`
+`docker push rabuono/skelearn_serve:v2`
+
+## Building Apptainer images
+- Run 
`image_build.sh` to build the images by pulling them from Docker Hub
+- The job produces two log files in addition to the Slurm output file (`slurm-*.out`)
+- An alternative `image_build_ugent.sh` script is provided for building on the UGent HPC, where the build produces no `--fakeroot`-related `xattrs` warnings. The corresponding folder with the UGent HPC logs is also provided.
+- The script requires a path as an argument when it is submitted. For example: `sbatch image_build.sh /path/to/output`, where `/path/to/output` is the directory in which the `logs` folder is created; the `.sif` images themselves are written to the job's working directory.
+
+
diff --git a/README.md b/README.md
index 8098afb..beeeaca 100644
--- a/README.md
+++ b/README.md
@@ -5,16 +5,16 @@ In this project, you will train, run and serve a machine learning model using Do
 
 ## Deliverables
 
-- [ ] Clone this repository to your personal github account
-- [ ] Containerize training the machine learning model
-- [ ] Containerize serving of the machine learning model
-- [ ] Train and run the machine learning model using Docker
-- [ ] Run the Docker container serving the machine learning model
-- [ ] Store the Docker images on your personal account on Docker Hub
-- [ ] Provide the resulting Dockerfiles in GitHub
-- [ ] Build an Apptainer image on a HPC of your choice
-- [ ] Provide the logs of the slurm job in GitHub
-- [ ] Document the steps in a text document in GitHub
+- [X] Clone this repository to your personal github account
+- [X] Containerize training the machine learning model
+- [X] Containerize serving of the machine learning model
+- [X] Train and run the machine learning model using Docker
+- [X] Run the Docker container serving the machine learning model
+- [X] Store the Docker images on your personal account on Docker Hub
+- [X] Provide the resulting Dockerfiles in GitHub
+- [X] Build an Apptainer image on a HPC of your choice
+- [X] Provide the logs of the slurm job in GitHub
+- [X] Document the steps in a text document in GitHub
 
 ## Proposed steps
 - containerize and run training the machine learning model
diff --git a/app/models/iris_model.pkl 
b/app/models/iris_model.pkl
new file mode 100644
index 0000000..2bf91e8
Binary files /dev/null and b/app/models/iris_model.pkl differ
diff --git a/image_build.sh b/image_build.sh
new file mode 100644
index 0000000..90c1b41
--- /dev/null
+++ b/image_build.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+#SBATCH --job-name=apptainer_build
+#SBATCH --partition=debug_28C_56T_750GB
+#SBATCH --mem=8G
+#SBATCH --time=01:00:00
+
+# Docker Hub images to convert into Apptainer SIF files
+TRAIN_IMG="docker://rabuono/skelearn_train:v2"
+SERVE_IMG="docker://rabuono/skelearn_serve:v2"
+# First argument: base directory for the logs; abort with a usage message if it is missing
+SCRIPT_DIR="${1:?usage: sbatch image_build.sh OUTPUT_DIR}"
+# Build logs are written to OUTPUT_DIR/logs
+LOG_DIR="$SCRIPT_DIR/logs"
+
+# Create log directory if it doesn't exist
+mkdir -p "$LOG_DIR"
+
+# Build each SIF file in the current directory; stdout and stderr go to a per-image log
+echo "Building training image..."
+apptainer build --fakeroot model-train.sif "$TRAIN_IMG" > "$LOG_DIR/train_image_build.log" 2>&1
+
+echo "Building serving image..."
+apptainer build --fakeroot model-serve.sif "$SERVE_IMG" > "$LOG_DIR/serve_image_build.log" 2>&1
+
+echo 'Job finished'
\ No newline at end of file
diff --git a/image_build_ugent.sh b/image_build_ugent.sh
new file mode 100644
index 0000000..858a6c9
--- /dev/null
+++ b/image_build_ugent.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+#SBATCH --job-name=job_submission
+#SBATCH --partition=donphan
+#SBATCH --mem=8G
+#SBATCH --time=00:30:00
+
+# Docker Hub images to convert into Apptainer SIF files
+TRAIN_IMG="docker://rabuono/skelearn_train:v2"
+SERVE_IMG="docker://rabuono/skelearn_serve:v2"
+# First argument: base directory for the logs; abort with a usage message if it is missing
+SCRIPT_DIR="${1:?usage: sbatch image_build_ugent.sh OUTPUT_DIR}"
+# Build logs are written to OUTPUT_DIR/logs
+LOG_DIR="$SCRIPT_DIR/logs"
+
+# Create log directory if it doesn't exist
+mkdir -p "$LOG_DIR"
+
+# Build each SIF file in the current directory; stdout and stderr go to a per-image log
+echo "Building training image..."
+apptainer build --fakeroot model-train.sif "$TRAIN_IMG" > "$LOG_DIR/train_image_build.log" 2>&1
+
+echo "Building serving image..."
+apptainer build --fakeroot model-serve.sif "$SERVE_IMG" > "$LOG_DIR/serve_image_build.log" 2>&1
+
+echo 'Job finished'
\ No newline at end of file
diff --git a/logs_for_apptainer/logs_from_hpc_ugent/serve_image_build.log b/logs_for_apptainer/logs_from_hpc_ugent/serve_image_build.log
new file mode 100644
index 0000000..8b17cb3
--- /dev/null
+++ b/logs_for_apptainer/logs_from_hpc_ugent/serve_image_build.log
@@ -0,0 +1,21 @@
+INFO: Starting build...
+Copying blob sha256:184a776449911524150de3a331d4c3eafe528bd2fc50c9b97326bf3fb8e1d4f6
+Copying blob sha256:8a628cdd7ccc83e90e5a95888fcb0ec24b991141176c515ad101f12d6433eb96
+Copying blob sha256:12593af8cbd21b1576220a9427a4348a814d745e8be2f1e0c291feeccef9435f
+Copying blob sha256:1a2929da74e607ea6ef07742bf12f320bf1a3dbae71e551ac6ca616497bcac33
+Copying blob sha256:97e7d544f1102f53e73772843cd535a15024771fd751e3e7de7ac5601d82807e
+Copying blob sha256:3530cc4bfea2e74a5ef8726ad907b4fdf3b12cc8ae116b0c15f283d5e831958c
+Copying blob sha256:611c532b1d06dca8d109405f17c9d6dad162181677ba59bbb30410597614d4f4
+Copying blob sha256:4c4dc7d981d4dbdbf79828bfbd458638c207ff80825abd8fc5d84b0e4606c6e7
+Copying config sha256:a563cebff51e0fee3b1963aec0a25c4f7a02b2d1c5fd36bbe8074bed79a48b1a
+Writing manifest to image destination
+2025/04/22 15:43:58 info unpack layer: sha256:8a628cdd7ccc83e90e5a95888fcb0ec24b991141176c515ad101f12d6433eb96
+2025/04/22 15:43:59 info unpack layer: sha256:12593af8cbd21b1576220a9427a4348a814d745e8be2f1e0c291feeccef9435f
+2025/04/22 15:44:00 info unpack layer: sha256:1a2929da74e607ea6ef07742bf12f320bf1a3dbae71e551ac6ca616497bcac33
+2025/04/22 15:44:00 info unpack layer: sha256:97e7d544f1102f53e73772843cd535a15024771fd751e3e7de7ac5601d82807e
+2025/04/22 15:44:00 info unpack layer: sha256:3530cc4bfea2e74a5ef8726ad907b4fdf3b12cc8ae116b0c15f283d5e831958c
+2025/04/22 15:44:00 info unpack layer: sha256:184a776449911524150de3a331d4c3eafe528bd2fc50c9b97326bf3fb8e1d4f6
+2025/04/22 15:44:00 info unpack layer: 
sha256:611c532b1d06dca8d109405f17c9d6dad162181677ba59bbb30410597614d4f4 +2025/04/22 15:44:00 info unpack layer: sha256:4c4dc7d981d4dbdbf79828bfbd458638c207ff80825abd8fc5d84b0e4606c6e7 +INFO: Creating SIF file... +INFO: Build complete: model-serve.sif diff --git a/logs_for_apptainer/logs_from_hpc_ugent/slurm-20166805.out b/logs_for_apptainer/logs_from_hpc_ugent/slurm-20166805.out new file mode 100644 index 0000000..ac1e9c5 --- /dev/null +++ b/logs_for_apptainer/logs_from_hpc_ugent/slurm-20166805.out @@ -0,0 +1,3 @@ +Building training image... +Building serving image... +Job finished diff --git a/logs_for_apptainer/logs_from_hpc_ugent/train_image_build.log b/logs_for_apptainer/logs_from_hpc_ugent/train_image_build.log new file mode 100644 index 0000000..4bb4f74 --- /dev/null +++ b/logs_for_apptainer/logs_from_hpc_ugent/train_image_build.log @@ -0,0 +1,23 @@ +INFO: Starting build... +Copying blob sha256:184a776449911524150de3a331d4c3eafe528bd2fc50c9b97326bf3fb8e1d4f6 +Copying blob sha256:8a628cdd7ccc83e90e5a95888fcb0ec24b991141176c515ad101f12d6433eb96 +Copying blob sha256:12593af8cbd21b1576220a9427a4348a814d745e8be2f1e0c291feeccef9435f +Copying blob sha256:1a2929da74e607ea6ef07742bf12f320bf1a3dbae71e551ac6ca616497bcac33 +Copying blob sha256:97e7d544f1102f53e73772843cd535a15024771fd751e3e7de7ac5601d82807e +Copying blob sha256:3530cc4bfea2e74a5ef8726ad907b4fdf3b12cc8ae116b0c15f283d5e831958c +Copying blob sha256:d55d33e89e5c869ebd82678fadd8aaee2a674df9727c748afb101c0d1c89f5bb +Copying blob sha256:bba90a10b1ce867f2561dd0a276e81ebb6ca207261a205c309ac9b440b3755a6 +Copying blob sha256:fe39c22c9e0108a3151f0fa0e56bff032778e71642414a5d88eb18e9f2234ed7 +Copying config sha256:cfd1b91292e1dfe644d5223e80cb610b96187c2f609fd518cc871db64f78e144 +Writing manifest to image destination +2025/04/22 15:42:56 info unpack layer: sha256:8a628cdd7ccc83e90e5a95888fcb0ec24b991141176c515ad101f12d6433eb96 +2025/04/22 15:42:57 info unpack layer: 
sha256:12593af8cbd21b1576220a9427a4348a814d745e8be2f1e0c291feeccef9435f +2025/04/22 15:42:57 info unpack layer: sha256:1a2929da74e607ea6ef07742bf12f320bf1a3dbae71e551ac6ca616497bcac33 +2025/04/22 15:42:58 info unpack layer: sha256:97e7d544f1102f53e73772843cd535a15024771fd751e3e7de7ac5601d82807e +2025/04/22 15:42:58 info unpack layer: sha256:3530cc4bfea2e74a5ef8726ad907b4fdf3b12cc8ae116b0c15f283d5e831958c +2025/04/22 15:42:58 info unpack layer: sha256:184a776449911524150de3a331d4c3eafe528bd2fc50c9b97326bf3fb8e1d4f6 +2025/04/22 15:42:58 info unpack layer: sha256:d55d33e89e5c869ebd82678fadd8aaee2a674df9727c748afb101c0d1c89f5bb +2025/04/22 15:42:58 info unpack layer: sha256:bba90a10b1ce867f2561dd0a276e81ebb6ca207261a205c309ac9b440b3755a6 +2025/04/22 15:43:01 info unpack layer: sha256:fe39c22c9e0108a3151f0fa0e56bff032778e71642414a5d88eb18e9f2234ed7 +INFO: Creating SIF file... +INFO: Build complete: model-train.sif diff --git a/logs_for_apptainer/serve_image_build.log b/logs_for_apptainer/serve_image_build.log new file mode 100644 index 0000000..215f3c8 --- /dev/null +++ b/logs_for_apptainer/serve_image_build.log @@ -0,0 +1,29 @@ +INFO: User not listed in /etc/subuid, trying root-mapped namespace +INFO: The %post section will be run under the fakeroot command +INFO: Starting build... 
+Copying blob sha256:8a628cdd7ccc83e90e5a95888fcb0ec24b991141176c515ad101f12d6433eb96 +Copying blob sha256:1a2929da74e607ea6ef07742bf12f320bf1a3dbae71e551ac6ca616497bcac33 +Copying blob sha256:184a776449911524150de3a331d4c3eafe528bd2fc50c9b97326bf3fb8e1d4f6 +Copying blob sha256:12593af8cbd21b1576220a9427a4348a814d745e8be2f1e0c291feeccef9435f +Copying blob sha256:97e7d544f1102f53e73772843cd535a15024771fd751e3e7de7ac5601d82807e +Copying blob sha256:611c532b1d06dca8d109405f17c9d6dad162181677ba59bbb30410597614d4f4 +Copying blob sha256:3530cc4bfea2e74a5ef8726ad907b4fdf3b12cc8ae116b0c15f283d5e831958c +Copying blob sha256:4c4dc7d981d4dbdbf79828bfbd458638c207ff80825abd8fc5d84b0e4606c6e7 +Copying config sha256:a563cebff51e0fee3b1963aec0a25c4f7a02b2d1c5fd36bbe8074bed79a48b1a +Writing manifest to image destination +2025/04/22 13:42:29 info unpack layer: sha256:8a628cdd7ccc83e90e5a95888fcb0ec24b991141176c515ad101f12d6433eb96 +2025/04/22 13:42:30 warn xattr{etc/gshadow} ignoring ENOTSUP on setxattr "user.rootlesscontainers" +2025/04/22 13:42:30 warn xattr{/data/tmp/81753/build-temp-2370069137/rootfs/etc/gshadow} destination filesystem does not support xattrs, further warnings will be suppressed +2025/04/22 13:42:51 info unpack layer: sha256:12593af8cbd21b1576220a9427a4348a814d745e8be2f1e0c291feeccef9435f +2025/04/22 13:42:53 warn xattr{var/cache/apt/archives/partial} ignoring ENOTSUP on setxattr "user.rootlesscontainers" +2025/04/22 13:42:53 warn xattr{/data/tmp/81753/build-temp-2370069137/rootfs/var/cache/apt/archives/partial} destination filesystem does not support xattrs, further warnings will be suppressed +2025/04/22 13:42:53 info unpack layer: sha256:1a2929da74e607ea6ef07742bf12f320bf1a3dbae71e551ac6ca616497bcac33 +2025/04/22 13:43:05 warn xattr{var/log/apt/term.log} ignoring ENOTSUP on setxattr "user.rootlesscontainers" +2025/04/22 13:43:05 warn xattr{/data/tmp/81753/build-temp-2370069137/rootfs/var/log/apt/term.log} destination filesystem does not support xattrs, 
further warnings will be suppressed +2025/04/22 13:43:05 info unpack layer: sha256:97e7d544f1102f53e73772843cd535a15024771fd751e3e7de7ac5601d82807e +2025/04/22 13:43:05 info unpack layer: sha256:3530cc4bfea2e74a5ef8726ad907b4fdf3b12cc8ae116b0c15f283d5e831958c +2025/04/22 13:43:05 info unpack layer: sha256:184a776449911524150de3a331d4c3eafe528bd2fc50c9b97326bf3fb8e1d4f6 +2025/04/22 13:43:05 info unpack layer: sha256:611c532b1d06dca8d109405f17c9d6dad162181677ba59bbb30410597614d4f4 +2025/04/22 13:43:05 info unpack layer: sha256:4c4dc7d981d4dbdbf79828bfbd458638c207ff80825abd8fc5d84b0e4606c6e7 +INFO: Creating SIF file... +INFO: Build complete: model-serve.sif diff --git a/logs_for_apptainer/slurm-81753.out b/logs_for_apptainer/slurm-81753.out new file mode 100644 index 0000000..ac1e9c5 --- /dev/null +++ b/logs_for_apptainer/slurm-81753.out @@ -0,0 +1,3 @@ +Building training image... +Building serving image... +Job finished diff --git a/logs_for_apptainer/train_image_build.log b/logs_for_apptainer/train_image_build.log new file mode 100644 index 0000000..058de81 --- /dev/null +++ b/logs_for_apptainer/train_image_build.log @@ -0,0 +1,31 @@ +INFO: User not listed in /etc/subuid, trying root-mapped namespace +INFO: The %post section will be run under the fakeroot command +INFO: Starting build... 
+Copying blob sha256:184a776449911524150de3a331d4c3eafe528bd2fc50c9b97326bf3fb8e1d4f6 +Copying blob sha256:12593af8cbd21b1576220a9427a4348a814d745e8be2f1e0c291feeccef9435f +Copying blob sha256:1a2929da74e607ea6ef07742bf12f320bf1a3dbae71e551ac6ca616497bcac33 +Copying blob sha256:8a628cdd7ccc83e90e5a95888fcb0ec24b991141176c515ad101f12d6433eb96 +Copying blob sha256:97e7d544f1102f53e73772843cd535a15024771fd751e3e7de7ac5601d82807e +Copying blob sha256:3530cc4bfea2e74a5ef8726ad907b4fdf3b12cc8ae116b0c15f283d5e831958c +Copying blob sha256:d55d33e89e5c869ebd82678fadd8aaee2a674df9727c748afb101c0d1c89f5bb +Copying blob sha256:bba90a10b1ce867f2561dd0a276e81ebb6ca207261a205c309ac9b440b3755a6 +Copying blob sha256:fe39c22c9e0108a3151f0fa0e56bff032778e71642414a5d88eb18e9f2234ed7 +Copying config sha256:cfd1b91292e1dfe644d5223e80cb610b96187c2f609fd518cc871db64f78e144 +Writing manifest to image destination +2025/04/22 13:40:19 info unpack layer: sha256:8a628cdd7ccc83e90e5a95888fcb0ec24b991141176c515ad101f12d6433eb96 +2025/04/22 13:40:20 warn xattr{etc/gshadow} ignoring ENOTSUP on setxattr "user.rootlesscontainers" +2025/04/22 13:40:20 warn xattr{/data/tmp/81753/build-temp-3875897386/rootfs/etc/gshadow} destination filesystem does not support xattrs, further warnings will be suppressed +2025/04/22 13:40:41 info unpack layer: sha256:12593af8cbd21b1576220a9427a4348a814d745e8be2f1e0c291feeccef9435f +2025/04/22 13:40:43 warn xattr{var/cache/apt/archives/partial} ignoring ENOTSUP on setxattr "user.rootlesscontainers" +2025/04/22 13:40:43 warn xattr{/data/tmp/81753/build-temp-3875897386/rootfs/var/cache/apt/archives/partial} destination filesystem does not support xattrs, further warnings will be suppressed +2025/04/22 13:40:43 info unpack layer: sha256:1a2929da74e607ea6ef07742bf12f320bf1a3dbae71e551ac6ca616497bcac33 +2025/04/22 13:40:55 warn xattr{var/log/apt/term.log} ignoring ENOTSUP on setxattr "user.rootlesscontainers" +2025/04/22 13:40:55 warn 
xattr{/data/tmp/81753/build-temp-3875897386/rootfs/var/log/apt/term.log} destination filesystem does not support xattrs, further warnings will be suppressed +2025/04/22 13:40:55 info unpack layer: sha256:97e7d544f1102f53e73772843cd535a15024771fd751e3e7de7ac5601d82807e +2025/04/22 13:40:55 info unpack layer: sha256:3530cc4bfea2e74a5ef8726ad907b4fdf3b12cc8ae116b0c15f283d5e831958c +2025/04/22 13:40:55 info unpack layer: sha256:184a776449911524150de3a331d4c3eafe528bd2fc50c9b97326bf3fb8e1d4f6 +2025/04/22 13:40:55 info unpack layer: sha256:d55d33e89e5c869ebd82678fadd8aaee2a674df9727c748afb101c0d1c89f5bb +2025/04/22 13:40:55 info unpack layer: sha256:bba90a10b1ce867f2561dd0a276e81ebb6ca207261a205c309ac9b440b3755a6 +2025/04/22 13:41:35 info unpack layer: sha256:fe39c22c9e0108a3151f0fa0e56bff032778e71642414a5d88eb18e9f2234ed7 +INFO: Creating SIF file... +INFO: Build complete: model-train.sif