From 75220410d6e336e6d1138114a2b17dba303a49bc Mon Sep 17 00:00:00 2001 From: Leena Putzeys Date: Thu, 10 Apr 2025 11:25:00 +0200 Subject: [PATCH 1/7] Modified the Dockerfile.train for first attempt to build and run container --- Dockerfile.train | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/Dockerfile.train b/Dockerfile.train index 06ffaf9..28229c8 100644 --- a/Dockerfile.train +++ b/Dockerfile.train @@ -1,12 +1,28 @@ -FROM +# Official stable Python image selected from DockerHub based on python-based requirements +# Use Python image as base image +FROM python:3.11-slim -# TODO: Set a working directory +# Add label +LABEL maintainer="Leena Putzeys" -# TODO: Copy the requirements.txt file to the working directory +# Set a working directory +WORKDIR /data -# TODO: Install the Python dependencies +# Copy the requirements.txt file to the working directory of the container +COPY requirements.txt . -# TODO: Copy the training script (train.py) to the working directory +# Install the Python dependencies +RUN pip install --upgrade pip +RUN pip install -r requirements.txt -# TODO: Run the training script that generates the model -CMD [...] +# Copy the training script (train.py) to the working directory +COPY train.py . + +# Run the training script that generates the model +CMD ["python","train.py"] + +# Steps to run the training of model on own laptop +# 1. open docker desktop +# 2. build image: sudo docker build -t model-train:python3.11-slim -f Dockerfile.train . +# 3. check if image was build: sudo docker images +# 4. run the container: sudo docker run model-train:python3.11-slim From e36c3a14fd4f77df7ceb17c050ae855971be9503 Mon Sep 17 00:00:00 2001 From: Leena Putzeys Date: Thu, 10 Apr 2025 11:40:39 +0200 Subject: [PATCH 2/7] Modified Dockerfile.train with correct workdir and volume mounting. Added model training container output in /model folder. 
--- Dockerfile.train | 13 +++++++++---- models/iris_model.pkl | Bin 0 -> 5859 bytes 2 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 models/iris_model.pkl diff --git a/Dockerfile.train b/Dockerfile.train index 28229c8..7fc6c44 100644 --- a/Dockerfile.train +++ b/Dockerfile.train @@ -6,7 +6,8 @@ FROM python:3.11-slim LABEL maintainer="Leena Putzeys" # Set a working directory -WORKDIR /data +# Modified from /data to /app to ensure compatability with train.py +WORKDIR /app # Copy the requirements.txt file to the working directory of the container COPY requirements.txt . @@ -16,13 +17,17 @@ RUN pip install --upgrade pip RUN pip install -r requirements.txt # Copy the training script (train.py) to the working directory +# Error after running: outputdir in script needs to be compatible. +# Added mkdir command COPY train.py . +RUN mkdir -p /models # Run the training script that generates the model CMD ["python","train.py"] # Steps to run the training of model on own laptop # 1. open docker desktop -# 2. build image: sudo docker build -t model-train:python3.11-slim -f Dockerfile.train . -# 3. check if image was build: sudo docker images -# 4. run the container: sudo docker run model-train:python3.11-slim +# 2. build image: sudo docker build -t model-train:v1.1-python3.11-slim -f Dockerfile.train . +# 3. check if image was build: sudo docker images +# 4. make local folder to mount volume to get output files from container +# 5. 
run the container: sudo docker run -v ./models:/app/models model-train:v1.1-python3.11-slim diff --git a/models/iris_model.pkl b/models/iris_model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..86ebedeb97a6762fb7d4b31799b7f886eec93d27 GIT binary patch literal 5859 zcmeHL+iw(A7~gK&?RHyA?^nc2Y*-Z5a#7KcY`LTbSE>PlL^PAx?X)v=cXpkbZ7oJE zF<_CS2{LOG{s6;^!RP}hctcA9%ofKBb$_yT3WM4A*M)5PN|AyYWpCN&snvzQE5cc235=MV->@S zk<6M3NQN`=^O9yMhW*$KG*)fMdVe+}nU-uRTvRf1vf+r<03V8MH2$i+-Hrn3CF||JzCLF&&YwiU zvXd+#r_;dL1B#zGQSZj(b)ER{8s zSSFvgG?5q<;uS6PG^#5$g-7mtrn5J5GHxG)95BdeE|)c|A~M#`A|t4+^>N|Ln9E4X zOg)6u2xtMc0@?t}0V@Ej0c!wj0S^Hl2CM@-0(ca_lhzH`2-pnh0c-(m1@QDe4%i0h z13UrP0eA|q6R-=g8}Kw>58zoq62Lj%3*a1buAc+&T#W!m0WSbv0=x{^4|oM|0B{g+ z2=E%=H&#nj4l5}uYna89Cdv*{g{^QYtD(vj{a0V|*4#bd zw3_ZUPP-Zx{rUK@cX(^U`6_o^%%6=5y++iJxa%UwAJI;Ot1slQq206i+%$Fz`Ea+e z`|H@WZSJ~=qaNd#!u1Z+qxc%!bs^u5 zenZ%M`g8dWpnHKN>AtRIg*;iovi!cH05sm7ju7l@y7^xKI0q&jc% z`SvRK0JOzy!0GXJ&=CAFF)G>{4y(W~qhV|Qw z@sfRt*ZFw>Q~vrOzNX#!5Oq%VPx@0>pLD*4Fdo`Z0ORrE6L!4kL*%s;hDgpU*qyc{E6p`&L=wG{QGNl>qq2`_UoOmRG-3bF@Iv7;`8<| z_TxW)o6sNS*IO6ZFtE1H3Itst0Kf1S62FufHB z1A)LVqn*7+wp_sd(K$95*Je(~PTuuF(LCynEv%oM@XLSt(S_}2YA+Ve`{fokF81^9 z^!;}H=w&ExcWqRQQf!m?tkLc%opFE{%PLz8~JbCmpuAj^qV-!1wQ-jOk2mh zh4^>x*ZkT1_t)kfA-lUed;N%|5s0WzxDQKohONEa-bwO`0+%6U#NJ4`yc6gqXEQi6Z$7KbY48s=c0G)usX86M!i#T5rB<{gZyv6TLtO`D3+)DoV zMU*!%ywJnB%A;dO7`c5uBM)Pu_diGe=ZnVwy}x}vbH=@M`u6$k@G)`yI`r4&C8&PsaWM@bf+V literal 0 HcmV?d00001 From 71b9fd9361f1258f250ece3960ee206ab68b75eb Mon Sep 17 00:00:00 2001 From: Leena Putzeys Date: Thu, 10 Apr 2025 13:16:20 +0200 Subject: [PATCH 3/7] Corrected and modified Dockerfiles (.train and .infer), and addition of step-by-step workflow of this project. 
--- Dockerfile.infer | 23 ++++++++++++++++++++--- PROJECT-STEPS.md | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 PROJECT-STEPS.md diff --git a/Dockerfile.infer b/Dockerfile.infer index 9c9b020..88b6ddc 100644 --- a/Dockerfile.infer +++ b/Dockerfile.infer @@ -1,7 +1,24 @@ +# Define base image +FROM python:3.9-slim + +# Define working directory in container WORKDIR /app + +# Copy requirements and install the python dependencies COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the server script in the container COPY server.py . -FROM python:3.9-slim + +# Expose the container port to 8080 +EXPOSE 8080 + +# Run the server.py script CMD ["python", "server.py"] -RUN pip install --no-cache-dir -r requirements.txt -EXPOSE 8080 + +# Commands to build and run the container +# 1. Build the image: sudo docker build -t model-infer:v1-python3.9-slim -f Dockerfile.infer +# 2. Check if build was successfull and find image id: sudo docker images +# 3. Run the container in detached mode and set port: sudo docker run --detach -p 5000:8080 -v ./models:/app/models model-infer:v1-python3.9-slim +# 4. 
Check URL to see if it worked: http://localhost:5000/ diff --git a/PROJECT-STEPS.md b/PROJECT-STEPS.md new file mode 100644 index 0000000..9e3ce18 --- /dev/null +++ b/PROJECT-STEPS.md @@ -0,0 +1,47 @@ +# Outline of steps used to carry out microcredential project 2 + +Project description: https://github.com/vib-tcp/project_docker_microcredential + +## Deliverable 1: Clone this repository to your personal github account +**step 1**: fork project +**step 2**: clone the project on local machine +`git clone git@github.com:leenput/project_docker_microcredential.git` + +## Deliverable 2: Containerize training the machine learning model +## Deliverable 4: Train and run the machine learning model using Docker + +**step 1:** complete file named Dockerfile.train + +approach: +- inspect Dockerfile, train.py script and the requirements.txt +- select an appropriate base image from DockerHub, which judging from the files should be python-based: selected python image 3.11-slim +- set appropriate working directory in the container. 
Make sure that it matches the output directory specified in the python script +- copy requirements.txt and install dependencies by using COPY rule in Dockerifle, followed by RUN pip install +- copy train.py to the working directory using COPY rule in Dockerfile +- set the command to run train.py: CMD ["python","train.py"] + +**step 2:** build the image based on the dockerfile +- build image using docker build +- make sure to add an informative tag +- point to appropriate Dockerfile using -f flag + +`sudo docker build -t model-train:v1.1-python3.11-slim -f Dockerfile.train .` + +**step 3:** run the training of the model on your computer +- run container from image using docker run +- make sure to mount the correct volume to get the model generated in the container +- retrieve the image id using docker images: +`sudo docker images` +- run the container +`sudo docker run -v ./models:/apps/models c2e5fad55e4d` + +**step 4:** document the command as comment in the Dockerfile +- added the commands as comments (#) in the Dockerfile + +**step 5:** store the created Dockerfile in your cloned github repository +`git add *` +`git commit -m "Modified Dockerfile.train with correct workdir and volume mounting. Added model training container output in /model folder." 
+ +## Deliverable 3: Containerize serving of the machine learning model +## Deliverable 5: Run the Docker container serving the machine learning model + From aa3e3e87b812a9389acd11b45f37a566502162f6 Mon Sep 17 00:00:00 2001 From: Leena Putzeys Date: Thu, 10 Apr 2025 14:30:10 +0200 Subject: [PATCH 4/7] Add job scripts and slurm output log used to build Apptainer containers on VSC --- ML-job.sh | 18 +++++++++++++++++ slurm-20163444.out | 49 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 ML-job.sh create mode 100644 slurm-20163444.out diff --git a/ML-job.sh b/ML-job.sh new file mode 100644 index 0000000..8727f19 --- /dev/null +++ b/ML-job.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +#SBATCH --job-name=job_submission +#SBATCH --partition=donphan +#SBATCH --mem=8G +#SBATCH --time=00:30:00 + +echo "Let's build containers on the VSC" + +apptainer build --fakeroot model-train.sif docker://leenaputzeys/model-train:v1.1-python3.11-slim +apptainer build --fakeroot model-infer.sif docker://leenaputzeys/model-infer:v1-python3.9-slim + +echo "Move the containers to containers folder in scratch" +mv model-train.sif $VSC_SCRATCH/containers/. +mv model-infer.sif $VSC_SCRATCH/containers/. + +echo "Finished!" + diff --git a/slurm-20163444.out b/slurm-20163444.out new file mode 100644 index 0000000..f5f3c1a --- /dev/null +++ b/slurm-20163444.out @@ -0,0 +1,49 @@ +Let's build containers on the VSC +INFO: Starting build... 
+Copying blob sha256:0ae3bf74519da2c8ccd7ff5d80c3575b217caa1b17701b98f9c2a08cd7f3da31 +Copying blob sha256:8a628cdd7ccc83e90e5a95888fcb0ec24b991141176c515ad101f12d6433eb96 +Copying blob sha256:2a47a8c4fd5c358f98fb193471dc5d7a1e75e33d480fb7a25a7130840a8769d9 +Copying blob sha256:9c538fc354918cfe13231798fe17d7c4b463cda38500187c0ecedcc061d6129e +Copying blob sha256:47bbb0afa7fe5695aca89f358dc3e73f46655977e201f553a558b4590b951dac +Copying blob sha256:bfaefce2698fc9aa292a77e6a7a2dc5c5b96e274cbc42d9f77fbc18bd65b2751 +Copying blob sha256:a155dd3a05b256fa2691a0c1733a51b4cdfc75a6bc0db4ca04abedbb16391dd3 +Copying blob sha256:44bfad6b88982ca2639da655987e42d675cdd14ba83cf7cd732a3544965b7b1b +Copying blob sha256:d18aa744867de9dee97868ff9aef53ec99e31aef166df44da964613d3d887501 +Copying blob sha256:cdc334c564348d4c408800f9fcd52e370830dc26a289a685d2f3b6146b791c3a +Copying config sha256:c7cb4ab762536a0cc70cd10455eb72323ef8624fb42e9e964b6c313bcdbcf854 +Writing manifest to image destination +2025/04/10 14:19:05 info unpack layer: sha256:8a628cdd7ccc83e90e5a95888fcb0ec24b991141176c515ad101f12d6433eb96 +2025/04/10 14:19:06 info unpack layer: sha256:2a47a8c4fd5c358f98fb193471dc5d7a1e75e33d480fb7a25a7130840a8769d9 +2025/04/10 14:19:07 info unpack layer: sha256:9c538fc354918cfe13231798fe17d7c4b463cda38500187c0ecedcc061d6129e +2025/04/10 14:19:07 info unpack layer: sha256:47bbb0afa7fe5695aca89f358dc3e73f46655977e201f553a558b4590b951dac +2025/04/10 14:19:07 info unpack layer: sha256:bfaefce2698fc9aa292a77e6a7a2dc5c5b96e274cbc42d9f77fbc18bd65b2751 +2025/04/10 14:19:07 info unpack layer: sha256:0ae3bf74519da2c8ccd7ff5d80c3575b217caa1b17701b98f9c2a08cd7f3da31 +2025/04/10 14:19:07 info unpack layer: sha256:a155dd3a05b256fa2691a0c1733a51b4cdfc75a6bc0db4ca04abedbb16391dd3 +2025/04/10 14:19:08 info unpack layer: sha256:44bfad6b88982ca2639da655987e42d675cdd14ba83cf7cd732a3544965b7b1b +2025/04/10 14:19:12 info unpack layer: sha256:d18aa744867de9dee97868ff9aef53ec99e31aef166df44da964613d3d887501 
+2025/04/10 14:19:12 info unpack layer: sha256:cdc334c564348d4c408800f9fcd52e370830dc26a289a685d2f3b6146b791c3a +INFO: Creating SIF file... +INFO: Build complete: model-train.sif +INFO: Starting build... +Copying blob sha256:489fdbaf1b1eb7d6709d369a780794a39a2ce445c4a88f28df1d955c1d813d2b +Copying blob sha256:8a628cdd7ccc83e90e5a95888fcb0ec24b991141176c515ad101f12d6433eb96 +Copying blob sha256:74018f7cfa8f2965fd86b13c38f71417bc846e071a5f5bb5ae569ccb5a6e7248 +Copying blob sha256:a0b0cfc480ce03c723a597904bcfbf28c71438c689e6d5097c2332835f67a40c +Copying blob sha256:97d21b95fb00ac3b08975ab6f8709f3a7e35a05d75e2f9a70fa95348279dac27 +Copying blob sha256:160c6e4d51bd92dbfe5b4c5d7096ab7c0a227c6ceb0c6fe8270d68e0ffc46825 +Copying blob sha256:d210d3818bb9d1be41c2e8d2a17a06bd964955a12d0966d7b2b31caaeac1731f +Copying blob sha256:6def3b0f029ac8b94bf690b5e3cda7604c3b9c825a81c998ce7eeb83cd1894b6 +Copying config sha256:9e6fc2e2ad58b4fa80a0638427150d97aab7bdde9553ef69a0e7310f383228d3 +Writing manifest to image destination +2025/04/10 14:20:16 info unpack layer: sha256:8a628cdd7ccc83e90e5a95888fcb0ec24b991141176c515ad101f12d6433eb96 +2025/04/10 14:20:17 info unpack layer: sha256:74018f7cfa8f2965fd86b13c38f71417bc846e071a5f5bb5ae569ccb5a6e7248 +2025/04/10 14:20:17 info unpack layer: sha256:a0b0cfc480ce03c723a597904bcfbf28c71438c689e6d5097c2332835f67a40c +2025/04/10 14:20:18 info unpack layer: sha256:97d21b95fb00ac3b08975ab6f8709f3a7e35a05d75e2f9a70fa95348279dac27 +2025/04/10 14:20:18 info unpack layer: sha256:160c6e4d51bd92dbfe5b4c5d7096ab7c0a227c6ceb0c6fe8270d68e0ffc46825 +2025/04/10 14:20:18 info unpack layer: sha256:489fdbaf1b1eb7d6709d369a780794a39a2ce445c4a88f28df1d955c1d813d2b +2025/04/10 14:20:18 info unpack layer: sha256:d210d3818bb9d1be41c2e8d2a17a06bd964955a12d0966d7b2b31caaeac1731f +2025/04/10 14:20:21 info unpack layer: sha256:6def3b0f029ac8b94bf690b5e3cda7604c3b9c825a81c998ce7eeb83cd1894b6 +INFO: Creating SIF file... 
+INFO: Build complete: model-infer.sif +Move the containers to containers folder in scratch +Finished! From 1b6ecb5d0ccbe02b45123f618f474dcb7918818b Mon Sep 17 00:00:00 2001 From: Leena Putzeys Date: Thu, 10 Apr 2025 14:32:59 +0200 Subject: [PATCH 5/7] Update PROJECT-STEPS.md with steps to complete the project and check deliverables. --- PROJECT-STEPS.md | 77 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 76 insertions(+), 1 deletion(-) diff --git a/PROJECT-STEPS.md b/PROJECT-STEPS.md index 9e3ce18..9f051c9 100644 --- a/PROJECT-STEPS.md +++ b/PROJECT-STEPS.md @@ -20,7 +20,7 @@ approach: - copy train.py to the working directory using COPY rule in Dockerfile - set the command to run train.py: CMD ["python","train.py"] -**step 2:** build the image based on the dockerfile +**step 2:** build the image based on the Dockerfile - build image using docker build - make sure to add an informative tag - point to appropriate Dockerfile using -f flag @@ -45,3 +45,78 @@ approach: ## Deliverable 3: Containerize serving of the machine learning model ## Deliverable 5: Run the Docker container serving the machine learning model +**step 1:** correct the order of the instructions in the Dockerfile.infer +- inspect the Dockerfile.infer and adjust the order and content based on the Dockerfile.train file +- add comments for each step to make it more readable and understandable +- add commands to build the image and run it + +**step 2:** build the image based on modified Dockerfile +`sudo docker build -f Dockerfile.infer -t model-infer:v1-python3.9-slim .` + +**step 3:** run container to serve the model +- get image id using `sudo docker images` +- run the image and set ports +`sudo docker run --detach -p 8080:8080 -v ./models/:/app/models 0ca32d91de36` +- access container via user port 8080: check URL to see if it worked (http://localhost:8080/) + +## Deliverable 6: Store the Docker images on your personal account on Docker Hub +Personal DockerHub profile: leenaputzeys + 
+**step 1:** login using terminal: +`sudo docker login`: redirected to webpage to authenticate + +**step 2:** modify image tags to make compatible with DockerHub repository naming conventions +`sudo docker tag model-infer:v1-python3.9-slim leenaputzeys/model-infer:v1-python3.9-slim` +`sudo docker tag model-train:v1.1-python3.11-slim leenaputzeys/model-train:v1.1-python3.11-slim` + +**step 3:** push images to DockerHub +`sudo docker push leenaputzeys/model-infer:v1-python3.9-slim` +`sudo docker push leenaputzeys/model-train:v1.1-python3.11-slim` + +validate online: [DockerHub Leena](https://hub.docker.com/repositories/leenaputzeys) + +## Deliverable 7: Provide the resulting Dockerfiles in GitHub +`git add *` +`git commit -m "Add modified and corrected Dockerfiles, and the step-by-step workflow of this project.` + + +## Deliverable 8: Build an Apptainer image on a HPC of your choice +**step 1:** Make sure your images are publicly available (see deliverable 6) + +**step 2:** Connect to VSC and start interactive session +- connect using https://login.hpc.ugent.be/ +- initiate an interactive shell session on the donphan cluster for 4 hours (1 node, 4 cores) + +**step 3:** pull docker images from DockerHub using apptainer +`apptainer build model-infer.sif docker://leenaputzeys/model-infer:v1-python3.9-slim` + +`apptainer build model-train.sif docker://leenaputzeys/model-train:v1.1-python3.11-slim` +Edit: realised that this is not necessary, i should directly build images from the job script. 
+ +**step 4:** make a job script that builds the model-train apptainer image +--> see [script](https://github.com/leenput/project_docker_microcredential/blob/main/ML-job.sh) + +**step 5:** run the job script on the VSC +`sbatch ML-job.sh` + +## Deliverable 9: Provide the logs of the slurm job in GitHub + +**step 1:** Download ML-job.sh and slurm log from VSC +`scp vsc49035@login.hpc.ugent.be:/user/gent/490/vsc49035/jobs/ML-job.sh .` +`scp vsc49035@login.hpc.ugent.be:/user/gent/490/vsc49035/jobs/slurm*.out` + +**Step 2:** Add files to github repository +`git add *` +`git commit -m "Add job script and slurm output log used to build apptainer containers on VSC"` +`git push` + +To view the files: +[VSC script](https://github.com/leenput/project_docker_microcredential/blob/main/ML-job.sh) +[output log](https://github.com/leenput/project_docker_microcredential/blob/main/slurm-20163444.out) + +## Deliverable 10: Document the steps in a text document in GitHub +Update PROJECT-STEPS.md file with final steps and make available on github. 
+`git add *` +`git commit -m "Update PROJECT-STEPS.md with steps to complete the project and achieve deliverables."` +`git push` + From 1981ee0a3dbcec1e61c3e7150c566a230f87f3b3 Mon Sep 17 00:00:00 2001 From: Leena Putzeys Date: Thu, 10 Apr 2025 14:41:10 +0200 Subject: [PATCH 6/7] Ticked off deliverables in README.txt and improved formatting of the PROJECT-STEPS.md file --- PROJECT-STEPS.md | 17 +++++++++++------ README.md | 22 ++++++++++++---------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/PROJECT-STEPS.md b/PROJECT-STEPS.md index 9f051c9..73e31b8 100644 --- a/PROJECT-STEPS.md +++ b/PROJECT-STEPS.md @@ -5,10 +5,9 @@ Project description: https://github.com/vib-tcp/project_docker_microcredential ## Deliverable 1: Clone this repository to your personal github account **step 1**: fork project **step 2**: clone the project on local machine -`git clone git@github.com:leenput/project_docker_microcredential.git` +`git clone git@github.com:leenput project_docker_microcredential.git` -## Deliverable 2: Containerize training the machine learning model -## Deliverable 4: Train and run the machine learning model using Docker +## Deliverable 2 & 4: Containerize training the machine learning model and run it using Docker **step 1:** complete file named Dockerfile.train @@ -67,6 +66,7 @@ Personal DockerHub profile: leenaputzeys **step 2:** modify image tags to make compatible with DockerHub repository naming conventions `sudo docker tag model-infer:v1-python3.9-slim leenaputzeys/model-infer:v1-python3.9-slim` + `sudo docker tag model-train:v1.1-python3.11-slim leenaputzeys/model-train:v1.1-python3.11-slim` **step 3:** push images to DockerHub @@ -91,6 +91,7 @@ validate online: [DockerHub Leena](https://hub.docker.com/repositories/leenaputz `apptainer build model-infer.sif docker://leenaputzeys/model-infer:v1-python3.9-slim` `apptainer build model-train.sif docker://leenaputzeys/model-train:v1.1-python3.11-slim` + Edit: realised that this is not necessary, i 
should directly build images from the job script. **step 4:** make a job script that builds the model-train apptainer image @@ -103,12 +104,16 @@ Edit: realised that this is not necessary, i should directly build images from t **step 1:** Download ML-job.sh and slurm log from VSC `scp vsc49035@login.hpc.ugent.be:/user/gent/490/vsc49035/jobs/ML-job.sh .` + `scp vsc49035@login.hpc.ugent.be:/user/gent/490/vsc49035/jobs/slurm*.out` **Step 2:** Add files to github repository -`git add *` -`git commit -m "Add job script and slurm output log used to build apptainer containers on VSC"` -`git push` +``` +git add * +git commit -m "Add job script and slurm output log used to build apptainer containers on VSC"` +git push` + +``` To view the files: [VSC script](https://github.com/leenput/project_docker_microcredential/blob/main/ML-job.sh) diff --git a/README.md b/README.md index 8098afb..0d6b3ac 100644 --- a/README.md +++ b/README.md @@ -5,16 +5,18 @@ In this project, you will train, run and serve a machine learning model using Do ## Deliverables -- [ ] Clone this repository to your personal github account -- [ ] Containerize training the machine learning model -- [ ] Containerize serving of the machine learning model -- [ ] Train and run the machine learning model using Docker -- [ ] Run the Docker container serving the machine learning model -- [ ] Store the Docker images on your personal account on Docker Hub -- [ ] Provide the resulting Dockerfiles in GitHub -- [ ] Build an Apptainer image on a HPC of your choice -- [ ] Provide the logs of the slurm job in GitHub -- [ ] Document the steps in a text document in GitHub +- [X] Clone this repository to your personal github account +- [X] Containerize training the machine learning model +- [X] Containerize serving of the machine learning model +- [X] Train and run the machine learning model using Docker +- [X] Run the Docker container serving the machine learning model +- [X] Store the Docker images on your personal account on 
Docker Hub: [images](https://hub.docker.com/repositories/leenaputzeys) +- [X] Provide the resulting Dockerfiles in GitHub + - [Dockerfile.train](https://github.com/leenput/project_docker_microcredential/blob/main/Dockerfile.train) + - [Dockerfile.infer](https://github.com/leenput/project_docker_microcredential/blob/main/Dockerfile.infer) +- [X] Build an Apptainer image on a HPC of your choice +- [X] Provide the logs of the slurm job in GitHub: [log](https://github.com/leenput/project_docker_microcredential/blob/main/slurm-20163444.out) +- [X] Document the steps in a text document in GitHub: [workflow](https://github.com/leenput/project_docker_microcredential/blob/main/PROJECT-STEPS.md) ## Proposed steps - containerize and run training the machine learning model From 906d87e3d69b5f3fd81d87942b066f40dd1f48e1 Mon Sep 17 00:00:00 2001 From: leenput Date: Thu, 10 Apr 2025 14:47:30 +0200 Subject: [PATCH 7/7] Update formatting of PROJECT-STEPS.md to improve readability --- PROJECT-STEPS.md | 69 ++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/PROJECT-STEPS.md b/PROJECT-STEPS.md index 73e31b8..507c796 100644 --- a/PROJECT-STEPS.md +++ b/PROJECT-STEPS.md @@ -3,8 +3,8 @@ Project description: https://github.com/vib-tcp/project_docker_microcredential ## Deliverable 1: Clone this repository to your personal github account -**step 1**: fork project -**step 2**: clone the project on local machine +**step 1**: fork project
+**step 2**: clone the project on local machine
`git clone git@github.com:leenput/project_docker_microcredential.git` ## Deliverable 2 & 4: Containerize training the machine learning model and run it using Docker @@ -19,14 +19,14 @@ approach: - copy train.py to the working directory using COPY rule in Dockerfile - set the command to run train.py: CMD ["python","train.py"] -**step 2:** build the image based on the Dockerfile +**step 2:** build the image based on the Dockerfile  
- build image using docker build - make sure to add an informative tag - point to appropriate Dockerfile using -f flag `sudo docker build -t model-train:v1.1-python3.11-slim -f Dockerfile.train .` -**step 3:** run the training of the model on your computer +**step 3:** run the training of the model on your computer
- run container from image using docker run - make sure to mount the correct volume to get the model generated in the container - retrieve the image id using docker images: @@ -34,25 +34,24 @@ approach: - run the container `sudo docker run -v ./models:/app/models c2e5fad55e4d` -**step 4:** document the command as comment in the Dockerfile +**step 4:** document the command as comment in the Dockerfile  
- added the commands as comments (#) in the Dockerfile -**step 5:** store the created Dockerfile in your cloned github repository -`git add *` -`git commit -m "Modified Dockerfile.train with correct workdir and volume mounting. Added model training container output in /model folder." +**step 5:** store the created Dockerfile in your cloned github repository
+`git add *`
+`git commit -m "Modified Dockerfile.train with correct workdir and volume mounting. Added model training container output in /model folder."` -## Deliverable 3: Containerize serving of the machine learning model -## Deliverable 5: Run the Docker container serving the machine learning model +## Deliverable 3 & 5: Containerize serving of the machine learning model and run the Docker container -**step 1:** correct the order of the instructions in the Dockerfile.infer +**step 1:** correct the order of the instructions in the Dockerfile.infer
- inspect the Dockerfile.infer and adjust the order and content based on the Dockerfile.train file - add comments for each step to make it more readable and understandable - add commands to build the image and run it -**step 2:** build the image based on modified Dockerfile +**step 2:** build the image based on modified Dockerfile
`sudo docker build -f Dockerfile.infer -t model-infer:v1-python3.9-slim .` -**step 3:** run container to serve the model +**step 3:** run container to serve the model
- get image id using `sudo docker images` - run the image and set ports `sudo docker run --detach -p 8080:8080 -v ./models/:/app/models 0ca32d91de36` @@ -61,57 +60,56 @@ approach: ## Deliverable 6: Store the Docker images on your personal account on Docker Hub Personal DockerHub profile: leenaputzeys -**step 1:** login using terminal: +**step 1:** login using terminal:
`sudo docker login`: redirected to webpage to authenticate -**step 2:** modify image tags to make compatible with DockerHub repository naming conventions +**step 2:** modify image tags to make compatible with DockerHub repository naming conventions
`sudo docker tag model-infer:v1-python3.9-slim leenaputzeys/model-infer:v1-python3.9-slim` `sudo docker tag model-train:v1.1-python3.11-slim leenaputzeys/model-train:v1.1-python3.11-slim` -**step 3:** push images to DockerHub -`sudo docker push leenaputzeys/model-infer:v1-python3.9-slim` +**step 3:** push images to DockerHub
+`sudo docker push leenaputzeys/model-infer:v1-python3.9-slim`
`sudo docker push leenaputzeys/model-train:v1.1-python3.11-slim` validate online: [DockerHub Leena](https://hub.docker.com/repositories/leenaputzeys) ## Deliverable 7: Provide the resulting Dockerfiles in GitHub -`git add *` +`git add *`
`git commit -m "Add modified and corrected Dockerfiles, and the step-by-step workflow of this project."` ## Deliverable 8: Build an Apptainer image on a HPC of your choice -**step 1:** Make sure your images are publicly available (see deliverable 6) +**step 1:** Make sure your images are publicly available (see deliverable 6)  
-**step 2:** Connect to VSC and start interactive session +**step 2:** Connect to VSC and start interactive session
- connect using https://login.hpc.ugent.be/ - initiate an interactive shell session on the donphan cluster for 4 hours (1 node, 4 cores) -**step 3:** pull docker images from DockerHub using apptainer +**step 3:** pull docker images from DockerHub using apptainer
`apptainer build model-infer.sif docker://leenaputzeys/model-infer:v1-python3.9-slim` `apptainer build model-train.sif docker://leenaputzeys/model-train:v1.1-python3.11-slim` Edit: realised that this is not necessary, i should directly build images from the job script. -**step 4:** make a job script that builds the model-train apptainer image +**step 4:** make a job script that builds the model-train and model-infer Apptainer images  
--> see [script](https://github.com/leenput/project_docker_microcredential/blob/main/ML-job.sh) -**step 5:** run the job script on the VSC +**step 5:** run the job script on the VSC
`sbatch ML-job.sh` ## Deliverable 9: Provide the logs of the slurm job in GitHub -**step 1:** Download ML-job.sh and slurm log from VSC -`scp vsc49035@login.hpc.ugent.be:/user/gent/490/vsc49035/jobs/ML-job.sh .` - +**step 1:** Download ML-job.sh and slurm log from VSC
+`scp vsc49035@login.hpc.ugent.be:/user/gent/490/vsc49035/jobs/ML-job.sh .`
`scp vsc49035@login.hpc.ugent.be:/user/gent/490/vsc49035/jobs/slurm*.out .` -**Step 2:** Add files to github repository +**Step 2:** Add files to github repository  
``` git add * -git commit -m "Add job script and slurm output log used to build apptainer containers on VSC"` -git push` +git commit -m "Add job script and slurm output log used to build apptainer containers on VSC" +git push ``` @@ -119,9 +117,12 @@ To view the files: [VSC script](https://github.com/leenput/project_docker_microcredential/blob/main/ML-job.sh) [output log](https://github.com/leenput/project_docker_microcredential/blob/main/slurm-20163444.out) -## Deliverable 10: Document the steps in a text document in GitHub -Update PROJECT-STEPS.md file with final steps and make available on github. -`git add *` -`git commit -m "Update PROJECT-STEPS.md with steps to complete the project and achieve deliverables."` -`git push` +## Deliverable 10: Document the steps in a text document in GitHub
+Update PROJECT-STEPS.md file with final steps and make available on GitHub.
+ +``` +git add * +git commit -m "Update PROJECT-STEPS.md with steps to complete the project and achieve deliverables." +git push +```