23 changes: 20 additions & 3 deletions Dockerfile.infer
# Define base image
FROM python:3.9-slim

# Define working directory in container
WORKDIR /app

# Copy requirements and install the Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the server script into the container
COPY server.py .

# Expose container port 8080
EXPOSE 8080

# Run the server.py script
CMD ["python", "server.py"]

# Commands to build and run the container
# 1. Build the image: sudo docker build -t model-infer:v1-python3.9-slim -f Dockerfile.infer .
# 2. Check if the build was successful and find the image id: sudo docker images
# 3. Run the container in detached mode and map the port: sudo docker run --detach -p 5000:8080 -v ./models:/app/models model-infer:v1-python3.9-slim
# 4. Check the URL to see if it worked: http://localhost:5000/
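After step 3, it can be worth confirming that the container is actually up before opening the URL. A quick check with standard Docker commands (the `<container-id>` placeholder stands for the ID printed by `docker ps`):

```bash
# List running containers and confirm model-infer is among them
sudo docker ps
# Follow the server logs of the running container
sudo docker logs <container-id>
```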
35 changes: 28 additions & 7 deletions Dockerfile.train
# Official stable Python image selected from DockerHub based on the python-based requirements
# Use Python image as base image
FROM python:3.11-slim

# Add label
LABEL maintainer="Leena Putzeys"

# Set a working directory
# Modified from /data to /app to ensure compatibility with train.py
WORKDIR /app

# Copy the requirements.txt file to the working directory of the container
COPY requirements.txt .

# Install the Python dependencies
RUN pip install --upgrade pip
RUN pip install -r requirements.txt

# Copy the training script (train.py) to the working directory
# Error after running: the output dir in the script needs to be compatible.
# Added mkdir command
COPY train.py .
RUN mkdir -p /models

Reviewer comment (Contributor) on `RUN mkdir -p /models`: Could you explain this statement?

# Run the training script that generates the model
CMD ["python","train.py"]

# Steps to run the training of the model on your own laptop
# 1. Open Docker Desktop
# 2. Build the image: sudo docker build -t model-train:v1.1-python3.11-slim -f Dockerfile.train .
# 3. Check if the image was built: sudo docker images
# 4. Make a local folder to mount as a volume to get the output files from the container
# 5. Run the container: sudo docker run -v ./models:/app/models model-train:v1.1-python3.11-slim
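Once the run in step 5 completes, the trained model should appear in the mounted host folder. A quick verification (the filename matches the models/iris_model.pkl file added in this PR):

```bash
# The bind mount ./models:/app/models makes the container's output
# visible on the host
ls -lh ./models
# expected to list: iris_model.pkl
```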
18 changes: 18 additions & 0 deletions ML-job.sh
#!/bin/bash

#SBATCH --job-name=job_submission
#SBATCH --partition=donphan
#SBATCH --mem=8G
#SBATCH --time=00:30:00

echo "Let's build containers on the VSC"

apptainer build --fakeroot model-train.sif docker://leenaputzeys/model-train:v1.1-python3.11-slim
apptainer build --fakeroot model-infer.sif docker://leenaputzeys/model-infer:v1-python3.9-slim

echo "Move the containers to containers folder in scratch"
mv model-train.sif $VSC_SCRATCH/containers/.
mv model-infer.sif $VSC_SCRATCH/containers/.

echo "Finished!"

128 changes: 128 additions & 0 deletions PROJECT-STEPS.md
# Outline of steps used to carry out microcredential project 2

Project description: https://github.com/vib-tcp/project_docker_microcredential

## Deliverable 1: Clone this repository to your personal github account
**step 1**: fork the project <br>
**step 2**: clone the project on the local machine <br>
`git clone git@github.com:leenput/project_docker_microcredential.git`

## Deliverable 2 & 4: Containerize training the machine learning model and run it using Docker

**step 1:** complete file named Dockerfile.train

approach:
- inspect the Dockerfile, the train.py script and requirements.txt
- select an appropriate base image from DockerHub; judging from the files it should be Python-based: selected the python:3.11-slim image
- set an appropriate working directory in the container, making sure it matches the output directory specified in the Python script
- copy requirements.txt and install the dependencies using a COPY instruction in the Dockerfile, followed by RUN pip install
- copy train.py to the working directory using a COPY instruction in the Dockerfile
- set the command to run train.py: CMD ["python","train.py"]

**step 2:** build the image based on the Dockerfile <br>
- build image using docker build
- make sure to add an informative tag
- point to appropriate Dockerfile using -f flag

`sudo docker build -t model-train:v1.1-python3.11-slim -f Dockerfile.train .`

**step 3:** run the training of the model on your computer <br>
- run container from image using docker run
- make sure to mount the correct volume to get the model generated in the container
- retrieve the image id using docker images:
`sudo docker images`
- run the container
`sudo docker run -v ./models:/apps/models c2e5fad55e4d`
Reviewer comment (Contributor): to make it more reusable I would not use the image ID to run the docker container.

Reviewer comment (Contributor): I found a typo in the command `sudo docker run -v ./models:/apps/models c2e5fad55e4d`; it should be `sudo docker run -v ./models:/app/models model-train:v1.1-python3.11-slim`.

**step 4:** document the commands as comments in the Dockerfile <br>
- added the commands as comments (#) in the Dockerfile

**step 5:** store the created Dockerfile in your cloned github repository <br>
`git add *` <br>
`git commit -m "Modified Dockerfile.train with correct workdir and volume mounting. Added model training container output in /model folder."`

## Deliverable 3 & 5: Containerize serving of the machine learning model and run the Docker container

**step 1:** correct the order of the instructions in the Dockerfile.infer <br>
- inspect the Dockerfile.infer and adjust the order and content based on the Dockerfile.train file
- add comments for each step to make it more readable and understandable
- add commands to build the image and run it

**step 2:** build the image based on modified Dockerfile <br>
`sudo docker build -f Dockerfile.infer -t model-infer:v1-python3.9-slim .`

**step 3:** run container to serve the model <br>
- get image id using `sudo docker images`
- run the image and set ports
`sudo docker run --detach -p 8080:8080 -v ./models/:/app/models 0ca32d91de36`
- access the container via port 8080: check the URL to see if it worked (http://localhost:8080/); a terminal-based check is sketched below
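Beyond opening the URL in a browser, the endpoint can be probed from the terminal. A minimal sketch, assuming server.py exposes a JSON prediction route; the `/predict` path and the payload shape are assumptions, not confirmed by this repository:

```bash
# Check that the server answers on the mapped port
curl http://localhost:8080/

# Hypothetical prediction request for the iris model; adjust the
# route and payload to whatever server.py actually implements
curl -X POST http://localhost:8080/predict \
  -H "Content-Type: application/json" \
  -d '{"features": [5.1, 3.5, 1.4, 0.2]}'
```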

## Deliverable 6: Store the Docker images on your personal account on Docker Hub
Personal DockerHub profile: leenaputzeys

**step 1:** login using terminal: <br>
`sudo docker login`: redirected to webpage to authenticate

**step 2:** modify the image tags to make them compatible with DockerHub repository naming conventions <br>
`sudo docker tag model-infer:v1-python3.9-slim leenaputzeys/model-infer:v1-python3.9-slim`

`sudo docker tag model-train:v1.1-python3.11-slim leenaputzeys/model-train:v1.1-python3.11-slim`

**step 3:** push images to DockerHub <br>
`sudo docker push leenaputzeys/model-infer:v1-python3.9-slim` <br>
`sudo docker push leenaputzeys/model-train:v1.1-python3.11-slim`
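As an optional sanity check that the push succeeded, the images can be pulled back from Docker Hub; since the repositories are public, the same commands work on any machine:

```bash
# Pull the published images to verify they resolve on Docker Hub
sudo docker pull leenaputzeys/model-train:v1.1-python3.11-slim
sudo docker pull leenaputzeys/model-infer:v1-python3.9-slim
```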

validate online: [DockerHub Leena](https://hub.docker.com/repositories/leenaputzeys)

## Deliverable 7: Provide the resulting Dockerfiles in GitHub
`git add *` <br>
`git commit -m "Add modified and corrected Dockerfiles, and the step-by-step workflow of this project.`


## Deliverable 8: Build an Apptainer image on a HPC of your choice
**step 1:** Make sure your images are publicly available (see deliverable 6) <br>

**step 2:** Connect to VSC and start interactive session <br>
- connect using https://login.hpc.ugent.be/
- initiate an interactive shell session on the donphan cluster for 4 hours (1 node, 4 cores); a command-line equivalent is sketched below
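The session above was started through the web portal; a command-line sketch of the same request follows (the flags are assumptions derived from "1 node, 4 cores, 4 hours" and may differ per VSC site):

```bash
# Request an interactive shell on the donphan partition:
# 1 node, 4 cores, 4 hours of walltime
srun --partition=donphan --nodes=1 --ntasks=4 --time=04:00:00 --pty bash
```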

**step 3:** pull docker images from DockerHub using apptainer <br>
`apptainer build model-infer.sif docker://leenaputzeys/model-infer:v1-python3.9-slim`

`apptainer build model-train.sif docker://leenaputzeys/model-train:v1.1-python3.11-slim`

Edit: realised that this is not necessary; I should build the images directly from the job script.
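Once a .sif exists, whether pulled here or built by the job script, it can be test-run directly, since `apptainer run` executes the CMD baked into the Docker image. A sketch, assuming the same bind mount as the Docker run (check train.py for the actual output directory):

```bash
# Run the training container; --bind mirrors Docker's -v flag
apptainer run --bind ./models:/app/models model-train.sif
```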

**step 4:** make a job script that builds the model-train apptainer image <br>
--> see [script](https://github.com/leenput/project_docker_microcredential/blob/main/ML-job.sh)

**step 5:** run the job script on the VSC <br>
`sbatch ML-job.sh`
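After submission, the job can be followed in the queue and its log inspected once it finishes (the job ID below is the one from this project's log):

```bash
squeue -u $USER           # follow the job in the queue
cat slurm-20163444.out    # inspect the build log once the job is done
```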

## Deliverable 9: Provide the logs of the slurm job in GitHub

**step 1:** Download ML-job.sh and slurm log from VSC <br>
`scp vsc49035@login.hpc.ugent.be:/user/gent/490/vsc49035/jobs/ML-job.sh .` <br>
`scp vsc49035@login.hpc.ugent.be:/user/gent/490/vsc49035/jobs/slurm*.out .`

**Step 2:** Add files to github repository <br>
```
git add *
git commit -m "Add job script and slurm output log used to build apptainer containers on VSC"
git push
```

To view the files: <br>
[VSC script](https://github.com/leenput/project_docker_microcredential/blob/main/ML-job.sh) <br>
[output log](https://github.com/leenput/project_docker_microcredential/blob/main/slurm-20163444.out)

## Deliverable 10: Document the steps in a text document in GitHub <br>
Update PROJECT-STEPS.md file with final steps and make available on GitHub. <br>

```
git add *
git commit -m "Update PROJECT-STEPS.md with steps to complete the project and achieve deliverables."
git push
```

22 changes: 12 additions & 10 deletions README.md
In this project, you will train, run and serve a machine learning model using Docker.

## Deliverables

- [X] Clone this repository to your personal github account
- [X] Containerize training the machine learning model
- [X] Containerize serving of the machine learning model
- [X] Train and run the machine learning model using Docker
- [X] Run the Docker container serving the machine learning model
- [X] Store the Docker images on your personal account on Docker Hub: [images](https://hub.docker.com/repositories/leenaputzeys)
- [X] Provide the resulting Dockerfiles in GitHub
  - [Dockerfile.train](https://github.com/leenput/project_docker_microcredential/blob/main/Dockerfile.train)
  - [Dockerfile.infer](https://github.com/leenput/project_docker_microcredential/blob/main/Dockerfile.infer)
- [X] Build an Apptainer image on a HPC of your choice
- [X] Provide the logs of the slurm job in GitHub: [log](https://github.com/leenput/project_docker_microcredential/blob/main/slurm-20163444.out)
- [X] Document the steps in a text document in GitHub: [workflow](https://github.com/leenput/project_docker_microcredential/blob/main/PROJECT-STEPS.md)

## Proposed steps - containerize and run training the machine learning model

Binary file added models/iris_model.pkl
Binary file not shown.
49 changes: 49 additions & 0 deletions slurm-20163444.out
Let's build containers on the VSC
INFO: Starting build...
Copying blob sha256:0ae3bf74519da2c8ccd7ff5d80c3575b217caa1b17701b98f9c2a08cd7f3da31
Copying blob sha256:8a628cdd7ccc83e90e5a95888fcb0ec24b991141176c515ad101f12d6433eb96
Copying blob sha256:2a47a8c4fd5c358f98fb193471dc5d7a1e75e33d480fb7a25a7130840a8769d9
Copying blob sha256:9c538fc354918cfe13231798fe17d7c4b463cda38500187c0ecedcc061d6129e
Copying blob sha256:47bbb0afa7fe5695aca89f358dc3e73f46655977e201f553a558b4590b951dac
Copying blob sha256:bfaefce2698fc9aa292a77e6a7a2dc5c5b96e274cbc42d9f77fbc18bd65b2751
Copying blob sha256:a155dd3a05b256fa2691a0c1733a51b4cdfc75a6bc0db4ca04abedbb16391dd3
Copying blob sha256:44bfad6b88982ca2639da655987e42d675cdd14ba83cf7cd732a3544965b7b1b
Copying blob sha256:d18aa744867de9dee97868ff9aef53ec99e31aef166df44da964613d3d887501
Copying blob sha256:cdc334c564348d4c408800f9fcd52e370830dc26a289a685d2f3b6146b791c3a
Copying config sha256:c7cb4ab762536a0cc70cd10455eb72323ef8624fb42e9e964b6c313bcdbcf854
Writing manifest to image destination
2025/04/10 14:19:05 info unpack layer: sha256:8a628cdd7ccc83e90e5a95888fcb0ec24b991141176c515ad101f12d6433eb96
2025/04/10 14:19:06 info unpack layer: sha256:2a47a8c4fd5c358f98fb193471dc5d7a1e75e33d480fb7a25a7130840a8769d9
2025/04/10 14:19:07 info unpack layer: sha256:9c538fc354918cfe13231798fe17d7c4b463cda38500187c0ecedcc061d6129e
2025/04/10 14:19:07 info unpack layer: sha256:47bbb0afa7fe5695aca89f358dc3e73f46655977e201f553a558b4590b951dac
2025/04/10 14:19:07 info unpack layer: sha256:bfaefce2698fc9aa292a77e6a7a2dc5c5b96e274cbc42d9f77fbc18bd65b2751
2025/04/10 14:19:07 info unpack layer: sha256:0ae3bf74519da2c8ccd7ff5d80c3575b217caa1b17701b98f9c2a08cd7f3da31
2025/04/10 14:19:07 info unpack layer: sha256:a155dd3a05b256fa2691a0c1733a51b4cdfc75a6bc0db4ca04abedbb16391dd3
2025/04/10 14:19:08 info unpack layer: sha256:44bfad6b88982ca2639da655987e42d675cdd14ba83cf7cd732a3544965b7b1b
2025/04/10 14:19:12 info unpack layer: sha256:d18aa744867de9dee97868ff9aef53ec99e31aef166df44da964613d3d887501
2025/04/10 14:19:12 info unpack layer: sha256:cdc334c564348d4c408800f9fcd52e370830dc26a289a685d2f3b6146b791c3a
INFO: Creating SIF file...
INFO: Build complete: model-train.sif
INFO: Starting build...
Copying blob sha256:489fdbaf1b1eb7d6709d369a780794a39a2ce445c4a88f28df1d955c1d813d2b
Copying blob sha256:8a628cdd7ccc83e90e5a95888fcb0ec24b991141176c515ad101f12d6433eb96
Copying blob sha256:74018f7cfa8f2965fd86b13c38f71417bc846e071a5f5bb5ae569ccb5a6e7248
Copying blob sha256:a0b0cfc480ce03c723a597904bcfbf28c71438c689e6d5097c2332835f67a40c
Copying blob sha256:97d21b95fb00ac3b08975ab6f8709f3a7e35a05d75e2f9a70fa95348279dac27
Copying blob sha256:160c6e4d51bd92dbfe5b4c5d7096ab7c0a227c6ceb0c6fe8270d68e0ffc46825
Copying blob sha256:d210d3818bb9d1be41c2e8d2a17a06bd964955a12d0966d7b2b31caaeac1731f
Copying blob sha256:6def3b0f029ac8b94bf690b5e3cda7604c3b9c825a81c998ce7eeb83cd1894b6
Copying config sha256:9e6fc2e2ad58b4fa80a0638427150d97aab7bdde9553ef69a0e7310f383228d3
Writing manifest to image destination
2025/04/10 14:20:16 info unpack layer: sha256:8a628cdd7ccc83e90e5a95888fcb0ec24b991141176c515ad101f12d6433eb96
2025/04/10 14:20:17 info unpack layer: sha256:74018f7cfa8f2965fd86b13c38f71417bc846e071a5f5bb5ae569ccb5a6e7248
2025/04/10 14:20:17 info unpack layer: sha256:a0b0cfc480ce03c723a597904bcfbf28c71438c689e6d5097c2332835f67a40c
2025/04/10 14:20:18 info unpack layer: sha256:97d21b95fb00ac3b08975ab6f8709f3a7e35a05d75e2f9a70fa95348279dac27
2025/04/10 14:20:18 info unpack layer: sha256:160c6e4d51bd92dbfe5b4c5d7096ab7c0a227c6ceb0c6fe8270d68e0ffc46825
2025/04/10 14:20:18 info unpack layer: sha256:489fdbaf1b1eb7d6709d369a780794a39a2ce445c4a88f28df1d955c1d813d2b
2025/04/10 14:20:18 info unpack layer: sha256:d210d3818bb9d1be41c2e8d2a17a06bd964955a12d0966d7b2b31caaeac1731f
2025/04/10 14:20:21 info unpack layer: sha256:6def3b0f029ac8b94bf690b5e3cda7604c3b9c825a81c998ce7eeb83cd1894b6
INFO: Creating SIF file...
INFO: Build complete: model-infer.sif
Move the containers to containers folder in scratch
Finished!