From 3af39eb913de597fe52fc4a2c9c94a913ea1bb14 Mon Sep 17 00:00:00 2001 From: WanjiruCate Date: Tue, 5 May 2026 13:07:30 +0300 Subject: [PATCH 1/7] :adhesive_bandage: Added iterate in the dockerfile --- pipelines/Dockerfile.terratorch | 1 + 1 file changed, 1 insertion(+) diff --git a/pipelines/Dockerfile.terratorch b/pipelines/Dockerfile.terratorch index ba11207..1e1c6ad 100644 --- a/pipelines/Dockerfile.terratorch +++ b/pipelines/Dockerfile.terratorch @@ -56,6 +56,7 @@ WORKDIR /terratorch RUN pip install --upgrade pip && \ pip install --timeout=7200 \ git+https://github.com/terrastackai/terratorch.git@v1.2.3pre1 \ + git+https://github.com/IBM/terratorch-iterate.git \ 'mlflow>=1.0.0' \ 'diffusers<=0.34.0' \ ipython \ From 4939335ace51646a56d61412e048a49fd49f3b16 Mon Sep 17 00:00:00 2001 From: WanjiruCate Date: Tue, 5 May 2026 13:44:40 +0300 Subject: [PATCH 2/7] :wrench: update k8s hpo template --- Note: the flow-style init-container command added below passes 'c' instead of '-c'; PATCH 4/7 corrects it. .../fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml b/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml index 63088bb..267c0f4 100644 --- a/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml +++ b/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml @@ -49,14 +49,7 @@ spec: initContainers: - name: copy-config image: busybox - command: - sh - -c - | set -e cp /config/config-train.yaml /app/config/ mkdir -p /data/output chmod -R g+rwX /data/output + command: ['sh', 'c', 'cp /config/config-train.yaml /app/config/'] volumeMounts: - name: config-volume mountPath: /config @@ -90,7 +83,7 @@ spec: mountPath: /ftuning - name: dshm mountPath: /dev/shm - command: ["/bin/sh"] + command: ["/bin/bash"] args: ["-c", "/ftuning/ftuning.sh"] env: - name: HF_HOME From 445691d8f08f98326e6453dd2b6b6d4f167694fb Mon Sep 17 00:00:00 2001 From: WanjiruCate Date: Tue, 5 May 2026 15:13:16 +0300 Subject: [PATCH 3/7] update 
bash --- gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml b/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml index 267c0f4..8f8f254 100644 --- a/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml +++ b/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml @@ -128,7 +128,7 @@ metadata: app: ${FTUNE_NAME} data: ftuning.sh: | - #!/bin/sh + #!/bin/bash set -e # Function to handle errors From c95ba16d7b73e869211e85d59cac37205e9fb71c Mon Sep 17 00:00:00 2001 From: WanjiruCate Date: Wed, 6 May 2026 10:08:55 +0300 Subject: [PATCH 4/7] fix init-error bug --- gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml b/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml index 8f8f254..ed22184 100644 --- a/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml +++ b/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml @@ -49,7 +49,7 @@ spec: initContainers: - name: copy-config image: busybox - command: ['sh', 'c', 'cp /config/config-train.yaml /app/config/'] + command: ['sh', '-c', 'cp /config/config-train.yaml /app/config/'] volumeMounts: - name: config-volume mountPath: /config From 462cf72a8dd89428d723db6a74050a8204c99cec Mon Sep 17 00:00:00 2001 From: WanjiruCate Date: Wed, 6 May 2026 11:23:00 +0300 Subject: [PATCH 5/7] update requirements --- pipelines/Dockerfile.terratorch | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pipelines/Dockerfile.terratorch b/pipelines/Dockerfile.terratorch index 1e1c6ad..f03f9e9 100644 --- a/pipelines/Dockerfile.terratorch +++ b/pipelines/Dockerfile.terratorch @@ -56,7 +56,7 @@ WORKDIR /terratorch RUN pip install --upgrade pip && \ pip install --timeout=7200 \ 
git+https://github.com/terrastackai/terratorch.git@v1.2.3pre1 \ - git+https://github.com/IBM/terratorch-iterate.git \ + terratorch-iterate \ 'mlflow>=1.0.0' \ 'diffusers<=0.34.0' \ ipython \ @@ -69,7 +69,8 @@ RUN pip install --upgrade pip && \ tenacity \ pyyaml \ opentelemetry-distro \ - opentelemetry-exporter-otlp + opentelemetry-exporter-otlp \ + ray # ################################################################################ # # Stage 3: Final Production Image From 235444489bbb04a72fd1a43b652def8562dc3edd Mon Sep 17 00:00:00 2001 From: WanjiruCate Date: Wed, 6 May 2026 12:11:36 +0300 Subject: [PATCH 6/7] Update the iterate logs folder creation --- pipelines/Dockerfile.terratorch | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pipelines/Dockerfile.terratorch b/pipelines/Dockerfile.terratorch index f03f9e9..81db7d0 100644 --- a/pipelines/Dockerfile.terratorch +++ b/pipelines/Dockerfile.terratorch @@ -92,6 +92,8 @@ RUN mkdir -p \ /.local \ /.jupyter \ /geotunes/tune-tasks \ + # Required by terratorch-iterate + /geotunes/tune-tasks/job_logs \ /data/output \ /opt/app-root/src \ /opt/app-root/src/lightning_logs \ From aaecd81e5975c45a129524cdaefe50158b7ef43e Mon Sep 17 00:00:00 2001 From: WanjiruCate Date: Thu, 7 May 2026 11:06:41 +0300 Subject: [PATCH 7/7] fix bug --- Note: as submitted, this patch reverted PATCH 4/7 by changing '-c' back to 'c'. Without -c, sh takes 'c' as the script file operand, so the copy-config init container cannot run the cp command. The added line below now keeps '-c', which leaves this hunk with no net change; the patch can be dropped from the series. gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml b/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml index ed22184..8f8f254 100644 --- a/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml +++ b/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml @@ -49,7 +49,7 @@ spec: initContainers: - name: copy-config image: busybox - command: ['sh', '-c', 'cp /config/config-train.yaml /app/config/'] + command: ['sh', '-c', 'cp /config/config-train.yaml /app/config/'] volumeMounts: - name: config-volume mountPath: /config