diff --git a/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml b/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml index 63088bb..8f8f254 100644 --- a/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml +++ b/gfmstudio/fine_tuning/deployment/k8-hpo-tuning-jobs.tpl.yml @@ -49,14 +49,7 @@ spec: initContainers: - name: copy-config image: busybox - command: - - sh - - -c - - | - set -e - cp /config/config-train.yaml /app/config/ - mkdir -p /data/output - chmod -R g+rwX /data/output + command: ['sh', '-c', 'cp /config/config-train.yaml /app/config/'] volumeMounts: - name: config-volume mountPath: /config @@ -90,7 +83,7 @@ spec: mountPath: /ftuning - name: dshm mountPath: /dev/shm - command: ["/bin/sh"] + command: ["/bin/bash"] args: ["-c", "/ftuning/ftuning.sh"] env: - name: HF_HOME @@ -135,7 +128,7 @@ metadata: app: ${FTUNE_NAME} data: ftuning.sh: | - #!/bin/sh + #!/bin/bash set -e # Function to handle errors diff --git a/pipelines/Dockerfile.terratorch b/pipelines/Dockerfile.terratorch index ba11207..81db7d0 100644 --- a/pipelines/Dockerfile.terratorch +++ b/pipelines/Dockerfile.terratorch @@ -56,6 +56,7 @@ WORKDIR /terratorch RUN pip install --upgrade pip && \ pip install --timeout=7200 \ git+https://github.com/terrastackai/terratorch.git@v1.2.3pre1 \ + terratorch-iterate \ 'mlflow>=1.0.0' \ 'diffusers<=0.34.0' \ ipython \ @@ -68,7 +69,8 @@ RUN pip install --upgrade pip && \ tenacity \ pyyaml \ opentelemetry-distro \ - opentelemetry-exporter-otlp + opentelemetry-exporter-otlp \ + ray # ################################################################################ # # Stage 3: Final Production Image @@ -90,6 +92,8 @@ RUN mkdir -p \ /.local \ /.jupyter \ /geotunes/tune-tasks \ + # Required by terratorch-iterate + /geotunes/tune-tasks/job_logs \ /data/output \ /opt/app-root/src \ /opt/app-root/src/lightning_logs \