From fa347b6cd232ac9ae9bf56615b530ae38fd9820a Mon Sep 17 00:00:00 2001 From: vsoch Date: Tue, 28 Apr 2026 11:18:02 -0700 Subject: [PATCH 1/4] feat: untangle specific cluster build needs from ci here Signed-off-by: vsoch --- .github/workflows/build-deploy.yaml | 60 ++++++++++++++++++++++++ Dockerfile | 37 +-------------- Dockerfile.d/Dockerfile.base | 36 ++++++++++++++ docker-compose.yaml | 4 +- hack/create-cluster-lima.sh | 3 ++ hack/test-smoke.sh | 9 ++++ service/usernetes-start-control-plane.sh | 20 ++++---- service/usernetes-start-worker.sh | 21 +++++---- 8 files changed, 134 insertions(+), 56 deletions(-) create mode 100644 .github/workflows/build-deploy.yaml create mode 100644 Dockerfile.d/Dockerfile.base diff --git a/.github/workflows/build-deploy.yaml b/.github/workflows/build-deploy.yaml new file mode 100644 index 00000000..10870354 --- /dev/null +++ b/.github/workflows/build-deploy.yaml @@ -0,0 +1,60 @@ +name: Docker Build and Deploy + +on: + push: + branches: + - develop + pull_request: {} + +env: + REGISTRY: ghcr.io + IMAGE_NAME: converged-computing/usernetes + +jobs: + build-and-push: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to the Container registry + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + file: Dockerfile.d/Dockerfile.base + tags: | + # Set node-base as the primary tag for the main branch + type=raw,value=node-base,enable=${{ github.ref == 'refs/heads/main' }} + # Add SHA tag for traceability + type=sha,format=short + # Tag PRs with the PR number + type=ref,event=pr + + - 
name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + file: Dockerfile.d/Dockerfile.base + context: . + # Only push if it's NOT a pull request + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + # Use GitHub Actions cache to speed up builds + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/Dockerfile b/Dockerfile index a864e8bc..5f25c810 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,40 +1,7 @@ -ARG BASE_IMAGE=docker.io/kindest/node:v1.33.0@sha256:91e9ed777db80279c22d1d1068c091b899b2078506e4a0f797fbf6e397c0b0b2 -ARG CNI_PLUGINS_VERSION=v1.7.1 -ARG HELM_VERSION=v3.17.3 -ARG FLANNEL_VERSION=v0.26.7 +ARG BASE_IMAGE=ghcr.io/converged-computing/usernetes:node-base +# Edit this image to add / adopt for your environment FROM ${BASE_IMAGE} -COPY Dockerfile.d/SHA256SUMS.d/ /tmp/SHA256SUMS.d -ARG CNI_PLUGINS_VERSION -ARG HELM_VERSION -ARG FLANNEL_VERSION # This are private on our cluster and need to be copied to here COPY cspca.llnl.gov.cer.pem /usr/local/share/ca-certificates/ COPY cspca.cer.pem /usr/local/share/ca-certificates/ RUN update-ca-certificates -RUN arch="$(uname -m | sed -e s/x86_64/amd64/ -e s/aarch64/arm64/)" && \ - fname="cni-plugins-linux-${arch}-${CNI_PLUGINS_VERSION}.tgz" && \ - curl --insecure -o "${fname}" -fSL "https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/${fname}" && \ - grep "${fname}" "/tmp/SHA256SUMS.d/cni-plugins-${CNI_PLUGINS_VERSION}" | sha256sum -c && \ - mkdir -p /opt/cni/bin && \ - tar xzf "${fname}" -C /opt/cni/bin && \ - rm -f "${fname}" && \ - fname="helm-${HELM_VERSION}-linux-${arch}.tar.gz" && \ - curl --insecure -o "${fname}" -fSL "https://get.helm.sh/${fname}" && \ - grep "${fname}" "/tmp/SHA256SUMS.d/helm-${HELM_VERSION}" | sha256sum -c && \ - tar xzf "${fname}" -C /usr/local/bin --strip-components=1 -- "linux-${arch}/helm" && \ - rm -f "${fname}" && \ - 
fname="flannel.tgz" && \ - curl --insecure -o "${fname}" -fSL "https://github.com/flannel-io/flannel/releases/download/${FLANNEL_VERSION}/${fname}" && \ - grep "${fname}" "/tmp/SHA256SUMS.d/flannel-${FLANNEL_VERSION}" | sha256sum -c && \ - tar xzf "${fname}" -C / && \ - rm -f "${fname}" -# gettext-base: for `envsubst` -# moreutils: for `sponge` -# socat: for `socat` (to silence "[WARNING FileExisting-socat]" from kubeadm) -RUN apt-get update && apt-get install -y --no-install-recommends \ - gettext-base \ - moreutils \ - socat -ADD Dockerfile.d/etc_udev_rules.d_90-flannel.rules /etc/udev/rules.d/90-flannel.rules -ADD Dockerfile.d/u7s-entrypoint.sh / -ENTRYPOINT ["/u7s-entrypoint.sh", "/usr/local/bin/entrypoint", "/sbin/init"] diff --git a/Dockerfile.d/Dockerfile.base b/Dockerfile.d/Dockerfile.base new file mode 100644 index 00000000..b411a36e --- /dev/null +++ b/Dockerfile.d/Dockerfile.base @@ -0,0 +1,36 @@ +ARG BASE_IMAGE=docker.io/kindest/node:v1.33.0@sha256:91e9ed777db80279c22d1d1068c091b899b2078506e4a0f797fbf6e397c0b0b2 +ARG CNI_PLUGINS_VERSION=v1.7.1 +ARG HELM_VERSION=v3.17.3 +ARG FLANNEL_VERSION=v0.26.7 +FROM ${BASE_IMAGE} +COPY Dockerfile.d/SHA256SUMS.d/ /tmp/SHA256SUMS.d +ARG CNI_PLUGINS_VERSION +ARG HELM_VERSION +ARG FLANNEL_VERSION +RUN arch="$(uname -m | sed -e s/x86_64/amd64/ -e s/aarch64/arm64/)" && \ + fname="cni-plugins-linux-${arch}-${CNI_PLUGINS_VERSION}.tgz" && \ + curl --insecure -o "${fname}" -fSL "https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/${fname}" && \ + grep "${fname}" "/tmp/SHA256SUMS.d/cni-plugins-${CNI_PLUGINS_VERSION}" | sha256sum -c && \ + mkdir -p /opt/cni/bin && \ + tar xzf "${fname}" -C /opt/cni/bin && \ + rm -f "${fname}" && \ + fname="helm-${HELM_VERSION}-linux-${arch}.tar.gz" && \ + curl --insecure -o "${fname}" -fSL "https://get.helm.sh/${fname}" && \ + grep "${fname}" "/tmp/SHA256SUMS.d/helm-${HELM_VERSION}" | sha256sum -c && \ + tar xzf "${fname}" -C /usr/local/bin 
--strip-components=1 -- "linux-${arch}/helm" && \ + rm -f "${fname}" && \ + fname="flannel.tgz" && \ + curl --insecure -o "${fname}" -fSL "https://github.com/flannel-io/flannel/releases/download/${FLANNEL_VERSION}/${fname}" && \ + grep "${fname}" "/tmp/SHA256SUMS.d/flannel-${FLANNEL_VERSION}" | sha256sum -c && \ + tar xzf "${fname}" -C / && \ + rm -f "${fname}" +# gettext-base: for `envsubst` +# moreutils: for `sponge` +# socat: for `socat` (to silence "[WARNING FileExisting-socat]" from kubeadm) +RUN apt-get update && apt-get install -y --no-install-recommends \ + gettext-base \ + moreutils \ + socat ipset wget +ADD Dockerfile.d/etc_udev_rules.d_90-flannel.rules /etc/udev/rules.d/90-flannel.rules +ADD Dockerfile.d/u7s-entrypoint.sh / +ENTRYPOINT ["/u7s-entrypoint.sh", "/usr/local/bin/entrypoint", "/sbin/init"] diff --git a/docker-compose.yaml b/docker-compose.yaml index 1df41123..a4ad40a8 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -3,7 +3,9 @@ --- services: node: - image: usernetes_node + build: + context: . + dockerfile: Dockerfile.d/Dockerfile.base hostname: ${NODE_NAME} privileged: true restart: always diff --git a/hack/create-cluster-lima.sh b/hack/create-cluster-lima.sh index ba3d230c..0fea2a5d 100755 --- a/hack/create-cluster-lima.sh +++ b/hack/create-cluster-lima.sh @@ -23,6 +23,9 @@ fi for host in host0 host1; do # Set --plain to minimize Limaism ${LIMACTL} start --plain --network lima:user-v2 --name="${host}" ${LIMACTL_CREATE_ARGS} "${LIMA_TEMPLATE}" + echo "LISTING ${host}" + ${LIMACTL} shell "${host}" ls / + ${LIMACTL} shell "${host}" ls /home ${LIMACTL} copy -r "$(pwd)" "${host}:${guest_home}/usernetes" ${LIMACTL} shell "${host}" sudo CONTAINER_ENGINE="${CONTAINER_ENGINE}" "${guest_home}/usernetes/init-host/init-host.root.sh" # Terminate the current session so that the cgroup delegation takes an effect. This command exits with status 255 as SSH terminates. 
diff --git a/hack/test-smoke.sh b/hack/test-smoke.sh index 2a8680cb..8bf13bf7 100755 --- a/hack/test-smoke.sh +++ b/hack/test-smoke.sh @@ -54,6 +54,15 @@ EOF INFO "Waiting for 3 replicas to be ready" kubectl rollout status --timeout=5m statefulset + INFO "GET PODS" + kubectl get pods + INFO "DESCRIBE PODS" + kubectl describe pods + for name in $(kubectl get pods -o json | jq -r .items[].metadata.name) + do + kubectl logs $name + done + INFO "Connecting to dnstest-{0,1,2}.dnstest.default.svc.cluster.local" kubectl run -i --rm --image=alpine --restart=Never dnstest-shell -- sh -exc 'for f in $(seq 0 2); do wget -O- http://dnstest-${f}.dnstest.default.svc.cluster.local; done' diff --git a/service/usernetes-start-control-plane.sh b/service/usernetes-start-control-plane.sh index 2be076d7..3eb884d5 100755 --- a/service/usernetes-start-control-plane.sh +++ b/service/usernetes-start-control-plane.sh @@ -10,6 +10,16 @@ USERNETES_TEMPLATE_PATH=/usr/workspace/usernetes/usernetes-06-26-2025 # We will copy join command here shared_join_command_dir="/usr/workspace/usernetes" +# Logging functions for consistency (like Akihiro!) +log() { + echo "$(date '+%Y-%m-%d %H:%M:%S') - INFO - $1" +} + +error_exit() { + echo "$(date '+%Y-%m-%d %H:%M:%S') - ERROR - $1" >&2 + exit 1 +} + # The user needs to run the setup script USERNAME=$(whoami) @@ -37,16 +47,6 @@ which podman-compose # We don't want to use /var because that is a memory based fs export TMPDIR="/tmp/${USERNAME}" -# Logging functions for consistency (like Akihiro!) -log() { - echo "$(date '+%Y-%m-%d %H:%M:%S') - INFO - $1" -} - -error_exit() { - echo "$(date '+%Y-%m-%d %H:%M:%S') - ERROR - $1" >&2 - exit 1 -} - install_kubectl() { if ! command -v kubectl > /dev/null; then log "Installing kubectl..." 
diff --git a/service/usernetes-start-worker.sh b/service/usernetes-start-worker.sh index 709585d4..276984d0 100755 --- a/service/usernetes-start-worker.sh +++ b/service/usernetes-start-worker.sh @@ -7,6 +7,17 @@ set -euo pipefail USERNETES_CONTAINER_TECH=${1:-"podman"} USERNETES_TEMPLATE_PATH=/usr/workspace/usernetes/usernetes-06-26-2025 +# Logging functions for consistency (like Akihiro!) +log() { + echo "$(date '+%Y-%m-%d %H:%M:%S') - INFO - $1" +} + +error_exit() { + echo "$(date '+%Y-%m-%d %H:%M:%S') - ERROR - $1" >&2 + exit 1 +} + + # The join command needs to be here shared_join_command_dir="/usr/workspace/usernetes" if [ ! -f "${shared_join_command_dir}/join-command" ] @@ -38,16 +49,6 @@ log " Updated PATH: ${PATH}" # We don't want to use /var because that is a memory based fs export TMPDIR="/tmp/${USERNAME}" -# Logging functions for consistency (like Akihiro!) -log() { - echo "$(date '+%Y-%m-%d %H:%M:%S') - INFO - $1" -} - -error_exit() { - echo "$(date '+%Y-%m-%d %H:%M:%S') - ERROR - $1" >&2 - exit 1 -} - install_kubectl() { if ! command -v kubectl > /dev/null; then log "Installing kubectl..." 
From 65a875a3770258016d3f2be4bd70e4de088a998f Mon Sep 17 00:00:00 2001 From: Vanessa Sochat <814322+vsoch@users.noreply.github.com> Date: Tue, 28 Apr 2026 13:43:37 -0700 Subject: [PATCH 2/4] Change guest home directory to '/home/runner.guest' --- .github/workflows/main.yaml | 35 ++++++--------------- .github/workflows/reusable-single-node.yaml | 6 ---- Makefile | 6 ++-- hack/create-cluster-lima.sh | 5 +-- hack/test-smoke.sh | 24 +++++++++++++- 5 files changed, 40 insertions(+), 36 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 7efb874d..6075da33 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -2,15 +2,16 @@ name: Main on: [push, pull_request] jobs: - single-node: - name: "Single node" - strategy: - fail-fast: false - matrix: - container_engine: [docker, nerdctl, podman] - uses: ./.github/workflows/reusable-single-node.yaml - with: - container_engine: ${{ matrix.container_engine }} + # We would never use usernetes on a single node + #single-node: + # name: "Single node" + # strategy: + # fail-fast: false + # matrix: + # container_engine: [docker, nerdctl, podman] + # uses: ./.github/workflows/reusable-single-node.yaml + # with: + # container_engine: ${{ matrix.container_engine }} multi-node: name: "Multi node" @@ -30,19 +31,3 @@ jobs: with: lima_template: ${{ matrix.lima_template }} container_engine: ${{ matrix.container_engine }} - - # TODO: this test should create multiple instances of Usernetes on each of the hosts - multi-node-custom-ports: - name: "Multi node with custom service ports" - uses: ./.github/workflows/reusable-multi-node.yaml - with: - lima_template: "template://ubuntu-24.04" - container_engine: "docker" - # Defaults to 6443 - kube_apiserver_port: "8080" - # Defaults to 8472 - flannel_port: "9072" - # Defaults to 10250 - kubelet_port: "20250" - # Defaults to 2379 - etcd_port: "9090" diff --git a/.github/workflows/reusable-single-node.yaml 
b/.github/workflows/reusable-single-node.yaml index 28b70013..cd0a7f7d 100644 --- a/.github/workflows/reusable-single-node.yaml +++ b/.github/workflows/reusable-single-node.yaml @@ -83,9 +83,3 @@ jobs: - run: make kubeconfig - run: kubectl taint nodes --all node-role.kubernetes.io/control-plane- - run: ./hack/test-smoke.sh - - name: "Test data persistency after restarting the node" - run: | - make down - make up - sleep 30 - ./hack/test-smoke.sh diff --git a/Makefile b/Makefile index de258ddb..8ed4c7b6 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,8 @@ export PORT_KUBELET ?= 10250 export PORT_FLANNEL ?= 8472 export PORT_KUBE_APISERVER ?= 6443 +HERE := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) + # HOSTNAME is the name of the physical host export HOSTNAME ?= $(shell hostname) # HOST_IP is the IP address of the physical host. Accessible from other hosts. @@ -82,7 +84,7 @@ render: check-preflight .PHONY: up up: check-preflight # Podman creates cni files in a shared location, this ensures unique names that do not clobbed one another - sed -i "s/default_network/$(HOSTNAME)/g" docker-compose.yaml + sed -i "s/default_network/$(HOSTNAME)/g" $(HERE)/docker-compose.yaml $(COMPOSE) up --build -d .PHONY: down @@ -145,7 +147,7 @@ sync-external-ip: .PHONY: kubeadm-join kubeadm-join: # Our kernel is too old for usernetes, so we need this - sed -i "s/--token/--ignore-preflight-errors=all --token/g" join-command + sed -i "s/--token/--ignore-preflight-errors=all --token/g" $(HERE)/join-command $(NODE_SHELL) /bin/bash /usernetes/join-command @echo "# Run 'make sync-external-ip' on the control plane" diff --git a/hack/create-cluster-lima.sh b/hack/create-cluster-lima.sh index 0fea2a5d..7ccf1533 100755 --- a/hack/create-cluster-lima.sh +++ b/hack/create-cluster-lima.sh @@ -11,7 +11,7 @@ set -eux -o pipefail : "${PORT_FLANNEL:=8472}" : "${PORT_KUBELET:=10250}" -guest_home="/home/${USER}.linux" +guest_home="/home/runner.guest" if [ "$(id -u)" -le 1000 ]; then # In --plain mode, UID 
has to be >= 1000 to populate subuids @@ -49,7 +49,8 @@ done ${LIMACTL} shell host0 ${SERVICE_PORTS} CONTAINER_ENGINE="${CONTAINER_ENGINE}" make -C "${guest_home}/usernetes" kubeadm-init install-flannel kubeconfig join-command # Let host1 join the cluster -${LIMACTL} copy host0:~/usernetes/join-command host1:~/usernetes/join-command +${LIMACTL} copy host0:${guest_home}/usernetes/join-command ./join-command +${LIMACTL} copy ./join-command host1:${guest_home}/usernetes/join-command ${LIMACTL} shell host1 ${SERVICE_PORTS} CONTAINER_ENGINE="${CONTAINER_ENGINE}" make -C "${guest_home}/usernetes" kubeadm-join ${LIMACTL} shell host0 ${SERVICE_PORTS} CONTAINER_ENGINE="${CONTAINER_ENGINE}" make -C "${guest_home}/usernetes" sync-external-ip diff --git a/hack/test-smoke.sh b/hack/test-smoke.sh index 8bf13bf7..034bb6bd 100755 --- a/hack/test-smoke.sh +++ b/hack/test-smoke.sh @@ -56,16 +56,38 @@ EOF INFO "GET PODS" kubectl get pods + kubectl get pods -n kube-system INFO "DESCRIBE PODS" kubectl describe pods for name in $(kubectl get pods -o json | jq -r .items[].metadata.name) do kubectl logs $name + kubectl exec -it $name -- cat /etc/resolv.conf done + INFO "Patching CoreDNS to use 8.8.8.8" + kubectl get configmap coredns -n kube-system -o yaml | \ + sed 's/forward . \/etc\/resolv.conf/forward . 
8.8.8.8/' | \ + kubectl apply -f - + + INFO "Restarting CoreDNS" + kubectl delete pod -n kube-system -l k8s-app=kube-dns + kubectl rollout status deployment coredns -n kube-system + INFO "Connecting to dnstest-{0,1,2}.dnstest.default.svc.cluster.local" - kubectl run -i --rm --image=alpine --restart=Never dnstest-shell -- sh -exc 'for f in $(seq 0 2); do wget -O- http://dnstest-${f}.dnstest.default.svc.cluster.local; done' + kubectl run -i --rm --image=busybox:1.28 --restart=Never dnstest-shell -- sh -exc ' + echo "--- Resolv.conf ---" + cat /etc/resolv.conf + + echo "--- Testing External DNS (google.com) ---" + nslookup google.com || echo "External DNS Failed" + + echo "--- Testing Internal DNS (dnstest-0) ---" + nslookup dnstest-0.dnstest || echo "Internal DNS Failed" + for f in 0 1 2; do + wget -qO- http://dnstest-${f}.dnstest.default.svc.cluster.local + done' INFO "Deleting Service \"dnstest\"" kubectl delete service dnstest INFO "Deleting StatefulSet \"dnstest\"" From 29be7e936d44e2f08a46a2c4c5182749232c8c63 Mon Sep 17 00:00:00 2001 From: Vanessa Sochat <814322+vsoch@users.noreply.github.com> Date: Thu, 7 May 2026 11:21:14 -0700 Subject: [PATCH 3/4] test: build in makefile (#11) * test: build in makefile * test: add compose directory * build: podman does not allow pulling * test: nri plugin * fix: restore dockerfile base * bug: we should not overwrite storage.conf Signed-off-by: vsoch --- Dockerfile | 5 ++++- Makefile | 10 +++++++--- compose/prebuilt-node.yaml | 3 +++ service/usernetes-start-control-plane.sh | 19 ++++++++++++++----- service/usernetes-start-worker.sh | 10 ++++++++-- 5 files changed, 36 insertions(+), 11 deletions(-) create mode 100644 compose/prebuilt-node.yaml diff --git a/Dockerfile b/Dockerfile index 5f25c810..520c4cbd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,10 @@ -ARG BASE_IMAGE=ghcr.io/converged-computing/usernetes:node-base +# ARG BASE_IMAGE=ghcr.io/converged-computing/usernetes:node-base +ARG BASE_IMAGE=usernetes_base # Edit 
this image to add / adopt for your environment FROM ${BASE_IMAGE} # This are private on our cluster and need to be copied to here COPY cspca.llnl.gov.cer.pem /usr/local/share/ca-certificates/ COPY cspca.cer.pem /usr/local/share/ca-certificates/ +COPY PAN-cspca.llnl.gov.crt /usr/local/share/ca-certificates/ +COPY hpc-profile.json /var/lib/kubelet/seccomp/hpc-profile.json RUN update-ca-certificates diff --git a/Makefile b/Makefile index 8ed4c7b6..81203db7 100644 --- a/Makefile +++ b/Makefile @@ -24,9 +24,7 @@ export NODE_SUBNET ?= $(shell $(CURDIR)/Makefile.d/node-subnet.sh) export NODE_IP := $(subst .0/24,.100,$(NODE_SUBNET)) export CONTAINER_ENGINE ?= $(shell $(CURDIR)/Makefile.d/detect-container-engine.sh CONTAINER_ENGINE) - export CONTAINER_ENGINE_TYPE ?= $(shell $(CURDIR)/Makefile.d/detect-container-engine.sh CONTAINER_ENGINE_TYPE) - COMPOSE ?= $(shell $(CURDIR)/Makefile.d/detect-container-engine.sh COMPOSE) NODE_SERVICE_NAME := node @@ -85,7 +83,13 @@ render: check-preflight up: check-preflight # Podman creates cni files in a shared location, this ensures unique names that do not clobbed one another sed -i "s/default_network/$(HOSTNAME)/g" $(HERE)/docker-compose.yaml - $(COMPOSE) up --build -d + $(COMPOSE) up -d + +.PHONY: up-built +up-built: check-preflight + # Podman creates cni files in a shared location, this ensures unique names that do not clobbed one another + sed -i "s/default_network/$(HOSTNAME)/g" $(HERE)/docker-compose.yaml + $(COMPOSE) -f $(HERE)/docker-compose.yaml -f $(HERE)/compose/prebuilt-node.yaml up -d .PHONY: down down: diff --git a/compose/prebuilt-node.yaml b/compose/prebuilt-node.yaml new file mode 100644 index 00000000..27b475ac --- /dev/null +++ b/compose/prebuilt-node.yaml @@ -0,0 +1,3 @@ +services: + node: + image: usernetes_node diff --git a/service/usernetes-start-control-plane.sh b/service/usernetes-start-control-plane.sh index 3eb884d5..78825067 100755 --- a/service/usernetes-start-control-plane.sh +++ 
b/service/usernetes-start-control-plane.sh @@ -5,7 +5,7 @@ set -euo pipefail # These are variables we likely will change # LC only supplies podman USERNETES_CONTAINER_TECH=${1:-"podman"} -USERNETES_TEMPLATE_PATH=/usr/workspace/usernetes/usernetes-06-26-2025 +USERNETES_TEMPLATE_PATH=/usr/workspace/usernetes/usernetes-develop # We will copy join command here shared_join_command_dir="/usr/workspace/usernetes" @@ -101,9 +101,12 @@ mkdir -p "${XDG_RUNTIME_DIR}" setup_podman() { # These are likely to give issues. This resets podman with a vfs backend and then # cleans up tmp in the unshared context + if [[ -e "${HOME}/.config/containers/storage.conf" ]]; then + return + fi if [[ -x "/collab/usr/gapps/lcweg/containers/scripts/enable-podman.sh" ]]; then log " Running enable-podman.sh vfs" - if ! bash /collab/usr/gapps/lcweg/containers/scripts/enable-podman.sh vfs; then + if ! bash /collab/usr/gapps/lcweg/containers/scripts/enable-podman.sh overlay; then log " WARNING: enable-podman.sh script failed. Continuing, but podman might not be configured correctly." 
fi else @@ -126,8 +129,13 @@ unshare_cleanup # Usernetes Specific Setup log "๐Ÿ“‚ Copying Usernetes template from ${USERNETES_TEMPLATE_PATH}" cp -R "${USERNETES_TEMPLATE_PATH}" "${TMPDIR}/usernetes" -cd "${TMPDIR}/usernetes" # Now inside the copied template -sleep 3 # Allow filesystem operations to settle if needed + + # Now inside the copied template +cd "${TMPDIR}/usernetes" +sleep 3 + +log "๐Ÿ‘ท Building Usernetes container image 'usernetes_base'" +${container_runtime_path} build --userns-uid-map=0:0:1 --userns-uid-map=1:1:1999 --userns-uid-map=65534:2000:2 -f $(pwd)/Dockerfile.d/Dockerfile.base -t usernetes_base $(pwd) log "๐Ÿ‘ท Building Usernetes container image 'usernetes_node'" ${container_runtime_path} build --userns-uid-map=0:0:1 --userns-uid-map=1:1:1999 --userns-uid-map=65534:2000:2 -f $(pwd)/Dockerfile -t usernetes_node $(pwd) @@ -136,6 +144,7 @@ cleanup() { log "๐Ÿงน Cleaning up old networks or volumes (best effort)" make down-v || log " 'make down-v' failed, possibly because nothing was running. Continuing." + # Explicit cleanup, as 'make down-v' might not cover everything or could fail "${container_runtime_path}" network rm usernetes_default -f || log " Network 'usernetes_default' not found." "${container_runtime_path}" volume rm usernetes_node-var -f || log " Volume 'usernetes_node-var' not found." @@ -145,7 +154,7 @@ cleanup() { cleanup log " โฌ†๏ธ Bringing up the Usernetes node(s) with 'make up'" -if ! make up; then +if ! make up-built; then error_exit "Failed to bring up Usernetes with 'make up'." 
fi sleep 3 diff --git a/service/usernetes-start-worker.sh b/service/usernetes-start-worker.sh index 276984d0..eda0ae9f 100755 --- a/service/usernetes-start-worker.sh +++ b/service/usernetes-start-worker.sh @@ -5,7 +5,7 @@ set -euo pipefail # These are variables we likely will change # LC only supplies podman USERNETES_CONTAINER_TECH=${1:-"podman"} -USERNETES_TEMPLATE_PATH=/usr/workspace/usernetes/usernetes-06-26-2025 +USERNETES_TEMPLATE_PATH=/usr/workspace/usernetes/usernetes-develop # Logging functions for consistency (like Akihiro!) log() { @@ -104,6 +104,9 @@ mkdir -p "${XDG_RUNTIME_DIR}" setup_podman() { # These are likely to give issues. This resets podman with a vfs backend and then # cleans up tmp in the unshared context + if [[ -e "${HOME}/.config/containers/storage.conf" ]]; then + return + fi if [[ -x "/collab/usr/gapps/lcweg/containers/scripts/enable-podman.sh" ]]; then log " Running enable-podman.sh vfs" if ! bash /collab/usr/gapps/lcweg/containers/scripts/enable-podman.sh vfs; then @@ -134,6 +137,9 @@ cp -R "${USERNETES_TEMPLATE_PATH}" "${TMPDIR}/usernetes" cd "${TMPDIR}/usernetes" sleep 3 +log "๐Ÿ‘ท Building Usernetes container image 'usernetes_base'" +${container_runtime_path} build --userns-uid-map=0:0:1 --userns-uid-map=1:1:1999 --userns-uid-map=65534:2000:2 -f $(pwd)/Dockerfile.d/Dockerfile.base -t usernetes_base $(pwd) + log "๐Ÿ‘ท Building Usernetes container image 'usernetes_node'" ${container_runtime_path} build --userns-uid-map=0:0:1 --userns-uid-map=1:1:1999 --userns-uid-map=65534:2000:2 -f $(pwd)/Dockerfile -t usernetes_node $(pwd) @@ -150,7 +156,7 @@ cleanup() { cleanup log " โฌ†๏ธ Bringing up the Usernetes node(s) with 'make up'" -if ! make up; then +if ! make up-built; then error_exit "Failed to bring up Usernetes with 'make up'." 
fi sleep 3 From aeaf88e5a3956f228c4f6edcbfd828b8a69e5a40 Mon Sep 17 00:00:00 2001 From: vsoch Date: Thu, 7 May 2026 19:41:07 -0700 Subject: [PATCH 4/4] amd gpu: tested and organized We need to find the right base combination for pytorch. There is the dual challenge/complexity of matching old AMD gpus plus containers with builds that take particular patterns create whiteout or input/output errors in our setup. Signed-off-by: vsoch --- service/README.md | 62 +---------------------- service/gpus/README.md | 26 ++++++++++ service/gpus/pytorch-amd-interactive.yaml | 32 ++++++++++++ 3 files changed, 60 insertions(+), 60 deletions(-) create mode 100644 service/gpus/README.md create mode 100644 service/gpus/pytorch-amd-interactive.yaml diff --git a/service/README.md b/service/README.md index 9bd636e2..49b79182 100644 --- a/service/README.md +++ b/service/README.md @@ -61,68 +61,10 @@ u7s-corona190 Ready control-plane 5m v1.30.0 u7s-corona196 Ready 3m7s v1.30.0 ``` -Install the Flux Operator... +You can now install the Flux Operator and run experiments, or look at [using gpus](gpus). ```bash kubectl apply -f https://raw.githubusercontent.com/flux-framework/flux-operator/refs/heads/main/examples/dist/flux-operator.yaml ``` -Test away! Good luck. 
Other containers to try: - - -```bash -# testing bare metal - 53 seconds -flux run -N1 -n 48 /usr/workspace/usernetes/lammps/build/install/bin/lmp -v x 8 -v y 8 -v z 8 -in in.reaxc.hns -nocite - -# 2 nodes, 29 seconds -flux run -N2 -n 96 /usr/workspace/usernetes/lammps/build/install/bin/lmp -v x 8 -v y 8 -v z 8 -in in.reaxc.hns -nocite - -# mpirun with one node: 1:18s -/opt/toss/openmpi/4.1/gnu/bin/mpirun --allow-run-as-root --mca plm_rsh_agent "" -np 48 lmp -v x 8 -v y 8 -v z 8 -in in.reaxc.hns -nocite - -# OSU Latency (need to compare these two) -flux run -N2 -n2 osu_latency -flux run -N2 --env UCX_TLS=rc_x,sm,self --env OMPI_MCA_pml=ucx --env UCX_NET_DEVICES=mlx5_0:1 -n2 osu_latency - -# LAMMPS (many of these likely aren't required, we will learn with experiments) -export OMPI_MCA_opal_warn_on_missing_libcuda=0 -export OMPI_MCA_btl=^openib,self,vader -export OMPI_MCA_pml=ucx -export OMPI_MCA_osc=ucx -export UCX_TLS=all -flux run -N2 -opmi=pmi2 -n 96 lmp -v x 8 -v y 8 -v z 8 -in in.reaxc.hns -nocite - -export OMPI_MCA_pml=ucx -export UCX_MEMTYPE_CACHE=y -export UCX_LOG_LEVEL=DEBUG -export OMPI_MCA_btl="^openib,tcp" -flux run -N2 --env UCX_TLS=rc_x,sm,self --env OMPI_MCA_pml=ucx --env UCX_NET_DEVICES=mlx5_0:1 -n2 osu_latency - -# We also should test this - this helped on Azure -export UCX_IB_MLX5_DEVX=y - -export OMPI_MCA_opal_common_ucx_opal_mem_hooks=1 -export OMPI_MCA_btl_openib_allow_ib=true -export UCX_NET_DEVICES=mlx5_0:1 -export UCX_TLS=rc,sm,self -export OMPI_MCA_pml=ucx -export OMPI_MCA_osc=ucx -flux run -N2 -n96 lmp -v x 8 -v y 8 -v z 8 -in in.reaxc.hns -nocite -``` - -### GPUs - -You can install the [ROCm/k8s-device-plugin](https://github.com/ROCm/k8s-device-plugin) to expose GPU devices to your pods. 
- -```bash -# Install the driver plugin -kubectl create -f https://raw.githubusercontent.com/ROCm/k8s-device-plugin/master/k8s-ds-amdgpu-dp.yaml - -# Create a test workflow that uses GPU (takes a bit to pull) -https://raw.githubusercontent.com/ROCm/k8s-device-plugin/763445e18f3838fa72b22e31a04ec25987334bff/example/pod/pytorch-non-privileged.yaml - -# Get logs (it takes a while to pull...) -kubectl logs alexnet-tf-gpu-pod alexnet-tf-gpu-container -``` - -Our final experiments will be done separately, and these notes likely cleaned up. +Test away! Good luck. diff --git a/service/gpus/README.md b/service/gpus/README.md new file mode 100644 index 00000000..30b17ecd --- /dev/null +++ b/service/gpus/README.md @@ -0,0 +1,26 @@ +# GPUs + +You can install the [ROCm/k8s-device-plugin](https://github.com/ROCm/k8s-device-plugin) to expose GPU devices to your pods. + +```bash +kubectl create -f https://raw.githubusercontent.com/ROCm/k8s-device-plugin/master/k8s-ds-amdgpu-dp.yaml +``` + +# Create a test workflow that uses GPU (takes a bit to pull) + +```bash +# test rocminfo, or rocm-smi inside the pod +kubectl apply -f ./service/gpus/pytorch-amd-interactive.yaml +``` +When we can figure out the right container, this should work inside (latest segfaults, likely incompatible, and I have not been able to use older versions due to whiteout file issues). + +```python +import torch +if torch.cuda.is_available(): + print(f"GPU is available. 
Device count: {torch.cuda.device_count()}") + print(f"Device name: {torch.cuda.get_device_name(0)}") + x = torch.ones(3, 3, device='cuda') + y = torch.ones(3, 3, device='cuda') * 2 + z = x + y + print(f"Result of tensor addition on GPU: {z}") +``` diff --git a/service/gpus/pytorch-amd-interactive.yaml b/service/gpus/pytorch-amd-interactive.yaml new file mode 100644 index 00000000..6a464b39 --- /dev/null +++ b/service/gpus/pytorch-amd-interactive.yaml @@ -0,0 +1,32 @@ +apiVersion: v1 +kind: Pod +metadata: + name: pytorch-non-privileged-gpu-pod +spec: + restartPolicy: Never + hostIPC: true + volumes: + - name: dshm + emptyDir: + medium: Memory + containers: + - name: pytorch-gpu-container + volumeMounts: + - mountPath: /dev/shm + name: dshm + # Note, currently getting whiteout errors. The latest is incompatible with our old GPUs. + # This likely will work, but we need the right container. + image: rocm/pytorch:rocm5.4_ubuntu20.04_py3.8_pytorch_1.12.1 + # image: rocm/pytorch:rocm5.7_ubuntu22.04_py3.10_pytorch_2.0.1 + # image: rocm/pytorch:latest + command: + - sleep + - infinity + securityContext: + privileged: false + allowPrivilegeEscalation: false + seccompProfile: + type: Unconfined + resources: + limits: + amd.com/gpu: 8