diff --git a/.github/workflows/build-deploy.yaml b/.github/workflows/build-deploy.yaml new file mode 100644 index 00000000..10870354 --- /dev/null +++ b/.github/workflows/build-deploy.yaml @@ -0,0 +1,59 @@ +name: Docker Build and Deploy + +on: + push: + branches: + - develop + pull_request: {} + +env: + REGISTRY: ghcr.io + IMAGE_NAME: converged-computing/usernetes + +jobs: + build-and-push: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to the Container registry + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + # Set node-base as the primary tag for the develop branch (this workflow only pushes on develop) + type=raw,value=node-base,enable=${{ github.ref == 'refs/heads/develop' }} + # Add SHA tag for traceability + type=sha,format=short + # Tag PRs with the PR number + type=ref,event=pr + + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + file: Dockerfile.d/Dockerfile.base + context: . 
+ # Only push if it's NOT a pull request + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + # Use GitHub Actions cache to speed up builds + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 7efb874d..d087205d 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -7,7 +7,7 @@ jobs: strategy: fail-fast: false matrix: - container_engine: [docker, nerdctl, podman] + container_engine: [docker, podman] uses: ./.github/workflows/reusable-single-node.yaml with: container_engine: ${{ matrix.container_engine }} @@ -20,8 +20,6 @@ jobs: include: - lima_template: template://ubuntu-24.04 container_engine: docker - - lima_template: template://ubuntu-24.04 - container_engine: nerdctl - lima_template: template://centos-stream-9 container_engine: podman - lima_template: template://fedora diff --git a/.github/workflows/reusable-single-node.yaml b/.github/workflows/reusable-single-node.yaml index 28b70013..04bcb970 100644 --- a/.github/workflows/reusable-single-node.yaml +++ b/.github/workflows/reusable-single-node.yaml @@ -76,10 +76,11 @@ jobs: sudo apt-get update sudo apt-get install -y podman-compose podman info + # Emulate build on hpc system - run: make up - run: sleep 5 - run: make kubeadm-init - - run: make install-flannel + - run: make install-calico - run: make kubeconfig - run: kubectl taint nodes --all node-role.kubernetes.io/control-plane- - run: ./hack/test-smoke.sh diff --git a/Dockerfile b/Dockerfile index a864e8bc..15e76a15 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,40 +1,6 @@ -ARG BASE_IMAGE=docker.io/kindest/node:v1.33.0@sha256:91e9ed777db80279c22d1d1068c091b899b2078506e4a0f797fbf6e397c0b0b2 -ARG CNI_PLUGINS_VERSION=v1.7.1 -ARG HELM_VERSION=v3.17.3 -ARG FLANNEL_VERSION=v0.26.7 +ARG BASE_IMAGE=ghcr.io/converged-computing/usernetes:node-base FROM ${BASE_IMAGE} -COPY 
Dockerfile.d/SHA256SUMS.d/ /tmp/SHA256SUMS.d -ARG CNI_PLUGINS_VERSION -ARG HELM_VERSION -ARG FLANNEL_VERSION # This are private on our cluster and need to be copied to here COPY cspca.llnl.gov.cer.pem /usr/local/share/ca-certificates/ COPY cspca.cer.pem /usr/local/share/ca-certificates/ RUN update-ca-certificates -RUN arch="$(uname -m | sed -e s/x86_64/amd64/ -e s/aarch64/arm64/)" && \ - fname="cni-plugins-linux-${arch}-${CNI_PLUGINS_VERSION}.tgz" && \ - curl --insecure -o "${fname}" -fSL "https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/${fname}" && \ - grep "${fname}" "/tmp/SHA256SUMS.d/cni-plugins-${CNI_PLUGINS_VERSION}" | sha256sum -c && \ - mkdir -p /opt/cni/bin && \ - tar xzf "${fname}" -C /opt/cni/bin && \ - rm -f "${fname}" && \ - fname="helm-${HELM_VERSION}-linux-${arch}.tar.gz" && \ - curl --insecure -o "${fname}" -fSL "https://get.helm.sh/${fname}" && \ - grep "${fname}" "/tmp/SHA256SUMS.d/helm-${HELM_VERSION}" | sha256sum -c && \ - tar xzf "${fname}" -C /usr/local/bin --strip-components=1 -- "linux-${arch}/helm" && \ - rm -f "${fname}" && \ - fname="flannel.tgz" && \ - curl --insecure -o "${fname}" -fSL "https://github.com/flannel-io/flannel/releases/download/${FLANNEL_VERSION}/${fname}" && \ - grep "${fname}" "/tmp/SHA256SUMS.d/flannel-${FLANNEL_VERSION}" | sha256sum -c && \ - tar xzf "${fname}" -C / && \ - rm -f "${fname}" -# gettext-base: for `envsubst` -# moreutils: for `sponge` -# socat: for `socat` (to silence "[WARNING FileExisting-socat]" from kubeadm) -RUN apt-get update && apt-get install -y --no-install-recommends \ - gettext-base \ - moreutils \ - socat -ADD Dockerfile.d/etc_udev_rules.d_90-flannel.rules /etc/udev/rules.d/90-flannel.rules -ADD Dockerfile.d/u7s-entrypoint.sh / -ENTRYPOINT ["/u7s-entrypoint.sh", "/usr/local/bin/entrypoint", "/sbin/init"] diff --git a/Dockerfile.d/Dockerfile.base b/Dockerfile.d/Dockerfile.base new file mode 100644 index 00000000..acdf6001 --- /dev/null +++ 
b/Dockerfile.d/Dockerfile.base @@ -0,0 +1,41 @@ +ARG BASE_IMAGE=docker.io/kindest/node:v1.33.0@sha256:91e9ed777db80279c22d1d1068c091b899b2078506e4a0f797fbf6e397c0b0b2 +ARG CNI_PLUGINS_VERSION=v1.7.1 +ARG HELM_VERSION=v3.17.3 +ARG FLANNEL_VERSION=v0.26.7 +FROM ${BASE_IMAGE} +COPY Dockerfile.d/SHA256SUMS.d/ /tmp/SHA256SUMS.d +ARG CNI_PLUGINS_VERSION +ARG HELM_VERSION +ARG FLANNEL_VERSION +RUN arch="$(uname -m | sed -e s/x86_64/amd64/ -e s/aarch64/arm64/)" && \ + fname="cni-plugins-linux-${arch}-${CNI_PLUGINS_VERSION}.tgz" && \ + curl --insecure -o "${fname}" -fSL "https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/${fname}" && \ + grep "${fname}" "/tmp/SHA256SUMS.d/cni-plugins-${CNI_PLUGINS_VERSION}" | sha256sum -c && \ + mkdir -p /opt/cni/bin && \ + tar xzf "${fname}" -C /opt/cni/bin && \ + rm -f "${fname}" && \ + fname="helm-${HELM_VERSION}-linux-${arch}.tar.gz" && \ + curl --insecure -o "${fname}" -fSL "https://get.helm.sh/${fname}" && \ + grep "${fname}" "/tmp/SHA256SUMS.d/helm-${HELM_VERSION}" | sha256sum -c && \ + tar xzf "${fname}" -C /usr/local/bin --strip-components=1 -- "linux-${arch}/helm" && \ + rm -f "${fname}" && \ + fname="flannel.tgz" && \ + curl --insecure -o "${fname}" -fSL "https://github.com/flannel-io/flannel/releases/download/${FLANNEL_VERSION}/${fname}" && \ + grep "${fname}" "/tmp/SHA256SUMS.d/flannel-${FLANNEL_VERSION}" | sha256sum -c && \ + tar xzf "${fname}" -C / && \ + rm -f "${fname}" +# gettext-base: for `envsubst` +# moreutils: for `sponge` +# socat: for `socat` (to silence "[WARNING FileExisting-socat]" from kubeadm) +RUN apt-get update && apt-get install -y --no-install-recommends \ + gettext-base \ + moreutils \ + socat ipset wget +ADD Dockerfile.d/etc_udev_rules.d_90-flannel.rules /etc/udev/rules.d/90-flannel.rules +ADD Dockerfile.d/etc_udev_rules.d_95-calico.rules /etc/udev/rules.d/95-calico.rules +ADD Dockerfile.d/u7s-entrypoint.sh / +# Calico +ENV FELIX_IGNORELOOSERPF=true +RUN wget 
https://github.com/projectcalico/calico/releases/download/v3.30.5/calicoctl-linux-amd64 -O /tmp/calicoctl && \ + chmod +x /tmp/calicoctl && mv /tmp/calicoctl /usr/local/bin +ENTRYPOINT ["/u7s-entrypoint.sh", "/usr/local/bin/entrypoint", "/sbin/init"] diff --git a/Dockerfile.d/etc_udev_rules.d_95-calico.rules b/Dockerfile.d/etc_udev_rules.d_95-calico.rules new file mode 100644 index 00000000..94beb184 --- /dev/null +++ b/Dockerfile.d/etc_udev_rules.d_95-calico.rules @@ -0,0 +1 @@ +SUBSYSTEM=="net", ACTION=="add|change|move", ENV{INTERFACE}=="vxlan.calico", RUN+="/usr/sbin/ethtool -K vxlan.calico tx-checksum-ip-generic off" diff --git a/Makefile b/Makefile index de258ddb..23fe2e37 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,7 @@ export PORT_ETCD ?= 2379 export PORT_KUBELET ?= 10250 export PORT_FLANNEL ?= 8472 export PORT_KUBE_APISERVER ?= 6443 +export PORT_CALICO ?= 5473 # HOSTNAME is the name of the physical host export HOSTNAME ?= $(shell hostname) @@ -35,6 +36,7 @@ NODE_SHELL := $(COMPOSE) exec \ -e NODE_IP=$(NODE_IP) \ -e PORT_KUBE_APISERVER=$(PORT_KUBE_APISERVER) \ -e PORT_FLANNEL=$(PORT_FLANNEL) \ + -e PORT_CALICO=$(PORT_CALICO) \ -e PORT_KUBELET=$(PORT_KUBELET) \ -e PORT_ETCD=$(PORT_ETCD) \ $(NODE_SERVICE_NAME) @@ -83,7 +85,7 @@ render: check-preflight up: check-preflight # Podman creates cni files in a shared location, this ensures unique names that do not clobbed one another sed -i "s/default_network/$(HOSTNAME)/g" docker-compose.yaml - $(COMPOSE) up --build -d + $(COMPOSE) up -d .PHONY: down down: @@ -158,5 +160,10 @@ install-flannel: # Kubernetes 1.30.x removed the check for br_netfilter from kubeadm. # Flannel over version 0.25 checks for br_netfilter, which won't be in the podman node. 
# We don't actually need it there, just on the physical node, so we use newer K8s and older flannel - $(NODE_SHELL) kubectl apply -f https://github.com/flannel-io/flannel/releases/download/v0.25.1/kube-flannel.yml - #$(NODE_SHELL) /usernetes/Makefile.d/install-flannel.sh + # $(NODE_SHELL) kubectl apply -f https://github.com/flannel-io/flannel/releases/download/v0.25.1/kube-flannel.yml + $(NODE_SHELL) /usernetes/Makefile.d/install-flannel.sh + +.PHONY: install-calico +install-calico: + # Calico daemonset changes and node-level address changes + $(NODE_SHELL) /usernetes/Makefile.d/calico/install-calico.sh yes diff --git a/Makefile.d/calico/calico-ethtool.yaml b/Makefile.d/calico/calico-ethtool.yaml new file mode 100644 index 00000000..8e9b98f7 --- /dev/null +++ b/Makefile.d/calico/calico-ethtool.yaml @@ -0,0 +1,47 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: calico-checksum-fix + namespace: kube-system + labels: + k8s-app: calico-checksum-fix +spec: + selector: + matchLabels: + name: calico-checksum-fix + template: + metadata: + labels: + name: calico-checksum-fix + spec: + hostNetwork: true + hostPID: true + securityContext: + runAsUser: 0 + initContainers: + - name: fix-checksum + image: ghcr.io/converged-computing/usernetes:alpine + # image: alpine:latest + command: ["/bin/sh", "-c"] + args: + - | + # nsenter -t 1 enters the init process's namespace (of the host) + # check if the interface exists before running ethtool + if [ -d /sys/class/net/vxlan.calico ]; then + echo "Applying ethtool fix to vxlan.calico..." + nsenter -t 1 -n -u -i -m -- ethtool -K vxlan.calico tx-checksum-ip-generic off + else + echo "vxlan.calico interface not found, skipping." 
+ fi + iptables -I INPUT -p udp --dport 8472 -j ACCEPT + sysctl -w net.ipv4.conf.all.rp_filter=1 + sysctl -w net.ipv4.conf.default.rp_filter=1 + sysctl -w net.ipv4.conf.eth0.rp_filter=1 + sysctl -w net.ipv4.conf.vxlan/calico.rp_filter=1 + securityContext: + privileged: true + containers: + - name: pause + # image: registry.k8s.io/pause:3.9 + image: ghcr.io/converged-computing/usernetes:pause + terminationGracePeriodSeconds: 0 diff --git a/Makefile.d/calico/install-calico.sh b/Makefile.d/calico/install-calico.sh new file mode 100755 index 00000000..10446451 --- /dev/null +++ b/Makefile.d/calico/install-calico.sh @@ -0,0 +1,95 @@ +#!/bin/bash + +# Install standard Calico +CALICO_VERSION="v3.31" +CALICO_FILE="calico.yaml" + +# Create local bin +LOCAL_BIN_DIR=~/.local/bin +mkdir -p $LOCAL_BIN_DIR +export PATH=$LOCAL_BIN_DIR:$PATH + +# 1. Download official manifest +wget https://raw.githubusercontent.com/projectcalico/calico/refs/heads/release-v3.31/manifests/calico.yaml -O $CALICO_FILE + +install_yq() { + if ! command -v yq > /dev/null; then + echo "Installing yq..." + YQ_VERSION=v4.2.0 + YQ_PLATFORM=linux_amd64 + cd /tmp + wget https://github.com/mikefarah/yq/releases/download/${YQ_VERSION}/yq_${YQ_PLATFORM}.tar.gz -O - | tar xz + chmod +x ./yq_${YQ_PLATFORM} + mv ./yq_${YQ_PLATFORM} "${LOCAL_BIN_DIR}/yq" + echo " yq installed to ${LOCAL_BIN_DIR}/yq" + cd - + else + echo " yq found at $(command -v yq)" + fi + command -v yq > /dev/null || { echo "yq not found after installation attempt." >&2; exit 1; } 
+} + +install_yq + +# backend to vxlan +yq eval-all -i '(select(.kind == "ConfigMap" and .metadata.name == "calico-config").data.calico_backend) = "vxlan"' $CALICO_FILE + +# Images for corona +yq eval-all -i '(select(.kind == "Deployment" and .metadata.name == "calico-kube-controllers").spec.template.spec.containers[0].image) = "ghcr.io/converged-computing/usernetes:calico-kube-controllers"' $CALICO_FILE +yq eval-all -i '(select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.initContainers[] | select(.name == "upgrade-ipam").image) = "ghcr.io/converged-computing/usernetes:calico-cni"' $CALICO_FILE +yq eval-all -i '(select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.initContainers[] | select(.name == "install-cni").image) = "ghcr.io/converged-computing/usernetes:calico-cni"' $CALICO_FILE +yq eval-all -i '(select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.initContainers[] | select(.name == "ebpf-bootstrap").image) = "ghcr.io/converged-computing/usernetes:calico-node"' $CALICO_FILE +yq eval-all -i '(select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.containers[0].image) = "ghcr.io/converged-computing/usernetes:calico-node"' $CALICO_FILE + +# IPIP and VXLAN +yq eval-all -i '(select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.containers[0].env[] | select(.name == "CALICO_IPV4POOL_IPIP").value) = "Never"' $CALICO_FILE +yq eval-all -i '(select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.containers[0].env[] | select(.name == "CALICO_IPV4POOL_VXLAN").value) = "CrossSubnet"' $CALICO_FILE +yq eval-all -i '(select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.containers[0].env[] | select(.name == "CALICO_IPV6POOL_VXLAN").value) = "CrossSubnet"' $CALICO_FILE + +# FELIX for rootless +yq eval-all -i 'select(.kind == "DaemonSet" and .metadata.name == 
"calico-node").spec.template.spec.containers[0].env += {"name": "FELIX_IGNORELOOSERPF", "value": "true"}' $CALICO_FILE +yq eval-all -i 'select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.containers[0].env += {"name": "FELIX_VXLANPORT", "value": "8472"}' $CALICO_FILE +yq eval-all -i 'select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.containers[0].env += {"name": "FELIX_EXTERNALNODESCIDRLIST", "value": "10.100.0.0/16"}' $CALICO_FILE + +# health probes (Remove bird-ready and bird-live) +yq eval-all -i '(select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.containers[0].livenessProbe.exec.command) = ["/bin/calico-node", "-felix-live"]' $CALICO_FILE +yq eval-all -i '(select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.containers[0].readinessProbe.exec.command) = ["/bin/calico-node", "-felix-ready"]' $CALICO_FILE + +# install components with our rootless version +kubectl apply -f ${CALICO_FILE} +echo "Done. Final file is $CALICO_FILE" + +# Give a small break to settle - we need calico.vxlan to be created +sleep 10 + +# This must be removed or the address will be reset +kubectl set env daemonset/calico-node IP- -n kube-system + +# Allow pods to recreate +echo "Recreating calico pods..." 
+sleep 10 + +# https://youtu.be/noriIzBKYRk?si=mlOC27ntvSEDw_VM&t=299 +# These commands need to be done bringing up node +# iptables -I INPUT -p udp --dport 8472 -j ACCEPT +# sysctl -w net.ipv4.conf.all.rp_filter=2 +# sysctl -w net.ipv4.conf.default.rp_filter=2 +# sysctl -w net.ipv4.conf.eth0.rp_filter=2 +# sysctl -w "net.ipv4.conf.vxlan/calico.rp_filter=2" + +# This needs to be done after daemonset is patched +# Note that the calico-node has a warning after this, but it won't work if we don't do it +for node in $(kubectl get nodes -o name); do + host_ip="$(kubectl get "${node}" -o jsonpath='{.metadata.labels.usernetes/host-ip}')" + nodename=$(cut -d / -f 2 <<< $node) + calicoctl --allow-version-mismatch patch node ${nodename} --patch='{"spec": {"bgp":{"ipv4Address": "'"$host_ip"'"}}}' +done + +# applies ethtool -K vxlan.calico tx-checksum-ip-generic off +# check with: bridge fdb show dev vxlan.calico should have node address NOT 10.x address +kubectl apply --server-side -f /usernetes/Makefile.d/calico/calico-ethtool.yaml + +# These should be run after calico installed +# 1. 
make sync-external-ip and make install-calico +# the second has a daemonset to apply these commands +# ethtool -K vxlan.calico tx-checksum-ip-generic off diff --git a/Makefile.d/check-preflight.sh b/Makefile.d/check-preflight.sh index 623e9f52..12ba05b5 100755 --- a/Makefile.d/check-preflight.sh +++ b/Makefile.d/check-preflight.sh @@ -16,7 +16,9 @@ script_dir="$(dirname "$0")" detect_engine="${script_dir}"/detect-container-engine.sh : "${CONTAINER_ENGINE:=$("${detect_engine}" CONTAINER_ENGINE)}" : "${CONTAINER_ENGINE_TYPE:=$("${detect_engine}" CONTAINER_ENGINE_TYPE)}" -: "${QUICK:=0}" + +# Set to 1 since we will do calico by default +: "${QUICK:=1}" : "${BUSYBOX_IMAGE:=docker.io/library/busybox:latest}" if [ -z "${CONTAINER_ENGINE}" ] || [ -z "${CONTAINER_ENGINE_TYPE}" ]; then diff --git a/Makefile.d/sync-external-ip.sh b/Makefile.d/sync-external-ip.sh index 2b4e8bec..47408da2 100755 --- a/Makefile.d/sync-external-ip.sh +++ b/Makefile.d/sync-external-ip.sh @@ -1,6 +1,8 @@ #!/bin/bash set -eu -o pipefail +USE_CALICO="${1:-no}" + for node in $(kubectl get nodes -o name); do # Set ExternalIP host_ip="$(kubectl get "${node}" -o jsonpath='{.metadata.labels.usernetes/host-ip}')" @@ -16,4 +18,10 @@ for node in $(kubectl get nodes -o name); do if echo "${taints}" | grep -q node.cloudprovider.kubernetes.io/uninitialized; then kubectl taint nodes "${node}" node.cloudprovider.kubernetes.io/uninitialized- fi + if [[ "${USE_CALICO}" == "yes" ]]; + then + echo "Changing node patch to use calico" + nodename=$(cut -d / -f 2 <<< $node) + calicoctl --allow-version-mismatch patch node ${nodename} --patch='{"spec": {"bgp":{"ipv4Address": "'"$host_ip"'"}}}' + fi done diff --git a/README.md b/README.md index db1eccce..30f8c515 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ but Usernetes (Gen 2) supports creating a cluster with multiple hosts. 
- CRI: containerd - OCI: runc - CNI: Flannel +- CNI: Calico ## Requirements @@ -72,7 +73,8 @@ EOF sudo systemctl restart systemd-modules-load.service ``` -- sysctl: +- sysctl (should not be required for calico, but needs testing) + ``` sudo tee /etc/sysctl.d/99-usernetes.conf </dev/null net.ipv4.conf.default.rp_filter = 2 @@ -110,6 +112,8 @@ See `make help`. make up make kubeadm-init make install-flannel +# or +make install-calico # Enable kubectl make kubeconfig diff --git a/docker-compose.yaml b/docker-compose.yaml index 1df41123..f06dc9c5 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -3,7 +3,9 @@ --- services: node: - image: usernetes_node + build: + context: . + dockerfile: Dockerfile.d/Dockerfile.base hostname: ${NODE_NAME} privileged: true restart: always @@ -12,6 +14,8 @@ services: ipv4_address: ${NODE_IP} ports: # : + # Calico + - ${PORT_CALICO}:${PORT_CALICO} # etcd (default: 2379) - ${PORT_ETCD}:${PORT_ETCD} # kube-apiserver (default: 6443) @@ -40,11 +44,6 @@ services: # In addition, `net.ipv4.conf.default.rp_filter` # has to be set to 0 (disabled) or 2 (loose) # in the daemon's network namespace. - annotations: - # Accelerate network for nerdctl >= 2.0.0-beta.4 with bypass4netns >= 0.4.1 - "nerdctl/bypass4netns": "${BYPASS4NETNS:-false}" - "nerdctl/bypass4netns-ignore-bind": "true" - "nerdctl/bypass4netns-ignore-subnets": "${BYPASS4NETNS_IGNORE_SUBNETS:-}" networks: default_network: ipam: diff --git a/hack/create-cluster-lima.sh b/hack/create-cluster-lima.sh index ba3d230c..7f14238e 100755 --- a/hack/create-cluster-lima.sh +++ b/hack/create-cluster-lima.sh @@ -37,6 +37,9 @@ done SERVICE_PORTS="PORT_KUBE_APISERVER=${PORT_KUBE_APISERVER} PORT_ETCD=${PORT_ETCD} PORT_FLANNEL=${PORT_FLANNEL} PORT_KUBELET=${PORT_KUBELET}" +# Emulate build on hpc system +# ${CONTAINER_ENGINE} build -f Dockerfile.d/Dockerfile.base -t ghcr.io/converged-computing/usernetes:node . 
+ # Launch a Kubernetes node inside a Rootless Docker host for host in host0 host1; do ${LIMACTL} shell "${host}" ${SERVICE_PORTS} CONTAINER_ENGINE="${CONTAINER_ENGINE}" make -C "${guest_home}/usernetes" up @@ -46,7 +49,9 @@ done ${LIMACTL} shell host0 ${SERVICE_PORTS} CONTAINER_ENGINE="${CONTAINER_ENGINE}" make -C "${guest_home}/usernetes" kubeadm-init install-flannel kubeconfig join-command # Let host1 join the cluster -${LIMACTL} copy host0:~/usernetes/join-command host1:~/usernetes/join-command +${LIMACTL} copy host0:~/usernetes/join-command ./join-command +${LIMACTL} copy ./join-command host1:~/usernetes/join-command +# ${LIMACTL} copy host0:~/usernetes/join-command host1:~/usernetes/join-command ${LIMACTL} shell host1 ${SERVICE_PORTS} CONTAINER_ENGINE="${CONTAINER_ENGINE}" make -C "${guest_home}/usernetes" kubeadm-join ${LIMACTL} shell host0 ${SERVICE_PORTS} CONTAINER_ENGINE="${CONTAINER_ENGINE}" make -C "${guest_home}/usernetes" sync-external-ip diff --git a/hack/test-smoke.sh b/hack/test-smoke.sh index 2a8680cb..1bbf271c 100755 --- a/hack/test-smoke.sh +++ b/hack/test-smoke.sh @@ -52,9 +52,17 @@ spec: - containerPort: 80 EOF INFO "Waiting for 3 replicas to be ready" - kubectl rollout status --timeout=5m statefulset + kubectl rollout status --timeout=5m statefulset || true INFO "Connecting to dnstest-{0,1,2}.dnstest.default.svc.cluster.local" + INFO "GET PODS" + kubectl get pods + INFO "DESCRIBE PODS" + kubectl describe pods + for name in $(kubectl get pods -o json | jq -r .items[].metadata.name) + do + kubectl logs $name + done kubectl run -i --rm --image=alpine --restart=Never dnstest-shell -- sh -exc 'for f in $(seq 0 2); do wget -O- http://dnstest-${f}.dnstest.default.svc.cluster.local; done' INFO "Deleting Service \"dnstest\"" diff --git a/init-host/init-host.root.d/install-nerdctl.sh b/init-host/init-host.root.d/install-nerdctl.sh deleted file mode 100755 index 4eb8c2fd..00000000 --- a/init-host/init-host.root.d/install-nerdctl.sh +++ /dev/null @@ 
-1,40 +0,0 @@ -#!/bin/bash -set -eux -o pipefail -if [ "$(id -u)" != "0" ]; then - echo "Must run as the root" - exit 1 -fi - -VERSION="2.0.2" -SHASHA="148f4631fa16b5bfc70152f951d2536a1f8eb1af7c94665240c80ba0345e7cbc" - -arch="" -case "$(uname -m)" in -"x86_64") - arch="amd64" - ;; -"aarch64") - arch="arm64" - ;; -*) - echo >&2 "Unsupported architecture" - exit 1 - ;; -esac - -mkdir -p /root/nerdctl.tmp -( - cd /root/nerdctl.tmp - curl -fSLO https://github.com/containerd/nerdctl/releases/download/v${VERSION}/nerdctl-full-${VERSION}-linux-${arch}.tar.gz - curl -fSLO https://github.com/containerd/nerdctl/releases/download/v${VERSION}/SHA256SUMS - [ "$(sha256sum SHA256SUMS | awk '{print $1}')" = "${SHASHA}" ] - sha256sum --check --ignore-missing SHA256SUMS - tar Cxzvvf /usr/local nerdctl-full-${VERSION}-linux-${arch}.tar.gz -) -rm -rf /root/nerdctl.tmp - -if [ -e /etc/apparmor.d/rootlesskit ]; then - # https://rootlesscontaine.rs/getting-started/common/apparmor/ - sed -e s@/usr/bin/rootlesskit@/usr/local/bin/rootlesskit@g /etc/apparmor.d/rootlesskit >/etc/apparmor.d/usr.local.bin.rootlesskit - systemctl restart apparmor -fi diff --git a/init-host/init-host.root.sh b/init-host/init-host.root.sh index 911ec70e..f608db02 100755 --- a/init-host/init-host.root.sh +++ b/init-host/init-host.root.sh @@ -82,11 +82,6 @@ case "${CONTAINER_ENGINE}" in "${script_dir}"/init-host.root.d/install-podman.sh fi ;; -"nerdctl") - if ! 
command -v nerdctl >/dev/null 2>&1; then - "${script_dir}"/init-host.root.d/install-nerdctl.sh - fi - ;; *) echo >&2 "Unsupported container engine: ${CONTAINER_ENGINE}" exit 1 diff --git a/init-host/init-host.rootless.sh b/init-host/init-host.rootless.sh index 1151fe1d..a7b85519 100755 --- a/init-host/init-host.rootless.sh +++ b/init-host/init-host.rootless.sh @@ -12,11 +12,6 @@ case "${CONTAINER_ENGINE}" in "docker") dockerd-rootless-setuptool.sh install || (journalctl --user --since "10 min ago"; exit 1) ;; -"nerdctl") - containerd-rootless-setuptool.sh install - containerd-rootless-setuptool.sh install-buildkit-containerd - containerd-rootless-setuptool.sh install-bypass4netnsd - ;; "podman") # pasta does not seem to work well # > 2024-12-02T17:15:40.070018488Z stderr F E1202 17:15:40.068621 1 main.go:228] Failed to create SubnetManager: diff --git a/service/README.md b/service/README.md index 9bd636e2..402804c2 100644 --- a/service/README.md +++ b/service/README.md @@ -15,6 +15,8 @@ flux alloc --bg -N2 -q pbatch -t 8h ### Control Plane +TODO: `export QUICK=1` + ```bash ssh corona189 # For the control plane - start @@ -40,25 +42,8 @@ Back on the control plane (if everything looks good) we can go to the copied con ```bash . source_env.sh -``` -```console -[sochat1@corona190:service]$ kubectl get nodes -NAME STATUS ROLES AGE VERSION -u7s-corona190 NotReady control-plane 3m20s v1.30.0 -u7s-corona196 NotReady 1m3s v1.30.0 -``` - -Importantly, the ips need to be sync'd (and an annotation added for flannel) after nodes are up. They will all be `NotReady`. - -```bash make sync-external-ip -make install-flannel -``` -```console -[sochat1@corona190:service]$ kubectl get nodes -NAME STATUS ROLES AGE VERSION -u7s-corona190 Ready control-plane 5m v1.30.0 -u7s-corona196 Ready 3m7s v1.30.0 +make install-calico ``` Install the Flux Operator... 
@@ -125,4 +110,17 @@ https://raw.githubusercontent.com/ROCm/k8s-device-plugin/763445e18f3838fa72b22e3 kubectl logs alexnet-tf-gpu-pod alexnet-tf-gpu-container ``` -Our final experiments will be done separately, and these notes likely cleaned up. + +### Debugging + +Calico: In u7s this address should be same as host: + +```bash +bridge fdb show dev vxlan.calico +``` +```console +# "this address" +66:63:44:f3:b6:76 dst 192.168.128.222 self permanent +``` + +If you see the container interface (10.0.x) this is a bug. It could be that the calico-node daemonset still has the `IP` environment variable set to autodetect (which will clobber any changes you make) or you did not issue all the commands in the sync external ip script, or the daemonset to run ethtool. diff --git a/service/usernetes-start-control-plane.sh b/service/usernetes-start-control-plane.sh index 2be076d7..a4679dde 100755 --- a/service/usernetes-start-control-plane.sh +++ b/service/usernetes-start-control-plane.sh @@ -5,7 +5,7 @@ set -euo pipefail # These are variables we likely will change # LC only supplies podman USERNETES_CONTAINER_TECH=${1:-"podman"} -USERNETES_TEMPLATE_PATH=/usr/workspace/usernetes/usernetes-06-26-2025 +USERNETES_TEMPLATE_PATH=/usr/workspace/usernetes/usernetes-calico # We will copy join command here shared_join_command_dir="/usr/workspace/usernetes" @@ -13,6 +13,16 @@ shared_join_command_dir="/usr/workspace/usernetes" # The user needs to run the setup script USERNAME=$(whoami) +# Logging functions for consistency (like Akihiro!) +log() { + echo "$(date '+%Y-%m-%d %H:%M:%S') - INFO - $1" +} + +error_exit() { + echo "$(date '+%Y-%m-%d %H:%M:%S') - ERROR - $1" >&2 + exit 1 +} + # This is way a lot for just deriving home, but I'm not convinced it will always # be defined in the environment if [[ -z "${HOME:-}" || ! 
-d "${HOME}" ]]; then @@ -37,16 +47,6 @@ which podman-compose # We don't want to use /var because that is a memory based fs export TMPDIR="/tmp/${USERNAME}" -# Logging functions for consistency (like Akihiro!) -log() { - echo "$(date '+%Y-%m-%d %H:%M:%S') - INFO - $1" -} - -error_exit() { - echo "$(date '+%Y-%m-%d %H:%M:%S') - ERROR - $1" >&2 - exit 1 -} - install_kubectl() { if ! command -v kubectl > /dev/null; then log "Installing kubectl..." @@ -60,7 +60,20 @@ install_kubectl() { command -v kubectl > /dev/null || error_exit "kubectl not found after installation attempt." } - +install_yq() { + if ! command -v yq > /dev/null; then + log "Installing yq..." + YQ_VERSION=v4.2.0 + YQ_PLATFORM=linux_amd64 + wget https://github.com/mikefarah/yq/releases/download/${YQ_VERSION}/yq_${YQ_PLATFORM}.tar.gz -O - | tar xz + chmod +x ./yq_${YQ_PLATFORM} + mv ./yq_${YQ_PLATFORM} "${LOCAL_BIN_DIR}/yq" + log " yq installed to ${LOCAL_BIN_DIR}/yq" + else + log " yq found at $(command -v yq)" + fi + command -v yq > /dev/null || error_exit "yq not found after installation attempt." +} # Pre-flight Checks & Setup log "🎬 Starting Usernetes Control Plane Setup" @@ -88,6 +101,7 @@ log " Found ${USERNETES_CONTAINER_TECH} at ${container_runtime_path}" # Install kubectl if not present log " 👀 Looking for kubectl" install_kubectl +install_yq # Cleanup any previous podman context, setup with vhs log " 📦 Configuring ${container_runtime_path}" diff --git a/service/usernetes-start-worker.sh b/service/usernetes-start-worker.sh index 709585d4..bb5d7d52 100755 --- a/service/usernetes-start-worker.sh +++ b/service/usernetes-start-worker.sh @@ -2,10 +2,20 @@ set -euo pipefail +# Logging functions for consistency (like Akihiro!) 
+log() { + echo "$(date '+%Y-%m-%d %H:%M:%S') - INFO - $1" +} + +error_exit() { + echo "$(date '+%Y-%m-%d %H:%M:%S') - ERROR - $1" >&2 + exit 1 +} + # These are variables we likely will change # LC only supplies podman USERNETES_CONTAINER_TECH=${1:-"podman"} -USERNETES_TEMPLATE_PATH=/usr/workspace/usernetes/usernetes-06-26-2025 +USERNETES_TEMPLATE_PATH=/usr/workspace/usernetes/usernetes-calico # The join command needs to be here shared_join_command_dir="/usr/workspace/usernetes" @@ -38,16 +48,6 @@ log " Updated PATH: ${PATH}" # We don't want to use /var because that is a memory based fs export TMPDIR="/tmp/${USERNAME}" -# Logging functions for consistency (like Akihiro!) -log() { - echo "$(date '+%Y-%m-%d %H:%M:%S') - INFO - $1" -} - -error_exit() { - echo "$(date '+%Y-%m-%d %H:%M:%S') - ERROR - $1" >&2 - exit 1 -} - install_kubectl() { if ! command -v kubectl > /dev/null; then log "Installing kubectl..." @@ -61,8 +61,6 @@ install_kubectl() { command -v kubectl > /dev/null || error_exit "kubectl not found after installation attempt." } - - # Pre-flight Checks & Setup log "🎬 Starting Usernetes Control Plane Setup" log " Temporary directory: ${TMPDIR}"