Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions .github/workflows/build-deploy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
name: Docker Build and Deploy

on:
push:
branches:
- develop
pull_request: {}

env:
REGISTRY: ghcr.io
IMAGE_NAME: converged-computing/usernetes

jobs:
build-and-push:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Log in to the Container registry
if: github.event_name != 'pull_request'
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Extract metadata (tags, labels)
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
file: Dockerfile.d/Dockerfile.base
tags: |
# Set node-base as the primary tag for the main branch
type=raw,value=node-base,enable=${{ github.ref == 'refs/heads/main' }}
# Add SHA tag for traceability
type=sha,format=short
# Tag PRs with the PR number
type=ref,event=pr

- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
file: Dockerfile.d/Dockerfile.base
context: .
# Only push if it's NOT a pull request
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
# Use GitHub Actions cache to speed up builds
cache-from: type=gha
cache-to: type=gha,mode=max
4 changes: 1 addition & 3 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
strategy:
fail-fast: false
matrix:
container_engine: [docker, nerdctl, podman]
container_engine: [docker, podman]
uses: ./.github/workflows/reusable-single-node.yaml
with:
container_engine: ${{ matrix.container_engine }}
Expand All @@ -20,8 +20,6 @@ jobs:
include:
- lima_template: template://ubuntu-24.04
container_engine: docker
- lima_template: template://ubuntu-24.04
container_engine: nerdctl
- lima_template: template://centos-stream-9
container_engine: podman
- lima_template: template://fedora
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/reusable-single-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,11 @@ jobs:
sudo apt-get update
sudo apt-get install -y podman-compose
podman info
# Emulate build on hpc system
- run: make up
- run: sleep 5
- run: make kubeadm-init
- run: make install-flannel
- run: make install-calico
- run: make kubeconfig
- run: kubectl taint nodes --all node-role.kubernetes.io/control-plane-
- run: ./hack/test-smoke.sh
Expand Down
36 changes: 1 addition & 35 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,40 +1,6 @@
ARG BASE_IMAGE=docker.io/kindest/node:v1.33.0@sha256:91e9ed777db80279c22d1d1068c091b899b2078506e4a0f797fbf6e397c0b0b2
ARG CNI_PLUGINS_VERSION=v1.7.1
ARG HELM_VERSION=v3.17.3
ARG FLANNEL_VERSION=v0.26.7
ARG BASE_IMAGE=ghcr.io/converged-computing/usernetes:node-base
FROM ${BASE_IMAGE}
COPY Dockerfile.d/SHA256SUMS.d/ /tmp/SHA256SUMS.d
ARG CNI_PLUGINS_VERSION
ARG HELM_VERSION
ARG FLANNEL_VERSION
# This are private on our cluster and need to be copied to here
COPY cspca.llnl.gov.cer.pem /usr/local/share/ca-certificates/
COPY cspca.cer.pem /usr/local/share/ca-certificates/
RUN update-ca-certificates
RUN arch="$(uname -m | sed -e s/x86_64/amd64/ -e s/aarch64/arm64/)" && \
fname="cni-plugins-linux-${arch}-${CNI_PLUGINS_VERSION}.tgz" && \
curl --insecure -o "${fname}" -fSL "https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/${fname}" && \
grep "${fname}" "/tmp/SHA256SUMS.d/cni-plugins-${CNI_PLUGINS_VERSION}" | sha256sum -c && \
mkdir -p /opt/cni/bin && \
tar xzf "${fname}" -C /opt/cni/bin && \
rm -f "${fname}" && \
fname="helm-${HELM_VERSION}-linux-${arch}.tar.gz" && \
curl --insecure -o "${fname}" -fSL "https://get.helm.sh/${fname}" && \
grep "${fname}" "/tmp/SHA256SUMS.d/helm-${HELM_VERSION}" | sha256sum -c && \
tar xzf "${fname}" -C /usr/local/bin --strip-components=1 -- "linux-${arch}/helm" && \
rm -f "${fname}" && \
fname="flannel.tgz" && \
curl --insecure -o "${fname}" -fSL "https://github.com/flannel-io/flannel/releases/download/${FLANNEL_VERSION}/${fname}" && \
grep "${fname}" "/tmp/SHA256SUMS.d/flannel-${FLANNEL_VERSION}" | sha256sum -c && \
tar xzf "${fname}" -C / && \
rm -f "${fname}"
# gettext-base: for `envsubst`
# moreutils: for `sponge`
# socat: for `socat` (to silence "[WARNING FileExisting-socat]" from kubeadm)
RUN apt-get update && apt-get install -y --no-install-recommends \
gettext-base \
moreutils \
socat
ADD Dockerfile.d/etc_udev_rules.d_90-flannel.rules /etc/udev/rules.d/90-flannel.rules
ADD Dockerfile.d/u7s-entrypoint.sh /
ENTRYPOINT ["/u7s-entrypoint.sh", "/usr/local/bin/entrypoint", "/sbin/init"]
41 changes: 41 additions & 0 deletions Dockerfile.d/Dockerfile.base
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
ARG BASE_IMAGE=docker.io/kindest/node:v1.33.0@sha256:91e9ed777db80279c22d1d1068c091b899b2078506e4a0f797fbf6e397c0b0b2
ARG CNI_PLUGINS_VERSION=v1.7.1
ARG HELM_VERSION=v3.17.3
ARG FLANNEL_VERSION=v0.26.7
FROM ${BASE_IMAGE}
COPY Dockerfile.d/SHA256SUMS.d/ /tmp/SHA256SUMS.d
ARG CNI_PLUGINS_VERSION
ARG HELM_VERSION
ARG FLANNEL_VERSION
RUN arch="$(uname -m | sed -e s/x86_64/amd64/ -e s/aarch64/arm64/)" && \
fname="cni-plugins-linux-${arch}-${CNI_PLUGINS_VERSION}.tgz" && \
curl --insecure -o "${fname}" -fSL "https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/${fname}" && \
grep "${fname}" "/tmp/SHA256SUMS.d/cni-plugins-${CNI_PLUGINS_VERSION}" | sha256sum -c && \
mkdir -p /opt/cni/bin && \
tar xzf "${fname}" -C /opt/cni/bin && \
rm -f "${fname}" && \
fname="helm-${HELM_VERSION}-linux-${arch}.tar.gz" && \
curl --insecure -o "${fname}" -fSL "https://get.helm.sh/${fname}" && \
grep "${fname}" "/tmp/SHA256SUMS.d/helm-${HELM_VERSION}" | sha256sum -c && \
tar xzf "${fname}" -C /usr/local/bin --strip-components=1 -- "linux-${arch}/helm" && \
rm -f "${fname}" && \
fname="flannel.tgz" && \
curl --insecure -o "${fname}" -fSL "https://github.com/flannel-io/flannel/releases/download/${FLANNEL_VERSION}/${fname}" && \
grep "${fname}" "/tmp/SHA256SUMS.d/flannel-${FLANNEL_VERSION}" | sha256sum -c && \
tar xzf "${fname}" -C / && \
rm -f "${fname}"
# gettext-base: for `envsubst`
# moreutils: for `sponge`
# socat: for `socat` (to silence "[WARNING FileExisting-socat]" from kubeadm)
RUN apt-get update && apt-get install -y --no-install-recommends \
gettext-base \
moreutils \
socat ipset wget
ADD Dockerfile.d/etc_udev_rules.d_90-flannel.rules /etc/udev/rules.d/90-flannel.rules
ADD Dockerfile.d/etc_udev_rules.d_95-calico.rules /etc/udev/rules.d/95-calico.rules
ADD Dockerfile.d/u7s-entrypoint.sh /
# Calico
ENV FELIX_IGNORELOOSERPF=true
RUN wget https://github.com/projectcalico/calico/releases/download/v3.30.5/calicoctl-linux-amd64 -O /tmp/calicoctl && \
chmod +x /tmp/calicoctl && mv /tmp/calicoctl /usr/local/bin
ENTRYPOINT ["/u7s-entrypoint.sh", "/usr/local/bin/entrypoint", "/sbin/init"]
1 change: 1 addition & 0 deletions Dockerfile.d/etc_udev_rules.d_95-calico.rules
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SUBSYSTEM=="net", ACTION=="add|change|move", ENV{INTERFACE}=="vxlan.calico", RUN+="/usr/sbin/ethtool -K vxlan.calico tx-checksum-ip-generic off"
13 changes: 10 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export PORT_ETCD ?= 2379
export PORT_KUBELET ?= 10250
export PORT_FLANNEL ?= 8472
export PORT_KUBE_APISERVER ?= 6443
export PORT_CALICO ?= 5473

# HOSTNAME is the name of the physical host
export HOSTNAME ?= $(shell hostname)
Expand Down Expand Up @@ -35,6 +36,7 @@ NODE_SHELL := $(COMPOSE) exec \
-e NODE_IP=$(NODE_IP) \
-e PORT_KUBE_APISERVER=$(PORT_KUBE_APISERVER) \
-e PORT_FLANNEL=$(PORT_FLANNEL) \
-e PORT_CALICO=$(PORT_CALICO) \
-e PORT_KUBELET=$(PORT_KUBELET) \
-e PORT_ETCD=$(PORT_ETCD) \
$(NODE_SERVICE_NAME)
Expand Down Expand Up @@ -83,7 +85,7 @@ render: check-preflight
up: check-preflight
# Podman creates cni files in a shared location, this ensures unique names that do not clobbed one another
sed -i "s/default_network/$(HOSTNAME)/g" docker-compose.yaml
$(COMPOSE) up --build -d
$(COMPOSE) up -d

.PHONY: down
down:
Expand Down Expand Up @@ -158,5 +160,10 @@ install-flannel:
# Kubernetes 1.30.x removed the check for br_netfilter from kubeadm.
# Flannel over version 0.25 checks for br_netfilter, which won't be in the podman node.
# We don't actually need it there, just on the physical node, so we use newer K8s and older flannel
$(NODE_SHELL) kubectl apply -f https://github.com/flannel-io/flannel/releases/download/v0.25.1/kube-flannel.yml
#$(NODE_SHELL) /usernetes/Makefile.d/install-flannel.sh
# $(NODE_SHELL) kubectl apply -f https://github.com/flannel-io/flannel/releases/download/v0.25.1/kube-flannel.yml
$(NODE_SHELL) /usernetes/Makefile.d/install-flannel.sh

.PHONY: install-calico
install-calico:
# Calico daemonset changes and node-level address changes
$(NODE_SHELL) /usernetes/Makefile.d/calico/install-calico.sh yes
47 changes: 47 additions & 0 deletions Makefile.d/calico/calico-ethtool.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: calico-checksum-fix
namespace: kube-system
labels:
k8s-app: calico-checksum-fix
spec:
selector:
matchLabels:
name: calico-checksum-fix
template:
metadata:
labels:
name: calico-checksum-fix
spec:
hostNetwork: true
hostPID: true
securityContext:
runAsUser: 0
initContainers:
- name: fix-checksum
image: ghcr.io/converged-computing/usernetes:alpine
# image: alpine:latest
command: ["/bin/sh", "-c"]
args:
- |
# nsenter -t 1 enters the init process's namespace (of the host)
# check if the interface exists before running ethtool
if [ -d /sys/class/net/vxlan.calico ]; then
echo "Applying ethtool fix to vxlan.calico..."
nsenter -t 1 -n -u -i -m -- ethtool -K vxlan.calico tx-checksum-ip-generic off
else
echo "vxlan.calico interface not found, skipping."
fi
iptables -I INPUT -p udp --dport 8472 -j ACCEPT
sysctl -w net.ipv4.conf.all.rp_filter=1
sysctl -w net.ipv4.conf.default.rp_filter=1
sysctl -w net.ipv4.conf.eth0.rp_filter=1
sysctl -w net.ipv4.conf.vxlan/calico.rp_filter=1
securityContext:
privileged: true
containers:
- name: pause
# image: registry.k8s.io/pause:3.9
image: ghcr.io/converged-computing/usernetes:pause
terminationGracePeriodSeconds: 0
95 changes: 95 additions & 0 deletions Makefile.d/calico/install-calico.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/bin/bash

# Install standard Calico
CALICO_VERSION="v3.31"
CALICO_FILE="calico.yaml"

# Create local bin
LOCAL_BIN_DIR=~/.local/bin
mkdir -p $LOCAL_BIN_DIR
export PATH=$LOCAL_BIN_DIR:$PATH

# 1. Download official manifest
wget https://raw.githubusercontent.com/projectcalico/calico/refs/heads/release-v3.31/manifests/calico.yaml -O $CALICO_FILE

install_yq() {
if ! command -v yq > /dev/null; then
log "Installing yq..."
YQ_VERSION=v4.2.0
YQ_PLATFORM=linux_amd64
cd /tmp
wget https://github.com/mikefarah/yq/releases/download/${YQ_VERSION}/yq_${YQ_PLATFORM}.tar.gz -O - | tar xz
chmod +x ./yq_${YQ_PLATFORM}
mv ./yq_${YQ_PLATFORM} "${LOCAL_BIN_DIR}/yq"
log " yq installed to ${LOCAL_BIN_DIR}/yq"
cd -
else
log " yq found at $(command -v yq)"
fi
command -v yq > /dev/null || error_exit "yq not found after installation attempt."
}

install_yq

# backend to vxlan
yq eval-all -i '(select(.kind == "ConfigMap" and .metadata.name == "calico-config").data.calico_backend) = "vxlan"' $CALICO_FILE

# Images for corona
yq eval-all -i '(select(.kind == "Deployment" and .metadata.name == "calico-kube-controllers").spec.template.spec.containers[0].image) = "ghcr.io/converged-computing/usernetes:calico-kube-controllers"' $CALICO_FILE
yq eval-all -i '(select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.initContainers[] | select(.name == "upgrade-ipam").image) = "ghcr.io/converged-computing/usernetes:calico-cni"' $CALICO_FILE
yq eval-all -i '(select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.initContainers[] | select(.name == "install-cni").image) = "ghcr.io/converged-computing/usernetes:calico-cni"' $CALICO_FILE
yq eval-all -i '(select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.initContainers[] | select(.name == "ebpf-bootstrap").image) = "ghcr.io/converged-computing/usernetes:calico-node"' $CALICO_FILE
yq eval-all -i '(select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.containers[0].image) = "ghcr.io/converged-computing/usernetes:calico-node"' $CALICO_FILE

# IPIP and VXLAN
yq eval-all -i '(select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.containers[0].env[] | select(.name == "CALICO_IPV4POOL_IPIP").value) = "Never"' $CALICO_FILE
yq eval-all -i '(select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.containers[0].env[] | select(.name == "CALICO_IPV4POOL_VXLAN").value) = "CrossSubnet"' $CALICO_FILE
yq eval-all -i '(select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.containers[0].env[] | select(.name == "CALICO_IPV6POOL_VXLAN").value) = "CrossSubnet"' $CALICO_FILE

# FELIX for rootless
yq eval-all -i 'select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.containers[0].env += {"name": "FELIX_IGNORELOOSERPF", "value": "true"}' $CALICO_FILE
yq eval-all -i 'select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.containers[0].env += {"name": "FELIX_VXLANPORT", "value": "8472"}' $CALICO_FILE
yq eval-all -i 'select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.containers[0].env += {"name": "FELIX_EXTERNALNODESCIDRLIST", "value": "10.100.0.0/16"}' $CALICO_FILE

# health probes (Remove bird-ready and bird-live)
yq eval-all -i '(select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.containers[0].livenessProbe.exec.command) = ["/bin/calico-node", "-felix-live"]' $CALICO_FILE
yq eval-all -i '(select(.kind == "DaemonSet" and .metadata.name == "calico-node").spec.template.spec.containers[0].readinessProbe.exec.command) = ["/bin/calico-node", "-felix-ready"]' $CALICO_FILE

# install components with our rootless version
kubectl apply -f ${CALICO_FILE}
echo "Done. Final file is $CALICO_FILE"

# Give a small break to settle - we need calico.vxlan to be created
sleep 10

# This must be removed or the address will be reset
kubectl set env daemonset/calico-node IP- -n kube-system

# Allow pods to recreate
echo "Recreating calico pods..."
sleep 10

# https://youtu.be/noriIzBKYRk?si=mlOC27ntvSEDw_VM&t=299
# These commands need to be done bringing up node
# iptables -I INPUT -p udp --dport 8472 -j ACCEPT
# sysctl -w net.ipv4.conf.all.rp_filter=2
# sysctl -w net.ipv4.conf.default.rp_filter=2
# sysctl -w net.ipv4.conf.eth0.rp_filter=2
# sysctl -w "net.ipv4.conf.vxlan/calico.rp_filter=2"

# This needs to be done after daemonset is patched
# Note that the calico-node has a warning after this, but it won't work if we don't do it
for node in $(kubectl get nodes -o name); do
host_ip="$(kubectl get "${node}" -o jsonpath='{.metadata.labels.usernetes/host-ip}')"
nodename=$(cut -d / -f 2 <<< $node)
calicoctl --allow-version-mismatch patch node ${nodename} --patch='{"spec": {"bgp":{"ipv4Address": "'"$host_ip"'"}}}'
done

# applies ethtool -K vxlan.calico tx-checksum-ip-generic off
# check with: bridge fdb show dev vxlan.calico should have node address NOT 10.x address
kubectl apply --server-side -f /usernetes/Makefile.d/calico/calico-ethtool.yaml

# These should be run after calico installed
# 1. make sync-external-ip and make install-calico
# the second has a daemonset to apply these commands
# ethtool -K vxlan.calico tx-checksum-ip-generic off
4 changes: 3 additions & 1 deletion Makefile.d/check-preflight.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ script_dir="$(dirname "$0")"
detect_engine="${script_dir}"/detect-container-engine.sh
: "${CONTAINER_ENGINE:=$("${detect_engine}" CONTAINER_ENGINE)}"
: "${CONTAINER_ENGINE_TYPE:=$("${detect_engine}" CONTAINER_ENGINE_TYPE)}"
: "${QUICK:=0}"

# Set to 1 since we will do calico by default
: "${QUICK:=1}"
: "${BUSYBOX_IMAGE:=docker.io/library/busybox:latest}"

if [ -z "${CONTAINER_ENGINE}" ] || [ -z "${CONTAINER_ENGINE_TYPE}" ]; then
Expand Down
8 changes: 8 additions & 0 deletions Makefile.d/sync-external-ip.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#!/bin/bash
set -eu -o pipefail

USE_CALICO="${1:-no}"

for node in $(kubectl get nodes -o name); do
# Set ExternalIP
host_ip="$(kubectl get "${node}" -o jsonpath='{.metadata.labels.usernetes/host-ip}')"
Expand All @@ -16,4 +18,10 @@ for node in $(kubectl get nodes -o name); do
if echo "${taints}" | grep -q node.cloudprovider.kubernetes.io/uninitialized; then
kubectl taint nodes "${node}" node.cloudprovider.kubernetes.io/uninitialized-
fi
if [[ "${USE_CALICO}" == "yes" ]];
then
echo "Changing node patch to use calico"
nodename=$(cut -d / -f 2 <<< $node)
calicoctl --allow-version-mismatch patch node ${nodename} --patch='{"spec": {"bgp":{"ipv4Address": "'"$host_ip"'"}}}'
fi
done
Loading
Loading