Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,23 @@ RUN arch="$(uname -m | sed -e s/x86_64/amd64/ -e s/aarch64/arm64/)" && \
RUN apt-get update && apt-get install -y --no-install-recommends \
gettext-base \
moreutils \
socat
socat ipset wget
ADD Dockerfile.d/etc_udev_rules.d_90-flannel.rules /etc/udev/rules.d/90-flannel.rules
ADD Dockerfile.d/etc_udev_rules.d_95-calico.rules /etc/udev/rules.d/95-calico.rules
ADD Dockerfile.d/u7s-entrypoint.sh /
# Calico
ENV FELIX_IGNORELOOSERPF=true
# Install calicoctl v3.30.5 for the CPU architecture of the image being built.
# The uname -m -> amd64/arm64 mapping is the same one used earlier in this
# Dockerfile, so arm64 builds no longer receive the amd64 binary.
RUN arch="$(uname -m | sed -e s/x86_64/amd64/ -e s/aarch64/arm64/)" && \
  wget "https://github.com/projectcalico/calico/releases/download/v3.30.5/calicoctl-linux-${arch}" -O /tmp/calicoctl && \
  chmod +x /tmp/calicoctl && mv /tmp/calicoctl /usr/local/bin

# - Add calicoctl command for IPs in sync-external-ip
# - nodename=$(cut -d / -f 2 <<< $node)
# - calicoctl --allow-version-mismatch patch node ${nodename} --patch='{"spec":
# {"bgp":{"ipv4Address": "'"$host_ip"'"}}}'
# - https://docs.tigera.io/calico/latest/networking/ipam/ip-autodetection#manually-
# configure-ip-address-and-subnet-for-a-node
# - Docker-compose.yaml
# - Add port entry for 4789 (VXLAN port used by calico)
# - Dockerfile.d/etc_udev_rules.d_90-flannel.rules
# - Replace flannel.1 by vxlan.calico
ENTRYPOINT ["/u7s-entrypoint.sh", "/usr/local/bin/entrypoint", "/sbin/init"]
1 change: 1 addition & 0 deletions Dockerfile.d/etc_udev_rules.d_95-calico.rules
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# On an add, change or move event for the network interface named vxlan.calico,
# run ethtool to switch the tx-checksum-ip-generic feature off on that interface.
SUBSYSTEM=="net", ACTION=="add|change|move", ENV{INTERFACE}=="vxlan.calico", RUN+="/usr/sbin/ethtool -K vxlan.calico tx-checksum-ip-generic off"
9 changes: 9 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,15 @@ kubeadm-join:
kubeadm-reset:
$(NODE_SHELL) kubeadm reset --force

.PHONY: install-calico
install-calico:
# Step 1: apply the Calico VXLAN manifest.
# Server-side apply is required because the manifests are large.
	$(NODE_SHELL) kubectl apply --server-side -f /usernetes/service/calico/calico-vxlan.yaml
# Step 2: patch every Calico node's BGP IPv4 address with the value of its
# usernetes/host-ip label (done by Makefile.d/install-calico.sh).
	$(NODE_SHELL) /usernetes/Makefile.d/install-calico.sh
# Step 3: apply the DaemonSet that runs
#   ethtool -K vxlan.calico tx-checksum-ip-generic off
# To verify: "bridge fdb show dev vxlan.calico" should list the node address,
# NOT a 10.x address.
	$(NODE_SHELL) kubectl apply --server-side -f /usernetes/service/calico/calico-ethtool.yaml

.PHONY: install-flannel
install-flannel:
# Kubernetes 1.30.x removed the check for br_netfilter from kubeadm.
Expand Down
20 changes: 20 additions & 0 deletions Makefile.d/install-calico.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
# Patch every Calico node so that its BGP IPv4 address is the value stored in
# the Kubernetes node's "usernetes/host-ip" label.
#
# Requires kubectl and calicoctl on PATH.
# Exits non-zero as soon as any kubectl/calicoctl call fails, so a failed patch
# on one node is not hidden by a later successful one.
set -eu -o pipefail

# These commands need to be run when bringing up the node:
# iptables -I INPUT -p udp --dport 8472 -j ACCEPT
# sysctl -w net.ipv4.conf.all.rp_filter=2
# sysctl -w net.ipv4.conf.default.rp_filter=2
# sysctl -w net.ipv4.conf.eth0.rp_filter=2
# sysctl -w "net.ipv4.conf.vxlan/calico.rp_filter=2"

# This needs to be done after the daemonset is patched.
for node in $(kubectl get nodes -o name); do
	host_ip="$(kubectl get "${node}" -o jsonpath='{.metadata.labels.usernetes/host-ip}')"
	# Without the label there is no address to write; patching an empty
	# ipv4Address would only produce an invalid node spec, so skip the node.
	if [ -z "${host_ip}" ]; then
		echo "WARNING: ${node} has no usernetes/host-ip label; skipping" >&2
		continue
	fi
	# "kubectl get nodes -o name" prints "node/<name>"; keep only "<name>".
	nodename="${node#*/}"
	calicoctl --allow-version-mismatch patch node "${nodename}" --patch='{"spec": {"bgp":{"ipv4Address": "'"${host_ip}"'"}}}'
done

# The following should be run after Calico is installed:
# 1. make sync-external-ip and make install-calico
#    (install-calico applies a daemonset that runs this command)
# ethtool -K vxlan.calico tx-checksum-ip-generic off
12 changes: 12 additions & 0 deletions Makefile.d/sync-external-ip.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,16 @@ for node in $(kubectl get nodes -o name); do
if echo "${taints}" | grep -q node.cloudprovider.kubernetes.io/uninitialized; then
kubectl taint nodes "${node}" node.cloudprovider.kubernetes.io/uninitialized-
fi
nodename=$(cut -d / -f 2 <<< $node)
calicoctl --allow-version-mismatch patch node ${nodename} --patch='{"spec": {"bgp":{"ipv4Address": "'"$host_ip"'"}}}'

iptables -I INPUT -p udp --dport 8472 -j ACCEPT
sysctl -w net.ipv4.conf.all.rp_filter=2
sysctl -w net.ipv4.conf.default.rp_filter=2
sysctl -w net.ipv4.conf.eth0.rp_filter=2
# These should be run after calico installed
# 1. make sync-external-ip and install calico
# 2. then these commands
# ethtool -K vxlan.calico tx-checksum-ip-generic off
# sysctl -w "net.ipv4.conf.vxlan/calico.rp_filter=2"
done
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ but Usernetes (Gen 2) supports creating a cluster with multiple hosts.
- CRI: containerd
- OCI: runc
- CNI: Flannel
- CNI: Calico

## Requirements

Expand Down Expand Up @@ -72,7 +73,8 @@ EOF
sudo systemctl restart systemd-modules-load.service
```

- sysctl:
- sysctl (should not be required for calico, but needs testing)

```
sudo tee /etc/sysctl.d/99-usernetes.conf <<EOF >/dev/null
net.ipv4.conf.default.rp_filter = 2
Expand Down Expand Up @@ -110,6 +112,9 @@ See `make help`.
make up
make kubeadm-init
make install-flannel
# or, for Calico (instead of install-flannel):
# make install-calico
# kubectl -n kube-system set env daemonset/calico-node FELIX_IGNORELOOSERPF=true

# Enable kubectl
make kubeconfig
Expand Down
2 changes: 2 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ services:
ipv4_address: ${NODE_IP}
ports:
# <host>:<container>
# Calico
- 5473:5473
# etcd (default: 2379)
- ${PORT_ETCD}:${PORT_ETCD}
# kube-apiserver (default: 6443)
Expand Down
46 changes: 39 additions & 7 deletions service/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,11 @@ flux alloc --bg -N2 -q pbatch -t 8h
### Control Plane

```bash
ssh corona189
# For the control plane - start
ssh <allocated node>
# remove any old log
rm -rf /usr/workspace/usernetes/control-plane.log
systemctl --user start usernetes-control-plane
systemctl --user status usernetes-control-plane
systemctl --user start usernetes-control-plane-calico
systemctl --user status usernetes-control-plane-calico
# check log in /usr/workspace/usernetes/control-plane.log
```

Expand All @@ -32,15 +31,20 @@ Importantly, in the above you need a podman-compose that has the line to add a l
```bash
ssh corona190
rm -rf /usr/workspace/usernetes/worker.log
systemctl --user start usernetes-worker
systemctl --user status usernetes-worker
systemctl --user start usernetes-worker-calico
systemctl --user status usernetes-worker-calico
# check log in /usr/workspace/usernetes/worker.log
```

Back on the control plane (if everything looks good) we can go to the copied control plane directory, source a file to get kubectl and the correct paths, and see our cluster.

```bash
. source_env.sh
make sync-external-ip
make install-calico
# Unset daemonset variable for automatic ip
kubectl set env daemonset/calico-node IP- -n kube-system
# Do we need to then set the
```
```console
[sochat1@corona190:service]$ kubectl get nodes
Expand All @@ -49,11 +53,24 @@ u7s-corona190 NotReady control-plane 3m20s v1.30.0
u7s-corona196 NotReady <none> 1m3s v1.30.0
```

In u7s, the address shown by this command should be the same as on the host:

```
bridge fdb show dev vxlan.calico
```
```console
# "that address"
66:63:44:f3:b6:76 dst 192.168.128.222 self permanent
```

The problem is that this address needs to be the host IP, NOT the address of the container interface (10.100). The `IP` environment variable of the calico-node daemonset, which is set to `autodetect`, needs to be removed from the daemonset (this is what `kubectl set env daemonset/calico-node IP- -n kube-system` does). Next steps: automate that removal, then test the setup with DNS names, and then test the setup with the Flux Operator.

Importantly, the ips need to be sync'd (and an annotation added for flannel) after nodes are up. They will all be `NotReady`.

```bash
make sync-external-ip
make install-flannel
# or (needs testing on >1 node)
kubectl apply -f service/calico/calico-vxlan.yaml
```
```console
[sochat1@corona190:service]$ kubectl get nodes
Expand Down Expand Up @@ -111,4 +128,19 @@ export OMPI_MCA_osc=ucx
flux run -N2 -n96 lmp -v x 8 -v y 8 -v z 8 -in in.reaxc.hns -nocite
```

### GPUs

You can install the [ROCm/k8s-device-plugin](https://github.com/ROCm/k8s-device-plugin) to expose GPU devices to your pods.

```bash
# Install the driver plugin
kubectl create -f https://raw.githubusercontent.com/ROCm/k8s-device-plugin/master/k8s-ds-amdgpu-dp.yaml

# Create a test workflow that uses GPU (takes a bit to pull)
kubectl create -f https://raw.githubusercontent.com/ROCm/k8s-device-plugin/763445e18f3838fa72b22e31a04ec25987334bff/example/pod/pytorch-non-privileged.yaml

# Get logs (it takes a while to pull...)
kubectl logs alexnet-tf-gpu-pod alexnet-tf-gpu-container
```

Our final experiments will be done separately, and these notes likely cleaned up.
42 changes: 42 additions & 0 deletions service/calico/calico-ethtool.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# DaemonSet that switches tx-checksum-ip-generic off on the vxlan.calico
# interface of every node.
#
# A privileged init container enters the namespaces of PID 1 (visible because
# of hostPID) and runs ethtool there; a pause container then keeps the pod
# alive so the DaemonSet reports as running.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: calico-checksum-fix
  namespace: kube-system
  labels:
    k8s-app: calico-checksum-fix
spec:
  selector:
    matchLabels:
      name: calico-checksum-fix
  template:
    metadata:
      labels:
        name: calico-checksum-fix
    spec:
      hostNetwork: true
      hostPID: true
      # Tolerate every taint so the fix is also applied on tainted nodes
      # (e.g. control-plane nodes), not only on untainted workers.
      tolerations:
        - operator: Exists
      securityContext:
        runAsUser: 0
      initContainers:
        - name: fix-checksum
          image: ghcr.io/converged-computing/usernetes:alpine
          # image: alpine:latest
          command: ["/bin/sh", "-c"]
          args:
            - |
              # The interface may not exist yet when this pod starts, so wait
              # for it for a bounded time (60 tries x 2s) instead of checking
              # only once and skipping for good.
              tries=0
              while [ ! -d /sys/class/net/vxlan.calico ] && [ "$tries" -lt 60 ]; do
                tries=$((tries + 1))
                sleep 2
              done
              # nsenter -t 1 enters the init process's namespace (of the host)
              # check if the interface exists before running ethtool
              if [ -d /sys/class/net/vxlan.calico ]; then
                echo "Applying ethtool fix to vxlan.calico..."
                nsenter -t 1 -n -u -i -m -- ethtool -K vxlan.calico tx-checksum-ip-generic off
              else
                echo "vxlan.calico interface not found, skipping."
              fi
          securityContext:
            # Required for nsenter into the namespaces of PID 1.
            privileged: true
      containers:
        - name: pause
          # image: registry.k8s.io/pause:3.9
          image: ghcr.io/converged-computing/usernetes:pause
      terminationGracePeriodSeconds: 0
Loading