diff --git a/.github/workflows/k8s-integration-test.yml b/.github/workflows/k8s-integration-test.yml new file mode 100644 index 0000000..290fa12 --- /dev/null +++ b/.github/workflows/k8s-integration-test.yml @@ -0,0 +1,369 @@ +name: K8s Integration Test + +on: + push: + branches: + - main + - dev + - staging + pull_request: + branches: + - main + - dev + - staging + +jobs: + k8s-e2e-test: + name: E2E Test with Kind + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Create kind cluster + uses: helm/kind-action@v1 + with: + cluster_name: avs-test + config: k8s/kind-config.yaml + + - name: Build local images + run: | + echo "Building router image..." + docker build -f usecases/counter/router/Dockerfile -t commonware-avs-router:ci . + + echo "Building node image..." + docker build -f usecases/counter/node/Dockerfile -t commonware-avs-node:ci . + + - name: Load images into kind + run: | + echo "Loading images into kind cluster..." + kind load docker-image commonware-avs-router:ci --name avs-test + kind load docker-image commonware-avs-node:ci --name avs-test + + echo "Pulling external images and loading into kind..." + docker pull ghcr.io/breadchaincoop/ethereum:dev + docker pull ghcr.io/breadchaincoop/eigenlayer:dev + docker pull ghcr.io/layr-labs/cerberus:0.0.2 + docker pull bitnami/kubectl:1.28 + docker pull busybox:1.36 + + kind load docker-image ghcr.io/breadchaincoop/ethereum:dev --name avs-test + kind load docker-image ghcr.io/breadchaincoop/eigenlayer:dev --name avs-test + kind load docker-image ghcr.io/layr-labs/cerberus:0.0.2 --name avs-test + kind load docker-image bitnami/kubectl:1.28 --name avs-test + kind load docker-image busybox:1.36 --name avs-test + + - name: Apply K8s manifests + run: | + echo "Applying Kubernetes manifests..." + kubectl apply -k k8s/overlays/ci/ + + echo "Waiting for namespace to be ready..." 
+ kubectl wait --for=jsonpath='{.status.phase}'=Active namespace/commonware-avs --timeout=30s + + - name: Wait for ethereum to be ready + run: | + echo "Waiting for ethereum deployment to be ready..." + kubectl wait --for=condition=available deployment/ethereum \ + -n commonware-avs --timeout=120s + + echo "Verifying ethereum pod is running..." + kubectl get pods -n commonware-avs -l app=ethereum + + - name: Wait for eigenlayer-setup job to complete + run: | + echo "Waiting for eigenlayer-setup job to complete..." + timeout=300 + elapsed=0 + + while [ $elapsed -lt $timeout ]; do + STATUS=$(kubectl get job eigenlayer-setup -n commonware-avs -o jsonpath='{.status.succeeded}' 2>/dev/null || echo "0") + if [ "$STATUS" = "1" ]; then + echo "EigenLayer setup job completed successfully!" + break + fi + + # Check for failure + FAILED=$(kubectl get job eigenlayer-setup -n commonware-avs -o jsonpath='{.status.failed}' 2>/dev/null || echo "0") + if [ "$FAILED" != "0" ] && [ "$FAILED" != "" ]; then + echo "EigenLayer setup job failed!" + kubectl logs job/eigenlayer-setup -n commonware-avs --tail=100 + exit 1 + fi + + echo "Waiting for eigenlayer-setup job... ($elapsed/$timeout seconds)" + sleep 10 + elapsed=$((elapsed + 10)) + done + + if [ $elapsed -ge $timeout ]; then + echo "Timeout waiting for eigenlayer-setup job" + kubectl logs job/eigenlayer-setup -n commonware-avs --tail=100 + exit 1 + fi + + - name: Wait for all pods to be ready + run: | + echo "Waiting for signer deployment..." + kubectl wait --for=condition=available deployment/signer \ + -n commonware-avs --timeout=120s + + echo "Waiting for avs-node statefulset pods..." + kubectl wait --for=condition=ready pod -l app=avs-node \ + -n commonware-avs --timeout=180s + + echo "Waiting for router deployment..." + kubectl wait --for=condition=available deployment/router \ + -n commonware-avs --timeout=120s + + echo "All pods ready!" 
+ kubectl get pods -n commonware-avs + + - name: Test counter increment functionality + run: | + echo "Testing counter increment..." + + # Wait for services to be fully ready + sleep 20 + + # Get the counter contract address from the deployment file + # We need to exec into a pod to read the file from the PVC + COUNTER_ADDRESS=$(kubectl exec deployment/router -n commonware-avs -- \ + cat /app/.nodes/avs_deploy.json | jq -r '.addresses.counter' || echo "") + + if [ -z "$COUNTER_ADDRESS" ]; then + echo "Counter contract address not found in deployment file" + kubectl exec deployment/router -n commonware-avs -- cat /app/.nodes/avs_deploy.json + exit 1 + fi + echo "Counter contract address: $COUNTER_ADDRESS" + + # Read the initial counter value from the smart contract + # The function signature for "number()" is 0x8381f58a + INITIAL_COUNT=$(curl -s -X POST http://localhost:8545 \ + -H "Content-Type: application/json" \ + -d '{ + "jsonrpc":"2.0", + "method":"eth_call", + "params":[{ + "to":"'$COUNTER_ADDRESS'", + "data":"0x8381f58a" + }, "latest"], + "id":1 + }' | jq -r '.result' | xargs printf "%d\n") + + echo "Initial counter value: $INITIAL_COUNT" + echo "INITIAL_COUNT=$INITIAL_COUNT" >> $GITHUB_ENV + + # Wait for 5 aggregation cycles (30 seconds each by default = 150 seconds) + echo "Waiting for 5 aggregation cycles (150 seconds)..." 
+ sleep 150 + + # Read the final counter value from the smart contract + FINAL_COUNT=$(curl -s -X POST http://localhost:8545 \ + -H "Content-Type: application/json" \ + -d '{ + "jsonrpc":"2.0", + "method":"eth_call", + "params":[{ + "to":"'$COUNTER_ADDRESS'", + "data":"0x8381f58a" + }, "latest"], + "id":1 + }' | jq -r '.result' | xargs printf "%d\n") + + echo "Final counter value: $FINAL_COUNT" + + # Verify increment + if [ "$FINAL_COUNT" -gt "$INITIAL_COUNT" ]; then + echo "Counter successfully incremented from $INITIAL_COUNT to $FINAL_COUNT" + # Save the current count for next step + echo "LAST_COUNT=$FINAL_COUNT" >> $GITHUB_ENV + echo "COUNTER_ADDRESS=$COUNTER_ADDRESS" >> $GITHUB_ENV + else + echo "Counter did not increment (still at $FINAL_COUNT)" + echo "=== Recent router logs ===" + kubectl logs deployment/router -n commonware-avs --tail 50 + exit 1 + fi + + - name: Test with fast aggregation frequency + if: github.event_name == 'push' && github.ref == 'refs/heads/dev' + run: | + echo "Testing with fast aggregation frequency (0.5 seconds)..." + + # Update the configmap to set fast aggregation + kubectl patch configmap env-config -n commonware-avs --type merge \ + -p '{"data":{"AGGREGATION_FREQUENCY":"0.5"}}' + + # Restart router to pick up new environment variables + kubectl rollout restart deployment/router -n commonware-avs + kubectl wait --for=condition=available deployment/router \ + -n commonware-avs --timeout=60s + + # Wait for router to be ready + sleep 10 + + # Get the starting counter value from previous step + START_COUNT=${{ env.LAST_COUNT }} + DEFAULT_COUNT=${{ env.LAST_COUNT }} + echo "Starting counter value: $START_COUNT" + echo "DEFAULT_COUNT=$DEFAULT_COUNT" >> $GITHUB_ENV + + # Wait for 1 minute total with fast aggregation (0.5 seconds each cycle) + echo "Waiting for 1 minute with fast aggregation..." 
+ sleep 60 + + # Read the counter value after fast aggregation + FAST_COUNT=$(curl -s -X POST http://localhost:8545 \ + -H "Content-Type: application/json" \ + -d '{ + "jsonrpc":"2.0", + "method":"eth_call", + "params":[{ + "to":"'${{ env.COUNTER_ADDRESS }}'", + "data":"0x8381f58a" + }, "latest"], + "id":1 + }' | jq -r '.result' | xargs printf "%d\n") + + echo "Counter value after fast aggregation: $FAST_COUNT" + + # Verify fast aggregation worked (should have multiple increments) + if [ "$FAST_COUNT" -gt "$START_COUNT" ]; then + INCREMENTS=$((FAST_COUNT - START_COUNT)) + echo "Fast aggregation successful: $INCREMENTS increments in ~60 seconds" + echo "LAST_COUNT=$FAST_COUNT" >> $GITHUB_ENV + else + echo "Fast aggregation failed (counter still at $FAST_COUNT)" + kubectl logs deployment/router -n commonware-avs --tail 50 + exit 1 + fi + + - name: Test with ingress enabled + if: github.event_name == 'push' && github.ref == 'refs/heads/dev' + run: | + echo "Testing with ingress enabled..." + + # Update the configmap to enable ingress + kubectl patch configmap env-config -n commonware-avs --type merge \ + -p '{"data":{"INGRESS":"true"}}' + + # Restart router to pick up new environment variables + kubectl rollout restart deployment/router -n commonware-avs + kubectl wait --for=condition=available deployment/router \ + -n commonware-avs --timeout=60s + + # Wait for router to be ready and ingress server to start + sleep 15 + + # Get the starting counter value from previous step + START_COUNT=${{ env.LAST_COUNT }} + echo "Starting counter value: $START_COUNT" + + # Send ingress requests to trigger increments + echo "Sending ingress requests to /trigger endpoint..." 
+ for i in {1..5}; do + echo "=== Sending ingress request $i ===" + # The ingress server runs on port 8080 with /trigger endpoint + RESPONSE=$(curl -s -w "\nHTTP_STATUS:%{http_code}" -X POST http://localhost:8080/trigger \ + -H "Content-Type: application/json" \ + -d '{"body": {"metadata": {"request_id": "'$i'", "action": "increment"}}}') + HTTP_STATUS=$(echo "$RESPONSE" | tail -n 1 | cut -d: -f2) + BODY=$(echo "$RESPONSE" | head -n -1) + echo "Response: $BODY" + echo "HTTP Status: $HTTP_STATUS" + + if [ "$HTTP_STATUS" != "200" ]; then + echo "Warning: HTTP request failed with status $HTTP_STATUS" + fi + sleep 1 + done + + # Wait for aggregation to process the ingress requests + echo "Waiting for aggregation to process ingress requests..." + sleep 15 + + # Read the counter value after ingress + COUNTER_ADDR="${{ env.COUNTER_ADDRESS }}" + COUNTER_RESPONSE=$(curl -s -X POST http://localhost:8545 \ + -H "Content-Type: application/json" \ + -d '{ + "jsonrpc":"2.0", + "method":"eth_call", + "params":[{ + "to":"'$COUNTER_ADDR'", + "data":"0x8381f58a" + }, "latest"], + "id":1 + }') + + COUNTER_HEX=$(echo "$COUNTER_RESPONSE" | jq -r '.result') + + # Handle empty or invalid response + if [ -z "$COUNTER_HEX" ] || [ "$COUNTER_HEX" = "null" ] || [ "$COUNTER_HEX" = "0x" ]; then + echo "Warning: Invalid counter response, defaulting to 0" + INGRESS_COUNT=0 + else + INGRESS_COUNT=$(printf "%d\n" "$COUNTER_HEX" 2>/dev/null || echo "0") + fi + + echo "Counter value after ingress: $INGRESS_COUNT" + + # Verify ingress increments worked + if [ "$INGRESS_COUNT" -gt "$START_COUNT" ]; then + INCREMENTS=$((INGRESS_COUNT - START_COUNT)) + echo "Ingress test successful: $INCREMENTS increments after ingress requests" + else + echo "Ingress test failed (counter still at $INGRESS_COUNT)" + kubectl logs deployment/router -n commonware-avs --tail 50 + exit 1 + fi + + # Final summary + echo "=== Test Summary ===" + echo "Initial count: ${{ env.INITIAL_COUNT }}" + echo "After default aggregation (5 
cycles @ 30s): ${{ env.DEFAULT_COUNT }}" + echo "After fast aggregation: $START_COUNT" + echo "After ingress requests (5 requests): $INGRESS_COUNT" + echo "Total increments: $((INGRESS_COUNT - ${{ env.INITIAL_COUNT }}))" + + - name: Collect logs on failure + if: failure() + run: | + echo "=== Kubernetes Cluster Status ===" + kubectl get all -n commonware-avs + + echo "=== Pod Descriptions ===" + kubectl describe pods -n commonware-avs + + echo "=== Ethereum Logs ===" + kubectl logs deployment/ethereum -n commonware-avs --tail 50 || true + + echo "=== EigenLayer Setup Logs ===" + kubectl logs job/eigenlayer-setup -n commonware-avs --tail 100 || true + + echo "=== Signer Logs ===" + kubectl logs deployment/signer -n commonware-avs --tail 50 || true + + echo "=== Router Logs ===" + kubectl logs deployment/router -n commonware-avs --tail 100 || true + + echo "=== AVS Node Logs ===" + for i in 0 1 2; do + echo "--- avs-node-$i logs ---" + kubectl logs avs-node-$i -n commonware-avs --tail 50 || true + done + + echo "=== Events ===" + kubectl get events -n commonware-avs --sort-by='.lastTimestamp' + + - name: Cleanup + if: always() + run: | + echo "Cleaning up kind cluster..." + kind delete cluster --name avs-test diff --git a/docs/k8s-refactor-spec.md b/docs/k8s-refactor-spec.md new file mode 100644 index 0000000..c4544aa --- /dev/null +++ b/docs/k8s-refactor-spec.md @@ -0,0 +1,688 @@ +# Docker to Kubernetes Refactor Spec + +## 1. Background + +### Problem Statement +The current deployment infrastructure relies on Docker Compose for local development, testing, and CI/CD E2E tests. While functional, Docker Compose has limitations for production readiness, scalability, and operational workflows.
Moving to Kubernetes provides: +- Better alignment with production deployment patterns +- Native support for rolling updates, health checks, and self-healing +- Improved resource management and scaling capabilities +- Standard tooling for monitoring, logging, and debugging +- Consistent deployment model across environments + +### Context / History +- Current setup: 6 Docker Compose services (ethereum, eigenlayer, signer, node-1, node-2, node-3, router) +- CI/CD pipelines: `integration-test.yml` and `local-integration-test.yml` for E2E testing +- Multi-architecture Docker images already published to GHCR +- EigenLayer setup container runs as an init-style job (generates keys, deploys contracts) + +### Stakeholders +- Development team (local development workflows) +- DevOps/SRE (production deployment and operations) +- CI/CD systems (automated testing) + +## 2. Motivation + +### Goals & Success Criteria +1. **Production-ready infrastructure**: K8s manifests suitable for staging/production deployment +2. **Preserved E2E test coverage**: All existing CI tests must continue to pass +3. **Local development support**: Easy local K8s setup using kind/minikube +4. **Simplified operations**: Leverage K8s primitives for health checks, restarts, and scaling + +### Success Metrics +- All existing E2E tests pass in the K8s environment +- Counter increment test completes successfully +- Fast aggregation test passes (dev branch) +- Ingress endpoint test passes (dev branch) +- Local development setup documented and functional + +## 3. 
Scope and Approaches + +### In Scope +| Technical Functionality | Value | Trade-offs | +|------------------------|-------|------------| +| Kubernetes manifests for all services | Production-ready deployment | Additional complexity for simple setups | +| Helm chart for parameterization | Configurable deployments | Learning curve for Helm | +| CI/CD workflow updates | K8s-native E2E testing | More CI setup time | +| ConfigMaps and Secrets | Proper config management | Migration from .env files | +| Init containers for EigenLayer | Proper job sequencing | Init container complexity | + +### Non-Goals / Out of Scope +| Technical Functionality | Reasoning | Trade-offs | +|------------------------|-----------|------------| +| Multi-cluster deployment | Not needed for current scale | Could add later | +| Service mesh (Istio/Linkerd) | Over-engineering for current needs | Future consideration | +| Custom operators | Kubernetes primitives sufficient | Maintenance overhead | +| GitOps setup (ArgoCD/Flux) | Can be added independently | Team preference | + +### Alternative Approaches +| Approach | Pros | Cons | +|----------|------|------| +| Raw K8s manifests | Simple, no additional tooling | Duplication, harder parameterization | +| Helm charts | Parameterization, templating | Additional dependency | +| Kustomize | K8s-native, overlay support | Less powerful templating | +| **Selected: Kustomize + raw manifests** | Best balance of simplicity and flexibility | Limited templating compared to Helm | + +## 4. Step-by-Step Flow + +### 4.1 Main ("Happy") Path - CI E2E Test Flow + +**Pre-condition**: PR opened or push to main/dev/staging + +1. **GitHub Actions** triggers `k8s-integration-test.yml` workflow +2. **System** provisions kind cluster on runner +3. **System** loads locally-built images into kind +4. **System** applies K8s manifests via `kubectl apply -k` +5. 
**Init Job (eigenlayer-setup)** runs: + - Deploys EigenLayer contracts + - Generates BLS operator keys + - Creates AVS deployment artifacts + - Writes to shared PersistentVolume +6. **System** waits for Job completion (timeout: 300s) +7. **Signer Pod** starts (depends on eigenlayer-setup Job) +8. **Node Pods (1-3)** start (depend on eigenlayer-setup Job) +9. **Router Pod** starts (depends on nodes being ready) +10. **Test runner** executes counter increment verification +11. **System** cleans up cluster + +**Post-condition**: Test passes, PR check shows green + +### 4.2 Alternate / Error Paths + +| # | Condition | System Action | Handling | +|---|-----------|---------------|----------| +| A1 | eigenlayer-setup timeout | Job exceeds 300s | Collect logs, fail CI | +| A2 | Node fails to connect to ethereum | CrashLoopBackOff | Readiness probe fails, restart | +| A3 | Router can't reach nodes | Connection refused | Liveness probe fails, restart | +| A4 | Counter increment fails | Test assertion fails | Collect all logs, fail CI | +| A5 | Kind cluster creation fails | Docker socket issue | Retry or fail with diagnostics | + +## 5. 
Architecture Diagrams + +### 5.1 Kubernetes Resource Hierarchy + +```mermaid +classDiagram + class Namespace { + +name: commonware-avs + } + class ConfigMap { + +avs-config + +env-config + } + class Secret { + +operator-keys + } + class PersistentVolumeClaim { + +nodes-data + } + class Job { + +eigenlayer-setup + } + class Deployment { + +ethereum + +signer + +router + } + class StatefulSet { + +avs-nodes (replicas: 3) + } + class Service { + +ethereum-svc + +signer-svc + +node-svc (headless) + +router-svc + } + + Namespace --> ConfigMap + Namespace --> Secret + Namespace --> PersistentVolumeClaim + Namespace --> Job + Namespace --> Deployment + Namespace --> StatefulSet + Namespace --> Service + + Job --> PersistentVolumeClaim : writes keys + StatefulSet --> PersistentVolumeClaim : reads keys + Deployment --> ConfigMap : mounts config + StatefulSet --> Secret : mounts operator keys +``` + +### 5.2 Service Communication Flow + +```mermaid +sequenceDiagram + participant CI as GitHub Actions + participant Kind as Kind Cluster + participant ES as eigenlayer-setup Job + participant ETH as ethereum Pod + participant SIG as signer Pod + participant N1 as node-1 Pod + participant N2 as node-2 Pod + participant N3 as node-3 Pod + participant R as router Pod + participant Test as Test Runner + + CI->>Kind: Create cluster + CI->>Kind: Apply manifests + Kind->>ETH: Start ethereum + ETH-->>Kind: Ready + Kind->>ES: Run eigenlayer-setup + ES->>ETH: Deploy contracts + ES->>Kind: Write keys to PVC + ES-->>Kind: Job complete + Kind->>SIG: Start signer + Kind->>N1: Start node-1 + Kind->>N2: Start node-2 + Kind->>N3: Start node-3 + N1-->>Kind: Ready + N2-->>Kind: Ready + N3-->>Kind: Ready + Kind->>R: Start router + R->>N1: Connect gRPC + R->>N2: Connect gRPC + R->>N3: Connect gRPC + R-->>Kind: Ready + CI->>Test: Run E2E tests + Test->>ETH: Read counter (eth_call) + Test-->>CI: Pass/Fail +``` + +### 5.3 Pod Lifecycle State Machine + +```mermaid +stateDiagram-v2 + [*] --> Pending: Pod 
scheduled + Pending --> InitContainerRunning: Init containers start + InitContainerRunning --> Running: Init success + InitContainerRunning --> Failed: Init failure + Running --> Ready: Readiness probe passes + Ready --> Running: Readiness probe fails + Running --> Failed: Liveness probe fails + Ready --> Terminating: Scale down / delete + Failed --> Pending: Restart policy (backoff) + Terminating --> [*]: Cleanup complete +``` + +## 6. Kubernetes Manifest Structure + +### 6.1 Directory Layout + +``` +k8s/ +├── base/ +│ ├── kustomization.yaml +│ ├── namespace.yaml +│ ├── rbac.yaml +│ ├── configmap.yaml +│ ├── secret.yaml +│ ├── pvc.yaml +│ ├── ethereum/ +│ │ ├── deployment.yaml +│ │ └── service.yaml +│ ├── eigenlayer/ +│ │ └── job.yaml +│ ├── signer/ +│ │ ├── deployment.yaml +│ │ └── service.yaml +│ ├── nodes/ +│ │ ├── statefulset.yaml +│ │ └── service.yaml +│ └── router/ +│ ├── deployment.yaml +│ └── service.yaml +└── overlays/ + ├── ci/ + │ ├── kustomization.yaml + │ └── patches/ + │ ├── image-pull-policy.yaml + │ ├── nodeport-services.yaml + │ └── pvc-storage-class.yaml + └── local/ + ├── kustomization.yaml + └── patches/ + └── nodeport-services.yaml +``` + +### 6.2 Key Resource Definitions + +#### Namespace +```yaml +apiVersion: v1 +kind: Namespace +metadata: + name: commonware-avs +``` + +#### ConfigMap (avs-config) +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: avs-config + namespace: commonware-avs +data: + config.json: | + { + "quorum": { + "minimumStake": "1", + "maxOperatorCount": 32, + "kickBIPsOfOperatorStake": 10000, + "kickBIPsOfTotalStake": 100 + }, + "metadata": { + "uri": "metadataURI" + }, + "operators": { + "testacc1": {"socketAddress": "avs-node-0.avs-nodes.commonware-avs.svc.cluster.local:3001"}, + "testacc2": {"socketAddress": "avs-node-1.avs-nodes.commonware-avs.svc.cluster.local:3001"}, + "testacc3": {"socketAddress": "avs-node-2.avs-nodes.commonware-avs.svc.cluster.local:3001"} + } + } + public_orchestrator.json: | + + # NOTE: 
router_orchestrator.json with privateKey is in secret.yaml for security +``` + +#### Secret (router-secret) +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: router-secret + namespace: commonware-avs +type: Opaque +stringData: + # WARNING: This is a TEST-ONLY private key for development/CI environments. + # NEVER use this key in production. + router_orchestrator.json: | + {"privateKey": ""} +``` + +#### PersistentVolumeClaim +```yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: nodes-data + namespace: commonware-avs +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi +``` + +#### EigenLayer Setup Job +```yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: eigenlayer-setup + namespace: commonware-avs +spec: + ttlSecondsAfterFinished: 600 + backoffLimit: 3 + template: + spec: + restartPolicy: OnFailure + initContainers: + - name: wait-for-ethereum + image: busybox:1.36 + command: ['sh', '-c', 'until nc -z ethereum-svc 8545; do sleep 2; done'] + containers: + - name: eigenlayer + image: ghcr.io/breadchaincoop/eigenlayer:dev + envFrom: + - configMapRef: + name: env-config + volumeMounts: + - name: nodes-data + mountPath: /root/.nodes + - name: config + mountPath: /bls-middleware/contracts/docker/eigenlayer/config.json + subPath: config.json + volumes: + - name: nodes-data + persistentVolumeClaim: + claimName: nodes-data + - name: config + configMap: + name: avs-config +``` + +#### Node StatefulSet +```yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: avs-node + namespace: commonware-avs +spec: + serviceName: avs-nodes + replicas: 3 + selector: + matchLabels: + app: avs-node + template: + metadata: + labels: + app: avs-node + spec: + initContainers: + - name: wait-for-eigenlayer + image: bitnami/kubectl:1.28 + command: + - sh + - -c + - | + until kubectl get job eigenlayer-setup -n commonware-avs \ + -o jsonpath='{.status.succeeded}' | grep -q "1"; do + sleep 5 + done + containers: + - name: node 
+ image: ghcr.io/breadchaincoop/commonware-avs-node:dev + ports: + - containerPort: 3001 + name: grpc + env: + - name: HTTP_RPC + value: "http://ethereum-svc:8545" + - name: WS_RPC + value: "ws://ethereum-svc:8545" + - name: AVS_DEPLOYMENT_PATH + value: "/app/.nodes/avs_deploy.json" + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + command: + - sh + - -c + - | + # In K8s, each pod has its own IP, so all nodes can listen on the same port + INDEX=${POD_NAME##*-} + KEY_NUM=$((INDEX + 1)) + KEY_FILE="/app/.nodes/operator_keys/testacc${KEY_NUM}.private.bls.key.json" + /app/commonware-avs-node-app --key-file $KEY_FILE --port 3001 --orchestrator /app/config/public_orchestrator.json + volumeMounts: + - name: nodes-data + mountPath: /app/.nodes + readOnly: true + - name: config + mountPath: /app/config + readinessProbe: + tcpSocket: + port: grpc + initialDelaySeconds: 10 + periodSeconds: 5 + livenessProbe: + tcpSocket: + port: grpc + initialDelaySeconds: 30 + periodSeconds: 10 + volumes: + - name: nodes-data + persistentVolumeClaim: + claimName: nodes-data + - name: config + configMap: + name: avs-config + ``` + + #### Router Deployment + ```yaml + apiVersion: apps/v1 + kind: Deployment + metadata: + name: router + namespace: commonware-avs + spec: + replicas: 1 + selector: + matchLabels: + app: router + template: + metadata: + labels: + app: router + spec: + initContainers: + - name: wait-for-nodes + image: busybox:1.36 + command: + - sh + - -c + - | + # All nodes listen on port 3001 (each pod has its own IP) + for i in 0 1 2; do + until nc -z avs-node-$i.avs-nodes.commonware-avs.svc 3001; do + sleep 2 + done + done + containers: + - name: router + image: ghcr.io/breadchaincoop/commonware-avs-router:dev + ports: + - containerPort: 3000 + name: app + - containerPort: 8080 + name: ingress + env: + - name: HTTP_RPC + value: "http://ethereum-svc:8545" + - name: WS_RPC + value: "ws://ethereum-svc:8545" + - name: AVS_DEPLOYMENT_PATH + value: "/app/.nodes/avs_deploy.json" + envFrom: + - configMapRef: + name:
env-config + command: + - /app/commonware-avs-router-app + - --key-file + - /app/config/router_orchestrator.json + - --port + - "3000" + volumeMounts: + # Security: Only mount the deployment artifact, not operator keys + - name: nodes-data + mountPath: /app/.nodes/avs_deploy.json + subPath: avs_deploy.json + readOnly: true + - name: config + mountPath: /app/config/public_orchestrator.json + subPath: public_orchestrator.json + - name: router-secret + mountPath: /app/config/router_orchestrator.json + subPath: router_orchestrator.json + readOnly: true + readinessProbe: + tcpSocket: + port: app + initialDelaySeconds: 10 + periodSeconds: 5 + livenessProbe: + tcpSocket: + port: app + initialDelaySeconds: 30 + periodSeconds: 10 + volumes: + - name: nodes-data + persistentVolumeClaim: + claimName: nodes-data + - name: config + configMap: + name: avs-config + - name: router-secret + secret: + secretName: router-secret +``` + +## 7. CI/CD Workflow Updates + +### 7.1 New Workflow: `k8s-integration-test.yml` + +```yaml +name: K8s Integration Test + +on: + push: + branches: [main, dev, staging] + pull_request: + branches: [main, dev, staging] + +jobs: + k8s-e2e-test: + name: E2E Test with Kind + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Create kind cluster + uses: helm/kind-action@v1 + with: + cluster_name: avs-test + config: k8s/kind-config.yaml + + - name: Build and load images + run: | + docker build -f usecases/counter/router/Dockerfile -t commonware-avs-router:ci . + docker build -f usecases/counter/node/Dockerfile -t commonware-avs-node:ci . 
+ kind load docker-image commonware-avs-router:ci --name avs-test + kind load docker-image commonware-avs-node:ci --name avs-test + + - name: Apply K8s manifests + run: | + kubectl apply -k k8s/overlays/ci/ + + - name: Wait for eigenlayer-setup job + run: | + kubectl wait --for=condition=complete job/eigenlayer-setup \ + -n commonware-avs --timeout=300s + + - name: Wait for pods to be ready + run: | + kubectl wait --for=condition=ready pod -l app=avs-node \ + -n commonware-avs --timeout=120s + kubectl wait --for=condition=ready pod -l app=router \ + -n commonware-avs --timeout=60s + + - name: Port forward ethereum + run: | + kubectl port-forward svc/ethereum-svc 8545:8545 \ + -n commonware-avs & + sleep 5 + + - name: Test counter increment + run: | + # Same test logic as current integration-test.yml + # Read counter, wait, verify increment + + - name: Collect logs on failure + if: failure() + run: | + kubectl logs -l app=router -n commonware-avs --tail=100 + kubectl logs -l app=avs-node -n commonware-avs --tail=100 + kubectl describe pods -n commonware-avs + + - name: Cleanup + if: always() + run: | + kind delete cluster --name avs-test +``` + +### 7.2 Kind Cluster Configuration + +```yaml +# k8s/kind-config.yaml +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: + - role: control-plane + extraPortMappings: + - containerPort: 30545 + hostPort: 8545 + protocol: TCP + - containerPort: 30080 + hostPort: 8080 + protocol: TCP +``` + +## 8. 
Edge Cases and Concessions + +### 8.1 Init Container vs Job Pattern +- **Decision**: Use a Job for eigenlayer-setup rather than init containers on every pod +- **Rationale**: The setup only needs to run once; using init containers would cause race conditions +- **Trade-off**: Requires pods to check Job completion status + +### 8.2 StatefulSet vs Deployment for Nodes +- **Decision**: Use StatefulSet for nodes +- **Rationale**: Provides stable network identities (node-0, node-1, node-2) and ordered startup +- **Trade-off**: More complex than Deployment, but necessary for stable addressing + +### 8.3 PVC Access Mode +- **Decision**: Use `ReadWriteOnce` for CI (single-node kind) +- **Limitation**: Won't work for multi-node production clusters +- **Mitigation**: Production overlay can switch to `ReadWriteMany` or node-local storage + +### 8.4 Image Pull Policy +- **CI**: Use `Never` (images loaded directly into kind) +- **Production**: Use `IfNotPresent` or `Always` with proper image tags + +### 8.5 Signer Service +- **Decision**: Deploy as a single-replica Deployment +- **Note**: The Cerberus signer is stateless and doesn't require special handling + +## 9. Migration Path + +### Phase 1: Parallel Implementation +1. Create K8s manifests alongside existing Docker Compose +2. Add new CI workflow without removing existing ones +3. Verify feature parity in E2E tests + +### Phase 2: Validation +1. Run both CI pipelines in parallel +2. Compare results and timing +3. Address any discrepancies + +### Phase 3: Switchover +1. Make K8s the primary CI pipeline +2. Deprecate Docker Compose CI workflow +3. Update documentation + +### Phase 4: Cleanup +1. Remove redundant Docker Compose CI workflow +2. Keep docker-compose.yml for simple local development (optional) + +## 10. Open Questions + +1. **Production storage backend**: What storage class should be used in production? (e.g., EBS, GCE PD, local-path) +2. 
**Secrets management**: Should we integrate with external secrets (Vault, AWS Secrets Manager)?
3. **Monitoring**: Do we want to include Prometheus ServiceMonitor resources?
4. **Network policies**: Should we restrict pod-to-pod communication?
5. **Resource limits**: What are appropriate CPU/memory limits for each service?

## 11. Glossary / References

| Term | Definition |
|------|------------|
| Kind | Kubernetes IN Docker - tool for running local K8s clusters |
| StatefulSet | K8s workload for stateful applications with stable identities |
| Kustomize | K8s-native configuration management tool |
| PVC | PersistentVolumeClaim - request for storage |
| Init Container | Container that runs before app containers start |
| Headless Service | Service without ClusterIP for direct pod addressing |

### Links
- [Docker Compose file](../docker-compose.yml)
- [Current integration test](../.github/workflows/integration-test.yml)
- [Kind documentation](https://kind.sigs.k8s.io/)
- [Kustomize documentation](https://kustomize.io/)
- [Kubernetes StatefulSet concepts](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/)

# ---------------------------------------------------------------------------
# file: k8s/base/configmap.yaml
# Shared non-secret configuration: AVS quorum/operator config consumed by the
# eigenlayer-setup job, the public orchestrator config consumed by nodes and
# router, and plain environment variables shared by all workloads.
# ---------------------------------------------------------------------------
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: avs-config
  namespace: commonware-avs
data:
  config.json: |
    {
      "quorum": {
        "minimumStake": "1",
        "maxOperatorCount": 32,
        "kickBIPsOfOperatorStake": 10000,
        "kickBIPsOfTotalStake": 100
      },
      "metadata": {
        "uri": "metadataURI"
      },
      "operators": {
        "testacc1": {
          "socketAddress": "avs-node-0.avs-nodes.commonware-avs.svc.cluster.local:3001"
        },
        "testacc2": {
          "socketAddress": "avs-node-1.avs-nodes.commonware-avs.svc.cluster.local:3001"
        },
        "testacc3": {
          "socketAddress": "avs-node-2.avs-nodes.commonware-avs.svc.cluster.local:3001"
        }
      }
    }
  public_orchestrator.json: |
    {
      "g2_x1": "20265730220917057623326116620721648047640065506233168445998945605458084341755",
      "g2_x2": "1537141129484558011683382469842956131676085503509229854572844956364492197092",
      "g2_y1": "4380068110839997539835821427545270098552639074995346826656804866303457881635",
      "g2_y2": "479676018937294309080674601592141614301396550682703157902264620243097107417",
      "port": "3000",
      "address": "router"
    }
  # NOTE: router_orchestrator.json with privateKey is in secret.yaml for security
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: env-config
  namespace: commonware-avs
data:
  # All values are quoted strings: ConfigMap data values must be strings,
  # and quoting prevents YAML from retyping "3"/"false"/"30" on a round trip.
  ENVIRONMENT: "LOCAL"
  CERBERUS_GRPC_PORT: "50051"
  CERBERUS_METRICS_PORT: "9081"
  SIGNER_ENDPOINT: "http://signer-svc:50051"
  TEST_ACCOUNTS: "3"
  INGRESS: "false"
  AGGREGATION_FREQUENCY: "30"

# ---------------------------------------------------------------------------
# file: k8s/base/eigenlayer/job.yaml
# One-shot setup job: waits for the ethereum RPC, then runs the EigenLayer
# contract deployment, writing artifacts into the shared nodes-data PVC.
# ---------------------------------------------------------------------------
---
apiVersion: batch/v1
kind: Job
metadata:
  name: eigenlayer-setup
  namespace: commonware-avs
  labels:
    app: eigenlayer-setup
spec:
  # NOTE(review): other pods' init containers poll this Job's status; after
  # the 1h TTL the Job object is garbage-collected and any pod (re)started
  # later would poll forever — acceptable for CI, confirm for longer-lived
  # local clusters.
  ttlSecondsAfterFinished: 3600
  backoffLimit: 3
  template:
    metadata:
      labels:
        app: eigenlayer-setup
    spec:
      restartPolicy: OnFailure
      initContainers:
        # Gate on the ethereum RPC port being reachable before deploying.
        - name: wait-for-ethereum
          image: busybox:1.36
          command:
            - sh
            - -c
            - |
              echo "Waiting for ethereum to be ready..."
              until nc -z ethereum-svc 8545; do
                echo "Waiting for ethereum-svc:8545..."
                sleep 2
              done
              echo "Ethereum is ready!"
      containers:
        - name: eigenlayer
          image: ghcr.io/breadchaincoop/eigenlayer:dev
          envFrom:
            - configMapRef:
                name: env-config
          env:
            - name: RPC_URL
              value: "http://ethereum-svc:8545"
            - name: FORK_URL
              value: "https://ethereum-holesky.publicnode.com"
          volumeMounts:
            # Deployment artifacts (avs_deploy.json, operator keys) are written
            # here and consumed by the node/router workloads via the same PVC.
            - name: nodes-data
              mountPath: /root/.nodes
            - name: config
              mountPath: /bls-middleware/contracts/docker/eigenlayer/config.json
              subPath: config.json
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
      volumes:
        - name: nodes-data
          persistentVolumeClaim:
            claimName: nodes-data
        - name: config
          configMap:
            name: avs-config

# ---------------------------------------------------------------------------
# file: k8s/base/ethereum/deployment.yaml
# Local ethereum RPC node (single replica); probed via TCP on 8545.
# ---------------------------------------------------------------------------
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ethereum
  namespace: commonware-avs
  labels:
    app: ethereum
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ethereum
  template:
    metadata:
      labels:
        app: ethereum
    spec:
      containers:
        - name: ethereum
          image: ghcr.io/breadchaincoop/ethereum:dev
          ports:
            - containerPort: 8545
              name: rpc
          envFrom:
            - configMapRef:
                name: env-config
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
          # TCP probes only check the port is open, not RPC health — sufficient
          # for a local dev chain that serves RPC as soon as it listens.
          readinessProbe:
            tcpSocket:
              port: 8545
            initialDelaySeconds: 5
            periodSeconds: 5
          livenessProbe:
            tcpSocket:
              port: 8545
            initialDelaySeconds: 10
            periodSeconds: 10

# ---------------------------------------------------------------------------
# file: k8s/base/ethereum/service.yaml
# ClusterIP service fronting the ethereum RPC; CI overlay patches this to
# NodePort 30545 for host access through Kind's port mapping.
# ---------------------------------------------------------------------------
---
apiVersion: v1
kind: Service
metadata:
  name: ethereum-svc
  namespace: commonware-avs
  labels:
    app: ethereum
spec:
  selector:
    app: ethereum
  ports:
    - name: rpc
      port: 8545
      targetPort: 8545
  type: ClusterIP
# ---------------------------------------------------------------------------
# file: k8s/base/kustomization.yaml
# Base kustomization listing every resource; overlays (ci/local) layer
# patches and image overrides on top of this.
# ---------------------------------------------------------------------------
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: commonware-avs

resources:
  - namespace.yaml
  - rbac.yaml
  - configmap.yaml
  - secret.yaml
  - pvc.yaml
  - ethereum/deployment.yaml
  - ethereum/service.yaml
  - eigenlayer/job.yaml
  - signer/deployment.yaml
  - signer/service.yaml
  - nodes/statefulset.yaml
  - nodes/service.yaml
  - router/deployment.yaml
  - router/service.yaml

# NOTE(review): commonLabels is deprecated in recent Kustomize in favor of
# `labels`, but `labels` does not touch selectors by default — kept as-is to
# preserve behavior.
commonLabels:
  app.kubernetes.io/part-of: commonware-avs

# ---------------------------------------------------------------------------
# file: k8s/base/namespace.yaml
# ---------------------------------------------------------------------------
---
apiVersion: v1
kind: Namespace
metadata:
  name: commonware-avs
  labels:
    app.kubernetes.io/name: commonware-avs
    app.kubernetes.io/part-of: commonware-avs

# ---------------------------------------------------------------------------
# file: k8s/base/nodes/service.yaml
# Headless service giving each StatefulSet pod a stable DNS name.
# ---------------------------------------------------------------------------
---
apiVersion: v1
kind: Service
metadata:
  name: avs-nodes
  namespace: commonware-avs
  labels:
    app: avs-node
spec:
  selector:
    app: avs-node
  clusterIP: None
  ports:
    # Headless service for StatefulSet - all pods listen on port 3001
    # Access via: avs-node-{0,1,2}.avs-nodes.commonware-avs.svc.cluster.local:3001
    - name: grpc
      port: 3001
      targetPort: 3001

# ---------------------------------------------------------------------------
# file: k8s/base/nodes/statefulset.yaml
# Three AVS operator nodes with stable identities; each derives its key file
# from its ordinal (avs-node-N -> testacc(N+1)).
# ---------------------------------------------------------------------------
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: avs-node
  namespace: commonware-avs
  labels:
    app: avs-node
spec:
  serviceName: avs-nodes
  replicas: 3
  # Nodes have no startup ordering dependency on each other, so start all
  # replicas at once instead of the default ordered rollout.
  podManagementPolicy: Parallel
  selector:
    matchLabels:
      app: avs-node
  template:
    metadata:
      labels:
        app: avs-node
    spec:
      initContainers:
        # Block until the one-shot EigenLayer deployment has produced the
        # artifacts this node reads from the shared PVC. Requires the
        # job-reader Role bound to avs-service-account (rbac.yaml).
        - name: wait-for-eigenlayer
          image: bitnami/kubectl:1.28
          command:
            - sh
            - -c
            - |
              echo "Waiting for eigenlayer-setup job to complete..."
              until kubectl get job eigenlayer-setup -n commonware-avs -o jsonpath='{.status.succeeded}' 2>/dev/null | grep -q "1"; do
                echo "Waiting for eigenlayer-setup job..."
                sleep 5
              done
              echo "EigenLayer setup complete!"
        - name: wait-for-ethereum
          image: busybox:1.36
          command:
            - sh
            - -c
            - |
              echo "Waiting for ethereum to be ready..."
              until nc -z ethereum-svc 8545; do
                echo "Waiting for ethereum-svc:8545..."
                sleep 2
              done
              echo "Ethereum is ready!"
      containers:
        - name: node
          image: ghcr.io/breadchaincoop/commonware-avs-node:dev
          ports:
            - containerPort: 3001
              name: grpc
          env:
            - name: HTTP_RPC
              value: "http://ethereum-svc:8545"
            - name: WS_RPC
              value: "ws://ethereum-svc:8545"
            - name: AVS_DEPLOYMENT_PATH
              value: "/app/.nodes/avs_deploy.json"
            # Pod ordinal is parsed out of the pod name below to select the key.
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          envFrom:
            - configMapRef:
                name: env-config
          command:
            - sh
            - -c
            - |
              # In K8s, each pod has its own IP, so all nodes can listen on the same port
              INDEX=${POD_NAME##*-}
              KEY_NUM=$((INDEX + 1))
              KEY_FILE="/app/.nodes/operator_keys/testacc${KEY_NUM}.private.bls.key.json"
              echo "Starting node with index=$INDEX, port=3001, key=$KEY_FILE"
              /app/commonware-avs-node-app --key-file "$KEY_FILE" --port "3001" --orchestrator /app/config/public_orchestrator.json
          volumeMounts:
            - name: nodes-data
              mountPath: /app/.nodes
              readOnly: true
            - name: config
              mountPath: /app/config
          resources:
            requests:
              memory: "256Mi"
              cpu: "100m"
            limits:
              memory: "1Gi"
              cpu: "500m"
          readinessProbe:
            tcpSocket:
              port: 3001
            initialDelaySeconds: 10
            periodSeconds: 5
          livenessProbe:
            tcpSocket:
              port: 3001
            initialDelaySeconds: 30
            periodSeconds: 10
      serviceAccountName: avs-service-account
      volumes:
        # NOTE(review): this RWO PVC is also mounted by the setup job and the
        # router — multiple pods on one node is fine (single-node Kind), but
        # this will not schedule across nodes; confirm before multi-node use.
        - name: nodes-data
          persistentVolumeClaim:
            claimName: nodes-data
        - name: config
          configMap:
            name: avs-config

# ---------------------------------------------------------------------------
# file: k8s/base/pvc.yaml
# Shared artifact volume written by eigenlayer-setup, read by nodes/router.
# ---------------------------------------------------------------------------
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: nodes-data
  namespace: commonware-avs
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
  # storageClassName intentionally omitted to use cluster default
  # CI overlay patches this to "standard" for Kind compatibility

# ---------------------------------------------------------------------------
# file: k8s/base/rbac.yaml
# Least-privilege read access to Jobs, used by init containers that poll
# the eigenlayer-setup Job for completion.
# ---------------------------------------------------------------------------
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: avs-service-account
  namespace: commonware-avs
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: job-reader
  namespace: commonware-avs
rules:
  - apiGroups: ["batch"]
    resources: ["jobs"]
    verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: job-reader-binding
  namespace: commonware-avs
subjects:
  - kind: ServiceAccount
    name: avs-service-account
    namespace: commonware-avs
roleRef:
  kind: Role
  name: job-reader
  apiGroup: rbac.authorization.k8s.io

# ---------------------------------------------------------------------------
# file: k8s/base/router/deployment.yaml
# Aggregation router: waits for all three nodes, then runs with the secret
# orchestrator key. Only avs_deploy.json is mounted from the PVC (no keys).
# ---------------------------------------------------------------------------
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: router
  namespace: commonware-avs
  labels:
    app: router
spec:
  replicas: 1
  selector:
    matchLabels:
      app: router
  template:
    metadata:
      labels:
        app: router
    spec:
      initContainers:
        - name: wait-for-nodes
          image: busybox:1.36
          command:
            - sh
            - -c
            - |
              echo "Waiting for all AVS nodes to be ready..."
              # All nodes listen on port 3001 (each pod has its own IP in K8s)
              for i in 0 1 2; do
                NODE_HOST="avs-node-$i.avs-nodes.commonware-avs.svc.cluster.local"
                echo "Waiting for $NODE_HOST:3001..."
                until nc -z "$NODE_HOST" 3001; do
                  sleep 2
                done
                echo "$NODE_HOST:3001 is ready!"
              done
              echo "All nodes are ready!"
      containers:
        - name: router
          image: ghcr.io/breadchaincoop/commonware-avs-router:dev
          ports:
            - containerPort: 3000
              name: app
            - containerPort: 8080
              name: ingress
          env:
            - name: HTTP_RPC
              value: "http://ethereum-svc:8545"
            - name: WS_RPC
              value: "ws://ethereum-svc:8545"
            - name: AVS_DEPLOYMENT_PATH
              value: "/app/.nodes/avs_deploy.json"
          envFrom:
            - configMapRef:
                name: env-config
          command:
            - /app/commonware-avs-router-app
            - --key-file
            - /app/config/router_orchestrator.json
            - --port
            - "3000"
          volumeMounts:
            # Security: Only mount the deployment artifact, not operator keys
            - name: nodes-data
              mountPath: /app/.nodes/avs_deploy.json
              subPath: avs_deploy.json
              readOnly: true
            - name: config
              mountPath: /app/config/public_orchestrator.json
              subPath: public_orchestrator.json
              readOnly: true
            - name: router-secret
              mountPath: /app/config/router_orchestrator.json
              subPath: router_orchestrator.json
              readOnly: true
          resources:
            requests:
              memory: "256Mi"
              cpu: "100m"
            limits:
              memory: "1Gi"
              cpu: "500m"
          readinessProbe:
            tcpSocket:
              port: 3000
            initialDelaySeconds: 10
            periodSeconds: 5
          livenessProbe:
            tcpSocket:
              port: 3000
            initialDelaySeconds: 30
            periodSeconds: 10
      volumes:
        - name: nodes-data
          persistentVolumeClaim:
            claimName: nodes-data
        - name: config
          configMap:
            name: avs-config
        - name: router-secret
          secret:
            secretName: router-secret

# ---------------------------------------------------------------------------
# file: k8s/base/router/service.yaml
# ---------------------------------------------------------------------------
---
apiVersion: v1
kind: Service
metadata:
  name: router-svc
  namespace: commonware-avs
  labels:
    app: router
spec:
  selector:
    app: router
  ports:
    - name: app
      port: 3000
      targetPort: 3000
    - name: ingress
      port: 8080
      targetPort: 8080
  type: ClusterIP

# ---------------------------------------------------------------------------
# file: k8s/base/secret.yaml
# ---------------------------------------------------------------------------
---
apiVersion: v1
kind: Secret
metadata:
  name: router-secret
  namespace: commonware-avs
  labels:
    app: router
type: Opaque
stringData:
  # WARNING: This is a TEST-ONLY private key for development/CI environments.
  # NEVER use this key in production. Generate a new key for production deployments.
  router_orchestrator.json: |
    {
      "privateKey": "21747297277459394737337554964532622739983285244437107194284923826011566394548"
    }

# ---------------------------------------------------------------------------
# file: k8s/base/signer/deployment.yaml
# Cerberus BLS signer; also gated on eigenlayer-setup completion via the
# job-reader RBAC on avs-service-account.
# ---------------------------------------------------------------------------
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: signer
  namespace: commonware-avs
  labels:
    app: signer
spec:
  replicas: 1
  selector:
    matchLabels:
      app: signer
  template:
    metadata:
      labels:
        app: signer
    spec:
      initContainers:
        - name: wait-for-eigenlayer
          image: bitnami/kubectl:1.28
          command:
            - sh
            - -c
            - |
              echo "Waiting for eigenlayer-setup job to complete..."
              until kubectl get job eigenlayer-setup -n commonware-avs -o jsonpath='{.status.succeeded}' 2>/dev/null | grep -q "1"; do
                echo "Waiting for eigenlayer-setup job..."
                sleep 5
              done
              echo "EigenLayer setup complete!"
      containers:
        - name: signer
          image: ghcr.io/layr-labs/cerberus:0.0.2
          ports:
            - containerPort: 50051
              name: grpc
            - containerPort: 9081
              name: metrics
          env:
            - name: METRICS_PORT
              value: "9081"
          envFrom:
            - configMapRef:
                name: env-config
          resources:
            requests:
              memory: "128Mi"
              cpu: "100m"
            limits:
              memory: "512Mi"
              cpu: "500m"
          readinessProbe:
            tcpSocket:
              port: 50051
            initialDelaySeconds: 5
            periodSeconds: 5
          livenessProbe:
            tcpSocket:
              port: 50051
            initialDelaySeconds: 10
            periodSeconds: 10
      serviceAccountName: avs-service-account

# ---------------------------------------------------------------------------
# file: k8s/base/signer/service.yaml
# ---------------------------------------------------------------------------
---
apiVersion: v1
kind: Service
metadata:
  name: signer-svc
  namespace: commonware-avs
  labels:
    app: signer
spec:
  selector:
    app: signer
  ports:
    - name: grpc
      port: 50051
      targetPort: 50051
    - name: metrics
      port: 9081
      targetPort: 9081
  type: ClusterIP

# ---------------------------------------------------------------------------
# file: k8s/kind-config.yaml
# Kind cluster definition: maps NodePorts (30xxx) exposed by the CI/local
# overlays onto host ports so tests can reach services from the runner.
# ---------------------------------------------------------------------------
---
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
name: avs-test
nodes:
  - role: control-plane
    extraPortMappings:
      # Ethereum RPC - used by CI tests for counter verification
      - containerPort: 30545
        hostPort: 8545
        protocol: TCP
      # Router app - for local development (not used in CI)
      - containerPort: 30000
        hostPort: 4000
        protocol: TCP
      # Router ingress - used by CI tests for ingress endpoint testing
      - containerPort: 30080
        hostPort: 8080
        protocol: TCP
    kubeadmConfigPatches:
      - |
        kind: InitConfiguration
        nodeRegistration:
          kubeletExtraArgs:
            node-labels: "ingress-ready=true"
# ---------------------------------------------------------------------------
# file: k8s/overlays/ci/kustomization.yaml
# CI overlay: NodePort exposure, Kind-compatible storage class, pull-policy
# overrides, and image renames to the locally-built :ci tags.
# ---------------------------------------------------------------------------
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: commonware-avs

resources:
  - ../../base

# image-pull-policy.yaml and nodeport-services.yaml are multi-document files;
# the newer `patches` field only accepts one patch per file, so they are
# applied via patchesStrategicMerge (deprecated but supports multi-doc files).
patchesStrategicMerge:
  - patches/image-pull-policy.yaml
  - patches/nodeport-services.yaml

patches:
  - path: patches/pvc-storage-class.yaml

images:
  - name: ghcr.io/breadchaincoop/commonware-avs-router
    newName: commonware-avs-router
    newTag: ci
  - name: ghcr.io/breadchaincoop/commonware-avs-node
    newName: commonware-avs-node
    newTag: ci

# ---------------------------------------------------------------------------
# file: k8s/overlays/ci/patches/image-pull-policy.yaml
# Locally built images (router, node) must never be pulled (they only exist
# inside the Kind node); preloaded external images use IfNotPresent.
# ---------------------------------------------------------------------------
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ethereum
  namespace: commonware-avs
spec:
  template:
    spec:
      containers:
        - name: ethereum
          imagePullPolicy: IfNotPresent
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: signer
  namespace: commonware-avs
spec:
  template:
    spec:
      containers:
        - name: signer
          imagePullPolicy: IfNotPresent
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: router
  namespace: commonware-avs
spec:
  template:
    spec:
      containers:
        - name: router
          imagePullPolicy: Never
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: avs-node
  namespace: commonware-avs
spec:
  template:
    spec:
      containers:
        - name: node
          imagePullPolicy: Never
---
apiVersion: batch/v1
kind: Job
metadata:
  name: eigenlayer-setup
  namespace: commonware-avs
spec:
  template:
    spec:
      containers:
        - name: eigenlayer
          imagePullPolicy: IfNotPresent

# ---------------------------------------------------------------------------
# file: k8s/overlays/ci/patches/nodeport-services.yaml
# NodePorts match the containerPort entries in k8s/kind-config.yaml.
# ---------------------------------------------------------------------------
---
apiVersion: v1
kind: Service
metadata:
  name: ethereum-svc
  namespace: commonware-avs
spec:
  type: NodePort
  ports:
    - name: rpc
      port: 8545
      targetPort: 8545
      nodePort: 30545
---
apiVersion: v1
kind: Service
metadata:
  name: router-svc
  namespace: commonware-avs
spec:
  type: NodePort
  ports:
    - name: app
      port: 3000
      targetPort: 3000
      nodePort: 30000
    - name: ingress
      port: 8080
      targetPort: 8080
      nodePort: 30080

# ---------------------------------------------------------------------------
# file: k8s/overlays/ci/patches/pvc-storage-class.yaml
# Kind's default provisioner exposes the "standard" storage class.
# ---------------------------------------------------------------------------
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: nodes-data
  namespace: commonware-avs
spec:
  storageClassName: standard

# ---------------------------------------------------------------------------
# file: k8s/overlays/local/kustomization.yaml
# Local overlay: only exposes services via NodePort; uses upstream :dev
# images and the cluster-default storage class.
# ---------------------------------------------------------------------------
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: commonware-avs

resources:
  - ../../base

# Multi-document patch file — see note in the CI overlay; the `patches`
# field rejects files containing more than one document.
patchesStrategicMerge:
  - patches/nodeport-services.yaml

# ---------------------------------------------------------------------------
# file: k8s/overlays/local/patches/nodeport-services.yaml
# Identical to the CI overlay patch (same blob in the original commit).
# ---------------------------------------------------------------------------
---
apiVersion: v1
kind: Service
metadata:
  name: ethereum-svc
  namespace: commonware-avs
spec:
  type: NodePort
  ports:
    - name: rpc
      port: 8545
      targetPort: 8545
      nodePort: 30545
---
apiVersion: v1
kind: Service
metadata:
  name: router-svc
  namespace: commonware-avs
spec:
  type: NodePort
  ports:
    - name: app
      port: 3000
      targetPort: 3000
      nodePort: 30000
    - name: ingress
      port: 8080
      targetPort: 8080
      nodePort: 30080