Add locust-based Kubernetes load tests and HPA benchmarks

Par-t · Par-t · commit ffb59cd2cb66 · 2026-03-09T23:56:37.000-04:00
diff --git a/.gitignore b/.gitignore
@@ -35,4 +35,5 @@ outputs/
 
 # Plan (personal notes)
 plan.md
 notes.md
+k8s/results/
diff --git a/k8s/load_test.sh b/k8s/load_test.sh
@@ -0,0 +1,144 @@
+#!/bin/bash
+# Kubernetes load test for the recommendation API.
+#
+# Runs two scenarios using Locust and saves CSV results:
+#   Scenario 1 — Baseline:  10 users, 60s  (measure steady-state latency)
+#   Scenario 2 — Stress:    50 users, 120s (trigger HPA, measure degradation)
+#
+# Prerequisites:
+#   - Minikube running: minikube start --driver=docker
+#   - Mounts active:   minikube mount $(pwd)/outputs:/mnt/outputs
+#                      minikube mount $(pwd)/data/processed:/mnt/data/processed
+#   - App deployed:    kubectl apply -f k8s/deployment.yaml -f k8s/service.yaml
+#   - HPA deployed:    kubectl apply -f k8s/hpa.yaml
+#   - metrics-server:  minikube addons enable metrics-server
+#   - locust installed: pip install locust
+#
+# Usage (from project root):
+#   bash k8s/load_test.sh <SERVICE_URL>
+
+set -e
+
+RESULTS_DIR="k8s/results"
+mkdir -p "$RESULTS_DIR"
+
+# The service URL must be passed as the first argument. It is not discovered
+# automatically because, on macOS with the Docker driver,
+# `minikube service --url` blocks (it is a tunnel that must stay open).
+if [ -z "$1" ]; then
+    cat << 'USAGE'
+Usage: bash k8s/load_test.sh <SERVICE_URL>
+
+To get the URL, run this in a separate terminal and leave it open:
+  minikube service recommendation-api --url
+
+Then pass the printed URL here, e.g.:
+  bash k8s/load_test.sh http://127.0.0.1:12345
+USAGE
+    exit 1
+fi
+SERVICE_URL="$1"
+printf 'Service URL: %s\n\n' "$SERVICE_URL"
+
+# Verify the service is healthy before load testing: /health must report
+# "model_loaded": true (whitespace around the colon is tolerated).
+echo "Verifying service health..."
+HEALTH=$(curl -s --max-time 5 "$SERVICE_URL/health" 2>/dev/null || echo "")
+if ! echo "$HEALTH" | grep -Eq '"model_loaded"[[:space:]]*:[[:space:]]*true'; then
+    echo "ERROR: Service is not healthy. Response: $HEALTH"
+    exit 1
+fi
+printf 'Health check passed: %s\n\n' "$HEALTH"
+
+# ─────────────────────────────────────────────────────────────
+# Scenario 1 — Baseline (10 users, 60s)
+# ─────────────────────────────────────────────────────────────
+echo "========================================"
+echo "Scenario 1: Baseline (10 users, 60s)"
+echo "========================================"
+
+echo "Pod count before:"
+kubectl get pods -l app=recommendation-api --no-headers | wc -l | xargs echo "  Pods:"
+
+# NOTE: piping through `tail` means `set -e` sees tail's exit status, not
+# locust's, so the script continues even if locust exits non-zero.
+locust -f k8s/locustfile.py \
+    --headless \
+    --users 10 \
+    --spawn-rate 2 \
+    --run-time 60s \
+    --host "$SERVICE_URL" \
+    --csv "$RESULTS_DIR/baseline" \
+    --only-summary \
+    2>&1 | tail -20
+
+echo ""
+echo "Baseline results saved to $RESULTS_DIR/baseline_stats.csv"
+echo ""
+
+# Print p50/p95/RPS/failures for /recommend from the Locust stats CSV.
+python3 - "$RESULTS_DIR/baseline_stats.csv" << 'PYEOF'
+import csv, sys
+with open(sys.argv[1], newline="") as f:  # newline="" as the csv docs require
+    rows = [r for r in csv.DictReader(f) if r.get("Name") == "/recommend"]
+for r in rows:
+    p50, p95, rps = (r.get(k, "N/A") for k in ("50%", "95%", "Requests/s"))
+    print(f"  /recommend  p50={p50}ms  p95={p95}ms  RPS={rps}  failures={r.get('Failure Count', '0')}")
+if not rows:
+    print("  /recommend  no requests recorded")
+PYEOF
+
+# ─────────────────────────────────────────────────────────────
+# Scenario 2 — Stress (50 users, 120s)
+# ─────────────────────────────────────────────────────────────
+echo ""
+echo "========================================"
+echo "Scenario 2: Stress (50 users, 120s)"
+echo "========================================"
+
+echo "Pod count before stress test:"
+kubectl get pods -l app=recommendation-api --no-headers | wc -l | xargs echo "  Pods:"
+
+echo "HPA state before stress test:"
+kubectl get hpa recommendation-api --no-headers 2>/dev/null || echo "  HPA not deployed"
+echo ""
+
+# NOTE: piping through `tail` means `set -e` sees tail's exit status, not
+# locust's, so the script continues even if locust exits non-zero.
+locust -f k8s/locustfile.py \
+    --headless \
+    --users 50 \
+    --spawn-rate 5 \
+    --run-time 120s \
+    --host "$SERVICE_URL" \
+    --csv "$RESULTS_DIR/stress" \
+    --only-summary \
+    2>&1 | tail -20
+
+echo ""
+echo "Stress results saved to $RESULTS_DIR/stress_stats.csv"
+echo ""
+
+# Print p50/p95/RPS/failures for /recommend from the Locust stats CSV.
+python3 - "$RESULTS_DIR/stress_stats.csv" << 'PYEOF'
+import csv, sys
+with open(sys.argv[1], newline="") as f:  # newline="" as the csv docs require
+    rows = [r for r in csv.DictReader(f) if r.get("Name") == "/recommend"]
+for r in rows:
+    p50, p95, rps = (r.get(k, "N/A") for k in ("50%", "95%", "Requests/s"))
+    print(f"  /recommend  p50={p50}ms  p95={p95}ms  RPS={rps}  failures={r.get('Failure Count', '0')}")
+if not rows:
+    print("  /recommend  no requests recorded")
+PYEOF
+
+# Final cluster snapshot: list the pods, then the HPA if one is deployed.
+echo "Pod count after stress test (HPA may have scaled up):"
+kubectl get pods -l app=recommendation-api
+printf '\nHPA state after stress test:\n'
+if ! kubectl get hpa recommendation-api 2>/dev/null; then
+    echo "  HPA not deployed"
+fi
+printf '\n%s\n' "========================================"
+echo "Load test complete."
+echo "Results: $RESULTS_DIR/baseline_stats.csv  $RESULTS_DIR/stress_stats.csv"
+echo "========================================"
diff --git a/k8s/locustfile.py b/k8s/locustfile.py
@@ -0,0 +1,73 @@
+"""
+Locust load test for the recommendation API.
+
+Tasks:
+  - GET  /health          (lightweight, ~10% of requests)
+  - POST /recommend       (main workload, ~90% of requests)
+
+The recommend task samples a random valid user_id from the local
+data/processed/user_to_idx.json file, so every request is realistic.
+
+Usage (run from project root):
+    locust -f k8s/locustfile.py --host http://<MINIKUBE_URL>
+
+Or headless (used by load_test.sh):
+    locust -f k8s/locustfile.py --headless -u 10 -r 2 --run-time 60s \
+        --host http://<MINIKUBE_URL> --csv k8s/results/baseline
+"""
+
+import json
+import random
+from pathlib import Path
+
+from locust import HttpUser, between, task
+
+# Load valid user IDs once at import time (shared by every simulated user in
+# this process). The relative path assumes Locust is run from the project root.
+_USER_IDS_PATH = Path("data/processed/user_to_idx.json")
+try:
+    _USER_IDS = list(json.loads(_USER_IDS_PATH.read_text(encoding="utf-8")))
+except (FileNotFoundError, NotADirectoryError):
+    # Fallback: empty list — recommend tasks will be skipped gracefully
+    _USER_IDS = []
+
+
+class RecommendationUser(HttpUser):
+    """
+    Simulated API client that mixes health polls with recommendation calls.
+
+    Between tasks each simulated user waits a random 0.1–0.5 seconds
+    (``wait_time``). Task weights are 1 for the health poll and 9 for the
+    recommendation request.
+    """
+
+    wait_time = between(0.1, 0.5)
+
+    @task(1)
+    def health_check(self):
+        """Send GET /health; the low weight keeps it a small traffic share."""
+        self.client.get("/health", name="/health")
+
+    @task(9)
+    def get_recommendations(self):
+        """POST /recommend for a user ID drawn at random from ``_USER_IDS``."""
+        # Nothing was loaded at import time: skip without sending a request.
+        if not _USER_IDS:
+            return
+
+        chosen_id = random.choice(_USER_IDS)
+        body = {"user_id": chosen_id, "top_k": 10}
+        request = self.client.post(
+            "/recommend",
+            json=body,
+            name="/recommend",
+            catch_response=True,
+        )
+        with request as resp:
+            # 404 means the user is unknown to this model version; it is
+            # counted as a success, same as 200. Anything else is a failure.
+            if resp.status_code in (200, 404):
+                resp.success()
+            else:
+                detail = resp.text[:100]
+                resp.failure(f"Unexpected status {resp.status_code}: {detail}")
diff --git a/requirements.txt b/requirements.txt
@@ -19,3 +19,6 @@ uvicorn>=0.27.0
 # Metrics & baselines
 scipy>=1.11.0
 scikit-learn>=1.4.0
+
+# Load testing
+locust>=2.20.0