diff --git a/.github/helm-e2e/action.yaml b/.github/helm-e2e/action.yaml
new file mode 100644
index 00000000..982e1291
--- /dev/null
+++ b/.github/helm-e2e/action.yaml
@@ -0,0 +1,18 @@
+---
+name: Helm E2E Test
+description: Tests Helm chart installation and operator deployment
+inputs:
+ version:
+ description: Operator version to install
+ required: true
+runs:
+ using: composite
+ steps:
+ - name: Run Helm E2E tests
+ shell: bash
+ run: |
+ ./tests/helm.sh \
+ --running-on-vm \
+ --version=${{ inputs.version }}
+ env:
+ VERSION: ${{ inputs.version }}
diff --git a/.github/workflows/pr-checks.yaml b/.github/workflows/pr-checks.yaml
index ee4c3f11..9cb5ea38 100644
--- a/.github/workflows/pr-checks.yaml
+++ b/.github/workflows/pr-checks.yaml
@@ -386,3 +386,62 @@ jobs:
with:
name: cluster-state
path: cluster-state
+
+ helm-validate:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout source
+ uses: actions/checkout@v4
+
+ - name: Setup Go
+ uses: actions/setup-go@v5
+ with:
+ go-version-file: go.mod
+ cache: false
+
+ - name: Install all tools
+ uses: ./.github/tools-cache
+
+ - name: Run Helm validation
+ run: hack/helm/validate.sh
+
+ helm-e2e:
+ needs: [bundle, helm-validate]
+ env:
+ KIND_VERSION: 0.27.0
+ KIND_WORKER_NODES: 2
+ name: helm-e2e
+ runs-on: ubuntu-latest-16-cores
+ steps:
+ - name: Checkout source
+ uses: actions/checkout@v4
+
+ - name: Install Go
+ uses: actions/setup-go@v5
+ with:
+ go-version-file: go.mod
+ cache: false
+
+ - name: Install all tools
+ uses: ./.github/tools-cache
+
+ - name: Setup cluster with prerequisites
+ run: make cluster-up
+ env:
+ PROMETHEUS_ENABLE: "true"
+
+ - name: Compute version
+ uses: ./.github/compute-version
+ id: version
+
+ - name: Run Helm E2E tests
+ uses: ./.github/helm-e2e
+ with:
+ version: ${{ steps.version.outputs.version }}
+
+ - name: Archive cluster state
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: helm-cluster-state
+ path: cluster-state
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5cb6dafc..d8ee2027 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -29,7 +29,7 @@ repos:
rev: v1.37.0
hooks:
- id: yamllint
- exclude: ^(bundle|config|hack/crd)
+ exclude: ^(bundle|config|hack/crd|manifests/helm)
- repo: https://github.com/igorshubovych/markdownlint-cli
rev: v0.44.0
@@ -61,7 +61,7 @@ repos:
hooks:
- id: commitlint
stages: [commit-msg]
- additional_dependencies: ['@commitlint/config-conventional'] # yamllint disable-line rule:quoted-strings
+ additional_dependencies: ["@commitlint/config-conventional"] # yamllint disable-line rule:quoted-strings
- repo: https://github.com/fsfe/reuse-tool
rev: v5.0.2
diff --git a/Makefile b/Makefile
index a950e18b..ed3ce084 100644
--- a/Makefile
+++ b/Makefile
@@ -295,6 +295,51 @@ undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/confi
$(KUSTOMIZE) build config/default/k8s | \
kubectl delete --ignore-not-found=$(ignore-not-found) -f -
+##@ Helm Deployment
+
+HELM_CHART_DIR := manifests/helm/kepler-operator
+HELM_RELEASE_NAME ?= kepler-operator
+HELM_NAMESPACE ?= kepler-operator
+HELM_TIMEOUT ?= 2m
+
+.PHONY: helm-template
+helm-template: helm manifests ## Generate manifests from Helm chart
+ $(HELM) template $(HELM_RELEASE_NAME) $(HELM_CHART_DIR) \
+ --namespace $(HELM_NAMESPACE) \
+ --set operator.image=$(OPERATOR_IMG) \
+ --set kepler.image=$(KEPLER_IMG) \
+ --set kube-rbac-proxy.image=$(KUBE_RBAC_PROXY_IMG)
+
+.PHONY: helm-install
+helm-install: helm manifests helm-sync-crds ## Install operator via Helm
+ $(HELM) upgrade --install $(HELM_RELEASE_NAME) $(HELM_CHART_DIR) \
+ --namespace $(HELM_NAMESPACE) \
+ --create-namespace \
+ --set operator.image=$(OPERATOR_IMG) \
+ --set kepler.image=$(KEPLER_IMG) \
+ --set kube-rbac-proxy.image=$(KUBE_RBAC_PROXY_IMG) \
+ --timeout $(HELM_TIMEOUT) \
+ --wait
+
+.PHONY: helm-uninstall
+helm-uninstall: helm ## Uninstall operator via Helm
+ $(HELM) uninstall $(HELM_RELEASE_NAME) --namespace $(HELM_NAMESPACE)
+
+.PHONY: helm-package
+helm-package: helm manifests helm-sync-crds ## Package the Helm chart
+ $(HELM) package $(HELM_CHART_DIR) --destination tmp/
+
+.PHONY: helm-sync-crds
+helm-sync-crds: ## Sync CRDs from config/crd/bases to Helm chart
+ @mkdir -p $(HELM_CHART_DIR)/crds
+ cp config/crd/bases/*.yaml $(HELM_CHART_DIR)/crds/
+	@echo "CRDs synced to Helm chart"
+
+.PHONY: helm-validate
+helm-validate: kustomize helm yq ## Validate Helm chart (syntax, templates, CRD sync, resources)
+ @echo "Validating Helm chart against kustomize..."
+ ./hack/helm/validate.sh
+
##@ Build Dependencies
## Location where binaries are installed
@@ -304,11 +349,13 @@ LOCALBIN ?= $(shell pwd)/tmp/bin
KUSTOMIZE ?= $(LOCALBIN)/kustomize
CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen
CRDOC ?= $(LOCALBIN)/crdoc
+HELM ?= $(LOCALBIN)/helm
# NOTE: please keep this list sorted so that it can be easily searched
TOOLS = controller-gen \
crdoc \
govulncheck \
+ helm \
jq \
kubectl \
kustomize \
diff --git a/README.md b/README.md
index b63f51d7..603e7bee 100644
--- a/README.md
+++ b/README.md
@@ -49,7 +49,7 @@ Deploy the operator and its dependencies:
```sh
make tools
kubectl create -f https://github.com/prometheus-operator/prometheus-operator/releases/download/v0.76.0/bundle.yaml
-kubectl create -f https://github.com/jetstack/cert-manager/releases/download/v1.15.3/cert-manager.yaml
+kubectl create -f https://github.com/cert-manager/cert-manager/releases/download/v1.18.2/cert-manager.yaml
make deploy
kubectl apply -k config/samples/
```
diff --git a/docs/developer/README.md b/docs/developer/README.md
index 600f6c78..b3f32192 100644
--- a/docs/developer/README.md
+++ b/docs/developer/README.md
@@ -62,3 +62,8 @@
* Kube Builder Book:
* Operator SDK Getting Started:
* Kubernetes Programming Book:
+
+# Developer Guides
+
+* [Helm Chart Maintenance](helm-chart-maintenance.md) - How to update and maintain the Helm chart
+* [Pre-commit Hooks](pre-commit-hooks.md) - Setting up and using pre-commit hooks
diff --git a/docs/developer/helm-chart-maintenance.md b/docs/developer/helm-chart-maintenance.md
new file mode 100644
index 00000000..f54e2447
--- /dev/null
+++ b/docs/developer/helm-chart-maintenance.md
@@ -0,0 +1,432 @@
+# Helm Chart Maintenance Guide
+
+This guide explains how to maintain and update the Helm chart for the Kepler Operator.
+
+---
+
+## Overview
+
+The Helm chart uses a **hybrid automation approach**:
+
+- **Manual**: Templates are hand-crafted for full control and customization
+- **Automated**: CRDs are automatically synced from `config/crd/bases/`
+- **Validated**: Automated checks ensure consistency with kustomize deployment
+
+This approach balances maintainability with flexibility.
+
+---
+
+## Chart Structure
+
+```text
+manifests/helm/kepler-operator/
+├── Chart.yaml              # Chart metadata (version, appVersion)
+├── values.yaml             # Default configuration values
+├── README.md               # User-facing installation guide
+├── .helmignore             # Files excluded from packaging
+├── crds/                   # CRDs (auto-synced from config/crd/bases/)
+│   ├── kepler.system...powermonitors.yaml
+│   └── kepler.system...powermonitorinternals.yaml
+└── templates/
+    ├── _helpers.tpl        # Template helper functions
+    ├── NOTES.txt           # Post-install instructions
+    ├── serviceaccount.yaml
+    ├── rbac.yaml           # All RBAC resources
+    ├── deployment.yaml
+    ├── services.yaml       # Metrics + webhook services
+    ├── certificate.yaml    # cert-manager resources (conditional)
+    ├── webhooks.yaml       # Webhook configurations (conditional)
+    └── servicemonitor.yaml # Prometheus ServiceMonitor (conditional)
+```
+
+---
+
+## When to Update the Helm Chart
+
+| Change Type | Action Required | Files to Update |
+|-------------|----------------|-----------------|
+| **CRD Modified** | Run `make helm-sync-crds` | Auto-synced to `crds/` |
+| **RBAC Changed** | Manual template update | `templates/rbac.yaml` |
+| **Deployment Changed** | Manual template update | `templates/deployment.yaml` |
+| **New Resource Added** | Create new template | `templates/.yaml` |
+| **Config Option Added** | Update values & templates | `values.yaml` + relevant template |
+| **Version Bump** | Update chart metadata | `Chart.yaml` (version, appVersion) |
+
+---
+
+## Update Workflow
+
+### 1. Make Changes
+
+```bash
+# If CRDs changed, sync them
+make helm-sync-crds
+
+# If templates changed, edit manually
+vim manifests/helm/kepler-operator/templates/.yaml
+
+# If configuration changed, update values
+vim manifests/helm/kepler-operator/values.yaml
+```
+
+### 2. Validate Changes
+
+```bash
+# Run all validation tests (recommended)
+make helm-validate # Full validation (syntax, templates, CRD sync, resources)
+
+# Or preview rendered manifests:
+make helm-template # Preview rendered manifests
+```
+
+### 3. Test Locally (Optional)
+
+```bash
+# Full end-to-end test (recommended)
+./tests/helm.sh
+
+# Or manual testing:
+make helm-install # Install to cluster
+kubectl get all -n kepler-operator # Verify deployment
+make helm-uninstall # Clean up
+
+# Advanced: test with existing image
+./tests/helm.sh --no-build --version=0.21.0
+```
+
+---
+
+## Creating/Updating Templates
+
+### Use Kustomize as Reference
+
+**Important**: Always use `config/default/k8s` as your source of truth, NOT `config/manifests`.
+
+```bash
+# Generate reference manifest
+make manifests
+kustomize build config/default/k8s > /tmp/kustomize-ref.yaml
+
+# Extract specific resources
+./tmp/bin/yq 'select(.kind == "Deployment")' /tmp/kustomize-ref.yaml
+./tmp/bin/yq 'select(.kind == "Service")' /tmp/kustomize-ref.yaml
+```
+
+**Why `config/default/k8s`?**
+
+- `config/default/k8s`: Standard Kubernetes deployment (matches Helm use case)
+- `config/manifests`: OLM-specific with ClusterServiceVersion (different model)
+
+### Template Creation Steps
+
+1. Extract resource from kustomize output
+2. Replace hardcoded values with template helpers:
+ - Names: `{{ include "kepler-operator.fullname" . }}-`
+ - Namespace: `{{ include "kepler-operator.namespace" . }}`
+ - Labels: `{{ include "kepler-operator.labels" . | nindent 4 }}`
+ - Images: `{{ include "kepler-operator.image" . }}`
+3. Add conditional rendering if needed:
+
+ ```yaml
+ {{- if .Values.feature.enabled }}
+ # resource definition
+ {{- end }}
+ ```
+
+4. Use values from `values.yaml`:
+
+ ```yaml
+ replicas: {{ .Values.replicaCount }}
+ resources:
+ {{- toYaml .Values.resources | nindent 12 }}
+ ```
+
+### Helper Function Reference
+
+Common helpers available in `templates/_helpers.tpl`:
+
+```yaml
+# Chart name
+{{ include "kepler-operator.name" . }}
+
+# Full name (release-name + chart-name)
+{{ include "kepler-operator.fullname" . }}
+
+# Namespace
+{{ include "kepler-operator.namespace" . }}
+
+# Standard labels
+{{ include "kepler-operator.labels" . | nindent 4 }}
+
+# Selector labels (stable, for pod selectors)
+{{ include "kepler-operator.managerLabels" . | nindent 6 }}
+
+# Image references
+{{ include "kepler-operator.image" . }} # Operator image
+{{ include "kepler-operator.keplerImage" . }} # Kepler image
+{{ include "kepler-operator.kubeRbacProxyImage" . }} # Kube RBAC Proxy image
+
+# Service account name
+{{ include "kepler-operator.serviceAccountName" . }}
+```
+
+---
+
+## Validation Details
+
+The `make helm-validate` command runs three layers of checks:
+
+### Layer 1: Syntax Validation
+
+```bash
+helm lint manifests/helm/kepler-operator
+```
+
+- Validates Chart.yaml structure
+- Checks template syntax
+- Verifies values.yaml schema
+
+### Layer 2: Template Rendering
+
+```bash
+helm template kepler-operator manifests/helm/kepler-operator \
+ --set metrics.serviceMonitor.enabled=true
+```
+
+- Ensures templates render without errors
+- Tests value substitution
+- Validates conditional logic
+
+### Layer 3: Consistency Checks
+
+```bash
+./hack/helm/validate.sh
+```
+
+- Verifies CRD sync status (CRDs match `config/crd/bases/`)
+- Validates that all expected resources are present
+- Checks that project-local tools are available
+
+---
+
+## Common Patterns
+
+### Conditional Resources
+
+Use feature flags in `values.yaml`:
+
+```yaml
+# values.yaml
+webhooks:
+ enabled: true
+ certManager:
+ enabled: true
+```
+
+Then wrap entire templates:
+
+```yaml
+# templates/certificate.yaml
+{{- if .Values.webhooks.certManager.enabled }}
+# Certificate and Issuer resources
+{{- end }}
+```
+
+### Multi-Resource Templates
+
+Group related resources in single file with `---` separator:
+
+```yaml
+# templates/rbac.yaml
+# Role
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+...
+---
+# RoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+...
+```
+
+### Image Configuration
+
+Use full image paths for simplicity:
+
+```yaml
+# values.yaml
+operator:
+ image: quay.io/sustainable_computing_io/kepler-operator:0.21.0
+ pullPolicy: IfNotPresent
+
+kepler:
+ image: quay.io/sustainable_computing_io/kepler:v0.11.0
+
+kube-rbac-proxy:
+ image: quay.io/brancz/kube-rbac-proxy:v0.19.0
+
+# _helpers.tpl
+{{- define "kepler-operator.image" -}}
+{{- .Values.operator.image }}
+{{- end }}
+
+{{- define "kepler-operator.keplerImage" -}}
+{{- .Values.kepler.image }}
+{{- end }}
+
+{{- define "kepler-operator.kubeRbacProxyImage" -}}
+{{- index .Values "kube-rbac-proxy" "image" }}
+{{- end }}
+```
+
+This approach is simpler and allows overriding with:
+
+```bash
+helm install kepler-operator ./chart \
+ --set operator.image=localhost:5001/kepler-operator:dev
+```
+
+---
+
+## Common Pitfalls
+
+### Wrong Kustomize Overlay
+
+```bash
+kustomize build config/manifests # OLM-specific, wrong!
+```
+
+Instead, use:
+
+```bash
+kustomize build config/default/k8s # Vanilla K8s, correct!
+```
+
+### Hardcoded Names
+
+```yaml
+name: kepler-operator-controller
+namespace: kepler-operator
+```
+
+Instead, use helpers:
+
+```yaml
+name: {{ include "kepler-operator.fullname" . }}-controller
+namespace: {{ include "kepler-operator.namespace" . }}
+```
+
+### Validation Without Optional Resources
+
+```bash
+helm template kepler-operator manifests/helm/kepler-operator
+# ServiceMonitor missing!
+```
+
+Instead, enable all optionals:
+
+```bash
+helm template kepler-operator manifests/helm/kepler-operator \
+ --set metrics.serviceMonitor.enabled=true
+```
+
+### Mutable Selector Labels
+
+```yaml
+selector:
+ matchLabels:
+ {{- include "kepler-operator.labels" . | nindent 4 }}
+ # Includes version, breaks on upgrade!
+```
+
+Instead, use stable selectors:
+
+```yaml
+selector:
+ matchLabels:
+ {{- include "kepler-operator.managerLabels" . | nindent 4 }}
+```
+
+### Namespace Template + --create-namespace Flag
+
+```yaml
+# templates/namespace.yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+ name: {{ include "kepler-operator.namespace" . }}
+```
+
+Combining this template with the `--create-namespace` flag causes a conflict:
+
+```text
+Error: namespaces "kepler-operator" already exists
+```
+
+Instead, use **only** the `--create-namespace` flag (standard Helm practice):
+
+```bash
+helm install kepler-operator ./chart \
+ --namespace kepler-operator \
+ --create-namespace # Let Helm create namespace
+```
+
+**Rationale**: The `--create-namespace` flag is simpler and follows standard Helm conventions. Template-based namespace creation adds unnecessary complexity and potential conflicts.
+
+---
+
+## Release Process
+
+When releasing a new version:
+
+1. **Update Chart.yaml**:
+
+ ```yaml
+ version: 0.22.0 # Bump chart version
+ appVersion: 0.22.0 # Match operator version
+ ```
+
+2. **Sync CRDs** (if changed):
+
+ ```bash
+ make helm-sync-crds
+ ```
+
+3. **Validate**:
+
+ ```bash
+ make helm-validate # Runs syntax, template, CRD sync, and resource validation
+ ```
+
+4. **Package** (optional):
+
+ ```bash
+ make helm-package
+ ```
+
+5. **Commit changes**:
+
+ ```bash
+ git add manifests/helm/kepler-operator/
+ git commit -m "chore(helm): bump chart version to 0.22.0"
+ ```
+
+---
+
+## Additional Resources
+
+- **Helm Best Practices**:
+- **Knowledge Base**: `tmp/agents/knowledge/helm-deployment.md`
+- **Chart README**: `manifests/helm/kepler-operator/README.md` (user guide)
+- **Kustomize Docs**:
+
+---
+
+## Getting Help
+
+- Review existing templates for patterns
+- Check validation errors: `make helm-validate` provides specific guidance
+- See knowledge base for detailed explanations: `tmp/agents/knowledge/helm-deployment.md`
+- Ask in project discussions or issues
+
+Happy charting!
diff --git a/hack/cluster.sh b/hack/cluster.sh
index 05ce4191..adb2fc79 100755
--- a/hack/cluster.sh
+++ b/hack/cluster.sh
@@ -7,7 +7,7 @@ declare -r VERSION=${VERSION:-v0.0.3}
declare -r CLUSTER_PROVIDER=${CLUSTER_PROVIDER:-kind}
declare -r GRAFANA_ENABLE=${GRAFANA_ENABLE:-true}
declare -r KIND_WORKER_NODES=${KIND_WORKER_NODES:-2}
-declare -r CERTMANAGER_VERSION=${CERT_MANAGER_VERSION:-1.15.0}
+declare -r CERTMANAGER_VERSION=${CERT_MANAGER_VERSION:-1.18.2}
declare -r OLM_VERSION=${OLM_VERSION:-v0.28.0}
# constants
@@ -16,7 +16,7 @@ declare -r PROJECT_ROOT
declare -r TMP_DIR="$PROJECT_ROOT/tmp"
declare -r DEV_CLUSTER_DIR="$TMP_DIR/local-dev-cluster"
declare -r BIN_DIR="$TMP_DIR/bin"
-declare -r CERTMANAGER_URL="https://github.com/jetstack/cert-manager/releases/download/v$CERTMANAGER_VERSION/cert-manager.yaml"
+declare -r CERTMANAGER_URL="https://github.com/cert-manager/cert-manager/releases/download/v$CERTMANAGER_VERSION/cert-manager.yaml"
source "$PROJECT_ROOT/hack/utils.bash"
diff --git a/hack/helm/validate.sh b/hack/helm/validate.sh
new file mode 100755
index 00000000..2e3b27b0
--- /dev/null
+++ b/hack/helm/validate.sh
@@ -0,0 +1,158 @@
+#!/usr/bin/env bash
+# Copyright 2025 The Kepler Contributors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+HELM_CHART_DIR="$PROJECT_ROOT/manifests/helm/kepler-operator"
+CRD_SOURCE_DIR="$PROJECT_ROOT/config/crd/bases"
+CRD_DEST_DIR="$HELM_CHART_DIR/crds"
+
+# Image versions for validation
+OPERATOR_IMAGE="${OPERATOR_IMAGE:-quay.io/sustainable_computing_io/kepler-operator:0.21.0}"
+KEPLER_IMAGE="${KEPLER_IMAGE:-quay.io/sustainable_computing_io/kepler:latest}"
+KUBE_RBAC_PROXY_IMAGE="${KUBE_RBAC_PROXY_IMAGE:-quay.io/brancz/kube-rbac-proxy:v0.18.1}"
+
+# shellcheck source=hack/utils.bash
+source "$SCRIPT_DIR/../utils.bash"
+
+# Validate that required tools are available
+check_tools() {
+ local bin_dir="$PROJECT_ROOT/tmp/bin"
+ local tools=("helm" "kustomize" "yq")
+ for tool in "${tools[@]}"; do
+ if [[ ! -x "$bin_dir/$tool" ]]; then
+ fail "$tool is not installed. Please run 'make $tool' to install it."
+ return 1
+ fi
+ done
+}
+
+# Use project-local tools
+export PATH="$PROJECT_ROOT/tmp/bin:$PATH"
+
+# Render Helm templates with standard test values
+render_helm_template() {
+ helm template kepler-operator "$HELM_CHART_DIR" \
+ --namespace kepler-operator \
+ --set operator.image="$OPERATOR_IMAGE" \
+ --set kepler.image="$KEPLER_IMAGE" \
+ --set kube-rbac-proxy.image="$KUBE_RBAC_PROXY_IMAGE" \
+ --set metrics.serviceMonitor.enabled=true
+}
+
+# Validate Helm chart syntax
+validate_helm_syntax() {
+ info "Validating Helm chart syntax..."
+
+ helm lint "$HELM_CHART_DIR" >/dev/null 2>&1 || {
+ fail "Helm chart syntax validation failed"
+ helm lint "$HELM_CHART_DIR"
+ return 1
+
+ }
+ ok "Helm chart syntax is valid"
+ return 0
+}
+
+# Validate that templates render successfully
+validate_helm_template() {
+ info "Validating Helm templates render successfully..."
+
+ render_helm_template >/dev/null 2>&1 || {
+ fail "Helm template rendering failed"
+ render_helm_template
+ return 1
+ }
+ ok "Helm templates render successfully"
+ return 0
+}
+
+# Validate CRD sync status
+validate_crd_sync() {
+ info "Validating CRD sync status..."
+ local all_synced=true
+
+ for crd_file in "$CRD_SOURCE_DIR"/*.yaml; do
+ local crd_name
+ crd_name=$(basename "$crd_file")
+ local dest_file="$CRD_DEST_DIR/$crd_name"
+
+ [[ -f "$dest_file" ]] || {
+ fail "CRD $crd_name not found in Helm chart crds/ directory"
+ all_synced=false
+ continue
+ }
+
+ diff -q "$crd_file" "$dest_file" >/dev/null 2>&1 || {
+ fail "CRD $crd_name is out of sync. Run 'make helm-sync-crds' to sync."
+ all_synced=false
+ continue
+ }
+ done
+
+ [[ "$all_synced" == "true" ]] || return 1
+ ok "All CRDs are synced"
+ return 0
+}
+
+# Validate that all expected resources are present
+validate_resources() {
+ info "Validating expected resources are present..."
+ local expected_resources=(
+ "ServiceAccount"
+ "Role"
+ "ClusterRole"
+ "RoleBinding"
+ "ClusterRoleBinding"
+ "Service"
+ "Deployment"
+ "Certificate"
+ "Issuer"
+ "MutatingWebhookConfiguration"
+ "ValidatingWebhookConfiguration"
+ "ServiceMonitor"
+ )
+
+ local rendered
+ rendered=$(render_helm_template)
+
+ local all_found=true
+ for resource in "${expected_resources[@]}"; do
+ echo "$rendered" | grep -q "^kind: $resource$" || {
+ fail "Expected resource $resource not found in rendered templates"
+ all_found=false
+ }
+ done
+
+ [[ "$all_found" == "true" ]] || return 1
+ ok "All expected resources are present"
+ return 0
+}
+
+main() {
+ info "Starting Helm chart validation..."
+
+ check_tools
+ validate_helm_syntax
+ validate_helm_template
+ validate_crd_sync
+ validate_resources
+
+ ok "Helm chart validation completed successfully"
+}
+
+main "$@"
diff --git a/hack/tools.sh b/hack/tools.sh
index 743eeb4b..62bb8d22 100755
--- a/hack/tools.sh
+++ b/hack/tools.sh
@@ -20,6 +20,7 @@ declare -r OC_VERSION=${OC_VERSION:-4.18.1}
declare -r KUBECTL_VERSION=${KUBECTL_VERSION:-v1.28.4}
declare -r SHFMT_VERSION=${SHFMT_VERSION:-v3.7.0}
declare -r JQ_VERSION=${JQ_VERSION:-1.7}
+declare -r HELM_VERSION=${HELM_VERSION:-v3.18.1}
# install
declare -r KUSTOMIZE_INSTALL_SCRIPT="https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh"
@@ -27,6 +28,7 @@ declare -r OPERATOR_SDK_INSTALL="https://github.com/operator-framework/operator-
declare -r YQ_INSTALL="https://github.com/mikefarah/yq/releases/download/$YQ_VERSION/yq_${GOOS}_${GOARCH}"
declare -r OC_URL="https://mirror.openshift.com/pub/openshift-v4/clients/ocp/$OC_VERSION"
declare -r JQ_INSTALL_URL="https://github.com/jqlang/jq/releases/download/jq-$JQ_VERSION"
+declare -r HELM_INSTALL_URL="https://get.helm.sh"
source "$PROJECT_ROOT/hack/utils.bash"
@@ -225,6 +227,33 @@ install_jq() {
ok "jq was installed successfully"
}
+version_helm() {
+ helm version
+}
+
+install_helm() {
+ local version_regex="Version:\"$HELM_VERSION\""
+ validate_version helm version "$version_regex" && return 0
+
+ info "installing helm version: $HELM_VERSION"
+ local helm_tar="helm-${HELM_VERSION}-${GOOS}-${GOARCH}.tar.gz"
+ local install_url="$HELM_INSTALL_URL/$helm_tar"
+
+ local helm_tmp="$LOCAL_BIN/tmp-helm"
+ mkdir -p "$helm_tmp"
+
+ curl -sSL "$install_url" | tar -xzf - -C "$helm_tmp" || {
+ fail "failed to install helm"
+ return 1
+ }
+
+ mv "$helm_tmp/$GOOS-$GOARCH/helm" "$LOCAL_BIN/"
+ chmod +x "$LOCAL_BIN/helm"
+ rm -rf "$helm_tmp"
+
+ ok "helm was installed successfully"
+}
+
install_all() {
info "installing all tools ..."
local ret=0
diff --git a/manifests/helm/kepler-operator/.helmignore b/manifests/helm/kepler-operator/.helmignore
new file mode 100644
index 00000000..43eb8e1d
--- /dev/null
+++ b/manifests/helm/kepler-operator/.helmignore
@@ -0,0 +1,27 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
+# Testing and CI files
+.travis.yml
+.gitlab-ci.yml
+OWNERS
diff --git a/manifests/helm/kepler-operator/Chart.yaml b/manifests/helm/kepler-operator/Chart.yaml
new file mode 100644
index 00000000..f5f69913
--- /dev/null
+++ b/manifests/helm/kepler-operator/Chart.yaml
@@ -0,0 +1,26 @@
+apiVersion: v2
+name: kepler-operator
+description: A Helm chart for deploying the Kepler Operator on Kubernetes
+type: application
+version: 0.21.0
+appVersion: 0.21.0
+keywords:
+ - kepler
+ - power
+ - energy
+ - monitoring
+ - sustainability
+home: https://sustainable-computing.io/
+sources:
+ - https://github.com/sustainable-computing-io/kepler-operator
+maintainers:
+ - name: Sunil Thaha
+ email: sthaha@redhat.com
+ - name: Vibhu Prashar
+ email: vprashar@redhat.com
+ - name: Vimal Kumar
+ email: vimalkum@redhat.com
+ - name: Kaiyi Liu
+ email: kaliu@redhat.com
+icon: https://raw.githubusercontent.com/sustainable-computing-io/kepler-operator/v1alpha1/docs/logo/kepler-icon.svg
+kubeVersion: ">=1.24.0"
diff --git a/manifests/helm/kepler-operator/README.md b/manifests/helm/kepler-operator/README.md
new file mode 100644
index 00000000..2e82c94d
--- /dev/null
+++ b/manifests/helm/kepler-operator/README.md
@@ -0,0 +1,161 @@
+# Kepler Operator Helm Chart
+
+Helm chart for deploying the Kepler Operator on Kubernetes.
+
+> **Note**: This guide provides both `make` targets (for developers working from source) and direct `helm` commands (for users installing from packaged charts).
+
+## Prerequisites
+
+- Kubernetes >=1.24.0
+- Helm >=3.0.0
+- cert-manager >=1.18.0 (for webhook certificates)
+
+## Installation
+
+### Install cert-manager (if not already installed)
+
+```bash
+kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.18.2/cert-manager.yaml
+```
+
+### Install Kepler Operator
+
+**From source repository:**
+
+```bash
+make helm-install
+```
+
+**Using Helm directly:**
+
+```bash
+helm install kepler-operator ./manifests/helm/kepler-operator \
+ --namespace kepler-operator \
+ --create-namespace
+```
+
+**From packaged chart:**
+
+```bash
+helm install kepler-operator kepler-operator-0.21.0.tgz \
+ --namespace kepler-operator \
+ --create-namespace
+```
+
+### Install with custom values
+
+```bash
+helm install kepler-operator ./manifests/helm/kepler-operator \
+ --namespace kepler-operator \
+ --create-namespace \
+ --set operator.image=quay.io/sustainable_computing_io/kepler-operator:v0.21.0 \
+ --set kepler.image=quay.io/sustainable_computing_io/kepler:v0.11.0 \
+ --set metrics.serviceMonitor.enabled=true
+```
+
+Or create a custom `values.yaml` and install:
+
+```bash
+helm install kepler-operator ./manifests/helm/kepler-operator \
+ --namespace kepler-operator \
+ --create-namespace \
+ --values custom-values.yaml
+```
+
+## Configuration
+
+Key configuration values:
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `operator.image` | Operator image (full path with tag) | `quay.io/sustainable_computing_io/kepler-operator:0.21.0` |
+| `operator.pullPolicy` | Image pull policy | `IfNotPresent` |
+| `kepler.image` | Kepler image (full path with tag) | `quay.io/sustainable_computing_io/kepler:v0.11.0` |
+| `kube-rbac-proxy.image` | Kube RBAC Proxy image (full path with tag) | `quay.io/brancz/kube-rbac-proxy:v0.19.0` |
+| `replicaCount` | Number of operator replicas | `1` |
+| `namespace` | Operator namespace | `kepler-operator` |
+| `webhooks.enabled` | Enable admission webhooks | `true` |
+| `webhooks.certManager.enabled` | Use cert-manager for webhook certificates | `true` |
+| `metrics.serviceMonitor.enabled` | Enable Prometheus ServiceMonitor | `false` |
+
+See [values.yaml](values.yaml) for the complete list of configuration options.
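+
+As a sketch, a `custom-values.yaml` built from the parameters above might look like the following (the values mirror the documented defaults, except that the ServiceMonitor is enabled); pass it with `--values custom-values.yaml` as shown earlier:
+
+```yaml
+replicaCount: 1
+
+operator:
+  image: quay.io/sustainable_computing_io/kepler-operator:0.21.0
+  pullPolicy: IfNotPresent
+
+kepler:
+  image: quay.io/sustainable_computing_io/kepler:v0.11.0
+
+webhooks:
+  enabled: true
+  certManager:
+    enabled: true
+
+metrics:
+  serviceMonitor:
+    enabled: true
+```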
+
+## Creating a PowerMonitor Resource
+
+After installing the operator, create a PowerMonitor resource:
+
+```yaml
+apiVersion: kepler.system.sustainable.computing.io/v1alpha1
+kind: PowerMonitor
+metadata:
+ name: power-monitor
+spec:
+ kepler:
+ deployment:
+ nodeSelector:
+ kubernetes.io/os: linux
+ config:
+ logLevel: info
+```
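+
+Save the manifest to a file (the name `power-monitor.yaml` below is only an example) and apply it; PowerMonitor is a cluster-scoped resource, so no namespace flag is needed:
+
+```bash
+kubectl apply -f power-monitor.yaml
+kubectl get powermonitor power-monitor
+```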
+
+## Upgrading
+
+**From source repository:**
+
+```bash
+make helm-install # Uses helm upgrade --install
+```
+
+**Using Helm directly:**
+
+```bash
+helm upgrade kepler-operator ./manifests/helm/kepler-operator \
+ --namespace kepler-operator
+```
+
+## Uninstalling
+
+**From source repository:**
+
+```bash
+make helm-uninstall
+```
+
+**Using Helm directly:**
+
+```bash
+helm uninstall kepler-operator --namespace kepler-operator
+```
+
+## Development
+
+For contributors working on the Helm chart, see the [Helm Chart Maintenance Guide](../../../docs/developer/helm-chart-maintenance.md).
+
+### Testing
+
+**Static validation:**
+
+```bash
+make helm-validate # Run all validation tests (syntax, templates, CRD sync, resources)
+make helm-template # Preview rendered manifests
+```
+
+**End-to-end testing:**
+
+```bash
+# Full e2e test (requires cluster with cert-manager)
+./tests/helm.sh
+
+# See all options
+./tests/helm.sh --help
+```
+
+### Syncing CRDs
+
+```bash
+make helm-sync-crds
+```
+
+## License
+
+Apache License 2.0
diff --git a/manifests/helm/kepler-operator/crds/kepler.system.sustainable.computing.io_powermonitorinternals.yaml b/manifests/helm/kepler-operator/crds/kepler.system.sustainable.computing.io_powermonitorinternals.yaml
new file mode 100644
index 00000000..785e0b51
--- /dev/null
+++ b/manifests/helm/kepler-operator/crds/kepler.system.sustainable.computing.io_powermonitorinternals.yaml
@@ -0,0 +1,383 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.17.2
+ name: powermonitorinternals.kepler.system.sustainable.computing.io
+spec:
+ group: kepler.system.sustainable.computing.io
+ names:
+ kind: PowerMonitorInternal
+ listKind: PowerMonitorInternalList
+ plural: powermonitorinternals
+ singular: powermonitorinternal
+ scope: Cluster
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.kepler.desiredNumberScheduled
+ name: Desired
+ type: integer
+ - jsonPath: .status.kepler.currentNumberScheduled
+ name: Current
+ type: integer
+ - jsonPath: .status.kepler.updatedNumberScheduled
+ name: Up-to-date
+ type: integer
+ - jsonPath: .status.kepler.numberReady
+ name: Ready
+ type: integer
+ - jsonPath: .status.kepler.numberAvailable
+ name: Available
+ type: integer
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: date
+ - jsonPath: .spec.kepler.deployment.image
+ name: Image
+ type: string
+ - jsonPath: .spec.kepler.deployment.nodeSelector
+ name: Node-Selector
+ priority: 10
+ type: string
+ - jsonPath: .spec.kepler.deployment.tolerations
+ name: Tolerations
+ priority: 10
+ type: string
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: PowerMonitorInternal is the Schema for the internal kepler 2
+ API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: PowerMonitorInternalSpec defines the desired state of PowerMonitorInternalSpec
+ properties:
+ kepler:
+ properties:
+ config:
+ properties:
+ additionalConfigMaps:
+ description: |-
+ AdditionalConfigMaps is a list of ConfigMap names that will be merged with the default ConfigMap
+ These AdditionalConfigMaps must exist in the same namespace as PowerMonitor components
+ items:
+ description: ConfigMapRef defines a reference to a ConfigMap
+ properties:
+ name:
+ description: Name of the ConfigMap
+ minLength: 1
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ logLevel:
+ default: info
+ type: string
+ maxTerminated:
+ default: 500
+ description: |-
+ MaxTerminated controls terminated workload tracking behavior
+ Negative values: track unlimited terminated workloads (no capacity limit)
+ Zero: disable terminated workload tracking completely
+ Positive values: track top N terminated workloads by energy consumption
+ format: int32
+ type: integer
+ metricLevels:
+ default:
+ - node
+ - pod
+ - vm
+ description: |-
+ MetricLevels specifies which metrics levels to export
+ Valid values are combinations of: node, process, container, vm, pod
+ items:
+ enum:
+ - node
+ - process
+ - container
+ - vm
+ - pod
+ type: string
+ type: array
+ x-kubernetes-list-type: set
+ sampleRate:
+ default: 5s
+ description: |-
+ SampleRate specifies the interval for monitoring resources (processes, containers, vms, etc.)
+ Must be a positive duration (e.g., "5s", "1m", "30s"). Negative values are not allowed.
+ pattern: ^[0-9]+(\.[0-9]+)?(ns|us|ms|s|m|h)$
+ type: string
+ staleness:
+ default: 500ms
+ description: |-
+ Staleness specifies how long to wait before considering calculated power values as stale
+ Must be a positive duration (e.g., "500ms", "5s", "1h"). Negative values are not allowed.
+ pattern: ^[0-9]+(\.[0-9]+)?(ns|us|ms|s|m|h)$
+ type: string
+ type: object
+ deployment:
+ properties:
+ image:
+ minLength: 3
+ type: string
+ kubeRbacProxyImage:
+ minLength: 3
+ type: string
+ namespace:
+ minLength: 1
+ type: string
+ nodeSelector:
+ additionalProperties:
+ type: string
+ default:
+ kubernetes.io/os: linux
+ description: Defines which Nodes the Pod is scheduled on
+ type: object
+ secrets:
+ description: Secrets to be mounted in the power monitor containers
+ items:
+ description: |-
+ SecretRef defines a reference to a Secret to be mounted
+
+ Mount Path Cautions:
+ Exercise caution when setting mount paths for secrets. Avoid mounting secrets to critical system paths
+ that may interfere with Kepler's operation or container security:
+ - /etc/kepler - Reserved for Kepler configuration files
+ - /sys, /proc, /dev - System directories that should remain read-only
+ - /usr, /bin, /sbin, /lib - System binaries and libraries
+ - / - Root filesystem
+
+ Best practices:
+ - Use subdirectories like /etc/kepler/secrets/ or /opt/secrets/
+ - Ensure mount paths don't conflict with existing volume mounts
+ - Test mount paths in development environments before production deployment
+ - Monitor Kepler pod logs for mount-related errors
+ properties:
+ mountPath:
+ description: MountPath where the secret should be mounted
+ in the container
+ minLength: 1
+ type: string
+ name:
+ description: Name of the secret in the same namespace
+ as the Kepler deployment
+ minLength: 1
+ type: string
+ readOnly:
+ default: true
+ description: ReadOnly specifies whether the secret should
+ be mounted read-only
+ type: boolean
+ required:
+ - mountPath
+ - name
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ security:
+ description: If set, defines the security mode and allowed
+ SANames
+ properties:
+ allowedSANames:
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ mode:
+ enum:
+ - none
+ - rbac
+ type: string
+ type: object
+ tolerations:
+ default:
+ - effect: ""
+ key: ""
+ operator: Exists
+ value: ""
+ description: If specified, define Pod's tolerations
+ items:
+ description: |-
+ The pod this Toleration is attached to tolerates any taint that matches
+ the triple using the matching operator .
+ properties:
+ effect:
+ description: |-
+ Effect indicates the taint effect to match. Empty means match all taint effects.
+ When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
+ type: string
+ key:
+ description: |-
+ Key is the taint key that the toleration applies to. Empty means match all taint keys.
+ If the key is empty, operator must be Exists; this combination means to match all values and all keys.
+ type: string
+ operator:
+ description: |-
+ Operator represents a key's relationship to the value.
+ Valid operators are Exists and Equal. Defaults to Equal.
+ Exists is equivalent to wildcard for value, so that a pod can
+ tolerate all taints of a particular category.
+ type: string
+ tolerationSeconds:
+ description: |-
+ TolerationSeconds represents the period of time the toleration (which must be
+ of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
+ it is not set, which means tolerate the taint forever (do not evict). Zero and
+ negative values will be treated as 0 (evict immediately) by the system.
+ format: int64
+ type: integer
+ value:
+ description: |-
+ Value is the taint value the toleration matches to.
+ If the operator is Exists, the value should be empty, otherwise just a regular string.
+ type: string
+ type: object
+ type: array
+ required:
+ - image
+ - namespace
+ type: object
+ required:
+ - deployment
+ type: object
+ openshift:
+ properties:
+ dashboard:
+ properties:
+ enabled:
+ default: false
+ type: boolean
+ type: object
+ enabled:
+ default: true
+ type: boolean
+ required:
+ - enabled
+ type: object
+ required:
+ - kepler
+ type: object
+ status:
+ properties:
+ conditions:
+ description: conditions represent the latest available observations
+ of power-monitor-internal
+ items:
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: reason contains a programmatic identifier indicating
+ the reason for the condition's last transition.
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ type: string
+ type:
+ description: Type of Kepler Condition - Reconciled, Available
+ ...
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ kepler:
+ properties:
+ currentNumberScheduled:
+ description: |-
+ The number of nodes that are running at least 1 power-monitor-internal pod and are
+ supposed to run the power-monitor-internal pod.
+ format: int32
+ type: integer
+ desiredNumberScheduled:
+ description: |-
+ The total number of nodes that should be running the power-monitor-internal
+ pod (including nodes correctly running the power-monitor-internal pod).
+ format: int32
+ type: integer
+ numberAvailable:
+ description: |-
+ The number of nodes that should be running the power-monitor-internal pod and have one or
+ more of the power-monitor-internal pod running and available
+ format: int32
+ type: integer
+ numberMisscheduled:
+ description: |-
+ The number of nodes that are running the power-monitor-internal pod, but are not supposed
+ to run the power-monitor-internal pod.
+ format: int32
+ type: integer
+ numberReady:
+ description: |-
+ numberReady is the number of nodes that should be running the power-monitor-internal pod
+ and have one or more of the power-monitor-internal pod running with a Ready Condition.
+ format: int32
+ type: integer
+ numberUnavailable:
+ description: |-
+ The number of nodes that should be running the
+ power-monitor-internal pod and have none of the power-monitor-internal pod running and available
+ format: int32
+ type: integer
+ updatedNumberScheduled:
+ description: The total number of nodes that are running updated
+ power-monitor-internal pod
+ format: int32
+ type: integer
+ required:
+ - currentNumberScheduled
+ - desiredNumberScheduled
+ - numberMisscheduled
+ - numberReady
+ type: object
+ required:
+ - conditions
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/manifests/helm/kepler-operator/crds/kepler.system.sustainable.computing.io_powermonitors.yaml b/manifests/helm/kepler-operator/crds/kepler.system.sustainable.computing.io_powermonitors.yaml
new file mode 100644
index 00000000..daae1e1e
--- /dev/null
+++ b/manifests/helm/kepler-operator/crds/kepler.system.sustainable.computing.io_powermonitors.yaml
@@ -0,0 +1,352 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.17.2
+ name: powermonitors.kepler.system.sustainable.computing.io
+spec:
+ group: kepler.system.sustainable.computing.io
+ names:
+ kind: PowerMonitor
+ listKind: PowerMonitorList
+ plural: powermonitors
+ singular: powermonitor
+ scope: Cluster
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.kepler.desiredNumberScheduled
+ name: Desired
+ type: integer
+ - jsonPath: .status.kepler.currentNumberScheduled
+ name: Current
+ type: integer
+ - jsonPath: .status.kepler.numberReady
+ name: Ready
+ type: integer
+ - jsonPath: .status.kepler.updatedNumberScheduled
+ name: Up-to-date
+ type: integer
+ - jsonPath: .status.kepler.numberAvailable
+ name: Available
+ type: integer
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: date
+ - jsonPath: .spec.kepler.deployment.nodeSelector
+ name: Node-Selector
+ priority: 10
+ type: string
+ - jsonPath: .spec.kepler.deployment.tolerations
+ name: Tolerations
+ priority: 10
+ type: string
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: PowerMonitor is the Schema for the PowerMonitor API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: PowerMonitorSpec defines the desired state of Power Monitor
+ properties:
+ kepler:
+ properties:
+ config:
+ properties:
+ additionalConfigMaps:
+ description: |-
+ AdditionalConfigMaps is a list of ConfigMap names that will be merged with the default ConfigMap
+ These AdditionalConfigMaps must exist in the same namespace as PowerMonitor components
+ items:
+ description: ConfigMapRef defines a reference to a ConfigMap
+ properties:
+ name:
+ description: Name of the ConfigMap
+ minLength: 1
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ logLevel:
+ default: info
+ type: string
+ maxTerminated:
+ default: 500
+ description: |-
+ MaxTerminated controls terminated workload tracking behavior
+ Negative values: track unlimited terminated workloads (no capacity limit)
+ Zero: disable terminated workload tracking completely
+ Positive values: track top N terminated workloads by energy consumption
+ format: int32
+ type: integer
+ metricLevels:
+ default:
+ - node
+ - pod
+ - vm
+ description: |-
+ MetricLevels specifies which metrics levels to export
+ Valid values are combinations of: node, process, container, vm, pod
+ items:
+ enum:
+ - node
+ - process
+ - container
+ - vm
+ - pod
+ type: string
+ type: array
+ x-kubernetes-list-type: set
+ sampleRate:
+ default: 5s
+ description: |-
+ SampleRate specifies the interval for monitoring resources (processes, containers, vms, etc.)
+ Must be a positive duration (e.g., "5s", "1m", "30s"). Negative values are not allowed.
+ pattern: ^[0-9]+(\.[0-9]+)?(ns|us|ms|s|m|h)$
+ type: string
+ staleness:
+ default: 500ms
+ description: |-
+ Staleness specifies how long to wait before considering calculated power values as stale
+ Must be a positive duration (e.g., "500ms", "5s", "1h"). Negative values are not allowed.
+ pattern: ^[0-9]+(\.[0-9]+)?(ns|us|ms|s|m|h)$
+ type: string
+ type: object
+ deployment:
+ properties:
+ nodeSelector:
+ additionalProperties:
+ type: string
+ default:
+ kubernetes.io/os: linux
+ description: Defines which Nodes the Pod is scheduled on
+ type: object
+ secrets:
+ description: Secrets to be mounted in the power monitor containers
+ items:
+ description: |-
+ SecretRef defines a reference to a Secret to be mounted
+
+ Mount Path Cautions:
+ Exercise caution when setting mount paths for secrets. Avoid mounting secrets to critical system paths
+ that may interfere with Kepler's operation or container security:
+ - /etc/kepler - Reserved for Kepler configuration files
+ - /sys, /proc, /dev - System directories that should remain read-only
+ - /usr, /bin, /sbin, /lib - System binaries and libraries
+ - / - Root filesystem
+
+ Best practices:
+ - Use subdirectories like /etc/kepler/secrets/ or /opt/secrets/
+ - Ensure mount paths don't conflict with existing volume mounts
+ - Test mount paths in development environments before production deployment
+ - Monitor Kepler pod logs for mount-related errors
+ properties:
+ mountPath:
+ description: MountPath where the secret should be mounted
+ in the container
+ minLength: 1
+ type: string
+ name:
+ description: Name of the secret in the same namespace
+ as the Kepler deployment
+ minLength: 1
+ type: string
+ readOnly:
+ default: true
+ description: ReadOnly specifies whether the secret should
+ be mounted read-only
+ type: boolean
+ required:
+ - mountPath
+ - name
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ security:
+ description: If set, defines the security mode and allowed
+ SANames
+ properties:
+ allowedSANames:
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ mode:
+ enum:
+ - none
+ - rbac
+ type: string
+ type: object
+ tolerations:
+ default:
+ - effect: ""
+ key: ""
+ operator: Exists
+ value: ""
+ description: If specified, define Pod's tolerations
+ items:
+ description: |-
+ The pod this Toleration is attached to tolerates any taint that matches
+ the triple using the matching operator .
+ properties:
+ effect:
+ description: |-
+ Effect indicates the taint effect to match. Empty means match all taint effects.
+ When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
+ type: string
+ key:
+ description: |-
+ Key is the taint key that the toleration applies to. Empty means match all taint keys.
+ If the key is empty, operator must be Exists; this combination means to match all values and all keys.
+ type: string
+ operator:
+ description: |-
+ Operator represents a key's relationship to the value.
+ Valid operators are Exists and Equal. Defaults to Equal.
+ Exists is equivalent to wildcard for value, so that a pod can
+ tolerate all taints of a particular category.
+ type: string
+ tolerationSeconds:
+ description: |-
+ TolerationSeconds represents the period of time the toleration (which must be
+ of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
+ it is not set, which means tolerate the taint forever (do not evict). Zero and
+ negative values will be treated as 0 (evict immediately) by the system.
+ format: int64
+ type: integer
+ value:
+ description: |-
+ Value is the taint value the toleration matches to.
+ If the operator is Exists, the value should be empty, otherwise just a regular string.
+ type: string
+ type: object
+ type: array
+ type: object
+ type: object
+ required:
+ - kepler
+ type: object
+ status:
+ description: PowerMonitorStatus defines the observed state of Power Monitor
+ properties:
+ conditions:
+ description: conditions represent the latest available observations
+ of power-monitor
+ items:
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: reason contains a programmatic identifier indicating
+ the reason for the condition's last transition.
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ type: string
+ type:
+ description: Type of Kepler Condition - Reconciled, Available
+ ...
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ kepler:
+ properties:
+ currentNumberScheduled:
+ description: |-
+ The number of nodes that are running at least 1 power-monitor pod and are
+ supposed to run the power-monitor pod.
+ format: int32
+ type: integer
+ desiredNumberScheduled:
+ description: |-
+ The total number of nodes that should be running the power-monitor
+ pod (including nodes correctly running the power-monitor pod).
+ format: int32
+ type: integer
+ numberAvailable:
+ description: |-
+ The number of nodes that should be running the power-monitor pod and have one or
+ more of the power-monitor pod running and available
+ format: int32
+ type: integer
+ numberMisscheduled:
+ description: |-
+ The number of nodes that are running the power-monitor pod, but are not supposed
+ to run the power-monitor pod.
+ format: int32
+ type: integer
+ numberReady:
+ description: |-
+ numberReady is the number of nodes that should be running the power-monitor pod
+ and have one or more of the power-monitor pod running with a Ready Condition.
+ format: int32
+ type: integer
+ numberUnavailable:
+ description: |-
+ The number of nodes that should be running the
+ power-monitor pod and have none of the power-monitor pod running and available
+ format: int32
+ type: integer
+ updatedNumberScheduled:
+ description: The total number of nodes that are running updated
+ power-monitor pod
+ format: int32
+ type: integer
+ required:
+ - currentNumberScheduled
+ - desiredNumberScheduled
+ - numberMisscheduled
+ - numberReady
+ type: object
+ required:
+ - conditions
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/manifests/helm/kepler-operator/templates/NOTES.txt b/manifests/helm/kepler-operator/templates/NOTES.txt
new file mode 100644
index 00000000..ac72449c
--- /dev/null
+++ b/manifests/helm/kepler-operator/templates/NOTES.txt
@@ -0,0 +1,40 @@
+Thank you for installing {{ .Chart.Name }}!
+
+Your release is named {{ .Release.Name }}.
+
+The Kepler Operator has been deployed in namespace: {{ include "kepler-operator.namespace" . }}
+
+To check the operator status:
+
+ kubectl get pods -n {{ include "kepler-operator.namespace" . }} -l app.kubernetes.io/name={{ include "kepler-operator.name" . }}
+
+Next Steps:
+
+1. Verify the operator is running:
+
+ kubectl get deployment -n {{ include "kepler-operator.namespace" . }}
+
+2. Check the CRDs are installed:
+
+ kubectl get crds | grep powermonitor
+
+3. Create a PowerMonitor custom resource to deploy Kepler:
+
+ kubectl apply -f https://raw.githubusercontent.com/sustainable-computing-io/kepler-operator/v1alpha1/config/samples/kepler.system_v1alpha1_powermonitor.yaml
+
+4. Verify Kepler DaemonSet is created:
+
+ kubectl get daemonset -n {{ include "kepler-operator.deploymentNamespace" . }}
+
+{{- if .Values.webhooks.enabled }}
+{{- if .Values.webhooks.certManager.enabled }}
+
+Note: This installation requires cert-manager to be installed for webhook certificates.
+If you haven't installed cert-manager yet, install it with:
+
+ kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.18.2/cert-manager.yaml
+{{- end }}
+{{- end }}
+
+For more information on using the Kepler Operator, visit:
+https://github.com/sustainable-computing-io/kepler-operator
diff --git a/manifests/helm/kepler-operator/templates/_helpers.tpl b/manifests/helm/kepler-operator/templates/_helpers.tpl
new file mode 100644
index 00000000..f21a169d
--- /dev/null
+++ b/manifests/helm/kepler-operator/templates/_helpers.tpl
@@ -0,0 +1,107 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "kepler-operator.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "kepler-operator.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "kepler-operator.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "kepler-operator.labels" -}}
+helm.sh/chart: {{ include "kepler-operator.chart" . }}
+{{ include "kepler-operator.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+app.kubernetes.io/part-of: kepler-operator
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "kepler-operator.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "kepler-operator.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Manager labels (for deployment and pod)
+*/}}
+{{- define "kepler-operator.managerLabels" -}}
+{{ include "kepler-operator.selectorLabels" . }}
+app.kubernetes.io/component: manager
+{{- end }}
+
+{{/*
+Create the name of the service account to use
+*/}}
+{{- define "kepler-operator.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default "kepler-operator-controller-manager" .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create the namespace to use
+*/}}
+{{- define "kepler-operator.namespace" -}}
+{{- default "kepler-operator-system" .Values.namespace }}
+{{- end }}
+
+{{/*
+Operator image
+*/}}
+{{- define "kepler-operator.image" -}}
+{{- .Values.operator.image }}
+{{- end }}
+
+{{/*
+Kepler image (managed by operator)
+*/}}
+{{- define "kepler-operator.keplerImage" -}}
+{{- .Values.kepler.image }}
+{{- end }}
+
+{{/*
+Kube RBAC Proxy image (managed by operator)
+*/}}
+{{- define "kepler-operator.kubeRbacProxyImage" -}}
+{{- index .Values "kube-rbac-proxy" "image" }}
+{{- end }}
+
+{{/*
+Deployment namespace for power monitoring components
+Defaults to "power-monitor" (the operator's code default) if not specified
+*/}}
+{{- define "kepler-operator.deploymentNamespace" -}}
+{{- default "power-monitor" .Values.operator.deploymentNamespace }}
+{{- end }}
diff --git a/manifests/helm/kepler-operator/templates/certificate.yaml b/manifests/helm/kepler-operator/templates/certificate.yaml
new file mode 100644
index 00000000..8b1e86c2
--- /dev/null
+++ b/manifests/helm/kepler-operator/templates/certificate.yaml
@@ -0,0 +1,31 @@
+{{- if .Values.webhooks.certManager.enabled }}
+# Self-signed Issuer
+apiVersion: cert-manager.io/v1
+kind: Issuer
+metadata:
+ name: {{ include "kepler-operator.fullname" . }}-selfsigned-issuer
+ namespace: {{ include "kepler-operator.namespace" . }}
+ labels:
+ {{- include "kepler-operator.labels" . | nindent 4 }}
+ app.kubernetes.io/component: certificate
+spec:
+ selfSigned: {}
+---
+# Webhook TLS Certificate
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+ name: {{ include "kepler-operator.fullname" . }}-serving-cert
+ namespace: {{ include "kepler-operator.namespace" . }}
+ labels:
+ {{- include "kepler-operator.labels" . | nindent 4 }}
+ app.kubernetes.io/component: certificate
+spec:
+ dnsNames:
+ - {{ include "kepler-operator.fullname" . }}-webhook-service.{{ include "kepler-operator.namespace" . }}.svc
+ - {{ include "kepler-operator.fullname" . }}-webhook-service.{{ include "kepler-operator.namespace" . }}.svc.cluster.local
+ issuerRef:
+ kind: Issuer
+ name: {{ include "kepler-operator.fullname" . }}-selfsigned-issuer
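+ # Must match the secretName of the "cert" volume mounted by the controller Deployment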
+ secretName: webhook-server-cert
+{{- end }}
diff --git a/manifests/helm/kepler-operator/templates/deployment.yaml b/manifests/helm/kepler-operator/templates/deployment.yaml
new file mode 100644
index 00000000..b748c3d8
--- /dev/null
+++ b/manifests/helm/kepler-operator/templates/deployment.yaml
@@ -0,0 +1,93 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: {{ include "kepler-operator.fullname" . }}-controller
+ namespace: {{ include "kepler-operator.namespace" . }}
+ labels:
+ {{- include "kepler-operator.labels" . | nindent 4 }}
+ app.kubernetes.io/component: manager
+spec:
+ replicas: {{ .Values.replicaCount }}
+ selector:
+ matchLabels:
+ {{- include "kepler-operator.managerLabels" . | nindent 6 }}
+ app.kubernetes.io/part-of: kepler-operator
+ template:
+ metadata:
+ annotations:
+ kubectl.kubernetes.io/default-container: manager
+ labels:
+ {{- include "kepler-operator.managerLabels" . | nindent 8 }}
+ app.kubernetes.io/part-of: kepler-operator
+ spec:
+ serviceAccountName: {{ include "kepler-operator.serviceAccountName" . }}
+ securityContext:
+ runAsNonRoot: true
+ terminationGracePeriodSeconds: 10
+ containers:
+ - name: manager
+ image: {{ include "kepler-operator.image" . }}
+ imagePullPolicy: {{ .Values.operator.pullPolicy }}
+ command:
+ - /manager
+ args:
+ {{- if .Values.operator.deploymentNamespace }}
+ - --deployment-namespace={{ .Values.operator.deploymentNamespace }}
+ {{- end }}
+ - --leader-elect
+ - --kepler.image=$(RELATED_IMAGE_KEPLER)
+ - --kube-rbac-proxy.image=$(RELATED_IMAGE_KUBE_RBAC_PROXY)
+ - --zap-log-level=5
+ env:
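+ # Managed images consumed by the --kepler.image and --kube-rbac-proxy.image args above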
+ - name: RELATED_IMAGE_KEPLER
+ value: {{ include "kepler-operator.keplerImage" . }}
+ - name: RELATED_IMAGE_KUBE_RBAC_PROXY
+ value: {{ include "kepler-operator.kubeRbacProxyImage" . }}
+ ports:
+ - containerPort: 9443
+ name: webhook-server
+ protocol: TCP
+ - containerPort: 8080
+ name: metrics
+ protocol: TCP
+ livenessProbe:
+ httpGet:
+ path: /healthz
+ port: 8081
+ initialDelaySeconds: 20
+ periodSeconds: 20
+ readinessProbe:
+ httpGet:
+ path: /readyz
+ port: 8081
+ initialDelaySeconds: 20
+ periodSeconds: 20
+ resources:
+ {{- toYaml .Values.resources | nindent 12 }}
+ securityContext:
+ {{- toYaml .Values.securityContext | nindent 12 }}
+ {{- if .Values.webhooks.enabled }}
+ volumeMounts:
+ - mountPath: /tmp/k8s-webhook-server/serving-certs
+ name: cert
+ readOnly: true
+ {{- end }}
+ {{- if .Values.webhooks.enabled }}
+ volumes:
+ - name: cert
+ secret:
+ defaultMode: 420
+ secretName: webhook-server-cert
+ {{- end }}
+ {{- with .Values.nodeSelector }}
+ nodeSelector:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
+ {{- with .Values.affinity }}
+ affinity:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
+ {{- with .Values.tolerations }}
+ tolerations:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
diff --git a/manifests/helm/kepler-operator/templates/rbac.yaml b/manifests/helm/kepler-operator/templates/rbac.yaml
new file mode 100644
index 00000000..5b21aab9
--- /dev/null
+++ b/manifests/helm/kepler-operator/templates/rbac.yaml
@@ -0,0 +1,237 @@
+# Leader Election Role
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+ name: {{ include "kepler-operator.fullname" . }}-leader-election
+ namespace: {{ include "kepler-operator.namespace" . }}
+ labels:
+ {{- include "kepler-operator.labels" . | nindent 4 }}
+ app.kubernetes.io/component: rbac
+rules:
+ - apiGroups:
+ - ""
+ resources:
+ - configmaps
+ verbs:
+ - get
+ - list
+ - watch
+ - create
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - coordination.k8s.io
+ resources:
+ - leases
+ verbs:
+ - get
+ - list
+ - watch
+ - create
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - ""
+ resources:
+ - events
+ verbs:
+ - create
+ - patch
+---
+# Manager ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ name: {{ include "kepler-operator.fullname" . }}-manager
+ labels:
+ {{- include "kepler-operator.labels" . | nindent 4 }}
+ app.kubernetes.io/component: rbac
+rules:
+ - apiGroups:
+ - ""
+ resources:
+ - configmaps
+ - namespaces
+ - persistentvolumeclaims
+ - serviceaccounts
+ - services
+ verbs:
+ - create
+ - delete
+ - list
+ - patch
+ - update
+ - watch
+ - apiGroups:
+ - ""
+ resources:
+ - nodes/metrics
+ - nodes/proxy
+ - nodes/stats
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - ""
+ resources:
+ - secrets
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+ - apiGroups:
+ - ""
+ resources:
+ - serviceaccounts/token
+ verbs:
+ - create
+ - apiGroups:
+ - apps
+ resources:
+ - daemonsets
+ - deployments
+ verbs:
+ - create
+ - delete
+ - list
+ - patch
+ - update
+ - watch
+ - apiGroups:
+ - kepler.system.sustainable.computing.io
+ - rbac.authorization.k8s.io
+ resources:
+ - '*'
+ verbs:
+ - '*'
+ - apiGroups:
+ - monitoring.coreos.com
+ resources:
+ - prometheusrules
+ - servicemonitors
+ verbs:
+ - create
+ - delete
+ - list
+ - patch
+ - update
+ - watch
+ - apiGroups:
+ - security.openshift.io
+ resources:
+ - securitycontextconstraints
+ verbs:
+ - create
+ - delete
+ - list
+ - patch
+ - update
+ - use
+ - watch
+ - apiGroups:
+ - authentication.k8s.io
+ resources:
+ - tokenreviews
+ verbs:
+ - create
+ - apiGroups:
+ - authorization.k8s.io
+ resources:
+ - subjectaccessreviews
+ verbs:
+ - create
+---
+# Metrics Auth ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ name: {{ include "kepler-operator.fullname" . }}-metrics-auth
+ labels:
+ {{- include "kepler-operator.labels" . | nindent 4 }}
+ app.kubernetes.io/component: rbac
+rules:
+ - apiGroups:
+ - authentication.k8s.io
+ resources:
+ - tokenreviews
+ verbs:
+ - create
+ - apiGroups:
+ - authorization.k8s.io
+ resources:
+ - subjectaccessreviews
+ verbs:
+ - create
+---
+# Metrics Reader ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ name: {{ include "kepler-operator.fullname" . }}-metrics-reader
+ labels:
+ {{- include "kepler-operator.labels" . | nindent 4 }}
+ app.kubernetes.io/component: rbac
+rules:
+ - nonResourceURLs:
+ - /metrics
+ verbs:
+ - get
+---
+# Leader Election RoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+ name: {{ include "kepler-operator.fullname" . }}-leader-election
+ namespace: {{ include "kepler-operator.namespace" . }}
+ labels:
+ {{- include "kepler-operator.labels" . | nindent 4 }}
+ app.kubernetes.io/component: rbac
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: Role
+ name: {{ include "kepler-operator.fullname" . }}-leader-election
+subjects:
+ - kind: ServiceAccount
+ name: {{ include "kepler-operator.serviceAccountName" . }}
+ namespace: {{ include "kepler-operator.namespace" . }}
+---
+# Manager ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ name: {{ include "kepler-operator.fullname" . }}-manager
+ labels:
+ {{- include "kepler-operator.labels" . | nindent 4 }}
+ app.kubernetes.io/component: rbac
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: {{ include "kepler-operator.fullname" . }}-manager
+subjects:
+ - kind: ServiceAccount
+ name: {{ include "kepler-operator.serviceAccountName" . }}
+ namespace: {{ include "kepler-operator.namespace" . }}
+---
+# Metrics Auth ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ name: {{ include "kepler-operator.fullname" . }}-metrics-auth
+ labels:
+ {{- include "kepler-operator.labels" . | nindent 4 }}
+ app.kubernetes.io/component: rbac
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: {{ include "kepler-operator.fullname" . }}-metrics-auth
+subjects:
+ - kind: ServiceAccount
+ name: {{ include "kepler-operator.serviceAccountName" . }}
+ namespace: {{ include "kepler-operator.namespace" . }}
diff --git a/manifests/helm/kepler-operator/templates/serviceaccount.yaml b/manifests/helm/kepler-operator/templates/serviceaccount.yaml
new file mode 100644
index 00000000..5d7a2664
--- /dev/null
+++ b/manifests/helm/kepler-operator/templates/serviceaccount.yaml
@@ -0,0 +1,14 @@
+{{- if .Values.serviceAccount.create -}}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ name: {{ include "kepler-operator.serviceAccountName" . }}
+ namespace: {{ include "kepler-operator.namespace" . }}
+ labels:
+ {{- include "kepler-operator.labels" . | nindent 4 }}
+ app.kubernetes.io/component: rbac
+ {{- with .Values.serviceAccount.annotations }}
+ annotations:
+ {{- toYaml . | nindent 4 }}
+ {{- end }}
+{{- end }}
diff --git a/manifests/helm/kepler-operator/templates/servicemonitor.yaml b/manifests/helm/kepler-operator/templates/servicemonitor.yaml
new file mode 100644
index 00000000..5d9b2c7d
--- /dev/null
+++ b/manifests/helm/kepler-operator/templates/servicemonitor.yaml
@@ -0,0 +1,16 @@
+{{- if .Values.metrics.serviceMonitor.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+ name: {{ include "kepler-operator.fullname" . }}-metrics-monitor
+ namespace: {{ include "kepler-operator.namespace" . }}
+ labels:
+ {{- include "kepler-operator.labels" . | nindent 4 }}
+ app.kubernetes.io/component: metrics
+spec:
+ endpoints:
+ - port: metrics
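+ # Selects the metrics Service, which carries the control-plane label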
+ selector:
+ matchLabels:
+ control-plane: controller-manager
+{{- end }}
diff --git a/manifests/helm/kepler-operator/templates/services.yaml b/manifests/helm/kepler-operator/templates/services.yaml
new file mode 100644
index 00000000..2a17d075
--- /dev/null
+++ b/manifests/helm/kepler-operator/templates/services.yaml
@@ -0,0 +1,36 @@
+# Metrics Service
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ include "kepler-operator.fullname" . }}-metrics-service
+ namespace: {{ include "kepler-operator.namespace" . }}
+ labels:
+ {{- include "kepler-operator.labels" . | nindent 4 }}
+ control-plane: controller-manager
+spec:
+ ports:
+ - name: metrics
+ port: 8080
+ protocol: TCP
+ targetPort: 8080
+ selector:
+ {{- include "kepler-operator.managerLabels" . | nindent 4 }}
+ app.kubernetes.io/part-of: kepler-operator
+---
+# Webhook Service
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ include "kepler-operator.fullname" . }}-webhook-service
+ namespace: {{ include "kepler-operator.namespace" . }}
+ labels:
+ {{- include "kepler-operator.labels" . | nindent 4 }}
+ app.kubernetes.io/component: webhook
+spec:
+ ports:
+ - port: 443
+ protocol: TCP
+ targetPort: 9443
+ selector:
+ {{- include "kepler-operator.managerLabels" . | nindent 4 }}
+ app.kubernetes.io/part-of: kepler-operator
diff --git a/manifests/helm/kepler-operator/templates/webhooks.yaml b/manifests/helm/kepler-operator/templates/webhooks.yaml
new file mode 100644
index 00000000..36bacf71
--- /dev/null
+++ b/manifests/helm/kepler-operator/templates/webhooks.yaml
@@ -0,0 +1,70 @@
+{{- if .Values.webhooks.enabled }}
+# Mutating Webhook Configuration
+apiVersion: admissionregistration.k8s.io/v1
+kind: MutatingWebhookConfiguration
+metadata:
+ name: {{ include "kepler-operator.fullname" . }}-mutating-webhook-configuration
+ labels:
+ {{- include "kepler-operator.labels" . | nindent 4 }}
+ app.kubernetes.io/component: webhook
+ {{- if .Values.webhooks.certManager.enabled }}
+ annotations:
+ cert-manager.io/inject-ca-from: {{ include "kepler-operator.namespace" . }}/{{ include "kepler-operator.fullname" . }}-serving-cert
+ {{- end }}
+webhooks:
+ - admissionReviewVersions:
+ - v1
+ clientConfig:
+ service:
+ name: {{ include "kepler-operator.fullname" . }}-webhook-service
+ namespace: {{ include "kepler-operator.namespace" . }}
+ path: /mutate-kepler-system-sustainable-computing-io-v1alpha1-powermonitor
+ failurePolicy: Fail
+ name: mpowermonitor.kb.io
+ rules:
+ - apiGroups:
+ - kepler.system.sustainable.computing.io
+ apiVersions:
+ - v1alpha1
+ operations:
+ - CREATE
+ - UPDATE
+ resources:
+ - powermonitors
+ sideEffects: None
+---
+# Validating Webhook Configuration
+apiVersion: admissionregistration.k8s.io/v1
+kind: ValidatingWebhookConfiguration
+metadata:
+ name: {{ include "kepler-operator.fullname" . }}-validating-webhook-configuration
+ labels:
+ {{- include "kepler-operator.labels" . | nindent 4 }}
+ app.kubernetes.io/component: webhook
+ {{- if .Values.webhooks.certManager.enabled }}
+ annotations:
+ cert-manager.io/inject-ca-from: {{ include "kepler-operator.namespace" . }}/{{ include "kepler-operator.fullname" . }}-serving-cert
+ {{- end }}
+webhooks:
+ - admissionReviewVersions:
+ - v1
+ clientConfig:
+ service:
+ name: {{ include "kepler-operator.fullname" . }}-webhook-service
+ namespace: {{ include "kepler-operator.namespace" . }}
+ path: /validate-kepler-system-sustainable-computing-io-v1alpha1-powermonitor
+ failurePolicy: Fail
+ name: vpowermonitor.kb.io
+ rules:
+ - apiGroups:
+ - kepler.system.sustainable.computing.io
+ apiVersions:
+ - v1alpha1
+ operations:
+ - CREATE
+ - UPDATE
+ - DELETE
+ resources:
+ - powermonitors
+ sideEffects: None
+{{- end }}
diff --git a/manifests/helm/kepler-operator/values.yaml b/manifests/helm/kepler-operator/values.yaml
new file mode 100644
index 00000000..7789b200
--- /dev/null
+++ b/manifests/helm/kepler-operator/values.yaml
@@ -0,0 +1,62 @@
+# Default values for kepler-operator.
+
+# Operator
+operator:
+ image: quay.io/sustainable_computing_io/kepler-operator:0.21.0
+ pullPolicy: IfNotPresent
+ # Namespace where power monitoring components will be deployed
+ # Defaults to "power-monitor" if not specified
+ deploymentNamespace: ""
+
+# Managed Images (images that the operator will deploy)
+kepler:
+ image: quay.io/sustainable_computing_io/kepler:v0.11.0
+
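+# NOTE: hyphenated key; templates read it via the "index" function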
+kube-rbac-proxy:
+ image: quay.io/brancz/kube-rbac-proxy:v0.19.0
+
+# Deployment
+replicaCount: 1
+namespace: kepler-operator
+nameOverride: ""
+fullnameOverride: ""
+
+# RBAC
+serviceAccount:
+ create: true
+ name: kepler-operator-controller-manager
+ annotations: {}
+
+# Webhooks & cert-manager
+webhooks:
+ enabled: true
+ certManager:
+ enabled: true # Requires cert-manager to be pre-installed
+
+# Monitoring
+metrics:
+ enabled: true
+ serviceMonitor:
+ enabled: false # Set true if Prometheus Operator is available
+
+# Resources
+resources:
+ limits:
+ cpu: 500m
+ memory: 128Mi
+ requests:
+ cpu: 10m
+ memory: 64Mi
+
+# Tolerations, nodeSelector, affinity
+tolerations: []
+nodeSelector: {}
+affinity: {}
+
+# Security Context
+securityContext:
+ runAsNonRoot: true
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
diff --git a/tests/helm.sh b/tests/helm.sh
new file mode 100755
index 00000000..9810a516
--- /dev/null
+++ b/tests/helm.sh
@@ -0,0 +1,380 @@
+#!/usr/bin/env bash
+# Helm E2E testing script
+# Tests the Kepler Operator Helm chart deployment end-to-end
+
+set -e -u -o pipefail
+
+PROJECT_ROOT="$(git rev-parse --show-toplevel)"
+declare -r PROJECT_ROOT
+
+# Source test utilities
+source "$PROJECT_ROOT/tests/utils.sh"
+
+# Script configuration
+declare -r HELM_RELEASE_NAME="${HELM_RELEASE_NAME:-kepler-operator}"
+declare -r HELM_NAMESPACE="${HELM_NAMESPACE:-kepler-operator}"
+declare -r POWERMONITOR_NS="${POWERMONITOR_NS:-power-monitor}"
+declare -r LOGS_DIR="${LOGS_DIR:-tmp/helm-e2e}"
+
+# Testdata paths
+declare -r TESTDATA_DIR="$PROJECT_ROOT/tests/testdata"
+declare -r POWERMONITOR_VM_YAML="$TESTDATA_DIR/powermonitor-vm.yaml"
+declare -r POWERMONITOR_BAREMETAL_YAML="$TESTDATA_DIR/powermonitor-baremetal.yaml"
+declare -r FAKE_CPU_CONFIGMAP_YAML="$TESTDATA_DIR/fake-cpu-configmap.yaml"
+
+# Image configuration
+# NOTE: these are not readonly because they can be overridden by command-line flags
+declare VERSION="${VERSION:-0.0.0-dev}"
+declare IMG_BASE="${IMG_BASE:-localhost:5001}"
+declare OPERATOR_IMG="$IMG_BASE/kepler-operator:$VERSION"
+
+# Managed image versions (what operator deploys)
+declare -r KEPLER_IMAGE="${KEPLER_IMAGE:-quay.io/sustainable_computing_io/kepler:latest}"
+declare -r KUBE_RBAC_PROXY_IMAGE="${KUBE_RBAC_PROXY_IMAGE:-quay.io/brancz/kube-rbac-proxy:v0.19.0}"
+
+# Script flags
+declare NO_BUILD=false
+declare NO_DEPLOY=false
+declare CLEANUP=false
+declare RUNNING_ON_VM=false
+declare SHOW_USAGE=false
+
+# Trap cleanup on exit
+trap cleanup_on_exit INT TERM
+
+cleanup_on_exit() {
+ cleanup_jobs
+ if $CLEANUP; then
+ uninstall_helm || true
+ fi
+}
+
+# Build operator image
+build_operator() {
+ header "Build Operator Image"
+
+ $NO_BUILD && {
+ info "Skipping operator image build (--no-build)"
+ return 0
+ }
+
+ run make operator-build \
+ VERSION="$VERSION" \
+ IMG_BASE="$IMG_BASE"
+
+ ok "Operator image built: $OPERATOR_IMG"
+}
+
+# Load operator image to kind cluster
+load_operator_image() {
+ header "Load Operator Image to Kind"
+
+ $NO_BUILD && {
+ info "Skipping image load (--no-build)"
+ return 0
+ }
+
+ kind_load_image "$OPERATOR_IMG"
+
+ ok "Operator image loaded to kind"
+}
+
+# Install operator via Helm
+install_helm() {
+ header "Install Operator via Helm"
+
+ # Sync CRDs first
+ run make helm-sync-crds
+
+ # Install via Helm
+ run helm upgrade --install "$HELM_RELEASE_NAME" \
+ manifests/helm/kepler-operator \
+ --namespace "$HELM_NAMESPACE" \
+ --create-namespace \
+ --set operator.image="$OPERATOR_IMG" \
+ --set kepler.image="$KEPLER_IMAGE" \
+ --set kube-rbac-proxy.image="$KUBE_RBAC_PROXY_IMAGE" \
+ --timeout=5m \
+ --wait
+
+ ok "Operator installed via Helm"
+}
+
+# Wait for webhook certificate to be ready
+wait_for_webhook_cert() {
+ header "Waiting for Webhook Certificate"
+
+ info "Waiting for webhook certificate to be issued..."
+ run kubectl wait --for=condition=Ready --timeout=300s \
+ -n "$HELM_NAMESPACE" certificate/kepler-operator-serving-cert
+
+ # Give webhook time to start with the certificate
+ sleep 10
+
+ ok "Webhook certificate ready"
+}
+
+# Deploy PowerMonitor on VM with fake CPU meter
+deploy_pm_on_vm() {
+ # Deploy PowerMonitor CR first (operator will create namespace)
+ info "Creating PowerMonitor resource with fake CPU meter"
+ kubectl apply -f "$POWERMONITOR_VM_YAML"
+
+ # Wait for operator to create the namespace
+ info "Waiting for operator to create namespace $POWERMONITOR_NS"
+ kubectl wait --for=jsonpath='{.status.phase}'=Active \
+ --timeout=60s namespace/"$POWERMONITOR_NS" 2>/dev/null || {
+ # Namespace might not exist yet, wait for it to be created
+ local retries=30
+ while [[ $retries -gt 0 ]]; do
+ if kubectl get namespace "$POWERMONITOR_NS" >/dev/null 2>&1; then
+ break
+ fi
+ sleep 2
+ ((retries--))
+ done
+ }
+
+ # Create fake CPU meter ConfigMap after namespace exists
+ info "Creating fake CPU meter ConfigMap"
+ kubectl apply -n "$POWERMONITOR_NS" -f "$FAKE_CPU_CONFIGMAP_YAML"
+}
+
+# Deploy PowerMonitor on bare metal with hardware sensors
+deploy_pm_on_baremetal() {
+ info "Creating PowerMonitor resource (using hardware sensors)"
+ kubectl apply -f "$POWERMONITOR_BAREMETAL_YAML"
+}
+
+# Deploy PowerMonitor
+deploy_powermonitor() {
+ header "Deploy PowerMonitor"
+
+ if $RUNNING_ON_VM; then
+ deploy_pm_on_vm
+ else
+ deploy_pm_on_baremetal
+ fi
+
+ # Wait for PowerMonitor to be ready
+ wait_for_powermonitor power-monitor
+
+ ok "PowerMonitor deployed successfully"
+}
+
+# Verify deployment
+verify_deployment() {
+ header "Verify Deployment"
+
+ # Check operator deployment
+ info "Verifying operator deployment..."
+ kubectl get deployment -n "$HELM_NAMESPACE" kepler-operator-controller
+
+ # Check PowerMonitor DaemonSet
+ info "Verifying PowerMonitor DaemonSet..."
+ kubectl get daemonset -n "$POWERMONITOR_NS" power-monitor
+
+ # Check pods are running
+ info "Checking PowerMonitor pods..."
+ kubectl get pods -n "$POWERMONITOR_NS"
+
+ ok "All components verified"
+}
+
+# Uninstall Helm release
+uninstall_helm() {
+ header "Uninstall Helm Release"
+
+ # Delete PowerMonitor first
+ kubectl delete powermonitor power-monitor --ignore-not-found=true || true
+ sleep 5
+
+ # Uninstall Helm release
+ run helm uninstall "$HELM_RELEASE_NAME" \
+ --namespace "$HELM_NAMESPACE" || true
+
+ ok "Helm release uninstalled"
+}
+
+# Parse command line arguments
+parse_args() {
+ while [[ $# -gt 0 ]]; do
+ case $1 in
+ -h | --help)
+ SHOW_USAGE=true
+ return 0
+ ;;
+ --no-build)
+ NO_BUILD=true
+ shift
+ ;;
+ --no-deploy)
+ NO_DEPLOY=true
+ shift
+ ;;
+ --cleanup)
+ CLEANUP=true
+ shift
+ ;;
+ --running-on-vm)
+ RUNNING_ON_VM=true
+ shift
+ ;;
+ --version)
+ shift
+ VERSION="$1"
+ OPERATOR_IMG="$IMG_BASE/kepler-operator:$VERSION"
+ shift
+ ;;
+ --version=*)
+ VERSION="${1#*=}"
+ OPERATOR_IMG="$IMG_BASE/kepler-operator:$VERSION"
+ shift
+ ;;
+ *)
+ err "Unknown option: $1"
+ SHOW_USAGE=true
+ return 1
+ ;;
+ esac
+ done
+ return 0
+}
+
+# Show usage
+show_usage() {
+ local scr
+ scr="$(basename "$0")"
+
+ cat <<-EOF
+ Usage:
+ $scr [OPTIONS]
+
+ Description:
+ Run Helm E2E tests for the Kepler Operator
+
+ Examples:
+ # Full flow: build, load, deploy, verify
+ ❯ $scr
+
+ # Run in CI/VM environment (enables fake CPU meter)
+ ❯ $scr --running-on-vm
+
+ # Use existing image (skip build)
+ ❯ $scr --no-build --version=0.21.0
+
+ # Quick iteration (skip deploy, just verify)
+ ❯ $scr --no-deploy
+
+ # Full flow with cleanup
+ ❯ $scr --cleanup
+
+ Options:
+ -h, --help Show this help
+ --no-build Skip building operator image
+ --no-deploy Skip deployment (assumes operator already installed)
+ --cleanup Uninstall Helm release after test
+ --running-on-vm Enable fake CPU meter (for VMs without hardware sensors)
+ --version VER Operator version to test (default: $VERSION)
+
+ Prerequisites:
+ - Kubernetes cluster running (kind recommended)
+ - cert-manager installed (run 'make cluster-up')
+ - helm, kubectl, docker available
+
+ Logs:
+ Test logs are saved to: $LOGS_DIR
+ EOF
+
+ return 0
+}
+
+# Print test configuration
+print_config() {
+ header "Test Configuration"
+ cat <<-EOF
+ Operator Image: $OPERATOR_IMG
+ Kepler Image: $KEPLER_IMAGE
+ Kube RBAC Proxy: $KUBE_RBAC_PROXY_IMAGE
+ Helm Release: $HELM_RELEASE_NAME
+ Helm Namespace: $HELM_NAMESPACE
+ PowerMonitor NS: $POWERMONITOR_NS
+ Skip Build: $NO_BUILD
+ Skip Deploy: $NO_DEPLOY
+ Running on VM: $RUNNING_ON_VM
+ Cleanup After: $CLEANUP
+ Logs Directory: $LOGS_DIR
+
+ EOF
+ line 50
+}
+
+# Main test flow
+main() {
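+ # Prefer project-local tool binaries (LOCAL_BIN, default tmp/bin) over system ones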
+ export PATH="$LOCAL_BIN:$PATH"
+
+ # Parse arguments
+ parse_args "$@" || {
+ show_usage
+ return 1
+ }
+
+ if $SHOW_USAGE; then
+ show_usage
+ return 0
+ fi
+
+ cd "$PROJECT_ROOT"
+
+ # Initialize logs directory
+ init_logs_dir "$LOGS_DIR"
+
+ # Print configuration
+ print_config
+
+ # Start background event logging
+ log_events "$HELM_NAMESPACE" "$LOGS_DIR/operator-events.log" &
+ log_events "$POWERMONITOR_NS" "$LOGS_DIR/powermonitor-events.log" &
+
+ local ret=0
+
+ # Run test flow
+ if ! $NO_DEPLOY; then
+ build_operator || ret=$?
+ [[ $ret -ne 0 ]] && return $ret
+
+ load_operator_image || ret=$?
+ [[ $ret -ne 0 ]] && return $ret
+
+ install_helm || ret=$?
+ [[ $ret -ne 0 ]] && return $ret
+
+ wait_for_webhook_cert || ret=$?
+ [[ $ret -ne 0 ]] && return $ret
+
+ wait_for_operator "$HELM_NAMESPACE" "kepler-operator-controller" || ret=$?
+ [[ $ret -ne 0 ]] && return $ret
+
+ deploy_powermonitor || ret=$?
+ [[ $ret -ne 0 ]] && return $ret
+ fi
+
+ verify_deployment || ret=$?
+
+ # Cleanup background jobs
+ cleanup_jobs
+
+ # Always gather cluster state after test run (for debugging)
+ gather_cluster_state "$LOGS_DIR" "$HELM_NAMESPACE"
+
+ if [[ $ret -eq 0 ]]; then
+ ok "โ
Helm E2E Tests Passed"
+ else
+ fail "โ Helm E2E Tests Failed"
+ info "Check logs in: $LOGS_DIR"
+ fi
+
+ return $ret
+}
+
+main "$@"
diff --git a/tests/run-e2e.sh b/tests/run-e2e.sh
index 1c3d4914..fb870037 100755
--- a/tests/run-e2e.sh
+++ b/tests/run-e2e.sh
@@ -19,7 +19,7 @@ declare -r OPERATOR_DEPLOY_NAME="kepler-operator-controller"
declare -r OPERATOR_RELEASED_BUNDLE="quay.io/sustainable_computing_io/$OPERATOR-bundle"
declare -r TEST_IMAGES_YAML="tests/images.yaml"
-declare IMG_BASE="${IMG_BASE:-localhost:5001/$OPERATOR}"
+declare IMG_BASE="${IMG_BASE:-localhost:5001}"
# NOTE: this vars are initialized in init_operator_img
declare OPERATOR_IMG=""
declare BUNDLE_IMG=""
diff --git a/tests/testdata/fake-cpu-configmap.yaml b/tests/testdata/fake-cpu-configmap.yaml
new file mode 100644
index 00000000..c2bd66eb
--- /dev/null
+++ b/tests/testdata/fake-cpu-configmap.yaml
@@ -0,0 +1,10 @@
+---
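+# Enables the fake CPU meter for clusters without hardware power sensors;
+# the target namespace is supplied at apply time by tests/helm.sh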
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: power-monitor-config
+data:
+ config.yaml: |
+ dev:
+ fake-cpu-meter:
+ enabled: true
diff --git a/tests/testdata/powermonitor-baremetal.yaml b/tests/testdata/powermonitor-baremetal.yaml
new file mode 100644
index 00000000..2a53c2cb
--- /dev/null
+++ b/tests/testdata/powermonitor-baremetal.yaml
@@ -0,0 +1,12 @@
+---
+apiVersion: kepler.system.sustainable.computing.io/v1alpha1
+kind: PowerMonitor
+metadata:
+ name: power-monitor
+spec:
+ kepler:
+ deployment:
+ nodeSelector:
+ kubernetes.io/os: linux
+ config:
+ logLevel: info
diff --git a/tests/testdata/powermonitor-vm.yaml b/tests/testdata/powermonitor-vm.yaml
new file mode 100644
index 00000000..877862e3
--- /dev/null
+++ b/tests/testdata/powermonitor-vm.yaml
@@ -0,0 +1,14 @@
+---
+apiVersion: kepler.system.sustainable.computing.io/v1alpha1
+kind: PowerMonitor
+metadata:
+ name: power-monitor
+spec:
+ kepler:
+ deployment:
+ nodeSelector:
+ kubernetes.io/os: linux
+ config:
+ logLevel: info
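+ # References the fake CPU meter ConfigMap applied by tests/helm.sh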
+ additionalConfigMaps:
+ - name: power-monitor-config
diff --git a/tests/utils.sh b/tests/utils.sh
new file mode 100644
index 00000000..effc9cc0
--- /dev/null
+++ b/tests/utils.sh
@@ -0,0 +1,153 @@
+#!/usr/bin/env bash
+# Shared test utilities for e2e tests
+# This file contains common functions used by both run-e2e.sh and helm.sh
+
+# Ensure PROJECT_ROOT is set
+if [[ -z "${PROJECT_ROOT:-}" ]]; then
+ PROJECT_ROOT="$(git rev-parse --show-toplevel)"
+ declare -r PROJECT_ROOT
+fi
+
+# Source basic utilities
+source "$PROJECT_ROOT/hack/utils.bash"
+
+# Common test variables
+declare -r LOCAL_BIN="${LOCAL_BIN:-$PROJECT_ROOT/tmp/bin}"
+declare -r OPERATOR_DEPLOY_NAME="${OPERATOR_DEPLOY_NAME:-kepler-operator-controller}"
+
+# Initialize logs directory
+# Creates a new logs directory and moves the old one to -prev
+init_logs_dir() {
+ local logs_dir="${1:-tmp/e2e}"
+
+ rm -rf "$logs_dir-prev"
+ mv "$logs_dir" "$logs_dir-prev" 2>/dev/null || true
+ mkdir -p "$logs_dir"
+}
+
+# Load a docker image into kind cluster
+kind_load_image() {
+ local img="$1"
+
+ # Check if image exists locally first
+ if ! docker image inspect "$img" &>/dev/null; then
+ # Image not local, try to pull it
+ run docker pull "$img"
+ fi
+
+ run kind load docker-image "$img"
+}
+
+# Log kubernetes events for a namespace
+# Usage: log_events
+log_events() {
+ local ns="$1"
+ local log_file="${2:-events.log}"
+
+ kubectl get events -w \
+ -o custom-columns=FirstSeen:.firstTimestamp,LastSeen:.lastTimestamp,Count:.count,From:.source.component,Type:.type,Reason:.reason,Message:.message \
+ -n "$ns" | tee "$log_file"
+}
+
+# Wait for operator deployment to be ready
+# Works for both OLM and Helm deployments
+# Usage: wait_for_operator <namespace> [deployment-name]
+wait_for_operator() {
+ local ns="$1"
+ local deploy_name="${2:-$OPERATOR_DEPLOY_NAME}"
+ local deployment="deploy/$deploy_name"
+
+ header "Waiting for Kepler Operator ($ns) to be Ready"
+
+ wait_until 30 10 "operator to run" \
+ kubectl -n "$ns" rollout status "$deployment"
+
+ run kubectl wait -n "$ns" --for=condition=Available \
+ --timeout=300s "$deployment"
+
+ ok "Operator up and running"
+}
+
+# Wait for PowerMonitor to be available
+# Usage: wait_for_powermonitor [powermonitor-name]
+wait_for_powermonitor() {
+ local pm_name="${1:-power-monitor}"
+
+ header "Waiting for PowerMonitor to be ready"
+ wait_until 10 10 "powermonitor to be available" condition_check "True" kubectl get powermonitor "$pm_name" \
+ -o jsonpath="{.status.conditions[?(@.type=='Available')].status}" || {
+ fail "PowerMonitor is not ready"
+ return 1
+ }
+ ok "PowerMonitor is ready"
+ return 0
+}
+
+# Create ConfigMap to enable fake CPU meter for testing
+# Usage: create_fake_cpu_configmap <namespace> [configmap-name]
+create_fake_cpu_configmap() {
+ local ns="$1"
+ local cm_name="${2:-power-monitor-config}"
+
+ info "Creating fake CPU meter ConfigMap in namespace $ns"
+ kubectl create namespace "$ns" 2>/dev/null || true
+ kubectl apply -n "$ns" -f - </dev/null || true
+ return 0
+}
+
+# Update CRDs
+# Usage: update_crds
+update_crds() {
+ info "Updating CRDs..."
+ run kubectl apply --server-side --force-conflicts -k config/crd
+ run kubectl wait --for=condition=Established crds --all --timeout=120s
+ return 0
+}
+
+# Gather cluster state for debugging
+# Usage: gather_cluster_state <output-dir> [namespace]
+gather_cluster_state() {
+ local output_dir="$1"
+ local ns="${2:-}"
+
+ mkdir -p "$output_dir"
+
+ info "Gathering cluster state to $output_dir"
+
+ # All resources
+ kubectl get all -A >"$output_dir/all-resources.txt" 2>&1 || true
+
+ # Events
+ kubectl get events -A >"$output_dir/events.txt" 2>&1 || true
+
+ # PowerMonitor resources
+ kubectl get powermonitor -o yaml >"$output_dir/powermonitor.yaml" 2>&1 || true
+ kubectl get powermonitorinternal -o yaml >"$output_dir/powermonitorinternal.yaml" 2>&1 || true
+
+ # Operator logs if namespace provided
+ if [[ -n "$ns" ]]; then
+ kubectl logs -n "$ns" -l app.kubernetes.io/component=manager --tail=200 \
+ >"$output_dir/operator-logs.txt" 2>&1 || true
+ kubectl describe deployment -n "$ns" "$OPERATOR_DEPLOY_NAME" \
+ >"$output_dir/operator-deployment.txt" 2>&1 || true
+ fi
+
+ ok "Cluster state gathered"
+}
diff --git a/tests/utils/framework.go b/tests/utils/framework.go
index 690f82ab..f3855d23 100644
--- a/tests/utils/framework.go
+++ b/tests/utils/framework.go
@@ -593,7 +593,7 @@ func (f Framework) DeployOpenshiftCerts(serviceName, serviceNamespace, clusterIs
func (f Framework) InstallCertManager() {
f.T.Helper()
- _, err := oc.Literal().From("kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.2/cert-manager.yaml").Run()
+ _, err := oc.Literal().From("kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.18.2/cert-manager.yaml").Run()
assert.NoError(f.T, err, "failed to install cert-manager")
f.WaitUntil("cert-manager pods are running", func(ctx context.Context) (bool, error) {
@@ -612,7 +612,7 @@ func (f Framework) InstallCertManager() {
}, Timeout(5*time.Minute))
f.T.Cleanup(func() {
- _, err := oc.Literal().From("kubectl delete -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.2/cert-manager.yaml").Run()
+ _, err := oc.Literal().From("kubectl delete -f https://github.com/cert-manager/cert-manager/releases/download/v1.18.2/cert-manager.yaml").Run()
assert.NoError(f.T, err, "failed to uninstall cert-manager")
})
}