Helm chart patch: controller metrics wiring (ConfigMap, ServiceMonitor, RBAC, tests).

What the hunks below do:
  * controller-configmap.yaml: emits METRICS_BIND_ADDRESS and METRICS_SECURE
    when the "kagent.controller.metricsEnabled" helper renders a non-empty
    string.
  * controller-servicemonitor.yaml: renders a ServiceMonitor only when metrics
    and serviceMonitor are both enabled and the cluster advertises
    monitoring.coreos.com/v1/ServiceMonitor.
  * rbac/metrics-auth-role.yaml: ClusterRole and ClusterRoleBinding for
    TokenReview / SubjectAccessReview creation by the controller
    ServiceAccount.
  * rbac/metrics-reader-role.yaml: ClusterRole allowing GET on /metrics.
  * tests/ and values.yaml: helm-unittest coverage and serviceMonitor defaults.

Review fix carried in this revision: rbac/metrics-auth-role.yaml was gated on
controller.metrics.secure, while the ConfigMap and the ServiceMonitor read
controller.metrics.secureServing. The template and its tests now use
secureServing so all three agree on one value.

Note: the post-image blob ids on the "index" lines are those of the original
patch and were not recomputed for the files edited in this revision.

diff --git a/helm/kagent/templates/controller-configmap.yaml b/helm/kagent/templates/controller-configmap.yaml
index 3f3dc04539..f85bbd8eb1 100644
--- a/helm/kagent/templates/controller-configmap.yaml
+++ b/helm/kagent/templates/controller-configmap.yaml
@@ -54,6 +54,10 @@ data:
   DATABASE_VECTOR_ENABLED: {{ .Values.database.postgres.vectorEnabled | quote }}
   WATCH_NAMESPACES: {{ include "kagent.watchNamespaces" . | quote }}
   MCP_EGRESS_PLAINTEXT: {{ .Values.controller.mcpEgressPlaintext | default false | quote }}
+  {{- if include "kagent.controller.metricsEnabled" . }}
+  METRICS_BIND_ADDRESS: {{ .Values.controller.metrics.bindAddress | quote }}
+  METRICS_SECURE: {{ .Values.controller.metrics.secureServing | quote }}
+  {{- end }}
   ZAP_LOG_LEVEL: {{ .Values.controller.loglevel | quote }}
   {{- $agentHost := "" }}
   {{- if and .Values.controller.agentDeployment .Values.controller.agentDeployment.host (not (eq .Values.controller.agentDeployment.host "")) }}
diff --git a/helm/kagent/templates/controller-servicemonitor.yaml b/helm/kagent/templates/controller-servicemonitor.yaml
new file mode 100644
index 0000000000..65d8911941
--- /dev/null
+++ b/helm/kagent/templates/controller-servicemonitor.yaml
@@ -0,0 +1,29 @@
+{{- if and .Values.controller.metrics.enabled .Values.controller.metrics.serviceMonitor.enabled (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "kagent.fullname" . }}-controller
+  namespace: {{ include "kagent.namespace" . }}
+  labels:
+    {{- include "kagent.labels" . | nindent 4 }}
+    {{- with .Values.controller.metrics.serviceMonitor.labels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+spec:
+  selector:
+    matchLabels:
+      {{- include "kagent.selectorLabels" . | nindent 6 }}
+      app.kubernetes.io/component: controller-metrics
+  endpoints:
+    - port: metrics
+      interval: {{ .Values.controller.metrics.serviceMonitor.interval }}
+      scrapeTimeout: {{ .Values.controller.metrics.serviceMonitor.scrapeTimeout }}
+      {{- if .Values.controller.metrics.secureServing }}
+      scheme: https
+      # NOTE(review): server certificate verification is skipped; presumably
+      # the controller serves a self-signed certificate - confirm.
+      tlsConfig:
+        insecureSkipVerify: true
+      bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+      {{- end }}
+{{- end }}
diff --git a/helm/kagent/templates/rbac/metrics-auth-role.yaml b/helm/kagent/templates/rbac/metrics-auth-role.yaml
new file mode 100644
index 0000000000..b02e06c6ea
--- /dev/null
+++ b/helm/kagent/templates/rbac/metrics-auth-role.yaml
@@ -0,0 +1,39 @@
+{{- /*
+Grants the controller ServiceAccount permission to create TokenReviews and
+SubjectAccessReviews. Rendered only when metrics are enabled and
+controller.metrics.secureServing is set, the same value that drives
+METRICS_SECURE in the controller ConfigMap and the https/bearer-token block
+of the controller ServiceMonitor.
+NOTE(review): presumably these reviews back authn/authz of metrics scrapes;
+confirm against the controller's metrics server setup.
+*/}}
+{{- if and .Values.controller.metrics.enabled .Values.controller.metrics.secureServing }}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: {{ include "kagent.fullname" . }}-metrics-auth-role
+  labels:
+    {{- include "kagent.labels" . | nindent 4 }}
+rules:
+- apiGroups: ["authentication.k8s.io"]
+  resources: ["tokenreviews"]
+  verbs: ["create"]
+- apiGroups: ["authorization.k8s.io"]
+  resources: ["subjectaccessreviews"]
+  verbs: ["create"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: {{ include "kagent.fullname" . }}-metrics-auth-rolebinding
+  labels:
+    {{- include "kagent.labels" . | nindent 4 }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: {{ include "kagent.fullname" . }}-metrics-auth-role
+subjects:
+- kind: ServiceAccount
+  name: {{ include "kagent.fullname" . }}-controller
+  namespace: {{ include "kagent.namespace" . }}
+{{- end }}
diff --git a/helm/kagent/templates/rbac/metrics-reader-role.yaml b/helm/kagent/templates/rbac/metrics-reader-role.yaml
new file mode 100644
index 0000000000..cc4680e1ee
--- /dev/null
+++ b/helm/kagent/templates/rbac/metrics-reader-role.yaml
@@ -0,0 +1,11 @@
+{{- if .Values.controller.metrics.enabled }}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: {{ include "kagent.fullname" . }}-metrics-reader
+  labels:
+    {{- include "kagent.labels" . | nindent 4 }}
+rules:
+- nonResourceURLs: ["/metrics"]
+  verbs: ["get"]
+{{- end }}
diff --git a/helm/kagent/tests/controller-deployment_test.yaml b/helm/kagent/tests/controller-deployment_test.yaml
index 36837d9be3..3178720725 100644
--- a/helm/kagent/tests/controller-deployment_test.yaml
+++ b/helm/kagent/tests/controller-deployment_test.yaml
@@ -76,6 +76,29 @@ tests:
       - equal:
           path: spec.template.spec.containers[0].ports[0].containerPort
           value: 8083
+      - lengthEqual:
+          path: spec.template.spec.containers[0].ports
+          count: 1
+
+  - it: should add metrics port and env vars when enabled
+    set:
+      controller.metrics.enabled: true
+    asserts:
+      - contains:
+          path: spec.template.spec.containers[0].ports
+          content:
+            name: metrics
+            containerPort: 8443
+            protocol: TCP
+        template: controller-deployment.yaml
+      - equal:
+          path: data.METRICS_BIND_ADDRESS
+          value: ":8443"
+        template: controller-configmap.yaml
+      - equal:
+          path: data.METRICS_SECURE
+          value: "true"
+        template: controller-configmap.yaml
 
   - it: should set substrate ate-api env vars and projected token when substrate is enabled
     template: controller-deployment.yaml
diff --git a/helm/kagent/tests/controller-service_test.yaml b/helm/kagent/tests/controller-service_test.yaml
index f3bb1d97b8..d6bf35bfd3 100644
--- a/helm/kagent/tests/controller-service_test.yaml
+++ b/helm/kagent/tests/controller-service_test.yaml
@@ -29,6 +29,9 @@ tests:
       - equal:
           path: spec.ports[0].protocol
           value: TCP
+      - lengthEqual:
+          path: spec.ports
+          count: 1
 
   - it: should have correct selector labels
     asserts:
@@ -68,4 +71,15 @@ tests:
     asserts:
       - equal:
           path: metadata.namespace
-          value: custom-namespace
\ No newline at end of file
+          value: custom-namespace
+
+  - it: should not expose metrics port on main service when metrics enabled
+    set:
+      controller.metrics.enabled: true
+    asserts:
+      - lengthEqual:
+          path: spec.ports
+          count: 1
+      - equal:
+          path: spec.ports[0].name
+          value: controller
diff --git a/helm/kagent/tests/controller-servicemonitor_test.yaml b/helm/kagent/tests/controller-servicemonitor_test.yaml
new file mode 100644
index 0000000000..ea29d73b96
--- /dev/null
+++ b/helm/kagent/tests/controller-servicemonitor_test.yaml
@@ -0,0 +1,77 @@
+suite: test controller servicemonitor
+templates:
+  - controller-servicemonitor.yaml
+tests:
+  - it: should not render by default
+    asserts:
+      - hasDocuments:
+          count: 0
+
+  - it: should not render when CRD is not installed
+    set:
+      controller.metrics.enabled: true
+      controller.metrics.serviceMonitor.enabled: true
+    asserts:
+      - hasDocuments:
+          count: 0
+
+  - it: should render ServiceMonitor when both enabled and CRD present
+    set:
+      controller.metrics.enabled: true
+      controller.metrics.serviceMonitor.enabled: true
+    capabilities:
+      apiVersions:
+        - monitoring.coreos.com/v1/ServiceMonitor
+    asserts:
+      - isKind:
+          of: ServiceMonitor
+      - equal:
+          path: spec.endpoints[0].port
+          value: metrics
+
+  - it: should target controller-metrics service via selector
+    set:
+      controller.metrics.enabled: true
+      controller.metrics.serviceMonitor.enabled: true
+    capabilities:
+      apiVersions:
+        - monitoring.coreos.com/v1/ServiceMonitor
+    asserts:
+      - equal:
+          path: spec.selector.matchLabels["app.kubernetes.io/component"]
+          value: controller-metrics
+
+  - it: should add TLS config and bearer token when secure is true
+    set:
+      controller.metrics.enabled: true
+      controller.metrics.serviceMonitor.enabled: true
+      controller.metrics.secureServing: true
+    capabilities:
+      apiVersions:
+        - monitoring.coreos.com/v1/ServiceMonitor
+    asserts:
+      - equal:
+          path: spec.endpoints[0].scheme
+          value: https
+      - equal:
+          path: spec.endpoints[0].tlsConfig.insecureSkipVerify
+          value: true
+      - equal:
+          path: spec.endpoints[0].bearerTokenFile
+          value: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+  - it: should not add TLS config or bearer token when secure is false
+    set:
+      controller.metrics.enabled: true
+      controller.metrics.serviceMonitor.enabled: true
+      controller.metrics.secureServing: false
+    capabilities:
+      apiVersions:
+        - monitoring.coreos.com/v1/ServiceMonitor
+    asserts:
+      - isNull:
+          path: spec.endpoints[0].scheme
+      - isNull:
+          path: spec.endpoints[0].tlsConfig
+      - isNull:
+          path: spec.endpoints[0].bearerTokenFile
diff --git a/helm/kagent/tests/metrics-rbac_test.yaml b/helm/kagent/tests/metrics-rbac_test.yaml
new file mode 100644
index 0000000000..440a6f07a7
--- /dev/null
+++ b/helm/kagent/tests/metrics-rbac_test.yaml
@@ -0,0 +1,104 @@
+suite: test metrics rbac
+templates:
+  - rbac/metrics-auth-role.yaml
+  - rbac/metrics-reader-role.yaml
+tests:
+  - it: should not render when metrics disabled
+    asserts:
+      - hasDocuments:
+          count: 0
+        template: rbac/metrics-auth-role.yaml
+      - hasDocuments:
+          count: 0
+        template: rbac/metrics-reader-role.yaml
+
+  - it: should not render metrics-auth when secure is false
+    set:
+      controller.metrics.enabled: true
+      controller.metrics.secureServing: false
+    template: rbac/metrics-auth-role.yaml
+    asserts:
+      - hasDocuments:
+          count: 0
+
+  - it: should render metrics-auth ClusterRole and ClusterRoleBinding when metrics enabled and secure
+    set:
+      controller.metrics.enabled: true
+      controller.metrics.secureServing: true
+    template: rbac/metrics-auth-role.yaml
+    asserts:
+      - hasDocuments:
+          count: 2
+      - isKind:
+          of: ClusterRole
+        documentIndex: 0
+      - isKind:
+          of: ClusterRoleBinding
+        documentIndex: 1
+
+  - it: metrics-auth ClusterRole should have tokenreview and subjectaccessreview rules
+    set:
+      controller.metrics.enabled: true
+      controller.metrics.secureServing: true
+    template: rbac/metrics-auth-role.yaml
+    documentIndex: 0
+    asserts:
+      - equal:
+          path: metadata.name
+          value: RELEASE-NAME-metrics-auth-role
+      - contains:
+          path: rules
+          content:
+            apiGroups: ["authentication.k8s.io"]
+            resources: ["tokenreviews"]
+            verbs: ["create"]
+      - contains:
+          path: rules
+          content:
+            apiGroups: ["authorization.k8s.io"]
+            resources: ["subjectaccessreviews"]
+            verbs: ["create"]
+
+  - it: metrics-auth ClusterRoleBinding should bind to controller serviceaccount
+    set:
+      controller.metrics.enabled: true
+      controller.metrics.secureServing: true
+    template: rbac/metrics-auth-role.yaml
+    documentIndex: 1
+    asserts:
+      - equal:
+          path: metadata.name
+          value: RELEASE-NAME-metrics-auth-rolebinding
+      - equal:
+          path: roleRef.kind
+          value: ClusterRole
+      - equal:
+          path: roleRef.name
+          value: RELEASE-NAME-metrics-auth-role
+      - equal:
+          path: subjects[0].kind
+          value: ServiceAccount
+      - equal:
+          path: subjects[0].name
+          value: RELEASE-NAME-controller
+      - equal:
+          path: subjects[0].namespace
+          value: NAMESPACE
+
+  - it: should render metrics-reader ClusterRole when metrics enabled
+    set:
+      controller.metrics.enabled: true
+    template: rbac/metrics-reader-role.yaml
+    asserts:
+      - hasDocuments:
+          count: 1
+      - isKind:
+          of: ClusterRole
+      - equal:
+          path: metadata.name
+          value: RELEASE-NAME-metrics-reader
+      - contains:
+          path: rules
+          content:
+            nonResourceURLs: ["/metrics"]
+            verbs: ["get"]
diff --git a/helm/kagent/values.yaml b/helm/kagent/values.yaml
index 20c79d95c6..1b4516862b 100644
--- a/helm/kagent/values.yaml
+++ b/helm/kagent/values.yaml
@@ -249,6 +249,16 @@ controller:
     service:
       type: ClusterIP
       port: 8443
+    # Prometheus Operator ServiceMonitor for the controller metrics endpoint.
+    # Rendered only when metrics are enabled, serviceMonitor.enabled is true,
+    # and the monitoring.coreos.com/v1 ServiceMonitor API is available.
+    serviceMonitor:
+      enabled: false
+      # Scrape interval and per-scrape timeout set on the metrics endpoint.
+      interval: 30s
+      scrapeTimeout: 10s
+      # Extra labels added to the ServiceMonitor metadata.
+      labels: {}
 
   # Extra controller env (mapped to flags via SUBSTRATE_* / OPENSHELL_* env names).
   # OpenShell AgentHarness: set OPENSHELL_GATEWAY_URL (or leave defaults below).