@@ -0,0 +1,34 @@
{{- if .Values.kthenaRouter.metrics.podMonitor.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: kthena-inference
medium

The resource name is hardcoded. It's a good practice to use Helm's naming conventions to generate resource names. This ensures names are unique and consistent across releases. Consider using the kthena.fullname helper.

  name: {{ include "kthena.fullname" . }}-inference-podmonitor
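
For reference, `kthena.fullname` is typically defined in the chart's `_helpers.tpl` along these lines (the chart's actual helper may differ; this is a conventional sketch):

```yaml
{{/* Conventional fullname helper; truncated to 63 chars to satisfy DNS label limits. */}}
{{- define "kthena.fullname" -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- printf "%s-%s" .Release.Name .Chart.Name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
```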

namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/component: kthena-inference
release: prometheus
{{- include "kthena.labels" . | nindent 4 }}
Comment on lines +9 to +10
Copilot AI Mar 24, 2026

The release: prometheus label is hard-coded. Prometheus Operator setups often use a different label key/value (or none) for selecting PodMonitors, so this can prevent scraping unless users happen to match this exact label. Make this label configurable (or omit it and expose additionalLabels).

Suggested change
release: prometheus
{{- include "kthena.labels" . | nindent 4 }}
{{- include "kthena.labels" . | nindent 4 }}
{{- with .Values.kthenaRouter.metrics.podMonitor.additionalLabels }}
{{- toYaml . | nindent 4 }}
{{- end }}
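
If the suggested `additionalLabels` key is adopted, users who do rely on the `release: prometheus` convention can restore it from values (values layout below is a hypothetical sketch matching this chart's structure):

```yaml
kthenaRouter:
  metrics:
    podMonitor:
      enabled: true
      interval: 15s
      # Restores the label many kube-prometheus-stack installs select on.
      additionalLabels:
        release: prometheus
```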

spec:
namespaceSelector:
matchNames:
- {{ .Release.Namespace }}
selector:
matchExpressions:
- key: modelserving.volcano.sh/name
operator: Exists
- key: modelserving.volcano.sh/group-name
operator: Exists
- key: modelserving.volcano.sh/role
operator: Exists
- key: modelserving.volcano.sh/entry
operator: In
values:
- "true"
podMetricsEndpoints:
- targetPort: 8000
path: /metrics
interval: {{ .Values.kthenaRouter.metrics.podMonitor.interval }}
- targetPort: 30000
path: /metrics
interval: {{ .Values.kthenaRouter.metrics.podMonitor.interval }}
Comment on lines +27 to +33

medium

The podMetricsEndpoints are hardcoded, which limits flexibility for different inference server backends or custom configurations. It would be better to make these endpoints configurable in values.yaml. This would allow users to easily add or change metric endpoints without modifying the chart template. This change would require adding an endpoints list to values.yaml under kthenaRouter.metrics.podMonitor.

  podMetricsEndpoints:
  {{- range .Values.kthenaRouter.metrics.podMonitor.endpoints }}
    - targetPort: {{ .targetPort }}
      path: {{ .path | default "/metrics" }}
      interval: {{ $.Values.kthenaRouter.metrics.podMonitor.interval }}
  {{- end }}
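
With that range in place, a hypothetical values entry such as `endpoints: [{targetPort: 8000}, {targetPort: 30000, path: /custom}]` would render roughly:

```yaml
podMetricsEndpoints:
  - targetPort: 8000
    path: /metrics      # default applied by `.path | default "/metrics"`
    interval: 15s
  - targetPort: 30000
    path: /custom
    interval: 15s
```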

Comment on lines +27 to +33

Copilot AI Mar 24, 2026

This PodMonitor config will scrape both targetPort: 8000 and targetPort: 30000 on every matched pod. For backends that only expose one of these ports, Prometheus will continuously record scrape failures/target down, which can be noisy and impact alerting. Consider splitting into two PodMonitors selected by a backend label, or make the endpoints list configurable so users only enable the port(s) they actually expose.

Suggested change
podMetricsEndpoints:
- targetPort: 8000
path: /metrics
interval: {{ .Values.kthenaRouter.metrics.podMonitor.interval }}
- targetPort: 30000
path: /metrics
interval: {{ .Values.kthenaRouter.metrics.podMonitor.interval }}
podMetricsEndpoints:
{{- with .Values.kthenaRouter.metrics.podMonitor.podMetricsEndpoints }}
{{- toYaml . | nindent 4 }}
{{- else }}
- targetPort: 8000
path: /metrics
interval: {{ .Values.kthenaRouter.metrics.podMonitor.interval }}
- targetPort: 30000
path: /metrics
interval: {{ .Values.kthenaRouter.metrics.podMonitor.interval }}
{{- end }}

{{- end }}
@@ -0,0 +1,24 @@
{{- if .Values.kthenaRouter.autoscaling.enabled }}

high

If autoscaling.enabled is true, scaleTargetName must be provided. An empty scaleTargetName will result in an invalid ScaledObject. It's good practice to enforce this requirement using Helm's required function to fail fast during template rendering.

{{- if .Values.kthenaRouter.autoscaling.enabled }}
{{- required "A value for kthenaRouter.autoscaling.scaleTargetName is required when autoscaling is enabled" .Values.kthenaRouter.autoscaling.scaleTargetName }}
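
With that guard in place, a minimal working configuration must supply the target name explicitly (the Deployment name below is illustrative):

```yaml
kthenaRouter:
  autoscaling:
    enabled: true
    # Hypothetical Deployment name; required when autoscaling is enabled.
    scaleTargetName: kthena-inference
```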

apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
name: kthena-inference-scaler

medium

The resource name is hardcoded. It's a good practice to use Helm's naming conventions to generate resource names. This ensures names are unique and consistent across releases. Consider using the kthena.fullname helper.

  name: {{ include "kthena.fullname" . }}-inference-scaler

namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/component: kthena-inference
{{- include "kthena.labels" . | nindent 4 }}
spec:
cooldownPeriod: {{ .Values.kthenaRouter.autoscaling.cooldownPeriod }}
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: {{ .Values.kthenaRouter.autoscaling.scaleTargetName }}

Copilot AI Mar 24, 2026

When autoscaling is enabled, scaleTargetName defaults to an empty string, which will render an invalid scaleTargetRef.name. Consider using Helm's required (or a conditional fail) to enforce a non-empty target name when .Values.kthenaRouter.autoscaling.enabled is true.

Suggested change
name: {{ .Values.kthenaRouter.autoscaling.scaleTargetName }}
name: {{ required "kthenaRouter.autoscaling.scaleTargetName must be set when autoscaling is enabled" .Values.kthenaRouter.autoscaling.scaleTargetName }}

minReplicaCount: {{ .Values.kthenaRouter.autoscaling.minReplicas }}
maxReplicaCount: {{ .Values.kthenaRouter.autoscaling.maxReplicas }}
triggers:
- type: prometheus
metadata:
serverAddress: {{ .Values.kthenaRouter.autoscaling.prometheusAddress }}
query: {{ .Values.kthenaRouter.autoscaling.query }}
Comment on lines +21 to +22

Copilot AI Mar 24, 2026

serverAddress and query are rendered unquoted. PromQL expressions commonly include characters ({}, [], ,, :) that can break YAML parsing or be re-interpreted; render these values with | quote to ensure the ScaledObject is always valid YAML regardless of the configured query/address.

Suggested change
serverAddress: {{ .Values.kthenaRouter.autoscaling.prometheusAddress }}
query: {{ .Values.kthenaRouter.autoscaling.query }}
serverAddress: {{ .Values.kthenaRouter.autoscaling.prometheusAddress | quote }}
query: {{ .Values.kthenaRouter.autoscaling.query | quote }}
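
Quoting matters because a PromQL expression can begin with a YAML flow indicator. For example, a bare selector query would otherwise be parsed as a flow mapping rather than a string (illustrative metric name):

```yaml
# Unquoted: YAML parses the value as a mapping, not a string, and KEDA rejects it.
# query: {__name__="kthena_router_active_downstream_requests"}

# Quoted: rendered as the intended string.
query: '{__name__="kthena_router_active_downstream_requests"}'
```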

threshold: {{ .Values.kthenaRouter.autoscaling.threshold | quote }}
{{- end }}
@@ -0,0 +1,22 @@
{{- if .Values.kthenaRouter.metrics.serviceMonitor.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: kthena-router

medium

The resource name is hardcoded. It's a good practice to use Helm's naming conventions to generate resource names. This ensures names are unique and consistent across releases. Consider using the kthena.fullname helper.

  name: {{ include "kthena.fullname" . }}-router

namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/component: kthena-router
release: prometheus

Copilot AI Mar 24, 2026

The release: prometheus label is hard-coded. Prometheus Operator setups often use a different label key/value (or none) for selecting ServiceMonitors, so hard-coding this can make scraping silently not work. Make this label configurable (or omit it and let users add it via additionalLabels).

Suggested change
release: prometheus
{{- $prometheusLabels := default (dict "release" "prometheus") .Values.kthenaRouter.metrics.serviceMonitor.prometheusLabels }}
{{- range $key, $value := $prometheusLabels }}
{{ $key }}: {{ $value }}
{{- end }}

{{- include "kthena.labels" . | nindent 4 }}
spec:
namespaceSelector:
matchNames:
- {{ .Release.Namespace }}
selector:
matchLabels:
app.kubernetes.io/component: kthena-router
endpoints:
- port: http
path: /metrics
interval: {{ .Values.kthenaRouter.metrics.serviceMonitor.interval }}
{{- end }}
30 changes: 30 additions & 0 deletions charts/kthena/charts/networking/values.yaml
Original file line number Diff line number Diff line change
@@ -66,6 +66,36 @@ kthenaRouter:
# kubeAPIBurst is the burst to use while talking with kubernetes apiserver
# If 0 or not specified, uses default value (10)
kubeAPIBurst: 0
# metrics configuration for Prometheus monitoring
metrics:
serviceMonitor:
# enabled creates a ServiceMonitor for kthena-router (requires Prometheus Operator)
enabled: false
# interval is the scrape interval for the ServiceMonitor
interval: 15s
podMonitor:
# enabled creates a PodMonitor for inference pods (requires Prometheus Operator)
enabled: false
# interval is the scrape interval for the PodMonitor
interval: 15s
Comment on lines +76 to +80

medium

The PodMonitor template has hardcoded ports for metrics scraping. To make it more flexible for different backends, consider making the metric endpoints configurable here. This change would need to be paired with an update to the podmonitor.yaml template to iterate over this list.

    podMonitor:
      # enabled creates a PodMonitor for inference pods (requires Prometheus Operator)
      enabled: false
      # interval is the scrape interval for the PodMonitor
      interval: 15s
      # endpoints for the PodMonitor to scrape
      endpoints:
        - { targetPort: 8000, path: /metrics }
        - { targetPort: 30000, path: /metrics }

# autoscaling configuration for KEDA-based inference scaling
autoscaling:
# enabled creates a KEDA ScaledObject (requires KEDA)
enabled: false
# scaleTargetName is the name of the Deployment to scale
scaleTargetName: ""
Comment on lines +85 to +86

high

The scaleTargetName is a critical value when autoscaling is enabled, but it defaults to an empty string. This will cause the ScaledObject to be invalid. It's important to make it clear to the user that this value must be set. Adding a note to the comment and using a required check in the template would improve usability.

    # scaleTargetName is the name of the Deployment to scale (required if autoscaling.enabled is true)
    scaleTargetName: ""

# prometheusAddress is the Prometheus server URL
prometheusAddress: http://prometheus-kube-prometheus-prometheus.monitoring.svc:9090
# query is the PromQL query used for scaling decisions
query: sum(kthena_router_active_downstream_requests)
# threshold is the per-pod metric value that triggers scaling

Copilot AI Mar 24, 2026

The comment describes threshold as a per-pod metric value, but the default query is sum(kthena_router_active_downstream_requests) (a total across all matched series). Update the comment to match the query semantics or adjust the default query so the threshold is actually per pod/target.

Suggested change
# threshold is the per-pod metric value that triggers scaling
# threshold is the total metric value across all pods that triggers scaling
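
For context: with KEDA's Prometheus scaler, the trigger's `metricType` defaults to `AverageValue`, so a cluster-wide sum paired with a threshold does behave per-replica in practice (this reflects my understanding of KEDA defaults; verify against the KEDA version in use):

```yaml
triggers:
  - type: prometheus
    metadata:
      serverAddress: http://prometheus-kube-prometheus-prometheus.monitoring.svc:9090
      query: sum(kthena_router_active_downstream_requests)
      threshold: "5"
    # With AverageValue semantics, desiredReplicas ≈ ceil(queryResult / 5),
    # e.g. a total of 12 active requests scales to 3 replicas.
```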

threshold: 5
# cooldownPeriod is seconds to wait before scaling down after last trigger
cooldownPeriod: 30
# minReplicas is the minimum number of inference replicas
minReplicas: 1
# maxReplicas is the maximum number of inference replicas
maxReplicas: 10

webhook:
enabled: true
28 changes: 28 additions & 0 deletions charts/kthena/values.yaml
@@ -81,6 +81,34 @@ networking:
# -- Enable Gateway API Inference Extension features.<br/>
# Requires `gatewayAPI.enabled` to be true.
inferenceExtension: false
metrics:
serviceMonitor:
# -- Enable ServiceMonitor for kthena-router (requires Prometheus Operator).
enabled: false
# -- Scrape interval for the ServiceMonitor.
interval: 15s
podMonitor:
# -- Enable PodMonitor for inference pods (requires Prometheus Operator).
enabled: false
# -- Scrape interval for the PodMonitor.
interval: 15s
Comment on lines +90 to +94

medium

The PodMonitor template has hardcoded ports for metrics scraping. To make it more flexible for different backends, consider making the metric endpoints configurable here. This would involve adding an endpoints list that can be passed to the subchart and used in the podmonitor.yaml template.

      podMonitor:
        # -- Enable PodMonitor for inference pods (requires Prometheus Operator).
        enabled: false
        # -- Scrape interval for the PodMonitor.
        interval: 15s
        # -- Endpoints for the PodMonitor to scrape.
        endpoints:
          - { targetPort: 8000, path: /metrics }
          - { targetPort: 30000, path: /metrics }

autoscaling:
# -- Enable KEDA ScaledObject for inference autoscaling (requires KEDA).
enabled: false
# -- Name of the Deployment to scale.
scaleTargetName: ""
Comment on lines +98 to +99

high

The scaleTargetName is a critical value when autoscaling is enabled, but it defaults to an empty string. This will cause the ScaledObject to be invalid. It's important to make it clear to the user that this value must be set.

      # -- Name of the Deployment to scale (required if autoscaling.enabled is true).
      scaleTargetName: ""

# -- Prometheus server URL for KEDA to query.
prometheusAddress: http://prometheus-kube-prometheus-prometheus.monitoring.svc:9090
# -- PromQL query for scaling decisions.
query: sum(kthena_router_active_downstream_requests)
# -- Per-pod metric threshold that triggers scaling.

Copilot AI Mar 24, 2026

The comment says the threshold is a per-pod metric threshold, but the default query is sum(kthena_router_active_downstream_requests), which produces a cluster-wide total across all series. Either adjust the wording to reflect that the threshold applies to the query result, or change the default query to return a per-pod/per-target value if that’s the intended semantics.

Suggested change
# -- Per-pod metric threshold that triggers scaling.
# -- Metric threshold on the result of the PromQL query that triggers scaling.

threshold: 5
# -- Seconds to wait before scaling down.
cooldownPeriod: 30
# -- Minimum inference replicas.
minReplicas: 1
# -- Maximum inference replicas.
maxReplicas: 10

global:
# -- Certificate Management Mode.<br/>
61 changes: 61 additions & 0 deletions monitoring.yaml
@@ -0,0 +1,61 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: kthena-router
labels:
app.kubernetes.io/component: kthena-router
Comment on lines +3 to +6

high

The Kubernetes resources in this manifest are not namespaced. It's a best practice to always define a namespace in the metadata for all resources to avoid deploying them into the default namespace accidentally and to ensure proper isolation. Please add a namespace to this ServiceMonitor and other resources in this file.

metadata:
  name: kthena-router
  namespace: default # Or another appropriate namespace
  labels:
    app.kubernetes.io/component: kthena-router

spec:
selector:
matchLabels:
app.kubernetes.io/component: kthena-router
endpoints:
- port: http
path: /metrics
interval: 15s
---
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: kthena-inference
labels:
app.kubernetes.io/part-of: kthena
spec:
selector:
matchExpressions:
- key: modelserving.volcano.sh/name
operator: Exists
- key: modelserving.volcano.sh/group-name
operator: Exists
- key: modelserving.volcano.sh/role
operator: Exists
- key: modelserving.volcano.sh/entry
operator: In
values:
- "true"
podMetricsEndpoints:
- port: ""
targetPort: 8000
path: /metrics
interval: 15s
- port: ""
targetPort: 30000
Comment on lines +36 to +41

Copilot AI Mar 24, 2026

In this PodMonitor example, podMetricsEndpoints[].port is set to an empty string. Prometheus Operator will treat this as a named port ("") and it can fail validation/scraping; if you intend to scrape by number, omit port entirely and keep only targetPort (or set a real named port).

Suggested change
- port: ""
targetPort: 8000
path: /metrics
interval: 15s
- port: ""
targetPort: 30000
- targetPort: 8000
path: /metrics
interval: 15s
- targetPort: 30000
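
An alternative to numeric `targetPort` is naming the container port and scraping by name, assuming the inference pod spec declares one (the port name below is illustrative):

```yaml
# Pod spec side (for reference):
#   ports:
#     - name: metrics
#       containerPort: 8000
podMetricsEndpoints:
  - port: metrics
    path: /metrics
    interval: 15s
```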

path: /metrics
interval: 15s
---
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
name: kthena-inference-scaler
labels:
app.kubernetes.io/part-of: kthena
spec:
minReplicaCount: 1
maxReplicaCount: 10
scaleTargetRef:
name: kthena-inference
triggers:
- type: prometheus
metadata:
serverAddress: http://prometheus-kube-prometheus-prometheus.monitoring:9090

high

The Prometheus server address is missing the .svc suffix. For cross-namespace service resolution, the FQDN of the service should be used, which is typically in the format <service-name>.<namespace>.svc.cluster.local. While this might work in some cluster configurations depending on DNS search paths, it's more robust to use the full or partial FQDN.

        serverAddress: http://prometheus-kube-prometheus-prometheus.monitoring.svc:9090


Copilot AI Mar 24, 2026

serverAddress is set to http://prometheus-kube-prometheus-prometheus.monitoring:9090. In Kubernetes DNS, service.namespace without .svc is not reliably resolvable across namespaces due to search path behavior; use the full service DNS name (e.g., ...monitoring.svc:9090 or ...monitoring.svc.cluster.local:9090) to avoid runtime resolution failures for the KEDA scaler.

Suggested change
serverAddress: http://prometheus-kube-prometheus-prometheus.monitoring:9090
serverAddress: http://prometheus-kube-prometheus-prometheus.monitoring.svc:9090

query: sum(kthena_router_active_downstream_requests)
threshold: "5"
Comment on lines +1 to +61

Copilot AI Mar 24, 2026

This top-level monitoring.yaml duplicates the Helm-rendered ServiceMonitor/PodMonitor/ScaledObject resources added under charts/. That duplication is likely to drift over time (e.g., different Prometheus address, different PodMonitor endpoints), so either move this to an examples/ or docs/ location and clearly mark it as an example, or remove it in favor of the chart templates.

Suggested change
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: kthena-router
labels:
app.kubernetes.io/component: kthena-router
spec:
selector:
matchLabels:
app.kubernetes.io/component: kthena-router
endpoints:
- port: http
path: /metrics
interval: 15s
---
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: kthena-inference
labels:
app.kubernetes.io/part-of: kthena
spec:
selector:
matchExpressions:
- key: modelserving.volcano.sh/name
operator: Exists
- key: modelserving.volcano.sh/group-name
operator: Exists
- key: modelserving.volcano.sh/role
operator: Exists
- key: modelserving.volcano.sh/entry
operator: In
values:
- "true"
podMetricsEndpoints:
- port: ""
targetPort: 8000
path: /metrics
interval: 15s
- port: ""
targetPort: 30000
path: /metrics
interval: 15s
---
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
name: kthena-inference-scaler
labels:
app.kubernetes.io/part-of: kthena
spec:
minReplicaCount: 1
maxReplicaCount: 10
scaleTargetRef:
name: kthena-inference
triggers:
- type: prometheus
metadata:
serverAddress: http://prometheus-kube-prometheus-prometheus.monitoring:9090
query: sum(kthena_router_active_downstream_requests)
threshold: "5"
# Example monitoring configuration for kthena components.
#
# NOTE:
# - The authoritative ServiceMonitor, PodMonitor, and ScaledObject resources
# are rendered from the Helm charts under the `charts/` directory.
# - This file is kept only as documentation / an example manifest and should
# not be applied directly in environments that use the Helm chart.
#
# If you need to customize monitoring, prefer editing the Helm values/templates
# rather than modifying this example file.
# apiVersion: monitoring.coreos.com/v1
# kind: ServiceMonitor
# metadata:
# name: kthena-router
# labels:
# app.kubernetes.io/component: kthena-router
# spec:
# selector:
# matchLabels:
# app.kubernetes.io/component: kthena-router
# endpoints:
# - port: http
# path: /metrics
# interval: 15s
# ---
# apiVersion: monitoring.coreos.com/v1
# kind: PodMonitor
# metadata:
# name: kthena-inference
# labels:
# app.kubernetes.io/part-of: kthena
# spec:
# selector:
# matchExpressions:
# - key: modelserving.volcano.sh/name
# operator: Exists
# - key: modelserving.volcano.sh/group-name
# operator: Exists
# - key: modelserving.volcano.sh/role
# operator: Exists
# - key: modelserving.volcano.sh/entry
# operator: In
# values:
# - "true"
# podMetricsEndpoints:
# - port: ""
# targetPort: 8000
# path: /metrics
# interval: 15s
# - port: ""
# targetPort: 30000
# path: /metrics
# interval: 15s
# ---
# apiVersion: keda.sh/v1alpha1
# kind: ScaledObject
# metadata:
# name: kthena-inference-scaler
# labels:
# app.kubernetes.io/part-of: kthena
# spec:
# minReplicaCount: 1
# maxReplicaCount: 10
# scaleTargetRef:
# name: kthena-inference
# triggers:
# - type: prometheus
# metadata:
# serverAddress: http://prometheus-kube-prometheus-prometheus.monitoring:9090
# query: sum(kthena_router_active_downstream_requests)
# threshold: "5"
