Changes from 3 commits
34 changes: 34 additions & 0 deletions keda-rbac.yaml
@@ -0,0 +1,34 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: keda-modelserving-scaling
rules:
Comment on lines +1 to +5
Copilot AI Mar 25, 2026
This RBAC manifest also appears to be an example asset; consider relocating it under examples/ or docs/ so users can find it alongside other sample manifests instead of at the repository root.
- apiGroups:
- workload.serving.volcano.sh
resources:
- modelservings
verbs:
Contributor

Does ModelServing really need all these permissions?

- get
- list
- watch
- apiGroups:
- workload.serving.volcano.sh
resources:
- modelservings/scale
verbs:
- get
- update
- patch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: keda-modelserving-scaling
Comment on lines +1 to +26
Copilot AI Mar 31, 2026
This grants cluster-wide permissions to read all ModelServing objects and update any modelservings/scale. If this is intended as an example for scaling a single namespace (the provided ScaledObject is in default), prefer a namespaced Role/RoleBinding scoped to that namespace to reduce blast radius. If cluster-wide access is genuinely required, consider documenting that explicitly in-file to prevent accidental over-privileging.
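
A namespaced sketch of that alternative, assuming the default namespace used by the example ScaledObject (illustrative only, not part of this PR):

```yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: keda-modelserving-scaling
  namespace: default   # scope to the namespace of the ScaledObject
rules:
- apiGroups: ["workload.serving.volcano.sh"]
  resources: ["modelservings"]
  verbs: ["get", "list", "watch"]
- apiGroups: ["workload.serving.volcano.sh"]
  resources: ["modelservings/scale"]
  verbs: ["get", "update", "patch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: keda-modelserving-scaling
  namespace: default
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: keda-modelserving-scaling
subjects:
- kind: ServiceAccount
  name: keda-operator
  namespace: keda
```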

roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: keda-modelserving-scaling
subjects:
- kind: ServiceAccount
name: keda-operator
namespace: keda
@@ -1617,9 +1617,14 @@ func (c *ModelServingController) UpdateModelServingStatus(ms *workloadv1alpha1.M
// If no groups exist, handle gracefully by setting revisions to the new revision
if errors.Is(err, datastore.ErrServingGroupNotFound) {
copy := latestMS.DeepCopy()
if copy.Status.CurrentRevision != revision || copy.Status.UpdateRevision != revision {
selector := labels.Set{
workloadv1alpha1.ModelServingNameLabelKey: latestMS.Name,
}.String()
needsUpdate := copy.Status.CurrentRevision != revision || copy.Status.UpdateRevision != revision || copy.Status.LabelSelector != selector
if needsUpdate {
copy.Status.CurrentRevision = revision
copy.Status.UpdateRevision = revision
copy.Status.LabelSelector = selector
_, updateErr := c.modelServingClient.WorkloadV1alpha1().ModelServings(copy.GetNamespace()).UpdateStatus(context.TODO(), copy, metav1.UpdateOptions{})
return updateErr
}
@@ -1745,6 +1750,18 @@ func (c *ModelServingController) UpdateModelServingStatus(ms *workloadv1alpha1.M
copy.Status.ObservedGeneration = latestMS.Generation
}

// Set labelSelector so the scale subresource can report it to HPA.
// Without this, HPA fails with "selector is required" because it cannot
// determine which pods belong to this ModelServing.
// The selector matches the label applied to all pods by createBasePod().
selector := labels.Set{
workloadv1alpha1.ModelServingNameLabelKey: latestMS.Name,
}.String()
if copy.Status.LabelSelector != selector {
shouldUpdate = true
copy.Status.LabelSelector = selector
}
Comment on lines +1753 to +1763
Copilot AI Mar 25, 2026
Add/adjust a unit test to assert that UpdateModelServingStatus sets .status.labelSelector (both in the no-serving-groups early return and the normal status update path). This behavior is critical for the CRD scale subresource/HPA integration and is currently unverified by tests.

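For context on why the selector matters here: the CRD's scale subresource exposes the selector string that HPA/KEDA read via labelSelectorPath. A sketch of that wiring, assuming the ModelServing CRD follows the conventional field paths (the exact paths in this repo are not shown in the diff):

```yaml
# CustomResourceDefinition excerpt (hypothetical paths for ModelServing)
subresources:
  scale:
    specReplicasPath: .spec.replicas
    statusReplicasPath: .status.replicas
    # HPA reads the selector exposed through this path; if the status
    # field is empty, HPA fails with "selector is required".
    labelSelectorPath: .status.labelSelector
```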

if shouldUpdate {
_, err := c.modelServingClient.WorkloadV1alpha1().ModelServings(copy.GetNamespace()).UpdateStatus(context.TODO(), copy, metav1.UpdateOptions{})
if err != nil {
@@ -3431,6 +3431,109 @@ func TestScaleUpServingGroups_TemplateRecovery(t *testing.T) {

// TestUpdateModelServingStatusRevisionFields tests the CurrentRevision and UpdateRevision logic
// following StatefulSet's behavior
func TestUpdateModelServingStatusLabelSelector(t *testing.T) {
tests := []struct {
name string
msName string
existingGroups map[int]string // ordinal -> revision; nil means no groups (ErrServingGroupNotFound path)
revision string
}{
{
name: "no ServingGroups yet — labelSelector is set on empty status",
msName: "my-llm",
existingGroups: nil,
revision: "rev-1",
},
{
name: "existing ServingGroups — labelSelector is set consistently",
msName: "my-llm",
existingGroups: map[int]string{
0: "rev-1",
1: "rev-1",
},
revision: "rev-1",
},
{
name: "name with special characters — selector encodes correctly",
Copilot AI Mar 31, 2026
The test case description says the selector 'encodes correctly', but the implementation uses labels.Set{...}.String() and does not perform any encoding; it just formats a k=v selector string. Suggest updating the test name to reflect what’s actually being validated (e.g., that a typical DNS-1123-ish name round-trips into the selector string) to avoid misleading future readers.

Suggested change
- name: "name with special characters — selector encodes correctly",
+ name: "name with dashes and numbers — selector string contains name unmodified",

msName: "serving-gpt-4o-mini",
existingGroups: map[int]string{
0: "rev-abc",
},
revision: "rev-abc",
},
}

for idx, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
kubeClient := kubefake.NewSimpleClientset()
kthenaClient := kthenafake.NewSimpleClientset()
volcanoClient := volcanofake.NewSimpleClientset()
apiextClient := apiextfake.NewSimpleClientset()

controller, err := NewModelServingController(kubeClient, kthenaClient, volcanoClient, apiextClient)
assert.NoError(t, err)

replicas := int32(len(tt.existingGroups))
if tt.existingGroups == nil {
replicas = 1
}
ms := &workloadv1alpha1.ModelServing{
ObjectMeta: metav1.ObjectMeta{
Namespace: "default",
Name: tt.msName,
},
Spec: workloadv1alpha1.ModelServingSpec{
Replicas: ptr.To(replicas),
SchedulerName: "volcano",
Template: workloadv1alpha1.ServingGroup{
Roles: []workloadv1alpha1.Role{
{
Name: "prefill",
Replicas: ptr.To[int32](1),
EntryTemplate: workloadv1alpha1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{Name: "c", Image: "img:latest"},
},
},
},
},
},
},
RecoveryPolicy: workloadv1alpha1.RoleRecreate,
},
}

_, err = kthenaClient.WorkloadV1alpha1().ModelServings("default").Create(context.Background(), ms, metav1.CreateOptions{})
assert.NoError(t, err)
err = controller.modelServingsInformer.GetIndexer().Add(ms)
assert.NoError(t, err)

// Populate store only when groups exist; nil means the "not found" path.
if tt.existingGroups != nil {
for ordinal, rev := range tt.existingGroups {
controller.store.AddServingGroup(utils.GetNamespaceName(ms), ordinal, rev)
groupName := utils.GenerateServingGroupName(tt.msName, ordinal)
controller.store.UpdateServingGroupStatus(utils.GetNamespaceName(ms), groupName, datastore.ServingGroupRunning)
}
}

err = controller.UpdateModelServingStatus(ms, tt.revision)
assert.NoError(t, err, "case %d: UpdateModelServingStatus should not error", idx)

updated, err := kthenaClient.WorkloadV1alpha1().ModelServings("default").Get(context.Background(), tt.msName, metav1.GetOptions{})
assert.NoError(t, err)

expectedSelector := labels.Set{
workloadv1alpha1.ModelServingNameLabelKey: tt.msName,
}.String()

assert.Equal(t, expectedSelector, updated.Status.LabelSelector,
"case %d: status.labelSelector must be %q", idx, expectedSelector)
})
}
}

func TestUpdateModelServingStatusRevisionFields(t *testing.T) {
tests := []struct {
name string
20 changes: 20 additions & 0 deletions scaledobject.yaml
@@ -0,0 +1,20 @@
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
name: modelserving-scaler
namespace: default
spec:
scaleTargetRef:
apiVersion: workload.serving.volcano.sh/v1alpha1
kind: ModelServing
name: test-model
minReplicaCount: 1
maxReplicaCount: 5
pollingInterval: 15
cooldownPeriod: 60
triggers:
- type: prometheus
metadata:
serverAddress: http://prometheus-kube-prometheus-prometheus.monitoring.svc.cluster.local:9090
query: sum(rate(process_cpu_seconds_total[1m]))
Copilot AI Mar 25, 2026
The Prometheus query sum(rate(process_cpu_seconds_total[1m])) is effectively cluster-wide and not scoped to the ModelServing/pods being autoscaled, so it can cause unintended scaling driven by unrelated workloads. Consider scoping the query to the target workload (e.g., via pod, namespace, or a label like modelserving.volcano.sh/name=test-model) or using a workload-specific metric.

Suggested change
- query: sum(rate(process_cpu_seconds_total[1m]))
+ query: sum(rate(process_cpu_seconds_total{namespace="default", pod=~"test-model-.*"}[1m]))

threshold: "0.01"
medium

The Prometheus query uses the metric process_cpu_seconds_total, but the mock deployments in test-deployment.yaml do not expose this metric. They expose kthena_router_* and vllm_* metrics.

To make the example consistent and functional, the query should use one of the available metrics. For example, using vllm_num_requests_running from the dummy-inference-vllm deployment would be more appropriate, as those pods are labeled to be part of the ModelServing instance.

You could change the query to something like this, assuming the goal is to scale when there's at least one running request:

        query: sum(vllm_num_requests_running{modelserving_volcano_sh_name="test-model"})
        threshold: "1"

Copilot AI Mar 25, 2026
This ScaledObject manifest appears to be an example asset; to align with the repo’s existing structure for sample YAML, consider moving it under examples/ (e.g., examples/autoscaling/keda/) rather than adding it at repository root.

Suggested change
- apiVersion: keda.sh/v1alpha1
- kind: ScaledObject
- metadata:
- name: modelserving-scaler
- namespace: default
- spec:
- scaleTargetRef:
- apiVersion: workload.serving.volcano.sh/v1alpha1
- kind: ModelServing
- name: test-model
- minReplicaCount: 1
- maxReplicaCount: 5
- pollingInterval: 15
- cooldownPeriod: 60
- triggers:
- - type: prometheus
- metadata:
- serverAddress: http://prometheus-kube-prometheus-prometheus.monitoring.svc.cluster.local:9090
- query: sum(rate(process_cpu_seconds_total[1m]))
- threshold: "0.01"
+ # Placeholder file at repository root.
+ # The example KEDA ScaledObject manifest has been moved under the examples tree,
+ # for example: examples/autoscaling/keda/scaledobject.yaml
+ #
+ # This file is intentionally left without any Kubernetes resources to avoid
+ # having example manifests at the repository root.

16 changes: 16 additions & 0 deletions servicemonitor.yaml
@@ -0,0 +1,16 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: kthena-router
namespace: monitoring
spec:
Comment on lines +1 to +6
Copilot AI Mar 25, 2026
This ServiceMonitor looks like an example asset; the repo already centralizes sample manifests under examples/. Consider relocating it under examples/ (and/or docs/) instead of adding it to the repository root.

namespaceSelector:
matchNames:
- default
selector:
matchLabels:
app.kubernetes.io/component: kthena-router
endpoints:
- port: http
path: /metrics
interval: 15s
113 changes: 113 additions & 0 deletions test-deployment.yaml
@@ -0,0 +1,113 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: kthena-router
labels:
Copilot AI Mar 25, 2026
These manifests appear to be example/test assets, but they are being added at repo root even though the repository already has an examples/ tree for custom resource YAML. Consider moving this file under an appropriate examples/ subdirectory (e.g., examples/model-serving/ or a new examples/autoscaling/keda/) to keep the top-level clean and make discovery consistent.

app.kubernetes.io/component: kthena-router
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/component: kthena-router
template:
metadata:
labels:
app.kubernetes.io/component: kthena-router
spec:
containers:
- name: kthena-router
image: nginx:alpine
Member

Not sure I understand why nginx is deployed as the kthena router.

ports:
- containerPort: 8080
volumeMounts:
- name: nginx-config
mountPath: /etc/nginx/conf.d
volumes:
- name: nginx-config
configMap:
name: kthena-router-nginx-config
---
apiVersion: v1
kind: ConfigMap
metadata:
name: kthena-router-nginx-config
data:
default.conf: |
server {
listen 8080;

location /metrics {
default_type text/plain;
return 200 '# HELP kthena_router_active_downstream_requests Number of active downstream requests\n# TYPE kthena_router_active_downstream_requests gauge\nkthena_router_active_downstream_requests 3\n# HELP kthena_router_requests_total Total requests\n# TYPE kthena_router_requests_total counter\nkthena_router_requests_total 100\n';
}
Copilot AI Mar 31, 2026
The NGINX return 200 '...\n...' payload will emit literal backslash-n sequences (NGINX doesn't interpret \n escapes here), which can make the Prometheus exposition invalid/unparseable. To make these manifests reliably scrapeable, serve metrics with real newlines (e.g., by returning a multi-line literal string with actual newline characters, serving a static metrics file, or using a tiny HTTP server/exporter image that emits valid Prometheus text format).
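
A sketch of the multi-line-literal option the comment describes (untested excerpt; nginx quoted strings may span lines, so the embedded newlines survive verbatim; metric names and values copied from the manifest above):

```nginx
location /metrics {
    default_type text/plain;
    # A literal spanning multiple config lines keeps real newlines,
    # so the Prometheus text format stays parseable.
    return 200 '# HELP kthena_router_active_downstream_requests Number of active downstream requests
# TYPE kthena_router_active_downstream_requests gauge
kthena_router_active_downstream_requests 3
# HELP kthena_router_requests_total Total requests
# TYPE kthena_router_requests_total counter
kthena_router_requests_total 100
';
}
```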


location / {
return 200 'kthena-router ok\n';
}
}
---
apiVersion: v1
kind: Service
metadata:
name: kthena-router
labels:
app.kubernetes.io/component: kthena-router
spec:
selector:
app.kubernetes.io/component: kthena-router
ports:
- name: http
port: 80
targetPort: 8080
protocol: TCP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dummy-inference-vllm
labels:
modelserving.volcano.sh/name: test-model
modelserving.volcano.sh/entry: "true"
spec:
replicas: 1
selector:
matchLabels:
modelserving.volcano.sh/name: test-model
modelserving.volcano.sh/entry: "true"
template:
metadata:
labels:
modelserving.volcano.sh/name: test-model
Copilot AI Mar 31, 2026
This example Deployment is labeled with the same modelserving.volcano.sh/name: test-model key/value that the controller now publishes via status.labelSelector. If someone applies this alongside a real ModelServing named test-model, HPA will likely count these pods as part of the scale target, skewing replica calculations and metrics. Recommend updating the example to avoid reusing the ModelServingNameLabelKey label (or use a different msName value that cannot collide with an actual ModelServing), so it doesn’t interfere with autoscaling behavior.

Suggested change
- modelserving.volcano.sh/name: test-model
- modelserving.volcano.sh/entry: "true"
- spec:
- replicas: 1
- selector:
- matchLabels:
- modelserving.volcano.sh/name: test-model
- modelserving.volcano.sh/entry: "true"
- template:
- metadata:
- labels:
- modelserving.volcano.sh/name: test-model
+ modelserving.volcano.sh/name: dummy-test-model
+ modelserving.volcano.sh/entry: "true"
+ spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ modelserving.volcano.sh/name: dummy-test-model
+ modelserving.volcano.sh/entry: "true"
+ template:
+ metadata:
+ labels:
+ modelserving.volcano.sh/name: dummy-test-model

modelserving.volcano.sh/entry: "true"
Copilot AI Mar 31, 2026
This example Deployment is labeled with the same modelserving.volcano.sh/name: test-model key/value that the controller now publishes via status.labelSelector. If someone applies this alongside a real ModelServing named test-model, HPA will likely count these pods as part of the scale target, skewing replica calculations and metrics. Recommend updating the example to avoid reusing the ModelServingNameLabelKey label (or use a different msName value that cannot collide with an actual ModelServing), so it doesn’t interfere with autoscaling behavior.

Suggested change
- modelserving.volcano.sh/name: test-model
- modelserving.volcano.sh/entry: "true"
- spec:
- replicas: 1
- selector:
- matchLabels:
- modelserving.volcano.sh/name: test-model
- modelserving.volcano.sh/entry: "true"
- template:
- metadata:
- labels:
- modelserving.volcano.sh/name: test-model
- modelserving.volcano.sh/entry: "true"
+ app.kubernetes.io/name: dummy-inference-vllm
+ app.kubernetes.io/entry: "true"
+ spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app.kubernetes.io/name: dummy-inference-vllm
+ app.kubernetes.io/entry: "true"
+ template:
+ metadata:
+ labels:
+ app.kubernetes.io/name: dummy-inference-vllm
+ app.kubernetes.io/entry: "true"

spec:
containers:
- name: dummy-vllm
image: nginx:alpine
ports:
- containerPort: 8000
volumeMounts:
- name: nginx-config
mountPath: /etc/nginx/conf.d
volumes:
- name: nginx-config
configMap:
name: dummy-vllm-nginx-config
---
apiVersion: v1
kind: ConfigMap
metadata:
name: dummy-vllm-nginx-config
data:
default.conf: |
server {
listen 8000;

location /metrics {
default_type text/plain;
return 200 '# HELP vllm_num_requests_running Number of running requests\n# TYPE vllm_num_requests_running gauge\nvllm_num_requests_running 2\n# HELP vllm_num_requests_waiting Number of waiting requests\n# TYPE vllm_num_requests_waiting gauge\nvllm_num_requests_waiting 0\n# HELP vllm_gpu_cache_usage_perc GPU cache usage percentage\n# TYPE vllm_gpu_cache_usage_perc gauge\nvllm_gpu_cache_usage_perc 0.45\n';
}
Copilot AI Mar 31, 2026
The NGINX return 200 '...\n...' payload will emit literal backslash-n sequences (NGINX doesn't interpret \n escapes here), which can make the Prometheus exposition invalid/unparseable. To make these manifests reliably scrapeable, serve metrics with real newlines (e.g., by returning a multi-line literal string with actual newline characters, serving a static metrics file, or using a tiny HTTP server/exporter image that emits valid Prometheus text format).


location / {
return 200 'dummy-vllm ok\n';
}
}
medium

The dummy-inference-vllm deployment exposes vllm_* metrics that would be useful for autoscaling, but there is no Service defined to expose its pods for scraping. Without a Service and a corresponding ServiceMonitor, Prometheus will not be able to collect these metrics.

To make this example fully functional, a Service for this deployment should be added. For example:

---
apiVersion: v1
kind: Service
metadata:
  name: dummy-inference-vllm
  labels:
    modelserving.volcano.sh/name: test-model
spec:
  selector:
    modelserving.volcano.sh/name: test-model
  ports:
    - name: http-metrics
      port: 8000
      targetPort: 8000
      protocol: TCP

A corresponding ServiceMonitor would also be needed to instruct Prometheus to scrape this new service.
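
Such a ServiceMonitor might look like the following, mirroring the kthena-router one in this PR. The name, port name, and selector labels come from the hypothetical Service sketched in the comment above, so adjust them to whatever Service is actually applied:

```yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: dummy-inference-vllm
  namespace: monitoring
spec:
  namespaceSelector:
    matchNames:
    - default
  selector:
    matchLabels:
      modelserving.volcano.sh/name: test-model   # must match the Service's labels
  endpoints:
  - port: http-metrics   # port name from the hypothetical Service above
    path: /metrics
    interval: 15s
```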
