volcano-sh · Tweakzx · Apr 2, 2026 · gemini-code-assist · Mar 28, 2026 · hzxuzhonghu
diff --git a/charts/kthena/charts/networking/README.md b/charts/kthena/charts/networking/README.md
@@ -52,6 +52,24 @@ kthenaRouter:
 | `kthenaRouter.fairness.inputTokenWeight` | float | `1.0` | Weight for input tokens (≥0) |
 | `kthenaRouter.fairness.outputTokenWeight` | float | `2.0` | Weight for output tokens (≥0) |
 
+#### Backend Metric Port Configuration
+
+Use these values to set the ports the router uses to scrape metrics from model backends; they are rendered into the `backend` section of `routerConfiguration`:
+
+```yaml
+kthenaRouter:
+  backend:
+    sglang:
+      metricPort: 30000
+    vllm:
+      metricPort: 8000
+```
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `kthenaRouter.backend.sglang.metricPort` | int | `30000` | Metric port used for SGLang backends |
+| `kthenaRouter.backend.vllm.metricPort` | int | `8000` | Metric port used for vLLM backends |
+
 #### Configuration Scenarios
 
 ##### Development Environment

diff --git a/charts/kthena/charts/networking/templates/kthena-router/component/configmap.yaml b/charts/kthena/charts/networking/templates/kthena-router/component/configmap.yaml
@@ -40,3 +40,8 @@ data:
               weight: 1
             - name: prefix-cache
               weight: 1
+    backend:
+      sglang:
+        metricPort: {{ .Values.kthenaRouter.backend.sglang.metricPort | default 30000 }}
+      vllm:
+        metricPort: {{ .Values.kthenaRouter.backend.vllm.metricPort | default 8000 }}
diff --git a/charts/kthena/charts/networking/values.yaml b/charts/kthena/charts/networking/values.yaml
@@ -66,6 +66,12 @@ kthenaRouter:
   # kubeAPIBurst is the burst to use while talking with kubernetes apiserver
   # If 0 or not specified, uses default value (10)
   kubeAPIBurst: 0
+  # backend metric ports used by router to scrape engine metrics
+  backend:
+    sglang:
+      metricPort: 30000
+    vllm:
+      metricPort: 8000
 
 webhook:
   enabled: true

diff --git a/charts/kthena/values.yaml b/charts/kthena/values.yaml
@@ -81,6 +81,13 @@ networking:
       # -- Enable Gateway API Inference Extension features.<br/>
       # Requires `gatewayAPI.enabled` to be true.
       inferenceExtension: false
+    backend:
+      sglang:
+        # -- Metrics port exposed by SGLang model servers.
+        metricPort: 30000
+      vllm:
+        # -- Metrics port exposed by vLLM model servers.
+        metricPort: 8000
 
 global:
   # -- Certificate Management Mode.<br/>

diff --git a/docs/kthena/docs/reference/helm-chart-values.md b/docs/kthena/docs/reference/helm-chart-values.md
@@ -18,6 +18,8 @@ A Helm chart for deploying Kthena
 | global.certManagementMode | string | `"auto"` | Certificate Management Mode.<br/>  Three mutually exclusive options for managing TLS certificates:<br/>  - `auto`: Webhook servers generate self-signed certificates automatically.<br/>  - `cert-manager`: Use cert-manager to generate and manage certificates (requires cert-manager installation).<br/>  - `manual`: Provide your own certificates via caBundle. |
 | global.webhook.caBundle | string | `""` | CA bundle for webhook server certificates (base64-encoded).<br/> This is ONLY required when `certManagementMode` is set to "manual".<br/> You can generate it with: `cat /path/to/your/ca.crt | base64 | tr -d '\n'`<br/> |
 | networking.enabled | bool | `true` | Enable the networking subchart. |
+| networking.kthenaRouter.backend.sglang.metricPort | int | `30000` | Metrics port exposed by SGLang model servers. |
+| networking.kthenaRouter.backend.vllm.metricPort | int | `8000` | Metrics port exposed by vLLM model servers. |
 | networking.kthenaRouter.debugPort | int | `15000` | Debug server port for Kthena Router (localhost only). |
 | networking.kthenaRouter.enabled | bool | `true` | Enable Kthena Router. |
 | networking.kthenaRouter.fairness.enabled | bool | `false` | Enable fairness scheduling. |

diff --git a/pkg/kthena-router/backend/backend.go b/pkg/kthena-router/backend/backend.go
@@ -18,6 +18,7 @@ package backend
 
 import (
 	"fmt"
+	"sync"
 
 	dto "github.com/prometheus/client_model/go"
 	corev1 "k8s.io/api/core/v1"
@@ -34,9 +35,24 @@ type MetricsProvider interface {
 	GetHistogramPodMetrics(allMetrics map[string]*dto.MetricFamily, previousHistogram map[string]*dto.Histogram) (map[string]float64, map[string]*dto.Histogram)
 }
 
-var engineRegistry = map[string]MetricsProvider{
-	"SGLang": sglang.NewSglangEngine(),
-	"vLLM":   vllm.NewVllmEngine(),
+var (
+	engineRegistryMu sync.RWMutex
+	engineRegistries = buildEngineRegistries(0, 0)
+)
+
+func buildEngineRegistries(sglangMetricPort, vllmMetricPort uint32) map[string]MetricsProvider {
+	return map[string]MetricsProvider{
+		"SGLang": sglang.NewSglangEngine(sglangMetricPort),
+		"vLLM":   vllm.NewVllmEngine(vllmMetricPort),
+	}
+}
+
+// ConfigureEngineRegistry rebuilds the engine providers with the configured metric ports.
+// A zero or out-of-range port is passed through; each engine constructor falls back to its own default.
+func ConfigureEngineRegistry(sglangMetricPort, vllmMetricPort uint32) {
+	engineRegistryMu.Lock()
+	defer engineRegistryMu.Unlock()
+	engineRegistries = buildEngineRegistries(sglangMetricPort, vllmMetricPort)
 }
 
 func GetPodMetrics(engine string, pod *corev1.Pod, previousHistogram map[string]*dto.Histogram) (map[string]float64, map[string]*dto.Histogram) {
@@ -65,7 +81,9 @@ func GetPodMetrics(engine string, pod *corev1.Pod, previousHistogram map[string]
 }
 
 func GetMetricsProvider(engine string) (MetricsProvider, error) {
-	if provider, exists := engineRegistry[engine]; exists {
+	engineRegistryMu.RLock()
+	defer engineRegistryMu.RUnlock()
+	if provider, exists := engineRegistries[engine]; exists {
 		return provider, nil
 	}
 	return nil, fmt.Errorf("unsupported engine: %s", engine)

diff --git a/pkg/kthena-router/backend/backend_test.go b/pkg/kthena-router/backend/backend_test.go
@@ -0,0 +1,60 @@
+/*
+Copyright The Volcano Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package backend
+
+import (
+	"testing"
+
+	"github.com/agiledragon/gomonkey/v2"
+	dto "github.com/prometheus/client_model/go"
+	corev1 "k8s.io/api/core/v1"
+
+	backendmetrics "github.com/volcano-sh/kthena/pkg/kthena-router/backend/metrics"
+)
+
+func TestConfigureEngineRegistryUsesConfiguredPorts(t *testing.T) {
+	ConfigureEngineRegistry(31000, 18000)
+	t.Cleanup(func() {
+		ConfigureEngineRegistry(0, 0)
+	})
+
+	var requestedURLs []string
+	patch := gomonkey.ApplyFunc(backendmetrics.ParseMetricsURL, func(url string) (map[string]*dto.MetricFamily, error) {
+		requestedURLs = append(requestedURLs, url)
+		return map[string]*dto.MetricFamily{}, nil
+	})
+	defer patch.Reset()
+
+	pod := &corev1.Pod{
+		Status: corev1.PodStatus{
+			PodIP: "10.0.0.1",
+		},
+	}
+
+	GetPodMetrics("SGLang", pod, nil)
+	GetPodMetrics("vLLM", pod, nil)
+
+	if len(requestedURLs) != 2 {
+		t.Fatalf("expected 2 metrics requests, got %d", len(requestedURLs))
+	}
+	if requestedURLs[0] != "http://10.0.0.1:31000/metrics" {
+		t.Fatalf("expected sglang metrics URL to use port 31000, got %s", requestedURLs[0])
+	}
+	if requestedURLs[1] != "http://10.0.0.1:18000/metrics" {
+		t.Fatalf("expected vllm metrics URL to use port 18000, got %s", requestedURLs[1])
+	}
+}
diff --git a/pkg/kthena-router/backend/sglang/metrics.go b/pkg/kthena-router/backend/sglang/metrics.go
@@ -21,11 +21,14 @@ import (
 
 	dto "github.com/prometheus/client_model/go"
 	corev1 "k8s.io/api/core/v1"
+	"k8s.io/klog/v2"
 
 	"github.com/volcano-sh/kthena/pkg/kthena-router/backend/metrics"
 	"github.com/volcano-sh/kthena/pkg/kthena-router/utils"
 )
 
+const defaultMetricPort uint32 = 30000
+
 var (
 	GPUCacheUsage     = "sglang:token_usage"
 	RequestWaitingNum = "sglang:num_queue_reqs"
@@ -58,10 +61,22 @@ type sglangEngine struct {
 	MetricPort uint32
 }
 
-func NewSglangEngine() *sglangEngine {
-	// TODO: Get MetricsPort from sglang configuration
+func NewSglangEngine(metricPort ...uint32) *sglangEngine {
+	if len(metricPort) > 1 {
+		panic("NewSglangEngine accepts at most one metricPort argument")
+	}
+
+	port := defaultMetricPort
+	if len(metricPort) == 1 {
+		if metricPort[0] > 0 && metricPort[0] <= 65535 {
+			port = metricPort[0]
+		} else if metricPort[0] != 0 {
+			klog.Warningf("Invalid sglang metric port %d, falling back to default %d", metricPort[0], defaultMetricPort)
+		}
+	}
+
 	return &sglangEngine{
-		MetricPort: 30000,
+		MetricPort: port,
 	}
 }
 

diff --git a/pkg/kthena-router/backend/sglang/metrics_test.go b/pkg/kthena-router/backend/sglang/metrics_test.go
@@ -0,0 +1,60 @@
+/*
+Copyright The Volcano Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package sglang
+
+import "testing"
+
+func TestNewSglangEngine_UsesDefaultMetricPort(t *testing.T) {
+	engine := NewSglangEngine()
+
+	if engine.MetricPort != 30000 {
+		t.Fatalf("expected default metric port 30000, got %d", engine.MetricPort)
+	}
+}
+
+func TestNewSglangEngine_UsesConfiguredMetricPort(t *testing.T) {
+	engine := NewSglangEngine(31000)
+
+	if engine.MetricPort != 31000 {
+		t.Fatalf("expected metric port 31000, got %d", engine.MetricPort)
+	}
+}
-}
+}
+
+func TestNewSglangEngine_FallsBackToDefaultWhenConfiguredPortIsZero(t *testing.T) {
+	engine := NewSglangEngine(0)
+
+	if engine.MetricPort != 30000 {
+		t.Fatalf("expected fallback metric port 30000, got %d", engine.MetricPort)
+	}
+}
-}
+}
+
+func TestNewSglangEngine_FallsBackToDefaultWhenConfiguredPortIsZero(t *testing.T) {
+	engine := NewSglangEngine(0)
+
+	if engine.MetricPort != 30000 {
+		t.Fatalf("expected fallback metric port 30000, got %d", engine.MetricPort)
+	}
+}
+
+func TestNewSglangEngine_FallsBackToDefaultWhenConfiguredPortIsZero(t *testing.T) {
+	engine := NewSglangEngine(0)
+
+	if engine.MetricPort != 30000 {
+		t.Fatalf("expected fallback metric port 30000, got %d", engine.MetricPort)
+	}
+}
+
+func TestNewSglangEngine_FallsBackToDefaultWhenConfiguredPortIsOutOfRange(t *testing.T) {
+	engine := NewSglangEngine(70000)
+
+	if engine.MetricPort != 30000 {
+		t.Fatalf("expected fallback metric port 30000 for out-of-range port, got %d", engine.MetricPort)
+	}
+}
+
+func TestNewSglangEngine_PanicsWhenMultiplePortsProvided(t *testing.T) {
+	defer func() {
+		if recover() == nil {
+			t.Fatal("expected panic when multiple metricPort arguments are provided")
+		}
+	}()
+	_ = NewSglangEngine(30000, 30001)
+}
diff --git a/pkg/kthena-router/backend/vllm/metrics.go b/pkg/kthena-router/backend/vllm/metrics.go
@@ -21,6 +21,7 @@ import (
 
 	dto "github.com/prometheus/client_model/go"
 	corev1 "k8s.io/api/core/v1"
+	"k8s.io/klog/v2"
 
 	"github.com/volcano-sh/kthena/pkg/kthena-router/backend/metrics"
 	"github.com/volcano-sh/kthena/pkg/kthena-router/utils"
@@ -34,6 +35,8 @@ var (
 	TTFT              = "vllm:time_to_first_token_seconds"
 )
 
+const defaultMetricPort uint32 = 8000
+
 var (
 	CounterAndGaugeMetrics = []string{
 		GPUCacheUsage,
@@ -56,15 +59,27 @@ var (
 )
 
 type vllmEngine struct {
-	// The address of vllm's query metrics is http://{model server}:MetricPort/metrics
-	// Default is 8000
+	// vLLM serves both /metrics and /v1/models on the same service port.
+	// Default is 8000.
 	MetricPort uint32
 }
 
-func NewVllmEngine() *vllmEngine {
-	// TODO: Get MetricsPort from vllm configuration
+func NewVllmEngine(metricPort ...uint32) *vllmEngine {
+	if len(metricPort) > 1 {
+		panic("NewVllmEngine expects at most one metricPort argument")
+	}
+
+	port := defaultMetricPort
+	if len(metricPort) == 1 {
+		if metricPort[0] > 0 && metricPort[0] <= 65535 {
+			port = metricPort[0]
+		} else if metricPort[0] != 0 {
+			klog.Warningf("Invalid vllm metric port %d, falling back to default %d", metricPort[0], defaultMetricPort)
+		}
+	}
+
 	return &vllmEngine{
-		MetricPort: 8000,
+		MetricPort: port,
 	}
 }
 

diff --git a/pkg/kthena-router/backend/vllm/metrics_test.go b/pkg/kthena-router/backend/vllm/metrics_test.go
@@ -0,0 +1,60 @@
+/*
+Copyright The Volcano Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package vllm
+
+import "testing"
+
+func TestNewVllmEngine_UsesDefaultMetricPort(t *testing.T) {
+	engine := NewVllmEngine()
+
+	if engine.MetricPort != 8000 {
+		t.Fatalf("expected default metric port 8000, got %d", engine.MetricPort)
+	}
+}
+
+func TestNewVllmEngine_UsesConfiguredMetricPort(t *testing.T) {
+	engine := NewVllmEngine(18000)
+
+	if engine.MetricPort != 18000 {
+		t.Fatalf("expected custom metric port 18000, got %d", engine.MetricPort)
+	}
+}
+
+func TestNewVllmEngine_FallsBackToDefaultWhenConfiguredPortIsZero(t *testing.T) {
+	engine := NewVllmEngine(0)
+
+	if engine.MetricPort != 8000 {
+		t.Fatalf("expected fallback metric port 8000, got %d", engine.MetricPort)
+	}
+}
+
+func TestNewVllmEngine_FallsBackToDefaultWhenConfiguredPortIsOutOfRange(t *testing.T) {
+	engine := NewVllmEngine(70000)
+
+	if engine.MetricPort != 8000 {
+		t.Fatalf("expected fallback metric port 8000 for out-of-range port, got %d", engine.MetricPort)
+	}
+}
+
+func TestNewVllmEngine_PanicsWhenMultiplePortsProvided(t *testing.T) {
+	defer func() {
+		if recover() == nil {
+			t.Fatal("expected panic when multiple metricPort arguments are provided")
+		}
+	}()
+	_ = NewVllmEngine(8000, 8001)
+}