Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions charts/kthena/charts/networking/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,24 @@ kthenaRouter:
| `kthenaRouter.fairness.inputTokenWeight` | float | `1.0` | Weight for input tokens (≥0) |
| `kthenaRouter.fairness.outputTokenWeight` | float | `2.0` | Weight for output tokens (≥0) |

#### Backend Metric Port Configuration

Use these values to configure model backend metric ports in `routerConfiguration`:

```yaml
kthenaRouter:
  backend:
    sglang:
      metricPort: 30000
    vllm:
      metricPort: 8000
```

| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `kthenaRouter.backend.sglang.metricPort` | int | `30000` | Metric port used for SGLang backends |
| `kthenaRouter.backend.vllm.metricPort` | int | `8000` | Metric port used for vLLM backends |

#### Configuration Scenarios

##### Development Environment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,8 @@ data:
weight: 1
- name: prefix-cache
weight: 1
backend:
sglang:
metricPort: {{ .Values.kthenaRouter.backend.sglang.metricPort | default 30000 }}
vllm:
metricPort: {{ .Values.kthenaRouter.backend.vllm.metricPort | default 8000 }}
6 changes: 6 additions & 0 deletions charts/kthena/charts/networking/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ kthenaRouter:
# kubeAPIBurst is the burst to use while talking with kubernetes apiserver
# If 0 or not specified, uses default value (10)
kubeAPIBurst: 0
# backend metric ports used by router to scrape engine metrics
backend:
sglang:
metricPort: 30000
vllm:
metricPort: 8000

webhook:
enabled: true
Expand Down
7 changes: 7 additions & 0 deletions charts/kthena/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ networking:
# -- Enable Gateway API Inference Extension features.<br/>
# Requires `gatewayAPI.enabled` to be true.
inferenceExtension: false
backend:
sglang:
# -- Metrics port exposed by SGLang model servers.
metricPort: 30000
vllm:
# -- Metrics port exposed by vLLM model servers.
metricPort: 8000

global:
# -- Certificate Management Mode.<br/>
Expand Down
2 changes: 2 additions & 0 deletions docs/kthena/docs/reference/helm-chart-values.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ A Helm chart for deploying Kthena
| global.certManagementMode | string | `"auto"` | Certificate Management Mode.<br/> Three mutually exclusive options for managing TLS certificates:<br/> - `auto`: Webhook servers generate self-signed certificates automatically.<br/> - `cert-manager`: Use cert-manager to generate and manage certificates (requires cert-manager installation).<br/> - `manual`: Provide your own certificates via caBundle. |
| global.webhook.caBundle | string | `""` | CA bundle for webhook server certificates (base64-encoded).<br/> This is ONLY required when `certManagementMode` is set to "manual".<br/> You can generate it with: `cat /path/to/your/ca.crt | base64 | tr -d '\n'`<br/> |
| networking.enabled | bool | `true` | Enable the networking subchart. |
| networking.kthenaRouter.backend.sglang.metricPort | int | `30000` | Metrics port exposed by SGLang model servers. |
| networking.kthenaRouter.backend.vllm.metricPort | int | `8000` | Metrics port exposed by vLLM model servers. |
| networking.kthenaRouter.debugPort | int | `15000` | Debug server port for Kthena Router (localhost only). |
| networking.kthenaRouter.enabled | bool | `true` | Enable Kthena Router. |
| networking.kthenaRouter.fairness.enabled | bool | `false` | Enable fairness scheduling. |
Expand Down
26 changes: 22 additions & 4 deletions pkg/kthena-router/backend/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package backend

import (
"fmt"
"sync"

dto "github.com/prometheus/client_model/go"
corev1 "k8s.io/api/core/v1"
Expand All @@ -34,9 +35,24 @@ type MetricsProvider interface {
GetHistogramPodMetrics(allMetrics map[string]*dto.MetricFamily, previousHistogram map[string]*dto.Histogram) (map[string]float64, map[string]*dto.Histogram)
}

var engineRegistry = map[string]MetricsProvider{
"SGLang": sglang.NewSglangEngine(),
"vLLM": vllm.NewVllmEngine(),
var (
engineRegistryMu sync.RWMutex
engineRegistries = buildEngineRegistries(0, 0)
)

// buildEngineRegistries returns a fresh engine-name -> MetricsProvider map
// holding one SGLang provider and one vLLM provider, each constructed with the
// metric port passed in for that engine.
func buildEngineRegistries(sglangMetricPort, vllmMetricPort uint32) map[string]MetricsProvider {
	registry := make(map[string]MetricsProvider, 2)
	registry["SGLang"] = sglang.NewSglangEngine(sglangMetricPort)
	registry["vLLM"] = vllm.NewVllmEngine(vllmMetricPort)
	return registry
}

// ConfigureEngineRegistry rebuilds engine providers with configured ports.
// Zero-valued or invalid ports are handled by each engine constructor fallback logic.
func ConfigureEngineRegistry(sglangMetricPort, vllmMetricPort uint32) {
engineRegistryMu.Lock()
defer engineRegistryMu.Unlock()
engineRegistries = buildEngineRegistries(sglangMetricPort, vllmMetricPort)
}

func GetPodMetrics(engine string, pod *corev1.Pod, previousHistogram map[string]*dto.Histogram) (map[string]float64, map[string]*dto.Histogram) {
Expand Down Expand Up @@ -65,7 +81,9 @@ func GetPodMetrics(engine string, pod *corev1.Pod, previousHistogram map[string]
}

func GetMetricsProvider(engine string) (MetricsProvider, error) {
if provider, exists := engineRegistry[engine]; exists {
engineRegistryMu.RLock()
defer engineRegistryMu.RUnlock()
if provider, exists := engineRegistries[engine]; exists {
return provider, nil
}
return nil, fmt.Errorf("unsupported engine: %s", engine)
Expand Down
60 changes: 60 additions & 0 deletions pkg/kthena-router/backend/backend_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
Copyright The Volcano Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package backend

import (
"testing"

"github.com/agiledragon/gomonkey/v2"
dto "github.com/prometheus/client_model/go"
corev1 "k8s.io/api/core/v1"

backendmetrics "github.com/volcano-sh/kthena/pkg/kthena-router/backend/metrics"
)

// TestConfigureEngineRegistryUsesConfiguredPorts configures the registry with
// ports 31000 (SGLang) and 18000 (vLLM), stubs backendmetrics.ParseMetricsURL to
// record every URL it is asked for, and asserts that GetPodMetrics requests the
// metrics URL on the configured port for each engine.
func TestConfigureEngineRegistryUsesConfiguredPorts(t *testing.T) {
	ConfigureEngineRegistry(31000, 18000)
	// Restore the registry to its zero-port configuration for other tests.
	t.Cleanup(func() { ConfigureEngineRegistry(0, 0) })

	var requestedURLs []string
	recordURL := func(url string) (map[string]*dto.MetricFamily, error) {
		requestedURLs = append(requestedURLs, url)
		return map[string]*dto.MetricFamily{}, nil
	}
	patch := gomonkey.ApplyFunc(backendmetrics.ParseMetricsURL, recordURL)
	defer patch.Reset()

	pod := &corev1.Pod{}
	pod.Status.PodIP = "10.0.0.1"

	// Order matters: the assertions below index the recorded URLs by position.
	for _, engine := range []string{"SGLang", "vLLM"} {
		GetPodMetrics(engine, pod, nil)
	}

	if got := len(requestedURLs); got != 2 {
		t.Fatalf("expected 2 metrics requests, got %d", got)
	}
	if got := requestedURLs[0]; got != "http://10.0.0.1:31000/metrics" {
		t.Fatalf("expected sglang metrics URL to use port 31000, got %s", got)
	}
	if got := requestedURLs[1]; got != "http://10.0.0.1:18000/metrics" {
		t.Fatalf("expected vllm metrics URL to use port 18000, got %s", got)
	}
}
21 changes: 18 additions & 3 deletions pkg/kthena-router/backend/sglang/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,14 @@ import (

dto "github.com/prometheus/client_model/go"
corev1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"

"github.com/volcano-sh/kthena/pkg/kthena-router/backend/metrics"
"github.com/volcano-sh/kthena/pkg/kthena-router/utils"
)

const defaultMetricPort uint32 = 30000

var (
GPUCacheUsage = "sglang:token_usage"
RequestWaitingNum = "sglang:num_queue_reqs"
Expand Down Expand Up @@ -58,10 +61,22 @@ type sglangEngine struct {
MetricPort uint32
}

func NewSglangEngine() *sglangEngine {
// TODO: Get MetricsPort from sglang configuration
func NewSglangEngine(metricPort ...uint32) *sglangEngine {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

While this change makes the metric port configurable at the function level, the way this function is called in pkg/kthena-router/backend/backend.go prevents this configuration from being used. The engineRegistry is a global variable initialized with NewSglangEngine() (and NewVllmEngine()), so it will always use the default port.

To make this feature fully functional, a refactoring of backend.go is needed to allow passing configuration down to the engine constructors. This likely means changing engineRegistry from a global variable to something that is initialized with application configuration. Without this, the feature added in this PR is not usable.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just pass a plain `metricPort uint32`? As written, the variadic parameter turns it into a slice.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using a variadic metricPort ...uint32 is intentional for backward compatibility.
It keeps existing call sites like NewSglangEngine() unchanged, while allowing optional configuration via NewSglangEngine(customPort).
I also guard len(metricPort) > 1 to avoid ambiguous usage.

if len(metricPort) > 1 {
panic("NewSglangEngine accepts at most one metricPort argument")
}

port := defaultMetricPort
if len(metricPort) == 1 {
if metricPort[0] > 0 && metricPort[0] <= 65535 {
port = metricPort[0]
} else if metricPort[0] != 0 {
klog.Warningf("Invalid sglang metric port %d, falling back to default %d", metricPort[0], defaultMetricPort)
}
}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we log a warning if the metric port specified by the user is invalid?


return &sglangEngine{
MetricPort: 30000,
MetricPort: port,
}
}

Expand Down
60 changes: 60 additions & 0 deletions pkg/kthena-router/backend/sglang/metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
Copyright The Volcano Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package sglang

import "testing"
Comment on lines +17 to +19
Copy link

Copilot AI Mar 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New Go files in this repo typically start with the Apache 2.0 license header (see pkg/kthena-router/backend/sglang/metrics.go:1-15 or other *_test.go files like pkg/kthena-router/datastore/token_tracker_test.go:1-15). Please add that header to this new test file as well.

Copilot uses AI. Check for mistakes.

// TestNewSglangEngine_UsesDefaultMetricPort asserts that constructing the
// engine with no arguments yields MetricPort 30000.
func TestNewSglangEngine_UsesDefaultMetricPort(t *testing.T) {
	if port := NewSglangEngine().MetricPort; port != 30000 {
		t.Fatalf("expected default metric port 30000, got %d", port)
	}
}

// TestNewSglangEngine_UsesConfiguredMetricPort asserts that a port passed to
// the constructor (31000) is stored in MetricPort.
func TestNewSglangEngine_UsesConfiguredMetricPort(t *testing.T) {
	if port := NewSglangEngine(31000).MetricPort; port != 31000 {
		t.Fatalf("expected metric port 31000, got %d", port)
	}
}
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

For consistency with the vllm tests and to ensure complete test coverage for the new logic, please add a test case for when a zero-value port is provided. It should fall back to the default port.

Suggested change
}
}
func TestNewSglangEngine_FallsBackToDefaultWhenConfiguredPortIsZero(t *testing.T) {
engine := NewSglangEngine(0)
if engine.MetricPort != 30000 {
t.Fatalf("expected fallback metric port 30000, got %d", engine.MetricPort)
}
}

Comment on lines +29 to +35
Copy link

Copilot AI Mar 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NewSglangEngine implements a zero-value fallback (passing 0 results in the default port), but the tests only cover default and non-zero custom ports. Add a test case for NewSglangEngine(0) to lock down the fallback behavior (and to match the PR’s stated testing goals).

Copilot uses AI. Check for mistakes.

// TestNewSglangEngine_FallsBackToDefaultWhenConfiguredPortIsZero asserts that
// passing 0 to the constructor results in MetricPort 30000.
func TestNewSglangEngine_FallsBackToDefaultWhenConfiguredPortIsZero(t *testing.T) {
	if port := NewSglangEngine(0).MetricPort; port != 30000 {
		t.Fatalf("expected fallback metric port 30000, got %d", port)
	}
}

// TestNewSglangEngine_FallsBackToDefaultWhenConfiguredPortIsOutOfRange asserts
// that passing 70000 to the constructor results in MetricPort 30000.
func TestNewSglangEngine_FallsBackToDefaultWhenConfiguredPortIsOutOfRange(t *testing.T) {
	if port := NewSglangEngine(70000).MetricPort; port != 30000 {
		t.Fatalf("expected fallback metric port 30000 for out-of-range port, got %d", port)
	}
}

// TestNewSglangEngine_PanicsWhenMultiplePortsProvided asserts that calling the
// constructor with two port arguments panics.
func TestNewSglangEngine_PanicsWhenMultiplePortsProvided(t *testing.T) {
	defer func() {
		// A non-nil recover value means the constructor panicked as expected.
		if r := recover(); r != nil {
			return
		}
		t.Fatal("expected panic when multiple metricPort arguments are provided")
	}()

	NewSglangEngine(30000, 30001)
}
25 changes: 20 additions & 5 deletions pkg/kthena-router/backend/vllm/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

dto "github.com/prometheus/client_model/go"
corev1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"

"github.com/volcano-sh/kthena/pkg/kthena-router/backend/metrics"
"github.com/volcano-sh/kthena/pkg/kthena-router/utils"
Expand All @@ -34,6 +35,8 @@ var (
TTFT = "vllm:time_to_first_token_seconds"
)

const defaultMetricPort uint32 = 8000

var (
CounterAndGaugeMetrics = []string{
GPUCacheUsage,
Expand All @@ -56,15 +59,27 @@ var (
)

type vllmEngine struct {
// The address of vllm's query metrics is http://{model server}:MetricPort/metrics
// Default is 8000
// vLLM serves both /metrics and /v1/models on the same service port.
// Default is 8000.
MetricPort uint32
}

func NewVllmEngine() *vllmEngine {
// TODO: Get MetricsPort from vllm configuration
func NewVllmEngine(metricPort ...uint32) *vllmEngine {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Similar to sglang, while this change makes the metric port configurable at the function level, the way this function is called in pkg/kthena-router/backend/backend.go prevents this configuration from being used. The engineRegistry is a global variable initialized with NewVllmEngine(), so it will always use the default port.

To make this feature fully functional, a refactoring of backend.go is needed to allow passing configuration down to the engine constructors. Without this, the feature added in this PR is not usable.

if len(metricPort) > 1 {
panic("NewVllmEngine expects at most one metricPort argument")
}

port := defaultMetricPort
if len(metricPort) == 1 {
if metricPort[0] > 0 && metricPort[0] <= 65535 {
port = metricPort[0]
} else if metricPort[0] != 0 {
klog.Warningf("Invalid vllm metric port %d, falling back to default %d", metricPort[0], defaultMetricPort)
}
}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto


return &vllmEngine{
MetricPort: 8000,
MetricPort: port,
}
Comment on lines +67 to 83
Copy link

Copilot AI Mar 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The constructor parameter is named metricPort, but MetricPort is also used for non-metrics endpoints (e.g., /v1/models in models.go). If this value is intended to configure only the metrics port, this will unintentionally change the models/API port too. Consider clarifying via naming/doc (e.g., serverPort) or splitting into separate ports if they can differ.

Copilot uses AI. Check for mistakes.
}

Expand Down
60 changes: 60 additions & 0 deletions pkg/kthena-router/backend/vllm/metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
Copyright The Volcano Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package vllm

import "testing"
Comment on lines +17 to +19
Copy link

Copilot AI Mar 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New Go files in this repo include the Apache 2.0 license header (e.g., pkg/kthena-router/backend/vllm/models.go:1-15). Please add the same header comment block to this new test file for consistency/compliance.

Copilot uses AI. Check for mistakes.

// TestNewVllmEngine_UsesDefaultMetricPort asserts that constructing the engine
// with no arguments yields MetricPort 8000.
func TestNewVllmEngine_UsesDefaultMetricPort(t *testing.T) {
	if port := NewVllmEngine().MetricPort; port != 8000 {
		t.Fatalf("expected default metric port 8000, got %d", port)
	}
}

// TestNewVllmEngine_UsesConfiguredMetricPort asserts that a port passed to the
// constructor (18000) is stored in MetricPort.
func TestNewVllmEngine_UsesConfiguredMetricPort(t *testing.T) {
	if port := NewVllmEngine(18000).MetricPort; port != 18000 {
		t.Fatalf("expected custom metric port 18000, got %d", port)
	}
}

// TestNewVllmEngine_FallsBackToDefaultWhenConfiguredPortIsZero asserts that
// passing 0 to the constructor results in MetricPort 8000.
func TestNewVllmEngine_FallsBackToDefaultWhenConfiguredPortIsZero(t *testing.T) {
	if port := NewVllmEngine(0).MetricPort; port != 8000 {
		t.Fatalf("expected fallback metric port 8000, got %d", port)
	}
}

// TestNewVllmEngine_FallsBackToDefaultWhenConfiguredPortIsOutOfRange asserts
// that passing 70000 to the constructor results in MetricPort 8000.
func TestNewVllmEngine_FallsBackToDefaultWhenConfiguredPortIsOutOfRange(t *testing.T) {
	if port := NewVllmEngine(70000).MetricPort; port != 8000 {
		t.Fatalf("expected fallback metric port 8000 for out-of-range port, got %d", port)
	}
}

// TestNewVllmEngine_PanicsWhenMultiplePortsProvided asserts that calling the
// constructor with two port arguments panics.
func TestNewVllmEngine_PanicsWhenMultiplePortsProvided(t *testing.T) {
	defer func() {
		// A non-nil recover value means the constructor panicked as expected.
		if r := recover(); r != nil {
			return
		}
		t.Fatal("expected panic when multiple metricPort arguments are provided")
	}()

	NewVllmEngine(8000, 8001)
}
Loading
Loading