-
Notifications
You must be signed in to change notification settings - Fork 84
fix(router): make sglang and vllm metrics port configurable #847
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,60 @@ | ||
| /* | ||
| Copyright The Volcano Authors. | ||
|
|
||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
|
|
||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. | ||
| */ | ||
|
|
||
| package backend | ||
|
|
||
| import ( | ||
| "testing" | ||
|
|
||
| "github.com/agiledragon/gomonkey/v2" | ||
| dto "github.com/prometheus/client_model/go" | ||
| corev1 "k8s.io/api/core/v1" | ||
|
|
||
| backendmetrics "github.com/volcano-sh/kthena/pkg/kthena-router/backend/metrics" | ||
| ) | ||
|
|
||
| func TestConfigureEngineRegistryUsesConfiguredPorts(t *testing.T) { | ||
| ConfigureEngineRegistry(31000, 18000) | ||
| t.Cleanup(func() { | ||
| ConfigureEngineRegistry(0, 0) | ||
| }) | ||
|
|
||
| var requestedURLs []string | ||
| patch := gomonkey.ApplyFunc(backendmetrics.ParseMetricsURL, func(url string) (map[string]*dto.MetricFamily, error) { | ||
| requestedURLs = append(requestedURLs, url) | ||
| return map[string]*dto.MetricFamily{}, nil | ||
| }) | ||
| defer patch.Reset() | ||
|
|
||
| pod := &corev1.Pod{ | ||
| Status: corev1.PodStatus{ | ||
| PodIP: "10.0.0.1", | ||
| }, | ||
| } | ||
|
|
||
| GetPodMetrics("SGLang", pod, nil) | ||
| GetPodMetrics("vLLM", pod, nil) | ||
|
|
||
| if len(requestedURLs) != 2 { | ||
| t.Fatalf("expected 2 metrics requests, got %d", len(requestedURLs)) | ||
| } | ||
| if requestedURLs[0] != "http://10.0.0.1:31000/metrics" { | ||
| t.Fatalf("expected sglang metrics URL to use port 31000, got %s", requestedURLs[0]) | ||
| } | ||
| if requestedURLs[1] != "http://10.0.0.1:18000/metrics" { | ||
| t.Fatalf("expected vllm metrics URL to use port 18000, got %s", requestedURLs[1]) | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,11 +21,14 @@ import ( | |
|
|
||
| dto "github.com/prometheus/client_model/go" | ||
| corev1 "k8s.io/api/core/v1" | ||
| "k8s.io/klog/v2" | ||
|
|
||
| "github.com/volcano-sh/kthena/pkg/kthena-router/backend/metrics" | ||
| "github.com/volcano-sh/kthena/pkg/kthena-router/utils" | ||
| ) | ||
|
|
||
| const defaultMetricPort uint32 = 30000 | ||
|
|
||
| var ( | ||
| GPUCacheUsage = "sglang:token_usage" | ||
| RequestWaitingNum = "sglang:num_queue_reqs" | ||
|
|
@@ -58,10 +61,22 @@ type sglangEngine struct { | |
| MetricPort uint32 | ||
| } | ||
|
|
||
| func NewSglangEngine() *sglangEngine { | ||
| // TODO: Get MetricsPort from sglang configuration | ||
| func NewSglangEngine(metricPort ...uint32) *sglangEngine { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think why not pass a
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using a variadic metricPort ...uint32 is intentional for backward compatibility. |
||
| if len(metricPort) > 1 { | ||
| panic("NewSglangEngine accepts at most one metricPort argument") | ||
| } | ||
|
|
||
| port := defaultMetricPort | ||
| if len(metricPort) == 1 { | ||
| if metricPort[0] > 0 && metricPort[0] <= 65535 { | ||
| port = metricPort[0] | ||
| } else if metricPort[0] != 0 { | ||
| klog.Warningf("Invalid sglang metric port %d, falling back to default %d", metricPort[0], defaultMetricPort) | ||
| } | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. warning info if metric port specified by user is invalid? |
||
|
|
||
| return &sglangEngine{ | ||
| MetricPort: 30000, | ||
| MetricPort: port, | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,60 @@ | ||||||||||||||||||||||
| /* | ||||||||||||||||||||||
| Copyright The Volcano Authors. | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||||||||||||||||||||
| you may not use this file except in compliance with the License. | ||||||||||||||||||||||
| You may obtain a copy of the License at | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| http://www.apache.org/licenses/LICENSE-2.0 | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| Unless required by applicable law or agreed to in writing, software | ||||||||||||||||||||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||||||||||||||||||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||||||||||||||||||
| See the License for the specific language governing permissions and | ||||||||||||||||||||||
| limitations under the License. | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| package sglang | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| import "testing" | ||||||||||||||||||||||
|
Comment on lines
+17
to
+19
|
||||||||||||||||||||||
|
|
||||||||||||||||||||||
| func TestNewSglangEngine_UsesDefaultMetricPort(t *testing.T) { | ||||||||||||||||||||||
| engine := NewSglangEngine() | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| if engine.MetricPort != 30000 { | ||||||||||||||||||||||
| t.Fatalf("expected default metric port 30000, got %d", engine.MetricPort) | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| func TestNewSglangEngine_UsesConfiguredMetricPort(t *testing.T) { | ||||||||||||||||||||||
| engine := NewSglangEngine(31000) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| if engine.MetricPort != 31000 { | ||||||||||||||||||||||
| t.Fatalf("expected metric port 31000, got %d", engine.MetricPort) | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For consistency with the
Suggested change
Comment on lines
+29
to
+35
|
||||||||||||||||||||||
|
|
||||||||||||||||||||||
| func TestNewSglangEngine_FallsBackToDefaultWhenConfiguredPortIsZero(t *testing.T) { | ||||||||||||||||||||||
| engine := NewSglangEngine(0) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| if engine.MetricPort != 30000 { | ||||||||||||||||||||||
| t.Fatalf("expected fallback metric port 30000, got %d", engine.MetricPort) | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| func TestNewSglangEngine_FallsBackToDefaultWhenConfiguredPortIsOutOfRange(t *testing.T) { | ||||||||||||||||||||||
| engine := NewSglangEngine(70000) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| if engine.MetricPort != 30000 { | ||||||||||||||||||||||
| t.Fatalf("expected fallback metric port 30000 for out-of-range port, got %d", engine.MetricPort) | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| func TestNewSglangEngine_PanicsWhenMultiplePortsProvided(t *testing.T) { | ||||||||||||||||||||||
| defer func() { | ||||||||||||||||||||||
| if recover() == nil { | ||||||||||||||||||||||
| t.Fatal("expected panic when multiple metricPort arguments are provided") | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| }() | ||||||||||||||||||||||
| _ = NewSglangEngine(30000, 30001) | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,7 @@ import ( | |
|
|
||
| dto "github.com/prometheus/client_model/go" | ||
| corev1 "k8s.io/api/core/v1" | ||
| "k8s.io/klog/v2" | ||
|
|
||
| "github.com/volcano-sh/kthena/pkg/kthena-router/backend/metrics" | ||
| "github.com/volcano-sh/kthena/pkg/kthena-router/utils" | ||
|
|
@@ -34,6 +35,8 @@ var ( | |
| TTFT = "vllm:time_to_first_token_seconds" | ||
| ) | ||
|
|
||
| const defaultMetricPort uint32 = 8000 | ||
|
|
||
| var ( | ||
| CounterAndGaugeMetrics = []string{ | ||
| GPUCacheUsage, | ||
|
|
@@ -56,15 +59,27 @@ var ( | |
| ) | ||
|
|
||
| type vllmEngine struct { | ||
| // The address of vllm's query metrics is http://{model server}:MetricPort/metrics | ||
| // Default is 8000 | ||
| // vLLM serves both /metrics and /v1/models on the same service port. | ||
| // Default is 8000. | ||
| MetricPort uint32 | ||
| } | ||
|
|
||
| func NewVllmEngine() *vllmEngine { | ||
| // TODO: Get MetricsPort from vllm configuration | ||
| func NewVllmEngine(metricPort ...uint32) *vllmEngine { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar to To make this feature fully functional, a refactoring of |
||
| if len(metricPort) > 1 { | ||
| panic("NewVllmEngine expects at most one metricPort argument") | ||
| } | ||
|
|
||
| port := defaultMetricPort | ||
| if len(metricPort) == 1 { | ||
| if metricPort[0] > 0 && metricPort[0] <= 65535 { | ||
| port = metricPort[0] | ||
| } else if metricPort[0] != 0 { | ||
| klog.Warningf("Invalid vllm metric port %d, falling back to default %d", metricPort[0], defaultMetricPort) | ||
| } | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
|
|
||
| return &vllmEngine{ | ||
| MetricPort: 8000, | ||
| MetricPort: port, | ||
| } | ||
|
Comment on lines
+67
to
83
|
||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,60 @@ | ||
| /* | ||
| Copyright The Volcano Authors. | ||
|
|
||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
|
|
||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. | ||
| */ | ||
|
|
||
| package vllm | ||
|
|
||
| import "testing" | ||
|
Comment on lines
+17
to
+19
|
||
|
|
||
| func TestNewVllmEngine_UsesDefaultMetricPort(t *testing.T) { | ||
| engine := NewVllmEngine() | ||
|
|
||
| if engine.MetricPort != 8000 { | ||
| t.Fatalf("expected default metric port 8000, got %d", engine.MetricPort) | ||
| } | ||
| } | ||
|
|
||
| func TestNewVllmEngine_UsesConfiguredMetricPort(t *testing.T) { | ||
| engine := NewVllmEngine(18000) | ||
|
|
||
| if engine.MetricPort != 18000 { | ||
| t.Fatalf("expected custom metric port 18000, got %d", engine.MetricPort) | ||
| } | ||
| } | ||
|
|
||
| func TestNewVllmEngine_FallsBackToDefaultWhenConfiguredPortIsZero(t *testing.T) { | ||
| engine := NewVllmEngine(0) | ||
|
|
||
| if engine.MetricPort != 8000 { | ||
| t.Fatalf("expected fallback metric port 8000, got %d", engine.MetricPort) | ||
| } | ||
| } | ||
|
|
||
| func TestNewVllmEngine_FallsBackToDefaultWhenConfiguredPortIsOutOfRange(t *testing.T) { | ||
| engine := NewVllmEngine(70000) | ||
|
|
||
| if engine.MetricPort != 8000 { | ||
| t.Fatalf("expected fallback metric port 8000 for out-of-range port, got %d", engine.MetricPort) | ||
| } | ||
| } | ||
|
|
||
| func TestNewVllmEngine_PanicsWhenMultiplePortsProvided(t *testing.T) { | ||
| defer func() { | ||
| if recover() == nil { | ||
| t.Fatal("expected panic when multiple metricPort arguments are provided") | ||
| } | ||
| }() | ||
| _ = NewVllmEngine(8000, 8001) | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While this change makes the metric port configurable at the function level, the way this function is called in
pkg/kthena-router/backend/backend.goprevents this configuration from being used. TheengineRegistryis a global variable initialized withNewSglangEngine()(andNewVllmEngine()), so it will always use the default port.To make this feature fully functional, a refactoring of
backend.gois needed to allow passing configuration down to the engine constructors. This likely means changingengineRegistryfrom a global variable to something that is initialized with application configuration. Without this, the feature added in this PR is not usable.