Skip to content

Commit

Permalink
adding prometheus port configs for aggregating metrics in queue proxy (
Browse files Browse the repository at this point in the history
…kserve#2459)

* adding prometheus port configs for aggregating metrics in queue proxy

Signed-off-by: alexagriffith <[email protected]>

* change tensorflow metrics port to 8080

Signed-off-by: alexagriffith <[email protected]>

* resolving comments

Signed-off-by: alexagriffith <[email protected]>

* setting the tensorflow prom port to the rest api port

Signed-off-by: alexagriffith <[email protected]>

Signed-off-by: alexagriffith <[email protected]>
  • Loading branch information
alexagriffith authored Oct 7, 2022
1 parent 2c2c66e commit 8aac0b9
Show file tree
Hide file tree
Showing 13 changed files with 358 additions and 3 deletions.
3 changes: 3 additions & 0 deletions config/runtimes/kserve-lgbserver.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ kind: ClusterServingRuntime
metadata:
name: kserve-lgbserver
spec:
annotations:
prometheus.kserve.io/port: '8080'
prometheus.kserve.io/path: "/metrics"
supportedModelFormats:
- name: lightgbm
version: "2"
Expand Down
4 changes: 4 additions & 0 deletions config/runtimes/kserve-mlserver.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ kind: ClusterServingRuntime
metadata:
name: kserve-mlserver
spec:
annotations:
# mlserver version 1.1.0 uses port 8082 as default instead of 8080.
prometheus.kserve.io/port: '8080'
prometheus.kserve.io/path: "/metrics"
supportedModelFormats:
- name: sklearn
version: "0"
Expand Down
3 changes: 3 additions & 0 deletions config/runtimes/kserve-paddleserver.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ kind: ClusterServingRuntime
metadata:
name: kserve-paddleserver
spec:
annotations:
prometheus.kserve.io/port: '8080'
prometheus.kserve.io/path: "/metrics"
supportedModelFormats:
- name: paddle
version: "2"
Expand Down
3 changes: 3 additions & 0 deletions config/runtimes/kserve-pmmlserver.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ kind: ClusterServingRuntime
metadata:
name: kserve-pmmlserver
spec:
annotations:
prometheus.kserve.io/port: '8080'
prometheus.kserve.io/path: "/metrics"
supportedModelFormats:
- name: pmml
version: "3"
Expand Down
3 changes: 3 additions & 0 deletions config/runtimes/kserve-sklearnserver.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ kind: ClusterServingRuntime
metadata:
name: kserve-sklearnserver
spec:
annotations:
prometheus.kserve.io/port: '8080'
prometheus.kserve.io/path: "/metrics"
supportedModelFormats:
- name: sklearn
version: "1"
Expand Down
3 changes: 3 additions & 0 deletions config/runtimes/kserve-tensorflow-serving.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ kind: ClusterServingRuntime
metadata:
name: kserve-tensorflow-serving
spec:
annotations:
prometheus.kserve.io/port: '8080'
prometheus.kserve.io/path: "/metrics"
supportedModelFormats:
- name: tensorflow
version: "1"
Expand Down
3 changes: 3 additions & 0 deletions config/runtimes/kserve-torchserve.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ kind: ClusterServingRuntime
metadata:
name: kserve-torchserve
spec:
annotations:
prometheus.kserve.io/port: '8082'
prometheus.kserve.io/path: "/metrics"
supportedModelFormats:
- name: pytorch
version: "1"
Expand Down
3 changes: 3 additions & 0 deletions config/runtimes/kserve-tritonserver.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ kind: ClusterServingRuntime
metadata:
name: kserve-tritonserver
spec:
annotations:
prometheus.kserve.io/port: '8002'
prometheus.kserve.io/path: "/metrics"
supportedModelFormats:
- name: tensorrt
version: "8"
Expand Down
3 changes: 3 additions & 0 deletions config/runtimes/kserve-xgbserver.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ kind: ClusterServingRuntime
metadata:
name: kserve-xgbserver
spec:
annotations:
prometheus.kserve.io/port: '8080'
prometheus.kserve.io/path: "/metrics"
supportedModelFormats:
- name: xgboost
version: "1"
Expand Down
17 changes: 14 additions & 3 deletions pkg/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ var (
MinScaleAnnotationKey = KnativeAutoscalingAPIGroupName + "/minScale"
MaxScaleAnnotationKey = KnativeAutoscalingAPIGroupName + "/maxScale"
RollOutDurationAnnotationKey = KnativeServingAPIGroupName + "/rollout-duration"
EnableMetricAggregation = KServeAPIGroupName + "/enable-metric-aggregation"
SetPrometheusAggregateAnnotation = KServeAPIGroupName + "/enable-prometheus-aggregate-scraping"
KserveContainerPrometheusPortKey = "prometheus.kserve.io/port"
KServeContainerPrometheusPathKey = "prometheus.kserve.io/path"
PrometheusPortAnnotationKey = "prometheus.io/port"
PrometheusPathAnnotationKey = "prometheus.io/path"
DefaultPrometheusPath = "/metrics"
QueueProxyAggregatePrometheusMetricsPort = "9088"
)

// InferenceService Internal Annotations
Expand Down Expand Up @@ -176,9 +184,12 @@ const (

// InferenceService Environment Variables
const (
CustomSpecStorageUriEnvVarKey = "STORAGE_URI"
CustomSpecProtocolEnvVarKey = "PROTOCOL"
CustomSpecMultiModelServerEnvVarKey = "MULTI_MODEL_SERVER"
CustomSpecStorageUriEnvVarKey = "STORAGE_URI"
CustomSpecProtocolEnvVarKey = "PROTOCOL"
CustomSpecMultiModelServerEnvVarKey = "MULTI_MODEL_SERVER"
KServeContainerPrometheusMetricsPortEnvVarKey = "KSERVE_CONTAINER_PROMETHEUS_METRICS_PORT"
KServeContainerPrometheusMetricsPathEnvVarKey = "KSERVE_CONTAINER_PROMETHEUS_METRICS_PATH"
QueueProxyAggregatePrometheusMetricsPortEnvVarKey = "AGGREGATE_PROMETHEUS_METRICS_PORT"
)

type InferenceServiceComponent string
Expand Down
62 changes: 62 additions & 0 deletions pkg/webhook/admission/pod/metrics_aggregate_injector.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
Copyright 2022 The KServe Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package pod

import (
"github.com/kserve/kserve/pkg/constants"
v1 "k8s.io/api/core/v1"
)

const defaultPrometheusPort = "8080"

// InjectMetricsAggregator wires the queue-proxy container up to aggregate its
// own metrics with those of the kserve-container.
//
// Nothing is changed unless the pod carries the
// constants.EnableMetricAggregation annotation with the value "true". When it
// does, every container named "queue-proxy" gets three environment variables:
//
//   - constants.KServeContainerPrometheusMetricsPortEnvVarKey: the port to
//     scrape on the kserve-container, taken from the
//     constants.KserveContainerPrometheusPortKey annotation, or
//     defaultPrometheusPort when that annotation is absent.
//   - constants.KServeContainerPrometheusMetricsPathEnvVarKey: the path to
//     scrape, taken from the constants.KServeContainerPrometheusPathKey
//     annotation, or constants.DefaultPrometheusPath when absent.
//   - constants.QueueProxyAggregatePrometheusMetricsPortEnvVarKey: the port on
//     which queue-proxy exposes the aggregated metrics.
//
// If a queue-proxy container was found and the
// constants.SetPrometheusAggregateAnnotation annotation is "true", the pod's
// prometheus port/path annotations are pointed at the aggregate endpoint. The
// scrape annotation itself is not set; that is left for the user to configure.
//
// The mutation is idempotent: an environment variable that already exists on
// the container is overwritten in place rather than appended a second time, so
// running the injector repeatedly over the same pod yields the same spec.
//
// The returned error is always nil.
func InjectMetricsAggregator(pod *v1.Pod) error {
	annotations := pod.ObjectMeta.Annotations

	// Indexing a nil map yields "", so this single comparison covers both the
	// "annotation missing" and the "annotation not true" cases. Passing this
	// guard also proves the map is non-nil, which makes the writes below safe.
	if annotations[constants.EnableMetricAggregation] != "true" {
		return nil
	}

	// The kserve-container prometheus port/path is inherited from the
	// ClusterServingRuntime YAML. If none is defined (e.g. a transformer using
	// the python SDK), fall back to the defaults. An annotation that is present
	// but empty is still honoured, hence the explicit presence check.
	scrapePort := defaultPrometheusPort
	if v, ok := annotations[constants.KserveContainerPrometheusPortKey]; ok {
		scrapePort = v
	}
	scrapePath := constants.DefaultPrometheusPath
	if v, ok := annotations[constants.KServeContainerPrometheusPathKey]; ok {
		scrapePath = v
	}

	wanted := []v1.EnvVar{
		// Tell queue-proxy which port/path to scrape on the kserve-container.
		{Name: constants.KServeContainerPrometheusMetricsPortEnvVarKey, Value: scrapePort},
		{Name: constants.KServeContainerPrometheusMetricsPathEnvVarKey, Value: scrapePath},
		// The port queue-proxy uses to expose the aggregate metrics.
		{Name: constants.QueueProxyAggregatePrometheusMetricsPortEnvVarKey, Value: constants.QueueProxyAggregatePrometheusMetricsPort},
	}

	// setEnv overwrites every entry named like want, or appends want when no
	// such entry exists. Overwriting all matches keeps the injected value
	// effective even if earlier runs already left duplicates behind.
	setEnv := func(env []v1.EnvVar, want v1.EnvVar) []v1.EnvVar {
		replaced := false
		for j := range env {
			if env[j].Name == want.Name {
				env[j] = want
				replaced = true
			}
		}
		if replaced {
			return env
		}
		return append(env, want)
	}

	foundQueueProxy := false
	for i := range pod.Spec.Containers {
		// Work through a pointer so the pod's own container is mutated, not a copy.
		c := &pod.Spec.Containers[i]
		if c.Name != "queue-proxy" {
			continue
		}
		foundQueueProxy = true
		for _, ev := range wanted {
			c.Env = setEnv(c.Env, ev)
		}
	}

	// Only advertise the aggregate endpoint when there is a queue-proxy to
	// serve it and the user explicitly asked for the annotations.
	if foundQueueProxy && annotations[constants.SetPrometheusAggregateAnnotation] == "true" {
		annotations[constants.PrometheusPortAnnotationKey] = constants.QueueProxyAggregatePrometheusMetricsPort
		annotations[constants.PrometheusPathAnnotationKey] = constants.DefaultPrometheusPath
	}
	return nil
}
Loading

0 comments on commit 8aac0b9

Please sign in to comment.