Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -9061,6 +9061,44 @@ spec:
x-kubernetes-int-or-string: true
type: object
type: object
# Schema for the scalingConfig object: per-component scaling knobs grouped
# into models, pipelines and servers. Nesting restored so that each
# `properties` / `type` pair belongs to exactly one schema node.
scalingConfig:
  description: Control scaling parameters for various components
  properties:
    models:
      properties:
        enabled:
          type: boolean
      type: object
    pipelines:
      description: Scaling config impacting pipeline-gateway, dataflow-engine
        and model-gateway
      properties:
        maxShardCountMultiplier:
          description: |-
            MaxShardCountMultiplier influences the way the inferencing workload is sharded over the
            replicas of pipeline components.

            - For each of pipeline-gateway and dataflow-engine, the max number of replicas is
            `maxShardCountMultiplier * number of pipelines`
            - For model-gateway, the max number of replicas is
            `maxShardCountMultiplier * number of consumers`

            It doesn't make sense to set this to a value larger than the number of partitions for kafka
            topics used in the Core 2 install.
          format: int32
          type: integer
      type: object
    servers:
      properties:
        enabled:
          type: boolean
        scaleDownPackingEnabled:
          type: boolean
        scaleDownPackingPercentage:
          format: int32
          type: integer
      type: object
  type: object
serviceConfig:
properties:
grpcServicePrefix:
Expand Down Expand Up @@ -9187,6 +9225,44 @@ spec:
x-kubernetes-int-or-string: true
type: object
type: object
# Schema for the scalingConfig object: per-component scaling knobs grouped
# into models, pipelines and servers. Nesting restored so that each
# `properties` / `type` pair belongs to exactly one schema node.
scalingConfig:
  description: Control scaling parameters for various components
  properties:
    models:
      properties:
        enabled:
          type: boolean
      type: object
    pipelines:
      description: Scaling config impacting pipeline-gateway, dataflow-engine
        and model-gateway
      properties:
        maxShardCountMultiplier:
          description: |-
            MaxShardCountMultiplier influences the way the inferencing workload is sharded over the
            replicas of pipeline components.

            - For each of pipeline-gateway and dataflow-engine, the max number of replicas is
            `maxShardCountMultiplier * number of pipelines`
            - For model-gateway, the max number of replicas is
            `maxShardCountMultiplier * number of consumers`

            It doesn't make sense to set this to a value larger than the number of partitions for kafka
            topics used in the Core 2 install.
          format: int32
          type: integer
      type: object
    servers:
      properties:
        enabled:
          type: boolean
        scaleDownPackingEnabled:
          type: boolean
        scaleDownPackingPercentage:
          format: int32
          type: integer
      type: object
  type: object
serviceConfig:
properties:
grpcServicePrefix:
Expand Down
17 changes: 9 additions & 8 deletions k8s/helm-charts/seldon-core-v2-runtime/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,33 @@ seldonConfig: default
hodometer:
disable: false
replicas: 1

scheduler:
disable: false
replicas: 1
# controlplane exposure
serviceType: LoadBalancer
serviceType: LoadBalancer

envoy:
disable: false
replicas: 1
# dataplane exposure
serviceType: LoadBalancer
serviceType: LoadBalancer

dataflow:
disable: false
replicas: 1

modelgateway:
disable: false
replicas: 1

pipelinegateway:
disable: false
replicas: 1

config:
scalingConfig:
agentConfig:
rclone:
configSecrets:
Expand All @@ -48,4 +49,4 @@ config:
serviceConfig:
serviceGRPCPrefix:
serviceType:

Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,7 @@ spec:
- --db-path=/mnt/scheduler/db
- --allow-plaintxt=$(ALLOW_PLAINTXT)
- --kafka-config-path=/mnt/kafka/kafka.json
- --scaling-config-path=/mnt/scaling/scaling.yaml
- --scheduler-ready-timeout-seconds=$(SCHEDULER_READY_TIMEOUT_SECONDS)
- --server-packing-enabled=$(SERVER_PACKING_ENABLED)
- --server-packing-percentage=$(SERVER_PACKING_PERCENTAGE)
Expand Down Expand Up @@ -639,6 +640,8 @@ spec:
volumeMounts:
- mountPath: /mnt/kafka
name: kafka-config-volume
- mountPath: /mnt/scaling
name: scaling-config-volume
- mountPath: /mnt/tracing
name: tracing-config-volume
- mountPath: /mnt/scheduler
Expand All @@ -652,6 +655,9 @@ spec:
serviceAccountName: seldon-scheduler
terminationGracePeriodSeconds: 5
volumes:
- configMap:
name: seldon-scaling
name: scaling-config-volume
- configMap:
name: seldon-kafka
name: kafka-config-volume
Expand Down Expand Up @@ -1306,6 +1312,18 @@ spec:
topics:
numPartitions: '{{ .Values.kafka.topics.numPartitions }}'
replicationFactor: '{{ .Values.kafka.topics.replicationFactor }}'
scalingConfig:
  # These template values are left unquoted on purpose: the CRD schema types
  # them as boolean / int32, so they must render as YAML booleans and
  # integers rather than strings.
  # Each action is kept on a single line so the template does not rely on
  # multi-line action support in the Go template parser.
  models:
    enabled: {{ .Values.autoscaling.autoscalingModelEnabled }}
  pipelines:
    # Uses the Kafka partition count, which the CRD description gives as the
    # largest value that makes sense for this multiplier.
    maxShardCountMultiplier: {{ .Values.kafka.topics.numPartitions }}
  servers:
    enabled: {{ .Values.autoscaling.autoscalingServerEnabled }}
    scaleDownPackingEnabled: {{ .Values.autoscaling.serverPackingEnabled }}
    scaleDownPackingPercentage: {{ .Values.autoscaling.serverPackingPercentage }}
serviceConfig:
grpcServicePrefix: '{{ .Values.services.serviceGRPCPrefix }}'
serviceType: '{{ .Values.services.defaultServiceType }}'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,7 @@ spec:
- --db-path=/mnt/scheduler/db
- --allow-plaintxt=$(ALLOW_PLAINTXT)
- --kafka-config-path=/mnt/kafka/kafka.json
- --scaling-config-path=/mnt/scaling/scaling.yaml
- --scheduler-ready-timeout-seconds=$(SCHEDULER_READY_TIMEOUT_SECONDS)
- --server-packing-enabled=$(SERVER_PACKING_ENABLED)
- --server-packing-percentage=$(SERVER_PACKING_PERCENTAGE)
Expand Down Expand Up @@ -639,6 +640,8 @@ spec:
volumeMounts:
- mountPath: /mnt/kafka
name: kafka-config-volume
- mountPath: /mnt/scaling
name: scaling-config-volume
- mountPath: /mnt/tracing
name: tracing-config-volume
- mountPath: /mnt/scheduler
Expand All @@ -652,6 +655,9 @@ spec:
serviceAccountName: seldon-scheduler
terminationGracePeriodSeconds: 5
volumes:
- configMap:
name: seldon-scaling
name: scaling-config-volume
- configMap:
name: seldon-kafka
name: kafka-config-volume
Expand Down Expand Up @@ -1306,6 +1312,18 @@ spec:
topics:
numPartitions: '{{ .Values.kafka.topics.numPartitions }}'
replicationFactor: '{{ .Values.kafka.topics.replicationFactor }}'
scalingConfig:
  # These template values are left unquoted on purpose: the CRD schema types
  # them as boolean / int32, so they must render as YAML booleans and
  # integers rather than strings.
  # Each action is kept on a single line so the template does not rely on
  # multi-line action support in the Go template parser.
  models:
    enabled: {{ .Values.autoscaling.autoscalingModelEnabled }}
  pipelines:
    # Uses the Kafka partition count, which the CRD description gives as the
    # largest value that makes sense for this multiplier.
    maxShardCountMultiplier: {{ .Values.kafka.topics.numPartitions }}
  servers:
    enabled: {{ .Values.autoscaling.autoscalingServerEnabled }}
    scaleDownPackingEnabled: {{ .Values.autoscaling.serverPackingEnabled }}
    scaleDownPackingPercentage: {{ .Values.autoscaling.serverPackingPercentage }}
serviceConfig:
grpcServicePrefix: '{{ .Values.services.serviceGRPCPrefix }}'
serviceType: '{{ .Values.services.defaultServiceType }}'
Expand Down
4 changes: 2 additions & 2 deletions k8s/helm-charts/seldon-core-v2-setup/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ opentelemetry:

# logging
# this is a global setting, in the case individual components logLevel is not set
# Users should set a value from:
# Users should set a value from:
# fatal, error, warn, info, debug, trace
# if used also for .rclone.logLevel, the allowed set reduces to:
# debug, info, error
Expand Down Expand Up @@ -245,7 +245,7 @@ scheduler:
runAsGroup: 1000
runAsNonRoot: true
schedulerReadyTimeoutSeconds: 600

autoscaling:
autoscalingModelEnabled: false
autoscalingServerEnabled: true
Expand Down
4 changes: 2 additions & 2 deletions k8s/helm-charts/seldon-core-v2-setup/values.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ opentelemetry:

# logging
# this is a global setting, in the case individual components logLevel is not set
# Users should set a value from:
# Users should set a value from:
# fatal, error, warn, info, debug, trace
# if used also for .rclone.logLevel, the allowed set reduces to:
# debug, info, error
Expand Down Expand Up @@ -245,7 +245,7 @@ scheduler:
runAsGroup: 1000
runAsNonRoot: true
schedulerReadyTimeoutSeconds: 600

autoscaling:
autoscalingModelEnabled: false
autoscalingServerEnabled: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ patchesStrategicMerge:
- ../../kustomize/helm-components-sc/patch_scheduler.yaml
- ../../kustomize/helm-components-sc/patch_kafkaconfig.yaml
- ../../kustomize/helm-components-sc/patch_tracingconfig.yaml
- ../../kustomize/helm-components-sc/patch_scalingconfig.yaml
- ../../kustomize/helm-components-sc/patch_agentconfig.yaml
- ../../kustomize/helm-components-sc/patch_serviceconfig.yaml
- patch_mlserver.yaml
Expand Down Expand Up @@ -59,6 +60,11 @@ patches:
version: v1alpha1
kind: SeldonConfig
name: default
- path: ../../kustomize/helm-components-sc/patch_scalingconfig_json6902.yaml
target:
version: v1alpha1
kind: SeldonConfig
name: default
- path: ../../kustomize/helm-components-sc/patch_pipelinegateway_json6902.yaml
target:
version: v1alpha1
Expand Down
6 changes: 6 additions & 0 deletions k8s/kustomize/helm-components-sc/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ patchesStrategicMerge:
- patch_kafkaconfig.yaml
- patch_tracingconfig.yaml
- patch_agentconfig.yaml
- patch_scalingconfig.yaml
- patch_serviceconfig.yaml

patches:
Expand Down Expand Up @@ -59,6 +60,11 @@ patches:
version: v1alpha1
kind: SeldonConfig
name: default
- path: patch_scalingconfig_json6902.yaml
target:
version: v1alpha1
kind: SeldonConfig
name: default
- path: patch_pipelinegateway_json6902.yaml
target:
version: v1alpha1
Expand Down
15 changes: 15 additions & 0 deletions k8s/kustomize/helm-components-sc/patch_scalingconfig.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Strategic-merge patch for the `default` SeldonConfig.
# It declares the scalingConfig tree with empty leaves; the leaf paths match
# the ones written by patch_scalingconfig_json6902.yaml.
# NOTE(review): presumably this patch exists so those parent paths are present
# before the JSON 6902 `add` operations run - confirm against kustomize's
# patch ordering.
apiVersion: mlops.seldon.io/v1alpha1
kind: SeldonConfig
metadata:
  name: default
spec:
  config:
    scalingConfig:
      models:
        enabled:
      servers:
        enabled:
        scaleDownPackingEnabled:
        scaleDownPackingPercentage:
      pipelines:
        maxShardCountMultiplier:
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# JSON 6902 patch: sets every scalingConfig leaf under /spec/config to a Helm
# template expression.
# NOTE(review): the HACK_REMOVE_ME prefix is presumably stripped by a later
# build step so the expression ends up unquoted in the generated chart (the
# generated Helm templates show these values unquoted) - confirm against the
# chart build script before changing it.
- op: add
  path: /spec/config/scalingConfig/models/enabled
  value: HACK_REMOVE_ME{{ .Values.autoscaling.autoscalingModelEnabled }}
- op: add
  path: /spec/config/scalingConfig/servers/enabled
  value: HACK_REMOVE_ME{{ .Values.autoscaling.autoscalingServerEnabled }}
- op: add
  path: /spec/config/scalingConfig/servers/scaleDownPackingEnabled
  value: HACK_REMOVE_ME{{ .Values.autoscaling.serverPackingEnabled }}
- op: add
  path: /spec/config/scalingConfig/servers/scaleDownPackingPercentage
  value: HACK_REMOVE_ME{{ .Values.autoscaling.serverPackingPercentage }}
- op: add
  path: /spec/config/scalingConfig/pipelines/maxShardCountMultiplier
  value: HACK_REMOVE_ME{{ .Values.kafka.topics.numPartitions }}
15 changes: 15 additions & 0 deletions k8s/yaml/components.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ spec:
- --db-path=/mnt/scheduler/db
- --allow-plaintxt=$(ALLOW_PLAINTXT)
- --kafka-config-path=/mnt/kafka/kafka.json
- --scaling-config-path=/mnt/scaling/scaling.yaml
- --scheduler-ready-timeout-seconds=$(SCHEDULER_READY_TIMEOUT_SECONDS)
- --server-packing-enabled=$(SERVER_PACKING_ENABLED)
- --server-packing-percentage=$(SERVER_PACKING_PERCENTAGE)
Expand Down Expand Up @@ -481,6 +482,8 @@ spec:
volumeMounts:
- mountPath: /mnt/kafka
name: kafka-config-volume
- mountPath: /mnt/scaling
name: scaling-config-volume
- mountPath: /mnt/tracing
name: tracing-config-volume
- mountPath: /mnt/scheduler
Expand All @@ -493,6 +496,9 @@ spec:
serviceAccountName: seldon-scheduler
terminationGracePeriodSeconds: 5
volumes:
- configMap:
name: seldon-scaling
name: scaling-config-volume
- configMap:
name: seldon-kafka
name: kafka-config-volume
Expand Down Expand Up @@ -1127,6 +1133,15 @@ spec:
topics:
numPartitions: '1'
replicationFactor: '1'
# Static scaling settings for the plain-YAML install: models.enabled is
# false, servers.enabled is true, scale-down packing is off (0 percent), and
# the pipeline shard multiplier is 1.
scalingConfig:
  models:
    enabled: false
  pipelines:
    maxShardCountMultiplier: 1
  servers:
    enabled: true
    scaleDownPackingEnabled: false
    scaleDownPackingPercentage: 0
serviceConfig:
grpcServicePrefix: ''
serviceType: 'LoadBalancer'
Expand Down
Loading
Loading