From 71ce50c052f0a142d652cfc2e88191ecb10e97b6 Mon Sep 17 00:00:00 2001 From: Tommy Li Date: Fri, 31 Jan 2020 20:31:20 -0800 Subject: [PATCH] set default minscale to 1 (#655) --- .../v1alpha2/inferenceservice_types.go | 2 +- .../v1alpha2/inferenceservice_types_test.go | 7 ++-- .../inferenceservice_validation_test.go | 20 +++++----- pkg/apis/serving/v1alpha2/utils.go | 16 ++++++-- .../serving/v1alpha2/zz_generated.deepcopy.go | 5 +++ pkg/constants/constants.go | 14 ++++--- .../inferenceservice/controller_test.go | 39 ++++++++++--------- .../knative/service_reconciler_test.go | 6 ++- .../resources/knative/service.go | 9 +++-- .../resources/knative/service_test.go | 25 +++++++----- 10 files changed, 87 insertions(+), 56 deletions(-) diff --git a/pkg/apis/serving/v1alpha2/inferenceservice_types.go b/pkg/apis/serving/v1alpha2/inferenceservice_types.go index cbe1386934b..be3ca86868f 100644 --- a/pkg/apis/serving/v1alpha2/inferenceservice_types.go +++ b/pkg/apis/serving/v1alpha2/inferenceservice_types.go @@ -56,7 +56,7 @@ type DeploymentSpec struct { ServiceAccountName string `json:"serviceAccountName,omitempty"` // Minimum number of replicas, pods won't scale down to 0 in case of no traffic // +optional - MinReplicas int `json:"minReplicas,omitempty"` + MinReplicas *int `json:"minReplicas,omitempty"` // This is the up bound for autoscaler to scale to // +optional MaxReplicas int `json:"maxReplicas,omitempty"` diff --git a/pkg/apis/serving/v1alpha2/inferenceservice_types_test.go b/pkg/apis/serving/v1alpha2/inferenceservice_types_test.go index 095026cee64..8c85f5fec63 100644 --- a/pkg/apis/serving/v1alpha2/inferenceservice_types_test.go +++ b/pkg/apis/serving/v1alpha2/inferenceservice_types_test.go @@ -17,9 +17,10 @@ limitations under the License. package v1alpha2 import ( - "github.com/kubeflow/kfserving/pkg/constants" "testing" + "github.com/kubeflow/kfserving/pkg/constants" + "github.com/onsi/gomega" "golang.org/x/net/context" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -40,7 +41,7 @@ func TestInferenceService(t *testing.T) { Default: EndpointSpec{ Predictor: PredictorSpec{ DeploymentSpec: DeploymentSpec{ - MinReplicas: 1, + MinReplicas: GetIntReference(1), MaxReplicas: 3, }, Tensorflow: &TensorflowSpec{ @@ -53,7 +54,7 @@ func TestInferenceService(t *testing.T) { Canary: &EndpointSpec{ Predictor: PredictorSpec{ DeploymentSpec: DeploymentSpec{ - MinReplicas: 1, + MinReplicas: GetIntReference(1), MaxReplicas: 3, }, Tensorflow: &TensorflowSpec{ diff --git a/pkg/apis/serving/v1alpha2/inferenceservice_validation_test.go b/pkg/apis/serving/v1alpha2/inferenceservice_validation_test.go index 27757af9fd3..f9a120a3256 100644 --- a/pkg/apis/serving/v1alpha2/inferenceservice_validation_test.go +++ b/pkg/apis/serving/v1alpha2/inferenceservice_validation_test.go @@ -160,16 +160,16 @@ func TestRejectTrafficProvidedWithoutCanary(t *testing.T) { func TestBadReplicaValues(t *testing.T) { g := gomega.NewGomegaWithT(t) isvc := makeTestInferenceService() - isvc.Spec.Default.Predictor.MinReplicas = -1 + isvc.Spec.Default.Predictor.MinReplicas = GetIntReference(-1) g.Expect(isvc.ValidateCreate(c)).Should(gomega.MatchError(MinReplicasLowerBoundExceededError)) - isvc.Spec.Default.Predictor.MinReplicas = 1 + isvc.Spec.Default.Predictor.MinReplicas = GetIntReference(1) isvc.Spec.Default.Predictor.MaxReplicas = -1 g.Expect(isvc.ValidateCreate(c)).Should(gomega.MatchError(MaxReplicasLowerBoundExceededError)) - isvc.Spec.Default.Predictor.MinReplicas = 2 + isvc.Spec.Default.Predictor.MinReplicas = GetIntReference(2) isvc.Spec.Default.Predictor.MaxReplicas = 1 g.Expect(isvc.ValidateCreate(c)).Should(gomega.MatchError(MinReplicasShouldBeLessThanMaxError)) // Now test transformer and explainer, so set correct value for predictor - isvc.Spec.Default.Predictor.MinReplicas = 0 + isvc.Spec.Default.Predictor.MinReplicas = GetIntReference(0) isvc.Spec.Default.Predictor.MaxReplicas = 0 isvc.Spec.Default.Transformer = &TransformerSpec{} @@ -179,12 +179,12 @@ func TestBadReplicaValues(t *testing.T) { }, } isvc.Default(c) - isvc.Spec.Default.Transformer.MinReplicas = -1 + isvc.Spec.Default.Transformer.MinReplicas = GetIntReference(-1) g.Expect(isvc.ValidateCreate(c)).Should(gomega.MatchError(MinReplicasLowerBoundExceededError)) - isvc.Spec.Default.Transformer.MinReplicas = 1 + isvc.Spec.Default.Transformer.MinReplicas = GetIntReference(1) isvc.Spec.Default.Transformer.MaxReplicas = -1 g.Expect(isvc.ValidateCreate(c)).Should(gomega.MatchError(MaxReplicasLowerBoundExceededError)) - isvc.Spec.Default.Transformer.MinReplicas = 2 + isvc.Spec.Default.Transformer.MinReplicas = GetIntReference(2) isvc.Spec.Default.Transformer.MaxReplicas = 1 g.Expect(isvc.ValidateCreate(c)).Should(gomega.MatchError(MinReplicasShouldBeLessThanMaxError)) // Now test explainer, so ignore transformer @@ -196,12 +196,12 @@ func TestBadReplicaValues(t *testing.T) { }, } isvc.Default(c) - isvc.Spec.Default.Explainer.MinReplicas = -1 + isvc.Spec.Default.Explainer.MinReplicas = GetIntReference(-1) g.Expect(isvc.ValidateCreate(c)).Should(gomega.MatchError(MinReplicasLowerBoundExceededError)) - isvc.Spec.Default.Explainer.MinReplicas = 1 + isvc.Spec.Default.Explainer.MinReplicas = GetIntReference(1) isvc.Spec.Default.Explainer.MaxReplicas = -1 g.Expect(isvc.ValidateCreate(c)).Should(gomega.MatchError(MaxReplicasLowerBoundExceededError)) - isvc.Spec.Default.Explainer.MinReplicas = 2 + isvc.Spec.Default.Explainer.MinReplicas = GetIntReference(2) isvc.Spec.Default.Explainer.MaxReplicas = 1 g.Expect(isvc.ValidateCreate(c)).Should(gomega.MatchError(MinReplicasShouldBeLessThanMaxError)) } diff --git a/pkg/apis/serving/v1alpha2/utils.go b/pkg/apis/serving/v1alpha2/utils.go index 2cf4f580099..c3da590453f 100644 --- a/pkg/apis/serving/v1alpha2/utils.go +++ b/pkg/apis/serving/v1alpha2/utils.go @@ -5,6 +5,7 @@ import ( "regexp" "strings" + "github.com/kubeflow/kfserving/pkg/constants" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/validation/field" @@ -91,15 +92,24 @@ func validateStorageURI(storageURI string) error { return fmt.Errorf(UnsupportedStorageURIFormatError, strings.Join(SupportedStorageURIPrefixList, ", "), storageURI) } -func validateReplicas(minReplicas int, maxReplicas int) error { - if minReplicas < 0 { +func validateReplicas(minReplicas *int, maxReplicas int) error { + if minReplicas == nil { + minReplicas = &constants.DefaultMinReplicas + } + if *minReplicas < 0 { return fmt.Errorf(MinReplicasLowerBoundExceededError) } if maxReplicas < 0 { return fmt.Errorf(MaxReplicasLowerBoundExceededError) } - if minReplicas > maxReplicas && maxReplicas != 0 { + if *minReplicas > maxReplicas && maxReplicas != 0 { return fmt.Errorf(MinReplicasShouldBeLessThanMaxError) } return nil } + +// GetIntReference returns the pointer for the integer input +func GetIntReference(number int) *int { + num := number + return &num +} diff --git a/pkg/apis/serving/v1alpha2/zz_generated.deepcopy.go b/pkg/apis/serving/v1alpha2/zz_generated.deepcopy.go index 8efb289d7d8..52df55542df 100644 --- a/pkg/apis/serving/v1alpha2/zz_generated.deepcopy.go +++ b/pkg/apis/serving/v1alpha2/zz_generated.deepcopy.go @@ -92,6 +92,11 @@ func (in *CustomSpec) DeepCopy() *CustomSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *DeploymentSpec) DeepCopyInto(out *DeploymentSpec) { *out = *in + if in.MinReplicas != nil { + in, out := &in.MinReplicas, &out.MinReplicas + *out = new(int) + **out = **in + } if in.Logger != nil { in, out := &in.Logger, &out.Logger *out = new(Logger) diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go index b074d111506..1d34e920aab 100644 --- a/pkg/constants/constants.go +++ b/pkg/constants/constants.go @@ -18,11 +18,12 @@ package constants import ( "fmt" - "knative.dev/pkg/network" "os" "regexp" "strings" + "knative.dev/pkg/network" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -63,15 +64,16 @@ var ( DefaultTransformerTimeout int64 = 120 DefaultExplainerTimeout int64 = 300 DefaultScalingTarget = "1" + DefaultMinReplicas = 1 ) // Webhook Constants var ( - EnableKFServingMutatingWebhook = "enabled" - EnableWebhookNamespaceSelectorEnvName = "ENABLE_WEBHOOK_NAMESPACE_SELECTOR" - EnableWebhookNamespaceSelectorEnvValue = "enabled" - IsEnableWebhookNamespaceSelector = isEnvVarMatched(EnableWebhookNamespaceSelectorEnvName, EnableWebhookNamespaceSelectorEnvValue) - PodMutatorWebhookName = KFServingName + "-pod-mutator-webhook" + EnableKFServingMutatingWebhook = "enabled" + EnableWebhookNamespaceSelectorEnvName = "ENABLE_WEBHOOK_NAMESPACE_SELECTOR" + EnableWebhookNamespaceSelectorEnvValue = "enabled" + IsEnableWebhookNamespaceSelector = isEnvVarMatched(EnableWebhookNamespaceSelectorEnvName, EnableWebhookNamespaceSelectorEnvValue) + PodMutatorWebhookName = KFServingName + "-pod-mutator-webhook" ) // GPU Constants diff --git a/pkg/controller/inferenceservice/controller_test.go b/pkg/controller/inferenceservice/controller_test.go index e25920f33f2..7e6cd772f59 100644 --- a/pkg/controller/inferenceservice/controller_test.go +++ b/pkg/controller/inferenceservice/controller_test.go @@ -18,11 +18,12 @@ package service import ( "fmt" - "github.com/kubeflow/kfserving/pkg/controller/inferenceservice/resources/istio" - "knative.dev/pkg/network" "testing" "time" + "github.com/kubeflow/kfserving/pkg/controller/inferenceservice/resources/istio" + "knative.dev/pkg/network" + "github.com/kubeflow/kfserving/pkg/controller/inferenceservice/resources/knative" "k8s.io/apimachinery/pkg/api/errors" @@ -103,7 +104,7 @@ func TestInferenceServiceWithOnlyPredictor(t *testing.T) { Default: kfserving.EndpointSpec{ Predictor: kfserving.PredictorSpec{ DeploymentSpec: kfserving.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, }, Tensorflow: &kfserving.TensorflowSpec{ @@ -340,7 +341,7 @@ func TestInferenceServiceWithDefaultAndCanaryPredictor(t *testing.T) { Default: kfserving.EndpointSpec{ Predictor: kfserving.PredictorSpec{ DeploymentSpec: kfserving.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, }, Tensorflow: &kfserving.TensorflowSpec{ @@ -353,7 +354,7 @@ func TestInferenceServiceWithDefaultAndCanaryPredictor(t *testing.T) { Canary: &kfserving.EndpointSpec{ Predictor: kfserving.PredictorSpec{ DeploymentSpec: kfserving.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, }, Tensorflow: &kfserving.TensorflowSpec{ @@ -658,7 +659,7 @@ func TestCanaryDelete(t *testing.T) { Default: kfserving.EndpointSpec{ Predictor: kfserving.PredictorSpec{ DeploymentSpec: kfserving.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, }, Tensorflow: &kfserving.TensorflowSpec{ @@ -671,7 +672,7 @@ func TestCanaryDelete(t *testing.T) { Canary: &kfserving.EndpointSpec{ Predictor: kfserving.PredictorSpec{ DeploymentSpec: kfserving.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, }, Tensorflow: &kfserving.TensorflowSpec{ @@ -930,7 +931,7 @@ func TestInferenceServiceWithTransformer(t *testing.T) { Default: kfserving.EndpointSpec{ Predictor: kfserving.PredictorSpec{ DeploymentSpec: kfserving.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, }, Tensorflow: &kfserving.TensorflowSpec{ @@ -940,7 +941,7 @@ func TestInferenceServiceWithTransformer(t *testing.T) { }, Transformer: &kfserving.TransformerSpec{ DeploymentSpec: kfserving.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, }, Custom: &kfserving.CustomSpec{ @@ -954,7 +955,7 @@ func TestInferenceServiceWithTransformer(t *testing.T) { Canary: &kfserving.EndpointSpec{ Predictor: kfserving.PredictorSpec{ DeploymentSpec: kfserving.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, }, Tensorflow: &kfserving.TensorflowSpec{ @@ -964,7 +965,7 @@ func TestInferenceServiceWithTransformer(t *testing.T) { }, Transformer: &kfserving.TransformerSpec{ DeploymentSpec: kfserving.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, }, Custom: &kfserving.CustomSpec{ @@ -1318,7 +1319,7 @@ func TestInferenceServiceDeleteComponent(t *testing.T) { Default: kfserving.EndpointSpec{ Predictor: kfserving.PredictorSpec{ DeploymentSpec: kfserving.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, }, Tensorflow: &kfserving.TensorflowSpec{ @@ -1328,7 +1329,7 @@ func TestInferenceServiceDeleteComponent(t *testing.T) { }, Transformer: &kfserving.TransformerSpec{ DeploymentSpec: kfserving.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, }, Custom: &kfserving.CustomSpec{ @@ -1342,7 +1343,7 @@ func TestInferenceServiceDeleteComponent(t *testing.T) { Canary: &kfserving.EndpointSpec{ Predictor: kfserving.PredictorSpec{ DeploymentSpec: kfserving.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, }, Tensorflow: &kfserving.TensorflowSpec{ @@ -1352,7 +1353,7 @@ func TestInferenceServiceDeleteComponent(t *testing.T) { }, Transformer: &kfserving.TransformerSpec{ DeploymentSpec: kfserving.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, }, Custom: &kfserving.CustomSpec{ @@ -1500,7 +1501,7 @@ func TestInferenceServiceWithExplainer(t *testing.T) { Default: kfserving.EndpointSpec{ Predictor: kfserving.PredictorSpec{ DeploymentSpec: kfserving.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, }, Tensorflow: &kfserving.TensorflowSpec{ @@ -1510,7 +1511,7 @@ func TestInferenceServiceWithExplainer(t *testing.T) { }, Explainer: &kfserving.ExplainerSpec{ DeploymentSpec: kfserving.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, }, Alibi: &v1alpha2.AlibiExplainerSpec{ @@ -1523,7 +1524,7 @@ func TestInferenceServiceWithExplainer(t *testing.T) { Canary: &kfserving.EndpointSpec{ Predictor: kfserving.PredictorSpec{ DeploymentSpec: kfserving.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, }, Tensorflow: &kfserving.TensorflowSpec{ @@ -1533,7 +1534,7 @@ func TestInferenceServiceWithExplainer(t *testing.T) { }, Explainer: &kfserving.ExplainerSpec{ DeploymentSpec: kfserving.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, }, Alibi: &v1alpha2.AlibiExplainerSpec{ diff --git a/pkg/controller/inferenceservice/reconcilers/knative/service_reconciler_test.go b/pkg/controller/inferenceservice/reconcilers/knative/service_reconciler_test.go index 433d9b63b61..31f409eb3c5 100644 --- a/pkg/controller/inferenceservice/reconcilers/knative/service_reconciler_test.go +++ b/pkg/controller/inferenceservice/reconcilers/knative/service_reconciler_test.go @@ -19,10 +19,11 @@ package knative import ( "context" "fmt" - "github.com/kubeflow/kfserving/pkg/controller/inferenceservice/resources/knative" "testing" "time" + "github.com/kubeflow/kfserving/pkg/controller/inferenceservice/resources/knative" + "github.com/google/go-cmp/cmp" "github.com/kubeflow/kfserving/pkg/apis/serving/v1alpha2" "github.com/kubeflow/kfserving/pkg/constants" @@ -118,6 +119,7 @@ func TestKnativeServiceReconcile(t *testing.T) { Labels: map[string]string{"serving.kubeflow.org/inferenceservice": "mnist"}, Annotations: map[string]string{ "autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev", + "autoscaling.knative.dev/minScale": "1", "autoscaling.knative.dev/target": "1", "internal.serving.kubeflow.org/storage-initializer-sourceuri": "gs://testuri", "queue.sidecar.serving.knative.dev/resourcePercentage": knative.DefaultQueueSideCarResourcePercentage, @@ -157,6 +159,7 @@ func TestKnativeServiceReconcile(t *testing.T) { Labels: map[string]string{"serving.kubeflow.org/inferenceservice": "mnist"}, Annotations: map[string]string{ "autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev", + "autoscaling.knative.dev/minScale": "1", "autoscaling.knative.dev/target": "1", "internal.serving.kubeflow.org/storage-initializer-sourceuri": "gs://testuri2", "queue.sidecar.serving.knative.dev/resourcePercentage": knative.DefaultQueueSideCarResourcePercentage, @@ -214,6 +217,7 @@ func TestKnativeServiceReconcile(t *testing.T) { Labels: map[string]string{"serving.kubeflow.org/inferenceservice": "mnist"}, Annotations: map[string]string{ "autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev", + "autoscaling.knative.dev/minScale": "1", "autoscaling.knative.dev/target": "1", "internal.serving.kubeflow.org/storage-initializer-sourceuri": "gs://testuri", "queue.sidecar.serving.knative.dev/resourcePercentage": knative.DefaultQueueSideCarResourcePercentage, diff --git a/pkg/controller/inferenceservice/resources/knative/service.go b/pkg/controller/inferenceservice/resources/knative/service.go index a9b85f4a8d7..316d045b021 100644 --- a/pkg/controller/inferenceservice/resources/knative/service.go +++ b/pkg/controller/inferenceservice/resources/knative/service.go @@ -310,14 +310,17 @@ func (c *ServiceBuilder) CreateExplainerService(name string, metadata metav1.Obj return service, nil } -func (c *ServiceBuilder) buildAnnotations(metadata metav1.ObjectMeta, minReplicas int, maxReplicas int) (map[string]string, error) { +func (c *ServiceBuilder) buildAnnotations(metadata metav1.ObjectMeta, minReplicas *int, maxReplicas int) (map[string]string, error) { annotations := utils.Filter(metadata.Annotations, func(key string) bool { return !utils.Includes(serviceAnnotationDisallowedList, key) }) - if minReplicas != 0 { - annotations[autoscaling.MinScaleAnnotationKey] = fmt.Sprint(minReplicas) + if minReplicas == nil { + annotations[autoscaling.MinScaleAnnotationKey] = fmt.Sprint(constants.DefaultMinReplicas) + } else if *minReplicas != 0 { + annotations[autoscaling.MinScaleAnnotationKey] = fmt.Sprint(*minReplicas) } + if maxReplicas != 0 { annotations[autoscaling.MaxScaleAnnotationKey] = fmt.Sprint(maxReplicas) } diff --git a/pkg/controller/inferenceservice/resources/knative/service_test.go b/pkg/controller/inferenceservice/resources/knative/service_test.go index 7ace150688b..b0d29fd7ddf 100644 --- a/pkg/controller/inferenceservice/resources/knative/service_test.go +++ b/pkg/controller/inferenceservice/resources/knative/service_test.go @@ -50,7 +50,7 @@ var isvc = v1alpha2.InferenceService{ Default: v1alpha2.EndpointSpec{ Predictor: v1alpha2.PredictorSpec{ DeploymentSpec: v1alpha2.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, ServiceAccountName: "testsvcacc", }, @@ -189,7 +189,7 @@ func TestInferenceServiceToKnativeService(t *testing.T) { Default: v1alpha2.EndpointSpec{ Predictor: v1alpha2.PredictorSpec{ DeploymentSpec: v1alpha2.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, ServiceAccountName: "testsvcacc", }, @@ -203,7 +203,7 @@ func TestInferenceServiceToKnativeService(t *testing.T) { Canary: &v1alpha2.EndpointSpec{ Predictor: v1alpha2.PredictorSpec{ DeploymentSpec: v1alpha2.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, }, Tensorflow: &v1alpha2.TensorflowSpec{ @@ -254,6 +254,7 @@ func TestInferenceServiceToKnativeService(t *testing.T) { Annotations: map[string]string{ constants.StorageInitializerSourceUriInternalAnnotationKey: "s3://test/sklearn/export", "autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev", + "autoscaling.knative.dev/minScale": "1", "autoscaling.knative.dev/target": "1", "queue.sidecar.serving.knative.dev/resourcePercentage": DefaultQueueSideCarResourcePercentage, }, @@ -309,6 +310,7 @@ func TestInferenceServiceToKnativeService(t *testing.T) { Annotations: map[string]string{ constants.StorageInitializerSourceUriInternalAnnotationKey: "s3://test/xgboost/export", "autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev", + "autoscaling.knative.dev/minScale": "1", "autoscaling.knative.dev/target": "1", "queue.sidecar.serving.knative.dev/resourcePercentage": DefaultQueueSideCarResourcePercentage, }, @@ -365,6 +367,7 @@ func TestInferenceServiceToKnativeService(t *testing.T) { Annotations: map[string]string{ constants.StorageInitializerSourceUriInternalAnnotationKey: "s3://test/xgboost/export", "autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev", + "autoscaling.knative.dev/minScale": "1", "autoscaling.knative.dev/target": "1", "queue.sidecar.serving.knative.dev/resourcePercentage": DefaultQueueSideCarResourcePercentage, }, @@ -414,7 +417,7 @@ func TestInferenceServiceToKnativeService(t *testing.T) { RuntimeVersion: "latest", }, DeploymentSpec: v1alpha2.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), }, }, }, @@ -507,7 +510,7 @@ func TestTransformerToKnativeService(t *testing.T) { Default: v1alpha2.EndpointSpec{ Transformer: &v1alpha2.TransformerSpec{ DeploymentSpec: v1alpha2.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, ServiceAccountName: "testsvcacc", }, @@ -519,7 +522,7 @@ func TestTransformerToKnativeService(t *testing.T) { }, Predictor: v1alpha2.PredictorSpec{ DeploymentSpec: v1alpha2.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, ServiceAccountName: "testsvcacc", }, @@ -537,7 +540,7 @@ func TestTransformerToKnativeService(t *testing.T) { isvcCanary.Spec.Canary = &v1alpha2.EndpointSpec{ Transformer: &v1alpha2.TransformerSpec{ DeploymentSpec: v1alpha2.DeploymentSpec{ - MinReplicas: 2, + MinReplicas: v1alpha2.GetIntReference(2), MaxReplicas: 4, ServiceAccountName: "testsvcacc", }, @@ -549,7 +552,7 @@ func TestTransformerToKnativeService(t *testing.T) { }, Predictor: v1alpha2.PredictorSpec{ DeploymentSpec: v1alpha2.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, ServiceAccountName: "testsvcacc", }, @@ -704,7 +707,7 @@ func TestExplainerToKnativeService(t *testing.T) { Predictor: v1alpha2.PredictorSpec{ DeploymentSpec: v1alpha2.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, ServiceAccountName: "testsvcacc", }, @@ -728,7 +731,7 @@ func TestExplainerToKnativeService(t *testing.T) { isvcCanary.Spec.Canary = &v1alpha2.EndpointSpec{ Predictor: v1alpha2.PredictorSpec{ DeploymentSpec: v1alpha2.DeploymentSpec{ - MinReplicas: 1, + MinReplicas: v1alpha2.GetIntReference(1), MaxReplicas: 3, ServiceAccountName: "testsvcacc", }, @@ -757,6 +760,7 @@ func TestExplainerToKnativeService(t *testing.T) { Labels: map[string]string{"serving.kubeflow.org/inferenceservice": "mnist"}, Annotations: map[string]string{ "autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev", + "autoscaling.knative.dev/minScale": "1", "autoscaling.knative.dev/target": "1", "queue.sidecar.serving.knative.dev/resourcePercentage": DefaultQueueSideCarResourcePercentage, }, @@ -798,6 +802,7 @@ func TestExplainerToKnativeService(t *testing.T) { Labels: map[string]string{"serving.kubeflow.org/inferenceservice": "mnist"}, Annotations: map[string]string{ "autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev", + "autoscaling.knative.dev/minScale": "1", "autoscaling.knative.dev/target": "1", "queue.sidecar.serving.knative.dev/resourcePercentage": DefaultQueueSideCarResourcePercentage, },