set default minscale to 1 (kserve#655)

zillow · Feb 1, 2020 · 71ce50c · 71ce50c
1 parent 8115e0e
commit 71ce50c
Show file tree

Hide file tree

Showing 10 changed files with 87 additions and 56 deletions.
diff --git a/pkg/apis/serving/v1alpha2/inferenceservice_types.go b/pkg/apis/serving/v1alpha2/inferenceservice_types.go
@@ -56,7 +56,7 @@ type DeploymentSpec struct {
 	ServiceAccountName string `json:"serviceAccountName,omitempty"`
 	// Minimum number of replicas, pods won't scale down to 0 in case of no traffic
 	// +optional
-	MinReplicas int `json:"minReplicas,omitempty"`
+	MinReplicas *int `json:"minReplicas,omitempty"`
 	// This is the up bound for autoscaler to scale to
 	// +optional
 	MaxReplicas int `json:"maxReplicas,omitempty"`

diff --git a/pkg/apis/serving/v1alpha2/inferenceservice_types_test.go b/pkg/apis/serving/v1alpha2/inferenceservice_types_test.go
@@ -17,9 +17,10 @@ limitations under the License.
 package v1alpha2
 
 import (
-	"github.com/kubeflow/kfserving/pkg/constants"
 	"testing"
 
+	"github.com/kubeflow/kfserving/pkg/constants"
+
 	"github.com/onsi/gomega"
 	"golang.org/x/net/context"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -40,7 +41,7 @@ func TestInferenceService(t *testing.T) {
 			Default: EndpointSpec{
 				Predictor: PredictorSpec{
 					DeploymentSpec: DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Tensorflow: &TensorflowSpec{
@@ -53,7 +54,7 @@ func TestInferenceService(t *testing.T) {
 			Canary: &EndpointSpec{
 				Predictor: PredictorSpec{
 					DeploymentSpec: DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Tensorflow: &TensorflowSpec{

diff --git a/pkg/apis/serving/v1alpha2/inferenceservice_validation_test.go b/pkg/apis/serving/v1alpha2/inferenceservice_validation_test.go
@@ -160,16 +160,16 @@ func TestRejectTrafficProvidedWithoutCanary(t *testing.T) {
 func TestBadReplicaValues(t *testing.T) {
 	g := gomega.NewGomegaWithT(t)
 	isvc := makeTestInferenceService()
-	isvc.Spec.Default.Predictor.MinReplicas = -1
+	isvc.Spec.Default.Predictor.MinReplicas = GetIntReference(-1)
 	g.Expect(isvc.ValidateCreate(c)).Should(gomega.MatchError(MinReplicasLowerBoundExceededError))
-	isvc.Spec.Default.Predictor.MinReplicas = 1
+	isvc.Spec.Default.Predictor.MinReplicas = GetIntReference(1)
 	isvc.Spec.Default.Predictor.MaxReplicas = -1
 	g.Expect(isvc.ValidateCreate(c)).Should(gomega.MatchError(MaxReplicasLowerBoundExceededError))
-	isvc.Spec.Default.Predictor.MinReplicas = 2
+	isvc.Spec.Default.Predictor.MinReplicas = GetIntReference(2)
 	isvc.Spec.Default.Predictor.MaxReplicas = 1
 	g.Expect(isvc.ValidateCreate(c)).Should(gomega.MatchError(MinReplicasShouldBeLessThanMaxError))
 	// Now test transformer and explainer, so set correct value for predictor
-	isvc.Spec.Default.Predictor.MinReplicas = 0
+	isvc.Spec.Default.Predictor.MinReplicas = GetIntReference(0)
 	isvc.Spec.Default.Predictor.MaxReplicas = 0
 
 	isvc.Spec.Default.Transformer = &TransformerSpec{}
@@ -179,12 +179,12 @@ func TestBadReplicaValues(t *testing.T) {
 		},
 	}
 	isvc.Default(c)
-	isvc.Spec.Default.Transformer.MinReplicas = -1
+	isvc.Spec.Default.Transformer.MinReplicas = GetIntReference(-1)
 	g.Expect(isvc.ValidateCreate(c)).Should(gomega.MatchError(MinReplicasLowerBoundExceededError))
-	isvc.Spec.Default.Transformer.MinReplicas = 1
+	isvc.Spec.Default.Transformer.MinReplicas = GetIntReference(1)
 	isvc.Spec.Default.Transformer.MaxReplicas = -1
 	g.Expect(isvc.ValidateCreate(c)).Should(gomega.MatchError(MaxReplicasLowerBoundExceededError))
-	isvc.Spec.Default.Transformer.MinReplicas = 2
+	isvc.Spec.Default.Transformer.MinReplicas = GetIntReference(2)
 	isvc.Spec.Default.Transformer.MaxReplicas = 1
 	g.Expect(isvc.ValidateCreate(c)).Should(gomega.MatchError(MinReplicasShouldBeLessThanMaxError))
 	// Now test explainer, so ignore transformer
@@ -196,12 +196,12 @@ func TestBadReplicaValues(t *testing.T) {
 		},
 	}
 	isvc.Default(c)
-	isvc.Spec.Default.Explainer.MinReplicas = -1
+	isvc.Spec.Default.Explainer.MinReplicas = GetIntReference(-1)
 	g.Expect(isvc.ValidateCreate(c)).Should(gomega.MatchError(MinReplicasLowerBoundExceededError))
-	isvc.Spec.Default.Explainer.MinReplicas = 1
+	isvc.Spec.Default.Explainer.MinReplicas = GetIntReference(1)
 	isvc.Spec.Default.Explainer.MaxReplicas = -1
 	g.Expect(isvc.ValidateCreate(c)).Should(gomega.MatchError(MaxReplicasLowerBoundExceededError))
-	isvc.Spec.Default.Explainer.MinReplicas = 2
+	isvc.Spec.Default.Explainer.MinReplicas = GetIntReference(2)
 	isvc.Spec.Default.Explainer.MaxReplicas = 1
 	g.Expect(isvc.ValidateCreate(c)).Should(gomega.MatchError(MinReplicasShouldBeLessThanMaxError))
 }

diff --git a/pkg/apis/serving/v1alpha2/utils.go b/pkg/apis/serving/v1alpha2/utils.go
@@ -5,6 +5,7 @@ import (
 	"regexp"
 	"strings"
 
+	"github.com/kubeflow/kfserving/pkg/constants"
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/apimachinery/pkg/util/validation/field"
@@ -91,15 +92,24 @@ func validateStorageURI(storageURI string) error {
 	return fmt.Errorf(UnsupportedStorageURIFormatError, strings.Join(SupportedStorageURIPrefixList, ", "), storageURI)
 }
 
-func validateReplicas(minReplicas int, maxReplicas int) error {
-	if minReplicas < 0 {
+func validateReplicas(minReplicas *int, maxReplicas int) error {
+	if minReplicas == nil {
+		minReplicas = &constants.DefaultMinReplicas
+	}
+	if *minReplicas < 0 {
 		return fmt.Errorf(MinReplicasLowerBoundExceededError)
 	}
 	if maxReplicas < 0 {
 		return fmt.Errorf(MaxReplicasLowerBoundExceededError)
 	}
-	if minReplicas > maxReplicas && maxReplicas != 0 {
+	if *minReplicas > maxReplicas && maxReplicas != 0 {
 		return fmt.Errorf(MinReplicasShouldBeLessThanMaxError)
 	}
 	return nil
 }
+
+// GetIntReference returns the pointer for the integer input
+func GetIntReference(number int) *int {
+	num := number
+	return &num
+}
diff --git a/pkg/apis/serving/v1alpha2/zz_generated.deepcopy.go b/pkg/apis/serving/v1alpha2/zz_generated.deepcopy.go
diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go
@@ -18,11 +18,12 @@ package constants
 
 import (
 	"fmt"
-	"knative.dev/pkg/network"
 	"os"
 	"regexp"
 	"strings"
 
+	"knative.dev/pkg/network"
+
 	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
@@ -63,15 +64,16 @@ var (
 	DefaultTransformerTimeout int64 = 120
 	DefaultExplainerTimeout   int64 = 300
 	DefaultScalingTarget            = "1"
+	DefaultMinReplicas              = 1
 )
 
 // Webhook Constants
 var (
-	EnableKFServingMutatingWebhook              = "enabled"
-	EnableWebhookNamespaceSelectorEnvName       = "ENABLE_WEBHOOK_NAMESPACE_SELECTOR"
-	EnableWebhookNamespaceSelectorEnvValue      = "enabled"
-	IsEnableWebhookNamespaceSelector            = isEnvVarMatched(EnableWebhookNamespaceSelectorEnvName, EnableWebhookNamespaceSelectorEnvValue)
-	PodMutatorWebhookName                       = KFServingName + "-pod-mutator-webhook"
+	EnableKFServingMutatingWebhook         = "enabled"
+	EnableWebhookNamespaceSelectorEnvName  = "ENABLE_WEBHOOK_NAMESPACE_SELECTOR"
+	EnableWebhookNamespaceSelectorEnvValue = "enabled"
+	IsEnableWebhookNamespaceSelector       = isEnvVarMatched(EnableWebhookNamespaceSelectorEnvName, EnableWebhookNamespaceSelectorEnvValue)
+	PodMutatorWebhookName                  = KFServingName + "-pod-mutator-webhook"
 )
 
 // GPU Constants

diff --git a/pkg/controller/inferenceservice/controller_test.go b/pkg/controller/inferenceservice/controller_test.go
@@ -18,11 +18,12 @@ package service
 
 import (
 	"fmt"
-	"github.com/kubeflow/kfserving/pkg/controller/inferenceservice/resources/istio"
-	"knative.dev/pkg/network"
 	"testing"
 	"time"
 
+	"github.com/kubeflow/kfserving/pkg/controller/inferenceservice/resources/istio"
+	"knative.dev/pkg/network"
+
 	"github.com/kubeflow/kfserving/pkg/controller/inferenceservice/resources/knative"
 
 	"k8s.io/apimachinery/pkg/api/errors"
@@ -103,7 +104,7 @@ func TestInferenceServiceWithOnlyPredictor(t *testing.T) {
 			Default: kfserving.EndpointSpec{
 				Predictor: kfserving.PredictorSpec{
 					DeploymentSpec: kfserving.DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: v1alpha2.GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Tensorflow: &kfserving.TensorflowSpec{
@@ -340,7 +341,7 @@ func TestInferenceServiceWithDefaultAndCanaryPredictor(t *testing.T) {
 			Default: kfserving.EndpointSpec{
 				Predictor: kfserving.PredictorSpec{
 					DeploymentSpec: kfserving.DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: v1alpha2.GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Tensorflow: &kfserving.TensorflowSpec{
@@ -353,7 +354,7 @@ func TestInferenceServiceWithDefaultAndCanaryPredictor(t *testing.T) {
 			Canary: &kfserving.EndpointSpec{
 				Predictor: kfserving.PredictorSpec{
 					DeploymentSpec: kfserving.DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: v1alpha2.GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Tensorflow: &kfserving.TensorflowSpec{
@@ -658,7 +659,7 @@ func TestCanaryDelete(t *testing.T) {
 			Default: kfserving.EndpointSpec{
 				Predictor: kfserving.PredictorSpec{
 					DeploymentSpec: kfserving.DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: v1alpha2.GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Tensorflow: &kfserving.TensorflowSpec{
@@ -671,7 +672,7 @@ func TestCanaryDelete(t *testing.T) {
 			Canary: &kfserving.EndpointSpec{
 				Predictor: kfserving.PredictorSpec{
 					DeploymentSpec: kfserving.DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: v1alpha2.GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Tensorflow: &kfserving.TensorflowSpec{
@@ -930,7 +931,7 @@ func TestInferenceServiceWithTransformer(t *testing.T) {
 			Default: kfserving.EndpointSpec{
 				Predictor: kfserving.PredictorSpec{
 					DeploymentSpec: kfserving.DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: v1alpha2.GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Tensorflow: &kfserving.TensorflowSpec{
@@ -940,7 +941,7 @@ func TestInferenceServiceWithTransformer(t *testing.T) {
 				},
 				Transformer: &kfserving.TransformerSpec{
 					DeploymentSpec: kfserving.DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: v1alpha2.GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Custom: &kfserving.CustomSpec{
@@ -954,7 +955,7 @@ func TestInferenceServiceWithTransformer(t *testing.T) {
 			Canary: &kfserving.EndpointSpec{
 				Predictor: kfserving.PredictorSpec{
 					DeploymentSpec: kfserving.DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: v1alpha2.GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Tensorflow: &kfserving.TensorflowSpec{
@@ -964,7 +965,7 @@ func TestInferenceServiceWithTransformer(t *testing.T) {
 				},
 				Transformer: &kfserving.TransformerSpec{
 					DeploymentSpec: kfserving.DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: v1alpha2.GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Custom: &kfserving.CustomSpec{
@@ -1318,7 +1319,7 @@ func TestInferenceServiceDeleteComponent(t *testing.T) {
 			Default: kfserving.EndpointSpec{
 				Predictor: kfserving.PredictorSpec{
 					DeploymentSpec: kfserving.DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: v1alpha2.GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Tensorflow: &kfserving.TensorflowSpec{
@@ -1328,7 +1329,7 @@ func TestInferenceServiceDeleteComponent(t *testing.T) {
 				},
 				Transformer: &kfserving.TransformerSpec{
 					DeploymentSpec: kfserving.DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: v1alpha2.GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Custom: &kfserving.CustomSpec{
@@ -1342,7 +1343,7 @@ func TestInferenceServiceDeleteComponent(t *testing.T) {
 			Canary: &kfserving.EndpointSpec{
 				Predictor: kfserving.PredictorSpec{
 					DeploymentSpec: kfserving.DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: v1alpha2.GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Tensorflow: &kfserving.TensorflowSpec{
@@ -1352,7 +1353,7 @@ func TestInferenceServiceDeleteComponent(t *testing.T) {
 				},
 				Transformer: &kfserving.TransformerSpec{
 					DeploymentSpec: kfserving.DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: v1alpha2.GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Custom: &kfserving.CustomSpec{
@@ -1500,7 +1501,7 @@ func TestInferenceServiceWithExplainer(t *testing.T) {
 			Default: kfserving.EndpointSpec{
 				Predictor: kfserving.PredictorSpec{
 					DeploymentSpec: kfserving.DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: v1alpha2.GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Tensorflow: &kfserving.TensorflowSpec{
@@ -1510,7 +1511,7 @@ func TestInferenceServiceWithExplainer(t *testing.T) {
 				},
 				Explainer: &kfserving.ExplainerSpec{
 					DeploymentSpec: kfserving.DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: v1alpha2.GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Alibi: &v1alpha2.AlibiExplainerSpec{
@@ -1523,7 +1524,7 @@ func TestInferenceServiceWithExplainer(t *testing.T) {
 			Canary: &kfserving.EndpointSpec{
 				Predictor: kfserving.PredictorSpec{
 					DeploymentSpec: kfserving.DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: v1alpha2.GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Tensorflow: &kfserving.TensorflowSpec{
@@ -1533,7 +1534,7 @@ func TestInferenceServiceWithExplainer(t *testing.T) {
 				},
 				Explainer: &kfserving.ExplainerSpec{
 					DeploymentSpec: kfserving.DeploymentSpec{
-						MinReplicas: 1,
+						MinReplicas: v1alpha2.GetIntReference(1),
 						MaxReplicas: 3,
 					},
 					Alibi: &v1alpha2.AlibiExplainerSpec{

diff --git a/pkg/controller/inferenceservice/reconcilers/knative/service_reconciler_test.go b/pkg/controller/inferenceservice/reconcilers/knative/service_reconciler_test.go
@@ -19,10 +19,11 @@ package knative
 import (
 	"context"
 	"fmt"
-	"github.com/kubeflow/kfserving/pkg/controller/inferenceservice/resources/knative"
 	"testing"
 	"time"
 
+	"github.com/kubeflow/kfserving/pkg/controller/inferenceservice/resources/knative"
+
 	"github.com/google/go-cmp/cmp"
 	"github.com/kubeflow/kfserving/pkg/apis/serving/v1alpha2"
 	"github.com/kubeflow/kfserving/pkg/constants"
@@ -118,6 +119,7 @@ func TestKnativeServiceReconcile(t *testing.T) {
 								Labels: map[string]string{"serving.kubeflow.org/inferenceservice": "mnist"},
 								Annotations: map[string]string{
 									"autoscaling.knative.dev/class":                               "kpa.autoscaling.knative.dev",
+									"autoscaling.knative.dev/minScale":                            "1",
 									"autoscaling.knative.dev/target":                              "1",
 									"internal.serving.kubeflow.org/storage-initializer-sourceuri": "gs://testuri",
 									"queue.sidecar.serving.knative.dev/resourcePercentage":        knative.DefaultQueueSideCarResourcePercentage,
@@ -157,6 +159,7 @@ func TestKnativeServiceReconcile(t *testing.T) {
 								Labels: map[string]string{"serving.kubeflow.org/inferenceservice": "mnist"},
 								Annotations: map[string]string{
 									"autoscaling.knative.dev/class":                               "kpa.autoscaling.knative.dev",
+									"autoscaling.knative.dev/minScale":                            "1",
 									"autoscaling.knative.dev/target":                              "1",
 									"internal.serving.kubeflow.org/storage-initializer-sourceuri": "gs://testuri2",
 									"queue.sidecar.serving.knative.dev/resourcePercentage":        knative.DefaultQueueSideCarResourcePercentage,
@@ -214,6 +217,7 @@ func TestKnativeServiceReconcile(t *testing.T) {
 								Labels: map[string]string{"serving.kubeflow.org/inferenceservice": "mnist"},
 								Annotations: map[string]string{
 									"autoscaling.knative.dev/class":                               "kpa.autoscaling.knative.dev",
+									"autoscaling.knative.dev/minScale":                            "1",
 									"autoscaling.knative.dev/target":                              "1",
 									"internal.serving.kubeflow.org/storage-initializer-sourceuri": "gs://testuri",
 									"queue.sidecar.serving.knative.dev/resourcePercentage":        knative.DefaultQueueSideCarResourcePercentage,