From 1a55119cf6652477dc88597f9fc57c9f97ab7bd6 Mon Sep 17 00:00:00 2001 From: Suresh Nakkeran <30799624+Suresh-Nakkeran@users.noreply.github.com> Date: Sat, 28 May 2022 20:31:24 +0530 Subject: [PATCH] Added logic to update ModelStatus (#2088) * added logic to update model status Signed-off-by: Suresh Nakkeran * added tests for modelstatus support Signed-off-by: Suresh Nakkeran * model status changes - incorporated review comments Signed-off-by: Suresh Nakkeran * adding more tests for modelstatus changes Signed-off-by: Suresh Nakkeran --- .../serving.kserve.io_inferenceservices.yaml | 1 + config/rbac/role.yaml | 8 + .../v1beta1/inference_service_status.go | 117 ++++- pkg/constants/constants.go | 17 +- .../v1alpha1/trainedmodel/controller_test.go | 12 + .../inferenceservice/components/predictor.go | 54 ++ .../v1beta1/inferenceservice/controller.go | 2 + .../inferenceservice/controller_test.go | 486 ++++++++++++++++++ .../rawkube_controller_test.go | 12 + .../v1beta1/inferenceservice/utils/utils.go | 22 + .../serving.kserve.io_inferenceservices.yaml | 1 + 11 files changed, 730 insertions(+), 2 deletions(-) diff --git a/config/crd/serving.kserve.io_inferenceservices.yaml b/config/crd/serving.kserve.io_inferenceservices.yaml index e706ad2dbea..83e16a4b78c 100644 --- a/config/crd/serving.kserve.io_inferenceservices.yaml +++ b/config/crd/serving.kserve.io_inferenceservices.yaml @@ -13055,6 +13055,7 @@ spec: enum: - ModelLoadFailed - RuntimeUnhealthy + - RuntimeDisabled - NoSupportingRuntime - RuntimeNotRecognized - InvalidPredictorSpec diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 3ea1db30a52..5f90ddbcfa4 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -73,6 +73,14 @@ rules: - get - list - watch +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch - apiGroups: - "" resources:
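Editor's aside on the RBAC change above: the new pods rule exists because the predictor reconciler (predictor.go below) now lists pods to derive model state. With controller-runtime, clients are typically cache-backed and the cache is filled by a watch, so even a one-off List needs list and watch in addition to get. A minimal sketch of the read path this role must permit — the helper, client wiring, and label values are illustrative, mirroring (but not copied from) the ListPodsByLabel utility added later in this patch:

```go
package sketch

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// listPredictorPods is a hypothetical helper showing the kind of read the
// new role rule enables: listing predictor pods by a single label. The
// cache behind a controller-runtime client is populated by a watch, which
// is why get, list, and watch are all granted on core/v1 pods.
func listPredictorPods(c client.Client, namespace, labelKey, labelVal string) (*corev1.PodList, error) {
	pods := &corev1.PodList{}
	err := c.List(context.TODO(), pods,
		client.InNamespace(namespace),
		client.MatchingLabels{labelKey: labelVal},
	)
	return pods, err
}
```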
diff --git a/pkg/apis/serving/v1beta1/inference_service_status.go b/pkg/apis/serving/v1beta1/inference_service_status.go index 2d4973a7758..4ddd2316f14 100644 --- a/pkg/apis/serving/v1beta1/inference_service_status.go +++ b/pkg/apis/serving/v1beta1/inference_service_status.go @@ -17,6 +17,9 @@ limitations under the License. package v1beta1 import ( + "reflect" + + "github.com/kserve/kserve/pkg/constants" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -185,7 +188,7 @@ const ( ) // FailureReason enum -// +kubebuilder:validation:Enum=ModelLoadFailed;RuntimeUnhealthy;NoSupportingRuntime;RuntimeNotRecognized;InvalidPredictorSpec +// +kubebuilder:validation:Enum=ModelLoadFailed;RuntimeUnhealthy;RuntimeDisabled;NoSupportingRuntime;RuntimeNotRecognized;InvalidPredictorSpec type FailureReason string // FailureReason enum values @@ -194,6 +197,8 @@ const ( ModelLoadFailed FailureReason = "ModelLoadFailed" // Corresponding ServingRuntime containers failed to start or are unhealthy RuntimeUnhealthy FailureReason = "RuntimeUnhealthy" + // The ServingRuntime is disabled + RuntimeDisabled FailureReason = "RuntimeDisabled" // There are no ServingRuntime which support the specified model type NoSupportingRuntime FailureReason = "NoSupportingRuntime" // There is no ServingRuntime defined with the specified runtime name @@ -394,3 +399,113 @@ func (ss *InferenceServiceStatus) ClearCondition(conditionType apis.ConditionTyp conditionSet.Manage(ss).ClearCondition(conditionType) } } + +func (ss *InferenceServiceStatus) UpdateModelRevisionStates(modelState ModelState, totalCopies int, info *FailureInfo) { + if ss.ModelStatus.ModelRevisionStates == nil { + ss.ModelStatus.ModelRevisionStates = &ModelRevisionStates{TargetModelState: modelState} + } else { + ss.ModelStatus.ModelRevisionStates.TargetModelState = modelState + } + // Update the transition status and failure info based on the new model state + if modelState == Pending || modelState == Loading { + ss.ModelStatus.TransitionStatus = InProgress + } else if modelState == Loaded { + ss.ModelStatus.TransitionStatus = UpToDate + ss.ModelStatus.ModelCopies = &ModelCopies{TotalCopies: totalCopies} + ss.ModelStatus.ModelRevisionStates.ActiveModelState = Loaded + } else if modelState == FailedToLoad { + ss.ModelStatus.TransitionStatus = BlockedByFailedLoad + } + if info != nil { + ss.SetModelFailureInfo(info) + } +} + +func (ss *InferenceServiceStatus) UpdateModelTransitionStatus(status TransitionStatus, info *FailureInfo) { + ss.ModelStatus.TransitionStatus = status + // Update the model state to 'FailedToLoad' when an invalid spec is provided + if ss.ModelStatus.TransitionStatus == InvalidSpec { + if ss.ModelStatus.ModelRevisionStates == nil { + ss.ModelStatus.ModelRevisionStates = &ModelRevisionStates{TargetModelState: FailedToLoad} + } else { + ss.ModelStatus.ModelRevisionStates.TargetModelState = FailedToLoad + } + } + if info != nil { + ss.SetModelFailureInfo(info) + } +} + +func (ss *InferenceServiceStatus) SetModelFailureInfo(info *FailureInfo) bool { + if reflect.DeepEqual(info, ss.ModelStatus.LastFailureInfo) { + return false + } + ss.ModelStatus.LastFailureInfo = info + return true +}
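Editor's aside: together with PropagateModelStatus just below, these helpers form a small state machine — Pending and Loading map to an InProgress transition, Loaded marks the transition UpToDate and records the copy count and active state, and FailedToLoad blocks the transition. A hedged sketch of the Loaded case in test form (the test itself is illustrative, not part of the patch):

```go
package v1beta1_test

import (
	"testing"

	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

// Sketch: a successful load should flip the transition to UpToDate and
// record both the active model state and the total copy count.
func TestLoadedStateSketch(t *testing.T) {
	ss := v1beta1.InferenceServiceStatus{}
	ss.UpdateModelRevisionStates(v1beta1.Loaded, 2, nil)

	if ss.ModelStatus.TransitionStatus != v1beta1.UpToDate {
		t.Fatalf("expected UpToDate, got %v", ss.ModelStatus.TransitionStatus)
	}
	if ss.ModelStatus.ModelCopies.TotalCopies != 2 {
		t.Fatalf("expected 2 total copies, got %d", ss.ModelStatus.ModelCopies.TotalCopies)
	}
	if ss.ModelStatus.ModelRevisionStates.ActiveModelState != v1beta1.Loaded {
		t.Fatalf("expected ActiveModelState to be Loaded")
	}
}
```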
+ +func (ss *InferenceServiceStatus) PropagateModelStatus(statusSpec ComponentStatusSpec, podList *v1.PodList, rawDeployment bool) { + // Check that at least one pod is running for the latest revision of the inferenceservice + totalCopies := len(podList.Items) + if totalCopies == 0 { + ss.UpdateModelRevisionStates(Pending, totalCopies, nil) + return + } + // Update model state to 'Loaded' if the inferenceservice status is ready. + // For serverless deployment, the latest created revision and the latest ready revision should be equal + if ss.IsReady() { + if rawDeployment { + ss.UpdateModelRevisionStates(Loaded, totalCopies, nil) + return + } else if statusSpec.LatestCreatedRevision == statusSpec.LatestReadyRevision { + ss.UpdateModelRevisionStates(Loaded, totalCopies, nil) + return + } + } + // Update model state to 'Loading' if the storage initializer is running. + // If the storage initializer is terminated due to error or crashloopbackoff, update model + // state to 'FailedToLoad' with failure info. + for _, cs := range podList.Items[0].Status.InitContainerStatuses { + if cs.Name == constants.StorageInitializerContainerName { + if cs.State.Running != nil { + ss.UpdateModelRevisionStates(Loading, totalCopies, nil) + return + } else if cs.State.Terminated != nil && + cs.State.Terminated.Reason == constants.StateReasonError { + ss.UpdateModelRevisionStates(FailedToLoad, totalCopies, &FailureInfo{ + Reason: ModelLoadFailed, + Message: cs.State.Terminated.Message, + }) + return + } else if cs.State.Waiting != nil && + cs.State.Waiting.Reason == constants.StateReasonCrashLoopBackOff { + ss.UpdateModelRevisionStates(FailedToLoad, totalCopies, &FailureInfo{ + Reason: ModelLoadFailed, + Message: cs.LastTerminationState.Terminated.Message, + }) + return + } + } + } + // If the kserve container is terminated due to error or crashloopbackoff, update model + // state to 'FailedToLoad' with failure info. + for _, cs := range podList.Items[0].Status.ContainerStatuses { + if cs.Name == constants.InferenceServiceContainerName { + if cs.State.Terminated != nil && + cs.State.Terminated.Reason == constants.StateReasonError { + ss.UpdateModelRevisionStates(FailedToLoad, totalCopies, &FailureInfo{ + Reason: ModelLoadFailed, + Message: cs.State.Terminated.Message, + }) + } else if cs.State.Waiting != nil && + cs.State.Waiting.Reason == constants.StateReasonCrashLoopBackOff { + ss.UpdateModelRevisionStates(FailedToLoad, totalCopies, &FailureInfo{ + Reason: ModelLoadFailed, + Message: cs.LastTerminationState.Terminated.Message, + }) + } else { + ss.UpdateModelRevisionStates(Pending, totalCopies, nil) + } + } + } +} diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go index cd3a53ebc32..a05aad2f5dd 100644 --- a/pkg/constants/constants.go +++ b/pkg/constants/constants.go @@ -237,7 +237,8 @@ const ( // InferenceService container name const ( - InferenceServiceContainerName = "kserve-container" + InferenceServiceContainerName = "kserve-container" + StorageInitializerContainerName = "storage-initializer" ) // DefaultModelLocalMountPath is where models will be mounted by the storage-initializer @@ -339,6 +340,20 @@ const ( Unknown ) +// revision label +const ( + RevisionLabel = "serving.knative.dev/revision" + RawDeploymentAppLabel = "app" +) + +// container state reason +const ( + StateReasonRunning = "Running" + StateReasonCompleted = "Completed" + StateReasonError = "Error" + StateReasonCrashLoopBackOff = "CrashLoopBackOff" +) + // GetRawServiceLabel generate native service label func GetRawServiceLabel(service string) string { return "isvc."
+ service diff --git a/pkg/controller/v1alpha1/trainedmodel/controller_test.go b/pkg/controller/v1alpha1/trainedmodel/controller_test.go index de8c9ef67ec..c9ff717da9c 100644 --- a/pkg/controller/v1alpha1/trainedmodel/controller_test.go +++ b/pkg/controller/v1alpha1/trainedmodel/controller_test.go @@ -106,6 +106,12 @@ var _ = Describe("v1beta1 TrainedModel controller", func() { }, }, } + modelStatus = v1beta1.ModelStatus{ + TransitionStatus: v1beta1.UpToDate, + ModelRevisionStates: &v1beta1.ModelRevisionStates{ + ActiveModelState: v1beta1.Loaded, + }, + } ) Context("When creating a new TrainedModel with an unready InferenceService", func() { @@ -234,6 +240,7 @@ var _ = Describe("v1beta1 TrainedModel controller", func() { }, timeout, interval).Should(BeTrue()) inferenceService.Status.Status = readyConditions + inferenceService.Status.ModelStatus = modelStatus Expect(k8sClient.Status().Update(context.TODO(), inferenceService)).To(BeNil()) // Create modelConfig @@ -349,6 +356,7 @@ var _ = Describe("v1beta1 TrainedModel controller", func() { URL: clusterURL, } inferenceService.Status.Status = readyConditions + inferenceService.Status.ModelStatus = modelStatus Expect(k8sClient.Status().Update(context.TODO(), inferenceService)).To(BeNil()) tmInstance := &v1alpha1api.TrainedModel{ @@ -496,6 +504,7 @@ var _ = Describe("v1beta1 TrainedModel controller", func() { }, timeout, interval).Should(BeTrue()) inferenceService.Status.Status = readyConditions + inferenceService.Status.ModelStatus = modelStatus Expect(k8sClient.Status().Update(context.TODO(), inferenceService)).To(BeNil()) tmInstance := &v1alpha1api.TrainedModel{ @@ -624,6 +633,7 @@ var _ = Describe("v1beta1 TrainedModel controller", func() { }, timeout, interval).Should(BeTrue()) inferenceService.Status.Status = readyConditions + inferenceService.Status.ModelStatus = modelStatus Expect(k8sClient.Status().Update(context.TODO(), inferenceService)).To(BeNil()) // Create modelConfig @@ -755,6 +765,7 @@ var _ = Describe("v1beta1 TrainedModel controller", func() { }, timeout, interval).Should(BeTrue()) inferenceService.Status.Status = readyConditions + inferenceService.Status.ModelStatus = modelStatus Expect(k8sClient.Status().Update(context.TODO(), inferenceService)).To(BeNil()) // Create modelConfig @@ -885,6 +896,7 @@ var _ = Describe("v1beta1 TrainedModel controller", func() { }, timeout, interval).Should(BeTrue()) inferenceService.Status.Status = readyConditions + inferenceService.Status.ModelStatus = modelStatus Expect(k8sClient.Status().Update(context.TODO(), inferenceService)).To(BeNil()) // Create modelConfig diff --git a/pkg/controller/v1beta1/inferenceservice/components/predictor.go b/pkg/controller/v1beta1/inferenceservice/components/predictor.go index d0bae638941..a3bd8d4af45 100644 --- a/pkg/controller/v1beta1/inferenceservice/components/predictor.go +++ b/pkg/controller/v1beta1/inferenceservice/components/predictor.go @@ -93,20 +93,36 @@ func (p *Predictor) Reconcile(isvc *v1beta1.InferenceService) (ctrl.Result, erro isvc.SetRuntimeDefaults() r, err := isvcutils.GetServingRuntime(p.client, *isvc.Spec.Predictor.Model.Runtime, isvc.Namespace) if err != nil { + isvc.Status.UpdateModelTransitionStatus(v1beta1.InvalidSpec, &v1beta1.FailureInfo{ + Reason: v1beta1.RuntimeNotRecognized, + Message: "Waiting for runtime to become available", + }) return ctrl.Result{}, err } if r.IsDisabled() { + isvc.Status.UpdateModelTransitionStatus(v1beta1.InvalidSpec, &v1beta1.FailureInfo{ + Reason: v1beta1.RuntimeDisabled, + Message: "Specified runtime is 
disabled", + }) return ctrl.Result{}, fmt.Errorf("specified runtime %s is disabled", *isvc.Spec.Predictor.Model.Runtime) } if isvc.Spec.Predictor.Model.ProtocolVersion != nil && !r.IsProtocolVersionSupported(*isvc.Spec.Predictor.Model.ProtocolVersion) { + isvc.Status.UpdateModelTransitionStatus(v1beta1.InvalidSpec, &v1beta1.FailureInfo{ + Reason: v1beta1.NoSupportingRuntime, + Message: "Specified runtime does not support specified protocol version", + }) return ctrl.Result{}, fmt.Errorf("specified runtime %s does not support specified protocol version", *isvc.Spec.Predictor.Model.Runtime) } // Verify that the selected runtime supports the specified framework. if !isvc.Spec.Predictor.Model.RuntimeSupportsModel(r) { + isvc.Status.UpdateModelTransitionStatus(v1beta1.InvalidSpec, &v1beta1.FailureInfo{ + Reason: v1beta1.NoSupportingRuntime, + Message: "Specified runtime does not support specified framework/version", + }) return ctrl.Result{}, fmt.Errorf("specified runtime %s does not support specified framework/version", *isvc.Spec.Predictor.Model.Runtime) } @@ -117,6 +133,10 @@ func (p *Predictor) Reconcile(isvc *v1beta1.InferenceService) (ctrl.Result, erro return ctrl.Result{}, err } if len(runtimes) == 0 { + isvc.Status.UpdateModelTransitionStatus(v1beta1.InvalidSpec, &v1beta1.FailureInfo{ + Reason: v1beta1.NoSupportingRuntime, + Message: "No runtime found to support specified framework/version", + }) return ctrl.Result{}, fmt.Errorf("no runtime found to support predictor with model type: %v", isvc.Spec.Predictor.Model.ModelFormat) } // Get first supporting runtime. @@ -137,16 +157,28 @@ func (p *Predictor) Reconcile(isvc *v1beta1.InferenceService) (ctrl.Result, erro } if len(sRuntime.Containers) == 0 { + isvc.Status.UpdateModelTransitionStatus(v1beta1.InvalidSpec, &v1beta1.FailureInfo{ + Reason: v1beta1.InvalidPredictorSpec, + Message: "No container configuration found in selected serving runtime", + }) return ctrl.Result{}, errors.New("no container configuration found in selected serving runtime") } // Assume only one container is specified in runtime spec. 
container, err = isvcutils.MergeRuntimeContainers(&sRuntime.Containers[0], &isvc.Spec.Predictor.Model.Container) if err != nil { + isvc.Status.UpdateModelTransitionStatus(v1beta1.InvalidSpec, &v1beta1.FailureInfo{ + Reason: v1beta1.InvalidPredictorSpec, + Message: "Failed to get runtime container", + }) return ctrl.Result{}, errors.Wrapf(err, "failed to get runtime container") } mergedPodSpec, err := isvcutils.MergePodSpec(&sRuntime.ServingRuntimePodSpec, &isvc.Spec.Predictor.PodSpec) if err != nil { + isvc.Status.UpdateModelTransitionStatus(v1beta1.InvalidSpec, &v1beta1.FailureInfo{ + Reason: v1beta1.InvalidPredictorSpec, + Message: "Failed to consolidate serving runtime PodSpecs", + }) return ctrl.Result{}, errors.Wrapf(err, "failed to consolidate serving runtime PodSpecs") } @@ -155,6 +187,10 @@ func (p *Predictor) Reconcile(isvc *v1beta1.InferenceService) (ctrl.Result, erro // Replace placeholders in runtime container by values from inferenceservice metadata if err = isvcutils.ReplacePlaceholders(container, isvc.ObjectMeta); err != nil { + isvc.Status.UpdateModelTransitionStatus(v1beta1.InvalidSpec, &v1beta1.FailureInfo{ + Reason: v1beta1.InvalidPredictorSpec, + Message: "Failed to replace placeholders in serving runtime Container", + }) return ctrl.Result{}, errors.Wrapf(err, "failed to replace placeholders in serving runtime Container") } @@ -205,8 +241,14 @@ func (p *Predictor) Reconcile(isvc *v1beta1.InferenceService) (ctrl.Result, erro return ctrl.Result{}, err } + var rawDeployment bool + var podLabelKey string + var podLabelValue string + // Here we allow switch between knative and vanilla deployment if isvcutils.GetDeploymentMode(annotations, deployConfig) == constants.RawDeployment { + rawDeployment = true + podLabelKey = constants.RawDeploymentAppLabel r, err := raw.NewRawKubeReconciler(p.client, p.scheme, objectMeta, &isvc.Spec.Predictor.ComponentExtensionSpec, &podSpec) if err != nil { @@ -233,6 +275,7 @@ func (p *Predictor) Reconcile(isvc *v1beta1.InferenceService) (ctrl.Result, erro } isvc.Status.PropagateRawStatus(v1beta1.PredictorComponent, deployment, r.URL) } else { + podLabelKey = constants.RevisionLabel r := knative.NewKsvcReconciler(p.client, p.scheme, objectMeta, &isvc.Spec.Predictor.ComponentExtensionSpec, &podSpec, isvc.Status.Components[v1beta1.PredictorComponent]) if err := controllerutil.SetControllerReference(isvc, r.Service, p.scheme); err != nil { @@ -244,6 +287,17 @@ func (p *Predictor) Reconcile(isvc *v1beta1.InferenceService) (ctrl.Result, erro } isvc.Status.PropagateStatus(v1beta1.PredictorComponent, status) } + statusSpec := isvc.Status.Components[v1beta1.PredictorComponent] + if rawDeployment { + podLabelValue = constants.GetRawServiceLabel(constants.DefaultPredictorServiceName(isvc.ObjectMeta.Name)) + } else { + podLabelValue = statusSpec.LatestCreatedRevision + } + podList, err := isvcutils.ListPodsByLabel(p.client, isvc.ObjectMeta.Namespace, podLabelKey, podLabelValue) + if err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to list inferenceservice pods by label") + } + isvc.Status.PropagateModelStatus(statusSpec, podList, rawDeployment) return ctrl.Result{}, nil }
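Editor's aside: the pod lookup added above is the one step that differs by deployment mode — raw deployments are matched on the app label produced by GetRawServiceLabel, while serverless mode matches the Knative revision label against the latest created revision. A compact sketch of just that selection logic, under the constants introduced earlier in this patch (the helper itself is illustrative, mirroring but not copying the reconciler code):

```go
package sketch

import "github.com/kserve/kserve/pkg/constants"

// predictorPodSelector returns the label key/value used to list the
// predictor pods whose container states feed PropagateModelStatus.
func predictorPodSelector(rawDeployment bool, isvcName, latestCreatedRevision string) (key, value string) {
	if rawDeployment {
		// e.g. app=isvc.<name>-predictor-default
		return constants.RawDeploymentAppLabel,
			constants.GetRawServiceLabel(constants.DefaultPredictorServiceName(isvcName))
	}
	// e.g. serving.knative.dev/revision=<name>-predictor-default-00001
	return constants.RevisionLabel, latestCreatedRevision
}
```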
diff --git a/pkg/controller/v1beta1/inferenceservice/controller.go b/pkg/controller/v1beta1/inferenceservice/controller.go index 8cb656197d0..7d75230dcec 100644 --- a/pkg/controller/v1beta1/inferenceservice/controller.go +++ b/pkg/controller/v1beta1/inferenceservice/controller.go @@ -69,6 +69,7 @@ import ( // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=namespaces,verbs=get;list;watch // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch // InferenceState describes the Readiness of the InferenceService type InferenceServiceState string @@ -179,6 +180,7 @@ func (r *InferenceServiceReconciler) Reconcile(ctx context.Context, req ctrl.Req if err != nil { r.Log.Error(err, "Failed to reconcile", "reconciler", reflect.ValueOf(reconciler), "Name", isvc.Name) r.Recorder.Eventf(isvc, v1.EventTypeWarning, "InternalError", err.Error()) + r.updateStatus(isvc, deploymentMode) return reconcile.Result{}, errors.Wrapf(err, "fails to reconcile component") } if result.Requeue || result.RequeueAfter > 0 { diff --git a/pkg/controller/v1beta1/inferenceservice/controller_test.go b/pkg/controller/v1beta1/inferenceservice/controller_test.go index d45b7fc9aaa..45bacb6a98e 100644 --- a/pkg/controller/v1beta1/inferenceservice/controller_test.go +++ b/pkg/controller/v1beta1/inferenceservice/controller_test.go @@ -18,6 +18,7 @@ package inferenceservice import ( "context" + "fmt" "time" "knative.dev/pkg/kmp" @@ -42,6 +43,7 @@ import ( duckv1 "knative.dev/pkg/apis/duck/v1" "knative.dev/pkg/network" knservingv1 "knative.dev/serving/pkg/apis/serving/v1" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) @@ -142,6 +144,7 @@ var _ = Describe("v1beta1 inference service controller", func() { }, } Expect(k8sClient.Create(ctx, isvc)).Should(Succeed()) + defer k8sClient.Delete(ctx, isvc) inferenceService := &v1beta1.InferenceService{} Eventually(func() bool { @@ -512,6 +515,10 @@ var _ = Describe("v1beta1 inference service controller", func() { URL: transformerUrl, }, }, + ModelStatus: v1beta1.ModelStatus{ + TransitionStatus: "InProgress", + ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"}, + }, } Eventually(func() string { isvc := &v1beta1.InferenceService{} @@ -732,6 +739,10 @@ var _ = Describe("v1beta1 inference service controller", func() { URL: explainerUrl, }, }, + ModelStatus: v1beta1.ModelStatus{ + TransitionStatus: "InProgress", + ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"}, + }, } Eventually(func() string { isvc := &v1beta1.InferenceService{} @@ -1167,4 +1178,479 @@ var _ = Describe("v1beta1 inference service controller", func() { }) }) + + Context("When creating an inference service with a ServingRuntime which does not exist", func() { + It("Should fail with reason RuntimeNotRecognized", func() { + serviceName := "svc-with-unknown-servingruntime" + servingRuntimeName := "tf-serving-unknown" + namespace := "default" + + var predictorServiceKey = types.NamespacedName{Name: serviceName, Namespace: namespace} + + var isvc = &v1beta1.InferenceService{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceName, + Namespace: namespace, + }, + Spec: v1beta1.InferenceServiceSpec{ + Predictor: v1beta1.PredictorSpec{ + ComponentExtensionSpec: v1beta1.ComponentExtensionSpec{ + MinReplicas: v1beta1.GetIntReference(1), + MaxReplicas: 3, + }, + Model: &v1beta1.ModelSpec{ + ModelFormat: v1beta1.ModelFormat{ + Name: "tensorflow", + }, + Runtime: &servingRuntimeName, + PredictorExtensionSpec: v1beta1.PredictorExtensionSpec{ + StorageURI: proto.String("s3://test/mnist/export"), + }, + }, + }, + }, + } + + // Create configmap + var
configMap = &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: constants.InferenceServiceConfigMapName, + Namespace: constants.KServeNamespace, + }, + Data: configs, + } + Expect(k8sClient.Create(context.TODO(), configMap)).NotTo(gomega.HaveOccurred()) + defer k8sClient.Delete(context.TODO(), configMap) + + // Create the InferenceService object and expect the Reconcile and knative service to be created + Expect(k8sClient.Create(context.TODO(), isvc)).NotTo(gomega.HaveOccurred()) + defer k8sClient.Delete(context.TODO(), isvc) + + inferenceService := &v1beta1.InferenceService{} + Eventually(func() bool { + err := k8sClient.Get(ctx, predictorServiceKey, inferenceService) + if err != nil { + return false + } + if inferenceService.Status.ModelStatus.LastFailureInfo == nil { + return false + } + return true + }, timeout, interval).Should(BeTrue()) + + var failureInfo = v1beta1.FailureInfo{ + Reason: v1beta1.RuntimeNotRecognized, + Message: "Waiting for runtime to become available", + } + Expect(inferenceService.Status.ModelStatus.TransitionStatus).To(Equal(v1beta1.InvalidSpec)) + Expect(inferenceService.Status.ModelStatus.ModelRevisionStates.TargetModelState).To(Equal(v1beta1.FailedToLoad)) + Expect(cmp.Diff(&failureInfo, inferenceService.Status.ModelStatus.LastFailureInfo)).To(gomega.Equal("")) + }) + }) + + Context("When creating an inference service with a ServingRuntime which is disabled", func() { + It("Should fail with reason RuntimeDisabled", func() { + serviceName := "svc-with-disabled-servingruntime" + servingRuntimeName := "tf-serving-disabled" + namespace := "default" + + var predictorServiceKey = types.NamespacedName{Name: serviceName, Namespace: namespace} + + var servingRuntime = &v1alpha1.ServingRuntime{ + ObjectMeta: metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + }, + Spec: v1alpha1.ServingRuntimeSpec{ + SupportedModelFormats: []v1alpha1.SupportedModelFormat{ + { + Name: "tensorflow", + Version: proto.String("1"), + AutoSelect: proto.Bool(true), + }, + }, + ServingRuntimePodSpec: v1alpha1.ServingRuntimePodSpec{ + Containers: []v1.Container{ + { + Name: "kserve-container", + Image: "tensorflow/serving:1.14.0", + Command: []string{"/usr/bin/tensorflow_model_server"}, + Args: []string{ + "--port=9000", + "--rest_api_port=8080", + "--model_base_path=/mnt/models", + "--rest_api_timeout_in_ms=60000", + }, + Resources: defaultResource, + }, + }, + }, + Disabled: proto.Bool(true), + }, + } + + Expect(k8sClient.Create(context.TODO(), servingRuntime)).NotTo(gomega.HaveOccurred()) + defer k8sClient.Delete(context.TODO(), servingRuntime) + + var isvc = &v1beta1.InferenceService{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceName, + Namespace: namespace, + }, + Spec: v1beta1.InferenceServiceSpec{ + Predictor: v1beta1.PredictorSpec{ + ComponentExtensionSpec: v1beta1.ComponentExtensionSpec{ + MinReplicas: v1beta1.GetIntReference(1), + MaxReplicas: 3, + }, + Model: &v1beta1.ModelSpec{ + ModelFormat: v1beta1.ModelFormat{ + Name: "tensorflow", + }, + Runtime: &servingRuntimeName, + PredictorExtensionSpec: v1beta1.PredictorExtensionSpec{ + StorageURI: proto.String("s3://test/mnist/export"), + }, + }, + }, + }, + } + + // Create configmap + var configMap = &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: constants.InferenceServiceConfigMapName, + Namespace: constants.KServeNamespace, + }, + Data: configs, + } + Expect(k8sClient.Create(context.TODO(), configMap)).NotTo(gomega.HaveOccurred()) + defer k8sClient.Delete(context.TODO(), configMap) + + // Create the 
InferenceService object and expect the Reconcile and knative service to be created + Expect(k8sClient.Create(context.TODO(), isvc)).NotTo(gomega.HaveOccurred()) + defer k8sClient.Delete(context.TODO(), isvc) + + inferenceService := &v1beta1.InferenceService{} + Eventually(func() bool { + err := k8sClient.Get(ctx, predictorServiceKey, inferenceService) + if err != nil { + return false + } + if inferenceService.Status.ModelStatus.LastFailureInfo == nil { + return false + } + return true + }, timeout, interval).Should(BeTrue()) + + var failureInfo = v1beta1.FailureInfo{ + Reason: v1beta1.RuntimeDisabled, + Message: "Specified runtime is disabled", + } + Expect(inferenceService.Status.ModelStatus.TransitionStatus).To(Equal(v1beta1.InvalidSpec)) + Expect(inferenceService.Status.ModelStatus.ModelRevisionStates.TargetModelState).To(Equal(v1beta1.FailedToLoad)) + Expect(cmp.Diff(&failureInfo, inferenceService.Status.ModelStatus.LastFailureInfo)).To(gomega.Equal("")) + }) + }) + + Context("When creating an inference service with a ServingRuntime which does not support specified model format", func() { + It("Should fail with reason NoSupportingRuntime", func() { + serviceName := "svc-with-unsupported-servingruntime" + servingRuntimeName := "tf-serving-unsupported" + namespace := "default" + + var predictorServiceKey = types.NamespacedName{Name: serviceName, Namespace: namespace} + + // Create configmap + var configMap = &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: constants.InferenceServiceConfigMapName, + Namespace: constants.KServeNamespace, + }, + Data: configs, + } + Expect(k8sClient.Create(context.TODO(), configMap)).NotTo(gomega.HaveOccurred()) + defer k8sClient.Delete(context.TODO(), configMap) + + var servingRuntime = &v1alpha1.ServingRuntime{ + ObjectMeta: metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + }, + Spec: v1alpha1.ServingRuntimeSpec{ + SupportedModelFormats: []v1alpha1.SupportedModelFormat{ + { + Name: "tensorflow", + Version: proto.String("1"), + AutoSelect: proto.Bool(true), + }, + }, + ServingRuntimePodSpec: v1alpha1.ServingRuntimePodSpec{ + Containers: []v1.Container{ + { + Name: "kserve-container", + Image: "tensorflow/serving:1.14.0", + Command: []string{"/usr/bin/tensorflow_model_server"}, + Args: []string{ + "--port=9000", + "--rest_api_port=8080", + "--model_base_path=/mnt/models", + "--rest_api_timeout_in_ms=60000", + }, + Resources: defaultResource, + }, + }, + }, + Disabled: proto.Bool(false), + }, + } + + Expect(k8sClient.Create(context.TODO(), servingRuntime)).NotTo(gomega.HaveOccurred()) + defer k8sClient.Delete(context.TODO(), servingRuntime) + + var isvc = &v1beta1.InferenceService{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceName, + Namespace: namespace, + }, + Spec: v1beta1.InferenceServiceSpec{ + Predictor: v1beta1.PredictorSpec{ + ComponentExtensionSpec: v1beta1.ComponentExtensionSpec{ + MinReplicas: v1beta1.GetIntReference(1), + MaxReplicas: 3, + }, + Model: &v1beta1.ModelSpec{ + ModelFormat: v1beta1.ModelFormat{ + Name: "sklearn", + }, + Runtime: &servingRuntimeName, + PredictorExtensionSpec: v1beta1.PredictorExtensionSpec{ + StorageURI: proto.String("s3://test/mnist/export"), + }, + }, + }, + }, + } + + // Create the InferenceService object and expect the Reconcile and knative service to be created + Expect(k8sClient.Create(context.TODO(), isvc)).NotTo(gomega.HaveOccurred()) + defer k8sClient.Delete(context.TODO(), isvc) + + inferenceService := &v1beta1.InferenceService{} + Eventually(func() bool { + err := 
k8sClient.Get(ctx, predictorServiceKey, inferenceService) + if err != nil { + return false + } + if inferenceService.Status.ModelStatus.LastFailureInfo == nil { + return false + } + return true + }, timeout, interval).Should(BeTrue()) + + var failureInfo = v1beta1.FailureInfo{ + Reason: v1beta1.NoSupportingRuntime, + Message: "Specified runtime does not support specified framework/version", + } + Expect(inferenceService.Status.ModelStatus.TransitionStatus).To(Equal(v1beta1.InvalidSpec)) + Expect(inferenceService.Status.ModelStatus.ModelRevisionStates.TargetModelState).To(Equal(v1beta1.FailedToLoad)) + Expect(cmp.Diff(&failureInfo, inferenceService.Status.ModelStatus.LastFailureInfo)).To(gomega.Equal("")) + }) + }) + + Context("When creating an inference service with invalid Storage URI", func() { + It("Should fail with reason ModelLoadFailed", func() { + serviceName := "svc-with-servingruntime" + servingRuntimeName := "tf-serving" + namespace := "default" + var inferenceServiceKey = types.NamespacedName{Name: serviceName, Namespace: namespace} + + // Create configmap + var configMap = &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: constants.InferenceServiceConfigMapName, + Namespace: constants.KServeNamespace, + }, + Data: configs, + } + Expect(k8sClient.Create(context.TODO(), configMap)).NotTo(gomega.HaveOccurred()) + defer k8sClient.Delete(context.TODO(), configMap) + + var servingRuntime = &v1alpha1.ServingRuntime{ + ObjectMeta: metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + }, + Spec: v1alpha1.ServingRuntimeSpec{ + SupportedModelFormats: []v1alpha1.SupportedModelFormat{ + { + Name: "tensorflow", + Version: proto.String("1"), + AutoSelect: proto.Bool(true), + }, + }, + ServingRuntimePodSpec: v1alpha1.ServingRuntimePodSpec{ + Containers: []v1.Container{ + { + Name: "kserve-container", + Image: "tensorflow/serving:1.14.0", + Command: []string{"/usr/bin/tensorflow_model_server"}, + Args: []string{ + "--port=9000", + "--rest_api_port=8080", + "--model_base_path=/mnt/models", + "--rest_api_timeout_in_ms=60000", + }, + Resources: defaultResource, + }, + }, + }, + Disabled: proto.Bool(false), + }, + } + + Expect(k8sClient.Create(context.TODO(), servingRuntime)).NotTo(gomega.HaveOccurred()) + defer k8sClient.Delete(context.TODO(), servingRuntime) + + var isvc = &v1beta1.InferenceService{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceName, + Namespace: namespace, + }, + Spec: v1beta1.InferenceServiceSpec{ + Predictor: v1beta1.PredictorSpec{ + ComponentExtensionSpec: v1beta1.ComponentExtensionSpec{ + MinReplicas: v1beta1.GetIntReference(1), + MaxReplicas: 3, + }, + Model: &v1beta1.ModelSpec{ + ModelFormat: v1beta1.ModelFormat{ + Name: "tensorflow", + }, + Runtime: &servingRuntimeName, + PredictorExtensionSpec: v1beta1.PredictorExtensionSpec{ + StorageURI: proto.String("s3://test/mnist/invalid"), + }, + }, + }, + }, + } + + // Create the InferenceService object and expect the Reconcile and knative service to be created + Expect(k8sClient.Create(context.TODO(), isvc)).NotTo(gomega.HaveOccurred()) + defer k8sClient.Delete(context.TODO(), isvc) + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceName + "-predictor-" + namespace + "-00001-deployment-76464ds2zpv", + Namespace: namespace, + Labels: map[string]string{"serving.knative.dev/revision": serviceName + "-predictor-" + namespace + "-00001"}, + }, + Spec: v1.PodSpec{ + InitContainers: []v1.Container{ + { + Name: "storage-initializer", + Image: "kserve/storage-initializer:latest", + Args: 
[]string{ + "gs://kfserving-invalid/models/sklearn/1.0/model", + "/mnt/models", + }, + Resources: defaultResource, + }, + }, + Containers: []v1.Container{ + { + Name: "kserve-container", + Image: "tensorflow/serving:1.14.0", + Command: []string{"/usr/bin/tensorflow_model_server"}, + Args: []string{ + "--port=9000", + "--rest_api_port=8080", + "--model_base_path=/mnt/models", + "--rest_api_timeout_in_ms=60000", + }, + Env: []v1.EnvVar{ + { + Name: "PORT", + Value: "8080", + }, + { + Name: "K_REVISION", + Value: serviceName + "-predictor-" + namespace + "-00001", + }, + { + Name: "K_CONFIGURATION", + Value: serviceName + "-predictor-" + namespace, + }, + { + Name: "K_SERVICE", + Value: serviceName + "-predictor-" + namespace, + }, + }, + Resources: defaultResource, + }, + }, + }, + } + Eventually(func() bool { + err := k8sClient.Create(context.TODO(), pod) + if err != nil { + fmt.Printf("Error %#v\n", err) + return false + } + return true + }, timeout).Should(BeTrue()) + defer k8sClient.Delete(context.TODO(), pod) + + podStatusPatch := []byte(`{"status":{"containerStatuses":[{"image":"tensorflow/serving:1.14.0","name":"kserve-container","lastState":{},"state":{"waiting":{"reason":"PodInitializing"}}}],"initContainerStatuses":[{"image":"kserve/storage-initializer:latest","name":"storage-initializer","lastState":{"terminated":{"exitCode":1,"message":"Invalid Storage URI provided","reason":"Error"}},"state":{"waiting":{"reason":"CrashLoopBackOff"}}}]}}`) + Eventually(func() bool { + err := k8sClient.Status().Patch(context.TODO(), pod, client.RawPatch(types.StrategicMergePatchType, podStatusPatch)) + if err != nil { + fmt.Printf("Error %#v\n", err) + return false + } + return true + }, timeout).Should(BeTrue()) + + actualService := &knservingv1.Service{} + predictorServiceKey := types.NamespacedName{Name: constants.DefaultPredictorServiceName(serviceName), + Namespace: namespace} + Eventually(func() error { return k8sClient.Get(context.TODO(), predictorServiceKey, actualService) }, timeout).
+ Should(Succeed()) + + predictorUrl, _ := apis.ParseURL("http://" + constants.InferenceServiceHostName(constants.DefaultPredictorServiceName(serviceName), namespace, domain)) + // update predictor status + updatedService := actualService.DeepCopy() + updatedService.Status.LatestCreatedRevisionName = serviceName + "-predictor-" + namespace + "-00001" + updatedService.Status.URL = predictorUrl + updatedService.Status.Conditions = duckv1.Conditions{ + { + Type: knservingv1.ServiceConditionReady, + Status: "False", + }, + } + Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error { + return k8sClient.Status().Update(context.TODO(), updatedService) + })).NotTo(gomega.HaveOccurred()) + + inferenceService := &v1beta1.InferenceService{} + Eventually(func() bool { + err := k8sClient.Get(ctx, inferenceServiceKey, inferenceService) + if err != nil { + fmt.Printf("Error %#v\n", err) + return false + } + if inferenceService.Status.ModelStatus.LastFailureInfo == nil { + return false + } + return true + }, timeout, interval).Should(BeTrue()) + + Expect(inferenceService.Status.ModelStatus.TransitionStatus).To(Equal(v1beta1.BlockedByFailedLoad)) + Expect(inferenceService.Status.ModelStatus.ModelRevisionStates.TargetModelState).To(Equal(v1beta1.FailedToLoad)) + Expect(inferenceService.Status.ModelStatus.LastFailureInfo.Reason).To(Equal(v1beta1.ModelLoadFailed)) + Expect(inferenceService.Status.ModelStatus.LastFailureInfo.Message).To(Equal("Invalid Storage URI provided")) + }) + }) + }) diff --git a/pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go b/pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go index 01154e1a9e0..b61de798982 100644 --- a/pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go +++ b/pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go @@ -402,6 +402,10 @@ var _ = Describe("v1beta1 inference service controller", func() { }, }, }, + ModelStatus: v1beta1.ModelStatus{ + TransitionStatus: "InProgress", + ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"}, + }, } Eventually(func() string { isvc := &v1beta1.InferenceService{} @@ -815,6 +819,10 @@ var _ = Describe("v1beta1 inference service controller", func() { }, }, }, + ModelStatus: v1beta1.ModelStatus{ + TransitionStatus: "InProgress", + ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"}, + }, } Eventually(func() string { isvc := &v1beta1.InferenceService{} @@ -1229,6 +1237,10 @@ var _ = Describe("v1beta1 inference service controller", func() { }, }, }, + ModelStatus: v1beta1.ModelStatus{ + TransitionStatus: "InProgress", + ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"}, + }, } Eventually(func() string { isvc := &v1beta1.InferenceService{} diff --git a/pkg/controller/v1beta1/inferenceservice/utils/utils.go b/pkg/controller/v1beta1/inferenceservice/utils/utils.go index 569282688d5..f01d6df92a1 100644 --- a/pkg/controller/v1beta1/inferenceservice/utils/utils.go +++ b/pkg/controller/v1beta1/inferenceservice/utils/utils.go @@ -23,6 +23,7 @@ import ( "html/template" "k8s.io/apimachinery/pkg/util/strategicpatch" "regexp" + "sort" "strings" "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" @@ -202,3 +203,24 @@ func UpdateImageTag(container *v1.Container, runtimeVersion *string, isvcConfig } } } + +// ListPodsByLabel gets a PodList by label, sorted by creation timestamp with the newest first.
+func ListPodsByLabel(cl client.Client, namespace string, labelKey string, labelVal string) (*v1.PodList, error) { + podList := &v1.PodList{} + opts := []client.ListOption{ + client.InNamespace(namespace), + client.MatchingLabels{labelKey: labelVal}, + } + err := cl.List(context.TODO(), podList, opts...) + if err != nil && !errors.IsNotFound(err) { + return nil, err + } + sortPodsByCreatedTimestampDesc(podList) + return podList, nil +} + +func sortPodsByCreatedTimestampDesc(pods *v1.PodList) { + sort.Slice(pods.Items, func(i, j int) bool { + return pods.Items[j].ObjectMeta.CreationTimestamp.Before(&pods.Items[i].ObjectMeta.CreationTimestamp) + }) +} diff --git a/test/crds/serving.kserve.io_inferenceservices.yaml b/test/crds/serving.kserve.io_inferenceservices.yaml index 805e9c7295e..22365496d33 100644 --- a/test/crds/serving.kserve.io_inferenceservices.yaml +++ b/test/crds/serving.kserve.io_inferenceservices.yaml @@ -14828,6 +14828,7 @@ spec: enum: - ModelLoadFailed - RuntimeUnhealthy + - RuntimeDisabled - NoSupportingRuntime - RuntimeNotRecognized - InvalidPredictorSpec
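Finally, for completeness, a hedged sketch of how a client (or a test like those added above) can read the new ModelStatus fields once this patch is in place; the service name, namespace, and helper function are placeholders, not part of the patch:

```go
package sketch

import (
	"context"
	"fmt"

	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// reportModelStatus summarizes the ModelStatus fields this patch
// populates. "my-isvc" and "default" are illustrative placeholders.
func reportModelStatus(ctx context.Context, c client.Client) error {
	isvc := &v1beta1.InferenceService{}
	if err := c.Get(ctx, types.NamespacedName{Name: "my-isvc", Namespace: "default"}, isvc); err != nil {
		return err
	}
	ms := isvc.Status.ModelStatus
	fmt.Printf("transition=%v\n", ms.TransitionStatus)
	if ms.ModelRevisionStates != nil {
		fmt.Printf("target=%v active=%v\n",
			ms.ModelRevisionStates.TargetModelState,
			ms.ModelRevisionStates.ActiveModelState)
	}
	if ms.LastFailureInfo != nil {
		fmt.Printf("failure reason=%v message=%s\n", ms.LastFailureInfo.Reason, ms.LastFailureInfo.Message)
	}
	return nil
}
```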