diff --git a/config/crd/serving.kserve.io_inferencegraphs.yaml b/config/crd/serving.kserve.io_inferencegraphs.yaml index 6b8915003ad..4b5977f06a3 100644 --- a/config/crd/serving.kserve.io_inferencegraphs.yaml +++ b/config/crd/serving.kserve.io_inferencegraphs.yaml @@ -40,6 +40,360 @@ spec: type: object spec: properties: + affinity: + properties: + nodeAffinity: + properties: + preferredDuringSchedulingIgnoredDuringExecution: + items: + properties: + preference: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchFields: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + type: object + weight: + format: int32 + type: integer + required: + - preference + - weight + type: object + type: array + requiredDuringSchedulingIgnoredDuringExecution: + properties: + nodeSelectorTerms: + items: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchFields: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + type: object + type: array + required: + - nodeSelectorTerms + type: object + type: object + podAffinity: + properties: + preferredDuringSchedulingIgnoredDuringExecution: + items: + properties: + podAffinityTerm: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + type: object + type: object + namespaceSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + type: object + type: object + namespaces: + items: + type: string + type: array + topologyKey: + type: string + required: + - topologyKey + type: object + weight: + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + requiredDuringSchedulingIgnoredDuringExecution: + items: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + type: object + type: object + namespaceSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + type: object + type: object + namespaces: + items: + type: string + type: array + topologyKey: + type: string + required: + - topologyKey + type: object + type: array + type: object + podAntiAffinity: + properties: + preferredDuringSchedulingIgnoredDuringExecution: + items: + properties: + podAffinityTerm: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + type: object + type: object + namespaceSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + type: object + type: object + namespaces: + items: + type: string + type: array + topologyKey: + type: string + required: + - topologyKey + type: object + weight: + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + requiredDuringSchedulingIgnoredDuringExecution: + items: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + type: object + type: object + namespaceSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + type: object + type: object + namespaces: + items: + type: string + type: array + topologyKey: + type: string + required: + - topologyKey + type: object + type: array + type: object + type: object nodes: additionalProperties: properties: @@ -74,6 +428,25 @@ spec: - routerType type: object type: object + resources: + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + type: object required: - nodes type: object diff --git a/pkg/apis/serving/v1alpha1/inference_graph.go b/pkg/apis/serving/v1alpha1/inference_graph.go index e9ae203ebe6..4977047e77c 100644 --- a/pkg/apis/serving/v1alpha1/inference_graph.go +++ b/pkg/apis/serving/v1alpha1/inference_graph.go @@ -17,6 +17,7 @@ limitations under the License. package v1alpha1 import ( + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "knative.dev/pkg/apis" duckv1 "knative.dev/pkg/apis/duck/v1" @@ -45,6 +46,10 @@ type InferenceGraphSpec struct { // Map of InferenceGraph router nodes // Each node defines the router which can be different routing types Nodes map[string]InferenceRouter `json:"nodes"` + // +optional + Resources corev1.ResourceRequirements `json:"resources,omitempty"` + // +optional + Affinity *corev1.Affinity `json:"affinity,omitempty" protobuf:"bytes,18,opt,name=affinity"` } // InferenceRouterType constant for inference routing types diff --git a/pkg/apis/serving/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/serving/v1alpha1/zz_generated.deepcopy.go index ebb2617b544..3fa2af063d9 100644 --- a/pkg/apis/serving/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/serving/v1alpha1/zz_generated.deepcopy.go @@ -179,6 +179,12 @@ func (in *InferenceGraphSpec) DeepCopyInto(out *InferenceGraphSpec) { (*out)[key] = *val.DeepCopy() } } + in.Resources.DeepCopyInto(&out.Resources) + if in.Affinity != nil { + in, out := &in.Affinity, &out.Affinity + *out = new(v1.Affinity) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceGraphSpec. diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go index 6a1c56f6a45..f5731107b49 100644 --- a/pkg/constants/constants.go +++ b/pkg/constants/constants.go @@ -50,6 +50,7 @@ var ( // InferenceGraph Constants const ( RouterHeadersPropagateEnvVar = "PROPAGATE_HEADERS" + InferenceGraphLabel = "serving.kserve.io/inferencegraph" ) // TrainedModel Constants diff --git a/pkg/controller/v1alpha1/inferencegraph/controller_test.go b/pkg/controller/v1alpha1/inferencegraph/controller_test.go new file mode 100644 index 00000000000..566db562f63 --- /dev/null +++ b/pkg/controller/v1alpha1/inferencegraph/controller_test.go @@ -0,0 +1,464 @@ +/* +Copyright 2021 The KServe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package inferencegraph + +import ( + "context" + "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + "github.com/kserve/kserve/pkg/constants" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "knative.dev/pkg/kmp" + "knative.dev/pkg/ptr" + knservingdefaults "knative.dev/serving/pkg/apis/config" + knservingv1 "knative.dev/serving/pkg/apis/serving/v1" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "time" +) + +var _ = Describe("Inference Graph controller test", func() { + + // Define utility constants for object names and testing timeouts/durations and intervals. + const ( + timeout = time.Second * 10 + interval = time.Millisecond * 250 + domain = "example.com" + ) + + var ( + configs = map[string]string{ + "router": `{ + "image": "kserve/router:v0.10.0", + "memoryRequest": "100Mi", + "memoryLimit": "500Mi", + "cpuRequest": "100m", + "cpuLimit": "100m", + "headers": { + "propagate": [ + "Authorization", + "Intuit_tid" + ] + } + }`, + } + ) + + Context("When creating an inferencegraph with headers in global config", func() { + It("Should create a knative service with headers as env var of podspec", func() { + By("By creating a new InferenceGraph") + var configMap = &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: constants.InferenceServiceConfigMapName, + Namespace: constants.KServeNamespace, + }, + Data: configs, + } + Expect(k8sClient.Create(context.TODO(), configMap)).NotTo(HaveOccurred()) + defer k8sClient.Delete(context.TODO(), configMap) + graphName := "singlenode1" + var expectedRequest = reconcile.Request{NamespacedName: types.NamespacedName{Name: graphName, Namespace: "default"}} + var serviceKey = expectedRequest.NamespacedName + ctx := context.Background() + ig := &v1alpha1.InferenceGraph{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceKey.Name, + Namespace: serviceKey.Namespace, + Annotations: map[string]string{ + "serving.kserve.io/deploymentMode": string(constants.Serverless), + }, + }, + Spec: v1alpha1.InferenceGraphSpec{ + Nodes: map[string]v1alpha1.InferenceRouter{ + v1alpha1.GraphRootNodeName: { + RouterType: v1alpha1.Sequence, + Steps: []v1alpha1.InferenceStep{ + { + InferenceTarget: v1alpha1.InferenceTarget{ + ServiceURL: "http://someservice.exmaple.com", + }, + }, + }, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, ig)).Should(Succeed()) + defer k8sClient.Delete(ctx, ig) + inferenceGraphSubmitted := &v1alpha1.InferenceGraph{} + Eventually(func() bool { + err := k8sClient.Get(ctx, serviceKey, inferenceGraphSubmitted) + if err != nil { + return false + } + return true + }, timeout, interval).Should(BeTrue()) + + actualKnServiceCreated := &knservingv1.Service{} + Eventually(func() error { + return k8sClient.Get(context.TODO(), serviceKey, actualKnServiceCreated) + }, timeout). + Should(Succeed()) + + expectedKnService := &knservingv1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceKey.Name, + Namespace: serviceKey.Namespace, + }, + Spec: knservingv1.ServiceSpec{ + ConfigurationSpec: knservingv1.ConfigurationSpec{ + Template: knservingv1.RevisionTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "serving.kserve.io/inferencegraph": graphName, + }, + Annotations: map[string]string{ + "autoscaling.knative.dev/min-scale": "1", + "autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev", + "serving.kserve.io/deploymentMode": "Serverless", + }, + }, + Spec: knservingv1.RevisionSpec{ + ContainerConcurrency: ptr.Int64(knservingdefaults.DefaultContainerConcurrency), + TimeoutSeconds: ptr.Int64(knservingdefaults.DefaultRevisionTimeoutSeconds), + PodSpec: v1.PodSpec{ + Containers: []v1.Container{ + { + Image: "kserve/router:v0.10.0", + Name: knservingdefaults.DefaultUserContainerName, + Env: []v1.EnvVar{ + { + Name: "PROPAGATE_HEADERS", + Value: "Authorization,Intuit_tid", + }, + }, + Args: []string{ + "--graph-json", + "{\"nodes\":{\"root\":{\"routerType\":\"Sequence\",\"steps\":[{\"serviceUrl\":\"http://someservice.exmaple.com\"}]}},\"resources\":{}}", + }, + Resources: v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("100m"), + v1.ResourceMemory: resource.MustParse("500Mi"), + }, + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("100m"), + v1.ResourceMemory: resource.MustParse("100Mi"), + }, + }, + }, + }, + }, + }, + }, + }, + }, + } + expectedKnService.SetDefaults(context.TODO()) + Expect(kmp.SafeDiff(actualKnServiceCreated.Spec, expectedKnService.Spec)).To(Equal("")) + }) + }) + + Context("When creating an IG with resource requirements in the spec", func() { + It("Should propagate to underlying pod", func() { + var configMap = &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: constants.InferenceServiceConfigMapName, + Namespace: constants.KServeNamespace, + }, + Data: configs, + } + Expect(k8sClient.Create(context.TODO(), configMap)).NotTo(HaveOccurred()) + defer k8sClient.Delete(context.TODO(), configMap) + graphName := "singlenode2" + var expectedRequest = reconcile.Request{NamespacedName: types.NamespacedName{Name: graphName, Namespace: "default"}} + var serviceKey = expectedRequest.NamespacedName + ctx := context.Background() + ig := &v1alpha1.InferenceGraph{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceKey.Name, + Namespace: serviceKey.Namespace, + Annotations: map[string]string{ + "serving.kserve.io/deploymentMode": string(constants.Serverless), + }, + }, + Spec: v1alpha1.InferenceGraphSpec{ + Resources: v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("123m"), + v1.ResourceMemory: resource.MustParse("123Mi"), + }, + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("123m"), + v1.ResourceMemory: resource.MustParse("123Mi"), + }, + }, + Nodes: map[string]v1alpha1.InferenceRouter{ + v1alpha1.GraphRootNodeName: { + RouterType: v1alpha1.Sequence, + Steps: []v1alpha1.InferenceStep{ + { + InferenceTarget: v1alpha1.InferenceTarget{ + ServiceURL: "http://someservice.exmaple.com", + }, + }, + }, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, ig)).Should(Succeed()) + defer k8sClient.Delete(ctx, ig) + inferenceGraphSubmitted := &v1alpha1.InferenceGraph{} + Eventually(func() bool { + err := k8sClient.Get(ctx, serviceKey, inferenceGraphSubmitted) + if err != nil { + return false + } + return true + }, timeout, interval).Should(BeTrue()) + + actualKnServiceCreated := &knservingv1.Service{} + Eventually(func() error { + return k8sClient.Get(context.TODO(), serviceKey, actualKnServiceCreated) + }, timeout). + Should(Succeed()) + + expectedKnService := &knservingv1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceKey.Name, + Namespace: serviceKey.Namespace, + }, + Spec: knservingv1.ServiceSpec{ + ConfigurationSpec: knservingv1.ConfigurationSpec{ + Template: knservingv1.RevisionTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "serving.kserve.io/inferencegraph": graphName, + }, + Annotations: map[string]string{ + "autoscaling.knative.dev/min-scale": "1", + "autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev", + "serving.kserve.io/deploymentMode": "Serverless", + }, + }, + Spec: knservingv1.RevisionSpec{ + ContainerConcurrency: ptr.Int64(knservingdefaults.DefaultContainerConcurrency), + TimeoutSeconds: ptr.Int64(knservingdefaults.DefaultRevisionTimeoutSeconds), + PodSpec: v1.PodSpec{ + Containers: []v1.Container{ + { + Image: "kserve/router:v0.10.0", + Name: knservingdefaults.DefaultUserContainerName, + Env: []v1.EnvVar{ + { + Name: "PROPAGATE_HEADERS", + Value: "Authorization,Intuit_tid", + }, + }, + Args: []string{ + "--graph-json", + "{\"nodes\":{\"root\":{\"routerType\":\"Sequence\",\"steps\":[{\"serviceUrl\":\"http://someservice.exmaple.com\"}]}},\"resources\":{\"limits\":{\"cpu\":\"123m\",\"memory\":\"123Mi\"},\"requests\":{\"cpu\":\"123m\",\"memory\":\"123Mi\"}}}", + }, + Resources: v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("123m"), + v1.ResourceMemory: resource.MustParse("123Mi"), + }, + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("123m"), + v1.ResourceMemory: resource.MustParse("123Mi"), + }, + }, + }, + }, + }, + }, + }, + }, + }, + } + expectedKnService.SetDefaults(context.TODO()) + Expect(kmp.SafeDiff(actualKnServiceCreated.Spec, expectedKnService.Spec)).To(Equal("")) + }) + }) + + Context("When creating an IG with podaffinity in the spec", func() { + It("Should propagate to underlying pod", func() { + var configMap = &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: constants.InferenceServiceConfigMapName, + Namespace: constants.KServeNamespace, + }, + Data: configs, + } + Expect(k8sClient.Create(context.TODO(), configMap)).NotTo(HaveOccurred()) + defer k8sClient.Delete(context.TODO(), configMap) + graphName := "singlenode3" + var expectedRequest = reconcile.Request{NamespacedName: types.NamespacedName{Name: graphName, Namespace: "default"}} + var serviceKey = expectedRequest.NamespacedName + ctx := context.Background() + ig := &v1alpha1.InferenceGraph{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceKey.Name, + Namespace: serviceKey.Namespace, + Annotations: map[string]string{ + "serving.kserve.io/deploymentMode": string(constants.Serverless), + }, + }, + + Spec: v1alpha1.InferenceGraphSpec{ + Affinity: &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + Weight: 100, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "serving.kserve.io/inferencegraph", + Operator: metav1.LabelSelectorOpIn, + Values: []string{ + graphName, + }, + }, + }, + }, + TopologyKey: "topology.kubernetes.io/zone", + }, + }, + }, + }, + }, + Nodes: map[string]v1alpha1.InferenceRouter{ + v1alpha1.GraphRootNodeName: { + RouterType: v1alpha1.Sequence, + Steps: []v1alpha1.InferenceStep{ + { + InferenceTarget: v1alpha1.InferenceTarget{ + ServiceURL: "http://someservice.exmaple.com", + }, + }, + }, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, ig)).Should(Succeed()) + defer k8sClient.Delete(ctx, ig) + inferenceGraphSubmitted := &v1alpha1.InferenceGraph{} + Eventually(func() bool { + err := k8sClient.Get(ctx, serviceKey, inferenceGraphSubmitted) + if err != nil { + return false + } + return true + }, timeout, interval).Should(BeTrue()) + + actualKnServiceCreated := &knservingv1.Service{} + Eventually(func() error { + return k8sClient.Get(context.TODO(), serviceKey, actualKnServiceCreated) + }, timeout). + Should(Succeed()) + + expectedKnService := &knservingv1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceKey.Name, + Namespace: serviceKey.Namespace, + }, + Spec: knservingv1.ServiceSpec{ + ConfigurationSpec: knservingv1.ConfigurationSpec{ + Template: knservingv1.RevisionTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "serving.kserve.io/inferencegraph": graphName, + }, + Annotations: map[string]string{ + "autoscaling.knative.dev/min-scale": "1", + "autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev", + "serving.kserve.io/deploymentMode": "Serverless", + }, + }, + Spec: knservingv1.RevisionSpec{ + ContainerConcurrency: ptr.Int64(knservingdefaults.DefaultContainerConcurrency), + TimeoutSeconds: ptr.Int64(knservingdefaults.DefaultRevisionTimeoutSeconds), + PodSpec: v1.PodSpec{ + Containers: []v1.Container{ + { + Image: "kserve/router:v0.10.0", + Name: knservingdefaults.DefaultUserContainerName, + Env: []v1.EnvVar{ + { + Name: "PROPAGATE_HEADERS", + Value: "Authorization,Intuit_tid", + }, + }, + Args: []string{ + "--graph-json", + "{\"nodes\":{\"root\":{\"routerType\":\"Sequence\",\"steps\":[{\"serviceUrl\":\"http://someservice.exmaple.com\"}]}},\"resources\":{},\"affinity\":{\"podAffinity\":{\"preferredDuringSchedulingIgnoredDuringExecution\":[{\"weight\":100,\"podAffinityTerm\":{\"labelSelector\":{\"matchExpressions\":[{\"key\":\"serving.kserve.io/inferencegraph\",\"operator\":\"In\",\"values\":[\"singlenode3\"]}]},\"topologyKey\":\"topology.kubernetes.io/zone\"}}]}}}", + }, + Resources: v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("100m"), + v1.ResourceMemory: resource.MustParse("500Mi"), + }, + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("100m"), + v1.ResourceMemory: resource.MustParse("100Mi"), + }, + }, + }, + }, + Affinity: &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + Weight: 100, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "serving.kserve.io/inferencegraph", + Operator: metav1.LabelSelectorOpIn, + Values: []string{ + graphName, + }, + }, + }, + }, + TopologyKey: "topology.kubernetes.io/zone", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + } + expectedKnService.SetDefaults(context.TODO()) + Expect(kmp.SafeDiff(actualKnServiceCreated.Spec, expectedKnService.Spec)).To(Equal("")) + }) + }) + +}) diff --git a/pkg/controller/v1alpha1/inferencegraph/knative_reconciler.go b/pkg/controller/v1alpha1/inferencegraph/knative_reconciler.go index a5f698711c6..743286b5842 100644 --- a/pkg/controller/v1alpha1/inferencegraph/knative_reconciler.go +++ b/pkg/controller/v1alpha1/inferencegraph/knative_reconciler.go @@ -36,6 +36,7 @@ import ( "knative.dev/pkg/kmp" "knative.dev/serving/pkg/apis/autoscaling" knservingv1 "knative.dev/serving/pkg/apis/serving/v1" + "reflect" "sigs.k8s.io/controller-runtime/pkg/client" logf "sigs.k8s.io/controller-runtime/pkg/log" "strings" @@ -120,7 +121,6 @@ func createKnativeService(componentMeta metav1.ObjectMeta, graph *v1alpha1api.In if labels == nil { labels = make(map[string]string) } - // User can pass down scaling class annotation to overwrite the default scaling KPA if _, ok := annotations[autoscaling.ClassAnnotationKey]; !ok { annotations[autoscaling.ClassAnnotationKey] = autoscaling.KPA @@ -133,6 +133,7 @@ func createKnativeService(componentMeta metav1.ObjectMeta, graph *v1alpha1api.In labels = utils.Filter(componentMeta.Labels, func(key string) bool { return !utils.Includes(constants.RevisionTemplateLabelDisallowedList, key) }) + labels[constants.InferenceGraphLabel] = componentMeta.Name service := &knservingv1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: componentMeta.Name, @@ -155,18 +156,10 @@ func createKnativeService(componentMeta metav1.ObjectMeta, graph *v1alpha1api.In "--graph-json", string(bytes), }, - Resources: v1.ResourceRequirements{ - Limits: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse(config.CpuLimit), - v1.ResourceMemory: resource.MustParse(config.MemoryLimit), - }, - Requests: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse(config.CpuRequest), - v1.ResourceMemory: resource.MustParse(config.MemoryRequest), - }, - }, + Resources: constructResourceRequirements(*graph, *config), }, }, + Affinity: graph.Spec.Affinity, }, }, }, @@ -190,3 +183,26 @@ func createKnativeService(componentMeta metav1.ObjectMeta, graph *v1alpha1api.In service.SetDefaults(context.TODO()) return service } + +func constructResourceRequirements(graph v1alpha1api.InferenceGraph, config RouterConfig) v1.ResourceRequirements { + var specResources v1.ResourceRequirements + if !reflect.ValueOf(graph.Spec.Resources).IsZero() { + log.Info("Ignoring defaults for ResourceRequirements as spec has resources mentioned", "specResources", graph.Spec.Resources) + specResources = v1.ResourceRequirements{ + Limits: graph.Spec.Resources.Limits, + Requests: graph.Spec.Resources.Requests, + } + } else { + specResources = v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse(config.CpuLimit), + v1.ResourceMemory: resource.MustParse(config.MemoryLimit), + }, + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse(config.CpuRequest), + v1.ResourceMemory: resource.MustParse(config.MemoryRequest), + }, + } + } + return specResources +} diff --git a/pkg/controller/v1alpha1/inferencegraph/suite_test.go b/pkg/controller/v1alpha1/inferencegraph/suite_test.go new file mode 100644 index 00000000000..4575a83fcb3 --- /dev/null +++ b/pkg/controller/v1alpha1/inferencegraph/suite_test.go @@ -0,0 +1,120 @@ +/* +Copyright 2021 The KServe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package inferencegraph + +import ( + "context" + kfservingv1alpha1 "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + "github.com/kserve/kserve/pkg/constants" + pkgtest "github.com/kserve/kserve/pkg/testing" + v1 "k8s.io/api/core/v1" + netv1 "k8s.io/api/networking/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + knservingv1 "knative.dev/serving/pkg/apis/serving/v1" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/envtest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// These tests use Ginkgo (BDD-style Go testing framework). Refer to +// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. + +var ( + cfg *rest.Config + k8sClient client.Client + testEnv *envtest.Environment + cancel context.CancelFunc + ctx context.Context +) + +func TestAPIs(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "inferencegraph Controller Suite") +} + +var _ = BeforeSuite(func() { + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + ctx, cancel = context.WithCancel(context.TODO()) + By("bootstrapping test environment") + testEnv = pkgtest.SetupEnvTest() + cfg, err := testEnv.Start() + Expect(err).ToNot(HaveOccurred()) + Expect(cfg).ToNot(BeNil()) + + err = kfservingv1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + err = v1beta1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + err = knservingv1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + err = netv1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).ToNot(HaveOccurred()) + Expect(k8sClient).ToNot(BeNil()) + + //Create namespace + kfservingNamespaceObj := &v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: constants.KServeNamespace, + }, + } + Expect(k8sClient.Create(context.Background(), kfservingNamespaceObj)).Should(Succeed()) + + k8sManager, err := ctrl.NewManager(cfg, ctrl.Options{ + Scheme: scheme.Scheme, + MetricsBindAddress: "0", + }) + Expect(err).ToNot(HaveOccurred()) + + deployConfig := &v1beta1.DeployConfig{DefaultDeploymentMode: "Serverless"} + + err = (&InferenceGraphReconciler{ + Client: k8sClient, + Scheme: k8sClient.Scheme(), + Log: ctrl.Log.WithName("V1alpha1InferenceGraphController"), + Recorder: k8sManager.GetEventRecorderFor("V1alpha1InferenceGraphController"), + }).SetupWithManager(k8sManager, deployConfig) + Expect(err).ToNot(HaveOccurred()) + + go func() { + defer GinkgoRecover() + err = k8sManager.Start(ctx) + Expect(err).ToNot(HaveOccurred()) + }() + + k8sClient = k8sManager.GetClient() + Expect(k8sClient).ToNot(BeNil()) + +}) + +var _ = AfterSuite(func() { + cancel() + By("tearing down the test environment") + err := testEnv.Stop() + Expect(err).ToNot(HaveOccurred()) +}) diff --git a/test/crds/serving.kserve.io_inferenceservices.yaml b/test/crds/serving.kserve.io_inferenceservices.yaml index 64f890f6bd1..737f341e031 100644 --- a/test/crds/serving.kserve.io_inferenceservices.yaml +++ b/test/crds/serving.kserve.io_inferenceservices.yaml @@ -1856,6 +1856,360 @@ spec: type: object spec: properties: + affinity: + properties: + nodeAffinity: + properties: + preferredDuringSchedulingIgnoredDuringExecution: + items: + properties: + preference: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchFields: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + type: object + weight: + format: int32 + type: integer + required: + - preference + - weight + type: object + type: array + requiredDuringSchedulingIgnoredDuringExecution: + properties: + nodeSelectorTerms: + items: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchFields: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + type: object + type: array + required: + - nodeSelectorTerms + type: object + type: object + podAffinity: + properties: + preferredDuringSchedulingIgnoredDuringExecution: + items: + properties: + podAffinityTerm: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + type: object + type: object + namespaceSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + type: object + type: object + namespaces: + items: + type: string + type: array + topologyKey: + type: string + required: + - topologyKey + type: object + weight: + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + requiredDuringSchedulingIgnoredDuringExecution: + items: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + type: object + type: object + namespaceSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + type: object + type: object + namespaces: + items: + type: string + type: array + topologyKey: + type: string + required: + - topologyKey + type: object + type: array + type: object + podAntiAffinity: + properties: + preferredDuringSchedulingIgnoredDuringExecution: + items: + properties: + podAffinityTerm: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + type: object + type: object + namespaceSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + type: object + type: object + namespaces: + items: + type: string + type: array + topologyKey: + type: string + required: + - topologyKey + type: object + weight: + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + requiredDuringSchedulingIgnoredDuringExecution: + items: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + type: object + type: object + namespaceSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + type: object + type: object + namespaces: + items: + type: string + type: array + topologyKey: + type: string + required: + - topologyKey + type: object + type: array + type: object + type: object nodes: additionalProperties: properties: @@ -1890,6 +2244,25 @@ spec: - routerType type: object type: object + resources: + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + type: object required: - nodes type: object