-
Notifications
You must be signed in to change notification settings - Fork 84
Align heterogeneous autoscaler optimizer with CRD validation guarantees #766
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -223,6 +223,75 @@ func TestTwoBackendsHighLoad_then_DoOptimize_expect_DistributionA5B4(t *testing. | |
| } | ||
| } | ||
|
|
||
| func TestDefaultPanicThreshold_DoOptimize_NoPanic(t *testing.T) { | ||
| ns := "ns" | ||
| msA := &workload.ModelServing{ObjectMeta: metav1.ObjectMeta{Name: "ms-nil-a", Namespace: ns}, Spec: workload.ModelServingSpec{Replicas: ptrInt32(1)}} | ||
| msB := &workload.ModelServing{ObjectMeta: metav1.ObjectMeta{Name: "ms-nil-b", Namespace: ns}, Spec: workload.ModelServingSpec{Replicas: ptrInt32(2)}} | ||
| client := clientfake.NewSimpleClientset(msA, msB) | ||
| msLister := workloadLister.NewModelServingLister(newModelServingIndexer(msA, msB)) | ||
|
|
||
| srv := httptest.NewServer(httpHandlerWithBody("# TYPE load gauge\nload 10\n")) | ||
| defer srv.Close() | ||
| u, _ := url.Parse(srv.URL) | ||
| host, portStr, _ := net.SplitHostPort(u.Host) | ||
| port := toInt32(portStr) | ||
|
|
||
| paramA := workload.HeterogeneousTargetParam{Target: workload.Target{TargetRef: corev1.ObjectReference{Kind: workload.ModelServingKind.Kind, Namespace: ns, Name: "ms-nil-a"}, MetricEndpoint: workload.MetricEndpoint{Uri: u.Path, Port: port}}, MinReplicas: 1, MaxReplicas: 5, Cost: 10} | ||
| paramB := workload.HeterogeneousTargetParam{Target: workload.Target{TargetRef: corev1.ObjectReference{Kind: workload.ModelServingKind.Kind, Namespace: ns, Name: "ms-nil-b"}, MetricEndpoint: workload.MetricEndpoint{Uri: u.Path, Port: port}}, MinReplicas: 2, MaxReplicas: 4, Cost: 20} | ||
| // PanicThresholdPercent set to CRD default of 200 — per API guarantee this is never nil | ||
| var threshold int32 = 200 | ||
| policy := &workload.AutoscalingPolicy{Spec: workload.AutoscalingPolicySpec{TolerancePercent: 0, Metrics: []workload.AutoscalingPolicyMetric{{MetricName: "load", TargetValue: resource.MustParse("1")}}, Behavior: workload.AutoscalingPolicyBehavior{ScaleUp: workload.AutoscalingPolicyScaleUpPolicy{PanicPolicy: workload.AutoscalingPolicyPanicPolicy{Period: metav1.Duration{Duration: 1 * time.Second}, PanicThresholdPercent: &threshold}}}}} | ||
| binding := &workload.AutoscalingPolicyBinding{ObjectMeta: metav1.ObjectMeta{Name: "binding-nil", Namespace: ns}, Spec: workload.AutoscalingPolicyBindingSpec{PolicyRef: corev1.LocalObjectReference{Name: "ap"}, HeterogeneousTarget: &workload.HeterogeneousTarget{Params: []workload.HeterogeneousTargetParam{paramA, paramB}, CostExpansionRatePercent: 100}}} | ||
|
|
||
| lbsA := map[string]string{} | ||
| lbsB := map[string]string{} | ||
| pods := []*corev1.Pod{readyPod(ns, "pod-nil-a", host, lbsA), readyPod(ns, "pod-nil-b", host, lbsB)} | ||
| ac := &AutoscaleController{client: client, namespace: ns, modelServingLister: msLister, podsLister: fakePodLister{podsByNs: map[string][]*corev1.Pod{ns: pods}}, scalerMap: map[string]*autoscalerAutoscaler{}, optimizerMap: map[string]*autoscalerOptimizer{}} | ||
|
|
||
| if err := ac.doOptimize(context.Background(), binding, policy); err != nil { | ||
| t.Fatalf("doOptimize should not error with default PanicThresholdPercent: %v", err) | ||
| } | ||
| } | ||
|
|
||
| func TestSetPanicThreshold_DoOptimize_PanicModeWorks(t *testing.T) { | ||
| ns := "ns" | ||
| msA := &workload.ModelServing{ObjectMeta: metav1.ObjectMeta{Name: "ms-panic-a", Namespace: ns}, Spec: workload.ModelServingSpec{Replicas: ptrInt32(1)}} | ||
| msB := &workload.ModelServing{ObjectMeta: metav1.ObjectMeta{Name: "ms-panic-b", Namespace: ns}, Spec: workload.ModelServingSpec{Replicas: ptrInt32(2)}} | ||
| client := clientfake.NewSimpleClientset(msA, msB) | ||
| msLister := workloadLister.NewModelServingLister(newModelServingIndexer(msA, msB)) | ||
|
|
||
| srv := httptest.NewServer(httpHandlerWithBody("# TYPE load gauge\nload 100\n")) | ||
| defer srv.Close() | ||
| u, _ := url.Parse(srv.URL) | ||
| host, portStr, _ := net.SplitHostPort(u.Host) | ||
| port := toInt32(portStr) | ||
|
|
||
| paramA := workload.HeterogeneousTargetParam{Target: workload.Target{TargetRef: corev1.ObjectReference{Kind: workload.ModelServingKind.Kind, Namespace: ns, Name: "ms-panic-a"}, MetricEndpoint: workload.MetricEndpoint{Uri: u.Path, Port: port}}, MinReplicas: 1, MaxReplicas: 5, Cost: 10} | ||
| paramB := workload.HeterogeneousTargetParam{Target: workload.Target{TargetRef: corev1.ObjectReference{Kind: workload.ModelServingKind.Kind, Namespace: ns, Name: "ms-panic-b"}, MetricEndpoint: workload.MetricEndpoint{Uri: u.Path, Port: port}}, MinReplicas: 2, MaxReplicas: 4, Cost: 20} | ||
| // PanicThresholdPercent set to 200 — with load=100, recommended will far exceed threshold | ||
| var threshold int32 = 200 | ||
| policy := &workload.AutoscalingPolicy{Spec: workload.AutoscalingPolicySpec{TolerancePercent: 0, Metrics: []workload.AutoscalingPolicyMetric{{MetricName: "load", TargetValue: resource.MustParse("1")}}, Behavior: workload.AutoscalingPolicyBehavior{ScaleUp: workload.AutoscalingPolicyScaleUpPolicy{PanicPolicy: workload.AutoscalingPolicyPanicPolicy{Period: metav1.Duration{Duration: 1 * time.Second}, PanicThresholdPercent: &threshold}}}}} | ||
| binding := &workload.AutoscalingPolicyBinding{ObjectMeta: metav1.ObjectMeta{Name: "binding-panic", Namespace: ns}, Spec: workload.AutoscalingPolicyBindingSpec{PolicyRef: corev1.LocalObjectReference{Name: "ap"}, HeterogeneousTarget: &workload.HeterogeneousTarget{Params: []workload.HeterogeneousTargetParam{paramA, paramB}, CostExpansionRatePercent: 100}}} | ||
|
Comment on lines
+271
to
+274
|
||
|
|
||
| lbsA := map[string]string{} | ||
| lbsB := map[string]string{} | ||
| pods := []*corev1.Pod{readyPod(ns, "pod-panic-a", host, lbsA), readyPod(ns, "pod-panic-b", host, lbsB)} | ||
| ac := &AutoscaleController{client: client, namespace: ns, modelServingLister: msLister, podsLister: fakePodLister{podsByNs: map[string][]*corev1.Pod{ns: pods}}, scalerMap: map[string]*autoscalerAutoscaler{}, optimizerMap: map[string]*autoscalerOptimizer{}} | ||
|
|
||
| if err := ac.doOptimize(context.Background(), binding, policy); err != nil { | ||
| t.Fatalf("doOptimize error: %v", err) | ||
| } | ||
| updates := 0 | ||
| for _, a := range client.Fake.Actions() { | ||
| if a.GetVerb() == "update" && a.GetResource().Resource == "modelservings" { | ||
| updates++ | ||
| } | ||
| } | ||
| if updates == 0 { | ||
| t.Fatalf("expected update actions when PanicThresholdPercent is set, got 0") | ||
| } | ||
|
||
| } | ||
|
|
||
| func httpHandlerWithBody(body string) http.Handler { | ||
| return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Write([]byte(body)) }) | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The test name "TestDefaultPanicThreshold_DoOptimize_NoPanic" is ambiguous. It's unclear whether "NoPanic" means "no crash/error" or "panic mode not activated". The test only verifies that doOptimize doesn't return an error, but doesn't verify whether panic mode was activated or not. Consider either: (1) Renaming to something like "TestDefaultPanicThreshold_DoOptimize_NoError" to clarify it's testing for absence of errors, or (2) Adding assertions to verify whether panic mode was actually triggered and renaming accordingly.