@@ -223,6 +223,75 @@ func TestTwoBackendsHighLoad_then_DoOptimize_expect_DistributionA5B4(t *testing.
223223 }
224224}
225225
226+ func TestDefaultPanicThreshold_DoOptimize_NoPanic (t * testing.T ) {
227+ ns := "ns"
228+ msA := & workload.ModelServing {ObjectMeta : metav1.ObjectMeta {Name : "ms-nil-a" , Namespace : ns }, Spec : workload.ModelServingSpec {Replicas : ptrInt32 (1 )}}
229+ msB := & workload.ModelServing {ObjectMeta : metav1.ObjectMeta {Name : "ms-nil-b" , Namespace : ns }, Spec : workload.ModelServingSpec {Replicas : ptrInt32 (2 )}}
230+ client := clientfake .NewSimpleClientset (msA , msB )
231+ msLister := workloadLister .NewModelServingLister (newModelServingIndexer (msA , msB ))
232+
233+ srv := httptest .NewServer (httpHandlerWithBody ("# TYPE load gauge\n load 10\n " ))
234+ defer srv .Close ()
235+ u , _ := url .Parse (srv .URL )
236+ host , portStr , _ := net .SplitHostPort (u .Host )
237+ port := toInt32 (portStr )
238+
239+ paramA := workload.HeterogeneousTargetParam {Target : workload.Target {TargetRef : corev1.ObjectReference {Kind : workload .ModelServingKind .Kind , Namespace : ns , Name : "ms-nil-a" }, MetricEndpoint : workload.MetricEndpoint {Uri : u .Path , Port : port }}, MinReplicas : 1 , MaxReplicas : 5 , Cost : 10 }
240+ paramB := workload.HeterogeneousTargetParam {Target : workload.Target {TargetRef : corev1.ObjectReference {Kind : workload .ModelServingKind .Kind , Namespace : ns , Name : "ms-nil-b" }, MetricEndpoint : workload.MetricEndpoint {Uri : u .Path , Port : port }}, MinReplicas : 2 , MaxReplicas : 4 , Cost : 20 }
241+ // PanicThresholdPercent set to CRD default of 200 — per API guarantee this is never nil
242+ var threshold int32 = 200
243+ policy := & workload.AutoscalingPolicy {Spec : workload.AutoscalingPolicySpec {TolerancePercent : 0 , Metrics : []workload.AutoscalingPolicyMetric {{MetricName : "load" , TargetValue : resource .MustParse ("1" )}}, Behavior : workload.AutoscalingPolicyBehavior {ScaleUp : workload.AutoscalingPolicyScaleUpPolicy {PanicPolicy : workload.AutoscalingPolicyPanicPolicy {Period : metav1.Duration {Duration : 1 * time .Second }, PanicThresholdPercent : & threshold }}}}}
244+ binding := & workload.AutoscalingPolicyBinding {ObjectMeta : metav1.ObjectMeta {Name : "binding-nil" , Namespace : ns }, Spec : workload.AutoscalingPolicyBindingSpec {PolicyRef : corev1.LocalObjectReference {Name : "ap" }, HeterogeneousTarget : & workload.HeterogeneousTarget {Params : []workload.HeterogeneousTargetParam {paramA , paramB }, CostExpansionRatePercent : 100 }}}
245+
246+ lbsA := map [string ]string {}
247+ lbsB := map [string ]string {}
248+ pods := []* corev1.Pod {readyPod (ns , "pod-nil-a" , host , lbsA ), readyPod (ns , "pod-nil-b" , host , lbsB )}
249+ ac := & AutoscaleController {client : client , namespace : ns , modelServingLister : msLister , podsLister : fakePodLister {podsByNs : map [string ][]* corev1.Pod {ns : pods }}, scalerMap : map [string ]* autoscalerAutoscaler {}, optimizerMap : map [string ]* autoscalerOptimizer {}}
250+
251+ if err := ac .doOptimize (context .Background (), binding , policy ); err != nil {
252+ t .Fatalf ("doOptimize should not error with default PanicThresholdPercent: %v" , err )
253+ }
254+ }
255+
256+ func TestSetPanicThreshold_DoOptimize_PanicModeWorks (t * testing.T ) {
257+ ns := "ns"
258+ msA := & workload.ModelServing {ObjectMeta : metav1.ObjectMeta {Name : "ms-panic-a" , Namespace : ns }, Spec : workload.ModelServingSpec {Replicas : ptrInt32 (1 )}}
259+ msB := & workload.ModelServing {ObjectMeta : metav1.ObjectMeta {Name : "ms-panic-b" , Namespace : ns }, Spec : workload.ModelServingSpec {Replicas : ptrInt32 (2 )}}
260+ client := clientfake .NewSimpleClientset (msA , msB )
261+ msLister := workloadLister .NewModelServingLister (newModelServingIndexer (msA , msB ))
262+
263+ srv := httptest .NewServer (httpHandlerWithBody ("# TYPE load gauge\n load 100\n " ))
264+ defer srv .Close ()
265+ u , _ := url .Parse (srv .URL )
266+ host , portStr , _ := net .SplitHostPort (u .Host )
267+ port := toInt32 (portStr )
268+
269+ paramA := workload.HeterogeneousTargetParam {Target : workload.Target {TargetRef : corev1.ObjectReference {Kind : workload .ModelServingKind .Kind , Namespace : ns , Name : "ms-panic-a" }, MetricEndpoint : workload.MetricEndpoint {Uri : u .Path , Port : port }}, MinReplicas : 1 , MaxReplicas : 5 , Cost : 10 }
270+ paramB := workload.HeterogeneousTargetParam {Target : workload.Target {TargetRef : corev1.ObjectReference {Kind : workload .ModelServingKind .Kind , Namespace : ns , Name : "ms-panic-b" }, MetricEndpoint : workload.MetricEndpoint {Uri : u .Path , Port : port }}, MinReplicas : 2 , MaxReplicas : 4 , Cost : 20 }
271+ // PanicThresholdPercent set to 200 — with load=100, recommended will far exceed threshold
272+ var threshold int32 = 200
273+ policy := & workload.AutoscalingPolicy {Spec : workload.AutoscalingPolicySpec {TolerancePercent : 0 , Metrics : []workload.AutoscalingPolicyMetric {{MetricName : "load" , TargetValue : resource .MustParse ("1" )}}, Behavior : workload.AutoscalingPolicyBehavior {ScaleUp : workload.AutoscalingPolicyScaleUpPolicy {PanicPolicy : workload.AutoscalingPolicyPanicPolicy {Period : metav1.Duration {Duration : 1 * time .Second }, PanicThresholdPercent : & threshold }}}}}
274+ binding := & workload.AutoscalingPolicyBinding {ObjectMeta : metav1.ObjectMeta {Name : "binding-panic" , Namespace : ns }, Spec : workload.AutoscalingPolicyBindingSpec {PolicyRef : corev1.LocalObjectReference {Name : "ap" }, HeterogeneousTarget : & workload.HeterogeneousTarget {Params : []workload.HeterogeneousTargetParam {paramA , paramB }, CostExpansionRatePercent : 100 }}}
275+
276+ lbsA := map [string ]string {}
277+ lbsB := map [string ]string {}
278+ pods := []* corev1.Pod {readyPod (ns , "pod-panic-a" , host , lbsA ), readyPod (ns , "pod-panic-b" , host , lbsB )}
279+ ac := & AutoscaleController {client : client , namespace : ns , modelServingLister : msLister , podsLister : fakePodLister {podsByNs : map [string ][]* corev1.Pod {ns : pods }}, scalerMap : map [string ]* autoscalerAutoscaler {}, optimizerMap : map [string ]* autoscalerOptimizer {}}
280+
281+ if err := ac .doOptimize (context .Background (), binding , policy ); err != nil {
282+ t .Fatalf ("doOptimize error: %v" , err )
283+ }
284+ updates := 0
285+ for _ , a := range client .Fake .Actions () {
286+ if a .GetVerb () == "update" && a .GetResource ().Resource == "modelservings" {
287+ updates ++
288+ }
289+ }
290+ if updates == 0 {
291+ t .Fatalf ("expected update actions when PanicThresholdPercent is set, got 0" )
292+ }
293+ }
294+
// httpHandlerWithBody builds an http.Handler that responds to every request,
// regardless of method or path, by writing body to the response.
func httpHandlerWithBody(body string) http.Handler {
	payload := []byte(body)
	respond := func(w http.ResponseWriter, _ *http.Request) {
		w.Write(payload)
	}
	return http.HandlerFunc(respond)
}
0 commit comments