diff --git a/policyeval/base_worker.go b/policyeval/base_worker.go
index c4eaaec9..ff6f6e0d 100644
--- a/policyeval/base_worker.go
+++ b/policyeval/base_worker.go
@@ -192,6 +192,9 @@ func (w *BaseWorker) handlePolicy(ctx context.Context, eval *sdk.ScalingEvaluati
 				continue
 			case sdk.ScalingPolicyOnErrorFail:
 				return err
+			case sdk.ScalingPolicyOnErrorScale:
+				// Intentionally empty: Go cases do not fall through, so control leaves the switch here.
+				// NOTE(review): confirm the code after this switch copes with a check that returned an error.
 			default:
 				if eval.Policy.OnCheckError == sdk.ScalingPolicyOnErrorFail {
 					return err
@@ -420,7 +423,13 @@ func (h *checkHandler) start(ctx context.Context, currentStatus *sdk.TargetStatu
 	}
 
 	if err != nil {
-		return nil, fmt.Errorf("failed to query source: %v", err)
+		// Only on_error = "scale" tolerates a failed query; every other
+		// setting surfaces the error to the caller.
+		if h.checkEval.Check.OnError != sdk.ScalingPolicyOnErrorScale {
+			return nil, fmt.Errorf("failed to query source: %v", err)
+		}
+		h.logger.Error("failed to query source, continuing with policy",
+			"on_error", sdk.ScalingPolicyOnErrorScale, "error", err)
 	}
 
 	if h.checkEval.Metrics != nil {
@@ -428,8 +437,12 @@ func (h *checkHandler) start(ctx context.Context, currentStatus *sdk.TargetStatu
 		sort.Sort(h.checkEval.Metrics)
 
 		if len(h.checkEval.Metrics) == 0 {
-			h.logger.Warn("no metrics available")
-			return &sdk.ScalingAction{Direction: sdk.ScaleDirectionNone}, nil
+			if h.checkEval.Check.OnError != sdk.ScalingPolicyOnErrorScale {
+				h.logger.Warn("no metrics available, nothing to do.")
+				return &sdk.ScalingAction{Direction: sdk.ScaleDirectionNone}, nil
+			}
+			h.logger.Warn("no metrics available, continuing with policy",
+				"on_error", sdk.ScalingPolicyOnErrorScale)
 		}
 
 		if h.logger.IsTrace() {
diff --git a/sdk/policy.go b/sdk/policy.go
index 4dffe184..69b68788 100644
--- a/sdk/policy.go
+++ b/sdk/policy.go
@@ -15,6 +15,7 @@ const (
 
 	ScalingPolicyOnErrorFail   = "fail"
 	ScalingPolicyOnErrorIgnore = "ignore"
+	ScalingPolicyOnErrorScale  = "scale"
 )
 
 // ScalingPolicy is the internal representation of a scaling document and
@@ -97,10 +98,10 @@ func (p *ScalingPolicy) Validate() error {
 		}
 
 		switch c.OnError {
-		case "", ScalingPolicyOnErrorFail, ScalingPolicyOnErrorIgnore:
+		case "", ScalingPolicyOnErrorFail, ScalingPolicyOnErrorIgnore, ScalingPolicyOnErrorScale:
 		default:
-			err := fmt.Errorf("invalid value for on_error in check %s: only %s and %s are allowed",
-				c.Name, ScalingPolicyOnErrorFail, ScalingPolicyOnErrorIgnore)
+			err := fmt.Errorf("invalid value for on_error in check %s: only %s, %s, and %s are allowed",
+				c.Name, ScalingPolicyOnErrorFail, ScalingPolicyOnErrorIgnore, ScalingPolicyOnErrorScale)
 			result = multierror.Append(result, err)
 		}
 	}
@@ -136,13 +137,15 @@ type ScalingPolicyCheck struct {
 	Strategy *ScalingPolicyStrategy
 
 	// OnError defines how errors are handled by the Autoscaler when running
-	// this check. Possible values are "ignore" or "fail". If not set the
+	// this check. Possible values are "ignore", "fail", or "scale". If not set the
 	// policy `on_check_error` value will be used.
 	//
 	// If "ignore" the check is not considered when calculating the final
 	// scaling action result.
-	// If "fail" the the entire policy evaluation will stop and no action will
+	// If "fail" the entire policy evaluation will stop and no action will
 	// be taken.
+	// If "scale" the check will be considered to be active. Use this to
+	// "fail-safe" scale-up towards max when metrics are unavailable.
 	OnError string
 }
 
diff --git a/sdk/policy_test.go b/sdk/policy_test.go
index d170b902..da964839 100644
--- a/sdk/policy_test.go
+++ b/sdk/policy_test.go
@@ -87,6 +87,23 @@ func TestScalingPolicy_Validate(t *testing.T) {
 			},
 			expectedError: "",
 		},
+		{
+			name: "valid policy using on_error = 'scale'",
+			policy: &ScalingPolicy{
+				Type:         "horizontal",
+				OnCheckError: "ignore",
+				Checks: []*ScalingPolicyCheck{
+					{
+						Name:    "valid",
+						OnError: "scale",
+						Strategy: &ScalingPolicyStrategy{
+							Name: "target-value",
+						},
+					},
+				},
+			},
+			expectedError: "",
+		},
 	}
 
 	for _, tc := range testCases {