From c736cf72e145009639f1395fc5d639617e778274 Mon Sep 17 00:00:00 2001 From: Viacheslav Sarzhan Date: Fri, 19 Sep 2025 19:34:42 +0300 Subject: [PATCH 1/6] init --- api/v1/perconaservermysql_types.go | 71 +- api/v1/zz_generated.deepcopy.go | 72 + cmd/manager/main.go | 9 + .../ps.percona.com_perconaservermysqls.yaml | 37 + deploy/bundle.yaml | 39 +- deploy/cr.yaml | 7 +- deploy/crd.yaml | 37 + deploy/cw-bundle.yaml | 37 + pkg/controller/ps/controller.go | 30 + pkg/controller/ps/mysql_primary.go | 6 + pkg/controller/ps/status.go | 7 + pkg/controller/ps/version.go | 6 + pkg/controller/pshibernation/controller.go | 827 ++++++++ .../pshibernation/controller_test.go | 1668 +++++++++++++++++ 14 files changed, 2849 insertions(+), 4 deletions(-) create mode 100644 pkg/controller/pshibernation/controller.go create mode 100644 pkg/controller/pshibernation/controller_test.go diff --git a/api/v1/perconaservermysql_types.go b/api/v1/perconaservermysql_types.go index 2b1428912..99492a94d 100644 --- a/api/v1/perconaservermysql_types.go +++ b/api/v1/perconaservermysql_types.go @@ -80,6 +80,7 @@ type PerconaServerMySQLSpec struct { Toolkit *ToolkitSpec `json:"toolkit,omitempty"` UpgradeOptions UpgradeOptions `json:"upgradeOptions,omitempty"` UpdateStrategy appsv1.StatefulSetUpdateStrategyType `json:"updateStrategy,omitempty"` + Hibernation *HibernationSpec `json:"hibernation,omitempty"` // Deprecated: not supported since v0.12.0. 
Use initContainer instead InitImage string `json:"initImage,omitempty"` @@ -629,7 +630,8 @@ type PerconaServerMySQLStatus struct { // INSERT ADDITIONAL STATUS FIELD - defin ToolkitVersion string `json:"toolkitVersion,omitempty"` Conditions []metav1.Condition `json:"conditions,omitempty"` // +optional - Host string `json:"host"` + Host string `json:"host"` + Hibernation *HibernationStatus `json:"hibernation,omitempty"` } func (s *PerconaServerMySQLStatus) CompareMySQLVersion(ver string) int { @@ -960,6 +962,13 @@ func (cr *PerconaServerMySQL) CheckNSetDefaults(_ context.Context, serverVersion cr.Spec.MySQL.VaultSecretName = cr.Name + "-vault" } + // Validate hibernation configuration + if cr.Spec.Hibernation != nil { + if err := cr.Spec.Hibernation.Validate(); err != nil { + return errors.Wrap(err, "invalid hibernation configuration") + } + } + return nil } @@ -1200,6 +1209,36 @@ func (cr *PerconaServerMySQL) PVCResizeInProgress() bool { return ok } +// Validate validates the hibernation specification. +func (h *HibernationSpec) Validate() error { + if h == nil || !h.Enabled { + return nil + } + + if h.Schedule.Pause != "" { + if _, err := cron.ParseStandard(h.Schedule.Pause); err != nil { + return errors.Wrap(err, "invalid pause schedule") + } + } + + if h.Schedule.Unpause != "" { + if _, err := cron.ParseStandard(h.Schedule.Unpause); err != nil { + return errors.Wrap(err, "invalid unpause schedule") + } + } + + if h.Schedule.Pause == "" && h.Schedule.Unpause == "" { + return errors.New("at least one schedule (pause or unpause) must be specified when hibernation is enabled") + } + + return nil +} + +// IsHibernationEnabled checks if hibernation is enabled. +func (cr *PerconaServerMySQL) IsHibernationEnabled() bool { + return cr.Spec.Hibernation != nil && cr.Spec.Hibernation.Enabled +} + // Registers PerconaServerMySQL types with the SchemeBuilder. 
func init() { SchemeBuilder.Register(&PerconaServerMySQL{}, &PerconaServerMySQLList{}) @@ -1213,9 +1252,37 @@ type UpgradeOptions struct { Apply string `json:"apply,omitempty"` } +type HibernationSpec struct { + Enabled bool `json:"enabled,omitempty"` + Schedule HibernationSchedule `json:"schedule,omitempty"` +} + +type HibernationSchedule struct { + Pause string `json:"pause,omitempty"` // Cron expression for pause (minute hour day month weekday) + Unpause string `json:"unpause,omitempty"` // Cron expression for unpause (minute hour day month weekday) +} + +type HibernationStatus struct { + // State indicates the current hibernation state of the cluster + // +kubebuilder:validation:Enum=Active;Paused;Scheduled;Blocked;Disabled + State string `json:"state,omitempty"` // Current hibernation state + LastPauseTime *metav1.Time `json:"lastPauseTime,omitempty"` // When cluster was last paused + LastUnpauseTime *metav1.Time `json:"lastUnpauseTime,omitempty"` // When cluster was last unpaused + NextPauseTime *metav1.Time `json:"nextPauseTime,omitempty"` // When cluster will be paused next + NextUnpauseTime *metav1.Time `json:"nextUnpauseTime,omitempty"` // When cluster will be unpaused next + Reason string `json:"reason,omitempty"` // Why pause was skipped or additional info +} + const ( UpgradeStrategyDisabled = "disabled" UpgradeStrategyNever = "never" UpgradeStrategyRecommended = "recommended" - UpgradeStrategyLatest = "latest" + + // Hibernation states + HibernationStateActive = "Active" // Cluster is running normally + HibernationStatePaused = "Paused" // Cluster is paused by hibernation + HibernationStateScheduled = "Scheduled" // Hibernation is scheduled but not yet active + HibernationStateBlocked = "Blocked" // Hibernation is blocked by active operations + HibernationStateDisabled = "Disabled" // Hibernation is disabled + UpgradeStrategyLatest = "latest" ) diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 22db54e91..70e1c72ad 
100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -397,6 +397,68 @@ func (in *HAProxySpec) DeepCopy() *HAProxySpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HibernationSchedule) DeepCopyInto(out *HibernationSchedule) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HibernationSchedule. +func (in *HibernationSchedule) DeepCopy() *HibernationSchedule { + if in == nil { + return nil + } + out := new(HibernationSchedule) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HibernationSpec) DeepCopyInto(out *HibernationSpec) { + *out = *in + out.Schedule = in.Schedule +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HibernationSpec. +func (in *HibernationSpec) DeepCopy() *HibernationSpec { + if in == nil { + return nil + } + out := new(HibernationSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HibernationStatus) DeepCopyInto(out *HibernationStatus) { + *out = *in + if in.LastPauseTime != nil { + in, out := &in.LastPauseTime, &out.LastPauseTime + *out = (*in).DeepCopy() + } + if in.LastUnpauseTime != nil { + in, out := &in.LastUnpauseTime, &out.LastUnpauseTime + *out = (*in).DeepCopy() + } + if in.NextPauseTime != nil { + in, out := &in.NextPauseTime, &out.NextPauseTime + *out = (*in).DeepCopy() + } + if in.NextUnpauseTime != nil { + in, out := &in.NextUnpauseTime, &out.NextUnpauseTime + *out = (*in).DeepCopy() + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HibernationStatus. 
+func (in *HibernationStatus) DeepCopy() *HibernationStatus { + if in == nil { + return nil + } + out := new(HibernationStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *InitContainerSpec) DeepCopyInto(out *InitContainerSpec) { *out = *in @@ -870,6 +932,11 @@ func (in *PerconaServerMySQLSpec) DeepCopyInto(out *PerconaServerMySQLSpec) { (*in).DeepCopyInto(*out) } out.UpgradeOptions = in.UpgradeOptions + if in.Hibernation != nil { + in, out := &in.Hibernation, &out.Hibernation + *out = new(HibernationSpec) + **out = **in + } in.InitContainer.DeepCopyInto(&out.InitContainer) } @@ -897,6 +964,11 @@ func (in *PerconaServerMySQLStatus) DeepCopyInto(out *PerconaServerMySQLStatus) (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.Hibernation != nil { + in, out := &in.Hibernation, &out.Hibernation + *out = new(HibernationStatus) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PerconaServerMySQLStatus. 
diff --git a/cmd/manager/main.go b/cmd/manager/main.go index eba9dd530..93deb72ad 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -44,6 +44,7 @@ import ( "github.com/percona/percona-server-mysql-operator/pkg/clientcmd" "github.com/percona/percona-server-mysql-operator/pkg/controller/ps" "github.com/percona/percona-server-mysql-operator/pkg/controller/psbackup" + "github.com/percona/percona-server-mysql-operator/pkg/controller/pshibernation" "github.com/percona/percona-server-mysql-operator/pkg/controller/psrestore" "github.com/percona/percona-server-mysql-operator/pkg/k8s" "github.com/percona/percona-server-mysql-operator/pkg/platform" @@ -181,6 +182,14 @@ func main() { setupLog.Error(err, "unable to create controller", "controller", "PerconaServerMySQLRestore") os.Exit(1) } + if err = (&pshibernation.PerconaServerMySQLHibernationReconciler{ + Client: nsClient, + Scheme: mgr.GetScheme(), + ServerVersion: serverVersion, + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "pshibernation-controller") + os.Exit(1) + } //+kubebuilder:scaffold:builder err = mgr.GetFieldIndexer().IndexField( diff --git a/config/crd/bases/ps.percona.com_perconaservermysqls.yaml b/config/crd/bases/ps.percona.com_perconaservermysqls.yaml index 62cede056..52a6ab107 100644 --- a/config/crd/bases/ps.percona.com_perconaservermysqls.yaml +++ b/config/crd/bases/ps.percona.com_perconaservermysqls.yaml @@ -2681,6 +2681,18 @@ spec: type: string enableVolumeExpansion: type: boolean + hibernation: + properties: + enabled: + type: boolean + schedule: + properties: + pause: + type: string + unpause: + type: string + type: object + type: object ignoreAnnotations: items: type: string @@ -10815,6 +10827,31 @@ spec: version: type: string type: object + hibernation: + properties: + lastPauseTime: + format: date-time + type: string + lastUnpauseTime: + format: date-time + type: string + nextPauseTime: + format: date-time + type: string + 
nextUnpauseTime: + format: date-time + type: string + reason: + type: string + state: + enum: + - Active + - Paused + - Scheduled + - Blocked + - Disabled + type: string + type: object host: type: string mysql: diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index 28c497d34..2dec0c482 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -5014,6 +5014,18 @@ spec: type: string enableVolumeExpansion: type: boolean + hibernation: + properties: + enabled: + type: boolean + schedule: + properties: + pause: + type: string + unpause: + type: string + type: object + type: object ignoreAnnotations: items: type: string @@ -13148,6 +13160,31 @@ spec: version: type: string type: object + hibernation: + properties: + lastPauseTime: + format: date-time + type: string + lastUnpauseTime: + format: date-time + type: string + nextPauseTime: + format: date-time + type: string + nextUnpauseTime: + format: date-time + type: string + reason: + type: string + state: + enum: + - Active + - Paused + - Scheduled + - Blocked + - Disabled + type: string + type: object host: type: string mysql: @@ -13466,7 +13503,7 @@ spec: fieldPath: metadata.namespace - name: DISABLE_TELEMETRY value: "false" - image: perconalab/percona-server-mysql-operator:main + image: perconalab/percona-server-mysql-operator:main-h25 imagePullPolicy: Always livenessProbe: httpGet: diff --git a/deploy/cr.yaml b/deploy/cr.yaml index 1e7180834..aaa6ebf10 100644 --- a/deploy/cr.yaml +++ b/deploy/cr.yaml @@ -5,7 +5,7 @@ metadata: finalizers: - percona.com/delete-mysql-pods-in-order # - percona.com/delete-ssl - # - percona.com/delete-mysql-pvc + - percona.com/delete-mysql-pvc spec: # metadata: # annotations: @@ -28,6 +28,11 @@ spec: upgradeOptions: versionServiceEndpoint: https://check.percona.com apply: disabled + hibernation: + enabled: true + schedule: + pause: "55 12 * * 1-5" # Pause Mon-Fri at 12:55 + unpause: "45 12 * * 1-5" # Unpause Mon-Fri at 12:45 # initContainer: # image: 
perconalab/percona-server-mysql-operator:main # containerSecurityContext: diff --git a/deploy/crd.yaml b/deploy/crd.yaml index 751af04d2..08d00a85f 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -5014,6 +5014,18 @@ spec: type: string enableVolumeExpansion: type: boolean + hibernation: + properties: + enabled: + type: boolean + schedule: + properties: + pause: + type: string + unpause: + type: string + type: object + type: object ignoreAnnotations: items: type: string @@ -13148,6 +13160,31 @@ spec: version: type: string type: object + hibernation: + properties: + lastPauseTime: + format: date-time + type: string + lastUnpauseTime: + format: date-time + type: string + nextPauseTime: + format: date-time + type: string + nextUnpauseTime: + format: date-time + type: string + reason: + type: string + state: + enum: + - Active + - Paused + - Scheduled + - Blocked + - Disabled + type: string + type: object host: type: string mysql: diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index fd407660e..e2128f1a6 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -5014,6 +5014,18 @@ spec: type: string enableVolumeExpansion: type: boolean + hibernation: + properties: + enabled: + type: boolean + schedule: + properties: + pause: + type: string + unpause: + type: string + type: object + type: object ignoreAnnotations: items: type: string @@ -13148,6 +13160,31 @@ spec: version: type: string type: object + hibernation: + properties: + lastPauseTime: + format: date-time + type: string + lastUnpauseTime: + format: date-time + type: string + nextPauseTime: + format: date-time + type: string + nextUnpauseTime: + format: date-time + type: string + reason: + type: string + state: + enum: + - Active + - Paused + - Scheduled + - Blocked + - Disabled + type: string + type: object host: type: string mysql: diff --git a/pkg/controller/ps/controller.go b/pkg/controller/ps/controller.go index c035c8e74..d053875fe 100644 --- a/pkg/controller/ps/controller.go +++ 
b/pkg/controller/ps/controller.go @@ -894,6 +894,12 @@ func (r *PerconaServerMySQLReconciler) reconcileServices(ctx context.Context, cr func (r *PerconaServerMySQLReconciler) reconcileReplication(ctx context.Context, cr *apiv1.PerconaServerMySQL) error { log := logf.FromContext(ctx).WithName("reconcileReplication") + // Skip replication reconciliation when cluster is paused + if cr.Spec.Pause { + log.V(1).Info("Skipping replication reconciliation - cluster is paused", "cluster", cr.Name, "namespace", cr.Namespace) + return nil + } + if err := r.reconcileGroupReplication(ctx, cr); err != nil { return errors.Wrap(err, "reconcile group replication") } @@ -1004,6 +1010,12 @@ func (r *PerconaServerMySQLReconciler) reconcileGroupReplication(ctx context.Con func (r *PerconaServerMySQLReconciler) reconcileBootstrapStatus(ctx context.Context, cr *apiv1.PerconaServerMySQL) error { log := logf.FromContext(ctx) + // Skip bootstrap status reconciliation when cluster is paused + if cr.Spec.Pause { + log.V(1).Info("Skipping bootstrap status reconciliation - cluster is paused", "cluster", cr.Name, "namespace", cr.Namespace) + return nil + } + if cr.Status.MySQL.Ready == 0 || cr.Status.MySQL.Ready != cr.Spec.MySQL.Size { log.V(1).Info("Waiting for all MySQL pods to be ready", "ready", cr.Status.MySQL.Ready, "expected", cr.Spec.MySQL.Size) return nil @@ -1075,6 +1087,12 @@ func (r *PerconaServerMySQLReconciler) rescanClusterIfNeeded(ctx context.Context log := logf.FromContext(ctx) + // Skip cluster rescan when cluster is paused + if cr.Spec.Pause { + log.V(1).Info("Skipping cluster rescan - cluster is paused", "cluster", cr.Name, "namespace", cr.Namespace) + return nil + } + pod, err := mysql.GetReadyPod(ctx, r.Client, cr) if err != nil { if errors.Is(err, mysql.ErrNoReadyPods) { @@ -1265,6 +1283,12 @@ func (r *PerconaServerMySQLReconciler) reconcileMySQLRouter(ctx context.Context, return nil } + // Skip router reconciliation when cluster is paused + if cr.Spec.Pause { + 
log.V(1).Info("Skipping router reconciliation - cluster is paused", "cluster", cr.Name, "namespace", cr.Namespace) + return nil + } + if cr.Spec.Proxy.Router.Size > 0 { if cr.Status.MySQL.Ready != cr.Spec.MySQL.Size { log.V(1).Info("Waiting for MySQL pods to be ready") @@ -1415,6 +1439,12 @@ func (r *PerconaServerMySQLReconciler) cleanupOutdated(ctx context.Context, cr * func (r *PerconaServerMySQLReconciler) getPrimaryFromOrchestrator(ctx context.Context, cr *apiv1.PerconaServerMySQL) (*orchestrator.Instance, error) { log := logf.FromContext(ctx) + + // Skip getting primary from orchestrator when cluster is paused + if cr.Spec.Pause { + return nil, errors.New("cluster is paused") + } + pod, err := getReadyOrcPod(ctx, r.Client, cr) if err != nil { return nil, err diff --git a/pkg/controller/ps/mysql_primary.go b/pkg/controller/ps/mysql_primary.go index 85b797e9b..43755e6c7 100644 --- a/pkg/controller/ps/mysql_primary.go +++ b/pkg/controller/ps/mysql_primary.go @@ -24,6 +24,12 @@ import ( func (r *PerconaServerMySQLReconciler) reconcileGRMySQLPrimaryLabel(ctx context.Context, cr *apiv1.PerconaServerMySQL) error { logger := logf.FromContext(ctx) + // Skip primary label reconciliation when cluster is paused + if cr.Spec.Pause { + logger.V(1).Info("Skipping GR primary label reconciliation - cluster is paused", "cluster", cr.Name, "namespace", cr.Namespace) + return nil + } + if !cr.Spec.MySQL.IsGR() { return nil } diff --git a/pkg/controller/ps/status.go b/pkg/controller/ps/status.go index e7cf04558..e6848adae 100644 --- a/pkg/controller/ps/status.go +++ b/pkg/controller/ps/status.go @@ -225,6 +225,13 @@ func (r *PerconaServerMySQLReconciler) reconcileCRStatus(ctx context.Context, cr func (r *PerconaServerMySQLReconciler) isGRReady(ctx context.Context, cr *apiv1.PerconaServerMySQL) (bool, error) { log := logf.FromContext(ctx).WithName("groupReplicationStatus") + + // Skip GR readiness check when cluster is paused + if cr.Spec.Pause { + log.V(1).Info("Skipping GR 
readiness check - cluster is paused", "cluster", cr.Name, "namespace", cr.Namespace) + return false, nil + } + if cr.Status.MySQL.Ready != cr.Spec.MySQL.Size { log.Info("Not all MySQL pods are ready", "ready", cr.Status.MySQL.Ready, "expected", cr.Spec.MySQL.Size) return false, nil diff --git a/pkg/controller/ps/version.go b/pkg/controller/ps/version.go index baf3563bd..2a573016a 100644 --- a/pkg/controller/ps/version.go +++ b/pkg/controller/ps/version.go @@ -35,6 +35,12 @@ func (r *PerconaServerMySQLReconciler) reconcileMySQLVersion( ) error { log := logf.FromContext(ctx) + // Skip version reconciliation when cluster is paused + if cr.Spec.Pause { + log.V(1).Info("Skipping MySQL version reconciliation - cluster is paused", "cluster", cr.Name, "namespace", cr.Namespace) + return nil + } + pod, err := mysql.GetReadyPod(ctx, r.Client, cr) if err != nil { if errors.Is(err, mysql.ErrNoReadyPods) { diff --git a/pkg/controller/pshibernation/controller.go b/pkg/controller/pshibernation/controller.go new file mode 100644 index 000000000..16d490240 --- /dev/null +++ b/pkg/controller/pshibernation/controller.go @@ -0,0 +1,827 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package pshibernation + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/pkg/errors" + "github.com/robfig/cron/v3" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + k8sretry "k8s.io/client-go/util/retry" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + logf "sigs.k8s.io/controller-runtime/pkg/log" + + apiv1 "github.com/percona/percona-server-mysql-operator/api/v1" + "github.com/percona/percona-server-mysql-operator/pkg/platform" +) + +// PerconaServerMySQLHibernationReconciler reconciles PerconaServerMySQL hibernation +type PerconaServerMySQLHibernationReconciler struct { + client.Client + Scheme *runtime.Scheme + ServerVersion *platform.ServerVersion +} + +//+kubebuilder:rbac:groups=ps.percona.com,resources=perconaservermysqls,verbs=get;list;watch;update;patch +//+kubebuilder:rbac:groups=ps.percona.com,resources=perconaservermysqlbackups,verbs=get;list;watch +//+kubebuilder:rbac:groups=ps.percona.com,resources=perconaservermysqlrestores,verbs=get;list;watch + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. 
+func (r *PerconaServerMySQLHibernationReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := logf.FromContext(ctx).WithName("pshibernation-controller") + + // Fetch the PerconaServerMySQL instance + cr := &apiv1.PerconaServerMySQL{} + if err := r.Client.Get(ctx, req.NamespacedName, cr); err != nil { + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + // Check if hibernation is enabled + if !cr.IsHibernationEnabled() { + // Only update status if it's not already disabled to avoid log spam + if cr.Status.Hibernation == nil || cr.Status.Hibernation.State != apiv1.HibernationStateDisabled { + if err := r.updateHibernationState(ctx, cr, apiv1.HibernationStateDisabled, ""); err != nil { + log.Error(err, "Failed to update hibernation state to disabled", "cluster", cr.Name, "namespace", cr.Namespace) + } + } + return ctrl.Result{RequeueAfter: 5 * time.Minute}, nil + } + + // Synchronize hibernation state with actual cluster state + if err := r.synchronizeHibernationState(ctx, cr); err != nil { + log.Error(err, "Failed to synchronize hibernation state", "cluster", cr.Name, "namespace", cr.Namespace) + return ctrl.Result{RequeueAfter: 30 * time.Second}, err + } + + // Skip hibernation processing if cluster is still initializing + // This prevents hibernation state from flipping during cluster startup + if cr.Status.State == apiv1.StateInitializing { + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + } + + // Process hibernation logic + log.Info("🔄 DEBUG: About to call processHibernation", "cluster", cr.Name, "namespace", cr.Namespace) + if err := r.processHibernation(ctx, cr); err != nil { + log.Error(err, "Failed to process hibernation", "cluster", cr.Name, "namespace", cr.Namespace) + return ctrl.Result{RequeueAfter: 1 * time.Minute}, err + } + log.Info("✅ DEBUG: processHibernation completed successfully", "cluster", cr.Name, "namespace", cr.Namespace) + + // Requeue after 1 minute to check again + return 
ctrl.Result{RequeueAfter: 1 * time.Minute}, nil +} + +// processHibernation handles the hibernation logic for a cluster +func (r *PerconaServerMySQLHibernationReconciler) processHibernation(ctx context.Context, cr *apiv1.PerconaServerMySQL) error { + log := logf.FromContext(ctx).WithName("processHibernation") + now := time.Now() + hibernation := cr.Spec.Hibernation + + log.Info("🔄 DEBUG: processHibernation started", "cluster", cr.Name, "namespace", cr.Namespace, "currentTime", now.Format("15:04:05")) + + // Check if it's time to pause + if hibernation.Schedule.Pause != "" { + log.Info("🔄 DEBUG: Checking pause schedule", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", hibernation.Schedule.Pause) + if shouldPause, err := r.shouldPauseCluster(ctx, cr, hibernation.Schedule.Pause, now); err != nil { + log.Error(err, "Failed to check pause schedule", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", hibernation.Schedule.Pause) + return errors.Wrap(err, "failed to check pause schedule") + } else { + log.Info("🔄 DEBUG: shouldPauseCluster result", "cluster", cr.Name, "namespace", cr.Namespace, "shouldPause", shouldPause) + if shouldPause { + log.Info("🔄 DEBUG: Should pause cluster", "cluster", cr.Name, "namespace", cr.Namespace) + if canPause, reason, err := r.canPauseCluster(ctx, cr); err != nil { + log.Error(err, "Failed to check if cluster can be paused", "cluster", cr.Name, "namespace", cr.Namespace) + return errors.Wrap(err, "failed to check if cluster can be paused") + } else if canPause { + if err := r.pauseCluster(ctx, cr); err != nil { + log.Error(err, "Failed to pause cluster", "cluster", cr.Name, "namespace", cr.Namespace) + return errors.Wrap(err, "failed to pause cluster") + } + log.Info("✅ Cluster paused by hibernation", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", hibernation.Schedule.Pause) + } else { + // Check if the reason is cluster not ready - if so, schedule for next window + if strings.Contains(reason, "cluster not 
ready") { + log.Info("⏰ Cluster not ready, scheduling hibernation for next window", "cluster", cr.Name, "namespace", cr.Namespace, "reason", reason, "schedule", hibernation.Schedule.Pause) + if err := r.scheduleHibernationForNextWindow(ctx, cr, hibernation.Schedule.Pause, reason); err != nil { + log.Error(err, "Failed to schedule hibernation for next window", "cluster", cr.Name, "namespace", cr.Namespace) + } + } else { + log.Info("⚠️ Skipped pause due to active operations", "cluster", cr.Name, "namespace", cr.Namespace, "reason", reason, "schedule", hibernation.Schedule.Pause) + if err := r.updateHibernationState(ctx, cr, apiv1.HibernationStateBlocked, reason); err != nil { + log.Error(err, "Failed to update hibernation status", "cluster", cr.Name, "namespace", cr.Namespace) + } + } + } + } + } + } + + // Check if it's time to unpause + if hibernation.Schedule.Unpause != "" { + log.Info("🔄 DEBUG: Checking unpause schedule", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", hibernation.Schedule.Unpause) + if shouldUnpause, err := r.shouldUnpauseCluster(ctx, cr, hibernation.Schedule.Unpause, now); err != nil { + log.Error(err, "Failed to check unpause schedule", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", hibernation.Schedule.Unpause) + return errors.Wrap(err, "failed to check unpause schedule") + } else if shouldUnpause { + log.Info("🔄 DEBUG: Should unpause cluster", "cluster", cr.Name, "namespace", cr.Namespace) + if err := r.unpauseCluster(ctx, cr); err != nil { + log.Error(err, "Failed to unpause cluster", "cluster", cr.Name, "namespace", cr.Namespace) + return errors.Wrap(err, "failed to unpause cluster") + } + log.Info("✅ Cluster unpaused by hibernation", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", hibernation.Schedule.Unpause) + } + } + + // Set appropriate state and calculate next times if hibernation status is not initialized + // or if hibernation is enabled but state is still "Disabled" + if cr.Status.Hibernation == 
nil || cr.Status.Hibernation.State == "" || + (cr.IsHibernationEnabled() && cr.Status.Hibernation.State == apiv1.HibernationStateDisabled) { + + // Log when hibernation gets enabled + if cr.IsHibernationEnabled() && (cr.Status.Hibernation == nil || cr.Status.Hibernation.State == apiv1.HibernationStateDisabled) { + pauseSchedule := "not set" + unpauseSchedule := "not set" + if cr.Spec.Hibernation.Schedule.Pause != "" { + pauseSchedule = cr.Spec.Hibernation.Schedule.Pause + } + if cr.Spec.Hibernation.Schedule.Unpause != "" { + unpauseSchedule = cr.Spec.Hibernation.Schedule.Unpause + } + log.Info("🔄 Hibernation enabled", "cluster", cr.Name, "namespace", cr.Namespace, + "pauseSchedule", pauseSchedule, "unpauseSchedule", unpauseSchedule) + } + + if err := r.initializeHibernationStatus(ctx, cr); err != nil { + log.Error(err, "Failed to initialize hibernation status", "cluster", cr.Name, "namespace", cr.Namespace) + } + } else { + // Check if hibernation schedule has changed and update next times if needed + if err := r.updateHibernationScheduleIfChanged(ctx, cr); err != nil { + log.Error(err, "Failed to update hibernation schedule", "cluster", cr.Name, "namespace", cr.Namespace) + } + } + + return nil +} + +// scheduleHibernationForNextWindow schedules hibernation for the next available window when cluster is not ready +func (r *PerconaServerMySQLHibernationReconciler) scheduleHibernationForNextWindow(ctx context.Context, cr *apiv1.PerconaServerMySQL, schedule, reason string) error { + log := logf.FromContext(ctx).WithName("scheduleHibernationForNextWindow") + + return k8sretry.RetryOnConflict(k8sretry.DefaultRetry, func() error { + // Get fresh copy of the cluster + fresh := &apiv1.PerconaServerMySQL{} + if err := r.Client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil { + log.Error(err, "Failed to get fresh cluster copy for next window scheduling", "cluster", cr.Name, "namespace", cr.Namespace) + return err + } + + // Ensure 
hibernation status exists + if fresh.Status.Hibernation == nil { + fresh.Status.Hibernation = &apiv1.HibernationStatus{} + } + + // Parse the cron schedule to calculate next window + cronSchedule, err := cron.ParseStandard(schedule) + if err != nil { + log.Error(err, "Failed to parse schedule for next window calculation", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", schedule) + return err + } + + // Calculate next available window (tomorrow's schedule) + now := time.Now() + nextWindow := r.calculateNextScheduleTime(now, cronSchedule) + + // Update the next pause time to the next window + fresh.Status.Hibernation.NextPauseTime = &nextWindow + + // Set state to indicate we're waiting for next window + fresh.Status.Hibernation.State = apiv1.HibernationStateScheduled + fresh.Status.Hibernation.Reason = fmt.Sprintf("Scheduled for next window: %s", reason) + + // Update the status + if err := r.Client.Status().Update(ctx, fresh); err != nil { + log.Error(err, "Failed to update hibernation status for next window", "cluster", cr.Name, "namespace", cr.Namespace) + return err + } + + log.Info("📅 Hibernation scheduled for next window", "cluster", cr.Name, "namespace", cr.Namespace, + "nextWindow", nextWindow, "reason", reason) + + return nil + }) +}

// firstScheduledTimeToday returns the first activation of sched that is later
// than one second before midnight of now's calendar day (in now's location),
// and reports whether that activation falls on the same calendar day as now.
func firstScheduledTimeToday(now time.Time, sched cron.Schedule) (time.Time, bool) {
	midnight := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location())
	scheduled := sched.Next(midnight.Add(-time.Second))
	sameDay := scheduled.Year() == now.Year() &&
		scheduled.Month() == now.Month() &&
		scheduled.Day() == now.Day()
	return scheduled, sameDay
}

// synchronizeHibernationState aligns Status.Hibernation.State with the
// cluster's observed Status.State.
//
// It re-reads the cluster, maps StatePaused/StateStopping to
// HibernationStatePaused and every other cluster state to
// HibernationStateActive, and calls updateHibernationState (with an empty
// reason) only when the stored hibernation state differs. It returns the error
// of the Get or of the status update.
//
// NOTE(review): any stored state other than the expected one is overwritten
// and its reason cleared, which includes HibernationStateScheduled — confirm
// that this is intended.
func (r *PerconaServerMySQLHibernationReconciler) synchronizeHibernationState(ctx context.Context, cr *apiv1.PerconaServerMySQL) error {
	log := logf.FromContext(ctx).WithName("synchronizeHibernationState")

	// Get fresh copy of the cluster to check current state
	fresh := &apiv1.PerconaServerMySQL{}
	if err := r.Client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil {
		return err
	}

	// Ensure hibernation status exists
	if fresh.Status.Hibernation == nil {
		fresh.Status.Hibernation = &apiv1.HibernationStatus{}
	}

	// A cluster is considered "paused" if it's in StatePaused or StateStopping
	isClusterPaused := fresh.Status.State == apiv1.StatePaused || fresh.Status.State == apiv1.StateStopping
	currentHibernationState := fresh.Status.Hibernation.State

	// Determine what the hibernation state should be
	var expectedState string
	if isClusterPaused {
		expectedState = apiv1.HibernationStatePaused
	} else {
		expectedState = apiv1.HibernationStateActive
	}

	// Nothing to do when the stored state already matches.
	if currentHibernationState == expectedState {
		return nil
	}

	log.Info("🔄 Synchronizing hibernation state with cluster state",
		"cluster", cr.Name, "namespace", cr.Namespace,
		"clusterState", fresh.Status.State,
		"currentHibernationState", currentHibernationState,
		"expectedHibernationState", expectedState)

	return r.updateHibernationState(ctx, fresh, expectedState, "")
}

// shouldPauseCluster reports whether the cluster should be paused at now
// according to the cron expression schedule.
//
// It returns an error (wrapped as "invalid pause schedule") when schedule does
// not parse. It returns false when spec.pause is already set, when the first
// activation computed from midnight does not fall on today's date, or when
// that activation is still in the future. Otherwise it returns true unless the
// reference time (LastPauseTime, else LastUnpauseTime) is later than today's
// activation, i.e. a pause/unpause was already recorded after it.
//
// With no recorded times the reference is the zero time, so the cluster is
// paused as soon as today's scheduled time has arrived — also when hibernation
// was enabled after that time.
func (r *PerconaServerMySQLHibernationReconciler) shouldPauseCluster(ctx context.Context, cr *apiv1.PerconaServerMySQL, schedule string, now time.Time) (bool, error) {
	log := logf.FromContext(ctx).WithName("shouldPauseCluster")

	// Parse cron schedule
	cronSchedule, err := cron.ParseStandard(schedule)
	if err != nil {
		log.Error(err, "Invalid pause schedule", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", schedule)
		return false, errors.Wrap(err, "invalid pause schedule")
	}

	// Check if cluster is already paused
	if cr.Spec.Pause {
		return false, nil
	}

	// Reference time: last pause if known, otherwise last unpause. It stays
	// the zero time on first-time evaluation.
	var referenceTime time.Time
	if hib := cr.Status.Hibernation; hib != nil {
		switch {
		case hib.LastPauseTime != nil:
			referenceTime = hib.LastPauseTime.Time
		case hib.LastUnpauseTime != nil:
			referenceTime = hib.LastUnpauseTime.Time
		}
	}

	todaySchedule, isToday := firstScheduledTimeToday(now, cronSchedule)

	// Schedule doesn't apply to today, or today's time hasn't arrived yet.
	if !isToday || now.Before(todaySchedule) {
		return false, nil
	}

	// Past today's schedule: pause unless the reference time shows that a
	// pause/unpause was already recorded after it. The zero reference time is
	// never after the schedule, so first-time evaluation pauses here.
	return !referenceTime.After(todaySchedule), nil
}

// shouldUnpauseCluster reports whether the cluster should be unpaused at now
// according to the cron expression schedule.
//
// It returns an error (wrapped as "invalid unpause schedule") when schedule
// does not parse. It returns false when spec.pause is not set and when neither
// LastUnpauseTime nor LastPauseTime is recorded (first-time evaluation never
// unpauses). Otherwise the reference time is LastUnpauseTime, else
// LastPauseTime, and the result is true once now has reached the first
// activation after the reference time.
//
// NOTE(review): the fallback below fires when LastUnpauseTime is more than one
// minute away from today's scheduled time and now is past that time. That
// appears to include the case where the cluster was unpaused later today and
// has just been paused again — confirm this cannot cause an immediate
// unpause after a scheduled pause.
func (r *PerconaServerMySQLHibernationReconciler) shouldUnpauseCluster(ctx context.Context, cr *apiv1.PerconaServerMySQL, schedule string, now time.Time) (bool, error) {
	log := logf.FromContext(ctx).WithName("shouldUnpauseCluster")

	// Parse cron schedule
	cronSchedule, err := cron.ParseStandard(schedule)
	if err != nil {
		log.Error(err, "Invalid unpause schedule", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", schedule)
		return false, errors.Wrap(err, "invalid unpause schedule")
	}

	// Check if cluster is not paused
	if !cr.Spec.Pause {
		return false, nil
	}

	// Get reference time for calculating next unpause
	hib := cr.Status.Hibernation
	var referenceTime time.Time
	switch {
	case hib != nil && hib.LastUnpauseTime != nil:
		referenceTime = hib.LastUnpauseTime.Time
	case hib != nil && hib.LastPauseTime != nil:
		referenceTime = hib.LastPauseTime.Time
	default:
		// First-time evaluation: never unpause, wait for the next window.
		return false, nil
	}

	// Check if we should unpause now
	nextUnpauseTime := cronSchedule.Next(referenceTime)
	if !now.Before(nextUnpauseTime) {
		return true, nil
	}

	// Fallback: only considered when a LastUnpauseTime is recorded.
	if referenceTime.IsZero() || hib.LastUnpauseTime == nil {
		return false, nil
	}

	todaySchedule, isToday := firstScheduledTimeToday(now, cronSchedule)

	// The reference counts as "today's scheduled time" when it is within one
	// minute of it; in that case today's unpause already happened.
	timeDiff := referenceTime.Sub(todaySchedule)
	referenceIsTodaySchedule := timeDiff >= -1*time.Minute && timeDiff <= 1*time.Minute

	return isToday && !referenceIsTodaySchedule && !now.Before(todaySchedule), nil
}

// canPauseCluster reports whether the cluster may be paused right now.
//
// It returns (false, reason, nil) when the cluster state is not StateReady or
// when a backup/restore in the cluster's namespace that references this
// cluster is in its Starting or Running state. It returns (true, "", nil)
// otherwise, and a wrapped error when listing backups or restores fails.
func (r *PerconaServerMySQLHibernationReconciler) canPauseCluster(ctx context.Context, cr *apiv1.PerconaServerMySQL) (bool, string, error) {
	log := logf.FromContext(ctx).WithName("canPauseCluster")

	// Check if cluster is in a ready state
	if cr.Status.State != apiv1.StateReady {
		return false, fmt.Sprintf("cluster not ready (state: %s)", cr.Status.State), nil
	}

	// Check for active backups
	backupList := &apiv1.PerconaServerMySQLBackupList{}
	if err := r.List(ctx, backupList, client.InNamespace(cr.Namespace)); err != nil {
		log.Error(err, "Failed to list backups", "cluster", cr.Name, "namespace", cr.Namespace)
		return false, "", errors.Wrap(err, "failed to list backups")
	}

	// Index instead of ranging by value to avoid copying each item.
	for i := range backupList.Items {
		backup := &backupList.Items[i]
		if backup.Spec.ClusterName != cr.Name {
			continue
		}
		switch backup.Status.State {
		case apiv1.BackupStarting, apiv1.BackupRunning:
			return false, fmt.Sprintf("active backup: %s (state: %s)", backup.Name, backup.Status.State), nil
		}
	}

	// Check for active restores
	restoreList := &apiv1.PerconaServerMySQLRestoreList{}
	if err := r.List(ctx, restoreList, client.InNamespace(cr.Namespace)); err != nil {
		log.Error(err, "Failed to list restores", "cluster", cr.Name, "namespace", cr.Namespace)
		return false, "", errors.Wrap(err, "failed to list restores")
	}

	for i := range restoreList.Items {
		restore := &restoreList.Items[i]
		if restore.Spec.ClusterName != cr.Name {
			continue
		}
		switch restore.Status.State {
		case apiv1.RestoreStarting, apiv1.RestoreRunning:
			return false, fmt.Sprintf("active restore: %s (state: %s)", restore.Name, restore.Status.State), nil
		}
	}

	return true, "", nil
}

// pauseCluster pauses the cluster by setting spec.pause to true and then
// records the pause in the hibernation status.
//
// Inside a RetryOnConflict loop it re-reads the cluster, updates the spec,
// sets State to HibernationStatePaused, LastPauseTime to now and clears
// Reason. NextPauseTime is set to the next activation of the pause schedule
// when one is configured and parses; a parse failure is logged and does not
// fail the call. Errors from Get, Update and the status update are returned.
func (r *PerconaServerMySQLHibernationReconciler) pauseCluster(ctx context.Context, cr *apiv1.PerconaServerMySQL) error {
	log := logf.FromContext(ctx).WithName("pauseCluster")

	return k8sretry.RetryOnConflict(k8sretry.DefaultRetry, func() error {
		// Get fresh copy of the cluster
		fresh := &apiv1.PerconaServerMySQL{}
		if err := r.Client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil {
			log.Error(err, "Failed to get fresh cluster copy", "cluster", cr.Name, "namespace", cr.Namespace)
			return err
		}

		// Set pause to true
		fresh.Spec.Pause = true

		// Update the cluster
		if err := r.Client.Update(ctx, fresh); err != nil {
			log.Error(err, "Failed to update cluster spec", "cluster", cr.Name, "namespace", cr.Namespace)
			return err
		}

		// Update hibernation status
		now := metav1.Now()
		if fresh.Status.Hibernation == nil {
			fresh.Status.Hibernation = &apiv1.HibernationStatus{}
		}
		fresh.Status.Hibernation.State = apiv1.HibernationStatePaused
		fresh.Status.Hibernation.LastPauseTime = &now
		fresh.Status.Hibernation.Reason = ""

		// Calculate next pause time. The spec is re-read above, so the
		// hibernation section may be gone by now; guard against nil.
		if spec := fresh.Spec.Hibernation; spec != nil && spec.Schedule.Pause != "" {
			if cronSchedule, err := cron.ParseStandard(spec.Schedule.Pause); err == nil {
				nextPauseTime := metav1.NewTime(cronSchedule.Next(now.Time))
				fresh.Status.Hibernation.NextPauseTime = &nextPauseTime
			} else {
				log.Error(err, "Failed to parse pause schedule for next time calculation", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", spec.Schedule.Pause)
			}
		}

		if err := r.Client.Status().Update(ctx, fresh); err != nil {
			log.Error(err, "Failed to update hibernation status", "cluster", cr.Name, "namespace", cr.Namespace)
			return err
		}

		log.Info("✅ Hibernation status updated after pause", "cluster", cr.Name, "namespace", cr.Namespace, "state", fresh.Status.Hibernation.State, "lastPauseTime", fresh.Status.Hibernation.LastPauseTime)
		return nil
	})
}

// unpauseCluster unpauses the cluster by setting spec.pause to false and then
// records the unpause in the hibernation status.
//
// Inside a RetryOnConflict loop it re-reads the cluster, updates the spec,
// sets State to HibernationStateActive, LastUnpauseTime to now and clears
// Reason. NextUnpauseTime is set to the next activation of the unpause
// schedule when one is configured and parses; a parse failure is logged and
// does not fail the call. Errors from Get, Update and the status update are
// returned.
func (r *PerconaServerMySQLHibernationReconciler) unpauseCluster(ctx context.Context, cr *apiv1.PerconaServerMySQL) error {
	log := logf.FromContext(ctx).WithName("unpauseCluster")

	return k8sretry.RetryOnConflict(k8sretry.DefaultRetry, func() error {
		// Get fresh copy of the cluster
		fresh := &apiv1.PerconaServerMySQL{}
		if err := r.Client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil {
			log.Error(err, "Failed to get fresh cluster copy", "cluster", cr.Name, "namespace", cr.Namespace)
			return err
		}

		// Set pause to false
		fresh.Spec.Pause = false

		// Update the cluster
		if err := r.Client.Update(ctx, fresh); err != nil {
			log.Error(err, "Failed to update cluster spec", "cluster", cr.Name, "namespace", cr.Namespace)
			return err
		}

		// Update hibernation status
		now := metav1.Now()
		if fresh.Status.Hibernation == nil {
			fresh.Status.Hibernation = &apiv1.HibernationStatus{}
		}
		fresh.Status.Hibernation.State = apiv1.HibernationStateActive
		fresh.Status.Hibernation.LastUnpauseTime = &now
		fresh.Status.Hibernation.Reason = ""

		// Calculate next unpause time. The spec is re-read above, so the
		// hibernation section may be gone by now; guard against nil.
		if spec := fresh.Spec.Hibernation; spec != nil && spec.Schedule.Unpause != "" {
			if cronSchedule, err := cron.ParseStandard(spec.Schedule.Unpause); err == nil {
				nextUnpauseTime := metav1.NewTime(cronSchedule.Next(now.Time))
				fresh.Status.Hibernation.NextUnpauseTime = &nextUnpauseTime
			} else {
				log.Error(err, "Failed to parse unpause schedule for next time calculation", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", spec.Schedule.Unpause)
			}
		}

		if err := r.Client.Status().Update(ctx, fresh); err != nil {
			log.Error(err, "Failed to update hibernation status", "cluster", cr.Name, "namespace", cr.Namespace)
			return err
		}

		log.Info("✅ Hibernation status updated after unpause", "cluster", cr.Name, "namespace", cr.Namespace, "state", fresh.Status.Hibernation.State, "lastUnpauseTime", fresh.Status.Hibernation.LastUnpauseTime)
		return nil
	})
}

// updateHibernationState writes state and reason into the hibernation status.
//
// Inside a RetryOnConflict loop it re-reads the cluster and issues a status
// update only when state or reason differs from the stored values. A state
// change is logged at info level; a reason-only change to a non-empty reason
// is logged at V(1). Errors from Get and the status update are returned.
func (r *PerconaServerMySQLHibernationReconciler) updateHibernationState(ctx context.Context, cr *apiv1.PerconaServerMySQL, state, reason string) error {
	log := logf.FromContext(ctx).WithName("updateHibernationState")

	return k8sretry.RetryOnConflict(k8sretry.DefaultRetry, func() error {
		// Get fresh copy of the cluster
		fresh := &apiv1.PerconaServerMySQL{}
		if err := r.Client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil {
			log.Error(err, "Failed to get fresh cluster copy for status update", "cluster", cr.Name, "namespace", cr.Namespace)
			return err
		}

		// Update hibernation status
		if fresh.Status.Hibernation == nil {
			fresh.Status.Hibernation = &apiv1.HibernationStatus{}
		}

		// Check if state or reason actually changed to avoid unnecessary updates and log spam
		oldState := fresh.Status.Hibernation.State
		stateChanged := oldState != state
		reasonChanged := fresh.Status.Hibernation.Reason != reason
		if !stateChanged && !reasonChanged {
			return nil
		}

		fresh.Status.Hibernation.State = state
		fresh.Status.Hibernation.Reason = reason

		if err := r.Client.Status().Update(ctx, fresh); err != nil {
			log.Error(err, "Failed to update hibernation status", "cluster", cr.Name, "namespace", cr.Namespace, "state", state, "reason", reason)
			return err
		}

		// Only log significant state changes, not routine updates
		switch {
		case stateChanged:
			log.Info("Hibernation state changed", "cluster", cr.Name, "namespace", cr.Namespace, "oldState", oldState, "newState", state, "reason", reason)
		case reason != "":
			log.V(1).Info("Hibernation reason updated", "cluster", cr.Name, "namespace", cr.Namespace, "state", state, "reason", reason)
		}
		return nil
	})
}

// initializeHibernationStatus initializes the hibernation status with a state
// derived from spec.pause and with the next pause/unpause times.
//
// Inside a RetryOnConflict loop it re-reads the cluster, sets State to
// HibernationStatePaused when spec.pause is true and HibernationStateActive
// otherwise, and fills NextPauseTime/NextUnpauseTime from the configured
// schedules via calculateNextScheduleTime. Schedule parse failures are logged
// and skipped. LastPauseTime and LastUnpauseTime are deliberately left
// untouched; they are only set when an actual pause/unpause occurs.
func (r *PerconaServerMySQLHibernationReconciler) initializeHibernationStatus(ctx context.Context, cr *apiv1.PerconaServerMySQL) error {
	log := logf.FromContext(ctx).WithName("initializeHibernationStatus")

	return k8sretry.RetryOnConflict(k8sretry.DefaultRetry, func() error {
		// Get fresh copy of the cluster
		fresh := &apiv1.PerconaServerMySQL{}
		if err := r.Client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil {
			log.Error(err, "Failed to get fresh cluster copy for status initialization", "cluster", cr.Name, "namespace", cr.Namespace)
			return err
		}

		// Initialize hibernation status
		if fresh.Status.Hibernation == nil {
			fresh.Status.Hibernation = &apiv1.HibernationStatus{}
		}

		// Set appropriate state based on current pause status
		if fresh.Spec.Pause {
			fresh.Status.Hibernation.State = apiv1.HibernationStatePaused
		} else {
			fresh.Status.Hibernation.State = apiv1.HibernationStateActive
		}

		now := time.Now()

		// The spec is re-read above, so the hibernation section may be gone
		// by now; skip the schedule calculation in that case.
		if spec := fresh.Spec.Hibernation; spec != nil {
			// Calculate next pause time if schedule is configured
			if spec.Schedule.Pause != "" {
				if cronSchedule, err := cron.ParseStandard(spec.Schedule.Pause); err == nil {
					nextPauseTime := r.calculateNextScheduleTime(now, cronSchedule)
					fresh.Status.Hibernation.NextPauseTime = &nextPauseTime
				} else {
					log.Error(err, "Failed to parse pause schedule for initial next time calculation", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", spec.Schedule.Pause)
				}
			}

			// Calculate next unpause time if schedule is configured
			if spec.Schedule.Unpause != "" {
				if cronSchedule, err := cron.ParseStandard(spec.Schedule.Unpause); err == nil {
					nextUnpauseTime := r.calculateNextScheduleTime(now, cronSchedule)
					fresh.Status.Hibernation.NextUnpauseTime = &nextUnpauseTime
				} else {
					log.Error(err, "Failed to parse unpause schedule for initial next time calculation", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", spec.Schedule.Unpause)
				}
			}
		}

		if err := r.Client.Status().Update(ctx, fresh); err != nil {
			log.Error(err, "Failed to initialize hibernation status", "cluster", cr.Name, "namespace", cr.Namespace)
			return err
		}

		return nil
	})
}

// updateHibernationStatus updates the hibernation status with a reason
// (deprecated, use updateHibernationState).
//
// NOTE(review): it forwards an empty state, and updateHibernationState stores
// whatever state it is given, so this call also resets
// Status.Hibernation.State to "" — confirm callers expect that.
func (r *PerconaServerMySQLHibernationReconciler) updateHibernationStatus(ctx context.Context, cr *apiv1.PerconaServerMySQL, reason string) error {
	return r.updateHibernationState(ctx, cr, "", reason)
}

// updateHibernationScheduleIfChanged refreshes the next pause/unpause times
// when they are missing or no longer match the configured schedules.
//
// For each configured schedule it requests an update when the stored next time
// is nil, or when the time computed now by calculateNextScheduleTime differs
// from the stored one by more than one hour. It returns nil without doing
// anything when the cluster has no hibernation status or no hibernation spec,
// and otherwise returns the result of updateHibernationNextTimes when an
// update is needed.
func (r *PerconaServerMySQLHibernationReconciler) updateHibernationScheduleIfChanged(ctx context.Context, cr *apiv1.PerconaServerMySQL) error {
	log := logf.FromContext(ctx).WithName("updateHibernationScheduleIfChanged")

	// Nothing to update without an existing status or a hibernation spec.
	if cr.Status.Hibernation == nil || cr.Spec.Hibernation == nil {
		return nil
	}

	// Check if we need to initialize missing times or if schedule strings have changed
	needsUpdate := false

	// Check if pause schedule has changed or is missing
	if cr.Spec.Hibernation.Schedule.Pause != "" {
		if cr.Status.Hibernation.NextPauseTime == nil {
			needsUpdate = true
			log.Info("📅 Initializing missing next pause time", "cluster", cr.Name, "namespace", cr.Namespace)
		} else if cronSchedule, err := cron.ParseStandard(cr.Spec.Hibernation.Schedule.Pause); err == nil {
			// Detect a schedule change by comparing with the time calculated now.
			expectedNextPauseTime := r.calculateNextScheduleTime(time.Now(), cronSchedule)
			currentNextPauseTime := cr.Status.Hibernation.NextPauseTime

			// Only update if the calculated time is significantly different (more than 1 hour)
			// This prevents race conditions while still detecting real schedule changes
			timeDiff := expectedNextPauseTime.Sub(currentNextPauseTime.Time)
			if timeDiff > time.Hour || timeDiff < -time.Hour {
				needsUpdate = true
				log.Info("📅 Pause schedule changed, updating next pause time", "cluster", cr.Name, "namespace", cr.Namespace,
					"oldTime", currentNextPauseTime, "newTime", expectedNextPauseTime)
			}
		}
	}

	// Check if unpause schedule has changed or is missing
	if cr.Spec.Hibernation.Schedule.Unpause != "" {
		if cr.Status.Hibernation.NextUnpauseTime == nil {
			needsUpdate = true
			log.Info("📅 Initializing missing next unpause time", "cluster", cr.Name, "namespace", cr.Namespace)
		} else if cronSchedule, err := cron.ParseStandard(cr.Spec.Hibernation.Schedule.Unpause); err == nil {
			// Detect a schedule change by comparing with the time calculated now.
			expectedNextUnpauseTime := r.calculateNextScheduleTime(time.Now(), cronSchedule)
			currentNextUnpauseTime := cr.Status.Hibernation.NextUnpauseTime

			// Only update if the calculated time is significantly different (more than 1 hour)
			// This prevents race conditions while still detecting real schedule changes
			timeDiff := expectedNextUnpauseTime.Sub(currentNextUnpauseTime.Time)
			if timeDiff > time.Hour || timeDiff < -time.Hour {
				needsUpdate = true
				log.Info("📅 Unpause schedule changed, updating next unpause time", "cluster", cr.Name, "namespace", cr.Namespace,
					"oldTime", currentNextUnpauseTime, "newTime", expectedNextUnpauseTime)
			}
		}
	}

	// Update the status if needed
	if needsUpdate {
		return r.updateHibernationNextTimes(ctx, cr)
	}

	return nil
}

// updateHibernationNextTimes recomputes NextPauseTime and NextUnpauseTime in
// the hibernation status from the configured schedules.
//
// Inside a RetryOnConflict loop it re-reads the cluster, recalculates each
// configured schedule with calculateNextScheduleTime (parse failures are
// logged and skipped) and writes the status. Errors from Get and the status
// update are returned.
func (r *PerconaServerMySQLHibernationReconciler) updateHibernationNextTimes(ctx context.Context, cr *apiv1.PerconaServerMySQL) error {
	log := logf.FromContext(ctx).WithName("updateHibernationNextTimes")

	return k8sretry.RetryOnConflict(k8sretry.DefaultRetry, func() error {
		// Get fresh copy of the cluster
		fresh := &apiv1.PerconaServerMySQL{}
		if err := r.Client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil {
			log.Error(err, "Failed to get fresh cluster copy for schedule update", "cluster", cr.Name, "namespace", cr.Namespace)
			return err
		}

		// Ensure hibernation status exists
		if fresh.Status.Hibernation == nil {
			fresh.Status.Hibernation = &apiv1.HibernationStatus{}
		}

		now := time.Now()

		// The spec is re-read above, so the hibernation section may be gone
		// by now; skip the schedule calculation in that case.
		if spec := fresh.Spec.Hibernation; spec != nil {
			// Update next pause time
			if spec.Schedule.Pause != "" {
				if cronSchedule, err := cron.ParseStandard(spec.Schedule.Pause); err == nil {
					nextPauseTime := r.calculateNextScheduleTime(now, cronSchedule)
					fresh.Status.Hibernation.NextPauseTime = &nextPauseTime
				} else {
					log.Error(err, "Failed to parse pause schedule for next time calculation", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", spec.Schedule.Pause)
				}
			}

			// Update next unpause time
			if spec.Schedule.Unpause != "" {
				if cronSchedule, err := cron.ParseStandard(spec.Schedule.Unpause); err == nil {
					nextUnpauseTime := r.calculateNextScheduleTime(now, cronSchedule)
					fresh.Status.Hibernation.NextUnpauseTime = &nextUnpauseTime
				} else {
					log.Error(err, "Failed to parse unpause schedule for next time calculation", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", spec.Schedule.Unpause)
				}
			}
		}

		// Update the status
		if err := r.Client.Status().Update(ctx, fresh); err != nil {
			log.Error(err, "Failed to update hibernation next times", "cluster", cr.Name, "namespace", cr.Namespace)
			return err
		}

		log.Info("✅ Hibernation next times updated", "cluster", cr.Name, "namespace", cr.Namespace,
			"nextPauseTime", fresh.Status.Hibernation.NextPauseTime,
			"nextUnpauseTime", fresh.Status.Hibernation.NextUnpauseTime)

		return nil
	})
}

// calculateNextScheduleTime calculates the next schedule time, considering if
// today's time is still available.
//
// It first asks the schedule for its activation following one second before
// midnight of now's calendar day; when that activation is still after now it
// is returned. Otherwise the activation following now is returned.
func (r *PerconaServerMySQLHibernationReconciler) calculateNextScheduleTime(now time.Time, cronSchedule cron.Schedule) metav1.Time {
	// Get today's start time
	today := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location())

	// Calculate today's scheduled time
	todaySchedule := cronSchedule.Next(today.Add(-time.Second))

	// If today's scheduled time is still in the future, use it
	if todaySchedule.After(now) {
		return metav1.NewTime(todaySchedule)
	}

	// Otherwise, use the next occurrence (tomorrow or later)
	return metav1.NewTime(cronSchedule.Next(now))
}

// SetupWithManager sets up the controller with the Manager. The controller
// reconciles PerconaServerMySQL objects and is registered under the name
// "pshibernation-controller".
func (r *PerconaServerMySQLHibernationReconciler) SetupWithManager(mgr ctrl.Manager) error {
	return ctrl.NewControllerManagedBy(mgr).
		For(&apiv1.PerconaServerMySQL{}).
		Named("pshibernation-controller").
		Complete(r)
}
diff --git a/pkg/controller/pshibernation/controller_test.go b/pkg/controller/pshibernation/controller_test.go new file mode 100644 index 000000000..9d941fa73 --- /dev/null +++ b/pkg/controller/pshibernation/controller_test.go @@ -0,0 +1,1668 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+*/ + +package pshibernation + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + apiv1 "github.com/percona/percona-server-mysql-operator/api/v1" + "github.com/percona/percona-server-mysql-operator/pkg/platform" + "github.com/robfig/cron/v3" +) + +func TestPerconaServerMySQLHibernationReconciler_shouldPauseCluster(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, apiv1.AddToScheme(scheme)) + + tests := []struct { + name string + cr *apiv1.PerconaServerMySQL + schedule string + now time.Time + expectedResult bool + expectedError bool + description string + }{ + { + name: "should pause - first time evaluation with current time matching schedule", + description: "First-time evaluation when current time exactly matches the pause schedule - should pause", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "45 13 * * 1-5", // 1:45 PM Mon-Fri + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStateActive, + }, + }, + }, + schedule: "45 13 * * 1-5", + now: time.Date(2025, 9, 18, 13, 45, 0, 0, time.UTC), // Thursday 1:45 PM + expectedResult: true, // Should pause when time matches + expectedError: false, + }, + { + name: "should not pause - first time evaluation with current time before schedule", + description: "First-time evaluation when current time is before the pause schedule", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + 
Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "45 13 * * 1-5", // 1:45 PM Mon-Fri + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStateActive, + }, + }, + }, + schedule: "45 13 * * 1-5", + now: time.Date(2025, 9, 18, 13, 44, 0, 0, time.UTC), // Thursday 1:44 PM + expectedResult: false, + expectedError: false, + }, + { + name: "should pause - first time evaluation with current time after schedule", + description: "First-time evaluation when current time is after the pause schedule - should pause if time has arrived", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "45 13 * * 1-5", // 1:45 PM Mon-Fri + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStateActive, + }, + }, + }, + schedule: "45 13 * * 1-5", + now: time.Date(2025, 9, 18, 13, 47, 0, 0, time.UTC), // Thursday 1:47 PM + expectedResult: true, // Should pause when time has arrived + expectedError: false, + }, + { + name: "DEBUG: should pause - real scenario from logs (11:15 schedule, 11:18 time)", + description: "Real scenario: Schedule is 15 11 * * 1-5 (11:15 AM), current time is 11:18 AM, cluster was never paused", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ps-cluster1", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "15 11 * * 1-5", // 11:15 AM Mon-Fri + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateReady, + Hibernation: 
&apiv1.HibernationStatus{ + State: apiv1.HibernationStateActive, + // No LastPauseTime or LastUnpauseTime - first time evaluation + }, + }, + }, + schedule: "15 11 * * 1-5", + now: time.Date(2025, 9, 19, 11, 18, 0, 0, time.UTC), // Friday 11:18 AM (3 minutes after schedule) + expectedResult: true, // Should pause - scheduled time has arrived + expectedError: false, + }, + { + name: "DEBUG: should pause - real scenario with reference time (11:15 schedule, 11:18 time, with LastUnpauseTime)", + description: "Real scenario: Schedule is 15 11 * * 1-5 (11:15 AM), current time is 11:18 AM, but cluster has LastUnpauseTime from earlier", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ps-cluster1", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "15 11 * * 1-5", // 11:15 AM Mon-Fri + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateReady, + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStateActive, + LastUnpauseTime: &metav1.Time{ + Time: time.Date(2025, 9, 19, 11, 10, 0, 0, time.UTC), // 11:10 AM (before schedule) + }, + }, + }, + }, + schedule: "15 11 * * 1-5", + now: time.Date(2025, 9, 19, 11, 18, 0, 0, time.UTC), // Friday 11:18 AM (3 minutes after schedule) + expectedResult: true, // Should pause - we have reference time and current time is after schedule + expectedError: false, + }, + { + name: "should pause - with previous pause time", + description: "Evaluation with previous pause time as reference", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "45 13 * * 1-5", // 1:45 PM Mon-Fri + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: 
&apiv1.HibernationStatus{ + State: apiv1.HibernationStateActive, + LastPauseTime: &metav1.Time{ + Time: time.Date(2025, 9, 17, 13, 45, 0, 0, time.UTC), // Yesterday 1:45 PM + }, + }, + }, + }, + schedule: "45 13 * * 1-5", + now: time.Date(2025, 9, 18, 13, 45, 0, 0, time.UTC), // Thursday 1:45 PM + expectedResult: true, + expectedError: false, + }, + { + name: "should pause - with previous unpause time", + description: "Evaluation with previous unpause time as reference (no previous pause)", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "45 13 * * 1-5", // 1:45 PM Mon-Fri + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStateActive, + LastUnpauseTime: &metav1.Time{ + Time: time.Date(2025, 9, 18, 8, 0, 0, 0, time.UTC), // Today 8:00 AM + }, + }, + }, + }, + schedule: "45 13 * * 1-5", + now: time.Date(2025, 9, 18, 13, 45, 0, 0, time.UTC), // Thursday 1:45 PM + expectedResult: true, + expectedError: false, + }, + { + name: "invalid cron schedule", + description: "Should return error for invalid cron expression", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "invalid cron", + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStateActive, + }, + }, + }, + schedule: "invalid cron", + now: time.Date(2025, 9, 18, 13, 45, 0, 0, time.UTC), + expectedResult: false, + expectedError: true, + }, + { + name: "real-world scenario - pause time passed, should pause", + description: "Real scenario: pause scheduled for 
13:45, current time is 13:47, should trigger pause", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ps-cluster1", + Namespace: "ps", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "45 13 * * 1-5", // 1:45 PM Mon-Fri + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStateActive, + }, + }, + }, + schedule: "45 13 * * 1-5", + now: time.Date(2025, 9, 18, 13, 47, 28, 0, time.UTC), // Thursday 1:47:28 PM (2+ minutes after pause time) + expectedResult: true, // Should pause when time has passed + expectedError: false, + }, + { + name: "user reported bug - pause at 09:40, enable hibernation at 10:08, should pause", + description: "Bug fix: When hibernation is enabled after scheduled time has passed, should pause if time has arrived", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ps-cluster1", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "40 09 * * 1-5", // 9:40 AM Mon-Fri + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStateActive, + }, + }, + }, + schedule: "40 09 * * 1-5", + now: time.Date(2025, 9, 19, 10, 8, 40, 0, time.UTC), // 10:08:40 AM (28 minutes after scheduled time) + expectedResult: true, // Should pause when time has passed + expectedError: false, + }, + { + name: "should pause - time matches schedule", + description: "Time exactly matches the pause schedule - this should work for normal operation (not first-time)", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + 
Schedule: apiv1.HibernationSchedule{ + Pause: "0 20 * * 1-5", // 8 PM Mon-Fri + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStateActive, + LastUnpauseTime: &metav1.Time{ + Time: time.Date(2024, 1, 15, 19, 0, 0, 0, time.UTC), // 1 hour before + }, + }, + }, + }, + schedule: "0 20 * * 1-5", + now: time.Date(2024, 1, 15, 20, 0, 0, 0, time.UTC), // Monday 8 PM + expectedResult: true, + expectedError: false, + }, + { + name: "should not pause - cluster already paused", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Pause: true, + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "0 20 * * 1-5", + }, + }, + }, + }, + schedule: "0 20 * * 1-5", + now: time.Date(2024, 1, 15, 20, 0, 0, 0, time.UTC), + expectedResult: false, + expectedError: false, + }, + { + name: "should not pause - time does not match schedule", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Pause: false, + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "0 20 * * 1-5", + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + LastPauseTime: &metav1.Time{Time: time.Date(2024, 1, 14, 20, 0, 0, 0, time.UTC)}, // Previous day + }, + }, + }, + schedule: "0 20 * * 1-5", + now: time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC), // Monday 10 AM + expectedResult: false, + expectedError: false, + }, + { + name: "invalid schedule", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Pause: false, + }, + }, + schedule: "invalid-cron", + now: time.Date(2024, 1, 15, 20, 0, 0, 
0, time.UTC), + expectedResult: false, + expectedError: true, + }, + { + name: "should not pause - past scheduled time with unpause time after schedule", + description: "Bug fix: Should not pause when current time is past today's schedule and there was an unpause after the schedule", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Pause: false, + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "40 09 * * 1-5", // 9:40 AM Mon-Fri + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStateActive, + LastUnpauseTime: &metav1.Time{ + Time: time.Date(2025, 9, 19, 9, 48, 28, 0, time.UTC), // 9:48:28 AM (after schedule) + }, + }, + }, + }, + schedule: "40 09 * * 1-5", + now: time.Date(2025, 9, 19, 9, 48, 30, 0, time.UTC), // 9:48:30 AM (past schedule, after unpause) + expectedResult: false, + expectedError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.cr).Build() + reconciler := &PerconaServerMySQLHibernationReconciler{ + Client: client, + Scheme: scheme, + ServerVersion: &platform.ServerVersion{}, + } + + result, err := reconciler.shouldPauseCluster(context.Background(), tt.cr, tt.schedule, tt.now) + + if tt.expectedError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expectedResult, result) + } + }) + } +} + +func TestPerconaServerMySQLHibernationReconciler_shouldUnpauseCluster(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, apiv1.AddToScheme(scheme)) + + tests := []struct { + name string + cr *apiv1.PerconaServerMySQL + schedule string + now time.Time + expectedResult bool + expectedError bool + description string + }{ + { + name: "should NOT unpause - first time evaluation 
with current time matching schedule", + description: "First-time evaluation when current time exactly matches the unpause schedule - should wait for next window", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Pause: true, + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Unpause: "50 13 * * 1-5", // 1:50 PM Mon-Fri + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStatePaused, + }, + }, + }, + schedule: "50 13 * * 1-5", + now: time.Date(2025, 9, 18, 13, 50, 0, 0, time.UTC), // Thursday 1:50 PM + expectedResult: false, // Should NOT unpause when time matches (first-time evaluation) + expectedError: false, + }, + { + name: "should not unpause - first time evaluation with current time before schedule", + description: "First-time evaluation when current time is before the unpause schedule", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Pause: true, + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Unpause: "50 13 * * 1-5", // 1:50 PM Mon-Fri + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStatePaused, + }, + }, + }, + schedule: "50 13 * * 1-5", + now: time.Date(2025, 9, 18, 13, 49, 0, 0, time.UTC), // Thursday 1:49 PM + expectedResult: false, + expectedError: false, + }, + { + name: "should NOT unpause - first time evaluation with current time after schedule", + description: "First-time evaluation when current time is after the unpause schedule - should wait for next window", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: 
apiv1.PerconaServerMySQLSpec{ + Pause: true, + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Unpause: "50 13 * * 1-5", // 1:50 PM Mon-Fri + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStatePaused, + }, + }, + }, + schedule: "50 13 * * 1-5", + now: time.Date(2025, 9, 18, 13, 52, 0, 0, time.UTC), // Thursday 1:52 PM + expectedResult: false, // Should NOT unpause when time has passed (first-time evaluation) + expectedError: false, + }, + { + name: "real-world scenario - unpause time passed, should wait for next window", + description: "Real scenario: unpause scheduled for 13:50, current time is 13:52, should NOT trigger unpause (wait for next window)", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ps-cluster1", + Namespace: "ps", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Pause: true, + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Unpause: "50 13 * * 1-5", // 1:50 PM Mon-Fri + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStatePaused, + }, + }, + }, + schedule: "50 13 * * 1-5", + now: time.Date(2025, 9, 18, 13, 52, 0, 0, time.UTC), // Thursday 1:52 PM (2 minutes after unpause time) + expectedResult: false, // Should NOT unpause when time has passed (first-time evaluation) + expectedError: false, + }, + { + name: "should unpause - time matches schedule", + description: "Time exactly matches the unpause schedule - this should work for normal operation (not first-time)", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Pause: true, + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Unpause: "0 8 * * 1-5", + }, + }, + }, + Status: 
apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStatePaused, + LastPauseTime: &metav1.Time{ + Time: time.Date(2024, 1, 15, 7, 0, 0, 0, time.UTC), // 1 hour before + }, + }, + }, + }, + schedule: "0 8 * * 1-5", + now: time.Date(2024, 1, 15, 8, 0, 0, 0, time.UTC), // Monday 8 AM + expectedResult: true, + expectedError: false, + }, + { + name: "should not unpause - cluster not paused", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Pause: false, + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Unpause: "0 8 * * 1-5", + }, + }, + }, + }, + schedule: "0 8 * * 1-5", + now: time.Date(2024, 1, 15, 8, 0, 0, 0, time.UTC), + expectedResult: false, + expectedError: false, + }, + { + name: "should not unpause - time does not match schedule", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Pause: true, + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Unpause: "0 8 * * 1-5", + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + LastUnpauseTime: &metav1.Time{Time: time.Date(2024, 1, 15, 8, 0, 0, 0, time.UTC)}, // Same day at 8 AM + }, + }, + }, + schedule: "0 8 * * 1-5", + now: time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC), // Monday 10 AM + expectedResult: false, + expectedError: false, + }, + { + name: "should unpause - real-world scenario with reference time", + description: "Real scenario: cluster paused earlier today, current time is after today's unpause schedule", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ps-cluster1", + Namespace: "ps", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Pause: true, + Hibernation: &apiv1.HibernationSpec{ 
+ Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Unpause: "10 14 * * 1-5", // 2:10 PM Mon-Fri + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStatePaused, + LastPauseTime: &metav1.Time{ + Time: time.Date(2025, 9, 18, 14, 5, 0, 0, time.UTC), // Today 2:05 PM (when paused, before unpause schedule) + }, + }, + }, + }, + schedule: "10 14 * * 1-5", + now: time.Date(2025, 9, 18, 16, 45, 0, 0, time.UTC), // Thursday 4:45 PM (2+ hours after unpause time) + expectedResult: true, + expectedError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.cr).Build() + reconciler := &PerconaServerMySQLHibernationReconciler{ + Client: client, + Scheme: scheme, + ServerVersion: &platform.ServerVersion{}, + } + + result, err := reconciler.shouldUnpauseCluster(context.Background(), tt.cr, tt.schedule, tt.now) + + if tt.expectedError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + if tt.expectedResult != result { + t.Logf("Debug - Test: %s", tt.name) + t.Logf(" Expected: %v", tt.expectedResult) + t.Logf(" Actual: %v", result) + t.Logf(" Schedule: %s", tt.schedule) + t.Logf(" Current time: %s", tt.now.Format(time.RFC3339)) + if tt.cr.Status.Hibernation != nil && tt.cr.Status.Hibernation.LastPauseTime != nil { + t.Logf(" LastPauseTime: %s", tt.cr.Status.Hibernation.LastPauseTime.Time.Format(time.RFC3339)) + } + } + assert.Equal(t, tt.expectedResult, result) + } + }) + } +} + +func TestPerconaServerMySQLHibernationReconciler_canPauseCluster(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, apiv1.AddToScheme(scheme)) + + tests := []struct { + name string + cr *apiv1.PerconaServerMySQL + backups []*apiv1.PerconaServerMySQLBackup + restores []*apiv1.PerconaServerMySQLRestore + expectedResult bool + expectedReason string + expectedError bool + }{ + { + name: "can pause - no 
active operations and cluster ready", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateReady, + }, + }, + backups: []*apiv1.PerconaServerMySQLBackup{}, + restores: []*apiv1.PerconaServerMySQLRestore{}, + expectedResult: true, + expectedReason: "", + expectedError: false, + }, + { + name: "cannot pause - cluster not ready (initializing)", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateInitializing, + }, + }, + backups: []*apiv1.PerconaServerMySQLBackup{}, + restores: []*apiv1.PerconaServerMySQLRestore{}, + expectedResult: false, + expectedReason: "cluster not ready (state: Initializing)", + expectedError: false, + }, + { + name: "cannot pause - cluster not ready (error)", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateError, + }, + }, + backups: []*apiv1.PerconaServerMySQLBackup{}, + restores: []*apiv1.PerconaServerMySQLRestore{}, + expectedResult: false, + expectedReason: "cluster not ready (state: Error)", + expectedError: false, + }, + { + name: "cannot pause - cluster not ready (stopping)", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateStopping, + }, + }, + backups: []*apiv1.PerconaServerMySQLBackup{}, + restores: []*apiv1.PerconaServerMySQLRestore{}, + expectedResult: false, + expectedReason: "cluster not ready (state: Stopping)", + expectedError: false, + }, + { + name: "cannot pause - active backup", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Status: 
apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateReady, + }, + }, + backups: []*apiv1.PerconaServerMySQLBackup{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "active-backup", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLBackupSpec{ + ClusterName: "test-cluster", + }, + Status: apiv1.PerconaServerMySQLBackupStatus{ + State: apiv1.BackupRunning, + }, + }, + }, + restores: []*apiv1.PerconaServerMySQLRestore{}, + expectedResult: false, + expectedReason: "active backup: active-backup (state: Running)", + expectedError: false, + }, + { + name: "cannot pause - active restore", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateReady, + }, + }, + backups: []*apiv1.PerconaServerMySQLBackup{}, + restores: []*apiv1.PerconaServerMySQLRestore{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "active-restore", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLRestoreSpec{ + ClusterName: "test-cluster", + }, + Status: apiv1.PerconaServerMySQLRestoreStatus{ + State: apiv1.RestoreRunning, + }, + }, + }, + expectedResult: false, + expectedReason: "active restore: active-restore (state: Running)", + expectedError: false, + }, + { + name: "can pause - completed backup", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateReady, + }, + }, + backups: []*apiv1.PerconaServerMySQLBackup{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "completed-backup", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLBackupSpec{ + ClusterName: "test-cluster", + }, + Status: apiv1.PerconaServerMySQLBackupStatus{ + State: apiv1.BackupSucceeded, + }, + }, + }, + restores: []*apiv1.PerconaServerMySQLRestore{}, + expectedResult: true, + expectedReason: "", + expectedError: false, + }, + { + name: "can pause - backup for 
different cluster", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateReady, + }, + }, + backups: []*apiv1.PerconaServerMySQLBackup{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "other-backup", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLBackupSpec{ + ClusterName: "other-cluster", + }, + Status: apiv1.PerconaServerMySQLBackupStatus{ + State: apiv1.BackupRunning, + }, + }, + }, + restores: []*apiv1.PerconaServerMySQLRestore{}, + expectedResult: true, + expectedReason: "", + expectedError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + objects := []client.Object{tt.cr} + for _, backup := range tt.backups { + objects = append(objects, backup) + } + for _, restore := range tt.restores { + objects = append(objects, restore) + } + + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(objects...).Build() + reconciler := &PerconaServerMySQLHibernationReconciler{ + Client: client, + Scheme: scheme, + ServerVersion: &platform.ServerVersion{}, + } + + result, reason, err := reconciler.canPauseCluster(context.Background(), tt.cr) + + if tt.expectedError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expectedResult, result) + assert.Equal(t, tt.expectedReason, reason) + } + }) + } +} + +func TestPerconaServerMySQLHibernationReconciler_scheduleHibernationForNextWindow(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, apiv1.AddToScheme(scheme)) + + tests := []struct { + name string + cr *apiv1.PerconaServerMySQL + schedule string + reason string + expectedError bool + expectedState string + expectedReason string + }{ + { + name: "schedule for next window - cluster not ready", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Status: 
apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateInitializing, + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStateActive, + }, + }, + }, + schedule: "0 18 * * 1-5", // 6 PM Mon-Fri + reason: "cluster not ready (state: Initializing)", + expectedError: false, + expectedState: apiv1.HibernationStateScheduled, + expectedReason: "Scheduled for next window: cluster not ready (state: Initializing)", + }, + { + name: "schedule for next window - cluster in error state", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateError, + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStateActive, + }, + }, + }, + schedule: "30 19 * * 1-5", // 7:30 PM Mon-Fri + reason: "cluster not ready (state: Error)", + expectedError: false, + expectedState: apiv1.HibernationStateScheduled, + expectedReason: "Scheduled for next window: cluster not ready (state: Error)", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create fake client with the test cluster + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(tt.cr). + WithStatusSubresource(tt.cr). 
+ Build() + + // Create reconciler + reconciler := &PerconaServerMySQLHibernationReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + // Create context + ctx := context.Background() + + // Call the method + err := reconciler.scheduleHibernationForNextWindow(ctx, tt.cr, tt.schedule, tt.reason) + + // Check error expectation + if tt.expectedError { + require.Error(t, err) + return + } + require.NoError(t, err) + + // Get updated cluster + updatedCluster := &apiv1.PerconaServerMySQL{} + err = fakeClient.Get(ctx, types.NamespacedName{Name: tt.cr.Name, Namespace: tt.cr.Namespace}, updatedCluster) + require.NoError(t, err) + + // Verify hibernation status + require.NotNil(t, updatedCluster.Status.Hibernation) + assert.Equal(t, tt.expectedState, updatedCluster.Status.Hibernation.State) + assert.Contains(t, updatedCluster.Status.Hibernation.Reason, "Scheduled for next window") + assert.NotNil(t, updatedCluster.Status.Hibernation.NextPauseTime) + }) + } +} + +func TestPerconaServerMySQLHibernationReconciler_pauseCluster(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, apiv1.AddToScheme(scheme)) + + cr := &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Pause: false, + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "0 20 * * 1-5", + }, + }, + }, + } + + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cr).WithStatusSubresource(cr).Build() + reconciler := &PerconaServerMySQLHibernationReconciler{ + Client: client, + Scheme: scheme, + ServerVersion: &platform.ServerVersion{}, + } + + err := reconciler.pauseCluster(context.Background(), cr) + require.NoError(t, err) + + // Verify the cluster was paused + updated := &apiv1.PerconaServerMySQL{} + err = client.Get(context.Background(), types.NamespacedName{Name: "test-cluster", Namespace: "default"}, updated) + require.NoError(t, 
err) + + assert.True(t, updated.Spec.Pause) + assert.NotNil(t, updated.Status.Hibernation) + assert.Equal(t, apiv1.HibernationStatePaused, updated.Status.Hibernation.State) + assert.NotNil(t, updated.Status.Hibernation.LastPauseTime) + assert.NotNil(t, updated.Status.Hibernation.NextPauseTime) +} + +func TestPerconaServerMySQLHibernationReconciler_unpauseCluster(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, apiv1.AddToScheme(scheme)) + + cr := &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Pause: true, + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Unpause: "0 8 * * 1-5", + }, + }, + }, + } + + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cr).WithStatusSubresource(cr).Build() + reconciler := &PerconaServerMySQLHibernationReconciler{ + Client: client, + Scheme: scheme, + ServerVersion: &platform.ServerVersion{}, + } + + err := reconciler.unpauseCluster(context.Background(), cr) + require.NoError(t, err) + + // Verify the cluster was unpaused + updated := &apiv1.PerconaServerMySQL{} + err = client.Get(context.Background(), types.NamespacedName{Name: "test-cluster", Namespace: "default"}, updated) + require.NoError(t, err) + + assert.False(t, updated.Spec.Pause) + assert.NotNil(t, updated.Status.Hibernation) + assert.Equal(t, apiv1.HibernationStateActive, updated.Status.Hibernation.State) + assert.NotNil(t, updated.Status.Hibernation.LastUnpauseTime) + assert.NotNil(t, updated.Status.Hibernation.NextUnpauseTime) +} + +func TestPerconaServerMySQLHibernationReconciler_Reconcile(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, apiv1.AddToScheme(scheme)) + + tests := []struct { + name string + cr *apiv1.PerconaServerMySQL + expectedResult ctrl.Result + expectedError bool + }{ + { + name: "hibernation disabled", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: 
metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: false, + }, + }, + }, + expectedResult: ctrl.Result{RequeueAfter: 5 * time.Minute}, + expectedError: false, + }, + { + name: "hibernation enabled", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "0 20 * * 1-5", + }, + }, + }, + }, + expectedResult: ctrl.Result{RequeueAfter: 1 * time.Minute}, + expectedError: false, + }, + { + name: "hibernation enabled but state is disabled", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "0 20 * * 1-5", + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStateDisabled, // State is disabled but hibernation is enabled + }, + }, + }, + expectedResult: ctrl.Result{RequeueAfter: 1 * time.Minute}, + expectedError: false, + }, + { + name: "cluster not found", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "non-existent", + Namespace: "default", + }, + }, + expectedResult: ctrl.Result{RequeueAfter: 5 * time.Minute}, + expectedError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.cr).WithStatusSubresource(tt.cr).Build() + reconciler := &PerconaServerMySQLHibernationReconciler{ + Client: client, + Scheme: scheme, + ServerVersion: &platform.ServerVersion{}, + } + + req := ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: tt.cr.Name, + Namespace: 
tt.cr.Namespace, + }, + } + + result, err := reconciler.Reconcile(context.Background(), req) + + if tt.expectedError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expectedResult.RequeueAfter, result.RequeueAfter) + } + }) + } +} + +func TestPerconaServerMySQLHibernationReconciler_updateHibernationState(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, apiv1.AddToScheme(scheme)) + + cr := &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Pause: false, + }, + } + + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cr).WithStatusSubresource(cr).Build() + reconciler := &PerconaServerMySQLHibernationReconciler{ + Client: client, + Scheme: scheme, + } + + ctx := context.Background() + + // Test updating hibernation state + err := reconciler.updateHibernationState(ctx, cr, apiv1.HibernationStateScheduled, "Test reason") + require.NoError(t, err) + + // Verify the state was updated + updated := &apiv1.PerconaServerMySQL{} + err = client.Get(ctx, types.NamespacedName{Name: "test-cluster", Namespace: "default"}, updated) + require.NoError(t, err) + + assert.NotNil(t, updated.Status.Hibernation) + assert.Equal(t, apiv1.HibernationStateScheduled, updated.Status.Hibernation.State) + assert.Equal(t, "Test reason", updated.Status.Hibernation.Reason) +} + +func TestPerconaServerMySQLHibernationReconciler_SetupWithManager(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, apiv1.AddToScheme(scheme)) + + reconciler := &PerconaServerMySQLHibernationReconciler{ + Client: fake.NewClientBuilder().WithScheme(scheme).Build(), + Scheme: scheme, + ServerVersion: &platform.ServerVersion{}, + } + + mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ + Scheme: scheme, + }) + require.NoError(t, err) + + err = reconciler.SetupWithManager(mgr) + assert.NoError(t, err) +} + +// Benchmark tests 
+func BenchmarkHibernationValidation(b *testing.B) { + spec := &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "0 20 * * 1-5", + Unpause: "0 8 * * 1-5", + }, + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = spec.Validate() + } +} + +func BenchmarkHibernationScheduleParsing(b *testing.B) { + schedule := "0 20 * * 1-5" + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = time.Parse("0 20 * * 1-5", schedule) + } +} + +// TestScheduleEvaluationLogic tests the core cron parsing and schedule evaluation logic +func TestScheduleEvaluationLogic(t *testing.T) { + tests := []struct { + name string + schedule string + now time.Time + expectedResult bool + expectedError bool + description string + }{ + { + name: "cron parsing - valid schedule", + schedule: "45 13 * * 1-5", + now: time.Date(2025, 9, 18, 13, 45, 0, 0, time.UTC), + expectedResult: true, + expectedError: false, + description: "Valid cron expression should parse and evaluate correctly", + }, + { + name: "cron parsing - invalid schedule", + schedule: "invalid cron", + now: time.Date(2025, 9, 18, 13, 45, 0, 0, time.UTC), + expectedResult: false, + expectedError: true, + description: "Invalid cron expression should return error", + }, + { + name: "schedule evaluation - exact match", + schedule: "45 13 * * 1-5", + now: time.Date(2025, 9, 18, 13, 45, 0, 0, time.UTC), // Thursday 1:45 PM + expectedResult: true, + expectedError: false, + description: "Current time exactly matches schedule should return true", + }, + { + name: "schedule evaluation - before schedule", + schedule: "45 13 * * 1-5", + now: time.Date(2025, 9, 18, 13, 44, 0, 0, time.UTC), // Thursday 1:44 PM + expectedResult: false, + expectedError: false, + description: "Current time before schedule should return false", + }, + { + name: "schedule evaluation - after schedule", + schedule: "45 13 * * 1-5", + now: time.Date(2025, 9, 18, 13, 47, 0, 0, time.UTC), // Thursday 1:47 PM + expectedResult: true, // 
With the fix, this should return true + expectedError: false, + description: "Current time after schedule should return true with fixed logic", + }, + { + name: "schedule evaluation - wrong day of week", + schedule: "45 13 * * 1-5", // Mon-Fri + now: time.Date(2025, 9, 20, 13, 45, 0, 0, time.UTC), // Saturday 1:45 PM + expectedResult: false, + expectedError: false, + description: "Current time on wrong day of week should return false", + }, + { + name: "schedule evaluation - wrong hour (before schedule)", + schedule: "45 13 * * 1-5", // 1:45 PM + now: time.Date(2025, 9, 18, 12, 45, 0, 0, time.UTC), // Thursday 12:45 PM (1 hour before) + expectedResult: false, + expectedError: false, + description: "Current time before scheduled hour should return false", + }, + { + name: "schedule evaluation - wrong minute (before schedule)", + schedule: "45 13 * * 1-5", // 1:45 PM + now: time.Date(2025, 9, 18, 13, 44, 0, 0, time.UTC), // Thursday 1:44 PM (1 minute before) + expectedResult: false, + expectedError: false, + description: "Current time before scheduled minute should return false", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Test cron parsing + cronSchedule, err := cron.ParseStandard(tt.schedule) + if tt.expectedError { + assert.Error(t, err, "Expected error for invalid cron expression") + return + } + require.NoError(t, err, "Cron parsing should succeed for valid expression") + + // Test schedule evaluation logic (first-time evaluation with fix) + today := time.Date(tt.now.Year(), tt.now.Month(), tt.now.Day(), 0, 0, 0, 0, tt.now.Location()) + todaySchedule := cronSchedule.Next(today.Add(-time.Second)) // Get today's scheduled time + + // Check if the schedule actually applies to today (not tomorrow or later) + isToday := todaySchedule.Year() == tt.now.Year() && + todaySchedule.Month() == tt.now.Month() && + todaySchedule.Day() == tt.now.Day() + + result := isToday && (tt.now.After(todaySchedule) || tt.now.Equal(todaySchedule)) + 
+ // Debug output for failing tests + if tt.expectedResult != result { + t.Logf("Debug - Test: %s", tt.name) + t.Logf(" Current time: %s", tt.now.Format(time.RFC3339)) + t.Logf(" Today schedule: %s", todaySchedule.Format(time.RFC3339)) + t.Logf(" Is today: %v", isToday) + t.Logf(" After/Equal: %v", tt.now.After(todaySchedule) || tt.now.Equal(todaySchedule)) + t.Logf(" Result: %v", result) + t.Logf(" Expected: %v", tt.expectedResult) + } + + assert.Equal(t, tt.expectedResult, result, tt.description) + }) + } +} + +// TestRealWorldScenario tests the exact scenario we encountered in production +func TestRealWorldScenario(t *testing.T) { + t.Run("pause time passed but not triggered", func(t *testing.T) { + // This is the exact scenario we encountered: + // - Pause scheduled for 13:45 (1:45 PM) + // - Current time is 13:47:28 (1:47:28 PM) - 2+ minutes after pause time + // - Should trigger pause but didn't + + schedule := "45 13 * * 1-5" // 1:45 PM Mon-Fri + pauseTime := time.Date(2025, 9, 18, 13, 45, 0, 0, time.UTC) // Thursday 1:45 PM + currentTime := time.Date(2025, 9, 18, 13, 47, 28, 0, time.UTC) // Thursday 1:47:28 PM + + // Parse the cron schedule + cronSchedule, err := cron.ParseStandard(schedule) + require.NoError(t, err, "Should parse valid cron expression") + + // Test first-time evaluation logic (no previous pause/unpause times) + nextPauseTime := cronSchedule.Next(currentTime.Add(-time.Second)) // Check from 1 second ago + shouldPause := currentTime.After(nextPauseTime) || currentTime.Equal(nextPauseTime) + + // This reveals the bug: shouldPause is false because cron.Next() returns tomorrow's time + assert.False(t, shouldPause, "This reveals the bug: cron.Next() returns tomorrow's time, not today's") + assert.True(t, currentTime.After(pauseTime), "Current time should be after pause time") + assert.NotEqual(t, pauseTime, nextPauseTime, "Next pause time is tomorrow's time, not today's (this is the bug)") + }) + + t.Run("unpause time passed but not triggered", 
func(t *testing.T) { + // Similar scenario for unpause: + // - Unpause scheduled for 13:50 (1:50 PM) + // - Current time is 13:52 (1:52 PM) - 2 minutes after unpause time + // - Should trigger unpause but didn't + + schedule := "50 13 * * 1-5" // 1:50 PM Mon-Fri + unpauseTime := time.Date(2025, 9, 18, 13, 50, 0, 0, time.UTC) // Thursday 1:50 PM + currentTime := time.Date(2025, 9, 18, 13, 52, 0, 0, time.UTC) // Thursday 1:52 PM + + // Parse the cron schedule + cronSchedule, err := cron.ParseStandard(schedule) + require.NoError(t, err, "Should parse valid cron expression") + + // Test first-time evaluation logic (no previous pause/unpause times) + nextUnpauseTime := cronSchedule.Next(currentTime.Add(-time.Second)) // Check from 1 second ago + shouldUnpause := currentTime.After(nextUnpauseTime) || currentTime.Equal(nextUnpauseTime) + + // This reveals the bug: shouldUnpause is false because cron.Next() returns tomorrow's time + assert.False(t, shouldUnpause, "This reveals the bug: cron.Next() returns tomorrow's time, not today's") + assert.True(t, currentTime.After(unpauseTime), "Current time should be after unpause time") + assert.NotEqual(t, unpauseTime, nextUnpauseTime, "Next unpause time is tomorrow's time, not today's (this is the bug)") + }) + + t.Run("fixed logic - should work correctly", func(t *testing.T) { + // This test shows how the logic should work with the fix + schedule := "45 13 * * 1-5" // 1:45 PM Mon-Fri + pauseTime := time.Date(2025, 9, 18, 13, 45, 0, 0, time.UTC) // Thursday 1:45 PM + currentTime := time.Date(2025, 9, 18, 13, 47, 28, 0, time.UTC) // Thursday 1:47:28 PM + + // Parse the cron schedule + cronSchedule, err := cron.ParseStandard(schedule) + require.NoError(t, err, "Should parse valid cron expression") + + // Fixed logic: check if current time matches today's schedule + // We need to check if the current time is after the scheduled time for today + today := time.Date(currentTime.Year(), currentTime.Month(), currentTime.Day(), 0, 0, 0, 
0, currentTime.Location()) + todaySchedule := cronSchedule.Next(today.Add(-time.Second)) // Get today's scheduled time + + // If today's schedule has passed, we should trigger + shouldPause := currentTime.After(todaySchedule) || currentTime.Equal(todaySchedule) + + // This should be true with the fixed logic + assert.True(t, shouldPause, "With fixed logic, should pause when current time is after today's scheduled time") + assert.Equal(t, pauseTime, todaySchedule, "Today's schedule should match the expected pause time") + }) +} + +func TestScheduleChangeDetectionLogic(t *testing.T) { + tests := []struct { + name string + currentSchedule string + currentNextTime time.Time + shouldDetectChange bool + description string + }{ + { + name: "same schedule - no change", + currentSchedule: "45 19 * * 1-5", + currentNextTime: time.Date(2025, 9, 18, 19, 45, 0, 0, time.UTC), // Today's time + shouldDetectChange: false, + description: "Should not detect change when times match", + }, + { + name: "different minute - detect change", + currentSchedule: "30 19 * * 1-5", // Changed from 45 to 30 + currentNextTime: time.Date(2025, 9, 18, 19, 45, 0, 0, time.UTC), // Old time + shouldDetectChange: true, + description: "Should detect change when minute is different", + }, + { + name: "different hour - detect change", + currentSchedule: "45 20 * * 1-5", // Changed from 19 to 20 + currentNextTime: time.Date(2025, 9, 18, 19, 45, 0, 0, time.UTC), // Old time + shouldDetectChange: true, + description: "Should detect change when hour is different", + }, + { + name: "different day - detect change", + currentSchedule: "45 19 * * 0,6", // Changed from Mon-Fri to Sat-Sun + currentNextTime: time.Date(2025, 9, 18, 19, 45, 0, 0, time.UTC), // Old time (Wednesday) + shouldDetectChange: true, + description: "Should detect change when day of week is different", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Parse the current schedule + cronSchedule, err := 
cron.ParseStandard(tt.currentSchedule) + require.NoError(t, err, "Should parse valid cron expression") + + // Calculate expected next time based on current time + now := time.Date(2025, 9, 18, 19, 30, 0, 0, time.UTC) // Wednesday 7:30 PM + expectedNextTime := metav1.NewTime(cronSchedule.Next(now)) + currentNextTime := metav1.NewTime(tt.currentNextTime) + + // Test the change detection logic + timesEqual := currentNextTime.Equal(&expectedNextTime) + shouldDetectChange := !timesEqual + + assert.Equal(t, tt.shouldDetectChange, shouldDetectChange, tt.description) + + if tt.shouldDetectChange { + assert.NotEqual(t, currentNextTime, expectedNextTime, "Times should be different when change is detected") + } else { + assert.Equal(t, currentNextTime, expectedNextTime, "Times should be equal when no change is detected") + } + }) + } +} + +func TestPerconaServerMySQLHibernationReconciler_calculateNextScheduleTime(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, apiv1.AddToScheme(scheme)) + + client := fake.NewClientBuilder().WithScheme(scheme).Build() + reconciler := &PerconaServerMySQLHibernationReconciler{ + Client: client, + Scheme: scheme, + ServerVersion: &platform.ServerVersion{}, + } + + tests := []struct { + name string + schedule string + currentTime time.Time + expectedResult time.Time + description string + }{ + { + name: "today's schedule still available", + schedule: "45 19 * * 1-5", // 7:45 PM Mon-Fri + currentTime: time.Date(2025, 9, 18, 19, 30, 0, 0, time.UTC), // Wednesday 7:30 PM (before schedule) + expectedResult: time.Date(2025, 9, 18, 19, 45, 0, 0, time.UTC), // Today 7:45 PM + description: "Should return today's schedule time when it's still in the future", + }, + { + name: "today's schedule already passed", + schedule: "45 19 * * 1-5", // 7:45 PM Mon-Fri + currentTime: time.Date(2025, 9, 18, 20, 0, 0, 0, time.UTC), // Wednesday 8:00 PM (after schedule) + expectedResult: time.Date(2025, 9, 19, 19, 45, 0, 0, time.UTC), // Tomorrow 7:45 
PM + description: "Should return tomorrow's schedule time when today's has passed", + }, + { + name: "exact schedule time", + schedule: "45 19 * * 1-5", // 7:45 PM Mon-Fri + currentTime: time.Date(2025, 9, 18, 19, 45, 0, 0, time.UTC), // Wednesday 7:45 PM (exact time) + expectedResult: time.Date(2025, 9, 19, 19, 45, 0, 0, time.UTC), // Tomorrow 7:45 PM + description: "Should return tomorrow's schedule time when current time equals schedule time", + }, + { + name: "weekend schedule on weekday", + schedule: "45 19 * * 0,6", // 7:45 PM Sat-Sun + currentTime: time.Date(2025, 9, 18, 19, 30, 0, 0, time.UTC), // Wednesday 7:30 PM + expectedResult: time.Date(2025, 9, 20, 19, 45, 0, 0, time.UTC), // Saturday 7:45 PM + description: "Should return next weekend day when schedule is for weekends", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Parse the schedule + cronSchedule, err := cron.ParseStandard(tt.schedule) + require.NoError(t, err, "Should parse valid cron expression") + + // Test the method + result := reconciler.calculateNextScheduleTime(tt.currentTime, cronSchedule) + + // Check the result + expectedResult := metav1.NewTime(tt.expectedResult) + assert.Equal(t, expectedResult, result, tt.description) + }) + } +} From 6bc016089d640d5a75afde2fe4dc9e783d6d0726 Mon Sep 17 00:00:00 2001 From: Viacheslav Sarzhan Date: Tue, 23 Sep 2025 15:19:52 +0300 Subject: [PATCH 2/6] fix bugs --- deploy/bundle.yaml | 2 +- pkg/controller/pshibernation/controller.go | 323 +++++-- .../pshibernation/controller_test.go | 790 ++++++++++++++++-- 3 files changed, 970 insertions(+), 145 deletions(-) diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index 2dec0c482..6cf1abce1 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -13503,7 +13503,7 @@ spec: fieldPath: metadata.namespace - name: DISABLE_TELEMETRY value: "false" - image: perconalab/percona-server-mysql-operator:main-h25 + image: perconalab/percona-server-mysql-operator:main 
imagePullPolicy: Always livenessProbe: httpGet: diff --git a/pkg/controller/pshibernation/controller.go b/pkg/controller/pshibernation/controller.go index 16d490240..665c033a6 100644 --- a/pkg/controller/pshibernation/controller.go +++ b/pkg/controller/pshibernation/controller.go @@ -75,19 +75,30 @@ func (r *PerconaServerMySQLHibernationReconciler) Reconcile(ctx context.Context, return ctrl.Result{RequeueAfter: 30 * time.Second}, err } - // Skip hibernation processing if cluster is still initializing - // This prevents hibernation state from flipping during cluster startup - if cr.Status.State == apiv1.StateInitializing { + // Skip hibernation processing if cluster is still initializing or in error state + // This prevents hibernation state from flipping during cluster startup/recovery + // BUT we need to proactively schedule for next window if the scheduled time is approaching or has passed + if cr.Status.State == apiv1.StateInitializing || cr.Status.State == apiv1.StateError { + // Proactively check and update nextPauseTime to next window if needed + if err := r.proactivelyScheduleForNextWindow(ctx, cr); err != nil { + log.Error(err, "Failed to proactively schedule for next window during cluster issues", "cluster", cr.Name, "namespace", cr.Namespace, "state", cr.Status.State) + } return ctrl.Result{RequeueAfter: 30 * time.Second}, nil } + // When cluster becomes ready, check if we need to proactively schedule for next window + // This handles the case where the cluster was unready during the scheduled time + if cr.Status.State == apiv1.StateReady { + if err := r.proactivelyScheduleForNextWindow(ctx, cr); err != nil { + log.Error(err, "Failed to proactively schedule for next window when cluster became ready", "cluster", cr.Name, "namespace", cr.Namespace) + } + } + // Process hibernation logic - log.Info("🔄 DEBUG: About to call processHibernation", "cluster", cr.Name, "namespace", cr.Namespace) if err := r.processHibernation(ctx, cr); err != nil { log.Error(err, 
"Failed to process hibernation", "cluster", cr.Name, "namespace", cr.Namespace) return ctrl.Result{RequeueAfter: 1 * time.Minute}, err } - log.Info("✅ DEBUG: processHibernation completed successfully", "cluster", cr.Name, "namespace", cr.Namespace) // Requeue after 1 minute to check again return ctrl.Result{RequeueAfter: 1 * time.Minute}, nil @@ -99,18 +110,13 @@ func (r *PerconaServerMySQLHibernationReconciler) processHibernation(ctx context now := time.Now() hibernation := cr.Spec.Hibernation - log.Info("🔄 DEBUG: processHibernation started", "cluster", cr.Name, "namespace", cr.Namespace, "currentTime", now.Format("15:04:05")) - // Check if it's time to pause if hibernation.Schedule.Pause != "" { - log.Info("🔄 DEBUG: Checking pause schedule", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", hibernation.Schedule.Pause) if shouldPause, err := r.shouldPauseCluster(ctx, cr, hibernation.Schedule.Pause, now); err != nil { log.Error(err, "Failed to check pause schedule", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", hibernation.Schedule.Pause) return errors.Wrap(err, "failed to check pause schedule") } else { - log.Info("🔄 DEBUG: shouldPauseCluster result", "cluster", cr.Name, "namespace", cr.Namespace, "shouldPause", shouldPause) if shouldPause { - log.Info("🔄 DEBUG: Should pause cluster", "cluster", cr.Name, "namespace", cr.Namespace) if canPause, reason, err := r.canPauseCluster(ctx, cr); err != nil { log.Error(err, "Failed to check if cluster can be paused", "cluster", cr.Name, "namespace", cr.Namespace) return errors.Wrap(err, "failed to check if cluster can be paused") @@ -119,38 +125,44 @@ func (r *PerconaServerMySQLHibernationReconciler) processHibernation(ctx context log.Error(err, "Failed to pause cluster", "cluster", cr.Name, "namespace", cr.Namespace) return errors.Wrap(err, "failed to pause cluster") } - log.Info("✅ Cluster paused by hibernation", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", hibernation.Schedule.Pause) + 
log.Info("Cluster paused by hibernation", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", hibernation.Schedule.Pause) } else { - // Check if the reason is cluster not ready - if so, schedule for next window - if strings.Contains(reason, "cluster not ready") { - log.Info("⏰ Cluster not ready, scheduling hibernation for next window", "cluster", cr.Name, "namespace", cr.Namespace, "reason", reason, "schedule", hibernation.Schedule.Pause) + // Check if the reason is cluster not ready or active operations - if so, schedule for next window + if strings.Contains(reason, "cluster not ready") || strings.Contains(reason, "active backup") || strings.Contains(reason, "active restore") { + log.Info("Cluster not ready or active operations, scheduling hibernation for next window", "cluster", cr.Name, "namespace", cr.Namespace, "reason", reason, "schedule", hibernation.Schedule.Pause) if err := r.scheduleHibernationForNextWindow(ctx, cr, hibernation.Schedule.Pause, reason); err != nil { log.Error(err, "Failed to schedule hibernation for next window", "cluster", cr.Name, "namespace", cr.Namespace) } } else { - log.Info("⚠️ Skipped pause due to active operations", "cluster", cr.Name, "namespace", cr.Namespace, "reason", reason, "schedule", hibernation.Schedule.Pause) + log.Info("⚠️ Skipped pause due to other reasons", "cluster", cr.Name, "namespace", cr.Namespace, "reason", reason, "schedule", hibernation.Schedule.Pause) if err := r.updateHibernationState(ctx, cr, apiv1.HibernationStateBlocked, reason); err != nil { log.Error(err, "Failed to update hibernation status", "cluster", cr.Name, "namespace", cr.Namespace) } } } + } else { + // shouldPauseCluster returned false - check if we need to schedule for next window + // This handles the case where the scheduled time has passed beyond the 5-minute window + if err := r.checkAndScheduleForNextWindow(ctx, cr, hibernation.Schedule.Pause, now); err != nil { + log.Error(err, "Failed to check and schedule for next window", 
"cluster", cr.Name, "namespace", cr.Namespace) + } } } } // Check if it's time to unpause if hibernation.Schedule.Unpause != "" { - log.Info("🔄 DEBUG: Checking unpause schedule", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", hibernation.Schedule.Unpause) if shouldUnpause, err := r.shouldUnpauseCluster(ctx, cr, hibernation.Schedule.Unpause, now); err != nil { log.Error(err, "Failed to check unpause schedule", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", hibernation.Schedule.Unpause) return errors.Wrap(err, "failed to check unpause schedule") - } else if shouldUnpause { - log.Info("🔄 DEBUG: Should unpause cluster", "cluster", cr.Name, "namespace", cr.Namespace) - if err := r.unpauseCluster(ctx, cr); err != nil { - log.Error(err, "Failed to unpause cluster", "cluster", cr.Name, "namespace", cr.Namespace) - return errors.Wrap(err, "failed to unpause cluster") + } else { + if shouldUnpause { + if err := r.unpauseCluster(ctx, cr); err != nil { + log.Error(err, "Failed to unpause cluster", "cluster", cr.Name, "namespace", cr.Namespace) + return errors.Wrap(err, "failed to unpause cluster") + } + log.Info("Cluster unpaused by hibernation", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", hibernation.Schedule.Unpause) } - log.Info("✅ Cluster unpaused by hibernation", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", hibernation.Schedule.Unpause) } } @@ -169,7 +181,7 @@ func (r *PerconaServerMySQLHibernationReconciler) processHibernation(ctx context if cr.Spec.Hibernation.Schedule.Unpause != "" { unpauseSchedule = cr.Spec.Hibernation.Schedule.Unpause } - log.Info("🔄 Hibernation enabled", "cluster", cr.Name, "namespace", cr.Namespace, + log.Info("Hibernation enabled", "cluster", cr.Name, "namespace", cr.Namespace, "pauseSchedule", pauseSchedule, "unpauseSchedule", unpauseSchedule) } @@ -211,12 +223,22 @@ func (r *PerconaServerMySQLHibernationReconciler) scheduleHibernationForNextWind } // Calculate next available window 
(tomorrow's schedule) - now := time.Now() + now := time.Now().UTC() nextWindow := r.calculateNextScheduleTime(now, cronSchedule) // Update the next pause time to the next window fresh.Status.Hibernation.NextPauseTime = &nextWindow + // Also update the unpause time to the next window if unpause schedule exists + // BUT only if the cluster is not currently paused (to avoid overriding today's unpause time) + if fresh.Spec.Hibernation != nil && fresh.Spec.Hibernation.Schedule.Unpause != "" && !fresh.Spec.Pause { + if unpauseCronSchedule, err := cron.ParseStandard(fresh.Spec.Hibernation.Schedule.Unpause); err == nil { + nextUnpauseWindow := r.calculateNextScheduleTime(now, unpauseCronSchedule) + fresh.Status.Hibernation.NextUnpauseTime = &nextUnpauseWindow + log.Info("Also updated next unpause time for next window", "cluster", cr.Name, "namespace", cr.Namespace, "nextUnpauseWindow", nextUnpauseWindow) + } + } + // Set state to indicate we're waiting for next window fresh.Status.Hibernation.State = apiv1.HibernationStateScheduled fresh.Status.Hibernation.Reason = fmt.Sprintf("Scheduled for next window: %s", reason) @@ -227,7 +249,7 @@ func (r *PerconaServerMySQLHibernationReconciler) scheduleHibernationForNextWind return err } - log.Info("📅 Hibernation scheduled for next window", "cluster", cr.Name, "namespace", cr.Namespace, + log.Info("Hibernation scheduled for next window", "cluster", cr.Name, "namespace", cr.Namespace, "nextWindow", nextWindow, "reason", reason) return nil @@ -259,12 +281,17 @@ func (r *PerconaServerMySQLHibernationReconciler) synchronizeHibernationState(ct if isClusterPaused { expectedState = apiv1.HibernationStatePaused } else { - expectedState = apiv1.HibernationStateActive + // If hibernation state is Scheduled, preserve it - don't change to Active + if currentHibernationState == apiv1.HibernationStateScheduled { + expectedState = apiv1.HibernationStateScheduled + } else { + expectedState = apiv1.HibernationStateActive + } } // Update hibernation 
state if it doesn't match the actual cluster state if currentHibernationState != expectedState { - log.Info("🔄 Synchronizing hibernation state with cluster state", + log.Info("Synchronizing hibernation state with cluster state", "cluster", cr.Name, "namespace", cr.Namespace, "clusterState", fresh.Status.State, "currentHibernationState", currentHibernationState, @@ -294,6 +321,24 @@ func (r *PerconaServerMySQLHibernationReconciler) shouldPauseCluster(ctx context return false, nil } + // Check if hibernation is scheduled for next window - if so, don't pause until that time + if cr.Status.Hibernation != nil && cr.Status.Hibernation.State == apiv1.HibernationStateScheduled { + // If we have a next pause time scheduled, only pause if that time has arrived + if cr.Status.Hibernation.NextPauseTime != nil { + if now.Before(cr.Status.Hibernation.NextPauseTime.Time) { + log.Info("Hibernation scheduled for next window, waiting", "cluster", cr.Name, "namespace", cr.Namespace, + "scheduledTime", cr.Status.Hibernation.NextPauseTime.Time, "currentTime", now) + return false, nil + } + // The scheduled time has arrived, we can proceed with pausing + log.Info("Scheduled hibernation time has arrived, proceeding with pause", "cluster", cr.Name, "namespace", cr.Namespace, + "scheduledTime", cr.Status.Hibernation.NextPauseTime.Time, "currentTime", now) + } else { + // No next pause time set, but state is scheduled - this shouldn't happen, but be safe + return false, nil + } + } + // Get reference time for calculating next pause var referenceTime time.Time if cr.Status.Hibernation != nil && cr.Status.Hibernation.LastPauseTime != nil { @@ -316,12 +361,26 @@ func (r *PerconaServerMySQLHibernationReconciler) shouldPauseCluster(ctx context if isToday { // For first-time evaluation, check if the scheduled time has arrived - if now.After(todaySchedule) || now.Equal(todaySchedule) { - // Scheduled time has arrived, we should pause + if now.Before(todaySchedule) { + // Scheduled time is in the 
future, don't pause yet + return false, nil + } + // For first-time evaluation, only pause if we're exactly at the scheduled time + // or within a very small window (1 minute) to account for controller reconciliation + if now.Equal(todaySchedule) { + // If times are equal, we should pause return true, nil } - // Scheduled time hasn't arrived yet, don't pause - return false, nil + // If time has passed, check if it's within a very small window (1 minute) + // This is more restrictive for first-time evaluation to prevent immediate pausing + if now.After(todaySchedule) { + timeSinceSchedule := now.Sub(todaySchedule) + if timeSinceSchedule <= 1*time.Minute { + return true, nil + } + // Time has passed beyond the reasonable window, wait for next window + return false, nil + } } // Schedule doesn't apply to today, don't pause @@ -373,6 +432,24 @@ func (r *PerconaServerMySQLHibernationReconciler) shouldUnpauseCluster(ctx conte return false, nil } + // Check if hibernation is scheduled for next window - if so, don't unpause until that time + if cr.Status.Hibernation != nil && cr.Status.Hibernation.State == apiv1.HibernationStateScheduled { + // If we have a next unpause time scheduled, only unpause if that time has arrived + if cr.Status.Hibernation.NextUnpauseTime != nil { + if now.Before(cr.Status.Hibernation.NextUnpauseTime.Time) { + log.Info("Hibernation scheduled for next window, waiting", "cluster", cr.Name, "namespace", cr.Namespace, + "scheduledTime", cr.Status.Hibernation.NextUnpauseTime.Time, "currentTime", now) + return false, nil + } + // The scheduled time has arrived, we can proceed with unpausing + log.Info("Scheduled hibernation time has arrived, proceeding with unpause", "cluster", cr.Name, "namespace", cr.Namespace, + "scheduledTime", cr.Status.Hibernation.NextUnpauseTime.Time, "currentTime", now) + } else { + // No next unpause time set, but state is scheduled - this shouldn't happen, but be safe + return false, nil + } + } + // Get reference time for 
calculating next unpause var referenceTime time.Time if cr.Status.Hibernation != nil && cr.Status.Hibernation.LastUnpauseTime != nil { @@ -408,11 +485,22 @@ func (r *PerconaServerMySQLHibernationReconciler) shouldUnpauseCluster(ctx conte nextUnpauseTime := cronSchedule.Next(referenceTime) shouldUnpause := now.After(nextUnpauseTime) || now.Equal(nextUnpauseTime) + // If time has passed, check if it's within a reasonable window (5 minutes) + // This accounts for the controller's reconciliation interval + if now.After(nextUnpauseTime) && !now.Equal(nextUnpauseTime) { + timeSinceSchedule := now.Sub(nextUnpauseTime) + if timeSinceSchedule <= 5*time.Minute { + shouldUnpause = true + } else { + shouldUnpause = false + } + } + // Additional check: if we have a reference time but current time is after today's scheduled unpause time, // we should still unpause (this handles the case where the cluster was paused earlier today) // BUT only if the reference time is NOT today's scheduled unpause time (to avoid double-unpausing) - // AND only if the reference time is a LastUnpauseTime, not a LastPauseTime - if !shouldUnpause && referenceTime != (time.Time{}) && cr.Status.Hibernation != nil && cr.Status.Hibernation.LastUnpauseTime != nil { + // This works for both LastUnpauseTime and LastPauseTime as reference + if !shouldUnpause && referenceTime != (time.Time{}) && cr.Status.Hibernation != nil { today := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location()) todaySchedule := cronSchedule.Next(today.Add(-time.Second)) isToday := todaySchedule.Year() == now.Year() && @@ -424,8 +512,19 @@ func (r *PerconaServerMySQLHibernationReconciler) shouldUnpauseCluster(ctx conte timeDiff := referenceTime.Sub(todaySchedule) referenceIsTodaySchedule := timeDiff >= -1*time.Minute && timeDiff <= 1*time.Minute - if isToday && !referenceIsTodaySchedule && (now.After(todaySchedule) || now.Equal(todaySchedule)) { - shouldUnpause = true + if isToday && !referenceIsTodaySchedule { + 
if now.Equal(todaySchedule) { + shouldUnpause = true + } else if now.After(todaySchedule) { + // For unpause, use a longer window (1 hour) since we want to get the cluster running + // This is more lenient than pause operations + timeSinceSchedule := now.Sub(todaySchedule) + if timeSinceSchedule <= 1*time.Hour { + shouldUnpause = true + } else { + shouldUnpause = false + } + } } } @@ -451,7 +550,7 @@ func (r *PerconaServerMySQLHibernationReconciler) canPauseCluster(ctx context.Co for _, backup := range backupList.Items { if backup.Spec.ClusterName == cr.Name { switch backup.Status.State { - case apiv1.BackupStarting, apiv1.BackupRunning: + case apiv1.BackupStarting, apiv1.BackupRunning, apiv1.BackupNew: return false, fmt.Sprintf("active backup: %s (state: %s)", backup.Name, backup.Status.State), nil } } @@ -467,7 +566,7 @@ func (r *PerconaServerMySQLHibernationReconciler) canPauseCluster(ctx context.Co for _, restore := range restoreList.Items { if restore.Spec.ClusterName == cr.Name { switch restore.Status.State { - case apiv1.RestoreStarting, apiv1.RestoreRunning: + case apiv1.RestoreStarting, apiv1.RestoreRunning, apiv1.RestoreNew: return false, fmt.Sprintf("active restore: %s (state: %s)", restore.Name, restore.Status.State), nil } } @@ -521,7 +620,7 @@ func (r *PerconaServerMySQLHibernationReconciler) pauseCluster(ctx context.Conte return err } - log.Info("✅ Hibernation status updated after pause", "cluster", cr.Name, "namespace", cr.Namespace, "state", fresh.Status.Hibernation.State, "lastPauseTime", fresh.Status.Hibernation.LastPauseTime) + log.Info("Hibernation status updated after pause", "cluster", cr.Name, "namespace", cr.Namespace, "state", fresh.Status.Hibernation.State, "lastPauseTime", fresh.Status.Hibernation.LastPauseTime) return nil }) } @@ -571,7 +670,7 @@ func (r *PerconaServerMySQLHibernationReconciler) unpauseCluster(ctx context.Con return err } - log.Info("✅ Hibernation status updated after unpause", "cluster", cr.Name, "namespace", 
cr.Namespace, "state", fresh.Status.Hibernation.State, "lastUnpauseTime", fresh.Status.Hibernation.LastUnpauseTime) + log.Info("Hibernation status updated after unpause", "cluster", cr.Name, "namespace", cr.Namespace, "state", fresh.Status.Hibernation.State, "lastUnpauseTime", fresh.Status.Hibernation.LastUnpauseTime) return nil }) } @@ -676,11 +775,6 @@ func (r *PerconaServerMySQLHibernationReconciler) initializeHibernationStatus(ct }) } -// updateHibernationStatus updates the hibernation status with a reason (deprecated, use updateHibernationState) -func (r *PerconaServerMySQLHibernationReconciler) updateHibernationStatus(ctx context.Context, cr *apiv1.PerconaServerMySQL, reason string) error { - return r.updateHibernationState(ctx, cr, "", reason) -} - // updateHibernationScheduleIfChanged checks if the hibernation schedule has changed and updates next times if needed func (r *PerconaServerMySQLHibernationReconciler) updateHibernationScheduleIfChanged(ctx context.Context, cr *apiv1.PerconaServerMySQL) error { log := logf.FromContext(ctx).WithName("updateHibernationScheduleIfChanged") @@ -697,7 +791,7 @@ func (r *PerconaServerMySQLHibernationReconciler) updateHibernationScheduleIfCha if cr.Spec.Hibernation.Schedule.Pause != "" { if cr.Status.Hibernation.NextPauseTime == nil { needsUpdate = true - log.Info("📅 Initializing missing next pause time", "cluster", cr.Name, "namespace", cr.Namespace) + log.Info("Initializing missing next pause time", "cluster", cr.Name, "namespace", cr.Namespace) } else { // Check if the schedule string has changed by comparing with current calculated time if cronSchedule, err := cron.ParseStandard(cr.Spec.Hibernation.Schedule.Pause); err == nil { @@ -709,8 +803,17 @@ func (r *PerconaServerMySQLHibernationReconciler) updateHibernationScheduleIfCha timeDiff := expectedNextPauseTime.Sub(currentNextPauseTime.Time) if timeDiff > time.Hour || timeDiff < -time.Hour { needsUpdate = true - log.Info("📅 Pause schedule changed, updating next pause 
time", "cluster", cr.Name, "namespace", cr.Namespace, + log.Info("Pause schedule changed, updating next pause time", "cluster", cr.Name, "namespace", cr.Namespace, "oldTime", currentNextPauseTime, "newTime", expectedNextPauseTime) + } else { + // Check if the new schedule time is very close in the future (within 5 minutes) + // This handles the case where user changes schedule to a time very close to now + now := time.Now() + if expectedNextPauseTime.Time.After(now) && expectedNextPauseTime.Time.Sub(now) <= 5*time.Minute { + needsUpdate = true + log.Info("Schedule changed to very near future time, updating to pause soon", "cluster", cr.Name, "namespace", cr.Namespace, + "oldTime", currentNextPauseTime, "newTime", expectedNextPauseTime, "timeUntilPause", expectedNextPauseTime.Time.Sub(now)) + } } } } @@ -720,7 +823,7 @@ func (r *PerconaServerMySQLHibernationReconciler) updateHibernationScheduleIfCha if cr.Spec.Hibernation.Schedule.Unpause != "" { if cr.Status.Hibernation.NextUnpauseTime == nil { needsUpdate = true - log.Info("📅 Initializing missing next unpause time", "cluster", cr.Name, "namespace", cr.Namespace) + log.Info("Initializing missing next unpause time", "cluster", cr.Name, "namespace", cr.Namespace) } else { // Check if the schedule string has changed by comparing with current calculated time if cronSchedule, err := cron.ParseStandard(cr.Spec.Hibernation.Schedule.Unpause); err == nil { @@ -732,8 +835,17 @@ func (r *PerconaServerMySQLHibernationReconciler) updateHibernationScheduleIfCha timeDiff := expectedNextUnpauseTime.Sub(currentNextUnpauseTime.Time) if timeDiff > time.Hour || timeDiff < -time.Hour { needsUpdate = true - log.Info("📅 Unpause schedule changed, updating next unpause time", "cluster", cr.Name, "namespace", cr.Namespace, + log.Info("Unpause schedule changed, updating next unpause time", "cluster", cr.Name, "namespace", cr.Namespace, "oldTime", currentNextUnpauseTime, "newTime", expectedNextUnpauseTime) + } else { + // Check if the new 
schedule time is very close in the future (within 5 minutes) + // This handles the case where user changes schedule to a time very close to now + now := time.Now() + if expectedNextUnpauseTime.Time.After(now) && expectedNextUnpauseTime.Time.Sub(now) <= 5*time.Minute { + needsUpdate = true + log.Info("Unpause schedule changed to very near future time, updating to unpause soon", "cluster", cr.Name, "namespace", cr.Namespace, + "oldTime", currentNextUnpauseTime, "newTime", expectedNextUnpauseTime, "timeUntilUnpause", expectedNextUnpauseTime.Time.Sub(now)) + } } } } @@ -792,7 +904,7 @@ func (r *PerconaServerMySQLHibernationReconciler) updateHibernationNextTimes(ctx return err } - log.Info("✅ Hibernation next times updated", "cluster", cr.Name, "namespace", cr.Namespace, + log.Info("Hibernation next times updated", "cluster", cr.Name, "namespace", cr.Namespace, "nextPauseTime", fresh.Status.Hibernation.NextPauseTime, "nextUnpauseTime", fresh.Status.Hibernation.NextUnpauseTime) @@ -800,24 +912,127 @@ func (r *PerconaServerMySQLHibernationReconciler) updateHibernationNextTimes(ctx }) } +// checkAndScheduleForNextWindow checks if the scheduled time has passed and schedules for next window if needed +func (r *PerconaServerMySQLHibernationReconciler) checkAndScheduleForNextWindow(ctx context.Context, cr *apiv1.PerconaServerMySQL, schedule string, now time.Time) error { + log := logf.FromContext(ctx).WithName("checkAndScheduleForNextWindow") + + // Parse cron schedule + cronSchedule, err := cron.ParseStandard(schedule) + if err != nil { + return errors.Wrap(err, "invalid schedule") + } + + // Check if this is a first-time evaluation (no previous pause/unpause times) + if cr.Status.Hibernation == nil || (cr.Status.Hibernation.LastPauseTime == nil && cr.Status.Hibernation.LastUnpauseTime == nil) { + // Calculate today's scheduled time + today := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location()) + todaySchedule := 
cronSchedule.Next(today.Add(-time.Second)) + + // Check if the schedule applies to today + isToday := todaySchedule.Year() == now.Year() && + todaySchedule.Month() == now.Month() && + todaySchedule.Day() == now.Day() + + if isToday { + // Check if the scheduled time has passed beyond the 5-minute window + if now.After(todaySchedule) { + timeSinceSchedule := now.Sub(todaySchedule) + if timeSinceSchedule > 5*time.Minute { + // The scheduled time has passed beyond the reasonable window + // Schedule for the next window + reason := fmt.Sprintf("Scheduled time passed beyond 5-minute window (passed %v ago)", timeSinceSchedule) + log.Info("Scheduled time has passed, scheduling for next window", "cluster", cr.Name, "namespace", cr.Namespace, + "schedule", schedule, "todaySchedule", todaySchedule, "timeSince", timeSinceSchedule) + + return r.scheduleHibernationForNextWindow(ctx, cr, schedule, reason) + } + } + } + } + + return nil +} + // calculateNextScheduleTime calculates the next schedule time, considering if today's time is still available func (r *PerconaServerMySQLHibernationReconciler) calculateNextScheduleTime(now time.Time, cronSchedule cron.Schedule) metav1.Time { - // Get today's start time - today := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location()) + // Use UTC for all calculations to ensure consistency + utcNow := now.UTC() + + // Get today's start time in UTC + today := time.Date(utcNow.Year(), utcNow.Month(), utcNow.Day(), 0, 0, 0, 0, time.UTC) // Calculate today's scheduled time todaySchedule := cronSchedule.Next(today.Add(-time.Second)) // If today's scheduled time is still in the future, use it - if todaySchedule.After(now) { + if todaySchedule.After(utcNow) { return metav1.NewTime(todaySchedule) } - // Otherwise, use the next occurrence (tomorrow or later) - nextSchedule := cronSchedule.Next(now) + // If today's scheduled time has passed, return the next occurrence (next window) + nextSchedule := cronSchedule.Next(utcNow) return 
metav1.NewTime(nextSchedule) } +// proactivelyScheduleForNextWindow proactively schedules hibernation for the next window when cluster is in unready state +// This prevents immediate pausing when the cluster becomes ready +func (r *PerconaServerMySQLHibernationReconciler) proactivelyScheduleForNextWindow(ctx context.Context, cr *apiv1.PerconaServerMySQL) error { + log := logf.FromContext(ctx).WithName("proactivelyScheduleForNextWindow") + + // Only check if hibernation is enabled and has a pause schedule + if !cr.IsHibernationEnabled() || cr.Spec.Hibernation == nil || cr.Spec.Hibernation.Schedule.Pause == "" { + return nil + } + + // Parse cron schedule + cronSchedule, err := cron.ParseStandard(cr.Spec.Hibernation.Schedule.Pause) + if err != nil { + log.Error(err, "Invalid pause schedule during proactive scheduling", "cluster", cr.Name, "namespace", cr.Namespace, "schedule", cr.Spec.Hibernation.Schedule.Pause) + return err + } + + now := time.Now().UTC() + + // Calculate today's scheduled time + today := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.UTC) + todaySchedule := cronSchedule.Next(today.Add(-time.Second)) + + // Check if the schedule applies to today + isToday := todaySchedule.Year() == now.Year() && + todaySchedule.Month() == now.Month() && + todaySchedule.Day() == now.Day() + + if !isToday { + return nil // Schedule doesn't apply to today + } + + // Check if we have a nextPauseTime set and if it's still today's time + if cr.Status.Hibernation != nil && cr.Status.Hibernation.NextPauseTime != nil { + currentNextPauseTime := cr.Status.Hibernation.NextPauseTime.Time + + // If the current nextPauseTime is still today's scheduled time, we need to update it to next window + if currentNextPauseTime.Year() == now.Year() && + currentNextPauseTime.Month() == now.Month() && + currentNextPauseTime.Day() == now.Day() && + currentNextPauseTime.Hour() == todaySchedule.Hour() && + currentNextPauseTime.Minute() == todaySchedule.Minute() { + + // Check if 
the scheduled time has passed + if now.After(todaySchedule) { + // The nextPauseTime is still set to today's schedule, but the time has passed + // Proactively update it to next window + reason := fmt.Sprintf("Scheduled time passed while cluster was unready, proactively scheduling for next window (state: %s)", cr.Status.State) + log.Info("Scheduled time passed while cluster was unready, proactively scheduling for next window", "cluster", cr.Name, "namespace", cr.Namespace, + "schedule", cr.Spec.Hibernation.Schedule.Pause, "todaySchedule", todaySchedule, "currentNextPauseTime", currentNextPauseTime, "clusterState", cr.Status.State, "currentTime", now) + + return r.scheduleHibernationForNextWindow(ctx, cr, cr.Spec.Hibernation.Schedule.Pause, reason) + } + } + } + + return nil +} + // SetupWithManager sets up the controller with the Manager. func (r *PerconaServerMySQLHibernationReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). diff --git a/pkg/controller/pshibernation/controller_test.go b/pkg/controller/pshibernation/controller_test.go index 9d941fa73..d36dfe50e 100644 --- a/pkg/controller/pshibernation/controller_test.go +++ b/pkg/controller/pshibernation/controller_test.go @@ -18,6 +18,7 @@ package pshibernation import ( "context" + "fmt" "testing" "time" @@ -103,8 +104,8 @@ func TestPerconaServerMySQLHibernationReconciler_shouldPauseCluster(t *testing.T expectedError: false, }, { - name: "should pause - first time evaluation with current time after schedule", - description: "First-time evaluation when current time is after the pause schedule - should pause if time has arrived", + name: "should NOT pause - first time evaluation with current time after schedule", + description: "First-time evaluation when current time is after the pause schedule - should wait for next window", cr: &apiv1.PerconaServerMySQL{ ObjectMeta: metav1.ObjectMeta{ Name: "test-cluster", @@ -126,11 +127,11 @@ func 
TestPerconaServerMySQLHibernationReconciler_shouldPauseCluster(t *testing.T }, schedule: "45 13 * * 1-5", now: time.Date(2025, 9, 18, 13, 47, 0, 0, time.UTC), // Thursday 1:47 PM - expectedResult: true, // Should pause when time has arrived + expectedResult: false, // Should NOT pause when time has passed (first-time evaluation) expectedError: false, }, { - name: "DEBUG: should pause - real scenario from logs (11:15 schedule, 11:18 time)", + name: "DEBUG: should NOT pause - real scenario from logs (11:15 schedule, 11:18 time)", description: "Real scenario: Schedule is 15 11 * * 1-5 (11:15 AM), current time is 11:18 AM, cluster was never paused", cr: &apiv1.PerconaServerMySQL{ ObjectMeta: metav1.ObjectMeta{ @@ -155,7 +156,7 @@ func TestPerconaServerMySQLHibernationReconciler_shouldPauseCluster(t *testing.T }, schedule: "15 11 * * 1-5", now: time.Date(2025, 9, 19, 11, 18, 0, 0, time.UTC), // Friday 11:18 AM (3 minutes after schedule) - expectedResult: true, // Should pause - scheduled time has arrived + expectedResult: false, // Should NOT pause - first-time evaluation should wait for next window expectedError: false, }, { @@ -277,8 +278,8 @@ func TestPerconaServerMySQLHibernationReconciler_shouldPauseCluster(t *testing.T expectedError: true, }, { - name: "real-world scenario - pause time passed, should pause", - description: "Real scenario: pause scheduled for 13:45, current time is 13:47, should trigger pause", + name: "real-world scenario - pause time passed, should NOT pause", + description: "Real scenario: pause scheduled for 13:45, current time is 13:47, should wait for next window", cr: &apiv1.PerconaServerMySQL{ ObjectMeta: metav1.ObjectMeta{ Name: "ps-cluster1", @@ -300,12 +301,39 @@ func TestPerconaServerMySQLHibernationReconciler_shouldPauseCluster(t *testing.T }, schedule: "45 13 * * 1-5", now: time.Date(2025, 9, 18, 13, 47, 28, 0, time.UTC), // Thursday 1:47:28 PM (2+ minutes after pause time) - expectedResult: true, // Should pause when time has passed 
+ expectedResult: false, // Should NOT pause when time has passed (first-time evaluation) expectedError: false, }, { - name: "user reported bug - pause at 09:40, enable hibernation at 10:08, should pause", - description: "Bug fix: When hibernation is enabled after scheduled time has passed, should pause if time has arrived", + name: "user reported bug - pause at 12:55, enable hibernation at 16:52, should NOT pause", + description: "Bug fix: When hibernation is enabled after scheduled time has passed (12:55 -> 16:52), should NOT pause immediately", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ps-cluster1", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "55 12 * * 1-5", // 12:55 PM Mon-Fri + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStateActive, + }, + }, + }, + schedule: "55 12 * * 1-5", + now: time.Date(2025, 9, 19, 16, 52, 0, 0, time.UTC), // Friday 4:52 PM (4+ hours after schedule) + expectedResult: false, // Should NOT pause when time has passed + expectedError: false, + }, + { + name: "user reported bug - pause at 09:40, enable hibernation at 10:08, should NOT pause", + description: "Bug fix: When hibernation is enabled after scheduled time has passed, should wait for next window", cr: &apiv1.PerconaServerMySQL{ ObjectMeta: metav1.ObjectMeta{ Name: "ps-cluster1", @@ -327,7 +355,7 @@ func TestPerconaServerMySQLHibernationReconciler_shouldPauseCluster(t *testing.T }, schedule: "40 09 * * 1-5", now: time.Date(2025, 9, 19, 10, 8, 40, 0, time.UTC), // 10:08:40 AM (28 minutes after scheduled time) - expectedResult: true, // Should pause when time has passed + expectedResult: false, // Should NOT pause when time has passed (first-time evaluation) expectedError: false, }, { @@ -685,8 +713,8 @@ func 
TestPerconaServerMySQLHibernationReconciler_shouldUnpauseCluster(t *testing expectedError: false, }, { - name: "should unpause - real-world scenario with reference time", - description: "Real scenario: cluster paused earlier today, current time is after today's unpause schedule", + name: "should NOT unpause - real-world scenario with reference time (unpause time passed by more than 1 hour)", + description: "Real scenario: cluster paused earlier today, current time is 2+ hours after today's unpause schedule - should wait for next window", cr: &apiv1.PerconaServerMySQL{ ObjectMeta: metav1.ObjectMeta{ Name: "ps-cluster1", @@ -712,7 +740,38 @@ func TestPerconaServerMySQLHibernationReconciler_shouldUnpauseCluster(t *testing }, schedule: "10 14 * * 1-5", now: time.Date(2025, 9, 18, 16, 45, 0, 0, time.UTC), // Thursday 4:45 PM (2+ hours after unpause time) - expectedResult: true, + expectedResult: false, // Should NOT unpause when time has passed by more than 1 hour + expectedError: false, + }, + { + name: "should unpause - real-world scenario with reference time (unpause time passed by less than 1 hour)", + description: "Real scenario: cluster paused earlier today, current time is within 1 hour of today's unpause schedule - should unpause", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ps-cluster1", + Namespace: "ps", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Pause: true, + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Unpause: "10 14 * * 1-5", // 2:10 PM Mon-Fri + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + Hibernation: &apiv1.HibernationStatus{ + State: apiv1.HibernationStatePaused, + LastPauseTime: &metav1.Time{ + Time: time.Date(2025, 9, 18, 14, 5, 0, 0, time.UTC), // Today 2:05 PM (when paused, before unpause schedule) + }, + }, + }, + }, + schedule: "10 14 * * 1-5", + now: time.Date(2025, 9, 18, 14, 30, 0, 0, time.UTC), // Thursday 2:30 PM (20 minutes after unpause time) + 
expectedResult: true, // Should unpause when time has passed by less than 1 hour expectedError: false, }, } @@ -830,7 +889,7 @@ func TestPerconaServerMySQLHibernationReconciler_canPauseCluster(t *testing.T) { expectedError: false, }, { - name: "cannot pause - active backup", + name: "cannot pause - active backup (Running)", cr: &apiv1.PerconaServerMySQL{ ObjectMeta: metav1.ObjectMeta{ Name: "test-cluster", @@ -860,7 +919,67 @@ func TestPerconaServerMySQLHibernationReconciler_canPauseCluster(t *testing.T) { expectedError: false, }, { - name: "cannot pause - active restore", + name: "cannot pause - active backup (Starting)", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateReady, + }, + }, + backups: []*apiv1.PerconaServerMySQLBackup{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "starting-backup", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLBackupSpec{ + ClusterName: "test-cluster", + }, + Status: apiv1.PerconaServerMySQLBackupStatus{ + State: apiv1.BackupStarting, + }, + }, + }, + restores: []*apiv1.PerconaServerMySQLRestore{}, + expectedResult: false, + expectedReason: "active backup: starting-backup (state: Starting)", + expectedError: false, + }, + { + name: "cannot pause - active backup (New)", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateReady, + }, + }, + backups: []*apiv1.PerconaServerMySQLBackup{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "new-backup", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLBackupSpec{ + ClusterName: "test-cluster", + }, + Status: apiv1.PerconaServerMySQLBackupStatus{ + State: apiv1.BackupNew, + }, + }, + }, + restores: []*apiv1.PerconaServerMySQLRestore{}, + expectedResult: false, + expectedReason: "active backup: new-backup (state: )", 
+ expectedError: false, + }, + { + name: "cannot pause - active restore (Running)", cr: &apiv1.PerconaServerMySQL{ ObjectMeta: metav1.ObjectMeta{ Name: "test-cluster", @@ -889,6 +1008,66 @@ func TestPerconaServerMySQLHibernationReconciler_canPauseCluster(t *testing.T) { expectedReason: "active restore: active-restore (state: Running)", expectedError: false, }, + { + name: "cannot pause - active restore (Starting)", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateReady, + }, + }, + backups: []*apiv1.PerconaServerMySQLBackup{}, + restores: []*apiv1.PerconaServerMySQLRestore{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "starting-restore", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLRestoreSpec{ + ClusterName: "test-cluster", + }, + Status: apiv1.PerconaServerMySQLRestoreStatus{ + State: apiv1.RestoreStarting, + }, + }, + }, + expectedResult: false, + expectedReason: "active restore: starting-restore (state: Starting)", + expectedError: false, + }, + { + name: "cannot pause - active restore (New)", + cr: &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateReady, + }, + }, + backups: []*apiv1.PerconaServerMySQLBackup{}, + restores: []*apiv1.PerconaServerMySQLRestore{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "new-restore", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLRestoreSpec{ + ClusterName: "test-cluster", + }, + Status: apiv1.PerconaServerMySQLRestoreStatus{ + State: apiv1.RestoreNew, + }, + }, + }, + expectedResult: false, + expectedReason: "active restore: new-restore (state: )", + expectedError: false, + }, { name: "can pause - completed backup", cr: &apiv1.PerconaServerMySQL{ @@ -1465,80 +1644,6 @@ func TestScheduleEvaluationLogic(t *testing.T) { } } -// 
TestRealWorldScenario tests the exact scenario we encountered in production -func TestRealWorldScenario(t *testing.T) { - t.Run("pause time passed but not triggered", func(t *testing.T) { - // This is the exact scenario we encountered: - // - Pause scheduled for 13:45 (1:45 PM) - // - Current time is 13:47:28 (1:47:28 PM) - 2+ minutes after pause time - // - Should trigger pause but didn't - - schedule := "45 13 * * 1-5" // 1:45 PM Mon-Fri - pauseTime := time.Date(2025, 9, 18, 13, 45, 0, 0, time.UTC) // Thursday 1:45 PM - currentTime := time.Date(2025, 9, 18, 13, 47, 28, 0, time.UTC) // Thursday 1:47:28 PM - - // Parse the cron schedule - cronSchedule, err := cron.ParseStandard(schedule) - require.NoError(t, err, "Should parse valid cron expression") - - // Test first-time evaluation logic (no previous pause/unpause times) - nextPauseTime := cronSchedule.Next(currentTime.Add(-time.Second)) // Check from 1 second ago - shouldPause := currentTime.After(nextPauseTime) || currentTime.Equal(nextPauseTime) - - // This reveals the bug: shouldPause is false because cron.Next() returns tomorrow's time - assert.False(t, shouldPause, "This reveals the bug: cron.Next() returns tomorrow's time, not today's") - assert.True(t, currentTime.After(pauseTime), "Current time should be after pause time") - assert.NotEqual(t, pauseTime, nextPauseTime, "Next pause time is tomorrow's time, not today's (this is the bug)") - }) - - t.Run("unpause time passed but not triggered", func(t *testing.T) { - // Similar scenario for unpause: - // - Unpause scheduled for 13:50 (1:50 PM) - // - Current time is 13:52 (1:52 PM) - 2 minutes after unpause time - // - Should trigger unpause but didn't - - schedule := "50 13 * * 1-5" // 1:50 PM Mon-Fri - unpauseTime := time.Date(2025, 9, 18, 13, 50, 0, 0, time.UTC) // Thursday 1:50 PM - currentTime := time.Date(2025, 9, 18, 13, 52, 0, 0, time.UTC) // Thursday 1:52 PM - - // Parse the cron schedule - cronSchedule, err := cron.ParseStandard(schedule) - 
require.NoError(t, err, "Should parse valid cron expression") - - // Test first-time evaluation logic (no previous pause/unpause times) - nextUnpauseTime := cronSchedule.Next(currentTime.Add(-time.Second)) // Check from 1 second ago - shouldUnpause := currentTime.After(nextUnpauseTime) || currentTime.Equal(nextUnpauseTime) - - // This reveals the bug: shouldUnpause is false because cron.Next() returns tomorrow's time - assert.False(t, shouldUnpause, "This reveals the bug: cron.Next() returns tomorrow's time, not today's") - assert.True(t, currentTime.After(unpauseTime), "Current time should be after unpause time") - assert.NotEqual(t, unpauseTime, nextUnpauseTime, "Next unpause time is tomorrow's time, not today's (this is the bug)") - }) - - t.Run("fixed logic - should work correctly", func(t *testing.T) { - // This test shows how the logic should work with the fix - schedule := "45 13 * * 1-5" // 1:45 PM Mon-Fri - pauseTime := time.Date(2025, 9, 18, 13, 45, 0, 0, time.UTC) // Thursday 1:45 PM - currentTime := time.Date(2025, 9, 18, 13, 47, 28, 0, time.UTC) // Thursday 1:47:28 PM - - // Parse the cron schedule - cronSchedule, err := cron.ParseStandard(schedule) - require.NoError(t, err, "Should parse valid cron expression") - - // Fixed logic: check if current time matches today's schedule - // We need to check if the current time is after the scheduled time for today - today := time.Date(currentTime.Year(), currentTime.Month(), currentTime.Day(), 0, 0, 0, 0, currentTime.Location()) - todaySchedule := cronSchedule.Next(today.Add(-time.Second)) // Get today's scheduled time - - // If today's schedule has passed, we should trigger - shouldPause := currentTime.After(todaySchedule) || currentTime.Equal(todaySchedule) - - // This should be true with the fixed logic - assert.True(t, shouldPause, "With fixed logic, should pause when current time is after today's scheduled time") - assert.Equal(t, pauseTime, todaySchedule, "Today's schedule should match the expected pause 
time") - }) -} - func TestScheduleChangeDetectionLogic(t *testing.T) { tests := []struct { name string @@ -1575,6 +1680,13 @@ func TestScheduleChangeDetectionLogic(t *testing.T) { shouldDetectChange: true, description: "Should detect change when day of week is different", }, + { + name: "schedule changed to very near future - should detect change", + currentSchedule: "27 18 * * 1-5", // 6:27 PM weekdays + currentNextTime: time.Date(2025, 9, 22, 18, 27, 0, 0, time.UTC), // Next Monday + shouldDetectChange: true, + description: "Should detect change when new schedule time is very close in the future (within 5 minutes)", + }, } for _, tt := range tests { @@ -1603,6 +1715,504 @@ func TestScheduleChangeDetectionLogic(t *testing.T) { } } +// TestComplexScheduleScenarios tests non-daily, hourly, and monthly schedules +func TestComplexScheduleScenarios(t *testing.T) { + tests := []struct { + name string + schedule string + now time.Time + expectedNext string // Expected next occurrence in format "2006-01-02 15:04" + description string + }{ + { + name: "weekday only schedule - Friday", + schedule: "0 9 * * 1,3,5", // 9 AM Mon, Wed, Fri + now: time.Date(2025, 9, 19, 8, 0, 0, 0, time.UTC), // Friday 8 AM + expectedNext: "2025-09-19 09:00", // Same day Friday 9 AM + description: "Weekday-only schedule should work on valid weekdays", + }, + { + name: "weekday only schedule - Wednesday", + schedule: "0 9 * * 1,3,5", // 9 AM Mon, Wed, Fri + now: time.Date(2025, 9, 22, 8, 0, 0, 0, time.UTC), // Monday 8 AM + expectedNext: "2025-09-22 09:00", // Same day 9 AM + description: "Weekday-only schedule should work on valid weekdays", + }, + { + name: "weekday only schedule - Saturday skip", + schedule: "0 9 * * 1,3,5", // 9 AM Mon, Wed, Fri + now: time.Date(2025, 9, 20, 8, 0, 0, 0, time.UTC), // Saturday 8 AM + expectedNext: "2025-09-22 09:00", // Next Monday 9 AM + description: "Weekday-only schedule should skip weekends", + }, + { + name: "hourly schedule", + schedule: "0 */2 * * *", 
// Every 2 hours + now: time.Date(2025, 9, 19, 9, 30, 0, 0, time.UTC), // 9:30 AM + expectedNext: "2025-09-19 10:00", // Next 2-hour mark + description: "Hourly schedule should calculate next occurrence correctly", + }, + { + name: "monthly schedule", + schedule: "0 9 1 * *", // 9 AM on 1st of every month + now: time.Date(2025, 9, 19, 10, 0, 0, 0, time.UTC), // Sep 19 + expectedNext: "2025-10-01 09:00", // Next month 1st + description: "Monthly schedule should calculate next month correctly", + }, + { + name: "end of month transition", + schedule: "0 9 1 * *", // 9 AM on 1st of every month + now: time.Date(2025, 1, 31, 10, 0, 0, 0, time.UTC), // Jan 31 + expectedNext: "2025-02-01 09:00", // Feb 1st + description: "Monthly schedule should handle month-end transitions", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cronSchedule, err := cron.ParseStandard(tt.schedule) + require.NoError(t, err, "Failed to parse schedule: %s", tt.schedule) + + // Calculate next occurrence + next := cronSchedule.Next(tt.now) + expectedTime, err := time.Parse("2006-01-02 15:04", tt.expectedNext) + require.NoError(t, err, "Failed to parse expected time") + + assert.Equal(t, expectedTime, next, tt.description) + }) + } +} + +// TestEdgeCaseScenarios checks that edge-case schedules (leap day, year boundary) parse and that invalid or empty schedules are rejected +func TestEdgeCaseScenarios(t *testing.T) { + tests := []struct { + name string + schedule string + now time.Time + description string + expectError bool + }{ + { + name: "leap year handling", + schedule: "0 9 29 2 *", // 9 AM on Feb 29 + now: time.Date(2024, 2, 29, 8, 0, 0, 0, time.UTC), // Leap year + description: "Leap year schedule should work correctly", + expectError: false, + }, + { + name: "invalid schedule", + schedule: "invalid cron", + now: time.Date(2025, 9, 19, 10, 0, 0, 0, time.UTC), + description: "Invalid schedule should return error", + expectError: true, + }, + { + name: "empty schedule", + schedule: "", + now: time.Date(2025, 9, 19, 10, 0, 0, 0, 
time.UTC), + description: "Empty schedule should return error", + expectError: true, + }, + { + name: "year boundary", + schedule: "0 9 * * *", // Daily at 9 AM + now: time.Date(2024, 12, 31, 23, 59, 0, 0, time.UTC), // Year end + description: "Schedule should handle year boundary transitions", + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := cron.ParseStandard(tt.schedule) + if tt.expectError { + assert.Error(t, err, tt.description) + } else { + assert.NoError(t, err, tt.description) + } + }) + } +} + +// TestEndToEndScenarios tests complete hibernation cycles +func TestEndToEndScenarios(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, apiv1.AddToScheme(scheme)) + + tests := []struct { + name string + description string + scenario func(t *testing.T, client client.Client) + }{ + { + name: "complete daily cycle", + description: "Test pause at 6 PM, unpause at 9 AM next day", + scenario: func(t *testing.T, client client.Client) { + // Create cluster with hibernation enabled + cr := &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "0 18 * * *", // 6 PM daily + Unpause: "0 9 * * *", // 9 AM daily + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateReady, + }, + } + + // Create reconciler + r := &PerconaServerMySQLHibernationReconciler{ + Client: client, + Scheme: scheme, + } + + ctx := context.Background() + + // Test 1: Should pause at 6 PM + shouldPause, err := r.shouldPauseCluster(ctx, cr, "0 18 * * *", + time.Date(2025, 9, 19, 18, 0, 0, 0, time.UTC)) // 6 PM + require.NoError(t, err) + assert.True(t, shouldPause, "Should pause at scheduled time") + + // Test 2: Basic unpause logic validation + // Simulate cluster being paused + cr.Status.State = 
apiv1.StatePaused + cr.Status.Hibernation = &apiv1.HibernationStatus{ + State: apiv1.HibernationStatePaused, + LastPauseTime: &metav1.Time{Time: time.Date(2025, 9, 19, 18, 0, 0, 0, time.UTC)}, + LastUnpauseTime: &metav1.Time{Time: time.Date(2025, 9, 19, 9, 0, 0, 0, time.UTC)}, // Previous unpause + } + + // Test that unpause logic can be called without error + _, err = r.shouldUnpauseCluster(ctx, cr, "0 9 * * *", + time.Date(2025, 9, 20, 9, 0, 0, 0, time.UTC)) // Next day 9 AM + require.NoError(t, err, "ShouldUnpauseCluster should not return error") + + // Verify cluster state is correct for unpause + assert.Equal(t, apiv1.StatePaused, cr.Status.State, "Cluster should be in paused state") + assert.Equal(t, apiv1.HibernationStatePaused, cr.Status.Hibernation.State, "Hibernation should be in paused state") + }, + }, + { + name: "weekend skip scenario", + description: "Test weekday-only schedule skipping weekends", + scenario: func(t *testing.T, client client.Client) { + cr := &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "0 18 * * 1-5", // 6 PM weekdays only + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateReady, + }, + } + + r := &PerconaServerMySQLHibernationReconciler{ + Client: client, + Scheme: scheme, + } + + ctx := context.Background() + + // Test: Should NOT pause on Saturday + shouldPause, err := r.shouldPauseCluster(ctx, cr, "0 18 * * 1-5", + time.Date(2025, 9, 20, 18, 0, 0, 0, time.UTC)) // Saturday 6 PM + require.NoError(t, err) + assert.False(t, shouldPause, "Should not pause on weekends") + + // Test: Should pause on Monday + shouldPause, err = r.shouldPauseCluster(ctx, cr, "0 18 * * 1-5", + time.Date(2025, 9, 22, 18, 0, 0, 0, time.UTC)) // Monday 6 PM + require.NoError(t, err) + assert.True(t, shouldPause, "Should pause 
on weekdays") + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client := fake.NewClientBuilder().WithScheme(scheme).Build() + tt.scenario(t, client) + }) + } +} + +// TestFailureRecoveryScenarios tests error handling and recovery +func TestFailureRecoveryScenarios(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, apiv1.AddToScheme(scheme)) + + tests := []struct { + name string + description string + scenario func(t *testing.T, client client.Client) + }{ + { + name: "cluster not found error", + description: "Test handling when cluster is deleted", + scenario: func(t *testing.T, client client.Client) { + r := &PerconaServerMySQLHibernationReconciler{ + Client: client, + Scheme: scheme, + } + + ctx := context.Background() + req := ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: "non-existent-cluster", + Namespace: "default", + }, + } + + // Should not return error when cluster not found + result, err := r.Reconcile(ctx, req) + require.NoError(t, err) + assert.Equal(t, ctrl.Result{}, result, "Should return empty result when cluster not found") + }, + }, + { + name: "invalid hibernation configuration", + description: "Test handling of invalid hibernation config", + scenario: func(t *testing.T, client client.Client) { + cr := &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "invalid cron", // Invalid schedule + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateReady, + }, + } + + require.NoError(t, client.Create(context.Background(), cr)) + + r := &PerconaServerMySQLHibernationReconciler{ + Client: client, + Scheme: scheme, + } + + ctx := context.Background() + + // Should return error for invalid schedule + _, err := r.shouldPauseCluster(ctx, cr, "invalid cron", time.Now()) + 
assert.Error(t, err, "Should return error for invalid cron schedule") + }, + }, + { + name: "cluster in error state", + description: "Test handling when cluster is in error state", + scenario: func(t *testing.T, client client.Client) { + cr := &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "0 18 * * *", // 6 PM daily + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateError, // Error state + }, + } + + require.NoError(t, client.Create(context.Background(), cr)) + + r := &PerconaServerMySQLHibernationReconciler{ + Client: client, + Scheme: scheme, + } + + ctx := context.Background() + + // Should not be able to pause when cluster is in error state + canPause, reason, err := r.canPauseCluster(ctx, cr) + require.NoError(t, err) + assert.False(t, canPause, "Should not be able to pause cluster in error state") + assert.Contains(t, reason, "cluster not ready", "Should indicate cluster not ready") + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client := fake.NewClientBuilder().WithScheme(scheme).Build() + tt.scenario(t, client) + }) + } +} + +// TestPerformanceScenarios tests performance with multiple clusters +func TestPerformanceScenarios(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, apiv1.AddToScheme(scheme)) + + t.Run("multiple clusters with hibernation", func(t *testing.T) { + client := fake.NewClientBuilder().WithScheme(scheme).Build() + + // Create multiple clusters + for i := 0; i < 10; i++ { + cr := &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("test-cluster-%d", i), + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "0 18 
* * *", // 6 PM daily + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateReady, + }, + } + require.NoError(t, client.Create(context.Background(), cr)) + } + + r := &PerconaServerMySQLHibernationReconciler{ + Client: client, + Scheme: scheme, + } + + ctx := context.Background() + now := time.Date(2025, 9, 19, 18, 0, 0, 0, time.UTC) // 6 PM + + // Test performance with multiple clusters + start := time.Now() + for i := 0; i < 10; i++ { + cr := &apiv1.PerconaServerMySQL{} + err := client.Get(ctx, types.NamespacedName{ + Name: fmt.Sprintf("test-cluster-%d", i), + Namespace: "default", + }, cr) + require.NoError(t, err) + + _, err = r.shouldPauseCluster(ctx, cr, "0 18 * * *", now) + require.NoError(t, err) + } + duration := time.Since(start) + + // Should complete quickly (less than 1 second for 10 clusters) + assert.Less(t, duration, time.Second, "Processing 10 clusters should be fast") + }) +} + +// TestUserExperienceScenarios tests error messages and status validation +func TestUserExperienceScenarios(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, apiv1.AddToScheme(scheme)) + + tests := []struct { + name string + description string + scenario func(t *testing.T, client client.Client) + }{ + { + name: "clear error messages", + description: "Test that error messages are clear and helpful", + scenario: func(t *testing.T, client client.Client) { + cr := &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "invalid cron", + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateReady, + }, + } + + r := &PerconaServerMySQLHibernationReconciler{ + Client: client, + Scheme: scheme, + } + + ctx := context.Background() + + // Test error message for invalid cron + _, err := r.shouldPauseCluster(ctx, cr, "invalid 
cron", time.Now()) + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid cron", "Error message should mention invalid cron") + }, + }, + { + name: "status validation", + description: "Test that hibernation status is properly validated", + scenario: func(t *testing.T, client client.Client) { + cr := &apiv1.PerconaServerMySQL{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: apiv1.PerconaServerMySQLSpec{ + Hibernation: &apiv1.HibernationSpec{ + Enabled: true, + Schedule: apiv1.HibernationSchedule{ + Pause: "0 18 * * *", // 6 PM daily + }, + }, + }, + Status: apiv1.PerconaServerMySQLStatus{ + State: apiv1.StateReady, + }, + } + + // Test that hibernation status initialization would work + // (We can't easily test the full flow with fake client due to status updates) + assert.NotNil(t, cr.Spec.Hibernation, "Hibernation spec should be set") + assert.True(t, cr.Spec.Hibernation.Enabled, "Hibernation should be enabled") + assert.Equal(t, "0 18 * * *", cr.Spec.Hibernation.Schedule.Pause, "Pause schedule should be correct") + assert.Equal(t, apiv1.StateReady, cr.Status.State, "Cluster should be in ready state") + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client := fake.NewClientBuilder().WithScheme(scheme).Build() + tt.scenario(t, client) + }) + } +} + func TestPerconaServerMySQLHibernationReconciler_calculateNextScheduleTime(t *testing.T) { scheme := runtime.NewScheme() require.NoError(t, apiv1.AddToScheme(scheme)) From b4e399e32323b8c7923dbc1df794f7e57610e3c5 Mon Sep 17 00:00:00 2001 From: Viacheslav Sarzhan Date: Tue, 23 Sep 2025 20:34:53 +0300 Subject: [PATCH 3/6] fix go lint --- deploy/cr.yaml | 10 +++++----- pkg/controller/pshibernation/controller_test.go | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/deploy/cr.yaml b/deploy/cr.yaml index aaa6ebf10..f35c7749e 100644 --- a/deploy/cr.yaml +++ b/deploy/cr.yaml @@ -28,11 +28,11 @@ spec: upgradeOptions: 
versionServiceEndpoint: https://check.percona.com apply: disabled - hibernation: - enabled: true - schedule: - pause: "55 12 * * 1-5" # Pause Mon-Fri at 8 PM - unpause: "45 12 * * 1-5" # Unpause Mon-Fri at 8 AM +# hibernation: +# enabled: false +# schedule: +# pause: "55 12 * * 1-5" # Pause Mon-Fri at 12:55 PM +# unpause: "45 12 * * 1-5" # Unpause Mon-Fri at 12:45 PM # initContainer: # image: perconalab/percona-server-mysql-operator:main # containerSecurityContext: diff --git a/pkg/controller/pshibernation/controller_test.go b/pkg/controller/pshibernation/controller_test.go index d36dfe50e..2a48c8f3d 100644 --- a/pkg/controller/pshibernation/controller_test.go +++ b/pkg/controller/pshibernation/controller_test.go @@ -1527,7 +1527,7 @@ func BenchmarkHibernationScheduleParsing(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - _, _ = time.Parse("0 20 * * 1-5", schedule) + _, _ = cron.ParseStandard(schedule) } } From 12d917e25b19b4d364ccc47433b94bd467db3799 Mon Sep 17 00:00:00 2001 From: Viacheslav Sarzhan Date: Tue, 23 Sep 2025 20:55:56 +0300 Subject: [PATCH 4/6] fix tests --- pkg/controller/pshibernation/controller.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/controller/pshibernation/controller.go b/pkg/controller/pshibernation/controller.go index 665c033a6..b945edc52 100644 --- a/pkg/controller/pshibernation/controller.go +++ b/pkg/controller/pshibernation/controller.go @@ -54,7 +54,7 @@ func (r *PerconaServerMySQLHibernationReconciler) Reconcile(ctx context.Context, // Fetch the PerconaServerMySQL instance cr := &apiv1.PerconaServerMySQL{} - if err := r.Client.Get(ctx, req.NamespacedName, cr); err != nil { + if err := r.Get(ctx, req.NamespacedName, cr); err != nil { return ctrl.Result{}, client.IgnoreNotFound(err) } @@ -205,7 +205,7 @@ func (r *PerconaServerMySQLHibernationReconciler) scheduleHibernationForNextWind return k8sretry.RetryOnConflict(k8sretry.DefaultRetry, func() error { // Get fresh copy of the cluster 
fresh := &apiv1.PerconaServerMySQL{} - if err := r.Client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil { + if err := r.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil { log.Error(err, "Failed to get fresh cluster copy for next window scheduling", "cluster", cr.Name, "namespace", cr.Namespace) return err } @@ -262,7 +262,7 @@ func (r *PerconaServerMySQLHibernationReconciler) synchronizeHibernationState(ct // Get fresh copy of the cluster to check current state fresh := &apiv1.PerconaServerMySQL{} - if err := r.Client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil { + if err := r.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil { return err } @@ -809,10 +809,10 @@ func (r *PerconaServerMySQLHibernationReconciler) updateHibernationScheduleIfCha // Check if the new schedule time is very close in the future (within 5 minutes) // This handles the case where user changes schedule to a time very close to now now := time.Now() - if expectedNextPauseTime.Time.After(now) && expectedNextPauseTime.Time.Sub(now) <= 5*time.Minute { + if expectedNextPauseTime.After(now) && expectedNextPauseTime.Sub(now) <= 5*time.Minute { needsUpdate = true log.Info("Schedule changed to very near future time, updating to pause soon", "cluster", cr.Name, "namespace", cr.Namespace, - "oldTime", currentNextPauseTime, "newTime", expectedNextPauseTime, "timeUntilPause", expectedNextPauseTime.Time.Sub(now)) + "oldTime", currentNextPauseTime, "newTime", expectedNextPauseTime, "timeUntilPause", expectedNextPauseTime.Sub(now)) } } } @@ -841,10 +841,10 @@ func (r *PerconaServerMySQLHibernationReconciler) updateHibernationScheduleIfCha // Check if the new schedule time is very close in the future (within 5 minutes) // This handles the case where user changes schedule to a time very close to now now := time.Now() - if 
expectedNextUnpauseTime.Time.After(now) && expectedNextUnpauseTime.Time.Sub(now) <= 5*time.Minute { + if expectedNextUnpauseTime.After(now) && expectedNextUnpauseTime.Sub(now) <= 5*time.Minute { needsUpdate = true log.Info("Unpause schedule changed to very near future time, updating to unpause soon", "cluster", cr.Name, "namespace", cr.Namespace, - "oldTime", currentNextUnpauseTime, "newTime", expectedNextUnpauseTime, "timeUntilUnpause", expectedNextUnpauseTime.Time.Sub(now)) + "oldTime", currentNextUnpauseTime, "newTime", expectedNextUnpauseTime, "timeUntilUnpause", expectedNextUnpauseTime.Sub(now)) } } } From 02585255a1504da146b016023f46d9bec4cd1326 Mon Sep 17 00:00:00 2001 From: Viacheslav Sarzhan Date: Tue, 23 Sep 2025 21:02:56 +0300 Subject: [PATCH 5/6] fix lint --- pkg/controller/pshibernation/controller.go | 14 +++++++------- pkg/controller/pshibernation/controller_test.go | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pkg/controller/pshibernation/controller.go b/pkg/controller/pshibernation/controller.go index b945edc52..982dda326 100644 --- a/pkg/controller/pshibernation/controller.go +++ b/pkg/controller/pshibernation/controller.go @@ -582,7 +582,7 @@ func (r *PerconaServerMySQLHibernationReconciler) pauseCluster(ctx context.Conte return k8sretry.RetryOnConflict(k8sretry.DefaultRetry, func() error { // Get fresh copy of the cluster fresh := &apiv1.PerconaServerMySQL{} - if err := r.Client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil { + if err := r.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil { log.Error(err, "Failed to get fresh cluster copy", "cluster", cr.Name, "namespace", cr.Namespace) return err } @@ -591,7 +591,7 @@ func (r *PerconaServerMySQLHibernationReconciler) pauseCluster(ctx context.Conte fresh.Spec.Pause = true // Update the cluster - if err := r.Client.Update(ctx, fresh); err != nil { + if err := r.Update(ctx, fresh); err != nil 
{ log.Error(err, "Failed to update cluster spec", "cluster", cr.Name, "namespace", cr.Namespace) return err } @@ -632,7 +632,7 @@ func (r *PerconaServerMySQLHibernationReconciler) unpauseCluster(ctx context.Con return k8sretry.RetryOnConflict(k8sretry.DefaultRetry, func() error { // Get fresh copy of the cluster fresh := &apiv1.PerconaServerMySQL{} - if err := r.Client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil { + if err := r.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil { log.Error(err, "Failed to get fresh cluster copy", "cluster", cr.Name, "namespace", cr.Namespace) return err } @@ -641,7 +641,7 @@ func (r *PerconaServerMySQLHibernationReconciler) unpauseCluster(ctx context.Con fresh.Spec.Pause = false // Update the cluster - if err := r.Client.Update(ctx, fresh); err != nil { + if err := r.Update(ctx, fresh); err != nil { log.Error(err, "Failed to update cluster spec", "cluster", cr.Name, "namespace", cr.Namespace) return err } @@ -682,7 +682,7 @@ func (r *PerconaServerMySQLHibernationReconciler) updateHibernationState(ctx con return k8sretry.RetryOnConflict(k8sretry.DefaultRetry, func() error { // Get fresh copy of the cluster fresh := &apiv1.PerconaServerMySQL{} - if err := r.Client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil { + if err := r.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil { log.Error(err, "Failed to get fresh cluster copy for status update", "cluster", cr.Name, "namespace", cr.Namespace) return err } @@ -725,7 +725,7 @@ func (r *PerconaServerMySQLHibernationReconciler) initializeHibernationStatus(ct return k8sretry.RetryOnConflict(k8sretry.DefaultRetry, func() error { // Get fresh copy of the cluster fresh := &apiv1.PerconaServerMySQL{} - if err := r.Client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil { + if err := 
r.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil { log.Error(err, "Failed to get fresh cluster copy for status initialization", "cluster", cr.Name, "namespace", cr.Namespace) return err } @@ -866,7 +866,7 @@ func (r *PerconaServerMySQLHibernationReconciler) updateHibernationNextTimes(ctx return k8sretry.RetryOnConflict(k8sretry.DefaultRetry, func() error { // Get fresh copy of the cluster fresh := &apiv1.PerconaServerMySQL{} - if err := r.Client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil { + if err := r.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, fresh); err != nil { log.Error(err, "Failed to get fresh cluster copy for schedule update", "cluster", cr.Name, "namespace", cr.Namespace) return err } diff --git a/pkg/controller/pshibernation/controller_test.go b/pkg/controller/pshibernation/controller_test.go index 2a48c8f3d..9f6688b6b 100644 --- a/pkg/controller/pshibernation/controller_test.go +++ b/pkg/controller/pshibernation/controller_test.go @@ -798,7 +798,7 @@ func TestPerconaServerMySQLHibernationReconciler_shouldUnpauseCluster(t *testing t.Logf(" Schedule: %s", tt.schedule) t.Logf(" Current time: %s", tt.now.Format(time.RFC3339)) if tt.cr.Status.Hibernation != nil && tt.cr.Status.Hibernation.LastPauseTime != nil { - t.Logf(" LastPauseTime: %s", tt.cr.Status.Hibernation.LastPauseTime.Time.Format(time.RFC3339)) + t.Logf(" LastPauseTime: %s", tt.cr.Status.Hibernation.LastPauseTime.Format(time.RFC3339)) } } assert.Equal(t, tt.expectedResult, result) From 78dcbca642cedb74118a1e5bd99294b9ded2e301 Mon Sep 17 00:00:00 2001 From: Viacheslav Sarzhan Date: Wed, 24 Sep 2025 15:13:16 +0300 Subject: [PATCH 6/6] fix cr --- deploy/cr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/cr.yaml b/deploy/cr.yaml index e8357928b..98d4a6c7b 100644 --- a/deploy/cr.yaml +++ b/deploy/cr.yaml @@ -5,7 +5,7 @@ metadata: finalizers: - 
percona.com/delete-mysql-pods-in-order # - percona.com/delete-ssl - - percona.com/delete-mysql-pvc + # - percona.com/delete-mysql-pvc spec: # metadata: # annotations: