diff --git a/controllers/patroni_core_controller.go b/controllers/patroni_core_controller.go index b77d970d..3643d3c5 100644 --- a/controllers/patroni_core_controller.go +++ b/controllers/patroni_core_controller.go @@ -156,7 +156,8 @@ func (pr *PatroniCoreReconciler) Reconcile(ctx context.Context, request ctrl.Req newResVersion := cr.ResourceVersion newCrHash := util.HashJson(cr.Spec) if (pr.resVersions[cr.Name] == newResVersion || - pr.crHash == newCrHash) && len(cr.Status.Conditions) != 0 && cr.Status.Conditions[0].Type != Failed { + pr.crHash == newCrHash) && (len(cr.Status.Conditions) != 0 && + (cr.Status.Conditions[0].Type != Failed || (cr.Status.Conditions[0].Type == Failed && pr.errorCounter == 0))) { areCredsChanged, err := manager.AreCredsChanged(credentials.PostgresSecretNames) if err != nil { return reconcile.Result{}, err @@ -203,6 +204,7 @@ func (pr *PatroniCoreReconciler) Reconcile(ctx context.Context, request ctrl.Req return pr.handleReconcileError(maxReconcileAttempts, "CanNotActualizeCredsOnCluster", newCrHash, + "Error during actualization of creds on cluster", err) } @@ -212,12 +214,13 @@ func (pr *PatroniCoreReconciler) Reconcile(ctx context.Context, request ctrl.Req switch err.(type) { case *deployerrors.TestsError: { - return pr.handleTestReconcileError(err, "Error during tests run", maxReconcileAttempts, newCrHash) + return pr.handleReconcileError(maxReconcileAttempts, "ReconcilePostgresServiceClusterFailed", "Error during tests run", newCrHash, err) } case error: { return pr.handleReconcileError(maxReconcileAttempts, "ReconcilePostgresServiceClusterFailed", + "Error during reconcile cycle", newCrHash, err) } @@ -253,12 +256,13 @@ func (pr *PatroniCoreReconciler) Reconcile(ctx context.Context, request ctrl.Req switch err.(type) { case *deployerrors.TestsError: { - return pr.handleTestReconcileError(err, "Error during tests run", maxReconcileAttempts, newCrHash) + return pr.handleReconcileError(maxReconcileAttempts, "ReconcilePatroniCoreClusterFailed", "Error during tests run", newCrHash, err) } case error: { return pr.handleReconcileError(maxReconcileAttempts, "ReconcilePatroniCoreClusterFailed", + "Error during reconcile cycle", newCrHash, err) } @@ -368,23 +372,6 @@ func (pr *PatroniCoreReconciler) stanzaUpgrade(create bool) error { return nil } -func (pr *PatroniCoreReconciler) handleTestReconcileError(err error, errMsg string, maxReconcileAttempts int, newCrHash string) (ctrl.Result, error) { - pr.errorCounter++ - if pr.errorCounter < maxReconcileAttempts { - pr.logger.Error(errMsg, zap.Error(err)) - pr.logger.Error(fmt.Sprintf("Error counter for tests run: %d, let's try to run the reconcile again", pr.errorCounter)) - pr.reason = "PatroniCoreTestsFailed" - pr.message = "PatroniCore service reconcile cycle failed" - if err := pr.updateStatus(Failed, "PatroniCoreTestsFailed", err.Error()); err != nil { - pr.logger.Error("Cannot update CR status", zap.Error(err)) - return reconcile.Result{RequeueAfter: time.Minute}, err - } - return reconcile.Result{}, err - } - pr.logger.Error("Reconciliation cycle failed due to test pod ended with error") - return pr.stopReconcile(newCrHash, err) -} - func (pr *PatroniCoreReconciler) reconcilePatroniCoreCluster(cr *qubershipv1.PatroniCore) error { consulRegistrationRequired := true // reconcile Patroni @@ -527,25 +514,33 @@ func (pr *PatroniCoreReconciler) createTestsPods(cr *qubershipv1.PatroniCore) er return nil } -func (pr *PatroniCoreReconciler) stopReconcile(newCrHash string, err error) (ctrl.Result, error) { +func (pr *PatroniCoreReconciler) stopReconcile(newCrHash string, reason string, err error) (ctrl.Result, error) { pr.logger.Error(fmt.Sprintf("Failed reconcile attempts: %d, updating crHash, resVersions", pr.errorCounter)) pr.crHash = newCrHash pr.errorCounter = 0 - return reconcile.Result{RequeueAfter: time.Minute}, err + return pr.failReconcile(reason, err, false) } -func (pr *PatroniCoreReconciler) handleReconcileError(maxAttempts int, reason, newCrHash string, err error) (ctrl.Result, error) { +func (pr *PatroniCoreReconciler) handleReconcileError(maxAttempts int, reason, errMsg, newCrHash string, err error) (ctrl.Result, error) { pr.errorCounter++ if pr.errorCounter < maxAttempts { + pr.logger.Error(errMsg, zap.Error(err)) pr.logger.Error(fmt.Sprintf("Error counter: %d, let's try to run the reconcile again", pr.errorCounter)) - pr.reason = reason - pr.message = "PatroniCore service reconcile cycle failed" - if err := pr.updateStatus(Failed, reason, - fmt.Sprintf("Postgres service reconcile cycle failed. Error: %s", err.Error())); err != nil { - pr.logger.Error("Cannot update CR status", zap.Error(err)) - return reconcile.Result{RequeueAfter: time.Minute}, err - } - return reconcile.Result{RequeueAfter: time.Minute}, err + return pr.failReconcile(reason, err, true) + } + return pr.stopReconcile(newCrHash, "No reconcile attempts left", err) +} + +func (pr *PatroniCoreReconciler) failReconcile(reason string, err error, requeue bool) (ctrl.Result, error) { + pr.reason = reason + pr.message = "PatroniCore service reconcile cycle failed" + if err := pr.updateStatus(Failed, reason, + fmt.Sprintf("Postgres service reconcile cycle failed. Error: %s", err.Error())); err != nil { + pr.logger.Error("Cannot update CR status", zap.Error(err)) + } + requireAfter := time.Duration(0) + if requeue { + requireAfter = time.Minute } - return pr.stopReconcile(newCrHash, err) + return reconcile.Result{RequeueAfter: requireAfter, Requeue: requeue}, err } diff --git a/controllers/postgresservice_controller.go b/controllers/postgresservice_controller.go index 5c4bf5b1..e578f0f0 100644 --- a/controllers/postgresservice_controller.go +++ b/controllers/postgresservice_controller.go @@ -152,7 +152,8 @@ func (r *PostgresServiceReconciler) Reconcile(ctx context.Context, request ctrl. newResVersion := cr.ResourceVersion newCrHash := util.HashJson(cr.Spec) if (r.resVersions[cr.Name] == newResVersion || - r.crHash == newCrHash) && len(cr.Status.Conditions) != 0 && cr.Status.Conditions[0].Type != Failed { + r.crHash == newCrHash) && (len(cr.Status.Conditions) != 0 && + (cr.Status.Conditions[0].Type != Failed || (cr.Status.Conditions[0].Type == Failed && r.errorCounter == 0))) { InfoMsg := "ResourceVersion didn't change, skipping reconcile loop" if cr.Spec.ExternalDataBase != nil { r.logger.Info(InfoMsg) @@ -207,7 +208,7 @@ func (r *PostgresServiceReconciler) Reconcile(ctx context.Context, request ctrl. switch err.(type) { case *deployerrors.TestsError: { - return r.handleTestReconcileError(err, "Error during tests run", maxReconcileAttempts, newCrHash) + return r.handleReconcileError(err, "Error during tests run", maxReconcileAttempts, newCrHash) } default: { @@ -249,28 +250,11 @@ func (r *PostgresServiceReconciler) Reconcile(ctx context.Context, request ctrl. switch err.(type) { case *deployerrors.TestsError: { - return r.handleTestReconcileError(err, "Error during tests run", maxReconcileAttempts, newCrHash) + return r.handleReconcileError(err, "Error during tests run", maxReconcileAttempts, newCrHash) } case error: { - r.errorCounter++ - - if r.errorCounter < maxReconcileAttempts { - r.logger.Error(fmt.Sprintf("Error counter: %d, let's try to run the reconcile again", r.errorCounter)) - r.reason = "ReconcilePatroniServicesClusterFailed" - r.message = "Postgres-operator service reconcile cycle failed" - if err := r.updateStatus(Failed, "ReconcilePatroniServicesClusterFailed", - fmt.Sprintf("Postgres service reconcile cycle failed. Error: %s", err.Error())); err != nil { - r.logger.Error("Cannot update CR status", zap.Error(err)) - return reconcile.Result{RequeueAfter: time.Minute}, err - } - return reconcile.Result{RequeueAfter: time.Minute}, err - } - - r.logger.Error(fmt.Sprintf("Failed reconcile attempts: %d, updating crHash, resVersions", r.errorCounter)) - r.crHash = newCrHash - r.errorCounter = 0 - return reconcile.Result{RequeueAfter: time.Minute}, err + return r.handleReconcileError(err, "Error during reconcile cycle", maxReconcileAttempts, newCrHash) } default: @@ -332,25 +316,15 @@ func (r *PostgresServiceReconciler) Reconcile(ctx context.Context, request ctrl. return reconcile.Result{}, nil } -func (r *PostgresServiceReconciler) handleTestReconcileError(err error, errMsg string, maxReconcileAttempts int, newCrHash string) (ctrl.Result, error) { +func (r *PostgresServiceReconciler) handleReconcileError(err error, errMsg string, maxReconcileAttempts int, newCrHash string) (ctrl.Result, error) { r.errorCounter++ if r.errorCounter < maxReconcileAttempts { r.logger.Error(errMsg, zap.Error(err)) - r.logger.Error(fmt.Sprintf("Error counter for tests run: %d, let's try to run the reconcile again", r.errorCounter)) - r.reason = "PostgresClusterTestsFailed" - r.message = "Postgres-operator service reconcile cycle failed" - if err := r.updateStatus(Failed, "PostgresClusterTestsFailed", err.Error()); err != nil { - r.logger.Error("Cannot update CR status", zap.Error(err)) - return reconcile.Result{RequeueAfter: time.Minute}, err - } - return reconcile.Result{}, err + r.logger.Error(fmt.Sprintf("Error counter for reconcile run: %d, let's try to run the reconcile again", r.errorCounter)) + return r.failReconcile("PostgresClusterTestsFailed", err, true) } - r.logger.Error(fmt.Sprintf("Failed reconcile attempts: %d, updating crHash, resVersions", r.errorCounter)) - r.logger.Error("Reconciliation cycle failed due to test pod ended with error") - r.crHash = newCrHash - r.errorCounter = 0 - return reconcile.Result{RequeueAfter: time.Minute}, err + return r.stopReconcile(newCrHash, "No reconcile attempts left", err) } func (r *PostgresServiceReconciler) reconcilePostgresServiceCluster(cr *qubershipv1.PatroniServices) error { @@ -719,3 +693,26 @@ func (r *PostgresServiceReconciler) processExternalResources(cr *qubershipv1.Pat return nil } + +func (r *PostgresServiceReconciler) stopReconcile(newCrHash string, reason string, err error) (ctrl.Result, error) { + r.logger.Error(fmt.Sprintf("Failed reconcile attempts: %d, updating crHash, resVersions", r.errorCounter)) + r.crHash = newCrHash + r.errorCounter = 0 + return r.failReconcile(reason, err, false) +} + +func (r *PostgresServiceReconciler) failReconcile(reason string, err error, requeue bool) (ctrl.Result, error) { + r.reason = reason + r.message = "Postgres-operator service reconcile cycle failed" + if err := r.updateStatus(Failed, reason, + fmt.Sprintf("Postgres service reconcile cycle failed. Error: %s", err.Error())); err != nil { + r.logger.Error("Cannot update CR status", zap.Error(err)) + return reconcile.Result{RequeueAfter: time.Minute}, err + } + requireAfter := time.Duration(0) + if requeue { + requireAfter = time.Minute + } + + return reconcile.Result{RequeueAfter: requireAfter, Requeue: requeue}, err +}