Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 27 additions & 32 deletions controllers/patroni_core_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,8 @@ func (pr *PatroniCoreReconciler) Reconcile(ctx context.Context, request ctrl.Req
newResVersion := cr.ResourceVersion
newCrHash := util.HashJson(cr.Spec)
if (pr.resVersions[cr.Name] == newResVersion ||
pr.crHash == newCrHash) && len(cr.Status.Conditions) != 0 && cr.Status.Conditions[0].Type != Failed {
pr.crHash == newCrHash) && (len(cr.Status.Conditions) != 0 &&
(cr.Status.Conditions[0].Type != Failed || (cr.Status.Conditions[0].Type == Failed && pr.errorCounter == 0))) {
areCredsChanged, err := manager.AreCredsChanged(credentials.PostgresSecretNames)
if err != nil {
return reconcile.Result{}, err
Expand Down Expand Up @@ -203,6 +204,7 @@ func (pr *PatroniCoreReconciler) Reconcile(ctx context.Context, request ctrl.Req
return pr.handleReconcileError(maxReconcileAttempts,
"CanNotActualizeCredsOnCluster",
newCrHash,
"Error during actualization of creds on cluster",
err)
}

Expand All @@ -212,12 +214,13 @@ func (pr *PatroniCoreReconciler) Reconcile(ctx context.Context, request ctrl.Req
switch err.(type) {
case *deployerrors.TestsError:
{
return pr.handleTestReconcileError(err, "Error during tests run", maxReconcileAttempts, newCrHash)
return pr.handleReconcileError(maxReconcileAttempts, "ReconcilePostgresServiceClusterFailed", "Error during tests run", newCrHash, err)
}
case error:
{
return pr.handleReconcileError(maxReconcileAttempts,
"ReconcilePostgresServiceClusterFailed",
"Error during reconcile cycle",
newCrHash,
err)
}
Expand Down Expand Up @@ -253,12 +256,13 @@ func (pr *PatroniCoreReconciler) Reconcile(ctx context.Context, request ctrl.Req
switch err.(type) {
case *deployerrors.TestsError:
{
return pr.handleTestReconcileError(err, "Error during tests run", maxReconcileAttempts, newCrHash)
return pr.handleReconcileError(maxReconcileAttempts, "ReconcilePatroniCoreClusterFailed", "Error during tests run", newCrHash, err)
}
case error:
{
return pr.handleReconcileError(maxReconcileAttempts,
"ReconcilePatroniCoreClusterFailed",
"Error during reconcile cycle",
newCrHash,
err)
}
Expand Down Expand Up @@ -368,23 +372,6 @@ func (pr *PatroniCoreReconciler) stanzaUpgrade(create bool) error {
return nil
}

// handleTestReconcileError increments pr.errorCounter for a failed tests run
// and decides between retrying and stopping.
//
// While the counter is below maxReconcileAttempts it logs errMsg together
// with err, sets pr.reason / pr.message, and writes a Failed status whose
// message is err.Error(). If that status update fails, the update error is
// returned with a one-minute RequeueAfter; otherwise the original err is
// returned with an empty result.
//
// Once the counter reaches maxReconcileAttempts it logs the final failure and
// returns whatever pr.stopReconcile(newCrHash, err) returns.
func (pr *PatroniCoreReconciler) handleTestReconcileError(err error, errMsg string, maxReconcileAttempts int, newCrHash string) (ctrl.Result, error) {
	pr.errorCounter++

	// Attempts exhausted: hand over to stopReconcile.
	if pr.errorCounter >= maxReconcileAttempts {
		pr.logger.Error("Reconciliation cycle failed due to test pod ended with error")
		return pr.stopReconcile(newCrHash, err)
	}

	const failureReason = "PatroniCoreTestsFailed"

	pr.logger.Error(errMsg, zap.Error(err))
	retryNotice := fmt.Sprintf("Error counter for tests run: %d, let's try to run the reconcile again", pr.errorCounter)
	pr.logger.Error(retryNotice)

	pr.reason = failureReason
	pr.message = "PatroniCore service reconcile cycle failed"

	// A distinct name keeps the status-update error apart from the tests error.
	statusErr := pr.updateStatus(Failed, failureReason, err.Error())
	if statusErr != nil {
		pr.logger.Error("Cannot update CR status", zap.Error(statusErr))
		return reconcile.Result{RequeueAfter: time.Minute}, statusErr
	}
	return reconcile.Result{}, err
}

func (pr *PatroniCoreReconciler) reconcilePatroniCoreCluster(cr *qubershipv1.PatroniCore) error {
consulRegistrationRequired := true
// reconcile Patroni
Expand Down Expand Up @@ -527,25 +514,33 @@ func (pr *PatroniCoreReconciler) createTestsPods(cr *qubershipv1.PatroniCore) er
return nil
}

func (pr *PatroniCoreReconciler) stopReconcile(newCrHash string, err error) (ctrl.Result, error) {
func (pr *PatroniCoreReconciler) stopReconcile(newCrHash string, reason string, err error) (ctrl.Result, error) {
pr.logger.Error(fmt.Sprintf("Failed reconcile attempts: %d, updating crHash, resVersions", pr.errorCounter))
pr.crHash = newCrHash
pr.errorCounter = 0
return reconcile.Result{RequeueAfter: time.Minute}, err
return pr.failReconcile(reason, err, false)
}

func (pr *PatroniCoreReconciler) handleReconcileError(maxAttempts int, reason, newCrHash string, err error) (ctrl.Result, error) {
func (pr *PatroniCoreReconciler) handleReconcileError(maxAttempts int, reason, errMsg, newCrHash string, err error) (ctrl.Result, error) {
pr.errorCounter++
if pr.errorCounter < maxAttempts {
pr.logger.Error(errMsg, zap.Error(err))
pr.logger.Error(fmt.Sprintf("Error counter: %d, let's try to run the reconcile again", pr.errorCounter))
pr.reason = reason
pr.message = "PatroniCore service reconcile cycle failed"
if err := pr.updateStatus(Failed, reason,
fmt.Sprintf("Postgres service reconcile cycle failed. Error: %s", err.Error())); err != nil {
pr.logger.Error("Cannot update CR status", zap.Error(err))
return reconcile.Result{RequeueAfter: time.Minute}, err
}
return reconcile.Result{RequeueAfter: time.Minute}, err
return pr.failReconcile(reason, err, true)
}
return pr.stopReconcile(newCrHash, "No reconcile attempts left", err)
}

func (pr *PatroniCoreReconciler) failReconcile(reason string, err error, requeue bool) (ctrl.Result, error) {
pr.reason = reason
pr.message = "PatroniCore service reconcile cycle failed"
if err := pr.updateStatus(Failed, reason,
fmt.Sprintf("Postgres service reconcile cycle failed. Error: %s", err.Error())); err != nil {
pr.logger.Error("Cannot update CR status", zap.Error(err))
}
requireAfter := time.Duration(0)
if requeue {
requireAfter = time.Minute
}
return pr.stopReconcile(newCrHash, err)
return reconcile.Result{RequeueAfter: requireAfter, Requeue: requeue}, err
}
67 changes: 32 additions & 35 deletions controllers/postgresservice_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,8 @@ func (r *PostgresServiceReconciler) Reconcile(ctx context.Context, request ctrl.
newResVersion := cr.ResourceVersion
newCrHash := util.HashJson(cr.Spec)
if (r.resVersions[cr.Name] == newResVersion ||
r.crHash == newCrHash) && len(cr.Status.Conditions) != 0 && cr.Status.Conditions[0].Type != Failed {
r.crHash == newCrHash) && (len(cr.Status.Conditions) != 0 &&
(cr.Status.Conditions[0].Type != Failed || (cr.Status.Conditions[0].Type == Failed && r.errorCounter == 0))) {
InfoMsg := "ResourceVersion didn't change, skipping reconcile loop"
if cr.Spec.ExternalDataBase != nil {
r.logger.Info(InfoMsg)
Expand Down Expand Up @@ -207,7 +208,7 @@ func (r *PostgresServiceReconciler) Reconcile(ctx context.Context, request ctrl.
switch err.(type) {
case *deployerrors.TestsError:
{
return r.handleTestReconcileError(err, "Error during tests run", maxReconcileAttempts, newCrHash)
return r.handleReconcileError(err, "Error during tests run", maxReconcileAttempts, newCrHash)
}
default:
{
Expand Down Expand Up @@ -249,28 +250,11 @@ func (r *PostgresServiceReconciler) Reconcile(ctx context.Context, request ctrl.
switch err.(type) {
case *deployerrors.TestsError:
{
return r.handleTestReconcileError(err, "Error during tests run", maxReconcileAttempts, newCrHash)
return r.handleReconcileError(err, "Error during tests run", maxReconcileAttempts, newCrHash)
}
case error:
{
r.errorCounter++

if r.errorCounter < maxReconcileAttempts {
r.logger.Error(fmt.Sprintf("Error counter: %d, let's try to run the reconcile again", r.errorCounter))
r.reason = "ReconcilePatroniServicesClusterFailed"
r.message = "Postgres-operator service reconcile cycle failed"
if err := r.updateStatus(Failed, "ReconcilePatroniServicesClusterFailed",
fmt.Sprintf("Postgres service reconcile cycle failed. Error: %s", err.Error())); err != nil {
r.logger.Error("Cannot update CR status", zap.Error(err))
return reconcile.Result{RequeueAfter: time.Minute}, err
}
return reconcile.Result{RequeueAfter: time.Minute}, err
}

r.logger.Error(fmt.Sprintf("Failed reconcile attempts: %d, updating crHash, resVersions", r.errorCounter))
r.crHash = newCrHash
r.errorCounter = 0
return reconcile.Result{RequeueAfter: time.Minute}, err
return r.handleReconcileError(err, "Error during reconcile cycle", maxReconcileAttempts, newCrHash)
}

default:
Expand Down Expand Up @@ -332,25 +316,15 @@ func (r *PostgresServiceReconciler) Reconcile(ctx context.Context, request ctrl.
return reconcile.Result{}, nil
}

func (r *PostgresServiceReconciler) handleTestReconcileError(err error, errMsg string, maxReconcileAttempts int, newCrHash string) (ctrl.Result, error) {
func (r *PostgresServiceReconciler) handleReconcileError(err error, errMsg string, maxReconcileAttempts int, newCrHash string) (ctrl.Result, error) {
r.errorCounter++
if r.errorCounter < maxReconcileAttempts {
r.logger.Error(errMsg, zap.Error(err))
r.logger.Error(fmt.Sprintf("Error counter for tests run: %d, let's try to run the reconcile again", r.errorCounter))
r.reason = "PostgresClusterTestsFailed"
r.message = "Postgres-operator service reconcile cycle failed"
if err := r.updateStatus(Failed, "PostgresClusterTestsFailed", err.Error()); err != nil {
r.logger.Error("Cannot update CR status", zap.Error(err))
return reconcile.Result{RequeueAfter: time.Minute}, err
}
return reconcile.Result{}, err
r.logger.Error(fmt.Sprintf("Error counter for reconcile run: %d, let's try to run the reconcile again", r.errorCounter))
return r.failReconcile("PostgresClusterTestsFailed", err, true)
}

r.logger.Error(fmt.Sprintf("Failed reconcile attempts: %d, updating crHash, resVersions", r.errorCounter))
r.logger.Error("Reconciliation cycle failed due to test pod ended with error")
r.crHash = newCrHash
r.errorCounter = 0
return reconcile.Result{RequeueAfter: time.Minute}, err
return r.stopReconcile(newCrHash, "No reconcile attempts left", err)
}

func (r *PostgresServiceReconciler) reconcilePostgresServiceCluster(cr *qubershipv1.PatroniServices) error {
Expand Down Expand Up @@ -719,3 +693,26 @@ func (r *PostgresServiceReconciler) processExternalResources(cr *qubershipv1.Pat

return nil
}

// stopReconcile logs the current number of failed attempts, stores newCrHash
// in r.crHash, resets r.errorCounter to zero, and returns the result of
// r.failReconcile(reason, err, false).
func (r *PostgresServiceReconciler) stopReconcile(newCrHash string, reason string, err error) (ctrl.Result, error) {
	// Build the message before the counter is reset so it reports the real count.
	attemptsMsg := fmt.Sprintf("Failed reconcile attempts: %d, updating crHash, resVersions", r.errorCounter)
	r.logger.Error(attemptsMsg)

	r.errorCounter = 0
	r.crHash = newCrHash

	return r.failReconcile(reason, err, false)
}

// failReconcile sets r.reason and r.message and writes a Failed status whose
// message embeds err.Error().
//
// If the status update itself fails, that failure is logged and returned
// together with a one-minute RequeueAfter. Otherwise the original err is
// returned; the result carries Requeue set to requeue, and RequeueAfter is one
// minute when requeue is true and zero when it is false.
//
// err must be non-nil: err.Error() is called unconditionally.
func (r *PostgresServiceReconciler) failReconcile(reason string, err error, requeue bool) (ctrl.Result, error) {
	r.reason = reason
	r.message = "Postgres-operator service reconcile cycle failed"

	statusMsg := fmt.Sprintf("Postgres service reconcile cycle failed. Error: %s", err.Error())
	if statusErr := r.updateStatus(Failed, reason, statusMsg); statusErr != nil {
		r.logger.Error("Cannot update CR status", zap.Error(statusErr))
		return reconcile.Result{RequeueAfter: time.Minute}, statusErr
	}

	// RequeueAfter keeps its zero value unless a retry was requested.
	result := reconcile.Result{Requeue: requeue}
	if requeue {
		result.RequeueAfter = time.Minute
	}
	return result, err
}