From 13b2a7625c5399967160abab66cc8a3e7628277b Mon Sep 17 00:00:00 2001 From: Krzysztof Nazarewski <3494992+nazarewk@users.noreply.github.com> Date: Tue, 13 Sep 2022 15:04:16 +0200 Subject: [PATCH] handle stale BeforeHookCreation resources - fixes https://github.com/argoproj/gitops-engine/issues/446 - closes https://github.com/argoproj/argo-cd/pull/10579 - original issue https://github.com/argoproj/argo-cd/issues/10077 Signed-off-by: Krzysztof Nazarewski <3494992+nazarewk@users.noreply.github.com> --- pkg/sync/sync_context.go | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/pkg/sync/sync_context.go b/pkg/sync/sync_context.go index 50e23bc46..4265909d3 100644 --- a/pkg/sync/sync_context.go +++ b/pkg/sync/sync_context.go @@ -30,7 +30,7 @@ import ( "github.com/argoproj/gitops-engine/pkg/diff" "github.com/argoproj/gitops-engine/pkg/health" "github.com/argoproj/gitops-engine/pkg/sync/common" - "github.com/argoproj/gitops-engine/pkg/sync/hook" + hookutil "github.com/argoproj/gitops-engine/pkg/sync/hook" resourceutil "github.com/argoproj/gitops-engine/pkg/sync/resource" "github.com/argoproj/gitops-engine/pkg/utils/kube" kubeutil "github.com/argoproj/gitops-engine/pkg/utils/kube" @@ -292,6 +292,22 @@ const ( // getOperationPhase returns a hook status from an _live_ unstructured object func (sc *syncContext) getOperationPhase(hook *unstructured.Unstructured) (common.OperationPhase, string, error) { + // start by detecting resources that: + // 1. have BeforeHookCreation deletion policies + // 2. were already deleted from the cluster + // 3. 
DELETE watch event from the Kubernetes control plane has not been processed yet; + // this can happen under high load on the controller and/or the k8s control plane. + // This results in the old version still being present in the cache and prematurely ending the sync wave. + // It is fixed by comparing creationTimestamp against the sync's start time. + // fixes https://github.com/argoproj/gitops-engine/issues/446 + // related to artificial sync wave delays in ArgoCD: + // https://github.com/argoproj/argo-cd/blob/9fac0f6ae6e52d6f4978a1eaaf51fbffb9c0958a/controller/sync.go#L465-L485 + for _, policy := range hookutil.DeletePolicies(hook) { + if policy == common.HookDeletePolicyBeforeHookCreation && sc.startedAt.After(hook.GetCreationTimestamp().Time) { + return common.OperationRunning, fmt.Sprintf("%s pending recreation", hook.GetName()), nil + } + } + phase := common.OperationSucceeded message := fmt.Sprintf("%s created", hook.GetName()) @@ -630,7 +646,7 @@ func (sc *syncContext) getSyncTasks() (_ syncTasks, successful bool) { obj := obj(resource.Target, resource.Live) // this creates garbage tasks - if hook.IsHook(obj) { + if hookutil.IsHook(obj) { sc.log.WithValues("group", obj.GroupVersionKind().Group, "kind", obj.GetKind(), "namespace", obj.GetNamespace(), "name", obj.GetName()).V(1).Info("Skipping hook") continue }