Skip to content

Commit 10c6cb5

Browse files
authored
CLOUDP-82115: Implement delete retry loop (#149)
1 parent 7aa9667 commit 10c6cb5

File tree

7 files changed: 78 additions and 48 deletions

pkg/controller/atlas/api_error.go

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,4 +13,7 @@ const (
1313

1414
// Error indicates that the database user doesn't exist
1515
UsernameNotFound = "USERNAME_NOT_FOUND"
16+
17+
// Error indicates that the cluster doesn't exist
18+
ClusterNotFound = "CLUSTER_NOT_FOUND"
1619
)

pkg/controller/atlascluster/atlascluster_controller.go

Lines changed: 27 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,9 @@ import (
2020
"context"
2121
"errors"
2222
"fmt"
23+
"time"
2324

25+
"go.mongodb.org/atlas/mongodbatlas"
2426
"go.uber.org/zap"
2527
"k8s.io/apimachinery/pkg/runtime"
2628
ctrl "sigs.k8s.io/controller-runtime"
@@ -150,6 +152,8 @@ func (r *AtlasClusterReconciler) Delete(e event.DeleteEvent) error {
150152
return errors.New("cannot read project resource")
151153
}
152154

155+
log = log.With("projectID", project.Status.ID, "clusterName", cluster.Spec.Name)
156+
153157
connection, err := atlas.ReadConnection(log, r.Client, r.OperatorPod, project.ConnectionSecretObjectKey())
154158
if err != nil {
155159
return err
@@ -160,12 +164,28 @@ func (r *AtlasClusterReconciler) Delete(e event.DeleteEvent) error {
160164
return fmt.Errorf("cannot build Atlas client: %w", err)
161165
}
162166

163-
_, err = atlasClient.Clusters.Delete(context.Background(), project.Status.ID, cluster.Spec.Name)
164-
if err != nil {
165-
return fmt.Errorf("cannot delete Atlas cluster: %w", err)
166-
}
167-
168-
log.Infow("Started Atlas cluster deletion process", "projectID", project.Status.ID, "clusterName", cluster.Spec.Name)
169-
167+
go func() {
168+
timeout := time.Now().Add(workflow.DefaultTimeout)
169+
170+
for time.Now().Before(timeout) {
171+
_, err = atlasClient.Clusters.Delete(context.Background(), project.Status.ID, cluster.Spec.Name)
172+
var apiError *mongodbatlas.ErrorResponse
173+
if errors.As(err, &apiError) && apiError.ErrorCode == atlas.ClusterNotFound {
174+
log.Info("Cluster doesn't exist or is already deleted")
175+
return
176+
}
177+
178+
if err != nil {
179+
log.Errorw("cannot delete Atlas cluster", "error", err)
180+
time.Sleep(workflow.DefaultRetry)
181+
continue
182+
}
183+
184+
log.Info("Started Atlas cluster deletion process")
185+
return
186+
}
187+
188+
log.Error("Failed to delete Atlas cluster in time")
189+
}()
170190
return nil
171191
}

pkg/controller/atlasproject/atlasproject_controller.go

Lines changed: 26 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,11 @@ package atlasproject
1818

1919
import (
2020
"context"
21+
"errors"
2122
"fmt"
23+
"time"
2224

25+
"go.mongodb.org/atlas/mongodbatlas"
2326
"go.uber.org/zap"
2427
corev1 "k8s.io/api/core/v1"
2528
"k8s.io/apimachinery/pkg/runtime"
@@ -134,12 +137,29 @@ func (r *AtlasProjectReconciler) Delete(e event.DeleteEvent) error {
134137
return fmt.Errorf("cannot build Atlas client: %w", err)
135138
}
136139

137-
_, err = atlasClient.Projects.Delete(context.Background(), project.Status.ID)
138-
if err != nil {
139-
return fmt.Errorf("cannot delete Atlas project: %w", err)
140-
}
141-
142-
log.Infow("Successfully deleted Atlas project", "projectID", project.Status.ID)
140+
go func() {
141+
timeout := time.Now().Add(workflow.DefaultTimeout)
142+
143+
for time.Now().Before(timeout) {
144+
_, err = atlasClient.Projects.Delete(context.Background(), project.Status.ID)
145+
var apiError *mongodbatlas.ErrorResponse
146+
if errors.As(err, &apiError) && apiError.ErrorCode == atlas.NotInGroup {
147+
log.Infow("Project doesn't exist or is already deleted", "projectID", project.Status.ID)
148+
return
149+
}
150+
151+
if err != nil {
152+
log.Errorw("cannot delete Atlas project", "error", err)
153+
time.Sleep(workflow.DefaultRetry)
154+
continue
155+
}
156+
157+
log.Infow("Successfully deleted Atlas project", "projectID", project.Status.ID)
158+
return
159+
}
160+
161+
log.Errorw("Failed to delete Atlas project in time", "projectID", project.Status.ID)
162+
}()
143163

144164
return nil
145165
}

pkg/controller/workflow/result.go

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,10 @@ import (
66
"sigs.k8s.io/controller-runtime/pkg/reconcile"
77
)
88

9-
const DefaultRetry = time.Second * 10
9+
const (
10+
DefaultRetry = time.Second * 10
11+
DefaultTimeout = time.Minute * 20
12+
)
1013

1114
type Result struct {
1215
terminated bool

test/int/cluster_test.go

Lines changed: 3 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -58,19 +58,12 @@ var _ = Describe("AtlasCluster", func() {
5858
if createdCluster != nil {
5959
By("Removing Atlas Cluster " + createdCluster.Name)
6060
Expect(k8sClient.Delete(context.Background(), createdCluster)).To(Succeed())
61-
Eventually(checkAtlasClusterRemoved(createdProject.Status.ID, createdCluster.Name), 600, interval).Should(BeTrue())
61+
Eventually(checkAtlasClusterRemoved(createdProject.Status.ID, createdCluster.Spec.Name), 600, interval).Should(BeTrue())
6262
}
6363

64-
// TODO: CLOUDP-82115
65-
// By("Removing Atlas Project " + createdProject.Status.ID)
66-
// Expect(k8sClient.Delete(context.Background(), createdProject)).To(Succeed())
67-
// Eventually(checkAtlasProjectRemoved(createdProject.Status.ID), 20, interval).Should(BeTrue())
68-
6964
By("Removing Atlas Project " + createdProject.Status.ID)
70-
// This is a bit strange but the delete request right after the cluster is removed may fail with "Still active cluster" error
71-
// UI shows the cluster being deleted though. Seems to be the issue only if removal is done using API,
72-
// if the cluster is terminated using UI - it stays in "Deleting" state
73-
Eventually(removeAtlasProject(createdProject.Status.ID), 600, interval).Should(BeTrue())
65+
Expect(k8sClient.Delete(context.Background(), createdProject)).To(Succeed())
66+
Eventually(checkAtlasProjectRemoved(createdProject.Status.ID), 60, interval).Should(BeTrue())
7467
}
7568
removeControllersAndNamespace()
7669
})

test/int/dbuser_test.go

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -28,12 +28,13 @@ import (
2828
"github.com/mongodb/mongodb-atlas-kubernetes/pkg/util/testutil"
2929
)
3030

31-
const DevMode = false
32-
33-
const UserPasswordSecret = "user-password-secret"
34-
const DBUserPassword = "Passw0rd!"
35-
const UserPasswordSecret2 = "second-user-password-secret"
36-
const DBUserPassword2 = "H@lla#!"
31+
const (
32+
DevMode = false
33+
UserPasswordSecret = "user-password-secret"
34+
DBUserPassword = "Passw0rd!"
35+
UserPasswordSecret2 = "second-user-password-secret"
36+
DBUserPassword2 = "H@lla#!"
37+
)
3738

3839
var _ = Describe("AtlasDatabaseUser", func() {
3940
const interval = time.Second * 1
@@ -101,20 +102,23 @@ var _ = Describe("AtlasDatabaseUser", func() {
101102

102103
return
103104
}
105+
104106
if createdProject != nil && createdProject.ID() != "" {
105107
if createdClusterGCP != nil {
106108
By("Removing Atlas Cluster " + createdClusterGCP.Name)
107109
Expect(k8sClient.Delete(context.Background(), createdClusterGCP)).To(Succeed())
108-
Eventually(checkAtlasClusterRemoved(createdProject.Status.ID, createdClusterGCP.Name), 600, interval).Should(BeTrue())
110+
Eventually(checkAtlasClusterRemoved(createdProject.Status.ID, createdClusterGCP.Spec.Name), 600, interval).Should(BeTrue())
109111
}
112+
110113
if createdClusterAWS != nil {
111114
By("Removing Atlas Cluster " + createdClusterAWS.Name)
112115
Expect(k8sClient.Delete(context.Background(), createdClusterAWS)).To(Succeed())
113-
Eventually(checkAtlasClusterRemoved(createdProject.Status.ID, createdClusterAWS.Name), 600, interval).Should(BeTrue())
116+
Eventually(checkAtlasClusterRemoved(createdProject.Status.ID, createdClusterAWS.Spec.Name), 600, interval).Should(BeTrue())
114117
}
115118

116119
By("Removing Atlas Project " + createdProject.Status.ID)
117-
Eventually(removeAtlasProject(createdProject.Status.ID), 600, interval).Should(BeTrue())
120+
Expect(k8sClient.Delete(context.Background(), createdProject)).To(Succeed())
121+
Eventually(checkAtlasProjectRemoved(createdProject.Status.ID), 60, interval).Should(BeTrue())
118122
}
119123
removeControllersAndNamespace()
120124
})
@@ -289,12 +293,14 @@ func normalize(user mongodbatlas.DatabaseUser, projectID string) mongodbatlas.Da
289293
user.Password = ""
290294
return user
291295
}
296+
292297
func tryConnect(projectID string, cluster mdbv1.AtlasCluster, user mdbv1.AtlasDatabaseUser) func() error {
293298
return func() error {
294299
_, err := mongoClient(projectID, cluster, user)
295300
return err
296301
}
297302
}
303+
298304
func mongoClient(projectID string, cluster mdbv1.AtlasCluster, user mdbv1.AtlasDatabaseUser) (*mongo.Client, error) {
299305
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
300306
defer cancel()

test/int/project_test.go

Lines changed: 0 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -373,10 +373,8 @@ var _ = Describe("AtlasProject", func() {
373373
Eventually(testutil.WaitFor(k8sClient, createdProject, status.TrueCondition(status.ReadyType)),
374374
20, interval).Should(BeTrue())
375375
})
376-
377376
})
378377
})
379-
380378
})
381379

382380
func buildConnectionSecret(name string) corev1.Secret {
@@ -389,19 +387,6 @@ func buildConnectionSecret(name string) corev1.Secret {
389387
}
390388
}
391389

392-
func removeAtlasProject(projectID string) func() bool {
393-
return func() bool {
394-
_, err := atlasClient.Projects.Delete(context.Background(), projectID)
395-
if err != nil {
396-
var apiError *mongodbatlas.ErrorResponse
397-
Expect(errors.As(err, &apiError)).To(BeTrue())
398-
Expect(apiError.ErrorCode).To(Equal(atlas.CannotCloseGroupActiveAtlasCluster))
399-
return false
400-
}
401-
return true
402-
}
403-
}
404-
405390
// checkAtlasProjectRemoved returns true if the Atlas Project is removed from Atlas.
406391
func checkAtlasProjectRemoved(projectID string) func() bool {
407392
return func() bool {

0 commit comments

Comments
 (0)