@@ -176,15 +176,17 @@ func NewLiveStateCache(
176
176
resourceTracking argo.ResourceTracking ) LiveStateCache {
177
177
178
178
return & liveStateCache {
179
- appInformer : appInformer ,
180
- db : db ,
181
- clusters : make (map [string ]clustercache.ClusterCache ),
182
- onObjectUpdated : onObjectUpdated ,
183
- kubectl : kubectl ,
184
- settingsMgr : settingsMgr ,
185
- metricsServer : metricsServer ,
186
- clusterSharding : clusterSharding ,
187
- resourceTracking : resourceTracking ,
179
+ appInformer : appInformer ,
180
+ db : db ,
181
+ clusters : make (map [string ]clustercache.ClusterCache ),
182
+ clusterStatusCancel : make (map [string ]context.CancelFunc ),
183
+ onObjectUpdated : onObjectUpdated ,
184
+ kubectl : kubectl ,
185
+ settingsMgr : settingsMgr ,
186
+ metricsServer : metricsServer ,
187
+ clusterSharding : clusterSharding ,
188
+ clusterFilter : clusterFilter ,
189
+ resourceTracking : resourceTracking ,
188
190
}
189
191
}
190
192
@@ -210,9 +212,10 @@ type liveStateCache struct {
210
212
resourceTracking argo.ResourceTracking
211
213
ignoreNormalizerOpts normalizers.IgnoreNormalizerOpts
212
214
213
- clusters map [string ]clustercache.ClusterCache
214
- cacheSettings cacheSettings
215
- lock sync.RWMutex
215
+ clusterStatusCancel map [string ]context.CancelFunc
216
+ clusters map [string ]clustercache.ClusterCache
217
+ cacheSettings cacheSettings
218
+ lock sync.RWMutex
216
219
}
217
220
218
221
func (c * liveStateCache ) loadCacheSettings () (* cacheSettings , error ) {
@@ -520,11 +523,20 @@ func (c *liveStateCache) getCluster(server string) (clustercache.ClusterCache, e
520
523
clustercache .SetLogr (logutils .NewLogrusLogger (log .WithField ("server" , cluster .Server ))),
521
524
clustercache .SetRetryOptions (clusterCacheAttemptLimit , clusterCacheRetryUseBackoff , isRetryableError ),
522
525
clustercache .SetRespectRBAC (respectRBAC ),
526
+ clustercache .SetClusterStatusRetryFunc (isTransientNetworkErr ),
527
+ clustercache .SetClusterConnectionInterval (10 * time .Second ),
523
528
}
524
529
525
530
clusterCache = clustercache .NewClusterCache (clusterCacheConfig , clusterCacheOpts ... )
526
531
527
- clusterCache .StartClusterConnectionStatusMonitoring (context .Background ())
532
+ // TODO: check whether the monitoring interval is disabled before starting connection status monitoring
533
+
534
+ ctx , cancel := context .WithCancel (context .Background ())
535
+ if c .clusterStatusCancel == nil {
536
+ c .clusterStatusCancel = make (map [string ]context.CancelFunc )
537
+ }
538
+ c .clusterStatusCancel [server ] = cancel
539
+ clusterCache .StartClusterConnectionStatusMonitoring (ctx )
528
540
529
541
_ = clusterCache .OnResourceUpdated (func (newRes * clustercache.Resource , oldRes * clustercache.Resource , namespaceResources map [kube.ResourceKey ]* clustercache.Resource ) {
530
542
toNotify := make (map [string ]bool )
@@ -779,6 +791,12 @@ func (c *liveStateCache) handleModEvent(oldCluster *appv1.Cluster, newCluster *a
779
791
if ! c .canHandleCluster (newCluster ) {
780
792
cluster .Invalidate ()
781
793
c .lock .Lock ()
794
+ cancel , ok := c .clusterStatusCancel [newCluster .Server ]
795
+ if ok {
796
+ // stop the cluster status monitoring goroutine
797
+ cancel ()
798
+ delete (c .clusterStatusCancel , newCluster .Server )
799
+ }
782
800
delete (c .clusters , newCluster .Server )
783
801
c .lock .Unlock ()
784
802
return
@@ -820,6 +838,12 @@ func (c *liveStateCache) handleDeleteEvent(clusterServer string) {
820
838
if ok {
821
839
cluster .Invalidate ()
822
840
c .lock .Lock ()
841
+ cancel , ok := c .clusterStatusCancel [clusterServer ]
842
+ if ok {
843
+ // stop the cluster status monitoring goroutine
844
+ cancel ()
845
+ delete (c .clusterStatusCancel , clusterServer )
846
+ }
823
847
delete (c .clusters , clusterServer )
824
848
c .lock .Unlock ()
825
849
}
0 commit comments