@@ -147,7 +147,7 @@ type WeightedSemaphore interface {
147
147
Release (n int64 )
148
148
}
149
149
150
- type ListRetryFunc func (err error ) bool
150
+ type RetryFunc func (err error ) bool
151
151
152
152
// NewClusterCache creates a new instance of the cluster cache.
153
153
func NewClusterCache (config * rest.Config , opts ... UpdateSettingsFunc ) * clusterCache {
@@ -176,9 +176,10 @@ func NewClusterCache(config *rest.Config, opts ...UpdateSettingsFunc) *clusterCa
176
176
log : log ,
177
177
listRetryLimit : 1 ,
178
178
listRetryUseBackoff : false ,
179
- listRetryFunc : ListRetryFuncNever ,
179
+ listRetryFunc : RetryFuncNever ,
180
180
connectionStatus : ConnectionStatusUnknown ,
181
181
watchFails : newWatchFailures (),
182
+ clusterStatusRetryFunc : RetryFuncNever ,
182
183
clusterConnectionInterval : defaultClusterConnectionInterval ,
183
184
}
184
185
for i := range opts {
@@ -208,6 +209,8 @@ type clusterCache struct {
208
209
// watchFails is used to keep track of the failures while watching resources.
209
210
watchFails * watchFailures
210
211
212
+ clusterStatusRetryFunc RetryFunc
213
+
211
214
apisMeta map [schema.GroupKind ]* apiMeta
212
215
serverVersion string
213
216
apiResources []kube.APIResourceInfo
@@ -228,7 +231,7 @@ type clusterCache struct {
228
231
// retry options for list operations
229
232
listRetryLimit int32
230
233
listRetryUseBackoff bool
231
- listRetryFunc ListRetryFunc
234
+ listRetryFunc RetryFunc
232
235
233
236
// lock is a rw lock which protects the fields of clusterInfo
234
237
lock sync.RWMutex
@@ -264,13 +267,13 @@ type clusterCacheSync struct {
264
267
resyncTimeout time.Duration
265
268
}
266
269
267
- // ListRetryFuncNever never retries on errors
268
- func ListRetryFuncNever (err error ) bool {
270
+ // RetryFuncNever never retries on errors
271
+ func RetryFuncNever (err error ) bool {
269
272
return false
270
273
}
271
274
272
- // ListRetryFuncAlways always retries on errors
273
- func ListRetryFuncAlways (err error ) bool {
275
+ // RetryFuncAlways always retries on errors
276
+ func RetryFuncAlways (err error ) bool {
274
277
return true
275
278
}
276
279
@@ -1247,6 +1250,10 @@ func (c *clusterCache) StartClusterConnectionStatusMonitoring(ctx context.Contex
1247
1250
}
1248
1251
1249
1252
func (c * clusterCache ) clusterConnectionService (ctx context.Context ) {
1253
+ if c .clusterConnectionInterval <= 0 {
1254
+ return
1255
+ }
1256
+
1250
1257
ticker := time .NewTicker (c .clusterConnectionInterval )
1251
1258
defer ticker .Stop ()
1252
1259
@@ -1267,16 +1274,23 @@ func (c *clusterCache) clusterConnectionService(ctx context.Context) {
1267
1274
}
1268
1275
1269
1276
if watchErrors > 0 || watchesRecovered {
1270
- c .log .V (1 ).Info ("verifying cluster connection" , "watches" , watchErrors )
1271
-
1272
- _ , err := c .kubectl .GetServerVersion (c .config )
1277
+ c .log .V (1 ).Info ("verifying cluster connection" , "server" , c .config .Host )
1278
+ // Retry fetching the server version to avoid invalidating the cache due to transient errors.
1279
+ err := retry .OnError (retry .DefaultBackoff , c .clusterStatusRetryFunc , func () error {
1280
+ _ , err := c .kubectl .GetServerVersion (c .config )
1281
+ if err != nil && c .clusterStatusRetryFunc (err ) {
1282
+ c .log .V (1 ).Info ("Error while fetching server version" , "error" , err .Error ())
1283
+ }
1284
+ return err
1285
+ })
1273
1286
if err != nil {
1274
1287
c .updateConnectionStatus (ConnectionStatusFailed )
1275
1288
} else {
1276
1289
c .updateConnectionStatus (ConnectionStatusSuccessful )
1277
1290
}
1278
1291
}
1279
1292
case <- ctx .Done ():
1293
+ c .log .V (1 ).Info ("Stopping cluster connection status monitoring" , "server" , c .config .Host )
1280
1294
ticker .Stop ()
1281
1295
return
1282
1296
}
0 commit comments