@@ -29,6 +29,7 @@ import (
2929 v1 "k8s.io/api/core/v1"
3030 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3131 "k8s.io/apimachinery/pkg/fields"
32+ k8snet "k8s.io/apimachinery/pkg/util/net"
3233 "k8s.io/apimachinery/pkg/util/wait"
3334 clientset "k8s.io/client-go/kubernetes"
3435 "k8s.io/client-go/rest"
@@ -94,7 +95,6 @@ type Grabber struct {
9495// support it. If disabled for a component, the corresponding Grab function
9596// will immediately return an error derived from MetricsGrabbingDisabledError.
9697func NewMetricsGrabber (ctx context.Context , c clientset.Interface , ec clientset.Interface , config * rest.Config , kubelets bool , scheduler bool , controllers bool , apiServer bool , clusterAutoscaler bool , snapshotController bool ) (* Grabber , error ) {
97-
9898 kubeScheduler := ""
9999 kubeControllerManager := ""
100100 snapshotControllerManager := ""
@@ -213,28 +213,29 @@ func (g *Grabber) grabFromKubeletInternal(ctx context.Context, nodeName string,
213213}
214214
215215func (g * Grabber ) getMetricsFromNode (ctx context.Context , nodeName string , kubeletPort int , pathSuffix string ) (string , error ) {
216- // There's a problem with timing out during proxy. Wrapping this in a goroutine to prevent deadlock.
217- finished := make (chan struct {}, 1 )
216+ // Requests proxied to the node can time out, so each attempt gets a 45-second client timeout, and failed attempts (timeouts and other errors alike) are retried every 15 seconds for up to 2 minutes.
218217 var err error
219- var rawOutput []byte
220- go func () {
221- rawOutput , err = g .client .CoreV1 ().RESTClient ().Get ().
218+ var output []byte
219+ err = wait . PollUntilContextTimeout ( ctx , 15 * time . Second , 2 * time . Minute , true , func (ctx context. Context ) ( done bool , retErr error ) {
220+ rawOutput , err : = g .client .CoreV1 ().RESTClient ().Get ().
222221 Resource ("nodes" ).
223222 SubResource ("proxy" ).
224223 Name (fmt .Sprintf ("%v:%v" , nodeName , kubeletPort )).
225224 Suffix (pathSuffix ).
225+ Timeout (45 * time .Second ).
226226 Do (ctx ).Raw ()
227- finished <- struct {}{}
228- }()
229- select {
230- case <- time .After (proxyTimeout ):
231- return "" , fmt .Errorf ("Timed out when waiting for proxy to gather metrics from %v" , nodeName )
232- case <- finished :
233227 if err != nil {
234- return "" , err
228+ if k8snet .IsTimeout (err ) {
229+ klog .Warningf ("Metrics rest call timed out" )
230+ return false , nil
231+ }
232+ klog .Warningf ("Metrics rest call errored: %v" , err )
233+ return false , nil
235234 }
236- return string (rawOutput ), nil
237- }
235+ output = rawOutput
236+ return true , nil
237+ })
238+ return string (output ), err
238239}
239240
240241// GrabFromKubeProxy returns metrics from kube-proxy
0 commit comments