Skip to content

Commit 86b6d4b

Browse files
Merge pull request #2337 from simonpasquier/update-metrics-server-probe-sno
OCPBUGS-32510: change metrics-server probes for SNO
2 parents 6c9136a + f9670c7 commit 86b6d4b

File tree

2 files changed

+87
-0
lines changed

2 files changed

+87
-0
lines changed

pkg/manifests/manifests.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2053,6 +2053,24 @@ func (f *Factory) MetricsServerDeployment(apiAuthSecretName string, kubeletCABun
20532053
containers[idx].Args = f.setTLSSecurityConfiguration(podSpec.Containers[0].Args,
20542054
MetricsServerTLSCipherSuitesFlag, MetricsServerTLSMinTLSVersionFlag)
20552055

2056+
// By default, the /readyz endpoint is used to assert the component
2057+
// readiness. This endpoint returns success when the metrics-server has
2058+
// metric samples over 2 intervals (e.g. it has scraped at least one
2059+
// kubelet twice).
2060+
// In single-node deployments, it happens sometimes (especially in
2061+
// end-to-end tests) that the kubelet fails to respond in a timely fashion
2062+
// due to contention in cAdvisor, leading to a delayed readiness (and test
2063+
// failures). To work around the issue, we use the /livez endpoint in this
2064+
// mode.
2065+
// The long-term plan is to switch resource metrics from cAdvisor to the
2066+
// CRI stats API (currently an alpha feature). Once it happens, we can
2067+
// remove this change.
2068+
// See https://issues.redhat.com/browse/OCPBUGS-32510 for details.
2069+
if !f.infrastructure.HighlyAvailableInfrastructure() {
2070+
containers[idx].StartupProbe = containers[idx].ReadinessProbe.DeepCopy()
2071+
containers[idx].ReadinessProbe.HTTPGet.Path = "/livez"
2072+
}
2073+
20562074
// Hash the Kubelet Serving CA Bundle configmap value and propagate it as an annotation to the
20572075
// deployment's pods to trigger a new rollout when the CA is rotated.
20582076
dep.Spec.Template.Annotations["monitoring.openshift.io/kubelet-serving-ca-bundle-hash"] = hashStringMap(kubeletCABundle.Data)

pkg/manifests/manifests_test.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2856,6 +2856,75 @@ metricsServer:
28562856
require.Equal(t, "383c7cmidrae2", podAnnotations["monitoring.openshift.io/serving-ca-secret-hash"])
28572857
}
28582858

2859+
func TestMetricsServerReadinessProbe(t *testing.T) {
2860+
c, err := NewConfigFromString("", true)
2861+
if err != nil {
2862+
t.Fatal(err)
2863+
}
2864+
2865+
c.SetImages(map[string]string{
2866+
"kube-metrics-server": "docker.io/openshift/origin-kube-metrics-server:latest",
2867+
})
2868+
2869+
f := NewFactory("openshift-monitoring", "openshift-user-workload-monitoring", c, &fakeInfrastructureReader{}, &fakeProxyReader{}, NewAssets(assetsPath), &APIServerConfig{}, &configv1.Console{})
2870+
kubeletCABundle := &v1.ConfigMap{
2871+
ObjectMeta: metav1.ObjectMeta{
2872+
Name: "kubelet-serving-ca-bundle",
2873+
Namespace: "openshift-monitoring",
2874+
},
2875+
Data: map[string]string{
2876+
"ca-bundle.crt": "ca-certificate",
2877+
},
2878+
}
2879+
servingCASecret := &v1.Secret{
2880+
ObjectMeta: metav1.ObjectMeta{
2881+
Name: "metrics-server-tls",
2882+
Namespace: "openshift-monitoring",
2883+
},
2884+
Data: map[string][]byte{
2885+
"tls.crt": []byte("foo"),
2886+
"tls.key": []byte("bar"),
2887+
},
2888+
}
2889+
metricsClientSecret := &v1.Secret{
2890+
ObjectMeta: metav1.ObjectMeta{
2891+
Name: "metrics-client-cert",
2892+
Namespace: "openshift-monitoring",
2893+
},
2894+
Data: map[string][]byte{
2895+
"tls.crt": []byte("bar"),
2896+
"tls.key": []byte("foo"),
2897+
},
2898+
}
2899+
apiAuthConfigMapData := map[string]string{
2900+
"requestheader-allowed-names": "",
2901+
"requestheader-extra-headers-prefix": "",
2902+
"requestheader-group-headers": "",
2903+
"requestheader-username-headers": "",
2904+
}
2905+
2906+
d, err := f.MetricsServerDeployment("foo", kubeletCABundle, servingCASecret, metricsClientSecret, apiAuthConfigMapData)
2907+
if err != nil {
2908+
t.Fatal(err)
2909+
}
2910+
2911+
for _, container := range d.Spec.Template.Spec.Containers {
2912+
if container.Name == "metrics-server" {
2913+
if container.ReadinessProbe.HTTPGet.Path != "/livez" {
2914+
t.Fatalf("expected readiness probe's path to be '/livez', got %q", container.ReadinessProbe.HTTPGet.Path)
2915+
}
2916+
2917+
if container.StartupProbe.HTTPGet.Path != "/readyz" {
2918+
t.Fatalf("expected startup probe's path to be '/readyz', got %q", container.StartupProbe.HTTPGet.Path)
2919+
}
2920+
2921+
return
2922+
}
2923+
}
2924+
2925+
t.Fatalf("failed to find container %q", "metrics-server")
2926+
}
2927+
28592928
func TestMetricsServerAuditLog(t *testing.T) {
28602929
argsForProfile := func(profile string) []string {
28612930
return []string{

0 commit comments

Comments
 (0)