@@ -71,6 +71,81 @@ var _ = SIGDescribe("Node Container Manager [Serial]", func() {
7171 framework .ExpectNoError (runTest (ctx , f ))
7272 })
7373 })
74+ ginkgo .Context ("Validate CGroup management" , func () {
75+ // Regression test for https://issues.k8s.io/125923
76+ // In this issue there's a race involved with systemd which seems to manifest most likely, or perhaps only
77+ // (data gathered so far seems inconclusive) on the very first boot of the machine, so restarting the kubelet
78+ // seems not sufficient. OTOH, the exact reproducer seems to require a dedicate lane with only this test, or
79+ // to reboot the machine before to run this test. Both are practically unrealistic in CI.
80+ // The closest approximation is this test in this current form, using a kubelet restart. This at least
81+ // acts as non regression testing, so it still brings value.
82+ ginkgo .It ("should correctly start with cpumanager none policy in use with systemd" , func (ctx context.Context ) {
83+ if ! IsCgroup2UnifiedMode () {
84+ ginkgo .Skip ("this test requires cgroups v2" )
85+ }
86+
87+ var err error
88+ var oldCfg * kubeletconfig.KubeletConfiguration
89+ // Get current kubelet configuration
90+ oldCfg , err = getCurrentKubeletConfig (ctx )
91+ framework .ExpectNoError (err )
92+
93+ ginkgo .DeferCleanup (func (ctx context.Context ) {
94+ if oldCfg != nil {
95+ // Update the Kubelet configuration.
96+ ginkgo .By ("Stopping the kubelet" )
97+ startKubelet := stopKubelet ()
98+
99+ // wait until the kubelet health check will fail
100+ gomega .Eventually (ctx , func () bool {
101+ return kubeletHealthCheck (kubeletHealthCheckURL )
102+ }).WithTimeout (time .Minute ).WithPolling (time .Second ).Should (gomega .BeFalseBecause ("expected kubelet health check to be failed" ))
103+ ginkgo .By ("Stopped the kubelet" )
104+
105+ framework .ExpectNoError (e2enodekubelet .WriteKubeletConfigFile (oldCfg ))
106+
107+ ginkgo .By ("Starting the kubelet" )
108+ startKubelet ()
109+
110+ // wait until the kubelet health check will succeed
111+ gomega .Eventually (ctx , func (ctx context.Context ) bool {
112+ return kubeletHealthCheck (kubeletHealthCheckURL )
113+ }).WithTimeout (2 * time .Minute ).WithPolling (5 * time .Second ).Should (gomega .BeTrueBecause ("expected kubelet to be in healthy state" ))
114+ ginkgo .By ("Started the kubelet" )
115+ }
116+ })
117+
118+ newCfg := oldCfg .DeepCopy ()
119+ // Change existing kubelet configuration
120+ newCfg .CPUManagerPolicy = "none"
121+ newCfg .CgroupDriver = "systemd"
122+
123+ // Update the Kubelet configuration.
124+ ginkgo .By ("Stopping the kubelet" )
125+ startKubelet := stopKubelet ()
126+
127+ // wait until the kubelet health check will fail
128+ gomega .Eventually (ctx , func () bool {
129+ return kubeletHealthCheck (kubeletHealthCheckURL )
130+ }).WithTimeout (time .Minute ).WithPolling (time .Second ).Should (gomega .BeFalseBecause ("expected kubelet health check to be failed" ))
131+ ginkgo .By ("Stopped the kubelet" )
132+
133+ framework .ExpectNoError (e2enodekubelet .WriteKubeletConfigFile (newCfg ))
134+
135+ ginkgo .By ("Starting the kubelet" )
136+ startKubelet ()
137+
138+ // wait until the kubelet health check will succeed
139+ gomega .Eventually (ctx , func () bool {
140+ return getNodeReadyStatus (ctx , f ) && kubeletHealthCheck (kubeletHealthCheckURL )
141+ }).WithTimeout (2 * time .Minute ).WithPolling (5 * time .Second ).Should (gomega .BeTrueBecause ("expected kubelet to be in healthy state" ))
142+ ginkgo .By ("Started the kubelet" )
143+
144+ gomega .Consistently (ctx , func (ctx context.Context ) bool {
145+ return getNodeReadyStatus (ctx , f ) && kubeletHealthCheck (kubeletHealthCheckURL )
146+ }).WithTimeout (2 * time .Minute ).WithPolling (2 * time .Second ).Should (gomega .BeTrueBecause ("node keeps reporting ready status" ))
147+ })
148+ })
74149})
75150
76151func expectFileValToEqual (filePath string , expectedValue , delta int64 ) error {
0 commit comments