Skip to content

Commit cc02cb4

Browse files
Merge pull request #4 from davidepasquero/codex/forzare-eliminazione-controller-in-terminating
Fix stuck pods cleanup
2 parents 795f4de + e596689 commit cc02cb4

File tree

3 files changed

+68
-0
lines changed

3 files changed

+68
-0
lines changed

cmd/controller/run.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"errors"
66
"log"
77
"net/http"
8+
"time"
89

910
"github.com/rancher/wrangler/pkg/leader"
1011
"github.com/rancher/wrangler/pkg/signals"
@@ -16,6 +17,7 @@ import (
1617
"github.com/harvester/vm-dhcp-controller/pkg/config"
1718
"github.com/harvester/vm-dhcp-controller/pkg/controller"
1819
"github.com/harvester/vm-dhcp-controller/pkg/server"
20+
"github.com/harvester/vm-dhcp-controller/pkg/util"
1921
)
2022

2123
var (
@@ -46,6 +48,8 @@ func run(options *config.ControllerOptions) error {
4648
logrus.Fatalf("Error building controllers: %s", err.Error())
4749
}
4850

51+
go util.CleanupTerminatingPods(ctx, client, options.AgentNamespace, "controller", time.Minute)
52+
4953
callback := func(ctx context.Context) {
5054
if err := management.Register(ctx, cfg, controller.RegisterFuncList); err != nil {
5155
panic(err)

cmd/webhook/run.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@ package main
22

33
import (
44
"context"
5+
"time"
56

7+
"github.com/harvester/vm-dhcp-controller/pkg/util"
68
"github.com/harvester/webhook/pkg/config"
79
"github.com/harvester/webhook/pkg/server"
810
"github.com/rancher/wrangler/pkg/start"
911
"github.com/sirupsen/logrus"
12+
"k8s.io/client-go/kubernetes"
1013
"k8s.io/client-go/rest"
1114

1215
ctlcore "github.com/harvester/vm-dhcp-controller/pkg/generated/controllers/core"
@@ -70,6 +73,13 @@ func run(ctx context.Context, cfg *rest.Config, options *config.Options) error {
7073
return err
7174
}
7275

76+
client, err := kubernetes.NewForConfig(cfg)
77+
if err != nil {
78+
return err
79+
}
80+
81+
go util.CleanupTerminatingPods(ctx, client, options.Namespace, "webhook", time.Minute)
82+
7383
webhookServer := server.NewWebhookServer(ctx, cfg, name, options)
7484

7585
if err := webhookServer.RegisterValidators(

pkg/util/pod.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
package util
2+
3+
import (
4+
"context"
5+
"time"
6+
7+
"github.com/sirupsen/logrus"
8+
apierrors "k8s.io/apimachinery/pkg/api/errors"
9+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10+
"k8s.io/apimachinery/pkg/labels"
11+
"k8s.io/client-go/kubernetes"
12+
)
13+
14+
// CleanupTerminatingPods forcibly deletes pods with the specified component label
15+
// that have been stuck in the Terminating state for longer than the given
16+
// threshold.
17+
func CleanupTerminatingPods(ctx context.Context, client kubernetes.Interface, namespace, component string, threshold time.Duration) {
18+
if client == nil || namespace == "" || component == "" {
19+
return
20+
}
21+
22+
selector := labels.Set{"app.kubernetes.io/component": component}.AsSelector().String()
23+
ticker := time.NewTicker(threshold / 2)
24+
defer ticker.Stop()
25+
26+
for {
27+
select {
28+
case <-ctx.Done():
29+
return
30+
case <-ticker.C:
31+
pods, err := client.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: selector})
32+
if err != nil {
33+
logrus.Errorf("(CleanupTerminatingPods) list pods error: %v", err)
34+
continue
35+
}
36+
37+
for i := range pods.Items {
38+
pod := &pods.Items[i]
39+
if pod.DeletionTimestamp == nil {
40+
continue
41+
}
42+
if time.Since(pod.DeletionTimestamp.Time) < threshold {
43+
continue
44+
}
45+
46+
logrus.Infof("(CleanupTerminatingPods) force deleting stuck pod %s/%s", pod.Namespace, pod.Name)
47+
grace := int64(0)
48+
if err := client.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &grace}); err != nil && !apierrors.IsNotFound(err) {
49+
logrus.Errorf("(CleanupTerminatingPods) delete pod %s/%s error: %v", pod.Namespace, pod.Name, err)
50+
}
51+
}
52+
}
53+
}
54+
}

0 commit comments

Comments
 (0)