Skip to content

Commit 90cbc5c

Browse files
committed
fix: resolve merge conflict in sandboxclaim_controller
2 parents d2b1e79 + 0d300b4 commit 90cbc5c

4 files changed

Lines changed: 433 additions & 4 deletions

File tree

extensions/controllers/sandboxclaim_controller.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,7 @@ func (r *SandboxClaimReconciler) adoptSandboxFromCandidates(ctx context.Context,
434434
// Remove warm pool labels so the sandbox no longer appears in warm pool queries
435435
delete(adopted.Labels, warmPoolSandboxLabel)
436436
delete(adopted.Labels, sandboxTemplateRefHash)
437+
delete(adopted.Labels, templateContentHashLabel)
437438

438439
// Transfer ownership from SandboxWarmPool to SandboxClaim
439440
adopted.OwnerReferences = nil
@@ -674,6 +675,27 @@ func (r *SandboxClaimReconciler) getOrCreateSandbox(ctx context.Context, claim *
674675
return nil, nil
675676
}
676677

678+
// Filter adoption candidates by template content hash to avoid adopting
679+
// sandboxes created from a stale template spec.
680+
if len(adoptionCandidates) > 0 {
681+
template, err := r.getTemplate(ctx, claim)
682+
if err == nil && template != nil {
683+
contentHash := templateContentHash(template)
684+
var filtered []*v1alpha1.Sandbox
685+
for _, c := range adoptionCandidates {
686+
if ch := c.Labels[templateContentHashLabel]; ch != "" && ch != contentHash {
687+
logger.Info("skipping stale adoption candidate (content hash mismatch)",
688+
"sandbox", c.Name,
689+
"sandboxHash", ch,
690+
"currentHash", contentHash)
691+
continue
692+
}
693+
filtered = append(filtered, c)
694+
}
695+
adoptionCandidates = filtered
696+
}
697+
}
698+
677699
// Try to adopt from warm pool
678700
if len(adoptionCandidates) > 0 {
679701
logger.V(1).Info("Found warm pool adoption candidates", "count", len(adoptionCandidates), "claim", claim.Name, "warmpool", policy)

extensions/controllers/sandboxclaim_controller_test.go

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1318,6 +1318,217 @@ func TestSandboxClaimCreationMetric(t *testing.T) {
13181318
})
13191319
}
13201320

1321+
func TestSandboxClaimAdoptionSkipsStaleContentHash(t *testing.T) {
1322+
scheme := newScheme(t)
1323+
1324+
template := &extensionsv1alpha1.SandboxTemplate{
1325+
ObjectMeta: metav1.ObjectMeta{Name: "test-template", Namespace: "default"},
1326+
Spec: extensionsv1alpha1.SandboxTemplateSpec{
1327+
PodTemplate: sandboxv1alpha1.PodTemplate{
1328+
Spec: corev1.PodSpec{
1329+
Containers: []corev1.Container{{Name: "c", Image: "new-image:v2"}},
1330+
},
1331+
},
1332+
},
1333+
}
1334+
1335+
claim := &extensionsv1alpha1.SandboxClaim{
1336+
ObjectMeta: metav1.ObjectMeta{
1337+
Name: "hash-claim", Namespace: "default",
1338+
UID: "hash-claim-uid",
1339+
},
1340+
Spec: extensionsv1alpha1.SandboxClaimSpec{
1341+
TemplateRef: extensionsv1alpha1.SandboxTemplateRef{Name: "test-template"},
1342+
},
1343+
}
1344+
1345+
poolNameHash := sandboxcontrollers.NameHash("test-pool")
1346+
currentHash := templateContentHash(template)
1347+
1348+
// Stale sandbox: content hash from old template version.
1349+
staleSandbox := &sandboxv1alpha1.Sandbox{
1350+
ObjectMeta: metav1.ObjectMeta{
1351+
Name: "stale-sb", Namespace: "default",
1352+
CreationTimestamp: metav1.Time{Time: time.Now().Add(-1 * time.Hour)},
1353+
Labels: map[string]string{
1354+
warmPoolSandboxLabel: poolNameHash,
1355+
sandboxTemplateRefHash: sandboxcontrollers.NameHash("test-template"),
1356+
templateContentHashLabel: "oldold00",
1357+
},
1358+
OwnerReferences: []metav1.OwnerReference{{
1359+
APIVersion: "extensions.agents.x-k8s.io/v1alpha1", Kind: "SandboxWarmPool",
1360+
Name: "test-pool", UID: "wp-uid", Controller: ptr.To(true),
1361+
}},
1362+
},
1363+
Spec: sandboxv1alpha1.SandboxSpec{
1364+
Replicas: ptr.To(int32(1)),
1365+
PodTemplate: sandboxv1alpha1.PodTemplate{
1366+
Spec: corev1.PodSpec{Containers: []corev1.Container{{Name: "c", Image: "old-image:v1"}}},
1367+
},
1368+
},
1369+
Status: sandboxv1alpha1.SandboxStatus{
1370+
Conditions: []metav1.Condition{{
1371+
Type: string(sandboxv1alpha1.SandboxConditionReady), Status: metav1.ConditionTrue, Reason: "Ready",
1372+
}},
1373+
},
1374+
}
1375+
1376+
// Fresh sandbox: content hash matches current template.
1377+
freshSandbox := &sandboxv1alpha1.Sandbox{
1378+
ObjectMeta: metav1.ObjectMeta{
1379+
Name: "fresh-sb", Namespace: "default",
1380+
CreationTimestamp: metav1.Now(),
1381+
Labels: map[string]string{
1382+
warmPoolSandboxLabel: poolNameHash,
1383+
sandboxTemplateRefHash: sandboxcontrollers.NameHash("test-template"),
1384+
templateContentHashLabel: currentHash,
1385+
},
1386+
OwnerReferences: []metav1.OwnerReference{{
1387+
APIVersion: "extensions.agents.x-k8s.io/v1alpha1", Kind: "SandboxWarmPool",
1388+
Name: "test-pool", UID: "wp-uid", Controller: ptr.To(true),
1389+
}},
1390+
},
1391+
Spec: sandboxv1alpha1.SandboxSpec{
1392+
Replicas: ptr.To(int32(1)),
1393+
PodTemplate: sandboxv1alpha1.PodTemplate{
1394+
Spec: corev1.PodSpec{Containers: []corev1.Container{{Name: "c", Image: "new-image:v2"}}},
1395+
},
1396+
},
1397+
Status: sandboxv1alpha1.SandboxStatus{
1398+
Conditions: []metav1.Condition{{
1399+
Type: string(sandboxv1alpha1.SandboxConditionReady), Status: metav1.ConditionTrue, Reason: "Ready",
1400+
}},
1401+
},
1402+
}
1403+
1404+
reconciler := &SandboxClaimReconciler{
1405+
Client: fake.NewClientBuilder().
1406+
WithScheme(scheme).
1407+
WithObjects(template, claim, staleSandbox, freshSandbox).
1408+
WithStatusSubresource(claim).
1409+
Build(),
1410+
Scheme: scheme,
1411+
Recorder: record.NewFakeRecorder(10),
1412+
Tracer: asmetrics.NewNoOp(),
1413+
}
1414+
1415+
req := reconcile.Request{NamespacedName: types.NamespacedName{Name: "hash-claim", Namespace: "default"}}
1416+
_, err := reconciler.Reconcile(context.Background(), req)
1417+
if err != nil {
1418+
t.Fatalf("reconcile failed: %v", err)
1419+
}
1420+
1421+
// Should have adopted the fresh sandbox, not the stale one.
1422+
var adopted sandboxv1alpha1.Sandbox
1423+
err = reconciler.Get(context.Background(), types.NamespacedName{Name: "fresh-sb", Namespace: "default"}, &adopted)
1424+
if err != nil {
1425+
t.Fatalf("failed to get fresh sandbox: %v", err)
1426+
}
1427+
controllerRef := metav1.GetControllerOf(&adopted)
1428+
if controllerRef == nil || controllerRef.UID != claim.UID {
1429+
t.Fatalf("expected fresh-sb to be adopted by claim, got controller %v", controllerRef)
1430+
}
1431+
1432+
// Stale sandbox should still be owned by warm pool (not adopted).
1433+
var stale sandboxv1alpha1.Sandbox
1434+
err = reconciler.Get(context.Background(), types.NamespacedName{Name: "stale-sb", Namespace: "default"}, &stale)
1435+
if err != nil {
1436+
t.Fatalf("failed to get stale sandbox: %v", err)
1437+
}
1438+
staleRef := metav1.GetControllerOf(&stale)
1439+
if staleRef != nil && staleRef.UID == claim.UID {
1440+
t.Fatal("stale sandbox should not have been adopted by claim")
1441+
}
1442+
}
1443+
1444+
func TestSandboxClaimAdoptionRemovesContentHashLabel(t *testing.T) {
1445+
scheme := newScheme(t)
1446+
1447+
template := &extensionsv1alpha1.SandboxTemplate{
1448+
ObjectMeta: metav1.ObjectMeta{Name: "test-template", Namespace: "default"},
1449+
Spec: extensionsv1alpha1.SandboxTemplateSpec{
1450+
PodTemplate: sandboxv1alpha1.PodTemplate{
1451+
Spec: corev1.PodSpec{
1452+
Containers: []corev1.Container{{Name: "c", Image: "img"}},
1453+
},
1454+
},
1455+
},
1456+
}
1457+
1458+
claim := &extensionsv1alpha1.SandboxClaim{
1459+
ObjectMeta: metav1.ObjectMeta{
1460+
Name: "label-claim", Namespace: "default", UID: "label-claim-uid",
1461+
},
1462+
Spec: extensionsv1alpha1.SandboxClaimSpec{
1463+
TemplateRef: extensionsv1alpha1.SandboxTemplateRef{Name: "test-template"},
1464+
},
1465+
}
1466+
1467+
poolNameHash := sandboxcontrollers.NameHash("test-pool")
1468+
contentHash := templateContentHash(template)
1469+
1470+
poolSandbox := &sandboxv1alpha1.Sandbox{
1471+
ObjectMeta: metav1.ObjectMeta{
1472+
Name: "pool-sb", Namespace: "default",
1473+
CreationTimestamp: metav1.Now(),
1474+
Labels: map[string]string{
1475+
warmPoolSandboxLabel: poolNameHash,
1476+
sandboxTemplateRefHash: sandboxcontrollers.NameHash("test-template"),
1477+
templateContentHashLabel: contentHash,
1478+
},
1479+
OwnerReferences: []metav1.OwnerReference{{
1480+
APIVersion: "extensions.agents.x-k8s.io/v1alpha1", Kind: "SandboxWarmPool",
1481+
Name: "test-pool", UID: "wp-uid", Controller: ptr.To(true),
1482+
}},
1483+
},
1484+
Spec: sandboxv1alpha1.SandboxSpec{
1485+
Replicas: ptr.To(int32(1)),
1486+
PodTemplate: sandboxv1alpha1.PodTemplate{
1487+
Spec: corev1.PodSpec{Containers: []corev1.Container{{Name: "c", Image: "img"}}},
1488+
},
1489+
},
1490+
Status: sandboxv1alpha1.SandboxStatus{
1491+
Conditions: []metav1.Condition{{
1492+
Type: string(sandboxv1alpha1.SandboxConditionReady), Status: metav1.ConditionTrue, Reason: "Ready",
1493+
}},
1494+
},
1495+
}
1496+
1497+
reconciler := &SandboxClaimReconciler{
1498+
Client: fake.NewClientBuilder().
1499+
WithScheme(scheme).
1500+
WithObjects(template, claim, poolSandbox).
1501+
WithStatusSubresource(claim).
1502+
Build(),
1503+
Scheme: scheme,
1504+
Recorder: record.NewFakeRecorder(10),
1505+
Tracer: asmetrics.NewNoOp(),
1506+
}
1507+
1508+
req := reconcile.Request{NamespacedName: types.NamespacedName{Name: "label-claim", Namespace: "default"}}
1509+
_, err := reconciler.Reconcile(context.Background(), req)
1510+
if err != nil {
1511+
t.Fatalf("reconcile failed: %v", err)
1512+
}
1513+
1514+
var adopted sandboxv1alpha1.Sandbox
1515+
err = reconciler.Get(context.Background(), types.NamespacedName{Name: "pool-sb", Namespace: "default"}, &adopted)
1516+
if err != nil {
1517+
t.Fatalf("failed to get adopted sandbox: %v", err)
1518+
}
1519+
1520+
// All warm pool labels should be removed after adoption.
1521+
if _, exists := adopted.Labels[warmPoolSandboxLabel]; exists {
1522+
t.Error("warmPoolSandboxLabel should be removed after adoption")
1523+
}
1524+
if _, exists := adopted.Labels[sandboxTemplateRefHash]; exists {
1525+
t.Error("sandboxTemplateRefHash should be removed after adoption")
1526+
}
1527+
if _, exists := adopted.Labels[templateContentHashLabel]; exists {
1528+
t.Error("templateContentHashLabel should be removed after adoption")
1529+
}
1530+
}
1531+
13211532
func newScheme(t *testing.T) *runtime.Scheme {
13221533
scheme := runtime.NewScheme()
13231534
if err := sandboxv1alpha1.AddToScheme(scheme); err != nil {

extensions/controllers/sandboxwarmpool_controller.go

Lines changed: 57 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ package controllers
1616

1717
import (
1818
"context"
19+
"crypto/sha256"
20+
"encoding/json"
1921
"errors"
2022
"fmt"
2123
"slices"
@@ -39,10 +41,25 @@ import (
3941
)
4042

4143
const (
42-
sandboxTemplateRefHash = "agents.x-k8s.io/sandbox-template-ref-hash"
43-
warmPoolSandboxLabel = "agents.x-k8s.io/warm-pool-sandbox"
44+
sandboxTemplateRefHash = "agents.x-k8s.io/sandbox-template-ref-hash"
45+
warmPoolSandboxLabel = "agents.x-k8s.io/warm-pool-sandbox"
46+
templateContentHashLabel = "agents.x-k8s.io/template-content-hash"
4447
)
4548

49+
// templateContentHash computes a SHA-256 hash over the JSON-serialized
50+
// SandboxTemplate spec, returning the first 8 hex chars. This captures
51+
// everything the template defines and is used to detect spec drift between
52+
// warm pool sandboxes and the current template.
53+
func templateContentHash(template *extensionsv1alpha1.SandboxTemplate) string {
54+
data, err := json.Marshal(template.Spec)
55+
if err != nil {
56+
// Spec is always serializable; this should never happen.
57+
return "00000000"
58+
}
59+
sum := sha256.Sum256(data)
60+
return fmt.Sprintf("%x", sum[:])[:8]
61+
}
62+
4663
// SandboxWarmPoolReconciler reconciles a SandboxWarmPool object
4764
type SandboxWarmPoolReconciler struct {
4865
client.Client
@@ -144,6 +161,37 @@ func (r *SandboxWarmPoolReconciler) reconcilePool(ctx context.Context, warmPool
144161
}
145162
}
146163

164+
// Compute the current template content hash to detect spec drift.
165+
currentContentHash := ""
166+
if template, err := r.getTemplate(ctx, warmPool); err == nil {
167+
currentContentHash = templateContentHash(template)
168+
} else if !k8serrors.IsNotFound(err) {
169+
log.Error(err, "Failed to get template for content hash")
170+
allErrors = errors.Join(allErrors, err)
171+
}
172+
173+
// Delete sandboxes whose template content hash doesn't match the
174+
// current template spec (stale from a prior template version).
175+
if currentContentHash != "" {
176+
var freshSandboxes []sandboxv1alpha1.Sandbox
177+
for _, sb := range activeSandboxes {
178+
sbHash := sb.Labels[templateContentHashLabel]
179+
if sbHash != "" && sbHash != currentContentHash {
180+
log.Info("Deleting stale warm pool sandbox (template content hash mismatch)",
181+
"sandbox", sb.Name,
182+
"sandboxHash", sbHash,
183+
"currentHash", currentContentHash)
184+
if err := r.Delete(ctx, &sb); err != nil {
185+
log.Error(err, "Failed to delete stale sandbox", "sandbox", sb.Name)
186+
allErrors = errors.Join(allErrors, err)
187+
}
188+
continue
189+
}
190+
freshSandboxes = append(freshSandboxes, sb)
191+
}
192+
activeSandboxes = freshSandboxes
193+
}
194+
147195
const warmPoolReadinessGracePeriod = 5 * time.Minute
148196

149197
now := time.Now()
@@ -248,10 +296,14 @@ func (r *SandboxWarmPoolReconciler) createPoolSandbox(ctx context.Context, warmP
248296
return err
249297
}
250298

299+
// Compute template content hash for spec-drift detection.
300+
contentHash := templateContentHash(template)
301+
251302
// Build labels for the Sandbox CR
252303
sandboxLabels := map[string]string{
253-
warmPoolSandboxLabel: poolNameHash,
254-
sandboxTemplateRefHash: sandboxcontrollers.NameHash(warmPool.Spec.TemplateRef.Name),
304+
warmPoolSandboxLabel: poolNameHash,
305+
sandboxTemplateRefHash: sandboxcontrollers.NameHash(warmPool.Spec.TemplateRef.Name),
306+
templateContentHashLabel: contentHash,
255307
}
256308

257309
// Copy template pod labels into sandbox pod template
@@ -261,6 +313,7 @@ func (r *SandboxWarmPoolReconciler) createPoolSandbox(ctx context.Context, warmP
261313
}
262314
// Propagate template ref hash to pod template for NetworkPolicy targeting
263315
podLabels[sandboxTemplateRefHash] = sandboxcontrollers.NameHash(warmPool.Spec.TemplateRef.Name)
316+
podLabels[templateContentHashLabel] = contentHash
264317

265318
podAnnotations := make(map[string]string)
266319
for k, v := range template.Spec.PodTemplate.ObjectMeta.Annotations {

0 commit comments

Comments
 (0)