-
Notifications
You must be signed in to change notification settings - Fork 185
fix: ensure warm pool pod-name annotation before Sandbox Ready #469
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -397,6 +397,13 @@ func (r *SandboxClaimReconciler) adoptSandboxFromCandidates(ctx context.Context, | |
| if adopted.Annotations == nil { | ||
| adopted.Annotations = make(map[string]string) | ||
| } | ||
| // Ensure the adopted sandbox records its pod name before it can be observed Ready. | ||
| if podName := adopted.Annotations[v1alpha1.SandboxPodNameAnnotation]; podName != adopted.Name { | ||
| if podName != "" { | ||
| log.Info("Correcting adopted sandbox pod-name annotation", "sandbox", adopted.Name, "oldPodName", podName, "newPodName", adopted.Name) | ||
| } | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Assigning |
||
| adopted.Annotations[v1alpha1.SandboxPodNameAnnotation] = adopted.Name | ||
| } | ||
| if tc, ok := claim.Annotations[asmetrics.TraceContextAnnotation]; ok { | ||
| adopted.Annotations[asmetrics.TraceContextAnnotation] = tc | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -842,6 +842,7 @@ func TestSandboxClaimSandboxAdoption(t *testing.T) { | |
| existingObjects []client.Object | ||
| expectSandboxAdoption bool | ||
| expectedAdoptedSandbox string | ||
| expectedAnnotations map[string]string | ||
| expectNewSandboxCreated bool | ||
| simulateConflicts int | ||
| }{ | ||
|
|
@@ -927,6 +928,46 @@ func TestSandboxClaimSandboxAdoption(t *testing.T) { | |
| expectedAdoptedSandbox: "not-ready-1", | ||
| expectNewSandboxCreated: false, | ||
| }, | ||
| { | ||
| name: "corrects stale pod-name annotation when adopting sandbox", | ||
| existingObjects: []client.Object{ | ||
| template, | ||
| claim, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. By assigning a completely new map to |
||
| func() client.Object { | ||
| sb := createWarmPoolSandbox("pool-sb-1", metav1.Time{Time: metav1.Now().Add(-1 * time.Hour)}, true) | ||
| sb.Annotations = map[string]string{ | ||
| sandboxv1alpha1.SandboxPodNameAnnotation: "stale-pod-name", | ||
| } | ||
| return sb | ||
| }(), | ||
| createWarmPoolSandbox("pool-sb-2", metav1.Time{Time: metav1.Now().Add(-30 * time.Minute)}, true), | ||
| }, | ||
| expectSandboxAdoption: true, | ||
| expectedAdoptedSandbox: "pool-sb-1", | ||
| expectNewSandboxCreated: false, | ||
| }, | ||
| { | ||
| name: "accepts existing correct pod-name annotation when adopting sandbox", | ||
| existingObjects: []client.Object{ | ||
| template, | ||
| claim, | ||
| func() client.Object { | ||
| sb := createWarmPoolSandbox("pool-sb-1", metav1.Time{Time: metav1.Now().Add(-1 * time.Hour)}, true) | ||
| sb.Annotations = map[string]string{ | ||
| sandboxv1alpha1.SandboxPodNameAnnotation: "pool-sb-1", | ||
| "test.annotation/preserved": "true", | ||
| } | ||
| return sb | ||
| }(), | ||
| createWarmPoolSandbox("pool-sb-2", metav1.Time{Time: metav1.Now().Add(-30 * time.Minute)}, true), | ||
| }, | ||
| expectSandboxAdoption: true, | ||
| expectedAdoptedSandbox: "pool-sb-1", | ||
| expectedAnnotations: map[string]string{ | ||
| "test.annotation/preserved": "true", | ||
| }, | ||
| expectNewSandboxCreated: false, | ||
| }, | ||
| { | ||
| name: "retries on conflict when adopting sandbox", | ||
| existingObjects: []client.Object{ | ||
|
|
@@ -1009,6 +1050,17 @@ func TestSandboxClaimSandboxAdoption(t *testing.T) { | |
| t.Errorf("expected adopted sandbox to be controlled by claim, got %v", controllerRef) | ||
| } | ||
|
|
||
| // 4. Verify the adopted sandbox records the adopted pod name | ||
| if val := adoptedSandbox.Annotations[sandboxv1alpha1.SandboxPodNameAnnotation]; val != adoptedSandbox.Name { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If |
||
| t.Errorf("expected adopted sandbox to have %q annotation %q, got %q; annotations=%v", sandboxv1alpha1.SandboxPodNameAnnotation, adoptedSandbox.Name, val, adoptedSandbox.Annotations) | ||
| } | ||
|
|
||
| for key, expected := range tc.expectedAnnotations { | ||
| if val := adoptedSandbox.Annotations[key]; val != expected { | ||
| t.Errorf("expected adopted sandbox to preserve annotation %q=%q, got %q; annotations=%v", key, expected, val, adoptedSandbox.Annotations) | ||
| } | ||
| } | ||
|
|
||
| } else if tc.expectNewSandboxCreated { | ||
| // Verify a new sandbox was created with the claim's name | ||
| var sandbox sandboxv1alpha1.Sandbox | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -15,31 +15,28 @@ | |||
| package extensions | ||||
|
|
||||
| import ( | ||||
| "context" | ||||
| "fmt" | ||||
| "testing" | ||||
| "time" | ||||
|
|
||||
| "github.com/stretchr/testify/require" | ||||
| corev1 "k8s.io/api/core/v1" | ||||
| metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" | ||||
| "k8s.io/apimachinery/pkg/runtime" | ||||
| "k8s.io/apimachinery/pkg/types" | ||||
| "k8s.io/apimachinery/pkg/watch" | ||||
| sandboxv1alpha1 "sigs.k8s.io/agent-sandbox/api/v1alpha1" | ||||
| extensionsv1alpha1 "sigs.k8s.io/agent-sandbox/extensions/api/v1alpha1" | ||||
| "sigs.k8s.io/agent-sandbox/test/e2e/framework" | ||||
| "sigs.k8s.io/controller-runtime/pkg/client" | ||||
| ) | ||||
|
|
||||
| func TestWarmPoolSandboxWatcher(t *testing.T) { | ||||
| tc := framework.NewTestContext(t) | ||||
|
|
||||
| ns := &corev1.Namespace{} | ||||
| ns.Name = fmt.Sprintf("warmpool-watcher-test-%d", time.Now().UnixNano()) | ||||
| require.NoError(t, tc.CreateWithCleanup(t.Context(), ns)) | ||||
|
|
||||
| // Create a SandboxTemplate | ||||
| func newWarmPoolTemplate(namespace string) *extensionsv1alpha1.SandboxTemplate { | ||||
| template := &extensionsv1alpha1.SandboxTemplate{} | ||||
| template.Name = "test-template" | ||||
| template.Namespace = ns.Name | ||||
| template.Namespace = namespace | ||||
| template.Spec.PodTemplate = sandboxv1alpha1.PodTemplate{ | ||||
| Spec: corev1.PodSpec{ | ||||
| Containers: []corev1.Container{ | ||||
|
|
@@ -50,39 +47,128 @@ func TestWarmPoolSandboxWatcher(t *testing.T) { | |||
| }, | ||||
| }, | ||||
| } | ||||
| require.NoError(t, tc.CreateWithCleanup(t.Context(), template)) | ||||
| return template | ||||
| } | ||||
|
|
||||
| // Create a SandboxWarmPool | ||||
| warmPool := &extensionsv1alpha1.SandboxWarmPool{} | ||||
| warmPool.Name = "test-warmpool" | ||||
| warmPool.Namespace = ns.Name | ||||
| warmPool.Spec.TemplateRef.Name = template.Name | ||||
| warmPool.Spec.Replicas = 1 | ||||
| require.NoError(t, tc.CreateWithCleanup(t.Context(), warmPool)) | ||||
| func waitForWarmPoolSandboxReady(t *testing.T, tc *framework.TestContext, namespace string, warmPool *extensionsv1alpha1.SandboxWarmPool) { | ||||
| t.Helper() | ||||
|
|
||||
| // Wait for warm pool Sandbox to become ready | ||||
| var poolSandboxName string | ||||
| require.Eventually(t, func() bool { | ||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In Go versions prior to 1.22, taking the address of a loop variable ( |
||||
| sandboxList := &sandboxv1alpha1.SandboxList{} | ||||
| if err := tc.List(t.Context(), sandboxList, client.InNamespace(ns.Name)); err != nil { | ||||
| if err := tc.List(t.Context(), sandboxList, client.InNamespace(namespace)); err != nil { | ||||
| return false | ||||
| } | ||||
| for _, sb := range sandboxList.Items { | ||||
| if sb.DeletionTimestamp.IsZero() && metav1.IsControlledBy(&sb, warmPool) { | ||||
| for _, cond := range sb.Status.Conditions { | ||||
| if cond.Type == string(sandboxv1alpha1.SandboxConditionReady) && cond.Status == metav1.ConditionTrue { | ||||
| poolSandboxName = sb.Name | ||||
| return true | ||||
| } | ||||
| } | ||||
| if sb.DeletionTimestamp.IsZero() && metav1.IsControlledBy(&sb, warmPool) && isSandboxReady(&sb) { | ||||
| return true | ||||
| } | ||||
| } | ||||
| return false | ||||
| }, 60*time.Second, 2*time.Second, "warm pool sandbox should become ready") | ||||
| } | ||||
|
|
||||
| // Find the pod name from the pool sandbox | ||||
| poolSandbox := &sandboxv1alpha1.Sandbox{} | ||||
| require.NoError(t, tc.Get(t.Context(), types.NamespacedName{Name: poolSandboxName, Namespace: ns.Name}, poolSandbox)) | ||||
| func waitForClaimReady(t *testing.T, tc *framework.TestContext, claim *extensionsv1alpha1.SandboxClaim) { | ||||
| t.Helper() | ||||
|
|
||||
| require.Eventually(t, func() bool { | ||||
| if err := tc.Get(t.Context(), types.NamespacedName{Name: claim.Name, Namespace: claim.Namespace}, claim); err != nil { | ||||
| return false | ||||
| } | ||||
| return claim.Status.SandboxStatus.Name != "" && isClaimReady(claim) | ||||
| }, 30*time.Second, 1*time.Second, "claim should become ready") | ||||
| } | ||||
|
|
||||
| func isSandboxReady(sb *sandboxv1alpha1.Sandbox) bool { | ||||
| for _, cond := range sb.Status.Conditions { | ||||
| if cond.Type == string(sandboxv1alpha1.SandboxConditionReady) && cond.Status == metav1.ConditionTrue { | ||||
| return true | ||||
| } | ||||
| } | ||||
| return false | ||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The function |
||||
| } | ||||
|
|
||||
| func isClaimReady(claim *extensionsv1alpha1.SandboxClaim) bool { | ||||
| for _, cond := range claim.Status.Conditions { | ||||
| if cond.Type == string(sandboxv1alpha1.SandboxConditionReady) && cond.Status == metav1.ConditionTrue { | ||||
| return true | ||||
| } | ||||
| } | ||||
| return false | ||||
| } | ||||
|
|
||||
| func requirePodNameAnnotationWhenReady( | ||||
| t *testing.T, | ||||
| tc *framework.TestContext, | ||||
| namespace string, | ||||
| claim *extensionsv1alpha1.SandboxClaim, | ||||
| ) { | ||||
| t.Helper() | ||||
|
|
||||
| ctx, cancel := context.WithTimeout(t.Context(), 60*time.Second) | ||||
| defer cancel() | ||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using |
||||
|
|
||||
| // Use a direct API watch to avoid the async subscription race in framework.Watch | ||||
| sandboxWatcher, err := tc.DynamicClient().Resource( | ||||
| sandboxv1alpha1.GroupVersion.WithResource("sandboxes"), | ||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The helper function |
||||
| ).Namespace(namespace).Watch(ctx, metav1.ListOptions{}) | ||||
| require.NoError(t, err) | ||||
| defer sandboxWatcher.Stop() | ||||
|
|
||||
| require.NoError(t, tc.CreateWithCleanup(t.Context(), claim)) | ||||
|
|
||||
| for { | ||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The line |
||||
| select { | ||||
| case <-ctx.Done(): | ||||
| t.Fatalf("timed out watching adopted sandbox readiness: %v", ctx.Err()) | ||||
| case event, ok := <-sandboxWatcher.ResultChan(): | ||||
| require.True(t, ok, "sandbox watch closed before observing adopted sandbox readiness") | ||||
| require.NotEqual(t, watch.Error, event.Type, "received error event while watching sandboxes") | ||||
|
|
||||
| if event.Type == watch.Deleted { | ||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The watch event might occasionally yield a |
||||
| continue | ||||
| } | ||||
|
|
||||
| u, ok := event.Object.(*unstructured.Unstructured) | ||||
| require.True(t, ok, "unexpected sandbox watch event object type: %T", event.Object) | ||||
|
|
||||
| sb := &sandboxv1alpha1.Sandbox{} | ||||
| require.NoError(t, runtime.DefaultUnstructuredConverter.FromUnstructured(u.Object, sb)) | ||||
|
|
||||
| controllerRef := metav1.GetControllerOf(sb) | ||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If |
||||
| if controllerRef == nil || controllerRef.Kind != "SandboxClaim" || controllerRef.Name != claim.Name { | ||||
| continue | ||||
| } | ||||
| if !isSandboxReady(sb) { | ||||
| continue | ||||
| } | ||||
| if sb.Annotations[sandboxv1alpha1.SandboxPodNameAnnotation] == "" { | ||||
| t.Fatalf("observed adopted sandbox %s Ready=True without %s annotation", sb.Name, sandboxv1alpha1.SandboxPodNameAnnotation) | ||||
| } | ||||
| return | ||||
| } | ||||
| } | ||||
| } | ||||
|
|
||||
| func TestWarmPoolSandboxWatcher(t *testing.T) { | ||||
| tc := framework.NewTestContext(t) | ||||
|
|
||||
| ns := &corev1.Namespace{} | ||||
| ns.Name = fmt.Sprintf("warmpool-watcher-test-%d", time.Now().UnixNano()) | ||||
| require.NoError(t, tc.CreateWithCleanup(t.Context(), ns)) | ||||
|
|
||||
| template := newWarmPoolTemplate(ns.Name) | ||||
| require.NoError(t, tc.CreateWithCleanup(t.Context(), template)) | ||||
|
|
||||
| // Create a SandboxWarmPool | ||||
| warmPool := &extensionsv1alpha1.SandboxWarmPool{} | ||||
| warmPool.Name = "test-warmpool" | ||||
| warmPool.Namespace = ns.Name | ||||
| warmPool.Spec.TemplateRef.Name = template.Name | ||||
| warmPool.Spec.Replicas = 1 | ||||
| require.NoError(t, tc.CreateWithCleanup(t.Context(), warmPool)) | ||||
|
|
||||
| // Wait for warm pool Sandbox to become ready | ||||
| waitForWarmPoolSandboxReady(t, tc, ns.Name, warmPool) | ||||
|
|
||||
| // Create a SandboxClaim to adopt the warm pool sandbox | ||||
| claim := &extensionsv1alpha1.SandboxClaim{} | ||||
|
|
@@ -92,20 +178,7 @@ func TestWarmPoolSandboxWatcher(t *testing.T) { | |||
| require.NoError(t, tc.CreateWithCleanup(t.Context(), claim)) | ||||
|
|
||||
| // Wait for claim to be ready with sandbox name in status | ||||
| require.Eventually(t, func() bool { | ||||
| if err := tc.Get(t.Context(), types.NamespacedName{Name: claim.Name, Namespace: ns.Name}, claim); err != nil { | ||||
| return false | ||||
| } | ||||
| if claim.Status.SandboxStatus.Name == "" { | ||||
| return false | ||||
| } | ||||
| for _, cond := range claim.Status.Conditions { | ||||
| if cond.Type == string(sandboxv1alpha1.SandboxConditionReady) && cond.Status == metav1.ConditionTrue { | ||||
| return true | ||||
| } | ||||
| } | ||||
| return false | ||||
| }, 30*time.Second, 1*time.Second, "claim should become ready") | ||||
| waitForClaimReady(t, tc, claim) | ||||
|
|
||||
| // Verify the adopted sandbox is now owned by the claim | ||||
| adoptedSandbox := &sandboxv1alpha1.Sandbox{} | ||||
|
|
@@ -117,7 +190,7 @@ func TestWarmPoolSandboxWatcher(t *testing.T) { | |||
|
|
||||
| // Find the pod belonging to the adopted sandbox | ||||
| podName := adoptedSandbox.Name | ||||
| if ann, ok := adoptedSandbox.Annotations["agents.x-k8s.io/pod-name"]; ok && ann != "" { | ||||
| if ann, ok := adoptedSandbox.Annotations[sandboxv1alpha1.SandboxPodNameAnnotation]; ok && ann != "" { | ||||
| podName = ann | ||||
| } | ||||
| adoptedPod := &corev1.Pod{} | ||||
|
|
@@ -138,3 +211,34 @@ func TestWarmPoolSandboxWatcher(t *testing.T) { | |||
| return false | ||||
| }, 15*time.Second, 500*time.Millisecond, "sandbox should become not-ready after pod deletion") | ||||
| } | ||||
|
|
||||
| // TestWarmPoolPodNameAnnotationBeforeReady verifies that a warm-pool sandbox | ||||
| // records its pod-name annotation before adoption can be observed as Ready. | ||||
| func TestWarmPoolPodNameAnnotationBeforeReady(t *testing.T) { | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you for this PR. This PR also resolves:
Can we simplify this test? It's long and hard to read. Can you add some comments and add helper methods wherever needed? |
||||
| tc := framework.NewTestContext(t) | ||||
|
|
||||
| ns := &corev1.Namespace{} | ||||
| ns.Name = fmt.Sprintf("warmpool-ready-annotation-test-%d", time.Now().UnixNano()) | ||||
| require.NoError(t, tc.CreateWithCleanup(t.Context(), ns)) | ||||
|
|
||||
| template := newWarmPoolTemplate(ns.Name) | ||||
| require.NoError(t, tc.CreateWithCleanup(t.Context(), template)) | ||||
|
|
||||
| warmPool := &extensionsv1alpha1.SandboxWarmPool{} | ||||
| warmPool.Name = "test-warmpool" | ||||
| warmPool.Namespace = ns.Name | ||||
| warmPool.Spec.TemplateRef.Name = template.Name | ||||
| warmPool.Spec.Replicas = 1 | ||||
| require.NoError(t, tc.CreateWithCleanup(t.Context(), warmPool)) | ||||
|
|
||||
| // Start from a Ready warm-pool Sandbox so the claim reconcile path must adopt it | ||||
| waitForWarmPoolSandboxReady(t, tc, ns.Name, warmPool) | ||||
|
|
||||
| claim := &extensionsv1alpha1.SandboxClaim{} | ||||
| claim.Name = "test-claim" | ||||
| claim.Namespace = ns.Name | ||||
| claim.Spec.TemplateRef.Name = template.Name | ||||
|
|
||||
| // Creating the claim should not observe Ready before the pod-name annotation is set | ||||
| requirePodNameAnnotationWhenReady(t, tc, ns.Name, claim) | ||||
| } | ||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It appears
log.Infois being used here directly. Iflogis a package-level or globally scoped logger, it is recommended to retrieve the context-aware logger usinglogger := log.FromContext(ctx). This ensures trace IDs and other contextual request data are injected into the logs.