Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions pkg/workloadmanager/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@
response, err := s.createSandbox(c.Request.Context(), dynamicClient, sandbox, sandboxClaim, sandboxEntry, resultChan)
if err != nil {
klog.Errorf("create sandbox failed %s/%s: %v", sandbox.Namespace, sandbox.Name, err)
respondError(c, http.StatusInternalServerError, "internal server error")
respondError(c, http.StatusInternalServerError, err.Error())
return
}

Expand All @@ -141,7 +141,7 @@
}

// createSandbox performs sandbox creation and returns the response payload or an error with an HTTP status code.
func (s *Server) createSandbox(ctx context.Context, dynamicClient dynamic.Interface, sandbox *sandboxv1alpha1.Sandbox, sandboxClaim *extensionsv1alpha1.SandboxClaim, sandboxEntry *sandboxEntry, resultChan <-chan SandboxStatusUpdate) (*types.CreateSandboxResponse, error) {

Check failure on line 144 in pkg/workloadmanager/handlers.go

View workflow job for this annotation

GitHub Actions / golangci-lint

cyclomatic complexity 17 of func `(*Server).createSandbox` is high (> 15) (gocyclo)
// Store placeholder before creating, make sandbox/sandboxClaim GarbageCollection possible
sandboxStorePlaceHolder := buildSandboxPlaceHolder(sandbox, sandboxEntry)
if err := s.storeClient.StoreSandbox(ctx, sandboxStorePlaceHolder); err != nil {
Expand Down Expand Up @@ -197,12 +197,25 @@
sandboxRollbackFunc()
}()

// Use NewTimer so we can stop it explicitly when another branch wins,
// preventing the runtime from retaining the timer until it fires.
timer := time.NewTimer(2 * time.Minute) // consistent with router settings

var createdSandbox *sandboxv1alpha1.Sandbox
select {
case result := <-resultChan:
timer.Stop()
if result.Err != nil {
klog.Warningf("sandbox %s/%s failed: %v", sandbox.Namespace, sandbox.Name, result.Err)
return nil, result.Err
}
Comment on lines +219 to +222
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The error returned here contains the descriptive failure reason (e.g., "sandbox failed: ErrImagePull"). However, the caller in handleSandboxCreate (line 136) is currently hardcoded to return a generic "internal server error" to the client. To fulfill the PR's objective of providing descriptive errors to the user, you should update the caller to use err.Error() instead of a static string.

createdSandbox = result.Sandbox
klog.V(2).Infof("sandbox %s/%s running", createdSandbox.Namespace, createdSandbox.Name)
case <-time.After(2 * time.Minute): // consistent with router settings
case <-ctx.Done():
timer.Stop()
klog.Warningf("sandbox %s/%s wait canceled: %v", sandbox.Namespace, sandbox.Name, ctx.Err())
return nil, fmt.Errorf("sandbox creation canceled: %w", ctx.Err())
case <-timer.C:
klog.Warningf("sandbox %s/%s create timed out", sandbox.Namespace, sandbox.Name)
return nil, fmt.Errorf("sandbox creation timed out")
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/workloadmanager/handlers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ func TestHandleSandboxCreate(t *testing.T) {
body: `{"name":"workload","namespace":"ns"}`,
createErr: errors.New("create failed"),
expectStatus: http.StatusInternalServerError,
expectMessage: "internal server error",
expectMessage: "create failed",
expectCreateCalls: 1,
},
{
Expand Down
64 changes: 41 additions & 23 deletions pkg/workloadmanager/sandbox_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package workloadmanager

import (
"context"
"fmt"
"sync"

"k8s.io/apimachinery/pkg/runtime"
Expand All @@ -40,6 +41,7 @@ type SandboxReconciler struct {

type SandboxStatusUpdate struct {
Sandbox *sandboxv1alpha1.Sandbox
Err error
}

func (r *SandboxReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
Expand All @@ -48,30 +50,46 @@ func (r *SandboxReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
return ctrl.Result{}, client.IgnoreNotFound(err)
}

status := getSandboxStatus(sandbox)

// Check for pending requests with proper locking
if status == "running" {
klog.V(2).Infof("Sandbox %s/%s is running, sending notification", sandbox.Namespace, sandbox.Name)
r.mu.Lock()
resultChan, exists := r.watchers[req.NamespacedName]
if exists {
klog.V(2).Infof("Found %d pending requests for sandbox %s/%s", len(r.watchers), sandbox.Namespace, sandbox.Name)
// Remove from map before sending to avoid memory leak
delete(r.watchers, req.NamespacedName)
} else {
klog.V(2).Infof("No pending requests found for sandbox %s/%s", sandbox.Namespace, sandbox.Name)
status, failMsg := getSandboxStatus(sandbox)

// Only notify the waiter on a terminal state (running or failed).
// "unknown" means the sandbox is still being scheduled/started; stay quiet.
var (
update SandboxStatusUpdate
hasWork bool
)
switch status {
case "running":
klog.V(2).Infof("Sandbox %s/%s is running, notifying waiter", sandbox.Namespace, sandbox.Name)
update = SandboxStatusUpdate{Sandbox: sandbox}
hasWork = true
case "failed":
klog.Warningf("Sandbox %s/%s entered a terminal failure state: %s", sandbox.Namespace, sandbox.Name, failMsg)
update = SandboxStatusUpdate{
Sandbox: sandbox,
Err: fmt.Errorf("sandbox %s/%s failed: %s", sandbox.Namespace, sandbox.Name, failMsg),
}
r.mu.Unlock()

if exists {
// Send notification outside the lock to avoid deadlock
select {
case resultChan <- SandboxStatusUpdate{Sandbox: sandbox}:
klog.V(2).Infof("Notified waiter about sandbox %s/%s reaching Running state", sandbox.Namespace, sandbox.Name)
default:
klog.Warningf("Failed to notify watcher for sandbox %s/%s: channel buffer full or not receiving", sandbox.Namespace, sandbox.Name)
}
hasWork = true
default:
return ctrl.Result{}, nil
}

r.mu.Lock()
resultChan, exists := r.watchers[req.NamespacedName]
if exists {
delete(r.watchers, req.NamespacedName)
}
r.mu.Unlock()

if exists && hasWork {
// Send notification outside the lock to avoid deadlock.
// The channel has buffer 1 so this never blocks when the map entry
// is deleted before the send (only one sender per key is possible).
select {
case resultChan <- update:
klog.V(2).Infof("Notified waiter about sandbox %s/%s (status: %s)", sandbox.Namespace, sandbox.Name, status)
default:
klog.Warningf("Could not notify waiter for sandbox %s/%s: channel unexpectedly full", sandbox.Namespace, sandbox.Name)
}
}

Expand Down
32 changes: 25 additions & 7 deletions pkg/workloadmanager/sandbox_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,35 @@ func buildSandboxInfo(sandbox *sandboxv1alpha1.Sandbox, podIP string, entry *san
SessionID: entry.SessionID,
CreatedAt: createdAt,
ExpiresAt: expiresAt,
Status: getSandboxStatus(sandbox),
Status: sandboxStatusString(sandbox),
}
}

// getSandboxStatus extracts status from Sandbox CRD conditions
func getSandboxStatus(sandbox *sandboxv1alpha1.Sandbox) string {
// Check conditions for Ready status
// sandboxStatusString returns only the status string (discards the failure message).
// Use this where only the status label is needed (e.g. store metadata).
func sandboxStatusString(sandbox *sandboxv1alpha1.Sandbox) string {
s, _ := getSandboxStatus(sandbox)
return s
}

// getSandboxStatus extracts status from Sandbox CRD conditions.
// Returns "running", "failed", or "unknown".
func getSandboxStatus(sandbox *sandboxv1alpha1.Sandbox) (string, string) {
for _, condition := range sandbox.Status.Conditions {
if condition.Type == string(sandboxv1alpha1.SandboxConditionReady) && condition.Status == metav1.ConditionTrue {
return "running"
if condition.Type != string(sandboxv1alpha1.SandboxConditionReady) {
continue
}
if condition.Status == metav1.ConditionTrue {
return "running", ""
}
// ConditionFalse with a reason indicates a terminal failure, not transient pending.
if condition.Status == metav1.ConditionFalse && condition.Reason != "" {
msg := condition.Message
if msg == "" {
msg = condition.Reason
}
return "failed", msg
}
}
return "unknown"
return "unknown", ""
}
66 changes: 54 additions & 12 deletions pkg/workloadmanager/sandbox_helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,10 @@ func TestBuildSandboxInfo_TableDriven(t *testing.T) {

func TestGetSandboxStatus_TableDriven(t *testing.T) {
tests := []struct {
name string
sandbox *sandboxv1alpha1.Sandbox
expected string
name string
sandbox *sandboxv1alpha1.Sandbox
expected string
expectedMsg string
}{
{
name: "ready condition true",
Expand All @@ -223,10 +224,11 @@ func TestGetSandboxStatus_TableDriven(t *testing.T) {
},
},
},
expected: "running",
expected: "running",
expectedMsg: "",
},
{
name: "ready condition false",
name: "ready condition false without reason",
sandbox: &sandboxv1alpha1.Sandbox{
Status: sandboxv1alpha1.SandboxStatus{
Conditions: []metav1.Condition{
Expand All @@ -237,7 +239,41 @@ func TestGetSandboxStatus_TableDriven(t *testing.T) {
},
},
},
expected: "unknown",
expected: "unknown",
expectedMsg: "",
},
{
name: "ready condition false with reason indicates terminal failure",
sandbox: &sandboxv1alpha1.Sandbox{
Status: sandboxv1alpha1.SandboxStatus{
Conditions: []metav1.Condition{
{
Type: string(sandboxv1alpha1.SandboxConditionReady),
Status: metav1.ConditionFalse,
Reason: "ErrImagePull",
Message: "Back-off pulling image",
},
},
},
},
expected: "failed",
expectedMsg: "Back-off pulling image",
},
{
name: "ready condition false with reason but no message falls back to reason",
sandbox: &sandboxv1alpha1.Sandbox{
Status: sandboxv1alpha1.SandboxStatus{
Conditions: []metav1.Condition{
{
Type: string(sandboxv1alpha1.SandboxConditionReady),
Status: metav1.ConditionFalse,
Reason: "OOMKilled",
},
},
},
},
expected: "failed",
expectedMsg: "OOMKilled",
},
{
name: "ready condition unknown",
Expand All @@ -251,7 +287,8 @@ func TestGetSandboxStatus_TableDriven(t *testing.T) {
},
},
},
expected: "unknown",
expected: "unknown",
expectedMsg: "",
},
{
name: "no conditions",
Expand All @@ -260,7 +297,8 @@ func TestGetSandboxStatus_TableDriven(t *testing.T) {
Conditions: []metav1.Condition{},
},
},
expected: "unknown",
expected: "unknown",
expectedMsg: "",
},
{
name: "nil conditions",
Expand All @@ -269,7 +307,8 @@ func TestGetSandboxStatus_TableDriven(t *testing.T) {
Conditions: nil,
},
},
expected: "unknown",
expected: "unknown",
expectedMsg: "",
},
{
name: "other condition type",
Expand All @@ -283,7 +322,8 @@ func TestGetSandboxStatus_TableDriven(t *testing.T) {
},
},
},
expected: "unknown",
expected: "unknown",
expectedMsg: "",
},
{
name: "multiple conditions with ready true",
Expand All @@ -301,14 +341,16 @@ func TestGetSandboxStatus_TableDriven(t *testing.T) {
},
},
},
expected: "running",
expected: "running",
expectedMsg: "",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := getSandboxStatus(tt.sandbox)
result, msg := getSandboxStatus(tt.sandbox)
assert.Equal(t, tt.expected, result)
assert.Equal(t, tt.expectedMsg, msg)
})
}
}
Loading