Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions ai/worker/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -780,9 +780,26 @@ tickerLoop:
return err
}

// Only evaluate the inspected state when one was actually reported. The
// running check lives inside the nil guard so that a missing State is
// skipped for this iteration instead of being dereferenced.
if state := json.State; state != nil {
	// If the container is running, we're done.
	if state.Running {
		break tickerLoop
	}

	// Fail fast on states that won't become running after startup.
	// Consider exited/dead as terminal. "removing" will surface via
	// inspect error or transition to exited/dead shortly.
	switch strings.ToLower(state.Status) {
	case "exited", "dead":
		return fmt.Errorf("container entered terminal state before running: %s (exitCode=%d)", state.Status, state.ExitCode)
	}

	// While the container is flagged as restarting, a non-zero exit code or
	// an error message is not treated as final; outside of a restart either
	// one aborts the wait.
	if !state.Restarting {
		if state.ExitCode != 0 {
			return fmt.Errorf("container exited before running (status=%s, exitCode=%d)", state.Status, state.ExitCode)
		}
		if state.Error != "" {
			return fmt.Errorf("container error before running: %s", state.Error)
		}
	}
}
}
}

Expand Down
57 changes: 57 additions & 0 deletions ai/worker/docker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1165,6 +1165,63 @@ func TestDockerWaitUntilRunning(t *testing.T) {
require.Contains(t, err.Error(), "timed out waiting for managed container")
mockDockerClient.AssertExpectations(t)
})

t.Run("FailFastOnExited", func(t *testing.T) {
	// A container that has already exited should make the wait return an
	// error right away rather than keep polling.
	exited := types.ContainerJSON{
		ContainerJSONBase: &types.ContainerJSONBase{
			State: &types.ContainerState{
				Status:   "exited",
				Running:  false,
				ExitCode: 137,
			},
		},
	}

	client := new(MockDockerClient)
	// Every inspect call reports the same non-running, exited state.
	client.On("ContainerInspect", mock.Anything, containerID).Return(exited, nil)

	err := dockerWaitUntilRunning(ctx, client, containerID, pollingInterval)

	require.Error(t, err)
	require.Contains(t, err.Error(), "terminal state")
	client.AssertExpectations(t)
})

t.Run("FailFastOnDead", func(t *testing.T) {
	// A container reported as "dead" is expected to abort the wait with the
	// terminal-state error.
	dead := types.ContainerJSON{
		ContainerJSONBase: &types.ContainerJSONBase{
			State: &types.ContainerState{
				Status:  "dead",
				Running: false,
				Error:   "killed",
			},
		},
	}

	client := new(MockDockerClient)
	client.On("ContainerInspect", mock.Anything, containerID).Return(dead, nil)

	err := dockerWaitUntilRunning(ctx, client, containerID, pollingInterval)

	require.Error(t, err)
	require.Contains(t, err.Error(), "container entered terminal state")
	client.AssertExpectations(t)
})

t.Run("FailFastOnExitCodeNonZeroWithoutRestarting", func(t *testing.T) {
	// The status is not terminal ("created"), but a non-zero exit code on a
	// container that is not restarting should still end the wait with an error.
	failed := types.ContainerJSON{
		ContainerJSONBase: &types.ContainerJSONBase{
			State: &types.ContainerState{
				Status:     "created",
				Running:    false,
				Restarting: false,
				ExitCode:   1,
			},
		},
	}

	client := new(MockDockerClient)
	client.On("ContainerInspect", mock.Anything, containerID).Return(failed, nil)

	err := dockerWaitUntilRunning(ctx, client, containerID, pollingInterval)

	require.Error(t, err)
	require.Contains(t, err.Error(), "exited before running")
	client.AssertExpectations(t)
})
}

func TestHwGPU(t *testing.T) {
Expand Down
Loading