This repository was archived by the owner on Oct 6, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 16
Clean up standalone container startup using Moby idioms #107
Merged
Merged
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
d119b50
Use correct containerd error detection.
xenoscopic 5900e78
simplify container start logic, taking advantage of idempotency
thaJeztah 97a5347
Clean up a few bits post-rebase and don't return on nil error.
xenoscopic 5871b6f
Allow EOF errors to pass silently on ContainerStart.
xenoscopic 40ba102
Rename waitForContainerToStart to ensureContainerStarted
xenoscopic File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,12 +6,13 @@ import ( | |
| "context" | ||
| "errors" | ||
| "fmt" | ||
| "io" | ||
| "os" | ||
| "regexp" | ||
| "strconv" | ||
| "strings" | ||
| "time" | ||
|
|
||
| "github.com/containerd/errdefs" | ||
| "github.com/docker/docker/api/types/container" | ||
| "github.com/docker/docker/api/types/filters" | ||
| "github.com/docker/docker/api/types/mount" | ||
|
|
@@ -25,11 +26,6 @@ import ( | |
| // controllerContainerName is the name to use for the controller container. | ||
| const controllerContainerName = "docker-model-runner" | ||
|
|
||
| // concurrentInstallMatcher matches error message that indicate a concurrent | ||
| // standalone model runner installation is taking place. It extracts the ID of | ||
| // the conflicting container in a capture group. | ||
| var concurrentInstallMatcher = regexp.MustCompile(`is already in use by container "([a-z0-9]+)"`) | ||
|
|
||
| // copyDockerConfigToContainer copies the Docker config file from the host to the container | ||
| // and sets up proper ownership and permissions for the modelrunner user. | ||
| // It does nothing for Desktop and Cloud engine kinds. | ||
|
|
@@ -177,38 +173,33 @@ func determineBridgeGatewayIP(ctx context.Context, dockerClient client.NetworkAP | |
| return "", nil | ||
| } | ||
|
|
||
| // waitForContainerToStart waits for a container to start. | ||
| func waitForContainerToStart(ctx context.Context, dockerClient client.ContainerAPIClient, containerID string) error { | ||
| // Unfortunately the Docker API's /containers/{id}/wait API (and the | ||
| // corresponding Client.ContainerWait method) don't allow waiting for | ||
| // container startup, so instead we'll take a polling approach. | ||
| // ensureContainerStarted ensures that a container has started. It may be called | ||
| // concurrently, taking advantage of the fact that ContainerStart is idempotent. | ||
| func ensureContainerStarted(ctx context.Context, dockerClient client.ContainerAPIClient, containerID string) error { | ||
| for i := 10; i > 0; i-- { | ||
| if status, err := dockerClient.ContainerInspect(ctx, containerID); err != nil { | ||
| // There is a small gap between the time that a container ID and | ||
| // name are registered and the time that the container is actually | ||
| // created and shows up in container list and inspect requests: | ||
| // | ||
| // https://github.com/moby/moby/blob/de24c536b0ea208a09e0fff3fd896c453da6ef2e/daemon/container.go#L138-L156 | ||
| // | ||
| // Given that multiple install operations tend to end up tightly | ||
| // synchronized by the preceeding pull operation and that this | ||
| // method is specifically designed to work around these race | ||
| // conditions, we'll allow 404 errors to pass silently (at least up | ||
| // until the polling time out - unfortunately we can't make the 404 | ||
| // acceptance window any smaller than that because the CUDA-based | ||
| // containers are large and can take time to create). | ||
| if !strings.Contains(err.Error(), "No such container") { | ||
| return fmt.Errorf("unable to inspect container (%s): %w", containerID[:12], err) | ||
| } | ||
| } else { | ||
| switch status.State.Status { | ||
| case container.StateRunning: | ||
| return nil | ||
| case container.StateCreated, container.StateRestarting: | ||
| // wait for container to start | ||
| default: | ||
| return fmt.Errorf("container is in unexpected state %q", status.State.Status) | ||
| } | ||
| err := dockerClient.ContainerStart(ctx, containerID, container.StartOptions{}) | ||
| if err == nil { | ||
| return nil | ||
| } | ||
| // There is a small gap between the time that a container ID and | ||
| // name are registered and the time that the container is actually | ||
| // created and shows up in container list and inspect requests: | ||
| // | ||
| // https://github.com/moby/moby/blob/de24c536b0ea208a09e0fff3fd896c453da6ef2e/daemon/container.go#L138-L156 | ||
| // | ||
| // Given that multiple install operations tend to end up tightly | ||
| // synchronized by the preceeding pull operation and that this | ||
| // method is specifically designed to work around these race | ||
| // conditions, we'll allow 404 errors to pass silently (at least up | ||
| // until the polling time out - unfortunately we can't make the 404 | ||
| // acceptance window any smaller than that because the CUDA-based | ||
| // containers are large and can take time to create). | ||
| // | ||
| // For some reason, this error case can also manifest as an EOF on the | ||
| // request (I'm not sure where this arises in the Moby server), so we'll | ||
| // let that pass silently too. | ||
| if !(errdefs.IsNotFound(err) || errors.Is(err, io.EOF)) { | ||
| return err | ||
| } | ||
| if i > 1 { | ||
| select { | ||
|
|
@@ -280,30 +271,31 @@ func CreateControllerContainer(ctx context.Context, dockerClient *client.Client, | |
| } | ||
|
|
||
| // Create the container. If we detect that a concurrent installation is in | ||
| // progress, then we wait for whichever install process creates the | ||
| // container first and then wait for its container to be ready. | ||
| // progress (as indicated by a conflicting container name (which should have | ||
| // been detected just before installation)), then we'll allow the error to | ||
| // pass silently and simply work in conjunction with any concurrent | ||
| // installers to start the container. | ||
| resp, err := dockerClient.ContainerCreate(ctx, config, hostConfig, nil, nil, controllerContainerName) | ||
| if err != nil { | ||
| if match := concurrentInstallMatcher.FindStringSubmatch(err.Error()); match != nil { | ||
| if err := waitForContainerToStart(ctx, dockerClient, match[1]); err != nil { | ||
| return fmt.Errorf("failed waiting for concurrent installation: %w", err) | ||
| } | ||
| return nil | ||
| } | ||
| if err != nil && !errdefs.IsConflict(err) { | ||
| return fmt.Errorf("failed to create container %s: %w", controllerContainerName, err) | ||
| } | ||
| created := err == nil | ||
|
|
||
| // Start the container. | ||
| printer.Printf("Starting model runner container %s...\n", controllerContainerName) | ||
| if err := dockerClient.ContainerStart(ctx, resp.ID, container.StartOptions{}); err != nil { | ||
| _ = dockerClient.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true}) | ||
| if err := ensureContainerStarted(ctx, dockerClient, controllerContainerName); err != nil { | ||
| if created { | ||
| _ = dockerClient.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true}) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not a blocker; mostly thinking out loud if there would be cases where the remove would fail, and cause issues later (so if it would make sense to print a warning and continue). |
||
| } | ||
| return fmt.Errorf("failed to start container %s: %w", controllerContainerName, err) | ||
| } | ||
|
|
||
| // Copy Docker config file if it exists | ||
| if err := copyDockerConfigToContainer(ctx, dockerClient, resp.ID, engineKind); err != nil { | ||
| // Log warning but continue - don't fail container creation | ||
| printer.Printf("Warning: failed to copy Docker config: %v\n", err) | ||
| // Copy Docker config file if it exists and we're the container creator. | ||
| if created { | ||
| if err := copyDockerConfigToContainer(ctx, dockerClient, resp.ID, engineKind); err != nil { | ||
| // Log warning but continue - don't fail container creation | ||
| printer.Printf("Warning: failed to copy Docker config: %v\n", err) | ||
| } | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still interested in this situation; something we should look into to see what's causing this (as it seems unexpected).