Skip to content
This repository was archived by the owner on Oct 6, 2025. It is now read-only.

Commit 94f5b6f

Browse files
committed
Work around TOCTTOU install check race
We'll use creation of a container with the model runner container name as a semaphore of sorts to avoid failing on conflicting installs. Signed-off-by: Jacob Howard <[email protected]>
1 parent 3e384bc commit 94f5b6f

File tree

1 file changed

+39
-1
lines changed

1 file changed

+39
-1
lines changed

pkg/standalone/containers.go

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,12 @@ package standalone
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
7+
"regexp"
68
"strconv"
79
"strings"
10+
"time"
811

912
"github.com/docker/docker/api/types/container"
1013
"github.com/docker/docker/api/types/filters"
@@ -18,6 +21,11 @@ import (
1821
// controllerContainerName is the name to use for the controller container.
1922
const controllerContainerName = "docker-model-runner"
2023

24+
// concurrentInstallMatcher matches error messages that indicate a concurrent
25+
// standalone model runner installation is taking place. It extracts the ID of
26+
// the conflicting container in a capture group.
27+
var concurrentInstallMatcher = regexp.MustCompile(`is already in use by container "([a-z0-9]+)"`)
28+
2129
// FindControllerContainer searches for a running controller container. It
2230
// returns the ID of the container (if found), the container name (if any), the
2331
// full container summary (if found), or any error that occurred.
@@ -65,6 +73,28 @@ func determineBridgeGatewayIP(ctx context.Context, dockerClient *client.Client)
6573
return "", nil
6674
}
6775

76+
// waitForContainerToStart waits for a container to start.
77+
func waitForContainerToStart(ctx context.Context, dockerClient *client.Client, containerID string) error {
78+
// Unfortunately the Docker API's /containers/{id}/wait API (and the
79+
// corresponding Client.ContainerWait method) don't allow waiting for
80+
// container startup, so instead we'll take a polling approach.
81+
for i := 5; i > 0; i-- {
82+
if status, err := dockerClient.ContainerInspect(ctx, containerID); err != nil {
83+
return fmt.Errorf("unable to inspect container (%s): %w", containerID[:12], err)
84+
} else if status.State.Status == "running" {
85+
return nil
86+
}
87+
if i > 1 {
88+
select {
89+
case <-time.After(1 * time.Second):
90+
case <-ctx.Done():
91+
return errors.New("waiting cancelled")
92+
}
93+
}
94+
}
95+
return errors.New("timed out")
96+
}
97+
6898
// CreateControllerContainer creates and starts a controller container.
6999
func CreateControllerContainer(ctx context.Context, dockerClient *client.Client, port uint16, doNotTrack bool, gpu gpupkg.GPUSupport, modelStorageVolume string, printer StatusPrinter) error {
70100
// Determine the target image.
@@ -117,9 +147,17 @@ func CreateControllerContainer(ctx context.Context, dockerClient *client.Client,
117147
hostConfig.DeviceRequests = []container.DeviceRequest{{Count: -1, Capabilities: [][]string{{"gpu"}}}}
118148
}
119149

120-
// Create the container.
150+
// Create the container. If we detect that a concurrent installation is in
151+
// progress, then we wait for whichever install process creates the
152+
// container first and then wait for its container to be ready.
121153
resp, err := dockerClient.ContainerCreate(ctx, config, hostConfig, nil, nil, controllerContainerName)
122154
if err != nil {
155+
if match := concurrentInstallMatcher.FindStringSubmatch(err.Error()); match != nil {
156+
if err := waitForContainerToStart(ctx, dockerClient, match[1]); err != nil {
157+
return fmt.Errorf("failed waiting for concurrent installation: %w", err)
158+
}
159+
return nil
160+
}
123161
return fmt.Errorf("failed to create container %s: %w", controllerContainerName, err)
124162
}
125163

0 commit comments

Comments
 (0)