docker · p1-0tr · Aug 22, 2025 · Aug 20, 2025
diff --git a/commands/compose.go b/commands/compose.go
@@ -4,15 +4,16 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
-	"github.com/docker/model-cli/pkg/types"
-	"github.com/spf13/pflag"
 	"slices"
 	"strings"
 
+	"github.com/docker/model-cli/pkg/types"
+	"github.com/spf13/pflag"
+
 	"github.com/docker/model-cli/desktop"
 	"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
-	"github.com/docker/model-runner/pkg/inference/scheduling"
 	dmrm "github.com/docker/model-runner/pkg/inference/models"
+	"github.com/docker/model-runner/pkg/inference/scheduling"
 	"github.com/spf13/cobra"
 )
 
@@ -155,7 +156,7 @@ func downloadModelsOnlyIfNotFound(desktopClient *desktop.Client, models []string
 			}
 			return false
 		}) {
-			_, _, err = desktopClient.Pull(model, func(s string) {
+			_, _, err = desktopClient.Pull(model, false, func(s string) {
 				_ = sendInfo(s)
 			})
 			if err != nil {

diff --git a/commands/pull.go b/commands/pull.go
@@ -11,13 +11,15 @@ import (
 )
 
 func newPullCmd() *cobra.Command {
+	var ignoreRuntimeMemoryCheck bool
+
 	c := &cobra.Command{
 		Use:   "pull MODEL",
 		Short: "Pull a model from Docker Hub or HuggingFace to your local environment",
 		Args: func(cmd *cobra.Command, args []string) error {
 			if len(args) != 1 {
 				return fmt.Errorf(
-					"'docker model run' requires 1 argument.\n\n" +
+					"'docker model pull' requires 1 argument.\n\n" +
 						"Usage:  docker model pull MODEL\n\n" +
 						"See 'docker model pull --help' for more information",
 				)
@@ -28,21 +30,24 @@ func newPullCmd() *cobra.Command {
 			if _, err := ensureStandaloneRunnerAvailable(cmd.Context(), cmd); err != nil {
 				return fmt.Errorf("unable to initialize standalone model runner: %w", err)
 			}
-			return pullModel(cmd, desktopClient, args[0])
+			return pullModel(cmd, desktopClient, args[0], ignoreRuntimeMemoryCheck)
 		},
 		ValidArgsFunction: completion.NoComplete,
 	}
+
+	c.Flags().BoolVar(&ignoreRuntimeMemoryCheck, "ignore-runtime-memory-check", false, "Do not block pull if estimated runtime memory for model exceeds system resources.")
+
 	return c
 }
 
-func pullModel(cmd *cobra.Command, desktopClient *desktop.Client, model string) error {
+func pullModel(cmd *cobra.Command, desktopClient *desktop.Client, model string, ignoreRuntimeMemoryCheck bool) error {
 	var progress func(string)
 	if isatty.IsTerminal(os.Stdout.Fd()) {
 		progress = TUIProgress
 	} else {
 		progress = RawProgress
 	}
-	response, progressShown, err := desktopClient.Pull(model, progress)
+	response, progressShown, err := desktopClient.Pull(model, ignoreRuntimeMemoryCheck, progress)
 
 	// Add a newline before any output (success or error) if progress was shown.
 	if progressShown {

diff --git a/commands/run.go b/commands/run.go
@@ -80,6 +80,7 @@ func readMultilineInput(cmd *cobra.Command, scanner *bufio.Scanner) (string, err
 func newRunCmd() *cobra.Command {
 	var debug bool
 	var backend string
+	var ignoreRuntimeMemoryCheck bool
 
 	const cmdArgs = "MODEL [PROMPT]"
 	c := &cobra.Command{
@@ -124,7 +125,7 @@ func newRunCmd() *cobra.Command {
 						return handleNotRunningError(handleClientError(err, "Failed to inspect model"))
 					}
 					cmd.Println("Unable to find model '" + model + "' locally. Pulling from the server.")
-					if err := pullModel(cmd, desktopClient, model); err != nil {
+					if err := pullModel(cmd, desktopClient, model, ignoreRuntimeMemoryCheck); err != nil {
 						return err
 					}
 				}
@@ -188,6 +189,7 @@ func newRunCmd() *cobra.Command {
 	c.Flags().BoolVar(&debug, "debug", false, "Enable debug logging")
 	c.Flags().StringVar(&backend, "backend", "", fmt.Sprintf("Specify the backend to use (%s)", ValidBackendsKeys()))
 	c.Flags().MarkHidden("backend")
+	c.Flags().BoolVar(&ignoreRuntimeMemoryCheck, "ignore-runtime-memory-check", false, "Do not block pull if estimated runtime memory for model exceeds system resources.")
 
 	return c
 }
diff --git a/desktop/desktop.go b/desktop/desktop.go
@@ -106,9 +106,9 @@ func (c *Client) Status() Status {
 	}
 }
 
-func (c *Client) Pull(model string, progress func(string)) (string, bool, error) {
+func (c *Client) Pull(model string, ignoreRuntimeMemoryCheck bool, progress func(string)) (string, bool, error) {
 	model = normalizeHuggingFaceModelName(model)
-	jsonData, err := json.Marshal(dmrm.ModelCreateRequest{From: model})
+	jsonData, err := json.Marshal(dmrm.ModelCreateRequest{From: model, IgnoreRuntimeMemoryCheck: ignoreRuntimeMemoryCheck})
 	if err != nil {
 		return "", false, fmt.Errorf("error marshaling request: %w", err)
 	}

diff --git a/desktop/desktop_test.go b/desktop/desktop_test.go
@@ -36,7 +36,7 @@ func TestPullHuggingFaceModel(t *testing.T) {
 		Body:       io.NopCloser(bytes.NewBufferString(`{"type":"success","message":"Model pulled successfully"}`)),
 	}, nil)
 
-	_, _, err := client.Pull(modelName, func(s string) {})
+	_, _, err := client.Pull(modelName, false, func(s string) {})
 	assert.NoError(t, err)
 }
 
@@ -122,7 +122,7 @@ func TestNonHuggingFaceModel(t *testing.T) {
 		Body:       io.NopCloser(bytes.NewBufferString(`{"type":"success","message":"Model pulled successfully"}`)),
 	}, nil)
 
-	_, _, err := client.Pull(modelName, func(s string) {})
+	_, _, err := client.Pull(modelName, false, func(s string) {})
 	assert.NoError(t, err)
 }
 

diff --git a/docs/reference/docker_model_pull.yaml b/docs/reference/docker_model_pull.yaml
@@ -5,6 +5,18 @@ long: |
 usage: docker model pull MODEL
 pname: docker model
 plink: docker_model.yaml
+options:
+    - option: ignore-runtime-memory-check
+      value_type: bool
+      default_value: "false"
+      description: |
+        Do not block pull if estimated runtime memory for model exceeds system resources.
+      deprecated: false
+      hidden: false
+      experimental: false
+      experimentalcli: false
+      kubernetes: false
+      swarm: false
 examples: |-
     ### Pulling a model from Docker Hub
 

diff --git a/docs/reference/docker_model_run.yaml b/docs/reference/docker_model_run.yaml
@@ -29,6 +29,17 @@ options:
       experimentalcli: false
       kubernetes: false
       swarm: false
+    - option: ignore-runtime-memory-check
+      value_type: bool
+      default_value: "false"
+      description: |
+        Do not block pull if estimated runtime memory for model exceeds system resources.
+      deprecated: false
+      hidden: false
+      experimental: false
+      experimentalcli: false
+      kubernetes: false
+      swarm: false
 examples: |-
     ### One-time prompt
 

diff --git a/docs/reference/model_pull.md b/docs/reference/model_pull.md
@@ -3,6 +3,12 @@
 <!---MARKER_GEN_START-->
 Pull a model from Docker Hub or HuggingFace to your local environment
 
+### Options
+
+| Name                            | Type   | Default | Description                                                                       |
+|:--------------------------------|:-------|:--------|:----------------------------------------------------------------------------------|
+| `--ignore-runtime-memory-check` | `bool` |         | Do not block pull if estimated runtime memory for model exceeds system resources. |
+
 
 <!---MARKER_GEN_END-->
 

diff --git a/docs/reference/model_run.md b/docs/reference/model_run.md
@@ -5,9 +5,10 @@ Run a model and interact with it using a submitted prompt or chat mode
 
 ### Options
 
-| Name      | Type   | Default | Description          |
-|:----------|:-------|:--------|:---------------------|
-| `--debug` | `bool` |         | Enable debug logging |
+| Name                            | Type   | Default | Description                                                                       |
+|:--------------------------------|:-------|:--------|:----------------------------------------------------------------------------------|
+| `--debug`                       | `bool` |         | Enable debug logging                                                              |
+| `--ignore-runtime-memory-check` | `bool` |         | Do not block pull if estimated runtime memory for model exceeds system resources. |
 
 
 <!---MARKER_GEN_END-->

diff --git a/go.mod b/go.mod
@@ -11,8 +11,8 @@ require (
 	github.com/docker/docker v28.2.2+incompatible
 	github.com/docker/go-connections v0.5.0
 	github.com/docker/go-units v0.5.0
-	github.com/docker/model-distribution v0.0.0-20250724114133-a11d745e582c
-	github.com/docker/model-runner v0.0.0-20250724122432-ecfa5e7e6807
+	github.com/docker/model-distribution v0.0.0-20250813080006-2a983516ebb8
+	github.com/docker/model-runner v0.0.0-20250822151118-d8ed37445584
 	github.com/fatih/color v1.15.0
 	github.com/google/go-containerregistry v0.20.6
 	github.com/mattn/go-isatty v0.0.20

diff --git a/go.sum b/go.sum
@@ -78,10 +78,10 @@ github.com/docker/go-metrics v0.0.1/go.mod h1:cG1hvH2utMXtqgqqYE9plW6lDxS3/5ayHz
 github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
 github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
 github.com/docker/libtrust v0.0.0-20160708172513-aabc10ec26b7/go.mod h1:cyGadeNEkKy96OOhEzfZl+yxihPEzKnqJwvfuSUqbZE=
-github.com/docker/model-distribution v0.0.0-20250724114133-a11d745e582c h1:w9MekYamXmWLe9ZWXWgNXJ7BLDDemXwB8WcF7wzHF5Q=
-github.com/docker/model-distribution v0.0.0-20250724114133-a11d745e582c/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c=
-github.com/docker/model-runner v0.0.0-20250724122432-ecfa5e7e6807 h1:02vImD8wqUDv6VJ2cBLbqzbjn17IMYEi4ileCEjXMQ8=
-github.com/docker/model-runner v0.0.0-20250724122432-ecfa5e7e6807/go.mod h1:rCzRjRXJ42E8JVIA69E9hErJVV5mnUpWdJ2POsktfRs=
+github.com/docker/model-distribution v0.0.0-20250813080006-2a983516ebb8 h1:agH5zeO6tf8lHgMcBZxqCFKPuXHM/cA53gdsn895eMI=
+github.com/docker/model-distribution v0.0.0-20250813080006-2a983516ebb8/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c=
+github.com/docker/model-runner v0.0.0-20250822151118-d8ed37445584 h1:8YAzh9lihwcFGyHTK9pTFqdM7IwYwb0R/YkrNxmQ2do=
+github.com/docker/model-runner v0.0.0-20250822151118-d8ed37445584/go.mod h1:0IAh5ekLg8ipcPAF+Rdav1wbt9xF+zQPoRC1bblk/ik=
 github.com/dvsekhvalnov/jose2go v0.0.0-20170216131308-f21a8cedbbae/go.mod h1:7BvyPhdbLxMXIYTFPLsyJRFMsKmOZnQmzh6Gb+uquuM=
 github.com/elastic/go-sysinfo v1.15.3 h1:W+RnmhKFkqPTCRoFq2VCTmsT4p/fwpo+3gKNQsn1XU0=
 github.com/elastic/go-sysinfo v1.15.3/go.mod h1:K/cNrqYTDrSoMh2oDkYEMS2+a72GRxMvNP+GC+vRIlo=

diff --git a/vendor/github.com/docker/model-distribution/distribution/client.go b/vendor/github.com/docker/model-distribution/distribution/client.go
diff --git a/vendor/github.com/docker/model-distribution/registry/client.go b/vendor/github.com/docker/model-distribution/registry/client.go
diff --git a/vendor/github.com/docker/model-runner/pkg/inference/backend.go b/vendor/github.com/docker/model-runner/pkg/inference/backend.go