Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 30 additions & 10 deletions cmd/cli/commands/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ func newRunCmd() *cobra.Command {
var backend string
var ignoreRuntimeMemoryCheck bool
var colorMode string
var detach bool

const cmdArgs = "MODEL [PROMPT]"
c := &cobra.Command{
Expand Down Expand Up @@ -341,17 +342,20 @@ func newRunCmd() *cobra.Command {
prompt = strings.Join(args[1:], " ")
}

fi, err := os.Stdin.Stat()
if err == nil && (fi.Mode()&os.ModeCharDevice) == 0 {
// Read all from stdin
reader := bufio.NewReader(os.Stdin)
input, err := io.ReadAll(reader)
if err == nil {
if prompt != "" {
prompt += "\n\n"
// Only read from stdin if not in detach mode
if !detach {
fi, err := os.Stdin.Stat()
if err == nil && (fi.Mode()&os.ModeCharDevice) == 0 {
// Read all from stdin
reader := bufio.NewReader(os.Stdin)
input, err := io.ReadAll(reader)
if err == nil {
if prompt != "" {
prompt += "\n\n"
}

prompt += string(input)
}

prompt += string(input)
}
}

Expand Down Expand Up @@ -381,6 +385,21 @@ func newRunCmd() *cobra.Command {
}
}

// Handle --detach flag: just load the model without interaction
if detach {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't this have been placed at the beginning of the function so we don't need another if !detach?

// Make a minimal request to load the model into memory
err := desktopClient.Chat(backend, model, "", apiKey, func(content string) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The request will be recorded, not a big deal, but to keep in mind

Copy link
Contributor Author

@ericcurtin ericcurtin Oct 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes me think do we have a way of forking without chatting? 🤔

// Silently discard output in detach mode
}, false)
if err != nil {
return handleClientError(err, "Failed to load model")
}
if debug {
cmd.Printf("Model %s loaded successfully\n", model)
}
return nil
}

if prompt != "" {
if err := chatWithMarkdown(cmd, desktopClient, backend, model, prompt, apiKey); err != nil {
return handleClientError(err, "Failed to generate a response")
Expand Down Expand Up @@ -439,6 +458,7 @@ func newRunCmd() *cobra.Command {
c.Flags().MarkHidden("backend")
c.Flags().BoolVar(&ignoreRuntimeMemoryCheck, "ignore-runtime-memory-check", false, "Do not block pull if estimated runtime memory for model exceeds system resources.")
c.Flags().StringVar(&colorMode, "color", "auto", "Use colored output (auto|yes|no)")
c.Flags().BoolVarP(&detach, "detach", "d", false, "Load the model in the background without interaction")

return c
}
43 changes: 43 additions & 0 deletions cmd/cli/commands/run_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,46 @@ func TestReadMultilineInputUnclosed(t *testing.T) {
t.Errorf("readMultilineInput() error should mention unclosed multiline input, got: %v", err)
}
}

// TestRunCmdDetachFlag verifies that the run command registers the
// --detach/-d flag with the expected type, default, and settability.
func TestRunCmdDetachFlag(t *testing.T) {
	cmd := newRunCmd()
	flags := cmd.Flags()

	// The long-form flag must be registered.
	long := flags.Lookup("detach")
	if long == nil {
		t.Fatal("--detach flag not found")
	}

	// The -d shorthand must resolve to a flag as well.
	if flags.ShorthandLookup("d") == nil {
		t.Fatal("-d shorthand flag not found")
	}

	// Detach must be off unless explicitly requested.
	if long.DefValue != "false" {
		t.Errorf("Expected default detach value to be 'false', got '%s'", long.DefValue)
	}

	// It must be declared as a boolean flag.
	if got := long.Value.Type(); got != "bool" {
		t.Errorf("Expected detach flag type to be 'bool', got '%s'", got)
	}

	// Setting the flag through the flag set should succeed...
	if err := flags.Set("detach", "true"); err != nil {
		t.Errorf("Failed to set detach flag: %v", err)
	}

	// ...and the new value should round-trip back out.
	val, err := flags.GetBool("detach")
	if err != nil {
		t.Errorf("Failed to get detach flag value: %v", err)
	}
	if !val {
		t.Errorf("Expected detach flag value to be true, got false")
	}
}
19 changes: 19 additions & 0 deletions cmd/cli/docs/reference/docker_model_run.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,17 @@ options:
experimentalcli: false
kubernetes: false
swarm: false
- option: detach
shorthand: d
value_type: bool
default_value: "false"
description: Load the model in the background without interaction
deprecated: false
hidden: false
experimental: false
experimentalcli: false
kubernetes: false
swarm: false
- option: ignore-runtime-memory-check
value_type: bool
default_value: "false"
Expand Down Expand Up @@ -78,6 +89,14 @@ examples: |-
> /bye
Chat session ended.
```

### Pre-load a model

```console
docker model run --detach ai/smollm2
```

This loads the model into memory without starting an interactive session, so subsequent requests are served without incurring model-loading latency.
deprecated: false
hidden: false
experimental: false
Expand Down
9 changes: 9 additions & 0 deletions cmd/cli/docs/reference/model_run.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Run a model and interact with it using a submitted prompt or chat mode
|:--------------------------------|:---------|:--------|:----------------------------------------------------------------------------------|
| `--color` | `string` | `auto` | Use colored output (auto\|yes\|no) |
| `--debug` | `bool` | | Enable debug logging |
| `-d`, `--detach` | `bool` | | Load the model in the background without interaction |
| `--ignore-runtime-memory-check` | `bool` | | Do not block pull if estimated runtime memory for model exceeds system resources. |


Expand Down Expand Up @@ -51,3 +52,11 @@ Hi there! It's SmolLM, AI assistant. How can I help you today?
> /bye
Chat session ended.
```

### Pre-load a model

```console
docker model run --detach ai/smollm2
```

This loads the model into memory without starting an interactive session, so subsequent requests are served without incurring model-loading latency.
Loading