diff --git a/cmd/cli/commands/run.go b/cmd/cli/commands/run.go index a3e672f7e..bb7e504e6 100644 --- a/cmd/cli/commands/run.go +++ b/cmd/cli/commands/run.go @@ -307,6 +307,7 @@ func newRunCmd() *cobra.Command { var backend string var ignoreRuntimeMemoryCheck bool var colorMode string + var detach bool const cmdArgs = "MODEL [PROMPT]" c := &cobra.Command{ @@ -341,17 +342,20 @@ func newRunCmd() *cobra.Command { prompt = strings.Join(args[1:], " ") } - fi, err := os.Stdin.Stat() - if err == nil && (fi.Mode()&os.ModeCharDevice) == 0 { - // Read all from stdin - reader := bufio.NewReader(os.Stdin) - input, err := io.ReadAll(reader) - if err == nil { - if prompt != "" { - prompt += "\n\n" + // Only read from stdin if not in detach mode + if !detach { + fi, err := os.Stdin.Stat() + if err == nil && (fi.Mode()&os.ModeCharDevice) == 0 { + // Read all from stdin + reader := bufio.NewReader(os.Stdin) + input, err := io.ReadAll(reader) + if err == nil { + if prompt != "" { + prompt += "\n\n" + } + + prompt += string(input) } - - prompt += string(input) } } @@ -381,6 +385,21 @@ func newRunCmd() *cobra.Command { } } + // Handle --detach flag: just load the model without interaction + if detach { + // Make a minimal request to load the model into memory + err := desktopClient.Chat(backend, model, "", apiKey, func(content string) { + // Silently discard output in detach mode + }, false) + if err != nil { + return handleClientError(err, "Failed to load model") + } + if debug { + cmd.Printf("Model %s loaded successfully\n", model) + } + return nil + } + if prompt != "" { if err := chatWithMarkdown(cmd, desktopClient, backend, model, prompt, apiKey); err != nil { return handleClientError(err, "Failed to generate a response") @@ -439,6 +458,7 @@ func newRunCmd() *cobra.Command { c.Flags().MarkHidden("backend") c.Flags().BoolVar(&ignoreRuntimeMemoryCheck, "ignore-runtime-memory-check", false, "Do not block pull if estimated runtime memory for model exceeds system resources.") c.Flags().StringVar(&colorMode, "color", "auto", "Use colored output (auto|yes|no)") + c.Flags().BoolVarP(&detach, "detach", "d", false, "Load the model in the background without interaction") return c } diff --git a/cmd/cli/commands/run_test.go b/cmd/cli/commands/run_test.go index f8674822f..422f6efa6 100644 --- a/cmd/cli/commands/run_test.go +++ b/cmd/cli/commands/run_test.go @@ -113,3 +113,46 @@ func TestReadMultilineInputUnclosed(t *testing.T) { t.Errorf("readMultilineInput() error should mention unclosed multiline input, got: %v", err) } } + +func TestRunCmdDetachFlag(t *testing.T) { + // Create the run command + cmd := newRunCmd() + + // Verify the --detach flag exists + detachFlag := cmd.Flags().Lookup("detach") + if detachFlag == nil { + t.Fatal("--detach flag not found") + } + + // Verify the shorthand flag exists + detachFlagShort := cmd.Flags().ShorthandLookup("d") + if detachFlagShort == nil { + t.Fatal("-d shorthand flag not found") + } + + // Verify the default value is false + if detachFlag.DefValue != "false" { + t.Errorf("Expected default detach value to be 'false', got '%s'", detachFlag.DefValue) + } + + // Verify the flag type + if detachFlag.Value.Type() != "bool" { + t.Errorf("Expected detach flag type to be 'bool', got '%s'", detachFlag.Value.Type()) + } + + // Test setting the flag value + err := cmd.Flags().Set("detach", "true") + if err != nil { + t.Errorf("Failed to set detach flag: %v", err) + } + + // Verify the value was set + detachValue, err := cmd.Flags().GetBool("detach") + if err != nil { + t.Errorf("Failed to get detach flag value: %v", err) + } + + if !detachValue { + t.Errorf("Expected detach flag value to be true, got false") + } +} diff --git a/cmd/cli/docs/reference/docker_model_run.yaml b/cmd/cli/docs/reference/docker_model_run.yaml index 44b1340fd..a0fa17123 100644 --- a/cmd/cli/docs/reference/docker_model_run.yaml +++ b/cmd/cli/docs/reference/docker_model_run.yaml @@ -39,6 +39,17 @@ options: experimentalcli: false kubernetes: false swarm: false + - option: detach + shorthand: d + value_type: bool + default_value: "false" + description: Load the model in the background without interaction + deprecated: false + hidden: false + experimental: false + experimentalcli: false + kubernetes: false + swarm: false - option: ignore-runtime-memory-check value_type: bool default_value: "false" @@ -78,6 +89,14 @@ examples: |- > /bye Chat session ended. ``` + + ### Pre-load a model + + ```console + docker model run --detach ai/smollm2 + ``` + + This loads the model into memory without interaction, ensuring maximum performance for subsequent requests. deprecated: false hidden: false experimental: false diff --git a/cmd/cli/docs/reference/model_run.md b/cmd/cli/docs/reference/model_run.md index 6b0c3cc6a..4f92d9e6a 100644 --- a/cmd/cli/docs/reference/model_run.md +++ b/cmd/cli/docs/reference/model_run.md @@ -9,6 +9,7 @@ Run a model and interact with it using a submitted prompt or chat mode |:--------------------------------|:---------|:--------|:----------------------------------------------------------------------------------| | `--color` | `string` | `auto` | Use colored output (auto\|yes\|no) | | `--debug` | `bool` | | Enable debug logging | +| `-d`, `--detach` | `bool` | | Load the model in the background without interaction | | `--ignore-runtime-memory-check` | `bool` | | Do not block pull if estimated runtime memory for model exceeds system resources. | @@ -51,3 +52,11 @@ Hi there! It's SmolLM, AI assistant. How can I help you today? > /bye Chat session ended. ``` + +### Pre-load a model + +```console +docker model run --detach ai/smollm2 +``` + +This loads the model into memory without interaction, ensuring maximum performance for subsequent requests.