Skip to content
This repository was archived by the owner on Oct 6, 2025. It is now read-only.

Commit f7ab896

Browse files
authored
Merge pull request #79 from doringeman/compose-llama-args
Configure inference backend via compose up
2 parents ef99eaa + 5cfc9d3 commit f7ab896

File tree

326 files changed

+47494
-33
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

326 files changed

+47494
-33
lines changed

commands/compose.go

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,8 @@ import (
88
"strings"
99

1010
"github.com/docker/model-cli/desktop"
11+
"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
12+
"github.com/docker/model-runner/pkg/inference/scheduling"
1113
"github.com/spf13/cobra"
1214
)
1315

@@ -26,6 +28,9 @@ func newComposeCmd() *cobra.Command {
2628

2729
func newUpCommand() *cobra.Command {
2830
var models []string
31+
var ctxSize int64
32+
var rawRuntimeFlags string
33+
var backend string
2934
c := &cobra.Command{
3035
Use: "up",
3136
RunE: func(cmd *cobra.Command, args []string) error {
@@ -35,6 +40,14 @@ func newUpCommand() *cobra.Command {
3540
return err
3641
}
3742

43+
sendInfo("Initializing model runner...")
44+
if ctxSize != 4096 {
45+
sendInfo(fmt.Sprintf("Setting context size to %d", ctxSize))
46+
}
47+
if rawRuntimeFlags != "" {
48+
sendInfo("Setting raw runtime flags to " + rawRuntimeFlags)
49+
}
50+
3851
kind := modelRunner.EngineKind()
3952
standalone, err := ensureStandaloneRunnerAvailable(cmd.Context(), nil)
4053
if err != nil {
@@ -50,6 +63,19 @@ func newUpCommand() *cobra.Command {
5063
return err
5164
}
5265

66+
for _, model := range models {
67+
if err := desktopClient.ConfigureBackend(scheduling.ConfigureRequest{
68+
Model: model,
69+
ContextSize: ctxSize,
70+
RawRuntimeFlags: rawRuntimeFlags,
71+
}); err != nil {
72+
configErrFmtString := "failed to configure backend for model %s with context-size %d and runtime-flags %s"
73+
_ = sendErrorf(configErrFmtString+": %v", model, ctxSize, rawRuntimeFlags, err)
74+
return fmt.Errorf(configErrFmtString+": %w", model, ctxSize, rawRuntimeFlags, err)
75+
}
76+
sendInfo("Successfully configured backend for model " + model)
77+
}
78+
5379
switch kind {
5480
case desktop.ModelRunnerEngineKindDesktop:
5581
_ = setenv("URL", "http://model-runner.docker.internal/engines/v1/")
@@ -66,6 +92,9 @@ func newUpCommand() *cobra.Command {
6692
},
6793
}
6894
c.Flags().StringArrayVar(&models, "model", nil, "model to use")
95+
c.Flags().Int64Var(&ctxSize, "context-size", -1, "context size for the model")
96+
c.Flags().StringVar(&rawRuntimeFlags, "runtime-flags", "", "raw runtime flags to pass to the inference engine")
97+
c.Flags().StringVar(&backend, "backend", llamacpp.Name, "inference backend to use")
6998
return c
7099
}
71100

desktop/desktop.go

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@ import (
1414

1515
"github.com/docker/model-runner/pkg/inference"
1616
"github.com/docker/model-runner/pkg/inference/models"
17+
"github.com/docker/model-runner/pkg/inference/scheduling"
1718
"github.com/pkg/errors"
1819
"go.opentelemetry.io/otel"
1920
)
@@ -542,6 +543,30 @@ func (c *Client) Unload(req UnloadRequest) (UnloadResponse, error) {
542543
return unloadResp, nil
543544
}
544545

546+
func (c *Client) ConfigureBackend(request scheduling.ConfigureRequest) error {
547+
configureBackendPath := inference.InferencePrefix + "/_configure"
548+
jsonData, err := json.Marshal(request)
549+
if err != nil {
550+
return fmt.Errorf("error marshaling request: %w", err)
551+
}
552+
553+
resp, err := c.doRequest(http.MethodPost, configureBackendPath, bytes.NewReader(jsonData))
554+
if err != nil {
555+
return c.handleQueryError(err, configureBackendPath)
556+
}
557+
defer resp.Body.Close()
558+
559+
if resp.StatusCode != http.StatusAccepted {
560+
body, _ := io.ReadAll(resp.Body)
561+
if resp.StatusCode == http.StatusConflict {
562+
return fmt.Errorf("%s", body)
563+
}
564+
return fmt.Errorf("%s (%s)", body, resp.Status)
565+
}
566+
567+
return nil
568+
}
569+
545570
// doRequest is a helper function that performs HTTP requests and handles 503 responses
546571
func (c *Client) doRequest(method, path string, body io.Reader) (*http.Response, error) {
547572
req, err := http.NewRequest(method, c.modelRunner.URL(path), body)

docs/reference/docker_model_compose_up.yaml

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,26 @@ usage: docker model compose up
33
pname: docker model compose
44
plink: docker_model_compose.yaml
55
options:
6+
- option: backend
7+
value_type: string
8+
default_value: llama.cpp
9+
description: inference backend to use
10+
deprecated: false
11+
hidden: false
12+
experimental: false
13+
experimentalcli: false
14+
kubernetes: false
15+
swarm: false
16+
- option: context-size
17+
value_type: int64
18+
default_value: "-1"
19+
description: context size for the model
20+
deprecated: false
21+
hidden: false
22+
experimental: false
23+
experimentalcli: false
24+
kubernetes: false
25+
swarm: false
626
- option: model
727
value_type: stringArray
828
default_value: '[]'
@@ -13,6 +33,15 @@ options:
1333
experimentalcli: false
1434
kubernetes: false
1535
swarm: false
36+
- option: runtime-flags
37+
value_type: string
38+
description: raw runtime flags to pass to the inference engine
39+
deprecated: false
40+
hidden: false
41+
experimental: false
42+
experimentalcli: false
43+
kubernetes: false
44+
swarm: false
1645
inherited_options:
1746
- option: project-name
1847
value_type: string

go.mod

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ require (
99
github.com/docker/go-connections v0.5.0
1010
github.com/docker/go-units v0.5.0
1111
github.com/docker/model-distribution v0.0.0-20250512190053-b3792c042d57
12-
github.com/docker/model-runner v0.0.0-20250512190413-96af7b750f88
12+
github.com/docker/model-runner v0.0.0-20250613083629-6b8c3b816f00
1313
github.com/google/go-containerregistry v0.20.3
1414
github.com/nxadm/tail v1.4.8
1515
github.com/olekukonko/tablewriter v0.0.5
@@ -23,15 +23,20 @@ require (
2323
)
2424

2525
require (
26+
github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 // indirect
2627
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect
2728
github.com/BurntSushi/toml v1.4.0 // indirect
2829
github.com/Microsoft/go-winio v0.6.2 // indirect
2930
github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d // indirect
31+
github.com/StackExchange/wmi v1.2.1 // indirect
3032
github.com/beorn7/perks v1.0.1 // indirect
3133
github.com/bugsnag/panicwrap v1.3.4 // indirect
3234
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
3335
github.com/cespare/xxhash/v2 v2.3.0 // indirect
36+
github.com/containerd/containerd/v2 v2.0.4 // indirect
37+
github.com/containerd/errdefs v1.0.0 // indirect
3438
github.com/containerd/log v0.1.0 // indirect
39+
github.com/containerd/platforms v1.0.0-rc.1 // indirect
3540
github.com/containerd/stargz-snapshotter/estargz v0.16.3 // indirect
3641
github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect
3742
github.com/creack/pty v1.1.24 // indirect
@@ -46,6 +51,7 @@ require (
4651
github.com/fvbommel/sortorder v1.1.0 // indirect
4752
github.com/go-logr/logr v1.4.2 // indirect
4853
github.com/go-logr/stdr v1.2.2 // indirect
54+
github.com/go-ole/go-ole v1.2.6 // indirect
4955
github.com/go-sql-driver/mysql v1.6.0 // indirect
5056
github.com/gogo/protobuf v1.3.2 // indirect
5157
github.com/google/uuid v1.6.0 // indirect
@@ -54,15 +60,19 @@ require (
5460
github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1 // indirect
5561
github.com/henvic/httpretty v0.1.4 // indirect
5662
github.com/inconshreveable/mousetrap v1.1.0 // indirect
63+
github.com/jaypipes/ghw v0.16.0 // indirect
64+
github.com/jaypipes/pcidb v1.0.1 // indirect
5765
github.com/jinzhu/gorm v1.9.16 // indirect
5866
github.com/json-iterator/go v1.1.12 // indirect
5967
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 // indirect
6068
github.com/klauspost/compress v1.18.0 // indirect
6169
github.com/mattn/go-runewidth v0.0.16 // indirect
70+
github.com/mattn/go-shellwords v1.0.12 // indirect
6271
github.com/miekg/pkcs11 v1.1.1 // indirect
6372
github.com/mitchellh/go-homedir v1.1.0 // indirect
6473
github.com/mitchellh/mapstructure v1.5.1-0.20220423185008-bf980b35cac4 // indirect
6574
github.com/moby/docker-image-spec v1.3.1 // indirect
75+
github.com/moby/locker v1.0.1 // indirect
6676
github.com/moby/sys/sequential v0.6.0 // indirect
6777
github.com/moby/term v0.5.2 // indirect
6878
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
@@ -110,4 +120,5 @@ require (
110120
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
111121
gopkg.in/yaml.v3 v3.0.1 // indirect
112122
gotest.tools/v3 v3.5.2 // indirect
123+
howett.net/plist v1.0.0 // indirect
113124
)

0 commit comments

Comments
 (0)