From 247d9e06a3c784790fa24a6ce1e9e2ce5eba2800 Mon Sep 17 00:00:00 2001 From: Emily Casey Date: Wed, 25 Jun 2025 12:36:25 -0600 Subject: [PATCH 1/4] Respect context size from model config Signed-off-by: Emily Casey --- go.mod | 2 +- go.sum | 2 + pkg/inference/backends/llamacpp/llamacpp.go | 16 +-- .../backends/llamacpp/llamacpp_config.go | 29 ++++- .../backends/llamacpp/llamacpp_config_test.go | 105 +++++++++++++++++- pkg/inference/config/config.go | 3 +- 6 files changed, 141 insertions(+), 16 deletions(-) diff --git a/go.mod b/go.mod index 9f3321e9f..66bcd5314 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.23.7 require ( github.com/containerd/containerd/v2 v2.0.4 github.com/containerd/platforms v1.0.0-rc.1 - github.com/docker/model-distribution v0.0.0-20250618082521-fb5c8332c857 + github.com/docker/model-distribution v0.0.0-20250627152504-c0c68acaabd5 github.com/google/go-containerregistry v0.20.3 github.com/jaypipes/ghw v0.16.0 github.com/mattn/go-shellwords v1.0.12 diff --git a/go.sum b/go.sum index bf9d4b520..cdc670919 100644 --- a/go.sum +++ b/go.sum @@ -40,6 +40,8 @@ github.com/docker/docker-credential-helpers v0.8.2 h1:bX3YxiGzFP5sOXWc3bTPEXdEaZ github.com/docker/docker-credential-helpers v0.8.2/go.mod h1:P3ci7E3lwkZg6XiHdRKft1KckHiO9a2rNtyFbZ/ry9M= github.com/docker/model-distribution v0.0.0-20250618082521-fb5c8332c857 h1:2IvvpdPZvpNn06+RUh5DC5O64dnrKjdsBKCMrzR5QTk= github.com/docker/model-distribution v0.0.0-20250618082521-fb5c8332c857/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c= +github.com/docker/model-distribution v0.0.0-20250627152504-c0c68acaabd5 h1:4qs7OwrJJQE1de5kbHg83gm5rmCRQzAwAp8gG/3cLY8= +github.com/docker/model-distribution v0.0.0-20250627152504-c0c68acaabd5/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= diff --git a/pkg/inference/backends/llamacpp/llamacpp.go b/pkg/inference/backends/llamacpp/llamacpp.go index 00fa57b86..f745320d0 100644 --- a/pkg/inference/backends/llamacpp/llamacpp.go +++ b/pkg/inference/backends/llamacpp/llamacpp.go @@ -11,7 +11,6 @@ import ( "os/exec" "path/filepath" "runtime" - "strconv" "strings" "github.com/docker/model-runner/pkg/diskusage" @@ -122,10 +121,9 @@ func (l *llamaCpp) Install(ctx context.Context, httpClient *http.Client) error { // Run implements inference.Backend.Run. func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference.BackendMode, config *inference.BackendConfiguration) error { - modelPath, err := l.modelManager.GetModelPath(model) - l.log.Infof("Model path: %s", modelPath) + mdl, err := l.modelManager.GetModel(model) if err != nil { - return fmt.Errorf("failed to get model path: %w", err) + return fmt.Errorf("failed to get model: %w", err) } if err := os.RemoveAll(socket); err != nil && !errors.Is(err, fs.ErrNotExist) { @@ -138,13 +136,9 @@ func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference binPath = l.updatedServerStoragePath } - args := l.config.GetArgs(modelPath, socket, mode) - - if config != nil { - if config.ContextSize >= 0 { - args = append(args, "--ctx-size", strconv.Itoa(int(config.ContextSize))) - } - args = append(args, config.RuntimeFlags...) + args, err := l.config.GetArgs(mdl, socket, mode, config) + if err != nil { + return fmt.Errorf("failed to get args for llama.cpp: %w", err) } l.log.Infof("llamaCppArgs: %v", args) diff --git a/pkg/inference/backends/llamacpp/llamacpp_config.go b/pkg/inference/backends/llamacpp/llamacpp_config.go index c7990ce97..7b5eec49f 100644 --- a/pkg/inference/backends/llamacpp/llamacpp_config.go +++ b/pkg/inference/backends/llamacpp/llamacpp_config.go @@ -1,9 +1,11 @@ package llamacpp import ( + "fmt" "runtime" "strconv" + "github.com/docker/model-distribution/types" "github.com/docker/model-runner/pkg/inference" ) @@ -33,10 +35,20 @@ func NewDefaultLlamaCppConfig() *Config { } // GetArgs implements BackendConfig.GetArgs. -func (c *Config) GetArgs(modelPath, socket string, mode inference.BackendMode) []string { +func (c *Config) GetArgs(model types.Model, socket string, mode inference.BackendMode, config *inference.BackendConfiguration) ([]string, error) { // Start with the arguments from LlamaCppConfig args := append([]string{}, c.Args...) + modelPath, err := model.GGUFPath() + if err != nil { + return nil, fmt.Errorf("get gguf path: %v", err) + } + + modelCfg, err := model.Config() + if err != nil { + return nil, fmt.Errorf("get model config: %v", err) + } + // Add model and socket arguments args = append(args, "--model", modelPath, "--host", socket) @@ -45,7 +57,20 @@ func (c *Config) GetArgs(modelPath, socket string, mode inference.BackendMode) [ args = append(args, "--embeddings") } - return args + // Add arguments from model config + if modelCfg.ContextSize != nil { + args = append(args, "--ctx-size", strconv.FormatUint(*modelCfg.ContextSize, 10)) + } + + // Add arguments from backend config + if config != nil { + if config.ContextSize > 0 && !containsArg(args, "--ctx-size") { + args = append(args, "--ctx-size", fmt.Sprintf("%d", config.ContextSize)) + } + args = append(args, config.RuntimeFlags...) + } + + return args, nil } // containsArg checks if the given argument is already in the args slice. diff --git a/pkg/inference/backends/llamacpp/llamacpp_config_test.go b/pkg/inference/backends/llamacpp/llamacpp_config_test.go index d1b937b28..6856981dc 100644 --- a/pkg/inference/backends/llamacpp/llamacpp_config_test.go +++ b/pkg/inference/backends/llamacpp/llamacpp_config_test.go @@ -5,6 +5,7 @@ import ( "strconv" "testing" + "github.com/docker/model-distribution/types" "github.com/docker/model-runner/pkg/inference" ) @@ -72,12 +73,17 @@ func TestGetArgs(t *testing.T) { tests := []struct { name string + model types.Model mode inference.BackendMode + config *inference.BackendConfiguration expected []string }{ { name: "completion mode", mode: inference.BackendModeCompletion, + model: &fakeModel{ + ggufPath: modelPath, + }, expected: []string{ "--jinja", "-ngl", "100", @@ -89,6 +95,27 @@ func TestGetArgs(t *testing.T) { { name: "embedding mode", mode: inference.BackendModeEmbedding, + model: &fakeModel{ + ggufPath: modelPath, + }, + expected: []string{ + "--jinja", + "-ngl", "100", + "--metrics", + "--model", modelPath, + "--host", socket, + "--embeddings", + }, + }, + { + name: "context size from backend config", + mode: inference.BackendModeEmbedding, + model: &fakeModel{ + ggufPath: modelPath, + }, + config: &inference.BackendConfiguration{ + ContextSize: 1234, + }, expected: []string{ "--jinja", "-ngl", "100", @@ -96,13 +123,58 @@ func TestGetArgs(t *testing.T) { "--model", modelPath, "--host", socket, "--embeddings", + "--ctx-size", "1234", // should add this flag + }, + }, + { + name: "context size from model config", + mode: inference.BackendModeEmbedding, + model: &fakeModel{ + ggufPath: modelPath, + config: types.Config{ + ContextSize: uint64ptr(2096), + }, + }, + config: &inference.BackendConfiguration{ + ContextSize: 1234, + }, + expected: []string{ + "--jinja", + "-ngl", "100", + "--metrics", + "--model", modelPath, + "--host", socket, + "--embeddings", + "--ctx-size", "2096", // model config takes precedence + }, + }, + { + name: "raw flags from backend config", + mode: inference.BackendModeEmbedding, + model: &fakeModel{ + ggufPath: modelPath, + }, + config: &inference.BackendConfiguration{ + RuntimeFlags: []string{"--some", "flag"}, + }, + expected: []string{ + "--jinja", + "-ngl", "100", + "--metrics", + "--model", modelPath, + "--host", socket, + "--embeddings", + "--some", "flag", // model config takes precedence }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - args := config.GetArgs(modelPath, socket, tt.mode) + args, err := config.GetArgs(tt.model, socket, tt.mode, tt.config) + if err != nil { + t.Errorf("GetArgs() error = %v", err) + } // Check that all expected arguments are present and in the correct order expectedIndex := 0 @@ -171,3 +243,34 @@ func TestContainsArg(t *testing.T) { }) } } + +var _ types.Model = &fakeModel{} + +type fakeModel struct { + ggufPath string + config types.Config +} + +func (f *fakeModel) ID() (string, error) { + panic("shouldn't be called") +} + +func (f *fakeModel) GGUFPath() (string, error) { + return f.ggufPath, nil +} + +func (f *fakeModel) Config() (types.Config, error) { + return f.config, nil +} + +func (f *fakeModel) Tags() []string { + panic("shouldn't be called") +} + +func (f fakeModel) Descriptor() (types.Descriptor, error) { + panic("shouldn't be called") +} + +func uint64ptr(n uint64) *uint64 { + return &n +} diff --git a/pkg/inference/config/config.go b/pkg/inference/config/config.go index 6d05d1af4..8163759d4 100644 --- a/pkg/inference/config/config.go +++ b/pkg/inference/config/config.go @@ -1,6 +1,7 @@ package config import ( + "github.com/docker/model-distribution/types" "github.com/docker/model-runner/pkg/inference" ) @@ -11,5 +12,5 @@ type BackendConfig interface { // GetArgs returns the command-line arguments for the backend. // It takes the model path, socket, and mode as input and returns // the appropriate arguments for the backend. - GetArgs(modelPath, socket string, mode inference.BackendMode) []string + GetArgs(model types.Model, socket string, mode inference.BackendMode, config *inference.BackendConfiguration) ([]string, error) } From 4d690797f2a9bc4e9c2a48f900625099dbe0a63f Mon Sep 17 00:00:00 2001 From: Emily Casey Date: Fri, 27 Jun 2025 10:45:01 -0600 Subject: [PATCH 2/4] Bump model-distribution to point at main branch Signed-off-by: Emily Casey --- go.mod | 2 +- go.sum | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 66bcd5314..f94d738b6 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.23.7 require ( github.com/containerd/containerd/v2 v2.0.4 github.com/containerd/platforms v1.0.0-rc.1 - github.com/docker/model-distribution v0.0.0-20250627152504-c0c68acaabd5 + github.com/docker/model-distribution v0.0.0-20250627163720-aff34abcf3e0 github.com/google/go-containerregistry v0.20.3 github.com/jaypipes/ghw v0.16.0 github.com/mattn/go-shellwords v1.0.12 diff --git a/go.sum b/go.sum index cdc670919..e851c5df8 100644 --- a/go.sum +++ b/go.sum @@ -38,10 +38,8 @@ github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBi github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= github.com/docker/docker-credential-helpers v0.8.2 h1:bX3YxiGzFP5sOXWc3bTPEXdEaZSeVMrFgOr3T+zrFAo= github.com/docker/docker-credential-helpers v0.8.2/go.mod h1:P3ci7E3lwkZg6XiHdRKft1KckHiO9a2rNtyFbZ/ry9M= -github.com/docker/model-distribution v0.0.0-20250618082521-fb5c8332c857 h1:2IvvpdPZvpNn06+RUh5DC5O64dnrKjdsBKCMrzR5QTk= -github.com/docker/model-distribution v0.0.0-20250618082521-fb5c8332c857/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c= -github.com/docker/model-distribution v0.0.0-20250627152504-c0c68acaabd5 h1:4qs7OwrJJQE1de5kbHg83gm5rmCRQzAwAp8gG/3cLY8= -github.com/docker/model-distribution v0.0.0-20250627152504-c0c68acaabd5/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c= +github.com/docker/model-distribution v0.0.0-20250627163720-aff34abcf3e0 h1:bve4JZI06Admw+NewtPfrpJXsvRnGKTQvBOEICNC1C0= +github.com/docker/model-distribution v0.0.0-20250627163720-aff34abcf3e0/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= From aac988e62a604f753abe341f82889d8331913552 Mon Sep 17 00:00:00 2001 From: Emily Casey Date: Fri, 27 Jun 2025 10:47:18 -0600 Subject: [PATCH 3/4] Apply suggestions from code review Signed-off-by: Emily Casey Co-authored-by: Jacob Howard --- pkg/inference/backends/llamacpp/llamacpp_config.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/inference/backends/llamacpp/llamacpp_config.go b/pkg/inference/backends/llamacpp/llamacpp_config.go index 7b5eec49f..45fb5f158 100644 --- a/pkg/inference/backends/llamacpp/llamacpp_config.go +++ b/pkg/inference/backends/llamacpp/llamacpp_config.go @@ -41,12 +41,12 @@ func (c *Config) GetArgs(model types.Model, socket string, mode inference.Backen modelPath, err := model.GGUFPath() if err != nil { - return nil, fmt.Errorf("get gguf path: %v", err) + return nil, fmt.Errorf("get gguf path: %w", err) } modelCfg, err := model.Config() if err != nil { - return nil, fmt.Errorf("get model config: %v", err) + return nil, fmt.Errorf("get model config: %w", err) } // Add model and socket arguments From cbdbb83bf70ad74b80a36fcea3243e5d0a64cfc1 Mon Sep 17 00:00:00 2001 From: Emily Casey Date: Fri, 27 Jun 2025 10:54:59 -0600 Subject: [PATCH 4/4] Incorporate review feedback Signed-off-by: Emily Casey --- pkg/inference/backends/llamacpp/llamacpp_config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/inference/backends/llamacpp/llamacpp_config.go b/pkg/inference/backends/llamacpp/llamacpp_config.go index 45fb5f158..cd4668c5f 100644 --- a/pkg/inference/backends/llamacpp/llamacpp_config.go +++ b/pkg/inference/backends/llamacpp/llamacpp_config.go @@ -65,7 +65,7 @@ func (c *Config) GetArgs(model types.Model, socket string, mode inference.Backen // Add arguments from backend config if config != nil { if config.ContextSize > 0 && !containsArg(args, "--ctx-size") { - args = append(args, "--ctx-size", fmt.Sprintf("%d", config.ContextSize)) + args = append(args, "--ctx-size", strconv.FormatInt(config.ContextSize, 10)) } args = append(args, config.RuntimeFlags...) }