From 247d9e06a3c784790fa24a6ce1e9e2ce5eba2800 Mon Sep 17 00:00:00 2001
From: Emily Casey <emily.casey@docker.com>
Date: Wed, 25 Jun 2025 12:36:25 -0600
Subject: [PATCH 1/4] Respect context size from model config

Signed-off-by: Emily Casey <emily.casey@docker.com>
---
 go.mod                                        |   2 +-
 go.sum                                        |   2 +
 pkg/inference/backends/llamacpp/llamacpp.go   |  16 +--
 .../backends/llamacpp/llamacpp_config.go      |  29 ++++-
 .../backends/llamacpp/llamacpp_config_test.go | 105 +++++++++++++++++-
 pkg/inference/config/config.go                |   3 +-
 6 files changed, 141 insertions(+), 16 deletions(-)

diff --git a/go.mod b/go.mod
index 9f3321e9f..66bcd5314 100644
--- a/go.mod
+++ b/go.mod
@@ -5,7 +5,7 @@ go 1.23.7
 require (
 	github.com/containerd/containerd/v2 v2.0.4
 	github.com/containerd/platforms v1.0.0-rc.1
-	github.com/docker/model-distribution v0.0.0-20250618082521-fb5c8332c857
+	github.com/docker/model-distribution v0.0.0-20250627152504-c0c68acaabd5
 	github.com/google/go-containerregistry v0.20.3
 	github.com/jaypipes/ghw v0.16.0
 	github.com/mattn/go-shellwords v1.0.12
diff --git a/go.sum b/go.sum
index bf9d4b520..cdc670919 100644
--- a/go.sum
+++ b/go.sum
@@ -40,6 +40,8 @@ github.com/docker/docker-credential-helpers v0.8.2 h1:bX3YxiGzFP5sOXWc3bTPEXdEaZ
 github.com/docker/docker-credential-helpers v0.8.2/go.mod h1:P3ci7E3lwkZg6XiHdRKft1KckHiO9a2rNtyFbZ/ry9M=
 github.com/docker/model-distribution v0.0.0-20250618082521-fb5c8332c857 h1:2IvvpdPZvpNn06+RUh5DC5O64dnrKjdsBKCMrzR5QTk=
 github.com/docker/model-distribution v0.0.0-20250618082521-fb5c8332c857/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c=
+github.com/docker/model-distribution v0.0.0-20250627152504-c0c68acaabd5 h1:4qs7OwrJJQE1de5kbHg83gm5rmCRQzAwAp8gG/3cLY8=
+github.com/docker/model-distribution v0.0.0-20250627152504-c0c68acaabd5/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c=
 github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
diff --git a/pkg/inference/backends/llamacpp/llamacpp.go b/pkg/inference/backends/llamacpp/llamacpp.go
index 00fa57b86..f745320d0 100644
--- a/pkg/inference/backends/llamacpp/llamacpp.go
+++ b/pkg/inference/backends/llamacpp/llamacpp.go
@@ -11,7 +11,6 @@ import (
 	"os/exec"
 	"path/filepath"
 	"runtime"
-	"strconv"
 	"strings"
 
 	"github.com/docker/model-runner/pkg/diskusage"
@@ -122,10 +121,9 @@ func (l *llamaCpp) Install(ctx context.Context, httpClient *http.Client) error {
 
 // Run implements inference.Backend.Run.
 func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference.BackendMode, config *inference.BackendConfiguration) error {
-	modelPath, err := l.modelManager.GetModelPath(model)
-	l.log.Infof("Model path: %s", modelPath)
+	mdl, err := l.modelManager.GetModel(model)
 	if err != nil {
-		return fmt.Errorf("failed to get model path: %w", err)
+		return fmt.Errorf("failed to get model: %w", err)
 	}
 
 	if err := os.RemoveAll(socket); err != nil && !errors.Is(err, fs.ErrNotExist) {
@@ -138,13 +136,9 @@ func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference
 		binPath = l.updatedServerStoragePath
 	}
 
-	args := l.config.GetArgs(modelPath, socket, mode)
-
-	if config != nil {
-		if config.ContextSize >= 0 {
-			args = append(args, "--ctx-size", strconv.Itoa(int(config.ContextSize)))
-		}
-		args = append(args, config.RuntimeFlags...)
+	args, err := l.config.GetArgs(mdl, socket, mode, config)
+	if err != nil {
+		return fmt.Errorf("failed to get args for llama.cpp: %w", err)
 	}
 
 	l.log.Infof("llamaCppArgs: %v", args)
diff --git a/pkg/inference/backends/llamacpp/llamacpp_config.go b/pkg/inference/backends/llamacpp/llamacpp_config.go
index c7990ce97..7b5eec49f 100644
--- a/pkg/inference/backends/llamacpp/llamacpp_config.go
+++ b/pkg/inference/backends/llamacpp/llamacpp_config.go
@@ -1,9 +1,11 @@
 package llamacpp
 
 import (
+	"fmt"
 	"runtime"
 	"strconv"
 
+	"github.com/docker/model-distribution/types"
 	"github.com/docker/model-runner/pkg/inference"
 )
 
@@ -33,10 +35,20 @@ func NewDefaultLlamaCppConfig() *Config {
 }
 
 // GetArgs implements BackendConfig.GetArgs.
-func (c *Config) GetArgs(modelPath, socket string, mode inference.BackendMode) []string {
+func (c *Config) GetArgs(model types.Model, socket string, mode inference.BackendMode, config *inference.BackendConfiguration) ([]string, error) {
 	// Start with the arguments from LlamaCppConfig
 	args := append([]string{}, c.Args...)
 
+	modelPath, err := model.GGUFPath()
+	if err != nil {
+		return nil, fmt.Errorf("get gguf path: %v", err)
+	}
+
+	modelCfg, err := model.Config()
+	if err != nil {
+		return nil, fmt.Errorf("get model config: %v", err)
+	}
+
 	// Add model and socket arguments
 	args = append(args, "--model", modelPath, "--host", socket)
 
@@ -45,7 +57,20 @@ func (c *Config) GetArgs(modelPath, socket string, mode inference.BackendMode) [
 		args = append(args, "--embeddings")
 	}
 
-	return args
+	// Add arguments from model config
+	if modelCfg.ContextSize != nil {
+		args = append(args, "--ctx-size", strconv.FormatUint(*modelCfg.ContextSize, 10))
+	}
+
+	// Add arguments from backend config
+	if config != nil {
+		if config.ContextSize > 0 && !containsArg(args, "--ctx-size") {
+			args = append(args, "--ctx-size", fmt.Sprintf("%d", config.ContextSize))
+		}
+		args = append(args, config.RuntimeFlags...)
+	}
+
+	return args, nil
 }
 
 // containsArg checks if the given argument is already in the args slice.
diff --git a/pkg/inference/backends/llamacpp/llamacpp_config_test.go b/pkg/inference/backends/llamacpp/llamacpp_config_test.go
index d1b937b28..6856981dc 100644
--- a/pkg/inference/backends/llamacpp/llamacpp_config_test.go
+++ b/pkg/inference/backends/llamacpp/llamacpp_config_test.go
@@ -5,6 +5,7 @@ import (
 	"strconv"
 	"testing"
 
+	"github.com/docker/model-distribution/types"
 	"github.com/docker/model-runner/pkg/inference"
 )
 
@@ -72,12 +73,17 @@ func TestGetArgs(t *testing.T) {
 
 	tests := []struct {
 		name     string
+		model    types.Model
 		mode     inference.BackendMode
+		config   *inference.BackendConfiguration
 		expected []string
 	}{
 		{
 			name: "completion mode",
 			mode: inference.BackendModeCompletion,
+			model: &fakeModel{
+				ggufPath: modelPath,
+			},
 			expected: []string{
 				"--jinja",
 				"-ngl", "100",
@@ -89,6 +95,27 @@ func TestGetArgs(t *testing.T) {
 		{
 			name: "embedding mode",
 			mode: inference.BackendModeEmbedding,
+			model: &fakeModel{
+				ggufPath: modelPath,
+			},
+			expected: []string{
+				"--jinja",
+				"-ngl", "100",
+				"--metrics",
+				"--model", modelPath,
+				"--host", socket,
+				"--embeddings",
+			},
+		},
+		{
+			name: "context size from backend config",
+			mode: inference.BackendModeEmbedding,
+			model: &fakeModel{
+				ggufPath: modelPath,
+			},
+			config: &inference.BackendConfiguration{
+				ContextSize: 1234,
+			},
 			expected: []string{
 				"--jinja",
 				"-ngl", "100",
@@ -96,13 +123,58 @@ func TestGetArgs(t *testing.T) {
 				"--model", modelPath,
 				"--host", socket,
 				"--embeddings",
+				"--ctx-size", "1234", // should add this flag
+			},
+		},
+		{
+			name: "context size from model config",
+			mode: inference.BackendModeEmbedding,
+			model: &fakeModel{
+				ggufPath: modelPath,
+				config: types.Config{
+					ContextSize: uint64ptr(2096),
+				},
+			},
+			config: &inference.BackendConfiguration{
+				ContextSize: 1234,
+			},
+			expected: []string{
+				"--jinja",
+				"-ngl", "100",
+				"--metrics",
+				"--model", modelPath,
+				"--host", socket,
+				"--embeddings",
+				"--ctx-size", "2096", // model config takes precedence
+			},
+		},
+		{
+			name: "raw flags from backend config",
+			mode: inference.BackendModeEmbedding,
+			model: &fakeModel{
+				ggufPath: modelPath,
+			},
+			config: &inference.BackendConfiguration{
+				RuntimeFlags: []string{"--some", "flag"},
+			},
+			expected: []string{
+				"--jinja",
+				"-ngl", "100",
+				"--metrics",
+				"--model", modelPath,
+				"--host", socket,
+				"--embeddings",
+				"--some", "flag", // model config takes precedence
 			},
 		},
 	}
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			args := config.GetArgs(modelPath, socket, tt.mode)
+			args, err := config.GetArgs(tt.model, socket, tt.mode, tt.config)
+			if err != nil {
+				t.Errorf("GetArgs() error = %v", err)
+			}
 
 			// Check that all expected arguments are present and in the correct order
 			expectedIndex := 0
@@ -171,3 +243,34 @@ func TestContainsArg(t *testing.T) {
 		})
 	}
 }
+
+var _ types.Model = &fakeModel{}
+
+type fakeModel struct {
+	ggufPath string
+	config   types.Config
+}
+
+func (f *fakeModel) ID() (string, error) {
+	panic("shouldn't be called")
+}
+
+func (f *fakeModel) GGUFPath() (string, error) {
+	return f.ggufPath, nil
+}
+
+func (f *fakeModel) Config() (types.Config, error) {
+	return f.config, nil
+}
+
+func (f *fakeModel) Tags() []string {
+	panic("shouldn't be called")
+}
+
+func (f fakeModel) Descriptor() (types.Descriptor, error) {
+	panic("shouldn't be called")
+}
+
+func uint64ptr(n uint64) *uint64 {
+	return &n
+}
diff --git a/pkg/inference/config/config.go b/pkg/inference/config/config.go
index 6d05d1af4..8163759d4 100644
--- a/pkg/inference/config/config.go
+++ b/pkg/inference/config/config.go
@@ -1,6 +1,7 @@
 package config
 
 import (
+	"github.com/docker/model-distribution/types"
 	"github.com/docker/model-runner/pkg/inference"
 )
 
@@ -11,5 +12,5 @@ type BackendConfig interface {
 	// GetArgs returns the command-line arguments for the backend.
 	// It takes the model path, socket, and mode as input and returns
 	// the appropriate arguments for the backend.
-	GetArgs(modelPath, socket string, mode inference.BackendMode) []string
+	GetArgs(model types.Model, socket string, mode inference.BackendMode, config *inference.BackendConfiguration) ([]string, error)
 }

From 4d690797f2a9bc4e9c2a48f900625099dbe0a63f Mon Sep 17 00:00:00 2001
From: Emily Casey <emily.casey@docker.com>
Date: Fri, 27 Jun 2025 10:45:01 -0600
Subject: [PATCH 2/4] Bump model-distribution to point at main branch

Signed-off-by: Emily Casey <emily.casey@docker.com>
---
 go.mod | 2 +-
 go.sum | 6 ++----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/go.mod b/go.mod
index 66bcd5314..f94d738b6 100644
--- a/go.mod
+++ b/go.mod
@@ -5,7 +5,7 @@ go 1.23.7
 require (
 	github.com/containerd/containerd/v2 v2.0.4
 	github.com/containerd/platforms v1.0.0-rc.1
-	github.com/docker/model-distribution v0.0.0-20250627152504-c0c68acaabd5
+	github.com/docker/model-distribution v0.0.0-20250627163720-aff34abcf3e0
 	github.com/google/go-containerregistry v0.20.3
 	github.com/jaypipes/ghw v0.16.0
 	github.com/mattn/go-shellwords v1.0.12
diff --git a/go.sum b/go.sum
index cdc670919..e851c5df8 100644
--- a/go.sum
+++ b/go.sum
@@ -38,10 +38,8 @@ github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBi
 github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
 github.com/docker/docker-credential-helpers v0.8.2 h1:bX3YxiGzFP5sOXWc3bTPEXdEaZSeVMrFgOr3T+zrFAo=
 github.com/docker/docker-credential-helpers v0.8.2/go.mod h1:P3ci7E3lwkZg6XiHdRKft1KckHiO9a2rNtyFbZ/ry9M=
-github.com/docker/model-distribution v0.0.0-20250618082521-fb5c8332c857 h1:2IvvpdPZvpNn06+RUh5DC5O64dnrKjdsBKCMrzR5QTk=
-github.com/docker/model-distribution v0.0.0-20250618082521-fb5c8332c857/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c=
-github.com/docker/model-distribution v0.0.0-20250627152504-c0c68acaabd5 h1:4qs7OwrJJQE1de5kbHg83gm5rmCRQzAwAp8gG/3cLY8=
-github.com/docker/model-distribution v0.0.0-20250627152504-c0c68acaabd5/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c=
+github.com/docker/model-distribution v0.0.0-20250627163720-aff34abcf3e0 h1:bve4JZI06Admw+NewtPfrpJXsvRnGKTQvBOEICNC1C0=
+github.com/docker/model-distribution v0.0.0-20250627163720-aff34abcf3e0/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c=
 github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=

From aac988e62a604f753abe341f82889d8331913552 Mon Sep 17 00:00:00 2001
From: Emily Casey <emily@atomicjar.com>
Date: Fri, 27 Jun 2025 10:47:18 -0600
Subject: [PATCH 3/4] Apply suggestions from code review

Signed-off-by: Emily Casey <emily.casey@docker.com>

Co-authored-by: Jacob Howard <jacob.howard@docker.com>
---
 pkg/inference/backends/llamacpp/llamacpp_config.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkg/inference/backends/llamacpp/llamacpp_config.go b/pkg/inference/backends/llamacpp/llamacpp_config.go
index 7b5eec49f..45fb5f158 100644
--- a/pkg/inference/backends/llamacpp/llamacpp_config.go
+++ b/pkg/inference/backends/llamacpp/llamacpp_config.go
@@ -41,12 +41,12 @@ func (c *Config) GetArgs(model types.Model, socket string, mode inference.Backen
 
 	modelPath, err := model.GGUFPath()
 	if err != nil {
-		return nil, fmt.Errorf("get gguf path: %v", err)
+		return nil, fmt.Errorf("get gguf path: %w", err)
 	}
 
 	modelCfg, err := model.Config()
 	if err != nil {
-		return nil, fmt.Errorf("get model config: %v", err)
+		return nil, fmt.Errorf("get model config: %w", err)
 	}
 
 	// Add model and socket arguments

From cbdbb83bf70ad74b80a36fcea3243e5d0a64cfc1 Mon Sep 17 00:00:00 2001
From: Emily Casey <emily.casey@docker.com>
Date: Fri, 27 Jun 2025 10:54:59 -0600
Subject: [PATCH 4/4] Incorporate review feedback

Signed-off-by: Emily Casey <emily.casey@docker.com>
---
 pkg/inference/backends/llamacpp/llamacpp_config.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/inference/backends/llamacpp/llamacpp_config.go b/pkg/inference/backends/llamacpp/llamacpp_config.go
index 45fb5f158..cd4668c5f 100644
--- a/pkg/inference/backends/llamacpp/llamacpp_config.go
+++ b/pkg/inference/backends/llamacpp/llamacpp_config.go
@@ -65,7 +65,7 @@ func (c *Config) GetArgs(model types.Model, socket string, mode inference.Backen
 	// Add arguments from backend config
 	if config != nil {
 		if config.ContextSize > 0 && !containsArg(args, "--ctx-size") {
-			args = append(args, "--ctx-size", fmt.Sprintf("%d", config.ContextSize))
+			args = append(args, "--ctx-size", strconv.FormatInt(config.ContextSize, 10))
 		}
 		args = append(args, config.RuntimeFlags...)
 	}