Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ require (
github.com/containerd/platforms v1.0.0-rc.1
github.com/docker/go-units v0.5.0
github.com/docker/model-runner/pkg/go-containerregistry v0.0.0-20251121150728-6951a2a36575
github.com/elastic/go-sysinfo v1.15.4
github.com/gpustack/gguf-parser-go v0.22.1
github.com/jaypipes/ghw v0.19.1
github.com/kolesnikovae/go-winjob v1.0.0
Expand All @@ -30,7 +29,6 @@ require (
github.com/docker/cli v28.3.0+incompatible // indirect
github.com/docker/distribution v2.8.3+incompatible // indirect
github.com/docker/docker-credential-helpers v0.9.3 // indirect
github.com/elastic/go-windows v1.0.2 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
Expand All @@ -47,7 +45,6 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect
github.com/vbatts/tar-split v0.12.1 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
Expand Down
8 changes: 0 additions & 8 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,6 @@ github.com/docker/go-winjob v0.0.0-20250829235554-57b487ebcbc5 h1:dxSFEb0EEmvceI
github.com/docker/go-winjob v0.0.0-20250829235554-57b487ebcbc5/go.mod h1:ICOGmIXdwhfid7rQP+tLvDJqVg0lHdEk3pI5nsapTtg=
github.com/docker/model-runner/pkg/go-containerregistry v0.0.0-20251121150728-6951a2a36575 h1:N2yLWYSZFTVLkLTh8ux1Z0Nug/F78pXsl2KDtbWhe+Y=
github.com/docker/model-runner/pkg/go-containerregistry v0.0.0-20251121150728-6951a2a36575/go.mod h1:gbdiY0X8gr0J88OfUuRD29JXCWT9jgHzPmrqTlO15BM=
github.com/elastic/go-sysinfo v1.15.4 h1:A3zQcunCxik14MgXu39cXFXcIw2sFXZ0zL886eyiv1Q=
github.com/elastic/go-sysinfo v1.15.4/go.mod h1:ZBVXmqS368dOn/jvijV/zHLfakWTYHBZPk3G244lHrU=
github.com/elastic/go-windows v1.0.2 h1:yoLLsAsV5cfg9FLhZ9EXZ2n2sQFKeDYrHenkcivY4vI=
github.com/elastic/go-windows v1.0.2/go.mod h1:bGcDpBzXgYSqM0Gx3DM4+UxFj300SZLixie9u9ixLM8=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
Expand Down Expand Up @@ -112,17 +108,13 @@ github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNw
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.67.4 h1:yR3NqWO1/UyO1w2PhUvXlGQs/PtFmoveVO0KZ4+Lvsc=
github.com/prometheus/common v0.67.4/go.mod h1:gP0fq6YjjNCLssJCQp0yk4M8W6ikLURwkdd/YKtTbyI=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY=
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
Expand Down
19 changes: 0 additions & 19 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,11 @@ import (
"syscall"
"time"

"github.com/docker/model-runner/pkg/gpuinfo"
"github.com/docker/model-runner/pkg/inference"
"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
"github.com/docker/model-runner/pkg/inference/backends/mlx"
"github.com/docker/model-runner/pkg/inference/backends/vllm"
"github.com/docker/model-runner/pkg/inference/config"
"github.com/docker/model-runner/pkg/inference/memory"
"github.com/docker/model-runner/pkg/inference/models"
"github.com/docker/model-runner/pkg/inference/scheduling"
"github.com/docker/model-runner/pkg/metrics"
Expand Down Expand Up @@ -65,15 +63,6 @@ func main() {
llamaServerPath = "/Applications/Docker.app/Contents/Resources/model-runner/bin"
}

gpuInfo := gpuinfo.New(llamaServerPath)

sysMemInfo, err := memory.NewSystemMemoryInfo(log, gpuInfo)
if err != nil {
log.Fatalf("unable to initialize system memory info: %v", err)
}

memEstimator := memory.NewEstimator(sysMemInfo)

// Create a proxy-aware HTTP transport
// Use a safe type assertion with fallback, and explicitly set Proxy to http.ProxyFromEnvironment
var baseTransport *http.Transport
Expand All @@ -93,7 +82,6 @@ func main() {
log,
clientConfig,
nil,
memEstimator,
)
modelManager := models.NewManager(log.WithFields(logrus.Fields{"component": "model-manager"}), clientConfig)
log.Infof("LLAMA_SERVER_PATH: %s", llamaServerPath)
Expand All @@ -118,12 +106,6 @@ func main() {
log.Fatalf("unable to initialize %s backend: %v", llamacpp.Name, err)
}

if os.Getenv("MODEL_RUNNER_RUNTIME_MEMORY_CHECK") == "1" {
memory.SetRuntimeMemoryCheck(true)
}

memEstimator.SetDefaultBackend(llamaCppBackend)

vllmBackend, err := vllm.New(
log,
modelManager,
Expand Down Expand Up @@ -160,7 +142,6 @@ func main() {
"",
false,
),
sysMemInfo,
)

// Create the HTTP handler for the scheduler
Expand Down
53 changes: 0 additions & 53 deletions pkg/inference/memory/estimator.go

This file was deleted.

18 changes: 0 additions & 18 deletions pkg/inference/memory/settings.go

This file was deleted.

64 changes: 0 additions & 64 deletions pkg/inference/memory/system.go

This file was deleted.

22 changes: 3 additions & 19 deletions pkg/inference/models/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,10 @@ import (
"github.com/docker/model-runner/pkg/distribution/builder"
reg "github.com/docker/model-runner/pkg/distribution/registry"
"github.com/docker/model-runner/pkg/inference"
"github.com/docker/model-runner/pkg/inference/memory"

"github.com/sirupsen/logrus"
)

type mockMemoryEstimator struct{}

func (me *mockMemoryEstimator) SetDefaultBackend(_ memory.MemoryEstimatorBackend) {}

func (me *mockMemoryEstimator) GetRequiredMemoryForModel(_ context.Context, _ string, _ *inference.BackendConfiguration) (inference.RequiredMemory, error) {
return inference.RequiredMemory{RAM: 0, VRAM: 0}, nil
}

func (me *mockMemoryEstimator) HaveSufficientMemoryForModel(_ context.Context, _ string, _ *inference.BackendConfiguration) (bool, inference.RequiredMemory, inference.RequiredMemory, error) {
return true, inference.RequiredMemory{}, inference.RequiredMemory{}, nil
}

// getProjectRoot returns the absolute path to the project root directory
func getProjectRoot(t *testing.T) string {
// Start from the current test file's directory
Expand Down Expand Up @@ -123,11 +110,10 @@ func TestPullModel(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
log := logrus.NewEntry(logrus.StandardLogger())
memEstimator := &mockMemoryEstimator{}
handler := NewHTTPHandler(log, ClientConfig{
StoreRootPath: tempDir,
Logger: log.WithFields(logrus.Fields{"component": "model-manager"}),
}, nil, memEstimator)
}, nil)

r := httptest.NewRequest(http.MethodPost, "/models/create", strings.NewReader(`{"from": "`+tag+`"}`))
if tt.acceptHeader != "" {
Expand Down Expand Up @@ -234,13 +220,12 @@ func TestHandleGetModel(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
log := logrus.NewEntry(logrus.StandardLogger())
memEstimator := &mockMemoryEstimator{}
handler := NewHTTPHandler(log, ClientConfig{
StoreRootPath: tempDir,
Logger: log.WithFields(logrus.Fields{"component": "model-manager"}),
Transport: http.DefaultTransport,
UserAgent: "test-agent",
}, nil, memEstimator)
}, nil)

// First pull the model if we're testing local access
if !tt.remote && !strings.Contains(tt.modelName, "nonexistent") {
Expand Down Expand Up @@ -315,11 +300,10 @@ func TestCors(t *testing.T) {
for _, tt := range tests {
t.Run(tt.path, func(t *testing.T) {
t.Parallel()
memEstimator := &mockMemoryEstimator{}
discard := logrus.New()
discard.SetOutput(io.Discard)
log := logrus.NewEntry(discard)
m := NewHTTPHandler(log, ClientConfig{}, []string{"*"}, memEstimator)
m := NewHTTPHandler(log, ClientConfig{}, []string{"*"})
req := httptest.NewRequest(http.MethodOptions, "http://model-runner.docker.internal"+tt.path, http.NoBody)
req.Header.Set("Origin", "docker.com")
w := httptest.NewRecorder()
Expand Down
30 changes: 5 additions & 25 deletions pkg/inference/models/http_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import (
"github.com/docker/model-runner/pkg/distribution/distribution"
"github.com/docker/model-runner/pkg/distribution/registry"
"github.com/docker/model-runner/pkg/inference"
"github.com/docker/model-runner/pkg/inference/memory"
"github.com/docker/model-runner/pkg/internal/utils"
"github.com/docker/model-runner/pkg/logging"
"github.com/docker/model-runner/pkg/middleware"
Expand All @@ -38,8 +37,6 @@ type HTTPHandler struct {
httpHandler http.Handler
// lock is used to synchronize access to the models manager's router.
lock sync.RWMutex
// memoryEstimator is used to calculate runtime memory requirements for models.
memoryEstimator memory.MemoryEstimator
// manager handles business logic for model operations.
manager *Manager
}
Expand All @@ -56,13 +53,12 @@ type ClientConfig struct {
}

// NewHTTPHandler creates a new model's handler.
func NewHTTPHandler(log logging.Logger, c ClientConfig, allowedOrigins []string, memoryEstimator memory.MemoryEstimator) *HTTPHandler {
func NewHTTPHandler(log logging.Logger, c ClientConfig, allowedOrigins []string) *HTTPHandler {
// Create the manager.
m := &HTTPHandler{
log: log,
router: http.NewServeMux(),
memoryEstimator: memoryEstimator,
manager: NewManager(log.WithFields(logrus.Fields{"component": "service"}), c),
log: log,
router: http.NewServeMux(),
manager: NewManager(log.WithFields(logrus.Fields{"component": "service"}), c),
}

// Register routes.
Expand Down Expand Up @@ -163,23 +159,7 @@ func (h *HTTPHandler) handleCreateModel(w http.ResponseWriter, r *http.Request)
// Normalize the model name to add defaults
request.From = NormalizeModelName(request.From)

// Pull the model. In the future, we may support additional operations here
// besides pulling (such as model building).
if memory.RuntimeMemoryCheckEnabled() && !request.IgnoreRuntimeMemoryCheck {
h.log.Infof("Will estimate memory required for %q", request.From)
proceed, req, totalMem, err := h.memoryEstimator.HaveSufficientMemoryForModel(r.Context(), request.From, nil)
if err != nil {
h.log.Warnf("Failed to validate sufficient system memory for model %q: %s", request.From, err)
// Prefer staying functional in case of unexpected estimation errors.
proceed = true
}
if !proceed {
errstr := fmt.Sprintf("Runtime memory requirement for model %q exceeds total system memory: required %d RAM %d VRAM, system %d RAM %d VRAM", request.From, req.RAM, req.VRAM, totalMem.RAM, totalMem.VRAM)
h.log.Warnf(errstr)
http.Error(w, errstr, http.StatusInsufficientStorage)
return
}
}
// Pull the model
if err := h.manager.Pull(request.From, request.BearerToken, r, w); err != nil {
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
h.log.Infof("Request canceled/timed out while pulling model %q", request.From)
Expand Down
Loading