docker · ilopezluna · Dec 15, 2025 · Dec 11, 2025 · Dec 11, 2025 · Dec 12, 2025
diff --git a/go.mod b/go.mod
@@ -7,7 +7,6 @@ require (
 	github.com/containerd/platforms v1.0.0-rc.1
 	github.com/docker/go-units v0.5.0
 	github.com/docker/model-runner/pkg/go-containerregistry v0.0.0-20251121150728-6951a2a36575
-	github.com/elastic/go-sysinfo v1.15.4
 	github.com/gpustack/gguf-parser-go v0.22.1
 	github.com/jaypipes/ghw v0.19.1
 	github.com/kolesnikovae/go-winjob v1.0.0
@@ -30,7 +29,6 @@ require (
 	github.com/docker/cli v28.3.0+incompatible // indirect
 	github.com/docker/distribution v2.8.3+incompatible // indirect
 	github.com/docker/docker-credential-helpers v0.9.3 // indirect
-	github.com/elastic/go-windows v1.0.2 // indirect
 	github.com/felixge/httpsnoop v1.0.4 // indirect
 	github.com/go-logr/logr v1.4.3 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
@@ -47,7 +45,6 @@ require (
 	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
-	github.com/prometheus/procfs v0.15.1 // indirect
 	github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect
 	github.com/vbatts/tar-split v0.12.1 // indirect
 	github.com/yusufpapurcu/wmi v1.2.4 // indirect

diff --git a/go.sum b/go.sum
@@ -42,10 +42,6 @@ github.com/docker/go-winjob v0.0.0-20250829235554-57b487ebcbc5 h1:dxSFEb0EEmvceI
 github.com/docker/go-winjob v0.0.0-20250829235554-57b487ebcbc5/go.mod h1:ICOGmIXdwhfid7rQP+tLvDJqVg0lHdEk3pI5nsapTtg=
 github.com/docker/model-runner/pkg/go-containerregistry v0.0.0-20251121150728-6951a2a36575 h1:N2yLWYSZFTVLkLTh8ux1Z0Nug/F78pXsl2KDtbWhe+Y=
 github.com/docker/model-runner/pkg/go-containerregistry v0.0.0-20251121150728-6951a2a36575/go.mod h1:gbdiY0X8gr0J88OfUuRD29JXCWT9jgHzPmrqTlO15BM=
-github.com/elastic/go-sysinfo v1.15.4 h1:A3zQcunCxik14MgXu39cXFXcIw2sFXZ0zL886eyiv1Q=
-github.com/elastic/go-sysinfo v1.15.4/go.mod h1:ZBVXmqS368dOn/jvijV/zHLfakWTYHBZPk3G244lHrU=
-github.com/elastic/go-windows v1.0.2 h1:yoLLsAsV5cfg9FLhZ9EXZ2n2sQFKeDYrHenkcivY4vI=
-github.com/elastic/go-windows v1.0.2/go.mod h1:bGcDpBzXgYSqM0Gx3DM4+UxFj300SZLixie9u9ixLM8=
 github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
@@ -112,17 +108,13 @@ github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNw
 github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
 github.com/prometheus/common v0.67.4 h1:yR3NqWO1/UyO1w2PhUvXlGQs/PtFmoveVO0KZ4+Lvsc=
 github.com/prometheus/common v0.67.4/go.mod h1:gP0fq6YjjNCLssJCQp0yk4M8W6ikLURwkdd/YKtTbyI=
-github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
-github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
 github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
 github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
 github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
 github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
 github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY=
 github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
-github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
-github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=

diff --git a/main.go b/main.go
@@ -11,13 +11,11 @@ import (
 	"syscall"
 	"time"
 
-	"github.com/docker/model-runner/pkg/gpuinfo"
 	"github.com/docker/model-runner/pkg/inference"
 	"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
 	"github.com/docker/model-runner/pkg/inference/backends/mlx"
 	"github.com/docker/model-runner/pkg/inference/backends/vllm"
 	"github.com/docker/model-runner/pkg/inference/config"
-	"github.com/docker/model-runner/pkg/inference/memory"
 	"github.com/docker/model-runner/pkg/inference/models"
 	"github.com/docker/model-runner/pkg/inference/scheduling"
 	"github.com/docker/model-runner/pkg/metrics"
@@ -65,15 +63,6 @@ func main() {
 		llamaServerPath = "/Applications/Docker.app/Contents/Resources/model-runner/bin"
 	}
 
-	gpuInfo := gpuinfo.New(llamaServerPath)
-
-	sysMemInfo, err := memory.NewSystemMemoryInfo(log, gpuInfo)
-	if err != nil {
-		log.Fatalf("unable to initialize system memory info: %v", err)
-	}
-
-	memEstimator := memory.NewEstimator(sysMemInfo)
-
 	// Create a proxy-aware HTTP transport
 	// Use a safe type assertion with fallback, and explicitly set Proxy to http.ProxyFromEnvironment
 	var baseTransport *http.Transport
@@ -93,7 +82,6 @@ func main() {
 		log,
 		clientConfig,
 		nil,
-		memEstimator,
 	)
 	modelManager := models.NewManager(log.WithFields(logrus.Fields{"component": "model-manager"}), clientConfig)
 	log.Infof("LLAMA_SERVER_PATH: %s", llamaServerPath)
@@ -118,12 +106,6 @@ func main() {
 		log.Fatalf("unable to initialize %s backend: %v", llamacpp.Name, err)
 	}
 
-	if os.Getenv("MODEL_RUNNER_RUNTIME_MEMORY_CHECK") == "1" {
-		memory.SetRuntimeMemoryCheck(true)
-	}
-
-	memEstimator.SetDefaultBackend(llamaCppBackend)
-
 	vllmBackend, err := vllm.New(
 		log,
 		modelManager,
@@ -160,7 +142,6 @@ func main() {
 			"",
 			false,
 		),
-		sysMemInfo,
 	)
 
 	// Create the HTTP handler for the scheduler

diff --git a/pkg/inference/memory/estimator.go b/pkg/inference/memory/estimator.go
diff --git a/pkg/inference/memory/settings.go b/pkg/inference/memory/settings.go
diff --git a/pkg/inference/memory/system.go b/pkg/inference/memory/system.go
diff --git a/pkg/inference/models/handler_test.go b/pkg/inference/models/handler_test.go
@@ -17,23 +17,10 @@ import (
 	"github.com/docker/model-runner/pkg/distribution/builder"
 	reg "github.com/docker/model-runner/pkg/distribution/registry"
 	"github.com/docker/model-runner/pkg/inference"
-	"github.com/docker/model-runner/pkg/inference/memory"
 
 	"github.com/sirupsen/logrus"
 )
 
-type mockMemoryEstimator struct{}
-
-func (me *mockMemoryEstimator) SetDefaultBackend(_ memory.MemoryEstimatorBackend) {}
-
-func (me *mockMemoryEstimator) GetRequiredMemoryForModel(_ context.Context, _ string, _ *inference.BackendConfiguration) (inference.RequiredMemory, error) {
-	return inference.RequiredMemory{RAM: 0, VRAM: 0}, nil
-}
-
-func (me *mockMemoryEstimator) HaveSufficientMemoryForModel(_ context.Context, _ string, _ *inference.BackendConfiguration) (bool, inference.RequiredMemory, inference.RequiredMemory, error) {
-	return true, inference.RequiredMemory{}, inference.RequiredMemory{}, nil
-}
-
 // getProjectRoot returns the absolute path to the project root directory
 func getProjectRoot(t *testing.T) string {
 	// Start from the current test file's directory
@@ -123,11 +110,10 @@ func TestPullModel(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			log := logrus.NewEntry(logrus.StandardLogger())
-			memEstimator := &mockMemoryEstimator{}
 			handler := NewHTTPHandler(log, ClientConfig{
 				StoreRootPath: tempDir,
 				Logger:        log.WithFields(logrus.Fields{"component": "model-manager"}),
-			}, nil, memEstimator)
+			}, nil)
 
 			r := httptest.NewRequest(http.MethodPost, "/models/create", strings.NewReader(`{"from": "`+tag+`"}`))
 			if tt.acceptHeader != "" {
@@ -234,13 +220,12 @@ func TestHandleGetModel(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			log := logrus.NewEntry(logrus.StandardLogger())
-			memEstimator := &mockMemoryEstimator{}
 			handler := NewHTTPHandler(log, ClientConfig{
 				StoreRootPath: tempDir,
 				Logger:        log.WithFields(logrus.Fields{"component": "model-manager"}),
 				Transport:     http.DefaultTransport,
 				UserAgent:     "test-agent",
-			}, nil, memEstimator)
+			}, nil)
 
 			// First pull the model if we're testing local access
 			if !tt.remote && !strings.Contains(tt.modelName, "nonexistent") {
@@ -315,11 +300,10 @@ func TestCors(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.path, func(t *testing.T) {
 			t.Parallel()
-			memEstimator := &mockMemoryEstimator{}
 			discard := logrus.New()
 			discard.SetOutput(io.Discard)
 			log := logrus.NewEntry(discard)
-			m := NewHTTPHandler(log, ClientConfig{}, []string{"*"}, memEstimator)
+			m := NewHTTPHandler(log, ClientConfig{}, []string{"*"})
 			req := httptest.NewRequest(http.MethodOptions, "http://model-runner.docker.internal"+tt.path, http.NoBody)
 			req.Header.Set("Origin", "docker.com")
 			w := httptest.NewRecorder()

diff --git a/pkg/inference/models/http_handler.go b/pkg/inference/models/http_handler.go
@@ -15,7 +15,6 @@ import (
 	"github.com/docker/model-runner/pkg/distribution/distribution"
 	"github.com/docker/model-runner/pkg/distribution/registry"
 	"github.com/docker/model-runner/pkg/inference"
-	"github.com/docker/model-runner/pkg/inference/memory"
 	"github.com/docker/model-runner/pkg/internal/utils"
 	"github.com/docker/model-runner/pkg/logging"
 	"github.com/docker/model-runner/pkg/middleware"
@@ -38,8 +37,6 @@ type HTTPHandler struct {
 	httpHandler http.Handler
 	// lock is used to synchronize access to the models manager's router.
 	lock sync.RWMutex
-	// memoryEstimator is used to calculate runtime memory requirements for models.
-	memoryEstimator memory.MemoryEstimator
 	// manager handles business logic for model operations.
 	manager *Manager
 }
@@ -56,13 +53,12 @@ type ClientConfig struct {
 }
 
 // NewHTTPHandler creates a new model's handler.
-func NewHTTPHandler(log logging.Logger, c ClientConfig, allowedOrigins []string, memoryEstimator memory.MemoryEstimator) *HTTPHandler {
+func NewHTTPHandler(log logging.Logger, c ClientConfig, allowedOrigins []string) *HTTPHandler {
 	// Create the manager.
 	m := &HTTPHandler{
-		log:             log,
-		router:          http.NewServeMux(),
-		memoryEstimator: memoryEstimator,
-		manager:         NewManager(log.WithFields(logrus.Fields{"component": "service"}), c),
+		log:     log,
+		router:  http.NewServeMux(),
+		manager: NewManager(log.WithFields(logrus.Fields{"component": "service"}), c),
 	}
 
 	// Register routes.
@@ -163,23 +159,7 @@ func (h *HTTPHandler) handleCreateModel(w http.ResponseWriter, r *http.Request)
 	// Normalize the model name to add defaults
 	request.From = NormalizeModelName(request.From)
 
-	// Pull the model. In the future, we may support additional operations here
-	// besides pulling (such as model building).
-	if memory.RuntimeMemoryCheckEnabled() && !request.IgnoreRuntimeMemoryCheck {
-		h.log.Infof("Will estimate memory required for %q", request.From)
-		proceed, req, totalMem, err := h.memoryEstimator.HaveSufficientMemoryForModel(r.Context(), request.From, nil)
-		if err != nil {
-			h.log.Warnf("Failed to validate sufficient system memory for model %q: %s", request.From, err)
-			// Prefer staying functional in case of unexpected estimation errors.
-			proceed = true
-		}
-		if !proceed {
-			errstr := fmt.Sprintf("Runtime memory requirement for model %q exceeds total system memory: required %d RAM %d VRAM, system %d RAM %d VRAM", request.From, req.RAM, req.VRAM, totalMem.RAM, totalMem.VRAM)
-			h.log.Warnf(errstr)
-			http.Error(w, errstr, http.StatusInsufficientStorage)
-			return
-		}
-	}
+	// Pull the model. No runtime memory estimation is performed before pulling.
 	if err := h.manager.Pull(request.From, request.BearerToken, r, w); err != nil {
 		if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
 			h.log.Infof("Request canceled/timed out while pulling model %q", request.From)