2 changes: 1 addition & 1 deletion cmd/cli/commands/list.go
@@ -261,7 +261,7 @@ func appendRow(table *tablewriter.Table, tag string, model dmrm.Model) {
contextSize := ""
if model.Config.GetContextSize() != nil {
contextSize = fmt.Sprintf("%d", *model.Config.GetContextSize())
} else if dockerConfig, ok := model.Config.(*types.Config); ok && dockerConfig.GGUF != nil {
} else if dockerConfig, ok := model.Config.ModelConfig.(*types.Config); ok && dockerConfig.GGUF != nil {
if v, ok := dockerConfig.GGUF["llama.context_length"]; ok {
if parsed, err := strconv.ParseUint(v, 10, 64); err == nil {
contextSize = fmt.Sprintf("%d", parsed)
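The functional change in this hunk is only where the type assertion looks: the GGUF-backed config now sits behind the wrapper's `ModelConfig` field rather than being `model.Config` itself. Below is a minimal, self-contained sketch of that fallback path, assuming a wrapper shaped like the one the updated tests construct further down; `Config`, `ModelConfigWrapper`, and `contextSize` are illustrative stand-ins, not the real `dmrm`/`types` API.

```go
package main

import (
	"fmt"
	"strconv"
)

// Illustrative stand-in for types.Config: carries raw GGUF key/value metadata.
type Config struct {
	GGUF map[string]string
}

// Illustrative stand-in for dmrm.ModelConfigWrapper: holds the concrete config.
type ModelConfigWrapper struct {
	ModelConfig any
}

// contextSize mirrors the fallback in appendRow: when no explicit context size
// is available, read "llama.context_length" from the wrapped config's GGUF map.
func contextSize(w *ModelConfigWrapper) string {
	// The assertion targets the wrapped value, not the wrapper itself.
	if cfg, ok := w.ModelConfig.(*Config); ok && cfg.GGUF != nil {
		if v, ok := cfg.GGUF["llama.context_length"]; ok {
			if parsed, err := strconv.ParseUint(v, 10, 64); err == nil {
				return fmt.Sprintf("%d", parsed)
			}
		}
	}
	return ""
}

func main() {
	w := &ModelConfigWrapper{ModelConfig: &Config{
		GGUF: map[string]string{"llama.context_length": "4096"},
	}}
	fmt.Println(contextSize(w)) // prints 4096
}
```

The construction in `main` mirrors how the tests below build models, with the concrete config nested inside the wrapper.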
20 changes: 10 additions & 10 deletions cmd/cli/commands/list_test.go
@@ -15,12 +15,12 @@ func testModel(id string, tags []string, created int64) dmrm.Model {
ID: id,
Tags: tags,
Created: created,
Config: &types.Config{
Config: &dmrm.ModelConfigWrapper{ModelConfig: &types.Config{
Parameters: "7B",
Quantization: "Q4_0",
Architecture: "llama",
Size: "4.0GB",
},
}},
}
}

@@ -177,12 +177,12 @@ func TestListModelsSingleModel(t *testing.T) {
ID: "sha256:123456789012345678901234567890123456789012345678901234567890abcd",
Tags: []string{"single:latest"},
Created: 1000,
Config: &types.Config{
Config: &dmrm.ModelConfigWrapper{ModelConfig: &types.Config{
Parameters: "7B",
Quantization: "Q4_0",
Architecture: "llama",
Size: "4.0GB",
},
}},
},
}
output := prettyPrintModels(models)
@@ -234,23 +234,23 @@ func TestPrettyPrintModelsWithSortedInput(t *testing.T) {
ID: "sha256:123456789012345678901234567890123456789012345678901234567890abcd",
Tags: []string{"ai/apple:latest"},
Created: 1000,
Config: &types.Config{
Config: &dmrm.ModelConfigWrapper{ModelConfig: &types.Config{
Parameters: "7B",
Quantization: "Q4_0",
Architecture: "llama",
Size: "4.0GB",
},
}},
},
{
ID: "sha256:223456789012345678901234567890123456789012345678901234567890abcd",
Tags: []string{"ai/banana:v1"},
Created: 2000,
Config: &types.Config{
Config: &dmrm.ModelConfigWrapper{ModelConfig: &types.Config{
Parameters: "13B",
Quantization: "Q4_K_M",
Architecture: "llama",
Size: "8.0GB",
},
}},
},
}

@@ -282,12 +282,12 @@ func TestPrettyPrintModelsWithMultipleTags(t *testing.T) {
ID: "sha256:123456789012345678901234567890123456789012345678901234567890abcd",
Tags: []string{"qwen3:8B-Q4_K_M", "qwen3:latest", "qwen3:0.6B-F16"},
Created: 1000,
Config: &types.Config{
Config: &dmrm.ModelConfigWrapper{ModelConfig: &types.Config{
Parameters: "8B",
Quantization: "Q4_K_M",
Architecture: "qwen3",
Size: "4.68GB",
},
}},
},
}

39 changes: 30 additions & 9 deletions llamacpp/native/CMakeLists.txt
@@ -8,22 +8,43 @@ project(

option(DDLLAMA_BUILD_SERVER "Build the DD llama.cpp server executable" ON)
option(DDLLAMA_BUILD_UTILS "Build utilities, e.g. nv-gpu-info" OFF)
set(DDLLAMA_PATCH_COMMAND "patch" CACHE STRING "patch command")

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

if (DDLLAMA_BUILD_SERVER)
set(LLAMA_BUILD_COMMON ON)
# Build upstream llama.cpp with server enabled
# Only set these options if they're not already defined to allow consumers to override
if(NOT DEFINED LLAMA_BUILD_COMMON)
set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build common utils library")
endif()
if(NOT DEFINED LLAMA_BUILD_TOOLS)
set(LLAMA_BUILD_TOOLS ON CACHE BOOL "Build tools")
endif()
if(NOT DEFINED LLAMA_BUILD_SERVER)
set(LLAMA_BUILD_SERVER ON CACHE BOOL "Build server")
endif()
add_subdirectory(vendor/llama.cpp)
# Get build info and set version for mtmd just like it's done in llama.cpp/CMakeLists.txt
include(vendor/llama.cpp/cmake/build-info.cmake)
if (NOT DEFINED LLAMA_BUILD_NUMBER)
set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})

# Create custom target to copy llama-server to com.docker.llama-server
if (WIN32)
set(LLAMA_SERVER_DST "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/com.docker.llama-server.exe")
else()
set(LLAMA_SERVER_DST "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/com.docker.llama-server")
endif()
set(LLAMA_INSTALL_VERSION 0.0.${LLAMA_BUILD_NUMBER})
add_subdirectory(vendor/llama.cpp/tools/mtmd)
add_subdirectory(src/server)

add_custom_command(OUTPUT "${LLAMA_SERVER_DST}"
COMMAND ${CMAKE_COMMAND} -E copy "$<TARGET_FILE:llama-server>" "${LLAMA_SERVER_DST}"
DEPENDS llama-server
COMMENT "Creating com.docker.llama-server from llama-server"
)

add_custom_target(com.docker.llama-server ALL DEPENDS "${LLAMA_SERVER_DST}")

# Install the renamed binary using TARGETS instead of PROGRAMS for better cross-platform support
install(TARGETS llama-server
RUNTIME DESTINATION bin
RENAME "com.docker.llama-server${CMAKE_EXECUTABLE_SUFFIX}")
endif()

if (WIN32 AND DDLLAMA_BUILD_UTILS)
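For context on the `NOT DEFINED` guards: they let a build that vendors this directory, or a plain `-D` on the cmake command line, decide these llama.cpp options first; the wrapper only fills in defaults when nothing has been set. A hedged sketch of such a parent list file follows; the consumer project name, minimum version, and path are assumptions for illustration only.

```cmake
# Hypothetical parent project that vendors this repo; the path is illustrative.
cmake_minimum_required(VERSION 3.16)
project(consumer LANGUAGES C CXX)

# Cache entries created here are visible inside the subdirectory, so the
# guarded set(... CACHE ...) calls above see them as already DEFINED and
# leave them alone. These values match the defaults; a consumer could choose
# differently, but the com.docker.llama-server target needs llama-server built.
set(LLAMA_BUILD_COMMON ON CACHE BOOL "common utils library")
set(LLAMA_BUILD_TOOLS ON CACHE BOOL "llama.cpp tools")
set(LLAMA_BUILD_SERVER ON CACHE BOOL "upstream llama-server target")

add_subdirectory(model-runner/llamacpp/native)
```

Configuring standalone with `cmake -B build -DLLAMA_BUILD_TOOLS=ON` behaves the same way, since `-D` creates the cache entry before this file is processed.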
26 changes: 8 additions & 18 deletions llamacpp/native/README.md
@@ -1,5 +1,7 @@
# Native llama-server

This project builds the upstream llama.cpp server (`llama-server`) directly from the llama.cpp submodule and renames it to `com.docker.llama-server`.

## Building

cmake -B build
@@ -15,7 +17,7 @@

This project uses llama.cpp as a git submodule located at `vendor/llama.cpp`, which points to the official llama.cpp repository at https://github.com/ggml-org/llama.cpp.git.

The project applies custom patches to llama.cpp's server implementation (`server.cpp` and `utils.hpp`) to integrate with the Docker model-runner architecture. These patches are maintained in `src/server/server.patch`.
We build the upstream `llama-server` binary directly without any modifications.

### Prerequisites

@@ -45,32 +47,20 @@ If the submodule is already initialized, this command is safe to run and will en
popd
```

3. **Apply the custom llama-server patch:**
3. **Build and test:**

```bash
make -C src/server clean
make -C src/server
```

This will:
- Clean the previous patched files
- Copy the new `server.cpp` and `utils.hpp` from the updated llama.cpp
- Apply our custom patches from `src/server/server.patch`

4. **Build and test:**
# Build from the native directory
cmake -B build
cmake --build build --parallel 8 --config Release

```bash
# Build from the native directory
cmake -B build
cmake --build build --parallel 8 --config Release

# Test the build
./build/bin/com.docker.llama-server --model <path to model>
```

Make sure everything builds cleanly without errors.

5. **Commit the submodule update:**
4. **Commit the submodule update:**

```bash
git add vendor/llama.cpp
31 changes: 0 additions & 31 deletions llamacpp/native/src/server/CMakeLists.txt

This file was deleted.

18 changes: 0 additions & 18 deletions llamacpp/native/src/server/Makefile

This file was deleted.

24 changes: 0 additions & 24 deletions llamacpp/native/src/server/README.md

This file was deleted.
