2 changes: 1 addition & 1 deletion cmd/cli/commands/list.go
@@ -261,7 +261,7 @@ func appendRow(table *tablewriter.Table, tag string, model dmrm.Model) {
contextSize := ""
if model.Config.GetContextSize() != nil {
contextSize = fmt.Sprintf("%d", *model.Config.GetContextSize())
} else if dockerConfig, ok := model.Config.(*types.Config); ok && dockerConfig.GGUF != nil {
} else if dockerConfig, ok := model.Config.ModelConfig.(*types.Config); ok && dockerConfig.GGUF != nil {
if v, ok := dockerConfig.GGUF["llama.context_length"]; ok {
if parsed, err := strconv.ParseUint(v, 10, 64); err == nil {
contextSize = fmt.Sprintf("%d", parsed)
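The functional change in this hunk is only where the type assertion looks: the GGUF-backed config now sits behind the wrapper's `ModelConfig` field rather than being `model.Config` itself. Below is a minimal, self-contained sketch of that fallback path, assuming a wrapper shaped like the one the updated tests construct further down; `Config`, `ModelConfigWrapper`, and `contextSize` are illustrative stand-ins, not the real `dmrm`/`types` API.

```go
package main

import (
	"fmt"
	"strconv"
)

// Illustrative stand-in for types.Config: carries raw GGUF key/value metadata.
type Config struct {
	GGUF map[string]string
}

// Illustrative stand-in for dmrm.ModelConfigWrapper: holds the concrete config.
type ModelConfigWrapper struct {
	ModelConfig any
}

// contextSize mirrors the fallback in appendRow: when no explicit context size
// is available, read "llama.context_length" from the wrapped config's GGUF map.
func contextSize(w *ModelConfigWrapper) string {
	// The assertion targets the wrapped value, not the wrapper itself.
	if cfg, ok := w.ModelConfig.(*Config); ok && cfg.GGUF != nil {
		if v, ok := cfg.GGUF["llama.context_length"]; ok {
			if parsed, err := strconv.ParseUint(v, 10, 64); err == nil {
				return fmt.Sprintf("%d", parsed)
			}
		}
	}
	return ""
}

func main() {
	w := &ModelConfigWrapper{ModelConfig: &Config{
		GGUF: map[string]string{"llama.context_length": "4096"},
	}}
	fmt.Println(contextSize(w)) // prints 4096
}
```

The construction in `main` mirrors how the tests below build models, with the concrete config nested inside the wrapper.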
20 changes: 10 additions & 10 deletions cmd/cli/commands/list_test.go
@@ -15,12 +15,12 @@ func testModel(id string, tags []string, created int64) dmrm.Model {
ID: id,
Tags: tags,
Created: created,
Config: &types.Config{
Config: &dmrm.ModelConfigWrapper{ModelConfig: &types.Config{
Parameters: "7B",
Quantization: "Q4_0",
Architecture: "llama",
Size: "4.0GB",
},
}},
}
}

@@ -177,12 +177,12 @@ func TestListModelsSingleModel(t *testing.T) {
ID: "sha256:123456789012345678901234567890123456789012345678901234567890abcd",
Tags: []string{"single:latest"},
Created: 1000,
Config: &types.Config{
Config: &dmrm.ModelConfigWrapper{ModelConfig: &types.Config{
Parameters: "7B",
Quantization: "Q4_0",
Architecture: "llama",
Size: "4.0GB",
},
}},
},
}
output := prettyPrintModels(models)
@@ -234,23 +234,23 @@ func TestPrettyPrintModelsWithSortedInput(t *testing.T) {
ID: "sha256:123456789012345678901234567890123456789012345678901234567890abcd",
Tags: []string{"ai/apple:latest"},
Created: 1000,
Config: &types.Config{
Config: &dmrm.ModelConfigWrapper{ModelConfig: &types.Config{
Parameters: "7B",
Quantization: "Q4_0",
Architecture: "llama",
Size: "4.0GB",
},
}},
},
{
ID: "sha256:223456789012345678901234567890123456789012345678901234567890abcd",
Tags: []string{"ai/banana:v1"},
Created: 2000,
Config: &types.Config{
Config: &dmrm.ModelConfigWrapper{ModelConfig: &types.Config{
Parameters: "13B",
Quantization: "Q4_K_M",
Architecture: "llama",
Size: "8.0GB",
},
}},
},
}

@@ -282,12 +282,12 @@ func TestPrettyPrintModelsWithMultipleTags(t *testing.T) {
ID: "sha256:123456789012345678901234567890123456789012345678901234567890abcd",
Tags: []string{"qwen3:8B-Q4_K_M", "qwen3:latest", "qwen3:0.6B-F16"},
Created: 1000,
Config: &types.Config{
Config: &dmrm.ModelConfigWrapper{ModelConfig: &types.Config{
Parameters: "8B",
Quantization: "Q4_K_M",
Architecture: "qwen3",
Size: "4.68GB",
},
}},
},
}

39 changes: 30 additions & 9 deletions llamacpp/native/CMakeLists.txt
@@ -8,22 +8,43 @@ project(

option(DDLLAMA_BUILD_SERVER "Build the DD llama.cpp server executable" ON)
option(DDLLAMA_BUILD_UTILS "Build utilities, e.g. nv-gpu-info" OFF)
set(DDLLAMA_PATCH_COMMAND "patch" CACHE STRING "patch command")

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

if (DDLLAMA_BUILD_SERVER)
set(LLAMA_BUILD_COMMON ON)
# Build upstream llama.cpp with server enabled
# Only set these options if they're not already defined to allow consumers to override
if(NOT DEFINED LLAMA_BUILD_COMMON)
set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build common utils library")
endif()
if(NOT DEFINED LLAMA_BUILD_TOOLS)
set(LLAMA_BUILD_TOOLS ON CACHE BOOL "Build tools")
endif()
if(NOT DEFINED LLAMA_BUILD_SERVER)
set(LLAMA_BUILD_SERVER ON CACHE BOOL "Build server")
endif()
add_subdirectory(vendor/llama.cpp)
# Get build info and set version for mtmd just like it's done in llama.cpp/CMakeLists.txt
include(vendor/llama.cpp/cmake/build-info.cmake)
if (NOT DEFINED LLAMA_BUILD_NUMBER)
set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})

# Create custom target to copy llama-server to com.docker.llama-server
if (WIN32)
set(LLAMA_SERVER_DST "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/com.docker.llama-server.exe")
else()
set(LLAMA_SERVER_DST "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/com.docker.llama-server")
endif()
set(LLAMA_INSTALL_VERSION 0.0.${LLAMA_BUILD_NUMBER})
add_subdirectory(vendor/llama.cpp/tools/mtmd)
add_subdirectory(src/server)

add_custom_command(OUTPUT "${LLAMA_SERVER_DST}"
COMMAND ${CMAKE_COMMAND} -E copy "$<TARGET_FILE:llama-server>" "${LLAMA_SERVER_DST}"
DEPENDS llama-server
COMMENT "Creating com.docker.llama-server from llama-server"
)

add_custom_target(com.docker.llama-server ALL DEPENDS "${LLAMA_SERVER_DST}")

# Install the renamed binary using TARGETS instead of PROGRAMS for better cross-platform support
install(TARGETS llama-server
RUNTIME DESTINATION bin
RENAME "com.docker.llama-server${CMAKE_EXECUTABLE_SUFFIX}")
endif()

if (WIN32 AND DDLLAMA_BUILD_UTILS)
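For context on the `NOT DEFINED` guards: they let a build that vendors this directory, or a plain `-D` on the cmake command line, decide these llama.cpp options first; the wrapper only fills in defaults when nothing has been set. A hedged sketch of such a parent list file follows; the consumer project name, minimum version, and path are assumptions for illustration only.

```cmake
# Hypothetical parent project that vendors this repo; the path is illustrative.
cmake_minimum_required(VERSION 3.16)
project(consumer LANGUAGES C CXX)

# Cache entries created here are visible inside the subdirectory, so the
# guarded set(... CACHE ...) calls above see them as already DEFINED and
# leave them alone. These values match the defaults; a consumer could choose
# differently, but the com.docker.llama-server target needs llama-server built.
set(LLAMA_BUILD_COMMON ON CACHE BOOL "common utils library")
set(LLAMA_BUILD_TOOLS ON CACHE BOOL "llama.cpp tools")
set(LLAMA_BUILD_SERVER ON CACHE BOOL "upstream llama-server target")

add_subdirectory(model-runner/llamacpp/native)
```

Configuring standalone with `cmake -B build -DLLAMA_BUILD_TOOLS=ON` behaves the same way, since `-D` creates the cache entry before this file is processed.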
26 changes: 8 additions & 18 deletions llamacpp/native/README.md
@@ -1,5 +1,7 @@
# Native llama-server

This project builds the upstream llama.cpp server (`llama-server`) directly from the llama.cpp submodule and renames it to `com.docker.llama-server`.

## Building

cmake -B build
@@ -15,7 +17,7 @@

This project uses llama.cpp as a git submodule located at `vendor/llama.cpp`, which points to the official llama.cpp repository at https://github.com/ggml-org/llama.cpp.git.

The project applies custom patches to llama.cpp's server implementation (`server.cpp` and `utils.hpp`) to integrate with the Docker model-runner architecture. These patches are maintained in `src/server/server.patch`.
We build the upstream `llama-server` binary directly without any modifications.

### Prerequisites

@@ -45,32 +47,20 @@ If the submodule is already initialized, this command is safe to run and will en
popd
```

3. **Apply the custom llama-server patch:**
3. **Build and test:**

```bash
make -C src/server clean
make -C src/server
```

This will:
- Clean the previous patched files
- Copy the new `server.cpp` and `utils.hpp` from the updated llama.cpp
- Apply our custom patches from `src/server/server.patch`

4. **Build and test:**
# Build from the native directory
cmake -B build
cmake --build build --parallel 8 --config Release

```bash
# Build from the native directory
cmake -B build
cmake --build build --parallel 8 --config Release

# Test the build
./build/bin/com.docker.llama-server --model <path to model>
```

Make sure everything builds cleanly without errors.

5. **Commit the submodule update:**
4. **Commit the submodule update:**

```bash
git add vendor/llama.cpp
31 changes: 0 additions & 31 deletions llamacpp/native/src/server/CMakeLists.txt

This file was deleted.

18 changes: 0 additions & 18 deletions llamacpp/native/src/server/Makefile

This file was deleted.

24 changes: 0 additions & 24 deletions llamacpp/native/src/server/README.md

This file was deleted.
