Merged

Changes from all commits (24 commits)
2b06217
build: add new vLLM CUDA image
doringeman Oct 13, 2025
aa89eca
fix(Dockerfile): add build-essential package
ilopezluna Oct 14, 2025
4c5bc73
fix(Dockerfile): add python3-venv and python3-dev packages
ilopezluna Oct 14, 2025
e27c7d0
fix(Dockerfile): use the final target for CPU and CUDA builds
doringeman Oct 14, 2025
5311dfe
feat(vllm): implement vLLM backend
ilopezluna Oct 14, 2025
b6d3432
fix(client): remove unsupported model format checks for Safetensors
ilopezluna Oct 14, 2025
c2af7ce
perf: optimize Docker build by restructuring stage dependencies
doringeman Oct 14, 2025
c870ab0
feat(cli): allow vLLM backend
doringeman Oct 14, 2025
b554107
fix(vllm): use model bundle directory
doringeman Oct 14, 2025
609abf6
refactor(backend): add modelRef parameter to Backend.Run interface
doringeman Oct 14, 2025
717c407
fix(client): remove unsupported model format checks for Safetensors o…
doringeman Oct 14, 2025
0920524
fix(vllm): check vllm.New error
doringeman Oct 16, 2025
b94bd9f
vllm: Linux support only
doringeman Oct 16, 2025
2be8126
vllm: add version detection from build-time capture
doringeman Oct 16, 2025
0bb2f02
feat(scheduler): automatically identify models for vLLM
doringeman Oct 21, 2025
bf8d9e7
feat(vllm): enhance argument handling for vLLM backend configuration
ilopezluna Oct 21, 2025
c771e94
fix(vllm): update model path handling to use directory for safetensors
ilopezluna Oct 21, 2025
1a6f20e
fix(vllm): add model ID as served-model-name
doringeman Oct 21, 2025
0a9893b
feat(vllm): add argument sanitization for safe logging
ilopezluna Oct 21, 2025
a8a6dc0
update actions version to be pinned to a full-length commit SHA
ilopezluna Oct 21, 2025
6ca1a5a
fix(vllm): improve logging for backend errors and warn about unsuppor…
ilopezluna Oct 22, 2025
20272f2
fix(vllm): sanitize model reference in backend error logging
ilopezluna Oct 22, 2025
b0b6ccc
fix(vllm): sanitize model reference in warning log for unsupported sa…
ilopezluna Oct 22, 2025
13e2b4f
fix(vllm): validate safetensors path before serving and update test c…
ilopezluna Oct 22, 2025
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
@@ -11,13 +11,13 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955

- name: Verify vendor/ is not present
run: stat vendor && exit 1 || exit 0

- name: Set up Go
uses: actions/setup-go@v5
uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5
with:
go-version: 1.24.2
cache: true
6 changes: 3 additions & 3 deletions .github/workflows/cli-build.yml
@@ -25,8 +25,8 @@ jobs:
id-token: write
contents: read
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
- uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
- uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5
with:
go-version-file: cmd/cli/go.mod
cache: true
Expand All @@ -35,7 +35,7 @@ jobs:
working-directory: cmd/cli
run: |
make release VERSION=${{ github.sha }}
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
with:
name: dist
path: |
6 changes: 3 additions & 3 deletions .github/workflows/cli-validate.yml
@@ -31,11 +31,11 @@ jobs:
steps:
-
name: Checkout
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
-
name: List targets
id: generate
uses: docker/bake-action/subaction/list-targets@v6
uses: docker/bake-action/subaction/list-targets@3acf805d94d93a86cce4ca44798a76464a75b88c
with:
files: ./cmd/cli/docker-bake.hcl
target: validate
@@ -51,7 +51,7 @@ jobs:
steps:
-
name: Checkout
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
2 changes: 1 addition & 1 deletion .github/workflows/dmr-daily-check.yml
@@ -22,7 +22,7 @@ jobs:

steps:
- name: Set up Docker
uses: docker/setup-docker-action@v4
uses: docker/setup-docker-action@3fb92d6d9c634363128c8cce4bc3b2826526370a

- name: Install docker-model-plugin
run: |
43 changes: 36 additions & 7 deletions .github/workflows/release.yml
@@ -19,16 +19,21 @@ on:
required: false
type: string
default: "latest"
vllmVersion:
description: 'vLLM version'
required: false
type: string
default: "0.11.0"

jobs:
test:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955

- name: Set up Go
uses: actions/setup-go@v5
uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5
with:
go-version: 1.24.2
cache: true
@@ -41,7 +46,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955

- name: Format tags
id: tags
@@ -59,25 +64,32 @@
echo "docker/model-runner:latest-cuda" >> "$GITHUB_OUTPUT"
fi
echo 'EOF' >> "$GITHUB_OUTPUT"
echo "vllm-cuda<<EOF" >> "$GITHUB_OUTPUT"
echo "docker/model-runner:${{ inputs.releaseTag }}-vllm-cuda" >> "$GITHUB_OUTPUT"
if [ "${{ inputs.pushLatest }}" == "true" ]; then
echo "docker/model-runner:latest-vllm-cuda" >> "$GITHUB_OUTPUT"
fi
echo 'EOF' >> "$GITHUB_OUTPUT"
Comment on lines +67 to +72

Copilot AI Oct 14, 2025

The tag generation logic for vllm-cuda duplicates the pattern used for the cuda tags above. Consider extracting this into a reusable function or template to reduce code duplication and improve maintainability.
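
A minimal sketch of that refactor, assuming a small shell helper defined in the same run step (the emit_tags name and argument order are illustrative, not part of this PR):

# Illustrative only: one helper replacing the three copy-pasted blocks.
# Usage: emit_tags <output-name> <tag-suffix>
emit_tags() {
  local name="$1" suffix="$2"
  echo "${name}<<EOF" >> "$GITHUB_OUTPUT"
  echo "docker/model-runner:${{ inputs.releaseTag }}${suffix}" >> "$GITHUB_OUTPUT"
  if [ "${{ inputs.pushLatest }}" == "true" ]; then
    echo "docker/model-runner:latest${suffix}" >> "$GITHUB_OUTPUT"
  fi
  echo 'EOF' >> "$GITHUB_OUTPUT"
}

emit_tags cpu ""
emit_tags cuda "-cuda"
emit_tags vllm-cuda "-vllm-cuda"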


- name: Log in to DockerHub
uses: docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
with:
username: "docker"
password: ${{ secrets.ORG_ACCESS_TOKEN }}

- name: Set up Buildx
uses: docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435
with:
version: "lab:latest"
driver: cloud
endpoint: "docker/make-product-smarter"
install: true

- name: Build CPU image
uses: docker/build-push-action@v5
uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25
with:
file: Dockerfile
target: final-llamacpp
platforms: linux/amd64, linux/arm64
build-args: |
"LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
@@ -87,9 +99,10 @@ jobs:
tags: ${{ steps.tags.outputs.cpu }}

- name: Build CUDA image
uses: docker/build-push-action@v5
uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25
with:
file: Dockerfile
target: final-llamacpp
platforms: linux/amd64, linux/arm64
build-args: |
"LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
@@ -99,3 +112,19 @@ jobs:
sbom: true
provenance: mode=max
tags: ${{ steps.tags.outputs.cuda }}

- name: Build vLLM CUDA image
uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25
with:
file: Dockerfile
target: final-vllm
platforms: linux/amd64
build-args: |
"LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
"LLAMA_SERVER_VARIANT=cuda"
"BASE_IMAGE=nvidia/cuda:12.9.0-runtime-ubuntu24.04"
"VLLM_VERSION=${{ inputs.vllmVersion }}"
push: true
sbom: true
provenance: mode=max
tags: ${{ steps.tags.outputs.vllm-cuda }}
33 changes: 29 additions & 4 deletions Dockerfile
@@ -35,7 +35,7 @@ RUN --mount=type=cache,target=/go/pkg/mod \
FROM docker/docker-model-backend-llamacpp:${LLAMA_SERVER_VERSION}-${LLAMA_SERVER_VARIANT} AS llama-server

# --- Final image ---
FROM docker.io/${BASE_IMAGE} AS final
FROM docker.io/${BASE_IMAGE} AS llamacpp

ARG LLAMA_SERVER_VARIANT

@@ -55,9 +55,6 @@ RUN mkdir -p /var/run/model-runner /app/bin /models && \
chown -R modelrunner:modelrunner /var/run/model-runner /app /models && \
chmod -R 755 /models

# Copy the built binary from builder
COPY --from=builder /app/model-runner /app/model-runner

# Copy the llama.cpp binary from the llama-server stage
ARG LLAMA_BINARY_PATH
COPY --from=llama-server ${LLAMA_BINARY_PATH}/ /app/.
@@ -77,3 +74,31 @@ ENV LD_LIBRARY_PATH=/app/lib
LABEL com.docker.desktop.service="model-runner"

ENTRYPOINT ["/app/model-runner"]

# --- vLLM variant ---
FROM llamacpp AS vllm

ARG VLLM_VERSION

USER root

RUN apt update && apt install -y python3 python3-venv python3-dev curl ca-certificates build-essential && rm -rf /var/lib/apt/lists/*

RUN mkdir -p /opt/vllm-env && chown -R modelrunner:modelrunner /opt/vllm-env

USER modelrunner

# Install uv and vLLM as modelrunner user
RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
&& ~/.local/bin/uv venv --python /usr/bin/python3 /opt/vllm-env \
@ericcurtin (Contributor) Oct 21, 2025

If we change this to copy from the vllm/vllm-openai:v0.11.0 container, we get DGX Spark support (I know I suggested doing it this less hacky way; apologies, I didn't realize the container had aarch64 builds and this approach doesn't appear to).

Could be a follow-on PR too.

@ericcurtin (Contributor) Oct 21, 2025

Or, even better, install the wheels from here:

https://wheels.vllm.ai/b8b302cde434df8c9289a2b465406b47ebab1c2d/vllm/

That commit SHA is the 0.11.0 one.

They tipped me off in vLLM stack that they build CUDA x86_64 and aarch64 wheels for every commit. So this is the same thing, but has an aarch64 version also.

It would be better than the hacky container copy (which is prone to errors: missing files, OS mismatches, library version mismatches).

@ericcurtin (Contributor) Oct 21, 2025

This is a way to get that programmatically:

$ git rev-list -n 1 v0.11.0
b8b302cde434df8c9289a2b465406b47ebab1c2d

&& ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "vllm==${VLLM_VERSION}"

RUN /opt/vllm-env/bin/python -c "import vllm; print(vllm.__version__)" > /opt/vllm-env/version
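
A rough sketch of the wheel-based install suggested in the review comments above, not part of this diff: it assumes the per-commit listing at wheels.vllm.ai can be used as an extra package index for uv, and that the release's commit SHA has been resolved beforehand (e.g. via git rev-list in a vLLM checkout, or passed in as a build arg).

# Illustrative alternative to the PyPI install above. vLLM publishes CUDA
# x86_64 and aarch64 wheels for every commit, so a commit-pinned index
# would also cover aarch64 hosts such as DGX Spark.
VLLM_COMMIT="b8b302cde434df8c9289a2b465406b47ebab1c2d"  # git rev-list -n 1 v0.11.0
~/.local/bin/uv pip install \
    --python /opt/vllm-env/bin/python \
    --extra-index-url "https://wheels.vllm.ai/${VLLM_COMMIT}/" \
    "vllm==${VLLM_VERSION}"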

FROM llamacpp AS final-llamacpp
# Copy the built binary from builder
COPY --from=builder /app/model-runner /app/model-runner

FROM vllm AS final-vllm
# Copy the built binary from builder
COPY --from=builder /app/model-runner /app/model-runner
1 change: 1 addition & 0 deletions cmd/cli/commands/backend.go
@@ -13,6 +13,7 @@ import (
var ValidBackends = map[string]bool{
"llama.cpp": true,
"openai": true,
"vllm": true,
}

// validateBackend checks if the provided backend is valid
2 changes: 1 addition & 1 deletion cmd/cli/docs/reference/docker_model_list.yaml
@@ -8,7 +8,7 @@ plink: docker_model.yaml
options:
- option: backend
value_type: string
description: Specify the backend to use (llama.cpp, openai)
description: Specify the backend to use (llama.cpp, openai, vllm)
deprecated: false
hidden: true
experimental: false
2 changes: 1 addition & 1 deletion cmd/cli/docs/reference/docker_model_run.yaml
@@ -12,7 +12,7 @@ plink: docker_model.yaml
options:
- option: backend
value_type: string
description: Specify the backend to use (llama.cpp, openai)
description: Specify the backend to use (llama.cpp, openai, vllm)
deprecated: false
hidden: true
experimental: false
13 changes: 12 additions & 1 deletion main.go
@@ -14,6 +14,7 @@ import (
"github.com/docker/model-runner/pkg/gpuinfo"
"github.com/docker/model-runner/pkg/inference"
"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
"github.com/docker/model-runner/pkg/inference/backends/vllm"
"github.com/docker/model-runner/pkg/inference/config"
"github.com/docker/model-runner/pkg/inference/memory"
"github.com/docker/model-runner/pkg/inference/models"
@@ -119,9 +120,19 @@ func main() {

memEstimator.SetDefaultBackend(llamaCppBackend)

vllmBackend, err := vllm.New(
log,
modelManager,
log.WithFields(logrus.Fields{"component": "vllm"}),
nil,
)
if err != nil {
log.Fatalf("unable to initialize %s backend: %v", vllm.Name, err)
}

scheduler := scheduling.NewScheduler(
log,
map[string]inference.Backend{llamacpp.Name: llamaCppBackend},
map[string]inference.Backend{llamacpp.Name: llamaCppBackend, vllm.Name: vllmBackend},
llamaCppBackend,
modelManager,
http.DefaultClient,
13 changes: 11 additions & 2 deletions pkg/distribution/distribution/client.go
@@ -6,15 +6,17 @@ import (
"fmt"
"io"
"net/http"
"slices"

"github.com/docker/model-runner/pkg/distribution/internal/utils"
"github.com/docker/model-runner/pkg/internal/utils"
"github.com/sirupsen/logrus"

"github.com/docker/model-runner/pkg/distribution/internal/progress"
"github.com/docker/model-runner/pkg/distribution/internal/store"
"github.com/docker/model-runner/pkg/distribution/registry"
"github.com/docker/model-runner/pkg/distribution/tarball"
"github.com/docker/model-runner/pkg/distribution/types"
"github.com/docker/model-runner/pkg/inference/platform"
)

// Client provides model distribution functionality
@@ -408,6 +410,13 @@ func (c *Client) GetBundle(ref string) (types.ModelBundle, error) {
return c.store.BundleForModel(ref)
}

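// GetSupportedFormats returns the model formats this build can serve:
// GGUF always, plus Safetensors when the platform supports the vLLM backend.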
func GetSupportedFormats() []types.Format {
if platform.SupportsVLLM() {
return []types.Format{types.FormatGGUF, types.FormatSafetensors}
}
return []types.Format{types.FormatGGUF}
}

func checkCompat(image types.ModelArtifact) error {
manifest, err := image.Manifest()
if err != nil {
@@ -423,7 +432,7 @@ func checkCompat(image types.ModelArtifact) error {
return fmt.Errorf("reading model config: %w", err)
}

if config.Format == types.FormatSafetensors {
if !slices.Contains(GetSupportedFormats(), config.Format) {
return ErrUnsupportedFormat
}

17 changes: 13 additions & 4 deletions pkg/distribution/distribution/client_test.go
@@ -26,6 +26,7 @@ import (
"github.com/docker/model-runner/pkg/distribution/internal/progress"
"github.com/docker/model-runner/pkg/distribution/internal/safetensors"
mdregistry "github.com/docker/model-runner/pkg/distribution/registry"
"github.com/docker/model-runner/pkg/inference/platform"
)

var (
@@ -418,7 +419,7 @@ func TestClientPullModel(t *testing.T) {
}
})

t.Run("pull safetensors model returns error", func(t *testing.T) {
t.Run("pull safetensors model returns error on unsupported platforms", func(t *testing.T) {
// Create temp directory for the safetensors file
tempDir, err := os.MkdirTemp("", "safetensors-test-*")
if err != nil {
@@ -461,10 +462,18 @@
t.Fatalf("Failed to create test client: %v", err)
}

// Try to pull the safetensors model - should fail with ErrUnsupportedFormat
// Try to pull the safetensors model
err = testClient.PullModel(context.Background(), tag, nil)
if !errors.Is(err, ErrUnsupportedFormat) {
t.Fatalf("Expected ErrUnsupportedFormat, got: %v", err)
if platform.SupportsVLLM() {
// On Linux, safetensors should be supported
if err != nil {
t.Fatalf("Expected no error on Linux, got: %v", err)
}
} else {
// On non-Linux, should fail with ErrUnsupportedFormat
if !errors.Is(err, ErrUnsupportedFormat) {
t.Fatalf("Expected ErrUnsupportedFormat on non-Linux platforms, got: %v", err)
}
}
})
