Add vLLM #221
Changes from all commits
```diff
@@ -35,7 +35,7 @@ RUN --mount=type=cache,target=/go/pkg/mod \
 FROM docker/docker-model-backend-llamacpp:${LLAMA_SERVER_VERSION}-${LLAMA_SERVER_VARIANT} AS llama-server
 
 # --- Final image ---
-FROM docker.io/${BASE_IMAGE} AS final
+FROM docker.io/${BASE_IMAGE} AS llamacpp
 
 ARG LLAMA_SERVER_VARIANT
 
@@ -55,9 +55,6 @@ RUN mkdir -p /var/run/model-runner /app/bin /models && \
     chown -R modelrunner:modelrunner /var/run/model-runner /app /models && \
     chmod -R 755 /models
 
-# Copy the built binary from builder
-COPY --from=builder /app/model-runner /app/model-runner
-
 # Copy the llama.cpp binary from the llama-server stage
 ARG LLAMA_BINARY_PATH
 COPY --from=llama-server ${LLAMA_BINARY_PATH}/ /app/.
@@ -77,3 +74,31 @@ ENV LD_LIBRARY_PATH=/app/lib
 LABEL com.docker.desktop.service="model-runner"
 
 ENTRYPOINT ["/app/model-runner"]
+
+# --- vLLM variant ---
+FROM llamacpp AS vllm
+
+ARG VLLM_VERSION
+
+USER root
+
+RUN apt update && apt install -y python3 python3-venv python3-dev curl ca-certificates build-essential && rm -rf /var/lib/apt/lists/*
+
+RUN mkdir -p /opt/vllm-env && chown -R modelrunner:modelrunner /opt/vllm-env
+
+USER modelrunner
+
+# Install uv and vLLM as modelrunner user
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
+    && ~/.local/bin/uv venv --python /usr/bin/python3 /opt/vllm-env \
```

Inline review comments on the uv/vLLM install step:
Contributor: If we change this to copy from the vllm/vllm-openai:v0.11.0 container, we get DGX Spark support. (I know I suggested doing it this less hacky way; apologies, I didn't realize that container had aarch64 support and this approach doesn't appear to.) Could be a follow-on PR, too.
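For illustration only, a minimal sketch of the copy-from-container idea being suggested. The source path is an assumption about the vllm/vllm-openai image layout, which this PR does not show:

```dockerfile
# Hypothetical sketch of the suggested alternative: reuse the official image's
# prebuilt vLLM environment instead of pip-installing from PyPI. The source
# path is an assumed location, not one verified against the actual image.
FROM vllm/vllm-openai:v0.11.0 AS vllm-openai

FROM llamacpp AS vllm
COPY --from=vllm-openai /usr/local/lib/python3.12/dist-packages /opt/vllm-env/lib
```

As the next comment points out, this kind of cross-image copy is fragile (missing files, OS and library version mismatches), which is why the wheel-based approach below was preferred.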
Contributor: Or, even better, install the wheels from here: https://wheels.vllm.ai/b8b302cde434df8c9289a2b465406b47ebab1c2d/vllm/. That commit SHA is the v0.11.0 one. They tipped me off in the vLLM Slack that they build CUDA x86_64 and aarch64 wheels for every commit. So this is the same thing, but with an aarch64 version as well. It would be better than the hacky container copy, which is prone to error: missing files, OS mismatch, library version mismatch.
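A sketch of how that might slot into the existing install step. Treating the per-commit URL as a package index and passing it via --extra-index-url is an assumption based on this comment, not something shown in the PR:

```dockerfile
# Hypothetical: resolve vLLM from the per-commit wheel index (which, per the
# comment above, publishes both x86_64 and aarch64 CUDA wheels) rather than
# PyPI alone. The SHA is the one the reviewer identified as v0.11.0.
ARG VLLM_COMMIT=b8b302cde434df8c9289a2b465406b47ebab1c2d
RUN ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python \
    --extra-index-url "https://wheels.vllm.ai/${VLLM_COMMIT}/" \
    "vllm==${VLLM_VERSION}"
```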
Contributor: This is a way to get that programmatically:
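The snippet itself did not survive extraction; purely as an illustrative stand-in (an assumption, not the original comment's code), the release-tag-to-commit mapping can be looked up with git:

```bash
# List the v0.11.0 tag on the vLLM repo. For an annotated tag, the line
# suffixed with ^{} carries the peeled commit SHA, which is the value to
# plug into the wheels.vllm.ai URL.
git ls-remote --tags https://github.com/vllm-project/vllm v0.11.0
```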
The diff then continues:

```diff
+    && ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "vllm==${VLLM_VERSION}"
+
+RUN /opt/vllm-env/bin/python -c "import vllm; print(vllm.__version__)" > /opt/vllm-env/version
+
+FROM llamacpp AS final-llamacpp
+# Copy the built binary from builder
+COPY --from=builder /app/model-runner /app/model-runner
+
+FROM vllm AS final-vllm
+# Copy the built binary from builder
+COPY --from=builder /app/model-runner /app/model-runner
```

doringeman marked these conversations as resolved.
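For orientation, the renamed stages make the image flavor selectable at build time via --target. A usage sketch, assuming the stage names and build arg from the diff above (image tags and the version value are placeholders, not from the PR):

```bash
# Default llama.cpp flavor (stage name from the diff above)
docker build --target final-llamacpp -t model-runner:llamacpp .

# vLLM flavor; VLLM_VERSION pins what uv installs into /opt/vllm-env
# (0.11.0 is used purely as an example value)
docker build --target final-vllm --build-arg VLLM_VERSION=0.11.0 -t model-runner:vllm .
```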
Review comment: The tag generation logic for vllm-cuda duplicates the pattern used for the cuda tags above. Consider extracting this into a reusable function or template to reduce code duplication and improve maintainability.
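The CI code being referenced is not part of this diff; purely as a hypothetical illustration of the suggestion, a shared shell helper might look like this (names and tag shapes invented for the example):

```bash
# Hypothetical helper: one tag template for every backend variant, instead of
# duplicating the same pattern for "cuda" and "vllm-cuda" separately.
generate_tags() {
  local variant="$1" version="$2"
  echo "docker/model-runner:${variant}"
  echo "docker/model-runner:${variant}-${version}"
}

generate_tags cuda "v1.0.0"        # docker/model-runner:cuda, docker/model-runner:cuda-v1.0.0
generate_tags vllm-cuda "v1.0.0"   # docker/model-runner:vllm-cuda, docker/model-runner:vllm-cuda-v1.0.0
```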