diff --git a/commands/package.go b/commands/package.go index d59bf0de..02c6edd7 100644 --- a/commands/package.go +++ b/commands/package.go @@ -25,7 +25,9 @@ func newPackagedCmd() *cobra.Command { c := &cobra.Command{ Use: "package --gguf [--license ...] [--context-size ] [--push] MODEL", - Short: "Package a GGUF file into a Docker model OCI artifact, with optional licenses. The package is sent to the model-runner, unless --push is specified", + Short: "Package a GGUF file into a Docker model OCI artifact, with optional licenses.", + Long: "Package a GGUF file into a Docker model OCI artifact, with optional licenses. The package is sent to the model-runner, unless --push is specified.\n" + + "When packaging a sharded model --gguf should point to the first shard. All shard files should be siblings and should include the index in the file name (e.g. model-00001-of-00015.gguf).", Args: func(cmd *cobra.Command, args []string) error { if len(args) != 1 { return fmt.Errorf( diff --git a/docs/reference/docker_model_package.yaml b/docs/reference/docker_model_package.yaml index e00078e1..999d54d7 100644 --- a/docs/reference/docker_model_package.yaml +++ b/docs/reference/docker_model_package.yaml @@ -1,8 +1,9 @@ command: docker model package short: | - Package a GGUF file into a Docker model OCI artifact, with optional licenses. The package is sent to the model-runner, unless --push is specified -long: | - Package a GGUF file into a Docker model OCI artifact, with optional licenses. The package is sent to the model-runner, unless --push is specified + Package a GGUF file into a Docker model OCI artifact, with optional licenses. +long: |- + Package a GGUF file into a Docker model OCI artifact, with optional licenses. The package is sent to the model-runner, unless --push is specified. + When packaging a sharded model --gguf should point to the first shard. All shard files should be siblings and should include the index in the file name (e.g. model-00001-of-00015.gguf). usage: docker model package --gguf [--license ...] [--context-size ] [--push] MODEL pname: docker model plink: docker_model.yaml diff --git a/docs/reference/model.md b/docs/reference/model.md index 144cf7a0..375cae66 100644 --- a/docs/reference/model.md +++ b/docs/reference/model.md @@ -5,24 +5,24 @@ Docker Model Runner (EXPERIMENTAL) ### Subcommands -| Name | Description | -|:------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------| -| [`df`](model_df.md) | Show Docker Model Runner disk usage | -| [`inspect`](model_inspect.md) | Display detailed information on one model | -| [`install-runner`](model_install-runner.md) | Install Docker Model Runner (Docker Engine only) | -| [`list`](model_list.md) | List the models pulled to your local environment | -| [`logs`](model_logs.md) | Fetch the Docker Model Runner logs | -| [`package`](model_package.md) | Package a GGUF file into a Docker model OCI artifact, with optional licenses. The package is sent to the model-runner, unless --push is specified | -| [`ps`](model_ps.md) | List running models | -| [`pull`](model_pull.md) | Pull a model from Docker Hub or HuggingFace to your local environment | -| [`push`](model_push.md) | Push a model to Docker Hub | -| [`rm`](model_rm.md) | Remove local models downloaded from Docker Hub | -| [`run`](model_run.md) | Run a model and interact with it using a submitted prompt or chat mode | -| [`status`](model_status.md) | Check if the Docker Model Runner is running | -| [`tag`](model_tag.md) | Tag a model | -| [`uninstall-runner`](model_uninstall-runner.md) | Uninstall Docker Model Runner | -| [`unload`](model_unload.md) | Unload running models | -| [`version`](model_version.md) | Show the Docker Model Runner version | +| Name | Description | +|:------------------------------------------------|:------------------------------------------------------------------------------| +| [`df`](model_df.md) | Show Docker Model Runner disk usage | +| [`inspect`](model_inspect.md) | Display detailed information on one model | +| [`install-runner`](model_install-runner.md) | Install Docker Model Runner (Docker Engine only) | +| [`list`](model_list.md) | List the models pulled to your local environment | +| [`logs`](model_logs.md) | Fetch the Docker Model Runner logs | +| [`package`](model_package.md) | Package a GGUF file into a Docker model OCI artifact, with optional licenses. | +| [`ps`](model_ps.md) | List running models | +| [`pull`](model_pull.md) | Pull a model from Docker Hub or HuggingFace to your local environment | +| [`push`](model_push.md) | Push a model to Docker Hub | +| [`rm`](model_rm.md) | Remove local models downloaded from Docker Hub | +| [`run`](model_run.md) | Run a model and interact with it using a submitted prompt or chat mode | +| [`status`](model_status.md) | Check if the Docker Model Runner is running | +| [`tag`](model_tag.md) | Tag a model | +| [`uninstall-runner`](model_uninstall-runner.md) | Uninstall Docker Model Runner | +| [`unload`](model_unload.md) | Unload running models | +| [`version`](model_version.md) | Show the Docker Model Runner version | diff --git a/docs/reference/model_package.md b/docs/reference/model_package.md index e08c7762..8c68272c 100644 --- a/docs/reference/model_package.md +++ b/docs/reference/model_package.md @@ -1,7 +1,8 @@ # docker model package -Package a GGUF file into a Docker model OCI artifact, with optional licenses. The package is sent to the model-runner, unless --push is specified +Package a GGUF file into a Docker model OCI artifact, with optional licenses. The package is sent to the model-runner, unless --push is specified. +When packaging a sharded model --gguf should point to the first shard. All shard files should be siblings and should include the index in the file name (e.g. model-00001-of-00015.gguf). ### Options diff --git a/go.mod b/go.mod index 279de962..5259d6bf 100644 --- a/go.mod +++ b/go.mod @@ -11,8 +11,8 @@ require ( github.com/docker/docker v28.2.2+incompatible github.com/docker/go-connections v0.5.0 github.com/docker/go-units v0.5.0 - github.com/docker/model-distribution v0.0.0-20250813080006-2a983516ebb8 - github.com/docker/model-runner v0.0.0-20250822151118-d8ed37445584 + github.com/docker/model-distribution v0.0.0-20250822172258-8fe9daa4a4da + github.com/docker/model-runner v0.0.0-20250822173738-5341c9fc2974 github.com/fatih/color v1.15.0 github.com/google/go-containerregistry v0.20.6 github.com/mattn/go-isatty v0.0.20 diff --git a/go.sum b/go.sum index 5a76301c..997998e1 100644 --- a/go.sum +++ b/go.sum @@ -78,10 +78,10 @@ github.com/docker/go-metrics v0.0.1/go.mod h1:cG1hvH2utMXtqgqqYE9plW6lDxS3/5ayHz github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/docker/libtrust v0.0.0-20160708172513-aabc10ec26b7/go.mod h1:cyGadeNEkKy96OOhEzfZl+yxihPEzKnqJwvfuSUqbZE= -github.com/docker/model-distribution v0.0.0-20250813080006-2a983516ebb8 h1:agH5zeO6tf8lHgMcBZxqCFKPuXHM/cA53gdsn895eMI= -github.com/docker/model-distribution v0.0.0-20250813080006-2a983516ebb8/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c= -github.com/docker/model-runner v0.0.0-20250822151118-d8ed37445584 h1:8YAzh9lihwcFGyHTK9pTFqdM7IwYwb0R/YkrNxmQ2do= -github.com/docker/model-runner v0.0.0-20250822151118-d8ed37445584/go.mod h1:0IAh5ekLg8ipcPAF+Rdav1wbt9xF+zQPoRC1bblk/ik= +github.com/docker/model-distribution v0.0.0-20250822172258-8fe9daa4a4da h1:ml99WBfcLnsy1frXQR4X+5WAC0DoGtwZyGoU/xBsDQM= +github.com/docker/model-distribution v0.0.0-20250822172258-8fe9daa4a4da/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c= +github.com/docker/model-runner v0.0.0-20250822173738-5341c9fc2974 h1:/uF17tBEtsE6T2Xgg4cgrrqNcQ02gY5Lp98je+2K0nQ= +github.com/docker/model-runner v0.0.0-20250822173738-5341c9fc2974/go.mod h1:1Q2QRB5vob542x6P5pQXlGTYs5bYPxNG6ePcjTndA0A= github.com/dvsekhvalnov/jose2go v0.0.0-20170216131308-f21a8cedbbae/go.mod h1:7BvyPhdbLxMXIYTFPLsyJRFMsKmOZnQmzh6Gb+uquuM= github.com/elastic/go-sysinfo v1.15.3 h1:W+RnmhKFkqPTCRoFq2VCTmsT4p/fwpo+3gKNQsn1XU0= github.com/elastic/go-sysinfo v1.15.3/go.mod h1:K/cNrqYTDrSoMh2oDkYEMS2+a72GRxMvNP+GC+vRIlo= diff --git a/vendor/github.com/docker/model-distribution/distribution/client.go b/vendor/github.com/docker/model-distribution/distribution/client.go index 5c008c41..ca1e0229 100644 --- a/vendor/github.com/docker/model-distribution/distribution/client.go +++ b/vendor/github.com/docker/model-distribution/distribution/client.go @@ -6,7 +6,6 @@ import ( "fmt" "io" "net/http" - "os" "github.com/sirupsen/logrus" @@ -159,20 +158,12 @@ func (c *Client) PullModel(ctx context.Context, reference string, progressWriter localModel, err := c.store.Read(remoteDigest.String()) if err == nil { c.log.Infoln("Model found in local store:", reference) - ggufPath, err := localModel.GGUFPath() + cfg, err := localModel.Config() if err != nil { - return fmt.Errorf("getting gguf path: %w", err) + return fmt.Errorf("getting cached model config: %w", err) } - // Get file size for progress reporting - fileInfo, err := os.Stat(ggufPath) - if err != nil { - return fmt.Errorf("getting file info: %w", err) - } - - // Report progress for local model - size := fileInfo.Size() - err = progress.WriteSuccess(progressWriter, fmt.Sprintf("Using cached model: %.2f MB", float64(size)/1024/1024)) + err = progress.WriteSuccess(progressWriter, fmt.Sprintf("Using cached model: %s", cfg.Size)) if err != nil { c.log.Warnf("Writing progress: %v", err) // If we fail to write progress, don't try again @@ -403,6 +394,11 @@ func (c *Client) ResetStore() error { return nil } +// GetBundle returns a types.Bundle containing the model, creating one as necessary +func (c *Client) GetBundle(ref string) (types.ModelBundle, error) { + return c.store.BundleForModel(ref) +} + func checkCompat(image types.ModelArtifact) error { manifest, err := image.Manifest() if err != nil { diff --git a/vendor/github.com/docker/model-distribution/internal/bundle/bundle.go b/vendor/github.com/docker/model-distribution/internal/bundle/bundle.go new file mode 100644 index 00000000..a32b8031 --- /dev/null +++ b/vendor/github.com/docker/model-distribution/internal/bundle/bundle.go @@ -0,0 +1,42 @@ +package bundle + +import ( + "path/filepath" + + "github.com/docker/model-distribution/types" +) + +// Bundle represents a runtime bundle containing a model and runtime config +type Bundle struct { + dir string + mmprojPath string + ggufFile string // path to GGUF file (first shard when model is split among files) + runtimeConfig types.Config +} + +// RootDir return the path to the bundle root directory +func (b *Bundle) RootDir() string { + return b.dir +} + +// GGUFPath return the path to model GGUF file. If the model is sharded this will be the path to the first shard, +// containing metadata headers. +func (b *Bundle) GGUFPath() string { + if b.ggufFile == "" { + return "" + } + return filepath.Join(b.dir, b.ggufFile) +} + +// MMPROJPath returns the path to a multi-modal projector file or "" if none is present. +func (b *Bundle) MMPROJPath() string { + if b.mmprojPath == "" { + return "" + } + return filepath.Join(b.dir, b.mmprojPath) +} + +// RuntimeConfig returns config that should be respected by the backend at runtime. +func (b *Bundle) RuntimeConfig() types.Config { + return b.runtimeConfig +} diff --git a/vendor/github.com/docker/model-distribution/internal/bundle/parse.go b/vendor/github.com/docker/model-distribution/internal/bundle/parse.go new file mode 100644 index 00000000..016254cb --- /dev/null +++ b/vendor/github.com/docker/model-distribution/internal/bundle/parse.go @@ -0,0 +1,73 @@ +package bundle + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + + "github.com/docker/model-distribution/types" +) + +// Parse returns the Bundle at the given rootDir +func Parse(rootDir string) (*Bundle, error) { + if fi, err := os.Stat(rootDir); err != nil || !fi.IsDir() { + return nil, fmt.Errorf("inspect bundle root dir: %w", err) + } + ggufPath, err := findGGUFFile(rootDir) + if err != nil { + return nil, err + } + mmprojPath, err := findMultiModalProjectorFile(rootDir) + if err != nil { + return nil, err + } + cfg, err := parseRuntimeConfig(rootDir) + if err != nil { + return nil, err + } + return &Bundle{ + dir: rootDir, + mmprojPath: mmprojPath, + ggufFile: ggufPath, + runtimeConfig: cfg, + }, nil +} + +func parseRuntimeConfig(rootDir string) (types.Config, error) { + f, err := os.Open(filepath.Join(rootDir, "config.json")) + if err != nil { + return types.Config{}, fmt.Errorf("open runtime config: %w", err) + } + defer f.Close() + var cfg types.Config + if err := json.NewDecoder(f).Decode(&cfg); err != nil { + return types.Config{}, fmt.Errorf("decode runtime config: %w", err) + } + return cfg, nil +} + +func findGGUFFile(rootDir string) (string, error) { + ggufs, err := filepath.Glob(filepath.Join(rootDir, "[^.]*.gguf")) + if err != nil { + return "", fmt.Errorf("find gguf files: %w", err) + } + if len(ggufs) == 0 { + return "", fmt.Errorf("no GGUF files found in bundle directory") + } + return filepath.Base(ggufs[0]), nil +} + +func findMultiModalProjectorFile(rootDir string) (string, error) { + mmprojPaths, err := filepath.Glob(filepath.Join(rootDir, "[^.]*.mmproj")) + if err != nil { + return "", err + } + if len(mmprojPaths) == 0 { + return "", nil + } + if len(mmprojPaths) > 1 { + return "", fmt.Errorf("found multiple .mmproj files, but only 1 is supported") + } + return filepath.Base(mmprojPaths[0]), nil +} diff --git a/vendor/github.com/docker/model-distribution/internal/bundle/unpack.go b/vendor/github.com/docker/model-distribution/internal/bundle/unpack.go new file mode 100644 index 00000000..5fe6a23e --- /dev/null +++ b/vendor/github.com/docker/model-distribution/internal/bundle/unpack.go @@ -0,0 +1,85 @@ +package bundle + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + + "github.com/docker/model-distribution/types" +) + +// Unpack creates and return a Bundle by unpacking files and config from model into dir. +func Unpack(dir string, model types.Model) (*Bundle, error) { + bundle := &Bundle{ + dir: dir, + } + if err := unpackGGUFs(bundle, model); err != nil { + return nil, fmt.Errorf("add GGUF file(s) to runtime bundle: %w", err) + } + if err := unpackMultiModalProjector(bundle, model); err != nil { + return nil, fmt.Errorf("add multi-model projector file to runtime bundle: %w", err) + } + if err := unpackRuntimeConfig(bundle, model); err != nil { + return nil, fmt.Errorf("add config.json to runtime bundle: %w", err) + } + return bundle, nil +} + +func unpackRuntimeConfig(bundle *Bundle, mdl types.Model) error { + cfg, err := mdl.Config() + if err != nil { + return err + } + f, err := os.Create(filepath.Join(bundle.dir, "config.json")) + if err != nil { + return fmt.Errorf("create runtime config file: %w", err) + } + defer f.Close() + if err := json.NewEncoder(f).Encode(cfg); err != nil { + return fmt.Errorf("encode runtime config: %w", err) + } + bundle.runtimeConfig = cfg + return nil +} + +func unpackGGUFs(bundle *Bundle, mdl types.Model) error { + ggufPaths, err := mdl.GGUFPaths() + if err != nil { + return fmt.Errorf("get GGUF files for model: %w", err) + } + + if len(ggufPaths) == 1 { + if err := unpackFile(filepath.Join(bundle.dir, "model.gguf"), ggufPaths[0]); err != nil { + return err + } + bundle.ggufFile = "model.gguf" + return err + } + + for i := range ggufPaths { + name := fmt.Sprintf("model-%05d-of-%05d.gguf", i+1, len(ggufPaths)) + if err := unpackFile(filepath.Join(bundle.dir, name), ggufPaths[i]); err != nil { + return err + } + bundle.ggufFile = name + } + + return nil +} + +func unpackMultiModalProjector(bundle *Bundle, mdl types.Model) error { + path, err := mdl.MMPROJPath() + if err != nil { + return nil // no such file + } + if err = unpackFile(filepath.Join(bundle.dir, "model.mmproj"), path); err != nil { + return err + } + bundle.mmprojPath = "model.mmproj" + return nil +} + +func unpackFile(bundlePath string, srcPath string) error { + return os.Link(srcPath, bundlePath) +} diff --git a/vendor/github.com/docker/model-distribution/internal/gguf/create.go b/vendor/github.com/docker/model-distribution/internal/gguf/create.go index c83ed923..a35324f8 100644 --- a/vendor/github.com/docker/model-distribution/internal/gguf/create.go +++ b/vendor/github.com/docker/model-distribution/internal/gguf/create.go @@ -13,13 +13,23 @@ import ( ) func NewModel(path string) (*Model, error) { - layer, err := partial.NewLayer(path, types.MediaTypeGGUF) - if err != nil { - return nil, fmt.Errorf("create gguf layer: %w", err) + shards := parser.CompleteShardGGUFFilename(path) + if len(shards) == 0 { + shards = []string{path} // single file } - diffID, err := layer.DiffID() - if err != nil { - return nil, fmt.Errorf("get gguf layer diffID: %w", err) + layers := make([]v1.Layer, len(shards)) + diffIDs := make([]v1.Hash, len(shards)) + for i, shard := range shards { + layer, err := partial.NewLayer(shard, types.MediaTypeGGUF) + if err != nil { + return nil, fmt.Errorf("create gguf layer: %w", err) + } + diffID, err := layer.DiffID() + if err != nil { + return nil, fmt.Errorf("get gguf layer diffID: %w", err) + } + layers[i] = layer + diffIDs[i] = diffID } created := time.Now() @@ -30,13 +40,11 @@ func NewModel(path string) (*Model, error) { Created: &created, }, RootFS: v1.RootFS{ - Type: "rootfs", - DiffIDs: []v1.Hash{ - diffID, - }, + Type: "rootfs", + DiffIDs: diffIDs, }, }, - layers: []v1.Layer{layer}, + layers: layers, }, nil } diff --git a/vendor/github.com/docker/model-distribution/internal/partial/partial.go b/vendor/github.com/docker/model-distribution/internal/partial/partial.go index 3def14ae..7367556c 100644 --- a/vendor/github.com/docker/model-distribution/internal/partial/partial.go +++ b/vendor/github.com/docker/model-distribution/internal/partial/partial.go @@ -65,20 +65,32 @@ type WithLayers interface { Layers() ([]v1.Layer, error) } -func GGUFPath(i WithLayers) (string, error) { - return layerPathByMediaType(i, types.MediaTypeGGUF) +func GGUFPaths(i WithLayers) ([]string, error) { + return layerPathsByMediaType(i, types.MediaTypeGGUF) } func MMPROJPath(i WithLayers) (string, error) { - return layerPathByMediaType(i, types.MediaTypeMultimodalProjector) + paths, err := layerPathsByMediaType(i, types.MediaTypeMultimodalProjector) + if err != nil { + return "", fmt.Errorf("get mmproj layer paths: %w", err) + } + if len(paths) == 0 { + return "", fmt.Errorf("model does not contain any layer of type %q", types.MediaTypeMultimodalProjector) + } + if len(paths) > 1 { + return "", fmt.Errorf("found %d files of type %q, expected exactly 1", + len(paths), types.MediaTypeMultimodalProjector) + } + return paths[0], err } -// layerPathByMediaType is a generic helper function that finds a layer by media type and returns its path -func layerPathByMediaType(i WithLayers, mediaType ggcr.MediaType) (string, error) { +// layerPathsByMediaType is a generic helper function that finds a layer by media type and returns its path +func layerPathsByMediaType(i WithLayers, mediaType ggcr.MediaType) ([]string, error) { layers, err := i.Layers() if err != nil { - return "", fmt.Errorf("get layers: %w", err) + return nil, fmt.Errorf("get layers: %w", err) } + var paths []string for _, l := range layers { mt, err := l.MediaType() if err != nil || mt != mediaType { @@ -86,11 +98,11 @@ func layerPathByMediaType(i WithLayers, mediaType ggcr.MediaType) (string, error } layer, ok := l.(*Layer) if !ok { - return "", fmt.Errorf("%s Layer is not available locally", mediaType) + return nil, fmt.Errorf("%s Layer is not available locally", mediaType) } - return layer.Path, nil + paths = append(paths, layer.Path) } - return "", fmt.Errorf("model does not contain a %s layer", mediaType) + return paths, nil } func ManifestForLayers(i WithLayers) (*v1.Manifest, error) { diff --git a/vendor/github.com/docker/model-distribution/internal/store/bundles.go b/vendor/github.com/docker/model-distribution/internal/store/bundles.go new file mode 100644 index 00000000..61032dd7 --- /dev/null +++ b/vendor/github.com/docker/model-distribution/internal/store/bundles.go @@ -0,0 +1,59 @@ +package store + +import ( + "fmt" + "os" + "path/filepath" + + v1 "github.com/google/go-containerregistry/pkg/v1" + + "github.com/docker/model-distribution/internal/bundle" + "github.com/docker/model-distribution/types" +) + +const ( + bundlesDir = "bundles" +) + +// manifestPath returns the path to the manifest file for the given hash. +func (s *LocalStore) bundlePath(hash v1.Hash) string { + return filepath.Join(s.rootPath, bundlesDir, hash.Algorithm, hash.Hex) +} + +// BundleForModel returns a runtime bundle for the given model +func (s *LocalStore) BundleForModel(ref string) (types.ModelBundle, error) { + mdl, err := s.Read(ref) + if err != nil { + return nil, fmt.Errorf("find model content: %w", err) + } + dgst, err := mdl.Digest() + if err != nil { + return nil, fmt.Errorf("get model ID: %w", err) + } + path := s.bundlePath(dgst) + if bdl, err := bundle.Parse(path); err != nil { + // create for first time or replace bad/corrupted bundle + return s.createBundle(path, mdl) + } else { + return bdl, nil + } +} + +// createBundle unpacks the bundle to path, replacing existing bundle if one is found +func (s *LocalStore) createBundle(path string, mdl *Model) (types.ModelBundle, error) { + if err := os.RemoveAll(path); err != nil { + return nil, fmt.Errorf("remove %s: %w", path, err) + } + if err := os.MkdirAll(path, 0755); err != nil { + return nil, fmt.Errorf("create bundle directory: %w", err) + } + bdl, err := bundle.Unpack(path, mdl) + if err != nil { + return nil, fmt.Errorf("unpack bundle: %w", err) + } + return bdl, nil +} + +func (s *LocalStore) removeBundle(hash v1.Hash) error { + return os.RemoveAll(s.bundlePath(hash)) +} diff --git a/vendor/github.com/docker/model-distribution/internal/store/model.go b/vendor/github.com/docker/model-distribution/internal/store/model.go index e48a386a..b35539a6 100644 --- a/vendor/github.com/docker/model-distribution/internal/store/model.go +++ b/vendor/github.com/docker/model-distribution/internal/store/model.go @@ -110,8 +110,8 @@ func (m *Model) LayerByDiffID(hash v1.Hash) (v1.Layer, error) { return m.LayerByDigest(hash) } -func (m *Model) GGUFPath() (string, error) { - return mdpartial.GGUFPath(m) +func (m *Model) GGUFPaths() ([]string, error) { + return mdpartial.GGUFPaths(m) } func (m *Model) MMPROJPath() (string, error) { diff --git a/vendor/github.com/docker/model-distribution/internal/store/store.go b/vendor/github.com/docker/model-distribution/internal/store/store.go index ac7bae64..1261730a 100644 --- a/vendor/github.com/docker/model-distribution/internal/store/store.go +++ b/vendor/github.com/docker/model-distribution/internal/store/store.go @@ -103,14 +103,21 @@ func (s *LocalStore) Delete(ref string) (string, []string, error) { return "", nil, ErrModelNotFound } + digest, err := v1.NewHash(model.ID) + if err != nil { + return "", nil, fmt.Errorf("parse manifest digest %q: %w", model.ID, err) + } + // Remove manifest file - if digest, err := v1.NewHash(model.ID); err != nil { - fmt.Printf("Warning: failed to parse manifest digest %s: %v\n", digest, err) - } else if err := s.removeManifest(digest); err != nil { - fmt.Printf("Warning: failed to remove manifest %q: %v\n", - digest, err, - ) + if err := s.removeManifest(digest); err != nil { + fmt.Printf("Warning: failed to remove manifest %q: %v\n", digest, err) } + + // Remove bundle if one exists + if err := s.removeBundle(digest); err != nil { + fmt.Printf("Warning: failed to remove bundle %q: %v\n", digest, err) + } + // Before deleting blobs, check if they are referenced by other models blobRefs := make(map[string]int) for _, m := range idx.Models { diff --git a/vendor/github.com/docker/model-distribution/registry/client.go b/vendor/github.com/docker/model-distribution/registry/client.go index 49a063cb..44a58a16 100644 --- a/vendor/github.com/docker/model-distribution/registry/client.go +++ b/vendor/github.com/docker/model-distribution/registry/client.go @@ -7,13 +7,14 @@ import ( "net/http" "strings" - "github.com/docker/model-distribution/internal/progress" - "github.com/docker/model-distribution/types" "github.com/google/go-containerregistry/pkg/authn" "github.com/google/go-containerregistry/pkg/name" v1 "github.com/google/go-containerregistry/pkg/v1" "github.com/google/go-containerregistry/pkg/v1/remote" "github.com/google/go-containerregistry/pkg/v1/remote/transport" + + "github.com/docker/model-distribution/internal/progress" + "github.com/docker/model-distribution/types" ) const ( @@ -122,7 +123,7 @@ func (c *Client) BlobURL(reference string, digest v1.Hash) (string, error) { return fmt.Sprintf("%s://%s/v2/%s/blobs/%s", ref.Context().Registry.Scheme(), ref.Context().Registry.RegistryStr(), - ref.String(), + ref.Context().RepositoryStr(), digest.String()), nil } diff --git a/vendor/github.com/docker/model-distribution/types/model.go b/vendor/github.com/docker/model-distribution/types/model.go index 7a592ba4..62374c02 100644 --- a/vendor/github.com/docker/model-distribution/types/model.go +++ b/vendor/github.com/docker/model-distribution/types/model.go @@ -6,7 +6,7 @@ import ( type Model interface { ID() (string, error) - GGUFPath() (string, error) + GGUFPaths() ([]string, error) MMPROJPath() (string, error) Config() (Config, error) Tags() []string @@ -19,3 +19,10 @@ type ModelArtifact interface { Descriptor() (Descriptor, error) v1.Image } + +type ModelBundle interface { + RootDir() string + GGUFPath() string + MMPROJPath() string + RuntimeConfig() Config +} diff --git a/vendor/github.com/docker/model-runner/pkg/inference/backends/llamacpp/llamacpp.go b/vendor/github.com/docker/model-runner/pkg/inference/backends/llamacpp/llamacpp.go index a5ae2a73..a4913865 100644 --- a/vendor/github.com/docker/model-runner/pkg/inference/backends/llamacpp/llamacpp.go +++ b/vendor/github.com/docker/model-runner/pkg/inference/backends/llamacpp/llamacpp.go @@ -15,10 +15,10 @@ import ( "runtime" "strings" + "github.com/docker/model-distribution/types" v1 "github.com/google/go-containerregistry/pkg/v1" parser "github.com/gpustack/gguf-parser-go" - "github.com/docker/model-distribution/types" "github.com/docker/model-runner/pkg/diskusage" "github.com/docker/model-runner/pkg/inference" "github.com/docker/model-runner/pkg/inference/config" @@ -132,7 +132,7 @@ func (l *llamaCpp) Install(ctx context.Context, httpClient *http.Client) error { // Run implements inference.Backend.Run. func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference.BackendMode, config *inference.BackendConfiguration) error { - mdl, err := l.modelManager.GetModel(model) + bundle, err := l.modelManager.GetBundle(model) if err != nil { return fmt.Errorf("failed to get model: %w", err) } @@ -147,7 +147,7 @@ func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference binPath = l.updatedServerStoragePath } - args, err := l.config.GetArgs(mdl, socket, mode, config) + args, err := l.config.GetArgs(bundle, socket, mode, config) if err != nil { return fmt.Errorf("failed to get args for llama.cpp: %w", err) } @@ -244,7 +244,7 @@ func (l *llamaCpp) GetRequiredMemoryForModel(ctx context.Context, model string, } } - contextSize := GetContextSize(&mdlConfig, config) + contextSize := GetContextSize(mdlConfig, config) ngl := uint64(0) if l.gpuSupported { @@ -280,23 +280,15 @@ func (l *llamaCpp) GetRequiredMemoryForModel(ctx context.Context, model string, } func (l *llamaCpp) parseLocalModel(model string) (*parser.GGUFFile, types.Config, error) { - mdl, err := l.modelManager.GetModel(model) + bundle, err := l.modelManager.GetBundle(model) if err != nil { return nil, types.Config{}, fmt.Errorf("getting model(%s): %w", model, err) } - mdlPath, err := mdl.GGUFPath() - if err != nil { - return nil, types.Config{}, fmt.Errorf("getting gguf path for model(%s): %w", model, err) - } - mdlGguf, err := parser.ParseGGUFFile(mdlPath) - if err != nil { - return nil, types.Config{}, fmt.Errorf("parsing gguf(%s): %w", mdlPath, err) - } - mdlConfig, err := mdl.Config() + modelGGUF, err := parser.ParseGGUFFile(bundle.GGUFPath()) if err != nil { - return nil, types.Config{}, fmt.Errorf("accessing model(%s) config: %w", model, err) + return nil, types.Config{}, fmt.Errorf("parsing gguf(%s): %w", bundle.GGUFPath(), err) } - return mdlGguf, mdlConfig, nil + return modelGGUF, bundle.RuntimeConfig(), nil } func (l *llamaCpp) parseRemoteModel(ctx context.Context, model string) (*parser.GGUFFile, types.Config, error) { @@ -308,19 +300,15 @@ func (l *llamaCpp) parseRemoteModel(ctx context.Context, model string) (*parser. if err != nil { return nil, types.Config{}, fmt.Errorf("getting layers of model(%s): %w", model, err) } - var ggufDigest v1.Hash - for _, layer := range layers { - mt, err := layer.MediaType() - if err != nil { - return nil, types.Config{}, fmt.Errorf("getting media type of model(%s) layer: %w", model, err) - } - if mt == types.MediaTypeGGUF { - ggufDigest, err = layer.Digest() - if err != nil { - return nil, types.Config{}, fmt.Errorf("getting digest of GGUF layer for model(%s): %w", model, err) - } - break - } + ggufLayers := getGGUFLayers(layers) + if len(ggufLayers) != 1 { + return nil, types.Config{}, fmt.Errorf( + "remote memory estimation only supported for models with single GGUF layer, found %d layers", len(ggufLayers), + ) + } + ggufDigest, err := ggufLayers[0].Digest() + if err != nil { + return nil, types.Config{}, fmt.Errorf("getting digest of GGUF layer for model(%s): %w", model, err) } if ggufDigest.String() == "" { return nil, types.Config{}, fmt.Errorf("model(%s) has no GGUF layer", model) @@ -344,6 +332,20 @@ func (l *llamaCpp) parseRemoteModel(ctx context.Context, model string) (*parser. return mdlGguf, config, nil } +func getGGUFLayers(layers []v1.Layer) []v1.Layer { + var filtered []v1.Layer + for _, layer := range layers { + mt, err := layer.MediaType() + if err != nil { + continue + } + if mt == types.MediaTypeGGUF { + filtered = append(filtered, layer) + } + } + return filtered +} + func (l *llamaCpp) checkGPUSupport(ctx context.Context) bool { binPath := l.vendoredServerStoragePath if l.updatedLlamaCpp { diff --git a/vendor/github.com/docker/model-runner/pkg/inference/backends/llamacpp/llamacpp_config.go b/vendor/github.com/docker/model-runner/pkg/inference/backends/llamacpp/llamacpp_config.go index becc3a1b..f8b1fe5c 100644 --- a/vendor/github.com/docker/model-runner/pkg/inference/backends/llamacpp/llamacpp_config.go +++ b/vendor/github.com/docker/model-runner/pkg/inference/backends/llamacpp/llamacpp_config.go @@ -6,6 +6,7 @@ import ( "strconv" "github.com/docker/model-distribution/types" + "github.com/docker/model-runner/pkg/inference" ) @@ -35,18 +36,13 @@ func NewDefaultLlamaCppConfig() *Config { } // GetArgs implements BackendConfig.GetArgs. -func (c *Config) GetArgs(model types.Model, socket string, mode inference.BackendMode, config *inference.BackendConfiguration) ([]string, error) { +func (c *Config) GetArgs(bundle types.ModelBundle, socket string, mode inference.BackendMode, config *inference.BackendConfiguration) ([]string, error) { // Start with the arguments from LlamaCppConfig args := append([]string{}, c.Args...) - modelPath, err := model.GGUFPath() - if err != nil { - return nil, fmt.Errorf("get gguf path: %w", err) - } - - modelCfg, err := model.Config() - if err != nil { - return nil, fmt.Errorf("get model config: %w", err) + modelPath := bundle.GGUFPath() + if modelPath == "" { + return nil, fmt.Errorf("GGUF file required by llama.cpp backend") } // Add model and socket arguments @@ -57,7 +53,8 @@ func (c *Config) GetArgs(model types.Model, socket string, mode inference.Backen args = append(args, "--embeddings") } - args = append(args, "--ctx-size", strconv.FormatUint(GetContextSize(&modelCfg, config), 10)) + // Add context size from model config or backend config + args = append(args, "--ctx-size", strconv.FormatUint(GetContextSize(bundle.RuntimeConfig(), config), 10)) // Add arguments from backend config if config != nil { @@ -65,17 +62,16 @@ func (c *Config) GetArgs(model types.Model, socket string, mode inference.Backen } // Add arguments for Multimodal projector - path, err := model.MMPROJPath() - if path != "" && err == nil { + if path := bundle.MMPROJPath(); path != "" { args = append(args, "--mmproj", path) } return args, nil } -func GetContextSize(modelCfg *types.Config, backendCfg *inference.BackendConfiguration) uint64 { +func GetContextSize(modelCfg types.Config, backendCfg *inference.BackendConfiguration) uint64 { // Model config takes precedence - if modelCfg != nil && modelCfg.ContextSize != nil { + if modelCfg.ContextSize != nil { return *modelCfg.ContextSize } // else use backend config diff --git a/vendor/github.com/docker/model-runner/pkg/inference/config/config.go b/vendor/github.com/docker/model-runner/pkg/inference/config/config.go index 8163759d..72a22bea 100644 --- a/vendor/github.com/docker/model-runner/pkg/inference/config/config.go +++ b/vendor/github.com/docker/model-runner/pkg/inference/config/config.go @@ -2,6 +2,7 @@ package config import ( "github.com/docker/model-distribution/types" + "github.com/docker/model-runner/pkg/inference" ) @@ -12,5 +13,5 @@ type BackendConfig interface { // GetArgs returns the command-line arguments for the backend. // It takes the model path, socket, and mode as input and returns // the appropriate arguments for the backend. - GetArgs(model types.Model, socket string, mode inference.BackendMode, config *inference.BackendConfiguration) ([]string, error) + GetArgs(bundle types.ModelBundle, socket string, mode inference.BackendMode, config *inference.BackendConfiguration) ([]string, error) } diff --git a/vendor/github.com/docker/model-runner/pkg/inference/models/manager.go b/vendor/github.com/docker/model-runner/pkg/inference/models/manager.go index 4f79dcf1..7f84c34e 100644 --- a/vendor/github.com/docker/model-runner/pkg/inference/models/manager.go +++ b/vendor/github.com/docker/model-runner/pkg/inference/models/manager.go @@ -622,17 +622,13 @@ func (m *Manager) BearerTokenForModel(ctx context.Context, ref string) (string, return tok, nil } -// GetModelPath returns the path to a model's files. -func (m *Manager) GetModelPath(ref string) (string, error) { - model, err := m.GetModel(ref) +// GetBundle returns model bundle. +func (m *Manager) GetBundle(ref string) (types.ModelBundle, error) { + bundle, err := m.distributionClient.GetBundle(ref) if err != nil { - return "", err + return nil, fmt.Errorf("error while getting model bundle: %w", err) } - path, err := model.GGUFPath() - if err != nil { - return "", fmt.Errorf("error while getting model path: %w", err) - } - return path, nil + return bundle, err } // PullModel pulls a model to local storage. Any error it returns is suitable diff --git a/vendor/modules.txt b/vendor/modules.txt index e92f13c6..ae89577c 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -144,10 +144,11 @@ github.com/docker/go-connections/tlsconfig # github.com/docker/go-units v0.5.0 ## explicit github.com/docker/go-units -# github.com/docker/model-distribution v0.0.0-20250813080006-2a983516ebb8 +# github.com/docker/model-distribution v0.0.0-20250822172258-8fe9daa4a4da ## explicit; go 1.23.0 github.com/docker/model-distribution/builder github.com/docker/model-distribution/distribution +github.com/docker/model-distribution/internal/bundle github.com/docker/model-distribution/internal/gguf github.com/docker/model-distribution/internal/mutate github.com/docker/model-distribution/internal/partial @@ -156,7 +157,7 @@ github.com/docker/model-distribution/internal/store github.com/docker/model-distribution/registry github.com/docker/model-distribution/tarball github.com/docker/model-distribution/types -# github.com/docker/model-runner v0.0.0-20250822151118-d8ed37445584 +# github.com/docker/model-runner v0.0.0-20250822173738-5341c9fc2974 ## explicit; go 1.23.7 github.com/docker/model-runner/pkg/diskusage github.com/docker/model-runner/pkg/environment