From fb280e0f03b16e84ac55ddb874e6459c5533bc24 Mon Sep 17 00:00:00 2001
From: Eric Curtin
Date: Sat, 10 Jan 2026 13:31:13 +0000
Subject: [PATCH] Add search command for Docker Hub and HuggingFace

Adds a new `docker model search` command that lets users search for AI
models on Docker Hub and HuggingFace. The command supports filtering by
a search term, limiting the number of results, selecting which sources
to query (Docker Hub, HuggingFace, or both), and printing results as a
table or as JSON.

Sources are searched concurrently. A failure in one source degrades to
a warning on stderr rather than failing the whole search, and HTTP 429
responses are reported as explicit rate-limit errors.

Signed-off-by: Eric Curtin
---
 cmd/cli/commands/root.go                      |   1 +
 cmd/cli/commands/search.go                    | 127 +++++++++++
 cmd/cli/docs/reference/docker_model.yaml      |   2 +
 .../docs/reference/docker_model_search.yaml   |  57 +++++
 cmd/cli/docs/reference/model.md               |   1 +
 cmd/cli/docs/reference/model_search.md        |  27 +++
 cmd/cli/search/client.go                      | 135 ++++++++++++
 cmd/cli/search/dockerhub.go                   | 203 +++++++++++++++++
 cmd/cli/search/http_client.go                 |  13 ++
 cmd/cli/search/huggingface.go                 | 206 ++++++++++++++++++
 cmd/cli/search/types.go                       |  33 +++
 vllm_backend_stub.go                          |   2 +-
 12 files changed, 806 insertions(+), 1 deletion(-)
 create mode 100644 cmd/cli/commands/search.go
 create mode 100644 cmd/cli/docs/reference/docker_model_search.yaml
 create mode 100644 cmd/cli/docs/reference/model_search.md
 create mode 100644 cmd/cli/search/client.go
 create mode 100644 cmd/cli/search/dockerhub.go
 create mode 100644 cmd/cli/search/http_client.go
 create mode 100644 cmd/cli/search/huggingface.go
 create mode 100644 cmd/cli/search/types.go

diff --git a/cmd/cli/commands/root.go b/cmd/cli/commands/root.go
index 72c2e7d6..11896491 100644
--- a/cmd/cli/commands/root.go
+++ b/cmd/cli/commands/root.go
@@ -93,6 +93,7 @@ func NewRootCmd(cli *command.DockerCli) *cobra.Command {
 		newStopRunner(),
 		newRestartRunner(),
 		newReinstallRunner(),
+		newSearchCmd(),
 	)
 
 	// Commands that require a running model runner. These are wrapped to ensure the standalone runner is available.
diff --git a/cmd/cli/commands/search.go b/cmd/cli/commands/search.go
new file mode 100644
index 00000000..6c8962f7
--- /dev/null
+++ b/cmd/cli/commands/search.go
@@ -0,0 +1,127 @@
+package commands
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/docker/model-runner/cmd/cli/commands/formatter"
+	"github.com/docker/model-runner/cmd/cli/search"
+	"github.com/spf13/cobra"
+)
+
+func newSearchCmd() *cobra.Command {
+	var (
+		limit      int
+		source     string
+		jsonFormat bool
+	)
+
+	c := &cobra.Command{
+		Use:   "search [OPTIONS] [TERM]",
+		Short: "Search for models on Docker Hub and HuggingFace",
+		Long: `Search for models from Docker Hub (ai/ namespace) and HuggingFace.
+
+When no search term is provided, lists all available models.
+When a search term is provided, filters models by name/description.
+
+Examples:
+  docker model search                        # List available models from Docker Hub
+  docker model search llama                  # Search for models containing "llama"
+  docker model search --source=all           # Search both Docker Hub and HuggingFace
+  docker model search --source=huggingface   # Only search HuggingFace
+  docker model search --limit=50 phi         # Search with custom limit
+  docker model search --json llama           # Output as JSON`,
+		Args: cobra.MaximumNArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			// Parse the source
+			sourceType, err := search.ParseSource(source)
+			if err != nil {
+				return err
+			}
+
+			// Get the search query
+			var query string
+			if len(args) > 0 {
+				query = args[0]
+			}
+
+			// Create the search client
+			client := search.NewAggregatedClient(sourceType, cmd.ErrOrStderr())
+
+			// Perform the search
+			opts := search.SearchOptions{
+				Query: query,
+				Limit: limit,
+			}
+
+			results, err := client.Search(cmd.Context(), opts)
+			if err != nil {
+				return fmt.Errorf("search failed: %w", err)
+			}
+
+			if len(results) == 0 {
+				if query != "" {
+					fmt.Fprintf(cmd.OutOrStdout(), "No models found matching %q\n", query)
+				} else {
+					fmt.Fprintln(cmd.OutOrStdout(), "No models found")
+				}
+				return nil
+			}
+
+			// Output results
+			if jsonFormat {
+				output, err := formatter.ToStandardJSON(results)
+				if err != nil {
+					return err
+				}
+				fmt.Fprint(cmd.OutOrStdout(), output)
+				return nil
+			}
+
+			fmt.Fprint(cmd.OutOrStdout(), prettyPrintSearchResults(results))
+			return nil
+		},
+	}
+
+	c.Flags().IntVarP(&limit, "limit", "n", 32, "Maximum number of results to show")
+	c.Flags().StringVar(&source, "source", "all", "Source to search: all, dockerhub, huggingface")
+	c.Flags().BoolVar(&jsonFormat, "json", false, "Output results as JSON")
+
+	return c
+}
+
+// prettyPrintSearchResults formats search results as a table
+func prettyPrintSearchResults(results []search.SearchResult) string {
+	var buf bytes.Buffer
+	table := newTable(&buf)
+	table.Header([]string{"NAME", "DESCRIPTION", "BACKEND", "DOWNLOADS", "STARS", "SOURCE"})
+
+	for _, r := range results {
+		name := r.Name
+		if r.Source == search.HuggingFaceSourceName {
+			name = "hf.co/" + r.Name
+		}
+		table.Append([]string{
+			name,
+			r.Description,
+			r.Backend,
+			formatCount(r.Downloads),
+			formatCount(r.Stars),
+			r.Source,
+		})
+	}
+
+	table.Render()
+	return buf.String()
+}
+
+// formatCount formats a number in a human-readable way (e.g., 1.2M, 45.3K)
+func formatCount(n int64) string {
+	if n >= 1_000_000 {
+		return fmt.Sprintf("%.1fM", float64(n)/1_000_000)
+	}
+	if n >= 1_000 {
+		return fmt.Sprintf("%.1fK", float64(n)/1_000)
+	}
+	return fmt.Sprintf("%d", n)
+}
diff --git a/cmd/cli/docs/reference/docker_model.yaml b/cmd/cli/docs/reference/docker_model.yaml
index 0dd102e4..cda8839e 100644
--- a/cmd/cli/docs/reference/docker_model.yaml
+++ b/cmd/cli/docs/reference/docker_model.yaml
@@ -22,6 +22,7 @@ cname:
     - docker model restart-runner
     - docker model rm
     - docker model run
+    - docker model search
     - docker model start-runner
     - docker model status
     - docker model stop-runner
@@ -46,6 +47,7 @@ clink:
     - docker_model_restart-runner.yaml
     - docker_model_rm.yaml
     - docker_model_run.yaml
+    - docker_model_search.yaml
     - docker_model_start-runner.yaml
     - docker_model_status.yaml
     - docker_model_stop-runner.yaml
diff --git a/cmd/cli/docs/reference/docker_model_search.yaml b/cmd/cli/docs/reference/docker_model_search.yaml
new file mode 100644
index 00000000..72129bd7
--- /dev/null
+++ b/cmd/cli/docs/reference/docker_model_search.yaml
@@ -0,0 +1,57 @@
+command: docker model search
+short: Search for models on Docker Hub and HuggingFace
+long: |-
+    Search for models from Docker Hub (ai/ namespace) and HuggingFace.
+
+    When no search term is provided, lists all available models.
+    When a search term is provided, filters models by name/description.
+
+    Examples:
+      docker model search                        # List available models from Docker Hub
+      docker model search llama                  # Search for models containing "llama"
+      docker model search --source=all           # Search both Docker Hub and HuggingFace
+      docker model search --source=huggingface   # Only search HuggingFace
+      docker model search --limit=50 phi         # Search with custom limit
+      docker model search --json llama           # Output as JSON
+usage: docker model search [OPTIONS] [TERM]
+pname: docker model
+plink: docker_model.yaml
+options:
+    - option: json
+      value_type: bool
+      default_value: "false"
+      description: Output results as JSON
+      deprecated: false
+      hidden: false
+      experimental: false
+      experimentalcli: false
+      kubernetes: false
+      swarm: false
+    - option: limit
+      shorthand: "n"
+      value_type: int
+      default_value: "32"
+      description: Maximum number of results to show
+      deprecated: false
+      hidden: false
+      experimental: false
+      experimentalcli: false
+      kubernetes: false
+      swarm: false
+    - option: source
+      value_type: string
+      default_value: all
+      description: 'Source to search: all, dockerhub, huggingface'
+      deprecated: false
+      hidden: false
+      experimental: false
+      experimentalcli: false
+      kubernetes: false
+      swarm: false
+deprecated: false
+hidden: false
+experimental: false
+experimentalcli: false
+kubernetes: false
+swarm: false
+
diff --git a/cmd/cli/docs/reference/model.md b/cmd/cli/docs/reference/model.md
index 11d8d83b..e139fc45 100644
--- a/cmd/cli/docs/reference/model.md
+++ b/cmd/cli/docs/reference/model.md
@@ -23,6 +23,7 @@ Docker Model Runner
 | [`restart-runner`](model_restart-runner.md) | Restart Docker Model Runner (Docker Engine only) |
 | [`rm`](model_rm.md) | Remove local models downloaded from Docker Hub |
 | [`run`](model_run.md) | Run a model and interact with it using a submitted prompt or chat mode |
+| [`search`](model_search.md) | Search for models on Docker Hub and HuggingFace |
 | [`start-runner`](model_start-runner.md) | Start Docker Model Runner (Docker Engine only) |
 | [`status`](model_status.md) | Check if the Docker Model Runner is running |
 | [`stop-runner`](model_stop-runner.md) | Stop Docker Model Runner (Docker Engine only) |
diff --git a/cmd/cli/docs/reference/model_search.md b/cmd/cli/docs/reference/model_search.md
new file mode 100644
index 00000000..b146e60c
--- /dev/null
+++ b/cmd/cli/docs/reference/model_search.md
@@ -0,0 +1,27 @@
+# docker model search
+
+
+Search for models from Docker Hub (ai/ namespace) and HuggingFace.
+
+When no search term is provided, lists all available models.
+When a search term is provided, filters models by name/description.
+
+Examples:
+  docker model search                        # List available models from Docker Hub
+  docker model search llama                  # Search for models containing "llama"
+  docker model search --source=all           # Search both Docker Hub and HuggingFace
+  docker model search --source=huggingface   # Only search HuggingFace
+  docker model search --limit=50 phi         # Search with custom limit
+  docker model search --json llama           # Output as JSON
+
+### Options
+
+| Name            | Type     | Default | Description                                    |
+|:----------------|:---------|:--------|:-----------------------------------------------|
+| `--json`        | `bool`   |         | Output results as JSON                         |
+| `-n`, `--limit` | `int`    | `32`    | Maximum number of results to show              |
+| `--source`      | `string` | `all`   | Source to search: all, dockerhub, huggingface  |
+
+
+
diff --git a/cmd/cli/search/client.go b/cmd/cli/search/client.go
new file mode 100644
index 00000000..64083916
--- /dev/null
+++ b/cmd/cli/search/client.go
@@ -0,0 +1,135 @@
+package search
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"sort"
+	"sync"
+)
+
+// SourceType represents the source to search
+type SourceType string
+
+const (
+	SourceAll         SourceType = "all"
+	SourceDockerHub   SourceType = "dockerhub"
+	SourceHuggingFace SourceType = "huggingface"
+)
+
+// AggregatedClient searches multiple sources and merges results
+type AggregatedClient struct {
+	clients []SearchClient
+	errOut  io.Writer
+}
+
+// NewAggregatedClient creates a client that searches the specified sources
+func NewAggregatedClient(source SourceType, errOut io.Writer) *AggregatedClient {
+	var clients []SearchClient
+
+	switch source {
+	case SourceDockerHub:
+		clients = []SearchClient{NewDockerHubClient()}
+	case SourceHuggingFace:
+		clients = []SearchClient{NewHuggingFaceClient()}
+	case SourceAll:
+		clients = []SearchClient{
+			NewDockerHubClient(),
+			NewHuggingFaceClient(),
+		}
+	default: // This handles any unexpected values
+		clients = []SearchClient{
+			NewDockerHubClient(),
+			NewHuggingFaceClient(),
+		}
+	}
+
+	return &AggregatedClient{
+		clients: clients,
+		errOut:  errOut,
+	}
+}
+
+// searchResult holds results from a single source along with any error
+type searchResult struct {
+	results []SearchResult
+	err     error
+	source  string
+}
+
+// Search searches all configured sources and merges results
+func (c *AggregatedClient) Search(ctx context.Context, opts SearchOptions) ([]SearchResult, error) {
+	// Search all sources concurrently
+	resultsChan := make(chan searchResult, len(c.clients))
+	var wg sync.WaitGroup
+
+	for _, client := range c.clients {
+		wg.Add(1)
+		go func(client SearchClient) {
+			defer wg.Done()
+			results, err := client.Search(ctx, opts)
+			resultsChan <- searchResult{
+				results: results,
+				err:     err,
+				source:  client.Name(),
+			}
+		}(client)
+	}
+
+	// Wait for all searches to complete
+	go func() {
+		wg.Wait()
+		close(resultsChan)
+	}()
+
+	// Collect results
+	var allResults []SearchResult
+	var errors []error
+
+	for result := range resultsChan {
+		if result.err != nil {
+			errors = append(errors, fmt.Errorf("%s: %w", result.source, result.err))
+			if c.errOut != nil {
+				fmt.Fprintf(c.errOut, "Warning: failed to search %s: %v\n", result.source, result.err)
+			}
+			continue
+		}
+		allResults = append(allResults, result.results...)
+	}
+
+	// If all sources failed, return the collected errors
+	if len(allResults) == 0 && len(errors) > 0 {
+		return nil, fmt.Errorf("all search sources failed: %v", errors)
+	}
+
+	// Sort by source (Docker Hub first), then by downloads within each source
+	sort.Slice(allResults, func(i, j int) bool {
+		// Docker Hub comes before HuggingFace
+		if allResults[i].Source != allResults[j].Source {
+			return allResults[i].Source == DockerHubSourceName
+		}
+		// Within same source, sort by downloads (popularity)
+		return allResults[i].Downloads > allResults[j].Downloads
+	})
+
+	// Limit total results if needed
+	if opts.Limit > 0 && len(allResults) > opts.Limit {
+		allResults = allResults[:opts.Limit]
+	}
+
+	return allResults, nil
+}
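+
+// Usage sketch (illustrative, not one of the patch's call sites): a caller
+// builds one aggregated client and performs a single search, e.g.
+//
+//	client := NewAggregatedClient(SourceAll, os.Stderr)
+//	results, err := client.Search(ctx, SearchOptions{Query: "llama", Limit: 10})
+//
+// where ctx and the error writer are supplied by the caller.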
+
+// ParseSource parses a source string into a SourceType
+func ParseSource(s string) (SourceType, error) {
+	switch s {
+	case "all", "":
+		return SourceAll, nil
+	case "dockerhub", "docker", "hub":
+		return SourceDockerHub, nil
+	case "huggingface", "hf":
+		return SourceHuggingFace, nil
+	default:
+		return "", fmt.Errorf("unknown source %q: valid options are 'all', 'dockerhub', 'docker', 'hub', 'huggingface', 'hf'", s)
+	}
+}
diff --git a/cmd/cli/search/dockerhub.go b/cmd/cli/search/dockerhub.go
new file mode 100644
index 00000000..e613a083
--- /dev/null
+++ b/cmd/cli/search/dockerhub.go
@@ -0,0 +1,203 @@
+package search
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+)
+
+const (
+	dockerHubBaseURL = "https://hub.docker.com"
+	dockerHubAIOrg   = "ai"
+)
+
+// DockerHubClient searches for models on Docker Hub
+type DockerHubClient struct {
+	httpClient *http.Client
+	baseURL    string
+}
+
+// NewDockerHubClient creates a new Docker Hub search client
+func NewDockerHubClient() *DockerHubClient {
+	return &DockerHubClient{
+		httpClient: NewHTTPClient(),
+		baseURL:    dockerHubBaseURL,
+	}
+}
+
+// dockerHubRepoListResponse is the response from Docker Hub's repository list API
+type dockerHubRepoListResponse struct {
+	Count    int             `json:"count"`
+	Next     string          `json:"next"`
+	Previous string          `json:"previous"`
+	Results  []dockerHubRepo `json:"results"`
+}
+
+// dockerHubRepo represents a repository on Docker Hub
+type dockerHubRepo struct {
+	Name           string   `json:"name"`
+	Namespace      string   `json:"namespace"`
+	Description    string   `json:"description"`
+	IsPrivate      bool     `json:"is_private"`
+	StarCount      int      `json:"star_count"`
+	PullCount      int      `json:"pull_count"`
+	LastUpdated    string   `json:"last_updated"`
+	ContentTypes   []string `json:"content_types"`
+	RepositoryType string   `json:"repository_type"`
+	IsAutomated    bool     `json:"is_automated"`
+	CanEdit        bool     `json:"can_edit"`
+	IsMigrated     bool     `json:"is_migrated"`
+	Affiliation    string   `json:"affiliation"`
+	HubUser        string   `json:"hub_user"`
+	NamespaceType  string   `json:"namespace_type"`
+}
+
+// Name returns the name of this search source
+func (c *DockerHubClient) Name() string {
+	return DockerHubSourceName
+}
+
+// Search searches for models on Docker Hub in the ai/ namespace
+func (c *DockerHubClient) Search(ctx context.Context, opts SearchOptions) ([]SearchResult, error) {
+	limit := opts.Limit
+	if limit <= 0 {
+		limit = 32
+	}
+
+	var results []SearchResult
+	query := strings.ToLower(opts.Query)
+	nextURL := ""
+
+	// Docker Hub API paginates at 100 results max per page
+	pageSize := 100
+	if limit < pageSize {
+		pageSize = limit
+	}
+
+	for len(results) < limit {
+		var fullURL string
+		if nextURL != "" {
+			fullURL = nextURL
+		} else {
+			// Build the URL for listing repositories in the ai/ namespace
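+			// (illustrative: with the default limit of 32 this resolves to
+			// https://hub.docker.com/v2/repositories/ai/?ordering=pull_count&page_size=32)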
+			apiURL := fmt.Sprintf("%s/v2/repositories/%s/", c.baseURL, dockerHubAIOrg)
+			params := url.Values{}
+			params.Set("page_size", fmt.Sprintf("%d", pageSize))
+			params.Set("ordering", "pull_count") // Sort by popularity
+			fullURL = apiURL + "?" + params.Encode()
+		}
+
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, fullURL, http.NoBody)
+		if err != nil {
+			return nil, fmt.Errorf("creating request: %w", err)
+		}
+		req.Header.Set("Accept", "application/json")
+
+		resp, err := c.httpClient.Do(req)
+		if err != nil {
+			return nil, fmt.Errorf("fetching from Docker Hub: %w", err)
+		}
+
+		if resp.StatusCode == http.StatusTooManyRequests {
+			resp.Body.Close()
+			return nil, fmt.Errorf("rate limited by Docker Hub, please try again later")
+		}
+		if resp.StatusCode != http.StatusOK {
+			resp.Body.Close()
+			return nil, fmt.Errorf("unexpected status from Docker Hub: %s", resp.Status)
+		}
+
+		var response dockerHubRepoListResponse
+		err = json.NewDecoder(resp.Body).Decode(&response)
+		// Close the body explicitly: a defer inside the loop would keep every
+		// page's body open until the function returns.
+		resp.Body.Close()
+		if err != nil {
+			return nil, fmt.Errorf("decoding response: %w", err)
+		}
+
+		for _, repo := range response.Results {
+			// Skip private repos
+			if repo.IsPrivate {
+				continue
+			}
+
+			// Apply client-side filtering if query is provided
+			if query != "" {
+				nameMatch := strings.Contains(strings.ToLower(repo.Name), query)
+				descMatch := strings.Contains(strings.ToLower(repo.Description), query)
+				if !nameMatch && !descMatch {
+					continue
+				}
+			}
+
+			// Determine backend type from name and description
+			backend := determineDockerHubBackend(repo.Name, repo.Description)
+
+			results = append(results, SearchResult{
+				Name:        fmt.Sprintf("%s/%s", repo.Namespace, repo.Name),
+				Description: truncateString(repo.Description, 50),
+				Downloads:   int64(repo.PullCount),
+				Stars:       int64(repo.StarCount),
+				Source:      DockerHubSourceName,
+				Official:    repo.Namespace == dockerHubAIOrg,
+				UpdatedAt:   repo.LastUpdated,
+				Backend:     backend,
+			})
+
+			if len(results) >= limit {
+				break
+			}
+		}
+
+		// Check if there are more pages
+		if response.Next == "" || len(results) >= limit {
+			break
+		}
+		nextURL = response.Next
+	}
+
+	return results, nil
+}
+
+// truncateString truncates a string to maxLen characters, adding "..." if truncated
+func truncateString(s string, maxLen int) string {
+	if len(s) <= maxLen {
+		return s
+	}
+	if maxLen <= 3 {
+		return s[:maxLen]
+	}
+	return s[:maxLen-3] + "..."
+}
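+
+// For example (illustrative): truncateString("a very long description here", 10)
+// returns "a very ...".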
+
+// determineDockerHubBackend determines the backend type from model name and description
+func determineDockerHubBackend(name, description string) string {
+	nameLower := strings.ToLower(name)
+	descLower := strings.ToLower(description)
+	combined := nameLower + " " + descLower
+
+	var hasVLLM, hasLlamaCpp bool
+
+	// Check for vLLM indicators
+	if strings.Contains(combined, "vllm") || strings.Contains(combined, "safetensors") {
+		hasVLLM = true
+	}
+
+	// Check for llama.cpp indicators (gguf is the format used by llama.cpp)
+	if strings.Contains(combined, "llama.cpp") || strings.Contains(combined, "llamacpp") ||
+		strings.Contains(combined, "gguf") || strings.Contains(combined, "llama-cpp") {
+		hasLlamaCpp = true
+	}
+
+	if hasVLLM && hasLlamaCpp {
+		return "llama.cpp, vllm"
+	}
+	if hasVLLM {
+		return "vllm"
+	}
+	if hasLlamaCpp {
+		return "llama.cpp"
+	}
+
+	// Default to llama.cpp for ai/ namespace models as they primarily use GGUF format
+	return "llama.cpp"
+}
diff --git a/cmd/cli/search/http_client.go b/cmd/cli/search/http_client.go
new file mode 100644
index 00000000..6a9d9687
--- /dev/null
+++ b/cmd/cli/search/http_client.go
@@ -0,0 +1,13 @@
+package search
+
+import (
+	"net/http"
+	"time"
+)
+
+// NewHTTPClient creates a new HTTP client with standard configuration
+func NewHTTPClient() *http.Client {
+	return &http.Client{
+		Timeout: 30 * time.Second,
+	}
+}
diff --git a/cmd/cli/search/huggingface.go b/cmd/cli/search/huggingface.go
new file mode 100644
index 00000000..174b692d
--- /dev/null
+++ b/cmd/cli/search/huggingface.go
@@ -0,0 +1,206 @@
+package search
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+)
+
+const (
+	huggingFaceAPIURL = "https://huggingface.co/api"
+)
+
+// HuggingFaceClient searches for models on HuggingFace Hub
+type HuggingFaceClient struct {
+	httpClient *http.Client
+	baseURL    string
+}
+
+// NewHuggingFaceClient creates a new HuggingFace search client
+func NewHuggingFaceClient() *HuggingFaceClient {
+	return &HuggingFaceClient{
+		httpClient: NewHTTPClient(),
+		baseURL:    huggingFaceAPIURL,
+	}
+}
+
+// huggingFaceModel represents a model from the HuggingFace API
+type huggingFaceModel struct {
+	ID          string   `json:"id"`
+	ModelID     string   `json:"modelId"`
+	Likes       int      `json:"likes"`
+	Downloads   int      `json:"downloads"`
+	Tags        []string `json:"tags"`
+	PipelineTag string   `json:"pipeline_tag,omitempty"`
+	CreatedAt   string   `json:"createdAt"`
+	Private     bool     `json:"private"`
+}
+
+// Name returns the name of this search source
+func (c *HuggingFaceClient) Name() string {
+	return HuggingFaceSourceName
+}
+
+// Search searches for llama.cpp and vLLM compatible models on HuggingFace
+func (c *HuggingFaceClient) Search(ctx context.Context, opts SearchOptions) ([]SearchResult, error) {
+	limit := opts.Limit
+	if limit <= 0 {
+		limit = 32
+	}
+	// HuggingFace API supports up to 1000 results in a single request
+	if limit > 1000 {
+		limit = 1000
+	}
+
+	// Build the URL for searching llama.cpp and vLLM compatible models
+	apiURL := fmt.Sprintf("%s/models", c.baseURL)
+	params := url.Values{}
+	params.Set("apps", "vllm,llama.cpp")
+	params.Set("sort", "downloads")
+	params.Set("direction", "-1")
+	params.Set("limit", fmt.Sprintf("%d", limit))
+
+	if opts.Query != "" {
+		params.Set("search", opts.Query)
+	}
+
+	fullURL := apiURL + "?" + params.Encode()
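+	// (illustrative: for query "llama" and the default limit this resolves to
+	// https://huggingface.co/api/models?apps=vllm%2Cllama.cpp&direction=-1&limit=32&search=llama&sort=downloads)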
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, fullURL, http.NoBody)
+	if err != nil {
+		return nil, fmt.Errorf("creating request: %w", err)
+	}
+	req.Header.Set("Accept", "application/json")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("fetching from HuggingFace: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode == http.StatusTooManyRequests {
+		return nil, fmt.Errorf("rate limited by HuggingFace, please try again later")
+	}
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("unexpected status from HuggingFace: %s", resp.Status)
+	}
+
+	var models []huggingFaceModel
+	if err := json.NewDecoder(resp.Body).Decode(&models); err != nil {
+		return nil, fmt.Errorf("decoding response: %w", err)
+	}
+
+	var results []SearchResult
+	for _, model := range models {
+		// Skip private models
+		if model.Private {
+			continue
+		}
+
+		// Use modelId if available, otherwise use id
+		modelName := model.ModelID
+		if modelName == "" {
+			modelName = model.ID
+		}
+
+		// Generate description from tags
+		description := generateDescription(model.Tags, model.PipelineTag)
+
+		// Determine backend type from tags
+		backend := determineBackend(model.Tags)
+
+		results = append(results, SearchResult{
+			Name:        modelName,
+			Description: truncateString(description, 50),
+			Downloads:   int64(model.Downloads),
+			Stars:       int64(model.Likes),
+			Source:      HuggingFaceSourceName,
+			Official:    false,
+			UpdatedAt:   model.CreatedAt,
+			Backend:     backend,
+		})
+	}
+
+	return results, nil
+}
+
+// generateDescription creates a description from model tags
+func generateDescription(tags []string, pipelineTag string) string {
+	var parts []string
+
+	if pipelineTag != "" {
+		parts = append(parts, pipelineTag)
+	}
+
+	// Look for interesting tags (skip generic ones)
+	skipTags := map[string]bool{
+		"gguf": true, "transformers": true, "pytorch": true,
+		"safetensors": true, "license:apache-2.0": true,
+	}
+
+	for _, tag := range tags {
+		tag = strings.ToLower(tag)
+		if skipTags[tag] {
+			continue
+		}
+		// Include architecture/model type tags
+		if strings.HasPrefix(tag, "llama") ||
+			strings.HasPrefix(tag, "mistral") ||
+			strings.HasPrefix(tag, "phi") ||
+			strings.HasPrefix(tag, "qwen") ||
+			strings.Contains(tag, "instruct") ||
+			strings.Contains(tag, "chat") {
+			parts = append(parts, tag)
+			if len(parts) >= 3 {
+				break
+			}
+		}
+	}
+
+	if len(parts) == 0 {
+		return "AI model"
+	}
+	return strings.Join(parts, ", ")
+}
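+
+// For example (illustrative): tags ["gguf", "llama-3", "instruct"] with pipeline
+// tag "text-generation" produce "text-generation, llama-3, instruct".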
+
+// determineBackend determines the backend type from HuggingFace model tags.
+// Since we filter by apps=vllm,llama.cpp, all results are compatible with at least one backend.
+// - GGUF format models work with llama.cpp
+// - Transformers/safetensors models work with vLLM
+func determineBackend(tags []string) string {
+	var hasVLLM, hasLlamaCpp bool
+
+	for _, tag := range tags {
+		tagLower := strings.ToLower(tag)
+
+		// Check for explicit vllm tag or formats that indicate vLLM compatibility
+		if tagLower == "vllm" || tagLower == "text-generation-inference" {
+			hasVLLM = true
+		}
+		// Transformers/safetensors models are typically vLLM compatible
+		if tagLower == "transformers" || tagLower == "safetensors" {
+			hasVLLM = true
+		}
+
+		// Check for llama.cpp compatibility (GGUF format)
+		if tagLower == "llama.cpp" || tagLower == "llama-cpp" || tagLower == "gguf" {
+			hasLlamaCpp = true
+		}
+	}
+
+	if hasVLLM && hasLlamaCpp {
+		return "llama.cpp, vllm"
+	}
+	if hasVLLM {
+		return "vllm"
+	}
+	if hasLlamaCpp {
+		return "llama.cpp"
+	}
+	// Fallback: since we filter by apps=vllm,llama.cpp, model must be compatible with one
+	// but we couldn't determine which from tags
+	return "llama.cpp"
+}
diff --git a/cmd/cli/search/types.go b/cmd/cli/search/types.go
new file mode 100644
index 00000000..32498524
--- /dev/null
+++ b/cmd/cli/search/types.go
@@ -0,0 +1,33 @@
+package search
+
+import "context"
+
+// Constants for source names
+const (
+	DockerHubSourceName   = "Docker Hub"
+	HuggingFaceSourceName = "HuggingFace"
+)
+
+// SearchResult represents a model found during search
+type SearchResult struct {
+	Name        string // Full model reference (e.g., "ai/llama3.2" or "hf.co/org/model")
+	Description string // Short description
+	Downloads   int64  // Download/pull count
+	Stars       int64  // Star/like count
+	Source      string // "Docker Hub" or "HuggingFace"
+	Official    bool   // Whether this is an official model
+	UpdatedAt   string // Last update timestamp
+	Backend     string // Backend type: "llama.cpp", "vllm", or "llama.cpp, vllm" if both
+}
+
+// SearchOptions configures the search behavior
+type SearchOptions struct {
+	Query string // Search term (empty = list all)
+	Limit int    // Maximum results per source; aggregated clients may also apply this as a global cap after merging results
+}
+
+// SearchClient defines the interface for searching a model registry
+type SearchClient interface {
+	Search(ctx context.Context, opts SearchOptions) ([]SearchResult, error)
+	Name() string
+}
diff --git a/vllm_backend_stub.go b/vllm_backend_stub.go
index 342abc19..47e7c431 100644
--- a/vllm_backend_stub.go
+++ b/vllm_backend_stub.go
@@ -14,4 +14,4 @@ func initVLLMBackend(log *logrus.Logger, modelManager *models.Manager) (inferenc
 
 func registerVLLMBackend(backends map[string]inference.Backend, backend inference.Backend) {
 	// No-op when VLLM is disabled
-}
\ No newline at end of file
+}
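
A minimal end-to-end sketch of the package added above, assuming only the API
introduced in this patch (the program is illustrative and not part of the
change):

	package main

	import (
		"context"
		"fmt"
		"os"

		"github.com/docker/model-runner/cmd/cli/search"
	)

	func main() {
		// Query both sources concurrently and keep the top 5 merged results.
		client := search.NewAggregatedClient(search.SourceAll, os.Stderr)
		results, err := client.Search(context.Background(), search.SearchOptions{
			Query: "llama",
			Limit: 5,
		})
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			os.Exit(1)
		}
		for _, r := range results {
			fmt.Printf("%-40s %-16s %s\n", r.Name, r.Backend, r.Source)
		}
	}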