diff --git a/cmd/cli/commands/root.go b/cmd/cli/commands/root.go
index 72c2e7d6..11896491 100644
--- a/cmd/cli/commands/root.go
+++ b/cmd/cli/commands/root.go
@@ -93,6 +93,7 @@ func NewRootCmd(cli *command.DockerCli) *cobra.Command {
 		newStopRunner(),
 		newRestartRunner(),
 		newReinstallRunner(),
+		newSearchCmd(),
 	)
 
 	// Commands that require a running model runner. These are wrapped to ensure the standalone runner is available.
diff --git a/cmd/cli/commands/search.go b/cmd/cli/commands/search.go
new file mode 100644
index 00000000..6c8962f7
--- /dev/null
+++ b/cmd/cli/commands/search.go
@@ -0,0 +1,127 @@
+package commands
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/docker/model-runner/cmd/cli/commands/formatter"
+	"github.com/docker/model-runner/cmd/cli/search"
+	"github.com/spf13/cobra"
+)
+
+func newSearchCmd() *cobra.Command {
+	var (
+		limit      int
+		source     string
+		jsonFormat bool
+	)
+
+	c := &cobra.Command{
+		Use:   "search [OPTIONS] [TERM]",
+		Short: "Search for models on Docker Hub and HuggingFace",
+		Long: `Search for models from Docker Hub (ai/ namespace) and HuggingFace.
+
+When no search term is provided, lists all available models.
+When a search term is provided, filters models by name/description.
+
+Examples:
+  docker model search                         # List available models from Docker Hub
+  docker model search llama                   # Search for models containing "llama"
+  docker model search --source=all            # Search both Docker Hub and HuggingFace
+  docker model search --source=huggingface    # Only search HuggingFace
+  docker model search --limit=50 phi          # Search with custom limit
+  docker model search --json llama            # Output as JSON`,
+		Args: cobra.MaximumNArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			// Parse the source
+			sourceType, err := search.ParseSource(source)
+			if err != nil {
+				return err
+			}
+
+			// Get the search query
+			var query string
+			if len(args) > 0 {
+				query = args[0]
+			}
+
+			// Create the search client
+			client := search.NewAggregatedClient(sourceType, cmd.ErrOrStderr())
+
+			// Perform the search
+			opts := search.SearchOptions{
+				Query: query,
+				Limit: limit,
+			}
+
+			results, err := client.Search(cmd.Context(), opts)
+			if err != nil {
+				return fmt.Errorf("search failed: %w", err)
+			}
+
+			if len(results) == 0 {
+				if query != "" {
+					fmt.Fprintf(cmd.OutOrStdout(), "No models found matching %q\n", query)
+				} else {
+					fmt.Fprintln(cmd.OutOrStdout(), "No models found")
+				}
+				return nil
+			}
+
+			// Output results
+			if jsonFormat {
+				output, err := formatter.ToStandardJSON(results)
+				if err != nil {
+					return err
+				}
+				fmt.Fprint(cmd.OutOrStdout(), output)
+				return nil
+			}
+
+			fmt.Fprint(cmd.OutOrStdout(), prettyPrintSearchResults(results))
+			return nil
+		},
+	}
+
+	c.Flags().IntVarP(&limit, "limit", "n", 32, "Maximum number of results to show")
+	c.Flags().StringVar(&source, "source", "all", "Source to search: all, dockerhub, huggingface")
+	c.Flags().BoolVar(&jsonFormat, "json", false, "Output results as JSON")
+
+	return c
+}
+
+// prettyPrintSearchResults formats search results as a table
+func prettyPrintSearchResults(results []search.SearchResult) string {
+	var buf bytes.Buffer
+	table := newTable(&buf)
+	table.Header([]string{"NAME", "DESCRIPTION", "BACKEND", "DOWNLOADS", "STARS", "SOURCE"})
+
+	for _, r := range results {
+		name := r.Name
+		if r.Source == search.HuggingFaceSourceName {
+			name = "hf.co/" + r.Name
+		}
+		table.Append([]string{
+			name,
+			r.Description,
+			r.Backend,
+			formatCount(r.Downloads),
+			formatCount(r.Stars),
+			r.Source,
+		})
+	}
+
+	table.Render()
+	return buf.String()
+}
+
+// formatCount formats a number in a human-readable way (e.g., 1.2M, 45K)
+func formatCount(n int64) string {
+	if n >= 1_000_000 {
+		return fmt.Sprintf("%.1fM", float64(n)/1_000_000)
+	}
+	if n >= 1_000 {
+		return fmt.Sprintf("%.1fK", float64(n)/1_000)
+	}
+	return fmt.Sprintf("%d", n)
+}
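A note on the `--json` path: `SearchResult` (defined in `cmd/cli/search/types.go` below) declares no `json` struct tags, so assuming `formatter.ToStandardJSON` wraps standard `encoding/json` marshalling, the emitted keys mirror the exported Go field names. An illustrative sketch of the output shape only; the values here are placeholders, not real data:

    [
      {
        "Name": "ai/llama3.2",
        "Description": "...",
        "Downloads": 0,
        "Stars": 0,
        "Source": "Docker Hub",
        "Official": true,
        "UpdatedAt": "...",
        "Backend": "llama.cpp"
      }
    ]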
diff --git a/cmd/cli/docs/reference/docker_model.yaml b/cmd/cli/docs/reference/docker_model.yaml
index 0dd102e4..cda8839e 100644
--- a/cmd/cli/docs/reference/docker_model.yaml
+++ b/cmd/cli/docs/reference/docker_model.yaml
@@ -22,6 +22,7 @@ cname:
     - docker model restart-runner
     - docker model rm
     - docker model run
+    - docker model search
     - docker model start-runner
     - docker model status
     - docker model stop-runner
@@ -46,6 +47,7 @@ clink:
     - docker_model_restart-runner.yaml
     - docker_model_rm.yaml
     - docker_model_run.yaml
+    - docker_model_search.yaml
    - docker_model_start-runner.yaml
    - docker_model_status.yaml
    - docker_model_stop-runner.yaml
diff --git a/cmd/cli/docs/reference/docker_model_search.yaml b/cmd/cli/docs/reference/docker_model_search.yaml
new file mode 100644
index 00000000..72129bd7
--- /dev/null
+++ b/cmd/cli/docs/reference/docker_model_search.yaml
@@ -0,0 +1,57 @@
+command: docker model search
+short: Search for models on Docker Hub and HuggingFace
+long: |-
+    Search for models from Docker Hub (ai/ namespace) and HuggingFace.
+
+    When no search term is provided, lists all available models.
+    When a search term is provided, filters models by name/description.
+
+    Examples:
+      docker model search                         # List available models from Docker Hub
+      docker model search llama                   # Search for models containing "llama"
+      docker model search --source=all            # Search both Docker Hub and HuggingFace
+      docker model search --source=huggingface    # Only search HuggingFace
+      docker model search --limit=50 phi          # Search with custom limit
+      docker model search --json llama            # Output as JSON
+usage: docker model search [OPTIONS] [TERM]
+pname: docker model
+plink: docker_model.yaml
+options:
+    - option: json
+      value_type: bool
+      default_value: "false"
+      description: Output results as JSON
+      deprecated: false
+      hidden: false
+      experimental: false
+      experimentalcli: false
+      kubernetes: false
+      swarm: false
+    - option: limit
+      shorthand: "n"
+      value_type: int
+      default_value: "32"
+      description: Maximum number of results to show
+      deprecated: false
+      hidden: false
+      experimental: false
+      experimentalcli: false
+      kubernetes: false
+      swarm: false
+    - option: source
+      value_type: string
+      default_value: all
+      description: 'Source to search: all, dockerhub, huggingface'
+      deprecated: false
+      hidden: false
+      experimental: false
+      experimentalcli: false
+      kubernetes: false
+      swarm: false
+deprecated: false
+hidden: false
+experimental: false
+experimentalcli: false
+kubernetes: false
+swarm: false
+
diff --git a/cmd/cli/docs/reference/model.md b/cmd/cli/docs/reference/model.md
index 11d8d83b..e139fc45 100644
--- a/cmd/cli/docs/reference/model.md
+++ b/cmd/cli/docs/reference/model.md
@@ -23,6 +23,7 @@ Docker Model Runner
 | [`restart-runner`](model_restart-runner.md) | Restart Docker Model Runner (Docker Engine only) |
 | [`rm`](model_rm.md) | Remove local models downloaded from Docker Hub |
 | [`run`](model_run.md) | Run a model and interact with it using a submitted prompt or chat mode |
+| [`search`](model_search.md) | Search for models on Docker Hub and HuggingFace |
 | [`start-runner`](model_start-runner.md) | Start Docker Model Runner (Docker Engine only) |
 | [`status`](model_status.md) | Check if the Docker Model Runner is running |
 | [`stop-runner`](model_stop-runner.md) | Stop Docker Model Runner (Docker Engine only) |
diff --git a/cmd/cli/docs/reference/model_search.md b/cmd/cli/docs/reference/model_search.md
new file mode 100644
index 00000000..b146e60c
--- /dev/null
+++ b/cmd/cli/docs/reference/model_search.md
@@ -0,0 +1,27 @@
+# docker model search
+
+
+Search for models from Docker Hub (ai/ namespace) and HuggingFace.
+
+When no search term is provided, lists all available models.
+When a search term is provided, filters models by name/description.
+
+Examples:
+  docker model search                         # List available models from Docker Hub
+  docker model search llama                   # Search for models containing "llama"
+  docker model search --source=all            # Search both Docker Hub and HuggingFace
+  docker model search --source=huggingface    # Only search HuggingFace
+  docker model search --limit=50 phi          # Search with custom limit
+  docker model search --json llama            # Output as JSON
+
+### Options
+
+| Name            | Type     | Default | Description                                    |
+|:----------------|:---------|:--------|:-----------------------------------------------|
+| `--json`        | `bool`   |         | Output results as JSON                         |
+| `-n`, `--limit` | `int`    | `32`    | Maximum number of results to show              |
+| `--source`      | `string` | `all`   | Source to search: all, dockerhub, huggingface  |
+
+
+
diff --git a/cmd/cli/search/client.go b/cmd/cli/search/client.go
new file mode 100644
index 00000000..64083916
--- /dev/null
+++ b/cmd/cli/search/client.go
@@ -0,0 +1,135 @@
+package search
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"sort"
+	"sync"
+)
+
+// SourceType represents the source to search
+type SourceType string
+
+const (
+	SourceAll         SourceType = "all"
+	SourceDockerHub   SourceType = "dockerhub"
+	SourceHuggingFace SourceType = "huggingface"
+)
+
+// AggregatedClient searches multiple sources and merges results
+type AggregatedClient struct {
+	clients []SearchClient
+	errOut  io.Writer
+}
+
+// NewAggregatedClient creates a client that searches the specified sources
+func NewAggregatedClient(source SourceType, errOut io.Writer) *AggregatedClient {
+	var clients []SearchClient
+
+	switch source {
+	case SourceDockerHub:
+		clients = []SearchClient{NewDockerHubClient()}
+	case SourceHuggingFace:
+		clients = []SearchClient{NewHuggingFaceClient()}
+	case SourceAll:
+		clients = []SearchClient{
+			NewDockerHubClient(),
+			NewHuggingFaceClient(),
+		}
+	default: // This handles any unexpected values
+		clients = []SearchClient{
+			NewDockerHubClient(),
+			NewHuggingFaceClient(),
+		}
+	}
+
+	return &AggregatedClient{
+		clients: clients,
+		errOut:  errOut,
+	}
+}
+
+// searchResult holds results from a single source along with any error
+type searchResult struct {
+	results []SearchResult
+	err     error
+	source  string
+}
+
+// Search searches all configured sources and merges results
+func (c *AggregatedClient) Search(ctx context.Context, opts SearchOptions) ([]SearchResult, error) {
+	// Search all sources concurrently
+	resultsChan := make(chan searchResult, len(c.clients))
+	var wg sync.WaitGroup
+
+	for _, client := range c.clients {
+		wg.Add(1)
+		go func(client SearchClient) {
+			defer wg.Done()
+			results, err := client.Search(ctx, opts)
+			resultsChan <- searchResult{
+				results: results,
+				err:     err,
+				source:  client.Name(),
+			}
+		}(client)
+	}
+
+	// Wait for all searches to complete
+	go func() {
+		wg.Wait()
+		close(resultsChan)
+	}()
+
+	// Collect results
+	var allResults []SearchResult
+	var errors []error
+
+	for result := range resultsChan {
+		if result.err != nil {
+			errors = append(errors, fmt.Errorf("%s: %w", result.source, result.err))
+			if c.errOut != nil {
+				fmt.Fprintf(c.errOut, "Warning: failed to search %s: %v\n", result.source, result.err)
+			}
+			continue
+		}
+		allResults = append(allResults, result.results...)
+	}
+
+	// If all sources failed, return the collected errors
+	if len(allResults) == 0 && len(errors) > 0 {
+		return nil, fmt.Errorf("all search sources failed: %v", errors)
+	}
+
+	// Sort by source (Docker Hub first), then by downloads within each source
+	sort.Slice(allResults, func(i, j int) bool {
+		// Docker Hub comes before HuggingFace
+		if allResults[i].Source != allResults[j].Source {
+			return allResults[i].Source == DockerHubSourceName
+		}
+		// Within same source, sort by downloads (popularity)
+		return allResults[i].Downloads > allResults[j].Downloads
+	})
+
+	// Limit total results if needed
+	if opts.Limit > 0 && len(allResults) > opts.Limit {
+		allResults = allResults[:opts.Limit]
+	}
+
+	return allResults, nil
+}
+
+// ParseSource parses a source string into a SourceType
+func ParseSource(s string) (SourceType, error) {
+	switch s {
+	case "all", "":
+		return SourceAll, nil
+	case "dockerhub", "docker", "hub":
+		return SourceDockerHub, nil
+	case "huggingface", "hf":
+		return SourceHuggingFace, nil
+	default:
+		return "", fmt.Errorf("unknown source %q: valid options are 'all', 'dockerhub', 'docker', 'hub', 'huggingface', 'hf'", s)
+	}
+}
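As context for client.go above, a minimal sketch of driving the aggregated client programmatically. The `main` package here is hypothetical and exists only for illustration; `os.Stderr` stands in for the CLI's error stream:

    package main

    import (
    	"context"
    	"fmt"
    	"os"

    	"github.com/docker/model-runner/cmd/cli/search"
    )

    func main() {
    	// ParseSource accepts "all", "dockerhub"/"docker"/"hub", and "huggingface"/"hf"
    	src, err := search.ParseSource("all")
    	if err != nil {
    		panic(err)
    	}
    	// Per-source failures are printed as warnings to the supplied writer;
    	// Search only returns an error when every source fails.
    	client := search.NewAggregatedClient(src, os.Stderr)
    	results, err := client.Search(context.Background(), search.SearchOptions{Query: "llama", Limit: 10})
    	if err != nil {
    		panic(err)
    	}
    	for _, r := range results {
    		fmt.Printf("%-40s %s\n", r.Name, r.Source)
    	}
    }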
diff --git a/cmd/cli/search/dockerhub.go b/cmd/cli/search/dockerhub.go
new file mode 100644
index 00000000..e613a083
--- /dev/null
+++ b/cmd/cli/search/dockerhub.go
@@ -0,0 +1,211 @@
+package search
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+)
+
+const (
+	dockerHubBaseURL = "https://hub.docker.com"
+	dockerHubAIOrg   = "ai"
+)
+
+// DockerHubClient searches for models on Docker Hub
+type DockerHubClient struct {
+	httpClient *http.Client
+	baseURL    string
+}
+
+// NewDockerHubClient creates a new Docker Hub search client
+func NewDockerHubClient() *DockerHubClient {
+	return &DockerHubClient{
+		httpClient: NewHTTPClient(),
+		baseURL:    dockerHubBaseURL,
+	}
+}
+
+// dockerHubRepoListResponse is the response from Docker Hub's repository list API
+type dockerHubRepoListResponse struct {
+	Count    int             `json:"count"`
+	Next     string          `json:"next"`
+	Previous string          `json:"previous"`
+	Results  []dockerHubRepo `json:"results"`
+}
+
+// dockerHubRepo represents a repository on Docker Hub
+type dockerHubRepo struct {
+	Name           string   `json:"name"`
+	Namespace      string   `json:"namespace"`
+	Description    string   `json:"description"`
+	IsPrivate      bool     `json:"is_private"`
+	StarCount      int      `json:"star_count"`
+	PullCount      int      `json:"pull_count"`
+	LastUpdated    string   `json:"last_updated"`
+	ContentTypes   []string `json:"content_types"`
+	RepositoryType string   `json:"repository_type"`
+	IsAutomated    bool     `json:"is_automated"`
+	CanEdit        bool     `json:"can_edit"`
+	IsMigrated     bool     `json:"is_migrated"`
+	Affiliation    string   `json:"affiliation"`
+	HubUser        string   `json:"hub_user"`
+	NamespaceType  string   `json:"namespace_type"`
+}
+
+// Name returns the name of this search source
+func (c *DockerHubClient) Name() string {
+	return DockerHubSourceName
+}
+
+// Search searches for models on Docker Hub in the ai/ namespace
+func (c *DockerHubClient) Search(ctx context.Context, opts SearchOptions) ([]SearchResult, error) {
+	limit := opts.Limit
+	if limit <= 0 {
+		limit = 32
+	}
+
+	var results []SearchResult
+	query := strings.ToLower(opts.Query)
+	nextURL := ""
+
+	// Docker Hub API paginates at 100 results max per page
+	pageSize := 100
+	if limit < pageSize {
+		pageSize = limit
+	}
+
+	for len(results) < limit {
+		var fullURL string
+		if nextURL != "" {
+			fullURL = nextURL
+		} else {
+			// Build the URL for listing repositories in the ai/ namespace
+			apiURL := fmt.Sprintf("%s/v2/repositories/%s/", c.baseURL, dockerHubAIOrg)
+			params := url.Values{}
+			params.Set("page_size", fmt.Sprintf("%d", pageSize))
+			params.Set("ordering", "pull_count") // Sort by popularity
+			fullURL = apiURL + "?" + params.Encode()
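+			// For the first page with the default limit this yields, e.g.:
+			//   https://hub.docker.com/v2/repositories/ai/?ordering=pull_count&page_size=32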
+		}
+
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, fullURL, http.NoBody)
+		if err != nil {
+			return nil, fmt.Errorf("creating request: %w", err)
+		}
+		req.Header.Set("Accept", "application/json")
+
+		resp, err := c.httpClient.Do(req)
+		if err != nil {
+			return nil, fmt.Errorf("fetching from Docker Hub: %w", err)
+		}
+
+		if resp.StatusCode == http.StatusTooManyRequests {
+			resp.Body.Close()
+			return nil, fmt.Errorf("rate limited by Docker Hub, please try again later")
+		}
+		if resp.StatusCode != http.StatusOK {
+			resp.Body.Close()
+			return nil, fmt.Errorf("unexpected status from Docker Hub: %s", resp.Status)
+		}
+
+		var response dockerHubRepoListResponse
+		err = json.NewDecoder(resp.Body).Decode(&response)
+		// Close eagerly rather than defer: a defer inside this loop would keep
+		// every page's body open until Search returns.
+		resp.Body.Close()
+		if err != nil {
+			return nil, fmt.Errorf("decoding response: %w", err)
+		}
+
+		for _, repo := range response.Results {
+			// Skip private repos
+			if repo.IsPrivate {
+				continue
+			}
+
+			// Apply client-side filtering if query is provided
+			if query != "" {
+				nameMatch := strings.Contains(strings.ToLower(repo.Name), query)
+				descMatch := strings.Contains(strings.ToLower(repo.Description), query)
+				if !nameMatch && !descMatch {
+					continue
+				}
+			}
+
+			// Determine backend type from name and description
+			backend := determineDockerHubBackend(repo.Name, repo.Description)
+
+			results = append(results, SearchResult{
+				Name:        fmt.Sprintf("%s/%s", repo.Namespace, repo.Name),
+				Description: truncateString(repo.Description, 50),
+				Downloads:   int64(repo.PullCount),
+				Stars:       int64(repo.StarCount),
+				Source:      DockerHubSourceName,
+				Official:    repo.Namespace == dockerHubAIOrg,
+				UpdatedAt:   repo.LastUpdated,
+				Backend:     backend,
+			})
+
+			if len(results) >= limit {
+				break
+			}
+		}
+
+		// Check if there are more pages
+		if response.Next == "" || len(results) >= limit {
+			break
+		}
+		nextURL = response.Next
+	}
+
+	return results, nil
+}
+
+// truncateString truncates a string to at most maxLen runes, adding "..." if truncated
+func truncateString(s string, maxLen int) string {
+	r := []rune(s)
+	if len(r) <= maxLen {
+		return s
+	}
+	if maxLen <= 3 {
+		return string(r[:maxLen])
+	}
+	return string(r[:maxLen-3]) + "..."
+}
+
+// determineDockerHubBackend determines the backend type from model name and description
+func determineDockerHubBackend(name, description string) string {
+	nameLower := strings.ToLower(name)
+	descLower := strings.ToLower(description)
+	combined := nameLower + " " + descLower
+
+	var hasVLLM, hasLlamaCpp bool
+
+	// Check for vLLM indicators
+	if strings.Contains(combined, "vllm") || strings.Contains(combined, "safetensors") {
+		hasVLLM = true
+	}
+
+	// Check for llama.cpp indicators (gguf is the format used by llama.cpp)
+	if strings.Contains(combined, "llama.cpp") || strings.Contains(combined, "llamacpp") ||
+		strings.Contains(combined, "gguf") || strings.Contains(combined, "llama-cpp") {
+		hasLlamaCpp = true
+	}
+
+	if hasVLLM && hasLlamaCpp {
+		return "llama.cpp, vllm"
+	}
+	if hasVLLM {
+		return "vllm"
+	}
+	if hasLlamaCpp {
+		return "llama.cpp"
+	}
+
+	// Default to llama.cpp for ai/ namespace models as they primarily use GGUF format
+	return "llama.cpp"
+}
diff --git a/cmd/cli/search/http_client.go b/cmd/cli/search/http_client.go
new file mode 100644
index 00000000..6a9d9687
--- /dev/null
+++ b/cmd/cli/search/http_client.go
@@ -0,0 +1,13 @@
+package search
+
+import (
+	"net/http"
+	"time"
+)
+
+// NewHTTPClient creates a new HTTP client with standard configuration
+func NewHTTPClient() *http.Client {
+	return &http.Client{
+		Timeout: 30 * time.Second,
+	}
+}
diff --git a/cmd/cli/search/huggingface.go b/cmd/cli/search/huggingface.go
new file mode 100644
index 00000000..174b692d
--- /dev/null
+++ b/cmd/cli/search/huggingface.go
@@ -0,0 +1,208 @@
+package search
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+)
+
+const (
+	huggingFaceAPIURL = "https://huggingface.co/api"
+)
+
+// HuggingFaceClient searches for models on HuggingFace Hub
+type HuggingFaceClient struct {
+	httpClient *http.Client
+	baseURL    string
+}
+
+// NewHuggingFaceClient creates a new HuggingFace search client
+func NewHuggingFaceClient() *HuggingFaceClient {
+	return &HuggingFaceClient{
+		httpClient: NewHTTPClient(),
+		baseURL:    huggingFaceAPIURL,
+	}
+}
+
+// huggingFaceModel represents a model from the HuggingFace API
+type huggingFaceModel struct {
+	ID          string   `json:"id"`
+	ModelID     string   `json:"modelId"`
+	Likes       int      `json:"likes"`
+	Downloads   int      `json:"downloads"`
+	Tags        []string `json:"tags"`
+	PipelineTag string   `json:"pipeline_tag,omitempty"`
+	CreatedAt   string   `json:"createdAt"`
+	Private     bool     `json:"private"`
+}
+
+// Name returns the name of this search source
+func (c *HuggingFaceClient) Name() string {
+	return HuggingFaceSourceName
+}
+
+// Search searches for llama.cpp and vLLM compatible models on HuggingFace
+func (c *HuggingFaceClient) Search(ctx context.Context, opts SearchOptions) ([]SearchResult, error) {
+	limit := opts.Limit
+	if limit <= 0 {
+		limit = 32
+	}
+	// HuggingFace API supports up to 1000 results in a single request
+	if limit > 1000 {
+		limit = 1000
+	}
+
+	// Build the URL for searching llama.cpp and vLLM compatible models
+	apiURL := fmt.Sprintf("%s/models", c.baseURL)
+	params := url.Values{}
+	params.Set("apps", "vllm,llama.cpp")
+	params.Set("sort", "downloads")
+	params.Set("direction", "-1")
+	params.Set("limit", fmt.Sprintf("%d", limit))
+
+	if opts.Query != "" {
+		params.Set("search", opts.Query)
+	}
+
+	fullURL := apiURL + "?" + params.Encode()
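+	// With the default limit this yields, e.g.:
+	//   https://huggingface.co/api/models?apps=vllm%2Cllama.cpp&direction=-1&limit=32&sort=downloads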
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, fullURL, http.NoBody)
+	if err != nil {
+		return nil, fmt.Errorf("creating request: %w", err)
+	}
+	req.Header.Set("Accept", "application/json")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("fetching from HuggingFace: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode == http.StatusTooManyRequests {
+		return nil, fmt.Errorf("rate limited by HuggingFace, please try again later")
+	}
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("unexpected status from HuggingFace: %s", resp.Status)
+	}
+
+	var models []huggingFaceModel
+	if err := json.NewDecoder(resp.Body).Decode(&models); err != nil {
+		return nil, fmt.Errorf("decoding response: %w", err)
+	}
+
+	var results []SearchResult
+	for _, model := range models {
+		// Skip private models
+		if model.Private {
+			continue
+		}
+
+		// Use modelId if available, otherwise use id
+		modelName := model.ModelID
+		if modelName == "" {
+			modelName = model.ID
+		}
+
+		// Generate description from tags
+		description := generateDescription(model.Tags, model.PipelineTag)
+
+		// Determine backend type from tags
+		backend := determineBackend(model.Tags)
+
+		results = append(results, SearchResult{
+			Name:        modelName,
+			Description: truncateString(description, 50),
+			Downloads:   int64(model.Downloads),
+			Stars:       int64(model.Likes),
+			Source:      HuggingFaceSourceName,
+			Official:    false,
+			UpdatedAt:   model.CreatedAt,
+			Backend:     backend,
+		})
+	}
+
+	return results, nil
+}
+
+// generateDescription creates a description from model tags
+func generateDescription(tags []string, pipelineTag string) string {
+	var parts []string
+
+	if pipelineTag != "" {
+		parts = append(parts, pipelineTag)
+	}
+
+	// Look for interesting tags (skip generic ones)
+	skipTags := map[string]bool{
+		"gguf": true, "transformers": true, "pytorch": true,
+		"safetensors": true, "license:apache-2.0": true,
+	}
+
+	for _, tag := range tags {
+		tag = strings.ToLower(tag)
+		if skipTags[tag] {
+			continue
+		}
+		// Include architecture/model type tags
+		if strings.HasPrefix(tag, "llama") ||
+			strings.HasPrefix(tag, "mistral") ||
+			strings.HasPrefix(tag, "phi") ||
+			strings.HasPrefix(tag, "qwen") ||
+			strings.Contains(tag, "instruct") ||
+			strings.Contains(tag, "chat") {
+			parts = append(parts, tag)
+			if len(parts) >= 3 {
+				break
+			}
+		}
+	}
+
+	if len(parts) == 0 {
+		return "AI model"
+	}
+	return strings.Join(parts, ", ")
+}
+
+// determineBackend determines the backend type from HuggingFace model tags.
+// Since we filter by apps=vllm,llama.cpp, all results are compatible with at least one backend.
+// - GGUF format models work with llama.cpp
+// - Transformers/safetensors models work with vLLM
+func determineBackend(tags []string) string {
+	var hasVLLM, hasLlamaCpp bool
+
+	for _, tag := range tags {
+		tagLower := strings.ToLower(tag)
+
+		// Check for explicit vllm tag or formats that indicate vLLM compatibility
+		if tagLower == "vllm" || tagLower == "text-generation-inference" {
+			hasVLLM = true
+		}
+		// Transformers/safetensors models are typically vLLM compatible
+		if tagLower == "transformers" || tagLower == "safetensors" {
+			hasVLLM = true
+		}
+
+		// Check for llama.cpp compatibility (GGUF format)
+		if tagLower == "llama.cpp" || tagLower == "llama-cpp" || tagLower == "gguf" {
+			hasLlamaCpp = true
+		}
+	}
+
+	if hasVLLM && hasLlamaCpp {
+		return "llama.cpp, vllm"
+	}
+	if hasVLLM {
+		return "vllm"
+	}
+	if hasLlamaCpp {
+		return "llama.cpp"
+	}
+	// Fallback: since we filter by apps=vllm,llama.cpp, model must be compatible with one
+	// but we couldn't determine which from tags
+	return "llama.cpp"
+}
diff --git a/cmd/cli/search/types.go b/cmd/cli/search/types.go
new file mode 100644
index 00000000..32498524
--- /dev/null
+++ b/cmd/cli/search/types.go
@@ -0,0 +1,33 @@
+package search
+
+import "context"
+
+// Constants for source names
+const (
+	DockerHubSourceName   = "Docker Hub"
+	HuggingFaceSourceName = "HuggingFace"
+)
+
+// SearchResult represents a model found during search
+type SearchResult struct {
+	Name        string // Full model reference (e.g., "ai/llama3.2" or "hf.co/org/model")
+	Description string // Short description
+	Downloads   int64  // Download/pull count
+	Stars       int64  // Star/like count
+	Source      string // "Docker Hub" or "HuggingFace"
+	Official    bool   // Whether this is an official model
+	UpdatedAt   string // Last update timestamp
+	Backend     string // Backend type: "llama.cpp", "vllm", or "llama.cpp, vllm" if both
+}
+
+// SearchOptions configures the search behavior
+type SearchOptions struct {
+	Query string // Search term (empty = list all)
+	Limit int    // Maximum results per source; aggregated clients may also apply this as a global cap after merging results
+}
+
+// SearchClient defines the interface for searching a model registry
+type SearchClient interface {
+	Search(ctx context.Context, opts SearchOptions) ([]SearchResult, error)
+	Name() string
+}
diff --git a/vllm_backend_stub.go b/vllm_backend_stub.go
index 342abc19..47e7c431 100644
--- a/vllm_backend_stub.go
+++ b/vllm_backend_stub.go
@@ -14,4 +14,4 @@ func initVLLMBackend(log *logrus.Logger, modelManager *models.Manager) (inferenc
 
 func registerVLLMBackend(backends map[string]inference.Backend, backend inference.Backend) {
 	// No-op when VLLM is disabled
-}
\ No newline at end of file
+}
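The `SearchClient` interface in `cmd/cli/search/types.go` is the extension point for additional registries. A minimal, hypothetical implementation of the contract — the `staticClient` name and its fixed result set are invented purely for illustration:

    package search

    import "context"

    // staticClient serves a fixed result set; it exists only to illustrate
    // the SearchClient contract used by AggregatedClient.
    type staticClient struct {
    	results []SearchResult
    }

    // Name labels results from this source in the output table.
    func (s *staticClient) Name() string { return "Static" }

    // Search honors the per-source limit, mirroring the real clients.
    func (s *staticClient) Search(_ context.Context, opts SearchOptions) ([]SearchResult, error) {
    	if opts.Limit > 0 && len(s.results) > opts.Limit {
    		return s.results[:opts.Limit], nil
    	}
    	return s.results, nil
    }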