1 change: 1 addition & 0 deletions tools/CMakeLists.txt
@@ -18,6 +18,7 @@ else()
    add_subdirectory(gguf-split)
    add_subdirectory(imatrix)
    add_subdirectory(llama-bench)
    add_subdirectory(pull)
    add_subdirectory(main)
    add_subdirectory(perplexity)
    add_subdirectory(quantize)
8 changes: 8 additions & 0 deletions tools/pull/CMakeLists.txt
@@ -0,0 +1,8 @@
set(TARGET llama-pull)
add_executable(${TARGET} pull.cpp)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_17)

if(LLAMA_TOOLS_INSTALL)
    install(TARGETS ${TARGET} RUNTIME)
endif()
43 changes: 43 additions & 0 deletions tools/pull/README.md
@@ -0,0 +1,43 @@
# llama-pull - Model Download Tool

A command-line tool for downloading AI models from HuggingFace and Docker Hub for use with llama.cpp.

## Usage

```bash
# Download from HuggingFace
llama-pull -hf <user>/<model>[:<quant>]

# Download from Docker Hub
llama-pull -dr [<repo>/]<model>[:<quant>]
```

## Options

- `-hf, --hf-repo REPO` - Download model from HuggingFace repository
- `-dr, --docker-repo REPO` - Download model from Docker Hub
- `--hf-token TOKEN` - HuggingFace token for private repositories
- `-h, --help` - Show help message

## Examples

```bash
# Download a HuggingFace model
llama-pull -hf microsoft/DialoGPT-medium

# Download a Docker model (ai/ repo is default)
llama-pull -dr gemma3

# Download with specific quantization
llama-pull -hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M
```

## Model Storage

Downloaded models are stored in the standard llama.cpp cache directory:
- Linux/macOS: `~/.cache/llama.cpp/`

The downloaded models can then be used with the other llama.cpp tools.

## Requirements

- llama.cpp must be built with `LLAMA_USE_CURL=ON` (the default) for the download functionality
84 changes: 84 additions & 0 deletions tools/pull/pull.cpp
@@ -0,0 +1,84 @@
#include "arg.h"
#include "common.h"
#include "log.h"

#include <cstdio>
#include <string>

static void print_usage(int, char ** argv) {
    LOG("Usage: %s [options]\n", argv[0]);
    LOG("\n");
    LOG("Download models from HuggingFace or Docker Hub\n");
    LOG("\n");
    LOG("Options:\n");
    LOG("  -h, --help                   show this help message and exit\n");
    LOG("  -hf, -hfr, --hf-repo REPO    download model from HuggingFace repository\n");
    LOG("                               format: <user>/<model>[:<quant>]\n");
    LOG("                               example: microsoft/DialoGPT-medium\n");
    LOG("  -dr, --docker-repo REPO      download model from Docker Hub\n");
    LOG("                               format: [<repo>/]<model>[:<quant>]\n");
    LOG("                               example: gemma3\n");
    LOG("  -o, --output PATH            output path for downloaded model\n");
    LOG("                               (default: cache directory)\n");
Comment on lines +21 to +22
Collaborator
Are you sure that the -o is currently handled this way?

    LOG("  --hf-token TOKEN             HuggingFace token for private repositories\n");
    LOG("\n");
    LOG("Examples:\n");
    LOG("  %s -hf microsoft/DialoGPT-medium\n", argv[0]);
    LOG("  %s -dr gemma3\n", argv[0]);
    LOG("  %s -hf microsoft/DialoGPT-medium -o ./my-model.gguf\n", argv[0]);
    LOG("\n");
}

int main(int argc, char ** argv) {
    common_params params;

    // Set up argument parsing context
    auto ctx = common_params_parser_init(params, LLAMA_EXAMPLE_COMMON, print_usage);

    // Parse command line arguments
    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
        print_usage(argc, argv);
        return 1;
    }

    // Check if help was requested or no download option provided
    if (params.model.hf_repo.empty() && params.model.docker_repo.empty()) {
        LOG_ERR("error: must specify either -hf <repo> or -dr <repo>\n");
        print_usage(argc, argv);
        return 1;
    }

    // Both cannot be specified at the same time
    if (!params.model.hf_repo.empty() && !params.model.docker_repo.empty()) {
        LOG_ERR("error: cannot specify both -hf and -dr options\n");
        print_usage(argc, argv);
        return 1;
    }
Comment on lines +52 to +56
Collaborator
this should be checked inside common_params_parse, right?
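For illustration only, a minimal sketch of the kind of mutual-exclusion check this would move into the parser; the stub struct and function below are assumptions made for the sketch, not the actual common_params_parse code:

```cpp
#include <cstdio>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for the model fields used by pull.cpp;
// the real layout lives in common_params (common.h).
struct model_params_stub {
    std::string hf_repo;      // set by -hf / --hf-repo
    std::string docker_repo;  // set by -dr / --docker-repo
};

// Sketch of the validation done once at parse time: reject the -hf/-dr
// combination centrally so every tool gets the same error message
// instead of re-implementing the check.
void validate_model_source(const model_params_stub & model) {
    if (!model.hf_repo.empty() && !model.docker_repo.empty()) {
        throw std::invalid_argument("cannot specify both -hf and -dr");
    }
}

int main() {
    model_params_stub model;
    model.hf_repo     = "bartowski/Llama-3.2-1B-Instruct-GGUF";
    model.docker_repo = "gemma3";
    try {
        validate_model_source(model);
    } catch (const std::invalid_argument & e) {
        std::fprintf(stderr, "error: %s\n", e.what());  // the parse-time error
        return 1;
    }
    return 0;
}
```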


    // Initialize llama backend for download functionality
    llama_backend_init();
Comment on lines +58 to +59
Collaborator
Why do we need to initialize the inference backend to download the model?
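A rough sketch of the download-only flow this question points at, i.e. fetching the file without touching the inference backend; the `pull_model` helper below is a hypothetical placeholder, not an existing llama.cpp function:

```cpp
#include <cstdio>
#include <string>

// Hypothetical download-only helper: a real version would perform the HTTP
// fetch into the cache directory. Placeholder only, not llama.cpp API.
static bool pull_model(const std::string & repo, const std::string & token, std::string & out_path) {
    (void) token;
    out_path = "~/.cache/llama.cpp/" + repo;  // illustrative cache location only
    return true;
}

int main() {
    std::string path;
    // Note: no llama_backend_init() and no common_init_from_params() here;
    // nothing is loaded into memory, the tool only needs the file on disk.
    if (!pull_model("bartowski/Llama-3.2-1B-Instruct-GGUF", /*token =*/ "", path)) {
        std::fprintf(stderr, "download failed\n");
        return 1;
    }
    std::printf("model stored at %s\n", path.c_str());
    return 0;
}
```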


    LOG_INF("llama-pull: downloading model...\n");

    try {
        // Use the existing model handling logic which downloads the model
        common_init_result llama_init = common_init_from_params(params);

        if (llama_init.model != nullptr) {
            LOG_INF("Model downloaded and loaded successfully to: %s\n", params.model.path.c_str());

            // We only want to download, not keep the model loaded
            // The download happens during common_init_from_params
        } else {
            LOG_ERR("Failed to download or load model\n");
            return 1;
        }
    } catch (const std::exception & e) {
        LOG_ERR("Error: %s\n", e.what());
        return 1;
    }

    // Clean up
    llama_backend_free();
    return 0;
}