From 17ca6ed5406b260e69bbc0f51c2e98404dbe63fa Mon Sep 17 00:00:00 2001
From: Eric Curtin
Date: Sat, 20 Sep 2025 17:24:35 +0100
Subject: [PATCH] Implement llama-pull tool

Complete llama-pull tool with documentation

Signed-off-by: Eric Curtin
---
 tools/CMakeLists.txt      |  1 +
 tools/pull/CMakeLists.txt |  8 ++++
 tools/pull/README.md      | 43 ++++++++++++++++++++
 tools/pull/pull.cpp       | 84 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 136 insertions(+)
 create mode 100644 tools/pull/CMakeLists.txt
 create mode 100644 tools/pull/README.md
 create mode 100644 tools/pull/pull.cpp

diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index d64956b843851..cd5ef94c0fbb0 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -18,6 +18,7 @@ else()
     add_subdirectory(gguf-split)
     add_subdirectory(imatrix)
     add_subdirectory(llama-bench)
+    add_subdirectory(pull)
     add_subdirectory(main)
     add_subdirectory(perplexity)
     add_subdirectory(quantize)
diff --git a/tools/pull/CMakeLists.txt b/tools/pull/CMakeLists.txt
new file mode 100644
index 0000000000000..5f0c9796664d2
--- /dev/null
+++ b/tools/pull/CMakeLists.txt
@@ -0,0 +1,8 @@
+set(TARGET llama-pull)
+add_executable(${TARGET} pull.cpp)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_17)
+
+if(LLAMA_TOOLS_INSTALL)
+    install(TARGETS ${TARGET} RUNTIME)
+endif()
diff --git a/tools/pull/README.md b/tools/pull/README.md
new file mode 100644
index 0000000000000..ceb02fd991c11
--- /dev/null
+++ b/tools/pull/README.md
@@ -0,0 +1,43 @@
+# llama-pull - Model Download Tool
+
+A command-line tool for downloading AI models from HuggingFace and Docker Hub for use with llama.cpp.
+
+## Usage
+
+```bash
+# Download from HuggingFace
+llama-pull -hf <user>/<model>[:<quant>]
+
+# Download from Docker Hub
+llama-pull -dr [<repo>/]<model>[:<tag>]
+```
+
+## Options
+
+- `-hf, --hf-repo REPO` - Download model from HuggingFace repository
+- `-dr, --docker-repo REPO` - Download model from Docker Hub
+- `--hf-token TOKEN` - HuggingFace token for private repositories
+- `-h, --help` - Show help message
+
+## Examples
+
+```bash
+# Download a HuggingFace model
+llama-pull -hf microsoft/DialoGPT-medium
+
+# Download a Docker model (ai/ repo is default)
+llama-pull -dr gemma3
+
+# Download with specific quantization
+llama-pull -hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M
+```
+
+## Model Storage
+
+Downloaded models are stored in the standard llama.cpp cache directory:
+- Linux/macOS: `~/.cache/llama.cpp/`
+- The models can then be used with other llama.cpp tools
+
+## Requirements
+
+- Built with `LLAMA_USE_CURL=ON` (default) for download functionality
diff --git a/tools/pull/pull.cpp b/tools/pull/pull.cpp
new file mode 100644
index 0000000000000..6505529ea84cc
--- /dev/null
+++ b/tools/pull/pull.cpp
@@ -0,0 +1,84 @@
+#include "arg.h"
+#include "common.h"
+#include "log.h"
+
+#include <exception>
+#include <string>
+
+static void print_usage(int, char ** argv) {
+    LOG("Usage: %s [options]\n", argv[0]);
+    LOG("\n");
+    LOG("Download models from HuggingFace or Docker Hub\n");
+    LOG("\n");
+    LOG("Options:\n");
+    LOG("  -h, --help                show this help message and exit\n");
+    LOG("  -hf, -hfr, --hf-repo REPO download model from HuggingFace repository\n");
+    LOG("                            format: <user>/<model>[:<quant>]\n");
+    LOG("                            example: microsoft/DialoGPT-medium\n");
+    LOG("  -dr, --docker-repo REPO   download model from Docker Hub\n");
+    LOG("                            format: [<repo>/]<model>[:<tag>]\n");
+    LOG("                            example: gemma3\n");
+    LOG("  -o, --output PATH         output path for downloaded model\n");
+    LOG("                            (default: cache directory)\n");
+    LOG("  --hf-token TOKEN          HuggingFace token for private repositories\n");
+    LOG("\n");
+    LOG("Examples:\n");
+    LOG("  %s -hf microsoft/DialoGPT-medium\n", argv[0]);
+    LOG("  %s -dr gemma3\n", argv[0]);
+    LOG("  %s -hf microsoft/DialoGPT-medium -o ./my-model.gguf\n", argv[0]);
+    LOG("\n");
+}
+
+int main(int argc, char ** argv) {
+    common_params params;
+
+    // Set up argument parsing context
+    auto ctx = common_params_parser_init(params, LLAMA_EXAMPLE_COMMON, print_usage);
+
+    // Parse command line arguments
+    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
+        print_usage(argc, argv);
+        return 1;
+    }
+
+    // A download source must be provided
+    if (params.model.hf_repo.empty() && params.model.docker_repo.empty()) {
+        LOG_ERR("error: must specify either -hf <repo> or -dr <repo>\n");
+        print_usage(argc, argv);
+        return 1;
+    }
+
+    // Both cannot be specified at the same time
+    if (!params.model.hf_repo.empty() && !params.model.docker_repo.empty()) {
+        LOG_ERR("error: cannot specify both -hf and -dr options\n");
+        print_usage(argc, argv);
+        return 1;
+    }
+
+    // Initialize llama backend for download functionality
+    llama_backend_init();
+
+    LOG_INF("llama-pull: downloading model...\n");
+
+    try {
+        // Use the existing model handling logic which downloads the model
+        common_init_result llama_init = common_init_from_params(params);
+
+        if (llama_init.model != nullptr) {
+            LOG_INF("Model downloaded and loaded successfully to: %s\n", params.model.path.c_str());
+
+            // We only want to download, not keep the model loaded
+            // The download happens during common_init_from_params
+        } else {
+            LOG_ERR("Failed to download or load model\n");
+            return 1;
+        }
+    } catch (const std::exception & e) {
+        LOG_ERR("Error: %s\n", e.what());
+        return 1;
+    }
+
+    // Clean up
+    llama_backend_free();
+    return 0;
+}
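
For trying the patch locally, here is a minimal sketch. The `llama-pull` flags and the cache location come from the README and help text above; the build commands, the `build/bin/` output path, the choice of model, and the final `llama-cli` call (used only to illustrate the "models can then be used with other llama.cpp tools" note) are assumptions about a standard llama.cpp CMake build, not part of this patch.

```bash
# Build the new target (assumes the default curl-enabled configuration)
cmake -B build
cmake --build build --target llama-pull

# Download a model into the default cache (~/.cache/llama.cpp/)
./build/bin/llama-pull -hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M

# Reuse the cached model from another tool by resolving the same -hf reference
./build/bin/llama-cli -hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M -p "Hello"
```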