Commit d94f214

talk-llama : sync llama.cpp
1 parent 66ad624 commit d94f214

33 files changed: +5180 -1184 lines

examples/talk-llama/llama-adapter.cpp

Lines changed: 101 additions & 4 deletions
@@ -6,6 +6,7 @@
 
 #include <map>
 #include <cassert>
+#include <sstream>
 #include <stdexcept>
 
 // vec
@@ -163,13 +164,38 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
 
     // check metadata
     {
+        const gguf_context * gguf_ctx = ctx_gguf.get();
+
+        LLAMA_LOG_INFO("%s: Dumping metadata keys/values.\n", __func__);
+
+        // get metadata as string
+        for (int i = 0; i < gguf_get_n_kv(gguf_ctx); i++) {
+            gguf_type type = gguf_get_kv_type(gguf_ctx, i);
+            const std::string type_name =
+                type == GGUF_TYPE_ARRAY
+                ? format("%s[%s,%zu]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(gguf_ctx, i)), gguf_get_arr_n(gguf_ctx, i))
+                : gguf_type_name(type);
+            const char * name = gguf_get_key(gguf_ctx, i);
+            const std::string value = gguf_kv_to_str(gguf_ctx, i);
+
+            if (type != GGUF_TYPE_ARRAY) {
+                adapter.gguf_kv.emplace(name, value);
+            }
+
+            const size_t MAX_VALUE_LEN = 40;
+            std::string print_value = value.size() > MAX_VALUE_LEN ? format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str()) : value;
+            replace_all(print_value, "\n", "\\n");
+
+            LLAMA_LOG_INFO("%s: - kv %3d: %42s %-16s = %s\n", __func__, i, name, type_name.c_str(), print_value.c_str());
+        }
+
         auto get_kv_str = [&](const std::string & key) -> std::string {
-            int id = gguf_find_key(ctx_gguf.get(), key.c_str());
-            return id < 0 ? "" : std::string(gguf_get_val_str(ctx_gguf.get(), id));
+            int id = gguf_find_key(gguf_ctx, key.c_str());
+            return id < 0 ? "" : std::string(gguf_get_val_str(gguf_ctx, id));
         };
         auto get_kv_f32 = [&](const std::string & key) -> float {
-            int id = gguf_find_key(ctx_gguf.get(), key.c_str());
-            return id < 0 ? 0.0f : gguf_get_val_f32(ctx_gguf.get(), id);
+            int id = gguf_find_key(gguf_ctx, key.c_str());
+            return id < 0 ? 0.0f : gguf_get_val_f32(gguf_ctx, id);
         };
         LLM_KV llm_kv = LLM_KV(LLM_ARCH_UNKNOWN);
 
@@ -190,6 +216,26 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
         }
 
         adapter.alpha = get_kv_f32(llm_kv(LLM_KV_ADAPTER_LORA_ALPHA));
+
+        // parse alora invocation sequence vector
+        const auto & key = llm_kv(LLM_KV_ADAPTER_ALORA_INVOCATION_TOKENS);
+        const int kid = gguf_find_key(ctx_gguf.get(), key.c_str());
+        if (kid >= 0) {
+            if (gguf_get_kv_type(ctx_gguf.get(), kid) != GGUF_TYPE_ARRAY) {
+                throw std::runtime_error("invalid gguf type for " + key);
+            }
+            const auto arr_type = gguf_get_arr_type(ctx_gguf.get(), kid);
+            if (arr_type != GGUF_TYPE_UINT32) {
+                throw std::runtime_error("invalid gguf element type for " + key);
+            }
+            const size_t seq_len = gguf_get_arr_n(ctx_gguf.get(), kid);
+            const void * data = gguf_get_arr_data(ctx_gguf.get(), kid);
+            adapter.alora_invocation_tokens.resize(seq_len);
+            std::copy(
+                (const llama_token *)data,
+                (const llama_token *)data + seq_len,
+                adapter.alora_invocation_tokens.begin());
+        }
     }
 
     int n_tensors = gguf_get_n_tensors(ctx_gguf.get());
@@ -383,6 +429,57 @@ llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * p
     return nullptr;
 }
 
+int32_t llama_adapter_meta_val_str(const llama_adapter_lora * adapter, const char * key, char * buf, size_t buf_size) {
+    const auto & it = adapter->gguf_kv.find(key);
+    if (it == adapter->gguf_kv.end()) {
+        if (buf_size > 0) {
+            buf[0] = '\0';
+        }
+        return -1;
+    }
+    return snprintf(buf, buf_size, "%s", it->second.c_str());
+}
+
+int32_t llama_adapter_meta_count(const llama_adapter_lora * adapter) {
+    return (int)adapter->gguf_kv.size();
+}
+
+int32_t llama_adapter_meta_key_by_index(const llama_adapter_lora * adapter, int i, char * buf, size_t buf_size) {
+    if (i < 0 || i >= (int)adapter->gguf_kv.size()) {
+        if (buf_size > 0) {
+            buf[0] = '\0';
+        }
+        return -1;
+    }
+    auto it = adapter->gguf_kv.begin();
+    std::advance(it, i);
+    return snprintf(buf, buf_size, "%s", it->first.c_str());
+}
+
+int32_t llama_adapter_meta_val_str_by_index(const llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size) {
+    if (i < 0 || i >= (int)adapter->gguf_kv.size()) {
+        if (buf_size > 0) {
+            buf[0] = '\0';
+        }
+        return -1;
+    }
+    auto it = adapter->gguf_kv.begin();
+    std::advance(it, i);
+    return snprintf(buf, buf_size, "%s", it->second.c_str());
+}
+
 void llama_adapter_lora_free(llama_adapter_lora * adapter) {
     delete adapter;
 }
+
+uint64_t llama_adapter_get_alora_n_invocation_tokens(const struct llama_adapter_lora * adapter) {
+    if (!adapter) {
+        return 0;
+    }
+    return adapter->alora_invocation_tokens.size();
+}
+
+const llama_token * llama_adapter_get_alora_invocation_tokens(const llama_adapter_lora * adapter) {
+    GGML_ASSERT(adapter);
+    return adapter->alora_invocation_tokens.data();
+}
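For reference, a minimal usage sketch (not part of this commit) of the adapter metadata accessors added above. It assumes these functions are declared in the public llama.h header, that `adapter` was obtained from `llama_adapter_lora_init`, and that the helper name `dump_adapter_meta` and the buffer sizes are illustrative only:

// Hypothetical sketch: enumerate the GGUF metadata of a loaded LoRA adapter
// using the accessors defined in llama-adapter.cpp above.
#include "llama.h"   // assumed to declare the llama_adapter_meta_* accessors

#include <cstdio>

static void dump_adapter_meta(const llama_adapter_lora * adapter) {
    char key[128];
    char val[256];

    const int32_t n_kv = llama_adapter_meta_count(adapter);
    for (int32_t i = 0; i < n_kv; i++) {
        // both by-index accessors return -1 and write an empty string when the index is out of range
        if (llama_adapter_meta_key_by_index(adapter, i, key, sizeof(key)) < 0) {
            continue;
        }
        llama_adapter_meta_val_str_by_index(adapter, i, val, sizeof(val));
        printf("%s = %s\n", key, val);
    }
}

Note that `gguf_kv` is declared as a `std::unordered_map` in llama-adapter.h, so the iteration order of the by-index accessors is unspecified; they are meant for enumeration rather than stable ordering.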

examples/talk-llama/llama-adapter.h

Lines changed: 6 additions & 0 deletions
@@ -67,6 +67,12 @@ struct llama_adapter_lora {
 
     float alpha;
 
+    // gguf metadata
+    std::unordered_map<std::string, std::string> gguf_kv;
+
+    // activated lora (aLoRA)
+    std::vector<llama_token> alora_invocation_tokens;
+
     llama_adapter_lora() = default;
     ~llama_adapter_lora() = default;
 
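Similarly, a hedged sketch of how the new aLoRA fields might be consumed end to end. It assumes the existing llama.cpp public API for loading (`llama_model_default_params`, `llama_model_load_from_file`, `llama_adapter_lora_init`, `llama_model_free`) and that the new getters are exported alongside it; "model.gguf" and "adapter.gguf" are placeholder paths and error handling is simplified:

// Hypothetical sketch: load an adapter and read back the aLoRA invocation
// token sequence stored by the loader changes above.
#include "llama.h"

#include <cstdio>

int main() {
    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file("model.gguf", mparams); // placeholder path
    if (!model) {
        return 1;
    }

    llama_adapter_lora * adapter = llama_adapter_lora_init(model, "adapter.gguf"); // placeholder path
    if (adapter) {
        const uint64_t n_inv = llama_adapter_get_alora_n_invocation_tokens(adapter);
        const llama_token * inv = llama_adapter_get_alora_invocation_tokens(adapter);

        printf("aLoRA invocation sequence (%llu tokens):", (unsigned long long) n_inv);
        for (uint64_t i = 0; i < n_inv; i++) {
            printf(" %d", inv[i]);
        }
        printf("\n");

        llama_adapter_lora_free(adapter);
    }

    llama_model_free(model);
    return 0;
}

If the adapter carries no aLoRA invocation sequence, the token count is simply 0 and the loop body never executes.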
