6
6
7
7
#include < map>
8
8
#include < cassert>
9
+ #include < sstream>
9
10
#include < stdexcept>
10
11
11
12
// vec
@@ -163,13 +164,38 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
163
164
164
165
// check metadata
165
166
{
167
+ const gguf_context * gguf_ctx = ctx_gguf.get ();
168
+
169
+ LLAMA_LOG_INFO (" %s: Dumping metadata keys/values.\n " , __func__);
170
+
171
+ // get metadata as string
172
+ for (int i = 0 ; i < gguf_get_n_kv (gguf_ctx); i++) {
173
+ gguf_type type = gguf_get_kv_type (gguf_ctx, i);
174
+ const std::string type_name =
175
+ type == GGUF_TYPE_ARRAY
176
+ ? format (" %s[%s,%zu]" , gguf_type_name (type), gguf_type_name (gguf_get_arr_type (gguf_ctx, i)), gguf_get_arr_n (gguf_ctx, i))
177
+ : gguf_type_name (type);
178
+ const char * name = gguf_get_key (gguf_ctx, i);
179
+ const std::string value = gguf_kv_to_str (gguf_ctx, i);
180
+
181
+ if (type != GGUF_TYPE_ARRAY) {
182
+ adapter.gguf_kv .emplace (name, value);
183
+ }
184
+
185
+ const size_t MAX_VALUE_LEN = 40 ;
186
+ std::string print_value = value.size () > MAX_VALUE_LEN ? format (" %s..." , value.substr (0 , MAX_VALUE_LEN - 3 ).c_str ()) : value;
187
+ replace_all (print_value, " \n " , " \\ n" );
188
+
189
+ LLAMA_LOG_INFO (" %s: - kv %3d: %42s %-16s = %s\n " , __func__, i, name, type_name.c_str (), print_value.c_str ());
190
+ }
191
+
166
192
auto get_kv_str = [&](const std::string & key) -> std::string {
167
- int id = gguf_find_key (ctx_gguf. get () , key.c_str ());
168
- return id < 0 ? " " : std::string (gguf_get_val_str (ctx_gguf. get () , id));
193
+ int id = gguf_find_key (gguf_ctx , key.c_str ());
194
+ return id < 0 ? " " : std::string (gguf_get_val_str (gguf_ctx , id));
169
195
};
170
196
auto get_kv_f32 = [&](const std::string & key) -> float {
171
- int id = gguf_find_key (ctx_gguf. get () , key.c_str ());
172
- return id < 0 ? 0 .0f : gguf_get_val_f32 (ctx_gguf. get () , id);
197
+ int id = gguf_find_key (gguf_ctx , key.c_str ());
198
+ return id < 0 ? 0 .0f : gguf_get_val_f32 (gguf_ctx , id);
173
199
};
174
200
LLM_KV llm_kv = LLM_KV (LLM_ARCH_UNKNOWN);
175
201
@@ -190,6 +216,26 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
190
216
}
191
217
192
218
adapter.alpha = get_kv_f32 (llm_kv (LLM_KV_ADAPTER_LORA_ALPHA));
219
+
220
+ // parse alora invocation sequence vector
221
+ const auto & key = llm_kv (LLM_KV_ADAPTER_ALORA_INVOCATION_TOKENS);
222
+ const int kid = gguf_find_key (ctx_gguf.get (), key.c_str ());
223
+ if (kid >= 0 ) {
224
+ if (gguf_get_kv_type (ctx_gguf.get (), kid) != GGUF_TYPE_ARRAY) {
225
+ throw std::runtime_error (" invalid gguf type for " + key);
226
+ }
227
+ const auto arr_type = gguf_get_arr_type (ctx_gguf.get (), kid);
228
+ if (arr_type != GGUF_TYPE_UINT32) {
229
+ throw std::runtime_error (" invalid gguf element type for " + key);
230
+ }
231
+ const size_t seq_len = gguf_get_arr_n (ctx_gguf.get (), kid);
232
+ const void * data = gguf_get_arr_data (ctx_gguf.get (), kid);
233
+ adapter.alora_invocation_tokens .resize (seq_len);
234
+ std::copy (
235
+ (const llama_token *)data,
236
+ (const llama_token *)data + seq_len,
237
+ adapter.alora_invocation_tokens .begin ());
238
+ }
193
239
}
194
240
195
241
int n_tensors = gguf_get_n_tensors (ctx_gguf.get ());
@@ -383,6 +429,57 @@ llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * p
383
429
return nullptr ;
384
430
}
385
431
432
+ int32_t llama_adapter_meta_val_str (const llama_adapter_lora * adapter, const char * key, char * buf, size_t buf_size) {
433
+ const auto & it = adapter->gguf_kv .find (key);
434
+ if (it == adapter->gguf_kv .end ()) {
435
+ if (buf_size > 0 ) {
436
+ buf[0 ] = ' \0 ' ;
437
+ }
438
+ return -1 ;
439
+ }
440
+ return snprintf (buf, buf_size, " %s" , it->second .c_str ());
441
+ }
442
+
443
+ int32_t llama_adapter_meta_count (const llama_adapter_lora * adapter) {
444
+ return (int )adapter->gguf_kv .size ();
445
+ }
446
+
447
+ int32_t llama_adapter_meta_key_by_index (const llama_adapter_lora * adapter, int i, char * buf, size_t buf_size) {
448
+ if (i < 0 || i >= (int )adapter->gguf_kv .size ()) {
449
+ if (buf_size > 0 ) {
450
+ buf[0 ] = ' \0 ' ;
451
+ }
452
+ return -1 ;
453
+ }
454
+ auto it = adapter->gguf_kv .begin ();
455
+ std::advance (it, i);
456
+ return snprintf (buf, buf_size, " %s" , it->first .c_str ());
457
+ }
458
+
459
+ int32_t llama_adapter_meta_val_str_by_index (const llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size) {
460
+ if (i < 0 || i >= (int )adapter->gguf_kv .size ()) {
461
+ if (buf_size > 0 ) {
462
+ buf[0 ] = ' \0 ' ;
463
+ }
464
+ return -1 ;
465
+ }
466
+ auto it = adapter->gguf_kv .begin ();
467
+ std::advance (it, i);
468
+ return snprintf (buf, buf_size, " %s" , it->second .c_str ());
469
+ }
470
+
386
471
// Destroy a LoRA adapter and release all resources it owns.
// Safe to call with nullptr (delete on a null pointer is a no-op).
void llama_adapter_lora_free(llama_adapter_lora * adapter) {
    delete adapter;
}
474
+
475
+ uint64_t llama_adapter_get_alora_n_invocation_tokens (const struct llama_adapter_lora * adapter) {
476
+ if (!adapter) {
477
+ return 0 ;
478
+ }
479
+ return adapter->alora_invocation_tokens .size ();
480
+ }
481
+
482
+ const llama_token * llama_adapter_get_alora_invocation_tokens (const llama_adapter_lora * adapter) {
483
+ GGML_ASSERT (adapter);
484
+ return adapter->alora_invocation_tokens .data ();
485
+ }
0 commit comments