
Commit c0fc0a1: Update llama.cpp
Parent: e9bc4c4

This commit bumps the vendored llama.cpp submodule and syncs the binding comments and docstrings with upstream, marking logits_all as deprecated in favor of per-token llama_batch.logits.

File tree: 2 files changed, +3 / -3 lines


llama_cpp/llama_cpp.py (2 additions, 2 deletions)
@@ -405,7 +405,7 @@ class llama_model_params(Structure):
 
 # // Keep the booleans together to avoid misalignment during copy-by-value.
 # bool mul_mat_q;   // if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
-# bool logits_all;  // the llama_eval() call computes all logits, not just the last one
+# bool logits_all;  // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
 # bool embedding;   // embedding mode only
 # bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
 # };
@@ -430,7 +430,7 @@ class llama_context_params(Structure):
         type_v (int): data type for V cache
         mul_mat_q (bool): if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
         f16_kv (bool): use fp16 for KV cache, fp32 otherwise
-        logits_all (bool): the llama_eval() call computes all logits, not just the last one
+        logits_all (bool): the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
         embedding (bool): embedding mode only"""
     _fields_ = [
         ("seed", c_uint32),

vendor/llama.cpp (submodule pointer updated: 1 addition, 1 deletion)
