
Commit

remove old llama.cpp submodules
cebtenzzre authored and manyoso committed Oct 5, 2023
1 parent cc6db61 · commit d87573e
Showing 5 changed files with 0 additions and 20 deletions.
.gitmodules: 6 changes (0 additions, 6 deletions)
@@ -1,9 +1,3 @@
-[submodule "llama.cpp-230519"]
-    path = gpt4all-backend/llama.cpp-230519
-    url = https://github.com/ggerganov/llama.cpp.git
-[submodule "llama.cpp-230511"]
-    path = gpt4all-backend/llama.cpp-230511
-    url = https://github.com/nomic-ai/llama.cpp
 [submodule "llama.cpp-mainline"]
     path = gpt4all-backend/llama.cpp-mainline
     url = https://github.com/nomic-ai/llama.cpp.git
gpt4all-backend/llama.cpp-230511: 1 change (0 additions, 1 deletion)
Submodule llama.cpp-230511 deleted from f826aa
gpt4all-backend/llama.cpp-230519: 1 change (0 additions, 1 deletion)
Submodule llama.cpp-230519 deleted from 5ea433
gpt4all-backend/llamamodel.cpp: 10 changes (0 additions, 10 deletions)
@@ -39,15 +39,10 @@ const char *modelType_ = "LLaMA";
 struct gpt_params {
     int32_t seed = -1;  // RNG seed
     int32_t n_keep = 0; // number of tokens to keep from initial prompt
-#if LLAMA_DATE <= 230511
-    int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
-#endif

-#if LLAMA_DATE >= 230519
     // sampling parameters
     float tfs_z = 1.0f;     // 1.0 = disabled
     float typical_p = 1.0f; // 1.0 = disabled
-#endif

     std::string prompt = "";

@@ -57,7 +52,6 @@ struct gpt_params {
     bool use_mlock = false; // use mlock to keep model in memory
 };

-#if LLAMA_DATE >= 230519
 static int llama_sample_top_p_top_k(
         llama_context *ctx,
         const llama_token *last_n_tokens_data,
@@ -85,7 +79,6 @@ static int llama_sample_top_p_top_k(
     llama_sample_temperature(ctx, &candidates_p, temp);
     return llama_sample_token(ctx, &candidates_p);
 }
-#endif

 struct LLamaPrivate {
     const std::string modelPath;
@@ -150,9 +143,6 @@ bool LLamaModel::loadModel(const std::string &modelPath)
 #else
     d_ptr->params.use_mlock = params.use_mlock;
 #endif
-#if LLAMA_DATE <= 230511
-    d_ptr->params.n_parts = params.n_parts;
-#endif
 #ifdef GGML_USE_METAL
     std::cerr << "llama.cpp: using Metal" << std::endl;
     // metal always runs the whole model if n_gpu_layers is not 0, at least
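Note: with the #if LLAMA_DATE guards gone, the llama_sample_top_p_top_k shim above is compiled unconditionally against the newer llama.cpp sampling API. For readers unfamiliar with that API, a minimal self-contained sketch of the same pattern follows. It is illustrative only, not this commit's code: the parameter names are invented here, and it assumes the mid-2023 (pre-GGUF) llama.cpp functions llama_get_logits, llama_n_vocab, llama_sample_repetition_penalty, llama_sample_top_k, llama_sample_tail_free, llama_sample_typical, llama_sample_top_p, llama_sample_temperature, and llama_sample_token.

// Illustrative sketch (not from this commit): chain the 2023-era llama.cpp samplers
// over a candidate array built from the context's logits, then pick a token.
#include <cstddef>
#include <vector>
#include "llama.h"

static llama_token sample_sketch(llama_context *ctx,
                                 const llama_token *last_n_tokens, size_t last_n_size,
                                 int top_k, float top_p, float tfs_z, float typical_p,
                                 float temp, float repeat_penalty) {
    const int n_vocab = llama_n_vocab(ctx);
    float *logits = llama_get_logits(ctx);

    // One candidate per vocabulary entry; the samplers fill in and renormalize p.
    std::vector<llama_token_data> candidates;
    candidates.reserve(n_vocab);
    for (llama_token id = 0; id < n_vocab; id++) {
        candidates.push_back({id, logits[id], 0.0f});
    }
    llama_token_data_array candidates_p = {candidates.data(), candidates.size(), false};

    llama_sample_repetition_penalty(ctx, &candidates_p, last_n_tokens, last_n_size, repeat_penalty);
    llama_sample_top_k(ctx, &candidates_p, top_k, 1);
    llama_sample_tail_free(ctx, &candidates_p, tfs_z, 1);
    llama_sample_typical(ctx, &candidates_p, typical_p, 1);
    llama_sample_top_p(ctx, &candidates_p, top_p, 1);
    llama_sample_temperature(ctx, &candidates_p, temp);
    return llama_sample_token(ctx, &candidates_p);
}

The tfs_z and typical_p defaults of 1.0f kept in gpt_params above leave tail-free and locally typical sampling disabled unless a caller overrides them.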
gpt4all-backend/llmodel_shared.h: 2 changes (0 additions, 2 deletions)
@@ -80,7 +80,6 @@ struct llm_kv_cache {
     }
 };

-#if LLAMA_DATE >= 230519
 inline void ggml_graph_compute_g4a(llm_buffer& buf, ggml_cgraph * graph, int n_threads) {
     struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
     if (plan.work_size > 0) {
@@ -89,4 +88,3 @@ inline void ggml_graph_compute_g4a(llm_buffer& buf, ggml_cgraph * graph, int n_threads) {
     }
     ggml_graph_compute(graph, &plan);
 }
-#endif
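Note: ggml_graph_compute_g4a, also unguarded now, wraps ggml's plan-then-compute flow: ask ggml_graph_plan how much scratch memory the graph needs, provide it, then run ggml_graph_compute. A stand-alone sketch of that flow is below; it is illustrative only and substitutes a plain std::vector for the repository's llm_buffer.

// Illustrative sketch (not from this commit) of the plan/compute pattern the helper wraps.
#include <cstdint>
#include <vector>
#include "ggml.h"

static void compute_graph(struct ggml_cgraph *graph, int n_threads) {
    struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);  // sizes the work buffer
    std::vector<uint8_t> work;                                   // stand-in for llm_buffer
    if (plan.work_size > 0) {
        work.resize(plan.work_size);   // scratch space requested by the plan
        plan.work_data = work.data();
    }
    ggml_graph_compute(graph, &plan);  // executes the graph with the given plan
}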
