This repository has been archived by the owner on Jan 5, 2025. It is now read-only.

Commit

Permalink
Adding llama-2-7b
codebanesr authored and codebanesr committed Aug 24, 2023
1 parent 7d3e93a commit 22b939f
Showing 9 changed files with 20 additions and 8 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
+.DS_Store
1 change: 1 addition & 0 deletions backend-server/.gitignore
@@ -17,3 +17,4 @@ yarn-error.log
 /.fleet
 /.idea
 /.vscode
+public/.DS_Store
Binary file removed backend-server/public/.DS_Store
3 changes: 2 additions & 1 deletion dj_backend_server/.gitignore
@@ -36,4 +36,5 @@ pip-log.txt
 pip-delete-this-directory.txt
 website_data_sources/*
 venv
-open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin
+open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin
+llama-2-7b-chat.ggmlv3.q4_K_M.bin
8 changes: 7 additions & 1 deletion dj_backend_server/api/utils/get_embeddings.py
@@ -38,7 +38,13 @@ def get_openai_embedding():

 def get_llama2_embedding():
     """Gets embeddings using the llama2 embedding provider."""
-    return LlamaCppEmbeddings(model_path="open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin")
+    return LlamaCppEmbeddings(
+        n_batch=512,
+        n_gpu_layers=1,
+        n_threads=4,
+        f16_kv=True,
+        model_path="/Users/shanurrahman/Documents/vsearch/dj_backend_server/llama-2-7b-chat.ggmlv3.q4_K_M.bin"
+    )

 def choose_embedding_provider():
     """Chooses and returns the appropriate embedding provider instance."""
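For reference, a minimal sketch of exercising an embeddings object configured like the new `get_llama2_embedding()` (not part of this commit; the relative model path and the query text are placeholders):

```python
# Minimal sketch (not part of this commit): exercising LlamaCppEmbeddings with
# settings similar to the ones added above. The model path is a placeholder --
# point it at a local GGML file such as llama-2-7b-chat.ggmlv3.q4_K_M.bin.
from langchain.embeddings import LlamaCppEmbeddings

embeddings = LlamaCppEmbeddings(
    model_path="llama-2-7b-chat.ggmlv3.q4_K_M.bin",  # placeholder path
    n_batch=512,      # tokens processed per llama.cpp batch
    n_gpu_layers=1,   # layers offloaded to the GPU (Metal/CUDA builds)
    n_threads=4,      # CPU threads used for the remaining layers
    f16_kv=True,      # half-precision key/value cache
)

vector = embeddings.embed_query("What does this chatbot support?")
print(len(vector))  # embedding dimensionality (4096 for llama-2-7b)
```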
4 changes: 2 additions & 2 deletions dj_backend_server/api/utils/get_openai_llm.py
@@ -15,15 +15,15 @@ def get_llama_llm():
     # Callbacks support token-wise streaming
     callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
     llm = LlamaCpp(
-        model_path="open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin",
+        model_path="/Users/shanurrahman/Documents/vsearch/dj_backend_server/llama-2-7b-chat.ggmlv3.q4_K_M.bin",
         n_gpu_layers=n_gpu_layers,
         n_batch=n_batch,
         n_ctx=4096,
         f16_kv=True, # MUST set to True, otherwise you will run into problem after a couple of calls
         callback_manager=callback_manager,
         verbose=True,
         temperature=0.2,
-        use_mmap=True
+        n_threads=4
     )

     return llm
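Likewise, a minimal sketch of invoking a LlamaCpp LLM configured like `get_llama_llm()` above, with token-wise streaming to stdout (not part of this commit; the model path and prompt are placeholders):

```python
# Minimal sketch (not part of this commit): invoking a LlamaCpp LLM configured
# like get_llama_llm() above. The streaming callback prints tokens to stdout
# as they are generated; the model path is a placeholder.
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
llm = LlamaCpp(
    model_path="llama-2-7b-chat.ggmlv3.q4_K_M.bin",  # placeholder path
    n_gpu_layers=1,
    n_batch=512,
    n_ctx=4096,          # llama-2 supports a 4096-token context window
    f16_kv=True,
    callback_manager=callback_manager,
    verbose=True,
    temperature=0.2,
    n_threads=4,
)

# Legacy LangChain LLM objects are callable with a plain prompt string.
response = llm("Q: Name two things llama.cpp is used for. A:")
```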
3 changes: 1 addition & 2 deletions dj_backend_server/api/utils/make_chain.py
@@ -21,9 +21,8 @@ def get_qa_chain(vector_store: VectorStore, mode, initial_prompt: str) -> Retrie
         chain_type_kwargs={"prompt": prompt},
         return_source_documents=True
     )
-
-
     return qa_chain
+
 def getRetrievalQAWithSourcesChain(vector_store: VectorStore, mode, initial_prompt: str):
     llm = get_llm()
     chain = RetrievalQAWithSourcesChain.from_chain_type(llm, chain_type="stuff", retriever=vector_store.as_retriever())
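For context, a rough usage sketch of the chain built in make_chain.py, assuming the legacy LangChain call conventions of this period; the import path, mode value, prompt, and vector store are placeholders, not part of this commit:

```python
# Rough usage sketch (not part of this commit). Assumes the app layout of
# dj_backend_server; the mode value, prompt, and vector store are placeholders.
from api.utils.make_chain import get_qa_chain

vector_store = ...  # a LangChain VectorStore built from website_data_sources

qa_chain = get_qa_chain(vector_store, mode="assistant",
                        initial_prompt="You are a helpful assistant.")

# Legacy RetrievalQA chains take a "query" key; because the chain is built with
# return_source_documents=True, the matched documents come back as well.
result = qa_chain({"query": "Which llama-2 model file does the server load?"})
print(result["result"])
print(result["source_documents"])
```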
6 changes: 5 additions & 1 deletion dj_backend_server/readme.md
@@ -139,4 +139,8 @@ This project is licensed under the XYZ License - see the [LICENSE](LICENSE) file

 ---

-Thank you for choosing our project! If you have any questions or need further assistance, feel free to reach out to us.
+Thank you for choosing our project! If you have any questions or need further assistance, feel free to reach out to us.
+
+
+
+Download llama2-7b from https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/tree/main
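The readme addition points at a manual download; a minimal sketch of fetching the same file programmatically with huggingface_hub (the local_dir target is an assumption about where the project expects the weights):

```python
# Minimal sketch (not part of this commit): fetching the GGML weights with
# huggingface_hub instead of a manual browser download. The local_dir value is
# an assumption -- the code in this commit loads the .bin file from inside
# dj_backend_server.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-Chat-GGML",
    filename="llama-2-7b-chat.ggmlv3.q4_K_M.bin",
    local_dir="dj_backend_server",  # roughly a 4 GB download
)
print(path)
```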
2 changes: 1 addition & 1 deletion llm-server/.gitignore
@@ -37,4 +37,4 @@ yarn-error.log*
 next-env.d.ts

 #Notion_db
-/Notion_DB
+/Notion_DB
