From 22b939f59026b9da78a8aa5c653700a766059dc0 Mon Sep 17 00:00:00 2001
From: codebanesr
Date: Fri, 25 Aug 2023 01:46:07 +0300
Subject: [PATCH] Adding llama-2-7b

---
 .gitignore                                    |   1 +
 backend-server/.gitignore                     |   1 +
 backend-server/public/.DS_Store               | Bin 6148 -> 0 bytes
 dj_backend_server/.gitignore                  |   3 ++-
 dj_backend_server/api/utils/get_embeddings.py |   8 +++++++-
 dj_backend_server/api/utils/get_openai_llm.py |   4 ++--
 dj_backend_server/api/utils/make_chain.py     |   3 +--
 dj_backend_server/readme.md                   |   6 +++++-
 llm-server/.gitignore                         |   2 +-
 9 files changed, 20 insertions(+), 8 deletions(-)
 create mode 100644 .gitignore
 delete mode 100644 backend-server/public/.DS_Store

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..496ee2ca
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.DS_Store
\ No newline at end of file
diff --git a/backend-server/.gitignore b/backend-server/.gitignore
index 7fe978f8..c23337e5 100644
--- a/backend-server/.gitignore
+++ b/backend-server/.gitignore
@@ -17,3 +17,4 @@ yarn-error.log
 /.fleet
 /.idea
 /.vscode
+public/.DS_Store
\ No newline at end of file
diff --git a/backend-server/public/.DS_Store b/backend-server/public/.DS_Store
deleted file mode 100644
index fc8018a5acaeb3451aa3e628610dae09ee7c34fd..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeHKL2uJA6n^f?nyeu80Maf&1WO+nlcVt$hSIrjVF`17+J6OroAqI*OwBJz+J>pRGPVcgDX!)mtX
zI#9_w+H^!Y<P
z&1x@sf3A4@6ye=DdI(9QG^L35ho1~}yTWMFG5pRcK|h)@e3g_ctdA9z53dY;#dHD2
z1pQ4ajBgu^Gx?rVa0+3M*BQ0(pBdpipV+Z{2`6ci7oE;mRokjx+irLbuj#!Voap(W
z7!|X;KbpSgrx#kM;bnOgzD(lzuyyl^&Wcfz#Zw_9Q4E(iuaYd%^S+*CQ7+tpY4Ci{
zAGUTEi|)hDUhrVQyW9&F2lu($-Cr(!@7A5WkDd=t)3Z!}z$jn}d$iaMi({fmGHB?e?Bzq*o|V0!
zDCr&V55%2R&!Eel0#1Q-1-8s%i_ib3zrX*lliZV2z$x%wDWK|ygTp?CWY5-(!SPue
tAiYCkW51O_p&+xzu@v|y-bE5aTfhZi&*I7;YGCe1KxA;4Q{b;E@B`4i*69EM

diff --git a/dj_backend_server/.gitignore b/dj_backend_server/.gitignore
index 533f178b..6244bad9 100644
--- a/dj_backend_server/.gitignore
+++ b/dj_backend_server/.gitignore
@@ -36,4 +36,5 @@ pip-log.txt
 pip-delete-this-directory.txt
 website_data_sources/*
 venv
-open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin
\ No newline at end of file
+open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin
+llama-2-7b-chat.ggmlv3.q4_K_M.bin
\ No newline at end of file
diff --git a/dj_backend_server/api/utils/get_embeddings.py b/dj_backend_server/api/utils/get_embeddings.py
index 35861b81..8ad4d8ae 100644
--- a/dj_backend_server/api/utils/get_embeddings.py
+++ b/dj_backend_server/api/utils/get_embeddings.py
@@ -38,7 +38,13 @@ def get_openai_embedding():
 
 def get_llama2_embedding():
     """Gets embeddings using the llama2 embedding provider."""
-    return LlamaCppEmbeddings(model_path="open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin")
+    return LlamaCppEmbeddings(
+        n_batch=512,
+        n_gpu_layers=1,
+        n_threads=4,
+        f16_kv=True,
+        model_path="/Users/shanurrahman/Documents/vsearch/dj_backend_server/llama-2-7b-chat.ggmlv3.q4_K_M.bin"
+    )
 
 def choose_embedding_provider():
     """Chooses and returns the appropriate embedding provider instance."""
diff --git a/dj_backend_server/api/utils/get_openai_llm.py b/dj_backend_server/api/utils/get_openai_llm.py
index d2e0603e..ccf93dc0 100644
--- a/dj_backend_server/api/utils/get_openai_llm.py
+++ b/dj_backend_server/api/utils/get_openai_llm.py
@@ -15,7 +15,7 @@ def get_llama_llm():
     # Callbacks support token-wise streaming
     callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
     llm = LlamaCpp(
-        model_path="open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin",
model_path="/Users/shanurrahman/Documents/vsearch/dj_backend_server/llama-2-7b-chat.ggmlv3.q4_K_M.bin", n_gpu_layers=n_gpu_layers, n_batch=n_batch, n_ctx=4096, @@ -23,7 +23,7 @@ def get_llama_llm(): callback_manager=callback_manager, verbose=True, temperature=0.2, - use_mmap=True + n_threads=4 ) return llm diff --git a/dj_backend_server/api/utils/make_chain.py b/dj_backend_server/api/utils/make_chain.py index 1b9188b5..efeb80c0 100644 --- a/dj_backend_server/api/utils/make_chain.py +++ b/dj_backend_server/api/utils/make_chain.py @@ -21,9 +21,8 @@ def get_qa_chain(vector_store: VectorStore, mode, initial_prompt: str) -> Retrie chain_type_kwargs={"prompt": prompt}, return_source_documents=True ) - - return qa_chain + def getRetrievalQAWithSourcesChain(vector_store: VectorStore, mode, initial_prompt: str): llm = get_llm() chain = RetrievalQAWithSourcesChain.from_chain_type(llm, chain_type="stuff", retriever=vector_store.as_retriever()) diff --git a/dj_backend_server/readme.md b/dj_backend_server/readme.md index d73e81d6..199fbae4 100644 --- a/dj_backend_server/readme.md +++ b/dj_backend_server/readme.md @@ -139,4 +139,8 @@ This project is licensed under the XYZ License - see the [LICENSE](LICENSE) file --- -Thank you for choosing our project! If you have any questions or need further assistance, feel free to reach out to us. \ No newline at end of file +Thank you for choosing our project! If you have any questions or need further assistance, feel free to reach out to us. + + + +Download llama2-7b from https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/tree/main \ No newline at end of file diff --git a/llm-server/.gitignore b/llm-server/.gitignore index 1759b24f..ad37bec2 100644 --- a/llm-server/.gitignore +++ b/llm-server/.gitignore @@ -37,4 +37,4 @@ yarn-error.log* next-env.d.ts #Notion_db -/Notion_DB +/Notion_DB \ No newline at end of file