diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..496ee2ca
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.DS_Store
\ No newline at end of file
diff --git a/backend-server/.gitignore b/backend-server/.gitignore
index 7fe978f8..c23337e5 100644
--- a/backend-server/.gitignore
+++ b/backend-server/.gitignore
@@ -17,3 +17,4 @@ yarn-error.log
 /.fleet
 /.idea
 /.vscode
+public/.DS_Store
\ No newline at end of file
diff --git a/backend-server/public/.DS_Store b/backend-server/public/.DS_Store
deleted file mode 100644
index fc8018a5..00000000
Binary files a/backend-server/public/.DS_Store and /dev/null differ
diff --git a/dj_backend_server/.gitignore b/dj_backend_server/.gitignore
index 533f178b..6244bad9 100644
--- a/dj_backend_server/.gitignore
+++ b/dj_backend_server/.gitignore
@@ -36,4 +36,5 @@ pip-log.txt
 pip-delete-this-directory.txt
 website_data_sources/*
 venv
-open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin
\ No newline at end of file
+open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin
+llama-2-7b-chat.ggmlv3.q4_K_M.bin
\ No newline at end of file
diff --git a/dj_backend_server/api/utils/get_embeddings.py b/dj_backend_server/api/utils/get_embeddings.py
index 35861b81..8ad4d8ae 100644
--- a/dj_backend_server/api/utils/get_embeddings.py
+++ b/dj_backend_server/api/utils/get_embeddings.py
@@ -38,7 +38,13 @@ def get_openai_embedding():
 
 def get_llama2_embedding():
     """Gets embeddings using the llama2 embedding provider."""
-    return LlamaCppEmbeddings(model_path="open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin")
+    return LlamaCppEmbeddings(
+        n_batch=512,
+        n_gpu_layers=1,
+        n_threads=4,
+        f16_kv=True,
+        model_path="/Users/shanurrahman/Documents/vsearch/dj_backend_server/llama-2-7b-chat.ggmlv3.q4_K_M.bin"
+    )
 
 def choose_embedding_provider():
     """Chooses and returns the appropriate embedding provider instance."""
diff --git a/dj_backend_server/api/utils/get_openai_llm.py b/dj_backend_server/api/utils/get_openai_llm.py
index d2e0603e..ccf93dc0 100644
--- a/dj_backend_server/api/utils/get_openai_llm.py
+++ b/dj_backend_server/api/utils/get_openai_llm.py
@@ -15,7 +15,7 @@ def get_llama_llm():
     # Callbacks support token-wise streaming
     callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
     llm = LlamaCpp(
-        model_path="open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin",
+        model_path="/Users/shanurrahman/Documents/vsearch/dj_backend_server/llama-2-7b-chat.ggmlv3.q4_K_M.bin",
         n_gpu_layers=n_gpu_layers,
         n_batch=n_batch,
         n_ctx=4096,
@@ -23,7 +23,7 @@
         callback_manager=callback_manager,
         verbose=True,
         temperature=0.2,
-        use_mmap=True
+        n_threads=4
     )
     return llm
 
diff --git a/dj_backend_server/api/utils/make_chain.py b/dj_backend_server/api/utils/make_chain.py
index 1b9188b5..efeb80c0 100644
--- a/dj_backend_server/api/utils/make_chain.py
+++ b/dj_backend_server/api/utils/make_chain.py
@@ -21,9 +21,8 @@ def get_qa_chain(vector_store: VectorStore, mode, initial_prompt: str) -> Retrie
         chain_type_kwargs={"prompt": prompt},
         return_source_documents=True
     )
-
-    return qa_chain
+
 
 def getRetrievalQAWithSourcesChain(vector_store: VectorStore, mode, initial_prompt: str):
     llm = get_llm()
     chain = RetrievalQAWithSourcesChain.from_chain_type(llm, chain_type="stuff", retriever=vector_store.as_retriever())
diff --git a/dj_backend_server/readme.md b/dj_backend_server/readme.md
index d73e81d6..199fbae4 100644
--- a/dj_backend_server/readme.md
+++ b/dj_backend_server/readme.md
@@ -139,4 +139,8 @@ This project is licensed under the XYZ License - see the [LICENSE](LICENSE) file
 
 ---
 
-Thank you for choosing our project! If you have any questions or need further assistance, feel free to reach out to us.
\ No newline at end of file
+Thank you for choosing our project! If you have any questions or need further assistance, feel free to reach out to us.
+
+
+
+Download llama2-7b from https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/tree/main
\ No newline at end of file
diff --git a/llm-server/.gitignore b/llm-server/.gitignore
index 1759b24f..ad37bec2 100644
--- a/llm-server/.gitignore
+++ b/llm-server/.gitignore
@@ -37,4 +37,4 @@ yarn-error.log*
 next-env.d.ts
 
 #Notion_db
-/Notion_DB
+/Notion_DB
\ No newline at end of file
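
For reference, below is a minimal sketch of the LlamaCpp setup this patch configures in get_openai_llm.py, with the hardcoded, user-specific absolute model path swapped for an environment-variable lookup. The LLAMA_MODEL_PATH variable name and the relative fallback path are illustrative assumptions, not part of the patch; all other parameter values come from the diff above.

import os

from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp

# Assumed convention: read the model location from the environment, falling
# back to the llama-2-7b-chat.ggmlv3.q4_K_M.bin file the patch adds to
# dj_backend_server/.gitignore, resolved relative to the working directory.
model_path = os.environ.get("LLAMA_MODEL_PATH", "llama-2-7b-chat.ggmlv3.q4_K_M.bin")

# Callbacks support token-wise streaming, as in the patched get_llama_llm()
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=1,    # offload one layer to the GPU, mirroring the embedding config
    n_batch=512,       # tokens evaluated per batch
    n_ctx=4096,        # context window size, as set in the patch
    n_threads=4,       # CPU threads, replacing the previous use_mmap=True
    temperature=0.2,
    callback_manager=callback_manager,
    verbose=True,      # verbose output is required for streaming callbacks
)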