This repository has been archived by the owner on Jan 5, 2025. It is now read-only.

Commit

Permalink
Adding llama-2-7b
codebanesr authored and codebanesr committed Aug 24, 2023
1 parent 7d3e93a commit 22b939f
Showing 9 changed files with 20 additions and 8 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
+.DS_Store
1 change: 1 addition & 0 deletions backend-server/.gitignore
@@ -17,3 +17,4 @@ yarn-error.log
 /.fleet
 /.idea
 /.vscode
+public/.DS_Store
Binary file removed backend-server/public/.DS_Store
3 changes: 2 additions & 1 deletion dj_backend_server/.gitignore
@@ -36,4 +36,5 @@ pip-log.txt
 pip-delete-this-directory.txt
 website_data_sources/*
 venv
-open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin
+open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin
+llama-2-7b-chat.ggmlv3.q4_K_M.bin
8 changes: 7 additions & 1 deletion dj_backend_server/api/utils/get_embeddings.py
@@ -38,7 +38,13 @@ def get_openai_embedding():

 def get_llama2_embedding():
     """Gets embeddings using the llama2 embedding provider."""
-    return LlamaCppEmbeddings(model_path="open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin")
+    return LlamaCppEmbeddings(
+        n_batch=512,
+        n_gpu_layers=1,
+        n_threads=4,
+        f16_kv=True,
+        model_path="/Users/shanurrahman/Documents/vsearch/dj_backend_server/llama-2-7b-chat.ggmlv3.q4_K_M.bin"
+    )

 def choose_embedding_provider():
     """Chooses and returns the appropriate embedding provider instance."""
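For reference, a minimal sketch of exercising an embeddings object configured like the new `get_llama2_embedding()` (not part of this commit; the relative model path and the query text are placeholders):

```python
# Minimal sketch (not part of this commit): exercising LlamaCppEmbeddings with
# settings similar to the ones added above. The model path is a placeholder --
# point it at a local GGML file such as llama-2-7b-chat.ggmlv3.q4_K_M.bin.
from langchain.embeddings import LlamaCppEmbeddings

embeddings = LlamaCppEmbeddings(
    model_path="llama-2-7b-chat.ggmlv3.q4_K_M.bin",  # placeholder path
    n_batch=512,      # tokens processed per llama.cpp batch
    n_gpu_layers=1,   # layers offloaded to the GPU (Metal/CUDA builds)
    n_threads=4,      # CPU threads used for the remaining layers
    f16_kv=True,      # half-precision key/value cache
)

vector = embeddings.embed_query("What does this chatbot support?")
print(len(vector))  # embedding dimensionality (4096 for llama-2-7b)
```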
4 changes: 2 additions & 2 deletions dj_backend_server/api/utils/get_openai_llm.py
@@ -15,15 +15,15 @@ def get_llama_llm():
     # Callbacks support token-wise streaming
     callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
     llm = LlamaCpp(
-        model_path="open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin",
+        model_path="/Users/shanurrahman/Documents/vsearch/dj_backend_server/llama-2-7b-chat.ggmlv3.q4_K_M.bin",
         n_gpu_layers=n_gpu_layers,
         n_batch=n_batch,
         n_ctx=4096,
         f16_kv=True, # MUST set to True, otherwise you will run into problem after a couple of calls
         callback_manager=callback_manager,
         verbose=True,
         temperature=0.2,
-        use_mmap=True
+        n_threads=4
     )

     return llm
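Likewise, a minimal sketch of invoking a LlamaCpp LLM configured like `get_llama_llm()` above, with token-wise streaming to stdout (not part of this commit; the model path and prompt are placeholders):

```python
# Minimal sketch (not part of this commit): invoking a LlamaCpp LLM configured
# like get_llama_llm() above. The streaming callback prints tokens to stdout
# as they are generated; the model path is a placeholder.
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
llm = LlamaCpp(
    model_path="llama-2-7b-chat.ggmlv3.q4_K_M.bin",  # placeholder path
    n_gpu_layers=1,
    n_batch=512,
    n_ctx=4096,          # llama-2 supports a 4096-token context window
    f16_kv=True,
    callback_manager=callback_manager,
    verbose=True,
    temperature=0.2,
    n_threads=4,
)

# Legacy LangChain LLM objects are callable with a plain prompt string.
response = llm("Q: Name two things llama.cpp is used for. A:")
```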
3 changes: 1 addition & 2 deletions dj_backend_server/api/utils/make_chain.py
@@ -21,9 +21,8 @@ def get_qa_chain(vector_store: VectorStore, mode, initial_prompt: str) -> Retrie
         chain_type_kwargs={"prompt": prompt},
         return_source_documents=True
     )
-
-
     return qa_chain
+
 def getRetrievalQAWithSourcesChain(vector_store: VectorStore, mode, initial_prompt: str):
     llm = get_llm()
     chain = RetrievalQAWithSourcesChain.from_chain_type(llm, chain_type="stuff", retriever=vector_store.as_retriever())
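For context, a rough usage sketch of the chain built in make_chain.py, assuming the legacy LangChain call conventions of this period; the import path, mode value, prompt, and vector store are placeholders, not part of this commit:

```python
# Rough usage sketch (not part of this commit). Assumes the app layout of
# dj_backend_server; the mode value, prompt, and vector store are placeholders.
from api.utils.make_chain import get_qa_chain

vector_store = ...  # a LangChain VectorStore built from website_data_sources

qa_chain = get_qa_chain(vector_store, mode="assistant",
                        initial_prompt="You are a helpful assistant.")

# Legacy RetrievalQA chains take a "query" key; because the chain is built with
# return_source_documents=True, the matched documents come back as well.
result = qa_chain({"query": "Which llama-2 model file does the server load?"})
print(result["result"])
print(result["source_documents"])
```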
6 changes: 5 additions & 1 deletion dj_backend_server/readme.md
@@ -139,4 +139,8 @@ This project is licensed under the XYZ License - see the [LICENSE](LICENSE) file

 ---

-Thank you for choosing our project! If you have any questions or need further assistance, feel free to reach out to us.
+Thank you for choosing our project! If you have any questions or need further assistance, feel free to reach out to us.
+
+
+
+Download llama2-7b from https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/tree/main
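The readme addition points at a manual download; a minimal sketch of fetching the same file programmatically with huggingface_hub (the local_dir target is an assumption about where the project expects the weights):

```python
# Minimal sketch (not part of this commit): fetching the GGML weights with
# huggingface_hub instead of a manual browser download. The local_dir value is
# an assumption -- the code in this commit loads the .bin file from inside
# dj_backend_server.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-Chat-GGML",
    filename="llama-2-7b-chat.ggmlv3.q4_K_M.bin",
    local_dir="dj_backend_server",  # roughly a 4 GB download
)
print(path)
```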
2 changes: 1 addition & 1 deletion llm-server/.gitignore
@@ -37,4 +37,4 @@ yarn-error.log*
 next-env.d.ts

 #Notion_db
-/Notion_DB
+/Notion_DB
