Merge pull request openchatai#150 from codebanesr/enhancement/llama
"PR: Add Support for llama-2-7b, Improve Execution Speed, and Clean Up Libraries"
codebanesr authored Aug 28, 2023
2 parents d12adc4 + 0b002dc commit af92976
Showing 18 changed files with 189 additions and 36 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
.DS_Store
8 changes: 5 additions & 3 deletions README.md
@@ -144,16 +144,18 @@ make.bat
```


## Getting Started with the Openchat Django App
# Getting Started with the Openchat Django App

Start contributing to and using OpenChat, now rebuilt in Python. To get started, follow the instructions in the guide here: [OpenChat Python Guide](docs/django_release.md).

**Please note that the transition to the Python backend includes a breaking change related to the Qdrant vector store.**

Once the installation is complete, you can access the OpenChat console at: http://localhost:8000

Documentation [available here](https://docs.openchat.so/introduction)
## 🚀 Unleash the Power of Native LLM
Discover the latest addition: llama2 support. [Dive into this Guide to Harness LLAMA2 by Meta](docs/aug_26/readme.md) 📖🔮
***
### Full documentation [available here](https://docs.openchat.so/introduction)

## 🚀 Upgrade guide:

1 change: 1 addition & 0 deletions backend-server/.gitignore
@@ -17,3 +17,4 @@ yarn-error.log
/.fleet
/.idea
/.vscode
public/.DS_Store
Binary file removed backend-server/public/.DS_Store
Binary file not shown.
23 changes: 23 additions & 0 deletions dj_backend_server/.dockerignore
@@ -0,0 +1,23 @@
# Exclude version control directories/files
.git
.gitignore

# Exclude temporary or build files
__pycache__
*.pyc
*.pyo

# Exclude editor-specific files
.vscode

# Exclude local development files
*.log
*.db

# Exclude test files and test data
tests
test_data

# Exclude any other files or directories that are not needed in the container
venv
llama-2-7b-chat.ggmlv3.q4_K_M.bin
4 changes: 3 additions & 1 deletion dj_backend_server/.gitignore
@@ -35,4 +35,6 @@ staticfiles
pip-log.txt
pip-delete-this-directory.txt
website_data_sources/*
venv
venv
open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin
llama-2-7b-chat.ggmlv3.q4_K_M.bin
1 change: 0 additions & 1 deletion dj_backend_server/.vscode/launch.json
@@ -19,7 +19,6 @@
"name": "Python: Celery Workers",
"type": "python",
"request": "launch",
// "program": "${workspaceFolder}/path_to_celery_executable",
"module": "celery",
"args": [
"-A",
2 changes: 1 addition & 1 deletion dj_backend_server/Dockerfile
@@ -15,4 +15,4 @@ COPY . /app/
RUN pip install --no-cache-dir -r requirements.txt

# Run migrations on startup
CMD ["sh", "-c", "python manage.py migrate && python manage.py runserver 0.0.0.0:8000"]
CMD ["sh", "-c", "python manage.py sync_models && python manage.py runserver 0.0.0.0:8000"]
60 changes: 54 additions & 6 deletions dj_backend_server/Makefile
@@ -1,23 +1,71 @@
# Makefile to Run docker-compose for Django App

# Check if Docker is installed
# Check if Docker and Docker Compose are installed
DOCKER := $(shell command -v docker 2> /dev/null)
DOCKER_COMPOSE := $(shell command -v docker-compose 2> /dev/null)
OS := $(shell uname)

ifndef DOCKER
$(error "Docker is not installed. Please install Docker before proceeding.")
$(error $(shell tput setaf 1)"Docker is not installed. Please install Docker before proceeding."$(shell tput sgr0))
endif

ifndef DOCKER_COMPOSE
$(error "Docker Compose is not installed. Please install Docker Compose before proceeding.")
ifndef DOCKER_COMPOSE
$(error $(shell tput setaf 1)"Docker Compose is not installed. Please install Docker Compose before proceeding."$(shell tput sgr0))
endif

.env.docker:
@echo "Error: The .env.docker file is missing. Please create it before proceeding. Refer example.env.docker or readme file in dj_backend_server/readme.md"
@echo $(shell tput setaf 1)"Error: The .env.docker file is missing. Please create it before proceeding. Refer to example.env.docker or the readme file in dj_backend_server/readme.md"$(shell tput sgr0)
exit 1

install: .env.docker
venv:
ifndef venv
ifeq ($(OS), Darwin)
@echo $(shell tput setaf 2)"Creating a virtual environment..."$(shell tput sgr0)
python3 -m venv venv
else
@echo $(shell tput setaf 2)"Creating a virtual environment..."$(shell tput sgr0)
python3 -m venv venv
endif
endif

activate-venv:
@echo $(shell tput setaf 3)"Activating virtual environment..."$(shell tput sgr0)
. venv/bin/activate


install-requirements: activate-venv
@echo $(shell tput setaf 2)"Installing Python dependencies..."$(shell tput sgr0)
pip install -r requirements.txt

install: .env install-requirements
docker-compose up -d

down:
docker-compose down

ifeq ($(OS), Darwin) # macOS
OPEN_COMMAND := open
else ifeq ($(OS), Linux)
OPEN_COMMAND := xdg-open
else
OPEN_COMMAND := echo $(shell tput setaf 1)"Unsupported OS: $(OS)"$(shell tput sgr0)
endif

dev-start:
docker-compose up -d
celery -A dj_backend_server worker --loglevel=info &
python manage.py sync_models && python manage.py runserver 0.0.0.0:8000

@echo $(shell tput setaf 3)"Waiting for 20 seconds before opening the browser..."$(shell tput sgr0)
sleep 20

$(OPEN_COMMAND) http://localhost:8000

dev-stop:
docker-compose down
kill -9 $$(pgrep -f "celery -A dj_backend_server")
kill -9 $$(pgrep -f "python manage.py runserver")

@echo $$(tput setaf 3)"Services stopped."$$(tput sgr0)

.PHONY: venv install-requirements install down dev-start dev-stop
1 change: 1 addition & 0 deletions dj_backend_server/api/enums/embedding_type.py
@@ -5,4 +5,5 @@ class EmbeddingProvider(Enum):
OPENAI = "openai"
BARD = "bard"
azure = "azure"
llama2 = "llama2"

11 changes: 9 additions & 2 deletions dj_backend_server/api/utils/get_embeddings.py
@@ -3,10 +3,9 @@
import os
from dotenv import load_dotenv
from langchain.embeddings.base import Embeddings
from langchain.embeddings import LlamaCppEmbeddings

load_dotenv()

# https://github.com/easonlai/azure_openai_langchain_sample/blob/main/chat_with_pdf.ipynb
import os


@@ -37,6 +36,10 @@ def get_openai_embedding():

return OpenAIEmbeddings(openai_api_key=openai_api_key, chunk_size=1)

def get_llama2_embedding():
"""Gets embeddings using the llama2 embedding provider."""
return LlamaCppEmbeddings(model_path="llama-2-7b-chat.ggmlv3.q4_K_M.bin")

def choose_embedding_provider():
"""Chooses and returns the appropriate embedding provider instance."""
embedding_provider = get_embedding_provider()
@@ -46,6 +49,10 @@ def choose_embedding_provider():

elif embedding_provider == EmbeddingProvider.OPENAI.value:
return get_openai_embedding()

elif embedding_provider == EmbeddingProvider.llama2.value:
return get_llama2_embedding()


else:
available_providers = ", ".join([service.value for service in EmbeddingProvider])
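
A minimal, hypothetical usage sketch for the new llama2 embedding path. The import path and the assumption that `get_embedding_provider()` reads the `EMBEDDING_PROVIDER` variable (as described in docs/aug_26/readme.md) may need adjusting to the actual project layout:

```python
import os

# Assumption: get_embedding_provider() reads EMBEDDING_PROVIDER, as documented
# in docs/aug_26/readme.md. Set it before importing the module.
os.environ["EMBEDDING_PROVIDER"] = "llama2"

# Hypothetical import path; adjust to the real package layout.
from api.utils.get_embeddings import choose_embedding_provider

# Returns LlamaCppEmbeddings backed by llama-2-7b-chat.ggmlv3.q4_K_M.bin,
# which must already be downloaded into the working directory.
embeddings = choose_embedding_provider()

vector = embeddings.embed_query("What does OpenChat do?")
print(len(vector))  # dimensionality of the llama2 embedding
```
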
30 changes: 27 additions & 3 deletions dj_backend_server/api/utils/get_openai_llm.py
@@ -1,8 +1,31 @@
from langchain.llms import AzureOpenAI, OpenAI
import os
from dotenv import load_dotenv

from langchain.llms import LlamaCpp
load_dotenv()
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler


def get_llama_llm():
n_gpu_layers = 1 # Metal set to 1 is enough.
n_batch = 512 # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.

# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
llm = LlamaCpp(
model_path="llama-2-7b-chat.ggmlv3.q4_K_M.bin",
n_gpu_layers=n_gpu_layers,
n_batch=n_batch,
n_ctx=4096,
f16_kv=True, # MUST set to True, otherwise you will run into problem after a couple of calls
callback_manager=callback_manager,
verbose=True,
temperature=0.2,
)

return llm

# Azure OpenAI Language Model client
def get_azure_openai_llm():
@@ -43,10 +66,11 @@ def get_llm():

clients = {
'azure': get_azure_openai_llm,
'openai': get_openai_llm
'openai': get_openai_llm,
'llama2': get_llama_llm
}

api_type = os.environ.get('OPENAI_API_TYPE')
api_type = os.environ.get('LLM')
if api_type not in clients:
raise ValueError(f"Invalid LLM: {api_type}")

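
A minimal, hypothetical sketch of exercising the new `LLM`-based dispatch in `get_llm()`. The import path is an assumption, and the GGML model file must already be present in the working directory:

```python
import os

# The completion client is now selected via the LLM environment variable
# (see the change from OPENAI_API_TYPE to LLM above).
os.environ["LLM"] = "llama2"

# Hypothetical import path; adjust to the real package layout.
from api.utils.get_openai_llm import get_llm

# Expects llama-2-7b-chat.ggmlv3.q4_K_M.bin next to the Django project.
llm = get_llm()
print(llm("Summarize what OpenChat is in one sentence."))
```
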
3 changes: 1 addition & 2 deletions dj_backend_server/api/utils/make_chain.py
@@ -21,9 +21,8 @@ def get_qa_chain(vector_store: VectorStore, mode, initial_prompt: str) -> Retrie
chain_type_kwargs={"prompt": prompt},
return_source_documents=True
)


return qa_chain

def getRetrievalQAWithSourcesChain(vector_store: VectorStore, mode, initial_prompt: str):
llm = get_llm()
chain = RetrievalQAWithSourcesChain.from_chain_type(llm, chain_type="stuff", retriever=vector_store.as_retriever())
5 changes: 5 additions & 0 deletions dj_backend_server/dj_backend_server/settings.py
@@ -171,3 +171,8 @@
}

SESSION_ENGINE = 'django.contrib.sessions.backends.db' # You can choose other engines as well

ALLOWED_HOSTS = [
'localhost',
'0.0.0.0',
]
17 changes: 6 additions & 11 deletions dj_backend_server/docker-compose.yaml
@@ -34,8 +34,9 @@ services:
# dockerfile: Dockerfile
# ports:
# - "8000:8000"
# # volumes:
# # - .:/app
# volumes:
# - ./website_data_sources:/app/website_data_sources
# - ./llama-2-7b-chat.ggmlv3.q4_K_M.bin:/app/llama-2-7b-chat.ggmlv3.q4_K_M.bin:ro
# depends_on:
# - mysql
# env_file:
@@ -45,9 +46,6 @@
# python manage.py runserver 0.0.0.0:8000"
# networks:
# - openchat_network
# dns:
# - 8.8.8.8
# - 8.8.4.4

adminer:
image: adminer
@@ -65,19 +63,16 @@
# context: .
# dockerfile: Dockerfile
# container_name: myproject_celery
# # volumes:
# # - .:/app
# volumes:
# - ./website_data_sources:/app/website_data_sources
# - ./llama-2-7b-chat.ggmlv3.q4_K_M.bin:/app/llama-2-7b-chat.ggmlv3.q4_K_M.bin:ro
# depends_on:
# - web
# env_file:
# - .env.docker
# command: celery -A dj_backend_server worker --loglevel=info
# networks:
# - openchat_network
# dns:
# - 8.8.8.8
# - 8.8.4.4


redis:
image: redis:latest
7 changes: 2 additions & 5 deletions dj_backend_server/readme.md
@@ -133,10 +133,7 @@ export DISABLE_SPRING=true

We welcome contributions! If you find any issues or want to enhance the project, please create a pull request.

## License

This project is licensed under the XYZ License - see the [LICENSE](LICENSE) file for details.
Download the llama-2-7b-chat GGML model from https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/tree/main

---

Thank you for choosing our project! If you have any questions or need further assistance, feel free to reach out to us.
Install the correct llama-cpp-python build by following this page: https://python.langchain.com/docs/integrations/llms/llamacpp
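
A small, optional sketch for scripting the model download with `huggingface_hub`. The package is not listed in requirements.txt and the filename is taken from the ignore lists above, so verify both against the model card:

```python
# pip install huggingface_hub  (assumed; not part of requirements.txt)
from huggingface_hub import hf_hub_download

# Fetch the quantized chat model referenced throughout this PR and place it
# in the project root, where LlamaCpp and LlamaCppEmbeddings expect it.
path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-Chat-GGML",
    filename="llama-2-7b-chat.ggmlv3.q4_K_M.bin",
    local_dir=".",
)
print(f"Model downloaded to {path}")
```
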
49 changes: 49 additions & 0 deletions docs/aug_26/readme.md
@@ -0,0 +1,49 @@
## LLAMA SUPPORT FOR MAC M1/M2 devices <span style="color: red;">⚠️ **Experimental Warning** ⚠️</span>

This repository offers <span style="color: #1E90FF;">LLAMA support</span> for Mac M1/M2 devices. LLAMA is an <span style="color: #008000;">advanced language model</span> developed by Meta. This guide walks you through <span style="color: #FF8C00;">configuring and running LLAMA</span> on your Mac M1/M2 device. Note that running LLAMA on Mac devices through Docker may not be straightforward due to <span style="color: #FF0000;">emulation constraints</span>, particularly around access to video drivers.

Additionally, embedding is currently quite slow, although the model does speed up over time.

## Getting Started

Follow these steps to set up LLAMA support on your Mac M1/M2 device:

1. Clone this repository to your local machine.

2. In the root directory of the repository, locate the `.env` file and open it in a text editor.

3. Change the following two environment variables in the `.env` file:

```dotenv
LLM=llama2
EMBEDDING_PROVIDER=llama2
```

These variables configure LLAMA as the API type and embedding provider.

4. **Note**: Currently, the system supports only a specific combination of embedding and completion models. Future updates will provide more flexibility in choosing different models.

5. **Note**: Docker images are not supported for Mac devices due to emulation limitations. As a result, you need to run the application using a virtual environment (virtualenv) for now.



6. If you work in Visual Studio Code, you can use the available debug configurations to streamline development (you will still need to run `docker compose up -d` to start the other containers). Alternatively, you can use the following commands to start and stop the development server.
- To start the development server:

```sh
make dev-start
```

- To stop the development server:

```sh
make dev-stop
```

## Future Updates

We are continuously working on enhancing LLAMA support for Mac M1/M2 devices. Stay tuned for updates that will provide more options for embedding and completion models, as well as improved compatibility with different environments.

For any issues or questions, please reach out to our support team or open an issue in this repository.

Happy coding!
2 changes: 1 addition & 1 deletion llm-server/.gitignore
@@ -37,4 +37,4 @@ yarn-error.log*
next-env.d.ts

#Notion_db
/Notion_DB
/Notion_DB
