Revert "Updated 5_minutes_RAG_no_GPU (#239)" (#247)

This reverts commit 169abdd.
NVIDIA · Nov 17, 2024 · fc80791 · fc80791
1 parent c52f37f
commit fc80791
Show file tree

Hide file tree

Showing 5 changed files with 47 additions and 197 deletions.
diff --git a/community/5_mins_rag_no_gpu/.streamlit/config.toml b/community/5_mins_rag_no_gpu/.streamlit/config.toml
diff --git a/community/5_mins_rag_no_gpu/README.md b/community/5_mins_rag_no_gpu/README.md
@@ -1,13 +1,17 @@
-# Tutorial for a Generic RAG-Based Chatbot
+# RAG in 5 Minutes
 
-This is a tutorial for how to build your own generic RAG chatbot. It is intended as a foundation for building more complex, domain-specific RAG bots. Note that no GPU is needed to run this as it is using NIMs from the NVIDIA catalog.
+This implementation is tied to the [YouTube video on NVIDIA Developer](https://youtu.be/N_OOfkEWcOk).
 
-## Acknowledgements
+This is a simple standalone implementation showing a minimal RAG pipeline that uses models available from [NVIDIA API Catalog](https://catalog.ngc.nvidia.com/ai-foundation-models).
+The catalog enables you to experience state-of-the-art LLMs accelerated by NVIDIA.
+Developers get free credits for 10K requests to any of the models.
 
- - This implementation is based on [Rag in 5 Minutes](https://github.com/NVIDIA/GenerativeAIExamples/tree/4e86d75c813bcc41d4e92e430019053920d08c94/community/5_mins_rag_no_gpu), with changes primarily made to the UI.
- - Alyssa Sawyer also contributed to updating and further developing this repo during her intern project, [Resume RAG Bot](https://github.com/alysawyer/resume-rag-nv), at NVIDIA.
+The example uses an [integration package to LangChain](https://python.langchain.com/docs/integrations/providers/nvidia) to access the models.
+NVIDIA engineers develop, test, and maintain the open source integration.
+This example uses a simple [Streamlit](https://streamlit.io/) based user interface and has a one-file implementation.
+Because the example uses the models from the NVIDIA API Catalog, you do not need a GPU to run the example.
 
-## Steps
+### Steps
 
 1. Create a Python virtual environment and activate it:
 
@@ -16,10 +20,10 @@ This is a tutorial for how to build your own generic RAG chatbot. It is intended
    source genai/bin/activate
    ```
 
-1. From the root of this repository, install the requirements:
+1. From the root of this repository, `GenerativeAIExamples`, install the requirements:
 
    ```console
-   pip install -r requirements.txt
+   pip install -r community/5_mins_rag_no_gpu/requirements.txt
    ```
 
 1. Add your NVIDIA API key as an environment variable:
@@ -28,15 +32,17 @@ This is a tutorial for how to build your own generic RAG chatbot. It is intended
    export NVIDIA_API_KEY="nvapi-*"
    ```
 
-   If you don't already have an API key, visit the [NVIDIA API Catalog](https://build.ngc.nvidia.com/explore/), select on any model, then click on `Get API Key`. 
+   If you don't already have an API key, visit the [NVIDIA API Catalog](https://build.ngc.nvidia.com/explore/), select any model, then click `Get API Key`.
 
 1. Run the example using Streamlit:
 
    ```console
-   streamlit run main.py
+   streamlit run community/5_mins_rag_no_gpu/main.py
    ```
 
 1. Test the deployed example by going to `http://<host_ip>:8501` in a web browser.
 
-   Click **Browse Files** and select the documents for your knowledge base.
-   After selecting, click **Upload!** to complete the ingestion process.
+   Click **Browse Files** and select your knowledge source.
+   After selecting, click **Upload!** to complete the ingestion process.
+
+You are all set now! Try out queries related to the knowledge base using text from the user interface.
diff --git a/community/5_mins_rag_no_gpu/main.py b/community/5_mins_rag_no_gpu/main.py
@@ -13,176 +13,110 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# This is a simple standalone implementation showing rag pipeline using Nvidia AI Foundational Models.
+# This is a simple standalone implementation showing a RAG pipeline using NVIDIA AI Foundation models.
 # It uses a simple Streamlit UI and a one-file implementation of a minimalistic RAG pipeline.
 
-
-############################################
-# Component #0.5 - UI / Header
-############################################
-
 import streamlit as st
 import os
+from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
+from langchain.text_splitter import CharacterTextSplitter
+from langchain_community.document_loaders import DirectoryLoader
+from langchain_community.vectorstores import FAISS
+import pickle
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
 
-# Page settings 
-st.set_page_config(
-    layout="wide",
-    page_title="RAG Chatbot", 
-    page_icon = "🤖",
-    initial_sidebar_state="expanded")
-
-# Page title 
-st.header('Generic RAG Chatbot Demo 🤖📝', divider='rainbow')
-
-# Custom CSS
-def local_css(file_name):
-    with open(file_name, "r") as f:
-        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
-local_css("style.css")
-
-# Page description 
-st.markdown('''Manually looking through vast amounts of data can be tedious and time-consuming. This chatbot can expedite that process by providing a platform to query your documents.''')
-st.warning("This is a proof of concept, and any output from the AI agent should be used in conjunction with the original data.", icon="⚠️")
-
-############################################
-# Component #1 - Document Loader
-############################################
+st.set_page_config(layout="wide")
 
+# Component #1 - Document Upload
 with st.sidebar:
-    st.subheader("Upload Your Documents")
-
     DOCS_DIR = os.path.abspath("./uploaded_docs")
-
-    # Make dir to store uploaded documents
     if not os.path.exists(DOCS_DIR):
         os.makedirs(DOCS_DIR)
-
-    # Define form on Streamlit page for uploading files to KB
     st.subheader("Add to the Knowledge Base")
     with st.form("my-form", clear_on_submit=True):
         uploaded_files = st.file_uploader("Upload a file to the Knowledge Base:", accept_multiple_files=True)
         submitted = st.form_submit_button("Upload!")
 
-    # Acknowledge successful file uploads
     if uploaded_files and submitted:
         for uploaded_file in uploaded_files:
             st.success(f"File {uploaded_file.name} uploaded successfully!")
             with open(os.path.join(DOCS_DIR, uploaded_file.name), "wb") as f:
                 f.write(uploaded_file.read())
 
-############################################
-# Component #2 - Initalizing Embedding Model and LLM
-############################################
+# Component #2 - Embedding Model and LLM
+llm = ChatNVIDIA(model="meta/llama3-70b-instruct")
+document_embedder = NVIDIAEmbeddings(model="nvidia/nv-embedqa-e5-v5", model_type="passage")
 
-from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
-
-#Make sure to export your NGC NV-Developer API key as NVIDIA_API_KEY! 
-API_KEY = os.environ['NVIDIA_API_KEY']
-
-# Select embedding model and LLM
-document_embedder = NVIDIAEmbeddings(model="NV-Embed-QA", api_key=API_KEY, model_type="passage", truncate="END")
-llm = ChatNVIDIA(model="meta/llama3-70b-instruct", api_key=API_KEY, temperature=0)
-
-############################################
 # Component #3 - Vector Database Store
-############################################
-
-import pickle
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.document_loaders import DirectoryLoader
-from langchain_community.vectorstores import FAISS
-from langchain_core.output_parsers import StrOutputParser
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.retrievers import BaseRetriever
-
-# Option for using an existing vector store
 with st.sidebar:
     use_existing_vector_store = st.radio("Use existing vector store if available", ["Yes", "No"], horizontal=True)
 
-# Load raw documents from the directory
-DOCS_DIR = os.path.abspath("./uploaded_docs")
+vector_store_path = "vectorstore.pkl"
 raw_documents = DirectoryLoader(DOCS_DIR).load()
 
-# Check for existing vector store file
-vector_store_path = "vectorstore.pkl"
 vector_store_exists = os.path.exists(vector_store_path)
 vectorstore = None
-
 if use_existing_vector_store == "Yes" and vector_store_exists:
-    # Load existing vector store
     with open(vector_store_path, "rb") as f:
         vectorstore = pickle.load(f)
     with st.sidebar:
-        st.info("Existing vector store loaded successfully.")
+        st.success("Existing vector store loaded successfully.")
 else:
     with st.sidebar:
         if raw_documents and use_existing_vector_store == "Yes":
-            # Chunk documents
             with st.spinner("Splitting documents into chunks..."):
-                text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=100)
+                text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=200)
                 documents = text_splitter.split_documents(raw_documents)
 
-            # Convert document chunks to embeddings, and save in a vector store
             with st.spinner("Adding document chunks to vector database..."):
                 vectorstore = FAISS.from_documents(documents, document_embedder)
 
-            # Save vector store
             with st.spinner("Saving vector store"):
                 with open(vector_store_path, "wb") as f:
                     pickle.dump(vectorstore, f)
             st.success("Vector store created and saved.")
         else:
             st.warning("No documents available to process!", icon="⚠️")
 
-############################################
 # Component #4 - LLM Response Generation and Chat
-############################################
-
-st.subheader("Query your data")
+st.subheader("Chat with your AI Assistant, Envie!")
 
-# Save chat history for this user session
 if "messages" not in st.session_state:
     st.session_state.messages = []
 
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.markdown(message["content"])
 
-# Define prompt for LLM
 prompt_template = ChatPromptTemplate.from_messages([
-    ("system", "You are a helpful AI assistant. Use the provided context to inform your responses. If no context is available, please state that."),
+    ("system", "You are a helpful AI assistant named Envie. If provided with context, use it to inform your responses. If no context is available, use your general knowledge to provide a helpful response."),
     ("human", "{input}")
 ])
 
-# Define simple prompt chain 
 chain = prompt_template | llm | StrOutputParser()
 
-# Display an example query for user 
-user_query = st.chat_input("Please summarize these documents.")
+user_input = st.chat_input("Can you tell me what NVIDIA is known for?")
 
-if user_query:
-    st.session_state.messages.append({"role": "user", "content": user_query})
+if user_input:
+    st.session_state.messages.append({"role": "user", "content": user_input})
     with st.chat_message("user"):
-        st.markdown(user_query)
+        st.markdown(user_input)
 
     with st.chat_message("assistant"):
         message_placeholder = st.empty()
         full_response = ""
 
         if vectorstore is not None and use_existing_vector_store == "Yes":
-            # Retrieve relevant chunks for the given user query from the vector store
             retriever = vectorstore.as_retriever()
-            retrieved_docs = retriever.invoke(user_query)
-
-            # Concatenate retrieved chunks together as context for LLM
-            context = "\n\n".join([doc.page_content for doc in retrieved_docs])
-            augmented_user_input = f"Context: {context}\n\nQuestion: {user_query}\n"
+            docs = retriever.invoke(user_input)
+            context = "\n\n".join([doc.page_content for doc in docs])
+            augmented_user_input = f"Context: {context}\n\nQuestion: {user_input}\n"
         else:
-            augmented_user_input = f"Question: {user_query}\n"
+            augmented_user_input = f"Question: {user_input}\n"
 
-        # Get output from LLM
         for response in chain.stream({"input": augmented_user_input}):
             full_response += response
             message_placeholder.markdown(full_response + "▌")
         message_placeholder.markdown(full_response)
-    st.session_state.messages.append({"role": "assistant", "content": full_response})
+    st.session_state.messages.append({"role": "assistant", "content": full_response})
diff --git a/community/5_mins_rag_no_gpu/requirements.txt b/community/5_mins_rag_no_gpu/requirements.txt
@@ -1,13 +1,5 @@
-streamlit
+streamlit==1.30.0
 faiss-cpu==1.7.4
+langchain==0.1.20
 unstructured[all-docs]==0.11.2
-langchain
-langchain-community
-langchain-core
 langchain-nvidia-ai-endpoints
-langchain-text-splitters
-nltk==3.8.1
-numpy==1.23.5
-onnx==1.16.1 
-onnxruntime==1.15.1
-python-magic
diff --git a/community/5_mins_rag_no_gpu/style.css b/community/5_mins_rag_no_gpu/style.css