Commit: fixed llamaindex basic RAG location (#223)

jayrodge authored Oct 16, 2024
1 parent a1d7056 commit 16e754b

Showing 4 changed files with 219 additions and 0 deletions.
86 changes: 86 additions & 0 deletions community/llm_video_series/video_1_llm_assistant_cloud_app/README.md
@@ -0,0 +1,86 @@
# Building and Deploying LLM Assistants in the Cloud

This application implements a GPU-accelerated, Retrieval-Augmented Generation (RAG) based question-answering system using NVIDIA Inference Microservices (NIMs) and the LlamaIndex framework. It lets users upload documents, process them into a searchable index, and then ask questions about their content.

## Features

- Document loading and processing
- Vector storage and retrieval using Milvus
- Question-answering capabilities using NIMs
- Interactive chat interface built with Gradio

## Installation

1. Clone this repository:
```
git clone https://github.com/NVIDIA/GenerativeAIExamples.git
cd GenerativeAIExamples/community/llm_video_series/video_1_llm_assistant_cloud_app
```

2. Create a virtual environment (using Python 3.9 as an example):
- Using `venv`:
```
python3.9 -m venv venv
source venv/bin/activate
```
- Using `conda`:
```
conda create -n llm-assistant-env python=3.9
conda activate llm-assistant-env
```
3. Install the required Python libraries using the requirements.txt file:
```
pip install -r requirements.txt
```
4. Set up your NVIDIA API Key:
   - Sign up for an NVIDIA API key at [build.nvidia.com](https://build.nvidia.com) if you haven't already.
- Set the API key as an environment variable:
```
export NVIDIA_API_KEY='your-api-key-here'
```
- Alternatively, you can directly edit the script and add your API key to the line:
```python
os.environ["NVIDIA_API_KEY"] = 'nvapi-XXXXXXXXXXXXXXXXXXXXXX'  # Add your NVIDIA API key
```

## Usage

1. Run the script:
```
python app.py
```
2. Open the provided URL in your web browser to access the Gradio interface.
3. Use the interface to:
- Upload document files
- Load and process the documents
- Ask questions about the loaded documents

## How It Works

1. **Document Loading**: Users upload one or more document files through the Gradio interface.
2. **Document Processing**: The application uses LlamaIndex to read the uploaded documents and split them into chunks.
3. **Embedding and Indexing**: The chunks are embedded with NVIDIA's embedding model and stored in a Milvus vector database.
4. **Question Answering**: Users ask questions through the chat interface; a cloud-hosted NIM serving Llama 3 70B Instruct generates responses grounded in the most relevant chunks retrieved from the index (see the sketch below).
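
For orientation, here is a condensed sketch of that pipeline as implemented in `app.py` (the input file name `example.pdf` is a placeholder):

```python
from llama_index.core import Settings, SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.nvidia import NVIDIAEmbedding
from llama_index.llms.nvidia import NVIDIA
from llama_index.vector_stores.milvus import MilvusVectorStore

# Step 2: split documents into chunks of up to 500 tokens
Settings.text_splitter = SentenceSplitter(chunk_size=500)
# Step 3: embed chunks with NVIDIA's NV-Embed-QA model
Settings.embed_model = NVIDIAEmbedding(model="NV-Embed-QA", truncate="END")
# Step 4: answer questions with a cloud-hosted Llama 3 70B Instruct NIM
Settings.llm = NVIDIA(model="meta/llama3-70b-instruct")

# Step 1: load a document (placeholder path)
documents = SimpleDirectoryReader(input_files=["example.pdf"]).load_data()

# Steps 3-4: index into Milvus, then retrieve and answer
vector_store = MilvusVectorStore(uri="./milvus_demo.db", dim=1024, overwrite=True)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
query_engine = index.as_query_engine(similarity_top_k=5)
print(query_engine.query("What is this document about?"))
```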

## Customization

You can customize several aspects of the application (see the sketch after this list):

- Change the chunk size used for text splitting
- Swap in different NVIDIA or open-source models for embedding or language modeling
- Adjust the number of similar chunks retrieved for each query
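
A minimal sketch of these knobs, assuming the same LlamaIndex `Settings` API used in `app.py`; the alternative model name `meta/llama3-8b-instruct` is illustrative, and `index` refers to the index built during document loading:

```python
from llama_index.core import Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.nvidia import NVIDIAEmbedding
from llama_index.llms.nvidia import NVIDIA

# Larger chunks carry more context each, but can dilute retrieval precision
Settings.text_splitter = SentenceSplitter(chunk_size=1000)

# Other embedding/LLM pairs reachable through the NVIDIA API can be substituted;
# keep MilvusVectorStore's `dim` in sync with the embedding output size
Settings.embed_model = NVIDIAEmbedding(model="NV-Embed-QA", truncate="END")
Settings.llm = NVIDIA(model="meta/llama3-8b-instruct")  # illustrative swap

# Retrieve more candidate chunks per query
# (`index` is the VectorStoreIndex built when documents are loaded)
query_engine = index.as_query_engine(similarity_top_k=10, streaming=True)
```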

## Troubleshooting

If you encounter any issues:
1. Ensure your NVIDIA API Key is correctly set.
2. Check that all required libraries are installed correctly.
3. Verify that the Milvus database is properly initialized.
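
A quick sanity check for the first and third points, as a sketch using the `pymilvus` client pinned in `requirements.txt`; it assumes `app.py` has already created `./milvus_demo.db`:

```python
import os
from pymilvus import MilvusClient

# 1. The API key must be set and non-empty
assert os.getenv("NVIDIA_API_KEY"), "NVIDIA_API_KEY is not set"

# 3. Milvus Lite stores its data in ./milvus_demo.db; listing collections
#    confirms the database file exists and is readable
client = MilvusClient("./milvus_demo.db")
print(client.list_collections())
```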
126 changes: 126 additions & 0 deletions community/llm_video_series/video_1_llm_assistant_cloud_app/app.py
@@ -0,0 +1,126 @@
# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Import necessary libraries
import os
import gradio as gr
from llama_index.core import Settings, SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.nvidia import NVIDIAEmbedding
from llama_index.llms.nvidia import NVIDIA
from llama_index.vector_stores.milvus import MilvusVectorStore

# Fail fast if the NVIDIA API key is missing, before any NVIDIA-backed models are configured
# os.environ["NVIDIA_API_KEY"] = 'nvapi-XXXXXXX'  # Alternatively, set the key directly here
if os.getenv('NVIDIA_API_KEY') is None:
    raise ValueError("NVIDIA_API_KEY environment variable is not set")

# Configure settings for the application
Settings.text_splitter = SentenceSplitter(chunk_size=500)
Settings.embed_model = NVIDIAEmbedding(model="NV-Embed-QA", truncate="END")
Settings.llm = NVIDIA(model="meta/llama3-70b-instruct")

# Initialize global variables for the index and query engine
index = None
query_engine = None

# Function to get file names from file objects
def get_files_from_input(file_objs):
    if not file_objs:
        return []
    # Gradio's File component returns objects whose .name is a temp-file path
    return [file_obj.name for file_obj in file_objs]

# Function to load documents and create the index
def load_documents(file_objs, progress=gr.Progress()):
    global index, query_engine
    try:
        if not file_objs:
            return "Error: No files selected."

        file_paths = get_files_from_input(file_objs)
        documents = []
        for file_path in file_paths:
            documents.extend(SimpleDirectoryReader(input_files=[file_path]).load_data())

        if not documents:
            return "No documents found in the selected files."

        # Create a Milvus vector store and storage context
        # (dim=1024 matches the output dimension of the NV-Embed-QA embedding model;
        # overwrite=True resets the collection on every load)
        vector_store = MilvusVectorStore(uri="./milvus_demo.db", dim=1024, overwrite=True)
        storage_context = StorageContext.from_defaults(vector_store=vector_store)

        # Create the index from the documents
        index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

        # Create the streaming query engine, retrieving the top 5 most similar chunks
        query_engine = index.as_query_engine(similarity_top_k=5, streaming=True)
        return f"Successfully loaded {len(documents)} documents from {len(file_paths)} files."
    except Exception as e:
        return f"Error loading documents: {str(e)}"

# Function to handle chat interactions (non-streaming fallback; the UI below uses stream_response)
def chat(message, history):
    global query_engine
    if query_engine is None:
        return history + [(message, "Please load documents first.")]
    try:
        response = query_engine.query(message)
        return history + [(message, str(response))]
    except Exception as e:
        return history + [(message, f"Error processing query: {str(e)}")]

# Function to stream responses
def stream_response(message, history):
    global query_engine
    if query_engine is None:
        yield history + [(message, "Please load documents first.")]
        return

    try:
        response = query_engine.query(message)
        partial_response = ""
        # Append tokens to the latest chat turn as they stream in
        for text in response.response_gen:
            partial_response += text
            yield history + [(message, partial_response)]
    except Exception as e:
        yield history + [(message, f"Error processing query: {str(e)}")]

# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# RAG Q&A Chat Application")

    with gr.Row():
        file_input = gr.File(label="Select files to load", file_count="multiple")
        load_btn = gr.Button("Load Documents")

    load_output = gr.Textbox(label="Load Status")

    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Enter your question", interactive=True)
    clear = gr.Button("Clear")

    # Set up event handlers
    load_btn.click(load_documents, inputs=[file_input], outputs=[load_output], show_progress="hidden")
    msg.submit(stream_response, inputs=[msg, chatbot], outputs=[chatbot])
    msg.submit(lambda: "", outputs=[msg])  # Clear the input box after submission
    clear.click(lambda: None, None, chatbot, queue=False)

# Launch the Gradio interface
if __name__ == "__main__":
    demo.launch()
7 changes: 7 additions & 0 deletions community/llm_video_series/video_1_llm_assistant_cloud_app/requirements.txt
@@ -0,0 +1,7 @@
llama-index-core==0.10.58
llama-index-readers-file==0.1.30
llama-index-llms-nvidia==0.1.4
llama-index-embeddings-nvidia==0.1.4
llama-index-vector-stores-milvus==0.1.20
pymilvus==2.4.4
gradio==4.37.2
