Settings refactor and remove RAG #47 (#48)
* Refactor settings and RAG

* RAG commented out

* RAG resources removed

* RAG resources removed #47

* Dependency fixes
villekr authored Sep 12, 2024
1 parent 7823ae2 commit 5441f0e
Showing 28 changed files with 576 additions and 1,008 deletions.
4 changes: 2 additions & 2 deletions .gitignore
```diff
@@ -118,11 +118,11 @@ ENV/
 env.bak/
 venv.bak/
 
-# Spyder project settings
+# Spyder project llm_settings
 .spyderproject
 .spyproject
 
-# Rope project settings
+# Rope project llm_settings
 .ropeproject
 
 # mkdocs documentation
```
1 change: 0 additions & 1 deletion README.md
```diff
@@ -7,7 +7,6 @@ Paita is textual assistant for your terminal that supports multiple AI Services
 - **Supports Multiple AI Services:** Paita integrates with a variety of AI services through the [LangChain](https://python.langchain.com) library. If AI service is compatible with LangChain then it can be used also with Paita.
 - **Textual User Interface on your terminal:** Paita is based on [Textual](https://textual.textualize.io/) and provides a sophisticated user interface right within your terminal, combining the complexity of a GUI with console simplicity.
 - **Cross-Platform Compatibility:** Paita is compatible with Windows, macOS, and Linux systems across most terminals; if Python runs in your environment and Textual supports it, then Paita will work.
-- **Supports Retrieval-Augmented Generation (RAG):** Paita supports local vectorstore (Chroma) and crawling web page content.
 
 ### Supported AI Services
 * OpenAI
```
18 changes: 6 additions & 12 deletions pyproject.toml
```diff
@@ -21,28 +21,22 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
 dependencies = [
-  "textual~=0.78.0",
+  "textual~=0.79.1",
   # "boto3~=1.35.8", # rely on boto3 version coming from langchain-aws
   "loguru~=0.7.2",
-  "langchain-core~=0.2.36",
+  "langchain-core~=0.2.39",
   "langchain-community~=0.2.14",
   "langchain-openai~=0.1.23",
-  "langchain-aws~=0.1.17",
-  "langchain-ollama~=0.1.2",
-  "langchain-chroma~=0.1.3",
-  "langchain~=0.2.15",
+  "langchain-aws~=0.1.18",
+  "langchain-ollama~=0.1.3",
+  "langchain~=0.2.16",
   "appdirs~=1.4.4",
   "pydantic~=2.8.2",
   "cache3~=0.4.3",
   "aiofiles~=24.1.0",
   "pyperclip~=1.9.0",
   "eval-type-backport~=0.2.0",
   "ollama~=0.3.2",
-  "bs4~=0.0.2",
-  # "beautifulsoup4~=", # langchain-community document-loader directly depends on bs4
-  # "chromadb~=0.5.5", # rely on boto3 version coming from langchain-chroma
-  "validators~=0.33.0",
-  "lxml~=5.3.0"
+  "validators~=0.34.0",
 ]
 
 [project.urls]
```
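All dependencies use PEP 440 `~=` compatible-release pins, so each bump above raises the minimum patch version while still excluding the next minor release. A quick illustrative check with the `packaging` library (not part of this commit):

```python
# "~=0.79.1" is a PEP 440 compatible-release pin: >=0.79.1, <0.80.0.
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=0.79.1")
assert "0.79.5" in spec      # later patch release: accepted
assert "0.80.0" not in spec  # next minor release: rejected
```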
53 changes: 22 additions & 31 deletions src/paita/llm/chat.py
```diff
@@ -8,8 +8,7 @@
 from paita.llm.chat_history import ChatHistory
 from paita.llm.models import AIService
 from paita.llm.services import bedrock, ollama, openai
-from paita.rag.rag_manager import RAGManager
-from paita.utils.settings_model import SettingsModel
+from paita.llm.services.service import LLMSettingsModel
 
 if TYPE_CHECKING:
     from langchain_core.language_models import BaseChatModel
@@ -21,29 +21,26 @@
 
 class Chat:
     """
-    Chat encapsulates chat history, RAG usage and can use different AI Models
+    Chat encapsulates chat history and can use different AI Models
     """
 
     def __init__(self):
         self._chat_model: BaseChatModel = None
-        self._settings_model: SettingsModel = None
+        self._settings_model: LLMSettingsModel = None
         self._chat_history: ChatHistory = None
-        self._rag_manager: RAGManager = None
         self._chain: Runnable = None
         self._callback_handler: AsyncHandler = None
         self.parser: StrOutputParser = StrOutputParser()
 
     def init_model(
         self,
         *,
-        settings_model: SettingsModel,
+        settings_model: LLMSettingsModel,
         chat_history: ChatHistory,
-        rag_manager: RAGManager = None,
         callback_handler: AsyncHandler,
     ):
         self._settings_model = settings_model
         self._chat_history = chat_history
-        self._rag_manager = rag_manager
         self._callback_handler = callback_handler
 
         if settings_model.ai_service == AIService.AWSBedRock.value:
@@ -56,31 +52,26 @@ def init_model(
             msg = f"Invalid AI Service {settings_model.ai_service}"
             raise ValueError(msg)
         self._chat_model = service.chat_model()
-        if self._settings_model.ai_rag_enabled:
-            self._chain = self._rag_manager.chain(
-                chat=self._chat_model, chat_history=self._chat_history.history, settings_model=self._settings_model
-            )
-        else:
-            prompt = ChatPromptTemplate.from_messages(
-                [
-                    (
-                        "system",
-                        self._settings_model.ai_persona,
-                    ),
-                    MessagesPlaceholder(variable_name="chat_history"),
-                    ("human", "{input}"),
-                ]
-            )
+        prompt = ChatPromptTemplate.from_messages(
+            [
+                (
+                    "system",
+                    self._settings_model.ai_persona,
+                ),
+                MessagesPlaceholder(variable_name="chat_history"),
+                ("human", "{input}"),
+            ]
+        )
 
-            chain = prompt | self._chat_model | self.parser
-            self._chain = RunnableWithMessageHistory(
-                chain,
-                lambda session_id: self._chat_history.history,  # noqa: ARG005
-                input_messages_key="input",
-                history_messages_key="chat_history",
-            )
+        chain = prompt | self._chat_model | self.parser
+        self._chain = RunnableWithMessageHistory(
+            chain,
+            lambda session_id: self._chat_history.history,  # noqa: ARG005
+            input_messages_key="input",
+            history_messages_key="chat_history",
+        )
 
-    async def request(self, data: str) -> str:
+    async def request(self, data: str):
         await self._trim_history(self._chat_history, max_length=self._settings_model.ai_history_depth)
 
         if self._settings_model.ai_streaming:
```
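With the RAG branch removed, `init_model` always builds the same history-aware pipeline: prompt, model, and parser composed with `|`, then wrapped in `RunnableWithMessageHistory`. A minimal self-contained sketch of that pattern, with `FakeListChatModel` standing in for the service-specific model paita actually resolves:

```python
# Sketch of the history-aware chain pattern used in Chat.init_model above;
# FakeListChatModel is a stand-in so the example runs without any AI service.
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.language_models import FakeListChatModel
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)
history = InMemoryChatMessageHistory()
chain = prompt | FakeListChatModel(responses=["Hi there!"]) | StrOutputParser()
chat = RunnableWithMessageHistory(
    chain,
    lambda session_id: history,  # one shared history; session_id ignored, as in paita
    input_messages_key="input",
    history_messages_key="chat_history",
)
print(chat.invoke({"input": "Hello"}, config={"configurable": {"session_id": "any"}}))
```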
28 changes: 22 additions & 6 deletions src/paita/llm/services/service.py
```diff
@@ -1,14 +1,18 @@
-from typing import Optional
+from __future__ import annotations
 
-from langchain_core.embeddings import Embeddings
-from langchain_core.language_models.chat_models import BaseChatModel
+from typing import TYPE_CHECKING, Any, Optional
 
-from paita.llm.callbacks import AsyncHandler
-from paita.utils.settings_model import SettingsModel
+from pydantic import BaseModel
+
+if TYPE_CHECKING:
+    from langchain_core.embeddings import Embeddings
+    from langchain_core.language_models.chat_models import BaseChatModel
+
+    from paita.llm.callbacks import AsyncHandler
 
 
 class Service:
-    def __init__(self, *, settings_model: SettingsModel, callback_handler: AsyncHandler):
+    def __init__(self, *, settings_model: LLMSettingsModel, callback_handler: AsyncHandler):
         self._settings_model = settings_model
         self._callback_handler = callback_handler
 
@@ -22,3 +26,15 @@ def embeddings(cls, model_id: Optional[str] = None) -> Embeddings:
 
     def chat_model(self) -> BaseChatModel:
         raise NotImplementedError
+
+
+class LLMSettingsModel(BaseModel):
+    version: float = 0.1
+    ai_service: Optional[str] = None
+    ai_model: Optional[str] = None
+    ai_persona: Optional[str] = "You are a helpful assistant. Answer all questions to the best of your ability."
+    ai_streaming: Optional[bool] = True
+    ai_model_kwargs: Optional[dict[str, Any]] = {}
+    ai_n: Optional[int] = 1
+    ai_max_tokens: Optional[int] = 2048
+    ai_history_depth: Optional[int] = 20
```
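The new `LLMSettingsModel` carries a default for every field, so a bare instance is valid and pydantic v2 handles persistence. A hypothetical round-trip (the service and model names are illustrative, not part of this commit):

```python
# Hypothetical usage of LLMSettingsModel as defined above in
# src/paita/llm/services/service.py; field values are made up.
from paita.llm.services.service import LLMSettingsModel

settings = LLMSettingsModel(ai_service="ollama", ai_model="llama3")
assert settings.ai_streaming is True      # unset fields keep their defaults
assert settings.ai_history_depth == 20

payload = settings.model_dump_json()      # pydantic v2 JSON serialization
restored = LLMSettingsModel.model_validate_json(payload)
assert restored == settings
```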
4 changes: 0 additions & 4 deletions src/paita/localization/labels.py
```diff
@@ -7,13 +7,9 @@
 AI_MAX_TOKENS = "Max tokens"
 AI_HISTORY_DEPTH = "History depth"
 AI_N = "Number of response messages"
-AI_RAG_ENABLED = "RAG Enabled"
-AI_RAG_SOURCE = "Source"
-AI_RAG_SOURCE_MAX_DEPTH = "Max depth"
 
 APP_LIST_AI_SERVICES_MODELS = "Checking available AI Services and AI Models"
 
-APP_RAG_PROCESS_DOCUMENTS = "Processing documents..."
 APP_ERROR_NO_AI_SERVICES_OR_MODELS = "No available AI Services or AI Models founds"
 
 APP_DIALOG_BUTTON_EXIT = "Exit"
```
26 changes: 0 additions & 26 deletions src/paita/rag/models.py

This file was deleted.

(Diffs for the remaining 21 changed files are not shown.)
