Commit: Some refactoring

ejhusom committed Nov 18, 2024
1 parent 5e69cc3 commit 62755a4
Showing 5 changed files with 94 additions and 113 deletions.
5 changes: 3 additions & 2 deletions config/settings.yaml
@@ -1,5 +1,5 @@
api_url: "http://localhost:11434/api/chat" # API endpoint for local LLM (Ollama)
default_model: "llama3.2"
default_model: "gemma2:2b"
stream: True
system_message: "You are a helpful assistant. Respond concisely and informatively."
system_message_how_to_use_memories: |
@@ -12,7 +12,8 @@ system_message_how_to_use_memories: |
5. When responding, balance the use of memories with fresh input and current knowledge, providing a seamless and natural conversation flow.
6. The highest priority is to follow the user's instructions. Only use memories when relevant.
system_message_how_to_extract_relevant_info_for_memory: |
You are an assistant that helps summarize conversations or text and extracts key points worth remembering. Your task is to analyze the input and extract any significant information, suggestions, or facts that could be useful to remember in future conversations. Focus on details that contribute to understanding the user’s preferences, needs, or ongoing tasks. Be very concise and selective, capturing only the most relevant and important information. Do not use more words than necessary, and avoid including irrelevant or redundant details. Respond in maximum five sentences.
You are an assistant that helps summarize conversations or text and extracts key points worth remembering. Your task is to analyze the input and extract any significant information, suggestions, or facts that could be useful to remember in future conversations. Focus on details that contribute to understanding the user’s preferences, needs, or ongoing tasks. Be very concise and selective, capturing only the most relevant and important information. Do not use more words than necessary, and avoid including irrelevant or redundant details. Respond in maximum three sentences.
system_message_how_to_remember_information_in_prompt: You are tasked with identifying and summarizing key information from the user's prompt. The goal is to extract a concise, relevant memory, suitable for recall later in the conversation. Keep the extracted memory focused on specific details or insights that could be useful for future context.
priority_imperative_memories: 5
priority_conversation_summaries: 3
priority_automatic_memories: 2
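
For orientation, here is a minimal sketch of the request these settings end up driving. The payload fields mirror the data dicts built in llm_interface.py below; the exact response shape is an assumption based on the non-streaming branch, and a running Ollama instance at api_url is assumed.

import requests
import yaml

# Load the same settings file this diff modifies.
with open("config/settings.yaml", "r") as file:
    settings = yaml.safe_load(file)

# Mirror the payload built in LLMInterface.generate_response.
data = {
    "model": settings.get("default_model"),  # now "gemma2:2b"
    "system_message": settings.get("system_message"),
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": False,
}

response = requests.post(settings.get("api_url"), json=data)
response.raise_for_status()
print(response.json().get("message", {}).get("content"))
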
68 changes: 24 additions & 44 deletions src/llm_interface.py
@@ -8,80 +8,60 @@ def __init__(self):
self.settings = load_settings()
self.api_url = self.settings.get("api_url")
self.model = self.settings.get("default_model")
self.system_message = self.settings.get("system_message") # Get system message from settings
self.system_message = self.settings.get("system_message")

def generate_response(self, message_history, system_message=None):
"""Generate a response based on the message history"""
def _post_request(self, data):
"""Internal helper to send a post request and handle exceptions."""
try:
response = requests.post(self.api_url, json=data, stream=data.get("stream", False))
response.raise_for_status()
return response
except requests.exceptions.RequestException as e:
print(f"Request failed: {e}")
return None

def generate_response(self, message_history, system_message=None):
"""Generate a response based on the message history."""
if isinstance(message_history, str):
message_history = [message_history]

if not isinstance(message_history, list):
raise ValueError("Message history should be a list of messages")

# Prepare the data for the API request
message_history = [message_history]

data = {
"model": self.model,
"system_message": system_message or self.system_message,
"messages": message_history,
"stream": False
}

print(data)

try:
response = requests.post(self.api_url, json=data)
response.raise_for_status() # Raises HTTPError for bad responses
return response.json().get("message").get("content")
except requests.exceptions.RequestException as e:
print(f"Request failed: {e}")
return None
response = self._post_request(data)
if response:
return response.json().get("message", {}).get("content", "Error: No content received.")
return None

def generate_streaming_response(self, message_history):
"""Generate a streaming response and return the full content"""

if isinstance(message_history, str):
message_history = [message_history]

if not isinstance(message_history, list):
raise ValueError("Message history should be a list of messages")

# Prepare the data for the API request with streaming enabled
"""Generate a streaming response and return the full content."""
data = {
"model": self.model,
"system_message": self.system_message,
"messages": message_history,
"stream": True
}

print(data)

full_response = [] # To accumulate the full response

try:
response = requests.post(self.api_url, json=data, stream=True)
response.raise_for_status() # Raises HTTPError for bad responses
full_response = []
response = self._post_request(data)

if response:
print("Assistant: ", end="", flush=True)
for chunk in response.iter_lines():
if chunk:
try:
chunk_data = json.loads(chunk.decode("utf-8"))
message_content = chunk_data.get("message", {}).get("content", "")
print(message_content, end="", flush=True)

# Append content to the full response
full_response.append(message_content)

if chunk_data.get("done"):
break
except json.JSONDecodeError as e:
print("\nError decoding JSON:", e)
print("\n") # Newline after streaming ends

# Return the full response as a single string
print("\n")
return "".join(full_response)

except requests.exceptions.RequestException as e:
print(f"Request failed: {e}")
return None
return None
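
A brief usage sketch of the refactored interface, assuming the module layout above: generate_response returns the reply text or None on failure, while generate_streaming_response echoes chunks as they arrive and returns the joined text.

from llm_interface import LLMInterface

llm = LLMInterface()
history = [{"role": "user", "content": "What did we discuss yesterday?"}]

# Non-streaming call: returns the reply text, or None if the request failed.
reply = llm.generate_response(history)

# Streaming call: prints the reply as it streams, then returns it whole.
full_reply = llm.generate_streaming_response(history)
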
42 changes: 32 additions & 10 deletions src/memory_manager.py
@@ -1,6 +1,8 @@
import os
import json
import fcntl
from datetime import datetime, timedelta
from utils import save_conversation_to_file

class MemoryManager:
def __init__(self, memory_file="memories/memory.json", decay_threshold=2, decay_days=30, llm=None):
@@ -11,21 +13,23 @@ def __init__(self, memory_file="memories/memory.json", decay_threshold=2, decay_
self.memories = self.load_memories()

def load_memories(self):
"""Load memories from a JSON file."""
# Ensure the directory exists
"""Load memories with file lock for concurrency safety."""
os.makedirs(os.path.dirname(self.memory_file), exist_ok=True)
memories = []

if os.path.exists(self.memory_file):
with open(self.memory_file, "r") as file:
fcntl.flock(file, fcntl.LOCK_SH)
memories = json.load(file)
else:
memories = [] # If no memories exist, start with an empty list.
fcntl.flock(file, fcntl.LOCK_UN)
return memories

def save_memories(self):
"""Save memories to a JSON file."""
"""Save memories with file lock for concurrency safety."""
with open(self.memory_file, "w") as file:
fcntl.flock(file, fcntl.LOCK_EX)
json.dump(self.memories, file, indent=2)
fcntl.flock(file, fcntl.LOCK_UN)

def add_memory(self, content, category="general", priority=3):
timestamp = datetime.now().isoformat()
@@ -44,10 +48,8 @@ def add_memory(self, content, category="general", priority=3):
def apply_decay(self):
cutoff_date = datetime.now() - timedelta(days=self.decay_days)
for memory in self.memories:
memory_date = datetime.fromisoformat(memory["timestamp"])
if memory["priority"] < self.decay_threshold and memory_date < cutoff_date:
memory["active"] = False # Mark as inactive if decayed

if memory["priority"] < self.decay_threshold and datetime.fromisoformat(memory["timestamp"]) < cutoff_date:
memory["active"] = False
self.save_memories()

def get_active_memories(self):
@@ -61,12 +63,32 @@ def remember(self, text, category="general", priority=3):
# Generate the summary or relevant info from the LLM
summary = self.llm.generate_response(
message_history,
system_message=self.llm.settings.get("system_message_how_to_extract_relevant_info_for_memory")
system_message=self.llm.settings.get("system_message_how_to_remember_information_in_prompt")
)

if summary:
self.add_memory(summary, category, priority)


def summarize_and_save(self, conversation, conversation_file):
"""Summarize the conversation and save it to memories."""
conversation_text = "\n".join([entry["content"] for entry in conversation])
summary = self.llm.generate_response(
[{"role": "user", "content": conversation_text}],
system_message=self.llm.settings.get("system_message_how_to_extract_relevant_info_for_memory")
)

if summary:
self.add_memory(content=summary, category="conversation_summary", priority=self.llm.settings.get("priority_conversation_summaries"))
print("Assistant: The conversation has been summarized and saved.")

# Add summary to conversation as well
conversation.append({"role": "assistant", "content": summary})

save_conversation_to_file(conversation, conversation_file)

return summary

def build_memory_context(self):
"""Retrieve and format active memories for conversation context."""
active_memories = self.get_active_memories()
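
One caveat on the new locking code: fcntl.flock is advisory and Unix-only, and json.load raises if memory.json exists but is empty or corrupt. A defensive variant of load_memories (a sketch of one possible hardening, not what this commit does) could look like:

import fcntl
import json
import os

def load_memories_safe(memory_file="memories/memory.json"):
    """Like MemoryManager.load_memories, but tolerates an empty or corrupt file."""
    os.makedirs(os.path.dirname(memory_file), exist_ok=True)
    if not os.path.exists(memory_file):
        return []
    with open(memory_file, "r") as file:
        fcntl.flock(file, fcntl.LOCK_SH)  # advisory shared lock (Unix-only)
        try:
            return json.load(file)
        except json.JSONDecodeError:
            return []  # corrupt or empty file: start with no memories
        finally:
            fcntl.flock(file, fcntl.LOCK_UN)
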
65 changes: 8 additions & 57 deletions src/smala.py
@@ -5,72 +5,19 @@
import json
from llm_interface import LLMInterface
from memory_manager import MemoryManager
from utils import save_conversation_to_file, load_conversation_from_file, get_multiline_input
from datetime import datetime


def load_conversation_from_file(file_path):
"""Load a conversation from a JSON file."""
if not os.path.exists(file_path):
print(f"Warning: No existing conversation found at {file_path}. Starting a new conversation.")
return []

with open(file_path, "r") as file:
conversation = json.load(file)

return conversation


def save_conversation_to_file(conversation, file_path):
"""Save the conversation to a JSON file"""
with open(file_path, "w") as file:
json.dump(conversation, file, indent=2)


def summarize_and_save(conversation, memory_manager, conversation_file):
"""Summarize the conversation and save it to memories."""
llm = LLMInterface()
conversation_text = "\n".join([entry["content"] for entry in conversation])
summary = llm.generate_response(
[{"role": "user", "content": conversation_text}],
system_message=llm.settings.get("system_message_how_to_extract_relevant_info_for_memory")
)

if summary:
memory_manager.add_memory(content=summary, category="conversation_summary", priority=llm.settings.get("priority_conversation_summaries"))
print("Assistant: The conversation has been summarized and saved.")

# Add summary to conversation as well
conversation.append({"role": "assistant", "content": summary})

save_conversation_to_file(conversation, conversation_file)

return summary


def handle_exit(conversation, memory_manager, conversation_file, signal, frame):
"""Graceful exit handler to summarize and save the conversation """
print("\nGracefully shutting down.")
print("Summarizing conversation...")
summarize_and_save(conversation, memory_manager, conversation_file)
memory_manager.summarize_and_save(conversation, conversation_file)
print("Conversation summary saved. Exiting...")
sys.exit(0)


def prompt2json(prompt, role="user"):
"""Convert a prompt string to a JSON object"""
return {"role": role, "content": prompt}


def get_multiline_input(line=""):
"""Allow multi-line input using triple quotes to start and end."""
lines = [line]
while True:
line = input(">>> ")
if line.strip() == '"""': # End multi-line input if '"""' is typed
break
lines.append(line)
return "\n".join(lines) # Combine all lines into a single string

def initialize_conversation(args):
"""Initialize conversation from file or create a new conversation file path."""
if args.conversation_file:
@@ -152,7 +99,7 @@ def main():
summary_response = input("Would you like to summarize the conversation? (y/n): ")
if summary_response.lower() == "y":
print("Summarizing conversation...")
summarize_and_save(conversation, memory_manager, conversation_file)
memory_manager.summarize_and_save(conversation, conversation_file)
print("Conversation summary saved. Exiting...")
else:
print("Conversation not summarized. Exiting...")
@@ -164,7 +111,11 @@ def main():
# Remember the previous prompt if available
last_user_message = conversation[-1]["content"] if conversation else ""
if last_user_message:
memory_manager.remember(last_user_message)
memory_manager.remember(
last_user_message,
category="imperative_memory",
priority=llm.settings.get("priority_imperative_memories")
)
else:
# Remember the current prompt, minus the /remember part
memory_content = prompt.replace("/remember", "").strip()
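
For context, handle_exit takes conversation state in addition to the (signum, frame) pair that the signal module supplies, so the registration in main() presumably binds those extra arguments. That call site is not shown in this diff; the wiring would look roughly like:

import signal
from functools import partial

# Bind conversation state into the handler; signal supplies (signum, frame).
signal.signal(
    signal.SIGINT,
    partial(handle_exit, conversation, memory_manager, conversation_file),
)
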
27 changes: 27 additions & 0 deletions src/utils.py
@@ -1,7 +1,34 @@
import os
import json
import yaml

def load_settings():
"""Load settings from yaml."""
with open("config/settings.yaml", "r") as file:
settings = yaml.safe_load(file)
return settings

def load_conversation_from_file(file_path):
"""Load a conversation from a JSON file."""
if not os.path.exists(file_path):
print(f"Warning: No existing conversation found at {file_path}. Starting a new conversation.")
return []

with open(file_path, "r") as file:
conversation = json.load(file)

return conversation

def save_conversation_to_file(conversation, file_path):
"""Save the conversation to a JSON file"""
with open(file_path, "w") as file:
json.dump(conversation, file, indent=2)

def get_multiline_input(line=""):
"""Allow multi-line input using triple quotes to start and end."""
lines = [line]
while True:
line = input(">>> ")
if line.strip() == '"""': # End multi-line input if '"""' is typed
break
lines.append(line)
return "\n".join(lines) # Combine all lines into a single string
