Commit: Some refactoring

ejhusom committed Nov 18, 2024
1 parent 5e69cc3 commit 62755a4
Showing 5 changed files with 94 additions and 113 deletions.
5 changes: 3 additions & 2 deletions config/settings.yaml
@@ -1,5 +1,5 @@
api_url: "http://localhost:11434/api/chat" # API endpoint for local LLM (Ollama)
default_model: "llama3.2"
default_model: "gemma2:2b"
stream: True
system_message: "You are a helpful assistant. Respond concisely and informatively."
system_message_how_to_use_memories: |
@@ -12,7 +12,8 @@ system_message_how_to_use_memories: |
5. When responding, balance the use of memories with fresh input and current knowledge, providing a seamless and natural conversation flow.
6. The highest priority is to follow the user's instructions. Only use memories when relevant.
system_message_how_to_extract_relevant_info_for_memory: |
You are an assistant that helps summarize conversations or text and extracts key points worth remembering. Your task is to analyze the input and extract any significant information, suggestions, or facts that could be useful to remember in future conversations. Focus on details that contribute to understanding the user’s preferences, needs, or ongoing tasks. Be very concise and selective, capturing only the most relevant and important information. Do not use more words than necessary, and avoid including irrelevant or redundant details. Respond in maximum five sentences.
You are an assistant that helps summarize conversations or text and extracts key points worth remembering. Your task is to analyze the input and extract any significant information, suggestions, or facts that could be useful to remember in future conversations. Focus on details that contribute to understanding the user’s preferences, needs, or ongoing tasks. Be very concise and selective, capturing only the most relevant and important information. Do not use more words than necessary, and avoid including irrelevant or redundant details. Respond in maximum three sentences.
system_message_how_to_remember_information_in_prompt: You are tasked with identifying and summarizing key information from the user's prompt. The goal is to extract a concise, relevant memory, suitable for recall later in the conversation. Keep the extracted memory focused on specific details or insights that could be useful for future context.
priority_imperative_memories: 5
priority_conversation_summaries: 3
priority_automatic_memories: 2
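
For orientation, here is a minimal sketch of the request these settings end up driving. The payload fields mirror the data dicts built in llm_interface.py below; the exact response shape is an assumption based on the non-streaming branch, and a running Ollama instance at api_url is assumed.

import requests
import yaml

# Load the same settings file this diff modifies.
with open("config/settings.yaml", "r") as file:
    settings = yaml.safe_load(file)

# Mirror the payload built in LLMInterface.generate_response.
data = {
    "model": settings.get("default_model"),  # now "gemma2:2b"
    "system_message": settings.get("system_message"),
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": False,
}

response = requests.post(settings.get("api_url"), json=data)
response.raise_for_status()
print(response.json().get("message", {}).get("content"))
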
68 changes: 24 additions & 44 deletions src/llm_interface.py
@@ -8,80 +8,60 @@ def __init__(self):
self.settings = load_settings()
self.api_url = self.settings.get("api_url")
self.model = self.settings.get("default_model")
self.system_message = self.settings.get("system_message") # Get system message from settings
self.system_message = self.settings.get("system_message")

def generate_response(self, message_history, system_message=None):
"""Generate a response based on the message history"""
def _post_request(self, data):
"""Internal helper to send a post request and handle exceptions."""
try:
response = requests.post(self.api_url, json=data, stream=data.get("stream", False))
response.raise_for_status()
return response
except requests.exceptions.RequestException as e:
print(f"Request failed: {e}")
return None

def generate_response(self, message_history, system_message=None):
"""Generate a response based on the message history."""
if isinstance(message_history, str):
message_history = [message_history]

if not isinstance(message_history, list):
raise ValueError("Message history should be a list of messages")

# Prepare the data for the API request
message_history = [message_history]

data = {
"model": self.model,
"system_message": system_message or self.system_message,
"messages": message_history,
"stream": False
}

print(data)

try:
response = requests.post(self.api_url, json=data)
response.raise_for_status() # Raises HTTPError for bad responses
return response.json().get("message").get("content")
except requests.exceptions.RequestException as e:
print(f"Request failed: {e}")
return None
response = self._post_request(data)
if response:
return response.json().get("message", {}).get("content", "Error: No content received.")
return None

def generate_streaming_response(self, message_history):
"""Generate a streaming response and return the full content"""

if isinstance(message_history, str):
message_history = [message_history]

if not isinstance(message_history, list):
raise ValueError("Message history should be a list of messages")

# Prepare the data for the API request with streaming enabled
"""Generate a streaming response and return the full content."""
data = {
"model": self.model,
"system_message": self.system_message,
"messages": message_history,
"stream": True
}

print(data)

full_response = [] # To accumulate the full response

try:
response = requests.post(self.api_url, json=data, stream=True)
response.raise_for_status() # Raises HTTPError for bad responses
full_response = []
response = self._post_request(data)

if response:
print("Assistant: ", end="", flush=True)
for chunk in response.iter_lines():
if chunk:
try:
chunk_data = json.loads(chunk.decode("utf-8"))
message_content = chunk_data.get("message", {}).get("content", "")
print(message_content, end="", flush=True)

# Append content to the full response
full_response.append(message_content)

if chunk_data.get("done"):
break
except json.JSONDecodeError as e:
print("\nError decoding JSON:", e)
print("\n") # Newline after streaming ends

# Return the full response as a single string
print("\n")
return "".join(full_response)

except requests.exceptions.RequestException as e:
print(f"Request failed: {e}")
return None
return None
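
A brief usage sketch of the refactored interface, assuming the module layout above: generate_response returns the reply text or None on failure, while generate_streaming_response echoes chunks as they arrive and returns the joined text.

from llm_interface import LLMInterface

llm = LLMInterface()
history = [{"role": "user", "content": "What did we discuss yesterday?"}]

# Non-streaming call: returns the reply text, or None if the request failed.
reply = llm.generate_response(history)

# Streaming call: prints the reply as it streams, then returns it whole.
full_reply = llm.generate_streaming_response(history)
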
42 changes: 32 additions & 10 deletions src/memory_manager.py
@@ -1,6 +1,8 @@
import os
import json
import fcntl
from datetime import datetime, timedelta
from utils import save_conversation_to_file

class MemoryManager:
def __init__(self, memory_file="memories/memory.json", decay_threshold=2, decay_days=30, llm=None):
@@ -11,21 +13,23 @@ def __init__(self, memory_file="memories/memory.json", decay_threshold=2, decay_
self.memories = self.load_memories()

def load_memories(self):
"""Load memories from a JSON file."""
# Ensure the directory exists
"""Load memories with file lock for concurrency safety."""
os.makedirs(os.path.dirname(self.memory_file), exist_ok=True)
memories = []

if os.path.exists(self.memory_file):
with open(self.memory_file, "r") as file:
fcntl.flock(file, fcntl.LOCK_SH)
memories = json.load(file)
else:
memories = [] # If no memories exist, start with an empty list.
fcntl.flock(file, fcntl.LOCK_UN)
return memories

def save_memories(self):
"""Save memories to a JSON file."""
"""Save memories with file lock for concurrency safety."""
with open(self.memory_file, "w") as file:
fcntl.flock(file, fcntl.LOCK_EX)
json.dump(self.memories, file, indent=2)
fcntl.flock(file, fcntl.LOCK_UN)

def add_memory(self, content, category="general", priority=3):
timestamp = datetime.now().isoformat()
@@ -44,10 +48,8 @@ def add_memory(self, content, category="general", priority=3):
def apply_decay(self):
cutoff_date = datetime.now() - timedelta(days=self.decay_days)
for memory in self.memories:
memory_date = datetime.fromisoformat(memory["timestamp"])
if memory["priority"] < self.decay_threshold and memory_date < cutoff_date:
memory["active"] = False # Mark as inactive if decayed

if memory["priority"] < self.decay_threshold and datetime.fromisoformat(memory["timestamp"]) < cutoff_date:
memory["active"] = False
self.save_memories()

def get_active_memories(self):
@@ -61,12 +63,32 @@ def remember(self, text, category="general", priority=3):
# Generate the summary or relevant info from the LLM
summary = self.llm.generate_response(
message_history,
system_message=self.llm.settings.get("system_message_how_to_extract_relevant_info_for_memory")
system_message=self.llm.settings.get("system_message_how_to_remember_information_in_prompt")
)

if summary:
self.add_memory(summary, category, priority)


def summarize_and_save(self, conversation, conversation_file):
"""Summarize the conversation and save it to memories."""
conversation_text = "\n".join([entry["content"] for entry in conversation])
summary = self.llm.generate_response(
[{"role": "user", "content": conversation_text}],
system_message=self.llm.settings.get("system_message_how_to_extract_relevant_info_for_memory")
)

if summary:
self.add_memory(content=summary, category="conversation_summary", priority=self.llm.settings.get("priority_conversation_summaries"))
print("Assistant: The conversation has been summarized and saved.")

# Add summary to conversation as well
conversation.append({"role": "assistant", "content": summary})

save_conversation_to_file(conversation, conversation_file)

return summary

def build_memory_context(self):
"""Retrieve and format active memories for conversation context."""
active_memories = self.get_active_memories()
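
One caveat on the new locking code: fcntl.flock is advisory and Unix-only, and json.load raises if memory.json exists but is empty or corrupt. A defensive variant of load_memories (a sketch of one possible hardening, not what this commit does) could look like:

import fcntl
import json
import os

def load_memories_safe(memory_file="memories/memory.json"):
    """Like MemoryManager.load_memories, but tolerates an empty or corrupt file."""
    os.makedirs(os.path.dirname(memory_file), exist_ok=True)
    if not os.path.exists(memory_file):
        return []
    with open(memory_file, "r") as file:
        fcntl.flock(file, fcntl.LOCK_SH)  # advisory shared lock (Unix-only)
        try:
            return json.load(file)
        except json.JSONDecodeError:
            return []  # corrupt or empty file: start with no memories
        finally:
            fcntl.flock(file, fcntl.LOCK_UN)
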
65 changes: 8 additions & 57 deletions src/smala.py
@@ -5,72 +5,19 @@
import json
from llm_interface import LLMInterface
from memory_manager import MemoryManager
from utils import save_conversation_to_file, load_conversation_from_file, get_multiline_input
from datetime import datetime


def load_conversation_from_file(file_path):
"""Load a conversation from a JSON file."""
if not os.path.exists(file_path):
print(f"Warning: No existing conversation found at {file_path}. Starting a new conversation.")
return []

with open(file_path, "r") as file:
conversation = json.load(file)

return conversation


def save_conversation_to_file(conversation, file_path):
"""Save the conversation to a JSON file"""
with open(file_path, "w") as file:
json.dump(conversation, file, indent=2)


def summarize_and_save(conversation, memory_manager, conversation_file):
"""Summarize the conversation and save it to memories."""
llm = LLMInterface()
conversation_text = "\n".join([entry["content"] for entry in conversation])
summary = llm.generate_response(
[{"role": "user", "content": conversation_text}],
system_message=llm.settings.get("system_message_how_to_extract_relevant_info_for_memory")
)

if summary:
memory_manager.add_memory(content=summary, category="conversation_summary", priority=llm.settings.get("priority_conversation_summaries"))
print("Assistant: The conversation has been summarized and saved.")

# Add summary to conversation as well
conversation.append({"role": "assistant", "content": summary})

save_conversation_to_file(conversation, conversation_file)

return summary


def handle_exit(conversation, memory_manager, conversation_file, signal, frame):
"""Graceful exit handler to summarize and save the conversation """
print("\nGracefully shutting down.")
print("Summarizing conversation...")
summarize_and_save(conversation, memory_manager, conversation_file)
memory_manager.summarize_and_save(conversation, conversation_file)
print("Conversation summary saved. Exiting...")
sys.exit(0)


def prompt2json(prompt, role="user"):
"""Convert a prompt string to a JSON object"""
return {"role": role, "content": prompt}


def get_multiline_input(line=""):
"""Allow multi-line input using triple quotes to start and end."""
lines = [line]
while True:
line = input(">>> ")
if line.strip() == '"""': # End multi-line input if '"""' is typed
break
lines.append(line)
return "\n".join(lines) # Combine all lines into a single string

def initialize_conversation(args):
"""Initialize conversation from file or create a new conversation file path."""
if args.conversation_file:
@@ -152,7 +99,7 @@ def main():
summary_response = input("Would you like to summarize the conversation? (y/n): ")
if summary_response.lower() == "y":
print("Summarizing conversation...")
summarize_and_save(conversation, memory_manager, conversation_file)
memory_manager.summarize_and_save(conversation, conversation_file)
print("Conversation summary saved. Exiting...")
else:
print("Conversation not summarized. Exiting...")
@@ -164,7 +111,11 @@ def main():
# Remember the previous prompt if available
last_user_message = conversation[-1]["content"] if conversation else ""
if last_user_message:
memory_manager.remember(last_user_message)
memory_manager.remember(
last_user_message,
category="imperative_memory",
priority=llm.settings.get("priority_imperative_memories")
)
else:
# Remember the current prompt, minus the /remember part
memory_content = prompt.replace("/remember", "").strip()
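
For context, handle_exit takes conversation state in addition to the (signum, frame) pair that the signal module supplies, so the registration in main() presumably binds those extra arguments. That call site is not shown in this diff; the wiring would look roughly like:

import signal
from functools import partial

# Bind conversation state into the handler; signal supplies (signum, frame).
signal.signal(
    signal.SIGINT,
    partial(handle_exit, conversation, memory_manager, conversation_file),
)
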
27 changes: 27 additions & 0 deletions src/utils.py
@@ -1,7 +1,34 @@
import os
import json
import yaml

def load_settings():
"""Load settings from yaml."""
with open("config/settings.yaml", "r") as file:
settings = yaml.safe_load(file)
return settings

def load_conversation_from_file(file_path):
"""Load a conversation from a JSON file."""
if not os.path.exists(file_path):
print(f"Warning: No existing conversation found at {file_path}. Starting a new conversation.")
return []

with open(file_path, "r") as file:
conversation = json.load(file)

return conversation

def save_conversation_to_file(conversation, file_path):
"""Save the conversation to a JSON file"""
with open(file_path, "w") as file:
json.dump(conversation, file, indent=2)

def get_multiline_input(line=""):
"""Allow multi-line input using triple quotes to start and end."""
lines = [line]
while True:
line = input(">>> ")
if line.strip() == '"""': # End multi-line input if '"""' is typed
break
lines.append(line)
return "\n".join(lines) # Combine all lines into a single string
