diff --git a/openai_server/agent_prompting.py b/openai_server/agent_prompting.py index 0fd72ac47..43ddc07d7 100644 --- a/openai_server/agent_prompting.py +++ b/openai_server/agent_prompting.py @@ -588,6 +588,79 @@ def get_audio_transcription_helper(): audio_transcription = '' return audio_transcription +def get_save_memory_helper(): + cwd = os.path.abspath(os.getcwd()) + save_solution_memor = f"""\n +* You always keep an eye on the errors you encounter and the solutions you find to them. +* You have to save the solutions to memory so that you can refer to them in the future and avoid making the same mistakes again. +* You can also save new memories to make sure you don't forget them and make use of them in the future. +* Never skips saving the error solutions to the memory. + +* Save error solutions to memory using python. Use for saving error solutions to memory. +* For saving an error solution to memory, you are recommended to use the existing pre-built python code, E.g.: +```sh +# filename: my_save_solution_memory.sh +# execution: true +python {cwd}/openai_server/agent_tools/save_memory.py --task "TASK" --error "ERROR" --solution "SOLUTION" + +* usage: python {cwd}/openai_server/agent_tools/save_memory.py [-h] --task "TASK" --error "ERROR" --solution "SOLUTION" +* You should save solutions you have found to errors while solving user tasks. +* Solutions have to be callable codes if possible, otherwise just put explanations. +* While saving the solution, you should explicityl mention: 1-the task that lead you to the error, +2-the error you encountered, and 3-the solution you found to the error, as a code or explanation. +* Example task: 'While trying to scrape X data from the web I used the 123.xyz URL but it was blocked by the server.' +* Example error: 'Error 403: Forbidden' +* Example solution: 'For similar type of data, I found this another URL 456.xyz that worked.' +* Another example solution: 'Use following code to scrape X data from the web: ...' +* It's really important to save the solutions to memory so that you can refer to them in the future and avoid making the same mistakes again. +""" + return save_solution_memor + +def get_memories(instruction:str): + # read all the csv files that starts with the name 'memory_' in the directory: openai_files/62224bfb-c832-4452-81e7-8a4bdabbe164/ + # and concatenate them into single memory_df + + # find memory paths via os + memory_df_paths = [] + # TODO: This is just a toy code. In real usage, the memory files should be stored in a stable DB + for root, dirs, files in os.walk('openai_files/62224bfb-c832-4452-81e7-8a4bdabbe164/'): + for file in files: + if file.startswith('memory_') and file.endswith('.csv'): + memory_df_paths.append(os.path.join(root, file)) + print(f"Memory Paths: {memory_df_paths}") + # if no memory files found, return empty string + if len(memory_df_paths) == 0: + return "" + + from openai_server.agent_utils import MemoryVectorDB + # Initialize vector DB with OpenAI model + # TODO: In the real usage, there has to be a stable vectordb tha will work accross different chats + # Currently this is just a dummy vectordb to test the functionality + memory_db = MemoryVectorDB( + model="text-embedding-3-small", + openai_api_key=ast.literal_eval(os.getenv('H2OGPT_H2OGPT_API_KEYS'))[0], + openai_base_url="https://api.gpt.h2o.ai/v1" + ) + + import pandas as pd + memory_df = pd.concat([pd.read_csv(memory_df_path) for memory_df_path in memory_df_paths]) + # Create VectorDB documents from memory_df rows + documents = [] + for index, row in memory_df.iterrows(): + document = f"{row['task']}, {row['error']}, {row['solution']}" + documents.append(document) + # Add documents to VectorDB + memory_db.add_texts(documents) + + # Get the most similar 5 documents to the instruction + results, distances = memory_db.query(instruction, k=5, threshold=0.95) + if len(results) == 0: + return "" + + memory_prompt = "\n# Previous Solutions Memory:" + # join results with new line, also add index to each result + memory_prompt += "\n".join([f"Memory-{i+1}:\n {result}" for i, result in enumerate(results)]) + return memory_prompt def get_full_system_prompt(agent_code_writer_system_message, agent_system_site_packages, system_prompt, base_url, api_key, model, text_context_list, image_file, temp_dir, query): @@ -598,6 +671,9 @@ def get_full_system_prompt(agent_code_writer_system_message, agent_system_site_p mermaid_renderer_helper = get_mermaid_renderer_helper() image_generation_helper = get_image_generation_helper() audio_transcription_helper = get_audio_transcription_helper() + save_memory_helper = get_save_memory_helper() + memories_prompt = get_memories(query) + print(f"Memories Prompt: {memories_prompt}") chat_doc_query, internal_file_names = get_chat_doc_context(text_context_list, image_file, temp_dir, @@ -614,6 +690,6 @@ def get_full_system_prompt(agent_code_writer_system_message, agent_system_site_p agent_tools_note = f"\nDo not hallucinate agent_tools tools. The only files in the {path_agent_tools} directory are as follows: {list_dir}\n" - system_message = agent_code_writer_system_message + image_query_helper + mermaid_renderer_helper + image_generation_helper + audio_transcription_helper + agent_tools_note + chat_doc_query + system_message = agent_code_writer_system_message + image_query_helper + mermaid_renderer_helper + image_generation_helper + audio_transcription_helper + save_memory_helper + memories_prompt + agent_tools_note + chat_doc_query # TODO: Also return image_generation_helper and audio_transcription_helper ? return system_message, internal_file_names, chat_doc_query, image_query_helper, mermaid_renderer_helper diff --git a/openai_server/agent_tools/save_memory.py b/openai_server/agent_tools/save_memory.py new file mode 100644 index 000000000..132974229 --- /dev/null +++ b/openai_server/agent_tools/save_memory.py @@ -0,0 +1,21 @@ +import argparse +import pandas as pd +import uuid + + +def main(): + parser = argparse.ArgumentParser(description="Save new memory to be used for future references") + parser.add_argument("--task", type=str, required=True, help="Detailed task explanation that lead to the error or the solution") + parser.add_argument("--error", type=str, required=True, help="Error message that was encountered, if there was any. ") + parser.add_argument("--solution", type=str, required=True, help="Solution, always includes codes. Full method codes are preferred so that the solution can be recalled as is.") + args = parser.parse_args() + # Memory file + memory_file = f"memory_{str(uuid.uuid4())[:6]}.csv" + # new memory + memory = pd.DataFrame([[args.task, args.error, args.solution]], columns=['task', 'error', 'solution']) + # write the memory back to the file + memory.to_csv(memory_file, index=False) + print(f"New memory saved: {memory_file}") + +if __name__ == "__main__": + main() diff --git a/openai_server/agent_utils.py b/openai_server/agent_utils.py index 50ddabd8c..e61fdf68a 100644 --- a/openai_server/agent_utils.py +++ b/openai_server/agent_utils.py @@ -304,3 +304,100 @@ def get_ret_dict_and_handle_files(chat_result, temp_dir, agent_verbose, internal ret_dict.update(dict(temp_dir=temp_dir)) return ret_dict + + +import faiss +import numpy as np +from openai import OpenAI +class MemoryVectorDB: + def __init__(self, model: str, openai_base_url:str, openai_api_key: str): + # Initialize OpenAI embeddings model + self.client = OpenAI(base_url=openai_base_url, api_key=openai_api_key) + self.model = model + + # Initialize FAISS index (using L2 distance) + self.index = None + self.texts = [] + self.embeddings = None + self.id_map = {} + + def get_embeddings(self, texts: list): + # Generate embedding for the texts via client + response = self.client.embeddings.create( + input=texts, + model=self.model + ) + # To reach embeddings for the first item in the list, use response.data[0].embedding and so on + embeddings = [] + for i in range(len(response.data)): + embeddings.append(response.data[i].embedding) + + embedding_matrix = np.array(embeddings).astype('float32') + return embedding_matrix + + def add_texts(self, texts: list): + # Generate embeddings for the texts + embedding_matrix = self.get_embeddings(texts) + + # Update the list of stored texts and id map + start_id = len(self.texts) + self.texts.extend(texts) + for i, text in enumerate(texts): + self.id_map[start_id + i] = text + + # Create or update the FAISS index + if self.index is None: + # Initialize the FAISS index with the embedding dimension + self.index = faiss.IndexFlatL2(embedding_matrix.shape[1]) + # Add embeddings to the FAISS index + self.index.add(embedding_matrix) + self.embeddings = embedding_matrix + else: + self.index.add(embedding_matrix) + self.embeddings = np.vstack((self.embeddings, embedding_matrix)) + + # Confirm embeddings were added + print("Texts added successfully.") + print("Number of items in FAISS index:", self.index.ntotal) + + def query(self, query_text: str, k: int = 2, threshold: float = 2.0): + # Generate embedding for the query + query_embedding_np = self.get_embeddings([query_text]) + + # Check if FAISS index is initialized + if self.index is None or self.index.ntotal == 0: + raise ValueError("FAISS index is empty or not initialized. Please add texts before querying.") + + # Perform FAISS search + D, I = self.index.search(query_embedding_np, k) + + # Ensure valid indices and handle potential errors + results = [] + distances = [] + for i, idx in enumerate(I[0]): + if idx in self.id_map: + results.append(self.id_map[idx]) + distances.append(D[0][i]) + else: + print(f"Warning: Index {idx} not found in id_map. It might have been deleted.") + final_results = [r for r, d in zip(results, distances) if d <= threshold] + final_distances = [d for d in distances if d <= threshold] + print(f"Memory VetorDB: Returns {len(final_results)} results.") + # Returns results having distance less than or equal to the threshold + return final_results, final_distances + + def delete_text_by_id(self, idx: int): + # Remove the text from stored texts and id map + if idx in self.id_map: + del self.id_map[idx] + self.texts.pop(idx) + + # Remove the embedding from FAISS index and rebuild the index + self.embeddings = np.delete(self.embeddings, idx, axis=0) + self.index.reset() + self.index.add(self.embeddings) + else: + print(f"Warning: Text with index {idx} not found in the database.") + + def get_all_texts(self): + return self.texts