diff --git a/evaluation/.env-example b/evaluation/.env-example
index 4cb153b75..f2c20868c 100644
--- a/evaluation/.env-example
+++ b/evaluation/.env-example
@@ -1,3 +1,4 @@
+# memory process model
 MODEL="gpt-4o-mini"
 OPENAI_API_KEY="sk-***REDACTED***"
 OPENAI_BASE_URL="http://***.***.***.***:3000/v1"
@@ -6,6 +7,13 @@
 MEM0_API_KEY="m0-***REDACTED***"
 ZEP_API_KEY="z_***REDACTED***"
 
+# response model
 CHAT_MODEL="gpt-4o-mini"
 CHAT_MODEL_BASE_URL="http://***.***.***.***:3000/v1"
 CHAT_MODEL_API_KEY="sk-***REDACTED***"
+
+MEMOS_KEY="Token mpg-xxxxx"
+MEMOS_URL="https://apigw-pre.memtensor.cn/api/openmem/v1"
+
+MEMOBASE_API_KEY="xxxxx"
+MEMOBASE_PROJECT_URL="http://xxx.xxx.xxx.xxx:8019"
\ No newline at end of file
diff --git a/evaluation/README.md b/evaluation/README.md
index 19da665ad..3ed372817 100644
--- a/evaluation/README.md
+++ b/evaluation/README.md
@@ -34,3 +34,17 @@ This repository provides tools and scripts for evaluating the LoCoMo dataset usi
    ```
 
 ✍️ For evaluating OpenAI's native memory feature with the LoCoMo dataset, please refer to the detailed guide: [OpenAI Memory on LoCoMo - Evaluation Guide](./scripts/locomo/openai_memory_locomo_eval_guide.md).
+
+### LongMemEval Evaluation
+First, download the `longmemeval_s` dataset from https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned and save it as `data/longmemeval/longmemeval_s.json`.
+
+```bash
+# Edit the configuration in ./scripts/run_lme_eval.sh
+# Specify the model and memory backend you want to use (e.g., mem0, zep, etc.)
+./scripts/run_lme_eval.sh
+```
+
+### prefEval Evaluation
+
+### personaMem Evaluation
\ No newline at end of file
diff --git a/evaluation/configs-example/mirix_config.yaml b/evaluation/configs-example/mirix_config.yaml
new file mode 100644
index 000000000..618009e85
--- /dev/null
+++ b/evaluation/configs-example/mirix_config.yaml
@@ -0,0 +1,9 @@
+agent_name: mirix
+model_name: gpt-4o-mini
+model_endpoint: http://***.***.***.***:3000/v1
+api_key: sk-***REDACTED***
+embedding_model_name: text-embedding-3-small
+generation_config:
+  temperature: 0.8
+  max_tokens: 16192
+  context_window: 32768
\ No newline at end of file
diff --git a/evaluation/scripts/locomo/locomo_eval.py b/evaluation/scripts/locomo/locomo_eval.py
index 25d2a847e..2718cc24a 100644
--- a/evaluation/scripts/locomo/locomo_eval.py
+++ b/evaluation/scripts/locomo/locomo_eval.py
@@ -7,6 +7,7 @@
 import nltk
 import numpy as np
+import tiktoken
 import transformers
 
 from bert_score import score as bert_score
@@ -23,7 +24,7 @@
 logging.basicConfig(level=logging.CRITICAL)
 transformers.logging.set_verbosity_error()
 
-
+encoding = tiktoken.get_encoding("cl100k_base")
 # Download necessary NLTK resources
 try:
     nltk.download("wordnet", quiet=True)
@@ -173,7 +174,7 @@ def calculate_nlp_metrics(gold_answer, response, context, options=None):
     gold_answer = str(gold_answer) if gold_answer is not None else ""
     response = str(response) if response is not None else ""
 
-    metrics = {"context_tokens": len(nltk.word_tokenize(context)) if context else 0}
+    metrics = {"context_tokens": len(encoding.encode(context)) if context else 0}
 
     if "lexical" in options:
         gold_tokens = nltk.word_tokenize(gold_answer.lower())
@@ -363,11 +364,12 @@ async def limited_task(task):
         "--lib",
         type=str,
         choices=["zep", "memos", "mem0", "mem0_graph", "openai", "memos-api", "memobase"],
+        default="memos-api",
     )
     parser.add_argument(
         "--version",
         type=str,
-        default="default",
+        default="0917-test",
         help="Version identifier for loading results (e.g., 1010)",
     )
     parser.add_argument(
@@ -376,9 +378,9 
@@ async def limited_task(task): default=3, help="Number of times to run the LLM grader for each question", ) - parser.add_argument("--options", nargs="+", default=["lexical", "semantic"]) + parser.add_argument("--options", nargs="+", default=[]) parser.add_argument( - "--workers", type=int, default=4, help="Number of concurrent workers for processing groups" + "--workers", type=int, default=10, help="Number of concurrent workers for processing groups" ) args = parser.parse_args() diff --git a/evaluation/scripts/locomo/locomo_ingestion.py b/evaluation/scripts/locomo/locomo_ingestion.py index ae5e57c87..06604c233 100644 --- a/evaluation/scripts/locomo/locomo_ingestion.py +++ b/evaluation/scripts/locomo/locomo_ingestion.py @@ -1,85 +1,31 @@ +import asyncio import os import sys -import uuid - -sys.path.insert( - 0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) -) -sys.path.insert( - 0, - os.path.join( - os.path.dirname( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - ), - "evaluation", - "scripts", - ), +ROOT_DIR = os.path.dirname( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) ) +EVAL_SCRIPTS_DIR = os.path.join(ROOT_DIR, "evaluation", "scripts") + +sys.path.insert(0, ROOT_DIR) +sys.path.insert(0, EVAL_SCRIPTS_DIR) import argparse import concurrent.futures import json -import threading import time - from datetime import datetime, timezone - import pandas as pd - from dotenv import load_dotenv -from mem0 import MemoryClient -from memobase import ChatBlob -from tqdm import tqdm -from utils.client import memobase_client, memos_client -from zep_cloud.client import Zep - +from prompts import custom_instructions from memos.configs.mem_cube import GeneralMemCubeConfig from memos.configs.mem_os import MOSConfig from memos.mem_cube.general import GeneralMemCube from memos.mem_os.main import MOS -custom_instructions = """ -Generate personal memories that follow these guidelines: - -1. Each memory should be self-contained with complete context, including: - - The person's name, do not use "user" while creating memories - - Personal details (career aspirations, hobbies, life circumstances) - - Emotional states and reactions - - Ongoing journeys or future plans - - Specific dates when events occurred - -2. Include meaningful personal narratives focusing on: - - Identity and self-acceptance journeys - - Family planning and parenting - - Creative outlets and hobbies - - Mental health and self-care activities - - Career aspirations and education goals - - Important life events and milestones - -3. Make each memory rich with specific details rather than general statements - - Include timeframes (exact dates when possible) - - Name specific activities (e.g., "charity race for mental health" rather than just "exercise") - - Include emotional context and personal growth elements - -4. Extract memories only from user messages, not incorporating assistant responses - -5. 
Format each memory as a paragraph with a clear narrative structure that captures the person's experience, challenges, and aspirations -""" - - def get_client(frame: str, user_id: str | None = None, version: str = "default"): - if frame == "zep": - zep = Zep(api_key=os.getenv("ZEP_API_KEY"), base_url="https://api.getzep.com/api/v2") - return zep - - elif frame == "mem0" or frame == "mem0_graph": - mem0 = MemoryClient(api_key=os.getenv("MEM0_API_KEY")) - mem0.update_project(custom_instructions=custom_instructions) - return mem0 - - elif frame == "memos": + if frame == "memos": mos_config_path = "configs/mos_memos_config.json" with open(mos_config_path) as f: mos_config_data = json.load(f) @@ -110,37 +56,9 @@ def get_client(frame: str, user_id: str | None = None, version: str = "default") mem_cube_id=user_id, user_id=user_id, ) - return mos -def string_to_uuid(s: str, salt="memobase_client") -> str: - return str(uuid.uuid5(uuid.NAMESPACE_DNS, s + salt)) - - -def memobase_add_memory(user, message, retries=3): - for attempt in range(retries): - try: - _ = user.insert(ChatBlob(messages=message), sync=True) - return - except Exception as e: - if attempt < retries - 1: - time.sleep(1) - continue - else: - raise e - - -def memobase_add_memories_for_speaker(client, speaker, messages): - real_uid = string_to_uuid(speaker) - u = client.get_or_create_user(real_uid) - for i in range(0, len(messages), 2): - batch_messages = messages[i : i + 2] - memobase_add_memory(u, batch_messages) - print(f"[{i + 1}/{len(messages)}] Added messages for {speaker} successfully.") - u.flush(sync=True) - - def ingest_session(client, session, frame, version, metadata, revised_client=None): session_date = metadata["session_date"] date_format = "%I:%M %p on %d %B, %Y UTC" @@ -153,33 +71,11 @@ def ingest_session(client, session, frame, version, metadata, revised_client=Non print(f"Processing conv {conv_id}, session {metadata['session_key']}") start_time = time.time() - if frame == "zep": - for chat in tqdm(session, desc=f"{metadata['session_key']}"): - data = chat.get("speaker") + ": " + chat.get("text") - print({"context": data, "conv_id": conv_id, "created_at": iso_date}) - - # Check if the group exists, if not create it - groups = client.group.get_all_groups() - groups = dict(groups)["groups"] - exist_ids = [gp.group_id for gp in groups] - if conv_id not in exist_ids: - client.group.add(group_id=conv_id) - - # Add the message to the group - client.graph.add( - data=data, - type="message", - created_at=iso_date, - group_id=conv_id, - ) - - elif frame == "memos" or frame == "memos-api": + if frame == "memos" or frame == "memos-api": messages = [] messages_reverse = [] - - for chat in tqdm(session, desc=f"{metadata['session_key']}"): + for chat in session: data = chat.get("speaker") + ": " + chat.get("text") - if chat.get("speaker") == metadata["speaker_a"]: messages.append({"role": "user", "content": data, "chat_time": iso_date}) messages_reverse.append( @@ -193,22 +89,24 @@ def ingest_session(client, session, frame, version, metadata, revised_client=Non f"Unknown speaker {chat.get('speaker')} in session {metadata['session_key']}" ) - print({"context": data, "conv_id": conv_id, "created_at": iso_date}) - speaker_a_user_id = conv_id + "_speaker_a" speaker_b_user_id = conv_id + "_speaker_b" if frame == "memos-api": - client.add(messages=messages, user_id=f"{speaker_a_user_id.replace('_', '')}{version}") - - revised_client.add( - messages=messages_reverse, user_id=f"{speaker_b_user_id.replace('_', '')}{version}" + client.add( + 
messages=messages, + user_id=f"{speaker_a_user_id}_{version}", + conv_id=f"{conv_id}_{metadata['session_key']}", + ) + client.add( + messages=messages_reverse, + user_id=f"{speaker_b_user_id}_{version}", + conv_id=f"{conv_id}_{metadata['session_key']}", ) elif frame == "memos": client.add( messages=messages, user_id=speaker_a_user_id, ) - revised_client.add( messages=messages_reverse, user_id=speaker_b_user_id, @@ -216,13 +114,10 @@ def ingest_session(client, session, frame, version, metadata, revised_client=Non print(f"Added messages for {speaker_a_user_id} and {speaker_b_user_id} successfully.") elif frame == "mem0" or frame == "mem0_graph": - print(f"Processing abc for {metadata['session_key']}") messages = [] messages_reverse = [] - - for chat in tqdm(session, desc=f"{metadata['session_key']}"): + for chat in session: data = chat.get("speaker") + ": " + chat.get("text") - if chat.get("speaker") == metadata["speaker_a"]: messages.append({"role": "user", "content": data}) messages_reverse.append({"role": "assistant", "content": data}) @@ -234,8 +129,6 @@ def ingest_session(client, session, frame, version, metadata, revised_client=Non f"Unknown speaker {chat.get('speaker')} in session {metadata['session_key']}" ) - print({"context": data, "conv_id": conv_id, "created_at": iso_date}) - for i in range(0, len(messages), 2): batch_messages = messages[i : i + 2] batch_messages_reverse = messages_reverse[i : i + 2] @@ -272,13 +165,12 @@ def ingest_session(client, session, frame, version, metadata, revised_client=Non enable_graph=True, ) elif frame == "memobase": - print(f"Processing abc for {metadata['session_key']}") + from utils.memobase_utils import memobase_add_memory + messages = [] messages_reverse = [] - for chat in tqdm(session, desc=f"{metadata['session_key']}"): - data = chat.get("speaker") + ": " + chat.get("text") - + for chat in session: if chat.get("speaker") == metadata["speaker_a"]: messages.append( { @@ -318,30 +210,17 @@ def ingest_session(client, session, frame, version, metadata, revised_client=Non f"Unknown speaker {chat.get('speaker')} in session {metadata['session_key']}" ) - print({"context": data, "conv_id": conv_id, "created_at": iso_date}) - - thread_a = threading.Thread( - target=memobase_add_memories_for_speaker, - args=( - client, - metadata["speaker_a_user_id"], - messages, - ), - ) - - thread_b = threading.Thread( - target=memobase_add_memories_for_speaker, - args=( - client, - metadata["speaker_b_user_id"], - messages_reverse, - ), - ) - - thread_a.start() - thread_b.start() - thread_a.join() - thread_b.join() + users = client.get_all_users(limit=5000) + for u in users: + try: + if u["additional_fields"]["user_id"] == conv_id + "_speaker_a": + user_a = client.get_user(u["id"], no_get=True) + if u["additional_fields"]["user_id"] == conv_id + "_speaker_b": + user_b = client.get_user(u["id"], no_get=True) + except: + pass + memobase_add_memory(user_a, messages) + memobase_add_memory(user_b, messages_reverse) end_time = time.time() elapsed_time = round(end_time - start_time, 2) @@ -349,91 +228,86 @@ def ingest_session(client, session, frame, version, metadata, revised_client=Non return elapsed_time -def process_user(conv_idx, frame, locomo_df, version, num_workers=1): - try: - conversation = locomo_df["conversation"].iloc[conv_idx] - max_session_count = 35 - start_time = time.time() - total_session_time = 0 - valid_sessions = 0 - - revised_client = None - if frame == "zep": - client = get_client("zep") - elif frame == "mem0" or frame == "mem0_graph": - client = 
get_client(frame)
-            client.delete_all(user_id=f"locomo_exp_user_{conv_idx}")
-            client.delete_all(user_id=f"{conversation.get('speaker_a')}_{conv_idx}")
-            client.delete_all(user_id=f"{conversation.get('speaker_b')}_{conv_idx}")
-        elif frame == "memos":
-            conv_id = "locomo_exp_user_" + str(conv_idx)
-            speaker_a_user_id = conv_id + "_speaker_a"
-            speaker_b_user_id = conv_id + "_speaker_b"
-            client = get_client("memos", speaker_a_user_id, version)
-            revised_client = get_client("memos", speaker_b_user_id, version)
-        elif frame == "memos-api":
-            conv_id = "locomo_exp_user_" + str(conv_idx)
-            speaker_a_user_id = conv_id + "_speaker_a"
-            speaker_b_user_id = conv_id + "_speaker_b"
-            client = memos_client(mode="api")
-            revised_client = memos_client(mode="api")
-        elif frame == "memobase":
-            client = memobase_client()
-            conv_id = "locomo_exp_user_" + str(conv_idx)
-            speaker_a_user_id = conv_id + "_speaker_a"
-            speaker_b_user_id = conv_id + "_speaker_b"
-            client.delete_user(string_to_uuid(speaker_a_user_id))
-            client.delete_user(string_to_uuid(speaker_b_user_id))
-        sessions_to_process = []
-        for session_idx in range(max_session_count):
-            session_key = f"session_{session_idx}"
-            session = conversation.get(session_key)
-            if session is None:
-                continue
-
-            metadata = {
-                "session_date": conversation.get(f"session_{session_idx}_date_time") + " UTC",
-                "speaker_a": conversation.get("speaker_a"),
-                "speaker_b": conversation.get("speaker_b"),
-                "speaker_a_user_id": f"{conversation.get('speaker_a')}_{conv_idx}",
-                "speaker_b_user_id": f"{conversation.get('speaker_b')}_{conv_idx}",
-                "conv_idx": conv_idx,
-                "session_key": session_key,
-            }
-            sessions_to_process.append((session, metadata))
-            valid_sessions += 1
-
-        print(
-            f"Processing {valid_sessions} sessions for user {conv_idx} with {num_workers} workers"
-        )
-        with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
-            futures = {
-                executor.submit(
-                    ingest_session, client, session, frame, version, metadata, revised_client
-                ): metadata["session_key"]
-                for session, metadata in sessions_to_process
-            }
+def process_user(conv_idx, frame, locomo_df, version):
+    conversation = locomo_df["conversation"].iloc[conv_idx]
+    max_session_count = 35
+    start_time = time.time()
+    total_session_time = 0
+    valid_sessions = 0
 
-            for future in concurrent.futures.as_completed(futures):
-                session_key = futures[future]
-                try:
-                    session_time = future.result()
-                    total_session_time += session_time
-                    print(f"User {conv_idx}, {session_key} processed in {session_time} seconds")
-                except Exception as e:
-                    print(f"Error processing user {conv_idx}, session {session_key}: {e!s}")
+    revised_client = None
+    if frame == "mem0" or frame == "mem0_graph":
+        from mem0 import MemoryClient
 
-        end_time = time.time()
-        elapsed_time = round(end_time - start_time, 2)
-        print(f"User {conv_idx} processed successfully in {elapsed_time} seconds")
+        client = MemoryClient(api_key=os.getenv("MEM0_API_KEY"))
+        client.update_project(custom_instructions=custom_instructions)
+        client.delete_all(user_id=f"locomo_exp_user_{conv_idx}")
+        client.delete_all(user_id=f"{conversation.get('speaker_a')}_{conv_idx}")
+        client.delete_all(user_id=f"{conversation.get('speaker_b')}_{conv_idx}")
+    elif frame == "memos":
+        conv_id = "locomo_exp_user_" + str(conv_idx)
+        speaker_a_user_id = conv_id + "_speaker_a"
+        speaker_b_user_id = conv_id + "_speaker_b"
+        client = get_client("memos", speaker_a_user_id, version)
+        revised_client = get_client("memos", speaker_b_user_id, version)
+    elif frame == "memos-api":
+        
from utils.memos_api import MemOSAPI - return elapsed_time + client = MemOSAPI() + elif frame == "memobase": + from utils.client import memobase_client - except Exception as e: - return f"Error processing user {conv_idx}: {e!s}" + client = memobase_client() + conv_id = "locomo_exp_user_" + str(conv_idx) + speaker_a_user_id = conv_id + "_speaker_a" + speaker_b_user_id = conv_id + "_speaker_b" + all_users = client.get_all_users(limit=5000) + for user in all_users: + try: + if user["additional_fields"]["user_id"] in [speaker_a_user_id, speaker_b_user_id]: + client.delete_user(user["id"]) + print(f"🗑️ Deleted existing user from Memobase memory...") + except: + pass + memobase_user_id_a = client.add_user({"user_id": speaker_a_user_id}) + memobase_user_id_b = client.add_user({"user_id": speaker_b_user_id}) + user_id_a = memobase_user_id_a + user_id_b = memobase_user_id_b + + sessions_to_process = [] + for session_idx in range(max_session_count): + session_key = f"session_{session_idx}" + session = conversation.get(session_key) + if session is None: + continue + + metadata = { + "session_date": conversation.get(f"session_{session_idx}_date_time") + " UTC", + "speaker_a": conversation.get("speaker_a"), + "speaker_b": conversation.get("speaker_b"), + "speaker_a_user_id": f"{conversation.get('speaker_a')}_{conv_idx}", + "speaker_b_user_id": f"{conversation.get('speaker_b')}_{conv_idx}", + "conv_idx": conv_idx, + "session_key": session_key, + } + sessions_to_process.append((session, metadata)) + valid_sessions += 1 + + print(f"Processing {valid_sessions} sessions for user {conv_idx}") + + for session, metadata in sessions_to_process: + session_time = ingest_session(client, session, frame, version, metadata, revised_client) + total_session_time += session_time + print(f"User {conv_idx}, {metadata['session_key']} processed in {session_time} seconds") + end_time = time.time() + elapsed_time = round(end_time - start_time, 2) + print(f"User {conv_idx} processed successfully in {elapsed_time} seconds") + + return elapsed_time -def main(frame, version="default", num_workers=4): + +async def main(frame, version="default", num_workers=4): load_dotenv() locomo_df = pd.read_json("data/locomo/locomo10.json") @@ -445,24 +319,66 @@ def main(frame, version="default", num_workers=4): f"Starting processing for {num_users} users in serial mode, each user using {num_workers} workers for sessions..." ) - for user_id in range(num_users): - try: - result = process_user(user_id, frame, locomo_df, version, num_workers) - if isinstance(result, float): - total_time += result - else: - print(result) - except Exception as e: - print(f"Error processing user {user_id}: {e!s}") - - if num_users > 0: - average_time = total_time / num_users - minutes = int(average_time // 60) - seconds = int(average_time % 60) - average_time_formatted = f"{minutes} minutes and {seconds} seconds" - print( - f"The frame {frame} processed {num_users} users in average of {average_time_formatted} per user." 
- ) + if frame == "zep": + from zep_cloud.client import AsyncZep + + zep = AsyncZep(api_key=os.getenv("ZEP_API_KEY"), base_url="https://api.getzep.com/api/v2") + num_users = 10 + max_session_count = 35 + for group_idx in range(num_users): + conversation = locomo_df["conversation"].iloc[group_idx] + group_id = f"locomo_exp_user_{group_idx}" + print(group_id) + try: + await zep.group.add(group_id=group_id) + except Exception: + pass + + for session_idx in range(max_session_count): + session_key = f"session_{session_idx}" + print(session_key) + session = conversation.get(session_key) + if session is None: + continue + for msg in session: + session_date = conversation.get(f"session_{session_idx}_date_time") + " UTC" + date_format = "%I:%M %p on %d %B, %Y UTC" + date_string = datetime.strptime(session_date, date_format).replace( + tzinfo=timezone.utc + ) + iso_date = date_string.isoformat() + blip_caption = msg.get("blip_captions") + img_description = ( + f"(description of attached image: {blip_caption})" + if blip_caption is not None + else "" + ) + await zep.graph.add( + data=msg.get("speaker") + ": " + msg.get("text") + img_description, + type="message", + created_at=iso_date, + group_id=group_id, + ) + + else: + with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor: + futures = [ + executor.submit(process_user, user_id, frame, locomo_df, version) + for user_id in range(num_users) + ] + + for future in concurrent.futures.as_completed(futures): + session_time = future.result() + total_time += session_time + + if num_users > 0: + average_time = total_time / num_users + minutes = int(average_time // 60) + seconds = int(average_time % 60) + average_time_formatted = f"{minutes} minutes and {seconds} seconds" + print( + f"The frame {frame} processed {num_users} users in average of {average_time_formatted} per user." 
+ ) end_time = time.time() elapsed_time = round(end_time - start_time, 2) @@ -478,19 +394,20 @@ def main(frame, version="default", num_workers=4): "--lib", type=str, choices=["zep", "memos", "mem0", "mem0_graph", "memos-api", "memobase"], + default="memos-api", ) parser.add_argument( "--version", type=str, - default="default", + default="0917-test", help="Version identifier for saving results (e.g., 1010)", ) parser.add_argument( - "--workers", type=int, default=1, help="Number of parallel workers to process users" + "--workers", type=int, default=10, help="Number of parallel workers to process users" ) args = parser.parse_args() lib = args.lib version = args.version workers = args.workers - main(lib, version, workers) + asyncio.run(main(lib, version, workers)) diff --git a/evaluation/scripts/locomo/locomo_metric.py b/evaluation/scripts/locomo/locomo_metric.py index 8ee18faaf..de2fa48cc 100644 --- a/evaluation/scripts/locomo/locomo_metric.py +++ b/evaluation/scripts/locomo/locomo_metric.py @@ -10,11 +10,12 @@ "--lib", type=str, choices=["zep", "memos", "mem0", "mem0_graph", "openai", "memos-api", "memobase"], + default="memos-api", ) parser.add_argument( "--version", type=str, - default="default", + default="0917-test", help="Version identifier for loading results (e.g., 1010)", ) diff --git a/evaluation/scripts/locomo/locomo_responses.py b/evaluation/scripts/locomo/locomo_responses.py index 056b17163..eacf19171 100644 --- a/evaluation/scripts/locomo/locomo_responses.py +++ b/evaluation/scripts/locomo/locomo_responses.py @@ -24,7 +24,12 @@ async def locomo_response(frame, llm_client, context: str, question: str) -> str context=context, question=question, ) - elif frame == "memos": + elif frame == "memos" or frame == "memos-api": + prompt = ANSWER_PROMPT_MEMOS.format( + context=context, + question=question, + ) + elif frame == "memobase": prompt = ANSWER_PROMPT_MEMOS.format( context=context, question=question, @@ -112,7 +117,7 @@ async def main(frame, version="default"): os.makedirs("data", exist_ok=True) - print(all_responses) + # print(all_responses) with open(response_path, "w") as f: json.dump(all_responses, f, indent=2) @@ -125,11 +130,12 @@ async def main(frame, version="default"): "--lib", type=str, choices=["zep", "memos", "mem0", "mem0_graph", "openai", "memos-api", "memobase"], + default="memos-api", ) parser.add_argument( "--version", type=str, - default="default", + default="0917-test", help="Version identifier for loading results (e.g., 1010)", ) args = parser.parse_args() diff --git a/evaluation/scripts/locomo/locomo_search.py b/evaluation/scripts/locomo/locomo_search.py index e72f4594b..d44d9080d 100644 --- a/evaluation/scripts/locomo/locomo_search.py +++ b/evaluation/scripts/locomo/locomo_search.py @@ -1,21 +1,13 @@ import os import sys -import uuid - -sys.path.insert( - 0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) -) -sys.path.insert( - 0, - os.path.join( - os.path.dirname( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - ), - "evaluation", - "scripts", - ), +ROOT_DIR = os.path.dirname( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) ) +EVAL_SCRIPTS_DIR = os.path.join(ROOT_DIR, "evaluation", "scripts") + +sys.path.insert(0, ROOT_DIR) +sys.path.insert(0, EVAL_SCRIPTS_DIR) import argparse import json @@ -23,26 +15,24 @@ from collections import defaultdict from concurrent.futures import ThreadPoolExecutor, as_completed from time import time - import pandas as pd - 
from dotenv import load_dotenv -from mem0 import MemoryClient from tqdm import tqdm -from utils.client import memobase_client, memos_client from utils.memos_filters import filter_memory_data -from zep_cloud.client import Zep - from memos.configs.mem_os import MOSConfig from memos.mem_os.main import MOS def get_client(frame: str, user_id: str | None = None, version: str = "default", top_k: int = 20): if frame == "zep": + from zep_cloud.client import Zep + zep = Zep(api_key=os.getenv("ZEP_API_KEY"), base_url="https://api.getzep.com/api/v2") return zep elif frame == "mem0" or frame == "mem0_graph": + from mem0 import MemoryClient + mem0 = MemoryClient(api_key=os.getenv("MEM0_API_KEY")) return mem0 @@ -182,7 +172,6 @@ def mem0_search(client, query, speaker_a_user_id, speaker_b_user_id, top_k=20): speaker_2_memories=json.dumps(search_speaker_b_memory, indent=4), ) - print(query, context) duration_ms = (time() - start) * 1000 return context, duration_ms @@ -214,30 +203,25 @@ def memos_search(client, query, conv_id, speaker_a, speaker_b, reversed_client=N speaker_2_memories=speaker_b_context, ) - print(query, context) duration_ms = (time() - start) * 1000 return context, duration_ms -def memos_api_search( - client, query, conv_id, speaker_a, speaker_b, top_k, version, reversed_client=None -): +def memos_api_search(client, query, conv_id, speaker_a, speaker_b, top_k, version): start = time() - speaker_a_user_id = conv_id + "_speaker_a" search_a_results = client.search( - query=query, user_id=f"{speaker_a_user_id.replace('_', '')}{version}", top_k=top_k + query=query, user_id=f"{conv_id}_speaker_a_{version}", top_k=top_k ) speaker_a_context = "" - for item in search_a_results: - speaker_a_context += f"{item}\n" + for item in search_a_results["memory_detail_list"]: + speaker_a_context += f"{item['memory_value']}\n" - speaker_b_user_id = conv_id + "_speaker_b" - search_b_results = reversed_client.search( - query=query, user_id=f"{speaker_b_user_id.replace('_', '')}{version}", top_k=top_k + search_b_results = client.search( + query=query, user_id=f"{conv_id}_speaker_b_{version}", top_k=top_k ) speaker_b_context = "" - for item in search_b_results: - speaker_b_context += f"{item}\n" + for item in search_b_results["memory_detail_list"]: + speaker_b_context += f"{item['memory_value']}\n" context = TEMPLATE_MEMOS.format( speaker_1=speaker_a, @@ -246,7 +230,6 @@ def memos_api_search( speaker_2_memories=speaker_b_context, ) - print(query, context) duration_ms = (time() - start) * 1000 return context, duration_ms @@ -323,7 +306,6 @@ def mem0_graph_search(client, query, speaker_a_user_id, speaker_b_user_id, top_k speaker_2_memories=json.dumps(search_speaker_b_memory, indent=4), speaker_2_graph_memories=json.dumps(search_speaker_b_graph, indent=4), ) - print(query, context) duration_ms = (time() - start) * 1000 return context, duration_ms @@ -360,11 +342,12 @@ def zep_search(client, query, group_id, top_k=20): def memobase_search( client, query, speaker_a, speaker_b, speaker_a_user_id, speaker_b_user_id, top_k=20 ): - start = time() - speaker_a_memories = memobase_search_memory( + from utils.memobase_utils import memobase_search_memory + + speaker_a_memories, t1 = memobase_search_memory( client, speaker_a_user_id, query, max_memory_context_size=top_k * 100 ) - speaker_b_memories = memobase_search_memory( + speaker_b_memories, t2 = memobase_search_memory( client, speaker_b_user_id, query, max_memory_context_size=top_k * 100 ) context = TEMPLATE_MEMOBASE.format( @@ -374,38 +357,8 @@ def memobase_search( 
speaker_2_user_id=speaker_b, speaker_2_memories=speaker_b_memories, ) - print(query, context) - duration_ms = (time() - start) * 1000 - return (context, duration_ms) - - -def string_to_uuid(s: str, salt="memobase_client") -> str: - return str(uuid.uuid5(uuid.NAMESPACE_DNS, s + salt)) - - -def memobase_search_memory( - client, user_id, query, max_memory_context_size, max_retries=3, retry_delay=1 -): - retries = 0 - real_uid = string_to_uuid(user_id) - u = client.get_user(real_uid, no_get=True) - - while retries < max_retries: - try: - memories = u.context( - max_token_size=max_memory_context_size, - chats=[{"role": "user", "content": query}], - event_similarity_threshold=0.2, - fill_window_with_events=True, - ) - return memories - except Exception as e: - print(f"Error during memory search: {e}") - print("Retrying...") - retries += 1 - if retries >= max_retries: - raise e - time.sleep(retry_delay) + duration_ms = t1 + t2 + return context, duration_ms def search_query(client, query, metadata, frame, version, reversed_client=None, top_k=20): @@ -417,6 +370,7 @@ def search_query(client, query, metadata, frame, version, reversed_client=None, if frame == "zep": context, duration_ms = zep_search(client, query, conv_id, top_k) + # sleep(0.1) elif frame == "mem0": context, duration_ms = mem0_search( client, query, speaker_a_user_id, speaker_b_user_id, top_k @@ -427,13 +381,15 @@ def search_query(client, query, metadata, frame, version, reversed_client=None, ) elif frame == "memos": context, duration_ms = memos_search( - client, query, conv_id, speaker_a, speaker_b, version, reversed_client + client, query, conv_id, speaker_a, speaker_b, reversed_client ) elif frame == "memos-api": context, duration_ms = memos_api_search( - client, query, conv_id, speaker_a, speaker_b, top_k, version, reversed_client + client, query, conv_id, speaker_a, speaker_b, top_k, version ) elif frame == "memobase": + speaker_a_user_id = conv_id + "_speaker_a" + speaker_b_user_id = conv_id + "_speaker_b" context, duration_ms = memobase_search( client, query, speaker_a, speaker_b, speaker_a_user_id, speaker_b_user_id, top_k ) @@ -484,13 +440,12 @@ def process_user(group_idx, locomo_df, frame, version, top_k=20, num_workers=1): client = get_client(frame, speaker_a_user_id, version, top_k=top_k) reversed_client = get_client(frame, speaker_b_user_id, version, top_k=top_k) elif frame == "memos-api": - speaker_a_user_id = conv_id + "_speaker_a" - speaker_b_user_id = conv_id + "_speaker_b" - client = memos_client(mode="api") - reversed_client = memos_client(mode="api") - client.user_register(user_id=f"{speaker_a_user_id.replace('_', '')}{version}") - reversed_client.user_register(user_id=f"{speaker_b_user_id.replace('_', '')}{version}") + from utils.memos_api import MemOSAPI + + client = MemOSAPI() elif frame == "memobase": + from utils.client import memobase_client + client = memobase_client() else: client = get_client(frame, conv_id, version) @@ -518,16 +473,6 @@ def process_qa(qa): ): result = future.result() if result: - context_preview = ( - result["context"][:20] + "..." 
if result["context"] else "No context" - ) - print( - { - "query": result["query"], - "context": context_preview, - "duration_ms": result["duration_ms"], - } - ) search_results[conv_id].append(result) os.makedirs(f"results/locomo/{frame}-{version}/tmp/", exist_ok=True) @@ -549,13 +494,13 @@ def main(frame, version="default", num_workers=1, top_k=20): all_search_results = defaultdict(list) for idx in range(num_users): - try: - print(f"Processing user {idx}...") - user_results = process_user(idx, locomo_df, frame, version, top_k, num_workers) - for conv_id, results in user_results.items(): - all_search_results[conv_id].extend(results) - except Exception as e: - print(f"User {idx} generated an exception: {e}") + # try: + print(f"Processing user {idx}...") + user_results = process_user(idx, locomo_df, frame, version, top_k, num_workers) + for conv_id, results in user_results.items(): + all_search_results[conv_id].extend(results) + # except Exception as e: + # print(f"User {idx} generated an exception: {e}") with open(f"results/locomo/{frame}-{version}/{frame}_locomo_search_results.json", "w") as f: json.dump(dict(all_search_results), f, indent=2) @@ -568,15 +513,16 @@ def main(frame, version="default", num_workers=1, top_k=20): "--lib", type=str, choices=["zep", "memos", "mem0", "mem0_graph", "memos-api", "memobase"], + default="memos-api", ) parser.add_argument( "--version", type=str, - default="default", + default="0917-test", help="Version identifier for saving results (e.g., 1010)", ) parser.add_argument( - "--workers", type=int, default=1, help="Number of parallel workers to process users" + "--workers", type=int, default=10, help="Number of parallel workers to process users" ) parser.add_argument( "--top_k", type=int, default=20, help="Number of results to retrieve in search queries" diff --git a/evaluation/scripts/locomo/prompts.py b/evaluation/scripts/locomo/prompts.py index 9e080ec0f..02c6af1d8 100644 --- a/evaluation/scripts/locomo/prompts.py +++ b/evaluation/scripts/locomo/prompts.py @@ -38,26 +38,29 @@ ANSWER_PROMPT_ZEP = """ - You are an intelligent memory assistant tasked with retrieving accurate information from conversation memories. - # CONTEXT: - You have access to memories from a conversation. These memories contain - timestamped information that may be relevant to answering the question. + You have access to facts and entities from a conversation. # INSTRUCTIONS: 1. Carefully analyze all provided memories 2. Pay special attention to the timestamps to determine the answer 3. If the question asks about a specific event or fact, look for direct evidence in the memories 4. If the memories contain contradictory information, prioritize the most recent memory - 5. If there is a question about time references (like "last year", "two months ago", etc.), - calculate the actual date based on the memory timestamp. For example, if a memory from - 4 May 2022 mentions "went to India last year," then the trip occurred in 2021. - 6. Always convert relative time references to specific dates, months, or years. For example, - convert "last year" to "2022" or "two months ago" to "March 2023" based on the memory - timestamp. Ignore the reference while answering the question. - 7. Focus only on the content of the memories. Do not confuse character - names mentioned in memories with the actual users who created those memories. - 8. The answer should be less than 5-6 words. + 5. Always convert relative time references to specific dates, months, or years. + 6. 
Be as specific as possible when talking about people, places, and events + 7. Timestamps in memories represent the actual time the event occurred, not the time the event was mentioned in a message. + + Clarification: + When interpreting memories, use the timestamp to determine when the described event happened, not when someone talked about the event. + + Example: + + Memory: (2023-03-15T16:33:00Z) I went to the vet yesterday. + Question: What day did I go to the vet? + Correct Answer: March 15, 2023 + Explanation: + Even though the phrase says "yesterday," the timestamp shows the event was recorded as happening on March 15th. Therefore, the actual vet visit happened on that date, regardless of the word "yesterday" in the text. + # APPROACH (Think step by step): 1. First, examine all memories that contain information related to the question @@ -73,8 +76,7 @@ {context} Question: {question} - Answer: - """ + Answer:""" ANSWER_PROMPT_MEMOS = """ You are a knowledgeable and helpful AI assistant. @@ -108,3 +110,31 @@ Answer: """ + +custom_instructions = """ +Generate personal memories that follow these guidelines: + +1. Each memory should be self-contained with complete context, including: + - The person's name, do not use "user" while creating memories + - Personal details (career aspirations, hobbies, life circumstances) + - Emotional states and reactions + - Ongoing journeys or future plans + - Specific dates when events occurred + +2. Include meaningful personal narratives focusing on: + - Identity and self-acceptance journeys + - Family planning and parenting + - Creative outlets and hobbies + - Mental health and self-care activities + - Career aspirations and education goals + - Important life events and milestones + +3. Make each memory rich with specific details rather than general statements + - Include timeframes (exact dates when possible) + - Name specific activities (e.g., "charity race for mental health" rather than just "exercise") + - Include emotional context and personal growth elements + +4. Extract memories only from user messages, not incorporating assistant responses + +5. 
Format each memory as a paragraph with a clear narrative structure that captures the person's experience, challenges, and aspirations +""" diff --git a/evaluation/scripts/longmemeval/lme_eval.py b/evaluation/scripts/longmemeval/lme_eval.py index 384f595be..329194e36 100644 --- a/evaluation/scripts/longmemeval/lme_eval.py +++ b/evaluation/scripts/longmemeval/lme_eval.py @@ -8,6 +8,7 @@ import nltk import numpy as np +import tiktoken import transformers from bert_score import score as bert_score @@ -25,7 +26,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from utils.prompts import LME_JUDGE_MODEL_TEMPLATE - +encoding = tiktoken.get_encoding("cl100k_base") logging.basicConfig(level=logging.CRITICAL) transformers.logging.set_verbosity_error() @@ -133,7 +134,7 @@ def calculate_nlp_metrics(golden_answer, response, context, options=None): response = str(response) if response is not None else "" context = str(context) if context is not None else "" - metrics = {"context_tokens": len(nltk.word_tokenize(context)) if context else 0} + metrics = {"context_tokens": len(encoding.encode(context)) if context else 0} if "lexical" in options: gold_tokens = nltk.word_tokenize(golden_answer.lower()) @@ -199,7 +200,7 @@ async def process_qa( ) print("\n" + "=" * 80) - print(f"🔍 Processed User: \033[1m{user_id}\033[0m") + print(f"🔍 Processed User: {user_id}") print("-" * 80) print(f"❓ Question: \n {question}") print("-" * 80) @@ -218,7 +219,7 @@ async def process_qa( judgments_formatted = [] for run, correct in judgments_dict.items(): - status = "\033[92m✓ CORRECT\033[0m" if correct else "\033[91m✗ WRONG\033[0m" + status = "✓ CORRECT" if correct else "✗ WRONG" judgments_formatted.append(f"{run}: {status}") print(f"⚖️ Judgments: \n {', '.join(judgments_formatted)}") @@ -309,25 +310,21 @@ async def main(frame, version, nlp_options, num_runs=3, num_workers=5): run_scores, evaluated_count = evaluate_accuracy(lme_eval_results, num_runs) print("\n" + "=" * 80) - print("\033[1;36m📊 EVALUATION SUMMARY\033[0m".center(80)) + print("📊 EVALUATION SUMMARY".center(80)) print("=" * 80) if evaluated_count > 0: - print( - f"📋 \033[1mEvaluated:\033[0m \033[93m{evaluated_count}\033[0m responses across \033[93m{num_runs}\033[0m runs" - ) - print( - f"🎯 \033[1mLLM-as-a-Judge Mean Accuracy:\033[0m \033[92m{np.mean(run_scores):.4f}\033[0m" - ) - print(f"🔍 \033[1mStandard Deviation:\033[0m \033[93m{np.std(run_scores):.4f}\033[0m") + print(f"📋 Evaluated: {evaluated_count} responses across {num_runs} runs") + print(f"🎯 LLM-as-a-Judge Mean Accuracy: {np.mean(run_scores):.4f}") + print(f"🔍 Standard Deviation: {np.std(run_scores):.4f}") - run_scores_formatted = [f"\033[94m{round(s, 4):.4f}\033[0m" for s in run_scores] - print(f"🔢 \033[1mIndividual run scores:\033[0m [{', '.join(run_scores_formatted)}]") + run_scores_formatted = [f"{round(s, 4):.4f}" for s in run_scores] + print(f"🔢 Individual run scores: [{', '.join(run_scores_formatted)}]") else: - print("\033[91m⚠️ No responses were evaluated. LLM-as-a-Judge score: N/A (0/0)\033[0m") + print("⚠️ No responses were evaluated. 
LLM-as-a-Judge score: N/A (0/0)") if error_count > 0: - print(f"\033[91m⚠️ Encountered {error_count} errors during processing\033[0m") + print(f"⚠️ Encountered {error_count} errors during processing") print("-" * 80) @@ -336,8 +333,8 @@ async def main(frame, version, nlp_options, num_runs=3, num_workers=5): with open(judged_path, "w") as file: json.dump(lme_eval_results, file, indent=4) - print("\033[92m✅ Evaluation completed successfully!\033[0m") - print(f"📁 Results saved to: \033[1;94m{judged_path}\033[0m") + print("✅ Evaluation completed successfully!") + print(f"📁 Results saved to: {judged_path}") print("=" * 80 + "\n") @@ -347,23 +344,24 @@ async def main(frame, version, nlp_options, num_runs=3, num_workers=5): "--lib", type=str, choices=["mem0-local", "mem0-api", "memos-local", "zep", "memos-api", "zep", "memobase"], + default="memos-api", ) parser.add_argument( - "--version", type=str, default="v1", help="Version of the evaluation framework." + "--version", type=str, default="0923", help="Version of the evaluation framework." ) parser.add_argument( "--options", type=str, nargs="+", - default=["lexical", "semantic"], + default=["lexical"], choices=["lexical", "semantic"], help="NLP options to use for evaluation.", ) parser.add_argument( - "--num_runs", type=int, default=3, help="Number of runs for LLM-as-a-Judge evaluation." + "--num_runs", type=int, default=1, help="Number of runs for LLM-as-a-Judge evaluation." ) parser.add_argument( - "--workers", type=int, default=3, help="Number of runs for LLM-as-a-Judge evaluation." + "--workers", type=int, default=30, help="Number of runs for LLM-as-a-Judge evaluation." ) args = parser.parse_args() diff --git a/evaluation/scripts/longmemeval/lme_ingestion.py b/evaluation/scripts/longmemeval/lme_ingestion.py index f2df0bd30..6d8275427 100644 --- a/evaluation/scripts/longmemeval/lme_ingestion.py +++ b/evaluation/scripts/longmemeval/lme_ingestion.py @@ -2,7 +2,6 @@ import os import sys - sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime, timezone @@ -10,9 +9,6 @@ import pandas as pd from tqdm import tqdm -from utils.client import mem0_client, memobase_client, memos_client, zep_client -from utils.memobase_utils import memobase_add_memory, string_to_uuid -from zep_cloud.types import Message def ingest_session(session, date, user_id, session_id, frame, client): @@ -20,7 +16,7 @@ def ingest_session(session, date, user_id, session_id, frame, client): if frame == "zep": for idx, msg in enumerate(session): print( - f"\033[90m[{frame}]\033[0m 📝 User \033[1;94m{user_id}\033[0m 💬 Session \033[1;94m{session_id}\033[0m: [\033[93m{idx + 1}/{len(session)}\033[0m] Ingesting message: \033[1m{msg['role']}\033[0m - \033[96m{msg['content'][:50]}...\033[0m at \033[92m{date.isoformat()}\033[0m" + f"[{frame}] 📝 User {user_id} 💬 Session {session_id}: [{idx + 1}/{len(session)}] Ingesting message: {msg['role']} - {msg['content'][:50]}... 
at {date.isoformat()}" ) client.memory.add( session_id=session_id, @@ -37,7 +33,7 @@ def ingest_session(session, date, user_id, session_id, frame, client): for idx, msg in enumerate(session): messages.append({"role": msg["role"], "content": msg["content"][:8000]}) print( - f"\033[90m[{frame}]\033[0m 📝 Session \033[1;94m{session_id}\033[0m: [\033[93m{idx + 1}/{len(session)}\033[0m] Reading message: \033[1m{msg['role']}\033[0m - \033[96m{msg['content'][:50]}...\033[0m at \033[92m{date.isoformat()}\033[0m" + f"[{frame}] 📝 Session {session_id}: [{idx + 1}/{len(session)}] Reading message: {msg['role']} - {msg['content'][:50]}... at {date.isoformat()}" ) if frame == "mem0-local": client.add( @@ -52,7 +48,7 @@ def ingest_session(session, date, user_id, session_id, frame, client): version="v2", ) print( - f"\033[90m[{frame}]\033[0m ✅ Session \033[1;94m{session_id}\033[0m: Ingested \033[93m{len(messages)}\033[0m messages at \033[92m{date.isoformat()}\033[0m" + f"[{frame}] ✅ Session {session_id}: Ingested {len(messages)} messages at {date.isoformat()}" ) elif frame == "memobase": for idx, msg in enumerate(session): @@ -63,17 +59,11 @@ def ingest_session(session, date, user_id, session_id, frame, client): "created_at": date.isoformat(), } ) - print( - f"\033[90m[{frame}]\033[0m 📝 User \033[1;94m{user_id}\033[0m 💬 Session \033[1;94m{session_id}\033[0m: [\033[93m{idx + 1}/{len(session)}\033[0m] Ingesting message: \033[1m{msg['role']}\033[0m - \033[96m{msg['content'][:50]}...\033[0m at \033[92m{date.isoformat()}\033[0m" - ) + # print(f"[{frame}] 📝 User {user_id} 💬 Session {session_id}: [{idx + 1}/{len(session)}] Ingesting message: {msg['role']} - {msg['content'][:50]}... at {date.isoformat()}") - real_uid = string_to_uuid(user_id) - user = client.get_user(real_uid) + user = client.get_user(user_id) memobase_add_memory(user, messages) - user.flash(sync=True) - print( - f"\033[90m[{frame}]\033[0m ✅ Session \033[1;94m{session_id}\033[0m: Ingested \033[93m{len(messages)}\033[0m messages at \033[92m{date.isoformat()}\033[0m" - ) + # print(f"[{frame}] ✅ Session {session_id}: Ingested {len(messages)} messages at {date.isoformat()}") elif frame == "memos-local" or frame == "memos-api": for _idx, msg in enumerate(session): messages.append( @@ -83,47 +73,46 @@ def ingest_session(session, date, user_id, session_id, frame, client): "chat_time": date.isoformat(), } ) - client.add(messages=messages, user_id=user_id) + if frame == "memos-local": + client.add(messages=messages, user_id=user_id) + client.mem_reorganizer_wait() + elif frame == "memos-api": + if messages: + client.add(messages=messages, user_id=user_id, conv_id=session_id) print( - f"\033[90m[{frame}]\033[0m ✅ Session \033[1;94m{session_id}\033[0m: Ingested \033[93m{len(messages)}\033[0m messages at \033[92m{date.isoformat()}\033[0m" + f"[{frame}] ✅ Session {session_id}: Ingested {len(messages)} messages at {date.isoformat()}" ) - client.mem_reorganizer_wait() -def ingest_conv(lme_df, version, conv_idx, frame): +def ingest_conv(lme_df, version, conv_idx, frame, success_records, f): conversation = lme_df.iloc[conv_idx] sessions = conversation["haystack_sessions"] dates = conversation["haystack_dates"] - user_id = "lme_exper_user_" + str(conv_idx) + user_id = f"lme_exper_user_{version}_{conv_idx}" print("\n" + "=" * 80) - print(f"🔄 \033[1;36mINGESTING CONVERSATION {conv_idx}\033[0m".center(80)) + print(f"🔄 [INGESTING CONVERSATION {conv_idx}".center(80)) print("=" * 80) if frame == "zep": + from utils.client import zep_client + client = zep_client() - 
print("🔌 \033[1mUsing \033[94mZep client\033[0m \033[1mfor ingestion...\033[0m") - # Delete existing user and session if they exist - client.user.delete(user_id) - print(f"🗑️ Deleted existing user \033[93m{user_id}\033[0m from Zep memory...") - # Add user to Zep memory client.user.add(user_id=user_id) - print(f"➕ Added user \033[93m{user_id}\033[0m to Zep memory...") + print(f"➕ Added user {user_id} to Zep memory...") elif frame == "mem0-local": + from utils.client import mem0_client + client = mem0_client(mode="local") - print("🔌 \033[1mUsing \033[94mMem0 Local client\033[0m \033[1mfor ingestion...\033[0m") - # Delete existing memories for the user - client.delete_all(user_id=user_id) - print(f"🗑️ Deleted existing memories for user \033[93m{user_id}\033[0m...") elif frame == "mem0-api": + from utils.client import mem0_client + client = mem0_client(mode="api") - print("🔌 \033[1mUsing \033[94mMem0 API client\033[0m \033[1mfor ingestion...\033[0m") - # Delete existing memories for the user - client.delete_all(user_id=user_id) - print(f"🗑️ Deleted existing memories for user \033[93m{user_id}\033[0m...") elif frame == "memos-local": + from utils.client import memos_client + client = memos_client( mode="local", db_name=f"lme_{frame}-{version}", @@ -134,64 +123,73 @@ def ingest_conv(lme_df, version, conv_idx, frame): mem_os_config_path="configs/mos_memos_config.json", addorsearch="add", ) - print("🔌 \033[1mUsing \033[94mMemos Local client\033[0m \033[1mfor ingestion...\033[0m") + print("🔌 Using Memos Local client for ingestion...") elif frame == "memos-api": - client = memos_client(mode="api") + from utils.memos_api import MemOSAPI + + client = MemOSAPI() elif frame == "memobase": + from utils.client import memobase_client + client = memobase_client() - print("🔌 \033[1mUsing \033[94mMemobase client\033[0m \033[1mfor ingestion...\033[0m") - client.delete_user(string_to_uuid(user_id)) - print(f"🗑️ Deleted existing user \033[93m{user_id}\033[0m from Memobase memory...") + memobase_user_id = client.add_user({"user_id": user_id}) + user_id = memobase_user_id for idx, session in enumerate(sessions): - session_id = user_id + "_lme_exper_session_" + str(idx) - if frame == "zep": - client.memory.add_session( - user_id=user_id, - session_id=session_id, - ) - print( - f"➕ Added session \033[93m{session_id}\033[0m for user \033[93m{user_id}\033[0m to Zep memory..." 
- ) - - if len(session) == 0: - print(f"\033[93m⚠️ Skipping empty session {idx} in conversation {conv_idx}\033[0m") - continue + if f"{conv_idx}_{idx}" not in success_records: + session_id = user_id + "_lme_exper_session_" + str(idx) + if frame == "zep": + client.memory.add_session( + user_id=user_id, + session_id=session_id, + ) + date = dates[idx] + " UTC" + date_format = "%Y/%m/%d (%a) %H:%M UTC" + date_string = datetime.strptime(date, date_format).replace(tzinfo=timezone.utc) - date = dates[idx] + " UTC" - date_format = "%Y/%m/%d (%a) %H:%M UTC" - date_string = datetime.strptime(date, date_format).replace(tzinfo=timezone.utc) - - try: - ingest_session(session, date_string, user_id, session_id, frame, client) - except Exception as e: - print(f"\033[91m❌ Error ingesting session: {e}\033[0m") + try: + ingest_session(session, date_string, user_id, session_id, frame, client) + f.write(f"{conv_idx}_{idx}\n") + f.flush() + except Exception as e: + print(f"❌ Error ingesting session: {e}") + else: + print(f"✅ Session {conv_idx}_{idx} already ingested") if frame == "memos-local": client.mem_reorganizer_off() + print("=" * 80) def main(frame, version, num_workers=2): print("\n" + "=" * 80) - print(f"🚀 \033[1;36mLONGMEMEVAL INGESTION - {frame.upper()} v{version}\033[0m".center(80)) + print(f"🚀 LONGMEMEVAL INGESTION - {frame.upper()} v{version}".center(80)) print("=" * 80) lme_df = pd.read_json("data/longmemeval/longmemeval_s.json") - print( - "📚 \033[1mLoaded LongMemeval dataset\033[0m from \033[94mdata/longmemeval/longmemeval_s.json\033[0m" - ) + print("📚 Loaded LongMemeval dataset from data/longmemeval/longmemeval_s.json") num_multi_sessions = len(lme_df) - print(f"👥 Number of users: \033[93m{num_multi_sessions}\033[0m") + print(f"👥 Number of users: {num_multi_sessions}") print("-" * 80) start_time = datetime.now() + os.makedirs(f"results/lme/{frame}-{version}/", exist_ok=True) + success_records = [] + record_file = f"results/lme/{frame}-{version}/success_records.txt" + if os.path.exists(record_file): + for i in open(record_file, "r").readlines(): + success_records.append(i.strip()) + + f = open(record_file, "a+") with ThreadPoolExecutor(max_workers=num_workers) as executor: futures = [] for session_idx in range(num_multi_sessions): - future = executor.submit(ingest_conv, lme_df, version, session_idx, frame) + future = executor.submit( + ingest_conv, lme_df, version, session_idx, frame, success_records, f + ) futures.append(future) for future in tqdm( @@ -200,21 +198,17 @@ def main(frame, version, num_workers=2): try: future.result() except Exception as e: - print(f"\033[91m❌ Error processing conversation: {e}\033[0m") + print(f"❌ Error processing conversation: {e}") end_time = datetime.now() elapsed_time = end_time - start_time elapsed_time_str = str(elapsed_time).split(".")[0] print("\n" + "=" * 80) - print("✅ \033[1;32mINGESTION COMPLETE\033[0m".center(80)) + print("✅ INGESTION COMPLETE".center(80)) print("=" * 80) - print( - f"⏱️ Total time taken to ingest \033[93m{num_multi_sessions}\033[0m multi-sessions: \033[92m{elapsed_time_str}\033[0m" - ) - print( - f"🔄 Framework: \033[94m{frame}\033[0m | Version: \033[94m{version}\033[0m | Workers: \033[94m{num_workers}\033[0m" - ) + print(f"⏱️ Total time taken to ingest {num_multi_sessions} multi-sessions: {elapsed_time_str}") + print(f"🔄 Framework: {frame} | Version: {version} | Workers: {num_workers}") print("=" * 80 + "\n") @@ -224,12 +218,13 @@ def main(frame, version, num_workers=2): "--lib", type=str, choices=["mem0-local", "mem0-api", 
"memos-local", "memos-api", "zep", "memobase"], + default="memos-api", ) parser.add_argument( - "--version", type=str, default="v1", help="Version of the evaluation framework." + "--version", type=str, default="0924", help="Version of the evaluation framework." ) parser.add_argument( - "--workers", type=int, default=3, help="Number of runs for LLM-as-a-Judge evaluation." + "--workers", type=int, default=20, help="Number of runs for LLM-as-a-Judge evaluation." ) args = parser.parse_args() diff --git a/evaluation/scripts/longmemeval/lme_metric.py b/evaluation/scripts/longmemeval/lme_metric.py index 69f7748e0..c9e25ae86 100644 --- a/evaluation/scripts/longmemeval/lme_metric.py +++ b/evaluation/scripts/longmemeval/lme_metric.py @@ -259,9 +259,10 @@ def calculate_scores(data, grade_path, output_path): "--lib", type=str, choices=["mem0-local", "mem0-api", "memos-local", "memos-api", "zep", "memobase"], + default="memos-api", ) parser.add_argument( - "--version", type=str, default="v1", help="Version of the evaluation framework." + "--version", type=str, default="0923", help="Version of the evaluation framework." ) args = parser.parse_args() lib, version = args.lib, args.version diff --git a/evaluation/scripts/longmemeval/lme_mirix.py b/evaluation/scripts/longmemeval/lme_mirix.py new file mode 100644 index 000000000..923d5752b --- /dev/null +++ b/evaluation/scripts/longmemeval/lme_mirix.py @@ -0,0 +1,103 @@ +import json +import os +import sys +import time +import traceback + +import pandas as pd +from tqdm import tqdm + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from utils.mirix_utils import get_mirix_client + +if __name__ == "__main__": + config_path = "./configs/mirix_config.yaml" + lme_df = pd.read_json("./data/longmemeval/longmemeval_s.json") + success_results = [] + success_qid = [] + + if os.path.exists("./results/lme/mirix_lme_test_result.json"): + results = [ + json.loads(i) for i in open("./results/lme/mirix_lme_test_result.json", "r").readlines() + ] + with open("./results/lme/mirix_lme_test_result.json", "w") as f: + for res in results: + if res["response"] and "ERROR" not in res["response"]: + f.write(json.dumps(res, ensure_ascii=False) + "\n") + success_qid.append(res["question_id"]) + + f = open("./results/lme/mirix_lme_test_result.json", "a+") + assistant = get_mirix_client(config_path) + + def process_one(index): + conversation = lme_df.iloc[index] + sessions = conversation["haystack_sessions"] + dates = conversation["haystack_dates"] + q = conversation["question"] + a = conversation["answer"] + q_date = conversation["question_date"] + q_type = conversation["question_type"] + assistant.create_user(user_name=f"lme_user_{index}") + user = assistant.get_user_by_name(user_name=f"lme_user_{index}") + + for session, date in tqdm(zip(sessions, dates)): + if session: + t = time.time() + message = f"Conversation happened at {date}:\n\n" + for turn in session: + message += turn["role"] + ": " + turn["content"] + message += "\n" + assistant.add(message, user_id=user.id) + print(f"add one session time: {time.time() - t}") + + assistant._agent.update_core_memory_persona( + "Is a helpful assistant that answers questions with extreme conciseness.\nIs persistent and tries to find " + "the answerr using different queries and different search methods. Never uses unnecessary words or repeats " + "the question in the answer. Always provides the shortest answer possible and tries to utter the fewest words possible." 
+ ) + + response = assistant.chat( + f"""You will be given a question and you need to answer the question based on the memories. + # APPROACH (Think step by step): + 1. First, search and check the memories that might contain information related to the question. + 2. Examine the timestamps and content of these memories carefully. + 3. Look for explicit mentions of dates, times, locations, or events that answer the question. + 4. If the answer requires calculation (e.g., converting relative time references), show your work. + 5. Formulate a precise, concise answer based solely on the evidence in the memories. + 6. Double-check that your answer directly addresses the question asked. + 7. Ensure your final answer is specific and avoids vague time references like "yesterday", "last year" but with specific dates. + 8. The answer should be as brief as possible, you should **only state the answer** WITHOUT repeating the question. For example, if asked 'When did Mary go to the store?', you should simply answer 'June 1st'. Do NOT say 'Mary went to the store on June 1st' which is redundant and strictly forbidden. Your answer should be as short as possible. + Current Date: {q_date} + Question: {q}""", + user_id=user.id, + ) + res = conversation.to_dict() + res["response"] = response + res["index"] = index + assistant.save(f"./results/lme/mirix_lme_test_{index}") + return res + + # for i in tqdm(range(len(lme_df))): + for i in tqdm(range(int(sys.argv[1]), int(sys.argv[2]))): + if lme_df.iloc[i]["question_id"] not in success_qid: + try: + res = process_one(i) + f.write(json.dumps(res, ensure_ascii=False) + "\n") + f.flush() + except Exception as exc: + traceback.print_exc() + print(f"❌ Error processing {exc}") + + # import json + # res = {} + # for i in open('./results/lme/mirix_lme_test_result.json').readlines(): + # line = json.loads(i) + # res[line['question_id']] = {'question': line['question'], + # 'golden_answer': line['answer'], + # 'context': '', + # 'answer': line['response'], + # 'category': line['question_type'], + # 'response_duration_ms': 0, + # 'search_duration_ms': 0 + # } + # json.dump(res, open('./results/lme/mirix-0905/mirix_lme_responses.json', 'w'), indent=2) diff --git a/evaluation/scripts/longmemeval/lme_responses.py b/evaluation/scripts/longmemeval/lme_responses.py index e1e341826..da61f23fc 100644 --- a/evaluation/scripts/longmemeval/lme_responses.py +++ b/evaluation/scripts/longmemeval/lme_responses.py @@ -45,14 +45,10 @@ def process_qa(user_id, search_result, llm_client): response_duration_ms = (time() - start) * 1000 print("\n" + "-" * 80) - print(f"🤖 Processed User: \033[1m{user_id}\033[0m") - print(f"⏱️ Duration: \033[92m{response_duration_ms:.2f} ms\033[0m") - print(f"❓ Question: \033[93m{question}\033[0m") - print( - f"💬 Answer: \033[96m{anwer[:150]}...\033[0m" - if len(anwer) > 150 - else f"💬 Answer: \033[96m{anwer}\033[0m" - ) + print(f"🤖 Processed User: {user_id}") + print(f"⏱️ Duration: {response_duration_ms:.2f} ms") + print(f"❓ Question: {question}") + print(f"💬 Answer: {anwer[:150]}..." 
if len(anwer) > 150 else f"💬 Answer: {anwer}") print("-" * 80) return { @@ -71,11 +67,7 @@ def process_qa(user_id, search_result, llm_client): def main(frame, version, num_workers=4): print("\n" + "=" * 80) - print( - f"🚀 \033[1;36mLONGMEMEVAL RESPONSE GENERATION - {frame.upper()} v{version}\033[0m".center( - 80 - ) - ) + print(f"🚀 LONGMEMEVAL RESPONSE GENERATION - {frame.upper()} v{version}".center(80)) print("=" * 80) load_dotenv() @@ -84,18 +76,16 @@ def main(frame, version, num_workers=4): api_key=os.getenv("CHAT_MODEL_API_KEY"), base_url=os.getenv("CHAT_MODEL_BASE_URL") ) - print( - f"🔌 \033[1mUsing OpenAI client with model:\033[0m \033[94m{os.getenv('CHAT_MODEL')}\033[0m" - ) + print(f"🔌 Using OpenAI client with model: {os.getenv('CHAT_MODEL')}") search_path = f"results/lme/{frame}-{version}/{frame}_lme_search_results.json" response_path = f"results/lme/{frame}-{version}/{frame}_lme_responses.json" - print(f"📂 \033[1mLoading search results from:\033[0m \033[94m{search_path}\033[0m") + print(f"📂 Loading search results from: {search_path}") with open(search_path) as file: lme_search_results = json.load(file) - print(f"📊 \033[1mFound\033[0m \033[93m{len(lme_search_results)}\033[0m users to process") - print(f"⚙️ \033[1mUsing\033[0m \033[93m{num_workers}\033[0m worker threads") + print(f"📊 Found {len(lme_search_results)} users to process") + print(f"⚙️ Using {num_workers} worker threads") print("-" * 80) lme_responses = {} @@ -118,25 +108,23 @@ def main(frame, version, num_workers=4): result = future.result() lme_responses[user_id] = result except Exception as exc: - print(f"\033[91m❌ Error processing user {user_id}: {exc}\033[0m") + print(f"❌ Error processing user {user_id}: {exc}") end_time = time() elapsed_time = end_time - start_time elapsed_sec = int(elapsed_time) print("\n" + "=" * 80) - print("✅ \033[1;32mRESPONSE GENERATION COMPLETE\033[0m".center(80)) + print("✅ RESPONSE GENERATION COMPLETE".center(80)) print("=" * 80) - print(f"⏱️ \033[1mTotal time:\033[0m \033[92m{elapsed_sec // 60}m {elapsed_sec % 60}s\033[0m") - print(f"📊 \033[1mProcessed:\033[0m \033[93m{len(lme_responses)}\033[0m users") - print( - f"🔄 \033[1mFramework:\033[0m \033[94m{frame}\033[0m | \033[1mVersion:\033[0m \033[94m{version}\033[0m" - ) + print(f"⏱️ Total time: {elapsed_sec // 60}m {elapsed_sec % 60}s") + print(f"📊 Processed: {len(lme_responses)} users") + print(f"🔄 Framework: {frame} | Version: {version}") with open(response_path, "w") as f: json.dump(lme_responses, f, indent=4) - print(f"📁 \033[1mResponses saved to:\033[0m \033[1;94m{response_path}\033[0m") + print(f"📁 Responses saved to: {response_path}") print("=" * 80 + "\n") @@ -146,12 +134,13 @@ def main(frame, version, num_workers=4): "--lib", type=str, choices=["mem0-local", "mem0-api", "memos-local", "memos-api", "zep", "memobase"], + default="memos-api", ) parser.add_argument( - "--version", type=str, default="v1", help="Version of the evaluation framework." + "--version", type=str, default="0923", help="Version of the evaluation framework." ) parser.add_argument( - "--workers", type=int, default=3, help="Number of runs for LLM-as-a-Judge evaluation." + "--workers", type=int, default=30, help="Number of runs for LLM-as-a-Judge evaluation." 
) args = parser.parse_args() diff --git a/evaluation/scripts/longmemeval/lme_search.py b/evaluation/scripts/longmemeval/lme_search.py index 898ab7e27..262da3312 100644 --- a/evaluation/scripts/longmemeval/lme_search.py +++ b/evaluation/scripts/longmemeval/lme_search.py @@ -16,6 +16,7 @@ from utils.client import mem0_client, memobase_client, memos_client, zep_client from utils.memobase_utils import memobase_search_memory from utils.memos_filters import filter_memory_data +from utils.memos_api import MemOSAPI from utils.prompts import ( MEM0_CONTEXT_TEMPLATE, MEM0_GRAPH_CONTEXT_TEMPLATE, @@ -126,24 +127,15 @@ def memos_search(client, user_id, query, top_k, frame="memos-local"): elif frame == "memos-api": results = client.search(query=query, user_id=user_id, top_k=top_k) - search_memories = "\n".join([f" - {item}" for item in results]) + search_memories = "\n".join( + [f" - {item['memory_value']}" for item in results["memory_detail_list"]] + ) context = MEMOS_CONTEXT_TEMPLATE.format(user_id=user_id, memories=search_memories) duration_ms = (time() - start) * 1000 return context, duration_ms -def memobase_search(client, user_id, query, top_k=20): - start = time() - memories = memobase_search_memory(client, user_id, query, max_memory_context_size=top_k * 100) - context = MEMOBASE_CONTEXT_TEMPLATE.format( - user_id=user_id, - memories=memories, - ) - duration_ms = (time() - start) * 1000 - return context, duration_ms - - def process_user(lme_df, conv_idx, frame, version, top_k=20): row = lme_df.iloc[conv_idx] question = row["question"] @@ -153,7 +145,7 @@ def process_user(lme_df, conv_idx, frame, version, top_k=20): answer = row["answer"] answer_session_ids = set(row["answer_session_ids"]) haystack_session_ids = row["haystack_session_ids"] - user_id = f"lme_exper_user_{conv_idx!s}" + user_id = f"lme_exper_user_{version}_{conv_idx}" id_to_session = dict(zip(haystack_session_ids, sessions, strict=False)) answer_sessions = [id_to_session[sid] for sid in answer_session_ids if sid in id_to_session] answer_evidences = [] @@ -166,29 +158,29 @@ def process_user(lme_df, conv_idx, frame, version, top_k=20): search_results = defaultdict(list) print("\n" + "-" * 80) - print(f"🔎 \033[1;36m[{conv_idx + 1}/{len(lme_df)}] Processing conversation {conv_idx}\033[0m") - print(f"❓ Question: \033[93m{question}\033[0m") - print(f"📅 Date: \033[92m{question_date}\033[0m") - print(f"🏷️ Type: \033[94m{question_type}\033[0m") + print(f"🔎 [{conv_idx + 1}/{len(lme_df)}] Processing conversation {conv_idx}") + print(f"❓ Question: {question}") + print(f"📅 Date: {question_date}") + print(f"🏷️ Type: {question_type}") print("-" * 80) existing_results, exists = load_existing_results(frame, version, conv_idx) if exists: - print(f"♻️ \033[93mUsing existing results for conversation {conv_idx}\033[0m") + print(f"♻️ Using existing results for conversation {conv_idx}") return existing_results if frame == "zep": client = zep_client() - print("🔌 \033[1mUsing \033[94mZep client\033[0m \033[1mfor search...\033[0m") + print("🔌 Using Zep client for search...") context, duration_ms = zep_search(client, user_id, question) elif frame == "mem0-local": client = mem0_client(mode="local") - print("🔌 \033[1mUsing \033[94mMem0 Local client\033[0m \033[1mfor search...\033[0m") + print("🔌 Using Mem0 Local client for search...") context, duration_ms = mem0_search(client, user_id, question, top_k=top_k, frame=frame) elif frame == "mem0-api": client = mem0_client(mode="api") - print("🔌 \033[1mUsing \033[94mMem0 API client\033[0m \033[1mfor 
search...\033[0m") + print("🔌 Using Mem0 API client for search...") context, duration_ms = mem0_search(client, user_id, question, top_k=top_k, frame=frame) elif frame == "memos-local": client = memos_client( @@ -201,18 +193,18 @@ def process_user(lme_df, conv_idx, frame, version, top_k=20): mem_os_config_path="configs/mos_memos_config.json", addorsearch="search", ) - print("🔌 \033[1mUsing \033[94mMemos Local client\033[0m \033[1mfor search...\033[0m") + print("🔌 Using Memos Local client for search...") context, duration_ms = memos_search(client, user_id, question, frame=frame) elif frame == "memobase": client = memobase_client() - print("🔌 \033[1mUsing \033[94mMemobase client\033[0m \033[1mfor search...\033[0m") - context, duration_ms = memobase_search_memory(client, user_id, question, top_k=top_k) + print("🔌 Using Memobase client for search...") + context, duration_ms = memobase_search_memory( + client, user_id, question, max_memory_context_size=3000 + ) elif frame == "memos-api": - client = memos_client( - mode="api", - ) - print("🔌 \033[1mUsing \033[94mMemos API client\033[0m \033[1mfor search...\033[0m") + client = MemOSAPI() + print("🔌 Using Memos API client for search...") context, duration_ms = memos_search(client, user_id, question, top_k=top_k, frame=frame) search_results[user_id].append( { @@ -231,39 +223,33 @@ def process_user(lme_df, conv_idx, frame, version, top_k=20): f"results/lme/{frame}-{version}/tmp/{frame}_lme_search_results_{conv_idx}.json", "w" ) as f: json.dump(search_results, f, indent=4) - print(f"💾 \033[92mSearch results for conversation {conv_idx} saved...\033[0m") + print(f"💾 Search results for conversation {conv_idx} saved...") print("-" * 80) return search_results def load_existing_results(frame, version, group_idx): - result_path = ( - f"results/locomo/{frame}-{version}/tmp/{frame}_locomo_search_results_{group_idx}.json" - ) + result_path = f"results/lme/{frame}-{version}/tmp/{frame}_lme_search_results_{group_idx}.json" if os.path.exists(result_path): try: with open(result_path) as f: return json.load(f), True except Exception as e: - print(f"\033[91m❌ Error loading existing results for group {group_idx}: {e}\033[0m") + print(f"❌ Error loading existing results for group {group_idx}: {e}") return {}, False def main(frame, version, top_k=20, num_workers=2): print("\n" + "=" * 80) - print(f"🔍 \033[1;36mLONGMEMEVAL SEARCH - {frame.upper()} v{version}\033[0m".center(80)) + print(f"🔍 LONGMEMEVAL SEARCH - {frame.upper()} v{version}".center(80)) print("=" * 80) lme_df = pd.read_json("data/longmemeval/longmemeval_s.json") - print( - "📚 \033[1mLoaded LongMemeval dataset\033[0m from \033[94mdata/longmemeval/longmemeval_s.json\033[0m" - ) + print("📚 Loaded LongMemeval dataset from data/longmemeval/longmemeval_s.json") num_multi_sessions = len(lme_df) - print(f"👥 Number of users: \033[93m{num_multi_sessions}\033[0m") - print( - f"⚙️ Search parameters: top_k=\033[94m{top_k}\033[0m, workers=\033[94m{num_workers}\033[0m" - ) + print(f"👥 Number of users: {num_multi_sessions}") + print(f"⚙️ Search parameters: top_k={top_k}, workers={num_workers}") print("-" * 80) all_search_results = defaultdict(list) @@ -279,32 +265,26 @@ def main(frame, version, top_k=20, num_workers=2): as_completed(future_to_idx), total=num_multi_sessions, desc="📊 Processing users" ): idx = future_to_idx[future] - try: - search_results = future.result() - for user_id, results in search_results.items(): - all_search_results[user_id].extend(results) - except Exception as e: - print(f"\033[91m❌ Error 
processing user {idx}: {e}\033[0m") + # try: + search_results = future.result() + for user_id, results in search_results.items(): + all_search_results[user_id].extend(results) + # except Exception as e: + # print(f"❌ Error processing user {idx}: {e}") end_time = datetime.now() elapsed_time = end_time - start_time elapsed_time_str = str(elapsed_time).split(".")[0] print("\n" + "=" * 80) - print("✅ \033[1;32mSEARCH COMPLETE\033[0m".center(80)) + print("✅ SEARCH COMPLETE".center(80)) print("=" * 80) - print( - f"⏱️ Total time taken to search \033[93m{num_multi_sessions}\033[0m users: \033[92m{elapsed_time_str}\033[0m" - ) - print( - f"🔄 Framework: \033[94m{frame}\033[0m | Version: \033[94m{version}\033[0m | Workers: \033[94m{num_workers}\033[0m" - ) + print(f"⏱️ Total time taken to search {num_multi_sessions} users: {elapsed_time_str}") + print(f"🔄 Framework: {frame} | Version: {version} | Workers: {num_workers}") with open(f"results/lme/{frame}-{version}/{frame}_lme_search_results.json", "w") as f: json.dump(dict(all_search_results), f, indent=4) - print( - f"📁 Results saved to: \033[1;94mresults/lme/{frame}-{version}/{frame}_lme_search_results.json\033[0m" - ) + print(f"📁 Results saved to: results/lme/{frame}-{version}/{frame}_lme_search_results.json") print("=" * 80 + "\n") @@ -314,15 +294,16 @@ def main(frame, version, top_k=20, num_workers=2): "--lib", type=str, choices=["mem0-local", "mem0-api", "memos-local", "memos-api", "zep", "memobase"], + default="memos-api", ) parser.add_argument( - "--version", type=str, default="v1", help="Version of the evaluation framework." + "--version", type=str, default="0923", help="Version of the evaluation framework." ) parser.add_argument( - "--top_k", type=int, default=20, help="Number of top results to retrieve from the search." + "--top_k", type=int, default=30, help="Number of top results to retrieve from the search." ) parser.add_argument( - "--workers", type=int, default=3, help="Number of runs for LLM-as-a-Judge evaluation." + "--workers", type=int, default=30, help="Number of runs for LLM-as-a-Judge evaluation." ) args = parser.parse_args() diff --git a/evaluation/scripts/utils/client.py b/evaluation/scripts/utils/client.py index 33aea7497..983dad9c5 100644 --- a/evaluation/scripts/utils/client.py +++ b/evaluation/scripts/utils/client.py @@ -1,13 +1,7 @@ import json import os import sys - from dotenv import load_dotenv -from mem0 import MemoryClient -from memobase import MemoBaseClient -from zep_cloud.client import Zep -from zep_cloud.types import Message - sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from memobase import ChatBlob @@ -16,8 +10,6 @@ from memos.configs.mem_os import MOSConfig from memos.mem_cube.general import GeneralMemCube from memos.mem_os.product import MOSProduct -from utils.mem0_local import Mem0Client -from utils.memos_api import MemOSAPI from utils.memos_filters import filter_memory_data @@ -25,6 +17,8 @@ def zep_client(): + from zep_cloud.client import Zep + """Initialize and return a Zep client instance.""" api_key = os.getenv("ZEP_API_KEY") zep = Zep(api_key=api_key) @@ -34,10 +28,9 @@ def zep_client(): def mem0_client(mode="local"): """Initialize and return a Mem0 client instance.""" - if mode == "local": - base_url = "http://localhost:9999" - mem0 = Mem0Client(base_url=base_url) - elif mode == "api": + if mode == "api": + from mem0 import MemoryClient + mem0 = MemoryClient(api_key=os.getenv("MEM0_API_KEY")) else: raise ValueError("Invalid mode. 
Choose 'local' or 'cloud'.")
@@ -87,13 +80,12 @@ def memos_client(
             default_mem_cube=mem_cube,
         )

-    elif mode == "api":
-        memos = MemOSAPI(base_url=os.getenv("MEMOS_BASE_URL"))
-
     return memos


 def memobase_client():
+    from memobase import MemoBaseClient
+
     client = MemoBaseClient(
         project_url=os.getenv("MEMOBASE_PROJECT_URL"),
         api_key=os.getenv("MEMOBASE_API_KEY"),
@@ -114,6 +106,7 @@ def memobase_client():
         session_id=session_id,
         user_id=user_id,
     )
+    from zep_cloud.types import Message

     messages = [
         Message(
diff --git a/evaluation/scripts/utils/memobase_utils.py b/evaluation/scripts/utils/memobase_utils.py
index dcf06ea31..f818cadec 100644
--- a/evaluation/scripts/utils/memobase_utils.py
+++ b/evaluation/scripts/utils/memobase_utils.py
@@ -1,11 +1,5 @@
 import time
-import uuid
-
-from memobase import ChatBlob
-
-
-def string_to_uuid(s: str, salt="memobase_client") -> str:
-    return str(uuid.uuid5(uuid.NAMESPACE_DNS, s + salt))
+from memobase import MemoBaseClient, ChatBlob


 def memobase_add_memory(user, message, retries=3):
@@ -24,19 +18,31 @@ def memobase_add_memory(user, message, retries=3):
 def memobase_search_memory(
     client, user_id, query, max_memory_context_size, max_retries=3, retry_delay=1
 ):
+    # Look up the Memobase user whose additional_fields carry the evaluation user_id.
+    user = None
+    users = client.get_all_users(limit=5000)
+    for u in users:
+        try:
+            if u["additional_fields"]["user_id"] == user_id:
+                user = client.get_user(u["id"], no_get=True)
+                break
+        except (KeyError, TypeError):
+            continue
+    if user is None:
+        raise ValueError(f"No Memobase user found for user_id {user_id}")
+
     retries = 0
-    real_uid = string_to_uuid(user_id)
-    u = client.get_user(real_uid, no_get=True)
     while retries < max_retries:
         try:
-            memories = u.context(
+            t = time.time()
+            memories = user.context(
                 max_token_size=max_memory_context_size,
                 chats=[{"role": "user", "content": query}],
                 event_similarity_threshold=0.2,
                 fill_window_with_events=True,
             )
-            return memories
+            return memories, (time.time() - t) * 1000
         except Exception as e:
             print(f"Error during memory search: {e}")
             print("Retrying...")
diff --git a/evaluation/scripts/utils/memos_api.py b/evaluation/scripts/utils/memos_api.py
index 7b7f2a061..5f73a78c3 100644
--- a/evaluation/scripts/utils/memos_api.py
+++ b/evaluation/scripts/utils/memos_api.py
@@ -1,63 +1,78 @@
 import json
+import os
+import traceback

 import requests
+from dotenv import load_dotenv
+
+load_dotenv()
+
+memos_key = os.getenv("MEMOS_KEY")
+memos_url = os.getenv("MEMOS_URL")


 class MemOSAPI:
-    def __init__(self, base_url: str = "http://localhost:8000"):
+    def __init__(self, base_url: str = memos_url, memos_key: str = memos_key):
         self.base_url = base_url
-        self.headers = {"Content-Type": "application/json"}
-
-    def user_register(self, user_id: str):
-        """Register a user."""
-        url = f"{self.base_url}/users/register"
-        payload = json.dumps({"user_id": user_id})
-        response = requests.request("POST", url, data=payload, headers=self.headers)
-        return response.text
+        self.headers = {"Content-Type": "application/json", "Authorization": memos_key}

-    def add(self, messages: list[dict], user_id: str | None = None):
+    def add(self, messages: list[dict], user_id: str | None = None, conv_id: str | None = None):
         """Create memories."""
-        register_res = json.loads(self.user_register(user_id))
-        cube_id = register_res["data"]["mem_cube_id"]
-        url = f"{self.base_url}/add"
-        payload = json.dumps({"messages": messages, "user_id": user_id, "mem_cube_id": cube_id})
+        retry = 0
+        while retry < 10:
+            try:
+                url = f"{self.base_url}/add/message"
+                payload = json.dumps(
+                    {"messages": messages, "user_id": user_id, "conversation_id": conv_id}
+                )
+                response = requests.request("POST", url, data=payload, headers=self.headers)
+
assert response.status_code == 200, response.text + assert json.loads(response.text)["code"] == 0, response.text + return response.text + except Exception as e: + print(f"call memos api add failed {e} retry time {retry}") + # traceback.print_exc() + retry += 1 + assert retry != 10, "add memory failed" - response = requests.request("POST", url, data=payload, headers=self.headers) - return response.text - - def search(self, query: str, user_id: str | None = None, top_k: int = 10): + def search( + self, query: str, user_id: str | None = None, conv_id: str | None = "", top_k: int = 10 + ): """Search memories.""" - url = f"{self.base_url}/search" - payload = json.dumps( - { - "query": query, - "user_id": user_id, - } - ) - - response = requests.request("POST", url, data=payload, headers=self.headers) - if response.status_code != 200: - response.raise_for_status() - else: - result = json.loads(response.text)["data"]["text_mem"][0]["memories"] - text_memories = [item["memory"] for item in result][:top_k] - return text_memories + retry = 0 + while retry < 10: + try: + url = f"{self.base_url}/search/memory" + payload = json.dumps( + { + "query": query, + "user_id": user_id, + "conversation_id": conv_id, + "memory_limit_number": top_k, + }, + ensure_ascii=False, + ) + + response = requests.request("POST", url, data=payload, headers=self.headers) + assert response.status_code == 200, response.text + assert json.loads(response.text)["code"] == 0, response.text + return json.loads(response.text)["data"] + except Exception as e: + print(f"call memos api search failed {e} retry time {retry}") + # traceback.print_exc() + retry += 1 + assert retry != 10, "search memory failed" if __name__ == "__main__": - client = MemOSAPI(base_url="http://localhost:8000") - # Example usage - try: - messages = [ - { - "role": "user", - "content": "I went to the store and bought a red apple.", - "chat_time": "2023-10-01T12:00:00Z", - } - ] - add_response = client.add(messages, user_id="user789") - print("Add memory response:", add_response) - search_response = client.search("red apple", user_id="user789", top_k=1) - print("Search memory response:", search_response) - except requests.RequestException as e: - print("An error occurred:", e) + client = MemOSAPI() + user_id = "eval_test" + conv_id = "eval_test_benchmark1_conv1" + messages = [ + {"role": "user", "content": "杭州西湖有什么好玩的"}, + {"role": "assistant", "content": "杭州西湖有好多松鼠,还有断桥"}, + ] + + memories = client.search("我最近有什么记忆", user_id=user_id, top_k=6) + response = client.add(messages, user_id, conv_id) + memories = client.search("我最近有什么记忆", user_id=user_id, top_k=6) diff --git a/evaluation/scripts/utils/mirix_utils.py b/evaluation/scripts/utils/mirix_utils.py new file mode 100644 index 000000000..51bab2fd4 --- /dev/null +++ b/evaluation/scripts/utils/mirix_utils.py @@ -0,0 +1,80 @@ +import os +import yaml + + +def get_mirix_client(config_path, load_from=None): + if os.path.exists(os.path.expanduser(f"~/.mirix")): + os.system(f"rm -rf ~/.mirix/*") + + with open(config_path, "r") as f: + agent_config = yaml.safe_load(f) + + os.environ["OPENAI_API_KEY"] = agent_config["api_key"] + import mirix + from mirix import Mirix, EmbeddingConfig, LLMConfig + + embedding_default_config = EmbeddingConfig( + embedding_model=agent_config["embedding_model_name"], + embedding_endpoint_type="openai", + embedding_endpoint=agent_config["model_endpoint"], + embedding_dim=1536, + embedding_chunk_size=8191, + ) + + llm_default_config = LLMConfig( + model=agent_config["model_name"], + 
model_endpoint_type="openai", + model_endpoint=agent_config["model_endpoint"], + api_key=agent_config["api_key"], + model_wrapper=None, + context_window=128000, + ) + + def embedding_default_config_func(cls, model_name=None, provider=None): + return embedding_default_config + + def llm_default_config_func(cls, model_name=None, provider=None): + return llm_default_config + + mirix.EmbeddingConfig.default_config = embedding_default_config_func + mirix.LLMConfig.default_config = llm_default_config_func + + assistant = Mirix( + api_key=agent_config["api_key"], + config_path=config_path, + model=agent_config["model_name"], + load_from=load_from, + ) + return assistant + + +if __name__ == "__main__": + config_path = "configs-example/mirix_config.yaml" + out_dir = "results/mirix-test" + + assistant = get_mirix_client(config_path) + + chunks = [ + "I prefer coffee over tea", + "My work hours are 9 AM to 5 PM", + "Important meeting with client on Friday at 2 PM", + ] + + for idx, chunk in tqdm(enumerate(chunks), total=len(chunks)): + response = assistant.add(chunk) + + assistant.save(out_dir) + + assistant = get_mirix_client(config_path, load_from=out_dir) + response = assistant.chat("What's my schedule like this week?") + + print(response) + assistant.create_user(user_name="user1") + assistant.create_user(user_name="user2") + user1 = assistant.get_user_by_name(user_name="user1") + user2 = assistant.get_user_by_name(user_name="user2") + assistant.add("i prefer tea over coffee", user_id=user1.id) + assistant.add("my favourite drink is coke", user_id=user2.id) + response1 = assistant.chat("What drink do I prefer?", user_id=user1.id) + response2 = assistant.chat("What drink do I prefer?", user_id=user2.id) + print(response1, response2) diff --git a/pyproject.toml b/pyproject.toml index eae2e8050..8f885e34a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ ############################################################################## name = "MemoryOS" -version = "1.0.1" +version = "1.1.1" description = "Intelligence Begins with Memory" license = {text = "Apache-2.0"} readme = "README.md" diff --git a/src/memos/__init__.py b/src/memos/__init__.py index 0f6dd2937..34987f2c0 100644 --- a/src/memos/__init__.py +++ b/src/memos/__init__.py @@ -1,4 +1,4 @@ -__version__ = "1.0.1" +__version__ = "1.1.1" from memos.configs.mem_cube import GeneralMemCubeConfig from memos.configs.mem_os import MOSConfig diff --git a/src/memos/mem_reader/simple_struct.py b/src/memos/mem_reader/simple_struct.py index b439cb2b2..03d440f70 100644 --- a/src/memos/mem_reader/simple_struct.py +++ b/src/memos/mem_reader/simple_struct.py @@ -1,19 +1,15 @@ import concurrent.futures import copy import json -import os import re from abc import ABC from typing import Any -from tqdm import tqdm - from memos import log from memos.chunkers import ChunkerFactory from memos.configs.mem_reader import SimpleStructMemReaderConfig from memos.configs.parser import ParserConfigFactory -from memos.context.context import ContextThreadPoolExecutor from memos.embedders.factory import EmbedderFactory from memos.llms.factory import LLMFactory from memos.mem_reader.base import BaseMemReader @@ -23,12 +19,10 @@ SIMPLE_STRUCT_DOC_READER_PROMPT, SIMPLE_STRUCT_DOC_READER_PROMPT_ZH, SIMPLE_STRUCT_MEM_READER_EXAMPLE, - SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH, - SIMPLE_STRUCT_MEM_READER_PROMPT, SIMPLE_STRUCT_MEM_READER_PROMPT_ZH, + SIMPLE_STRUCT_MEM_READER_PROMPT, + SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH, ) -from memos.utils import timed - logger = 
log.get_logger(__name__) PROMPT_DICT = { @@ -55,63 +49,6 @@ def detect_lang(text): return "en" -def _build_node(idx, message, info, scene_file, llm, parse_json_result, embedder): - # generate - try: - raw = llm.generate(message) - if not raw: - logger.warning(f"[LLM] Empty generation for input: {message}") - return None - except Exception as e: - logger.error(f"[LLM] Exception during generation: {e}") - return None - - # parse_json_result - try: - chunk_res = parse_json_result(raw) - if not chunk_res: - logger.warning(f"[Parse] Failed to parse result: {raw}") - return None - except Exception as e: - logger.error(f"[Parse] Exception during JSON parsing: {e}") - return None - - try: - value = chunk_res.get("value", "").strip() - if not value: - logger.warning("[BuildNode] value is empty") - return None - - tags = chunk_res.get("tags", []) - if not isinstance(tags, list): - tags = [] - - key = chunk_res.get("key", None) - - embedding = embedder.embed([value])[0] - - return TextualMemoryItem( - memory=value, - metadata=TreeNodeTextualMemoryMetadata( - user_id=info.get("user_id", ""), - session_id=info.get("session_id", ""), - memory_type="LongTermMemory", - status="activated", - tags=tags, - key=key, - embedding=embedding, - usage=[], - sources=[{"type": "doc", "doc_path": f"{scene_file}_{idx}"}], - background="", - confidence=0.99, - type="fact", - ), - ) - except Exception as e: - logger.error(f"[BuildNode] Error building node: {e}") - return None - - class SimpleStructMemReader(BaseMemReader, ABC): """Naive implementation of MemReader.""" @@ -127,77 +64,44 @@ def __init__(self, config: SimpleStructMemReaderConfig): self.embedder = EmbedderFactory.from_config(config.embedder) self.chunker = ChunkerFactory.from_config(config.chunker) - @timed def _process_chat_data(self, scene_data_info, info): - mem_list = [] - for item in scene_data_info: - if "chat_time" in item: - mem = item["role"] + ": " + f"[{item['chat_time']}]: " + item["content"] - mem_list.append(mem) - else: - mem = item["role"] + ":" + item["content"] - mem_list.append(mem) - lang = detect_lang("\n".join(mem_list)) + lang = detect_lang("\n".join(scene_data_info)) template = PROMPT_DICT["chat"][lang] examples = PROMPT_DICT["chat"][f"{lang}_example"] - prompt = template.replace("${conversation}", "\n".join(mem_list)) + prompt = template.replace("${conversation}", "\n".join(scene_data_info)) if self.config.remove_prompt_example: prompt = prompt.replace(examples, "") messages = [{"role": "user", "content": prompt}] - try: - response_text = self.llm.generate(messages) - response_json = self.parse_json_result(response_text) - except Exception as e: - logger.error(f"[LLM] Exception during chat generation: {e}") - response_json = { - "memory list": [ - { - "key": "\n".join(mem_list)[:10], - "memory_type": "UserMemory", - "value": "\n".join(mem_list), - "tags": [], - } - ], - "summary": "\n".join(mem_list), - } + response_text = self.llm.generate(messages) + response_json = self.parse_json_result(response_text) chat_read_nodes = [] for memory_i_raw in response_json.get("memory list", []): - try: - memory_type = ( - memory_i_raw.get("memory_type", "LongTermMemory") + node_i = TextualMemoryItem( + memory=memory_i_raw.get("value", ""), + metadata=TreeNodeTextualMemoryMetadata( + user_id=info.get("user_id"), + session_id=info.get("session_id"), + memory_type=memory_i_raw.get("memory_type", "") .replace("长期记忆", "LongTermMemory") - .replace("用户记忆", "UserMemory") - ) - - if memory_type not in ["LongTermMemory", "UserMemory"]: - memory_type = 
"LongTermMemory" - - node_i = TextualMemoryItem( - memory=memory_i_raw.get("value", ""), - metadata=TreeNodeTextualMemoryMetadata( - user_id=info.get("user_id"), - session_id=info.get("session_id"), - memory_type=memory_type, - status="activated", - tags=memory_i_raw.get("tags", []) - if type(memory_i_raw.get("tags", [])) is list - else [], - key=memory_i_raw.get("key", ""), - embedding=self.embedder.embed([memory_i_raw.get("value", "")])[0], - usage=[], - sources=scene_data_info, - background=response_json.get("summary", ""), - confidence=0.99, - type="fact", - ), - ) - chat_read_nodes.append(node_i) - except Exception as e: - logger.error(f"[ChatReader] Error parsing memory item: {e}") + .replace("用户记忆", "UserMemory"), + status="activated", + tags=memory_i_raw.get("tags", []) + if type(memory_i_raw.get("tags", [])) is list + else [], + key=memory_i_raw.get("key", ""), + embedding=self.embedder.embed([memory_i_raw.get("value", "")])[0], + usage=[], + sources=scene_data_info, + background=response_json.get("summary", ""), + confidence=0.99, + type="fact", + ), + ) + chat_read_nodes.append(node_i) return chat_read_nodes @@ -250,8 +154,8 @@ def get_memory( else: processing_func = self._process_doc_data - # Process Q&A pairs concurrently with context propagation - with ContextThreadPoolExecutor() as executor: + # Process Q&A pairs concurrently + with concurrent.futures.ThreadPoolExecutor() as executor: futures = [ executor.submit(processing_func, scene_data_info, info) for scene_data_info in list_scene_data_info @@ -289,9 +193,11 @@ def get_scene_data_info(self, scene_data: list, type: str) -> list[str]: for item in items: # Convert dictionary to string if "chat_time" in item: - result.append(item) + mem = item["role"] + ": " + f"[{item['chat_time']}]: " + item["content"] + result.append(mem) else: - result.append(item) + mem = item["role"] + ":" + item["content"] + result.append(mem) if len(result) >= 10: results.append(result) context = copy.deepcopy(result[-2:]) @@ -301,22 +207,18 @@ def get_scene_data_info(self, scene_data: list, type: str) -> list[str]: elif type == "doc": for item in scene_data: try: - if os.path.exists(item): - try: - parsed_text = parser.parse(item) - results.append({"file": item, "text": parsed_text}) - except Exception as e: - logger.error(f"[SceneParser] Error parsing {item}: {e}") - continue + if not isinstance(item, str): + parsed_text = parser.parse(item) + results.append({"file": "pure_text", "text": parsed_text}) else: parsed_text = item - results.append({"file": "pure_text", "text": parsed_text}) + results.append({"file": item, "text": parsed_text}) except Exception as e: print(f"Error parsing file {item}: {e!s}") return results - def _process_doc_data(self, scene_data_info, info, **kwargs): + def _process_doc_data(self, scene_data_info, info): chunks = self.chunker.chunk(scene_data_info["text"]) messages = [] for chunk in chunks: @@ -326,35 +228,36 @@ def _process_doc_data(self, scene_data_info, info, **kwargs): message = [{"role": "user", "content": prompt}] messages.append(message) - doc_nodes = [] - scene_file = scene_data_info["file"] - - with ContextThreadPoolExecutor(max_workers=50) as executor: - futures = { - executor.submit( - _build_node, - idx, - msg, - info, - scene_file, - self.llm, - self.parse_json_result, - self.embedder, - ): idx - for idx, msg in enumerate(messages) - } - total = len(futures) + processed_chunks = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: + futures = [executor.submit(self.llm.generate, 
message) for message in messages] + for future in concurrent.futures.as_completed(futures): + chunk_result = future.result() + if chunk_result: + processed_chunks.append(chunk_result) - for future in tqdm( - concurrent.futures.as_completed(futures), total=total, desc="Processing" - ): - try: - node = future.result() - if node: - doc_nodes.append(node) - except Exception as e: - tqdm.write(f"[ERROR] {e}") - logger.error(f"[DocReader] Future task failed: {e}") + processed_chunks = [self.parse_json_result(r) for r in processed_chunks] + doc_nodes = [] + for i, chunk_res in enumerate(processed_chunks): + if chunk_res: + node_i = TextualMemoryItem( + memory=chunk_res["value"], + metadata=TreeNodeTextualMemoryMetadata( + user_id=info.get("user_id"), + session_id=info.get("session_id"), + memory_type="LongTermMemory", + status="activated", + tags=chunk_res["tags"] if type(chunk_res["tags"]) is list else [], + key=chunk_res["key"], + embedding=self.embedder.embed([chunk_res["value"]])[0], + usage=[], + sources=[f"{scene_data_info['file']}_{i}"], + background="", + confidence=0.99, + type="fact", + ), + ) + doc_nodes.append(node_i) return doc_nodes def parse_json_result(self, response_text): @@ -362,14 +265,14 @@ def parse_json_result(self, response_text): json_start = response_text.find("{") response_text = response_text[json_start:] response_text = response_text.replace("```", "").strip() - if not response_text.endswith("}"): + if response_text[-1] != "}": response_text += "}" - return json.loads(response_text) + response_json = json.loads(response_text) + return response_json except json.JSONDecodeError as e: - logger.error(f"[JSONParse] Failed to decode JSON: {e}\nRaw:\n{response_text}") - return {} - except Exception as e: - logger.error(f"[JSONParse] Unexpected error: {e}") + logger.warning( + f"Failed to parse LLM response as JSON: {e}\nRaw response:\n{response_text}" + ) return {} def transform_memreader(self, data: dict) -> list[TextualMemoryItem]: diff --git a/src/memos/templates/mem_reader_prompts.py b/src/memos/templates/mem_reader_prompts.py index 15672f8d8..dda53f7a3 100644 --- a/src/memos/templates/mem_reader_prompts.py +++ b/src/memos/templates/mem_reader_prompts.py @@ -1,56 +1,50 @@ SIMPLE_STRUCT_MEM_READER_PROMPT = """You are a memory extraction expert. -Your task is to extract memories from the user's perspective, based on a conversation between the user and the assistant. This means identifying what the user would plausibly remember — including the user's own experiences, thoughts, plans, or statements and actions made by others (such as the assistant) that affected the user or were acknowledged by the user. - -Please perform the following: -1. Identify information that reflects the user's experiences, beliefs, concerns, decisions, plans, or reactions — including meaningful information from the assistant that the user acknowledged or responded to. - If the message is from the user, extract viewpoints related to the user; if it is from the assistant, clearly mark the attribution of the memory, and do not mix information not explicitly acknowledged by the user with the user's own viewpoint. - - **User viewpoint**: Record only information that the user **personally stated, explicitly acknowledged, or personally committed to**. - - **Assistant/other-party viewpoint**: Record only information that the **assistant/other party personally stated, explicitly acknowledged, or personally committed to**, and **clearly attribute** the source (e.g., "[assistant-Jerry viewpoint]"). 
Do not rewrite it as the user's preference/decision. - - **Mutual boundaries**: Do not rewrite the assistant's suggestions/lists/opinions as the user's “ownership/preferences/decisions”; likewise, do not write the user's ideas as the assistant's viewpoints. - -2. Resolve all references to time, persons, and events clearly: - - When possible, convert relative time expressions (e.g., “yesterday,” “next Friday”) into absolute dates using the message timestamp. - - Clearly distinguish between **event time** and **message time**. +Your task is to extract memories from the perspective of user, based on a conversation between user and assistant. This means identifying what user would plausibly remember — including their own experiences, thoughts, plans, or relevant statements and actions made by others (such as assistant) that impacted or were acknowledged by user. +Please perform: +1. Identify information that reflects user's experiences, beliefs, concerns, decisions, plans, or reactions — including meaningful input from assistant that user acknowledged or responded to. +If the message is from the user, extract user-relevant memories; if it is from the assistant, only extract factual memories that the user acknowledged or responded to. + +2. Resolve all time, person, and event references clearly: + - Convert relative time expressions (e.g., “yesterday,” “next Friday”) into absolute dates using the message timestamp if possible. + - Clearly distinguish between event time and message time. - If uncertainty exists, state it explicitly (e.g., “around June 2025,” “exact date unclear”). - Include specific locations if mentioned. - - Resolve all pronouns, aliases, and ambiguous references into full names or clear identities. - - If there are people with the same name, disambiguate them. - -3. Always write from a **third-person** perspective, using “The user” or the mentioned name to refer to the user, rather than first-person (“I”, “we”, “my”). - For example, write “The user felt exhausted …” instead of “I felt exhausted …”. - -4. Do not omit any information that the user is likely to remember. - - Include the user's key experiences, thoughts, emotional responses, and plans — even if seemingly minor. - - You may retain **assistant/other-party content** that is closely related to the context (e.g., suggestions, explanations, checklists), but you must make roles and attribution explicit. - - Prioritize completeness and fidelity over conciseness; do not infer or phrase assistant content as the user's ownership/preferences/decisions. - - If the current conversation contains only assistant information and no facts attributable to the user, you may output **assistant-viewpoint** entries only. - -5. Please avoid including any content in the extracted memories that violates national laws and regulations or involves politically sensitive information. + - Resolve all pronouns, aliases, and ambiguous references into full names or identities. + - Disambiguate people with the same name if applicable. +3. Always write from a third-person perspective, referring to user as +"The user" or by name if name mentioned, rather than using first-person ("I", "me", "my"). +For example, write "The user felt exhausted..." instead of "I felt exhausted...". +4. Do not omit any information that user is likely to remember. + - Include all key experiences, thoughts, emotional responses, and plans — even if they seem minor. + - Prioritize completeness and fidelity over conciseness. 
+ - Do not generalize or skip details that could be personally meaningful to user. +5. Please avoid any content that violates national laws and regulations or involves politically sensitive information in the memories you extract. -Return a valid JSON object with the following structure: +Return a single valid JSON object with the following structure: { "memory list": [ { - "key": , - "memory_type": , - "value": , - "tags": + "key": , + "memory_type": , + "value": , + "tags": }, ... ], - "summary": + "summary": } Language rules: -- The `key`, `value`, `tags`, and `summary` fields must match the primary language of the input conversation. **If the input is Chinese, output in Chinese.** +- The `key`, `value`, `tags`, `summary` fields must match the mostly used language of the input conversation. **如果输入是中文,请输出中文** - Keep `memory_type` in English. Example: Conversation: user: [June 26, 2025 at 3:00 PM]: Hi Jerry! Yesterday at 3 PM I had a meeting with my team about the new project. assistant: Oh Tom! Do you think the team can finish by December 15? -user: [June 26, 2025 at 3:00 PM]: I’m worried. The backend won’t be done until December 10, so testing will be tight. +user: [June 26, 2025 at 3:00 PM]: I’m worried. The backend won’t be done until +December 10, so testing will be tight. assistant: [June 26, 2025 at 3:00 PM]: Maybe propose an extension? user: [June 26, 2025 at 4:21 PM]: Good idea. I’ll raise it in tomorrow’s 9:30 AM meeting—maybe shift the deadline to January 5. @@ -60,62 +54,31 @@ { "key": "Initial project meeting", "memory_type": "LongTermMemory", - "value": "[user-Tom viewpoint] On June 25, 2025 at 3:00 PM, Tom met with the team to discuss a new project. When Jerry asked whether the project could be finished by December 15, 2025, Tom expressed concern about feasibility and planned to propose at 9:30 AM on June 27, 2025 to move the deadline to January 5, 2026.", + "value": "On June 25, 2025 at 3:00 PM, Tom held a meeting with their team to discuss a new project. The conversation covered the timeline and raised concerns about the feasibility of the December 15, 2025 deadline.", "tags": ["project", "timeline", "meeting", "deadline"] }, { - "key": "Jerry’s suggestion about the deadline", - "memory_type": "LongTermMemory", - "value": "[assistant-Jerry viewpoint] Jerry questioned the December 15 deadline and suggested considering an extension.", - "tags": ["deadline change", "suggestion"] - } + "key": "Planned scope adjustment", + "memory_type": "UserMemory", + "value": "Tom planned to suggest in a meeting on June 27, 2025 at 9:30 AM that the team should prioritize features and propose shifting the project deadline to January 5, 2026.", + "tags": ["planning", "deadline change", "feature prioritization"] + }, ], - "summary": "Tom is currently working on a tight-schedule project. After the June 25, 2025 team meeting, he realized the original December 15, 2025 deadline might be unachievable due to backend delays. Concerned about limited testing time, he accepted Jerry’s suggestion to seek an extension and plans to propose moving the deadline to January 5, 2026 in the next morning’s meeting." + "summary": "Tom is currently focused on managing a new project with a tight schedule. After a team meeting on June 25, 2025, he realized the original deadline of December 15 might not be feasible due to backend delays. Concerned about insufficient testing time, he welcomed Jerry’s suggestion of proposing an extension. 
Tom plans to raise the idea of shifting the deadline to January 5, 2026 in the next morning’s meeting. His actions reflect both stress about timelines and a proactive, team-oriented problem-solving approach." } -Another Example in Chinese (Note: when the user's language is Chinese, you must also output in Chinese): - -对话(节选): -user|2025-07-12 19:30:下个月同事婚礼,要买条裙子,预算1500内,别太暴露。 -assistant|19:32 -:了解~婚礼场合建议中长款,端庄一些。可以看法式茶歇/直筒/裹身三路。颜色上,奶油白、浅粉、雾霾蓝都适合;面料可看醋酸/真丝混纺(垂感好但需干洗)。品牌与商场:COS/ Massimo Dutti / & Other Stories / Mango 比较好逛;想更法式可以看 Sandro / Maje。 我记得你很喜欢Maje? -user|19:35:不喜欢亮色。国贸方便。 -assistant|19:37:那亮度就压低些:雾霾蓝、藕粉、燕麦米白更稳。国贸有 COS / Massimo Dutti / &OS;可以先看 COS 直筒或裹身,再去 MD 看藕粉/雾霾蓝。搭配上,薄披肩/小西装都能压场。 -user|19:40:165cm,S码;最好有口袋。 -assistant|19:42:OK。版型避开大摆+高开衩,选直筒或轻A字更稳;若要口袋,COS直筒一些款有;开衩别太高,及小腿肚附近即可。鞋包:中跟裸色鞋 + 小号手拿包。 -user|19:47:本来想茶歇A字,但雾霾蓝直筒也行。 -assistant|19:49:可以先锁定:COS 雾霾蓝直筒中长为主选,MD 藕粉中长做备选;再看 & Other Stories 是否有合身的裹身款。 -user|19:52:行,周六(7/19)去国贸试,合适就买。 -assistant|19:54:记得看肩带可调/有无内衬,醋酸/真丝优先干洗;准备同色安全裤/防走光贴。如果当天没货,可下单调货或线上下单门店自提。 - +Another Example in Chinese (注意: 当user的语言为中文时,你就需要也输出中文): { "memory list": [ { - "key": "参加婚礼购买裙子", - "memory_type": "UserMemory", - "value": "[user观点]用户计划于约2025年8月参加同事婚礼(具体日期不详),预算不超过1500元,整体风格不宜暴露;用户已决定在2025-07-19于国贸试穿并视合适即购买。", - "tags": ["婚礼", "预算", "国贸", "计划"] - }, - { - "key": "审美与版型偏好", - "memory_type": "UserMemory", - "value": "[user观点]用户不喜欢亮色,倾向低亮度色系;裙装偏好端庄的中长款,接受直筒或轻A字。", - "tags": ["偏好", "颜色", "版型"] - }, - { - "key": "体型尺码", - "memory_type": "UserMemory", - "value": [user观点]"用户身高约165cm、常穿S码", - "tags": ["体型", "尺码"] - }, - { - "key": "关于用户选购裙子的建议", + "key": "项目会议", "memory_type": "LongTermMemory", - "value": "[assistant观点]assistant在用户询问婚礼穿着时,建议在国贸优先逛COS查看雾霾蓝直筒中长为主选,Massimo Dutti藕粉中长为备选;该建议与用户“国贸方便”“雾霾蓝直筒也行”的回应相一致,另外assistant也提到user喜欢Maje,但User并未回应或证实该说法。", - "tags": ["婚礼穿着", "门店", "选购路线"] - } + "value": "在2025年6月25日下午3点,Tom与团队开会讨论了新项目,涉及时间表,并提出了对12月15日截止日期可行性的担忧。", + "tags": ["项目", "时间表", "会议", "截止日期"] + }, + ... ], - "summary": "用户计划在约2025年8月参加同事婚礼,预算≤1500并偏好端庄的中长款;确定于2025-07-19在国贸试穿。其长期画像显示:不喜欢亮色、偏好低亮度色系与不过分暴露的版型,身高约165cm、S码且偏好裙装带口袋。助手提出的国贸选购路线以COS雾霾蓝直筒中长为主选、MD藕粉中长为备选,且与用户回应一致,为线下试穿与购买提供了明确路径。" + "summary": "Tom 目前专注于管理一个进度紧张的新项目..." } Always respond in the same language as the conversation. @@ -125,32 +88,28 @@ Your Output:""" -SIMPLE_STRUCT_MEM_READER_PROMPT_ZH = """您是记忆提取专家。 +SIMPLE_STRUCT_MEM_READER_PROMPT_ZH = """您是记忆提取专家。 您的任务是根据用户与助手之间的对话,从用户的角度提取记忆。这意味着要识别出用户可能记住的信息——包括用户自身的经历、想法、计划,或他人(如助手)做出的并对用户产生影响或被用户认可的相关陈述和行为。 -请执行以下操作: -1. 识别反映用户经历、信念、关切、决策、计划或反应的信息——包括用户认可或回应的来自助手的有意义信息。 -如果消息来自用户,请提取与用户相关的观点;如果来自助手,则在表达的时候表明记忆归属方,未经用户明确认可的信息不要与用户本身的观点混淆。 - - **用户观点**:仅记录由**用户亲口陈述、明确认可或自己作出承诺**的信息。 - - **助手观点**:仅记录由**助手/另一方亲口陈述、明确认可或自己作出承诺**的信息。 - - **互不越界**:不得将助手提出的需求清单/建议/观点改写为用户的“拥有/偏好/决定”;也不得把用户的想法写成助手的观点。 - -2. 清晰解析所有时间、人物和事件的指代: - - 如果可能,使用消息时间戳将相对时间表达(如“昨天”、“下周五”)转换为绝对日期。 - - 明确区分事件时间和消息时间。 - - 如果存在不确定性,需明确说明(例如,“约2025年6月”,“具体日期不详”)。 - - 若提及具体地点,请包含在内。 - - 将所有代词、别名和模糊指代解析为全名或明确身份。 +请执行以下操作: +1. 识别反映用户经历、信念、关切、决策、计划或反应的信息——包括用户认可或回应的来自助手的有意义信息。 +如果消息来自用户,请提取与用户相关的记忆;如果来自助手,则仅提取用户认可或回应的事实性记忆。 + +2. 清晰解析所有时间、人物和事件的指代: + - 如果可能,使用消息时间戳将相对时间表达(如“昨天”、“下周五”)转换为绝对日期。 + - 明确区分事件时间和消息时间。 + - 如果存在不确定性,需明确说明(例如,“约2025年6月”,“具体日期不详”)。 + - 若提及具体地点,请包含在内。 + - 将所有代词、别名和模糊指代解析为全名或明确身份。 - 如有同名人物,需加以区分。 -3. 始终以第三人称视角撰写,使用“用户”或提及的姓名来指代用户,而不是使用第一人称(“我”、“我们”、“我的”)。 +3. 始终以第三人称视角撰写,使用“用户”或提及的姓名来指代用户,而不是使用第一人称(“我”、“我们”、“我的”)。 例如,写“用户感到疲惫……”而不是“我感到疲惫……”。 -4. 
不要遗漏用户可能记住的任何信息。 - - 包括用户的关键经历、想法、情绪反应和计划——即使看似微小。 - - 同时允许保留与语境密切相关的**助手/另一方的内容**(如建议、说明、清单),但须明确角色与归因。 - - 优先考虑完整性和保真度,而非简洁性;不得将助手内容推断或措辞为用户拥有/偏好/决定。 - - 若当前对话中仅出现助手信息而无可归因于用户的事实,可仅输出**助手观点**条目。 +4. 不要遗漏用户可能记住的任何信息。 + - 包括所有关键经历、想法、情绪反应和计划——即使看似微小。 + - 优先考虑完整性和保真度,而非简洁性。 + - 不要泛化或跳过对用户具有个人意义的细节。 5. 请避免在提取的记忆中包含违反国家法律法规或涉及政治敏感的信息。 @@ -169,89 +128,54 @@ "summary": <从用户视角自然总结上述记忆的段落,120–200字,与输入语言一致> } -语言规则: -- `key`、`value`、`tags`、`summary` 字段必须与输入对话的主要语言一致。**如果输入是中文,请输出中文** +语言规则: +- `key`、`value`、`tags`、`summary` 字段必须与输入对话的主要语言一致。**如果输入是中文,请输出中文** - `memory_type` 保持英文。 -示例: -对话: -user: [2025年6月26日下午3:00]:嗨Jerry!昨天下午3点我和团队开了个会,讨论新项目。 -assistant: 哦Tom!你觉得团队能在12月15日前完成吗? -user: [2025年6月26日下午3:00]:我有点担心。后端要到12月10日才能完成,所以测试时间会很紧。 -assistant: [2025年6月26日下午3:00]:也许提议延期? +示例: +对话: +user: [2025年6月26日下午3:00]:嗨Jerry!昨天下午3点我和团队开了个会,讨论新项目。 +assistant: 哦Tom!你觉得团队能在12月15日前完成吗? +user: [2025年6月26日下午3:00]:我有点担心。后端要到12月10日才能完成,所以测试时间会很紧。 +assistant: [2025年6月26日下午3:00]:也许提议延期? user: [2025年6月26日下午4:21]:好主意。我明天上午9:30的会上提一下——也许把截止日期推迟到1月5日。 -输出: +输出: { "memory list": [ { "key": "项目初期会议", "memory_type": "LongTermMemory", - "value": "[user-Tom观点]2025年6月25日下午3:00,Tom与团队开会讨论新项目。当Jerry - 询问该项目能否在2025年12月15日前完成时,Tom对此日期前完成的可行性表达担忧,并计划在2025年6月27日上午9:30 - 提议将截止日期推迟至2026年1月5日。", + "value": "2025年6月25日下午3:00,Tom与团队开会讨论新项目。会议涉及时间表,并提出了对2025年12月15日截止日期可行性的担忧。", "tags": ["项目", "时间表", "会议", "截止日期"] }, { - "key": "Jerry对新项目截止日期的建议", - "memory_type": "LongTermMemory", - "value": "[assistant-Jerry观点]Jerry对Tom的新项目截止日期提出疑问、并提议Tom考虑延期。", - "tags": ["截止日期变更", "建议"] + "key": "计划调整范围", + "memory_type": "UserMemory", + "value": "Tom计划在2025年6月27日上午9:30的会议上建议团队优先处理功能,并提议将项目截止日期推迟至2026年1月5日。", + "tags": ["计划", "截止日期变更", "功能优先级"] } ], - "summary": "Tom目前正在做一个进度紧张的新项目。在2025年6月25日的团队会议后,他意识到原定2025年12月15 - 日的截止日期可能无法实现,因为后端会延迟。由于担心测试时间不足,他接受了Jerry提出的延期建议,计划在次日早上的会议上提出将截止日期推迟至2026 - 年1月5日。" + "summary": "Tom目前正专注于管理一个进度紧张的新项目。在2025年6月25日的团队会议后,他意识到原定2025年12月15日的截止日期可能无法实现,因为后端会延迟。由于担心测试时间不足,他接受了Jerry提出的延期建议。Tom计划在次日早上的会议上提出将截止日期推迟至2026年1月5日。他的行为反映出对时间线的担忧,以及积极、以团队为导向的问题解决方式。" } -另一个中文示例(注意:当用户语言为中文时,您也需输出中文): - -对话(节选): -user|2025-07-12 19:30:下个月同事婚礼,要买条裙子,预算1500内,别太暴露。 -assistant|19:32 -:了解~婚礼场合建议中长款,端庄一些。可以看法式茶歇/直筒/裹身三路。颜色上,奶油白、浅粉、雾霾蓝都适合;面料可看醋酸/真丝混纺(垂感好但需干洗)。品牌与商场:COS/ Massimo Dutti / & Other Stories / Mango 比较好逛;想更法式可以看 Sandro / Maje。 我记得你很喜欢Maje? 
-user|19:35:不喜欢亮色。国贸方便。 -assistant|19:37:那亮度就压低些:雾霾蓝、藕粉、燕麦米白更稳。国贸有 COS / Massimo Dutti / &OS;可以先看 COS 直筒或裹身,再去 MD 看藕粉/雾霾蓝。搭配上,薄披肩/小西装都能压场。 -user|19:40:165cm,S码;最好有口袋。 -assistant|19:42:OK。版型避开大摆+高开衩,选直筒或轻A字更稳;若要口袋,COS直筒一些款有;开衩别太高,及小腿肚附近即可。鞋包:中跟裸色鞋 + 小号手拿包。 -user|19:47:本来想茶歇A字,但雾霾蓝直筒也行。 -assistant|19:49:可以先锁定:COS 雾霾蓝直筒中长为主选,MD 藕粉中长做备选;再看 & Other Stories 是否有合身的裹身款。 -user|19:52:行,周六(7/19)去国贸试,合适就买。 -assistant|19:54:记得看肩带可调/有无内衬,醋酸/真丝优先干洗;准备同色安全裤/防走光贴。如果当天没货,可下单调货或线上下单门店自提。 - +另一个中文示例(注意:当用户语言为中文时,您也需输出中文): { "memory list": [ { - "key": "参加婚礼购买裙子", - "memory_type": "UserMemory", - "value": "[user观点]用户计划于约2025年8月参加同事婚礼(具体日期不详),预算不超过1500元,整体风格不宜暴露;用户已决定在2025-07-19于国贸试穿并视合适即购买。", - "tags": ["婚礼", "预算", "国贸", "计划"] - }, - { - "key": "审美与版型偏好", - "memory_type": "UserMemory", - "value": "[user观点]用户不喜欢亮色,倾向低亮度色系;裙装偏好端庄的中长款,接受直筒或轻A字。", - "tags": ["偏好", "颜色", "版型"] - }, - { - "key": "体型尺码", - "memory_type": "UserMemory", - "value": [user观点]"用户身高约165cm、常穿S码", - "tags": ["体型", "尺码"] - }, - { - "key": "关于用户选购裙子的建议", + "key": "项目会议", "memory_type": "LongTermMemory", - "value": "[assistant观点]assistant在用户询问婚礼穿着时,建议在国贸优先逛COS查看雾霾蓝直筒中长为主选,Massimo Dutti藕粉中长为备选;该建议与用户“国贸方便”“雾霾蓝直筒也行”的回应相一致,另外assistant也提到user喜欢Maje,但User并未回应或证实该说法。", - "tags": ["婚礼穿着", "门店", "选购路线"] - } + "value": "在2025年6月25日下午3点,Tom与团队开会讨论了新项目,涉及时间表,并提出了对12月15日截止日期可行性的担忧。", + "tags": ["项目", "时间表", "会议", "截止日期"] + }, + ... ], - "summary": "用户计划在约2025年8月参加同事婚礼,预算≤1500并偏好端庄的中长款;确定于2025-07-19在国贸试穿。其长期画像显示:不喜欢亮色、偏好低亮度色系与不过分暴露的版型,身高约165cm、S码且偏好裙装带口袋。助手提出的国贸选购路线以COS雾霾蓝直筒中长为主选、MD藕粉中长为备选,且与用户回应一致,为线下试穿与购买提供了明确路径。" + "summary": "Tom 目前专注于管理一个进度紧张的新项目..." } 请始终使用与对话相同的语言进行回复。 -对话: +对话: ${conversation} 您的输出:""" @@ -294,22 +218,22 @@ Your Output:""" -SIMPLE_STRUCT_DOC_READER_PROMPT_ZH = """您是搜索与检索系统的文本分析专家。 +SIMPLE_STRUCT_DOC_READER_PROMPT_ZH = """您是搜索与检索系统的文本分析专家。 您的任务是处理文档片段,并生成一个结构化的 JSON 对象。 -请执行以下操作: -1. 识别反映文档中事实内容、见解、决策或含义的关键信息——包括任何显著的主题、结论或数据点,使读者无需阅读原文即可充分理解该片段的核心内容。 -2. 清晰解析所有时间、人物、地点和事件的指代: - - 如果上下文允许,将相对时间表达(如“去年”、“下一季度”)转换为绝对日期。 - - 明确区分事件时间和文档时间。 - - 如果存在不确定性,需明确说明(例如,“约2024年”,“具体日期不详”)。 - - 若提及具体地点,请包含在内。 - - 将所有代词、别名和模糊指代解析为全名或明确身份。 - - 如有同名实体,需加以区分。 -3. 始终以第三人称视角撰写,清晰指代主题或内容,避免使用第一人称(“我”、“我们”、“我的”)。 -4. 不要遗漏文档摘要中可能重要或值得记忆的任何信息。 - - 包括所有关键事实、见解、情感基调和计划——即使看似微小。 - - 优先考虑完整性和保真度,而非简洁性。 +请执行以下操作: +1. 识别反映文档中事实内容、见解、决策或含义的关键信息——包括任何显著的主题、结论或数据点,使读者无需阅读原文即可充分理解该片段的核心内容。 +2. 清晰解析所有时间、人物、地点和事件的指代: + - 如果上下文允许,将相对时间表达(如“去年”、“下一季度”)转换为绝对日期。 + - 明确区分事件时间和文档时间。 + - 如果存在不确定性,需明确说明(例如,“约2024年”,“具体日期不详”)。 + - 若提及具体地点,请包含在内。 + - 将所有代词、别名和模糊指代解析为全名或明确身份。 + - 如有同名实体,需加以区分。 +3. 始终以第三人称视角撰写,清晰指代主题或内容,避免使用第一人称(“我”、“我们”、“我的”)。 +4. 不要遗漏文档摘要中可能重要或值得记忆的任何信息。 + - 包括所有关键事实、见解、情感基调和计划——即使看似微小。 + - 优先考虑完整性和保真度,而非简洁性。 - 不要泛化或跳过可能具有上下文意义的细节。 返回一个有效的 JSON 对象,结构如下: @@ -322,11 +246,11 @@ "tags": <相关主题关键词列表(例如,["截止日期", "团队", "计划"])> } -语言规则: -- `key`、`value`、`tags` 字段必须与输入文档摘要的主要语言一致。**如果输入是中文,请输出中文** +语言规则: +- `key`、`value`、`tags` 字段必须与输入文档摘要的主要语言一致。**如果输入是中文,请输出中文** - `memory_type` 保持英文。 -文档片段: +文档片段: {chunk_text} 您的输出:""" @@ -375,15 +299,15 @@ """ -SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH = """示例: -对话: -user: [2025年6月26日下午3:00]:嗨Jerry!昨天下午3点我和团队开了个会,讨论新项目。 -assistant: 哦Tom!你觉得团队能在12月15日前完成吗? -user: [2025年6月26日下午3:00]:我有点担心。后端要到12月10日才能完成,所以测试时间会很紧。 -assistant: [2025年6月26日下午3:00]:也许提议延期? +SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH = """示例: +对话: +user: [2025年6月26日下午3:00]:嗨Jerry!昨天下午3点我和团队开了个会,讨论新项目。 +assistant: 哦Tom!你觉得团队能在12月15日前完成吗? +user: [2025年6月26日下午3:00]:我有点担心。后端要到12月10日才能完成,所以测试时间会很紧。 +assistant: [2025年6月26日下午3:00]:也许提议延期? 
user: [2025年6月26日下午4:21]:好主意。我明天上午9:30的会上提一下——也许把截止日期推迟到1月5日。 -输出: +输出: { "memory list": [ { @@ -402,7 +326,7 @@ "summary": "Tom目前正专注于管理一个进度紧张的新项目。在2025年6月25日的团队会议后,他意识到原定2025年12月15日的截止日期可能无法实现,因为后端会延迟。由于担心测试时间不足,他接受了Jerry提出的延期建议。Tom计划在次日早上的会议上提出将截止日期推迟至2026年1月5日。他的行为反映出对时间线的担忧,以及积极、以团队为导向的问题解决方式。" } -另一个中文示例(注意:当用户语言为中文时,您也需输出中文): +另一个中文示例(注意:当用户语言为中文时,您也需输出中文): { "memory list": [ {