diff --git a/benchmark/halumem/quickstart.md b/benchmark/halumem/quickstart.md index 59dc68fa..4ef30846 100644 --- a/benchmark/halumem/quickstart.md +++ b/benchmark/halumem/quickstart.md @@ -14,17 +14,30 @@ conda activate ./reme-env pip install . ``` -### 2. Clone the Repository +### 2. Download the Dataset ```bash cd ./benchmark/halumem -git clone https://github.com/MemTensor/HaluMem.git +mkdir -p data +curl -L "https://huggingface.co/datasets/IAAR-Shanghai/HaluMem/resolve/main/HaluMem-Medium.jsonl?download=true" -o data/HaluMem-Medium.jsonl +curl -L "https://huggingface.co/datasets/IAAR-Shanghai/HaluMem/resolve/main/HaluMem-Long.jsonl?download=true" -o data/HaluMem-Long.jsonl +``` + +Dataset page: +https://huggingface.co/datasets/IAAR-Shanghai/HaluMem/tree/main + +If the official source is slow or inaccessible in mainland China, you can use a mirror: +```bash +cd ./benchmark/halumem +mkdir -p data +curl -L "https://hf-mirror.com/datasets/IAAR-Shanghai/HaluMem/resolve/main/HaluMem-Medium.jsonl?download=true" -o data/HaluMem-Medium.jsonl +curl -L "https://hf-mirror.com/datasets/IAAR-Shanghai/HaluMem/resolve/main/HaluMem-Long.jsonl?download=true" -o data/HaluMem-Long.jsonl ``` ### 3. Run Experiments Launch the ReMe service to enable memory library functionality: ```bash clear && python benchmark/halumem/eval_reme.py \ - --data_path benchmark/halumem/HaluMem/data/HaluMem-Medium.jsonl \ + --data_path benchmark/halumem/data/HaluMem-Medium.jsonl \ --reme_model_name gpt-4o-mini-2024-07-18 \ --eval_model_name gpt-4o-mini-2024-07-18 \ --batch_size 40 \ diff --git a/reme/core/application.py b/reme/core/application.py index 8b5b1932..042b2e80 100644 --- a/reme/core/application.py +++ b/reme/core/application.py @@ -44,6 +44,7 @@ def __init__( config_path: str | None = None, enable_logo: bool = True, log_to_console: bool = True, + log_to_file: bool = True, enable_load_env: bool = True, parser: type[PydanticConfigParser] | None = None, default_as_llm_config: dict | None = None, @@ -73,6 +74,7 @@ def __init__( config_path=config_path, enable_logo=enable_logo, log_to_console=log_to_console, + log_to_file=log_to_file, default_as_llm_config=default_as_llm_config, default_as_llm_formatter_config=default_as_llm_formatter_config, default_llm_config=default_llm_config, @@ -144,7 +146,10 @@ async def start(self): logger.warning("Application has already started.") return self - init_logger(log_to_console=self.service_config.log_to_console) + init_logger( + log_to_console=self.service_config.log_to_console, + log_to_file=self.service_config.log_to_file, + ) logger.info(f"Init ReMe with config: {self.service_config.model_dump_json()}") working_path = Path(self.service_config.working_dir) diff --git a/reme/core/schema/service_config.py b/reme/core/schema/service_config.py index 48b97e0a..1f8a6816 100644 --- a/reme/core/schema/service_config.py +++ b/reme/core/schema/service_config.py @@ -122,6 +122,7 @@ class ServiceConfig(BasicConfig): ) ray_max_workers: int = Field(default=-1) log_to_console: bool = Field(default=True) + log_to_file: bool = Field(default=True) disabled_flows: list[str] = Field(default_factory=list) enabled_flows: list[str] = Field(default_factory=list) diff --git a/reme/core/service_context.py b/reme/core/service_context.py index 2c58fe7d..5e57c9e4 100644 --- a/reme/core/service_context.py +++ b/reme/core/service_context.py @@ -34,6 +34,7 @@ def __init__( config_path: str | None = None, enable_logo: bool = True, log_to_console: bool = True, + log_to_file: bool = True, default_as_llm_config: dict | None = None, default_as_llm_formatter_config: dict | None = None, default_as_token_counter_config: dict | None = None, @@ -79,6 +80,7 @@ def __init__( { "enable_logo": enable_logo, "log_to_console": log_to_console, + "log_to_file": log_to_file, "working_dir": working_dir, }, ) diff --git a/reme/core/utils/logger_utils.py b/reme/core/utils/logger_utils.py index 512c0a42..cc141724 100644 --- a/reme/core/utils/logger_utils.py +++ b/reme/core/utils/logger_utils.py @@ -5,13 +5,19 @@ from datetime import datetime -def init_logger(log_dir: str = "logs", level: str = "INFO", log_to_console: bool = True) -> None: +def init_logger( + log_dir: str = "logs", + level: str = "INFO", + log_to_console: bool = True, + log_to_file: bool = True, +) -> None: """Initialize the logger with both file and console handlers. Args: log_dir: Directory path for log files level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) log_to_console: Whether to print logs to console/screen + log_to_file: Whether to persist logs to files under log_dir """ from loguru import logger @@ -28,25 +34,26 @@ def init_logger(log_dir: str = "logs", level: str = "INFO", log_to_console: bool ) # Try to configure file-based logging (skip if permission denied) - try: - # Ensure the logging directory exists - os.makedirs(log_dir, exist_ok=True) - - # Generate filename based on the current timestamp - # Use dashes instead of colons for Windows compatibility - current_ts = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - log_filename = f"{current_ts}.log" - log_filepath = os.path.join(log_dir, log_filename) - - # Configure file-based logging with rotation and compression - logger.add( - log_filepath, - level=level, - rotation="00:00", - retention="7 days", - compression="zip", - encoding="utf-8", - format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {file}:{line} | {function} | {message}", - ) - except Exception as e: - logger.error(f"Error configuring file logging: {e}") + if log_to_file: + try: + # Ensure the logging directory exists + os.makedirs(log_dir, exist_ok=True) + + # Generate filename based on the current timestamp + # Use dashes instead of colons for Windows compatibility + current_ts = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + log_filename = f"{current_ts}.log" + log_filepath = os.path.join(log_dir, log_filename) + + # Configure file-based logging with rotation and compression + logger.add( + log_filepath, + level=level, + rotation="00:00", + retention="7 days", + compression="zip", + encoding="utf-8", + format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {file}:{line} | {function} | {message}", + ) + except Exception as e: + logger.error(f"Error configuring file logging: {e}") diff --git a/reme/reme.py b/reme/reme.py index 5e2aa5b2..f559ad71 100644 --- a/reme/reme.py +++ b/reme/reme.py @@ -46,6 +46,7 @@ def __init__( config_path: str = "vector", enable_logo: bool = True, log_to_console: bool = True, + log_to_file: bool = True, default_llm_config: dict | None = None, default_embedding_model_config: dict | None = None, default_vector_store_config: dict | None = None, @@ -81,6 +82,7 @@ def __init__( config_path=config_path, enable_logo=enable_logo, log_to_console=log_to_console, + log_to_file=log_to_file, parser=ReMeConfigParser, default_llm_config=default_llm_config, default_embedding_model_config=default_embedding_model_config,