-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.py
More file actions
35 lines (29 loc) · 1.13 KB
/
config.py
File metadata and controls
35 lines (29 loc) · 1.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Milvus Lite Configuration
MILVUS_LITE_DATA_PATH = "./milvus_lite_data.db" # Path to store Milvus Lite data
COLLECTION_NAME = "medical_rag_lite" # Use a different name if needed
# Data Configuration
DATA_FILE = "./data/processed_data.json"
# Model Configuration
# Example: 'all-MiniLM-L6-v2' (dim 384), 'thenlper/gte-large' (dim 1024)
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
GENERATION_MODEL_NAME = "Qwen/Qwen2.5-0.5B"
EMBEDDING_DIM = 384 # Must match EMBEDDING_MODEL_NAME
# Indexing and Search Parameters
MAX_ARTICLES_TO_INDEX = 500
TOP_K = 3
# Milvus index parameters (adjust based on data size and needs)
INDEX_METRIC_TYPE = "L2" # Or "IP"
INDEX_TYPE = "IVF_FLAT" # Milvus Lite 支持的索引类型
# HNSW index params (adjust as needed)
INDEX_PARAMS = {"nlist": 128}
# HNSW search params (adjust as needed)
SEARCH_PARAMS = {"nprobe": 16}
# Generation Parameters
MAX_NEW_TOKENS_GEN = 512
TEMPERATURE = 0.7
TOP_P = 0.9
REPETITION_PENALTY = 1.1
# Global map to store document content (populated during indexing)
# Key: document ID (int), Value: dict {'title': str, 'abstract': str, 'content': str}
id_to_doc_map = {}
id_to_embedding_map = {}