refactor(config): move reflection qdrant connection from env to toml

Mgrsc · Mgrsc · commit 33ba51a63a55 · 2026-03-05T23:12:01.000+08:00
diff --git a/.env.example b/.env.example
@@ -11,7 +11,5 @@ TELEGRAM_BOT_TOKEN=123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11
 MINIMAX_API_KEY=
 GROQ_API_KEY=
 MEMBURROW_AUTH_TOKEN=
-
-# Reflection memory loop (only needed when reflection.enabled=true)
-QDRANT_URL=http://qdrant:6333
-# QDRANT_API_KEY=
+FIRECRAWL_API_KEY=
+# FIRECRAWL_BASE_URL=https://api.firecrawl.dev
diff --git a/README.md b/README.md
@@ -270,7 +270,7 @@ In the default integrated deployment, MemBurrow and Zerda reflection share the s
 
 Zerda implements a heuristic executor reflection memory loop that is conceptually inspired by ACON (Agent Context Optimization). The goal is to shift memory usage from "feeding more task facts" to "feeding reusable methodology and lessons" (`How to act / What to avoid`). Before an execution run, the system embeds the delegated instruction, retrieves top-matched historical guidelines from Qdrant, and injects them into the Executor prompt as concise system reminders.
 
-Configuration note: all reflection settings live under `[reflection]` (for example `llm_model`, `max_tokens`, `embedding_model`, `embedding_dim`). Both `llm_model` and `embedding_model` use `provider_id@model_name` and resolve `base_url` / `api_key` from `[providers.<id>]`. `embedding_model` is optional and defaults to the same provider as `llm_model` with `text-embedding-3-small`. Reflection sampling is fixed at `temperature=0.7` and `top_p=0.95`.
+Configuration note: all reflection settings live under `[reflection]` (for example `llm_model`, `max_tokens`, `embedding_model`, `embedding_dim`, `qdrant_url`, `qdrant_api_key`). Both `llm_model` and `embedding_model` use `provider_id@model_name` and resolve `base_url` / `api_key` from `[providers.<id>]`. `embedding_model` is optional and defaults to the same provider as `llm_model` with `text-embedding-3-small`. Reflection sampling is fixed at `temperature=0.7` and `top_p=0.95`.
 
 During execution, Zerda records iteration outcomes (tool errors and traceback signals). After the run, a reflection worker asynchronously performs failure-driven contrast: it compares failed and successful iterations from the same trajectory, then compresses one reusable guideline in imperative form. The compression prompt explicitly enforces method-level lessons (not domain facts), short output, and generalizability to similar tasks.
 
diff --git a/README_zh.md b/README_zh.md
@@ -270,7 +270,7 @@ Zerda 引入了轻量外部记忆服务 [MemBurrow](https://github.com/Mgrsc/Mem
 
 Zerda 在 Executor 路径实现的是启发式反思记忆闭环，核心思想参考了 ACON（Agent Context Optimization），但不是 ACON 的完整实现。目标是把记忆优化从“持续喂任务知识”转为“沉淀可复用的方法论与教训”（`How to act / What to avoid`）。每次执行前，系统会对委托指令做向量化检索，从 Qdrant 召回最相似的历史指南，并以精简的 `<system-reminder>` 注入到 Executor 提示词中。
 
-配置说明：反思相关配置全部放在 `[reflection]` 下（如 `llm_model`、`max_tokens`、`embedding_model`、`embedding_dim`）。`llm_model` 与 `embedding_model` 都使用 `provider_id@model_name`，其 `base_url` / `api_key` 统一从 `[providers.<id>]` 读取。`embedding_model` 可省略，默认使用 `llm_model` 的同一 provider，并使用 `text-embedding-3-small` 作为默认 embedding 模型名。反思采样参数固定为 `temperature=0.7`、`top_p=0.95`。
+配置说明：反思相关配置全部放在 `[reflection]` 下（如 `llm_model`、`max_tokens`、`embedding_model`、`embedding_dim`、`qdrant_url`、`qdrant_api_key`）。`llm_model` 与 `embedding_model` 都使用 `provider_id@model_name`，其 `base_url` / `api_key` 统一从 `[providers.<id>]` 读取。`embedding_model` 可省略，默认使用 `llm_model` 的同一 provider，并使用 `text-embedding-3-small` 作为默认 embedding 模型名。反思采样参数固定为 `temperature=0.7`、`top_p=0.95`。
 
 执行过程中，系统按迭代记录工具错误和 traceback 信号。执行结束后，异步反思任务会做失败驱动对比：在同一轨迹内对照失败迭代与成功迭代，压缩出一条可迁移的操作指南。压缩提示词强约束输出为方法级经验（非领域事实）、短文本、祈使句，并要求可泛化到相似任务。
 
diff --git a/docs/zerda/configuration.md b/docs/zerda/configuration.md
@@ -114,6 +114,24 @@ model = "whisper-large-v3-turbo" # Default: "whisper-large-v3-turbo"
 
 Used for Telegram voice message transcription.
 
+## Reflection Memory Loop
+
+```toml
+[reflection]
+enabled = false
+llm_model = "openai@${OPENAI_REFLECTION_MODEL}"
+max_tokens = 2048
+embedding_model = "openai@${OPENAI_EMBEDDING_MODEL}" # optional
+embedding_dim = 1536                                 # optional
+qdrant_url = "http://qdrant:6333"                    # default
+qdrant_api_key = ""                                  # optional
+```
+
+- `llm_model`: Reflection analysis model (`provider_id@model_name`).
+- `embedding_model`: Optional embedding model (`provider_id@model_name`), defaults to `<llm provider>@text-embedding-3-small`.
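+- `qdrant_url`: Qdrant endpoint that hosts the reflection guideline collection. Defaults to `http://qdrant:6333` and must be non-empty when `enabled = true`. It replaces the former `QDRANT_URL` environment variable, which is no longer read.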
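+- `qdrant_api_key`: Optional Qdrant API key. An empty or whitespace-only string means no API key header is sent. It replaces the former `QDRANT_API_KEY` environment variable, which is no longer read.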
+
 ## Memory Service (MemBurrow)
 
 ```toml
diff --git a/src/config.rs b/src/config.rs
@@ -167,6 +167,10 @@ pub struct ReflectionConfig {
     pub embedding_model: Option<String>,
     #[serde(default)]
     pub embedding_dim: Option<u64>,
+    #[serde(default = "default_reflection_qdrant_url")]
+    pub qdrant_url: String,
+    #[serde(default)]
+    pub qdrant_api_key: String,
 }
 
 const fn default_reflection_max_tokens() -> Option<u32> {
@@ -181,6 +185,8 @@ impl Default for ReflectionConfig {
             max_tokens: default_reflection_max_tokens(),
             embedding_model: None,
             embedding_dim: None,
+            qdrant_url: default_reflection_qdrant_url(),
+            qdrant_api_key: String::new(),
         }
     }
 }
@@ -367,7 +373,7 @@ impl Default for MemoryServiceConfig {
 }
 
 fn default_memory_service_url() -> String {
-    "http://localhost:8080".to_string()
+    "http://memory-service:8080".to_string()
 }
 fn default_memory_tenant_id() -> String {
     "default".to_string()
@@ -384,6 +390,9 @@ fn default_docs_search_embedding_model() -> String {
 const fn default_docs_search_embedding_dim() -> u64 {
     1536
 }
+fn default_reflection_qdrant_url() -> String {
+    "http://qdrant:6333".to_string()
+}
 fn default_docs_search_qdrant_url() -> String {
     "http://qdrant:6333".to_string()
 }
@@ -587,6 +596,10 @@ fn validate_config(config: &Config) -> Result<()> {
             config.reflection.as_model_config().is_some(),
             "reflection.enabled=true requires non-empty reflection.llm_model (provider_id@model_name)"
         );
+        anyhow::ensure!(
+            !config.reflection.qdrant_url.trim().is_empty(),
+            "reflection.qdrant_url must not be empty when reflection.enabled=true"
+        );
     }
 
     if config.docs_search.enabled {
diff --git a/src/main.rs b/src/main.rs
@@ -183,6 +183,8 @@ async fn main() -> Result<()> {
                                         match reflection::ReflectionEngine::try_new(
                                             reflection_provider,
                                             reflection_opts,
+                                            &cfg.reflection.qdrant_url,
+                                            Some(&cfg.reflection.qdrant_api_key),
                                             cfg.reflection.embedding_dim,
                                             embedding_provider,
                                             &embedding_ref.model_name,
diff --git a/src/reflection/mod.rs b/src/reflection/mod.rs
@@ -23,11 +23,19 @@ impl ReflectionEngine {
     pub fn try_new(
         provider: Arc<dyn Provider>,
         chat_opts: ChatOptions,
+        qdrant_url: &str,
+        qdrant_api_key: Option<&str>,
         embedding_dim: Option<u64>,
         embedding_provider: &ProviderEndpoint,
         embedding_model: &str,
     ) -> Option<Self> {
-        let store = QdrantStore::try_new(embedding_dim, embedding_provider, embedding_model)?;
+        let store = QdrantStore::try_new(
+            qdrant_url,
+            qdrant_api_key,
+            embedding_dim,
+            embedding_provider,
+            embedding_model,
+        )?;
         let analyzer = ReflectionAnalyzer::new(provider, chat_opts);
         Some(Self { store, analyzer })
     }
diff --git a/src/reflection/store.rs b/src/reflection/store.rs
@@ -38,11 +38,17 @@ struct EmbeddingData {
 
 impl QdrantStore {
     pub fn try_new(
+        qdrant_url: &str,
+        qdrant_api_key: Option<&str>,
         embedding_dim_override: Option<u64>,
         embedding_provider: &ProviderEndpoint,
         embedding_model: &str,
     ) -> Option<Self> {
-        let qdrant_url = read_non_empty_env("QDRANT_URL")?;
+        let qdrant_url = qdrant_url.trim();
+        if qdrant_url.is_empty() {
+            tracing::warn!("REFLECTION: empty qdrant_url in configuration");
+            return None;
+        }
         let embedding_api_key = embedding_provider.api_key.trim().to_string();
         if embedding_api_key.is_empty() {
             tracing::warn!(
@@ -63,9 +69,9 @@ impl QdrantStore {
         };
         let embedding_dim = embedding_dim_override.unwrap_or(DEFAULT_EMBEDDING_DIM);
 
-        let mut qdrant_config = QdrantConfig::from_url(&qdrant_url);
-        if let Some(api_key) = read_non_empty_env("QDRANT_API_KEY") {
-            qdrant_config = qdrant_config.api_key(api_key);
+        let mut qdrant_config = QdrantConfig::from_url(qdrant_url);
+        if let Some(api_key) = qdrant_api_key.map(str::trim).filter(|v| !v.is_empty()) {
+            qdrant_config = qdrant_config.api_key(api_key.to_string());
         }
 
         let qdrant = match Qdrant::new(qdrant_config) {
@@ -232,13 +238,6 @@ impl QdrantStore {
     }
 }
 
-fn read_non_empty_env(name: &str) -> Option<String> {
-    std::env::var(name)
-        .ok()
-        .map(|v| v.trim().to_string())
-        .filter(|v| !v.is_empty())
-}
-
 fn truncate(s: &str, max: usize) -> &str {
     if s.len() <= max {
         s
diff --git a/zerda.toml b/zerda.toml
@@ -30,6 +30,8 @@ llm_model = "openai@${OPENAI_REFLECTION_MODEL}"
 max_tokens = 2048
 embedding_model = "openai@${OPENAI_EMBEDDING_MODEL}"
 embedding_dim = 1536
+qdrant_url = "http://qdrant:6333"
+qdrant_api_key = ""
 
 # =============================
 # Docs Search
diff --git a/zerda.toml.full b/zerda.toml.full
@@ -62,6 +62,8 @@ llm_model = ""                          # default: empty; required only when ena
 max_tokens = 2048
 # embedding_model = "openai@text-embedding-3-small"  # optional; default: "<llm provider>@text-embedding-3-small" when enabled
 # embedding_dim = 1536                  # optional; model-specific
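+qdrant_url = "http://qdrant:6333"       # default: "http://qdrant:6333"; must be non-empty when enabled
+qdrant_api_key = ""                     # default: empty; empty means no API key is sent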
 
 # =============================
 # Docs Search (optional)
@@ -116,7 +118,7 @@ model = "whisper-large-v3-turbo"
 # =============================
 [memory_service]
 enabled = false
-url = "http://localhost:8080"
+url = "http://memory-service:8080"
 auth_token = ""
 tenant_id = "default"
 default_entity_id = "user_default"

Original file line number	Diff line number	Diff line change
`@@ -167,6 +167,10 @@ pub struct ReflectionConfig {`
`167`	`167`	`pub embedding_model: Option<String>,`
`168`	`168`	`#[serde(default)]`
`169`	`169`	`pub embedding_dim: Option<u64>,`
	`170`	`+ #[serde(default = "default_reflection_qdrant_url")]`
	`171`	`+ pub qdrant_url: String,`
	`172`	`+ #[serde(default)]`
	`173`	`+ pub qdrant_api_key: String,`
`170`	`174`	`}`
`171`	`175`
`172`	`176`	`const fn default_reflection_max_tokens() -> Option<u32> {`
`@@ -181,6 +185,8 @@ impl Default for ReflectionConfig {`
`181`	`185`	`max_tokens: default_reflection_max_tokens(),`
`182`	`186`	`embedding_model: None,`
`183`	`187`	`embedding_dim: None,`
	`188`	`+ qdrant_url: default_reflection_qdrant_url(),`
	`189`	`+ qdrant_api_key: String::new(),`
`184`	`190`	`}`
`185`	`191`	`}`
`186`	`192`	`}`
`@@ -367,7 +373,7 @@ impl Default for MemoryServiceConfig {`
`367`	`373`	`}`
`368`	`374`
`369`	`375`	`fn default_memory_service_url() -> String {`
`370`		`- "http://localhost:8080".to_string()`
	`376`	`+ "http://memory-service:8080".to_string()`
`371`	`377`	`}`
`372`	`378`	`fn default_memory_tenant_id() -> String {`
`373`	`379`	`"default".to_string()`
`@@ -384,6 +390,9 @@ fn default_docs_search_embedding_model() -> String {`
`384`	`390`	`const fn default_docs_search_embedding_dim() -> u64 {`
`385`	`391`	`1536`
`386`	`392`	`}`
	`393`	`+fn default_reflection_qdrant_url() -> String {`
	`394`	`+ "http://qdrant:6333".to_string()`
	`395`	`+}`
`387`	`396`	`fn default_docs_search_qdrant_url() -> String {`
`388`	`397`	`"http://qdrant:6333".to_string()`
`389`	`398`	`}`
`@@ -587,6 +596,10 @@ fn validate_config(config: &Config) -> Result<()> {`
`587`	`596`	`config.reflection.as_model_config().is_some(),`
`588`	`597`	`"reflection.enabled=true requires non-empty reflection.llm_model (provider_id@model_name)"`
`589`	`598`	`);`
	`599`	`+ anyhow::ensure!(`
	`600`	`+ !config.reflection.qdrant_url.trim().is_empty(),`
	`601`	`+ "reflection.qdrant_url must not be empty when reflection.enabled=true"`
	`602`	`+ );`
`590`	`603`	`}`
`591`	`604`
`592`	`605`	`if config.docs_search.enabled {`