diff --git a/apps/memos-local-openclaw/.gitignore b/apps/memos-local-openclaw/.gitignore index 3db3e1643..de41320ce 100644 --- a/apps/memos-local-openclaw/.gitignore +++ b/apps/memos-local-openclaw/.gitignore @@ -13,6 +13,7 @@ Thumbs.db # Generated / non-essential package-lock.json +.installed-version www/ docs/ ppt/ diff --git a/apps/memos-local-openclaw/index.ts b/apps/memos-local-openclaw/index.ts index d84d94dcd..c64e2b1ec 100644 --- a/apps/memos-local-openclaw/index.ts +++ b/apps/memos-local-openclaw/index.ts @@ -130,13 +130,16 @@ const memosLocalPlugin = { } if (!sqliteReady) { - const msg = [ + const nodeVer = process.version; + const nodeMajor = parseInt(process.versions?.node?.split(".")[0] ?? "0", 10); + const isNode25Plus = nodeMajor >= 25; + const lines = [ "", "╔══════════════════════════════════════════════════════════════╗", "║ MemOS Local Memory — better-sqlite3 native module missing ║", "╠══════════════════════════════════════════════════════════════╣", "║ ║", - "║ Auto-rebuild failed. Run these commands manually: ║", + "║ Auto-rebuild failed (Node " + nodeVer + "). Run manually: ║", "║ ║", `║ cd ${pluginDir}`, "║ npm rebuild better-sqlite3 ║", @@ -145,13 +148,18 @@ const memosLocalPlugin = { "║ If rebuild fails, install build tools first: ║", "║ macOS: xcode-select --install ║", "║ Linux: sudo apt install build-essential python3 ║", - "║ ║", - "╚══════════════════════════════════════════════════════════════╝", - "", - ].join("\n"); - api.logger.warn(msg); + ]; + if (isNode25Plus) { + lines.push("║ ║"); + lines.push("║ Node 25+ has no prebuild: build tools required, or use ║"); + lines.push("║ Node LTS (20/22): nvm install 22 && nvm use 22 ║"); + } + lines.push("║ ║"); + lines.push("╚══════════════════════════════════════════════════════════════╝"); + lines.push(""); + api.logger.warn(lines.join("\n")); throw new Error( - `better-sqlite3 native module not found. Auto-rebuild failed. 
Fix: cd ${pluginDir} && npm rebuild better-sqlite3` + `better-sqlite3 native module not found (Node ${nodeVer}). Auto-rebuild failed. Fix: install build tools, then cd ${pluginDir} && npm rebuild better-sqlite3. Or use Node LTS (20/22).` ); } } @@ -201,6 +209,29 @@ const memosLocalPlugin = { ctx.log.warn(`memos-local: could not write to managed skills dir: ${e}`); } + // Ensure plugin tools are enabled in openclaw.json tools.allow + try { + const openclawJsonPath = path.join(stateDir, "openclaw.json"); + if (fs.existsSync(openclawJsonPath)) { + const raw = fs.readFileSync(openclawJsonPath, "utf-8"); + const cfg = JSON.parse(raw); + const allow: string[] | undefined = cfg?.tools?.allow; + if (Array.isArray(allow) && allow.length > 0 && !allow.includes("group:plugins")) { + const lastEntry = JSON.stringify(allow[allow.length - 1]); + const patched = raw.replace( + new RegExp(`(${lastEntry})(\\s*\\])`), + `$1,\n "group:plugins"$2`, + ); + if (patched !== raw && patched.includes("group:plugins")) { + fs.writeFileSync(openclawJsonPath, patched, "utf-8"); + ctx.log.info("memos-local: added 'group:plugins' to tools.allow in openclaw.json"); + } + } + } + } catch (e) { + ctx.log.warn(`memos-local: could not patch tools.allow: ${e}`); + } + worker.getTaskProcessor().onTaskCompleted((task) => { skillEvolver.onTaskCompleted(task).catch((err) => { ctx.log.warn(`SkillEvolver async error: ${err}`); @@ -211,6 +242,10 @@ const memosLocalPlugin = { api.logger.info(`memos-local: initialized (db: ${ctx.config.storage!.dbPath})`); + // Current agent ID — updated by hooks, read by tools for owner isolation. + // Falls back to "main" when no hook has fired yet (single-agent setups). 
+ let currentAgentId = "main"; + const trackTool = (toolName: string, fn: (...args: any[]) => Promise) => async (...args: any[]) => { const t0 = performance.now(); @@ -228,8 +263,17 @@ const memosLocalPlugin = { store.recordToolCall(toolName, dur, ok); telemetry.trackToolCalled(toolName, dur, ok); try { - const outputText = result?.content?.[0]?.text ?? JSON.stringify(result ?? ""); - store.recordApiLog(toolName, inputParams, outputText, dur, ok); + let outputText: string; + const det = result?.details; + if (det && Array.isArray(det.candidates)) { + outputText = JSON.stringify({ + candidates: det.candidates, + filtered: det.hits ?? det.filtered ?? [], + }); + } else { + outputText = result?.content?.[0]?.text ?? JSON.stringify(result ?? ""); + } + store.recordApiLog(toolName, { ...inputParams, type: "tool_call" }, outputText, dur, ok); } catch (_) { /* best-effort */ } } }; @@ -243,33 +287,35 @@ const memosLocalPlugin = { description: "Search long-term conversation memory for past conversations, user preferences, decisions, and experiences. " + "Relevant memories are automatically injected at the start of each turn, but call this tool when you need " + - "to search with a different query, narrow by role, or the auto-recalled context is insufficient.\n\n" + - "Use role='user' to find what the user actually said.", + "to search with a different query or the auto-recalled context is insufficient. " + + "Pass only a short natural-language query (2-5 key words).", parameters: Type.Object({ - query: Type.String({ description: "Natural language search query" }), - maxResults: Type.Optional(Type.Number({ description: "Max results (default 20, max 20)" })), - minScore: Type.Optional(Type.Number({ description: "Min score 0-1 (default 0.45, floor 0.35)" })), - role: Type.Optional(Type.String({ description: "Filter by role: 'user', 'assistant', or 'tool'. Use 'user' to find what the user said." 
})), + query: Type.String({ description: "Short natural language search query (2-5 key words)" }), }), execute: trackTool("memory_search", async (_toolCallId: any, params: any) => { - const { query, maxResults, minScore, role } = params as { - query: string; - maxResults?: number; - minScore?: number; - role?: string; - }; + const { query } = params as { query: string }; + const role = undefined; + const minScore = undefined; - const agentId = (params as any).agentId ?? "main"; + const agentId = currentAgentId; const ownerFilter = [`agent:${agentId}`, "public"]; - const effectiveMaxResults = maxResults ?? 20; + const effectiveMaxResults = 10; ctx.log.debug(`memory_search query="${query}" maxResults=${effectiveMaxResults} minScore=${minScore ?? 0.45} role=${role ?? "all"} owner=agent:${agentId}`); const result = await engine.search({ query, maxResults: effectiveMaxResults, minScore, role, ownerFilter }); ctx.log.debug(`memory_search raw candidates: ${result.hits.length}`); + const rawCandidates = result.hits.map((h) => ({ + chunkId: h.ref.chunkId, + role: h.source.role, + score: h.score, + summary: h.summary, + original_excerpt: (h.original_excerpt ?? "").slice(0, 200), + })); + if (result.hits.length === 0) { return { content: [{ type: "text", text: result.meta.note ?? "No relevant memories found." }], - details: { meta: result.meta }, + details: { candidates: [], meta: result.meta }, }; } @@ -279,8 +325,9 @@ const memosLocalPlugin = { const candidates = result.hits.map((h, i) => ({ index: i + 1, - summary: h.summary, role: h.source.role, + content: (h.original_excerpt ?? "").slice(0, 300), + time: h.source.ts ? new Date(h.source.ts).toISOString().slice(0, 16) : "", })); const filterResult = await summarizer.filterRelevant(query, candidates); @@ -293,7 +340,7 @@ const memosLocalPlugin = { } else { return { content: [{ type: "text", text: "No relevant memories found for this query." 
}], - details: { meta: result.meta }, + details: { candidates: rawCandidates, filtered: [], meta: result.meta }, }; } } @@ -301,7 +348,7 @@ const memosLocalPlugin = { if (filteredHits.length === 0) { return { content: [{ type: "text", text: "No relevant memories found for this query." }], - details: { meta: result.meta }, + details: { candidates: rawCandidates, filtered: [], meta: result.meta }, }; } @@ -310,9 +357,7 @@ const memosLocalPlugin = { ctx.log.debug(`memory_search dedup: ${beforeDedup} → ${filteredHits.length}`); const lines = filteredHits.map((h, i) => { - const excerpt = h.original_excerpt.length > 300 - ? h.original_excerpt.slice(0, 297) + "..." - : h.original_excerpt; + const excerpt = h.original_excerpt; const parts = [`${i + 1}. [${h.source.role}]`]; if (excerpt) parts.push(` ${excerpt}`); parts.push(` chunkId="${h.ref.chunkId}"`); @@ -353,6 +398,7 @@ const memosLocalPlugin = { }, ], details: { + candidates: rawCandidates, hits: filteredHits.map((h) => { let effectiveTaskId = h.taskId; if (effectiveTaskId) { @@ -365,6 +411,8 @@ const memosLocalPlugin = { skillId: h.skillId, role: h.source.role, score: h.score, + summary: h.summary, + original_excerpt: (h.original_excerpt ?? 
"").slice(0, 200), }; }), meta: result.meta, @@ -389,13 +437,14 @@ const memosLocalPlugin = { window: Type.Optional(Type.Number({ description: "Context window ±N (default 2)" })), }), execute: trackTool("memory_timeline", async (_toolCallId: any, params: any) => { - ctx.log.debug(`memory_timeline called`); + ctx.log.debug(`memory_timeline called (agent=${currentAgentId})`); const { chunkId, window: win } = params as { chunkId: string; window?: number; }; - const anchorChunk = store.getChunk(chunkId); + const ownerFilter = [`agent:${currentAgentId}`, "public"]; + const anchorChunk = store.getChunkForOwners(chunkId, ownerFilter); if (!anchorChunk) { return { content: [{ type: "text", text: `Chunk not found: ${chunkId}` }], @@ -404,7 +453,7 @@ const memosLocalPlugin = { } const w = win ?? DEFAULTS.timelineWindowDefault; - const neighbors = store.getNeighborChunks(anchorChunk.sessionKey, anchorChunk.turnId, anchorChunk.seq, w); + const neighbors = store.getNeighborChunks(anchorChunk.sessionKey, anchorChunk.turnId, anchorChunk.seq, w, ownerFilter); const anchorTs = anchorChunk?.createdAt ?? 0; const entries = neighbors.map((chunk) => { @@ -415,14 +464,14 @@ const memosLocalPlugin = { return { relation, role: chunk.role, - excerpt: chunk.content.slice(0, DEFAULTS.excerptMaxChars), + excerpt: chunk.content, ts: chunk.createdAt, }; }); const rl = (r: string) => r === "user" ? "USER" : r === "assistant" ? "ASSISTANT" : r.toUpperCase(); const text = entries - .map((e) => `[${e.relation}] ${rl(e.role)}: ${e.excerpt.slice(0, 150)}`) + .map((e) => `[${e.relation}] ${rl(e.role)}: ${e.excerpt}`) .join("\n"); return { @@ -452,7 +501,8 @@ const memosLocalPlugin = { const { chunkId, maxChars } = params as { chunkId: string; maxChars?: number }; const limit = Math.min(maxChars ?? 
DEFAULTS.getMaxCharsDefault, DEFAULTS.getMaxCharsMax); - const chunk = store.getChunk(chunkId); + const ownerFilter = [`agent:${currentAgentId}`, "public"]; + const chunk = store.getChunkForOwners(chunkId, ownerFilter); if (!chunk) { return { content: [{ type: "text", text: `Chunk not found: ${chunkId}` }], @@ -460,9 +510,7 @@ const memosLocalPlugin = { }; } - const content = chunk.content.length > limit - ? chunk.content.slice(0, limit) + "\u2026" - : chunk.content; + const content = chunk.content; const who = chunk.role === "user" ? "USER said" : chunk.role === "assistant" ? "ASSISTANT replied" : chunk.role === "tool" ? "TOOL returned" : chunk.role.toUpperCase(); @@ -719,7 +767,7 @@ const memosLocalPlugin = { const { v4: uuidv4 } = require("uuid"); const now = Date.now(); const chunkId = uuidv4(); - const chunkSummary = writeSummary ?? writeContent.slice(0, 200); + const chunkSummary = writeSummary ?? writeContent; store.insertChunk({ id: chunkId, @@ -776,8 +824,7 @@ const memosLocalPlugin = { execute: trackTool("skill_search", async (_toolCallId: any, params: any) => { const { query: skillQuery, scope: rawScope } = params as { query: string; scope?: string }; const scope = (rawScope === "self" || rawScope === "public") ? rawScope : "mix"; - const skillAgentId = (params as any).agentId ?? "main"; - const currentOwner = `agent:${skillAgentId}`; + const currentOwner = `agent:${currentAgentId}`; const hits = await engine.searchSkills(skillQuery, scope as any, currentOwner); @@ -789,7 +836,7 @@ const memosLocalPlugin = { } const text = hits.map((h, i) => - `${i + 1}. [${h.name}] ${h.description.slice(0, 150)}${h.visibility === "public" ? " (public)" : ""}`, + `${i + 1}. [${h.name}] ${h.description}${h.visibility === "public" ? 
" (public)" : ""}`, ).join("\n"); return { @@ -855,17 +902,13 @@ const memosLocalPlugin = { // ─── Auto-recall: inject relevant memories before agent starts ─── - // Track recalled chunk IDs per turn to avoid re-storing them in agent_end - let lastRecalledChunkIds: Set = new Set(); - let lastRecalledSummaries: string[] = []; - - api.on("before_agent_start", async (event: { prompt?: string; messages?: unknown[]; agentId?: string }) => { - lastRecalledChunkIds = new Set(); - lastRecalledSummaries = []; + api.on("before_agent_start", async (event: { prompt?: string; messages?: unknown[] }, hookCtx?: { agentId?: string; sessionKey?: string }) => { if (!event.prompt || event.prompt.length < 3) return; - const recallAgentId = (event as any).agentId ?? "main"; + const recallAgentId = hookCtx?.agentId ?? "main"; + currentAgentId = recallAgentId; const recallOwnerFilter = [`agent:${recallAgentId}`, "public"]; + ctx.log.info(`auto-recall: agentId=${recallAgentId} (from hookCtx)`); const recallT0 = performance.now(); let recallQuery = ""; @@ -875,10 +918,20 @@ const memosLocalPlugin = { ctx.log.debug(`auto-recall: rawPrompt="${rawPrompt.slice(0, 300)}"`); let query = rawPrompt; - const lastDoubleNewline = rawPrompt.lastIndexOf("\n\n"); - if (lastDoubleNewline > 0 && lastDoubleNewline < rawPrompt.length - 3) { - const tail = rawPrompt.slice(lastDoubleNewline + 2).trim(); - if (tail.length >= 2) query = tail; + const senderTag = "Sender (untrusted metadata):"; + const senderPos = rawPrompt.indexOf(senderTag); + if (senderPos !== -1) { + const afterSender = rawPrompt.slice(senderPos); + const fenceStart = afterSender.indexOf("```json"); + const fenceEnd = fenceStart >= 0 ? 
afterSender.indexOf("```\n", fenceStart + 7) : -1; + if (fenceEnd > 0) { + query = afterSender.slice(fenceEnd + 4).replace(/^\s*\n/, "").trim(); + } else { + const firstDblNl = afterSender.indexOf("\n\n"); + if (firstDblNl > 0) { + query = afterSender.slice(firstDblNl + 2).trim(); + } + } } query = stripInboundMetadata(query); query = query.replace(/<[^>]+>/g, "").trim(); @@ -890,23 +943,28 @@ const memosLocalPlugin = { } ctx.log.debug(`auto-recall: query="${query.slice(0, 80)}"`); - const result = await engine.search({ query, maxResults: 20, minScore: 0.45, ownerFilter: recallOwnerFilter }); + const result = await engine.search({ query, maxResults: 10, minScore: 0.45, ownerFilter: recallOwnerFilter }); if (result.hits.length === 0) { ctx.log.debug("auto-recall: no candidates found"); const dur = performance.now() - recallT0; store.recordToolCall("memory_search", dur, true); - store.recordApiLog("memory_search", { query }, "no hits", dur, true); - const noRecallHint = - "## Memory system\n\nNo memories were automatically recalled for this turn (e.g. the user's message was long, vague, or no matching history). " + - "You may still have relevant past context — call the **memory_search** tool with a **short, focused query** you generate yourself " + - "(e.g. key topics, names, or a rephrased question) to search the user's conversation history."; - return { systemPrompt: noRecallHint }; + store.recordApiLog("memory_search", { type: "auto_recall", query }, JSON.stringify({ candidates: [], filtered: [] }), dur, true); + if (query.length > 50) { + const noRecallHint = + "## Memory system — ACTION REQUIRED\n\n" + + "Auto-recall found no results for a long query. " + + "You MUST call `memory_search` now with a shortened query (2-5 key words) before answering. " + + "Do NOT skip this step. 
Do NOT answer without searching first."; + return { prependContext: noRecallHint }; + } + return; } const candidates = result.hits.map((h, i) => ({ index: i + 1, - summary: h.summary, role: h.source.role, + content: (h.original_excerpt ?? "").slice(0, 300), + time: h.source.ts ? new Date(h.source.ts).toISOString().slice(0, 16) : "", })); let filteredHits = result.hits; @@ -922,12 +980,19 @@ const memosLocalPlugin = { ctx.log.debug("auto-recall: LLM filter returned no relevant hits"); const dur = performance.now() - recallT0; store.recordToolCall("memory_search", dur, true); - store.recordApiLog("memory_search", { query }, `${result.hits.length} candidates → 0 relevant`, dur, true); - const noRecallHint = - "## Memory system\n\nNo memories were automatically recalled for this turn (e.g. the user's message was long, vague, or no matching history). " + - "You may still have relevant past context — call the **memory_search** tool with a **short, focused query** you generate yourself " + - "(e.g. key topics, names, or a rephrased question) to search the user's conversation history."; - return { systemPrompt: noRecallHint }; + store.recordApiLog("memory_search", { type: "auto_recall", query }, JSON.stringify({ + candidates: result.hits.map(h => ({ score: h.score, role: h.source.role, summary: h.summary, content: h.original_excerpt })), + filtered: [] + }), dur, true); + if (query.length > 50) { + const noRecallHint = + "## Memory system — ACTION REQUIRED\n\n" + + "Auto-recall found no relevant results for a long query. " + + "You MUST call `memory_search` now with a shortened query (2-5 key words) before answering. " + + "Do NOT skip this step. 
Do NOT answer without searching first."; + return { prependContext: noRecallHint }; + } + return; } } @@ -936,9 +1001,7 @@ const memosLocalPlugin = { ctx.log.debug(`auto-recall: ${result.hits.length} → ${beforeDedup} relevant → ${filteredHits.length} after dedup, sufficient=${sufficient}`); const lines = filteredHits.map((h, i) => { - const excerpt = h.original_excerpt.length > 300 - ? h.original_excerpt.slice(0, 297) + "..." - : h.original_excerpt; + const excerpt = h.original_excerpt; const parts: string[] = [`${i + 1}. [${h.source.role}]`]; if (excerpt) parts.push(` ${excerpt}`); parts.push(` chunkId="${h.ref.chunkId}"`); @@ -951,21 +1014,18 @@ const memosLocalPlugin = { return parts.join("\n"); }); - let tipsText = ""; - if (!sufficient) { - const hasTask = filteredHits.some((h) => { - if (!h.taskId) return false; - const t = store.getTask(h.taskId); - return t && t.status !== "skipped"; - }); - const tips: string[] = []; - if (hasTask) { - tips.push("→ call task_summary(taskId) for full task context"); - tips.push("→ call skill_get(taskId=...) 
if the task has a proven experience guide"); - } - tips.push("→ call memory_timeline(chunkId) to expand surrounding conversation"); - tipsText = "\n\nIf more context is needed:\n" + tips.join("\n"); + const hasTask = filteredHits.some((h) => { + if (!h.taskId) return false; + const t = store.getTask(h.taskId); + return t && t.status !== "skipped"; + }); + const tips: string[] = []; + if (hasTask) { + tips.push("- A hit has `task_id` → call `task_summary(taskId=\"...\")` to get the full task context (steps, code, results)"); + tips.push("- A task may have a reusable guide → call `skill_get(taskId=\"...\")` to retrieve the experience/skill"); } + tips.push("- Need more surrounding dialogue → call `memory_timeline(chunkId=\"...\")` to expand context around a hit"); + const tipsText = "\n\nAvailable follow-up tools:\n" + tips.join("\n"); const contextParts = [ "## User's conversation history (from memory system)", @@ -981,19 +1041,28 @@ const memosLocalPlugin = { const recallDur = performance.now() - recallT0; store.recordToolCall("memory_search", recallDur, true); - store.recordApiLog("memory_search", { query }, context, recallDur, true); + store.recordApiLog("memory_search", { type: "auto_recall", query }, JSON.stringify({ + candidates: result.hits.map(h => ({ score: h.score, role: h.source.role, summary: h.summary, content: h.original_excerpt })), + filtered: filteredHits.map(h => ({ score: h.score, role: h.source.role, summary: h.summary, content: h.original_excerpt })) + }), recallDur, true); telemetry.trackAutoRecall(filteredHits.length, recallDur); - lastRecalledChunkIds = new Set(filteredHits.map(h => h.ref.chunkId)); - lastRecalledSummaries = filteredHits.map(h => h.summary); + ctx.log.info(`auto-recall: returning prependContext (${context.length} chars), sufficient=${sufficient}`); + + if (!sufficient) { + const searchHint = + "\n\nIf these memories don't fully answer the question, " + + "call `memory_search` with a shorter or rephrased query to find more."; 
+ return { prependContext: context + searchHint }; + } return { - systemPrompt: context, + prependContext: context, }; } catch (err) { const dur = performance.now() - recallT0; store.recordToolCall("memory_search", dur, false); - try { store.recordApiLog("memory_search", { query: recallQuery }, `error: ${String(err)}`, dur, false); } catch (_) { /* best-effort */ } + try { store.recordApiLog("memory_search", { type: "auto_recall", query: recallQuery }, `error: ${String(err)}`, dur, false); } catch (_) { /* best-effort */ } ctx.log.warn(`auto-recall failed: ${String(err)}`); } }); @@ -1006,13 +1075,15 @@ const memosLocalPlugin = { // already processed before the restart) and only capture future increments. const sessionMsgCursor = new Map(); - api.on("agent_end", async (event) => { + api.on("agent_end", async (event: any, hookCtx?: { agentId?: string; sessionKey?: string; sessionId?: string }) => { if (!event.success || !event.messages || event.messages.length === 0) return; try { - const captureAgentId = (event as any).agentId ?? "main"; + const captureAgentId = hookCtx?.agentId ?? "main"; + currentAgentId = captureAgentId; const captureOwner = `agent:${captureAgentId}`; - const sessionKey = (event as any).sessionKey ?? "default"; + const sessionKey = hookCtx?.sessionKey ?? 
"default"; + ctx.log.info(`agent_end: agentId=${captureAgentId} sessionKey=${sessionKey} (from hookCtx)`); const cursorKey = `${sessionKey}::${captureAgentId}`; const allMessages = event.messages; @@ -1084,10 +1155,16 @@ const memosLocalPlugin = { const senderIdx = text.indexOf("Sender (untrusted metadata):"); if (senderIdx !== -1) { const afterSender = text.slice(senderIdx); - const lastDblNl = afterSender.lastIndexOf("\n\n"); - if (lastDblNl > 0) { - const tail = afterSender.slice(lastDblNl + 2).trim(); - if (tail.length >= 2) text = tail; + const fenceEnd = afterSender.indexOf("```\n", afterSender.indexOf("```json")); + if (fenceEnd > 0) { + const afterFence = afterSender.slice(fenceEnd + 4).replace(/^\s*\n/, ""); + if (afterFence.trim().length >= 2) text = afterFence.trim(); + } else { + const firstDblNl = afterSender.indexOf("\n\n"); + if (firstDblNl > 0) { + const tail = afterSender.slice(firstDblNl + 2).trim(); + if (tail.length >= 2) text = tail; + } } } // Strip timestamp prefix like "[Thu 2026-03-05 15:23 GMT+8] " @@ -1123,69 +1200,9 @@ const memosLocalPlugin = { const turnId = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; const captured = captureMessages(msgs, sessionKey, turnId, evidenceTag, ctx.log, captureOwner); - const recalledSummaries = lastRecalledSummaries; - const recalledIds = lastRecalledChunkIds; - let filteredCaptured = captured; - if (recalledSummaries.length > 0) { - const recalledContentSet = new Set(); - for (const cid of recalledIds) { - const ch = store.getChunk(cid); - if (ch) recalledContentSet.add(ch.content.toLowerCase()); - } - for (const s of recalledSummaries) { - recalledContentSet.add(s.toLowerCase()); - } - - const tokenize = (text: string): Set => { - const tokens = new Set(); - const words = text.split(/[\s,.:;!?,。:;!?、\n\r\t*#()\[\]{}""''「」—]+/).filter(w => w.length > 0); - for (const w of words) tokens.add(w); - const cleaned = text.replace(/[\s,.:;!?,。:;!?、\n\r\t*#()\[\]{}""''「」—]+/g, ""); - for (let i = 0; 
i < cleaned.length - 1; i++) { - tokens.add(cleaned.slice(i, i + 2)); - } - return tokens; - }; - - filteredCaptured = captured.filter(msg => { - if (msg.role === "user") return true; - const content = msg.content.toLowerCase(); - if (content.length < 10) return true; - - for (const recalled of recalledContentSet) { - if (recalled.length < 5) continue; - if (content.includes(recalled) || recalled.includes(content)) { - ctx.log.debug(`agent_end: skipping msg (role=${msg.role}) — substring match with recalled memory`); - return false; - } - const contentTokens = tokenize(content); - const recalledTokens = tokenize(recalled); - if (contentTokens.size < 3 || recalledTokens.size < 3) continue; - let overlap = 0; - for (const t of contentTokens) { - if (recalledTokens.has(t)) overlap++; - } - const ratio = overlap / contentTokens.size; - if (ratio > 0.5) { - ctx.log.debug(`agent_end: skipping msg (role=${msg.role}) — ${(ratio * 100).toFixed(0)}% token overlap with recalled memory`); - return false; - } - } - return true; - }); - - const skipped = captured.length - filteredCaptured.length; - if (skipped > 0) { - ctx.log.debug(`agent_end: filtered ${skipped}/${captured.length} messages as duplicates of recalled memories`); - } - } - - lastRecalledChunkIds = new Set(); - lastRecalledSummaries = []; - - if (filteredCaptured.length > 0) { - worker.enqueue(filteredCaptured); - telemetry.trackMemoryIngested(filteredCaptured.length); + if (captured.length > 0) { + worker.enqueue(captured); + telemetry.trackMemoryIngested(captured.length); } } catch (err) { api.logger.warn(`memos-local: capture failed: ${String(err)}`); diff --git a/apps/memos-local-openclaw/openclaw.plugin.json b/apps/memos-local-openclaw/openclaw.plugin.json index df6e05343..20c8b84f7 100644 --- a/apps/memos-local-openclaw/openclaw.plugin.json +++ b/apps/memos-local-openclaw/openclaw.plugin.json @@ -4,6 +4,9 @@ "description": "Full-write local conversation memory with hybrid search (RRF + MMR + recency). 
Provides memory_search, memory_get, task_summary, memory_timeline, memory_viewer for layered retrieval.", "kind": "memory", "version": "0.1.11", + "skills": [ + "skill/memos-memory-guide" + ], "homepage": "https://github.com/MemTensor/MemOS/tree/main/apps/memos-local-openclaw", "configSchema": { "type": "object", diff --git a/apps/memos-local-openclaw/package.json b/apps/memos-local-openclaw/package.json index 7ee152e49..441636e3b 100644 --- a/apps/memos-local-openclaw/package.json +++ b/apps/memos-local-openclaw/package.json @@ -1,7 +1,7 @@ { "name": "@memtensor/memos-local-openclaw-plugin", - "version": "1.0.0", - "description": "MemOS Local memory plugin for OpenClaw — full-write, hybrid-recall, progressive retrieval", + "version": "1.0.3", + "description": "MemOS Local memory plugin for OpenClaw \u2014 full-write, hybrid-recall, progressive retrieval", "type": "module", "main": "index.ts", "types": "dist/index.d.ts", @@ -20,6 +20,9 @@ "extensions": [ "./index.ts" ], + "skills": [ + "skill/memos-memory-guide" + ], "installDependencies": true }, "scripts": { @@ -28,6 +31,7 @@ "lint": "eslint src --ext .ts", "test": "vitest run", "test:watch": "vitest", + "test:accuracy": "tsx scripts/run-accuracy-test.ts", "postinstall": "node scripts/postinstall.cjs", "prepublishOnly": "npm run build" }, @@ -48,14 +52,16 @@ "better-sqlite3": "^12.6.2", "posthog-node": "^5.28.0", "puppeteer": "^24.38.0", + "semver": "^7.7.4", "uuid": "^10.0.0" }, "devDependencies": { "@types/better-sqlite3": "^7.6.12", "@types/node": "^22.10.0", + "@types/semver": "^7.7.1", "@types/uuid": "^10.0.0", "tsx": "^4.21.0", "typescript": "^5.7.0", "vitest": "^2.1.0" } -} +} \ No newline at end of file diff --git a/apps/memos-local-openclaw/scripts/postinstall.cjs b/apps/memos-local-openclaw/scripts/postinstall.cjs index b6593f9ea..526c0524e 100644 --- a/apps/memos-local-openclaw/scripts/postinstall.cjs +++ b/apps/memos-local-openclaw/scripts/postinstall.cjs @@ -33,6 +33,78 @@ 
${CYAN}${BOLD}┌───────────────────── log(`Plugin dir: ${DIM}${pluginDir}${RESET}`); log(`Node: ${process.version} Platform: ${process.platform}-${process.arch}`); +/* ═══════════════════════════════════════════════════════════ + * Pre-phase: Clean stale build artifacts on upgrade + * When openclaw re-installs a new version over an existing + * extensions dir, old dist/node_modules can conflict. + * We nuke them so npm install gets a clean slate, but + * preserve user data (.env, data/). + * ═══════════════════════════════════════════════════════════ */ + +function cleanStaleArtifacts() { + const isExtensionsDir = pluginDir.includes(path.join(".openclaw", "extensions")); + if (!isExtensionsDir) return; + + const pkgPath = path.join(pluginDir, "package.json"); + if (!fs.existsSync(pkgPath)) return; + + let installedVer = "unknown"; + try { + const pkg = JSON.parse(fs.readFileSync(pkgPath, "utf-8")); + installedVer = pkg.version || "unknown"; + } catch { /* ignore */ } + + const markerPath = path.join(pluginDir, ".installed-version"); + let prevVer = ""; + try { prevVer = fs.readFileSync(markerPath, "utf-8").trim(); } catch { /* first install */ } + + if (prevVer === installedVer) { + log(`Version unchanged (${installedVer}), skipping artifact cleanup.`); + return; + } + + if (prevVer) { + log(`Upgrade detected: ${DIM}${prevVer}${RESET} → ${GREEN}${installedVer}${RESET}`); + } else { + log(`Fresh install: ${GREEN}${installedVer}${RESET}`); + } + + const dirsToClean = ["dist", "node_modules"]; + let cleaned = 0; + for (const dir of dirsToClean) { + const full = path.join(pluginDir, dir); + if (fs.existsSync(full)) { + try { + fs.rmSync(full, { recursive: true, force: true }); + ok(`Cleaned stale ${dir}/`); + cleaned++; + } catch (e) { + warn(`Could not remove ${dir}/: ${e.message}`); + } + } + } + + const filesToClean = ["package-lock.json"]; + for (const f of filesToClean) { + const full = path.join(pluginDir, f); + if (fs.existsSync(full)) { + try { 
fs.unlinkSync(full); ok(`Removed stale ${f}`); cleaned++; } catch { /* ignore */ } + } + } + + try { fs.writeFileSync(markerPath, installedVer + "\n", "utf-8"); } catch { /* ignore */ } + + if (cleaned > 0) { + ok(`Cleaned ${cleaned} stale artifact(s). Fresh install will follow.`); + } +} + +try { + cleanStaleArtifacts(); +} catch (e) { + warn(`Artifact cleanup error: ${e.message}`); +} + /* ═══════════════════════════════════════════════════════════ * Phase 0: Ensure all dependencies are installed * ═══════════════════════════════════════════════════════════ */ @@ -102,6 +174,7 @@ function cleanupLegacy() { if (!fs.existsSync(extDir)) { log("No extensions directory found, skipping."); return; } const legacyDirs = [ + path.join(extDir, "memos-local"), path.join(extDir, "memos-lite"), path.join(extDir, "memos-lite-openclaw-plugin"), path.join(extDir, "node_modules", "@memtensor", "memos-lite-openclaw-plugin"), @@ -127,7 +200,7 @@ function cleanupLegacy() { const cfg = JSON.parse(raw); const entries = cfg?.plugins?.entries; if (entries) { - const oldKeys = ["memos-lite", "memos-lite-openclaw-plugin"]; + const oldKeys = ["memos-local", "memos-lite", "memos-lite-openclaw-plugin"]; let cfgChanged = false; for (const oldKey of oldKeys) { @@ -146,10 +219,12 @@ function cleanupLegacy() { const newEntry = entries["memos-local-openclaw-plugin"]; if (newEntry && typeof newEntry.source === "string") { const oldSource = newEntry.source; - if (oldSource.includes("memos-lite")) { + if (oldSource.includes("memos-lite") || (oldSource.includes("memos-local") && !oldSource.includes("memos-local-openclaw-plugin"))) { newEntry.source = oldSource .replace(/memos-lite-openclaw-plugin/g, "memos-local-openclaw-plugin") - .replace(/memos-lite/g, "memos-local"); + .replace(/memos-lite/g, "memos-local-openclaw-plugin") + .replace(/\/memos-local\//g, "/memos-local-openclaw-plugin/") + .replace(/\/memos-local$/g, "/memos-local-openclaw-plugin"); if (newEntry.source !== oldSource) { log(`Updated 
source path: ${DIM}${oldSource}${RESET} → ${GREEN}${newEntry.source}${RESET}`); cfgChanged = true; @@ -157,6 +232,16 @@ function cleanupLegacy() { } } + const slots = cfg?.plugins?.slots; + if (slots && typeof slots.memory === "string") { + const oldSlotNames = ["memos-local", "memos-lite", "memos-lite-openclaw-plugin"]; + if (oldSlotNames.includes(slots.memory)) { + log(`Migrated plugins.slots.memory: ${DIM}${slots.memory}${RESET} → ${GREEN}memos-local-openclaw-plugin${RESET}`); + slots.memory = "memos-local-openclaw-plugin"; + cfgChanged = true; + } + } + if (cfgChanged) { const backup = cfgPath + ".bak-" + Date.now(); fs.copyFileSync(cfgPath, backup); @@ -185,10 +270,77 @@ try { } /* ═══════════════════════════════════════════════════════════ - * Phase 2: Verify better-sqlite3 native module + * Phase 2: Install bundled skill (memos-memory-guide) + * ═══════════════════════════════════════════════════════════ */ + +function installBundledSkill() { + phase(2, "安装记忆技能 / Install memory skill"); + + const home = process.env.HOME || process.env.USERPROFILE || ""; + if (!home) { warn("Cannot determine HOME directory, skipping skill install."); return; } + + const skillSrc = path.join(pluginDir, "skill", "memos-memory-guide", "SKILL.md"); + if (!fs.existsSync(skillSrc)) { + warn("Bundled SKILL.md not found, skipping skill install."); + return; + } + + let pluginVersion = "0.0.0"; + try { + const pkg = JSON.parse(fs.readFileSync(path.join(pluginDir, "package.json"), "utf-8")); + pluginVersion = pkg.version || pluginVersion; + } catch { /* ignore */ } + + const skillContent = fs.readFileSync(skillSrc, "utf-8"); + const targets = [ + path.join(home, ".openclaw", "workspace", "skills", "memos-memory-guide"), + path.join(home, ".openclaw", "skills", "memos-memory-guide"), + ]; + + const meta = JSON.stringify({ ownerId: "memos-local-openclaw-plugin", slug: "memos-memory-guide", version: pluginVersion, publishedAt: Date.now() }); + const origin = JSON.stringify({ version: 1, 
registry: "memos-local-openclaw-plugin", slug: "memos-memory-guide", installedVersion: pluginVersion, installedAt: Date.now() }); + + for (const dest of targets) { + try { + fs.mkdirSync(dest, { recursive: true }); + fs.writeFileSync(path.join(dest, "SKILL.md"), skillContent, "utf-8"); + fs.writeFileSync(path.join(dest, "_meta.json"), meta, "utf-8"); + const clawHubDir = path.join(dest, ".clawhub"); + fs.mkdirSync(clawHubDir, { recursive: true }); + fs.writeFileSync(path.join(clawHubDir, "origin.json"), origin, "utf-8"); + ok(`Skill installed → ${DIM}${dest}${RESET}`); + } catch (e) { + warn(`Could not install skill to ${dest}: ${e.message}`); + } + } + + // Register in skills-lock.json so OpenClaw Dashboard can discover it + const lockPath = path.join(home, ".openclaw", "workspace", "skills-lock.json"); + try { + let lockData = { version: 1, skills: {} }; + if (fs.existsSync(lockPath)) { + lockData = JSON.parse(fs.readFileSync(lockPath, "utf-8")); + } + if (!lockData.skills) lockData.skills = {}; + lockData.skills["memos-memory-guide"] = { source: "memos-local-openclaw-plugin", sourceType: "plugin", computedHash: "" }; + fs.writeFileSync(lockPath, JSON.stringify(lockData, null, 2) + "\n", "utf-8"); + ok("Registered in skills-lock.json"); + } catch (e) { + warn(`Could not update skills-lock.json: ${e.message}`); + } +} + +try { + installBundledSkill(); +} catch (e) { + warn(`Skill install error: ${e.message}`); +} + +/* ═══════════════════════════════════════════════════════════ + * Phase 3: Verify better-sqlite3 native module * ═══════════════════════════════════════════════════════════ */ -phase(2, "检查 better-sqlite3 原生模块 / Check native module"); +phase(3, "检查 better-sqlite3 原生模块 / Check native module"); const sqliteModulePath = path.join(pluginDir, "node_modules", "better-sqlite3"); diff --git a/apps/memos-local-openclaw/scripts/run-accuracy-test.ts b/apps/memos-local-openclaw/scripts/run-accuracy-test.ts new file mode 100644 index 000000000..afb3b6c34 --- 
/dev/null +++ b/apps/memos-local-openclaw/scripts/run-accuracy-test.ts @@ -0,0 +1,835 @@ +#!/usr/bin/env npx tsx +/** + * MemOS Accuracy Test — sends data through OpenClaw Gateway (real pipeline). + * + * Ingest uses `openclaw agent` CLI so data flows through the full gateway, + * is processed by the memos plugin, and is visible in the Viewer UI. + * Search verification uses direct DB access via initPlugin. + * + * Usage: + * npx tsx scripts/run-accuracy-test.ts # quick mode (5 ingest, verify only) + * npx tsx scripts/run-accuracy-test.ts --full # full 50+ test cases + * npx tsx scripts/run-accuracy-test.ts --workers 3 # concurrent sessions (full mode) + * npx tsx scripts/run-accuracy-test.ts --skip-ingest # only run search checks (assumes data exists) + * + * Add to package.json: + * "test:accuracy": "tsx scripts/run-accuracy-test.ts" + */ + +import { execSync } from "child_process"; +import * as fs from "fs"; +import * as os from "os"; +import * as path from "path"; +import { initPlugin, type MemosLocalPlugin } from "../src/index"; + +// ─── CLI args ─── + +const args = process.argv.slice(2); +const FULL_MODE = args.includes("--full"); +const SKIP_INGEST = args.includes("--skip-ingest"); +const WORKERS = Number(args.find((_, i, a) => a[i - 1] === "--workers") ?? 2); +const INGEST_DELAY_MS = 3000; + +// ─── Config ─── + +function loadConfig() { + const home = process.env.HOME ?? process.env.USERPROFILE ?? "/tmp"; + const cfgPath = path.join(home, ".openclaw", "openclaw.json"); + if (!fs.existsSync(cfgPath)) { + throw new Error(`OpenClaw config not found: ${cfgPath}`); + } + const raw = JSON.parse(fs.readFileSync(cfgPath, "utf-8")); + return raw?.plugins?.entries?.["memos-local-openclaw-plugin"]?.config ?? 
{}; +} + +// ─── Test framework ─── + +interface TestResult { + category: string; + name: string; + pass: boolean; + detail: string; + durationMs: number; +} + +const results: TestResult[] = []; +const RUN_ID = Date.now(); +const SESSION_PREFIX = `acc-${RUN_ID}`; +let sessionSeq = 0; + +function mkSession(label: string) { + return `${SESSION_PREFIX}-${label}-${++sessionSeq}`; +} + +function log(msg: string) { + const t = new Date().toLocaleTimeString("zh-CN", { hour12: false }); + console.log(`[${t}] ${msg}`); +} + +// ─── Progress tracker ─── + +class ProgressTracker { + private total: number; + private done = 0; + private startMs = Date.now(); + private phaseName: string; + + constructor(phaseName: string, total: number) { + this.phaseName = phaseName; + this.total = total; + } + + tick(label: string) { + this.done++; + const elapsed = Date.now() - this.startMs; + const pct = Math.round((this.done / this.total) * 100); + const remaining = this.total - this.done; + const avgMs = elapsed / this.done; + const eta = Math.round(remaining * avgMs); + + const barLen = 30; + const filled = Math.round(barLen * this.done / this.total); + const bar = "█".repeat(filled) + "░".repeat(barLen - filled); + + log( + ` [${bar}] ${this.done}/${this.total} (${pct}%)` + + ` elapsed: ${fmtDur(elapsed)} ETA: ${remaining > 0 ? 
fmtDur(eta) : "done"}` + + ` — ${label}`, + ); + } + + summary(): string { + const elapsed = Date.now() - this.startMs; + return `${this.phaseName}: ${this.done}/${this.total} in ${fmtDur(elapsed)}`; + } +} + +function fmtDur(ms: number): string { + const s = Math.floor(ms / 1000); + if (s < 60) return `${s}s`; + const m = Math.floor(s / 60); + const sec = s % 60; + return `${m}m${sec}s`; +} + +function hitContains(hits: any[], keyword: string): boolean { + return hits.some( + (h: any) => + h.original_excerpt?.toLowerCase().includes(keyword.toLowerCase()) || + h.summary?.toLowerCase().includes(keyword.toLowerCase()), + ); +} + +// ─── Send message through OpenClaw Gateway ─── + +function sendViaGateway(sessionId: string, message: string): boolean { + const tmpFile = path.join(os.tmpdir(), `memos-test-msg-${Date.now()}.txt`); + try { + fs.writeFileSync(tmpFile, message, "utf-8"); + execSync( + `openclaw agent --session-id "${sessionId}" --message "$(cat '${tmpFile}')" --json`, + { timeout: 120_000, stdio: "pipe" }, + ); + return true; + } catch (e: any) { + log(` [WARN] gateway send failed: ${e.message?.slice(0, 200)}`); + return false; + } finally { + try { fs.unlinkSync(tmpFile); } catch {} + } +} + +// ─── Test data: realistic, multi-turn, long-form conversations ─── + +interface ConversationCase { + id: string; + label: string; + sessionId: string; + messages: string[]; + group: "dedup" | "topic" | "search" | "summary" | "cross-lang"; +} + +function buildTestCases(): ConversationCase[] { + const cases: ConversationCase[] = []; + + // ═══════════════════════════════════════════ + // Group 1: Dedup — exact / semantic / merge + // ═══════════════════════════════════════════ + + const dedupSession1 = mkSession("dedup-exact"); + cases.push({ + id: "dedup-exact-1", + label: "Dedup: exact duplicate (msg 1/3)", + sessionId: dedupSession1, + group: "dedup", + messages: [ + `我们的线上 Redis 集群配置如下:Redis 版本 6.2.14,部署在 3 台 AWS ElastiCache r6g.xlarge 节点上,组成 3 主 3 从的 Cluster 
模式。maxmemory 设置为 12GB,淘汰策略用 allkeys-lru,连接池大小 50,超时时间 3 秒。所有缓存 key 统一加 "prod:" 前缀,TTL 默认 1 小时,热点数据(如用户 session、商品详情)TTL 设为 24 小时。`, + ], + }); + cases.push({ + id: "dedup-exact-2", + label: "Dedup: exact duplicate (msg 2/3, same content)", + sessionId: dedupSession1, + group: "dedup", + messages: [ + `我们的线上 Redis 集群配置如下:Redis 版本 6.2.14,部署在 3 台 AWS ElastiCache r6g.xlarge 节点上,组成 3 主 3 从的 Cluster 模式。maxmemory 设置为 12GB,淘汰策略用 allkeys-lru,连接池大小 50,超时时间 3 秒。所有缓存 key 统一加 "prod:" 前缀,TTL 默认 1 小时,热点数据(如用户 session、商品详情)TTL 设为 24 小时。`, + ], + }); + cases.push({ + id: "dedup-exact-3", + label: "Dedup: exact duplicate (msg 3/3, same content again)", + sessionId: dedupSession1, + group: "dedup", + messages: [ + `我们的线上 Redis 集群配置如下:Redis 版本 6.2.14,部署在 3 台 AWS ElastiCache r6g.xlarge 节点上,组成 3 主 3 从的 Cluster 模式。maxmemory 设置为 12GB,淘汰策略用 allkeys-lru,连接池大小 50,超时时间 3 秒。所有缓存 key 统一加 "prod:" 前缀,TTL 默认 1 小时,热点数据(如用户 session、商品详情)TTL 设为 24 小时。`, + ], + }); + + const dedupSession2 = mkSession("dedup-semantic"); + cases.push({ + id: "dedup-sem-1", + label: "Dedup: semantic dup (PostgreSQL v1)", + sessionId: dedupSession2, + group: "dedup", + messages: [ + `主数据库使用 PostgreSQL 16,部署在 AWS RDS 的 db.r6g.2xlarge 实例上。已开启读写分离,1 个 writer 实例 + 2 个 reader 副本做负载均衡。连接池用 PgBouncer,transaction pooling 模式,max_client_conn 设为 200,default_pool_size 设为 25。WAL 日志异步复制,backup 策略是每日自动快照 + 开启 Point-in-Time Recovery(PITR),保留 7 天。`, + ], + }); + cases.push({ + id: "dedup-sem-2", + label: "Dedup: semantic dup (PostgreSQL v2 — reworded)", + sessionId: dedupSession2, + group: "dedup", + messages: [ + `生产环境的核心关系型数据库是 PG 16,跑在 Amazon RDS 上面,机型选的是 db.r6g.2xlarge。数据库做了读写分离——一个主库负责写入,两个只读副本分担查询流量。中间层用 PgBouncer 做连接池管理,采用事务级池化,最大客户端连接数 200,默认池大小 25。日志走 WAL 异步复制,每天自动创建快照备份,还启用了时间点恢复(PITR),保留窗口 7 天。`, + ], + }); + + const dedupSession3 = mkSession("dedup-merge"); + cases.push({ + id: "dedup-merge-1", + label: "Dedup: merge — old state (React 18 + Vite)", + sessionId: dedupSession3, + group: "dedup", + messages: [ + `前端项目用 React 18.2 
搭配 Vite 5.0 构建,TypeScript 5.3 严格模式。状态管理用 Zustand + React Query v5,UI 组件库用 Ant Design 5.x。打包产物部署到 CloudFront CDN,Gzip + Brotli 双压缩,首屏 LCP 控制在 1.8 秒以内。`, + ], + }); + cases.push({ + id: "dedup-merge-2", + label: "Dedup: merge — new state (migrated to Next.js 14)", + sessionId: dedupSession3, + group: "dedup", + messages: [ + `前端已经从 React 18 + Vite 迁移到了 Next.js 14 App Router,改用 Vercel 部署。状态管理保持 Zustand + React Query 不变,但 UI 组件库换成了 Shadcn/ui + Tailwind CSS。SSR + ISR 混合渲染,Core Web Vitals 全绿,LCP 降到 1.2 秒。`, + ], + }); + + // ═══════════════════════════════════════════ + // Group 2: Topic boundary detection + // ═══════════════════════════════════════════ + + const topicSameSession = mkSession("topic-same"); + cases.push({ + id: "topic-same-1", + label: "Topic: same topic (Nginx config, part 1)", + sessionId: topicSameSession, + group: "topic", + messages: [ + `帮我配置生产环境的 Nginx 反向代理。需求:监听 443 端口,SSL/TLS 证书放在 /etc/nginx/ssl/ 目录下,upstream 后端是 localhost:3000 的 Node.js 应用。需要配置 worker_processes auto,worker_connections 4096,以及 proxy_set_header 把真实 IP 传到后端。`, + ], + }); + cases.push({ + id: "topic-same-2", + label: "Topic: same topic (Nginx config, part 2 — add gzip + cache)", + sessionId: topicSameSession, + group: "topic", + messages: [ + `Nginx 配置再加几个优化:开启 gzip 压缩(gzip on; gzip_types text/plain text/css application/json application/javascript; gzip_min_length 1024;),静态资源加浏览器缓存头(location ~* \\.(js|css|png|jpg|svg|woff2)$ { expires 30d; add_header Cache-Control "public, immutable"; }),还要加上 HTTP/2 和 HSTS(add_header Strict-Transport-Security "max-age=63072000; includeSubDomains; preload";)。`, + ], + }); + + const topicSwitchSession = mkSession("topic-switch"); + cases.push({ + id: "topic-switch-1", + label: "Topic: switch — Docker (tech)", + sessionId: topicSwitchSession, + group: "topic", + messages: [ + `帮我写一个多阶段 Dockerfile,用于构建 Node.js 20 的生产镜像。第一阶段用 node:20-alpine 作为 builder,安装 pnpm,复制 package.json 和 pnpm-lock.yaml,然后 pnpm install --frozen-lockfile --prod=false,再 pnpm run 
build。第二阶段用干净的 node:20-alpine,只复制 dist/ 和 node_modules/,暴露 3000 端口,CMD ["node", "dist/server.js"]。同时生成一个 .dockerignore 排除 node_modules、.git、.env、coverage、*.md。`, + ], + }); + cases.push({ + id: "topic-switch-2", + label: "Topic: switch — cooking (completely different domain)", + sessionId: topicSwitchSession, + group: "topic", + messages: [ + `今天想试试做正宗的红烧肉。食材清单:五花肉 500g(切 3cm 方块)、冰糖 30g、生抽 3 勺、老抽 1 勺、料酒 2 勺、八角 2 颗、桂皮 1 小段、香叶 2 片、干辣椒 2 个、生姜 4 片、葱白 3 段。步骤:五花肉冷水下锅焯水 5 分钟,捞出洗净。锅里放少量油,中小火炒冰糖至焦糖色,下五花肉翻炒上色。加料酒、生抽、老抽,放八角桂皮香叶,加没过肉的热水,大火煮开后转小火炖 50 分钟。最后大火收汁,撒葱花出锅。`, + ], + }); + + // ═══════════════════════════════════════════ + // Group 3: Search precision + recall data + // ═══════════════════════════════════════════ + + const searchSession = mkSession("search-data"); + cases.push({ + id: "search-mysql", + label: "Search: MySQL InnoDB MVCC", + sessionId: searchSession, + group: "search", + messages: [ + `线上 MySQL 8.0 数据库要点总结:存储引擎统一用 InnoDB,默认行级锁,支持 MVCC 多版本并发控制。事务隔离级别设为 REPEATABLE READ(MySQL 默认),innodb_buffer_pool_size 设为物理内存的 70%(当前 28GB / 40GB),innodb_flush_log_at_trx_commit=1 保证事务持久性。慢查询日志开启,long_query_time=2 秒,定期用 pt-query-digest 分析 Top 20 慢查询。索引策略:核心业务表必须有聚簇索引,联合索引遵循最左前缀原则,覆盖索引优先避免回表。`, + ], + }); + cases.push({ + id: "search-k8s", + label: "Search: Kubernetes cluster", + sessionId: searchSession, + group: "search", + messages: [ + `Kubernetes 生产集群规模和配置:3 个 master 节点(etcd 高可用集群)+ 8 个 worker 节点,全部部署在阿里云 ECS ecs.c7.2xlarge(8c16g)上。容器运行时用 containerd 1.7,网络插件 Calico VXLAN 模式。部署方式:核心服务 Deployment + HPA(CPU 60% 触发扩容,最小 2 副本最大 10 副本),有状态服务(MySQL、Redis)用 StatefulSet + PVC。日志用 Fluent Bit DaemonSet 采集到 ES,监控用 Prometheus Operator + kube-state-metrics。`, + ], + }); + cases.push({ + id: "search-review", + label: "Search: Code Review process", + sessionId: searchSession, + group: "search", + messages: [ + `团队 Code Review 流程规范:每周三下午 2-4 点集中做 Code Review Session,其他时间异步 review。GitLab MR 模板包含:变更描述、影响范围、测试情况、截图/录屏。Review 规则:至少 2 人 approve 才能合并,其中 1 人必须是 Tech Lead 或 Senior。自动化检查:CI 跑 
lint(ESLint + Prettier)、单元测试(覆盖率门禁 80%)、类型检查、依赖安全扫描(Snyk)。Code Review 重点关注:逻辑正确性 > 性能 > 可读性 > 编码风格。`, + ], + }); + cases.push({ + id: "search-elk", + label: "Search: ELK logging stack", + sessionId: searchSession, + group: "search", + messages: [ + `日志系统架构:ELK 栈。Elasticsearch 7.17 集群(3 节点,每节点 64GB 内存 + 2TB SSD),Logstash 作为日志处理管道(grok 解析 + 字段映射 + 时间戳标准化),Kibana 做可视化和告警。日志分级:应用日志走 Fluent Bit → Kafka(缓冲) → Logstash → ES,系统日志直接 Filebeat → ES。索引策略:按天滚动创建索引(logs-app-YYYY.MM.DD),ILM 策略 hot/warm/cold 三层,hot 7 天 SSD,warm 30 天 HDD,cold 90 天归档到 S3 Glacier。`, + ], + }); + cases.push({ + id: "search-monitoring", + label: "Search: Prometheus Grafana monitoring", + sessionId: searchSession, + group: "search", + messages: [ + `监控告警体系:Prometheus 2.45 + Grafana 10.x + AlertManager。Prometheus 抓取间隔 15 秒,数据保留 30 天。主要 exporter:node_exporter(主机指标)、cadvisor(容器指标)、mysqld_exporter、redis_exporter、blackbox_exporter(HTTP 探测)。Grafana 仪表盘:系统概览、应用 QPS/延迟/错误率、数据库连接池、Redis 命中率。告警规则:CPU > 80% 持续 5 分钟 → 企业微信通知,5xx 错误率 > 1% → 电话告警(PagerDuty),磁盘使用率 > 85% → 邮件通知。`, + ], + }); + + // Recall data — DevOps tools + const recallSession = mkSession("recall-devops"); + cases.push({ + id: "search-jenkins", + label: "Search: Jenkins CI pipeline", + sessionId: recallSession, + group: "search", + messages: [ + `CI/CD Pipeline 用 Jenkins 2.x,Jenkinsfile 放在项目根目录,采用 declarative pipeline 语法。流水线分 5 个 stage:Checkout → Lint & Type Check → Unit Test(Jest,覆盖率报告上传 SonarQube)→ Build(Docker 多阶段构建)→ Deploy(kubectl apply 到对应环境)。分支策略:feature/* 只跑 lint + test,develop 跑全量 + 部署 staging,main 跑全量 + 部署 production(需要人工审批)。Jenkins 节点用 Kubernetes Pod 作为 agent,按需弹性伸缩。`, + ], + }); + cases.push({ + id: "search-terraform", + label: "Search: Terraform IaC", + sessionId: recallSession, + group: "search", + messages: [ + `基础设施即代码用 Terraform 1.6,state 存在 S3 bucket + DynamoDB 做状态锁,防止并发修改。模块化组织:modules/networking(VPC、子网、安全组)、modules/compute(ECS 实例、Auto Scaling Group)、modules/database(RDS、ElastiCache)、modules/monitoring(CloudWatch、SNS)。环境用 
workspace 隔离:dev / staging / production。变量通过 terraform.tfvars 和 CI 环境变量注入。每次变更走 PR,CI 自动执行 terraform plan,输出 diff 到 PR 评论,merge 后自动 terraform apply。`, + ], + }); + + // ═══════════════════════════════════════════ + // Group 4: Summary quality — long text + // ═══════════════════════════════════════════ + + const summarySession = mkSession("summary"); + cases.push({ + id: "summary-microservices", + label: "Summary: complex microservices architecture", + sessionId: summarySession, + group: "summary", + messages: [ + `微服务架构详细设计方案如下。服务拆分:user-service 负责用户注册登录、OAuth2.0 第三方授权、RBAC 权限管理、用户画像标签;order-service 处理订单创建/取消/退款全生命周期,支持分库分表(按 user_id 取模 16 库 64 表);payment-service 对接支付宝当面付、微信 JSAPI 支付、银联快捷支付,所有支付回调统一走消息队列异步处理;inventory-service 管理商品库存,用 Redis 预扣 + MySQL 最终一致性方案防超卖;notification-service 负责短信(阿里云 SMS)、邮件(SES)、App Push(极光推送)、站内信。所有服务 Kubernetes 部署,Istio 服务网格做流量管理和灰度发布,Jaeger 全链路追踪,SkyWalking 做 APM 性能监控。服务间通信:同步走 gRPC(protobuf 序列化),异步走 RocketMQ 5.0。API Gateway 用 Kong,统一鉴权、限流、日志。`, + ], + }); + cases.push({ + id: "summary-migration", + label: "Summary: DB migration plan", + sessionId: summarySession, + group: "summary", + messages: [ + `数据库迁移三阶段实施方案。Q1(1-3 月):用户表从 MySQL 迁移到 PostgreSQL。第一步搭建 PG 目标库,用 pgloader 做初始全量同步;第二步开启 Maxwell → Kafka → PG 的实时 CDC 增量同步;第三步应用层改为双写模式(先写 MySQL 再写 PG),持续一个月做数据一致性校验(每天凌晨全表 count + 随机抽样 1000 条 hash 比对);第四步灰度切读到 PG(先 10% → 50% → 100%),确认无误后停止双写。Q2(4-6 月):订单表和支付表迁移,用 Debezium CDC 替代 Maxwell(支持 exactly-once delivery),同样双写 + 校验 + 灰度流程。Q3(7-9 月):剩余表迁移完成,停掉旧 MySQL 集群。每个阶段迁移完成后保留旧库只读权限 90 天,作为回滚保险。`, + ], + }); + + // ═══════════════════════════════════════════ + // Group 5: Cross-language + // ═══════════════════════════════════════════ + + const crossLangSession = mkSession("cross-lang"); + cases.push({ + id: "cross-lang-en", + label: "Cross-lang: Docker Compose (English)", + sessionId: crossLangSession, + group: "cross-lang", + messages: [ + `Our local development setup uses Docker Compose with four services: "api" runs the Node.js backend on 
port 3000 with hot-reload via nodemon, "web" runs the Next.js frontend on port 3001 with Fast Refresh, "postgres" uses the official PostgreSQL 16 image with a named volume for data persistence, and "redis" uses Redis 7 Alpine for caching. We also have a "mailhog" service for testing email delivery locally. All services share a custom bridge network called "dev-net". Environment variables are injected via a .env file referenced in docker-compose.yml.`, + ], + }); + cases.push({ + id: "cross-lang-zh", + label: "Cross-lang: Docker Compose (Chinese, same meaning)", + sessionId: crossLangSession, + group: "cross-lang", + messages: [ + `本地开发环境用 Docker Compose 编排四个核心服务:api 容器跑 Node.js 后端(端口 3000,nodemon 热更新),web 容器跑 Next.js 前端(端口 3001,Fast Refresh),postgres 容器用官方 PostgreSQL 16 镜像(命名卷持久化数据),redis 容器用 Redis 7 Alpine 做缓存。另外还有一个 mailhog 容器用来本地测试邮件发送。所有容器通过自定义桥接网络 dev-net 互通。环境变量通过 .env 文件注入。`, + ], + }); + + // ═══════════════════════════════════════════ + // Full mode: additional cases for scale + // ═══════════════════════════════════════════ + + if (FULL_MODE) { + const fullSession = mkSession("full-extra"); + + cases.push({ + id: "full-api-doc", + label: "Full: API documentation (Swagger/OpenAPI)", + sessionId: fullSession, + group: "search", + messages: [ + `API 文档自动化方案:使用 Swagger/OpenAPI 3.0 规范,结合 swagger-jsdoc 从代码注释自动生成 API 文档。每个接口必须标注:summary、description、parameters(含类型和校验规则)、requestBody schema、responses(200/400/401/403/404/500 各场景)。CI 流水线中自动生成 openapi.json,部署到 Swagger UI(内网 /api-docs 路径)。SDK 生成:用 openapi-generator 给前端自动生成 TypeScript axios client,给移动端生成 Swift/Kotlin client。文档变更必须随代码 PR 一起提交,CI 校验 schema 兼容性(不允许破坏性变更,用 oasdiff 检测)。`, + ], + }); + cases.push({ + id: "full-backup", + label: "Full: Database backup strategy", + sessionId: fullSession, + group: "search", + messages: [ + `数据库备份策略。MySQL:每日凌晨 2 点 mysqldump 全量备份(--single-transaction --routines --triggers),每小时 binlog 增量备份,所有备份加密后上传到 S3 Standard-IA,保留 30 天。PostgreSQL:每日 pg_basebackup 全量 + 持续 WAL 归档(archive_command 
到 S3),支持 PITR。恢复演练:每月第一个周六做一次恢复演练,从 S3 拉取备份恢复到演练环境,验证数据完整性(行数对比 + 业务关键数据校验)。恢复 RTO 目标 < 1 小时,RPO 目标 < 1 小时。监控:备份任务状态接入 Prometheus,失败立即 PagerDuty 告警。`, + ], + }); + cases.push({ + id: "full-perf", + label: "Full: React performance optimization", + sessionId: fullSession, + group: "search", + messages: [ + `React 前端性能优化记录。代码层面:用 React.lazy + Suspense 做路由级代码分割,首屏 JS 从 1.2MB 降到 380KB;React.memo + useMemo 避免不必要的重渲染,列表组件用 react-window 虚拟化(1 万条数据渲染从 3.2 秒降到 60ms);图片全部用 next/image 自动 WebP 转换 + 懒加载。构建层面:Vite 5 tree-shaking + dynamic import,第三方库用 CDN 外置(React/ReactDOM/Lodash)。Lighthouse 指标:Performance 从 45 提升到 92,FCP 1.1s,LCP 1.8s,CLS 0.02。监控:接入 web-vitals 库实时上报 Core Web Vitals 到 ClickHouse,Grafana 展示 P75/P90/P99 趋势。`, + ], + }); + + const fullSession2 = mkSession("full-devops"); + cases.push({ + id: "full-sonarqube", + label: "Full: SonarQube quality gate", + sessionId: fullSession2, + group: "search", + messages: [ + `代码质量门禁用 SonarQube 9.x。Quality Gate 规则:新代码覆盖率 > 80%,整体覆盖率 > 65%,代码重复率 < 3%,无新增 Blocker/Critical 级别的 Bug 和漏洞,Maintainability Rating 必须 A 级。CI 集成:Jenkins pipeline 中在 test stage 之后执行 sonar-scanner,扫描结果推送到 SonarQube Server,Quality Gate 不通过则 pipeline 失败。自定义规则:在默认 Sonar way profile 基础上,新增了 SQL 注入检测、硬编码密钥检测、日志敏感信息检测等自定义规则。每周一生成代码质量周报,邮件发送给团队 Tech Lead。`, + ], + }); + cases.push({ + id: "full-ansible", + label: "Full: Ansible server management", + sessionId: fullSession2, + group: "search", + messages: [ + `服务器配置管理用 Ansible 2.15。Inventory 文件按环境分组:[dev]、[staging]、[production],每个环境有独立的 group_vars。核心 Playbook:server-init.yml(系统初始化:时区/NTP/防火墙/用户/SSH 加固),deploy-app.yml(应用部署:拉取镜像/更新 compose 文件/滚动重启),monitor-setup.yml(安装 node_exporter + fluent-bit)。Ansible Vault 加密所有密钥和密码。执行策略:变更先在 staging 跑一遍(--check 模式预演),确认无误后在 production 执行(每次最多 2 台,serial: 2)。所有 playbook 执行日志记录到 ELK。`, + ], + }); + + const fullSession3 = mkSession("full-unrelated"); + cases.push({ + id: "full-company-event", + label: "Full: unrelated (company annual party)", + sessionId: fullSession3, + group: "dedup", 
+ messages: [ + `公司年会安排确定了。时间:12 月 20 日(周六)下午 2 点到晚上 9 点。地点:杭州西湖国宾馆 3 号楼宴会厅,可容纳 300 人。议程:2:00-3:00 CEO 年度总结和明年规划,3:00-4:30 各部门优秀项目展示(每组 10 分钟),4:30-5:00 茶歇,5:00-6:30 年度颁奖(最佳团队、最佳个人、最佳新人、创新奖),6:30-9:00 晚宴 + 文艺表演 + 抽奖。每个部门需要准备至少一个节目,节目清单 12 月 10 日前提交给 HR 小王。预算:人均 500 元。`, + ], + }); + cases.push({ + id: "full-training", + label: "Full: unrelated (new employee training)", + sessionId: fullSession3, + group: "dedup", + messages: [ + `新员工入职培训计划(为期两周)。第一周:Day 1 公司文化和价值观介绍、HR 制度讲解、IT 账号开通;Day 2-3 技术栈总览(架构图、代码仓库结构、本地开发环境搭建);Day 4 编码规范培训(TypeScript 规范、ESLint 规则、命名约定、文件组织);Day 5 Git 工作流培训(Git Flow、分支命名、Commit Message 规范、MR 流程)。第二周:Day 6-7 跟随导师做一个入门任务(小 feature 开发);Day 8-9 Code Review 流程实践(参加 Review Session、自己提交 MR 被 review);Day 10 入职考核(代码 quiz + 流程问答 + 导师评价)。`, + ], + }); + } + + return cases; +} + +// ─── Search cases ─── + +interface SearchCase { + query: string; + expectKeyword: string; + category: "keyword" | "semantic" | "negative" | "recall"; + topK: number; + minScore?: number; + shouldFind: boolean; +} + +function buildSearchCases(): SearchCase[] { + const cases: SearchCase[] = [ + { query: "MySQL InnoDB MVCC 行锁 innodb_buffer_pool_size", expectKeyword: "InnoDB", category: "keyword", topK: 5, shouldFind: true }, + { query: "Kubernetes ECS 阿里云 容器集群 Calico", expectKeyword: "Kubernetes", category: "keyword", topK: 5, shouldFind: true }, + { query: "Prometheus Grafana AlertManager 监控告警", expectKeyword: "Prometheus", category: "keyword", topK: 5, shouldFind: true }, + { query: "ELK Elasticsearch Logstash Kibana 日志", expectKeyword: "Elasticsearch", category: "keyword", topK: 5, shouldFind: true }, + + { query: "数据库事务隔离级别和并发控制机制", expectKeyword: "MVCC", category: "semantic", topK: 5, shouldFind: true }, + { query: "容器编排平台和自动扩容策略", expectKeyword: "Kubernetes", category: "semantic", topK: 5, shouldFind: true }, + { query: "代码质量审查团队协作流程", expectKeyword: "Review", category: "semantic", topK: 5, shouldFind: true }, + { query: "应用日志集中采集存储和检索", expectKeyword: "ELK", category: 
"semantic", topK: 5, shouldFind: true }, + + { query: "深度学习 PyTorch GPU 训练模型 CUDA 显存", expectKeyword: "MySQL", category: "negative", topK: 5, minScore: 0.65, shouldFind: false }, + { query: "量化交易策略回测 Alpha 因子挖掘", expectKeyword: "Kubernetes", category: "negative", topK: 5, minScore: 0.65, shouldFind: false }, + + { query: "CI/CD 流水线 自动化部署 发布流程", expectKeyword: "Jenkins", category: "recall", topK: 10, shouldFind: true }, + { query: "基础设施即代码 IaC 云资源管理", expectKeyword: "Terraform", category: "recall", topK: 10, shouldFind: true }, + { query: "Docker Compose 本地开发环境 容器编排", expectKeyword: "Docker", category: "recall", topK: 5, shouldFind: true }, + ]; + + if (FULL_MODE) { + cases.push( + { query: "API 接口文档自动生成 Swagger OpenAPI", expectKeyword: "Swagger", category: "keyword", topK: 5, shouldFind: true }, + { query: "数据库定时备份恢复策略 mysqldump", expectKeyword: "备份", category: "keyword", topK: 5, shouldFind: true }, + { query: "React 性能优化 Lighthouse 代码分割", expectKeyword: "React", category: "keyword", topK: 5, shouldFind: true }, + { query: "代码质量门禁覆盖率重复率检测", expectKeyword: "SonarQube", category: "recall", topK: 10, shouldFind: true }, + { query: "服务器批量配置管理自动化运维 Playbook", expectKeyword: "Ansible", category: "recall", topK: 10, shouldFind: true }, + ); + } + + return cases; +} + +// ─── Register sessions into OpenClaw sessions.json so they appear in UI dropdown ─── + +function registerSessionsInStore(cases: ConversationCase[]) { + const home = process.env.HOME ?? process.env.USERPROFILE ?? 
"/tmp"; + const storePath = path.join(home, ".openclaw", "agents", "main", "sessions", "sessions.json"); + if (!fs.existsSync(storePath)) { + log("[WARN] sessions.json not found, skipping UI registration"); + return; + } + + const store = JSON.parse(fs.readFileSync(storePath, "utf-8")); + const sessionsDir = path.dirname(storePath); + const seen = new Set(); + let added = 0; + + for (const c of cases) { + if (seen.has(c.sessionId)) continue; + seen.add(c.sessionId); + + const storeKey = `agent:main:${c.sessionId}`; + if (store[storeKey]) continue; + + const sessionFile = path.join(sessionsDir, `${c.sessionId}.jsonl`); + if (!fs.existsSync(sessionFile)) continue; + + // acc-1773286763918-dedup-exact-1 -> dedup-exact + const shortName = c.sessionId + .replace(/^acc-\d+-/, "") + .replace(/-\d+$/, ""); + + store[storeKey] = { + sessionId: c.sessionId, + updatedAt: Date.now(), + systemSent: true, + abortedLastRun: false, + chatType: "direct", + label: `[test] ${shortName}`, + displayName: `Test: ${shortName}`, + origin: { + provider: "cli", + surface: "cli", + chatType: "direct", + label: `accuracy-test:${shortName}`, + }, + sessionFile, + }; + added++; + } + + fs.writeFileSync(storePath, JSON.stringify(store, null, 2), "utf-8"); + log(`Registered ${added} test sessions in sessions.json (UI dropdown)`); +} + +// ─── Ingest via Gateway ─── + +async function ingestPhase(cases: ConversationCase[]) { + const totalMsgs = cases.reduce((a, c) => a + c.messages.length, 0); + log(`Sending ${cases.length} conversations (${totalMsgs} messages) through OpenClaw Gateway...`); + log(`(Each message goes through full gateway → plugin pipeline, visible in Viewer)\n`); + + const tracker = new ProgressTracker("Ingest", totalMsgs); + const buckets: ConversationCase[][] = Array.from({ length: WORKERS }, () => []); + cases.forEach((c, i) => buckets[i % WORKERS].push(c)); + + let successCount = 0; + let failCount = 0; + + const workerFn = async (workerId: number, bucket: ConversationCase[]) 
=> { + for (const c of bucket) { + for (const msg of c.messages) { + const ok = sendViaGateway(c.sessionId, msg); + if (ok) { + successCount++; + } else { + failCount++; + } + tracker.tick(`${ok ? "OK" : "FAIL"} ${c.label}`); + await new Promise((r) => setTimeout(r, INGEST_DELAY_MS)); + } + } + }; + + const t0 = performance.now(); + await Promise.all( + buckets.map((b, i) => (b.length > 0 ? workerFn(i + 1, b) : Promise.resolve())), + ); + const dur = Math.round(performance.now() - t0); + + log(`\nIngest complete: ${successCount} sent, ${failCount} failed (${(dur / 1000).toFixed(1)}s)\n`); + + log("Waiting 10s for ingest pipeline to process all messages..."); + await new Promise((r) => setTimeout(r, 10_000)); + + registerSessionsInStore(cases); + + return { successCount, failCount }; +} + +// ─── Verify phase ─── + +async function runSearchTests(plugin: MemosLocalPlugin, cases: SearchCase[], tracker: ProgressTracker) { + const searchTool = plugin.tools.find((t) => t.name === "memory_search")!; + + for (const c of cases) { + const t0 = performance.now(); + const result = (await searchTool.handler({ + query: c.query, + maxResults: c.topK, + minScore: c.minScore, + })) as any; + const dur = Math.round(performance.now() - t0); + const hits = result.hits ?? []; + const found = hitContains(hits, c.expectKeyword); + + if (c.category === "negative") { + const pass = !found; + results.push({ + category: "Precision", + name: `negative: "${c.query.slice(0, 25)}..."`, + pass, + detail: `should NOT contain "${c.expectKeyword}": ${pass ? 
"OK" : "FAIL"} (${hits.length} hits)`, + durationMs: dur, + }); + } else if (c.category === "keyword") { + results.push({ + category: "Precision", + name: `keyword: ${c.expectKeyword}`, + pass: found, + detail: `top${c.topK} contains "${c.expectKeyword}": ${found}`, + durationMs: dur, + }); + } else if (c.category === "semantic") { + results.push({ + category: "Precision", + name: `semantic: ${c.expectKeyword}`, + pass: found, + detail: `top${c.topK} contains "${c.expectKeyword}": ${found}`, + durationMs: dur, + }); + } else if (c.category === "recall") { + results.push({ + category: "Recall", + name: `recall: ${c.expectKeyword}`, + pass: found, + detail: found ? "found" : "missed", + durationMs: dur, + }); + } + tracker.tick(`${c.category}: ${c.expectKeyword}`); + } +} + +async function runDedupChecks(plugin: MemosLocalPlugin, tracker: ProgressTracker) { + const searchTool = plugin.tools.find((t) => t.name === "memory_search")!; + + const t0 = performance.now(); + const r1 = (await searchTool.handler({ query: "Redis ElastiCache 集群 maxmemory allkeys-lru 连接池", maxResults: 10 })) as any; + const redisHits = (r1.hits ?? []).filter((h: any) => hitContains([h], "Redis") || hitContains([h], "ElastiCache")); + const exactPass = redisHits.length >= 1 && redisHits.length <= 2; + results.push({ category: "Dedup", name: "exact dup (Redis x3 → 1-2)", pass: exactPass, detail: `${redisHits.length} active hits (expect 1-2)`, durationMs: Math.round(performance.now() - t0) }); + tracker.tick("dedup: exact dup (Redis)"); + + const t1 = performance.now(); + const r2 = (await searchTool.handler({ query: "PostgreSQL RDS PgBouncer 读写分离 WAL", maxResults: 10 })) as any; + const pgHits = (r2.hits ?? 
[]).filter((h: any) => hitContains([h], "PostgreSQL") || hitContains([h], "PG ") || hitContains([h], "PgBouncer")); + const semPass = pgHits.length >= 1 && pgHits.length <= 2; + results.push({ category: "Dedup", name: "semantic dup (PG x2 → 1-2)", pass: semPass, detail: `${pgHits.length} active hits (expect 1-2)`, durationMs: Math.round(performance.now() - t1) }); + tracker.tick("dedup: semantic dup (PG)"); + + const t2 = performance.now(); + const r3 = (await searchTool.handler({ query: "前端技术栈 Next.js Shadcn Tailwind Vercel", maxResults: 10 })) as any; + const hasLatest = hitContains(r3.hits ?? [], "Next.js") || hitContains(r3.hits ?? [], "Shadcn"); + results.push({ category: "Dedup", name: "merge (React/Vite → Next.js/Vercel)", pass: hasLatest, detail: `latest state present: ${hasLatest}`, durationMs: Math.round(performance.now() - t2) }); + tracker.tick("dedup: merge (Next.js)"); +} + +async function runSummaryChecks(plugin: MemosLocalPlugin, tracker: ProgressTracker) { + const searchTool = plugin.tools.find((t) => t.name === "memory_search")!; + + const queries = [ + { query: "微服务架构 user-service payment-service Istio gRPC", label: "microservices arch" }, + { query: "数据库迁移 MySQL PostgreSQL Debezium CDC 双写", label: "DB migration plan" }, + ]; + + for (const q of queries) { + const t0 = performance.now(); + const r = (await searchTool.handler({ query: q.query, maxResults: 3 })) as any; + const dur = Math.round(performance.now() - t0); + if (r.hits?.length > 0) { + const h = r.hits[0]; + const sl = h.summary?.length ?? 0; + const cl = h.original_excerpt?.length ?? 
999; + const pass = sl > 0 && sl < cl; + results.push({ category: "Summary", name: q.label, pass, detail: `summary=${sl}chars, content=${cl}chars, shorter=${sl < cl}`, durationMs: dur }); + } else { + results.push({ category: "Summary", name: q.label, pass: false, detail: "no hits found", durationMs: dur }); + } + tracker.tick(`summary: ${q.label}`); + } +} + +async function runTopicChecks(plugin: MemosLocalPlugin, tracker: ProgressTracker) { + const searchTool = plugin.tools.find((t) => t.name === "memory_search")!; + + const t0 = performance.now(); + const nginxR = (await searchTool.handler({ query: "Nginx 反向代理 SSL gzip HTTP/2 HSTS", maxResults: 10 })) as any; + const nginxHits = (nginxR.hits ?? []).filter((h: any) => hitContains([h], "Nginx") || hitContains([h], "gzip") || hitContains([h], "SSL")); + results.push({ + category: "Topic", + name: "same topic merge (Nginx parts → 1 chunk)", + pass: nginxHits.length >= 1 && nginxHits.length <= 2, + detail: `${nginxHits.length} chunks (expect 1-2 merged)`, + durationMs: Math.round(performance.now() - t0), + }); + tracker.tick("topic: same (Nginx)"); + + const t1 = performance.now(); + const dockerR = (await searchTool.handler({ query: "Dockerfile 多阶段构建 pnpm node:20-alpine", maxResults: 5 })) as any; + const cookR = (await searchTool.handler({ query: "红烧肉 五花肉 冰糖 八角 桂皮", maxResults: 5 })) as any; + const dockerFound = hitContains(dockerR.hits ?? [], "Dockerfile") || hitContains(dockerR.hits ?? [], "node"); + const cookFound = hitContains(cookR.hits ?? [], "五花肉") || hitContains(cookR.hits ?? 
[], "红烧肉"); + const switchPass = dockerFound && cookFound; + results.push({ + category: "Topic", + name: "topic switch (Docker → cooking)", + pass: switchPass, + detail: `Docker found=${dockerFound}, cooking found=${cookFound}`, + durationMs: Math.round(performance.now() - t1), + }); + tracker.tick("topic: switch (Docker→cooking)"); +} + +// ─── Report ─── + +function printReport(totalMs: number, ingestStats?: { successCount: number; failCount: number }) { + console.log("\n"); + console.log("=".repeat(70)); + console.log(` MemOS Accuracy Test Report`); + console.log(` Mode: ${FULL_MODE ? "FULL" : "QUICK"} | Workers: ${WORKERS} | Duration: ${(totalMs / 1000).toFixed(1)}s`); + if (ingestStats) { + console.log(` Ingest: ${ingestStats.successCount} sent via Gateway, ${ingestStats.failCount} failed`); + } + console.log("=".repeat(70)); + + const categories = [...new Set(results.map((r) => r.category))]; + let totalPass = 0; + let totalCount = 0; + + for (const cat of categories) { + const cr = results.filter((r) => r.category === cat); + const passed = cr.filter((r) => r.pass).length; + totalPass += passed; + totalCount += cr.length; + const pct = ((passed / cr.length) * 100).toFixed(1); + console.log(`\n ${cat.padEnd(20)} ${passed}/${cr.length} (${pct}%)`); + for (const r of cr) { + const icon = r.pass ? "PASS" : "FAIL"; + console.log(` [${icon}] ${r.name}: ${r.detail} (${r.durationMs}ms)`); + } + } + + console.log("\n" + "-".repeat(70)); + const overallPct = totalCount > 0 ? ((totalPass / totalCount) * 100).toFixed(1) : "0"; + console.log(` OVERALL: ${totalPass}/${totalCount} (${overallPct}%)`); + console.log("=".repeat(70)); + + return totalPass === totalCount ? 0 : 1; +} + +// ─── Main ─── + +async function main() { + const t0 = performance.now(); + log("MemOS Accuracy Test starting..."); + log(`Mode: ${FULL_MODE ? 
"FULL (50+ cases)" : "QUICK (15 cases — pass --full for all)"}`); + + log("Loading OpenClaw config..."); + const config = loadConfig(); + const stateDir = path.join(process.env.HOME ?? "/tmp", ".openclaw"); + + let ingestStats: { successCount: number; failCount: number } | undefined; + + if (!SKIP_INGEST) { + const testCases = buildTestCases(); + const totalMsgs = testCases.reduce((a, c) => a + c.messages.length, 0); + log(`Prepared ${testCases.length} conversations (${totalMsgs} messages total)`); + ingestStats = await ingestPhase(testCases); + } else { + log("Skipping ingest (--skip-ingest), running search checks only..."); + } + + log("Initializing plugin for search verification (direct DB access)..."); + const plugin = initPlugin({ stateDir, config }); + + const searchCases = buildSearchCases(); + const verifyTotal = 3 + 2 + searchCases.length + 2; // dedup(3) + topic(2) + search + summary(2) + const verifyTracker = new ProgressTracker("Verify", verifyTotal); + + log("Running dedup checks..."); + await runDedupChecks(plugin, verifyTracker); + + log("Running topic boundary checks..."); + await runTopicChecks(plugin, verifyTracker); + + log("Running search precision & recall tests..."); + await runSearchTests(plugin, searchCases, verifyTracker); + + log("Running summary quality checks..."); + await runSummaryChecks(plugin, verifyTracker); + + const totalMs = Math.round(performance.now() - t0); + const exitCode = printReport(totalMs, ingestStats); + + await plugin.shutdown(); + process.exit(exitCode); +} + +main().catch((err) => { + console.error("Fatal error:", err); + process.exit(2); +}); diff --git a/apps/memos-local-openclaw/scripts/test-agent-isolation.ts b/apps/memos-local-openclaw/scripts/test-agent-isolation.ts new file mode 100644 index 000000000..f059cb2d9 --- /dev/null +++ b/apps/memos-local-openclaw/scripts/test-agent-isolation.ts @@ -0,0 +1,245 @@ +#!/usr/bin/env npx tsx +/** + * Multi-agent data isolation test. 
+ * + * Writes data with different owner tags via initPlugin, then creates + * a separate RecallEngine to verify search isolation with ownerFilter. + * + * Usage: + * npx tsx scripts/test-agent-isolation.ts + */ + +import * as fs from "fs"; +import * as path from "path"; +import { initPlugin } from "../src/index"; +import { SqliteStore } from "../src/storage/sqlite"; +import { Embedder } from "../src/embedding"; +import { RecallEngine } from "../src/recall/engine"; +import { buildContext } from "../src/config"; + +const RUN_ID = Date.now(); +const AGENT_A = "iso-test-alpha"; +const AGENT_B = "iso-test-beta"; + +const UNIQUE_A = `AlphaUniqueKey${RUN_ID}`; +const UNIQUE_B = `BetaUniqueKey${RUN_ID}`; + +const MSG_A1 = `我正在用 ${UNIQUE_A} 部署一个私有 Redis 缓存集群,配置主从复制和哨兵模式,端口 6379。`; +const MSG_A2 = `${UNIQUE_A} 的 Redis 集群已经部署完成,延迟从 50ms 降到了 3ms,命中率 95%。`; + +const MSG_B1 = `帮我设置 ${UNIQUE_B} 的 PostgreSQL 数据库迁移方案,从 v14 升级到 v16,数据量约 500GB。`; +const MSG_B2 = `${UNIQUE_B} 的 PostgreSQL 迁移完成了,用了 pg_upgrade --link 模式,停机只有 2 分钟。`; + +let passed = 0; +let failed = 0; + +function log(msg: string) { + const t = new Date().toLocaleTimeString("zh-CN", { hour12: false }); + console.log(`[${t}] ${msg}`); +} + +function assert(name: string, condition: boolean, detail: string) { + if (condition) { + passed++; + log(` ✅ ${name}`); + } else { + failed++; + log(` ❌ ${name}: ${detail}`); + } +} + +const silentLog = { debug: () => {}, info: () => {}, warn: () => {}, error: () => {} }; + +async function main() { + log("═══════════════════════════════════════════════════════"); + log(" Multi-Agent Data Isolation Test"); + log("═══════════════════════════════════════════════════════"); + log(` Agent A: ${AGENT_A} (keyword: ${UNIQUE_A})`); + log(` Agent B: ${AGENT_B} (keyword: ${UNIQUE_B})`); + log(""); + + const home = process.env.HOME ?? process.env.USERPROFILE ?? 
"/tmp"; + const stateDir = path.join(home, ".openclaw"); + const cfgPath = path.join(stateDir, "openclaw.json"); + const raw = JSON.parse(fs.readFileSync(cfgPath, "utf-8")); + const pluginCfg = raw?.plugins?.entries?.["memos-local-openclaw-plugin"]?.config ?? {}; + + // ── Step 1: Ingest data with different owners ── + log("── Step 1: Ingesting data with different agent owners ──"); + + const plugin = initPlugin({ stateDir, config: pluginCfg, log: silentLog }); + + const sessionA = `iso-session-a-${RUN_ID}`; + const sessionB = `iso-session-b-${RUN_ID}`; + + plugin.onConversationTurn( + [{ role: "user", content: MSG_A1 }, { role: "assistant", content: MSG_A2 }], + sessionA, + `agent:${AGENT_A}`, + ); + log(` Enqueued 2 messages for agent:${AGENT_A}`); + + plugin.onConversationTurn( + [{ role: "user", content: MSG_B1 }, { role: "assistant", content: MSG_B2 }], + sessionB, + `agent:${AGENT_B}`, + ); + log(` Enqueued 2 messages for agent:${AGENT_B}`); + + log(" Flushing ingest pipeline..."); + await plugin.flush(); + log(" Waiting 3s for embedding completion..."); + await new Promise((r) => setTimeout(r, 3000)); + await plugin.flush(); + log(" Done."); + + await plugin.shutdown(); + + // ── Step 2: Open a read-only store + engine for verification ── + log("\n── Step 2: Verify owner tags in raw DB ──"); + + const ctx = buildContext(stateDir, process.cwd(), pluginCfg, silentLog); + const store = new SqliteStore(ctx.config.storage!.dbPath!, silentLog); + const embedder = new Embedder(ctx.config.embedding, silentLog); + const engine = new RecallEngine(store, embedder, ctx); + + const db = (store as any).db; + + const chunksA = db.prepare( + `SELECT id, owner, session_key, role, substr(content, 1, 80) as preview + FROM chunks WHERE content LIKE ? AND dedup_status = 'active'` + ).all(`%${UNIQUE_A}%`) as any[]; + + const chunksB = db.prepare( + `SELECT id, owner, session_key, role, substr(content, 1, 80) as preview + FROM chunks WHERE content LIKE ? 
AND dedup_status = 'active'` + ).all(`%${UNIQUE_B}%`) as any[]; + + log(` Chunks with keyword-A: ${chunksA.length}`); + for (const c of chunksA) { + log(` owner=${c.owner} role=${c.role} preview=${c.preview.slice(0, 50)}...`); + } + + log(` Chunks with keyword-B: ${chunksB.length}`); + for (const c of chunksB) { + log(` owner=${c.owner} role=${c.role} preview=${c.preview.slice(0, 50)}...`); + } + + assert("Keyword-A chunks exist", chunksA.length > 0, "No chunks — ingest failed"); + assert("Keyword-B chunks exist", chunksB.length > 0, "No chunks — ingest failed"); + + if (chunksA.length > 0) { + const ownersA = new Set(chunksA.map((c: any) => c.owner)); + assert( + "Keyword-A owner = agent:" + AGENT_A, + ownersA.size === 1 && ownersA.has(`agent:${AGENT_A}`), + `Got: ${[...ownersA].join(", ")}`, + ); + } + + if (chunksB.length > 0) { + const ownersB = new Set(chunksB.map((c: any) => c.owner)); + assert( + "Keyword-B owner = agent:" + AGENT_B, + ownersB.size === 1 && ownersB.has(`agent:${AGENT_B}`), + `Got: ${[...ownersB].join(", ")}`, + ); + } + + // ── Step 3: Search isolation via RecallEngine ── + log("\n── Step 3: Search isolation (RecallEngine) ──"); + + const search = async (query: string, owner: string) => + engine.search({ query, maxResults: 10, ownerFilter: [`agent:${owner}`, "public"] }); + + const allowedOwners = (owner: string) => new Set([`agent:${owner}`, "public"]); + + const checkHitOwners = (hits: any[], allowed: Set): string[] => { + const violations: string[] = []; + for (const h of hits) { + const chunk = store.getChunk(h.ref.chunkId); + if (chunk && !allowed.has(chunk.owner)) { + violations.push(`chunkId=${h.ref.chunkId} owner=${chunk.owner}`); + } + } + return violations; + }; + + // 3a. Agent-A searches own keyword — should find own data + const resAA = await search(UNIQUE_A, AGENT_A); + assert("Agent-A finds own keyword-A", resAA.hits.length > 0, `Got ${resAA.hits.length} hits`); + + // 3b. 
Agent-A searches keyword-B — results must only contain Agent-A or public data + const resAB = await search(UNIQUE_B, AGENT_A); + const violationsAB = checkHitOwners(resAB.hits, allowedOwners(AGENT_A)); + assert( + "Agent-A results for keyword-B contain NO agent-B data ← ISOLATION", + violationsAB.length === 0, + `Found ${violationsAB.length} leaks: ${violationsAB.join("; ")}`, + ); + log(` (Agent-A got ${resAB.hits.length} hits for keyword-B, all from own/public — OK)`); + + // 3c. Agent-B searches own keyword — should find own data + const resBB = await search(UNIQUE_B, AGENT_B); + assert("Agent-B finds own keyword-B", resBB.hits.length > 0, `Got ${resBB.hits.length} hits`); + + // 3d. Agent-B searches keyword-A — results must only contain Agent-B or public data + const resBA = await search(UNIQUE_A, AGENT_B); + const violationsBA = checkHitOwners(resBA.hits, allowedOwners(AGENT_B)); + assert( + "Agent-B results for keyword-A contain NO agent-A data ← ISOLATION", + violationsBA.length === 0, + `Found ${violationsBA.length} leaks: ${violationsBA.join("; ")}`, + ); + log(` (Agent-B got ${resBA.hits.length} hits for keyword-A, all from own/public — OK)`); + + // 3e. 
agent:main results should not contain iso-test agents' data + const resMainA = await search(UNIQUE_A, "main"); + const violationsMainA = checkHitOwners(resMainA.hits, allowedOwners("main")); + assert( + "agent:main results contain no iso-test-alpha data", + violationsMainA.length === 0, + `Found ${violationsMainA.length} leaks: ${violationsMainA.join("; ")}`, + ); + + const resMainB = await search(UNIQUE_B, "main"); + const violationsMainB = checkHitOwners(resMainB.hits, allowedOwners("main")); + assert( + "agent:main results contain no iso-test-beta data", + violationsMainB.length === 0, + `Found ${violationsMainB.length} leaks: ${violationsMainB.join("; ")}`, + ); + + // ── Step 4: FTS isolation ── + log("\n── Step 4: FTS isolation ──"); + + const ftsAA = store.ftsSearch(UNIQUE_A, 10, [`agent:${AGENT_A}`, "public"]); + assert("FTS: Agent-A finds keyword-A", ftsAA.length > 0, `Got ${ftsAA.length}`); + + const ftsAB = store.ftsSearch(UNIQUE_B, 10, [`agent:${AGENT_A}`, "public"]); + assert("FTS: Agent-A cannot find keyword-B", ftsAB.length === 0, `Got ${ftsAB.length} — BROKEN!`); + + const ftsBB = store.ftsSearch(UNIQUE_B, 10, [`agent:${AGENT_B}`, "public"]); + assert("FTS: Agent-B finds keyword-B", ftsBB.length > 0, `Got ${ftsBB.length}`); + + const ftsBA = store.ftsSearch(UNIQUE_A, 10, [`agent:${AGENT_B}`, "public"]); + assert("FTS: Agent-B cannot find keyword-A", ftsBA.length === 0, `Got ${ftsBA.length} — BROKEN!`); + + // ── Summary ── + log("\n═══════════════════════════════════════════════════════"); + log(` Results: ${passed} passed, ${failed} failed`); + if (failed === 0) { + log(" 🎉 All isolation tests passed!"); + } else { + log(" ⚠ Some isolation tests FAILED"); + } + log("═══════════════════════════════════════════════════════"); + + store.close(); + process.exit(failed > 0 ? 
1 : 0); +} + +main().catch((err) => { + console.error("Fatal error:", err); + process.exit(1); +}); diff --git a/apps/memos-local-openclaw/skill/memos-memory-guide/SKILL.md b/apps/memos-local-openclaw/skill/memos-memory-guide/SKILL.md index b96382e4a..d54aca1a1 100644 --- a/apps/memos-local-openclaw/skill/memos-memory-guide/SKILL.md +++ b/apps/memos-local-openclaw/skill/memos-memory-guide/SKILL.md @@ -1,6 +1,6 @@ --- name: memos-memory-guide -description: Use the MemOS Local memory system to search and use the user's past conversations. Use this skill whenever the user refers to past chats, their own preferences or history, or when you need to answer from prior context. When auto-recall returns nothing (long or unclear user query), generate your own short search query and call memory_search. Use task_summary when you need full task context, skill_get for experience guides, skill_search to discover public skills, memory_write_public for shared knowledge, and memory_timeline to expand around a memory hit. +description: "Use the MemOS Local memory system to search and use the user's past conversations. Use this skill whenever the user refers to past chats, their own preferences or history, or when you need to answer from prior context. When auto-recall returns nothing (long or unclear user query), generate your own short search query and call memory_search. Available tools: memory_search, memory_get, memory_write_public, task_summary, skill_get, skill_search, skill_install, skill_publish, skill_unpublish, memory_timeline, memory_viewer." --- # MemOS Local Memory — Agent Guide @@ -17,91 +17,119 @@ This skill describes how to use the MemOS memory tools so you can reliably searc ### memory_search -- **What it does:** Searches the user's stored conversation memory by a natural-language query. Returns a list of relevant excerpts with `chunkId` and optionally `task_id`. Only returns memories belonging to the current agent or marked as public. 
+- **What it does:** Search long-term conversation memory for past conversations, user preferences, decisions, and experiences. Returns relevant excerpts with `chunkId` and optionally `task_id`. Only returns memories belonging to the current agent or marked as public. - **When to call:** - The automatic recall did not run or returned nothing. - The user's query is long or unclear — **generate a short query yourself** and call `memory_search(query="...")`. - You need to search with a different angle (e.g. filter by `role='user'`). -- **Parameters:** `query` (required), optional `minScore`, `role`. +- **Parameters:** + - `query` (string, **required**) — Natural language search query. + - `maxResults` (number, optional) — Max results, default 20, max 20. + - `minScore` (number, optional) — Minimum score 0–1, default 0.45, floor 0.35. + - `role` (string, optional) — Filter by role: `'user'`, `'assistant'`, or `'tool'`. Use `'user'` to find what the user said. + +### memory_get + +- **What it does:** Get the full original text of a memory chunk. Use to verify exact details from a search hit. +- **When to call:** A `memory_search` hit looks relevant but you need to see the complete original content, not just the summary/excerpt. +- **Parameters:** + - `chunkId` (string, **required**) — The chunkId from a search hit. + - `maxChars` (number, optional) — Max characters to return (default 4000, max 12000). ### memory_write_public -- **What it does:** Writes a piece of information to **public memory**. Public memory is visible to all agents — any agent doing `memory_search` can find it. -- **When to call:** In multi-agent or collaborative scenarios, when you have **persistent information useful to everyone** (e.g. shared decisions, conventions, configurations, workflows). Do not write session-only or purely private content. -- **Parameters:** `content` (required), `summary` (optional). +- **What it does:** Write a piece of information to public memory. 
Public memories are visible to all agents during `memory_search`. Use for shared knowledge, team decisions, or cross-agent coordination information. +- **When to call:** In multi-agent or collaborative scenarios, when you have persistent information useful to everyone (e.g. shared decisions, conventions, configurations, workflows). Do not write session-only or purely private content. +- **Parameters:** + - `content` (string, **required**) — The content to write to public memory. + - `summary` (string, optional) — Short summary of the content. ### task_summary -- **What it does:** Returns the full task summary for a given `task_id`: title, status, and the complete narrative summary. -- **When to call:** A `memory_search` hit included a `task_id` and you need the full story of that task. -- **Parameters:** `taskId` (from a search hit). +- **What it does:** Get the detailed summary of a complete task: title, status, narrative summary, and related skills. Use when `memory_search` returns a hit with a `task_id` and you need the full story. Preserves critical information: URLs, file paths, commands, error codes, step-by-step instructions. +- **When to call:** A `memory_search` hit included a `task_id` and you need the full context of that task. +- **Parameters:** + - `taskId` (string, **required**) — The task_id from a memory_search hit. ### skill_get -- **What it does:** Returns the content of a learned skill (experience guide) by `skillId` or by `taskId`. +- **What it does:** Retrieve a proven skill (experience guide) by `skillId` or by `taskId`. If you pass a `taskId`, the system will find the associated skill automatically. - **When to call:** A search hit has a `task_id` and the task has a "how to do this again" guide. Use this to follow the same approach or reuse steps. -- **Parameters:** `skillId` (direct) or `taskId` (lookup). +- **Parameters:** + - `skillId` (string, optional) — Direct skill ID. 
+ - `taskId` (string, optional) — Task ID — will look up the skill linked to this task. + - At least one of `skillId` or `taskId` must be provided. ### skill_search -- **What it does:** Searches available **skills** (capabilities/guides) by natural language. Can search your own skills, other agents' public skills, or both — controlled by the `scope` parameter. -- **When to call:** The current task requires a capability or guide you don't have. Use `skill_search` to find one first; after finding it, use `skill_get` to read it, then `skill_install` to load it for future turns. Set `scope` to `public` to only see others' public skills, `self` for only your own, or leave as default `mix` for both. -- **Parameters:** `query` (required, natural language description of the need), `scope` (optional, default `mix`: self + public; `self`: own only; `public`: public only). +- **What it does:** Search available skills by natural language. Searches your own skills, public skills, or both — controlled by the `scope` parameter. +- **When to call:** The current task requires a capability or guide you don't have. Use `skill_search` to find one first; after finding it, use `skill_get` to read it, then `skill_install` to load it for future turns. +- **Parameters:** + - `query` (string, **required**) — Natural language description of the needed skill. + - `scope` (string, optional) — Search scope: `'mix'` (default, self + public), `'self'` (own only), `'public'` (public only). ### skill_install -- **What it does:** Installs a skill (by `skillId`) into the workspace for future sessions. +- **What it does:** Install a learned skill into the agent workspace so it becomes permanently available. After installation, the skill will be loaded automatically in future sessions. - **When to call:** After `skill_get` when the skill is useful for ongoing use. -- **Parameters:** `skillId`. +- **Parameters:** + - `skillId` (string, **required**) — The skill ID to install. 
### skill_publish -- **What it does:** Makes a skill **public** so other agents can discover and install it via `skill_search`. +- **What it does:** Make a skill public so other agents can discover and install it via `skill_search`. - **When to call:** You have a useful skill that other agents could benefit from, and you want to share it. -- **Parameters:** `skillId`. +- **Parameters:** + - `skillId` (string, **required**) — The skill ID to publish. ### skill_unpublish -- **What it does:** Makes a skill **private** again. Other agents will no longer discover it. +- **What it does:** Make a skill private again. Other agents will no longer be able to discover it. - **When to call:** You want to stop sharing a previously published skill. -- **Parameters:** `skillId`. +- **Parameters:** + - `skillId` (string, **required**) — The skill ID to unpublish. ### memory_timeline -- **What it does:** Expands context around a single memory chunk: returns the surrounding conversation messages. +- **What it does:** Expand context around a memory search hit. Pass the `chunkId` from a search result to read the surrounding conversation messages. - **When to call:** A `memory_search` hit is relevant but you need the surrounding dialogue. -- **Parameters:** `chunkId` (from a search hit), optional `window` (default 2). +- **Parameters:** + - `chunkId` (string, **required**) — The chunkId from a memory_search hit. + - `window` (number, optional) — Context window ±N messages, default 2. ### memory_viewer -- **What it does:** Returns the URL of the MemOS Memory Viewer web dashboard. -- **When to call:** The user asks how to view their memories or open the memory dashboard. +- **What it does:** Show the MemOS Memory Viewer URL. Call this when the user asks how to view, browse, manage, or check their memories. Returns the URL the user can open in their browser. +- **When to call:** The user asks where to see or manage their memories. - **Parameters:** None. ## Quick decision flow 1. 
**No memories in context or auto-recall reported nothing** - → Call `memory_search` with a **self-generated short query**. + → Call `memory_search(query="...")` with a **self-generated short query**. + +2. **Need to see the full original text of a search hit** + → Call `memory_get(chunkId="...")`. -2. **Search returned hits with `task_id` and you need full context** - → Call `task_summary(taskId)`. +3. **Search returned hits with `task_id` and you need full context** + → Call `task_summary(taskId="...")`. -3. **Task has an experience guide you want to follow** - → Call `skill_get(taskId=...)` or `skill_get(skillId=...)`. Optionally `skill_install(skillId)` for future use. +4. **Task has an experience guide you want to follow** + → Call `skill_get(taskId="...")` or `skill_get(skillId="...")`. Optionally `skill_install(skillId="...")` for future use. -4. **You need the exact surrounding conversation of a hit** - → Call `memory_timeline(chunkId=...)`. +5. **You need the exact surrounding conversation of a hit** + → Call `memory_timeline(chunkId="...")`. -5. **You need a capability/guide that you don't have** +6. **You need a capability/guide that you don't have** → Call `skill_search(query="...", scope="mix")` to discover available skills. -6. **You have shared knowledge useful to all agents** +7. **You have shared knowledge useful to all agents** → Call `memory_write_public(content="...")` to persist it in public memory. -7. **You want to share a useful skill with other agents** - → Call `skill_publish(skillId=...)`. +8. **You want to share/stop sharing a skill with other agents** + → Call `skill_publish(skillId="...")` or `skill_unpublish(skillId="...")`. -8. **User asks where to see or manage their memories** +9. **User asks where to see or manage their memories** → Call `memory_viewer()` and share the URL. 
## Writing good search queries @@ -110,3 +138,12 @@ This skill describes how to use the MemOS memory tools so you can reliably searc - Use **concrete terms**: names, topics, tools, or decisions. - If the user's message is long, **derive one or two sub-queries** rather than pasting the whole message. - Use `role='user'` when you specifically want to find what the user said. + +## Memory ownership and agent isolation + +Each memory is tagged with an `owner` (e.g. `agent:main`, `agent:sales-bot`). This is handled **automatically** — you do not need to pass any owner parameter. + +- **Your memories:** All tools (`memory_search`, `memory_get`, `memory_timeline`) automatically scope queries to your agent's own memories. +- **Public memories:** Memories marked as `public` are visible to all agents. Use `memory_write_public` to write shared knowledge. +- **Cross-agent isolation:** You cannot see memories owned by other agents (unless they are public). +- **How it works:** The system identifies your agent ID from the OpenClaw runtime context and applies owner filtering automatically on every search, recall, and retrieval. diff --git a/apps/memos-local-openclaw/src/capture/index.ts b/apps/memos-local-openclaw/src/capture/index.ts index d00d7d376..482f9fbe1 100644 --- a/apps/memos-local-openclaw/src/capture/index.ts +++ b/apps/memos-local-openclaw/src/capture/index.ts @@ -101,7 +101,8 @@ export function captureMessages( * Also strips the envelope timestamp prefix like "[Tue 2026-03-03 21:58 GMT+8] " */ export function stripInboundMetadata(text: string): string { - let cleaned = stripEnvelopePrefix(text); + let cleaned = stripMemoryInjection(text); + cleaned = stripEnvelopePrefix(cleaned); // Strip OpenClaw envelope tags: [message_id: ...], [[reply_to_current]], etc. 
cleaned = cleaned.replace(/\[message_id:\s*[a-f0-9-]+\]/gi, ""); @@ -152,6 +153,104 @@ function stripEnvelopePrefix(text: string): string { return text.replace(ENVELOPE_PREFIX_RE, ""); } +/** + * Strip memory-system injections that get prepended to user messages: + * - ... + * - === MemOS LONG-TERM MEMORY ... ===\n...MANDATORY... + * - [MemOS Auto-Recall] Found N relevant memories:... + * - ## Memory system\n\nNo memories were automatically recalled... + */ +function stripMemoryInjection(text: string): string { + let cleaned = text; + + // ... + const mcStart = cleaned.indexOf(""); + if (mcStart !== -1) { + const mcEnd = cleaned.indexOf(""); + if (mcEnd !== -1) { + cleaned = cleaned.slice(0, mcStart) + cleaned.slice(mcEnd + "".length); + } else { + cleaned = cleaned.slice(0, mcStart); + } + cleaned = cleaned.trim(); + } + + // === MemOS LONG-TERM MEMORY (retrieved from past conversations) ===\n...\nMANDATORY... + cleaned = cleaned.replace( + /=== MemOS LONG-TERM MEMORY[\s\S]*?(?:MANDATORY[^\n]*\n?|(?=\n{2,}))/gi, + "", + ).trim(); + + // [MemOS Auto-Recall] Found N relevant memories:\n... + cleaned = cleaned.replace( + /\[MemOS Auto-Recall\][^\n]*\n(?:(?:\d+\.\s+\[(?:USER|ASSISTANT)[^\n]*\n?)*)/gi, + "", + ).trim(); + + // ## Memory system\n\nNo memories were automatically recalled... + cleaned = cleaned.replace( + /## Memory system\n+No memories were automatically recalled[^\n]*(?:\n[^\n]*memory_search[^\n]*)*/gi, + "", + ).trim(); + + // Old format: ## Retrieved memories from past conversations\n\nCRITICAL INSTRUCTION:... 
+ const recallIdx = cleaned.indexOf("## Retrieved memories from past conversations"); + if (recallIdx !== -1) { + const before = cleaned.slice(0, recallIdx); + const after = cleaned.slice(recallIdx); + const tsMatch = after.match(/\n\[(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s+\d{4}-\d{2}-\d{2}/); + if (tsMatch && tsMatch.index != null) { + cleaned = (before + after.slice(tsMatch.index)).trim(); + } else { + cleaned = before.trim(); + } + } + + // prependContext format: ## User's conversation history (from memory system)\n... + // Ends at last "Current time:" line or last chunkId= line, whichever comes later. + const prependIdx = cleaned.indexOf("## User's conversation history (from memory system)"); + if (prependIdx !== -1) { + const before = cleaned.slice(0, prependIdx); + const after = cleaned.slice(prependIdx); + + // Find the last anchor line that belongs to the injected block + const currentTimeMatch = after.match(/Current time:[^\n]*/g); + const chunkIdMatch = after.match(/chunkId="[^"]*"/g); + let cutPos = 0; + if (currentTimeMatch) { + const lastCt = after.lastIndexOf(currentTimeMatch[currentTimeMatch.length - 1]); + const lineEnd = after.indexOf("\n", lastCt); + cutPos = Math.max(cutPos, lineEnd !== -1 ? lineEnd + 1 : after.length); + } + if (chunkIdMatch) { + const lastCk = after.lastIndexOf(chunkIdMatch[chunkIdMatch.length - 1]); + const lineEnd = after.indexOf("\n", lastCk); + cutPos = Math.max(cutPos, lineEnd !== -1 ? lineEnd + 1 : after.length); + } + if (cutPos === 0) { + // No anchors found; remove everything from the header onward + cleaned = before.trim(); + } else { + cleaned = (before + after.slice(cutPos)).trim(); + } + } + + // New format: ...\n\n📝 Related memories:... 
+ const memosTagIdx = cleaned.indexOf(""); + if (memosTagIdx !== -1) { + const before = cleaned.slice(0, memosTagIdx); + const after = cleaned.slice(memosTagIdx); + const tsMatch = after.match(/\n\[(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s+\d{4}-\d{2}-\d{2}/); + if (tsMatch && tsMatch.index != null) { + cleaned = (before + after.slice(tsMatch.index)).trim(); + } else { + cleaned = before.trim(); + } + } + + return cleaned; +} + function stripEvidenceWrappers(text: string, evidenceTag: string): string { const tag = evidenceTag.trim(); if (!tag) return text; diff --git a/apps/memos-local-openclaw/src/embedding/index.ts b/apps/memos-local-openclaw/src/embedding/index.ts index aa511dcb3..3981062fa 100644 --- a/apps/memos-local-openclaw/src/embedding/index.ts +++ b/apps/memos-local-openclaw/src/embedding/index.ts @@ -5,6 +5,7 @@ import { embedCohere, embedCohereQuery } from "./providers/cohere"; import { embedVoyage } from "./providers/voyage"; import { embedMistral } from "./providers/mistral"; import { embedLocal } from "./local"; +import { modelHealth } from "../ingest/providers"; export class Embedder { constructor( @@ -46,26 +47,33 @@ export class Embedder { const provider = this.provider; const cfg = this.cfg; + const modelInfo = `${provider}/${cfg?.model ?? 
"default"}`; try { + let result: number[][]; switch (provider) { case "openai": case "openai_compatible": - return await embedOpenAI(texts, cfg!, this.log); - case "gemini": - return await embedGemini(texts, cfg!, this.log); case "azure_openai": - return await embedOpenAI(texts, cfg!, this.log); + case "zhipu": + case "siliconflow": + case "bailian": + result = await embedOpenAI(texts, cfg!, this.log); break; + case "gemini": + result = await embedGemini(texts, cfg!, this.log); break; case "cohere": - return await embedCohere(texts, cfg!, this.log); + result = await embedCohere(texts, cfg!, this.log); break; case "mistral": - return await embedMistral(texts, cfg!, this.log); + result = await embedMistral(texts, cfg!, this.log); break; case "voyage": - return await embedVoyage(texts, cfg!, this.log); + result = await embedVoyage(texts, cfg!, this.log); break; case "local": default: - return await embedLocal(texts, this.log); + result = await embedLocal(texts, this.log); break; } + modelHealth.recordSuccess("embedding", modelInfo); + return result; } catch (err) { + modelHealth.recordError("embedding", modelInfo, String(err)); if (provider !== "local") { this.log.warn(`Embedding provider '${provider}' failed, falling back to local: ${err}`); return await embedLocal(texts, this.log); diff --git a/apps/memos-local-openclaw/src/ingest/chunker.ts b/apps/memos-local-openclaw/src/ingest/chunker.ts index 12cb2970e..2de7630e7 100644 --- a/apps/memos-local-openclaw/src/ingest/chunker.ts +++ b/apps/memos-local-openclaw/src/ingest/chunker.ts @@ -1,8 +1,6 @@ -import type { ChunkKind } from "../types"; - export interface RawChunk { content: string; - kind: ChunkKind; + kind: "paragraph"; } const MAX_CHUNK_CHARS = 3000; @@ -24,32 +22,27 @@ const COMMAND_LINE_RE = /^(?:\$|>|#)\s+.+$/gm; * Semantic-aware chunking: * 1. Extract fenced code blocks as whole units (never split inside) * 2. Detect unfenced code regions by brace-matching (functions/classes kept intact) - * 3. 
Extract error stacks, list blocks, command lines + * 3. Extract error stacks, list blocks, command lines as separate chunks * 4. Split remaining prose at paragraph boundaries (double newline) - * 5. Merge short adjacent chunks of the same kind + * 5. Merge short adjacent chunks */ export function chunkText(text: string): RawChunk[] { let remaining = text; - const slots: Array<{ placeholder: string; chunk: RawChunk }> = []; + const slots: Array<{ placeholder: string; content: string }> = []; let counter = 0; - function ph(content: string, kind: ChunkKind): string { + function ph(content: string): string { const tag = `\x00SLOT_${counter++}\x00`; - slots.push({ placeholder: tag, chunk: { content: content.trim(), kind } }); + slots.push({ placeholder: tag, content: content.trim() }); return tag; } - remaining = remaining.replace(FENCED_CODE_RE, (m) => ph(m, "code_block")); - + remaining = remaining.replace(FENCED_CODE_RE, (m) => ph(m)); remaining = extractBraceBlocks(remaining, ph); - const structural: Array<{ re: RegExp; kind: ChunkKind }> = [ - { re: ERROR_STACK_RE, kind: "error_stack" }, - { re: LIST_BLOCK_RE, kind: "list" }, - { re: COMMAND_LINE_RE, kind: "command" }, - ]; - for (const { re, kind } of structural) { - remaining = remaining.replace(re, (m) => ph(m, kind)); + const structural: RegExp[] = [ERROR_STACK_RE, LIST_BLOCK_RE, COMMAND_LINE_RE]; + for (const re of structural) { + remaining = remaining.replace(re, (m) => ph(m)); } const raw: RawChunk[] = []; @@ -64,7 +57,7 @@ export function chunkText(text: string): RawChunk[] { for (const part of parts) { const slot = slots.find((s) => s.placeholder === part); if (slot) { - raw.push(slot.chunk); + raw.push({ content: slot.content, kind: "paragraph" }); } else if (part.trim().length >= MIN_CHUNK_CHARS) { raw.push({ content: part.trim(), kind: "paragraph" }); } @@ -75,8 +68,8 @@ export function chunkText(text: string): RawChunk[] { } for (const s of slots) { - if (!raw.some((c) => c.content === 
s.chunk.content)) { - raw.push(s.chunk); + if (!raw.some((c) => c.content === s.content)) { + raw.push({ content: s.content, kind: "paragraph" }); } } @@ -92,7 +85,7 @@ export function chunkText(text: string): RawChunk[] { */ function extractBraceBlocks( text: string, - ph: (content: string, kind: ChunkKind) => string, + ph: (content: string) => string, ): string { const lines = text.split("\n"); const result: string[] = []; @@ -126,7 +119,7 @@ function extractBraceBlocks( if (depth <= 0 || (BLOCK_CLOSE_RE.test(line) && depth <= 0)) { const block = blockLines.join("\n"); if (block.trim().length >= MIN_CHUNK_CHARS) { - result.push(ph(block, "code_block")); + result.push(ph(block)); } else { result.push(block); } @@ -142,7 +135,7 @@ function extractBraceBlocks( if (blockLines.length > 0) { const block = blockLines.join("\n"); if (block.trim().length >= MIN_CHUNK_CHARS) { - result.push(ph(block, "code_block")); + result.push(ph(block)); } else { result.push(block); } @@ -171,11 +164,10 @@ function mergeSmallChunks(chunks: RawChunk[]): RawChunk[] { continue; } - const sameKind = buf.kind === c.kind; const bothSmall = buf.content.length < IDEAL_CHUNK_CHARS && c.content.length < IDEAL_CHUNK_CHARS; const mergedLen = buf.content.length + c.content.length + 2; - if (sameKind && bothSmall && mergedLen <= MAX_CHUNK_CHARS) { + if (bothSmall && mergedLen <= MAX_CHUNK_CHARS) { buf.content = buf.content + "\n\n" + c.content; } else { merged.push(buf); @@ -189,29 +181,29 @@ function mergeSmallChunks(chunks: RawChunk[]): RawChunk[] { function splitOversized(chunks: RawChunk[]): RawChunk[] { const result: RawChunk[] = []; for (const c of chunks) { - if (c.content.length <= MAX_CHUNK_CHARS || c.kind === "code_block") { + if (c.content.length <= MAX_CHUNK_CHARS) { result.push(c); continue; } - result.push(...splitAtSentenceBoundary(c.content, c.kind)); + result.push(...splitAtSentenceBoundary(c.content)); } return result; } -function splitAtSentenceBoundary(text: string, kind: 
ChunkKind): RawChunk[] { +function splitAtSentenceBoundary(text: string): RawChunk[] { const sentences = text.match(/[^.!?。!?\n]+(?:[.!?。!?]+|\n{2,})/g) ?? [text]; const result: RawChunk[] = []; let buf = ""; for (const s of sentences) { if (buf.length + s.length > MAX_CHUNK_CHARS && buf.length > 0) { - result.push({ content: buf.trim(), kind }); + result.push({ content: buf.trim(), kind: "paragraph" }); buf = ""; } buf += s; } if (buf.trim().length >= MIN_CHUNK_CHARS) { - result.push({ content: buf.trim(), kind }); + result.push({ content: buf.trim(), kind: "paragraph" }); } return result; } diff --git a/apps/memos-local-openclaw/src/ingest/providers/anthropic.ts b/apps/memos-local-openclaw/src/ingest/providers/anthropic.ts index 8f6d30c2f..3e11d2778 100644 --- a/apps/memos-local-openclaw/src/ingest/providers/anthropic.ts +++ b/apps/memos-local-openclaw/src/ingest/providers/anthropic.ts @@ -1,20 +1,35 @@ import type { SummarizerConfig, Logger } from "../../types"; -const SYSTEM_PROMPT = `Summarize the text in ONE concise sentence (max 120 characters). IMPORTANT: Use the SAME language as the input text — if the input is Chinese, write Chinese; if English, write English. Preserve exact names, commands, error codes. No bullet points, no preamble — output only the sentence.`; +const SYSTEM_PROMPT = `You generate a retrieval-friendly title. + +Return exactly one noun phrase that names the topic AND its key details. + +Requirements: +- Same language as input +- Keep proper nouns, API/function names, specific parameters, versions, error codes +- Include WHO/WHAT/WHERE details when present (e.g. 
person name + event, tool name + what it does) +- Prefer concrete topic words over generic words +- No verbs unless unavoidable +- No generic endings like: + 功能说明、使用说明、简介、介绍、用途、summary、overview、basics +- Chinese: 10-50 characters (aim for 15-30) +- Non-Chinese: 5-15 words (aim for 8-12) +- Output title only`; const TASK_SUMMARY_PROMPT = `You create a DETAILED task summary from a multi-turn conversation. This summary will be the ONLY record of this conversation, so it must preserve ALL important information. -CRITICAL LANGUAGE RULE: You MUST write in the SAME language as the user's messages. Chinese input → Chinese output. English input → English output. NEVER mix languages. +## LANGUAGE RULE (HIGHEST PRIORITY) +Detect the PRIMARY language of the user's messages. If most user messages are Chinese, ALL output (title, goal, steps, result, details) MUST be in Chinese. If English, output in English. NEVER mix. This rule overrides everything below. Output EXACTLY this structure: -📌 Title -A short, descriptive title (10-30 characters). Like a chat group name. +📌 Title / 标题 +A short, descriptive title (10-30 characters). Same language as user messages. -🎯 Goal +🎯 Goal / 目标 One sentence: what the user wanted to accomplish. -📋 Key Steps +📋 Key Steps / 关键步骤 - Describe each meaningful step in detail - Include the ACTUAL content produced: code snippets, commands, config blocks, formulas, key paragraphs - For code: include the function signature and core logic (up to ~30 lines per block), use fenced code blocks @@ -23,10 +38,10 @@ One sentence: what the user wanted to accomplish. - Merge only truly trivial back-and-forth (like "ok" / "sure") - Do NOT over-summarize: "provided a function" is BAD; show the actual function -✅ Result +✅ Result / 结果 What was the final outcome? Include the final version of any code/config/content produced. 
-💡 Key Details +💡 Key Details / 关键细节 - Decisions made, trade-offs discussed, caveats noted, alternative approaches mentioned - Specific values: numbers, versions, thresholds, URLs, file paths, model names - Omit this section only if there truly are no noteworthy details @@ -75,7 +90,55 @@ export async function summarizeTaskAnthropic( return json.content.find((c) => c.type === "text")?.text?.trim() ?? ""; } -const TOPIC_JUDGE_PROMPT = `You are a conversation topic boundary detector. Given the CURRENT task context (may include opening topic + recent exchanges) and a single NEW user message, decide if the new message belongs to the SAME task or starts a NEW one. +const TASK_TITLE_PROMPT = `Generate a short title for a conversation task. + +Input: the first few user messages from a conversation. +Output: a concise title (5-20 characters for Chinese, 3-8 words for English). + +Rules: +- Same language as user messages +- Describe WHAT the user wanted to do, not system/technical details +- Ignore system prompts, session startup messages, or boilerplate instructions — focus on the user's actual intent +- If the user only asked one question, use that question as the title (shortened if needed) +- Output the title only, no quotes, no prefix, no explanation`; + +export async function generateTaskTitleAnthropic( + text: string, + cfg: SummarizerConfig, + log: Logger, +): Promise { + const endpoint = cfg.endpoint ?? "https://api.anthropic.com/v1/messages"; + const model = cfg.model ?? "claude-3-haiku-20240307"; + const headers: Record = { + "Content-Type": "application/json", + "x-api-key": cfg.apiKey ?? "", + "anthropic-version": "2023-06-01", + ...cfg.headers, + }; + + const resp = await fetch(endpoint, { + method: "POST", + headers, + body: JSON.stringify({ + model, + max_tokens: 100, + temperature: 0, + system: TASK_TITLE_PROMPT, + messages: [{ role: "user", content: text }], + }), + signal: AbortSignal.timeout(cfg.timeoutMs ?? 
15_000), + }); + + if (!resp.ok) { + const body = await resp.text(); + throw new Error(`Anthropic task-title failed (${resp.status}): ${body}`); + } + + const json = (await resp.json()) as { content: Array<{ type: string; text: string }> }; + return json.content.find((c) => c.type === "text")?.text?.trim() ?? ""; +} + +const TOPIC_JUDGE_PROMPT = `You are a conversation topic boundary detector. Given the CURRENT task context and a NEW user message, decide if the new message belongs to the SAME task or starts a NEW one. Answer ONLY "NEW" or "SAME". @@ -83,22 +146,21 @@ SAME — the new message: - Continues, follows up on, refines, or corrects the same subject/project/task - Asks a clarification or next-step question about what was just discussed - Reports a result, error, or feedback about the current task -- Discusses different tools, methods, or approaches for the SAME goal (e.g., learning English via BBC → via ChatGPT → via AI tools = all SAME "learning English" task) -- Mentions a related technology or platform in the context of the current goal -- Is a short acknowledgment (ok, thanks, 好的, 嗯) in direct response to the current flow +- Discusses different tools or approaches for the SAME goal (e.g., learning English via BBC → via ChatGPT = SAME) +- Is a short acknowledgment (ok, thanks, 好的) in response to the current flow NEW — the new message: -- Introduces a clearly UNRELATED subject with NO logical connection to the current task -- The topic has ZERO overlap with any aspect of the current conversation (e.g., from "learning English" to "what's the weather tomorrow") -- Starts a request about a completely different domain or life area +- Introduces a subject from a DIFFERENT domain than the current task (e.g., tech → cooking, work → personal life, database → travel) +- Has NO logical connection to what was being discussed +- Starts a request about a different project, system, or life area - Begins with a new greeting/reset followed by a different topic Key 
principles: -- STRONGLY lean toward SAME — only mark NEW for obvious, unambiguous topic shifts -- Different aspects, tools, or methods related to the same overall goal are SAME -- If the new message could reasonably be interpreted as part of the ongoing discussion, choose SAME -- Only choose NEW when there is absolutely no thematic connection to the current task -- Examples: "学英语" → "用AI工具学英语" = SAME; "学英语" → "明天天气" = NEW +- If the topic domain clearly changed (e.g., server config → recipe, code review → vacation plan), choose NEW +- Different aspects of the SAME project/system are SAME (e.g., Nginx SSL → Nginx gzip = SAME) +- Different unrelated technologies discussed independently are NEW (e.g., Redis config → cooking recipe = NEW) +- When unsure, lean toward SAME for closely related topics, but do NOT hesitate to mark NEW for obvious domain shifts +- Examples: "配置Nginx" → "加gzip压缩" = SAME; "配置Nginx" → "做红烧肉" = NEW; "MySQL配置" → "K8s部署" in same infra project = SAME; "部署服务器" → "年会安排" = NEW Output exactly one word: NEW or SAME`; @@ -143,34 +205,30 @@ export async function judgeNewTopicAnthropic( return answer.startsWith("NEW"); } -const FILTER_RELEVANT_PROMPT = `You are a memory relevance judge. Given a user's QUERY and a list of CANDIDATE memory summaries, do two things: - -1. Select ALL candidates that could be useful for answering the query. When in doubt, INCLUDE the candidate. - - For questions about lists, history, or "what/where/who" across multiple items (e.g. "which companies did I work at"), include ALL matching items — do NOT stop at the first match. - - For factual lookups (e.g. "what is the SSH port"), a single direct answer is enough. -2. Judge whether the selected memories are SUFFICIENT to fully answer the query WITHOUT fetching additional context. +const FILTER_RELEVANT_PROMPT = `You are a memory relevance judge. 
-IMPORTANT for "sufficient" judgment: -- sufficient=true ONLY when the memories contain a concrete ANSWER, fact, decision, or actionable information that directly addresses the query. -- sufficient=false when: - - The memories only repeat the same question the user asked before (echo, not answer). - - The memories show related topics but lack the specific detail needed. - - The memories contain partial information that would benefit from full task context, timeline, or related skills. +Given a QUERY and CANDIDATE memories, decide: does each candidate's content contain information that would HELP ANSWER the query? -Output a JSON object with exactly two fields: -{"relevant":[1,3,5],"sufficient":true} +CORE QUESTION: "If I include this memory, will it help produce a better answer?" +- YES → include +- NO → exclude -- "relevant": array of candidate numbers that are useful. Empty array [] if none are relevant. -- "sufficient": true ONLY if the memories contain a direct answer; false otherwise. +RULES: +1. A candidate is relevant if its content provides facts, context, or data that directly supports answering the query. +2. A candidate that merely shares the same broad topic/domain but contains NO useful information for answering is NOT relevant. +3. If NO candidate can help answer the query, return {"relevant":[],"sufficient":false} — do NOT force-pick the "least irrelevant" one. -Output ONLY the JSON object, nothing else.`; +OUTPUT — JSON only: +{"relevant":[1,3],"sufficient":true} +- "relevant": candidate numbers whose content helps answer the query. [] if none can help. 
+- "sufficient": true only if the selected memories fully answer the query.`; import type { FilterResult } from "./openai"; export type { FilterResult } from "./openai"; export async function filterRelevantAnthropic( query: string, - candidates: Array<{ index: number; summary: string; role: string }>, + candidates: Array<{ index: number; role: string; content: string; time?: string }>, cfg: SummarizerConfig, log: Logger, ): Promise { @@ -184,7 +242,10 @@ export async function filterRelevantAnthropic( }; const candidateText = candidates - .map((c) => `${c.index}. [${c.role}] ${c.summary}`) + .map((c) => { + const timeTag = c.time ? ` (${c.time})` : ""; + return `${c.index}. [${c.role}]${timeTag}\n ${c.content}`; + }) .join("\n"); const resp = await fetch(endpoint, { @@ -207,6 +268,7 @@ export async function filterRelevantAnthropic( const json = (await resp.json()) as { content: Array<{ type: string; text: string }> }; const raw = json.content.find((c) => c.type === "text")?.text?.trim() ?? "{}"; + log.debug(`filterRelevant raw LLM response: "${raw}"`); return parseFilterResult(raw, log); } @@ -249,7 +311,7 @@ export async function summarizeAnthropic( max_tokens: 100, temperature: cfg.temperature ?? 0, system: SYSTEM_PROMPT, - messages: [{ role: "user", content: text }], + messages: [{ role: "user", content: `[TEXT TO SUMMARIZE]\n${text}\n[/TEXT TO SUMMARIZE]` }], }), signal: AbortSignal.timeout(cfg.timeoutMs ?? 30_000), }); diff --git a/apps/memos-local-openclaw/src/ingest/providers/bedrock.ts b/apps/memos-local-openclaw/src/ingest/providers/bedrock.ts index 207289af5..d2f582aba 100644 --- a/apps/memos-local-openclaw/src/ingest/providers/bedrock.ts +++ b/apps/memos-local-openclaw/src/ingest/providers/bedrock.ts @@ -1,20 +1,35 @@ import type { SummarizerConfig, Logger } from "../../types"; -const SYSTEM_PROMPT = `Summarize the text in ONE concise sentence (max 120 characters). 
IMPORTANT: Use the SAME language as the input text — if the input is Chinese, write Chinese; if English, write English. Preserve exact names, commands, error codes. No bullet points, no preamble — output only the sentence.`; +const SYSTEM_PROMPT = `You generate a retrieval-friendly title. + +Return exactly one noun phrase that names the topic AND its key details. + +Requirements: +- Same language as input +- Keep proper nouns, API/function names, specific parameters, versions, error codes +- Include WHO/WHAT/WHERE details when present (e.g. person name + event, tool name + what it does) +- Prefer concrete topic words over generic words +- No verbs unless unavoidable +- No generic endings like: + 功能说明、使用说明、简介、介绍、用途、summary、overview、basics +- Chinese: 10-50 characters (aim for 15-30) +- Non-Chinese: 5-15 words (aim for 8-12) +- Output title only`; const TASK_SUMMARY_PROMPT = `You create a DETAILED task summary from a multi-turn conversation. This summary will be the ONLY record of this conversation, so it must preserve ALL important information. -CRITICAL LANGUAGE RULE: You MUST write in the SAME language as the user's messages. Chinese input → Chinese output. English input → English output. NEVER mix languages. +## LANGUAGE RULE (HIGHEST PRIORITY) +Detect the PRIMARY language of the user's messages. If most user messages are Chinese, ALL output (title, goal, steps, result, details) MUST be in Chinese. If English, output in English. NEVER mix. This rule overrides everything below. Output EXACTLY this structure: -📌 Title -A short, descriptive title (10-30 characters). Like a chat group name. +📌 Title / 标题 +A short, descriptive title (10-30 characters). Same language as user messages. -🎯 Goal +🎯 Goal / 目标 One sentence: what the user wanted to accomplish. 
-📋 Key Steps +📋 Key Steps / 关键步骤 - Describe each meaningful step in detail - Include the ACTUAL content produced: code snippets, commands, config blocks, formulas, key paragraphs - For code: include the function signature and core logic (up to ~30 lines per block), use fenced code blocks @@ -23,10 +38,10 @@ One sentence: what the user wanted to accomplish. - Merge only truly trivial back-and-forth (like "ok" / "sure") - Do NOT over-summarize: "provided a function" is BAD; show the actual function -✅ Result +✅ Result / 结果 What was the final outcome? Include the final version of any code/config/content produced. -💡 Key Details +💡 Key Details / 关键细节 - Decisions made, trade-offs discussed, caveats noted, alternative approaches mentioned - Specific values: numbers, versions, thresholds, URLs, file paths, model names - Omit this section only if there truly are no noteworthy details @@ -76,7 +91,56 @@ export async function summarizeTaskBedrock( return json.output?.message?.content?.[0]?.text?.trim() ?? ""; } -const TOPIC_JUDGE_PROMPT = `You are a conversation topic boundary detector. Given the CURRENT task context (may include opening topic + recent exchanges) and a single NEW user message, decide if the new message belongs to the SAME task or starts a NEW one. +const TASK_TITLE_PROMPT = `Generate a short title for a conversation task. + +Input: the first few user messages from a conversation. +Output: a concise title (5-20 characters for Chinese, 3-8 words for English). 
+ +Rules: +- Same language as user messages +- Describe WHAT the user wanted to do, not system/technical details +- Ignore system prompts, session startup messages, or boilerplate instructions — focus on the user's actual intent +- If the user only asked one question, use that question as the title (shortened if needed) +- Output the title only, no quotes, no prefix, no explanation`; + +export async function generateTaskTitleBedrock( + text: string, + cfg: SummarizerConfig, + log: Logger, +): Promise { + const model = cfg.model ?? "anthropic.claude-3-haiku-20240307-v1:0"; + const endpoint = cfg.endpoint; + if (!endpoint) { + throw new Error("Bedrock task-title requires 'endpoint'"); + } + + const url = `${endpoint}/model/${model}/converse`; + const headers: Record = { + "Content-Type": "application/json", + ...cfg.headers, + }; + + const resp = await fetch(url, { + method: "POST", + headers, + body: JSON.stringify({ + system: [{ text: TASK_TITLE_PROMPT }], + messages: [{ role: "user", content: [{ text }] }], + inferenceConfig: { temperature: 0, maxTokens: 100 }, + }), + signal: AbortSignal.timeout(cfg.timeoutMs ?? 15_000), + }); + + if (!resp.ok) { + const body = await resp.text(); + throw new Error(`Bedrock task-title failed (${resp.status}): ${body}`); + } + + const json = (await resp.json()) as { output: { message: { content: Array<{ text: string }> } } }; + return json.output?.message?.content?.[0]?.text?.trim() ?? ""; +} + +const TOPIC_JUDGE_PROMPT = `You are a conversation topic boundary detector. Given the CURRENT task context and a NEW user message, decide if the new message belongs to the SAME task or starts a NEW one. Answer ONLY "NEW" or "SAME". 
@@ -84,22 +148,21 @@ SAME — the new message: - Continues, follows up on, refines, or corrects the same subject/project/task - Asks a clarification or next-step question about what was just discussed - Reports a result, error, or feedback about the current task -- Discusses different tools, methods, or approaches for the SAME goal (e.g., learning English via BBC → via ChatGPT → via AI tools = all SAME "learning English" task) -- Mentions a related technology or platform in the context of the current goal -- Is a short acknowledgment (ok, thanks, 好的, 嗯) in direct response to the current flow +- Discusses different tools or approaches for the SAME goal (e.g., learning English via BBC → via ChatGPT = SAME) +- Is a short acknowledgment (ok, thanks, 好的) in response to the current flow NEW — the new message: -- Introduces a clearly UNRELATED subject with NO logical connection to the current task -- The topic has ZERO overlap with any aspect of the current conversation (e.g., from "learning English" to "what's the weather tomorrow") -- Starts a request about a completely different domain or life area +- Introduces a subject from a DIFFERENT domain than the current task (e.g., tech → cooking, work → personal life, database → travel) +- Has NO logical connection to what was being discussed +- Starts a request about a different project, system, or life area - Begins with a new greeting/reset followed by a different topic Key principles: -- STRONGLY lean toward SAME — only mark NEW for obvious, unambiguous topic shifts -- Different aspects, tools, or methods related to the same overall goal are SAME -- If the new message could reasonably be interpreted as part of the ongoing discussion, choose SAME -- Only choose NEW when there is absolutely no thematic connection to the current task -- Examples: "学英语" → "用AI工具学英语" = SAME; "学英语" → "明天天气" = NEW +- If the topic domain clearly changed (e.g., server config → recipe, code review → vacation plan), choose NEW +- Different aspects of 
the SAME project/system are SAME (e.g., Nginx SSL → Nginx gzip = SAME) +- Different unrelated technologies discussed independently are NEW (e.g., Redis config → cooking recipe = NEW) +- When unsure, lean toward SAME for closely related topics, but do NOT hesitate to mark NEW for obvious domain shifts +- Examples: "配置Nginx" → "加gzip压缩" = SAME; "配置Nginx" → "做红烧肉" = NEW; "MySQL配置" → "K8s部署" in same infra project = SAME; "部署服务器" → "年会安排" = NEW Output exactly one word: NEW or SAME`; @@ -145,34 +208,30 @@ export async function judgeNewTopicBedrock( return answer.startsWith("NEW"); } -const FILTER_RELEVANT_PROMPT = `You are a memory relevance judge. Given a user's QUERY and a list of CANDIDATE memory summaries, do two things: - -1. Select ALL candidates that could be useful for answering the query. When in doubt, INCLUDE the candidate. - - For questions about lists, history, or "what/where/who" across multiple items (e.g. "which companies did I work at"), include ALL matching items — do NOT stop at the first match. - - For factual lookups (e.g. "what is the SSH port"), a single direct answer is enough. -2. Judge whether the selected memories are SUFFICIENT to fully answer the query WITHOUT fetching additional context. +const FILTER_RELEVANT_PROMPT = `You are a memory relevance judge. -IMPORTANT for "sufficient" judgment: -- sufficient=true ONLY when the memories contain a concrete ANSWER, fact, decision, or actionable information that directly addresses the query. -- sufficient=false when: - - The memories only repeat the same question the user asked before (echo, not answer). - - The memories show related topics but lack the specific detail needed. - - The memories contain partial information that would benefit from full task context, timeline, or related skills. +Given a QUERY and CANDIDATE memories, decide: does each candidate's content contain information that would HELP ANSWER the query? 
-Output a JSON object with exactly two fields: -{"relevant":[1,3,5],"sufficient":true} +CORE QUESTION: "If I include this memory, will it help produce a better answer?" +- YES → include +- NO → exclude -- "relevant": array of candidate numbers that are useful. Empty array [] if none are relevant. -- "sufficient": true ONLY if the memories contain a direct answer; false otherwise. +RULES: +1. A candidate is relevant if its content provides facts, context, or data that directly supports answering the query. +2. A candidate that merely shares the same broad topic/domain but contains NO useful information for answering is NOT relevant. +3. If NO candidate can help answer the query, return {"relevant":[],"sufficient":false} — do NOT force-pick the "least irrelevant" one. -Output ONLY the JSON object, nothing else.`; +OUTPUT — JSON only: +{"relevant":[1,3],"sufficient":true} +- "relevant": candidate numbers whose content helps answer the query. [] if none can help. +- "sufficient": true only if the selected memories fully answer the query.`; import type { FilterResult } from "./openai"; export type { FilterResult } from "./openai"; export async function filterRelevantBedrock( query: string, - candidates: Array<{ index: number; summary: string; role: string }>, + candidates: Array<{ index: number; role: string; content: string; time?: string }>, cfg: SummarizerConfig, log: Logger, ): Promise { @@ -189,7 +248,10 @@ export async function filterRelevantBedrock( }; const candidateText = candidates - .map((c) => `${c.index}. [${c.role}] ${c.summary}`) + .map((c) => { + const timeTag = c.time ? ` (${c.time})` : ""; + return `${c.index}. [${c.role}]${timeTag}\n ${c.content}`; + }) .join("\n"); const resp = await fetch(url, { @@ -210,6 +272,7 @@ export async function filterRelevantBedrock( const json = (await resp.json()) as { output: { message: { content: Array<{ text: string }> } } }; const raw = json.output?.message?.content?.[0]?.text?.trim() ?? 
"{}"; + log.debug(`filterRelevant raw LLM response: "${raw}"`); return parseFilterResult(raw, log); } @@ -252,7 +315,7 @@ export async function summarizeBedrock( headers, body: JSON.stringify({ system: [{ text: SYSTEM_PROMPT }], - messages: [{ role: "user", content: [{ text }] }], + messages: [{ role: "user", content: [{ text: `[TEXT TO SUMMARIZE]\n${text}\n[/TEXT TO SUMMARIZE]` }] }], inferenceConfig: { temperature: cfg.temperature ?? 0, maxTokens: 100, diff --git a/apps/memos-local-openclaw/src/ingest/providers/gemini.ts b/apps/memos-local-openclaw/src/ingest/providers/gemini.ts index 9f93a439d..b6659b2d1 100644 --- a/apps/memos-local-openclaw/src/ingest/providers/gemini.ts +++ b/apps/memos-local-openclaw/src/ingest/providers/gemini.ts @@ -1,20 +1,35 @@ import type { SummarizerConfig, Logger } from "../../types"; -const SYSTEM_PROMPT = `Summarize the text in ONE concise sentence (max 120 characters). IMPORTANT: Use the SAME language as the input text — if the input is Chinese, write Chinese; if English, write English. Preserve exact names, commands, error codes. No bullet points, no preamble — output only the sentence.`; +const SYSTEM_PROMPT = `You generate a retrieval-friendly title. + +Return exactly one noun phrase that names the topic AND its key details. + +Requirements: +- Same language as input +- Keep proper nouns, API/function names, specific parameters, versions, error codes +- Include WHO/WHAT/WHERE details when present (e.g. person name + event, tool name + what it does) +- Prefer concrete topic words over generic words +- No verbs unless unavoidable +- No generic endings like: + 功能说明、使用说明、简介、介绍、用途、summary、overview、basics +- Chinese: 10-50 characters (aim for 15-30) +- Non-Chinese: 5-15 words (aim for 8-12) +- Output title only`; const TASK_SUMMARY_PROMPT = `You create a DETAILED task summary from a multi-turn conversation. This summary will be the ONLY record of this conversation, so it must preserve ALL important information. 
-CRITICAL LANGUAGE RULE: You MUST write in the SAME language as the user's messages. Chinese input → Chinese output. English input → English output. NEVER mix languages. +## LANGUAGE RULE (HIGHEST PRIORITY) +Detect the PRIMARY language of the user's messages. If most user messages are Chinese, ALL output (title, goal, steps, result, details) MUST be in Chinese. If English, output in English. NEVER mix. This rule overrides everything below. Output EXACTLY this structure: -📌 Title -A short, descriptive title (10-30 characters). Like a chat group name. +📌 Title / 标题 +A short, descriptive title (10-30 characters). Same language as user messages. -🎯 Goal +🎯 Goal / 目标 One sentence: what the user wanted to accomplish. -📋 Key Steps +📋 Key Steps / 关键步骤 - Describe each meaningful step in detail - Include the ACTUAL content produced: code snippets, commands, config blocks, formulas, key paragraphs - For code: include the function signature and core logic (up to ~30 lines per block), use fenced code blocks @@ -23,10 +38,10 @@ One sentence: what the user wanted to accomplish. - Merge only truly trivial back-and-forth (like "ok" / "sure") - Do NOT over-summarize: "provided a function" is BAD; show the actual function -✅ Result +✅ Result / 结果 What was the final outcome? Include the final version of any code/config/content produced. -💡 Key Details +💡 Key Details / 关键细节 - Decisions made, trade-offs discussed, caveats noted, alternative approaches mentioned - Specific values: numbers, versions, thresholds, URLs, file paths, model names - Omit this section only if there truly are no noteworthy details @@ -75,7 +90,55 @@ export async function summarizeTaskGemini( return json.candidates?.[0]?.content?.parts?.[0]?.text?.trim() ?? ""; } -const TOPIC_JUDGE_PROMPT = `You are a conversation topic boundary detector. 
Given the CURRENT task context (may include opening topic + recent exchanges) and a single NEW user message, decide if the new message belongs to the SAME task or starts a NEW one. +const TASK_TITLE_PROMPT = `Generate a short title for a conversation task. + +Input: the first few user messages from a conversation. +Output: a concise title (5-20 characters for Chinese, 3-8 words for English). + +Rules: +- Same language as user messages +- Describe WHAT the user wanted to do, not system/technical details +- Ignore system prompts, session startup messages, or boilerplate instructions — focus on the user's actual intent +- If the user only asked one question, use that question as the title (shortened if needed) +- Output the title only, no quotes, no prefix, no explanation`; + +export async function generateTaskTitleGemini( + text: string, + cfg: SummarizerConfig, + log: Logger, +): Promise { + const model = cfg.model ?? "gemini-1.5-flash"; + const endpoint = + cfg.endpoint ?? + `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`; + + const url = `${endpoint}?key=${cfg.apiKey}`; + const headers: Record = { + "Content-Type": "application/json", + ...cfg.headers, + }; + + const resp = await fetch(url, { + method: "POST", + headers, + body: JSON.stringify({ + systemInstruction: { parts: [{ text: TASK_TITLE_PROMPT }] }, + contents: [{ parts: [{ text }] }], + generationConfig: { temperature: 0, maxOutputTokens: 100 }, + }), + signal: AbortSignal.timeout(cfg.timeoutMs ?? 15_000), + }); + + if (!resp.ok) { + const body = await resp.text(); + throw new Error(`Gemini task-title failed (${resp.status}): ${body}`); + } + + const json = (await resp.json()) as { candidates: Array<{ content: { parts: Array<{ text: string }> } }> }; + return json.candidates?.[0]?.content?.parts?.[0]?.text?.trim() ?? ""; +} + +const TOPIC_JUDGE_PROMPT = `You are a conversation topic boundary detector. 
Given the CURRENT task context and a NEW user message, decide if the new message belongs to the SAME task or starts a NEW one. Answer ONLY "NEW" or "SAME". @@ -83,22 +146,21 @@ SAME — the new message: - Continues, follows up on, refines, or corrects the same subject/project/task - Asks a clarification or next-step question about what was just discussed - Reports a result, error, or feedback about the current task -- Discusses different tools, methods, or approaches for the SAME goal (e.g., learning English via BBC → via ChatGPT → via AI tools = all SAME "learning English" task) -- Mentions a related technology or platform in the context of the current goal -- Is a short acknowledgment (ok, thanks, 好的, 嗯) in direct response to the current flow +- Discusses different tools or approaches for the SAME goal (e.g., learning English via BBC → via ChatGPT = SAME) +- Is a short acknowledgment (ok, thanks, 好的) in response to the current flow NEW — the new message: -- Introduces a clearly UNRELATED subject with NO logical connection to the current task -- The topic has ZERO overlap with any aspect of the current conversation (e.g., from "learning English" to "what's the weather tomorrow") -- Starts a request about a completely different domain or life area +- Introduces a subject from a DIFFERENT domain than the current task (e.g., tech → cooking, work → personal life, database → travel) +- Has NO logical connection to what was being discussed +- Starts a request about a different project, system, or life area - Begins with a new greeting/reset followed by a different topic Key principles: -- STRONGLY lean toward SAME — only mark NEW for obvious, unambiguous topic shifts -- Different aspects, tools, or methods related to the same overall goal are SAME -- If the new message could reasonably be interpreted as part of the ongoing discussion, choose SAME -- Only choose NEW when there is absolutely no thematic connection to the current task -- Examples: "学英语" → "用AI工具学英语" = SAME; 
"学英语" → "明天天气" = NEW +- If the topic domain clearly changed (e.g., server config → recipe, code review → vacation plan), choose NEW +- Different aspects of the SAME project/system are SAME (e.g., Nginx SSL → Nginx gzip = SAME) +- Different unrelated technologies discussed independently are NEW (e.g., Redis config → cooking recipe = NEW) +- When unsure, lean toward SAME for closely related topics, but do NOT hesitate to mark NEW for obvious domain shifts +- Examples: "配置Nginx" → "加gzip压缩" = SAME; "配置Nginx" → "做红烧肉" = NEW; "MySQL配置" → "K8s部署" in same infra project = SAME; "部署服务器" → "年会安排" = NEW Output exactly one word: NEW or SAME`; @@ -143,34 +205,30 @@ export async function judgeNewTopicGemini( return answer.startsWith("NEW"); } -const FILTER_RELEVANT_PROMPT = `You are a memory relevance judge. Given a user's QUERY and a list of CANDIDATE memory summaries, do two things: - -1. Select ALL candidates that could be useful for answering the query. When in doubt, INCLUDE the candidate. - - For questions about lists, history, or "what/where/who" across multiple items (e.g. "which companies did I work at"), include ALL matching items — do NOT stop at the first match. - - For factual lookups (e.g. "what is the SSH port"), a single direct answer is enough. -2. Judge whether the selected memories are SUFFICIENT to fully answer the query WITHOUT fetching additional context. +const FILTER_RELEVANT_PROMPT = `You are a memory relevance judge. -IMPORTANT for "sufficient" judgment: -- sufficient=true ONLY when the memories contain a concrete ANSWER, fact, decision, or actionable information that directly addresses the query. -- sufficient=false when: - - The memories only repeat the same question the user asked before (echo, not answer). - - The memories show related topics but lack the specific detail needed. - - The memories contain partial information that would benefit from full task context, timeline, or related skills. 
+Given a QUERY and CANDIDATE memories, decide: does each candidate's content contain information that would HELP ANSWER the query? -Output a JSON object with exactly two fields: -{"relevant":[1,3,5],"sufficient":true} +CORE QUESTION: "If I include this memory, will it help produce a better answer?" +- YES → include +- NO → exclude -- "relevant": array of candidate numbers that are useful. Empty array [] if none are relevant. -- "sufficient": true ONLY if the memories contain a direct answer; false otherwise. +RULES: +1. A candidate is relevant if its content provides facts, context, or data that directly supports answering the query. +2. A candidate that merely shares the same broad topic/domain but contains NO useful information for answering is NOT relevant. +3. If NO candidate can help answer the query, return {"relevant":[],"sufficient":false} — do NOT force-pick the "least irrelevant" one. -Output ONLY the JSON object, nothing else.`; +OUTPUT — JSON only: +{"relevant":[1,3],"sufficient":true} +- "relevant": candidate numbers whose content helps answer the query. [] if none can help. +- "sufficient": true only if the selected memories fully answer the query.`; import type { FilterResult } from "./openai"; export type { FilterResult } from "./openai"; export async function filterRelevantGemini( query: string, - candidates: Array<{ index: number; summary: string; role: string }>, + candidates: Array<{ index: number; role: string; content: string; time?: string }>, cfg: SummarizerConfig, log: Logger, ): Promise { @@ -186,7 +244,10 @@ export async function filterRelevantGemini( }; const candidateText = candidates - .map((c) => `${c.index}. [${c.role}] ${c.summary}`) + .map((c) => { + const timeTag = c.time ? ` (${c.time})` : ""; + return `${c.index}. 
[${c.role}]${timeTag}\n ${c.content}`; + }) .join("\n"); const resp = await fetch(url, { @@ -207,6 +268,7 @@ export async function filterRelevantGemini( const json = (await resp.json()) as { candidates: Array<{ content: { parts: Array<{ text: string }> } }> }; const raw = json.candidates?.[0]?.content?.parts?.[0]?.text?.trim() ?? "{}"; + log.debug(`filterRelevant raw LLM response: "${raw}"`); return parseFilterResult(raw, log); } @@ -248,7 +310,7 @@ export async function summarizeGemini( headers, body: JSON.stringify({ systemInstruction: { parts: [{ text: SYSTEM_PROMPT }] }, - contents: [{ parts: [{ text }] }], + contents: [{ parts: [{ text: `[TEXT TO SUMMARIZE]\n${text}\n[/TEXT TO SUMMARIZE]` }] }], generationConfig: { temperature: cfg.temperature ?? 0, maxOutputTokens: 100 }, }), signal: AbortSignal.timeout(cfg.timeoutMs ?? 30_000), diff --git a/apps/memos-local-openclaw/src/ingest/providers/index.ts b/apps/memos-local-openclaw/src/ingest/providers/index.ts index 5c30bcf56..8b43f14b1 100644 --- a/apps/memos-local-openclaw/src/ingest/providers/index.ts +++ b/apps/memos-local-openclaw/src/ingest/providers/index.ts @@ -1,12 +1,12 @@ import * as fs from "fs"; import * as path from "path"; import type { SummarizerConfig, Logger } from "../../types"; -import { summarizeOpenAI, summarizeTaskOpenAI, judgeNewTopicOpenAI, filterRelevantOpenAI, judgeDedupOpenAI } from "./openai"; +import { summarizeOpenAI, summarizeTaskOpenAI, generateTaskTitleOpenAI, judgeNewTopicOpenAI, filterRelevantOpenAI, judgeDedupOpenAI } from "./openai"; import type { FilterResult, DedupResult } from "./openai"; export type { FilterResult, DedupResult } from "./openai"; -import { summarizeAnthropic, summarizeTaskAnthropic, judgeNewTopicAnthropic, filterRelevantAnthropic, judgeDedupAnthropic } from "./anthropic"; -import { summarizeGemini, summarizeTaskGemini, judgeNewTopicGemini, filterRelevantGemini, judgeDedupGemini } from "./gemini"; -import { summarizeBedrock, summarizeTaskBedrock, 
judgeNewTopicBedrock, filterRelevantBedrock, judgeDedupBedrock } from "./bedrock";
+import { summarizeAnthropic, summarizeTaskAnthropic, generateTaskTitleAnthropic, judgeNewTopicAnthropic, filterRelevantAnthropic, judgeDedupAnthropic } from "./anthropic";
+import { summarizeGemini, summarizeTaskGemini, generateTaskTitleGemini, judgeNewTopicGemini, filterRelevantGemini, judgeDedupGemini } from "./gemini";
+import { summarizeBedrock, summarizeTaskBedrock, generateTaskTitleBedrock, judgeNewTopicBedrock, filterRelevantBedrock, judgeDedupBedrock } from "./bedrock";
 
 /**
  * Build a SummarizerConfig from OpenClaw's native model configuration (openclaw.json).
@@ -53,6 +53,66 @@ function loadOpenClawFallbackConfig(log: Logger): SummarizerConfig | undefined {
   }
 }
 
+// ─── Model Health Tracking ───
+
+export interface ModelHealthEntry {
+  role: string;
+  status: "ok" | "degraded" | "error" | "unknown";
+  lastSuccess: number | null;
+  lastError: number | null;
+  lastErrorMessage: string | null;
+  consecutiveErrors: number;
+  model: string | null;
+  failedModel: string | null;
+}
+
+class ModelHealthTracker {
+  private state = new Map<string, ModelHealthEntry>();
+  private pendingErrors = new Map<string, { model: string; error: string }>();
+
+  recordSuccess(role: string, model: string): void {
+    const entry = this.getOrCreate(role);
+    const pending = this.pendingErrors.get(role);
+    if (pending) {
+      entry.status = "degraded";
+      entry.lastError = Date.now();
+      entry.lastErrorMessage = pending.error.length > 300 ? pending.error.slice(0, 300) + "..." : pending.error;
+      entry.failedModel = pending.model;
+      this.pendingErrors.delete(role);
+    } else {
+      entry.status = "ok";
+    }
+    entry.lastSuccess = Date.now();
+    entry.consecutiveErrors = 0;
+    entry.model = model;
+  }
+
+  recordError(role: string, model: string, error: string): void {
+    const entry = this.getOrCreate(role);
+    entry.lastError = Date.now();
+    entry.lastErrorMessage = error.length > 300 ? error.slice(0, 300) + "..." 
: error; + entry.consecutiveErrors++; + entry.failedModel = model; + entry.status = "error"; + this.pendingErrors.set(role, { model, error: entry.lastErrorMessage }); + } + + getAll(): ModelHealthEntry[] { + return [...this.state.values()]; + } + + private getOrCreate(role: string): ModelHealthEntry { + let entry = this.state.get(role); + if (!entry) { + entry = { role, status: "unknown", lastSuccess: null, lastError: null, lastErrorMessage: null, consecutiveErrors: 0, model: null, failedModel: null }; + this.state.set(role, entry); + } + return entry; + } +} + +export const modelHealth = new ModelHealthTracker(); + export class Summarizer { private strongCfg: SummarizerConfig | undefined; private fallbackCfg: SummarizerConfig | undefined; @@ -88,24 +148,68 @@ export class Summarizer { ): Promise { const chain = this.getConfigChain(); for (let i = 0; i < chain.length; i++) { + const modelInfo = `${chain[i].provider}/${chain[i].model ?? "?"}`; try { - return await fn(chain[i]); + const result = await fn(chain[i]); + modelHealth.recordSuccess(label, modelInfo); + return result; } catch (err) { const level = i < chain.length - 1 ? "warn" : "error"; - const modelInfo = `${chain[i].provider}/${chain[i].model ?? "?"}`; this.log[level](`${label} failed (${modelInfo}), ${i < chain.length - 1 ? "trying next" : "no more fallbacks"}: ${err}`); + modelHealth.recordError(label, modelInfo, String(err)); } } return undefined; } async summarize(text: string): Promise { + const cleaned = stripMarkdown(text).trim(); + + if (wordCount(cleaned) <= 10) { + return cleaned; + } + if (!this.cfg && !this.fallbackCfg) { - return ruleFallback(text); + return ruleFallback(cleaned); } - const result = await this.tryChain("summarize", (cfg) => callSummarize(cfg, text, this.log)); - return result ?? 
ruleFallback(text); + const accept = (s: string | undefined): s is string => + !!s && s.length > 0 && s.length < cleaned.length; + + let llmCalled = false; + try { + const result = await this.tryChain("summarize", (cfg) => callSummarize(cfg, text, this.log)); + llmCalled = true; + const resultCleaned = result ? stripMarkdown(result).trim() : undefined; + + if (accept(resultCleaned)) { + return resultCleaned; + } + + if (resultCleaned !== undefined && resultCleaned !== null) { + const len: number = (resultCleaned as string).length; + this.log.warn(`summarize: result (${len}) >= input (${cleaned.length}), retrying`); + } + } catch (err) { + this.log.warn(`summarize primary failed: ${err}`); + } + + const fallback = this.fallbackCfg ?? this.cfg; + if (fallback) { + try { + const retry = await callSummarize(fallback, text, this.log); + llmCalled = true; + const retryCleaned = retry ? stripMarkdown(retry).trim() : undefined; + if (accept(retryCleaned)) { + modelHealth.recordSuccess("summarize", `${fallback.provider}/${fallback.model ?? "?"}`); + return retryCleaned; + } + } catch (err) { + this.log.warn(`summarize fallback retry failed: ${err}`); + } + } + + return llmCalled ? cleaned : ruleFallback(cleaned); } async summarizeTask(text: string): Promise { @@ -117,16 +221,37 @@ export class Summarizer { return result ?? taskFallback(text); } + async generateTaskTitle(text: string): Promise { + if (!this.cfg && !this.fallbackCfg) return ""; + const result = await this.tryChain("generateTaskTitle", (cfg) => callGenerateTaskTitle(cfg, text, this.log)); + return result ?? 
""; + } + async judgeNewTopic(currentContext: string, newMessage: string): Promise { - if (!this.cfg && !this.fallbackCfg) return null; + const chain: SummarizerConfig[] = []; + if (this.strongCfg) chain.push(this.strongCfg); + if (this.fallbackCfg) chain.push(this.fallbackCfg); + if (chain.length === 0 && this.cfg) chain.push(this.cfg); + if (chain.length === 0) return null; - const result = await this.tryChain("judgeNewTopic", (cfg) => callTopicJudge(cfg, currentContext, newMessage, this.log)); - return result ?? null; + for (let i = 0; i < chain.length; i++) { + const modelInfo = `${chain[i].provider}/${chain[i].model ?? "?"}`; + try { + const result = await callTopicJudge(chain[i], currentContext, newMessage, this.log); + modelHealth.recordSuccess("judgeNewTopic", modelInfo); + return result; + } catch (err) { + const level = i < chain.length - 1 ? "warn" : "error"; + this.log[level](`judgeNewTopic failed (${modelInfo}), ${i < chain.length - 1 ? "trying next" : "no more fallbacks"}: ${err}`); + modelHealth.recordError("judgeNewTopic", modelInfo, String(err)); + } + } + return null; } async filterRelevant( query: string, - candidates: Array<{ index: number; summary: string; role: string }>, + candidates: Array<{ index: number; role: string; content: string; time?: string }>, ): Promise { if (!this.cfg && !this.fallbackCfg) return null; if (candidates.length === 0) return { relevant: [], sufficient: true }; @@ -158,6 +283,12 @@ function callSummarize(cfg: SummarizerConfig, text: string, log: Logger): Promis case "openai": case "openai_compatible": case "azure_openai": + case "zhipu": + case "siliconflow": + case "bailian": + case "cohere": + case "mistral": + case "voyage": return summarizeOpenAI(text, cfg, log); case "anthropic": return summarizeAnthropic(text, cfg, log); @@ -175,6 +306,12 @@ function callSummarizeTask(cfg: SummarizerConfig, text: string, log: Logger): Pr case "openai": case "openai_compatible": case "azure_openai": + case "zhipu": + case 
"siliconflow": + case "bailian": + case "cohere": + case "mistral": + case "voyage": return summarizeTaskOpenAI(text, cfg, log); case "anthropic": return summarizeTaskAnthropic(text, cfg, log); @@ -187,11 +324,40 @@ function callSummarizeTask(cfg: SummarizerConfig, text: string, log: Logger): Pr } } +function callGenerateTaskTitle(cfg: SummarizerConfig, text: string, log: Logger): Promise { + switch (cfg.provider) { + case "openai": + case "openai_compatible": + case "azure_openai": + case "zhipu": + case "siliconflow": + case "bailian": + case "cohere": + case "mistral": + case "voyage": + return generateTaskTitleOpenAI(text, cfg, log); + case "anthropic": + return generateTaskTitleAnthropic(text, cfg, log); + case "gemini": + return generateTaskTitleGemini(text, cfg, log); + case "bedrock": + return generateTaskTitleBedrock(text, cfg, log); + default: + throw new Error(`Unknown summarizer provider: ${cfg.provider}`); + } +} + function callTopicJudge(cfg: SummarizerConfig, currentContext: string, newMessage: string, log: Logger): Promise { switch (cfg.provider) { case "openai": case "openai_compatible": case "azure_openai": + case "zhipu": + case "siliconflow": + case "bailian": + case "cohere": + case "mistral": + case "voyage": return judgeNewTopicOpenAI(currentContext, newMessage, cfg, log); case "anthropic": return judgeNewTopicAnthropic(currentContext, newMessage, cfg, log); @@ -204,11 +370,17 @@ function callTopicJudge(cfg: SummarizerConfig, currentContext: string, newMessag } } -function callFilterRelevant(cfg: SummarizerConfig, query: string, candidates: Array<{ index: number; summary: string; role: string }>, log: Logger): Promise { +function callFilterRelevant(cfg: SummarizerConfig, query: string, candidates: Array<{ index: number; role: string; content: string; time?: string }>, log: Logger): Promise { switch (cfg.provider) { case "openai": case "openai_compatible": case "azure_openai": + case "zhipu": + case "siliconflow": + case "bailian": + case 
"cohere": + case "mistral": + case "voyage": return filterRelevantOpenAI(query, candidates, cfg, log); case "anthropic": return filterRelevantAnthropic(query, candidates, cfg, log); @@ -226,6 +398,12 @@ function callJudgeDedup(cfg: SummarizerConfig, newSummary: string, candidates: A case "openai": case "openai_compatible": case "azure_openai": + case "zhipu": + case "siliconflow": + case "bailian": + case "cohere": + case "mistral": + case "voyage": return judgeDedupOpenAI(newSummary, candidates, cfg, log); case "anthropic": return judgeDedupAnthropic(newSummary, candidates, cfg, log); @@ -240,26 +418,34 @@ function callJudgeDedup(cfg: SummarizerConfig, newSummary: string, candidates: A // ─── Fallbacks ─── +function ruleFallback(text: string): string { + const lines = text.split("\n").filter((l) => l.trim().length > 5); + return (lines[0] ?? text).trim(); +} + function taskFallback(text: string): string { const lines = text.split("\n").filter((l) => l.trim().length > 10); return lines.slice(0, 30).join("\n").slice(0, 2000); } -function ruleFallback(text: string): string { - const lines = text.split("\n").filter((l) => l.trim().length > 10); - const first = (lines[0] ?? text).trim(); - - const entityRe = [/`[^`]+`/g, /\b(?:error|Error|ERROR)\s*[::]\s*.{5,60}/g]; - const entities: string[] = []; - for (const re of entityRe) { - for (const m of text.matchAll(re)) { - if (entities.length < 3) entities.push(m[0].slice(0, 50)); - } - } +function stripMarkdown(text: string): string { + return text + .replace(/\*\*([^*]+)\*\*/g, "$1") + .replace(/\*([^*]+)\*/g, "$1") + .replace(/^#{1,6}\s+/gm, "") + .replace(/`([^`]+)`/g, "$1") + .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") + .trim(); +} - let summary = first.length > 120 ? first.slice(0, 117) + "..." : first; - if (entities.length > 0) { - summary += ` (${entities.join(", ")})`; - } - return summary.slice(0, 200); +/** Count "words": CJK characters count as 1 word each, latin words separated by spaces. 
*/ +function wordCount(text: string): number { + let count = 0; + const cjk = /[\u4e00-\u9fff\u3400-\u4dbf\uf900-\ufaff]/g; + const cjkMatches = text.match(cjk); + if (cjkMatches) count += cjkMatches.length; + const noCjk = text.replace(cjk, " ").trim(); + if (noCjk) count += noCjk.split(/\s+/).filter(Boolean).length; + return count; } + diff --git a/apps/memos-local-openclaw/src/ingest/providers/openai.ts b/apps/memos-local-openclaw/src/ingest/providers/openai.ts index abf8dfb62..e19ecb6c8 100644 --- a/apps/memos-local-openclaw/src/ingest/providers/openai.ts +++ b/apps/memos-local-openclaw/src/ingest/providers/openai.ts @@ -1,20 +1,35 @@ import type { SummarizerConfig, Logger } from "../../types"; -const SYSTEM_PROMPT = `Summarize the text in ONE concise sentence (max 120 characters). IMPORTANT: Use the SAME language as the input text — if the input is Chinese, write Chinese; if English, write English. Preserve exact names, commands, error codes. No bullet points, no preamble — output only the sentence.`; +const SYSTEM_PROMPT = `You generate a retrieval-friendly title. + +Return exactly one noun phrase that names the topic AND its key details. + +Requirements: +- Same language as input +- Keep proper nouns, API/function names, specific parameters, versions, error codes +- Include WHO/WHAT/WHERE details when present (e.g. person name + event, tool name + what it does) +- Prefer concrete topic words over generic words +- No verbs unless unavoidable +- No generic endings like: + 功能说明、使用说明、简介、介绍、用途、summary、overview、basics +- Chinese: 10-50 characters (aim for 15-30) +- Non-Chinese: 5-15 words (aim for 8-12) +- Output title only`; const TASK_SUMMARY_PROMPT = `You create a DETAILED task summary from a multi-turn conversation. This summary will be the ONLY record of this conversation, so it must preserve ALL important information. -CRITICAL LANGUAGE RULE: You MUST write in the SAME language as the user's messages. Chinese input → Chinese output. 
English input → English output. NEVER mix languages. +## LANGUAGE RULE (HIGHEST PRIORITY) +Detect the PRIMARY language of the user's messages. If most user messages are Chinese, ALL output (title, goal, steps, result, details) MUST be in Chinese. If English, output in English. NEVER mix. This rule overrides everything below. Output EXACTLY this structure: -📌 Title -A short, descriptive title (10-30 characters). Like a chat group name. +📌 Title / 标题 +A short, descriptive title (10-30 characters). Same language as user messages. -🎯 Goal +🎯 Goal / 目标 One sentence: what the user wanted to accomplish. -📋 Key Steps +📋 Key Steps / 关键步骤 - Describe each meaningful step in detail - Include the ACTUAL content produced: code snippets, commands, config blocks, formulas, key paragraphs - For code: include the function signature and core logic (up to ~30 lines per block), use fenced code blocks @@ -23,10 +38,10 @@ One sentence: what the user wanted to accomplish. - Merge only truly trivial back-and-forth (like "ok" / "sure") - Do NOT over-summarize: "provided a function" is BAD; show the actual function -✅ Result +✅ Result / 结果 What was the final outcome? Include the final version of any code/config/content produced. -💡 Key Details +💡 Key Details / 关键细节 - Decisions made, trade-offs discussed, caveats noted, alternative approaches mentioned - Specific values: numbers, versions, thresholds, URLs, file paths, model names - Omit this section only if there truly are no noteworthy details @@ -76,6 +91,55 @@ export async function summarizeTaskOpenAI( return json.choices[0]?.message?.content?.trim() ?? ""; } +const TASK_TITLE_PROMPT = `Generate a short title for a conversation task. + +Input: the first few user messages from a conversation. +Output: a concise title (5-20 characters for Chinese, 3-8 words for English). 
+
+Rules:
+- Same language as user messages
+- Describe WHAT the user wanted to do, not system/technical details
+- Ignore system prompts, session startup messages, or boilerplate instructions — focus on the user's actual intent
+- If the user only asked one question, use that question as the title (shortened if needed)
+- Output the title only, no quotes, no prefix, no explanation`;
+
+export async function generateTaskTitleOpenAI(
+  text: string,
+  cfg: SummarizerConfig,
+  log: Logger,
+): Promise<string> {
+  const endpoint = normalizeChatEndpoint(cfg.endpoint ?? "https://api.openai.com/v1/chat/completions");
+  const model = cfg.model ?? "gpt-4o-mini";
+  const headers: Record<string, string> = {
+    "Content-Type": "application/json",
+    Authorization: `Bearer ${cfg.apiKey}`,
+    ...cfg.headers,
+  };
+
+  const resp = await fetch(endpoint, {
+    method: "POST",
+    headers,
+    body: JSON.stringify({
+      model,
+      temperature: 0,
+      max_tokens: 100,
+      messages: [
+        { role: "system", content: TASK_TITLE_PROMPT },
+        { role: "user", content: text },
+      ],
+    }),
+    signal: AbortSignal.timeout(cfg.timeoutMs ?? 15_000),
+  });
+
+  if (!resp.ok) {
+    const body = await resp.text();
+    throw new Error(`OpenAI task-title failed (${resp.status}): ${body}`);
+  }
+
+  const json = (await resp.json()) as { choices: Array<{ message: { content: string } }> };
+  return json.choices[0]?.message?.content?.trim() ?? "";
+}
+
 export async function summarizeOpenAI(
   text: string,
   cfg: SummarizerConfig,
@@ -97,7 +161,7 @@ export async function summarizeOpenAI(
       temperature: cfg.temperature ?? 0,
       messages: [
         { role: "system", content: SYSTEM_PROMPT },
-        { role: "user", content: text },
+        { role: "user", content: `[TEXT TO SUMMARIZE]\n${text}\n[/TEXT TO SUMMARIZE]` },
       ],
     }),
     signal: AbortSignal.timeout(cfg.timeoutMs ?? 30_000),
@@ -114,7 +178,7 @@
   return json.choices[0]?.message?.content?.trim() ?? "";
 }
 
-const TOPIC_JUDGE_PROMPT = `You are a conversation topic boundary detector. 
Given the CURRENT task context (may include opening topic + recent exchanges) and a single NEW user message, decide if the new message belongs to the SAME task or starts a NEW one. +const TOPIC_JUDGE_PROMPT = `You are a conversation topic boundary detector. Given the CURRENT task context and a NEW user message, decide if the new message belongs to the SAME task or starts a NEW one. Answer ONLY "NEW" or "SAME". @@ -122,22 +186,21 @@ SAME — the new message: - Continues, follows up on, refines, or corrects the same subject/project/task - Asks a clarification or next-step question about what was just discussed - Reports a result, error, or feedback about the current task -- Discusses different tools, methods, or approaches for the SAME goal (e.g., learning English via BBC → via ChatGPT → via AI tools = all SAME "learning English" task) -- Mentions a related technology or platform in the context of the current goal -- Is a short acknowledgment (ok, thanks, 好的, 嗯) in direct response to the current flow +- Discusses different tools or approaches for the SAME goal (e.g., learning English via BBC → via ChatGPT = SAME) +- Is a short acknowledgment (ok, thanks, 好的) in response to the current flow NEW — the new message: -- Introduces a clearly UNRELATED subject with NO logical connection to the current task -- The topic has ZERO overlap with any aspect of the current conversation (e.g., from "learning English" to "what's the weather tomorrow") -- Starts a request about a completely different domain or life area +- Introduces a subject from a DIFFERENT domain than the current task (e.g., tech → cooking, work → personal life, database → travel) +- Has NO logical connection to what was being discussed +- Starts a request about a different project, system, or life area - Begins with a new greeting/reset followed by a different topic Key principles: -- STRONGLY lean toward SAME — only mark NEW for obvious, unambiguous topic shifts -- Different aspects, tools, or methods related to 
the same overall goal are SAME -- If the new message could reasonably be interpreted as part of the ongoing discussion, choose SAME -- Only choose NEW when there is absolutely no thematic connection to the current task -- Examples: "学英语" → "用AI工具学英语" = SAME; "学英语" → "明天天气" = NEW +- If the topic domain clearly changed (e.g., server config → recipe, code review → vacation plan), choose NEW +- Different aspects of the SAME project/system are SAME (e.g., Nginx SSL → Nginx gzip = SAME) +- Different unrelated technologies discussed independently are NEW (e.g., Redis config → cooking recipe = NEW) +- When unsure, lean toward SAME for closely related topics, but do NOT hesitate to mark NEW for obvious domain shifts +- Examples: "配置Nginx" → "加gzip压缩" = SAME; "配置Nginx" → "做红烧肉" = NEW; "MySQL配置" → "K8s部署" in same infra project = SAME; "部署服务器" → "年会安排" = NEW Output exactly one word: NEW or SAME`; @@ -183,27 +246,23 @@ export async function judgeNewTopicOpenAI( return answer.startsWith("NEW"); } -const FILTER_RELEVANT_PROMPT = `You are a memory relevance judge. Given a user's QUERY and a list of CANDIDATE memory summaries, do two things: +const FILTER_RELEVANT_PROMPT = `You are a memory relevance judge. -1. Select ALL candidates that could be useful for answering the query. When in doubt, INCLUDE the candidate. - - For questions about lists, history, or "what/where/who" across multiple items (e.g. "which companies did I work at"), include ALL matching items — do NOT stop at the first match. - - For factual lookups (e.g. "what is the SSH port"), a single direct answer is enough. -2. Judge whether the selected memories are SUFFICIENT to fully answer the query WITHOUT fetching additional context. +Given a QUERY and CANDIDATE memories, decide: does each candidate's content contain information that would HELP ANSWER the query? 
-IMPORTANT for "sufficient" judgment: -- sufficient=true ONLY when the memories contain a concrete ANSWER, fact, decision, or actionable information that directly addresses the query. -- sufficient=false when: - - The memories only repeat the same question the user asked before (echo, not answer). - - The memories show related topics but lack the specific detail needed. - - The memories contain partial information that would benefit from full task context, timeline, or related skills. +CORE QUESTION: "If I include this memory, will it help produce a better answer?" +- YES → include +- NO → exclude -Output a JSON object with exactly two fields: -{"relevant":[1,3,5],"sufficient":true} - -- "relevant": array of candidate numbers that are useful. Empty array [] if none are relevant. -- "sufficient": true ONLY if the memories contain a direct answer; false otherwise. +RULES: +1. A candidate is relevant if its content provides facts, context, or data that directly supports answering the query. +2. A candidate that merely shares the same broad topic/domain but contains NO useful information for answering is NOT relevant. +3. If NO candidate can help answer the query, return {"relevant":[],"sufficient":false} — do NOT force-pick the "least irrelevant" one. -Output ONLY the JSON object, nothing else.`; +OUTPUT — JSON only: +{"relevant":[1,3],"sufficient":true} +- "relevant": candidate numbers whose content helps answer the query. [] if none can help. 
+- "sufficient": true only if the selected memories fully answer the query.`; export interface FilterResult { relevant: number[]; @@ -212,7 +271,7 @@ export interface FilterResult { export async function filterRelevantOpenAI( query: string, - candidates: Array<{ index: number; summary: string; role: string }>, + candidates: Array<{ index: number; role: string; content: string; time?: string }>, cfg: SummarizerConfig, log: Logger, ): Promise { @@ -225,7 +284,10 @@ export async function filterRelevantOpenAI( }; const candidateText = candidates - .map((c) => `${c.index}. [${c.role}] ${c.summary}`) + .map((c) => { + const timeTag = c.time ? ` (${c.time})` : ""; + return `${c.index}. [${c.role}]${timeTag}\n ${c.content}`; + }) .join("\n"); const resp = await fetch(endpoint, { @@ -250,6 +312,7 @@ export async function filterRelevantOpenAI( const json = (await resp.json()) as { choices: Array<{ message: { content: string } }> }; const raw = json.choices[0]?.message?.content?.trim() ?? "{}"; + log.debug(`filterRelevant raw LLM response: "${raw}"`); return parseFilterResult(raw, log); } diff --git a/apps/memos-local-openclaw/src/ingest/task-processor.ts b/apps/memos-local-openclaw/src/ingest/task-processor.ts index 494f0ddd0..9cd091055 100644 --- a/apps/memos-local-openclaw/src/ingest/task-processor.ts +++ b/apps/memos-local-openclaw/src/ingest/task-processor.ts @@ -310,9 +310,10 @@ export class TaskProcessor { const skipReason = this.shouldSkipSummary(chunks); if (skipReason) { - this.ctx.log.info(`Task ${task.id} skipped: ${skipReason} (chunks=${chunks.length}, title="${fallbackTitle}")`); + const skipTitle = await this.generateTitle(chunks, fallbackTitle); + this.ctx.log.info(`Task ${task.id} skipped: ${skipReason} (chunks=${chunks.length}, title="${skipTitle}")`); const reason = this.humanReadableSkipReason(skipReason, chunks); - this.store.updateTask(task.id, { title: fallbackTitle, summary: reason, status: "skipped", endedAt: Date.now() }); + 
this.store.updateTask(task.id, { title: skipTitle, summary: reason, status: "skipped", endedAt: Date.now() });
       return;
     }
 
@@ -326,7 +327,7 @@ export class TaskProcessor {
     }
 
     const { title: llmTitle, body } = this.parseTitleFromSummary(summary);
-    const title = llmTitle || fallbackTitle;
+    const title = llmTitle || await this.generateTitle(chunks, fallbackTitle);
 
     this.store.updateTask(task.id, {
       title,
@@ -455,19 +456,39 @@ export class TaskProcessor {
   private parseTitleFromSummary(summary: string): { title: string; body: string } {
     const titleMatch = summary.match(/📌\s*(?:Title|标题)\s*\n(.+)/);
     if (titleMatch) {
-      const title = titleMatch[1].trim().slice(0, 80);
+      const title = titleMatch[1].trim();
       const body = summary.replace(/📌\s*(?:Title|标题)\s*\n.+\n?/, "").trim();
       return { title, body };
     }
     return { title: "", body: summary };
   }
 
+  private async generateTitle(chunks: Chunk[], fallback: string): Promise<string> {
+    try {
+      const userChunks = chunks.filter((c) => c.role === "user");
+      const titleInput = userChunks
+        .slice(0, 3)
+        .map((c) => c.content.trim())
+        .join("\n\n");
+      if (!titleInput) return fallback || "Untitled Task";
+      const title = await this.summarizer.generateTaskTitle(titleInput);
+      return title || fallback || "Untitled Task";
+    } catch (err) {
+      this.ctx.log.warn(`generateTitle failed: ${err}`);
+      return fallback || "Untitled Task";
+    }
+  }
+
   private extractTitle(chunks: Chunk[]): string {
-    const firstUser = chunks.find((c) => c.role === "user");
+    const firstUser = chunks.find((c) => {
+      if (c.role !== "user") return false;
+      const t = c.content.trim();
+      if (t.length > 200) return false;
+      if (/session.startup|Session Startup|\/new|\/reset/i.test(t)) return false;
+      return true;
+    });
     if (!firstUser) return "Untitled Task";
-    const text = firstUser.content.trim();
-    if (text.length <= 60) return text;
-    return text.slice(0, 57) + "...";
+    return firstUser.content.trim().slice(0, 80);
   }
 
   private humanReadableSkipReason(reason: string, chunks: Chunk[]): 
string { diff --git a/apps/memos-local-openclaw/src/ingest/worker.ts b/apps/memos-local-openclaw/src/ingest/worker.ts index 333c86a54..55d1718b9 100644 --- a/apps/memos-local-openclaw/src/ingest/worker.ts +++ b/apps/memos-local-openclaw/src/ingest/worker.ts @@ -19,8 +19,7 @@ export class IngestWorker { private embedder: Embedder, private ctx: PluginContext, ) { - const strongCfg = ctx.config.skillEvolution?.summarizer; - this.summarizer = new Summarizer(ctx.config.summarizer, ctx.log, strongCfg); + this.summarizer = new Summarizer(ctx.config.summarizer, ctx.log); this.taskProcessor = new TaskProcessor(store, ctx); } @@ -60,11 +59,11 @@ export class IngestWorker { let duplicated = 0; let errors = 0; const resultLines: string[] = []; - const inputLines: string[] = []; + const inputDetails: Array<{ role: string; content: string }> = []; while (this.queue.length > 0) { const msg = this.queue.shift()!; - inputLines.push(`[${msg.role}] ${msg.content}`); + inputDetails.push({ role: msg.role, content: msg.content }); try { const result = await this.ingestMessage(msg); lastSessionKey = msg.sessionKey; @@ -72,20 +71,20 @@ export class IngestWorker { lastTimestamp = Math.max(lastTimestamp, msg.timestamp); if (result === "skipped") { skipped++; - resultLines.push(`[${msg.role}] ⏭ exact-dup → ${msg.content}`); + resultLines.push(JSON.stringify({ role: msg.role, action: "exact-dup", summary: "", content: msg.content })); } else if (result.action === "stored") { stored++; - resultLines.push(`[${msg.role}] ✅ stored → ${result.summary ?? msg.content}`); + resultLines.push(JSON.stringify({ role: msg.role, action: "stored", summary: result.summary ?? "", content: msg.content })); } else if (result.action === "duplicate") { duplicated++; - resultLines.push(`[${msg.role}] 🔁 dedup(${result.reason ?? "similar"}) → ${msg.content}`); + resultLines.push(JSON.stringify({ role: msg.role, action: "dedup", reason: result.reason ?? "similar", summary: result.summary ?? 
"", content: msg.content })); } else if (result.action === "merged") { merged++; - resultLines.push(`[${msg.role}] 🔀 merged → ${msg.content}`); + resultLines.push(JSON.stringify({ role: msg.role, action: "merged", summary: result.summary ?? "", content: msg.content })); } } catch (err) { errors++; - resultLines.push(`[${msg.role}] ❌ error → ${msg.content}`); + resultLines.push(JSON.stringify({ role: msg.role, action: "error", summary: "", content: msg.content })); this.ctx.log.error(`Failed to ingest message turn=${msg.turnId}: ${err}`); } } @@ -98,7 +97,7 @@ export class IngestWorker { const inputInfo = { session: lastSessionKey, messages: batchSize, - details: inputLines, + details: inputDetails, }; const stats = [`stored=${stored}`, skipped > 0 ? `skipped=${skipped}` : null, duplicated > 0 ? `dedup=${duplicated}` : null, merged > 0 ? `merged=${merged}` : null, errors > 0 ? `errors=${errors}` : null].filter(Boolean).join(", "); this.store.recordApiLog("memory_add", inputInfo, `${stats}\n${resultLines.join("\n")}`, dur, errors === 0); @@ -124,13 +123,7 @@ export class IngestWorker { private async ingestMessage(msg: ConversationMessage): Promise< "skipped" | { action: "stored" | "duplicate" | "merged"; summary?: string; reason?: string } > { - if (this.store.chunkExistsByContent(msg.sessionKey, msg.role, msg.content)) { - this.ctx.log.debug(`Exact-dup (same session+role+hash), skipping: session=${msg.sessionKey} role=${msg.role} len=${msg.content.length}`); - return "skipped"; - } - - const kind = msg.role === "tool" ? 
"tool_result" : "paragraph"; - return await this.storeChunk(msg, msg.content, kind, 0); + return await this.storeChunk(msg, msg.content, "paragraph", 0); } private async storeChunk( @@ -153,6 +146,8 @@ export class IngestWorker { let dedupTarget: string | null = null; let dedupReason: string | null = null; let mergedFromOld: string | null = null; + let mergeCount = 0; + let mergeHistory = "[]"; // Fast path: exact content_hash match within same owner (agent dimension) const chunkOwner = msg.owner ?? "agent:main"; @@ -167,7 +162,7 @@ export class IngestWorker { // Smart dedup: find Top-5 similar chunks, then ask LLM to judge if (dedupStatus === "active" && embedding) { - const similarThreshold = this.ctx.config.dedup?.similarityThreshold ?? 0.60; + const similarThreshold = this.ctx.config.dedup?.similarityThreshold ?? 0.80; const dedupOwnerFilter = msg.owner ? [msg.owner] : undefined; const topSimilar = findTopSimilar(this.store, embedding, similarThreshold, 5, this.ctx.log, dedupOwnerFilter); @@ -215,7 +210,23 @@ export class IngestWorker { mergedFromOld = targetChunkId; dedupReason = dedupResult.reason; - this.ctx.log.debug(`Smart dedup: UPDATE → old chunk=${targetChunkId} retired, new chunk=${chunkId} gets merged summary, reason: ${dedupResult.reason}`); + + // Inherit merge history from the old chunk + if (oldChunk) { + const oldHistory = JSON.parse(oldChunk.mergeHistory || "[]"); + oldHistory.push({ + action: "merge", + at: Date.now(), + reason: dedupResult.reason, + from: oldSummary, + to: dedupResult.mergedSummary, + sourceChunkId: targetChunkId, + }); + mergeHistory = JSON.stringify(oldHistory); + mergeCount = (oldChunk.mergeCount || 0) + 1; + } + + this.ctx.log.debug(`Smart dedup: UPDATE → old chunk=${targetChunkId} retired, new chunk=${chunkId} gets merged summary (mergeCount=${mergeCount}), reason: ${dedupResult.reason}`); } } @@ -242,9 +253,9 @@ export class IngestWorker { dedupStatus, dedupTarget, dedupReason, - mergeCount: 0, + mergeCount: mergeCount, 
lastHitAt: null, - mergeHistory: "[]", + mergeHistory: mergeHistory, createdAt: msg.timestamp, updatedAt: msg.timestamp, }; diff --git a/apps/memos-local-openclaw/src/recall/engine.ts b/apps/memos-local-openclaw/src/recall/engine.ts index 70a6d0a67..375c9cb65 100644 --- a/apps/memos-local-openclaw/src/recall/engine.ts +++ b/apps/memos-local-openclaw/src/recall/engine.ts @@ -42,7 +42,7 @@ export class RecallEngine { const candidatePool = maxResults * 5; const ownerFilter = opts.ownerFilter; - // Step 1: Gather candidates from both FTS and vector search + // Step 1: Gather candidates from FTS, vector search, and pattern search const ftsCandidates = query ? this.store.ftsSearch(query, candidatePool, ownerFilter) : []; @@ -60,10 +60,24 @@ export class RecallEngine { } } + // Step 1b: Pattern search (LIKE-based) as fallback for short terms that + // trigram FTS cannot match (trigram requires >= 3 chars). + const shortTerms = query + .replace(/[."""(){}[\]*:^~!@#$%&\\/<>,;'`??。,!、:""''()【】《》]/g, " ") + .split(/\s+/) + .filter((t) => t.length === 2); + const patternHits = shortTerms.length > 0 + ? this.store.patternSearch(shortTerms, { limit: candidatePool }) + : []; + const patternRanked = patternHits.map((h, i) => ({ + id: h.chunkId, + score: 1 / (i + 1), + })); + // Step 2: RRF fusion const ftsRanked = ftsCandidates.map((c) => ({ id: c.chunkId, score: c.score })); const vecRanked = vecCandidates.map((c) => ({ id: c.chunkId, score: c.score })); - const rrfScores = rrfFuse([ftsRanked, vecRanked], recallCfg.rrfK); + const rrfScores = rrfFuse([ftsRanked, vecRanked, patternRanked], recallCfg.rrfK); if (rrfScores.size === 0) { this.recordQuery(query, maxResults, minScore, 0); @@ -118,9 +132,10 @@ export class RecallEngine { if (!chunk) continue; if (roleFilter && chunk.role !== roleFilter) continue; + const excerpt = (chunk.mergeCount ?? 0) > 0 ? 
chunk.summary : makeExcerpt(chunk.content); hits.push({ summary: chunk.summary, - original_excerpt: makeExcerpt(chunk.content), + original_excerpt: excerpt, ref: { sessionKey: chunk.sessionKey, chunkId: chunk.id, @@ -255,8 +270,8 @@ export class RecallEngine { ): Promise { const candidateList = candidates.map((c, i) => ({ index: i, - summary: `[${c.skill.name}] ${c.skill.description}`, role: "skill" as const, + content: `[${c.skill.name}] ${c.skill.description}`, })); try { @@ -274,13 +289,5 @@ export class RecallEngine { } function makeExcerpt(content: string): string { - const min = 200; - const max = 500; - if (content.length <= max) return content; - - let cut = content.lastIndexOf(".", max); - if (cut < min) cut = content.lastIndexOf(" ", max); - if (cut < min) cut = max; - - return content.slice(0, cut) + "…"; + return content; } diff --git a/apps/memos-local-openclaw/src/skill/bundled-memory-guide.ts b/apps/memos-local-openclaw/src/skill/bundled-memory-guide.ts index 07d6f7c9f..3efb787d1 100644 --- a/apps/memos-local-openclaw/src/skill/bundled-memory-guide.ts +++ b/apps/memos-local-openclaw/src/skill/bundled-memory-guide.ts @@ -1,91 +1,9 @@ /** * Bundled MemOS memory-guide skill content. - * Written to workspace/skills/memos-memory-guide on plugin register so OpenClaw loads it. + * Reads from skill/memos-memory-guide/SKILL.md at runtime (single source of truth). */ -export const MEMORY_GUIDE_SKILL_MD = `--- -name: memos-memory-guide -description: Use the MemOS Local memory system to search and use the user's past conversations. Use this skill whenever the user refers to past chats, their own preferences or history, or when you need to answer from prior context. When auto-recall returns nothing (long or unclear user query), generate your own short search query and call memory_search. Use task_summary when you need full task context, skill_get for experience guides, and memory_timeline to expand around a memory hit. 
---- +import * as fs from "fs"; +import * as path from "path"; -# MemOS Local Memory — Agent Guide - -This skill describes how to use the MemOS memory tools so you can reliably search and use the user's long-term conversation history. - -## How memory is provided each turn - -- **Automatic recall (hook):** At the start of each turn, the system runs a memory search using the user's current message and injects relevant past memories into your context. You do not need to call any tool for that. -- **When that is not enough:** If the user's message is very long, vague, or the automatic search returns **no memories**, you should **generate your own short, focused query** and call \`memory_search\` yourself. For example: - - User sent a long paragraph → extract 1–2 key topics or a short question and search with that. - - Auto-recall said "no memories" or you see no memory block → call \`memory_search\` with a query you derive (e.g. the user's name, a topic they often mention, or a rephrased question). -- **When you need more detail:** Search results only give excerpts and IDs. Use the tools below to fetch full task context, skill content, or surrounding messages. - -## Tools — what they do and when to call - -### memory_search - -- **What it does:** Searches the user's stored conversation memory by a natural-language query. Returns a list of relevant excerpts with \`chunkId\` and optionally \`task_id\`. -- **When to call:** - - The automatic recall did not run or returned nothing (e.g. no \`\` block, or a note that no memories were found). - - The user's query is long or unclear — **generate a short query yourself** (keywords, rephrased question, or a clear sub-question) and call \`memory_search(query="...")\`. - - You need to search with a different angle (e.g. filter by \`role='user'\` to find what the user said, or use a more specific query). -- **Parameters:** \`query\` (required), optional \`minScore\`, \`role\` (e.g. \`"user"\`). 
-- **Output:** List of items with role, excerpt, \`chunkId\`, and sometimes \`task_id\`. Use those IDs with the tools below when you need more context. - -### task_summary - -- **What it does:** Returns the full task summary for a given \`task_id\`: title, status, and the complete narrative summary of that conversation task (steps, decisions, URLs, commands, etc.). -- **When to call:** A \`memory_search\` hit included a \`task_id\` and you need the full story of that task (e.g. what was done, what the user decided, what failed or succeeded). -- **Parameters:** \`taskId\` (from a search hit). -- **Effect:** You get one coherent summary of the whole task instead of isolated excerpts. - -### skill_get - -- **What it does:** Returns the content of a learned skill (experience guide) by \`skillId\` or by \`taskId\`. If you pass \`taskId\`, the system finds the skill linked to that task. -- **When to call:** A search hit has a \`task_id\` and the task is the kind that has a "how to do this again" guide (e.g. a workflow the user has run before). Use this to follow the same approach or reuse steps. -- **Parameters:** \`skillId\` (direct) or \`taskId\` (lookup). -- **Effect:** You receive the full SKILL.md-style guide. You can then call \`skill_install(skillId)\` if the user or you want that skill loaded for future turns. - -### skill_install - -- **What it does:** Installs a skill (by \`skillId\`) into the workspace so it is loaded in future sessions. -- **When to call:** After \`skill_get\` when the skill is useful for ongoing use (e.g. the user's recurring workflow). Optional; only when you want the skill to be permanently available. -- **Parameters:** \`skillId\`. - -### memory_timeline - -- **What it does:** Expands context around a single memory chunk: returns the surrounding conversation messages (±N turns) so you see what was said before and after that excerpt. -- **When to call:** A \`memory_search\` hit is relevant but you need the surrounding dialogue (e.g. 
who said what next, or the exact follow-up question). -- **Parameters:** \`chunkId\` (from a search hit), optional \`window\` (default 2). -- **Effect:** You get a short, linear slice of the conversation around that chunk. - -### memory_viewer - -- **What it does:** Returns the URL of the MemOS Memory Viewer (web UI) where the user can browse, search, and manage their memories. -- **When to call:** The user asks how to view their memories, open the memory dashboard, or manage stored data. -- **Parameters:** None. -- **Effect:** You can tell the user to open that URL in a browser. - -## Quick decision flow - -1. **No memories in context or auto-recall reported nothing** - → Call \`memory_search\` with a **self-generated short query** (e.g. key topic or rephrased question). - -2. **Search returned hits with \`task_id\` and you need full context** - → Call \`task_summary(taskId)\`. - -3. **Task has an experience guide you want to follow** - → Call \`skill_get(taskId=...)\` (or \`skill_get(skillId=...)\` if you have the id). Optionally \`skill_install(skillId)\` for future use. - -4. **You need the exact surrounding conversation of a hit** - → Call \`memory_timeline(chunkId=...)\`. - -5. **User asks where to see or manage their memories** - → Call \`memory_viewer()\` and share the URL. - -## Writing good search queries - -- Prefer **short, focused** queries (a few words or one clear question). -- Use **concrete terms**: names, topics, tools, or decisions (e.g. "preferred editor", "deploy script", "API key setup"). -- If the user's message is long, **derive one or two sub-queries** rather than pasting the whole message. -- Use \`role='user'\` when you specifically want to find what the user said (e.g. preferences, past questions). 
-`; +const skillPath = path.join(__dirname, "..", "..", "skill", "memos-memory-guide", "SKILL.md"); +export const MEMORY_GUIDE_SKILL_MD: string = fs.readFileSync(skillPath, "utf-8"); diff --git a/apps/memos-local-openclaw/src/skill/evaluator.ts b/apps/memos-local-openclaw/src/skill/evaluator.ts index dd58ffeb4..0050af807 100644 --- a/apps/memos-local-openclaw/src/skill/evaluator.ts +++ b/apps/memos-local-openclaw/src/skill/evaluator.ts @@ -52,7 +52,7 @@ Task title: {TITLE} Task summary: {SUMMARY} -LANGUAGE RULE: The "reason" field MUST use the SAME language as the task title/summary. Chinese input → Chinese reason. English input → English reason. "suggestedName" stays in English kebab-case. +LANGUAGE RULE (MUST FOLLOW): Detect the language of the task title/summary. If it is Chinese, the "reason" field MUST be in Chinese. If English, reason in English. Only "suggestedName" stays in English kebab-case. 如果任务标题/摘要是中文,reason 必须用中文。 Reply in JSON only, no extra text: { diff --git a/apps/memos-local-openclaw/src/storage/sqlite.ts b/apps/memos-local-openclaw/src/storage/sqlite.ts index 2caa8e5d3..c2f18a252 100644 --- a/apps/memos-local-openclaw/src/storage/sqlite.ts +++ b/apps/memos-local-openclaw/src/storage/sqlite.ts @@ -46,7 +46,7 @@ export class SqliteStore { content, content='chunks', content_rowid='rowid', - tokenize='porter unicode61' + tokenize='trigram' ); CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN @@ -109,6 +109,7 @@ export class SqliteStore { this.migrateOwnerFields(); this.migrateSkillVisibility(); this.migrateSkillEmbeddingsAndFts(); + this.migrateFtsToTrigram(); this.log.debug("Database schema initialized"); } @@ -159,7 +160,7 @@ export class SqliteStore { description, content='skills', content_rowid='rowid', - tokenize='porter unicode61' + tokenize='trigram' ); `); @@ -195,6 +196,81 @@ export class SqliteStore { } catch { /* best-effort */ } } + private migrateFtsToTrigram(): void { + // Check if chunks_fts still uses the old 
tokenizer (porter unicode61) + try { + const row = this.db.prepare( + "SELECT sql FROM sqlite_master WHERE name='chunks_fts'" + ).get() as { sql: string } | undefined; + if (row && row.sql && !row.sql.includes("trigram")) { + this.log.info("Migrating chunks_fts from porter/unicode61 to trigram tokenizer..."); + this.db.exec("DROP TRIGGER IF EXISTS chunks_ai"); + this.db.exec("DROP TRIGGER IF EXISTS chunks_ad"); + this.db.exec("DROP TRIGGER IF EXISTS chunks_au"); + this.db.exec("DROP TABLE IF EXISTS chunks_fts"); + this.db.exec(` + CREATE VIRTUAL TABLE chunks_fts USING fts5( + summary, content, content='chunks', content_rowid='rowid', + tokenize='trigram' + ) + `); + this.db.exec(` + CREATE TRIGGER chunks_ai AFTER INSERT ON chunks BEGIN + INSERT INTO chunks_fts(rowid, summary, content) VALUES (new.rowid, new.summary, new.content); + END; + CREATE TRIGGER chunks_ad AFTER DELETE ON chunks BEGIN + INSERT INTO chunks_fts(chunks_fts, rowid, summary, content) VALUES ('delete', old.rowid, old.summary, old.content); + END; + CREATE TRIGGER chunks_au AFTER UPDATE ON chunks BEGIN + INSERT INTO chunks_fts(chunks_fts, rowid, summary, content) VALUES ('delete', old.rowid, old.summary, old.content); + INSERT INTO chunks_fts(rowid, summary, content) VALUES (new.rowid, new.summary, new.content); + END + `); + this.db.exec("INSERT INTO chunks_fts(rowid, summary, content) SELECT rowid, summary, content FROM chunks"); + const count = (this.db.prepare("SELECT COUNT(*) as c FROM chunks_fts").get() as { c: number }).c; + this.log.info(`Migrated chunks_fts to trigram: ${count} rows indexed`); + } + } catch (err) { + this.log.warn(`Failed to migrate chunks_fts to trigram: ${err}`); + } + + // Same for skills_fts + try { + const row = this.db.prepare( + "SELECT sql FROM sqlite_master WHERE name='skills_fts'" + ).get() as { sql: string } | undefined; + if (row && row.sql && !row.sql.includes("trigram")) { + this.log.info("Migrating skills_fts to trigram tokenizer..."); + this.db.exec("DROP 
TRIGGER IF EXISTS skills_ai");
+        this.db.exec("DROP TRIGGER IF EXISTS skills_ad");
+        this.db.exec("DROP TRIGGER IF EXISTS skills_au");
+        this.db.exec("DROP TABLE IF EXISTS skills_fts");
+        this.db.exec(`
+          CREATE VIRTUAL TABLE skills_fts USING fts5(
+            name, description, content='skills', content_rowid='rowid',
+            tokenize='trigram'
+          )
+        `);
+        this.db.exec(`
+          CREATE TRIGGER skills_ai AFTER INSERT ON skills BEGIN
+            INSERT INTO skills_fts(rowid, name, description) VALUES (new.rowid, new.name, new.description);
+          END;
+          CREATE TRIGGER skills_ad AFTER DELETE ON skills BEGIN
+            INSERT INTO skills_fts(skills_fts, rowid, name, description) VALUES ('delete', old.rowid, old.name, old.description);
+          END;
+          CREATE TRIGGER skills_au AFTER UPDATE ON skills BEGIN
+            INSERT INTO skills_fts(skills_fts, rowid, name, description) VALUES ('delete', old.rowid, old.name, old.description);
+            INSERT INTO skills_fts(rowid, name, description) VALUES (new.rowid, new.name, new.description);
+          END
+        `);
+        this.db.exec("INSERT INTO skills_fts(rowid, name, description) SELECT rowid, name, description FROM skills");
+        this.log.info("Migrated skills_fts to trigram");
+      }
+    } catch (err) {
+      this.log.warn(`Failed to migrate skills_fts to trigram: ${err}`);
+    }
+  }
+
   private migrateTaskId(): void {
     const cols = this.db.prepare("PRAGMA table_info(chunks)").all() as Array<{ name: string }>;
     if (!cols.some((c) => c.name === "task_id")) {
@@ -530,8 +606,6 @@ export class SqliteStore {
   getMetrics(days: number): {
     writesPerDay: Array<{ date: string; count: number }>;
     viewerCallsPerDay: Array<{ date: string; list: number; search: number; total: number }>;
-    roleBreakdown: Record<string, number>;
-    kindBreakdown: Record<string, number>;
     totals: { memories: number; sessions: number; embeddings: number; todayWrites: number; todayViewerCalls: number };
   } {
     const since = Date.now() - days * 86400 * 1000;
@@ -566,11 +640,6 @@ export class SqliteStore {
       .sort((a, b) => a[0].localeCompare(b[0]))
       .map(([date, v]) => ({ date, list: v.list, search: 
v.search, total: v.list + v.search })); - const roles = this.db.prepare("SELECT role, COUNT(*) as count FROM chunks GROUP BY role").all() as Array<{ role: string; count: number }>; - const kinds = this.db.prepare("SELECT kind, COUNT(*) as count FROM chunks GROUP BY kind").all() as Array<{ kind: string; count: number }>; - const roleBreakdown = Object.fromEntries(roles.map((r) => [r.role, r.count])); - const kindBreakdown = Object.fromEntries(kinds.map((k) => [k.kind, k.count])); - const totalChunks = (this.db.prepare("SELECT COUNT(*) as c FROM chunks").get() as { c: number }).c; const totalSessions = (this.db.prepare("SELECT COUNT(DISTINCT session_key) as c FROM chunks").get() as { c: number }).c; const totalEmbeddings = (this.db.prepare("SELECT COUNT(*) as c FROM embeddings").get() as { c: number }).c; @@ -580,8 +649,6 @@ export class SqliteStore { return { writesPerDay, viewerCallsPerDay, - roleBreakdown, - kindBreakdown, totals: { memories: totalChunks, sessions: totalSessions, @@ -859,6 +926,59 @@ export class SqliteStore { return result.changes > 0; } + /** + * Find user-role chunks that contain system-injected content that should + * have been stripped before storage. Returns chunk IDs and a preview. 
+   */
+  findPollutedUserChunks(): Array<{ id: string; preview: string; reason: string }> {
+    const results: Array<{ id: string; preview: string; reason: string }> = [];
+    const patterns: Array<{ sql: string; reason: string }> = [
+      { sql: "content LIKE '%<memory_context>%'", reason: "memory_context injection" },
+      { sql: "content LIKE '%=== MemOS LONG-TERM MEMORY%'", reason: "MemOS legacy injection" },
+      { sql: "content LIKE '%[MemOS Auto-Recall]%'", reason: "MemOS Auto-Recall injection" },
+      { sql: "content LIKE '%## Memory system%No memories were automatically recalled%'", reason: "Memory system no-recall hint" },
+      { sql: "content LIKE '%## Retrieved memories from past conversations%CRITICAL INSTRUCTION%'", reason: "prependContext recall injection" },
+      { sql: "content LIKE '%VERIFIED facts the user previously shared%'", reason: "VERIFIED facts injection" },
+      { sql: "content LIKE '%<memos_system_instruction>%'", reason: "memos_system_instruction injection" },
+      { sql: "content LIKE '%📝 Related memories:%'", reason: "Related memories injection" },
+    ];
+    for (const { sql, reason } of patterns) {
+      const rows = this.db.prepare(
+        `SELECT id, substr(content, 1, 120) AS preview FROM chunks WHERE role = 'user' AND ${sql}`,
+      ).all() as Array<{ id: string; preview: string }>;
+      for (const row of rows) {
+        results.push({ id: row.id, preview: row.preview, reason });
+      }
+    }
+    return results;
+  }
+
+  /**
+   * Find user chunks where user+assistant content was mixed together
+   * (separated by \n\n---\n), and truncate to keep only the user's part. 
+ */ + fixMixedUserChunks(): number { + const rows = this.db.prepare( + `SELECT id, content FROM chunks WHERE role = 'user' + AND content LIKE '%' || char(10) || char(10) || '---' || char(10) || '%' + AND length(content) > 300`, + ).all() as Array<{ id: string; content: string }>; + + let fixed = 0; + for (const { id, content } of rows) { + const dashIdx = content.indexOf("\n\n---\n"); + if (dashIdx > 5) { + const userPart = content.slice(0, dashIdx).trim(); + if (userPart.length >= 5 && userPart.length < content.length) { + this.db.prepare("UPDATE chunks SET content = ?, updated_at = ? WHERE id = ?") + .run(userPart, Date.now(), id); + fixed++; + } + } + } + return fixed; + } + // ─── Delete ─── deleteChunk(chunkId: string): boolean { @@ -873,15 +993,25 @@ export class SqliteStore { deleteAll(): number { this.db.exec("PRAGMA foreign_keys = OFF"); - this.db.prepare("DELETE FROM task_skills").run(); - this.db.prepare("DELETE FROM skill_versions").run(); - this.db.prepare("DELETE FROM skills").run(); - this.db.prepare("DELETE FROM embeddings").run(); - this.db.prepare("DELETE FROM chunks").run(); - this.db.prepare("DELETE FROM tasks").run(); - this.db.prepare("DELETE FROM viewer_events").run(); - this.db.prepare("DELETE FROM api_logs").run(); - this.db.prepare("DELETE FROM tool_calls").run(); + const tables = [ + "task_skills", + "skill_embeddings", + "skill_versions", + "skills", + "embeddings", + "chunks", + "tasks", + "viewer_events", + "api_logs", + "tool_calls", + ]; + for (const table of tables) { + try { + this.db.prepare(`DELETE FROM ${table}`).run(); + } catch (err) { + this.log.warn(`deleteAll: failed to clear ${table}: ${err}`); + } + } this.db.exec("PRAGMA foreign_keys = ON"); const remaining = this.countChunks(); return remaining === 0 ? 
1 : 0; @@ -1044,14 +1174,16 @@ export class SqliteStore { */ findActiveChunkByHash(content: string, owner?: string): string | null { const hash = contentHash(content); + // Check ANY existing chunk with the same hash (regardless of dedup_status) + // to prevent re-creating duplicates when all prior copies have been marked duplicate/merged. if (owner) { const row = this.db.prepare( - "SELECT id FROM chunks WHERE content_hash = ? AND dedup_status = 'active' AND owner = ? LIMIT 1", + "SELECT id FROM chunks WHERE content_hash = ? AND owner = ? ORDER BY CASE dedup_status WHEN 'active' THEN 0 ELSE 1 END LIMIT 1", ).get(hash, owner) as { id: string } | undefined; return row?.id ?? null; } const row = this.db.prepare( - "SELECT id FROM chunks WHERE content_hash = ? AND dedup_status = 'active' LIMIT 1", + "SELECT id FROM chunks WHERE content_hash = ? ORDER BY CASE dedup_status WHEN 'active' THEN 0 ELSE 1 END LIMIT 1", ).get(hash) as { id: string } | undefined; return row?.id ?? null; } @@ -1307,7 +1439,7 @@ export class SqliteStore { */ function sanitizeFtsQuery(raw: string): string { const tokens = raw - .replace(/[."""(){}[\]*:^~!@#$%&\\/<>,;'`]/g, " ") + .replace(/[."""(){}[\]*:^~!@#$%&\\/<>,;'`-]/g, " ") .split(/\s+/) .map((t) => t.trim().replace(/^-+|-+$/g, "")) .filter((t) => t.length > 1) diff --git a/apps/memos-local-openclaw/src/tools/memory-get.ts b/apps/memos-local-openclaw/src/tools/memory-get.ts index a8efb2579..5dde5c0ac 100644 --- a/apps/memos-local-openclaw/src/tools/memory-get.ts +++ b/apps/memos-local-openclaw/src/tools/memory-get.ts @@ -47,10 +47,7 @@ export function createMemoryGetTool(store: SqliteStore): ToolDefinition { return { error: `Chunk not found: ${ref.chunkId}` }; } - const content = - chunk.content.length > maxChars - ? 
chunk.content.slice(0, maxChars) + "…" - : chunk.content; + const content = chunk.content; const result: GetResult = { content, diff --git a/apps/memos-local-openclaw/src/types.ts b/apps/memos-local-openclaw/src/types.ts index 1b68a67a9..0396e94be 100644 --- a/apps/memos-local-openclaw/src/types.ts +++ b/apps/memos-local-openclaw/src/types.ts @@ -55,14 +55,7 @@ export interface Task { updatedAt: number; } -export type ChunkKind = - | "paragraph" - | "code_block" - | "error_stack" - | "command" - | "list" - | "mixed" - | "tool_result"; +export type ChunkKind = "paragraph"; export interface ChunkRef { sessionKey: string; @@ -151,7 +144,13 @@ export type SummaryProvider = | "anthropic" | "gemini" | "azure_openai" - | "bedrock"; + | "bedrock" + | "zhipu" + | "siliconflow" + | "bailian" + | "cohere" + | "mistral" + | "voyage"; export type EmbeddingProvider = | "openai" @@ -294,7 +293,7 @@ export const DEFAULTS = { mmrLambda: 0.7, recencyHalfLifeDays: 14, vectorSearchMaxChunks: 0, - dedupSimilarityThreshold: 0.60, + dedupSimilarityThreshold: 0.80, evidenceWrapperTag: "STORED_MEMORY", excerptMinChars: 200, excerptMaxChars: 500, diff --git a/apps/memos-local-openclaw/src/update-check.ts b/apps/memos-local-openclaw/src/update-check.ts new file mode 100644 index 000000000..9f1e77002 --- /dev/null +++ b/apps/memos-local-openclaw/src/update-check.ts @@ -0,0 +1,95 @@ +/** + * Channel-aware update check against npm registry dist-tags. + * - Prerelease users (e.g. 1.0.2-beta.x) compare against beta tag only (semver gt). + * - Stable users compare against latest tag only (semver gt). + * - Beta users get optional stableChannel hint to install @latest when stable exists. + */ +import * as semver from "semver"; + +export interface UpdateCheckResult { + updateAvailable: boolean; + current: string; + /** Version on the channel we compared against (beta tag or latest tag). */ + latest: string; + packageName: string; + /** Channel used for the primary comparison. 
*/
+  channel: "beta" | "latest";
+  /** Full install command (includes @beta when updating on beta channel). */
+  installCommand: string;
+  /** When current is prerelease and registry has a stable latest — how to switch to stable. */
+  stableChannel?: { version: string; installCommand: string };
+}
+
+function isPrerelease(v: string): boolean {
+  return semver.prerelease(v) != null;
+}
+
+/**
+ * Fetch registry package doc and compute update state.
+ */
+export async function computeUpdateCheck(
+  packageName: string,
+  current: string,
+  fetchImpl: typeof fetch,
+  timeoutMs = 8_000,
+): Promise<UpdateCheckResult | null> {
+  if (!semver.valid(current)) return null;
+
+  const url = `https://registry.npmjs.org/${encodeURIComponent(packageName)}`;
+  const resp = await fetchImpl(url, { signal: AbortSignal.timeout(timeoutMs) });
+  if (!resp.ok) return null;
+
+  const data = (await resp.json()) as { "dist-tags"?: Record<string, string> };
+  const tags = data["dist-tags"] ?? {};
+  const latestTag = tags.latest;
+  const betaTag = tags.beta;
+
+  const onBeta = isPrerelease(current);
+  let updateAvailable = false;
+  let channel: "beta" | "latest" = "latest";
+  let targetVersion = current;
+  let installCommand = `openclaw plugins install ${packageName}`;
+
+  if (onBeta) {
+    channel = "beta";
+    // Beta users: only compare against beta tag; never suggest "updating" to stable via gt confusion.
+    if (betaTag && semver.valid(betaTag) && semver.gt(betaTag, current)) {
+      updateAvailable = true;
+      targetVersion = betaTag;
+      installCommand = `openclaw plugins install ${packageName}@beta`;
+    } else {
+      targetVersion = betaTag && semver.valid(betaTag) ? betaTag : current;
+      if (betaTag && semver.valid(betaTag) && semver.eq(betaTag, current)) {
+        installCommand = `openclaw plugins install ${packageName}@beta`;
+      }
+    }
+  } else {
+    // Stable users: compare against latest only. 
+ if (latestTag && semver.valid(latestTag) && semver.gt(latestTag, current)) { + updateAvailable = true; + targetVersion = latestTag; + installCommand = `openclaw plugins install ${packageName}`; + } else { + targetVersion = latestTag && semver.valid(latestTag) ? latestTag : current; + } + } + + // Beta user + stable exists on latest: optional hint to switch to stable (not counted as "update"). + let stableChannel: UpdateCheckResult["stableChannel"]; + if (onBeta && latestTag && semver.valid(latestTag) && !isPrerelease(latestTag)) { + stableChannel = { + version: latestTag, + installCommand: `openclaw plugins install ${packageName}@latest`, + }; + } + + return { + updateAvailable, + current, + latest: targetVersion, + packageName, + channel, + installCommand, + stableChannel, + }; +} diff --git a/apps/memos-local-openclaw/src/viewer/html.ts b/apps/memos-local-openclaw/src/viewer/html.ts index c3b48c7b4..11800186d 100644 --- a/apps/memos-local-openclaw/src/viewer/html.ts +++ b/apps/memos-local-openclaw/src/viewer/html.ts @@ -41,7 +41,7 @@ return ` [data-theme="light"] .auth-screen{background:linear-gradient(135deg,#f0f4ff 0%,#f8f9fb 50%,#eef2ff 100%)} [data-theme="light"] .auth-card{box-shadow:0 25px 50px -12px rgba(0,0,0,.08)} [data-theme="light"] .topbar{background:rgba(255,255,255,.92);border-bottom-color:var(--border);backdrop-filter:blur(8px)} -[data-theme="light"] .session-item .count,[data-theme="light"] .kind-tag,[data-theme="light"] .session-tag{background:rgba(0,0,0,.05)} +[data-theme="light"] .session-item .count,[data-theme="light"] .session-tag{background:rgba(0,0,0,.05)} [data-theme="light"] .card-content pre{background:#f3f4f6;border-color:var(--border)} [data-theme="light"] .vscore-badge{background:rgba(79,70,229,.06);color:#4f46e5} [data-theme="light"] ::-webkit-scrollbar-thumb{background:rgba(0,0,0,.15)} @@ -66,10 +66,6 @@ return ` [data-theme="light"] .tool-agg-table td{background:transparent} [data-theme="light"] .tool-agg-table tr:hover 
td{background:rgba(79,70,229,.03)} [data-theme="light"] .tool-agg-table th{color:#9ca3af} -[data-theme="light"] .breakdown-item{background:#f9fafb;border-color:var(--border)} -[data-theme="light"] .breakdown-item:hover{background:#f3f4f6;border-color:#cbd5e1} -[data-theme="light"] .breakdown-bar-wrap{background:#e5e7eb} -[data-theme="light"] .breakdown-bar{background:linear-gradient(90deg,#4f46e5,#6366f1);box-shadow:none} [data-theme="light"] .range-btn{background:transparent;border-color:var(--border);color:var(--text-sec)} [data-theme="light"] .range-btn.active{background:rgba(79,70,229,.06);color:#4f46e5;border-color:rgba(79,70,229,.2)} [data-theme="light"] .range-btn:hover{border-color:#4f46e5;color:#4f46e5} @@ -120,7 +116,7 @@ input,textarea,select{font-family:inherit;font-size:inherit} .main-content{display:flex;flex:1;max-width:1400px;margin:0 auto;width:100%;padding:28px 32px;gap:28px} /* ─── Sidebar ─── */ -.sidebar{width:260px;flex-shrink:0} +.sidebar{width:260px;min-width:260px;flex-shrink:0} .sidebar .stats-grid{display:grid;grid-template-columns:1fr 1fr;gap:12px;margin-bottom:24px} .stat-card{background:var(--bg-card);border:1px solid var(--border);border-radius:var(--radius);padding:18px;transition:all .2s} .stat-card:hover{border-color:var(--border-glow);background:var(--bg-card-hover)} @@ -164,10 +160,9 @@ input,textarea,select{font-family:inherit;font-size:inherit} .role-tag.user{background:var(--pri-glow);color:var(--pri);border:1px solid rgba(99,102,241,.12)} .role-tag.assistant{background:var(--accent-glow);color:var(--accent);border:1px solid rgba(230,57,70,.2)} .role-tag.system{background:var(--amber-bg);color:var(--amber);border:1px solid rgba(245,158,11,.2)} -.kind-tag{padding:4px 10px;border-radius:8px;font-size:11px;color:var(--text-sec);background:rgba(0,0,0,.2);font-weight:500} .card-time{font-size:12px;color:var(--text-sec);display:flex;align-items:center;gap:8px} 
.session-tag{font-size:11px;font-family:ui-monospace,monospace;color:var(--text-muted);background:rgba(0,0,0,.2);padding:3px 8px;border-radius:6px;cursor:default} -.card-summary{font-size:15px;font-weight:600;color:var(--text);margin-bottom:10px;line-height:1.5;letter-spacing:-.01em} +.card-summary{font-size:15px;font-weight:600;color:var(--text);margin-bottom:10px;line-height:1.5;letter-spacing:-.01em;display:-webkit-box;-webkit-line-clamp:2;-webkit-box-orient:vertical;overflow:hidden} .card-content{font-size:13px;color:var(--text-sec);line-height:1.65;max-height:0;overflow:hidden;transition:max-height .3s ease} .card-content.show{max-height:600px;overflow-y:auto} .card-content pre{white-space:pre-wrap;word-break:break-all;background:rgba(0,0,0,.25);padding:14px;border-radius:10px;font-size:12px;font-family:ui-monospace,monospace;margin-top:10px;border:1px solid var(--border);color:var(--text-sec)} @@ -212,6 +207,9 @@ input,textarea,select{font-family:inherit;font-size:inherit} .modal-meta-row{display:flex;flex-wrap:wrap;gap:12px;font-size:11px;color:var(--text-sec);padding:8px 0;border-top:1px dashed var(--border)} [data-theme="light"] .merge-history{background:rgba(0,0,0,.04)} [data-theme="light"] .merge-history-item{border-bottom-color:rgba(0,0,0,.06)} +.card-merged-info{margin-top:8px;padding:8px 12px;background:rgba(16,185,129,.06);border:1px dashed rgba(16,185,129,.2);border-radius:8px;font-size:12px;line-height:1.6;color:var(--text-sec)} +.card-merged-label{font-size:10px;font-weight:600;color:#10b981;margin-bottom:4px;display:flex;align-items:center;gap:4px} +[data-theme="light"] .card-merged-info{background:rgba(16,185,129,.04);border-color:rgba(16,185,129,.15)} /* ─── Buttons ─── */ .btn{padding:7px 14px;border-radius:8px;border:1px solid var(--border);background:var(--bg-card);color:var(--text);font-size:13px;font-weight:500;transition:all .18s ease;display:inline-flex;align-items:center;gap:5px;white-space:nowrap} @@ -245,6 +243,16 @@ 
input,textarea,select{font-family:inherit;font-size:inherit} .modal-actions{display:flex;gap:10px;justify-content:flex-end;margin-top:28px} /* ─── Toast ─── */ +.emb-banner{display:flex;align-items:center;gap:10px;padding:12px 20px;font-size:13px;font-weight:500;border-radius:10px;margin:0 32px 0;animation:slideIn .3s ease} +.emb-banner.warning{background:rgba(245,158,11,.1);color:#d97706;border:1px solid rgba(245,158,11,.25)} +.emb-banner.error{background:rgba(239,68,68,.1);color:#ef4444;border:1px solid rgba(239,68,68,.25)} +[data-theme="light"] .emb-banner.warning{background:rgba(245,158,11,.08);color:#b45309} +[data-theme="light"] .emb-banner.error{background:rgba(239,68,68,.08);color:#dc2626} +.emb-banner span{flex:1} +.emb-banner-btn{background:none;border:1px solid currentColor;border-radius:6px;padding:4px 12px;font-size:12px;font-weight:600;color:inherit;cursor:pointer;white-space:nowrap;opacity:.85;transition:opacity .15s} +.emb-banner-btn:hover{opacity:1} +.emb-banner-close{background:none;border:none;font-size:18px;color:inherit;cursor:pointer;opacity:.5;padding:0 4px;line-height:1} +.emb-banner-close:hover{opacity:1} .toast-container{position:fixed;top:80px;right:24px;z-index:1000;display:flex;flex-direction:column;gap:8px} .toast{padding:14px 20px;border-radius:10px;font-size:13px;font-weight:500;box-shadow:var(--shadow-lg);animation:slideIn .3s ease;display:flex;align-items:center;gap:10px;max-width:360px;border:1px solid} .toast.success{background:var(--green-bg);color:var(--green);border-color:rgba(16,185,129,.3)} @@ -334,8 +342,12 @@ input,textarea,select{font-family:inherit;font-size:inherit} .task-chunk-role.user{color:var(--pri)} .task-chunk-role.assistant{color:var(--green)} .task-chunk-role.tool{color:var(--amber)} -.task-chunk-bubble{padding:12px 16px;border-radius:16px;white-space:pre-wrap;word-break:break-word;max-height:200px;overflow:hidden;position:relative;transition:all .2s} -.task-chunk-bubble.expanded{max-height:none} 
+.task-chunk-bubble{padding:12px 16px;border-radius:16px;white-space:pre-wrap;word-break:break-word;max-height:none;overflow:hidden;position:relative;transition:all .2s} +.task-chunk-bubble.collapsed{max-height:200px} +.task-chunk-expand{display:none;align-items:center;justify-content:center;gap:4px;margin-top:4px;padding:4px 12px;font-size:12px;font-weight:600;color:var(--text-sec);cursor:pointer;user-select:none;border-radius:8px;transition:all .15s} +.task-chunk-expand:hover{color:var(--pri);background:rgba(99,102,241,.08)} +.task-chunk-expand .expand-arrow{display:inline-block;font-size:10px;transition:transform .2s} +.task-chunk-expand.is-expanded .expand-arrow{transform:rotate(180deg)} .role-user .task-chunk-bubble{background:var(--pri);color:#000;border-bottom-right-radius:4px} .role-assistant .task-chunk-bubble{background:var(--bg-card);border:1px solid var(--border);color:var(--text-sec);border-bottom-left-radius:4px} .role-tool .task-chunk-bubble{background:rgba(245,158,11,.08);border:1px solid rgba(245,158,11,.2);color:var(--text-sec);border-bottom-left-radius:4px;font-family:'SF Mono',Monaco,Consolas,monospace;font-size:12px} @@ -425,6 +437,7 @@ input,textarea,select{font-family:inherit;font-size:inherit} [data-theme="light"] .nav-tabs .tab.active{background:#fff;border-color:rgba(0,0,0,.1);box-shadow:0 1px 3px rgba(0,0,0,.08);color:var(--text)} .analytics-view,.settings-view,.logs-view,.migrate-view{display:none;flex:1;min-width:0;flex-direction:column;gap:20px} .analytics-view.show,.settings-view.show,.logs-view.show,.migrate-view.show{display:flex} +.feed-wrap,.tasks-view,.skills-view,.analytics-view,.settings-view,.logs-view,.migrate-view{max-width:960px} /* ─── Logs ─── */ .logs-toolbar{display:flex;align-items:center;justify-content:space-between;padding:8px 0} @@ -464,7 +477,41 @@ input,textarea,select{font-family:inherit;font-size:inherit} .log-stat-chip.merged{background:rgba(168,85,247,.12);color:#c084fc} 
.log-stat-chip.errors{background:rgba(248,113,113,.12);color:#f87171} .log-msg-list{margin-top:8px;display:flex;flex-direction:column;gap:4px} -.log-msg-item{display:flex;gap:8px;align-items:flex-start;font-size:11.5px;line-height:1.5;padding:4px 10px;border-radius:6px;background:rgba(255,255,255,.02)} +.log-msg-item{display:flex;gap:8px;align-items:flex-start;font-size:11.5px;line-height:1.5;padding:4px 10px;border-radius:6px;background:rgba(255,255,255,.02);overflow:hidden} +.log-msg-item.expanded{flex-wrap:wrap} +.recall-layers{margin-top:8px;display:flex;flex-direction:column;gap:10px} +.recall-layer-title{font-size:11px;font-weight:600;color:var(--text-sec);margin-bottom:4px;display:flex;align-items:center;gap:6px;cursor:pointer;user-select:none} +.recall-layer-title .recall-expand-icon{transition:transform .15s;font-size:9px} +.recall-layer.expanded .recall-layer-title .recall-expand-icon{transform:rotate(90deg)} +.recall-count{font-size:10px;font-weight:700;padding:1px 6px;border-radius:10px;background:rgba(99,102,241,.1);color:var(--pri)} +.recall-items{display:none;flex-direction:column;gap:3px} +.recall-layer.expanded .recall-items{display:flex} +.recall-item{font-size:11px;line-height:1.4;padding:4px 8px;border-radius:5px;background:rgba(255,255,255,.02);cursor:pointer} +.recall-item:hover{background:rgba(99,102,241,.06)} +[data-theme="light"] .recall-item{background:rgba(0,0,0,.02)} +[data-theme="light"] .recall-item:hover{background:rgba(99,102,241,.06)} +.recall-item-head{display:flex;gap:6px;align-items:center} +.recall-idx{flex-shrink:0;font-size:10px;font-weight:600;color:var(--text-muted);min-width:14px;text-align:right} +.recall-score{flex-shrink:0;font-family:'SF Mono',Consolas,monospace;font-size:10px;font-weight:600;padding:1px 5px;border-radius:4px} +.recall-score.high{background:rgba(34,197,94,.12);color:#22c55e} +.recall-score.mid{background:rgba(251,191,36,.12);color:#f59e0b} 
+.recall-score.low{background:rgba(248,113,113,.1);color:var(--text-muted)} +.recall-summary-short{flex:1;color:var(--text-sec);overflow:hidden;text-overflow:ellipsis;white-space:nowrap} +.recall-expand-icon{flex-shrink:0;font-size:10px;color:var(--text-muted);transition:transform .15s} +.recall-item.expanded .recall-expand-icon{transform:rotate(90deg)} +.recall-summary-full{display:none;margin-top:4px;padding:6px 8px 4px 28px;font-size:11px;line-height:1.5;color:var(--text);word-break:break-word;border-top:1px dashed var(--border)} +.recall-item.expanded .recall-summary-full{display:block} +.recall-layer.filtered .recall-layer-title{color:var(--pri)} +.recall-layer.filtered.empty .recall-layer-title{color:var(--text-muted)} +.recall-more{font-size:10px;color:var(--text-muted);padding:2px 8px} +.recall-detail{padding:4px 0} +.recall-detail-section{margin-bottom:10px} +.recall-detail-title{font-size:11px;font-weight:600;color:var(--text-sec);margin-bottom:6px;padding-bottom:4px;border-bottom:1px dashed var(--border);cursor:pointer;user-select:none;display:flex;align-items:center;gap:6px} +.recall-detail-title .recall-expand-icon{transition:transform .15s;font-size:9px} +.recall-detail-section.expanded .recall-detail-title .recall-expand-icon{transform:rotate(90deg)} +.recall-detail-section .recall-detail-items{display:none;flex-direction:column;gap:3px} +.recall-detail-section.expanded .recall-detail-items{display:flex} +.recall-detail-section.filtered .recall-detail-title{color:var(--pri)} [data-theme="light"] .log-msg-item{background:rgba(0,0,0,.02)} .log-msg-role{flex-shrink:0;font-size:10px;font-weight:600;padding:1px 6px;border-radius:4px;text-transform:uppercase;letter-spacing:.3px} .log-msg-role.user{background:rgba(59,130,246,.12);color:#60a5fa} @@ -477,6 +524,15 @@ input,textarea,select{font-family:inherit;font-size:inherit} .log-msg-action.merged{color:#c084fc} .log-msg-action.error{color:#f87171} 
.log-msg-text{color:var(--text);opacity:.85;flex:1;min-width:0;overflow:hidden;text-overflow:ellipsis} +.log-msg-text-short{color:var(--text);opacity:.85;flex:1;min-width:0;white-space:nowrap;overflow:hidden;text-overflow:ellipsis} +.log-msg-text-full{display:none;color:var(--text);opacity:.85;flex:1;min-width:0;word-break:break-word;white-space:pre-wrap} +.log-msg-item.expanded .log-msg-text-short{display:none} +.log-msg-item.expanded .log-msg-text-full{display:block} +.log-msg-item.expanded .recall-expand-icon{transform:rotate(90deg)} +.log-add-detail{display:flex;flex-direction:column;gap:8px} +.log-add-msg{display:flex;gap:8px;align-items:flex-start;font-size:12px;line-height:1.6} +.log-add-msg-role{flex-shrink:0;font-size:10px;font-weight:600;text-transform:uppercase;padding:2px 8px;border-radius:4px;background:rgba(99,102,241,.1);color:var(--pri)} +.log-add-msg-content{flex:1;min-width:0;word-break:break-word;white-space:pre-wrap;color:var(--text)} .log-detail{display:none;border-top:1px solid var(--border);padding:0} .log-detail.open{display:block} .log-expand-btn{font-size:10px;color:var(--text-sec);opacity:.5;margin-left:auto;transition:transform .2s,opacity .15s;display:inline-block} @@ -516,6 +572,12 @@ input,textarea,select{font-family:inherit;font-size:inherit} .toggle-slider::before{content:'';position:absolute;height:14px;width:14px;left:3px;bottom:3px;background:#fff;border-radius:50%;transition:.2s} .toggle-switch input:checked+.toggle-slider{background:var(--pri)} .toggle-switch input:checked+.toggle-slider::before{transform:translateX(16px)} +.test-conn-row{display:flex;align-items:center;gap:10px;margin-top:12px;padding-top:10px;border-top:1px dashed var(--border)} +.test-conn-row .btn{font-size:11px;padding:5px 14px;border:1px solid var(--border);border-radius:6px} +.test-result{font-size:12px;line-height:1.5;word-break:break-word} +.test-result.ok{color:#22c55e} +.test-result.fail{color:var(--rose)} 
+.test-result.loading{color:var(--text-muted)} .settings-actions{display:flex;gap:12px;justify-content:flex-end;align-items:center;margin-top:16px;padding-top:16px;border-top:1px solid var(--border)} .settings-actions .btn{min-width:110px;padding:10px 20px;font-size:13px} .settings-actions .btn-primary{background:rgba(99,102,241,.08);color:var(--pri);border:1px solid rgba(99,102,241,.25);font-weight:600} @@ -524,6 +586,29 @@ input,textarea,select{font-family:inherit;font-size:inherit} [data-theme="light"] .settings-actions .btn-primary:hover{background:rgba(79,70,229,.1);border-color:#4f46e5} .settings-saved{display:inline-flex;align-items:center;gap:6px;color:var(--green);font-size:12px;font-weight:600;opacity:0;transition:opacity .3s} .settings-saved.show{opacity:1} +.model-health-bar{margin-bottom:20px;border-radius:var(--radius-lg);overflow:visible} +.mh-table{width:100%;border-collapse:separate;border-spacing:0;font-size:12px} +.mh-table th{text-align:left;padding:6px 12px;font-size:10px;font-weight:600;color:var(--text-muted);text-transform:uppercase;letter-spacing:.05em;background:var(--bg);border-bottom:1px solid var(--border)} +.mh-table td{padding:8px 12px;border-bottom:1px solid var(--border);vertical-align:middle} +.mh-table tr:last-child td{border-bottom:none} +.mh-table tr:hover td{background:rgba(99,102,241,.025)} +.mh-table .mh-cell-name{display:flex;align-items:center;gap:8px;font-weight:500;color:var(--text)} +.mh-dot{width:8px;height:8px;border-radius:50%;flex-shrink:0;display:inline-block} +.mh-dot.ok{background:#22c55e;box-shadow:0 0 0 2px rgba(34,197,94,.15)} +.mh-dot.degraded{background:#f59e0b;box-shadow:0 0 0 2px rgba(245,158,11,.15)} +.mh-dot.error{background:#ef4444;box-shadow:0 0 0 2px rgba(239,68,68,.15);animation:healthPulse 2s ease infinite} +.mh-dot.unknown{background:#94a3b8;box-shadow:0 0 0 2px rgba(148,163,184,.15)} +.mh-badge{display:inline-block;padding:2px 
7px;border-radius:10px;font-size:10px;font-weight:600;letter-spacing:.02em} +.mh-badge.ok{background:rgba(34,197,94,.1);color:#16a34a} +.mh-badge.degraded{background:rgba(245,158,11,.1);color:#d97706} +.mh-badge.error{background:rgba(239,68,68,.1);color:#dc2626} +.mh-badge.unknown{background:rgba(148,163,184,.1);color:#64748b} +.mh-model-name{color:var(--text-muted);font-size:11px;font-family:var(--font-mono,'SFMono-Regular',Consolas,monospace)} +.mh-err-text{font-size:11px;color:var(--rose);max-width:320px;display:inline-block;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;cursor:help} +#mhTooltip{display:none;position:fixed;min-width:280px;max-width:480px;max-height:300px;overflow-y:auto;padding:8px 10px;background:var(--bg-card,#1e1e2e);color:var(--text,#e2e8f0);border:1px solid var(--border,#333);border-radius:6px;font-size:11px;line-height:1.5;white-space:pre-wrap;word-break:break-all;box-shadow:0 4px 12px rgba(0,0,0,.25);z-index:10000;pointer-events:none} +.mh-time{font-size:10px;color:var(--text-muted);white-space:nowrap} +.mh-empty{padding:16px;font-size:12px;color:var(--text-muted);text-align:center} +@keyframes healthPulse{0%,100%{opacity:1}50%{opacity:.4}} .migrate-log-item{display:flex;align-items:flex-start;gap:10px;padding:8px 14px;border-bottom:1px solid var(--border);animation:migrateFadeIn .3s ease} .migrate-log-item:last-child{border-bottom:none} .migrate-log-item .log-icon{flex-shrink:0;width:18px;height:18px;border-radius:50%;display:flex;align-items:center;justify-content:center;font-size:10px;margin-top:2px} @@ -544,18 +629,19 @@ input,textarea,select{font-family:inherit;font-size:inherit} @keyframes migrateFadeIn{from{opacity:0;transform:translateY(-4px)}to{opacity:1;transform:translateY(0)}} .feed-wrap{flex:1;min-width:0;display:flex;flex-direction:column} .feed-wrap.hide{display:none} +.analytics-view{flex-direction:column;gap:20px} .analytics-cards{display:grid;grid-template-columns:repeat(4,1fr);gap:14px} 
-.analytics-card{position:relative;overflow:hidden;border-radius:var(--radius-lg);padding:22px 20px;transition:all .2s ease;border:1px solid var(--border);background:var(--bg-card)} +.analytics-card{position:relative;overflow:hidden;border-radius:var(--radius-lg);padding:18px 16px;transition:all .2s ease;border:1px solid var(--border);background:var(--bg-card)} .analytics-card::before{content:'';position:absolute;top:0;left:0;right:0;height:2px;background:var(--pri);opacity:.5} .analytics-card::after{display:none} .analytics-card:hover{transform:translateY(-2px);box-shadow:var(--shadow);border-color:var(--border-glow)} .analytics-card.green::before{background:var(--green)} .analytics-card.amber::before{background:var(--amber)} -.analytics-card .ac-value{font-size:28px;font-weight:700;letter-spacing:-.03em;color:var(--text);line-height:1;-webkit-text-fill-color:unset;background:none} +.analytics-card .ac-value{font-size:24px;font-weight:700;letter-spacing:-.03em;color:var(--text);line-height:1;-webkit-text-fill-color:unset;background:none} .analytics-card.green .ac-value{color:var(--green);background:none} .analytics-card.amber .ac-value{color:var(--amber);background:none} .analytics-card .ac-label{font-size:11px;color:var(--text-muted);margin-top:6px;font-weight:500;text-transform:uppercase;letter-spacing:.06em} -.analytics-section{background:var(--bg-card);border:1px solid var(--border);border-radius:var(--radius-lg);padding:22px 24px;position:relative;overflow:hidden} +.analytics-section{background:var(--bg-card);border:1px solid var(--border);border-radius:var(--radius-lg);padding:18px 20px;position:relative;overflow:hidden} .analytics-section::before{display:none} .analytics-section h3{font-size:11px;font-weight:600;color:var(--text-muted);text-transform:uppercase;letter-spacing:.08em;margin-bottom:16px;display:flex;align-items:center;gap:8px} .analytics-section h3 .icon{font-size:14px;opacity:.6} @@ -601,14 +687,6 @@ 
input,textarea,select{font-family:inherit;font-size:inherit} .tool-agg-table .ms-val.slow{color:var(--accent)} .chart-legend .dot.violet{background:var(--violet)} .chart-legend .dot.green{background:var(--green)} -.breakdown-grid{display:grid;grid-template-columns:repeat(auto-fill,minmax(200px,1fr));gap:20px} -.breakdown-item{display:flex;flex-direction:column;gap:5px;padding:10px 12px;background:rgba(255,255,255,.02);border-radius:8px;border:1px solid var(--border);transition:all .15s} -.breakdown-item:hover{border-color:var(--border-glow);background:rgba(255,255,255,.04)} -.breakdown-item .bd-top{display:flex;align-items:center;justify-content:space-between} -.breakdown-item .label{font-size:12px;color:var(--text-sec);font-weight:500;text-transform:capitalize} -.breakdown-item .value{font-size:13px;font-weight:600;color:var(--text)} -.breakdown-bar-wrap{height:3px;background:rgba(255,255,255,.06);border-radius:2px;overflow:hidden} -.breakdown-bar{height:100%;border-radius:2px;background:var(--pri);transition:width .5s ease} .metrics-toolbar{display:flex;align-items:center;gap:8px;margin-bottom:16px;flex-wrap:wrap} .range-btn{padding:5px 12px;border-radius:6px;border:1px solid var(--border);background:transparent;color:var(--text-sec);font-size:12px;font-weight:500;cursor:pointer;transition:all .15s} .range-btn:hover{border-color:var(--pri);color:var(--pri)} @@ -744,10 +822,12 @@ input,textarea,select{font-family:inherit;font-size:inherit}
-
Embeddings
-
Days
-
-
Sessions
-
- +
+
+
Sessions
+
+ +
@@ -763,15 +843,6 @@ input,textarea,select{font-family:inherit;font-size:inherit} - +
+
+ + +
@@ -974,9 +1048,14 @@ input,textarea,select{font-family:inherit;font-size:inherit}
- + + + + + @@ -1000,6 +1079,10 @@ input,textarea,select{font-family:inherit;font-size:inherit}
+
+ + +
@@ -1029,10 +1112,15 @@ input,textarea,select{font-family:inherit;font-size:inherit}
- + + + + + @@ -1052,6 +1140,10 @@ input,textarea,select{font-family:inherit;font-size:inherit}
+
+ + +
@@ -1123,7 +1215,7 @@ input,textarea,select{font-family:inherit;font-size:inherit}
- +