diff --git a/.gitignore b/.gitignore index 121300b0..6c2aabf9 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,7 @@ dist/ tui/lcm-tui dist/ tui/tui + +# CE planning artifacts +docs/plans/ +TASK.md diff --git a/src/assembler.ts b/src/assembler.ts index fb49c590..3b767ce9 100644 --- a/src/assembler.ts +++ b/src/assembler.ts @@ -16,6 +16,8 @@ export interface AssembleContextInput { tokenBudget: number; /** Number of most recent raw turns to always include (default: 8) */ freshTailCount?: number; + /** Optional user query for relevance-based eviction scoring (BM25-lite). When absent, falls back to chronological eviction. */ + prompt?: string; } export interface AssembleContextResult { @@ -632,10 +634,55 @@ interface ResolvedItem { tokens: number; /** Whether this came from a raw message (vs. a summary) */ isMessage: boolean; + /** Pre-extracted plain text used for relevance scoring */ + text: string; /** Summary metadata used for dynamic system prompt guidance */ summarySignal?: SummaryPromptSignal; } +// ── BM25-lite relevance scorer ──────────────────────────────────────────────── + +/** Tokenize text into lowercase alphanumeric terms. */ +function tokenizeText(text: string): string[] { + return text + .toLowerCase() + .split(/[^a-z0-9]+/) + .filter((t) => t.length > 1); +} + +/** + * Score an item's text against a prompt using BM25-lite (term-frequency overlap). + * Returns a score in [0, 1]: the fraction of the item's terms that also occur in the prompt. + * Returns 0 when either input is empty. + */ +function scoreRelevance(itemText: string, prompt: string): number { + const promptTerms = tokenizeText(prompt); + if (promptTerms.length === 0) return 0; + + const itemTerms = tokenizeText(itemText); + if (itemTerms.length === 0) return 0; + + // Build term-frequency map for the item + const freq = new Map(); + for (const term of itemTerms) { + freq.set(term, (freq.get(term) ?? 
0) + 1); + } + + // Sum TF contribution for each unique prompt term + const seen = new Set(); + let score = 0; + for (const term of promptTerms) { + if (seen.has(term)) continue; + seen.add(term); + const tf = freq.get(term) ?? 0; + if (tf > 0) { + // Normalised TF: tf / itemLength (BM25-lite saturation skipped for simplicity) + score += tf / itemTerms.length; + } + } + return score; +} + // ── ContextAssembler ───────────────────────────────────────────────────────── export class ContextAssembler { @@ -719,8 +766,32 @@ export class ContextAssembler { // Everything fits selected.push(...evictable); evictableTokens = evictableTotalTokens; + } else if (input.prompt && input.prompt.length > 0) { + // Prompt-aware eviction: score each evictable item by relevance to the + // prompt, then greedily fill budget from highest-scoring items down. + // Re-sort selected items by ordinal to restore chronological order. + const scored = evictable.map((item, idx) => ({ + item, + score: scoreRelevance(item.text, input.prompt!), + idx, // original index — higher = more recent, used as tiebreaker + })); + // Sort: highest relevance first; most recent (higher idx) breaks ties + scored.sort((a, b) => b.score - a.score || b.idx - a.idx); + + const kept: ResolvedItem[] = []; + let accum = 0; + for (const { item } of scored) { + if (accum + item.tokens <= remainingBudget) { + kept.push(item); + accum += item.tokens; + } + } + // Restore chronological order by ordinal before appending freshTail + kept.sort((a, b) => a.ordinal - b.ordinal); + selected.push(...kept); + evictableTokens = accum; } else { - // Need to drop oldest items until we fit. + // Chronological eviction (default): drop oldest items until we fit. // Walk from the END of evictable (newest first) accumulating tokens, // then reverse to restore chronological order. 
const kept: ResolvedItem[] = []; @@ -865,6 +936,7 @@ export class ContextAssembler { } as AgentMessage), tokens: tokenCount, isMessage: true, + text: contentText, }; } @@ -887,6 +959,7 @@ export class ContextAssembler { message: { role: "user" as const, content } as AgentMessage, tokens, isMessage: false, + text: summary.content, summarySignal: { kind: summary.kind, depth: summary.depth, diff --git a/src/engine.ts b/src/engine.ts index 8ff86742..dfdd663a 100644 --- a/src/engine.ts +++ b/src/engine.ts @@ -2172,6 +2172,8 @@ export class LcmContextEngine implements ContextEngine { sessionKey?: string; messages: AgentMessage[]; tokenBudget?: number; + /** Optional user query for relevance-based eviction (BM25-lite). When absent, falls back to chronological eviction. */ + prompt?: string; }): Promise { if (this.shouldIgnoreSession({ sessionId: params.sessionId, sessionKey: params.sessionKey })) { return { @@ -2223,6 +2225,7 @@ export class LcmContextEngine implements ContextEngine { conversationId: conversation.conversationId, tokenBudget, freshTailCount: this.config.freshTailCount, + prompt: params.prompt, }); // If assembly produced no messages for a non-empty live session, diff --git a/test/lcm-integration.test.ts b/test/lcm-integration.test.ts index 2ff0c04f..2f11b141 100644 --- a/test/lcm-integration.test.ts +++ b/test/lcm-integration.test.ts @@ -2798,3 +2798,200 @@ describe("LCM integration: media message annotation in compaction", () => { expect(summarizedText).toContain("Pure text message"); }); }); + +// ═════════════════════════════════════════════════════════════════════════════ +// Test Suite: Prompt-Aware Context Assembly +// ═════════════════════════════════════════════════════════════════════════════ + +describe("prompt-aware eviction", () => { + let convStore: ReturnType; + let sumStore: ReturnType; + let assembler: ContextAssembler; + + beforeEach(() => { + convStore = createMockConversationStore(); + sumStore = createMockSummaryStore(); + 
wireStores(convStore, sumStore); + assembler = new ContextAssembler(convStore as any, sumStore as any); + }); + + /** + * Helper: insert a summary into the summary store and append to context items. + * The summary content is used as the scoring text. + */ + async function addSummary(content: string, summaryId: string): Promise { + await sumStore.insertSummary({ + summaryId, + conversationId: CONV_ID, + kind: "leaf", + content, + tokenCount: estimateTokens(content), + }); + await sumStore.appendContextSummary(CONV_ID, summaryId); + } + + it("prefers relevant summaries over irrelevant ones when prompt is set", async () => { + // Budget is tight: only one of the two summaries fits in the evictable window. + // The relevant summary should win. + const irrelevantContent = "painting brushes canvas art watercolor oils"; // ~46 chars → ~12 tokens + const relevantContent = "authentication login password security token"; // ~45 chars → ~12 tokens + + // Add irrelevant summary first (older ordinal) then relevant summary (newer ordinal) + await addSummary(irrelevantContent, "sum_irrelevant"); + await addSummary(relevantContent, "sum_relevant"); + + // Add fresh tail messages (they are always kept regardless) + await ingestMessages(convStore, sumStore, 4, { + contentFn: (i) => `Fresh message ${i}`, + }); + + // Budget: each summary is ~12 tokens. Fresh tail = 4 messages * ~15 tokens each = ~60 tokens. + // Total budget = 75: fresh tail uses ~60, leaving ~15 for evictable. + // Only one summary fits. With prompt matching "authentication", the relevant one should be kept. 
+ const result = await assembler.assemble({ + conversationId: CONV_ID, + tokenBudget: 75, + freshTailCount: 4, + prompt: "how does authentication work", + }); + + const contents = result.messages.map((m) => extractMessageText(m.content)).join("\n"); + expect(contents).toContain("authentication"); + expect(contents).not.toContain("painting brushes"); + }); + + it("falls back to chronological order when no prompt is provided", async () => { + // Same setup as above but no prompt. Chronological means newest-first evictable. + const olderContent = "authentication login password security token"; + const newerContent = "painting brushes canvas art watercolor oils"; + + await addSummary(olderContent, "sum_older"); + await addSummary(newerContent, "sum_newer"); + + await ingestMessages(convStore, sumStore, 4, { + contentFn: (i) => `Fresh message ${i}`, + }); + + const result = await assembler.assemble({ + conversationId: CONV_ID, + tokenBudget: 75, + freshTailCount: 4, + // no prompt + }); + + const contents = result.messages.map((m) => extractMessageText(m.content)).join("\n"); + // Chronological: newer summary (painting) kept, older one (authentication) dropped + expect(contents).toContain("painting"); + expect(contents).not.toContain("authentication login"); + }); + + it("empty string prompt falls back to chronological eviction", async () => { + const olderContent = "authentication login password security token"; + const newerContent = "painting brushes canvas art watercolor oils"; + + await addSummary(olderContent, "sum_older"); + await addSummary(newerContent, "sum_newer"); + + await ingestMessages(convStore, sumStore, 4, { + contentFn: (i) => `Fresh message ${i}`, + }); + + // Empty string prompt should behave identically to no prompt + const result = await assembler.assemble({ + conversationId: CONV_ID, + tokenBudget: 75, + freshTailCount: 4, + prompt: "", + }); + + const contents = result.messages.map((m) => extractMessageText(m.content)).join("\n"); + // 
Chronological: newer summary kept + expect(contents).toContain("painting"); + expect(contents).not.toContain("authentication login"); + }); + + it("when budget fits everything, prompt has no effect on output", async () => { + await addSummary("authentication login security", "sum_auth"); + await addSummary("painting canvas watercolor", "sum_art"); + await ingestMessages(convStore, sumStore, 2, { + contentFn: (i) => `Message ${i}`, + }); + + // Large budget fits everything + const result = await assembler.assemble({ + conversationId: CONV_ID, + tokenBudget: 100_000, + freshTailCount: 2, + prompt: "authentication", + }); + + // All 4 items present (2 summaries + 2 messages) + expect(result.messages).toHaveLength(4); + expect(result.stats.summaryCount).toBe(2); + expect(result.stats.rawMessageCount).toBe(2); + }); + + it("single evictable item: kept if it fits, dropped if it does not", async () => { + // The summary content acts as a sentinel we can search for in output messages. + // "x".repeat(400) = 400 chars ≈ 100 tokens when formatted as XML. 
+ const bigContent = "x".repeat(400); + await addSummary(bigContent, "sum_big"); + + await ingestMessages(convStore, sumStore, 4, { + contentFn: (i) => `Fresh message ${i}`, + }); + + const hasSummaryInOutput = (messages: { content: unknown }[]): boolean => + messages.some((m) => extractMessageText(m.content).includes("x".repeat(10))); + + // Small budget: fresh tail uses ~16 tokens, remaining budget ~54; summary is ~125 tokens → dropped + const smallBudgetResult = await assembler.assemble({ + conversationId: CONV_ID, + tokenBudget: 70, + freshTailCount: 4, + prompt: "irrelevant query", + }); + expect(hasSummaryInOutput(smallBudgetResult.messages)).toBe(false); + + // Large budget: summary fits regardless of prompt relevance + const largeBudgetResult = await assembler.assemble({ + conversationId: CONV_ID, + tokenBudget: 500, + freshTailCount: 4, + prompt: "irrelevant query", + }); + expect(hasSummaryInOutput(largeBudgetResult.messages)).toBe(true); + }); + + it("output messages are in chronological order even with prompt-aware eviction", async () => { + // Add 3 summaries. The relevant one is the oldest (lowest ordinal). 
+ await addSummary("authentication login password security", "sum_auth"); // ordinal 1 + await addSummary("painting canvas art colors", "sum_art"); // ordinal 2 + await addSummary("gardening plants flowers soil", "sum_garden"); // ordinal 3 + + await ingestMessages(convStore, sumStore, 4, { + contentFn: (i) => `Fresh message ${i}`, + }); + + // Budget tight: only 1 summary fits from evictable + const result = await assembler.assemble({ + conversationId: CONV_ID, + tokenBudget: 80, + freshTailCount: 4, + prompt: "how does authentication work", + }); + + // The relevant summary should be kept + const contents = result.messages.map((m) => extractMessageText(m.content)).join("\n"); + expect(contents).toContain("authentication"); + + // Verify output is still in chronological order (summary before fresh messages) + const summaryIdx = result.messages.findIndex((m) => + extractMessageText(m.content).includes("authentication"), + ); + const freshIdx = result.messages.findIndex((m) => + extractMessageText(m.content).includes("Fresh message"), + ); + expect(summaryIdx).toBeLessThan(freshIdx); + }); +});