From 9e1af349c2c0e706369b5b3b161d7218b27ec137 Mon Sep 17 00:00:00 2001 From: 123 Date: Thu, 19 Mar 2026 12:26:34 +0800 Subject: [PATCH] fix: skip media-only messages from summarization pipeline Messages containing only media attachments (no meaningful text) produce near-empty source text that wastes summarizer API calls every compaction cycle. Filter out messages with fewer than 50 characters of actual text content after stripping MEDIA:/ file path references. Closes #124 Co-Authored-By: Claude Opus 4.6 --- .changeset/skip-media-only-messages.md | 5 +++++ src/compaction.ts | 30 +++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 .changeset/skip-media-only-messages.md diff --git a/.changeset/skip-media-only-messages.md b/.changeset/skip-media-only-messages.md new file mode 100644 index 00000000..f4249201 --- /dev/null +++ b/.changeset/skip-media-only-messages.md @@ -0,0 +1,5 @@ +--- +"@martian-engineering/lossless-claw": patch +--- + +Skip media-only messages from the summarization pipeline. Messages whose text content (after stripping `MEDIA:/` file path references) is below 50 characters are excluded from summarizer input, avoiding wasted API calls on content that cannot be meaningfully compressed. diff --git a/src/compaction.ts b/src/compaction.ts index a78db8b5..0e9c0e3c 100644 --- a/src/compaction.ts +++ b/src/compaction.ts @@ -144,6 +144,20 @@ const FALLBACK_MAX_CHARS = 512 * 4; const DEFAULT_LEAF_CHUNK_TOKENS = 20_000; const CONDENSED_MIN_INPUT_RATIO = 0.1; +/** + * Minimum text length (after stripping file/media references) for a message + * to be worth sending to the summarizer. Messages below this threshold are + * typically media-only (an image attachment with no accompanying text). 
+ */ +const MEDIA_ONLY_MIN_TEXT_LENGTH = 50; + +const MEDIA_PATH_RE = /MEDIA:\/\S+/g; + +function isMediaOnlyContent(content: string): boolean { + const stripped = content.replace(MEDIA_PATH_RE, "").trim(); // text left once MEDIA:/ references are removed + return stripped.length === 0 || (stripped.length < content.trim().length && stripped.length < MEDIA_ONLY_MIN_TEXT_LENGTH); // empty, or a reference was removed and too little text remains; short text-only messages stay summarizable +} + function dedupeOrderedIds(ids: Iterable): string[] { const seen = new Set(); const ordered: string[] = []; @@ -1063,7 +1077,21 @@ export class CompactionEngine { } } - const concatenated = messageContents + // Skip media-only messages that cannot be meaningfully summarized. + const summarizable = messageContents.filter( + (message) => !isMediaOnlyContent(message.content), + ); + + // If every message in this chunk is media-only, skip the entire leaf pass + // rather than sending an empty string to the summarizer. + if (summarizable.length === 0) { + console.warn( + `[lcm] skipping leaf chunk: all ${messageContents.length} messages are media-only; conversationId=${conversationId}`, + ); + return null; + } + + const concatenated = summarizable .map((message) => `[${formatTimestamp(message.createdAt, this.config.timezone)}]\n${message.content}`) .join("\n\n"); const fileIds = dedupeOrderedIds(