Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/skip-media-only-messages.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@martian-engineering/lossless-claw": patch
---

Exclude media-only messages from the summarization pipeline. Messages whose text content (after stripping `MEDIA:/` file path references) is shorter than 50 characters are omitted from the summarizer input, avoiding wasted API calls on content that cannot be meaningfully compressed.
30 changes: 29 additions & 1 deletion src/compaction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,20 @@ const FALLBACK_MAX_CHARS = 512 * 4;
const DEFAULT_LEAF_CHUNK_TOKENS = 20_000;
const CONDENSED_MIN_INPUT_RATIO = 0.1;

/**
 * Character threshold for summarizer input. Once file/media references have
 * been removed, a message whose remaining trimmed text is shorter than this
 * is treated as media-only (for example, an image attachment sent without
 * any accompanying text) and is not worth sending to the summarizer.
 */
const MEDIA_ONLY_MIN_TEXT_LENGTH = 50;

/** Matches a `MEDIA:/` reference together with every non-whitespace character that follows it. */
const MEDIA_PATH_RE = /MEDIA:\/\S+/g;

/**
 * Decides whether a message carries too little text to be summarized.
 *
 * All `MEDIA:/…` references are removed from `content`, surrounding
 * whitespace is trimmed, and the length of what is left is compared against
 * {@link MEDIA_ONLY_MIN_TEXT_LENGTH}.
 *
 * Note that the check is purely length-based: a short message that contains
 * no media reference at all is classified the same way as a media-only one.
 *
 * @param content - Raw message text, possibly containing `MEDIA:/` references.
 * @returns `true` when fewer than {@link MEDIA_ONLY_MIN_TEXT_LENGTH}
 *   characters remain after stripping and trimming; `false` otherwise.
 */
function isMediaOnlyContent(content: string): boolean {
  const textWithoutMedia = content.replace(MEDIA_PATH_RE, "");
  const remainingLength = textWithoutMedia.trim().length;
  return MEDIA_ONLY_MIN_TEXT_LENGTH > remainingLength;
}

function dedupeOrderedIds(ids: Iterable<string>): string[] {
const seen = new Set<string>();
const ordered: string[] = [];
Expand Down Expand Up @@ -1063,7 +1077,21 @@ export class CompactionEngine {
}
}

const concatenated = messageContents
// Skip media-only messages that cannot be meaningfully summarized.
const summarizable = messageContents.filter(
(message) => !isMediaOnlyContent(message.content),
);

// If every message in this chunk is media-only, skip the entire leaf pass
// rather than sending an empty string to the summarizer.
if (summarizable.length === 0) {
console.warn(
`[lcm] skipping leaf chunk: all ${messageContents.length} messages are media-only; conversationId=${conversationId}`,
);
return null;
}

const concatenated = summarizable
.map((message) => `[${formatTimestamp(message.createdAt, this.config.timezone)}]\n${message.content}`)
.join("\n\n");
const fileIds = dedupeOrderedIds(
Expand Down
Loading