From 2016bc9e3ddbebd5164a673b14aaefe8b0950042 Mon Sep 17 00:00:00 2001 From: xiaominghao Date: Mon, 22 Jun 2026 16:36:12 +0800 Subject: [PATCH 1/2] fix(reflect): unwrap JSON answer envelopes --- .../hindsight_api/engine/reflect/agent.py | 90 ++++++++++++++++++- .../hindsight_api/engine/reflect/prompts.py | 1 + .../tests/test_reflect_agent.py | 51 ++++++++++- .../tests/test_reflect_prompt_builder.py | 1 + 4 files changed, 138 insertions(+), 5 deletions(-) diff --git a/hindsight-api-slim/hindsight_api/engine/reflect/agent.py b/hindsight-api-slim/hindsight_api/engine/reflect/agent.py index 04ea2427b..77cae9846 100644 --- a/hindsight-api-slim/hindsight_api/engine/reflect/agent.py +++ b/hindsight-api-slim/hindsight_api/engine/reflect/agent.py @@ -90,12 +90,86 @@ def _is_done_tool(name: str) -> bool: r'\s*```(?:json)?\s*\{[^}]*(?:"(?:observation_ids|memory_ids|mental_model_ids)"|\})\s*```\s*$', re.DOTALL | re.IGNORECASE, ) -_LEAKED_JSON_OBJECT = re.compile( - r'\s*\{[^{]*"(?:observation_ids|memory_ids|mental_model_ids|answer)"[^}]*\}\s*$', re.DOTALL -) _TRAILING_IDS_PATTERN = re.compile( r"\s*(?:observation_ids|memory_ids|mental_model_ids)\s*[=:]\s*\[.*?\]\s*$", re.DOTALL | re.IGNORECASE ) +_JSON_CODE_FENCE_PATTERN = re.compile(r"^\s*```(?:json)?\s*(\{.*\})\s*```\s*$", re.DOTALL | re.IGNORECASE) + +_REFLECT_ENVELOPE_KEYS = frozenset( + { + "answer", + "directive_compliance", + "memory_ids", + "mental_model_ids", + "observation_ids", + "model_ids", + } +) +_REFLECT_ENVELOPE_MARKER_KEYS = _REFLECT_ENVELOPE_KEYS - {"answer"} +_LEAKED_JSON_ID_KEYS = frozenset({"memory_ids", "mental_model_ids", "observation_ids", "model_ids"}) + + +def _unwrap_reflect_answer_envelope(text: str) -> str | None: + """Return the answer from a model-invented reflect JSON envelope. + + Some models return the done-tool argument shape as user-visible text, e.g. + {"answer": "...", "memory_ids": [...]}. Only unwrap objects that look like + that reflect envelope so normal JSON answers stay intact. + """ + candidate = text.strip() + if not candidate: + return None + + fenced = _JSON_CODE_FENCE_PATTERN.match(candidate) + if fenced: + candidate = fenced.group(1).strip() + + try: + payload = json.loads(candidate) + except json.JSONDecodeError: + return None + + if not isinstance(payload, dict): + return None + answer = payload.get("answer") + if not isinstance(answer, str) or not answer.strip(): + return None + + keys = set(payload) + if not keys.intersection(_REFLECT_ENVELOPE_MARKER_KEYS): + return None + if not keys.issubset(_REFLECT_ENVELOPE_KEYS): + return None + + for key in ("memory_ids", "mental_model_ids", "observation_ids", "model_ids"): + value = payload.get(key) + if value is not None and not isinstance(value, list): + return None + + return answer.strip() + + +def _strip_trailing_id_json_object(text: str) -> str: + stripped = text.rstrip() + if not stripped.endswith("}"): + return text.strip() + + start = stripped.rfind("{") + if start < 0: + return text.strip() + + try: + payload = json.loads(stripped[start:]) + except json.JSONDecodeError: + return text.strip() + + if not isinstance(payload, dict) or not payload: + return text.strip() + keys = set(payload) + if not keys.issubset(_LEAKED_JSON_ID_KEYS): + return text.strip() + + return stripped[:start].strip() def _clean_answer_text(text: str) -> str: @@ -104,6 +178,10 @@ def _clean_answer_text(text: str) -> str: Some LLMs output the done() call as text instead of a proper tool call. This strips out patterns like: done({"answer": "...", ...}) """ + unwrapped = _unwrap_reflect_answer_envelope(text) + if unwrapped is not None: + return unwrapped + # Remove done() call pattern from the end of the text cleaned = _DONE_CALL_PATTERN.sub("", text).strip() return cleaned if cleaned else text @@ -122,13 +200,17 @@ def _clean_done_answer(text: str) -> str: if not text: return text + unwrapped = _unwrap_reflect_answer_envelope(text) + if unwrapped is not None: + return unwrapped + cleaned = text # Remove leaked JSON in code blocks at the end cleaned = _LEAKED_JSON_SUFFIX.sub("", cleaned).strip() # Remove leaked raw JSON objects at the end - cleaned = _LEAKED_JSON_OBJECT.sub("", cleaned).strip() + cleaned = _strip_trailing_id_json_object(cleaned) # Remove trailing ID patterns cleaned = _TRAILING_IDS_PATTERN.sub("", cleaned).strip() diff --git a/hindsight-api-slim/hindsight_api/engine/reflect/prompts.py b/hindsight-api-slim/hindsight_api/engine/reflect/prompts.py index b28b6de37..d38906e9b 100644 --- a/hindsight-api-slim/hindsight_api/engine/reflect/prompts.py +++ b/hindsight-api-slim/hindsight_api/engine/reflect/prompts.py @@ -387,6 +387,7 @@ def build_system_prompt_for_tools( "- CRITICAL: Add blank lines before and after block elements (tables, code blocks, lists)", "- Format for clarity and readability with proper spacing and hierarchy", "- NEVER include memory IDs, UUIDs, or 'Memory references' in the answer text", + "- CRITICAL: Do NOT wrap the answer in a JSON object or include fields like `answer`, `directive_compliance`, `memory_ids`, `mental_model_ids`, or `observation_ids` in the answer text. The answer field must contain only the user-visible markdown response.", "- Put IDs ONLY in the memory_ids/mental_model_ids/observation_ids arrays, not in the answer", "- CRITICAL: This is a NON-CONVERSATIONAL system. NEVER ask follow-up questions, offer further assistance, or suggest next steps. Your answer must be complete and self-contained. The user cannot reply.", ] diff --git a/hindsight-api-slim/tests/test_reflect_agent.py b/hindsight-api-slim/tests/test_reflect_agent.py index 5eb86ddb6..9bb64c4d6 100644 --- a/hindsight-api-slim/tests/test_reflect_agent.py +++ b/hindsight-api-slim/tests/test_reflect_agent.py @@ -68,6 +68,25 @@ def test_clean_text_multiline_done(self): cleaned = _clean_answer_text(text) assert cleaned == "Summary of findings." + def test_clean_text_unwraps_reflect_json_envelope(self): + """A model-invented reflect envelope should unwrap to the answer text.""" + text = """{ + "answer": "Use the inbound table for API consumers.", + "directive_compliance": "Directive 1 followed.", + "memory_ids": ["mem-1"], + "mental_model_ids": [], + "observation_ids": [] + }""" + cleaned = _clean_answer_text(text) + assert cleaned == "Use the inbound table for API consumers." + assert "directive_compliance" not in cleaned + + def test_clean_text_leaves_non_reflect_json_answer_unchanged(self): + """Plain JSON answers are valid user-visible content, not envelopes.""" + text = '{"status": "ok", "items": [1, 2]}' + cleaned = _clean_answer_text(text) + assert cleaned == text + class TestCleanDoneAnswer: """Test cleanup of answer field from done() tool call that leaks structured output.""" @@ -141,6 +160,37 @@ def test_clean_answer_multiline_with_markdown(self): assert "Point 2" in cleaned assert "mental_model_ids" not in cleaned + def test_clean_answer_unwraps_reflect_json_envelope(self): + """A done answer that is itself an envelope should keep answer content.""" + text = """{ + "answer": "Render two markdown tables: inbound and outbound.", + "directive_compliance": "All directives followed.", + "memory_ids": ["mem-1", "mem-2"], + "mental_model_ids": [], + "observation_ids": ["obs-1"] + }""" + cleaned = _clean_done_answer(text) + assert cleaned == "Render two markdown tables: inbound and outbound." + + def test_clean_answer_unwraps_fenced_reflect_json_envelope(self): + """Some models put the envelope in a JSON code fence.""" + text = """```json +{ + "answer": "The current interface is HTTP only.", + "memory_ids": [], + "mental_model_ids": [], + "observation_ids": [] +} +```""" + cleaned = _clean_done_answer(text) + assert cleaned == "The current interface is HTTP only." + + def test_clean_answer_rejects_envelope_with_unexpected_keys(self): + """Avoid rewriting user-requested JSON that happens to contain answer.""" + text = '{"answer": "yes", "payload": {"format": "json"}, "memory_ids": []}' + cleaned = _clean_done_answer(text) + assert cleaned == text + class TestToolNameNormalization: """Test tool name normalization for various LLM output formats.""" @@ -1022,7 +1072,6 @@ async def test_reflect_completes_with_tiny_context_budget(self, memory, request_ # Patch get_config where memory_engine uses it, injecting a tiny # max_context_tokens. Everything else delegates to the real config. - real_config = memory._get_raw_config() if hasattr(memory, "_get_raw_config") else None from hindsight_api.config import get_config as _real_get_config class _TinyContextProxy: diff --git a/hindsight-api-slim/tests/test_reflect_prompt_builder.py b/hindsight-api-slim/tests/test_reflect_prompt_builder.py index 55a4e4a19..93f008ff3 100644 --- a/hindsight-api-slim/tests/test_reflect_prompt_builder.py +++ b/hindsight-api-slim/tests/test_reflect_prompt_builder.py @@ -109,6 +109,7 @@ - CRITICAL: Add blank lines before and after block elements (tables, code blocks, lists) - Format for clarity and readability with proper spacing and hierarchy - NEVER include memory IDs, UUIDs, or 'Memory references' in the answer text +- CRITICAL: Do NOT wrap the answer in a JSON object or include fields like `answer`, `directive_compliance`, `memory_ids`, `mental_model_ids`, or `observation_ids` in the answer text. The answer field must contain only the user-visible markdown response. - Put IDs ONLY in the memory_ids/mental_model_ids/observation_ids arrays, not in the answer - CRITICAL: This is a NON-CONVERSATIONAL system. NEVER ask follow-up questions, offer further assistance, or suggest next steps. Your answer must be complete and self-contained. The user cannot reply.\ """ From c8cace02428e10203b1eec950f536692c1fad2c1 Mon Sep 17 00:00:00 2001 From: xiaominghao Date: Tue, 23 Jun 2026 17:45:19 +0800 Subject: [PATCH 2/2] fix(reflect): clarify leaked done argument recovery --- .../hindsight_api/engine/reflect/agent.py | 23 ++++++++++--------- .../hindsight_api/engine/reflect/prompts.py | 1 - .../tests/test_reflect_agent.py | 18 +++++++-------- .../tests/test_reflect_prompt_builder.py | 1 - 4 files changed, 21 insertions(+), 22 deletions(-) diff --git a/hindsight-api-slim/hindsight_api/engine/reflect/agent.py b/hindsight-api-slim/hindsight_api/engine/reflect/agent.py index 77cae9846..e4ec52ee0 100644 --- a/hindsight-api-slim/hindsight_api/engine/reflect/agent.py +++ b/hindsight-api-slim/hindsight_api/engine/reflect/agent.py @@ -95,7 +95,7 @@ def _is_done_tool(name: str) -> bool: ) _JSON_CODE_FENCE_PATTERN = re.compile(r"^\s*```(?:json)?\s*(\{.*\})\s*```\s*$", re.DOTALL | re.IGNORECASE) -_REFLECT_ENVELOPE_KEYS = frozenset( +_DONE_ARGUMENT_KEYS = frozenset( { "answer", "directive_compliance", @@ -105,16 +105,17 @@ def _is_done_tool(name: str) -> bool: "model_ids", } ) -_REFLECT_ENVELOPE_MARKER_KEYS = _REFLECT_ENVELOPE_KEYS - {"answer"} +_DONE_ARGUMENT_MARKER_KEYS = _DONE_ARGUMENT_KEYS - {"answer"} _LEAKED_JSON_ID_KEYS = frozenset({"memory_ids", "mental_model_ids", "observation_ids", "model_ids"}) -def _unwrap_reflect_answer_envelope(text: str) -> str | None: - """Return the answer from a model-invented reflect JSON envelope. +def _unwrap_leaked_done_arguments(text: str) -> str | None: + """Return the answer when a done tool call was rendered as JSON text. - Some models return the done-tool argument shape as user-visible text, e.g. - {"answer": "...", "memory_ids": [...]}. Only unwrap objects that look like - that reflect envelope so normal JSON answers stay intact. + Some providers leak the done tool's argument object instead of surfacing it + as a native tool call, e.g. {"answer": "...", "memory_ids": [...]}. Only + unwrap objects that match the done argument shape so normal JSON answers + stay intact. """ candidate = text.strip() if not candidate: @@ -136,9 +137,9 @@ def _unwrap_reflect_answer_envelope(text: str) -> str | None: return None keys = set(payload) - if not keys.intersection(_REFLECT_ENVELOPE_MARKER_KEYS): + if not keys.intersection(_DONE_ARGUMENT_MARKER_KEYS): return None - if not keys.issubset(_REFLECT_ENVELOPE_KEYS): + if not keys.issubset(_DONE_ARGUMENT_KEYS): return None for key in ("memory_ids", "mental_model_ids", "observation_ids", "model_ids"): @@ -178,7 +179,7 @@ def _clean_answer_text(text: str) -> str: Some LLMs output the done() call as text instead of a proper tool call. This strips out patterns like: done({"answer": "...", ...}) """ - unwrapped = _unwrap_reflect_answer_envelope(text) + unwrapped = _unwrap_leaked_done_arguments(text) if unwrapped is not None: return unwrapped @@ -200,7 +201,7 @@ def _clean_done_answer(text: str) -> str: if not text: return text - unwrapped = _unwrap_reflect_answer_envelope(text) + unwrapped = _unwrap_leaked_done_arguments(text) if unwrapped is not None: return unwrapped diff --git a/hindsight-api-slim/hindsight_api/engine/reflect/prompts.py b/hindsight-api-slim/hindsight_api/engine/reflect/prompts.py index d38906e9b..b28b6de37 100644 --- a/hindsight-api-slim/hindsight_api/engine/reflect/prompts.py +++ b/hindsight-api-slim/hindsight_api/engine/reflect/prompts.py @@ -387,7 +387,6 @@ def build_system_prompt_for_tools( "- CRITICAL: Add blank lines before and after block elements (tables, code blocks, lists)", "- Format for clarity and readability with proper spacing and hierarchy", "- NEVER include memory IDs, UUIDs, or 'Memory references' in the answer text", - "- CRITICAL: Do NOT wrap the answer in a JSON object or include fields like `answer`, `directive_compliance`, `memory_ids`, `mental_model_ids`, or `observation_ids` in the answer text. The answer field must contain only the user-visible markdown response.", "- Put IDs ONLY in the memory_ids/mental_model_ids/observation_ids arrays, not in the answer", "- CRITICAL: This is a NON-CONVERSATIONAL system. NEVER ask follow-up questions, offer further assistance, or suggest next steps. Your answer must be complete and self-contained. The user cannot reply.", ] diff --git a/hindsight-api-slim/tests/test_reflect_agent.py b/hindsight-api-slim/tests/test_reflect_agent.py index 9bb64c4d6..551248063 100644 --- a/hindsight-api-slim/tests/test_reflect_agent.py +++ b/hindsight-api-slim/tests/test_reflect_agent.py @@ -68,8 +68,8 @@ def test_clean_text_multiline_done(self): cleaned = _clean_answer_text(text) assert cleaned == "Summary of findings." - def test_clean_text_unwraps_reflect_json_envelope(self): - """A model-invented reflect envelope should unwrap to the answer text.""" + def test_clean_text_recovers_leaked_done_arguments(self): + """A done tool-call argument object rendered as text should keep only answer.""" text = """{ "answer": "Use the inbound table for API consumers.", "directive_compliance": "Directive 1 followed.", @@ -81,8 +81,8 @@ def test_clean_text_unwraps_reflect_json_envelope(self): assert cleaned == "Use the inbound table for API consumers." assert "directive_compliance" not in cleaned - def test_clean_text_leaves_non_reflect_json_answer_unchanged(self): - """Plain JSON answers are valid user-visible content, not envelopes.""" + def test_clean_text_leaves_non_done_json_answer_unchanged(self): + """Plain JSON answers are valid user-visible content.""" text = '{"status": "ok", "items": [1, 2]}' cleaned = _clean_answer_text(text) assert cleaned == text @@ -160,8 +160,8 @@ def test_clean_answer_multiline_with_markdown(self): assert "Point 2" in cleaned assert "mental_model_ids" not in cleaned - def test_clean_answer_unwraps_reflect_json_envelope(self): - """A done answer that is itself an envelope should keep answer content.""" + def test_clean_answer_recovers_leaked_done_arguments(self): + """A done answer that contains leaked done arguments should keep answer content.""" text = """{ "answer": "Render two markdown tables: inbound and outbound.", "directive_compliance": "All directives followed.", @@ -172,8 +172,8 @@ def test_clean_answer_unwraps_reflect_json_envelope(self): cleaned = _clean_done_answer(text) assert cleaned == "Render two markdown tables: inbound and outbound." - def test_clean_answer_unwraps_fenced_reflect_json_envelope(self): - """Some models put the envelope in a JSON code fence.""" + def test_clean_answer_recovers_fenced_leaked_done_arguments(self): + """Some providers put leaked done arguments in a JSON code fence.""" text = """```json { "answer": "The current interface is HTTP only.", @@ -185,7 +185,7 @@ def test_clean_answer_unwraps_fenced_reflect_json_envelope(self): cleaned = _clean_done_answer(text) assert cleaned == "The current interface is HTTP only." - def test_clean_answer_rejects_envelope_with_unexpected_keys(self): + def test_clean_answer_rejects_done_arguments_with_unexpected_keys(self): """Avoid rewriting user-requested JSON that happens to contain answer.""" text = '{"answer": "yes", "payload": {"format": "json"}, "memory_ids": []}' cleaned = _clean_done_answer(text) diff --git a/hindsight-api-slim/tests/test_reflect_prompt_builder.py b/hindsight-api-slim/tests/test_reflect_prompt_builder.py index 93f008ff3..55a4e4a19 100644 --- a/hindsight-api-slim/tests/test_reflect_prompt_builder.py +++ b/hindsight-api-slim/tests/test_reflect_prompt_builder.py @@ -109,7 +109,6 @@ - CRITICAL: Add blank lines before and after block elements (tables, code blocks, lists) - Format for clarity and readability with proper spacing and hierarchy - NEVER include memory IDs, UUIDs, or 'Memory references' in the answer text -- CRITICAL: Do NOT wrap the answer in a JSON object or include fields like `answer`, `directive_compliance`, `memory_ids`, `mental_model_ids`, or `observation_ids` in the answer text. The answer field must contain only the user-visible markdown response. - Put IDs ONLY in the memory_ids/mental_model_ids/observation_ids arrays, not in the answer - CRITICAL: This is a NON-CONVERSATIONAL system. NEVER ask follow-up questions, offer further assistance, or suggest next steps. Your answer must be complete and self-contained. The user cannot reply.\ """