diff --git a/src/logsqueak/services/llm_helpers.py b/src/logsqueak/services/llm_helpers.py
index ae82c88..adf62ce 100644
--- a/src/logsqueak/services/llm_helpers.py
+++ b/src/logsqueak/services/llm_helpers.py
@@ -2,6 +2,7 @@
from collections import defaultdict
from logseq_outline.parser import LogseqOutline
+from logsqueak.services.page_indexer import _clean_context_for_llm
def format_chunks_for_llm(
@@ -93,7 +94,16 @@ def format_chunks_for_llm(
for block_id, context in chunks_by_page[page_name]:
short_id = id_mapper.to_short(block_id)
xml_parts.append(f'<block id="{short_id}">')
- xml_parts.append(context)
+
+ # Strip redundant frontmatter from page-level chunks
+ # Page-level chunks have format: "Page: X\nTitle: Y\n"
+    # Frontmatter is already shown in <page_properties>, so strip it
+ if block_id.endswith("::__PAGE__") and outline and outline.frontmatter:
+ cleaned_context = _clean_context_for_llm(context, outline.frontmatter)
+ xml_parts.append(cleaned_context)
+ else:
+ xml_parts.append(context)
+
xml_parts.append("</block>")
xml_parts.append("</page>")
diff --git a/src/logsqueak/services/llm_wrappers.py b/src/logsqueak/services/llm_wrappers.py
index 8bbb4b1..2315132 100644
--- a/src/logsqueak/services/llm_wrappers.py
+++ b/src/logsqueak/services/llm_wrappers.py
@@ -651,6 +651,19 @@ async def plan_integration_for_block(
)
continue # Skip this chunk
+ # Normalize page-level chunks (__PAGE__) to add_section
+ # Page-level chunks exist only in RAG index for semantic search,
+ # not as real blocks in the file structure
+ if target_hybrid_id and target_hybrid_id.endswith("::__PAGE__"):
+ logger.debug(
+ "llm_page_level_chunk_normalized",
+ original_action=chunk.action,
+ target_page=chunk.target_page,
+ target_id=target_hybrid_id
+ )
+ chunk.action = "add_section"
+ target_hybrid_id = None # No target_block_id for add_section
+
# Set translated target_block_id
chunk.target_block_id = target_hybrid_id
diff --git a/tests/unit/test_llm_helpers.py b/tests/unit/test_llm_helpers.py
new file mode 100644
index 0000000..44decc6
--- /dev/null
+++ b/tests/unit/test_llm_helpers.py
@@ -0,0 +1,126 @@
+"""Unit tests for LLM helper functions."""
+
+import pytest
+from logsqueak.services.llm_helpers import format_chunks_for_llm
+from logsqueak.utils.llm_id_mapper import LLMIDMapper
+from logseq_outline.parser import LogseqOutline
+
+
+def test_format_chunks_for_llm_strips_page_level_frontmatter():
+ """Test format_chunks_for_llm() strips frontmatter from page-level chunks."""
+ # Arrange
+ id_mapper = LLMIDMapper()
+ page_name = "diffused"
+ page_chunk_id = f"{page_name}::__PAGE__"
+ regular_block_id = "block123"
+
+ # Add IDs to mapper
+ id_mapper.add(page_chunk_id)
+ id_mapper.add(regular_block_id)
+
+ # Page-level chunk with frontmatter in context
+ page_chunk_context = "Page: diffused\nTitle: Diffused System\ntags:: system, kubernetes"
+ # Regular block chunk (already cleaned during indexing)
+ regular_block_context = "- Deployment architecture\n - Uses Kubernetes"
+
+ chunks = [
+ (page_name, page_chunk_id, page_chunk_context),
+ (page_name, regular_block_id, regular_block_context),
+ ]
+
+ page_contents = {
+ page_name: LogseqOutline.parse(
+ "tags:: system, kubernetes\n\n"
+ "- Deployment architecture\n"
+ " id:: block123\n"
+ " - Uses Kubernetes"
+ )
+ }
+
+ # Act
+ xml = format_chunks_for_llm(chunks, page_contents, id_mapper)
+
+ # Assert - frontmatter should appear only in , not in page-level block
+    assert "<page_properties>" in xml
+    assert "tags:: system, kubernetes" in xml.split("<page_properties>")[1].split("</page_properties>")[0]
+
+ # Page-level block should have "Page:" and "Title:" but NOT frontmatter
+    page_block_section = xml.split(f'<block id="{id_mapper.to_short(page_chunk_id)}">')[1].split("</block>")[0]
+ assert "Page: diffused" in page_block_section
+ assert "Title: Diffused System" in page_block_section
+ assert "tags::" not in page_block_section # Frontmatter stripped from block
+
+ # Regular block should be unchanged
+    regular_block_section = xml.split(f'<block id="{id_mapper.to_short(regular_block_id)}">')[1].split("</block>")[0]
+ assert "Deployment architecture" in regular_block_section
+
+
+def test_format_chunks_for_llm_handles_regular_blocks_normally():
+ """Test format_chunks_for_llm() doesn't strip frontmatter from regular blocks."""
+ # Arrange
+ id_mapper = LLMIDMapper()
+ page_name = "Python"
+ block_id = "block456"
+
+ id_mapper.add(block_id)
+
+ # Regular block (not a page-level chunk)
+ # These are already cleaned during indexing, but test that we don't break them
+ chunks = [
+ (page_name, block_id, "- Type hints are essential\n - Improve code quality"),
+ ]
+
+ page_contents = {
+ page_name: LogseqOutline.parse(
+ "type:: language\n\n"
+ "- Type hints are essential\n"
+ " id:: block456\n"
+ " - Improve code quality"
+ )
+ }
+
+ # Act
+ xml = format_chunks_for_llm(chunks, page_contents, id_mapper)
+
+ # Assert - regular block content is preserved as-is
+ assert "Type hints are essential" in xml
+ assert "Improve code quality" in xml
+
+
+def test_format_chunks_for_llm_handles_multiple_pages():
+ """Test format_chunks_for_llm() handles multiple pages with page-level chunks."""
+ # Arrange
+ id_mapper = LLMIDMapper()
+
+ page1_id = "Page1::__PAGE__"
+ page2_id = "Page2::__PAGE__"
+
+ id_mapper.add(page1_id)
+ id_mapper.add(page2_id)
+
+ chunks = [
+ ("Page1", page1_id, "Page: Page1\ntags:: web"),
+ ("Page2", page2_id, "Page: Page2\ntype:: language"),
+ ]
+
+ page_contents = {
+ "Page1": LogseqOutline.parse("tags:: web\n\n"),
+ "Page2": LogseqOutline.parse("type:: language\n\n"),
+ }
+
+ # Act
+ xml = format_chunks_for_llm(chunks, page_contents, id_mapper)
+
+ # Assert - each page should have properties section and cleaned blocks
+    assert xml.count("<page_properties>") == 2
+
+ # Page1 block should not have frontmatter
+    page1_section = xml.split('<page name="Page1">')[1].split("</page>")[0]
+    page1_block = page1_section.split(f'<block id="{id_mapper.to_short(page1_id)}">')[1].split("</block>")[0]
+ assert "Page: Page1" in page1_block
+ assert "tags::" not in page1_block # Stripped from block
+
+ # But properties should have frontmatter
+    page1_properties = page1_section.split("<page_properties>")[1].split("</page_properties>")[0]
+ assert "tags:: web" in page1_properties
diff --git a/tests/unit/test_llm_wrappers.py b/tests/unit/test_llm_wrappers.py
index 3cd0dca..04461ce 100644
--- a/tests/unit/test_llm_wrappers.py
+++ b/tests/unit/test_llm_wrappers.py
@@ -560,3 +560,106 @@ async def mock_stream(*args, **kwargs):
# Assert - should handle gracefully (no crashes)
assert len(results) == 0
+
+
+@pytest.mark.asyncio
+async def test_plan_integration_for_block_normalizes_page_level_chunks():
+ """Test plan_integration_for_block() normalizes __PAGE__ targets to add_section."""
+ # Arrange
+ mock_client = Mock(spec=LLMClient)
+
+ edited_content = EditedContent(
+ block_id="abc123",
+ original_content="[[diffused]] supports [[ACS]]",
+ hierarchical_context="- [[diffused]] supports [[ACS]]",
+ current_content="[[diffused]] can utilize [[ACS]]"
+ )
+
+ # Include page-level chunk (ends with ::__PAGE__)
+ candidate_chunks = [
+ ("diffused", "diffused::__PAGE__", "Page: diffused\nTitle: diffused\ntags:: system, kubernetes"),
+ ("diffused", "block1", "- Deployment architecture\n - Uses Kubernetes")
+ ]
+
+ page_contents = {
+ "diffused": LogseqOutline.parse(
+ "tags:: system, kubernetes\n\n"
+ "- Deployment architecture\n"
+ " id:: block1\n"
+ " - Uses Kubernetes"
+ )
+ }
+
+ # Mock LLM suggesting add_under action for page-level chunk
+ # NOTE: "diffused::__PAGE__" will be mapped to short ID (e.g., "1")
+ async def mock_stream(*args, **kwargs):
+ # LLM suggests adding under page-level chunk
+ yield IntegrationDecisionChunk(
+ target_page="diffused",
+ action="add_under", # LLM can suggest any action
+ target_block_id="1", # Short ID for "diffused::__PAGE__"
+ target_block_title="diffused",
+ confidence=0.95,
+ reasoning="The knowledge is about diffused system"
+ )
+
+ mock_client.stream_ndjson = mock_stream
+
+ # Act
+ results = []
+ async for chunk in plan_integration_for_block(mock_client, edited_content, candidate_chunks, page_contents):
+ results.append(chunk)
+
+ # Assert - __PAGE__ target should be normalized to add_section
+ assert len(results) == 1
+ assert results[0].knowledge_block_id == "abc123"
+ assert results[0].target_page == "diffused"
+ assert results[0].action == "add_section" # Normalized from add_under
+ assert results[0].target_block_id is None # Cleared (add_section has no target)
+ assert results[0].confidence == 0.95
+
+
+@pytest.mark.asyncio
+async def test_plan_integration_for_block_preserves_regular_block_targets():
+ """Test plan_integration_for_block() preserves regular block targets (not __PAGE__)."""
+ # Arrange
+ mock_client = Mock(spec=LLMClient)
+
+ edited_content = EditedContent(
+ block_id="abc123",
+ original_content="Kubernetes scaling info",
+ hierarchical_context="- Kubernetes scaling info",
+ current_content="Kubernetes enables horizontal scaling"
+ )
+
+ # Regular block chunk (NOT a page-level chunk)
+ candidate_chunks = [
+ ("diffused", "block1", "- Deployment architecture\n - Uses Kubernetes")
+ ]
+
+ page_contents = {
+ "diffused": LogseqOutline.parse("- Deployment architecture\n id:: block1")
+ }
+
+ # Mock LLM suggesting add_under action for regular block
+ async def mock_stream(*args, **kwargs):
+ yield IntegrationDecisionChunk(
+ target_page="diffused",
+ action="add_under",
+ target_block_id="1", # Short ID for "block1"
+ target_block_title="Deployment architecture",
+ confidence=0.90,
+ reasoning="Related to deployment"
+ )
+
+ mock_client.stream_ndjson = mock_stream
+
+ # Act
+ results = []
+ async for chunk in plan_integration_for_block(mock_client, edited_content, candidate_chunks, page_contents):
+ results.append(chunk)
+
+ # Assert - regular block should NOT be normalized
+ assert len(results) == 1
+ assert results[0].action == "add_under" # Preserved
+ assert results[0].target_block_id == "block1" # Translated but not cleared