diff --git a/arcllm/__init__.py b/arcllm/__init__.py index da23918..7596cb7 100644 --- a/arcllm/__init__.py +++ b/arcllm/__init__.py @@ -69,7 +69,7 @@ from __future__ import annotations -__version__ = "0.4.8" +__version__ = "0.4.9" __all__ = [ "APIConnectionError", "APIError", @@ -102,6 +102,7 @@ "ServiceUnavailableError", "StreamChunk", "StreamingResponse", + "ThinkingBlock", "Timeout", "TimeoutError", "ToolCall", @@ -224,6 +225,7 @@ RerankResult, StreamChunk, StreamingResponse, + ThinkingBlock, ToolCall, Usage, ) diff --git a/arcllm/core.py b/arcllm/core.py index a8f1ce8..dc02538 100644 --- a/arcllm/core.py +++ b/arcllm/core.py @@ -30,6 +30,7 @@ ModelResponse, StreamChunk, StreamingResponse, + ThinkingBlock, ToolCall, Usage, ) @@ -548,6 +549,10 @@ def stream_chunk_builder( # Use specialized structure for better performance choice_roles: dict[int, str | None] = {} choice_content: dict[int, list[str]] = {} + choice_reasoning: dict[int, list[str]] = {} + # Anthropic-style: per-choice ordered list of (thinking_text, signature) + # blocks rebuilt from the stream so callers can replay them as input. + choice_thinking_blocks: dict[int, list[list[str]]] = {} choice_tool_calls: dict[ int, dict[int, list[Any]] ] = {} # idx -> tc_idx -> [id, type, name_parts, arg_parts] @@ -571,6 +576,8 @@ def stream_chunk_builder( if idx not in choice_content: choice_roles[idx] = None choice_content[idx] = [] + choice_reasoning[idx] = [] + choice_thinking_blocks[idx] = [] choice_tool_calls[idx] = {} choice_finish[idx] = None choice_logprobs[idx] = None @@ -584,6 +591,26 @@ def stream_chunk_builder( if delta_content: choice_content[idx].append(delta_content) + # Reasoning (DeepSeek/GLM/o-series style — flat string deltas). + delta_reasoning = delta.reasoning_content + if delta_reasoning: + choice_reasoning[idx].append(delta_reasoning) + + # Anthropic-style thinking deltas — group by current open block. + # A new block starts whenever a thinking delta arrives after a + # signature delta (or first thinking delta of the stream). + delta_thinking = delta.thinking + delta_signature = delta.signature + if delta_thinking is not None or delta_signature is not None: + blocks = choice_thinking_blocks[idx] + if not blocks or (blocks and blocks[-1][1]): + # Last block is closed (has signature) — start a new one. + blocks.append(["", ""]) + if delta_thinking: + blocks[-1][0] += delta_thinking + if delta_signature: + blocks[-1][1] = delta_signature + choice_finish_reason = choice.finish_reason if choice_finish_reason: choice_finish[idx] = choice_finish_reason @@ -645,10 +672,34 @@ def stream_chunk_builder( content_parts = choice_content[idx] content = "".join(content_parts) if content_parts else None + reasoning_parts = choice_reasoning[idx] + reasoning_content = "".join(reasoning_parts) if reasoning_parts else None + + thinking_blocks_assembled: list[ThinkingBlock] | None = None + if choice_thinking_blocks[idx]: + thinking_blocks_assembled = [ + ThinkingBlock( + type="thinking", + thinking=text, + signature=sig or None, + ) + for text, sig in choice_thinking_blocks[idx] + if text or sig + ] or None + # Fallback to populate the flat surface when only thinking blocks + # arrived (Anthropic) — concatenate their text so callers reading + # ``reasoning_content`` see the same string regardless of provider. + if reasoning_content is None and thinking_blocks_assembled is not None: + reasoning_content = ( + "".join(b.thinking or "" for b in thinking_blocks_assembled) or None + ) + message = Message( role=choice_roles[idx] or "assistant", content=content, tool_calls=tool_calls or None, + reasoning_content=reasoning_content, + thinking_blocks=thinking_blocks_assembled, ) choices.append( diff --git a/arcllm/providers/anthropic_adapter.py b/arcllm/providers/anthropic_adapter.py index 9db63e8..3a20591 100644 --- a/arcllm/providers/anthropic_adapter.py +++ b/arcllm/providers/anthropic_adapter.py @@ -97,6 +97,7 @@ Message, ModelResponse, StreamChunk, + ThinkingBlock, ToolCall, Usage, ) @@ -450,9 +451,11 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon now = int(time.time()) content_blocks = resp.get("content", []) - # Extract text content and tool uses + # Extract text content, tool uses, and thinking blocks text_parts: list[str] = [] tool_calls: list[ToolCall] = [] + thinking_blocks: list[ThinkingBlock] = [] + thinking_text_parts: list[str] = [] # Citations are sourced from two places in Anthropic responses: # - ``web_search_tool_result`` blocks: aggregate result list with # ``url`` / ``title`` / ``snippet`` per source. @@ -485,11 +488,29 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon end_index=ann_dict.get("end_index") or ann_dict.get("end_char_index"), ) - # Second pass: tool uses + web_search_tool_result fallback (only fills - # URLs that the text-block annotations didn't already cover). + # Second pass: tool uses, thinking blocks, and web_search_tool_result + # fallback (only fills URLs the text-block annotations didn't cover). for block in content_blocks: kind = block.get("type") - if kind == "tool_use": + if kind == "thinking": + thinking_text = block.get("thinking", "") + thinking_blocks.append( + ThinkingBlock( + type="thinking", + thinking=thinking_text, + signature=block.get("signature"), + ) + ) + if thinking_text: + thinking_text_parts.append(thinking_text) + elif kind == "redacted_thinking": + thinking_blocks.append( + ThinkingBlock( + type="redacted_thinking", + data=block.get("data"), + ) + ) + elif kind == "tool_use": tool_calls.append( ToolCall( id=block.get("id", ""), @@ -518,12 +539,15 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon # Join text parts efficiently text_content = "".join(text_parts) if text_parts else None citations = list(citation_index.values()) if citation_index else None + reasoning_content = "".join(thinking_text_parts) if thinking_text_parts else None message = Message( role=resp.get("role", "assistant"), content=text_content, tool_calls=tool_calls or None, citations=citations, + reasoning_content=reasoning_content, + thinking_blocks=thinking_blocks or None, ) # Map Anthropic stop reasons to OpenAI format @@ -617,6 +641,21 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None: ) ], ) + if block.get("type") == "thinking": + # Anthropic emits an empty thinking block first, then a + # series of thinking_delta events with the text, then a + # signature_delta with the cryptographic signature. + return StreamChunk( + id="", + model=model, + choices=[ + ChunkChoice( + index=0, + delta=ChunkDelta(thinking=block.get("thinking", "")), + finish_reason=None, + ) + ], + ) if block.get("type") == "tool_use": # Start of tool use return StreamChunk( @@ -645,7 +684,8 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None: elif event_type == "content_block_delta": delta = event.get("delta", {}) - if delta.get("type") == "text_delta": + delta_type = delta.get("type") + if delta_type == "text_delta": return StreamChunk( id="", model=model, @@ -657,6 +697,38 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None: ) ], ) + if delta_type == "thinking_delta": + # Surface as both ``thinking`` (matches Anthropic wire shape + # for round-trip) and ``reasoning_content`` (so callers using + # the unified surface can stream thinking text without a + # provider-specific code path). + thinking_text = delta.get("thinking", "") + return StreamChunk( + id="", + model=model, + choices=[ + ChunkChoice( + index=0, + delta=ChunkDelta( + thinking=thinking_text, + reasoning_content=thinking_text, + ), + finish_reason=None, + ) + ], + ) + if delta_type == "signature_delta": + return StreamChunk( + id="", + model=model, + choices=[ + ChunkChoice( + index=0, + delta=ChunkDelta(signature=delta.get("signature", "")), + finish_reason=None, + ) + ], + ) if delta.get("type") == "input_json_delta": # Tool argument delta return StreamChunk( diff --git a/arcllm/providers/gemini_adapter.py b/arcllm/providers/gemini_adapter.py index 111a007..785118e 100644 --- a/arcllm/providers/gemini_adapter.py +++ b/arcllm/providers/gemini_adapter.py @@ -420,11 +420,19 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon # Use list + join for efficient string building text_parts: list[str] = [] + thought_parts: list[str] = [] tool_calls: list[ToolCall] = [] for part in parts: if "text" in part: - text_parts.append(part["text"]) + # Gemini marks chain-of-thought parts with ``thought: true`` + # when the request set ``thinkingConfig.includeThoughts``. + # We split those out into ``reasoning_content`` so callers + # don't have to filter them out of the answer text. + if part.get("thought"): + thought_parts.append(part["text"]) + else: + text_parts.append(part["text"]) elif "functionCall" in part: fc = part["functionCall"] tool_calls.append( @@ -439,12 +447,14 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon ) text_content = "".join(text_parts) if text_parts else None + reasoning_content = "".join(thought_parts) if thought_parts else None citations = _extract_grounding_citations(candidate) message = Message( role="assistant", content=text_content, tool_calls=tool_calls or None, citations=citations, + reasoning_content=reasoning_content, ) # Map finish reason @@ -507,11 +517,15 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None: # Use list + join for efficient string building text_parts: list[str] = [] + thought_parts: list[str] = [] tool_call_deltas: list[dict[str, Any]] = [] for part in parts: if "text" in part: - text_parts.append(part["text"]) + if part.get("thought"): + thought_parts.append(part["text"]) + else: + text_parts.append(part["text"]) elif "functionCall" in part: fc = part["functionCall"] tool_call_deltas.append( @@ -527,9 +541,11 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None: ) text_content = "".join(text_parts) if text_parts else None + reasoning_content = "".join(thought_parts) if thought_parts else None delta = ChunkDelta( content=text_content, tool_calls=tool_call_deltas or None, + reasoning_content=reasoning_content, ) finish_reason = None diff --git a/arcllm/providers/openai_adapter.py b/arcllm/providers/openai_adapter.py index 823899c..3e75e18 100644 --- a/arcllm/providers/openai_adapter.py +++ b/arcllm/providers/openai_adapter.py @@ -231,12 +231,23 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon arguments=fc.get("arguments", ""), ) + # ``reasoning_content`` is the de-facto field name used by + # DeepSeek-R1, GLM-4.5+, Groq's DeepSeek/Qwen-thinking models, + # Cerebras, Together, Fireworks, and any OpenAI-compat host + # serving a reasoning model. ``reasoning`` is the alias + # OpenAI ships on the chat-completions endpoint for o-series + # responses; we accept either and normalise to one field. + reasoning_content = message_data.get("reasoning_content") or message_data.get( + "reasoning" + ) + message = Message( role=message_data.get("role", "assistant"), content=message_data.get("content"), tool_calls=tool_calls, function_call=function_call, refusal=message_data.get("refusal"), + reasoning_content=reasoning_content, ) choices.append( @@ -303,6 +314,8 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None: content=delta_data.get("content"), tool_calls=tool_calls, function_call=delta_data.get("function_call"), + reasoning_content=delta_data.get("reasoning_content") + or delta_data.get("reasoning"), ) choices.append( diff --git a/arcllm/types.py b/arcllm/types.py index 74f4100..1ebe061 100644 --- a/arcllm/types.py +++ b/arcllm/types.py @@ -186,6 +186,44 @@ def model_dump(self) -> dict[str, Any]: return result +# ============================================================================= +# Reasoning / Thinking Types +# ============================================================================= + + +class ThinkingBlock(_DictLike, msgspec.Struct): + """A chain-of-thought block from a reasoning-capable model. + + Anthropic's extended-thinking feature returns thinking as structured + content blocks (``type: "thinking"`` with a ``signature`` for replay, + or ``type: "redacted_thinking"`` with opaque ``data``). Preserving the + block shape — instead of flattening to a string — lets callers send + the thinking back as part of the conversation history without losing + the signature, which is required for tool-use round trips. + + For providers that emit thinking as a flat string (DeepSeek-R1, GLM, + Groq DeepSeek/Qwen-thinking, Gemini with ``includeThoughts=true``) + we populate :attr:`Message.reasoning_content` directly instead. + """ + + type: Literal["thinking", "redacted_thinking"] = "thinking" + thinking: str | None = None + signature: str | None = None + # Anthropic-specific: opaque payload for ``redacted_thinking`` blocks. + data: str | None = None + + def model_dump(self) -> dict[str, Any]: + """Return dict representation for serialization.""" + result: dict[str, Any] = {"type": self.type} + if self.thinking is not None: + result["thinking"] = self.thinking + if self.signature is not None: + result["signature"] = self.signature + if self.data is not None: + result["data"] = self.data + return result + + # ============================================================================= # Message Types # ============================================================================= @@ -204,6 +242,14 @@ class Message(_DictLike, msgspec.Struct): # responses; an empty list means "the provider was asked to ground but # returned no sources" (rare). citations: list[Citation] | None = None + # Chain-of-thought / extended-thinking output from reasoning models. + # ``reasoning_content`` is the unified flat-string surface (populated + # by DeepSeek-R1, GLM-4.5+, Gemini 2.5+ with includeThoughts, OpenAI + # o-series via chat/completions when supported, etc.). For Anthropic + # extended thinking we also populate ``thinking_blocks`` so callers + # can send the structured form back with signatures intact. + reasoning_content: str | None = None + thinking_blocks: list[ThinkingBlock] | None = None def model_dump(self) -> dict[str, Any]: """Return dict representation for serialization.""" @@ -218,6 +264,10 @@ def model_dump(self) -> dict[str, Any]: result["refusal"] = self.refusal if self.citations is not None: result["citations"] = [c.model_dump() for c in self.citations] + if self.reasoning_content is not None: + result["reasoning_content"] = self.reasoning_content + if self.thinking_blocks is not None: + result["thinking_blocks"] = [b.model_dump() for b in self.thinking_blocks] return result @@ -372,6 +422,13 @@ class ChunkDelta(_DictLike, msgspec.Struct): # for grounded providers — Perplexity, Gemini grounding, Anthropic # web-search). None on intermediate chunks. citations: list[Citation] | None = None + # Reasoning deltas. ``reasoning_content`` is the flat-string surface + # (DeepSeek-R1, GLM, Groq, etc.). ``thinking`` carries the per-chunk + # text of an Anthropic ``thinking_delta`` event; the matching + # ``signature`` lands in the trailing ``signature_delta``. + reasoning_content: str | None = None + thinking: str | None = None + signature: str | None = None def model_dump(self) -> dict[str, Any]: """Return dict representation for serialization.""" @@ -386,6 +443,12 @@ def model_dump(self) -> dict[str, Any]: result["function_call"] = self.function_call if self.citations is not None: result["citations"] = [c.model_dump() for c in self.citations] + if self.reasoning_content is not None: + result["reasoning_content"] = self.reasoning_content + if self.thinking is not None: + result["thinking"] = self.thinking + if self.signature is not None: + result["signature"] = self.signature return result diff --git a/pyproject.toml b/pyproject.toml index 5ad8112..d5ade71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "arcllm-sdk" -version = "0.4.8" +version = "0.4.9" description = "The arc connecting you to every LLM. Minimal dependencies, maximum performance." readme = "README.md" license = "Apache-2.0" diff --git a/tests/test_reasoning.py b/tests/test_reasoning.py index 23aed12..d3c6196 100644 --- a/tests/test_reasoning.py +++ b/tests/test_reasoning.py @@ -173,3 +173,301 @@ def test_no_thinking_args_leaves_config_clean(self, gemini_adapter: GeminiAdapte # exist, it must not carry thinkingConfig. if "generationConfig" in body: assert "thinkingConfig" not in body["generationConfig"] + + +# --------------------------------------------------------------------------- +# Response-side: reasoning_content / thinking_blocks parsing +# --------------------------------------------------------------------------- +# +# Reasoning models expose chain-of-thought differently per family. arcllm +# normalises everything into ``Message.reasoning_content`` (flat str). For +# Anthropic we additionally keep ``Message.thinking_blocks`` so callers can +# replay the structured form (with signatures) on the next turn. + + +class TestReasoningResponseExtraction: + """``parse_response`` populates ``reasoning_content`` + ``thinking_blocks``.""" + + def test_openai_reasoning_field_is_extracted(self, openai_adapter: OpenAIAdapter) -> None: + """OpenAI o-series chat/completions can return ``reasoning`` on message.""" + body = orjson.dumps( + { + "id": "x", + "model": "o3-mini", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "42", + "reasoning": "Counting Hitchhiker references...", + }, + "finish_reason": "stop", + } + ], + } + ) + resp = openai_adapter.parse_response(body, model="o3-mini") + msg = resp.choices[0].message + assert msg.content == "42" + assert msg.reasoning_content == "Counting Hitchhiker references..." + + def test_deepseek_style_reasoning_content_is_extracted( + self, openai_adapter: OpenAIAdapter + ) -> None: + """DeepSeek-R1 / GLM / Groq DeepSeek / Together / Fireworks DeepSeek-R1 + all use the ``reasoning_content`` field. Test through the OpenAI base + since every OpenAI-compat host inherits this parser.""" + body = orjson.dumps( + { + "id": "x", + "model": "deepseek-reasoner", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "ok", + "reasoning_content": "Let me think... yes, ok.", + }, + "finish_reason": "stop", + } + ], + } + ) + resp = openai_adapter.parse_response(body, model="deepseek-reasoner") + msg = resp.choices[0].message + assert msg.content == "ok" + assert msg.reasoning_content == "Let me think... yes, ok." + + def test_anthropic_thinking_blocks_preserved_with_signature( + self, anthropic_adapter: AnthropicAdapter + ) -> None: + """Anthropic extended-thinking returns structured blocks. The signature + must round-trip — replaying without it breaks tool-use.""" + body = orjson.dumps( + { + "id": "msg_x", + "type": "message", + "role": "assistant", + "model": "claude-sonnet-4-7", + "stop_reason": "end_turn", + "content": [ + { + "type": "thinking", + "thinking": "User wants ok. Reply ok.", + "signature": "sig_abc123", + }, + {"type": "text", "text": "ok"}, + ], + "usage": {"input_tokens": 5, "output_tokens": 12}, + } + ) + resp = anthropic_adapter.parse_response(body, model="claude-sonnet-4-7") + msg = resp.choices[0].message + assert msg.content == "ok" + assert msg.reasoning_content == "User wants ok. Reply ok." + assert msg.thinking_blocks is not None + assert len(msg.thinking_blocks) == 1 + block = msg.thinking_blocks[0] + assert block.type == "thinking" + assert block.thinking == "User wants ok. Reply ok." + assert block.signature == "sig_abc123" + + def test_anthropic_redacted_thinking_block_preserves_opaque_data( + self, anthropic_adapter: AnthropicAdapter + ) -> None: + """``redacted_thinking`` blocks have no readable text — only an opaque + payload that must round-trip back unchanged. They surface on + ``thinking_blocks`` but contribute nothing to ``reasoning_content``.""" + body = orjson.dumps( + { + "id": "msg_x", + "role": "assistant", + "model": "claude-sonnet-4-7", + "stop_reason": "end_turn", + "content": [ + {"type": "redacted_thinking", "data": "OPAQUE_BLOB"}, + {"type": "text", "text": "ok"}, + ], + "usage": {"input_tokens": 5, "output_tokens": 1}, + } + ) + resp = anthropic_adapter.parse_response(body, model="claude-sonnet-4-7") + msg = resp.choices[0].message + assert msg.reasoning_content is None + assert msg.thinking_blocks is not None + assert msg.thinking_blocks[0].type == "redacted_thinking" + assert msg.thinking_blocks[0].data == "OPAQUE_BLOB" + + def test_gemini_thought_parts_route_to_reasoning_content( + self, gemini_adapter: GeminiAdapter + ) -> None: + """Gemini 2.5+ marks chain-of-thought parts with ``thought: true``. + + Without this split, the thought text would land in ``content`` and + the caller would have to filter it out manually.""" + body = orjson.dumps( + { + "candidates": [ + { + "content": { + "parts": [ + {"text": "User wants ok.", "thought": True}, + {"text": "ok"}, + ] + }, + "finishReason": "STOP", + } + ], + "usageMetadata": {"promptTokenCount": 5, "candidatesTokenCount": 1}, + } + ) + resp = gemini_adapter.parse_response(body, model="gemini-2.5-pro") + msg = resp.choices[0].message + assert msg.content == "ok" + assert msg.reasoning_content == "User wants ok." + + def test_non_reasoning_response_leaves_fields_none(self, openai_adapter: OpenAIAdapter) -> None: + """Regular chat responses (no reasoning fields) must not invent them.""" + body = orjson.dumps( + { + "id": "x", + "model": "gpt-4o-mini", + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": "ok"}, + "finish_reason": "stop", + } + ], + } + ) + resp = openai_adapter.parse_response(body, model="gpt-4o-mini") + msg = resp.choices[0].message + assert msg.reasoning_content is None + assert msg.thinking_blocks is None + + +class TestReasoningStreamAccumulation: + """``stream_chunk_builder`` accumulates reasoning across chunks.""" + + def test_flat_reasoning_content_accumulates(self) -> None: + """DeepSeek/GLM/Groq style: reasoning_content arrives in deltas.""" + from arcllm.core import stream_chunk_builder + from arcllm.types import ChunkChoice, ChunkDelta, StreamChunk + + chunks = [ + StreamChunk( + id="x", + model="deepseek-reasoner", + choices=[ChunkChoice(index=0, delta=ChunkDelta(role="assistant"))], + ), + StreamChunk( + id="x", + model="deepseek-reasoner", + choices=[ChunkChoice(index=0, delta=ChunkDelta(reasoning_content="Let me "))], + ), + StreamChunk( + id="x", + model="deepseek-reasoner", + choices=[ChunkChoice(index=0, delta=ChunkDelta(reasoning_content="think."))], + ), + StreamChunk( + id="x", + model="deepseek-reasoner", + choices=[ + ChunkChoice(index=0, delta=ChunkDelta(content="ok"), finish_reason="stop") + ], + ), + ] + final = stream_chunk_builder(chunks) + msg = final.choices[0].message + assert msg.content == "ok" + assert msg.reasoning_content == "Let me think." + assert msg.thinking_blocks is None + + def test_anthropic_thinking_deltas_grouped_by_signature(self) -> None: + """Anthropic streaming: thinking_delta → thinking_delta → signature_delta + is one block. The next thinking_delta opens a new block.""" + from arcllm.core import stream_chunk_builder + from arcllm.types import ChunkChoice, ChunkDelta, StreamChunk + + chunks = [ + StreamChunk( + id="x", + model="claude-sonnet-4-7", + choices=[ChunkChoice(index=0, delta=ChunkDelta(role="assistant"))], + ), + StreamChunk( + id="x", + model="claude-sonnet-4-7", + choices=[ + ChunkChoice( + index=0, + delta=ChunkDelta(thinking="User wants ", reasoning_content="User wants "), + ) + ], + ), + StreamChunk( + id="x", + model="claude-sonnet-4-7", + choices=[ + ChunkChoice( + index=0, + delta=ChunkDelta(thinking="ok.", reasoning_content="ok."), + ) + ], + ), + StreamChunk( + id="x", + model="claude-sonnet-4-7", + choices=[ChunkChoice(index=0, delta=ChunkDelta(signature="sig_abc"))], + ), + StreamChunk( + id="x", + model="claude-sonnet-4-7", + choices=[ + ChunkChoice(index=0, delta=ChunkDelta(content="ok"), finish_reason="stop") + ], + ), + ] + final = stream_chunk_builder(chunks) + msg = final.choices[0].message + assert msg.content == "ok" + assert msg.reasoning_content == "User wants ok." + assert msg.thinking_blocks is not None + assert len(msg.thinking_blocks) == 1 + assert msg.thinking_blocks[0].thinking == "User wants ok." + assert msg.thinking_blocks[0].signature == "sig_abc" + + +class TestReasoningSerialization: + """``Message.model_dump`` round-trips reasoning fields.""" + + def test_dump_includes_reasoning_when_set(self) -> None: + from arcllm.types import Message, ThinkingBlock + + msg = Message( + role="assistant", + content="ok", + reasoning_content="thinking text", + thinking_blocks=[ + ThinkingBlock(type="thinking", thinking="thinking text", signature="s") + ], + ) + dumped = msg.model_dump() + assert dumped["reasoning_content"] == "thinking text" + assert dumped["thinking_blocks"] == [ + {"type": "thinking", "thinking": "thinking text", "signature": "s"} + ] + + def test_dump_omits_reasoning_when_absent(self) -> None: + """Don't emit empty reasoning fields — keeps the serialised shape lean + and matches OpenAI/litellm behaviour for non-reasoning responses.""" + from arcllm.types import Message + + msg = Message(role="assistant", content="ok") + dumped = msg.model_dump() + assert "reasoning_content" not in dumped + assert "thinking_blocks" not in dumped