dynamiq-ai · vitalii-dynamiq · May 10, 2026 · May 6, 2026 · May 7, 2026 · May 10, 2026
diff --git a/arcllm/__init__.py b/arcllm/__init__.py
@@ -69,7 +69,7 @@
 
 from __future__ import annotations
 
-__version__ = "0.4.8"
+__version__ = "0.4.9"
 __all__ = [
     "APIConnectionError",
     "APIError",
@@ -102,6 +102,7 @@
     "ServiceUnavailableError",
     "StreamChunk",
     "StreamingResponse",
+    "ThinkingBlock",
     "Timeout",
     "TimeoutError",
     "ToolCall",
@@ -224,6 +225,7 @@
     RerankResult,
     StreamChunk,
     StreamingResponse,
+    ThinkingBlock,
     ToolCall,
     Usage,
 )

diff --git a/arcllm/core.py b/arcllm/core.py
@@ -30,6 +30,7 @@
     ModelResponse,
     StreamChunk,
     StreamingResponse,
+    ThinkingBlock,
     ToolCall,
     Usage,
 )
@@ -548,6 +549,10 @@ def stream_chunk_builder(
     # Use specialized structure for better performance
     choice_roles: dict[int, str | None] = {}
     choice_content: dict[int, list[str]] = {}
+    choice_reasoning: dict[int, list[str]] = {}
+    # Anthropic-style: per-choice ordered list of (thinking_text, signature)
+    # blocks rebuilt from the stream so callers can replay them as input.
+    choice_thinking_blocks: dict[int, list[list[str]]] = {}
     choice_tool_calls: dict[
         int, dict[int, list[Any]]
     ] = {}  # idx -> tc_idx -> [id, type, name_parts, arg_parts]
@@ -571,6 +576,8 @@ def stream_chunk_builder(
             if idx not in choice_content:
                 choice_roles[idx] = None
                 choice_content[idx] = []
+                choice_reasoning[idx] = []
+                choice_thinking_blocks[idx] = []
                 choice_tool_calls[idx] = {}
                 choice_finish[idx] = None
                 choice_logprobs[idx] = None
@@ -584,6 +591,26 @@ def stream_chunk_builder(
             if delta_content:
                 choice_content[idx].append(delta_content)
 
+            # Reasoning (DeepSeek/GLM/o-series style — flat string deltas).
+            delta_reasoning = delta.reasoning_content
+            if delta_reasoning:
+                choice_reasoning[idx].append(delta_reasoning)
+
+            # Anthropic-style thinking deltas — group by current open block.
+            # A new block starts on the first thinking/signature delta of the
+            # stream, or on the first one after the last block got a signature.
+            delta_thinking = delta.thinking
+            delta_signature = delta.signature
+            if delta_thinking is not None or delta_signature is not None:
+                blocks = choice_thinking_blocks[idx]
+                if not blocks or (blocks and blocks[-1][1]):
+                    # No open block (none yet, or last one is signed) — start one.
+                    blocks.append(["", ""])
+                if delta_thinking:
+                    blocks[-1][0] += delta_thinking
+                if delta_signature:
+                    blocks[-1][1] = delta_signature
+
             choice_finish_reason = choice.finish_reason
             if choice_finish_reason:
                 choice_finish[idx] = choice_finish_reason
@@ -645,10 +672,34 @@ def stream_chunk_builder(
         content_parts = choice_content[idx]
         content = "".join(content_parts) if content_parts else None
 
+        reasoning_parts = choice_reasoning[idx]
+        reasoning_content = "".join(reasoning_parts) if reasoning_parts else None
+
+        thinking_blocks_assembled: list[ThinkingBlock] | None = None
+        if choice_thinking_blocks[idx]:
+            thinking_blocks_assembled = [
+                ThinkingBlock(
+                    type="thinking",
+                    thinking=text,
+                    signature=sig or None,
+                )
+                for text, sig in choice_thinking_blocks[idx]
+                if text or sig
+            ] or None
+            # Fallback to populate the flat surface when only thinking blocks
+            # arrived (Anthropic) — concatenate their text so callers reading
+            # ``reasoning_content`` see the same string regardless of provider.
+            if reasoning_content is None and thinking_blocks_assembled is not None:
+                reasoning_content = (
+                    "".join(b.thinking or "" for b in thinking_blocks_assembled) or None
+                )
+
         message = Message(
             role=choice_roles[idx] or "assistant",
             content=content,
             tool_calls=tool_calls or None,
+            reasoning_content=reasoning_content,
+            thinking_blocks=thinking_blocks_assembled,
         )
 
         choices.append(

diff --git a/arcllm/providers/anthropic_adapter.py b/arcllm/providers/anthropic_adapter.py
@@ -97,6 +97,7 @@
     Message,
     ModelResponse,
     StreamChunk,
+    ThinkingBlock,
     ToolCall,
     Usage,
 )
@@ -450,9 +451,11 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon
         now = int(time.time())
         content_blocks = resp.get("content", [])
 
-        # Extract text content and tool uses
+        # Extract text content, tool uses, and thinking blocks
         text_parts: list[str] = []
         tool_calls: list[ToolCall] = []
+        thinking_blocks: list[ThinkingBlock] = []
+        thinking_text_parts: list[str] = []
         # Citations are sourced from two places in Anthropic responses:
         #   - ``web_search_tool_result`` blocks: aggregate result list with
         #     ``url`` / ``title`` / ``snippet`` per source.
@@ -485,11 +488,29 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon
                     end_index=ann_dict.get("end_index") or ann_dict.get("end_char_index"),
                 )
 
-        # Second pass: tool uses + web_search_tool_result fallback (only fills
-        # URLs that the text-block annotations didn't already cover).
+        # Second pass: tool uses, thinking blocks, and web_search_tool_result
+        # fallback (only fills URLs the text-block annotations didn't cover).
         for block in content_blocks:
             kind = block.get("type")
-            if kind == "tool_use":
+            if kind == "thinking":
+                thinking_text = block.get("thinking", "")
+                thinking_blocks.append(
+                    ThinkingBlock(
+                        type="thinking",
+                        thinking=thinking_text,
+                        signature=block.get("signature"),
+                    )
+                )
+                if thinking_text:
+                    thinking_text_parts.append(thinking_text)
+            elif kind == "redacted_thinking":
+                thinking_blocks.append(
+                    ThinkingBlock(
+                        type="redacted_thinking",
+                        data=block.get("data"),
+                    )
+                )
+            elif kind == "tool_use":
                 tool_calls.append(
                     ToolCall(
                         id=block.get("id", ""),
@@ -518,12 +539,15 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon
         # Join text parts efficiently
         text_content = "".join(text_parts) if text_parts else None
         citations = list(citation_index.values()) if citation_index else None
+        reasoning_content = "".join(thinking_text_parts) if thinking_text_parts else None
 
         message = Message(
             role=resp.get("role", "assistant"),
             content=text_content,
             tool_calls=tool_calls or None,
             citations=citations,
+            reasoning_content=reasoning_content,
+            thinking_blocks=thinking_blocks or None,
         )
 
         # Map Anthropic stop reasons to OpenAI format
@@ -617,6 +641,21 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None:
                         )
                     ],
                 )
+            if block.get("type") == "thinking":
+                # Anthropic emits an empty thinking block first, then a
+                # series of thinking_delta events with the text, then a
+                # signature_delta with the cryptographic signature.
+                return StreamChunk(
+                    id="",
+                    model=model,
+                    choices=[
+                        ChunkChoice(
+                            index=0,
+                            delta=ChunkDelta(thinking=block.get("thinking", "")),
+                            finish_reason=None,
+                        )
+                    ],
+                )
             if block.get("type") == "tool_use":
                 # Start of tool use
                 return StreamChunk(
@@ -645,7 +684,8 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None:
 
         elif event_type == "content_block_delta":
             delta = event.get("delta", {})
-            if delta.get("type") == "text_delta":
+            delta_type = delta.get("type")
+            if delta_type == "text_delta":
                 return StreamChunk(
                     id="",
                     model=model,
@@ -657,6 +697,38 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None:
                         )
                     ],
                 )
+            if delta_type == "thinking_delta":
+                # Surface as both ``thinking`` (matches Anthropic wire shape
+                # for round-trip) and ``reasoning_content`` (so callers using
+                # the unified surface can stream thinking text without a
+                # provider-specific code path).
+                thinking_text = delta.get("thinking", "")
+                return StreamChunk(
+                    id="",
+                    model=model,
+                    choices=[
+                        ChunkChoice(
+                            index=0,
+                            delta=ChunkDelta(
+                                thinking=thinking_text,
+                                reasoning_content=thinking_text,
+                            ),
+                            finish_reason=None,
+                        )
+                    ],
+                )
+            if delta_type == "signature_delta":
+                return StreamChunk(
+                    id="",
+                    model=model,
+                    choices=[
+                        ChunkChoice(
+                            index=0,
+                            delta=ChunkDelta(signature=delta.get("signature", "")),
+                            finish_reason=None,
+                        )
+                    ],
+                )
             if delta.get("type") == "input_json_delta":
                 # Tool argument delta
                 return StreamChunk(

diff --git a/arcllm/providers/gemini_adapter.py b/arcllm/providers/gemini_adapter.py
@@ -420,11 +420,19 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon
 
             # Use list + join for efficient string building
             text_parts: list[str] = []
+            thought_parts: list[str] = []
             tool_calls: list[ToolCall] = []
 
             for part in parts:
                 if "text" in part:
-                    text_parts.append(part["text"])
+                    # Gemini marks chain-of-thought parts with ``thought: true``
+                    # when the request set ``thinkingConfig.includeThoughts``.
+                    # We split those out into ``reasoning_content`` so callers
+                    # don't have to filter them out of the answer text.
+                    if part.get("thought"):
+                        thought_parts.append(part["text"])
+                    else:
+                        text_parts.append(part["text"])
                 elif "functionCall" in part:
                     fc = part["functionCall"]
                     tool_calls.append(
@@ -439,12 +447,14 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon
                     )
 
             text_content = "".join(text_parts) if text_parts else None
+            reasoning_content = "".join(thought_parts) if thought_parts else None
             citations = _extract_grounding_citations(candidate)
             message = Message(
                 role="assistant",
                 content=text_content,
                 tool_calls=tool_calls or None,
                 citations=citations,
+                reasoning_content=reasoning_content,
             )
 
             # Map finish reason
@@ -507,11 +517,15 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None:
 
             # Use list + join for efficient string building
             text_parts: list[str] = []
+            thought_parts: list[str] = []
             tool_call_deltas: list[dict[str, Any]] = []
 
             for part in parts:
                 if "text" in part:
-                    text_parts.append(part["text"])
+                    if part.get("thought"):
+                        thought_parts.append(part["text"])
+                    else:
+                        text_parts.append(part["text"])
                 elif "functionCall" in part:
                     fc = part["functionCall"]
                     tool_call_deltas.append(
@@ -527,9 +541,11 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None:
                     )
 
             text_content = "".join(text_parts) if text_parts else None
+            reasoning_content = "".join(thought_parts) if thought_parts else None
             delta = ChunkDelta(
                 content=text_content,
                 tool_calls=tool_call_deltas or None,
+                reasoning_content=reasoning_content,
             )
 
             finish_reason = None

diff --git a/arcllm/providers/openai_adapter.py b/arcllm/providers/openai_adapter.py
@@ -231,12 +231,23 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon
                     arguments=fc.get("arguments", ""),
                 )
 
+            # ``reasoning_content`` is the de-facto field name used by
+            # DeepSeek-R1, GLM-4.5+, Groq's DeepSeek/Qwen-thinking models,
+            # Cerebras, Together, Fireworks, and any OpenAI-compat host
+            # serving a reasoning model. ``reasoning`` is the alias
+            # OpenAI ships on the chat-completions endpoint for o-series
+            # responses; we accept either and normalise to one field.
+            reasoning_content = message_data.get("reasoning_content") or message_data.get(
+                "reasoning"
+            )
+
             message = Message(
                 role=message_data.get("role", "assistant"),
                 content=message_data.get("content"),
                 tool_calls=tool_calls,
                 function_call=function_call,
                 refusal=message_data.get("refusal"),
+                reasoning_content=reasoning_content,
             )
 
             choices.append(
@@ -303,6 +314,8 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None:
                 content=delta_data.get("content"),
                 tool_calls=tool_calls,
                 function_call=delta_data.get("function_call"),
+                reasoning_content=delta_data.get("reasoning_content")
+                or delta_data.get("reasoning"),
             )
 
             choices.append(