diff --git a/arcllm/__init__.py b/arcllm/__init__.py
index da23918..7596cb7 100644
--- a/arcllm/__init__.py
+++ b/arcllm/__init__.py
@@ -69,7 +69,7 @@
 
 from __future__ import annotations
 
-__version__ = "0.4.8"
+__version__ = "0.4.9"
 __all__ = [
     "APIConnectionError",
     "APIError",
@@ -102,6 +102,7 @@
     "ServiceUnavailableError",
     "StreamChunk",
     "StreamingResponse",
+    "ThinkingBlock",
     "Timeout",
     "TimeoutError",
     "ToolCall",
@@ -224,6 +225,7 @@
     RerankResult,
     StreamChunk,
     StreamingResponse,
+    ThinkingBlock,
     ToolCall,
     Usage,
 )
diff --git a/arcllm/core.py b/arcllm/core.py
index a8f1ce8..dc02538 100644
--- a/arcllm/core.py
+++ b/arcllm/core.py
@@ -30,6 +30,7 @@
     ModelResponse,
     StreamChunk,
     StreamingResponse,
+    ThinkingBlock,
     ToolCall,
     Usage,
 )
@@ -548,6 +549,10 @@ def stream_chunk_builder(
     # Use specialized structure for better performance
     choice_roles: dict[int, str | None] = {}
     choice_content: dict[int, list[str]] = {}
+    choice_reasoning: dict[int, list[str]] = {}
+    # Anthropic-style: per-choice ordered list of (thinking_text, signature)
+    # blocks rebuilt from the stream so callers can replay them as input.
+    choice_thinking_blocks: dict[int, list[list[str]]] = {}
     choice_tool_calls: dict[
         int, dict[int, list[Any]]
     ] = {}  # idx -> tc_idx -> [id, type, name_parts, arg_parts]
@@ -571,6 +576,8 @@ def stream_chunk_builder(
             if idx not in choice_content:
                 choice_roles[idx] = None
                 choice_content[idx] = []
+                choice_reasoning[idx] = []
+                choice_thinking_blocks[idx] = []
                 choice_tool_calls[idx] = {}
                 choice_finish[idx] = None
                 choice_logprobs[idx] = None
@@ -584,6 +591,26 @@ def stream_chunk_builder(
             if delta_content:
                 choice_content[idx].append(delta_content)
 
+            # Reasoning (DeepSeek/GLM/o-series style — flat string deltas).
+            delta_reasoning = delta.reasoning_content
+            if delta_reasoning:
+                choice_reasoning[idx].append(delta_reasoning)
+
+            # Anthropic-style thinking deltas — group by current open block.
+            # A new block opens on the first thinking/signature delta of the
+            # stream, or when one arrives after the last block got its signature.
+            delta_thinking = delta.thinking
+            delta_signature = delta.signature
+            if delta_thinking is not None or delta_signature is not None:
+                blocks = choice_thinking_blocks[idx]
+                if not blocks or (blocks and blocks[-1][1]):
+                    # Last block is closed (has signature) — start a new one.
+                    blocks.append(["", ""])
+                if delta_thinking:
+                    blocks[-1][0] += delta_thinking
+                if delta_signature:
+                    blocks[-1][1] = delta_signature
+
             choice_finish_reason = choice.finish_reason
             if choice_finish_reason:
                 choice_finish[idx] = choice_finish_reason
@@ -645,10 +672,34 @@ def stream_chunk_builder(
         content_parts = choice_content[idx]
         content = "".join(content_parts) if content_parts else None
 
+        reasoning_parts = choice_reasoning[idx]
+        reasoning_content = "".join(reasoning_parts) if reasoning_parts else None
+
+        thinking_blocks_assembled: list[ThinkingBlock] | None = None
+        if choice_thinking_blocks[idx]:
+            thinking_blocks_assembled = [
+                ThinkingBlock(
+                    type="thinking",
+                    thinking=text,
+                    signature=sig or None,
+                )
+                for text, sig in choice_thinking_blocks[idx]
+                if text or sig
+            ] or None
+            # Fallback to populate the flat surface when only thinking blocks
+            # arrived (Anthropic) — concatenate their text so callers reading
+            # ``reasoning_content`` see the same string regardless of provider.
+            if reasoning_content is None and thinking_blocks_assembled is not None:
+                reasoning_content = (
+                    "".join(b.thinking or "" for b in thinking_blocks_assembled) or None
+                )
+
         message = Message(
             role=choice_roles[idx] or "assistant",
             content=content,
             tool_calls=tool_calls or None,
+            reasoning_content=reasoning_content,
+            thinking_blocks=thinking_blocks_assembled,
         )
 
         choices.append(
diff --git a/arcllm/providers/anthropic_adapter.py b/arcllm/providers/anthropic_adapter.py
index 9db63e8..3a20591 100644
--- a/arcllm/providers/anthropic_adapter.py
+++ b/arcllm/providers/anthropic_adapter.py
@@ -97,6 +97,7 @@
     Message,
     ModelResponse,
     StreamChunk,
+    ThinkingBlock,
     ToolCall,
     Usage,
 )
@@ -450,9 +451,11 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon
         now = int(time.time())
         content_blocks = resp.get("content", [])
 
-        # Extract text content and tool uses
+        # Extract text content, tool uses, and thinking blocks
         text_parts: list[str] = []
         tool_calls: list[ToolCall] = []
+        thinking_blocks: list[ThinkingBlock] = []
+        thinking_text_parts: list[str] = []
         # Citations are sourced from two places in Anthropic responses:
         #   - ``web_search_tool_result`` blocks: aggregate result list with
         #     ``url`` / ``title`` / ``snippet`` per source.
@@ -485,11 +488,29 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon
                     end_index=ann_dict.get("end_index") or ann_dict.get("end_char_index"),
                 )
 
-        # Second pass: tool uses + web_search_tool_result fallback (only fills
-        # URLs that the text-block annotations didn't already cover).
+        # Second pass: tool uses, thinking blocks, and web_search_tool_result
+        # fallback (only fills URLs the text-block annotations didn't cover).
         for block in content_blocks:
             kind = block.get("type")
-            if kind == "tool_use":
+            if kind == "thinking":
+                thinking_text = block.get("thinking", "")
+                thinking_blocks.append(
+                    ThinkingBlock(
+                        type="thinking",
+                        thinking=thinking_text,
+                        signature=block.get("signature"),
+                    )
+                )
+                if thinking_text:
+                    thinking_text_parts.append(thinking_text)
+            elif kind == "redacted_thinking":
+                thinking_blocks.append(
+                    ThinkingBlock(
+                        type="redacted_thinking",
+                        data=block.get("data"),
+                    )
+                )
+            elif kind == "tool_use":
                 tool_calls.append(
                     ToolCall(
                         id=block.get("id", ""),
@@ -518,12 +539,15 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon
         # Join text parts efficiently
         text_content = "".join(text_parts) if text_parts else None
         citations = list(citation_index.values()) if citation_index else None
+        reasoning_content = "".join(thinking_text_parts) if thinking_text_parts else None
 
         message = Message(
             role=resp.get("role", "assistant"),
             content=text_content,
             tool_calls=tool_calls or None,
             citations=citations,
+            reasoning_content=reasoning_content,
+            thinking_blocks=thinking_blocks or None,
         )
 
         # Map Anthropic stop reasons to OpenAI format
@@ -617,6 +641,21 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None:
                         )
                     ],
                 )
+            if block.get("type") == "thinking":
+                # Anthropic emits an empty thinking block first, then a
+                # series of thinking_delta events with the text, then a
+                # signature_delta with the cryptographic signature.
+                return StreamChunk(
+                    id="",
+                    model=model,
+                    choices=[
+                        ChunkChoice(
+                            index=0,
+                            delta=ChunkDelta(thinking=block.get("thinking", "")),
+                            finish_reason=None,
+                        )
+                    ],
+                )
             if block.get("type") == "tool_use":
                 # Start of tool use
                 return StreamChunk(
@@ -645,7 +684,8 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None:
 
         elif event_type == "content_block_delta":
             delta = event.get("delta", {})
-            if delta.get("type") == "text_delta":
+            delta_type = delta.get("type")
+            if delta_type == "text_delta":
                 return StreamChunk(
                     id="",
                     model=model,
@@ -657,6 +697,38 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None:
                         )
                     ],
                 )
+            if delta_type == "thinking_delta":
+                # Surface as both ``thinking`` (matches Anthropic wire shape
+                # for round-trip) and ``reasoning_content`` (so callers using
+                # the unified surface can stream thinking text without a
+                # provider-specific code path).
+                thinking_text = delta.get("thinking", "")
+                return StreamChunk(
+                    id="",
+                    model=model,
+                    choices=[
+                        ChunkChoice(
+                            index=0,
+                            delta=ChunkDelta(
+                                thinking=thinking_text,
+                                reasoning_content=thinking_text,
+                            ),
+                            finish_reason=None,
+                        )
+                    ],
+                )
+            if delta_type == "signature_delta":
+                return StreamChunk(
+                    id="",
+                    model=model,
+                    choices=[
+                        ChunkChoice(
+                            index=0,
+                            delta=ChunkDelta(signature=delta.get("signature", "")),
+                            finish_reason=None,
+                        )
+                    ],
+                )
             if delta.get("type") == "input_json_delta":
                 # Tool argument delta
                 return StreamChunk(
diff --git a/arcllm/providers/gemini_adapter.py b/arcllm/providers/gemini_adapter.py
index 111a007..785118e 100644
--- a/arcllm/providers/gemini_adapter.py
+++ b/arcllm/providers/gemini_adapter.py
@@ -420,11 +420,19 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon
 
             # Use list + join for efficient string building
             text_parts: list[str] = []
+            thought_parts: list[str] = []
             tool_calls: list[ToolCall] = []
 
             for part in parts:
                 if "text" in part:
-                    text_parts.append(part["text"])
+                    # Gemini marks chain-of-thought parts with ``thought: true``
+                    # when the request set ``thinkingConfig.includeThoughts``.
+                    # We split those out into ``reasoning_content`` so callers
+                    # don't have to filter them out of the answer text.
+                    if part.get("thought"):
+                        thought_parts.append(part["text"])
+                    else:
+                        text_parts.append(part["text"])
                 elif "functionCall" in part:
                     fc = part["functionCall"]
                     tool_calls.append(
@@ -439,12 +447,14 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon
                     )
 
             text_content = "".join(text_parts) if text_parts else None
+            reasoning_content = "".join(thought_parts) if thought_parts else None
             citations = _extract_grounding_citations(candidate)
             message = Message(
                 role="assistant",
                 content=text_content,
                 tool_calls=tool_calls or None,
                 citations=citations,
+                reasoning_content=reasoning_content,
             )
 
             # Map finish reason
@@ -507,11 +517,15 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None:
 
             # Use list + join for efficient string building
             text_parts: list[str] = []
+            thought_parts: list[str] = []
             tool_call_deltas: list[dict[str, Any]] = []
 
             for part in parts:
                 if "text" in part:
-                    text_parts.append(part["text"])
+                    if part.get("thought"):
+                        thought_parts.append(part["text"])
+                    else:
+                        text_parts.append(part["text"])
                 elif "functionCall" in part:
                     fc = part["functionCall"]
                     tool_call_deltas.append(
@@ -527,9 +541,11 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None:
                     )
 
             text_content = "".join(text_parts) if text_parts else None
+            reasoning_content = "".join(thought_parts) if thought_parts else None
             delta = ChunkDelta(
                 content=text_content,
                 tool_calls=tool_call_deltas or None,
+                reasoning_content=reasoning_content,
             )
 
             finish_reason = None
diff --git a/arcllm/providers/openai_adapter.py b/arcllm/providers/openai_adapter.py
index 823899c..3e75e18 100644
--- a/arcllm/providers/openai_adapter.py
+++ b/arcllm/providers/openai_adapter.py
@@ -231,12 +231,23 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon
                     arguments=fc.get("arguments", ""),
                 )
 
+            # ``reasoning_content`` is the de-facto field name used by
+            # DeepSeek-R1, GLM-4.5+, Groq's DeepSeek/Qwen-thinking models,
+            # Cerebras, Together, Fireworks, and any OpenAI-compat host
+            # serving a reasoning model. ``reasoning`` is the alias
+            # OpenAI ships on the chat-completions endpoint for o-series
+            # responses; we accept either and normalise to one field.
+            reasoning_content = message_data.get("reasoning_content") or message_data.get(
+                "reasoning"
+            )
+
             message = Message(
                 role=message_data.get("role", "assistant"),
                 content=message_data.get("content"),
                 tool_calls=tool_calls,
                 function_call=function_call,
                 refusal=message_data.get("refusal"),
+                reasoning_content=reasoning_content,
             )
 
             choices.append(
@@ -303,6 +314,8 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None:
                 content=delta_data.get("content"),
                 tool_calls=tool_calls,
                 function_call=delta_data.get("function_call"),
+                reasoning_content=delta_data.get("reasoning_content")
+                or delta_data.get("reasoning"),
             )
 
             choices.append(
diff --git a/arcllm/types.py b/arcllm/types.py
index 74f4100..1ebe061 100644
--- a/arcllm/types.py
+++ b/arcllm/types.py
@@ -186,6 +186,44 @@ def model_dump(self) -> dict[str, Any]:
         return result
 
 
+# =============================================================================
+# Reasoning / Thinking Types
+# =============================================================================
+
+
+class ThinkingBlock(_DictLike, msgspec.Struct):
+    """One structured reasoning block emitted by a thinking-capable model.
+
+    Anthropic extended thinking arrives as content blocks rather than a
+    plain string: ``type: "thinking"`` carries the text plus a
+    ``signature``, while ``type: "redacted_thinking"`` carries only an
+    opaque ``data`` payload. Keeping that shape (rather than collapsing
+    it into text) means a caller can hand the block back in the next
+    request with its signature intact, as tool-use round trips require.
+
+    Providers that return reasoning as a flat string (DeepSeek-R1, GLM,
+    Groq DeepSeek/Qwen-thinking, Gemini with ``includeThoughts=true``)
+    fill :attr:`Message.reasoning_content` directly instead.
+    """
+
+    type: Literal["thinking", "redacted_thinking"] = "thinking"
+    thinking: str | None = None
+    signature: str | None = None
+    # Opaque payload carried by ``redacted_thinking`` blocks (Anthropic-specific).
+    data: str | None = None
+
+    def model_dump(self) -> dict[str, Any]:
+        """Serialize to a plain dict, omitting optional fields left as ``None``."""
+        dumped: dict[str, Any] = {"type": self.type}
+        # ``type`` is always emitted; the optional fields follow in
+        # declaration order and are skipped when unset.
+        for field_name in ("thinking", "signature", "data"):
+            field_value = getattr(self, field_name)
+            if field_value is not None:
+                dumped[field_name] = field_value
+        return dumped
+
+
 # =============================================================================
 # Message Types
 # =============================================================================
@@ -204,6 +242,14 @@ class Message(_DictLike, msgspec.Struct):
     # responses; an empty list means "the provider was asked to ground but
     # returned no sources" (rare).
     citations: list[Citation] | None = None
+    # Chain-of-thought / extended-thinking output from reasoning models.
+    # ``reasoning_content`` is the unified flat-string surface (populated
+    # by DeepSeek-R1, GLM-4.5+, Gemini 2.5+ with includeThoughts, OpenAI
+    # o-series via chat/completions when supported, etc.). For Anthropic
+    # extended thinking we also populate ``thinking_blocks`` so callers
+    # can send the structured form back with signatures intact.
+    reasoning_content: str | None = None
+    thinking_blocks: list[ThinkingBlock] | None = None
 
     def model_dump(self) -> dict[str, Any]:
         """Return dict representation for serialization."""
@@ -218,6 +264,10 @@ def model_dump(self) -> dict[str, Any]:
             result["refusal"] = self.refusal
         if self.citations is not None:
             result["citations"] = [c.model_dump() for c in self.citations]
+        if self.reasoning_content is not None:
+            result["reasoning_content"] = self.reasoning_content
+        if self.thinking_blocks is not None:
+            result["thinking_blocks"] = [b.model_dump() for b in self.thinking_blocks]
         return result
 
 
@@ -372,6 +422,13 @@ class ChunkDelta(_DictLike, msgspec.Struct):
     # for grounded providers — Perplexity, Gemini grounding, Anthropic
     # web-search). None on intermediate chunks.
     citations: list[Citation] | None = None
+    # Reasoning deltas. ``reasoning_content`` is the flat-string surface
+    # (DeepSeek-R1, GLM, Groq, etc.). ``thinking`` carries the per-chunk
+    # text of an Anthropic ``thinking_delta`` event; the matching
+    # ``signature`` lands in the trailing ``signature_delta``.
+    reasoning_content: str | None = None
+    thinking: str | None = None
+    signature: str | None = None
 
     def model_dump(self) -> dict[str, Any]:
         """Return dict representation for serialization."""
@@ -386,6 +443,12 @@ def model_dump(self) -> dict[str, Any]:
             result["function_call"] = self.function_call
         if self.citations is not None:
             result["citations"] = [c.model_dump() for c in self.citations]
+        if self.reasoning_content is not None:
+            result["reasoning_content"] = self.reasoning_content
+        if self.thinking is not None:
+            result["thinking"] = self.thinking
+        if self.signature is not None:
+            result["signature"] = self.signature
         return result
 
 
diff --git a/pyproject.toml b/pyproject.toml
index 5ad8112..d5ade71 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "arcllm-sdk"
-version = "0.4.8"
+version = "0.4.9"
 description = "The arc connecting you to every LLM. Minimal dependencies, maximum performance."
 readme = "README.md"
 license = "Apache-2.0"
diff --git a/tests/test_reasoning.py b/tests/test_reasoning.py
index 23aed12..d3c6196 100644
--- a/tests/test_reasoning.py
+++ b/tests/test_reasoning.py
@@ -173,3 +173,301 @@ def test_no_thinking_args_leaves_config_clean(self, gemini_adapter: GeminiAdapte
         # exist, it must not carry thinkingConfig.
         if "generationConfig" in body:
             assert "thinkingConfig" not in body["generationConfig"]
+
+
+# ---------------------------------------------------------------------------
+# Response-side: reasoning_content / thinking_blocks parsing
+# ---------------------------------------------------------------------------
+#
+# Reasoning models expose chain-of-thought differently per family. arcllm
+# normalises everything into ``Message.reasoning_content`` (flat str). For
+# Anthropic we additionally keep ``Message.thinking_blocks`` so callers can
+# replay the structured form (with signatures) on the next turn.
+
+
+class TestReasoningResponseExtraction:
+    """Non-streaming ``parse_response`` fills ``reasoning_content`` / ``thinking_blocks``."""
+
+    def test_openai_reasoning_field_is_extracted(self, openai_adapter: OpenAIAdapter) -> None:
+        """A ``reasoning`` key on the message maps onto ``reasoning_content``."""
+        payload = orjson.dumps(
+            {
+                "id": "x",
+                "model": "o3-mini",
+                "choices": [
+                    {
+                        "index": 0,
+                        "message": {
+                            "role": "assistant",
+                            "content": "42",
+                            "reasoning": "Counting Hitchhiker references...",
+                        },
+                        "finish_reason": "stop",
+                    }
+                ],
+            }
+        )
+        parsed = openai_adapter.parse_response(payload, model="o3-mini")
+        message = parsed.choices[0].message
+        assert message.content == "42"
+        assert message.reasoning_content == "Counting Hitchhiker references..."
+
+    def test_deepseek_style_reasoning_content_is_extracted(
+        self, openai_adapter: OpenAIAdapter
+    ) -> None:
+        """The ``reasoning_content`` key (DeepSeek-R1, GLM, Groq, Together,
+        Fireworks) is passed through unchanged. Exercised on the OpenAI base
+        adapter because OpenAI-compatible hosts inherit this parser."""
+        payload = orjson.dumps(
+            {
+                "id": "x",
+                "model": "deepseek-reasoner",
+                "choices": [
+                    {
+                        "index": 0,
+                        "message": {
+                            "role": "assistant",
+                            "content": "ok",
+                            "reasoning_content": "Let me think... yes, ok.",
+                        },
+                        "finish_reason": "stop",
+                    }
+                ],
+            }
+        )
+        parsed = openai_adapter.parse_response(payload, model="deepseek-reasoner")
+        message = parsed.choices[0].message
+        assert message.content == "ok"
+        assert message.reasoning_content == "Let me think... yes, ok."
+
+    def test_anthropic_thinking_blocks_preserved_with_signature(
+        self, anthropic_adapter: AnthropicAdapter
+    ) -> None:
+        """A structured ``thinking`` block keeps its text and signature; the
+        signature has to survive because replay without it breaks tool-use."""
+        payload = orjson.dumps(
+            {
+                "id": "msg_x",
+                "type": "message",
+                "role": "assistant",
+                "model": "claude-sonnet-4-7",
+                "stop_reason": "end_turn",
+                "content": [
+                    {
+                        "type": "thinking",
+                        "thinking": "User wants ok. Reply ok.",
+                        "signature": "sig_abc123",
+                    },
+                    {"type": "text", "text": "ok"},
+                ],
+                "usage": {"input_tokens": 5, "output_tokens": 12},
+            }
+        )
+        parsed = anthropic_adapter.parse_response(payload, model="claude-sonnet-4-7")
+        message = parsed.choices[0].message
+        assert message.content == "ok"
+        assert message.reasoning_content == "User wants ok. Reply ok."
+        assert message.thinking_blocks is not None
+        assert len(message.thinking_blocks) == 1
+        first_block = message.thinking_blocks[0]
+        assert first_block.type == "thinking"
+        assert first_block.thinking == "User wants ok. Reply ok."
+        assert first_block.signature == "sig_abc123"
+
+    def test_anthropic_redacted_thinking_block_preserves_opaque_data(
+        self, anthropic_adapter: AnthropicAdapter
+    ) -> None:
+        """A ``redacted_thinking`` block carries no readable text, just an opaque
+        payload to be echoed back verbatim. It shows up in ``thinking_blocks``
+        and adds nothing to ``reasoning_content``."""
+        payload = orjson.dumps(
+            {
+                "id": "msg_x",
+                "role": "assistant",
+                "model": "claude-sonnet-4-7",
+                "stop_reason": "end_turn",
+                "content": [
+                    {"type": "redacted_thinking", "data": "OPAQUE_BLOB"},
+                    {"type": "text", "text": "ok"},
+                ],
+                "usage": {"input_tokens": 5, "output_tokens": 1},
+            }
+        )
+        parsed = anthropic_adapter.parse_response(payload, model="claude-sonnet-4-7")
+        message = parsed.choices[0].message
+        assert message.reasoning_content is None
+        assert message.thinking_blocks is not None
+        assert message.thinking_blocks[0].type == "redacted_thinking"
+        assert message.thinking_blocks[0].data == "OPAQUE_BLOB"
+
+    def test_gemini_thought_parts_route_to_reasoning_content(
+        self, gemini_adapter: GeminiAdapter
+    ) -> None:
+        """Parts flagged ``thought: true`` (Gemini 2.5+) go to ``reasoning_content``.
+
+        Otherwise the thought text would be mixed into ``content`` and callers
+        would need to strip it out themselves."""
+        payload = orjson.dumps(
+            {
+                "candidates": [
+                    {
+                        "content": {
+                            "parts": [
+                                {"text": "User wants ok.", "thought": True},
+                                {"text": "ok"},
+                            ]
+                        },
+                        "finishReason": "STOP",
+                    }
+                ],
+                "usageMetadata": {"promptTokenCount": 5, "candidatesTokenCount": 1},
+            }
+        )
+        parsed = gemini_adapter.parse_response(payload, model="gemini-2.5-pro")
+        message = parsed.choices[0].message
+        assert message.content == "ok"
+        assert message.reasoning_content == "User wants ok."
+
+    def test_non_reasoning_response_leaves_fields_none(self, openai_adapter: OpenAIAdapter) -> None:
+        """A plain chat response must leave both reasoning fields as ``None``."""
+        payload = orjson.dumps(
+            {
+                "id": "x",
+                "model": "gpt-4o-mini",
+                "choices": [
+                    {
+                        "index": 0,
+                        "message": {"role": "assistant", "content": "ok"},
+                        "finish_reason": "stop",
+                    }
+                ],
+            }
+        )
+        parsed = openai_adapter.parse_response(payload, model="gpt-4o-mini")
+        message = parsed.choices[0].message
+        assert message.reasoning_content is None
+        assert message.thinking_blocks is None
+
+
+class TestReasoningStreamAccumulation:
+    """``stream_chunk_builder`` stitches reasoning deltas into the final message."""
+
+    def test_flat_reasoning_content_accumulates(self) -> None:
+        """Flat ``reasoning_content`` deltas (DeepSeek/GLM/Groq style) are joined in order."""
+        from arcllm.core import stream_chunk_builder
+        from arcllm.types import ChunkChoice, ChunkDelta, StreamChunk
+
+        stream = [
+            StreamChunk(
+                id="x",
+                model="deepseek-reasoner",
+                choices=[ChunkChoice(index=0, delta=ChunkDelta(role="assistant"))],
+            ),
+            StreamChunk(
+                id="x",
+                model="deepseek-reasoner",
+                choices=[ChunkChoice(index=0, delta=ChunkDelta(reasoning_content="Let me "))],
+            ),
+            StreamChunk(
+                id="x",
+                model="deepseek-reasoner",
+                choices=[ChunkChoice(index=0, delta=ChunkDelta(reasoning_content="think."))],
+            ),
+            StreamChunk(
+                id="x",
+                model="deepseek-reasoner",
+                choices=[
+                    ChunkChoice(index=0, delta=ChunkDelta(content="ok"), finish_reason="stop")
+                ],
+            ),
+        ]
+        assembled = stream_chunk_builder(stream)
+        message = assembled.choices[0].message
+        assert message.content == "ok"
+        assert message.reasoning_content == "Let me think."
+        assert message.thinking_blocks is None
+
+    def test_anthropic_thinking_deltas_grouped_by_signature(self) -> None:
+        """Anthropic streaming: consecutive thinking_delta chunks closed by a
+        signature_delta form one block; a later thinking_delta starts another."""
+        from arcllm.core import stream_chunk_builder
+        from arcllm.types import ChunkChoice, ChunkDelta, StreamChunk
+
+        stream = [
+            StreamChunk(
+                id="x",
+                model="claude-sonnet-4-7",
+                choices=[ChunkChoice(index=0, delta=ChunkDelta(role="assistant"))],
+            ),
+            StreamChunk(
+                id="x",
+                model="claude-sonnet-4-7",
+                choices=[
+                    ChunkChoice(
+                        index=0,
+                        delta=ChunkDelta(thinking="User wants ", reasoning_content="User wants "),
+                    )
+                ],
+            ),
+            StreamChunk(
+                id="x",
+                model="claude-sonnet-4-7",
+                choices=[
+                    ChunkChoice(
+                        index=0,
+                        delta=ChunkDelta(thinking="ok.", reasoning_content="ok."),
+                    )
+                ],
+            ),
+            StreamChunk(
+                id="x",
+                model="claude-sonnet-4-7",
+                choices=[ChunkChoice(index=0, delta=ChunkDelta(signature="sig_abc"))],
+            ),
+            StreamChunk(
+                id="x",
+                model="claude-sonnet-4-7",
+                choices=[
+                    ChunkChoice(index=0, delta=ChunkDelta(content="ok"), finish_reason="stop")
+                ],
+            ),
+        ]
+        assembled = stream_chunk_builder(stream)
+        message = assembled.choices[0].message
+        assert message.content == "ok"
+        assert message.reasoning_content == "User wants ok."
+        assert message.thinking_blocks is not None
+        assert len(message.thinking_blocks) == 1
+        assert message.thinking_blocks[0].thinking == "User wants ok."
+        assert message.thinking_blocks[0].signature == "sig_abc"
+
+
+class TestReasoningSerialization:
+    """``Message.model_dump`` emits reasoning fields only when they are set."""
+
+    def test_dump_includes_reasoning_when_set(self) -> None:
+        from arcllm.types import Message, ThinkingBlock
+
+        message = Message(
+            role="assistant",
+            content="ok",
+            reasoning_content="thinking text",
+            thinking_blocks=[
+                ThinkingBlock(type="thinking", thinking="thinking text", signature="s")
+            ],
+        )
+        serialized = message.model_dump()
+        assert serialized["reasoning_content"] == "thinking text"
+        assert serialized["thinking_blocks"] == [
+            {"type": "thinking", "thinking": "thinking text", "signature": "s"}
+        ]
+
+    def test_dump_omits_reasoning_when_absent(self) -> None:
+        """Unset reasoning fields are left out of the dump, keeping the payload
+        lean and in line with OpenAI/litellm output for non-reasoning replies."""
+        from arcllm.types import Message
+
+        message = Message(role="assistant", content="ok")
+        serialized = message.model_dump()
+        assert "reasoning_content" not in serialized
+        assert "thinking_blocks" not in serialized