dynamiq-ai · vitalii-dynamiq · May 10, 2026 · May 6, 2026 · May 7, 2026 · May 10, 2026
diff --git a/arcllm/__init__.py b/arcllm/__init__.py
@@ -69,7 +69,7 @@
 
 from __future__ import annotations
 
-__version__ = "0.4.6"
+__version__ = "0.4.9"
 __all__ = [
     "APIConnectionError",
     "APIError",
@@ -102,6 +102,7 @@
     "ServiceUnavailableError",
     "StreamChunk",
     "StreamingResponse",
+    "ThinkingBlock",
     "Timeout",
     "TimeoutError",
     "ToolCall",
@@ -224,6 +225,7 @@
     RerankResult,
     StreamChunk,
     StreamingResponse,
+    ThinkingBlock,
     ToolCall,
     Usage,
 )

diff --git a/arcllm/core.py b/arcllm/core.py
@@ -30,6 +30,7 @@
     ModelResponse,
     StreamChunk,
     StreamingResponse,
+    ThinkingBlock,
     ToolCall,
     Usage,
 )
@@ -548,6 +549,10 @@ def stream_chunk_builder(
     # Use specialized structure for better performance
     choice_roles: dict[int, str | None] = {}
     choice_content: dict[int, list[str]] = {}
+    choice_reasoning: dict[int, list[str]] = {}
+    # Anthropic-style: per-choice ordered list of (thinking_text, signature)
+    # blocks rebuilt from the stream so callers can replay them as input.
+    choice_thinking_blocks: dict[int, list[list[str]]] = {}
     choice_tool_calls: dict[
         int, dict[int, list[Any]]
     ] = {}  # idx -> tc_idx -> [id, type, name_parts, arg_parts]
@@ -571,6 +576,8 @@ def stream_chunk_builder(
             if idx not in choice_content:
                 choice_roles[idx] = None
                 choice_content[idx] = []
+                choice_reasoning[idx] = []
+                choice_thinking_blocks[idx] = []
                 choice_tool_calls[idx] = {}
                 choice_finish[idx] = None
                 choice_logprobs[idx] = None
@@ -584,6 +591,26 @@ def stream_chunk_builder(
             if delta_content:
                 choice_content[idx].append(delta_content)
 
+            # Reasoning (DeepSeek/GLM/o-series style — flat string deltas).
+            delta_reasoning = delta.reasoning_content
+            if delta_reasoning:
+                choice_reasoning[idx].append(delta_reasoning)
+
+            # Anthropic-style thinking deltas — group by current open block.
+            # A new block starts whenever a thinking delta arrives after a
+            # signature delta (or first thinking delta of the stream).
+            delta_thinking = delta.thinking
+            delta_signature = delta.signature
+            if delta_thinking is not None or delta_signature is not None:
+                blocks = choice_thinking_blocks[idx]
+                if not blocks or (blocks and blocks[-1][1]):
+                    # Last block is closed (has signature) — start a new one.
+                    blocks.append(["", ""])
+                if delta_thinking:
+                    blocks[-1][0] += delta_thinking
+                if delta_signature:
+                    blocks[-1][1] = delta_signature
+
             choice_finish_reason = choice.finish_reason
             if choice_finish_reason:
                 choice_finish[idx] = choice_finish_reason
@@ -645,10 +672,34 @@ def stream_chunk_builder(
         content_parts = choice_content[idx]
         content = "".join(content_parts) if content_parts else None
 
+        reasoning_parts = choice_reasoning[idx]
+        reasoning_content = "".join(reasoning_parts) if reasoning_parts else None
+
+        thinking_blocks_assembled: list[ThinkingBlock] | None = None
+        if choice_thinking_blocks[idx]:
+            thinking_blocks_assembled = [
+                ThinkingBlock(
+                    type="thinking",
+                    thinking=text,
+                    signature=sig or None,
+                )
+                for text, sig in choice_thinking_blocks[idx]
+                if text or sig
+            ] or None
+            # Fallback to populate the flat surface when only thinking blocks
+            # arrived (Anthropic) — concatenate their text so callers reading
+            # ``reasoning_content`` see the same string regardless of provider.
+            if reasoning_content is None and thinking_blocks_assembled is not None:
+                reasoning_content = (
+                    "".join(b.thinking or "" for b in thinking_blocks_assembled) or None
+                )
+
         message = Message(
             role=choice_roles[idx] or "assistant",
             content=content,
             tool_calls=tool_calls or None,
+            reasoning_content=reasoning_content,
+            thinking_blocks=thinking_blocks_assembled,
         )
 
         choices.append(

diff --git a/arcllm/exceptions.py b/arcllm/exceptions.py
@@ -37,16 +37,16 @@ class ArcLLMError(Exception):
     def __init__(
         self,
         message: str,
-        *,
         provider: str | None = None,
-        # Litellm-compat alias: callers migrating from litellm pass
-        # ``llm_provider`` (its kwarg name). If both are given, ``provider``
-        # wins so explicit arcllm code keeps its semantics.
-        llm_provider: str | None = None,
         model: str | None = None,
         status_code: int | None = None,
         request_id: str | None = None,
         raw_response: Any | None = None,
+        *,
+        # Litellm-compat alias: callers migrating from litellm pass
+        # ``llm_provider`` (its kwarg name). If both are given, ``provider``
+        # wins so explicit arcllm code keeps its semantics.
+        llm_provider: str | None = None,
     ) -> None:
         super().__init__(message)
         self.message = message
@@ -103,11 +103,17 @@ class RateLimitError(ArcLLMError):
     def __init__(
         self,
         message: str,
-        *,
+        provider: str | None = None,
+        model: str | None = None,
+        *args: Any,
         retry_after: float | None = None,
         **kwargs: Any,
     ) -> None:
-        super().__init__(message, **kwargs)
+        # Forward ``provider``/``model`` positionally rather than as keywords:
+        # with keywords, any extra positional in ``*args`` would bind to the
+        # parent's ``provider`` slot and raise "multiple values for argument".
+        # Extras now continue at the parent's next parameter, ``status_code``.
+        super().__init__(message, provider, model, *args, **kwargs)
         self.retry_after = retry_after
 
 
@@ -151,17 +157,31 @@ class ProviderAPIError(ArcLLMError):
 
     This is used for provider-specific errors that don't map to
     other more specific exception types.
+
+    Litellm-compat: callers may construct this as
+    ``ProviderAPIError(status_code, message, provider, model)`` (litellm's
+    ``APIError`` signature). Detection is by type — if the first arg is an
+    ``int``, it's the status code and the remaining positionals shift.
     """
 
     def __init__(
         self,
-        message: str,
-        *,
+        *args: Any,
         error_type: str | None = None,
         error_code: str | None = None,
         **kwargs: Any,
     ) -> None:
-        super().__init__(message, **kwargs)
+        # Litellm `APIError(status_code, message, llm_provider, model)`
+        # vs arcllm `ProviderAPIError(message, provider, model, status_code)`.
+        if args and isinstance(args[0], int):
+            # Bind the litellm positionals by keyword with ``setdefault`` so
+            # an explicit ``provider=``/``model=``/``status_code=`` keyword
+            # wins instead of raising "got multiple values for argument".
+            names = ("status_code", "message", "provider", "model")
+            for name, value in zip(names, args):
+                kwargs.setdefault(name, value)
+            args = (kwargs.pop("message", ""),)
+        super().__init__(*args, **kwargs)
         self.error_type = error_type
         self.error_code = error_code
 
@@ -239,24 +259,49 @@ def __init__(
         self.filter_reason = filter_reason
 
 
-class InvalidRequestError(ArcLLMError):
+class BadRequestError(ArcLLMError):
     """
     Raised when the request is malformed or invalid.
 
     Common causes:
     - Missing required parameters
     - Invalid parameter values
     - Malformed message format
+
+    Litellm-compat: callers may construct this as
+    ``BadRequestError(message, model, llm_provider)`` (litellm signature
+    has ``model`` second). The base ``ArcLLMError`` has ``provider`` second.
+    Both shapes are accepted: the two positionals are swapped only when the
+    third is in ``SUPPORTED_PROVIDERS`` and the second is not; otherwise
+    they are read as ``(provider, model)``.
     """
 
     def __init__(
         self,
         message: str,
-        *,
+        arg2: str | None = None,
+        arg3: str | None = None,
+        *args: Any,
         param: str | None = None,
         **kwargs: Any,
     ) -> None:
-        super().__init__(message, **kwargs)
+        # Disambiguate (provider, model) vs litellm's (model, llm_provider).
+        # Swap only when arg3 is a known provider name and arg2 is not;
+        # every other combination keeps arcllm's (provider, model) order.
+        provider, model = arg2, arg3
+        if arg2 is not None and arg3 is not None:
+            from arcllm.providers.base import SUPPORTED_PROVIDERS
+
+            if arg2 not in SUPPORTED_PROVIDERS and arg3 in SUPPORTED_PROVIDERS:
+                # Litellm shape: (message, model, llm_provider)
+                provider, model = arg3, arg2
+        # Explicit ``provider=``/``model=`` keywords take precedence over
+        # the positional guesses. Both are then forwarded positionally so
+        # extra positionals in ``*args`` continue at ``status_code`` rather
+        # than landing in the ``provider`` slot.
+        provider = kwargs.pop("provider", provider)
+        model = kwargs.pop("model", model)
+        super().__init__(message, provider, model, *args, **kwargs)
         self.param = param
 
 
@@ -325,7 +370,7 @@ def map_status_code_to_exception(
     if status_code == 404:
         return UnsupportedModelError(message, status_code=status_code, **kwargs)
     if status_code == 400:
-        return InvalidRequestError(message, status_code=status_code, **kwargs)
+        return BadRequestError(message, status_code=status_code, **kwargs)
     if status_code == 408:
         return TimeoutError(message, status_code=status_code, **kwargs)
     if status_code == 503:
@@ -345,4 +390,4 @@ def map_status_code_to_exception(
 # ``ProviderAPIError`` (the broader provider-error base) and
 # ``InvalidRequestError`` (400-class semantics) respectively.
 APIError = ProviderAPIError
-BadRequestError = InvalidRequestError
+InvalidRequestError = BadRequestError