Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion arcllm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@

from __future__ import annotations

__version__ = "0.4.6"
__version__ = "0.4.9"
__all__ = [
"APIConnectionError",
"APIError",
Expand Down Expand Up @@ -102,6 +102,7 @@
"ServiceUnavailableError",
"StreamChunk",
"StreamingResponse",
"ThinkingBlock",
"Timeout",
"TimeoutError",
"ToolCall",
Expand Down Expand Up @@ -224,6 +225,7 @@
RerankResult,
StreamChunk,
StreamingResponse,
ThinkingBlock,
ToolCall,
Usage,
)
Expand Down
51 changes: 51 additions & 0 deletions arcllm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
ModelResponse,
StreamChunk,
StreamingResponse,
ThinkingBlock,
ToolCall,
Usage,
)
Expand Down Expand Up @@ -548,6 +549,10 @@ def stream_chunk_builder(
# Use specialized structure for better performance
choice_roles: dict[int, str | None] = {}
choice_content: dict[int, list[str]] = {}
choice_reasoning: dict[int, list[str]] = {}
# Anthropic-style: per-choice ordered list of (thinking_text, signature)
# blocks rebuilt from the stream so callers can replay them as input.
choice_thinking_blocks: dict[int, list[list[str]]] = {}
choice_tool_calls: dict[
int, dict[int, list[Any]]
] = {} # idx -> tc_idx -> [id, type, name_parts, arg_parts]
Expand All @@ -571,6 +576,8 @@ def stream_chunk_builder(
if idx not in choice_content:
choice_roles[idx] = None
choice_content[idx] = []
choice_reasoning[idx] = []
choice_thinking_blocks[idx] = []
choice_tool_calls[idx] = {}
choice_finish[idx] = None
choice_logprobs[idx] = None
Expand All @@ -584,6 +591,26 @@ def stream_chunk_builder(
if delta_content:
choice_content[idx].append(delta_content)

# Reasoning (DeepSeek/GLM/o-series style — flat string deltas).
delta_reasoning = delta.reasoning_content
if delta_reasoning:
choice_reasoning[idx].append(delta_reasoning)

# Anthropic-style thinking deltas — group by current open block.
# A new block starts whenever a thinking delta arrives after a
# signature delta (or first thinking delta of the stream).
delta_thinking = delta.thinking
delta_signature = delta.signature
if delta_thinking is not None or delta_signature is not None:
blocks = choice_thinking_blocks[idx]
if not blocks or (blocks and blocks[-1][1]):
# Last block is closed (has signature) — start a new one.
blocks.append(["", ""])
if delta_thinking:
blocks[-1][0] += delta_thinking
if delta_signature:
blocks[-1][1] = delta_signature

choice_finish_reason = choice.finish_reason
if choice_finish_reason:
choice_finish[idx] = choice_finish_reason
Expand Down Expand Up @@ -645,10 +672,34 @@ def stream_chunk_builder(
content_parts = choice_content[idx]
content = "".join(content_parts) if content_parts else None

reasoning_parts = choice_reasoning[idx]
reasoning_content = "".join(reasoning_parts) if reasoning_parts else None

thinking_blocks_assembled: list[ThinkingBlock] | None = None
if choice_thinking_blocks[idx]:
thinking_blocks_assembled = [
ThinkingBlock(
type="thinking",
thinking=text,
signature=sig or None,
)
for text, sig in choice_thinking_blocks[idx]
if text or sig
] or None
# Fallback to populate the flat surface when only thinking blocks
# arrived (Anthropic) — concatenate their text so callers reading
# ``reasoning_content`` see the same string regardless of provider.
if reasoning_content is None and thinking_blocks_assembled is not None:
reasoning_content = (
"".join(b.thinking or "" for b in thinking_blocks_assembled) or None
)

message = Message(
role=choice_roles[idx] or "assistant",
content=content,
tool_calls=tool_calls or None,
reasoning_content=reasoning_content,
thinking_blocks=thinking_blocks_assembled,
)

choices.append(
Expand Down
75 changes: 60 additions & 15 deletions arcllm/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,16 @@ class ArcLLMError(Exception):
def __init__(
self,
message: str,
*,
provider: str | None = None,
# Litellm-compat alias: callers migrating from litellm pass
# ``llm_provider`` (its kwarg name). If both are given, ``provider``
# wins so explicit arcllm code keeps its semantics.
llm_provider: str | None = None,
model: str | None = None,
status_code: int | None = None,
request_id: str | None = None,
raw_response: Any | None = None,
*,
# Litellm-compat alias: callers migrating from litellm pass
# ``llm_provider`` (its kwarg name). If both are given, ``provider``
# wins so explicit arcllm code keeps its semantics.
llm_provider: str | None = None,
) -> None:
super().__init__(message)
self.message = message
Expand Down Expand Up @@ -103,11 +103,17 @@ class RateLimitError(ArcLLMError):
def __init__(
self,
message: str,
*,
provider: str | None = None,
model: str | None = None,
*args: Any,
retry_after: float | None = None,
**kwargs: Any,
) -> None:
super().__init__(message, **kwargs)
if provider is not None:
kwargs.setdefault("provider", provider)
if model is not None:
kwargs.setdefault("model", model)
super().__init__(message, *args, **kwargs)
self.retry_after = retry_after


Expand Down Expand Up @@ -151,17 +157,31 @@ class ProviderAPIError(ArcLLMError):

This is used for provider-specific errors that don't map to
other more specific exception types.

Litellm-compat: callers may construct this as
``ProviderAPIError(status_code, message, provider, model)`` (litellm's
``APIError`` signature). Detection is by type — if the first arg is an
``int``, it's the status code and the remaining positionals shift.
"""

def __init__(
self,
message: str,
*,
*args: Any,
error_type: str | None = None,
error_code: str | None = None,
**kwargs: Any,
) -> None:
super().__init__(message, **kwargs)
# Litellm `APIError(status_code, message, llm_provider, model)`
# vs arcllm `ProviderAPIError(message, provider, model, status_code)`.
if args and isinstance(args[0], int):
status_code = args[0]
message = args[1] if len(args) > 1 else ""
provider = args[2] if len(args) > 2 else None
model = args[3] if len(args) > 3 else None
kwargs.setdefault("status_code", status_code)
super().__init__(message, provider, model, **kwargs)
else:
super().__init__(*args, **kwargs)
self.error_type = error_type
self.error_code = error_code

Expand Down Expand Up @@ -239,24 +259,49 @@ def __init__(
self.filter_reason = filter_reason


class InvalidRequestError(ArcLLMError):
class BadRequestError(ArcLLMError):
"""
Raised when the request is malformed or invalid.

Common causes:
- Missing required parameters
- Invalid parameter values
- Malformed message format

Litellm-compat: callers may construct this as
``BadRequestError(message, model, llm_provider)`` (litellm signature
has ``model`` second). The base ``ArcLLMError`` has ``provider`` second.
We accept both shapes — if the second positional looks like a provider
name (registered in our provider list), treat it as ``provider``;
otherwise treat it as ``model``.
"""

def __init__(
self,
message: str,
*,
arg2: str | None = None,
arg3: str | None = None,
*args: Any,
param: str | None = None,
**kwargs: Any,
) -> None:
super().__init__(message, **kwargs)
# Disambiguate (provider, model) vs litellm's (model, llm_provider).
# Heuristic: if arg2 is a known provider name and arg3 isn't, use
# arcllm's order. If arg3 is a known provider and arg2 isn't, use
# litellm's (model, llm_provider) order. Falls back to arcllm's order.
if arg2 is not None and arg3 is not None:
from arcllm.providers.base import SUPPORTED_PROVIDERS

if arg2 not in SUPPORTED_PROVIDERS and arg3 in SUPPORTED_PROVIDERS:
# Litellm shape: (message, model, llm_provider)
kwargs.setdefault("provider", arg3)
kwargs.setdefault("model", arg2)
else:
kwargs.setdefault("provider", arg2)
kwargs.setdefault("model", arg3)
elif arg2 is not None:
kwargs.setdefault("provider", arg2)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Single extra positional arg is always treated as `provider`, even when it is a model name

Medium Severity

When only `arg2` is provided (without `arg3`), the `elif arg2 is not None` branch unconditionally treats it as `provider`. However, the docstring and litellm's documented signature `BadRequestError(message, model, llm_provider)` indicate that the second positional argument is the model. If a litellm caller passes only two positional args (`message` and `model`), the model name is incorrectly stored as `provider`. The `SUPPORTED_PROVIDERS` heuristic is applied only when both `arg2` and `arg3` are present, so this single-argument case is mishandled.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 4dab9eb. Configure here.

super().__init__(message, *args, **kwargs)
self.param = param


Expand Down Expand Up @@ -325,7 +370,7 @@ def map_status_code_to_exception(
if status_code == 404:
return UnsupportedModelError(message, status_code=status_code, **kwargs)
if status_code == 400:
return InvalidRequestError(message, status_code=status_code, **kwargs)
return BadRequestError(message, status_code=status_code, **kwargs)
if status_code == 408:
return TimeoutError(message, status_code=status_code, **kwargs)
if status_code == 503:
Expand All @@ -345,4 +390,4 @@ def map_status_code_to_exception(
# ``ProviderAPIError`` (the broader provider-error base) and
# ``InvalidRequestError`` (400-class semantics) respectively.
APIError = ProviderAPIError
BadRequestError = InvalidRequestError
InvalidRequestError = BadRequestError
Loading
Loading