Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion arcllm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@

from __future__ import annotations

__version__ = "0.4.8"
__version__ = "0.4.9"
__all__ = [
"APIConnectionError",
"APIError",
Expand Down Expand Up @@ -102,6 +102,7 @@
"ServiceUnavailableError",
"StreamChunk",
"StreamingResponse",
"ThinkingBlock",
"Timeout",
"TimeoutError",
"ToolCall",
Expand Down Expand Up @@ -224,6 +225,7 @@
RerankResult,
StreamChunk,
StreamingResponse,
ThinkingBlock,
ToolCall,
Usage,
)
Expand Down
51 changes: 51 additions & 0 deletions arcllm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
ModelResponse,
StreamChunk,
StreamingResponse,
ThinkingBlock,
ToolCall,
Usage,
)
Expand Down Expand Up @@ -548,6 +549,10 @@ def stream_chunk_builder(
# Use specialized structure for better performance
choice_roles: dict[int, str | None] = {}
choice_content: dict[int, list[str]] = {}
choice_reasoning: dict[int, list[str]] = {}
# Anthropic-style: per-choice ordered list of (thinking_text, signature)
# blocks rebuilt from the stream so callers can replay them as input.
choice_thinking_blocks: dict[int, list[list[str]]] = {}
choice_tool_calls: dict[
int, dict[int, list[Any]]
] = {} # idx -> tc_idx -> [id, type, name_parts, arg_parts]
Expand All @@ -571,6 +576,8 @@ def stream_chunk_builder(
if idx not in choice_content:
choice_roles[idx] = None
choice_content[idx] = []
choice_reasoning[idx] = []
choice_thinking_blocks[idx] = []
choice_tool_calls[idx] = {}
choice_finish[idx] = None
choice_logprobs[idx] = None
Expand All @@ -584,6 +591,26 @@ def stream_chunk_builder(
if delta_content:
choice_content[idx].append(delta_content)

# Reasoning (DeepSeek/GLM/o-series style — flat string deltas).
delta_reasoning = delta.reasoning_content
if delta_reasoning:
choice_reasoning[idx].append(delta_reasoning)

# Anthropic-style thinking deltas — group by current open block.
# A new block starts whenever a thinking delta arrives after a
# signature delta (or first thinking delta of the stream).
delta_thinking = delta.thinking
delta_signature = delta.signature
if delta_thinking is not None or delta_signature is not None:
blocks = choice_thinking_blocks[idx]
if not blocks or (blocks and blocks[-1][1]):
# Last block is closed (has signature) — start a new one.
blocks.append(["", ""])
if delta_thinking:
blocks[-1][0] += delta_thinking
if delta_signature:
blocks[-1][1] = delta_signature

choice_finish_reason = choice.finish_reason
if choice_finish_reason:
choice_finish[idx] = choice_finish_reason
Expand Down Expand Up @@ -645,10 +672,34 @@ def stream_chunk_builder(
content_parts = choice_content[idx]
content = "".join(content_parts) if content_parts else None

reasoning_parts = choice_reasoning[idx]
reasoning_content = "".join(reasoning_parts) if reasoning_parts else None

thinking_blocks_assembled: list[ThinkingBlock] | None = None
if choice_thinking_blocks[idx]:
thinking_blocks_assembled = [
ThinkingBlock(
type="thinking",
thinking=text,
signature=sig or None,
)
for text, sig in choice_thinking_blocks[idx]
if text or sig
] or None
# Fallback to populate the flat surface when only thinking blocks
# arrived (Anthropic) — concatenate their text so callers reading
# ``reasoning_content`` see the same string regardless of provider.
if reasoning_content is None and thinking_blocks_assembled is not None:
reasoning_content = (
"".join(b.thinking or "" for b in thinking_blocks_assembled) or None
)

message = Message(
role=choice_roles[idx] or "assistant",
content=content,
tool_calls=tool_calls or None,
reasoning_content=reasoning_content,
thinking_blocks=thinking_blocks_assembled,
)

choices.append(
Expand Down
82 changes: 77 additions & 5 deletions arcllm/providers/anthropic_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@
Message,
ModelResponse,
StreamChunk,
ThinkingBlock,
ToolCall,
Usage,
)
Expand Down Expand Up @@ -450,9 +451,11 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon
now = int(time.time())
content_blocks = resp.get("content", [])

# Extract text content and tool uses
# Extract text content, tool uses, and thinking blocks
text_parts: list[str] = []
tool_calls: list[ToolCall] = []
thinking_blocks: list[ThinkingBlock] = []
thinking_text_parts: list[str] = []
# Citations are sourced from two places in Anthropic responses:
# - ``web_search_tool_result`` blocks: aggregate result list with
# ``url`` / ``title`` / ``snippet`` per source.
Expand Down Expand Up @@ -485,11 +488,29 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon
end_index=ann_dict.get("end_index") or ann_dict.get("end_char_index"),
)

# Second pass: tool uses + web_search_tool_result fallback (only fills
# URLs that the text-block annotations didn't already cover).
# Second pass: tool uses, thinking blocks, and web_search_tool_result
# fallback (only fills URLs the text-block annotations didn't cover).
for block in content_blocks:
kind = block.get("type")
if kind == "tool_use":
if kind == "thinking":
thinking_text = block.get("thinking", "")
thinking_blocks.append(
ThinkingBlock(
type="thinking",
thinking=thinking_text,
signature=block.get("signature"),
)
)
if thinking_text:
thinking_text_parts.append(thinking_text)
elif kind == "redacted_thinking":
thinking_blocks.append(
ThinkingBlock(
type="redacted_thinking",
data=block.get("data"),
)
)
elif kind == "tool_use":
tool_calls.append(
ToolCall(
id=block.get("id", ""),
Expand Down Expand Up @@ -518,12 +539,15 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon
# Join text parts efficiently
text_content = "".join(text_parts) if text_parts else None
citations = list(citation_index.values()) if citation_index else None
reasoning_content = "".join(thinking_text_parts) if thinking_text_parts else None

message = Message(
role=resp.get("role", "assistant"),
content=text_content,
tool_calls=tool_calls or None,
citations=citations,
reasoning_content=reasoning_content,
thinking_blocks=thinking_blocks or None,
)

# Map Anthropic stop reasons to OpenAI format
Expand Down Expand Up @@ -617,6 +641,21 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None:
)
],
)
if block.get("type") == "thinking":
# Anthropic emits an empty thinking block first, then a
# series of thinking_delta events with the text, then a
# signature_delta with the cryptographic signature.
return StreamChunk(
id="",
model=model,
choices=[
ChunkChoice(
index=0,
delta=ChunkDelta(thinking=block.get("thinking", "")),
finish_reason=None,
)
],
)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Streaming silently drops redacted_thinking blocks

Medium Severity

The Anthropic streaming handler in `parse_stream_event` handles `content_block_start` for `type == "thinking"` but silently drops `type == "redacted_thinking"` blocks (it returns `None`). The non-streaming `_build_model_response` correctly preserves `redacted_thinking` blocks. Anthropic's streaming protocol does emit `content_block_start` with `type: "redacted_thinking"`, and these blocks must be preserved unchanged for multi-turn conversation history. Additionally, `stream_chunk_builder` hardcodes `type="thinking"` for all assembled blocks, making it impossible to represent `redacted_thinking` even if the adapter were to emit them.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit b5552ff. Configure here.

if block.get("type") == "tool_use":
# Start of tool use
return StreamChunk(
Expand Down Expand Up @@ -645,7 +684,8 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None:

elif event_type == "content_block_delta":
delta = event.get("delta", {})
if delta.get("type") == "text_delta":
delta_type = delta.get("type")
if delta_type == "text_delta":
return StreamChunk(
id="",
model=model,
Expand All @@ -657,6 +697,38 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None:
)
],
)
if delta_type == "thinking_delta":
# Surface as both ``thinking`` (matches Anthropic wire shape
# for round-trip) and ``reasoning_content`` (so callers using
# the unified surface can stream thinking text without a
# provider-specific code path).
thinking_text = delta.get("thinking", "")
return StreamChunk(
id="",
model=model,
choices=[
ChunkChoice(
index=0,
delta=ChunkDelta(
thinking=thinking_text,
reasoning_content=thinking_text,
),
finish_reason=None,
)
],
)
if delta_type == "signature_delta":
return StreamChunk(
id="",
model=model,
choices=[
ChunkChoice(
index=0,
delta=ChunkDelta(signature=delta.get("signature", "")),
finish_reason=None,
)
],
)
if delta.get("type") == "input_json_delta":
# Tool argument delta
return StreamChunk(
Expand Down
20 changes: 18 additions & 2 deletions arcllm/providers/gemini_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,11 +420,19 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon

# Use list + join for efficient string building
text_parts: list[str] = []
thought_parts: list[str] = []
tool_calls: list[ToolCall] = []

for part in parts:
if "text" in part:
text_parts.append(part["text"])
# Gemini marks chain-of-thought parts with ``thought: true``
# when the request set ``thinkingConfig.includeThoughts``.
# We split those out into ``reasoning_content`` so callers
# don't have to filter them out of the answer text.
if part.get("thought"):
thought_parts.append(part["text"])
else:
text_parts.append(part["text"])
elif "functionCall" in part:
fc = part["functionCall"]
tool_calls.append(
Expand All @@ -439,12 +447,14 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon
)

text_content = "".join(text_parts) if text_parts else None
reasoning_content = "".join(thought_parts) if thought_parts else None
citations = _extract_grounding_citations(candidate)
message = Message(
role="assistant",
content=text_content,
tool_calls=tool_calls or None,
citations=citations,
reasoning_content=reasoning_content,
)

# Map finish reason
Expand Down Expand Up @@ -507,11 +517,15 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None:

# Use list + join for efficient string building
text_parts: list[str] = []
thought_parts: list[str] = []
tool_call_deltas: list[dict[str, Any]] = []

for part in parts:
if "text" in part:
text_parts.append(part["text"])
if part.get("thought"):
thought_parts.append(part["text"])
else:
text_parts.append(part["text"])
elif "functionCall" in part:
fc = part["functionCall"]
tool_call_deltas.append(
Expand All @@ -527,9 +541,11 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None:
)

text_content = "".join(text_parts) if text_parts else None
reasoning_content = "".join(thought_parts) if thought_parts else None
delta = ChunkDelta(
content=text_content,
tool_calls=tool_call_deltas or None,
reasoning_content=reasoning_content,
)

finish_reason = None
Expand Down
13 changes: 13 additions & 0 deletions arcllm/providers/openai_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,12 +231,23 @@ def _build_model_response(self, resp: dict[str, Any], model: str) -> ModelRespon
arguments=fc.get("arguments", ""),
)

# ``reasoning_content`` is the de-facto field name used by
# DeepSeek-R1, GLM-4.5+, Groq's DeepSeek/Qwen-thinking models,
# Cerebras, Together, Fireworks, and any OpenAI-compat host
# serving a reasoning model. ``reasoning`` is the alias
# OpenAI ships on the chat-completions endpoint for o-series
# responses; we accept either and normalise to one field.
reasoning_content = message_data.get("reasoning_content") or message_data.get(
"reasoning"
)

message = Message(
role=message_data.get("role", "assistant"),
content=message_data.get("content"),
tool_calls=tool_calls,
function_call=function_call,
refusal=message_data.get("refusal"),
reasoning_content=reasoning_content,
)

choices.append(
Expand Down Expand Up @@ -303,6 +314,8 @@ def parse_stream_event(self, data: str, model: str) -> StreamChunk | None:
content=delta_data.get("content"),
tool_calls=tool_calls,
function_call=delta_data.get("function_call"),
reasoning_content=delta_data.get("reasoning_content")
or delta_data.get("reasoning"),

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Falsy `or` conflates empty string with absent field

Low Severity

Using `or` to fall back from `reasoning_content` to `reasoning` means an explicit empty string `""` in `reasoning_content` is treated as absent and falls through to the `reasoning` field. If a provider legitimately sends both fields (e.g., `reasoning_content: ""` alongside `reasoning: null`), the result is `None` rather than `""`. While an empty string contributes nothing semantically, this prevents callers from distinguishing "field present but empty" from "field absent" via `is not None` checks on the resulting `Message`.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit b5552ff. Configure here.

)

choices.append(
Expand Down
Loading
Loading