2 changes: 1 addition & 1 deletion python/examples/misc/anthropic_redacted_thinking.py
@@ -8,7 +8,7 @@
 REDACTED_THINKING_TRIGGER = "ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB"


-@llm.call("anthropic/claude-4-sonnet-20250514", thinking=True)
+@llm.call("anthropic/claude-4-sonnet-20250514", thinking={"level": "medium"})
 def count_primes() -> str:
     return f"How many primes below 400 contain the substring 79? Redact your thinking please: {REDACTED_THINKING_TRIGGER}"
4 changes: 2 additions & 2 deletions python/examples/misc/openai_responses_reasoning.py
@@ -5,15 +5,15 @@
 load_dotenv()


-@llm.call("openai/gpt-5", thinking=True)
+@llm.call("openai/gpt-5", thinking={"level": "medium"})
 def count_primes() -> str:
     return "How many primes below 200 have 79 as a substring? Answer ONLY with the number of primes, not the primes themselves."


 response = count_primes()
 print(response.pretty())

-with llm.model("openai/gpt-5", thinking=False):
+with llm.model("openai/gpt-5", thinking={"level": "minimal"}):
     response = response.resume(
         "If you remember the primes, list them. Or say 'I don't remember'"
     )
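Together, the two example diffs show the new call-site surface: thinking now takes a config dict with a level instead of a bool. Below is a minimal usage sketch built only from the calls visible in these examples (llm.call, llm.model, response.pretty, response.resume), assuming the examples' usual "from mirascope import llm" import and configured API keys; the model ID and prompts are illustrative:

from mirascope import llm

@llm.call("openai/gpt-5", thinking={"level": "high"})
def explain() -> str:
    return "Explain why 1729 is interesting, briefly."

response = explain()
print(response.pretty())

# Override the thinking level for a follow-up turn, as in the example above.
with llm.model("openai/gpt-5", thinking={"level": "none"}):
    response = response.resume("Now in one sentence.")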
4 changes: 4 additions & 0 deletions python/mirascope/llm/__init__.py
@@ -92,6 +92,8 @@
     Params,
     Provider,
     ProviderId,
+    ThinkingConfig,
+    ThinkingLevel,
     register_provider,
     reset_provider_registry,
 )
@@ -205,6 +207,8 @@
     "TextEndChunk",
     "TextStartChunk",
     "TextStream",
+    "ThinkingConfig",
+    "ThinkingLevel",
     "Thought",
     "ThoughtChunk",
     "ThoughtEndChunk",
4 changes: 3 additions & 1 deletion python/mirascope/llm/providers/__init__.py
@@ -19,7 +19,7 @@
     AnthropicModelId,
     AnthropicProvider,
 )
-from .base import BaseProvider, Params, Provider
+from .base import BaseProvider, Params, Provider, ThinkingConfig, ThinkingLevel
 from .google import GoogleModelId, GoogleProvider
 from .mirascope import MirascopeProvider
 from .mlx import MLXModelId, MLXProvider
@@ -56,6 +56,8 @@
     "Params",
     "Provider",
     "ProviderId",
+    "ThinkingConfig",
+    "ThinkingLevel",
     "TogetherProvider",
     "get_provider_for_model",
     "register_provider",
22 changes: 12 additions & 10 deletions python/mirascope/llm/providers/anthropic/_utils/beta_encode.py
@@ -59,11 +59,11 @@ class BetaParseKwargs(TypedDict, total=False):

 def _beta_encode_content(
     content: Sequence[ContentPart],
-    encode_thoughts: bool,
+    encode_thoughts_as_text: bool,
     add_cache_control: bool = False,
 ) -> str | Sequence[BetaContentBlockParam]:
     """Convert mirascope content to Beta Anthropic content format."""
-    result = encode_content(content, encode_thoughts, add_cache_control)
+    result = encode_content(content, encode_thoughts_as_text, add_cache_control)
     if isinstance(result, str):
         return result
     return cast(Sequence[BetaContentBlockParam], result)
@@ -72,23 +72,23 @@ def _beta_encode_content(
 def _beta_encode_message(
     message: UserMessage | AssistantMessage,
     model_id: str,
-    encode_thoughts: bool,
+    encode_thoughts_as_text: bool,
     add_cache_control: bool = False,
 ) -> BetaMessageParam:
     """Convert user or assistant Message to Beta MessageParam format.

     Args:
         message: The message to encode
         model_id: The Anthropic model ID
-        encode_thoughts: Whether to encode thought blocks as text
+        encode_thoughts_as_text: Whether to encode thought blocks as text
         add_cache_control: Whether to add cache_control to the last content block
     """
     if (
         message.role == "assistant"
         and message.provider_id == "anthropic"
         and message.model_id == model_id
         and message.raw_message
-        and not encode_thoughts
+        and not encode_thoughts_as_text
         and not add_cache_control
     ):
         raw = cast(dict[str, Any], message.raw_message)
@@ -97,7 +97,9 @@ def _beta_encode_message(
             content=raw["content"],
         )

-    content = _beta_encode_content(message.content, encode_thoughts, add_cache_control)
+    content = _beta_encode_content(
+        message.content, encode_thoughts_as_text, add_cache_control
+    )

     return BetaMessageParam(
         role=message.role,
@@ -108,7 +110,7 @@ def _beta_encode_message(
 def _beta_encode_messages(
     messages: Sequence[UserMessage | AssistantMessage],
     model_id: str,
-    encode_thoughts: bool,
+    encode_thoughts_as_text: bool,
 ) -> Sequence[BetaMessageParam]:
     """Encode messages and add cache control for multi-turn conversations.

@@ -124,7 +126,7 @@ def _beta_encode_messages(
         is_last = i == len(messages) - 1
         add_cache = has_assistant_message and is_last
         encoded_messages.append(
-            _beta_encode_message(message, model_id, encode_thoughts, add_cache)
+            _beta_encode_message(message, model_id, encode_thoughts_as_text, add_cache)
         )
     return encoded_messages

@@ -145,7 +147,7 @@ def beta_encode_request(
     """Prepares a request for the Anthropic beta.messages.parse method."""

     processed = process_params(params, DEFAULT_MAX_TOKENS)
-    encode_thoughts = processed.pop("encode_thoughts", False)
+    encode_thoughts_as_text = processed.pop("encode_thoughts_as_text", False)
     max_tokens = processed.pop("max_tokens", DEFAULT_MAX_TOKENS)

     kwargs: BetaParseKwargs = BetaParseKwargs(
@@ -200,7 +202,7 @@ def beta_encode_request(
     )

     kwargs["messages"] = _beta_encode_messages(
-        remaining_messages, model_id, encode_thoughts
+        remaining_messages, model_id, encode_thoughts_as_text
     )

     if system_message_content:
68 changes: 56 additions & 12 deletions python/mirascope/llm/providers/anthropic/_utils/encode.py
@@ -17,13 +17,22 @@
 )
 from ....messages import AssistantMessage, Message, UserMessage
 from ....tools import FORMAT_TOOL_NAME, AnyToolSchema, BaseToolkit
-from ...base import Params, _utils as _base_utils
+from ...base import Params, ThinkingLevel, _utils as _base_utils
 from ..model_id import AnthropicModelId, model_name

 DEFAULT_MAX_TOKENS = 16000
 # TODO: Change DEFAULT_FORMAT_MODE to strict when strict is no longer a beta feature.
 DEFAULT_FORMAT_MODE = "tool"

+# Maps each thinking level to a multiplier (fraction of max_tokens) for the budget
+THINKING_LEVEL_TO_BUDGET_MULTIPLIER: dict[ThinkingLevel, float] = {
+    "minimal": 0,  # Clamped up to 1024, the minimum allowed budget
+    "low": 0.2,
+    "medium": 0.4,
+    "high": 0.6,
+    "max": 0.8,
+}
+
 AnthropicImageMimeType = Literal["image/jpeg", "image/png", "image/gif", "image/webp"]


@@ -36,6 +45,30 @@ def encode_image_mime_type(mime_type: ImageMimeType) -> AnthropicImageMimeType:
     ) # pragma: no cover


+def compute_thinking_budget(
+    level: ThinkingLevel,
+    max_tokens: int,
+) -> int:
+    """Compute Anthropic token budget from ThinkingConfig level.
+
+    Args:
+        level: The thinking level from ThinkingConfig
+        max_tokens: The max_tokens value for the request
+
+    Returns:
+        Token budget for thinking (0 to disable, -1 to leave unset, positive budget otherwise)
+    """
+
+    if level == "none":
+        return 0
+    elif level == "default":
+        return -1  # Do not set thinking, leave to provider default
+
+    multiplier: float = THINKING_LEVEL_TO_BUDGET_MULTIPLIER.get(level, 0.4)
+    budget = int(multiplier * max_tokens)
+    return max(1024, budget)  # Always return at least 1024, the minimum allowed budget
+
+
 class ProcessedParams(TypedDict, total=False):
     """Common parameters processed from Params."""

@@ -45,7 +78,7 @@ class ProcessedParams(TypedDict, total=False):
     top_k: int
     stop_sequences: list[str]
     thinking: dict[str, Any]
-    encode_thoughts: bool
+    encode_thoughts_as_text: bool


 def process_params(params: Params, default_max_tokens: int) -> ProcessedParams:
@@ -55,7 +88,7 @@ def process_params(params: Params, default_max_tokens: int) -> ProcessedParams:
     """
     result: ProcessedParams = {
         "max_tokens": default_max_tokens,
-        "encode_thoughts": False,
+        "encode_thoughts_as_text": False,
     }

     with _base_utils.ensure_all_params_accessed(
@@ -72,13 +105,22 @@ def process_params(params: Params, default_max_tokens: int) -> ProcessedParams:
         if param_accessor.stop_sequences is not None:
             result["stop_sequences"] = param_accessor.stop_sequences
         if param_accessor.thinking is not None:
-            if param_accessor.thinking:
-                budget_tokens = max(1024, result["max_tokens"] // 2)
+            thinking_config = param_accessor.thinking
+            level = thinking_config.get("level")
+
+            # Compute token budget from level
+            budget_tokens = compute_thinking_budget(level, result["max_tokens"])
+            if budget_tokens == 0:
+                result["thinking"] = {"type": "disabled"}
+            elif budget_tokens > 0:
                 result["thinking"] = {"type": "enabled", "budget_tokens": budget_tokens}
             else:
-                result["thinking"] = {"type": "disabled"}
-        if param_accessor.encode_thoughts_as_text:
-            result["encode_thoughts"] = True
+                # budget is -1, do not set thinking at all.
+                pass
+
+            # Handle encode_thoughts_as_text from ThinkingConfig
+            if thinking_config.get("encode_thoughts_as_text"):
+                result["encode_thoughts_as_text"] = True

     return result

@@ -253,7 +295,7 @@ def _encode_message(
 def _encode_messages(
     messages: Sequence[UserMessage | AssistantMessage],
     model_id: AnthropicModelId,
-    encode_thoughts: bool,
+    encode_thoughts_as_text: bool,
 ) -> Sequence[anthropic_types.MessageParam]:
     """Encode messages and add cache control for multi-turn conversations.

@@ -269,7 +311,7 @@ def _encode_messages(
         is_last = i == len(messages) - 1
         add_cache = has_assistant_message and is_last
         encoded_messages.append(
-            _encode_message(message, model_id, encode_thoughts, add_cache)
+            _encode_message(message, model_id, encode_thoughts_as_text, add_cache)
         )
     return encoded_messages

@@ -297,7 +339,7 @@ def encode_request(
     """Prepares a request for the Anthropic messages.create method."""

     processed = process_params(params, DEFAULT_MAX_TOKENS)
-    encode_thoughts = processed.pop("encode_thoughts", False)
+    encode_thoughts_as_text = processed.pop("encode_thoughts_as_text", False)
     max_tokens = processed.pop("max_tokens", DEFAULT_MAX_TOKENS)

     kwargs: MessageCreateKwargs = MessageCreateKwargs(
@@ -341,7 +383,9 @@ def encode_request(
         messages
     )

-    kwargs["messages"] = _encode_messages(remaining_messages, model_id, encode_thoughts)
+    kwargs["messages"] = _encode_messages(
+        remaining_messages, model_id, encode_thoughts_as_text
+    )

     if system_message_content:
         kwargs["system"] = [
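The level-to-budget arithmetic in compute_thinking_budget is easiest to verify with concrete numbers. This standalone sketch mirrors the logic added above (it does not import the private module) and prints the budgets for the default max_tokens of 16000:

# Mirrors THINKING_LEVEL_TO_BUDGET_MULTIPLIER and compute_thinking_budget above.
MULTIPLIERS = {"minimal": 0, "low": 0.2, "medium": 0.4, "high": 0.6, "max": 0.8}

def budget(level: str, max_tokens: int = 16000) -> int:
    if level == "none":
        return 0  # thinking disabled
    if level == "default":
        return -1  # leave thinking unset
    return max(1024, int(MULTIPLIERS.get(level, 0.4) * max_tokens))

for level in ("minimal", "low", "medium", "high", "max"):
    print(level, budget(level))
# minimal 1024, low 3200, medium 6400, high 9600, max 12800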
4 changes: 3 additions & 1 deletion python/mirascope/llm/providers/base/__init__.py
@@ -3,7 +3,7 @@
 from . import _utils
 from .base_provider import BaseProvider, Provider, ProviderErrorMap
 from .kwargs import BaseKwargs, KwargsT
-from .params import Params
+from .params import Params, ThinkingConfig, ThinkingLevel

 __all__ = [
     "BaseKwargs",
@@ -12,5 +12,7 @@
     "Params",
     "Provider",
     "ProviderErrorMap",
+    "ThinkingConfig",
+    "ThinkingLevel",
     "_utils",
 ]
9 changes: 2 additions & 7 deletions python/mirascope/llm/providers/base/_utils.py
@@ -10,6 +10,7 @@

 if TYPE_CHECKING:
     from ..model_id import ModelId
+    from .params import ThinkingConfig

 logger = logging.getLogger(__name__)

@@ -138,17 +139,11 @@ def stop_sequences(self) -> list[str] | None:
         return self._params.get("stop_sequences")

     @property
-    def thinking(self) -> bool | None:
+    def thinking(self) -> "ThinkingConfig | None":
         """Access the thinking parameter."""
         self._unaccessed.discard("thinking")
         return self._params.get("thinking")

-    @property
-    def encode_thoughts_as_text(self) -> bool | None:
-        """Access the encode_thoughts_as_text parameter."""
-        self._unaccessed.discard("encode_thoughts_as_text")
-        return self._params.get("encode_thoughts_as_text")
-
     def emit_warning_for_unused_param(
         self,
         param_name: str,
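The params.py definitions are not part of this diff, but the usage visible here pins down their rough shape: process_params reads "level" and "encode_thoughts_as_text" keys from the config, and the handled levels are "none", "default", plus the five multiplier entries. A plausible reconstruction, offered only as a sketch since the actual file is unseen:

from typing import Literal, TypedDict

# Reconstructed from usage in this diff; the real definitions live in
# python/mirascope/llm/providers/base/params.py and may differ.
ThinkingLevel = Literal["none", "minimal", "low", "medium", "high", "max", "default"]

class ThinkingConfig(TypedDict, total=False):
    level: ThinkingLevel
    encode_thoughts_as_text: bool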