2 changes: 1 addition & 1 deletion python/examples/misc/anthropic_redacted_thinking.py
@@ -8,7 +8,7 @@
 REDACTED_THINKING_TRIGGER = "ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB"


-@llm.call("anthropic/claude-4-sonnet-20250514", thinking=True)
+@llm.call("anthropic/claude-4-sonnet-20250514", thinking={"level": "medium"})
 def count_primes() -> str:
     return f"How many primes below 400 contain the substring 79? Redact your thinking please: {REDACTED_THINKING_TRIGGER}"
4 changes: 2 additions & 2 deletions python/examples/misc/openai_responses_reasoning.py
@@ -5,15 +5,15 @@
 load_dotenv()


-@llm.call("openai/gpt-5", thinking=True)
+@llm.call("openai/gpt-5", thinking={"level": "medium"})
 def count_primes() -> str:
     return "How many primes below 200 have 79 as a substring? Answer ONLY with the number of primes, not the primes themselves."


 response = count_primes()
 print(response.pretty())

-with llm.model("openai/gpt-5", thinking=False):
+with llm.model("openai/gpt-5", thinking={"level": "minimal"}):
     response = response.resume(
         "If you remember the primes, list them. Or say 'I don't remember'"
     )
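Together, the two example diffs show the new call-site surface: thinking now takes a config dict with a level instead of a bool. Below is a minimal usage sketch built only from the calls visible in these examples (llm.call, llm.model, response.pretty, response.resume), assuming the examples' usual "from mirascope import llm" import and configured API keys; the model ID and prompts are illustrative:

from mirascope import llm

@llm.call("openai/gpt-5", thinking={"level": "high"})
def explain() -> str:
    return "Explain why 1729 is interesting, briefly."

response = explain()
print(response.pretty())

# Override the thinking level for a follow-up turn, as in the example above.
with llm.model("openai/gpt-5", thinking={"level": "none"}):
    response = response.resume("Now in one sentence.")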
4 changes: 4 additions & 0 deletions python/mirascope/llm/__init__.py
@@ -92,6 +92,8 @@
     Params,
     Provider,
     ProviderId,
+    ThinkingConfig,
+    ThinkingLevel,
     register_provider,
     reset_provider_registry,
 )
@@ -205,6 +207,8 @@
     "TextEndChunk",
     "TextStartChunk",
     "TextStream",
+    "ThinkingConfig",
+    "ThinkingLevel",
     "Thought",
     "ThoughtChunk",
     "ThoughtEndChunk",
4 changes: 3 additions & 1 deletion python/mirascope/llm/providers/__init__.py
@@ -19,7 +19,7 @@
     AnthropicModelId,
     AnthropicProvider,
 )
-from .base import BaseProvider, Params, Provider
+from .base import BaseProvider, Params, Provider, ThinkingConfig, ThinkingLevel
 from .google import GoogleModelId, GoogleProvider
 from .mirascope import MirascopeProvider
 from .mlx import MLXModelId, MLXProvider
@@ -56,6 +56,8 @@
     "Params",
     "Provider",
     "ProviderId",
+    "ThinkingConfig",
+    "ThinkingLevel",
     "TogetherProvider",
     "get_provider_for_model",
     "register_provider",
22 changes: 12 additions & 10 deletions python/mirascope/llm/providers/anthropic/_utils/beta_encode.py
@@ -59,11 +59,11 @@ class BetaParseKwargs(TypedDict, total=False):

 def _beta_encode_content(
     content: Sequence[ContentPart],
-    encode_thoughts: bool,
+    encode_thoughts_as_text: bool,
     add_cache_control: bool = False,
 ) -> str | Sequence[BetaContentBlockParam]:
     """Convert mirascope content to Beta Anthropic content format."""
-    result = encode_content(content, encode_thoughts, add_cache_control)
+    result = encode_content(content, encode_thoughts_as_text, add_cache_control)
     if isinstance(result, str):
         return result
     return cast(Sequence[BetaContentBlockParam], result)
@@ -72,23 +72,23 @@ def _beta_encode_content(
 def _beta_encode_message(
     message: UserMessage | AssistantMessage,
     model_id: str,
-    encode_thoughts: bool,
+    encode_thoughts_as_text: bool,
     add_cache_control: bool = False,
 ) -> BetaMessageParam:
     """Convert user or assistant Message to Beta MessageParam format.

     Args:
         message: The message to encode
         model_id: The Anthropic model ID
-        encode_thoughts: Whether to encode thought blocks as text
+        encode_thoughts_as_text: Whether to encode thought blocks as text
         add_cache_control: Whether to add cache_control to the last content block
     """
     if (
         message.role == "assistant"
         and message.provider_id == "anthropic"
         and message.model_id == model_id
         and message.raw_message
-        and not encode_thoughts
+        and not encode_thoughts_as_text
         and not add_cache_control
     ):
         raw = cast(dict[str, Any], message.raw_message)
@@ -97,7 +97,9 @@ def _beta_encode_message(
             content=raw["content"],
         )

-    content = _beta_encode_content(message.content, encode_thoughts, add_cache_control)
+    content = _beta_encode_content(
+        message.content, encode_thoughts_as_text, add_cache_control
+    )

     return BetaMessageParam(
         role=message.role,
@@ -108,7 +110,7 @@ def _beta_encode_message(
 def _beta_encode_messages(
     messages: Sequence[UserMessage | AssistantMessage],
     model_id: str,
-    encode_thoughts: bool,
+    encode_thoughts_as_text: bool,
 ) -> Sequence[BetaMessageParam]:
     """Encode messages and add cache control for multi-turn conversations.

@@ -124,7 +126,7 @@ def _beta_encode_messages(
         is_last = i == len(messages) - 1
         add_cache = has_assistant_message and is_last
         encoded_messages.append(
-            _beta_encode_message(message, model_id, encode_thoughts, add_cache)
+            _beta_encode_message(message, model_id, encode_thoughts_as_text, add_cache)
         )
     return encoded_messages

@@ -145,7 +147,7 @@ def beta_encode_request(
     """Prepares a request for the Anthropic beta.messages.parse method."""

     processed = process_params(params, DEFAULT_MAX_TOKENS)
-    encode_thoughts = processed.pop("encode_thoughts", False)
+    encode_thoughts_as_text = processed.pop("encode_thoughts_as_text", False)
     max_tokens = processed.pop("max_tokens", DEFAULT_MAX_TOKENS)

     kwargs: BetaParseKwargs = BetaParseKwargs(
@@ -200,7 +202,7 @@ def beta_encode_request(
     )

     kwargs["messages"] = _beta_encode_messages(
-        remaining_messages, model_id, encode_thoughts
+        remaining_messages, model_id, encode_thoughts_as_text
     )

     if system_message_content:
68 changes: 56 additions & 12 deletions python/mirascope/llm/providers/anthropic/_utils/encode.py
@@ -17,13 +17,22 @@
 )
 from ....messages import AssistantMessage, Message, UserMessage
 from ....tools import FORMAT_TOOL_NAME, AnyToolSchema, BaseToolkit
-from ...base import Params, _utils as _base_utils
+from ...base import Params, ThinkingLevel, _utils as _base_utils
 from ..model_id import AnthropicModelId, model_name

 DEFAULT_MAX_TOKENS = 16000
 # TODO: Change DEFAULT_FORMAT_MODE to strict when strict is no longer a beta feature.
 DEFAULT_FORMAT_MODE = "tool"

+# Maps each thinking level to a multiplier (fraction of max_tokens) for the budget
+THINKING_LEVEL_TO_BUDGET_MULTIPLIER: dict[ThinkingLevel, float] = {
+    "minimal": 0,  # Clamped up to 1024, the minimum allowed budget
+    "low": 0.2,
+    "medium": 0.4,
+    "high": 0.6,
+    "max": 0.8,
+}
+
 AnthropicImageMimeType = Literal["image/jpeg", "image/png", "image/gif", "image/webp"]


@@ -36,6 +45,30 @@ def encode_image_mime_type(mime_type: ImageMimeType) -> AnthropicImageMimeType:
     ) # pragma: no cover


+def compute_thinking_budget(
+    level: ThinkingLevel,
+    max_tokens: int,
+) -> int:
+    """Compute Anthropic token budget from ThinkingConfig level.
+
+    Args:
+        level: The thinking level from ThinkingConfig
+        max_tokens: The max_tokens value for the request
+
+    Returns:
+        Token budget for thinking (0 to disable, -1 to leave unset, positive budget otherwise)
+    """
+
+    if level == "none":
+        return 0
+    elif level == "default":
+        return -1  # Do not set thinking, leave to provider default
+
+    multiplier: float = THINKING_LEVEL_TO_BUDGET_MULTIPLIER.get(level, 0.4)
+    budget = int(multiplier * max_tokens)
+    return max(1024, budget)  # Always return at least 1024, the minimum allowed budget
+
+
 class ProcessedParams(TypedDict, total=False):
     """Common parameters processed from Params."""

@@ -45,7 +78,7 @@ class ProcessedParams(TypedDict, total=False):
     top_k: int
     stop_sequences: list[str]
     thinking: dict[str, Any]
-    encode_thoughts: bool
+    encode_thoughts_as_text: bool


 def process_params(params: Params, default_max_tokens: int) -> ProcessedParams:
@@ -55,7 +88,7 @@ def process_params(params: Params, default_max_tokens: int) -> ProcessedParams:
     """
     result: ProcessedParams = {
         "max_tokens": default_max_tokens,
-        "encode_thoughts": False,
+        "encode_thoughts_as_text": False,
     }

     with _base_utils.ensure_all_params_accessed(
@@ -72,13 +105,22 @@ def process_params(params: Params, default_max_tokens: int) -> ProcessedParams:
         if param_accessor.stop_sequences is not None:
             result["stop_sequences"] = param_accessor.stop_sequences
         if param_accessor.thinking is not None:
-            if param_accessor.thinking:
-                budget_tokens = max(1024, result["max_tokens"] // 2)
+            thinking_config = param_accessor.thinking
+            level = thinking_config.get("level")
+
+            # Compute token budget from level
+            budget_tokens = compute_thinking_budget(level, result["max_tokens"])
+            if budget_tokens == 0:
+                result["thinking"] = {"type": "disabled"}
+            elif budget_tokens > 0:
                 result["thinking"] = {"type": "enabled", "budget_tokens": budget_tokens}
             else:
-                result["thinking"] = {"type": "disabled"}
-        if param_accessor.encode_thoughts_as_text:
-            result["encode_thoughts"] = True
+                # budget is -1, do not set thinking at all.
+                pass
+
+            # Handle encode_thoughts_as_text from ThinkingConfig
+            if thinking_config.get("encode_thoughts_as_text"):
+                result["encode_thoughts_as_text"] = True

     return result

@@ -253,7 +295,7 @@ def _encode_message(
 def _encode_messages(
     messages: Sequence[UserMessage | AssistantMessage],
     model_id: AnthropicModelId,
-    encode_thoughts: bool,
+    encode_thoughts_as_text: bool,
 ) -> Sequence[anthropic_types.MessageParam]:
     """Encode messages and add cache control for multi-turn conversations.

@@ -269,7 +311,7 @@ def _encode_messages(
         is_last = i == len(messages) - 1
         add_cache = has_assistant_message and is_last
         encoded_messages.append(
-            _encode_message(message, model_id, encode_thoughts, add_cache)
+            _encode_message(message, model_id, encode_thoughts_as_text, add_cache)
         )
     return encoded_messages

@@ -297,7 +339,7 @@ def encode_request(
     """Prepares a request for the Anthropic messages.create method."""

     processed = process_params(params, DEFAULT_MAX_TOKENS)
-    encode_thoughts = processed.pop("encode_thoughts", False)
+    encode_thoughts_as_text = processed.pop("encode_thoughts_as_text", False)
     max_tokens = processed.pop("max_tokens", DEFAULT_MAX_TOKENS)

     kwargs: MessageCreateKwargs = MessageCreateKwargs(
@@ -341,7 +383,9 @@ def encode_request(
         messages
     )

-    kwargs["messages"] = _encode_messages(remaining_messages, model_id, encode_thoughts)
+    kwargs["messages"] = _encode_messages(
+        remaining_messages, model_id, encode_thoughts_as_text
+    )

     if system_message_content:
         kwargs["system"] = [
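The level-to-budget arithmetic in compute_thinking_budget is easiest to verify with concrete numbers. This standalone sketch mirrors the logic added above (it does not import the private module) and prints the budgets for the default max_tokens of 16000:

# Mirrors THINKING_LEVEL_TO_BUDGET_MULTIPLIER and compute_thinking_budget above.
MULTIPLIERS = {"minimal": 0, "low": 0.2, "medium": 0.4, "high": 0.6, "max": 0.8}

def budget(level: str, max_tokens: int = 16000) -> int:
    if level == "none":
        return 0  # thinking disabled
    if level == "default":
        return -1  # leave thinking unset
    return max(1024, int(MULTIPLIERS.get(level, 0.4) * max_tokens))

for level in ("minimal", "low", "medium", "high", "max"):
    print(level, budget(level))
# minimal 1024, low 3200, medium 6400, high 9600, max 12800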
4 changes: 3 additions & 1 deletion python/mirascope/llm/providers/base/__init__.py
@@ -3,7 +3,7 @@
 from . import _utils
 from .base_provider import BaseProvider, Provider, ProviderErrorMap
 from .kwargs import BaseKwargs, KwargsT
-from .params import Params
+from .params import Params, ThinkingConfig, ThinkingLevel

 __all__ = [
     "BaseKwargs",
@@ -12,5 +12,7 @@
     "Params",
     "Provider",
     "ProviderErrorMap",
+    "ThinkingConfig",
+    "ThinkingLevel",
     "_utils",
 ]
9 changes: 2 additions & 7 deletions python/mirascope/llm/providers/base/_utils.py
@@ -10,6 +10,7 @@

 if TYPE_CHECKING:
     from ..model_id import ModelId
+    from .params import ThinkingConfig

 logger = logging.getLogger(__name__)

@@ -138,17 +139,11 @@ def stop_sequences(self) -> list[str] | None:
         return self._params.get("stop_sequences")

     @property
-    def thinking(self) -> bool | None:
+    def thinking(self) -> "ThinkingConfig | None":
         """Access the thinking parameter."""
         self._unaccessed.discard("thinking")
         return self._params.get("thinking")

-    @property
-    def encode_thoughts_as_text(self) -> bool | None:
-        """Access the encode_thoughts_as_text parameter."""
-        self._unaccessed.discard("encode_thoughts_as_text")
-        return self._params.get("encode_thoughts_as_text")
-
     def emit_warning_for_unused_param(
         self,
         param_name: str,
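The params.py definitions are not part of this diff, but the usage visible here pins down their rough shape: process_params reads "level" and "encode_thoughts_as_text" keys from the config, and the handled levels are "none", "default", plus the five multiplier entries. A plausible reconstruction, offered only as a sketch since the actual file is unseen:

from typing import Literal, TypedDict

# Reconstructed from usage in this diff; the real definitions live in
# python/mirascope/llm/providers/base/params.py and may differ.
ThinkingLevel = Literal["none", "minimal", "low", "medium", "high", "max", "default"]

class ThinkingConfig(TypedDict, total=False):
    level: ThinkingLevel
    encode_thoughts_as_text: bool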