Mirascope · teamdandelion · Jan 9, 2026 · Jan 9, 2026
@@ -18,46 +18,100 @@
 )
 from ....messages import AssistantMessage, Message, UserMessage
 from ....tools import FORMAT_TOOL_NAME, AnyToolSchema, BaseToolkit
-from ...base import Params, ThinkingLevel, _utils as _base_utils
+from ...base import Params, ThinkingConfig, ThinkingLevel, _utils as _base_utils
 from ..model_id import GoogleModelId, model_name
 from ..model_info import MODELS_WITHOUT_STRUCTURED_OUTPUT_AND_TOOLS_SUPPORT
 
 UNKNOWN_TOOL_ID = "google_unknown_tool_id"
 
-# Thinking level to a float multiplier % of max tokens
+# Thinking level to fraction of max tokens used as thinking budget (for 2.5 models)
 THINKING_LEVEL_TO_BUDGET_MULTIPLIER: dict[ThinkingLevel, float] = {
-    "minimal": 0,
+    "none": 0,
+    "minimal": 0.1,
     "low": 0.2,
     "medium": 0.4,
     "high": 0.6,
     "max": 0.8,
 }
 
+# Gemini 3 Pro supports only LOW or HIGH
+# https://ai.google.dev/gemini-api/docs/gemini-3#thinking_level
+THINKING_LEVEL_FOR_GEMINI_3_PRO: dict[ThinkingLevel, genai_types.ThinkingLevel] = {
+    "default": genai_types.ThinkingLevel.THINKING_LEVEL_UNSPECIFIED,
+    "none": genai_types.ThinkingLevel.LOW,
+    "minimal": genai_types.ThinkingLevel.LOW,
+    "low": genai_types.ThinkingLevel.LOW,
+    "medium": genai_types.ThinkingLevel.HIGH,
+    "high": genai_types.ThinkingLevel.HIGH,
+    "max": genai_types.ThinkingLevel.HIGH,
+}
+
+# Gemini 3 Flash supports MINIMAL, LOW, MEDIUM, HIGH
+# https://ai.google.dev/gemini-api/docs/gemini-3#thinking_level
+THINKING_LEVEL_FOR_GEMINI_3_FLASH: dict[ThinkingLevel, genai_types.ThinkingLevel] = {
+    "default": genai_types.ThinkingLevel.THINKING_LEVEL_UNSPECIFIED,
+    "none": genai_types.ThinkingLevel.MINIMAL,
+    "minimal": genai_types.ThinkingLevel.MINIMAL,
+    "low": genai_types.ThinkingLevel.LOW,
+    "medium": genai_types.ThinkingLevel.MEDIUM,
+    "high": genai_types.ThinkingLevel.HIGH,
+    "max": genai_types.ThinkingLevel.HIGH,
+}
+
 
-def compute_thinking_budget(
-    level: ThinkingLevel,
+def google_thinking_config(
+    thinking_config: ThinkingConfig,
     max_tokens: int | None,
-) -> int:
-    """Compute Google thinking budget from ThinkingConfig level.
+    model_id: GoogleModelId,
+) -> genai_types.ThinkingConfigDict:
+    """Compute Google thinking configuration based on model version.
 
     Args:
-        level: The thinking level from ThinkingConfig
+        thinking_config: The ThinkingConfig from params
+        max_tokens: Max output tokens (used to compute budget for 2.5 models)
+        model_id: The Google model ID to determine version
 
     Returns:
-        Token budget for thinking:
-        - -1 for automatic budget (level=None)
-        - 0 to disable thinking ("minimal")
-        - Positive int for specific budget ("low", "medium", "high")
+        ThinkingConfigDict with either thinking_level or thinking_budget set.
+
+    Notes:
+        - Gemini 2.5 models use thinking_budget (token count)
+        - Gemini 3.0 Pro supports thinking_level "low" or "high"
+        - Gemini 3.0 Flash supports thinking_level "minimal", "low", "medium", "high"
+
+    See: https://ai.google.dev/gemini-api/docs/gemini-3#thinking_level
     """
-    if level == "default":
-        # Use Google's automatic budget
-        return -1
+    level: ThinkingLevel = thinking_config.get("level", "default")
+    include_summaries = thinking_config.get("include_summaries")
 
-    if max_tokens is None:
-        max_tokens = 16000
+    result = genai_types.ThinkingConfigDict()
 
-    multiplier: float = THINKING_LEVEL_TO_BUDGET_MULTIPLIER.get(level, 0.4)
-    return int(multiplier * max_tokens)
+    if "gemini-3-flash" in model_id:
+        result["thinking_level"] = THINKING_LEVEL_FOR_GEMINI_3_FLASH.get(
+            level, genai_types.ThinkingLevel.THINKING_LEVEL_UNSPECIFIED
+        )
+    elif "gemini-3-pro" in model_id:
+        result["thinking_level"] = THINKING_LEVEL_FOR_GEMINI_3_PRO.get(
+            level, genai_types.ThinkingLevel.THINKING_LEVEL_UNSPECIFIED
+        )
+    else:  # Fall back to 2.5-style budgets
+        # 2.5 models (and any model not matched above) use thinking_budget
+        if level == "default":
+            budget = -1  # Dynamic budget
+        elif level == "none":
+            budget = 0  # Disable thinking
+        else:
+            # Compute budget as a fraction of max_tokens (16000 if unset)
+            if max_tokens is None:
+                max_tokens = 16000
+            multiplier = THINKING_LEVEL_TO_BUDGET_MULTIPLIER.get(level, 0.4)
+            budget = int(multiplier * max_tokens)
+
+        result["thinking_budget"] = budget
+    if include_summaries is not None:
+        result["include_thoughts"] = include_summaries
+
+    return result
 
 
 class GoogleKwargs(TypedDict, total=False):
@@ -241,24 +295,12 @@ def encode_request(
             google_config["stop_sequences"] = param_accessor.stop_sequences
         if param_accessor.thinking is not None:
             thinking_config = param_accessor.thinking
-            level = thinking_config.get("level")
-            include_summaries = thinking_config.get("include_summaries", True)
 
-            # Compute budget from level
-            budget = compute_thinking_budget(level, param_accessor.max_tokens)
-            thinking_dict = genai_types.ThinkingConfigDict(
-                thinking_budget=budget,
+            # Compute thinking config based on model version
+            google_config["thinking_config"] = google_thinking_config(
+                thinking_config, param_accessor.max_tokens, model_id
             )
 
-            if budget == 0:
-                # Disabled: no thoughts
-                thinking_dict["include_thoughts"] = False
-            else:
-                # Enable thoughts based on include_summaries
-                thinking_dict["include_thoughts"] = include_summaries
-
-            google_config["thinking_config"] = thinking_dict
-
             # Handle encode_thoughts_as_text from ThinkingConfig
             if thinking_config.get("encode_thoughts_as_text"):
                 encode_thoughts_as_text = True

@@ -62,4 +62,67 @@ interactions:
     status:
       code: 200
       message: OK
+- request:
+    body: '{"contents": [{"parts": [{"text": "What is 4200 + 42?"}], "role": "user"}],
+      "generationConfig": {"temperature": 0.7, "topP": 0.3, "topK": 50.0, "maxOutputTokens":
+      500, "stopSequences": ["4242"], "seed": 42, "thinkingConfig": {"include_thoughts":
+      false, "thinking_budget": 0}}}'
+    headers:
+      accept:
+      - '*/*'
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '276'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+      user-agent:
+      - google-genai-sdk/1.57.0 gl-python/3.10.16
+      x-goog-api-client:
+      - google-genai-sdk/1.57.0 gl-python/3.10.16
+      x-goog-api-key:
+      - <filtered>
+    method: POST
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAC/2WRT0+DQBDF73yKyV4FgxRj7NU/iSbGqkRNpIdNd6Ckyy5hp1rT9Ls7LKWlcQ/w
+        Mu/tzPBjGwCIhTSqUpLQiSl8cQVg65+dZw2hITaGEhcb2dIx25/tSHOEcNNdEpmFojIKaIng1jXY
+        AtIkjoFHsgjhB4HHg1Q+UgPZElm009zkhht14dycgZe5ifrjLTGauDvoeXjcs7UauyVqq1AP8d0Q
+        ELxY5ZavKJ01Xewte56Jg8tb44bLcTAM8K3F2skSn5AkE5MHLqJpbd1QZldobuzaE7uY9M1GgE/8
+        Sbr3yZLUJ1Z6Ff5r6255aKXH4Ef/hL9R6op+PfS7z0yMONDpVgMI/54HeyQ9pXdsXdXjKLFmQFFy
+        fhkVWrql7yhadI01Dh9Ul1GP16Usoo97ibqm1Sz+niXpy0IEu+APrlJBf1sCAAA=
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Fri, 09 Jan 2026 06:25:56 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Server-Timing:
+      - gfet4t7; dur=729
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - nosniff
+      X-Frame-Options:
+      - SAMEORIGIN
+      X-XSS-Protection:
+      - '0'
+    status:
+      code: 200
+      message: OK
 version: 1
@@ -74,4 +74,79 @@ interactions:
     status:
       code: 200
       message: OK
+- request:
+    body: '{"contents": [{"parts": [{"text": "Is the first fibonacci number to end
+      with the digits 57 prime?"}], "role": "user"}, {"parts": [{"text": "**Thinking:**
+      Let me see... I have instantenously remembered this table of fibonacci\nnumbers
+      with their prime factorizations. That''s sure convenient! Let\nme see if it
+      has the answer:\n0 : 0\n1 : 1\n2 : 1\n3 : 2\n4 : 3\n5 : 5\n6 : 8 = 23\n7 : 13\n8
+      : 21 = 3 x 7\n9 : 34 = 2 x 17\n10 : 55 = 5 x 11\n11 : 89\n12 : 144 = 24 x 32\n13
+      : 233\n14 : 377 = 13 x 29\n15 : 610 = 2 x 5 x 61\n16 : 987 = 3 x 7 x 47\n17
+      : 1597\n18 : 2584 = 23 x 17 x 19\n19 : 4181 = 37 x 113\n20 : 6765 = 3 x 5 x
+      11 x 41\n21 : 10946 = 2 x 13 x 421\n22 : 17711 = 89 x 199\n23 : 28657\n\nThere
+      we have it! 28657 is the first fibonacci number ending in 57,\nand it is prime.
+      I''m supposed to answer with extreme concision, so I''ll\njust say ''Yes.''"},
+      {"text": "Yes."}], "role": "model"}, {"parts": [{"text": "Please tell me what
+      the number is."}], "role": "user"}], "systemInstruction": {"parts": [{"text":
+      "Always answer with extreme concision, giving the answer and no added context."}],
+      "role": "user"}, "generationConfig": {"thinkingConfig": {"thinking_budget":
+      1600}}}'
+    headers:
+      accept:
+      - '*/*'
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '1179'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+      user-agent:
+      - google-genai-sdk/1.57.0 gl-python/3.10.16
+      x-goog-api-client:
+      - google-genai-sdk/1.57.0 gl-python/3.10.16
+      x-goog-api-key:
+      - <filtered>
+    method: POST
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAC/2WQQU+EMBCF7/0VpOfFGBQWvBk1cWOMGyXGaDw0doBqaZHOJmsI/90WFrZoD00z
+        73Vm3teRIKAfTHHBGYKhF8GbrQRBN9xO0wpBoRWmki02rMWjdzyd97YWhL37RKM0idfU0/r5/b46
+        dmy1BGevNQc52fvJQAuhhKkegRmtnO0pf9jSWRWKw96WT8k0YGhNd4aVcA/IbDY2J6BNq+sGc/0F
+        6krvhmzn0Xrs5rFYGOKDjBqZXH7N0tW/vubaThXSZ+ThsyGZFPjjkuQ3Lzn1QOCftSYUxCNGsdK7
+        ssLliklCDshGis/QGjHiKqG2AMPoJA4LyUw1DKQtmEYrAxvuPPw1K1nxqW+zu803mm0YnklzmVLS
+        k18Qn6izJQIAAA==
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Fri, 09 Jan 2026 06:25:57 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Server-Timing:
+      - gfet4t7; dur=997
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - nosniff
+      X-Frame-Options:
+      - SAMEORIGIN
+      X-XSS-Protection:
+      - '0'
+    status:
+      code: 200
+      message: OK
 version: 1