Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 76 additions & 34 deletions python/mirascope/llm/providers/google/_utils/encode.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,46 +18,100 @@
)
from ....messages import AssistantMessage, Message, UserMessage
from ....tools import FORMAT_TOOL_NAME, AnyToolSchema, BaseToolkit
from ...base import Params, ThinkingLevel, _utils as _base_utils
from ...base import Params, ThinkingConfig, ThinkingLevel, _utils as _base_utils
from ..model_id import GoogleModelId, model_name
from ..model_info import MODELS_WITHOUT_STRUCTURED_OUTPUT_AND_TOOLS_SUPPORT

UNKNOWN_TOOL_ID = "google_unknown_tool_id"

# Thinking level to a float multiplier % of max tokens
# Maps each thinking level to the fraction of max_tokens used as the thinking budget (Gemini 2.5 models, which take a token budget)
THINKING_LEVEL_TO_BUDGET_MULTIPLIER: dict[ThinkingLevel, float] = {
"minimal": 0,
"none": 0,
"minimal": 0.1,
"low": 0.2,
"medium": 0.4,
"high": 0.6,
"max": 0.8,
}

# Gemini 3 Pro supports only LOW or HIGH
# https://ai.google.dev/gemini-api/docs/gemini-3#thinking_level
THINKING_LEVEL_FOR_GEMINI_3_PRO: dict[ThinkingLevel, genai_types.ThinkingLevel] = {
"default": genai_types.ThinkingLevel.THINKING_LEVEL_UNSPECIFIED,
"none": genai_types.ThinkingLevel.LOW,
"minimal": genai_types.ThinkingLevel.LOW,
"low": genai_types.ThinkingLevel.LOW,
"medium": genai_types.ThinkingLevel.HIGH,
"high": genai_types.ThinkingLevel.HIGH,
"max": genai_types.ThinkingLevel.HIGH,
}

# Gemini 3 Flash supports MINIMAL, LOW, MEDIUM, HIGH
# https://ai.google.dev/gemini-api/docs/gemini-3#thinking_level
THINKING_LEVEL_FOR_GEMINI_3_FLASH: dict[ThinkingLevel, genai_types.ThinkingLevel] = {
"default": genai_types.ThinkingLevel.THINKING_LEVEL_UNSPECIFIED,
"none": genai_types.ThinkingLevel.MINIMAL,
"minimal": genai_types.ThinkingLevel.MINIMAL,
"low": genai_types.ThinkingLevel.LOW,
"medium": genai_types.ThinkingLevel.MEDIUM,
"high": genai_types.ThinkingLevel.HIGH,
"max": genai_types.ThinkingLevel.HIGH,
}


def compute_thinking_budget(
level: ThinkingLevel,
def google_thinking_config(
thinking_config: ThinkingConfig,
max_tokens: int | None,
) -> int:
"""Compute Google thinking budget from ThinkingConfig level.
model_id: GoogleModelId,
) -> genai_types.ThinkingConfigDict:
"""Compute Google thinking configuration based on model version.

Args:
level: The thinking level from ThinkingConfig
thinking_config: The ThinkingConfig from params
max_tokens: Max output tokens (used to compute budget for 2.5 models)
model_id: The Google model ID to determine version

Returns:
Token budget for thinking:
- -1 for automatic budget (level=None)
- 0 to disable thinking ("minimal")
- Positive int for specific budget ("low", "medium", "high")
ThinkingConfigDict with either thinking_level or thinking_budget set.

Notes:
- Gemini 2.5 models use thinking_budget (token count)
- Gemini 3.0 Pro supports thinking_level "low" or "high"
- Gemini 3.0 Flash supports thinking_level "minimal", "low", "medium", "high"

See: https://ai.google.dev/gemini-api/docs/gemini-3#thinking_level
"""
if level == "default":
# Use Google's automatic budget
return -1
level: ThinkingLevel = thinking_config.get("level", "default")
include_summaries = thinking_config.get("include_summaries")

if max_tokens is None:
max_tokens = 16000
result = genai_types.ThinkingConfigDict()

multiplier: float = THINKING_LEVEL_TO_BUDGET_MULTIPLIER.get(level, 0.4)
return int(multiplier * max_tokens)
if "gemini-3-flash" in model_id:
result["thinking_level"] = THINKING_LEVEL_FOR_GEMINI_3_FLASH.get(
level, genai_types.ThinkingLevel.THINKING_LEVEL_UNSPECIFIED
)
elif "gemini-3-pro" in model_id:
result["thinking_level"] = THINKING_LEVEL_FOR_GEMINI_3_PRO.get(
level, genai_types.ThinkingLevel.THINKING_LEVEL_UNSPECIFIED
)
else: # Fall back to 2.5-style budgets
# 2.5 models use thinking_budget
if level == "default":
budget = -1 # Dynamic budget
elif level == "none":
budget = 0 # Disable thinking
else:
# Budget is a fraction of max_tokens (16000 is assumed when max_tokens is unset)
if max_tokens is None:
max_tokens = 16000
multiplier = THINKING_LEVEL_TO_BUDGET_MULTIPLIER.get(level, 0.4)
budget = int(multiplier * max_tokens)

result["thinking_budget"] = budget
if include_summaries is not None:
result["include_thoughts"] = include_summaries

return result


class GoogleKwargs(TypedDict, total=False):
Expand Down Expand Up @@ -241,24 +295,12 @@ def encode_request(
google_config["stop_sequences"] = param_accessor.stop_sequences
if param_accessor.thinking is not None:
thinking_config = param_accessor.thinking
level = thinking_config.get("level")
include_summaries = thinking_config.get("include_summaries", True)

# Compute budget from level
budget = compute_thinking_budget(level, param_accessor.max_tokens)
thinking_dict = genai_types.ThinkingConfigDict(
thinking_budget=budget,
# Compute thinking config based on model version
google_config["thinking_config"] = google_thinking_config(
thinking_config, param_accessor.max_tokens, model_id
)

if budget == 0:
# Disabled: no thoughts
thinking_dict["include_thoughts"] = False
else:
# Enable thoughts based on include_summaries
thinking_dict["include_thoughts"] = include_summaries

google_config["thinking_config"] = thinking_dict

# Handle encode_thoughts_as_text from ThinkingConfig
if thinking_config.get("encode_thoughts_as_text"):
encode_thoughts_as_text = True
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,67 @@ interactions:
status:
code: 200
message: OK
- request:
body: '{"contents": [{"parts": [{"text": "What is 4200 + 42?"}], "role": "user"}],
"generationConfig": {"temperature": 0.7, "topP": 0.3, "topK": 50.0, "maxOutputTokens":
500, "stopSequences": ["4242"], "seed": 42, "thinkingConfig": {"include_thoughts":
false, "thinking_budget": 0}}}'
headers:
accept:
- '*/*'
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '276'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
user-agent:
- google-genai-sdk/1.57.0 gl-python/3.10.16
x-goog-api-client:
- google-genai-sdk/1.57.0 gl-python/3.10.16
x-goog-api-key:
- <filtered>
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
response:
body:
string: !!binary |
H4sIAAAAAAAC/2WRT0+DQBDF73yKyV4FgxRj7NU/iSbGqkRNpIdNd6Ckyy5hp1rT9Ls7LKWlcQ/w
Mu/tzPBjGwCIhTSqUpLQiSl8cQVg65+dZw2hITaGEhcb2dIx25/tSHOEcNNdEpmFojIKaIng1jXY
AtIkjoFHsgjhB4HHg1Q+UgPZElm009zkhht14dycgZe5ifrjLTGauDvoeXjcs7UauyVqq1AP8d0Q
ELxY5ZavKJ01Xewte56Jg8tb44bLcTAM8K3F2skSn5AkE5MHLqJpbd1QZldobuzaE7uY9M1GgE/8
Sbr3yZLUJ1Z6Ff5r6255aKXH4Ef/hL9R6op+PfS7z0yMONDpVgMI/54HeyQ9pXdsXdXjKLFmQFFy
fhkVWrql7yhadI01Dh9Ul1GP16Usoo97ibqm1Sz+niXpy0IEu+APrlJBf1sCAAA=
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Encoding:
- gzip
Content-Type:
- application/json; charset=UTF-8
Date:
- Fri, 09 Jan 2026 06:25:56 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=729
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- SAMEORIGIN
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,79 @@ interactions:
status:
code: 200
message: OK
- request:
body: '{"contents": [{"parts": [{"text": "Is the first fibonacci number to end
with the digits 57 prime?"}], "role": "user"}, {"parts": [{"text": "**Thinking:**
Let me see... I have instantenously remembered this table of fibonacci\nnumbers
with their prime factorizations. That''s sure convenient! Let\nme see if it
has the answer:\n0 : 0\n1 : 1\n2 : 1\n3 : 2\n4 : 3\n5 : 5\n6 : 8 = 23\n7 : 13\n8
: 21 = 3 x 7\n9 : 34 = 2 x 17\n10 : 55 = 5 x 11\n11 : 89\n12 : 144 = 24 x 32\n13
: 233\n14 : 377 = 13 x 29\n15 : 610 = 2 x 5 x 61\n16 : 987 = 3 x 7 x 47\n17
: 1597\n18 : 2584 = 23 x 17 x 19\n19 : 4181 = 37 x 113\n20 : 6765 = 3 x 5 x
11 x 41\n21 : 10946 = 2 x 13 x 421\n22 : 17711 = 89 x 199\n23 : 28657\n\nThere
we have it! 28657 is the first fibonacci number ending in 57,\nand it is prime.
I''m supposed to answer with extreme concision, so I''ll\njust say ''Yes.''"},
{"text": "Yes."}], "role": "model"}, {"parts": [{"text": "Please tell me what
the number is."}], "role": "user"}], "systemInstruction": {"parts": [{"text":
"Always answer with extreme concision, giving the answer and no added context."}],
"role": "user"}, "generationConfig": {"thinkingConfig": {"thinking_budget":
1600}}}'
headers:
accept:
- '*/*'
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '1179'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
user-agent:
- google-genai-sdk/1.57.0 gl-python/3.10.16
x-goog-api-client:
- google-genai-sdk/1.57.0 gl-python/3.10.16
x-goog-api-key:
- <filtered>
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
response:
body:
string: !!binary |
H4sIAAAAAAAC/2WQQU+EMBCF7/0VpOfFGBQWvBk1cWOMGyXGaDw0doBqaZHOJmsI/90WFrZoD00z
73Vm3teRIKAfTHHBGYKhF8GbrQRBN9xO0wpBoRWmki02rMWjdzyd97YWhL37RKM0idfU0/r5/b46
dmy1BGevNQc52fvJQAuhhKkegRmtnO0pf9jSWRWKw96WT8k0YGhNd4aVcA/IbDY2J6BNq+sGc/0F
6krvhmzn0Xrs5rFYGOKDjBqZXH7N0tW/vubaThXSZ+ThsyGZFPjjkuQ3Lzn1QOCftSYUxCNGsdK7
ssLliklCDshGis/QGjHiKqG2AMPoJA4LyUw1DKQtmEYrAxvuPPw1K1nxqW+zu803mm0YnklzmVLS
k18Qn6izJQIAAA==
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Encoding:
- gzip
Content-Type:
- application/json; charset=UTF-8
Date:
- Fri, 09 Jan 2026 06:25:57 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=997
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- SAMEORIGIN
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
version: 1
Loading