Skip to content

Commit 2c21661

Browse files
authored
feat: allow the turning off of thinking (#4)
* feat: allow the turning off of thinking
* tests: added tests for thinking
* chore: added missing place where `do_thinking` should have been passed on
1 parent da1638f commit 2c21661

File tree

6 files changed

+122
-21
lines changed

6 files changed

+122
-21
lines changed

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,4 +109,9 @@ prompt = (
109109

110110
result = run_prompt(prompt, use_grounding=True, inline_citations=True)
111111
pp(result)
112-
```
112+
```
113+
114+
## Thinking
115+
116+
You can enable or disable thinking in the model by toggling the `do_thinking` parameter.
117+
Only enable thinking when the task is complex enough to require it, because it makes responses slower and more expensive.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ license = { file = "LICENSE.md" }
99
requires-python = ">=3.10"
1010
dependencies = [
1111
"google-cloud-core>=2.4.3",
12-
"google-genai>=1.27.0",
12+
"google-genai>=1.59.0",
1313
"json-repair~=0.40.0",
1414
"pydantic>=2.11.7",
1515
"rapidfuzz>=3.13.0",

scripts/gemini_demo.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,12 @@ class Monarch(BaseModel):
7474

7575
answer_inline_citations = run_prompt(prompt, use_grounding=True, inline_citations=True)
7676
print("GROUNDING W/ CITATIONS", "\n", "-" * 100, "\n", answer_inline_citations, "\n\n")
77+
78+
# Thinking
79+
print("THINKING" + "\n" + "-" * 100)
80+
81+
test_prompt = """
82+
Why is fact checking important?
83+
""".strip()
84+
output = run_prompt(test_prompt, do_thinking=True)
85+
pp(output)

src/genai_utils/gemini.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,43 @@ def check_grounding_ran(response: types.GenerateContentResponse) -> bool:
293293
return bool(n_searches and n_chunks and n_supports)
294294

295295

296+
def get_thinking_config(
    model_name: str, do_thinking: bool
) -> types.ThinkingConfig | None:
    """
    Get the thinking config required for the current model.

    Thinking is configured differently before and after Gemini 3.0:
    pre-3.0 models use a ``thinking_budget`` token count, while 3.x
    models use a ``thinking_level``. Certain models, like the 2.5 and
    3.0 pro models, do not allow thinking to be disabled; for those we
    fall back to the lowest available setting and log a warning.

    Parameters
    ----------
    model_name: str
        Full Gemini model name, e.g. ``"gemini-2.5-flash-lite"``.
    do_thinking: bool
        Whether the model should use a thought process.

    Returns
    -------
    types.ThinkingConfig | None
        Config to pass to the API, or ``None`` when the model's default
        already matches the request (3.x with thinking on) or the model
        is not recognised.
    """
    if "gemini-2.5-pro" in model_name:
        if not do_thinking:
            _logger.warning(
                "It is not possible to turn off thinking with this model. Setting to minimum."
            )
            return types.ThinkingConfig(thinking_budget=128)  # minimum thinking
        return types.ThinkingConfig(thinking_budget=-1)  # dynamic budget

    # Lexicographic comparison: catches all 2.5 variants (and earlier 2.x
    # names, which sort below "gemini-2.6"). There is no 2.6 model; it is
    # used purely as an upper bound.
    if (
        model_name < "gemini-2.6"
    ):  # there is no 2.6, but this means it will catch all 2.5 variants
        if do_thinking:
            return types.ThinkingConfig(thinking_budget=-1)  # dynamic budget
        return types.ThinkingConfig(thinking_budget=0)  # disable thinking

    if model_name >= "gemini-3":
        if not do_thinking:
            if "pro" in model_name:
                _logger.warning(
                    "Cannot disable thinking in this model. Setting thinking to low."
                )
                return types.ThinkingConfig(thinking_level=types.ThinkingLevel.LOW)
            return types.ThinkingConfig(thinking_level=types.ThinkingLevel.MINIMAL)
        # 3.x models think by default, so no explicit config is needed.
        return None

    _logger.warning("Did not recognise the model provided, defaulting to None")
    return None
331+
332+
296333
def run_prompt(
297334
prompt: str,
298335
video_uri: str | None = None,
@@ -302,6 +339,7 @@ def run_prompt(
302339
safety_settings: list[types.SafetySetting] = DEFAULT_SAFETY_SETTINGS,
303340
model_config: ModelConfig | None = None,
304341
use_grounding: bool = False,
342+
do_thinking: bool = False,
305343
inline_citations: bool = False,
306344
labels: dict[str, str] = {},
307345
) -> str:
@@ -352,6 +390,10 @@ class Movie(BaseModel):
352390
and makes the output more likely to be factual.
353391
Does not work with structured output.
354392
See the docs (`grounding`_).
393+
do_thinking: bool
394+
Whether Gemini should use a thought process.
395+
This is more expensive but may yield better results.
396+
Do not use for bulk tasks that don't require complex thoughts.
355397
inline_citations: bool
356398
Whether output should include citations inline with the text.
357399
These citations will be links to be used as evidence.
@@ -379,6 +421,7 @@ class Movie(BaseModel):
379421
safety_settings=safety_settings,
380422
model_config=model_config,
381423
use_grounding=use_grounding,
424+
do_thinking=do_thinking,
382425
inline_citations=inline_citations,
383426
labels=labels,
384427
)
@@ -394,6 +437,7 @@ async def run_prompt_async(
394437
safety_settings: list[types.SafetySetting] = DEFAULT_SAFETY_SETTINGS,
395438
model_config: ModelConfig | None = None,
396439
use_grounding: bool = False,
440+
do_thinking: bool = False,
397441
inline_citations: bool = False,
398442
labels: dict[str, str] = {},
399443
) -> str:
@@ -444,6 +488,10 @@ class Movie(BaseModel):
444488
and makes the output more likely to be factual.
445489
Does not work with structured output.
446490
See the docs (`grounding`_).
491+
do_thinking: bool
492+
Whether Gemini should use a thought process.
493+
This is more expensive but may yield better results.
494+
Do not use for bulk tasks that don't require complex thoughts.
447495
inline_citations: bool
448496
Whether output should include citations inline with the text.
449497
These citations will be links to be used as evidence.
@@ -506,6 +554,7 @@ class Movie(BaseModel):
506554
safety_settings=safety_settings,
507555
**built_gen_config,
508556
labels=merged_labels,
557+
thinking_config=get_thinking_config(model_config.model_name, do_thinking),
509558
),
510559
)
511560

tests/genai_utils/test_gemini.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
import os
22
from unittest.mock import Mock, patch
33

4-
from google.genai import Client
4+
from google.genai import Client, types
55
from google.genai.client import AsyncClient
66
from google.genai.models import Models
77
from pydantic import BaseModel, Field
8+
from pytest import mark, param
89

910
from genai_utils.gemini import (
1011
DEFAULT_PARAMETERS,
1112
GeminiError,
1213
ModelConfig,
1314
generate_model_config,
15+
get_thinking_config,
1416
run_prompt_async,
1517
)
1618

@@ -143,3 +145,33 @@ async def test_error_if_citations_and_no_grounding(mock_client):
143145
return
144146

145147
assert False
148+
149+
150+
@mark.parametrize(
    "model_name,do_thinking,expected",
    [
        # Pre-2.6 models: thinking expressed as a token budget.
        param("gemini-2.0-flash", False, types.ThinkingConfig(thinking_budget=0)),
        param("gemini-2.0-flash", True, types.ThinkingConfig(thinking_budget=-1)),
        param("gemini-2.5-flash-lite", False, types.ThinkingConfig(thinking_budget=0)),
        param("gemini-2.5-flash-lite", True, types.ThinkingConfig(thinking_budget=-1)),
        # 2.5 pro cannot disable thinking: falls back to the minimum budget.
        param("gemini-2.5-pro", False, types.ThinkingConfig(thinking_budget=128)),
        param("gemini-2.5-pro", True, types.ThinkingConfig(thinking_budget=-1)),
        # 3.x models: thinking expressed as a level; None means model default.
        param(
            "gemini-3.0-flash",
            False,
            types.ThinkingConfig(thinking_level=types.ThinkingLevel.MINIMAL),
        ),
        param("gemini-3.0-flash", True, None),
        param(
            "gemini-3.0-pro",
            False,
            types.ThinkingConfig(thinking_level=types.ThinkingLevel.LOW),
        ),
        param("gemini-3.0-pro", True, None),
    ],
)
def test_get_thinking_config(
    model_name: str, do_thinking: bool, expected: types.ThinkingConfig
):
    """Each model family maps (model_name, do_thinking) to the expected config."""
    assert get_thinking_config(model_name, do_thinking) == expected

uv.lock

Lines changed: 24 additions & 18 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)