Merged

Commits (21)
217f03b
Add additional functionality related to "thinking", for Google and An…
kompfner Dec 2, 2025
0cdf0c4
Bump Google GenAI library version to at least 1.51.0, as that's the v…
kompfner Dec 4, 2025
c8c6f42
Add support for Gemini 3 Pro non-function-call-related thought signat…
kompfner Dec 4, 2025
15f5583
Simplify, at the expense of a bit of not-yet-needed flexibility: rath…
kompfner Dec 4, 2025
747bd4f
Tweak the prompt of the thinking + functions example to not confuse G…
kompfner Dec 4, 2025
4ea51ff
Slight refactor of handling thought-signature-containing special cont…
kompfner Dec 4, 2025
49f1f7d
Added CHANGELOG entry describing new thinking-related functionality
kompfner Dec 5, 2025
44aa117
Minor docstring update for accuracy
kompfner Dec 5, 2025
ef703e9
Get rid of `ThoughtTranscriptProcessor`, moving its logic into `Assis…
kompfner Dec 8, 2025
8ccc2cb
Add unit tests for `ThoughtTranscriptProcessor`
kompfner Dec 8, 2025
61674d7
Add `process_thought` constructor argument to `TranscriptProcessor` t…
kompfner Dec 8, 2025
17203ba
Change `FunctionInProgressFrame.llm_specific_extra` to a more generic…
kompfner Dec 8, 2025
7e92597
Remove `LLMThoughtSignatureFrame` in favor of using the more generic …
kompfner Dec 8, 2025
aa0529f
Update comments for accuracy
kompfner Dec 8, 2025
1249ee3
Better handle Gemini non-function thought signatures
kompfner Dec 8, 2025
229ff79
Better handle Gemini non-function thought signatures
kompfner Dec 8, 2025
c5ff5cc
Update CHANGELOG
kompfner Dec 8, 2025
0e88ad6
Add `ThoughtTranscriptionMessage.role`, which is always `"assistant"`
kompfner Dec 11, 2025
28248e9
Split up thinking examples so that there isn't an `llm` command-line …
kompfner Dec 11, 2025
1297929
Add thinking examples to eval suite
kompfner Dec 11, 2025
ccdd6cd
Fix a couple of typos in comments
kompfner Dec 11, 2025
41 changes: 41 additions & 0 deletions changelog/3175.added.md
@@ -0,0 +1,41 @@
- Added additional functionality related to "thinking" for Google and Anthropic
  LLMs.

  1. New typed parameters for Google and Anthropic LLMs that control the
     models' thinking behavior (such as how much thinking to do, and whether to
     output thoughts or thought summaries):
     - `AnthropicLLMService.ThinkingConfig`
     - `GoogleLLMService.ThinkingConfig`
  2. New frames for representing thoughts output by LLMs (see the sketch after
     this changelog entry):
     - `LLMThoughtStartFrame`
     - `LLMThoughtTextFrame`
     - `LLMThoughtEndFrame`
  3. A mechanism for appending arbitrary context messages after a function-call
     message, used specifically to support Google's function-call-related
     "thought signatures", which are necessary to ensure thinking continuity
     between function calls in a chain (where the model thinks, makes a
     function call, thinks some more, etc.). See:
     - the `append_extra_context_messages` field in `FunctionInProgressFrame`
       and helper types
     - `GoogleLLMService` leveraging the new mechanism to add a Google-specific
       `"fn_thought_signature"` message
     - `LLMAssistantAggregator` handling of `append_extra_context_messages`
     - `GeminiLLMAdapter` handling of `"fn_thought_signature"` messages
  4. A generic mechanism for recording LLM thoughts to context, used
     specifically to support Anthropic, whose thought signatures are expected
     to appear alongside the text of the thoughts within assistant context
     messages. See:
     - `LLMThoughtEndFrame.signature`
     - `LLMAssistantAggregator` handling of the above field
     - `AnthropicLLMAdapter` handling of `"thought"` context messages
  5. Google-specific logic for inserting non-function-call-related thought
     signatures into the context, to help maintain thinking continuity in a
     chain of LLM calls. See:
     - `GoogleLLMService` sending `LLMMessagesAppendFrame`s to add LLM-specific
       `"non_fn_thought_signature"` messages to context
     - `GeminiLLMAdapter` handling of `"non_fn_thought_signature"` messages
  6. An expansion of `TranscriptProcessor` to process LLM thoughts in addition
     to user and assistant utterances. See:
     - `TranscriptProcessor(process_thoughts=True)` (defaults to `False`)
     - `ThoughtTranscriptionMessage`, which is now also emitted with the
       `"on_transcript_update"` event
6 changes: 4 additions & 2 deletions examples/foundational/07n-interruptible-google-http.py
@@ -75,8 +75,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    llm = GoogleLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        model="gemini-2.5-flash",
-        # turn on thinking if you want it
-        # params=GoogleLLMService.InputParams(extra={"thinking_config": {"thinking_budget": 4096}}),)
+        # force a certain amount of thinking if you want it
+        # params=GoogleLLMService.InputParams(
+        #     thinking=GoogleLLMService.ThinkingConfig(thinking_budget=4096)
+        # ),
    )

    messages = [
6 changes: 4 additions & 2 deletions examples/foundational/07n-interruptible-google.py
@@ -75,8 +75,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    llm = GoogleLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        model="gemini-2.5-flash",
-        # turn on thinking if you want it
-        # params=GoogleLLMService.InputParams(extra={"thinking_config": {"thinking_budget": 4096}}),)
+        # force a certain amount of thinking if you want it
+        # params=GoogleLLMService.InputParams(
+        #     thinking=GoogleLLMService.ThinkingConfig(thinking_budget=4096)
+        # ),
    )

    messages = [
6 changes: 4 additions & 2 deletions examples/foundational/07s-interruptible-google-audio-in.py
@@ -224,8 +224,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    llm = GoogleLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        model="gemini-2.5-flash",
-        # turn on thinking if you want it
-        # params=GoogleLLMService.InputParams(extra={"thinking_config": {"thinking_budget": 4096}}),
+        # force a certain amount of thinking if you want it
+        # params=GoogleLLMService.InputParams(
+        #     thinking=GoogleLLMService.ThinkingConfig(thinking_budget=4096)
+        # ),
    )

    tts = GoogleTTSService(
161 changes: 161 additions & 0 deletions examples/foundational/49a-thinking-anthropic.py
@@ -0,0 +1,161 @@
#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import os

from dotenv import load_dotenv
from loguru import logger

from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.frames.frames import LLMRunFrame, ThoughtTranscriptionMessage, TranscriptionMessage
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
from pipecat.processors.transcript_processor import TranscriptProcessor
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.anthropic.llm import AnthropicLLMService
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.daily.transport import DailyParams
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams

load_dotenv(override=True)

# We store functions so objects (e.g. SileroVADAnalyzer) don't get
# instantiated. The function will be called when the desired transport gets
# selected.
transport_params = {
    "daily": lambda: DailyParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
    ),
    "twilio": lambda: FastAPIWebsocketParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
    ),
    "webrtc": lambda: TransportParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
    ),
}


async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

    llm = AnthropicLLMService(
        api_key=os.getenv("ANTHROPIC_API_KEY"),
        params=AnthropicLLMService.InputParams(
            thinking=AnthropicLLMService.ThinkingConfig(type="enabled", budget_tokens=2048)
        ),
    )

    transcript = TranscriptProcessor(process_thoughts=True)

    messages = [
        {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
        },
    ]

    context = LLMContext(messages)
    context_aggregator = LLMContextAggregatorPair(context)

    pipeline = Pipeline(
        [
            transport.input(),  # Transport user input
            stt,
            transcript.user(),  # User transcripts
            context_aggregator.user(),  # User responses
            llm,  # LLM
            tts,  # TTS
            transport.output(),  # Transport bot output
            transcript.assistant(),  # Assistant transcripts (including thoughts)
            context_aggregator.assistant(),  # Assistant spoken responses
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation.
        messages.append(
            {
                "role": "user",
                "content": "Say hello briefly.",
            }
        )
        # Here are some example prompts conducive to demonstrating
        # thinking (picked from Google and Anthropic docs).
        # messages.append(
        #     {
        #         "role": "user",
        #         "content": "Analogize photosynthesis and growing up. Keep your answer concise.",
        #         # "content": "Compare and contrast electric cars and hybrid cars."
        #         # "content": "Are there an infinite number of prime numbers such that n mod 4 == 3?"
        #     }
        # )
        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    # Register event handler for transcript updates
    @transcript.event_handler("on_transcript_update")
    async def on_transcript_update(processor, frame):
        for msg in frame.messages:
            if isinstance(msg, (ThoughtTranscriptionMessage, TranscriptionMessage)):
                timestamp = f"[{msg.timestamp}] " if msg.timestamp else ""
                role = "THOUGHT" if isinstance(msg, ThoughtTranscriptionMessage) else msg.role
                logger.info(f"Transcript: {timestamp}{role}: {msg.content}")

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    await runner.run(task)


async def bot(runner_args: RunnerArguments):
    """Main bot entry point compatible with Pipecat Cloud."""
    transport = await create_transport(runner_args, transport_params)
    await run_bot(transport, runner_args)


if __name__ == "__main__":
    from pipecat.runner.run import main

    main()