Nova 2 Sonic support #3212
Changes from 8 commits
```diff
@@ -0,0 +1,6 @@
+- Added to `AWSNovaSonicLLMService` functionality related to the new (and now
+  default) Nova 2 Sonic model (`"amazon.nova-2-sonic-v1:0"`):
+  - Added the `endpointing_sensitivity` parameter to control how quickly the
+    model decides the user has stopped speaking.
+  - Made the assistant-response-trigger hack a no-op. It's only needed for the
+    older Nova Sonic model.
```
```diff
@@ -0,0 +1 @@
+- Made `"amazon.nova-2-sonic-v1:0"` the new default model for `AWSNovaSonicLLMService`.
```
```diff
@@ -0,0 +1,2 @@
+- Fixed a bug in `AWSNovaSonicLLMService` where we would mishandle cancelled
+  tool calls in the context, resulting in errors.
```
```diff
@@ -5,7 +5,9 @@
 #
 
 import asyncio
 import os
+import random
 from datetime import datetime
 
 from dotenv import load_dotenv
@@ -33,7 +35,16 @@
 
 async def fetch_weather_from_api(params: FunctionCallParams):
-    temperature = 75 if params.arguments["format"] == "fahrenheit" else 24
+    temperature = (
+        random.randint(60, 85)
+        if params.arguments["format"] == "fahrenheit"
+        else random.randint(15, 30)
+    )
+    # Simulate a long network delay.
+    # You can continue chatting while waiting for this to complete.
+    # With Nova 2 Sonic (the default model), the assistant will respond
+    # appropriately once the function call is complete.
```
Contributor (Author): I think I'm witnessing a bit of buggy behavior in the model. When there are multiple in-flight tool calls, the model seems prone to getting confused and "mixing up" the results when they come back. For example, I see things like this:
Contributor: Interesting. In theory, the
Contributor (Author): Hm, if I fire all of the tool calls at the exact same time (e.g. "tell me the weather in San Diego, Washington, D.C., and New York"),
Contributor (Author): Ah, another clue! Maybe it has something to do with whether you interact with the model while the function calls are in flight. Going back to the previous example ("tell me the weather in San Diego, Washington, D.C., and New York"): if I ask the model to tell me a fun fact while I'm waiting for the weather results, the results might get scrambled when they arrive (the model might think San Diego's weather is New York's, etc.).
Contributor (Author): Another thing I'm running into: if there are multiple in-flight tool calls at the same time, then when they come back, the model is triggered into another unnecessary, duplicate tool call. And one last thing: if I hammer the model too hard by interrupting in-flight tool calls with new tool calls, it occasionally crashes with a "System instability detected" error.
Contributor (Author): But this is all somewhat aggressive testing. In the "normal" case of one tool call at a time (which probably covers most real-world usage), things seem to be working reasonably well.
Contributor: Yeah, I think so. And in this case, since they are handling the context, I'm not sure there's much we can do to try to prevent it.
```diff
+    await asyncio.sleep(10)
     await params.result_callback(
         {
             "conditions": "nice",
```
@@ -91,23 +102,31 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): | |
| logger.info(f"Starting bot") | ||
|
|
||
| # Specify initial system instruction. | ||
| # HACK: note that, for now, we need to inject a special bit of text into this instruction to | ||
| # allow the first assistant response to be programmatically triggered (which happens in the | ||
| # on_client_connected handler, below) | ||
| system_instruction = ( | ||
| "You are a friendly assistant. The user and you will engage in a spoken dialog exchanging " | ||
| "the transcripts of a natural real-time conversation. Keep your responses short, generally " | ||
| "two or three sentences for chatty scenarios. " | ||
| f"{AWSNovaSonicLLMService.AWAIT_TRIGGER_ASSISTANT_RESPONSE_INSTRUCTION}" | ||
| "two or three sentences for chatty scenarios." | ||
| # HACK: if using the older Nova Sonic (pre-2) model, note that you need to inject a special | ||
| # bit of text into this instruction to allow the first assistant response to be | ||
| # programmatically triggered (which happens in the on_client_connected handler) | ||
| # f"{AWSNovaSonicLLMService.AWAIT_TRIGGER_ASSISTANT_RESPONSE_INSTRUCTION}" | ||
| ) | ||
|
|
||
| # Create the AWS Nova Sonic LLM service | ||
| llm = AWSNovaSonicLLMService( | ||
| secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), | ||
| access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), | ||
| region=os.getenv("AWS_REGION"), # as of 2025-05-06, us-east-1 is the only supported region | ||
| # as of 2025-12-09, these are the supported regions: | ||
| # - Nova 2 Sonic (the default model): | ||
| # - us-east-1 | ||
| # - us-west-2 | ||
| # - ap-northeast-1 | ||
| # - Nova Sonic (the older model): | ||
| # - us-east-1 | ||
| # - ap-northeast-1 | ||
| region=os.getenv("AWS_REGION"), | ||
| session_token=os.getenv("AWS_SESSION_TOKEN"), | ||
| voice_id="tiffany", # matthew, tiffany, amy | ||
| voice_id="tiffany", | ||
| # you could choose to pass instruction here rather than via context | ||
| # system_instruction=system_instruction | ||
| # you could choose to pass tools here rather than via context | ||
|
```diff
@@ -117,7 +136,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
     # Register function for function calls
     # you can either register a single function for all function calls, or specific functions
     # llm.register_function(None, fetch_weather_from_api)
-    llm.register_function("get_current_weather", fetch_weather_from_api)
+    llm.register_function(
+        "get_current_weather", fetch_weather_from_api, cancel_on_interruption=False
+    )
 
     # Set up context and context management.
     context = LLMContext(
```
```diff
@@ -159,10 +180,10 @@ async def on_client_connected(transport, client):
         logger.info(f"Client connected")
         # Kick off the conversation.
         await task.queue_frames([LLMRunFrame()])
-        # HACK: for now, we need this special way of triggering the first assistant response in AWS
-        # Nova Sonic. Note that this trigger requires a special corresponding bit of text in the
-        # system instruction. In the future, simply queueing the context frame should be sufficient.
-        await llm.trigger_assistant_response()
+        # HACK: if using the older Nova Sonic (pre-2) model, you need this special way of
+        # triggering the first assistant response. Note that this trigger requires a special
+        # corresponding bit of text in the system instruction.
+        # await llm.trigger_assistant_response()
 
     # Handle client disconnection events
     @transport.event_handler("on_client_disconnected")
```
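The model-gated trigger logic in the handler above can be sketched as a standalone helper. This is a hedged illustration: `maybe_trigger_first_response` is a hypothetical function, not part of the Pipecat API, and the model ID check mirrors the `_is_first_generation_sonic_model` predicate from this PR.

```python
OLDER_NOVA_SONIC = "amazon.nova-sonic-v1:0"  # first-generation model ID

async def maybe_trigger_first_response(llm, model: str) -> bool:
    """Trigger the first assistant response only when the model needs it.

    With the older Nova Sonic, the first response must be kicked off via the
    pre-recorded audio trigger; with Nova 2 Sonic, queueing an LLMRunFrame is
    sufficient (assuming the context ends in a user message).
    """
    if model == OLDER_NOVA_SONIC:
        await llm.trigger_assistant_response()
        return True
    return False
```

With this shape, example code can stay model-agnostic: the trigger call becomes a no-op path when running against the new default model.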
```diff
@@ -157,6 +157,12 @@ class Params(BaseModel):
         max_tokens: Maximum number of tokens to generate.
         top_p: Nucleus sampling parameter.
         temperature: Sampling temperature for text generation.
+        endpointing_sensitivity: Controls how quickly Nova Sonic decides the
+            user has stopped speaking. Can be "LOW", "MEDIUM", or "HIGH", with
+            "HIGH" being the most sensitive (i.e., causing the model to respond
+            most quickly).
+            If not set, uses the model's default behavior.
+            Only supported with Nova 2 Sonic (the default model).
     """
 
     # Audio input
@@ -174,6 +180,9 @@ class Params(BaseModel):
     top_p: Optional[float] = Field(default=0.9)
     temperature: Optional[float] = Field(default=0.7)
 
+    # Turn-taking
+    endpointing_sensitivity: Optional[str] = Field(default=None)
```
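The gating behavior for this field can be modeled in isolation. A minimal sketch, with one stated assumption: `resolve_endpointing_sensitivity` is an illustrative name (the real service does this inline in `__init__`), and the `ValueError` on unknown values is an added safety check, not behavior shown in the diff (the service only warns when the model doesn't support the parameter).

```python
FIRST_GEN_SONIC = "amazon.nova-sonic-v1:0"
VALID_SENSITIVITIES = ("LOW", "MEDIUM", "HIGH")

def resolve_endpointing_sensitivity(model: str, sensitivity):
    """Return the sensitivity to send to the service, or None.

    The parameter is only honored by Nova 2 Sonic; for the first-generation
    model it is dropped (the real service logs a warning and clears it).
    """
    if sensitivity is None:
        return None
    if sensitivity not in VALID_SENSITIVITIES:
        # Illustrative validation; not present in the PR itself.
        raise ValueError(f"invalid endpointing_sensitivity: {sensitivity!r}")
    if model == FIRST_GEN_SONIC:
        return None  # unsupported on the older model; silently dropped here
    return sensitivity
```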
```diff
 class AWSNovaSonicLLMService(LLMService):
     """AWS Nova Sonic speech-to-speech LLM service.
 
@@ -192,8 +201,8 @@ def __init__(
         access_key_id: str,
         session_token: Optional[str] = None,
         region: str,
-        model: str = "amazon.nova-sonic-v1:0",
-        voice_id: str = "matthew",  # matthew, tiffany, amy
+        model: str = "amazon.nova-2-sonic-v1:0",
```
Contributor (Author): One thing I've noticed in testing this new model is that some guardrails are very strong. Some of the things I usually do during testing are occasionally treated as prohibited. For example, a few times when I asked the model to tell me a short story, it told me it didn't want to accidentally infringe on intellectual property, so it couldn't fulfill my request. Another time, when I asked for suggestions of things to do for fun while in Washington, D.C., it said it didn't want to answer, to avoid suggesting criminal or violent activities. In both cases, once the guardrail was "triggered", it became very hard to continue the conversation without completely changing topics. In the short-story scenario, for example, a follow-up request for a 3-sentence story was also blocked. The model then suggested "safer" topics, like general story arcs or story-writing tips. Each time I said "OK, let's discuss that", it still said it couldn't discuss those topics, for the same reason (not infringing on intellectual property). In the fun-things-to-do-around-town scenario, I followed up with things like "what about, like, going to museums?" and it still said it couldn't answer, for the same reason (it didn't want to suggest something harmful).
Contributor: Yeah, it looks like there are too many guardrails in this case now.
```diff
+        voice_id: str = "matthew",
         params: Optional[Params] = None,
         system_instruction: Optional[str] = None,
         tools: Optional[ToolsSchema] = None,
```
```diff
@@ -207,8 +216,15 @@ def __init__(
         access_key_id: AWS access key ID for authentication.
         session_token: AWS session token for authentication.
         region: AWS region where the service is hosted.
-        model: Model identifier. Defaults to "amazon.nova-sonic-v1:0".
-        voice_id: Voice ID for speech synthesis. Options: matthew, tiffany, amy.
+            Supported regions:
+            - Nova 2 Sonic (the default model): "us-east-1", "us-west-2", "ap-northeast-1"
+            - Nova Sonic (the older model): "us-east-1", "ap-northeast-1"
+        model: Model identifier. Defaults to "amazon.nova-2-sonic-v1:0".
+        voice_id: Voice ID for speech synthesis.
+            Note that some voices are designed for use with a specific language.
+            Options:
+            - Nova 2 Sonic (the default model): see https://docs.aws.amazon.com/nova/latest/nova2-userguide/sonic-language-support.html
+            - Nova Sonic (the older model): see https://docs.aws.amazon.com/nova/latest/userguide/available-voices.html
         params: Model parameters for audio configuration and inference.
         system_instruction: System-level instruction for the model.
         tools: Available tools/functions for the model to use.
```
```diff
@@ -232,6 +248,17 @@ def __init__(
         self._system_instruction = system_instruction
         self._tools = tools
 
+        # Validate endpointing_sensitivity parameter
+        if (
+            self._params.endpointing_sensitivity
+            and not self._is_endpointing_sensitivity_supported()
+        ):
+            logger.warning(
+                f"endpointing_sensitivity is not supported for model '{model}' and will be ignored. "
+                "This parameter is only supported starting with Nova 2 Sonic (amazon.nova-2-sonic-v1:0)."
+            )
+            self._params.endpointing_sensitivity = None
+
         if not send_transcription_frames:
             import warnings
```
```diff
@@ -459,7 +486,7 @@ async def _start_connecting(self):
     async def _process_completed_function_calls(self, send_new_results: bool):
         # Check for set of completed function calls in the context
         for message in self._context.get_messages():
-            if message.get("role") and message.get("content") != "IN_PROGRESS":
+            if message.get("role") and message.get("content") not in ["IN_PROGRESS", "CANCELLED"]:
                 tool_call_id = message.get("tool_call_id")
                 if tool_call_id and tool_call_id not in self._completed_tool_calls:
                     # Found a newly-completed function call - send the result to the service
```
```diff
@@ -591,11 +618,33 @@ def _create_client(self) -> BedrockRuntimeClient:
         )
         return BedrockRuntimeClient(config=config)
 
+    def _is_first_generation_sonic_model(self) -> bool:
+        # Nova Sonic (the older model) is identified by "amazon.nova-sonic-v1:0"
+        return self._model == "amazon.nova-sonic-v1:0"
+
+    def _is_endpointing_sensitivity_supported(self) -> bool:
+        # endpointing_sensitivity is only supported with Nova 2 Sonic (and,
+        # presumably, future models)
+        return not self._is_first_generation_sonic_model()
+
+    def _is_assistant_response_trigger_needed(self) -> bool:
+        # Assistant response trigger audio is only needed with the older model
```
Contributor (Author): Very glad about this 😄
```diff
+        return self._is_first_generation_sonic_model()
```
```diff
     #
     # LLM communication: input events (pipecat -> LLM)
     #
 
     async def _send_session_start_event(self):
+        turn_detection_config = (
+            f""",
+            "turnDetectionConfiguration": {{
+                "endpointingSensitivity": "{self._params.endpointing_sensitivity}"
+            }}"""
+            if self._params.endpointing_sensitivity
+            else ""
+        )
+
         session_start = f"""
         {{
             "event": {{
@@ -604,7 +653,7 @@ async def _send_session_start_event(self):
                 "maxTokens": {self._params.max_tokens},
                 "topP": {self._params.top_p},
                 "temperature": {self._params.temperature}
-            }}
+            }}{turn_detection_config}
         }}
         }}
         }}
```
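Assembled, the session-start payload looks roughly like this. A sketch using plain dicts rather than the service's f-string templating; the exact key nesting around `inferenceConfiguration` is inferred from the truncated diff, so treat the placement of `sessionStart` as an assumption.

```python
def build_session_start(max_tokens, top_p, temperature, endpointing_sensitivity=None):
    """Build a sessionStart-style event dict.

    turnDetectionConfiguration is included only when an endpointing
    sensitivity is set; the parameter is honored only by Nova 2 Sonic.
    """
    config = {
        "inferenceConfiguration": {
            "maxTokens": max_tokens,
            "topP": top_p,
            "temperature": temperature,
        }
    }
    if endpointing_sensitivity:
        config["turnDetectionConfiguration"] = {
            "endpointingSensitivity": endpointing_sensitivity
        }
    return {"event": {"sessionStart": config}}
```

Building a dict and serializing it (rather than splicing f-string fragments) also sidesteps the trailing-comma bookkeeping the optional fragment requires.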
```diff
@@ -1189,7 +1238,8 @@ def create_context_aggregator(
         )
 
     #
-    # assistant response trigger (HACK)
+    # assistant response trigger
+    # HACK: only needed for the older Nova Sonic (as opposed to Nova 2 Sonic) model
     #
 
     # Class variable
```
```diff
@@ -1203,12 +1253,17 @@ async def trigger_assistant_response(self):
 
         Sends a pre-recorded "ready" audio trigger to prompt the assistant
         to start speaking. This is useful for controlling conversation flow.
-
-        Returns:
-            False if already triggering a response, True otherwise.
         """
+        if not self._is_assistant_response_trigger_needed():
+            logger.warning(
+                f"Assistant response trigger not needed for model '{self._model}'; skipping. "
+                "An LLMRunFrame() should be sufficient to prompt the assistant to respond, "
+                "assuming the context ends in a user message."
+            )
+            return
+
         if self._triggering_assistant_response:
-            return False
+            return
 
         self._triggering_assistant_response = True
```
Cool. So they have fixed this. 🙌🎉