fix: unsupported message structure should not break loop; add transcription delta support

plutoless · plutoless · commit 1749ecd638a1 · 2025-04-01T17:37:40.000Z
diff --git a/realtime_agent/realtime/connection.py b/realtime_agent/realtime/connection.py
@@ -107,7 +107,7 @@ def handle_server_message(self, message: str) -> ServerToClientMessage:
             return parse_server_message(message)
         except Exception as e:
             logger.error("Error handling message: " + str(e))
-            raise e
+            #raise e
 
     async def close(self):
         # Close the websocket connection if it exists
diff --git a/realtime_agent/realtime/struct.py b/realtime_agent/realtime/struct.py
@@ -184,6 +184,7 @@ class EventType(str, Enum):
     ITEM_DELETED = "conversation.item.deleted"
     ITEM_TRUNCATED = "conversation.item.truncated"
     ITEM_INPUT_AUDIO_TRANSCRIPTION_COMPLETED = "conversation.item.input_audio_transcription.completed"
+    ITEM_INPUT_AUDIO_TRANSCRIPTION_DELTA = "conversation.item.input_audio_transcription.delta"
     ITEM_INPUT_AUDIO_TRANSCRIPTION_FAILED = "conversation.item.input_audio_transcription.failed"
 
     RESPONSE_CREATED = "response.created"
@@ -481,6 +482,13 @@ class ItemInputAudioTranscriptionCompleted(ServerToClientMessage):
     transcript: str  # The transcribed text
     type: str = EventType.ITEM_INPUT_AUDIO_TRANSCRIPTION_COMPLETED  # Fixed event type
 
+@dataclass
+class ItemInputAudioTranscriptionDelta(ServerToClientMessage):
+    item_id: str  # The ID of the item whose audio is being transcribed
+    content_index: int  # Index of the content part being transcribed
+    delta: str  # The incremental transcription text carried by this event
+    type: str = EventType.ITEM_INPUT_AUDIO_TRANSCRIPTION_DELTA  # Fixed event type
+
 @dataclass
 class ItemInputAudioTranscriptionFailed(ServerToClientMessage):
     item_id: str  # The ID of the item for which transcription failed
@@ -726,6 +734,8 @@ def parse_server_message(unparsed_string: str) -> ServerToClientMessage:
         return from_dict(ItemInputAudioTranscriptionCompleted, data)
     elif data["type"] == EventType.ITEM_INPUT_AUDIO_TRANSCRIPTION_FAILED:
         return from_dict(ItemInputAudioTranscriptionFailed, data)
+    elif data["type"] == EventType.ITEM_INPUT_AUDIO_TRANSCRIPTION_DELTA:
+        return from_dict(ItemInputAudioTranscriptionDelta, data)
 
     raise ValueError(f"Unknown message type: {data['type']}")