5
5
import os
6
6
from copy import deepcopy
7
7
from dataclasses import dataclass
8
- from typing import AsyncIterable , Callable , Literal , overload
8
+ from typing import AsyncIterable , Callable , Literal , cast , overload
9
9
from urllib .parse import urlencode
10
10
11
11
import aiohttp
31
31
"response_content_done" ,
32
32
"response_output_done" ,
33
33
"response_done" ,
34
+ "function_calls_collected" ,
35
+ "function_calls_finished" ,
34
36
]
35
37
36
38
@@ -505,9 +507,10 @@ def create(
505
507
}
506
508
else :
507
509
# function_call
508
- if not message .tool_calls :
510
+ if not message .tool_calls or message . name is None :
509
511
logger .warning (
510
- "function call message has no tool calls" ,
512
+ "function call message has no name or tool calls: %s" ,
513
+ message ,
511
514
extra = self ._sess .logging_extra (),
512
515
)
513
516
return
@@ -530,6 +533,13 @@ def create(
530
533
},
531
534
}
532
535
else :
536
+ if message_content is None :
537
+ logger .warning (
538
+ "message content is None, skipping: %s" ,
539
+ message ,
540
+ extra = self ._sess .logging_extra (),
541
+ )
542
+ return
533
543
if not isinstance (message_content , list ):
534
544
message_content = [message_content ]
535
545
@@ -595,18 +605,23 @@ def create(
595
605
system_contents : list [api_proto .InputTextContent ] = []
596
606
for cnt in message_content :
597
607
if isinstance (cnt , str ):
598
- system_contents .append (
599
- {
600
- "id" : message .id ,
601
- "type" : "input_text" ,
602
- "text" : cnt ,
603
- }
604
- )
608
+ system_contents .append ({"type" : "input_text" , "text" : cnt })
605
609
elif isinstance (cnt , llm .ChatAudio ):
606
610
logger .warning (
607
611
"audio content in system message is not supported"
608
612
)
609
613
614
+ event = {
615
+ "type" : "conversation.item.create" ,
616
+ "previous_item_id" : previous_item_id ,
617
+ "item" : {
618
+ "id" : message .id ,
619
+ "type" : "message" ,
620
+ "role" : "system" ,
621
+ "content" : system_contents ,
622
+ },
623
+ }
624
+
610
625
if event is None :
611
626
logger .warning (
612
627
"chat message is not supported inside the realtime API %s" ,
@@ -654,7 +669,7 @@ async def acreate(
654
669
async def adelete (self , * , item_id : str ) -> None :
655
670
fut = asyncio .Future [None ]()
656
671
self ._sess ._item_deleted_futs [item_id ] = fut
657
- self .delete (item_id )
672
+ self .delete (item_id = item_id )
658
673
await fut
659
674
del self ._sess ._item_deleted_futs [item_id ]
660
675
@@ -851,9 +866,9 @@ async def set_chat_ctx(self, new_ctx: llm.ChatContext) -> None:
851
866
},
852
867
)
853
868
854
- # append an empty audio message if all messages are text
855
- if new_ctx . messages and not any (
856
- isinstance (msg .content , llm .ChatAudio ) for msg in new_ctx . messages
869
+ # append an empty audio message if all new messages are text
870
+ if changes . to_add and not any (
871
+ isinstance (msg .content , llm .ChatAudio ) for _ , msg in changes . to_add
857
872
):
858
873
# Patch: add an empty audio message to the chat context
859
874
# to set the API in audio mode
@@ -901,8 +916,8 @@ def _update_converstation_item_content(
901
916
) -> None :
902
917
item = self ._remote_converstation_items .get (item_id )
903
918
if item is None :
904
- logger .error (
905
- "conversation item not found" ,
919
+ logger .warning (
920
+ "conversation item not found, skipping update " ,
906
921
extra = {"item_id" : item_id },
907
922
)
908
923
return
@@ -1118,11 +1133,13 @@ def _handle_conversation_item_created(
1118
1133
# Leave the content empty and fill it in later from the content parts
1119
1134
if item_type == "message" :
1120
1135
# Handle message items (system/user/assistant)
1136
+ item = cast (api_proto .SystemItem | api_proto .UserItem , item )
1121
1137
role = item ["role" ]
1122
1138
message = llm .ChatMessage (id = item_id , role = role )
1123
1139
if item .get ("content" ):
1124
1140
content = item ["content" ][0 ]
1125
1141
if content ["type" ] in ("text" , "input_text" ):
1142
+ content = cast (api_proto .InputTextContent , content )
1126
1143
message .content = content ["text" ]
1127
1144
elif content ["type" ] == "input_audio" and content .get ("audio" ):
1128
1145
audio_data = base64 .b64decode (content ["audio" ])
@@ -1137,6 +1154,7 @@ def _handle_conversation_item_created(
1137
1154
1138
1155
elif item_type == "function_call" :
1139
1156
# Handle function call items
1157
+ item = cast (api_proto .FunctionCallItem , item )
1140
1158
message = llm .ChatMessage (
1141
1159
id = item_id ,
1142
1160
role = "assistant" ,
@@ -1146,6 +1164,7 @@ def _handle_conversation_item_created(
1146
1164
1147
1165
elif item_type == "function_call_output" :
1148
1166
# Handle function call output items
1167
+ item = cast (api_proto .FunctionCallOutputItem , item )
1149
1168
message = llm .ChatMessage (
1150
1169
id = item_id ,
1151
1170
role = "tool" ,
0 commit comments