Skip to content

Commit 28e8e7b

Browse files
committed
chore: fix type check
1 parent 41e8152 commit 28e8e7b

File tree

5 files changed

+54
-28
lines changed

5 files changed

+54
-28
lines changed

examples/multimodal_agent.py

-4
Original file line number | Diff line number | Diff line change
@@ -70,10 +70,6 @@ async def get_weather(
7070

7171
# create a chat context with chat history
7272
chat_ctx = llm.ChatContext()
73-
74-
# Add some test context to verify if the sync_chat_ctx works
75-
# FIXME: OAI realtime API does not support this properly when the chat context is too many
76-
# It may answer with the text responses only for some cases
7773
chat_ctx.append(text="I'm planning a trip to Paris next month.", role="user")
7874
chat_ctx.append(
7975
text="How exciting! Paris is a beautiful city. I'd be happy to suggest some must-visit places and help you plan your trip.",

livekit-agents/livekit/agents/llm/chat_context.py

+4 -5
Original file line number | Diff line number | Diff line change
@@ -46,18 +46,16 @@ class ChatAudio:
4646
@dataclass
4747
class ChatMessage:
4848
role: ChatRole
49-
id: str | None = None # used by the OAI realtime API
49+
id: str = field(
50+
default_factory=lambda: utils.shortuuid("item_")
51+
) # used by the OAI realtime API
5052
name: str | None = None
5153
content: ChatContent | list[ChatContent] | None = None
5254
tool_calls: list[function_context.FunctionCallInfo] | None = None
5355
tool_call_id: str | None = None
5456
tool_exception: Exception | None = None
5557
_metadata: dict[str, Any] = field(default_factory=dict, repr=False, init=False)
5658

57-
def __post_init__(self):
58-
if self.id is None:
59-
self.id = utils.shortuuid("item_")
60-
6159
@staticmethod
6260
def create_tool_from_called_function(
6361
called_function: function_context.CalledFunction,
@@ -97,6 +95,7 @@ def create(
9795
role: ChatRole = "system",
9896
id: str | None = None,
9997
) -> "ChatMessage":
98+
id = id or utils.shortuuid("item_")
10099
if len(images) == 0:
101100
return ChatMessage(role=role, content=text, id=id)
102101
else:

livekit-agents/livekit/agents/utils/_message_change.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -71,8 +71,8 @@ def _compute_list_changes(old_list: list[T], new_list: list[T]) -> MessageChange
7171
first_idx = old_list.index(new_list[0])
7272
except ValueError:
7373
# Special case: if first item is new, delete everything
74-
prev_item = None
75-
to_add = []
74+
prev_item: T | None = None
75+
to_add: list[tuple[T | None, T]] = []
7676
for x in new_list:
7777
to_add.append((prev_item, x))
7878
prev_item = x
@@ -106,7 +106,7 @@ def _compute_list_changes(old_list: list[T], new_list: list[T]) -> MessageChange
106106
to_delete.extend(x for x in remaining_old if x not in kept_items)
107107

108108
# Compute items to add by following new list order
109-
to_add: list[tuple[T | None, T]] = []
109+
to_add = []
110110
prev_item = None
111111
for x in new_list:
112112
if x not in kept_items:

livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/realtime/api_proto.py

+12
Original file line number | Diff line number | Diff line change
@@ -214,30 +214,42 @@ class InputAudioBufferClear(TypedDict):
214214
type: Literal["input_audio_buffer.clear"]
215215

216216
class UserItemCreate(TypedDict):
217+
id: str | None
217218
type: Literal["message"]
218219
role: Literal["user"]
219220
content: list[InputTextContent | InputAudioContent]
220221

221222
class AssistantItemCreate(TypedDict):
223+
id: str | None
222224
type: Literal["message"]
223225
role: Literal["assistant"]
224226
content: list[TextContent]
225227

226228
class SystemItemCreate(TypedDict):
229+
id: str | None
227230
type: Literal["message"]
228231
role: Literal["system"]
229232
content: list[InputTextContent]
230233

231234
class FunctionCallOutputItemCreate(TypedDict):
235+
id: str | None
232236
type: Literal["function_call_output"]
233237
call_id: str
234238
output: str
235239

240+
class FunctionCallItemCreate(TypedDict):
241+
id: str | None
242+
type: Literal["function_call"]
243+
call_id: str
244+
name: str
245+
arguments: str
246+
236247
ConversationItemCreateContent = Union[
237248
UserItemCreate,
238249
AssistantItemCreate,
239250
SystemItemCreate,
240251
FunctionCallOutputItemCreate,
252+
FunctionCallItemCreate,
241253
]
242254

243255
class ConversationItemCreate(TypedDict):

livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/realtime/realtime_model.py

+35 -16
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
import os
66
from copy import deepcopy
77
from dataclasses import dataclass
8-
from typing import AsyncIterable, Callable, Literal, overload
8+
from typing import AsyncIterable, Callable, Literal, cast, overload
99
from urllib.parse import urlencode
1010

1111
import aiohttp
@@ -31,6 +31,8 @@
3131
"response_content_done",
3232
"response_output_done",
3333
"response_done",
34+
"function_calls_collected",
35+
"function_calls_finished",
3436
]
3537

3638

@@ -505,9 +507,10 @@ def create(
505507
}
506508
else:
507509
# function_call
508-
if not message.tool_calls:
510+
if not message.tool_calls or message.name is None:
509511
logger.warning(
510-
"function call message has no tool calls",
512+
"function call message has no name or tool calls: %s",
513+
message,
511514
extra=self._sess.logging_extra(),
512515
)
513516
return
@@ -530,6 +533,13 @@ def create(
530533
},
531534
}
532535
else:
536+
if message_content is None:
537+
logger.warning(
538+
"message content is None, skipping: %s",
539+
message,
540+
extra=self._sess.logging_extra(),
541+
)
542+
return
533543
if not isinstance(message_content, list):
534544
message_content = [message_content]
535545

@@ -595,18 +605,23 @@ def create(
595605
system_contents: list[api_proto.InputTextContent] = []
596606
for cnt in message_content:
597607
if isinstance(cnt, str):
598-
system_contents.append(
599-
{
600-
"id": message.id,
601-
"type": "input_text",
602-
"text": cnt,
603-
}
604-
)
608+
system_contents.append({"type": "input_text", "text": cnt})
605609
elif isinstance(cnt, llm.ChatAudio):
606610
logger.warning(
607611
"audio content in system message is not supported"
608612
)
609613

614+
event = {
615+
"type": "conversation.item.create",
616+
"previous_item_id": previous_item_id,
617+
"item": {
618+
"id": message.id,
619+
"type": "message",
620+
"role": "system",
621+
"content": system_contents,
622+
},
623+
}
624+
610625
if event is None:
611626
logger.warning(
612627
"chat message is not supported inside the realtime API %s",
@@ -654,7 +669,7 @@ async def acreate(
654669
async def adelete(self, *, item_id: str) -> None:
655670
fut = asyncio.Future[None]()
656671
self._sess._item_deleted_futs[item_id] = fut
657-
self.delete(item_id)
672+
self.delete(item_id=item_id)
658673
await fut
659674
del self._sess._item_deleted_futs[item_id]
660675

@@ -851,9 +866,9 @@ async def set_chat_ctx(self, new_ctx: llm.ChatContext) -> None:
851866
},
852867
)
853868

854-
# append an empty audio message if all messages are text
855-
if new_ctx.messages and not any(
856-
isinstance(msg.content, llm.ChatAudio) for msg in new_ctx.messages
869+
# append an empty audio message if all new messages are text
870+
if changes.to_add and not any(
871+
isinstance(msg.content, llm.ChatAudio) for _, msg in changes.to_add
857872
):
858873
# Patch: add an empty audio message to the chat context
859874
# to set the API in audio mode
@@ -901,8 +916,8 @@ def _update_converstation_item_content(
901916
) -> None:
902917
item = self._remote_converstation_items.get(item_id)
903918
if item is None:
904-
logger.error(
905-
"conversation item not found",
919+
logger.warning(
920+
"conversation item not found, skipping update",
906921
extra={"item_id": item_id},
907922
)
908923
return
@@ -1118,11 +1133,13 @@ def _handle_conversation_item_created(
11181133
# Leave the content empty and fill it in later from the content parts
11191134
if item_type == "message":
11201135
# Handle message items (system/user/assistant)
1136+
item = cast(api_proto.SystemItem | api_proto.UserItem, item)
11211137
role = item["role"]
11221138
message = llm.ChatMessage(id=item_id, role=role)
11231139
if item.get("content"):
11241140
content = item["content"][0]
11251141
if content["type"] in ("text", "input_text"):
1142+
content = cast(api_proto.InputTextContent, content)
11261143
message.content = content["text"]
11271144
elif content["type"] == "input_audio" and content.get("audio"):
11281145
audio_data = base64.b64decode(content["audio"])
@@ -1137,6 +1154,7 @@ def _handle_conversation_item_created(
11371154

11381155
elif item_type == "function_call":
11391156
# Handle function call items
1157+
item = cast(api_proto.FunctionCallItem, item)
11401158
message = llm.ChatMessage(
11411159
id=item_id,
11421160
role="assistant",
@@ -1146,6 +1164,7 @@ def _handle_conversation_item_created(
11461164

11471165
elif item_type == "function_call_output":
11481166
# Handle function call output items
1167+
item = cast(api_proto.FunctionCallOutputItem, item)
11491168
message = llm.ChatMessage(
11501169
id=item_id,
11511170
role="tool",

0 commit comments

Comments
 (0)