Commit 3a026f6

feat: support tool
plutoless committed Dec 31, 2024
1 parent ef24ca2 commit 3a026f6
Showing 9 changed files with 65 additions and 33 deletions.
28 changes: 18 additions & 10 deletions agents/examples/default/property.json
@@ -546,35 +546,35 @@
       "connections": [
         {
           "extension": "agora_rtc",
-          "audio_frame": [
+          "cmd": [
             {
-              "name": "pcm_frame",
+              "name": "on_user_joined",
               "dest": [
                 {
                   "extension": "v2v"
                 }
               ]
-            }
-          ],
-          "cmd": [
+            },
             {
-              "name": "on_user_joined",
+              "name": "on_user_left",
               "dest": [
                 {
                   "extension": "v2v"
                 }
               ]
             },
             {
-              "name": "on_user_left",
+              "name": "on_connection_failure",
               "dest": [
                 {
                   "extension": "v2v"
                 }
               ]
-            },
+            }
+          ],
+          "audio_frame": [
             {
-              "name": "on_connection_failure",
+              "name": "pcm_frame",
               "dest": [
                 {
                   "extension": "v2v"
@@ -698,7 +698,7 @@
           "max_memory_length": 10,
           "max_tokens": 512,
           "model": "${env:OPENAI_MODEL}",
-          "prompt": "",
+          "prompt": "You are an ai agent bot producing child picture books. Each response should be short and no more than 50 words as it's for child. \nFor each response, you will use the 'image_generate' tool to create an image based on the description or key moment in that part of the story. The story should be set in a fantasy world. Try asking questions relevant to the story to decide how the story should proceed. Each response should include rich, vivid descriptions that will guide the 'image_generate' tool to produce an image that aligns with the scene or mood.\n Whether it’s the setting, a character’s expression, or a dramatic moment, the paragraph should give enough detail for a meaningful visual representation.",
           "proxy_url": "${env:OPENAI_PROXY_URL}"
         }
       },
@@ -838,6 +838,14 @@
               "extension": "message_collector"
             }
           ]
         },
+        {
+          "name": "raw_text_data",
+          "dest": [
+            {
+              "extension": "message_collector"
+            }
+          ]
+        }
       ]
     },
11 changes: 11 additions & 0 deletions agents/ten_packages/extension/message_collector/manifest.json
@@ -40,6 +40,17 @@
           "type": "bool"
         }
       }
+    },
+    {
+      "name": "raw_text_data",
+      "property": {
+        "text": {
+          "type": "string"
+        },
+        "end_of_segment": {
+          "type": "bool"
+        }
+      }
     }
   ],
   "data_out": [
@@ -214,6 +214,10 @@ def on_data(self, ten_env: TenEnv, data: Data) -> None:
             "text": text,
         }

+        # Add the raw data type if the data is raw text data
+        if data.get_name() == "raw_text_data":
+            base_msg_data["data_type"] = "raw"
+
         try:
             chunks = _text_to_base64_chunks(ten_env, json.dumps(base_msg_data), message_id)
             for chunk in chunks:
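To make the effect of the message_collector change concrete, here is a minimal, self-contained sketch of the payload path: the collector JSON-encodes a message dict, tags it with data_type "raw" when the incoming data is named raw_text_data, and the playground later base64-decodes and parses it. The exact field set and the single-shot base64 encoding below are simplifications and assumptions; the real _text_to_base64_chunks helper splits the encoded text into chunks, which the frontend reassembles first.

import base64
import json
import time

# Hypothetical stand-in for the collector's encoding path; the real
# extension chunks the encoded text via _text_to_base64_chunks.
def encode_message(data_name: str, text: str, stream_id: int, is_final: bool) -> str:
    base_msg_data = {
        "stream_id": stream_id,
        "is_final": is_final,
        "text": text,
        "text_ts": int(time.time() * 1000),  # assumed millisecond timestamp
    }
    # The change in this commit: tag raw text data so the frontend can
    # render it differently from ordinary transcripts.
    if data_name == "raw_text_data":
        base_msg_data["data_type"] = "raw"
    return base64.b64encode(json.dumps(base_msg_data).encode("utf-8")).decode("ascii")

# The playground does the equivalent of atob(...) + JSON.parse(...) and
# branches on data_type (see the rtc.ts hunk below).
decoded = json.loads(base64.b64decode(encode_message(
    "raw_text_data",
    json.dumps({"type": "image_url", "data": {"image_url": "https://example.com/a.png"}}),
    stream_id=123,
    is_final=True,
)))
assert decoded["data_type"] == "raw"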
@@ -58,7 +58,7 @@ def __init__(self, name: str):
         self.config = None
         self.client = None
         self.sentence_fragment = ""
-        self.tool_task_future = None
+        self.tool_task_future: asyncio.Future | None = None
         self.users_count = 0

     async def on_init(self, async_ten_env: AsyncTenEnv) -> None:
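The tool_task_future annotation above only narrows the attribute's type; the diff does not show how the future is used. As a rough illustration only (not the extension's actual control flow), such a future is typically created before dispatching a tool call and resolved by a callback when the result arrives:

import asyncio

class ToolCallerSketch:
    """Illustrative only; names and flow are assumptions, not this commit's code."""

    def __init__(self) -> None:
        self.tool_task_future: asyncio.Future | None = None

    async def call_tool(self) -> str:
        loop = asyncio.get_running_loop()
        self.tool_task_future = loop.create_future()
        # In the real extension a result callback would resolve the future;
        # here a timer stands in for that callback.
        loop.call_later(0.05, self.tool_task_future.set_result, "tool result")
        return await self.tool_task_future

async def main() -> None:
    print(await ToolCallerSketch().call_tool())

asyncio.run(main())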
@@ -3,6 +3,7 @@
 # Licensed under the Apache License, Version 2.0.
 # See the LICENSE file for more information.
 #
+import json
 from ten import (
     TenEnv,
     AsyncTenEnv,
@@ -63,11 +64,6 @@ async def run_tool(self, ten_env: AsyncTenEnv, name: str, args: dict) -> LLMToolResult:
         ten_env.log_info(f"Generated image: {response_url}")
         result = LLMToolResultDirectRawResponse(
             type="direct_raw_response",
-            content=[
-                LLMChatCompletionContentPartImageParam(
-                    type="image_url",
-                    image_url=response_url
-                )
-            ]
+            content=json.dumps({"data":{"image_url": response_url}, "type": "image_url"}),
         )
         return result
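With this change the tool result's content is a plain JSON string rather than a list of content-part objects. A minimal sketch of the new payload shape and of how a downstream consumer might read it back (the consumer side is an assumption, not code from this commit):

import json

response_url = "https://example.com/generated.png"  # placeholder URL

# Shape produced by run_tool after this commit, mirroring
# LLMToolResultDirectRawResponse with content as a string.
result = {
    "type": "direct_raw_response",
    "content": json.dumps({"data": {"image_url": response_url}, "type": "image_url"}),
}

# Assumed consumer: a single json.loads recovers the structured payload.
payload = json.loads(result["content"])
if payload["type"] == "image_url":
    print(payload["data"]["image_url"])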
@@ -57,7 +57,8 @@
           "type": "string"
         },
         "arguments": {
-          "type": "object"
+          "type": "object",
+          "properties": {}
         }
       },
       "required": [
@@ -105,7 +105,7 @@ class LLMToolResultDirectSpeechResponse(TypedDict, total=False):

 class LLMToolResultDirectRawResponse(TypedDict, total=False):
     type: Required[Literal["direct_raw_response"]]
-    content: Required[Union[str, Iterable[LLMChatCompletionContentPartParam]]]
+    content: Required[str]

 LLMToolResult: TypeAlias = Union[
     LLMToolResultRequery,
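For reference, the narrowed result type can be exercised like this; a minimal sketch assuming only standard typing machinery (the surrounding module defines the other LLMToolResult variants):

import json
from typing import Literal, Required, TypedDict  # Required needs Python 3.11+ (else typing_extensions)

class LLMToolResultDirectRawResponse(TypedDict, total=False):
    type: Required[Literal["direct_raw_response"]]
    content: Required[str]  # previously Union[str, Iterable[LLMChatCompletionContentPartParam]]

# A string-only content field keeps the result trivially serializable.
result: LLMToolResultDirectRawResponse = {
    "type": "direct_raw_response",
    "content": json.dumps({"type": "image_url", "data": {"image_url": "https://example.com/a.png"}}),
}
print(json.dumps(result))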
38 changes: 25 additions & 13 deletions playground/src/manager/rtc/rtc.ts
@@ -86,10 +86,10 @@ export class RtcManager extends AGEventEmitter<RtcEvents> {
     this.emit("localTracksChanged", this.localTracks);
   }

-  async switchVideoSource(type:VideoSourceType) {
+  async switchVideoSource(type: VideoSourceType) {
     if (type === VideoSourceType.SCREEN) {
       await this.createScreenShareTrack();
-      if(this.localTracks.screenTrack) {
+      if (this.localTracks.screenTrack) {
         this.client.unpublish(this.localTracks.videoTrack);
         this.localTracks.videoTrack?.close();
         this.localTracks.videoTrack = undefined;
@@ -98,7 +98,7 @@ export class RtcManager extends AGEventEmitter<RtcEvents> {
       }
     } else if (type === VideoSourceType.CAMERA) {
       await this.createCameraTracks();
-      if(this.localTracks.videoTrack) {
+      if (this.localTracks.videoTrack) {
         this.client.unpublish(this.localTracks.screenTrack);
         this.localTracks.screenTrack?.close();
         this.localTracks.screenTrack = undefined;
@@ -228,18 +228,30 @@ export class RtcManager extends AGEventEmitter<RtcEvents> {
       const completeMessage = this.reconstructMessage(
         this.messageCache[message_id]
       );
-      const { stream_id, is_final, text, text_ts } = JSON.parse(
+      const { stream_id, is_final, text, text_ts, data_type } = JSON.parse(
         atob(completeMessage)
       );
-      const textItem: ITextItem = {
-        uid: `${stream_id}`,
-        time: text_ts,
-        dataType: "transcribe",
-        text: text,
-        isFinal: is_final,
-      };
-
-      if (text.trim().length > 0) {
+
+      let textItem: ITextItem;
+
+      if (data_type === "raw") {
+        textItem = {
+          uid: `${stream_id}`,
+          time: text_ts,
+          dataType: "image_url",
+          text: text,
+          isFinal: is_final,
+        }
+      } else {
+        textItem = {
+          uid: `${stream_id}`,
+          time: text_ts,
+          dataType: "transcribe",
+          text: text,
+          isFinal: is_final,
+        };
+      }
+
+      if (text.trim().length > 0 && textItem) {
         this.emit("textChanged", textItem);
       }

2 changes: 1 addition & 1 deletion playground/src/types/index.ts
@@ -35,7 +35,7 @@ export interface IChatItem {

 /** @deprecated */
 export interface ITextItem {
-  dataType: "transcribe" | "translate";
+  dataType: "transcribe" | "translate" | "image_url";
   uid: string;
   time: number;
   text: string;
