@@ -16,8 +16,8 @@ async def entrypoint(job: JobContext):
16
16
logger .info ("starting tts example agent" )
17
17
18
18
tts = cartesia .TTS (
19
- speed = "fastest" ,
20
- emotion = ["surprise:highest" ],
19
+ # speed="fastest",
20
+ # emotion=["surprise:highest"],
21
21
)
22
22
23
23
source = rtc .AudioSource (tts .sample_rate , tts .num_channels )
@@ -29,14 +29,28 @@ async def entrypoint(job: JobContext):
29
29
publication = await job .room .local_participant .publish_track (track , options )
30
30
await publication .wait_for_subscription ()
31
31
32
- logger .info ('Saying "Hello!"' )
33
- async for output in tts .synthesize ("Hello I hope you are having a great day." ):
34
- await source .capture_frame (output .frame )
32
+ stream = tts .stream ()
35
33
36
- await asyncio .sleep (4 )
37
- logger .info ('Saying "Goodbye."' )
38
- async for output in tts .synthesize ("Goodbye I hope to see you again soon." ):
39
- await source .capture_frame (output .frame )
34
+ async def _playback_task ():
35
+ async for audio in stream :
36
+ await source .capture_frame (audio .frame )
37
+
38
+ task = asyncio .create_task (_playback_task ())
39
+
40
+ text = "hello from Cartesia. I hope you are having a great day."
41
+
42
+ # split into two-word chunks to simulate LLM streaming
43
+ words = text .split ()
44
+ for i in range (0 , len (words ), 2 ):
45
+ chunk = " " .join (words [i : i + 2 ])
46
+ if chunk :
47
+ logger .info (f'pushing chunk: "{ chunk } "' )
48
+ stream .push_text (chunk + " " )
49
+
50
+ # Mark end of input segment
51
+ stream .flush ()
52
+ stream .end_input ()
53
+ await asyncio .gather (task )
40
54
41
55
42
56
if __name__ == "__main__" :
0 commit comments