Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- Added support for `VideoSDKTransport` in `src/pipecat/transports/videosdk`, enabling Pipecat bots to join VideoSDK.live meetings with bidirectional audio and handle participant chat messages via VideoSDK PubSub.
- Added `videosdk>=0.2.7` dependency to `pyproject.toml`.
- Added `VIDEOSDK_AUTH_TOKEN` and `VIDEOSDK_MEETING_ID` fields to `env.example`.
- Added new example `examples/foundational/04c-transports-videosdk.py` demonstrating usage of `VideoSDKTransport`.

- Added `wait_for_all` argument to the base `LLMService`. When enabled, this
ensures all function calls complete before returning results to the LLM (i.e.,
before running a new inference with those results).
Expand Down
4 changes: 4 additions & 0 deletions env.example
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ TOGETHER_API_KEY=...
TWILIO_ACCOUNT_SID=...
TWILIO_AUTH_TOKEN=...

# Videosdk.live
VIDEOSDK_AUTH_TOKEN=...
VIDEOSDK_MEETING_ID=...

# WhatsApp
WHATSAPP_TOKEN=...
WHATSAPP_WEBHOOK_VERIFICATION_TOKEN=...
Expand Down
141 changes: 141 additions & 0 deletions examples/foundational/04c-transports-videosdk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import asyncio
import os
import sys

from dotenv import load_dotenv
from loguru import logger

from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.frames.frames import (
LLMRunFrame,
TranscriptionFrame,
UserStartedSpeakingFrame,
UserStoppedSpeakingFrame,
InterruptionFrame,
)
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
from pipecat.services.deepgram.tts import DeepgramTTSService
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.google.llm import GoogleLLMService
from pipecat.transports.videosdk.transport import VideoSDKTransport,VideoSDKParams

load_dotenv(override=True)

logger.remove(0)
logger.add(sys.stderr, level="DEBUG")


async def main():
if not os.getenv("VIDEOSDK_AUTH_TOKEN") or not os.getenv("VIDEOSDK_MEETING_ID"):
logger.error("Please set VIDEOSDK_AUTH_TOKEN and VIDEOSDK_MEETING_ID in your .env file")
return

transport = VideoSDKTransport(
params=VideoSDKParams(
token=os.getenv("VIDEOSDK_AUTH_TOKEN"),
meeting_id=os.getenv("VIDEOSDK_MEETING_ID"),
name=os.getenv("NAME", "Pipecat-AI Bot"),
audio_out_enabled=True,
audio_in_enabled=True,
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
pubsub_topic="CHAT", # Subscribe to pubsub topic for text messages (default: "CHAT").
)
)
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"))

tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"))

messages = [
{
"role": "system",
"content": "You are a helpful LLM in a VideoSDK meeting. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
},
]

context = LLMContext(messages)
context_aggregator = LLMContextAggregatorPair(context)

pipeline = Pipeline(
[
transport.input(), # Transport user input
stt, # Speech-to-Text (transcribe user audio)
context_aggregator.user(), # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
context_aggregator.assistant(), # Assistant spoken responses
]
)

task = PipelineTask(
pipeline,
params=PipelineParams(
enable_metrics=True,
enable_usage_metrics=True,
),
)

@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
# Kick off the conversation when first participant joins
logger.info(f"First participant joined: {participant.id}")
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([LLMRunFrame()])

@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant):
# When a participant leaves, cancel the task to stop the bot
logger.info(f"Participant left: {participant.id}")
await task.cancel()

# Register an event handler to receive text messages from participants via PubSub
# This allows users to send text messages that the bot can respond to with voice
@transport.event_handler("on_pubsub_message_received")
async def on_pubsub_message_received(transport, message, sender_id):
logger.info(f"Received text message from {sender_id}: {message}")

message_text = message.get("message", "")
timestamp = message.get("timestamp", "")

if not message_text:
logger.warning("Received empty message")
return

logger.info(f"Processing text message: {message_text}")

await task.queue_frames(
[
InterruptionFrame(),
UserStartedSpeakingFrame(),
TranscriptionFrame(
user_id=sender_id,
timestamp=timestamp,
text=message_text,
),
UserStoppedSpeakingFrame(),
],
)

runner = PipelineRunner()

await runner.run(task)


if __name__ == "__main__":
asyncio.run(main())
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ tavus=[]
together = []
tracing = [ "opentelemetry-sdk>=1.33.0", "opentelemetry-api>=1.33.0", "opentelemetry-instrumentation>=0.54b0" ]
ultravox = [ "transformers>=4.48.0", "vllm>=0.9.0" ]
videosdk = [ "videosdk>=0.2.7"]
webrtc = [ "aiortc>=1.13.0,<2", "opencv-python>=4.11.0.86,<5" ]
websocket = [ "pipecat-ai[websockets-base]", "fastapi>=0.115.6,<0.122.0" ]
websockets-base = [ "websockets>=13.1,<16.0" ]
Expand Down
Empty file.
Loading