diff --git a/changelog/3210.added.md b/changelog/3210.added.md new file mode 100644 index 0000000000..7c994438de --- /dev/null +++ b/changelog/3210.added.md @@ -0,0 +1,2 @@ +- Added support for the HeyGen LiveAvatar API + (see https://www.liveavatar.com/). \ No newline at end of file diff --git a/changelog/3210.changed.md b/changelog/3210.changed.md new file mode 100644 index 0000000000..17ec79a62e --- /dev/null +++ b/changelog/3210.changed.md @@ -0,0 +1,10 @@ +- Updated `HeyGenVideoService` and `HeyGenTransport` to support both HeyGen APIs (Interactive Avatar and Live Avatar). + Using them is as simple as specifying the `service_type` when creating the `HeyGenVideoService` and the `HeyGenTransport`: + ```python + heyGen = HeyGenVideoService( + api_key=os.getenv("HEYGEN_LIVE_AVATAR_API_KEY"), + service_type=ServiceType.LIVE_AVATAR, + session=session, + ) + ``` + diff --git a/env.example b/env.example index 98ddec262b..8aa8f4f817 100644 --- a/env.example +++ b/env.example @@ -84,6 +84,7 @@ GROQ_API_KEY=... # Heygen HEYGEN_API_KEY=... +HEYGEN_LIVE_AVATAR_API_KEY=... # Hume HUME_API_KEY=... diff --git a/examples/foundational/43a-heygen-video-service.py b/examples/foundational/43a-heygen-video-service.py index d4c4090635..1add6aeeaa 100644 --- a/examples/foundational/43a-heygen-video-service.py +++ b/examples/foundational/43a-heygen-video-service.py @@ -25,7 +25,7 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import GoogleLLMService -from pipecat.services.heygen.api import AvatarQuality, NewSessionRequest +from pipecat.services.heygen.client import ServiceType from pipecat.services.heygen.video import HeyGenVideoService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams, DailyTransport @@ -73,11 +73,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY")) heyGen = HeyGenVideoService( - api_key=os.getenv("HEYGEN_API_KEY"), + api_key=os.getenv("HEYGEN_LIVE_AVATAR_API_KEY"), + service_type=ServiceType.LIVE_AVATAR, session=session, - session_request=NewSessionRequest( - avatar_id="Shawn_Therapist_public", version="v2", quality=AvatarQuality.high - ), ) messages = [ diff --git a/src/pipecat/services/heygen/api.py b/src/pipecat/services/heygen/api_interactive_avatar.py similarity index 91% rename from src/pipecat/services/heygen/api.py rename to src/pipecat/services/heygen/api_interactive_avatar.py index 6c9a4cf0e2..6dbd64e013 100644 --- a/src/pipecat/services/heygen/api.py +++ b/src/pipecat/services/heygen/api_interactive_avatar.py @@ -16,6 +16,8 @@ from loguru import logger from pydantic import BaseModel, Field +from pipecat.services.heygen.base_api import BaseAvatarApi, StandardSessionResponse + class AvatarQuality(str, Enum): """Enum representing different avatar quality levels.""" @@ -136,7 +138,7 @@ def __init__(self, message: str, status: int, response_text: str) -> None: self.response_text = response_text -class HeyGenApi: +class HeyGenApi(BaseAvatarApi): """HeyGen Streaming API client.""" BASE_URL = "https://api.heygen.com/v1" @@ -193,8 +195,8 @@ async def _request(self, path: str, params: Dict[str, Any], expect_data: bool = logger.error(f"Network error while calling HeyGen API: {str(e)}") raise - async def new_session(self, request_data: NewSessionRequest) -> HeyGenSession: - """Create a new streaming session. + async def new_session(self, request_data: NewSessionRequest) -> StandardSessionResponse: + """Create a new streaming session and start it immediately. https://docs.heygen.com/reference/new-session @@ -202,7 +204,7 @@ async def new_session(self, request_data: NewSessionRequest) -> HeyGenSession: request_data: Session configuration parameters. Returns: - Session information, including ID and access token. + StandardSessionResponse: Standardized session information with HeyGen raw response. """ params = { "quality": request_data.quality, @@ -225,9 +227,21 @@ async def new_session(self, request_data: NewSessionRequest) -> HeyGenSession: session_info = await self._request("/streaming.new", params) print("heygen session info", session_info) - return HeyGenSession.model_validate(session_info) + heygen_session = HeyGenSession.model_validate(session_info) + + await self._start_session(heygen_session.session_id) + + # Convert to standardized response + return StandardSessionResponse( + session_id=heygen_session.session_id, + access_token=heygen_session.access_token, + livekit_url=heygen_session.url, + livekit_agent_token=heygen_session.livekit_agent_token, + ws_url=heygen_session.realtime_endpoint, + raw_response=heygen_session, + ) - async def start_session(self, session_id: str) -> Any: + async def _start_session(self, session_id: str) -> Any: """Start the streaming session. https://docs.heygen.com/reference/start-session diff --git a/src/pipecat/services/heygen/api_liveavatar.py b/src/pipecat/services/heygen/api_liveavatar.py new file mode 100644 index 0000000000..a7ca78892b --- /dev/null +++ b/src/pipecat/services/heygen/api_liveavatar.py @@ -0,0 +1,339 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""LiveAvatar API. + +API to communicate with LiveAvatar Streaming API. +""" + +from typing import Any, Dict, Optional + +import aiohttp +from loguru import logger +from pydantic import BaseModel + +from pipecat.services.heygen.base_api import BaseAvatarApi, StandardSessionResponse + + +class AvatarPersona(BaseModel): + """Avatar persona settings for LiveAvatar. + + Parameters: + voice_id (Optional[str]): ID of the voice to be used. + context_id (Optional[str]): Context ID for the avatar. + language (str): Language code for the avatar (default: "en"). + """ + + voice_id: Optional[str] = None + context_id: Optional[str] = None + language: str = "en" + + +class CustomSDKLiveKitConfig(BaseModel): + """Custom LiveKit configuration. + + Parameters: + livekit_url (str): LiveKit server URL. + livekit_room (str): LiveKit room name. + livekit_client_token (str): LiveKit client access token. + """ + + livekit_url: str + livekit_room: str + livekit_client_token: str + + +class LiveAvatarNewSessionRequest(BaseModel): + """Request model for creating a LiveAvatar session token. + + Parameters: + mode (str): Session mode (default: "CUSTOM"). + avatar_id (str): Unique identifier for the avatar. + avatar_persona (AvatarPersona): Avatar persona configuration. + """ + + mode: str = "CUSTOM" + avatar_id: str + avatar_persona: Optional[AvatarPersona] = None + livekit_config: Optional[CustomSDKLiveKitConfig] = None + + +class SessionTokenData(BaseModel): + """Data model for session token response. + + Parameters: + session_id (str): Unique identifier for the session. + session_token (str): Session token for authentication. + """ + + session_id: str + session_token: str + + +class SessionTokenResponse(BaseModel): + """Response model for LiveAvatar session token. + + Parameters: + code (int): Response status code. + data (SessionTokenData): Session token data containing session_id and session_token. + message (str): Response message. + """ + + code: int + data: SessionTokenData + message: str + + +class LiveAvatarSessionData(BaseModel): + """Data model for LiveAvatar session response. + + Parameters: + session_id (str): Unique identifier for the streaming session. + livekit_url (str): LiveKit server URL for the session. + livekit_client_token (str): Access token for LiveKit. + max_session_duration (int): Maximum session duration in seconds. + ws_url (str): WebSocket URL for the session. + """ + + session_id: str + livekit_url: str + livekit_client_token: str + max_session_duration: int + ws_url: str + + +class LiveAvatarSessionResponse(BaseModel): + """Response model for LiveAvatar session start. + + Parameters: + code (int): Response status code. + data (LiveAvatarSessionData): Session data containing connection details. + message (str): Response message. + """ + + code: int + data: LiveAvatarSessionData + message: str + + +class LiveAvatarApiError(Exception): + """Custom exception for LiveAvatar API errors.""" + + def __init__(self, message: str, status: int, response_text: str) -> None: + """Initialize the LiveAvatar API error. + + Args: + message: Error message + status: HTTP status code + response_text: Raw response text from the API + """ + super().__init__(message) + self.status = status + self.response_text = response_text + + +class LiveAvatarApi(BaseAvatarApi): + """LiveAvatar Streaming API client.""" + + BASE_URL = "https://api.liveavatar.com/v1" + + def __init__(self, api_key: str, session: aiohttp.ClientSession) -> None: + """Initialize the LiveAvatar API. + + Args: + api_key: LiveAvatar API key + session: aiohttp client session + """ + self._api_key = api_key + self._session = session + self._session_token = None + + async def _request( + self, + method: str, + path: str, + params: Optional[Dict[str, Any]] = None, + bearer_token: Optional[str] = None, + ) -> Any: + """Make a request to the LiveAvatar API. + + Args: + method: HTTP method (GET, POST, etc.). + path: API endpoint path. + params: JSON-serializable parameters. + bearer_token: Optional bearer token for authorization. + + Returns: + Parsed JSON response data. + + Raises: + LiveAvatarApiError: If the API response is not successful. + aiohttp.ClientError: For network-related errors. + """ + url = f"{self.BASE_URL}{path}" + headers = { + "accept": "application/json", + } + + if bearer_token: + headers["authorization"] = f"Bearer {bearer_token}" + else: + headers["X-API-KEY"] = self._api_key + + if params is not None: + headers["content-type"] = "application/json" + + logger.debug(f"LiveAvatar API request: {method} {url}") + + try: + async with self._session.request(method, url, json=params, headers=headers) as response: + if not response.ok: + response_text = await response.text() + logger.error(f"LiveAvatar API error: {response_text}") + raise LiveAvatarApiError( + f"API request failed with status {response.status}", + response.status, + response_text, + ) + return await response.json() + except aiohttp.ClientError as e: + logger.error(f"Network error while calling LiveAvatar API: {str(e)}") + raise + + async def create_session_token( + self, request_data: LiveAvatarNewSessionRequest + ) -> SessionTokenResponse: + """Create a session token for LiveAvatar. + + https://docs.liveavatar.com/reference/create_session_token_v1_sessions_token_post + + Args: + request_data: Session token configuration parameters. + + Returns: + Session token information. + """ + params: dict[str, Any] = { + "mode": request_data.mode, + "avatar_id": request_data.avatar_id, + } + + # Only include avatar_persona if it exists and has non-None values + if request_data.avatar_persona is not None: + avatar_persona = { + "voice_id": request_data.avatar_persona.voice_id, + "context_id": request_data.avatar_persona.context_id, + "language": request_data.avatar_persona.language, + } + # Remove None values from avatar_persona + avatar_persona = {k: v for k, v in avatar_persona.items() if v is not None} + params["avatar_persona"] = avatar_persona + + response = await self._request("POST", "/sessions/token", params) + logger.debug(f"LiveAvatar session token created") + + return SessionTokenResponse.model_validate(response) + + async def start_session(self, session_token: str) -> LiveAvatarSessionResponse: + """Start a new LiveAvatar session. + + https://docs.liveavatar.com/reference/start_session_v1_sessions_start_post + + Args: + session_token: Session token obtained from create_session_token. + + Returns: + Session information including room URL and session ID. + """ + response = await self._request("POST", "/sessions/start", bearer_token=session_token) + logger.debug(f"LiveAvatar session started") + + return LiveAvatarSessionResponse.model_validate(response) + + async def stop_session(self, session_id: str, session_token: str) -> Any: + """Stop an active LiveAvatar session. + + https://docs.liveavatar.com/reference/stop_session_v1_sessions_stop_post + + Args: + session_id: ID of the session to stop. + session_token: Session token for authentication. + + Returns: + Response data from the stop session API call. + + Raises: + ValueError: If session ID is not set. + """ + if not session_id: + raise ValueError("Session ID is not set.") + + params = {"session_id": session_id} + + response = await self._request( + "POST", "/sessions/stop", params=params, bearer_token=session_token + ) + return response + + async def new_session( + self, request_data: LiveAvatarNewSessionRequest + ) -> StandardSessionResponse: + """Create and start a new LiveAvatar session (convenience method). + + This combines create_session_token and start_session into a single call. + + Args: + request_data: Session token configuration parameters. + + Returns: + StandardSessionResponse: Standardized session information with LiveAvatar raw response. + """ + # Create session token + token_response = await self.create_session_token(request_data) + self._session_token = token_response.data.session_token + + # Start the session using the session_token from the data field + session_response = await self.start_session(token_response.data.session_token) + + # Convert to standardized response + return StandardSessionResponse( + session_id=session_response.data.session_id, + access_token=session_response.data.livekit_client_token, + livekit_url=session_response.data.livekit_url, + # TODO: HeyGen will create a new token for Pipecat + # Right now they are creating a single token, which is supposed to be used by the user + # Due to this, HeyGenTransport it is not going to work yet. + livekit_agent_token=session_response.data.livekit_client_token, + ws_url=session_response.data.ws_url, + raw_response=session_response, + ) + + async def close_session(self, session_id: str) -> Any: + """Close an active LiveAvatar session (convenience method). + + This is a convenience method that closes a session using the stored session token + from the most recent `new_session()` call. It automatically uses the internally + stored session token, eliminating the need to manually track tokens. + + Args: + session_id: ID of the session to close. + + Returns: + Response data from the stop session API call. + + Raises: + ValueError: If no session token is available (i.e., `new_session()` + hasn't been called yet or the stored token is None). + + Note: + This method requires that `new_session()` has been called previously to + establish a stored session token. For more control over session tokens, + use `stop_session()` directly with an explicit token parameter. + """ + if not self._session_token: + raise ValueError("Session token is not set. Call new_session first.") + + return await self.stop_session(session_id, self._session_token) diff --git a/src/pipecat/services/heygen/base_api.py b/src/pipecat/services/heygen/base_api.py new file mode 100644 index 0000000000..8b124a4727 --- /dev/null +++ b/src/pipecat/services/heygen/base_api.py @@ -0,0 +1,67 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Base API for HeyGen avatar services. + +Base class defining the common interface for HeyGen avatar service APIs. +""" + +from abc import ABC, abstractmethod +from typing import Any + +from pydantic import BaseModel + + +class StandardSessionResponse(BaseModel): + """Standardized session response that all HeyGen avatar services will provide. + + This contains the common fields that the client needs to operate, + while also storing the raw response for service-specific data access. + + Parameters: + session_id (str): Unique identifier for the streaming session. + access_token (str): Token for accessing the session securely. + livekit_agent_token (str): Token for HeyGen’s audio agents(Pipecat). + ws_url (str): WebSocket URL for the session. + livekit_url (str): LiveKit server URL for the session. + """ + + session_id: str + access_token: str + livekit_agent_token: str + + livekit_url: str = None + ws_url: str = None + + raw_response: Any + + +class BaseAvatarApi(ABC): + """Base class for avatar service APIs.""" + + @abstractmethod + async def new_session(self, request_data: Any) -> StandardSessionResponse: + """Create a new avatar session. + + Args: + request_data: Service-specific session request data + + Returns: + StandardSessionResponse: Standardized session information + """ + pass + + @abstractmethod + async def close_session(self, session_id: str) -> Any: + """Close an avatar session. + + Args: + session_id: ID of the session to close + + Returns: + Response data from the close session API call + """ + pass diff --git a/src/pipecat/services/heygen/client.py b/src/pipecat/services/heygen/client.py index ba517090c2..cd62b1af0e 100644 --- a/src/pipecat/services/heygen/client.py +++ b/src/pipecat/services/heygen/client.py @@ -16,7 +16,8 @@ import json import time import uuid -from typing import Awaitable, Callable, Optional +from enum import Enum +from typing import Awaitable, Callable, Optional, Union import aiohttp from loguru import logger @@ -28,7 +29,12 @@ StartFrame, ) from pipecat.processors.frame_processor import FrameProcessorSetup -from pipecat.services.heygen.api import HeyGenApi, HeyGenSession, NewSessionRequest +from pipecat.services.heygen.api_interactive_avatar import HeyGenApi, NewSessionRequest +from pipecat.services.heygen.api_liveavatar import ( + LiveAvatarApi, + LiveAvatarNewSessionRequest, +) +from pipecat.services.heygen.base_api import StandardSessionResponse from pipecat.transports.base_transport import TransportParams from pipecat.utils.asyncio.task_manager import BaseTaskManager @@ -45,6 +51,13 @@ HEY_GEN_SAMPLE_RATE = 24000 +class ServiceType(Enum): + """Enum for HeyGen service types.""" + + INTERACTIVE_AVATAR = "INTERACTIVE_AVATAR" + LIVE_AVATAR = "LIVE_AVATAR" + + class HeyGenCallbacks(BaseModel): """Callback handlers for HeyGen events. @@ -78,10 +91,8 @@ def __init__( api_key: str, session: aiohttp.ClientSession, params: TransportParams, - session_request: NewSessionRequest = NewSessionRequest( - avatarName="Shawn_Therapist_public", - version="v2", - ), + session_request: Optional[Union[LiveAvatarNewSessionRequest, NewSessionRequest]] = None, + service_type: Optional[ServiceType] = None, callbacks: HeyGenCallbacks, connect_as_user: bool = False, ) -> None: @@ -91,12 +102,52 @@ def __init__( api_key: HeyGen API key for authentication session: HTTP client session for API requests params: Transport configuration parameters - session_request: Configuration for the HeyGen session (default: uses Shawn_Therapist_public avatar) + session_request: Configuration for the HeyGen session (optional) + service_type: Type of service to use callbacks: Callback handlers for HeyGen events connect_as_user: Whether to connect using the user token or not (default: False) """ - self._api = HeyGenApi(api_key, session=session) - self._heyGen_session: Optional[HeyGenSession] = None + # Set default service type for backwards compatibility + self._service_type = ( + service_type if service_type is not None else ServiceType.INTERACTIVE_AVATAR + ) + + # Validate session_request matches service_type if both are provided + if session_request is not None and service_type is not None: + if service_type == ServiceType.LIVE_AVATAR and not isinstance( + session_request, LiveAvatarNewSessionRequest + ): + logger.warning( + f"Service type is LIVE_AVATAR but session_request is not SessionTokenRequest. Ignoring session_request." + ) + session_request = None + elif service_type == ServiceType.INTERACTIVE_AVATAR and not isinstance( + session_request, NewSessionRequest + ): + logger.warning( + f"Service type is INTERACTIVE_AVATAR but session_request is not NewSessionRequest. Ignoring session_request." + ) + session_request = None + + # Create default session_request based on service_type if not provided + if session_request is None: + if self._service_type == ServiceType.INTERACTIVE_AVATAR: + session_request = NewSessionRequest( + avatar_id="Shawn_Therapist_public", + version="v2", + ) + else: # LIVE_AVATAR + session_request = LiveAvatarNewSessionRequest( + avatar_id="1c690fe7-23e0-49f9-bfba-14344450285b" + ) + + # Initialize API based on service type + if self._service_type == ServiceType.INTERACTIVE_AVATAR: + self._api = HeyGenApi(api_key, session=session) + else: + self._api = LiveAvatarApi(api_key, session=session) + + self._heyGen_session: Optional[StandardSessionResponse] = None self._websocket = None self._task_manager: Optional[BaseTaskManager] = None self._params = params @@ -130,14 +181,12 @@ def __init__( async def _initialize(self): self._heyGen_session = await self._api.new_session(self._session_request) logger.debug(f"HeyGen sessionId: {self._heyGen_session.session_id}") - logger.debug(f"HeyGen realtime_endpoint: {self._heyGen_session.realtime_endpoint}") - logger.debug(f"HeyGen livekit URL: {self._heyGen_session.url}") - logger.debug(f"HeyGen livekit toke: {self._heyGen_session.access_token}") + logger.debug(f"HeyGen realtime_endpoint: {self._heyGen_session.ws_url}") + logger.debug(f"HeyGen livekit URL: {self._heyGen_session.livekit_url}") + logger.debug(f"HeyGen livekit token: {self._heyGen_session.access_token}") logger.info( - f"Full Link: https://meet.livekit.io/custom?liveKitUrl={self._heyGen_session.url}&token={self._heyGen_session.access_token}" + f"Full Link: https://meet.livekit.io/custom?liveKitUrl={self._heyGen_session.livekit_url}&token={self._heyGen_session.access_token}" ) - - await self._api.start_session(self._heyGen_session.session_id) logger.info("HeyGen session started") async def setup(self, setup: FrameProcessorSetup) -> None: @@ -222,7 +271,7 @@ async def _ws_connect(self): return logger.debug(f"HeyGenClient ws connecting") self._websocket = await websocket_connect( - uri=self._heyGen_session.realtime_endpoint, + uri=self._heyGen_session.ws_url, ) self._connected = True self._receive_task = self._task_manager.create_task( @@ -509,7 +558,9 @@ async def _process_video_frames(self, stream: rtc.VideoStream): async def _livekit_connect(self): """Connect to LiveKit room.""" try: - logger.debug(f"HeyGenClient livekit connecting to room URL: {self._heyGen_session.url}") + logger.debug( + f"HeyGenClient livekit connecting to room URL: {self._heyGen_session.livekit_url}" + ) self._livekit_room = rtc.Room() @self._livekit_room.on("participant_connected") @@ -574,7 +625,8 @@ def on_participant_disconnected(participant: rtc.RemoteParticipant): if not self._connect_as_user else self._heyGen_session.access_token ) - await self._livekit_room.connect(self._heyGen_session.url, access_token) + + await self._livekit_room.connect(self._heyGen_session.livekit_url, access_token) logger.debug(f"Successfully connected to LiveKit room: {self._livekit_room.name}") logger.debug(f"Local participant SID: {self._livekit_room.local_participant.sid}") logger.debug( diff --git a/src/pipecat/services/heygen/video.py b/src/pipecat/services/heygen/video.py index b2df15119b..63df0fd9d8 100644 --- a/src/pipecat/services/heygen/video.py +++ b/src/pipecat/services/heygen/video.py @@ -12,7 +12,7 @@ """ import asyncio -from typing import Optional +from typing import Optional, Union import aiohttp from loguru import logger @@ -37,8 +37,14 @@ ) from pipecat.processors.frame_processor import FrameDirection, FrameProcessorSetup from pipecat.services.ai_service import AIService -from pipecat.services.heygen.api import NewSessionRequest -from pipecat.services.heygen.client import HEY_GEN_SAMPLE_RATE, HeyGenCallbacks, HeyGenClient +from pipecat.services.heygen.api_interactive_avatar import NewSessionRequest +from pipecat.services.heygen.api_liveavatar import LiveAvatarNewSessionRequest +from pipecat.services.heygen.client import ( + HEY_GEN_SAMPLE_RATE, + HeyGenCallbacks, + HeyGenClient, + ServiceType, +) from pipecat.transports.base_transport import TransportParams # Using the same values that we do in the BaseOutputTransport @@ -72,7 +78,8 @@ def __init__( *, api_key: str, session: aiohttp.ClientSession, - session_request: NewSessionRequest = NewSessionRequest(avatar_id="Shawn_Therapist_public"), + session_request: Optional[Union[LiveAvatarNewSessionRequest, NewSessionRequest]] = None, + service_type: Optional[ServiceType] = None, **kwargs, ) -> None: """Initialize the HeyGen video service. @@ -80,7 +87,8 @@ def __init__( Args: api_key: HeyGen API key for authentication session: HTTP client session for API requests - session_request: Configuration for the HeyGen session (default: uses Shawn_Therapist_public avatar) + session_request: Configuration for the HeyGen session + service_type: Service type for the avatar session **kwargs: Additional arguments passed to parent AIService """ super().__init__(**kwargs) @@ -91,6 +99,7 @@ def __init__( self._resampler = create_stream_resampler() self._is_interrupting = False self._session_request = session_request + self._service_type = service_type self._other_participant_has_joined = False self._event_id = None self._audio_chunk_size = 0 @@ -117,6 +126,7 @@ async def setup(self, setup: FrameProcessorSetup): audio_out_sample_rate=HEY_GEN_SAMPLE_RATE, ), session_request=self._session_request, + service_type=self._service_type, callbacks=HeyGenCallbacks( on_participant_connected=self._on_participant_connected, on_participant_disconnected=self._on_participant_disconnected, diff --git a/src/pipecat/transports/heygen/transport.py b/src/pipecat/transports/heygen/transport.py index 24f47a8aab..2d6b543fb8 100644 --- a/src/pipecat/transports/heygen/transport.py +++ b/src/pipecat/transports/heygen/transport.py @@ -16,7 +16,7 @@ - HeyGenTransport: Main transport implementation that coordinates input/output transports """ -from typing import Any, Optional +from typing import Any, Optional, Union import aiohttp from loguru import logger @@ -36,8 +36,9 @@ UserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection, FrameProcessor, FrameProcessorSetup -from pipecat.services.heygen.api import NewSessionRequest -from pipecat.services.heygen.client import HeyGenCallbacks, HeyGenClient +from pipecat.services.heygen.api_interactive_avatar import NewSessionRequest +from pipecat.services.heygen.api_liveavatar import LiveAvatarNewSessionRequest +from pipecat.services.heygen.client import HeyGenCallbacks, HeyGenClient, ServiceType from pipecat.transports.base_input import BaseInputTransport from pipecat.transports.base_output import BaseOutputTransport from pipecat.transports.base_transport import BaseTransport, TransportParams @@ -297,10 +298,8 @@ def __init__( params: HeyGenParams = HeyGenParams(), input_name: Optional[str] = None, output_name: Optional[str] = None, - session_request: NewSessionRequest = NewSessionRequest( - avatar_id="Shawn_Therapist_public", - version="v2", - ), + session_request: Optional[Union[LiveAvatarNewSessionRequest, NewSessionRequest]] = None, + service_type: Optional[ServiceType] = None, ): """Initialize the HeyGen transport. @@ -313,7 +312,8 @@ def __init__( params: HeyGen-specific configuration parameters (default: HeyGenParams()) input_name: Optional custom name for the input transport output_name: Optional custom name for the output transport - session_request: Configuration for the HeyGen session (default: uses Shawn_Therapist_public avatar) + session_request: Configuration for the HeyGen session + service_type: Service type for the avatar session Note: The transport will automatically join the same virtual room as the HeyGen Avatar @@ -326,6 +326,7 @@ def __init__( session=session, params=params, session_request=session_request, + service_type=service_type, callbacks=HeyGenCallbacks( on_participant_connected=self._on_participant_connected, on_participant_disconnected=self._on_participant_disconnected,