From 27136c5c0bc28e30994add896989ae49e22bedd7 Mon Sep 17 00:00:00 2001 From: hozen-groq Date: Fri, 17 Oct 2025 11:20:02 -0400 Subject: [PATCH] update agent TTS and README --- .env.example | 1 - README.md | 10 ++++------ agent.py | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/.env.example b/.env.example index 29f6a77..b2fc837 100644 --- a/.env.example +++ b/.env.example @@ -1,5 +1,4 @@ LIVEKIT_URL=wss://testproject-12345.livekit.cloud LIVEKIT_API_KEY=livekit-api-key LIVEKIT_API_SECRET=livekit-api-secret -ELEVEN_API_KEY=elevenlabs-api-key GROQ_API_KEY=groq-api-key \ No newline at end of file diff --git a/README.md b/README.md index ecb320b..4df4aac 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,13 @@ # LiveKit + Groq Voice AI Assistant -[LiveKit](https://livekit.io) complements Groq's high-performance speech recognition capabilities by providing text-to-speech and real-time communication features. This integration enables you to build end-to-end AI voice applications with: +[LiveKit](https://livekit.io) complements Groq's fast AI inference with real-time communication features. This integration enables you to build end-to-end AI voice applications with: -- **Complete Voice Pipeline:** Combine Groq's fast and accurate speech-to-text (STT) with LiveKit's text-to-speech (TTS) capabilities +- **Complete Voice Pipeline:** Combine Groq's fast and accurate text, speech-to-text (STT), and text-to-speech (TTS) models with LiveKit's infrastructure - **Real-time Communication:** Enable multi-user voice interactions with LiveKit's WebRTC infrastructure -- **Flexible TTS Options:** Access multiple text-to-speech voices and languages through LiveKit's TTS integrations - **Scalable Architecture:** Handle thousands of concurrent users with LiveKit's distributed system - **Web Search Enabled:** This template uses Groq's `compound-mini` model with built-in web search capabilities. Try asking questions like "What's the weather in San Francisco?" or "What are today's top headlines?" and watch it fetch real-time information from the web! -This repository is a complete starter template for building end-to-end voice AI assistants with natural voice conversations and sub-second response times using Groq's optimized models and LiveKit's real-time media platform. +This repository is a complete starter template for building end-to-end voice AI assistants with natural voice conversations and sub-second response times using models hosted on Groq and LiveKit's real-time media platform. ## Live Demo @@ -39,7 +38,7 @@ You'll need a free LiveKit Cloud account to handle the real-time media infrastru 3. Get your API credentials from the project settings **Key Features:** -- **Sub-second response times** with Groq's optimized inference +- **Sub-second response times** with Groq's inference - **Real-time voice streaming** via LiveKit's infrastructure - **Production-ready** noise cancellation and turn detection - **Modern React UI** with real-time transcription display @@ -159,7 +158,6 @@ LIVEKIT_API_SECRET=your-api-secret # Groq API key (get from Groq Console) GROQ_API_KEY=your-groq-api-key -ELEVEN_API_KEY=elevenlabs-api-key **You also need to create a `.env.local` in the `voice-assistant-frontend/` directory:** ```bash diff --git a/agent.py b/agent.py index 27c3b30..765dc4d 100644 --- a/agent.py +++ b/agent.py @@ -41,7 +41,7 @@ async def entrypoint(ctx: agents.JobContext): session = AgentSession( stt=groq.STT(model="whisper-large-v3-turbo", language="en"), llm=groq.LLM(model="groq/compound-mini"), - tts=elevenlabs.TTS(model="eleven_flash_v2_5", voice_id="Xb7hH8MSUJpSbSDYk0k2"), + tts=groq.TTS(model="playai-tts"), vad=silero.VAD.load(), turn_detection=MultilingualModel(), )