diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs index 9fb38f5..91abc47 100644 --- a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs +++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs @@ -22,6 +22,7 @@ public sealed class TextToSpeechWebSocketEndpoint : ElevenLabsBaseEndPoint private const string EnableSsmlParsingParameter = "enable_ssml_parsing"; private const string OptimizeStreamingLatencyParameter = "optimize_streaming_latency"; private const string OutputFormatParameter = "output_format"; + private const string InactivityTimeoutParameter = "inactivity_timeout"; public TextToSpeechWebSocketEndpoint(ElevenLabsClient client) : base(client) { @@ -63,13 +64,17 @@ public TextToSpeechWebSocketEndpoint(ElevenLabsClient client) : base(client) /// 4 - max latency optimizations, but also with text normalizer turned off for even more latency savings /// (best latency, but can mispronounce eg numbers and dates). /// + /// + /// The number of seconds that the connection can be inactive before it is automatically closed. + /// Defaults to 20 seconds, with a maximum allowed value of 180 seconds. + /// /// Optional, . /// Raised when is null or empty. /// Raised when is null. public async Task StartTextToSpeechAsync(Voice voice, Func partialClipCallback, VoiceSettings voiceSettings = null, GenerationConfig generationConfig = null, Model model = null, OutputFormat outputFormat = OutputFormat.MP3_44100_128, bool? enableLogging = null, - bool? enableSsmlParsing = null, int? optimizeStreamingLatency = null, + bool? enableSsmlParsing = null, int? optimizeStreamingLatency = null, int? inactivityTimeout = null, CancellationToken cancellationToken = default) { if (voice == null || @@ -104,6 +109,11 @@ public async Task StartTextToSpeechAsync(Voice voice, Func part parameters.Add(OptimizeStreamingLatencyParameter, optimizeStreamingLatency.ToString()); } + if (inactivityTimeout.HasValue) + { + parameters.Add(InactivityTimeoutParameter, inactivityTimeout.ToString()); + } + await client.WebSocketClient.ConnectAsync( new Uri(GetWebSocketUrl($"/{voice.Id}/stream-input", parameters)), cancellationToken); @@ -139,11 +149,6 @@ public async Task SendTextToSpeechAsync(string text, bool? flush = null, bool tr throw new InvalidOperationException("WebSocket is not open!"); } - if (string.IsNullOrWhiteSpace(text)) - { - throw new ArgumentNullException($"{nameof(text)} cannot be null or empty!"); - } - TextToSpeechWebSocketRequest request = new(text, flush, tryTriggerGeneration); await client.WebSocketClient.SendAsync(request.ToArraySegment(), WebSocketMessageType.Text, true, cancellationToken);