From 9020979da9bc075d2076caddf67a5040d7821f64 Mon Sep 17 00:00:00 2001
From: Nico Nonne <nico@glome.ai>
Date: Thu, 25 Jul 2024 12:27:29 +0200
Subject: [PATCH 1/6] - Initial WebSockets implementation.

---
 .../ElevenLabsClientSettings.cs               | 120 ++++++++----
 .../Common/ElevenLabsBaseEndPoint.cs          |  81 ++++----
 ElevenLabs-DotNet/ElevenLabsClient.cs         |  73 +++++--
 ElevenLabs-DotNet/TextToSpeech/Alignment.cs   |  22 +++
 .../TextToSpeech/GenerationConfig.cs          |  20 ++
 .../TextToSpeechWebSocketEndpoint.cs          | 183 ++++++++++++++++++
 ...extToSpeechWebSocketFirstMessageRequest.cs |  36 ++++
 ...TextToSpeechWebSocketLastMessageRequest.cs |  21 ++
 .../TextToSpeechWebSocketRequest.cs           |  50 +++++
 .../TextToSpeechWebSocketResponse.cs          |  49 +++++
 10 files changed, 561 insertions(+), 94 deletions(-)
 create mode 100644 ElevenLabs-DotNet/TextToSpeech/Alignment.cs
 create mode 100644 ElevenLabs-DotNet/TextToSpeech/GenerationConfig.cs
 create mode 100644 ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs
 create mode 100644 ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketFirstMessageRequest.cs
 create mode 100644 ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketLastMessageRequest.cs
 create mode 100644 ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketRequest.cs
 create mode 100644 ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketResponse.cs
diff --git a/ElevenLabs-DotNet/Authentication/ElevenLabsClientSettings.cs b/ElevenLabs-DotNet/Authentication/ElevenLabsClientSettings.cs
index c876e39..16d1749 100644
--- a/ElevenLabs-DotNet/Authentication/ElevenLabsClientSettings.cs
+++ b/ElevenLabs-DotNet/Authentication/ElevenLabsClientSettings.cs
@@ -2,62 +2,98 @@
 
 using System;
 
-namespace ElevenLabs
+namespace ElevenLabs;
+
+public sealed class ElevenLabsClientSettings
 {
-    public sealed class ElevenLabsClientSettings
+    internal const string HttpProtocol = "http://";
+    internal const string HttpsProtocol = "https://";
+    internal const string WsProtocol = "ws://";
+    internal const string WssProtocol = "wss://";
+    internal const string DefaultApiVersion = "v1";
+    internal const string ElevenLabsDomain = "api.elevenlabs.io";
+
+    /// <summary>
+    ///     Creates a new instance of <see cref="ElevenLabsClientSettings" /> for use with ElevenLabs API.
+    /// </summary>
+    public ElevenLabsClientSettings()
+    {
+        Domain = ElevenLabsDomain;
+        ApiVersion = DefaultApiVersion;
+        Protocol = HttpsProtocol;
+        WebSocketProtocol = WssProtocol;
+        BaseRequest = $"/{ApiVersion}/";
+        BaseRequestUrlFormat = $"{Protocol}{Domain}{BaseRequest}{{0}}";
+        BaseRequestWebSocketUrlFormat = $"{WebSocketProtocol}{Domain}{BaseRequest}{{0}}";
+    }
+
+    /// <summary>
+    ///     Creates a new instance of <see cref="ElevenLabsClientSettings" /> for use with ElevenLabs API.
+    /// </summary>
+    /// <param name="domain">Base api domain, optionally prefixed with "http://" or "https://" (the WebSocket scheme is derived from it).</param>
+    /// <param name="apiVersion">The version of the ElevenLabs api you want to use.</param>
+    public ElevenLabsClientSettings(string domain, string apiVersion = DefaultApiVersion)
     {
-        internal const string Https = "https://";
-        internal const string DefaultApiVersion = "v1";
-        internal const string ElevenLabsDomain = "api.elevenlabs.io";
-
-        /// <summary>
-        /// Creates a new instance of <see cref="ElevenLabsClientSettings"/> for use with ElevenLabs API.
-        /// </summary>
-        public ElevenLabsClientSettings()
+        if (string.IsNullOrWhiteSpace(domain))
         {
-            Domain = ElevenLabsDomain;
-            ApiVersion = "v1";
-            BaseRequest = $"/{ApiVersion}/";
-            BaseRequestUrlFormat = $"{Https}{Domain}{BaseRequest}{{0}}";
+            domain = ElevenLabsDomain;
         }
 
-        /// <summary>
-        /// Creates a new instance of <see cref="ElevenLabsClientSettings"/> for use with ElevenLabs API.
-        /// </summary>
-        /// <param name="domain">Base api domain.</param>
-        /// <param name="apiVersion">The version of the ElevenLabs api you want to use.</param>
-        public ElevenLabsClientSettings(string domain, string apiVersion = DefaultApiVersion)
+        if (!domain.Contains('.') &&
+            !domain.Contains(':'))
         {
-            if (string.IsNullOrWhiteSpace(domain))
-            {
-                domain = ElevenLabsDomain;
-            }
+            throw new ArgumentException(
+                $"You're attempting to pass a \"resourceName\" parameter to \"{nameof(domain)}\". Please specify \"resourceName:\" for this parameter in constructor.");
+        }
 
-            if (!domain.Contains('.') &&
-                !domain.Contains(':'))
+        // Split off an optional "scheme://" prefix; the *Protocol constants include the "://" suffix.
+        var splitDomain = domain.Split("://", StringSplitOptions.RemoveEmptyEntries);
+        if (splitDomain.Length == 2)
+        {
+            Protocol = $"{splitDomain[0].ToLowerInvariant()}://";
+            domain = splitDomain[1]; // keep only the host: `domain` is assigned to Domain again further down
+            if (Protocol != HttpsProtocol &&
+                Protocol != HttpProtocol)
             {
-                throw new ArgumentException($"You're attempting to pass a \"resourceName\" parameter to \"{nameof(domain)}\". Please specify \"resourceName:\" for this parameter in constructor.");
+                throw new ArgumentException(
+                    $"The protocol \"{Protocol}\" is not supported. Please use \"{HttpsProtocol}\" or \"{HttpProtocol}\".");
             }
 
-            if (string.IsNullOrWhiteSpace(apiVersion))
-            {
-                apiVersion = DefaultApiVersion;
-            }
+            WebSocketProtocol = Protocol == HttpsProtocol ? WssProtocol : WsProtocol;
+            Domain = splitDomain[1];
+        }
+        else
+        {
+            Protocol = HttpsProtocol;
+            WebSocketProtocol = WssProtocol;
+            Domain = domain;
+        }
 
-            Domain = domain.Contains("http") ? domain : $"{Https}{domain}";
-            ApiVersion = apiVersion;
-            BaseRequest = $"/{ApiVersion}/";
-            BaseRequestUrlFormat = $"{Domain}{BaseRequest}{{0}}";
+        if (string.IsNullOrWhiteSpace(apiVersion))
+        {
+            apiVersion = DefaultApiVersion;
         }
 
-        public string Domain { get; }
+        Domain = domain;
+        ApiVersion = apiVersion;
+        BaseRequest = $"/{ApiVersion}/";
+        BaseRequestUrlFormat = $"{Protocol}{Domain}{BaseRequest}{{0}}";
+        BaseRequestWebSocketUrlFormat = $"{WebSocketProtocol}{Domain}{BaseRequest}{{0}}";
+    }
+
+    public string Protocol { get; }
 
-        public string ApiVersion { get; }
+    public string WebSocketProtocol { get; }
 
-        public string BaseRequest { get; }
+    public string Domain { get; }
 
-        public string BaseRequestUrlFormat { get; }
+    public string ApiVersion { get; }
 
-        public static ElevenLabsClientSettings Default { get; } = new();
-    }
-}
+    public string BaseRequest { get; }
+
+    public string BaseRequestUrlFormat { get; }
+
+    public string BaseRequestWebSocketUrlFormat { get; }
+
+    public static ElevenLabsClientSettings Default { get; } = new();
+}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/Common/ElevenLabsBaseEndPoint.cs b/ElevenLabs-DotNet/Common/ElevenLabsBaseEndPoint.cs
index 94807c3..00fa4f6 100644
--- a/ElevenLabs-DotNet/Common/ElevenLabsBaseEndPoint.cs
+++ b/ElevenLabs-DotNet/Common/ElevenLabsBaseEndPoint.cs
@@ -3,47 +3,58 @@
 using System.Collections.Generic;
 using System.Linq;
 
-namespace ElevenLabs
+namespace ElevenLabs;
+
+public abstract class ElevenLabsBaseEndPoint
 {
-    public abstract class ElevenLabsBaseEndPoint
+    internal ElevenLabsBaseEndPoint(ElevenLabsClient client) => this.client = client;
+
+    // ReSharper disable once InconsistentNaming
+    protected readonly ElevenLabsClient client;
+
+    /// <summary>
+    ///     The root endpoint address.
+    /// </summary>
+    protected abstract string Root { get; }
+
+    /// <summary>
+    ///     Gets the full formatted url for the API endpoint.
+    /// </summary>
+    /// <param name="endpoint">The endpoint url.</param>
+    /// <param name="queryParameters">Optional, parameters to add to the endpoint.</param>
+    protected string GetUrl(string endpoint = "", Dictionary<string, string> queryParameters = null)
     {
-        internal ElevenLabsBaseEndPoint(ElevenLabsClient client) => this.client = client;
-
-        // ReSharper disable once InconsistentNaming
-        protected readonly ElevenLabsClient client;
-
-        /// <summary>
-        /// The root endpoint address.
-        /// </summary>
-        protected abstract string Root { get; }
-
-        /// <summary>
-        /// Gets the full formatted url for the API endpoint.
-        /// </summary>
-        /// <param name="endpoint">The endpoint url.</param>
-        /// <param name="queryParameters">Optional, parameters to add to the endpoint.</param>
-        protected string GetUrl(string endpoint = "", Dictionary<string, string> queryParameters = null)
-        {
-            var result = string.Format(client.ElevenLabsClientSettings.BaseRequestUrlFormat, $"{Root}{endpoint}");
-
-            if (queryParameters is { Count: not 0 })
-            {
-                result += $"?{string.Join('&', queryParameters.Select(parameter => $"{parameter.Key}={parameter.Value}"))}";
-            }
+        var result = string.Format(client.ElevenLabsClientSettings.BaseRequestUrlFormat, $"{Root}{endpoint}");
 
-            return result;
+        if (queryParameters is { Count: not 0 })
+        {
+            result += $"?{string.Join('&', queryParameters.Select(parameter => $"{parameter.Key}={parameter.Value}"))}";
         }
 
-        private bool enableDebug;
+        return result;
+    }
+
+    protected string GetWebSocketUrl(string endpoint = "", Dictionary<string, string> queryParameters = null)
+    {
+        var result = string.Format(client.ElevenLabsClientSettings.BaseRequestWebSocketUrlFormat, $"{Root}{endpoint}");
 
-        /// <summary>
-        /// Enables or disables the logging of all http responses of header and body information for this endpoint.<br/>
-        /// WARNING! Enabling this in your production build, could potentially leak sensitive information!
-        /// </summary>
-        public bool EnableDebug
+        if (queryParameters is { Count: not 0 })
         {
-            get => enableDebug || client.EnableDebug;
-            set => enableDebug = value;
+            result += $"?{string.Join('&', queryParameters.Select(parameter => $"{parameter.Key}={parameter.Value}"))}";
         }
+
+        return result;
+    }
+
+    private bool enableDebug;
+
+    /// <summary>
+    ///     Enables or disables the logging of all http responses of header and body information for this endpoint.<br />
+    ///     WARNING! Enabling this in your production build, could potentially leak sensitive information!
+    /// </summary>
+    public bool EnableDebug
+    {
+        get => enableDebug || client.EnableDebug;
+        set => enableDebug = value;
     }
-}
+}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/ElevenLabsClient.cs b/ElevenLabs-DotNet/ElevenLabsClient.cs
index 8efcedc..3a97963 100644
--- a/ElevenLabs-DotNet/ElevenLabsClient.cs
+++ b/ElevenLabs-DotNet/ElevenLabsClient.cs
@@ -10,6 +10,7 @@
 using ElevenLabs.Voices;
 using System;
 using System.Net.Http;
+using System.Net.WebSockets;
 using System.Security.Authentication;
 using System.Text.Json;
 using System.Text.Json.Serialization;
@@ -19,31 +20,41 @@ namespace ElevenLabs
     public sealed class ElevenLabsClient : IDisposable
     {
         /// <summary>
-        /// Creates a new client for the Eleven Labs API, handling auth and allowing for access to various API endpoints.
+        ///     Creates a new client for the Eleven Labs API, handling auth and allowing for access to various API endpoints.
         /// </summary>
-        /// <param name="authentication">The API authentication information to use for API calls,
-        /// or <see langword="null"/> to attempt to use the <see cref="ElevenLabsAuthentication.Default"/>,
-        /// potentially loading from environment vars or from a config file.
+        /// <param name="authentication">
+        ///     The API authentication information to use for API calls,
+        ///     or <see langword="null" /> to attempt to use the <see cref="ElevenLabsAuthentication.Default" />,
+        ///     potentially loading from environment vars or from a config file.
         /// </param>
         /// <param name="settings">
-        /// Optional, <see cref="ElevenLabsClientSettings"/> for specifying a proxy domain.
+        ///     Optional, <see cref="ElevenLabsClientSettings" /> for specifying a proxy domain.
         /// </param>
-        /// <param name="httpClient">Optional, <see cref="HttpClient"/>.</param>
+        /// <param name="httpClient">Optional, <see cref="HttpClient" />.</param>
+        /// <param name="webSocketClient">Optional, <see cref="ClientWebSocket" />.</param>
         /// <exception cref="AuthenticationException">Raised when authentication details are missing or invalid.</exception>
-        /// <see cref="ElevenLabsClient"/> implements <see cref="IDisposable"/> to manage the lifecycle of the resources it uses, including <see cref="HttpClient"/>.
+        /// <see cref="ElevenLabsClient" />
+        /// implements
+        /// <see cref="IDisposable" />
+        /// to manage the lifecycle of the resources it uses, including
+        /// <see cref="HttpClient" />
+        /// .
         /// <remarks>
-        /// When you initialize <see cref="ElevenLabsClient"/>, it will create an internal <see cref="HttpClient"/> instance if one is not provided.
-        /// This internal HttpClient is disposed of when ElevenLabsClient is disposed of.
-        /// If you provide an external HttpClient instance to ElevenLabsClient, you are responsible for managing its disposal.
+        ///     When you initialize <see cref="ElevenLabsClient" />, it will create an internal <see cref="HttpClient" /> instance
+        ///     if one is not provided.
+        ///     This internal HttpClient is disposed of when ElevenLabsClient is disposed of.
+        ///     If you provide an external HttpClient instance to ElevenLabsClient, you are responsible for managing its disposal.
         /// </remarks>
-        public ElevenLabsClient(ElevenLabsAuthentication authentication = null, ElevenLabsClientSettings settings = null, HttpClient httpClient = null)
+        public ElevenLabsClient(ElevenLabsAuthentication authentication = null,
+            ElevenLabsClientSettings settings = null, HttpClient httpClient = null, ClientWebSocket webSocketClient = null)
         {
             ElevenLabsAuthentication = authentication ?? ElevenLabsAuthentication.Default;
             ElevenLabsClientSettings = settings ?? ElevenLabsClientSettings.Default;
 
             if (string.IsNullOrWhiteSpace(ElevenLabsAuthentication?.ApiKey))
             {
-                throw new AuthenticationException("You must provide API authentication.  Please refer to https://github.com/RageAgainstThePixel/ElevenLabs-DotNet#authentication for details.");
+                throw new AuthenticationException(
+                    "You must provide API authentication.  Please refer to https://github.com/RageAgainstThePixel/ElevenLabs-DotNet#authentication for details.");
             }
 
             if (httpClient == null)
@@ -58,16 +69,30 @@ public ElevenLabsClient(ElevenLabsAuthentication authentication = null, ElevenLa
                 isCustomClient = true;
             }
 
+            if (webSocketClient == null)
+            {
+                webSocketClient = new ClientWebSocket();
+            }
+            else
+            {
+                isCustomWebSocketClient = true;
+            }
+
             Client = httpClient;
             Client.DefaultRequestHeaders.Add("User-Agent", "ElevenLabs-DotNet");
             Client.DefaultRequestHeaders.Add("xi-api-key", ElevenLabsAuthentication.ApiKey);
 
+            WebSocketClient = webSocketClient;
+            WebSocketClient.Options.SetRequestHeader("User-Agent", "ElevenLabs-DotNet");
+            WebSocketClient.Options.SetRequestHeader("xi-api-key", ElevenLabsAuthentication.ApiKey);
+
             UserEndpoint = new UserEndpoint(this);
             VoicesEndpoint = new VoicesEndpoint(this);
             SharedVoicesEndpoint = new SharedVoicesEndpoint(this);
             ModelsEndpoint = new ModelsEndpoint(this);
             HistoryEndpoint = new HistoryEndpoint(this);
             TextToSpeechEndpoint = new TextToSpeechEndpoint(this);
+            TextToSpeechWebSocketEndpoint = new TextToSpeechWebSocketEndpoint(this);
             VoiceGenerationEndpoint = new VoiceGenerationEndpoint(this);
             SoundGenerationEndpoint = new SoundGenerationEndpoint(this);
             DubbingEndpoint = new DubbingEndpoint(this);
@@ -96,6 +121,11 @@ private void Dispose(bool disposing)
                 {
                     Client?.Dispose();
                 }
+                
+                if (!isCustomWebSocketClient)
+                {
+                    WebSocketClient?.Dispose();
+                }
 
                 isDisposed = true;
             }
@@ -105,13 +135,20 @@ private void Dispose(bool disposing)
 
         private bool isCustomClient;
 
+        private bool isCustomWebSocketClient;
+
         /// <summary>
-        /// <see cref="HttpClient"/> to use when making calls to the API.
+        ///     <see cref="HttpClient" /> to use when making calls to the API.
         /// </summary>
         internal HttpClient Client { get; }
 
         /// <summary>
-        /// The <see cref="JsonSerializationOptions"/> to use when making calls to the API.
+        ///     <see cref="ClientWebSocket" /> to use when making calls to the API.
+        /// </summary>
+        internal ClientWebSocket WebSocketClient { get; }
+
+        /// <summary>
+        ///     The <see cref="JsonSerializationOptions" /> to use when making calls to the API.
         /// </summary>
         internal static JsonSerializerOptions JsonSerializationOptions { get; } = new()
         {
@@ -119,12 +156,12 @@ private void Dispose(bool disposing)
         };
 
         /// <summary>
-        /// Enables or disables debugging for all endpoints.
+        ///     Enables or disables debugging for all endpoints.
         /// </summary>
         public bool EnableDebug { get; set; }
 
         /// <summary>
-        /// The API authentication information to use for API calls
+        ///     The API authentication information to use for API calls
         /// </summary>
         public ElevenLabsAuthentication ElevenLabsAuthentication { get; }
 
@@ -141,6 +178,8 @@ private void Dispose(bool disposing)
         public HistoryEndpoint HistoryEndpoint { get; }
 
         public TextToSpeechEndpoint TextToSpeechEndpoint { get; }
+        
+        public TextToSpeechWebSocketEndpoint TextToSpeechWebSocketEndpoint { get; }
 
         public VoiceGenerationEndpoint VoiceGenerationEndpoint { get; }
 
@@ -148,4 +187,4 @@ private void Dispose(bool disposing)
 
         public DubbingEndpoint DubbingEndpoint { get; }
     }
-}
+}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/TextToSpeech/Alignment.cs b/ElevenLabs-DotNet/TextToSpeech/Alignment.cs
new file mode 100644
index 0000000..037965e
--- /dev/null
+++ b/ElevenLabs-DotNet/TextToSpeech/Alignment.cs
@@ -0,0 +1,22 @@
+﻿using System.Text.Json.Serialization;
+
+namespace ElevenLabs.TextToSpeech;
+
+public class Alignment
+{
+    [JsonPropertyName("char_start_times_ms")]
+    public int[] CharStartTimesMs { get; }
+
+    [JsonPropertyName("chars_durations_ms")]
+    public int[] CharsDurationsMs { get; }
+
+    [JsonPropertyName("chars")]
+    public string[] Chars { get; }
+
+    public Alignment(int[] charStartTimesMs, int[] charsDurationsMs, string[] chars)
+    {
+        CharStartTimesMs = charStartTimesMs;
+        CharsDurationsMs = charsDurationsMs;
+        Chars = chars;
+    }
+}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/TextToSpeech/GenerationConfig.cs b/ElevenLabs-DotNet/TextToSpeech/GenerationConfig.cs
new file mode 100644
index 0000000..f827742
--- /dev/null
+++ b/ElevenLabs-DotNet/TextToSpeech/GenerationConfig.cs
@@ -0,0 +1,20 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System.Text.Json.Serialization;
+
+namespace ElevenLabs.TextToSpeech;
+
+public sealed class GenerationConfig
+{
+    [JsonPropertyName("chunk_length_schedule")]
+    public int[] ChunkLengthSchedule { get; }
+
+    public GenerationConfig() : this([120, 160, 250, 290])
+    {
+    }
+
+    public GenerationConfig(int[] chunkLengthSchedule)
+    {
+        ChunkLengthSchedule = chunkLengthSchedule;
+    }
+}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs
new file mode 100644
index 0000000..aef6616
--- /dev/null
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs
@@ -0,0 +1,183 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using ElevenLabs.Models;
+using ElevenLabs.Voices;
+using System;
+using System.Collections.Generic;
+using System.Net.WebSockets;
+using System.Text;
+using System.Text.Json;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace ElevenLabs.TextToSpeech;
+
+/// <summary>
+///     Access to convert text to synthesized speech.
+/// </summary>
+public sealed class TextToSpeechWebSocketEndpoint : ElevenLabsBaseEndPoint
+{
+    private const string ModelIdParameter = "model_id";
+    private const string EnableLoggingParameter = "enable_logging";
+    private const string EnableSsmlParsingParameter = "enable_ssml_parsing";
+    private const string OptimizeStreamingLatencyParameter = "optimize_streaming_latency";
+    private const string OutputFormatParameter = "output_format";
+
+    public TextToSpeechWebSocketEndpoint(ElevenLabsClient client) : base(client)
+    {
+    }
+
+    protected override string Root => "text-to-speech";
+
+    /// <summary>
+    ///     Opens the streaming text-to-speech WebSocket for the given voice and starts receiving audio in the background.
+    /// </summary>
+    /// <param name="voice">
+    ///     <see cref="Voice" /> to use.
+    /// </param>
+    /// <param name="partialClipCallback">
+    ///     Callback for streamed audio as it comes in.<br />
+    ///     Returns partial <see cref="VoiceClip" />.
+    /// </param>
+    /// <param name="voiceSettings">
+    ///     Optional, <see cref="VoiceSettings" /> that will override the default settings in <see cref="Voice.Settings" />.
+    /// </param>
+    /// <param name="generationConfig">Optional, <see cref="GenerationConfig" />.</param>
+    /// <param name="model">
+    ///     Optional, <see cref="Model" /> to use. Defaults to <see cref="Model.MonoLingualV1" />.
+    /// </param>
+    /// <param name="outputFormat">
+    ///     Output format of the generated audio.<br />
+    ///     Defaults to <see cref="OutputFormat.MP3_44100_128" />
+    /// </param>
+    /// <param name="enableLogging">Optional, enable logging.</param>
+    /// <param name="enableSsmlParsing">Optional, enable SSML parsing.</param>
+    /// <param name="optimizeStreamingLatency">
+    ///     Optional, You can turn on latency optimizations at some cost of quality.
+    ///     The best possible final latency varies by model.<br />
+    ///     Possible values:<br />
+    ///     0 - default mode (no latency optimizations)<br />
+    ///     1 - normal latency optimizations (about 50% of possible latency improvement of option 3)<br />
+    ///     2 - strong latency optimizations (about 75% of possible latency improvement of option 3)<br />
+    ///     3 - max latency optimizations<br />
+    ///     4 - max latency optimizations, but also with text normalizer turned off for even more latency savings
+    ///     (best latency, but can mispronounce eg numbers and dates).
+    /// </param>
+    /// <param name="cancellationToken">Optional, <see cref="CancellationToken" />.</param>
+    /// <returns>A task that completes once the socket is connected and the initial message has been sent.</returns>
+    public async Task StartTextToSpeechAsync(Voice voice, Func<VoiceClip, Task> partialClipCallback,
+        VoiceSettings voiceSettings = null, GenerationConfig generationConfig = null, Model model = null,
+        OutputFormat outputFormat = OutputFormat.MP3_44100_128, bool? enableLogging = null,
+        bool? enableSsmlParsing = null, int? optimizeStreamingLatency = null,
+        CancellationToken cancellationToken = default)
+    {
+        if (voice == null ||
+            string.IsNullOrWhiteSpace(voice.Id))
+        {
+            throw new ArgumentNullException(nameof(voice));
+        }
+
+        if (partialClipCallback == null)
+        {
+            throw new ArgumentNullException(nameof(partialClipCallback));
+        }
+
+        var parameters = new Dictionary<string, string>
+        {
+            { ModelIdParameter, model?.Id ?? Model.MonoLingualV1.Id },
+            { OutputFormatParameter, outputFormat.ToString().ToLower() }
+        };
+
+        if (enableLogging.HasValue)
+        {
+            parameters.Add(EnableLoggingParameter, enableLogging.ToString());
+        }
+
+        if (enableSsmlParsing.HasValue)
+        {
+            parameters.Add(EnableSsmlParsingParameter, enableSsmlParsing.ToString());
+        }
+
+        if (optimizeStreamingLatency.HasValue)
+        {
+            parameters.Add(OptimizeStreamingLatencyParameter, optimizeStreamingLatency.ToString());
+        }
+
+        await client.WebSocketClient.ConnectAsync(
+            new Uri(GetWebSocketUrl($"/{voice.Id}/stream-input", parameters)), cancellationToken);
+
+        // start receiving messages in a separate task
+        _ = Task.Run(async () => await ReceiveMessagesAsync(partialClipCallback, voice, cancellationToken),
+            cancellationToken);
+
+        TextToSpeechWebSocketFirstMessageRequest firstMessageRequest = new(voiceSettings, generationConfig);
+        await client.WebSocketClient.SendAsync(firstMessageRequest.ToArraySegment(), WebSocketMessageType.Text, true,
+            cancellationToken);
+    }
+
+    public async Task SendTextToSpeechAsync(string text, bool? flush = null, bool tryTriggerGeneration = false,
+        CancellationToken cancellationToken = default)
+    {
+        TextToSpeechWebSocketRequest request = new(text, flush, tryTriggerGeneration);
+        await client.WebSocketClient.SendAsync(request.ToArraySegment(), WebSocketMessageType.Text, true,
+            cancellationToken);
+    }
+
+    public async Task EndTextToSpeechAsync(CancellationToken cancellationToken = default)
+    {
+        TextToSpeechWebSocketLastMessageRequest lastMessageRequest = new();
+        await client.WebSocketClient.SendAsync(lastMessageRequest.ToArraySegment(), WebSocketMessageType.Text, true,
+            cancellationToken);
+    }
+
+    private async Task ReceiveMessagesAsync(Func<VoiceClip, Task> partialClipCallback, Voice voice,
+        CancellationToken cancellationToken)
+    {
+        byte[] buffer = new byte[8192];
+        using var message = new System.IO.MemoryStream();
+        // Loops while the socket is Open; each complete message is parsed and handed to the callback.
+        while (client.WebSocketClient.State == WebSocketState.Open)
+        {
+            WebSocketReceiveResult receiveResult = await client.WebSocketClient.ReceiveAsync(
+                new ArraySegment<byte>(buffer), cancellationToken);
+
+            if (receiveResult.MessageType == WebSocketMessageType.Close)
+            {
+                await client.WebSocketClient.CloseAsync(WebSocketCloseStatus.NormalClosure, string.Empty,
+                    cancellationToken);
+                break;
+            }
+
+            // Keep raw bytes: decoding per fragment would corrupt a multi-byte UTF-8 char split across fragments.
+            message.Write(buffer, 0, receiveResult.Count);
+
+            if (!receiveResult.EndOfMessage)
+            {
+                continue;
+            }
+
+            TextToSpeechWebSocketResponse response = JsonSerializer.Deserialize<TextToSpeechWebSocketResponse>(
+                message.ToArray(), ElevenLabsClient.JsonSerializationOptions);
+
+            if (response == null)
+            {
+                throw new ArgumentException("Failed to parse response!");
+            }
+
+            message.SetLength(0);
+
+            if (!string.IsNullOrWhiteSpace(response.Audio))
+            {
+                string text = response.Alignment is { Chars: not null }
+                    ? string.Concat(response.Alignment.Chars)
+                    : null;
+                VoiceClip voiceClip = new(string.Empty, text, voice, response.AudioBytes);
+                await partialClipCallback(voiceClip).ConfigureAwait(false);
+            }
+            else
+            {
+                await partialClipCallback(null).ConfigureAwait(false);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketFirstMessageRequest.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketFirstMessageRequest.cs
new file mode 100644
index 0000000..4e828a9
--- /dev/null
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketFirstMessageRequest.cs
@@ -0,0 +1,36 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System;
+using System.Text;
+using System.Text.Json;
+using ElevenLabs.Voices;
+using System.Text.Json.Serialization;
+
+namespace ElevenLabs.TextToSpeech;
+
+public sealed class TextToSpeechWebSocketFirstMessageRequest
+{
+    public TextToSpeechWebSocketFirstMessageRequest(
+        VoiceSettings voiceSettings = null,
+        GenerationConfig generationConfig = null)
+    {
+        VoiceSettings = voiceSettings;
+        GenerationConfig = generationConfig;
+    }
+
+    [JsonPropertyName("text"), JsonInclude]
+    public string Text { get; } = " ";
+
+    [JsonPropertyName("voice_settings")]
+    public VoiceSettings VoiceSettings { get; }
+
+    [JsonPropertyName("generation_config")]
+    public GenerationConfig GenerationConfig { get; }
+    
+    public ArraySegment<byte> ToArraySegment()
+    {
+        string json = JsonSerializer.Serialize(this);
+        byte[] bytes = Encoding.UTF8.GetBytes(json);
+        return new ArraySegment<byte>(bytes);
+    }
+}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketLastMessageRequest.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketLastMessageRequest.cs
new file mode 100644
index 0000000..4eca5b7
--- /dev/null
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketLastMessageRequest.cs
@@ -0,0 +1,21 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System;
+using System.Text;
+using System.Text.Json;
+using System.Text.Json.Serialization;
+
+namespace ElevenLabs.TextToSpeech;
+
+public sealed class TextToSpeechWebSocketLastMessageRequest
+{
+    [JsonPropertyName("text"), JsonInclude]
+    public string Text { get; } = "";
+    
+    public ArraySegment<byte> ToArraySegment()
+    {
+        string json = JsonSerializer.Serialize(this);
+        byte[] bytes = Encoding.UTF8.GetBytes(json);
+        return new ArraySegment<byte>(bytes);
+    }
+}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketRequest.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketRequest.cs
new file mode 100644
index 0000000..9c59cf3
--- /dev/null
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketRequest.cs
@@ -0,0 +1,50 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System;
+using System.Text;
+using System.Text.Json;
+using System.Text.Json.Serialization;
+
+namespace ElevenLabs.TextToSpeech;
+
+public sealed class TextToSpeechWebSocketRequest
+{
+    public TextToSpeechWebSocketRequest(string text, bool? flush = null, bool tryTriggerGeneration = false)
+    {
+        if (string.IsNullOrWhiteSpace(text))
+        {
+            throw new ArgumentNullException(nameof(text));
+        }
+
+        // if the last character of the text is not a space, append one
+        Text = text[^1] != ' ' ? text + ' ' : text;
+        TryTriggerGeneration = tryTriggerGeneration;
+        Flush = flush; // null is kept as-is; see the ignore condition on the property
+    }
+
+    /// <summary>
+    ///     The text to be converted to speech. It always ends with a space: the constructor appends one when missing.
+    /// </summary>
+    [JsonPropertyName("text"), JsonInclude]
+    public string Text { get; }
+
+    /// <summary>
+    ///     Asks for audio generation to be attempted immediately. Most users should leave this at false.
+    /// </summary>
+    [JsonPropertyName("try_trigger_generation")]
+    public bool TryTriggerGeneration { get; }
+
+    /// <summary>
+    ///     Flush forces the generation of audio. Set this value to true when you have finished sending text,
+    ///     but want to keep the websocket connection open. The field is left out of the JSON when null.
+    /// </summary>
+    [JsonPropertyName("flush"), JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    public bool? Flush { get; }
+    
+    public ArraySegment<byte> ToArraySegment()
+    {
+        string json = JsonSerializer.Serialize(this);
+        byte[] bytes = Encoding.UTF8.GetBytes(json); // UTF-8 bytes of the JSON text
+        return new ArraySegment<byte>(bytes); // wraps the whole array, no copy
+    }
+}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketResponse.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketResponse.cs
new file mode 100644
index 0000000..7ad6494
--- /dev/null
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketResponse.cs
@@ -0,0 +1,49 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System;
+using System.Text.Json.Serialization;
+
+namespace ElevenLabs.TextToSpeech;
+
+public sealed class TextToSpeechWebSocketResponse
+{
+    public TextToSpeechWebSocketResponse(string audio, bool? isFinal, Alignment normalizedAlignment, Alignment alignment)
+    {
+        // The base64 payload is decoded eagerly; a null payload yields null bytes.
+        AudioBytes = audio is null ? null : Convert.FromBase64String(audio);
+        Alignment = alignment;
+        NormalizedAlignment = normalizedAlignment;
+        IsFinal = isFinal;
+        Audio = audio;
+    }
+
+    /// <summary>
+    ///     Base64 text of one partial audio chunk in the selected output_format (MP3 by default).
+    /// </summary>
+    [JsonPropertyName("audio")]
+    public string Audio { get; }
+
+    /// <summary>
+    ///     The same partial audio chunk as <see cref="Audio" />, decoded from base64; null when no audio was sent.
+    /// </summary>
+    [JsonIgnore]
+    public byte[] AudioBytes { get; }
+
+    /// <summary>
+    ///     Tells whether the generation has completed. When true, audio is null.
+    /// </summary>
+    [JsonPropertyName("isFinal")]
+    public bool? IsFinal { get; }
+
+    /// <summary>
+    ///     Alignment of the generated audio against the normalized input text sequence.
+    /// </summary>
+    [JsonPropertyName("normalizedAlignment")]
+    public Alignment NormalizedAlignment { get; }
+
+    /// <summary>
+    ///     Alignment of the generated audio against the original input text sequence.
+    /// </summary>
+    [JsonPropertyName("alignment")]
+    public Alignment Alignment { get; }
+}
\ No newline at end of file

From 94c252989c6c0f7082104795201d4910005100b6 Mon Sep 17 00:00:00 2001
From: Nico Nonne <nico@glome.ai>
Date: Fri, 26 Jul 2024 11:34:04 +0200
Subject: [PATCH 2/6] - adding the turbo 2.5 model

---
 ElevenLabs-DotNet/Models/Model.cs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/ElevenLabs-DotNet/Models/Model.cs b/ElevenLabs-DotNet/Models/Model.cs
index a1bf3c7..658c474 100644
--- a/ElevenLabs-DotNet/Models/Model.cs
+++ b/ElevenLabs-DotNet/Models/Model.cs
@@ -93,6 +93,12 @@ public Model(string id)
         /// <summary>
+        /// The Turbo v2.5 model (<c>eleven_turbo_v2_5</c>).
+        /// </summary>
+        [JsonIgnore]
+        public static Model TurboV25 { get; } = new("eleven_turbo_v2_5");
+
+        /// <summary>
         /// Our state-of-the-art speech to speech model suitable for scenarios where you need maximum control over the content and prosody of your generations.
         /// </summary>
         [JsonIgnore]
         public static Model EnglishSpeechToSpeechV2 { get; } = new("eleven_english_sts_v2");
 

From dd0de4fcaacd5383458c342e00e85bc2ddf2e7a9 Mon Sep 17 00:00:00 2001
From: Nico Nonne <nico@glome.ai>
Date: Wed, 31 Jul 2024 11:08:39 +0200
Subject: [PATCH 3/6] - added several checks and documentation, made the custom
 WebSocket version into a spawner function for re-using it

---
 ElevenLabs-DotNet/ElevenLabsClient.cs         |  37 +++--
 .../TextToSpeechWebSocketEndpoint.cs          | 132 ++++++++++++------
 .../TextToSpeechWebSocketRequest.cs           |  15 +-
 3 files changed, 120 insertions(+), 64 deletions(-)

diff --git a/ElevenLabs-DotNet/ElevenLabsClient.cs b/ElevenLabs-DotNet/ElevenLabsClient.cs
index 3a97963..b3c24e7 100644
--- a/ElevenLabs-DotNet/ElevenLabsClient.cs
+++ b/ElevenLabs-DotNet/ElevenLabsClient.cs
@@ -31,7 +31,7 @@ public sealed class ElevenLabsClient : IDisposable
         ///     Optional, <see cref="ElevenLabsClientSettings" /> for specifying a proxy domain.
         /// </param>
         /// <param name="httpClient">Optional, <see cref="HttpClient" />.</param>
-        /// <param name="webSocketClient">Optional, <see cref="WebSocket" />.</param>
+        /// <param name="clientWebSocketSpawner">Optional, to create custom versions of <see cref="ClientWebSocket" />.</param>
         /// <exception cref="AuthenticationException">Raised when authentication details are missing or invalid.</exception>
         /// <see cref="ElevenLabsClient" />
         /// implements
@@ -46,7 +46,8 @@ public sealed class ElevenLabsClient : IDisposable
         ///     If you provide an external HttpClient instance to ElevenLabsClient, you are responsible for managing its disposal.
         /// </remarks>
         public ElevenLabsClient(ElevenLabsAuthentication authentication = null,
-            ElevenLabsClientSettings settings = null, HttpClient httpClient = null, ClientWebSocket webSocketClient = null)
+            ElevenLabsClientSettings settings = null, HttpClient httpClient = null,
+            Func<ClientWebSocket> clientWebSocketSpawner = null)
         {
             ElevenLabsAuthentication = authentication ?? ElevenLabsAuthentication.Default;
             ElevenLabsClientSettings = settings ?? ElevenLabsClientSettings.Default;
@@ -69,20 +70,12 @@ public ElevenLabsClient(ElevenLabsAuthentication authentication = null,
                 isCustomClient = true;
             }
 
-            if (webSocketClient == null)
-            {
-                webSocketClient = new ClientWebSocket();
-            }
-            else
-            {
-                isCustomWebSocketClient = true;
-            }
-
             Client = httpClient;
             Client.DefaultRequestHeaders.Add("User-Agent", "ElevenLabs-DotNet");
             Client.DefaultRequestHeaders.Add("xi-api-key", ElevenLabsAuthentication.ApiKey);
 
-            WebSocketClient = webSocketClient;
+            this.clientWebSocketSpawner = clientWebSocketSpawner;
+            WebSocketClient = clientWebSocketSpawner == null ? new ClientWebSocket() : clientWebSocketSpawner();
             WebSocketClient.Options.SetRequestHeader("User-Agent", "ElevenLabs-DotNet");
             WebSocketClient.Options.SetRequestHeader("xi-api-key", ElevenLabsAuthentication.ApiKey);
 
@@ -98,6 +91,14 @@ public ElevenLabsClient(ElevenLabsAuthentication authentication = null,
             DubbingEndpoint = new DubbingEndpoint(this);
         }
 
+        public void ReinitializeWebSocketClient()
+        {
+            WebSocketClient.Dispose();
+            WebSocketClient = clientWebSocketSpawner == null ? new ClientWebSocket() : clientWebSocketSpawner();
+            WebSocketClient.Options.SetRequestHeader("User-Agent", "ElevenLabs-DotNet");
+            WebSocketClient.Options.SetRequestHeader("xi-api-key", ElevenLabsAuthentication.ApiKey);
+        }
+
         ~ElevenLabsClient()
         {
             Dispose(false);
@@ -121,12 +122,8 @@ private void Dispose(bool disposing)
                 {
                     Client?.Dispose();
                 }
-                
-                if (!isCustomWebSocketClient)
-                {
-                    WebSocketClient?.Dispose();
-                }
 
+                WebSocketClient?.Dispose();
                 isDisposed = true;
             }
         }
@@ -135,7 +132,7 @@ private void Dispose(bool disposing)
 
         private bool isCustomClient;
 
-        private bool isCustomWebSocketClient;
+        private Func<ClientWebSocket> clientWebSocketSpawner;
 
         /// <summary>
         ///     <see cref="HttpClient" /> to use when making calls to the API.
@@ -145,7 +142,7 @@ private void Dispose(bool disposing)
         /// <summary>
         ///     <see cref="ClientWebSocket" /> to use when making calls to the API.
         /// </summary>
-        internal ClientWebSocket WebSocketClient { get; }
+        internal ClientWebSocket WebSocketClient { get; private set; }
 
         /// <summary>
         ///     The <see cref="JsonSerializationOptions" /> to use when making calls to the API.
@@ -178,7 +175,7 @@ private void Dispose(bool disposing)
         public HistoryEndpoint HistoryEndpoint { get; }
 
         public TextToSpeechEndpoint TextToSpeechEndpoint { get; }
-        
+
         public TextToSpeechWebSocketEndpoint TextToSpeechWebSocketEndpoint { get; }
 
         public VoiceGenerationEndpoint VoiceGenerationEndpoint { get; }
diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs
index aef6616..409eae0 100644
--- a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs
@@ -13,7 +13,7 @@
 namespace ElevenLabs.TextToSpeech;
 
 /// <summary>
-///     Access to convert text to synthesized speech.
+///     Access to convert text to synthesized speech using a WebSocket connection.
 /// </summary>
 public sealed class TextToSpeechWebSocketEndpoint : ElevenLabsBaseEndPoint
 {
@@ -64,7 +64,8 @@ public TextToSpeechWebSocketEndpoint(ElevenLabsClient client) : base(client)
     ///     (best latency, but can mispronounce eg numbers and dates).
     /// </param>
     /// <param name="cancellationToken">Optional, <see cref="CancellationToken" />.</param>
-    /// <returns><see cref="VoiceClip" />.</returns>
+    /// <exception cref="ArgumentNullException">Raised when <paramref name="voice" /> is null or empty.</exception>
+    /// <exception cref="ArgumentNullException">Raised when <paramref name="partialClipCallback" /> is null.</exception>
     public async Task StartTextToSpeechAsync(Voice voice, Func<VoiceClip, Task> partialClipCallback,
         VoiceSettings voiceSettings = null, GenerationConfig generationConfig = null, Model model = null,
         OutputFormat outputFormat = OutputFormat.MP3_44100_128, bool? enableLogging = null,
@@ -115,69 +116,114 @@ await client.WebSocketClient.SendAsync(firstMessageRequest.ToArraySegment(), Web
             cancellationToken);
     }
 
+    /// <summary>
+    ///     Sends text to the WebSocket for speech synthesis.
+    /// </summary>
+    /// <param name="text">Text input to synthesize speech for. Needs to end with a space and cannot be null or empty.</param>
+    /// <param name="flush">
+    ///     Forces the generation of audio. Set this value to true when you have finished sending text, but
+    ///     want to keep the websocket connection open.
+    /// </param>
+    /// <param name="tryTriggerGeneration">
+    ///     Use this to attempt to immediately trigger the generation of audio. Most users
+    ///     shouldn't use this.
+    /// </param>
+    /// <param name="cancellationToken">Optional, <see cref="CancellationToken" />.</param>
+    /// <exception cref="InvalidOperationException">Raised when the WebSocket is not open.</exception>
+    /// <exception cref="ArgumentNullException">Raised when <paramref name="text" /> is null or empty.</exception>
     public async Task SendTextToSpeechAsync(string text, bool? flush = null, bool tryTriggerGeneration = false,
         CancellationToken cancellationToken = default)
     {
+        if (client.WebSocketClient.State != WebSocketState.Open)
+        {
+            throw new InvalidOperationException("WebSocket is not open!");
+        }
+
+        if (string.IsNullOrWhiteSpace(text))
+        {
+            throw new ArgumentNullException($"{nameof(text)} cannot be null or empty!");
+        }
+
         TextToSpeechWebSocketRequest request = new(text, flush, tryTriggerGeneration);
         await client.WebSocketClient.SendAsync(request.ToArraySegment(), WebSocketMessageType.Text, true,
             cancellationToken);
     }
 
+    /// <summary>
+    ///     Closes the text to speech WebSocket connection.
+    /// </summary>
+    /// <param name="cancellationToken">Optional, <see cref="CancellationToken" />.</param>
+    /// <exception cref="InvalidOperationException">Raised when the WebSocket is not open.</exception>
     public async Task EndTextToSpeechAsync(CancellationToken cancellationToken = default)
     {
+        if (client.WebSocketClient.State != WebSocketState.Open)
+        {
+            throw new InvalidOperationException("WebSocket is not open!");
+        }
+
         TextToSpeechWebSocketLastMessageRequest lastMessageRequest = new();
         await client.WebSocketClient.SendAsync(lastMessageRequest.ToArraySegment(), WebSocketMessageType.Text, true,
             cancellationToken);
+        await client.WebSocketClient.CloseAsync(WebSocketCloseStatus.NormalClosure, string.Empty, cancellationToken);
     }
 
     private async Task ReceiveMessagesAsync(Func<VoiceClip, Task> partialClipCallback, Voice voice,
         CancellationToken cancellationToken)
     {
-        byte[] buffer = new byte[8192];
-        StringBuilder message = new();
-
-        while (client.WebSocketClient.State == WebSocketState.Open)
+        try
         {
-            WebSocketReceiveResult receiveResult = await client.WebSocketClient.ReceiveAsync(
-                new ArraySegment<byte>(buffer), cancellationToken);
-
-            if (receiveResult.MessageType == WebSocketMessageType.Close)
-            {
-                await client.WebSocketClient.CloseAsync(WebSocketCloseStatus.NormalClosure, string.Empty,
-                    cancellationToken);
-                break;
-            }
-
-            string jsonString = Encoding.UTF8.GetString(buffer, 0, receiveResult.Count);
-            message.Append(jsonString);
-
-            if (!receiveResult.EndOfMessage)
-            {
-                continue;
-            }
-
-            TextToSpeechWebSocketResponse response = JsonSerializer.Deserialize<TextToSpeechWebSocketResponse>(
-                message.ToString(), ElevenLabsClient.JsonSerializationOptions);
+            byte[] buffer = new byte[8192];
+            StringBuilder message = new();
 
-            if (response == null)
+            while (client.WebSocketClient.State == WebSocketState.Open)
             {
-                throw new ArgumentException("Failed to parse response!");
-            }
-
-            message.Clear();
-
-            if (!string.IsNullOrWhiteSpace(response.Audio))
-            {
-                string text = response.Alignment is { Chars: not null }
-                    ? string.Concat(response.Alignment.Chars)
-                    : null;
-                VoiceClip voiceClip = new(string.Empty, text, voice, response.AudioBytes);
-                await partialClipCallback(voiceClip).ConfigureAwait(false);
-            }
-            else
-            {
-                await partialClipCallback(null).ConfigureAwait(false);
+                WebSocketReceiveResult receiveResult = await client.WebSocketClient.ReceiveAsync(
+                    new ArraySegment<byte>(buffer), cancellationToken);
+
+                Console.WriteLine($"{receiveResult.MessageType} - {receiveResult.Count}");
+
+                if (receiveResult.MessageType == WebSocketMessageType.Close)
+                {
+                    await client.WebSocketClient.CloseAsync(WebSocketCloseStatus.NormalClosure, string.Empty,
+                        cancellationToken);
+                    break;
+                }
+
+                string jsonString = Encoding.UTF8.GetString(buffer, 0, receiveResult.Count);
+                message.Append(jsonString);
+
+                if (!receiveResult.EndOfMessage)
+                {
+                    continue;
+                }
+
+                TextToSpeechWebSocketResponse response = JsonSerializer.Deserialize<TextToSpeechWebSocketResponse>(
+                    message.ToString(), ElevenLabsClient.JsonSerializationOptions);
+
+                if (response == null)
+                {
+                    throw new ArgumentException("Failed to parse response!");
+                }
+
+                message.Clear();
+
+                if (!string.IsNullOrWhiteSpace(response.Audio))
+                {
+                    string text = response.Alignment is { Chars: not null }
+                        ? string.Concat(response.Alignment.Chars)
+                        : null;
+                    VoiceClip voiceClip = new(string.Empty, text, voice, response.AudioBytes);
+                    await partialClipCallback(voiceClip);
+                }
+                else
+                {
+                    await partialClipCallback(null);
+                }
             }
         }
+        finally
+        {
+            client.ReinitializeWebSocketClient();
+        }
     }
 }
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketRequest.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketRequest.cs
index 9c59cf3..379f17b 100644
--- a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketRequest.cs
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketRequest.cs
@@ -9,6 +9,19 @@ namespace ElevenLabs.TextToSpeech;
 
 public sealed class TextToSpeechWebSocketRequest
 {
+    /// <summary>
+    ///     Text needs to end with a space and cannot be null or empty.
+    /// </summary>
+    /// <param name="text">The text to be converted to speech. Needs to end with a space, cannot be null or empty.</param>
+    /// <param name="flush">
+    ///     Forces the generation of audio. Set this value to true when you have finished sending text, but
+    ///     want to keep the websocket connection open.
+    /// </param>
+    /// <param name="tryTriggerGeneration">
+    ///     Use this to attempt to immediately trigger the generation of audio. Most users
+    ///     shouldn't use this.
+    /// </param>
+    /// <exception cref="ArgumentNullException">Thrown when <paramref name="text" /> is null or empty.</exception>
     public TextToSpeechWebSocketRequest(string text, bool? flush = null, bool tryTriggerGeneration = false)
     {
         if (string.IsNullOrWhiteSpace(text))
@@ -40,7 +53,7 @@ public TextToSpeechWebSocketRequest(string text, bool? flush = null, bool tryTri
     /// </summary>
     [JsonPropertyName("flush"), JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
     public bool? Flush { get; }
-    
+
     public ArraySegment<byte> ToArraySegment()
     {
         string json = JsonSerializer.Serialize(this);

From fed407fc407f9d2d5363114c636527be3d7c6121 Mon Sep 17 00:00:00 2001
From: Nico Nonne <nico@glome.ai>
Date: Wed, 31 Jul 2024 11:13:47 +0200
Subject: [PATCH 4/6] - removing debug output

---
 ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs | 2 --
 1 file changed, 2 deletions(-)

diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs
index 409eae0..929f662 100644
--- a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs
@@ -180,8 +180,6 @@ private async Task ReceiveMessagesAsync(Func<VoiceClip, Task> partialClipCallbac
                 WebSocketReceiveResult receiveResult = await client.WebSocketClient.ReceiveAsync(
                     new ArraySegment<byte>(buffer), cancellationToken);
 
-                Console.WriteLine($"{receiveResult.MessageType} - {receiveResult.Count}");
-
                 if (receiveResult.MessageType == WebSocketMessageType.Close)
                 {
                     await client.WebSocketClient.CloseAsync(WebSocketCloseStatus.NormalClosure, string.Empty,

From b44389f6e107c6cab47f61ff59af730fb609b7b8 Mon Sep 17 00:00:00 2001
From: Nico Nonne <nico@glome.ai>
Date: Tue, 22 Oct 2024 15:18:57 +0200
Subject: [PATCH 5/6] Rebasing on main.

---
 .../TextToSpeech/TextToSpeechWebSocketEndpoint.cs           | 6 +++++-
 .../TextToSpeech/TextToSpeechWebSocketRequest.cs            | 3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs
index 929f662..9fb38f5 100644
--- a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs
@@ -85,7 +85,7 @@ public async Task StartTextToSpeechAsync(Voice voice, Func<VoiceClip, Task> part
 
         var parameters = new Dictionary<string, string>
         {
-            { ModelIdParameter, model?.Id ?? Model.MonoLingualV1.Id },
+            { ModelIdParameter, model?.Id ?? Model.EnglishV1.Id },
             { OutputFormatParameter, outputFormat.ToString().ToLower() }
         };
 
@@ -219,6 +219,10 @@ await client.WebSocketClient.CloseAsync(WebSocketCloseStatus.NormalClosure, stri
                 }
             }
         }
+        catch (Exception e)
+        {
+            Console.WriteLine(e);
+        }
         finally
         {
             client.ReinitializeWebSocketClient();
diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketRequest.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketRequest.cs
index 379f17b..df47eb9 100644
--- a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketRequest.cs
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketRequest.cs
@@ -24,7 +24,8 @@ public sealed class TextToSpeechWebSocketRequest
     /// <exception cref="ArgumentNullException">Thrown when <paramref name="text" /> is null or empty.</exception>
     public TextToSpeechWebSocketRequest(string text, bool? flush = null, bool tryTriggerGeneration = false)
     {
-        if (string.IsNullOrWhiteSpace(text))
+        // Whitespace-only text is allowed; null or empty text would make text[^1] below throw.
+        if (string.IsNullOrEmpty(text))
         {
             throw new ArgumentNullException(nameof(text));
         }

From 5d4d0d0c48c984191197becdc899f48f0db172cc Mon Sep 17 00:00:00 2001
From: Nico Nonne <nico@glome.ai>
Date: Tue, 22 Oct 2024 16:01:54 +0200
Subject: [PATCH 6/6] Add the inactivity timeout parameter. Remove the
 keep-alive messages (as they don't work and stop the connection). And allow
 the sending of white space messages for testing.

---
 .../TextToSpeechWebSocketEndpoint.cs            | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs
index 9fb38f5..91abc47 100644
--- a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs
@@ -22,6 +22,7 @@ public sealed class TextToSpeechWebSocketEndpoint : ElevenLabsBaseEndPoint
     private const string EnableSsmlParsingParameter = "enable_ssml_parsing";
     private const string OptimizeStreamingLatencyParameter = "optimize_streaming_latency";
     private const string OutputFormatParameter = "output_format";
+    private const string InactivityTimeoutParameter = "inactivity_timeout";
 
     public TextToSpeechWebSocketEndpoint(ElevenLabsClient client) : base(client)
     {
@@ -63,13 +64,17 @@ public TextToSpeechWebSocketEndpoint(ElevenLabsClient client) : base(client)
     ///     4 - max latency optimizations, but also with text normalizer turned off for even more latency savings
     ///     (best latency, but can mispronounce eg numbers and dates).
     /// </param>
+    /// <param name="inactivityTimeout">
+    ///     The number of seconds that the connection can be inactive before it is automatically closed.
+    ///     Defaults to 20 seconds, with a maximum allowed value of 180 seconds.
+    /// </param>
     /// <param name="cancellationToken">Optional, <see cref="CancellationToken" />.</param>
     /// <exception cref="ArgumentNullException">Raised when <paramref name="voice" /> is null or empty.</exception>
     /// <exception cref="ArgumentNullException">Raised when <paramref name="partialClipCallback" /> is null.</exception>
     public async Task StartTextToSpeechAsync(Voice voice, Func<VoiceClip, Task> partialClipCallback,
         VoiceSettings voiceSettings = null, GenerationConfig generationConfig = null, Model model = null,
         OutputFormat outputFormat = OutputFormat.MP3_44100_128, bool? enableLogging = null,
-        bool? enableSsmlParsing = null, int? optimizeStreamingLatency = null,
+        bool? enableSsmlParsing = null, int? optimizeStreamingLatency = null, int? inactivityTimeout = null,
         CancellationToken cancellationToken = default)
     {
         if (voice == null ||
@@ -104,6 +109,11 @@ public async Task StartTextToSpeechAsync(Voice voice, Func<VoiceClip, Task> part
             parameters.Add(OptimizeStreamingLatencyParameter, optimizeStreamingLatency.ToString());
         }
 
+        if (inactivityTimeout.HasValue)
+        {
+            parameters.Add(InactivityTimeoutParameter, inactivityTimeout.ToString());
+        }
+
         await client.WebSocketClient.ConnectAsync(
             new Uri(GetWebSocketUrl($"/{voice.Id}/stream-input", parameters)), cancellationToken);
 
@@ -139,11 +149,6 @@ public async Task SendTextToSpeechAsync(string text, bool? flush = null, bool tr
             throw new InvalidOperationException("WebSocket is not open!");
         }
 
-        if (string.IsNullOrWhiteSpace(text))
-        {
-            throw new ArgumentNullException($"{nameof(text)} cannot be null or empty!");
-        }
-
         TextToSpeechWebSocketRequest request = new(text, flush, tryTriggerGeneration);
         await client.WebSocketClient.SendAsync(request.ToArraySegment(), WebSocketMessageType.Text, true,
             cancellationToken);