diff --git a/ElevenLabs-DotNet/Authentication/ElevenLabsClientSettings.cs b/ElevenLabs-DotNet/Authentication/ElevenLabsClientSettings.cs
index c876e39..16d1749 100644
--- a/ElevenLabs-DotNet/Authentication/ElevenLabsClientSettings.cs
+++ b/ElevenLabs-DotNet/Authentication/ElevenLabsClientSettings.cs
@@ -2,62 +2,98 @@
using System;
-namespace ElevenLabs
+namespace ElevenLabs;
+
+public sealed class ElevenLabsClientSettings
{
- public sealed class ElevenLabsClientSettings
+ internal const string HttpProtocol = "http://"; // every protocol constant includes the "://" separator
+ internal const string HttpsProtocol = "https://";
+ internal const string WsProtocol = "ws://";
+ internal const string WssProtocol = "wss://";
+ internal const string DefaultApiVersion = "v1";
+ internal const string ElevenLabsDomain = "api.elevenlabs.io";
+
+ /// <summary>
+ /// Creates a new instance of <see cref="ElevenLabsClientSettings"/> for use with ElevenLabs API.
+ /// </summary>
+ public ElevenLabsClientSettings()
+ {
+ Domain = ElevenLabsDomain;
+ ApiVersion = DefaultApiVersion;
+ Protocol = HttpsProtocol;
+ WebSocketProtocol = WssProtocol;
+ BaseRequest = $"/{ApiVersion}/";
+ BaseRequestUrlFormat = $"{Protocol}{Domain}{BaseRequest}{{0}}"; // "{{0}}" is emitted as a literal {0} placeholder that string.Format fills with the endpoint path
+ BaseRequestWebSocketUrlFormat = $"{WebSocketProtocol}{Domain}{BaseRequest}{{0}}"; // same layout, WebSocket scheme
+ }
+
+ /// <summary>
+ /// Creates a new instance of <see cref="ElevenLabsClientSettings"/> for use with ElevenLabs API.
+ /// </summary>
+ /// <param name="domain">Base api domain. May optionally start with "http://" or "https://"; https is used when no protocol is given.</param>
+ /// <param name="apiVersion">The version of the ElevenLabs api you want to use.</param>
+ public ElevenLabsClientSettings(string domain, string apiVersion = DefaultApiVersion)
{
- internal const string Https = "https://";
- internal const string DefaultApiVersion = "v1";
- internal const string ElevenLabsDomain = "api.elevenlabs.io";
-
- /// <summary>
- /// Creates a new instance of <see cref="ElevenLabsClientSettings"/> for use with ElevenLabs API.
- /// </summary>
- public ElevenLabsClientSettings()
+ if (string.IsNullOrWhiteSpace(domain))
{
- Domain = ElevenLabsDomain;
- ApiVersion = "v1";
- BaseRequest = $"/{ApiVersion}/";
- BaseRequestUrlFormat = $"{Https}{Domain}{BaseRequest}{{0}}";
+ domain = ElevenLabsDomain;
}
- /// <summary>
- /// Creates a new instance of <see cref="ElevenLabsClientSettings"/> for use with ElevenLabs API.
- /// </summary>
- /// <param name="domain">Base api domain.</param>
- /// <param name="apiVersion">The version of the ElevenLabs api you want to use.</param>
- public ElevenLabsClientSettings(string domain, string apiVersion = DefaultApiVersion)
+ if (!domain.Contains('.') &&
+ !domain.Contains(':'))
{
- if (string.IsNullOrWhiteSpace(domain))
- {
- domain = ElevenLabsDomain;
- }
+ throw new ArgumentException(
+ $"You're attempting to pass a \"resourceName\" parameter to \"{nameof(domain)}\". Please specify \"resourceName:\" for this parameter in constructor.");
+ }
- if (!domain.Contains('.') &&
- !domain.Contains(':'))
+ // extract anything before the :// to split the domain and protocol
+ var splitDomain = domain.Split("://", StringSplitOptions.RemoveEmptyEntries);
+ if (splitDomain.Length == 2)
+ {
+ Protocol = $"{splitDomain[0]}://"; // Split() drops the separator, but the protocol constants include it
+ // if the protocol is not https or http, throw an exception
+ if (Protocol != HttpsProtocol &&
+ Protocol != HttpProtocol)
{
- throw new ArgumentException($"You're attempting to pass a \"resourceName\" parameter to \"{nameof(domain)}\". Please specify \"resourceName:\" for this parameter in constructor.");
+ throw new ArgumentException(
+ $"The protocol \"{Protocol}\" is not supported. Please use \"{HttpsProtocol}\" or \"{HttpProtocol}\".");
}
- if (string.IsNullOrWhiteSpace(apiVersion))
- {
- apiVersion = DefaultApiVersion;
- }
+ WebSocketProtocol = Protocol == HttpsProtocol ? WssProtocol : WsProtocol;
+ Domain = splitDomain[1];
+ }
+ else
+ {
+ Protocol = HttpsProtocol;
+ WebSocketProtocol = WssProtocol;
+ Domain = domain;
+ }
- Domain = domain.Contains("http") ? domain : $"{Https}{domain}";
- ApiVersion = apiVersion;
- BaseRequest = $"/{ApiVersion}/";
- BaseRequestUrlFormat = $"{Domain}{BaseRequest}{{0}}";
+ if (string.IsNullOrWhiteSpace(apiVersion))
+ {
+ apiVersion = DefaultApiVersion;
}
- public string Domain { get; }
+ // Domain was assigned in the branch above with any protocol prefix removed; re-assigning the raw input here would duplicate the protocol in the URL formats.
+ ApiVersion = apiVersion;
+ BaseRequest = $"/{ApiVersion}/";
+ BaseRequestUrlFormat = $"{Protocol}{Domain}{BaseRequest}{{0}}";
+ BaseRequestWebSocketUrlFormat = $"{WebSocketProtocol}{Domain}{BaseRequest}{{0}}";
+ }
+
+ public string Protocol { get; } // scheme prefix placed in front of Domain in BaseRequestUrlFormat
- public string ApiVersion { get; }
+ public string WebSocketProtocol { get; } // scheme prefix placed in front of Domain in BaseRequestWebSocketUrlFormat
- public string BaseRequest { get; }
+ public string Domain { get; }
- public string BaseRequestUrlFormat { get; }
+ public string ApiVersion { get; }
- public static ElevenLabsClientSettings Default { get; } = new();
- }
-}
+ public string BaseRequest { get; } // "/{ApiVersion}/"
+
+ public string BaseRequestUrlFormat { get; } // format string whose {0} placeholder receives the endpoint path
+
+ public string BaseRequestWebSocketUrlFormat { get; } // WebSocket counterpart of BaseRequestUrlFormat
+
+ public static ElevenLabsClientSettings Default { get; } = new(); // built with the parameterless constructor
+}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/Common/ElevenLabsBaseEndPoint.cs b/ElevenLabs-DotNet/Common/ElevenLabsBaseEndPoint.cs
index 94807c3..00fa4f6 100644
--- a/ElevenLabs-DotNet/Common/ElevenLabsBaseEndPoint.cs
+++ b/ElevenLabs-DotNet/Common/ElevenLabsBaseEndPoint.cs
@@ -3,47 +3,63 @@
using System.Collections.Generic;
using System.Linq;
-namespace ElevenLabs
+namespace ElevenLabs;
+
+public abstract class ElevenLabsBaseEndPoint
{
- public abstract class ElevenLabsBaseEndPoint
+ internal ElevenLabsBaseEndPoint(ElevenLabsClient client) => this.client = client;
+
+ // ReSharper disable once InconsistentNaming
+ protected readonly ElevenLabsClient client;
+
+ /// <summary>
+ /// The root endpoint address.
+ /// </summary>
+ protected abstract string Root { get; }
+
+ /// <summary>
+ /// Gets the full formatted url for the API endpoint.
+ /// </summary>
+ /// <param name="endpoint">The endpoint url.</param>
+ /// <param name="queryParameters">Optional, parameters to add to the endpoint.</param>
+ protected string GetUrl(string endpoint = "", Dictionary<string, string> queryParameters = null)
{
- internal ElevenLabsBaseEndPoint(ElevenLabsClient client) => this.client = client;
-
- // ReSharper disable once InconsistentNaming
- protected readonly ElevenLabsClient client;
-
- /// <summary>
- /// The root endpoint address.
- /// </summary>
- protected abstract string Root { get; }
-
- /// <summary>
- /// Gets the full formatted url for the API endpoint.
- /// </summary>
- /// <param name="endpoint">The endpoint url.</param>
- /// <param name="queryParameters">Optional, parameters to add to the endpoint.</param>
- protected string GetUrl(string endpoint = "", Dictionary<string, string> queryParameters = null)
- {
- var result = string.Format(client.ElevenLabsClientSettings.BaseRequestUrlFormat, $"{Root}{endpoint}");
-
- if (queryParameters is { Count: not 0 })
- {
- result += $"?{string.Join('&', queryParameters.Select(parameter => $"{parameter.Key}={parameter.Value}"))}";
- }
+ var result = string.Format(client.ElevenLabsClientSettings.BaseRequestUrlFormat, $"{Root}{endpoint}");
- return result;
+ if (queryParameters is { Count: not 0 })
+ {
+ result += $"?{string.Join('&', queryParameters.Select(parameter => $"{parameter.Key}={parameter.Value}"))}";
}
- private bool enableDebug;
+ return result;
+ }
+
+ /// <summary>
+ /// Gets the full formatted WebSocket url for the API endpoint, built from the WebSocket url format of the client settings.
+ /// </summary>
+ /// <param name="endpoint">The endpoint url.</param>
+ /// <param name="queryParameters">Optional, parameters to add to the endpoint. Keys and values are appended as-is, without escaping.</param>
+ protected string GetWebSocketUrl(string endpoint = "", Dictionary<string, string> queryParameters = null)
+ {
+ var result = string.Format(client.ElevenLabsClientSettings.BaseRequestWebSocketUrlFormat, $"{Root}{endpoint}");
- /// <summary>
- /// Enables or disables the logging of all http responses of header and body information for this endpoint.
- /// WARNING! Enabling this in your production build, could potentially leak sensitive information!
- /// </summary>
- public bool EnableDebug
+ if (queryParameters is { Count: not 0 })
{
- get => enableDebug || client.EnableDebug;
- set => enableDebug = value;
+ result += $"?{string.Join('&', queryParameters.Select(parameter => $"{parameter.Key}={parameter.Value}"))}";
}
+
+ return result;
+ }
+
+ private bool enableDebug;
+
+ /// <summary>
+ /// Enables or disables the logging of all http responses of header and body information for this endpoint.
+ /// WARNING! Enabling this in your production build, could potentially leak sensitive information!
+ /// </summary>
+ public bool EnableDebug
+ {
+ get => enableDebug || client.EnableDebug;
+ set => enableDebug = value;
}
-}
+}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/ElevenLabsClient.cs b/ElevenLabs-DotNet/ElevenLabsClient.cs
index 8efcedc..b3c24e7 100644
--- a/ElevenLabs-DotNet/ElevenLabsClient.cs
+++ b/ElevenLabs-DotNet/ElevenLabsClient.cs
@@ -10,6 +10,7 @@
using ElevenLabs.Voices;
using System;
using System.Net.Http;
+using System.Net.WebSockets;
using System.Security.Authentication;
using System.Text.Json;
using System.Text.Json.Serialization;
@@ -19,31 +20,42 @@ namespace ElevenLabs
public sealed class ElevenLabsClient : IDisposable
{
///
- /// Creates a new client for the Eleven Labs API, handling auth and allowing for access to various API endpoints.
+ /// Creates a new client for the Eleven Labs API, handling auth and allowing for access to various API endpoints.
///
- /// The API authentication information to use for API calls,
- /// or to attempt to use the ,
- /// potentially loading from environment vars or from a config file.
+ ///
+ /// The API authentication information to use for API calls,
+ /// or to attempt to use the ,
+ /// potentially loading from environment vars or from a config file.
///
///
- /// Optional, for specifying a proxy domain.
+ /// Optional, for specifying a proxy domain.
///
- /// Optional, .
+ /// Optional, .
+ /// Optional, to create custom versions of .
/// Raised when authentication details are missing or invalid.
- /// implements to manage the lifecycle of the resources it uses, including .
+ ///
+ /// implements
+ ///
+ /// to manage the lifecycle of the resources it uses, including
+ ///
+ /// .
///
- /// When you initialize , it will create an internal instance if one is not provided.
- /// This internal HttpClient is disposed of when ElevenLabsClient is disposed of.
- /// If you provide an external HttpClient instance to ElevenLabsClient, you are responsible for managing its disposal.
+ /// When you initialize , it will create an internal instance
+ /// if one is not provided.
+ /// This internal HttpClient is disposed of when ElevenLabsClient is disposed of.
+ /// If you provide an external HttpClient instance to ElevenLabsClient, you are responsible for managing its disposal.
///
- public ElevenLabsClient(ElevenLabsAuthentication authentication = null, ElevenLabsClientSettings settings = null, HttpClient httpClient = null)
+ public ElevenLabsClient(ElevenLabsAuthentication authentication = null,
+ ElevenLabsClientSettings settings = null, HttpClient httpClient = null,
+ Func clientWebSocketSpawner = null)
{
ElevenLabsAuthentication = authentication ?? ElevenLabsAuthentication.Default;
ElevenLabsClientSettings = settings ?? ElevenLabsClientSettings.Default;
if (string.IsNullOrWhiteSpace(ElevenLabsAuthentication?.ApiKey))
{
- throw new AuthenticationException("You must provide API authentication. Please refer to https://github.com/RageAgainstThePixel/ElevenLabs-DotNet#authentication for details.");
+ throw new AuthenticationException(
+ "You must provide API authentication. Please refer to https://github.com/RageAgainstThePixel/ElevenLabs-DotNet#authentication for details.");
}
if (httpClient == null)
@@ -62,17 +74,31 @@ public ElevenLabsClient(ElevenLabsAuthentication authentication = null, ElevenLa
Client.DefaultRequestHeaders.Add("User-Agent", "ElevenLabs-DotNet");
Client.DefaultRequestHeaders.Add("xi-api-key", ElevenLabsAuthentication.ApiKey);
+ this.clientWebSocketSpawner = clientWebSocketSpawner;
+ WebSocketClient = clientWebSocketSpawner == null ? new ClientWebSocket() : clientWebSocketSpawner();
+ WebSocketClient.Options.SetRequestHeader("User-Agent", "ElevenLabs-DotNet");
+ WebSocketClient.Options.SetRequestHeader("xi-api-key", ElevenLabsAuthentication.ApiKey);
+
UserEndpoint = new UserEndpoint(this);
VoicesEndpoint = new VoicesEndpoint(this);
SharedVoicesEndpoint = new SharedVoicesEndpoint(this);
ModelsEndpoint = new ModelsEndpoint(this);
HistoryEndpoint = new HistoryEndpoint(this);
TextToSpeechEndpoint = new TextToSpeechEndpoint(this);
+ TextToSpeechWebSocketEndpoint = new TextToSpeechWebSocketEndpoint(this);
VoiceGenerationEndpoint = new VoiceGenerationEndpoint(this);
SoundGenerationEndpoint = new SoundGenerationEndpoint(this);
DubbingEndpoint = new DubbingEndpoint(this);
}
+ public void ReinitializeWebSocketClient() // Replaces WebSocketClient with a fresh instance; called from the finally block of the text-to-speech receive loop.
+ {
+ WebSocketClient.Dispose(); // release the previous socket before it is replaced
+ WebSocketClient = clientWebSocketSpawner == null ? new ClientWebSocket() : clientWebSocketSpawner(); // same factory-or-default choice the constructor makes
+ WebSocketClient.Options.SetRequestHeader("User-Agent", "ElevenLabs-DotNet");
+ WebSocketClient.Options.SetRequestHeader("xi-api-key", ElevenLabsAuthentication.ApiKey); // the new instance needs the auth header again
+ }
+
~ElevenLabsClient()
{
Dispose(false);
@@ -97,6 +123,7 @@ private void Dispose(bool disposing)
Client?.Dispose();
}
+ WebSocketClient?.Dispose();
isDisposed = true;
}
}
@@ -105,13 +132,20 @@ private void Dispose(bool disposing)
private bool isCustomClient;
+ private readonly Func<ClientWebSocket> clientWebSocketSpawner; // optional factory for custom ClientWebSocket instances; null means a plain new ClientWebSocket() is used
+
///
- /// to use when making calls to the API.
+ /// to use when making calls to the API.
///
internal HttpClient Client { get; }
///
- /// The to use when making calls to the API.
+ /// to use when making calls to the API.
+ ///
+ internal ClientWebSocket WebSocketClient { get; private set; }
+
+ ///
+ /// The to use when making calls to the API.
///
internal static JsonSerializerOptions JsonSerializationOptions { get; } = new()
{
@@ -119,12 +153,12 @@ private void Dispose(bool disposing)
};
///
- /// Enables or disables debugging for all endpoints.
+ /// Enables or disables debugging for all endpoints.
///
public bool EnableDebug { get; set; }
///
- /// The API authentication information to use for API calls
+ /// The API authentication information to use for API calls
///
public ElevenLabsAuthentication ElevenLabsAuthentication { get; }
@@ -142,10 +176,12 @@ private void Dispose(bool disposing)
public TextToSpeechEndpoint TextToSpeechEndpoint { get; }
+ public TextToSpeechWebSocketEndpoint TextToSpeechWebSocketEndpoint { get; }
+
public VoiceGenerationEndpoint VoiceGenerationEndpoint { get; }
public SoundGenerationEndpoint SoundGenerationEndpoint { get; }
public DubbingEndpoint DubbingEndpoint { get; }
}
-}
+}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/Models/Model.cs b/ElevenLabs-DotNet/Models/Model.cs
index a1bf3c7..658c474 100644
--- a/ElevenLabs-DotNet/Models/Model.cs
+++ b/ElevenLabs-DotNet/Models/Model.cs
@@ -93,6 +93,12 @@ public Model(string id)
+ /// <summary>
+ /// The Turbo v2.5 model ("eleven_turbo_v2_5").
+ /// </summary>
+ [JsonIgnore]
+ public static Model TurboV25 { get; } = new("eleven_turbo_v2_5");
+
/// <summary>
/// Our state-of-the-art speech to speech model suitable for scenarios where you need maximum control over the content and prosody of your generations.
/// </summary>
[JsonIgnore]
public static Model EnglishSpeechToSpeechV2 { get; } = new("eleven_english_sts_v2");
diff --git a/ElevenLabs-DotNet/TextToSpeech/Alignment.cs b/ElevenLabs-DotNet/TextToSpeech/Alignment.cs
new file mode 100644
index 0000000..037965e
--- /dev/null
+++ b/ElevenLabs-DotNet/TextToSpeech/Alignment.cs
@@ -0,0 +1,22 @@
+using System.Text.Json.Serialization;
+
+namespace ElevenLabs.TextToSpeech;
+
+public class Alignment // Alignment payload carried by TextToSpeechWebSocketResponse (its Alignment and NormalizedAlignment properties).
+{
+ [JsonPropertyName("char_start_times_ms")]
+ public int[] CharStartTimesMs { get; } // presumably one start offset in milliseconds per entry of Chars — confirm against the API docs
+
+ [JsonPropertyName("chars_durations_ms")]
+ public int[] CharsDurationsMs { get; } // presumably one duration in milliseconds per entry of Chars — confirm against the API docs
+
+ [JsonPropertyName("chars")]
+ public string[] Chars { get; } // text pieces; the WebSocket endpoint concatenates them to rebuild the clip text
+
+ public Alignment(int[] charStartTimesMs, int[] charsDurationsMs, string[] chars) // values are stored as given, without validation or copying
+ {
+ CharStartTimesMs = charStartTimesMs;
+ CharsDurationsMs = charsDurationsMs;
+ Chars = chars;
+ }
+}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/TextToSpeech/GenerationConfig.cs b/ElevenLabs-DotNet/TextToSpeech/GenerationConfig.cs
new file mode 100644
index 0000000..f827742
--- /dev/null
+++ b/ElevenLabs-DotNet/TextToSpeech/GenerationConfig.cs
@@ -0,0 +1,20 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System.Text.Json.Serialization;
+
+namespace ElevenLabs.TextToSpeech;
+
+public sealed class GenerationConfig // Optional generation settings carried by TextToSpeechWebSocketFirstMessageRequest.
+{
+ [JsonPropertyName("chunk_length_schedule")]
+ public int[] ChunkLengthSchedule { get; } // serialized as "chunk_length_schedule"
+
+ public GenerationConfig() : this([120, 160, 250, 290]) // default schedule; NOTE(review): the unit of these values is not visible here — confirm against the API docs
+ {
+ }
+
+ public GenerationConfig(int[] chunkLengthSchedule) // stores the array as given (no copy, no validation)
+ {
+ ChunkLengthSchedule = chunkLengthSchedule;
+ }
+}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs
new file mode 100644
index 0000000..91abc47
--- /dev/null
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs
@@ -0,0 +1,247 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using ElevenLabs.Models;
+using ElevenLabs.Voices;
+using System;
+using System.Collections.Generic;
+using System.Net.WebSockets;
+using System.Text;
+using System.Text.Json;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace ElevenLabs.TextToSpeech;
+
+/// <summary>
+/// Access to convert text to synthesized speech using a WebSocket connection.
+/// </summary>
+public sealed class TextToSpeechWebSocketEndpoint : ElevenLabsBaseEndPoint
+{
+ private const string ModelIdParameter = "model_id";
+ private const string EnableLoggingParameter = "enable_logging";
+ private const string EnableSsmlParsingParameter = "enable_ssml_parsing";
+ private const string OptimizeStreamingLatencyParameter = "optimize_streaming_latency";
+ private const string OutputFormatParameter = "output_format";
+ private const string InactivityTimeoutParameter = "inactivity_timeout";
+
+ public TextToSpeechWebSocketEndpoint(ElevenLabsClient client) : base(client)
+ {
+ }
+
+ protected override string Root => "text-to-speech";
+
+ /// <summary>
+ /// Converts text into speech using a voice of your choice and returns audio.
+ /// </summary>
+ /// <param name="voice">
+ /// <see cref="Voice"/> to use.
+ /// </param>
+ /// <param name="partialClipCallback">
+ /// Callback for streamed audio as it comes in.
+ /// Returns partial <see cref="VoiceClip"/>, or null for messages that carry no audio.
+ /// </param>
+ /// <param name="voiceSettings">
+ /// Optional, <see cref="VoiceSettings"/> that will override the default settings in the <see cref="Voice"/>.
+ /// </param>
+ /// <param name="generationConfig">Optional, <see cref="GenerationConfig"/>.</param>
+ /// <param name="model">
+ /// Optional, <see cref="Model"/> to use. Defaults to <see cref="Model.EnglishV1"/>.
+ /// </param>
+ /// <param name="outputFormat">
+ /// Output format of the generated audio.
+ /// Defaults to <see cref="OutputFormat.MP3_44100_128"/>
+ /// </param>
+ /// <param name="enableLogging">Optional, enable logging.</param>
+ /// <param name="enableSsmlParsing">Optional, enable SSML parsing.</param>
+ /// <param name="optimizeStreamingLatency">
+ /// Optional, You can turn on latency optimizations at some cost of quality.
+ /// The best possible final latency varies by model.
+ /// Possible values:
+ /// 0 - default mode (no latency optimizations)
+ /// 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+ /// 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+ /// 3 - max latency optimizations
+ /// 4 - max latency optimizations, but also with text normalizer turned off for even more latency savings
+ /// (best latency, but can mispronounce eg numbers and dates).
+ /// </param>
+ /// <param name="inactivityTimeout">
+ /// The number of seconds that the connection can be inactive before it is automatically closed.
+ /// Defaults to 20 seconds, with a maximum allowed value of 180 seconds.
+ /// </param>
+ /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+ /// <exception cref="ArgumentNullException">Raised when <paramref name="voice"/> is null or empty.</exception>
+ /// <exception cref="ArgumentNullException">Raised when <paramref name="partialClipCallback"/> is null.</exception>
+ public async Task StartTextToSpeechAsync(Voice voice, Func<VoiceClip, Task> partialClipCallback,
+ VoiceSettings voiceSettings = null, GenerationConfig generationConfig = null, Model model = null,
+ OutputFormat outputFormat = OutputFormat.MP3_44100_128, bool? enableLogging = null,
+ bool? enableSsmlParsing = null, int? optimizeStreamingLatency = null, int? inactivityTimeout = null,
+ CancellationToken cancellationToken = default)
+ {
+ if (voice == null ||
+ string.IsNullOrWhiteSpace(voice.Id))
+ {
+ throw new ArgumentNullException(nameof(voice));
+ }
+
+ if (partialClipCallback == null)
+ {
+ throw new ArgumentNullException(nameof(partialClipCallback));
+ }
+
+ var parameters = new Dictionary<string, string>
+ {
+ { ModelIdParameter, model?.Id ?? Model.EnglishV1.Id },
+ { OutputFormatParameter, outputFormat.ToString().ToLowerInvariant() } // invariant casing: this is a protocol value, not display text
+ };
+
+ if (enableLogging.HasValue)
+ {
+ parameters.Add(EnableLoggingParameter, enableLogging.ToString());
+ }
+
+ if (enableSsmlParsing.HasValue)
+ {
+ parameters.Add(EnableSsmlParsingParameter, enableSsmlParsing.ToString());
+ }
+
+ if (optimizeStreamingLatency.HasValue)
+ {
+ parameters.Add(OptimizeStreamingLatencyParameter, optimizeStreamingLatency.ToString());
+ }
+
+ if (inactivityTimeout.HasValue)
+ {
+ parameters.Add(InactivityTimeoutParameter, inactivityTimeout.ToString());
+ }
+
+ await client.WebSocketClient.ConnectAsync(
+ new Uri(GetWebSocketUrl($"/{voice.Id}/stream-input", parameters)), cancellationToken);
+
+ // start receiving messages in a separate task
+ _ = Task.Run(async () => await ReceiveMessagesAsync(partialClipCallback, voice, cancellationToken),
+ cancellationToken);
+
+ TextToSpeechWebSocketFirstMessageRequest firstMessageRequest = new(voiceSettings, generationConfig);
+ await client.WebSocketClient.SendAsync(firstMessageRequest.ToArraySegment(), WebSocketMessageType.Text, true,
+ cancellationToken);
+ }
+
+ /// <summary>
+ /// Sends text to the WebSocket for speech synthesis.
+ /// </summary>
+ /// <param name="text">Text input to synthesize speech for. Needs to end with a space and cannot be null or empty.</param>
+ /// <param name="flush">
+ /// Forces the generation of audio. Set this value to true when you have finished sending text, but
+ /// want to keep the websocket connection open.
+ /// </param>
+ /// <param name="tryTriggerGeneration">
+ /// Use this to attempt to immediately trigger the generation of audio. Most users
+ /// shouldn't use this.
+ /// </param>
+ /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+ /// <exception cref="InvalidOperationException">Raised when the WebSocket is not open.</exception>
+ /// <exception cref="ArgumentNullException">Raised when <paramref name="text"/> is null or empty.</exception>
+ public async Task SendTextToSpeechAsync(string text, bool? flush = null, bool tryTriggerGeneration = false,
+ CancellationToken cancellationToken = default)
+ {
+ if (string.IsNullOrEmpty(text))
+ {
+ throw new ArgumentNullException(nameof(text));
+ }
+
+ if (client.WebSocketClient.State != WebSocketState.Open)
+ {
+ throw new InvalidOperationException("WebSocket is not open!");
+ }
+
+ TextToSpeechWebSocketRequest request = new(text, flush, tryTriggerGeneration);
+ await client.WebSocketClient.SendAsync(request.ToArraySegment(), WebSocketMessageType.Text, true,
+ cancellationToken);
+ }
+
+ /// <summary>
+ /// Closes the text to speech WebSocket connection.
+ /// </summary>
+ /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+ /// <exception cref="InvalidOperationException">Raised when the WebSocket is not open.</exception>
+ public async Task EndTextToSpeechAsync(CancellationToken cancellationToken = default)
+ {
+ if (client.WebSocketClient.State != WebSocketState.Open)
+ {
+ throw new InvalidOperationException("WebSocket is not open!");
+ }
+
+ TextToSpeechWebSocketLastMessageRequest lastMessageRequest = new();
+ await client.WebSocketClient.SendAsync(lastMessageRequest.ToArraySegment(), WebSocketMessageType.Text, true,
+ cancellationToken);
+ await client.WebSocketClient.CloseAsync(WebSocketCloseStatus.NormalClosure, string.Empty, cancellationToken);
+ }
+
+ /// <summary>
+ /// Reads messages until the WebSocket leaves the open state, passing each completed message to <paramref name="partialClipCallback"/>; the client's WebSocket is always reinitialized on exit.
+ /// </summary>
+ private async Task ReceiveMessagesAsync(Func<VoiceClip, Task> partialClipCallback, Voice voice,
+ CancellationToken cancellationToken)
+ {
+ try
+ {
+ byte[] buffer = new byte[8192];
+ StringBuilder message = new();
+ // A stateful decoder keeps multi-byte UTF-8 sequences intact when one message spans several receive buffers.
+ Decoder utf8Decoder = Encoding.UTF8.GetDecoder();
+ char[] charBuffer = new char[Encoding.UTF8.GetMaxCharCount(buffer.Length)];
+
+ while (client.WebSocketClient.State == WebSocketState.Open)
+ {
+ WebSocketReceiveResult receiveResult = await client.WebSocketClient.ReceiveAsync(
+ new ArraySegment<byte>(buffer), cancellationToken);
+
+ if (receiveResult.MessageType == WebSocketMessageType.Close)
+ {
+ await client.WebSocketClient.CloseAsync(WebSocketCloseStatus.NormalClosure, string.Empty,
+ cancellationToken);
+ break;
+ }
+
+ int charCount = utf8Decoder.GetChars(buffer, 0, receiveResult.Count, charBuffer, 0, receiveResult.EndOfMessage);
+ message.Append(charBuffer, 0, charCount);
+
+ if (!receiveResult.EndOfMessage)
+ {
+ continue;
+ }
+
+ TextToSpeechWebSocketResponse response = JsonSerializer.Deserialize<TextToSpeechWebSocketResponse>(
+ message.ToString(), ElevenLabsClient.JsonSerializationOptions);
+
+ if (response == null)
+ {
+ throw new ArgumentException("Failed to parse response!");
+ }
+
+ message.Clear();
+
+ if (!string.IsNullOrWhiteSpace(response.Audio))
+ {
+ string text = response.Alignment is { Chars: not null }
+ ? string.Concat(response.Alignment.Chars)
+ : null;
+ VoiceClip voiceClip = new(string.Empty, text, voice, response.AudioBytes);
+ await partialClipCallback(voiceClip);
+ }
+ else
+ {
+ await partialClipCallback(null);
+ }
+ }
+ }
+ catch (Exception e)
+ {
+ Console.WriteLine(e); // runs as a fire-and-forget task, so failures are reported here instead of being rethrown
+ }
+ finally
+ {
+ client.ReinitializeWebSocketClient();
+ }
+ }
+}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketFirstMessageRequest.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketFirstMessageRequest.cs
new file mode 100644
index 0000000..4e828a9
--- /dev/null
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketFirstMessageRequest.cs
@@ -0,0 +1,36 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System;
+using System.Text;
+using System.Text.Json;
+using ElevenLabs.Voices;
+using System.Text.Json.Serialization;
+
+namespace ElevenLabs.TextToSpeech;
+
+public sealed class TextToSpeechWebSocketFirstMessageRequest // Opening message of a text-to-speech WebSocket session; sent by StartTextToSpeechAsync right after connecting.
+{
+ public TextToSpeechWebSocketFirstMessageRequest(
+ VoiceSettings voiceSettings = null,
+ GenerationConfig generationConfig = null)
+ {
+ VoiceSettings = voiceSettings;
+ GenerationConfig = generationConfig;
+ }
+
+ [JsonPropertyName("text"), JsonInclude]
+ public string Text { get; } = " "; // the opening message always carries a single space as its text
+
+ [JsonPropertyName("voice_settings")]
+ public VoiceSettings VoiceSettings { get; }
+
+ [JsonPropertyName("generation_config")]
+ public GenerationConfig GenerationConfig { get; }
+
+ public ArraySegment<byte> ToArraySegment() // serializes this request to JSON and wraps its UTF-8 bytes for WebSocket.SendAsync
+ {
+ string json = JsonSerializer.Serialize(this);
+ byte[] bytes = Encoding.UTF8.GetBytes(json);
+ return new ArraySegment<byte>(bytes);
+ }
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketLastMessageRequest.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketLastMessageRequest.cs
new file mode 100644
index 0000000..4eca5b7
--- /dev/null
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketLastMessageRequest.cs
@@ -0,0 +1,21 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System;
+using System.Text;
+using System.Text.Json;
+using System.Text.Json.Serialization;
+
+namespace ElevenLabs.TextToSpeech;
+
+public sealed class TextToSpeechWebSocketLastMessageRequest // Closing message of a text-to-speech WebSocket session; sent by EndTextToSpeechAsync before the socket is closed.
+{
+ [JsonPropertyName("text"), JsonInclude]
+ public string Text { get; } = ""; // the closing message always carries an empty text
+
+ public ArraySegment<byte> ToArraySegment() // serializes this request to JSON and wraps its UTF-8 bytes for WebSocket.SendAsync
+ {
+ string json = JsonSerializer.Serialize(this);
+ byte[] bytes = Encoding.UTF8.GetBytes(json);
+ return new ArraySegment<byte>(bytes);
+ }
+}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketRequest.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketRequest.cs
new file mode 100644
index 0000000..df47eb9
--- /dev/null
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketRequest.cs
@@ -0,0 +1,66 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System;
+using System.Text;
+using System.Text.Json;
+using System.Text.Json.Serialization;
+
+namespace ElevenLabs.TextToSpeech;
+
+public sealed class TextToSpeechWebSocketRequest
+{
+ /// <summary>
+ /// Text needs to end with a space and cannot be null or empty.
+ /// </summary>
+ /// <param name="text">The text to be converted to speech. Needs to end with a space, cannot be null or empty.</param>
+ /// <param name="flush">
+ /// Forces the generation of audio. Set this value to true when you have finished sending text, but
+ /// want to keep the websocket connection open.
+ /// </param>
+ /// <param name="tryTriggerGeneration">
+ /// Use this to attempt to immediately trigger the generation of audio. Most users
+ /// shouldn't use this.
+ /// </param>
+ /// <exception cref="ArgumentNullException">Thrown when <paramref name="text"/> is null or empty.</exception>
+ public TextToSpeechWebSocketRequest(string text, bool? flush = null, bool tryTriggerGeneration = false)
+ {
+ if (string.IsNullOrEmpty(text))
+ {
+ throw new ArgumentNullException(nameof(text));
+ }
+
+ // if the last character of the text is not a space, append one
+ Text = text[^1] != ' ' ? text + ' ' : text;
+ TryTriggerGeneration = tryTriggerGeneration;
+ Flush = flush;
+ }
+
+ /// <summary>
+ /// The text to be converted to speech. The last character of the text must be a space.
+ /// </summary>
+ [JsonPropertyName("text"), JsonInclude]
+ public string Text { get; }
+
+ /// <summary>
+ /// Use this to attempt to immediately trigger the generation of audio. Most users shouldn't use this.
+ /// </summary>
+ [JsonPropertyName("try_trigger_generation")]
+ public bool TryTriggerGeneration { get; }
+
+ /// <summary>
+ /// Flush forces the generation of audio. Set this value to true when you have finished sending text,
+ /// but want to keep the websocket connection open.
+ /// </summary>
+ [JsonPropertyName("flush"), JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+ public bool? Flush { get; }
+
+ /// <summary>
+ /// Serializes this request to JSON and returns the UTF-8 encoded bytes.
+ /// </summary>
+ public ArraySegment<byte> ToArraySegment()
+ {
+ string json = JsonSerializer.Serialize(this);
+ byte[] bytes = Encoding.UTF8.GetBytes(json);
+ return new ArraySegment<byte>(bytes);
+ }
+}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketResponse.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketResponse.cs
new file mode 100644
index 0000000..7ad6494
--- /dev/null
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketResponse.cs
@@ -0,0 +1,49 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System;
+using System.Text.Json.Serialization;
+
+namespace ElevenLabs.TextToSpeech;
+
+public sealed class TextToSpeechWebSocketResponse
+{
+ /// <summary>
+ /// A generated partial audio chunk, encoded using the selected output_format,
+ /// by default this is MP3 encoded as a base64 string.
+ /// </summary>
+ [JsonPropertyName("audio")]
+ public string Audio { get; }
+
+ /// <summary>
+ /// The bytes decoded from the base64 <see cref="Audio"/> string, or null when <see cref="Audio"/> is null.
+ /// </summary>
+ [JsonIgnore]
+ public byte[] AudioBytes { get; }
+
+ /// <summary>
+ /// Indicates if the generation is complete. If set to True, audio will be null.
+ /// </summary>
+ [JsonPropertyName("isFinal")]
+ public bool? IsFinal { get; }
+
+ /// <summary>
+ /// Alignment information for the generated audio given the input normalized text sequence.
+ /// </summary>
+ [JsonPropertyName("normalizedAlignment")]
+ public Alignment NormalizedAlignment { get; }
+
+ /// <summary>
+ /// Alignment information for the generated audio given the original text sequence.
+ /// </summary>
+ [JsonPropertyName("alignment")]
+ public Alignment Alignment { get; }
+
+ public TextToSpeechWebSocketResponse(string audio, bool? isFinal, Alignment normalizedAlignment, Alignment alignment)
+ {
+ Audio = audio;
+ IsFinal = isFinal;
+ NormalizedAlignment = normalizedAlignment;
+ Alignment = alignment;
+ AudioBytes = audio != null ? Convert.FromBase64String(audio) : null; // decoded once here so consumers do not have to re-parse the base64 text
+ }
+}
\ No newline at end of file