diff --git a/ElevenLabs-DotNet/Authentication/ElevenLabsClientSettings.cs b/ElevenLabs-DotNet/Authentication/ElevenLabsClientSettings.cs index c876e39..16d1749 100644 --- a/ElevenLabs-DotNet/Authentication/ElevenLabsClientSettings.cs +++ b/ElevenLabs-DotNet/Authentication/ElevenLabsClientSettings.cs @@ -2,62 +2,98 @@ using System; -namespace ElevenLabs +namespace ElevenLabs; + +public sealed class ElevenLabsClientSettings { - public sealed class ElevenLabsClientSettings + internal const string HttpProtocol = "http://"; + internal const string HttpsProtocol = "https://"; + internal const string WsProtocol = "ws://"; + internal const string WssProtocol = "wss://"; + internal const string DefaultApiVersion = "v1"; + internal const string ElevenLabsDomain = "api.elevenlabs.io"; + + /// + /// Creates a new instance of for use with ElevenLabs API. + /// + public ElevenLabsClientSettings() + { + Domain = ElevenLabsDomain; + ApiVersion = DefaultApiVersion; + Protocol = HttpsProtocol; + WebSocketProtocol = WssProtocol; + BaseRequest = $"/{ApiVersion}/"; + BaseRequestUrlFormat = $"{Protocol}{Domain}{BaseRequest}{{0}}"; + BaseRequestWebSocketUrlFormat = $"{WebSocketProtocol}{Domain}{BaseRequest}{{0}}"; + } + + /// + /// Creates a new instance of for use with ElevenLabs API. + /// + /// Base api domain. Starts with https or wss. + /// The version of the ElevenLabs api you want to use. + public ElevenLabsClientSettings(string domain, string apiVersion = DefaultApiVersion) { - internal const string Https = "https://"; - internal const string DefaultApiVersion = "v1"; - internal const string ElevenLabsDomain = "api.elevenlabs.io"; - - /// - /// Creates a new instance of for use with ElevenLabs API. - /// - public ElevenLabsClientSettings() + if (string.IsNullOrWhiteSpace(domain)) { - Domain = ElevenLabsDomain; - ApiVersion = "v1"; - BaseRequest = $"/{ApiVersion}/"; - BaseRequestUrlFormat = $"{Https}{Domain}{BaseRequest}{{0}}"; + domain = ElevenLabsDomain; } - /// - /// Creates a new instance of for use with ElevenLabs API. - /// - /// Base api domain. - /// The version of the ElevenLabs api you want to use. - public ElevenLabsClientSettings(string domain, string apiVersion = DefaultApiVersion) + if (!domain.Contains('.') && + !domain.Contains(':')) { - if (string.IsNullOrWhiteSpace(domain)) - { - domain = ElevenLabsDomain; - } + throw new ArgumentException( + $"You're attempting to pass a \"resourceName\" parameter to \"{nameof(domain)}\". Please specify \"resourceName:\" for this parameter in constructor."); + } - if (!domain.Contains('.') && - !domain.Contains(':')) + // extract anything before the :// to split the domain and protocol + var splitDomain = domain.Split("://", StringSplitOptions.RemoveEmptyEntries); + if (splitDomain.Length == 2) + { + Protocol = splitDomain[0]; + // if the protocol is not https or http, throw an exception + if (Protocol != HttpsProtocol && + Protocol != HttpProtocol) { - throw new ArgumentException($"You're attempting to pass a \"resourceName\" parameter to \"{nameof(domain)}\". Please specify \"resourceName:\" for this parameter in constructor."); + throw new ArgumentException( + $"The protocol \"{Protocol}\" is not supported. Please use \"{HttpsProtocol}\" or \"{HttpProtocol}\"."); } - if (string.IsNullOrWhiteSpace(apiVersion)) - { - apiVersion = DefaultApiVersion; - } + WebSocketProtocol = Protocol == HttpsProtocol ? WssProtocol : WsProtocol; + Domain = splitDomain[1]; + } + else + { + Protocol = HttpsProtocol; + WebSocketProtocol = WssProtocol; + Domain = domain; + } - Domain = domain.Contains("http") ? domain : $"{Https}{domain}"; - ApiVersion = apiVersion; - BaseRequest = $"/{ApiVersion}/"; - BaseRequestUrlFormat = $"{Domain}{BaseRequest}{{0}}"; + if (string.IsNullOrWhiteSpace(apiVersion)) + { + apiVersion = DefaultApiVersion; } - public string Domain { get; } + Domain = domain; + ApiVersion = apiVersion; + BaseRequest = $"/{ApiVersion}/"; + BaseRequestUrlFormat = $"{Protocol}{Domain}{BaseRequest}{{0}}"; + BaseRequestWebSocketUrlFormat = $"{WebSocketProtocol}{Domain}{BaseRequest}{{0}}"; + } + + public string Protocol { get; } - public string ApiVersion { get; } + public string WebSocketProtocol { get; } - public string BaseRequest { get; } + public string Domain { get; } - public string BaseRequestUrlFormat { get; } + public string ApiVersion { get; } - public static ElevenLabsClientSettings Default { get; } = new(); - } -} + public string BaseRequest { get; } + + public string BaseRequestUrlFormat { get; } + + public string BaseRequestWebSocketUrlFormat { get; } + + public static ElevenLabsClientSettings Default { get; } = new(); +} \ No newline at end of file diff --git a/ElevenLabs-DotNet/Common/ElevenLabsBaseEndPoint.cs b/ElevenLabs-DotNet/Common/ElevenLabsBaseEndPoint.cs index 94807c3..00fa4f6 100644 --- a/ElevenLabs-DotNet/Common/ElevenLabsBaseEndPoint.cs +++ b/ElevenLabs-DotNet/Common/ElevenLabsBaseEndPoint.cs @@ -3,47 +3,58 @@ using System.Collections.Generic; using System.Linq; -namespace ElevenLabs +namespace ElevenLabs; + +public abstract class ElevenLabsBaseEndPoint { - public abstract class ElevenLabsBaseEndPoint + internal ElevenLabsBaseEndPoint(ElevenLabsClient client) => this.client = client; + + // ReSharper disable once InconsistentNaming + protected readonly ElevenLabsClient client; + + /// + /// The root endpoint address. + /// + protected abstract string Root { get; } + + /// + /// Gets the full formatted url for the API endpoint. + /// + /// The endpoint url. + /// Optional, parameters to add to the endpoint. + protected string GetUrl(string endpoint = "", Dictionary queryParameters = null) { - internal ElevenLabsBaseEndPoint(ElevenLabsClient client) => this.client = client; - - // ReSharper disable once InconsistentNaming - protected readonly ElevenLabsClient client; - - /// - /// The root endpoint address. - /// - protected abstract string Root { get; } - - /// - /// Gets the full formatted url for the API endpoint. - /// - /// The endpoint url. - /// Optional, parameters to add to the endpoint. - protected string GetUrl(string endpoint = "", Dictionary queryParameters = null) - { - var result = string.Format(client.ElevenLabsClientSettings.BaseRequestUrlFormat, $"{Root}{endpoint}"); - - if (queryParameters is { Count: not 0 }) - { - result += $"?{string.Join('&', queryParameters.Select(parameter => $"{parameter.Key}={parameter.Value}"))}"; - } + var result = string.Format(client.ElevenLabsClientSettings.BaseRequestUrlFormat, $"{Root}{endpoint}"); - return result; + if (queryParameters is { Count: not 0 }) + { + result += $"?{string.Join('&', queryParameters.Select(parameter => $"{parameter.Key}={parameter.Value}"))}"; } - private bool enableDebug; + return result; + } + + protected string GetWebSocketUrl(string endpoint = "", Dictionary queryParameters = null) + { + var result = string.Format(client.ElevenLabsClientSettings.BaseRequestWebSocketUrlFormat, $"{Root}{endpoint}"); - /// - /// Enables or disables the logging of all http responses of header and body information for this endpoint.
- /// WARNING! Enabling this in your production build, could potentially leak sensitive information! - ///
- public bool EnableDebug + if (queryParameters is { Count: not 0 }) { - get => enableDebug || client.EnableDebug; - set => enableDebug = value; + result += $"?{string.Join('&', queryParameters.Select(parameter => $"{parameter.Key}={parameter.Value}"))}"; } + + return result; + } + + private bool enableDebug; + + /// + /// Enables or disables the logging of all http responses of header and body information for this endpoint.
+ /// WARNING! Enabling this in your production build, could potentially leak sensitive information! + ///
+ public bool EnableDebug + { + get => enableDebug || client.EnableDebug; + set => enableDebug = value; } -} +} \ No newline at end of file diff --git a/ElevenLabs-DotNet/ElevenLabsClient.cs b/ElevenLabs-DotNet/ElevenLabsClient.cs index 8efcedc..b3c24e7 100644 --- a/ElevenLabs-DotNet/ElevenLabsClient.cs +++ b/ElevenLabs-DotNet/ElevenLabsClient.cs @@ -10,6 +10,7 @@ using ElevenLabs.Voices; using System; using System.Net.Http; +using System.Net.WebSockets; using System.Security.Authentication; using System.Text.Json; using System.Text.Json.Serialization; @@ -19,31 +20,42 @@ namespace ElevenLabs public sealed class ElevenLabsClient : IDisposable { /// - /// Creates a new client for the Eleven Labs API, handling auth and allowing for access to various API endpoints. + /// Creates a new client for the Eleven Labs API, handling auth and allowing for access to various API endpoints. /// - /// The API authentication information to use for API calls, - /// or to attempt to use the , - /// potentially loading from environment vars or from a config file. + /// + /// The API authentication information to use for API calls, + /// or to attempt to use the , + /// potentially loading from environment vars or from a config file. /// /// - /// Optional, for specifying a proxy domain. + /// Optional, for specifying a proxy domain. /// - /// Optional, . + /// Optional, . + /// Optional, to create custom versions of . /// Raised when authentication details are missing or invalid. - /// implements to manage the lifecycle of the resources it uses, including . + /// + /// implements + /// + /// to manage the lifecycle of the resources it uses, including + /// + /// . /// - /// When you initialize , it will create an internal instance if one is not provided. - /// This internal HttpClient is disposed of when ElevenLabsClient is disposed of. - /// If you provide an external HttpClient instance to ElevenLabsClient, you are responsible for managing its disposal. + /// When you initialize , it will create an internal instance + /// if one is not provided. + /// This internal HttpClient is disposed of when ElevenLabsClient is disposed of. + /// If you provide an external HttpClient instance to ElevenLabsClient, you are responsible for managing its disposal. /// - public ElevenLabsClient(ElevenLabsAuthentication authentication = null, ElevenLabsClientSettings settings = null, HttpClient httpClient = null) + public ElevenLabsClient(ElevenLabsAuthentication authentication = null, + ElevenLabsClientSettings settings = null, HttpClient httpClient = null, + Func clientWebSocketSpawner = null) { ElevenLabsAuthentication = authentication ?? ElevenLabsAuthentication.Default; ElevenLabsClientSettings = settings ?? ElevenLabsClientSettings.Default; if (string.IsNullOrWhiteSpace(ElevenLabsAuthentication?.ApiKey)) { - throw new AuthenticationException("You must provide API authentication. Please refer to https://github.com/RageAgainstThePixel/ElevenLabs-DotNet#authentication for details."); + throw new AuthenticationException( + "You must provide API authentication. Please refer to https://github.com/RageAgainstThePixel/ElevenLabs-DotNet#authentication for details."); } if (httpClient == null) @@ -62,17 +74,31 @@ public ElevenLabsClient(ElevenLabsAuthentication authentication = null, ElevenLa Client.DefaultRequestHeaders.Add("User-Agent", "ElevenLabs-DotNet"); Client.DefaultRequestHeaders.Add("xi-api-key", ElevenLabsAuthentication.ApiKey); + this.clientWebSocketSpawner = clientWebSocketSpawner; + WebSocketClient = clientWebSocketSpawner == null ? new ClientWebSocket() : clientWebSocketSpawner(); + WebSocketClient.Options.SetRequestHeader("User-Agent", "ElevenLabs-DotNet"); + WebSocketClient.Options.SetRequestHeader("xi-api-key", ElevenLabsAuthentication.ApiKey); + UserEndpoint = new UserEndpoint(this); VoicesEndpoint = new VoicesEndpoint(this); SharedVoicesEndpoint = new SharedVoicesEndpoint(this); ModelsEndpoint = new ModelsEndpoint(this); HistoryEndpoint = new HistoryEndpoint(this); TextToSpeechEndpoint = new TextToSpeechEndpoint(this); + TextToSpeechWebSocketEndpoint = new TextToSpeechWebSocketEndpoint(this); VoiceGenerationEndpoint = new VoiceGenerationEndpoint(this); SoundGenerationEndpoint = new SoundGenerationEndpoint(this); DubbingEndpoint = new DubbingEndpoint(this); } + public void ReinitializeWebSocketClient() + { + WebSocketClient.Dispose(); + WebSocketClient = clientWebSocketSpawner == null ? new ClientWebSocket() : clientWebSocketSpawner(); + WebSocketClient.Options.SetRequestHeader("User-Agent", "ElevenLabs-DotNet"); + WebSocketClient.Options.SetRequestHeader("xi-api-key", ElevenLabsAuthentication.ApiKey); + } + ~ElevenLabsClient() { Dispose(false); @@ -97,6 +123,7 @@ private void Dispose(bool disposing) Client?.Dispose(); } + WebSocketClient?.Dispose(); isDisposed = true; } } @@ -105,13 +132,20 @@ private void Dispose(bool disposing) private bool isCustomClient; + private Func clientWebSocketSpawner; + /// - /// to use when making calls to the API. + /// to use when making calls to the API. /// internal HttpClient Client { get; } /// - /// The to use when making calls to the API. + /// to use when making calls to the API. + /// + internal ClientWebSocket WebSocketClient { get; private set; } + + /// + /// The to use when making calls to the API. /// internal static JsonSerializerOptions JsonSerializationOptions { get; } = new() { @@ -119,12 +153,12 @@ private void Dispose(bool disposing) }; /// - /// Enables or disables debugging for all endpoints. + /// Enables or disables debugging for all endpoints. /// public bool EnableDebug { get; set; } /// - /// The API authentication information to use for API calls + /// The API authentication information to use for API calls /// public ElevenLabsAuthentication ElevenLabsAuthentication { get; } @@ -142,10 +176,12 @@ private void Dispose(bool disposing) public TextToSpeechEndpoint TextToSpeechEndpoint { get; } + public TextToSpeechWebSocketEndpoint TextToSpeechWebSocketEndpoint { get; } + public VoiceGenerationEndpoint VoiceGenerationEndpoint { get; } public SoundGenerationEndpoint SoundGenerationEndpoint { get; } public DubbingEndpoint DubbingEndpoint { get; } } -} +} \ No newline at end of file diff --git a/ElevenLabs-DotNet/Models/Model.cs b/ElevenLabs-DotNet/Models/Model.cs index a1bf3c7..658c474 100644 --- a/ElevenLabs-DotNet/Models/Model.cs +++ b/ElevenLabs-DotNet/Models/Model.cs @@ -93,6 +93,9 @@ public Model(string id) /// /// Our state-of-the-art speech to speech model suitable for scenarios where you need maximum control over the content and prosody of your generations. /// + [JsonIgnore] + public static Model TurboV25 { get; } = new("eleven_turbo_v2_5"); + [JsonIgnore] public static Model EnglishSpeechToSpeechV2 { get; } = new("eleven_english_sts_v2"); diff --git a/ElevenLabs-DotNet/TextToSpeech/Alignment.cs b/ElevenLabs-DotNet/TextToSpeech/Alignment.cs new file mode 100644 index 0000000..037965e --- /dev/null +++ b/ElevenLabs-DotNet/TextToSpeech/Alignment.cs @@ -0,0 +1,22 @@ +using System.Text.Json.Serialization; + +namespace ElevenLabs.TextToSpeech; + +public class Alignment +{ + [JsonPropertyName("char_start_times_ms")] + public int[] CharStartTimesMs { get; } + + [JsonPropertyName("chars_durations_ms")] + public int[] CharsDurationsMs { get; } + + [JsonPropertyName("chars")] + public string[] Chars { get; } + + public Alignment(int[] charStartTimesMs, int[] charsDurationsMs, string[] chars) + { + CharStartTimesMs = charStartTimesMs; + CharsDurationsMs = charsDurationsMs; + Chars = chars; + } +} \ No newline at end of file diff --git a/ElevenLabs-DotNet/TextToSpeech/GenerationConfig.cs b/ElevenLabs-DotNet/TextToSpeech/GenerationConfig.cs new file mode 100644 index 0000000..f827742 --- /dev/null +++ b/ElevenLabs-DotNet/TextToSpeech/GenerationConfig.cs @@ -0,0 +1,20 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Text.Json.Serialization; + +namespace ElevenLabs.TextToSpeech; + +public sealed class GenerationConfig +{ + [JsonPropertyName("chunk_length_schedule")] + public int[] ChunkLengthSchedule { get; } + + public GenerationConfig() : this([120, 160, 250, 290]) + { + } + + public GenerationConfig(int[] chunkLengthSchedule) + { + ChunkLengthSchedule = chunkLengthSchedule; + } +} \ No newline at end of file diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs new file mode 100644 index 0000000..91abc47 --- /dev/null +++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketEndpoint.cs @@ -0,0 +1,236 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using ElevenLabs.Models; +using ElevenLabs.Voices; +using System; +using System.Collections.Generic; +using System.Net.WebSockets; +using System.Text; +using System.Text.Json; +using System.Threading; +using System.Threading.Tasks; + +namespace ElevenLabs.TextToSpeech; + +/// +/// Access to convert text to synthesized speech using a WebSocket connection. +/// +public sealed class TextToSpeechWebSocketEndpoint : ElevenLabsBaseEndPoint +{ + private const string ModelIdParameter = "model_id"; + private const string EnableLoggingParameter = "enable_logging"; + private const string EnableSsmlParsingParameter = "enable_ssml_parsing"; + private const string OptimizeStreamingLatencyParameter = "optimize_streaming_latency"; + private const string OutputFormatParameter = "output_format"; + private const string InactivityTimeoutParameter = "inactivity_timeout"; + + public TextToSpeechWebSocketEndpoint(ElevenLabsClient client) : base(client) + { + } + + protected override string Root => "text-to-speech"; + + /// + /// Converts text into speech using a voice of your choice and returns audio. + /// + /// + /// to use. + /// + /// + /// Callback for streamed audio as it comes in.
+ /// Returns partial . + /// + /// + /// Optional, that will override the default settings in . + /// + /// Optional, . + /// + /// Optional, to use. Defaults to . + /// + /// + /// Output format of the generated audio.
+ /// Defaults to + /// + /// Optional, enable logging. + /// Optional, enable SSML parsing. + /// + /// Optional, You can turn on latency optimizations at some cost of quality. + /// The best possible final latency varies by model.
+ /// Possible values:
+ /// 0 - default mode (no latency optimizations)
+ /// 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+ /// 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+ /// 3 - max latency optimizations
+ /// 4 - max latency optimizations, but also with text normalizer turned off for even more latency savings + /// (best latency, but can mispronounce eg numbers and dates). + /// + /// + /// The number of seconds that the connection can be inactive before it is automatically closed. + /// Defaults to 20 seconds, with a maximum allowed value of 180 seconds. + /// + /// Optional, . + /// Raised when is null or empty. + /// Raised when is null. + public async Task StartTextToSpeechAsync(Voice voice, Func partialClipCallback, + VoiceSettings voiceSettings = null, GenerationConfig generationConfig = null, Model model = null, + OutputFormat outputFormat = OutputFormat.MP3_44100_128, bool? enableLogging = null, + bool? enableSsmlParsing = null, int? optimizeStreamingLatency = null, int? inactivityTimeout = null, + CancellationToken cancellationToken = default) + { + if (voice == null || + string.IsNullOrWhiteSpace(voice.Id)) + { + throw new ArgumentNullException(nameof(voice)); + } + + if (partialClipCallback == null) + { + throw new ArgumentNullException(nameof(partialClipCallback)); + } + + var parameters = new Dictionary + { + { ModelIdParameter, model?.Id ?? Model.EnglishV1.Id }, + { OutputFormatParameter, outputFormat.ToString().ToLower() } + }; + + if (enableLogging.HasValue) + { + parameters.Add(EnableLoggingParameter, enableLogging.ToString()); + } + + if (enableSsmlParsing.HasValue) + { + parameters.Add(EnableSsmlParsingParameter, enableSsmlParsing.ToString()); + } + + if (optimizeStreamingLatency.HasValue) + { + parameters.Add(OptimizeStreamingLatencyParameter, optimizeStreamingLatency.ToString()); + } + + if (inactivityTimeout.HasValue) + { + parameters.Add(InactivityTimeoutParameter, inactivityTimeout.ToString()); + } + + await client.WebSocketClient.ConnectAsync( + new Uri(GetWebSocketUrl($"/{voice.Id}/stream-input", parameters)), cancellationToken); + + // start receiving messages in a separate task + _ = Task.Run(async () => await ReceiveMessagesAsync(partialClipCallback, voice, cancellationToken), + cancellationToken); + + TextToSpeechWebSocketFirstMessageRequest firstMessageRequest = new(voiceSettings, generationConfig); + await client.WebSocketClient.SendAsync(firstMessageRequest.ToArraySegment(), WebSocketMessageType.Text, true, + cancellationToken); + } + + /// + /// Sends text to the WebSocket for speech synthesis. + /// + /// Text input to synthesize speech for. Needs to end with a space and cannot be null or empty. + /// + /// Forces the generation of audio. Set this value to true when you have finished sending text, but + /// want to keep the websocket connection open. + /// + /// + /// Use this to attempt to immediately trigger the generation of audio. Most users + /// shouldn't use this. + /// + /// Optional, . + /// Raised when the WebSocket is not open. + /// Raised when is null or empty. + public async Task SendTextToSpeechAsync(string text, bool? flush = null, bool tryTriggerGeneration = false, + CancellationToken cancellationToken = default) + { + if (client.WebSocketClient.State != WebSocketState.Open) + { + throw new InvalidOperationException("WebSocket is not open!"); + } + + TextToSpeechWebSocketRequest request = new(text, flush, tryTriggerGeneration); + await client.WebSocketClient.SendAsync(request.ToArraySegment(), WebSocketMessageType.Text, true, + cancellationToken); + } + + /// + /// Closes the text to speech WebSocket connection. + /// + /// Optional, . + /// Raised when the WebSocket is not open. + public async Task EndTextToSpeechAsync(CancellationToken cancellationToken = default) + { + if (client.WebSocketClient.State != WebSocketState.Open) + { + throw new InvalidOperationException("WebSocket is not open!"); + } + + TextToSpeechWebSocketLastMessageRequest lastMessageRequest = new(); + await client.WebSocketClient.SendAsync(lastMessageRequest.ToArraySegment(), WebSocketMessageType.Text, true, + cancellationToken); + await client.WebSocketClient.CloseAsync(WebSocketCloseStatus.NormalClosure, string.Empty, cancellationToken); + } + + private async Task ReceiveMessagesAsync(Func partialClipCallback, Voice voice, + CancellationToken cancellationToken) + { + try + { + byte[] buffer = new byte[8192]; + StringBuilder message = new(); + + while (client.WebSocketClient.State == WebSocketState.Open) + { + WebSocketReceiveResult receiveResult = await client.WebSocketClient.ReceiveAsync( + new ArraySegment(buffer), cancellationToken); + + if (receiveResult.MessageType == WebSocketMessageType.Close) + { + await client.WebSocketClient.CloseAsync(WebSocketCloseStatus.NormalClosure, string.Empty, + cancellationToken); + break; + } + + string jsonString = Encoding.UTF8.GetString(buffer, 0, receiveResult.Count); + message.Append(jsonString); + + if (!receiveResult.EndOfMessage) + { + continue; + } + + TextToSpeechWebSocketResponse response = JsonSerializer.Deserialize( + message.ToString(), ElevenLabsClient.JsonSerializationOptions); + + if (response == null) + { + throw new ArgumentException("Failed to parse response!"); + } + + message.Clear(); + + if (!string.IsNullOrWhiteSpace(response.Audio)) + { + string text = response.Alignment is { Chars: not null } + ? string.Concat(response.Alignment.Chars) + : null; + VoiceClip voiceClip = new(string.Empty, text, voice, response.AudioBytes); + await partialClipCallback(voiceClip); + } + else + { + await partialClipCallback(null); + } + } + } + catch (Exception e) + { + Console.WriteLine(e); + } + finally + { + client.ReinitializeWebSocketClient(); + } + } +} \ No newline at end of file diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketFirstMessageRequest.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketFirstMessageRequest.cs new file mode 100644 index 0000000..4e828a9 --- /dev/null +++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketFirstMessageRequest.cs @@ -0,0 +1,36 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System; +using System.Text; +using System.Text.Json; +using ElevenLabs.Voices; +using System.Text.Json.Serialization; + +namespace ElevenLabs.TextToSpeech; + +public sealed class TextToSpeechWebSocketFirstMessageRequest +{ + public TextToSpeechWebSocketFirstMessageRequest( + VoiceSettings voiceSettings = null, + GenerationConfig generationConfig = null) + { + VoiceSettings = voiceSettings; + GenerationConfig = generationConfig; + } + + [JsonPropertyName("text"), JsonInclude] + public string Text { get; } = " "; + + [JsonPropertyName("voice_settings")] + public VoiceSettings VoiceSettings { get; } + + [JsonPropertyName("generation_config")] + public GenerationConfig GenerationConfig { get; } + + public ArraySegment ToArraySegment() + { + string json = JsonSerializer.Serialize(this); + byte[] bytes = Encoding.UTF8.GetBytes(json); + return new ArraySegment(bytes); + } +} \ No newline at end of file diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketLastMessageRequest.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketLastMessageRequest.cs new file mode 100644 index 0000000..4eca5b7 --- /dev/null +++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketLastMessageRequest.cs @@ -0,0 +1,21 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System; +using System.Text; +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace ElevenLabs.TextToSpeech; + +public sealed class TextToSpeechWebSocketLastMessageRequest +{ + [JsonPropertyName("text"), JsonInclude] + public string Text { get; } = ""; + + public ArraySegment ToArraySegment() + { + string json = JsonSerializer.Serialize(this); + byte[] bytes = Encoding.UTF8.GetBytes(json); + return new ArraySegment(bytes); + } +} \ No newline at end of file diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketRequest.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketRequest.cs new file mode 100644 index 0000000..df47eb9 --- /dev/null +++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketRequest.cs @@ -0,0 +1,58 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System; +using System.Text; +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace ElevenLabs.TextToSpeech; + +public sealed class TextToSpeechWebSocketRequest +{ + /// + /// Text needs to end with a space and cannot be null or empty. + /// + /// The text to be converted to speech. Needs to end with a space, cannot be null or empty. + /// + /// Forces the generation of audio. Set this value to true when you have finished sending text, but + /// want to keep the websocket connection open. + /// + /// + /// Use this to attempt to immediately trigger the generation of audio. Most users + /// shouldn't use this. + /// + /// Thrown when is null or empty. + public TextToSpeechWebSocketRequest(string text, bool? flush = null, bool tryTriggerGeneration = false) + { + // if the last character of the text is not a space, append one + Text = text[^1] != ' ' ? text + ' ' : text; + TryTriggerGeneration = tryTriggerGeneration; + Flush = flush; + } + + /// + /// The text to be converted to speech. The last character of the text must be a space. + /// + [JsonPropertyName("text"), JsonInclude] + public string Text { get; } + + /// + /// Use this to attempt to immediately trigger the generation of audio. Most users shouldn't use this. + /// + [JsonPropertyName("try_trigger_generation")] + public bool TryTriggerGeneration { get; } + + /// + /// Flush forces the generation of audio. Set this value to true when you have finished sending text, + /// but want to keep the websocket connection open. + /// + [JsonPropertyName("flush"), JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public bool? Flush { get; } + + public ArraySegment ToArraySegment() + { + string json = JsonSerializer.Serialize(this); + byte[] bytes = Encoding.UTF8.GetBytes(json); + return new ArraySegment(bytes); + } +} \ No newline at end of file diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketResponse.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketResponse.cs new file mode 100644 index 0000000..7ad6494 --- /dev/null +++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechWebSocketResponse.cs @@ -0,0 +1,49 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System; +using System.Text.Json.Serialization; + +namespace ElevenLabs.TextToSpeech; + +public sealed class TextToSpeechWebSocketResponse +{ + /// + /// A generated partial audio chunk, encoded using the selected output_format, + /// by default this is MP3 encoded as a base64 string. + /// + [JsonPropertyName("audio")] + public string Audio { get; } + + /// + /// A generated partial audio chunk, encoded using the selected output_format, + /// + [JsonIgnore] + public byte[] AudioBytes { get; } + + /// + /// Indicates if the generation is complete. If set to True, audio will be null. + /// + [JsonPropertyName("isFinal")] + public bool? IsFinal { get; } + + /// + /// Alignment information for the generated audio given the input normalized text sequence. + /// + [JsonPropertyName("normalizedAlignment")] + public Alignment NormalizedAlignment { get; } + + /// + /// Alignment information for the generated audio given the original text sequence. + /// + [JsonPropertyName("alignment")] + public Alignment Alignment { get; } + + public TextToSpeechWebSocketResponse(string audio, bool? isFinal, Alignment normalizedAlignment, Alignment alignment) + { + Audio = audio; + IsFinal = isFinal; + NormalizedAlignment = normalizedAlignment; + Alignment = alignment; + AudioBytes = audio != null ? Convert.FromBase64String(audio) : null; + } +} \ No newline at end of file