diff --git a/ElevenLabs-DotNet/ElevenLabs-DotNet.csproj b/ElevenLabs-DotNet/ElevenLabs-DotNet.csproj
index cdfde1a..08a3542 100644
--- a/ElevenLabs-DotNet/ElevenLabs-DotNet.csproj
+++ b/ElevenLabs-DotNet/ElevenLabs-DotNet.csproj
@@ -25,8 +25,12 @@ All copyrights, trademarks, logos, and assets are the property of their respecti
false
true
true
- 3.0.3
+ 3.1.0
+Version 3.1.0
+- Refactored TextToSpeechEndpoint to accept a TextToSpeechRequest object
+ - Added text encoding options to TextToSpeechRequest
+ - Added previous text input parameter to TextToSpeechRequest
Version 3.0.3
- Fix DubbingRequest.DropBackgroundAudio flag not properly being set
- Added DubbingRequest.UseProfanityFilter flag
diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechEndpoint.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechEndpoint.cs
index e036fa1..bfe7098 100644
--- a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechEndpoint.cs
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechEndpoint.cs
@@ -8,6 +8,7 @@
using System.IO;
using System.Linq;
using System.Net.Http;
+using System.Text;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
@@ -55,7 +56,7 @@ public TextToSpeechEndpoint(ElevenLabsClient client) : base(client) { }
/// 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
/// 3 - max latency optimizations
/// 4 - max latency optimizations, but also with text normalizer turned off for even more latency savings
- /// (best latency, but can mispronounce eg numbers and dates).
+ /// (best latency, but can mispronounce e.g. numbers and dates).
///
///
/// Optional, Callback to enable streaming audio as it comes in.
@@ -65,30 +66,34 @@ public TextToSpeechEndpoint(ElevenLabsClient client) : base(client) { }
/// .
public async Task TextToSpeechAsync(string text, Voice voice, VoiceSettings voiceSettings = null, Model model = null, OutputFormat outputFormat = OutputFormat.MP3_44100_128, int? optimizeStreamingLatency = null, Func partialClipCallback = null, CancellationToken cancellationToken = default)
{
- if (text.Length > 5000)
- {
- throw new ArgumentOutOfRangeException(nameof(text), $"{nameof(text)} cannot exceed 5000 characters");
- }
-
+ // Keep this guard: voice.Settings is dereferenced on the next statement, before the request constructor's own voice check can run, so a null voice would otherwise surface as a NullReferenceException.
if (voice == null ||
string.IsNullOrWhiteSpace(voice.Id))
{
throw new ArgumentNullException(nameof(voice));
}

var defaultVoiceSettings = voiceSettings ?? voice.Settings ?? await client.VoicesEndpoint.GetDefaultVoiceSettingsAsync(cancellationToken);
- using var payload = JsonSerializer.Serialize(new TextToSpeechRequest(text, model, defaultVoiceSettings)).ToJsonStringContent();
+ return await TextToSpeechAsync(new TextToSpeechRequest(voice, text, Encoding.UTF8, defaultVoiceSettings, outputFormat, optimizeStreamingLatency, model), partialClipCallback, cancellationToken).ConfigureAwait(false);
+ }
+
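+ /// <summary>
+ /// Converts text into speech using a voice of your choice and returns audio.
+ /// </summary>
+ /// <param name="request"><see cref="TextToSpeechRequest"/>.</param>
+ /// <param name="partialClipCallback">
+ /// Optional, Callback to enable streaming audio as it comes in.
+ /// Returns partial <see cref="VoiceClip"/>.
+ /// </param>
+ /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+ /// <returns><see cref="VoiceClip"/>.</returns>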
+ public async Task TextToSpeechAsync(TextToSpeechRequest request, Func partialClipCallback = null, CancellationToken cancellationToken = default)
+ {
+ using var payload = JsonSerializer.Serialize(request, ElevenLabsClient.JsonSerializationOptions).ToJsonStringContent();
var parameters = new Dictionary
{
- { OutputFormatParameter, outputFormat.ToString().ToLower() }
+ { OutputFormatParameter, request.OutputFormat.ToString().ToLower() }
};
- if (optimizeStreamingLatency.HasValue)
+ if (request.OptimizeStreamingLatency.HasValue)
{
- parameters.Add(OptimizeStreamingLatencyParameter, optimizeStreamingLatency.ToString());
+ parameters.Add(OptimizeStreamingLatencyParameter, request.OptimizeStreamingLatency.Value.ToString());
}
- using var postRequest = new HttpRequestMessage(HttpMethod.Post, GetUrl($"/{voice.Id}{(partialClipCallback == null ? string.Empty : "/stream")}", parameters));
+ using var postRequest = new HttpRequestMessage(HttpMethod.Post, GetUrl($"/{request.Voice.Id}{(partialClipCallback == null ? string.Empty : "/stream")}", parameters));
postRequest.Content = payload;
var requestOption = partialClipCallback == null
? HttpCompletionOption.ResponseContentRead
@@ -116,7 +121,7 @@ public async Task TextToSpeechAsync(string text, Voice voice, VoiceSe
{
try
{
- await partialClipCallback(new VoiceClip(clipId, text, voice, new ReadOnlyMemory(memoryStream.GetBuffer(), totalBytesRead, bytesRead))).ConfigureAwait(false);
+ await partialClipCallback(new VoiceClip(clipId, request.Text, request.Voice, new ReadOnlyMemory(memoryStream.GetBuffer(), totalBytesRead, bytesRead))).ConfigureAwait(false);
}
catch (Exception e)
{
@@ -127,7 +132,7 @@ public async Task TextToSpeechAsync(string text, Voice voice, VoiceSe
totalBytesRead += bytesRead;
}
- return new VoiceClip(clipId, text, voice, new ReadOnlyMemory(memoryStream.GetBuffer(), 0, totalBytesRead));
+ return new VoiceClip(clipId, request.Text, request.Voice, new ReadOnlyMemory(memoryStream.GetBuffer(), 0, totalBytesRead));
}
}
}
diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechRequest.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechRequest.cs
index 42e6acb..1ca2a29 100644
--- a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechRequest.cs
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechRequest.cs
@@ -3,22 +3,90 @@
using ElevenLabs.Models;
using ElevenLabs.Voices;
using System;
+using System.Text;
using System.Text.Json.Serialization;
namespace ElevenLabs.TextToSpeech
{
public sealed class TextToSpeechRequest
{
- public TextToSpeechRequest(string text, Model model, VoiceSettings voiceSettings)
+ public TextToSpeechRequest(string text, Model model, VoiceSettings voiceSettings) :
+ this(null, text, voiceSettings: voiceSettings, model: model) // NOTE(review): forwards a null voice, which the target constructor rejects with ArgumentNullException, so this overload cannot succeed as written - confirm whether it should be removed, marked obsolete, or given a way to supply a voice.
+ {
+ }
+
+ /// <summary>
+ /// Constructor.
+ /// </summary>
+ /// <param name="voice">
+ /// <see cref="Voice"/> to use.
+ /// </param>
+ /// <param name="text">
+ /// Text input to synthesize speech for. Maximum 5000 characters.
+ /// </param>
+ /// <param name="encoding">Optional, <see cref="Encoding"/> to use for <paramref name="text"/>. When it is not UTF-8 the text is transcoded to UTF-8.</param>
+ /// <param name="voiceSettings">
+ /// Optional, <see cref="VoiceSettings"/> that will override the default settings in <see cref="Voice.Settings"/>.
+ /// </param>
+ /// <param name="model">
+ /// Optional, <see cref="Models.Model"/> to use. Defaults to <see cref="Models.Model.MultiLingualV2"/>.
+ /// </param>
+ /// <param name="outputFormat">
+ /// Output format of the generated audio.
+ /// Defaults to <see cref="OutputFormat.MP3_44100_128"/>
+ /// </param>
+ /// <param name="optimizeStreamingLatency">
+ /// Optional, You can turn on latency optimizations at some cost of quality.
+ /// The best possible final latency varies by model.
+ /// Possible values:
+ /// 0 - default mode (no latency optimizations)
+ /// 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+ /// 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+ /// 3 - max latency optimizations
+ /// 4 - max latency optimizations, but also with text normalizer turned off for even more latency savings
+ /// (best latency, but can mispronounce e.g. numbers and dates).
+ /// </param>
+ /// <param name="previousText">
+ /// Optional, previous text input; serialized as previous_text and left out of the payload when null.
+ /// </param>
+ /// <exception cref="ArgumentNullException">
+ /// <paramref name="text"/> is null or whitespace, <paramref name="voice"/> is null or has no id,
+ /// or neither <paramref name="voiceSettings"/> nor the voice's own settings are available.
+ /// </exception>
+ /// <exception cref="ArgumentOutOfRangeException"><paramref name="text"/> exceeds 5000 characters.</exception>
+ public TextToSpeechRequest(
+ Voice voice,
+ string text,
+ Encoding encoding = null,
+ VoiceSettings voiceSettings = null,
+ OutputFormat outputFormat = OutputFormat.MP3_44100_128,
+ int? optimizeStreamingLatency = null,
+ Model model = null,
+ string previousText = null)
{
if (string.IsNullOrWhiteSpace(text))
{
throw new ArgumentNullException(nameof(text));
}
+ if (text.Length > 5000)
+ {
+ throw new ArgumentOutOfRangeException(nameof(text), $"{nameof(text)} cannot exceed 5000 characters");
+ }
+
+ if (voice == null ||
+ string.IsNullOrWhiteSpace(voice.Id))
+ {
+ throw new ArgumentNullException(nameof(voice));
+ }
+
+ if (encoding?.Equals(Encoding.UTF8) == false)
+ {
+ // Transcode the source-encoding bytes to UTF-8 before decoding them as UTF-8.
+ // Decoding the raw source bytes directly garbles every character whose byte
+ // sequence differs between the two encodings (all non-ASCII text, and all text for UTF-16/32).
+ text = Encoding.UTF8.GetString(Encoding.Convert(encoding, Encoding.UTF8, encoding.GetBytes(text)));
+ }
+
Text = text;
- Model = model ?? Models.Model.EnglishV1;
- VoiceSettings = voiceSettings ?? throw new ArgumentNullException(nameof(voiceSettings));
+ Model = model ?? Models.Model.MultiLingualV2;
+ Voice = voice;
+ VoiceSettings = voiceSettings ?? voice.Settings ?? throw new ArgumentNullException(nameof(voiceSettings));
+ PreviousText = previousText;
+ OutputFormat = outputFormat;
+ OptimizeStreamingLatency = optimizeStreamingLatency;
}
[JsonPropertyName("text")]
@@ -27,7 +95,20 @@ public TextToSpeechRequest(string text, Model model, VoiceSettings voiceSettings
[JsonPropertyName("model_id")]
public string Model { get; }
+ [JsonIgnore] // Voice is kept out of the JSON body; the endpoint reads Voice.Id to build the request URL.
+ public Voice Voice { get; }
+
[JsonPropertyName("voice_settings")]
public VoiceSettings VoiceSettings { get; }
+
+ [JsonPropertyName("previous_text")]
+ [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] // Omitted from the payload when PreviousText is null.
+ public string PreviousText { get; }
+
+ [JsonIgnore] // Not part of the JSON body; the endpoint passes the output format as a URL parameter.
+ public OutputFormat OutputFormat { get; }
+
+ [JsonIgnore] // Not part of the JSON body; the endpoint adds it as a URL parameter only when a value is set.
+ public int? OptimizeStreamingLatency { get; }
}
}