Implemented language code parameter for text-to-speech.

- Added a language code parameter to the text-to-speech endpoint and request
- Added a check that rejects a language code for any model other than Turbo v2.5
- Added a test for the new parameter

This belongs to RageAgainstThePixel#66
Mylan719 · Oct 27, 2024 · f1ed50c · f1ed50c
1 parent 41570cb
commit f1ed50c
Show file tree

Hide file tree

Showing 3 changed files with 40 additions and 2 deletions.
diff --git a/ElevenLabs-DotNet-Tests/TestFixture_04_TextToSpeechEndpoint.cs b/ElevenLabs-DotNet-Tests/TestFixture_04_TextToSpeechEndpoint.cs
@@ -43,5 +43,25 @@ public async Task Test_02_StreamTextToSpeech()
             Assert.NotNull(voiceClip);
             Console.WriteLine(voiceClip.Id);
         }
+
+        [Test]
+        public async Task Test_TurboV2_5_LanguageEnforced_TextToSpeech()
+        {
+            Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint);
+            var voice = Voices.Voice.Adam;
+            Assert.NotNull(voice);
+            var defaultVoiceSettings = await ElevenLabsClient.VoicesEndpoint.GetDefaultVoiceSettingsAsync();
+            var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(
+                "Příliš žluťoučký kůň úpěl ďábelské ódy",
+                voice, 
+                defaultVoiceSettings,
+                Models.Model.TurboV2_5,
+                OutputFormat.MP3_44100_192,
+                null,
+                "cs");
+
+            Assert.NotNull(voiceClip);
+            Console.WriteLine(voiceClip.Id);
+        }
     }
 }
diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechEndpoint.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechEndpoint.cs
@@ -43,6 +43,10 @@ public TextToSpeechEndpoint(ElevenLabsClient client) : base(client) { }
         /// <param name="model">
         /// Optional, <see cref="Model"/> to use. Defaults to <see cref="Model.MonoLingualV1"/>.
         /// </param>
+        /// <param name="languageCode">
+        /// Optional, Language code (ISO 639-1) used to enforce a language for the model. Currently only <see cref="Model.TurboV2_5"/> supports language enforcement. 
+        /// For other models, an error will be returned if language code is provided.
+        /// </param>
         /// <param name="outputFormat">
         /// Output format of the generated audio.<br/>
         /// Defaults to <see cref="OutputFormat.MP3_44100_128"/>
@@ -64,10 +68,10 @@ public TextToSpeechEndpoint(ElevenLabsClient client) : base(client) { }
         /// </param>
         /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
         /// <returns><see cref="VoiceClip"/>.</returns>
-        public async Task<VoiceClip> TextToSpeechAsync(string text, Voice voice, VoiceSettings voiceSettings = null, Model model = null, OutputFormat outputFormat = OutputFormat.MP3_44100_128, int? optimizeStreamingLatency = null, Func<VoiceClip, Task> partialClipCallback = null, CancellationToken cancellationToken = default)
+        public async Task<VoiceClip> TextToSpeechAsync(string text, Voice voice, VoiceSettings voiceSettings = null, Model model = null, OutputFormat outputFormat = OutputFormat.MP3_44100_128, int? optimizeStreamingLatency = null, string languageCode =  null, Func<VoiceClip, Task> partialClipCallback = null, CancellationToken cancellationToken = default)
         {
             var defaultVoiceSettings = voiceSettings ?? voice.Settings ?? await client.VoicesEndpoint.GetDefaultVoiceSettingsAsync(cancellationToken);
-            return await TextToSpeechAsync(new TextToSpeechRequest(voice, text, Encoding.UTF8, defaultVoiceSettings, outputFormat, optimizeStreamingLatency, model), partialClipCallback, cancellationToken).ConfigureAwait(false);
+            return await TextToSpeechAsync(new TextToSpeechRequest(voice, text, Encoding.UTF8, defaultVoiceSettings, outputFormat, optimizeStreamingLatency, model, languageCode), partialClipCallback, cancellationToken).ConfigureAwait(false);
         }
 
         /// <summary>

diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechRequest.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechRequest.cs
@@ -31,6 +31,10 @@ public TextToSpeechRequest(string text, Model model, VoiceSettings voiceSettings
         /// <param name="model">
         /// Optional, <see cref="Model"/> to use. Defaults to <see cref="Model.MonoLingualV1"/>.
         /// </param>
+        /// <param name="languageCode">
+        /// Optional, Language code (ISO 639-1) used to enforce a language for the model. Currently only <see cref="Model.TurboV2_5"/> supports language enforcement. 
+        /// For other models, an error will be returned if language code is provided.
+        /// </param>
         /// <param name="outputFormat">
         /// Output format of the generated audio.<br/>
         /// Defaults to <see cref="OutputFormat.MP3_44100_128"/>
@@ -57,6 +61,7 @@ public TextToSpeechRequest(
             OutputFormat outputFormat = OutputFormat.MP3_44100_128,
             int? optimizeStreamingLatency = null,
             Model model = null,
+            string languageCode = null,
             string previousText = null)
         {
             if (string.IsNullOrWhiteSpace(text))
@@ -80,13 +85,19 @@ public TextToSpeechRequest(
                 text = Encoding.UTF8.GetString(encoding.GetBytes(text));
             }
 
+            if(!string.IsNullOrEmpty(languageCode) && model != Models.Model.TurboV2_5)
+            {
+                throw new ArgumentException($"Currently only Turbo v2.5 model supports language enforcement.", nameof(languageCode));
+            }
+
             Text = text;
             Model = model ?? Models.Model.MultiLingualV2;
             Voice = voice;
             VoiceSettings = voiceSettings ?? voice.Settings ?? throw new ArgumentNullException(nameof(voiceSettings));
             PreviousText = previousText;
             OutputFormat = outputFormat;
             OptimizeStreamingLatency = optimizeStreamingLatency;
+            LanguageCode = languageCode;
         }
 
         [JsonPropertyName("text")]
@@ -95,6 +106,9 @@ public TextToSpeechRequest(
         [JsonPropertyName("model_id")]
         public string Model { get; }
 
+        [JsonPropertyName("language_code")]
+        public string LanguageCode { get; }
+
         [JsonIgnore]
         public Voice Voice { get; }