Skip to content

Commit

Permalink
Implemented language code parameter for text-to-speech.
Browse files Browse the repository at this point in the history
- Added a language code parameter to the text-to-speech endpoint and request
- Added a check that rejects a language code for any model other than Turbo v2.5
- Added a test for the new parameter
This belongs to RageAgainstThePixel#66
  • Loading branch information
Milan Mikuš committed Oct 27, 2024
1 parent 41570cb commit f1ed50c
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 2 deletions.
20 changes: 20 additions & 0 deletions ElevenLabs-DotNet-Tests/TestFixture_04_TextToSpeechEndpoint.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,25 @@ public async Task Test_02_StreamTextToSpeech()
Assert.NotNull(voiceClip);
Console.WriteLine(voiceClip.Id);
}

[Test]
public async Task Test_TurboV2_5_LanguageEnforced_TextToSpeech()
{
    // Sample sentence rich in diacritics, paired with the ISO 639-1 code that is enforced for it.
    const string sampleText = "Příliš žluťoučký kůň úpěl ďábelské ódy";
    const string enforcedLanguageCode = "cs";

    Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint);
    var adam = Voices.Voice.Adam;
    Assert.NotNull(adam);
    var settings = await ElevenLabsClient.VoicesEndpoint.GetDefaultVoiceSettingsAsync();

    // Named arguments keep the call readable and independent of the position
    // of the optional parameters in the endpoint signature.
    var clip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(
        text: sampleText,
        voice: adam,
        voiceSettings: settings,
        model: Models.Model.TurboV2_5,
        outputFormat: OutputFormat.MP3_44100_192,
        optimizeStreamingLatency: null,
        languageCode: enforcedLanguageCode);

    Assert.NotNull(clip);
    Console.WriteLine(clip.Id);
}
}
}
8 changes: 6 additions & 2 deletions ElevenLabs-DotNet/TextToSpeech/TextToSpeechEndpoint.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ public TextToSpeechEndpoint(ElevenLabsClient client) : base(client) { }
/// <param name="model">
/// Optional, <see cref="Model"/> to use. Defaults to <see cref="Model.MultiLingualV2"/>.
/// </param>
/// <param name="languageCode">
/// Optional, language code (ISO 639-1) used to enforce a language for the model. Currently only <see cref="Model.TurboV2_5"/> supports language enforcement.
/// For any other model, an <see cref="ArgumentException"/> is thrown if a language code is provided.
/// </param>
/// <param name="outputFormat">
/// Output format of the generated audio.<br/>
/// Defaults to <see cref="OutputFormat.MP3_44100_128"/>
Expand All @@ -64,10 +68,10 @@ public TextToSpeechEndpoint(ElevenLabsClient client) : base(client) { }
/// </param>
/// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
/// <returns><see cref="VoiceClip"/>.</returns>
// Diff view: the line below is the pre-commit signature; the one after it is its replacement.
public async Task<VoiceClip> TextToSpeechAsync(string text, Voice voice, VoiceSettings voiceSettings = null, Model model = null, OutputFormat outputFormat = OutputFormat.MP3_44100_128, int? optimizeStreamingLatency = null, Func<VoiceClip, Task> partialClipCallback = null, CancellationToken cancellationToken = default)
// NOTE(review): languageCode is inserted ahead of partialClipCallback, so callers that passed
// partialClipCallback or cancellationToken positionally no longer line up with the new signature —
// confirm this source break is intended, or append the new parameter after the existing ones.
public async Task<VoiceClip> TextToSpeechAsync(string text, Voice voice, VoiceSettings voiceSettings = null, Model model = null, OutputFormat outputFormat = OutputFormat.MP3_44100_128, int? optimizeStreamingLatency = null, string languageCode = null, Func<VoiceClip, Task> partialClipCallback = null, CancellationToken cancellationToken = default)
{
// Settings precedence: the explicit argument, then the voice's own settings,
// then whatever GetDefaultVoiceSettingsAsync returns.
var defaultVoiceSettings = voiceSettings ?? voice.Settings ?? await client.VoicesEndpoint.GetDefaultVoiceSettingsAsync(cancellationToken);
// Pre-commit call: builds the request without a language code.
return await TextToSpeechAsync(new TextToSpeechRequest(voice, text, Encoding.UTF8, defaultVoiceSettings, outputFormat, optimizeStreamingLatency, model), partialClipCallback, cancellationToken).ConfigureAwait(false);
// Post-commit call: forwards languageCode to the request and delegates to the request-based overload.
return await TextToSpeechAsync(new TextToSpeechRequest(voice, text, Encoding.UTF8, defaultVoiceSettings, outputFormat, optimizeStreamingLatency, model, languageCode), partialClipCallback, cancellationToken).ConfigureAwait(false);
}

/// <summary>
Expand Down
14 changes: 14 additions & 0 deletions ElevenLabs-DotNet/TextToSpeech/TextToSpeechRequest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ public TextToSpeechRequest(string text, Model model, VoiceSettings voiceSettings
/// <param name="model">
/// Optional, <see cref="Model"/> to use. Defaults to <see cref="Model.MultiLingualV2"/>.
/// </param>
/// <param name="languageCode">
/// Optional, language code (ISO 639-1) used to enforce a language for the model. Currently only <see cref="Model.TurboV2_5"/> supports language enforcement.
/// For any other model, an <see cref="ArgumentException"/> is thrown if a language code is provided.
/// </param>
/// <param name="outputFormat">
/// Output format of the generated audio.<br/>
/// Defaults to <see cref="OutputFormat.MP3_44100_128"/>
Expand All @@ -57,6 +61,7 @@ public TextToSpeechRequest(
OutputFormat outputFormat = OutputFormat.MP3_44100_128,
int? optimizeStreamingLatency = null,
Model model = null,
string languageCode = null,
string previousText = null)
{
if (string.IsNullOrWhiteSpace(text))
Expand All @@ -80,13 +85,19 @@ public TextToSpeechRequest(
text = Encoding.UTF8.GetString(encoding.GetBytes(text));
}

if(!string.IsNullOrEmpty(languageCode) && model != Models.Model.TurboV2_5)
{
throw new ArgumentException($"Currently only Turbo v2.5 model supports language enforcement.", nameof(languageCode));
}

Text = text;
Model = model ?? Models.Model.MultiLingualV2;
Voice = voice;
VoiceSettings = voiceSettings ?? voice.Settings ?? throw new ArgumentNullException(nameof(voiceSettings));
PreviousText = previousText;
OutputFormat = outputFormat;
OptimizeStreamingLatency = optimizeStreamingLatency;
LanguageCode = languageCode;
}

[JsonPropertyName("text")]
Expand All @@ -95,6 +106,9 @@ public TextToSpeechRequest(
/// <summary>
/// Model to use for the request, serialized as <c>model_id</c>.
/// The constructor falls back to <c>Models.Model.MultiLingualV2</c> when no model is supplied.
/// </summary>
[JsonPropertyName("model_id")]
public string Model { get; }

/// <summary>
/// Language code passed to the constructor, serialized as <c>language_code</c>.
/// The constructor only accepts a non-empty value together with <c>Models.Model.TurboV2_5</c>.
/// </summary>
// NOTE(review): this is null when no language code was supplied — confirm the serializer
// options omit null values so that "language_code": null is not sent for other models.
[JsonPropertyName("language_code")]
public string LanguageCode { get; }

/// <summary>
/// Voice passed to the constructor. Marked <see cref="JsonIgnoreAttribute"/>, so it is not
/// written to the serialized request.
/// </summary>
[JsonIgnore]
public Voice Voice { get; }

Expand Down

0 comments on commit f1ed50c

Please sign in to comment.