diff --git a/ElevenLabs-DotNet-Tests/Assets/online.dubbed.ja.mp4 b/ElevenLabs-DotNet-Tests/Assets/online.dubbed.ja.mp4 new file mode 100644 index 0000000..3c1be58 Binary files /dev/null and b/ElevenLabs-DotNet-Tests/Assets/online.dubbed.ja.mp4 differ diff --git a/ElevenLabs-DotNet-Tests/Assets/online.dubbed.ja.srt b/ElevenLabs-DotNet-Tests/Assets/online.dubbed.ja.srt new file mode 100644 index 0000000..47549b6 --- /dev/null +++ b/ElevenLabs-DotNet-Tests/Assets/online.dubbed.ja.srt @@ -0,0 +1,27 @@ +1 +00:00:00,000 --> 00:00:04,086 +イレブンラボは象徴的なボイスコレクションを紹介します。 + +2 +00:00:04,051 --> 00:00:10,553 +有名なAIボイスの中から、あの有名なJudy +Garlandを含む、私たちの独占的なセレクションから選んでください。 + +3 +00:00:10,560 --> 00:00:19,291 + +お気に入りの物語、出版物、アップロードを最も本物に近い方法で読めるようにします。オズの魔法使い含む。 + +4 +00:00:19,781 --> 00:00:25,797 + +その瞬間、ドロシーはテーブルの上に東の魔女のものだった銀の靴が置かれているのを見ました。だから今日、Eleven + +5 +00:00:25,797 --> 00:00:28,697 +Labs Readerアプリをダウンロードしましょう。 + +6 +00:00:28,683 --> 00:00:32,166 +お気に入りの声で、お気に入りの物語を。 + diff --git a/ElevenLabs-DotNet-Tests/Assets/test_sample_01.ogg.dubbed.es.ogg b/ElevenLabs-DotNet-Tests/Assets/test_sample_01.ogg.dubbed.es.ogg new file mode 100644 index 0000000..c2a02f8 Binary files /dev/null and b/ElevenLabs-DotNet-Tests/Assets/test_sample_01.ogg.dubbed.es.ogg differ diff --git a/ElevenLabs-DotNet-Tests/Assets/test_sample_01.ogg.dubbed.es.srt b/ElevenLabs-DotNet-Tests/Assets/test_sample_01.ogg.dubbed.es.srt new file mode 100644 index 0000000..3ebfbcf --- /dev/null +++ b/ElevenLabs-DotNet-Tests/Assets/test_sample_01.ogg.dubbed.es.srt @@ -0,0 +1,56 @@ +1 +00:00:00,000 --> 00:00:01,625 +Lorem Ipsum. + +2 +00:00:01,628 --> 00:00:05,664 +¿Qué es lorem ipsum preguntas? Bueno, +lorem ipsum es simplemente texto de + +3 +00:00:05,664 --> 00:00:09,476 +relleno de la industria de la impresión y +la composición tipográfica. 
+ +4 +00:00:09,481 --> 00:00:14,510 +Lorem ipsum ha sido el texto de relleno +estándar de la industria desde el siglo + +5 +00:00:14,510 --> 00:00:19,280 +XVI cuando un impresor desconocido tomó +un conjunto de tipos y lo revuelto. + +6 +00:00:19,250 --> 00:00:21,387 +para hacer un libro de muestras de tipos. + +7 +00:00:21,357 --> 00:00:26,466 +Ha sobrevivido no solo cinco siglos, sino +también lo menos. + +8 +00:00:26,465 --> 00:00:29,112 +salto a la composición económica. + +9 +00:00:29,083 --> 00:00:34,692 +permaneciendo esencialmente sin cambios. +Se popularizó en la década de 1960 con el + +10 +00:00:34,692 --> 00:00:36,699 +lanzamiento de hojas LORISET. + +11 +00:00:36,681 --> 00:00:42,267 +contiene pasajes de lorem ipsum, y más +recientemente, el software de autoedición + +12 +00:00:42,267 --> 00:00:46,155 +de Algis Pagemaker, incluyendo versiones +de lorem ipsum. + diff --git a/ElevenLabs-DotNet-Tests/Test_Fixture_07_DubbingEndpoint.cs b/ElevenLabs-DotNet-Tests/Test_Fixture_07_DubbingEndpoint.cs new file mode 100644 index 0000000..5e36c7a --- /dev/null +++ b/ElevenLabs-DotNet-Tests/Test_Fixture_07_DubbingEndpoint.cs @@ -0,0 +1,97 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. 
/// <summary>
/// Integration tests for the dubbing endpoint. Both tests call the live service through the
/// <c>ElevenLabsClient</c> exposed by <see cref="AbstractTestFixture"/> and write their output
/// (dubbed media plus an SRT transcript) into the test project's <c>Assets</c> folder.
/// </summary>
internal class Test_Fixture_07_DubbingEndpoint : AbstractTestFixture
{
    /// <summary>
    /// Dubs a local audio asset from English into Spanish and stores the results next to the source file.
    /// </summary>
    [Test]
    public async Task Test_01_Dubbing_File()
    {
        Assert.NotNull(ElevenLabsClient.DubbingEndpoint);

        string sourcePath = Path.GetFullPath("../../../Assets/test_sample_01.ogg");
        DubbingRequest request = new()
        {
            File = (sourcePath, "audio/mpeg"),
            SourceLanguage = "en",
            TargetLanguage = "es",
            NumSpeakers = 1,
            Watermark = false,
        };

        string dubbingId = await StartAndAwaitDubbingAsync(request);

        // Output names are derived from the source file: "<source>.dubbed.<lang><ext>" and "<source>.dubbed.<lang>.srt".
        FileInfo srcFile = new(sourcePath);
        await SaveDubbedMediaAsync(dubbingId, request.TargetLanguage, $"{srcFile.FullName}.dubbed.{request.TargetLanguage}{srcFile.Extension}");
        await SaveTranscriptAsync(dubbingId, request.TargetLanguage, $"{srcFile.FullName}.dubbed.{request.TargetLanguage}.srt");
    }

    /// <summary>
    /// Dubs an online video from English into Japanese and stores the results in the <c>Assets</c> folder.
    /// </summary>
    [Test]
    public async Task Test_02_Dubbing_Url()
    {
        Assert.NotNull(ElevenLabsClient.DubbingEndpoint);

        Uri uri = new("https://youtu.be/Zo5-rhYOlNk");
        DubbingRequest request = new()
        {
            SourceUrl = uri.AbsoluteUri,
            SourceLanguage = "en",
            TargetLanguage = "ja",
            NumSpeakers = 1,
            Watermark = true,
        };

        string dubbingId = await StartAndAwaitDubbingAsync(request);

        string assetsDir = Path.GetFullPath("../../../Assets");
        await SaveDubbedMediaAsync(dubbingId, request.TargetLanguage, Path.Combine(assetsDir, $"online.dubbed.{request.TargetLanguage}.mp4"));
        await SaveTranscriptAsync(dubbingId, request.TargetLanguage, Path.Combine(assetsDir, $"online.dubbed.{request.TargetLanguage}.srt"));
    }

    /// <summary>
    /// Starts the dubbing job, checks the initial response, and waits until the job reports completion.
    /// </summary>
    /// <returns>The ID of the dubbing project that was started.</returns>
    private async Task<string> StartAndAwaitDubbingAsync(DubbingRequest request)
    {
        DubbingResponse response = await ElevenLabsClient.DubbingEndpoint.StartDubbingAsync(request);
        Assert.IsFalse(string.IsNullOrEmpty(response.DubbingId));
        Assert.IsTrue(response.ExpectedDurationSeconds > 0);
        Console.WriteLine($"Expected Duration: {response.ExpectedDurationSeconds:0.00} seconds");

        Progress<string> statusLog = new(msg => Console.WriteLine(msg));
        bool completed = await ElevenLabsClient.DubbingEndpoint.WaitForDubbingCompletionAsync(response.DubbingId, progress: statusLog);
        Assert.IsTrue(completed);

        return response.DubbingId;
    }

    /// <summary>
    /// Streams the dubbed media to <paramref name="outputPath"/> and asserts a non-empty file was written.
    /// </summary>
    private async Task SaveDubbedMediaAsync(string dubbingId, string languageCode, string outputPath)
    {
        FileInfo dubbedFile = new(outputPath);

        // The stream is closed before the FileInfo is queried so that Length reflects the flushed content.
        await using (FileStream output = File.Open(dubbedFile.FullName, FileMode.Create))
        {
            await foreach (byte[] chunk in ElevenLabsClient.DubbingEndpoint.GetDubbedFileAsync(dubbingId, languageCode))
            {
                await output.WriteAsync(chunk);
            }
        }

        Assert.IsTrue(dubbedFile.Exists);
        Assert.IsTrue(dubbedFile.Length > 0);
    }

    /// <summary>
    /// Downloads the SRT transcript to <paramref name="outputPath"/> and asserts a non-empty file was written.
    /// </summary>
    private async Task SaveTranscriptAsync(string dubbingId, string languageCode, string outputPath)
    {
        FileInfo transcriptFile = new(outputPath);

        string transcript = await ElevenLabsClient.DubbingEndpoint.GetTranscriptForDubAsync(dubbingId, languageCode, "srt");
        await File.WriteAllTextAsync(transcriptFile.FullName, transcript);

        Assert.IsTrue(transcriptFile.Exists);
        Assert.IsTrue(transcriptFile.Length > 0);
    }
}
System.Collections.Generic; +using System.Globalization; +using System.IO; +using System.Net.Http; +using System.Net.Http.Headers; +using System.Runtime.CompilerServices; +using System.Text.Json; +using System.Threading; +using System.Threading.Tasks; + +/// +/// Access to dubbing an audio or video file into a given language. +/// +public sealed class DubbingEndpoint(ElevenLabsClient client) : ElevenLabsBaseEndPoint(client) +{ + private const string DubbingId = "dubbing_id"; + private const string ExpectedDurationSecs = "expected_duration_sec"; + + /// + /// Gets or sets the maximum number of retry attempts to wait for the dubbing completion status. + /// + public int DefaultMaxRetries { get; set; } = 30; + + /// + /// Gets or sets the timeout interval for waiting between dubbing status checks. + /// + public TimeSpan DefaultTimeoutInterval { get; set; } = TimeSpan.FromSeconds(10); + + protected override string Root => "dubbing"; + + /// + /// Initiates a dubbing operation asynchronously based on the provided . + /// + /// The containing dubbing configuration and files. + /// A to cancel the operation. + /// + /// A task representing the asynchronous dubbing operation. The task completes with the dubbing ID and expected duration + /// in seconds if the operation succeeds. + /// + /// Thrown when is . 
+ public async Task StartDubbingAsync(DubbingRequest request, CancellationToken cancellationToken = default) + { + ArgumentNullException.ThrowIfNull(request); + + using MultipartFormDataContent content = []; + + if (!string.IsNullOrEmpty(request.Mode)) + { + content.Add(new StringContent(request.Mode), "mode"); + } + + if (request.File.HasValue) + { + AppendFileToForm(content, "file", new(request.File.Value.FilePath), MediaTypeHeaderValue.Parse(request.File.Value.MediaType)); + } + + if (!string.IsNullOrEmpty(request.CsvFilePath)) + { + AppendFileToForm(content, "csv_file", new(request.CsvFilePath), new("text/csv")); + } + + if (!string.IsNullOrEmpty(request.ForegroundAudioFilePath)) + { + AppendFileToForm(content, "foreground_audio_file", new(request.ForegroundAudioFilePath), new("audio/mpeg")); + } + + if (!string.IsNullOrEmpty(request.BackgroundAudioFilePath)) + { + AppendFileToForm(content, "background_audio_file", new(request.BackgroundAudioFilePath), new("audio/mpeg")); + } + + if (!string.IsNullOrEmpty(request.Name)) + { + content.Add(new StringContent(request.Name), "name"); + } + + if (!string.IsNullOrEmpty(request.SourceUrl)) + { + content.Add(new StringContent(request.SourceUrl), "source_url"); + } + + if (!string.IsNullOrEmpty(request.SourceLanguage)) + { + content.Add(new StringContent(request.SourceLanguage), "source_lang"); + } + + if (!string.IsNullOrEmpty(request.TargetLanguage)) + { + content.Add(new StringContent(request.TargetLanguage), "target_lang"); + } + + if (request.NumSpeakers.HasValue) + { + content.Add(new StringContent(request.NumSpeakers.Value.ToString(CultureInfo.InvariantCulture)), "num_speakers"); + } + + if (request.Watermark.HasValue) + { + content.Add(new StringContent(request.Watermark.Value.ToString()), "watermark"); + } + + if (request.StartTime.HasValue) + { + content.Add(new StringContent(request.StartTime.Value.ToString(CultureInfo.InvariantCulture)), "start_time"); + } + + if (request.EndTime.HasValue) + { + 
content.Add(new StringContent(request.EndTime.Value.ToString(CultureInfo.InvariantCulture)), "end_time"); + } + + if (request.HighestResolution.HasValue) + { + content.Add(new StringContent(request.HighestResolution.Value.ToString()), "highest_resolution"); + } + + if (request.DubbingStudio.HasValue) + { + content.Add(new StringContent(request.DubbingStudio.Value.ToString()), "dubbing_studio"); + } + + using HttpResponseMessage response = await client.Client.PostAsync(GetUrl(), content, cancellationToken).ConfigureAwait(false); + await response.CheckResponseAsync(cancellationToken).ConfigureAwait(false); + + using Stream responseStream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false); + return await JsonSerializer.DeserializeAsync(responseStream, cancellationToken: cancellationToken).ConfigureAwait(false); + } + + private static void AppendFileToForm(MultipartFormDataContent content, string name, FileInfo fileInfo, MediaTypeHeaderValue mediaType) + { + if (!fileInfo.Exists) + { + throw new FileNotFoundException($"File not found: {fileInfo.FullName}"); + } + + FileStream fileStream = fileInfo.OpenRead(); + StreamContent fileContent = new(fileStream); + fileContent.Headers.ContentDisposition = new ContentDispositionHeaderValue("form-data") + { + Name = name, + FileName = fileInfo.Name, + }; + fileContent.Headers.ContentType = mediaType; + content.Add(fileContent); + } + + /// + /// Waits asynchronously for a dubbing operation to complete. This method polls the dubbing status at regular intervals, + /// reporting progress updates if a progress reporter is provided. + /// + /// The ID of the dubbing project. + /// The maximum number of retries for checking the dubbing completion status. If not specified, a default value is used. + /// The time to wait between each status check. If not specified, a default interval is used. + /// An optional implementation to report progress updates, such as status messages and errors. 
+ /// A to cancel the waiting operation. + /// + /// A task that represents the asynchronous wait operation. The task result is if the dubbing completes successfully within the specified number of retries and timeout interval; otherwise, . + /// + /// + /// This method checks the dubbing status by sending requests to the dubbing service at intervals defined by the parameter. + /// If the dubbing status is "dubbed", the method returns . If the dubbing fails or the specified number of is reached without successful completion, the method returns . + /// + public async Task WaitForDubbingCompletionAsync(string dubbingId, int? maxRetries = null, TimeSpan? timeoutInterval = null, IProgress progress = null, CancellationToken cancellationToken = default) + { + maxRetries ??= DefaultMaxRetries; + timeoutInterval ??= DefaultTimeoutInterval; + for (int i = 0; i < maxRetries; i++) + { + DubbingProjectMetadata metadata = await GetDubbingProjectMetadataAsync(dubbingId, cancellationToken).ConfigureAwait(false); + if (metadata.Status.Equals("dubbed", StringComparison.Ordinal)) + { + return true; + } + else if (metadata.Status.Equals("dubbing", StringComparison.Ordinal)) + { + progress?.Report($"Dubbing for {dubbingId} in progress... 
Will check status again in {timeoutInterval.Value.TotalSeconds} seconds."); + await Task.Delay(timeoutInterval.Value, cancellationToken).ConfigureAwait(false); + } + else + { + progress?.Report($"Dubbing for {dubbingId} failed: {metadata.Error}"); + return false; + } + } + progress?.Report($"Dubbing for {dubbingId} timed out or exceeded expected duration."); + return false; + } + + private async Task GetDubbingProjectMetadataAsync(string dubbingId, CancellationToken cancellationToken = default) + { + string url = $"{GetUrl()}/{dubbingId}"; + HttpResponseMessage response = await client.Client.GetAsync(url, cancellationToken).ConfigureAwait(false); + await response.CheckResponseAsync(cancellationToken).ConfigureAwait(false); + string responseBody = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false); + return JsonSerializer.Deserialize(responseBody) + ?? throw new JsonException("Could not deserialize the dubbing project metadata!"); + } + + /// + /// Retrieves the dubbed file asynchronously as a sequence of byte arrays. + /// + /// The ID of the dubbing project. + /// The language code of the dubbed content. + /// The size of the buffer used to read data from the response stream. Default is 8192 bytes. + /// A to cancel the operation. + /// + /// An asynchronous enumerable of byte arrays representing the dubbed file content. Each byte array + /// contains a chunk of the dubbed file data. + /// + /// + /// This method streams the dubbed file content in chunks to optimize memory usage and improve performance. + /// Adjust the parameter based on your specific requirements to achieve optimal performance. 
+ /// + public async IAsyncEnumerable GetDubbedFileAsync(string dubbingId, string languageCode, int bufferSize = 8192, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + string url = $"{GetUrl()}/{dubbingId}/audio/{languageCode}"; + using HttpResponseMessage response = await client.Client.GetAsync(url, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + await response.CheckResponseAsync(cancellationToken).ConfigureAwait(false); + + using Stream responseStream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false); + byte[] buffer = new byte[bufferSize]; + int bytesRead; + while ((bytesRead = await responseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false)) > 0) + { + byte[] chunk = new byte[bytesRead]; + Array.Copy(buffer, chunk, bytesRead); + yield return chunk; + } + } + + /// + /// Retrieves the transcript for the dub asynchronously in the specified format (SRT or WebVTT). + /// + /// The ID of the dubbing project. + /// The language code of the transcript. + /// Optional. The format type of the transcript file, either 'srt' or 'webvtt'. + /// A to cancel the operation. + /// + /// A task representing the asynchronous operation. The task completes with the transcript content + /// as a string in the specified format. + /// + /// + /// If is not specified, the method retrieves the transcript in its default format. 
+ /// + public async Task GetTranscriptForDubAsync(string dubbingId, string languageCode, string formatType = null, CancellationToken cancellationToken = default) + { + string url = $"{GetUrl()}/{dubbingId}/transcript/{languageCode}"; + if (!string.IsNullOrEmpty(formatType)) + { + url += $"?format_type={formatType}"; + } + using HttpResponseMessage response = await client.Client.GetAsync(url, cancellationToken).ConfigureAwait(false); + await response.CheckResponseAsync(cancellationToken).ConfigureAwait(false); + return await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false); + } +} diff --git a/ElevenLabs-DotNet/Dubbing/DubbingProjectMetadata.cs b/ElevenLabs-DotNet/Dubbing/DubbingProjectMetadata.cs new file mode 100644 index 0000000..ba70bba --- /dev/null +++ b/ElevenLabs-DotNet/Dubbing/DubbingProjectMetadata.cs @@ -0,0 +1,22 @@ +namespace ElevenLabs.Dubbing; + +using System.Collections.Generic; +using System.Text.Json.Serialization; + +public sealed class DubbingProjectMetadata +{ + [JsonPropertyName("dubbing_id")] + public string DubbingId { get; set; } + + [JsonPropertyName("name")] + public string Name { get; set; } + + [JsonPropertyName("status")] + public string Status { get; set; } + + [JsonPropertyName("target_languages")] + public List TargetLanguages { get; set; } + + [JsonPropertyName("error")] + public string Error { get; set; } +} diff --git a/ElevenLabs-DotNet/Dubbing/DubbingRequest.cs b/ElevenLabs-DotNet/Dubbing/DubbingRequest.cs new file mode 100644 index 0000000..1573acc --- /dev/null +++ b/ElevenLabs-DotNet/Dubbing/DubbingRequest.cs @@ -0,0 +1,85 @@ +namespace ElevenLabs.Dubbing; + +public sealed class DubbingRequest +{ + /// + /// automatic or manual. 
/// <summary>
/// Describes a dubbing job: the source media (file or URL), the languages involved and optional processing settings.
/// Every property is init-only.
/// </summary>
public sealed class DubbingRequest
{
    /// <summary>
    /// automatic or manual. Manual mode is only supported when creating a dubbing studio project
    /// </summary>
    public string Mode { get; init; } = "automatic";

    /// <summary>
    /// A video (MediaType: "video/mp4") or audio (MediaType: "audio/mpeg") file intended for voice cloning
    /// </summary>
    public (string FilePath, string MediaType)? File { get; init; }

    /// <summary>
    /// CSV file containing transcription/translation metadata
    /// </summary>
    public string CsvFilePath { get; init; }

    /// <summary>
    /// For use only with csv input
    /// </summary>
    public string ForegroundAudioFilePath { get; init; }

    /// <summary>
    /// For use only with csv input
    /// </summary>
    public string BackgroundAudioFilePath { get; init; }

    /// <summary>
    /// Name of the dubbing project.
    /// </summary>
    public string Name { get; init; }

    /// <summary>
    /// URL of the source video/audio file.
    /// </summary>
    public string SourceUrl { get; init; }

    /// <summary>
    /// Source language.
    /// </summary>
    /// <remarks>
    /// A list of supported languages can be found at: https://elevenlabs.io/docs/api-reference/how-to-dub-a-video#list-of-supported-languages-for-dubbing
    /// </remarks>
    public string SourceLanguage { get; init; }

    /// <summary>
    /// The Target language to dub the content into. Can be none if dubbing studio editor is enabled and running manual mode
    /// </summary>
    /// <remarks>
    /// A list of supported languages can be found at: https://elevenlabs.io/docs/api-reference/how-to-dub-a-video#list-of-supported-languages-for-dubbing
    /// </remarks>
    public string TargetLanguage { get; init; }

    /// <summary>
    /// Number of speakers to use for the dubbing. Set to 0 to automatically detect the number of speakers
    /// </summary>
    public int? NumSpeakers { get; init; }

    /// <summary>
    /// Whether to apply watermark to the output video.
    /// </summary>
    public bool? Watermark { get; init; }

    /// <summary>
    /// Start time of the source video/audio file.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the unit is not stated here (presumably seconds) - confirm against the API reference.
    /// </remarks>
    public int? StartTime { get; init; }

    /// <summary>
    /// End time of the source video/audio file.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the unit is not stated here (presumably seconds) - confirm against the API reference.
    /// </remarks>
    public int? EndTime { get; init; }

    /// <summary>
    /// Whether to use the highest resolution available.
    /// </summary>
    public bool? HighestResolution { get; init; }

    /// <summary>
    /// Whether to prepare dub for edits in dubbing studio.
    /// </summary>
    public bool? DubbingStudio { get; init; }
}