Skip to content

Commit

Permalink
Feature Dubbing (#50)
Browse files Browse the repository at this point in the history
- Dubbing access: upload, asynchronous partial download, and transcriptions.
- Dubbing unit test on the sample audio file. Confirmed for videos too; you might just need to enable the watermark, depending on your price tier.
  • Loading branch information
StephenHodgson authored Sep 2, 2024
2 parents d737b88 + 4e00a49 commit 56da036
Show file tree
Hide file tree
Showing 11 changed files with 643 additions and 18 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -339,3 +339,4 @@ ASALocalRun/
# BeatPulse healthcheck temp database
healthchecksdb
.vscode
*.dubbed.*
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using System.Threading.Tasks;
using Microsoft.AspNetCore.Http;
using System.Threading.Tasks;

namespace ElevenLabs.Proxy
{
Expand Down
108 changes: 108 additions & 0 deletions ElevenLabs-DotNet-Tests/TestFixture_08_DubbingEndpoint.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using ElevenLabs.Dubbing;
using NUnit.Framework;
using System;
using System.IO;
using System.Threading.Tasks;

namespace ElevenLabs.Tests
{
/// <summary>
/// Integration tests for the dubbing endpoint: dub a source, download the dubbed media
/// and its transcript, then delete the remote dubbing project.
/// </summary>
internal class TestFixture_08_DubbingEndpoint : AbstractTestFixture
{
    /// <summary>
    /// Dubs the local sample audio file and stores the results next to the source file.
    /// </summary>
    [Test]
    public async Task Test_01_Dubbing_File()
    {
        Assert.NotNull(ElevenLabsClient.DubbingEndpoint);

        var filePath = Path.GetFullPath("../../../Assets/test_sample_01.ogg");
        var request = new DubbingRequest(filePath, "es", "en", 1);
        // Read the language up front: DubAsync disposes the request once the payload has been built.
        var targetLanguage = request.TargetLanguage;
        var response = await ElevenLabsClient.DubbingEndpoint.DubAsync(request, progress: CreateProgressReporter());

        var srcFile = new FileInfo(filePath);
        var dubbedFile = new FileInfo($"{srcFile.FullName}.dubbed.{targetLanguage}{srcFile.Extension}");
        var transcriptFile = new FileInfo($"{srcFile.FullName}.dubbed.{targetLanguage}.srt");

        await DownloadVerifyAndCleanUpAsync(response, targetLanguage, dubbedFile, transcriptFile);
    }

    /// <summary>
    /// Dubs an online video referenced by URL and stores the results in the test assets directory.
    /// </summary>
    [Test]
    public async Task Test_02_Dubbing_Url()
    {
        Assert.NotNull(ElevenLabsClient.DubbingEndpoint);

        var uri = new Uri("https://youtu.be/Zo5-rhYOlNk");
        var request = new DubbingRequest(uri, "ja", "en", 1, true);
        // Read the language up front: DubAsync disposes the request once the payload has been built.
        var targetLanguage = request.TargetLanguage;
        var response = await ElevenLabsClient.DubbingEndpoint.DubAsync(request, progress: CreateProgressReporter());

        var assetsDir = Path.GetFullPath("../../../Assets");
        var dubbedFile = new FileInfo(Path.Combine(assetsDir, $"online.dubbed.{targetLanguage}.mp4"));
        var transcriptFile = new FileInfo(Path.Combine(assetsDir, $"online.dubbed.{targetLanguage}.srt"));

        await DownloadVerifyAndCleanUpAsync(response, targetLanguage, dubbedFile, transcriptFile);
    }

    /// <summary>
    /// Creates the progress reporter shared by both tests; it writes every status update to the console.
    /// </summary>
    private static IProgress<DubbingProjectMetadata> CreateProgressReporter()
        => new Progress<DubbingProjectMetadata>(metadata =>
        {
            switch (metadata.Status)
            {
                case "dubbing":
                    Console.WriteLine($"Dubbing for {metadata.DubbingId} in progress... Expected Duration: {metadata.ExpectedDurationSeconds:0.00} seconds");
                    break;
                case "dubbed":
                    Console.WriteLine($"Dubbing for {metadata.DubbingId} complete in {metadata.TimeCompleted.TotalSeconds:0.00} seconds!");
                    break;
                default:
                    Console.WriteLine($"Status: {metadata.Status}");
                    break;
            }
        });

    /// <summary>
    /// Validates the dubbing response, downloads the dubbed media and transcript to the given files,
    /// and always deletes the remote dubbing project afterwards.
    /// </summary>
    /// <param name="response">Metadata returned by <c>DubAsync</c>.</param>
    /// <param name="targetLanguage">Language code of the dub to download.</param>
    /// <param name="dubbedFile">Destination for the dubbed media.</param>
    /// <param name="transcriptFile">Destination for the transcript.</param>
    private async Task DownloadVerifyAndCleanUpAsync(DubbingProjectMetadata response, string targetLanguage, FileInfo dubbedFile, FileInfo transcriptFile)
    {
        Assert.NotNull(response);

        try
        {
            Assert.IsFalse(string.IsNullOrEmpty(response.DubbingId));
            Assert.IsTrue(response.ExpectedDurationSeconds > 0);

            // Scope the stream so it is flushed and closed before the file is inspected.
            await using (var fs = File.Open(dubbedFile.FullName, FileMode.Create))
            {
                await foreach (var chunk in ElevenLabsClient.DubbingEndpoint.GetDubbedFileAsync(response.DubbingId, targetLanguage))
                {
                    await fs.WriteAsync(chunk);
                }
            }

            // FileInfo caches file-system state; refresh so Exists/Length reflect what was just written.
            dubbedFile.Refresh();
            Assert.IsTrue(dubbedFile.Exists);
            Assert.IsTrue(dubbedFile.Length > 0);

            var transcript = await ElevenLabsClient.DubbingEndpoint.GetTranscriptForDubAsync(response.DubbingId, targetLanguage);
            await File.WriteAllTextAsync(transcriptFile.FullName, transcript);

            transcriptFile.Refresh();
            Assert.IsTrue(transcriptFile.Exists);
            Assert.IsTrue(transcriptFile.Length > 0);
        }
        finally
        {
            // Delete the remote project even when an assertion or download fails,
            // so failed runs do not leave dubbing projects behind on the account.
            if (!string.IsNullOrEmpty(response.DubbingId))
            {
                await ElevenLabsClient.DubbingEndpoint.DeleteDubbingProjectAsync(response.DubbingId);
            }
        }
    }
}
}
222 changes: 222 additions & 0 deletions ElevenLabs-DotNet/Dubbing/DubbingEndpoint.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using ElevenLabs.Extensions;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Net.Http;
using System.Runtime.CompilerServices;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;

namespace ElevenLabs.Dubbing
{
/// <summary>
/// Access to dubbing an audio or video file into a given language.
/// </summary>
public sealed class DubbingEndpoint(ElevenLabsClient client) : ElevenLabsBaseEndPoint(client)
{
    private const string DubbingId = "dubbing_id";
    private const string ExpectedDurationSecs = "expected_duration_sec";

    // Project status values this endpoint reacts to while polling.
    private const string StatusDubbing = "dubbing";
    private const string StatusDubbed = "dubbed";

    // Number of status polls performed when the caller does not specify maxRetries.
    private const int DefaultMaxRetries = 60;

    // Lower bound for the automatically shrinking polling interval. Without it the interval
    // halves towards zero, which floods the API and burns through the retry budget in moments.
    private static readonly TimeSpan MinimumAdjustedPollingInterval = TimeSpan.FromSeconds(1);

    protected override string Root => "dubbing";

    /// <summary>
    /// Dubs provided audio or video file into given language and waits until the dub has finished.
    /// </summary>
    /// <param name="request">The <see cref="DubbingRequest"/> containing dubbing configuration and files. It is disposed once the request payload has been built.</param>
    /// <param name="maxRetries">Optional, maximum number of status polls before giving up. Defaults to 60.</param>
    /// <param name="pollingInterval">
    /// Optional, fixed delay between status polls. When omitted, the delay is derived from the expected
    /// duration reported by the service and halved after every poll, down to a minimum of one second.
    /// </param>
    /// <param name="progress">Optional, receives the project metadata after every status poll.</param>
    /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
    /// <returns> <see cref="DubbingProjectMetadata"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    /// <exception cref="TimeoutException">The dub did not complete within <paramref name="maxRetries"/> polls.</exception>
    /// <exception cref="Exception">The project reported a status other than "dubbing" or "dubbed".</exception>
    public async Task<DubbingProjectMetadata> DubAsync(DubbingRequest request, int? maxRetries = null, TimeSpan? pollingInterval = null, IProgress<DubbingProjectMetadata> progress = null, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);
        using var payload = new MultipartFormDataContent();

        try
        {
            foreach (var (fileName, mediaType, stream) in request.Files)
            {
                await payload.AppendFileToFormAsync("file", stream, fileName, new(mediaType), cancellationToken).ConfigureAwait(false);
            }

            AddFormField(payload, "name", request.ProjectName);
            AddFormField(payload, "source_url", request.SourceUrl?.ToString());
            AddFormField(payload, "source_lang", request.SourceLanguage);
            AddFormField(payload, "target_lang", request.TargetLanguage);
            AddFormField(payload, "num_speakers", request.NumberOfSpeakers?.ToString(CultureInfo.InvariantCulture));
            AddFormField(payload, "watermark", request.Watermark?.ToString());
            AddFormField(payload, "start_time", request.StartTime?.ToString(CultureInfo.InvariantCulture));
            AddFormField(payload, "end_time", request.EndTime?.ToString(CultureInfo.InvariantCulture));
            AddFormField(payload, "highest_resolution", request.HighestResolution?.ToString());
        }
        finally
        {
            // NOTE(review): the request is released before the POST, which presumes
            // AppendFileToFormAsync has already buffered the file contents - confirm.
            request.Dispose();
        }

        using var response = await client.Client.PostAsync(GetUrl(), payload, cancellationToken).ConfigureAwait(false);
        var responseBody = await response.ReadAsStringAsync(EnableDebug, cancellationToken).ConfigureAwait(false);
        var dubResponse = JsonSerializer.Deserialize<DubbingResponse>(responseBody);

        // Without an explicit interval, start from the expected duration and let the wait loop shrink it.
        var initialInterval = pollingInterval ?? TimeSpan.FromSeconds(dubResponse.ExpectedDurationSeconds);
        var adjustInterval = pollingInterval == null;

        return await WaitForDubbingCompletionAsync(dubResponse, maxRetries ?? DefaultMaxRetries, initialInterval, adjustInterval, progress, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Adds a string form field to the payload, skipping values that are null or empty.
    /// </summary>
    private static void AddFormField(MultipartFormDataContent payload, string name, string value)
    {
        if (!string.IsNullOrEmpty(value))
        {
            payload.Add(new StringContent(value), name);
        }
    }

    /// <summary>
    /// Throws an <see cref="ArgumentException"/> when a required route segment is null, empty or whitespace,
    /// so a request is never sent to an unintended route.
    /// </summary>
    private static void ThrowIfNullOrWhiteSpace(string value, string paramName)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            throw new ArgumentException("Value cannot be null, empty or whitespace.", paramName);
        }
    }

    /// <summary>
    /// Polls the dubbing project until it reports "dubbed", fails, or the retry budget is exhausted.
    /// </summary>
    /// <param name="dubbingResponse">The response returned when the dub was requested.</param>
    /// <param name="maxRetries">Maximum number of status polls.</param>
    /// <param name="pollingInterval">Delay between polls; only the starting value when <paramref name="adjustInterval"/> is true.</param>
    /// <param name="adjustInterval">When true, the delay is recomputed after every poll as expected duration / 2^attempt, never below one second.</param>
    /// <param name="progress">Optional, receives the metadata of every poll.</param>
    /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
    /// <returns>The metadata of the completed project, with <see cref="DubbingProjectMetadata.TimeCompleted"/> set to the time spent waiting.</returns>
    private async Task<DubbingProjectMetadata> WaitForDubbingCompletionAsync(DubbingResponse dubbingResponse, int maxRetries, TimeSpan pollingInterval, bool adjustInterval, IProgress<DubbingProjectMetadata> progress = null, CancellationToken cancellationToken = default)
    {
        var stopwatch = Stopwatch.StartNew();

        // "attempt <= maxRetries" rather than "attempt < maxRetries + 1": the latter overflows for int.MaxValue.
        for (var attempt = 1; attempt <= maxRetries; attempt++)
        {
            var metadata = await GetDubbingProjectMetadataAsync(dubbingResponse, cancellationToken).ConfigureAwait(false);

            if (metadata is null)
            {
                throw new InvalidOperationException($"Failed to read the status of dubbing project {dubbingResponse.DubbingId}.");
            }

            metadata.ExpectedDurationSeconds = dubbingResponse.ExpectedDurationSeconds;

            // Static string.Equals tolerates a missing status instead of throwing NullReferenceException.
            if (string.Equals(metadata.Status, StatusDubbed, StringComparison.Ordinal))
            {
                stopwatch.Stop();
                metadata.TimeCompleted = stopwatch.Elapsed;
                progress?.Report(metadata);
                return metadata;
            }

            progress?.Report(metadata);

            if (!string.Equals(metadata.Status, StatusDubbing, StringComparison.Ordinal))
            {
                throw new Exception($"Dubbing for {dubbingResponse.DubbingId} failed: {metadata.Error}");
            }

            if (adjustInterval)
            {
                // Poll more often as the expected completion time approaches, but never faster than the floor.
                var shortened = TimeSpan.FromSeconds(dubbingResponse.ExpectedDurationSeconds / Math.Pow(2, attempt));
                pollingInterval = shortened > MinimumAdjustedPollingInterval ? shortened : MinimumAdjustedPollingInterval;
            }

            if (EnableDebug)
            {
                // Logged after the adjustment so the message states the delay that is actually used.
                Console.WriteLine($"Dubbing for {dubbingResponse.DubbingId} in progress... Will check status again in {pollingInterval.TotalSeconds} seconds.");
            }

            await Task.Delay(pollingInterval, cancellationToken).ConfigureAwait(false);
        }

        throw new TimeoutException($"Dubbing for {dubbingResponse.DubbingId} timed out or exceeded expected duration.");
    }

    /// <summary>
    /// Returns metadata about a dubbing project, including whether it’s still in progress or not.
    /// </summary>
    /// <param name="dubbingId">The ID of the dubbing project.</param>
    /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
    /// <returns><see cref="DubbingProjectMetadata"/>.</returns>
    /// <exception cref="ArgumentException"><paramref name="dubbingId"/> is null, empty or whitespace.</exception>
    public async Task<DubbingProjectMetadata> GetDubbingProjectMetadataAsync(string dubbingId, CancellationToken cancellationToken = default)
    {
        ThrowIfNullOrWhiteSpace(dubbingId, nameof(dubbingId));
        using var response = await client.Client.GetAsync(GetUrl($"/{dubbingId}"), cancellationToken).ConfigureAwait(false);
        var responseBody = await response.ReadAsStringAsync(EnableDebug, cancellationToken).ConfigureAwait(false);
        return JsonSerializer.Deserialize<DubbingProjectMetadata>(responseBody);
    }

    /// <summary>
    /// Returns transcript for the dub in the specified format (SRT or WebVTT).
    /// </summary>
    /// <param name="dubbingId">The ID of the dubbing project.</param>
    /// <param name="languageCode">The language code of the transcript.</param>
    /// <param name="formatType">Optional. The format type of the transcript file, either 'srt' or 'webvtt'. Defaults to 'srt'.</param>
    /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
    /// <returns>
    /// A task representing the asynchronous operation. The task completes with the transcript content
    /// as a string in the specified format.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="dubbingId"/> or <paramref name="languageCode"/> is null, empty or whitespace.</exception>
    public async Task<string> GetTranscriptForDubAsync(string dubbingId, string languageCode, DubbingFormat formatType = DubbingFormat.Srt, CancellationToken cancellationToken = default)
    {
        ThrowIfNullOrWhiteSpace(dubbingId, nameof(dubbingId));
        ThrowIfNullOrWhiteSpace(languageCode, nameof(languageCode));

        // Invariant casing: this is a wire value and must not depend on the current culture.
        var @params = new Dictionary<string, string> { { "format_type", formatType.ToString().ToLowerInvariant() } };
        using var response = await client.Client.GetAsync(GetUrl($"/{dubbingId}/transcript/{languageCode}", @params), cancellationToken).ConfigureAwait(false);
        return await response.ReadAsStringAsync(EnableDebug, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Returns dubbed file as a streamed file.
    /// </summary>
    /// <param name="dubbingId">The ID of the dubbing project.</param>
    /// <param name="languageCode">The language code of the dubbed content.</param>
    /// <param name="bufferSize">The size of the buffer used to read data from the response stream. Must be positive. Default is 8192 bytes.</param>
    /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
    /// <returns>
    /// An asynchronous enumerable of byte arrays representing the dubbed file content. Each byte array
    /// contains a chunk of the dubbed file data, at most <paramref name="bufferSize"/> bytes long.
    /// </returns>
    /// <remarks>
    /// The response is read with <see cref="HttpCompletionOption.ResponseHeadersRead"/> and yielded chunk by chunk.
    /// Because this is an async iterator, arguments are validated when enumeration starts, not when the method is called.
    /// </remarks>
    /// <exception cref="ArgumentException"><paramref name="dubbingId"/> or <paramref name="languageCode"/> is null, empty or whitespace.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="bufferSize"/> is zero or negative.</exception>
    public async IAsyncEnumerable<byte[]> GetDubbedFileAsync(string dubbingId, string languageCode, int bufferSize = 8192, [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        ThrowIfNullOrWhiteSpace(dubbingId, nameof(dubbingId));
        ThrowIfNullOrWhiteSpace(languageCode, nameof(languageCode));

        if (bufferSize <= 0)
        {
            // A zero-length buffer would make the first read return 0 and silently produce an empty file.
            throw new ArgumentOutOfRangeException(nameof(bufferSize), bufferSize, "Buffer size must be greater than zero.");
        }

        using var response = await client.Client.GetAsync(GetUrl($"/{dubbingId}/audio/{languageCode}"), HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
        await response.CheckResponseAsync(EnableDebug, cancellationToken).ConfigureAwait(false);
        await using var responseStream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);
        var buffer = new byte[bufferSize];
        int bytesRead;

        while ((bytesRead = await responseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false)) > 0)
        {
            // Hand out a copy: the read buffer is reused for the next chunk.
            yield return buffer.AsSpan(0, bytesRead).ToArray();
        }
    }

    /// <summary>
    /// Deletes a dubbing project.
    /// </summary>
    /// <param name="dubbingId">The ID of the dubbing project.</param>
    /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
    /// <exception cref="ArgumentException"><paramref name="dubbingId"/> is null, empty or whitespace.</exception>
    public async Task DeleteDubbingProjectAsync(string dubbingId, CancellationToken cancellationToken = default)
    {
        ThrowIfNullOrWhiteSpace(dubbingId, nameof(dubbingId));
        using var response = await client.Client.DeleteAsync(GetUrl($"/{dubbingId}"), cancellationToken).ConfigureAwait(false);
        await response.CheckResponseAsync(EnableDebug, cancellationToken).ConfigureAwait(false);
    }
}
}
14 changes: 14 additions & 0 deletions ElevenLabs-DotNet/Dubbing/DubbingFormat.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using System.Runtime.Serialization;

namespace ElevenLabs.Dubbing
{
/// <summary>
/// Transcript file formats that can be requested for a dub.
/// </summary>
/// <remarks>
/// NOTE(review): DubbingEndpoint.GetTranscriptForDubAsync builds the "format_type" query value from the
/// lower-cased member name, not from the EnumMember value; the two currently coincide.
/// </remarks>
public enum DubbingFormat
{
/// <summary>
/// SRT transcript format ("srt").
/// </summary>
[EnumMember(Value = "srt")]
Srt,
/// <summary>
/// WebVTT transcript format ("webvtt").
/// </summary>
[EnumMember(Value = "webvtt")]
WebVtt
}
}
37 changes: 37 additions & 0 deletions ElevenLabs-DotNet/Dubbing/DubbingProjectMetadata.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using System;
using System.Collections.Generic;
using System.Text.Json.Serialization;

namespace ElevenLabs.Dubbing
{
/// <summary>
/// Metadata about a dubbing project, deserialized from the JSON returned by the dubbing endpoint.
/// </summary>
public sealed class DubbingProjectMetadata
{
/// <summary>
/// The ID of the dubbing project ("dubbing_id").
/// </summary>
[JsonInclude]
[JsonPropertyName("dubbing_id")]
public string DubbingId { get; private set; }

/// <summary>
/// The name of the dubbing project ("name").
/// </summary>
[JsonInclude]
[JsonPropertyName("name")]
public string Name { get; private set; }

/// <summary>
/// The current project status ("status"). The endpoint treats "dubbing" as still in progress,
/// "dubbed" as complete, and any other value as a failure.
/// </summary>
[JsonInclude]
[JsonPropertyName("status")]
public string Status { get; private set; }

/// <summary>
/// The target languages of the project ("target_languages").
/// NOTE(review): presumably language codes such as "es" - confirm against the API.
/// </summary>
[JsonInclude]
[JsonPropertyName("target_languages")]
public List<string> TargetLanguages { get; private set; }

/// <summary>
/// The error reported for the project ("error"); included in the exception message when dubbing fails.
/// </summary>
[JsonInclude]
[JsonPropertyName("error")]
public string Error { get; private set; }

/// <summary>
/// Expected dubbing duration in seconds. Not part of the JSON payload; the endpoint copies it
/// from the initial dubbing response while polling for completion.
/// </summary>
[JsonIgnore]
public float ExpectedDurationSeconds { get; internal set; }

/// <summary>
/// Elapsed time the endpoint spent waiting until the project reported "dubbed". Not part of the
/// JSON payload; it is only set on the metadata of a completed project.
/// </summary>
[JsonIgnore]
public TimeSpan TimeCompleted { get; internal set; }
}
}
Loading

0 comments on commit 56da036

Please sign in to comment.