diff --git a/.gitignore b/.gitignore
index 2e31c64..b1d6357 100644
--- a/.gitignore
+++ b/.gitignore
@@ -339,3 +339,4 @@ ASALocalRun/
 # BeatPulse healthcheck temp database
 healthchecksdb
 .vscode
+*.dubbed.*
diff --git a/ElevenLabs-DotNet-Proxy/Proxy/AbstractAuthenticationFilter.cs b/ElevenLabs-DotNet-Proxy/Proxy/AbstractAuthenticationFilter.cs
index 824f918..8e423c0 100644
--- a/ElevenLabs-DotNet-Proxy/Proxy/AbstractAuthenticationFilter.cs
+++ b/ElevenLabs-DotNet-Proxy/Proxy/AbstractAuthenticationFilter.cs
@@ -1,7 +1,7 @@
 // Licensed under the MIT License. See LICENSE in the project root for license information.
 
-using System.Threading.Tasks;
 using Microsoft.AspNetCore.Http;
+using System.Threading.Tasks;
 
 namespace ElevenLabs.Proxy
 {
diff --git a/ElevenLabs-DotNet-Tests/TestFixture_08_DubbingEndpoint.cs b/ElevenLabs-DotNet-Tests/TestFixture_08_DubbingEndpoint.cs
new file mode 100644
index 0000000..421bd44
--- /dev/null
+++ b/ElevenLabs-DotNet-Tests/TestFixture_08_DubbingEndpoint.cs
@@ -0,0 +1,108 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using ElevenLabs.Dubbing;
+using NUnit.Framework;
+using System;
+using System.IO;
+using System.Threading.Tasks;
+
+namespace ElevenLabs.Tests
+{
+    internal class TestFixture_08_DubbingEndpoint : AbstractTestFixture
+    {
+        [Test]
+        public async Task Test_01_Dubbing_File()
+        {
+            Assert.NotNull(ElevenLabsClient.DubbingEndpoint);
+            var filePath = Path.GetFullPath("../../../Assets/test_sample_01.ogg");
+            var request = new DubbingRequest(filePath, "es", "en", 1);
+            var response = await ElevenLabsClient.DubbingEndpoint.DubAsync(request, progress: new Progress<DubbingProjectMetadata>(metadata =>
+            {
+                switch (metadata.Status)
+                {
+                    case "dubbing":
+                        Console.WriteLine($"Dubbing for {metadata.DubbingId} in progress... Expected Duration: {metadata.ExpectedDurationSeconds:0.00} seconds");
+                        break;
+                    case "dubbed":
+                        Console.WriteLine($"Dubbing for {metadata.DubbingId} complete in {metadata.TimeCompleted.TotalSeconds:0.00} seconds!");
+                        break;
+                    default:
+                        Console.WriteLine($"Status: {metadata.Status}");
+                        break;
+                }
+            }));
+            Assert.IsFalse(string.IsNullOrEmpty(response.DubbingId));
+            Assert.IsTrue(response.ExpectedDurationSeconds > 0);
+
+            var srcFile = new FileInfo(filePath);
+            var dubbedPath = new FileInfo($"{srcFile.FullName}.dubbed.{request.TargetLanguage}{srcFile.Extension}");
+            {
+                await using var fs = File.Open(dubbedPath.FullName, FileMode.Create);
+                await foreach (var chunk in ElevenLabsClient.DubbingEndpoint.GetDubbedFileAsync(response.DubbingId, request.TargetLanguage))
+                {
+                    await fs.WriteAsync(chunk);
+                }
+            }
+            Assert.IsTrue(dubbedPath.Exists);
+            Assert.IsTrue(dubbedPath.Length > 0);
+
+            var transcriptPath = new FileInfo($"{srcFile.FullName}.dubbed.{request.TargetLanguage}.srt");
+            {
+                var transcriptFile = await ElevenLabsClient.DubbingEndpoint.GetTranscriptForDubAsync(response.DubbingId, request.TargetLanguage);
+                await File.WriteAllTextAsync(transcriptPath.FullName, transcriptFile);
+            }
+            Assert.IsTrue(transcriptPath.Exists);
+            Assert.IsTrue(transcriptPath.Length > 0);
+
+            await ElevenLabsClient.DubbingEndpoint.DeleteDubbingProjectAsync(response.DubbingId);
+        }
+
+        [Test]
+        public async Task Test_02_Dubbing_Url()
+        {
+            Assert.NotNull(ElevenLabsClient.DubbingEndpoint);
+
+            var uri = new Uri("https://youtu.be/Zo5-rhYOlNk");
+            var request = new DubbingRequest(uri, "ja", "en", 1, true);
+            var response = await ElevenLabsClient.DubbingEndpoint.DubAsync(request, progress: new Progress<DubbingProjectMetadata>(metadata =>
+            {
+                switch (metadata.Status)
+                {
+                    case "dubbing":
+                        Console.WriteLine($"Dubbing for {metadata.DubbingId} in progress... Expected Duration: {metadata.ExpectedDurationSeconds:0.00} seconds");
+                        break;
+                    case "dubbed":
+                        Console.WriteLine($"Dubbing for {metadata.DubbingId} complete in {metadata.TimeCompleted.TotalSeconds:0.00} seconds!");
+                        break;
+                    default:
+                        Console.WriteLine($"Status: {metadata.Status}");
+                        break;
+                }
+            }));
+            Assert.IsFalse(string.IsNullOrEmpty(response.DubbingId));
+            Assert.IsTrue(response.ExpectedDurationSeconds > 0);
+
+            var assetsDir = Path.GetFullPath("../../../Assets");
+            var dubbedPath = new FileInfo(Path.Combine(assetsDir, $"online.dubbed.{request.TargetLanguage}.mp4"));
+            {
+                await using var fs = File.Open(dubbedPath.FullName, FileMode.Create);
+                await foreach (var chunk in ElevenLabsClient.DubbingEndpoint.GetDubbedFileAsync(response.DubbingId, request.TargetLanguage))
+                {
+                    await fs.WriteAsync(chunk);
+                }
+            }
+            Assert.IsTrue(dubbedPath.Exists);
+            Assert.IsTrue(dubbedPath.Length > 0);
+
+            var transcriptPath = new FileInfo(Path.Combine(assetsDir, $"online.dubbed.{request.TargetLanguage}.srt"));
+            {
+                var transcriptFile = await ElevenLabsClient.DubbingEndpoint.GetTranscriptForDubAsync(response.DubbingId, request.TargetLanguage);
+                await File.WriteAllTextAsync(transcriptPath.FullName, transcriptFile);
+            }
+            Assert.IsTrue(transcriptPath.Exists);
+            Assert.IsTrue(transcriptPath.Length > 0);
+
+            await ElevenLabsClient.DubbingEndpoint.DeleteDubbingProjectAsync(response.DubbingId);
+        }
+    }
+}
diff --git a/ElevenLabs-DotNet/Dubbing/DubbingEndpoint.cs b/ElevenLabs-DotNet/Dubbing/DubbingEndpoint.cs
new file mode 100644
index 0000000..c9d0e98
--- /dev/null
+++ b/ElevenLabs-DotNet/Dubbing/DubbingEndpoint.cs
@@ -0,0 +1,230 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using ElevenLabs.Extensions;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Globalization;
+using System.Net.Http;
+using System.Runtime.CompilerServices;
+using System.Text.Json;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace ElevenLabs.Dubbing
+{
+    /// <summary>
+    /// Access to dubbing an audio or video file into a given language.
+    /// </summary>
+    public sealed class DubbingEndpoint(ElevenLabsClient client) : ElevenLabsBaseEndPoint(client)
+    {
+        private const string DubbingId = "dubbing_id";
+        private const string ExpectedDurationSecs = "expected_duration_sec";
+
+        protected override string Root => "dubbing";
+
+        /// <summary>
+        /// Dubs provided audio or video file into given language.
+        /// </summary>
+        /// <param name="request">The <see cref="DubbingRequest"/> containing dubbing configuration and files. It is disposed once its files have been buffered.</param>
+        /// <param name="maxRetries">Optional, maximum number of status polls before a <see cref="TimeoutException"/> is thrown. Defaults to 60.</param>
+        /// <param name="pollingInterval">Optional, fixed delay between status polls. When omitted, the delay is derived from the expected duration and halved after every poll.</param>
+        /// <param name="progress">Optional, <see cref="IProgress{T}"/> that receives the project metadata after every status poll.</param>
+        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+        /// <returns><see cref="DubbingProjectMetadata"/>.</returns>
+        public async Task<DubbingProjectMetadata> DubAsync(DubbingRequest request, int? maxRetries = null, TimeSpan? pollingInterval = null, IProgress<DubbingProjectMetadata> progress = null, CancellationToken cancellationToken = default)
+        {
+            ArgumentNullException.ThrowIfNull(request);
+            using var payload = new MultipartFormDataContent();
+
+            try
+            {
+                foreach (var (fileName, mediaType, stream) in request.Files)
+                {
+                    await payload.AppendFileToFormAsync("file", stream, fileName, new(mediaType), cancellationToken).ConfigureAwait(false);
+                }
+
+                if (!string.IsNullOrEmpty(request.ProjectName))
+                {
+                    payload.Add(new StringContent(request.ProjectName), "name");
+                }
+
+                if (request.SourceUrl != null)
+                {
+                    payload.Add(new StringContent(request.SourceUrl.ToString()), "source_url");
+                }
+
+                if (!string.IsNullOrEmpty(request.SourceLanguage))
+                {
+                    payload.Add(new StringContent(request.SourceLanguage), "source_lang");
+                }
+
+                if (!string.IsNullOrEmpty(request.TargetLanguage))
+                {
+                    payload.Add(new StringContent(request.TargetLanguage), "target_lang");
+                }
+
+                if (request.NumberOfSpeakers.HasValue)
+                {
+                    payload.Add(new StringContent(request.NumberOfSpeakers.Value.ToString(CultureInfo.InvariantCulture)), "num_speakers");
+                }
+
+                if (request.Watermark.HasValue)
+                {
+                    payload.Add(new StringContent(request.Watermark.Value.ToString()), "watermark");
+                }
+
+                if (request.StartTime.HasValue)
+                {
+                    payload.Add(new StringContent(request.StartTime.Value.ToString(CultureInfo.InvariantCulture)), "start_time");
+                }
+
+                if (request.EndTime.HasValue)
+                {
+                    payload.Add(new StringContent(request.EndTime.Value.ToString(CultureInfo.InvariantCulture)), "end_time");
+                }
+
+                if (request.HighestResolution.HasValue)
+                {
+                    payload.Add(new StringContent(request.HighestResolution.Value.ToString()), "highest_resolution");
+                }
+
+                if (request.DropBackgroundAudio.HasValue)
+                {
+                    payload.Add(new StringContent(request.DropBackgroundAudio.Value.ToString()), "drop_background_audio");
+                }
+            }
+            finally
+            {
+                // The files were buffered into the payload above, so the request's streams can be released before sending.
+                request.Dispose();
+            }
+
+            using var response = await client.Client.PostAsync(GetUrl(), payload, cancellationToken).ConfigureAwait(false);
+            var responseBody = await response.ReadAsStringAsync(EnableDebug, cancellationToken).ConfigureAwait(false);
+            var dubResponse = JsonSerializer.Deserialize<DubbingResponse>(responseBody);
+            var metadata = await WaitForDubbingCompletionAsync(dubResponse, maxRetries ?? 60, pollingInterval ?? TimeSpan.FromSeconds(dubResponse.ExpectedDurationSeconds), pollingInterval == null, progress, cancellationToken).ConfigureAwait(false);
+            return metadata;
+        }
+
+        // Polls the project metadata until it reports "dubbed"; throws on any status other than "dubbing" and times out after maxRetries polls.
+        private async Task<DubbingProjectMetadata> WaitForDubbingCompletionAsync(DubbingResponse dubbingResponse, int maxRetries, TimeSpan pollingInterval, bool adjustInterval, IProgress<DubbingProjectMetadata> progress = null, CancellationToken cancellationToken = default)
+        {
+            var stopwatch = Stopwatch.StartNew();
+
+            for (var i = 1; i < maxRetries + 1; i++)
+            {
+                var metadata = await GetDubbingProjectMetadataAsync(dubbingResponse, cancellationToken).ConfigureAwait(false);
+                metadata.ExpectedDurationSeconds = dubbingResponse.ExpectedDurationSeconds;
+
+                if (metadata.Status.Equals("dubbed", StringComparison.Ordinal))
+                {
+                    stopwatch.Stop();
+                    metadata.TimeCompleted = stopwatch.Elapsed;
+                    progress?.Report(metadata);
+                    return metadata;
+                }
+
+                progress?.Report(metadata);
+
+                if (metadata.Status.Equals("dubbing", StringComparison.Ordinal))
+                {
+                    // Compute the wait first so the debug message reports the delay that is actually used: ExpectedDurationSeconds / 2^i after the i-th poll.
+                    if (adjustInterval)
+                    {
+                        pollingInterval = TimeSpan.FromSeconds(dubbingResponse.ExpectedDurationSeconds / Math.Pow(2, i));
+                    }
+
+                    if (EnableDebug)
+                    {
+                        Console.WriteLine($"Dubbing for {dubbingResponse.DubbingId} in progress... Will check status again in {pollingInterval.TotalSeconds} seconds.");
+                    }
+
+                    await Task.Delay(pollingInterval, cancellationToken).ConfigureAwait(false);
+                }
+                else
+                {
+                    throw new Exception($"Dubbing for {dubbingResponse.DubbingId} failed: {metadata.Error}");
+                }
+            }
+
+            throw new TimeoutException($"Dubbing for {dubbingResponse.DubbingId} timed out or exceeded expected duration.");
+        }
+
+        /// <summary>
+        /// Returns metadata about a dubbing project, including whether it’s still in progress or not.
+        /// </summary>
+        /// <param name="dubbingId">The ID of the dubbing project.</param>
+        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+        /// <returns><see cref="DubbingProjectMetadata"/>.</returns>
+        public async Task<DubbingProjectMetadata> GetDubbingProjectMetadataAsync(string dubbingId, CancellationToken cancellationToken = default)
+        {
+            using var response = await client.Client.GetAsync(GetUrl($"/{dubbingId}"), cancellationToken).ConfigureAwait(false);
+            var responseBody = await response.ReadAsStringAsync(EnableDebug, cancellationToken).ConfigureAwait(false);
+            return JsonSerializer.Deserialize<DubbingProjectMetadata>(responseBody);
+        }
+
+        /// <summary>
+        /// Returns transcript for the dub in the specified format (SRT or WebVTT).
+        /// </summary>
+        /// <param name="dubbingId">The ID of the dubbing project.</param>
+        /// <param name="languageCode">The language code of the transcript.</param>
+        /// <param name="formatType">Optional. The format type of the transcript file, either 'srt' or 'webvtt'.</param>
+        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+        /// <returns>
+        /// A task representing the asynchronous operation. The task completes with the transcript content
+        /// as a string in the specified format.
+        /// </returns>
+        /// <remarks>
+        /// If <paramref name="formatType"/> is not specified, the transcript is retrieved as SRT.
+        /// </remarks>
+        public async Task<string> GetTranscriptForDubAsync(string dubbingId, string languageCode, DubbingFormat formatType = DubbingFormat.Srt, CancellationToken cancellationToken = default)
+        {
+            var @params = new Dictionary<string, string> { { "format_type", formatType.ToString().ToLowerInvariant() } };
+            using var response = await client.Client.GetAsync(GetUrl($"/{dubbingId}/transcript/{languageCode}", @params), cancellationToken).ConfigureAwait(false);
+            return await response.ReadAsStringAsync(EnableDebug, cancellationToken).ConfigureAwait(false);
+        }
+
+        /// <summary>
+        /// Returns dubbed file as a streamed file.
+        /// </summary>
+        /// <param name="dubbingId">The ID of the dubbing project.</param>
+        /// <param name="languageCode">The language code of the dubbed content.</param>
+        /// <param name="bufferSize">The size of the buffer used to read data from the response stream. Default is 8192 bytes.</param>
+        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+        /// <returns>
+        /// An asynchronous enumerable of byte arrays representing the dubbed file content. Each byte array
+        /// contains a chunk of the dubbed file data.
+        /// </returns>
+        /// <remarks>
+        /// This method streams the dubbed file content in chunks to optimize memory usage and improve performance.
+        /// Adjust the <paramref name="bufferSize"/> parameter based on your specific requirements to achieve optimal performance.
+        /// </remarks>
+        public async IAsyncEnumerable<byte[]> GetDubbedFileAsync(string dubbingId, string languageCode, int bufferSize = 8192, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+        {
+            using var response = await client.Client.GetAsync(GetUrl($"/{dubbingId}/audio/{languageCode}"), HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
+            await response.CheckResponseAsync(EnableDebug, cancellationToken).ConfigureAwait(false);
+            await using var responseStream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);
+            var buffer = new byte[bufferSize];
+            int bytesRead;
+
+            while ((bytesRead = await responseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false)) > 0)
+            {
+                var chunk = new byte[bytesRead];
+                Array.Copy(buffer, chunk, bytesRead);
+                yield return chunk;
+            }
+        }
+
+        /// <summary>
+        /// Deletes a dubbing project.
+        /// </summary>
+        /// <param name="dubbingId">The ID of the dubbing project.</param>
+        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+        public async Task DeleteDubbingProjectAsync(string dubbingId, CancellationToken cancellationToken = default)
+        {
+            using var response = await client.Client.DeleteAsync(GetUrl($"/{dubbingId}"), cancellationToken).ConfigureAwait(false);
+            await response.CheckResponseAsync(EnableDebug, cancellationToken).ConfigureAwait(false);
+        }
+    }
+}
diff --git a/ElevenLabs-DotNet/Dubbing/DubbingFormat.cs b/ElevenLabs-DotNet/Dubbing/DubbingFormat.cs
new file mode 100644
index 0000000..a4a749c
--- /dev/null
+++ b/ElevenLabs-DotNet/Dubbing/DubbingFormat.cs
@@ -0,0 +1,14 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System.Runtime.Serialization;
+
+namespace ElevenLabs.Dubbing
+{
+    public enum DubbingFormat
+    {
+        [EnumMember(Value = "srt")]
+        Srt,
+        [EnumMember(Value = "webvtt")]
+        WebVtt
+    }
+}
diff --git a/ElevenLabs-DotNet/Dubbing/DubbingProjectMetadata.cs b/ElevenLabs-DotNet/Dubbing/DubbingProjectMetadata.cs
new file mode 100644
index 0000000..556bc77
--- /dev/null
+++ b/ElevenLabs-DotNet/Dubbing/DubbingProjectMetadata.cs
@@ -0,0 +1,40 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System;
+using System.Collections.Generic;
+using System.Text.Json.Serialization;
+
+namespace ElevenLabs.Dubbing
+{
+    /// <summary>
+    /// Metadata describing a dubbing project: its id, name, status, target languages and any error.
+    /// </summary>
+    public sealed class DubbingProjectMetadata
+    {
+        [JsonInclude]
+        [JsonPropertyName("dubbing_id")]
+        public string DubbingId { get; private set; }
+
+        [JsonInclude]
+        [JsonPropertyName("name")]
+        public string Name { get; private set; }
+
+        [JsonInclude]
+        [JsonPropertyName("status")]
+        public string Status { get; private set; }
+
+        [JsonInclude]
+        [JsonPropertyName("target_languages")]
+        public List<string> TargetLanguages { get; private set; }
+
+        [JsonInclude]
+        [JsonPropertyName("error")]
+        public string Error { get; private set; }
+
+        [JsonIgnore]
+        public float ExpectedDurationSeconds { get; internal set; }
+
+        [JsonIgnore]
+        public TimeSpan TimeCompleted { get; internal set; }
+    }
+}
diff --git a/ElevenLabs-DotNet/Dubbing/DubbingRequest.cs b/ElevenLabs-DotNet/Dubbing/DubbingRequest.cs
new file mode 100644
index 0000000..f00afa5
--- /dev/null
+++ b/ElevenLabs-DotNet/Dubbing/DubbingRequest.cs
@@ -0,0 +1,243 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace ElevenLabs.Dubbing
+{
+    /// <summary>
+    /// Describes the media (local files or a source URL) and options of a dubbing job. Owns the file streams it opens; dispose it to release them.
+    /// </summary>
+    public sealed class DubbingRequest : IDisposable
+    {
+        /// <summary>
+        /// Creates a request that dubs a single local file.
+        /// </summary>
+        public DubbingRequest(
+            string filePath,
+            string targetLanguage,
+            string sourceLanguage = null,
+            int? numberOfSpeakers = null,
+            bool? watermark = null,
+            int? startTime = null,
+            int? endTime = null,
+            bool? highestResolution = null,
+            bool? dropBackgroundAudio = null,
+            string projectName = null)
+            : this([filePath], targetLanguage, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, projectName)
+        {
+        }
+
+        /// <summary>
+        /// Creates a request that dubs one or more local files.
+        /// </summary>
+        public DubbingRequest(
+            IEnumerable<string> filePaths,
+            string targetLanguage,
+            string sourceLanguage = null,
+            int? numberOfSpeakers = null,
+            bool? watermark = null,
+            int? startTime = null,
+            int? endTime = null,
+            bool? highestResolution = null,
+            bool? dropBackgroundAudio = null,
+            string projectName = null)
+            : this(targetLanguage, null, filePaths, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, projectName)
+        {
+        }
+
+        /// <summary>
+        /// Creates a request that dubs media hosted at <paramref name="sourceUrl"/>.
+        /// </summary>
+        public DubbingRequest(
+            Uri sourceUrl,
+            string targetLanguage,
+            string sourceLanguage = null,
+            int? numberOfSpeakers = null,
+            bool? watermark = null,
+            int? startTime = null,
+            int? endTime = null,
+            bool? highestResolution = null,
+            bool? dropBackgroundAudio = null,
+            string projectName = null)
+            : this(targetLanguage, sourceUrl, null, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, projectName)
+        {
+        }
+
+        private DubbingRequest(
+            string targetLanguage,
+            Uri sourceUrl = null,
+            IEnumerable<string> filePaths = null,
+            string sourceLanguage = null,
+            int? numberOfSpeakers = null,
+            bool? watermark = null,
+            int? startTime = null,
+            int? endTime = null,
+            bool? highestResolution = null,
+            bool? dropBackgroundAudio = null,
+            string projectName = null)
+        {
+            ArgumentException.ThrowIfNullOrWhiteSpace(targetLanguage);
+            TargetLanguage = targetLanguage;
+
+            if (filePaths == null && sourceUrl == null)
+            {
+                throw new ArgumentException("Either sourceUrl or filePaths must be provided.");
+            }
+
+            var files = new List<(string, string, Stream)>();
+
+            if (filePaths != null)
+            {
+                try
+                {
+                    foreach (var filePath in filePaths)
+                    {
+                        if (string.IsNullOrWhiteSpace(filePath))
+                        {
+                            throw new ArgumentException("File path cannot be empty.");
+                        }
+
+                        var fileInfo = new FileInfo(filePath);
+
+                        if (!fileInfo.Exists)
+                        {
+                            throw new FileNotFoundException($"File not found: {filePath}");
+                        }
+
+                        var stream = fileInfo.OpenRead();
+                        var extension = fileInfo.Extension.ToLowerInvariant();
+                        var mediaType = extension switch
+                        {
+                            ".3gp" => "video/3gpp",
+                            ".aac" or ".acc" => "audio/aac", // ".acc" was a typo for ".aac"; kept as an alias so earlier behavior is unchanged
+                            ".avi" => "video/x-msvideo",
+                            ".flac" => "audio/flac",
+                            ".ogg" => "audio/ogg",
+                            ".mov" => "video/quicktime",
+                            ".mp3" => "audio/mp3",
+                            ".mp4" => "video/mp4",
+                            ".raw" => "audio/raw",
+                            ".wav" => "audio/wav",
+                            ".webm" => "video/webm",
+                            _ => "application/octet-stream"
+                        };
+                        files.Add((fileInfo.Name, mediaType, stream));
+                    }
+                }
+                catch
+                {
+                    // Construction failed part-way: the caller never receives an instance to dispose, so release the streams opened so far.
+                    foreach (var (_, _, opened) in files)
+                    {
+                        opened.Dispose();
+                    }
+
+                    throw;
+                }
+            }
+
+            Files = files;
+            SourceUrl = sourceUrl;
+            SourceLanguage = sourceLanguage;
+            NumberOfSpeakers = numberOfSpeakers;
+            Watermark = watermark;
+            StartTime = startTime;
+            EndTime = endTime;
+            HighestResolution = highestResolution;
+            DropBackgroundAudio = dropBackgroundAudio;
+            ProjectName = projectName;
+        }
+
+        ~DubbingRequest() => Dispose(false);
+
+        /// <summary>
+        /// Files to dub, as (file name, media type, content stream) tuples.
+        /// </summary>
+        public IReadOnlyList<(string, string, Stream)> Files { get; }
+
+        /// <summary>
+        /// URL of the source video/audio file.
+        /// </summary>
+        public Uri SourceUrl { get; }
+
+        /// <summary>
+        /// Source language.
+        /// </summary>
+        /// <remarks>
+        /// A list of supported languages can be found at: https://elevenlabs.io/docs/api-reference/how-to-dub-a-video#list-of-supported-languages-for-dubbing
+        /// </remarks>
+        public string SourceLanguage { get; }
+
+        /// <summary>
+        /// The Target language to dub the content into. Can be none if dubbing studio editor is enabled and running manual mode
+        /// </summary>
+        /// <remarks>
+        /// A list of supported languages can be found at: https://elevenlabs.io/docs/api-reference/how-to-dub-a-video#list-of-supported-languages-for-dubbing
+        /// </remarks>
+        public string TargetLanguage { get; }
+
+        /// <summary>
+        /// Number of speakers to use for the dubbing. Set to 0 to automatically detect the number of speakers
+        /// </summary>
+        public int? NumberOfSpeakers { get; }
+
+        /// <summary>
+        /// Whether to apply watermark to the output video.
+        /// </summary>
+        public bool? Watermark { get; }
+
+        /// <summary>
+        /// Start time of the source video/audio file.
+        /// </summary>
+        public int? StartTime { get; }
+
+        /// <summary>
+        /// End time of the source video/audio file.
+        /// </summary>
+        public int? EndTime { get; }
+
+        /// <summary>
+        /// Whether to use the highest resolution available.
+        /// </summary>
+        public bool? HighestResolution { get; }
+
+        /// <summary>
+        /// An advanced setting. Whether to drop background audio from the final dub.
+        /// This can improve dub quality where it's known that audio shouldn't have a background track such as for speeches or monologues.
+        /// </summary>
+        public bool? DropBackgroundAudio { get; }
+
+        /// <summary>
+        /// Name of the dubbing project.
+        /// </summary>
+        public string ProjectName { get; }
+
+        private void Dispose(bool disposing)
+        {
+            if (disposing)
+            {
+                if (Files == null) { return; }
+                foreach (var (_, _, stream) in Files)
+                {
+                    try
+                    {
+                        stream?.Close();
+                        stream?.Dispose();
+                    }
+                    catch (Exception e)
+                    {
+                        Console.WriteLine(e);
+                    }
+                }
+            }
+        }
+
+        public void Dispose()
+        {
+            Dispose(true);
+            GC.SuppressFinalize(this);
+        }
+    }
+}
diff --git a/ElevenLabs-DotNet/Dubbing/DubbingResponse.cs b/ElevenLabs-DotNet/Dubbing/DubbingResponse.cs
new file mode 100644
index 0000000..0b18f5d
--- /dev/null
+++ b/ElevenLabs-DotNet/Dubbing/DubbingResponse.cs
@@ -0,0 +1,22 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System.Text.Json.Serialization;
+
+namespace ElevenLabs.Dubbing
+{
+    /// <summary>
+    /// Identifies a dubbing project and its expected duration in seconds; converts implicitly to the project's id string.
+    /// </summary>
+    public sealed class DubbingResponse
+    {
+        [JsonInclude]
+        [JsonPropertyName("dubbing_id")]
+        public string DubbingId { get; private set; }
+
+        [JsonInclude]
+        [JsonPropertyName("expected_duration_sec")]
+        public float ExpectedDurationSeconds { get; private set; }
+
+        public static implicit operator string(DubbingResponse response) => response?.DubbingId;
+    }
+}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/ElevenLabsClient.cs b/ElevenLabs-DotNet/ElevenLabsClient.cs
index 09113bc..8efcedc 100644
--- a/ElevenLabs-DotNet/ElevenLabsClient.cs
+++ b/ElevenLabs-DotNet/ElevenLabsClient.cs
@@ -1,5 +1,6 @@
 // Licensed under the MIT License. See LICENSE in the project root for license information.
 
+using ElevenLabs.Dubbing;
 using ElevenLabs.History;
 using ElevenLabs.Models;
 using ElevenLabs.SoundGeneration;
@@ -69,6 +70,7 @@ public ElevenLabsClient(ElevenLabsAuthentication authentication = null, ElevenLa
             TextToSpeechEndpoint = new TextToSpeechEndpoint(this);
             VoiceGenerationEndpoint = new VoiceGenerationEndpoint(this);
             SoundGenerationEndpoint = new SoundGenerationEndpoint(this);
+            DubbingEndpoint = new DubbingEndpoint(this);
         }
 
         ~ElevenLabsClient()
@@ -143,5 +145,7 @@ private void Dispose(bool disposing)
         public VoiceGenerationEndpoint VoiceGenerationEndpoint { get; }
 
         public SoundGenerationEndpoint SoundGenerationEndpoint { get; }
+
+        public DubbingEndpoint DubbingEndpoint { get; }
     }
 }
diff --git a/ElevenLabs-DotNet/Extensions/HttpResponseMessageExtensions.cs b/ElevenLabs-DotNet/Extensions/HttpResponseMessageExtensions.cs
index e66275f..a7eaaa3 100644
--- a/ElevenLabs-DotNet/Extensions/HttpResponseMessageExtensions.cs
+++ b/ElevenLabs-DotNet/Extensions/HttpResponseMessageExtensions.cs
@@ -5,6 +5,7 @@
 using System.IO;
 using System.Linq;
 using System.Net.Http;
+using System.Net.Http.Headers;
 using System.Runtime.CompilerServices;
 using System.Text;
 using System.Text.Encodings.Web;
@@ -174,5 +175,30 @@ internal static async Task<string> ReadAsStringAsync(this HttpResponseMessage re
 
             return responseAsString;
         }
+
+        /// <summary>
+        /// Buffers <paramref name="stream"/> into memory and appends it to <paramref name="content"/> as a file part.
+        /// </summary>
+        /// <param name="content">The multipart form to append the part to.</param>
+        /// <param name="name">Form field name written to the part's Content-Disposition header.</param>
+        /// <param name="stream">Source of the file bytes; it is copied, not disposed, by this method.</param>
+        /// <param name="fileName">File name written to the part's Content-Disposition header.</param>
+        /// <param name="mediaType">Optional, content type of the part. Defaults to application/octet-stream.</param>
+        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+        internal static async Task AppendFileToFormAsync(this MultipartFormDataContent content, string name, Stream stream, string fileName, MediaTypeHeaderValue mediaType = null, CancellationToken cancellationToken = default)
+        {
+            using var audioData = new MemoryStream();
+            await stream.CopyToAsync(audioData, cancellationToken).ConfigureAwait(false);
+            var fileContent = new ByteArrayContent(audioData.ToArray());
+            const string formData = "form-data";
+            fileContent.Headers.ContentDisposition = new ContentDispositionHeaderValue(formData)
+            {
+                Name = name,
+                FileName = fileName
+            };
+            const string contentType = "application/octet-stream";
+            fileContent.Headers.ContentType = mediaType ?? new MediaTypeHeaderValue(contentType);
+            content.Add(fileContent);
+        }
     }
 }
diff --git a/ElevenLabs-DotNet/Voices/VoicesEndpoint.cs b/ElevenLabs-DotNet/Voices/VoicesEndpoint.cs
index 0246f56..fb0b742 100644
--- a/ElevenLabs-DotNet/Voices/VoicesEndpoint.cs
+++ b/ElevenLabs-DotNet/Voices/VoicesEndpoint.cs
@@ -5,7 +5,6 @@
 using System.Collections.Generic;
 using System.IO;
 using System.Linq;
-using System.Net;
 using System.Net.Http;
 using System.Net.Http.Headers;
 using System.Text.Json;
@@ -196,16 +195,7 @@ public async Task<Voice> AddVoiceAsync(VoiceRequest request, CancellationToken c
 
                 foreach (var (fileName, sample) in request.Samples)
                 {
-                    using var audioData = new MemoryStream();
-                    await sample.CopyToAsync(audioData, cancellationToken).ConfigureAwait(false);
-                    var content = new ByteArrayContent(audioData.ToArray());
-                    content.Headers.ContentType = new MediaTypeHeaderValue("application/octet-stream");
-                    content.Headers.ContentDisposition = new ContentDispositionHeaderValue("form-data")
-                    {
-                        Name = "files",
-                        FileName = fileName
-                    };
-                    payload.Add(content, "files", fileName);
+                    await payload.AppendFileToFormAsync("files", sample, fileName, null, cancellationToken).ConfigureAwait(false);
                 }
 
                 if (request.Labels != null)
@@ -218,12 +208,7 @@ public async Task<Voice> AddVoiceAsync(VoiceRequest request, CancellationToken c
                 request.Dispose();
             }
 
-            using var httpRequest = new HttpRequestMessage(HttpMethod.Post, GetUrl("/add"));
-            httpRequest.Content = payload;
-            httpRequest.Version = HttpVersion.Version10;
-            httpRequest.Headers.ExpectContinue = true;
-            httpRequest.Headers.ConnectionClose = false;
-            using var response = await client.Client.SendAsync(httpRequest, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
+            using var response = await client.Client.PostAsync(GetUrl("/add"), payload, cancellationToken).ConfigureAwait(false);
             var responseAsString = await response.ReadAsStringAsync(EnableDebug, payload, cancellationToken).ConfigureAwait(false);
             var voiceResponse = JsonSerializer.Deserialize(responseAsString, ElevenLabsClient.JsonSerializationOptions);
             return await GetVoiceAsync(voiceResponse.VoiceId, cancellationToken: cancellationToken).ConfigureAwait(false);