|
| 1 | +using System; |
| 2 | +using System.Collections.Generic; |
| 3 | +using System.IO; |
| 4 | +using System.Net.Http; |
| 5 | +using System.Text; |
| 6 | +using System.Threading.Tasks; |
| 7 | +using OpenAI_API.Chat; |
| 8 | +using OpenAI_API.Models; |
| 9 | +using static System.Net.WebRequestMethods; |
| 10 | + |
namespace OpenAI_API.Audio
{
    /// <summary>
    /// The Endpoint for the Text to Speech API. This allows you to generate audio from text. See <seealso href="https://platform.openai.com/docs/guides/text-to-speech"/>
    /// </summary>
    public class TextToSpeechEndpoint : EndpointBase, ITextToSpeechEndpoint
    {
        /// <inheritdoc/>
        protected override string Endpoint => "audio/speech";

        /// <summary>
        /// Default parameters, for example a default voice or model. The overloads that take individual
        /// parameters (<c>input</c>, <c>voice</c>, <c>speed</c>, ...) use these values for every optional argument
        /// that is left <c>null</c>. The overloads that take a <see cref="TextToSpeechRequest"/> do not merge
        /// these defaults into the request object you pass.
        /// </summary>
        public TextToSpeechRequest DefaultTTSRequestArgs { get; set; } = new TextToSpeechRequest();

        /// <summary>
        /// Constructor of the api endpoint. Rather than instantiating this yourself, access it through an instance of <see cref="OpenAIAPI"/>.
        /// </summary>
        /// <param name="api">Pass in the instance of the api</param>
        internal TextToSpeechEndpoint(OpenAIAPI api) : base(api) { }

        /// <summary>
        /// Calls the API to create speech from text, and returns the raw stream of the audio file.
        /// This method does not dispose the returned stream; the caller should do so when finished with it.
        /// </summary>
        /// <param name="request">The text to speech request to submit to the API</param>
        /// <returns>A stream of the audio file in the requested format.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
        public async Task<Stream> GetSpeechAsStreamAsync(TextToSpeechRequest request)
        {
            // Fail fast with a clear error instead of posting an empty body to the API.
            if (request == null)
            {
                throw new ArgumentNullException(nameof(request));
            }

            return await HttpRequest(verb: HttpMethod.Post, postData: request);
        }

        /// <summary>
        /// Calls the API to create speech from text, and returns the raw stream of the audio file.
        /// Any optional argument left <c>null</c> falls back to the matching value in <see cref="DefaultTTSRequestArgs"/>.
        /// </summary>
        /// <param name="input">The text to generate audio for. The maximum length is 4096 characters.</param>
        /// <param name="voice">The voice to use when generating the audio. Supported voices can be found in <see cref="TextToSpeechRequest.Voices"/>.</param>
        /// <param name="speed">The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default.</param>
        /// <param name="responseFormat">The default response format is "mp3", but other formats are available in <see cref="TextToSpeechRequest.ResponseFormats"/>. See <seealso href="https://platform.openai.com/docs/guides/text-to-speech/supported-output-formats"/></param>
        /// <param name="model">TTS is an AI model that converts text to natural sounding spoken text. OpenAI offers two different model variants, <see cref="Model.TTS_Speed"/> is optimized for real time text to speech use cases and <see cref="Model.TTS_HD"/> is optimized for quality.</param>
        /// <returns>A stream of the audio file in the requested format.</returns>
        public async Task<Stream> GetSpeechAsStreamAsync(string input, string voice = null, decimal? speed = null, string responseFormat = null, Model model = null)
        {
            var request = BuildRequest(input, voice, speed, responseFormat, model);
            return await GetSpeechAsStreamAsync(request);
        }

        /// <summary>
        /// Calls the API to create speech from text, and saves the audio file to disk.
        /// The file at <paramref name="localPath"/> is created, or overwritten if it already exists.
        /// </summary>
        /// <param name="request">The text to speech request to submit to the API</param>
        /// <param name="localPath">The local path to save the audio file to.</param>
        /// <returns>A <see cref="FileInfo"/> representing the saved speech file.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="request"/> or <paramref name="localPath"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException"><paramref name="localPath"/> is an empty string.</exception>
        public async Task<FileInfo> SaveSpeechToFileAsync(TextToSpeechRequest request, string localPath)
        {
            // Validate everything before the remote call: an unusable path would otherwise only be
            // detected by the FileStream constructor after the speech request has already been made.
            if (request == null)
            {
                throw new ArgumentNullException(nameof(request));
            }
            if (localPath == null)
            {
                throw new ArgumentNullException(nameof(localPath));
            }
            if (localPath.Length == 0)
            {
                throw new ArgumentException("The local path must not be empty.", nameof(localPath));
            }

            using (var audio = await GetSpeechAsStreamAsync(request))
            using (var target = new FileStream(localPath, FileMode.Create))
            {
                await audio.CopyToAsync(target);
            }

            return new FileInfo(localPath);
        }

        /// <summary>
        /// Calls the API to create speech from text, and saves the audio file to disk.
        /// Any optional argument left <c>null</c> falls back to the matching value in <see cref="DefaultTTSRequestArgs"/>.
        /// </summary>
        /// <param name="input">The text to generate audio for. The maximum length is 4096 characters.</param>
        /// <param name="localPath">The local path to save the audio file to.</param>
        /// <param name="voice">The voice to use when generating the audio. Supported voices can be found in <see cref="TextToSpeechRequest.Voices"/>.</param>
        /// <param name="speed">The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default.</param>
        /// <param name="responseFormat">The default response format is "mp3", but other formats are available in <see cref="TextToSpeechRequest.ResponseFormats"/>. See <seealso href="https://platform.openai.com/docs/guides/text-to-speech/supported-output-formats"/></param>
        /// <param name="model">TTS is an AI model that converts text to natural sounding spoken text. OpenAI offers two different model variants, <see cref="Model.TTS_Speed"/> is optimized for real time text to speech use cases and <see cref="Model.TTS_HD"/> is optimized for quality.</param>
        /// <returns>A <see cref="FileInfo"/> representing the saved speech file.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="localPath"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException"><paramref name="localPath"/> is an empty string.</exception>
        public async Task<FileInfo> SaveSpeechToFileAsync(string input, string localPath, string voice = null, decimal? speed = null, string responseFormat = null, Model model = null)
        {
            var request = BuildRequest(input, voice, speed, responseFormat, model);
            return await SaveSpeechToFileAsync(request, localPath);
        }

        /// <summary>
        /// Builds a request from individual arguments, substituting the value from
        /// <see cref="DefaultTTSRequestArgs"/> for every optional argument that is <c>null</c>.
        /// </summary>
        private TextToSpeechRequest BuildRequest(string input, string voice, decimal? speed, string responseFormat, Model model)
        {
            // DefaultTTSRequestArgs has a public setter, so tolerate it having been assigned null.
            var defaults = DefaultTTSRequestArgs ?? new TextToSpeechRequest();

            return new TextToSpeechRequest()
            {
                Input = input,
                Voice = voice ?? defaults.Voice,
                Speed = speed ?? defaults.Speed,
                Model = model ?? defaults.Model,
                ResponseFormat = responseFormat ?? defaults.ResponseFormat
            };
        }
    }
}
0 commit comments