Skip to content

Commit baad602

Browse files
committed
Add text-to-speech
1 parent 626eb33 commit baad602

13 files changed

+752
-380
lines changed
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
using System.IO;
2+
using System.Threading.Tasks;
3+
using OpenAI_API.Models;
4+
5+
namespace OpenAI_API.Audio
{
    /// <summary>
    /// The endpoint for the Text to Speech API, which generates audio from text. See <seealso href="https://platform.openai.com/docs/guides/text-to-speech"/>
    /// </summary>
    public interface ITextToSpeechEndpoint
    {
        /// <summary>
        /// Default parameters for every request, for example a default voice or model. When a parameter is not supplied on a call but is set here, the call picks up the value set here.
        /// </summary>
        TextToSpeechRequest DefaultTTSRequestArgs { get; set; }

        /// <summary>
        /// Calls the API to create speech from text, and returns the raw stream of the audio file.
        /// </summary>
        /// <param name="request">The text to speech request to submit to the API</param>
        /// <returns>A stream of the audio file in the requested format.</returns>
        Task<Stream> GetSpeechAsStreamAsync(TextToSpeechRequest request);

        /// <summary>
        /// Calls the API to create speech from text, and returns the raw stream of the audio file.
        /// </summary>
        /// <param name="input">The text to generate audio for. The maximum length is 4096 characters.</param>
        /// <param name="voice">The voice to use when generating the audio. Supported voices can be found in <see cref="TextToSpeechRequest.Voices"/>.</param>
        /// <param name="speed">The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default.</param>
        /// <param name="responseFormat">The default response format is "mp3", but other formats are available in <see cref="TextToSpeechRequest.ResponseFormats"/>. See <seealso href="https://platform.openai.com/docs/guides/text-to-speech/supported-output-formats"/></param>
        /// <param name="model">TTS is an AI model that converts text to natural sounding spoken text. OpenAI offers two different model variants, <see cref="Model.TTS_Speed"/> is optimized for real time text to speech use cases and <see cref="Model.TTS_HD"/> is optimized for quality.</param>
        /// <returns>A stream of the audio file in the requested format.</returns>
        Task<Stream> GetSpeechAsStreamAsync(string input, string voice = null, decimal? speed = null, string responseFormat = null, Model model = null);

        /// <summary>
        /// Calls the API to create speech from text, and saves the audio file to disk.
        /// </summary>
        /// <param name="request">The text to speech request to submit to the API</param>
        /// <param name="localPath">The local path to save the audio file to.</param>
        /// <returns>A <see cref="FileInfo"/> representing the saved speech file.</returns>
        Task<FileInfo> SaveSpeechToFileAsync(TextToSpeechRequest request, string localPath);

        /// <summary>
        /// Calls the API to create speech from text, and saves the audio file to disk.
        /// </summary>
        /// <param name="input">The text to generate audio for. The maximum length is 4096 characters.</param>
        /// <param name="localPath">The local path to save the audio file to.</param>
        /// <param name="voice">The voice to use when generating the audio. Supported voices can be found in <see cref="TextToSpeechRequest.Voices"/>.</param>
        /// <param name="speed">The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default.</param>
        /// <param name="responseFormat">The default response format is "mp3", but other formats are available in <see cref="TextToSpeechRequest.ResponseFormats"/>. See <seealso href="https://platform.openai.com/docs/guides/text-to-speech/supported-output-formats"/></param>
        /// <param name="model">TTS is an AI model that converts text to natural sounding spoken text. OpenAI offers two different model variants, <see cref="Model.TTS_Speed"/> is optimized for real time text to speech use cases and <see cref="Model.TTS_HD"/> is optimized for quality.</param>
        /// <returns>A <see cref="FileInfo"/> representing the saved speech file.</returns>
        Task<FileInfo> SaveSpeechToFileAsync(string input, string localPath, string voice = null, decimal? speed = null, string responseFormat = null, Model model = null);
    }
}
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
4+
using System.Net.Http;
5+
using System.Text;
6+
using System.Threading.Tasks;
7+
using OpenAI_API.Chat;
8+
using OpenAI_API.Models;
9+
using static System.Net.WebRequestMethods;
10+
11+
namespace OpenAI_API.Audio
{
    /// <summary>
    /// The endpoint for the Text to Speech API, which generates audio from text. See <seealso href="https://platform.openai.com/docs/guides/text-to-speech"/>
    /// </summary>
    public class TextToSpeechEndpoint : EndpointBase, ITextToSpeechEndpoint
    {
        /// <inheritdoc/>
        protected override string Endpoint => "audio/speech";

        /// <summary>
        /// Default parameters for the overloads that take individual arguments, for example a default voice or model. When an argument is passed as null, the corresponding value set here is used instead.
        /// </summary>
        public TextToSpeechRequest DefaultTTSRequestArgs { get; set; } = new TextToSpeechRequest();

        /// <summary>
        /// Constructor of the api endpoint. Rather than instantiating this yourself, access it through an instance of <see cref="OpenAIAPI"/>.
        /// </summary>
        /// <param name="api">Pass in the instance of the api</param>
        internal TextToSpeechEndpoint(OpenAIAPI api) : base(api) { }

        /// <summary>
        /// Calls the API to create speech from text, and returns the raw stream of the audio file.
        /// The request is submitted as given; <see cref="DefaultTTSRequestArgs"/> is not applied to it.
        /// </summary>
        /// <param name="request">The text to speech request to submit to the API</param>
        /// <returns>A stream of the audio file in the requested format.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="request"/> is null.</exception>
        public async Task<Stream> GetSpeechAsStreamAsync(TextToSpeechRequest request)
        {
            if (request == null)
            {
                throw new ArgumentNullException(nameof(request));
            }

            return await HttpRequest(verb: HttpMethod.Post, postData: request);
        }

        /// <summary>
        /// Calls the API to create speech from text, and returns the raw stream of the audio file.
        /// </summary>
        /// <param name="input">The text to generate audio for. The maximum length is 4096 characters.</param>
        /// <param name="voice">The voice to use when generating the audio. Supported voices can be found in <see cref="TextToSpeechRequest.Voices"/>.</param>
        /// <param name="speed">The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default.</param>
        /// <param name="responseFormat">The default response format is "mp3", but other formats are available in <see cref="TextToSpeechRequest.ResponseFormats"/>. See <seealso href="https://platform.openai.com/docs/guides/text-to-speech/supported-output-formats"/></param>
        /// <param name="model">TTS is an AI model that converts text to natural sounding spoken text. OpenAI offers two different model variants, <see cref="Model.TTS_Speed"/> is optimized for real time text to speech use cases and <see cref="Model.TTS_HD"/> is optimized for quality.</param>
        /// <returns>A stream of the audio file in the requested format.</returns>
        public async Task<Stream> GetSpeechAsStreamAsync(string input, string voice = null, decimal? speed = null, string responseFormat = null, Model model = null)
        {
            var request = CreateRequestWithDefaults(input, voice, speed, responseFormat, model);
            return await GetSpeechAsStreamAsync(request);
        }

        /// <summary>
        /// Calls the API to create speech from text, and saves the audio file to disk.
        /// An existing file at <paramref name="localPath"/> is overwritten.
        /// </summary>
        /// <param name="request">The text to speech request to submit to the API</param>
        /// <param name="localPath">The local path to save the audio file to.</param>
        /// <returns>A <see cref="FileInfo"/> representing the saved speech file.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="localPath"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when <paramref name="localPath"/> is empty.</exception>
        public async Task<FileInfo> SaveSpeechToFileAsync(TextToSpeechRequest request, string localPath)
        {
            // Validate the path before calling the API so an unusable path does not cost a request.
            if (localPath == null)
            {
                throw new ArgumentNullException(nameof(localPath));
            }
            if (localPath.Length == 0)
            {
                throw new ArgumentException("The local path must not be empty.", nameof(localPath));
            }

            using (var stream = await GetSpeechAsStreamAsync(request))
            using (var outputFileStream = new FileStream(localPath, FileMode.Create))
            {
                await stream.CopyToAsync(outputFileStream);
            }
            return new FileInfo(localPath);
        }

        /// <summary>
        /// Calls the API to create speech from text, and saves the audio file to disk.
        /// </summary>
        /// <param name="input">The text to generate audio for. The maximum length is 4096 characters.</param>
        /// <param name="localPath">The local path to save the audio file to.</param>
        /// <param name="voice">The voice to use when generating the audio. Supported voices can be found in <see cref="TextToSpeechRequest.Voices"/>.</param>
        /// <param name="speed">The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default.</param>
        /// <param name="responseFormat">The default response format is "mp3", but other formats are available in <see cref="TextToSpeechRequest.ResponseFormats"/>. See <seealso href="https://platform.openai.com/docs/guides/text-to-speech/supported-output-formats"/></param>
        /// <param name="model">TTS is an AI model that converts text to natural sounding spoken text. OpenAI offers two different model variants, <see cref="Model.TTS_Speed"/> is optimized for real time text to speech use cases and <see cref="Model.TTS_HD"/> is optimized for quality.</param>
        /// <returns>A <see cref="FileInfo"/> representing the saved speech file.</returns>
        public async Task<FileInfo> SaveSpeechToFileAsync(string input, string localPath, string voice = null, decimal? speed = null, string responseFormat = null, Model model = null)
        {
            var request = CreateRequestWithDefaults(input, voice, speed, responseFormat, model);
            return await SaveSpeechToFileAsync(request, localPath);
        }

        /// <summary>
        /// Builds a request from individual arguments, filling every null argument from <see cref="DefaultTTSRequestArgs"/>.
        /// </summary>
        private TextToSpeechRequest CreateRequestWithDefaults(string input, string voice, decimal? speed, string responseFormat, Model model)
        {
            // The defaults property has a public setter, so it may have been set to null;
            // fall back to a fresh request's initial values instead of throwing.
            var defaults = DefaultTTSRequestArgs ?? new TextToSpeechRequest();

            return new TextToSpeechRequest()
            {
                Input = input,
                Voice = voice ?? defaults.Voice,
                Speed = speed ?? defaults.Speed,
                Model = model ?? defaults.Model,
                ResponseFormat = responseFormat ?? defaults.ResponseFormat
            };
        }
    }
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Text;
4+
using Newtonsoft.Json;
5+
6+
namespace OpenAI_API.Audio
{
    /// <summary>
    /// A request to the Text to Speech API. Each property is serialized under the JSON name given by its <see cref="JsonPropertyAttribute"/>.
    /// </summary>
    public class TextToSpeechRequest
    {
        /// <summary>
        /// The model to use for this request
        /// </summary>
        [JsonProperty("model")]
        public string Model { get; set; } = OpenAI_API.Models.Model.DefaultTTSModel;

        /// <summary>
        /// The text to generate audio for. The maximum length is 4096 characters.
        /// </summary>
        [JsonProperty("input")]
        public string Input { get; set; }

        /// <summary>
        /// The voice to use when generating the audio. Supported voices can be found in <see cref="Voices"/>. Defaults to <see cref="Voices.Alloy"/>.
        /// </summary>
        [JsonProperty("voice")]
        public string Voice { get; set; } = Voices.Alloy;

        /// <summary>
        /// The default response format is "mp3", but other formats are available in <see cref="TextToSpeechRequest.ResponseFormats"/>. When left null, the property is omitted from the serialized request. See <seealso href="https://platform.openai.com/docs/guides/text-to-speech/supported-output-formats"/>
        /// </summary>
        [JsonProperty("response_format", DefaultValueHandling = DefaultValueHandling.Ignore)]
        public string ResponseFormat { get; set; } = null;

        /// <summary>
        /// The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default. When left null, the property is omitted from the serialized request.
        /// </summary>
        [JsonProperty("speed", DefaultValueHandling = DefaultValueHandling.Ignore)]
        public decimal? Speed { get; set; } = null;

        /// <summary>
        /// Supported voices are alloy, echo, fable, onyx, nova, and shimmer. Previews of the voices are available in the Text to speech guide. See <seealso href="https://platform.openai.com/docs/guides/text-to-speech/voice-options"/>.
        /// </summary>
        public static class Voices
        {
            /// <summary>
            /// The "alloy" voice.
            /// </summary>
            public const string Alloy = "alloy";
            /// <summary>
            /// The "echo" voice.
            /// </summary>
            public const string Echo = "echo";
            /// <summary>
            /// The "fable" voice.
            /// </summary>
            public const string Fable = "fable";
            /// <summary>
            /// The "onyx" voice.
            /// </summary>
            public const string Onyx = "onyx";
            /// <summary>
            /// The "nova" voice.
            /// </summary>
            public const string Nova = "nova";
            /// <summary>
            /// The "shimmer" voice.
            /// </summary>
            public const string Shimmer = "shimmer";
        }

        /// <summary>
        /// The format to return for the generated audio. See <seealso href="https://platform.openai.com/docs/guides/text-to-speech/supported-output-formats"/>
        /// </summary>
        public static class ResponseFormats
        {
            /// <summary>
            /// The default, industry-standard audio format
            /// </summary>
            public const string MP3 = "mp3";
            /// <summary>
            /// For lossless audio compression, favored by audio enthusiasts for archiving
            /// </summary>
            public const string FLAC = "flac";
            /// <summary>
            /// For digital audio compression, preferred by YouTube, Android, iOS
            /// </summary>
            public const string AAC = "aac";
            /// <summary>
            /// For internet streaming and communication, low latency.
            /// </summary>
            public const string OPUS = "opus";
            /// <summary>
            /// Uncompressed WAV audio.
            /// </summary>
            public const string WAV = "wav";
            /// <summary>
            /// Raw PCM audio samples without a file header.
            /// </summary>
            public const string PCM = "pcm";
        }
    }
}

OpenAI_API/Chat/ChatRequest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ public class ChatRequest
2020
/// The model to use for this request
2121
/// </summary>
2222
[JsonProperty("model")]
23-
public string Model { get; set; } = OpenAI_API.Models.Model.ChatGPTTurbo;
23+
public string Model { get; set; } = OpenAI_API.Models.Model.DefaultChatModel;
2424

2525
/// <summary>
2626
/// The messages to send with this Chat Request

0 commit comments

Comments
 (0)