Skip to content

Commit b8c9020

Browse files
committed
Speech to text
1 parent 1892e46 commit b8c9020

6 files changed

Lines changed: 127 additions & 17 deletions

File tree

Siv3D/include/Siv3D/OpenAI/Speech.hpp

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,68 @@ namespace s3d
1919
{
2020
namespace Speech
2121
{
22+
namespace Model
23+
{
24+
/// @brief 音声合成モデル tts-1 | Speech Synthesis Model tts-1
25+
/// @see https://platform.openai.com/docs/models/tts
26+
inline constexpr StringView TTS1{ U"tts-1" };
27+
28+
/// @brief 音声合成モデル tts-1-hd | Speech Synthesis Model tts-1-hd
29+
/// @see https://platform.openai.com/docs/models/tts
30+
/// @remark tts-1 よりも高品質な音声を生成します。 | Generates higher quality audio than tts-1.
31+
inline constexpr StringView TTS1HD{ U"tts-1-hd" };
32+
}
33+
34+
namespace Voice
35+
{
36+
inline constexpr StringView Alloy{ U"alloy" };
37+
38+
inline constexpr StringView Echo{ U"echo" };
39+
40+
inline constexpr StringView Fable{ U"fable" };
41+
42+
inline constexpr StringView Onyx{ U"onyx" };
43+
44+
inline constexpr StringView Nova{ U"nova" };
45+
46+
inline constexpr StringView Shimmer{ U"shimmer" };
47+
}
48+
49+
namespace ResponseFormat
50+
{
51+
inline constexpr StringView MP3{ U"mp3" };
52+
53+
inline constexpr StringView Opus{ U"opus" };
54+
55+
inline constexpr StringView AAC{ U"aac" };
56+
57+
inline constexpr StringView FLAC{ U"flac" };
58+
}
59+
60+
/// @brief Upper bound on the length of the input text (4096).
/// @remark Presumably counted in characters, mirroring the OpenAI API limit; confirm against the API reference.
inline constexpr size_t MaxInputLength{ 4096 };

/// @brief Smallest speed value (0.25).
inline constexpr double MinSpeed{ 0.25 };

/// @brief Speed value used when none is specified (1.0).
inline constexpr double DefaultSpeed{ 1.0 };

/// @brief Largest speed value (4.0).
inline constexpr double MaxSpeed{ 4.0 };
67+
2268
/// @brief Parameters of a text-to-speech request.
/// @remark In this diff view, lines following a "NN-" marker are the pre-commit
///         members and lines following a "NN+" marker are their replacements.
struct Request
2369
{
24-
String model = U"tts-1";
70+
/// @brief Model name; defaults to Model::TTS1.
String model{ OpenAI::Speech::Model::TTS1 };
2571

2672
/// @brief Input text; empty unless the caller sets it.
String input;
2773

28-
String voice = U"alloy";
74+
/// @brief Voice name; defaults to Voice::Alloy.
String voice{ OpenAI::Speech::Voice::Alloy };
2975

30-
String responseFormat = U"mp3";
76+
/// @brief Response format name; defaults to ResponseFormat::MP3.
String responseFormat{ OpenAI::Speech::ResponseFormat::MP3 };
3177

32-
double speed = 1.0;
78+
/// @brief Speed value; defaults to DefaultSpeed.
/// @remark Presumably expected to stay within [MinSpeed, MaxSpeed]; nothing here enforces it.
double speed = OpenAI::Speech::DefaultSpeed;
3379
};
3480

35-
bool Create(const Request& request, FilePathView path);
81+
/// @brief Performs a text-to-speech request and waits for it to finish.
/// @param apiKey API key; the definition returns false when it is empty.
/// @param request Request parameters (model, input, voice, response format, speed).
/// @param path File path forwarded to the HTTP request (presumably where the audio is saved; confirm).
/// @return true if the HTTP response was OK, false otherwise.
bool Create(StringView apiKey, const Request& request, FilePathView path);
82+
83+
/// @brief Asynchronous counterpart of `Create`.
/// @param apiKey API key; the underlying implementation returns false when it is empty.
/// @param request Request parameters (model, input, voice, response format, speed).
/// @param path File path forwarded to the HTTP request (presumably where the audio is saved; confirm).
/// @return Task whose bool result has the same meaning as the return value of `Create`.
AsyncTask<bool> CreateAsync(StringView apiKey, const Request& request, FilePathView path);
3684
}
3785
}
3886
}

Siv3D/src/Siv3D/OpenAI/OpenAICommon.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
# pragma once
1313
# include <Siv3D/String.hpp>
14+
# include <Siv3D/Duration.hpp>
1415
# include <Siv3D/HashTable.hpp>
1516

1617
namespace s3d
@@ -20,5 +21,8 @@ namespace s3d
2021
// Creates the HTTP headers.
// apiKey: API key supplied by the caller (presumably embedded in an
// authorization header; confirm in the definition).
// Returns the headers as a name-to-value table.
2222
[[nodiscard]]
2323
HashTable<String, String> MakeHeaders(StringView apiKey);
24+
25+
// 非同期タスクのポーリング間隔
26+
constexpr Milliseconds TaskPollingInterval{ 5 };
2327
}
2428
}

Siv3D/src/Siv3D/OpenAI/SivOpenAIChat.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ namespace s3d
2121
namespace detail
2222
{
2323
// URL of the OpenAI chat API
24-
constexpr URLView ChatV1URL = U"https://api.openai.com/v1/chat/completions";
24+
constexpr URLView ChatCompletionsEndpoint = U"https://api.openai.com/v1/chat/completions";
2525

2626
// チャット API に送信するリクエストを作成する
2727
[[nodiscard]]
@@ -77,7 +77,7 @@ namespace s3d
7777

7878
MemoryWriter memoryWriter;
7979

80-
if (const auto response = SimpleHTTP::Post(detail::ChatV1URL, headers, data.data(), data.size(), memoryWriter))
80+
if (const auto response = SimpleHTTP::Post(detail::ChatCompletionsEndpoint, headers, data.data(), data.size(), memoryWriter))
8181
{
8282
if (const HTTPStatusCode statusCode = response.getStatusCode();
8383
statusCode == HTTPStatusCode::OK)
@@ -120,7 +120,7 @@ namespace s3d
120120

121121
const auto headers = detail::MakeHeaders(apiKey);
122122

123-
return SimpleHTTP::PostAsync(detail::ChatV1URL, headers, data.data(), data.size());
123+
return SimpleHTTP::PostAsync(detail::ChatCompletionsEndpoint, headers, data.data(), data.size());
124124
}
125125

126126
String GetContent(const JSON& response)

Siv3D/src/Siv3D/OpenAI/SivOpenAIEmbedding.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ namespace s3d
2222
namespace detail
2323
{
2424
// URL of the OpenAI Embeddings API
25-
constexpr URLView EmbeddingsV1URL = U"https://api.openai.com/v1/embeddings";
25+
constexpr URLView EmbeddingsEndpoint = U"https://api.openai.com/v1/embeddings";
2626

2727
// Embeddings API に送信するリクエストを作成する
2828
[[nodiscard]]
@@ -113,7 +113,7 @@ namespace s3d
113113

114114
MemoryWriter memoryWriter;
115115

116-
if (const auto response = SimpleHTTP::Post(detail::EmbeddingsV1URL, headers, data.data(), data.size(), memoryWriter))
116+
if (const auto response = SimpleHTTP::Post(detail::EmbeddingsEndpoint, headers, data.data(), data.size(), memoryWriter))
117117
{
118118
if (const HTTPStatusCode statusCode = response.getStatusCode();
119119
statusCode == HTTPStatusCode::OK)
@@ -151,7 +151,7 @@ namespace s3d
151151

152152
const auto headers = detail::MakeHeaders(apiKey);
153153

154-
return SimpleHTTP::PostAsync(detail::EmbeddingsV1URL, headers, data.data(), data.size());
154+
return SimpleHTTP::PostAsync(detail::EmbeddingsEndpoint, headers, data.data(), data.size());
155155
}
156156

157157
Array<float> GetVector(const JSON& response)

Siv3D/src/Siv3D/OpenAI/SivOpenAIImage.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111

1212
# include <Siv3D/OpenAI.hpp>
1313
# include <Siv3D/JSON.hpp>
14-
# include <Siv3D/Duration.hpp>
1514
# include <Siv3D/System.hpp>
1615
# include <Siv3D/SimpleHTTP.hpp>
1716
# include <Siv3D/MemoryViewReader.hpp>
@@ -22,10 +21,7 @@ namespace s3d
2221
namespace detail
2322
{
2423
// URL of the OpenAI image generation API
25-
constexpr URLView ImageGenerationV1URL = U"https://api.openai.com/v1/images/generations";
26-
27-
// Polling interval for asynchronous tasks
28-
constexpr Milliseconds TaskPollingInterval{ 5 };
24+
constexpr URLView ImageGenerationsEndpoint = U"https://api.openai.com/v1/images/generations";
2925

3026
// 画像生成 API に送信するリクエストを作成する
3127
[[nodiscard]]
@@ -59,7 +55,7 @@ namespace s3d
5955
const auto headers = detail::MakeHeaders(apiKey);
6056

6157
// 画像生成 API に非同期でリクエストを送信する
62-
AsyncHTTPTask task1 = SimpleHTTP::PostAsync(detail::ImageGenerationV1URL, headers, data.data(), data.size());
58+
AsyncHTTPTask task1 = SimpleHTTP::PostAsync(detail::ImageGenerationsEndpoint, headers, data.data(), data.size());
6359

6460
// タスクが完了するまでポーリングする
6561
while (not task1.isReady())

Siv3D/src/Siv3D/OpenAI/SivOpenAISpeech.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,70 @@
1010
//-----------------------------------------------
1111

1212
# include <Siv3D/OpenAI.hpp>
13+
# include <Siv3D/JSON.hpp>
14+
# include <Siv3D/System.hpp>
15+
# include "OpenAICommon.hpp"
1316

1417
namespace s3d
1518
{
19+
namespace detail
20+
{
21+
// URL of the OpenAI text-to-speech (audio/speech) API
constexpr URLView TextToSpeechEndpoint = U"https://api.openai.com/v1/audio/speech";
1622

23+
[[nodiscard]]
24+
static std::string MakeSpeechRequest(const OpenAI::Speech::Request& request)
25+
{
26+
JSON json;
27+
json[U"model"] = request.model;
28+
json[U"input"] = request.input;
29+
json[U"voice"] = request.voice;
30+
json[U"response_format"] = request.responseFormat;
31+
json[U"speed"] = request.speed;
32+
return json.formatUTF8();
33+
}
34+
35+
// Sends a text-to-speech request and blocks until the HTTP task is ready.
// Returns false when the API key is empty or the response is not OK,
// true otherwise. Arguments are taken by value, so this function owns its
// copies for the whole call (it is also the callable handed to Async).
static bool CreateSpeechImpl(const String apiKey, const OpenAI::Speech::Request request, const FilePath path)
{
	// An empty API key is treated as a failure
	if (apiKey.isEmpty())
	{
		return false;
	}

	const auto requestHeaders = MakeHeaders(apiKey);

	const std::string requestBody = MakeSpeechRequest(request);

	AsyncHTTPTask httpTask = SimpleHTTP::PostAsync(TextToSpeechEndpoint, requestHeaders, requestBody.data(), requestBody.size(), path);

	// Poll until the task has completed
	for (;;)
	{
		if (httpTask.isReady())
		{
			break;
		}

		System::Sleep(TaskPollingInterval);
	}

	// Success is determined solely by the HTTP response status
	return httpTask.getResponse().isOK();
}
62+
}
63+
64+
namespace OpenAI
65+
{
66+
namespace Speech
67+
{
68+
bool Create(const StringView apiKey, const Request& request, const FilePathView path)
69+
{
70+
return detail::CreateSpeechImpl(String{ apiKey }, request, FilePath{ path });
71+
}
72+
73+
// Asynchronous entry point: hands detail::CreateSpeechImpl to Async together
// with owning String / FilePath values built from the views (presumably so the
// task does not depend on the lifetime of the caller's buffers).
AsyncTask<bool> CreateAsync(const StringView apiKey, const Request& request, const FilePathView path)
{
	AsyncTask<bool> pendingResult = Async(detail::CreateSpeechImpl, String{ apiKey }, request, FilePath{ path });

	return pendingResult;
}
77+
}
78+
}
1779
}

0 commit comments

Comments
 (0)