33package together
44
55import (
6- "context"
7- "net/http"
8- "slices"
9-
106 "github.com/togethercomputer/together-go/internal/apijson"
11- "github.com/togethercomputer/together-go/internal/requestconfig"
127 "github.com/togethercomputer/together-go/option"
13- "github.com/togethercomputer/together-go/packages/param"
148 "github.com/togethercomputer/together-go/packages/respjson"
15- "github.com/togethercomputer/together-go/packages/ssestream"
169)
1710
1811// AudioService contains methods and other services that help with interacting with
@@ -23,6 +16,7 @@ import (
2316// the [NewAudioService] method instead.
2417type AudioService struct {
2518 Options []option.RequestOption
19+ Speech AudioSpeechService
2620 Voices AudioVoiceService
2721 Transcriptions AudioTranscriptionService
2822 Translations AudioTranslationService
@@ -34,34 +28,13 @@ type AudioService struct {
3428func NewAudioService (opts ... option.RequestOption ) (r AudioService ) {
3529 r = AudioService {}
3630 r .Options = opts
31+ r .Speech = NewAudioSpeechService (opts ... )
3732 r .Voices = NewAudioVoiceService (opts ... )
3833 r .Transcriptions = NewAudioTranscriptionService (opts ... )
3934 r .Translations = NewAudioTranslationService (opts ... )
4035 return
4136}
4237
43- // Generate audio from input text
44- func (r * AudioService ) New (ctx context.Context , body AudioNewParams , opts ... option.RequestOption ) (res * http.Response , err error ) {
45- opts = slices .Concat (r .Options , opts )
46- opts = append ([]option.RequestOption {option .WithHeader ("Accept" , "application/octet-stream" )}, opts ... )
47- path := "audio/speech"
48- err = requestconfig .ExecuteNewRequest (ctx , http .MethodPost , path , body , & res , opts ... )
49- return
50- }
51-
52- // Generate audio from input text
53- func (r * AudioService ) NewStreaming (ctx context.Context , body AudioNewParams , opts ... option.RequestOption ) (stream * ssestream.Stream [AudioSpeechStreamChunk ]) {
54- var (
55- raw * http.Response
56- err error
57- )
58- opts = slices .Concat (r .Options , opts )
59- opts = append ([]option.RequestOption {option .WithHeader ("Accept" , "application/octet-stream" ), option .WithJSONSet ("stream" , true )}, opts ... )
60- path := "audio/speech"
61- err = requestconfig .ExecuteNewRequest (ctx , http .MethodPost , path , body , & raw , opts ... )
62- return ssestream .NewStream [AudioSpeechStreamChunk ](ssestream .NewDecoder (raw ), err )
63- }
64-
6538type AudioSpeechStreamChunk struct {
6639 // base64 encoded audio stream
6740 B64 string `json:"b64,required"`
@@ -89,105 +62,3 @@ type AudioSpeechStreamChunkObject string
8962const (
9063 AudioSpeechStreamChunkObjectAudioTtsChunk AudioSpeechStreamChunkObject = "audio.tts.chunk"
9164)
92-
93- type AudioNewParams struct {
94- // Input text to generate the audio for
95- Input string `json:"input,required"`
96- // The name of the model to query.
97- //
98- // [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
99- // The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
100- // canopylabs/orpheus-3b-0.1-ft
101- Model AudioNewParamsModel `json:"model,omitzero,required"`
102- // The voice to use for generating the audio. The voices supported are different
103- // for each model. For eg - for canopylabs/orpheus-3b-0.1-ft, one of the voices
104- // supported is tara, for hexgrad/Kokoro-82M, one of the voices supported is
105- // af_alloy and for cartesia/sonic, one of the voices supported is "friendly
106- // sidekick".
107- //
108- // You can view the voices supported for each model using the /v1/voices endpoint
109- // sending the model name as the query parameter.
110- // [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
111- Voice string `json:"voice,required"`
112- // Sampling rate to use for the output audio. The default sampling rate for
113- // canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
114- // cartesia/sonic is 44100.
115- SampleRate param.Opt [float64 ] `json:"sample_rate,omitzero"`
116- // Language of input text.
117- //
118- // Any of "en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru",
119- // "sv", "tr", "zh".
120- Language AudioNewParamsLanguage `json:"language,omitzero"`
121- // Audio encoding of response
122- //
123- // Any of "pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw".
124- ResponseEncoding AudioNewParamsResponseEncoding `json:"response_encoding,omitzero"`
125- // The format of audio output. Supported formats are mp3, wav, raw if streaming is
126- // false. If streaming is true, the only supported format is raw.
127- //
128- // Any of "mp3", "wav", "raw".
129- ResponseFormat AudioNewParamsResponseFormat `json:"response_format,omitzero"`
130- paramObj
131- }
132-
133- func (r AudioNewParams ) MarshalJSON () (data []byte , err error ) {
134- type shadow AudioNewParams
135- return param .MarshalObject (r , (* shadow )(& r ))
136- }
137- func (r * AudioNewParams ) UnmarshalJSON (data []byte ) error {
138- return apijson .UnmarshalRoot (data , r )
139- }
140-
// AudioNewParamsModel is the name of the model to query.
//
// [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
// The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
// canopylabs/orpheus-3b-0.1-ft
type AudioNewParamsModel string

// Known AudioNewParamsModel values.
const (
	AudioNewParamsModelCartesiaSonic            AudioNewParamsModel = "cartesia/sonic"
	AudioNewParamsModelHexgradKokoro82M         AudioNewParamsModel = "hexgrad/Kokoro-82M"
	AudioNewParamsModelCanopylabsOrpheus3b0_1Ft AudioNewParamsModel = "canopylabs/orpheus-3b-0.1-ft"
)
153-
// AudioNewParamsLanguage is the language of the input text.
type AudioNewParamsLanguage string

// Known AudioNewParamsLanguage values.
const (
	AudioNewParamsLanguageEn AudioNewParamsLanguage = "en"
	AudioNewParamsLanguageDe AudioNewParamsLanguage = "de"
	AudioNewParamsLanguageFr AudioNewParamsLanguage = "fr"
	AudioNewParamsLanguageEs AudioNewParamsLanguage = "es"
	AudioNewParamsLanguageHi AudioNewParamsLanguage = "hi"
	AudioNewParamsLanguageIt AudioNewParamsLanguage = "it"
	AudioNewParamsLanguageJa AudioNewParamsLanguage = "ja"
	AudioNewParamsLanguageKo AudioNewParamsLanguage = "ko"
	AudioNewParamsLanguageNl AudioNewParamsLanguage = "nl"
	AudioNewParamsLanguagePl AudioNewParamsLanguage = "pl"
	AudioNewParamsLanguagePt AudioNewParamsLanguage = "pt"
	AudioNewParamsLanguageRu AudioNewParamsLanguage = "ru"
	AudioNewParamsLanguageSv AudioNewParamsLanguage = "sv"
	AudioNewParamsLanguageTr AudioNewParamsLanguage = "tr"
	AudioNewParamsLanguageZh AudioNewParamsLanguage = "zh"
)
174-
// AudioNewParamsResponseEncoding is the audio encoding of the response.
type AudioNewParamsResponseEncoding string

// Known AudioNewParamsResponseEncoding values.
const (
	AudioNewParamsResponseEncodingPcmF32le AudioNewParamsResponseEncoding = "pcm_f32le"
	AudioNewParamsResponseEncodingPcmS16le AudioNewParamsResponseEncoding = "pcm_s16le"
	AudioNewParamsResponseEncodingPcmMulaw AudioNewParamsResponseEncoding = "pcm_mulaw"
	AudioNewParamsResponseEncodingPcmAlaw  AudioNewParamsResponseEncoding = "pcm_alaw"
)
184-
// AudioNewParamsResponseFormat is the format of audio output. Supported formats
// are mp3, wav, raw if streaming is false. If streaming is true, the only
// supported format is raw.
type AudioNewParamsResponseFormat string

// Known AudioNewParamsResponseFormat values.
const (
	AudioNewParamsResponseFormatMP3 AudioNewParamsResponseFormat = "mp3"
	AudioNewParamsResponseFormatWav AudioNewParamsResponseFormat = "wav"
	AudioNewParamsResponseFormatRaw AudioNewParamsResponseFormat = "raw"
)
0 commit comments