Skip to content

Commit f906b2e

Browse files
feat(api): Change TTS call signature
BREAKING CHANGE: Change the call signature from `audio.create` to `audio.speech.create` to match the Python library's spec and leave room for future audio APIs
1 parent 15519be commit f906b2e

File tree

5 files changed

+170
-139
lines changed

5 files changed

+170
-139
lines changed

.stats.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 46
22
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-b86f8e6c4674d1a7829ffa8ddff4bc93d21334d231e6a4d0fd734d411c07a4eb.yml
33
openapi_spec_hash: 8af4975be6ae8f4655fa92fd26af9682
4-
config_hash: b337cdd3c62dbd3383529592a029b347
4+
config_hash: afbbabb8eb5bfbbf8139546a13addd9a

api.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,11 @@ Response Types:
162162

163163
- <a href="https://pkg.go.dev/github.com/togethercomputer/together-go">together</a>.<a href="https://pkg.go.dev/github.com/togethercomputer/together-go#AudioSpeechStreamChunk">AudioSpeechStreamChunk</a>
164164

165+
## Speech
166+
165167
Methods:
166168

167-
- <code title="post /audio/speech">client.Audio.<a href="https://pkg.go.dev/github.com/togethercomputer/together-go#AudioService.New">New</a>(ctx <a href="https://pkg.go.dev/context">context</a>.<a href="https://pkg.go.dev/context#Context">Context</a>, body <a href="https://pkg.go.dev/github.com/togethercomputer/together-go">together</a>.<a href="https://pkg.go.dev/github.com/togethercomputer/together-go#AudioNewParams">AudioNewParams</a>) (http.Response, <a href="https://pkg.go.dev/builtin#error">error</a>)</code>
169+
- <code title="post /audio/speech">client.Audio.Speech.<a href="https://pkg.go.dev/github.com/togethercomputer/together-go#AudioSpeechService.New">New</a>(ctx <a href="https://pkg.go.dev/context">context</a>.<a href="https://pkg.go.dev/context#Context">Context</a>, body <a href="https://pkg.go.dev/github.com/togethercomputer/together-go">together</a>.<a href="https://pkg.go.dev/github.com/togethercomputer/together-go#AudioSpeechNewParams">AudioSpeechNewParams</a>) (*http.Response, <a href="https://pkg.go.dev/builtin#error">error</a>)</code>
168170

169171
## Voices
170172

audio.go

Lines changed: 2 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,9 @@
33
package together
44

55
import (
6-
"context"
7-
"net/http"
8-
"slices"
9-
106
"github.com/togethercomputer/together-go/internal/apijson"
11-
"github.com/togethercomputer/together-go/internal/requestconfig"
127
"github.com/togethercomputer/together-go/option"
13-
"github.com/togethercomputer/together-go/packages/param"
148
"github.com/togethercomputer/together-go/packages/respjson"
15-
"github.com/togethercomputer/together-go/packages/ssestream"
169
)
1710

1811
// AudioService contains methods and other services that help with interacting with
@@ -23,6 +16,7 @@ import (
2316
// the [NewAudioService] method instead.
2417
type AudioService struct {
2518
Options []option.RequestOption
19+
Speech AudioSpeechService
2620
Voices AudioVoiceService
2721
Transcriptions AudioTranscriptionService
2822
Translations AudioTranslationService
@@ -34,34 +28,13 @@ type AudioService struct {
3428
func NewAudioService(opts ...option.RequestOption) (r AudioService) {
3529
r = AudioService{}
3630
r.Options = opts
31+
r.Speech = NewAudioSpeechService(opts...)
3732
r.Voices = NewAudioVoiceService(opts...)
3833
r.Transcriptions = NewAudioTranscriptionService(opts...)
3934
r.Translations = NewAudioTranslationService(opts...)
4035
return
4136
}
4237

43-
// Generate audio from input text
44-
func (r *AudioService) New(ctx context.Context, body AudioNewParams, opts ...option.RequestOption) (res *http.Response, err error) {
45-
opts = slices.Concat(r.Options, opts)
46-
opts = append([]option.RequestOption{option.WithHeader("Accept", "application/octet-stream")}, opts...)
47-
path := "audio/speech"
48-
err = requestconfig.ExecuteNewRequest(ctx, http.MethodPost, path, body, &res, opts...)
49-
return
50-
}
51-
52-
// Generate audio from input text
53-
func (r *AudioService) NewStreaming(ctx context.Context, body AudioNewParams, opts ...option.RequestOption) (stream *ssestream.Stream[AudioSpeechStreamChunk]) {
54-
var (
55-
raw *http.Response
56-
err error
57-
)
58-
opts = slices.Concat(r.Options, opts)
59-
opts = append([]option.RequestOption{option.WithHeader("Accept", "application/octet-stream"), option.WithJSONSet("stream", true)}, opts...)
60-
path := "audio/speech"
61-
err = requestconfig.ExecuteNewRequest(ctx, http.MethodPost, path, body, &raw, opts...)
62-
return ssestream.NewStream[AudioSpeechStreamChunk](ssestream.NewDecoder(raw), err)
63-
}
64-
6538
type AudioSpeechStreamChunk struct {
6639
// base64 encoded audio stream
6740
B64 string `json:"b64,required"`
@@ -89,105 +62,3 @@ type AudioSpeechStreamChunkObject string
8962
const (
9063
AudioSpeechStreamChunkObjectAudioTtsChunk AudioSpeechStreamChunkObject = "audio.tts.chunk"
9164
)
92-
93-
type AudioNewParams struct {
94-
// Input text to generate the audio for
95-
Input string `json:"input,required"`
96-
// The name of the model to query.
97-
//
98-
// [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
99-
// The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
100-
// canopylabs/orpheus-3b-0.1-ft
101-
Model AudioNewParamsModel `json:"model,omitzero,required"`
102-
// The voice to use for generating the audio. The voices supported are different
103-
// for each model. For eg - for canopylabs/orpheus-3b-0.1-ft, one of the voices
104-
// supported is tara, for hexgrad/Kokoro-82M, one of the voices supported is
105-
// af_alloy and for cartesia/sonic, one of the voices supported is "friendly
106-
// sidekick".
107-
//
108-
// You can view the voices supported for each model using the /v1/voices endpoint
109-
// sending the model name as the query parameter.
110-
// [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
111-
Voice string `json:"voice,required"`
112-
// Sampling rate to use for the output audio. The default sampling rate for
113-
// canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
114-
// cartesia/sonic is 44100.
115-
SampleRate param.Opt[float64] `json:"sample_rate,omitzero"`
116-
// Language of input text.
117-
//
118-
// Any of "en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru",
119-
// "sv", "tr", "zh".
120-
Language AudioNewParamsLanguage `json:"language,omitzero"`
121-
// Audio encoding of response
122-
//
123-
// Any of "pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw".
124-
ResponseEncoding AudioNewParamsResponseEncoding `json:"response_encoding,omitzero"`
125-
// The format of audio output. Supported formats are mp3, wav, raw if streaming is
126-
// false. If streaming is true, the only supported format is raw.
127-
//
128-
// Any of "mp3", "wav", "raw".
129-
ResponseFormat AudioNewParamsResponseFormat `json:"response_format,omitzero"`
130-
paramObj
131-
}
132-
133-
func (r AudioNewParams) MarshalJSON() (data []byte, err error) {
134-
type shadow AudioNewParams
135-
return param.MarshalObject(r, (*shadow)(&r))
136-
}
137-
func (r *AudioNewParams) UnmarshalJSON(data []byte) error {
138-
return apijson.UnmarshalRoot(data, r)
139-
}
140-
141-
// The name of the model to query.
142-
//
143-
// [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
144-
// The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
145-
// canopylabs/orpheus-3b-0.1-ft
146-
type AudioNewParamsModel string
147-
148-
const (
149-
AudioNewParamsModelCartesiaSonic AudioNewParamsModel = "cartesia/sonic"
150-
AudioNewParamsModelHexgradKokoro82M AudioNewParamsModel = "hexgrad/Kokoro-82M"
151-
AudioNewParamsModelCanopylabsOrpheus3b0_1Ft AudioNewParamsModel = "canopylabs/orpheus-3b-0.1-ft"
152-
)
153-
154-
// Language of input text.
155-
type AudioNewParamsLanguage string
156-
157-
const (
158-
AudioNewParamsLanguageEn AudioNewParamsLanguage = "en"
159-
AudioNewParamsLanguageDe AudioNewParamsLanguage = "de"
160-
AudioNewParamsLanguageFr AudioNewParamsLanguage = "fr"
161-
AudioNewParamsLanguageEs AudioNewParamsLanguage = "es"
162-
AudioNewParamsLanguageHi AudioNewParamsLanguage = "hi"
163-
AudioNewParamsLanguageIt AudioNewParamsLanguage = "it"
164-
AudioNewParamsLanguageJa AudioNewParamsLanguage = "ja"
165-
AudioNewParamsLanguageKo AudioNewParamsLanguage = "ko"
166-
AudioNewParamsLanguageNl AudioNewParamsLanguage = "nl"
167-
AudioNewParamsLanguagePl AudioNewParamsLanguage = "pl"
168-
AudioNewParamsLanguagePt AudioNewParamsLanguage = "pt"
169-
AudioNewParamsLanguageRu AudioNewParamsLanguage = "ru"
170-
AudioNewParamsLanguageSv AudioNewParamsLanguage = "sv"
171-
AudioNewParamsLanguageTr AudioNewParamsLanguage = "tr"
172-
AudioNewParamsLanguageZh AudioNewParamsLanguage = "zh"
173-
)
174-
175-
// Audio encoding of response
176-
type AudioNewParamsResponseEncoding string
177-
178-
const (
179-
AudioNewParamsResponseEncodingPcmF32le AudioNewParamsResponseEncoding = "pcm_f32le"
180-
AudioNewParamsResponseEncodingPcmS16le AudioNewParamsResponseEncoding = "pcm_s16le"
181-
AudioNewParamsResponseEncodingPcmMulaw AudioNewParamsResponseEncoding = "pcm_mulaw"
182-
AudioNewParamsResponseEncodingPcmAlaw AudioNewParamsResponseEncoding = "pcm_alaw"
183-
)
184-
185-
// The format of audio output. Supported formats are mp3, wav, raw if streaming is
186-
// false. If streaming is true, the only supported format is raw.
187-
type AudioNewParamsResponseFormat string
188-
189-
const (
190-
AudioNewParamsResponseFormatMP3 AudioNewParamsResponseFormat = "mp3"
191-
AudioNewParamsResponseFormatWav AudioNewParamsResponseFormat = "wav"
192-
AudioNewParamsResponseFormatRaw AudioNewParamsResponseFormat = "raw"
193-
)

audiospeech.go

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2+
3+
package together
4+
5+
import (
6+
"context"
7+
"net/http"
8+
"slices"
9+
10+
"github.com/togethercomputer/together-go/internal/apijson"
11+
"github.com/togethercomputer/together-go/internal/requestconfig"
12+
"github.com/togethercomputer/together-go/option"
13+
"github.com/togethercomputer/together-go/packages/param"
14+
"github.com/togethercomputer/together-go/packages/ssestream"
15+
)
16+
17+
// AudioSpeechService contains methods and other services that help with
18+
// interacting with the together API.
19+
//
20+
// Note, unlike clients, this service does not read variables from the environment
21+
// automatically. You should not instantiate this service directly; use the
22+
// [NewAudioSpeechService] function instead.
23+
type AudioSpeechService struct {
24+
Options []option.RequestOption
25+
}
26+
27+
// NewAudioSpeechService generates a new service that applies the given options to
28+
// each request. These options are applied after the parent client's options (if
29+
// there is one), and before any request-specific options.
30+
func NewAudioSpeechService(opts ...option.RequestOption) (r AudioSpeechService) {
31+
r = AudioSpeechService{}
32+
r.Options = opts
33+
return
34+
}
35+
36+
// Generate audio from input text
37+
func (r *AudioSpeechService) New(ctx context.Context, body AudioSpeechNewParams, opts ...option.RequestOption) (res *http.Response, err error) {
38+
opts = slices.Concat(r.Options, opts)
39+
opts = append([]option.RequestOption{option.WithHeader("Accept", "application/octet-stream")}, opts...)
40+
path := "audio/speech"
41+
err = requestconfig.ExecuteNewRequest(ctx, http.MethodPost, path, body, &res, opts...)
42+
return
43+
}
44+
45+
// Generate audio from input text
46+
func (r *AudioSpeechService) NewStreaming(ctx context.Context, body AudioSpeechNewParams, opts ...option.RequestOption) (stream *ssestream.Stream[AudioSpeechStreamChunk]) {
47+
var (
48+
raw *http.Response
49+
err error
50+
)
51+
opts = slices.Concat(r.Options, opts)
52+
opts = append([]option.RequestOption{option.WithHeader("Accept", "application/octet-stream"), option.WithJSONSet("stream", true)}, opts...)
53+
path := "audio/speech"
54+
err = requestconfig.ExecuteNewRequest(ctx, http.MethodPost, path, body, &raw, opts...)
55+
return ssestream.NewStream[AudioSpeechStreamChunk](ssestream.NewDecoder(raw), err)
56+
}
57+
58+
type AudioSpeechNewParams struct {
59+
// Input text to generate the audio for
60+
Input string `json:"input,required"`
61+
// The name of the model to query.
62+
//
63+
// [See all of Together AI's audio models](https://docs.together.ai/docs/serverless-models#audio-models).
64+
// The currently supported TTS models are cartesia/sonic, hexgrad/Kokoro-82M, and
65+
// canopylabs/orpheus-3b-0.1-ft.
66+
Model AudioSpeechNewParamsModel `json:"model,omitzero,required"`
67+
// The voice to use for generating the audio. The supported voices differ from
68+
// model to model. For example, one of the voices supported by
69+
// canopylabs/orpheus-3b-0.1-ft is "tara", one supported by hexgrad/Kokoro-82M
70+
// is "af_alloy", and one supported by cartesia/sonic is "friendly
71+
// sidekick".
72+
//
73+
// You can view the voices supported for each model by calling the /v1/voices
74+
// endpoint with the model name as a query parameter.
75+
// [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
76+
Voice string `json:"voice,required"`
77+
// Sampling rate to use for the output audio. The default sampling rate for
78+
// canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
79+
// cartesia/sonic is 44100.
80+
SampleRate param.Opt[float64] `json:"sample_rate,omitzero"`
81+
// Language of input text.
82+
//
83+
// Any of "en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru",
84+
// "sv", "tr", "zh".
85+
Language AudioSpeechNewParamsLanguage `json:"language,omitzero"`
86+
// Audio encoding of response
87+
//
88+
// Any of "pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw".
89+
ResponseEncoding AudioSpeechNewParamsResponseEncoding `json:"response_encoding,omitzero"`
90+
// The format of audio output. Supported formats are mp3, wav, raw if streaming is
91+
// false. If streaming is true, the only supported format is raw.
92+
//
93+
// Any of "mp3", "wav", "raw".
94+
ResponseFormat AudioSpeechNewParamsResponseFormat `json:"response_format,omitzero"`
95+
paramObj
96+
}
97+
98+
func (r AudioSpeechNewParams) MarshalJSON() (data []byte, err error) {
99+
type shadow AudioSpeechNewParams
100+
return param.MarshalObject(r, (*shadow)(&r))
101+
}
102+
func (r *AudioSpeechNewParams) UnmarshalJSON(data []byte) error {
103+
return apijson.UnmarshalRoot(data, r)
104+
}
105+
106+
// The name of the model to query.
107+
//
108+
// [See all of Together AI's audio models](https://docs.together.ai/docs/serverless-models#audio-models).
109+
// The currently supported TTS models are cartesia/sonic, hexgrad/Kokoro-82M, and
110+
// canopylabs/orpheus-3b-0.1-ft.
111+
type AudioSpeechNewParamsModel string
112+
113+
const (
114+
AudioSpeechNewParamsModelCartesiaSonic AudioSpeechNewParamsModel = "cartesia/sonic"
115+
AudioSpeechNewParamsModelHexgradKokoro82M AudioSpeechNewParamsModel = "hexgrad/Kokoro-82M"
116+
AudioSpeechNewParamsModelCanopylabsOrpheus3b0_1Ft AudioSpeechNewParamsModel = "canopylabs/orpheus-3b-0.1-ft"
117+
)
118+
119+
// Language of input text.
120+
type AudioSpeechNewParamsLanguage string
121+
122+
const (
123+
AudioSpeechNewParamsLanguageEn AudioSpeechNewParamsLanguage = "en"
124+
AudioSpeechNewParamsLanguageDe AudioSpeechNewParamsLanguage = "de"
125+
AudioSpeechNewParamsLanguageFr AudioSpeechNewParamsLanguage = "fr"
126+
AudioSpeechNewParamsLanguageEs AudioSpeechNewParamsLanguage = "es"
127+
AudioSpeechNewParamsLanguageHi AudioSpeechNewParamsLanguage = "hi"
128+
AudioSpeechNewParamsLanguageIt AudioSpeechNewParamsLanguage = "it"
129+
AudioSpeechNewParamsLanguageJa AudioSpeechNewParamsLanguage = "ja"
130+
AudioSpeechNewParamsLanguageKo AudioSpeechNewParamsLanguage = "ko"
131+
AudioSpeechNewParamsLanguageNl AudioSpeechNewParamsLanguage = "nl"
132+
AudioSpeechNewParamsLanguagePl AudioSpeechNewParamsLanguage = "pl"
133+
AudioSpeechNewParamsLanguagePt AudioSpeechNewParamsLanguage = "pt"
134+
AudioSpeechNewParamsLanguageRu AudioSpeechNewParamsLanguage = "ru"
135+
AudioSpeechNewParamsLanguageSv AudioSpeechNewParamsLanguage = "sv"
136+
AudioSpeechNewParamsLanguageTr AudioSpeechNewParamsLanguage = "tr"
137+
AudioSpeechNewParamsLanguageZh AudioSpeechNewParamsLanguage = "zh"
138+
)
139+
140+
// Audio encoding of response
141+
type AudioSpeechNewParamsResponseEncoding string
142+
143+
const (
144+
AudioSpeechNewParamsResponseEncodingPcmF32le AudioSpeechNewParamsResponseEncoding = "pcm_f32le"
145+
AudioSpeechNewParamsResponseEncodingPcmS16le AudioSpeechNewParamsResponseEncoding = "pcm_s16le"
146+
AudioSpeechNewParamsResponseEncodingPcmMulaw AudioSpeechNewParamsResponseEncoding = "pcm_mulaw"
147+
AudioSpeechNewParamsResponseEncodingPcmAlaw AudioSpeechNewParamsResponseEncoding = "pcm_alaw"
148+
)
149+
150+
// The format of audio output. Supported formats are mp3, wav, raw if streaming is
151+
// false. If streaming is true, the only supported format is raw.
152+
type AudioSpeechNewParamsResponseFormat string
153+
154+
const (
155+
AudioSpeechNewParamsResponseFormatMP3 AudioSpeechNewParamsResponseFormat = "mp3"
156+
AudioSpeechNewParamsResponseFormatWav AudioSpeechNewParamsResponseFormat = "wav"
157+
AudioSpeechNewParamsResponseFormatRaw AudioSpeechNewParamsResponseFormat = "raw"
158+
)

audio_test.go renamed to audiospeech_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import (
1515
"github.com/togethercomputer/together-go/option"
1616
)
1717

18-
func TestAudioNewWithOptionalParams(t *testing.T) {
18+
func TestAudioSpeechNewWithOptionalParams(t *testing.T) {
1919
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
2020
w.WriteHeader(200)
2121
w.Write([]byte("abc"))
@@ -26,13 +26,13 @@ func TestAudioNewWithOptionalParams(t *testing.T) {
2626
option.WithBaseURL(baseURL),
2727
option.WithAPIKey("My API Key"),
2828
)
29-
resp, err := client.Audio.New(context.TODO(), together.AudioNewParams{
29+
resp, err := client.Audio.Speech.New(context.TODO(), together.AudioSpeechNewParams{
3030
Input: "input",
31-
Model: together.AudioNewParamsModelCartesiaSonic,
31+
Model: together.AudioSpeechNewParamsModelCartesiaSonic,
3232
Voice: "voice",
33-
Language: together.AudioNewParamsLanguageEn,
34-
ResponseEncoding: together.AudioNewParamsResponseEncodingPcmF32le,
35-
ResponseFormat: together.AudioNewParamsResponseFormatMP3,
33+
Language: together.AudioSpeechNewParamsLanguageEn,
34+
ResponseEncoding: together.AudioSpeechNewParamsResponseEncodingPcmF32le,
35+
ResponseFormat: together.AudioSpeechNewParamsResponseFormatMP3,
3636
SampleRate: together.Float(0),
3737
})
3838
if err != nil {

0 commit comments

Comments
 (0)