feat(api): Change TTS call signature

stainless-app[bot] · stainless-app[bot] · commit f906b2e51c2a · 2025-11-21T15:16:29.000Z
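BREAKING CHANGE: The TTS call moves from `audio.create` to `audio.speech.create` (in this Go SDK: `client.Audio.New`/`NewStreaming` become `client.Audio.Speech.New`/`NewStreaming`, and `AudioNewParams*` types are renamed to `AudioSpeechNewParams*`) to match the Python library and leave room for future audio APIs.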
diff --git a/.stats.yml b/.stats.yml
@@ -1,4 +1,4 @@
 configured_endpoints: 46
 openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-b86f8e6c4674d1a7829ffa8ddff4bc93d21334d231e6a4d0fd734d411c07a4eb.yml
 openapi_spec_hash: 8af4975be6ae8f4655fa92fd26af9682
-config_hash: b337cdd3c62dbd3383529592a029b347
+config_hash: afbbabb8eb5bfbbf8139546a13addd9a
diff --git a/api.md b/api.md
@@ -162,9 +162,11 @@ Response Types:
 
 - <a href="https://pkg.go.dev/github.com/togethercomputer/together-go">together</a>.<a href="https://pkg.go.dev/github.com/togethercomputer/together-go#AudioSpeechStreamChunk">AudioSpeechStreamChunk</a>
 
+## Speech
+
 Methods:
 
-- <code title="post /audio/speech">client.Audio.<a href="https://pkg.go.dev/github.com/togethercomputer/together-go#AudioService.New">New</a>(ctx <a href="https://pkg.go.dev/context">context</a>.<a href="https://pkg.go.dev/context#Context">Context</a>, body <a href="https://pkg.go.dev/github.com/togethercomputer/together-go">together</a>.<a href="https://pkg.go.dev/github.com/togethercomputer/together-go#AudioNewParams">AudioNewParams</a>) (http.Response, <a href="https://pkg.go.dev/builtin#error">error</a>)</code>
+- <code title="post /audio/speech">client.Audio.Speech.<a href="https://pkg.go.dev/github.com/togethercomputer/together-go#AudioSpeechService.New">New</a>(ctx <a href="https://pkg.go.dev/context">context</a>.<a href="https://pkg.go.dev/context#Context">Context</a>, body <a href="https://pkg.go.dev/github.com/togethercomputer/together-go">together</a>.<a href="https://pkg.go.dev/github.com/togethercomputer/together-go#AudioSpeechNewParams">AudioSpeechNewParams</a>) (http.Response, <a href="https://pkg.go.dev/builtin#error">error</a>)</code>
 
 ## Voices
 
diff --git a/audio.go b/audio.go
@@ -3,16 +3,9 @@
 package together
 
 import (
-	"context"
-	"net/http"
-	"slices"
-
 	"github.com/togethercomputer/together-go/internal/apijson"
-	"github.com/togethercomputer/together-go/internal/requestconfig"
 	"github.com/togethercomputer/together-go/option"
-	"github.com/togethercomputer/together-go/packages/param"
 	"github.com/togethercomputer/together-go/packages/respjson"
-	"github.com/togethercomputer/together-go/packages/ssestream"
 )
 
 // AudioService contains methods and other services that help with interacting with
@@ -23,6 +16,7 @@ import (
 // the [NewAudioService] method instead.
 type AudioService struct {
 	Options        []option.RequestOption
+	Speech         AudioSpeechService
 	Voices         AudioVoiceService
 	Transcriptions AudioTranscriptionService
 	Translations   AudioTranslationService
@@ -34,34 +28,13 @@ type AudioService struct {
 func NewAudioService(opts ...option.RequestOption) (r AudioService) {
 	r = AudioService{}
 	r.Options = opts
+	r.Speech = NewAudioSpeechService(opts...)
 	r.Voices = NewAudioVoiceService(opts...)
 	r.Transcriptions = NewAudioTranscriptionService(opts...)
 	r.Translations = NewAudioTranslationService(opts...)
 	return
 }
 
-// Generate audio from input text
-func (r *AudioService) New(ctx context.Context, body AudioNewParams, opts ...option.RequestOption) (res *http.Response, err error) {
-	opts = slices.Concat(r.Options, opts)
-	opts = append([]option.RequestOption{option.WithHeader("Accept", "application/octet-stream")}, opts...)
-	path := "audio/speech"
-	err = requestconfig.ExecuteNewRequest(ctx, http.MethodPost, path, body, &res, opts...)
-	return
-}
-
-// Generate audio from input text
-func (r *AudioService) NewStreaming(ctx context.Context, body AudioNewParams, opts ...option.RequestOption) (stream *ssestream.Stream[AudioSpeechStreamChunk]) {
-	var (
-		raw *http.Response
-		err error
-	)
-	opts = slices.Concat(r.Options, opts)
-	opts = append([]option.RequestOption{option.WithHeader("Accept", "application/octet-stream"), option.WithJSONSet("stream", true)}, opts...)
-	path := "audio/speech"
-	err = requestconfig.ExecuteNewRequest(ctx, http.MethodPost, path, body, &raw, opts...)
-	return ssestream.NewStream[AudioSpeechStreamChunk](ssestream.NewDecoder(raw), err)
-}
-
 type AudioSpeechStreamChunk struct {
 	// base64 encoded audio stream
 	B64   string `json:"b64,required"`
@@ -89,105 +62,3 @@ type AudioSpeechStreamChunkObject string
 const (
 	AudioSpeechStreamChunkObjectAudioTtsChunk AudioSpeechStreamChunkObject = "audio.tts.chunk"
 )
-
-type AudioNewParams struct {
-	// Input text to generate the audio for
-	Input string `json:"input,required"`
-	// The name of the model to query.
-	//
-	// [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
-	// The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
-	// canopylabs/orpheus-3b-0.1-ft
-	Model AudioNewParamsModel `json:"model,omitzero,required"`
-	// The voice to use for generating the audio. The voices supported are different
-	// for each model. For eg - for canopylabs/orpheus-3b-0.1-ft, one of the voices
-	// supported is tara, for hexgrad/Kokoro-82M, one of the voices supported is
-	// af_alloy and for cartesia/sonic, one of the voices supported is "friendly
-	// sidekick".
-	//
-	// You can view the voices supported for each model using the /v1/voices endpoint
-	// sending the model name as the query parameter.
-	// [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
-	Voice string `json:"voice,required"`
-	// Sampling rate to use for the output audio. The default sampling rate for
-	// canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
-	// cartesia/sonic is 44100.
-	SampleRate param.Opt[float64] `json:"sample_rate,omitzero"`
-	// Language of input text.
-	//
-	// Any of "en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru",
-	// "sv", "tr", "zh".
-	Language AudioNewParamsLanguage `json:"language,omitzero"`
-	// Audio encoding of response
-	//
-	// Any of "pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw".
-	ResponseEncoding AudioNewParamsResponseEncoding `json:"response_encoding,omitzero"`
-	// The format of audio output. Supported formats are mp3, wav, raw if streaming is
-	// false. If streaming is true, the only supported format is raw.
-	//
-	// Any of "mp3", "wav", "raw".
-	ResponseFormat AudioNewParamsResponseFormat `json:"response_format,omitzero"`
-	paramObj
-}
-
-func (r AudioNewParams) MarshalJSON() (data []byte, err error) {
-	type shadow AudioNewParams
-	return param.MarshalObject(r, (*shadow)(&r))
-}
-func (r *AudioNewParams) UnmarshalJSON(data []byte) error {
-	return apijson.UnmarshalRoot(data, r)
-}
-
-// The name of the model to query.
-//
-// [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
-// The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
-// canopylabs/orpheus-3b-0.1-ft
-type AudioNewParamsModel string
-
-const (
-	AudioNewParamsModelCartesiaSonic            AudioNewParamsModel = "cartesia/sonic"
-	AudioNewParamsModelHexgradKokoro82M         AudioNewParamsModel = "hexgrad/Kokoro-82M"
-	AudioNewParamsModelCanopylabsOrpheus3b0_1Ft AudioNewParamsModel = "canopylabs/orpheus-3b-0.1-ft"
-)
-
-// Language of input text.
-type AudioNewParamsLanguage string
-
-const (
-	AudioNewParamsLanguageEn AudioNewParamsLanguage = "en"
-	AudioNewParamsLanguageDe AudioNewParamsLanguage = "de"
-	AudioNewParamsLanguageFr AudioNewParamsLanguage = "fr"
-	AudioNewParamsLanguageEs AudioNewParamsLanguage = "es"
-	AudioNewParamsLanguageHi AudioNewParamsLanguage = "hi"
-	AudioNewParamsLanguageIt AudioNewParamsLanguage = "it"
-	AudioNewParamsLanguageJa AudioNewParamsLanguage = "ja"
-	AudioNewParamsLanguageKo AudioNewParamsLanguage = "ko"
-	AudioNewParamsLanguageNl AudioNewParamsLanguage = "nl"
-	AudioNewParamsLanguagePl AudioNewParamsLanguage = "pl"
-	AudioNewParamsLanguagePt AudioNewParamsLanguage = "pt"
-	AudioNewParamsLanguageRu AudioNewParamsLanguage = "ru"
-	AudioNewParamsLanguageSv AudioNewParamsLanguage = "sv"
-	AudioNewParamsLanguageTr AudioNewParamsLanguage = "tr"
-	AudioNewParamsLanguageZh AudioNewParamsLanguage = "zh"
-)
-
-// Audio encoding of response
-type AudioNewParamsResponseEncoding string
-
-const (
-	AudioNewParamsResponseEncodingPcmF32le AudioNewParamsResponseEncoding = "pcm_f32le"
-	AudioNewParamsResponseEncodingPcmS16le AudioNewParamsResponseEncoding = "pcm_s16le"
-	AudioNewParamsResponseEncodingPcmMulaw AudioNewParamsResponseEncoding = "pcm_mulaw"
-	AudioNewParamsResponseEncodingPcmAlaw  AudioNewParamsResponseEncoding = "pcm_alaw"
-)
-
-// The format of audio output. Supported formats are mp3, wav, raw if streaming is
-// false. If streaming is true, the only supported format is raw.
-type AudioNewParamsResponseFormat string
-
-const (
-	AudioNewParamsResponseFormatMP3 AudioNewParamsResponseFormat = "mp3"
-	AudioNewParamsResponseFormatWav AudioNewParamsResponseFormat = "wav"
-	AudioNewParamsResponseFormatRaw AudioNewParamsResponseFormat = "raw"
-)
diff --git a/audiospeech.go b/audiospeech.go
@@ -0,0 +1,158 @@
+// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+package together
+
+import (
+	"context"
+	"net/http"
+	"slices"
+
+	"github.com/togethercomputer/together-go/internal/apijson"
+	"github.com/togethercomputer/together-go/internal/requestconfig"
+	"github.com/togethercomputer/together-go/option"
+	"github.com/togethercomputer/together-go/packages/param"
+	"github.com/togethercomputer/together-go/packages/ssestream"
+)
+
+// AudioSpeechService contains methods and other services that help with
+// interacting with the together API.
+//
+// Note that, unlike clients, this service does not read variables from the
+// environment automatically. Do not instantiate this service directly; use the
+// [NewAudioSpeechService] function instead.
+type AudioSpeechService struct {
+	Options []option.RequestOption
+}
+
+// NewAudioSpeechService generates a new service that applies the given options to
+// each request. These options are applied after the parent client's options (if
+// there is one), and before any request-specific options.
+func NewAudioSpeechService(opts ...option.RequestOption) (r AudioSpeechService) {
+	r = AudioSpeechService{}
+	r.Options = opts
+	return
+}
+
+// New generates audio from input text and returns the raw HTTP response.
+func (r *AudioSpeechService) New(ctx context.Context, body AudioSpeechNewParams, opts ...option.RequestOption) (res *http.Response, err error) {
+	opts = slices.Concat(r.Options, opts)
+	opts = append([]option.RequestOption{option.WithHeader("Accept", "application/octet-stream")}, opts...)
+	path := "audio/speech"
+	err = requestconfig.ExecuteNewRequest(ctx, http.MethodPost, path, body, &res, opts...)
+	return
+}
+
+// NewStreaming generates audio from input text as a stream of [AudioSpeechStreamChunk] values.
+func (r *AudioSpeechService) NewStreaming(ctx context.Context, body AudioSpeechNewParams, opts ...option.RequestOption) (stream *ssestream.Stream[AudioSpeechStreamChunk]) {
+	var (
+		raw *http.Response
+		err error
+	)
+	opts = slices.Concat(r.Options, opts)
+	opts = append([]option.RequestOption{option.WithHeader("Accept", "application/octet-stream"), option.WithJSONSet("stream", true)}, opts...)
+	path := "audio/speech"
+	err = requestconfig.ExecuteNewRequest(ctx, http.MethodPost, path, body, &raw, opts...)
+	return ssestream.NewStream[AudioSpeechStreamChunk](ssestream.NewDecoder(raw), err)
+}
+
+type AudioSpeechNewParams struct {
+	// Input text to generate the audio for
+	Input string `json:"input,required"`
+	// The name of the model to query.
+	//
+	// [See all of Together AI's audio models](https://docs.together.ai/docs/serverless-models#audio-models)
+	// The currently supported TTS models are cartesia/sonic, hexgrad/Kokoro-82M, and
+	// canopylabs/orpheus-3b-0.1-ft.
+	Model AudioSpeechNewParamsModel `json:"model,omitzero,required"`
+	// The voice to use for generating the audio. The supported voices differ for
+	// each model. For example, one of the voices supported by
+	// canopylabs/orpheus-3b-0.1-ft is "tara", one of the voices supported by
+	// hexgrad/Kokoro-82M is "af_alloy", and one of the voices supported by
+	// cartesia/sonic is "friendly sidekick".
+	//
+	// You can view the voices supported for each model using the /v1/voices endpoint
+	// sending the model name as the query parameter.
+	// [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
+	Voice string `json:"voice,required"`
+	// Sampling rate to use for the output audio. The default sampling rate for
+	// canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
+	// cartesia/sonic is 44100.
+	SampleRate param.Opt[float64] `json:"sample_rate,omitzero"`
+	// Language of input text.
+	//
+	// Any of "en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru",
+	// "sv", "tr", "zh".
+	Language AudioSpeechNewParamsLanguage `json:"language,omitzero"`
+	// Audio encoding of response
+	//
+	// Any of "pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw".
+	ResponseEncoding AudioSpeechNewParamsResponseEncoding `json:"response_encoding,omitzero"`
+	// The format of audio output. Supported formats are mp3, wav, raw if streaming is
+	// false. If streaming is true, the only supported format is raw.
+	//
+	// Any of "mp3", "wav", "raw".
+	ResponseFormat AudioSpeechNewParamsResponseFormat `json:"response_format,omitzero"`
+	paramObj
+}
+
+func (r AudioSpeechNewParams) MarshalJSON() (data []byte, err error) {
+	type shadow AudioSpeechNewParams
+	return param.MarshalObject(r, (*shadow)(&r))
+}
+func (r *AudioSpeechNewParams) UnmarshalJSON(data []byte) error {
+	return apijson.UnmarshalRoot(data, r)
+}
+
+// AudioSpeechNewParamsModel is the name of the model to query.
+//
+// [See all of Together AI's audio models](https://docs.together.ai/docs/serverless-models#audio-models)
+// The currently supported TTS models are cartesia/sonic, hexgrad/Kokoro-82M, and
+// canopylabs/orpheus-3b-0.1-ft.
+type AudioSpeechNewParamsModel string
+
+const (
+	AudioSpeechNewParamsModelCartesiaSonic            AudioSpeechNewParamsModel = "cartesia/sonic"
+	AudioSpeechNewParamsModelHexgradKokoro82M         AudioSpeechNewParamsModel = "hexgrad/Kokoro-82M"
+	AudioSpeechNewParamsModelCanopylabsOrpheus3b0_1Ft AudioSpeechNewParamsModel = "canopylabs/orpheus-3b-0.1-ft"
+)
+
+// Language of input text.
+type AudioSpeechNewParamsLanguage string
+
+const (
+	AudioSpeechNewParamsLanguageEn AudioSpeechNewParamsLanguage = "en"
+	AudioSpeechNewParamsLanguageDe AudioSpeechNewParamsLanguage = "de"
+	AudioSpeechNewParamsLanguageFr AudioSpeechNewParamsLanguage = "fr"
+	AudioSpeechNewParamsLanguageEs AudioSpeechNewParamsLanguage = "es"
+	AudioSpeechNewParamsLanguageHi AudioSpeechNewParamsLanguage = "hi"
+	AudioSpeechNewParamsLanguageIt AudioSpeechNewParamsLanguage = "it"
+	AudioSpeechNewParamsLanguageJa AudioSpeechNewParamsLanguage = "ja"
+	AudioSpeechNewParamsLanguageKo AudioSpeechNewParamsLanguage = "ko"
+	AudioSpeechNewParamsLanguageNl AudioSpeechNewParamsLanguage = "nl"
+	AudioSpeechNewParamsLanguagePl AudioSpeechNewParamsLanguage = "pl"
+	AudioSpeechNewParamsLanguagePt AudioSpeechNewParamsLanguage = "pt"
+	AudioSpeechNewParamsLanguageRu AudioSpeechNewParamsLanguage = "ru"
+	AudioSpeechNewParamsLanguageSv AudioSpeechNewParamsLanguage = "sv"
+	AudioSpeechNewParamsLanguageTr AudioSpeechNewParamsLanguage = "tr"
+	AudioSpeechNewParamsLanguageZh AudioSpeechNewParamsLanguage = "zh"
+)
+
+// Audio encoding of response
+type AudioSpeechNewParamsResponseEncoding string
+
+const (
+	AudioSpeechNewParamsResponseEncodingPcmF32le AudioSpeechNewParamsResponseEncoding = "pcm_f32le"
+	AudioSpeechNewParamsResponseEncodingPcmS16le AudioSpeechNewParamsResponseEncoding = "pcm_s16le"
+	AudioSpeechNewParamsResponseEncodingPcmMulaw AudioSpeechNewParamsResponseEncoding = "pcm_mulaw"
+	AudioSpeechNewParamsResponseEncodingPcmAlaw  AudioSpeechNewParamsResponseEncoding = "pcm_alaw"
+)
+
+// The format of audio output. Supported formats are mp3, wav, raw if streaming is
+// false. If streaming is true, the only supported format is raw.
+type AudioSpeechNewParamsResponseFormat string
+
+const (
+	AudioSpeechNewParamsResponseFormatMP3 AudioSpeechNewParamsResponseFormat = "mp3"
+	AudioSpeechNewParamsResponseFormatWav AudioSpeechNewParamsResponseFormat = "wav"
+	AudioSpeechNewParamsResponseFormatRaw AudioSpeechNewParamsResponseFormat = "raw"
+)
diff --git a/audiospeech_test.go b/audiospeech_test.go
@@ -15,7 +15,7 @@ import (
 	"github.com/togethercomputer/together-go/option"
 )
 
-func TestAudioNewWithOptionalParams(t *testing.T) {
+func TestAudioSpeechNewWithOptionalParams(t *testing.T) {
 	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		w.WriteHeader(200)
 		w.Write([]byte("abc"))
@@ -26,13 +26,13 @@ func TestAudioNewWithOptionalParams(t *testing.T) {
 		option.WithBaseURL(baseURL),
 		option.WithAPIKey("My API Key"),
 	)
-	resp, err := client.Audio.New(context.TODO(), together.AudioNewParams{
+	resp, err := client.Audio.Speech.New(context.TODO(), together.AudioSpeechNewParams{
 		Input:            "input",
-		Model:            together.AudioNewParamsModelCartesiaSonic,
+		Model:            together.AudioSpeechNewParamsModelCartesiaSonic,
 		Voice:            "voice",
-		Language:         together.AudioNewParamsLanguageEn,
-		ResponseEncoding: together.AudioNewParamsResponseEncodingPcmF32le,
-		ResponseFormat:   together.AudioNewParamsResponseFormatMP3,
+		Language:         together.AudioSpeechNewParamsLanguageEn,
+		ResponseEncoding: together.AudioSpeechNewParamsResponseEncodingPcmF32le,
+		ResponseFormat:   together.AudioSpeechNewParamsResponseFormatMP3,
 		SampleRate:       together.Float(0),
 	})
 	if err != nil {