@@ -2,58 +2,25 @@
 
 import { APIResource } from '../../core/resource';
 import * as AudioAPI from './audio';
+import * as SpeechAPI from './speech';
+import {
+  Speech,
+  SpeechCreateParams,
+  SpeechCreateParamsNonStreaming,
+  SpeechCreateParamsStreaming,
+} from './speech';
 import * as TranscriptionsAPI from './transcriptions';
 import { TranscriptionCreateParams, TranscriptionCreateResponse, Transcriptions } from './transcriptions';
 import * as TranslationsAPI from './translations';
 import { TranslationCreateParams, TranslationCreateResponse, Translations } from './translations';
 import * as VoicesAPI from './voices';
 import { VoiceListResponse, Voices } from './voices';
-import { APIPromise } from '../../core/api-promise';
-import { Stream } from '../../core/streaming';
-import { buildHeaders } from '../../internal/headers';
-import { RequestOptions } from '../../internal/request-options';
 
 export class Audio extends APIResource {
+  speech: SpeechAPI.Speech = new SpeechAPI.Speech(this._client);
   voices: VoicesAPI.Voices = new VoicesAPI.Voices(this._client);
   transcriptions: TranscriptionsAPI.Transcriptions = new TranscriptionsAPI.Transcriptions(this._client);
   translations: TranslationsAPI.Translations = new TranslationsAPI.Translations(this._client);
-
-  /**
-   * Generate audio from input text
-   *
-   * @example
-   * ```ts
-   * const audio = await client.audio.create({
-   *   input: 'input',
-   *   model: 'canopylabs/orpheus-3b-0.1-ft',
-   *   voice: 'voice',
-   * });
-   *
-   * const content = await audio.blob();
-   * console.log(content);
-   * ```
-   */
-  create(body: AudioCreateParamsNonStreaming, options?: RequestOptions): APIPromise<Response>;
-  create(
-    body: AudioCreateParamsStreaming,
-    options?: RequestOptions,
-  ): APIPromise<Stream<AudioSpeechStreamChunk>>;
-  create(
-    body: AudioCreateParamsBase,
-    options?: RequestOptions,
-  ): APIPromise<Stream<AudioSpeechStreamChunk> | Response>;
-  create(
-    body: AudioCreateParams,
-    options?: RequestOptions,
-  ): APIPromise<Response> | APIPromise<Stream<AudioSpeechStreamChunk>> {
-    return this._client.post('/audio/speech', {
-      body,
-      ...options,
-      headers: buildHeaders([{ Accept: 'application/octet-stream' }, options?.headers]),
-      stream: body.stream ?? false,
-      __binaryResponse: true,
-    }) as APIPromise<Response> | APIPromise<Stream<AudioSpeechStreamChunk>>;
-  }
 }
 
 export type AudioFile = AudioFile.AudioSpeechStreamEvent | AudioFile.StreamSentinel;
@@ -79,116 +46,19 @@ export interface AudioSpeechStreamChunk {
   object: 'audio.tts.chunk';
 }
 
-export type AudioCreateParams = AudioCreateParamsNonStreaming | AudioCreateParamsStreaming;
-
-export interface AudioCreateParamsBase {
-  /**
-   * Input text to generate the audio for
-   */
-  input: string;
-
-  /**
-   * The name of the model to query.
-   *
-   * [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
-   * The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
-   * canopylabs/orpheus-3b-0.1-ft
-   */
-  model: 'cartesia/sonic' | 'hexgrad/Kokoro-82M' | 'canopylabs/orpheus-3b-0.1-ft' | (string & {});
-
-  /**
-   * The voice to use for generating the audio. The voices supported are different
-   * for each model. For eg - for canopylabs/orpheus-3b-0.1-ft, one of the voices
-   * supported is tara, for hexgrad/Kokoro-82M, one of the voices supported is
-   * af_alloy and for cartesia/sonic, one of the voices supported is "friendly
-   * sidekick".
-   *
-   * You can view the voices supported for each model using the /v1/voices endpoint
-   * sending the model name as the query parameter.
-   * [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
-   */
-  voice: string;
-
-  /**
-   * Language of input text.
-   */
-  language?:
-    | 'en'
-    | 'de'
-    | 'fr'
-    | 'es'
-    | 'hi'
-    | 'it'
-    | 'ja'
-    | 'ko'
-    | 'nl'
-    | 'pl'
-    | 'pt'
-    | 'ru'
-    | 'sv'
-    | 'tr'
-    | 'zh';
-
-  /**
-   * Audio encoding of response
-   */
-  response_encoding?: 'pcm_f32le' | 'pcm_s16le' | 'pcm_mulaw' | 'pcm_alaw';
-
-  /**
-   * The format of audio output. Supported formats are mp3, wav, raw if streaming is
-   * false. If streaming is true, the only supported format is raw.
-   */
-  response_format?: 'mp3' | 'wav' | 'raw';
-
-  /**
-   * Sampling rate to use for the output audio. The default sampling rate for
-   * canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
-   * cartesia/sonic is 44100.
-   */
-  sample_rate?: number;
-
-  /**
-   * If true, output is streamed for several characters at a time instead of waiting
-   * for the full response. The stream terminates with `data: [DONE]`. If false,
-   * return the encoded audio as octet stream
-   */
-  stream?: boolean;
-}
-
-export namespace AudioCreateParams {
-  export type AudioCreateParamsNonStreaming = AudioAPI.AudioCreateParamsNonStreaming;
-  export type AudioCreateParamsStreaming = AudioAPI.AudioCreateParamsStreaming;
-}
-
-export interface AudioCreateParamsNonStreaming extends AudioCreateParamsBase {
-  /**
-   * If true, output is streamed for several characters at a time instead of waiting
-   * for the full response. The stream terminates with `data: [DONE]`. If false,
-   * return the encoded audio as octet stream
-   */
-  stream?: false;
-}
-
-export interface AudioCreateParamsStreaming extends AudioCreateParamsBase {
-  /**
-   * If true, output is streamed for several characters at a time instead of waiting
-   * for the full response. The stream terminates with `data: [DONE]`. If false,
-   * return the encoded audio as octet stream
-   */
-  stream: true;
-}
-
+Audio.Speech = Speech;
 Audio.Voices = Voices;
 Audio.Transcriptions = Transcriptions;
 Audio.Translations = Translations;
 
 export declare namespace Audio {
+  export { type AudioFile as AudioFile, type AudioSpeechStreamChunk as AudioSpeechStreamChunk };
+
   export {
-    type AudioFile as AudioFile,
-    type AudioSpeechStreamChunk as AudioSpeechStreamChunk,
-    type AudioCreateParams as AudioCreateParams,
-    type AudioCreateParamsNonStreaming as AudioCreateParamsNonStreaming,
-    type AudioCreateParamsStreaming as AudioCreateParamsStreaming,
+    Speech as Speech,
+    type SpeechCreateParams as SpeechCreateParams,
+    type SpeechCreateParamsNonStreaming as SpeechCreateParamsNonStreaming,
+    type SpeechCreateParamsStreaming as SpeechCreateParamsStreaming,
   };
 
   export { Voices as Voices, type VoiceListResponse as VoiceListResponse };
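
This commit replaces the `Audio.create()` text-to-speech method with a dedicated `Speech` sub-resource, so generation is now reached through `client.audio.speech` while voices, transcriptions, and translations stay where they were. Below is a minimal sketch of what a call site might look like after the change, assuming the `Speech` resource exposes a `create()` method with the same parameter shape (`input`, `model`, `voice`, `stream`) as the removed `AudioCreateParamsBase`, and assuming the usual `together-ai` client construction; none of this is taken from `./speech` itself.

```ts
import Together from 'together-ai';

// Assumes TOGETHER_API_KEY is set in the environment, as in the SDK README.
const client = new Together();

async function main() {
  // Before this commit: const audio = await client.audio.create({ ... });
  // After it, the equivalent call is assumed to live on the new speech sub-resource:
  const speech = await client.audio.speech.create({
    input: 'Hello from the speech resource',
    model: 'canopylabs/orpheus-3b-0.1-ft',
    // 'tara' is the voice the removed docs cite as supported for this model;
    // the /v1/voices endpoint lists the voices available per model.
    voice: 'tara',
    // With stream set to false, the promise is assumed to resolve to binary
    // audio rather than to a stream of audio.tts.chunk events.
    stream: false,
  });

  // The removed @example read the non-streaming response as a blob; the same
  // pattern is assumed to carry over to the speech resource.
  const content = await speech.blob();
  console.log(content);
}

main();
```

With `stream: true`, the overloads removed from `Audio` resolved to a `Stream<AudioSpeechStreamChunk>` instead of a binary `Response`; the `SpeechCreateParamsStreaming` and `SpeechCreateParamsNonStreaming` types imported above suggest the new resource keeps that split.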