Skip to content

Commit 82dba38

Browse files
orangetin and jdreamerz authored
audio apis (#238)
* do not merge - audio api init
* Get audio stuff working. (#245)
* Initially getting things working.
* More closely match spec
* Formatting fixes.
* Adjust handling of different types to make linter happy.
* Add type definition
* Decode bytes in ternary
* bump to version 1.3.14

---------

Co-authored-by: jdreamerz <[email protected]>
Co-authored-by: Justin Driemeyer <[email protected]>
1 parent c185015 commit 82dba38

File tree

9 files changed

+325
-9
lines changed

9 files changed

+325
-9
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
1212

1313
[tool.poetry]
1414
name = "together"
15-
version = "1.3.13"
15+
version = "1.3.14"
1616
authors = [
1717
"Together AI <[email protected]>"
1818
]

src/together/abstract/api_requestor.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def parse_stream_helper(line: bytes) -> str | None:
7878
line = line[len(b"data: ") :]
7979
else:
8080
line = line[len(b"data:") :]
81-
if line.strip() == b"[DONE]":
81+
if line.strip().upper() == b"[DONE]":
8282
# return here will cause GeneratorExit exception in urllib3
8383
# and it will close http connection with TCP Reset
8484
return None
@@ -620,17 +620,22 @@ def _interpret_response(
620620
self, result: requests.Response, stream: bool
621621
) -> Tuple[TogetherResponse | Iterator[TogetherResponse], bool]:
622622
"""Returns the response(s) and a bool indicating whether it is a stream."""
623-
if stream and "text/event-stream" in result.headers.get("Content-Type", ""):
623+
content_type = result.headers.get("Content-Type", "")
624+
if stream and "text/event-stream" in content_type:
624625
return (
625626
self._interpret_response_line(
626627
line, result.status_code, result.headers, stream=True
627628
)
628629
for line in parse_stream(result.iter_lines())
629630
), True
630631
else:
632+
if content_type in ["application/octet-stream", "audio/wav", "audio/mpeg"]:
633+
content = result.content
634+
else:
635+
content = result.content.decode("utf-8")
631636
return (
632637
self._interpret_response_line(
633-
result.content.decode("utf-8"),
638+
content,
634639
result.status_code,
635640
result.headers,
636641
stream=False,
@@ -670,7 +675,7 @@ async def _interpret_async_response(
670675
)
671676

672677
def _interpret_response_line(
673-
self, rbody: str, rcode: int, rheaders: Any, stream: bool
678+
self, rbody: str | bytes, rcode: int, rheaders: Any, stream: bool
674679
) -> TogetherResponse:
675680
# HTTP 204 response code does not have any content in the body.
676681
if rcode == 204:
@@ -684,13 +689,16 @@ def _interpret_response_line(
684689
)
685690

686691
try:
687-
if "text/plain" in rheaders.get("Content-Type", ""):
688-
data: Dict[str, Any] = {"message": rbody}
692+
content_type = rheaders.get("Content-Type", "")
693+
if isinstance(rbody, bytes):
694+
data: Dict[str, Any] | bytes = rbody
695+
elif "text/plain" in content_type:
696+
data = {"message": rbody}
689697
else:
690698
data = json.loads(rbody)
691699
except (JSONDecodeError, UnicodeDecodeError) as e:
692700
raise error.APIError(
693-
f"Error code: {rcode} -{rbody}",
701+
f"Error code: {rcode} -{rbody if isinstance(rbody, str) else rbody.decode()}",
694702
http_status=rcode,
695703
headers=rheaders,
696704
) from e

src/together/client.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ class Together:
1919
models: resources.Models
2020
fine_tuning: resources.FineTuning
2121
rerank: resources.Rerank
22+
audio: resources.Audio
2223

2324
# client options
2425
client: TogetherClient
@@ -79,6 +80,7 @@ def __init__(
7980
self.models = resources.Models(self.client)
8081
self.fine_tuning = resources.FineTuning(self.client)
8182
self.rerank = resources.Rerank(self.client)
83+
self.audio = resources.Audio(self.client)
8284

8385

8486
class AsyncTogether:

src/together/resources/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from together.resources.images import AsyncImages, Images
77
from together.resources.models import AsyncModels, Models
88
from together.resources.rerank import AsyncRerank, Rerank
9+
from together.resources.audio import AsyncAudio, Audio
910

1011

1112
__all__ = [
@@ -25,4 +26,6 @@
2526
"Models",
2627
"AsyncRerank",
2728
"Rerank",
29+
"AsyncAudio",
30+
"Audio",
2831
]

src/together/resources/audio/__init__.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from functools import cached_property
2+
3+
from together.resources.audio.speech import AsyncSpeech, Speech
4+
from together.types import (
5+
TogetherClient,
6+
)
7+
8+
9+
class Audio:
    """Groups the audio sub-resources (currently only ``speech``) of a client."""

    def __init__(self, client: TogetherClient) -> None:
        # Held so that sub-resources can be constructed on first access.
        self._client = client

    @cached_property
    def speech(self) -> Speech:
        """Return the ``Speech`` resource bound to this object's client.

        ``cached_property`` stores the result on the instance, so the
        ``Speech`` object is built on first access and reused afterwards.
        """
        speech_resource = Speech(self._client)
        return speech_resource
16+
17+
18+
class AsyncAudio:
    """Groups the async audio sub-resources (currently only ``speech``)."""

    def __init__(self, client: TogetherClient) -> None:
        # Held so that sub-resources can be constructed on first access.
        self._client = client

    @cached_property
    def speech(self) -> AsyncSpeech:
        """Return the ``AsyncSpeech`` resource bound to this object's client.

        ``cached_property`` stores the result on the instance, so the
        ``AsyncSpeech`` object is built on first access and reused afterwards.
        """
        speech_resource = AsyncSpeech(self._client)
        return speech_resource

src/together/resources/audio/speech.py

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
from __future__ import annotations
2+
3+
from typing import Any, AsyncGenerator, Dict, Iterator, List, Union
4+
5+
from together.abstract import api_requestor
6+
from together.together_response import TogetherResponse
7+
from together.types import (
8+
AudioSpeechRequest,
9+
AudioResponseFormat,
10+
AudioLanguage,
11+
AudioResponseEncoding,
12+
AudioSpeechStreamChunk,
13+
AudioSpeechStreamEvent,
14+
AudioSpeechStreamResponse,
15+
TogetherClient,
16+
TogetherRequest,
17+
)
18+
19+
20+
class Speech:
    """Text-to-speech resource that sends blocking requests to ``audio/speech``."""

    def __init__(self, client: TogetherClient) -> None:
        self._client = client

    def create(
        self,
        *,
        model: str,
        input: str,
        voice: str | None = None,
        response_format: str = "wav",
        language: str = "en",
        response_encoding: str = "pcm_f32le",
        sample_rate: int = 44100,
        stream: bool = False,
        **kwargs: Any,
    ) -> AudioSpeechStreamResponse:
        """
        Method to generate audio from input text using a specified model.

        The string options ``response_format``, ``language`` and
        ``response_encoding`` are converted with ``AudioResponseFormat``,
        ``AudioLanguage`` and ``AudioResponseEncoding`` while the request is
        being built, so a value those types reject fails before anything is
        sent.

        Args:
            model (str): The name of the model to query.
            input (str): Input text to generate the audio for.
            voice (str, optional): The voice to use for generating the audio.
                Defaults to None, in which case the field is left out of the
                request payload.
            response_format (str, optional): The format of audio output.
                Defaults to "wav".
            language (str, optional): Language of input text.
                Defaults to "en".
            response_encoding (str, optional): Audio encoding of response.
                Defaults to "pcm_f32le".
            sample_rate (int, optional): Sampling rate to use for the output audio.
                Defaults to 44100.
            stream (bool, optional): If true, output is streamed for several characters at a time.
                Sent in the payload and also passed to the requestor.
                Defaults to False.
            **kwargs: Extra fields passed through to ``AudioSpeechRequest``.

        Returns:
            AudioSpeechStreamResponse: Wrapper around the response object
            returned by the requestor.
        """

        requestor = api_requestor.APIRequestor(
            client=self._client,
        )

        # exclude_none drops unset optional fields (e.g. voice) from the payload.
        parameter_payload = AudioSpeechRequest(
            model=model,
            input=input,
            voice=voice,
            response_format=AudioResponseFormat(response_format),
            language=AudioLanguage(language),
            response_encoding=AudioResponseEncoding(response_encoding),
            sample_rate=sample_rate,
            stream=stream,
            **kwargs,
        ).model_dump(exclude_none=True)

        # Only the first element of the returned triple is used; the other two
        # are discarded, matching AsyncSpeech.create.
        response, _, _ = requestor.request(
            options=TogetherRequest(
                method="POST",
                url="audio/speech",
                params=parameter_payload,
            ),
            stream=stream,
        )

        return AudioSpeechStreamResponse(response=response)
86+
87+
88+
class AsyncSpeech:
    """Text-to-speech resource that awaits requests to ``audio/speech``."""

    def __init__(self, client: TogetherClient) -> None:
        self._client = client

    async def create(
        self,
        *,
        model: str,
        input: str,
        voice: str | None = None,
        response_format: str = "wav",
        language: str = "en",
        response_encoding: str = "pcm_f32le",
        sample_rate: int = 44100,
        stream: bool = False,
        **kwargs: Any,
    ) -> AudioSpeechStreamResponse:
        """
        Async method to generate audio from input text using a specified model.

        Args:
            model (str): The name of the model to query.
            input (str): Input text to generate the audio for.
            voice (str, optional): The voice to use for generating the audio.
                Defaults to None, in which case the field is left out of the
                request payload.
            response_format (str, optional): The format of audio output.
                Defaults to "wav".
            language (str, optional): Language of input text.
                Defaults to "en".
            response_encoding (str, optional): Audio encoding of response.
                Defaults to "pcm_f32le".
            sample_rate (int, optional): Sampling rate to use for the output audio.
                Defaults to 44100.
            stream (bool, optional): If true, output is streamed for several characters at a time.
                Sent in the payload and also passed to the requestor.
                Defaults to False.
            **kwargs: Extra fields passed through to ``AudioSpeechRequest``.

        Returns:
            AudioSpeechStreamResponse: Wrapper around the response object
            returned by the requestor.
        """

        requestor = api_requestor.APIRequestor(client=self._client)

        # Convert the string options first, in the same order the request
        # fields are listed, then assemble the request model.
        audio_format = AudioResponseFormat(response_format)
        audio_language = AudioLanguage(language)
        audio_encoding = AudioResponseEncoding(response_encoding)
        speech_request = AudioSpeechRequest(
            model=model,
            input=input,
            voice=voice,
            response_format=audio_format,
            language=audio_language,
            response_encoding=audio_encoding,
            sample_rate=sample_rate,
            stream=stream,
            **kwargs,
        )
        # exclude_none drops unset optional fields (e.g. voice) from the payload.
        parameter_payload = speech_request.model_dump(exclude_none=True)

        request_options = TogetherRequest(
            method="POST",
            url="audio/speech",
            params=parameter_payload,
        )
        # Only the first element of the returned triple is used.
        response, _, _ = await requestor.arequest(
            options=request_options,
            stream=stream,
        )

        return AudioSpeechStreamResponse(response=response)

src/together/together_response.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ class TogetherResponse:
88
API Response class. Stores headers and response data.
99
"""
1010

11-
def __init__(self, data: Dict[str, Any], headers: Dict[str, Any]):
11+
def __init__(self, data: Any, headers: Dict[str, Any]):
1212
self._headers = headers
1313
self.data = data
1414

src/together/types/__init__.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,15 @@
4242
RerankRequest,
4343
RerankResponse,
4444
)
45+
from together.types.audio_speech import (
46+
AudioSpeechRequest,
47+
AudioResponseFormat,
48+
AudioLanguage,
49+
AudioResponseEncoding,
50+
AudioSpeechStreamChunk,
51+
AudioSpeechStreamEvent,
52+
AudioSpeechStreamResponse,
53+
)
4554

4655
__all__ = [
4756
"TogetherClient",
@@ -77,4 +86,11 @@
7786
"RerankRequest",
7887
"RerankResponse",
7988
"FinetuneTrainingLimits",
89+
"AudioSpeechRequest",
90+
"AudioResponseFormat",
91+
"AudioLanguage",
92+
"AudioResponseEncoding",
93+
"AudioSpeechStreamChunk",
94+
"AudioSpeechStreamEvent",
95+
"AudioSpeechStreamResponse",
8096
]

0 commit comments

Comments
 (0)