Commit 262eed8

add video url support to chat.completions (#256)
* add support for video_url in chat.completions
* update documentation
* bump version
1 parent 9e6120f commit 262eed8

File tree: 4 files changed, +139 -23 lines changed


README.md

Lines changed: 92 additions & 22 deletions
@@ -52,25 +52,101 @@ This repo contains both a Python Library and a CLI. We'll demonstrate how to use
 ### Chat Completions
 
 ```python
-import os
 from together import Together
 
-client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
+client = Together()
 
+# Simple text message
 response = client.chat.completions.create(
     model="mistralai/Mixtral-8x7B-Instruct-v0.1",
     messages=[{"role": "user", "content": "tell me about new york"}],
 )
 print(response.choices[0].message.content)
+
+# Multi-modal message with text and image
+response = client.chat.completions.create(
+    model="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
+    messages=[{
+        "role": "user",
+        "content": [
+            {
+                "type": "text",
+                "text": "What's in this image?"
+            },
+            {
+                "type": "image_url",
+                "image_url": {
+                    "url": "https://huggingface.co/datasets/patrickvonplaten/random_img/resolve/main/yosemite.png"
+                }
+            }
+        ]
+    }]
+)
+print(response.choices[0].message.content)
+
+# Multi-modal message with multiple images
+response = client.chat.completions.create(
+    model="Qwen/Qwen2.5-VL-72B-Instruct",
+    messages=[{
+        "role": "user",
+        "content": [
+            {
+                "type": "text",
+                "text": "Compare these two images."
+            },
+            {
+                "type": "image_url",
+                "image_url": {
+                    "url": "https://huggingface.co/datasets/patrickvonplaten/random_img/resolve/main/yosemite.png"
+                }
+            },
+            {
+                "type": "image_url",
+                "image_url": {
+                    "url": "https://huggingface.co/datasets/patrickvonplaten/random_img/resolve/main/slack.png"
+                }
+            }
+        ]
+    }]
+)
+print(response.choices[0].message.content)
+
+# Multi-modal message with text and video
+response = client.chat.completions.create(
+    model="Qwen/Qwen2.5-VL-72B-Instruct",
+    messages=[{
+        "role": "user",
+        "content": [
+            {
+                "type": "text",
+                "text": "What's happening in this video?"
+            },
+            {
+                "type": "video_url",
+                "video_url": {
+                    "url": "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerFun.mp4"
+                }
+            }
+        ]
+    }]
+)
+print(response.choices[0].message.content)
 ```
 
+The chat completions API supports three types of content:
+- Plain text messages using the `content` field directly
+- Multi-modal messages with images using `type: "image_url"`
+- Multi-modal messages with videos using `type: "video_url"`
+
+When using multi-modal content, the `content` field becomes an array of content objects, each with its own type and corresponding data.
+
 #### Streaming
 
 ```python
 import os
 from together import Together
 
-client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
+client = Together()
 stream = client.chat.completions.create(
     model="mistralai/Mixtral-8x7B-Instruct-v0.1",
     messages=[{"role": "user", "content": "tell me about new york"}],
@@ -84,17 +160,17 @@ for chunk in stream:
 #### Async usage
 
 ```python
-import os, asyncio
+import asyncio
 from together import AsyncTogether
 
-async_client = AsyncTogether(api_key=os.environ.get("TOGETHER_API_KEY"))
+async_client = AsyncTogether()
 messages = [
     "What are the top things to do in San Francisco?",
     "What country is Paris in?",
 ]
 
 async def async_chat_completion(messages):
-    async_client = AsyncTogether(api_key=os.environ.get("TOGETHER_API_KEY"))
+    async_client = AsyncTogether()
     tasks = [
         async_client.chat.completions.create(
             model="mistralai/Mixtral-8x7B-Instruct-v0.1",
@@ -115,10 +191,9 @@ asyncio.run(async_chat_completion(messages))
 Completions are for code and language models shown [here](https://docs.together.ai/docs/inference-models). Below, a code model example is shown.
 
 ```python
-import os
 from together import Together
 
-client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
+client = Together()
 
 response = client.completions.create(
     model="codellama/CodeLlama-34b-Python-hf",
@@ -131,10 +206,9 @@ print(response.choices[0].text)
 #### Streaming
 
 ```python
-import os
 from together import Together
 
-client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
+client = Together()
 stream = client.completions.create(
     model="codellama/CodeLlama-34b-Python-hf",
     prompt="Write a Next.js component with TailwindCSS for a header component.",
@@ -148,10 +222,10 @@ for chunk in stream:
 #### Async usage
 
 ```python
-import os, asyncio
+import asyncio
 from together import AsyncTogether
 
-async_client = AsyncTogether(api_key=os.environ.get("TOGETHER_API_KEY"))
+async_client = AsyncTogether()
 prompts = [
     "Write a Next.js component with TailwindCSS for a header component.",
     "Write a python function for the fibonacci sequence",
@@ -176,10 +250,9 @@ asyncio.run(async_chat_completion(prompts))
 ### Image generation
 
 ```python
-import os
 from together import Together
 
-client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
+client = Together()
 
 response = client.images.generate(
     prompt="space robots",
@@ -196,7 +269,7 @@ print(response.data[0].b64_json)
 from typing import List
 from together import Together
 
-client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
+client = Together()
 
 def get_embeddings(texts: List[str], model: str) -> List[List[float]]:
     texts = [text.replace("\n", " ") for text in texts]
@@ -215,7 +288,7 @@ print(embeddings)
 from typing import List
 from together import Together
 
-client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
+client = Together()
 
 def get_reranked_documents(query: str, documents: List[str], model: str, top_n: int = 3) -> List[str]:
     outputs = client.rerank.create(model=model, query=query, documents=documents, top_n=top_n)
@@ -237,10 +310,9 @@ Read more about Reranking [here](https://docs.together.ai/docs/rerank-overview).
 The files API is used for fine-tuning and allows developers to upload data to fine-tune on. It also has several methods to list all files, retrive files, and delete files. Please refer to our fine-tuning docs [here](https://docs.together.ai/docs/fine-tuning-python).
 
 ```python
-import os
 from together import Together
 
-client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
+client = Together()
 
 client.files.upload(file="somedata.jsonl") # uploads a file
 client.files.list() # lists all uploaded files
@@ -254,10 +326,9 @@ client.files.delete(id="file-d0d318cb-b7d9-493a-bd70-1cfe089d3815") # deletes a
 The finetune API is used for fine-tuning and allows developers to create finetuning jobs. It also has several methods to list all jobs, retrive statuses and get checkpoints. Please refer to our fine-tuning docs [here](https://docs.together.ai/docs/fine-tuning-python).
 
 ```python
-import os
 from together import Together
 
-client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
+client = Together()
 
 client.fine_tuning.create(
     training_file = 'file-d0d318cb-b7d9-493a-bd70-1cfe089d3815',
@@ -281,10 +352,9 @@ client.fine_tuning.download(id="ft-c66a5c18-1d6d-43c9-94bd-32d756425b4b") # down
 This lists all the models that Together supports.
 
 ```python
-import os
 from together import Together
 
-client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
+client = Together()
 
 models = client.models.list()

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.4.1"
+version = "1.4.2"
 authors = [
     "Together AI <[email protected]>"
 ]

src/together/types/chat_completions.py

Lines changed: 6 additions & 0 deletions
@@ -44,16 +44,22 @@ class ToolCalls(BaseModel):
 class ChatCompletionMessageContentType(str, Enum):
     TEXT = "text"
     IMAGE_URL = "image_url"
+    VIDEO_URL = "video_url"
 
 
 class ChatCompletionMessageContentImageURL(BaseModel):
     url: str
 
 
+class ChatCompletionMessageContentVideoURL(BaseModel):
+    url: str
+
+
 class ChatCompletionMessageContent(BaseModel):
     type: ChatCompletionMessageContentType
     text: str | None = None
     image_url: ChatCompletionMessageContentImageURL | None = None
+    video_url: ChatCompletionMessageContentVideoURL | None = None
 
 
 class ChatCompletionMessage(BaseModel):
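
The new `video_url` field slots into the existing content-part model alongside `text` and `image_url`. As a rough illustration of how the updated models behave (a sketch, not part of this commit; it assumes standard Pydantic coercion of nested dicts and string enum values, and the URL is a placeholder):

```python
from together.types.chat_completions import (
    ChatCompletionMessageContent,
    ChatCompletionMessageContentType,
)

# Pydantic coerces the nested dict into ChatCompletionMessageContentVideoURL
# and the raw string "video_url" into the str-based enum member.
content = ChatCompletionMessageContent(
    type="video_url",
    video_url={"url": "https://example.com/clip.mp4"},  # placeholder URL
)

assert content.type == ChatCompletionMessageContentType.VIDEO_URL
assert content.video_url.url == "https://example.com/clip.mp4"
assert content.text is None and content.image_url is None
```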

tests/unit/test_video_url.py

Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
+from together.types.chat_completions import (
+    ChatCompletionMessage,
+    ChatCompletionMessageContent,
+    ChatCompletionMessageContentType,
+    ChatCompletionMessageContentVideoURL,
+    MessageRole,
+)
+
+
+def test_video_url_message():
+    # Test creating a message with video_url content
+    message = ChatCompletionMessage(
+        role=MessageRole.USER,
+        content=[
+            ChatCompletionMessageContent(
+                type=ChatCompletionMessageContentType.TEXT, text="What's in this video?"
+            ),
+            ChatCompletionMessageContent(
+                type=ChatCompletionMessageContentType.VIDEO_URL,
+                video_url=ChatCompletionMessageContentVideoURL(
+                    url="https://example.com/video.mp4"
+                ),
+            ),
+        ],
+    )
+
+    # Verify the message structure
+    assert message.role == MessageRole.USER
+    assert isinstance(message.content, list)
+    assert len(message.content) == 2
+
+    # Verify text content
+    assert message.content[0].type == ChatCompletionMessageContentType.TEXT
+    assert message.content[0].text == "What's in this video?"
+    assert message.content[0].video_url is None
+
+    # Verify video_url content
+    assert message.content[1].type == ChatCompletionMessageContentType.VIDEO_URL
+    assert message.content[1].text is None
+    assert message.content[1].video_url.url == "https://example.com/video.mp4"
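
For reference, the same model dumps back to the plain dict shape that the README examples pass to `create`. A minimal sketch, assuming the library is on Pydantic v2 (where `model_dump` exists; on Pydantic v1 the equivalent call is `.dict()`):

```python
from together.types.chat_completions import (
    ChatCompletionMessageContent,
    ChatCompletionMessageContentType,
    ChatCompletionMessageContentVideoURL,
)

content = ChatCompletionMessageContent(
    type=ChatCompletionMessageContentType.VIDEO_URL,
    video_url=ChatCompletionMessageContentVideoURL(url="https://example.com/video.mp4"),
)

# Serialize to JSON-compatible data, dropping the unset optional fields
# (`text` and `image_url`). Assumes Pydantic v2; on v1 this would be
# content.dict(exclude_none=True).
print(content.model_dump(mode="json", exclude_none=True))
# -> {'type': 'video_url', 'video_url': {'url': 'https://example.com/video.mp4'}}
```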

0 commit comments
